diff --git "a/models/CT-CHAT/llama_3.1_70b/trainer_state.json" "b/models/CT-CHAT/llama_3.1_70b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/models/CT-CHAT/llama_3.1_70b/trainer_state.json" @@ -0,0 +1,392033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9676527508985348, + "eval_steps": 500, + "global_step": 56000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.7279513408902406e-05, + "grad_norm": 493.840414600537, + "learning_rate": 1.1514104778353484e-08, + "loss": 9.5039, + "step": 1 + }, + { + "epoch": 3.455902681780481e-05, + "grad_norm": 518.3308181642611, + "learning_rate": 2.3028209556706968e-08, + "loss": 8.8787, + "step": 2 + }, + { + "epoch": 5.1838540226707214e-05, + "grad_norm": 502.48865329676187, + "learning_rate": 3.454231433506045e-08, + "loss": 8.9386, + "step": 3 + }, + { + "epoch": 6.911805363560962e-05, + "grad_norm": 400.5707801552175, + "learning_rate": 4.6056419113413937e-08, + "loss": 8.808, + "step": 4 + }, + { + "epoch": 8.639756704451203e-05, + "grad_norm": 344.54074207241473, + "learning_rate": 5.757052389176742e-08, + "loss": 8.6264, + "step": 5 + }, + { + "epoch": 0.00010367708045341443, + "grad_norm": 338.6730653427219, + "learning_rate": 6.90846286701209e-08, + "loss": 8.4952, + "step": 6 + }, + { + "epoch": 0.00012095659386231684, + "grad_norm": 353.76037349952156, + "learning_rate": 8.059873344847438e-08, + "loss": 8.1545, + "step": 7 + }, + { + "epoch": 0.00013823610727121925, + "grad_norm": 364.60591153693974, + "learning_rate": 9.211283822682787e-08, + "loss": 8.9603, + "step": 8 + }, + { + "epoch": 0.00015551562068012164, + "grad_norm": 348.76805671822007, + "learning_rate": 1.0362694300518136e-07, + "loss": 9.0914, + "step": 9 + }, + { + "epoch": 0.00017279513408902407, + "grad_norm": 382.89772183482, + "learning_rate": 1.1514104778353484e-07, + "loss": 9.5033, + "step": 10 + }, + { + "epoch": 0.00019007464749792646, + "grad_norm": 370.5140328392712, + "learning_rate": 1.2665515256188832e-07, + "loss": 1.1866, + "step": 11 + }, + { + "epoch": 0.00020735416090682886, + "grad_norm": 595.790497364354, + "learning_rate": 1.381692573402418e-07, + "loss": 9.3982, + "step": 12 + }, + { + "epoch": 0.00022463367431573128, + "grad_norm": 399.22241995620124, + "learning_rate": 1.496833621185953e-07, + "loss": 9.2329, + "step": 13 + }, + { + "epoch": 0.00024191318772463368, + "grad_norm": 362.3206663332414, + "learning_rate": 1.6119746689694876e-07, + "loss": 9.6931, + "step": 14 + }, + { + "epoch": 0.0002591927011335361, + "grad_norm": 341.26898144181035, + "learning_rate": 1.7271157167530226e-07, + "loss": 8.4544, + "step": 15 + }, + { + "epoch": 0.0002764722145424385, + "grad_norm": 321.9317286671505, + "learning_rate": 1.8422567645365575e-07, + "loss": 8.6915, + "step": 16 + }, + { + "epoch": 0.0002937517279513409, + "grad_norm": 366.2843670158931, + "learning_rate": 1.957397812320092e-07, + "loss": 8.89, + "step": 17 + }, + { + "epoch": 0.0003110312413602433, + "grad_norm": 388.35543147098673, + "learning_rate": 2.0725388601036273e-07, + "loss": 8.1529, + "step": 18 + }, + { + "epoch": 0.0003283107547691457, + "grad_norm": 378.44886323901073, + "learning_rate": 2.187679907887162e-07, + "loss": 8.6319, + "step": 19 + }, + { + "epoch": 0.00034559026817804813, + "grad_norm": 390.6704345869671, + "learning_rate": 2.3028209556706968e-07, + "loss": 9.0838, + "step": 20 + }, + { + "epoch": 0.0003628697815869505, + "grad_norm": 354.0164428030922, + "learning_rate": 2.417962003454232e-07, + "loss": 9.1083, + "step": 21 + }, + { + "epoch": 0.0003801492949958529, + "grad_norm": 356.81965837940163, + "learning_rate": 2.5331030512377664e-07, + "loss": 8.9253, + "step": 22 + }, + { + "epoch": 0.0003974288084047553, + "grad_norm": 337.42098434286726, + "learning_rate": 2.6482440990213013e-07, + "loss": 8.0338, + "step": 23 + }, + { + "epoch": 0.0004147083218136577, + "grad_norm": 326.21281840056264, + "learning_rate": 2.763385146804836e-07, + "loss": 1.2993, + "step": 24 + }, + { + "epoch": 0.0004319878352225601, + "grad_norm": 336.88377139287144, + "learning_rate": 2.878526194588371e-07, + "loss": 8.3137, + "step": 25 + }, + { + "epoch": 0.00044926734863146256, + "grad_norm": 359.34186206620274, + "learning_rate": 2.993667242371906e-07, + "loss": 9.4434, + "step": 26 + }, + { + "epoch": 0.00046654686204036495, + "grad_norm": 308.7958090560352, + "learning_rate": 3.1088082901554404e-07, + "loss": 8.3611, + "step": 27 + }, + { + "epoch": 0.00048382637544926735, + "grad_norm": 284.49116773774807, + "learning_rate": 3.2239493379389753e-07, + "loss": 1.3752, + "step": 28 + }, + { + "epoch": 0.0005011058888581697, + "grad_norm": 215.60075465716855, + "learning_rate": 3.3390903857225107e-07, + "loss": 7.6644, + "step": 29 + }, + { + "epoch": 0.0005183854022670722, + "grad_norm": 315.66203599330726, + "learning_rate": 3.454231433506045e-07, + "loss": 8.164, + "step": 30 + }, + { + "epoch": 0.0005356649156759745, + "grad_norm": 177.4560927026417, + "learning_rate": 3.56937248128958e-07, + "loss": 7.8134, + "step": 31 + }, + { + "epoch": 0.000552944429084877, + "grad_norm": 198.90324208808994, + "learning_rate": 3.684513529073115e-07, + "loss": 8.353, + "step": 32 + }, + { + "epoch": 0.0005702239424937793, + "grad_norm": 144.14534080597846, + "learning_rate": 3.79965457685665e-07, + "loss": 7.8956, + "step": 33 + }, + { + "epoch": 0.0005875034559026818, + "grad_norm": 161.577887741479, + "learning_rate": 3.914795624640184e-07, + "loss": 7.3921, + "step": 34 + }, + { + "epoch": 0.0006047829693115842, + "grad_norm": 158.60362413852337, + "learning_rate": 4.029936672423719e-07, + "loss": 1.2395, + "step": 35 + }, + { + "epoch": 0.0006220624827204866, + "grad_norm": 237.4693765057713, + "learning_rate": 4.1450777202072546e-07, + "loss": 7.4983, + "step": 36 + }, + { + "epoch": 0.000639341996129389, + "grad_norm": 150.93552149175147, + "learning_rate": 4.260218767990789e-07, + "loss": 7.4312, + "step": 37 + }, + { + "epoch": 0.0006566215095382914, + "grad_norm": 181.99000793772902, + "learning_rate": 4.375359815774324e-07, + "loss": 7.2197, + "step": 38 + }, + { + "epoch": 0.0006739010229471938, + "grad_norm": 132.4022609952606, + "learning_rate": 4.490500863557859e-07, + "loss": 7.7458, + "step": 39 + }, + { + "epoch": 0.0006911805363560963, + "grad_norm": 128.85167644986024, + "learning_rate": 4.6056419113413937e-07, + "loss": 1.1522, + "step": 40 + }, + { + "epoch": 0.0007084600497649986, + "grad_norm": 120.91362466954976, + "learning_rate": 4.720782959124928e-07, + "loss": 7.4049, + "step": 41 + }, + { + "epoch": 0.000725739563173901, + "grad_norm": 119.30120004795259, + "learning_rate": 4.835924006908464e-07, + "loss": 7.327, + "step": 42 + }, + { + "epoch": 0.0007430190765828034, + "grad_norm": 124.49481653281475, + "learning_rate": 4.951065054691998e-07, + "loss": 7.3637, + "step": 43 + }, + { + "epoch": 0.0007602985899917058, + "grad_norm": 124.52575272375124, + "learning_rate": 5.066206102475533e-07, + "loss": 7.1553, + "step": 44 + }, + { + "epoch": 0.0007775781034006082, + "grad_norm": 159.44663586803878, + "learning_rate": 5.181347150259067e-07, + "loss": 6.5678, + "step": 45 + }, + { + "epoch": 0.0007948576168095106, + "grad_norm": 119.82646223687317, + "learning_rate": 5.296488198042603e-07, + "loss": 7.0403, + "step": 46 + }, + { + "epoch": 0.0008121371302184131, + "grad_norm": 116.79057317656827, + "learning_rate": 5.411629245826137e-07, + "loss": 7.0546, + "step": 47 + }, + { + "epoch": 0.0008294166436273154, + "grad_norm": 134.64345213414356, + "learning_rate": 5.526770293609672e-07, + "loss": 7.4153, + "step": 48 + }, + { + "epoch": 0.0008466961570362179, + "grad_norm": 157.16856498294425, + "learning_rate": 5.641911341393208e-07, + "loss": 7.0879, + "step": 49 + }, + { + "epoch": 0.0008639756704451202, + "grad_norm": 133.51323279671027, + "learning_rate": 5.757052389176742e-07, + "loss": 6.564, + "step": 50 + }, + { + "epoch": 0.0008812551838540227, + "grad_norm": 113.7970283618032, + "learning_rate": 5.872193436960277e-07, + "loss": 6.9803, + "step": 51 + }, + { + "epoch": 0.0008985346972629251, + "grad_norm": 146.52087082496945, + "learning_rate": 5.987334484743812e-07, + "loss": 6.341, + "step": 52 + }, + { + "epoch": 0.0009158142106718275, + "grad_norm": 126.05474245345326, + "learning_rate": 6.102475532527346e-07, + "loss": 7.0179, + "step": 53 + }, + { + "epoch": 0.0009330937240807299, + "grad_norm": 118.77050988037927, + "learning_rate": 6.217616580310881e-07, + "loss": 6.5658, + "step": 54 + }, + { + "epoch": 0.0009503732374896323, + "grad_norm": 98.2693590356539, + "learning_rate": 6.332757628094416e-07, + "loss": 6.4741, + "step": 55 + }, + { + "epoch": 0.0009676527508985347, + "grad_norm": 92.7776858145365, + "learning_rate": 6.447898675877951e-07, + "loss": 6.1341, + "step": 56 + }, + { + "epoch": 0.000984932264307437, + "grad_norm": 96.56010313874725, + "learning_rate": 6.563039723661486e-07, + "loss": 6.4058, + "step": 57 + }, + { + "epoch": 0.0010022117777163395, + "grad_norm": 85.85271676457059, + "learning_rate": 6.678180771445021e-07, + "loss": 6.0569, + "step": 58 + }, + { + "epoch": 0.001019491291125242, + "grad_norm": 103.70588542031446, + "learning_rate": 6.793321819228556e-07, + "loss": 5.6757, + "step": 59 + }, + { + "epoch": 0.0010367708045341444, + "grad_norm": 99.46883101604824, + "learning_rate": 6.90846286701209e-07, + "loss": 5.6859, + "step": 60 + }, + { + "epoch": 0.0010540503179430466, + "grad_norm": 102.2124583776169, + "learning_rate": 7.023603914795625e-07, + "loss": 5.9326, + "step": 61 + }, + { + "epoch": 0.001071329831351949, + "grad_norm": 115.53021219992776, + "learning_rate": 7.13874496257916e-07, + "loss": 5.3913, + "step": 62 + }, + { + "epoch": 0.0010886093447608515, + "grad_norm": 80.33888360113784, + "learning_rate": 7.253886010362694e-07, + "loss": 5.4661, + "step": 63 + }, + { + "epoch": 0.001105888858169754, + "grad_norm": 84.48154582792033, + "learning_rate": 7.36902705814623e-07, + "loss": 4.7215, + "step": 64 + }, + { + "epoch": 0.0011231683715786564, + "grad_norm": 92.23783186694641, + "learning_rate": 7.484168105929765e-07, + "loss": 5.2049, + "step": 65 + }, + { + "epoch": 0.0011404478849875587, + "grad_norm": 102.7557006517475, + "learning_rate": 7.5993091537133e-07, + "loss": 5.2504, + "step": 66 + }, + { + "epoch": 0.001157727398396461, + "grad_norm": 78.68254381127215, + "learning_rate": 7.714450201496834e-07, + "loss": 4.8389, + "step": 67 + }, + { + "epoch": 0.0011750069118053636, + "grad_norm": 87.9334296852975, + "learning_rate": 7.829591249280368e-07, + "loss": 5.1231, + "step": 68 + }, + { + "epoch": 0.001192286425214266, + "grad_norm": 76.45529307463734, + "learning_rate": 7.944732297063904e-07, + "loss": 4.9691, + "step": 69 + }, + { + "epoch": 0.0012095659386231685, + "grad_norm": 77.33150666063915, + "learning_rate": 8.059873344847438e-07, + "loss": 4.916, + "step": 70 + }, + { + "epoch": 0.0012268454520320707, + "grad_norm": 65.56068762979181, + "learning_rate": 8.175014392630974e-07, + "loss": 4.4488, + "step": 71 + }, + { + "epoch": 0.0012441249654409731, + "grad_norm": 64.35262159731663, + "learning_rate": 8.290155440414509e-07, + "loss": 1.1278, + "step": 72 + }, + { + "epoch": 0.0012614044788498756, + "grad_norm": 51.97883138373045, + "learning_rate": 8.405296488198043e-07, + "loss": 4.1333, + "step": 73 + }, + { + "epoch": 0.001278683992258778, + "grad_norm": 56.412356212689005, + "learning_rate": 8.520437535981578e-07, + "loss": 4.2249, + "step": 74 + }, + { + "epoch": 0.0012959635056676805, + "grad_norm": 67.59823705138867, + "learning_rate": 8.635578583765112e-07, + "loss": 4.4283, + "step": 75 + }, + { + "epoch": 0.0013132430190765827, + "grad_norm": 55.972596829249575, + "learning_rate": 8.750719631548648e-07, + "loss": 4.6912, + "step": 76 + }, + { + "epoch": 0.0013305225324854852, + "grad_norm": 62.87491469388081, + "learning_rate": 8.865860679332183e-07, + "loss": 4.1597, + "step": 77 + }, + { + "epoch": 0.0013478020458943876, + "grad_norm": 61.1067381039056, + "learning_rate": 8.981001727115717e-07, + "loss": 1.1352, + "step": 78 + }, + { + "epoch": 0.00136508155930329, + "grad_norm": 59.33141250617555, + "learning_rate": 9.096142774899253e-07, + "loss": 3.859, + "step": 79 + }, + { + "epoch": 0.0013823610727121925, + "grad_norm": 58.510309840353685, + "learning_rate": 9.211283822682787e-07, + "loss": 1.2659, + "step": 80 + }, + { + "epoch": 0.0013996405861210948, + "grad_norm": 35.18713616175102, + "learning_rate": 9.326424870466322e-07, + "loss": 3.6612, + "step": 81 + }, + { + "epoch": 0.0014169200995299972, + "grad_norm": 58.05046675214005, + "learning_rate": 9.441565918249856e-07, + "loss": 4.2279, + "step": 82 + }, + { + "epoch": 0.0014341996129388997, + "grad_norm": 43.249811194761385, + "learning_rate": 9.55670696603339e-07, + "loss": 3.6023, + "step": 83 + }, + { + "epoch": 0.001451479126347802, + "grad_norm": 43.69875620403071, + "learning_rate": 9.671848013816928e-07, + "loss": 1.1256, + "step": 84 + }, + { + "epoch": 0.0014687586397567046, + "grad_norm": 64.27883552346233, + "learning_rate": 9.786989061600461e-07, + "loss": 4.287, + "step": 85 + }, + { + "epoch": 0.0014860381531656068, + "grad_norm": 43.05201527987516, + "learning_rate": 9.902130109383997e-07, + "loss": 3.5335, + "step": 86 + }, + { + "epoch": 0.0015033176665745092, + "grad_norm": 46.726159898045836, + "learning_rate": 1.001727115716753e-06, + "loss": 3.1065, + "step": 87 + }, + { + "epoch": 0.0015205971799834117, + "grad_norm": 71.13771238823236, + "learning_rate": 1.0132412204951066e-06, + "loss": 3.6548, + "step": 88 + }, + { + "epoch": 0.0015378766933923141, + "grad_norm": 113.63718994643989, + "learning_rate": 1.02475532527346e-06, + "loss": 4.0386, + "step": 89 + }, + { + "epoch": 0.0015551562068012164, + "grad_norm": 46.21302346176177, + "learning_rate": 1.0362694300518134e-06, + "loss": 3.4353, + "step": 90 + }, + { + "epoch": 0.0015724357202101188, + "grad_norm": 44.74727159649899, + "learning_rate": 1.0477835348301672e-06, + "loss": 3.5417, + "step": 91 + }, + { + "epoch": 0.0015897152336190213, + "grad_norm": 24.998588994289502, + "learning_rate": 1.0592976396085205e-06, + "loss": 3.1706, + "step": 92 + }, + { + "epoch": 0.0016069947470279237, + "grad_norm": 30.01394282905727, + "learning_rate": 1.070811744386874e-06, + "loss": 3.0255, + "step": 93 + }, + { + "epoch": 0.0016242742604368262, + "grad_norm": 31.804750671654435, + "learning_rate": 1.0823258491652274e-06, + "loss": 1.1392, + "step": 94 + }, + { + "epoch": 0.0016415537738457284, + "grad_norm": 30.106360806354843, + "learning_rate": 1.093839953943581e-06, + "loss": 2.7393, + "step": 95 + }, + { + "epoch": 0.0016588332872546309, + "grad_norm": 98.89199301035532, + "learning_rate": 1.1053540587219345e-06, + "loss": 3.5571, + "step": 96 + }, + { + "epoch": 0.0016761128006635333, + "grad_norm": 26.92042937878804, + "learning_rate": 1.116868163500288e-06, + "loss": 2.9823, + "step": 97 + }, + { + "epoch": 0.0016933923140724358, + "grad_norm": 21.568742325407218, + "learning_rate": 1.1283822682786416e-06, + "loss": 3.0226, + "step": 98 + }, + { + "epoch": 0.0017106718274813382, + "grad_norm": 37.70647559814462, + "learning_rate": 1.139896373056995e-06, + "loss": 3.3783, + "step": 99 + }, + { + "epoch": 0.0017279513408902404, + "grad_norm": 25.797206434961396, + "learning_rate": 1.1514104778353484e-06, + "loss": 2.7181, + "step": 100 + }, + { + "epoch": 0.0017452308542991429, + "grad_norm": 25.481911206138427, + "learning_rate": 1.1629245826137018e-06, + "loss": 3.0602, + "step": 101 + }, + { + "epoch": 0.0017625103677080453, + "grad_norm": 20.218042420904172, + "learning_rate": 1.1744386873920553e-06, + "loss": 3.0351, + "step": 102 + }, + { + "epoch": 0.0017797898811169478, + "grad_norm": 30.895842761882836, + "learning_rate": 1.1859527921704089e-06, + "loss": 2.8064, + "step": 103 + }, + { + "epoch": 0.0017970693945258502, + "grad_norm": 23.56006192478716, + "learning_rate": 1.1974668969487624e-06, + "loss": 3.0541, + "step": 104 + }, + { + "epoch": 0.0018143489079347525, + "grad_norm": 18.57146258862623, + "learning_rate": 1.208981001727116e-06, + "loss": 3.2013, + "step": 105 + }, + { + "epoch": 0.001831628421343655, + "grad_norm": 25.147163094052566, + "learning_rate": 1.2204951065054693e-06, + "loss": 2.8263, + "step": 106 + }, + { + "epoch": 0.0018489079347525574, + "grad_norm": 23.736065663825244, + "learning_rate": 1.2320092112838228e-06, + "loss": 2.6456, + "step": 107 + }, + { + "epoch": 0.0018661874481614598, + "grad_norm": 17.751710042875125, + "learning_rate": 1.2435233160621762e-06, + "loss": 2.7754, + "step": 108 + }, + { + "epoch": 0.0018834669615703623, + "grad_norm": 17.576446655661606, + "learning_rate": 1.2550374208405297e-06, + "loss": 3.0138, + "step": 109 + }, + { + "epoch": 0.0019007464749792645, + "grad_norm": 26.306701564723873, + "learning_rate": 1.2665515256188832e-06, + "loss": 2.8745, + "step": 110 + }, + { + "epoch": 0.001918025988388167, + "grad_norm": 21.009553610904725, + "learning_rate": 1.2780656303972366e-06, + "loss": 2.8741, + "step": 111 + }, + { + "epoch": 0.0019353055017970694, + "grad_norm": 19.46753444524016, + "learning_rate": 1.2895797351755901e-06, + "loss": 3.1816, + "step": 112 + }, + { + "epoch": 0.0019525850152059719, + "grad_norm": 20.55887446745726, + "learning_rate": 1.3010938399539437e-06, + "loss": 1.0294, + "step": 113 + }, + { + "epoch": 0.001969864528614874, + "grad_norm": 18.464401640514055, + "learning_rate": 1.3126079447322972e-06, + "loss": 2.9009, + "step": 114 + }, + { + "epoch": 0.0019871440420237765, + "grad_norm": 22.07305160078858, + "learning_rate": 1.3241220495106507e-06, + "loss": 3.0766, + "step": 115 + }, + { + "epoch": 0.002004423555432679, + "grad_norm": 19.676337905828504, + "learning_rate": 1.3356361542890043e-06, + "loss": 2.9331, + "step": 116 + }, + { + "epoch": 0.0020217030688415814, + "grad_norm": 16.34655120357469, + "learning_rate": 1.3471502590673576e-06, + "loss": 2.446, + "step": 117 + }, + { + "epoch": 0.002038982582250484, + "grad_norm": 16.434878289468788, + "learning_rate": 1.3586643638457112e-06, + "loss": 2.9226, + "step": 118 + }, + { + "epoch": 0.0020562620956593863, + "grad_norm": 16.7003501200908, + "learning_rate": 1.3701784686240647e-06, + "loss": 2.7439, + "step": 119 + }, + { + "epoch": 0.002073541609068289, + "grad_norm": 13.23806956865185, + "learning_rate": 1.381692573402418e-06, + "loss": 2.5957, + "step": 120 + }, + { + "epoch": 0.0020908211224771912, + "grad_norm": 20.564612381205002, + "learning_rate": 1.3932066781807716e-06, + "loss": 2.5761, + "step": 121 + }, + { + "epoch": 0.0021081006358860933, + "grad_norm": 23.56266706900898, + "learning_rate": 1.404720782959125e-06, + "loss": 2.7052, + "step": 122 + }, + { + "epoch": 0.0021253801492949957, + "grad_norm": 15.683105054511419, + "learning_rate": 1.4162348877374785e-06, + "loss": 2.674, + "step": 123 + }, + { + "epoch": 0.002142659662703898, + "grad_norm": 15.850185934910344, + "learning_rate": 1.427748992515832e-06, + "loss": 2.68, + "step": 124 + }, + { + "epoch": 0.0021599391761128006, + "grad_norm": 18.15931912078463, + "learning_rate": 1.4392630972941853e-06, + "loss": 2.4464, + "step": 125 + }, + { + "epoch": 0.002177218689521703, + "grad_norm": 20.606538238289513, + "learning_rate": 1.4507772020725389e-06, + "loss": 2.5009, + "step": 126 + }, + { + "epoch": 0.0021944982029306055, + "grad_norm": 15.358407603205778, + "learning_rate": 1.4622913068508926e-06, + "loss": 2.374, + "step": 127 + }, + { + "epoch": 0.002211777716339508, + "grad_norm": 22.876431750668456, + "learning_rate": 1.473805411629246e-06, + "loss": 1.372, + "step": 128 + }, + { + "epoch": 0.0022290572297484104, + "grad_norm": 22.714092890779224, + "learning_rate": 1.4853195164075995e-06, + "loss": 2.6283, + "step": 129 + }, + { + "epoch": 0.002246336743157313, + "grad_norm": 12.424293041038164, + "learning_rate": 1.496833621185953e-06, + "loss": 2.5435, + "step": 130 + }, + { + "epoch": 0.0022636162565662153, + "grad_norm": 12.765614588188402, + "learning_rate": 1.5083477259643064e-06, + "loss": 2.6769, + "step": 131 + }, + { + "epoch": 0.0022808957699751173, + "grad_norm": 15.583309935053334, + "learning_rate": 1.51986183074266e-06, + "loss": 2.5211, + "step": 132 + }, + { + "epoch": 0.0022981752833840198, + "grad_norm": 18.726067553944908, + "learning_rate": 1.5313759355210135e-06, + "loss": 2.2918, + "step": 133 + }, + { + "epoch": 0.002315454796792922, + "grad_norm": 12.96845540351683, + "learning_rate": 1.5428900402993668e-06, + "loss": 2.6392, + "step": 134 + }, + { + "epoch": 0.0023327343102018247, + "grad_norm": 13.064412541668345, + "learning_rate": 1.5544041450777204e-06, + "loss": 2.4383, + "step": 135 + }, + { + "epoch": 0.002350013823610727, + "grad_norm": 11.774776092856401, + "learning_rate": 1.5659182498560737e-06, + "loss": 2.3725, + "step": 136 + }, + { + "epoch": 0.0023672933370196296, + "grad_norm": 16.414137215115666, + "learning_rate": 1.5774323546344272e-06, + "loss": 2.4078, + "step": 137 + }, + { + "epoch": 0.002384572850428532, + "grad_norm": 22.927970438756073, + "learning_rate": 1.5889464594127808e-06, + "loss": 2.3605, + "step": 138 + }, + { + "epoch": 0.0024018523638374345, + "grad_norm": 14.576983099329613, + "learning_rate": 1.600460564191134e-06, + "loss": 2.7586, + "step": 139 + }, + { + "epoch": 0.002419131877246337, + "grad_norm": 12.936761445730053, + "learning_rate": 1.6119746689694876e-06, + "loss": 2.5382, + "step": 140 + }, + { + "epoch": 0.002436411390655239, + "grad_norm": 15.368702204637746, + "learning_rate": 1.6234887737478414e-06, + "loss": 2.5388, + "step": 141 + }, + { + "epoch": 0.0024536909040641414, + "grad_norm": 11.015369706361081, + "learning_rate": 1.6350028785261947e-06, + "loss": 2.5213, + "step": 142 + }, + { + "epoch": 0.002470970417473044, + "grad_norm": 13.335593612830024, + "learning_rate": 1.6465169833045483e-06, + "loss": 2.7123, + "step": 143 + }, + { + "epoch": 0.0024882499308819463, + "grad_norm": 17.037386191577305, + "learning_rate": 1.6580310880829018e-06, + "loss": 1.2075, + "step": 144 + }, + { + "epoch": 0.0025055294442908487, + "grad_norm": 9.01588379548285, + "learning_rate": 1.6695451928612552e-06, + "loss": 2.393, + "step": 145 + }, + { + "epoch": 0.002522808957699751, + "grad_norm": 17.762361730562358, + "learning_rate": 1.6810592976396087e-06, + "loss": 1.3403, + "step": 146 + }, + { + "epoch": 0.0025400884711086536, + "grad_norm": 12.014611733171808, + "learning_rate": 1.6925734024179622e-06, + "loss": 2.1246, + "step": 147 + }, + { + "epoch": 0.002557367984517556, + "grad_norm": 16.704903287570172, + "learning_rate": 1.7040875071963156e-06, + "loss": 1.0283, + "step": 148 + }, + { + "epoch": 0.0025746474979264585, + "grad_norm": 9.711607173615073, + "learning_rate": 1.7156016119746691e-06, + "loss": 2.5095, + "step": 149 + }, + { + "epoch": 0.002591927011335361, + "grad_norm": 9.746009780797493, + "learning_rate": 1.7271157167530224e-06, + "loss": 2.454, + "step": 150 + }, + { + "epoch": 0.002609206524744263, + "grad_norm": 9.151810193956388, + "learning_rate": 1.738629821531376e-06, + "loss": 2.3223, + "step": 151 + }, + { + "epoch": 0.0026264860381531654, + "grad_norm": 14.195461163987863, + "learning_rate": 1.7501439263097295e-06, + "loss": 2.3091, + "step": 152 + }, + { + "epoch": 0.002643765551562068, + "grad_norm": 9.539351795103203, + "learning_rate": 1.7616580310880829e-06, + "loss": 2.3858, + "step": 153 + }, + { + "epoch": 0.0026610450649709703, + "grad_norm": 11.655454158058445, + "learning_rate": 1.7731721358664366e-06, + "loss": 2.6101, + "step": 154 + }, + { + "epoch": 0.002678324578379873, + "grad_norm": 8.079010467830924, + "learning_rate": 1.7846862406447902e-06, + "loss": 2.4461, + "step": 155 + }, + { + "epoch": 0.0026956040917887752, + "grad_norm": 9.157070106381882, + "learning_rate": 1.7962003454231435e-06, + "loss": 2.3681, + "step": 156 + }, + { + "epoch": 0.0027128836051976777, + "grad_norm": 9.397816100634454, + "learning_rate": 1.807714450201497e-06, + "loss": 2.1762, + "step": 157 + }, + { + "epoch": 0.00273016311860658, + "grad_norm": 8.645476894524416, + "learning_rate": 1.8192285549798506e-06, + "loss": 2.3695, + "step": 158 + }, + { + "epoch": 0.0027474426320154826, + "grad_norm": 10.129970523254583, + "learning_rate": 1.830742659758204e-06, + "loss": 2.3692, + "step": 159 + }, + { + "epoch": 0.002764722145424385, + "grad_norm": 9.463011789284737, + "learning_rate": 1.8422567645365575e-06, + "loss": 2.3591, + "step": 160 + }, + { + "epoch": 0.002782001658833287, + "grad_norm": 16.919469757491438, + "learning_rate": 1.853770869314911e-06, + "loss": 1.6574, + "step": 161 + }, + { + "epoch": 0.0027992811722421895, + "grad_norm": 6.335228821787579, + "learning_rate": 1.8652849740932643e-06, + "loss": 2.2994, + "step": 162 + }, + { + "epoch": 0.002816560685651092, + "grad_norm": 8.490413694145094, + "learning_rate": 1.8767990788716179e-06, + "loss": 2.4191, + "step": 163 + }, + { + "epoch": 0.0028338401990599944, + "grad_norm": 8.256979951307194, + "learning_rate": 1.8883131836499712e-06, + "loss": 2.4893, + "step": 164 + }, + { + "epoch": 0.002851119712468897, + "grad_norm": 10.808688597649128, + "learning_rate": 1.8998272884283248e-06, + "loss": 2.3578, + "step": 165 + }, + { + "epoch": 0.0028683992258777993, + "grad_norm": 11.398075481897866, + "learning_rate": 1.911341393206678e-06, + "loss": 1.9642, + "step": 166 + }, + { + "epoch": 0.0028856787392867018, + "grad_norm": 16.115961305675146, + "learning_rate": 1.9228554979850316e-06, + "loss": 1.1886, + "step": 167 + }, + { + "epoch": 0.002902958252695604, + "grad_norm": 10.207121207720823, + "learning_rate": 1.9343696027633856e-06, + "loss": 2.1211, + "step": 168 + }, + { + "epoch": 0.0029202377661045067, + "grad_norm": 8.94129816972982, + "learning_rate": 1.9458837075417387e-06, + "loss": 2.1684, + "step": 169 + }, + { + "epoch": 0.002937517279513409, + "grad_norm": 8.617411900033229, + "learning_rate": 1.9573978123200923e-06, + "loss": 2.1762, + "step": 170 + }, + { + "epoch": 0.002954796792922311, + "grad_norm": 7.9891921597932924, + "learning_rate": 1.968911917098446e-06, + "loss": 2.0693, + "step": 171 + }, + { + "epoch": 0.0029720763063312136, + "grad_norm": 8.446957089125721, + "learning_rate": 1.9804260218767994e-06, + "loss": 2.1445, + "step": 172 + }, + { + "epoch": 0.002989355819740116, + "grad_norm": 8.326542308469355, + "learning_rate": 1.991940126655153e-06, + "loss": 2.3497, + "step": 173 + }, + { + "epoch": 0.0030066353331490185, + "grad_norm": 9.83706107227379, + "learning_rate": 2.003454231433506e-06, + "loss": 2.1685, + "step": 174 + }, + { + "epoch": 0.003023914846557921, + "grad_norm": 7.782636010680279, + "learning_rate": 2.0149683362118596e-06, + "loss": 2.2794, + "step": 175 + }, + { + "epoch": 0.0030411943599668234, + "grad_norm": 9.368979106857175, + "learning_rate": 2.026482440990213e-06, + "loss": 1.8981, + "step": 176 + }, + { + "epoch": 0.003058473873375726, + "grad_norm": 7.661442848119474, + "learning_rate": 2.0379965457685666e-06, + "loss": 2.0225, + "step": 177 + }, + { + "epoch": 0.0030757533867846283, + "grad_norm": 7.963521471406552, + "learning_rate": 2.04951065054692e-06, + "loss": 2.2012, + "step": 178 + }, + { + "epoch": 0.0030930329001935307, + "grad_norm": 16.596272027760133, + "learning_rate": 2.0610247553252733e-06, + "loss": 1.14, + "step": 179 + }, + { + "epoch": 0.0031103124136024327, + "grad_norm": 6.9976587832210235, + "learning_rate": 2.072538860103627e-06, + "loss": 2.0849, + "step": 180 + }, + { + "epoch": 0.003127591927011335, + "grad_norm": 10.420385034609554, + "learning_rate": 2.084052964881981e-06, + "loss": 1.9338, + "step": 181 + }, + { + "epoch": 0.0031448714404202376, + "grad_norm": 7.212774768682671, + "learning_rate": 2.0955670696603344e-06, + "loss": 2.1588, + "step": 182 + }, + { + "epoch": 0.00316215095382914, + "grad_norm": 8.974902154322049, + "learning_rate": 2.1070811744386875e-06, + "loss": 2.1601, + "step": 183 + }, + { + "epoch": 0.0031794304672380425, + "grad_norm": 8.957065451055664, + "learning_rate": 2.118595279217041e-06, + "loss": 2.0331, + "step": 184 + }, + { + "epoch": 0.003196709980646945, + "grad_norm": 13.923461248450915, + "learning_rate": 2.1301093839953946e-06, + "loss": 1.9935, + "step": 185 + }, + { + "epoch": 0.0032139894940558474, + "grad_norm": 10.649751201408533, + "learning_rate": 2.141623488773748e-06, + "loss": 2.0947, + "step": 186 + }, + { + "epoch": 0.00323126900746475, + "grad_norm": 7.195064200969944, + "learning_rate": 2.1531375935521017e-06, + "loss": 2.2394, + "step": 187 + }, + { + "epoch": 0.0032485485208736523, + "grad_norm": 22.507751058644644, + "learning_rate": 2.1646516983304548e-06, + "loss": 1.7992, + "step": 188 + }, + { + "epoch": 0.003265828034282555, + "grad_norm": 7.477988778889132, + "learning_rate": 2.1761658031088083e-06, + "loss": 2.2616, + "step": 189 + }, + { + "epoch": 0.003283107547691457, + "grad_norm": 7.709559577835019, + "learning_rate": 2.187679907887162e-06, + "loss": 2.1514, + "step": 190 + }, + { + "epoch": 0.0033003870611003593, + "grad_norm": 8.043772027528593, + "learning_rate": 2.1991940126655154e-06, + "loss": 2.1309, + "step": 191 + }, + { + "epoch": 0.0033176665745092617, + "grad_norm": 7.459733442746371, + "learning_rate": 2.210708117443869e-06, + "loss": 2.0358, + "step": 192 + }, + { + "epoch": 0.003334946087918164, + "grad_norm": 7.939356124352186, + "learning_rate": 2.222222222222222e-06, + "loss": 2.0006, + "step": 193 + }, + { + "epoch": 0.0033522256013270666, + "grad_norm": 8.02904586826385, + "learning_rate": 2.233736327000576e-06, + "loss": 1.6911, + "step": 194 + }, + { + "epoch": 0.003369505114735969, + "grad_norm": 11.003634594395349, + "learning_rate": 2.2452504317789296e-06, + "loss": 1.926, + "step": 195 + }, + { + "epoch": 0.0033867846281448715, + "grad_norm": 6.632835862654047, + "learning_rate": 2.256764536557283e-06, + "loss": 1.7448, + "step": 196 + }, + { + "epoch": 0.003404064141553774, + "grad_norm": 7.120911130619321, + "learning_rate": 2.2682786413356362e-06, + "loss": 2.1379, + "step": 197 + }, + { + "epoch": 0.0034213436549626764, + "grad_norm": 7.135269285183938, + "learning_rate": 2.27979274611399e-06, + "loss": 1.6438, + "step": 198 + }, + { + "epoch": 0.003438623168371579, + "grad_norm": 7.329329205464777, + "learning_rate": 2.2913068508923433e-06, + "loss": 1.6112, + "step": 199 + }, + { + "epoch": 0.003455902681780481, + "grad_norm": 25.285911526050995, + "learning_rate": 2.302820955670697e-06, + "loss": 1.5685, + "step": 200 + }, + { + "epoch": 0.0034731821951893833, + "grad_norm": 7.6960237536271014, + "learning_rate": 2.3143350604490504e-06, + "loss": 1.8717, + "step": 201 + }, + { + "epoch": 0.0034904617085982858, + "grad_norm": 13.274878315362187, + "learning_rate": 2.3258491652274035e-06, + "loss": 1.0642, + "step": 202 + }, + { + "epoch": 0.0035077412220071882, + "grad_norm": 6.333869415791177, + "learning_rate": 2.337363270005757e-06, + "loss": 1.958, + "step": 203 + }, + { + "epoch": 0.0035250207354160907, + "grad_norm": 8.062058917090775, + "learning_rate": 2.3488773747841106e-06, + "loss": 1.9545, + "step": 204 + }, + { + "epoch": 0.003542300248824993, + "grad_norm": 7.687337426677733, + "learning_rate": 2.360391479562464e-06, + "loss": 1.5414, + "step": 205 + }, + { + "epoch": 0.0035595797622338956, + "grad_norm": 7.921748563354151, + "learning_rate": 2.3719055843408177e-06, + "loss": 1.9237, + "step": 206 + }, + { + "epoch": 0.003576859275642798, + "grad_norm": 10.956616153582681, + "learning_rate": 2.383419689119171e-06, + "loss": 1.6555, + "step": 207 + }, + { + "epoch": 0.0035941387890517005, + "grad_norm": 6.496604905581522, + "learning_rate": 2.394933793897525e-06, + "loss": 1.8886, + "step": 208 + }, + { + "epoch": 0.003611418302460603, + "grad_norm": 7.299310902623465, + "learning_rate": 2.4064478986758784e-06, + "loss": 1.6931, + "step": 209 + }, + { + "epoch": 0.003628697815869505, + "grad_norm": 8.462532668771395, + "learning_rate": 2.417962003454232e-06, + "loss": 1.5013, + "step": 210 + }, + { + "epoch": 0.0036459773292784074, + "grad_norm": 7.8091010735698605, + "learning_rate": 2.429476108232585e-06, + "loss": 1.6414, + "step": 211 + }, + { + "epoch": 0.00366325684268731, + "grad_norm": 7.936266652347465, + "learning_rate": 2.4409902130109386e-06, + "loss": 1.5563, + "step": 212 + }, + { + "epoch": 0.0036805363560962123, + "grad_norm": 6.378473642247383, + "learning_rate": 2.452504317789292e-06, + "loss": 1.7806, + "step": 213 + }, + { + "epoch": 0.0036978158695051147, + "grad_norm": 6.099030247229362, + "learning_rate": 2.4640184225676456e-06, + "loss": 1.5794, + "step": 214 + }, + { + "epoch": 0.003715095382914017, + "grad_norm": 7.3194126369233015, + "learning_rate": 2.475532527345999e-06, + "loss": 1.5061, + "step": 215 + }, + { + "epoch": 0.0037323748963229196, + "grad_norm": 6.480726385659847, + "learning_rate": 2.4870466321243523e-06, + "loss": 1.7327, + "step": 216 + }, + { + "epoch": 0.003749654409731822, + "grad_norm": 9.309459756487703, + "learning_rate": 2.498560736902706e-06, + "loss": 1.5912, + "step": 217 + }, + { + "epoch": 0.0037669339231407245, + "grad_norm": 7.719281008738882, + "learning_rate": 2.5100748416810594e-06, + "loss": 1.8251, + "step": 218 + }, + { + "epoch": 0.0037842134365496266, + "grad_norm": 6.35850845899653, + "learning_rate": 2.521588946459413e-06, + "loss": 1.489, + "step": 219 + }, + { + "epoch": 0.003801492949958529, + "grad_norm": 9.846599588669369, + "learning_rate": 2.5331030512377665e-06, + "loss": 1.7481, + "step": 220 + }, + { + "epoch": 0.0038187724633674315, + "grad_norm": 6.512541296953125, + "learning_rate": 2.5446171560161196e-06, + "loss": 1.9014, + "step": 221 + }, + { + "epoch": 0.003836051976776334, + "grad_norm": 6.799189210485867, + "learning_rate": 2.556131260794473e-06, + "loss": 1.5913, + "step": 222 + }, + { + "epoch": 0.0038533314901852364, + "grad_norm": 6.320425919773886, + "learning_rate": 2.5676453655728267e-06, + "loss": 1.9269, + "step": 223 + }, + { + "epoch": 0.003870611003594139, + "grad_norm": 6.655696821035603, + "learning_rate": 2.5791594703511802e-06, + "loss": 1.7685, + "step": 224 + }, + { + "epoch": 0.0038878905170030413, + "grad_norm": 5.6327455807145, + "learning_rate": 2.5906735751295338e-06, + "loss": 1.6976, + "step": 225 + }, + { + "epoch": 0.0039051700304119437, + "grad_norm": 7.098993019120141, + "learning_rate": 2.6021876799078873e-06, + "loss": 1.729, + "step": 226 + }, + { + "epoch": 0.003922449543820846, + "grad_norm": 7.3513459575721365, + "learning_rate": 2.6137017846862404e-06, + "loss": 1.8254, + "step": 227 + }, + { + "epoch": 0.003939729057229748, + "grad_norm": 7.954641476207313, + "learning_rate": 2.6252158894645944e-06, + "loss": 1.5284, + "step": 228 + }, + { + "epoch": 0.003957008570638651, + "grad_norm": 8.669902993831139, + "learning_rate": 2.636729994242948e-06, + "loss": 1.3994, + "step": 229 + }, + { + "epoch": 0.003974288084047553, + "grad_norm": 6.98768774113766, + "learning_rate": 2.6482440990213015e-06, + "loss": 1.6165, + "step": 230 + }, + { + "epoch": 0.0039915675974564555, + "grad_norm": 9.837953544792091, + "learning_rate": 2.659758203799655e-06, + "loss": 0.9678, + "step": 231 + }, + { + "epoch": 0.004008847110865358, + "grad_norm": 7.849392204756464, + "learning_rate": 2.6712723085780086e-06, + "loss": 1.7937, + "step": 232 + }, + { + "epoch": 0.00402612662427426, + "grad_norm": 5.118416410530837, + "learning_rate": 2.682786413356362e-06, + "loss": 1.7001, + "step": 233 + }, + { + "epoch": 0.004043406137683163, + "grad_norm": 5.378465129618938, + "learning_rate": 2.6943005181347152e-06, + "loss": 1.9932, + "step": 234 + }, + { + "epoch": 0.004060685651092065, + "grad_norm": 5.724351434643975, + "learning_rate": 2.705814622913069e-06, + "loss": 1.7301, + "step": 235 + }, + { + "epoch": 0.004077965164500968, + "grad_norm": 6.871149043535187, + "learning_rate": 2.7173287276914223e-06, + "loss": 1.7152, + "step": 236 + }, + { + "epoch": 0.00409524467790987, + "grad_norm": 7.512456024363748, + "learning_rate": 2.728842832469776e-06, + "loss": 1.4154, + "step": 237 + }, + { + "epoch": 0.004112524191318773, + "grad_norm": 6.2479042349709735, + "learning_rate": 2.7403569372481294e-06, + "loss": 1.5442, + "step": 238 + }, + { + "epoch": 0.004129803704727675, + "grad_norm": 5.420661010812501, + "learning_rate": 2.7518710420264825e-06, + "loss": 1.6424, + "step": 239 + }, + { + "epoch": 0.004147083218136578, + "grad_norm": 7.634571924109227, + "learning_rate": 2.763385146804836e-06, + "loss": 1.513, + "step": 240 + }, + { + "epoch": 0.00416436273154548, + "grad_norm": 5.68060874243658, + "learning_rate": 2.7748992515831896e-06, + "loss": 1.6299, + "step": 241 + }, + { + "epoch": 0.0041816422449543825, + "grad_norm": 5.131295165196676, + "learning_rate": 2.786413356361543e-06, + "loss": 1.4799, + "step": 242 + }, + { + "epoch": 0.004198921758363285, + "grad_norm": 7.347554514017072, + "learning_rate": 2.7979274611398967e-06, + "loss": 1.8131, + "step": 243 + }, + { + "epoch": 0.0042162012717721865, + "grad_norm": 5.6015228386930245, + "learning_rate": 2.80944156591825e-06, + "loss": 1.3481, + "step": 244 + }, + { + "epoch": 0.004233480785181089, + "grad_norm": 5.338096607356029, + "learning_rate": 2.8209556706966034e-06, + "loss": 1.7048, + "step": 245 + }, + { + "epoch": 0.004250760298589991, + "grad_norm": 5.4759349753978706, + "learning_rate": 2.832469775474957e-06, + "loss": 1.7531, + "step": 246 + }, + { + "epoch": 0.004268039811998894, + "grad_norm": 4.744991970915898, + "learning_rate": 2.8439838802533105e-06, + "loss": 1.5703, + "step": 247 + }, + { + "epoch": 0.004285319325407796, + "grad_norm": 5.564079215534249, + "learning_rate": 2.855497985031664e-06, + "loss": 1.5947, + "step": 248 + }, + { + "epoch": 0.004302598838816699, + "grad_norm": 4.319053079893507, + "learning_rate": 2.867012089810017e-06, + "loss": 1.5925, + "step": 249 + }, + { + "epoch": 0.004319878352225601, + "grad_norm": 6.828129221494431, + "learning_rate": 2.8785261945883707e-06, + "loss": 1.3542, + "step": 250 + }, + { + "epoch": 0.004337157865634504, + "grad_norm": 8.996276568223038, + "learning_rate": 2.8900402993667242e-06, + "loss": 1.2842, + "step": 251 + }, + { + "epoch": 0.004354437379043406, + "grad_norm": 6.249205745025599, + "learning_rate": 2.9015544041450778e-06, + "loss": 1.5277, + "step": 252 + }, + { + "epoch": 0.0043717168924523086, + "grad_norm": 6.958921694672148, + "learning_rate": 2.9130685089234313e-06, + "loss": 0.8494, + "step": 253 + }, + { + "epoch": 0.004388996405861211, + "grad_norm": 7.582279225375931, + "learning_rate": 2.9245826137017853e-06, + "loss": 1.5187, + "step": 254 + }, + { + "epoch": 0.0044062759192701135, + "grad_norm": 5.5907864455368825, + "learning_rate": 2.936096718480139e-06, + "loss": 1.4036, + "step": 255 + }, + { + "epoch": 0.004423555432679016, + "grad_norm": 4.946974100061778, + "learning_rate": 2.947610823258492e-06, + "loss": 1.6925, + "step": 256 + }, + { + "epoch": 0.004440834946087918, + "grad_norm": 6.204454169380135, + "learning_rate": 2.9591249280368455e-06, + "loss": 1.5966, + "step": 257 + }, + { + "epoch": 0.004458114459496821, + "grad_norm": 5.844301655111674, + "learning_rate": 2.970639032815199e-06, + "loss": 1.5441, + "step": 258 + }, + { + "epoch": 0.004475393972905723, + "grad_norm": 6.8177290332925224, + "learning_rate": 2.9821531375935526e-06, + "loss": 1.5776, + "step": 259 + }, + { + "epoch": 0.004492673486314626, + "grad_norm": 4.938514222741019, + "learning_rate": 2.993667242371906e-06, + "loss": 1.3994, + "step": 260 + }, + { + "epoch": 0.004509952999723528, + "grad_norm": 6.754304647551013, + "learning_rate": 3.0051813471502592e-06, + "loss": 1.4576, + "step": 261 + }, + { + "epoch": 0.004527232513132431, + "grad_norm": 5.493613968093377, + "learning_rate": 3.0166954519286128e-06, + "loss": 1.7586, + "step": 262 + }, + { + "epoch": 0.004544512026541332, + "grad_norm": 6.439244654417436, + "learning_rate": 3.0282095567069663e-06, + "loss": 1.7126, + "step": 263 + }, + { + "epoch": 0.004561791539950235, + "grad_norm": 5.830103356656438, + "learning_rate": 3.03972366148532e-06, + "loss": 1.5941, + "step": 264 + }, + { + "epoch": 0.004579071053359137, + "grad_norm": 5.557444970128106, + "learning_rate": 3.0512377662636734e-06, + "loss": 1.4193, + "step": 265 + }, + { + "epoch": 0.0045963505667680395, + "grad_norm": 5.160863294844622, + "learning_rate": 3.062751871042027e-06, + "loss": 1.6376, + "step": 266 + }, + { + "epoch": 0.004613630080176942, + "grad_norm": 5.842603021046727, + "learning_rate": 3.07426597582038e-06, + "loss": 0.8365, + "step": 267 + }, + { + "epoch": 0.004630909593585844, + "grad_norm": 4.840632225728925, + "learning_rate": 3.0857800805987336e-06, + "loss": 1.392, + "step": 268 + }, + { + "epoch": 0.004648189106994747, + "grad_norm": 5.9599857591739935, + "learning_rate": 3.097294185377087e-06, + "loss": 1.3429, + "step": 269 + }, + { + "epoch": 0.004665468620403649, + "grad_norm": 5.7901254650899086, + "learning_rate": 3.1088082901554407e-06, + "loss": 1.3444, + "step": 270 + }, + { + "epoch": 0.004682748133812552, + "grad_norm": 5.585110855556, + "learning_rate": 3.1203223949337942e-06, + "loss": 1.3758, + "step": 271 + }, + { + "epoch": 0.004700027647221454, + "grad_norm": 5.9518466016263805, + "learning_rate": 3.1318364997121474e-06, + "loss": 0.8804, + "step": 272 + }, + { + "epoch": 0.004717307160630357, + "grad_norm": 4.433412997435442, + "learning_rate": 3.143350604490501e-06, + "loss": 1.4936, + "step": 273 + }, + { + "epoch": 0.004734586674039259, + "grad_norm": 8.891523552733826, + "learning_rate": 3.1548647092688545e-06, + "loss": 1.4794, + "step": 274 + }, + { + "epoch": 0.004751866187448162, + "grad_norm": 5.092468285349666, + "learning_rate": 3.166378814047208e-06, + "loss": 1.3947, + "step": 275 + }, + { + "epoch": 0.004769145700857064, + "grad_norm": 5.620593997219968, + "learning_rate": 3.1778929188255615e-06, + "loss": 1.2679, + "step": 276 + }, + { + "epoch": 0.0047864252142659665, + "grad_norm": 9.382261702710492, + "learning_rate": 3.1894070236039147e-06, + "loss": 1.2024, + "step": 277 + }, + { + "epoch": 0.004803704727674869, + "grad_norm": 6.769159360887838, + "learning_rate": 3.200921128382268e-06, + "loss": 1.5071, + "step": 278 + }, + { + "epoch": 0.004820984241083771, + "grad_norm": 5.468145699592301, + "learning_rate": 3.2124352331606218e-06, + "loss": 1.1908, + "step": 279 + }, + { + "epoch": 0.004838263754492674, + "grad_norm": 6.347493423146807, + "learning_rate": 3.2239493379389753e-06, + "loss": 1.4827, + "step": 280 + }, + { + "epoch": 0.004855543267901576, + "grad_norm": 5.87454795648699, + "learning_rate": 3.2354634427173293e-06, + "loss": 1.5062, + "step": 281 + }, + { + "epoch": 0.004872822781310478, + "grad_norm": 5.660835306151936, + "learning_rate": 3.246977547495683e-06, + "loss": 0.9036, + "step": 282 + }, + { + "epoch": 0.00489010229471938, + "grad_norm": 5.936507067450664, + "learning_rate": 3.2584916522740363e-06, + "loss": 1.2446, + "step": 283 + }, + { + "epoch": 0.004907381808128283, + "grad_norm": 5.298699549121622, + "learning_rate": 3.2700057570523895e-06, + "loss": 1.589, + "step": 284 + }, + { + "epoch": 0.004924661321537185, + "grad_norm": 5.031360145644731, + "learning_rate": 3.281519861830743e-06, + "loss": 1.4825, + "step": 285 + }, + { + "epoch": 0.004941940834946088, + "grad_norm": 5.794465683746552, + "learning_rate": 3.2930339666090966e-06, + "loss": 1.0825, + "step": 286 + }, + { + "epoch": 0.00495922034835499, + "grad_norm": 6.377765325959388, + "learning_rate": 3.30454807138745e-06, + "loss": 1.1419, + "step": 287 + }, + { + "epoch": 0.0049764998617638926, + "grad_norm": 7.706893981626815, + "learning_rate": 3.3160621761658036e-06, + "loss": 1.3699, + "step": 288 + }, + { + "epoch": 0.004993779375172795, + "grad_norm": 7.2914953771984585, + "learning_rate": 3.3275762809441568e-06, + "loss": 0.9692, + "step": 289 + }, + { + "epoch": 0.0050110588885816975, + "grad_norm": 7.623034599157111, + "learning_rate": 3.3390903857225103e-06, + "loss": 1.5008, + "step": 290 + }, + { + "epoch": 0.0050283384019906, + "grad_norm": 4.441642117561936, + "learning_rate": 3.350604490500864e-06, + "loss": 1.3227, + "step": 291 + }, + { + "epoch": 0.005045617915399502, + "grad_norm": 4.982020019267313, + "learning_rate": 3.3621185952792174e-06, + "loss": 1.1939, + "step": 292 + }, + { + "epoch": 0.005062897428808405, + "grad_norm": 7.222429935074482, + "learning_rate": 3.373632700057571e-06, + "loss": 1.4221, + "step": 293 + }, + { + "epoch": 0.005080176942217307, + "grad_norm": 6.158026502783825, + "learning_rate": 3.3851468048359245e-06, + "loss": 1.319, + "step": 294 + }, + { + "epoch": 0.00509745645562621, + "grad_norm": 5.268534398997149, + "learning_rate": 3.3966609096142776e-06, + "loss": 1.3341, + "step": 295 + }, + { + "epoch": 0.005114735969035112, + "grad_norm": 7.590674896175058, + "learning_rate": 3.408175014392631e-06, + "loss": 1.3631, + "step": 296 + }, + { + "epoch": 0.005132015482444015, + "grad_norm": 6.631474694325136, + "learning_rate": 3.4196891191709847e-06, + "loss": 1.0954, + "step": 297 + }, + { + "epoch": 0.005149294995852917, + "grad_norm": 5.18629391150337, + "learning_rate": 3.4312032239493382e-06, + "loss": 0.9823, + "step": 298 + }, + { + "epoch": 0.0051665745092618195, + "grad_norm": 5.803003720095795, + "learning_rate": 3.4427173287276918e-06, + "loss": 1.7283, + "step": 299 + }, + { + "epoch": 0.005183854022670722, + "grad_norm": 6.610693910245518, + "learning_rate": 3.454231433506045e-06, + "loss": 1.4796, + "step": 300 + }, + { + "epoch": 0.005201133536079624, + "grad_norm": 6.976966967318143, + "learning_rate": 3.4657455382843984e-06, + "loss": 1.4901, + "step": 301 + }, + { + "epoch": 0.005218413049488526, + "grad_norm": 9.686848283706462, + "learning_rate": 3.477259643062752e-06, + "loss": 1.5975, + "step": 302 + }, + { + "epoch": 0.0052356925628974284, + "grad_norm": 4.5880398336732835, + "learning_rate": 3.4887737478411055e-06, + "loss": 1.277, + "step": 303 + }, + { + "epoch": 0.005252972076306331, + "grad_norm": 5.346659807590088, + "learning_rate": 3.500287852619459e-06, + "loss": 1.3918, + "step": 304 + }, + { + "epoch": 0.005270251589715233, + "grad_norm": 7.321550774694597, + "learning_rate": 3.511801957397812e-06, + "loss": 1.2247, + "step": 305 + }, + { + "epoch": 0.005287531103124136, + "grad_norm": 5.114866668528371, + "learning_rate": 3.5233160621761657e-06, + "loss": 1.171, + "step": 306 + }, + { + "epoch": 0.005304810616533038, + "grad_norm": 4.393372308486796, + "learning_rate": 3.5348301669545193e-06, + "loss": 0.9782, + "step": 307 + }, + { + "epoch": 0.005322090129941941, + "grad_norm": 5.16990292658669, + "learning_rate": 3.5463442717328732e-06, + "loss": 1.3326, + "step": 308 + }, + { + "epoch": 0.005339369643350843, + "grad_norm": 6.262639566626162, + "learning_rate": 3.557858376511227e-06, + "loss": 1.147, + "step": 309 + }, + { + "epoch": 0.005356649156759746, + "grad_norm": 4.512400009853076, + "learning_rate": 3.5693724812895803e-06, + "loss": 1.3337, + "step": 310 + }, + { + "epoch": 0.005373928670168648, + "grad_norm": 5.293700514186402, + "learning_rate": 3.580886586067934e-06, + "loss": 1.2452, + "step": 311 + }, + { + "epoch": 0.0053912081835775505, + "grad_norm": 4.877015875364927, + "learning_rate": 3.592400690846287e-06, + "loss": 0.828, + "step": 312 + }, + { + "epoch": 0.005408487696986453, + "grad_norm": 5.122597502510406, + "learning_rate": 3.6039147956246405e-06, + "loss": 1.1606, + "step": 313 + }, + { + "epoch": 0.005425767210395355, + "grad_norm": 4.554490668573595, + "learning_rate": 3.615428900402994e-06, + "loss": 1.4096, + "step": 314 + }, + { + "epoch": 0.005443046723804258, + "grad_norm": 6.045678027660684, + "learning_rate": 3.6269430051813476e-06, + "loss": 1.2147, + "step": 315 + }, + { + "epoch": 0.00546032623721316, + "grad_norm": 5.668122280893595, + "learning_rate": 3.638457109959701e-06, + "loss": 1.0936, + "step": 316 + }, + { + "epoch": 0.005477605750622063, + "grad_norm": 7.009258908337648, + "learning_rate": 3.6499712147380543e-06, + "loss": 1.3829, + "step": 317 + }, + { + "epoch": 0.005494885264030965, + "grad_norm": 5.806878384270046, + "learning_rate": 3.661485319516408e-06, + "loss": 1.5463, + "step": 318 + }, + { + "epoch": 0.005512164777439868, + "grad_norm": 5.36410682046897, + "learning_rate": 3.6729994242947614e-06, + "loss": 1.1323, + "step": 319 + }, + { + "epoch": 0.00552944429084877, + "grad_norm": 5.692513920350694, + "learning_rate": 3.684513529073115e-06, + "loss": 1.3424, + "step": 320 + }, + { + "epoch": 0.005546723804257672, + "grad_norm": 4.7988428371956955, + "learning_rate": 3.6960276338514685e-06, + "loss": 1.2576, + "step": 321 + }, + { + "epoch": 0.005564003317666574, + "grad_norm": 3.9997288687327313, + "learning_rate": 3.707541738629822e-06, + "loss": 1.4315, + "step": 322 + }, + { + "epoch": 0.005581282831075477, + "grad_norm": 5.689708771524223, + "learning_rate": 3.719055843408175e-06, + "loss": 1.2664, + "step": 323 + }, + { + "epoch": 0.005598562344484379, + "grad_norm": 7.994169006503923, + "learning_rate": 3.7305699481865287e-06, + "loss": 1.2676, + "step": 324 + }, + { + "epoch": 0.0056158418578932815, + "grad_norm": 3.481635735920909, + "learning_rate": 3.7420840529648822e-06, + "loss": 1.2455, + "step": 325 + }, + { + "epoch": 0.005633121371302184, + "grad_norm": 4.208213766858177, + "learning_rate": 3.7535981577432358e-06, + "loss": 1.2482, + "step": 326 + }, + { + "epoch": 0.005650400884711086, + "grad_norm": 7.042820217762322, + "learning_rate": 3.7651122625215893e-06, + "loss": 1.3468, + "step": 327 + }, + { + "epoch": 0.005667680398119989, + "grad_norm": 4.145019023197917, + "learning_rate": 3.7766263672999424e-06, + "loss": 1.1699, + "step": 328 + }, + { + "epoch": 0.005684959911528891, + "grad_norm": 3.8565942722937647, + "learning_rate": 3.788140472078296e-06, + "loss": 0.9549, + "step": 329 + }, + { + "epoch": 0.005702239424937794, + "grad_norm": 4.9898681061843275, + "learning_rate": 3.7996545768566495e-06, + "loss": 1.1077, + "step": 330 + }, + { + "epoch": 0.005719518938346696, + "grad_norm": 4.599633445887843, + "learning_rate": 3.811168681635003e-06, + "loss": 0.7906, + "step": 331 + }, + { + "epoch": 0.005736798451755599, + "grad_norm": 4.418418485831657, + "learning_rate": 3.822682786413356e-06, + "loss": 1.1888, + "step": 332 + }, + { + "epoch": 0.005754077965164501, + "grad_norm": 5.577198235713905, + "learning_rate": 3.83419689119171e-06, + "loss": 1.4332, + "step": 333 + }, + { + "epoch": 0.0057713574785734035, + "grad_norm": 6.124209292721913, + "learning_rate": 3.845710995970063e-06, + "loss": 1.3298, + "step": 334 + }, + { + "epoch": 0.005788636991982306, + "grad_norm": 7.0315019552443445, + "learning_rate": 3.857225100748417e-06, + "loss": 1.0696, + "step": 335 + }, + { + "epoch": 0.005805916505391208, + "grad_norm": 5.460375312965243, + "learning_rate": 3.868739205526771e-06, + "loss": 1.1773, + "step": 336 + }, + { + "epoch": 0.005823196018800111, + "grad_norm": 5.043928573634012, + "learning_rate": 3.880253310305124e-06, + "loss": 1.2906, + "step": 337 + }, + { + "epoch": 0.005840475532209013, + "grad_norm": 4.534589107445787, + "learning_rate": 3.8917674150834774e-06, + "loss": 1.4329, + "step": 338 + }, + { + "epoch": 0.005857755045617916, + "grad_norm": 4.653229089484249, + "learning_rate": 3.903281519861831e-06, + "loss": 1.2395, + "step": 339 + }, + { + "epoch": 0.005875034559026818, + "grad_norm": 6.2566400296623685, + "learning_rate": 3.9147956246401845e-06, + "loss": 1.0432, + "step": 340 + }, + { + "epoch": 0.00589231407243572, + "grad_norm": 5.610166657126306, + "learning_rate": 3.9263097294185385e-06, + "loss": 1.1823, + "step": 341 + }, + { + "epoch": 0.005909593585844622, + "grad_norm": 6.627865320075536, + "learning_rate": 3.937823834196892e-06, + "loss": 1.1847, + "step": 342 + }, + { + "epoch": 0.005926873099253525, + "grad_norm": 4.94320242914729, + "learning_rate": 3.949337938975245e-06, + "loss": 1.4573, + "step": 343 + }, + { + "epoch": 0.005944152612662427, + "grad_norm": 6.302752641085503, + "learning_rate": 3.960852043753599e-06, + "loss": 1.3603, + "step": 344 + }, + { + "epoch": 0.00596143212607133, + "grad_norm": 5.270310671009767, + "learning_rate": 3.972366148531952e-06, + "loss": 1.3115, + "step": 345 + }, + { + "epoch": 0.005978711639480232, + "grad_norm": 4.674529018202339, + "learning_rate": 3.983880253310306e-06, + "loss": 1.3233, + "step": 346 + }, + { + "epoch": 0.0059959911528891345, + "grad_norm": 5.1334509237465396, + "learning_rate": 3.995394358088659e-06, + "loss": 1.4105, + "step": 347 + }, + { + "epoch": 0.006013270666298037, + "grad_norm": 5.022545829168764, + "learning_rate": 4.006908462867012e-06, + "loss": 1.2489, + "step": 348 + }, + { + "epoch": 0.006030550179706939, + "grad_norm": 4.771528020913648, + "learning_rate": 4.018422567645366e-06, + "loss": 1.0618, + "step": 349 + }, + { + "epoch": 0.006047829693115842, + "grad_norm": 4.60301541894445, + "learning_rate": 4.029936672423719e-06, + "loss": 1.1671, + "step": 350 + }, + { + "epoch": 0.006065109206524744, + "grad_norm": 3.895589321671572, + "learning_rate": 4.041450777202073e-06, + "loss": 1.3228, + "step": 351 + }, + { + "epoch": 0.006082388719933647, + "grad_norm": 7.038864740116393, + "learning_rate": 4.052964881980426e-06, + "loss": 1.1658, + "step": 352 + }, + { + "epoch": 0.006099668233342549, + "grad_norm": 3.378050836387332, + "learning_rate": 4.064478986758779e-06, + "loss": 1.3488, + "step": 353 + }, + { + "epoch": 0.006116947746751452, + "grad_norm": 6.993785456709086, + "learning_rate": 4.075993091537133e-06, + "loss": 0.9638, + "step": 354 + }, + { + "epoch": 0.006134227260160354, + "grad_norm": 5.607174059997616, + "learning_rate": 4.087507196315486e-06, + "loss": 1.4198, + "step": 355 + }, + { + "epoch": 0.0061515067735692566, + "grad_norm": 4.674561589856844, + "learning_rate": 4.09902130109384e-06, + "loss": 0.9937, + "step": 356 + }, + { + "epoch": 0.006168786286978159, + "grad_norm": 4.774238413510719, + "learning_rate": 4.1105354058721935e-06, + "loss": 0.8407, + "step": 357 + }, + { + "epoch": 0.0061860658003870615, + "grad_norm": 5.316808009615942, + "learning_rate": 4.122049510650547e-06, + "loss": 1.4487, + "step": 358 + }, + { + "epoch": 0.006203345313795964, + "grad_norm": 7.213675007513088, + "learning_rate": 4.133563615428901e-06, + "loss": 1.3098, + "step": 359 + }, + { + "epoch": 0.0062206248272048655, + "grad_norm": 6.107450387513266, + "learning_rate": 4.145077720207254e-06, + "loss": 1.3186, + "step": 360 + }, + { + "epoch": 0.006237904340613768, + "grad_norm": 3.9275870902492085, + "learning_rate": 4.156591824985608e-06, + "loss": 1.187, + "step": 361 + }, + { + "epoch": 0.00625518385402267, + "grad_norm": 5.164038799282922, + "learning_rate": 4.168105929763962e-06, + "loss": 1.3802, + "step": 362 + }, + { + "epoch": 0.006272463367431573, + "grad_norm": 6.088576788716823, + "learning_rate": 4.179620034542315e-06, + "loss": 1.7924, + "step": 363 + }, + { + "epoch": 0.006289742880840475, + "grad_norm": 4.519095878782097, + "learning_rate": 4.191134139320669e-06, + "loss": 1.5187, + "step": 364 + }, + { + "epoch": 0.006307022394249378, + "grad_norm": 3.997295432411449, + "learning_rate": 4.202648244099022e-06, + "loss": 1.1015, + "step": 365 + }, + { + "epoch": 0.00632430190765828, + "grad_norm": 4.274602385735992, + "learning_rate": 4.214162348877375e-06, + "loss": 1.2584, + "step": 366 + }, + { + "epoch": 0.006341581421067183, + "grad_norm": 4.801022913887858, + "learning_rate": 4.225676453655729e-06, + "loss": 1.4063, + "step": 367 + }, + { + "epoch": 0.006358860934476085, + "grad_norm": 4.004054026934469, + "learning_rate": 4.237190558434082e-06, + "loss": 0.8982, + "step": 368 + }, + { + "epoch": 0.0063761404478849875, + "grad_norm": 5.7149068752290395, + "learning_rate": 4.248704663212436e-06, + "loss": 0.9787, + "step": 369 + }, + { + "epoch": 0.00639341996129389, + "grad_norm": 5.243442682715752, + "learning_rate": 4.260218767990789e-06, + "loss": 0.796, + "step": 370 + }, + { + "epoch": 0.0064106994747027924, + "grad_norm": 3.9171914859327925, + "learning_rate": 4.271732872769142e-06, + "loss": 1.0531, + "step": 371 + }, + { + "epoch": 0.006427978988111695, + "grad_norm": 3.6981623871428897, + "learning_rate": 4.283246977547496e-06, + "loss": 0.9118, + "step": 372 + }, + { + "epoch": 0.006445258501520597, + "grad_norm": 4.140914695117898, + "learning_rate": 4.294761082325849e-06, + "loss": 1.3524, + "step": 373 + }, + { + "epoch": 0.0064625380149295, + "grad_norm": 4.75245289574328, + "learning_rate": 4.306275187104203e-06, + "loss": 0.9469, + "step": 374 + }, + { + "epoch": 0.006479817528338402, + "grad_norm": 6.0116307018895725, + "learning_rate": 4.3177892918825564e-06, + "loss": 1.2222, + "step": 375 + }, + { + "epoch": 0.006497097041747305, + "grad_norm": 7.027727223925527, + "learning_rate": 4.3293033966609096e-06, + "loss": 1.2143, + "step": 376 + }, + { + "epoch": 0.006514376555156207, + "grad_norm": 6.181373036869655, + "learning_rate": 4.3408175014392635e-06, + "loss": 1.0526, + "step": 377 + }, + { + "epoch": 0.00653165606856511, + "grad_norm": 5.268140030850982, + "learning_rate": 4.352331606217617e-06, + "loss": 1.0021, + "step": 378 + }, + { + "epoch": 0.006548935581974012, + "grad_norm": 4.805790723677804, + "learning_rate": 4.363845710995971e-06, + "loss": 1.5399, + "step": 379 + }, + { + "epoch": 0.006566215095382914, + "grad_norm": 3.4722187773071, + "learning_rate": 4.375359815774324e-06, + "loss": 1.1226, + "step": 380 + }, + { + "epoch": 0.006583494608791816, + "grad_norm": 3.152589223366099, + "learning_rate": 4.386873920552677e-06, + "loss": 0.9472, + "step": 381 + }, + { + "epoch": 0.0066007741222007185, + "grad_norm": 5.099920664230532, + "learning_rate": 4.398388025331031e-06, + "loss": 0.9535, + "step": 382 + }, + { + "epoch": 0.006618053635609621, + "grad_norm": 4.916597288154108, + "learning_rate": 4.409902130109384e-06, + "loss": 0.9298, + "step": 383 + }, + { + "epoch": 0.006635333149018523, + "grad_norm": 4.393214628394579, + "learning_rate": 4.421416234887738e-06, + "loss": 1.1067, + "step": 384 + }, + { + "epoch": 0.006652612662427426, + "grad_norm": 5.605821896281356, + "learning_rate": 4.432930339666091e-06, + "loss": 1.295, + "step": 385 + }, + { + "epoch": 0.006669892175836328, + "grad_norm": 4.102450940152994, + "learning_rate": 4.444444444444444e-06, + "loss": 1.1767, + "step": 386 + }, + { + "epoch": 0.006687171689245231, + "grad_norm": 4.450490295791315, + "learning_rate": 4.455958549222798e-06, + "loss": 1.0406, + "step": 387 + }, + { + "epoch": 0.006704451202654133, + "grad_norm": 4.934803519951293, + "learning_rate": 4.467472654001152e-06, + "loss": 1.3168, + "step": 388 + }, + { + "epoch": 0.006721730716063036, + "grad_norm": 4.316893858025246, + "learning_rate": 4.478986758779505e-06, + "loss": 1.143, + "step": 389 + }, + { + "epoch": 0.006739010229471938, + "grad_norm": 4.149859216060526, + "learning_rate": 4.490500863557859e-06, + "loss": 1.0671, + "step": 390 + }, + { + "epoch": 0.006756289742880841, + "grad_norm": 4.05254979181842, + "learning_rate": 4.502014968336212e-06, + "loss": 1.2399, + "step": 391 + }, + { + "epoch": 0.006773569256289743, + "grad_norm": 4.255705497399932, + "learning_rate": 4.513529073114566e-06, + "loss": 1.173, + "step": 392 + }, + { + "epoch": 0.0067908487696986455, + "grad_norm": 4.968324408694341, + "learning_rate": 4.525043177892919e-06, + "loss": 0.9673, + "step": 393 + }, + { + "epoch": 0.006808128283107548, + "grad_norm": 4.031182687309109, + "learning_rate": 4.5365572826712725e-06, + "loss": 1.1953, + "step": 394 + }, + { + "epoch": 0.00682540779651645, + "grad_norm": 4.0618736526716175, + "learning_rate": 4.5480713874496265e-06, + "loss": 1.1391, + "step": 395 + }, + { + "epoch": 0.006842687309925353, + "grad_norm": 4.935241451273063, + "learning_rate": 4.55958549222798e-06, + "loss": 1.2107, + "step": 396 + }, + { + "epoch": 0.006859966823334255, + "grad_norm": 4.866769946374426, + "learning_rate": 4.5710995970063336e-06, + "loss": 1.2432, + "step": 397 + }, + { + "epoch": 0.006877246336743158, + "grad_norm": 6.2655076077954135, + "learning_rate": 4.582613701784687e-06, + "loss": 1.0862, + "step": 398 + }, + { + "epoch": 0.006894525850152059, + "grad_norm": 4.902510145569257, + "learning_rate": 4.59412780656304e-06, + "loss": 1.1765, + "step": 399 + }, + { + "epoch": 0.006911805363560962, + "grad_norm": 4.060767563210245, + "learning_rate": 4.605641911341394e-06, + "loss": 1.1429, + "step": 400 + }, + { + "epoch": 0.006929084876969864, + "grad_norm": 3.5603102443823125, + "learning_rate": 4.617156016119747e-06, + "loss": 1.2102, + "step": 401 + }, + { + "epoch": 0.006946364390378767, + "grad_norm": 3.683019757485576, + "learning_rate": 4.628670120898101e-06, + "loss": 0.836, + "step": 402 + }, + { + "epoch": 0.006963643903787669, + "grad_norm": 4.134769095400446, + "learning_rate": 4.640184225676454e-06, + "loss": 0.9433, + "step": 403 + }, + { + "epoch": 0.0069809234171965715, + "grad_norm": 4.260552902762231, + "learning_rate": 4.651698330454807e-06, + "loss": 1.0672, + "step": 404 + }, + { + "epoch": 0.006998202930605474, + "grad_norm": 4.944426453180656, + "learning_rate": 4.663212435233161e-06, + "loss": 1.2071, + "step": 405 + }, + { + "epoch": 0.0070154824440143764, + "grad_norm": 3.808066979929848, + "learning_rate": 4.674726540011514e-06, + "loss": 1.2577, + "step": 406 + }, + { + "epoch": 0.007032761957423279, + "grad_norm": 4.304664694910666, + "learning_rate": 4.686240644789868e-06, + "loss": 0.8939, + "step": 407 + }, + { + "epoch": 0.007050041470832181, + "grad_norm": 5.331601302482366, + "learning_rate": 4.697754749568221e-06, + "loss": 1.0747, + "step": 408 + }, + { + "epoch": 0.007067320984241084, + "grad_norm": 8.264924175894334, + "learning_rate": 4.709268854346574e-06, + "loss": 1.2472, + "step": 409 + }, + { + "epoch": 0.007084600497649986, + "grad_norm": 4.367841577471092, + "learning_rate": 4.720782959124928e-06, + "loss": 1.296, + "step": 410 + }, + { + "epoch": 0.007101880011058889, + "grad_norm": 4.740372048781003, + "learning_rate": 4.7322970639032815e-06, + "loss": 1.1819, + "step": 411 + }, + { + "epoch": 0.007119159524467791, + "grad_norm": 5.132662759192409, + "learning_rate": 4.7438111686816354e-06, + "loss": 1.3296, + "step": 412 + }, + { + "epoch": 0.007136439037876694, + "grad_norm": 4.225960930049706, + "learning_rate": 4.7553252734599886e-06, + "loss": 1.3863, + "step": 413 + }, + { + "epoch": 0.007153718551285596, + "grad_norm": 4.373457355173031, + "learning_rate": 4.766839378238342e-06, + "loss": 1.1837, + "step": 414 + }, + { + "epoch": 0.0071709980646944985, + "grad_norm": 4.538949229506553, + "learning_rate": 4.7783534830166965e-06, + "loss": 1.0407, + "step": 415 + }, + { + "epoch": 0.007188277578103401, + "grad_norm": 6.521244370819144, + "learning_rate": 4.78986758779505e-06, + "loss": 1.1611, + "step": 416 + }, + { + "epoch": 0.007205557091512303, + "grad_norm": 5.725395120719962, + "learning_rate": 4.801381692573403e-06, + "loss": 0.6667, + "step": 417 + }, + { + "epoch": 0.007222836604921206, + "grad_norm": 5.851376677962053, + "learning_rate": 4.812895797351757e-06, + "loss": 1.3525, + "step": 418 + }, + { + "epoch": 0.007240116118330107, + "grad_norm": 4.63952941656844, + "learning_rate": 4.82440990213011e-06, + "loss": 0.8738, + "step": 419 + }, + { + "epoch": 0.00725739563173901, + "grad_norm": 3.9404360867517236, + "learning_rate": 4.835924006908464e-06, + "loss": 1.0326, + "step": 420 + }, + { + "epoch": 0.007274675145147912, + "grad_norm": 4.943848544985471, + "learning_rate": 4.847438111686817e-06, + "loss": 1.4205, + "step": 421 + }, + { + "epoch": 0.007291954658556815, + "grad_norm": 3.6214286489098004, + "learning_rate": 4.85895221646517e-06, + "loss": 1.1843, + "step": 422 + }, + { + "epoch": 0.007309234171965717, + "grad_norm": 3.539792983033384, + "learning_rate": 4.870466321243524e-06, + "loss": 0.6828, + "step": 423 + }, + { + "epoch": 0.00732651368537462, + "grad_norm": 4.394906968223961, + "learning_rate": 4.881980426021877e-06, + "loss": 1.1391, + "step": 424 + }, + { + "epoch": 0.007343793198783522, + "grad_norm": 5.3900037826806795, + "learning_rate": 4.893494530800231e-06, + "loss": 1.2384, + "step": 425 + }, + { + "epoch": 0.007361072712192425, + "grad_norm": 5.408272672860037, + "learning_rate": 4.905008635578584e-06, + "loss": 1.1537, + "step": 426 + }, + { + "epoch": 0.007378352225601327, + "grad_norm": 4.286069027614285, + "learning_rate": 4.916522740356937e-06, + "loss": 1.4075, + "step": 427 + }, + { + "epoch": 0.0073956317390102295, + "grad_norm": 5.4090781466592865, + "learning_rate": 4.928036845135291e-06, + "loss": 1.0162, + "step": 428 + }, + { + "epoch": 0.007412911252419132, + "grad_norm": 4.379789450234063, + "learning_rate": 4.939550949913644e-06, + "loss": 0.9834, + "step": 429 + }, + { + "epoch": 0.007430190765828034, + "grad_norm": 4.30396249105047, + "learning_rate": 4.951065054691998e-06, + "loss": 1.1466, + "step": 430 + }, + { + "epoch": 0.007447470279236937, + "grad_norm": 4.416152530177979, + "learning_rate": 4.9625791594703515e-06, + "loss": 1.1712, + "step": 431 + }, + { + "epoch": 0.007464749792645839, + "grad_norm": 4.2703182612673425, + "learning_rate": 4.974093264248705e-06, + "loss": 1.1128, + "step": 432 + }, + { + "epoch": 0.007482029306054742, + "grad_norm": 3.7563140280282186, + "learning_rate": 4.985607369027059e-06, + "loss": 1.3184, + "step": 433 + }, + { + "epoch": 0.007499308819463644, + "grad_norm": 5.341943664604804, + "learning_rate": 4.997121473805412e-06, + "loss": 1.0085, + "step": 434 + }, + { + "epoch": 0.007516588332872547, + "grad_norm": 4.800447514179715, + "learning_rate": 5.008635578583766e-06, + "loss": 1.0882, + "step": 435 + }, + { + "epoch": 0.007533867846281449, + "grad_norm": 5.341857707405692, + "learning_rate": 5.020149683362119e-06, + "loss": 1.1876, + "step": 436 + }, + { + "epoch": 0.0075511473596903515, + "grad_norm": 3.706891049055153, + "learning_rate": 5.031663788140473e-06, + "loss": 1.2743, + "step": 437 + }, + { + "epoch": 0.007568426873099253, + "grad_norm": 5.088398761878722, + "learning_rate": 5.043177892918826e-06, + "loss": 1.0002, + "step": 438 + }, + { + "epoch": 0.0075857063865081556, + "grad_norm": 5.511692037162851, + "learning_rate": 5.05469199769718e-06, + "loss": 1.2258, + "step": 439 + }, + { + "epoch": 0.007602985899917058, + "grad_norm": 4.863572867818976, + "learning_rate": 5.066206102475533e-06, + "loss": 1.0617, + "step": 440 + }, + { + "epoch": 0.0076202654133259605, + "grad_norm": 4.857686860585458, + "learning_rate": 5.077720207253887e-06, + "loss": 1.2165, + "step": 441 + }, + { + "epoch": 0.007637544926734863, + "grad_norm": 3.8681222112181595, + "learning_rate": 5.089234312032239e-06, + "loss": 1.3982, + "step": 442 + }, + { + "epoch": 0.007654824440143765, + "grad_norm": 3.5273176944155837, + "learning_rate": 5.100748416810594e-06, + "loss": 1.0247, + "step": 443 + }, + { + "epoch": 0.007672103953552668, + "grad_norm": 3.757115237380953, + "learning_rate": 5.112262521588946e-06, + "loss": 1.1604, + "step": 444 + }, + { + "epoch": 0.00768938346696157, + "grad_norm": 4.314762195442869, + "learning_rate": 5.1237766263673e-06, + "loss": 0.8653, + "step": 445 + }, + { + "epoch": 0.007706662980370473, + "grad_norm": 4.240922890752097, + "learning_rate": 5.135290731145653e-06, + "loss": 1.0419, + "step": 446 + }, + { + "epoch": 0.007723942493779375, + "grad_norm": 4.5387626187429735, + "learning_rate": 5.146804835924007e-06, + "loss": 1.0824, + "step": 447 + }, + { + "epoch": 0.007741222007188278, + "grad_norm": 4.723060860244788, + "learning_rate": 5.1583189407023605e-06, + "loss": 1.2121, + "step": 448 + }, + { + "epoch": 0.00775850152059718, + "grad_norm": 4.3855770435762915, + "learning_rate": 5.1698330454807144e-06, + "loss": 0.8795, + "step": 449 + }, + { + "epoch": 0.0077757810340060825, + "grad_norm": 3.9655866691084665, + "learning_rate": 5.1813471502590676e-06, + "loss": 1.067, + "step": 450 + }, + { + "epoch": 0.007793060547414985, + "grad_norm": 4.357524276420321, + "learning_rate": 5.1928612550374215e-06, + "loss": 1.2346, + "step": 451 + }, + { + "epoch": 0.007810340060823887, + "grad_norm": 4.305060747784897, + "learning_rate": 5.204375359815775e-06, + "loss": 0.8378, + "step": 452 + }, + { + "epoch": 0.007827619574232789, + "grad_norm": 5.297734036072547, + "learning_rate": 5.215889464594129e-06, + "loss": 1.2092, + "step": 453 + }, + { + "epoch": 0.007844899087641691, + "grad_norm": 4.144120351396724, + "learning_rate": 5.227403569372481e-06, + "loss": 1.2143, + "step": 454 + }, + { + "epoch": 0.007862178601050594, + "grad_norm": 2.7631825173294158, + "learning_rate": 5.238917674150835e-06, + "loss": 1.1043, + "step": 455 + }, + { + "epoch": 0.007879458114459496, + "grad_norm": 4.857084172899143, + "learning_rate": 5.250431778929189e-06, + "loss": 1.1434, + "step": 456 + }, + { + "epoch": 0.007896737627868399, + "grad_norm": 5.070805568457419, + "learning_rate": 5.261945883707542e-06, + "loss": 1.1244, + "step": 457 + }, + { + "epoch": 0.007914017141277301, + "grad_norm": 3.411549943332689, + "learning_rate": 5.273459988485896e-06, + "loss": 1.3057, + "step": 458 + }, + { + "epoch": 0.007931296654686204, + "grad_norm": 3.6000698247976683, + "learning_rate": 5.284974093264249e-06, + "loss": 0.8573, + "step": 459 + }, + { + "epoch": 0.007948576168095106, + "grad_norm": 4.149180475912368, + "learning_rate": 5.296488198042603e-06, + "loss": 1.123, + "step": 460 + }, + { + "epoch": 0.007965855681504009, + "grad_norm": 4.33455723791932, + "learning_rate": 5.308002302820956e-06, + "loss": 1.0444, + "step": 461 + }, + { + "epoch": 0.007983135194912911, + "grad_norm": 4.208223360308723, + "learning_rate": 5.31951640759931e-06, + "loss": 0.9566, + "step": 462 + }, + { + "epoch": 0.008000414708321813, + "grad_norm": 3.0480292696893527, + "learning_rate": 5.331030512377663e-06, + "loss": 0.933, + "step": 463 + }, + { + "epoch": 0.008017694221730716, + "grad_norm": 4.443049277601559, + "learning_rate": 5.342544617156017e-06, + "loss": 1.1944, + "step": 464 + }, + { + "epoch": 0.008034973735139618, + "grad_norm": 4.316078156428469, + "learning_rate": 5.3540587219343694e-06, + "loss": 0.9874, + "step": 465 + }, + { + "epoch": 0.00805225324854852, + "grad_norm": 4.701156006973674, + "learning_rate": 5.365572826712724e-06, + "loss": 1.0963, + "step": 466 + }, + { + "epoch": 0.008069532761957423, + "grad_norm": 4.1786691528646625, + "learning_rate": 5.3770869314910765e-06, + "loss": 0.7407, + "step": 467 + }, + { + "epoch": 0.008086812275366326, + "grad_norm": 3.0114578982768685, + "learning_rate": 5.3886010362694305e-06, + "loss": 1.0488, + "step": 468 + }, + { + "epoch": 0.008104091788775228, + "grad_norm": 4.5358551693427245, + "learning_rate": 5.400115141047784e-06, + "loss": 1.1474, + "step": 469 + }, + { + "epoch": 0.00812137130218413, + "grad_norm": 3.175999531633239, + "learning_rate": 5.411629245826138e-06, + "loss": 1.2384, + "step": 470 + }, + { + "epoch": 0.008138650815593033, + "grad_norm": 4.3201318980479755, + "learning_rate": 5.423143350604491e-06, + "loss": 1.2048, + "step": 471 + }, + { + "epoch": 0.008155930329001936, + "grad_norm": 3.4965124878107643, + "learning_rate": 5.434657455382845e-06, + "loss": 0.8041, + "step": 472 + }, + { + "epoch": 0.008173209842410838, + "grad_norm": 4.111523508589304, + "learning_rate": 5.446171560161198e-06, + "loss": 0.9652, + "step": 473 + }, + { + "epoch": 0.00819048935581974, + "grad_norm": 4.60838820801793, + "learning_rate": 5.457685664939552e-06, + "loss": 1.1982, + "step": 474 + }, + { + "epoch": 0.008207768869228643, + "grad_norm": 4.0894916579115055, + "learning_rate": 5.469199769717904e-06, + "loss": 1.0745, + "step": 475 + }, + { + "epoch": 0.008225048382637545, + "grad_norm": 3.229326958172591, + "learning_rate": 5.480713874496259e-06, + "loss": 1.1947, + "step": 476 + }, + { + "epoch": 0.008242327896046448, + "grad_norm": 4.5629523423113865, + "learning_rate": 5.492227979274611e-06, + "loss": 1.0189, + "step": 477 + }, + { + "epoch": 0.00825960740945535, + "grad_norm": 4.1121093953357875, + "learning_rate": 5.503742084052965e-06, + "loss": 1.2387, + "step": 478 + }, + { + "epoch": 0.008276886922864253, + "grad_norm": 5.001700433380007, + "learning_rate": 5.515256188831318e-06, + "loss": 0.9877, + "step": 479 + }, + { + "epoch": 0.008294166436273155, + "grad_norm": 3.238615887057778, + "learning_rate": 5.526770293609672e-06, + "loss": 1.3591, + "step": 480 + }, + { + "epoch": 0.008311445949682058, + "grad_norm": 4.03890903679064, + "learning_rate": 5.538284398388025e-06, + "loss": 0.8311, + "step": 481 + }, + { + "epoch": 0.00832872546309096, + "grad_norm": 3.7453525043292095, + "learning_rate": 5.549798503166379e-06, + "loss": 1.0657, + "step": 482 + }, + { + "epoch": 0.008346004976499862, + "grad_norm": 4.008978683644834, + "learning_rate": 5.561312607944733e-06, + "loss": 1.1406, + "step": 483 + }, + { + "epoch": 0.008363284489908765, + "grad_norm": 4.290267497095259, + "learning_rate": 5.572826712723086e-06, + "loss": 1.0981, + "step": 484 + }, + { + "epoch": 0.008380564003317667, + "grad_norm": 3.309263263638847, + "learning_rate": 5.58434081750144e-06, + "loss": 1.0159, + "step": 485 + }, + { + "epoch": 0.00839784351672657, + "grad_norm": 2.770398534413802, + "learning_rate": 5.5958549222797934e-06, + "loss": 1.0491, + "step": 486 + }, + { + "epoch": 0.00841512303013547, + "grad_norm": 4.010910409872065, + "learning_rate": 5.607369027058147e-06, + "loss": 1.4311, + "step": 487 + }, + { + "epoch": 0.008432402543544373, + "grad_norm": 4.071728266006526, + "learning_rate": 5.6188831318365e-06, + "loss": 1.185, + "step": 488 + }, + { + "epoch": 0.008449682056953275, + "grad_norm": 3.9740391364327983, + "learning_rate": 5.630397236614854e-06, + "loss": 1.1566, + "step": 489 + }, + { + "epoch": 0.008466961570362178, + "grad_norm": 5.751127474601595, + "learning_rate": 5.641911341393207e-06, + "loss": 1.4553, + "step": 490 + }, + { + "epoch": 0.00848424108377108, + "grad_norm": 3.6987196094926014, + "learning_rate": 5.653425446171561e-06, + "loss": 0.7931, + "step": 491 + }, + { + "epoch": 0.008501520597179983, + "grad_norm": 3.1657922244583747, + "learning_rate": 5.664939550949914e-06, + "loss": 0.9722, + "step": 492 + }, + { + "epoch": 0.008518800110588885, + "grad_norm": 4.875812959668639, + "learning_rate": 5.676453655728268e-06, + "loss": 1.2283, + "step": 493 + }, + { + "epoch": 0.008536079623997788, + "grad_norm": 4.696337182366524, + "learning_rate": 5.687967760506621e-06, + "loss": 0.7864, + "step": 494 + }, + { + "epoch": 0.00855335913740669, + "grad_norm": 3.9658325517125, + "learning_rate": 5.699481865284975e-06, + "loss": 0.885, + "step": 495 + }, + { + "epoch": 0.008570638650815593, + "grad_norm": 3.1200395601496163, + "learning_rate": 5.710995970063328e-06, + "loss": 0.9028, + "step": 496 + }, + { + "epoch": 0.008587918164224495, + "grad_norm": 4.166430299375139, + "learning_rate": 5.722510074841682e-06, + "loss": 0.9784, + "step": 497 + }, + { + "epoch": 0.008605197677633398, + "grad_norm": 4.02656377803456, + "learning_rate": 5.734024179620034e-06, + "loss": 1.0745, + "step": 498 + }, + { + "epoch": 0.0086224771910423, + "grad_norm": 4.571071285332866, + "learning_rate": 5.745538284398389e-06, + "loss": 1.0868, + "step": 499 + }, + { + "epoch": 0.008639756704451202, + "grad_norm": 4.741118362745268, + "learning_rate": 5.757052389176741e-06, + "loss": 1.3171, + "step": 500 + }, + { + "epoch": 0.008657036217860105, + "grad_norm": 5.170331235095122, + "learning_rate": 5.768566493955095e-06, + "loss": 1.0957, + "step": 501 + }, + { + "epoch": 0.008674315731269007, + "grad_norm": 3.85957490659775, + "learning_rate": 5.7800805987334484e-06, + "loss": 1.1396, + "step": 502 + }, + { + "epoch": 0.00869159524467791, + "grad_norm": 4.479504906318992, + "learning_rate": 5.791594703511802e-06, + "loss": 1.0329, + "step": 503 + }, + { + "epoch": 0.008708874758086812, + "grad_norm": 3.586744163660461, + "learning_rate": 5.8031088082901555e-06, + "loss": 0.8596, + "step": 504 + }, + { + "epoch": 0.008726154271495715, + "grad_norm": 4.313620696383691, + "learning_rate": 5.8146229130685095e-06, + "loss": 1.133, + "step": 505 + }, + { + "epoch": 0.008743433784904617, + "grad_norm": 4.294595234017294, + "learning_rate": 5.826137017846863e-06, + "loss": 0.9154, + "step": 506 + }, + { + "epoch": 0.00876071329831352, + "grad_norm": 4.782997480297537, + "learning_rate": 5.837651122625217e-06, + "loss": 1.1325, + "step": 507 + }, + { + "epoch": 0.008777992811722422, + "grad_norm": 4.6541681866400335, + "learning_rate": 5.8491652274035706e-06, + "loss": 1.1127, + "step": 508 + }, + { + "epoch": 0.008795272325131324, + "grad_norm": 3.448110256904519, + "learning_rate": 5.860679332181924e-06, + "loss": 0.9555, + "step": 509 + }, + { + "epoch": 0.008812551838540227, + "grad_norm": 3.670004406218873, + "learning_rate": 5.872193436960278e-06, + "loss": 0.93, + "step": 510 + }, + { + "epoch": 0.00882983135194913, + "grad_norm": 4.273402850760513, + "learning_rate": 5.88370754173863e-06, + "loss": 1.0805, + "step": 511 + }, + { + "epoch": 0.008847110865358032, + "grad_norm": 3.0477048843816688, + "learning_rate": 5.895221646516984e-06, + "loss": 1.0002, + "step": 512 + }, + { + "epoch": 0.008864390378766934, + "grad_norm": 4.168317316060988, + "learning_rate": 5.906735751295337e-06, + "loss": 0.9975, + "step": 513 + }, + { + "epoch": 0.008881669892175837, + "grad_norm": 3.384419799593198, + "learning_rate": 5.918249856073691e-06, + "loss": 0.5652, + "step": 514 + }, + { + "epoch": 0.00889894940558474, + "grad_norm": 4.563633133631313, + "learning_rate": 5.929763960852044e-06, + "loss": 1.0332, + "step": 515 + }, + { + "epoch": 0.008916228918993642, + "grad_norm": 3.8371933706081274, + "learning_rate": 5.941278065630398e-06, + "loss": 0.9104, + "step": 516 + }, + { + "epoch": 0.008933508432402544, + "grad_norm": 3.796722939443128, + "learning_rate": 5.952792170408751e-06, + "loss": 1.0257, + "step": 517 + }, + { + "epoch": 0.008950787945811447, + "grad_norm": 3.8687451939830377, + "learning_rate": 5.964306275187105e-06, + "loss": 0.9128, + "step": 518 + }, + { + "epoch": 0.008968067459220349, + "grad_norm": 3.393404533677279, + "learning_rate": 5.975820379965458e-06, + "loss": 1.1634, + "step": 519 + }, + { + "epoch": 0.008985346972629251, + "grad_norm": 3.112510706236241, + "learning_rate": 5.987334484743812e-06, + "loss": 0.7817, + "step": 520 + }, + { + "epoch": 0.009002626486038154, + "grad_norm": 5.4879620606794886, + "learning_rate": 5.9988485895221645e-06, + "loss": 1.0012, + "step": 521 + }, + { + "epoch": 0.009019905999447056, + "grad_norm": 3.6864905215166286, + "learning_rate": 6.0103626943005185e-06, + "loss": 1.0005, + "step": 522 + }, + { + "epoch": 0.009037185512855959, + "grad_norm": 3.8994123783553167, + "learning_rate": 6.021876799078872e-06, + "loss": 0.7183, + "step": 523 + }, + { + "epoch": 0.009054465026264861, + "grad_norm": 2.8880565719605795, + "learning_rate": 6.0333909038572256e-06, + "loss": 0.8148, + "step": 524 + }, + { + "epoch": 0.009071744539673764, + "grad_norm": 4.40984165823965, + "learning_rate": 6.044905008635579e-06, + "loss": 1.2411, + "step": 525 + }, + { + "epoch": 0.009089024053082664, + "grad_norm": 3.2803571002095286, + "learning_rate": 6.056419113413933e-06, + "loss": 0.7644, + "step": 526 + }, + { + "epoch": 0.009106303566491567, + "grad_norm": 5.485716516499636, + "learning_rate": 6.067933218192286e-06, + "loss": 1.0493, + "step": 527 + }, + { + "epoch": 0.00912358307990047, + "grad_norm": 3.357936052110594, + "learning_rate": 6.07944732297064e-06, + "loss": 1.1995, + "step": 528 + }, + { + "epoch": 0.009140862593309372, + "grad_norm": 3.7531599794059822, + "learning_rate": 6.090961427748993e-06, + "loss": 0.9964, + "step": 529 + }, + { + "epoch": 0.009158142106718274, + "grad_norm": 5.263187796129138, + "learning_rate": 6.102475532527347e-06, + "loss": 0.6995, + "step": 530 + }, + { + "epoch": 0.009175421620127177, + "grad_norm": 3.4925181037678352, + "learning_rate": 6.113989637305699e-06, + "loss": 1.0223, + "step": 531 + }, + { + "epoch": 0.009192701133536079, + "grad_norm": 2.9242066725472307, + "learning_rate": 6.125503742084054e-06, + "loss": 0.9555, + "step": 532 + }, + { + "epoch": 0.009209980646944982, + "grad_norm": 3.686431675362739, + "learning_rate": 6.137017846862406e-06, + "loss": 1.0534, + "step": 533 + }, + { + "epoch": 0.009227260160353884, + "grad_norm": 2.860110454912817, + "learning_rate": 6.14853195164076e-06, + "loss": 0.7146, + "step": 534 + }, + { + "epoch": 0.009244539673762786, + "grad_norm": 3.609384913971872, + "learning_rate": 6.160046056419114e-06, + "loss": 1.1629, + "step": 535 + }, + { + "epoch": 0.009261819187171689, + "grad_norm": 3.1226293795570608, + "learning_rate": 6.171560161197467e-06, + "loss": 0.9672, + "step": 536 + }, + { + "epoch": 0.009279098700580591, + "grad_norm": 4.237839500814088, + "learning_rate": 6.183074265975821e-06, + "loss": 1.2686, + "step": 537 + }, + { + "epoch": 0.009296378213989494, + "grad_norm": 4.024074194394633, + "learning_rate": 6.194588370754174e-06, + "loss": 0.7655, + "step": 538 + }, + { + "epoch": 0.009313657727398396, + "grad_norm": 4.978256738606508, + "learning_rate": 6.206102475532528e-06, + "loss": 1.3058, + "step": 539 + }, + { + "epoch": 0.009330937240807299, + "grad_norm": 4.038537872661016, + "learning_rate": 6.217616580310881e-06, + "loss": 1.0532, + "step": 540 + }, + { + "epoch": 0.009348216754216201, + "grad_norm": 4.691865990855322, + "learning_rate": 6.229130685089235e-06, + "loss": 0.9628, + "step": 541 + }, + { + "epoch": 0.009365496267625104, + "grad_norm": 6.025433598226425, + "learning_rate": 6.2406447898675885e-06, + "loss": 1.2762, + "step": 542 + }, + { + "epoch": 0.009382775781034006, + "grad_norm": 3.3664373097696827, + "learning_rate": 6.2521588946459425e-06, + "loss": 0.7194, + "step": 543 + }, + { + "epoch": 0.009400055294442908, + "grad_norm": 3.59069136631652, + "learning_rate": 6.263672999424295e-06, + "loss": 1.0156, + "step": 544 + }, + { + "epoch": 0.009417334807851811, + "grad_norm": 4.098733551129255, + "learning_rate": 6.275187104202649e-06, + "loss": 0.996, + "step": 545 + }, + { + "epoch": 0.009434614321260713, + "grad_norm": 3.613024459709552, + "learning_rate": 6.286701208981002e-06, + "loss": 1.3059, + "step": 546 + }, + { + "epoch": 0.009451893834669616, + "grad_norm": 3.1017970745902352, + "learning_rate": 6.298215313759356e-06, + "loss": 0.8596, + "step": 547 + }, + { + "epoch": 0.009469173348078518, + "grad_norm": 3.5148220380409727, + "learning_rate": 6.309729418537709e-06, + "loss": 0.8853, + "step": 548 + }, + { + "epoch": 0.00948645286148742, + "grad_norm": 3.950415174651013, + "learning_rate": 6.321243523316063e-06, + "loss": 1.0327, + "step": 549 + }, + { + "epoch": 0.009503732374896323, + "grad_norm": 4.142100675933463, + "learning_rate": 6.332757628094416e-06, + "loss": 0.9499, + "step": 550 + }, + { + "epoch": 0.009521011888305226, + "grad_norm": 3.1436021809040704, + "learning_rate": 6.34427173287277e-06, + "loss": 0.9719, + "step": 551 + }, + { + "epoch": 0.009538291401714128, + "grad_norm": 4.093155773338306, + "learning_rate": 6.355785837651123e-06, + "loss": 1.2416, + "step": 552 + }, + { + "epoch": 0.00955557091512303, + "grad_norm": 5.06028271401191, + "learning_rate": 6.367299942429477e-06, + "loss": 1.3252, + "step": 553 + }, + { + "epoch": 0.009572850428531933, + "grad_norm": 3.2313198517282524, + "learning_rate": 6.378814047207829e-06, + "loss": 0.9245, + "step": 554 + }, + { + "epoch": 0.009590129941940835, + "grad_norm": 3.851150003773031, + "learning_rate": 6.390328151986184e-06, + "loss": 0.9643, + "step": 555 + }, + { + "epoch": 0.009607409455349738, + "grad_norm": 2.9073870464109564, + "learning_rate": 6.401842256764536e-06, + "loss": 0.9013, + "step": 556 + }, + { + "epoch": 0.00962468896875864, + "grad_norm": 4.182054041173847, + "learning_rate": 6.41335636154289e-06, + "loss": 1.0983, + "step": 557 + }, + { + "epoch": 0.009641968482167543, + "grad_norm": 4.607944930298026, + "learning_rate": 6.4248704663212435e-06, + "loss": 1.0627, + "step": 558 + }, + { + "epoch": 0.009659247995576445, + "grad_norm": 2.84166199254402, + "learning_rate": 6.4363845710995975e-06, + "loss": 1.2103, + "step": 559 + }, + { + "epoch": 0.009676527508985348, + "grad_norm": 3.1917820173189106, + "learning_rate": 6.447898675877951e-06, + "loss": 0.9611, + "step": 560 + }, + { + "epoch": 0.00969380702239425, + "grad_norm": 3.3840624936713093, + "learning_rate": 6.4594127806563046e-06, + "loss": 1.3785, + "step": 561 + }, + { + "epoch": 0.009711086535803153, + "grad_norm": 5.298214268258415, + "learning_rate": 6.4709268854346585e-06, + "loss": 1.2762, + "step": 562 + }, + { + "epoch": 0.009728366049212055, + "grad_norm": 4.184930168742646, + "learning_rate": 6.482440990213012e-06, + "loss": 1.1347, + "step": 563 + }, + { + "epoch": 0.009745645562620956, + "grad_norm": 3.659581724862379, + "learning_rate": 6.493955094991366e-06, + "loss": 0.8711, + "step": 564 + }, + { + "epoch": 0.009762925076029858, + "grad_norm": 2.8510976915790187, + "learning_rate": 6.505469199769719e-06, + "loss": 1.1073, + "step": 565 + }, + { + "epoch": 0.00978020458943876, + "grad_norm": 3.542043855424844, + "learning_rate": 6.516983304548073e-06, + "loss": 1.0115, + "step": 566 + }, + { + "epoch": 0.009797484102847663, + "grad_norm": 3.234262584064869, + "learning_rate": 6.528497409326425e-06, + "loss": 1.0376, + "step": 567 + }, + { + "epoch": 0.009814763616256566, + "grad_norm": 2.968189839662753, + "learning_rate": 6.540011514104779e-06, + "loss": 0.9511, + "step": 568 + }, + { + "epoch": 0.009832043129665468, + "grad_norm": 2.8787560896129185, + "learning_rate": 6.551525618883132e-06, + "loss": 0.8798, + "step": 569 + }, + { + "epoch": 0.00984932264307437, + "grad_norm": 3.1125611321610394, + "learning_rate": 6.563039723661486e-06, + "loss": 0.9244, + "step": 570 + }, + { + "epoch": 0.009866602156483273, + "grad_norm": 3.37934592621041, + "learning_rate": 6.574553828439839e-06, + "loss": 1.0775, + "step": 571 + }, + { + "epoch": 0.009883881669892175, + "grad_norm": 3.660182470945455, + "learning_rate": 6.586067933218193e-06, + "loss": 1.152, + "step": 572 + }, + { + "epoch": 0.009901161183301078, + "grad_norm": 3.9123375822121904, + "learning_rate": 6.597582037996546e-06, + "loss": 0.7657, + "step": 573 + }, + { + "epoch": 0.00991844069670998, + "grad_norm": 5.153170428728001, + "learning_rate": 6.6090961427749e-06, + "loss": 1.1397, + "step": 574 + }, + { + "epoch": 0.009935720210118883, + "grad_norm": 3.164726729264875, + "learning_rate": 6.620610247553253e-06, + "loss": 0.9977, + "step": 575 + }, + { + "epoch": 0.009952999723527785, + "grad_norm": 3.6296163815228404, + "learning_rate": 6.632124352331607e-06, + "loss": 0.9542, + "step": 576 + }, + { + "epoch": 0.009970279236936688, + "grad_norm": 7.590773914370668, + "learning_rate": 6.6436384571099596e-06, + "loss": 1.1618, + "step": 577 + }, + { + "epoch": 0.00998755875034559, + "grad_norm": 3.127691543981599, + "learning_rate": 6.6551525618883135e-06, + "loss": 0.9184, + "step": 578 + }, + { + "epoch": 0.010004838263754492, + "grad_norm": 3.5970035718106814, + "learning_rate": 6.666666666666667e-06, + "loss": 1.049, + "step": 579 + }, + { + "epoch": 0.010022117777163395, + "grad_norm": 3.3780828904632525, + "learning_rate": 6.678180771445021e-06, + "loss": 0.8046, + "step": 580 + }, + { + "epoch": 0.010039397290572297, + "grad_norm": 2.7227040260015825, + "learning_rate": 6.689694876223374e-06, + "loss": 0.7253, + "step": 581 + }, + { + "epoch": 0.0100566768039812, + "grad_norm": 3.9052190918169765, + "learning_rate": 6.701208981001728e-06, + "loss": 0.858, + "step": 582 + }, + { + "epoch": 0.010073956317390102, + "grad_norm": 3.439279324011167, + "learning_rate": 6.712723085780081e-06, + "loss": 0.7248, + "step": 583 + }, + { + "epoch": 0.010091235830799005, + "grad_norm": 4.893250175192245, + "learning_rate": 6.724237190558435e-06, + "loss": 0.8448, + "step": 584 + }, + { + "epoch": 0.010108515344207907, + "grad_norm": 3.395509696222165, + "learning_rate": 6.735751295336788e-06, + "loss": 0.8595, + "step": 585 + }, + { + "epoch": 0.01012579485761681, + "grad_norm": 3.542818948169239, + "learning_rate": 6.747265400115142e-06, + "loss": 0.889, + "step": 586 + }, + { + "epoch": 0.010143074371025712, + "grad_norm": 4.919561865107683, + "learning_rate": 6.758779504893494e-06, + "loss": 1.1359, + "step": 587 + }, + { + "epoch": 0.010160353884434615, + "grad_norm": 4.547505265079543, + "learning_rate": 6.770293609671849e-06, + "loss": 1.1388, + "step": 588 + }, + { + "epoch": 0.010177633397843517, + "grad_norm": 3.521534201766448, + "learning_rate": 6.781807714450203e-06, + "loss": 0.6537, + "step": 589 + }, + { + "epoch": 0.01019491291125242, + "grad_norm": 4.305096360385645, + "learning_rate": 6.793321819228555e-06, + "loss": 1.1675, + "step": 590 + }, + { + "epoch": 0.010212192424661322, + "grad_norm": 3.5064354223298753, + "learning_rate": 6.804835924006909e-06, + "loss": 1.0655, + "step": 591 + }, + { + "epoch": 0.010229471938070224, + "grad_norm": 3.6611842147550773, + "learning_rate": 6.816350028785262e-06, + "loss": 1.1791, + "step": 592 + }, + { + "epoch": 0.010246751451479127, + "grad_norm": 3.08913903203027, + "learning_rate": 6.827864133563616e-06, + "loss": 0.9481, + "step": 593 + }, + { + "epoch": 0.01026403096488803, + "grad_norm": 3.8643278209886773, + "learning_rate": 6.839378238341969e-06, + "loss": 1.1369, + "step": 594 + }, + { + "epoch": 0.010281310478296932, + "grad_norm": 3.2596185051827797, + "learning_rate": 6.850892343120323e-06, + "loss": 0.7339, + "step": 595 + }, + { + "epoch": 0.010298589991705834, + "grad_norm": 3.9546332427049413, + "learning_rate": 6.8624064478986765e-06, + "loss": 1.1705, + "step": 596 + }, + { + "epoch": 0.010315869505114737, + "grad_norm": 3.427562899887982, + "learning_rate": 6.8739205526770304e-06, + "loss": 0.9935, + "step": 597 + }, + { + "epoch": 0.010333149018523639, + "grad_norm": 5.096579280393205, + "learning_rate": 6.8854346574553836e-06, + "loss": 1.0961, + "step": 598 + }, + { + "epoch": 0.010350428531932541, + "grad_norm": 3.943773709137912, + "learning_rate": 6.8969487622337375e-06, + "loss": 0.7231, + "step": 599 + }, + { + "epoch": 0.010367708045341444, + "grad_norm": 3.9954873392862087, + "learning_rate": 6.90846286701209e-06, + "loss": 1.127, + "step": 600 + }, + { + "epoch": 0.010384987558750346, + "grad_norm": 3.7603541659903463, + "learning_rate": 6.919976971790444e-06, + "loss": 0.8099, + "step": 601 + }, + { + "epoch": 0.010402267072159249, + "grad_norm": 4.932883052423535, + "learning_rate": 6.931491076568797e-06, + "loss": 1.1835, + "step": 602 + }, + { + "epoch": 0.01041954658556815, + "grad_norm": 3.666117065578736, + "learning_rate": 6.943005181347151e-06, + "loss": 0.9857, + "step": 603 + }, + { + "epoch": 0.010436826098977052, + "grad_norm": 4.486981657310095, + "learning_rate": 6.954519286125504e-06, + "loss": 0.9162, + "step": 604 + }, + { + "epoch": 0.010454105612385954, + "grad_norm": 3.794117451796064, + "learning_rate": 6.966033390903858e-06, + "loss": 0.6918, + "step": 605 + }, + { + "epoch": 0.010471385125794857, + "grad_norm": 3.4528967990743253, + "learning_rate": 6.977547495682211e-06, + "loss": 0.6287, + "step": 606 + }, + { + "epoch": 0.01048866463920376, + "grad_norm": 3.147175112464127, + "learning_rate": 6.989061600460565e-06, + "loss": 1.0335, + "step": 607 + }, + { + "epoch": 0.010505944152612662, + "grad_norm": 4.69813816907057, + "learning_rate": 7.000575705238918e-06, + "loss": 0.8378, + "step": 608 + }, + { + "epoch": 0.010523223666021564, + "grad_norm": 4.363371025984158, + "learning_rate": 7.012089810017272e-06, + "loss": 1.4055, + "step": 609 + }, + { + "epoch": 0.010540503179430467, + "grad_norm": 4.656268959806546, + "learning_rate": 7.023603914795624e-06, + "loss": 1.0787, + "step": 610 + }, + { + "epoch": 0.01055778269283937, + "grad_norm": 5.144657050972341, + "learning_rate": 7.035118019573978e-06, + "loss": 1.322, + "step": 611 + }, + { + "epoch": 0.010575062206248272, + "grad_norm": 3.4769636522920724, + "learning_rate": 7.0466321243523315e-06, + "loss": 1.0544, + "step": 612 + }, + { + "epoch": 0.010592341719657174, + "grad_norm": 4.116458636008791, + "learning_rate": 7.0581462291306854e-06, + "loss": 1.2162, + "step": 613 + }, + { + "epoch": 0.010609621233066076, + "grad_norm": 3.2850974822531462, + "learning_rate": 7.0696603339090386e-06, + "loss": 1.0386, + "step": 614 + }, + { + "epoch": 0.010626900746474979, + "grad_norm": 5.53484359708683, + "learning_rate": 7.0811744386873925e-06, + "loss": 1.2047, + "step": 615 + }, + { + "epoch": 0.010644180259883881, + "grad_norm": 3.705176789339446, + "learning_rate": 7.0926885434657465e-06, + "loss": 1.0907, + "step": 616 + }, + { + "epoch": 0.010661459773292784, + "grad_norm": 4.881889155506728, + "learning_rate": 7.1042026482441e-06, + "loss": 0.9659, + "step": 617 + }, + { + "epoch": 0.010678739286701686, + "grad_norm": 3.5790293492087404, + "learning_rate": 7.115716753022454e-06, + "loss": 0.6645, + "step": 618 + }, + { + "epoch": 0.010696018800110589, + "grad_norm": 4.380783444249239, + "learning_rate": 7.127230857800807e-06, + "loss": 1.3912, + "step": 619 + }, + { + "epoch": 0.010713298313519491, + "grad_norm": 4.666648509379827, + "learning_rate": 7.138744962579161e-06, + "loss": 0.9797, + "step": 620 + }, + { + "epoch": 0.010730577826928394, + "grad_norm": 3.393600747151793, + "learning_rate": 7.150259067357514e-06, + "loss": 0.8911, + "step": 621 + }, + { + "epoch": 0.010747857340337296, + "grad_norm": 2.758459915084795, + "learning_rate": 7.161773172135868e-06, + "loss": 0.8529, + "step": 622 + }, + { + "epoch": 0.010765136853746199, + "grad_norm": 3.179894433470069, + "learning_rate": 7.17328727691422e-06, + "loss": 0.9391, + "step": 623 + }, + { + "epoch": 0.010782416367155101, + "grad_norm": 3.516156495720369, + "learning_rate": 7.184801381692574e-06, + "loss": 1.1767, + "step": 624 + }, + { + "epoch": 0.010799695880564003, + "grad_norm": 3.7302033009177933, + "learning_rate": 7.196315486470927e-06, + "loss": 1.1432, + "step": 625 + }, + { + "epoch": 0.010816975393972906, + "grad_norm": 4.459400968564564, + "learning_rate": 7.207829591249281e-06, + "loss": 1.0662, + "step": 626 + }, + { + "epoch": 0.010834254907381808, + "grad_norm": 3.4392243997283862, + "learning_rate": 7.219343696027634e-06, + "loss": 1.0286, + "step": 627 + }, + { + "epoch": 0.01085153442079071, + "grad_norm": 3.719683738746272, + "learning_rate": 7.230857800805988e-06, + "loss": 1.1457, + "step": 628 + }, + { + "epoch": 0.010868813934199613, + "grad_norm": 3.1578024164607648, + "learning_rate": 7.242371905584341e-06, + "loss": 0.95, + "step": 629 + }, + { + "epoch": 0.010886093447608516, + "grad_norm": 3.033024963260319, + "learning_rate": 7.253886010362695e-06, + "loss": 1.1125, + "step": 630 + }, + { + "epoch": 0.010903372961017418, + "grad_norm": 4.180949737253565, + "learning_rate": 7.265400115141048e-06, + "loss": 0.8274, + "step": 631 + }, + { + "epoch": 0.01092065247442632, + "grad_norm": 3.184754059487252, + "learning_rate": 7.276914219919402e-06, + "loss": 0.7776, + "step": 632 + }, + { + "epoch": 0.010937931987835223, + "grad_norm": 4.010180244092177, + "learning_rate": 7.288428324697755e-06, + "loss": 1.0266, + "step": 633 + }, + { + "epoch": 0.010955211501244125, + "grad_norm": 3.7564911882002545, + "learning_rate": 7.299942429476109e-06, + "loss": 1.18, + "step": 634 + }, + { + "epoch": 0.010972491014653028, + "grad_norm": 3.9042583833239797, + "learning_rate": 7.311456534254462e-06, + "loss": 1.1797, + "step": 635 + }, + { + "epoch": 0.01098977052806193, + "grad_norm": 3.25902345904811, + "learning_rate": 7.322970639032816e-06, + "loss": 0.9691, + "step": 636 + }, + { + "epoch": 0.011007050041470833, + "grad_norm": 2.766759352951884, + "learning_rate": 7.334484743811169e-06, + "loss": 0.8363, + "step": 637 + }, + { + "epoch": 0.011024329554879735, + "grad_norm": 2.261243419698101, + "learning_rate": 7.345998848589523e-06, + "loss": 0.8194, + "step": 638 + }, + { + "epoch": 0.011041609068288638, + "grad_norm": 5.397752687228797, + "learning_rate": 7.357512953367876e-06, + "loss": 0.8934, + "step": 639 + }, + { + "epoch": 0.01105888858169754, + "grad_norm": 2.9449529013529148, + "learning_rate": 7.36902705814623e-06, + "loss": 0.9985, + "step": 640 + }, + { + "epoch": 0.011076168095106443, + "grad_norm": 2.821757779755631, + "learning_rate": 7.380541162924583e-06, + "loss": 0.934, + "step": 641 + }, + { + "epoch": 0.011093447608515343, + "grad_norm": 3.629850242010214, + "learning_rate": 7.392055267702937e-06, + "loss": 1.0834, + "step": 642 + }, + { + "epoch": 0.011110727121924246, + "grad_norm": 3.2395158170798144, + "learning_rate": 7.403569372481291e-06, + "loss": 0.7698, + "step": 643 + }, + { + "epoch": 0.011128006635333148, + "grad_norm": 3.539747400045925, + "learning_rate": 7.415083477259644e-06, + "loss": 0.9955, + "step": 644 + }, + { + "epoch": 0.01114528614874205, + "grad_norm": 3.8144171125459763, + "learning_rate": 7.426597582037998e-06, + "loss": 0.6621, + "step": 645 + }, + { + "epoch": 0.011162565662150953, + "grad_norm": 3.6507895358551545, + "learning_rate": 7.43811168681635e-06, + "loss": 1.2345, + "step": 646 + }, + { + "epoch": 0.011179845175559856, + "grad_norm": 4.116585817523, + "learning_rate": 7.449625791594704e-06, + "loss": 0.9866, + "step": 647 + }, + { + "epoch": 0.011197124688968758, + "grad_norm": 4.416615701734331, + "learning_rate": 7.461139896373057e-06, + "loss": 1.0016, + "step": 648 + }, + { + "epoch": 0.01121440420237766, + "grad_norm": 4.541074053921985, + "learning_rate": 7.472654001151411e-06, + "loss": 1.1003, + "step": 649 + }, + { + "epoch": 0.011231683715786563, + "grad_norm": 4.967387432829416, + "learning_rate": 7.4841681059297644e-06, + "loss": 0.7884, + "step": 650 + }, + { + "epoch": 0.011248963229195465, + "grad_norm": 3.141056479162246, + "learning_rate": 7.495682210708118e-06, + "loss": 0.9196, + "step": 651 + }, + { + "epoch": 0.011266242742604368, + "grad_norm": 3.0336831496110337, + "learning_rate": 7.5071963154864715e-06, + "loss": 0.9151, + "step": 652 + }, + { + "epoch": 0.01128352225601327, + "grad_norm": 4.202260205107746, + "learning_rate": 7.5187104202648255e-06, + "loss": 1.0264, + "step": 653 + }, + { + "epoch": 0.011300801769422173, + "grad_norm": 3.062082059930666, + "learning_rate": 7.530224525043179e-06, + "loss": 1.2183, + "step": 654 + }, + { + "epoch": 0.011318081282831075, + "grad_norm": 2.66550820555173, + "learning_rate": 7.541738629821533e-06, + "loss": 0.9141, + "step": 655 + }, + { + "epoch": 0.011335360796239978, + "grad_norm": 4.015070779303728, + "learning_rate": 7.553252734599885e-06, + "loss": 1.0291, + "step": 656 + }, + { + "epoch": 0.01135264030964888, + "grad_norm": 3.0681474055125384, + "learning_rate": 7.564766839378239e-06, + "loss": 1.0021, + "step": 657 + }, + { + "epoch": 0.011369919823057783, + "grad_norm": 4.2375332073708565, + "learning_rate": 7.576280944156592e-06, + "loss": 1.008, + "step": 658 + }, + { + "epoch": 0.011387199336466685, + "grad_norm": 3.935448127749744, + "learning_rate": 7.587795048934946e-06, + "loss": 0.8749, + "step": 659 + }, + { + "epoch": 0.011404478849875587, + "grad_norm": 3.2404114941415356, + "learning_rate": 7.599309153713299e-06, + "loss": 1.0722, + "step": 660 + }, + { + "epoch": 0.01142175836328449, + "grad_norm": 2.559264686440445, + "learning_rate": 7.610823258491653e-06, + "loss": 0.7966, + "step": 661 + }, + { + "epoch": 0.011439037876693392, + "grad_norm": 3.123943337033143, + "learning_rate": 7.622337363270006e-06, + "loss": 1.0402, + "step": 662 + }, + { + "epoch": 0.011456317390102295, + "grad_norm": 3.8818913290022943, + "learning_rate": 7.63385146804836e-06, + "loss": 1.1297, + "step": 663 + }, + { + "epoch": 0.011473596903511197, + "grad_norm": 3.7415936954961726, + "learning_rate": 7.645365572826712e-06, + "loss": 1.3894, + "step": 664 + }, + { + "epoch": 0.0114908764169201, + "grad_norm": 3.349610090757886, + "learning_rate": 7.656879677605067e-06, + "loss": 1.0697, + "step": 665 + }, + { + "epoch": 0.011508155930329002, + "grad_norm": 3.027966313543948, + "learning_rate": 7.66839378238342e-06, + "loss": 1.1829, + "step": 666 + }, + { + "epoch": 0.011525435443737905, + "grad_norm": 3.5654561355663428, + "learning_rate": 7.679907887161773e-06, + "loss": 1.1878, + "step": 667 + }, + { + "epoch": 0.011542714957146807, + "grad_norm": 3.2849533976885605, + "learning_rate": 7.691421991940127e-06, + "loss": 1.1813, + "step": 668 + }, + { + "epoch": 0.01155999447055571, + "grad_norm": 2.4713374436831033, + "learning_rate": 7.702936096718481e-06, + "loss": 0.8005, + "step": 669 + }, + { + "epoch": 0.011577273983964612, + "grad_norm": 2.186568593616918, + "learning_rate": 7.714450201496834e-06, + "loss": 1.2107, + "step": 670 + }, + { + "epoch": 0.011594553497373514, + "grad_norm": 3.7762487908188307, + "learning_rate": 7.725964306275188e-06, + "loss": 1.2348, + "step": 671 + }, + { + "epoch": 0.011611833010782417, + "grad_norm": 4.07841734851875, + "learning_rate": 7.737478411053542e-06, + "loss": 1.0929, + "step": 672 + }, + { + "epoch": 0.01162911252419132, + "grad_norm": 3.683801265600708, + "learning_rate": 7.748992515831894e-06, + "loss": 1.2221, + "step": 673 + }, + { + "epoch": 0.011646392037600222, + "grad_norm": 3.6165550214718993, + "learning_rate": 7.760506620610249e-06, + "loss": 0.982, + "step": 674 + }, + { + "epoch": 0.011663671551009124, + "grad_norm": 2.7123904627405735, + "learning_rate": 7.772020725388602e-06, + "loss": 0.7855, + "step": 675 + }, + { + "epoch": 0.011680951064418027, + "grad_norm": 4.383650344378771, + "learning_rate": 7.783534830166955e-06, + "loss": 1.0776, + "step": 676 + }, + { + "epoch": 0.011698230577826929, + "grad_norm": 3.88514304941641, + "learning_rate": 7.795048934945308e-06, + "loss": 0.921, + "step": 677 + }, + { + "epoch": 0.011715510091235832, + "grad_norm": 3.0020085516385353, + "learning_rate": 7.806563039723663e-06, + "loss": 0.8157, + "step": 678 + }, + { + "epoch": 0.011732789604644734, + "grad_norm": 2.206399992867007, + "learning_rate": 7.818077144502016e-06, + "loss": 0.7029, + "step": 679 + }, + { + "epoch": 0.011750069118053636, + "grad_norm": 3.509309955606118, + "learning_rate": 7.829591249280369e-06, + "loss": 0.9261, + "step": 680 + }, + { + "epoch": 0.011767348631462537, + "grad_norm": 3.6877876806667675, + "learning_rate": 7.841105354058722e-06, + "loss": 1.0106, + "step": 681 + }, + { + "epoch": 0.01178462814487144, + "grad_norm": 3.06052762767494, + "learning_rate": 7.852619458837077e-06, + "loss": 0.8324, + "step": 682 + }, + { + "epoch": 0.011801907658280342, + "grad_norm": 3.0863517803443106, + "learning_rate": 7.864133563615428e-06, + "loss": 1.0094, + "step": 683 + }, + { + "epoch": 0.011819187171689245, + "grad_norm": 3.3482801413353074, + "learning_rate": 7.875647668393783e-06, + "loss": 1.1093, + "step": 684 + }, + { + "epoch": 0.011836466685098147, + "grad_norm": 3.981385944302042, + "learning_rate": 7.887161773172136e-06, + "loss": 0.9622, + "step": 685 + }, + { + "epoch": 0.01185374619850705, + "grad_norm": 3.525474276291305, + "learning_rate": 7.89867587795049e-06, + "loss": 0.9629, + "step": 686 + }, + { + "epoch": 0.011871025711915952, + "grad_norm": 4.638286795412061, + "learning_rate": 7.910189982728843e-06, + "loss": 0.9694, + "step": 687 + }, + { + "epoch": 0.011888305225324854, + "grad_norm": 3.41930705609626, + "learning_rate": 7.921704087507197e-06, + "loss": 1.2355, + "step": 688 + }, + { + "epoch": 0.011905584738733757, + "grad_norm": 2.7499158242994635, + "learning_rate": 7.93321819228555e-06, + "loss": 0.9283, + "step": 689 + }, + { + "epoch": 0.01192286425214266, + "grad_norm": 2.682297340330258, + "learning_rate": 7.944732297063904e-06, + "loss": 0.8598, + "step": 690 + }, + { + "epoch": 0.011940143765551562, + "grad_norm": 2.577270199612974, + "learning_rate": 7.956246401842257e-06, + "loss": 0.6091, + "step": 691 + }, + { + "epoch": 0.011957423278960464, + "grad_norm": 2.6040325532612414, + "learning_rate": 7.967760506620612e-06, + "loss": 1.0041, + "step": 692 + }, + { + "epoch": 0.011974702792369367, + "grad_norm": 3.3588251618751754, + "learning_rate": 7.979274611398965e-06, + "loss": 0.936, + "step": 693 + }, + { + "epoch": 0.011991982305778269, + "grad_norm": 3.652907038441021, + "learning_rate": 7.990788716177318e-06, + "loss": 1.0139, + "step": 694 + }, + { + "epoch": 0.012009261819187171, + "grad_norm": 3.5372199953746506, + "learning_rate": 8.002302820955673e-06, + "loss": 1.2231, + "step": 695 + }, + { + "epoch": 0.012026541332596074, + "grad_norm": 3.2697591843461207, + "learning_rate": 8.013816925734024e-06, + "loss": 0.8928, + "step": 696 + }, + { + "epoch": 0.012043820846004976, + "grad_norm": 6.435161213206354, + "learning_rate": 8.025331030512379e-06, + "loss": 1.2508, + "step": 697 + }, + { + "epoch": 0.012061100359413879, + "grad_norm": 2.669110661938318, + "learning_rate": 8.036845135290732e-06, + "loss": 0.7698, + "step": 698 + }, + { + "epoch": 0.012078379872822781, + "grad_norm": 3.7278263176706234, + "learning_rate": 8.048359240069085e-06, + "loss": 0.9575, + "step": 699 + }, + { + "epoch": 0.012095659386231684, + "grad_norm": 2.8497334120921205, + "learning_rate": 8.059873344847438e-06, + "loss": 0.7641, + "step": 700 + }, + { + "epoch": 0.012112938899640586, + "grad_norm": 2.930944300190554, + "learning_rate": 8.071387449625793e-06, + "loss": 1.0145, + "step": 701 + }, + { + "epoch": 0.012130218413049489, + "grad_norm": 4.738037382446676, + "learning_rate": 8.082901554404146e-06, + "loss": 1.099, + "step": 702 + }, + { + "epoch": 0.012147497926458391, + "grad_norm": 3.1578897587838686, + "learning_rate": 8.0944156591825e-06, + "loss": 0.8681, + "step": 703 + }, + { + "epoch": 0.012164777439867294, + "grad_norm": 3.914391925904889, + "learning_rate": 8.105929763960852e-06, + "loss": 1.2433, + "step": 704 + }, + { + "epoch": 0.012182056953276196, + "grad_norm": 2.924729130468201, + "learning_rate": 8.117443868739207e-06, + "loss": 1.0607, + "step": 705 + }, + { + "epoch": 0.012199336466685098, + "grad_norm": 2.9802045579577534, + "learning_rate": 8.128957973517559e-06, + "loss": 0.6734, + "step": 706 + }, + { + "epoch": 0.012216615980094, + "grad_norm": 3.2216078593858466, + "learning_rate": 8.140472078295913e-06, + "loss": 0.5955, + "step": 707 + }, + { + "epoch": 0.012233895493502903, + "grad_norm": 2.6919433043528187, + "learning_rate": 8.151986183074267e-06, + "loss": 1.0259, + "step": 708 + }, + { + "epoch": 0.012251175006911806, + "grad_norm": 3.4497579943069927, + "learning_rate": 8.16350028785262e-06, + "loss": 1.0666, + "step": 709 + }, + { + "epoch": 0.012268454520320708, + "grad_norm": 3.2474717410623515, + "learning_rate": 8.175014392630973e-06, + "loss": 1.0995, + "step": 710 + }, + { + "epoch": 0.01228573403372961, + "grad_norm": 2.846125551109174, + "learning_rate": 8.186528497409328e-06, + "loss": 1.0917, + "step": 711 + }, + { + "epoch": 0.012303013547138513, + "grad_norm": 2.837706179830891, + "learning_rate": 8.19804260218768e-06, + "loss": 0.8557, + "step": 712 + }, + { + "epoch": 0.012320293060547416, + "grad_norm": 2.731544299063026, + "learning_rate": 8.209556706966034e-06, + "loss": 1.0668, + "step": 713 + }, + { + "epoch": 0.012337572573956318, + "grad_norm": 3.0176050699511188, + "learning_rate": 8.221070811744387e-06, + "loss": 1.0849, + "step": 714 + }, + { + "epoch": 0.01235485208736522, + "grad_norm": 2.8347014039060805, + "learning_rate": 8.232584916522742e-06, + "loss": 1.1009, + "step": 715 + }, + { + "epoch": 0.012372131600774123, + "grad_norm": 2.823156341723933, + "learning_rate": 8.244099021301093e-06, + "loss": 1.0214, + "step": 716 + }, + { + "epoch": 0.012389411114183025, + "grad_norm": 3.855121956261941, + "learning_rate": 8.255613126079448e-06, + "loss": 1.1817, + "step": 717 + }, + { + "epoch": 0.012406690627591928, + "grad_norm": 3.3208590261826094, + "learning_rate": 8.267127230857801e-06, + "loss": 0.8882, + "step": 718 + }, + { + "epoch": 0.01242397014100083, + "grad_norm": 2.918102821114281, + "learning_rate": 8.278641335636154e-06, + "loss": 0.81, + "step": 719 + }, + { + "epoch": 0.012441249654409731, + "grad_norm": 3.8591834800115015, + "learning_rate": 8.290155440414507e-06, + "loss": 1.179, + "step": 720 + }, + { + "epoch": 0.012458529167818633, + "grad_norm": 2.8377974604925735, + "learning_rate": 8.301669545192862e-06, + "loss": 1.0147, + "step": 721 + }, + { + "epoch": 0.012475808681227536, + "grad_norm": 3.5236248725117836, + "learning_rate": 8.313183649971215e-06, + "loss": 0.7495, + "step": 722 + }, + { + "epoch": 0.012493088194636438, + "grad_norm": 3.944758412464027, + "learning_rate": 8.324697754749568e-06, + "loss": 0.9848, + "step": 723 + }, + { + "epoch": 0.01251036770804534, + "grad_norm": 3.483157647609649, + "learning_rate": 8.336211859527923e-06, + "loss": 0.9557, + "step": 724 + }, + { + "epoch": 0.012527647221454243, + "grad_norm": 3.4309426354241395, + "learning_rate": 8.347725964306276e-06, + "loss": 1.0034, + "step": 725 + }, + { + "epoch": 0.012544926734863146, + "grad_norm": 4.772806793361595, + "learning_rate": 8.35924006908463e-06, + "loss": 1.1212, + "step": 726 + }, + { + "epoch": 0.012562206248272048, + "grad_norm": 2.845430019160622, + "learning_rate": 8.370754173862983e-06, + "loss": 0.845, + "step": 727 + }, + { + "epoch": 0.01257948576168095, + "grad_norm": 3.878496310731824, + "learning_rate": 8.382268278641337e-06, + "loss": 1.0848, + "step": 728 + }, + { + "epoch": 0.012596765275089853, + "grad_norm": 3.2846893384189872, + "learning_rate": 8.393782383419689e-06, + "loss": 0.9358, + "step": 729 + }, + { + "epoch": 0.012614044788498755, + "grad_norm": 4.4505986026058455, + "learning_rate": 8.405296488198044e-06, + "loss": 0.917, + "step": 730 + }, + { + "epoch": 0.012631324301907658, + "grad_norm": 3.36538755615726, + "learning_rate": 8.416810592976397e-06, + "loss": 0.8223, + "step": 731 + }, + { + "epoch": 0.01264860381531656, + "grad_norm": 3.507591527991975, + "learning_rate": 8.42832469775475e-06, + "loss": 1.0523, + "step": 732 + }, + { + "epoch": 0.012665883328725463, + "grad_norm": 2.837657983364851, + "learning_rate": 8.439838802533103e-06, + "loss": 0.8716, + "step": 733 + }, + { + "epoch": 0.012683162842134365, + "grad_norm": 2.6382075272135967, + "learning_rate": 8.451352907311458e-06, + "loss": 1.1173, + "step": 734 + }, + { + "epoch": 0.012700442355543268, + "grad_norm": 3.244612633305828, + "learning_rate": 8.462867012089811e-06, + "loss": 1.0575, + "step": 735 + }, + { + "epoch": 0.01271772186895217, + "grad_norm": 3.2300529734308894, + "learning_rate": 8.474381116868164e-06, + "loss": 0.9956, + "step": 736 + }, + { + "epoch": 0.012735001382361073, + "grad_norm": 3.705650522257141, + "learning_rate": 8.485895221646517e-06, + "loss": 0.9802, + "step": 737 + }, + { + "epoch": 0.012752280895769975, + "grad_norm": 3.203448268307144, + "learning_rate": 8.497409326424872e-06, + "loss": 1.125, + "step": 738 + }, + { + "epoch": 0.012769560409178878, + "grad_norm": 2.8656264272792438, + "learning_rate": 8.508923431203223e-06, + "loss": 0.9478, + "step": 739 + }, + { + "epoch": 0.01278683992258778, + "grad_norm": 3.6456557856131644, + "learning_rate": 8.520437535981578e-06, + "loss": 0.8902, + "step": 740 + }, + { + "epoch": 0.012804119435996682, + "grad_norm": 2.648740259142434, + "learning_rate": 8.531951640759931e-06, + "loss": 0.9388, + "step": 741 + }, + { + "epoch": 0.012821398949405585, + "grad_norm": 2.7852102247440222, + "learning_rate": 8.543465745538285e-06, + "loss": 0.9975, + "step": 742 + }, + { + "epoch": 0.012838678462814487, + "grad_norm": 2.137253333416636, + "learning_rate": 8.554979850316638e-06, + "loss": 0.7182, + "step": 743 + }, + { + "epoch": 0.01285595797622339, + "grad_norm": 3.2354126777371044, + "learning_rate": 8.566493955094992e-06, + "loss": 0.9227, + "step": 744 + }, + { + "epoch": 0.012873237489632292, + "grad_norm": 2.9240600652131525, + "learning_rate": 8.578008059873346e-06, + "loss": 1.155, + "step": 745 + }, + { + "epoch": 0.012890517003041195, + "grad_norm": 2.827344160578482, + "learning_rate": 8.589522164651699e-06, + "loss": 0.9585, + "step": 746 + }, + { + "epoch": 0.012907796516450097, + "grad_norm": 3.5131968442293573, + "learning_rate": 8.601036269430052e-06, + "loss": 0.7941, + "step": 747 + }, + { + "epoch": 0.012925076029859, + "grad_norm": 3.039537990658748, + "learning_rate": 8.612550374208407e-06, + "loss": 0.8956, + "step": 748 + }, + { + "epoch": 0.012942355543267902, + "grad_norm": 3.05588151732222, + "learning_rate": 8.62406447898676e-06, + "loss": 0.7912, + "step": 749 + }, + { + "epoch": 0.012959635056676804, + "grad_norm": 3.2544273806893065, + "learning_rate": 8.635578583765113e-06, + "loss": 0.8755, + "step": 750 + }, + { + "epoch": 0.012976914570085707, + "grad_norm": 3.555964857588178, + "learning_rate": 8.647092688543468e-06, + "loss": 0.819, + "step": 751 + }, + { + "epoch": 0.01299419408349461, + "grad_norm": 3.073334395323476, + "learning_rate": 8.658606793321819e-06, + "loss": 0.6641, + "step": 752 + }, + { + "epoch": 0.013011473596903512, + "grad_norm": 3.2477072636580973, + "learning_rate": 8.670120898100174e-06, + "loss": 1.0911, + "step": 753 + }, + { + "epoch": 0.013028753110312414, + "grad_norm": 2.8455883608098507, + "learning_rate": 8.681635002878527e-06, + "loss": 0.9094, + "step": 754 + }, + { + "epoch": 0.013046032623721317, + "grad_norm": 3.7141000614805835, + "learning_rate": 8.69314910765688e-06, + "loss": 1.0325, + "step": 755 + }, + { + "epoch": 0.01306331213713022, + "grad_norm": 3.4159001158007607, + "learning_rate": 8.704663212435233e-06, + "loss": 0.8788, + "step": 756 + }, + { + "epoch": 0.013080591650539122, + "grad_norm": 3.9451899579195113, + "learning_rate": 8.716177317213588e-06, + "loss": 1.3235, + "step": 757 + }, + { + "epoch": 0.013097871163948024, + "grad_norm": 3.1160191004249715, + "learning_rate": 8.727691421991941e-06, + "loss": 1.0242, + "step": 758 + }, + { + "epoch": 0.013115150677356925, + "grad_norm": 2.7589499382792617, + "learning_rate": 8.739205526770294e-06, + "loss": 1.2332, + "step": 759 + }, + { + "epoch": 0.013132430190765827, + "grad_norm": 5.34402351729534, + "learning_rate": 8.750719631548647e-06, + "loss": 1.011, + "step": 760 + }, + { + "epoch": 0.01314970970417473, + "grad_norm": 2.8984511521384966, + "learning_rate": 8.762233736327002e-06, + "loss": 0.8088, + "step": 761 + }, + { + "epoch": 0.013166989217583632, + "grad_norm": 3.3361646485187446, + "learning_rate": 8.773747841105354e-06, + "loss": 0.6342, + "step": 762 + }, + { + "epoch": 0.013184268730992535, + "grad_norm": 3.0761269731964282, + "learning_rate": 8.785261945883709e-06, + "loss": 1.0319, + "step": 763 + }, + { + "epoch": 0.013201548244401437, + "grad_norm": 6.417185608529989, + "learning_rate": 8.796776050662062e-06, + "loss": 0.9973, + "step": 764 + }, + { + "epoch": 0.01321882775781034, + "grad_norm": 2.531787542459668, + "learning_rate": 8.808290155440415e-06, + "loss": 0.9839, + "step": 765 + }, + { + "epoch": 0.013236107271219242, + "grad_norm": 3.348018275166507, + "learning_rate": 8.819804260218768e-06, + "loss": 0.8943, + "step": 766 + }, + { + "epoch": 0.013253386784628144, + "grad_norm": 2.9017682808896037, + "learning_rate": 8.831318364997123e-06, + "loss": 0.9822, + "step": 767 + }, + { + "epoch": 0.013270666298037047, + "grad_norm": 6.642005355722091, + "learning_rate": 8.842832469775476e-06, + "loss": 1.0111, + "step": 768 + }, + { + "epoch": 0.01328794581144595, + "grad_norm": 3.751712910425523, + "learning_rate": 8.854346574553829e-06, + "loss": 1.1395, + "step": 769 + }, + { + "epoch": 0.013305225324854852, + "grad_norm": 3.8633756272703934, + "learning_rate": 8.865860679332182e-06, + "loss": 0.8542, + "step": 770 + }, + { + "epoch": 0.013322504838263754, + "grad_norm": 3.5652477526359423, + "learning_rate": 8.877374784110537e-06, + "loss": 1.0117, + "step": 771 + }, + { + "epoch": 0.013339784351672657, + "grad_norm": 5.993988676128802, + "learning_rate": 8.888888888888888e-06, + "loss": 0.9622, + "step": 772 + }, + { + "epoch": 0.013357063865081559, + "grad_norm": 3.245928633260552, + "learning_rate": 8.900402993667243e-06, + "loss": 0.7162, + "step": 773 + }, + { + "epoch": 0.013374343378490462, + "grad_norm": 3.7513871958305085, + "learning_rate": 8.911917098445596e-06, + "loss": 0.6079, + "step": 774 + }, + { + "epoch": 0.013391622891899364, + "grad_norm": 2.643588877493462, + "learning_rate": 8.92343120322395e-06, + "loss": 1.1171, + "step": 775 + }, + { + "epoch": 0.013408902405308266, + "grad_norm": 2.872939711337992, + "learning_rate": 8.934945308002304e-06, + "loss": 0.6722, + "step": 776 + }, + { + "epoch": 0.013426181918717169, + "grad_norm": 5.384279156666957, + "learning_rate": 8.946459412780657e-06, + "loss": 0.9003, + "step": 777 + }, + { + "epoch": 0.013443461432126071, + "grad_norm": 3.6364166076976034, + "learning_rate": 8.95797351755901e-06, + "loss": 1.1133, + "step": 778 + }, + { + "epoch": 0.013460740945534974, + "grad_norm": 3.304490912253532, + "learning_rate": 8.969487622337364e-06, + "loss": 0.994, + "step": 779 + }, + { + "epoch": 0.013478020458943876, + "grad_norm": 3.1098292728841526, + "learning_rate": 8.981001727115718e-06, + "loss": 1.0129, + "step": 780 + }, + { + "epoch": 0.013495299972352779, + "grad_norm": 2.629580522307034, + "learning_rate": 8.992515831894071e-06, + "loss": 0.866, + "step": 781 + }, + { + "epoch": 0.013512579485761681, + "grad_norm": 3.67735364699367, + "learning_rate": 9.004029936672425e-06, + "loss": 0.7807, + "step": 782 + }, + { + "epoch": 0.013529858999170584, + "grad_norm": 3.2009027640093426, + "learning_rate": 9.015544041450778e-06, + "loss": 0.7901, + "step": 783 + }, + { + "epoch": 0.013547138512579486, + "grad_norm": 3.2435987446960945, + "learning_rate": 9.027058146229133e-06, + "loss": 1.1609, + "step": 784 + }, + { + "epoch": 0.013564418025988388, + "grad_norm": 3.386896780913511, + "learning_rate": 9.038572251007484e-06, + "loss": 0.953, + "step": 785 + }, + { + "epoch": 0.013581697539397291, + "grad_norm": 3.3195196851530233, + "learning_rate": 9.050086355785839e-06, + "loss": 1.074, + "step": 786 + }, + { + "epoch": 0.013598977052806193, + "grad_norm": 3.069391945533621, + "learning_rate": 9.061600460564192e-06, + "loss": 0.7432, + "step": 787 + }, + { + "epoch": 0.013616256566215096, + "grad_norm": 2.9969863683898272, + "learning_rate": 9.073114565342545e-06, + "loss": 1.1285, + "step": 788 + }, + { + "epoch": 0.013633536079623998, + "grad_norm": 3.7002901228564067, + "learning_rate": 9.084628670120898e-06, + "loss": 0.9927, + "step": 789 + }, + { + "epoch": 0.0136508155930329, + "grad_norm": 2.6645796220677833, + "learning_rate": 9.096142774899253e-06, + "loss": 0.9799, + "step": 790 + }, + { + "epoch": 0.013668095106441803, + "grad_norm": 2.4189956623083013, + "learning_rate": 9.107656879677606e-06, + "loss": 0.8911, + "step": 791 + }, + { + "epoch": 0.013685374619850706, + "grad_norm": 2.6687167431826415, + "learning_rate": 9.11917098445596e-06, + "loss": 0.8405, + "step": 792 + }, + { + "epoch": 0.013702654133259608, + "grad_norm": 2.7995158045708264, + "learning_rate": 9.130685089234312e-06, + "loss": 0.8563, + "step": 793 + }, + { + "epoch": 0.01371993364666851, + "grad_norm": 3.3125604020461106, + "learning_rate": 9.142199194012667e-06, + "loss": 1.0446, + "step": 794 + }, + { + "epoch": 0.013737213160077413, + "grad_norm": 2.850171933625257, + "learning_rate": 9.153713298791019e-06, + "loss": 1.0499, + "step": 795 + }, + { + "epoch": 0.013754492673486315, + "grad_norm": 3.363781309789217, + "learning_rate": 9.165227403569373e-06, + "loss": 1.18, + "step": 796 + }, + { + "epoch": 0.013771772186895218, + "grad_norm": 3.3606748790904857, + "learning_rate": 9.176741508347726e-06, + "loss": 0.7302, + "step": 797 + }, + { + "epoch": 0.013789051700304119, + "grad_norm": 3.4211749598430834, + "learning_rate": 9.18825561312608e-06, + "loss": 0.8791, + "step": 798 + }, + { + "epoch": 0.013806331213713021, + "grad_norm": 4.352403973891056, + "learning_rate": 9.199769717904433e-06, + "loss": 1.092, + "step": 799 + }, + { + "epoch": 0.013823610727121923, + "grad_norm": 2.9039916802229264, + "learning_rate": 9.211283822682788e-06, + "loss": 0.8629, + "step": 800 + }, + { + "epoch": 0.013840890240530826, + "grad_norm": 2.64956886649566, + "learning_rate": 9.22279792746114e-06, + "loss": 0.9553, + "step": 801 + }, + { + "epoch": 0.013858169753939728, + "grad_norm": 3.542618149714872, + "learning_rate": 9.234312032239494e-06, + "loss": 1.1167, + "step": 802 + }, + { + "epoch": 0.01387544926734863, + "grad_norm": 3.020433403948087, + "learning_rate": 9.245826137017849e-06, + "loss": 0.8943, + "step": 803 + }, + { + "epoch": 0.013892728780757533, + "grad_norm": 3.2265773474367307, + "learning_rate": 9.257340241796202e-06, + "loss": 1.0344, + "step": 804 + }, + { + "epoch": 0.013910008294166436, + "grad_norm": 2.1690629491631266, + "learning_rate": 9.268854346574555e-06, + "loss": 0.7637, + "step": 805 + }, + { + "epoch": 0.013927287807575338, + "grad_norm": 2.5027306417580135, + "learning_rate": 9.280368451352908e-06, + "loss": 0.9125, + "step": 806 + }, + { + "epoch": 0.01394456732098424, + "grad_norm": 2.59643744642911, + "learning_rate": 9.291882556131263e-06, + "loss": 1.0279, + "step": 807 + }, + { + "epoch": 0.013961846834393143, + "grad_norm": 2.1482128289555256, + "learning_rate": 9.303396660909614e-06, + "loss": 0.8774, + "step": 808 + }, + { + "epoch": 0.013979126347802046, + "grad_norm": 2.9218151036810283, + "learning_rate": 9.314910765687969e-06, + "loss": 0.626, + "step": 809 + }, + { + "epoch": 0.013996405861210948, + "grad_norm": 2.952491881030406, + "learning_rate": 9.326424870466322e-06, + "loss": 0.9327, + "step": 810 + }, + { + "epoch": 0.01401368537461985, + "grad_norm": 3.2498718104488944, + "learning_rate": 9.337938975244675e-06, + "loss": 1.0542, + "step": 811 + }, + { + "epoch": 0.014030964888028753, + "grad_norm": 2.1830433555949993, + "learning_rate": 9.349453080023028e-06, + "loss": 0.6191, + "step": 812 + }, + { + "epoch": 0.014048244401437655, + "grad_norm": 3.059562352169935, + "learning_rate": 9.360967184801383e-06, + "loss": 0.7211, + "step": 813 + }, + { + "epoch": 0.014065523914846558, + "grad_norm": 3.9304481889948155, + "learning_rate": 9.372481289579736e-06, + "loss": 0.8039, + "step": 814 + }, + { + "epoch": 0.01408280342825546, + "grad_norm": 2.458554417955451, + "learning_rate": 9.38399539435809e-06, + "loss": 0.8735, + "step": 815 + }, + { + "epoch": 0.014100082941664363, + "grad_norm": 2.3230408490220547, + "learning_rate": 9.395509499136443e-06, + "loss": 0.7985, + "step": 816 + }, + { + "epoch": 0.014117362455073265, + "grad_norm": 3.585087391489545, + "learning_rate": 9.407023603914797e-06, + "loss": 1.035, + "step": 817 + }, + { + "epoch": 0.014134641968482168, + "grad_norm": 2.93246591843932, + "learning_rate": 9.418537708693149e-06, + "loss": 0.9156, + "step": 818 + }, + { + "epoch": 0.01415192148189107, + "grad_norm": 3.769699840029188, + "learning_rate": 9.430051813471504e-06, + "loss": 0.9665, + "step": 819 + }, + { + "epoch": 0.014169200995299972, + "grad_norm": 3.419676388875079, + "learning_rate": 9.441565918249857e-06, + "loss": 1.0406, + "step": 820 + }, + { + "epoch": 0.014186480508708875, + "grad_norm": 3.2119242158735277, + "learning_rate": 9.45308002302821e-06, + "loss": 1.0038, + "step": 821 + }, + { + "epoch": 0.014203760022117777, + "grad_norm": 4.0763780863273364, + "learning_rate": 9.464594127806563e-06, + "loss": 1.1508, + "step": 822 + }, + { + "epoch": 0.01422103953552668, + "grad_norm": 3.2004171395356766, + "learning_rate": 9.476108232584918e-06, + "loss": 0.9708, + "step": 823 + }, + { + "epoch": 0.014238319048935582, + "grad_norm": 3.3282589190321277, + "learning_rate": 9.487622337363271e-06, + "loss": 0.8727, + "step": 824 + }, + { + "epoch": 0.014255598562344485, + "grad_norm": 3.274883551374176, + "learning_rate": 9.499136442141624e-06, + "loss": 0.8731, + "step": 825 + }, + { + "epoch": 0.014272878075753387, + "grad_norm": 2.4848746957757295, + "learning_rate": 9.510650546919977e-06, + "loss": 0.8638, + "step": 826 + }, + { + "epoch": 0.01429015758916229, + "grad_norm": 2.939815143752206, + "learning_rate": 9.522164651698332e-06, + "loss": 0.681, + "step": 827 + }, + { + "epoch": 0.014307437102571192, + "grad_norm": 3.2183240050075987, + "learning_rate": 9.533678756476683e-06, + "loss": 1.0226, + "step": 828 + }, + { + "epoch": 0.014324716615980095, + "grad_norm": 2.9155944778889973, + "learning_rate": 9.545192861255038e-06, + "loss": 1.1126, + "step": 829 + }, + { + "epoch": 0.014341996129388997, + "grad_norm": 6.952350952183329, + "learning_rate": 9.556706966033393e-06, + "loss": 1.0858, + "step": 830 + }, + { + "epoch": 0.0143592756427979, + "grad_norm": 3.298378890119501, + "learning_rate": 9.568221070811744e-06, + "loss": 0.9134, + "step": 831 + }, + { + "epoch": 0.014376555156206802, + "grad_norm": 2.5616129196486526, + "learning_rate": 9.5797351755901e-06, + "loss": 1.1135, + "step": 832 + }, + { + "epoch": 0.014393834669615704, + "grad_norm": 3.011556015950308, + "learning_rate": 9.591249280368452e-06, + "loss": 1.1259, + "step": 833 + }, + { + "epoch": 0.014411114183024607, + "grad_norm": 5.210456921138002, + "learning_rate": 9.602763385146805e-06, + "loss": 0.8448, + "step": 834 + }, + { + "epoch": 0.01442839369643351, + "grad_norm": 2.530199866680799, + "learning_rate": 9.614277489925159e-06, + "loss": 1.2417, + "step": 835 + }, + { + "epoch": 0.014445673209842412, + "grad_norm": 3.9942065281266195, + "learning_rate": 9.625791594703513e-06, + "loss": 0.7559, + "step": 836 + }, + { + "epoch": 0.014462952723251312, + "grad_norm": 3.1233573161647965, + "learning_rate": 9.637305699481867e-06, + "loss": 1.0749, + "step": 837 + }, + { + "epoch": 0.014480232236660215, + "grad_norm": 2.286912552486194, + "learning_rate": 9.64881980426022e-06, + "loss": 0.7282, + "step": 838 + }, + { + "epoch": 0.014497511750069117, + "grad_norm": 3.0252369509825243, + "learning_rate": 9.660333909038573e-06, + "loss": 0.7638, + "step": 839 + }, + { + "epoch": 0.01451479126347802, + "grad_norm": 2.9596946904173254, + "learning_rate": 9.671848013816928e-06, + "loss": 0.8345, + "step": 840 + }, + { + "epoch": 0.014532070776886922, + "grad_norm": 3.8520252107753397, + "learning_rate": 9.683362118595279e-06, + "loss": 1.152, + "step": 841 + }, + { + "epoch": 0.014549350290295825, + "grad_norm": 3.7059943211297317, + "learning_rate": 9.694876223373634e-06, + "loss": 1.1182, + "step": 842 + }, + { + "epoch": 0.014566629803704727, + "grad_norm": 3.609443161138643, + "learning_rate": 9.706390328151987e-06, + "loss": 1.171, + "step": 843 + }, + { + "epoch": 0.01458390931711363, + "grad_norm": 3.6795511977654707, + "learning_rate": 9.71790443293034e-06, + "loss": 0.9999, + "step": 844 + }, + { + "epoch": 0.014601188830522532, + "grad_norm": 2.9687390153855757, + "learning_rate": 9.729418537708693e-06, + "loss": 0.8192, + "step": 845 + }, + { + "epoch": 0.014618468343931434, + "grad_norm": 2.035396119424099, + "learning_rate": 9.740932642487048e-06, + "loss": 0.7387, + "step": 846 + }, + { + "epoch": 0.014635747857340337, + "grad_norm": 2.7872400780220916, + "learning_rate": 9.752446747265401e-06, + "loss": 0.8834, + "step": 847 + }, + { + "epoch": 0.01465302737074924, + "grad_norm": 4.034162033401408, + "learning_rate": 9.763960852043754e-06, + "loss": 0.853, + "step": 848 + }, + { + "epoch": 0.014670306884158142, + "grad_norm": 3.2624723057255127, + "learning_rate": 9.775474956822107e-06, + "loss": 0.9325, + "step": 849 + }, + { + "epoch": 0.014687586397567044, + "grad_norm": 9.45631064595327, + "learning_rate": 9.786989061600462e-06, + "loss": 0.9238, + "step": 850 + }, + { + "epoch": 0.014704865910975947, + "grad_norm": 2.491426226549233, + "learning_rate": 9.798503166378814e-06, + "loss": 0.7988, + "step": 851 + }, + { + "epoch": 0.01472214542438485, + "grad_norm": 3.518700895270424, + "learning_rate": 9.810017271157168e-06, + "loss": 1.0185, + "step": 852 + }, + { + "epoch": 0.014739424937793752, + "grad_norm": 3.1248294178857994, + "learning_rate": 9.821531375935522e-06, + "loss": 1.2166, + "step": 853 + }, + { + "epoch": 0.014756704451202654, + "grad_norm": 3.468437614001757, + "learning_rate": 9.833045480713875e-06, + "loss": 0.9026, + "step": 854 + }, + { + "epoch": 0.014773983964611557, + "grad_norm": 2.925515187685323, + "learning_rate": 9.844559585492228e-06, + "loss": 1.1142, + "step": 855 + }, + { + "epoch": 0.014791263478020459, + "grad_norm": 3.3581322724260456, + "learning_rate": 9.856073690270583e-06, + "loss": 0.9614, + "step": 856 + }, + { + "epoch": 0.014808542991429361, + "grad_norm": 3.1023111070185063, + "learning_rate": 9.867587795048936e-06, + "loss": 1.0334, + "step": 857 + }, + { + "epoch": 0.014825822504838264, + "grad_norm": 3.3378865784002514, + "learning_rate": 9.879101899827289e-06, + "loss": 0.9401, + "step": 858 + }, + { + "epoch": 0.014843102018247166, + "grad_norm": 2.989949250373051, + "learning_rate": 9.890616004605644e-06, + "loss": 1.0708, + "step": 859 + }, + { + "epoch": 0.014860381531656069, + "grad_norm": 2.45877503319339, + "learning_rate": 9.902130109383997e-06, + "loss": 1.064, + "step": 860 + }, + { + "epoch": 0.014877661045064971, + "grad_norm": 3.090807922075019, + "learning_rate": 9.91364421416235e-06, + "loss": 0.9623, + "step": 861 + }, + { + "epoch": 0.014894940558473874, + "grad_norm": 2.72357924012842, + "learning_rate": 9.925158318940703e-06, + "loss": 0.9242, + "step": 862 + }, + { + "epoch": 0.014912220071882776, + "grad_norm": 2.8107763298836255, + "learning_rate": 9.936672423719058e-06, + "loss": 0.9647, + "step": 863 + }, + { + "epoch": 0.014929499585291679, + "grad_norm": 3.47469829755691, + "learning_rate": 9.94818652849741e-06, + "loss": 0.8167, + "step": 864 + }, + { + "epoch": 0.014946779098700581, + "grad_norm": 2.4952904239748457, + "learning_rate": 9.959700633275764e-06, + "loss": 1.1188, + "step": 865 + }, + { + "epoch": 0.014964058612109483, + "grad_norm": 3.4059274818807723, + "learning_rate": 9.971214738054117e-06, + "loss": 1.0629, + "step": 866 + }, + { + "epoch": 0.014981338125518386, + "grad_norm": 2.3776186834233104, + "learning_rate": 9.98272884283247e-06, + "loss": 0.8008, + "step": 867 + }, + { + "epoch": 0.014998617638927288, + "grad_norm": 3.119308162395499, + "learning_rate": 9.994242947610823e-06, + "loss": 0.9676, + "step": 868 + }, + { + "epoch": 0.01501589715233619, + "grad_norm": 3.121687047095498, + "learning_rate": 1.0005757052389178e-05, + "loss": 1.15, + "step": 869 + }, + { + "epoch": 0.015033176665745093, + "grad_norm": 3.1201579554083647, + "learning_rate": 1.0017271157167531e-05, + "loss": 0.8185, + "step": 870 + }, + { + "epoch": 0.015050456179153996, + "grad_norm": 7.530475625816125, + "learning_rate": 1.0028785261945884e-05, + "loss": 0.9416, + "step": 871 + }, + { + "epoch": 0.015067735692562898, + "grad_norm": 2.472260799254331, + "learning_rate": 1.0040299366724238e-05, + "loss": 1.236, + "step": 872 + }, + { + "epoch": 0.0150850152059718, + "grad_norm": 2.7957727248685353, + "learning_rate": 1.0051813471502592e-05, + "loss": 0.8506, + "step": 873 + }, + { + "epoch": 0.015102294719380703, + "grad_norm": 2.622560767930549, + "learning_rate": 1.0063327576280946e-05, + "loss": 0.8148, + "step": 874 + }, + { + "epoch": 0.015119574232789606, + "grad_norm": 1.7427566791970386, + "learning_rate": 1.0074841681059297e-05, + "loss": 0.9761, + "step": 875 + }, + { + "epoch": 0.015136853746198506, + "grad_norm": 3.852856402361206, + "learning_rate": 1.0086355785837652e-05, + "loss": 0.8822, + "step": 876 + }, + { + "epoch": 0.015154133259607409, + "grad_norm": 3.494896206879191, + "learning_rate": 1.0097869890616005e-05, + "loss": 0.9601, + "step": 877 + }, + { + "epoch": 0.015171412773016311, + "grad_norm": 2.94348526697225, + "learning_rate": 1.010938399539436e-05, + "loss": 0.9237, + "step": 878 + }, + { + "epoch": 0.015188692286425214, + "grad_norm": 3.081555575735631, + "learning_rate": 1.0120898100172711e-05, + "loss": 1.0591, + "step": 879 + }, + { + "epoch": 0.015205971799834116, + "grad_norm": 2.4113670546108694, + "learning_rate": 1.0132412204951066e-05, + "loss": 0.6346, + "step": 880 + }, + { + "epoch": 0.015223251313243018, + "grad_norm": 2.292112913696939, + "learning_rate": 1.0143926309729419e-05, + "loss": 1.2266, + "step": 881 + }, + { + "epoch": 0.015240530826651921, + "grad_norm": 2.520179679793527, + "learning_rate": 1.0155440414507774e-05, + "loss": 1.156, + "step": 882 + }, + { + "epoch": 0.015257810340060823, + "grad_norm": 2.530994445921907, + "learning_rate": 1.0166954519286127e-05, + "loss": 0.8439, + "step": 883 + }, + { + "epoch": 0.015275089853469726, + "grad_norm": 3.2228124800730127, + "learning_rate": 1.0178468624064478e-05, + "loss": 0.9196, + "step": 884 + }, + { + "epoch": 0.015292369366878628, + "grad_norm": 2.7507409400806773, + "learning_rate": 1.0189982728842833e-05, + "loss": 0.8306, + "step": 885 + }, + { + "epoch": 0.01530964888028753, + "grad_norm": 2.736202225058832, + "learning_rate": 1.0201496833621188e-05, + "loss": 0.9313, + "step": 886 + }, + { + "epoch": 0.015326928393696433, + "grad_norm": 3.5557665242568763, + "learning_rate": 1.0213010938399541e-05, + "loss": 0.8976, + "step": 887 + }, + { + "epoch": 0.015344207907105336, + "grad_norm": 2.258325796744729, + "learning_rate": 1.0224525043177893e-05, + "loss": 0.8038, + "step": 888 + }, + { + "epoch": 0.015361487420514238, + "grad_norm": 2.6209869068889082, + "learning_rate": 1.0236039147956247e-05, + "loss": 0.7612, + "step": 889 + }, + { + "epoch": 0.01537876693392314, + "grad_norm": 2.6179466863707543, + "learning_rate": 1.02475532527346e-05, + "loss": 0.7313, + "step": 890 + }, + { + "epoch": 0.015396046447332043, + "grad_norm": 2.0580180473622147, + "learning_rate": 1.0259067357512955e-05, + "loss": 0.7859, + "step": 891 + }, + { + "epoch": 0.015413325960740945, + "grad_norm": 2.4599004678406784, + "learning_rate": 1.0270581462291307e-05, + "loss": 1.1264, + "step": 892 + }, + { + "epoch": 0.015430605474149848, + "grad_norm": 2.013757632643972, + "learning_rate": 1.0282095567069662e-05, + "loss": 0.9819, + "step": 893 + }, + { + "epoch": 0.01544788498755875, + "grad_norm": 3.6804590520998413, + "learning_rate": 1.0293609671848015e-05, + "loss": 0.8961, + "step": 894 + }, + { + "epoch": 0.015465164500967653, + "grad_norm": 2.968131933076805, + "learning_rate": 1.030512377662637e-05, + "loss": 0.8128, + "step": 895 + }, + { + "epoch": 0.015482444014376555, + "grad_norm": 9.647150153354644, + "learning_rate": 1.0316637881404721e-05, + "loss": 0.875, + "step": 896 + }, + { + "epoch": 0.015499723527785458, + "grad_norm": 2.857170412673167, + "learning_rate": 1.0328151986183074e-05, + "loss": 0.9623, + "step": 897 + }, + { + "epoch": 0.01551700304119436, + "grad_norm": 2.6265506072095666, + "learning_rate": 1.0339666090961429e-05, + "loss": 0.7783, + "step": 898 + }, + { + "epoch": 0.015534282554603263, + "grad_norm": 3.231825002622925, + "learning_rate": 1.0351180195739782e-05, + "loss": 0.9324, + "step": 899 + }, + { + "epoch": 0.015551562068012165, + "grad_norm": 3.488253323198452, + "learning_rate": 1.0362694300518135e-05, + "loss": 0.8742, + "step": 900 + }, + { + "epoch": 0.015568841581421067, + "grad_norm": 3.369487802888854, + "learning_rate": 1.0374208405296488e-05, + "loss": 1.3119, + "step": 901 + }, + { + "epoch": 0.01558612109482997, + "grad_norm": 2.8308575656783552, + "learning_rate": 1.0385722510074843e-05, + "loss": 0.9368, + "step": 902 + }, + { + "epoch": 0.015603400608238872, + "grad_norm": 2.9980340059184534, + "learning_rate": 1.0397236614853196e-05, + "loss": 0.7121, + "step": 903 + }, + { + "epoch": 0.015620680121647775, + "grad_norm": 2.6817477739693176, + "learning_rate": 1.040875071963155e-05, + "loss": 0.9591, + "step": 904 + }, + { + "epoch": 0.015637959635056677, + "grad_norm": 2.5006945605291118, + "learning_rate": 1.0420264824409902e-05, + "loss": 1.0849, + "step": 905 + }, + { + "epoch": 0.015655239148465578, + "grad_norm": 3.1907066651625415, + "learning_rate": 1.0431778929188257e-05, + "loss": 1.0471, + "step": 906 + }, + { + "epoch": 0.015672518661874482, + "grad_norm": 2.5933913660062506, + "learning_rate": 1.044329303396661e-05, + "loss": 0.7185, + "step": 907 + }, + { + "epoch": 0.015689798175283383, + "grad_norm": 3.2549826768388237, + "learning_rate": 1.0454807138744962e-05, + "loss": 1.0908, + "step": 908 + }, + { + "epoch": 0.015707077688692287, + "grad_norm": 2.261707474880414, + "learning_rate": 1.0466321243523317e-05, + "loss": 0.728, + "step": 909 + }, + { + "epoch": 0.015724357202101188, + "grad_norm": 2.170944357193025, + "learning_rate": 1.047783534830167e-05, + "loss": 0.7029, + "step": 910 + }, + { + "epoch": 0.015741636715510092, + "grad_norm": 2.247235175422112, + "learning_rate": 1.0489349453080025e-05, + "loss": 0.873, + "step": 911 + }, + { + "epoch": 0.015758916228918993, + "grad_norm": 3.289561299970894, + "learning_rate": 1.0500863557858378e-05, + "loss": 0.7405, + "step": 912 + }, + { + "epoch": 0.015776195742327897, + "grad_norm": 2.1966173590101405, + "learning_rate": 1.051237766263673e-05, + "loss": 0.7814, + "step": 913 + }, + { + "epoch": 0.015793475255736798, + "grad_norm": 3.447563660877774, + "learning_rate": 1.0523891767415084e-05, + "loss": 0.8503, + "step": 914 + }, + { + "epoch": 0.015810754769145702, + "grad_norm": 3.0116745439068966, + "learning_rate": 1.0535405872193439e-05, + "loss": 0.5645, + "step": 915 + }, + { + "epoch": 0.015828034282554602, + "grad_norm": 2.300251265306448, + "learning_rate": 1.0546919976971792e-05, + "loss": 0.7082, + "step": 916 + }, + { + "epoch": 0.015845313795963507, + "grad_norm": 2.569265358378446, + "learning_rate": 1.0558434081750143e-05, + "loss": 0.7681, + "step": 917 + }, + { + "epoch": 0.015862593309372407, + "grad_norm": 6.359551593363521, + "learning_rate": 1.0569948186528498e-05, + "loss": 0.9982, + "step": 918 + }, + { + "epoch": 0.01587987282278131, + "grad_norm": 5.344112917961519, + "learning_rate": 1.0581462291306853e-05, + "loss": 0.6541, + "step": 919 + }, + { + "epoch": 0.015897152336190212, + "grad_norm": 3.033918000318939, + "learning_rate": 1.0592976396085206e-05, + "loss": 0.8596, + "step": 920 + }, + { + "epoch": 0.015914431849599116, + "grad_norm": 2.6628088195158086, + "learning_rate": 1.0604490500863557e-05, + "loss": 0.8048, + "step": 921 + }, + { + "epoch": 0.015931711363008017, + "grad_norm": 3.409961707598373, + "learning_rate": 1.0616004605641912e-05, + "loss": 0.9537, + "step": 922 + }, + { + "epoch": 0.01594899087641692, + "grad_norm": 2.9232715237436544, + "learning_rate": 1.0627518710420265e-05, + "loss": 0.9942, + "step": 923 + }, + { + "epoch": 0.015966270389825822, + "grad_norm": 2.557997045863527, + "learning_rate": 1.063903281519862e-05, + "loss": 1.1269, + "step": 924 + }, + { + "epoch": 0.015983549903234726, + "grad_norm": 2.5874114892181423, + "learning_rate": 1.0650546919976972e-05, + "loss": 1.1684, + "step": 925 + }, + { + "epoch": 0.016000829416643627, + "grad_norm": 2.649169130503668, + "learning_rate": 1.0662061024755326e-05, + "loss": 0.9241, + "step": 926 + }, + { + "epoch": 0.01601810893005253, + "grad_norm": 3.0760720892790476, + "learning_rate": 1.067357512953368e-05, + "loss": 0.8802, + "step": 927 + }, + { + "epoch": 0.016035388443461432, + "grad_norm": 2.298485929815039, + "learning_rate": 1.0685089234312034e-05, + "loss": 0.5671, + "step": 928 + }, + { + "epoch": 0.016052667956870336, + "grad_norm": 2.4837145148071196, + "learning_rate": 1.0696603339090386e-05, + "loss": 1.0617, + "step": 929 + }, + { + "epoch": 0.016069947470279237, + "grad_norm": 2.5430562554466154, + "learning_rate": 1.0708117443868739e-05, + "loss": 0.9422, + "step": 930 + }, + { + "epoch": 0.01608722698368814, + "grad_norm": 2.5981955628422275, + "learning_rate": 1.0719631548647094e-05, + "loss": 1.0078, + "step": 931 + }, + { + "epoch": 0.01610450649709704, + "grad_norm": 3.2749138510095808, + "learning_rate": 1.0731145653425449e-05, + "loss": 1.042, + "step": 932 + }, + { + "epoch": 0.016121786010505946, + "grad_norm": 3.0020352756494377, + "learning_rate": 1.07426597582038e-05, + "loss": 0.9719, + "step": 933 + }, + { + "epoch": 0.016139065523914847, + "grad_norm": 3.6055975334737917, + "learning_rate": 1.0754173862982153e-05, + "loss": 0.9146, + "step": 934 + }, + { + "epoch": 0.016156345037323747, + "grad_norm": 2.861089720670711, + "learning_rate": 1.0765687967760508e-05, + "loss": 1.1203, + "step": 935 + }, + { + "epoch": 0.01617362455073265, + "grad_norm": 2.969595474955542, + "learning_rate": 1.0777202072538861e-05, + "loss": 1.1056, + "step": 936 + }, + { + "epoch": 0.016190904064141552, + "grad_norm": 4.045567955592381, + "learning_rate": 1.0788716177317216e-05, + "loss": 1.0668, + "step": 937 + }, + { + "epoch": 0.016208183577550456, + "grad_norm": 2.6195378531450353, + "learning_rate": 1.0800230282095567e-05, + "loss": 0.9551, + "step": 938 + }, + { + "epoch": 0.016225463090959357, + "grad_norm": 2.4155807917605547, + "learning_rate": 1.0811744386873922e-05, + "loss": 1.0106, + "step": 939 + }, + { + "epoch": 0.01624274260436826, + "grad_norm": 2.8292185551471336, + "learning_rate": 1.0823258491652275e-05, + "loss": 0.8037, + "step": 940 + }, + { + "epoch": 0.016260022117777162, + "grad_norm": 2.5110080764191465, + "learning_rate": 1.083477259643063e-05, + "loss": 1.0483, + "step": 941 + }, + { + "epoch": 0.016277301631186066, + "grad_norm": 2.6934611047030907, + "learning_rate": 1.0846286701208981e-05, + "loss": 1.0642, + "step": 942 + }, + { + "epoch": 0.016294581144594967, + "grad_norm": 3.0664397077179513, + "learning_rate": 1.0857800805987335e-05, + "loss": 1.015, + "step": 943 + }, + { + "epoch": 0.01631186065800387, + "grad_norm": 3.387624007498245, + "learning_rate": 1.086931491076569e-05, + "loss": 1.2661, + "step": 944 + }, + { + "epoch": 0.016329140171412772, + "grad_norm": 2.214242618072722, + "learning_rate": 1.0880829015544042e-05, + "loss": 0.9816, + "step": 945 + }, + { + "epoch": 0.016346419684821676, + "grad_norm": 2.3036631057600525, + "learning_rate": 1.0892343120322396e-05, + "loss": 0.8559, + "step": 946 + }, + { + "epoch": 0.016363699198230577, + "grad_norm": 2.6152487804465134, + "learning_rate": 1.0903857225100749e-05, + "loss": 0.88, + "step": 947 + }, + { + "epoch": 0.01638097871163948, + "grad_norm": 3.0080570857116546, + "learning_rate": 1.0915371329879104e-05, + "loss": 0.9173, + "step": 948 + }, + { + "epoch": 0.01639825822504838, + "grad_norm": 2.5135563708700834, + "learning_rate": 1.0926885434657457e-05, + "loss": 0.8016, + "step": 949 + }, + { + "epoch": 0.016415537738457286, + "grad_norm": 3.0844143109868907, + "learning_rate": 1.0938399539435808e-05, + "loss": 0.9681, + "step": 950 + }, + { + "epoch": 0.016432817251866186, + "grad_norm": 2.8494746714113597, + "learning_rate": 1.0949913644214163e-05, + "loss": 1.0643, + "step": 951 + }, + { + "epoch": 0.01645009676527509, + "grad_norm": 2.887609802562355, + "learning_rate": 1.0961427748992518e-05, + "loss": 0.7475, + "step": 952 + }, + { + "epoch": 0.01646737627868399, + "grad_norm": 1.8379658960566136, + "learning_rate": 1.097294185377087e-05, + "loss": 0.6677, + "step": 953 + }, + { + "epoch": 0.016484655792092896, + "grad_norm": 2.2879708546321216, + "learning_rate": 1.0984455958549222e-05, + "loss": 0.7913, + "step": 954 + }, + { + "epoch": 0.016501935305501796, + "grad_norm": 2.046087450272076, + "learning_rate": 1.0995970063327577e-05, + "loss": 0.7739, + "step": 955 + }, + { + "epoch": 0.0165192148189107, + "grad_norm": 2.5016068127944715, + "learning_rate": 1.100748416810593e-05, + "loss": 1.1657, + "step": 956 + }, + { + "epoch": 0.0165364943323196, + "grad_norm": 1.5428873788947486, + "learning_rate": 1.1018998272884285e-05, + "loss": 0.7751, + "step": 957 + }, + { + "epoch": 0.016553773845728505, + "grad_norm": 3.4063841793803102, + "learning_rate": 1.1030512377662636e-05, + "loss": 0.9078, + "step": 958 + }, + { + "epoch": 0.016571053359137406, + "grad_norm": 2.6522236928135916, + "learning_rate": 1.1042026482440991e-05, + "loss": 0.8331, + "step": 959 + }, + { + "epoch": 0.01658833287254631, + "grad_norm": 3.148919631816275, + "learning_rate": 1.1053540587219344e-05, + "loss": 1.0868, + "step": 960 + }, + { + "epoch": 0.01660561238595521, + "grad_norm": 2.766857148529857, + "learning_rate": 1.10650546919977e-05, + "loss": 1.0219, + "step": 961 + }, + { + "epoch": 0.016622891899364115, + "grad_norm": 2.653819104592175, + "learning_rate": 1.107656879677605e-05, + "loss": 0.8682, + "step": 962 + }, + { + "epoch": 0.016640171412773016, + "grad_norm": 3.678119112597821, + "learning_rate": 1.1088082901554404e-05, + "loss": 1.1367, + "step": 963 + }, + { + "epoch": 0.01665745092618192, + "grad_norm": 2.625177124276577, + "learning_rate": 1.1099597006332759e-05, + "loss": 0.7219, + "step": 964 + }, + { + "epoch": 0.01667473043959082, + "grad_norm": 1.8394699409251474, + "learning_rate": 1.1111111111111113e-05, + "loss": 1.003, + "step": 965 + }, + { + "epoch": 0.016692009952999725, + "grad_norm": 3.1420513331925446, + "learning_rate": 1.1122625215889466e-05, + "loss": 0.9029, + "step": 966 + }, + { + "epoch": 0.016709289466408626, + "grad_norm": 3.026562457631666, + "learning_rate": 1.1134139320667818e-05, + "loss": 0.9018, + "step": 967 + }, + { + "epoch": 0.01672656897981753, + "grad_norm": 2.105876564478302, + "learning_rate": 1.1145653425446173e-05, + "loss": 0.9977, + "step": 968 + }, + { + "epoch": 0.01674384849322643, + "grad_norm": 2.936577679049743, + "learning_rate": 1.1157167530224526e-05, + "loss": 0.892, + "step": 969 + }, + { + "epoch": 0.016761128006635335, + "grad_norm": 2.805570143240493, + "learning_rate": 1.116868163500288e-05, + "loss": 0.9176, + "step": 970 + }, + { + "epoch": 0.016778407520044235, + "grad_norm": 2.7071844075338554, + "learning_rate": 1.1180195739781232e-05, + "loss": 0.9623, + "step": 971 + }, + { + "epoch": 0.01679568703345314, + "grad_norm": 2.2545077805748304, + "learning_rate": 1.1191709844559587e-05, + "loss": 0.8084, + "step": 972 + }, + { + "epoch": 0.01681296654686204, + "grad_norm": 2.9168998965469903, + "learning_rate": 1.120322394933794e-05, + "loss": 0.8441, + "step": 973 + }, + { + "epoch": 0.01683024606027094, + "grad_norm": 2.9415624961451647, + "learning_rate": 1.1214738054116295e-05, + "loss": 0.8203, + "step": 974 + }, + { + "epoch": 0.016847525573679845, + "grad_norm": 2.371881926706896, + "learning_rate": 1.1226252158894646e-05, + "loss": 0.7915, + "step": 975 + }, + { + "epoch": 0.016864805087088746, + "grad_norm": 3.447578281144183, + "learning_rate": 1.1237766263673e-05, + "loss": 0.9054, + "step": 976 + }, + { + "epoch": 0.01688208460049765, + "grad_norm": 2.325419498569959, + "learning_rate": 1.1249280368451354e-05, + "loss": 0.8686, + "step": 977 + }, + { + "epoch": 0.01689936411390655, + "grad_norm": 3.225563528917853, + "learning_rate": 1.1260794473229707e-05, + "loss": 1.1039, + "step": 978 + }, + { + "epoch": 0.016916643627315455, + "grad_norm": 2.4763906484609954, + "learning_rate": 1.127230857800806e-05, + "loss": 0.8012, + "step": 979 + }, + { + "epoch": 0.016933923140724356, + "grad_norm": 2.9507053167324426, + "learning_rate": 1.1283822682786414e-05, + "loss": 1.17, + "step": 980 + }, + { + "epoch": 0.01695120265413326, + "grad_norm": 2.604477491701683, + "learning_rate": 1.1295336787564768e-05, + "loss": 0.8924, + "step": 981 + }, + { + "epoch": 0.01696848216754216, + "grad_norm": 2.33965025897346, + "learning_rate": 1.1306850892343121e-05, + "loss": 0.8931, + "step": 982 + }, + { + "epoch": 0.016985761680951065, + "grad_norm": 2.574242796932319, + "learning_rate": 1.1318364997121475e-05, + "loss": 0.9289, + "step": 983 + }, + { + "epoch": 0.017003041194359966, + "grad_norm": 2.42513582914349, + "learning_rate": 1.1329879101899828e-05, + "loss": 0.9852, + "step": 984 + }, + { + "epoch": 0.01702032070776887, + "grad_norm": 4.316013256363468, + "learning_rate": 1.1341393206678183e-05, + "loss": 0.9702, + "step": 985 + }, + { + "epoch": 0.01703760022117777, + "grad_norm": 2.649833578248801, + "learning_rate": 1.1352907311456536e-05, + "loss": 0.7967, + "step": 986 + }, + { + "epoch": 0.017054879734586675, + "grad_norm": 3.518124760509434, + "learning_rate": 1.1364421416234887e-05, + "loss": 1.1452, + "step": 987 + }, + { + "epoch": 0.017072159247995575, + "grad_norm": 2.4212193159897963, + "learning_rate": 1.1375935521013242e-05, + "loss": 0.5678, + "step": 988 + }, + { + "epoch": 0.01708943876140448, + "grad_norm": 3.3938950791930083, + "learning_rate": 1.1387449625791595e-05, + "loss": 0.9375, + "step": 989 + }, + { + "epoch": 0.01710671827481338, + "grad_norm": 2.793634457167892, + "learning_rate": 1.139896373056995e-05, + "loss": 0.9514, + "step": 990 + }, + { + "epoch": 0.017123997788222284, + "grad_norm": 3.0820725434184424, + "learning_rate": 1.1410477835348303e-05, + "loss": 0.8303, + "step": 991 + }, + { + "epoch": 0.017141277301631185, + "grad_norm": 2.72129875648817, + "learning_rate": 1.1421991940126656e-05, + "loss": 0.6826, + "step": 992 + }, + { + "epoch": 0.01715855681504009, + "grad_norm": 2.6003654092014266, + "learning_rate": 1.143350604490501e-05, + "loss": 0.9145, + "step": 993 + }, + { + "epoch": 0.01717583632844899, + "grad_norm": 1.4624603034311177, + "learning_rate": 1.1445020149683364e-05, + "loss": 0.6408, + "step": 994 + }, + { + "epoch": 0.017193115841857894, + "grad_norm": 2.645013525701607, + "learning_rate": 1.1456534254461717e-05, + "loss": 0.767, + "step": 995 + }, + { + "epoch": 0.017210395355266795, + "grad_norm": 2.491377105987184, + "learning_rate": 1.1468048359240069e-05, + "loss": 0.752, + "step": 996 + }, + { + "epoch": 0.0172276748686757, + "grad_norm": 3.1314910963345173, + "learning_rate": 1.1479562464018423e-05, + "loss": 1.1256, + "step": 997 + }, + { + "epoch": 0.0172449543820846, + "grad_norm": 2.0982257510288864, + "learning_rate": 1.1491076568796778e-05, + "loss": 0.6201, + "step": 998 + }, + { + "epoch": 0.017262233895493504, + "grad_norm": 2.5490160968524376, + "learning_rate": 1.1502590673575131e-05, + "loss": 0.7025, + "step": 999 + }, + { + "epoch": 0.017279513408902405, + "grad_norm": 3.2161454654592325, + "learning_rate": 1.1514104778353483e-05, + "loss": 0.9841, + "step": 1000 + }, + { + "epoch": 0.01729679292231131, + "grad_norm": 3.03676421820657, + "learning_rate": 1.1525618883131838e-05, + "loss": 1.0471, + "step": 1001 + }, + { + "epoch": 0.01731407243572021, + "grad_norm": 2.546700422487166, + "learning_rate": 1.153713298791019e-05, + "loss": 1.0214, + "step": 1002 + }, + { + "epoch": 0.017331351949129114, + "grad_norm": 3.249645762690602, + "learning_rate": 1.1548647092688545e-05, + "loss": 0.8603, + "step": 1003 + }, + { + "epoch": 0.017348631462538015, + "grad_norm": 2.2052645264994557, + "learning_rate": 1.1560161197466897e-05, + "loss": 0.6759, + "step": 1004 + }, + { + "epoch": 0.01736591097594692, + "grad_norm": 2.4351720739382308, + "learning_rate": 1.1571675302245252e-05, + "loss": 1.0047, + "step": 1005 + }, + { + "epoch": 0.01738319048935582, + "grad_norm": 2.293883192941855, + "learning_rate": 1.1583189407023605e-05, + "loss": 0.7075, + "step": 1006 + }, + { + "epoch": 0.017400470002764724, + "grad_norm": 2.8727913909336777, + "learning_rate": 1.159470351180196e-05, + "loss": 0.7741, + "step": 1007 + }, + { + "epoch": 0.017417749516173624, + "grad_norm": 2.602927775273622, + "learning_rate": 1.1606217616580311e-05, + "loss": 0.9456, + "step": 1008 + }, + { + "epoch": 0.01743502902958253, + "grad_norm": 2.607674245062296, + "learning_rate": 1.1617731721358664e-05, + "loss": 0.7609, + "step": 1009 + }, + { + "epoch": 0.01745230854299143, + "grad_norm": 2.5908510820290953, + "learning_rate": 1.1629245826137019e-05, + "loss": 1.3027, + "step": 1010 + }, + { + "epoch": 0.017469588056400333, + "grad_norm": 2.159539098764458, + "learning_rate": 1.1640759930915372e-05, + "loss": 1.0275, + "step": 1011 + }, + { + "epoch": 0.017486867569809234, + "grad_norm": 2.370747418015636, + "learning_rate": 1.1652274035693725e-05, + "loss": 0.8816, + "step": 1012 + }, + { + "epoch": 0.017504147083218135, + "grad_norm": 2.8335460945811173, + "learning_rate": 1.1663788140472078e-05, + "loss": 0.8988, + "step": 1013 + }, + { + "epoch": 0.01752142659662704, + "grad_norm": 2.9580800669977605, + "learning_rate": 1.1675302245250433e-05, + "loss": 1.1024, + "step": 1014 + }, + { + "epoch": 0.01753870611003594, + "grad_norm": 2.2842050860973555, + "learning_rate": 1.1686816350028786e-05, + "loss": 1.1127, + "step": 1015 + }, + { + "epoch": 0.017555985623444844, + "grad_norm": 2.635670243371335, + "learning_rate": 1.1698330454807141e-05, + "loss": 1.0575, + "step": 1016 + }, + { + "epoch": 0.017573265136853745, + "grad_norm": 2.857768512231382, + "learning_rate": 1.1709844559585493e-05, + "loss": 0.9837, + "step": 1017 + }, + { + "epoch": 0.01759054465026265, + "grad_norm": 1.9901826987558835, + "learning_rate": 1.1721358664363847e-05, + "loss": 0.6729, + "step": 1018 + }, + { + "epoch": 0.01760782416367155, + "grad_norm": 2.9381257565183927, + "learning_rate": 1.17328727691422e-05, + "loss": 1.0168, + "step": 1019 + }, + { + "epoch": 0.017625103677080454, + "grad_norm": 2.256811646632681, + "learning_rate": 1.1744386873920555e-05, + "loss": 0.7826, + "step": 1020 + }, + { + "epoch": 0.017642383190489355, + "grad_norm": 2.618534052601605, + "learning_rate": 1.1755900978698907e-05, + "loss": 1.0589, + "step": 1021 + }, + { + "epoch": 0.01765966270389826, + "grad_norm": 4.356202038634042, + "learning_rate": 1.176741508347726e-05, + "loss": 0.965, + "step": 1022 + }, + { + "epoch": 0.01767694221730716, + "grad_norm": 2.757848788997814, + "learning_rate": 1.1778929188255615e-05, + "loss": 0.9594, + "step": 1023 + }, + { + "epoch": 0.017694221730716064, + "grad_norm": 2.7284135839997328, + "learning_rate": 1.1790443293033968e-05, + "loss": 0.8324, + "step": 1024 + }, + { + "epoch": 0.017711501244124964, + "grad_norm": 2.4922292048654904, + "learning_rate": 1.1801957397812321e-05, + "loss": 0.6976, + "step": 1025 + }, + { + "epoch": 0.01772878075753387, + "grad_norm": 3.247686321436899, + "learning_rate": 1.1813471502590674e-05, + "loss": 0.8735, + "step": 1026 + }, + { + "epoch": 0.01774606027094277, + "grad_norm": 2.78738177721896, + "learning_rate": 1.1824985607369029e-05, + "loss": 0.9414, + "step": 1027 + }, + { + "epoch": 0.017763339784351673, + "grad_norm": 2.596626175493103, + "learning_rate": 1.1836499712147382e-05, + "loss": 0.7469, + "step": 1028 + }, + { + "epoch": 0.017780619297760574, + "grad_norm": 1.9514687262248192, + "learning_rate": 1.1848013816925733e-05, + "loss": 0.635, + "step": 1029 + }, + { + "epoch": 0.01779789881116948, + "grad_norm": 2.9003298458031286, + "learning_rate": 1.1859527921704088e-05, + "loss": 1.0357, + "step": 1030 + }, + { + "epoch": 0.01781517832457838, + "grad_norm": 2.6846635672443213, + "learning_rate": 1.1871042026482443e-05, + "loss": 0.885, + "step": 1031 + }, + { + "epoch": 0.017832457837987283, + "grad_norm": 1.6372916948547176, + "learning_rate": 1.1882556131260796e-05, + "loss": 0.6522, + "step": 1032 + }, + { + "epoch": 0.017849737351396184, + "grad_norm": 2.4719213802892006, + "learning_rate": 1.1894070236039148e-05, + "loss": 0.9789, + "step": 1033 + }, + { + "epoch": 0.017867016864805088, + "grad_norm": 2.563567951510723, + "learning_rate": 1.1905584340817502e-05, + "loss": 1.1769, + "step": 1034 + }, + { + "epoch": 0.01788429637821399, + "grad_norm": 1.9938129900856951, + "learning_rate": 1.1917098445595855e-05, + "loss": 0.7937, + "step": 1035 + }, + { + "epoch": 0.017901575891622893, + "grad_norm": 2.4728693527299512, + "learning_rate": 1.192861255037421e-05, + "loss": 0.825, + "step": 1036 + }, + { + "epoch": 0.017918855405031794, + "grad_norm": 2.740029701365287, + "learning_rate": 1.1940126655152562e-05, + "loss": 1.393, + "step": 1037 + }, + { + "epoch": 0.017936134918440698, + "grad_norm": 2.103309123863329, + "learning_rate": 1.1951640759930917e-05, + "loss": 0.7519, + "step": 1038 + }, + { + "epoch": 0.0179534144318496, + "grad_norm": 2.366133950195326, + "learning_rate": 1.196315486470927e-05, + "loss": 0.9563, + "step": 1039 + }, + { + "epoch": 0.017970693945258503, + "grad_norm": 2.233087525860839, + "learning_rate": 1.1974668969487624e-05, + "loss": 1.0737, + "step": 1040 + }, + { + "epoch": 0.017987973458667404, + "grad_norm": 3.122692660747609, + "learning_rate": 1.1986183074265976e-05, + "loss": 0.9981, + "step": 1041 + }, + { + "epoch": 0.018005252972076308, + "grad_norm": 2.58855371903368, + "learning_rate": 1.1997697179044329e-05, + "loss": 0.8721, + "step": 1042 + }, + { + "epoch": 0.01802253248548521, + "grad_norm": 2.5978176986220873, + "learning_rate": 1.2009211283822684e-05, + "loss": 0.9433, + "step": 1043 + }, + { + "epoch": 0.018039811998894113, + "grad_norm": 1.9898760201481533, + "learning_rate": 1.2020725388601037e-05, + "loss": 0.6721, + "step": 1044 + }, + { + "epoch": 0.018057091512303013, + "grad_norm": 2.3812504484618033, + "learning_rate": 1.2032239493379392e-05, + "loss": 1.0932, + "step": 1045 + }, + { + "epoch": 0.018074371025711918, + "grad_norm": 3.7957189520651355, + "learning_rate": 1.2043753598157743e-05, + "loss": 1.0582, + "step": 1046 + }, + { + "epoch": 0.018091650539120818, + "grad_norm": 2.5212989995445625, + "learning_rate": 1.2055267702936098e-05, + "loss": 1.005, + "step": 1047 + }, + { + "epoch": 0.018108930052529722, + "grad_norm": 2.832056117608865, + "learning_rate": 1.2066781807714451e-05, + "loss": 1.1699, + "step": 1048 + }, + { + "epoch": 0.018126209565938623, + "grad_norm": 2.766533085385149, + "learning_rate": 1.2078295912492806e-05, + "loss": 0.9167, + "step": 1049 + }, + { + "epoch": 0.018143489079347527, + "grad_norm": 2.261395352814483, + "learning_rate": 1.2089810017271157e-05, + "loss": 0.7415, + "step": 1050 + }, + { + "epoch": 0.018160768592756428, + "grad_norm": 2.6296847090855575, + "learning_rate": 1.2101324122049512e-05, + "loss": 0.6852, + "step": 1051 + }, + { + "epoch": 0.01817804810616533, + "grad_norm": 2.18293993098916, + "learning_rate": 1.2112838226827865e-05, + "loss": 0.6774, + "step": 1052 + }, + { + "epoch": 0.018195327619574233, + "grad_norm": 2.2961253152213343, + "learning_rate": 1.212435233160622e-05, + "loss": 0.8258, + "step": 1053 + }, + { + "epoch": 0.018212607132983134, + "grad_norm": 1.6367092375461845, + "learning_rate": 1.2135866436384572e-05, + "loss": 0.6127, + "step": 1054 + }, + { + "epoch": 0.018229886646392038, + "grad_norm": 2.305702959094649, + "learning_rate": 1.2147380541162925e-05, + "loss": 0.7682, + "step": 1055 + }, + { + "epoch": 0.01824716615980094, + "grad_norm": 2.6094365013378495, + "learning_rate": 1.215889464594128e-05, + "loss": 0.9439, + "step": 1056 + }, + { + "epoch": 0.018264445673209843, + "grad_norm": 2.8286961744629027, + "learning_rate": 1.2170408750719633e-05, + "loss": 1.0352, + "step": 1057 + }, + { + "epoch": 0.018281725186618743, + "grad_norm": 3.02977762090552, + "learning_rate": 1.2181922855497986e-05, + "loss": 1.0628, + "step": 1058 + }, + { + "epoch": 0.018299004700027648, + "grad_norm": 2.4615273019738364, + "learning_rate": 1.2193436960276339e-05, + "loss": 0.9779, + "step": 1059 + }, + { + "epoch": 0.01831628421343655, + "grad_norm": 3.1893240621850847, + "learning_rate": 1.2204951065054694e-05, + "loss": 0.8871, + "step": 1060 + }, + { + "epoch": 0.018333563726845453, + "grad_norm": 2.8966140583798135, + "learning_rate": 1.2216465169833047e-05, + "loss": 0.9076, + "step": 1061 + }, + { + "epoch": 0.018350843240254353, + "grad_norm": 2.3137380508266805, + "learning_rate": 1.2227979274611398e-05, + "loss": 0.8386, + "step": 1062 + }, + { + "epoch": 0.018368122753663257, + "grad_norm": 2.936308034588253, + "learning_rate": 1.2239493379389753e-05, + "loss": 1.0026, + "step": 1063 + }, + { + "epoch": 0.018385402267072158, + "grad_norm": 2.844094206944876, + "learning_rate": 1.2251007484168108e-05, + "loss": 0.8523, + "step": 1064 + }, + { + "epoch": 0.018402681780481062, + "grad_norm": 2.5070283439088694, + "learning_rate": 1.2262521588946461e-05, + "loss": 0.8382, + "step": 1065 + }, + { + "epoch": 0.018419961293889963, + "grad_norm": 2.9339490562486925, + "learning_rate": 1.2274035693724812e-05, + "loss": 0.8269, + "step": 1066 + }, + { + "epoch": 0.018437240807298867, + "grad_norm": 3.0408446369649593, + "learning_rate": 1.2285549798503167e-05, + "loss": 1.0314, + "step": 1067 + }, + { + "epoch": 0.018454520320707768, + "grad_norm": 2.163811244131407, + "learning_rate": 1.229706390328152e-05, + "loss": 0.5907, + "step": 1068 + }, + { + "epoch": 0.018471799834116672, + "grad_norm": 3.4658483694870075, + "learning_rate": 1.2308578008059875e-05, + "loss": 1.0965, + "step": 1069 + }, + { + "epoch": 0.018489079347525573, + "grad_norm": 2.245802333845748, + "learning_rate": 1.2320092112838228e-05, + "loss": 0.9589, + "step": 1070 + }, + { + "epoch": 0.018506358860934477, + "grad_norm": 2.3879708529216055, + "learning_rate": 1.2331606217616581e-05, + "loss": 0.8726, + "step": 1071 + }, + { + "epoch": 0.018523638374343378, + "grad_norm": 2.0320392951484227, + "learning_rate": 1.2343120322394934e-05, + "loss": 0.6882, + "step": 1072 + }, + { + "epoch": 0.018540917887752282, + "grad_norm": 2.552362560208274, + "learning_rate": 1.235463442717329e-05, + "loss": 0.908, + "step": 1073 + }, + { + "epoch": 0.018558197401161183, + "grad_norm": 2.63973314372422, + "learning_rate": 1.2366148531951642e-05, + "loss": 0.6842, + "step": 1074 + }, + { + "epoch": 0.018575476914570087, + "grad_norm": 2.937976370204779, + "learning_rate": 1.2377662636729994e-05, + "loss": 1.0232, + "step": 1075 + }, + { + "epoch": 0.018592756427978988, + "grad_norm": 3.0691997051752673, + "learning_rate": 1.2389176741508349e-05, + "loss": 0.9508, + "step": 1076 + }, + { + "epoch": 0.01861003594138789, + "grad_norm": 2.3210491032239915, + "learning_rate": 1.2400690846286703e-05, + "loss": 0.7608, + "step": 1077 + }, + { + "epoch": 0.018627315454796792, + "grad_norm": 2.805870650082167, + "learning_rate": 1.2412204951065057e-05, + "loss": 0.9886, + "step": 1078 + }, + { + "epoch": 0.018644594968205697, + "grad_norm": 3.549175686608928, + "learning_rate": 1.2423719055843408e-05, + "loss": 0.9684, + "step": 1079 + }, + { + "epoch": 0.018661874481614597, + "grad_norm": 3.0200374890740567, + "learning_rate": 1.2435233160621763e-05, + "loss": 0.9479, + "step": 1080 + }, + { + "epoch": 0.0186791539950235, + "grad_norm": 3.0836462067888264, + "learning_rate": 1.2446747265400116e-05, + "loss": 1.0319, + "step": 1081 + }, + { + "epoch": 0.018696433508432402, + "grad_norm": 2.2022105123370443, + "learning_rate": 1.245826137017847e-05, + "loss": 0.6645, + "step": 1082 + }, + { + "epoch": 0.018713713021841306, + "grad_norm": 2.6976893633050656, + "learning_rate": 1.2469775474956822e-05, + "loss": 0.7152, + "step": 1083 + }, + { + "epoch": 0.018730992535250207, + "grad_norm": 2.5912293125480246, + "learning_rate": 1.2481289579735177e-05, + "loss": 0.8105, + "step": 1084 + }, + { + "epoch": 0.01874827204865911, + "grad_norm": 2.677383376265953, + "learning_rate": 1.249280368451353e-05, + "loss": 1.0298, + "step": 1085 + }, + { + "epoch": 0.018765551562068012, + "grad_norm": 2.78945482164804, + "learning_rate": 1.2504317789291885e-05, + "loss": 0.7381, + "step": 1086 + }, + { + "epoch": 0.018782831075476916, + "grad_norm": 2.6034120463120707, + "learning_rate": 1.2515831894070236e-05, + "loss": 0.5472, + "step": 1087 + }, + { + "epoch": 0.018800110588885817, + "grad_norm": 2.4321182402546393, + "learning_rate": 1.252734599884859e-05, + "loss": 1.0124, + "step": 1088 + }, + { + "epoch": 0.018817390102294718, + "grad_norm": 3.599828876202036, + "learning_rate": 1.2538860103626944e-05, + "loss": 0.8329, + "step": 1089 + }, + { + "epoch": 0.018834669615703622, + "grad_norm": 3.5745708641414287, + "learning_rate": 1.2550374208405297e-05, + "loss": 0.7615, + "step": 1090 + }, + { + "epoch": 0.018851949129112523, + "grad_norm": 2.6770440199857006, + "learning_rate": 1.256188831318365e-05, + "loss": 0.9558, + "step": 1091 + }, + { + "epoch": 0.018869228642521427, + "grad_norm": 2.877338018129079, + "learning_rate": 1.2573402417962004e-05, + "loss": 1.0627, + "step": 1092 + }, + { + "epoch": 0.018886508155930327, + "grad_norm": 2.2949261675605492, + "learning_rate": 1.2584916522740358e-05, + "loss": 1.0211, + "step": 1093 + }, + { + "epoch": 0.01890378766933923, + "grad_norm": 2.218954972661595, + "learning_rate": 1.2596430627518712e-05, + "loss": 0.9854, + "step": 1094 + }, + { + "epoch": 0.018921067182748132, + "grad_norm": 2.4210868485136814, + "learning_rate": 1.2607944732297063e-05, + "loss": 1.2498, + "step": 1095 + }, + { + "epoch": 0.018938346696157037, + "grad_norm": 2.214490187785138, + "learning_rate": 1.2619458837075418e-05, + "loss": 0.8458, + "step": 1096 + }, + { + "epoch": 0.018955626209565937, + "grad_norm": 1.889137485559508, + "learning_rate": 1.2630972941853773e-05, + "loss": 1.1764, + "step": 1097 + }, + { + "epoch": 0.01897290572297484, + "grad_norm": 2.2019665364300653, + "learning_rate": 1.2642487046632126e-05, + "loss": 1.0448, + "step": 1098 + }, + { + "epoch": 0.018990185236383742, + "grad_norm": 2.670429259193559, + "learning_rate": 1.265400115141048e-05, + "loss": 1.1016, + "step": 1099 + }, + { + "epoch": 0.019007464749792646, + "grad_norm": 2.0712138555659014, + "learning_rate": 1.2665515256188832e-05, + "loss": 1.0803, + "step": 1100 + }, + { + "epoch": 0.019024744263201547, + "grad_norm": 2.2434249399829964, + "learning_rate": 1.2677029360967185e-05, + "loss": 0.9048, + "step": 1101 + }, + { + "epoch": 0.01904202377661045, + "grad_norm": 2.618193288198703, + "learning_rate": 1.268854346574554e-05, + "loss": 0.9904, + "step": 1102 + }, + { + "epoch": 0.019059303290019352, + "grad_norm": 2.3370749936987396, + "learning_rate": 1.2700057570523893e-05, + "loss": 0.8733, + "step": 1103 + }, + { + "epoch": 0.019076582803428256, + "grad_norm": 2.6080074927163714, + "learning_rate": 1.2711571675302246e-05, + "loss": 1.0495, + "step": 1104 + }, + { + "epoch": 0.019093862316837157, + "grad_norm": 2.175630695601149, + "learning_rate": 1.27230857800806e-05, + "loss": 0.9063, + "step": 1105 + }, + { + "epoch": 0.01911114183024606, + "grad_norm": 1.582869592748052, + "learning_rate": 1.2734599884858954e-05, + "loss": 0.6502, + "step": 1106 + }, + { + "epoch": 0.019128421343654962, + "grad_norm": 2.6915540212334714, + "learning_rate": 1.2746113989637307e-05, + "loss": 1.131, + "step": 1107 + }, + { + "epoch": 0.019145700857063866, + "grad_norm": 2.321214557545564, + "learning_rate": 1.2757628094415659e-05, + "loss": 1.0905, + "step": 1108 + }, + { + "epoch": 0.019162980370472767, + "grad_norm": 2.681060898005258, + "learning_rate": 1.2769142199194013e-05, + "loss": 1.0324, + "step": 1109 + }, + { + "epoch": 0.01918025988388167, + "grad_norm": 2.154495644515818, + "learning_rate": 1.2780656303972368e-05, + "loss": 0.6329, + "step": 1110 + }, + { + "epoch": 0.01919753939729057, + "grad_norm": 2.3584241914123742, + "learning_rate": 1.2792170408750721e-05, + "loss": 0.7409, + "step": 1111 + }, + { + "epoch": 0.019214818910699476, + "grad_norm": 2.6363122396387, + "learning_rate": 1.2803684513529073e-05, + "loss": 0.7391, + "step": 1112 + }, + { + "epoch": 0.019232098424108376, + "grad_norm": 2.236446322541833, + "learning_rate": 1.2815198618307428e-05, + "loss": 0.7352, + "step": 1113 + }, + { + "epoch": 0.01924937793751728, + "grad_norm": 3.2197751581673084, + "learning_rate": 1.282671272308578e-05, + "loss": 0.9637, + "step": 1114 + }, + { + "epoch": 0.01926665745092618, + "grad_norm": 1.9692057810250079, + "learning_rate": 1.2838226827864136e-05, + "loss": 0.7932, + "step": 1115 + }, + { + "epoch": 0.019283936964335086, + "grad_norm": 2.433984201235519, + "learning_rate": 1.2849740932642487e-05, + "loss": 0.6039, + "step": 1116 + }, + { + "epoch": 0.019301216477743986, + "grad_norm": 2.711308438454824, + "learning_rate": 1.2861255037420842e-05, + "loss": 0.7624, + "step": 1117 + }, + { + "epoch": 0.01931849599115289, + "grad_norm": 2.868596825898442, + "learning_rate": 1.2872769142199195e-05, + "loss": 0.7376, + "step": 1118 + }, + { + "epoch": 0.01933577550456179, + "grad_norm": 2.7018586962351496, + "learning_rate": 1.288428324697755e-05, + "loss": 0.9578, + "step": 1119 + }, + { + "epoch": 0.019353055017970695, + "grad_norm": 2.0657876646725675, + "learning_rate": 1.2895797351755901e-05, + "loss": 0.7044, + "step": 1120 + }, + { + "epoch": 0.019370334531379596, + "grad_norm": 1.4451293138976162, + "learning_rate": 1.2907311456534254e-05, + "loss": 0.5994, + "step": 1121 + }, + { + "epoch": 0.0193876140447885, + "grad_norm": 2.498129134892909, + "learning_rate": 1.2918825561312609e-05, + "loss": 0.7877, + "step": 1122 + }, + { + "epoch": 0.0194048935581974, + "grad_norm": 2.1287158964780244, + "learning_rate": 1.2930339666090962e-05, + "loss": 0.8798, + "step": 1123 + }, + { + "epoch": 0.019422173071606305, + "grad_norm": 2.2260395151394143, + "learning_rate": 1.2941853770869317e-05, + "loss": 1.065, + "step": 1124 + }, + { + "epoch": 0.019439452585015206, + "grad_norm": 2.5466629764259583, + "learning_rate": 1.2953367875647668e-05, + "loss": 0.9278, + "step": 1125 + }, + { + "epoch": 0.01945673209842411, + "grad_norm": 2.8903866318740734, + "learning_rate": 1.2964881980426023e-05, + "loss": 0.7839, + "step": 1126 + }, + { + "epoch": 0.01947401161183301, + "grad_norm": 2.412323820045906, + "learning_rate": 1.2976396085204376e-05, + "loss": 0.84, + "step": 1127 + }, + { + "epoch": 0.01949129112524191, + "grad_norm": 2.495602357331624, + "learning_rate": 1.2987910189982731e-05, + "loss": 0.8612, + "step": 1128 + }, + { + "epoch": 0.019508570638650816, + "grad_norm": 2.543746686477404, + "learning_rate": 1.2999424294761083e-05, + "loss": 1.2192, + "step": 1129 + }, + { + "epoch": 0.019525850152059716, + "grad_norm": 2.597410150064269, + "learning_rate": 1.3010938399539437e-05, + "loss": 0.9783, + "step": 1130 + }, + { + "epoch": 0.01954312966546862, + "grad_norm": 2.5191294810819382, + "learning_rate": 1.302245250431779e-05, + "loss": 0.971, + "step": 1131 + }, + { + "epoch": 0.01956040917887752, + "grad_norm": 2.502210160649667, + "learning_rate": 1.3033966609096145e-05, + "loss": 0.6723, + "step": 1132 + }, + { + "epoch": 0.019577688692286425, + "grad_norm": 2.546117359921025, + "learning_rate": 1.3045480713874497e-05, + "loss": 0.7911, + "step": 1133 + }, + { + "epoch": 0.019594968205695326, + "grad_norm": 1.7457918953154008, + "learning_rate": 1.305699481865285e-05, + "loss": 0.7226, + "step": 1134 + }, + { + "epoch": 0.01961224771910423, + "grad_norm": 2.369397289503638, + "learning_rate": 1.3068508923431205e-05, + "loss": 0.9136, + "step": 1135 + }, + { + "epoch": 0.01962952723251313, + "grad_norm": 2.261464746005655, + "learning_rate": 1.3080023028209558e-05, + "loss": 0.9979, + "step": 1136 + }, + { + "epoch": 0.019646806745922035, + "grad_norm": 2.5179902401132224, + "learning_rate": 1.3091537132987911e-05, + "loss": 1.1093, + "step": 1137 + }, + { + "epoch": 0.019664086259330936, + "grad_norm": 2.579442089767577, + "learning_rate": 1.3103051237766264e-05, + "loss": 0.9305, + "step": 1138 + }, + { + "epoch": 0.01968136577273984, + "grad_norm": 2.5881977784059975, + "learning_rate": 1.3114565342544619e-05, + "loss": 0.8834, + "step": 1139 + }, + { + "epoch": 0.01969864528614874, + "grad_norm": 2.766364055247433, + "learning_rate": 1.3126079447322972e-05, + "loss": 1.1905, + "step": 1140 + }, + { + "epoch": 0.019715924799557645, + "grad_norm": 2.4208458103641677, + "learning_rate": 1.3137593552101323e-05, + "loss": 0.858, + "step": 1141 + }, + { + "epoch": 0.019733204312966546, + "grad_norm": 2.512455511428189, + "learning_rate": 1.3149107656879678e-05, + "loss": 1.0084, + "step": 1142 + }, + { + "epoch": 0.01975048382637545, + "grad_norm": 2.57480503152451, + "learning_rate": 1.3160621761658033e-05, + "loss": 0.7492, + "step": 1143 + }, + { + "epoch": 0.01976776333978435, + "grad_norm": 2.2188266753571195, + "learning_rate": 1.3172135866436386e-05, + "loss": 0.863, + "step": 1144 + }, + { + "epoch": 0.019785042853193255, + "grad_norm": 2.8773722533158415, + "learning_rate": 1.3183649971214738e-05, + "loss": 1.0639, + "step": 1145 + }, + { + "epoch": 0.019802322366602156, + "grad_norm": 2.817364396267603, + "learning_rate": 1.3195164075993092e-05, + "loss": 0.7269, + "step": 1146 + }, + { + "epoch": 0.01981960188001106, + "grad_norm": 2.0883401282676237, + "learning_rate": 1.3206678180771446e-05, + "loss": 0.6898, + "step": 1147 + }, + { + "epoch": 0.01983688139341996, + "grad_norm": 2.5138827477280667, + "learning_rate": 1.32181922855498e-05, + "loss": 0.8191, + "step": 1148 + }, + { + "epoch": 0.019854160906828865, + "grad_norm": 3.144290450618952, + "learning_rate": 1.3229706390328152e-05, + "loss": 1.0149, + "step": 1149 + }, + { + "epoch": 0.019871440420237765, + "grad_norm": 2.789757610360016, + "learning_rate": 1.3241220495106507e-05, + "loss": 0.8552, + "step": 1150 + }, + { + "epoch": 0.01988871993364667, + "grad_norm": 2.2174038161774985, + "learning_rate": 1.325273459988486e-05, + "loss": 0.7922, + "step": 1151 + }, + { + "epoch": 0.01990599944705557, + "grad_norm": 1.3885241085115692, + "learning_rate": 1.3264248704663215e-05, + "loss": 0.5941, + "step": 1152 + }, + { + "epoch": 0.019923278960464474, + "grad_norm": 2.4740092197356307, + "learning_rate": 1.3275762809441568e-05, + "loss": 0.7962, + "step": 1153 + }, + { + "epoch": 0.019940558473873375, + "grad_norm": 3.3836831700808703, + "learning_rate": 1.3287276914219919e-05, + "loss": 0.986, + "step": 1154 + }, + { + "epoch": 0.01995783798728228, + "grad_norm": 2.5511026245741886, + "learning_rate": 1.3298791018998274e-05, + "loss": 0.802, + "step": 1155 + }, + { + "epoch": 0.01997511750069118, + "grad_norm": 2.2607245612466054, + "learning_rate": 1.3310305123776627e-05, + "loss": 0.7782, + "step": 1156 + }, + { + "epoch": 0.019992397014100084, + "grad_norm": 1.7707887452237727, + "learning_rate": 1.3321819228554982e-05, + "loss": 0.7729, + "step": 1157 + }, + { + "epoch": 0.020009676527508985, + "grad_norm": 1.9982618620063346, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.9729, + "step": 1158 + }, + { + "epoch": 0.02002695604091789, + "grad_norm": 2.139832856314204, + "learning_rate": 1.3344847438111688e-05, + "loss": 0.8389, + "step": 1159 + }, + { + "epoch": 0.02004423555432679, + "grad_norm": 2.525163211590399, + "learning_rate": 1.3356361542890041e-05, + "loss": 1.0834, + "step": 1160 + }, + { + "epoch": 0.020061515067735694, + "grad_norm": 2.697186123781181, + "learning_rate": 1.3367875647668396e-05, + "loss": 0.7146, + "step": 1161 + }, + { + "epoch": 0.020078794581144595, + "grad_norm": 2.630481555603887, + "learning_rate": 1.3379389752446747e-05, + "loss": 0.9479, + "step": 1162 + }, + { + "epoch": 0.0200960740945535, + "grad_norm": 1.7020322202414617, + "learning_rate": 1.3390903857225102e-05, + "loss": 0.6811, + "step": 1163 + }, + { + "epoch": 0.0201133536079624, + "grad_norm": 2.301532586713514, + "learning_rate": 1.3402417962003455e-05, + "loss": 0.9525, + "step": 1164 + }, + { + "epoch": 0.020130633121371304, + "grad_norm": 2.232176548521964, + "learning_rate": 1.341393206678181e-05, + "loss": 0.8974, + "step": 1165 + }, + { + "epoch": 0.020147912634780205, + "grad_norm": 2.14150547888518, + "learning_rate": 1.3425446171560162e-05, + "loss": 1.1449, + "step": 1166 + }, + { + "epoch": 0.020165192148189105, + "grad_norm": 2.606679806186862, + "learning_rate": 1.3436960276338515e-05, + "loss": 0.9263, + "step": 1167 + }, + { + "epoch": 0.02018247166159801, + "grad_norm": 1.9634213128269493, + "learning_rate": 1.344847438111687e-05, + "loss": 1.0967, + "step": 1168 + }, + { + "epoch": 0.02019975117500691, + "grad_norm": 2.1119141713097482, + "learning_rate": 1.3459988485895223e-05, + "loss": 0.8358, + "step": 1169 + }, + { + "epoch": 0.020217030688415814, + "grad_norm": 2.571653840972533, + "learning_rate": 1.3471502590673576e-05, + "loss": 0.8854, + "step": 1170 + }, + { + "epoch": 0.020234310201824715, + "grad_norm": 2.4432644778642345, + "learning_rate": 1.3483016695451929e-05, + "loss": 0.9471, + "step": 1171 + }, + { + "epoch": 0.02025158971523362, + "grad_norm": 2.5032671691729806, + "learning_rate": 1.3494530800230284e-05, + "loss": 1.0841, + "step": 1172 + }, + { + "epoch": 0.02026886922864252, + "grad_norm": 2.537019940468625, + "learning_rate": 1.3506044905008637e-05, + "loss": 1.0934, + "step": 1173 + }, + { + "epoch": 0.020286148742051424, + "grad_norm": 2.427943905178579, + "learning_rate": 1.3517559009786988e-05, + "loss": 0.8445, + "step": 1174 + }, + { + "epoch": 0.020303428255460325, + "grad_norm": 1.8824717010571193, + "learning_rate": 1.3529073114565343e-05, + "loss": 1.0915, + "step": 1175 + }, + { + "epoch": 0.02032070776886923, + "grad_norm": 2.272122737941583, + "learning_rate": 1.3540587219343698e-05, + "loss": 0.8819, + "step": 1176 + }, + { + "epoch": 0.02033798728227813, + "grad_norm": 2.6109495127160294, + "learning_rate": 1.3552101324122051e-05, + "loss": 0.8569, + "step": 1177 + }, + { + "epoch": 0.020355266795687034, + "grad_norm": 3.0389013169345818, + "learning_rate": 1.3563615428900406e-05, + "loss": 0.7799, + "step": 1178 + }, + { + "epoch": 0.020372546309095935, + "grad_norm": 1.958103192067815, + "learning_rate": 1.3575129533678757e-05, + "loss": 0.9007, + "step": 1179 + }, + { + "epoch": 0.02038982582250484, + "grad_norm": 2.511917999339059, + "learning_rate": 1.358664363845711e-05, + "loss": 0.8677, + "step": 1180 + }, + { + "epoch": 0.02040710533591374, + "grad_norm": 2.572389972580523, + "learning_rate": 1.3598157743235465e-05, + "loss": 0.9303, + "step": 1181 + }, + { + "epoch": 0.020424384849322644, + "grad_norm": 2.926263368015854, + "learning_rate": 1.3609671848013818e-05, + "loss": 0.8803, + "step": 1182 + }, + { + "epoch": 0.020441664362731544, + "grad_norm": 2.5714489962521023, + "learning_rate": 1.3621185952792171e-05, + "loss": 0.641, + "step": 1183 + }, + { + "epoch": 0.02045894387614045, + "grad_norm": 1.9918700033127987, + "learning_rate": 1.3632700057570525e-05, + "loss": 0.5877, + "step": 1184 + }, + { + "epoch": 0.02047622338954935, + "grad_norm": 2.429612256747974, + "learning_rate": 1.364421416234888e-05, + "loss": 0.7705, + "step": 1185 + }, + { + "epoch": 0.020493502902958254, + "grad_norm": 2.350291840255743, + "learning_rate": 1.3655728267127233e-05, + "loss": 0.945, + "step": 1186 + }, + { + "epoch": 0.020510782416367154, + "grad_norm": 2.0924245521404945, + "learning_rate": 1.3667242371905584e-05, + "loss": 0.8396, + "step": 1187 + }, + { + "epoch": 0.02052806192977606, + "grad_norm": 2.1879341048870127, + "learning_rate": 1.3678756476683939e-05, + "loss": 0.8634, + "step": 1188 + }, + { + "epoch": 0.02054534144318496, + "grad_norm": 2.6348249126812573, + "learning_rate": 1.3690270581462292e-05, + "loss": 0.7769, + "step": 1189 + }, + { + "epoch": 0.020562620956593863, + "grad_norm": 1.842272075479962, + "learning_rate": 1.3701784686240647e-05, + "loss": 0.6056, + "step": 1190 + }, + { + "epoch": 0.020579900470002764, + "grad_norm": 2.315135417875032, + "learning_rate": 1.3713298791018998e-05, + "loss": 0.9618, + "step": 1191 + }, + { + "epoch": 0.020597179983411668, + "grad_norm": 2.332285635288003, + "learning_rate": 1.3724812895797353e-05, + "loss": 0.6101, + "step": 1192 + }, + { + "epoch": 0.02061445949682057, + "grad_norm": 1.642524056255501, + "learning_rate": 1.3736327000575706e-05, + "loss": 0.7059, + "step": 1193 + }, + { + "epoch": 0.020631739010229473, + "grad_norm": 3.1081686214819224, + "learning_rate": 1.3747841105354061e-05, + "loss": 1.0401, + "step": 1194 + }, + { + "epoch": 0.020649018523638374, + "grad_norm": 2.4632552713951825, + "learning_rate": 1.3759355210132412e-05, + "loss": 0.9147, + "step": 1195 + }, + { + "epoch": 0.020666298037047278, + "grad_norm": 1.9858876786308546, + "learning_rate": 1.3770869314910767e-05, + "loss": 1.0263, + "step": 1196 + }, + { + "epoch": 0.02068357755045618, + "grad_norm": 1.2442947897978647, + "learning_rate": 1.378238341968912e-05, + "loss": 0.605, + "step": 1197 + }, + { + "epoch": 0.020700857063865083, + "grad_norm": 2.7221001504647093, + "learning_rate": 1.3793897524467475e-05, + "loss": 0.8858, + "step": 1198 + }, + { + "epoch": 0.020718136577273984, + "grad_norm": 2.049978710277739, + "learning_rate": 1.3805411629245826e-05, + "loss": 0.9254, + "step": 1199 + }, + { + "epoch": 0.020735416090682888, + "grad_norm": 2.4373528291900897, + "learning_rate": 1.381692573402418e-05, + "loss": 0.6816, + "step": 1200 + }, + { + "epoch": 0.02075269560409179, + "grad_norm": 2.1044499855038548, + "learning_rate": 1.3828439838802534e-05, + "loss": 1.0324, + "step": 1201 + }, + { + "epoch": 0.020769975117500693, + "grad_norm": 2.033421702981743, + "learning_rate": 1.3839953943580888e-05, + "loss": 0.8993, + "step": 1202 + }, + { + "epoch": 0.020787254630909593, + "grad_norm": 1.4276227174394174, + "learning_rate": 1.3851468048359242e-05, + "loss": 0.6195, + "step": 1203 + }, + { + "epoch": 0.020804534144318498, + "grad_norm": 2.30799141559064, + "learning_rate": 1.3862982153137594e-05, + "loss": 0.8119, + "step": 1204 + }, + { + "epoch": 0.0208218136577274, + "grad_norm": 2.3429522815407475, + "learning_rate": 1.3874496257915949e-05, + "loss": 1.0386, + "step": 1205 + }, + { + "epoch": 0.0208390931711363, + "grad_norm": 2.1080722229213773, + "learning_rate": 1.3886010362694302e-05, + "loss": 0.8496, + "step": 1206 + }, + { + "epoch": 0.020856372684545203, + "grad_norm": 2.213187929633984, + "learning_rate": 1.3897524467472657e-05, + "loss": 0.8397, + "step": 1207 + }, + { + "epoch": 0.020873652197954104, + "grad_norm": 1.9416715158101985, + "learning_rate": 1.3909038572251008e-05, + "loss": 0.9351, + "step": 1208 + }, + { + "epoch": 0.020890931711363008, + "grad_norm": 3.237726365811569, + "learning_rate": 1.3920552677029363e-05, + "loss": 0.7562, + "step": 1209 + }, + { + "epoch": 0.02090821122477191, + "grad_norm": 1.7151765979157576, + "learning_rate": 1.3932066781807716e-05, + "loss": 0.6662, + "step": 1210 + }, + { + "epoch": 0.020925490738180813, + "grad_norm": 2.4199098460224056, + "learning_rate": 1.394358088658607e-05, + "loss": 0.8665, + "step": 1211 + }, + { + "epoch": 0.020942770251589714, + "grad_norm": 2.3998643888108475, + "learning_rate": 1.3955094991364422e-05, + "loss": 0.9176, + "step": 1212 + }, + { + "epoch": 0.020960049764998618, + "grad_norm": 2.7638135491176268, + "learning_rate": 1.3966609096142775e-05, + "loss": 1.0376, + "step": 1213 + }, + { + "epoch": 0.02097732927840752, + "grad_norm": 2.5191157261183914, + "learning_rate": 1.397812320092113e-05, + "loss": 0.8019, + "step": 1214 + }, + { + "epoch": 0.020994608791816423, + "grad_norm": 2.7005607168933157, + "learning_rate": 1.3989637305699483e-05, + "loss": 0.9805, + "step": 1215 + }, + { + "epoch": 0.021011888305225324, + "grad_norm": 3.0364446966833603, + "learning_rate": 1.4001151410477836e-05, + "loss": 0.8433, + "step": 1216 + }, + { + "epoch": 0.021029167818634228, + "grad_norm": 1.583258367815695, + "learning_rate": 1.401266551525619e-05, + "loss": 0.6003, + "step": 1217 + }, + { + "epoch": 0.02104644733204313, + "grad_norm": 2.2474392310789524, + "learning_rate": 1.4024179620034544e-05, + "loss": 0.822, + "step": 1218 + }, + { + "epoch": 0.021063726845452033, + "grad_norm": 3.0857113823749027, + "learning_rate": 1.4035693724812897e-05, + "loss": 0.8897, + "step": 1219 + }, + { + "epoch": 0.021081006358860933, + "grad_norm": 2.288042129897696, + "learning_rate": 1.4047207829591249e-05, + "loss": 0.8586, + "step": 1220 + }, + { + "epoch": 0.021098285872269838, + "grad_norm": 2.6337351435655223, + "learning_rate": 1.4058721934369604e-05, + "loss": 0.892, + "step": 1221 + }, + { + "epoch": 0.02111556538567874, + "grad_norm": 1.9178210481624245, + "learning_rate": 1.4070236039147957e-05, + "loss": 0.8983, + "step": 1222 + }, + { + "epoch": 0.021132844899087642, + "grad_norm": 1.9702707840920688, + "learning_rate": 1.4081750143926312e-05, + "loss": 0.7822, + "step": 1223 + }, + { + "epoch": 0.021150124412496543, + "grad_norm": 2.253698731080303, + "learning_rate": 1.4093264248704663e-05, + "loss": 0.7777, + "step": 1224 + }, + { + "epoch": 0.021167403925905447, + "grad_norm": 2.578043624856808, + "learning_rate": 1.4104778353483018e-05, + "loss": 0.8554, + "step": 1225 + }, + { + "epoch": 0.021184683439314348, + "grad_norm": 1.9138162937008978, + "learning_rate": 1.4116292458261371e-05, + "loss": 0.7907, + "step": 1226 + }, + { + "epoch": 0.021201962952723252, + "grad_norm": 1.9155708958231517, + "learning_rate": 1.4127806563039726e-05, + "loss": 0.7814, + "step": 1227 + }, + { + "epoch": 0.021219242466132153, + "grad_norm": 2.240161646885367, + "learning_rate": 1.4139320667818077e-05, + "loss": 0.948, + "step": 1228 + }, + { + "epoch": 0.021236521979541057, + "grad_norm": 2.0813448865458946, + "learning_rate": 1.4150834772596432e-05, + "loss": 0.7781, + "step": 1229 + }, + { + "epoch": 0.021253801492949958, + "grad_norm": 2.6155424571074346, + "learning_rate": 1.4162348877374785e-05, + "loss": 1.0665, + "step": 1230 + }, + { + "epoch": 0.021271081006358862, + "grad_norm": 2.3635731696668167, + "learning_rate": 1.417386298215314e-05, + "loss": 1.0919, + "step": 1231 + }, + { + "epoch": 0.021288360519767763, + "grad_norm": 2.012344483087705, + "learning_rate": 1.4185377086931493e-05, + "loss": 0.6891, + "step": 1232 + }, + { + "epoch": 0.021305640033176667, + "grad_norm": 2.720075719983184, + "learning_rate": 1.4196891191709844e-05, + "loss": 1.1479, + "step": 1233 + }, + { + "epoch": 0.021322919546585568, + "grad_norm": 2.739788313301392, + "learning_rate": 1.42084052964882e-05, + "loss": 0.9035, + "step": 1234 + }, + { + "epoch": 0.021340199059994472, + "grad_norm": 2.0247826344710615, + "learning_rate": 1.4219919401266552e-05, + "loss": 0.7261, + "step": 1235 + }, + { + "epoch": 0.021357478573403373, + "grad_norm": 1.1917906996820813, + "learning_rate": 1.4231433506044907e-05, + "loss": 0.6634, + "step": 1236 + }, + { + "epoch": 0.021374758086812277, + "grad_norm": 3.486292862644756, + "learning_rate": 1.4242947610823259e-05, + "loss": 0.982, + "step": 1237 + }, + { + "epoch": 0.021392037600221177, + "grad_norm": 2.7426184017337327, + "learning_rate": 1.4254461715601613e-05, + "loss": 0.8729, + "step": 1238 + }, + { + "epoch": 0.02140931711363008, + "grad_norm": 2.090836807108603, + "learning_rate": 1.4265975820379967e-05, + "loss": 1.0705, + "step": 1239 + }, + { + "epoch": 0.021426596627038982, + "grad_norm": 1.9992726485750925, + "learning_rate": 1.4277489925158321e-05, + "loss": 0.569, + "step": 1240 + }, + { + "epoch": 0.021443876140447887, + "grad_norm": 2.3019606805540653, + "learning_rate": 1.4289004029936673e-05, + "loss": 0.8822, + "step": 1241 + }, + { + "epoch": 0.021461155653856787, + "grad_norm": 1.8031200081114462, + "learning_rate": 1.4300518134715028e-05, + "loss": 0.868, + "step": 1242 + }, + { + "epoch": 0.02147843516726569, + "grad_norm": 1.858978334254099, + "learning_rate": 1.431203223949338e-05, + "loss": 0.8337, + "step": 1243 + }, + { + "epoch": 0.021495714680674592, + "grad_norm": 2.424302460982979, + "learning_rate": 1.4323546344271736e-05, + "loss": 0.8935, + "step": 1244 + }, + { + "epoch": 0.021512994194083493, + "grad_norm": 2.6738437258239394, + "learning_rate": 1.4335060449050087e-05, + "loss": 1.06, + "step": 1245 + }, + { + "epoch": 0.021530273707492397, + "grad_norm": 2.1308642102374105, + "learning_rate": 1.434657455382844e-05, + "loss": 0.6967, + "step": 1246 + }, + { + "epoch": 0.021547553220901298, + "grad_norm": 2.3011666046216783, + "learning_rate": 1.4358088658606795e-05, + "loss": 0.9415, + "step": 1247 + }, + { + "epoch": 0.021564832734310202, + "grad_norm": 2.2128419962802903, + "learning_rate": 1.4369602763385148e-05, + "loss": 0.8694, + "step": 1248 + }, + { + "epoch": 0.021582112247719103, + "grad_norm": 1.32238038047442, + "learning_rate": 1.4381116868163501e-05, + "loss": 0.5267, + "step": 1249 + }, + { + "epoch": 0.021599391761128007, + "grad_norm": 3.0621886870764725, + "learning_rate": 1.4392630972941854e-05, + "loss": 1.0396, + "step": 1250 + }, + { + "epoch": 0.021616671274536908, + "grad_norm": 2.6205201599976276, + "learning_rate": 1.4404145077720209e-05, + "loss": 0.8456, + "step": 1251 + }, + { + "epoch": 0.021633950787945812, + "grad_norm": 2.6649795867130517, + "learning_rate": 1.4415659182498562e-05, + "loss": 0.9987, + "step": 1252 + }, + { + "epoch": 0.021651230301354712, + "grad_norm": 2.0764676294914417, + "learning_rate": 1.4427173287276914e-05, + "loss": 1.0003, + "step": 1253 + }, + { + "epoch": 0.021668509814763617, + "grad_norm": 1.4035007790395189, + "learning_rate": 1.4438687392055268e-05, + "loss": 0.7781, + "step": 1254 + }, + { + "epoch": 0.021685789328172517, + "grad_norm": 2.494854879210807, + "learning_rate": 1.4450201496833623e-05, + "loss": 0.7995, + "step": 1255 + }, + { + "epoch": 0.02170306884158142, + "grad_norm": 2.149844690083996, + "learning_rate": 1.4461715601611976e-05, + "loss": 0.7288, + "step": 1256 + }, + { + "epoch": 0.021720348354990322, + "grad_norm": 2.6775835681590827, + "learning_rate": 1.4473229706390331e-05, + "loss": 0.9179, + "step": 1257 + }, + { + "epoch": 0.021737627868399226, + "grad_norm": 2.1355707278729787, + "learning_rate": 1.4484743811168683e-05, + "loss": 0.8294, + "step": 1258 + }, + { + "epoch": 0.021754907381808127, + "grad_norm": 2.2761567002938823, + "learning_rate": 1.4496257915947036e-05, + "loss": 0.9119, + "step": 1259 + }, + { + "epoch": 0.02177218689521703, + "grad_norm": 2.0968464432557505, + "learning_rate": 1.450777202072539e-05, + "loss": 0.9188, + "step": 1260 + }, + { + "epoch": 0.021789466408625932, + "grad_norm": 3.3829420354196675, + "learning_rate": 1.4519286125503744e-05, + "loss": 0.7107, + "step": 1261 + }, + { + "epoch": 0.021806745922034836, + "grad_norm": 1.9036895367364164, + "learning_rate": 1.4530800230282097e-05, + "loss": 0.7475, + "step": 1262 + }, + { + "epoch": 0.021824025435443737, + "grad_norm": 2.700270499614902, + "learning_rate": 1.454231433506045e-05, + "loss": 0.7994, + "step": 1263 + }, + { + "epoch": 0.02184130494885264, + "grad_norm": 2.0558734173014055, + "learning_rate": 1.4553828439838805e-05, + "loss": 0.8605, + "step": 1264 + }, + { + "epoch": 0.021858584462261542, + "grad_norm": 2.0712605273383042, + "learning_rate": 1.4565342544617158e-05, + "loss": 0.8008, + "step": 1265 + }, + { + "epoch": 0.021875863975670446, + "grad_norm": 2.394605279171376, + "learning_rate": 1.457685664939551e-05, + "loss": 0.7624, + "step": 1266 + }, + { + "epoch": 0.021893143489079347, + "grad_norm": 1.690230886431946, + "learning_rate": 1.4588370754173864e-05, + "loss": 0.8, + "step": 1267 + }, + { + "epoch": 0.02191042300248825, + "grad_norm": 1.9879970574953412, + "learning_rate": 1.4599884858952217e-05, + "loss": 0.7222, + "step": 1268 + }, + { + "epoch": 0.02192770251589715, + "grad_norm": 3.354740266109418, + "learning_rate": 1.4611398963730572e-05, + "loss": 0.9768, + "step": 1269 + }, + { + "epoch": 0.021944982029306056, + "grad_norm": 2.5501053920189474, + "learning_rate": 1.4622913068508923e-05, + "loss": 1.2406, + "step": 1270 + }, + { + "epoch": 0.021962261542714957, + "grad_norm": 2.7176636644329446, + "learning_rate": 1.4634427173287278e-05, + "loss": 0.7963, + "step": 1271 + }, + { + "epoch": 0.02197954105612386, + "grad_norm": 2.3180872419230902, + "learning_rate": 1.4645941278065631e-05, + "loss": 0.824, + "step": 1272 + }, + { + "epoch": 0.02199682056953276, + "grad_norm": 2.4864375234258707, + "learning_rate": 1.4657455382843986e-05, + "loss": 0.8805, + "step": 1273 + }, + { + "epoch": 0.022014100082941666, + "grad_norm": 2.477123162560499, + "learning_rate": 1.4668969487622338e-05, + "loss": 0.8175, + "step": 1274 + }, + { + "epoch": 0.022031379596350566, + "grad_norm": 2.5369374829478746, + "learning_rate": 1.4680483592400692e-05, + "loss": 0.8838, + "step": 1275 + }, + { + "epoch": 0.02204865910975947, + "grad_norm": 1.8328760195490972, + "learning_rate": 1.4691997697179046e-05, + "loss": 0.9566, + "step": 1276 + }, + { + "epoch": 0.02206593862316837, + "grad_norm": 2.0171487943600988, + "learning_rate": 1.47035118019574e-05, + "loss": 0.995, + "step": 1277 + }, + { + "epoch": 0.022083218136577275, + "grad_norm": 1.6430878306843524, + "learning_rate": 1.4715025906735752e-05, + "loss": 0.8531, + "step": 1278 + }, + { + "epoch": 0.022100497649986176, + "grad_norm": 1.7746010533464125, + "learning_rate": 1.4726540011514105e-05, + "loss": 1.1045, + "step": 1279 + }, + { + "epoch": 0.02211777716339508, + "grad_norm": 1.590417954127767, + "learning_rate": 1.473805411629246e-05, + "loss": 0.8516, + "step": 1280 + }, + { + "epoch": 0.02213505667680398, + "grad_norm": 2.329672228443342, + "learning_rate": 1.4749568221070813e-05, + "loss": 0.8827, + "step": 1281 + }, + { + "epoch": 0.022152336190212885, + "grad_norm": 2.798682578992956, + "learning_rate": 1.4761082325849166e-05, + "loss": 0.8147, + "step": 1282 + }, + { + "epoch": 0.022169615703621786, + "grad_norm": 2.2409199634042953, + "learning_rate": 1.4772596430627519e-05, + "loss": 0.9763, + "step": 1283 + }, + { + "epoch": 0.022186895217030687, + "grad_norm": 2.773755708987599, + "learning_rate": 1.4784110535405874e-05, + "loss": 0.9811, + "step": 1284 + }, + { + "epoch": 0.02220417473043959, + "grad_norm": 2.128006522952723, + "learning_rate": 1.4795624640184227e-05, + "loss": 0.8407, + "step": 1285 + }, + { + "epoch": 0.02222145424384849, + "grad_norm": 1.6736790846994616, + "learning_rate": 1.4807138744962582e-05, + "loss": 0.8871, + "step": 1286 + }, + { + "epoch": 0.022238733757257396, + "grad_norm": 2.411736728071887, + "learning_rate": 1.4818652849740933e-05, + "loss": 0.8386, + "step": 1287 + }, + { + "epoch": 0.022256013270666297, + "grad_norm": 2.167780216070742, + "learning_rate": 1.4830166954519288e-05, + "loss": 0.842, + "step": 1288 + }, + { + "epoch": 0.0222732927840752, + "grad_norm": 1.921515617651457, + "learning_rate": 1.4841681059297641e-05, + "loss": 0.8688, + "step": 1289 + }, + { + "epoch": 0.0222905722974841, + "grad_norm": 1.6199769206939953, + "learning_rate": 1.4853195164075996e-05, + "loss": 0.847, + "step": 1290 + }, + { + "epoch": 0.022307851810893006, + "grad_norm": 2.3522830463869675, + "learning_rate": 1.4864709268854347e-05, + "loss": 0.6065, + "step": 1291 + }, + { + "epoch": 0.022325131324301906, + "grad_norm": 2.017178085685431, + "learning_rate": 1.48762233736327e-05, + "loss": 0.9465, + "step": 1292 + }, + { + "epoch": 0.02234241083771081, + "grad_norm": 1.4744940957881245, + "learning_rate": 1.4887737478411055e-05, + "loss": 0.5633, + "step": 1293 + }, + { + "epoch": 0.02235969035111971, + "grad_norm": 2.318019292329681, + "learning_rate": 1.4899251583189408e-05, + "loss": 0.7331, + "step": 1294 + }, + { + "epoch": 0.022376969864528615, + "grad_norm": 1.8685041063982326, + "learning_rate": 1.4910765687967762e-05, + "loss": 0.8869, + "step": 1295 + }, + { + "epoch": 0.022394249377937516, + "grad_norm": 1.889000654375513, + "learning_rate": 1.4922279792746115e-05, + "loss": 1.128, + "step": 1296 + }, + { + "epoch": 0.02241152889134642, + "grad_norm": 1.8937745736221692, + "learning_rate": 1.493379389752447e-05, + "loss": 0.7354, + "step": 1297 + }, + { + "epoch": 0.02242880840475532, + "grad_norm": 2.1901699622733393, + "learning_rate": 1.4945308002302823e-05, + "loss": 0.7928, + "step": 1298 + }, + { + "epoch": 0.022446087918164225, + "grad_norm": 1.8761199935540989, + "learning_rate": 1.4956822107081174e-05, + "loss": 0.6737, + "step": 1299 + }, + { + "epoch": 0.022463367431573126, + "grad_norm": 2.0348686432336485, + "learning_rate": 1.4968336211859529e-05, + "loss": 0.8895, + "step": 1300 + }, + { + "epoch": 0.02248064694498203, + "grad_norm": 2.5492663789232233, + "learning_rate": 1.4979850316637882e-05, + "loss": 0.9848, + "step": 1301 + }, + { + "epoch": 0.02249792645839093, + "grad_norm": 1.8380953721373359, + "learning_rate": 1.4991364421416237e-05, + "loss": 0.795, + "step": 1302 + }, + { + "epoch": 0.022515205971799835, + "grad_norm": 2.0249938574830377, + "learning_rate": 1.5002878526194588e-05, + "loss": 0.7533, + "step": 1303 + }, + { + "epoch": 0.022532485485208736, + "grad_norm": 2.494405088390087, + "learning_rate": 1.5014392630972943e-05, + "loss": 1.1193, + "step": 1304 + }, + { + "epoch": 0.02254976499861764, + "grad_norm": 2.6008722055671663, + "learning_rate": 1.5025906735751296e-05, + "loss": 0.8054, + "step": 1305 + }, + { + "epoch": 0.02256704451202654, + "grad_norm": 2.496422656246188, + "learning_rate": 1.5037420840529651e-05, + "loss": 0.8909, + "step": 1306 + }, + { + "epoch": 0.022584324025435445, + "grad_norm": 2.038266208449133, + "learning_rate": 1.5048934945308002e-05, + "loss": 0.7517, + "step": 1307 + }, + { + "epoch": 0.022601603538844346, + "grad_norm": 2.411277060199517, + "learning_rate": 1.5060449050086357e-05, + "loss": 1.043, + "step": 1308 + }, + { + "epoch": 0.02261888305225325, + "grad_norm": 2.4987522611190114, + "learning_rate": 1.507196315486471e-05, + "loss": 0.9597, + "step": 1309 + }, + { + "epoch": 0.02263616256566215, + "grad_norm": 2.459833803234341, + "learning_rate": 1.5083477259643065e-05, + "loss": 1.0267, + "step": 1310 + }, + { + "epoch": 0.022653442079071055, + "grad_norm": 2.2857225346683983, + "learning_rate": 1.5094991364421418e-05, + "loss": 0.9055, + "step": 1311 + }, + { + "epoch": 0.022670721592479955, + "grad_norm": 1.500738345368658, + "learning_rate": 1.510650546919977e-05, + "loss": 0.7532, + "step": 1312 + }, + { + "epoch": 0.02268800110588886, + "grad_norm": 2.572027907582603, + "learning_rate": 1.5118019573978125e-05, + "loss": 1.1452, + "step": 1313 + }, + { + "epoch": 0.02270528061929776, + "grad_norm": 2.523750581446761, + "learning_rate": 1.5129533678756478e-05, + "loss": 0.7882, + "step": 1314 + }, + { + "epoch": 0.022722560132706664, + "grad_norm": 2.5078059124328314, + "learning_rate": 1.5141047783534832e-05, + "loss": 0.9317, + "step": 1315 + }, + { + "epoch": 0.022739839646115565, + "grad_norm": 2.365773904920523, + "learning_rate": 1.5152561888313184e-05, + "loss": 0.8787, + "step": 1316 + }, + { + "epoch": 0.02275711915952447, + "grad_norm": 2.0720255343574263, + "learning_rate": 1.5164075993091539e-05, + "loss": 0.8697, + "step": 1317 + }, + { + "epoch": 0.02277439867293337, + "grad_norm": 2.5130976641416196, + "learning_rate": 1.5175590097869892e-05, + "loss": 0.9904, + "step": 1318 + }, + { + "epoch": 0.022791678186342274, + "grad_norm": 2.1556616476455948, + "learning_rate": 1.5187104202648247e-05, + "loss": 0.9927, + "step": 1319 + }, + { + "epoch": 0.022808957699751175, + "grad_norm": 1.7041226668277727, + "learning_rate": 1.5198618307426598e-05, + "loss": 1.0371, + "step": 1320 + }, + { + "epoch": 0.02282623721316008, + "grad_norm": 2.1612469927451783, + "learning_rate": 1.5210132412204953e-05, + "loss": 0.9726, + "step": 1321 + }, + { + "epoch": 0.02284351672656898, + "grad_norm": 2.1860780425528468, + "learning_rate": 1.5221646516983306e-05, + "loss": 0.9636, + "step": 1322 + }, + { + "epoch": 0.02286079623997788, + "grad_norm": 2.0419348707217573, + "learning_rate": 1.523316062176166e-05, + "loss": 1.0035, + "step": 1323 + }, + { + "epoch": 0.022878075753386785, + "grad_norm": 3.292786015591365, + "learning_rate": 1.5244674726540012e-05, + "loss": 0.6885, + "step": 1324 + }, + { + "epoch": 0.022895355266795685, + "grad_norm": 3.047942903682306, + "learning_rate": 1.5256188831318365e-05, + "loss": 0.6925, + "step": 1325 + }, + { + "epoch": 0.02291263478020459, + "grad_norm": 2.4105885335362065, + "learning_rate": 1.526770293609672e-05, + "loss": 1.0151, + "step": 1326 + }, + { + "epoch": 0.02292991429361349, + "grad_norm": 2.478855132075801, + "learning_rate": 1.5279217040875073e-05, + "loss": 1.0761, + "step": 1327 + }, + { + "epoch": 0.022947193807022395, + "grad_norm": 1.8027230817877091, + "learning_rate": 1.5290731145653425e-05, + "loss": 0.8358, + "step": 1328 + }, + { + "epoch": 0.022964473320431295, + "grad_norm": 2.9010289644798593, + "learning_rate": 1.530224525043178e-05, + "loss": 1.0933, + "step": 1329 + }, + { + "epoch": 0.0229817528338402, + "grad_norm": 1.8918966254446261, + "learning_rate": 1.5313759355210134e-05, + "loss": 0.777, + "step": 1330 + }, + { + "epoch": 0.0229990323472491, + "grad_norm": 1.7567773933878152, + "learning_rate": 1.532527345998849e-05, + "loss": 0.6832, + "step": 1331 + }, + { + "epoch": 0.023016311860658004, + "grad_norm": 1.6878856068965773, + "learning_rate": 1.533678756476684e-05, + "loss": 0.5626, + "step": 1332 + }, + { + "epoch": 0.023033591374066905, + "grad_norm": 2.01500237679258, + "learning_rate": 1.5348301669545192e-05, + "loss": 0.8901, + "step": 1333 + }, + { + "epoch": 0.02305087088747581, + "grad_norm": 1.326352231042059, + "learning_rate": 1.5359815774323547e-05, + "loss": 0.848, + "step": 1334 + }, + { + "epoch": 0.02306815040088471, + "grad_norm": 1.8233022794685778, + "learning_rate": 1.53713298791019e-05, + "loss": 0.8266, + "step": 1335 + }, + { + "epoch": 0.023085429914293614, + "grad_norm": 2.2873352765850132, + "learning_rate": 1.5382843983880253e-05, + "loss": 0.8618, + "step": 1336 + }, + { + "epoch": 0.023102709427702515, + "grad_norm": 2.057303725765145, + "learning_rate": 1.5394358088658608e-05, + "loss": 0.8536, + "step": 1337 + }, + { + "epoch": 0.02311998894111142, + "grad_norm": 2.045004554911231, + "learning_rate": 1.5405872193436963e-05, + "loss": 1.0333, + "step": 1338 + }, + { + "epoch": 0.02313726845452032, + "grad_norm": 2.3780537917028837, + "learning_rate": 1.5417386298215314e-05, + "loss": 0.964, + "step": 1339 + }, + { + "epoch": 0.023154547967929224, + "grad_norm": 1.3837451922504784, + "learning_rate": 1.542890040299367e-05, + "loss": 0.777, + "step": 1340 + }, + { + "epoch": 0.023171827481338125, + "grad_norm": 2.399777818926182, + "learning_rate": 1.544041450777202e-05, + "loss": 1.0117, + "step": 1341 + }, + { + "epoch": 0.02318910699474703, + "grad_norm": 2.3351766318615264, + "learning_rate": 1.5451928612550375e-05, + "loss": 0.8031, + "step": 1342 + }, + { + "epoch": 0.02320638650815593, + "grad_norm": 2.641698455706408, + "learning_rate": 1.546344271732873e-05, + "loss": 0.974, + "step": 1343 + }, + { + "epoch": 0.023223666021564834, + "grad_norm": 2.5901251106644394, + "learning_rate": 1.5474956822107085e-05, + "loss": 0.8416, + "step": 1344 + }, + { + "epoch": 0.023240945534973734, + "grad_norm": 2.5166355169157613, + "learning_rate": 1.5486470926885436e-05, + "loss": 1.0751, + "step": 1345 + }, + { + "epoch": 0.02325822504838264, + "grad_norm": 2.1430900120170375, + "learning_rate": 1.5497985031663788e-05, + "loss": 1.0133, + "step": 1346 + }, + { + "epoch": 0.02327550456179154, + "grad_norm": 2.038748622621202, + "learning_rate": 1.5509499136442142e-05, + "loss": 0.6142, + "step": 1347 + }, + { + "epoch": 0.023292784075200443, + "grad_norm": 1.7647621403061884, + "learning_rate": 1.5521013241220497e-05, + "loss": 0.6034, + "step": 1348 + }, + { + "epoch": 0.023310063588609344, + "grad_norm": 2.640412998861666, + "learning_rate": 1.553252734599885e-05, + "loss": 1.1483, + "step": 1349 + }, + { + "epoch": 0.02332734310201825, + "grad_norm": 3.0700895123387424, + "learning_rate": 1.5544041450777204e-05, + "loss": 0.9579, + "step": 1350 + }, + { + "epoch": 0.02334462261542715, + "grad_norm": 2.081586797882495, + "learning_rate": 1.555555555555556e-05, + "loss": 0.9186, + "step": 1351 + }, + { + "epoch": 0.023361902128836053, + "grad_norm": 1.6301146413413774, + "learning_rate": 1.556706966033391e-05, + "loss": 0.5197, + "step": 1352 + }, + { + "epoch": 0.023379181642244954, + "grad_norm": 2.2352487855690133, + "learning_rate": 1.5578583765112265e-05, + "loss": 1.039, + "step": 1353 + }, + { + "epoch": 0.023396461155653858, + "grad_norm": 2.007943168780353, + "learning_rate": 1.5590097869890616e-05, + "loss": 0.9451, + "step": 1354 + }, + { + "epoch": 0.02341374066906276, + "grad_norm": 1.9463979424943252, + "learning_rate": 1.560161197466897e-05, + "loss": 0.9285, + "step": 1355 + }, + { + "epoch": 0.023431020182471663, + "grad_norm": 2.0336554252387584, + "learning_rate": 1.5613126079447326e-05, + "loss": 0.91, + "step": 1356 + }, + { + "epoch": 0.023448299695880564, + "grad_norm": 2.1194804619944754, + "learning_rate": 1.5624640184225677e-05, + "loss": 1.0497, + "step": 1357 + }, + { + "epoch": 0.023465579209289468, + "grad_norm": 1.5927635787155547, + "learning_rate": 1.5636154289004032e-05, + "loss": 0.5828, + "step": 1358 + }, + { + "epoch": 0.02348285872269837, + "grad_norm": 1.8854941559892664, + "learning_rate": 1.5647668393782383e-05, + "loss": 0.7614, + "step": 1359 + }, + { + "epoch": 0.023500138236107273, + "grad_norm": 2.0959217713184706, + "learning_rate": 1.5659182498560738e-05, + "loss": 1.0289, + "step": 1360 + }, + { + "epoch": 0.023517417749516174, + "grad_norm": 2.2120978743675916, + "learning_rate": 1.567069660333909e-05, + "loss": 0.8486, + "step": 1361 + }, + { + "epoch": 0.023534697262925074, + "grad_norm": 2.117296355239035, + "learning_rate": 1.5682210708117444e-05, + "loss": 0.8581, + "step": 1362 + }, + { + "epoch": 0.02355197677633398, + "grad_norm": 1.9773587000309616, + "learning_rate": 1.56937248128958e-05, + "loss": 0.8347, + "step": 1363 + }, + { + "epoch": 0.02356925628974288, + "grad_norm": 2.1474922794666456, + "learning_rate": 1.5705238917674154e-05, + "loss": 1.0174, + "step": 1364 + }, + { + "epoch": 0.023586535803151783, + "grad_norm": 2.263886564981482, + "learning_rate": 1.5716753022452505e-05, + "loss": 0.6325, + "step": 1365 + }, + { + "epoch": 0.023603815316560684, + "grad_norm": 1.8641737256336857, + "learning_rate": 1.5728267127230857e-05, + "loss": 0.6134, + "step": 1366 + }, + { + "epoch": 0.02362109482996959, + "grad_norm": 1.9237225969117535, + "learning_rate": 1.573978123200921e-05, + "loss": 0.8404, + "step": 1367 + }, + { + "epoch": 0.02363837434337849, + "grad_norm": 1.9370627801557452, + "learning_rate": 1.5751295336787566e-05, + "loss": 1.0128, + "step": 1368 + }, + { + "epoch": 0.023655653856787393, + "grad_norm": 1.963887880436873, + "learning_rate": 1.576280944156592e-05, + "loss": 0.7281, + "step": 1369 + }, + { + "epoch": 0.023672933370196294, + "grad_norm": 2.0787239491216654, + "learning_rate": 1.5774323546344273e-05, + "loss": 0.9022, + "step": 1370 + }, + { + "epoch": 0.023690212883605198, + "grad_norm": 2.01800077205414, + "learning_rate": 1.5785837651122628e-05, + "loss": 0.8298, + "step": 1371 + }, + { + "epoch": 0.0237074923970141, + "grad_norm": 2.1417710646984975, + "learning_rate": 1.579735175590098e-05, + "loss": 0.9264, + "step": 1372 + }, + { + "epoch": 0.023724771910423003, + "grad_norm": 2.2746517464036136, + "learning_rate": 1.5808865860679334e-05, + "loss": 0.9712, + "step": 1373 + }, + { + "epoch": 0.023742051423831904, + "grad_norm": 1.728663243368079, + "learning_rate": 1.5820379965457685e-05, + "loss": 0.6751, + "step": 1374 + }, + { + "epoch": 0.023759330937240808, + "grad_norm": 2.1830789073210473, + "learning_rate": 1.583189407023604e-05, + "loss": 0.7824, + "step": 1375 + }, + { + "epoch": 0.02377661045064971, + "grad_norm": 2.2642985104619924, + "learning_rate": 1.5843408175014395e-05, + "loss": 0.9104, + "step": 1376 + }, + { + "epoch": 0.023793889964058613, + "grad_norm": 2.2748213028032915, + "learning_rate": 1.585492227979275e-05, + "loss": 1.0032, + "step": 1377 + }, + { + "epoch": 0.023811169477467514, + "grad_norm": 2.1335541451773157, + "learning_rate": 1.58664363845711e-05, + "loss": 0.8855, + "step": 1378 + }, + { + "epoch": 0.023828448990876418, + "grad_norm": 2.0538739186552895, + "learning_rate": 1.5877950489349452e-05, + "loss": 0.887, + "step": 1379 + }, + { + "epoch": 0.02384572850428532, + "grad_norm": 2.1344579774539665, + "learning_rate": 1.5889464594127807e-05, + "loss": 1.0545, + "step": 1380 + }, + { + "epoch": 0.023863008017694223, + "grad_norm": 2.521078149121028, + "learning_rate": 1.5900978698906162e-05, + "loss": 0.71, + "step": 1381 + }, + { + "epoch": 0.023880287531103123, + "grad_norm": 1.9761735197165888, + "learning_rate": 1.5912492803684514e-05, + "loss": 0.9131, + "step": 1382 + }, + { + "epoch": 0.023897567044512028, + "grad_norm": 2.367727642861952, + "learning_rate": 1.592400690846287e-05, + "loss": 1.098, + "step": 1383 + }, + { + "epoch": 0.023914846557920928, + "grad_norm": 2.1711940664307727, + "learning_rate": 1.5935521013241223e-05, + "loss": 0.8629, + "step": 1384 + }, + { + "epoch": 0.023932126071329832, + "grad_norm": 1.7069449342223624, + "learning_rate": 1.5947035118019575e-05, + "loss": 0.7965, + "step": 1385 + }, + { + "epoch": 0.023949405584738733, + "grad_norm": 2.215552178270542, + "learning_rate": 1.595854922279793e-05, + "loss": 0.9612, + "step": 1386 + }, + { + "epoch": 0.023966685098147637, + "grad_norm": 1.820080494042323, + "learning_rate": 1.597006332757628e-05, + "loss": 0.8099, + "step": 1387 + }, + { + "epoch": 0.023983964611556538, + "grad_norm": 1.9249114506267118, + "learning_rate": 1.5981577432354636e-05, + "loss": 0.9197, + "step": 1388 + }, + { + "epoch": 0.024001244124965442, + "grad_norm": 1.7659498110267442, + "learning_rate": 1.599309153713299e-05, + "loss": 0.5915, + "step": 1389 + }, + { + "epoch": 0.024018523638374343, + "grad_norm": 2.0222133491087795, + "learning_rate": 1.6004605641911345e-05, + "loss": 1.007, + "step": 1390 + }, + { + "epoch": 0.024035803151783247, + "grad_norm": 2.0043573384583624, + "learning_rate": 1.6016119746689697e-05, + "loss": 0.7857, + "step": 1391 + }, + { + "epoch": 0.024053082665192148, + "grad_norm": 2.29177754535486, + "learning_rate": 1.6027633851468048e-05, + "loss": 0.9535, + "step": 1392 + }, + { + "epoch": 0.024070362178601052, + "grad_norm": 2.7375022220579313, + "learning_rate": 1.6039147956246403e-05, + "loss": 1.1575, + "step": 1393 + }, + { + "epoch": 0.024087641692009953, + "grad_norm": 2.4791596963935, + "learning_rate": 1.6050662061024758e-05, + "loss": 0.983, + "step": 1394 + }, + { + "epoch": 0.024104921205418857, + "grad_norm": 1.7312587309829897, + "learning_rate": 1.606217616580311e-05, + "loss": 0.8057, + "step": 1395 + }, + { + "epoch": 0.024122200718827758, + "grad_norm": 2.7094368766227395, + "learning_rate": 1.6073690270581464e-05, + "loss": 1.0573, + "step": 1396 + }, + { + "epoch": 0.024139480232236662, + "grad_norm": 2.1808349724692606, + "learning_rate": 1.608520437535982e-05, + "loss": 0.8895, + "step": 1397 + }, + { + "epoch": 0.024156759745645563, + "grad_norm": 2.6883707561409667, + "learning_rate": 1.609671848013817e-05, + "loss": 0.9659, + "step": 1398 + }, + { + "epoch": 0.024174039259054467, + "grad_norm": 2.13710622024998, + "learning_rate": 1.610823258491652e-05, + "loss": 0.776, + "step": 1399 + }, + { + "epoch": 0.024191318772463367, + "grad_norm": 2.196865912088012, + "learning_rate": 1.6119746689694876e-05, + "loss": 0.8057, + "step": 1400 + }, + { + "epoch": 0.024208598285872268, + "grad_norm": 2.1919308090979883, + "learning_rate": 1.613126079447323e-05, + "loss": 1.0185, + "step": 1401 + }, + { + "epoch": 0.024225877799281172, + "grad_norm": 2.0091272334800667, + "learning_rate": 1.6142774899251586e-05, + "loss": 0.8095, + "step": 1402 + }, + { + "epoch": 0.024243157312690073, + "grad_norm": 2.4583433188942623, + "learning_rate": 1.6154289004029938e-05, + "loss": 0.8212, + "step": 1403 + }, + { + "epoch": 0.024260436826098977, + "grad_norm": 1.863397144501505, + "learning_rate": 1.6165803108808292e-05, + "loss": 0.9253, + "step": 1404 + }, + { + "epoch": 0.024277716339507878, + "grad_norm": 2.255979631416335, + "learning_rate": 1.6177317213586644e-05, + "loss": 0.8441, + "step": 1405 + }, + { + "epoch": 0.024294995852916782, + "grad_norm": 1.5406976858092212, + "learning_rate": 1.6188831318365e-05, + "loss": 0.6108, + "step": 1406 + }, + { + "epoch": 0.024312275366325683, + "grad_norm": 1.584380498562239, + "learning_rate": 1.620034542314335e-05, + "loss": 0.4524, + "step": 1407 + }, + { + "epoch": 0.024329554879734587, + "grad_norm": 1.1509191847872515, + "learning_rate": 1.6211859527921705e-05, + "loss": 0.631, + "step": 1408 + }, + { + "epoch": 0.024346834393143488, + "grad_norm": 1.9667444099519829, + "learning_rate": 1.622337363270006e-05, + "loss": 0.9817, + "step": 1409 + }, + { + "epoch": 0.024364113906552392, + "grad_norm": 2.173552329595892, + "learning_rate": 1.6234887737478414e-05, + "loss": 0.9918, + "step": 1410 + }, + { + "epoch": 0.024381393419961293, + "grad_norm": 2.149949551789629, + "learning_rate": 1.6246401842256766e-05, + "loss": 0.9389, + "step": 1411 + }, + { + "epoch": 0.024398672933370197, + "grad_norm": 2.2022019898479326, + "learning_rate": 1.6257915947035117e-05, + "loss": 0.8603, + "step": 1412 + }, + { + "epoch": 0.024415952446779098, + "grad_norm": 1.9716775985523047, + "learning_rate": 1.6269430051813472e-05, + "loss": 0.7893, + "step": 1413 + }, + { + "epoch": 0.024433231960188, + "grad_norm": 2.3569489727665527, + "learning_rate": 1.6280944156591827e-05, + "loss": 1.1399, + "step": 1414 + }, + { + "epoch": 0.024450511473596902, + "grad_norm": 1.9533354754305368, + "learning_rate": 1.629245826137018e-05, + "loss": 0.6679, + "step": 1415 + }, + { + "epoch": 0.024467790987005807, + "grad_norm": 2.0443103224340406, + "learning_rate": 1.6303972366148533e-05, + "loss": 0.9755, + "step": 1416 + }, + { + "epoch": 0.024485070500414707, + "grad_norm": 2.371701492056963, + "learning_rate": 1.6315486470926888e-05, + "loss": 1.0162, + "step": 1417 + }, + { + "epoch": 0.02450235001382361, + "grad_norm": 2.153653735679025, + "learning_rate": 1.632700057570524e-05, + "loss": 0.7519, + "step": 1418 + }, + { + "epoch": 0.024519629527232512, + "grad_norm": 1.817692588573813, + "learning_rate": 1.6338514680483594e-05, + "loss": 1.0244, + "step": 1419 + }, + { + "epoch": 0.024536909040641416, + "grad_norm": 1.9404430374385133, + "learning_rate": 1.6350028785261946e-05, + "loss": 0.9832, + "step": 1420 + }, + { + "epoch": 0.024554188554050317, + "grad_norm": 1.9825555669654544, + "learning_rate": 1.63615428900403e-05, + "loss": 0.8685, + "step": 1421 + }, + { + "epoch": 0.02457146806745922, + "grad_norm": 2.233735666691348, + "learning_rate": 1.6373056994818655e-05, + "loss": 0.6782, + "step": 1422 + }, + { + "epoch": 0.024588747580868122, + "grad_norm": 1.6853486968927338, + "learning_rate": 1.638457109959701e-05, + "loss": 0.9063, + "step": 1423 + }, + { + "epoch": 0.024606027094277026, + "grad_norm": 2.0510244866537852, + "learning_rate": 1.639608520437536e-05, + "loss": 0.8435, + "step": 1424 + }, + { + "epoch": 0.024623306607685927, + "grad_norm": 2.1747534932939248, + "learning_rate": 1.6407599309153713e-05, + "loss": 1.0002, + "step": 1425 + }, + { + "epoch": 0.02464058612109483, + "grad_norm": 1.8942855929361753, + "learning_rate": 1.6419113413932068e-05, + "loss": 0.9067, + "step": 1426 + }, + { + "epoch": 0.024657865634503732, + "grad_norm": 1.7446037620624875, + "learning_rate": 1.6430627518710423e-05, + "loss": 0.8544, + "step": 1427 + }, + { + "epoch": 0.024675145147912636, + "grad_norm": 1.698627960586172, + "learning_rate": 1.6442141623488774e-05, + "loss": 0.7962, + "step": 1428 + }, + { + "epoch": 0.024692424661321537, + "grad_norm": 2.2696324582126537, + "learning_rate": 1.645365572826713e-05, + "loss": 0.7086, + "step": 1429 + }, + { + "epoch": 0.02470970417473044, + "grad_norm": 1.9385906906326629, + "learning_rate": 1.6465169833045484e-05, + "loss": 0.8863, + "step": 1430 + }, + { + "epoch": 0.02472698368813934, + "grad_norm": 1.279095197274781, + "learning_rate": 1.6476683937823835e-05, + "loss": 0.7057, + "step": 1431 + }, + { + "epoch": 0.024744263201548246, + "grad_norm": 2.9577045435661273, + "learning_rate": 1.6488198042602186e-05, + "loss": 0.9334, + "step": 1432 + }, + { + "epoch": 0.024761542714957147, + "grad_norm": 1.878129504619498, + "learning_rate": 1.649971214738054e-05, + "loss": 0.711, + "step": 1433 + }, + { + "epoch": 0.02477882222836605, + "grad_norm": 2.2727885218354773, + "learning_rate": 1.6511226252158896e-05, + "loss": 0.9548, + "step": 1434 + }, + { + "epoch": 0.02479610174177495, + "grad_norm": 2.0957906077273867, + "learning_rate": 1.652274035693725e-05, + "loss": 0.919, + "step": 1435 + }, + { + "epoch": 0.024813381255183856, + "grad_norm": 1.4338548554410695, + "learning_rate": 1.6534254461715602e-05, + "loss": 0.5863, + "step": 1436 + }, + { + "epoch": 0.024830660768592756, + "grad_norm": 1.705585481444766, + "learning_rate": 1.6545768566493957e-05, + "loss": 0.767, + "step": 1437 + }, + { + "epoch": 0.02484794028200166, + "grad_norm": 2.7111839121035706, + "learning_rate": 1.655728267127231e-05, + "loss": 0.9151, + "step": 1438 + }, + { + "epoch": 0.02486521979541056, + "grad_norm": 2.041209690298817, + "learning_rate": 1.6568796776050663e-05, + "loss": 0.669, + "step": 1439 + }, + { + "epoch": 0.024882499308819462, + "grad_norm": 1.291128246666017, + "learning_rate": 1.6580310880829015e-05, + "loss": 0.7073, + "step": 1440 + }, + { + "epoch": 0.024899778822228366, + "grad_norm": 1.4028043662878014, + "learning_rate": 1.659182498560737e-05, + "loss": 0.7889, + "step": 1441 + }, + { + "epoch": 0.024917058335637267, + "grad_norm": 1.6834809041098544, + "learning_rate": 1.6603339090385724e-05, + "loss": 0.8909, + "step": 1442 + }, + { + "epoch": 0.02493433784904617, + "grad_norm": 2.1368958270948775, + "learning_rate": 1.661485319516408e-05, + "loss": 1.0652, + "step": 1443 + }, + { + "epoch": 0.024951617362455072, + "grad_norm": 1.9601707080083477, + "learning_rate": 1.662636729994243e-05, + "loss": 0.9187, + "step": 1444 + }, + { + "epoch": 0.024968896875863976, + "grad_norm": 2.1536613186236577, + "learning_rate": 1.6637881404720782e-05, + "loss": 0.8882, + "step": 1445 + }, + { + "epoch": 0.024986176389272877, + "grad_norm": 1.3510374729858918, + "learning_rate": 1.6649395509499137e-05, + "loss": 0.6217, + "step": 1446 + }, + { + "epoch": 0.02500345590268178, + "grad_norm": 2.33516814754062, + "learning_rate": 1.6660909614277492e-05, + "loss": 0.8392, + "step": 1447 + }, + { + "epoch": 0.02502073541609068, + "grad_norm": 1.962774322937731, + "learning_rate": 1.6672423719055847e-05, + "loss": 0.8498, + "step": 1448 + }, + { + "epoch": 0.025038014929499586, + "grad_norm": 1.9919622060822557, + "learning_rate": 1.6683937823834198e-05, + "loss": 0.8423, + "step": 1449 + }, + { + "epoch": 0.025055294442908486, + "grad_norm": 1.500613833455289, + "learning_rate": 1.6695451928612553e-05, + "loss": 0.8651, + "step": 1450 + }, + { + "epoch": 0.02507257395631739, + "grad_norm": 1.775711649067849, + "learning_rate": 1.6706966033390904e-05, + "loss": 0.7332, + "step": 1451 + }, + { + "epoch": 0.02508985346972629, + "grad_norm": 1.9030262447898068, + "learning_rate": 1.671848013816926e-05, + "loss": 0.8588, + "step": 1452 + }, + { + "epoch": 0.025107132983135196, + "grad_norm": 2.372625778818037, + "learning_rate": 1.672999424294761e-05, + "loss": 0.9811, + "step": 1453 + }, + { + "epoch": 0.025124412496544096, + "grad_norm": 1.7857554836597382, + "learning_rate": 1.6741508347725965e-05, + "loss": 1.1319, + "step": 1454 + }, + { + "epoch": 0.025141692009953, + "grad_norm": 2.1285184051539847, + "learning_rate": 1.675302245250432e-05, + "loss": 0.9029, + "step": 1455 + }, + { + "epoch": 0.0251589715233619, + "grad_norm": 2.1753889133630433, + "learning_rate": 1.6764536557282675e-05, + "loss": 0.9646, + "step": 1456 + }, + { + "epoch": 0.025176251036770805, + "grad_norm": 1.8069505022409198, + "learning_rate": 1.6776050662061026e-05, + "loss": 0.9079, + "step": 1457 + }, + { + "epoch": 0.025193530550179706, + "grad_norm": 1.8301727435326058, + "learning_rate": 1.6787564766839378e-05, + "loss": 0.7686, + "step": 1458 + }, + { + "epoch": 0.02521081006358861, + "grad_norm": 2.143134178877233, + "learning_rate": 1.6799078871617733e-05, + "loss": 1.086, + "step": 1459 + }, + { + "epoch": 0.02522808957699751, + "grad_norm": 2.0813764240018546, + "learning_rate": 1.6810592976396087e-05, + "loss": 0.8538, + "step": 1460 + }, + { + "epoch": 0.025245369090406415, + "grad_norm": 2.56341291182027, + "learning_rate": 1.682210708117444e-05, + "loss": 1.085, + "step": 1461 + }, + { + "epoch": 0.025262648603815316, + "grad_norm": 1.4008789908928885, + "learning_rate": 1.6833621185952794e-05, + "loss": 0.61, + "step": 1462 + }, + { + "epoch": 0.02527992811722422, + "grad_norm": 2.012293037027907, + "learning_rate": 1.684513529073115e-05, + "loss": 0.9811, + "step": 1463 + }, + { + "epoch": 0.02529720763063312, + "grad_norm": 2.0542156821409465, + "learning_rate": 1.68566493955095e-05, + "loss": 0.8459, + "step": 1464 + }, + { + "epoch": 0.025314487144042025, + "grad_norm": 1.9791850391442856, + "learning_rate": 1.686816350028785e-05, + "loss": 0.9873, + "step": 1465 + }, + { + "epoch": 0.025331766657450926, + "grad_norm": 1.5028682756036367, + "learning_rate": 1.6879677605066206e-05, + "loss": 0.6572, + "step": 1466 + }, + { + "epoch": 0.02534904617085983, + "grad_norm": 1.9843782928244584, + "learning_rate": 1.689119170984456e-05, + "loss": 0.6873, + "step": 1467 + }, + { + "epoch": 0.02536632568426873, + "grad_norm": 2.3228347516626657, + "learning_rate": 1.6902705814622916e-05, + "loss": 1.0455, + "step": 1468 + }, + { + "epoch": 0.025383605197677635, + "grad_norm": 1.4602312897120062, + "learning_rate": 1.6914219919401267e-05, + "loss": 0.6366, + "step": 1469 + }, + { + "epoch": 0.025400884711086535, + "grad_norm": 1.84143066608816, + "learning_rate": 1.6925734024179622e-05, + "loss": 0.827, + "step": 1470 + }, + { + "epoch": 0.02541816422449544, + "grad_norm": 2.0357002591550795, + "learning_rate": 1.6937248128957973e-05, + "loss": 0.9469, + "step": 1471 + }, + { + "epoch": 0.02543544373790434, + "grad_norm": 1.8068928677959126, + "learning_rate": 1.6948762233736328e-05, + "loss": 1.0243, + "step": 1472 + }, + { + "epoch": 0.025452723251313245, + "grad_norm": 2.0172420788928, + "learning_rate": 1.6960276338514683e-05, + "loss": 1.0051, + "step": 1473 + }, + { + "epoch": 0.025470002764722145, + "grad_norm": 2.403747357588326, + "learning_rate": 1.6971790443293034e-05, + "loss": 0.9648, + "step": 1474 + }, + { + "epoch": 0.02548728227813105, + "grad_norm": 2.2493810145960844, + "learning_rate": 1.698330454807139e-05, + "loss": 0.9764, + "step": 1475 + }, + { + "epoch": 0.02550456179153995, + "grad_norm": 1.783814896255291, + "learning_rate": 1.6994818652849744e-05, + "loss": 1.1307, + "step": 1476 + }, + { + "epoch": 0.025521841304948854, + "grad_norm": 2.2260818109398195, + "learning_rate": 1.7006332757628096e-05, + "loss": 1.0219, + "step": 1477 + }, + { + "epoch": 0.025539120818357755, + "grad_norm": 1.6750953905681292, + "learning_rate": 1.7017846862406447e-05, + "loss": 0.9339, + "step": 1478 + }, + { + "epoch": 0.025556400331766656, + "grad_norm": 1.7774107994635797, + "learning_rate": 1.7029360967184802e-05, + "loss": 0.7773, + "step": 1479 + }, + { + "epoch": 0.02557367984517556, + "grad_norm": 2.140031307305837, + "learning_rate": 1.7040875071963157e-05, + "loss": 0.9407, + "step": 1480 + }, + { + "epoch": 0.02559095935858446, + "grad_norm": 1.9358074080922554, + "learning_rate": 1.705238917674151e-05, + "loss": 0.8668, + "step": 1481 + }, + { + "epoch": 0.025608238871993365, + "grad_norm": 1.6066615998874818, + "learning_rate": 1.7063903281519863e-05, + "loss": 0.8401, + "step": 1482 + }, + { + "epoch": 0.025625518385402266, + "grad_norm": 2.2590801061562633, + "learning_rate": 1.7075417386298218e-05, + "loss": 0.716, + "step": 1483 + }, + { + "epoch": 0.02564279789881117, + "grad_norm": 2.781447849908948, + "learning_rate": 1.708693149107657e-05, + "loss": 0.8393, + "step": 1484 + }, + { + "epoch": 0.02566007741222007, + "grad_norm": 1.6245900935902087, + "learning_rate": 1.7098445595854924e-05, + "loss": 0.7509, + "step": 1485 + }, + { + "epoch": 0.025677356925628975, + "grad_norm": 2.151258670107989, + "learning_rate": 1.7109959700633275e-05, + "loss": 0.821, + "step": 1486 + }, + { + "epoch": 0.025694636439037875, + "grad_norm": 2.0471980155804723, + "learning_rate": 1.712147380541163e-05, + "loss": 0.8105, + "step": 1487 + }, + { + "epoch": 0.02571191595244678, + "grad_norm": 1.8426494790850751, + "learning_rate": 1.7132987910189985e-05, + "loss": 0.8411, + "step": 1488 + }, + { + "epoch": 0.02572919546585568, + "grad_norm": 1.9789362465641738, + "learning_rate": 1.714450201496834e-05, + "loss": 0.6633, + "step": 1489 + }, + { + "epoch": 0.025746474979264584, + "grad_norm": 1.6190092265865281, + "learning_rate": 1.715601611974669e-05, + "loss": 0.8384, + "step": 1490 + }, + { + "epoch": 0.025763754492673485, + "grad_norm": 1.98556649882879, + "learning_rate": 1.7167530224525043e-05, + "loss": 1.1824, + "step": 1491 + }, + { + "epoch": 0.02578103400608239, + "grad_norm": 1.5667506264041537, + "learning_rate": 1.7179044329303397e-05, + "loss": 0.6058, + "step": 1492 + }, + { + "epoch": 0.02579831351949129, + "grad_norm": 1.8080252572987212, + "learning_rate": 1.7190558434081752e-05, + "loss": 0.9495, + "step": 1493 + }, + { + "epoch": 0.025815593032900194, + "grad_norm": 2.202278779980645, + "learning_rate": 1.7202072538860104e-05, + "loss": 0.853, + "step": 1494 + }, + { + "epoch": 0.025832872546309095, + "grad_norm": 2.1676653040393594, + "learning_rate": 1.721358664363846e-05, + "loss": 0.9469, + "step": 1495 + }, + { + "epoch": 0.025850152059718, + "grad_norm": 1.7445708130923325, + "learning_rate": 1.7225100748416813e-05, + "loss": 0.7668, + "step": 1496 + }, + { + "epoch": 0.0258674315731269, + "grad_norm": 2.0546330077347825, + "learning_rate": 1.7236614853195165e-05, + "loss": 0.8887, + "step": 1497 + }, + { + "epoch": 0.025884711086535804, + "grad_norm": 1.9254327986495048, + "learning_rate": 1.724812895797352e-05, + "loss": 0.8025, + "step": 1498 + }, + { + "epoch": 0.025901990599944705, + "grad_norm": 1.5615141682263591, + "learning_rate": 1.725964306275187e-05, + "loss": 0.6116, + "step": 1499 + }, + { + "epoch": 0.02591927011335361, + "grad_norm": 2.003685504638353, + "learning_rate": 1.7271157167530226e-05, + "loss": 0.6799, + "step": 1500 + }, + { + "epoch": 0.02593654962676251, + "grad_norm": 1.83639166028822, + "learning_rate": 1.728267127230858e-05, + "loss": 0.6889, + "step": 1501 + }, + { + "epoch": 0.025953829140171414, + "grad_norm": 1.7555702674805695, + "learning_rate": 1.7294185377086935e-05, + "loss": 0.7451, + "step": 1502 + }, + { + "epoch": 0.025971108653580315, + "grad_norm": 1.9005367640793862, + "learning_rate": 1.7305699481865287e-05, + "loss": 0.868, + "step": 1503 + }, + { + "epoch": 0.02598838816698922, + "grad_norm": 1.943452176541021, + "learning_rate": 1.7317213586643638e-05, + "loss": 0.8762, + "step": 1504 + }, + { + "epoch": 0.02600566768039812, + "grad_norm": 1.251478977066573, + "learning_rate": 1.7328727691421993e-05, + "loss": 0.7364, + "step": 1505 + }, + { + "epoch": 0.026022947193807024, + "grad_norm": 2.1792882217311376, + "learning_rate": 1.7340241796200348e-05, + "loss": 0.7855, + "step": 1506 + }, + { + "epoch": 0.026040226707215924, + "grad_norm": 2.133516456325462, + "learning_rate": 1.73517559009787e-05, + "loss": 0.9203, + "step": 1507 + }, + { + "epoch": 0.02605750622062483, + "grad_norm": 1.7003024950314003, + "learning_rate": 1.7363270005757054e-05, + "loss": 0.8279, + "step": 1508 + }, + { + "epoch": 0.02607478573403373, + "grad_norm": 1.6804455101684639, + "learning_rate": 1.737478411053541e-05, + "loss": 0.9709, + "step": 1509 + }, + { + "epoch": 0.026092065247442633, + "grad_norm": 1.7766746060634473, + "learning_rate": 1.738629821531376e-05, + "loss": 0.8796, + "step": 1510 + }, + { + "epoch": 0.026109344760851534, + "grad_norm": 1.9054518216383867, + "learning_rate": 1.7397812320092112e-05, + "loss": 0.9882, + "step": 1511 + }, + { + "epoch": 0.02612662427426044, + "grad_norm": 1.815401999357305, + "learning_rate": 1.7409326424870467e-05, + "loss": 0.828, + "step": 1512 + }, + { + "epoch": 0.02614390378766934, + "grad_norm": 1.3795180900983868, + "learning_rate": 1.742084052964882e-05, + "loss": 0.682, + "step": 1513 + }, + { + "epoch": 0.026161183301078243, + "grad_norm": 2.7392780480283516, + "learning_rate": 1.7432354634427176e-05, + "loss": 1.0909, + "step": 1514 + }, + { + "epoch": 0.026178462814487144, + "grad_norm": 2.701836331121472, + "learning_rate": 1.7443868739205528e-05, + "loss": 0.9166, + "step": 1515 + }, + { + "epoch": 0.026195742327896048, + "grad_norm": 1.848512189691909, + "learning_rate": 1.7455382843983882e-05, + "loss": 1.0314, + "step": 1516 + }, + { + "epoch": 0.02621302184130495, + "grad_norm": 2.0844361190838856, + "learning_rate": 1.7466896948762234e-05, + "loss": 1.1648, + "step": 1517 + }, + { + "epoch": 0.02623030135471385, + "grad_norm": 2.1565540007818766, + "learning_rate": 1.747841105354059e-05, + "loss": 0.8942, + "step": 1518 + }, + { + "epoch": 0.026247580868122754, + "grad_norm": 2.1444216138745738, + "learning_rate": 1.748992515831894e-05, + "loss": 0.8934, + "step": 1519 + }, + { + "epoch": 0.026264860381531654, + "grad_norm": 1.735827337324912, + "learning_rate": 1.7501439263097295e-05, + "loss": 0.9153, + "step": 1520 + }, + { + "epoch": 0.02628213989494056, + "grad_norm": 1.8765218765449065, + "learning_rate": 1.751295336787565e-05, + "loss": 0.8502, + "step": 1521 + }, + { + "epoch": 0.02629941940834946, + "grad_norm": 1.713731639105074, + "learning_rate": 1.7524467472654005e-05, + "loss": 0.9244, + "step": 1522 + }, + { + "epoch": 0.026316698921758364, + "grad_norm": 2.107245697581233, + "learning_rate": 1.7535981577432356e-05, + "loss": 0.9208, + "step": 1523 + }, + { + "epoch": 0.026333978435167264, + "grad_norm": 2.5556914128000003, + "learning_rate": 1.7547495682210707e-05, + "loss": 0.9207, + "step": 1524 + }, + { + "epoch": 0.02635125794857617, + "grad_norm": 1.7704362805408325, + "learning_rate": 1.7559009786989062e-05, + "loss": 0.7141, + "step": 1525 + }, + { + "epoch": 0.02636853746198507, + "grad_norm": 1.8909178956248707, + "learning_rate": 1.7570523891767417e-05, + "loss": 0.71, + "step": 1526 + }, + { + "epoch": 0.026385816975393973, + "grad_norm": 1.879485485700508, + "learning_rate": 1.7582037996545772e-05, + "loss": 0.8504, + "step": 1527 + }, + { + "epoch": 0.026403096488802874, + "grad_norm": 2.515250528739661, + "learning_rate": 1.7593552101324123e-05, + "loss": 1.2001, + "step": 1528 + }, + { + "epoch": 0.026420376002211778, + "grad_norm": 1.9192635811229266, + "learning_rate": 1.7605066206102478e-05, + "loss": 0.9473, + "step": 1529 + }, + { + "epoch": 0.02643765551562068, + "grad_norm": 1.6876835638988756, + "learning_rate": 1.761658031088083e-05, + "loss": 0.6023, + "step": 1530 + }, + { + "epoch": 0.026454935029029583, + "grad_norm": 1.7895121044371602, + "learning_rate": 1.7628094415659184e-05, + "loss": 0.8213, + "step": 1531 + }, + { + "epoch": 0.026472214542438484, + "grad_norm": 1.885250233308215, + "learning_rate": 1.7639608520437536e-05, + "loss": 0.7655, + "step": 1532 + }, + { + "epoch": 0.026489494055847388, + "grad_norm": 1.5608422904070194, + "learning_rate": 1.765112262521589e-05, + "loss": 0.9977, + "step": 1533 + }, + { + "epoch": 0.02650677356925629, + "grad_norm": 1.9082948934663095, + "learning_rate": 1.7662636729994245e-05, + "loss": 0.7437, + "step": 1534 + }, + { + "epoch": 0.026524053082665193, + "grad_norm": 2.9395831504257695, + "learning_rate": 1.76741508347726e-05, + "loss": 0.7688, + "step": 1535 + }, + { + "epoch": 0.026541332596074094, + "grad_norm": 1.494330245369826, + "learning_rate": 1.768566493955095e-05, + "loss": 0.6641, + "step": 1536 + }, + { + "epoch": 0.026558612109482998, + "grad_norm": 2.114581233258333, + "learning_rate": 1.7697179044329303e-05, + "loss": 0.788, + "step": 1537 + }, + { + "epoch": 0.0265758916228919, + "grad_norm": 1.9228240945866084, + "learning_rate": 1.7708693149107658e-05, + "loss": 0.3774, + "step": 1538 + }, + { + "epoch": 0.026593171136300803, + "grad_norm": 1.5661536420784437, + "learning_rate": 1.7720207253886013e-05, + "loss": 0.8719, + "step": 1539 + }, + { + "epoch": 0.026610450649709703, + "grad_norm": 1.976437987022259, + "learning_rate": 1.7731721358664364e-05, + "loss": 1.2609, + "step": 1540 + }, + { + "epoch": 0.026627730163118608, + "grad_norm": 3.1154880579773265, + "learning_rate": 1.774323546344272e-05, + "loss": 0.7733, + "step": 1541 + }, + { + "epoch": 0.02664500967652751, + "grad_norm": 2.311336956409917, + "learning_rate": 1.7754749568221074e-05, + "loss": 0.9048, + "step": 1542 + }, + { + "epoch": 0.026662289189936413, + "grad_norm": 1.718982183455834, + "learning_rate": 1.7766263672999425e-05, + "loss": 1.0238, + "step": 1543 + }, + { + "epoch": 0.026679568703345313, + "grad_norm": 1.4860351908242628, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.7144, + "step": 1544 + }, + { + "epoch": 0.026696848216754217, + "grad_norm": 3.538038973534003, + "learning_rate": 1.778929188255613e-05, + "loss": 0.7857, + "step": 1545 + }, + { + "epoch": 0.026714127730163118, + "grad_norm": 1.9744176308804038, + "learning_rate": 1.7800805987334486e-05, + "loss": 0.7817, + "step": 1546 + }, + { + "epoch": 0.026731407243572022, + "grad_norm": 1.681567209097485, + "learning_rate": 1.781232009211284e-05, + "loss": 0.7261, + "step": 1547 + }, + { + "epoch": 0.026748686756980923, + "grad_norm": 1.1444626819576615, + "learning_rate": 1.7823834196891192e-05, + "loss": 0.8588, + "step": 1548 + }, + { + "epoch": 0.026765966270389827, + "grad_norm": 1.6567383448743969, + "learning_rate": 1.7835348301669547e-05, + "loss": 0.7269, + "step": 1549 + }, + { + "epoch": 0.026783245783798728, + "grad_norm": 2.0857322999536034, + "learning_rate": 1.78468624064479e-05, + "loss": 0.8866, + "step": 1550 + }, + { + "epoch": 0.026800525297207632, + "grad_norm": 2.1199022888844654, + "learning_rate": 1.7858376511226254e-05, + "loss": 0.8192, + "step": 1551 + }, + { + "epoch": 0.026817804810616533, + "grad_norm": 1.8527881215200495, + "learning_rate": 1.786989061600461e-05, + "loss": 0.7341, + "step": 1552 + }, + { + "epoch": 0.026835084324025437, + "grad_norm": 1.431878201252653, + "learning_rate": 1.788140472078296e-05, + "loss": 0.8536, + "step": 1553 + }, + { + "epoch": 0.026852363837434338, + "grad_norm": 1.5041674352569563, + "learning_rate": 1.7892918825561315e-05, + "loss": 0.751, + "step": 1554 + }, + { + "epoch": 0.026869643350843242, + "grad_norm": 1.962059849821554, + "learning_rate": 1.790443293033967e-05, + "loss": 1.0616, + "step": 1555 + }, + { + "epoch": 0.026886922864252143, + "grad_norm": 1.987493589992851, + "learning_rate": 1.791594703511802e-05, + "loss": 0.7233, + "step": 1556 + }, + { + "epoch": 0.026904202377661043, + "grad_norm": 3.0679577660563107, + "learning_rate": 1.7927461139896372e-05, + "loss": 0.8401, + "step": 1557 + }, + { + "epoch": 0.026921481891069948, + "grad_norm": 2.5489690008700356, + "learning_rate": 1.7938975244674727e-05, + "loss": 0.6278, + "step": 1558 + }, + { + "epoch": 0.02693876140447885, + "grad_norm": 1.623386804136885, + "learning_rate": 1.7950489349453082e-05, + "loss": 0.9403, + "step": 1559 + }, + { + "epoch": 0.026956040917887752, + "grad_norm": 2.478023134556603, + "learning_rate": 1.7962003454231437e-05, + "loss": 0.9432, + "step": 1560 + }, + { + "epoch": 0.026973320431296653, + "grad_norm": 2.9545859105018772, + "learning_rate": 1.7973517559009788e-05, + "loss": 0.9223, + "step": 1561 + }, + { + "epoch": 0.026990599944705557, + "grad_norm": 2.151904363637524, + "learning_rate": 1.7985031663788143e-05, + "loss": 0.5922, + "step": 1562 + }, + { + "epoch": 0.027007879458114458, + "grad_norm": 2.178697207344111, + "learning_rate": 1.7996545768566494e-05, + "loss": 0.9691, + "step": 1563 + }, + { + "epoch": 0.027025158971523362, + "grad_norm": 2.135426524847005, + "learning_rate": 1.800805987334485e-05, + "loss": 0.7091, + "step": 1564 + }, + { + "epoch": 0.027042438484932263, + "grad_norm": 1.8111929249849985, + "learning_rate": 1.80195739781232e-05, + "loss": 0.8986, + "step": 1565 + }, + { + "epoch": 0.027059717998341167, + "grad_norm": 1.8448918466657425, + "learning_rate": 1.8031088082901555e-05, + "loss": 1.0916, + "step": 1566 + }, + { + "epoch": 0.027076997511750068, + "grad_norm": 2.842000585159799, + "learning_rate": 1.804260218767991e-05, + "loss": 0.9845, + "step": 1567 + }, + { + "epoch": 0.027094277025158972, + "grad_norm": 1.7356708574320412, + "learning_rate": 1.8054116292458265e-05, + "loss": 0.6713, + "step": 1568 + }, + { + "epoch": 0.027111556538567873, + "grad_norm": 2.118579158346302, + "learning_rate": 1.8065630397236616e-05, + "loss": 0.706, + "step": 1569 + }, + { + "epoch": 0.027128836051976777, + "grad_norm": 1.6162135401065525, + "learning_rate": 1.8077144502014968e-05, + "loss": 0.7638, + "step": 1570 + }, + { + "epoch": 0.027146115565385678, + "grad_norm": 1.7957864797971264, + "learning_rate": 1.8088658606793323e-05, + "loss": 0.773, + "step": 1571 + }, + { + "epoch": 0.027163395078794582, + "grad_norm": 1.7865338468878702, + "learning_rate": 1.8100172711571678e-05, + "loss": 0.9909, + "step": 1572 + }, + { + "epoch": 0.027180674592203483, + "grad_norm": 1.9865004734046452, + "learning_rate": 1.811168681635003e-05, + "loss": 0.7589, + "step": 1573 + }, + { + "epoch": 0.027197954105612387, + "grad_norm": 1.8369322218300435, + "learning_rate": 1.8123200921128384e-05, + "loss": 0.6512, + "step": 1574 + }, + { + "epoch": 0.027215233619021287, + "grad_norm": 2.161938506359431, + "learning_rate": 1.813471502590674e-05, + "loss": 1.0062, + "step": 1575 + }, + { + "epoch": 0.02723251313243019, + "grad_norm": 1.8275756874821232, + "learning_rate": 1.814622913068509e-05, + "loss": 0.8398, + "step": 1576 + }, + { + "epoch": 0.027249792645839092, + "grad_norm": 2.371964738633369, + "learning_rate": 1.8157743235463445e-05, + "loss": 1.0033, + "step": 1577 + }, + { + "epoch": 0.027267072159247997, + "grad_norm": 2.3406786647028643, + "learning_rate": 1.8169257340241796e-05, + "loss": 0.7707, + "step": 1578 + }, + { + "epoch": 0.027284351672656897, + "grad_norm": 2.316252909063399, + "learning_rate": 1.818077144502015e-05, + "loss": 1.0019, + "step": 1579 + }, + { + "epoch": 0.0273016311860658, + "grad_norm": 1.841333795798775, + "learning_rate": 1.8192285549798506e-05, + "loss": 0.6863, + "step": 1580 + }, + { + "epoch": 0.027318910699474702, + "grad_norm": 1.1561721410929258, + "learning_rate": 1.820379965457686e-05, + "loss": 0.5917, + "step": 1581 + }, + { + "epoch": 0.027336190212883606, + "grad_norm": 2.0859200792373715, + "learning_rate": 1.8215313759355212e-05, + "loss": 0.8632, + "step": 1582 + }, + { + "epoch": 0.027353469726292507, + "grad_norm": 1.567558626381117, + "learning_rate": 1.8226827864133564e-05, + "loss": 0.9665, + "step": 1583 + }, + { + "epoch": 0.02737074923970141, + "grad_norm": 1.9753359167180031, + "learning_rate": 1.823834196891192e-05, + "loss": 0.9109, + "step": 1584 + }, + { + "epoch": 0.027388028753110312, + "grad_norm": 2.001015817900782, + "learning_rate": 1.8249856073690273e-05, + "loss": 0.8332, + "step": 1585 + }, + { + "epoch": 0.027405308266519216, + "grad_norm": 1.7708880357866787, + "learning_rate": 1.8261370178468625e-05, + "loss": 0.8004, + "step": 1586 + }, + { + "epoch": 0.027422587779928117, + "grad_norm": 2.4645182416102838, + "learning_rate": 1.827288428324698e-05, + "loss": 0.8961, + "step": 1587 + }, + { + "epoch": 0.02743986729333702, + "grad_norm": 1.977967022736787, + "learning_rate": 1.8284398388025334e-05, + "loss": 1.0585, + "step": 1588 + }, + { + "epoch": 0.027457146806745922, + "grad_norm": 2.0299949503839017, + "learning_rate": 1.8295912492803686e-05, + "loss": 0.9065, + "step": 1589 + }, + { + "epoch": 0.027474426320154826, + "grad_norm": 1.8633482331546174, + "learning_rate": 1.8307426597582037e-05, + "loss": 0.8998, + "step": 1590 + }, + { + "epoch": 0.027491705833563727, + "grad_norm": 1.9105166968318303, + "learning_rate": 1.8318940702360392e-05, + "loss": 0.7116, + "step": 1591 + }, + { + "epoch": 0.02750898534697263, + "grad_norm": 2.037226236257001, + "learning_rate": 1.8330454807138747e-05, + "loss": 0.8887, + "step": 1592 + }, + { + "epoch": 0.02752626486038153, + "grad_norm": 1.7146969819871305, + "learning_rate": 1.83419689119171e-05, + "loss": 0.7193, + "step": 1593 + }, + { + "epoch": 0.027543544373790436, + "grad_norm": 2.0450303635119735, + "learning_rate": 1.8353483016695453e-05, + "loss": 0.699, + "step": 1594 + }, + { + "epoch": 0.027560823887199336, + "grad_norm": 1.8013419763479541, + "learning_rate": 1.8364997121473808e-05, + "loss": 0.996, + "step": 1595 + }, + { + "epoch": 0.027578103400608237, + "grad_norm": 1.8484353290452726, + "learning_rate": 1.837651122625216e-05, + "loss": 0.6411, + "step": 1596 + }, + { + "epoch": 0.02759538291401714, + "grad_norm": 1.849660278535941, + "learning_rate": 1.8388025331030514e-05, + "loss": 0.8446, + "step": 1597 + }, + { + "epoch": 0.027612662427426042, + "grad_norm": 2.144302116653354, + "learning_rate": 1.8399539435808865e-05, + "loss": 0.9208, + "step": 1598 + }, + { + "epoch": 0.027629941940834946, + "grad_norm": 1.4557620592912204, + "learning_rate": 1.841105354058722e-05, + "loss": 0.7697, + "step": 1599 + }, + { + "epoch": 0.027647221454243847, + "grad_norm": 2.1749240004546366, + "learning_rate": 1.8422567645365575e-05, + "loss": 0.9014, + "step": 1600 + }, + { + "epoch": 0.02766450096765275, + "grad_norm": 1.6251876891855424, + "learning_rate": 1.843408175014393e-05, + "loss": 0.9055, + "step": 1601 + }, + { + "epoch": 0.027681780481061652, + "grad_norm": 1.9570436512457554, + "learning_rate": 1.844559585492228e-05, + "loss": 0.8901, + "step": 1602 + }, + { + "epoch": 0.027699059994470556, + "grad_norm": 1.4919097882703554, + "learning_rate": 1.8457109959700633e-05, + "loss": 0.8995, + "step": 1603 + }, + { + "epoch": 0.027716339507879457, + "grad_norm": 1.0762639665576559, + "learning_rate": 1.8468624064478988e-05, + "loss": 0.8027, + "step": 1604 + }, + { + "epoch": 0.02773361902128836, + "grad_norm": 2.177370986301619, + "learning_rate": 1.8480138169257342e-05, + "loss": 0.8102, + "step": 1605 + }, + { + "epoch": 0.02775089853469726, + "grad_norm": 2.0744934386939478, + "learning_rate": 1.8491652274035697e-05, + "loss": 1.051, + "step": 1606 + }, + { + "epoch": 0.027768178048106166, + "grad_norm": 1.6331240819024995, + "learning_rate": 1.850316637881405e-05, + "loss": 0.696, + "step": 1607 + }, + { + "epoch": 0.027785457561515067, + "grad_norm": 1.9520137509359485, + "learning_rate": 1.8514680483592403e-05, + "loss": 1.1857, + "step": 1608 + }, + { + "epoch": 0.02780273707492397, + "grad_norm": 2.012099644377455, + "learning_rate": 1.8526194588370755e-05, + "loss": 0.6751, + "step": 1609 + }, + { + "epoch": 0.02782001658833287, + "grad_norm": 1.7543618619121009, + "learning_rate": 1.853770869314911e-05, + "loss": 0.7417, + "step": 1610 + }, + { + "epoch": 0.027837296101741776, + "grad_norm": 1.5985264696596393, + "learning_rate": 1.854922279792746e-05, + "loss": 0.7586, + "step": 1611 + }, + { + "epoch": 0.027854575615150676, + "grad_norm": 2.080664013335671, + "learning_rate": 1.8560736902705816e-05, + "loss": 1.0514, + "step": 1612 + }, + { + "epoch": 0.02787185512855958, + "grad_norm": 1.6750038243983307, + "learning_rate": 1.857225100748417e-05, + "loss": 0.6766, + "step": 1613 + }, + { + "epoch": 0.02788913464196848, + "grad_norm": 1.5667982383862487, + "learning_rate": 1.8583765112262526e-05, + "loss": 0.5968, + "step": 1614 + }, + { + "epoch": 0.027906414155377385, + "grad_norm": 1.8161196175447172, + "learning_rate": 1.8595279217040877e-05, + "loss": 0.8458, + "step": 1615 + }, + { + "epoch": 0.027923693668786286, + "grad_norm": 1.2156925407750525, + "learning_rate": 1.860679332181923e-05, + "loss": 0.67, + "step": 1616 + }, + { + "epoch": 0.02794097318219519, + "grad_norm": 1.5194339006778643, + "learning_rate": 1.8618307426597583e-05, + "loss": 0.9384, + "step": 1617 + }, + { + "epoch": 0.02795825269560409, + "grad_norm": 1.6704749362300388, + "learning_rate": 1.8629821531375938e-05, + "loss": 0.7555, + "step": 1618 + }, + { + "epoch": 0.027975532209012995, + "grad_norm": 2.1098969542849764, + "learning_rate": 1.864133563615429e-05, + "loss": 0.9144, + "step": 1619 + }, + { + "epoch": 0.027992811722421896, + "grad_norm": 1.4257128036412503, + "learning_rate": 1.8652849740932644e-05, + "loss": 0.6049, + "step": 1620 + }, + { + "epoch": 0.0280100912358308, + "grad_norm": 1.8176251141137403, + "learning_rate": 1.8664363845711e-05, + "loss": 0.7326, + "step": 1621 + }, + { + "epoch": 0.0280273707492397, + "grad_norm": 1.8201206019373832, + "learning_rate": 1.867587795048935e-05, + "loss": 0.9105, + "step": 1622 + }, + { + "epoch": 0.028044650262648605, + "grad_norm": 1.9915003355818384, + "learning_rate": 1.8687392055267702e-05, + "loss": 0.7894, + "step": 1623 + }, + { + "epoch": 0.028061929776057506, + "grad_norm": 1.74154675189759, + "learning_rate": 1.8698906160046057e-05, + "loss": 0.699, + "step": 1624 + }, + { + "epoch": 0.02807920928946641, + "grad_norm": 2.058618381601641, + "learning_rate": 1.871042026482441e-05, + "loss": 0.736, + "step": 1625 + }, + { + "epoch": 0.02809648880287531, + "grad_norm": 1.6226311035993672, + "learning_rate": 1.8721934369602766e-05, + "loss": 0.7954, + "step": 1626 + }, + { + "epoch": 0.028113768316284215, + "grad_norm": 1.741268830539374, + "learning_rate": 1.8733448474381118e-05, + "loss": 0.6134, + "step": 1627 + }, + { + "epoch": 0.028131047829693116, + "grad_norm": 1.838378812495015, + "learning_rate": 1.8744962579159473e-05, + "loss": 0.8897, + "step": 1628 + }, + { + "epoch": 0.02814832734310202, + "grad_norm": 1.617083754401382, + "learning_rate": 1.8756476683937824e-05, + "loss": 0.9725, + "step": 1629 + }, + { + "epoch": 0.02816560685651092, + "grad_norm": 2.0466572726485617, + "learning_rate": 1.876799078871618e-05, + "loss": 0.9656, + "step": 1630 + }, + { + "epoch": 0.028182886369919825, + "grad_norm": 1.4502649027263625, + "learning_rate": 1.8779504893494534e-05, + "loss": 0.6864, + "step": 1631 + }, + { + "epoch": 0.028200165883328725, + "grad_norm": 1.7770139630678254, + "learning_rate": 1.8791018998272885e-05, + "loss": 0.8992, + "step": 1632 + }, + { + "epoch": 0.02821744539673763, + "grad_norm": 2.3840839439317247, + "learning_rate": 1.880253310305124e-05, + "loss": 1.2029, + "step": 1633 + }, + { + "epoch": 0.02823472491014653, + "grad_norm": 1.8179448612422575, + "learning_rate": 1.8814047207829595e-05, + "loss": 0.6088, + "step": 1634 + }, + { + "epoch": 0.02825200442355543, + "grad_norm": 2.17045276214015, + "learning_rate": 1.8825561312607946e-05, + "loss": 1.0704, + "step": 1635 + }, + { + "epoch": 0.028269283936964335, + "grad_norm": 1.434428217140989, + "learning_rate": 1.8837075417386298e-05, + "loss": 0.7624, + "step": 1636 + }, + { + "epoch": 0.028286563450373236, + "grad_norm": 1.7678094597968121, + "learning_rate": 1.8848589522164652e-05, + "loss": 1.04, + "step": 1637 + }, + { + "epoch": 0.02830384296378214, + "grad_norm": 1.9647830016004013, + "learning_rate": 1.8860103626943007e-05, + "loss": 0.8287, + "step": 1638 + }, + { + "epoch": 0.02832112247719104, + "grad_norm": 1.9469655124654475, + "learning_rate": 1.8871617731721362e-05, + "loss": 0.8218, + "step": 1639 + }, + { + "epoch": 0.028338401990599945, + "grad_norm": 1.5586401198496123, + "learning_rate": 1.8883131836499713e-05, + "loss": 0.9253, + "step": 1640 + }, + { + "epoch": 0.028355681504008846, + "grad_norm": 1.9926102666891734, + "learning_rate": 1.8894645941278068e-05, + "loss": 0.9642, + "step": 1641 + }, + { + "epoch": 0.02837296101741775, + "grad_norm": 1.6380214067563241, + "learning_rate": 1.890616004605642e-05, + "loss": 0.864, + "step": 1642 + }, + { + "epoch": 0.02839024053082665, + "grad_norm": 1.7223481363957318, + "learning_rate": 1.8917674150834774e-05, + "loss": 0.749, + "step": 1643 + }, + { + "epoch": 0.028407520044235555, + "grad_norm": 1.7738848502644065, + "learning_rate": 1.8929188255613126e-05, + "loss": 0.8945, + "step": 1644 + }, + { + "epoch": 0.028424799557644456, + "grad_norm": 1.998124817170899, + "learning_rate": 1.894070236039148e-05, + "loss": 0.7942, + "step": 1645 + }, + { + "epoch": 0.02844207907105336, + "grad_norm": 1.7714228223693174, + "learning_rate": 1.8952216465169836e-05, + "loss": 0.8101, + "step": 1646 + }, + { + "epoch": 0.02845935858446226, + "grad_norm": 1.9420085233886266, + "learning_rate": 1.896373056994819e-05, + "loss": 0.9051, + "step": 1647 + }, + { + "epoch": 0.028476638097871165, + "grad_norm": 1.9624883692022839, + "learning_rate": 1.8975244674726542e-05, + "loss": 0.8617, + "step": 1648 + }, + { + "epoch": 0.028493917611280065, + "grad_norm": 1.6939843433274004, + "learning_rate": 1.8986758779504893e-05, + "loss": 0.7172, + "step": 1649 + }, + { + "epoch": 0.02851119712468897, + "grad_norm": 2.073787036116492, + "learning_rate": 1.8998272884283248e-05, + "loss": 0.9676, + "step": 1650 + }, + { + "epoch": 0.02852847663809787, + "grad_norm": 2.018869960521773, + "learning_rate": 1.9009786989061603e-05, + "loss": 0.7928, + "step": 1651 + }, + { + "epoch": 0.028545756151506774, + "grad_norm": 2.0704086957053187, + "learning_rate": 1.9021301093839954e-05, + "loss": 0.9748, + "step": 1652 + }, + { + "epoch": 0.028563035664915675, + "grad_norm": 2.209550500413756, + "learning_rate": 1.903281519861831e-05, + "loss": 0.9475, + "step": 1653 + }, + { + "epoch": 0.02858031517832458, + "grad_norm": 1.2760548879506228, + "learning_rate": 1.9044329303396664e-05, + "loss": 0.5736, + "step": 1654 + }, + { + "epoch": 0.02859759469173348, + "grad_norm": 1.8335359363630368, + "learning_rate": 1.9055843408175015e-05, + "loss": 0.8586, + "step": 1655 + }, + { + "epoch": 0.028614874205142384, + "grad_norm": 1.9328365272362291, + "learning_rate": 1.9067357512953367e-05, + "loss": 1.0575, + "step": 1656 + }, + { + "epoch": 0.028632153718551285, + "grad_norm": 1.6649800996662165, + "learning_rate": 1.907887161773172e-05, + "loss": 0.8481, + "step": 1657 + }, + { + "epoch": 0.02864943323196019, + "grad_norm": 1.9536467387381984, + "learning_rate": 1.9090385722510076e-05, + "loss": 0.903, + "step": 1658 + }, + { + "epoch": 0.02866671274536909, + "grad_norm": 1.620747142681916, + "learning_rate": 1.910189982728843e-05, + "loss": 0.6773, + "step": 1659 + }, + { + "epoch": 0.028683992258777994, + "grad_norm": 1.9092464086666678, + "learning_rate": 1.9113413932066786e-05, + "loss": 0.8625, + "step": 1660 + }, + { + "epoch": 0.028701271772186895, + "grad_norm": 1.3987351572686384, + "learning_rate": 1.9124928036845137e-05, + "loss": 0.8028, + "step": 1661 + }, + { + "epoch": 0.0287185512855958, + "grad_norm": 2.039940305321588, + "learning_rate": 1.913644214162349e-05, + "loss": 0.7866, + "step": 1662 + }, + { + "epoch": 0.0287358307990047, + "grad_norm": 1.4896453722163263, + "learning_rate": 1.9147956246401844e-05, + "loss": 0.9314, + "step": 1663 + }, + { + "epoch": 0.028753110312413604, + "grad_norm": 2.4951792252443274, + "learning_rate": 1.91594703511802e-05, + "loss": 1.0953, + "step": 1664 + }, + { + "epoch": 0.028770389825822505, + "grad_norm": 1.711993573206121, + "learning_rate": 1.917098445595855e-05, + "loss": 0.7444, + "step": 1665 + }, + { + "epoch": 0.02878766933923141, + "grad_norm": 1.9500492627617267, + "learning_rate": 1.9182498560736905e-05, + "loss": 0.8459, + "step": 1666 + }, + { + "epoch": 0.02880494885264031, + "grad_norm": 1.7148298144688585, + "learning_rate": 1.919401266551526e-05, + "loss": 0.9944, + "step": 1667 + }, + { + "epoch": 0.028822228366049214, + "grad_norm": 2.3349359214644214, + "learning_rate": 1.920552677029361e-05, + "loss": 0.6774, + "step": 1668 + }, + { + "epoch": 0.028839507879458114, + "grad_norm": 1.4707453878786574, + "learning_rate": 1.9217040875071962e-05, + "loss": 0.7047, + "step": 1669 + }, + { + "epoch": 0.02885678739286702, + "grad_norm": 3.003173707534881, + "learning_rate": 1.9228554979850317e-05, + "loss": 0.7695, + "step": 1670 + }, + { + "epoch": 0.02887406690627592, + "grad_norm": 2.0558650804418757, + "learning_rate": 1.9240069084628672e-05, + "loss": 0.8124, + "step": 1671 + }, + { + "epoch": 0.028891346419684823, + "grad_norm": 1.6136095180788834, + "learning_rate": 1.9251583189407027e-05, + "loss": 0.7653, + "step": 1672 + }, + { + "epoch": 0.028908625933093724, + "grad_norm": 1.8261216357803653, + "learning_rate": 1.9263097294185378e-05, + "loss": 1.045, + "step": 1673 + }, + { + "epoch": 0.028925905446502625, + "grad_norm": 1.7786651220871927, + "learning_rate": 1.9274611398963733e-05, + "loss": 1.0468, + "step": 1674 + }, + { + "epoch": 0.02894318495991153, + "grad_norm": 2.525129580793508, + "learning_rate": 1.9286125503742084e-05, + "loss": 1.1342, + "step": 1675 + }, + { + "epoch": 0.02896046447332043, + "grad_norm": 1.9505755854286948, + "learning_rate": 1.929763960852044e-05, + "loss": 0.8449, + "step": 1676 + }, + { + "epoch": 0.028977743986729334, + "grad_norm": 1.3592074706317046, + "learning_rate": 1.930915371329879e-05, + "loss": 0.5811, + "step": 1677 + }, + { + "epoch": 0.028995023500138235, + "grad_norm": 2.0771269758755957, + "learning_rate": 1.9320667818077146e-05, + "loss": 0.6472, + "step": 1678 + }, + { + "epoch": 0.02901230301354714, + "grad_norm": 1.6048823317250884, + "learning_rate": 1.93321819228555e-05, + "loss": 1.0104, + "step": 1679 + }, + { + "epoch": 0.02902958252695604, + "grad_norm": 1.882271034118414, + "learning_rate": 1.9343696027633855e-05, + "loss": 0.7644, + "step": 1680 + }, + { + "epoch": 0.029046862040364944, + "grad_norm": 1.4454853125214235, + "learning_rate": 1.9355210132412207e-05, + "loss": 0.7514, + "step": 1681 + }, + { + "epoch": 0.029064141553773844, + "grad_norm": 1.8226095046107005, + "learning_rate": 1.9366724237190558e-05, + "loss": 0.7448, + "step": 1682 + }, + { + "epoch": 0.02908142106718275, + "grad_norm": 1.426803802744846, + "learning_rate": 1.9378238341968913e-05, + "loss": 1.0555, + "step": 1683 + }, + { + "epoch": 0.02909870058059165, + "grad_norm": 1.0419677965057317, + "learning_rate": 1.9389752446747268e-05, + "loss": 0.5357, + "step": 1684 + }, + { + "epoch": 0.029115980094000554, + "grad_norm": 1.6071534927534046, + "learning_rate": 1.9401266551525622e-05, + "loss": 0.7629, + "step": 1685 + }, + { + "epoch": 0.029133259607409454, + "grad_norm": 1.494904679995581, + "learning_rate": 1.9412780656303974e-05, + "loss": 0.6559, + "step": 1686 + }, + { + "epoch": 0.02915053912081836, + "grad_norm": 1.8673704554157828, + "learning_rate": 1.942429476108233e-05, + "loss": 0.999, + "step": 1687 + }, + { + "epoch": 0.02916781863422726, + "grad_norm": 1.555467199759657, + "learning_rate": 1.943580886586068e-05, + "loss": 0.8359, + "step": 1688 + }, + { + "epoch": 0.029185098147636163, + "grad_norm": 1.5805932319608718, + "learning_rate": 1.9447322970639035e-05, + "loss": 0.3977, + "step": 1689 + }, + { + "epoch": 0.029202377661045064, + "grad_norm": 1.6097188326324576, + "learning_rate": 1.9458837075417386e-05, + "loss": 0.843, + "step": 1690 + }, + { + "epoch": 0.029219657174453968, + "grad_norm": 1.9030032116835238, + "learning_rate": 1.947035118019574e-05, + "loss": 1.0038, + "step": 1691 + }, + { + "epoch": 0.02923693668786287, + "grad_norm": 1.6467732722705004, + "learning_rate": 1.9481865284974096e-05, + "loss": 0.9907, + "step": 1692 + }, + { + "epoch": 0.029254216201271773, + "grad_norm": 2.242164054652387, + "learning_rate": 1.949337938975245e-05, + "loss": 1.0179, + "step": 1693 + }, + { + "epoch": 0.029271495714680674, + "grad_norm": 1.8801744906207456, + "learning_rate": 1.9504893494530802e-05, + "loss": 0.8887, + "step": 1694 + }, + { + "epoch": 0.029288775228089578, + "grad_norm": 1.3973283226383593, + "learning_rate": 1.9516407599309154e-05, + "loss": 0.7348, + "step": 1695 + }, + { + "epoch": 0.02930605474149848, + "grad_norm": 1.291907168392238, + "learning_rate": 1.952792170408751e-05, + "loss": 0.4273, + "step": 1696 + }, + { + "epoch": 0.029323334254907383, + "grad_norm": 1.6940484517723264, + "learning_rate": 1.9539435808865863e-05, + "loss": 0.7683, + "step": 1697 + }, + { + "epoch": 0.029340613768316284, + "grad_norm": 1.5466904830929888, + "learning_rate": 1.9550949913644215e-05, + "loss": 0.7741, + "step": 1698 + }, + { + "epoch": 0.029357893281725188, + "grad_norm": 1.8855058222665442, + "learning_rate": 1.956246401842257e-05, + "loss": 1.0973, + "step": 1699 + }, + { + "epoch": 0.02937517279513409, + "grad_norm": 1.8588891846311326, + "learning_rate": 1.9573978123200924e-05, + "loss": 0.9959, + "step": 1700 + }, + { + "epoch": 0.029392452308542993, + "grad_norm": 1.7935566572991337, + "learning_rate": 1.9585492227979276e-05, + "loss": 0.9045, + "step": 1701 + }, + { + "epoch": 0.029409731821951893, + "grad_norm": 1.1354442050239677, + "learning_rate": 1.9597006332757627e-05, + "loss": 0.9189, + "step": 1702 + }, + { + "epoch": 0.029427011335360798, + "grad_norm": 1.4688686308033863, + "learning_rate": 1.9608520437535982e-05, + "loss": 0.8241, + "step": 1703 + }, + { + "epoch": 0.0294442908487697, + "grad_norm": 1.9609962455916956, + "learning_rate": 1.9620034542314337e-05, + "loss": 0.9903, + "step": 1704 + }, + { + "epoch": 0.029461570362178603, + "grad_norm": 1.8494960933187627, + "learning_rate": 1.963154864709269e-05, + "loss": 1.0309, + "step": 1705 + }, + { + "epoch": 0.029478849875587503, + "grad_norm": 1.4409932415912494, + "learning_rate": 1.9643062751871043e-05, + "loss": 0.7591, + "step": 1706 + }, + { + "epoch": 0.029496129388996407, + "grad_norm": 1.6511192501399383, + "learning_rate": 1.9654576856649398e-05, + "loss": 0.6644, + "step": 1707 + }, + { + "epoch": 0.029513408902405308, + "grad_norm": 1.7310554169477237, + "learning_rate": 1.966609096142775e-05, + "loss": 0.9297, + "step": 1708 + }, + { + "epoch": 0.029530688415814212, + "grad_norm": 1.9811861330156106, + "learning_rate": 1.9677605066206104e-05, + "loss": 1.0721, + "step": 1709 + }, + { + "epoch": 0.029547967929223113, + "grad_norm": 1.6783448067478206, + "learning_rate": 1.9689119170984456e-05, + "loss": 0.7726, + "step": 1710 + }, + { + "epoch": 0.029565247442632017, + "grad_norm": 1.909361941528062, + "learning_rate": 1.970063327576281e-05, + "loss": 0.8975, + "step": 1711 + }, + { + "epoch": 0.029582526956040918, + "grad_norm": 2.1601296874819425, + "learning_rate": 1.9712147380541165e-05, + "loss": 0.868, + "step": 1712 + }, + { + "epoch": 0.02959980646944982, + "grad_norm": 1.6161624542945554, + "learning_rate": 1.972366148531952e-05, + "loss": 0.7802, + "step": 1713 + }, + { + "epoch": 0.029617085982858723, + "grad_norm": 1.6642922226932353, + "learning_rate": 1.973517559009787e-05, + "loss": 0.7328, + "step": 1714 + }, + { + "epoch": 0.029634365496267624, + "grad_norm": 1.3256340916485534, + "learning_rate": 1.9746689694876223e-05, + "loss": 0.9359, + "step": 1715 + }, + { + "epoch": 0.029651645009676528, + "grad_norm": 1.886058846031852, + "learning_rate": 1.9758203799654578e-05, + "loss": 0.8634, + "step": 1716 + }, + { + "epoch": 0.02966892452308543, + "grad_norm": 1.7177775419463301, + "learning_rate": 1.9769717904432932e-05, + "loss": 1.1036, + "step": 1717 + }, + { + "epoch": 0.029686204036494333, + "grad_norm": 1.5338352219578566, + "learning_rate": 1.9781232009211287e-05, + "loss": 0.6976, + "step": 1718 + }, + { + "epoch": 0.029703483549903233, + "grad_norm": 1.6038925257759131, + "learning_rate": 1.979274611398964e-05, + "loss": 0.9045, + "step": 1719 + }, + { + "epoch": 0.029720763063312138, + "grad_norm": 1.9154013711597315, + "learning_rate": 1.9804260218767994e-05, + "loss": 0.8431, + "step": 1720 + }, + { + "epoch": 0.029738042576721038, + "grad_norm": 1.8814808473140403, + "learning_rate": 1.9815774323546345e-05, + "loss": 0.8088, + "step": 1721 + }, + { + "epoch": 0.029755322090129942, + "grad_norm": 1.8071587320667741, + "learning_rate": 1.98272884283247e-05, + "loss": 0.8735, + "step": 1722 + }, + { + "epoch": 0.029772601603538843, + "grad_norm": 1.2057089583416445, + "learning_rate": 1.983880253310305e-05, + "loss": 0.5726, + "step": 1723 + }, + { + "epoch": 0.029789881116947747, + "grad_norm": 1.9279354208000294, + "learning_rate": 1.9850316637881406e-05, + "loss": 0.8513, + "step": 1724 + }, + { + "epoch": 0.029807160630356648, + "grad_norm": 1.430170303732994, + "learning_rate": 1.986183074265976e-05, + "loss": 0.7786, + "step": 1725 + }, + { + "epoch": 0.029824440143765552, + "grad_norm": 2.0453838730308567, + "learning_rate": 1.9873344847438116e-05, + "loss": 0.9425, + "step": 1726 + }, + { + "epoch": 0.029841719657174453, + "grad_norm": 2.181182610595479, + "learning_rate": 1.9884858952216467e-05, + "loss": 0.9444, + "step": 1727 + }, + { + "epoch": 0.029858999170583357, + "grad_norm": 1.5176292112304384, + "learning_rate": 1.989637305699482e-05, + "loss": 0.9776, + "step": 1728 + }, + { + "epoch": 0.029876278683992258, + "grad_norm": 1.4363823710763688, + "learning_rate": 1.9907887161773173e-05, + "loss": 0.7972, + "step": 1729 + }, + { + "epoch": 0.029893558197401162, + "grad_norm": 1.6202369263591532, + "learning_rate": 1.9919401266551528e-05, + "loss": 0.6582, + "step": 1730 + }, + { + "epoch": 0.029910837710810063, + "grad_norm": 1.6165324888667132, + "learning_rate": 1.993091537132988e-05, + "loss": 0.65, + "step": 1731 + }, + { + "epoch": 0.029928117224218967, + "grad_norm": 1.7319604849509425, + "learning_rate": 1.9942429476108234e-05, + "loss": 0.8585, + "step": 1732 + }, + { + "epoch": 0.029945396737627868, + "grad_norm": 2.599153214593955, + "learning_rate": 1.995394358088659e-05, + "loss": 1.0065, + "step": 1733 + }, + { + "epoch": 0.029962676251036772, + "grad_norm": 1.7353897074499811, + "learning_rate": 1.996545768566494e-05, + "loss": 0.6578, + "step": 1734 + }, + { + "epoch": 0.029979955764445673, + "grad_norm": 1.8015999918639725, + "learning_rate": 1.9976971790443292e-05, + "loss": 0.8836, + "step": 1735 + }, + { + "epoch": 0.029997235277854577, + "grad_norm": 1.354907607862614, + "learning_rate": 1.9988485895221647e-05, + "loss": 0.9879, + "step": 1736 + }, + { + "epoch": 0.030014514791263477, + "grad_norm": 1.682501274116266, + "learning_rate": 2e-05, + "loss": 0.8066, + "step": 1737 + }, + { + "epoch": 0.03003179430467238, + "grad_norm": 1.8748507664948846, + "learning_rate": 1.9999999984339623e-05, + "loss": 0.8105, + "step": 1738 + }, + { + "epoch": 0.030049073818081282, + "grad_norm": 1.1908212093567647, + "learning_rate": 1.9999999937358483e-05, + "loss": 0.749, + "step": 1739 + }, + { + "epoch": 0.030066353331490187, + "grad_norm": 1.2032009047249832, + "learning_rate": 1.9999999859056582e-05, + "loss": 0.8378, + "step": 1740 + }, + { + "epoch": 0.030083632844899087, + "grad_norm": 1.7064652702483625, + "learning_rate": 1.9999999749433924e-05, + "loss": 0.9654, + "step": 1741 + }, + { + "epoch": 0.03010091235830799, + "grad_norm": 1.5567268053273324, + "learning_rate": 1.9999999608490504e-05, + "loss": 0.7822, + "step": 1742 + }, + { + "epoch": 0.030118191871716892, + "grad_norm": 1.594216138714242, + "learning_rate": 1.9999999436226327e-05, + "loss": 1.0344, + "step": 1743 + }, + { + "epoch": 0.030135471385125796, + "grad_norm": 1.5354345825386395, + "learning_rate": 1.9999999232641393e-05, + "loss": 0.9433, + "step": 1744 + }, + { + "epoch": 0.030152750898534697, + "grad_norm": 1.7164273416462363, + "learning_rate": 1.99999989977357e-05, + "loss": 0.9955, + "step": 1745 + }, + { + "epoch": 0.0301700304119436, + "grad_norm": 1.6114303367707297, + "learning_rate": 1.999999873150925e-05, + "loss": 1.0486, + "step": 1746 + }, + { + "epoch": 0.030187309925352502, + "grad_norm": 1.5752585596130975, + "learning_rate": 1.9999998433962046e-05, + "loss": 0.5431, + "step": 1747 + }, + { + "epoch": 0.030204589438761406, + "grad_norm": 1.8450465319833669, + "learning_rate": 1.9999998105094084e-05, + "loss": 0.8395, + "step": 1748 + }, + { + "epoch": 0.030221868952170307, + "grad_norm": 1.8184763078749253, + "learning_rate": 1.9999997744905365e-05, + "loss": 0.806, + "step": 1749 + }, + { + "epoch": 0.03023914846557921, + "grad_norm": 2.13148588611815, + "learning_rate": 1.99999973533959e-05, + "loss": 0.8749, + "step": 1750 + }, + { + "epoch": 0.03025642797898811, + "grad_norm": 1.5621816693291826, + "learning_rate": 1.999999693056568e-05, + "loss": 0.9048, + "step": 1751 + }, + { + "epoch": 0.030273707492397012, + "grad_norm": 3.346225649515611, + "learning_rate": 1.9999996476414712e-05, + "loss": 1.0149, + "step": 1752 + }, + { + "epoch": 0.030290987005805917, + "grad_norm": 2.0349844393820624, + "learning_rate": 1.9999995990942992e-05, + "loss": 0.8768, + "step": 1753 + }, + { + "epoch": 0.030308266519214817, + "grad_norm": 1.705753810046606, + "learning_rate": 1.9999995474150526e-05, + "loss": 0.5751, + "step": 1754 + }, + { + "epoch": 0.03032554603262372, + "grad_norm": 1.9205993845314802, + "learning_rate": 1.9999994926037315e-05, + "loss": 0.6681, + "step": 1755 + }, + { + "epoch": 0.030342825546032622, + "grad_norm": 1.8644708376586367, + "learning_rate": 1.9999994346603356e-05, + "loss": 0.9962, + "step": 1756 + }, + { + "epoch": 0.030360105059441526, + "grad_norm": 1.5906355160702887, + "learning_rate": 1.999999373584866e-05, + "loss": 0.7794, + "step": 1757 + }, + { + "epoch": 0.030377384572850427, + "grad_norm": 1.9768417205925397, + "learning_rate": 1.999999309377322e-05, + "loss": 0.7925, + "step": 1758 + }, + { + "epoch": 0.03039466408625933, + "grad_norm": 1.6713044967295747, + "learning_rate": 1.9999992420377044e-05, + "loss": 0.5989, + "step": 1759 + }, + { + "epoch": 0.030411943599668232, + "grad_norm": 1.480556776652388, + "learning_rate": 1.999999171566013e-05, + "loss": 0.7534, + "step": 1760 + }, + { + "epoch": 0.030429223113077136, + "grad_norm": 1.7349289469969498, + "learning_rate": 1.9999990979622485e-05, + "loss": 0.8578, + "step": 1761 + }, + { + "epoch": 0.030446502626486037, + "grad_norm": 1.7842577188524782, + "learning_rate": 1.9999990212264103e-05, + "loss": 0.9335, + "step": 1762 + }, + { + "epoch": 0.03046378213989494, + "grad_norm": 1.7339627844461687, + "learning_rate": 1.9999989413584994e-05, + "loss": 0.949, + "step": 1763 + }, + { + "epoch": 0.030481061653303842, + "grad_norm": 1.3392931129682568, + "learning_rate": 1.999998858358516e-05, + "loss": 0.7787, + "step": 1764 + }, + { + "epoch": 0.030498341166712746, + "grad_norm": 1.6118004501288117, + "learning_rate": 1.9999987722264604e-05, + "loss": 0.9167, + "step": 1765 + }, + { + "epoch": 0.030515620680121647, + "grad_norm": 2.0243905623163667, + "learning_rate": 1.999998682962332e-05, + "loss": 0.9422, + "step": 1766 + }, + { + "epoch": 0.03053290019353055, + "grad_norm": 2.0368415344131368, + "learning_rate": 1.9999985905661323e-05, + "loss": 0.8151, + "step": 1767 + }, + { + "epoch": 0.03055017970693945, + "grad_norm": 1.3938348501636073, + "learning_rate": 1.9999984950378605e-05, + "loss": 0.9193, + "step": 1768 + }, + { + "epoch": 0.030567459220348356, + "grad_norm": 1.4139602208707431, + "learning_rate": 1.9999983963775177e-05, + "loss": 0.6877, + "step": 1769 + }, + { + "epoch": 0.030584738733757257, + "grad_norm": 1.408287260191714, + "learning_rate": 1.999998294585104e-05, + "loss": 0.8416, + "step": 1770 + }, + { + "epoch": 0.03060201824716616, + "grad_norm": 1.6900735390807546, + "learning_rate": 1.9999981896606194e-05, + "loss": 0.8004, + "step": 1771 + }, + { + "epoch": 0.03061929776057506, + "grad_norm": 1.6298511987884117, + "learning_rate": 1.999998081604065e-05, + "loss": 0.8018, + "step": 1772 + }, + { + "epoch": 0.030636577273983966, + "grad_norm": 1.8451322551248512, + "learning_rate": 1.99999797041544e-05, + "loss": 0.8999, + "step": 1773 + }, + { + "epoch": 0.030653856787392866, + "grad_norm": 1.7673997161587605, + "learning_rate": 1.9999978560947456e-05, + "loss": 0.7262, + "step": 1774 + }, + { + "epoch": 0.03067113630080177, + "grad_norm": 1.9134292270033835, + "learning_rate": 1.999997738641982e-05, + "loss": 0.8504, + "step": 1775 + }, + { + "epoch": 0.03068841581421067, + "grad_norm": 1.7812175710254587, + "learning_rate": 1.9999976180571493e-05, + "loss": 1.0498, + "step": 1776 + }, + { + "epoch": 0.030705695327619575, + "grad_norm": 1.8287513158782094, + "learning_rate": 1.999997494340248e-05, + "loss": 0.9431, + "step": 1777 + }, + { + "epoch": 0.030722974841028476, + "grad_norm": 1.4984688878629149, + "learning_rate": 1.999997367491279e-05, + "loss": 0.641, + "step": 1778 + }, + { + "epoch": 0.03074025435443738, + "grad_norm": 1.401135281588002, + "learning_rate": 1.999997237510242e-05, + "loss": 0.9226, + "step": 1779 + }, + { + "epoch": 0.03075753386784628, + "grad_norm": 1.9259971457290819, + "learning_rate": 1.9999971043971373e-05, + "loss": 1.0298, + "step": 1780 + }, + { + "epoch": 0.030774813381255185, + "grad_norm": 1.3572398036082995, + "learning_rate": 1.9999969681519664e-05, + "loss": 0.7434, + "step": 1781 + }, + { + "epoch": 0.030792092894664086, + "grad_norm": 1.2799946264528865, + "learning_rate": 1.999996828774728e-05, + "loss": 0.6469, + "step": 1782 + }, + { + "epoch": 0.03080937240807299, + "grad_norm": 1.7599809671617705, + "learning_rate": 1.9999966862654243e-05, + "loss": 0.6773, + "step": 1783 + }, + { + "epoch": 0.03082665192148189, + "grad_norm": 1.3330044217939359, + "learning_rate": 1.9999965406240548e-05, + "loss": 0.7555, + "step": 1784 + }, + { + "epoch": 0.030843931434890795, + "grad_norm": 1.1848708960556278, + "learning_rate": 1.99999639185062e-05, + "loss": 0.7922, + "step": 1785 + }, + { + "epoch": 0.030861210948299696, + "grad_norm": 1.4799661281771719, + "learning_rate": 1.9999962399451208e-05, + "loss": 0.6236, + "step": 1786 + }, + { + "epoch": 0.0308784904617086, + "grad_norm": 1.4448770974484564, + "learning_rate": 1.9999960849075573e-05, + "loss": 0.8461, + "step": 1787 + }, + { + "epoch": 0.0308957699751175, + "grad_norm": 1.6516925133448086, + "learning_rate": 1.99999592673793e-05, + "loss": 0.8681, + "step": 1788 + }, + { + "epoch": 0.030913049488526405, + "grad_norm": 1.1300990379841558, + "learning_rate": 1.9999957654362395e-05, + "loss": 0.4907, + "step": 1789 + }, + { + "epoch": 0.030930329001935306, + "grad_norm": 1.5326003701493038, + "learning_rate": 1.999995601002486e-05, + "loss": 0.5029, + "step": 1790 + }, + { + "epoch": 0.030947608515344206, + "grad_norm": 1.4708572599025334, + "learning_rate": 1.9999954334366706e-05, + "loss": 0.8295, + "step": 1791 + }, + { + "epoch": 0.03096488802875311, + "grad_norm": 1.5353695203499287, + "learning_rate": 1.9999952627387933e-05, + "loss": 0.9891, + "step": 1792 + }, + { + "epoch": 0.03098216754216201, + "grad_norm": 1.545170790064434, + "learning_rate": 1.9999950889088548e-05, + "loss": 0.6872, + "step": 1793 + }, + { + "epoch": 0.030999447055570915, + "grad_norm": 1.55037535545066, + "learning_rate": 1.999994911946856e-05, + "loss": 0.6983, + "step": 1794 + }, + { + "epoch": 0.031016726568979816, + "grad_norm": 1.7969850827561316, + "learning_rate": 1.999994731852797e-05, + "loss": 0.9867, + "step": 1795 + }, + { + "epoch": 0.03103400608238872, + "grad_norm": 2.031969543966859, + "learning_rate": 1.9999945486266786e-05, + "loss": 0.9082, + "step": 1796 + }, + { + "epoch": 0.03105128559579762, + "grad_norm": 2.0288111806625575, + "learning_rate": 1.9999943622685008e-05, + "loss": 0.946, + "step": 1797 + }, + { + "epoch": 0.031068565109206525, + "grad_norm": 1.5625200389347091, + "learning_rate": 1.999994172778265e-05, + "loss": 0.8824, + "step": 1798 + }, + { + "epoch": 0.031085844622615426, + "grad_norm": 1.5854871091133205, + "learning_rate": 1.9999939801559717e-05, + "loss": 0.8925, + "step": 1799 + }, + { + "epoch": 0.03110312413602433, + "grad_norm": 1.58668341792514, + "learning_rate": 1.999993784401621e-05, + "loss": 0.5476, + "step": 1800 + }, + { + "epoch": 0.03112040364943323, + "grad_norm": 1.4409921500741583, + "learning_rate": 1.9999935855152138e-05, + "loss": 0.7666, + "step": 1801 + }, + { + "epoch": 0.031137683162842135, + "grad_norm": 1.761831422394334, + "learning_rate": 1.9999933834967507e-05, + "loss": 0.8623, + "step": 1802 + }, + { + "epoch": 0.031154962676251036, + "grad_norm": 1.5126012709851622, + "learning_rate": 1.999993178346232e-05, + "loss": 0.7988, + "step": 1803 + }, + { + "epoch": 0.03117224218965994, + "grad_norm": 1.696529303628476, + "learning_rate": 1.999992970063659e-05, + "loss": 0.9028, + "step": 1804 + }, + { + "epoch": 0.03118952170306884, + "grad_norm": 1.703171092060233, + "learning_rate": 1.999992758649032e-05, + "loss": 0.7669, + "step": 1805 + }, + { + "epoch": 0.031206801216477745, + "grad_norm": 1.9036183251611987, + "learning_rate": 1.999992544102352e-05, + "loss": 1.0276, + "step": 1806 + }, + { + "epoch": 0.031224080729886645, + "grad_norm": 1.4865837682278529, + "learning_rate": 1.9999923264236188e-05, + "loss": 0.8851, + "step": 1807 + }, + { + "epoch": 0.03124136024329555, + "grad_norm": 0.9717553981369681, + "learning_rate": 1.999992105612834e-05, + "loss": 0.6434, + "step": 1808 + }, + { + "epoch": 0.031258639756704454, + "grad_norm": 1.3448618092297915, + "learning_rate": 1.9999918816699976e-05, + "loss": 0.859, + "step": 1809 + }, + { + "epoch": 0.031275919270113355, + "grad_norm": 1.4144150392361439, + "learning_rate": 1.999991654595111e-05, + "loss": 0.7844, + "step": 1810 + }, + { + "epoch": 0.031293198783522255, + "grad_norm": 1.2757798489684773, + "learning_rate": 1.9999914243881744e-05, + "loss": 0.7276, + "step": 1811 + }, + { + "epoch": 0.031310478296931156, + "grad_norm": 1.4977008137977628, + "learning_rate": 1.9999911910491888e-05, + "loss": 0.9714, + "step": 1812 + }, + { + "epoch": 0.031327757810340064, + "grad_norm": 1.684386187245006, + "learning_rate": 1.9999909545781545e-05, + "loss": 0.7634, + "step": 1813 + }, + { + "epoch": 0.031345037323748964, + "grad_norm": 1.331026443109404, + "learning_rate": 1.9999907149750726e-05, + "loss": 1.0581, + "step": 1814 + }, + { + "epoch": 0.031362316837157865, + "grad_norm": 1.3703313527762964, + "learning_rate": 1.9999904722399438e-05, + "loss": 0.9114, + "step": 1815 + }, + { + "epoch": 0.031379596350566766, + "grad_norm": 1.557389089916893, + "learning_rate": 1.999990226372769e-05, + "loss": 0.904, + "step": 1816 + }, + { + "epoch": 0.03139687586397567, + "grad_norm": 1.8089612267215864, + "learning_rate": 1.999989977373549e-05, + "loss": 0.7814, + "step": 1817 + }, + { + "epoch": 0.031414155377384574, + "grad_norm": 1.6079559359762008, + "learning_rate": 1.999989725242284e-05, + "loss": 0.6882, + "step": 1818 + }, + { + "epoch": 0.031431434890793475, + "grad_norm": 1.362145307167212, + "learning_rate": 1.999989469978975e-05, + "loss": 0.7754, + "step": 1819 + }, + { + "epoch": 0.031448714404202376, + "grad_norm": 1.9768062891334457, + "learning_rate": 1.9999892115836234e-05, + "loss": 0.9289, + "step": 1820 + }, + { + "epoch": 0.03146599391761128, + "grad_norm": 1.700895813879713, + "learning_rate": 1.9999889500562294e-05, + "loss": 0.9391, + "step": 1821 + }, + { + "epoch": 0.031483273431020184, + "grad_norm": 1.4423183385202945, + "learning_rate": 1.9999886853967938e-05, + "loss": 0.6556, + "step": 1822 + }, + { + "epoch": 0.031500552944429085, + "grad_norm": 1.4856443664968058, + "learning_rate": 1.999988417605318e-05, + "loss": 0.8373, + "step": 1823 + }, + { + "epoch": 0.031517832457837985, + "grad_norm": 1.6000575226972504, + "learning_rate": 1.9999881466818024e-05, + "loss": 0.6251, + "step": 1824 + }, + { + "epoch": 0.03153511197124689, + "grad_norm": 1.0733646049289476, + "learning_rate": 1.999987872626248e-05, + "loss": 0.7592, + "step": 1825 + }, + { + "epoch": 0.031552391484655794, + "grad_norm": 1.686523831209984, + "learning_rate": 1.9999875954386553e-05, + "loss": 0.8831, + "step": 1826 + }, + { + "epoch": 0.031569670998064694, + "grad_norm": 1.4443448343568905, + "learning_rate": 1.9999873151190256e-05, + "loss": 0.9094, + "step": 1827 + }, + { + "epoch": 0.031586950511473595, + "grad_norm": 1.7127932963535732, + "learning_rate": 1.99998703166736e-05, + "loss": 0.9495, + "step": 1828 + }, + { + "epoch": 0.031604230024882496, + "grad_norm": 1.1797239292064112, + "learning_rate": 1.9999867450836587e-05, + "loss": 0.7264, + "step": 1829 + }, + { + "epoch": 0.031621509538291404, + "grad_norm": 1.6375660383928428, + "learning_rate": 1.999986455367923e-05, + "loss": 0.6917, + "step": 1830 + }, + { + "epoch": 0.031638789051700304, + "grad_norm": 1.8774412466758896, + "learning_rate": 1.9999861625201535e-05, + "loss": 1.0601, + "step": 1831 + }, + { + "epoch": 0.031656068565109205, + "grad_norm": 1.4374383544229759, + "learning_rate": 1.9999858665403516e-05, + "loss": 0.9775, + "step": 1832 + }, + { + "epoch": 0.031673348078518106, + "grad_norm": 1.6838394722520225, + "learning_rate": 1.9999855674285177e-05, + "loss": 0.869, + "step": 1833 + }, + { + "epoch": 0.03169062759192701, + "grad_norm": 1.4239348622705057, + "learning_rate": 1.999985265184653e-05, + "loss": 0.7576, + "step": 1834 + }, + { + "epoch": 0.031707907105335914, + "grad_norm": 1.8366229073588423, + "learning_rate": 1.9999849598087588e-05, + "loss": 1.0225, + "step": 1835 + }, + { + "epoch": 0.031725186618744815, + "grad_norm": 1.397660963779594, + "learning_rate": 1.9999846513008358e-05, + "loss": 0.9396, + "step": 1836 + }, + { + "epoch": 0.031742466132153715, + "grad_norm": 1.3860007747518635, + "learning_rate": 1.9999843396608845e-05, + "loss": 0.8424, + "step": 1837 + }, + { + "epoch": 0.03175974564556262, + "grad_norm": 1.9967225700456492, + "learning_rate": 1.999984024888906e-05, + "loss": 0.95, + "step": 1838 + }, + { + "epoch": 0.031777025158971524, + "grad_norm": 2.004621632585115, + "learning_rate": 1.9999837069849023e-05, + "loss": 0.7554, + "step": 1839 + }, + { + "epoch": 0.031794304672380425, + "grad_norm": 1.5976782101441553, + "learning_rate": 1.999983385948873e-05, + "loss": 0.9791, + "step": 1840 + }, + { + "epoch": 0.031811584185789325, + "grad_norm": 1.551421118391439, + "learning_rate": 1.99998306178082e-05, + "loss": 0.5935, + "step": 1841 + }, + { + "epoch": 0.03182886369919823, + "grad_norm": 1.895609328523695, + "learning_rate": 1.999982734480744e-05, + "loss": 0.8724, + "step": 1842 + }, + { + "epoch": 0.031846143212607134, + "grad_norm": 1.407588800957367, + "learning_rate": 1.999982404048646e-05, + "loss": 1.0321, + "step": 1843 + }, + { + "epoch": 0.031863422726016034, + "grad_norm": 2.034069669863939, + "learning_rate": 1.9999820704845273e-05, + "loss": 0.7571, + "step": 1844 + }, + { + "epoch": 0.031880702239424935, + "grad_norm": 1.6311639108050824, + "learning_rate": 1.999981733788389e-05, + "loss": 1.0626, + "step": 1845 + }, + { + "epoch": 0.03189798175283384, + "grad_norm": 1.3763842148268752, + "learning_rate": 1.9999813939602312e-05, + "loss": 0.7841, + "step": 1846 + }, + { + "epoch": 0.03191526126624274, + "grad_norm": 1.3222074131701846, + "learning_rate": 1.9999810510000562e-05, + "loss": 0.7509, + "step": 1847 + }, + { + "epoch": 0.031932540779651644, + "grad_norm": 1.608105508465045, + "learning_rate": 1.9999807049078644e-05, + "loss": 0.7779, + "step": 1848 + }, + { + "epoch": 0.031949820293060545, + "grad_norm": 1.8018034106802279, + "learning_rate": 1.9999803556836572e-05, + "loss": 0.8318, + "step": 1849 + }, + { + "epoch": 0.03196709980646945, + "grad_norm": 1.4570807174486853, + "learning_rate": 1.9999800033274352e-05, + "loss": 0.7457, + "step": 1850 + }, + { + "epoch": 0.03198437931987835, + "grad_norm": 1.415146476681778, + "learning_rate": 1.9999796478392e-05, + "loss": 0.9409, + "step": 1851 + }, + { + "epoch": 0.032001658833287254, + "grad_norm": 1.919052398824012, + "learning_rate": 1.9999792892189527e-05, + "loss": 0.9098, + "step": 1852 + }, + { + "epoch": 0.032018938346696155, + "grad_norm": 1.5422673512954848, + "learning_rate": 1.9999789274666945e-05, + "loss": 0.8334, + "step": 1853 + }, + { + "epoch": 0.03203621786010506, + "grad_norm": 1.594630437047155, + "learning_rate": 1.999978562582426e-05, + "loss": 0.7233, + "step": 1854 + }, + { + "epoch": 0.03205349737351396, + "grad_norm": 1.9362245755417402, + "learning_rate": 1.9999781945661483e-05, + "loss": 0.6709, + "step": 1855 + }, + { + "epoch": 0.032070776886922864, + "grad_norm": 1.826196848569117, + "learning_rate": 1.9999778234178634e-05, + "loss": 0.844, + "step": 1856 + }, + { + "epoch": 0.032088056400331764, + "grad_norm": 1.0668047130331912, + "learning_rate": 1.9999774491375715e-05, + "loss": 0.5382, + "step": 1857 + }, + { + "epoch": 0.03210533591374067, + "grad_norm": 1.8363772481328124, + "learning_rate": 1.9999770717252747e-05, + "loss": 0.9655, + "step": 1858 + }, + { + "epoch": 0.03212261542714957, + "grad_norm": 1.7815284039370052, + "learning_rate": 1.9999766911809732e-05, + "loss": 0.6731, + "step": 1859 + }, + { + "epoch": 0.032139894940558474, + "grad_norm": 1.7243855773898573, + "learning_rate": 1.9999763075046692e-05, + "loss": 0.9225, + "step": 1860 + }, + { + "epoch": 0.032157174453967374, + "grad_norm": 1.5951808798170506, + "learning_rate": 1.9999759206963633e-05, + "loss": 0.8892, + "step": 1861 + }, + { + "epoch": 0.03217445396737628, + "grad_norm": 1.5745391897182588, + "learning_rate": 1.9999755307560568e-05, + "loss": 0.6575, + "step": 1862 + }, + { + "epoch": 0.03219173348078518, + "grad_norm": 2.4364825905333762, + "learning_rate": 1.9999751376837508e-05, + "loss": 0.7984, + "step": 1863 + }, + { + "epoch": 0.03220901299419408, + "grad_norm": 1.8814942558643604, + "learning_rate": 1.9999747414794466e-05, + "loss": 0.8391, + "step": 1864 + }, + { + "epoch": 0.032226292507602984, + "grad_norm": 1.879041941797401, + "learning_rate": 1.999974342143146e-05, + "loss": 1.0097, + "step": 1865 + }, + { + "epoch": 0.03224357202101189, + "grad_norm": 1.725028836264532, + "learning_rate": 1.9999739396748492e-05, + "loss": 1.0276, + "step": 1866 + }, + { + "epoch": 0.03226085153442079, + "grad_norm": 1.9468915215817186, + "learning_rate": 1.9999735340745583e-05, + "loss": 0.874, + "step": 1867 + }, + { + "epoch": 0.03227813104782969, + "grad_norm": 1.3094133971643624, + "learning_rate": 1.9999731253422743e-05, + "loss": 0.6701, + "step": 1868 + }, + { + "epoch": 0.032295410561238594, + "grad_norm": 1.254959954355014, + "learning_rate": 1.9999727134779982e-05, + "loss": 0.6126, + "step": 1869 + }, + { + "epoch": 0.032312690074647495, + "grad_norm": 0.9468885619669286, + "learning_rate": 1.999972298481732e-05, + "loss": 0.7942, + "step": 1870 + }, + { + "epoch": 0.0323299695880564, + "grad_norm": 1.4538408932733136, + "learning_rate": 1.9999718803534763e-05, + "loss": 0.6336, + "step": 1871 + }, + { + "epoch": 0.0323472491014653, + "grad_norm": 1.6172059723791308, + "learning_rate": 1.9999714590932328e-05, + "loss": 1.0281, + "step": 1872 + }, + { + "epoch": 0.032364528614874204, + "grad_norm": 2.0593657127592953, + "learning_rate": 1.9999710347010027e-05, + "loss": 0.687, + "step": 1873 + }, + { + "epoch": 0.032381808128283104, + "grad_norm": 1.2406741105949692, + "learning_rate": 1.999970607176787e-05, + "loss": 0.8201, + "step": 1874 + }, + { + "epoch": 0.03239908764169201, + "grad_norm": 2.171274945465683, + "learning_rate": 1.999970176520588e-05, + "loss": 1.0127, + "step": 1875 + }, + { + "epoch": 0.03241636715510091, + "grad_norm": 1.5552668932676503, + "learning_rate": 1.999969742732406e-05, + "loss": 0.7394, + "step": 1876 + }, + { + "epoch": 0.032433646668509813, + "grad_norm": 1.3285961853857773, + "learning_rate": 1.9999693058122427e-05, + "loss": 0.5002, + "step": 1877 + }, + { + "epoch": 0.032450926181918714, + "grad_norm": 1.5351069441151695, + "learning_rate": 1.9999688657600996e-05, + "loss": 0.7871, + "step": 1878 + }, + { + "epoch": 0.03246820569532762, + "grad_norm": 2.3055591131859523, + "learning_rate": 1.999968422575978e-05, + "loss": 0.7329, + "step": 1879 + }, + { + "epoch": 0.03248548520873652, + "grad_norm": 1.7126510280175407, + "learning_rate": 1.9999679762598796e-05, + "loss": 0.9373, + "step": 1880 + }, + { + "epoch": 0.03250276472214542, + "grad_norm": 1.7395459076987196, + "learning_rate": 1.9999675268118053e-05, + "loss": 0.7529, + "step": 1881 + }, + { + "epoch": 0.032520044235554324, + "grad_norm": 1.5421883623486088, + "learning_rate": 1.9999670742317566e-05, + "loss": 0.9158, + "step": 1882 + }, + { + "epoch": 0.03253732374896323, + "grad_norm": 1.6192013775843934, + "learning_rate": 1.999966618519735e-05, + "loss": 0.8717, + "step": 1883 + }, + { + "epoch": 0.03255460326237213, + "grad_norm": 1.4483914945025238, + "learning_rate": 1.9999661596757423e-05, + "loss": 0.7292, + "step": 1884 + }, + { + "epoch": 0.03257188277578103, + "grad_norm": 1.3894939758867617, + "learning_rate": 1.9999656976997794e-05, + "loss": 0.6846, + "step": 1885 + }, + { + "epoch": 0.032589162289189934, + "grad_norm": 1.4821367275518906, + "learning_rate": 1.9999652325918478e-05, + "loss": 0.8116, + "step": 1886 + }, + { + "epoch": 0.03260644180259884, + "grad_norm": 1.9648211216325764, + "learning_rate": 1.999964764351949e-05, + "loss": 0.6454, + "step": 1887 + }, + { + "epoch": 0.03262372131600774, + "grad_norm": 1.991349353576018, + "learning_rate": 1.999964292980085e-05, + "loss": 1.0159, + "step": 1888 + }, + { + "epoch": 0.03264100082941664, + "grad_norm": 1.8247174143994427, + "learning_rate": 1.9999638184762563e-05, + "loss": 0.8114, + "step": 1889 + }, + { + "epoch": 0.032658280342825544, + "grad_norm": 1.4347197392498077, + "learning_rate": 1.9999633408404655e-05, + "loss": 0.5908, + "step": 1890 + }, + { + "epoch": 0.03267555985623445, + "grad_norm": 1.8932465699306043, + "learning_rate": 1.999962860072713e-05, + "loss": 0.7479, + "step": 1891 + }, + { + "epoch": 0.03269283936964335, + "grad_norm": 1.9170004642790386, + "learning_rate": 1.9999623761730015e-05, + "loss": 0.9254, + "step": 1892 + }, + { + "epoch": 0.03271011888305225, + "grad_norm": 1.5296461437955755, + "learning_rate": 1.9999618891413313e-05, + "loss": 0.805, + "step": 1893 + }, + { + "epoch": 0.03272739839646115, + "grad_norm": 1.5928590668857407, + "learning_rate": 1.9999613989777043e-05, + "loss": 0.9429, + "step": 1894 + }, + { + "epoch": 0.03274467790987006, + "grad_norm": 2.0874805820272, + "learning_rate": 1.9999609056821225e-05, + "loss": 0.7054, + "step": 1895 + }, + { + "epoch": 0.03276195742327896, + "grad_norm": 2.6086438476845064, + "learning_rate": 1.9999604092545873e-05, + "loss": 1.1093, + "step": 1896 + }, + { + "epoch": 0.03277923693668786, + "grad_norm": 2.0933852512096314, + "learning_rate": 1.9999599096951e-05, + "loss": 0.8786, + "step": 1897 + }, + { + "epoch": 0.03279651645009676, + "grad_norm": 1.7335439382682793, + "learning_rate": 1.9999594070036622e-05, + "loss": 0.8182, + "step": 1898 + }, + { + "epoch": 0.03281379596350567, + "grad_norm": 1.7811665597879782, + "learning_rate": 1.9999589011802754e-05, + "loss": 0.8431, + "step": 1899 + }, + { + "epoch": 0.03283107547691457, + "grad_norm": 1.4447830808475184, + "learning_rate": 1.9999583922249416e-05, + "loss": 0.9258, + "step": 1900 + }, + { + "epoch": 0.03284835499032347, + "grad_norm": 1.4458431477783, + "learning_rate": 1.9999578801376622e-05, + "loss": 0.9119, + "step": 1901 + }, + { + "epoch": 0.03286563450373237, + "grad_norm": 1.0884158669142854, + "learning_rate": 1.9999573649184384e-05, + "loss": 0.5409, + "step": 1902 + }, + { + "epoch": 0.03288291401714128, + "grad_norm": 1.779390572698315, + "learning_rate": 1.9999568465672724e-05, + "loss": 0.8294, + "step": 1903 + }, + { + "epoch": 0.03290019353055018, + "grad_norm": 1.6615590527318422, + "learning_rate": 1.999956325084165e-05, + "loss": 1.0842, + "step": 1904 + }, + { + "epoch": 0.03291747304395908, + "grad_norm": 1.5521906251921167, + "learning_rate": 1.999955800469119e-05, + "loss": 0.8887, + "step": 1905 + }, + { + "epoch": 0.03293475255736798, + "grad_norm": 4.290503370113229, + "learning_rate": 1.9999552727221353e-05, + "loss": 0.9053, + "step": 1906 + }, + { + "epoch": 0.032952032070776884, + "grad_norm": 1.841946690366756, + "learning_rate": 1.9999547418432157e-05, + "loss": 0.9913, + "step": 1907 + }, + { + "epoch": 0.03296931158418579, + "grad_norm": 1.6535737317922397, + "learning_rate": 1.999954207832362e-05, + "loss": 0.9249, + "step": 1908 + }, + { + "epoch": 0.03298659109759469, + "grad_norm": 1.247458832845375, + "learning_rate": 1.9999536706895753e-05, + "loss": 0.6127, + "step": 1909 + }, + { + "epoch": 0.03300387061100359, + "grad_norm": 1.615577552068515, + "learning_rate": 1.999953130414858e-05, + "loss": 0.8465, + "step": 1910 + }, + { + "epoch": 0.03302115012441249, + "grad_norm": 1.3855986337248813, + "learning_rate": 1.9999525870082115e-05, + "loss": 0.9028, + "step": 1911 + }, + { + "epoch": 0.0330384296378214, + "grad_norm": 1.3287797720651027, + "learning_rate": 1.9999520404696376e-05, + "loss": 0.6867, + "step": 1912 + }, + { + "epoch": 0.0330557091512303, + "grad_norm": 1.8173396793338352, + "learning_rate": 1.9999514907991376e-05, + "loss": 0.9456, + "step": 1913 + }, + { + "epoch": 0.0330729886646392, + "grad_norm": 1.400561912028913, + "learning_rate": 1.999950937996714e-05, + "loss": 0.9358, + "step": 1914 + }, + { + "epoch": 0.0330902681780481, + "grad_norm": 1.8626835980772296, + "learning_rate": 1.9999503820623675e-05, + "loss": 0.7895, + "step": 1915 + }, + { + "epoch": 0.03310754769145701, + "grad_norm": 1.3248869802938799, + "learning_rate": 1.9999498229961007e-05, + "loss": 0.7611, + "step": 1916 + }, + { + "epoch": 0.03312482720486591, + "grad_norm": 1.4791583318776154, + "learning_rate": 1.999949260797915e-05, + "loss": 0.8679, + "step": 1917 + }, + { + "epoch": 0.03314210671827481, + "grad_norm": 1.8596043746165716, + "learning_rate": 1.9999486954678125e-05, + "loss": 1.0751, + "step": 1918 + }, + { + "epoch": 0.03315938623168371, + "grad_norm": 1.4409757154063154, + "learning_rate": 1.9999481270057944e-05, + "loss": 0.8595, + "step": 1919 + }, + { + "epoch": 0.03317666574509262, + "grad_norm": 1.5434330636478264, + "learning_rate": 1.999947555411863e-05, + "loss": 1.0494, + "step": 1920 + }, + { + "epoch": 0.03319394525850152, + "grad_norm": 1.5825884472517207, + "learning_rate": 1.9999469806860196e-05, + "loss": 0.8565, + "step": 1921 + }, + { + "epoch": 0.03321122477191042, + "grad_norm": 1.4058885094367775, + "learning_rate": 1.9999464028282666e-05, + "loss": 0.6122, + "step": 1922 + }, + { + "epoch": 0.03322850428531932, + "grad_norm": 1.597109028096792, + "learning_rate": 1.999945821838605e-05, + "loss": 0.6525, + "step": 1923 + }, + { + "epoch": 0.03324578379872823, + "grad_norm": 1.415257043320238, + "learning_rate": 1.999945237717037e-05, + "loss": 0.9355, + "step": 1924 + }, + { + "epoch": 0.03326306331213713, + "grad_norm": 1.6053205203472467, + "learning_rate": 1.999944650463565e-05, + "loss": 0.7843, + "step": 1925 + }, + { + "epoch": 0.03328034282554603, + "grad_norm": 1.370610262740285, + "learning_rate": 1.9999440600781902e-05, + "loss": 0.6092, + "step": 1926 + }, + { + "epoch": 0.03329762233895493, + "grad_norm": 1.8058145786650859, + "learning_rate": 1.999943466560915e-05, + "loss": 0.7475, + "step": 1927 + }, + { + "epoch": 0.03331490185236384, + "grad_norm": 1.2402834266345115, + "learning_rate": 1.9999428699117403e-05, + "loss": 0.7476, + "step": 1928 + }, + { + "epoch": 0.03333218136577274, + "grad_norm": 1.3418942233080076, + "learning_rate": 1.999942270130669e-05, + "loss": 0.7855, + "step": 1929 + }, + { + "epoch": 0.03334946087918164, + "grad_norm": 1.2864636326747785, + "learning_rate": 1.999941667217702e-05, + "loss": 0.8218, + "step": 1930 + }, + { + "epoch": 0.03336674039259054, + "grad_norm": 1.711450285757615, + "learning_rate": 1.9999410611728422e-05, + "loss": 0.7373, + "step": 1931 + }, + { + "epoch": 0.03338401990599945, + "grad_norm": 1.4864726367761036, + "learning_rate": 1.9999404519960907e-05, + "loss": 0.8936, + "step": 1932 + }, + { + "epoch": 0.03340129941940835, + "grad_norm": 1.427400626262, + "learning_rate": 1.9999398396874497e-05, + "loss": 0.9242, + "step": 1933 + }, + { + "epoch": 0.03341857893281725, + "grad_norm": 1.4471522602962636, + "learning_rate": 1.999939224246921e-05, + "loss": 0.8027, + "step": 1934 + }, + { + "epoch": 0.03343585844622615, + "grad_norm": 1.5800028950731695, + "learning_rate": 1.999938605674507e-05, + "loss": 0.8032, + "step": 1935 + }, + { + "epoch": 0.03345313795963506, + "grad_norm": 1.4037871461463902, + "learning_rate": 1.999937983970209e-05, + "loss": 0.933, + "step": 1936 + }, + { + "epoch": 0.03347041747304396, + "grad_norm": 0.8765244733246893, + "learning_rate": 1.9999373591340294e-05, + "loss": 0.7225, + "step": 1937 + }, + { + "epoch": 0.03348769698645286, + "grad_norm": 1.6104867970978891, + "learning_rate": 1.99993673116597e-05, + "loss": 0.846, + "step": 1938 + }, + { + "epoch": 0.03350497649986176, + "grad_norm": 1.9281219900620963, + "learning_rate": 1.999936100066033e-05, + "loss": 1.1898, + "step": 1939 + }, + { + "epoch": 0.03352225601327067, + "grad_norm": 1.626304105046729, + "learning_rate": 1.9999354658342197e-05, + "loss": 0.8344, + "step": 1940 + }, + { + "epoch": 0.03353953552667957, + "grad_norm": 1.3371860249020628, + "learning_rate": 1.999934828470533e-05, + "loss": 0.7762, + "step": 1941 + }, + { + "epoch": 0.03355681504008847, + "grad_norm": 1.8886164383190533, + "learning_rate": 1.999934187974974e-05, + "loss": 0.7028, + "step": 1942 + }, + { + "epoch": 0.03357409455349737, + "grad_norm": 1.4661188348239567, + "learning_rate": 1.9999335443475452e-05, + "loss": 0.7802, + "step": 1943 + }, + { + "epoch": 0.03359137406690628, + "grad_norm": 1.5218245040217806, + "learning_rate": 1.9999328975882488e-05, + "loss": 0.9231, + "step": 1944 + }, + { + "epoch": 0.03360865358031518, + "grad_norm": 1.6385630263940973, + "learning_rate": 1.9999322476970863e-05, + "loss": 0.9089, + "step": 1945 + }, + { + "epoch": 0.03362593309372408, + "grad_norm": 1.1791123406091732, + "learning_rate": 1.9999315946740602e-05, + "loss": 0.8598, + "step": 1946 + }, + { + "epoch": 0.03364321260713298, + "grad_norm": 1.7529268372396032, + "learning_rate": 1.9999309385191724e-05, + "loss": 0.959, + "step": 1947 + }, + { + "epoch": 0.03366049212054188, + "grad_norm": 1.563344626827666, + "learning_rate": 1.9999302792324247e-05, + "loss": 0.7717, + "step": 1948 + }, + { + "epoch": 0.03367777163395079, + "grad_norm": 1.4615099678273664, + "learning_rate": 1.9999296168138197e-05, + "loss": 0.8902, + "step": 1949 + }, + { + "epoch": 0.03369505114735969, + "grad_norm": 1.3187360916495536, + "learning_rate": 1.9999289512633586e-05, + "loss": 0.6784, + "step": 1950 + }, + { + "epoch": 0.03371233066076859, + "grad_norm": 1.4965019844042586, + "learning_rate": 1.9999282825810443e-05, + "loss": 0.8649, + "step": 1951 + }, + { + "epoch": 0.03372961017417749, + "grad_norm": 1.5004556311288841, + "learning_rate": 1.9999276107668785e-05, + "loss": 0.8946, + "step": 1952 + }, + { + "epoch": 0.0337468896875864, + "grad_norm": 1.465405396906583, + "learning_rate": 1.999926935820864e-05, + "loss": 0.9188, + "step": 1953 + }, + { + "epoch": 0.0337641692009953, + "grad_norm": 1.9118972778684333, + "learning_rate": 1.9999262577430017e-05, + "loss": 0.7159, + "step": 1954 + }, + { + "epoch": 0.0337814487144042, + "grad_norm": 1.4540507129747495, + "learning_rate": 1.9999255765332947e-05, + "loss": 0.9898, + "step": 1955 + }, + { + "epoch": 0.0337987282278131, + "grad_norm": 2.03361213974819, + "learning_rate": 1.9999248921917447e-05, + "loss": 0.7083, + "step": 1956 + }, + { + "epoch": 0.03381600774122201, + "grad_norm": 1.7635441353035266, + "learning_rate": 1.999924204718354e-05, + "loss": 0.9212, + "step": 1957 + }, + { + "epoch": 0.03383328725463091, + "grad_norm": 1.4061135894599495, + "learning_rate": 1.9999235141131246e-05, + "loss": 0.6622, + "step": 1958 + }, + { + "epoch": 0.03385056676803981, + "grad_norm": 1.6974836699821998, + "learning_rate": 1.999922820376059e-05, + "loss": 0.9554, + "step": 1959 + }, + { + "epoch": 0.03386784628144871, + "grad_norm": 1.5127227396573217, + "learning_rate": 1.9999221235071586e-05, + "loss": 0.8286, + "step": 1960 + }, + { + "epoch": 0.03388512579485762, + "grad_norm": 1.5154198938138261, + "learning_rate": 1.9999214235064265e-05, + "loss": 0.6955, + "step": 1961 + }, + { + "epoch": 0.03390240530826652, + "grad_norm": 2.0671007767608542, + "learning_rate": 1.9999207203738646e-05, + "loss": 1.129, + "step": 1962 + }, + { + "epoch": 0.03391968482167542, + "grad_norm": 1.3710660052664885, + "learning_rate": 1.999920014109475e-05, + "loss": 0.7892, + "step": 1963 + }, + { + "epoch": 0.03393696433508432, + "grad_norm": 0.8656149272007592, + "learning_rate": 1.99991930471326e-05, + "loss": 0.6082, + "step": 1964 + }, + { + "epoch": 0.03395424384849323, + "grad_norm": 1.6880924553208605, + "learning_rate": 1.9999185921852213e-05, + "loss": 0.9728, + "step": 1965 + }, + { + "epoch": 0.03397152336190213, + "grad_norm": 1.5630440402263062, + "learning_rate": 1.9999178765253617e-05, + "loss": 0.797, + "step": 1966 + }, + { + "epoch": 0.03398880287531103, + "grad_norm": 0.9741890065350626, + "learning_rate": 1.9999171577336834e-05, + "loss": 0.6454, + "step": 1967 + }, + { + "epoch": 0.03400608238871993, + "grad_norm": 1.2938916060217165, + "learning_rate": 1.999916435810189e-05, + "loss": 0.9108, + "step": 1968 + }, + { + "epoch": 0.03402336190212884, + "grad_norm": 1.9616931137665097, + "learning_rate": 1.9999157107548795e-05, + "loss": 1.0862, + "step": 1969 + }, + { + "epoch": 0.03404064141553774, + "grad_norm": 1.5729983283161728, + "learning_rate": 1.9999149825677587e-05, + "loss": 0.7744, + "step": 1970 + }, + { + "epoch": 0.03405792092894664, + "grad_norm": 1.4617557234632472, + "learning_rate": 1.999914251248828e-05, + "loss": 0.8808, + "step": 1971 + }, + { + "epoch": 0.03407520044235554, + "grad_norm": 1.6859063846505729, + "learning_rate": 1.99991351679809e-05, + "loss": 0.8492, + "step": 1972 + }, + { + "epoch": 0.03409247995576445, + "grad_norm": 1.3808186456947218, + "learning_rate": 1.9999127792155465e-05, + "loss": 0.6069, + "step": 1973 + }, + { + "epoch": 0.03410975946917335, + "grad_norm": 1.336013642138584, + "learning_rate": 1.9999120385012004e-05, + "loss": 0.9776, + "step": 1974 + }, + { + "epoch": 0.03412703898258225, + "grad_norm": 1.3347199146111524, + "learning_rate": 1.999911294655054e-05, + "loss": 0.8135, + "step": 1975 + }, + { + "epoch": 0.03414431849599115, + "grad_norm": 1.6202990694174628, + "learning_rate": 1.9999105476771092e-05, + "loss": 0.8015, + "step": 1976 + }, + { + "epoch": 0.03416159800940006, + "grad_norm": 1.4626066728130942, + "learning_rate": 1.9999097975673686e-05, + "loss": 0.9167, + "step": 1977 + }, + { + "epoch": 0.03417887752280896, + "grad_norm": 1.6711822855119487, + "learning_rate": 1.9999090443258344e-05, + "loss": 0.9198, + "step": 1978 + }, + { + "epoch": 0.03419615703621786, + "grad_norm": 1.4577349124179237, + "learning_rate": 1.999908287952509e-05, + "loss": 0.5684, + "step": 1979 + }, + { + "epoch": 0.03421343654962676, + "grad_norm": 1.565934075344114, + "learning_rate": 1.9999075284473953e-05, + "loss": 1.0751, + "step": 1980 + }, + { + "epoch": 0.03423071606303567, + "grad_norm": 1.5398902288287961, + "learning_rate": 1.9999067658104948e-05, + "loss": 0.8753, + "step": 1981 + }, + { + "epoch": 0.03424799557644457, + "grad_norm": 2.0233690973572016, + "learning_rate": 1.9999060000418106e-05, + "loss": 0.8998, + "step": 1982 + }, + { + "epoch": 0.03426527508985347, + "grad_norm": 1.5233897290618788, + "learning_rate": 1.999905231141345e-05, + "loss": 0.677, + "step": 1983 + }, + { + "epoch": 0.03428255460326237, + "grad_norm": 1.721659443077621, + "learning_rate": 1.9999044591091e-05, + "loss": 1.0455, + "step": 1984 + }, + { + "epoch": 0.03429983411667127, + "grad_norm": 1.598881784720459, + "learning_rate": 1.9999036839450777e-05, + "loss": 1.0301, + "step": 1985 + }, + { + "epoch": 0.03431711363008018, + "grad_norm": 1.3977153874700623, + "learning_rate": 1.9999029056492817e-05, + "loss": 0.772, + "step": 1986 + }, + { + "epoch": 0.03433439314348908, + "grad_norm": 1.613882810961785, + "learning_rate": 1.999902124221714e-05, + "loss": 0.7356, + "step": 1987 + }, + { + "epoch": 0.03435167265689798, + "grad_norm": 1.700017424054763, + "learning_rate": 1.9999013396623762e-05, + "loss": 1.1061, + "step": 1988 + }, + { + "epoch": 0.03436895217030688, + "grad_norm": 1.9905506581517314, + "learning_rate": 1.9999005519712718e-05, + "loss": 0.5849, + "step": 1989 + }, + { + "epoch": 0.03438623168371579, + "grad_norm": 1.0891693432313378, + "learning_rate": 1.9998997611484026e-05, + "loss": 0.4923, + "step": 1990 + }, + { + "epoch": 0.03440351119712469, + "grad_norm": 1.601124809010796, + "learning_rate": 1.9998989671937718e-05, + "loss": 0.9345, + "step": 1991 + }, + { + "epoch": 0.03442079071053359, + "grad_norm": 1.442029027177446, + "learning_rate": 1.9998981701073813e-05, + "loss": 0.8359, + "step": 1992 + }, + { + "epoch": 0.03443807022394249, + "grad_norm": 1.7277075892842195, + "learning_rate": 1.999897369889234e-05, + "loss": 0.8776, + "step": 1993 + }, + { + "epoch": 0.0344553497373514, + "grad_norm": 1.6133264525681952, + "learning_rate": 1.999896566539332e-05, + "loss": 0.8561, + "step": 1994 + }, + { + "epoch": 0.0344726292507603, + "grad_norm": 1.2242956088789132, + "learning_rate": 1.9998957600576773e-05, + "loss": 0.5783, + "step": 1995 + }, + { + "epoch": 0.0344899087641692, + "grad_norm": 1.4394886388220511, + "learning_rate": 1.9998949504442737e-05, + "loss": 0.8767, + "step": 1996 + }, + { + "epoch": 0.0345071882775781, + "grad_norm": 1.5826642225361036, + "learning_rate": 1.999894137699123e-05, + "loss": 0.7714, + "step": 1997 + }, + { + "epoch": 0.03452446779098701, + "grad_norm": 1.9824739353061185, + "learning_rate": 1.9998933218222277e-05, + "loss": 0.8739, + "step": 1998 + }, + { + "epoch": 0.03454174730439591, + "grad_norm": 1.3711970634583257, + "learning_rate": 1.9998925028135908e-05, + "loss": 0.61, + "step": 1999 + }, + { + "epoch": 0.03455902681780481, + "grad_norm": 1.4572447230337777, + "learning_rate": 1.9998916806732146e-05, + "loss": 0.721, + "step": 2000 + }, + { + "epoch": 0.03457630633121371, + "grad_norm": 1.3739600927777909, + "learning_rate": 1.9998908554011014e-05, + "loss": 0.7834, + "step": 2001 + }, + { + "epoch": 0.03459358584462262, + "grad_norm": 1.3756832449906289, + "learning_rate": 1.9998900269972544e-05, + "loss": 0.7691, + "step": 2002 + }, + { + "epoch": 0.03461086535803152, + "grad_norm": 1.5981209017269389, + "learning_rate": 1.9998891954616755e-05, + "loss": 0.9995, + "step": 2003 + }, + { + "epoch": 0.03462814487144042, + "grad_norm": 1.5098911475048151, + "learning_rate": 1.9998883607943677e-05, + "loss": 0.7915, + "step": 2004 + }, + { + "epoch": 0.03464542438484932, + "grad_norm": 1.6072307571702973, + "learning_rate": 1.999887522995334e-05, + "loss": 0.9891, + "step": 2005 + }, + { + "epoch": 0.03466270389825823, + "grad_norm": 1.3212561808479208, + "learning_rate": 1.9998866820645763e-05, + "loss": 0.6899, + "step": 2006 + }, + { + "epoch": 0.03467998341166713, + "grad_norm": 2.120634418074734, + "learning_rate": 1.9998858380020974e-05, + "loss": 0.8504, + "step": 2007 + }, + { + "epoch": 0.03469726292507603, + "grad_norm": 1.4932039495322242, + "learning_rate": 1.9998849908079e-05, + "loss": 0.731, + "step": 2008 + }, + { + "epoch": 0.03471454243848493, + "grad_norm": 1.4080160176823562, + "learning_rate": 1.9998841404819873e-05, + "loss": 0.6958, + "step": 2009 + }, + { + "epoch": 0.03473182195189384, + "grad_norm": 1.4833502438904684, + "learning_rate": 1.9998832870243608e-05, + "loss": 0.8024, + "step": 2010 + }, + { + "epoch": 0.03474910146530274, + "grad_norm": 1.7509912232448175, + "learning_rate": 1.9998824304350246e-05, + "loss": 0.9675, + "step": 2011 + }, + { + "epoch": 0.03476638097871164, + "grad_norm": 1.7486086432551649, + "learning_rate": 1.99988157071398e-05, + "loss": 0.785, + "step": 2012 + }, + { + "epoch": 0.03478366049212054, + "grad_norm": 1.3307482262206674, + "learning_rate": 1.9998807078612306e-05, + "loss": 0.868, + "step": 2013 + }, + { + "epoch": 0.03480094000552945, + "grad_norm": 1.421425354374307, + "learning_rate": 1.9998798418767787e-05, + "loss": 0.907, + "step": 2014 + }, + { + "epoch": 0.03481821951893835, + "grad_norm": 1.6590041383767014, + "learning_rate": 1.9998789727606274e-05, + "loss": 1.0618, + "step": 2015 + }, + { + "epoch": 0.03483549903234725, + "grad_norm": 2.07954411869619, + "learning_rate": 1.999878100512779e-05, + "loss": 0.7395, + "step": 2016 + }, + { + "epoch": 0.03485277854575615, + "grad_norm": 1.3924005644122024, + "learning_rate": 1.9998772251332364e-05, + "loss": 0.715, + "step": 2017 + }, + { + "epoch": 0.03487005805916506, + "grad_norm": 1.1283317184784818, + "learning_rate": 1.9998763466220024e-05, + "loss": 0.5951, + "step": 2018 + }, + { + "epoch": 0.03488733757257396, + "grad_norm": 1.5396777311276661, + "learning_rate": 1.99987546497908e-05, + "loss": 0.5429, + "step": 2019 + }, + { + "epoch": 0.03490461708598286, + "grad_norm": 1.6032705321693728, + "learning_rate": 1.999874580204471e-05, + "loss": 0.7385, + "step": 2020 + }, + { + "epoch": 0.03492189659939176, + "grad_norm": 1.0518776790041102, + "learning_rate": 1.9998736922981795e-05, + "loss": 0.5259, + "step": 2021 + }, + { + "epoch": 0.03493917611280067, + "grad_norm": 1.8137637148200216, + "learning_rate": 1.999872801260207e-05, + "loss": 0.7657, + "step": 2022 + }, + { + "epoch": 0.03495645562620957, + "grad_norm": 1.2266615241773746, + "learning_rate": 1.9998719070905573e-05, + "loss": 0.6668, + "step": 2023 + }, + { + "epoch": 0.03497373513961847, + "grad_norm": 1.3738365768360714, + "learning_rate": 1.9998710097892326e-05, + "loss": 0.8597, + "step": 2024 + }, + { + "epoch": 0.03499101465302737, + "grad_norm": 1.5917423583932442, + "learning_rate": 1.999870109356236e-05, + "loss": 0.8793, + "step": 2025 + }, + { + "epoch": 0.03500829416643627, + "grad_norm": 1.3725093512611024, + "learning_rate": 1.9998692057915702e-05, + "loss": 0.7204, + "step": 2026 + }, + { + "epoch": 0.03502557367984518, + "grad_norm": 1.603893340639968, + "learning_rate": 1.999868299095238e-05, + "loss": 0.9773, + "step": 2027 + }, + { + "epoch": 0.03504285319325408, + "grad_norm": 1.2218772349155882, + "learning_rate": 1.9998673892672426e-05, + "loss": 0.6159, + "step": 2028 + }, + { + "epoch": 0.03506013270666298, + "grad_norm": 1.6049724862046275, + "learning_rate": 1.9998664763075862e-05, + "loss": 0.9582, + "step": 2029 + }, + { + "epoch": 0.03507741222007188, + "grad_norm": 1.6537338463319415, + "learning_rate": 1.999865560216272e-05, + "loss": 0.8699, + "step": 2030 + }, + { + "epoch": 0.03509469173348079, + "grad_norm": 1.520599393718575, + "learning_rate": 1.999864640993303e-05, + "loss": 0.8499, + "step": 2031 + }, + { + "epoch": 0.03511197124688969, + "grad_norm": 1.65369110635924, + "learning_rate": 1.999863718638682e-05, + "loss": 0.7663, + "step": 2032 + }, + { + "epoch": 0.03512925076029859, + "grad_norm": 1.6174955703921348, + "learning_rate": 1.9998627931524117e-05, + "loss": 0.69, + "step": 2033 + }, + { + "epoch": 0.03514653027370749, + "grad_norm": 1.2063652861907137, + "learning_rate": 1.9998618645344954e-05, + "loss": 0.8353, + "step": 2034 + }, + { + "epoch": 0.0351638097871164, + "grad_norm": 1.411257138532354, + "learning_rate": 1.9998609327849357e-05, + "loss": 0.8029, + "step": 2035 + }, + { + "epoch": 0.0351810893005253, + "grad_norm": 1.555556352168896, + "learning_rate": 1.9998599979037353e-05, + "loss": 1.0586, + "step": 2036 + }, + { + "epoch": 0.0351983688139342, + "grad_norm": 1.4643540547149179, + "learning_rate": 1.9998590598908975e-05, + "loss": 0.7685, + "step": 2037 + }, + { + "epoch": 0.0352156483273431, + "grad_norm": 1.5877664172137722, + "learning_rate": 1.9998581187464254e-05, + "loss": 0.7673, + "step": 2038 + }, + { + "epoch": 0.03523292784075201, + "grad_norm": 1.3531199071704811, + "learning_rate": 1.9998571744703215e-05, + "loss": 0.7744, + "step": 2039 + }, + { + "epoch": 0.03525020735416091, + "grad_norm": 1.6205310213584252, + "learning_rate": 1.999856227062589e-05, + "loss": 1.0066, + "step": 2040 + }, + { + "epoch": 0.03526748686756981, + "grad_norm": 1.7933260634350536, + "learning_rate": 1.9998552765232306e-05, + "loss": 0.7428, + "step": 2041 + }, + { + "epoch": 0.03528476638097871, + "grad_norm": 1.1228463167188443, + "learning_rate": 1.99985432285225e-05, + "loss": 0.804, + "step": 2042 + }, + { + "epoch": 0.03530204589438762, + "grad_norm": 1.4260646420725642, + "learning_rate": 1.999853366049649e-05, + "loss": 0.9073, + "step": 2043 + }, + { + "epoch": 0.03531932540779652, + "grad_norm": 1.076851006933586, + "learning_rate": 1.9998524061154316e-05, + "loss": 0.8739, + "step": 2044 + }, + { + "epoch": 0.03533660492120542, + "grad_norm": 1.722456304123722, + "learning_rate": 1.9998514430496005e-05, + "loss": 1.0237, + "step": 2045 + }, + { + "epoch": 0.03535388443461432, + "grad_norm": 1.2331056692096682, + "learning_rate": 1.9998504768521588e-05, + "loss": 0.6195, + "step": 2046 + }, + { + "epoch": 0.035371163948023226, + "grad_norm": 1.5763920659036685, + "learning_rate": 1.9998495075231093e-05, + "loss": 0.8542, + "step": 2047 + }, + { + "epoch": 0.03538844346143213, + "grad_norm": 1.5895629307653465, + "learning_rate": 1.999848535062455e-05, + "loss": 0.9848, + "step": 2048 + }, + { + "epoch": 0.03540572297484103, + "grad_norm": 1.1095576751448928, + "learning_rate": 1.999847559470199e-05, + "loss": 0.6583, + "step": 2049 + }, + { + "epoch": 0.03542300248824993, + "grad_norm": 1.521474129913544, + "learning_rate": 1.999846580746345e-05, + "loss": 0.9302, + "step": 2050 + }, + { + "epoch": 0.035440282001658836, + "grad_norm": 1.5758325902094066, + "learning_rate": 1.9998455988908952e-05, + "loss": 0.8681, + "step": 2051 + }, + { + "epoch": 0.03545756151506774, + "grad_norm": 1.4537584959052943, + "learning_rate": 1.999844613903853e-05, + "loss": 0.7741, + "step": 2052 + }, + { + "epoch": 0.03547484102847664, + "grad_norm": 1.5290003249323383, + "learning_rate": 1.9998436257852215e-05, + "loss": 0.6996, + "step": 2053 + }, + { + "epoch": 0.03549212054188554, + "grad_norm": 1.4869618419696495, + "learning_rate": 1.999842634535004e-05, + "loss": 0.6988, + "step": 2054 + }, + { + "epoch": 0.035509400055294446, + "grad_norm": 1.4257485496428435, + "learning_rate": 1.999841640153203e-05, + "loss": 0.7517, + "step": 2055 + }, + { + "epoch": 0.03552667956870335, + "grad_norm": 1.9313683485014534, + "learning_rate": 1.9998406426398223e-05, + "loss": 0.8805, + "step": 2056 + }, + { + "epoch": 0.03554395908211225, + "grad_norm": 1.2388479068147404, + "learning_rate": 1.9998396419948646e-05, + "loss": 0.9671, + "step": 2057 + }, + { + "epoch": 0.03556123859552115, + "grad_norm": 1.2743224726643247, + "learning_rate": 1.999838638218333e-05, + "loss": 0.8512, + "step": 2058 + }, + { + "epoch": 0.035578518108930056, + "grad_norm": 1.7737767152052315, + "learning_rate": 1.999837631310231e-05, + "loss": 0.9046, + "step": 2059 + }, + { + "epoch": 0.03559579762233896, + "grad_norm": 1.438911587098702, + "learning_rate": 1.9998366212705615e-05, + "loss": 1.0249, + "step": 2060 + }, + { + "epoch": 0.03561307713574786, + "grad_norm": 1.5029385903586898, + "learning_rate": 1.9998356080993275e-05, + "loss": 0.9023, + "step": 2061 + }, + { + "epoch": 0.03563035664915676, + "grad_norm": 1.566276312859583, + "learning_rate": 1.9998345917965327e-05, + "loss": 0.7288, + "step": 2062 + }, + { + "epoch": 0.03564763616256566, + "grad_norm": 1.3571066144824735, + "learning_rate": 1.99983357236218e-05, + "loss": 0.8765, + "step": 2063 + }, + { + "epoch": 0.035664915675974566, + "grad_norm": 1.2589922218271896, + "learning_rate": 1.9998325497962724e-05, + "loss": 0.8092, + "step": 2064 + }, + { + "epoch": 0.03568219518938347, + "grad_norm": 1.42162678344195, + "learning_rate": 1.9998315240988133e-05, + "loss": 0.7827, + "step": 2065 + }, + { + "epoch": 0.03569947470279237, + "grad_norm": 1.3544447993150013, + "learning_rate": 1.999830495269806e-05, + "loss": 0.7386, + "step": 2066 + }, + { + "epoch": 0.03571675421620127, + "grad_norm": 1.2571480864288058, + "learning_rate": 1.9998294633092535e-05, + "loss": 0.5881, + "step": 2067 + }, + { + "epoch": 0.035734033729610176, + "grad_norm": 1.3527478186676023, + "learning_rate": 1.9998284282171592e-05, + "loss": 0.862, + "step": 2068 + }, + { + "epoch": 0.03575131324301908, + "grad_norm": 1.7441937481453904, + "learning_rate": 1.999827389993526e-05, + "loss": 1.0664, + "step": 2069 + }, + { + "epoch": 0.03576859275642798, + "grad_norm": 1.4719912546631337, + "learning_rate": 1.9998263486383578e-05, + "loss": 0.8258, + "step": 2070 + }, + { + "epoch": 0.03578587226983688, + "grad_norm": 1.385934473351803, + "learning_rate": 1.9998253041516574e-05, + "loss": 0.8986, + "step": 2071 + }, + { + "epoch": 0.035803151783245786, + "grad_norm": 0.9314697553909435, + "learning_rate": 1.999824256533428e-05, + "loss": 0.4361, + "step": 2072 + }, + { + "epoch": 0.03582043129665469, + "grad_norm": 1.3181373039287352, + "learning_rate": 1.999823205783673e-05, + "loss": 0.8375, + "step": 2073 + }, + { + "epoch": 0.03583771081006359, + "grad_norm": 1.479696651851654, + "learning_rate": 1.999822151902396e-05, + "loss": 0.9547, + "step": 2074 + }, + { + "epoch": 0.03585499032347249, + "grad_norm": 1.7161412707699721, + "learning_rate": 1.9998210948896e-05, + "loss": 0.8544, + "step": 2075 + }, + { + "epoch": 0.035872269836881396, + "grad_norm": 1.8045108702080108, + "learning_rate": 1.9998200347452882e-05, + "loss": 1.0345, + "step": 2076 + }, + { + "epoch": 0.035889549350290297, + "grad_norm": 1.6222435387809133, + "learning_rate": 1.999818971469464e-05, + "loss": 0.7765, + "step": 2077 + }, + { + "epoch": 0.0359068288636992, + "grad_norm": 1.7248089430032962, + "learning_rate": 1.999817905062131e-05, + "loss": 0.9135, + "step": 2078 + }, + { + "epoch": 0.0359241083771081, + "grad_norm": 1.5374357220499033, + "learning_rate": 1.9998168355232926e-05, + "loss": 0.8388, + "step": 2079 + }, + { + "epoch": 0.035941387890517006, + "grad_norm": 1.2035054057234158, + "learning_rate": 1.9998157628529513e-05, + "loss": 0.5651, + "step": 2080 + }, + { + "epoch": 0.035958667403925906, + "grad_norm": 0.8748895435886985, + "learning_rate": 1.9998146870511115e-05, + "loss": 0.7121, + "step": 2081 + }, + { + "epoch": 0.03597594691733481, + "grad_norm": 1.6447881327457157, + "learning_rate": 1.999813608117776e-05, + "loss": 0.8171, + "step": 2082 + }, + { + "epoch": 0.03599322643074371, + "grad_norm": 1.2678559879565006, + "learning_rate": 1.999812526052948e-05, + "loss": 0.7996, + "step": 2083 + }, + { + "epoch": 0.036010505944152615, + "grad_norm": 1.5382029585904289, + "learning_rate": 1.9998114408566314e-05, + "loss": 0.7982, + "step": 2084 + }, + { + "epoch": 0.036027785457561516, + "grad_norm": 2.0839419640419448, + "learning_rate": 1.9998103525288294e-05, + "loss": 0.8964, + "step": 2085 + }, + { + "epoch": 0.03604506497097042, + "grad_norm": 1.6902436785537573, + "learning_rate": 1.9998092610695453e-05, + "loss": 0.7728, + "step": 2086 + }, + { + "epoch": 0.03606234448437932, + "grad_norm": 1.542993289463471, + "learning_rate": 1.9998081664787827e-05, + "loss": 0.9123, + "step": 2087 + }, + { + "epoch": 0.036079623997788225, + "grad_norm": 1.357731509536573, + "learning_rate": 1.999807068756545e-05, + "loss": 0.9183, + "step": 2088 + }, + { + "epoch": 0.036096903511197126, + "grad_norm": 1.7203573122362898, + "learning_rate": 1.9998059679028355e-05, + "loss": 0.9012, + "step": 2089 + }, + { + "epoch": 0.03611418302460603, + "grad_norm": 1.2094482963886517, + "learning_rate": 1.9998048639176576e-05, + "loss": 0.5915, + "step": 2090 + }, + { + "epoch": 0.03613146253801493, + "grad_norm": 1.4900914981874858, + "learning_rate": 1.9998037568010152e-05, + "loss": 0.826, + "step": 2091 + }, + { + "epoch": 0.036148742051423835, + "grad_norm": 1.6042200844013716, + "learning_rate": 1.9998026465529113e-05, + "loss": 0.8546, + "step": 2092 + }, + { + "epoch": 0.036166021564832736, + "grad_norm": 1.588876816195238, + "learning_rate": 1.9998015331733493e-05, + "loss": 0.914, + "step": 2093 + }, + { + "epoch": 0.036183301078241636, + "grad_norm": 0.9361183591558053, + "learning_rate": 1.9998004166623332e-05, + "loss": 0.7683, + "step": 2094 + }, + { + "epoch": 0.03620058059165054, + "grad_norm": 1.4146453222327533, + "learning_rate": 1.9997992970198662e-05, + "loss": 1.0138, + "step": 2095 + }, + { + "epoch": 0.036217860105059445, + "grad_norm": 1.4952519203843764, + "learning_rate": 1.999798174245952e-05, + "loss": 0.8117, + "step": 2096 + }, + { + "epoch": 0.036235139618468346, + "grad_norm": 1.515624822072086, + "learning_rate": 1.9997970483405937e-05, + "loss": 1.0148, + "step": 2097 + }, + { + "epoch": 0.036252419131877246, + "grad_norm": 1.4766981243052761, + "learning_rate": 1.9997959193037947e-05, + "loss": 0.8477, + "step": 2098 + }, + { + "epoch": 0.03626969864528615, + "grad_norm": 1.3056981285235956, + "learning_rate": 1.9997947871355596e-05, + "loss": 0.6192, + "step": 2099 + }, + { + "epoch": 0.036286978158695055, + "grad_norm": 1.6692181108975117, + "learning_rate": 1.9997936518358907e-05, + "loss": 1.0904, + "step": 2100 + }, + { + "epoch": 0.036304257672103955, + "grad_norm": 1.2197662801987934, + "learning_rate": 1.9997925134047923e-05, + "loss": 0.9186, + "step": 2101 + }, + { + "epoch": 0.036321537185512856, + "grad_norm": 1.4356922308432378, + "learning_rate": 1.999791371842268e-05, + "loss": 0.793, + "step": 2102 + }, + { + "epoch": 0.03633881669892176, + "grad_norm": 1.279113657521204, + "learning_rate": 1.9997902271483207e-05, + "loss": 0.8551, + "step": 2103 + }, + { + "epoch": 0.03635609621233066, + "grad_norm": 1.3167192388255167, + "learning_rate": 1.9997890793229546e-05, + "loss": 0.9659, + "step": 2104 + }, + { + "epoch": 0.036373375725739565, + "grad_norm": 1.2643138778211351, + "learning_rate": 1.9997879283661732e-05, + "loss": 0.6158, + "step": 2105 + }, + { + "epoch": 0.036390655239148466, + "grad_norm": 1.5706682254332023, + "learning_rate": 1.99978677427798e-05, + "loss": 0.9201, + "step": 2106 + }, + { + "epoch": 0.03640793475255737, + "grad_norm": 1.3768207019959653, + "learning_rate": 1.9997856170583786e-05, + "loss": 0.913, + "step": 2107 + }, + { + "epoch": 0.03642521426596627, + "grad_norm": 1.1817553737245763, + "learning_rate": 1.9997844567073726e-05, + "loss": 0.8198, + "step": 2108 + }, + { + "epoch": 0.036442493779375175, + "grad_norm": 1.3638893718277842, + "learning_rate": 1.999783293224966e-05, + "loss": 0.7067, + "step": 2109 + }, + { + "epoch": 0.036459773292784076, + "grad_norm": 1.2015890190921559, + "learning_rate": 1.999782126611162e-05, + "loss": 0.5659, + "step": 2110 + }, + { + "epoch": 0.036477052806192976, + "grad_norm": 1.820402012383759, + "learning_rate": 1.999780956865964e-05, + "loss": 1.1438, + "step": 2111 + }, + { + "epoch": 0.03649433231960188, + "grad_norm": 1.3015481540611569, + "learning_rate": 1.9997797839893766e-05, + "loss": 0.7807, + "step": 2112 + }, + { + "epoch": 0.036511611833010785, + "grad_norm": 1.701874298394503, + "learning_rate": 1.999778607981403e-05, + "loss": 0.9127, + "step": 2113 + }, + { + "epoch": 0.036528891346419685, + "grad_norm": 1.2914010273407557, + "learning_rate": 1.999777428842046e-05, + "loss": 0.7955, + "step": 2114 + }, + { + "epoch": 0.036546170859828586, + "grad_norm": 1.7124353086316673, + "learning_rate": 1.999776246571311e-05, + "loss": 1.0869, + "step": 2115 + }, + { + "epoch": 0.03656345037323749, + "grad_norm": 1.6873933478437522, + "learning_rate": 1.9997750611692005e-05, + "loss": 0.918, + "step": 2116 + }, + { + "epoch": 0.036580729886646395, + "grad_norm": 1.003355806160177, + "learning_rate": 1.9997738726357184e-05, + "loss": 0.8, + "step": 2117 + }, + { + "epoch": 0.036598009400055295, + "grad_norm": 1.3938074804422433, + "learning_rate": 1.9997726809708686e-05, + "loss": 0.8103, + "step": 2118 + }, + { + "epoch": 0.036615288913464196, + "grad_norm": 1.6077959558056072, + "learning_rate": 1.9997714861746548e-05, + "loss": 0.9066, + "step": 2119 + }, + { + "epoch": 0.0366325684268731, + "grad_norm": 1.5974956077693216, + "learning_rate": 1.999770288247081e-05, + "loss": 0.9442, + "step": 2120 + }, + { + "epoch": 0.036649847940282004, + "grad_norm": 1.4665090139192563, + "learning_rate": 1.99976908718815e-05, + "loss": 1.0609, + "step": 2121 + }, + { + "epoch": 0.036667127453690905, + "grad_norm": 1.3353217732306388, + "learning_rate": 1.9997678829978667e-05, + "loss": 0.8094, + "step": 2122 + }, + { + "epoch": 0.036684406967099806, + "grad_norm": 1.5546461586766012, + "learning_rate": 1.999766675676235e-05, + "loss": 0.8671, + "step": 2123 + }, + { + "epoch": 0.036701686480508706, + "grad_norm": 1.4796550581539871, + "learning_rate": 1.9997654652232572e-05, + "loss": 0.601, + "step": 2124 + }, + { + "epoch": 0.036718965993917614, + "grad_norm": 1.3639531934706142, + "learning_rate": 1.9997642516389382e-05, + "loss": 0.8339, + "step": 2125 + }, + { + "epoch": 0.036736245507326515, + "grad_norm": 1.4892628806417318, + "learning_rate": 1.9997630349232818e-05, + "loss": 0.8731, + "step": 2126 + }, + { + "epoch": 0.036753525020735416, + "grad_norm": 0.882461701147723, + "learning_rate": 1.9997618150762916e-05, + "loss": 0.8113, + "step": 2127 + }, + { + "epoch": 0.036770804534144316, + "grad_norm": 0.8673263190357697, + "learning_rate": 1.999760592097971e-05, + "loss": 0.8198, + "step": 2128 + }, + { + "epoch": 0.036788084047553224, + "grad_norm": 1.159378217690656, + "learning_rate": 1.9997593659883244e-05, + "loss": 0.626, + "step": 2129 + }, + { + "epoch": 0.036805363560962125, + "grad_norm": 1.3520102920127626, + "learning_rate": 1.9997581367473556e-05, + "loss": 0.5204, + "step": 2130 + }, + { + "epoch": 0.036822643074371025, + "grad_norm": 1.3279243786323036, + "learning_rate": 1.9997569043750683e-05, + "loss": 0.803, + "step": 2131 + }, + { + "epoch": 0.036839922587779926, + "grad_norm": 1.3246117397842871, + "learning_rate": 1.999755668871467e-05, + "loss": 0.6539, + "step": 2132 + }, + { + "epoch": 0.036857202101188834, + "grad_norm": 1.2450610646615385, + "learning_rate": 1.9997544302365537e-05, + "loss": 0.7738, + "step": 2133 + }, + { + "epoch": 0.036874481614597734, + "grad_norm": 1.4464870184914267, + "learning_rate": 1.9997531884703346e-05, + "loss": 1.0892, + "step": 2134 + }, + { + "epoch": 0.036891761128006635, + "grad_norm": 1.5131613815333478, + "learning_rate": 1.9997519435728122e-05, + "loss": 0.7562, + "step": 2135 + }, + { + "epoch": 0.036909040641415536, + "grad_norm": 1.7555509419710134, + "learning_rate": 1.9997506955439906e-05, + "loss": 0.8123, + "step": 2136 + }, + { + "epoch": 0.036926320154824444, + "grad_norm": 1.3561688607744944, + "learning_rate": 1.9997494443838742e-05, + "loss": 0.6787, + "step": 2137 + }, + { + "epoch": 0.036943599668233344, + "grad_norm": 1.3335827720038709, + "learning_rate": 1.9997481900924664e-05, + "loss": 0.6689, + "step": 2138 + }, + { + "epoch": 0.036960879181642245, + "grad_norm": 1.3504043398479386, + "learning_rate": 1.999746932669771e-05, + "loss": 0.8476, + "step": 2139 + }, + { + "epoch": 0.036978158695051146, + "grad_norm": 1.3976316770203911, + "learning_rate": 1.9997456721157925e-05, + "loss": 0.779, + "step": 2140 + }, + { + "epoch": 0.036995438208460046, + "grad_norm": 1.4441176950980128, + "learning_rate": 1.9997444084305345e-05, + "loss": 0.9784, + "step": 2141 + }, + { + "epoch": 0.037012717721868954, + "grad_norm": 1.1359443319154192, + "learning_rate": 1.999743141614001e-05, + "loss": 0.7948, + "step": 2142 + }, + { + "epoch": 0.037029997235277855, + "grad_norm": 1.7295608621254996, + "learning_rate": 1.999741871666196e-05, + "loss": 0.9427, + "step": 2143 + }, + { + "epoch": 0.037047276748686755, + "grad_norm": 2.083419164660137, + "learning_rate": 1.9997405985871235e-05, + "loss": 0.4727, + "step": 2144 + }, + { + "epoch": 0.037064556262095656, + "grad_norm": 1.6052880974269859, + "learning_rate": 1.9997393223767877e-05, + "loss": 0.6804, + "step": 2145 + }, + { + "epoch": 0.037081835775504564, + "grad_norm": 1.0157873981288907, + "learning_rate": 1.999738043035192e-05, + "loss": 0.7786, + "step": 2146 + }, + { + "epoch": 0.037099115288913465, + "grad_norm": 1.5280303711509715, + "learning_rate": 1.999736760562341e-05, + "loss": 0.8955, + "step": 2147 + }, + { + "epoch": 0.037116394802322365, + "grad_norm": 1.5543647237918892, + "learning_rate": 1.9997354749582386e-05, + "loss": 0.8444, + "step": 2148 + }, + { + "epoch": 0.037133674315731266, + "grad_norm": 1.1936043581325877, + "learning_rate": 1.9997341862228886e-05, + "loss": 0.8744, + "step": 2149 + }, + { + "epoch": 0.037150953829140174, + "grad_norm": 1.5874974895821967, + "learning_rate": 1.999732894356295e-05, + "loss": 0.8675, + "step": 2150 + }, + { + "epoch": 0.037168233342549074, + "grad_norm": 1.3014970474969583, + "learning_rate": 1.999731599358462e-05, + "loss": 0.8816, + "step": 2151 + }, + { + "epoch": 0.037185512855957975, + "grad_norm": 1.4918467672221063, + "learning_rate": 1.9997303012293935e-05, + "loss": 0.7927, + "step": 2152 + }, + { + "epoch": 0.037202792369366876, + "grad_norm": 1.3290505977256355, + "learning_rate": 1.999728999969094e-05, + "loss": 0.6165, + "step": 2153 + }, + { + "epoch": 0.03722007188277578, + "grad_norm": 1.4807172026380608, + "learning_rate": 1.9997276955775674e-05, + "loss": 0.9145, + "step": 2154 + }, + { + "epoch": 0.037237351396184684, + "grad_norm": 1.2378076098226907, + "learning_rate": 1.9997263880548175e-05, + "loss": 0.7624, + "step": 2155 + }, + { + "epoch": 0.037254630909593585, + "grad_norm": 1.2263693156450144, + "learning_rate": 1.9997250774008488e-05, + "loss": 0.7748, + "step": 2156 + }, + { + "epoch": 0.037271910423002486, + "grad_norm": 1.6606903846050973, + "learning_rate": 1.999723763615665e-05, + "loss": 0.8181, + "step": 2157 + }, + { + "epoch": 0.03728918993641139, + "grad_norm": 1.7796816991364475, + "learning_rate": 1.9997224466992704e-05, + "loss": 0.7912, + "step": 2158 + }, + { + "epoch": 0.037306469449820294, + "grad_norm": 4.17744824369689, + "learning_rate": 1.999721126651669e-05, + "loss": 0.9492, + "step": 2159 + }, + { + "epoch": 0.037323748963229195, + "grad_norm": 1.5171990435915583, + "learning_rate": 1.9997198034728652e-05, + "loss": 0.5771, + "step": 2160 + }, + { + "epoch": 0.037341028476638095, + "grad_norm": 1.097644939115907, + "learning_rate": 1.999718477162863e-05, + "loss": 0.9661, + "step": 2161 + }, + { + "epoch": 0.037358307990047, + "grad_norm": 1.9218030067906655, + "learning_rate": 1.9997171477216665e-05, + "loss": 0.5585, + "step": 2162 + }, + { + "epoch": 0.037375587503455904, + "grad_norm": 1.4735020232568203, + "learning_rate": 1.99971581514928e-05, + "loss": 0.886, + "step": 2163 + }, + { + "epoch": 0.037392867016864804, + "grad_norm": 1.5370869462007646, + "learning_rate": 1.9997144794457076e-05, + "loss": 0.6783, + "step": 2164 + }, + { + "epoch": 0.037410146530273705, + "grad_norm": 1.2000577377859076, + "learning_rate": 1.9997131406109533e-05, + "loss": 0.7682, + "step": 2165 + }, + { + "epoch": 0.03742742604368261, + "grad_norm": 1.5843117212933653, + "learning_rate": 1.9997117986450215e-05, + "loss": 0.9721, + "step": 2166 + }, + { + "epoch": 0.037444705557091514, + "grad_norm": 1.6408681443673307, + "learning_rate": 1.9997104535479165e-05, + "loss": 0.7067, + "step": 2167 + }, + { + "epoch": 0.037461985070500414, + "grad_norm": 1.5242552036185923, + "learning_rate": 1.999709105319642e-05, + "loss": 0.9303, + "step": 2168 + }, + { + "epoch": 0.037479264583909315, + "grad_norm": 1.7060138513342713, + "learning_rate": 1.9997077539602033e-05, + "loss": 0.8272, + "step": 2169 + }, + { + "epoch": 0.03749654409731822, + "grad_norm": 1.2941985953735224, + "learning_rate": 1.9997063994696036e-05, + "loss": 0.8605, + "step": 2170 + }, + { + "epoch": 0.03751382361072712, + "grad_norm": 1.7764996061272353, + "learning_rate": 1.9997050418478476e-05, + "loss": 0.7701, + "step": 2171 + }, + { + "epoch": 0.037531103124136024, + "grad_norm": 1.9283873193591758, + "learning_rate": 1.999703681094939e-05, + "loss": 0.8798, + "step": 2172 + }, + { + "epoch": 0.037548382637544925, + "grad_norm": 1.9164210781026583, + "learning_rate": 1.999702317210883e-05, + "loss": 0.8458, + "step": 2173 + }, + { + "epoch": 0.03756566215095383, + "grad_norm": 1.2806433298307236, + "learning_rate": 1.999700950195683e-05, + "loss": 0.6724, + "step": 2174 + }, + { + "epoch": 0.03758294166436273, + "grad_norm": 1.5127569127152358, + "learning_rate": 1.999699580049344e-05, + "loss": 0.8811, + "step": 2175 + }, + { + "epoch": 0.037600221177771634, + "grad_norm": 1.4839731492269526, + "learning_rate": 1.9996982067718694e-05, + "loss": 0.7353, + "step": 2176 + }, + { + "epoch": 0.037617500691180535, + "grad_norm": 1.899258309065385, + "learning_rate": 1.9996968303632646e-05, + "loss": 0.7942, + "step": 2177 + }, + { + "epoch": 0.037634780204589435, + "grad_norm": 1.357132425066542, + "learning_rate": 1.9996954508235333e-05, + "loss": 0.7012, + "step": 2178 + }, + { + "epoch": 0.03765205971799834, + "grad_norm": 1.7927822903583102, + "learning_rate": 1.9996940681526796e-05, + "loss": 0.5181, + "step": 2179 + }, + { + "epoch": 0.037669339231407244, + "grad_norm": 1.4570590635147604, + "learning_rate": 1.9996926823507084e-05, + "loss": 0.7798, + "step": 2180 + }, + { + "epoch": 0.037686618744816144, + "grad_norm": 1.8569978645176148, + "learning_rate": 1.9996912934176237e-05, + "loss": 0.7934, + "step": 2181 + }, + { + "epoch": 0.037703898258225045, + "grad_norm": 1.1950277933710902, + "learning_rate": 1.9996899013534294e-05, + "loss": 0.7386, + "step": 2182 + }, + { + "epoch": 0.03772117777163395, + "grad_norm": 1.672970126208459, + "learning_rate": 1.999688506158131e-05, + "loss": 0.9193, + "step": 2183 + }, + { + "epoch": 0.03773845728504285, + "grad_norm": 1.3825367597633684, + "learning_rate": 1.999687107831732e-05, + "loss": 0.6403, + "step": 2184 + }, + { + "epoch": 0.037755736798451754, + "grad_norm": 1.5521865848989496, + "learning_rate": 1.9996857063742367e-05, + "loss": 0.8847, + "step": 2185 + }, + { + "epoch": 0.037773016311860655, + "grad_norm": 1.406485667604837, + "learning_rate": 1.99968430178565e-05, + "loss": 0.9047, + "step": 2186 + }, + { + "epoch": 0.03779029582526956, + "grad_norm": 1.462797102064687, + "learning_rate": 1.9996828940659763e-05, + "loss": 0.765, + "step": 2187 + }, + { + "epoch": 0.03780757533867846, + "grad_norm": 1.070417238956573, + "learning_rate": 1.9996814832152198e-05, + "loss": 0.7203, + "step": 2188 + }, + { + "epoch": 0.037824854852087364, + "grad_norm": 1.0557107986096765, + "learning_rate": 1.9996800692333846e-05, + "loss": 0.6623, + "step": 2189 + }, + { + "epoch": 0.037842134365496265, + "grad_norm": 1.2479835205843661, + "learning_rate": 1.9996786521204753e-05, + "loss": 0.6755, + "step": 2190 + }, + { + "epoch": 0.03785941387890517, + "grad_norm": 1.5075766455343789, + "learning_rate": 1.999677231876497e-05, + "loss": 0.8784, + "step": 2191 + }, + { + "epoch": 0.03787669339231407, + "grad_norm": 1.398738793774476, + "learning_rate": 1.9996758085014535e-05, + "loss": 0.772, + "step": 2192 + }, + { + "epoch": 0.037893972905722974, + "grad_norm": 1.5287793209554172, + "learning_rate": 1.9996743819953496e-05, + "loss": 0.9232, + "step": 2193 + }, + { + "epoch": 0.037911252419131874, + "grad_norm": 1.4047512548798062, + "learning_rate": 1.999672952358189e-05, + "loss": 0.7732, + "step": 2194 + }, + { + "epoch": 0.03792853193254078, + "grad_norm": 1.3380548821844718, + "learning_rate": 1.9996715195899773e-05, + "loss": 0.7291, + "step": 2195 + }, + { + "epoch": 0.03794581144594968, + "grad_norm": 1.7162396334725465, + "learning_rate": 1.9996700836907178e-05, + "loss": 0.8314, + "step": 2196 + }, + { + "epoch": 0.037963090959358584, + "grad_norm": 1.3739663847704502, + "learning_rate": 1.9996686446604162e-05, + "loss": 0.6083, + "step": 2197 + }, + { + "epoch": 0.037980370472767484, + "grad_norm": 2.2668662077221287, + "learning_rate": 1.9996672024990763e-05, + "loss": 0.8071, + "step": 2198 + }, + { + "epoch": 0.03799764998617639, + "grad_norm": 1.8573081911763993, + "learning_rate": 1.9996657572067025e-05, + "loss": 0.697, + "step": 2199 + }, + { + "epoch": 0.03801492949958529, + "grad_norm": 1.3388021688056042, + "learning_rate": 1.9996643087832995e-05, + "loss": 0.7716, + "step": 2200 + }, + { + "epoch": 0.03803220901299419, + "grad_norm": 1.1311986824147844, + "learning_rate": 1.9996628572288725e-05, + "loss": 0.7766, + "step": 2201 + }, + { + "epoch": 0.038049488526403094, + "grad_norm": 1.3483222519053364, + "learning_rate": 1.999661402543425e-05, + "loss": 0.7802, + "step": 2202 + }, + { + "epoch": 0.038066768039812, + "grad_norm": 1.2557545264427652, + "learning_rate": 1.9996599447269624e-05, + "loss": 0.7808, + "step": 2203 + }, + { + "epoch": 0.0380840475532209, + "grad_norm": 0.9316911714951904, + "learning_rate": 1.9996584837794885e-05, + "loss": 0.7517, + "step": 2204 + }, + { + "epoch": 0.0381013270666298, + "grad_norm": 1.1597134373765567, + "learning_rate": 1.9996570197010084e-05, + "loss": 0.7703, + "step": 2205 + }, + { + "epoch": 0.038118606580038704, + "grad_norm": 1.7238437134463158, + "learning_rate": 1.9996555524915263e-05, + "loss": 0.7517, + "step": 2206 + }, + { + "epoch": 0.03813588609344761, + "grad_norm": 1.4133563157585565, + "learning_rate": 1.9996540821510476e-05, + "loss": 0.7876, + "step": 2207 + }, + { + "epoch": 0.03815316560685651, + "grad_norm": 1.284110745972597, + "learning_rate": 1.999652608679576e-05, + "loss": 0.8845, + "step": 2208 + }, + { + "epoch": 0.03817044512026541, + "grad_norm": 1.4399255136222102, + "learning_rate": 1.999651132077116e-05, + "loss": 0.9251, + "step": 2209 + }, + { + "epoch": 0.038187724633674314, + "grad_norm": 1.5618814278394977, + "learning_rate": 1.9996496523436735e-05, + "loss": 0.9302, + "step": 2210 + }, + { + "epoch": 0.03820500414708322, + "grad_norm": 1.3135796408948877, + "learning_rate": 1.999648169479252e-05, + "loss": 0.7932, + "step": 2211 + }, + { + "epoch": 0.03822228366049212, + "grad_norm": 1.559677553582142, + "learning_rate": 1.9996466834838564e-05, + "loss": 0.7863, + "step": 2212 + }, + { + "epoch": 0.03823956317390102, + "grad_norm": 1.3726282760223467, + "learning_rate": 1.9996451943574913e-05, + "loss": 0.7329, + "step": 2213 + }, + { + "epoch": 0.038256842687309923, + "grad_norm": 1.2658376669961795, + "learning_rate": 1.9996437021001614e-05, + "loss": 0.997, + "step": 2214 + }, + { + "epoch": 0.03827412220071883, + "grad_norm": 1.3634479270218518, + "learning_rate": 1.999642206711872e-05, + "loss": 0.6647, + "step": 2215 + }, + { + "epoch": 0.03829140171412773, + "grad_norm": 1.4777990083697428, + "learning_rate": 1.9996407081926266e-05, + "loss": 0.9384, + "step": 2216 + }, + { + "epoch": 0.03830868122753663, + "grad_norm": 1.0754526656239205, + "learning_rate": 1.9996392065424312e-05, + "loss": 0.6227, + "step": 2217 + }, + { + "epoch": 0.03832596074094553, + "grad_norm": 1.514290189808972, + "learning_rate": 1.9996377017612896e-05, + "loss": 0.9882, + "step": 2218 + }, + { + "epoch": 0.038343240254354434, + "grad_norm": 1.1421824721781226, + "learning_rate": 1.9996361938492062e-05, + "loss": 0.9244, + "step": 2219 + }, + { + "epoch": 0.03836051976776334, + "grad_norm": 1.2044073740913903, + "learning_rate": 1.999634682806187e-05, + "loss": 0.8165, + "step": 2220 + }, + { + "epoch": 0.03837779928117224, + "grad_norm": 1.5252340365875674, + "learning_rate": 1.9996331686322356e-05, + "loss": 0.8957, + "step": 2221 + }, + { + "epoch": 0.03839507879458114, + "grad_norm": 1.5223001549016841, + "learning_rate": 1.9996316513273574e-05, + "loss": 1.0583, + "step": 2222 + }, + { + "epoch": 0.038412358307990044, + "grad_norm": 1.3875320246610467, + "learning_rate": 1.9996301308915565e-05, + "loss": 0.7366, + "step": 2223 + }, + { + "epoch": 0.03842963782139895, + "grad_norm": 1.4616717212147234, + "learning_rate": 1.9996286073248386e-05, + "loss": 0.5081, + "step": 2224 + }, + { + "epoch": 0.03844691733480785, + "grad_norm": 1.421874628602246, + "learning_rate": 1.9996270806272077e-05, + "loss": 0.8085, + "step": 2225 + }, + { + "epoch": 0.03846419684821675, + "grad_norm": 1.380328445765962, + "learning_rate": 1.9996255507986686e-05, + "loss": 0.829, + "step": 2226 + }, + { + "epoch": 0.038481476361625654, + "grad_norm": 1.2385671235442364, + "learning_rate": 1.9996240178392267e-05, + "loss": 1.0869, + "step": 2227 + }, + { + "epoch": 0.03849875587503456, + "grad_norm": 1.6848990515307862, + "learning_rate": 1.9996224817488857e-05, + "loss": 0.8059, + "step": 2228 + }, + { + "epoch": 0.03851603538844346, + "grad_norm": 1.6207052994748452, + "learning_rate": 1.9996209425276518e-05, + "loss": 0.7832, + "step": 2229 + }, + { + "epoch": 0.03853331490185236, + "grad_norm": 1.447818767428122, + "learning_rate": 1.999619400175529e-05, + "loss": 0.9474, + "step": 2230 + }, + { + "epoch": 0.03855059441526126, + "grad_norm": 1.3998159247146154, + "learning_rate": 1.9996178546925223e-05, + "loss": 0.9296, + "step": 2231 + }, + { + "epoch": 0.03856787392867017, + "grad_norm": 1.2761538166313964, + "learning_rate": 1.9996163060786363e-05, + "loss": 0.8377, + "step": 2232 + }, + { + "epoch": 0.03858515344207907, + "grad_norm": 0.7804868951895217, + "learning_rate": 1.9996147543338762e-05, + "loss": 0.844, + "step": 2233 + }, + { + "epoch": 0.03860243295548797, + "grad_norm": 1.4995076087599195, + "learning_rate": 1.9996131994582466e-05, + "loss": 0.7521, + "step": 2234 + }, + { + "epoch": 0.03861971246889687, + "grad_norm": 0.872058731265352, + "learning_rate": 1.9996116414517525e-05, + "loss": 0.5454, + "step": 2235 + }, + { + "epoch": 0.03863699198230578, + "grad_norm": 1.4399160561506432, + "learning_rate": 1.999610080314399e-05, + "loss": 0.8746, + "step": 2236 + }, + { + "epoch": 0.03865427149571468, + "grad_norm": 1.5747870417777468, + "learning_rate": 1.9996085160461907e-05, + "loss": 0.8279, + "step": 2237 + }, + { + "epoch": 0.03867155100912358, + "grad_norm": 1.3594749949330598, + "learning_rate": 1.9996069486471325e-05, + "loss": 0.5887, + "step": 2238 + }, + { + "epoch": 0.03868883052253248, + "grad_norm": 1.5836863915545245, + "learning_rate": 1.9996053781172295e-05, + "loss": 0.7465, + "step": 2239 + }, + { + "epoch": 0.03870611003594139, + "grad_norm": 1.321338195327259, + "learning_rate": 1.999603804456486e-05, + "loss": 0.8935, + "step": 2240 + }, + { + "epoch": 0.03872338954935029, + "grad_norm": 1.5167894263402801, + "learning_rate": 1.999602227664908e-05, + "loss": 0.6881, + "step": 2241 + }, + { + "epoch": 0.03874066906275919, + "grad_norm": 1.4733582189594747, + "learning_rate": 1.9996006477424997e-05, + "loss": 0.9571, + "step": 2242 + }, + { + "epoch": 0.03875794857616809, + "grad_norm": 1.6532460406089304, + "learning_rate": 1.999599064689266e-05, + "loss": 0.828, + "step": 2243 + }, + { + "epoch": 0.038775228089577, + "grad_norm": 0.9088822038305936, + "learning_rate": 1.9995974785052123e-05, + "loss": 0.682, + "step": 2244 + }, + { + "epoch": 0.0387925076029859, + "grad_norm": 0.872132871233789, + "learning_rate": 1.999595889190343e-05, + "loss": 0.9423, + "step": 2245 + }, + { + "epoch": 0.0388097871163948, + "grad_norm": 1.218218803376703, + "learning_rate": 1.9995942967446637e-05, + "loss": 0.9644, + "step": 2246 + }, + { + "epoch": 0.0388270666298037, + "grad_norm": 1.533119670433226, + "learning_rate": 1.999592701168179e-05, + "loss": 0.7394, + "step": 2247 + }, + { + "epoch": 0.03884434614321261, + "grad_norm": 1.7974218208790975, + "learning_rate": 1.999591102460894e-05, + "loss": 0.5656, + "step": 2248 + }, + { + "epoch": 0.03886162565662151, + "grad_norm": 1.3787452912882543, + "learning_rate": 1.9995895006228137e-05, + "loss": 0.7693, + "step": 2249 + }, + { + "epoch": 0.03887890517003041, + "grad_norm": 1.455585926968805, + "learning_rate": 1.9995878956539433e-05, + "loss": 0.6307, + "step": 2250 + }, + { + "epoch": 0.03889618468343931, + "grad_norm": 1.6528297681364228, + "learning_rate": 1.9995862875542876e-05, + "loss": 0.8351, + "step": 2251 + }, + { + "epoch": 0.03891346419684822, + "grad_norm": 1.2202497452041465, + "learning_rate": 1.9995846763238514e-05, + "loss": 0.8349, + "step": 2252 + }, + { + "epoch": 0.03893074371025712, + "grad_norm": 1.4741646889368611, + "learning_rate": 1.9995830619626398e-05, + "loss": 0.7956, + "step": 2253 + }, + { + "epoch": 0.03894802322366602, + "grad_norm": 1.4364085896201906, + "learning_rate": 1.9995814444706586e-05, + "loss": 0.7294, + "step": 2254 + }, + { + "epoch": 0.03896530273707492, + "grad_norm": 1.5071671157680866, + "learning_rate": 1.9995798238479125e-05, + "loss": 0.6946, + "step": 2255 + }, + { + "epoch": 0.03898258225048382, + "grad_norm": 1.7185787450421994, + "learning_rate": 1.9995782000944062e-05, + "loss": 0.824, + "step": 2256 + }, + { + "epoch": 0.03899986176389273, + "grad_norm": 1.0691074458617607, + "learning_rate": 1.9995765732101446e-05, + "loss": 0.7737, + "step": 2257 + }, + { + "epoch": 0.03901714127730163, + "grad_norm": 1.3123634387953673, + "learning_rate": 1.9995749431951336e-05, + "loss": 0.892, + "step": 2258 + }, + { + "epoch": 0.03903442079071053, + "grad_norm": 1.513128327633324, + "learning_rate": 1.999573310049378e-05, + "loss": 0.9467, + "step": 2259 + }, + { + "epoch": 0.03905170030411943, + "grad_norm": 1.2308049766675253, + "learning_rate": 1.9995716737728827e-05, + "loss": 0.8317, + "step": 2260 + }, + { + "epoch": 0.03906897981752834, + "grad_norm": 1.3054895926378025, + "learning_rate": 1.999570034365653e-05, + "loss": 0.6804, + "step": 2261 + }, + { + "epoch": 0.03908625933093724, + "grad_norm": 1.2794212728666203, + "learning_rate": 1.9995683918276942e-05, + "loss": 0.5854, + "step": 2262 + }, + { + "epoch": 0.03910353884434614, + "grad_norm": 1.4034848101064692, + "learning_rate": 1.999566746159011e-05, + "loss": 0.8794, + "step": 2263 + }, + { + "epoch": 0.03912081835775504, + "grad_norm": 1.7558540041937585, + "learning_rate": 1.9995650973596087e-05, + "loss": 0.8325, + "step": 2264 + }, + { + "epoch": 0.03913809787116395, + "grad_norm": 1.5047233268157036, + "learning_rate": 1.9995634454294922e-05, + "loss": 0.707, + "step": 2265 + }, + { + "epoch": 0.03915537738457285, + "grad_norm": 1.0680968268503643, + "learning_rate": 1.9995617903686677e-05, + "loss": 0.661, + "step": 2266 + }, + { + "epoch": 0.03917265689798175, + "grad_norm": 1.6039641656091792, + "learning_rate": 1.9995601321771393e-05, + "loss": 0.8323, + "step": 2267 + }, + { + "epoch": 0.03918993641139065, + "grad_norm": 1.2663476557314703, + "learning_rate": 1.999558470854913e-05, + "loss": 0.6831, + "step": 2268 + }, + { + "epoch": 0.03920721592479956, + "grad_norm": 1.5829165015516444, + "learning_rate": 1.9995568064019934e-05, + "loss": 0.7077, + "step": 2269 + }, + { + "epoch": 0.03922449543820846, + "grad_norm": 1.0616238570863978, + "learning_rate": 1.9995551388183858e-05, + "loss": 0.738, + "step": 2270 + }, + { + "epoch": 0.03924177495161736, + "grad_norm": 1.5240271094194264, + "learning_rate": 1.9995534681040956e-05, + "loss": 0.9325, + "step": 2271 + }, + { + "epoch": 0.03925905446502626, + "grad_norm": 1.5282869915306374, + "learning_rate": 1.999551794259128e-05, + "loss": 0.8152, + "step": 2272 + }, + { + "epoch": 0.03927633397843517, + "grad_norm": 1.38945410570834, + "learning_rate": 1.9995501172834882e-05, + "loss": 0.5093, + "step": 2273 + }, + { + "epoch": 0.03929361349184407, + "grad_norm": 1.7590107675599171, + "learning_rate": 1.9995484371771815e-05, + "loss": 0.9633, + "step": 2274 + }, + { + "epoch": 0.03931089300525297, + "grad_norm": 1.127735871506338, + "learning_rate": 1.9995467539402133e-05, + "loss": 0.7133, + "step": 2275 + }, + { + "epoch": 0.03932817251866187, + "grad_norm": 1.3435250624422874, + "learning_rate": 1.9995450675725882e-05, + "loss": 0.5452, + "step": 2276 + }, + { + "epoch": 0.03934545203207078, + "grad_norm": 1.3301444163877976, + "learning_rate": 1.9995433780743124e-05, + "loss": 0.6835, + "step": 2277 + }, + { + "epoch": 0.03936273154547968, + "grad_norm": 1.7231290022162413, + "learning_rate": 1.9995416854453907e-05, + "loss": 0.772, + "step": 2278 + }, + { + "epoch": 0.03938001105888858, + "grad_norm": 1.5520341416745407, + "learning_rate": 1.9995399896858284e-05, + "loss": 0.791, + "step": 2279 + }, + { + "epoch": 0.03939729057229748, + "grad_norm": 4.836824242655808, + "learning_rate": 1.999538290795631e-05, + "loss": 0.742, + "step": 2280 + }, + { + "epoch": 0.03941457008570639, + "grad_norm": 3.825687363692499, + "learning_rate": 1.9995365887748038e-05, + "loss": 0.6811, + "step": 2281 + }, + { + "epoch": 0.03943184959911529, + "grad_norm": 1.5361186338496837, + "learning_rate": 1.9995348836233517e-05, + "loss": 0.8258, + "step": 2282 + }, + { + "epoch": 0.03944912911252419, + "grad_norm": 1.2042633566924705, + "learning_rate": 1.9995331753412804e-05, + "loss": 0.6347, + "step": 2283 + }, + { + "epoch": 0.03946640862593309, + "grad_norm": 0.7772243632606356, + "learning_rate": 1.9995314639285957e-05, + "loss": 0.4592, + "step": 2284 + }, + { + "epoch": 0.039483688139342, + "grad_norm": 1.3829658714092354, + "learning_rate": 1.9995297493853022e-05, + "loss": 0.5738, + "step": 2285 + }, + { + "epoch": 0.0395009676527509, + "grad_norm": 1.8148014929381575, + "learning_rate": 1.9995280317114055e-05, + "loss": 0.9777, + "step": 2286 + }, + { + "epoch": 0.0395182471661598, + "grad_norm": 1.1234175642139257, + "learning_rate": 1.999526310906911e-05, + "loss": 0.8093, + "step": 2287 + }, + { + "epoch": 0.0395355266795687, + "grad_norm": 1.3927725053404756, + "learning_rate": 1.9995245869718242e-05, + "loss": 0.8979, + "step": 2288 + }, + { + "epoch": 0.03955280619297761, + "grad_norm": 0.8644562443399858, + "learning_rate": 1.9995228599061505e-05, + "loss": 0.633, + "step": 2289 + }, + { + "epoch": 0.03957008570638651, + "grad_norm": 1.483170183479937, + "learning_rate": 1.9995211297098956e-05, + "loss": 0.874, + "step": 2290 + }, + { + "epoch": 0.03958736521979541, + "grad_norm": 1.4513923776281803, + "learning_rate": 1.9995193963830638e-05, + "loss": 0.6778, + "step": 2291 + }, + { + "epoch": 0.03960464473320431, + "grad_norm": 1.9192352537101416, + "learning_rate": 1.999517659925662e-05, + "loss": 0.8939, + "step": 2292 + }, + { + "epoch": 0.03962192424661322, + "grad_norm": 1.0569469538193936, + "learning_rate": 1.9995159203376943e-05, + "loss": 0.6282, + "step": 2293 + }, + { + "epoch": 0.03963920376002212, + "grad_norm": 1.6151918482061698, + "learning_rate": 1.9995141776191672e-05, + "loss": 0.9774, + "step": 2294 + }, + { + "epoch": 0.03965648327343102, + "grad_norm": 1.3295056622310564, + "learning_rate": 1.9995124317700856e-05, + "loss": 0.8459, + "step": 2295 + }, + { + "epoch": 0.03967376278683992, + "grad_norm": 1.4431984600214829, + "learning_rate": 1.9995106827904552e-05, + "loss": 0.9452, + "step": 2296 + }, + { + "epoch": 0.03969104230024882, + "grad_norm": 1.298412068263796, + "learning_rate": 1.9995089306802812e-05, + "loss": 0.6345, + "step": 2297 + }, + { + "epoch": 0.03970832181365773, + "grad_norm": 1.0627615682888822, + "learning_rate": 1.9995071754395693e-05, + "loss": 0.6245, + "step": 2298 + }, + { + "epoch": 0.03972560132706663, + "grad_norm": 1.888313539133396, + "learning_rate": 1.999505417068325e-05, + "loss": 0.9292, + "step": 2299 + }, + { + "epoch": 0.03974288084047553, + "grad_norm": 1.9184539187855283, + "learning_rate": 1.999503655566554e-05, + "loss": 0.7135, + "step": 2300 + }, + { + "epoch": 0.03976016035388443, + "grad_norm": 1.638073207505388, + "learning_rate": 1.9995018909342615e-05, + "loss": 0.8587, + "step": 2301 + }, + { + "epoch": 0.03977743986729334, + "grad_norm": 1.5909801665248646, + "learning_rate": 1.999500123171453e-05, + "loss": 1.1912, + "step": 2302 + }, + { + "epoch": 0.03979471938070224, + "grad_norm": 1.778593753764062, + "learning_rate": 1.999498352278134e-05, + "loss": 0.7804, + "step": 2303 + }, + { + "epoch": 0.03981199889411114, + "grad_norm": 1.6013047178331508, + "learning_rate": 1.9994965782543106e-05, + "loss": 1.0913, + "step": 2304 + }, + { + "epoch": 0.03982927840752004, + "grad_norm": 1.386994988499855, + "learning_rate": 1.9994948010999877e-05, + "loss": 0.7515, + "step": 2305 + }, + { + "epoch": 0.03984655792092895, + "grad_norm": 1.5332573241202831, + "learning_rate": 1.9994930208151712e-05, + "loss": 0.8796, + "step": 2306 + }, + { + "epoch": 0.03986383743433785, + "grad_norm": 1.1821659674950973, + "learning_rate": 1.9994912373998663e-05, + "loss": 0.8649, + "step": 2307 + }, + { + "epoch": 0.03988111694774675, + "grad_norm": 0.7670978394103167, + "learning_rate": 1.999489450854079e-05, + "loss": 0.5906, + "step": 2308 + }, + { + "epoch": 0.03989839646115565, + "grad_norm": 1.4568707497446811, + "learning_rate": 1.999487661177815e-05, + "loss": 0.8577, + "step": 2309 + }, + { + "epoch": 0.03991567597456456, + "grad_norm": 0.8814863832229408, + "learning_rate": 1.9994858683710798e-05, + "loss": 0.4882, + "step": 2310 + }, + { + "epoch": 0.03993295548797346, + "grad_norm": 1.1490363622175512, + "learning_rate": 1.9994840724338788e-05, + "loss": 0.5463, + "step": 2311 + }, + { + "epoch": 0.03995023500138236, + "grad_norm": 1.4450690103487802, + "learning_rate": 1.9994822733662175e-05, + "loss": 0.9363, + "step": 2312 + }, + { + "epoch": 0.03996751451479126, + "grad_norm": 1.5767048052929211, + "learning_rate": 1.999480471168102e-05, + "loss": 1.1027, + "step": 2313 + }, + { + "epoch": 0.03998479402820017, + "grad_norm": 1.5476235811624768, + "learning_rate": 1.9994786658395375e-05, + "loss": 0.8397, + "step": 2314 + }, + { + "epoch": 0.04000207354160907, + "grad_norm": 1.471150407366265, + "learning_rate": 1.9994768573805302e-05, + "loss": 0.8962, + "step": 2315 + }, + { + "epoch": 0.04001935305501797, + "grad_norm": 1.2544721951092492, + "learning_rate": 1.9994750457910853e-05, + "loss": 0.7846, + "step": 2316 + }, + { + "epoch": 0.04003663256842687, + "grad_norm": 1.416277881138392, + "learning_rate": 1.9994732310712083e-05, + "loss": 0.6533, + "step": 2317 + }, + { + "epoch": 0.04005391208183578, + "grad_norm": 1.3712205494511454, + "learning_rate": 1.9994714132209055e-05, + "loss": 0.5444, + "step": 2318 + }, + { + "epoch": 0.04007119159524468, + "grad_norm": 1.3655088117271974, + "learning_rate": 1.999469592240182e-05, + "loss": 0.7778, + "step": 2319 + }, + { + "epoch": 0.04008847110865358, + "grad_norm": 1.174237588667747, + "learning_rate": 1.9994677681290437e-05, + "loss": 0.875, + "step": 2320 + }, + { + "epoch": 0.04010575062206248, + "grad_norm": 1.1501592966832372, + "learning_rate": 1.999465940887497e-05, + "loss": 0.7723, + "step": 2321 + }, + { + "epoch": 0.04012303013547139, + "grad_norm": 1.34484686021327, + "learning_rate": 1.9994641105155463e-05, + "loss": 0.672, + "step": 2322 + }, + { + "epoch": 0.04014030964888029, + "grad_norm": 1.4965102135631296, + "learning_rate": 1.9994622770131985e-05, + "loss": 1.0455, + "step": 2323 + }, + { + "epoch": 0.04015758916228919, + "grad_norm": 1.5191662895836373, + "learning_rate": 1.9994604403804588e-05, + "loss": 0.823, + "step": 2324 + }, + { + "epoch": 0.04017486867569809, + "grad_norm": 1.5016539387035486, + "learning_rate": 1.999458600617333e-05, + "loss": 0.8073, + "step": 2325 + }, + { + "epoch": 0.040192148189107, + "grad_norm": 1.1813940048598843, + "learning_rate": 1.9994567577238272e-05, + "loss": 0.9808, + "step": 2326 + }, + { + "epoch": 0.0402094277025159, + "grad_norm": 1.5166742879231825, + "learning_rate": 1.9994549116999465e-05, + "loss": 0.9266, + "step": 2327 + }, + { + "epoch": 0.0402267072159248, + "grad_norm": 1.3081465093719407, + "learning_rate": 1.999453062545697e-05, + "loss": 0.907, + "step": 2328 + }, + { + "epoch": 0.0402439867293337, + "grad_norm": 1.0872514824924027, + "learning_rate": 1.9994512102610848e-05, + "loss": 0.7323, + "step": 2329 + }, + { + "epoch": 0.04026126624274261, + "grad_norm": 1.4324666599956668, + "learning_rate": 1.9994493548461156e-05, + "loss": 0.7915, + "step": 2330 + }, + { + "epoch": 0.04027854575615151, + "grad_norm": 1.3644080411786297, + "learning_rate": 1.9994474963007948e-05, + "loss": 0.8462, + "step": 2331 + }, + { + "epoch": 0.04029582526956041, + "grad_norm": 1.502976791944644, + "learning_rate": 1.9994456346251283e-05, + "loss": 0.9031, + "step": 2332 + }, + { + "epoch": 0.04031310478296931, + "grad_norm": 1.257020842684955, + "learning_rate": 1.9994437698191224e-05, + "loss": 0.8774, + "step": 2333 + }, + { + "epoch": 0.04033038429637821, + "grad_norm": 1.3753190238486404, + "learning_rate": 1.999441901882783e-05, + "loss": 0.6981, + "step": 2334 + }, + { + "epoch": 0.04034766380978712, + "grad_norm": 1.438414267814653, + "learning_rate": 1.999440030816115e-05, + "loss": 0.8608, + "step": 2335 + }, + { + "epoch": 0.04036494332319602, + "grad_norm": 1.5659481586030852, + "learning_rate": 1.999438156619125e-05, + "loss": 1.0697, + "step": 2336 + }, + { + "epoch": 0.04038222283660492, + "grad_norm": 1.0011468702678747, + "learning_rate": 1.999436279291819e-05, + "loss": 0.7269, + "step": 2337 + }, + { + "epoch": 0.04039950235001382, + "grad_norm": 1.0559119022372012, + "learning_rate": 1.999434398834202e-05, + "loss": 0.8849, + "step": 2338 + }, + { + "epoch": 0.04041678186342273, + "grad_norm": 1.1300738387554348, + "learning_rate": 1.999432515246281e-05, + "loss": 0.6862, + "step": 2339 + }, + { + "epoch": 0.04043406137683163, + "grad_norm": 1.3449434694030364, + "learning_rate": 1.9994306285280614e-05, + "loss": 0.7337, + "step": 2340 + }, + { + "epoch": 0.04045134089024053, + "grad_norm": 1.023463902827546, + "learning_rate": 1.9994287386795492e-05, + "loss": 0.6642, + "step": 2341 + }, + { + "epoch": 0.04046862040364943, + "grad_norm": 0.874313142035601, + "learning_rate": 1.99942684570075e-05, + "loss": 0.6396, + "step": 2342 + }, + { + "epoch": 0.04048589991705834, + "grad_norm": 1.4809490820741236, + "learning_rate": 1.9994249495916703e-05, + "loss": 0.8897, + "step": 2343 + }, + { + "epoch": 0.04050317943046724, + "grad_norm": 1.3813496337138949, + "learning_rate": 1.9994230503523155e-05, + "loss": 0.8344, + "step": 2344 + }, + { + "epoch": 0.04052045894387614, + "grad_norm": 1.676538260317511, + "learning_rate": 1.9994211479826918e-05, + "loss": 0.7998, + "step": 2345 + }, + { + "epoch": 0.04053773845728504, + "grad_norm": 1.4251041289577746, + "learning_rate": 1.999419242482805e-05, + "loss": 0.879, + "step": 2346 + }, + { + "epoch": 0.04055501797069395, + "grad_norm": 1.1687430818667406, + "learning_rate": 1.9994173338526613e-05, + "loss": 0.5899, + "step": 2347 + }, + { + "epoch": 0.04057229748410285, + "grad_norm": 0.6668762942358686, + "learning_rate": 1.9994154220922662e-05, + "loss": 0.6592, + "step": 2348 + }, + { + "epoch": 0.04058957699751175, + "grad_norm": 1.5885344268858113, + "learning_rate": 1.9994135072016264e-05, + "loss": 0.5801, + "step": 2349 + }, + { + "epoch": 0.04060685651092065, + "grad_norm": 1.240200437496339, + "learning_rate": 1.9994115891807478e-05, + "loss": 0.4732, + "step": 2350 + }, + { + "epoch": 0.04062413602432956, + "grad_norm": 1.4019653214759875, + "learning_rate": 1.9994096680296356e-05, + "loss": 1.0787, + "step": 2351 + }, + { + "epoch": 0.04064141553773846, + "grad_norm": 1.77622634648367, + "learning_rate": 1.9994077437482965e-05, + "loss": 1.0163, + "step": 2352 + }, + { + "epoch": 0.04065869505114736, + "grad_norm": 2.202345095297251, + "learning_rate": 1.9994058163367364e-05, + "loss": 0.8284, + "step": 2353 + }, + { + "epoch": 0.04067597456455626, + "grad_norm": 2.2718498933687017, + "learning_rate": 1.9994038857949616e-05, + "loss": 0.7387, + "step": 2354 + }, + { + "epoch": 0.04069325407796517, + "grad_norm": 1.3059744075376278, + "learning_rate": 1.999401952122978e-05, + "loss": 0.8398, + "step": 2355 + }, + { + "epoch": 0.04071053359137407, + "grad_norm": 1.4175027447722461, + "learning_rate": 1.999400015320791e-05, + "loss": 0.9239, + "step": 2356 + }, + { + "epoch": 0.04072781310478297, + "grad_norm": 1.5348621644810523, + "learning_rate": 1.9993980753884074e-05, + "loss": 0.878, + "step": 2357 + }, + { + "epoch": 0.04074509261819187, + "grad_norm": 0.967322814025096, + "learning_rate": 1.9993961323258332e-05, + "loss": 0.6719, + "step": 2358 + }, + { + "epoch": 0.04076237213160078, + "grad_norm": 1.6958826939093725, + "learning_rate": 1.9993941861330742e-05, + "loss": 0.9413, + "step": 2359 + }, + { + "epoch": 0.04077965164500968, + "grad_norm": 1.4645247115543678, + "learning_rate": 1.9993922368101368e-05, + "loss": 0.7911, + "step": 2360 + }, + { + "epoch": 0.04079693115841858, + "grad_norm": 1.2707583808875602, + "learning_rate": 1.999390284357027e-05, + "loss": 0.8315, + "step": 2361 + }, + { + "epoch": 0.04081421067182748, + "grad_norm": 1.4994270459774084, + "learning_rate": 1.9993883287737504e-05, + "loss": 0.8038, + "step": 2362 + }, + { + "epoch": 0.04083149018523639, + "grad_norm": 1.2341317826135358, + "learning_rate": 1.999386370060314e-05, + "loss": 0.89, + "step": 2363 + }, + { + "epoch": 0.04084876969864529, + "grad_norm": 1.1000071171130879, + "learning_rate": 1.9993844082167236e-05, + "loss": 0.6526, + "step": 2364 + }, + { + "epoch": 0.04086604921205419, + "grad_norm": 1.3457691562949734, + "learning_rate": 1.9993824432429853e-05, + "loss": 0.9009, + "step": 2365 + }, + { + "epoch": 0.04088332872546309, + "grad_norm": 1.3333697757938296, + "learning_rate": 1.999380475139105e-05, + "loss": 0.7671, + "step": 2366 + }, + { + "epoch": 0.040900608238872, + "grad_norm": 1.5870415330449217, + "learning_rate": 1.9993785039050896e-05, + "loss": 0.7939, + "step": 2367 + }, + { + "epoch": 0.0409178877522809, + "grad_norm": 0.8452351674158521, + "learning_rate": 1.999376529540944e-05, + "loss": 0.5959, + "step": 2368 + }, + { + "epoch": 0.0409351672656898, + "grad_norm": 1.1480897667188428, + "learning_rate": 1.9993745520466758e-05, + "loss": 0.9452, + "step": 2369 + }, + { + "epoch": 0.0409524467790987, + "grad_norm": 1.3200260724748851, + "learning_rate": 1.9993725714222904e-05, + "loss": 0.7541, + "step": 2370 + }, + { + "epoch": 0.040969726292507606, + "grad_norm": 1.5856365845683076, + "learning_rate": 1.999370587667794e-05, + "loss": 0.7827, + "step": 2371 + }, + { + "epoch": 0.04098700580591651, + "grad_norm": 1.2594964631272518, + "learning_rate": 1.999368600783193e-05, + "loss": 0.7988, + "step": 2372 + }, + { + "epoch": 0.04100428531932541, + "grad_norm": 1.2094540407733085, + "learning_rate": 1.9993666107684933e-05, + "loss": 0.8276, + "step": 2373 + }, + { + "epoch": 0.04102156483273431, + "grad_norm": 1.693750676570019, + "learning_rate": 1.999364617623702e-05, + "loss": 0.5167, + "step": 2374 + }, + { + "epoch": 0.04103884434614321, + "grad_norm": 1.2047388084518627, + "learning_rate": 1.9993626213488246e-05, + "loss": 0.768, + "step": 2375 + }, + { + "epoch": 0.04105612385955212, + "grad_norm": 1.300369047872293, + "learning_rate": 1.9993606219438675e-05, + "loss": 0.6777, + "step": 2376 + }, + { + "epoch": 0.04107340337296102, + "grad_norm": 1.2339624743516535, + "learning_rate": 1.9993586194088367e-05, + "loss": 0.6799, + "step": 2377 + }, + { + "epoch": 0.04109068288636992, + "grad_norm": 1.464463022411236, + "learning_rate": 1.9993566137437392e-05, + "loss": 0.8589, + "step": 2378 + }, + { + "epoch": 0.04110796239977882, + "grad_norm": 1.731942942330358, + "learning_rate": 1.9993546049485806e-05, + "loss": 0.8026, + "step": 2379 + }, + { + "epoch": 0.04112524191318773, + "grad_norm": 1.37218783915401, + "learning_rate": 1.9993525930233676e-05, + "loss": 0.5986, + "step": 2380 + }, + { + "epoch": 0.04114252142659663, + "grad_norm": 1.453787381289424, + "learning_rate": 1.9993505779681063e-05, + "loss": 0.8715, + "step": 2381 + }, + { + "epoch": 0.04115980094000553, + "grad_norm": 1.4228920626587813, + "learning_rate": 1.9993485597828027e-05, + "loss": 0.6595, + "step": 2382 + }, + { + "epoch": 0.04117708045341443, + "grad_norm": 1.0833509854014407, + "learning_rate": 1.9993465384674638e-05, + "loss": 0.61, + "step": 2383 + }, + { + "epoch": 0.041194359966823337, + "grad_norm": 1.8162842106013126, + "learning_rate": 1.9993445140220955e-05, + "loss": 0.8329, + "step": 2384 + }, + { + "epoch": 0.04121163948023224, + "grad_norm": 1.7048458527275439, + "learning_rate": 1.999342486446704e-05, + "loss": 1.0703, + "step": 2385 + }, + { + "epoch": 0.04122891899364114, + "grad_norm": 1.7716374310078118, + "learning_rate": 1.9993404557412963e-05, + "loss": 0.8724, + "step": 2386 + }, + { + "epoch": 0.04124619850705004, + "grad_norm": 1.3957594824362163, + "learning_rate": 1.9993384219058784e-05, + "loss": 1.1568, + "step": 2387 + }, + { + "epoch": 0.041263478020458946, + "grad_norm": 1.3051288127071523, + "learning_rate": 1.999336384940456e-05, + "loss": 0.5963, + "step": 2388 + }, + { + "epoch": 0.04128075753386785, + "grad_norm": 1.2952851681086546, + "learning_rate": 1.9993343448450367e-05, + "loss": 0.7515, + "step": 2389 + }, + { + "epoch": 0.04129803704727675, + "grad_norm": 0.8294497912683384, + "learning_rate": 1.999332301619626e-05, + "loss": 0.7011, + "step": 2390 + }, + { + "epoch": 0.04131531656068565, + "grad_norm": 1.1918913253717411, + "learning_rate": 1.9993302552642306e-05, + "loss": 0.8245, + "step": 2391 + }, + { + "epoch": 0.041332596074094556, + "grad_norm": 1.1599746737501955, + "learning_rate": 1.999328205778857e-05, + "loss": 0.8222, + "step": 2392 + }, + { + "epoch": 0.04134987558750346, + "grad_norm": 1.449689255292504, + "learning_rate": 1.9993261531635114e-05, + "loss": 0.8995, + "step": 2393 + }, + { + "epoch": 0.04136715510091236, + "grad_norm": 1.3718645567548327, + "learning_rate": 1.9993240974182005e-05, + "loss": 0.6343, + "step": 2394 + }, + { + "epoch": 0.04138443461432126, + "grad_norm": 0.8790123725668479, + "learning_rate": 1.99932203854293e-05, + "loss": 0.7599, + "step": 2395 + }, + { + "epoch": 0.041401714127730166, + "grad_norm": 1.4238924711436811, + "learning_rate": 1.9993199765377075e-05, + "loss": 1.0179, + "step": 2396 + }, + { + "epoch": 0.04141899364113907, + "grad_norm": 1.2029625702110571, + "learning_rate": 1.999317911402539e-05, + "loss": 0.6693, + "step": 2397 + }, + { + "epoch": 0.04143627315454797, + "grad_norm": 1.3351281581019454, + "learning_rate": 1.9993158431374305e-05, + "loss": 0.6051, + "step": 2398 + }, + { + "epoch": 0.04145355266795687, + "grad_norm": 1.572045796047843, + "learning_rate": 1.999313771742389e-05, + "loss": 0.8338, + "step": 2399 + }, + { + "epoch": 0.041470832181365776, + "grad_norm": 1.436559282956521, + "learning_rate": 1.999311697217421e-05, + "loss": 0.9563, + "step": 2400 + }, + { + "epoch": 0.041488111694774676, + "grad_norm": 1.3759977744406897, + "learning_rate": 1.9993096195625325e-05, + "loss": 0.7951, + "step": 2401 + }, + { + "epoch": 0.04150539120818358, + "grad_norm": 1.1043806084555838, + "learning_rate": 1.9993075387777306e-05, + "loss": 0.7131, + "step": 2402 + }, + { + "epoch": 0.04152267072159248, + "grad_norm": 1.2801359595084936, + "learning_rate": 1.9993054548630214e-05, + "loss": 0.8879, + "step": 2403 + }, + { + "epoch": 0.041539950235001386, + "grad_norm": 0.8431064761277233, + "learning_rate": 1.9993033678184116e-05, + "loss": 0.8523, + "step": 2404 + }, + { + "epoch": 0.041557229748410286, + "grad_norm": 1.497586704048904, + "learning_rate": 1.9993012776439076e-05, + "loss": 0.7619, + "step": 2405 + }, + { + "epoch": 0.04157450926181919, + "grad_norm": 1.1369975645425565, + "learning_rate": 1.999299184339516e-05, + "loss": 0.8899, + "step": 2406 + }, + { + "epoch": 0.04159178877522809, + "grad_norm": 1.1271097251469449, + "learning_rate": 1.9992970879052437e-05, + "loss": 0.5919, + "step": 2407 + }, + { + "epoch": 0.041609068288636995, + "grad_norm": 1.135454415172543, + "learning_rate": 1.9992949883410966e-05, + "loss": 0.8155, + "step": 2408 + }, + { + "epoch": 0.041626347802045896, + "grad_norm": 1.4933636893534554, + "learning_rate": 1.9992928856470818e-05, + "loss": 0.6735, + "step": 2409 + }, + { + "epoch": 0.0416436273154548, + "grad_norm": 1.7098024035473895, + "learning_rate": 1.999290779823206e-05, + "loss": 0.7692, + "step": 2410 + }, + { + "epoch": 0.0416609068288637, + "grad_norm": 1.288685644842163, + "learning_rate": 1.9992886708694752e-05, + "loss": 0.9394, + "step": 2411 + }, + { + "epoch": 0.0416781863422726, + "grad_norm": 1.155178520634537, + "learning_rate": 1.9992865587858965e-05, + "loss": 0.8878, + "step": 2412 + }, + { + "epoch": 0.041695465855681506, + "grad_norm": 1.2954169361510226, + "learning_rate": 1.9992844435724766e-05, + "loss": 0.7419, + "step": 2413 + }, + { + "epoch": 0.04171274536909041, + "grad_norm": 1.2901970140438956, + "learning_rate": 1.9992823252292215e-05, + "loss": 0.664, + "step": 2414 + }, + { + "epoch": 0.04173002488249931, + "grad_norm": 1.6167855642868312, + "learning_rate": 1.9992802037561383e-05, + "loss": 0.6587, + "step": 2415 + }, + { + "epoch": 0.04174730439590821, + "grad_norm": 1.445945326455444, + "learning_rate": 1.9992780791532335e-05, + "loss": 0.7068, + "step": 2416 + }, + { + "epoch": 0.041764583909317116, + "grad_norm": 1.4477019048525586, + "learning_rate": 1.999275951420514e-05, + "loss": 0.6878, + "step": 2417 + }, + { + "epoch": 0.041781863422726016, + "grad_norm": 1.409486100454276, + "learning_rate": 1.9992738205579863e-05, + "loss": 0.9484, + "step": 2418 + }, + { + "epoch": 0.04179914293613492, + "grad_norm": 1.544188559661344, + "learning_rate": 1.999271686565657e-05, + "loss": 0.8782, + "step": 2419 + }, + { + "epoch": 0.04181642244954382, + "grad_norm": 1.6600374689036088, + "learning_rate": 1.9992695494435326e-05, + "loss": 0.6842, + "step": 2420 + }, + { + "epoch": 0.041833701962952725, + "grad_norm": 1.6683431307397691, + "learning_rate": 1.99926740919162e-05, + "loss": 0.8075, + "step": 2421 + }, + { + "epoch": 0.041850981476361626, + "grad_norm": 1.5361195860752987, + "learning_rate": 1.999265265809926e-05, + "loss": 0.8403, + "step": 2422 + }, + { + "epoch": 0.04186826098977053, + "grad_norm": 1.2377490713777903, + "learning_rate": 1.9992631192984575e-05, + "loss": 0.855, + "step": 2423 + }, + { + "epoch": 0.04188554050317943, + "grad_norm": 1.241517393800097, + "learning_rate": 1.9992609696572208e-05, + "loss": 0.6822, + "step": 2424 + }, + { + "epoch": 0.041902820016588335, + "grad_norm": 1.4059341334275748, + "learning_rate": 1.999258816886223e-05, + "loss": 0.8255, + "step": 2425 + }, + { + "epoch": 0.041920099529997236, + "grad_norm": 1.5731817175700078, + "learning_rate": 1.9992566609854705e-05, + "loss": 0.8548, + "step": 2426 + }, + { + "epoch": 0.04193737904340614, + "grad_norm": 1.6318773027305162, + "learning_rate": 1.9992545019549702e-05, + "loss": 0.7431, + "step": 2427 + }, + { + "epoch": 0.04195465855681504, + "grad_norm": 1.1158014108578547, + "learning_rate": 1.9992523397947288e-05, + "loss": 0.7653, + "step": 2428 + }, + { + "epoch": 0.041971938070223945, + "grad_norm": 1.3843446779904356, + "learning_rate": 1.9992501745047532e-05, + "loss": 0.7646, + "step": 2429 + }, + { + "epoch": 0.041989217583632846, + "grad_norm": 1.2603337796013943, + "learning_rate": 1.9992480060850502e-05, + "loss": 0.6926, + "step": 2430 + }, + { + "epoch": 0.042006497097041746, + "grad_norm": 1.408202624674364, + "learning_rate": 1.999245834535626e-05, + "loss": 0.767, + "step": 2431 + }, + { + "epoch": 0.04202377661045065, + "grad_norm": 1.268734782073504, + "learning_rate": 1.9992436598564886e-05, + "loss": 0.779, + "step": 2432 + }, + { + "epoch": 0.042041056123859555, + "grad_norm": 1.208967947598081, + "learning_rate": 1.999241482047644e-05, + "loss": 0.6803, + "step": 2433 + }, + { + "epoch": 0.042058335637268456, + "grad_norm": 1.3656846531581204, + "learning_rate": 1.9992393011090988e-05, + "loss": 1.0032, + "step": 2434 + }, + { + "epoch": 0.042075615150677356, + "grad_norm": 1.093820893132577, + "learning_rate": 1.99923711704086e-05, + "loss": 0.7012, + "step": 2435 + }, + { + "epoch": 0.04209289466408626, + "grad_norm": 1.253499906624103, + "learning_rate": 1.999234929842935e-05, + "loss": 0.8148, + "step": 2436 + }, + { + "epoch": 0.042110174177495165, + "grad_norm": 1.3374683237941842, + "learning_rate": 1.9992327395153303e-05, + "loss": 0.6986, + "step": 2437 + }, + { + "epoch": 0.042127453690904065, + "grad_norm": 1.1401436059556858, + "learning_rate": 1.9992305460580527e-05, + "loss": 0.7788, + "step": 2438 + }, + { + "epoch": 0.042144733204312966, + "grad_norm": 1.853179721802893, + "learning_rate": 1.9992283494711092e-05, + "loss": 0.8268, + "step": 2439 + }, + { + "epoch": 0.04216201271772187, + "grad_norm": 1.2617271054743644, + "learning_rate": 1.999226149754506e-05, + "loss": 0.8143, + "step": 2440 + }, + { + "epoch": 0.042179292231130774, + "grad_norm": 1.1219325612409006, + "learning_rate": 1.9992239469082514e-05, + "loss": 0.7671, + "step": 2441 + }, + { + "epoch": 0.042196571744539675, + "grad_norm": 1.3939203302501186, + "learning_rate": 1.9992217409323506e-05, + "loss": 1.0167, + "step": 2442 + }, + { + "epoch": 0.042213851257948576, + "grad_norm": 1.181755981939456, + "learning_rate": 1.9992195318268118e-05, + "loss": 0.7418, + "step": 2443 + }, + { + "epoch": 0.04223113077135748, + "grad_norm": 1.5214800676085665, + "learning_rate": 1.9992173195916414e-05, + "loss": 1.0187, + "step": 2444 + }, + { + "epoch": 0.042248410284766384, + "grad_norm": 1.3561396949076832, + "learning_rate": 1.9992151042268466e-05, + "loss": 0.7316, + "step": 2445 + }, + { + "epoch": 0.042265689798175285, + "grad_norm": 1.3231238896354052, + "learning_rate": 1.999212885732434e-05, + "loss": 0.7358, + "step": 2446 + }, + { + "epoch": 0.042282969311584186, + "grad_norm": 1.134544438964373, + "learning_rate": 1.9992106641084107e-05, + "loss": 0.6748, + "step": 2447 + }, + { + "epoch": 0.042300248824993086, + "grad_norm": 1.2599575333994588, + "learning_rate": 1.9992084393547836e-05, + "loss": 0.8186, + "step": 2448 + }, + { + "epoch": 0.042317528338401994, + "grad_norm": 1.5319539644346651, + "learning_rate": 1.99920621147156e-05, + "loss": 0.8785, + "step": 2449 + }, + { + "epoch": 0.042334807851810895, + "grad_norm": 1.250724735318345, + "learning_rate": 1.9992039804587463e-05, + "loss": 0.8655, + "step": 2450 + }, + { + "epoch": 0.042352087365219795, + "grad_norm": 1.193529692805326, + "learning_rate": 1.99920174631635e-05, + "loss": 0.7908, + "step": 2451 + }, + { + "epoch": 0.042369366878628696, + "grad_norm": 1.3593944641428148, + "learning_rate": 1.999199509044378e-05, + "loss": 0.666, + "step": 2452 + }, + { + "epoch": 0.0423866463920376, + "grad_norm": 0.7388751506625901, + "learning_rate": 1.999197268642837e-05, + "loss": 0.6593, + "step": 2453 + }, + { + "epoch": 0.042403925905446505, + "grad_norm": 1.5890609604143655, + "learning_rate": 1.999195025111734e-05, + "loss": 1.024, + "step": 2454 + }, + { + "epoch": 0.042421205418855405, + "grad_norm": 1.3356120644591178, + "learning_rate": 1.9991927784510767e-05, + "loss": 0.8346, + "step": 2455 + }, + { + "epoch": 0.042438484932264306, + "grad_norm": 1.125651062662803, + "learning_rate": 1.9991905286608713e-05, + "loss": 0.849, + "step": 2456 + }, + { + "epoch": 0.04245576444567321, + "grad_norm": 1.44768912475231, + "learning_rate": 1.9991882757411256e-05, + "loss": 0.9464, + "step": 2457 + }, + { + "epoch": 0.042473043959082114, + "grad_norm": 1.0625360936078112, + "learning_rate": 1.999186019691846e-05, + "loss": 0.8973, + "step": 2458 + }, + { + "epoch": 0.042490323472491015, + "grad_norm": 1.2362429225093277, + "learning_rate": 1.99918376051304e-05, + "loss": 0.9028, + "step": 2459 + }, + { + "epoch": 0.042507602985899916, + "grad_norm": 1.0303252410404917, + "learning_rate": 1.9991814982047145e-05, + "loss": 0.7632, + "step": 2460 + }, + { + "epoch": 0.042524882499308816, + "grad_norm": 1.4329400181029452, + "learning_rate": 1.9991792327668765e-05, + "loss": 0.7987, + "step": 2461 + }, + { + "epoch": 0.042542162012717724, + "grad_norm": 1.6893164066485584, + "learning_rate": 1.9991769641995333e-05, + "loss": 0.8122, + "step": 2462 + }, + { + "epoch": 0.042559441526126625, + "grad_norm": 1.0614454137184945, + "learning_rate": 1.9991746925026917e-05, + "loss": 0.6519, + "step": 2463 + }, + { + "epoch": 0.042576721039535526, + "grad_norm": 1.4119755362875377, + "learning_rate": 1.9991724176763595e-05, + "loss": 1.0275, + "step": 2464 + }, + { + "epoch": 0.042594000552944426, + "grad_norm": 1.298340986684185, + "learning_rate": 1.999170139720543e-05, + "loss": 0.6635, + "step": 2465 + }, + { + "epoch": 0.042611280066353334, + "grad_norm": 1.2940929907050758, + "learning_rate": 1.99916785863525e-05, + "loss": 1.0327, + "step": 2466 + }, + { + "epoch": 0.042628559579762235, + "grad_norm": 1.0486215644109922, + "learning_rate": 1.999165574420487e-05, + "loss": 0.6716, + "step": 2467 + }, + { + "epoch": 0.042645839093171135, + "grad_norm": 1.2539856327832941, + "learning_rate": 1.9991632870762614e-05, + "loss": 0.9142, + "step": 2468 + }, + { + "epoch": 0.042663118606580036, + "grad_norm": 1.534324351406861, + "learning_rate": 1.9991609966025807e-05, + "loss": 0.9034, + "step": 2469 + }, + { + "epoch": 0.042680398119988944, + "grad_norm": 1.3537861460889213, + "learning_rate": 1.9991587029994515e-05, + "loss": 0.8056, + "step": 2470 + }, + { + "epoch": 0.042697677633397844, + "grad_norm": 1.2099552357511956, + "learning_rate": 1.999156406266882e-05, + "loss": 0.6301, + "step": 2471 + }, + { + "epoch": 0.042714957146806745, + "grad_norm": 1.1574965815555467, + "learning_rate": 1.999154106404878e-05, + "loss": 1.0222, + "step": 2472 + }, + { + "epoch": 0.042732236660215646, + "grad_norm": 1.0621829619976513, + "learning_rate": 1.9991518034134478e-05, + "loss": 0.4558, + "step": 2473 + }, + { + "epoch": 0.042749516173624554, + "grad_norm": 1.4582992107097719, + "learning_rate": 1.999149497292598e-05, + "loss": 0.8593, + "step": 2474 + }, + { + "epoch": 0.042766795687033454, + "grad_norm": 1.1951407520959376, + "learning_rate": 1.9991471880423363e-05, + "loss": 0.8739, + "step": 2475 + }, + { + "epoch": 0.042784075200442355, + "grad_norm": 1.496155162573763, + "learning_rate": 1.9991448756626696e-05, + "loss": 0.8804, + "step": 2476 + }, + { + "epoch": 0.042801354713851256, + "grad_norm": 1.3449702875553553, + "learning_rate": 1.999142560153605e-05, + "loss": 0.6144, + "step": 2477 + }, + { + "epoch": 0.04281863422726016, + "grad_norm": 1.5619664805622395, + "learning_rate": 1.9991402415151503e-05, + "loss": 0.8324, + "step": 2478 + }, + { + "epoch": 0.042835913740669064, + "grad_norm": 1.241940164765288, + "learning_rate": 1.999137919747312e-05, + "loss": 0.723, + "step": 2479 + }, + { + "epoch": 0.042853193254077965, + "grad_norm": 1.2984741319136621, + "learning_rate": 1.9991355948500982e-05, + "loss": 0.7423, + "step": 2480 + }, + { + "epoch": 0.042870472767486865, + "grad_norm": 1.5019100489642974, + "learning_rate": 1.9991332668235155e-05, + "loss": 0.8363, + "step": 2481 + }, + { + "epoch": 0.04288775228089577, + "grad_norm": 1.069443387794566, + "learning_rate": 1.9991309356675715e-05, + "loss": 0.7211, + "step": 2482 + }, + { + "epoch": 0.042905031794304674, + "grad_norm": 1.2838859789044625, + "learning_rate": 1.999128601382274e-05, + "loss": 0.6158, + "step": 2483 + }, + { + "epoch": 0.042922311307713575, + "grad_norm": 1.2281064584831254, + "learning_rate": 1.9991262639676292e-05, + "loss": 0.5871, + "step": 2484 + }, + { + "epoch": 0.042939590821122475, + "grad_norm": 1.5243095702381853, + "learning_rate": 1.999123923423645e-05, + "loss": 1.0859, + "step": 2485 + }, + { + "epoch": 0.04295687033453138, + "grad_norm": 1.0025666111877438, + "learning_rate": 1.999121579750329e-05, + "loss": 0.6309, + "step": 2486 + }, + { + "epoch": 0.042974149847940284, + "grad_norm": 1.2664621323836476, + "learning_rate": 1.9991192329476884e-05, + "loss": 0.7569, + "step": 2487 + }, + { + "epoch": 0.042991429361349184, + "grad_norm": 1.4648890894447368, + "learning_rate": 1.99911688301573e-05, + "loss": 0.9147, + "step": 2488 + }, + { + "epoch": 0.043008708874758085, + "grad_norm": 1.0849175483552893, + "learning_rate": 1.999114529954462e-05, + "loss": 0.804, + "step": 2489 + }, + { + "epoch": 0.043025988388166986, + "grad_norm": 1.1395370556085458, + "learning_rate": 1.9991121737638913e-05, + "loss": 0.5993, + "step": 2490 + }, + { + "epoch": 0.04304326790157589, + "grad_norm": 1.129753894252149, + "learning_rate": 1.999109814444025e-05, + "loss": 0.7009, + "step": 2491 + }, + { + "epoch": 0.043060547414984794, + "grad_norm": 1.0147870057816437, + "learning_rate": 1.999107451994871e-05, + "loss": 0.7348, + "step": 2492 + }, + { + "epoch": 0.043077826928393695, + "grad_norm": 1.0470101994063798, + "learning_rate": 1.9991050864164367e-05, + "loss": 0.6588, + "step": 2493 + }, + { + "epoch": 0.043095106441802596, + "grad_norm": 1.437824348442693, + "learning_rate": 1.999102717708729e-05, + "loss": 0.5565, + "step": 2494 + }, + { + "epoch": 0.0431123859552115, + "grad_norm": 1.0018512044063403, + "learning_rate": 1.999100345871756e-05, + "loss": 0.7081, + "step": 2495 + }, + { + "epoch": 0.043129665468620404, + "grad_norm": 1.4393183102398799, + "learning_rate": 1.999097970905525e-05, + "loss": 0.8269, + "step": 2496 + }, + { + "epoch": 0.043146944982029305, + "grad_norm": 1.386939409034699, + "learning_rate": 1.9990955928100427e-05, + "loss": 0.7759, + "step": 2497 + }, + { + "epoch": 0.043164224495438205, + "grad_norm": 1.4087547520707702, + "learning_rate": 1.999093211585317e-05, + "loss": 0.6753, + "step": 2498 + }, + { + "epoch": 0.04318150400884711, + "grad_norm": 1.4631815384073548, + "learning_rate": 1.999090827231356e-05, + "loss": 0.8528, + "step": 2499 + }, + { + "epoch": 0.043198783522256014, + "grad_norm": 1.6392202738572264, + "learning_rate": 1.9990884397481664e-05, + "loss": 0.8837, + "step": 2500 + }, + { + "epoch": 0.043216063035664914, + "grad_norm": 1.1598632543377287, + "learning_rate": 1.9990860491357557e-05, + "loss": 0.717, + "step": 2501 + }, + { + "epoch": 0.043233342549073815, + "grad_norm": 1.758750462891826, + "learning_rate": 1.9990836553941316e-05, + "loss": 0.6612, + "step": 2502 + }, + { + "epoch": 0.04325062206248272, + "grad_norm": 1.3145188382499322, + "learning_rate": 1.9990812585233018e-05, + "loss": 0.8691, + "step": 2503 + }, + { + "epoch": 0.043267901575891624, + "grad_norm": 1.2810205716497658, + "learning_rate": 1.9990788585232735e-05, + "loss": 0.7255, + "step": 2504 + }, + { + "epoch": 0.043285181089300524, + "grad_norm": 1.4049539621869749, + "learning_rate": 1.999076455394054e-05, + "loss": 0.729, + "step": 2505 + }, + { + "epoch": 0.043302460602709425, + "grad_norm": 1.3373582183890516, + "learning_rate": 1.9990740491356516e-05, + "loss": 0.7857, + "step": 2506 + }, + { + "epoch": 0.04331974011611833, + "grad_norm": 1.4266470567476888, + "learning_rate": 1.9990716397480726e-05, + "loss": 0.7232, + "step": 2507 + }, + { + "epoch": 0.04333701962952723, + "grad_norm": 1.1568294843517846, + "learning_rate": 1.999069227231326e-05, + "loss": 0.7293, + "step": 2508 + }, + { + "epoch": 0.043354299142936134, + "grad_norm": 1.7386637115016823, + "learning_rate": 1.9990668115854182e-05, + "loss": 0.9521, + "step": 2509 + }, + { + "epoch": 0.043371578656345035, + "grad_norm": 1.2383889523324487, + "learning_rate": 1.9990643928103574e-05, + "loss": 0.7262, + "step": 2510 + }, + { + "epoch": 0.04338885816975394, + "grad_norm": 1.121053643908253, + "learning_rate": 1.9990619709061512e-05, + "loss": 0.8492, + "step": 2511 + }, + { + "epoch": 0.04340613768316284, + "grad_norm": 1.4083601148829823, + "learning_rate": 1.9990595458728068e-05, + "loss": 0.73, + "step": 2512 + }, + { + "epoch": 0.043423417196571744, + "grad_norm": 1.230761008627521, + "learning_rate": 1.999057117710332e-05, + "loss": 0.9121, + "step": 2513 + }, + { + "epoch": 0.043440696709980645, + "grad_norm": 1.4803178713506742, + "learning_rate": 1.9990546864187343e-05, + "loss": 0.6673, + "step": 2514 + }, + { + "epoch": 0.04345797622338955, + "grad_norm": 1.3463248408914028, + "learning_rate": 1.9990522519980216e-05, + "loss": 0.8518, + "step": 2515 + }, + { + "epoch": 0.04347525573679845, + "grad_norm": 1.4110570017208481, + "learning_rate": 1.999049814448201e-05, + "loss": 0.6911, + "step": 2516 + }, + { + "epoch": 0.043492535250207354, + "grad_norm": 1.1711178561909084, + "learning_rate": 1.9990473737692805e-05, + "loss": 0.8675, + "step": 2517 + }, + { + "epoch": 0.043509814763616254, + "grad_norm": 1.347092550035394, + "learning_rate": 1.999044929961268e-05, + "loss": 1.0269, + "step": 2518 + }, + { + "epoch": 0.04352709427702516, + "grad_norm": 1.3500184861401647, + "learning_rate": 1.9990424830241708e-05, + "loss": 0.7887, + "step": 2519 + }, + { + "epoch": 0.04354437379043406, + "grad_norm": 1.2648860529834096, + "learning_rate": 1.9990400329579965e-05, + "loss": 0.6784, + "step": 2520 + }, + { + "epoch": 0.04356165330384296, + "grad_norm": 0.7905745040493624, + "learning_rate": 1.9990375797627527e-05, + "loss": 0.6846, + "step": 2521 + }, + { + "epoch": 0.043578932817251864, + "grad_norm": 1.3301280476475374, + "learning_rate": 1.9990351234384476e-05, + "loss": 1.0152, + "step": 2522 + }, + { + "epoch": 0.04359621233066077, + "grad_norm": 0.8094880784293625, + "learning_rate": 1.9990326639850886e-05, + "loss": 0.5769, + "step": 2523 + }, + { + "epoch": 0.04361349184406967, + "grad_norm": 1.2359113499932801, + "learning_rate": 1.999030201402683e-05, + "loss": 0.643, + "step": 2524 + }, + { + "epoch": 0.04363077135747857, + "grad_norm": 1.403095270001842, + "learning_rate": 1.999027735691239e-05, + "loss": 0.6534, + "step": 2525 + }, + { + "epoch": 0.043648050870887474, + "grad_norm": 1.5581855025220008, + "learning_rate": 1.9990252668507644e-05, + "loss": 0.7768, + "step": 2526 + }, + { + "epoch": 0.04366533038429638, + "grad_norm": 0.9900551517243654, + "learning_rate": 1.9990227948812668e-05, + "loss": 0.7432, + "step": 2527 + }, + { + "epoch": 0.04368260989770528, + "grad_norm": 0.6196170150366535, + "learning_rate": 1.9990203197827535e-05, + "loss": 0.6238, + "step": 2528 + }, + { + "epoch": 0.04369988941111418, + "grad_norm": 1.3278539195068546, + "learning_rate": 1.999017841555233e-05, + "loss": 0.572, + "step": 2529 + }, + { + "epoch": 0.043717168924523084, + "grad_norm": 1.3214171956932477, + "learning_rate": 1.9990153601987127e-05, + "loss": 0.5631, + "step": 2530 + }, + { + "epoch": 0.043734448437931984, + "grad_norm": 1.2622230865495718, + "learning_rate": 1.9990128757132e-05, + "loss": 0.7018, + "step": 2531 + }, + { + "epoch": 0.04375172795134089, + "grad_norm": 1.4312988318821134, + "learning_rate": 1.9990103880987034e-05, + "loss": 0.9279, + "step": 2532 + }, + { + "epoch": 0.04376900746474979, + "grad_norm": 1.5473976593662884, + "learning_rate": 1.9990078973552304e-05, + "loss": 1.0084, + "step": 2533 + }, + { + "epoch": 0.043786286978158694, + "grad_norm": 1.2637620284759357, + "learning_rate": 1.9990054034827885e-05, + "loss": 0.5338, + "step": 2534 + }, + { + "epoch": 0.043803566491567594, + "grad_norm": 1.2710290110516362, + "learning_rate": 1.9990029064813856e-05, + "loss": 0.5518, + "step": 2535 + }, + { + "epoch": 0.0438208460049765, + "grad_norm": 1.5883961747914932, + "learning_rate": 1.99900040635103e-05, + "loss": 0.61, + "step": 2536 + }, + { + "epoch": 0.0438381255183854, + "grad_norm": 0.8596231743680777, + "learning_rate": 1.998997903091729e-05, + "loss": 0.553, + "step": 2537 + }, + { + "epoch": 0.0438554050317943, + "grad_norm": 1.620557674017574, + "learning_rate": 1.998995396703491e-05, + "loss": 0.7005, + "step": 2538 + }, + { + "epoch": 0.043872684545203204, + "grad_norm": 1.210693505136201, + "learning_rate": 1.998992887186323e-05, + "loss": 0.758, + "step": 2539 + }, + { + "epoch": 0.04388996405861211, + "grad_norm": 1.5994773385568972, + "learning_rate": 1.9989903745402337e-05, + "loss": 1.1565, + "step": 2540 + }, + { + "epoch": 0.04390724357202101, + "grad_norm": 1.4882436542947168, + "learning_rate": 1.998987858765231e-05, + "loss": 0.8292, + "step": 2541 + }, + { + "epoch": 0.04392452308542991, + "grad_norm": 0.9117296093474646, + "learning_rate": 1.998985339861322e-05, + "loss": 0.4813, + "step": 2542 + }, + { + "epoch": 0.043941802598838814, + "grad_norm": 1.5975499163622695, + "learning_rate": 1.9989828178285148e-05, + "loss": 0.9406, + "step": 2543 + }, + { + "epoch": 0.04395908211224772, + "grad_norm": 1.4240245631491115, + "learning_rate": 1.9989802926668175e-05, + "loss": 0.7433, + "step": 2544 + }, + { + "epoch": 0.04397636162565662, + "grad_norm": 1.2194631300229966, + "learning_rate": 1.998977764376238e-05, + "loss": 0.762, + "step": 2545 + }, + { + "epoch": 0.04399364113906552, + "grad_norm": 0.8621807087647, + "learning_rate": 1.9989752329567843e-05, + "loss": 0.5343, + "step": 2546 + }, + { + "epoch": 0.044010920652474424, + "grad_norm": 1.4583614536130491, + "learning_rate": 1.9989726984084644e-05, + "loss": 0.9889, + "step": 2547 + }, + { + "epoch": 0.04402820016588333, + "grad_norm": 1.303449883507293, + "learning_rate": 1.998970160731286e-05, + "loss": 0.8261, + "step": 2548 + }, + { + "epoch": 0.04404547967929223, + "grad_norm": 1.4416913709372567, + "learning_rate": 1.998967619925257e-05, + "loss": 0.9713, + "step": 2549 + }, + { + "epoch": 0.04406275919270113, + "grad_norm": 0.992094200775631, + "learning_rate": 1.9989650759903858e-05, + "loss": 0.7412, + "step": 2550 + }, + { + "epoch": 0.044080038706110033, + "grad_norm": 1.367531212249042, + "learning_rate": 1.9989625289266797e-05, + "loss": 0.8682, + "step": 2551 + }, + { + "epoch": 0.04409731821951894, + "grad_norm": 1.1576837668681534, + "learning_rate": 1.9989599787341475e-05, + "loss": 0.6703, + "step": 2552 + }, + { + "epoch": 0.04411459773292784, + "grad_norm": 1.4830497075836155, + "learning_rate": 1.9989574254127965e-05, + "loss": 0.967, + "step": 2553 + }, + { + "epoch": 0.04413187724633674, + "grad_norm": 1.248645325805292, + "learning_rate": 1.998954868962635e-05, + "loss": 0.776, + "step": 2554 + }, + { + "epoch": 0.04414915675974564, + "grad_norm": 1.3683489601970018, + "learning_rate": 1.9989523093836704e-05, + "loss": 0.8162, + "step": 2555 + }, + { + "epoch": 0.04416643627315455, + "grad_norm": 1.4389134315824763, + "learning_rate": 1.998949746675912e-05, + "loss": 0.9267, + "step": 2556 + }, + { + "epoch": 0.04418371578656345, + "grad_norm": 1.0461148786538546, + "learning_rate": 1.9989471808393665e-05, + "loss": 0.8815, + "step": 2557 + }, + { + "epoch": 0.04420099529997235, + "grad_norm": 1.3472894511655957, + "learning_rate": 1.998944611874043e-05, + "loss": 0.9847, + "step": 2558 + }, + { + "epoch": 0.04421827481338125, + "grad_norm": 1.0826149738930184, + "learning_rate": 1.998942039779949e-05, + "loss": 0.4572, + "step": 2559 + }, + { + "epoch": 0.04423555432679016, + "grad_norm": 1.8342644699401434, + "learning_rate": 1.998939464557092e-05, + "loss": 0.8576, + "step": 2560 + }, + { + "epoch": 0.04425283384019906, + "grad_norm": 1.231919304898245, + "learning_rate": 1.9989368862054814e-05, + "loss": 0.78, + "step": 2561 + }, + { + "epoch": 0.04427011335360796, + "grad_norm": 0.9823236308988761, + "learning_rate": 1.998934304725124e-05, + "loss": 0.7882, + "step": 2562 + }, + { + "epoch": 0.04428739286701686, + "grad_norm": 0.8083699409259084, + "learning_rate": 1.9989317201160288e-05, + "loss": 0.7373, + "step": 2563 + }, + { + "epoch": 0.04430467238042577, + "grad_norm": 1.2726190769322763, + "learning_rate": 1.9989291323782034e-05, + "loss": 0.8117, + "step": 2564 + }, + { + "epoch": 0.04432195189383467, + "grad_norm": 1.314834503391165, + "learning_rate": 1.9989265415116563e-05, + "loss": 0.951, + "step": 2565 + }, + { + "epoch": 0.04433923140724357, + "grad_norm": 1.5506658877451405, + "learning_rate": 1.998923947516395e-05, + "loss": 1.0973, + "step": 2566 + }, + { + "epoch": 0.04435651092065247, + "grad_norm": 0.9075786784722643, + "learning_rate": 1.9989213503924282e-05, + "loss": 0.6185, + "step": 2567 + }, + { + "epoch": 0.04437379043406137, + "grad_norm": 1.399918388753188, + "learning_rate": 1.9989187501397636e-05, + "loss": 0.8062, + "step": 2568 + }, + { + "epoch": 0.04439106994747028, + "grad_norm": 1.24693747531056, + "learning_rate": 1.9989161467584096e-05, + "loss": 0.8158, + "step": 2569 + }, + { + "epoch": 0.04440834946087918, + "grad_norm": 1.2115013846046239, + "learning_rate": 1.9989135402483745e-05, + "loss": 0.8376, + "step": 2570 + }, + { + "epoch": 0.04442562897428808, + "grad_norm": 1.3367069945473762, + "learning_rate": 1.9989109306096663e-05, + "loss": 0.7835, + "step": 2571 + }, + { + "epoch": 0.04444290848769698, + "grad_norm": 1.4152511741598925, + "learning_rate": 1.998908317842293e-05, + "loss": 0.8709, + "step": 2572 + }, + { + "epoch": 0.04446018800110589, + "grad_norm": 1.7636180944745834, + "learning_rate": 1.998905701946263e-05, + "loss": 0.7931, + "step": 2573 + }, + { + "epoch": 0.04447746751451479, + "grad_norm": 1.4224215955536648, + "learning_rate": 1.9989030829215843e-05, + "loss": 0.8758, + "step": 2574 + }, + { + "epoch": 0.04449474702792369, + "grad_norm": 1.2791662648311275, + "learning_rate": 1.9989004607682655e-05, + "loss": 0.8038, + "step": 2575 + }, + { + "epoch": 0.04451202654133259, + "grad_norm": 1.35252406928263, + "learning_rate": 1.9988978354863143e-05, + "loss": 0.8489, + "step": 2576 + }, + { + "epoch": 0.0445293060547415, + "grad_norm": 1.5742546839773328, + "learning_rate": 1.9988952070757392e-05, + "loss": 1.0162, + "step": 2577 + }, + { + "epoch": 0.0445465855681504, + "grad_norm": 1.910464485462648, + "learning_rate": 1.9988925755365486e-05, + "loss": 0.8575, + "step": 2578 + }, + { + "epoch": 0.0445638650815593, + "grad_norm": 1.5445633657590745, + "learning_rate": 1.9988899408687503e-05, + "loss": 0.8597, + "step": 2579 + }, + { + "epoch": 0.0445811445949682, + "grad_norm": 1.3647797262244719, + "learning_rate": 1.998887303072353e-05, + "loss": 0.7422, + "step": 2580 + }, + { + "epoch": 0.04459842410837711, + "grad_norm": 1.2695703586619687, + "learning_rate": 1.9988846621473647e-05, + "loss": 0.8752, + "step": 2581 + }, + { + "epoch": 0.04461570362178601, + "grad_norm": 1.1195489240951437, + "learning_rate": 1.9988820180937932e-05, + "loss": 0.6782, + "step": 2582 + }, + { + "epoch": 0.04463298313519491, + "grad_norm": 1.2090838209375816, + "learning_rate": 1.998879370911648e-05, + "loss": 0.6876, + "step": 2583 + }, + { + "epoch": 0.04465026264860381, + "grad_norm": 1.20737585118302, + "learning_rate": 1.9988767206009364e-05, + "loss": 0.7582, + "step": 2584 + }, + { + "epoch": 0.04466754216201272, + "grad_norm": 0.9307058749458499, + "learning_rate": 1.9988740671616673e-05, + "loss": 0.6871, + "step": 2585 + }, + { + "epoch": 0.04468482167542162, + "grad_norm": 1.2409580495475452, + "learning_rate": 1.9988714105938484e-05, + "loss": 0.7948, + "step": 2586 + }, + { + "epoch": 0.04470210118883052, + "grad_norm": 1.2377570944876068, + "learning_rate": 1.9988687508974883e-05, + "loss": 0.6077, + "step": 2587 + }, + { + "epoch": 0.04471938070223942, + "grad_norm": 1.545804023718106, + "learning_rate": 1.9988660880725957e-05, + "loss": 0.869, + "step": 2588 + }, + { + "epoch": 0.04473666021564833, + "grad_norm": 1.2643777959010776, + "learning_rate": 1.998863422119178e-05, + "loss": 0.7606, + "step": 2589 + }, + { + "epoch": 0.04475393972905723, + "grad_norm": 1.4583594384589447, + "learning_rate": 1.9988607530372448e-05, + "loss": 0.8869, + "step": 2590 + }, + { + "epoch": 0.04477121924246613, + "grad_norm": 1.4613592354420337, + "learning_rate": 1.9988580808268033e-05, + "loss": 0.8139, + "step": 2591 + }, + { + "epoch": 0.04478849875587503, + "grad_norm": 1.3498717115763001, + "learning_rate": 1.998855405487863e-05, + "loss": 0.8653, + "step": 2592 + }, + { + "epoch": 0.04480577826928394, + "grad_norm": 1.514898842170295, + "learning_rate": 1.998852727020431e-05, + "loss": 0.9828, + "step": 2593 + }, + { + "epoch": 0.04482305778269284, + "grad_norm": 0.9721323089999212, + "learning_rate": 1.9988500454245166e-05, + "loss": 0.5263, + "step": 2594 + }, + { + "epoch": 0.04484033729610174, + "grad_norm": 1.7922891313772698, + "learning_rate": 1.998847360700128e-05, + "loss": 1.127, + "step": 2595 + }, + { + "epoch": 0.04485761680951064, + "grad_norm": 0.8897760000186778, + "learning_rate": 1.9988446728472735e-05, + "loss": 0.6121, + "step": 2596 + }, + { + "epoch": 0.04487489632291955, + "grad_norm": 1.466353124306541, + "learning_rate": 1.9988419818659615e-05, + "loss": 0.8007, + "step": 2597 + }, + { + "epoch": 0.04489217583632845, + "grad_norm": 1.0867639368116717, + "learning_rate": 1.9988392877562005e-05, + "loss": 0.8265, + "step": 2598 + }, + { + "epoch": 0.04490945534973735, + "grad_norm": 1.4829272880207598, + "learning_rate": 1.998836590517999e-05, + "loss": 0.683, + "step": 2599 + }, + { + "epoch": 0.04492673486314625, + "grad_norm": 1.4498020777917449, + "learning_rate": 1.9988338901513656e-05, + "loss": 0.6747, + "step": 2600 + }, + { + "epoch": 0.04494401437655516, + "grad_norm": 1.3497363005084217, + "learning_rate": 1.9988311866563084e-05, + "loss": 0.6722, + "step": 2601 + }, + { + "epoch": 0.04496129388996406, + "grad_norm": 1.1801213092140617, + "learning_rate": 1.998828480032836e-05, + "loss": 0.834, + "step": 2602 + }, + { + "epoch": 0.04497857340337296, + "grad_norm": 1.2011417589577673, + "learning_rate": 1.998825770280957e-05, + "loss": 0.8033, + "step": 2603 + }, + { + "epoch": 0.04499585291678186, + "grad_norm": 1.9168863855724163, + "learning_rate": 1.9988230574006796e-05, + "loss": 1.0449, + "step": 2604 + }, + { + "epoch": 0.04501313243019077, + "grad_norm": 1.3734933936757847, + "learning_rate": 1.9988203413920123e-05, + "loss": 0.7773, + "step": 2605 + }, + { + "epoch": 0.04503041194359967, + "grad_norm": 0.9743081990582398, + "learning_rate": 1.9988176222549644e-05, + "loss": 0.7629, + "step": 2606 + }, + { + "epoch": 0.04504769145700857, + "grad_norm": 1.629946074414381, + "learning_rate": 1.9988148999895432e-05, + "loss": 0.9617, + "step": 2607 + }, + { + "epoch": 0.04506497097041747, + "grad_norm": 1.339376891367978, + "learning_rate": 1.998812174595758e-05, + "loss": 0.7495, + "step": 2608 + }, + { + "epoch": 0.04508225048382637, + "grad_norm": 1.2760727787364687, + "learning_rate": 1.9988094460736175e-05, + "loss": 0.7078, + "step": 2609 + }, + { + "epoch": 0.04509952999723528, + "grad_norm": 1.634608612098083, + "learning_rate": 1.9988067144231292e-05, + "loss": 0.9826, + "step": 2610 + }, + { + "epoch": 0.04511680951064418, + "grad_norm": 1.2099214020235505, + "learning_rate": 1.998803979644303e-05, + "loss": 0.7018, + "step": 2611 + }, + { + "epoch": 0.04513408902405308, + "grad_norm": 1.0761906233455014, + "learning_rate": 1.998801241737147e-05, + "loss": 0.5864, + "step": 2612 + }, + { + "epoch": 0.04515136853746198, + "grad_norm": 1.4268308292029355, + "learning_rate": 1.9987985007016687e-05, + "loss": 0.9291, + "step": 2613 + }, + { + "epoch": 0.04516864805087089, + "grad_norm": 1.111311817022449, + "learning_rate": 1.9987957565378783e-05, + "loss": 0.6518, + "step": 2614 + }, + { + "epoch": 0.04518592756427979, + "grad_norm": 1.6298507241248281, + "learning_rate": 1.9987930092457835e-05, + "loss": 0.9063, + "step": 2615 + }, + { + "epoch": 0.04520320707768869, + "grad_norm": 1.4535838486979775, + "learning_rate": 1.998790258825393e-05, + "loss": 0.8488, + "step": 2616 + }, + { + "epoch": 0.04522048659109759, + "grad_norm": 1.2252904330690648, + "learning_rate": 1.9987875052767155e-05, + "loss": 1.1105, + "step": 2617 + }, + { + "epoch": 0.0452377661045065, + "grad_norm": 1.214581109763265, + "learning_rate": 1.99878474859976e-05, + "loss": 0.8425, + "step": 2618 + }, + { + "epoch": 0.0452550456179154, + "grad_norm": 1.457641602454763, + "learning_rate": 1.998781988794534e-05, + "loss": 0.8786, + "step": 2619 + }, + { + "epoch": 0.0452723251313243, + "grad_norm": 1.359138700575964, + "learning_rate": 1.9987792258610475e-05, + "loss": 1.0649, + "step": 2620 + }, + { + "epoch": 0.0452896046447332, + "grad_norm": 0.9120536715139705, + "learning_rate": 1.9987764597993084e-05, + "loss": 0.6095, + "step": 2621 + }, + { + "epoch": 0.04530688415814211, + "grad_norm": 0.9775099453038604, + "learning_rate": 1.9987736906093258e-05, + "loss": 0.6305, + "step": 2622 + }, + { + "epoch": 0.04532416367155101, + "grad_norm": 1.1909155980654678, + "learning_rate": 1.9987709182911076e-05, + "loss": 0.874, + "step": 2623 + }, + { + "epoch": 0.04534144318495991, + "grad_norm": 1.0876408367162236, + "learning_rate": 1.9987681428446635e-05, + "loss": 0.9435, + "step": 2624 + }, + { + "epoch": 0.04535872269836881, + "grad_norm": 1.4775589680029477, + "learning_rate": 1.9987653642700015e-05, + "loss": 1.0965, + "step": 2625 + }, + { + "epoch": 0.04537600221177772, + "grad_norm": 0.791637971242099, + "learning_rate": 1.9987625825671305e-05, + "loss": 0.6121, + "step": 2626 + }, + { + "epoch": 0.04539328172518662, + "grad_norm": 1.3878710107440306, + "learning_rate": 1.9987597977360587e-05, + "loss": 0.8972, + "step": 2627 + }, + { + "epoch": 0.04541056123859552, + "grad_norm": 1.2115172542941386, + "learning_rate": 1.9987570097767957e-05, + "loss": 0.8653, + "step": 2628 + }, + { + "epoch": 0.04542784075200442, + "grad_norm": 1.250002828060874, + "learning_rate": 1.99875421868935e-05, + "loss": 0.8698, + "step": 2629 + }, + { + "epoch": 0.04544512026541333, + "grad_norm": 1.3924660368592883, + "learning_rate": 1.99875142447373e-05, + "loss": 0.7915, + "step": 2630 + }, + { + "epoch": 0.04546239977882223, + "grad_norm": 1.9677740495528193, + "learning_rate": 1.9987486271299445e-05, + "loss": 0.7916, + "step": 2631 + }, + { + "epoch": 0.04547967929223113, + "grad_norm": 1.1420962010225353, + "learning_rate": 1.9987458266580025e-05, + "loss": 0.724, + "step": 2632 + }, + { + "epoch": 0.04549695880564003, + "grad_norm": 1.3044462487466473, + "learning_rate": 1.998743023057913e-05, + "loss": 0.73, + "step": 2633 + }, + { + "epoch": 0.04551423831904894, + "grad_norm": 1.424280693354525, + "learning_rate": 1.998740216329684e-05, + "loss": 1.0233, + "step": 2634 + }, + { + "epoch": 0.04553151783245784, + "grad_norm": 1.4426413469152404, + "learning_rate": 1.9987374064733253e-05, + "loss": 0.8444, + "step": 2635 + }, + { + "epoch": 0.04554879734586674, + "grad_norm": 1.20974060812509, + "learning_rate": 1.9987345934888445e-05, + "loss": 0.8402, + "step": 2636 + }, + { + "epoch": 0.04556607685927564, + "grad_norm": 1.0780667977860856, + "learning_rate": 1.9987317773762514e-05, + "loss": 0.7589, + "step": 2637 + }, + { + "epoch": 0.04558335637268455, + "grad_norm": 1.3276043890732638, + "learning_rate": 1.998728958135554e-05, + "loss": 0.7903, + "step": 2638 + }, + { + "epoch": 0.04560063588609345, + "grad_norm": 1.3159517605285873, + "learning_rate": 1.998726135766762e-05, + "loss": 0.8492, + "step": 2639 + }, + { + "epoch": 0.04561791539950235, + "grad_norm": 1.3373416642008933, + "learning_rate": 1.9987233102698843e-05, + "loss": 0.8734, + "step": 2640 + }, + { + "epoch": 0.04563519491291125, + "grad_norm": 1.1526544363001463, + "learning_rate": 1.9987204816449286e-05, + "loss": 0.8622, + "step": 2641 + }, + { + "epoch": 0.04565247442632016, + "grad_norm": 1.398516492522069, + "learning_rate": 1.9987176498919047e-05, + "loss": 0.8911, + "step": 2642 + }, + { + "epoch": 0.04566975393972906, + "grad_norm": 1.0981808531368797, + "learning_rate": 1.998714815010821e-05, + "loss": 0.7778, + "step": 2643 + }, + { + "epoch": 0.04568703345313796, + "grad_norm": 1.1933609232358757, + "learning_rate": 1.998711977001687e-05, + "loss": 0.7858, + "step": 2644 + }, + { + "epoch": 0.04570431296654686, + "grad_norm": 1.6620037117095896, + "learning_rate": 1.9987091358645106e-05, + "loss": 0.8295, + "step": 2645 + }, + { + "epoch": 0.04572159247995576, + "grad_norm": 1.025106208325839, + "learning_rate": 1.998706291599302e-05, + "loss": 0.7272, + "step": 2646 + }, + { + "epoch": 0.04573887199336467, + "grad_norm": 1.0780065129193535, + "learning_rate": 1.9987034442060688e-05, + "loss": 1.0576, + "step": 2647 + }, + { + "epoch": 0.04575615150677357, + "grad_norm": 1.309118880866163, + "learning_rate": 1.998700593684821e-05, + "loss": 1.0137, + "step": 2648 + }, + { + "epoch": 0.04577343102018247, + "grad_norm": 1.485298061789168, + "learning_rate": 1.9986977400355666e-05, + "loss": 1.0152, + "step": 2649 + }, + { + "epoch": 0.04579071053359137, + "grad_norm": 1.4983937444982764, + "learning_rate": 1.9986948832583156e-05, + "loss": 0.9051, + "step": 2650 + }, + { + "epoch": 0.04580799004700028, + "grad_norm": 1.2865170571140103, + "learning_rate": 1.998692023353076e-05, + "loss": 0.7827, + "step": 2651 + }, + { + "epoch": 0.04582526956040918, + "grad_norm": 2.1432558489898543, + "learning_rate": 1.9986891603198568e-05, + "loss": 0.7244, + "step": 2652 + }, + { + "epoch": 0.04584254907381808, + "grad_norm": 0.8903629788753095, + "learning_rate": 1.9986862941586672e-05, + "loss": 0.6867, + "step": 2653 + }, + { + "epoch": 0.04585982858722698, + "grad_norm": 0.6381418231057266, + "learning_rate": 1.9986834248695168e-05, + "loss": 0.5425, + "step": 2654 + }, + { + "epoch": 0.04587710810063589, + "grad_norm": 1.2215094958555965, + "learning_rate": 1.9986805524524136e-05, + "loss": 0.6719, + "step": 2655 + }, + { + "epoch": 0.04589438761404479, + "grad_norm": 1.228272637450298, + "learning_rate": 1.998677676907367e-05, + "loss": 0.8841, + "step": 2656 + }, + { + "epoch": 0.04591166712745369, + "grad_norm": 1.2970700107081554, + "learning_rate": 1.998674798234386e-05, + "loss": 0.7876, + "step": 2657 + }, + { + "epoch": 0.04592894664086259, + "grad_norm": 1.013967022091346, + "learning_rate": 1.99867191643348e-05, + "loss": 0.5426, + "step": 2658 + }, + { + "epoch": 0.0459462261542715, + "grad_norm": 1.2546196161511345, + "learning_rate": 1.998669031504657e-05, + "loss": 0.9092, + "step": 2659 + }, + { + "epoch": 0.0459635056676804, + "grad_norm": 1.123847214923689, + "learning_rate": 1.9986661434479275e-05, + "loss": 0.7393, + "step": 2660 + }, + { + "epoch": 0.0459807851810893, + "grad_norm": 1.5164694181850662, + "learning_rate": 1.9986632522632993e-05, + "loss": 0.8378, + "step": 2661 + }, + { + "epoch": 0.0459980646944982, + "grad_norm": 1.3109318702594626, + "learning_rate": 1.998660357950782e-05, + "loss": 1.0073, + "step": 2662 + }, + { + "epoch": 0.04601534420790711, + "grad_norm": 1.2574675754403597, + "learning_rate": 1.998657460510385e-05, + "loss": 0.667, + "step": 2663 + }, + { + "epoch": 0.04603262372131601, + "grad_norm": 1.2888466596609807, + "learning_rate": 1.998654559942116e-05, + "loss": 0.7503, + "step": 2664 + }, + { + "epoch": 0.04604990323472491, + "grad_norm": 1.6876903461192811, + "learning_rate": 1.9986516562459858e-05, + "loss": 0.9871, + "step": 2665 + }, + { + "epoch": 0.04606718274813381, + "grad_norm": 1.2115917869404023, + "learning_rate": 1.998648749422002e-05, + "loss": 0.6532, + "step": 2666 + }, + { + "epoch": 0.04608446226154272, + "grad_norm": 1.8764619164244167, + "learning_rate": 1.9986458394701752e-05, + "loss": 1.0488, + "step": 2667 + }, + { + "epoch": 0.04610174177495162, + "grad_norm": 1.1405347835370983, + "learning_rate": 1.998642926390513e-05, + "loss": 0.6641, + "step": 2668 + }, + { + "epoch": 0.04611902128836052, + "grad_norm": 1.186223010503754, + "learning_rate": 1.998640010183026e-05, + "loss": 0.6393, + "step": 2669 + }, + { + "epoch": 0.04613630080176942, + "grad_norm": 1.270611092446953, + "learning_rate": 1.998637090847722e-05, + "loss": 0.8993, + "step": 2670 + }, + { + "epoch": 0.04615358031517833, + "grad_norm": 1.0002603079900236, + "learning_rate": 1.998634168384611e-05, + "loss": 0.6995, + "step": 2671 + }, + { + "epoch": 0.04617085982858723, + "grad_norm": 0.7277605579977813, + "learning_rate": 1.9986312427937018e-05, + "loss": 1.0395, + "step": 2672 + }, + { + "epoch": 0.04618813934199613, + "grad_norm": 1.7718563473298297, + "learning_rate": 1.9986283140750037e-05, + "loss": 0.9607, + "step": 2673 + }, + { + "epoch": 0.04620541885540503, + "grad_norm": 1.4592298118514282, + "learning_rate": 1.998625382228526e-05, + "loss": 0.9266, + "step": 2674 + }, + { + "epoch": 0.04622269836881394, + "grad_norm": 1.3731862225394584, + "learning_rate": 1.9986224472542775e-05, + "loss": 0.6899, + "step": 2675 + }, + { + "epoch": 0.04623997788222284, + "grad_norm": 0.9071544410298135, + "learning_rate": 1.9986195091522677e-05, + "loss": 0.8362, + "step": 2676 + }, + { + "epoch": 0.04625725739563174, + "grad_norm": 1.3191096502711106, + "learning_rate": 1.9986165679225057e-05, + "loss": 0.8034, + "step": 2677 + }, + { + "epoch": 0.04627453690904064, + "grad_norm": 1.3314559262108008, + "learning_rate": 1.9986136235650007e-05, + "loss": 0.87, + "step": 2678 + }, + { + "epoch": 0.04629181642244955, + "grad_norm": 0.9536005925406461, + "learning_rate": 1.998610676079762e-05, + "loss": 0.5515, + "step": 2679 + }, + { + "epoch": 0.04630909593585845, + "grad_norm": 1.7609122826735832, + "learning_rate": 1.9986077254667986e-05, + "loss": 0.9233, + "step": 2680 + }, + { + "epoch": 0.04632637544926735, + "grad_norm": 1.1080653943712033, + "learning_rate": 1.9986047717261198e-05, + "loss": 0.6495, + "step": 2681 + }, + { + "epoch": 0.04634365496267625, + "grad_norm": 0.7172062297048369, + "learning_rate": 1.9986018148577352e-05, + "loss": 0.9784, + "step": 2682 + }, + { + "epoch": 0.04636093447608516, + "grad_norm": 1.7892760911456658, + "learning_rate": 1.998598854861654e-05, + "loss": 0.7853, + "step": 2683 + }, + { + "epoch": 0.04637821398949406, + "grad_norm": 1.8886015370994749, + "learning_rate": 1.998595891737885e-05, + "loss": 0.7372, + "step": 2684 + }, + { + "epoch": 0.04639549350290296, + "grad_norm": 1.343668893800884, + "learning_rate": 1.9985929254864378e-05, + "loss": 0.8582, + "step": 2685 + }, + { + "epoch": 0.04641277301631186, + "grad_norm": 1.0843730220782395, + "learning_rate": 1.998589956107322e-05, + "loss": 0.9003, + "step": 2686 + }, + { + "epoch": 0.04643005252972076, + "grad_norm": 1.2502599088802353, + "learning_rate": 1.998586983600546e-05, + "loss": 0.7211, + "step": 2687 + }, + { + "epoch": 0.04644733204312967, + "grad_norm": 1.1117799731207583, + "learning_rate": 1.99858400796612e-05, + "loss": 0.6594, + "step": 2688 + }, + { + "epoch": 0.04646461155653857, + "grad_norm": 1.0689345716395362, + "learning_rate": 1.998581029204053e-05, + "loss": 0.7516, + "step": 2689 + }, + { + "epoch": 0.04648189106994747, + "grad_norm": 1.292554476401022, + "learning_rate": 1.998578047314354e-05, + "loss": 0.6854, + "step": 2690 + }, + { + "epoch": 0.04649917058335637, + "grad_norm": 0.7891893991277289, + "learning_rate": 1.998575062297033e-05, + "loss": 0.5403, + "step": 2691 + }, + { + "epoch": 0.04651645009676528, + "grad_norm": 1.1508964081149093, + "learning_rate": 1.998572074152099e-05, + "loss": 0.7612, + "step": 2692 + }, + { + "epoch": 0.04653372961017418, + "grad_norm": 1.2337717648633237, + "learning_rate": 1.9985690828795613e-05, + "loss": 0.7905, + "step": 2693 + }, + { + "epoch": 0.04655100912358308, + "grad_norm": 1.1132215093661335, + "learning_rate": 1.9985660884794294e-05, + "loss": 0.692, + "step": 2694 + }, + { + "epoch": 0.04656828863699198, + "grad_norm": 0.9393845194975933, + "learning_rate": 1.9985630909517127e-05, + "loss": 0.5349, + "step": 2695 + }, + { + "epoch": 0.04658556815040089, + "grad_norm": 1.1638042186853526, + "learning_rate": 1.9985600902964207e-05, + "loss": 0.7573, + "step": 2696 + }, + { + "epoch": 0.04660284766380979, + "grad_norm": 1.4979025354784545, + "learning_rate": 1.9985570865135622e-05, + "loss": 0.9095, + "step": 2697 + }, + { + "epoch": 0.04662012717721869, + "grad_norm": 1.372179062322976, + "learning_rate": 1.9985540796031474e-05, + "loss": 0.83, + "step": 2698 + }, + { + "epoch": 0.04663740669062759, + "grad_norm": 1.3930919962193022, + "learning_rate": 1.998551069565185e-05, + "loss": 0.6222, + "step": 2699 + }, + { + "epoch": 0.0466546862040365, + "grad_norm": 1.212079992167824, + "learning_rate": 1.9985480563996846e-05, + "loss": 0.7964, + "step": 2700 + }, + { + "epoch": 0.0466719657174454, + "grad_norm": 1.2210304602827502, + "learning_rate": 1.998545040106656e-05, + "loss": 0.6938, + "step": 2701 + }, + { + "epoch": 0.0466892452308543, + "grad_norm": 1.2028318716374469, + "learning_rate": 1.9985420206861087e-05, + "loss": 0.5382, + "step": 2702 + }, + { + "epoch": 0.0467065247442632, + "grad_norm": 0.8113283383908082, + "learning_rate": 1.9985389981380518e-05, + "loss": 0.6914, + "step": 2703 + }, + { + "epoch": 0.04672380425767211, + "grad_norm": 1.1888572659728773, + "learning_rate": 1.9985359724624948e-05, + "loss": 0.837, + "step": 2704 + }, + { + "epoch": 0.04674108377108101, + "grad_norm": 0.8130438523835091, + "learning_rate": 1.9985329436594472e-05, + "loss": 0.622, + "step": 2705 + }, + { + "epoch": 0.04675836328448991, + "grad_norm": 1.1210562181606565, + "learning_rate": 1.9985299117289185e-05, + "loss": 0.6881, + "step": 2706 + }, + { + "epoch": 0.04677564279789881, + "grad_norm": 1.5368308643658086, + "learning_rate": 1.998526876670918e-05, + "loss": 0.8591, + "step": 2707 + }, + { + "epoch": 0.046792922311307716, + "grad_norm": 1.4972070582019223, + "learning_rate": 1.998523838485456e-05, + "loss": 0.7737, + "step": 2708 + }, + { + "epoch": 0.04681020182471662, + "grad_norm": 1.4197262257568828, + "learning_rate": 1.998520797172541e-05, + "loss": 0.8539, + "step": 2709 + }, + { + "epoch": 0.04682748133812552, + "grad_norm": 1.1609599854224155, + "learning_rate": 1.9985177527321832e-05, + "loss": 0.7193, + "step": 2710 + }, + { + "epoch": 0.04684476085153442, + "grad_norm": 0.9807760076599212, + "learning_rate": 1.9985147051643922e-05, + "loss": 0.6283, + "step": 2711 + }, + { + "epoch": 0.046862040364943326, + "grad_norm": 1.7522482170392106, + "learning_rate": 1.9985116544691767e-05, + "loss": 0.7444, + "step": 2712 + }, + { + "epoch": 0.04687931987835223, + "grad_norm": 1.293278018616671, + "learning_rate": 1.998508600646547e-05, + "loss": 0.6296, + "step": 2713 + }, + { + "epoch": 0.04689659939176113, + "grad_norm": 1.5110172695860487, + "learning_rate": 1.9985055436965127e-05, + "loss": 0.8629, + "step": 2714 + }, + { + "epoch": 0.04691387890517003, + "grad_norm": 1.1343150109189055, + "learning_rate": 1.998502483619083e-05, + "loss": 0.8737, + "step": 2715 + }, + { + "epoch": 0.046931158418578936, + "grad_norm": 1.4448455642273608, + "learning_rate": 1.9984994204142675e-05, + "loss": 0.972, + "step": 2716 + }, + { + "epoch": 0.04694843793198784, + "grad_norm": 0.9831752950101116, + "learning_rate": 1.998496354082076e-05, + "loss": 0.7172, + "step": 2717 + }, + { + "epoch": 0.04696571744539674, + "grad_norm": 1.3503429790709938, + "learning_rate": 1.998493284622518e-05, + "loss": 0.7843, + "step": 2718 + }, + { + "epoch": 0.04698299695880564, + "grad_norm": 1.126097564699691, + "learning_rate": 1.9984902120356034e-05, + "loss": 0.7074, + "step": 2719 + }, + { + "epoch": 0.047000276472214546, + "grad_norm": 1.5055310560536685, + "learning_rate": 1.9984871363213413e-05, + "loss": 0.8493, + "step": 2720 + }, + { + "epoch": 0.047017555985623447, + "grad_norm": 0.9612056175448357, + "learning_rate": 1.998484057479742e-05, + "loss": 0.7567, + "step": 2721 + }, + { + "epoch": 0.04703483549903235, + "grad_norm": 0.9505322707746741, + "learning_rate": 1.9984809755108144e-05, + "loss": 0.6193, + "step": 2722 + }, + { + "epoch": 0.04705211501244125, + "grad_norm": 1.1419373838656008, + "learning_rate": 1.9984778904145684e-05, + "loss": 0.6128, + "step": 2723 + }, + { + "epoch": 0.04706939452585015, + "grad_norm": 1.0667406957355716, + "learning_rate": 1.998474802191014e-05, + "loss": 1.0186, + "step": 2724 + }, + { + "epoch": 0.047086674039259056, + "grad_norm": 1.808554210312187, + "learning_rate": 1.9984717108401605e-05, + "loss": 1.2665, + "step": 2725 + }, + { + "epoch": 0.04710395355266796, + "grad_norm": 2.1142570945897092, + "learning_rate": 1.998468616362018e-05, + "loss": 0.8048, + "step": 2726 + }, + { + "epoch": 0.04712123306607686, + "grad_norm": 1.4026161789240195, + "learning_rate": 1.9984655187565955e-05, + "loss": 0.4863, + "step": 2727 + }, + { + "epoch": 0.04713851257948576, + "grad_norm": 1.3425889495746475, + "learning_rate": 1.9984624180239034e-05, + "loss": 0.5535, + "step": 2728 + }, + { + "epoch": 0.047155792092894666, + "grad_norm": 1.3944155121266617, + "learning_rate": 1.998459314163951e-05, + "loss": 0.8984, + "step": 2729 + }, + { + "epoch": 0.04717307160630357, + "grad_norm": 1.269382677003938, + "learning_rate": 1.9984562071767483e-05, + "loss": 0.83, + "step": 2730 + }, + { + "epoch": 0.04719035111971247, + "grad_norm": 1.6889184764153677, + "learning_rate": 1.9984530970623047e-05, + "loss": 0.9032, + "step": 2731 + }, + { + "epoch": 0.04720763063312137, + "grad_norm": 1.4866645592706451, + "learning_rate": 1.99844998382063e-05, + "loss": 1.0071, + "step": 2732 + }, + { + "epoch": 0.047224910146530276, + "grad_norm": 1.07609362086497, + "learning_rate": 1.9984468674517344e-05, + "loss": 0.8645, + "step": 2733 + }, + { + "epoch": 0.04724218965993918, + "grad_norm": 1.3219723582146905, + "learning_rate": 1.9984437479556274e-05, + "loss": 0.7904, + "step": 2734 + }, + { + "epoch": 0.04725946917334808, + "grad_norm": 1.261755948843255, + "learning_rate": 1.9984406253323185e-05, + "loss": 0.8569, + "step": 2735 + }, + { + "epoch": 0.04727674868675698, + "grad_norm": 1.352369334826395, + "learning_rate": 1.9984374995818174e-05, + "loss": 0.9361, + "step": 2736 + }, + { + "epoch": 0.047294028200165886, + "grad_norm": 1.357768213421408, + "learning_rate": 1.9984343707041348e-05, + "loss": 0.8041, + "step": 2737 + }, + { + "epoch": 0.047311307713574786, + "grad_norm": 1.5528429912776158, + "learning_rate": 1.9984312386992793e-05, + "loss": 0.8632, + "step": 2738 + }, + { + "epoch": 0.04732858722698369, + "grad_norm": 1.6050475457379074, + "learning_rate": 1.9984281035672615e-05, + "loss": 0.9483, + "step": 2739 + }, + { + "epoch": 0.04734586674039259, + "grad_norm": 1.0521459213577307, + "learning_rate": 1.998424965308091e-05, + "loss": 0.81, + "step": 2740 + }, + { + "epoch": 0.047363146253801496, + "grad_norm": 1.154192309762861, + "learning_rate": 1.9984218239217778e-05, + "loss": 0.8965, + "step": 2741 + }, + { + "epoch": 0.047380425767210396, + "grad_norm": 1.3008443941532641, + "learning_rate": 1.9984186794083314e-05, + "loss": 0.8574, + "step": 2742 + }, + { + "epoch": 0.0473977052806193, + "grad_norm": 1.274709910485245, + "learning_rate": 1.9984155317677618e-05, + "loss": 0.8833, + "step": 2743 + }, + { + "epoch": 0.0474149847940282, + "grad_norm": 1.257752432318984, + "learning_rate": 1.998412381000079e-05, + "loss": 0.7761, + "step": 2744 + }, + { + "epoch": 0.047432264307437105, + "grad_norm": 1.0934565657490611, + "learning_rate": 1.9984092271052927e-05, + "loss": 0.6536, + "step": 2745 + }, + { + "epoch": 0.047449543820846006, + "grad_norm": 1.2976645917353336, + "learning_rate": 1.9984060700834127e-05, + "loss": 0.9935, + "step": 2746 + }, + { + "epoch": 0.04746682333425491, + "grad_norm": 1.3732535274943234, + "learning_rate": 1.998402909934449e-05, + "loss": 1.0004, + "step": 2747 + }, + { + "epoch": 0.04748410284766381, + "grad_norm": 1.1530111648247114, + "learning_rate": 1.9983997466584117e-05, + "loss": 0.8696, + "step": 2748 + }, + { + "epoch": 0.047501382361072715, + "grad_norm": 1.1125118481135114, + "learning_rate": 1.9983965802553104e-05, + "loss": 0.6041, + "step": 2749 + }, + { + "epoch": 0.047518661874481616, + "grad_norm": 1.3660998143649392, + "learning_rate": 1.998393410725155e-05, + "loss": 0.8623, + "step": 2750 + }, + { + "epoch": 0.04753594138789052, + "grad_norm": 1.6364924024729075, + "learning_rate": 1.998390238067956e-05, + "loss": 0.8497, + "step": 2751 + }, + { + "epoch": 0.04755322090129942, + "grad_norm": 1.1408588595149567, + "learning_rate": 1.9983870622837226e-05, + "loss": 0.7933, + "step": 2752 + }, + { + "epoch": 0.047570500414708325, + "grad_norm": 1.4268023933386376, + "learning_rate": 1.9983838833724647e-05, + "loss": 1.0629, + "step": 2753 + }, + { + "epoch": 0.047587779928117226, + "grad_norm": 1.356092476443465, + "learning_rate": 1.998380701334193e-05, + "loss": 0.7425, + "step": 2754 + }, + { + "epoch": 0.047605059441526126, + "grad_norm": 1.2218019208823354, + "learning_rate": 1.9983775161689172e-05, + "loss": 0.7585, + "step": 2755 + }, + { + "epoch": 0.04762233895493503, + "grad_norm": 1.1739130534042284, + "learning_rate": 1.998374327876647e-05, + "loss": 0.8368, + "step": 2756 + }, + { + "epoch": 0.047639618468343935, + "grad_norm": 0.7250397075381344, + "learning_rate": 1.9983711364573924e-05, + "loss": 0.5765, + "step": 2757 + }, + { + "epoch": 0.047656897981752835, + "grad_norm": 1.254909918391736, + "learning_rate": 1.9983679419111637e-05, + "loss": 0.8307, + "step": 2758 + }, + { + "epoch": 0.047674177495161736, + "grad_norm": 1.3314911118584745, + "learning_rate": 1.9983647442379706e-05, + "loss": 0.7868, + "step": 2759 + }, + { + "epoch": 0.04769145700857064, + "grad_norm": 1.1103617406728457, + "learning_rate": 1.9983615434378235e-05, + "loss": 1.0025, + "step": 2760 + }, + { + "epoch": 0.047708736521979545, + "grad_norm": 1.3496124837450378, + "learning_rate": 1.9983583395107317e-05, + "loss": 0.7817, + "step": 2761 + }, + { + "epoch": 0.047726016035388445, + "grad_norm": 1.580579355589003, + "learning_rate": 1.9983551324567063e-05, + "loss": 0.8123, + "step": 2762 + }, + { + "epoch": 0.047743295548797346, + "grad_norm": 1.1561957554340547, + "learning_rate": 1.9983519222757564e-05, + "loss": 0.7065, + "step": 2763 + }, + { + "epoch": 0.04776057506220625, + "grad_norm": 1.2997313270968198, + "learning_rate": 1.9983487089678926e-05, + "loss": 1.0561, + "step": 2764 + }, + { + "epoch": 0.04777785457561515, + "grad_norm": 1.4058321209002609, + "learning_rate": 1.9983454925331243e-05, + "loss": 0.6512, + "step": 2765 + }, + { + "epoch": 0.047795134089024055, + "grad_norm": 1.1705256867825033, + "learning_rate": 1.9983422729714624e-05, + "loss": 0.6461, + "step": 2766 + }, + { + "epoch": 0.047812413602432956, + "grad_norm": 1.3268642797496375, + "learning_rate": 1.9983390502829168e-05, + "loss": 0.5928, + "step": 2767 + }, + { + "epoch": 0.047829693115841856, + "grad_norm": 1.692480862560549, + "learning_rate": 1.998335824467497e-05, + "loss": 0.8472, + "step": 2768 + }, + { + "epoch": 0.04784697262925076, + "grad_norm": 1.1842812038892552, + "learning_rate": 1.998332595525214e-05, + "loss": 0.5954, + "step": 2769 + }, + { + "epoch": 0.047864252142659665, + "grad_norm": 1.5856919753212224, + "learning_rate": 1.9983293634560773e-05, + "loss": 0.9023, + "step": 2770 + }, + { + "epoch": 0.047881531656068566, + "grad_norm": 1.1299006354480228, + "learning_rate": 1.998326128260097e-05, + "loss": 0.4336, + "step": 2771 + }, + { + "epoch": 0.047898811169477466, + "grad_norm": 1.1400539908505192, + "learning_rate": 1.9983228899372836e-05, + "loss": 0.6552, + "step": 2772 + }, + { + "epoch": 0.04791609068288637, + "grad_norm": 1.5202774844976783, + "learning_rate": 1.998319648487647e-05, + "loss": 0.9655, + "step": 2773 + }, + { + "epoch": 0.047933370196295275, + "grad_norm": 1.0300719169576271, + "learning_rate": 1.998316403911197e-05, + "loss": 0.5227, + "step": 2774 + }, + { + "epoch": 0.047950649709704175, + "grad_norm": 1.4997654065893407, + "learning_rate": 1.9983131562079446e-05, + "loss": 0.8129, + "step": 2775 + }, + { + "epoch": 0.047967929223113076, + "grad_norm": 0.982531532811753, + "learning_rate": 1.998309905377899e-05, + "loss": 0.6277, + "step": 2776 + }, + { + "epoch": 0.04798520873652198, + "grad_norm": 1.6056230824060485, + "learning_rate": 1.9983066514210713e-05, + "loss": 0.7162, + "step": 2777 + }, + { + "epoch": 0.048002488249930884, + "grad_norm": 1.2936737080596392, + "learning_rate": 1.9983033943374714e-05, + "loss": 0.7547, + "step": 2778 + }, + { + "epoch": 0.048019767763339785, + "grad_norm": 1.6150816682858324, + "learning_rate": 1.9983001341271093e-05, + "loss": 0.7007, + "step": 2779 + }, + { + "epoch": 0.048037047276748686, + "grad_norm": 1.4248276964696611, + "learning_rate": 1.9982968707899953e-05, + "loss": 1.1396, + "step": 2780 + }, + { + "epoch": 0.04805432679015759, + "grad_norm": 1.1679787222592681, + "learning_rate": 1.9982936043261394e-05, + "loss": 0.7786, + "step": 2781 + }, + { + "epoch": 0.048071606303566494, + "grad_norm": 1.2252567875447609, + "learning_rate": 1.9982903347355526e-05, + "loss": 0.9039, + "step": 2782 + }, + { + "epoch": 0.048088885816975395, + "grad_norm": 1.2274241066778198, + "learning_rate": 1.9982870620182443e-05, + "loss": 0.7861, + "step": 2783 + }, + { + "epoch": 0.048106165330384296, + "grad_norm": 1.1477527197810424, + "learning_rate": 1.998283786174225e-05, + "loss": 0.7111, + "step": 2784 + }, + { + "epoch": 0.048123444843793196, + "grad_norm": 1.2048736023122006, + "learning_rate": 1.998280507203505e-05, + "loss": 0.7494, + "step": 2785 + }, + { + "epoch": 0.048140724357202104, + "grad_norm": 1.0921003028623502, + "learning_rate": 1.9982772251060946e-05, + "loss": 0.7815, + "step": 2786 + }, + { + "epoch": 0.048158003870611005, + "grad_norm": 0.7675916053379125, + "learning_rate": 1.998273939882004e-05, + "loss": 0.6198, + "step": 2787 + }, + { + "epoch": 0.048175283384019905, + "grad_norm": 1.2276706958590917, + "learning_rate": 1.998270651531244e-05, + "loss": 0.7323, + "step": 2788 + }, + { + "epoch": 0.048192562897428806, + "grad_norm": 1.1728877437569092, + "learning_rate": 1.9982673600538242e-05, + "loss": 0.8529, + "step": 2789 + }, + { + "epoch": 0.048209842410837714, + "grad_norm": 1.3482847504577897, + "learning_rate": 1.998264065449755e-05, + "loss": 0.681, + "step": 2790 + }, + { + "epoch": 0.048227121924246615, + "grad_norm": 2.1206983362279512, + "learning_rate": 1.9982607677190468e-05, + "loss": 0.4088, + "step": 2791 + }, + { + "epoch": 0.048244401437655515, + "grad_norm": 1.0375562682878308, + "learning_rate": 1.99825746686171e-05, + "loss": 0.6072, + "step": 2792 + }, + { + "epoch": 0.048261680951064416, + "grad_norm": 1.8115468512969177, + "learning_rate": 1.998254162877755e-05, + "loss": 0.7338, + "step": 2793 + }, + { + "epoch": 0.048278960464473324, + "grad_norm": 1.3119137511108085, + "learning_rate": 1.9982508557671926e-05, + "loss": 0.7781, + "step": 2794 + }, + { + "epoch": 0.048296239977882224, + "grad_norm": 1.6865644374147377, + "learning_rate": 1.998247545530032e-05, + "loss": 0.7506, + "step": 2795 + }, + { + "epoch": 0.048313519491291125, + "grad_norm": 0.9078025377038043, + "learning_rate": 1.9982442321662847e-05, + "loss": 0.7004, + "step": 2796 + }, + { + "epoch": 0.048330799004700026, + "grad_norm": 0.9733041340546531, + "learning_rate": 1.9982409156759603e-05, + "loss": 0.5482, + "step": 2797 + }, + { + "epoch": 0.04834807851810893, + "grad_norm": 1.4150443823294243, + "learning_rate": 1.9982375960590696e-05, + "loss": 0.8638, + "step": 2798 + }, + { + "epoch": 0.048365358031517834, + "grad_norm": 0.7927241310076748, + "learning_rate": 1.9982342733156227e-05, + "loss": 0.5444, + "step": 2799 + }, + { + "epoch": 0.048382637544926735, + "grad_norm": 1.3872981293346551, + "learning_rate": 1.9982309474456305e-05, + "loss": 0.6288, + "step": 2800 + }, + { + "epoch": 0.048399917058335636, + "grad_norm": 1.3421733388486234, + "learning_rate": 1.998227618449103e-05, + "loss": 0.9829, + "step": 2801 + }, + { + "epoch": 0.048417196571744536, + "grad_norm": 0.8656848287175226, + "learning_rate": 1.9982242863260508e-05, + "loss": 0.5892, + "step": 2802 + }, + { + "epoch": 0.048434476085153444, + "grad_norm": 1.162348794004432, + "learning_rate": 1.9982209510764843e-05, + "loss": 0.7775, + "step": 2803 + }, + { + "epoch": 0.048451755598562345, + "grad_norm": 1.6327379938101805, + "learning_rate": 1.9982176127004135e-05, + "loss": 0.8897, + "step": 2804 + }, + { + "epoch": 0.048469035111971245, + "grad_norm": 1.216867415462071, + "learning_rate": 1.99821427119785e-05, + "loss": 0.933, + "step": 2805 + }, + { + "epoch": 0.048486314625380146, + "grad_norm": 1.1597208099783374, + "learning_rate": 1.998210926568803e-05, + "loss": 0.8251, + "step": 2806 + }, + { + "epoch": 0.048503594138789054, + "grad_norm": 1.0800066281761156, + "learning_rate": 1.9982075788132835e-05, + "loss": 0.6761, + "step": 2807 + }, + { + "epoch": 0.048520873652197954, + "grad_norm": 0.9670957641937779, + "learning_rate": 1.9982042279313022e-05, + "loss": 0.6121, + "step": 2808 + }, + { + "epoch": 0.048538153165606855, + "grad_norm": 1.5267957146775932, + "learning_rate": 1.9982008739228694e-05, + "loss": 0.6538, + "step": 2809 + }, + { + "epoch": 0.048555432679015756, + "grad_norm": 1.3650563928394353, + "learning_rate": 1.9981975167879956e-05, + "loss": 0.7519, + "step": 2810 + }, + { + "epoch": 0.048572712192424664, + "grad_norm": 1.7161652059440786, + "learning_rate": 1.998194156526691e-05, + "loss": 0.7829, + "step": 2811 + }, + { + "epoch": 0.048589991705833564, + "grad_norm": 1.0159757497117825, + "learning_rate": 1.998190793138967e-05, + "loss": 0.7427, + "step": 2812 + }, + { + "epoch": 0.048607271219242465, + "grad_norm": 1.1090371341446876, + "learning_rate": 1.9981874266248332e-05, + "loss": 0.685, + "step": 2813 + }, + { + "epoch": 0.048624550732651366, + "grad_norm": 1.0635118021827654, + "learning_rate": 1.9981840569843006e-05, + "loss": 0.7967, + "step": 2814 + }, + { + "epoch": 0.04864183024606027, + "grad_norm": 1.307506447446229, + "learning_rate": 1.9981806842173796e-05, + "loss": 0.6434, + "step": 2815 + }, + { + "epoch": 0.048659109759469174, + "grad_norm": 1.0858553309587737, + "learning_rate": 1.9981773083240806e-05, + "loss": 0.6635, + "step": 2816 + }, + { + "epoch": 0.048676389272878075, + "grad_norm": 0.9873200198737053, + "learning_rate": 1.998173929304415e-05, + "loss": 0.5771, + "step": 2817 + }, + { + "epoch": 0.048693668786286975, + "grad_norm": 1.4487619385971142, + "learning_rate": 1.9981705471583926e-05, + "loss": 0.6742, + "step": 2818 + }, + { + "epoch": 0.04871094829969588, + "grad_norm": 1.151866702383325, + "learning_rate": 1.9981671618860238e-05, + "loss": 0.7388, + "step": 2819 + }, + { + "epoch": 0.048728227813104784, + "grad_norm": 1.3785994289842824, + "learning_rate": 1.99816377348732e-05, + "loss": 0.5194, + "step": 2820 + }, + { + "epoch": 0.048745507326513685, + "grad_norm": 2.2696107030533246, + "learning_rate": 1.9981603819622914e-05, + "loss": 1.0237, + "step": 2821 + }, + { + "epoch": 0.048762786839922585, + "grad_norm": 1.3109045213409627, + "learning_rate": 1.9981569873109484e-05, + "loss": 0.6989, + "step": 2822 + }, + { + "epoch": 0.04878006635333149, + "grad_norm": 1.4271146648918684, + "learning_rate": 1.9981535895333023e-05, + "loss": 0.8688, + "step": 2823 + }, + { + "epoch": 0.048797345866740394, + "grad_norm": 1.4492626665644086, + "learning_rate": 1.9981501886293627e-05, + "loss": 0.7465, + "step": 2824 + }, + { + "epoch": 0.048814625380149294, + "grad_norm": 1.278805842842838, + "learning_rate": 1.998146784599141e-05, + "loss": 0.7195, + "step": 2825 + }, + { + "epoch": 0.048831904893558195, + "grad_norm": 1.6812537694160015, + "learning_rate": 1.998143377442648e-05, + "loss": 1.0779, + "step": 2826 + }, + { + "epoch": 0.0488491844069671, + "grad_norm": 1.1978829265844413, + "learning_rate": 1.998139967159894e-05, + "loss": 0.8467, + "step": 2827 + }, + { + "epoch": 0.048866463920376, + "grad_norm": 1.4372546988549062, + "learning_rate": 1.9981365537508896e-05, + "loss": 0.7898, + "step": 2828 + }, + { + "epoch": 0.048883743433784904, + "grad_norm": 1.2600337760121756, + "learning_rate": 1.9981331372156457e-05, + "loss": 0.6624, + "step": 2829 + }, + { + "epoch": 0.048901022947193805, + "grad_norm": 1.2937796165966586, + "learning_rate": 1.998129717554173e-05, + "loss": 0.8123, + "step": 2830 + }, + { + "epoch": 0.04891830246060271, + "grad_norm": 1.3598473895660594, + "learning_rate": 1.9981262947664823e-05, + "loss": 0.8438, + "step": 2831 + }, + { + "epoch": 0.04893558197401161, + "grad_norm": 1.2767790196208473, + "learning_rate": 1.9981228688525843e-05, + "loss": 0.8443, + "step": 2832 + }, + { + "epoch": 0.048952861487420514, + "grad_norm": 1.141821012104633, + "learning_rate": 1.9981194398124894e-05, + "loss": 0.7692, + "step": 2833 + }, + { + "epoch": 0.048970141000829415, + "grad_norm": 1.1330226648302555, + "learning_rate": 1.9981160076462083e-05, + "loss": 0.4793, + "step": 2834 + }, + { + "epoch": 0.04898742051423832, + "grad_norm": 1.2977223390296915, + "learning_rate": 1.9981125723537525e-05, + "loss": 0.653, + "step": 2835 + }, + { + "epoch": 0.04900470002764722, + "grad_norm": 1.2953607035280947, + "learning_rate": 1.998109133935132e-05, + "loss": 0.8257, + "step": 2836 + }, + { + "epoch": 0.049021979541056124, + "grad_norm": 1.2392763627894035, + "learning_rate": 1.998105692390358e-05, + "loss": 0.6461, + "step": 2837 + }, + { + "epoch": 0.049039259054465024, + "grad_norm": 1.2855334432705137, + "learning_rate": 1.998102247719441e-05, + "loss": 0.9111, + "step": 2838 + }, + { + "epoch": 0.04905653856787393, + "grad_norm": 1.7377835749191064, + "learning_rate": 1.998098799922392e-05, + "loss": 1.0444, + "step": 2839 + }, + { + "epoch": 0.04907381808128283, + "grad_norm": 1.0687678980382258, + "learning_rate": 1.9980953489992214e-05, + "loss": 0.6864, + "step": 2840 + }, + { + "epoch": 0.049091097594691734, + "grad_norm": 0.836738646972469, + "learning_rate": 1.9980918949499403e-05, + "loss": 0.8528, + "step": 2841 + }, + { + "epoch": 0.049108377108100634, + "grad_norm": 1.1436873813271407, + "learning_rate": 1.9980884377745598e-05, + "loss": 0.6774, + "step": 2842 + }, + { + "epoch": 0.049125656621509535, + "grad_norm": 1.255726757550884, + "learning_rate": 1.9980849774730907e-05, + "loss": 0.6572, + "step": 2843 + }, + { + "epoch": 0.04914293613491844, + "grad_norm": 0.9914002982885415, + "learning_rate": 1.998081514045543e-05, + "loss": 0.5624, + "step": 2844 + }, + { + "epoch": 0.04916021564832734, + "grad_norm": 0.997815678278172, + "learning_rate": 1.9980780474919287e-05, + "loss": 0.6824, + "step": 2845 + }, + { + "epoch": 0.049177495161736244, + "grad_norm": 1.3077487499108542, + "learning_rate": 1.9980745778122578e-05, + "loss": 1.0665, + "step": 2846 + }, + { + "epoch": 0.049194774675145145, + "grad_norm": 1.4688960975537404, + "learning_rate": 1.9980711050065414e-05, + "loss": 0.9191, + "step": 2847 + }, + { + "epoch": 0.04921205418855405, + "grad_norm": 1.2128030529071425, + "learning_rate": 1.9980676290747906e-05, + "loss": 0.7605, + "step": 2848 + }, + { + "epoch": 0.04922933370196295, + "grad_norm": 1.3467484389424162, + "learning_rate": 1.9980641500170162e-05, + "loss": 0.7419, + "step": 2849 + }, + { + "epoch": 0.049246613215371854, + "grad_norm": 1.2576803490720976, + "learning_rate": 1.998060667833229e-05, + "loss": 0.5876, + "step": 2850 + }, + { + "epoch": 0.049263892728780755, + "grad_norm": 1.6482950851403642, + "learning_rate": 1.99805718252344e-05, + "loss": 0.8377, + "step": 2851 + }, + { + "epoch": 0.04928117224218966, + "grad_norm": 1.3241840471922166, + "learning_rate": 1.99805369408766e-05, + "loss": 1.0031, + "step": 2852 + }, + { + "epoch": 0.04929845175559856, + "grad_norm": 1.676883939193605, + "learning_rate": 1.9980502025258995e-05, + "loss": 0.9451, + "step": 2853 + }, + { + "epoch": 0.049315731269007464, + "grad_norm": 0.940638495537326, + "learning_rate": 1.9980467078381704e-05, + "loss": 0.6785, + "step": 2854 + }, + { + "epoch": 0.049333010782416364, + "grad_norm": 1.491035450398201, + "learning_rate": 1.998043210024483e-05, + "loss": 0.9045, + "step": 2855 + }, + { + "epoch": 0.04935029029582527, + "grad_norm": 1.1663600257348579, + "learning_rate": 1.9980397090848486e-05, + "loss": 0.9334, + "step": 2856 + }, + { + "epoch": 0.04936756980923417, + "grad_norm": 1.1521865673476308, + "learning_rate": 1.9980362050192783e-05, + "loss": 1.072, + "step": 2857 + }, + { + "epoch": 0.049384849322643073, + "grad_norm": 1.4151823058469188, + "learning_rate": 1.9980326978277823e-05, + "loss": 0.9573, + "step": 2858 + }, + { + "epoch": 0.049402128836051974, + "grad_norm": 1.1770304984165596, + "learning_rate": 1.9980291875103722e-05, + "loss": 0.7492, + "step": 2859 + }, + { + "epoch": 0.04941940834946088, + "grad_norm": 1.1134700418738288, + "learning_rate": 1.9980256740670585e-05, + "loss": 0.7268, + "step": 2860 + }, + { + "epoch": 0.04943668786286978, + "grad_norm": 1.0342840466150116, + "learning_rate": 1.9980221574978527e-05, + "loss": 0.6874, + "step": 2861 + }, + { + "epoch": 0.04945396737627868, + "grad_norm": 1.1560668170017578, + "learning_rate": 1.998018637802766e-05, + "loss": 0.8454, + "step": 2862 + }, + { + "epoch": 0.049471246889687584, + "grad_norm": 1.448195807971272, + "learning_rate": 1.9980151149818087e-05, + "loss": 0.8548, + "step": 2863 + }, + { + "epoch": 0.04948852640309649, + "grad_norm": 1.2305591132050324, + "learning_rate": 1.9980115890349924e-05, + "loss": 0.7277, + "step": 2864 + }, + { + "epoch": 0.04950580591650539, + "grad_norm": 1.2264623753858832, + "learning_rate": 1.9980080599623277e-05, + "loss": 0.8405, + "step": 2865 + }, + { + "epoch": 0.04952308542991429, + "grad_norm": 1.2835834635817436, + "learning_rate": 1.998004527763826e-05, + "loss": 0.8195, + "step": 2866 + }, + { + "epoch": 0.049540364943323194, + "grad_norm": 1.2163632563609603, + "learning_rate": 1.9980009924394987e-05, + "loss": 0.9694, + "step": 2867 + }, + { + "epoch": 0.0495576444567321, + "grad_norm": 1.2245118762992058, + "learning_rate": 1.997997453989356e-05, + "loss": 0.8067, + "step": 2868 + }, + { + "epoch": 0.049574923970141, + "grad_norm": 0.9054491091557342, + "learning_rate": 1.9979939124134096e-05, + "loss": 0.6548, + "step": 2869 + }, + { + "epoch": 0.0495922034835499, + "grad_norm": 0.7804890880870543, + "learning_rate": 1.9979903677116705e-05, + "loss": 0.9205, + "step": 2870 + }, + { + "epoch": 0.049609482996958804, + "grad_norm": 0.8808894035870747, + "learning_rate": 1.9979868198841493e-05, + "loss": 0.8639, + "step": 2871 + }, + { + "epoch": 0.04962676251036771, + "grad_norm": 1.4899992915131535, + "learning_rate": 1.9979832689308577e-05, + "loss": 1.0354, + "step": 2872 + }, + { + "epoch": 0.04964404202377661, + "grad_norm": 1.8366739930806595, + "learning_rate": 1.997979714851807e-05, + "loss": 0.7447, + "step": 2873 + }, + { + "epoch": 0.04966132153718551, + "grad_norm": 1.1382944913750102, + "learning_rate": 1.997976157647008e-05, + "loss": 0.6138, + "step": 2874 + }, + { + "epoch": 0.04967860105059441, + "grad_norm": 1.3052319821619784, + "learning_rate": 1.9979725973164714e-05, + "loss": 0.9648, + "step": 2875 + }, + { + "epoch": 0.04969588056400332, + "grad_norm": 1.2346220901031868, + "learning_rate": 1.997969033860209e-05, + "loss": 0.8952, + "step": 2876 + }, + { + "epoch": 0.04971316007741222, + "grad_norm": 1.2788264482604494, + "learning_rate": 1.9979654672782317e-05, + "loss": 0.8645, + "step": 2877 + }, + { + "epoch": 0.04973043959082112, + "grad_norm": 1.1373829919247311, + "learning_rate": 1.997961897570551e-05, + "loss": 0.6927, + "step": 2878 + }, + { + "epoch": 0.04974771910423002, + "grad_norm": 1.1178197826166187, + "learning_rate": 1.9979583247371775e-05, + "loss": 0.8692, + "step": 2879 + }, + { + "epoch": 0.049764998617638924, + "grad_norm": 1.5156657989314737, + "learning_rate": 1.9979547487781225e-05, + "loss": 0.8971, + "step": 2880 + }, + { + "epoch": 0.04978227813104783, + "grad_norm": 1.1730191257749678, + "learning_rate": 1.9979511696933977e-05, + "loss": 0.8336, + "step": 2881 + }, + { + "epoch": 0.04979955764445673, + "grad_norm": 1.2551154758074827, + "learning_rate": 1.9979475874830138e-05, + "loss": 0.8761, + "step": 2882 + }, + { + "epoch": 0.04981683715786563, + "grad_norm": 0.8847111129360762, + "learning_rate": 1.9979440021469822e-05, + "loss": 0.7052, + "step": 2883 + }, + { + "epoch": 0.049834116671274534, + "grad_norm": 1.3610442121364525, + "learning_rate": 1.9979404136853143e-05, + "loss": 1.0013, + "step": 2884 + }, + { + "epoch": 0.04985139618468344, + "grad_norm": 1.0437223584612068, + "learning_rate": 1.9979368220980213e-05, + "loss": 0.4126, + "step": 2885 + }, + { + "epoch": 0.04986867569809234, + "grad_norm": 1.0472285408266178, + "learning_rate": 1.9979332273851144e-05, + "loss": 0.5518, + "step": 2886 + }, + { + "epoch": 0.04988595521150124, + "grad_norm": 1.2607102644978763, + "learning_rate": 1.9979296295466044e-05, + "loss": 0.8141, + "step": 2887 + }, + { + "epoch": 0.049903234724910144, + "grad_norm": 1.340041540118832, + "learning_rate": 1.997926028582503e-05, + "loss": 0.87, + "step": 2888 + }, + { + "epoch": 0.04992051423831905, + "grad_norm": 1.7811785716455866, + "learning_rate": 1.9979224244928215e-05, + "loss": 0.8865, + "step": 2889 + }, + { + "epoch": 0.04993779375172795, + "grad_norm": 1.5167384990700172, + "learning_rate": 1.9979188172775714e-05, + "loss": 0.9375, + "step": 2890 + }, + { + "epoch": 0.04995507326513685, + "grad_norm": 1.50572239988919, + "learning_rate": 1.9979152069367637e-05, + "loss": 0.7434, + "step": 2891 + }, + { + "epoch": 0.04997235277854575, + "grad_norm": 1.3769126715514588, + "learning_rate": 1.9979115934704094e-05, + "loss": 1.0754, + "step": 2892 + }, + { + "epoch": 0.04998963229195466, + "grad_norm": 1.3026902847359079, + "learning_rate": 1.9979079768785205e-05, + "loss": 0.6396, + "step": 2893 + }, + { + "epoch": 0.05000691180536356, + "grad_norm": 1.269429020773354, + "learning_rate": 1.9979043571611078e-05, + "loss": 0.9596, + "step": 2894 + }, + { + "epoch": 0.05002419131877246, + "grad_norm": 0.9513204483427745, + "learning_rate": 1.9979007343181828e-05, + "loss": 0.4328, + "step": 2895 + }, + { + "epoch": 0.05004147083218136, + "grad_norm": 1.4970125418456333, + "learning_rate": 1.9978971083497567e-05, + "loss": 0.8639, + "step": 2896 + }, + { + "epoch": 0.05005875034559027, + "grad_norm": 0.810631088555132, + "learning_rate": 1.9978934792558414e-05, + "loss": 0.5599, + "step": 2897 + }, + { + "epoch": 0.05007602985899917, + "grad_norm": 0.8542092200168584, + "learning_rate": 1.9978898470364474e-05, + "loss": 0.7338, + "step": 2898 + }, + { + "epoch": 0.05009330937240807, + "grad_norm": 1.196137714484473, + "learning_rate": 1.997886211691587e-05, + "loss": 0.7066, + "step": 2899 + }, + { + "epoch": 0.05011058888581697, + "grad_norm": 1.1427392218482686, + "learning_rate": 1.997882573221271e-05, + "loss": 0.8881, + "step": 2900 + }, + { + "epoch": 0.05012786839922588, + "grad_norm": 1.4417299445475416, + "learning_rate": 1.997878931625511e-05, + "loss": 1.0471, + "step": 2901 + }, + { + "epoch": 0.05014514791263478, + "grad_norm": 1.2150984500660567, + "learning_rate": 1.997875286904318e-05, + "loss": 0.7045, + "step": 2902 + }, + { + "epoch": 0.05016242742604368, + "grad_norm": 1.128414391667551, + "learning_rate": 1.9978716390577042e-05, + "loss": 0.7134, + "step": 2903 + }, + { + "epoch": 0.05017970693945258, + "grad_norm": 1.2494363560171782, + "learning_rate": 1.9978679880856804e-05, + "loss": 0.767, + "step": 2904 + }, + { + "epoch": 0.05019698645286149, + "grad_norm": 1.7186446298859817, + "learning_rate": 1.997864333988258e-05, + "loss": 1.0571, + "step": 2905 + }, + { + "epoch": 0.05021426596627039, + "grad_norm": 1.1709032607934355, + "learning_rate": 1.9978606767654493e-05, + "loss": 0.7935, + "step": 2906 + }, + { + "epoch": 0.05023154547967929, + "grad_norm": 1.0557866986394584, + "learning_rate": 1.9978570164172648e-05, + "loss": 0.5931, + "step": 2907 + }, + { + "epoch": 0.05024882499308819, + "grad_norm": 1.4415599104954895, + "learning_rate": 1.997853352943716e-05, + "loss": 0.7683, + "step": 2908 + }, + { + "epoch": 0.0502661045064971, + "grad_norm": 1.5729831898208229, + "learning_rate": 1.997849686344815e-05, + "loss": 0.8414, + "step": 2909 + }, + { + "epoch": 0.050283384019906, + "grad_norm": 1.8184646813024985, + "learning_rate": 1.9978460166205726e-05, + "loss": 0.6631, + "step": 2910 + }, + { + "epoch": 0.0503006635333149, + "grad_norm": 1.27968753760507, + "learning_rate": 1.997842343771001e-05, + "loss": 0.7057, + "step": 2911 + }, + { + "epoch": 0.0503179430467238, + "grad_norm": 1.4076817184528703, + "learning_rate": 1.9978386677961112e-05, + "loss": 0.9057, + "step": 2912 + }, + { + "epoch": 0.05033522256013271, + "grad_norm": 1.1639124287844933, + "learning_rate": 1.9978349886959152e-05, + "loss": 0.9447, + "step": 2913 + }, + { + "epoch": 0.05035250207354161, + "grad_norm": 1.0417748989138085, + "learning_rate": 1.9978313064704237e-05, + "loss": 0.8921, + "step": 2914 + }, + { + "epoch": 0.05036978158695051, + "grad_norm": 1.2686603636147589, + "learning_rate": 1.9978276211196487e-05, + "loss": 0.8491, + "step": 2915 + }, + { + "epoch": 0.05038706110035941, + "grad_norm": 1.3180184580407144, + "learning_rate": 1.9978239326436023e-05, + "loss": 0.8385, + "step": 2916 + }, + { + "epoch": 0.05040434061376831, + "grad_norm": 1.0795057038730664, + "learning_rate": 1.997820241042295e-05, + "loss": 0.6954, + "step": 2917 + }, + { + "epoch": 0.05042162012717722, + "grad_norm": 1.3065046096258588, + "learning_rate": 1.9978165463157394e-05, + "loss": 0.6027, + "step": 2918 + }, + { + "epoch": 0.05043889964058612, + "grad_norm": 1.285693572379867, + "learning_rate": 1.997812848463946e-05, + "loss": 0.8467, + "step": 2919 + }, + { + "epoch": 0.05045617915399502, + "grad_norm": 1.1019487254067304, + "learning_rate": 1.997809147486927e-05, + "loss": 0.6265, + "step": 2920 + }, + { + "epoch": 0.05047345866740392, + "grad_norm": 1.0903972054200686, + "learning_rate": 1.997805443384694e-05, + "loss": 0.6866, + "step": 2921 + }, + { + "epoch": 0.05049073818081283, + "grad_norm": 1.1194347038356784, + "learning_rate": 1.997801736157259e-05, + "loss": 0.7217, + "step": 2922 + }, + { + "epoch": 0.05050801769422173, + "grad_norm": 1.0395095566697248, + "learning_rate": 1.9977980258046324e-05, + "loss": 0.5171, + "step": 2923 + }, + { + "epoch": 0.05052529720763063, + "grad_norm": 1.3718904715762883, + "learning_rate": 1.997794312326827e-05, + "loss": 0.6667, + "step": 2924 + }, + { + "epoch": 0.05054257672103953, + "grad_norm": 1.6218369548404097, + "learning_rate": 1.9977905957238538e-05, + "loss": 0.8907, + "step": 2925 + }, + { + "epoch": 0.05055985623444844, + "grad_norm": 1.173689938423814, + "learning_rate": 1.9977868759957245e-05, + "loss": 0.6229, + "step": 2926 + }, + { + "epoch": 0.05057713574785734, + "grad_norm": 1.3028379842691338, + "learning_rate": 1.997783153142451e-05, + "loss": 0.7409, + "step": 2927 + }, + { + "epoch": 0.05059441526126624, + "grad_norm": 1.2249590290451882, + "learning_rate": 1.9977794271640448e-05, + "loss": 0.7455, + "step": 2928 + }, + { + "epoch": 0.05061169477467514, + "grad_norm": 0.941891160313295, + "learning_rate": 1.9977756980605178e-05, + "loss": 0.6254, + "step": 2929 + }, + { + "epoch": 0.05062897428808405, + "grad_norm": 0.7268050530287538, + "learning_rate": 1.9977719658318817e-05, + "loss": 0.7886, + "step": 2930 + }, + { + "epoch": 0.05064625380149295, + "grad_norm": 1.2572741286496685, + "learning_rate": 1.9977682304781473e-05, + "loss": 0.8912, + "step": 2931 + }, + { + "epoch": 0.05066353331490185, + "grad_norm": 0.9180245168578752, + "learning_rate": 1.9977644919993272e-05, + "loss": 0.577, + "step": 2932 + }, + { + "epoch": 0.05068081282831075, + "grad_norm": 1.2706474988397902, + "learning_rate": 1.997760750395433e-05, + "loss": 0.8921, + "step": 2933 + }, + { + "epoch": 0.05069809234171966, + "grad_norm": 0.8526140796419923, + "learning_rate": 1.9977570056664764e-05, + "loss": 0.682, + "step": 2934 + }, + { + "epoch": 0.05071537185512856, + "grad_norm": 1.119255342182443, + "learning_rate": 1.9977532578124687e-05, + "loss": 0.5365, + "step": 2935 + }, + { + "epoch": 0.05073265136853746, + "grad_norm": 0.901530503729602, + "learning_rate": 1.9977495068334223e-05, + "loss": 0.559, + "step": 2936 + }, + { + "epoch": 0.05074993088194636, + "grad_norm": 2.2046521119275457, + "learning_rate": 1.9977457527293485e-05, + "loss": 0.8701, + "step": 2937 + }, + { + "epoch": 0.05076721039535527, + "grad_norm": 1.2637268462964621, + "learning_rate": 1.9977419955002592e-05, + "loss": 0.8877, + "step": 2938 + }, + { + "epoch": 0.05078448990876417, + "grad_norm": 1.4070465427018233, + "learning_rate": 1.997738235146166e-05, + "loss": 0.6428, + "step": 2939 + }, + { + "epoch": 0.05080176942217307, + "grad_norm": 1.531291345061118, + "learning_rate": 1.9977344716670812e-05, + "loss": 0.9238, + "step": 2940 + }, + { + "epoch": 0.05081904893558197, + "grad_norm": 1.0630837688257913, + "learning_rate": 1.997730705063016e-05, + "loss": 0.6916, + "step": 2941 + }, + { + "epoch": 0.05083632844899088, + "grad_norm": 1.2224178145764446, + "learning_rate": 1.9977269353339822e-05, + "loss": 0.671, + "step": 2942 + }, + { + "epoch": 0.05085360796239978, + "grad_norm": 1.039510038892956, + "learning_rate": 1.997723162479992e-05, + "loss": 0.9459, + "step": 2943 + }, + { + "epoch": 0.05087088747580868, + "grad_norm": 1.258279865909157, + "learning_rate": 1.997719386501057e-05, + "loss": 0.6495, + "step": 2944 + }, + { + "epoch": 0.05088816698921758, + "grad_norm": 1.0043972729976476, + "learning_rate": 1.997715607397189e-05, + "loss": 0.8454, + "step": 2945 + }, + { + "epoch": 0.05090544650262649, + "grad_norm": 1.3963968973377079, + "learning_rate": 1.9977118251684004e-05, + "loss": 0.8175, + "step": 2946 + }, + { + "epoch": 0.05092272601603539, + "grad_norm": 1.1981292062600541, + "learning_rate": 1.9977080398147018e-05, + "loss": 0.5617, + "step": 2947 + }, + { + "epoch": 0.05094000552944429, + "grad_norm": 1.3640771895521466, + "learning_rate": 1.9977042513361062e-05, + "loss": 0.7508, + "step": 2948 + }, + { + "epoch": 0.05095728504285319, + "grad_norm": 1.097280090793999, + "learning_rate": 1.997700459732625e-05, + "loss": 0.5492, + "step": 2949 + }, + { + "epoch": 0.0509745645562621, + "grad_norm": 1.1826493415867776, + "learning_rate": 1.99769666500427e-05, + "loss": 0.6976, + "step": 2950 + }, + { + "epoch": 0.050991844069671, + "grad_norm": 1.3566529865320511, + "learning_rate": 1.9976928671510534e-05, + "loss": 0.8473, + "step": 2951 + }, + { + "epoch": 0.0510091235830799, + "grad_norm": 1.1462809855908087, + "learning_rate": 1.9976890661729868e-05, + "loss": 0.7873, + "step": 2952 + }, + { + "epoch": 0.0510264030964888, + "grad_norm": 1.304897667004151, + "learning_rate": 1.9976852620700825e-05, + "loss": 0.7734, + "step": 2953 + }, + { + "epoch": 0.05104368260989771, + "grad_norm": 0.9726181293236037, + "learning_rate": 1.9976814548423516e-05, + "loss": 0.7139, + "step": 2954 + }, + { + "epoch": 0.05106096212330661, + "grad_norm": 1.1162428970499927, + "learning_rate": 1.9976776444898068e-05, + "loss": 0.5477, + "step": 2955 + }, + { + "epoch": 0.05107824163671551, + "grad_norm": 1.020200463622289, + "learning_rate": 1.9976738310124602e-05, + "loss": 0.6767, + "step": 2956 + }, + { + "epoch": 0.05109552115012441, + "grad_norm": 1.0754870808080228, + "learning_rate": 1.997670014410323e-05, + "loss": 0.7459, + "step": 2957 + }, + { + "epoch": 0.05111280066353331, + "grad_norm": 1.3388577465779614, + "learning_rate": 1.9976661946834074e-05, + "loss": 0.92, + "step": 2958 + }, + { + "epoch": 0.05113008017694222, + "grad_norm": 1.240907948108646, + "learning_rate": 1.9976623718317255e-05, + "loss": 0.8187, + "step": 2959 + }, + { + "epoch": 0.05114735969035112, + "grad_norm": 0.9324555943567723, + "learning_rate": 1.9976585458552894e-05, + "loss": 0.7193, + "step": 2960 + }, + { + "epoch": 0.05116463920376002, + "grad_norm": 1.017190237115477, + "learning_rate": 1.9976547167541107e-05, + "loss": 0.6475, + "step": 2961 + }, + { + "epoch": 0.05118191871716892, + "grad_norm": 1.3013834067839885, + "learning_rate": 1.9976508845282018e-05, + "loss": 0.932, + "step": 2962 + }, + { + "epoch": 0.05119919823057783, + "grad_norm": 1.103557127977569, + "learning_rate": 1.9976470491775744e-05, + "loss": 0.5698, + "step": 2963 + }, + { + "epoch": 0.05121647774398673, + "grad_norm": 1.4790520313156557, + "learning_rate": 1.9976432107022408e-05, + "loss": 1.0244, + "step": 2964 + }, + { + "epoch": 0.05123375725739563, + "grad_norm": 1.1354910181325124, + "learning_rate": 1.9976393691022124e-05, + "loss": 0.7613, + "step": 2965 + }, + { + "epoch": 0.05125103677080453, + "grad_norm": 0.9808688704054142, + "learning_rate": 1.9976355243775023e-05, + "loss": 0.4838, + "step": 2966 + }, + { + "epoch": 0.05126831628421344, + "grad_norm": 0.7976046369322712, + "learning_rate": 1.9976316765281214e-05, + "loss": 0.7554, + "step": 2967 + }, + { + "epoch": 0.05128559579762234, + "grad_norm": 1.3915053335607375, + "learning_rate": 1.9976278255540825e-05, + "loss": 0.8465, + "step": 2968 + }, + { + "epoch": 0.05130287531103124, + "grad_norm": 0.9452617827199729, + "learning_rate": 1.9976239714553976e-05, + "loss": 0.476, + "step": 2969 + }, + { + "epoch": 0.05132015482444014, + "grad_norm": 1.241912334707108, + "learning_rate": 1.9976201142320783e-05, + "loss": 0.8643, + "step": 2970 + }, + { + "epoch": 0.05133743433784905, + "grad_norm": 1.1774316575834907, + "learning_rate": 1.997616253884137e-05, + "loss": 0.8015, + "step": 2971 + }, + { + "epoch": 0.05135471385125795, + "grad_norm": 1.3054848829541272, + "learning_rate": 1.997612390411586e-05, + "loss": 0.8344, + "step": 2972 + }, + { + "epoch": 0.05137199336466685, + "grad_norm": 1.2804858106318298, + "learning_rate": 1.997608523814437e-05, + "loss": 0.8894, + "step": 2973 + }, + { + "epoch": 0.05138927287807575, + "grad_norm": 1.1802692465481384, + "learning_rate": 1.9976046540927023e-05, + "loss": 0.8217, + "step": 2974 + }, + { + "epoch": 0.05140655239148466, + "grad_norm": 1.1860799866364895, + "learning_rate": 1.997600781246394e-05, + "loss": 0.7055, + "step": 2975 + }, + { + "epoch": 0.05142383190489356, + "grad_norm": 0.9896830919116137, + "learning_rate": 1.9975969052755243e-05, + "loss": 0.6752, + "step": 2976 + }, + { + "epoch": 0.05144111141830246, + "grad_norm": 1.0618740420019568, + "learning_rate": 1.997593026180105e-05, + "loss": 0.7145, + "step": 2977 + }, + { + "epoch": 0.05145839093171136, + "grad_norm": 0.9169302063600275, + "learning_rate": 1.997589143960149e-05, + "loss": 0.6116, + "step": 2978 + }, + { + "epoch": 0.05147567044512027, + "grad_norm": 1.0849807152145736, + "learning_rate": 1.9975852586156678e-05, + "loss": 0.7198, + "step": 2979 + }, + { + "epoch": 0.05149294995852917, + "grad_norm": 1.2626644537998863, + "learning_rate": 1.997581370146674e-05, + "loss": 0.8046, + "step": 2980 + }, + { + "epoch": 0.05151022947193807, + "grad_norm": 1.1013941271581231, + "learning_rate": 1.997577478553179e-05, + "loss": 0.8814, + "step": 2981 + }, + { + "epoch": 0.05152750898534697, + "grad_norm": 0.8350240556064636, + "learning_rate": 1.997573583835196e-05, + "loss": 0.7778, + "step": 2982 + }, + { + "epoch": 0.05154478849875588, + "grad_norm": 1.7274460777520102, + "learning_rate": 1.9975696859927362e-05, + "loss": 0.7157, + "step": 2983 + }, + { + "epoch": 0.05156206801216478, + "grad_norm": 1.0288313953427362, + "learning_rate": 1.997565785025813e-05, + "loss": 0.6647, + "step": 2984 + }, + { + "epoch": 0.05157934752557368, + "grad_norm": 1.3333615844126985, + "learning_rate": 1.9975618809344374e-05, + "loss": 0.5695, + "step": 2985 + }, + { + "epoch": 0.05159662703898258, + "grad_norm": 1.3794047262652174, + "learning_rate": 1.9975579737186223e-05, + "loss": 0.8075, + "step": 2986 + }, + { + "epoch": 0.05161390655239149, + "grad_norm": 0.943330961589141, + "learning_rate": 1.99755406337838e-05, + "loss": 0.6727, + "step": 2987 + }, + { + "epoch": 0.05163118606580039, + "grad_norm": 1.499112786373198, + "learning_rate": 1.9975501499137225e-05, + "loss": 1.0043, + "step": 2988 + }, + { + "epoch": 0.05164846557920929, + "grad_norm": 1.0318040189580941, + "learning_rate": 1.997546233324662e-05, + "loss": 0.787, + "step": 2989 + }, + { + "epoch": 0.05166574509261819, + "grad_norm": 1.2512842766630592, + "learning_rate": 1.9975423136112108e-05, + "loss": 1.0197, + "step": 2990 + }, + { + "epoch": 0.0516830246060271, + "grad_norm": 0.9666118424756808, + "learning_rate": 1.9975383907733814e-05, + "loss": 0.8163, + "step": 2991 + }, + { + "epoch": 0.051700304119436, + "grad_norm": 0.9644959353159038, + "learning_rate": 1.997534464811186e-05, + "loss": 0.4038, + "step": 2992 + }, + { + "epoch": 0.0517175836328449, + "grad_norm": 1.1642520025550533, + "learning_rate": 1.997530535724637e-05, + "loss": 0.686, + "step": 2993 + }, + { + "epoch": 0.0517348631462538, + "grad_norm": 1.7230274973304227, + "learning_rate": 1.9975266035137463e-05, + "loss": 0.8637, + "step": 2994 + }, + { + "epoch": 0.0517521426596627, + "grad_norm": 1.0021804538568033, + "learning_rate": 1.9975226681785265e-05, + "loss": 0.8583, + "step": 2995 + }, + { + "epoch": 0.05176942217307161, + "grad_norm": 1.1910281302733325, + "learning_rate": 1.9975187297189898e-05, + "loss": 0.6019, + "step": 2996 + }, + { + "epoch": 0.05178670168648051, + "grad_norm": 1.1443689376971413, + "learning_rate": 1.997514788135149e-05, + "loss": 0.5894, + "step": 2997 + }, + { + "epoch": 0.05180398119988941, + "grad_norm": 1.4444191908421415, + "learning_rate": 1.9975108434270156e-05, + "loss": 1.024, + "step": 2998 + }, + { + "epoch": 0.05182126071329831, + "grad_norm": 1.263471534281287, + "learning_rate": 1.9975068955946026e-05, + "loss": 0.7482, + "step": 2999 + }, + { + "epoch": 0.05183854022670722, + "grad_norm": 0.8364186725440711, + "learning_rate": 1.9975029446379225e-05, + "loss": 0.7907, + "step": 3000 + }, + { + "epoch": 0.05185581974011612, + "grad_norm": 1.4176325808774244, + "learning_rate": 1.997498990556987e-05, + "loss": 0.7128, + "step": 3001 + }, + { + "epoch": 0.05187309925352502, + "grad_norm": 1.3344241656388027, + "learning_rate": 1.997495033351809e-05, + "loss": 0.7868, + "step": 3002 + }, + { + "epoch": 0.05189037876693392, + "grad_norm": 1.6599179310939156, + "learning_rate": 1.997491073022401e-05, + "loss": 0.9117, + "step": 3003 + }, + { + "epoch": 0.05190765828034283, + "grad_norm": 0.8629384338285234, + "learning_rate": 1.9974871095687748e-05, + "loss": 0.6945, + "step": 3004 + }, + { + "epoch": 0.05192493779375173, + "grad_norm": 1.3154381126771062, + "learning_rate": 1.9974831429909433e-05, + "loss": 0.9207, + "step": 3005 + }, + { + "epoch": 0.05194221730716063, + "grad_norm": 0.9523441864916155, + "learning_rate": 1.9974791732889185e-05, + "loss": 0.7349, + "step": 3006 + }, + { + "epoch": 0.05195949682056953, + "grad_norm": 1.4635877206623265, + "learning_rate": 1.9974752004627134e-05, + "loss": 0.8242, + "step": 3007 + }, + { + "epoch": 0.05197677633397844, + "grad_norm": 1.4618650261588104, + "learning_rate": 1.99747122451234e-05, + "loss": 0.7278, + "step": 3008 + }, + { + "epoch": 0.05199405584738734, + "grad_norm": 1.3752721600938556, + "learning_rate": 1.997467245437811e-05, + "loss": 0.9605, + "step": 3009 + }, + { + "epoch": 0.05201133536079624, + "grad_norm": 1.2770517072916978, + "learning_rate": 1.9974632632391388e-05, + "loss": 0.662, + "step": 3010 + }, + { + "epoch": 0.05202861487420514, + "grad_norm": 1.139693302974065, + "learning_rate": 1.997459277916336e-05, + "loss": 0.6079, + "step": 3011 + }, + { + "epoch": 0.05204589438761405, + "grad_norm": 1.4357157675248489, + "learning_rate": 1.997455289469415e-05, + "loss": 0.9886, + "step": 3012 + }, + { + "epoch": 0.05206317390102295, + "grad_norm": 1.5566161774261904, + "learning_rate": 1.9974512978983878e-05, + "loss": 1.0797, + "step": 3013 + }, + { + "epoch": 0.05208045341443185, + "grad_norm": 1.0970635278431924, + "learning_rate": 1.9974473032032678e-05, + "loss": 0.7003, + "step": 3014 + }, + { + "epoch": 0.05209773292784075, + "grad_norm": 1.3233483959126728, + "learning_rate": 1.9974433053840667e-05, + "loss": 0.8292, + "step": 3015 + }, + { + "epoch": 0.05211501244124966, + "grad_norm": 1.7495788159390306, + "learning_rate": 1.9974393044407976e-05, + "loss": 0.6525, + "step": 3016 + }, + { + "epoch": 0.05213229195465856, + "grad_norm": 1.3319561428258322, + "learning_rate": 1.997435300373473e-05, + "loss": 0.7823, + "step": 3017 + }, + { + "epoch": 0.05214957146806746, + "grad_norm": 1.2049024910705377, + "learning_rate": 1.9974312931821046e-05, + "loss": 0.5978, + "step": 3018 + }, + { + "epoch": 0.05216685098147636, + "grad_norm": 1.6951179753594792, + "learning_rate": 1.997427282866706e-05, + "loss": 1.0568, + "step": 3019 + }, + { + "epoch": 0.05218413049488527, + "grad_norm": 1.6687721364647836, + "learning_rate": 1.997423269427289e-05, + "loss": 0.996, + "step": 3020 + }, + { + "epoch": 0.05220141000829417, + "grad_norm": 1.4212334254517935, + "learning_rate": 1.997419252863867e-05, + "loss": 0.5256, + "step": 3021 + }, + { + "epoch": 0.05221868952170307, + "grad_norm": 1.2478977812364411, + "learning_rate": 1.9974152331764518e-05, + "loss": 0.8315, + "step": 3022 + }, + { + "epoch": 0.05223596903511197, + "grad_norm": 0.7904941802345423, + "learning_rate": 1.9974112103650563e-05, + "loss": 0.7057, + "step": 3023 + }, + { + "epoch": 0.05225324854852088, + "grad_norm": 1.5730574609436938, + "learning_rate": 1.9974071844296932e-05, + "loss": 0.5962, + "step": 3024 + }, + { + "epoch": 0.05227052806192978, + "grad_norm": 1.1345757432917005, + "learning_rate": 1.997403155370375e-05, + "loss": 0.9534, + "step": 3025 + }, + { + "epoch": 0.05228780757533868, + "grad_norm": 1.201253913254998, + "learning_rate": 1.997399123187114e-05, + "loss": 0.7158, + "step": 3026 + }, + { + "epoch": 0.05230508708874758, + "grad_norm": 0.8894978220464933, + "learning_rate": 1.9973950878799236e-05, + "loss": 0.5671, + "step": 3027 + }, + { + "epoch": 0.052322366602156486, + "grad_norm": 1.1346132956244446, + "learning_rate": 1.9973910494488157e-05, + "loss": 0.8902, + "step": 3028 + }, + { + "epoch": 0.05233964611556539, + "grad_norm": 0.8791753962650911, + "learning_rate": 1.9973870078938033e-05, + "loss": 0.5141, + "step": 3029 + }, + { + "epoch": 0.05235692562897429, + "grad_norm": 1.1034515266921832, + "learning_rate": 1.997382963214899e-05, + "loss": 0.772, + "step": 3030 + }, + { + "epoch": 0.05237420514238319, + "grad_norm": 1.2896253379544302, + "learning_rate": 1.9973789154121157e-05, + "loss": 0.8132, + "step": 3031 + }, + { + "epoch": 0.052391484655792096, + "grad_norm": 1.1898330080821233, + "learning_rate": 1.997374864485466e-05, + "loss": 0.7183, + "step": 3032 + }, + { + "epoch": 0.052408764169201, + "grad_norm": 1.4006669661567464, + "learning_rate": 1.9973708104349618e-05, + "loss": 0.8751, + "step": 3033 + }, + { + "epoch": 0.0524260436826099, + "grad_norm": 1.4928200926512754, + "learning_rate": 1.9973667532606167e-05, + "loss": 0.7207, + "step": 3034 + }, + { + "epoch": 0.0524433231960188, + "grad_norm": 0.875149096107096, + "learning_rate": 1.997362692962443e-05, + "loss": 0.5036, + "step": 3035 + }, + { + "epoch": 0.0524606027094277, + "grad_norm": 1.169347786642063, + "learning_rate": 1.9973586295404536e-05, + "loss": 0.9825, + "step": 3036 + }, + { + "epoch": 0.05247788222283661, + "grad_norm": 1.2534127628064002, + "learning_rate": 1.9973545629946613e-05, + "loss": 0.5542, + "step": 3037 + }, + { + "epoch": 0.05249516173624551, + "grad_norm": 1.1425753613585172, + "learning_rate": 1.9973504933250786e-05, + "loss": 0.6509, + "step": 3038 + }, + { + "epoch": 0.05251244124965441, + "grad_norm": 1.0254527851436146, + "learning_rate": 1.9973464205317184e-05, + "loss": 0.6468, + "step": 3039 + }, + { + "epoch": 0.05252972076306331, + "grad_norm": 1.523244191902442, + "learning_rate": 1.9973423446145935e-05, + "loss": 0.9192, + "step": 3040 + }, + { + "epoch": 0.05254700027647222, + "grad_norm": 1.3130067803745011, + "learning_rate": 1.9973382655737165e-05, + "loss": 0.8257, + "step": 3041 + }, + { + "epoch": 0.05256427978988112, + "grad_norm": 1.3780441165020298, + "learning_rate": 1.9973341834091e-05, + "loss": 0.9962, + "step": 3042 + }, + { + "epoch": 0.05258155930329002, + "grad_norm": 1.207504921059186, + "learning_rate": 1.9973300981207578e-05, + "loss": 0.669, + "step": 3043 + }, + { + "epoch": 0.05259883881669892, + "grad_norm": 1.2158141067376993, + "learning_rate": 1.9973260097087013e-05, + "loss": 0.7423, + "step": 3044 + }, + { + "epoch": 0.052616118330107826, + "grad_norm": 2.1975695121291734, + "learning_rate": 1.997321918172944e-05, + "loss": 0.7129, + "step": 3045 + }, + { + "epoch": 0.05263339784351673, + "grad_norm": 1.1874894933840343, + "learning_rate": 1.9973178235134988e-05, + "loss": 0.8309, + "step": 3046 + }, + { + "epoch": 0.05265067735692563, + "grad_norm": 0.7316333591289671, + "learning_rate": 1.9973137257303783e-05, + "loss": 0.7048, + "step": 3047 + }, + { + "epoch": 0.05266795687033453, + "grad_norm": 1.333383857530226, + "learning_rate": 1.9973096248235954e-05, + "loss": 0.7249, + "step": 3048 + }, + { + "epoch": 0.052685236383743436, + "grad_norm": 1.6456431942605716, + "learning_rate": 1.997305520793163e-05, + "loss": 0.835, + "step": 3049 + }, + { + "epoch": 0.05270251589715234, + "grad_norm": 1.2091361458230787, + "learning_rate": 1.997301413639094e-05, + "loss": 0.7733, + "step": 3050 + }, + { + "epoch": 0.05271979541056124, + "grad_norm": 2.3780753510719705, + "learning_rate": 1.997297303361401e-05, + "loss": 0.821, + "step": 3051 + }, + { + "epoch": 0.05273707492397014, + "grad_norm": 1.2361601785932226, + "learning_rate": 1.9972931899600972e-05, + "loss": 0.9032, + "step": 3052 + }, + { + "epoch": 0.052754354437379046, + "grad_norm": 1.260511556384328, + "learning_rate": 1.9972890734351953e-05, + "loss": 0.8971, + "step": 3053 + }, + { + "epoch": 0.05277163395078795, + "grad_norm": 1.2546677376540003, + "learning_rate": 1.9972849537867084e-05, + "loss": 0.9195, + "step": 3054 + }, + { + "epoch": 0.05278891346419685, + "grad_norm": 1.3425697813874595, + "learning_rate": 1.997280831014649e-05, + "loss": 0.5072, + "step": 3055 + }, + { + "epoch": 0.05280619297760575, + "grad_norm": 1.1710990038756828, + "learning_rate": 1.9972767051190303e-05, + "loss": 0.6678, + "step": 3056 + }, + { + "epoch": 0.052823472491014656, + "grad_norm": 1.5631305771663302, + "learning_rate": 1.997272576099865e-05, + "loss": 0.8845, + "step": 3057 + }, + { + "epoch": 0.052840752004423557, + "grad_norm": 0.997561516116087, + "learning_rate": 1.9972684439571667e-05, + "loss": 0.7777, + "step": 3058 + }, + { + "epoch": 0.05285803151783246, + "grad_norm": 1.0221665592628661, + "learning_rate": 1.9972643086909473e-05, + "loss": 0.7643, + "step": 3059 + }, + { + "epoch": 0.05287531103124136, + "grad_norm": 1.3003727184376812, + "learning_rate": 1.9972601703012208e-05, + "loss": 1.0399, + "step": 3060 + }, + { + "epoch": 0.052892590544650266, + "grad_norm": 1.1649642465025762, + "learning_rate": 1.9972560287879993e-05, + "loss": 0.765, + "step": 3061 + }, + { + "epoch": 0.052909870058059166, + "grad_norm": 1.2291886621374009, + "learning_rate": 1.997251884151296e-05, + "loss": 0.9817, + "step": 3062 + }, + { + "epoch": 0.05292714957146807, + "grad_norm": 1.1651662314125235, + "learning_rate": 1.9972477363911242e-05, + "loss": 0.7505, + "step": 3063 + }, + { + "epoch": 0.05294442908487697, + "grad_norm": 1.1382153746549066, + "learning_rate": 1.9972435855074967e-05, + "loss": 0.8302, + "step": 3064 + }, + { + "epoch": 0.052961708598285875, + "grad_norm": 1.2736097347086694, + "learning_rate": 1.9972394315004268e-05, + "loss": 0.8479, + "step": 3065 + }, + { + "epoch": 0.052978988111694776, + "grad_norm": 1.2833216271822272, + "learning_rate": 1.997235274369927e-05, + "loss": 0.8383, + "step": 3066 + }, + { + "epoch": 0.05299626762510368, + "grad_norm": 1.0881257206150239, + "learning_rate": 1.9972311141160103e-05, + "loss": 0.7331, + "step": 3067 + }, + { + "epoch": 0.05301354713851258, + "grad_norm": 1.6475084320470756, + "learning_rate": 1.99722695073869e-05, + "loss": 0.9351, + "step": 3068 + }, + { + "epoch": 0.053030826651921485, + "grad_norm": 1.1316346635193046, + "learning_rate": 1.9972227842379794e-05, + "loss": 0.757, + "step": 3069 + }, + { + "epoch": 0.053048106165330386, + "grad_norm": 1.3229124272712443, + "learning_rate": 1.997218614613891e-05, + "loss": 0.6584, + "step": 3070 + }, + { + "epoch": 0.05306538567873929, + "grad_norm": 0.8868066962540534, + "learning_rate": 1.9972144418664383e-05, + "loss": 0.8371, + "step": 3071 + }, + { + "epoch": 0.05308266519214819, + "grad_norm": 1.262076925011067, + "learning_rate": 1.997210265995634e-05, + "loss": 0.6568, + "step": 3072 + }, + { + "epoch": 0.05309994470555709, + "grad_norm": 1.3577746686204522, + "learning_rate": 1.9972060870014915e-05, + "loss": 0.6506, + "step": 3073 + }, + { + "epoch": 0.053117224218965996, + "grad_norm": 1.31153172379406, + "learning_rate": 1.9972019048840238e-05, + "loss": 0.5707, + "step": 3074 + }, + { + "epoch": 0.053134503732374896, + "grad_norm": 3.042947380752267, + "learning_rate": 1.9971977196432438e-05, + "loss": 0.5248, + "step": 3075 + }, + { + "epoch": 0.0531517832457838, + "grad_norm": 1.4203669398536263, + "learning_rate": 1.9971935312791646e-05, + "loss": 0.8222, + "step": 3076 + }, + { + "epoch": 0.0531690627591927, + "grad_norm": 1.8731107987623083, + "learning_rate": 1.9971893397917998e-05, + "loss": 0.8837, + "step": 3077 + }, + { + "epoch": 0.053186342272601606, + "grad_norm": 1.4311328766477813, + "learning_rate": 1.997185145181162e-05, + "loss": 1.0328, + "step": 3078 + }, + { + "epoch": 0.053203621786010506, + "grad_norm": 1.4134791784122043, + "learning_rate": 1.9971809474472646e-05, + "loss": 0.8873, + "step": 3079 + }, + { + "epoch": 0.05322090129941941, + "grad_norm": 1.6248481181529657, + "learning_rate": 1.997176746590121e-05, + "loss": 0.8702, + "step": 3080 + }, + { + "epoch": 0.05323818081282831, + "grad_norm": 1.2800991742507641, + "learning_rate": 1.9971725426097436e-05, + "loss": 0.9041, + "step": 3081 + }, + { + "epoch": 0.053255460326237215, + "grad_norm": 1.3409239722696638, + "learning_rate": 1.997168335506146e-05, + "loss": 0.8527, + "step": 3082 + }, + { + "epoch": 0.053272739839646116, + "grad_norm": 1.1236844318457473, + "learning_rate": 1.9971641252793418e-05, + "loss": 0.6536, + "step": 3083 + }, + { + "epoch": 0.05329001935305502, + "grad_norm": 0.7436822874807808, + "learning_rate": 1.9971599119293434e-05, + "loss": 0.5659, + "step": 3084 + }, + { + "epoch": 0.05330729886646392, + "grad_norm": 2.3841112819957315, + "learning_rate": 1.9971556954561645e-05, + "loss": 0.8326, + "step": 3085 + }, + { + "epoch": 0.053324578379872825, + "grad_norm": 1.2096809483733229, + "learning_rate": 1.9971514758598183e-05, + "loss": 0.6663, + "step": 3086 + }, + { + "epoch": 0.053341857893281726, + "grad_norm": 0.7588577764459791, + "learning_rate": 1.997147253140318e-05, + "loss": 0.5801, + "step": 3087 + }, + { + "epoch": 0.05335913740669063, + "grad_norm": 1.5333439660185342, + "learning_rate": 1.9971430272976762e-05, + "loss": 1.1047, + "step": 3088 + }, + { + "epoch": 0.05337641692009953, + "grad_norm": 1.2993679287299558, + "learning_rate": 1.997138798331907e-05, + "loss": 0.7495, + "step": 3089 + }, + { + "epoch": 0.053393696433508435, + "grad_norm": 1.1057102220779274, + "learning_rate": 1.9971345662430234e-05, + "loss": 0.6327, + "step": 3090 + }, + { + "epoch": 0.053410975946917336, + "grad_norm": 1.1946766418544652, + "learning_rate": 1.9971303310310383e-05, + "loss": 0.6316, + "step": 3091 + }, + { + "epoch": 0.053428255460326236, + "grad_norm": 1.367302085146776, + "learning_rate": 1.9971260926959654e-05, + "loss": 0.7961, + "step": 3092 + }, + { + "epoch": 0.05344553497373514, + "grad_norm": 1.6643504039212416, + "learning_rate": 1.997121851237818e-05, + "loss": 0.8069, + "step": 3093 + }, + { + "epoch": 0.053462814487144045, + "grad_norm": 1.3780276533932336, + "learning_rate": 1.997117606656609e-05, + "loss": 0.9874, + "step": 3094 + }, + { + "epoch": 0.053480094000552945, + "grad_norm": 1.2755975217353142, + "learning_rate": 1.9971133589523516e-05, + "loss": 0.5493, + "step": 3095 + }, + { + "epoch": 0.053497373513961846, + "grad_norm": 1.03422874772013, + "learning_rate": 1.9971091081250597e-05, + "loss": 0.6835, + "step": 3096 + }, + { + "epoch": 0.05351465302737075, + "grad_norm": 1.0138005719970122, + "learning_rate": 1.997104854174746e-05, + "loss": 0.7227, + "step": 3097 + }, + { + "epoch": 0.053531932540779655, + "grad_norm": 1.1832313411222917, + "learning_rate": 1.9971005971014247e-05, + "loss": 1.0253, + "step": 3098 + }, + { + "epoch": 0.053549212054188555, + "grad_norm": 1.2671272685450554, + "learning_rate": 1.997096336905108e-05, + "loss": 0.5826, + "step": 3099 + }, + { + "epoch": 0.053566491567597456, + "grad_norm": 3.9240765057971183, + "learning_rate": 1.9970920735858098e-05, + "loss": 0.7757, + "step": 3100 + }, + { + "epoch": 0.05358377108100636, + "grad_norm": 1.5268450246782115, + "learning_rate": 1.9970878071435437e-05, + "loss": 0.8671, + "step": 3101 + }, + { + "epoch": 0.053601050594415264, + "grad_norm": 1.69227693330774, + "learning_rate": 1.9970835375783227e-05, + "loss": 0.7369, + "step": 3102 + }, + { + "epoch": 0.053618330107824165, + "grad_norm": 1.1833021634500902, + "learning_rate": 1.9970792648901604e-05, + "loss": 0.732, + "step": 3103 + }, + { + "epoch": 0.053635609621233066, + "grad_norm": 2.868228501980167, + "learning_rate": 1.99707498907907e-05, + "loss": 0.7704, + "step": 3104 + }, + { + "epoch": 0.053652889134641966, + "grad_norm": 1.4639154120070548, + "learning_rate": 1.997070710145065e-05, + "loss": 0.5848, + "step": 3105 + }, + { + "epoch": 0.053670168648050874, + "grad_norm": 1.3516792843503136, + "learning_rate": 1.9970664280881587e-05, + "loss": 0.7065, + "step": 3106 + }, + { + "epoch": 0.053687448161459775, + "grad_norm": 2.0699219305653314, + "learning_rate": 1.9970621429083644e-05, + "loss": 0.9519, + "step": 3107 + }, + { + "epoch": 0.053704727674868676, + "grad_norm": 1.1066791423097926, + "learning_rate": 1.997057854605696e-05, + "loss": 0.7021, + "step": 3108 + }, + { + "epoch": 0.053722007188277576, + "grad_norm": 1.1570633087231195, + "learning_rate": 1.9970535631801666e-05, + "loss": 0.7031, + "step": 3109 + }, + { + "epoch": 0.053739286701686484, + "grad_norm": 0.8995868831795966, + "learning_rate": 1.9970492686317894e-05, + "loss": 0.7142, + "step": 3110 + }, + { + "epoch": 0.053756566215095385, + "grad_norm": 0.8289468441836289, + "learning_rate": 1.9970449709605784e-05, + "loss": 0.7731, + "step": 3111 + }, + { + "epoch": 0.053773845728504285, + "grad_norm": 1.22084651590637, + "learning_rate": 1.9970406701665465e-05, + "loss": 0.6987, + "step": 3112 + }, + { + "epoch": 0.053791125241913186, + "grad_norm": 0.8554550373256045, + "learning_rate": 1.9970363662497075e-05, + "loss": 0.6986, + "step": 3113 + }, + { + "epoch": 0.05380840475532209, + "grad_norm": 1.0599013297819748, + "learning_rate": 1.9970320592100752e-05, + "loss": 0.7167, + "step": 3114 + }, + { + "epoch": 0.053825684268730994, + "grad_norm": 1.248906378542703, + "learning_rate": 1.9970277490476625e-05, + "loss": 0.929, + "step": 3115 + }, + { + "epoch": 0.053842963782139895, + "grad_norm": 2.076505569999921, + "learning_rate": 1.9970234357624832e-05, + "loss": 0.8699, + "step": 3116 + }, + { + "epoch": 0.053860243295548796, + "grad_norm": 2.023788180530496, + "learning_rate": 1.9970191193545503e-05, + "loss": 0.6484, + "step": 3117 + }, + { + "epoch": 0.0538775228089577, + "grad_norm": 1.4028756910226683, + "learning_rate": 1.997014799823878e-05, + "loss": 0.8375, + "step": 3118 + }, + { + "epoch": 0.053894802322366604, + "grad_norm": 1.1286229982530083, + "learning_rate": 1.9970104771704798e-05, + "loss": 0.8508, + "step": 3119 + }, + { + "epoch": 0.053912081835775505, + "grad_norm": 1.4076823304553807, + "learning_rate": 1.997006151394369e-05, + "loss": 0.7353, + "step": 3120 + }, + { + "epoch": 0.053929361349184406, + "grad_norm": 0.9200935535842724, + "learning_rate": 1.997001822495559e-05, + "loss": 0.8359, + "step": 3121 + }, + { + "epoch": 0.053946640862593306, + "grad_norm": 1.1292194261398292, + "learning_rate": 1.9969974904740634e-05, + "loss": 0.758, + "step": 3122 + }, + { + "epoch": 0.053963920376002214, + "grad_norm": 0.9981594329319541, + "learning_rate": 1.996993155329896e-05, + "loss": 0.6763, + "step": 3123 + }, + { + "epoch": 0.053981199889411115, + "grad_norm": 1.3954621374070544, + "learning_rate": 1.99698881706307e-05, + "loss": 0.8263, + "step": 3124 + }, + { + "epoch": 0.053998479402820015, + "grad_norm": 1.0639785529417414, + "learning_rate": 1.9969844756736e-05, + "loss": 0.5268, + "step": 3125 + }, + { + "epoch": 0.054015758916228916, + "grad_norm": 1.307018535870165, + "learning_rate": 1.9969801311614984e-05, + "loss": 0.5243, + "step": 3126 + }, + { + "epoch": 0.054033038429637824, + "grad_norm": 1.0049841612280916, + "learning_rate": 1.996975783526779e-05, + "loss": 0.7857, + "step": 3127 + }, + { + "epoch": 0.054050317943046725, + "grad_norm": 1.4844152645662527, + "learning_rate": 1.996971432769456e-05, + "loss": 0.7539, + "step": 3128 + }, + { + "epoch": 0.054067597456455625, + "grad_norm": 1.1482607882423466, + "learning_rate": 1.996967078889543e-05, + "loss": 0.767, + "step": 3129 + }, + { + "epoch": 0.054084876969864526, + "grad_norm": 1.0692553242516112, + "learning_rate": 1.9969627218870525e-05, + "loss": 0.7629, + "step": 3130 + }, + { + "epoch": 0.054102156483273434, + "grad_norm": 0.6717799268200868, + "learning_rate": 1.9969583617619995e-05, + "loss": 0.6118, + "step": 3131 + }, + { + "epoch": 0.054119435996682334, + "grad_norm": 1.0963027054892576, + "learning_rate": 1.9969539985143972e-05, + "loss": 0.8673, + "step": 3132 + }, + { + "epoch": 0.054136715510091235, + "grad_norm": 1.1315894061887157, + "learning_rate": 1.996949632144259e-05, + "loss": 0.5713, + "step": 3133 + }, + { + "epoch": 0.054153995023500136, + "grad_norm": 1.1806260969991325, + "learning_rate": 1.996945262651599e-05, + "loss": 0.9167, + "step": 3134 + }, + { + "epoch": 0.05417127453690904, + "grad_norm": 1.0540121744922017, + "learning_rate": 1.9969408900364307e-05, + "loss": 0.4391, + "step": 3135 + }, + { + "epoch": 0.054188554050317944, + "grad_norm": 0.9867178002233601, + "learning_rate": 1.9969365142987676e-05, + "loss": 0.6946, + "step": 3136 + }, + { + "epoch": 0.054205833563726845, + "grad_norm": 1.3761174434671377, + "learning_rate": 1.9969321354386238e-05, + "loss": 1.1525, + "step": 3137 + }, + { + "epoch": 0.054223113077135746, + "grad_norm": 1.5551989530643884, + "learning_rate": 1.9969277534560124e-05, + "loss": 0.6991, + "step": 3138 + }, + { + "epoch": 0.05424039259054465, + "grad_norm": 1.1197971895677379, + "learning_rate": 1.9969233683509478e-05, + "loss": 0.8353, + "step": 3139 + }, + { + "epoch": 0.054257672103953554, + "grad_norm": 0.8783381872129891, + "learning_rate": 1.9969189801234433e-05, + "loss": 0.5448, + "step": 3140 + }, + { + "epoch": 0.054274951617362455, + "grad_norm": 0.8242677143444458, + "learning_rate": 1.996914588773513e-05, + "loss": 0.7538, + "step": 3141 + }, + { + "epoch": 0.054292231130771355, + "grad_norm": 1.051425308237744, + "learning_rate": 1.9969101943011705e-05, + "loss": 0.6676, + "step": 3142 + }, + { + "epoch": 0.05430951064418026, + "grad_norm": 1.071665425182938, + "learning_rate": 1.9969057967064293e-05, + "loss": 0.8148, + "step": 3143 + }, + { + "epoch": 0.054326790157589164, + "grad_norm": 1.0506642777506727, + "learning_rate": 1.9969013959893034e-05, + "loss": 0.6589, + "step": 3144 + }, + { + "epoch": 0.054344069670998064, + "grad_norm": 0.7443001944679517, + "learning_rate": 1.9968969921498063e-05, + "loss": 0.7117, + "step": 3145 + }, + { + "epoch": 0.054361349184406965, + "grad_norm": 1.4602845095662846, + "learning_rate": 1.9968925851879524e-05, + "loss": 0.7922, + "step": 3146 + }, + { + "epoch": 0.05437862869781587, + "grad_norm": 1.4774933251014275, + "learning_rate": 1.996888175103755e-05, + "loss": 0.7475, + "step": 3147 + }, + { + "epoch": 0.054395908211224774, + "grad_norm": 1.9674725113692761, + "learning_rate": 1.9968837618972286e-05, + "loss": 0.6174, + "step": 3148 + }, + { + "epoch": 0.054413187724633674, + "grad_norm": 1.421749213314382, + "learning_rate": 1.996879345568386e-05, + "loss": 0.6262, + "step": 3149 + }, + { + "epoch": 0.054430467238042575, + "grad_norm": 1.1398817470641291, + "learning_rate": 1.9968749261172412e-05, + "loss": 0.7405, + "step": 3150 + }, + { + "epoch": 0.054447746751451476, + "grad_norm": 1.1793003529218387, + "learning_rate": 1.996870503543809e-05, + "loss": 0.8131, + "step": 3151 + }, + { + "epoch": 0.05446502626486038, + "grad_norm": 0.7131106081250679, + "learning_rate": 1.9968660778481024e-05, + "loss": 0.5733, + "step": 3152 + }, + { + "epoch": 0.054482305778269284, + "grad_norm": 1.1850021181080608, + "learning_rate": 1.9968616490301357e-05, + "loss": 0.6153, + "step": 3153 + }, + { + "epoch": 0.054499585291678185, + "grad_norm": 1.1818344650873969, + "learning_rate": 1.996857217089922e-05, + "loss": 0.8939, + "step": 3154 + }, + { + "epoch": 0.054516864805087085, + "grad_norm": 1.0372297065623872, + "learning_rate": 1.9968527820274764e-05, + "loss": 0.7965, + "step": 3155 + }, + { + "epoch": 0.05453414431849599, + "grad_norm": 1.0394970007924396, + "learning_rate": 1.9968483438428116e-05, + "loss": 0.5498, + "step": 3156 + }, + { + "epoch": 0.054551423831904894, + "grad_norm": 1.2963233490184785, + "learning_rate": 1.9968439025359425e-05, + "loss": 0.7394, + "step": 3157 + }, + { + "epoch": 0.054568703345313795, + "grad_norm": 1.2951153711247247, + "learning_rate": 1.996839458106882e-05, + "loss": 0.7246, + "step": 3158 + }, + { + "epoch": 0.054585982858722695, + "grad_norm": 1.04269888941577, + "learning_rate": 1.996835010555645e-05, + "loss": 0.7392, + "step": 3159 + }, + { + "epoch": 0.0546032623721316, + "grad_norm": 1.0831288751679593, + "learning_rate": 1.9968305598822448e-05, + "loss": 0.6571, + "step": 3160 + }, + { + "epoch": 0.054620541885540504, + "grad_norm": 1.0713905347562287, + "learning_rate": 1.9968261060866957e-05, + "loss": 0.6993, + "step": 3161 + }, + { + "epoch": 0.054637821398949404, + "grad_norm": 1.4354899613657932, + "learning_rate": 1.996821649169011e-05, + "loss": 0.8146, + "step": 3162 + }, + { + "epoch": 0.054655100912358305, + "grad_norm": 1.0594953992195673, + "learning_rate": 1.9968171891292056e-05, + "loss": 0.681, + "step": 3163 + }, + { + "epoch": 0.05467238042576721, + "grad_norm": 1.028389262563757, + "learning_rate": 1.996812725967293e-05, + "loss": 0.8167, + "step": 3164 + }, + { + "epoch": 0.05468965993917611, + "grad_norm": 1.169341228705967, + "learning_rate": 1.9968082596832866e-05, + "loss": 0.6551, + "step": 3165 + }, + { + "epoch": 0.054706939452585014, + "grad_norm": 1.2104694515817995, + "learning_rate": 1.996803790277202e-05, + "loss": 0.8059, + "step": 3166 + }, + { + "epoch": 0.054724218965993915, + "grad_norm": 1.2938724557650223, + "learning_rate": 1.9967993177490516e-05, + "loss": 0.8021, + "step": 3167 + }, + { + "epoch": 0.05474149847940282, + "grad_norm": 1.6654652933675458, + "learning_rate": 1.9967948420988497e-05, + "loss": 0.6256, + "step": 3168 + }, + { + "epoch": 0.05475877799281172, + "grad_norm": 1.1443130905964205, + "learning_rate": 1.996790363326611e-05, + "loss": 0.6558, + "step": 3169 + }, + { + "epoch": 0.054776057506220624, + "grad_norm": 1.1984593276840458, + "learning_rate": 1.996785881432349e-05, + "loss": 0.4161, + "step": 3170 + }, + { + "epoch": 0.054793337019629525, + "grad_norm": 1.1305431861494764, + "learning_rate": 1.996781396416078e-05, + "loss": 0.7166, + "step": 3171 + }, + { + "epoch": 0.05481061653303843, + "grad_norm": 1.2289496863273233, + "learning_rate": 1.9967769082778118e-05, + "loss": 1.0026, + "step": 3172 + }, + { + "epoch": 0.05482789604644733, + "grad_norm": 1.1254307226748175, + "learning_rate": 1.9967724170175645e-05, + "loss": 0.6933, + "step": 3173 + }, + { + "epoch": 0.054845175559856234, + "grad_norm": 1.0996031234434978, + "learning_rate": 1.9967679226353503e-05, + "loss": 0.5988, + "step": 3174 + }, + { + "epoch": 0.054862455073265134, + "grad_norm": 1.0958800184959008, + "learning_rate": 1.996763425131183e-05, + "loss": 0.7228, + "step": 3175 + }, + { + "epoch": 0.05487973458667404, + "grad_norm": 1.2094672141582208, + "learning_rate": 1.9967589245050774e-05, + "loss": 0.6578, + "step": 3176 + }, + { + "epoch": 0.05489701410008294, + "grad_norm": 1.4110845469612951, + "learning_rate": 1.9967544207570467e-05, + "loss": 0.7986, + "step": 3177 + }, + { + "epoch": 0.054914293613491844, + "grad_norm": 1.0919065396975145, + "learning_rate": 1.9967499138871057e-05, + "loss": 1.0388, + "step": 3178 + }, + { + "epoch": 0.054931573126900744, + "grad_norm": 1.1801408076779485, + "learning_rate": 1.996745403895268e-05, + "loss": 0.8477, + "step": 3179 + }, + { + "epoch": 0.05494885264030965, + "grad_norm": 1.274640456440187, + "learning_rate": 1.996740890781548e-05, + "loss": 0.7119, + "step": 3180 + }, + { + "epoch": 0.05496613215371855, + "grad_norm": 1.1852545384706048, + "learning_rate": 1.9967363745459596e-05, + "loss": 0.7885, + "step": 3181 + }, + { + "epoch": 0.05498341166712745, + "grad_norm": 1.2191704242575945, + "learning_rate": 1.9967318551885174e-05, + "loss": 0.7152, + "step": 3182 + }, + { + "epoch": 0.055000691180536354, + "grad_norm": 1.7981206617840941, + "learning_rate": 1.9967273327092354e-05, + "loss": 0.8062, + "step": 3183 + }, + { + "epoch": 0.05501797069394526, + "grad_norm": 1.4077523984760698, + "learning_rate": 1.9967228071081274e-05, + "loss": 0.8631, + "step": 3184 + }, + { + "epoch": 0.05503525020735416, + "grad_norm": 0.993161783908386, + "learning_rate": 1.9967182783852077e-05, + "loss": 0.6494, + "step": 3185 + }, + { + "epoch": 0.05505252972076306, + "grad_norm": 1.31816650423986, + "learning_rate": 1.9967137465404908e-05, + "loss": 0.7867, + "step": 3186 + }, + { + "epoch": 0.055069809234171964, + "grad_norm": 1.001983899191418, + "learning_rate": 1.9967092115739905e-05, + "loss": 0.5841, + "step": 3187 + }, + { + "epoch": 0.05508708874758087, + "grad_norm": 1.6274488959237456, + "learning_rate": 1.9967046734857213e-05, + "loss": 0.8363, + "step": 3188 + }, + { + "epoch": 0.05510436826098977, + "grad_norm": 1.1968821793507456, + "learning_rate": 1.9967001322756975e-05, + "loss": 0.7357, + "step": 3189 + }, + { + "epoch": 0.05512164777439867, + "grad_norm": 1.321731484322447, + "learning_rate": 1.996695587943933e-05, + "loss": 0.8096, + "step": 3190 + }, + { + "epoch": 0.055138927287807574, + "grad_norm": 1.0480159529765058, + "learning_rate": 1.9966910404904423e-05, + "loss": 0.5495, + "step": 3191 + }, + { + "epoch": 0.055156206801216474, + "grad_norm": 1.4050288427290925, + "learning_rate": 1.9966864899152394e-05, + "loss": 0.772, + "step": 3192 + }, + { + "epoch": 0.05517348631462538, + "grad_norm": 1.1764822631983989, + "learning_rate": 1.9966819362183386e-05, + "loss": 0.8748, + "step": 3193 + }, + { + "epoch": 0.05519076582803428, + "grad_norm": 1.4481126117266985, + "learning_rate": 1.9966773793997544e-05, + "loss": 0.8483, + "step": 3194 + }, + { + "epoch": 0.055208045341443183, + "grad_norm": 1.4624629474971396, + "learning_rate": 1.9966728194595008e-05, + "loss": 0.882, + "step": 3195 + }, + { + "epoch": 0.055225324854852084, + "grad_norm": 1.9796643234409441, + "learning_rate": 1.996668256397592e-05, + "loss": 0.711, + "step": 3196 + }, + { + "epoch": 0.05524260436826099, + "grad_norm": 1.1226867393943918, + "learning_rate": 1.9966636902140428e-05, + "loss": 0.8203, + "step": 3197 + }, + { + "epoch": 0.05525988388166989, + "grad_norm": 1.2534885319789724, + "learning_rate": 1.9966591209088672e-05, + "loss": 0.6132, + "step": 3198 + }, + { + "epoch": 0.05527716339507879, + "grad_norm": 1.1921150686567357, + "learning_rate": 1.996654548482079e-05, + "loss": 0.8602, + "step": 3199 + }, + { + "epoch": 0.055294442908487694, + "grad_norm": 1.439081165484243, + "learning_rate": 1.9966499729336933e-05, + "loss": 0.7607, + "step": 3200 + }, + { + "epoch": 0.0553117224218966, + "grad_norm": 1.0090999071567106, + "learning_rate": 1.9966453942637245e-05, + "loss": 0.8639, + "step": 3201 + }, + { + "epoch": 0.0553290019353055, + "grad_norm": 1.419209211987456, + "learning_rate": 1.9966408124721862e-05, + "loss": 0.8012, + "step": 3202 + }, + { + "epoch": 0.0553462814487144, + "grad_norm": 0.8758155528091592, + "learning_rate": 1.9966362275590933e-05, + "loss": 0.5599, + "step": 3203 + }, + { + "epoch": 0.055363560962123304, + "grad_norm": 1.9507456832768948, + "learning_rate": 1.9966316395244597e-05, + "loss": 0.8179, + "step": 3204 + }, + { + "epoch": 0.05538084047553221, + "grad_norm": 1.2100649227789455, + "learning_rate": 1.9966270483683003e-05, + "loss": 0.7254, + "step": 3205 + }, + { + "epoch": 0.05539811998894111, + "grad_norm": 1.260482368956477, + "learning_rate": 1.9966224540906293e-05, + "loss": 0.7724, + "step": 3206 + }, + { + "epoch": 0.05541539950235001, + "grad_norm": 1.429750132459995, + "learning_rate": 1.9966178566914607e-05, + "loss": 0.722, + "step": 3207 + }, + { + "epoch": 0.055432679015758914, + "grad_norm": 1.3803050274560107, + "learning_rate": 1.9966132561708096e-05, + "loss": 0.9147, + "step": 3208 + }, + { + "epoch": 0.05544995852916782, + "grad_norm": 1.1398876650102012, + "learning_rate": 1.99660865252869e-05, + "loss": 0.962, + "step": 3209 + }, + { + "epoch": 0.05546723804257672, + "grad_norm": 1.148303878089736, + "learning_rate": 1.996604045765116e-05, + "loss": 0.8526, + "step": 3210 + }, + { + "epoch": 0.05548451755598562, + "grad_norm": 1.3093470871596486, + "learning_rate": 1.9965994358801027e-05, + "loss": 1.0072, + "step": 3211 + }, + { + "epoch": 0.05550179706939452, + "grad_norm": 1.4915293684863253, + "learning_rate": 1.9965948228736646e-05, + "loss": 0.6106, + "step": 3212 + }, + { + "epoch": 0.05551907658280343, + "grad_norm": 1.1913672743058534, + "learning_rate": 1.996590206745815e-05, + "loss": 0.7996, + "step": 3213 + }, + { + "epoch": 0.05553635609621233, + "grad_norm": 1.0548574452850876, + "learning_rate": 1.9965855874965697e-05, + "loss": 0.7101, + "step": 3214 + }, + { + "epoch": 0.05555363560962123, + "grad_norm": 1.314483504939109, + "learning_rate": 1.9965809651259424e-05, + "loss": 0.8801, + "step": 3215 + }, + { + "epoch": 0.05557091512303013, + "grad_norm": 1.3468580080793595, + "learning_rate": 1.9965763396339476e-05, + "loss": 0.8533, + "step": 3216 + }, + { + "epoch": 0.05558819463643904, + "grad_norm": 1.1063086981562957, + "learning_rate": 1.9965717110206e-05, + "loss": 0.615, + "step": 3217 + }, + { + "epoch": 0.05560547414984794, + "grad_norm": 1.5956974539784063, + "learning_rate": 1.9965670792859145e-05, + "loss": 0.751, + "step": 3218 + }, + { + "epoch": 0.05562275366325684, + "grad_norm": 1.166792152286084, + "learning_rate": 1.9965624444299047e-05, + "loss": 0.8452, + "step": 3219 + }, + { + "epoch": 0.05564003317666574, + "grad_norm": 1.0839966563186034, + "learning_rate": 1.996557806452586e-05, + "loss": 0.6613, + "step": 3220 + }, + { + "epoch": 0.05565731269007465, + "grad_norm": 1.2912675560005193, + "learning_rate": 1.996553165353972e-05, + "loss": 0.8663, + "step": 3221 + }, + { + "epoch": 0.05567459220348355, + "grad_norm": 1.1065263733304436, + "learning_rate": 1.996548521134078e-05, + "loss": 0.7638, + "step": 3222 + }, + { + "epoch": 0.05569187171689245, + "grad_norm": 1.156756204389439, + "learning_rate": 1.9965438737929186e-05, + "loss": 0.7962, + "step": 3223 + }, + { + "epoch": 0.05570915123030135, + "grad_norm": 1.2657009998546556, + "learning_rate": 1.996539223330508e-05, + "loss": 0.8499, + "step": 3224 + }, + { + "epoch": 0.05572643074371026, + "grad_norm": 0.845245083387036, + "learning_rate": 1.9965345697468602e-05, + "loss": 0.7237, + "step": 3225 + }, + { + "epoch": 0.05574371025711916, + "grad_norm": 1.0866920462599656, + "learning_rate": 1.996529913041991e-05, + "loss": 0.6066, + "step": 3226 + }, + { + "epoch": 0.05576098977052806, + "grad_norm": 1.4927705754269673, + "learning_rate": 1.9965252532159144e-05, + "loss": 0.6729, + "step": 3227 + }, + { + "epoch": 0.05577826928393696, + "grad_norm": 1.172553115546871, + "learning_rate": 1.9965205902686444e-05, + "loss": 1.0992, + "step": 3228 + }, + { + "epoch": 0.05579554879734586, + "grad_norm": 0.9934686585893958, + "learning_rate": 1.9965159242001966e-05, + "loss": 0.6588, + "step": 3229 + }, + { + "epoch": 0.05581282831075477, + "grad_norm": 1.057838160693124, + "learning_rate": 1.996511255010585e-05, + "loss": 0.6815, + "step": 3230 + }, + { + "epoch": 0.05583010782416367, + "grad_norm": 0.938743003998684, + "learning_rate": 1.9965065826998247e-05, + "loss": 0.6243, + "step": 3231 + }, + { + "epoch": 0.05584738733757257, + "grad_norm": 0.8908988623198372, + "learning_rate": 1.9965019072679298e-05, + "loss": 0.9385, + "step": 3232 + }, + { + "epoch": 0.05586466685098147, + "grad_norm": 0.9551501029862434, + "learning_rate": 1.9964972287149153e-05, + "loss": 0.6507, + "step": 3233 + }, + { + "epoch": 0.05588194636439038, + "grad_norm": 1.7027661928006153, + "learning_rate": 1.9964925470407958e-05, + "loss": 0.6049, + "step": 3234 + }, + { + "epoch": 0.05589922587779928, + "grad_norm": 1.4992596153484012, + "learning_rate": 1.996487862245586e-05, + "loss": 0.6408, + "step": 3235 + }, + { + "epoch": 0.05591650539120818, + "grad_norm": 0.9771817559852145, + "learning_rate": 1.9964831743293e-05, + "loss": 0.7104, + "step": 3236 + }, + { + "epoch": 0.05593378490461708, + "grad_norm": 1.2091420297842377, + "learning_rate": 1.9964784832919534e-05, + "loss": 0.7924, + "step": 3237 + }, + { + "epoch": 0.05595106441802599, + "grad_norm": 1.35758923064685, + "learning_rate": 1.9964737891335603e-05, + "loss": 0.8267, + "step": 3238 + }, + { + "epoch": 0.05596834393143489, + "grad_norm": 0.9204357394960807, + "learning_rate": 1.9964690918541357e-05, + "loss": 0.6013, + "step": 3239 + }, + { + "epoch": 0.05598562344484379, + "grad_norm": 1.035551760701494, + "learning_rate": 1.996464391453694e-05, + "loss": 0.9325, + "step": 3240 + }, + { + "epoch": 0.05600290295825269, + "grad_norm": 2.8394571824995127, + "learning_rate": 1.99645968793225e-05, + "loss": 0.6771, + "step": 3241 + }, + { + "epoch": 0.0560201824716616, + "grad_norm": 1.0686402076816341, + "learning_rate": 1.996454981289819e-05, + "loss": 0.7439, + "step": 3242 + }, + { + "epoch": 0.0560374619850705, + "grad_norm": 1.0455069626382032, + "learning_rate": 1.9964502715264152e-05, + "loss": 0.9731, + "step": 3243 + }, + { + "epoch": 0.0560547414984794, + "grad_norm": 1.0132891986578227, + "learning_rate": 1.9964455586420532e-05, + "loss": 0.992, + "step": 3244 + }, + { + "epoch": 0.0560720210118883, + "grad_norm": 1.8305328121641824, + "learning_rate": 1.9964408426367478e-05, + "loss": 0.5801, + "step": 3245 + }, + { + "epoch": 0.05608930052529721, + "grad_norm": 0.7964967301218866, + "learning_rate": 1.996436123510514e-05, + "loss": 0.6457, + "step": 3246 + }, + { + "epoch": 0.05610658003870611, + "grad_norm": 1.4123968626268888, + "learning_rate": 1.996431401263367e-05, + "loss": 0.8878, + "step": 3247 + }, + { + "epoch": 0.05612385955211501, + "grad_norm": 1.1393832868589822, + "learning_rate": 1.9964266758953208e-05, + "loss": 0.9578, + "step": 3248 + }, + { + "epoch": 0.05614113906552391, + "grad_norm": 2.024133296646305, + "learning_rate": 1.9964219474063906e-05, + "loss": 0.938, + "step": 3249 + }, + { + "epoch": 0.05615841857893282, + "grad_norm": 1.254287876725877, + "learning_rate": 1.996417215796591e-05, + "loss": 0.7458, + "step": 3250 + }, + { + "epoch": 0.05617569809234172, + "grad_norm": 1.0748973507311252, + "learning_rate": 1.996412481065937e-05, + "loss": 0.5871, + "step": 3251 + }, + { + "epoch": 0.05619297760575062, + "grad_norm": 2.4049942683213392, + "learning_rate": 1.9964077432144434e-05, + "loss": 0.8744, + "step": 3252 + }, + { + "epoch": 0.05621025711915952, + "grad_norm": 1.2041593292572772, + "learning_rate": 1.996403002242125e-05, + "loss": 0.779, + "step": 3253 + }, + { + "epoch": 0.05622753663256843, + "grad_norm": 1.1533254962338406, + "learning_rate": 1.9963982581489966e-05, + "loss": 0.6452, + "step": 3254 + }, + { + "epoch": 0.05624481614597733, + "grad_norm": 0.9976166538406418, + "learning_rate": 1.9963935109350733e-05, + "loss": 0.8229, + "step": 3255 + }, + { + "epoch": 0.05626209565938623, + "grad_norm": 1.2594133997674402, + "learning_rate": 1.99638876060037e-05, + "loss": 0.7398, + "step": 3256 + }, + { + "epoch": 0.05627937517279513, + "grad_norm": 1.0376755721137059, + "learning_rate": 1.996384007144901e-05, + "loss": 0.8946, + "step": 3257 + }, + { + "epoch": 0.05629665468620404, + "grad_norm": 0.8796277078264617, + "learning_rate": 1.9963792505686816e-05, + "loss": 0.5741, + "step": 3258 + }, + { + "epoch": 0.05631393419961294, + "grad_norm": 1.244183968005114, + "learning_rate": 1.9963744908717268e-05, + "loss": 0.6058, + "step": 3259 + }, + { + "epoch": 0.05633121371302184, + "grad_norm": 1.1488358816160655, + "learning_rate": 1.9963697280540516e-05, + "loss": 0.6175, + "step": 3260 + }, + { + "epoch": 0.05634849322643074, + "grad_norm": 1.33111124410394, + "learning_rate": 1.99636496211567e-05, + "loss": 0.8785, + "step": 3261 + }, + { + "epoch": 0.05636577273983965, + "grad_norm": 1.349654923486497, + "learning_rate": 1.996360193056598e-05, + "loss": 0.8345, + "step": 3262 + }, + { + "epoch": 0.05638305225324855, + "grad_norm": 1.2860026694314504, + "learning_rate": 1.9963554208768502e-05, + "loss": 0.7926, + "step": 3263 + }, + { + "epoch": 0.05640033176665745, + "grad_norm": 1.048709753170748, + "learning_rate": 1.996350645576442e-05, + "loss": 0.5987, + "step": 3264 + }, + { + "epoch": 0.05641761128006635, + "grad_norm": 0.9489177670982402, + "learning_rate": 1.996345867155387e-05, + "loss": 0.6885, + "step": 3265 + }, + { + "epoch": 0.05643489079347526, + "grad_norm": 1.0610846295993275, + "learning_rate": 1.9963410856137013e-05, + "loss": 0.6472, + "step": 3266 + }, + { + "epoch": 0.05645217030688416, + "grad_norm": 0.7079919890356985, + "learning_rate": 1.9963363009513998e-05, + "loss": 0.9014, + "step": 3267 + }, + { + "epoch": 0.05646944982029306, + "grad_norm": 1.056673943966365, + "learning_rate": 1.9963315131684973e-05, + "loss": 0.7251, + "step": 3268 + }, + { + "epoch": 0.05648672933370196, + "grad_norm": 1.2328234163413008, + "learning_rate": 1.9963267222650087e-05, + "loss": 0.6071, + "step": 3269 + }, + { + "epoch": 0.05650400884711086, + "grad_norm": 1.489039641711222, + "learning_rate": 1.9963219282409486e-05, + "loss": 0.6785, + "step": 3270 + }, + { + "epoch": 0.05652128836051977, + "grad_norm": 1.478920643235394, + "learning_rate": 1.996317131096333e-05, + "loss": 1.1095, + "step": 3271 + }, + { + "epoch": 0.05653856787392867, + "grad_norm": 1.2423975209443123, + "learning_rate": 1.9963123308311765e-05, + "loss": 0.6223, + "step": 3272 + }, + { + "epoch": 0.05655584738733757, + "grad_norm": 1.3546668826067532, + "learning_rate": 1.9963075274454938e-05, + "loss": 0.9628, + "step": 3273 + }, + { + "epoch": 0.05657312690074647, + "grad_norm": 1.4679139162953097, + "learning_rate": 1.9963027209393005e-05, + "loss": 0.8324, + "step": 3274 + }, + { + "epoch": 0.05659040641415538, + "grad_norm": 1.3063709682203934, + "learning_rate": 1.9962979113126113e-05, + "loss": 0.8144, + "step": 3275 + }, + { + "epoch": 0.05660768592756428, + "grad_norm": 1.003287296211344, + "learning_rate": 1.9962930985654413e-05, + "loss": 0.8195, + "step": 3276 + }, + { + "epoch": 0.05662496544097318, + "grad_norm": 1.2317491888965373, + "learning_rate": 1.9962882826978055e-05, + "loss": 0.7041, + "step": 3277 + }, + { + "epoch": 0.05664224495438208, + "grad_norm": 1.2431065631972023, + "learning_rate": 1.996283463709719e-05, + "loss": 1.0453, + "step": 3278 + }, + { + "epoch": 0.05665952446779099, + "grad_norm": 0.7555764547221825, + "learning_rate": 1.996278641601197e-05, + "loss": 0.7963, + "step": 3279 + }, + { + "epoch": 0.05667680398119989, + "grad_norm": 1.2019000967655118, + "learning_rate": 1.996273816372255e-05, + "loss": 0.895, + "step": 3280 + }, + { + "epoch": 0.05669408349460879, + "grad_norm": 0.9130847219747531, + "learning_rate": 1.996268988022907e-05, + "loss": 0.7876, + "step": 3281 + }, + { + "epoch": 0.05671136300801769, + "grad_norm": 0.9452635460253805, + "learning_rate": 1.9962641565531694e-05, + "loss": 0.8377, + "step": 3282 + }, + { + "epoch": 0.0567286425214266, + "grad_norm": 1.1139970553254683, + "learning_rate": 1.9962593219630563e-05, + "loss": 0.8878, + "step": 3283 + }, + { + "epoch": 0.0567459220348355, + "grad_norm": 1.0536584318477373, + "learning_rate": 1.9962544842525835e-05, + "loss": 0.7726, + "step": 3284 + }, + { + "epoch": 0.0567632015482444, + "grad_norm": 1.2137036897091853, + "learning_rate": 1.996249643421766e-05, + "loss": 0.8567, + "step": 3285 + }, + { + "epoch": 0.0567804810616533, + "grad_norm": 1.1020064755967833, + "learning_rate": 1.9962447994706188e-05, + "loss": 0.764, + "step": 3286 + }, + { + "epoch": 0.05679776057506221, + "grad_norm": 0.8971233727232967, + "learning_rate": 1.9962399523991574e-05, + "loss": 0.6257, + "step": 3287 + }, + { + "epoch": 0.05681504008847111, + "grad_norm": 0.8751714605031908, + "learning_rate": 1.9962351022073966e-05, + "loss": 0.568, + "step": 3288 + }, + { + "epoch": 0.05683231960188001, + "grad_norm": 0.9371807396423365, + "learning_rate": 1.996230248895352e-05, + "loss": 0.512, + "step": 3289 + }, + { + "epoch": 0.05684959911528891, + "grad_norm": 1.1066494564509466, + "learning_rate": 1.9962253924630383e-05, + "loss": 0.918, + "step": 3290 + }, + { + "epoch": 0.05686687862869782, + "grad_norm": 1.1227191099500808, + "learning_rate": 1.9962205329104708e-05, + "loss": 0.5934, + "step": 3291 + }, + { + "epoch": 0.05688415814210672, + "grad_norm": 1.1685310211443505, + "learning_rate": 1.996215670237665e-05, + "loss": 0.8338, + "step": 3292 + }, + { + "epoch": 0.05690143765551562, + "grad_norm": 1.1213825496608272, + "learning_rate": 1.996210804444636e-05, + "loss": 0.8445, + "step": 3293 + }, + { + "epoch": 0.05691871716892452, + "grad_norm": 0.7357729890863448, + "learning_rate": 1.9962059355313988e-05, + "loss": 0.5586, + "step": 3294 + }, + { + "epoch": 0.05693599668233343, + "grad_norm": 1.05034162476167, + "learning_rate": 1.9962010634979693e-05, + "loss": 0.7684, + "step": 3295 + }, + { + "epoch": 0.05695327619574233, + "grad_norm": 1.0252859208314906, + "learning_rate": 1.9961961883443624e-05, + "loss": 0.4939, + "step": 3296 + }, + { + "epoch": 0.05697055570915123, + "grad_norm": 1.04956890687857, + "learning_rate": 1.996191310070593e-05, + "loss": 0.7549, + "step": 3297 + }, + { + "epoch": 0.05698783522256013, + "grad_norm": 1.2378304578702373, + "learning_rate": 1.9961864286766773e-05, + "loss": 1.1366, + "step": 3298 + }, + { + "epoch": 0.05700511473596904, + "grad_norm": 1.0436783875870932, + "learning_rate": 1.9961815441626292e-05, + "loss": 0.6189, + "step": 3299 + }, + { + "epoch": 0.05702239424937794, + "grad_norm": 1.115242752778028, + "learning_rate": 1.9961766565284652e-05, + "loss": 0.7572, + "step": 3300 + }, + { + "epoch": 0.05703967376278684, + "grad_norm": 1.0791188822784556, + "learning_rate": 1.9961717657742004e-05, + "loss": 0.7154, + "step": 3301 + }, + { + "epoch": 0.05705695327619574, + "grad_norm": 1.1682028665039368, + "learning_rate": 1.9961668718998498e-05, + "loss": 0.6672, + "step": 3302 + }, + { + "epoch": 0.05707423278960465, + "grad_norm": 1.1564545491198583, + "learning_rate": 1.9961619749054285e-05, + "loss": 0.8469, + "step": 3303 + }, + { + "epoch": 0.05709151230301355, + "grad_norm": 1.2814919132032099, + "learning_rate": 1.9961570747909523e-05, + "loss": 0.7112, + "step": 3304 + }, + { + "epoch": 0.05710879181642245, + "grad_norm": 1.146593635448628, + "learning_rate": 1.996152171556437e-05, + "loss": 0.6378, + "step": 3305 + }, + { + "epoch": 0.05712607132983135, + "grad_norm": 1.0941424607064913, + "learning_rate": 1.9961472652018968e-05, + "loss": 0.5342, + "step": 3306 + }, + { + "epoch": 0.05714335084324025, + "grad_norm": 1.5431148805329367, + "learning_rate": 1.996142355727348e-05, + "loss": 0.7512, + "step": 3307 + }, + { + "epoch": 0.05716063035664916, + "grad_norm": 1.588911402494546, + "learning_rate": 1.9961374431328053e-05, + "loss": 1.1734, + "step": 3308 + }, + { + "epoch": 0.05717790987005806, + "grad_norm": 1.3591388110747984, + "learning_rate": 1.9961325274182847e-05, + "loss": 0.8344, + "step": 3309 + }, + { + "epoch": 0.05719518938346696, + "grad_norm": 0.7380897895764178, + "learning_rate": 1.9961276085838013e-05, + "loss": 0.7461, + "step": 3310 + }, + { + "epoch": 0.05721246889687586, + "grad_norm": 1.137138877409362, + "learning_rate": 1.9961226866293706e-05, + "loss": 0.7503, + "step": 3311 + }, + { + "epoch": 0.05722974841028477, + "grad_norm": 1.3186552611750386, + "learning_rate": 1.9961177615550076e-05, + "loss": 0.6948, + "step": 3312 + }, + { + "epoch": 0.05724702792369367, + "grad_norm": 1.1050765860093843, + "learning_rate": 1.996112833360728e-05, + "loss": 0.7638, + "step": 3313 + }, + { + "epoch": 0.05726430743710257, + "grad_norm": 1.3632493953914915, + "learning_rate": 1.996107902046548e-05, + "loss": 0.7714, + "step": 3314 + }, + { + "epoch": 0.05728158695051147, + "grad_norm": 1.0736112022761797, + "learning_rate": 1.996102967612482e-05, + "loss": 0.8938, + "step": 3315 + }, + { + "epoch": 0.05729886646392038, + "grad_norm": 1.07118109671447, + "learning_rate": 1.9960980300585457e-05, + "loss": 0.7434, + "step": 3316 + }, + { + "epoch": 0.05731614597732928, + "grad_norm": 4.710888490271653, + "learning_rate": 1.9960930893847546e-05, + "loss": 0.7887, + "step": 3317 + }, + { + "epoch": 0.05733342549073818, + "grad_norm": 1.667151094303556, + "learning_rate": 1.9960881455911244e-05, + "loss": 0.6989, + "step": 3318 + }, + { + "epoch": 0.05735070500414708, + "grad_norm": 1.1813497017928092, + "learning_rate": 1.9960831986776702e-05, + "loss": 0.8076, + "step": 3319 + }, + { + "epoch": 0.05736798451755599, + "grad_norm": 1.4903099762046417, + "learning_rate": 1.996078248644408e-05, + "loss": 0.7106, + "step": 3320 + }, + { + "epoch": 0.05738526403096489, + "grad_norm": 0.939190075445075, + "learning_rate": 1.9960732954913528e-05, + "loss": 0.7961, + "step": 3321 + }, + { + "epoch": 0.05740254354437379, + "grad_norm": 1.5543839222489801, + "learning_rate": 1.9960683392185206e-05, + "loss": 0.9989, + "step": 3322 + }, + { + "epoch": 0.05741982305778269, + "grad_norm": 1.5750839030795787, + "learning_rate": 1.9960633798259262e-05, + "loss": 0.9233, + "step": 3323 + }, + { + "epoch": 0.0574371025711916, + "grad_norm": 1.5387972533975618, + "learning_rate": 1.996058417313586e-05, + "loss": 1.1481, + "step": 3324 + }, + { + "epoch": 0.0574543820846005, + "grad_norm": 1.036682035203407, + "learning_rate": 1.9960534516815153e-05, + "loss": 0.7237, + "step": 3325 + }, + { + "epoch": 0.0574716615980094, + "grad_norm": 0.6720764223762413, + "learning_rate": 1.9960484829297293e-05, + "loss": 0.7376, + "step": 3326 + }, + { + "epoch": 0.0574889411114183, + "grad_norm": 1.1916097962353545, + "learning_rate": 1.9960435110582437e-05, + "loss": 0.843, + "step": 3327 + }, + { + "epoch": 0.05750622062482721, + "grad_norm": 1.1505417668155806, + "learning_rate": 1.996038536067074e-05, + "loss": 0.6259, + "step": 3328 + }, + { + "epoch": 0.05752350013823611, + "grad_norm": 1.0969364079726036, + "learning_rate": 1.9960335579562364e-05, + "loss": 0.833, + "step": 3329 + }, + { + "epoch": 0.05754077965164501, + "grad_norm": 1.0135319925098454, + "learning_rate": 1.996028576725745e-05, + "loss": 0.7297, + "step": 3330 + }, + { + "epoch": 0.05755805916505391, + "grad_norm": 3.362197285426678, + "learning_rate": 1.9960235923756173e-05, + "loss": 0.6683, + "step": 3331 + }, + { + "epoch": 0.05757533867846282, + "grad_norm": 1.736129235471708, + "learning_rate": 1.9960186049058675e-05, + "loss": 0.7572, + "step": 3332 + }, + { + "epoch": 0.05759261819187172, + "grad_norm": 1.157353627756338, + "learning_rate": 1.9960136143165123e-05, + "loss": 0.5465, + "step": 3333 + }, + { + "epoch": 0.05760989770528062, + "grad_norm": 1.3470691106328612, + "learning_rate": 1.9960086206075663e-05, + "loss": 0.934, + "step": 3334 + }, + { + "epoch": 0.05762717721868952, + "grad_norm": 1.2096396300801044, + "learning_rate": 1.996003623779046e-05, + "loss": 0.9389, + "step": 3335 + }, + { + "epoch": 0.05764445673209843, + "grad_norm": 1.1212086385423043, + "learning_rate": 1.9959986238309664e-05, + "loss": 0.7029, + "step": 3336 + }, + { + "epoch": 0.05766173624550733, + "grad_norm": 0.9766228775597129, + "learning_rate": 1.9959936207633434e-05, + "loss": 0.6759, + "step": 3337 + }, + { + "epoch": 0.05767901575891623, + "grad_norm": 1.0847242097899772, + "learning_rate": 1.9959886145761926e-05, + "loss": 0.7885, + "step": 3338 + }, + { + "epoch": 0.05769629527232513, + "grad_norm": 1.3739147557648412, + "learning_rate": 1.9959836052695302e-05, + "loss": 1.0155, + "step": 3339 + }, + { + "epoch": 0.05771357478573404, + "grad_norm": 1.1358895970690224, + "learning_rate": 1.9959785928433712e-05, + "loss": 0.8207, + "step": 3340 + }, + { + "epoch": 0.05773085429914294, + "grad_norm": 1.164281969820304, + "learning_rate": 1.9959735772977314e-05, + "loss": 0.8402, + "step": 3341 + }, + { + "epoch": 0.05774813381255184, + "grad_norm": 1.1270817678218583, + "learning_rate": 1.9959685586326266e-05, + "loss": 0.5835, + "step": 3342 + }, + { + "epoch": 0.05776541332596074, + "grad_norm": 1.3756094931230545, + "learning_rate": 1.995963536848073e-05, + "loss": 0.7736, + "step": 3343 + }, + { + "epoch": 0.05778269283936965, + "grad_norm": 0.9188739642070968, + "learning_rate": 1.9959585119440855e-05, + "loss": 0.599, + "step": 3344 + }, + { + "epoch": 0.05779997235277855, + "grad_norm": 1.2499031944308792, + "learning_rate": 1.9959534839206804e-05, + "loss": 0.791, + "step": 3345 + }, + { + "epoch": 0.05781725186618745, + "grad_norm": 1.2635587767580727, + "learning_rate": 1.9959484527778732e-05, + "loss": 0.8756, + "step": 3346 + }, + { + "epoch": 0.05783453137959635, + "grad_norm": 1.2744070209531297, + "learning_rate": 1.9959434185156798e-05, + "loss": 0.9438, + "step": 3347 + }, + { + "epoch": 0.05785181089300525, + "grad_norm": 1.6157584550715236, + "learning_rate": 1.995938381134116e-05, + "loss": 0.8338, + "step": 3348 + }, + { + "epoch": 0.05786909040641416, + "grad_norm": 1.0370433018401715, + "learning_rate": 1.9959333406331975e-05, + "loss": 0.9003, + "step": 3349 + }, + { + "epoch": 0.05788636991982306, + "grad_norm": 0.99939448442961, + "learning_rate": 1.9959282970129404e-05, + "loss": 0.5427, + "step": 3350 + }, + { + "epoch": 0.05790364943323196, + "grad_norm": 1.2060365308024297, + "learning_rate": 1.9959232502733598e-05, + "loss": 0.6911, + "step": 3351 + }, + { + "epoch": 0.05792092894664086, + "grad_norm": 1.547978528840639, + "learning_rate": 1.995918200414472e-05, + "loss": 0.7789, + "step": 3352 + }, + { + "epoch": 0.05793820846004977, + "grad_norm": 1.143554097721934, + "learning_rate": 1.9959131474362924e-05, + "loss": 0.9267, + "step": 3353 + }, + { + "epoch": 0.05795548797345867, + "grad_norm": 1.0538319735693795, + "learning_rate": 1.9959080913388375e-05, + "loss": 0.7094, + "step": 3354 + }, + { + "epoch": 0.05797276748686757, + "grad_norm": 1.2217300484602347, + "learning_rate": 1.9959030321221228e-05, + "loss": 0.7042, + "step": 3355 + }, + { + "epoch": 0.05799004700027647, + "grad_norm": 1.2943980307948293, + "learning_rate": 1.995897969786164e-05, + "loss": 0.5778, + "step": 3356 + }, + { + "epoch": 0.05800732651368538, + "grad_norm": 1.951469040718873, + "learning_rate": 1.995892904330977e-05, + "loss": 0.7764, + "step": 3357 + }, + { + "epoch": 0.05802460602709428, + "grad_norm": 1.3221357772000866, + "learning_rate": 1.9958878357565776e-05, + "loss": 0.8344, + "step": 3358 + }, + { + "epoch": 0.05804188554050318, + "grad_norm": 0.7224314379323149, + "learning_rate": 1.9958827640629822e-05, + "loss": 0.8225, + "step": 3359 + }, + { + "epoch": 0.05805916505391208, + "grad_norm": 1.4260127145413366, + "learning_rate": 1.9958776892502063e-05, + "loss": 0.6552, + "step": 3360 + }, + { + "epoch": 0.05807644456732099, + "grad_norm": 1.1104708799096616, + "learning_rate": 1.9958726113182654e-05, + "loss": 0.6988, + "step": 3361 + }, + { + "epoch": 0.05809372408072989, + "grad_norm": 1.4471368656081436, + "learning_rate": 1.995867530267176e-05, + "loss": 0.6571, + "step": 3362 + }, + { + "epoch": 0.05811100359413879, + "grad_norm": 0.9344608088093196, + "learning_rate": 1.9958624460969538e-05, + "loss": 0.5394, + "step": 3363 + }, + { + "epoch": 0.05812828310754769, + "grad_norm": 0.7676862118011828, + "learning_rate": 1.995857358807615e-05, + "loss": 0.6522, + "step": 3364 + }, + { + "epoch": 0.058145562620956597, + "grad_norm": 1.0302057595958278, + "learning_rate": 1.995852268399175e-05, + "loss": 0.6156, + "step": 3365 + }, + { + "epoch": 0.0581628421343655, + "grad_norm": 1.3702642009567807, + "learning_rate": 1.9958471748716498e-05, + "loss": 0.7746, + "step": 3366 + }, + { + "epoch": 0.0581801216477744, + "grad_norm": 1.2247035004232323, + "learning_rate": 1.995842078225056e-05, + "loss": 0.9502, + "step": 3367 + }, + { + "epoch": 0.0581974011611833, + "grad_norm": 1.000225802184719, + "learning_rate": 1.995836978459409e-05, + "loss": 0.4556, + "step": 3368 + }, + { + "epoch": 0.058214680674592206, + "grad_norm": 1.2360162875500862, + "learning_rate": 1.995831875574725e-05, + "loss": 0.5982, + "step": 3369 + }, + { + "epoch": 0.05823196018800111, + "grad_norm": 1.5381999259449544, + "learning_rate": 1.9958267695710194e-05, + "loss": 0.6375, + "step": 3370 + }, + { + "epoch": 0.05824923970141001, + "grad_norm": 1.262182888916422, + "learning_rate": 1.995821660448309e-05, + "loss": 0.7921, + "step": 3371 + }, + { + "epoch": 0.05826651921481891, + "grad_norm": 1.3761721590196345, + "learning_rate": 1.9958165482066094e-05, + "loss": 0.881, + "step": 3372 + }, + { + "epoch": 0.058283798728227816, + "grad_norm": 1.1189495126402067, + "learning_rate": 1.995811432845937e-05, + "loss": 0.8593, + "step": 3373 + }, + { + "epoch": 0.05830107824163672, + "grad_norm": 1.4561793900050772, + "learning_rate": 1.9958063143663073e-05, + "loss": 0.6329, + "step": 3374 + }, + { + "epoch": 0.05831835775504562, + "grad_norm": 1.3974180180836537, + "learning_rate": 1.9958011927677363e-05, + "loss": 0.5166, + "step": 3375 + }, + { + "epoch": 0.05833563726845452, + "grad_norm": 1.1353097962529766, + "learning_rate": 1.9957960680502406e-05, + "loss": 0.8573, + "step": 3376 + }, + { + "epoch": 0.058352916781863426, + "grad_norm": 0.92973200296154, + "learning_rate": 1.9957909402138357e-05, + "loss": 0.6474, + "step": 3377 + }, + { + "epoch": 0.05837019629527233, + "grad_norm": 1.2238965512046254, + "learning_rate": 1.9957858092585383e-05, + "loss": 0.8384, + "step": 3378 + }, + { + "epoch": 0.05838747580868123, + "grad_norm": 1.1032860872789396, + "learning_rate": 1.9957806751843636e-05, + "loss": 0.7253, + "step": 3379 + }, + { + "epoch": 0.05840475532209013, + "grad_norm": 1.456796659903576, + "learning_rate": 1.9957755379913285e-05, + "loss": 0.5871, + "step": 3380 + }, + { + "epoch": 0.058422034835499036, + "grad_norm": 1.243433239169503, + "learning_rate": 1.9957703976794483e-05, + "loss": 0.7112, + "step": 3381 + }, + { + "epoch": 0.058439314348907936, + "grad_norm": 2.4149775628500514, + "learning_rate": 1.99576525424874e-05, + "loss": 0.7581, + "step": 3382 + }, + { + "epoch": 0.05845659386231684, + "grad_norm": 1.1989877117483692, + "learning_rate": 1.995760107699219e-05, + "loss": 0.8176, + "step": 3383 + }, + { + "epoch": 0.05847387337572574, + "grad_norm": 1.545678266882226, + "learning_rate": 1.9957549580309022e-05, + "loss": 0.7812, + "step": 3384 + }, + { + "epoch": 0.05849115288913464, + "grad_norm": 1.0044427965393172, + "learning_rate": 1.9957498052438046e-05, + "loss": 0.5145, + "step": 3385 + }, + { + "epoch": 0.058508432402543546, + "grad_norm": 1.1739248567524356, + "learning_rate": 1.995744649337943e-05, + "loss": 0.7351, + "step": 3386 + }, + { + "epoch": 0.05852571191595245, + "grad_norm": 1.2753979188580387, + "learning_rate": 1.9957394903133335e-05, + "loss": 0.6501, + "step": 3387 + }, + { + "epoch": 0.05854299142936135, + "grad_norm": 1.1252880823875067, + "learning_rate": 1.9957343281699923e-05, + "loss": 0.7051, + "step": 3388 + }, + { + "epoch": 0.05856027094277025, + "grad_norm": 1.483174788286608, + "learning_rate": 1.9957291629079357e-05, + "loss": 0.7228, + "step": 3389 + }, + { + "epoch": 0.058577550456179156, + "grad_norm": 1.5247939896228844, + "learning_rate": 1.9957239945271797e-05, + "loss": 0.7477, + "step": 3390 + }, + { + "epoch": 0.05859482996958806, + "grad_norm": 1.1417545294306863, + "learning_rate": 1.99571882302774e-05, + "loss": 0.6744, + "step": 3391 + }, + { + "epoch": 0.05861210948299696, + "grad_norm": 1.1141002041529882, + "learning_rate": 1.995713648409634e-05, + "loss": 0.7383, + "step": 3392 + }, + { + "epoch": 0.05862938899640586, + "grad_norm": 0.942137760469671, + "learning_rate": 1.9957084706728765e-05, + "loss": 0.6211, + "step": 3393 + }, + { + "epoch": 0.058646668509814766, + "grad_norm": 0.6620904401690362, + "learning_rate": 1.9957032898174845e-05, + "loss": 0.6892, + "step": 3394 + }, + { + "epoch": 0.058663948023223667, + "grad_norm": 1.2830139507384901, + "learning_rate": 1.9956981058434747e-05, + "loss": 0.6056, + "step": 3395 + }, + { + "epoch": 0.05868122753663257, + "grad_norm": 1.0823022329010699, + "learning_rate": 1.995692918750862e-05, + "loss": 0.7218, + "step": 3396 + }, + { + "epoch": 0.05869850705004147, + "grad_norm": 1.1523650782318289, + "learning_rate": 1.9956877285396638e-05, + "loss": 0.669, + "step": 3397 + }, + { + "epoch": 0.058715786563450376, + "grad_norm": 1.1760500469387765, + "learning_rate": 1.9956825352098962e-05, + "loss": 0.8008, + "step": 3398 + }, + { + "epoch": 0.058733066076859276, + "grad_norm": 1.1968831005694924, + "learning_rate": 1.9956773387615746e-05, + "loss": 0.8613, + "step": 3399 + }, + { + "epoch": 0.05875034559026818, + "grad_norm": 1.7932526763418613, + "learning_rate": 1.995672139194716e-05, + "loss": 1.0554, + "step": 3400 + }, + { + "epoch": 0.05876762510367708, + "grad_norm": 1.1350935327788065, + "learning_rate": 1.9956669365093372e-05, + "loss": 0.9211, + "step": 3401 + }, + { + "epoch": 0.058784904617085985, + "grad_norm": 1.1758695997165554, + "learning_rate": 1.9956617307054537e-05, + "loss": 0.7799, + "step": 3402 + }, + { + "epoch": 0.058802184130494886, + "grad_norm": 1.323691541751625, + "learning_rate": 1.9956565217830818e-05, + "loss": 0.7483, + "step": 3403 + }, + { + "epoch": 0.05881946364390379, + "grad_norm": 1.3269835873262645, + "learning_rate": 1.995651309742238e-05, + "loss": 0.9641, + "step": 3404 + }, + { + "epoch": 0.05883674315731269, + "grad_norm": 1.0441632761242887, + "learning_rate": 1.9956460945829385e-05, + "loss": 0.8049, + "step": 3405 + }, + { + "epoch": 0.058854022670721595, + "grad_norm": 1.7540815184962444, + "learning_rate": 1.9956408763051998e-05, + "loss": 0.7782, + "step": 3406 + }, + { + "epoch": 0.058871302184130496, + "grad_norm": 1.279734475042119, + "learning_rate": 1.9956356549090386e-05, + "loss": 0.7993, + "step": 3407 + }, + { + "epoch": 0.0588885816975394, + "grad_norm": 1.0307201792905767, + "learning_rate": 1.9956304303944707e-05, + "loss": 0.663, + "step": 3408 + }, + { + "epoch": 0.0589058612109483, + "grad_norm": 1.1615810168315948, + "learning_rate": 1.9956252027615123e-05, + "loss": 0.7374, + "step": 3409 + }, + { + "epoch": 0.058923140724357205, + "grad_norm": 1.019995770374885, + "learning_rate": 1.9956199720101805e-05, + "loss": 0.7281, + "step": 3410 + }, + { + "epoch": 0.058940420237766106, + "grad_norm": 4.684038858044954, + "learning_rate": 1.9956147381404908e-05, + "loss": 0.7795, + "step": 3411 + }, + { + "epoch": 0.058957699751175006, + "grad_norm": 1.0718882510686187, + "learning_rate": 1.9956095011524605e-05, + "loss": 0.7257, + "step": 3412 + }, + { + "epoch": 0.05897497926458391, + "grad_norm": 0.8507369614799833, + "learning_rate": 1.9956042610461055e-05, + "loss": 0.6249, + "step": 3413 + }, + { + "epoch": 0.058992258777992815, + "grad_norm": 1.5208796815501076, + "learning_rate": 1.995599017821442e-05, + "loss": 0.809, + "step": 3414 + }, + { + "epoch": 0.059009538291401716, + "grad_norm": 1.1498980893888724, + "learning_rate": 1.9955937714784874e-05, + "loss": 0.7075, + "step": 3415 + }, + { + "epoch": 0.059026817804810616, + "grad_norm": 1.1794159131624533, + "learning_rate": 1.995588522017257e-05, + "loss": 0.7107, + "step": 3416 + }, + { + "epoch": 0.05904409731821952, + "grad_norm": 1.2755517559371465, + "learning_rate": 1.9955832694377678e-05, + "loss": 0.9332, + "step": 3417 + }, + { + "epoch": 0.059061376831628425, + "grad_norm": 1.1308621980869014, + "learning_rate": 1.9955780137400357e-05, + "loss": 0.6208, + "step": 3418 + }, + { + "epoch": 0.059078656345037325, + "grad_norm": 1.2127831690409299, + "learning_rate": 1.995572754924078e-05, + "loss": 0.74, + "step": 3419 + }, + { + "epoch": 0.059095935858446226, + "grad_norm": 1.5707187532755478, + "learning_rate": 1.9955674929899107e-05, + "loss": 0.7997, + "step": 3420 + }, + { + "epoch": 0.05911321537185513, + "grad_norm": 1.1740203024747249, + "learning_rate": 1.9955622279375504e-05, + "loss": 0.6843, + "step": 3421 + }, + { + "epoch": 0.059130494885264034, + "grad_norm": 1.1795487596243246, + "learning_rate": 1.9955569597670138e-05, + "loss": 0.6446, + "step": 3422 + }, + { + "epoch": 0.059147774398672935, + "grad_norm": 1.1672629924609426, + "learning_rate": 1.9955516884783165e-05, + "loss": 0.6519, + "step": 3423 + }, + { + "epoch": 0.059165053912081836, + "grad_norm": 1.216563304685866, + "learning_rate": 1.9955464140714758e-05, + "loss": 0.7716, + "step": 3424 + }, + { + "epoch": 0.05918233342549074, + "grad_norm": 0.8421517002547139, + "learning_rate": 1.9955411365465086e-05, + "loss": 0.6352, + "step": 3425 + }, + { + "epoch": 0.05919961293889964, + "grad_norm": 1.0667228947436713, + "learning_rate": 1.9955358559034305e-05, + "loss": 0.6297, + "step": 3426 + }, + { + "epoch": 0.059216892452308545, + "grad_norm": 1.2034348350536963, + "learning_rate": 1.9955305721422585e-05, + "loss": 0.838, + "step": 3427 + }, + { + "epoch": 0.059234171965717446, + "grad_norm": 1.2089719157032972, + "learning_rate": 1.995525285263009e-05, + "loss": 0.7616, + "step": 3428 + }, + { + "epoch": 0.059251451479126346, + "grad_norm": 1.2171488145395855, + "learning_rate": 1.995519995265699e-05, + "loss": 0.5539, + "step": 3429 + }, + { + "epoch": 0.05926873099253525, + "grad_norm": 1.4694957415036016, + "learning_rate": 1.9955147021503446e-05, + "loss": 0.8864, + "step": 3430 + }, + { + "epoch": 0.059286010505944155, + "grad_norm": 1.408294427854276, + "learning_rate": 1.995509405916962e-05, + "loss": 0.7856, + "step": 3431 + }, + { + "epoch": 0.059303290019353055, + "grad_norm": 1.132206282073107, + "learning_rate": 1.995504106565569e-05, + "loss": 0.6945, + "step": 3432 + }, + { + "epoch": 0.059320569532761956, + "grad_norm": 1.2750983994124099, + "learning_rate": 1.995498804096181e-05, + "loss": 0.8662, + "step": 3433 + }, + { + "epoch": 0.05933784904617086, + "grad_norm": 1.3429112116507387, + "learning_rate": 1.995493498508815e-05, + "loss": 0.7896, + "step": 3434 + }, + { + "epoch": 0.059355128559579765, + "grad_norm": 1.2523231142519433, + "learning_rate": 1.995488189803488e-05, + "loss": 0.8977, + "step": 3435 + }, + { + "epoch": 0.059372408072988665, + "grad_norm": 1.269140562106745, + "learning_rate": 1.9954828779802164e-05, + "loss": 0.7781, + "step": 3436 + }, + { + "epoch": 0.059389687586397566, + "grad_norm": 1.341974941120925, + "learning_rate": 1.9954775630390165e-05, + "loss": 1.0844, + "step": 3437 + }, + { + "epoch": 0.05940696709980647, + "grad_norm": 1.1069812214914265, + "learning_rate": 1.9954722449799056e-05, + "loss": 0.6343, + "step": 3438 + }, + { + "epoch": 0.059424246613215374, + "grad_norm": 0.9821465270204358, + "learning_rate": 1.9954669238028997e-05, + "loss": 0.6014, + "step": 3439 + }, + { + "epoch": 0.059441526126624275, + "grad_norm": 1.0836105840457724, + "learning_rate": 1.995461599508016e-05, + "loss": 0.7885, + "step": 3440 + }, + { + "epoch": 0.059458805640033176, + "grad_norm": 1.2526542380380048, + "learning_rate": 1.9954562720952703e-05, + "loss": 0.9181, + "step": 3441 + }, + { + "epoch": 0.059476085153442076, + "grad_norm": 1.1224179535913923, + "learning_rate": 1.9954509415646807e-05, + "loss": 0.6951, + "step": 3442 + }, + { + "epoch": 0.059493364666850984, + "grad_norm": 1.1903402702188275, + "learning_rate": 1.9954456079162625e-05, + "loss": 0.729, + "step": 3443 + }, + { + "epoch": 0.059510644180259885, + "grad_norm": 1.087403350281524, + "learning_rate": 1.9954402711500332e-05, + "loss": 0.7111, + "step": 3444 + }, + { + "epoch": 0.059527923693668786, + "grad_norm": 1.29388503145694, + "learning_rate": 1.9954349312660092e-05, + "loss": 0.7527, + "step": 3445 + }, + { + "epoch": 0.059545203207077686, + "grad_norm": 0.9849201708615378, + "learning_rate": 1.9954295882642076e-05, + "loss": 0.6309, + "step": 3446 + }, + { + "epoch": 0.059562482720486594, + "grad_norm": 1.1348721504294277, + "learning_rate": 1.9954242421446448e-05, + "loss": 0.9251, + "step": 3447 + }, + { + "epoch": 0.059579762233895495, + "grad_norm": 1.2347365283777705, + "learning_rate": 1.9954188929073374e-05, + "loss": 0.8882, + "step": 3448 + }, + { + "epoch": 0.059597041747304395, + "grad_norm": 1.1943757697871007, + "learning_rate": 1.995413540552303e-05, + "loss": 0.9863, + "step": 3449 + }, + { + "epoch": 0.059614321260713296, + "grad_norm": 1.3239023794416918, + "learning_rate": 1.995408185079557e-05, + "loss": 0.9657, + "step": 3450 + }, + { + "epoch": 0.059631600774122204, + "grad_norm": 1.1958229251906451, + "learning_rate": 1.9954028264891173e-05, + "loss": 1.0429, + "step": 3451 + }, + { + "epoch": 0.059648880287531104, + "grad_norm": 1.6046349955123154, + "learning_rate": 1.9953974647810003e-05, + "loss": 1.0121, + "step": 3452 + }, + { + "epoch": 0.059666159800940005, + "grad_norm": 1.4958911941582025, + "learning_rate": 1.9953920999552224e-05, + "loss": 0.8535, + "step": 3453 + }, + { + "epoch": 0.059683439314348906, + "grad_norm": 1.2343707467829994, + "learning_rate": 1.9953867320118012e-05, + "loss": 0.7011, + "step": 3454 + }, + { + "epoch": 0.059700718827757814, + "grad_norm": 1.5932393185641052, + "learning_rate": 1.9953813609507532e-05, + "loss": 0.9825, + "step": 3455 + }, + { + "epoch": 0.059717998341166714, + "grad_norm": 1.1393213254459051, + "learning_rate": 1.9953759867720948e-05, + "loss": 0.6879, + "step": 3456 + }, + { + "epoch": 0.059735277854575615, + "grad_norm": 1.1597565564982515, + "learning_rate": 1.995370609475843e-05, + "loss": 0.7564, + "step": 3457 + }, + { + "epoch": 0.059752557367984516, + "grad_norm": 1.1680429185090995, + "learning_rate": 1.9953652290620152e-05, + "loss": 0.7976, + "step": 3458 + }, + { + "epoch": 0.05976983688139342, + "grad_norm": 1.4381008782059932, + "learning_rate": 1.995359845530628e-05, + "loss": 0.7335, + "step": 3459 + }, + { + "epoch": 0.059787116394802324, + "grad_norm": 1.2351813652182284, + "learning_rate": 1.995354458881698e-05, + "loss": 0.9148, + "step": 3460 + }, + { + "epoch": 0.059804395908211225, + "grad_norm": 1.2660145714714612, + "learning_rate": 1.995349069115242e-05, + "loss": 0.5204, + "step": 3461 + }, + { + "epoch": 0.059821675421620125, + "grad_norm": 1.2799238367194663, + "learning_rate": 1.995343676231277e-05, + "loss": 0.6711, + "step": 3462 + }, + { + "epoch": 0.059838954935029026, + "grad_norm": 1.2837676310056918, + "learning_rate": 1.99533828022982e-05, + "loss": 0.6085, + "step": 3463 + }, + { + "epoch": 0.059856234448437934, + "grad_norm": 1.1728253323953794, + "learning_rate": 1.995332881110888e-05, + "loss": 0.7239, + "step": 3464 + }, + { + "epoch": 0.059873513961846835, + "grad_norm": 1.7019232706730847, + "learning_rate": 1.9953274788744978e-05, + "loss": 0.8949, + "step": 3465 + }, + { + "epoch": 0.059890793475255735, + "grad_norm": 0.9269651836086555, + "learning_rate": 1.9953220735206664e-05, + "loss": 0.6526, + "step": 3466 + }, + { + "epoch": 0.059908072988664636, + "grad_norm": 0.9863148425644741, + "learning_rate": 1.99531666504941e-05, + "loss": 0.8323, + "step": 3467 + }, + { + "epoch": 0.059925352502073544, + "grad_norm": 1.1520889164010062, + "learning_rate": 1.995311253460747e-05, + "loss": 0.8494, + "step": 3468 + }, + { + "epoch": 0.059942632015482444, + "grad_norm": 4.549864555133415, + "learning_rate": 1.995305838754693e-05, + "loss": 0.5358, + "step": 3469 + }, + { + "epoch": 0.059959911528891345, + "grad_norm": 1.1671082385400133, + "learning_rate": 1.9953004209312657e-05, + "loss": 0.7884, + "step": 3470 + }, + { + "epoch": 0.059977191042300246, + "grad_norm": 1.4633025866441094, + "learning_rate": 1.9952949999904817e-05, + "loss": 0.6653, + "step": 3471 + }, + { + "epoch": 0.05999447055570915, + "grad_norm": 1.0934831515295984, + "learning_rate": 1.9952895759323586e-05, + "loss": 0.9173, + "step": 3472 + }, + { + "epoch": 0.060011750069118054, + "grad_norm": 0.9820060576054113, + "learning_rate": 1.995284148756912e-05, + "loss": 0.7053, + "step": 3473 + }, + { + "epoch": 0.060029029582526955, + "grad_norm": 1.1502164477754298, + "learning_rate": 1.9952787184641606e-05, + "loss": 0.6809, + "step": 3474 + }, + { + "epoch": 0.060046309095935856, + "grad_norm": 0.9915445099161804, + "learning_rate": 1.9952732850541202e-05, + "loss": 0.6694, + "step": 3475 + }, + { + "epoch": 0.06006358860934476, + "grad_norm": 0.9582760049167931, + "learning_rate": 1.9952678485268082e-05, + "loss": 0.7085, + "step": 3476 + }, + { + "epoch": 0.060080868122753664, + "grad_norm": 1.1452957431512656, + "learning_rate": 1.9952624088822418e-05, + "loss": 0.7592, + "step": 3477 + }, + { + "epoch": 0.060098147636162565, + "grad_norm": 1.2283482523947358, + "learning_rate": 1.995256966120438e-05, + "loss": 0.7946, + "step": 3478 + }, + { + "epoch": 0.060115427149571465, + "grad_norm": 1.1600078398895202, + "learning_rate": 1.995251520241414e-05, + "loss": 0.7245, + "step": 3479 + }, + { + "epoch": 0.06013270666298037, + "grad_norm": 1.356156541793547, + "learning_rate": 1.995246071245186e-05, + "loss": 0.7944, + "step": 3480 + }, + { + "epoch": 0.060149986176389274, + "grad_norm": 1.063762112071308, + "learning_rate": 1.9952406191317718e-05, + "loss": 0.8018, + "step": 3481 + }, + { + "epoch": 0.060167265689798174, + "grad_norm": 1.442175967707619, + "learning_rate": 1.9952351639011886e-05, + "loss": 0.5908, + "step": 3482 + }, + { + "epoch": 0.060184545203207075, + "grad_norm": 1.2504048633436646, + "learning_rate": 1.995229705553453e-05, + "loss": 0.5541, + "step": 3483 + }, + { + "epoch": 0.06020182471661598, + "grad_norm": 1.1086846240055077, + "learning_rate": 1.9952242440885826e-05, + "loss": 0.6537, + "step": 3484 + }, + { + "epoch": 0.060219104230024884, + "grad_norm": 1.1450959915653534, + "learning_rate": 1.995218779506594e-05, + "loss": 0.9246, + "step": 3485 + }, + { + "epoch": 0.060236383743433784, + "grad_norm": 0.9462006031366631, + "learning_rate": 1.9952133118075046e-05, + "loss": 0.813, + "step": 3486 + }, + { + "epoch": 0.060253663256842685, + "grad_norm": 1.3691002012893143, + "learning_rate": 1.9952078409913316e-05, + "loss": 0.9202, + "step": 3487 + }, + { + "epoch": 0.06027094277025159, + "grad_norm": 1.7547282003072187, + "learning_rate": 1.9952023670580915e-05, + "loss": 1.0803, + "step": 3488 + }, + { + "epoch": 0.06028822228366049, + "grad_norm": 1.1495168884007627, + "learning_rate": 1.9951968900078027e-05, + "loss": 0.6817, + "step": 3489 + }, + { + "epoch": 0.060305501797069394, + "grad_norm": 0.9891927358263599, + "learning_rate": 1.995191409840481e-05, + "loss": 0.6397, + "step": 3490 + }, + { + "epoch": 0.060322781310478295, + "grad_norm": 0.9928316496781648, + "learning_rate": 1.9951859265561447e-05, + "loss": 0.658, + "step": 3491 + }, + { + "epoch": 0.0603400608238872, + "grad_norm": 1.1435702685268716, + "learning_rate": 1.99518044015481e-05, + "loss": 0.7487, + "step": 3492 + }, + { + "epoch": 0.0603573403372961, + "grad_norm": 1.1820546405544952, + "learning_rate": 1.9951749506364947e-05, + "loss": 0.8999, + "step": 3493 + }, + { + "epoch": 0.060374619850705004, + "grad_norm": 0.8872624958783941, + "learning_rate": 1.995169458001216e-05, + "loss": 0.657, + "step": 3494 + }, + { + "epoch": 0.060391899364113905, + "grad_norm": 1.0866764334277073, + "learning_rate": 1.9951639622489907e-05, + "loss": 0.8897, + "step": 3495 + }, + { + "epoch": 0.06040917887752281, + "grad_norm": 0.7480430681681084, + "learning_rate": 1.9951584633798363e-05, + "loss": 0.623, + "step": 3496 + }, + { + "epoch": 0.06042645839093171, + "grad_norm": 1.0825884270578383, + "learning_rate": 1.9951529613937696e-05, + "loss": 0.6124, + "step": 3497 + }, + { + "epoch": 0.060443737904340614, + "grad_norm": 0.9313034087087609, + "learning_rate": 1.9951474562908087e-05, + "loss": 0.6558, + "step": 3498 + }, + { + "epoch": 0.060461017417749514, + "grad_norm": 1.1108381257476543, + "learning_rate": 1.9951419480709702e-05, + "loss": 1.0516, + "step": 3499 + }, + { + "epoch": 0.06047829693115842, + "grad_norm": 1.0241950620500464, + "learning_rate": 1.9951364367342717e-05, + "loss": 0.7706, + "step": 3500 + }, + { + "epoch": 0.06049557644456732, + "grad_norm": 1.1176816223260606, + "learning_rate": 1.9951309222807298e-05, + "loss": 0.6828, + "step": 3501 + }, + { + "epoch": 0.06051285595797622, + "grad_norm": 1.5280024089203654, + "learning_rate": 1.9951254047103625e-05, + "loss": 1.0064, + "step": 3502 + }, + { + "epoch": 0.060530135471385124, + "grad_norm": 1.0235657615203526, + "learning_rate": 1.9951198840231866e-05, + "loss": 0.6209, + "step": 3503 + }, + { + "epoch": 0.060547414984794025, + "grad_norm": 1.1553943131228617, + "learning_rate": 1.99511436021922e-05, + "loss": 0.68, + "step": 3504 + }, + { + "epoch": 0.06056469449820293, + "grad_norm": 1.3700482406699592, + "learning_rate": 1.9951088332984793e-05, + "loss": 0.9447, + "step": 3505 + }, + { + "epoch": 0.06058197401161183, + "grad_norm": 1.1439708415443395, + "learning_rate": 1.9951033032609823e-05, + "loss": 0.7516, + "step": 3506 + }, + { + "epoch": 0.060599253525020734, + "grad_norm": 1.1165091099447826, + "learning_rate": 1.995097770106746e-05, + "loss": 0.8078, + "step": 3507 + }, + { + "epoch": 0.060616533038429635, + "grad_norm": 1.477411590572507, + "learning_rate": 1.9950922338357878e-05, + "loss": 1.0202, + "step": 3508 + }, + { + "epoch": 0.06063381255183854, + "grad_norm": 1.2278502300853837, + "learning_rate": 1.995086694448125e-05, + "loss": 0.9569, + "step": 3509 + }, + { + "epoch": 0.06065109206524744, + "grad_norm": 0.7824214067441289, + "learning_rate": 1.9950811519437753e-05, + "loss": 0.6662, + "step": 3510 + }, + { + "epoch": 0.060668371578656344, + "grad_norm": 0.8929023720375967, + "learning_rate": 1.9950756063227555e-05, + "loss": 0.6847, + "step": 3511 + }, + { + "epoch": 0.060685651092065244, + "grad_norm": 1.1698388580134353, + "learning_rate": 1.9950700575850838e-05, + "loss": 0.8308, + "step": 3512 + }, + { + "epoch": 0.06070293060547415, + "grad_norm": 1.1731984557038346, + "learning_rate": 1.9950645057307767e-05, + "loss": 0.9545, + "step": 3513 + }, + { + "epoch": 0.06072021011888305, + "grad_norm": 0.925522810746473, + "learning_rate": 1.995058950759852e-05, + "loss": 0.9982, + "step": 3514 + }, + { + "epoch": 0.060737489632291954, + "grad_norm": 1.097208467257389, + "learning_rate": 1.995053392672327e-05, + "loss": 0.8143, + "step": 3515 + }, + { + "epoch": 0.060754769145700854, + "grad_norm": 1.1133034742997951, + "learning_rate": 1.995047831468219e-05, + "loss": 0.7829, + "step": 3516 + }, + { + "epoch": 0.06077204865910976, + "grad_norm": 1.2638044648945999, + "learning_rate": 1.9950422671475457e-05, + "loss": 0.9625, + "step": 3517 + }, + { + "epoch": 0.06078932817251866, + "grad_norm": 0.8099872034808289, + "learning_rate": 1.9950366997103247e-05, + "loss": 0.5684, + "step": 3518 + }, + { + "epoch": 0.06080660768592756, + "grad_norm": 1.1618898763556482, + "learning_rate": 1.995031129156573e-05, + "loss": 0.8467, + "step": 3519 + }, + { + "epoch": 0.060823887199336464, + "grad_norm": 1.2546252134099813, + "learning_rate": 1.995025555486308e-05, + "loss": 0.6356, + "step": 3520 + }, + { + "epoch": 0.06084116671274537, + "grad_norm": 1.5220172075458853, + "learning_rate": 1.9950199786995474e-05, + "loss": 0.8303, + "step": 3521 + }, + { + "epoch": 0.06085844622615427, + "grad_norm": 0.798072398049102, + "learning_rate": 1.9950143987963087e-05, + "loss": 0.4128, + "step": 3522 + }, + { + "epoch": 0.06087572573956317, + "grad_norm": 1.3953881447895997, + "learning_rate": 1.995008815776609e-05, + "loss": 1.1468, + "step": 3523 + }, + { + "epoch": 0.060893005252972074, + "grad_norm": 1.1684350201895155, + "learning_rate": 1.9950032296404663e-05, + "loss": 0.7629, + "step": 3524 + }, + { + "epoch": 0.06091028476638098, + "grad_norm": 1.0098725943146358, + "learning_rate": 1.9949976403878982e-05, + "loss": 0.5583, + "step": 3525 + }, + { + "epoch": 0.06092756427978988, + "grad_norm": 1.133978453363172, + "learning_rate": 1.9949920480189214e-05, + "loss": 0.7013, + "step": 3526 + }, + { + "epoch": 0.06094484379319878, + "grad_norm": 1.3499340618334985, + "learning_rate": 1.9949864525335538e-05, + "loss": 0.8203, + "step": 3527 + }, + { + "epoch": 0.060962123306607684, + "grad_norm": 1.1764859606004365, + "learning_rate": 1.9949808539318134e-05, + "loss": 0.6381, + "step": 3528 + }, + { + "epoch": 0.06097940282001659, + "grad_norm": 1.3611687448094805, + "learning_rate": 1.9949752522137172e-05, + "loss": 0.8212, + "step": 3529 + }, + { + "epoch": 0.06099668233342549, + "grad_norm": 1.2508590865564, + "learning_rate": 1.9949696473792828e-05, + "loss": 1.0248, + "step": 3530 + }, + { + "epoch": 0.06101396184683439, + "grad_norm": 1.0232612197731696, + "learning_rate": 1.9949640394285277e-05, + "loss": 0.5952, + "step": 3531 + }, + { + "epoch": 0.061031241360243293, + "grad_norm": 0.9884519919012957, + "learning_rate": 1.99495842836147e-05, + "loss": 0.6002, + "step": 3532 + }, + { + "epoch": 0.0610485208736522, + "grad_norm": 1.8419747453864503, + "learning_rate": 1.994952814178127e-05, + "loss": 0.7961, + "step": 3533 + }, + { + "epoch": 0.0610658003870611, + "grad_norm": 1.6943636184984405, + "learning_rate": 1.9949471968785158e-05, + "loss": 0.7919, + "step": 3534 + }, + { + "epoch": 0.06108307990047, + "grad_norm": 1.9605199384815113, + "learning_rate": 1.9949415764626542e-05, + "loss": 0.7618, + "step": 3535 + }, + { + "epoch": 0.0611003594138789, + "grad_norm": 1.221411499666751, + "learning_rate": 1.9949359529305604e-05, + "loss": 0.7221, + "step": 3536 + }, + { + "epoch": 0.06111763892728781, + "grad_norm": 1.1309706810575963, + "learning_rate": 1.9949303262822513e-05, + "loss": 0.7596, + "step": 3537 + }, + { + "epoch": 0.06113491844069671, + "grad_norm": 1.6037737383668644, + "learning_rate": 1.994924696517745e-05, + "loss": 0.5763, + "step": 3538 + }, + { + "epoch": 0.06115219795410561, + "grad_norm": 1.1396008657018495, + "learning_rate": 1.994919063637059e-05, + "loss": 0.5746, + "step": 3539 + }, + { + "epoch": 0.06116947746751451, + "grad_norm": 0.8985599724155913, + "learning_rate": 1.9949134276402106e-05, + "loss": 0.5432, + "step": 3540 + }, + { + "epoch": 0.061186756980923414, + "grad_norm": 1.0530663429006526, + "learning_rate": 1.9949077885272177e-05, + "loss": 0.5583, + "step": 3541 + }, + { + "epoch": 0.06120403649433232, + "grad_norm": 1.268861586010293, + "learning_rate": 1.9949021462980984e-05, + "loss": 0.6507, + "step": 3542 + }, + { + "epoch": 0.06122131600774122, + "grad_norm": 1.2117222407231085, + "learning_rate": 1.9948965009528696e-05, + "loss": 0.6229, + "step": 3543 + }, + { + "epoch": 0.06123859552115012, + "grad_norm": 1.1074614624842194, + "learning_rate": 1.9948908524915496e-05, + "loss": 0.6953, + "step": 3544 + }, + { + "epoch": 0.061255875034559024, + "grad_norm": 0.9840230646839551, + "learning_rate": 1.9948852009141556e-05, + "loss": 0.7778, + "step": 3545 + }, + { + "epoch": 0.06127315454796793, + "grad_norm": 5.153345135401073, + "learning_rate": 1.994879546220706e-05, + "loss": 0.835, + "step": 3546 + }, + { + "epoch": 0.06129043406137683, + "grad_norm": 0.9793385497851068, + "learning_rate": 1.9948738884112175e-05, + "loss": 0.7079, + "step": 3547 + }, + { + "epoch": 0.06130771357478573, + "grad_norm": 1.2687758169358352, + "learning_rate": 1.9948682274857084e-05, + "loss": 0.8005, + "step": 3548 + }, + { + "epoch": 0.06132499308819463, + "grad_norm": 1.700212863173374, + "learning_rate": 1.9948625634441968e-05, + "loss": 0.9552, + "step": 3549 + }, + { + "epoch": 0.06134227260160354, + "grad_norm": 1.065163265632454, + "learning_rate": 1.9948568962867e-05, + "loss": 0.7053, + "step": 3550 + }, + { + "epoch": 0.06135955211501244, + "grad_norm": 1.03249043516176, + "learning_rate": 1.9948512260132356e-05, + "loss": 0.6837, + "step": 3551 + }, + { + "epoch": 0.06137683162842134, + "grad_norm": 1.0622982215157954, + "learning_rate": 1.9948455526238216e-05, + "loss": 0.6072, + "step": 3552 + }, + { + "epoch": 0.06139411114183024, + "grad_norm": 1.3475821650222237, + "learning_rate": 1.9948398761184753e-05, + "loss": 0.9879, + "step": 3553 + }, + { + "epoch": 0.06141139065523915, + "grad_norm": 1.15626453781216, + "learning_rate": 1.9948341964972157e-05, + "loss": 0.6097, + "step": 3554 + }, + { + "epoch": 0.06142867016864805, + "grad_norm": 0.9811616541549623, + "learning_rate": 1.9948285137600593e-05, + "loss": 0.7687, + "step": 3555 + }, + { + "epoch": 0.06144594968205695, + "grad_norm": 1.0688041878435264, + "learning_rate": 1.9948228279070244e-05, + "loss": 0.6819, + "step": 3556 + }, + { + "epoch": 0.06146322919546585, + "grad_norm": 0.9104654070487377, + "learning_rate": 1.994817138938129e-05, + "loss": 0.829, + "step": 3557 + }, + { + "epoch": 0.06148050870887476, + "grad_norm": 0.9077652367717803, + "learning_rate": 1.9948114468533908e-05, + "loss": 0.8083, + "step": 3558 + }, + { + "epoch": 0.06149778822228366, + "grad_norm": 2.5464176683206103, + "learning_rate": 1.9948057516528273e-05, + "loss": 0.6593, + "step": 3559 + }, + { + "epoch": 0.06151506773569256, + "grad_norm": 1.4060931351584731, + "learning_rate": 1.9948000533364568e-05, + "loss": 0.8763, + "step": 3560 + }, + { + "epoch": 0.06153234724910146, + "grad_norm": 1.280875358179127, + "learning_rate": 1.9947943519042966e-05, + "loss": 0.7505, + "step": 3561 + }, + { + "epoch": 0.06154962676251037, + "grad_norm": 0.84310218665637, + "learning_rate": 1.994788647356365e-05, + "loss": 0.7062, + "step": 3562 + }, + { + "epoch": 0.06156690627591927, + "grad_norm": 0.9780289879853492, + "learning_rate": 1.99478293969268e-05, + "loss": 0.6317, + "step": 3563 + }, + { + "epoch": 0.06158418578932817, + "grad_norm": 1.0756782379356673, + "learning_rate": 1.9947772289132588e-05, + "loss": 0.7294, + "step": 3564 + }, + { + "epoch": 0.06160146530273707, + "grad_norm": 0.9482977714991261, + "learning_rate": 1.99477151501812e-05, + "loss": 0.7913, + "step": 3565 + }, + { + "epoch": 0.06161874481614598, + "grad_norm": 1.1056326319469207, + "learning_rate": 1.9947657980072815e-05, + "loss": 0.9325, + "step": 3566 + }, + { + "epoch": 0.06163602432955488, + "grad_norm": 1.1032460010259335, + "learning_rate": 1.9947600778807606e-05, + "loss": 0.5566, + "step": 3567 + }, + { + "epoch": 0.06165330384296378, + "grad_norm": 3.1643288019226636, + "learning_rate": 1.9947543546385754e-05, + "loss": 0.6338, + "step": 3568 + }, + { + "epoch": 0.06167058335637268, + "grad_norm": 1.2095492389443674, + "learning_rate": 1.994748628280744e-05, + "loss": 0.8068, + "step": 3569 + }, + { + "epoch": 0.06168786286978159, + "grad_norm": 1.2758261888614664, + "learning_rate": 1.9947428988072845e-05, + "loss": 0.6881, + "step": 3570 + }, + { + "epoch": 0.06170514238319049, + "grad_norm": 0.9481460170876256, + "learning_rate": 1.9947371662182147e-05, + "loss": 0.684, + "step": 3571 + }, + { + "epoch": 0.06172242189659939, + "grad_norm": 1.0593783964543837, + "learning_rate": 1.9947314305135525e-05, + "loss": 0.7051, + "step": 3572 + }, + { + "epoch": 0.06173970141000829, + "grad_norm": 0.917152550761822, + "learning_rate": 1.9947256916933156e-05, + "loss": 0.6405, + "step": 3573 + }, + { + "epoch": 0.0617569809234172, + "grad_norm": 1.0513809349465346, + "learning_rate": 1.9947199497575222e-05, + "loss": 0.7552, + "step": 3574 + }, + { + "epoch": 0.0617742604368261, + "grad_norm": 0.9846510796836854, + "learning_rate": 1.9947142047061905e-05, + "loss": 0.8714, + "step": 3575 + }, + { + "epoch": 0.061791539950235, + "grad_norm": 0.9678826296688077, + "learning_rate": 1.994708456539338e-05, + "loss": 0.7515, + "step": 3576 + }, + { + "epoch": 0.0618088194636439, + "grad_norm": 1.0657951819476938, + "learning_rate": 1.9947027052569837e-05, + "loss": 0.4584, + "step": 3577 + }, + { + "epoch": 0.06182609897705281, + "grad_norm": 1.1543446829703055, + "learning_rate": 1.9946969508591446e-05, + "loss": 0.8645, + "step": 3578 + }, + { + "epoch": 0.06184337849046171, + "grad_norm": 1.031247831035997, + "learning_rate": 1.994691193345839e-05, + "loss": 0.8004, + "step": 3579 + }, + { + "epoch": 0.06186065800387061, + "grad_norm": 1.532427150312435, + "learning_rate": 1.9946854327170848e-05, + "loss": 0.7974, + "step": 3580 + }, + { + "epoch": 0.06187793751727951, + "grad_norm": 1.2435674383104391, + "learning_rate": 1.9946796689729004e-05, + "loss": 0.7947, + "step": 3581 + }, + { + "epoch": 0.06189521703068841, + "grad_norm": 0.9225415178565107, + "learning_rate": 1.994673902113304e-05, + "loss": 0.6402, + "step": 3582 + }, + { + "epoch": 0.06191249654409732, + "grad_norm": 1.7048253676621956, + "learning_rate": 1.994668132138313e-05, + "loss": 0.9735, + "step": 3583 + }, + { + "epoch": 0.06192977605750622, + "grad_norm": 0.9146705510852752, + "learning_rate": 1.9946623590479457e-05, + "loss": 0.7736, + "step": 3584 + }, + { + "epoch": 0.06194705557091512, + "grad_norm": 0.568284991555581, + "learning_rate": 1.9946565828422204e-05, + "loss": 0.7389, + "step": 3585 + }, + { + "epoch": 0.06196433508432402, + "grad_norm": 1.273846975632497, + "learning_rate": 1.994650803521155e-05, + "loss": 0.5421, + "step": 3586 + }, + { + "epoch": 0.06198161459773293, + "grad_norm": 1.0744816861828759, + "learning_rate": 1.9946450210847683e-05, + "loss": 0.6299, + "step": 3587 + }, + { + "epoch": 0.06199889411114183, + "grad_norm": 0.9570509063184651, + "learning_rate": 1.994639235533077e-05, + "loss": 0.6852, + "step": 3588 + }, + { + "epoch": 0.06201617362455073, + "grad_norm": 1.273273374914663, + "learning_rate": 1.9946334468661008e-05, + "loss": 0.8474, + "step": 3589 + }, + { + "epoch": 0.06203345313795963, + "grad_norm": 1.1496171172458973, + "learning_rate": 1.994627655083856e-05, + "loss": 0.7689, + "step": 3590 + }, + { + "epoch": 0.06205073265136854, + "grad_norm": 1.1174948308744037, + "learning_rate": 1.9946218601863626e-05, + "loss": 0.8118, + "step": 3591 + }, + { + "epoch": 0.06206801216477744, + "grad_norm": 0.9719785820307626, + "learning_rate": 1.994616062173638e-05, + "loss": 0.9096, + "step": 3592 + }, + { + "epoch": 0.06208529167818634, + "grad_norm": 0.9117761213622754, + "learning_rate": 1.9946102610457e-05, + "loss": 0.8026, + "step": 3593 + }, + { + "epoch": 0.06210257119159524, + "grad_norm": 0.6047352756669614, + "learning_rate": 1.994604456802567e-05, + "loss": 0.8363, + "step": 3594 + }, + { + "epoch": 0.06211985070500415, + "grad_norm": 1.1207011990795683, + "learning_rate": 1.9945986494442574e-05, + "loss": 0.6933, + "step": 3595 + }, + { + "epoch": 0.06213713021841305, + "grad_norm": 1.1386734962131457, + "learning_rate": 1.9945928389707894e-05, + "loss": 0.8034, + "step": 3596 + }, + { + "epoch": 0.06215440973182195, + "grad_norm": 1.024959412631978, + "learning_rate": 1.9945870253821808e-05, + "loss": 0.6758, + "step": 3597 + }, + { + "epoch": 0.06217168924523085, + "grad_norm": 1.05365550075839, + "learning_rate": 1.9945812086784503e-05, + "loss": 0.8131, + "step": 3598 + }, + { + "epoch": 0.06218896875863976, + "grad_norm": 1.3369767649215893, + "learning_rate": 1.9945753888596155e-05, + "loss": 0.8039, + "step": 3599 + }, + { + "epoch": 0.06220624827204866, + "grad_norm": 1.052705901439405, + "learning_rate": 1.9945695659256955e-05, + "loss": 0.5963, + "step": 3600 + }, + { + "epoch": 0.06222352778545756, + "grad_norm": 0.9373162715051774, + "learning_rate": 1.9945637398767074e-05, + "loss": 0.7028, + "step": 3601 + }, + { + "epoch": 0.06224080729886646, + "grad_norm": 0.8400255950503365, + "learning_rate": 1.9945579107126707e-05, + "loss": 0.6989, + "step": 3602 + }, + { + "epoch": 0.06225808681227537, + "grad_norm": 1.0845800114109354, + "learning_rate": 1.9945520784336025e-05, + "loss": 0.8515, + "step": 3603 + }, + { + "epoch": 0.06227536632568427, + "grad_norm": 1.2646206365400912, + "learning_rate": 1.994546243039522e-05, + "loss": 0.6123, + "step": 3604 + }, + { + "epoch": 0.06229264583909317, + "grad_norm": 0.9019172704164024, + "learning_rate": 1.994540404530447e-05, + "loss": 0.5214, + "step": 3605 + }, + { + "epoch": 0.06230992535250207, + "grad_norm": 1.0188436681150124, + "learning_rate": 1.9945345629063957e-05, + "loss": 0.7201, + "step": 3606 + }, + { + "epoch": 0.06232720486591098, + "grad_norm": 0.6120975438789505, + "learning_rate": 1.9945287181673867e-05, + "loss": 0.5335, + "step": 3607 + }, + { + "epoch": 0.06234448437931988, + "grad_norm": 1.1036098154742666, + "learning_rate": 1.9945228703134382e-05, + "loss": 0.8043, + "step": 3608 + }, + { + "epoch": 0.06236176389272878, + "grad_norm": 1.1477335294181625, + "learning_rate": 1.9945170193445684e-05, + "loss": 0.6849, + "step": 3609 + }, + { + "epoch": 0.06237904340613768, + "grad_norm": 1.0584488286231162, + "learning_rate": 1.994511165260796e-05, + "loss": 0.8898, + "step": 3610 + }, + { + "epoch": 0.06239632291954659, + "grad_norm": 1.015773720786075, + "learning_rate": 1.9945053080621387e-05, + "loss": 0.7261, + "step": 3611 + }, + { + "epoch": 0.06241360243295549, + "grad_norm": 1.189869374382841, + "learning_rate": 1.9944994477486154e-05, + "loss": 0.7123, + "step": 3612 + }, + { + "epoch": 0.06243088194636439, + "grad_norm": 1.2781098985326107, + "learning_rate": 1.9944935843202443e-05, + "loss": 0.97, + "step": 3613 + }, + { + "epoch": 0.06244816145977329, + "grad_norm": 0.9708546922896568, + "learning_rate": 1.9944877177770433e-05, + "loss": 0.7593, + "step": 3614 + }, + { + "epoch": 0.0624654409731822, + "grad_norm": 0.982046641435268, + "learning_rate": 1.9944818481190315e-05, + "loss": 0.5914, + "step": 3615 + }, + { + "epoch": 0.0624827204865911, + "grad_norm": 1.348460950255697, + "learning_rate": 1.994475975346227e-05, + "loss": 0.6355, + "step": 3616 + }, + { + "epoch": 0.0625, + "grad_norm": 1.371284395280617, + "learning_rate": 1.9944700994586482e-05, + "loss": 0.8314, + "step": 3617 + }, + { + "epoch": 0.06251727951340891, + "grad_norm": 1.0424786592935433, + "learning_rate": 1.9944642204563133e-05, + "loss": 0.6221, + "step": 3618 + }, + { + "epoch": 0.0625345590268178, + "grad_norm": 1.401307436731278, + "learning_rate": 1.994458338339241e-05, + "loss": 0.743, + "step": 3619 + }, + { + "epoch": 0.06255183854022671, + "grad_norm": 1.2137731438289896, + "learning_rate": 1.9944524531074497e-05, + "loss": 0.533, + "step": 3620 + }, + { + "epoch": 0.0625691180536356, + "grad_norm": 1.2147357295823804, + "learning_rate": 1.9944465647609576e-05, + "loss": 0.6274, + "step": 3621 + }, + { + "epoch": 0.06258639756704451, + "grad_norm": 1.2897793047779424, + "learning_rate": 1.994440673299783e-05, + "loss": 0.7731, + "step": 3622 + }, + { + "epoch": 0.06260367708045342, + "grad_norm": 1.3099890137113168, + "learning_rate": 1.994434778723945e-05, + "loss": 0.672, + "step": 3623 + }, + { + "epoch": 0.06262095659386231, + "grad_norm": 1.0008838580879356, + "learning_rate": 1.994428881033462e-05, + "loss": 0.604, + "step": 3624 + }, + { + "epoch": 0.06263823610727122, + "grad_norm": 1.033089970914366, + "learning_rate": 1.9944229802283516e-05, + "loss": 0.6723, + "step": 3625 + }, + { + "epoch": 0.06265551562068013, + "grad_norm": 0.7533721784423747, + "learning_rate": 1.9944170763086333e-05, + "loss": 0.7829, + "step": 3626 + }, + { + "epoch": 0.06267279513408902, + "grad_norm": 0.924988812761199, + "learning_rate": 1.9944111692743248e-05, + "loss": 0.7539, + "step": 3627 + }, + { + "epoch": 0.06269007464749793, + "grad_norm": 1.0719527924632781, + "learning_rate": 1.994405259125445e-05, + "loss": 0.8429, + "step": 3628 + }, + { + "epoch": 0.06270735416090682, + "grad_norm": 1.1262502777565595, + "learning_rate": 1.9943993458620123e-05, + "loss": 0.8296, + "step": 3629 + }, + { + "epoch": 0.06272463367431573, + "grad_norm": 1.021387139712662, + "learning_rate": 1.9943934294840453e-05, + "loss": 0.6034, + "step": 3630 + }, + { + "epoch": 0.06274191318772464, + "grad_norm": 1.0914396089041318, + "learning_rate": 1.9943875099915626e-05, + "loss": 0.7575, + "step": 3631 + }, + { + "epoch": 0.06275919270113353, + "grad_norm": 1.0722405706066338, + "learning_rate": 1.9943815873845827e-05, + "loss": 0.7812, + "step": 3632 + }, + { + "epoch": 0.06277647221454244, + "grad_norm": 1.263621930861043, + "learning_rate": 1.994375661663124e-05, + "loss": 0.817, + "step": 3633 + }, + { + "epoch": 0.06279375172795135, + "grad_norm": 1.0597993777562358, + "learning_rate": 1.9943697328272052e-05, + "loss": 0.6141, + "step": 3634 + }, + { + "epoch": 0.06281103124136024, + "grad_norm": 1.2971668711880464, + "learning_rate": 1.994363800876845e-05, + "loss": 0.6588, + "step": 3635 + }, + { + "epoch": 0.06282831075476915, + "grad_norm": 1.0388927063273736, + "learning_rate": 1.994357865812062e-05, + "loss": 0.8208, + "step": 3636 + }, + { + "epoch": 0.06284559026817804, + "grad_norm": 0.9446117577043834, + "learning_rate": 1.994351927632874e-05, + "loss": 0.5723, + "step": 3637 + }, + { + "epoch": 0.06286286978158695, + "grad_norm": 1.2004267920705154, + "learning_rate": 1.9943459863393003e-05, + "loss": 0.8129, + "step": 3638 + }, + { + "epoch": 0.06288014929499586, + "grad_norm": 1.0472638294648804, + "learning_rate": 1.9943400419313594e-05, + "loss": 0.5642, + "step": 3639 + }, + { + "epoch": 0.06289742880840475, + "grad_norm": 1.1492050124027897, + "learning_rate": 1.99433409440907e-05, + "loss": 0.8184, + "step": 3640 + }, + { + "epoch": 0.06291470832181366, + "grad_norm": 1.172261364630806, + "learning_rate": 1.994328143772451e-05, + "loss": 0.6683, + "step": 3641 + }, + { + "epoch": 0.06293198783522257, + "grad_norm": 0.7456749318921172, + "learning_rate": 1.99432219002152e-05, + "loss": 0.7172, + "step": 3642 + }, + { + "epoch": 0.06294926734863146, + "grad_norm": 1.0247718708304685, + "learning_rate": 1.9943162331562972e-05, + "loss": 0.7402, + "step": 3643 + }, + { + "epoch": 0.06296654686204037, + "grad_norm": 1.2415241394589227, + "learning_rate": 1.9943102731767997e-05, + "loss": 0.7818, + "step": 3644 + }, + { + "epoch": 0.06298382637544926, + "grad_norm": 0.939996314691518, + "learning_rate": 1.994304310083047e-05, + "loss": 0.7159, + "step": 3645 + }, + { + "epoch": 0.06300110588885817, + "grad_norm": 0.8949868378495467, + "learning_rate": 1.9942983438750577e-05, + "loss": 0.6396, + "step": 3646 + }, + { + "epoch": 0.06301838540226708, + "grad_norm": 1.1742145589498658, + "learning_rate": 1.9942923745528505e-05, + "loss": 0.8522, + "step": 3647 + }, + { + "epoch": 0.06303566491567597, + "grad_norm": 1.0712926166318752, + "learning_rate": 1.994286402116444e-05, + "loss": 0.7771, + "step": 3648 + }, + { + "epoch": 0.06305294442908488, + "grad_norm": 1.3280073992627444, + "learning_rate": 1.9942804265658568e-05, + "loss": 1.0787, + "step": 3649 + }, + { + "epoch": 0.06307022394249379, + "grad_norm": 0.9845442002830553, + "learning_rate": 1.994274447901108e-05, + "loss": 0.8747, + "step": 3650 + }, + { + "epoch": 0.06308750345590268, + "grad_norm": 1.101658445624181, + "learning_rate": 1.9942684661222156e-05, + "loss": 0.7936, + "step": 3651 + }, + { + "epoch": 0.06310478296931159, + "grad_norm": 0.9923396426367775, + "learning_rate": 1.9942624812291993e-05, + "loss": 0.918, + "step": 3652 + }, + { + "epoch": 0.06312206248272048, + "grad_norm": 1.0802299859039344, + "learning_rate": 1.994256493222077e-05, + "loss": 0.6325, + "step": 3653 + }, + { + "epoch": 0.06313934199612939, + "grad_norm": 1.0902041468666976, + "learning_rate": 1.9942505021008678e-05, + "loss": 0.8403, + "step": 3654 + }, + { + "epoch": 0.0631566215095383, + "grad_norm": 0.9831853879599849, + "learning_rate": 1.9942445078655907e-05, + "loss": 0.4407, + "step": 3655 + }, + { + "epoch": 0.06317390102294719, + "grad_norm": 1.1657598515246443, + "learning_rate": 1.9942385105162643e-05, + "loss": 0.7178, + "step": 3656 + }, + { + "epoch": 0.0631911805363561, + "grad_norm": 1.4435633058204518, + "learning_rate": 1.994232510052907e-05, + "loss": 0.8927, + "step": 3657 + }, + { + "epoch": 0.06320846004976499, + "grad_norm": 1.1829101025645519, + "learning_rate": 1.9942265064755382e-05, + "loss": 0.7368, + "step": 3658 + }, + { + "epoch": 0.0632257395631739, + "grad_norm": 1.322972670538482, + "learning_rate": 1.9942204997841764e-05, + "loss": 0.7354, + "step": 3659 + }, + { + "epoch": 0.06324301907658281, + "grad_norm": 0.875411503135697, + "learning_rate": 1.9942144899788403e-05, + "loss": 0.7077, + "step": 3660 + }, + { + "epoch": 0.0632602985899917, + "grad_norm": 1.5582973915563096, + "learning_rate": 1.9942084770595488e-05, + "loss": 0.8971, + "step": 3661 + }, + { + "epoch": 0.06327757810340061, + "grad_norm": 1.0125123994617902, + "learning_rate": 1.994202461026321e-05, + "loss": 0.8297, + "step": 3662 + }, + { + "epoch": 0.06329485761680952, + "grad_norm": 0.804190606827325, + "learning_rate": 1.9941964418791752e-05, + "loss": 0.7042, + "step": 3663 + }, + { + "epoch": 0.06331213713021841, + "grad_norm": 1.6642042392735703, + "learning_rate": 1.9941904196181308e-05, + "loss": 0.7715, + "step": 3664 + }, + { + "epoch": 0.06332941664362732, + "grad_norm": 1.0050187649698044, + "learning_rate": 1.9941843942432066e-05, + "loss": 0.7171, + "step": 3665 + }, + { + "epoch": 0.06334669615703621, + "grad_norm": 1.0530717390662623, + "learning_rate": 1.9941783657544212e-05, + "loss": 0.6331, + "step": 3666 + }, + { + "epoch": 0.06336397567044512, + "grad_norm": 1.2138092061379204, + "learning_rate": 1.9941723341517937e-05, + "loss": 0.7707, + "step": 3667 + }, + { + "epoch": 0.06338125518385403, + "grad_norm": 1.537735909743742, + "learning_rate": 1.994166299435343e-05, + "loss": 0.983, + "step": 3668 + }, + { + "epoch": 0.06339853469726292, + "grad_norm": 0.9541757533455905, + "learning_rate": 1.9941602616050875e-05, + "loss": 0.7129, + "step": 3669 + }, + { + "epoch": 0.06341581421067183, + "grad_norm": 1.0135925641985781, + "learning_rate": 1.9941542206610466e-05, + "loss": 0.8212, + "step": 3670 + }, + { + "epoch": 0.06343309372408074, + "grad_norm": 1.0675828023125766, + "learning_rate": 1.9941481766032393e-05, + "loss": 0.589, + "step": 3671 + }, + { + "epoch": 0.06345037323748963, + "grad_norm": 1.3112832490237987, + "learning_rate": 1.9941421294316843e-05, + "loss": 0.8721, + "step": 3672 + }, + { + "epoch": 0.06346765275089854, + "grad_norm": 0.9646479800732501, + "learning_rate": 1.9941360791464007e-05, + "loss": 0.6897, + "step": 3673 + }, + { + "epoch": 0.06348493226430743, + "grad_norm": 0.6593008024802025, + "learning_rate": 1.9941300257474074e-05, + "loss": 0.76, + "step": 3674 + }, + { + "epoch": 0.06350221177771634, + "grad_norm": 0.6998724542890904, + "learning_rate": 1.994123969234723e-05, + "loss": 0.7098, + "step": 3675 + }, + { + "epoch": 0.06351949129112525, + "grad_norm": 1.4262351705399696, + "learning_rate": 1.9941179096083668e-05, + "loss": 0.9593, + "step": 3676 + }, + { + "epoch": 0.06353677080453414, + "grad_norm": 0.9690582601314882, + "learning_rate": 1.994111846868358e-05, + "loss": 0.8189, + "step": 3677 + }, + { + "epoch": 0.06355405031794305, + "grad_norm": 0.5541312774731343, + "learning_rate": 1.994105781014715e-05, + "loss": 0.5195, + "step": 3678 + }, + { + "epoch": 0.06357132983135196, + "grad_norm": 0.8959550710663735, + "learning_rate": 1.9940997120474573e-05, + "loss": 0.8287, + "step": 3679 + }, + { + "epoch": 0.06358860934476085, + "grad_norm": 1.3422823683283, + "learning_rate": 1.994093639966604e-05, + "loss": 0.6906, + "step": 3680 + }, + { + "epoch": 0.06360588885816976, + "grad_norm": 1.0050636530044623, + "learning_rate": 1.9940875647721737e-05, + "loss": 0.6383, + "step": 3681 + }, + { + "epoch": 0.06362316837157865, + "grad_norm": 1.8189995078922188, + "learning_rate": 1.9940814864641856e-05, + "loss": 0.808, + "step": 3682 + }, + { + "epoch": 0.06364044788498756, + "grad_norm": 1.0995424108038276, + "learning_rate": 1.9940754050426587e-05, + "loss": 0.6349, + "step": 3683 + }, + { + "epoch": 0.06365772739839647, + "grad_norm": 1.3033383599751782, + "learning_rate": 1.9940693205076124e-05, + "loss": 0.6632, + "step": 3684 + }, + { + "epoch": 0.06367500691180536, + "grad_norm": 0.7826993336213829, + "learning_rate": 1.9940632328590648e-05, + "loss": 0.5743, + "step": 3685 + }, + { + "epoch": 0.06369228642521427, + "grad_norm": 0.8401357340681578, + "learning_rate": 1.994057142097036e-05, + "loss": 0.5548, + "step": 3686 + }, + { + "epoch": 0.06370956593862317, + "grad_norm": 1.2053234360837088, + "learning_rate": 1.9940510482215448e-05, + "loss": 0.79, + "step": 3687 + }, + { + "epoch": 0.06372684545203207, + "grad_norm": 0.9690219867050471, + "learning_rate": 1.99404495123261e-05, + "loss": 0.8623, + "step": 3688 + }, + { + "epoch": 0.06374412496544098, + "grad_norm": 1.3215161218604152, + "learning_rate": 1.9940388511302508e-05, + "loss": 0.9189, + "step": 3689 + }, + { + "epoch": 0.06376140447884987, + "grad_norm": 1.0336026248739276, + "learning_rate": 1.9940327479144864e-05, + "loss": 0.8497, + "step": 3690 + }, + { + "epoch": 0.06377868399225878, + "grad_norm": 0.8389520784568688, + "learning_rate": 1.994026641585336e-05, + "loss": 0.6989, + "step": 3691 + }, + { + "epoch": 0.06379596350566769, + "grad_norm": 1.3737523350906395, + "learning_rate": 1.9940205321428186e-05, + "loss": 0.8055, + "step": 3692 + }, + { + "epoch": 0.06381324301907658, + "grad_norm": 1.1654046488030911, + "learning_rate": 1.9940144195869535e-05, + "loss": 0.845, + "step": 3693 + }, + { + "epoch": 0.06383052253248549, + "grad_norm": 1.207650869309219, + "learning_rate": 1.9940083039177594e-05, + "loss": 0.8247, + "step": 3694 + }, + { + "epoch": 0.06384780204589438, + "grad_norm": 0.983242445397273, + "learning_rate": 1.9940021851352557e-05, + "loss": 0.6946, + "step": 3695 + }, + { + "epoch": 0.06386508155930329, + "grad_norm": 1.0945437775426423, + "learning_rate": 1.9939960632394618e-05, + "loss": 0.4306, + "step": 3696 + }, + { + "epoch": 0.0638823610727122, + "grad_norm": 1.0431526613918825, + "learning_rate": 1.9939899382303966e-05, + "loss": 0.6039, + "step": 3697 + }, + { + "epoch": 0.06389964058612109, + "grad_norm": 0.8758666914517186, + "learning_rate": 1.9939838101080794e-05, + "loss": 0.5447, + "step": 3698 + }, + { + "epoch": 0.06391692009953, + "grad_norm": 0.7246665733417168, + "learning_rate": 1.9939776788725296e-05, + "loss": 0.4663, + "step": 3699 + }, + { + "epoch": 0.0639341996129389, + "grad_norm": 1.4881069358532748, + "learning_rate": 1.993971544523766e-05, + "loss": 0.8997, + "step": 3700 + }, + { + "epoch": 0.0639514791263478, + "grad_norm": 1.1480611654176804, + "learning_rate": 1.9939654070618078e-05, + "loss": 0.7737, + "step": 3701 + }, + { + "epoch": 0.0639687586397567, + "grad_norm": 1.1113818317568969, + "learning_rate": 1.9939592664866748e-05, + "loss": 0.6776, + "step": 3702 + }, + { + "epoch": 0.0639860381531656, + "grad_norm": 1.61079022479016, + "learning_rate": 1.9939531227983853e-05, + "loss": 0.5427, + "step": 3703 + }, + { + "epoch": 0.06400331766657451, + "grad_norm": 0.7270240931794596, + "learning_rate": 1.9939469759969595e-05, + "loss": 0.7071, + "step": 3704 + }, + { + "epoch": 0.06402059717998342, + "grad_norm": 0.8553473605968703, + "learning_rate": 1.993940826082416e-05, + "loss": 0.6744, + "step": 3705 + }, + { + "epoch": 0.06403787669339231, + "grad_norm": 0.7544984577991288, + "learning_rate": 1.9939346730547746e-05, + "loss": 0.5674, + "step": 3706 + }, + { + "epoch": 0.06405515620680122, + "grad_norm": 1.0327459147093419, + "learning_rate": 1.993928516914054e-05, + "loss": 0.6824, + "step": 3707 + }, + { + "epoch": 0.06407243572021012, + "grad_norm": 1.040130454736606, + "learning_rate": 1.993922357660274e-05, + "loss": 0.6521, + "step": 3708 + }, + { + "epoch": 0.06408971523361902, + "grad_norm": 1.0031577166862622, + "learning_rate": 1.9939161952934533e-05, + "loss": 0.7506, + "step": 3709 + }, + { + "epoch": 0.06410699474702793, + "grad_norm": 1.0650588099878664, + "learning_rate": 1.993910029813612e-05, + "loss": 0.7756, + "step": 3710 + }, + { + "epoch": 0.06412427426043682, + "grad_norm": 0.8745079384814499, + "learning_rate": 1.9939038612207686e-05, + "loss": 0.693, + "step": 3711 + }, + { + "epoch": 0.06414155377384573, + "grad_norm": 1.0578128027248492, + "learning_rate": 1.9938976895149427e-05, + "loss": 0.9153, + "step": 3712 + }, + { + "epoch": 0.06415883328725464, + "grad_norm": 1.2242524929512604, + "learning_rate": 1.993891514696154e-05, + "loss": 1.0102, + "step": 3713 + }, + { + "epoch": 0.06417611280066353, + "grad_norm": 1.0943095650265244, + "learning_rate": 1.9938853367644213e-05, + "loss": 0.8, + "step": 3714 + }, + { + "epoch": 0.06419339231407244, + "grad_norm": 1.1737062343994586, + "learning_rate": 1.993879155719764e-05, + "loss": 0.8942, + "step": 3715 + }, + { + "epoch": 0.06421067182748134, + "grad_norm": 1.0021879647287872, + "learning_rate": 1.9938729715622022e-05, + "loss": 0.7122, + "step": 3716 + }, + { + "epoch": 0.06422795134089024, + "grad_norm": 1.1397015103892647, + "learning_rate": 1.9938667842917545e-05, + "loss": 0.9254, + "step": 3717 + }, + { + "epoch": 0.06424523085429915, + "grad_norm": 0.8781026775837109, + "learning_rate": 1.9938605939084405e-05, + "loss": 0.5703, + "step": 3718 + }, + { + "epoch": 0.06426251036770804, + "grad_norm": 0.7784581235958601, + "learning_rate": 1.9938544004122793e-05, + "loss": 0.5512, + "step": 3719 + }, + { + "epoch": 0.06427978988111695, + "grad_norm": 1.0558075393657727, + "learning_rate": 1.993848203803291e-05, + "loss": 0.5521, + "step": 3720 + }, + { + "epoch": 0.06429706939452585, + "grad_norm": 1.15686695068822, + "learning_rate": 1.9938420040814943e-05, + "loss": 0.9517, + "step": 3721 + }, + { + "epoch": 0.06431434890793475, + "grad_norm": 1.4535107721388927, + "learning_rate": 1.993835801246909e-05, + "loss": 0.845, + "step": 3722 + }, + { + "epoch": 0.06433162842134366, + "grad_norm": 1.0669724262183489, + "learning_rate": 1.9938295952995544e-05, + "loss": 0.7511, + "step": 3723 + }, + { + "epoch": 0.06434890793475256, + "grad_norm": 1.1570928678736674, + "learning_rate": 1.99382338623945e-05, + "loss": 0.8536, + "step": 3724 + }, + { + "epoch": 0.06436618744816146, + "grad_norm": 1.0219917639484868, + "learning_rate": 1.9938171740666154e-05, + "loss": 0.7492, + "step": 3725 + }, + { + "epoch": 0.06438346696157037, + "grad_norm": 0.9593005485460067, + "learning_rate": 1.99381095878107e-05, + "loss": 0.6364, + "step": 3726 + }, + { + "epoch": 0.06440074647497926, + "grad_norm": 1.3126959589363194, + "learning_rate": 1.9938047403828328e-05, + "loss": 0.6609, + "step": 3727 + }, + { + "epoch": 0.06441802598838817, + "grad_norm": 1.271615357645786, + "learning_rate": 1.993798518871924e-05, + "loss": 0.804, + "step": 3728 + }, + { + "epoch": 0.06443530550179707, + "grad_norm": 0.7055986253419647, + "learning_rate": 1.9937922942483624e-05, + "loss": 0.7401, + "step": 3729 + }, + { + "epoch": 0.06445258501520597, + "grad_norm": 1.0600513534807279, + "learning_rate": 1.9937860665121676e-05, + "loss": 0.9071, + "step": 3730 + }, + { + "epoch": 0.06446986452861488, + "grad_norm": 1.0860350135680377, + "learning_rate": 1.99377983566336e-05, + "loss": 0.7686, + "step": 3731 + }, + { + "epoch": 0.06448714404202378, + "grad_norm": 1.1209861180921408, + "learning_rate": 1.9937736017019578e-05, + "loss": 0.7289, + "step": 3732 + }, + { + "epoch": 0.06450442355543268, + "grad_norm": 1.1862518851523873, + "learning_rate": 1.9937673646279813e-05, + "loss": 0.8389, + "step": 3733 + }, + { + "epoch": 0.06452170306884158, + "grad_norm": 0.9387113147290759, + "learning_rate": 1.9937611244414498e-05, + "loss": 0.6131, + "step": 3734 + }, + { + "epoch": 0.06453898258225048, + "grad_norm": 1.1830747733893197, + "learning_rate": 1.9937548811423833e-05, + "loss": 0.9451, + "step": 3735 + }, + { + "epoch": 0.06455626209565939, + "grad_norm": 1.0797399066248001, + "learning_rate": 1.9937486347308006e-05, + "loss": 0.7971, + "step": 3736 + }, + { + "epoch": 0.0645735416090683, + "grad_norm": 0.998053340964081, + "learning_rate": 1.993742385206722e-05, + "loss": 0.8484, + "step": 3737 + }, + { + "epoch": 0.06459082112247719, + "grad_norm": 0.9630539886343695, + "learning_rate": 1.9937361325701664e-05, + "loss": 0.7536, + "step": 3738 + }, + { + "epoch": 0.0646081006358861, + "grad_norm": 1.0397842979456864, + "learning_rate": 1.993729876821154e-05, + "loss": 0.5881, + "step": 3739 + }, + { + "epoch": 0.06462538014929499, + "grad_norm": 1.1666539278340473, + "learning_rate": 1.993723617959704e-05, + "loss": 0.9293, + "step": 3740 + }, + { + "epoch": 0.0646426596627039, + "grad_norm": 2.6931762191229653, + "learning_rate": 1.9937173559858362e-05, + "loss": 0.8491, + "step": 3741 + }, + { + "epoch": 0.0646599391761128, + "grad_norm": 0.9582097830788725, + "learning_rate": 1.99371109089957e-05, + "loss": 0.5637, + "step": 3742 + }, + { + "epoch": 0.0646772186895217, + "grad_norm": 1.184293044510071, + "learning_rate": 1.9937048227009252e-05, + "loss": 0.7869, + "step": 3743 + }, + { + "epoch": 0.0646944982029306, + "grad_norm": 0.954017119825444, + "learning_rate": 1.993698551389921e-05, + "loss": 0.6592, + "step": 3744 + }, + { + "epoch": 0.06471177771633951, + "grad_norm": 0.93514553076575, + "learning_rate": 1.993692276966578e-05, + "loss": 0.6203, + "step": 3745 + }, + { + "epoch": 0.06472905722974841, + "grad_norm": 1.16849458083751, + "learning_rate": 1.993685999430915e-05, + "loss": 0.8277, + "step": 3746 + }, + { + "epoch": 0.06474633674315732, + "grad_norm": 1.1305632591426038, + "learning_rate": 1.993679718782952e-05, + "loss": 0.7598, + "step": 3747 + }, + { + "epoch": 0.06476361625656621, + "grad_norm": 1.0240522757740615, + "learning_rate": 1.9936734350227085e-05, + "loss": 0.6999, + "step": 3748 + }, + { + "epoch": 0.06478089576997512, + "grad_norm": 1.1887599924510468, + "learning_rate": 1.9936671481502044e-05, + "loss": 0.8375, + "step": 3749 + }, + { + "epoch": 0.06479817528338402, + "grad_norm": 1.1686909849427327, + "learning_rate": 1.993660858165459e-05, + "loss": 0.6881, + "step": 3750 + }, + { + "epoch": 0.06481545479679292, + "grad_norm": 1.1088701544572892, + "learning_rate": 1.9936545650684928e-05, + "loss": 0.8051, + "step": 3751 + }, + { + "epoch": 0.06483273431020183, + "grad_norm": 1.4051369298113265, + "learning_rate": 1.9936482688593244e-05, + "loss": 0.7906, + "step": 3752 + }, + { + "epoch": 0.06485001382361073, + "grad_norm": 1.1294484895282595, + "learning_rate": 1.9936419695379748e-05, + "loss": 0.8397, + "step": 3753 + }, + { + "epoch": 0.06486729333701963, + "grad_norm": 1.1468607026808546, + "learning_rate": 1.9936356671044625e-05, + "loss": 0.9268, + "step": 3754 + }, + { + "epoch": 0.06488457285042853, + "grad_norm": 1.144971589947714, + "learning_rate": 1.9936293615588077e-05, + "loss": 0.8793, + "step": 3755 + }, + { + "epoch": 0.06490185236383743, + "grad_norm": 1.4677384721620048, + "learning_rate": 1.9936230529010307e-05, + "loss": 0.8881, + "step": 3756 + }, + { + "epoch": 0.06491913187724634, + "grad_norm": 1.13992865546456, + "learning_rate": 1.9936167411311502e-05, + "loss": 0.6082, + "step": 3757 + }, + { + "epoch": 0.06493641139065524, + "grad_norm": 1.596319235480265, + "learning_rate": 1.993610426249187e-05, + "loss": 0.8559, + "step": 3758 + }, + { + "epoch": 0.06495369090406414, + "grad_norm": 0.9662267567128375, + "learning_rate": 1.9936041082551606e-05, + "loss": 0.5322, + "step": 3759 + }, + { + "epoch": 0.06497097041747305, + "grad_norm": 0.9472469337485576, + "learning_rate": 1.99359778714909e-05, + "loss": 0.7431, + "step": 3760 + }, + { + "epoch": 0.06498824993088195, + "grad_norm": 1.2898777663580807, + "learning_rate": 1.993591462930996e-05, + "loss": 0.9031, + "step": 3761 + }, + { + "epoch": 0.06500552944429085, + "grad_norm": 1.1568163291096112, + "learning_rate": 1.993585135600898e-05, + "loss": 0.7799, + "step": 3762 + }, + { + "epoch": 0.06502280895769975, + "grad_norm": 1.2532944900001894, + "learning_rate": 1.993578805158816e-05, + "loss": 0.9694, + "step": 3763 + }, + { + "epoch": 0.06504008847110865, + "grad_norm": 1.0862292794182624, + "learning_rate": 1.9935724716047693e-05, + "loss": 0.6717, + "step": 3764 + }, + { + "epoch": 0.06505736798451756, + "grad_norm": 1.1679328158335034, + "learning_rate": 1.9935661349387785e-05, + "loss": 0.6984, + "step": 3765 + }, + { + "epoch": 0.06507464749792646, + "grad_norm": 1.2654941483575286, + "learning_rate": 1.993559795160863e-05, + "loss": 0.7101, + "step": 3766 + }, + { + "epoch": 0.06509192701133536, + "grad_norm": 1.0883972973636857, + "learning_rate": 1.9935534522710424e-05, + "loss": 0.698, + "step": 3767 + }, + { + "epoch": 0.06510920652474426, + "grad_norm": 1.1189717171125435, + "learning_rate": 1.993547106269337e-05, + "loss": 0.6897, + "step": 3768 + }, + { + "epoch": 0.06512648603815317, + "grad_norm": 1.1470443921543783, + "learning_rate": 1.9935407571557665e-05, + "loss": 0.5549, + "step": 3769 + }, + { + "epoch": 0.06514376555156207, + "grad_norm": 0.7712401172853839, + "learning_rate": 1.993534404930351e-05, + "loss": 0.8578, + "step": 3770 + }, + { + "epoch": 0.06516104506497097, + "grad_norm": 1.2165774737798905, + "learning_rate": 1.9935280495931103e-05, + "loss": 0.8161, + "step": 3771 + }, + { + "epoch": 0.06517832457837987, + "grad_norm": 1.2969681150299697, + "learning_rate": 1.993521691144064e-05, + "loss": 0.7194, + "step": 3772 + }, + { + "epoch": 0.06519560409178878, + "grad_norm": 0.9323315059423483, + "learning_rate": 1.9935153295832325e-05, + "loss": 0.6861, + "step": 3773 + }, + { + "epoch": 0.06521288360519768, + "grad_norm": 0.9778038579702443, + "learning_rate": 1.9935089649106353e-05, + "loss": 0.6822, + "step": 3774 + }, + { + "epoch": 0.06523016311860658, + "grad_norm": 1.118684306164395, + "learning_rate": 1.9935025971262926e-05, + "loss": 0.7084, + "step": 3775 + }, + { + "epoch": 0.06524744263201548, + "grad_norm": 1.104779391286461, + "learning_rate": 1.9934962262302242e-05, + "loss": 0.7375, + "step": 3776 + }, + { + "epoch": 0.06526472214542438, + "grad_norm": 1.520427814322158, + "learning_rate": 1.9934898522224505e-05, + "loss": 0.9032, + "step": 3777 + }, + { + "epoch": 0.06528200165883329, + "grad_norm": 0.8597370811958932, + "learning_rate": 1.9934834751029905e-05, + "loss": 0.7031, + "step": 3778 + }, + { + "epoch": 0.0652992811722422, + "grad_norm": 1.0578659348936623, + "learning_rate": 1.9934770948718652e-05, + "loss": 0.7941, + "step": 3779 + }, + { + "epoch": 0.06531656068565109, + "grad_norm": 1.2388929621813174, + "learning_rate": 1.993470711529094e-05, + "loss": 0.8843, + "step": 3780 + }, + { + "epoch": 0.06533384019906, + "grad_norm": 1.0083166032593542, + "learning_rate": 1.9934643250746965e-05, + "loss": 0.8143, + "step": 3781 + }, + { + "epoch": 0.0653511197124689, + "grad_norm": 1.2063701472320731, + "learning_rate": 1.993457935508694e-05, + "loss": 0.8402, + "step": 3782 + }, + { + "epoch": 0.0653683992258778, + "grad_norm": 1.3373947078348154, + "learning_rate": 1.9934515428311054e-05, + "loss": 0.7239, + "step": 3783 + }, + { + "epoch": 0.0653856787392867, + "grad_norm": 1.104490233228638, + "learning_rate": 1.993445147041951e-05, + "loss": 0.9033, + "step": 3784 + }, + { + "epoch": 0.0654029582526956, + "grad_norm": 1.0392543592283743, + "learning_rate": 1.9934387481412512e-05, + "loss": 0.8234, + "step": 3785 + }, + { + "epoch": 0.0654202377661045, + "grad_norm": 0.8498870685095611, + "learning_rate": 1.9934323461290256e-05, + "loss": 0.5429, + "step": 3786 + }, + { + "epoch": 0.06543751727951341, + "grad_norm": 1.1616921074956317, + "learning_rate": 1.9934259410052944e-05, + "loss": 0.8786, + "step": 3787 + }, + { + "epoch": 0.0654547967929223, + "grad_norm": 1.066946513627391, + "learning_rate": 1.9934195327700773e-05, + "loss": 0.6049, + "step": 3788 + }, + { + "epoch": 0.06547207630633121, + "grad_norm": 1.1264222196882563, + "learning_rate": 1.9934131214233952e-05, + "loss": 0.8016, + "step": 3789 + }, + { + "epoch": 0.06548935581974012, + "grad_norm": 0.9431955470214256, + "learning_rate": 1.9934067069652677e-05, + "loss": 0.827, + "step": 3790 + }, + { + "epoch": 0.06550663533314902, + "grad_norm": 1.221925629712575, + "learning_rate": 1.9934002893957144e-05, + "loss": 0.8703, + "step": 3791 + }, + { + "epoch": 0.06552391484655792, + "grad_norm": 0.9385407049192982, + "learning_rate": 1.9933938687147563e-05, + "loss": 0.8049, + "step": 3792 + }, + { + "epoch": 0.06554119435996682, + "grad_norm": 0.700513041221335, + "learning_rate": 1.993387444922413e-05, + "loss": 0.3786, + "step": 3793 + }, + { + "epoch": 0.06555847387337572, + "grad_norm": 0.8754538707141248, + "learning_rate": 1.9933810180187048e-05, + "loss": 0.5656, + "step": 3794 + }, + { + "epoch": 0.06557575338678463, + "grad_norm": 1.0971224636588148, + "learning_rate": 1.993374588003652e-05, + "loss": 0.8384, + "step": 3795 + }, + { + "epoch": 0.06559303290019353, + "grad_norm": 1.107764241755973, + "learning_rate": 1.993368154877274e-05, + "loss": 0.7989, + "step": 3796 + }, + { + "epoch": 0.06561031241360243, + "grad_norm": 0.867579963564738, + "learning_rate": 1.9933617186395917e-05, + "loss": 0.4844, + "step": 3797 + }, + { + "epoch": 0.06562759192701134, + "grad_norm": 1.1802650802122394, + "learning_rate": 1.993355279290625e-05, + "loss": 0.8141, + "step": 3798 + }, + { + "epoch": 0.06564487144042024, + "grad_norm": 0.9778576870841299, + "learning_rate": 1.993348836830394e-05, + "loss": 0.5996, + "step": 3799 + }, + { + "epoch": 0.06566215095382914, + "grad_norm": 1.2528634499907259, + "learning_rate": 1.9933423912589195e-05, + "loss": 0.6878, + "step": 3800 + }, + { + "epoch": 0.06567943046723804, + "grad_norm": 0.83055091514209, + "learning_rate": 1.9933359425762204e-05, + "loss": 0.5147, + "step": 3801 + }, + { + "epoch": 0.06569670998064694, + "grad_norm": 1.0257421276586864, + "learning_rate": 1.993329490782318e-05, + "loss": 0.8373, + "step": 3802 + }, + { + "epoch": 0.06571398949405585, + "grad_norm": 0.9160171248937079, + "learning_rate": 1.9933230358772317e-05, + "loss": 0.6152, + "step": 3803 + }, + { + "epoch": 0.06573126900746475, + "grad_norm": 1.0588552523692856, + "learning_rate": 1.9933165778609827e-05, + "loss": 0.9684, + "step": 3804 + }, + { + "epoch": 0.06574854852087365, + "grad_norm": 1.1369220603538104, + "learning_rate": 1.9933101167335906e-05, + "loss": 0.6072, + "step": 3805 + }, + { + "epoch": 0.06576582803428256, + "grad_norm": 0.9572086640009951, + "learning_rate": 1.9933036524950755e-05, + "loss": 0.6393, + "step": 3806 + }, + { + "epoch": 0.06578310754769146, + "grad_norm": 1.3434481296095324, + "learning_rate": 1.993297185145458e-05, + "loss": 0.7131, + "step": 3807 + }, + { + "epoch": 0.06580038706110036, + "grad_norm": 0.8793130782862922, + "learning_rate": 1.993290714684758e-05, + "loss": 0.7047, + "step": 3808 + }, + { + "epoch": 0.06581766657450926, + "grad_norm": 1.0003875532910176, + "learning_rate": 1.9932842411129963e-05, + "loss": 0.8799, + "step": 3809 + }, + { + "epoch": 0.06583494608791816, + "grad_norm": 1.3111586332745302, + "learning_rate": 1.9932777644301927e-05, + "loss": 0.7757, + "step": 3810 + }, + { + "epoch": 0.06585222560132707, + "grad_norm": 1.1881050437845324, + "learning_rate": 1.993271284636368e-05, + "loss": 0.7269, + "step": 3811 + }, + { + "epoch": 0.06586950511473597, + "grad_norm": 0.9300950351403172, + "learning_rate": 1.9932648017315418e-05, + "loss": 0.5646, + "step": 3812 + }, + { + "epoch": 0.06588678462814487, + "grad_norm": 1.0734231993471766, + "learning_rate": 1.9932583157157348e-05, + "loss": 0.7258, + "step": 3813 + }, + { + "epoch": 0.06590406414155377, + "grad_norm": 0.9480203321962066, + "learning_rate": 1.993251826588967e-05, + "loss": 0.5436, + "step": 3814 + }, + { + "epoch": 0.06592134365496267, + "grad_norm": 1.194758926371084, + "learning_rate": 1.9932453343512596e-05, + "loss": 0.9849, + "step": 3815 + }, + { + "epoch": 0.06593862316837158, + "grad_norm": 1.203912561469989, + "learning_rate": 1.993238839002632e-05, + "loss": 0.7025, + "step": 3816 + }, + { + "epoch": 0.06595590268178048, + "grad_norm": 0.820521856057688, + "learning_rate": 1.9932323405431048e-05, + "loss": 0.7443, + "step": 3817 + }, + { + "epoch": 0.06597318219518938, + "grad_norm": 1.3767252483521792, + "learning_rate": 1.9932258389726985e-05, + "loss": 1.0707, + "step": 3818 + }, + { + "epoch": 0.06599046170859829, + "grad_norm": 1.5378628166311767, + "learning_rate": 1.9932193342914336e-05, + "loss": 0.6023, + "step": 3819 + }, + { + "epoch": 0.06600774122200719, + "grad_norm": 1.025230240850995, + "learning_rate": 1.99321282649933e-05, + "loss": 0.8451, + "step": 3820 + }, + { + "epoch": 0.06602502073541609, + "grad_norm": 0.9712880251462248, + "learning_rate": 1.9932063155964086e-05, + "loss": 0.904, + "step": 3821 + }, + { + "epoch": 0.06604230024882499, + "grad_norm": 1.267919048167683, + "learning_rate": 1.993199801582689e-05, + "loss": 1.0587, + "step": 3822 + }, + { + "epoch": 0.0660595797622339, + "grad_norm": 0.9702798540836268, + "learning_rate": 1.9931932844581928e-05, + "loss": 0.7471, + "step": 3823 + }, + { + "epoch": 0.0660768592756428, + "grad_norm": 0.7717663060736868, + "learning_rate": 1.9931867642229394e-05, + "loss": 0.4989, + "step": 3824 + }, + { + "epoch": 0.0660941387890517, + "grad_norm": 1.1133475595421378, + "learning_rate": 1.9931802408769494e-05, + "loss": 0.796, + "step": 3825 + }, + { + "epoch": 0.0661114183024606, + "grad_norm": 1.043682485118422, + "learning_rate": 1.993173714420244e-05, + "loss": 0.6291, + "step": 3826 + }, + { + "epoch": 0.06612869781586951, + "grad_norm": 1.7886414124098167, + "learning_rate": 1.9931671848528426e-05, + "loss": 0.7338, + "step": 3827 + }, + { + "epoch": 0.0661459773292784, + "grad_norm": 0.7726930724073813, + "learning_rate": 1.993160652174766e-05, + "loss": 0.5161, + "step": 3828 + }, + { + "epoch": 0.06616325684268731, + "grad_norm": 1.0511536805320911, + "learning_rate": 1.993154116386035e-05, + "loss": 0.7774, + "step": 3829 + }, + { + "epoch": 0.0661805363560962, + "grad_norm": 1.0015592743176214, + "learning_rate": 1.9931475774866698e-05, + "loss": 0.7075, + "step": 3830 + }, + { + "epoch": 0.06619781586950511, + "grad_norm": 1.0160396801074798, + "learning_rate": 1.993141035476691e-05, + "loss": 0.7408, + "step": 3831 + }, + { + "epoch": 0.06621509538291402, + "grad_norm": 0.9891877609955738, + "learning_rate": 1.993134490356119e-05, + "loss": 0.8239, + "step": 3832 + }, + { + "epoch": 0.06623237489632292, + "grad_norm": 0.8988565615320189, + "learning_rate": 1.993127942124974e-05, + "loss": 0.7341, + "step": 3833 + }, + { + "epoch": 0.06624965440973182, + "grad_norm": 0.9420337015169532, + "learning_rate": 1.9931213907832768e-05, + "loss": 0.6104, + "step": 3834 + }, + { + "epoch": 0.06626693392314073, + "grad_norm": 0.7899247533296642, + "learning_rate": 1.9931148363310484e-05, + "loss": 0.6907, + "step": 3835 + }, + { + "epoch": 0.06628421343654962, + "grad_norm": 1.0183659349085334, + "learning_rate": 1.9931082787683084e-05, + "loss": 0.7107, + "step": 3836 + }, + { + "epoch": 0.06630149294995853, + "grad_norm": 1.2031207163458333, + "learning_rate": 1.993101718095078e-05, + "loss": 0.8304, + "step": 3837 + }, + { + "epoch": 0.06631877246336743, + "grad_norm": 0.749005688290566, + "learning_rate": 1.9930951543113777e-05, + "loss": 0.5988, + "step": 3838 + }, + { + "epoch": 0.06633605197677633, + "grad_norm": 1.2890077304965535, + "learning_rate": 1.9930885874172277e-05, + "loss": 0.9224, + "step": 3839 + }, + { + "epoch": 0.06635333149018524, + "grad_norm": 1.0619101004834421, + "learning_rate": 1.9930820174126488e-05, + "loss": 0.9866, + "step": 3840 + }, + { + "epoch": 0.06637061100359413, + "grad_norm": 0.7899384047313435, + "learning_rate": 1.9930754442976615e-05, + "loss": 0.9487, + "step": 3841 + }, + { + "epoch": 0.06638789051700304, + "grad_norm": 1.2334863705548007, + "learning_rate": 1.9930688680722866e-05, + "loss": 0.7939, + "step": 3842 + }, + { + "epoch": 0.06640517003041195, + "grad_norm": 1.2208112625008587, + "learning_rate": 1.9930622887365443e-05, + "loss": 0.7856, + "step": 3843 + }, + { + "epoch": 0.06642244954382084, + "grad_norm": 1.9495018753403266, + "learning_rate": 1.9930557062904557e-05, + "loss": 0.5625, + "step": 3844 + }, + { + "epoch": 0.06643972905722975, + "grad_norm": 1.569804469494647, + "learning_rate": 1.993049120734041e-05, + "loss": 0.5584, + "step": 3845 + }, + { + "epoch": 0.06645700857063865, + "grad_norm": 1.0488522883929465, + "learning_rate": 1.993042532067321e-05, + "loss": 0.554, + "step": 3846 + }, + { + "epoch": 0.06647428808404755, + "grad_norm": 1.4623759128709934, + "learning_rate": 1.9930359402903164e-05, + "loss": 0.9218, + "step": 3847 + }, + { + "epoch": 0.06649156759745646, + "grad_norm": 0.8535424927525763, + "learning_rate": 1.9930293454030475e-05, + "loss": 0.6456, + "step": 3848 + }, + { + "epoch": 0.06650884711086535, + "grad_norm": 0.8473474529066612, + "learning_rate": 1.9930227474055357e-05, + "loss": 0.6717, + "step": 3849 + }, + { + "epoch": 0.06652612662427426, + "grad_norm": 1.2080502860754794, + "learning_rate": 1.9930161462978005e-05, + "loss": 0.7241, + "step": 3850 + }, + { + "epoch": 0.06654340613768316, + "grad_norm": 0.9664265541463967, + "learning_rate": 1.993009542079864e-05, + "loss": 0.6483, + "step": 3851 + }, + { + "epoch": 0.06656068565109206, + "grad_norm": 0.9137099304882522, + "learning_rate": 1.9930029347517455e-05, + "loss": 0.6271, + "step": 3852 + }, + { + "epoch": 0.06657796516450097, + "grad_norm": 1.2152657102970785, + "learning_rate": 1.9929963243134668e-05, + "loss": 0.9182, + "step": 3853 + }, + { + "epoch": 0.06659524467790987, + "grad_norm": 0.9971875848125415, + "learning_rate": 1.992989710765048e-05, + "loss": 0.5954, + "step": 3854 + }, + { + "epoch": 0.06661252419131877, + "grad_norm": 0.8265576408260237, + "learning_rate": 1.99298309410651e-05, + "loss": 0.8204, + "step": 3855 + }, + { + "epoch": 0.06662980370472768, + "grad_norm": 1.1914864063065846, + "learning_rate": 1.9929764743378735e-05, + "loss": 0.8237, + "step": 3856 + }, + { + "epoch": 0.06664708321813657, + "grad_norm": 1.3643405436388591, + "learning_rate": 1.9929698514591592e-05, + "loss": 0.8212, + "step": 3857 + }, + { + "epoch": 0.06666436273154548, + "grad_norm": 1.0351870282254352, + "learning_rate": 1.992963225470388e-05, + "loss": 0.9014, + "step": 3858 + }, + { + "epoch": 0.06668164224495438, + "grad_norm": 0.9176776353026143, + "learning_rate": 1.9929565963715802e-05, + "loss": 0.7603, + "step": 3859 + }, + { + "epoch": 0.06669892175836328, + "grad_norm": 0.9693852228961075, + "learning_rate": 1.9929499641627573e-05, + "loss": 0.6924, + "step": 3860 + }, + { + "epoch": 0.06671620127177219, + "grad_norm": 1.234152442927163, + "learning_rate": 1.9929433288439394e-05, + "loss": 0.7644, + "step": 3861 + }, + { + "epoch": 0.06673348078518108, + "grad_norm": 1.054119351579998, + "learning_rate": 1.9929366904151472e-05, + "loss": 0.6284, + "step": 3862 + }, + { + "epoch": 0.06675076029858999, + "grad_norm": 0.644388601722926, + "learning_rate": 1.9929300488764024e-05, + "loss": 0.7922, + "step": 3863 + }, + { + "epoch": 0.0667680398119989, + "grad_norm": 1.0932088269278823, + "learning_rate": 1.992923404227725e-05, + "loss": 0.6491, + "step": 3864 + }, + { + "epoch": 0.0667853193254078, + "grad_norm": 1.004734719130702, + "learning_rate": 1.992916756469136e-05, + "loss": 0.772, + "step": 3865 + }, + { + "epoch": 0.0668025988388167, + "grad_norm": 1.1459375149498896, + "learning_rate": 1.9929101056006562e-05, + "loss": 0.7907, + "step": 3866 + }, + { + "epoch": 0.0668198783522256, + "grad_norm": 1.0810211736809308, + "learning_rate": 1.9929034516223066e-05, + "loss": 0.7357, + "step": 3867 + }, + { + "epoch": 0.0668371578656345, + "grad_norm": 1.221167373717683, + "learning_rate": 1.9928967945341077e-05, + "loss": 0.7923, + "step": 3868 + }, + { + "epoch": 0.06685443737904341, + "grad_norm": 1.1687019901809483, + "learning_rate": 1.9928901343360808e-05, + "loss": 0.7761, + "step": 3869 + }, + { + "epoch": 0.0668717168924523, + "grad_norm": 1.3122054647872747, + "learning_rate": 1.9928834710282464e-05, + "loss": 0.6137, + "step": 3870 + }, + { + "epoch": 0.06688899640586121, + "grad_norm": 1.2733271469805563, + "learning_rate": 1.9928768046106257e-05, + "loss": 0.8211, + "step": 3871 + }, + { + "epoch": 0.06690627591927012, + "grad_norm": 1.305574163786493, + "learning_rate": 1.9928701350832394e-05, + "loss": 0.7837, + "step": 3872 + }, + { + "epoch": 0.06692355543267901, + "grad_norm": 1.0324809509454083, + "learning_rate": 1.9928634624461082e-05, + "loss": 0.7707, + "step": 3873 + }, + { + "epoch": 0.06694083494608792, + "grad_norm": 0.7058199229286906, + "learning_rate": 1.9928567866992534e-05, + "loss": 0.5883, + "step": 3874 + }, + { + "epoch": 0.06695811445949681, + "grad_norm": 1.2202707138214608, + "learning_rate": 1.9928501078426957e-05, + "loss": 1.0143, + "step": 3875 + }, + { + "epoch": 0.06697539397290572, + "grad_norm": 1.0322537846879787, + "learning_rate": 1.9928434258764557e-05, + "loss": 1.0316, + "step": 3876 + }, + { + "epoch": 0.06699267348631463, + "grad_norm": 1.1544687110720078, + "learning_rate": 1.992836740800555e-05, + "loss": 0.5957, + "step": 3877 + }, + { + "epoch": 0.06700995299972352, + "grad_norm": 1.050268782038735, + "learning_rate": 1.9928300526150138e-05, + "loss": 0.7608, + "step": 3878 + }, + { + "epoch": 0.06702723251313243, + "grad_norm": 0.9827619476657671, + "learning_rate": 1.992823361319854e-05, + "loss": 0.7281, + "step": 3879 + }, + { + "epoch": 0.06704451202654134, + "grad_norm": 1.0589168800037625, + "learning_rate": 1.9928166669150953e-05, + "loss": 1.0526, + "step": 3880 + }, + { + "epoch": 0.06706179153995023, + "grad_norm": 0.902961708758596, + "learning_rate": 1.9928099694007596e-05, + "loss": 0.6103, + "step": 3881 + }, + { + "epoch": 0.06707907105335914, + "grad_norm": 0.8427849679123138, + "learning_rate": 1.9928032687768677e-05, + "loss": 0.8313, + "step": 3882 + }, + { + "epoch": 0.06709635056676803, + "grad_norm": 0.8001209269962583, + "learning_rate": 1.9927965650434405e-05, + "loss": 0.9502, + "step": 3883 + }, + { + "epoch": 0.06711363008017694, + "grad_norm": 0.7990345610503956, + "learning_rate": 1.992789858200499e-05, + "loss": 0.6486, + "step": 3884 + }, + { + "epoch": 0.06713090959358585, + "grad_norm": 1.1507420821088523, + "learning_rate": 1.9927831482480643e-05, + "loss": 0.6274, + "step": 3885 + }, + { + "epoch": 0.06714818910699474, + "grad_norm": 1.051731456007519, + "learning_rate": 1.9927764351861573e-05, + "loss": 0.9816, + "step": 3886 + }, + { + "epoch": 0.06716546862040365, + "grad_norm": 0.8846549208846051, + "learning_rate": 1.9927697190147988e-05, + "loss": 0.6848, + "step": 3887 + }, + { + "epoch": 0.06718274813381256, + "grad_norm": 0.9440562095207262, + "learning_rate": 1.9927629997340104e-05, + "loss": 0.7434, + "step": 3888 + }, + { + "epoch": 0.06720002764722145, + "grad_norm": 1.0505200223440343, + "learning_rate": 1.9927562773438125e-05, + "loss": 0.6026, + "step": 3889 + }, + { + "epoch": 0.06721730716063036, + "grad_norm": 0.9135719800743091, + "learning_rate": 1.992749551844227e-05, + "loss": 0.4673, + "step": 3890 + }, + { + "epoch": 0.06723458667403925, + "grad_norm": 0.8712534083750783, + "learning_rate": 1.992742823235274e-05, + "loss": 0.6362, + "step": 3891 + }, + { + "epoch": 0.06725186618744816, + "grad_norm": 0.9857261625225219, + "learning_rate": 1.9927360915169754e-05, + "loss": 0.7567, + "step": 3892 + }, + { + "epoch": 0.06726914570085707, + "grad_norm": 0.693999593265327, + "learning_rate": 1.9927293566893516e-05, + "loss": 0.7381, + "step": 3893 + }, + { + "epoch": 0.06728642521426596, + "grad_norm": 1.3244174278537237, + "learning_rate": 1.992722618752424e-05, + "loss": 0.8229, + "step": 3894 + }, + { + "epoch": 0.06730370472767487, + "grad_norm": 0.7415240922970372, + "learning_rate": 1.992715877706214e-05, + "loss": 0.6176, + "step": 3895 + }, + { + "epoch": 0.06732098424108376, + "grad_norm": 0.951013406213565, + "learning_rate": 1.9927091335507422e-05, + "loss": 0.7685, + "step": 3896 + }, + { + "epoch": 0.06733826375449267, + "grad_norm": 1.1631349006817002, + "learning_rate": 1.99270238628603e-05, + "loss": 0.8226, + "step": 3897 + }, + { + "epoch": 0.06735554326790158, + "grad_norm": 1.590401543538008, + "learning_rate": 1.9926956359120987e-05, + "loss": 0.9734, + "step": 3898 + }, + { + "epoch": 0.06737282278131047, + "grad_norm": 1.2377184817886406, + "learning_rate": 1.992688882428969e-05, + "loss": 0.8057, + "step": 3899 + }, + { + "epoch": 0.06739010229471938, + "grad_norm": 1.2362974839500662, + "learning_rate": 1.9926821258366622e-05, + "loss": 0.9111, + "step": 3900 + }, + { + "epoch": 0.06740738180812829, + "grad_norm": 1.0244645980470874, + "learning_rate": 1.9926753661351997e-05, + "loss": 0.7725, + "step": 3901 + }, + { + "epoch": 0.06742466132153718, + "grad_norm": 1.115550314143095, + "learning_rate": 1.9926686033246025e-05, + "loss": 0.8444, + "step": 3902 + }, + { + "epoch": 0.06744194083494609, + "grad_norm": 0.9551023829860162, + "learning_rate": 1.9926618374048915e-05, + "loss": 0.64, + "step": 3903 + }, + { + "epoch": 0.06745922034835498, + "grad_norm": 0.9930535556123374, + "learning_rate": 1.9926550683760885e-05, + "loss": 0.7544, + "step": 3904 + }, + { + "epoch": 0.06747649986176389, + "grad_norm": 1.2775434340298961, + "learning_rate": 1.9926482962382145e-05, + "loss": 0.8757, + "step": 3905 + }, + { + "epoch": 0.0674937793751728, + "grad_norm": 0.9536585994801693, + "learning_rate": 1.9926415209912905e-05, + "loss": 0.748, + "step": 3906 + }, + { + "epoch": 0.0675110588885817, + "grad_norm": 0.9611121192457573, + "learning_rate": 1.9926347426353378e-05, + "loss": 0.5808, + "step": 3907 + }, + { + "epoch": 0.0675283384019906, + "grad_norm": 1.5027930142052723, + "learning_rate": 1.9926279611703774e-05, + "loss": 0.8186, + "step": 3908 + }, + { + "epoch": 0.06754561791539951, + "grad_norm": 1.2092555270397982, + "learning_rate": 1.992621176596431e-05, + "loss": 0.8796, + "step": 3909 + }, + { + "epoch": 0.0675628974288084, + "grad_norm": 0.8653411341558144, + "learning_rate": 1.9926143889135194e-05, + "loss": 0.7032, + "step": 3910 + }, + { + "epoch": 0.06758017694221731, + "grad_norm": 1.1708808628248033, + "learning_rate": 1.9926075981216644e-05, + "loss": 0.8067, + "step": 3911 + }, + { + "epoch": 0.0675974564556262, + "grad_norm": 1.1677297253172976, + "learning_rate": 1.992600804220887e-05, + "loss": 0.7666, + "step": 3912 + }, + { + "epoch": 0.06761473596903511, + "grad_norm": 1.223633353124928, + "learning_rate": 1.9925940072112083e-05, + "loss": 0.7182, + "step": 3913 + }, + { + "epoch": 0.06763201548244402, + "grad_norm": 0.7997356747117288, + "learning_rate": 1.99258720709265e-05, + "loss": 0.4878, + "step": 3914 + }, + { + "epoch": 0.06764929499585291, + "grad_norm": 1.0142388545639587, + "learning_rate": 1.992580403865233e-05, + "loss": 0.7684, + "step": 3915 + }, + { + "epoch": 0.06766657450926182, + "grad_norm": 0.8706900628241719, + "learning_rate": 1.9925735975289785e-05, + "loss": 1.1307, + "step": 3916 + }, + { + "epoch": 0.06768385402267073, + "grad_norm": 1.4743581458690873, + "learning_rate": 1.992566788083908e-05, + "loss": 0.7548, + "step": 3917 + }, + { + "epoch": 0.06770113353607962, + "grad_norm": 1.1765118940733625, + "learning_rate": 1.992559975530043e-05, + "loss": 0.8778, + "step": 3918 + }, + { + "epoch": 0.06771841304948853, + "grad_norm": 1.2773889769578217, + "learning_rate": 1.992553159867405e-05, + "loss": 1.1157, + "step": 3919 + }, + { + "epoch": 0.06773569256289742, + "grad_norm": 1.1549523955203773, + "learning_rate": 1.992546341096015e-05, + "loss": 0.9905, + "step": 3920 + }, + { + "epoch": 0.06775297207630633, + "grad_norm": 1.0256547315218185, + "learning_rate": 1.992539519215894e-05, + "loss": 0.7605, + "step": 3921 + }, + { + "epoch": 0.06777025158971524, + "grad_norm": 1.1656633496155446, + "learning_rate": 1.9925326942270643e-05, + "loss": 0.6944, + "step": 3922 + }, + { + "epoch": 0.06778753110312413, + "grad_norm": 1.221621503754436, + "learning_rate": 1.9925258661295467e-05, + "loss": 0.6575, + "step": 3923 + }, + { + "epoch": 0.06780481061653304, + "grad_norm": 0.8529298540283955, + "learning_rate": 1.9925190349233622e-05, + "loss": 0.6558, + "step": 3924 + }, + { + "epoch": 0.06782209012994195, + "grad_norm": 0.95392791362705, + "learning_rate": 1.9925122006085332e-05, + "loss": 0.6727, + "step": 3925 + }, + { + "epoch": 0.06783936964335084, + "grad_norm": 0.9169055628165524, + "learning_rate": 1.9925053631850805e-05, + "loss": 0.7051, + "step": 3926 + }, + { + "epoch": 0.06785664915675975, + "grad_norm": 0.9541127889518336, + "learning_rate": 1.9924985226530253e-05, + "loss": 0.6028, + "step": 3927 + }, + { + "epoch": 0.06787392867016864, + "grad_norm": 1.0240198036308659, + "learning_rate": 1.9924916790123894e-05, + "loss": 0.8485, + "step": 3928 + }, + { + "epoch": 0.06789120818357755, + "grad_norm": 0.9504837549400496, + "learning_rate": 1.9924848322631943e-05, + "loss": 0.5971, + "step": 3929 + }, + { + "epoch": 0.06790848769698646, + "grad_norm": 0.8882558975317667, + "learning_rate": 1.9924779824054612e-05, + "loss": 0.5727, + "step": 3930 + }, + { + "epoch": 0.06792576721039535, + "grad_norm": 1.0465870216595892, + "learning_rate": 1.9924711294392114e-05, + "loss": 0.815, + "step": 3931 + }, + { + "epoch": 0.06794304672380426, + "grad_norm": 1.0820159494016106, + "learning_rate": 1.992464273364467e-05, + "loss": 0.8553, + "step": 3932 + }, + { + "epoch": 0.06796032623721315, + "grad_norm": 1.0910183261103552, + "learning_rate": 1.9924574141812487e-05, + "loss": 0.5574, + "step": 3933 + }, + { + "epoch": 0.06797760575062206, + "grad_norm": 1.007637960450426, + "learning_rate": 1.9924505518895784e-05, + "loss": 0.5919, + "step": 3934 + }, + { + "epoch": 0.06799488526403097, + "grad_norm": 1.0544159415940002, + "learning_rate": 1.9924436864894775e-05, + "loss": 0.7881, + "step": 3935 + }, + { + "epoch": 0.06801216477743986, + "grad_norm": 1.019423280316751, + "learning_rate": 1.992436817980968e-05, + "loss": 0.9612, + "step": 3936 + }, + { + "epoch": 0.06802944429084877, + "grad_norm": 1.0144515517357842, + "learning_rate": 1.9924299463640704e-05, + "loss": 0.7406, + "step": 3937 + }, + { + "epoch": 0.06804672380425768, + "grad_norm": 0.5690331626039019, + "learning_rate": 1.992423071638807e-05, + "loss": 0.4767, + "step": 3938 + }, + { + "epoch": 0.06806400331766657, + "grad_norm": 0.9603873515194127, + "learning_rate": 1.9924161938051992e-05, + "loss": 0.811, + "step": 3939 + }, + { + "epoch": 0.06808128283107548, + "grad_norm": 1.0527274657290795, + "learning_rate": 1.9924093128632685e-05, + "loss": 0.8218, + "step": 3940 + }, + { + "epoch": 0.06809856234448437, + "grad_norm": 1.0668423022996412, + "learning_rate": 1.9924024288130366e-05, + "loss": 0.6209, + "step": 3941 + }, + { + "epoch": 0.06811584185789328, + "grad_norm": 0.7290881706532809, + "learning_rate": 1.9923955416545248e-05, + "loss": 0.7313, + "step": 3942 + }, + { + "epoch": 0.06813312137130219, + "grad_norm": 0.9508345605239764, + "learning_rate": 1.992388651387754e-05, + "loss": 0.7636, + "step": 3943 + }, + { + "epoch": 0.06815040088471108, + "grad_norm": 0.5783642772868656, + "learning_rate": 1.9923817580127472e-05, + "loss": 0.6519, + "step": 3944 + }, + { + "epoch": 0.06816768039811999, + "grad_norm": 1.1940540574631069, + "learning_rate": 1.9923748615295255e-05, + "loss": 1.0415, + "step": 3945 + }, + { + "epoch": 0.0681849599115289, + "grad_norm": 1.2678853110483972, + "learning_rate": 1.99236796193811e-05, + "loss": 0.7506, + "step": 3946 + }, + { + "epoch": 0.06820223942493779, + "grad_norm": 1.1310491729711345, + "learning_rate": 1.9923610592385225e-05, + "loss": 0.701, + "step": 3947 + }, + { + "epoch": 0.0682195189383467, + "grad_norm": 0.8214813907306789, + "learning_rate": 1.992354153430785e-05, + "loss": 0.7443, + "step": 3948 + }, + { + "epoch": 0.06823679845175559, + "grad_norm": 1.1278735696021438, + "learning_rate": 1.9923472445149187e-05, + "loss": 0.8529, + "step": 3949 + }, + { + "epoch": 0.0682540779651645, + "grad_norm": 0.9950395747748035, + "learning_rate": 1.9923403324909453e-05, + "loss": 0.6598, + "step": 3950 + }, + { + "epoch": 0.06827135747857341, + "grad_norm": 1.131056235667572, + "learning_rate": 1.9923334173588868e-05, + "loss": 0.7905, + "step": 3951 + }, + { + "epoch": 0.0682886369919823, + "grad_norm": 0.9044019317561592, + "learning_rate": 1.9923264991187646e-05, + "loss": 0.7107, + "step": 3952 + }, + { + "epoch": 0.06830591650539121, + "grad_norm": 0.9954328079151866, + "learning_rate": 1.9923195777706003e-05, + "loss": 0.6965, + "step": 3953 + }, + { + "epoch": 0.06832319601880012, + "grad_norm": 0.8925853495572309, + "learning_rate": 1.9923126533144157e-05, + "loss": 0.6832, + "step": 3954 + }, + { + "epoch": 0.06834047553220901, + "grad_norm": 0.8220385533082923, + "learning_rate": 1.9923057257502324e-05, + "loss": 0.5717, + "step": 3955 + }, + { + "epoch": 0.06835775504561792, + "grad_norm": 1.1049086121670089, + "learning_rate": 1.9922987950780724e-05, + "loss": 0.7721, + "step": 3956 + }, + { + "epoch": 0.06837503455902681, + "grad_norm": 0.5978013302965203, + "learning_rate": 1.9922918612979568e-05, + "loss": 0.7033, + "step": 3957 + }, + { + "epoch": 0.06839231407243572, + "grad_norm": 0.9444842655686801, + "learning_rate": 1.9922849244099082e-05, + "loss": 0.8423, + "step": 3958 + }, + { + "epoch": 0.06840959358584463, + "grad_norm": 0.9805008541147241, + "learning_rate": 1.9922779844139474e-05, + "loss": 0.5928, + "step": 3959 + }, + { + "epoch": 0.06842687309925352, + "grad_norm": 1.0816173262589401, + "learning_rate": 1.9922710413100966e-05, + "loss": 0.6561, + "step": 3960 + }, + { + "epoch": 0.06844415261266243, + "grad_norm": 0.989628941322505, + "learning_rate": 1.9922640950983772e-05, + "loss": 0.8863, + "step": 3961 + }, + { + "epoch": 0.06846143212607134, + "grad_norm": 1.0245032234690439, + "learning_rate": 1.9922571457788116e-05, + "loss": 0.7194, + "step": 3962 + }, + { + "epoch": 0.06847871163948023, + "grad_norm": 0.9965091918695749, + "learning_rate": 1.9922501933514215e-05, + "loss": 0.9092, + "step": 3963 + }, + { + "epoch": 0.06849599115288914, + "grad_norm": 0.8017214832853468, + "learning_rate": 1.992243237816228e-05, + "loss": 0.5593, + "step": 3964 + }, + { + "epoch": 0.06851327066629803, + "grad_norm": 1.0945952468444986, + "learning_rate": 1.992236279173253e-05, + "loss": 0.8542, + "step": 3965 + }, + { + "epoch": 0.06853055017970694, + "grad_norm": 0.9701378903446984, + "learning_rate": 1.9922293174225192e-05, + "loss": 0.7585, + "step": 3966 + }, + { + "epoch": 0.06854782969311585, + "grad_norm": 0.805290445068683, + "learning_rate": 1.9922223525640475e-05, + "loss": 0.4947, + "step": 3967 + }, + { + "epoch": 0.06856510920652474, + "grad_norm": 1.0513847118260131, + "learning_rate": 1.9922153845978596e-05, + "loss": 0.8818, + "step": 3968 + }, + { + "epoch": 0.06858238871993365, + "grad_norm": 1.0702966668054321, + "learning_rate": 1.9922084135239784e-05, + "loss": 0.8949, + "step": 3969 + }, + { + "epoch": 0.06859966823334254, + "grad_norm": 0.8684875418581849, + "learning_rate": 1.9922014393424244e-05, + "loss": 0.519, + "step": 3970 + }, + { + "epoch": 0.06861694774675145, + "grad_norm": 1.0901289948406023, + "learning_rate": 1.9921944620532207e-05, + "loss": 0.6752, + "step": 3971 + }, + { + "epoch": 0.06863422726016036, + "grad_norm": 0.905861674990799, + "learning_rate": 1.9921874816563883e-05, + "loss": 0.7178, + "step": 3972 + }, + { + "epoch": 0.06865150677356925, + "grad_norm": 0.9533353735600635, + "learning_rate": 1.992180498151949e-05, + "loss": 0.6473, + "step": 3973 + }, + { + "epoch": 0.06866878628697816, + "grad_norm": 1.1807231232921074, + "learning_rate": 1.9921735115399256e-05, + "loss": 0.7278, + "step": 3974 + }, + { + "epoch": 0.06868606580038707, + "grad_norm": 0.8211818107439108, + "learning_rate": 1.992166521820339e-05, + "loss": 0.532, + "step": 3975 + }, + { + "epoch": 0.06870334531379596, + "grad_norm": 1.111170051583364, + "learning_rate": 1.9921595289932112e-05, + "loss": 0.7842, + "step": 3976 + }, + { + "epoch": 0.06872062482720487, + "grad_norm": 0.8822386262196322, + "learning_rate": 1.9921525330585644e-05, + "loss": 0.6312, + "step": 3977 + }, + { + "epoch": 0.06873790434061376, + "grad_norm": 1.0545672970713673, + "learning_rate": 1.992145534016421e-05, + "loss": 0.7232, + "step": 3978 + }, + { + "epoch": 0.06875518385402267, + "grad_norm": 1.0355597426158776, + "learning_rate": 1.9921385318668016e-05, + "loss": 0.7194, + "step": 3979 + }, + { + "epoch": 0.06877246336743158, + "grad_norm": 1.0850060268477166, + "learning_rate": 1.9921315266097294e-05, + "loss": 0.803, + "step": 3980 + }, + { + "epoch": 0.06878974288084047, + "grad_norm": 0.9114092797366796, + "learning_rate": 1.992124518245226e-05, + "loss": 0.7482, + "step": 3981 + }, + { + "epoch": 0.06880702239424938, + "grad_norm": 0.9483172895350159, + "learning_rate": 1.992117506773313e-05, + "loss": 0.6836, + "step": 3982 + }, + { + "epoch": 0.06882430190765829, + "grad_norm": 0.9764367029386194, + "learning_rate": 1.9921104921940125e-05, + "loss": 0.7985, + "step": 3983 + }, + { + "epoch": 0.06884158142106718, + "grad_norm": 1.2883016777582639, + "learning_rate": 1.9921034745073468e-05, + "loss": 0.9395, + "step": 3984 + }, + { + "epoch": 0.06885886093447609, + "grad_norm": 0.8537183744245654, + "learning_rate": 1.9920964537133373e-05, + "loss": 0.5867, + "step": 3985 + }, + { + "epoch": 0.06887614044788498, + "grad_norm": 0.5499095606885005, + "learning_rate": 1.9920894298120064e-05, + "loss": 0.5011, + "step": 3986 + }, + { + "epoch": 0.06889341996129389, + "grad_norm": 0.9217012874506737, + "learning_rate": 1.992082402803376e-05, + "loss": 0.5514, + "step": 3987 + }, + { + "epoch": 0.0689106994747028, + "grad_norm": 1.0655997963943082, + "learning_rate": 1.9920753726874685e-05, + "loss": 1.1005, + "step": 3988 + }, + { + "epoch": 0.06892797898811169, + "grad_norm": 0.7548491163552401, + "learning_rate": 1.992068339464305e-05, + "loss": 0.5166, + "step": 3989 + }, + { + "epoch": 0.0689452585015206, + "grad_norm": 0.5735952084379878, + "learning_rate": 1.9920613031339083e-05, + "loss": 0.7458, + "step": 3990 + }, + { + "epoch": 0.0689625380149295, + "grad_norm": 1.0718863913662386, + "learning_rate": 1.9920542636963e-05, + "loss": 0.7676, + "step": 3991 + }, + { + "epoch": 0.0689798175283384, + "grad_norm": 1.184219228423289, + "learning_rate": 1.9920472211515027e-05, + "loss": 0.6978, + "step": 3992 + }, + { + "epoch": 0.06899709704174731, + "grad_norm": 1.8813358980254966, + "learning_rate": 1.992040175499538e-05, + "loss": 0.8423, + "step": 3993 + }, + { + "epoch": 0.0690143765551562, + "grad_norm": 1.013086161058538, + "learning_rate": 1.992033126740428e-05, + "loss": 0.527, + "step": 3994 + }, + { + "epoch": 0.06903165606856511, + "grad_norm": 1.02943831840969, + "learning_rate": 1.992026074874195e-05, + "loss": 0.8328, + "step": 3995 + }, + { + "epoch": 0.06904893558197402, + "grad_norm": 0.7465085648423665, + "learning_rate": 1.9920190199008608e-05, + "loss": 0.536, + "step": 3996 + }, + { + "epoch": 0.06906621509538291, + "grad_norm": 1.002884312005245, + "learning_rate": 1.9920119618204477e-05, + "loss": 0.6385, + "step": 3997 + }, + { + "epoch": 0.06908349460879182, + "grad_norm": 1.0293046367593324, + "learning_rate": 1.992004900632978e-05, + "loss": 0.7696, + "step": 3998 + }, + { + "epoch": 0.06910077412220073, + "grad_norm": 0.930514715398882, + "learning_rate": 1.991997836338473e-05, + "loss": 0.9112, + "step": 3999 + }, + { + "epoch": 0.06911805363560962, + "grad_norm": 0.7879180490181673, + "learning_rate": 1.9919907689369556e-05, + "loss": 0.5279, + "step": 4000 + }, + { + "epoch": 0.06913533314901853, + "grad_norm": 1.076095897176127, + "learning_rate": 1.9919836984284476e-05, + "loss": 0.7644, + "step": 4001 + }, + { + "epoch": 0.06915261266242742, + "grad_norm": 1.0824322392146146, + "learning_rate": 1.9919766248129715e-05, + "loss": 0.8047, + "step": 4002 + }, + { + "epoch": 0.06916989217583633, + "grad_norm": 1.137117360198488, + "learning_rate": 1.991969548090549e-05, + "loss": 0.9819, + "step": 4003 + }, + { + "epoch": 0.06918717168924524, + "grad_norm": 0.9427943347080829, + "learning_rate": 1.9919624682612028e-05, + "loss": 0.6893, + "step": 4004 + }, + { + "epoch": 0.06920445120265413, + "grad_norm": 0.9822013827478003, + "learning_rate": 1.9919553853249545e-05, + "loss": 0.9805, + "step": 4005 + }, + { + "epoch": 0.06922173071606304, + "grad_norm": 1.085247030349424, + "learning_rate": 1.991948299281827e-05, + "loss": 0.6293, + "step": 4006 + }, + { + "epoch": 0.06923901022947193, + "grad_norm": 0.9169254867542643, + "learning_rate": 1.9919412101318416e-05, + "loss": 0.5885, + "step": 4007 + }, + { + "epoch": 0.06925628974288084, + "grad_norm": 1.0895253408342824, + "learning_rate": 1.991934117875021e-05, + "loss": 0.6916, + "step": 4008 + }, + { + "epoch": 0.06927356925628975, + "grad_norm": 1.1725735238442858, + "learning_rate": 1.9919270225113875e-05, + "loss": 0.8564, + "step": 4009 + }, + { + "epoch": 0.06929084876969864, + "grad_norm": 0.9852088554963968, + "learning_rate": 1.9919199240409632e-05, + "loss": 0.6762, + "step": 4010 + }, + { + "epoch": 0.06930812828310755, + "grad_norm": 0.8696088778510508, + "learning_rate": 1.9919128224637702e-05, + "loss": 0.5018, + "step": 4011 + }, + { + "epoch": 0.06932540779651646, + "grad_norm": 1.0690770282819184, + "learning_rate": 1.991905717779831e-05, + "loss": 0.8068, + "step": 4012 + }, + { + "epoch": 0.06934268730992535, + "grad_norm": 1.0150893921585946, + "learning_rate": 1.9918986099891675e-05, + "loss": 0.6764, + "step": 4013 + }, + { + "epoch": 0.06935996682333426, + "grad_norm": 1.0793091566452977, + "learning_rate": 1.9918914990918022e-05, + "loss": 0.7447, + "step": 4014 + }, + { + "epoch": 0.06937724633674315, + "grad_norm": 0.8859821626485919, + "learning_rate": 1.9918843850877576e-05, + "loss": 0.6302, + "step": 4015 + }, + { + "epoch": 0.06939452585015206, + "grad_norm": 0.8959183048828502, + "learning_rate": 1.9918772679770554e-05, + "loss": 0.6919, + "step": 4016 + }, + { + "epoch": 0.06941180536356097, + "grad_norm": 1.1476975527704771, + "learning_rate": 1.9918701477597185e-05, + "loss": 0.8298, + "step": 4017 + }, + { + "epoch": 0.06942908487696986, + "grad_norm": 0.9799118273256653, + "learning_rate": 1.9918630244357687e-05, + "loss": 0.6971, + "step": 4018 + }, + { + "epoch": 0.06944636439037877, + "grad_norm": 1.11059809462754, + "learning_rate": 1.9918558980052287e-05, + "loss": 0.5803, + "step": 4019 + }, + { + "epoch": 0.06946364390378768, + "grad_norm": 0.9456265124543967, + "learning_rate": 1.9918487684681204e-05, + "loss": 0.6226, + "step": 4020 + }, + { + "epoch": 0.06948092341719657, + "grad_norm": 1.015392944206266, + "learning_rate": 1.9918416358244666e-05, + "loss": 0.7784, + "step": 4021 + }, + { + "epoch": 0.06949820293060548, + "grad_norm": 0.7613015632889762, + "learning_rate": 1.991834500074289e-05, + "loss": 0.5603, + "step": 4022 + }, + { + "epoch": 0.06951548244401437, + "grad_norm": 0.9727769333042044, + "learning_rate": 1.991827361217611e-05, + "loss": 0.7776, + "step": 4023 + }, + { + "epoch": 0.06953276195742328, + "grad_norm": 1.180406074944468, + "learning_rate": 1.991820219254454e-05, + "loss": 0.7489, + "step": 4024 + }, + { + "epoch": 0.06955004147083219, + "grad_norm": 1.151837430973861, + "learning_rate": 1.9918130741848406e-05, + "loss": 0.8951, + "step": 4025 + }, + { + "epoch": 0.06956732098424108, + "grad_norm": 0.9862566634885447, + "learning_rate": 1.9918059260087937e-05, + "loss": 0.6425, + "step": 4026 + }, + { + "epoch": 0.06958460049764999, + "grad_norm": 1.18252500305073, + "learning_rate": 1.991798774726335e-05, + "loss": 0.83, + "step": 4027 + }, + { + "epoch": 0.0696018800110589, + "grad_norm": 0.7116577651833071, + "learning_rate": 1.991791620337487e-05, + "loss": 0.4336, + "step": 4028 + }, + { + "epoch": 0.06961915952446779, + "grad_norm": 1.2461710585925185, + "learning_rate": 1.9917844628422723e-05, + "loss": 0.8646, + "step": 4029 + }, + { + "epoch": 0.0696364390378767, + "grad_norm": 1.2070371963745765, + "learning_rate": 1.9917773022407136e-05, + "loss": 0.967, + "step": 4030 + }, + { + "epoch": 0.06965371855128559, + "grad_norm": 0.7275938168652977, + "learning_rate": 1.991770138532833e-05, + "loss": 0.6808, + "step": 4031 + }, + { + "epoch": 0.0696709980646945, + "grad_norm": 1.147870740784952, + "learning_rate": 1.9917629717186523e-05, + "loss": 0.6432, + "step": 4032 + }, + { + "epoch": 0.0696882775781034, + "grad_norm": 1.0711031439476928, + "learning_rate": 1.9917558017981953e-05, + "loss": 0.7531, + "step": 4033 + }, + { + "epoch": 0.0697055570915123, + "grad_norm": 0.8022993922933922, + "learning_rate": 1.9917486287714834e-05, + "loss": 0.8535, + "step": 4034 + }, + { + "epoch": 0.0697228366049212, + "grad_norm": 1.450553600116858, + "learning_rate": 1.9917414526385398e-05, + "loss": 0.8731, + "step": 4035 + }, + { + "epoch": 0.06974011611833011, + "grad_norm": 0.993229603442437, + "learning_rate": 1.991734273399386e-05, + "loss": 0.6647, + "step": 4036 + }, + { + "epoch": 0.06975739563173901, + "grad_norm": 1.0177346161513678, + "learning_rate": 1.9917270910540457e-05, + "loss": 0.6543, + "step": 4037 + }, + { + "epoch": 0.06977467514514792, + "grad_norm": 1.0300589339559376, + "learning_rate": 1.9917199056025406e-05, + "loss": 0.7044, + "step": 4038 + }, + { + "epoch": 0.06979195465855681, + "grad_norm": 0.9177675758619175, + "learning_rate": 1.991712717044893e-05, + "loss": 0.7166, + "step": 4039 + }, + { + "epoch": 0.06980923417196572, + "grad_norm": 1.242383200460179, + "learning_rate": 1.9917055253811264e-05, + "loss": 0.9665, + "step": 4040 + }, + { + "epoch": 0.06982651368537462, + "grad_norm": 0.9576121124357406, + "learning_rate": 1.9916983306112624e-05, + "loss": 0.7021, + "step": 4041 + }, + { + "epoch": 0.06984379319878352, + "grad_norm": 0.9354720302916295, + "learning_rate": 1.991691132735324e-05, + "loss": 0.6612, + "step": 4042 + }, + { + "epoch": 0.06986107271219243, + "grad_norm": 1.1884155160423615, + "learning_rate": 1.9916839317533335e-05, + "loss": 0.8771, + "step": 4043 + }, + { + "epoch": 0.06987835222560133, + "grad_norm": 0.986584178227071, + "learning_rate": 1.9916767276653136e-05, + "loss": 0.6033, + "step": 4044 + }, + { + "epoch": 0.06989563173901023, + "grad_norm": 0.9272078382969766, + "learning_rate": 1.9916695204712866e-05, + "loss": 0.7429, + "step": 4045 + }, + { + "epoch": 0.06991291125241914, + "grad_norm": 0.5656955571527297, + "learning_rate": 1.9916623101712754e-05, + "loss": 0.5065, + "step": 4046 + }, + { + "epoch": 0.06993019076582803, + "grad_norm": 0.9972721070770859, + "learning_rate": 1.991655096765303e-05, + "loss": 0.7015, + "step": 4047 + }, + { + "epoch": 0.06994747027923694, + "grad_norm": 0.9903870689413146, + "learning_rate": 1.9916478802533907e-05, + "loss": 0.6861, + "step": 4048 + }, + { + "epoch": 0.06996474979264584, + "grad_norm": 1.2667552847503467, + "learning_rate": 1.9916406606355624e-05, + "loss": 0.9374, + "step": 4049 + }, + { + "epoch": 0.06998202930605474, + "grad_norm": 0.9654846131157979, + "learning_rate": 1.9916334379118402e-05, + "loss": 0.6941, + "step": 4050 + }, + { + "epoch": 0.06999930881946365, + "grad_norm": 0.9906547223332401, + "learning_rate": 1.9916262120822464e-05, + "loss": 0.6066, + "step": 4051 + }, + { + "epoch": 0.07001658833287254, + "grad_norm": 1.2533896019437887, + "learning_rate": 1.991618983146804e-05, + "loss": 0.8316, + "step": 4052 + }, + { + "epoch": 0.07003386784628145, + "grad_norm": 1.0604436472674847, + "learning_rate": 1.9916117511055354e-05, + "loss": 0.7592, + "step": 4053 + }, + { + "epoch": 0.07005114735969035, + "grad_norm": 1.3562619136110992, + "learning_rate": 1.991604515958464e-05, + "loss": 0.9537, + "step": 4054 + }, + { + "epoch": 0.07006842687309925, + "grad_norm": 1.2776443228727128, + "learning_rate": 1.9915972777056117e-05, + "loss": 0.8676, + "step": 4055 + }, + { + "epoch": 0.07008570638650816, + "grad_norm": 1.1495863977397074, + "learning_rate": 1.991590036347001e-05, + "loss": 0.7759, + "step": 4056 + }, + { + "epoch": 0.07010298589991706, + "grad_norm": 0.9702206450953854, + "learning_rate": 1.9915827918826552e-05, + "loss": 0.7053, + "step": 4057 + }, + { + "epoch": 0.07012026541332596, + "grad_norm": 0.7503979712046694, + "learning_rate": 1.9915755443125968e-05, + "loss": 0.657, + "step": 4058 + }, + { + "epoch": 0.07013754492673487, + "grad_norm": 1.1459149198000982, + "learning_rate": 1.9915682936368483e-05, + "loss": 0.7165, + "step": 4059 + }, + { + "epoch": 0.07015482444014376, + "grad_norm": 1.11141886924509, + "learning_rate": 1.9915610398554328e-05, + "loss": 0.7945, + "step": 4060 + }, + { + "epoch": 0.07017210395355267, + "grad_norm": 1.0104378898899136, + "learning_rate": 1.9915537829683728e-05, + "loss": 0.564, + "step": 4061 + }, + { + "epoch": 0.07018938346696157, + "grad_norm": 0.9174099378812003, + "learning_rate": 1.9915465229756905e-05, + "loss": 0.8306, + "step": 4062 + }, + { + "epoch": 0.07020666298037047, + "grad_norm": 0.5596076687202939, + "learning_rate": 1.9915392598774096e-05, + "loss": 0.6092, + "step": 4063 + }, + { + "epoch": 0.07022394249377938, + "grad_norm": 1.1594224233897408, + "learning_rate": 1.9915319936735523e-05, + "loss": 0.8457, + "step": 4064 + }, + { + "epoch": 0.07024122200718828, + "grad_norm": 1.0433505231969924, + "learning_rate": 1.991524724364141e-05, + "loss": 0.8764, + "step": 4065 + }, + { + "epoch": 0.07025850152059718, + "grad_norm": 1.0011901401765193, + "learning_rate": 1.9915174519491996e-05, + "loss": 0.6883, + "step": 4066 + }, + { + "epoch": 0.07027578103400609, + "grad_norm": 1.2768079044549279, + "learning_rate": 1.9915101764287496e-05, + "loss": 0.9844, + "step": 4067 + }, + { + "epoch": 0.07029306054741498, + "grad_norm": 1.304592626700553, + "learning_rate": 1.9915028978028146e-05, + "loss": 0.9789, + "step": 4068 + }, + { + "epoch": 0.07031034006082389, + "grad_norm": 1.1630218660924652, + "learning_rate": 1.9914956160714173e-05, + "loss": 0.6907, + "step": 4069 + }, + { + "epoch": 0.0703276195742328, + "grad_norm": 1.0616615651927042, + "learning_rate": 1.9914883312345803e-05, + "loss": 0.7387, + "step": 4070 + }, + { + "epoch": 0.07034489908764169, + "grad_norm": 0.923179638345293, + "learning_rate": 1.9914810432923264e-05, + "loss": 0.7602, + "step": 4071 + }, + { + "epoch": 0.0703621786010506, + "grad_norm": 0.9752255502680701, + "learning_rate": 1.9914737522446787e-05, + "loss": 0.7895, + "step": 4072 + }, + { + "epoch": 0.0703794581144595, + "grad_norm": 1.0192731453567594, + "learning_rate": 1.9914664580916597e-05, + "loss": 0.6707, + "step": 4073 + }, + { + "epoch": 0.0703967376278684, + "grad_norm": 1.1006638918723297, + "learning_rate": 1.9914591608332926e-05, + "loss": 0.8165, + "step": 4074 + }, + { + "epoch": 0.0704140171412773, + "grad_norm": 0.8517665396014844, + "learning_rate": 1.9914518604695997e-05, + "loss": 0.6415, + "step": 4075 + }, + { + "epoch": 0.0704312966546862, + "grad_norm": 1.0381423284073332, + "learning_rate": 1.9914445570006045e-05, + "loss": 0.5855, + "step": 4076 + }, + { + "epoch": 0.0704485761680951, + "grad_norm": 1.138544877591372, + "learning_rate": 1.9914372504263295e-05, + "loss": 0.7101, + "step": 4077 + }, + { + "epoch": 0.07046585568150401, + "grad_norm": 0.9392787866776461, + "learning_rate": 1.9914299407467975e-05, + "loss": 0.8093, + "step": 4078 + }, + { + "epoch": 0.07048313519491291, + "grad_norm": 0.996078006668974, + "learning_rate": 1.9914226279620322e-05, + "loss": 0.7457, + "step": 4079 + }, + { + "epoch": 0.07050041470832182, + "grad_norm": 0.9526779203404754, + "learning_rate": 1.9914153120720553e-05, + "loss": 0.7099, + "step": 4080 + }, + { + "epoch": 0.07051769422173072, + "grad_norm": 0.9823869549023729, + "learning_rate": 1.9914079930768905e-05, + "loss": 0.6109, + "step": 4081 + }, + { + "epoch": 0.07053497373513962, + "grad_norm": 1.023130959688244, + "learning_rate": 1.9914006709765606e-05, + "loss": 0.743, + "step": 4082 + }, + { + "epoch": 0.07055225324854852, + "grad_norm": 0.8664256578061144, + "learning_rate": 1.9913933457710885e-05, + "loss": 0.7116, + "step": 4083 + }, + { + "epoch": 0.07056953276195742, + "grad_norm": 1.1738855718376298, + "learning_rate": 1.991386017460497e-05, + "loss": 0.7255, + "step": 4084 + }, + { + "epoch": 0.07058681227536633, + "grad_norm": 1.319672569728299, + "learning_rate": 1.991378686044809e-05, + "loss": 0.9441, + "step": 4085 + }, + { + "epoch": 0.07060409178877523, + "grad_norm": 1.2281654973247116, + "learning_rate": 1.9913713515240476e-05, + "loss": 0.7217, + "step": 4086 + }, + { + "epoch": 0.07062137130218413, + "grad_norm": 0.8384589289086128, + "learning_rate": 1.991364013898236e-05, + "loss": 0.7032, + "step": 4087 + }, + { + "epoch": 0.07063865081559303, + "grad_norm": 1.061995218316226, + "learning_rate": 1.9913566731673967e-05, + "loss": 0.6852, + "step": 4088 + }, + { + "epoch": 0.07065593032900193, + "grad_norm": 1.1434571062859227, + "learning_rate": 1.9913493293315532e-05, + "loss": 0.727, + "step": 4089 + }, + { + "epoch": 0.07067320984241084, + "grad_norm": 1.089737372057297, + "learning_rate": 1.9913419823907284e-05, + "loss": 0.6775, + "step": 4090 + }, + { + "epoch": 0.07069048935581974, + "grad_norm": 0.6783449824421602, + "learning_rate": 1.991334632344945e-05, + "loss": 0.8125, + "step": 4091 + }, + { + "epoch": 0.07070776886922864, + "grad_norm": 1.1106714381772442, + "learning_rate": 1.991327279194226e-05, + "loss": 0.4668, + "step": 4092 + }, + { + "epoch": 0.07072504838263755, + "grad_norm": 1.1350311078187292, + "learning_rate": 1.991319922938595e-05, + "loss": 0.7294, + "step": 4093 + }, + { + "epoch": 0.07074232789604645, + "grad_norm": 1.4201189172066218, + "learning_rate": 1.9913125635780744e-05, + "loss": 0.7708, + "step": 4094 + }, + { + "epoch": 0.07075960740945535, + "grad_norm": 1.176211605394663, + "learning_rate": 1.9913052011126875e-05, + "loss": 0.8219, + "step": 4095 + }, + { + "epoch": 0.07077688692286425, + "grad_norm": 1.4307886000569838, + "learning_rate": 1.9912978355424576e-05, + "loss": 0.8072, + "step": 4096 + }, + { + "epoch": 0.07079416643627315, + "grad_norm": 1.0145748556206273, + "learning_rate": 1.9912904668674075e-05, + "loss": 0.6046, + "step": 4097 + }, + { + "epoch": 0.07081144594968206, + "grad_norm": 1.127201876854311, + "learning_rate": 1.9912830950875597e-05, + "loss": 0.7688, + "step": 4098 + }, + { + "epoch": 0.07082872546309096, + "grad_norm": 1.3324478339435903, + "learning_rate": 1.991275720202939e-05, + "loss": 0.8912, + "step": 4099 + }, + { + "epoch": 0.07084600497649986, + "grad_norm": 1.0739030286283595, + "learning_rate": 1.9912683422135664e-05, + "loss": 0.8342, + "step": 4100 + }, + { + "epoch": 0.07086328448990876, + "grad_norm": 0.9270761282612053, + "learning_rate": 1.9912609611194666e-05, + "loss": 0.6419, + "step": 4101 + }, + { + "epoch": 0.07088056400331767, + "grad_norm": 1.4133418769338464, + "learning_rate": 1.991253576920662e-05, + "loss": 0.7947, + "step": 4102 + }, + { + "epoch": 0.07089784351672657, + "grad_norm": 0.8558496210287577, + "learning_rate": 1.9912461896171757e-05, + "loss": 0.6327, + "step": 4103 + }, + { + "epoch": 0.07091512303013547, + "grad_norm": 1.2301067145112126, + "learning_rate": 1.991238799209031e-05, + "loss": 0.8497, + "step": 4104 + }, + { + "epoch": 0.07093240254354437, + "grad_norm": 1.0784102759472713, + "learning_rate": 1.9912314056962512e-05, + "loss": 0.687, + "step": 4105 + }, + { + "epoch": 0.07094968205695328, + "grad_norm": 1.1032802238356096, + "learning_rate": 1.9912240090788595e-05, + "loss": 0.7435, + "step": 4106 + }, + { + "epoch": 0.07096696157036218, + "grad_norm": 1.0240055747598695, + "learning_rate": 1.9912166093568788e-05, + "loss": 0.7678, + "step": 4107 + }, + { + "epoch": 0.07098424108377108, + "grad_norm": 0.9193173610214777, + "learning_rate": 1.991209206530332e-05, + "loss": 0.6068, + "step": 4108 + }, + { + "epoch": 0.07100152059717998, + "grad_norm": 1.1242526819501584, + "learning_rate": 1.991201800599243e-05, + "loss": 0.7351, + "step": 4109 + }, + { + "epoch": 0.07101880011058889, + "grad_norm": 1.3726957541269809, + "learning_rate": 1.9911943915636347e-05, + "loss": 0.8357, + "step": 4110 + }, + { + "epoch": 0.07103607962399779, + "grad_norm": 1.4616096062927526, + "learning_rate": 1.99118697942353e-05, + "loss": 0.9355, + "step": 4111 + }, + { + "epoch": 0.0710533591374067, + "grad_norm": 1.012124036744433, + "learning_rate": 1.9911795641789523e-05, + "loss": 0.7448, + "step": 4112 + }, + { + "epoch": 0.07107063865081559, + "grad_norm": 0.9008649637737347, + "learning_rate": 1.991172145829925e-05, + "loss": 0.6609, + "step": 4113 + }, + { + "epoch": 0.0710879181642245, + "grad_norm": 0.5818328684193289, + "learning_rate": 1.991164724376471e-05, + "loss": 0.5535, + "step": 4114 + }, + { + "epoch": 0.0711051976776334, + "grad_norm": 1.2699154206896206, + "learning_rate": 1.991157299818614e-05, + "loss": 0.9918, + "step": 4115 + }, + { + "epoch": 0.0711224771910423, + "grad_norm": 1.0356744194265803, + "learning_rate": 1.991149872156377e-05, + "loss": 0.7385, + "step": 4116 + }, + { + "epoch": 0.0711397567044512, + "grad_norm": 0.9799358452350362, + "learning_rate": 1.9911424413897835e-05, + "loss": 0.4715, + "step": 4117 + }, + { + "epoch": 0.07115703621786011, + "grad_norm": 1.0247186493662257, + "learning_rate": 1.991135007518856e-05, + "loss": 0.9869, + "step": 4118 + }, + { + "epoch": 0.071174315731269, + "grad_norm": 1.057136628994042, + "learning_rate": 1.9911275705436186e-05, + "loss": 0.8726, + "step": 4119 + }, + { + "epoch": 0.07119159524467791, + "grad_norm": 0.8783151958712567, + "learning_rate": 1.9911201304640942e-05, + "loss": 0.6008, + "step": 4120 + }, + { + "epoch": 0.07120887475808681, + "grad_norm": 1.2448749094231528, + "learning_rate": 1.9911126872803065e-05, + "loss": 0.9247, + "step": 4121 + }, + { + "epoch": 0.07122615427149571, + "grad_norm": 0.6133039491950416, + "learning_rate": 1.9911052409922783e-05, + "loss": 0.4603, + "step": 4122 + }, + { + "epoch": 0.07124343378490462, + "grad_norm": 1.1759024730719394, + "learning_rate": 1.9910977916000335e-05, + "loss": 0.7383, + "step": 4123 + }, + { + "epoch": 0.07126071329831352, + "grad_norm": 1.054568882150706, + "learning_rate": 1.991090339103595e-05, + "loss": 0.5581, + "step": 4124 + }, + { + "epoch": 0.07127799281172242, + "grad_norm": 1.0347055005378336, + "learning_rate": 1.991082883502986e-05, + "loss": 0.6885, + "step": 4125 + }, + { + "epoch": 0.07129527232513132, + "grad_norm": 0.9811988606728681, + "learning_rate": 1.99107542479823e-05, + "loss": 0.6566, + "step": 4126 + }, + { + "epoch": 0.07131255183854023, + "grad_norm": 0.940196911397967, + "learning_rate": 1.9910679629893508e-05, + "loss": 0.6514, + "step": 4127 + }, + { + "epoch": 0.07132983135194913, + "grad_norm": 1.054186880682362, + "learning_rate": 1.991060498076371e-05, + "loss": 0.7523, + "step": 4128 + }, + { + "epoch": 0.07134711086535803, + "grad_norm": 1.1824131898359485, + "learning_rate": 1.991053030059315e-05, + "loss": 0.8022, + "step": 4129 + }, + { + "epoch": 0.07136439037876693, + "grad_norm": 0.7981314946285306, + "learning_rate": 1.991045558938205e-05, + "loss": 0.6494, + "step": 4130 + }, + { + "epoch": 0.07138166989217584, + "grad_norm": 1.1672867063491177, + "learning_rate": 1.991038084713065e-05, + "loss": 0.8163, + "step": 4131 + }, + { + "epoch": 0.07139894940558474, + "grad_norm": 1.2656111653510111, + "learning_rate": 1.991030607383919e-05, + "loss": 0.867, + "step": 4132 + }, + { + "epoch": 0.07141622891899364, + "grad_norm": 1.185224726193023, + "learning_rate": 1.9910231269507892e-05, + "loss": 0.8251, + "step": 4133 + }, + { + "epoch": 0.07143350843240254, + "grad_norm": 1.0978967337725112, + "learning_rate": 1.9910156434137004e-05, + "loss": 0.756, + "step": 4134 + }, + { + "epoch": 0.07145078794581144, + "grad_norm": 1.1752552768981857, + "learning_rate": 1.9910081567726746e-05, + "loss": 0.8611, + "step": 4135 + }, + { + "epoch": 0.07146806745922035, + "grad_norm": 1.163269537069197, + "learning_rate": 1.991000667027736e-05, + "loss": 0.8526, + "step": 4136 + }, + { + "epoch": 0.07148534697262925, + "grad_norm": 0.8974246007821468, + "learning_rate": 1.9909931741789085e-05, + "loss": 0.8106, + "step": 4137 + }, + { + "epoch": 0.07150262648603815, + "grad_norm": 1.3138343089467728, + "learning_rate": 1.9909856782262147e-05, + "loss": 0.8501, + "step": 4138 + }, + { + "epoch": 0.07151990599944706, + "grad_norm": 0.9853828023598433, + "learning_rate": 1.9909781791696785e-05, + "loss": 0.6412, + "step": 4139 + }, + { + "epoch": 0.07153718551285596, + "grad_norm": 1.0095388438524717, + "learning_rate": 1.9909706770093235e-05, + "loss": 0.6372, + "step": 4140 + }, + { + "epoch": 0.07155446502626486, + "grad_norm": 1.0223079596915279, + "learning_rate": 1.9909631717451724e-05, + "loss": 0.6168, + "step": 4141 + }, + { + "epoch": 0.07157174453967376, + "grad_norm": 1.2323901632999041, + "learning_rate": 1.9909556633772497e-05, + "loss": 0.6198, + "step": 4142 + }, + { + "epoch": 0.07158902405308266, + "grad_norm": 1.126572656495869, + "learning_rate": 1.9909481519055787e-05, + "loss": 0.882, + "step": 4143 + }, + { + "epoch": 0.07160630356649157, + "grad_norm": 1.0016942996335478, + "learning_rate": 1.990940637330183e-05, + "loss": 0.8557, + "step": 4144 + }, + { + "epoch": 0.07162358307990047, + "grad_norm": 0.9838582560842595, + "learning_rate": 1.9909331196510852e-05, + "loss": 0.8576, + "step": 4145 + }, + { + "epoch": 0.07164086259330937, + "grad_norm": 1.2516494063974588, + "learning_rate": 1.99092559886831e-05, + "loss": 0.9773, + "step": 4146 + }, + { + "epoch": 0.07165814210671828, + "grad_norm": 0.8882702275550859, + "learning_rate": 1.9909180749818808e-05, + "loss": 0.5116, + "step": 4147 + }, + { + "epoch": 0.07167542162012717, + "grad_norm": 1.210965135601838, + "learning_rate": 1.9909105479918205e-05, + "loss": 0.7417, + "step": 4148 + }, + { + "epoch": 0.07169270113353608, + "grad_norm": 1.2677528735378787, + "learning_rate": 1.990903017898153e-05, + "loss": 0.7518, + "step": 4149 + }, + { + "epoch": 0.07170998064694498, + "grad_norm": 1.3836776808434936, + "learning_rate": 1.990895484700902e-05, + "loss": 0.6955, + "step": 4150 + }, + { + "epoch": 0.07172726016035388, + "grad_norm": 0.9558775026423902, + "learning_rate": 1.9908879484000912e-05, + "loss": 0.6741, + "step": 4151 + }, + { + "epoch": 0.07174453967376279, + "grad_norm": 1.064773257933771, + "learning_rate": 1.990880408995744e-05, + "loss": 0.9351, + "step": 4152 + }, + { + "epoch": 0.07176181918717169, + "grad_norm": 0.8299201004840986, + "learning_rate": 1.990872866487884e-05, + "loss": 0.6711, + "step": 4153 + }, + { + "epoch": 0.07177909870058059, + "grad_norm": 1.0429384093683418, + "learning_rate": 1.9908653208765347e-05, + "loss": 0.6005, + "step": 4154 + }, + { + "epoch": 0.0717963782139895, + "grad_norm": 1.254091292705033, + "learning_rate": 1.9908577721617202e-05, + "loss": 0.6037, + "step": 4155 + }, + { + "epoch": 0.0718136577273984, + "grad_norm": 0.9611336490879498, + "learning_rate": 1.9908502203434637e-05, + "loss": 0.7247, + "step": 4156 + }, + { + "epoch": 0.0718309372408073, + "grad_norm": 1.1197900610897638, + "learning_rate": 1.9908426654217893e-05, + "loss": 0.7547, + "step": 4157 + }, + { + "epoch": 0.0718482167542162, + "grad_norm": 1.0556271337219436, + "learning_rate": 1.99083510739672e-05, + "loss": 0.7112, + "step": 4158 + }, + { + "epoch": 0.0718654962676251, + "grad_norm": 0.576349318965596, + "learning_rate": 1.99082754626828e-05, + "loss": 0.6666, + "step": 4159 + }, + { + "epoch": 0.07188277578103401, + "grad_norm": 1.1657110077250294, + "learning_rate": 1.9908199820364926e-05, + "loss": 0.8253, + "step": 4160 + }, + { + "epoch": 0.0719000552944429, + "grad_norm": 1.2803902568871348, + "learning_rate": 1.990812414701382e-05, + "loss": 0.8188, + "step": 4161 + }, + { + "epoch": 0.07191733480785181, + "grad_norm": 0.7189441044883872, + "learning_rate": 1.9908048442629716e-05, + "loss": 0.688, + "step": 4162 + }, + { + "epoch": 0.0719346143212607, + "grad_norm": 1.1052204439973279, + "learning_rate": 1.990797270721285e-05, + "loss": 0.6758, + "step": 4163 + }, + { + "epoch": 0.07195189383466961, + "grad_norm": 0.9239081871898714, + "learning_rate": 1.990789694076346e-05, + "loss": 0.7484, + "step": 4164 + }, + { + "epoch": 0.07196917334807852, + "grad_norm": 1.3050295940917827, + "learning_rate": 1.9907821143281786e-05, + "loss": 0.9831, + "step": 4165 + }, + { + "epoch": 0.07198645286148742, + "grad_norm": 0.9930275140525618, + "learning_rate": 1.9907745314768064e-05, + "loss": 0.9218, + "step": 4166 + }, + { + "epoch": 0.07200373237489632, + "grad_norm": 0.8435904031664938, + "learning_rate": 1.9907669455222527e-05, + "loss": 0.734, + "step": 4167 + }, + { + "epoch": 0.07202101188830523, + "grad_norm": 0.8989539077887376, + "learning_rate": 1.9907593564645417e-05, + "loss": 0.926, + "step": 4168 + }, + { + "epoch": 0.07203829140171412, + "grad_norm": 0.8745376146684076, + "learning_rate": 1.9907517643036973e-05, + "loss": 0.5483, + "step": 4169 + }, + { + "epoch": 0.07205557091512303, + "grad_norm": 1.1215560138654055, + "learning_rate": 1.990744169039743e-05, + "loss": 0.8905, + "step": 4170 + }, + { + "epoch": 0.07207285042853193, + "grad_norm": 1.072063529819442, + "learning_rate": 1.9907365706727028e-05, + "loss": 0.6414, + "step": 4171 + }, + { + "epoch": 0.07209012994194083, + "grad_norm": 1.080579009696351, + "learning_rate": 1.9907289692026e-05, + "loss": 0.697, + "step": 4172 + }, + { + "epoch": 0.07210740945534974, + "grad_norm": 1.0655137624818678, + "learning_rate": 1.990721364629459e-05, + "loss": 0.8326, + "step": 4173 + }, + { + "epoch": 0.07212468896875864, + "grad_norm": 1.3201454963534083, + "learning_rate": 1.9907137569533033e-05, + "loss": 1.0092, + "step": 4174 + }, + { + "epoch": 0.07214196848216754, + "grad_norm": 1.1473837026463707, + "learning_rate": 1.990706146174157e-05, + "loss": 1.0187, + "step": 4175 + }, + { + "epoch": 0.07215924799557645, + "grad_norm": 0.9345238886272593, + "learning_rate": 1.9906985322920435e-05, + "loss": 0.8186, + "step": 4176 + }, + { + "epoch": 0.07217652750898534, + "grad_norm": 0.8543470254862016, + "learning_rate": 1.9906909153069874e-05, + "loss": 0.8197, + "step": 4177 + }, + { + "epoch": 0.07219380702239425, + "grad_norm": 1.0239507995310166, + "learning_rate": 1.9906832952190117e-05, + "loss": 0.6762, + "step": 4178 + }, + { + "epoch": 0.07221108653580315, + "grad_norm": 1.1665878909970715, + "learning_rate": 1.9906756720281406e-05, + "loss": 0.5965, + "step": 4179 + }, + { + "epoch": 0.07222836604921205, + "grad_norm": 0.9217984373198295, + "learning_rate": 1.990668045734398e-05, + "loss": 0.688, + "step": 4180 + }, + { + "epoch": 0.07224564556262096, + "grad_norm": 0.9408710704374247, + "learning_rate": 1.990660416337808e-05, + "loss": 0.7219, + "step": 4181 + }, + { + "epoch": 0.07226292507602985, + "grad_norm": 1.1302240066977327, + "learning_rate": 1.9906527838383942e-05, + "loss": 0.7047, + "step": 4182 + }, + { + "epoch": 0.07228020458943876, + "grad_norm": 0.825883511835026, + "learning_rate": 1.9906451482361807e-05, + "loss": 0.7248, + "step": 4183 + }, + { + "epoch": 0.07229748410284767, + "grad_norm": 0.9867418410533361, + "learning_rate": 1.990637509531191e-05, + "loss": 0.7817, + "step": 4184 + }, + { + "epoch": 0.07231476361625656, + "grad_norm": 1.006863527500809, + "learning_rate": 1.9906298677234494e-05, + "loss": 0.5982, + "step": 4185 + }, + { + "epoch": 0.07233204312966547, + "grad_norm": 0.9016401385484221, + "learning_rate": 1.9906222228129797e-05, + "loss": 0.5469, + "step": 4186 + }, + { + "epoch": 0.07234932264307437, + "grad_norm": 0.9552288828031442, + "learning_rate": 1.9906145747998064e-05, + "loss": 0.6103, + "step": 4187 + }, + { + "epoch": 0.07236660215648327, + "grad_norm": 1.0074696631314224, + "learning_rate": 1.9906069236839527e-05, + "loss": 0.692, + "step": 4188 + }, + { + "epoch": 0.07238388166989218, + "grad_norm": 0.8467406636044736, + "learning_rate": 1.9905992694654424e-05, + "loss": 0.4037, + "step": 4189 + }, + { + "epoch": 0.07240116118330107, + "grad_norm": 0.9804863372843314, + "learning_rate": 1.9905916121443006e-05, + "loss": 0.6627, + "step": 4190 + }, + { + "epoch": 0.07241844069670998, + "grad_norm": 1.1726325242571871, + "learning_rate": 1.9905839517205502e-05, + "loss": 0.7062, + "step": 4191 + }, + { + "epoch": 0.07243572021011889, + "grad_norm": 1.2959788883563306, + "learning_rate": 1.9905762881942158e-05, + "loss": 0.8475, + "step": 4192 + }, + { + "epoch": 0.07245299972352778, + "grad_norm": 1.3882232582702234, + "learning_rate": 1.990568621565321e-05, + "loss": 0.987, + "step": 4193 + }, + { + "epoch": 0.07247027923693669, + "grad_norm": 0.5975229568302262, + "learning_rate": 1.99056095183389e-05, + "loss": 0.5995, + "step": 4194 + }, + { + "epoch": 0.07248755875034558, + "grad_norm": 1.1179166811889756, + "learning_rate": 1.990553278999947e-05, + "loss": 0.9907, + "step": 4195 + }, + { + "epoch": 0.07250483826375449, + "grad_norm": 1.2895902303752158, + "learning_rate": 1.9905456030635156e-05, + "loss": 0.5509, + "step": 4196 + }, + { + "epoch": 0.0725221177771634, + "grad_norm": 1.182397190424322, + "learning_rate": 1.9905379240246205e-05, + "loss": 0.6309, + "step": 4197 + }, + { + "epoch": 0.0725393972905723, + "grad_norm": 1.071761362033043, + "learning_rate": 1.990530241883285e-05, + "loss": 0.7458, + "step": 4198 + }, + { + "epoch": 0.0725566768039812, + "grad_norm": 0.8826220692813973, + "learning_rate": 1.990522556639533e-05, + "loss": 0.7981, + "step": 4199 + }, + { + "epoch": 0.07257395631739011, + "grad_norm": 0.9492731478315177, + "learning_rate": 1.9905148682933898e-05, + "loss": 0.6702, + "step": 4200 + }, + { + "epoch": 0.072591235830799, + "grad_norm": 1.1889983497693923, + "learning_rate": 1.9905071768448785e-05, + "loss": 0.9727, + "step": 4201 + }, + { + "epoch": 0.07260851534420791, + "grad_norm": 0.8924370238894723, + "learning_rate": 1.9904994822940237e-05, + "loss": 0.7486, + "step": 4202 + }, + { + "epoch": 0.0726257948576168, + "grad_norm": 1.2583845209514584, + "learning_rate": 1.990491784640849e-05, + "loss": 0.7965, + "step": 4203 + }, + { + "epoch": 0.07264307437102571, + "grad_norm": 0.9058807699217105, + "learning_rate": 1.990484083885379e-05, + "loss": 0.7032, + "step": 4204 + }, + { + "epoch": 0.07266035388443462, + "grad_norm": 0.9317017385594526, + "learning_rate": 1.990476380027637e-05, + "loss": 0.6779, + "step": 4205 + }, + { + "epoch": 0.07267763339784351, + "grad_norm": 1.197987250978082, + "learning_rate": 1.9904686730676483e-05, + "loss": 0.7041, + "step": 4206 + }, + { + "epoch": 0.07269491291125242, + "grad_norm": 1.2045237384681386, + "learning_rate": 1.9904609630054366e-05, + "loss": 0.903, + "step": 4207 + }, + { + "epoch": 0.07271219242466131, + "grad_norm": 1.0098054651901203, + "learning_rate": 1.9904532498410252e-05, + "loss": 0.7485, + "step": 4208 + }, + { + "epoch": 0.07272947193807022, + "grad_norm": 0.9892210059631075, + "learning_rate": 1.9904455335744395e-05, + "loss": 0.73, + "step": 4209 + }, + { + "epoch": 0.07274675145147913, + "grad_norm": 1.0548394357818502, + "learning_rate": 1.990437814205703e-05, + "loss": 0.7533, + "step": 4210 + }, + { + "epoch": 0.07276403096488802, + "grad_norm": 0.9273412528383821, + "learning_rate": 1.9904300917348397e-05, + "loss": 0.5353, + "step": 4211 + }, + { + "epoch": 0.07278131047829693, + "grad_norm": 0.6217773849077408, + "learning_rate": 1.9904223661618744e-05, + "loss": 0.6076, + "step": 4212 + }, + { + "epoch": 0.07279858999170584, + "grad_norm": 0.8045009971766944, + "learning_rate": 1.990414637486831e-05, + "loss": 0.8247, + "step": 4213 + }, + { + "epoch": 0.07281586950511473, + "grad_norm": 0.6412349934166851, + "learning_rate": 1.9904069057097332e-05, + "loss": 0.8968, + "step": 4214 + }, + { + "epoch": 0.07283314901852364, + "grad_norm": 1.0275025759015697, + "learning_rate": 1.9903991708306064e-05, + "loss": 0.7455, + "step": 4215 + }, + { + "epoch": 0.07285042853193253, + "grad_norm": 1.1129346734511345, + "learning_rate": 1.9903914328494735e-05, + "loss": 0.783, + "step": 4216 + }, + { + "epoch": 0.07286770804534144, + "grad_norm": 1.1705350905640088, + "learning_rate": 1.99038369176636e-05, + "loss": 0.7176, + "step": 4217 + }, + { + "epoch": 0.07288498755875035, + "grad_norm": 0.9342756839997611, + "learning_rate": 1.990375947581289e-05, + "loss": 0.7337, + "step": 4218 + }, + { + "epoch": 0.07290226707215924, + "grad_norm": 1.4346098729063037, + "learning_rate": 1.9903682002942855e-05, + "loss": 0.7679, + "step": 4219 + }, + { + "epoch": 0.07291954658556815, + "grad_norm": 0.9104289577057185, + "learning_rate": 1.9903604499053735e-05, + "loss": 0.7065, + "step": 4220 + }, + { + "epoch": 0.07293682609897706, + "grad_norm": 0.8846444406790027, + "learning_rate": 1.990352696414577e-05, + "loss": 0.6858, + "step": 4221 + }, + { + "epoch": 0.07295410561238595, + "grad_norm": 1.075066075270854, + "learning_rate": 1.990344939821921e-05, + "loss": 0.7132, + "step": 4222 + }, + { + "epoch": 0.07297138512579486, + "grad_norm": 0.8694535321553185, + "learning_rate": 1.9903371801274293e-05, + "loss": 0.592, + "step": 4223 + }, + { + "epoch": 0.07298866463920375, + "grad_norm": 0.9798225419245626, + "learning_rate": 1.9903294173311262e-05, + "loss": 0.9166, + "step": 4224 + }, + { + "epoch": 0.07300594415261266, + "grad_norm": 1.0307797671469414, + "learning_rate": 1.9903216514330362e-05, + "loss": 0.7519, + "step": 4225 + }, + { + "epoch": 0.07302322366602157, + "grad_norm": 1.0194277391059854, + "learning_rate": 1.9903138824331834e-05, + "loss": 0.745, + "step": 4226 + }, + { + "epoch": 0.07304050317943046, + "grad_norm": 0.9712239864526886, + "learning_rate": 1.9903061103315925e-05, + "loss": 0.6887, + "step": 4227 + }, + { + "epoch": 0.07305778269283937, + "grad_norm": 1.0997275747049924, + "learning_rate": 1.9902983351282872e-05, + "loss": 0.6776, + "step": 4228 + }, + { + "epoch": 0.07307506220624828, + "grad_norm": 0.5364424158203162, + "learning_rate": 1.9902905568232923e-05, + "loss": 0.547, + "step": 4229 + }, + { + "epoch": 0.07309234171965717, + "grad_norm": 1.3792461817282156, + "learning_rate": 1.9902827754166324e-05, + "loss": 0.8811, + "step": 4230 + }, + { + "epoch": 0.07310962123306608, + "grad_norm": 0.9816624536214461, + "learning_rate": 1.9902749909083317e-05, + "loss": 0.6549, + "step": 4231 + }, + { + "epoch": 0.07312690074647497, + "grad_norm": 0.9443784699913326, + "learning_rate": 1.990267203298414e-05, + "loss": 0.8365, + "step": 4232 + }, + { + "epoch": 0.07314418025988388, + "grad_norm": 0.9138369914514337, + "learning_rate": 1.9902594125869047e-05, + "loss": 0.6714, + "step": 4233 + }, + { + "epoch": 0.07316145977329279, + "grad_norm": 0.8817942582968238, + "learning_rate": 1.990251618773827e-05, + "loss": 0.8805, + "step": 4234 + }, + { + "epoch": 0.07317873928670168, + "grad_norm": 0.8238220999566203, + "learning_rate": 1.9902438218592066e-05, + "loss": 0.8068, + "step": 4235 + }, + { + "epoch": 0.07319601880011059, + "grad_norm": 0.9706303963790611, + "learning_rate": 1.990236021843067e-05, + "loss": 0.7786, + "step": 4236 + }, + { + "epoch": 0.0732132983135195, + "grad_norm": 0.8830054847791339, + "learning_rate": 1.990228218725433e-05, + "loss": 0.6727, + "step": 4237 + }, + { + "epoch": 0.07323057782692839, + "grad_norm": 0.6344842397198647, + "learning_rate": 1.990220412506329e-05, + "loss": 0.8091, + "step": 4238 + }, + { + "epoch": 0.0732478573403373, + "grad_norm": 1.0370962724952066, + "learning_rate": 1.9902126031857795e-05, + "loss": 0.4569, + "step": 4239 + }, + { + "epoch": 0.0732651368537462, + "grad_norm": 1.2706267855528244, + "learning_rate": 1.9902047907638086e-05, + "loss": 0.9296, + "step": 4240 + }, + { + "epoch": 0.0732824163671551, + "grad_norm": 0.8819413699411712, + "learning_rate": 1.990196975240441e-05, + "loss": 0.6576, + "step": 4241 + }, + { + "epoch": 0.07329969588056401, + "grad_norm": 0.9573113641808898, + "learning_rate": 1.9901891566157014e-05, + "loss": 0.8234, + "step": 4242 + }, + { + "epoch": 0.0733169753939729, + "grad_norm": 0.8005637313410767, + "learning_rate": 1.9901813348896143e-05, + "loss": 0.6923, + "step": 4243 + }, + { + "epoch": 0.07333425490738181, + "grad_norm": 1.2660674861085959, + "learning_rate": 1.9901735100622038e-05, + "loss": 0.6275, + "step": 4244 + }, + { + "epoch": 0.0733515344207907, + "grad_norm": 0.8621720841088135, + "learning_rate": 1.9901656821334946e-05, + "loss": 0.7471, + "step": 4245 + }, + { + "epoch": 0.07336881393419961, + "grad_norm": 0.9111330947209925, + "learning_rate": 1.9901578511035112e-05, + "loss": 0.6697, + "step": 4246 + }, + { + "epoch": 0.07338609344760852, + "grad_norm": 1.3382939084204055, + "learning_rate": 1.9901500169722783e-05, + "loss": 0.8578, + "step": 4247 + }, + { + "epoch": 0.07340337296101741, + "grad_norm": 0.8587830281857929, + "learning_rate": 1.9901421797398202e-05, + "loss": 0.7749, + "step": 4248 + }, + { + "epoch": 0.07342065247442632, + "grad_norm": 1.1196137515964035, + "learning_rate": 1.9901343394061614e-05, + "loss": 0.7647, + "step": 4249 + }, + { + "epoch": 0.07343793198783523, + "grad_norm": 0.9106495867027887, + "learning_rate": 1.990126495971327e-05, + "loss": 0.6308, + "step": 4250 + }, + { + "epoch": 0.07345521150124412, + "grad_norm": 0.5606501928502833, + "learning_rate": 1.9901186494353406e-05, + "loss": 0.7694, + "step": 4251 + }, + { + "epoch": 0.07347249101465303, + "grad_norm": 0.9734691289205859, + "learning_rate": 1.990110799798228e-05, + "loss": 0.8053, + "step": 4252 + }, + { + "epoch": 0.07348977052806192, + "grad_norm": 0.9445098558957506, + "learning_rate": 1.9901029470600125e-05, + "loss": 0.7738, + "step": 4253 + }, + { + "epoch": 0.07350705004147083, + "grad_norm": 0.9382833851083138, + "learning_rate": 1.9900950912207197e-05, + "loss": 0.7857, + "step": 4254 + }, + { + "epoch": 0.07352432955487974, + "grad_norm": 1.0362457332469535, + "learning_rate": 1.9900872322803736e-05, + "loss": 0.7637, + "step": 4255 + }, + { + "epoch": 0.07354160906828863, + "grad_norm": 0.9807977828609618, + "learning_rate": 1.9900793702389995e-05, + "loss": 0.8028, + "step": 4256 + }, + { + "epoch": 0.07355888858169754, + "grad_norm": 0.8911304068052205, + "learning_rate": 1.990071505096621e-05, + "loss": 0.7772, + "step": 4257 + }, + { + "epoch": 0.07357616809510645, + "grad_norm": 0.8679639984843416, + "learning_rate": 1.9900636368532637e-05, + "loss": 0.6288, + "step": 4258 + }, + { + "epoch": 0.07359344760851534, + "grad_norm": 0.9929826827051478, + "learning_rate": 1.9900557655089518e-05, + "loss": 0.7893, + "step": 4259 + }, + { + "epoch": 0.07361072712192425, + "grad_norm": 0.8244179412790615, + "learning_rate": 1.99004789106371e-05, + "loss": 0.4812, + "step": 4260 + }, + { + "epoch": 0.07362800663533314, + "grad_norm": 1.2168344389238692, + "learning_rate": 1.9900400135175627e-05, + "loss": 0.792, + "step": 4261 + }, + { + "epoch": 0.07364528614874205, + "grad_norm": 0.9717130267154565, + "learning_rate": 1.990032132870535e-05, + "loss": 0.6969, + "step": 4262 + }, + { + "epoch": 0.07366256566215096, + "grad_norm": 0.5308370038197358, + "learning_rate": 1.9900242491226517e-05, + "loss": 0.5432, + "step": 4263 + }, + { + "epoch": 0.07367984517555985, + "grad_norm": 1.3693599737820166, + "learning_rate": 1.9900163622739367e-05, + "loss": 0.941, + "step": 4264 + }, + { + "epoch": 0.07369712468896876, + "grad_norm": 1.3958634942883312, + "learning_rate": 1.9900084723244156e-05, + "loss": 0.8765, + "step": 4265 + }, + { + "epoch": 0.07371440420237767, + "grad_norm": 1.108689287352905, + "learning_rate": 1.9900005792741127e-05, + "loss": 0.7593, + "step": 4266 + }, + { + "epoch": 0.07373168371578656, + "grad_norm": 0.8441891992965386, + "learning_rate": 1.9899926831230525e-05, + "loss": 0.5516, + "step": 4267 + }, + { + "epoch": 0.07374896322919547, + "grad_norm": 0.5667247334485996, + "learning_rate": 1.9899847838712604e-05, + "loss": 0.542, + "step": 4268 + }, + { + "epoch": 0.07376624274260436, + "grad_norm": 0.9983682676914946, + "learning_rate": 1.9899768815187603e-05, + "loss": 0.7123, + "step": 4269 + }, + { + "epoch": 0.07378352225601327, + "grad_norm": 0.6266328019065135, + "learning_rate": 1.9899689760655775e-05, + "loss": 0.5943, + "step": 4270 + }, + { + "epoch": 0.07380080176942218, + "grad_norm": 0.8820954193487058, + "learning_rate": 1.9899610675117364e-05, + "loss": 0.5968, + "step": 4271 + }, + { + "epoch": 0.07381808128283107, + "grad_norm": 0.5511422621070553, + "learning_rate": 1.9899531558572624e-05, + "loss": 0.5661, + "step": 4272 + }, + { + "epoch": 0.07383536079623998, + "grad_norm": 1.312243247051468, + "learning_rate": 1.98994524110218e-05, + "loss": 0.8652, + "step": 4273 + }, + { + "epoch": 0.07385264030964889, + "grad_norm": 0.8782268908413651, + "learning_rate": 1.9899373232465134e-05, + "loss": 0.935, + "step": 4274 + }, + { + "epoch": 0.07386991982305778, + "grad_norm": 0.7276908640548585, + "learning_rate": 1.989929402290288e-05, + "loss": 0.6498, + "step": 4275 + }, + { + "epoch": 0.07388719933646669, + "grad_norm": 1.3053781988893833, + "learning_rate": 1.989921478233529e-05, + "loss": 0.9648, + "step": 4276 + }, + { + "epoch": 0.07390447884987558, + "grad_norm": 1.1330394724502053, + "learning_rate": 1.9899135510762598e-05, + "loss": 0.7901, + "step": 4277 + }, + { + "epoch": 0.07392175836328449, + "grad_norm": 0.9045825651521194, + "learning_rate": 1.989905620818507e-05, + "loss": 0.7525, + "step": 4278 + }, + { + "epoch": 0.0739390378766934, + "grad_norm": 0.79802222840121, + "learning_rate": 1.989897687460294e-05, + "loss": 0.575, + "step": 4279 + }, + { + "epoch": 0.07395631739010229, + "grad_norm": 0.8255062359231103, + "learning_rate": 1.9898897510016463e-05, + "loss": 0.6999, + "step": 4280 + }, + { + "epoch": 0.0739735969035112, + "grad_norm": 1.1577301594355092, + "learning_rate": 1.9898818114425887e-05, + "loss": 0.7861, + "step": 4281 + }, + { + "epoch": 0.07399087641692009, + "grad_norm": 1.1965059133451856, + "learning_rate": 1.989873868783146e-05, + "loss": 0.5439, + "step": 4282 + }, + { + "epoch": 0.074008155930329, + "grad_norm": 1.1064889787479033, + "learning_rate": 1.9898659230233432e-05, + "loss": 0.9834, + "step": 4283 + }, + { + "epoch": 0.07402543544373791, + "grad_norm": 0.5927922437534552, + "learning_rate": 1.9898579741632052e-05, + "loss": 0.6292, + "step": 4284 + }, + { + "epoch": 0.0740427149571468, + "grad_norm": 0.9777524488952523, + "learning_rate": 1.9898500222027565e-05, + "loss": 0.6215, + "step": 4285 + }, + { + "epoch": 0.07405999447055571, + "grad_norm": 0.950476961806977, + "learning_rate": 1.9898420671420226e-05, + "loss": 0.6812, + "step": 4286 + }, + { + "epoch": 0.07407727398396462, + "grad_norm": 0.990055923190782, + "learning_rate": 1.989834108981028e-05, + "loss": 0.9253, + "step": 4287 + }, + { + "epoch": 0.07409455349737351, + "grad_norm": 1.201968237176566, + "learning_rate": 1.989826147719798e-05, + "loss": 0.8037, + "step": 4288 + }, + { + "epoch": 0.07411183301078242, + "grad_norm": 1.1140448795767732, + "learning_rate": 1.989818183358357e-05, + "loss": 1.0719, + "step": 4289 + }, + { + "epoch": 0.07412911252419131, + "grad_norm": 0.6565118489480214, + "learning_rate": 1.98981021589673e-05, + "loss": 0.6863, + "step": 4290 + }, + { + "epoch": 0.07414639203760022, + "grad_norm": 1.0979306755039657, + "learning_rate": 1.9898022453349423e-05, + "loss": 0.7414, + "step": 4291 + }, + { + "epoch": 0.07416367155100913, + "grad_norm": 1.0560090961957167, + "learning_rate": 1.9897942716730192e-05, + "loss": 0.8638, + "step": 4292 + }, + { + "epoch": 0.07418095106441802, + "grad_norm": 0.6582848244781206, + "learning_rate": 1.9897862949109847e-05, + "loss": 0.6744, + "step": 4293 + }, + { + "epoch": 0.07419823057782693, + "grad_norm": 0.5883295679934942, + "learning_rate": 1.9897783150488645e-05, + "loss": 0.4493, + "step": 4294 + }, + { + "epoch": 0.07421551009123584, + "grad_norm": 1.0132843778194114, + "learning_rate": 1.9897703320866833e-05, + "loss": 0.5455, + "step": 4295 + }, + { + "epoch": 0.07423278960464473, + "grad_norm": 1.3458452806634678, + "learning_rate": 1.9897623460244663e-05, + "loss": 0.7759, + "step": 4296 + }, + { + "epoch": 0.07425006911805364, + "grad_norm": 0.9507099853144587, + "learning_rate": 1.9897543568622387e-05, + "loss": 0.6885, + "step": 4297 + }, + { + "epoch": 0.07426734863146253, + "grad_norm": 1.2764548908458497, + "learning_rate": 1.9897463646000247e-05, + "loss": 0.8312, + "step": 4298 + }, + { + "epoch": 0.07428462814487144, + "grad_norm": 1.145014800017467, + "learning_rate": 1.98973836923785e-05, + "loss": 0.5736, + "step": 4299 + }, + { + "epoch": 0.07430190765828035, + "grad_norm": 0.9277676551803131, + "learning_rate": 1.9897303707757398e-05, + "loss": 0.828, + "step": 4300 + }, + { + "epoch": 0.07431918717168924, + "grad_norm": 0.8260246530337187, + "learning_rate": 1.9897223692137187e-05, + "loss": 0.637, + "step": 4301 + }, + { + "epoch": 0.07433646668509815, + "grad_norm": 0.9033293878600458, + "learning_rate": 1.989714364551812e-05, + "loss": 0.6206, + "step": 4302 + }, + { + "epoch": 0.07435374619850706, + "grad_norm": 0.5251206464730033, + "learning_rate": 1.9897063567900448e-05, + "loss": 0.6867, + "step": 4303 + }, + { + "epoch": 0.07437102571191595, + "grad_norm": 0.5637086630964301, + "learning_rate": 1.9896983459284418e-05, + "loss": 0.4181, + "step": 4304 + }, + { + "epoch": 0.07438830522532486, + "grad_norm": 1.091391505358307, + "learning_rate": 1.9896903319670286e-05, + "loss": 0.9465, + "step": 4305 + }, + { + "epoch": 0.07440558473873375, + "grad_norm": 1.469515726562577, + "learning_rate": 1.98968231490583e-05, + "loss": 0.5959, + "step": 4306 + }, + { + "epoch": 0.07442286425214266, + "grad_norm": 0.7494267472519444, + "learning_rate": 1.9896742947448707e-05, + "loss": 0.6786, + "step": 4307 + }, + { + "epoch": 0.07444014376555157, + "grad_norm": 0.9492682123880058, + "learning_rate": 1.9896662714841768e-05, + "loss": 0.7266, + "step": 4308 + }, + { + "epoch": 0.07445742327896046, + "grad_norm": 0.7351646970899244, + "learning_rate": 1.989658245123773e-05, + "loss": 0.6222, + "step": 4309 + }, + { + "epoch": 0.07447470279236937, + "grad_norm": 1.0813938985035918, + "learning_rate": 1.989650215663684e-05, + "loss": 0.7602, + "step": 4310 + }, + { + "epoch": 0.07449198230577828, + "grad_norm": 1.048183593709658, + "learning_rate": 1.9896421831039358e-05, + "loss": 0.8101, + "step": 4311 + }, + { + "epoch": 0.07450926181918717, + "grad_norm": 1.0712929515628522, + "learning_rate": 1.9896341474445526e-05, + "loss": 0.737, + "step": 4312 + }, + { + "epoch": 0.07452654133259608, + "grad_norm": 0.7409600909964896, + "learning_rate": 1.9896261086855603e-05, + "loss": 0.5994, + "step": 4313 + }, + { + "epoch": 0.07454382084600497, + "grad_norm": 0.9210340043727601, + "learning_rate": 1.989618066826984e-05, + "loss": 0.8114, + "step": 4314 + }, + { + "epoch": 0.07456110035941388, + "grad_norm": 1.0693582767178202, + "learning_rate": 1.989610021868848e-05, + "loss": 0.8396, + "step": 4315 + }, + { + "epoch": 0.07457837987282279, + "grad_norm": 0.9373157301597816, + "learning_rate": 1.9896019738111787e-05, + "loss": 0.877, + "step": 4316 + }, + { + "epoch": 0.07459565938623168, + "grad_norm": 1.08945383198999, + "learning_rate": 1.9895939226540006e-05, + "loss": 0.9292, + "step": 4317 + }, + { + "epoch": 0.07461293889964059, + "grad_norm": 1.1916771936451762, + "learning_rate": 1.9895858683973395e-05, + "loss": 0.8941, + "step": 4318 + }, + { + "epoch": 0.07463021841304948, + "grad_norm": 1.2344270279582528, + "learning_rate": 1.98957781104122e-05, + "loss": 0.947, + "step": 4319 + }, + { + "epoch": 0.07464749792645839, + "grad_norm": 1.1342463180562354, + "learning_rate": 1.9895697505856677e-05, + "loss": 0.7648, + "step": 4320 + }, + { + "epoch": 0.0746647774398673, + "grad_norm": 1.0438144914402923, + "learning_rate": 1.989561687030708e-05, + "loss": 0.6356, + "step": 4321 + }, + { + "epoch": 0.07468205695327619, + "grad_norm": 1.0837888528624726, + "learning_rate": 1.989553620376365e-05, + "loss": 0.8594, + "step": 4322 + }, + { + "epoch": 0.0746993364666851, + "grad_norm": 0.5629833955124975, + "learning_rate": 1.9895455506226656e-05, + "loss": 0.4724, + "step": 4323 + }, + { + "epoch": 0.074716615980094, + "grad_norm": 0.8055488563369259, + "learning_rate": 1.9895374777696343e-05, + "loss": 0.5397, + "step": 4324 + }, + { + "epoch": 0.0747338954935029, + "grad_norm": 0.9772305730602207, + "learning_rate": 1.9895294018172962e-05, + "loss": 0.6239, + "step": 4325 + }, + { + "epoch": 0.07475117500691181, + "grad_norm": 1.0096973355282792, + "learning_rate": 1.989521322765677e-05, + "loss": 0.6895, + "step": 4326 + }, + { + "epoch": 0.0747684545203207, + "grad_norm": 1.1152026620674402, + "learning_rate": 1.9895132406148015e-05, + "loss": 0.9342, + "step": 4327 + }, + { + "epoch": 0.07478573403372961, + "grad_norm": 1.141066312833175, + "learning_rate": 1.9895051553646956e-05, + "loss": 0.7374, + "step": 4328 + }, + { + "epoch": 0.07480301354713852, + "grad_norm": 0.9922679865820709, + "learning_rate": 1.9894970670153843e-05, + "loss": 0.8491, + "step": 4329 + }, + { + "epoch": 0.07482029306054741, + "grad_norm": 1.0274001091870582, + "learning_rate": 1.989488975566893e-05, + "loss": 0.7987, + "step": 4330 + }, + { + "epoch": 0.07483757257395632, + "grad_norm": 0.8710876065530931, + "learning_rate": 1.989480881019247e-05, + "loss": 0.6818, + "step": 4331 + }, + { + "epoch": 0.07485485208736523, + "grad_norm": 1.2942939010498655, + "learning_rate": 1.9894727833724716e-05, + "loss": 0.9546, + "step": 4332 + }, + { + "epoch": 0.07487213160077412, + "grad_norm": 0.764727719950282, + "learning_rate": 1.9894646826265922e-05, + "loss": 0.5792, + "step": 4333 + }, + { + "epoch": 0.07488941111418303, + "grad_norm": 1.1561414410501911, + "learning_rate": 1.9894565787816344e-05, + "loss": 0.6189, + "step": 4334 + }, + { + "epoch": 0.07490669062759192, + "grad_norm": 0.8101625964127359, + "learning_rate": 1.9894484718376232e-05, + "loss": 0.5773, + "step": 4335 + }, + { + "epoch": 0.07492397014100083, + "grad_norm": 1.0746562685997347, + "learning_rate": 1.9894403617945845e-05, + "loss": 0.8181, + "step": 4336 + }, + { + "epoch": 0.07494124965440974, + "grad_norm": 1.1767123181591144, + "learning_rate": 1.9894322486525432e-05, + "loss": 0.9608, + "step": 4337 + }, + { + "epoch": 0.07495852916781863, + "grad_norm": 0.9965822266504537, + "learning_rate": 1.9894241324115252e-05, + "loss": 0.6534, + "step": 4338 + }, + { + "epoch": 0.07497580868122754, + "grad_norm": 0.8447884473432955, + "learning_rate": 1.9894160130715548e-05, + "loss": 0.6722, + "step": 4339 + }, + { + "epoch": 0.07499308819463645, + "grad_norm": 1.2336389550086813, + "learning_rate": 1.989407890632659e-05, + "loss": 0.9573, + "step": 4340 + }, + { + "epoch": 0.07501036770804534, + "grad_norm": 0.9446022151707442, + "learning_rate": 1.9893997650948623e-05, + "loss": 0.6593, + "step": 4341 + }, + { + "epoch": 0.07502764722145425, + "grad_norm": 0.8018498297284988, + "learning_rate": 1.9893916364581903e-05, + "loss": 0.6131, + "step": 4342 + }, + { + "epoch": 0.07504492673486314, + "grad_norm": 1.0464088532282143, + "learning_rate": 1.989383504722669e-05, + "loss": 0.6989, + "step": 4343 + }, + { + "epoch": 0.07506220624827205, + "grad_norm": 0.8207403414606254, + "learning_rate": 1.9893753698883226e-05, + "loss": 0.4529, + "step": 4344 + }, + { + "epoch": 0.07507948576168096, + "grad_norm": 1.0233526952583238, + "learning_rate": 1.9893672319551774e-05, + "loss": 0.8735, + "step": 4345 + }, + { + "epoch": 0.07509676527508985, + "grad_norm": 0.8456871245570575, + "learning_rate": 1.9893590909232594e-05, + "loss": 0.6675, + "step": 4346 + }, + { + "epoch": 0.07511404478849876, + "grad_norm": 0.9271177255340192, + "learning_rate": 1.9893509467925933e-05, + "loss": 0.6896, + "step": 4347 + }, + { + "epoch": 0.07513132430190766, + "grad_norm": 0.8970448005245598, + "learning_rate": 1.989342799563205e-05, + "loss": 0.6499, + "step": 4348 + }, + { + "epoch": 0.07514860381531656, + "grad_norm": 0.9597415356802468, + "learning_rate": 1.9893346492351196e-05, + "loss": 1.0467, + "step": 4349 + }, + { + "epoch": 0.07516588332872547, + "grad_norm": 0.9679706548620428, + "learning_rate": 1.989326495808363e-05, + "loss": 0.5637, + "step": 4350 + }, + { + "epoch": 0.07518316284213436, + "grad_norm": 1.1218243485876458, + "learning_rate": 1.9893183392829606e-05, + "loss": 0.8995, + "step": 4351 + }, + { + "epoch": 0.07520044235554327, + "grad_norm": 0.8140097385024183, + "learning_rate": 1.9893101796589378e-05, + "loss": 0.6974, + "step": 4352 + }, + { + "epoch": 0.07521772186895218, + "grad_norm": 1.2485593699435322, + "learning_rate": 1.9893020169363203e-05, + "loss": 0.8703, + "step": 4353 + }, + { + "epoch": 0.07523500138236107, + "grad_norm": 0.627134295591002, + "learning_rate": 1.989293851115134e-05, + "loss": 0.8952, + "step": 4354 + }, + { + "epoch": 0.07525228089576998, + "grad_norm": 0.6213778730643245, + "learning_rate": 1.989285682195404e-05, + "loss": 0.4223, + "step": 4355 + }, + { + "epoch": 0.07526956040917887, + "grad_norm": 1.1237211039923323, + "learning_rate": 1.9892775101771563e-05, + "loss": 0.7253, + "step": 4356 + }, + { + "epoch": 0.07528683992258778, + "grad_norm": 0.8515491895081121, + "learning_rate": 1.989269335060416e-05, + "loss": 0.7337, + "step": 4357 + }, + { + "epoch": 0.07530411943599669, + "grad_norm": 0.9528977390840364, + "learning_rate": 1.9892611568452088e-05, + "loss": 0.3659, + "step": 4358 + }, + { + "epoch": 0.07532139894940558, + "grad_norm": 0.881644925136141, + "learning_rate": 1.989252975531561e-05, + "loss": 0.461, + "step": 4359 + }, + { + "epoch": 0.07533867846281449, + "grad_norm": 0.9137753754286034, + "learning_rate": 1.9892447911194973e-05, + "loss": 0.6848, + "step": 4360 + }, + { + "epoch": 0.0753559579762234, + "grad_norm": 1.0690950812598754, + "learning_rate": 1.9892366036090437e-05, + "loss": 0.8517, + "step": 4361 + }, + { + "epoch": 0.07537323748963229, + "grad_norm": 0.9842336969650088, + "learning_rate": 1.9892284130002263e-05, + "loss": 0.865, + "step": 4362 + }, + { + "epoch": 0.0753905170030412, + "grad_norm": 1.6044756470382886, + "learning_rate": 1.98922021929307e-05, + "loss": 0.8178, + "step": 4363 + }, + { + "epoch": 0.07540779651645009, + "grad_norm": 1.0866659966493049, + "learning_rate": 1.989212022487601e-05, + "loss": 0.6461, + "step": 4364 + }, + { + "epoch": 0.075425076029859, + "grad_norm": 1.0101776178611168, + "learning_rate": 1.9892038225838444e-05, + "loss": 0.8311, + "step": 4365 + }, + { + "epoch": 0.0754423555432679, + "grad_norm": 2.434054107245712, + "learning_rate": 1.9891956195818264e-05, + "loss": 1.0502, + "step": 4366 + }, + { + "epoch": 0.0754596350566768, + "grad_norm": 1.2222061208723993, + "learning_rate": 1.9891874134815726e-05, + "loss": 0.9395, + "step": 4367 + }, + { + "epoch": 0.0754769145700857, + "grad_norm": 0.8360095988448636, + "learning_rate": 1.9891792042831088e-05, + "loss": 0.5395, + "step": 4368 + }, + { + "epoch": 0.07549419408349461, + "grad_norm": 0.726958398019302, + "learning_rate": 1.9891709919864604e-05, + "loss": 0.556, + "step": 4369 + }, + { + "epoch": 0.07551147359690351, + "grad_norm": 0.935443606274644, + "learning_rate": 1.9891627765916534e-05, + "loss": 0.8314, + "step": 4370 + }, + { + "epoch": 0.07552875311031242, + "grad_norm": 1.116441538996231, + "learning_rate": 1.9891545580987133e-05, + "loss": 0.7376, + "step": 4371 + }, + { + "epoch": 0.07554603262372131, + "grad_norm": 1.186519721587242, + "learning_rate": 1.9891463365076658e-05, + "loss": 0.7003, + "step": 4372 + }, + { + "epoch": 0.07556331213713022, + "grad_norm": 0.6320610476218773, + "learning_rate": 1.989138111818537e-05, + "loss": 0.7763, + "step": 4373 + }, + { + "epoch": 0.07558059165053913, + "grad_norm": 0.9561406064458476, + "learning_rate": 1.9891298840313525e-05, + "loss": 0.6056, + "step": 4374 + }, + { + "epoch": 0.07559787116394802, + "grad_norm": 0.7991562166940512, + "learning_rate": 1.989121653146138e-05, + "loss": 0.5856, + "step": 4375 + }, + { + "epoch": 0.07561515067735693, + "grad_norm": 0.8176260371100194, + "learning_rate": 1.9891134191629193e-05, + "loss": 0.7076, + "step": 4376 + }, + { + "epoch": 0.07563243019076583, + "grad_norm": 1.1891852033439878, + "learning_rate": 1.9891051820817223e-05, + "loss": 0.6729, + "step": 4377 + }, + { + "epoch": 0.07564970970417473, + "grad_norm": 0.8488197297610026, + "learning_rate": 1.9890969419025727e-05, + "loss": 0.7585, + "step": 4378 + }, + { + "epoch": 0.07566698921758364, + "grad_norm": 0.47567245387421725, + "learning_rate": 1.989088698625496e-05, + "loss": 0.4863, + "step": 4379 + }, + { + "epoch": 0.07568426873099253, + "grad_norm": 1.089101436020106, + "learning_rate": 1.9890804522505186e-05, + "loss": 0.6621, + "step": 4380 + }, + { + "epoch": 0.07570154824440144, + "grad_norm": 0.9067257273377352, + "learning_rate": 1.9890722027776656e-05, + "loss": 0.5947, + "step": 4381 + }, + { + "epoch": 0.07571882775781034, + "grad_norm": 1.0029123172418244, + "learning_rate": 1.9890639502069637e-05, + "loss": 0.8652, + "step": 4382 + }, + { + "epoch": 0.07573610727121924, + "grad_norm": 0.9280007655170684, + "learning_rate": 1.9890556945384384e-05, + "loss": 0.6428, + "step": 4383 + }, + { + "epoch": 0.07575338678462815, + "grad_norm": 1.2431817635196547, + "learning_rate": 1.9890474357721154e-05, + "loss": 1.0741, + "step": 4384 + }, + { + "epoch": 0.07577066629803705, + "grad_norm": 1.0042315564893676, + "learning_rate": 1.9890391739080205e-05, + "loss": 0.6597, + "step": 4385 + }, + { + "epoch": 0.07578794581144595, + "grad_norm": 1.1102235837151944, + "learning_rate": 1.98903090894618e-05, + "loss": 0.7958, + "step": 4386 + }, + { + "epoch": 0.07580522532485486, + "grad_norm": 0.8376022808461063, + "learning_rate": 1.9890226408866194e-05, + "loss": 0.8577, + "step": 4387 + }, + { + "epoch": 0.07582250483826375, + "grad_norm": 1.24792477946931, + "learning_rate": 1.989014369729365e-05, + "loss": 1.0265, + "step": 4388 + }, + { + "epoch": 0.07583978435167266, + "grad_norm": 1.046569861003953, + "learning_rate": 1.989006095474442e-05, + "loss": 0.7774, + "step": 4389 + }, + { + "epoch": 0.07585706386508156, + "grad_norm": 1.0898820266336844, + "learning_rate": 1.988997818121877e-05, + "loss": 0.874, + "step": 4390 + }, + { + "epoch": 0.07587434337849046, + "grad_norm": 1.2216403288520084, + "learning_rate": 1.9889895376716955e-05, + "loss": 0.7491, + "step": 4391 + }, + { + "epoch": 0.07589162289189937, + "grad_norm": 0.5631742208535994, + "learning_rate": 1.9889812541239237e-05, + "loss": 0.6417, + "step": 4392 + }, + { + "epoch": 0.07590890240530827, + "grad_norm": 0.9251689459989748, + "learning_rate": 1.9889729674785877e-05, + "loss": 0.7562, + "step": 4393 + }, + { + "epoch": 0.07592618191871717, + "grad_norm": 1.0384418121643448, + "learning_rate": 1.988964677735713e-05, + "loss": 0.5942, + "step": 4394 + }, + { + "epoch": 0.07594346143212607, + "grad_norm": 0.898831339758749, + "learning_rate": 1.9889563848953258e-05, + "loss": 0.8713, + "step": 4395 + }, + { + "epoch": 0.07596074094553497, + "grad_norm": 0.9202379347473251, + "learning_rate": 1.9889480889574518e-05, + "loss": 0.5358, + "step": 4396 + }, + { + "epoch": 0.07597802045894388, + "grad_norm": 1.1666965702886911, + "learning_rate": 1.9889397899221173e-05, + "loss": 0.6868, + "step": 4397 + }, + { + "epoch": 0.07599529997235278, + "grad_norm": 0.8425695384771635, + "learning_rate": 1.9889314877893486e-05, + "loss": 0.5664, + "step": 4398 + }, + { + "epoch": 0.07601257948576168, + "grad_norm": 1.1284783905282918, + "learning_rate": 1.988923182559171e-05, + "loss": 0.997, + "step": 4399 + }, + { + "epoch": 0.07602985899917059, + "grad_norm": 1.2619497000193065, + "learning_rate": 1.988914874231611e-05, + "loss": 1.0089, + "step": 4400 + }, + { + "epoch": 0.07604713851257948, + "grad_norm": 0.6998337527467529, + "learning_rate": 1.9889065628066945e-05, + "loss": 0.5976, + "step": 4401 + }, + { + "epoch": 0.07606441802598839, + "grad_norm": 1.218402866428542, + "learning_rate": 1.9888982482844473e-05, + "loss": 0.7381, + "step": 4402 + }, + { + "epoch": 0.0760816975393973, + "grad_norm": 1.0174739542384443, + "learning_rate": 1.9888899306648955e-05, + "loss": 0.6799, + "step": 4403 + }, + { + "epoch": 0.07609897705280619, + "grad_norm": 0.8447586305092827, + "learning_rate": 1.9888816099480657e-05, + "loss": 0.8327, + "step": 4404 + }, + { + "epoch": 0.0761162565662151, + "grad_norm": 1.2409683863493552, + "learning_rate": 1.988873286133983e-05, + "loss": 0.8989, + "step": 4405 + }, + { + "epoch": 0.076133536079624, + "grad_norm": 0.9729322708851738, + "learning_rate": 1.9888649592226746e-05, + "loss": 0.6969, + "step": 4406 + }, + { + "epoch": 0.0761508155930329, + "grad_norm": 0.9387468405626128, + "learning_rate": 1.9888566292141655e-05, + "loss": 0.7612, + "step": 4407 + }, + { + "epoch": 0.0761680951064418, + "grad_norm": 1.0422045340451114, + "learning_rate": 1.9888482961084828e-05, + "loss": 0.7935, + "step": 4408 + }, + { + "epoch": 0.0761853746198507, + "grad_norm": 1.3116623127247378, + "learning_rate": 1.9888399599056515e-05, + "loss": 0.5779, + "step": 4409 + }, + { + "epoch": 0.0762026541332596, + "grad_norm": 0.9521774133094385, + "learning_rate": 1.9888316206056988e-05, + "loss": 0.8024, + "step": 4410 + }, + { + "epoch": 0.07621993364666851, + "grad_norm": 0.6859367659904068, + "learning_rate": 1.98882327820865e-05, + "loss": 0.5591, + "step": 4411 + }, + { + "epoch": 0.07623721316007741, + "grad_norm": 0.8168167479359159, + "learning_rate": 1.9888149327145318e-05, + "loss": 0.5768, + "step": 4412 + }, + { + "epoch": 0.07625449267348632, + "grad_norm": 0.9367496115883309, + "learning_rate": 1.98880658412337e-05, + "loss": 0.733, + "step": 4413 + }, + { + "epoch": 0.07627177218689522, + "grad_norm": 1.0622164987322666, + "learning_rate": 1.9887982324351908e-05, + "loss": 0.8418, + "step": 4414 + }, + { + "epoch": 0.07628905170030412, + "grad_norm": 0.6659678787016453, + "learning_rate": 1.9887898776500203e-05, + "loss": 0.6365, + "step": 4415 + }, + { + "epoch": 0.07630633121371302, + "grad_norm": 0.7476932716520954, + "learning_rate": 1.988781519767885e-05, + "loss": 0.4996, + "step": 4416 + }, + { + "epoch": 0.07632361072712192, + "grad_norm": 1.2275374887354145, + "learning_rate": 1.9887731587888104e-05, + "loss": 0.877, + "step": 4417 + }, + { + "epoch": 0.07634089024053083, + "grad_norm": 1.1736048556937335, + "learning_rate": 1.9887647947128234e-05, + "loss": 0.7498, + "step": 4418 + }, + { + "epoch": 0.07635816975393973, + "grad_norm": 0.9833102351138444, + "learning_rate": 1.98875642753995e-05, + "loss": 0.6329, + "step": 4419 + }, + { + "epoch": 0.07637544926734863, + "grad_norm": 0.795832366262294, + "learning_rate": 1.988748057270216e-05, + "loss": 0.8399, + "step": 4420 + }, + { + "epoch": 0.07639272878075754, + "grad_norm": 0.903252273136079, + "learning_rate": 1.9887396839036483e-05, + "loss": 0.5293, + "step": 4421 + }, + { + "epoch": 0.07641000829416644, + "grad_norm": 1.069633114512273, + "learning_rate": 1.9887313074402728e-05, + "loss": 0.8664, + "step": 4422 + }, + { + "epoch": 0.07642728780757534, + "grad_norm": 1.0347045536127741, + "learning_rate": 1.9887229278801153e-05, + "loss": 0.7993, + "step": 4423 + }, + { + "epoch": 0.07644456732098424, + "grad_norm": 0.8965283115267848, + "learning_rate": 1.9887145452232027e-05, + "loss": 0.7924, + "step": 4424 + }, + { + "epoch": 0.07646184683439314, + "grad_norm": 1.0535944812416458, + "learning_rate": 1.9887061594695613e-05, + "loss": 0.6517, + "step": 4425 + }, + { + "epoch": 0.07647912634780205, + "grad_norm": 1.1690976259212553, + "learning_rate": 1.9886977706192166e-05, + "loss": 0.8934, + "step": 4426 + }, + { + "epoch": 0.07649640586121095, + "grad_norm": 0.719358885774767, + "learning_rate": 1.9886893786721954e-05, + "loss": 0.7373, + "step": 4427 + }, + { + "epoch": 0.07651368537461985, + "grad_norm": 1.0402781201024998, + "learning_rate": 1.988680983628524e-05, + "loss": 0.7012, + "step": 4428 + }, + { + "epoch": 0.07653096488802875, + "grad_norm": 0.9474966102994299, + "learning_rate": 1.988672585488229e-05, + "loss": 0.7264, + "step": 4429 + }, + { + "epoch": 0.07654824440143766, + "grad_norm": 0.955037006701017, + "learning_rate": 1.9886641842513358e-05, + "loss": 0.7818, + "step": 4430 + }, + { + "epoch": 0.07656552391484656, + "grad_norm": 0.9351197162254178, + "learning_rate": 1.9886557799178712e-05, + "loss": 0.7319, + "step": 4431 + }, + { + "epoch": 0.07658280342825546, + "grad_norm": 0.8820766541727223, + "learning_rate": 1.9886473724878622e-05, + "loss": 0.6618, + "step": 4432 + }, + { + "epoch": 0.07660008294166436, + "grad_norm": 1.7748758563393232, + "learning_rate": 1.988638961961334e-05, + "loss": 0.9588, + "step": 4433 + }, + { + "epoch": 0.07661736245507327, + "grad_norm": 0.9357842810337283, + "learning_rate": 1.9886305483383136e-05, + "loss": 0.5929, + "step": 4434 + }, + { + "epoch": 0.07663464196848217, + "grad_norm": 1.037516855747041, + "learning_rate": 1.988622131618827e-05, + "loss": 0.8048, + "step": 4435 + }, + { + "epoch": 0.07665192148189107, + "grad_norm": 0.9609709034949513, + "learning_rate": 1.9886137118029007e-05, + "loss": 0.7183, + "step": 4436 + }, + { + "epoch": 0.07666920099529997, + "grad_norm": 0.9265435682774652, + "learning_rate": 1.9886052888905616e-05, + "loss": 0.7403, + "step": 4437 + }, + { + "epoch": 0.07668648050870887, + "grad_norm": 0.8687016966119404, + "learning_rate": 1.9885968628818352e-05, + "loss": 0.7328, + "step": 4438 + }, + { + "epoch": 0.07670376002211778, + "grad_norm": 1.0657945925403203, + "learning_rate": 1.9885884337767485e-05, + "loss": 0.6581, + "step": 4439 + }, + { + "epoch": 0.07672103953552668, + "grad_norm": 0.9326138695999628, + "learning_rate": 1.9885800015753273e-05, + "loss": 0.9864, + "step": 4440 + }, + { + "epoch": 0.07673831904893558, + "grad_norm": 1.2321935628611973, + "learning_rate": 1.988571566277599e-05, + "loss": 0.9662, + "step": 4441 + }, + { + "epoch": 0.07675559856234448, + "grad_norm": 1.1332885934410217, + "learning_rate": 1.9885631278835887e-05, + "loss": 0.9073, + "step": 4442 + }, + { + "epoch": 0.07677287807575339, + "grad_norm": 0.8262006769592443, + "learning_rate": 1.9885546863933244e-05, + "loss": 0.6462, + "step": 4443 + }, + { + "epoch": 0.07679015758916229, + "grad_norm": 0.7990280657003876, + "learning_rate": 1.988546241806831e-05, + "loss": 0.8602, + "step": 4444 + }, + { + "epoch": 0.0768074371025712, + "grad_norm": 1.2523426504357893, + "learning_rate": 1.988537794124136e-05, + "loss": 0.8062, + "step": 4445 + }, + { + "epoch": 0.07682471661598009, + "grad_norm": 0.939893958471584, + "learning_rate": 1.9885293433452656e-05, + "loss": 0.6708, + "step": 4446 + }, + { + "epoch": 0.076841996129389, + "grad_norm": 0.5043621537068331, + "learning_rate": 1.988520889470246e-05, + "loss": 0.5873, + "step": 4447 + }, + { + "epoch": 0.0768592756427979, + "grad_norm": 1.1100161627593577, + "learning_rate": 1.9885124324991037e-05, + "loss": 0.6631, + "step": 4448 + }, + { + "epoch": 0.0768765551562068, + "grad_norm": 1.0665451392965601, + "learning_rate": 1.9885039724318653e-05, + "loss": 0.6253, + "step": 4449 + }, + { + "epoch": 0.0768938346696157, + "grad_norm": 0.9260457077248258, + "learning_rate": 1.9884955092685578e-05, + "loss": 0.7265, + "step": 4450 + }, + { + "epoch": 0.07691111418302461, + "grad_norm": 1.1310272175029277, + "learning_rate": 1.9884870430092066e-05, + "loss": 0.7639, + "step": 4451 + }, + { + "epoch": 0.0769283936964335, + "grad_norm": 0.9310906218321099, + "learning_rate": 1.9884785736538396e-05, + "loss": 0.6185, + "step": 4452 + }, + { + "epoch": 0.07694567320984241, + "grad_norm": 0.5659928994320341, + "learning_rate": 1.988470101202482e-05, + "loss": 0.6743, + "step": 4453 + }, + { + "epoch": 0.07696295272325131, + "grad_norm": 0.541598312566739, + "learning_rate": 1.988461625655161e-05, + "loss": 0.6025, + "step": 4454 + }, + { + "epoch": 0.07698023223666021, + "grad_norm": 1.1276415599807883, + "learning_rate": 1.988453147011903e-05, + "loss": 0.7118, + "step": 4455 + }, + { + "epoch": 0.07699751175006912, + "grad_norm": 1.070840494481606, + "learning_rate": 1.988444665272735e-05, + "loss": 0.8253, + "step": 4456 + }, + { + "epoch": 0.07701479126347802, + "grad_norm": 0.9790769548686997, + "learning_rate": 1.988436180437683e-05, + "loss": 0.7973, + "step": 4457 + }, + { + "epoch": 0.07703207077688692, + "grad_norm": 1.1052148036425313, + "learning_rate": 1.9884276925067738e-05, + "loss": 0.6399, + "step": 4458 + }, + { + "epoch": 0.07704935029029583, + "grad_norm": 0.9964184410238269, + "learning_rate": 1.988419201480034e-05, + "loss": 0.7158, + "step": 4459 + }, + { + "epoch": 0.07706662980370473, + "grad_norm": 0.9371586303620534, + "learning_rate": 1.9884107073574902e-05, + "loss": 0.6302, + "step": 4460 + }, + { + "epoch": 0.07708390931711363, + "grad_norm": 1.276497927773969, + "learning_rate": 1.9884022101391685e-05, + "loss": 0.8561, + "step": 4461 + }, + { + "epoch": 0.07710118883052253, + "grad_norm": 1.1388578074452278, + "learning_rate": 1.9883937098250962e-05, + "loss": 0.6221, + "step": 4462 + }, + { + "epoch": 0.07711846834393143, + "grad_norm": 1.1611941786162083, + "learning_rate": 1.9883852064152998e-05, + "loss": 0.7541, + "step": 4463 + }, + { + "epoch": 0.07713574785734034, + "grad_norm": 0.852112037095902, + "learning_rate": 1.988376699909806e-05, + "loss": 0.8067, + "step": 4464 + }, + { + "epoch": 0.07715302737074924, + "grad_norm": 1.0118249793490375, + "learning_rate": 1.988368190308641e-05, + "loss": 0.7639, + "step": 4465 + }, + { + "epoch": 0.07717030688415814, + "grad_norm": 1.0265983668910734, + "learning_rate": 1.988359677611832e-05, + "loss": 0.6135, + "step": 4466 + }, + { + "epoch": 0.07718758639756705, + "grad_norm": 0.8841769359001794, + "learning_rate": 1.9883511618194048e-05, + "loss": 0.7728, + "step": 4467 + }, + { + "epoch": 0.07720486591097594, + "grad_norm": 1.3941413778918932, + "learning_rate": 1.9883426429313866e-05, + "loss": 1.0774, + "step": 4468 + }, + { + "epoch": 0.07722214542438485, + "grad_norm": 1.1610507526242717, + "learning_rate": 1.988334120947805e-05, + "loss": 0.6786, + "step": 4469 + }, + { + "epoch": 0.07723942493779375, + "grad_norm": 0.8161429232257034, + "learning_rate": 1.9883255958686852e-05, + "loss": 0.6441, + "step": 4470 + }, + { + "epoch": 0.07725670445120265, + "grad_norm": 1.3585243252875627, + "learning_rate": 1.9883170676940543e-05, + "loss": 0.8365, + "step": 4471 + }, + { + "epoch": 0.07727398396461156, + "grad_norm": 1.130417414834433, + "learning_rate": 1.98830853642394e-05, + "loss": 0.7112, + "step": 4472 + }, + { + "epoch": 0.07729126347802046, + "grad_norm": 0.9992760581026678, + "learning_rate": 1.9883000020583676e-05, + "loss": 0.6112, + "step": 4473 + }, + { + "epoch": 0.07730854299142936, + "grad_norm": 1.1561535989973053, + "learning_rate": 1.988291464597365e-05, + "loss": 0.6949, + "step": 4474 + }, + { + "epoch": 0.07732582250483826, + "grad_norm": 0.843227420084467, + "learning_rate": 1.9882829240409582e-05, + "loss": 0.7334, + "step": 4475 + }, + { + "epoch": 0.07734310201824716, + "grad_norm": 0.8975314043607515, + "learning_rate": 1.9882743803891736e-05, + "loss": 0.9743, + "step": 4476 + }, + { + "epoch": 0.07736038153165607, + "grad_norm": 0.6622313204362698, + "learning_rate": 1.9882658336420392e-05, + "loss": 0.7286, + "step": 4477 + }, + { + "epoch": 0.07737766104506497, + "grad_norm": 0.8853935572764109, + "learning_rate": 1.988257283799581e-05, + "loss": 0.6192, + "step": 4478 + }, + { + "epoch": 0.07739494055847387, + "grad_norm": 1.2125066482093305, + "learning_rate": 1.988248730861826e-05, + "loss": 0.9047, + "step": 4479 + }, + { + "epoch": 0.07741222007188278, + "grad_norm": 1.1106612360135744, + "learning_rate": 1.988240174828801e-05, + "loss": 0.9042, + "step": 4480 + }, + { + "epoch": 0.07742949958529168, + "grad_norm": 0.9640100848432931, + "learning_rate": 1.988231615700532e-05, + "loss": 0.8331, + "step": 4481 + }, + { + "epoch": 0.07744677909870058, + "grad_norm": 1.0339265434250975, + "learning_rate": 1.988223053477047e-05, + "loss": 0.7743, + "step": 4482 + }, + { + "epoch": 0.07746405861210948, + "grad_norm": 1.1490544475448012, + "learning_rate": 1.988214488158372e-05, + "loss": 0.9307, + "step": 4483 + }, + { + "epoch": 0.07748133812551838, + "grad_norm": 0.8884299732944537, + "learning_rate": 1.9882059197445344e-05, + "loss": 0.6007, + "step": 4484 + }, + { + "epoch": 0.07749861763892729, + "grad_norm": 0.8903691201141426, + "learning_rate": 1.9881973482355607e-05, + "loss": 0.686, + "step": 4485 + }, + { + "epoch": 0.07751589715233619, + "grad_norm": 1.0618351056814181, + "learning_rate": 1.9881887736314775e-05, + "loss": 0.7385, + "step": 4486 + }, + { + "epoch": 0.0775331766657451, + "grad_norm": 1.0661822604377102, + "learning_rate": 1.9881801959323122e-05, + "loss": 0.7283, + "step": 4487 + }, + { + "epoch": 0.077550456179154, + "grad_norm": 1.0726088965258906, + "learning_rate": 1.9881716151380915e-05, + "loss": 0.8032, + "step": 4488 + }, + { + "epoch": 0.0775677356925629, + "grad_norm": 1.0965392523968711, + "learning_rate": 1.988163031248842e-05, + "loss": 0.7889, + "step": 4489 + }, + { + "epoch": 0.0775850152059718, + "grad_norm": 0.9428640627821442, + "learning_rate": 1.988154444264591e-05, + "loss": 0.4637, + "step": 4490 + }, + { + "epoch": 0.0776022947193807, + "grad_norm": 0.9003294270159331, + "learning_rate": 1.9881458541853652e-05, + "loss": 0.5127, + "step": 4491 + }, + { + "epoch": 0.0776195742327896, + "grad_norm": 1.1207953765996688, + "learning_rate": 1.9881372610111912e-05, + "loss": 0.6395, + "step": 4492 + }, + { + "epoch": 0.07763685374619851, + "grad_norm": 1.2996632131845387, + "learning_rate": 1.9881286647420967e-05, + "loss": 0.8378, + "step": 4493 + }, + { + "epoch": 0.0776541332596074, + "grad_norm": 1.0486654811207723, + "learning_rate": 1.9881200653781078e-05, + "loss": 0.8, + "step": 4494 + }, + { + "epoch": 0.07767141277301631, + "grad_norm": 1.1020874357447794, + "learning_rate": 1.9881114629192516e-05, + "loss": 0.8434, + "step": 4495 + }, + { + "epoch": 0.07768869228642522, + "grad_norm": 0.9432266044011448, + "learning_rate": 1.9881028573655555e-05, + "loss": 0.89, + "step": 4496 + }, + { + "epoch": 0.07770597179983411, + "grad_norm": 0.9257562549606562, + "learning_rate": 1.988094248717046e-05, + "loss": 0.6453, + "step": 4497 + }, + { + "epoch": 0.07772325131324302, + "grad_norm": 1.037964863653608, + "learning_rate": 1.9880856369737502e-05, + "loss": 0.8182, + "step": 4498 + }, + { + "epoch": 0.07774053082665192, + "grad_norm": 1.0533087979325109, + "learning_rate": 1.9880770221356952e-05, + "loss": 0.6373, + "step": 4499 + }, + { + "epoch": 0.07775781034006082, + "grad_norm": 0.4982006928059352, + "learning_rate": 1.988068404202908e-05, + "loss": 0.6275, + "step": 4500 + }, + { + "epoch": 0.07777508985346973, + "grad_norm": 1.038313663711969, + "learning_rate": 1.9880597831754153e-05, + "loss": 0.7246, + "step": 4501 + }, + { + "epoch": 0.07779236936687862, + "grad_norm": 0.9073990216372376, + "learning_rate": 1.9880511590532442e-05, + "loss": 0.8898, + "step": 4502 + }, + { + "epoch": 0.07780964888028753, + "grad_norm": 1.0997470114491705, + "learning_rate": 1.9880425318364217e-05, + "loss": 0.8245, + "step": 4503 + }, + { + "epoch": 0.07782692839369644, + "grad_norm": 0.9948294573894891, + "learning_rate": 1.988033901524975e-05, + "loss": 0.5541, + "step": 4504 + }, + { + "epoch": 0.07784420790710533, + "grad_norm": 0.981033408122749, + "learning_rate": 1.9880252681189313e-05, + "loss": 0.5486, + "step": 4505 + }, + { + "epoch": 0.07786148742051424, + "grad_norm": 1.0528309557918232, + "learning_rate": 1.988016631618317e-05, + "loss": 0.88, + "step": 4506 + }, + { + "epoch": 0.07787876693392314, + "grad_norm": 0.9988638539488676, + "learning_rate": 1.98800799202316e-05, + "loss": 0.7305, + "step": 4507 + }, + { + "epoch": 0.07789604644733204, + "grad_norm": 0.8997764528060166, + "learning_rate": 1.9879993493334862e-05, + "loss": 0.7338, + "step": 4508 + }, + { + "epoch": 0.07791332596074095, + "grad_norm": 1.0959609034791846, + "learning_rate": 1.9879907035493237e-05, + "loss": 0.6212, + "step": 4509 + }, + { + "epoch": 0.07793060547414984, + "grad_norm": 1.000310156145004, + "learning_rate": 1.9879820546706994e-05, + "loss": 0.9624, + "step": 4510 + }, + { + "epoch": 0.07794788498755875, + "grad_norm": 1.0655090554972753, + "learning_rate": 1.9879734026976402e-05, + "loss": 0.6901, + "step": 4511 + }, + { + "epoch": 0.07796516450096765, + "grad_norm": 1.2024645339299054, + "learning_rate": 1.9879647476301728e-05, + "loss": 0.7066, + "step": 4512 + }, + { + "epoch": 0.07798244401437655, + "grad_norm": 0.7608262228737276, + "learning_rate": 1.987956089468325e-05, + "loss": 0.6245, + "step": 4513 + }, + { + "epoch": 0.07799972352778546, + "grad_norm": 1.0096595013801244, + "learning_rate": 1.9879474282121237e-05, + "loss": 0.6721, + "step": 4514 + }, + { + "epoch": 0.07801700304119435, + "grad_norm": 0.8081174422296072, + "learning_rate": 1.987938763861596e-05, + "loss": 0.6612, + "step": 4515 + }, + { + "epoch": 0.07803428255460326, + "grad_norm": 1.2364918174122026, + "learning_rate": 1.9879300964167688e-05, + "loss": 0.7255, + "step": 4516 + }, + { + "epoch": 0.07805156206801217, + "grad_norm": 0.6076894244513852, + "learning_rate": 1.9879214258776696e-05, + "loss": 0.6505, + "step": 4517 + }, + { + "epoch": 0.07806884158142106, + "grad_norm": 0.7576333506786945, + "learning_rate": 1.9879127522443255e-05, + "loss": 0.7168, + "step": 4518 + }, + { + "epoch": 0.07808612109482997, + "grad_norm": 0.8369512179237281, + "learning_rate": 1.9879040755167637e-05, + "loss": 0.5936, + "step": 4519 + }, + { + "epoch": 0.07810340060823887, + "grad_norm": 1.3169756404161894, + "learning_rate": 1.987895395695011e-05, + "loss": 1.0346, + "step": 4520 + }, + { + "epoch": 0.07812068012164777, + "grad_norm": 1.182547854084032, + "learning_rate": 1.987886712779095e-05, + "loss": 0.7861, + "step": 4521 + }, + { + "epoch": 0.07813795963505668, + "grad_norm": 1.0103139707692048, + "learning_rate": 1.9878780267690427e-05, + "loss": 0.8295, + "step": 4522 + }, + { + "epoch": 0.07815523914846557, + "grad_norm": 0.9130855765884779, + "learning_rate": 1.987869337664881e-05, + "loss": 0.8926, + "step": 4523 + }, + { + "epoch": 0.07817251866187448, + "grad_norm": 1.1378101383599382, + "learning_rate": 1.9878606454666377e-05, + "loss": 0.6624, + "step": 4524 + }, + { + "epoch": 0.07818979817528339, + "grad_norm": 0.8274894241586623, + "learning_rate": 1.98785195017434e-05, + "loss": 0.9313, + "step": 4525 + }, + { + "epoch": 0.07820707768869228, + "grad_norm": 0.8251028397414164, + "learning_rate": 1.987843251788015e-05, + "loss": 0.5727, + "step": 4526 + }, + { + "epoch": 0.07822435720210119, + "grad_norm": 1.0221106003304963, + "learning_rate": 1.987834550307689e-05, + "loss": 0.8923, + "step": 4527 + }, + { + "epoch": 0.07824163671551009, + "grad_norm": 1.3359598778963968, + "learning_rate": 1.987825845733391e-05, + "loss": 0.9012, + "step": 4528 + }, + { + "epoch": 0.07825891622891899, + "grad_norm": 0.8866751778309768, + "learning_rate": 1.9878171380651473e-05, + "loss": 0.5702, + "step": 4529 + }, + { + "epoch": 0.0782761957423279, + "grad_norm": 1.0180084670351324, + "learning_rate": 1.987808427302985e-05, + "loss": 0.669, + "step": 4530 + }, + { + "epoch": 0.0782934752557368, + "grad_norm": 0.8294652114837906, + "learning_rate": 1.9877997134469316e-05, + "loss": 0.6318, + "step": 4531 + }, + { + "epoch": 0.0783107547691457, + "grad_norm": 0.9410043778189443, + "learning_rate": 1.9877909964970145e-05, + "loss": 0.8075, + "step": 4532 + }, + { + "epoch": 0.07832803428255461, + "grad_norm": 1.2862950567021756, + "learning_rate": 1.987782276453261e-05, + "loss": 0.7889, + "step": 4533 + }, + { + "epoch": 0.0783453137959635, + "grad_norm": 1.3811091182389785, + "learning_rate": 1.987773553315698e-05, + "loss": 0.7937, + "step": 4534 + }, + { + "epoch": 0.07836259330937241, + "grad_norm": 1.1092115507691294, + "learning_rate": 1.9877648270843537e-05, + "loss": 0.8965, + "step": 4535 + }, + { + "epoch": 0.0783798728227813, + "grad_norm": 1.0447608898908203, + "learning_rate": 1.9877560977592547e-05, + "loss": 0.6825, + "step": 4536 + }, + { + "epoch": 0.07839715233619021, + "grad_norm": 1.0699575919349653, + "learning_rate": 1.9877473653404285e-05, + "loss": 0.8642, + "step": 4537 + }, + { + "epoch": 0.07841443184959912, + "grad_norm": 0.8694876237702958, + "learning_rate": 1.9877386298279023e-05, + "loss": 0.885, + "step": 4538 + }, + { + "epoch": 0.07843171136300801, + "grad_norm": 0.8255428277135445, + "learning_rate": 1.987729891221704e-05, + "loss": 0.6729, + "step": 4539 + }, + { + "epoch": 0.07844899087641692, + "grad_norm": 0.8654226025399401, + "learning_rate": 1.9877211495218606e-05, + "loss": 0.5471, + "step": 4540 + }, + { + "epoch": 0.07846627038982583, + "grad_norm": 0.870973508942007, + "learning_rate": 1.987712404728399e-05, + "loss": 0.5395, + "step": 4541 + }, + { + "epoch": 0.07848354990323472, + "grad_norm": 0.8422561441470143, + "learning_rate": 1.9877036568413475e-05, + "loss": 0.4936, + "step": 4542 + }, + { + "epoch": 0.07850082941664363, + "grad_norm": 0.9514317744891839, + "learning_rate": 1.987694905860733e-05, + "loss": 0.5469, + "step": 4543 + }, + { + "epoch": 0.07851810893005252, + "grad_norm": 0.8826382548632162, + "learning_rate": 1.987686151786583e-05, + "loss": 0.7753, + "step": 4544 + }, + { + "epoch": 0.07853538844346143, + "grad_norm": 0.931143498426523, + "learning_rate": 1.987677394618925e-05, + "loss": 0.5975, + "step": 4545 + }, + { + "epoch": 0.07855266795687034, + "grad_norm": 0.8972330829153673, + "learning_rate": 1.987668634357786e-05, + "loss": 0.8866, + "step": 4546 + }, + { + "epoch": 0.07856994747027923, + "grad_norm": 0.9800328198683205, + "learning_rate": 1.987659871003194e-05, + "loss": 0.7883, + "step": 4547 + }, + { + "epoch": 0.07858722698368814, + "grad_norm": 0.5070688196590623, + "learning_rate": 1.9876511045551764e-05, + "loss": 0.6942, + "step": 4548 + }, + { + "epoch": 0.07860450649709705, + "grad_norm": 1.270821886719666, + "learning_rate": 1.98764233501376e-05, + "loss": 0.7412, + "step": 4549 + }, + { + "epoch": 0.07862178601050594, + "grad_norm": 1.2179569235723176, + "learning_rate": 1.9876335623789734e-05, + "loss": 0.8773, + "step": 4550 + }, + { + "epoch": 0.07863906552391485, + "grad_norm": 0.7023605957806773, + "learning_rate": 1.987624786650843e-05, + "loss": 0.4875, + "step": 4551 + }, + { + "epoch": 0.07865634503732374, + "grad_norm": 0.9892565376664293, + "learning_rate": 1.9876160078293968e-05, + "loss": 0.7397, + "step": 4552 + }, + { + "epoch": 0.07867362455073265, + "grad_norm": 0.672352310774825, + "learning_rate": 1.9876072259146624e-05, + "loss": 0.6756, + "step": 4553 + }, + { + "epoch": 0.07869090406414156, + "grad_norm": 0.9212337245257427, + "learning_rate": 1.9875984409066668e-05, + "loss": 0.8562, + "step": 4554 + }, + { + "epoch": 0.07870818357755045, + "grad_norm": 0.9657132682664068, + "learning_rate": 1.987589652805438e-05, + "loss": 0.6803, + "step": 4555 + }, + { + "epoch": 0.07872546309095936, + "grad_norm": 0.5367499206396344, + "learning_rate": 1.987580861611003e-05, + "loss": 0.7839, + "step": 4556 + }, + { + "epoch": 0.07874274260436825, + "grad_norm": 0.8952496915372917, + "learning_rate": 1.9875720673233904e-05, + "loss": 0.7773, + "step": 4557 + }, + { + "epoch": 0.07876002211777716, + "grad_norm": 0.4990921949352895, + "learning_rate": 1.9875632699426263e-05, + "loss": 0.5662, + "step": 4558 + }, + { + "epoch": 0.07877730163118607, + "grad_norm": 0.8440680835062434, + "learning_rate": 1.9875544694687394e-05, + "loss": 0.742, + "step": 4559 + }, + { + "epoch": 0.07879458114459496, + "grad_norm": 1.1261807062152889, + "learning_rate": 1.987545665901757e-05, + "loss": 0.7757, + "step": 4560 + }, + { + "epoch": 0.07881186065800387, + "grad_norm": 0.5871417672104439, + "learning_rate": 1.987536859241706e-05, + "loss": 0.5313, + "step": 4561 + }, + { + "epoch": 0.07882914017141278, + "grad_norm": 1.0382731774909217, + "learning_rate": 1.987528049488615e-05, + "loss": 0.6284, + "step": 4562 + }, + { + "epoch": 0.07884641968482167, + "grad_norm": 1.001244782428285, + "learning_rate": 1.987519236642511e-05, + "loss": 0.8784, + "step": 4563 + }, + { + "epoch": 0.07886369919823058, + "grad_norm": 0.9899246965751202, + "learning_rate": 1.9875104207034215e-05, + "loss": 0.8807, + "step": 4564 + }, + { + "epoch": 0.07888097871163947, + "grad_norm": 1.200329780973091, + "learning_rate": 1.987501601671374e-05, + "loss": 0.9374, + "step": 4565 + }, + { + "epoch": 0.07889825822504838, + "grad_norm": 0.7780957652722547, + "learning_rate": 1.987492779546397e-05, + "loss": 0.6647, + "step": 4566 + }, + { + "epoch": 0.07891553773845729, + "grad_norm": 1.1337638343140355, + "learning_rate": 1.9874839543285175e-05, + "loss": 0.7001, + "step": 4567 + }, + { + "epoch": 0.07893281725186618, + "grad_norm": 1.1480052516639005, + "learning_rate": 1.9874751260177633e-05, + "loss": 0.9593, + "step": 4568 + }, + { + "epoch": 0.07895009676527509, + "grad_norm": 0.7464370806305815, + "learning_rate": 1.9874662946141617e-05, + "loss": 0.527, + "step": 4569 + }, + { + "epoch": 0.078967376278684, + "grad_norm": 0.9016089385827434, + "learning_rate": 1.9874574601177406e-05, + "loss": 0.7587, + "step": 4570 + }, + { + "epoch": 0.07898465579209289, + "grad_norm": 1.3548148797874147, + "learning_rate": 1.9874486225285278e-05, + "loss": 0.7835, + "step": 4571 + }, + { + "epoch": 0.0790019353055018, + "grad_norm": 1.0804568717449037, + "learning_rate": 1.987439781846551e-05, + "loss": 0.768, + "step": 4572 + }, + { + "epoch": 0.0790192148189107, + "grad_norm": 1.003154488638334, + "learning_rate": 1.9874309380718374e-05, + "loss": 0.5862, + "step": 4573 + }, + { + "epoch": 0.0790364943323196, + "grad_norm": 1.460290452357465, + "learning_rate": 1.9874220912044154e-05, + "loss": 0.6682, + "step": 4574 + }, + { + "epoch": 0.07905377384572851, + "grad_norm": 1.0877423774829258, + "learning_rate": 1.9874132412443125e-05, + "loss": 0.9593, + "step": 4575 + }, + { + "epoch": 0.0790710533591374, + "grad_norm": 0.8248147784600218, + "learning_rate": 1.9874043881915557e-05, + "loss": 0.712, + "step": 4576 + }, + { + "epoch": 0.07908833287254631, + "grad_norm": 0.7995751794667683, + "learning_rate": 1.9873955320461736e-05, + "loss": 0.716, + "step": 4577 + }, + { + "epoch": 0.07910561238595522, + "grad_norm": 0.7641721379970725, + "learning_rate": 1.987386672808194e-05, + "loss": 0.4891, + "step": 4578 + }, + { + "epoch": 0.07912289189936411, + "grad_norm": 0.6765805434230435, + "learning_rate": 1.9873778104776437e-05, + "loss": 0.7254, + "step": 4579 + }, + { + "epoch": 0.07914017141277302, + "grad_norm": 0.9223115449511168, + "learning_rate": 1.9873689450545518e-05, + "loss": 0.714, + "step": 4580 + }, + { + "epoch": 0.07915745092618191, + "grad_norm": 1.0878783905085774, + "learning_rate": 1.9873600765389446e-05, + "loss": 0.8533, + "step": 4581 + }, + { + "epoch": 0.07917473043959082, + "grad_norm": 0.9658870353713509, + "learning_rate": 1.987351204930851e-05, + "loss": 0.654, + "step": 4582 + }, + { + "epoch": 0.07919200995299973, + "grad_norm": 1.0149301865530571, + "learning_rate": 1.9873423302302984e-05, + "loss": 0.576, + "step": 4583 + }, + { + "epoch": 0.07920928946640862, + "grad_norm": 0.9072198133822934, + "learning_rate": 1.9873334524373145e-05, + "loss": 0.6963, + "step": 4584 + }, + { + "epoch": 0.07922656897981753, + "grad_norm": 0.6401396395114722, + "learning_rate": 1.987324571551927e-05, + "loss": 0.6952, + "step": 4585 + }, + { + "epoch": 0.07924384849322644, + "grad_norm": 0.7146783492210484, + "learning_rate": 1.9873156875741643e-05, + "loss": 0.5614, + "step": 4586 + }, + { + "epoch": 0.07926112800663533, + "grad_norm": 0.9795730609146673, + "learning_rate": 1.9873068005040534e-05, + "loss": 0.6636, + "step": 4587 + }, + { + "epoch": 0.07927840752004424, + "grad_norm": 0.8427572375133734, + "learning_rate": 1.9872979103416228e-05, + "loss": 0.7174, + "step": 4588 + }, + { + "epoch": 0.07929568703345313, + "grad_norm": 0.9190202324546067, + "learning_rate": 1.9872890170869002e-05, + "loss": 0.5958, + "step": 4589 + }, + { + "epoch": 0.07931296654686204, + "grad_norm": 1.0205050955702628, + "learning_rate": 1.987280120739913e-05, + "loss": 0.8787, + "step": 4590 + }, + { + "epoch": 0.07933024606027095, + "grad_norm": 0.9917657761401902, + "learning_rate": 1.98727122130069e-05, + "loss": 0.5879, + "step": 4591 + }, + { + "epoch": 0.07934752557367984, + "grad_norm": 0.8682139334775547, + "learning_rate": 1.987262318769258e-05, + "loss": 0.7075, + "step": 4592 + }, + { + "epoch": 0.07936480508708875, + "grad_norm": 1.0475486341050249, + "learning_rate": 1.9872534131456457e-05, + "loss": 0.6912, + "step": 4593 + }, + { + "epoch": 0.07938208460049764, + "grad_norm": 0.96199440899721, + "learning_rate": 1.9872445044298806e-05, + "loss": 0.784, + "step": 4594 + }, + { + "epoch": 0.07939936411390655, + "grad_norm": 0.9734804364415199, + "learning_rate": 1.9872355926219906e-05, + "loss": 0.7958, + "step": 4595 + }, + { + "epoch": 0.07941664362731546, + "grad_norm": 0.7810207428975681, + "learning_rate": 1.9872266777220038e-05, + "loss": 0.6326, + "step": 4596 + }, + { + "epoch": 0.07943392314072435, + "grad_norm": 1.035692950920092, + "learning_rate": 1.987217759729948e-05, + "loss": 0.6844, + "step": 4597 + }, + { + "epoch": 0.07945120265413326, + "grad_norm": 1.316796980145438, + "learning_rate": 1.987208838645851e-05, + "loss": 0.8219, + "step": 4598 + }, + { + "epoch": 0.07946848216754217, + "grad_norm": 0.9668862192020031, + "learning_rate": 1.9871999144697408e-05, + "loss": 0.6689, + "step": 4599 + }, + { + "epoch": 0.07948576168095106, + "grad_norm": 0.8992456243312889, + "learning_rate": 1.987190987201646e-05, + "loss": 0.6995, + "step": 4600 + }, + { + "epoch": 0.07950304119435997, + "grad_norm": 0.615940097213367, + "learning_rate": 1.9871820568415936e-05, + "loss": 0.7968, + "step": 4601 + }, + { + "epoch": 0.07952032070776886, + "grad_norm": 1.0900055734459964, + "learning_rate": 1.987173123389612e-05, + "loss": 0.7076, + "step": 4602 + }, + { + "epoch": 0.07953760022117777, + "grad_norm": 0.9584378611059376, + "learning_rate": 1.987164186845729e-05, + "loss": 0.7316, + "step": 4603 + }, + { + "epoch": 0.07955487973458668, + "grad_norm": 0.6407863789453061, + "learning_rate": 1.9871552472099732e-05, + "loss": 0.5584, + "step": 4604 + }, + { + "epoch": 0.07957215924799557, + "grad_norm": 0.8166024457827998, + "learning_rate": 1.9871463044823716e-05, + "loss": 0.8158, + "step": 4605 + }, + { + "epoch": 0.07958943876140448, + "grad_norm": 0.9681418812876194, + "learning_rate": 1.987137358662953e-05, + "loss": 0.9101, + "step": 4606 + }, + { + "epoch": 0.07960671827481339, + "grad_norm": 1.2054188810698843, + "learning_rate": 1.9871284097517453e-05, + "loss": 0.7911, + "step": 4607 + }, + { + "epoch": 0.07962399778822228, + "grad_norm": 0.6558406270995196, + "learning_rate": 1.9871194577487765e-05, + "loss": 0.5602, + "step": 4608 + }, + { + "epoch": 0.07964127730163119, + "grad_norm": 0.8224915191950916, + "learning_rate": 1.9871105026540745e-05, + "loss": 0.6259, + "step": 4609 + }, + { + "epoch": 0.07965855681504008, + "grad_norm": 0.7752601996028176, + "learning_rate": 1.9871015444676672e-05, + "loss": 0.5991, + "step": 4610 + }, + { + "epoch": 0.07967583632844899, + "grad_norm": 0.8118552541069235, + "learning_rate": 1.9870925831895826e-05, + "loss": 0.7583, + "step": 4611 + }, + { + "epoch": 0.0796931158418579, + "grad_norm": 1.0414645951451367, + "learning_rate": 1.9870836188198495e-05, + "loss": 0.8244, + "step": 4612 + }, + { + "epoch": 0.07971039535526679, + "grad_norm": 0.7792808967212235, + "learning_rate": 1.987074651358495e-05, + "loss": 0.8251, + "step": 4613 + }, + { + "epoch": 0.0797276748686757, + "grad_norm": 0.9391018368093705, + "learning_rate": 1.9870656808055482e-05, + "loss": 0.5685, + "step": 4614 + }, + { + "epoch": 0.0797449543820846, + "grad_norm": 1.1835971135327177, + "learning_rate": 1.9870567071610366e-05, + "loss": 0.9862, + "step": 4615 + }, + { + "epoch": 0.0797622338954935, + "grad_norm": 0.9274360645784261, + "learning_rate": 1.987047730424988e-05, + "loss": 0.6706, + "step": 4616 + }, + { + "epoch": 0.07977951340890241, + "grad_norm": 0.8468045618274509, + "learning_rate": 1.987038750597431e-05, + "loss": 0.7006, + "step": 4617 + }, + { + "epoch": 0.0797967929223113, + "grad_norm": 1.0638494317542797, + "learning_rate": 1.987029767678394e-05, + "loss": 0.7413, + "step": 4618 + }, + { + "epoch": 0.07981407243572021, + "grad_norm": 0.975656246690044, + "learning_rate": 1.9870207816679045e-05, + "loss": 0.5532, + "step": 4619 + }, + { + "epoch": 0.07983135194912912, + "grad_norm": 0.8738218354276399, + "learning_rate": 1.9870117925659907e-05, + "loss": 0.5363, + "step": 4620 + }, + { + "epoch": 0.07984863146253801, + "grad_norm": 0.9125125104546103, + "learning_rate": 1.987002800372681e-05, + "loss": 1.0729, + "step": 4621 + }, + { + "epoch": 0.07986591097594692, + "grad_norm": 0.5413615790563833, + "learning_rate": 1.986993805088004e-05, + "loss": 0.7788, + "step": 4622 + }, + { + "epoch": 0.07988319048935583, + "grad_norm": 1.06458212371007, + "learning_rate": 1.986984806711987e-05, + "loss": 0.8417, + "step": 4623 + }, + { + "epoch": 0.07990047000276472, + "grad_norm": 0.8573873988263365, + "learning_rate": 1.9869758052446585e-05, + "loss": 0.8111, + "step": 4624 + }, + { + "epoch": 0.07991774951617363, + "grad_norm": 1.0379856673521946, + "learning_rate": 1.986966800686047e-05, + "loss": 0.6349, + "step": 4625 + }, + { + "epoch": 0.07993502902958252, + "grad_norm": 0.953908497966866, + "learning_rate": 1.9869577930361803e-05, + "loss": 0.6093, + "step": 4626 + }, + { + "epoch": 0.07995230854299143, + "grad_norm": 1.0562373897798476, + "learning_rate": 1.986948782295087e-05, + "loss": 0.7515, + "step": 4627 + }, + { + "epoch": 0.07996958805640034, + "grad_norm": 0.8516818050950367, + "learning_rate": 1.9869397684627947e-05, + "loss": 0.6778, + "step": 4628 + }, + { + "epoch": 0.07998686756980923, + "grad_norm": 0.7473635881520484, + "learning_rate": 1.9869307515393323e-05, + "loss": 0.458, + "step": 4629 + }, + { + "epoch": 0.08000414708321814, + "grad_norm": 0.7777248199684436, + "learning_rate": 1.986921731524728e-05, + "loss": 0.6024, + "step": 4630 + }, + { + "epoch": 0.08002142659662703, + "grad_norm": 1.1149231343484791, + "learning_rate": 1.9869127084190098e-05, + "loss": 0.71, + "step": 4631 + }, + { + "epoch": 0.08003870611003594, + "grad_norm": 0.9473150481336111, + "learning_rate": 1.9869036822222055e-05, + "loss": 0.8223, + "step": 4632 + }, + { + "epoch": 0.08005598562344485, + "grad_norm": 1.1928965280676458, + "learning_rate": 1.9868946529343443e-05, + "loss": 0.9016, + "step": 4633 + }, + { + "epoch": 0.08007326513685374, + "grad_norm": 1.0046513514818645, + "learning_rate": 1.986885620555454e-05, + "loss": 0.8517, + "step": 4634 + }, + { + "epoch": 0.08009054465026265, + "grad_norm": 1.2183063762457613, + "learning_rate": 1.986876585085563e-05, + "loss": 0.7721, + "step": 4635 + }, + { + "epoch": 0.08010782416367156, + "grad_norm": 0.9945919740446517, + "learning_rate": 1.9868675465246992e-05, + "loss": 0.6597, + "step": 4636 + }, + { + "epoch": 0.08012510367708045, + "grad_norm": 0.8663463694200159, + "learning_rate": 1.9868585048728915e-05, + "loss": 0.7853, + "step": 4637 + }, + { + "epoch": 0.08014238319048936, + "grad_norm": 0.9149955904744326, + "learning_rate": 1.9868494601301677e-05, + "loss": 0.7457, + "step": 4638 + }, + { + "epoch": 0.08015966270389825, + "grad_norm": 0.9800426009483969, + "learning_rate": 1.986840412296557e-05, + "loss": 0.6099, + "step": 4639 + }, + { + "epoch": 0.08017694221730716, + "grad_norm": 1.0358285893714934, + "learning_rate": 1.9868313613720866e-05, + "loss": 0.7387, + "step": 4640 + }, + { + "epoch": 0.08019422173071607, + "grad_norm": 1.0260623021428457, + "learning_rate": 1.9868223073567858e-05, + "loss": 0.6086, + "step": 4641 + }, + { + "epoch": 0.08021150124412496, + "grad_norm": 0.851347728741094, + "learning_rate": 1.9868132502506817e-05, + "loss": 0.476, + "step": 4642 + }, + { + "epoch": 0.08022878075753387, + "grad_norm": 0.963389495615814, + "learning_rate": 1.9868041900538042e-05, + "loss": 0.651, + "step": 4643 + }, + { + "epoch": 0.08024606027094278, + "grad_norm": 1.0093604420305284, + "learning_rate": 1.9867951267661808e-05, + "loss": 0.6722, + "step": 4644 + }, + { + "epoch": 0.08026333978435167, + "grad_norm": 1.0264067335415838, + "learning_rate": 1.98678606038784e-05, + "loss": 1.0354, + "step": 4645 + }, + { + "epoch": 0.08028061929776058, + "grad_norm": 1.1601149875545718, + "learning_rate": 1.9867769909188105e-05, + "loss": 0.7448, + "step": 4646 + }, + { + "epoch": 0.08029789881116947, + "grad_norm": 0.9610223797447246, + "learning_rate": 1.9867679183591205e-05, + "loss": 0.8032, + "step": 4647 + }, + { + "epoch": 0.08031517832457838, + "grad_norm": 1.7557817357889933, + "learning_rate": 1.986758842708798e-05, + "loss": 0.6356, + "step": 4648 + }, + { + "epoch": 0.08033245783798729, + "grad_norm": 0.9757678984500712, + "learning_rate": 1.9867497639678724e-05, + "loss": 0.9697, + "step": 4649 + }, + { + "epoch": 0.08034973735139618, + "grad_norm": 0.9793568090410608, + "learning_rate": 1.9867406821363714e-05, + "loss": 0.7654, + "step": 4650 + }, + { + "epoch": 0.08036701686480509, + "grad_norm": 0.7994656911951041, + "learning_rate": 1.986731597214323e-05, + "loss": 0.6644, + "step": 4651 + }, + { + "epoch": 0.080384296378214, + "grad_norm": 1.0623022895333276, + "learning_rate": 1.9867225092017567e-05, + "loss": 0.7751, + "step": 4652 + }, + { + "epoch": 0.08040157589162289, + "grad_norm": 0.5889377493100316, + "learning_rate": 1.9867134180987005e-05, + "loss": 0.7078, + "step": 4653 + }, + { + "epoch": 0.0804188554050318, + "grad_norm": 0.6884989758687622, + "learning_rate": 1.986704323905183e-05, + "loss": 0.6171, + "step": 4654 + }, + { + "epoch": 0.08043613491844069, + "grad_norm": 0.8769291359278022, + "learning_rate": 1.986695226621232e-05, + "loss": 0.7661, + "step": 4655 + }, + { + "epoch": 0.0804534144318496, + "grad_norm": 0.931232247674008, + "learning_rate": 1.9866861262468772e-05, + "loss": 0.9502, + "step": 4656 + }, + { + "epoch": 0.0804706939452585, + "grad_norm": 0.813179110481146, + "learning_rate": 1.9866770227821463e-05, + "loss": 0.8194, + "step": 4657 + }, + { + "epoch": 0.0804879734586674, + "grad_norm": 0.977556928578564, + "learning_rate": 1.986667916227068e-05, + "loss": 0.7535, + "step": 4658 + }, + { + "epoch": 0.08050525297207631, + "grad_norm": 1.37915387655111, + "learning_rate": 1.9866588065816707e-05, + "loss": 0.7934, + "step": 4659 + }, + { + "epoch": 0.08052253248548522, + "grad_norm": 0.9084268608956266, + "learning_rate": 1.986649693845983e-05, + "loss": 0.7521, + "step": 4660 + }, + { + "epoch": 0.08053981199889411, + "grad_norm": 0.9280619899132351, + "learning_rate": 1.9866405780200337e-05, + "loss": 0.843, + "step": 4661 + }, + { + "epoch": 0.08055709151230302, + "grad_norm": 0.9016223511508055, + "learning_rate": 1.9866314591038508e-05, + "loss": 0.8009, + "step": 4662 + }, + { + "epoch": 0.08057437102571191, + "grad_norm": 0.9868151274426724, + "learning_rate": 1.9866223370974636e-05, + "loss": 0.833, + "step": 4663 + }, + { + "epoch": 0.08059165053912082, + "grad_norm": 0.9885056453463981, + "learning_rate": 1.9866132120008997e-05, + "loss": 0.7364, + "step": 4664 + }, + { + "epoch": 0.08060893005252973, + "grad_norm": 1.3463238133996167, + "learning_rate": 1.986604083814189e-05, + "loss": 0.6675, + "step": 4665 + }, + { + "epoch": 0.08062620956593862, + "grad_norm": 1.6698826304739847, + "learning_rate": 1.9865949525373588e-05, + "loss": 0.979, + "step": 4666 + }, + { + "epoch": 0.08064348907934753, + "grad_norm": 0.821504391881839, + "learning_rate": 1.986585818170438e-05, + "loss": 0.4602, + "step": 4667 + }, + { + "epoch": 0.08066076859275642, + "grad_norm": 0.9249269880148372, + "learning_rate": 1.986576680713456e-05, + "loss": 0.6456, + "step": 4668 + }, + { + "epoch": 0.08067804810616533, + "grad_norm": 1.0129565483296117, + "learning_rate": 1.9865675401664406e-05, + "loss": 0.8398, + "step": 4669 + }, + { + "epoch": 0.08069532761957424, + "grad_norm": 1.0934804683938415, + "learning_rate": 1.9865583965294205e-05, + "loss": 0.8574, + "step": 4670 + }, + { + "epoch": 0.08071260713298313, + "grad_norm": 0.9205431406796315, + "learning_rate": 1.986549249802425e-05, + "loss": 0.7272, + "step": 4671 + }, + { + "epoch": 0.08072988664639204, + "grad_norm": 0.9738057938378161, + "learning_rate": 1.986540099985482e-05, + "loss": 0.7803, + "step": 4672 + }, + { + "epoch": 0.08074716615980095, + "grad_norm": 0.8607376936830607, + "learning_rate": 1.9865309470786203e-05, + "loss": 0.6538, + "step": 4673 + }, + { + "epoch": 0.08076444567320984, + "grad_norm": 0.8453735907342528, + "learning_rate": 1.986521791081869e-05, + "loss": 0.4768, + "step": 4674 + }, + { + "epoch": 0.08078172518661875, + "grad_norm": 0.9745458970087099, + "learning_rate": 1.9865126319952562e-05, + "loss": 0.9854, + "step": 4675 + }, + { + "epoch": 0.08079900470002764, + "grad_norm": 0.8529926163596742, + "learning_rate": 1.986503469818811e-05, + "loss": 0.7032, + "step": 4676 + }, + { + "epoch": 0.08081628421343655, + "grad_norm": 0.9612947206962758, + "learning_rate": 1.986494304552562e-05, + "loss": 0.702, + "step": 4677 + }, + { + "epoch": 0.08083356372684546, + "grad_norm": 0.8827975166308751, + "learning_rate": 1.9864851361965377e-05, + "loss": 0.6967, + "step": 4678 + }, + { + "epoch": 0.08085084324025435, + "grad_norm": 0.950075438777801, + "learning_rate": 1.986475964750767e-05, + "loss": 0.8333, + "step": 4679 + }, + { + "epoch": 0.08086812275366326, + "grad_norm": 0.4976628843821189, + "learning_rate": 1.986466790215279e-05, + "loss": 0.6644, + "step": 4680 + }, + { + "epoch": 0.08088540226707217, + "grad_norm": 0.6919892977770845, + "learning_rate": 1.9864576125901016e-05, + "loss": 0.599, + "step": 4681 + }, + { + "epoch": 0.08090268178048106, + "grad_norm": 0.6299459811750098, + "learning_rate": 1.986448431875264e-05, + "loss": 0.5537, + "step": 4682 + }, + { + "epoch": 0.08091996129388997, + "grad_norm": 0.8238276073410223, + "learning_rate": 1.986439248070795e-05, + "loss": 0.7608, + "step": 4683 + }, + { + "epoch": 0.08093724080729886, + "grad_norm": 0.9130405091030069, + "learning_rate": 1.9864300611767234e-05, + "loss": 0.4546, + "step": 4684 + }, + { + "epoch": 0.08095452032070777, + "grad_norm": 1.1106545039335516, + "learning_rate": 1.986420871193078e-05, + "loss": 0.7412, + "step": 4685 + }, + { + "epoch": 0.08097179983411668, + "grad_norm": 0.9063315136412925, + "learning_rate": 1.9864116781198872e-05, + "loss": 0.6654, + "step": 4686 + }, + { + "epoch": 0.08098907934752557, + "grad_norm": 0.9543309498688924, + "learning_rate": 1.98640248195718e-05, + "loss": 0.7106, + "step": 4687 + }, + { + "epoch": 0.08100635886093448, + "grad_norm": 1.048145798051533, + "learning_rate": 1.986393282704986e-05, + "loss": 0.6667, + "step": 4688 + }, + { + "epoch": 0.08102363837434338, + "grad_norm": 0.9324794528331194, + "learning_rate": 1.9863840803633324e-05, + "loss": 0.8213, + "step": 4689 + }, + { + "epoch": 0.08104091788775228, + "grad_norm": 0.9986553136099832, + "learning_rate": 1.986374874932249e-05, + "loss": 0.5753, + "step": 4690 + }, + { + "epoch": 0.08105819740116119, + "grad_norm": 1.1438337400660121, + "learning_rate": 1.9863656664117653e-05, + "loss": 0.8097, + "step": 4691 + }, + { + "epoch": 0.08107547691457008, + "grad_norm": 1.1506912493429104, + "learning_rate": 1.9863564548019084e-05, + "loss": 0.7839, + "step": 4692 + }, + { + "epoch": 0.08109275642797899, + "grad_norm": 1.0662865060571605, + "learning_rate": 1.986347240102709e-05, + "loss": 0.8237, + "step": 4693 + }, + { + "epoch": 0.0811100359413879, + "grad_norm": 0.7083771160562924, + "learning_rate": 1.9863380223141945e-05, + "loss": 0.6908, + "step": 4694 + }, + { + "epoch": 0.08112731545479679, + "grad_norm": 1.1087154599936337, + "learning_rate": 1.9863288014363945e-05, + "loss": 0.8264, + "step": 4695 + }, + { + "epoch": 0.0811445949682057, + "grad_norm": 1.005964294575507, + "learning_rate": 1.9863195774693375e-05, + "loss": 0.6582, + "step": 4696 + }, + { + "epoch": 0.0811618744816146, + "grad_norm": 0.8452633211840265, + "learning_rate": 1.9863103504130528e-05, + "loss": 0.5093, + "step": 4697 + }, + { + "epoch": 0.0811791539950235, + "grad_norm": 0.9920925072150694, + "learning_rate": 1.9863011202675695e-05, + "loss": 0.6658, + "step": 4698 + }, + { + "epoch": 0.0811964335084324, + "grad_norm": 0.5942549006729877, + "learning_rate": 1.9862918870329157e-05, + "loss": 0.9407, + "step": 4699 + }, + { + "epoch": 0.0812137130218413, + "grad_norm": 1.1052382509701715, + "learning_rate": 1.986282650709121e-05, + "loss": 0.6676, + "step": 4700 + }, + { + "epoch": 0.08123099253525021, + "grad_norm": 0.8688325407697413, + "learning_rate": 1.986273411296214e-05, + "loss": 0.5108, + "step": 4701 + }, + { + "epoch": 0.08124827204865911, + "grad_norm": 0.5844317072216352, + "learning_rate": 1.9862641687942234e-05, + "loss": 0.704, + "step": 4702 + }, + { + "epoch": 0.08126555156206801, + "grad_norm": 1.0443991792397478, + "learning_rate": 1.9862549232031786e-05, + "loss": 0.8895, + "step": 4703 + }, + { + "epoch": 0.08128283107547692, + "grad_norm": 0.7265401980174659, + "learning_rate": 1.9862456745231088e-05, + "loss": 0.6729, + "step": 4704 + }, + { + "epoch": 0.08130011058888582, + "grad_norm": 1.0100166733928675, + "learning_rate": 1.9862364227540425e-05, + "loss": 0.8822, + "step": 4705 + }, + { + "epoch": 0.08131739010229472, + "grad_norm": 1.0841626531138815, + "learning_rate": 1.9862271678960084e-05, + "loss": 0.7969, + "step": 4706 + }, + { + "epoch": 0.08133466961570363, + "grad_norm": 1.022604434124908, + "learning_rate": 1.9862179099490362e-05, + "loss": 0.804, + "step": 4707 + }, + { + "epoch": 0.08135194912911252, + "grad_norm": 0.9443157028187206, + "learning_rate": 1.9862086489131545e-05, + "loss": 0.7696, + "step": 4708 + }, + { + "epoch": 0.08136922864252143, + "grad_norm": 1.179560738749333, + "learning_rate": 1.9861993847883922e-05, + "loss": 0.7826, + "step": 4709 + }, + { + "epoch": 0.08138650815593033, + "grad_norm": 0.9590727905186107, + "learning_rate": 1.9861901175747786e-05, + "loss": 0.8229, + "step": 4710 + }, + { + "epoch": 0.08140378766933923, + "grad_norm": 0.6288436362890586, + "learning_rate": 1.986180847272343e-05, + "loss": 0.5056, + "step": 4711 + }, + { + "epoch": 0.08142106718274814, + "grad_norm": 1.0317577902519648, + "learning_rate": 1.986171573881113e-05, + "loss": 0.9341, + "step": 4712 + }, + { + "epoch": 0.08143834669615703, + "grad_norm": 0.8305969975463204, + "learning_rate": 1.9861622974011195e-05, + "loss": 0.5715, + "step": 4713 + }, + { + "epoch": 0.08145562620956594, + "grad_norm": 1.0902028790653742, + "learning_rate": 1.9861530178323907e-05, + "loss": 0.8394, + "step": 4714 + }, + { + "epoch": 0.08147290572297484, + "grad_norm": 0.8036544558975106, + "learning_rate": 1.9861437351749554e-05, + "loss": 0.4389, + "step": 4715 + }, + { + "epoch": 0.08149018523638374, + "grad_norm": 0.8697182933097652, + "learning_rate": 1.9861344494288434e-05, + "loss": 0.6657, + "step": 4716 + }, + { + "epoch": 0.08150746474979265, + "grad_norm": 1.3516715550067988, + "learning_rate": 1.986125160594083e-05, + "loss": 0.8022, + "step": 4717 + }, + { + "epoch": 0.08152474426320155, + "grad_norm": 0.576812302565167, + "learning_rate": 1.986115868670704e-05, + "loss": 0.6202, + "step": 4718 + }, + { + "epoch": 0.08154202377661045, + "grad_norm": 1.0414963433008333, + "learning_rate": 1.986106573658735e-05, + "loss": 0.6072, + "step": 4719 + }, + { + "epoch": 0.08155930329001936, + "grad_norm": 1.163223040269093, + "learning_rate": 1.9860972755582048e-05, + "loss": 0.6599, + "step": 4720 + }, + { + "epoch": 0.08157658280342825, + "grad_norm": 0.8374169107163839, + "learning_rate": 1.9860879743691435e-05, + "loss": 0.8015, + "step": 4721 + }, + { + "epoch": 0.08159386231683716, + "grad_norm": 0.9699821868256127, + "learning_rate": 1.9860786700915796e-05, + "loss": 0.6929, + "step": 4722 + }, + { + "epoch": 0.08161114183024606, + "grad_norm": 0.8526187606588542, + "learning_rate": 1.9860693627255425e-05, + "loss": 0.6649, + "step": 4723 + }, + { + "epoch": 0.08162842134365496, + "grad_norm": 1.1184722973895709, + "learning_rate": 1.986060052271061e-05, + "loss": 0.8604, + "step": 4724 + }, + { + "epoch": 0.08164570085706387, + "grad_norm": 1.0018076914542025, + "learning_rate": 1.9860507387281644e-05, + "loss": 0.7297, + "step": 4725 + }, + { + "epoch": 0.08166298037047277, + "grad_norm": 1.1882263741780543, + "learning_rate": 1.986041422096882e-05, + "loss": 0.8777, + "step": 4726 + }, + { + "epoch": 0.08168025988388167, + "grad_norm": 0.9869992883828773, + "learning_rate": 1.9860321023772435e-05, + "loss": 0.6433, + "step": 4727 + }, + { + "epoch": 0.08169753939729058, + "grad_norm": 1.0114727922111795, + "learning_rate": 1.9860227795692766e-05, + "loss": 0.6449, + "step": 4728 + }, + { + "epoch": 0.08171481891069947, + "grad_norm": 0.8738119745854122, + "learning_rate": 1.986013453673012e-05, + "loss": 0.586, + "step": 4729 + }, + { + "epoch": 0.08173209842410838, + "grad_norm": 0.7307974182890432, + "learning_rate": 1.9860041246884783e-05, + "loss": 0.647, + "step": 4730 + }, + { + "epoch": 0.08174937793751728, + "grad_norm": 0.8381595919987459, + "learning_rate": 1.9859947926157047e-05, + "loss": 0.6579, + "step": 4731 + }, + { + "epoch": 0.08176665745092618, + "grad_norm": 1.0815763763413209, + "learning_rate": 1.9859854574547205e-05, + "loss": 0.7975, + "step": 4732 + }, + { + "epoch": 0.08178393696433509, + "grad_norm": 1.191383423483034, + "learning_rate": 1.9859761192055548e-05, + "loss": 0.8554, + "step": 4733 + }, + { + "epoch": 0.081801216477744, + "grad_norm": 0.9710257984954053, + "learning_rate": 1.985966777868237e-05, + "loss": 0.7603, + "step": 4734 + }, + { + "epoch": 0.08181849599115289, + "grad_norm": 0.8517980471420087, + "learning_rate": 1.9859574334427965e-05, + "loss": 0.5069, + "step": 4735 + }, + { + "epoch": 0.0818357755045618, + "grad_norm": 0.8628782013544234, + "learning_rate": 1.985948085929262e-05, + "loss": 0.6511, + "step": 4736 + }, + { + "epoch": 0.08185305501797069, + "grad_norm": 1.039514893917108, + "learning_rate": 1.9859387353276632e-05, + "loss": 0.7009, + "step": 4737 + }, + { + "epoch": 0.0818703345313796, + "grad_norm": 0.9980269794103572, + "learning_rate": 1.9859293816380298e-05, + "loss": 0.7976, + "step": 4738 + }, + { + "epoch": 0.0818876140447885, + "grad_norm": 1.1296467570604152, + "learning_rate": 1.9859200248603904e-05, + "loss": 0.7697, + "step": 4739 + }, + { + "epoch": 0.0819048935581974, + "grad_norm": 1.0296761019860552, + "learning_rate": 1.9859106649947744e-05, + "loss": 0.7062, + "step": 4740 + }, + { + "epoch": 0.0819221730716063, + "grad_norm": 0.8233207988637667, + "learning_rate": 1.9859013020412115e-05, + "loss": 0.7268, + "step": 4741 + }, + { + "epoch": 0.08193945258501521, + "grad_norm": 0.5592183727982343, + "learning_rate": 1.985891935999731e-05, + "loss": 0.5134, + "step": 4742 + }, + { + "epoch": 0.0819567320984241, + "grad_norm": 1.170869863083553, + "learning_rate": 1.9858825668703617e-05, + "loss": 0.8505, + "step": 4743 + }, + { + "epoch": 0.08197401161183301, + "grad_norm": 0.8178045639878911, + "learning_rate": 1.985873194653133e-05, + "loss": 0.6544, + "step": 4744 + }, + { + "epoch": 0.08199129112524191, + "grad_norm": 0.9376967564281198, + "learning_rate": 1.985863819348075e-05, + "loss": 0.5715, + "step": 4745 + }, + { + "epoch": 0.08200857063865082, + "grad_norm": 0.9613376020034718, + "learning_rate": 1.9858544409552163e-05, + "loss": 0.7592, + "step": 4746 + }, + { + "epoch": 0.08202585015205972, + "grad_norm": 1.1486443442028755, + "learning_rate": 1.9858450594745867e-05, + "loss": 0.6156, + "step": 4747 + }, + { + "epoch": 0.08204312966546862, + "grad_norm": 1.297871084966437, + "learning_rate": 1.9858356749062158e-05, + "loss": 0.7543, + "step": 4748 + }, + { + "epoch": 0.08206040917887752, + "grad_norm": 1.2619370348809698, + "learning_rate": 1.9858262872501322e-05, + "loss": 0.9022, + "step": 4749 + }, + { + "epoch": 0.08207768869228642, + "grad_norm": 0.807757186426206, + "learning_rate": 1.985816896506366e-05, + "loss": 0.5652, + "step": 4750 + }, + { + "epoch": 0.08209496820569533, + "grad_norm": 0.949008458251145, + "learning_rate": 1.985807502674946e-05, + "loss": 0.751, + "step": 4751 + }, + { + "epoch": 0.08211224771910423, + "grad_norm": 1.4703053885370647, + "learning_rate": 1.9857981057559023e-05, + "loss": 0.8314, + "step": 4752 + }, + { + "epoch": 0.08212952723251313, + "grad_norm": 0.9672615088999201, + "learning_rate": 1.985788705749264e-05, + "loss": 0.6404, + "step": 4753 + }, + { + "epoch": 0.08214680674592204, + "grad_norm": 0.9034083999822003, + "learning_rate": 1.9857793026550603e-05, + "loss": 0.649, + "step": 4754 + }, + { + "epoch": 0.08216408625933094, + "grad_norm": 0.8252285520938241, + "learning_rate": 1.985769896473321e-05, + "loss": 0.6668, + "step": 4755 + }, + { + "epoch": 0.08218136577273984, + "grad_norm": 1.344889245711748, + "learning_rate": 1.9857604872040757e-05, + "loss": 0.9001, + "step": 4756 + }, + { + "epoch": 0.08219864528614874, + "grad_norm": 1.0768213631337324, + "learning_rate": 1.9857510748473533e-05, + "loss": 0.7854, + "step": 4757 + }, + { + "epoch": 0.08221592479955764, + "grad_norm": 1.378417422838557, + "learning_rate": 1.985741659403184e-05, + "loss": 1.0364, + "step": 4758 + }, + { + "epoch": 0.08223320431296655, + "grad_norm": 0.9170628211420949, + "learning_rate": 1.9857322408715966e-05, + "loss": 0.757, + "step": 4759 + }, + { + "epoch": 0.08225048382637545, + "grad_norm": 0.6888158437924495, + "learning_rate": 1.985722819252621e-05, + "loss": 0.4706, + "step": 4760 + }, + { + "epoch": 0.08226776333978435, + "grad_norm": 0.5724893195400562, + "learning_rate": 1.985713394546287e-05, + "loss": 0.7102, + "step": 4761 + }, + { + "epoch": 0.08228504285319325, + "grad_norm": 0.8045707311323219, + "learning_rate": 1.9857039667526232e-05, + "loss": 0.4688, + "step": 4762 + }, + { + "epoch": 0.08230232236660216, + "grad_norm": 1.3541693006615036, + "learning_rate": 1.9856945358716596e-05, + "loss": 1.0285, + "step": 4763 + }, + { + "epoch": 0.08231960188001106, + "grad_norm": 1.2256452153511503, + "learning_rate": 1.9856851019034262e-05, + "loss": 0.58, + "step": 4764 + }, + { + "epoch": 0.08233688139341996, + "grad_norm": 1.0422907017624032, + "learning_rate": 1.985675664847952e-05, + "loss": 0.8194, + "step": 4765 + }, + { + "epoch": 0.08235416090682886, + "grad_norm": 0.6064607245150043, + "learning_rate": 1.985666224705267e-05, + "loss": 0.5716, + "step": 4766 + }, + { + "epoch": 0.08237144042023777, + "grad_norm": 0.8976123146644717, + "learning_rate": 1.9856567814754e-05, + "loss": 0.6194, + "step": 4767 + }, + { + "epoch": 0.08238871993364667, + "grad_norm": 1.1168387831603976, + "learning_rate": 1.985647335158381e-05, + "loss": 0.7986, + "step": 4768 + }, + { + "epoch": 0.08240599944705557, + "grad_norm": 1.37873828748784, + "learning_rate": 1.98563788575424e-05, + "loss": 0.7771, + "step": 4769 + }, + { + "epoch": 0.08242327896046447, + "grad_norm": 1.067030136453317, + "learning_rate": 1.9856284332630063e-05, + "loss": 0.7288, + "step": 4770 + }, + { + "epoch": 0.08244055847387338, + "grad_norm": 0.5929855639585762, + "learning_rate": 1.985618977684709e-05, + "loss": 0.532, + "step": 4771 + }, + { + "epoch": 0.08245783798728228, + "grad_norm": 0.8871141208077125, + "learning_rate": 1.9856095190193785e-05, + "loss": 0.6419, + "step": 4772 + }, + { + "epoch": 0.08247511750069118, + "grad_norm": 0.9143546877326154, + "learning_rate": 1.985600057267044e-05, + "loss": 0.8432, + "step": 4773 + }, + { + "epoch": 0.08249239701410008, + "grad_norm": 1.0348358876519708, + "learning_rate": 1.9855905924277352e-05, + "loss": 0.8811, + "step": 4774 + }, + { + "epoch": 0.08250967652750898, + "grad_norm": 1.3226218454257521, + "learning_rate": 1.985581124501482e-05, + "loss": 0.8073, + "step": 4775 + }, + { + "epoch": 0.08252695604091789, + "grad_norm": 1.2902319356252907, + "learning_rate": 1.9855716534883136e-05, + "loss": 1.0177, + "step": 4776 + }, + { + "epoch": 0.08254423555432679, + "grad_norm": 1.054428992159694, + "learning_rate": 1.9855621793882596e-05, + "loss": 0.6603, + "step": 4777 + }, + { + "epoch": 0.0825615150677357, + "grad_norm": 0.891406670752997, + "learning_rate": 1.9855527022013502e-05, + "loss": 0.6465, + "step": 4778 + }, + { + "epoch": 0.0825787945811446, + "grad_norm": 1.0255094521960033, + "learning_rate": 1.985543221927615e-05, + "loss": 0.7037, + "step": 4779 + }, + { + "epoch": 0.0825960740945535, + "grad_norm": 0.6373205946758562, + "learning_rate": 1.9855337385670836e-05, + "loss": 0.5357, + "step": 4780 + }, + { + "epoch": 0.0826133536079624, + "grad_norm": 1.282134916761934, + "learning_rate": 1.9855242521197852e-05, + "loss": 0.9122, + "step": 4781 + }, + { + "epoch": 0.0826306331213713, + "grad_norm": 1.2815489493618595, + "learning_rate": 1.9855147625857504e-05, + "loss": 0.9696, + "step": 4782 + }, + { + "epoch": 0.0826479126347802, + "grad_norm": 1.1992109458052695, + "learning_rate": 1.9855052699650083e-05, + "loss": 0.6632, + "step": 4783 + }, + { + "epoch": 0.08266519214818911, + "grad_norm": 0.8874055788692886, + "learning_rate": 1.9854957742575887e-05, + "loss": 0.7053, + "step": 4784 + }, + { + "epoch": 0.082682471661598, + "grad_norm": 1.1404190466211526, + "learning_rate": 1.9854862754635213e-05, + "loss": 0.792, + "step": 4785 + }, + { + "epoch": 0.08269975117500691, + "grad_norm": 0.934541704528427, + "learning_rate": 1.985476773582836e-05, + "loss": 0.7546, + "step": 4786 + }, + { + "epoch": 0.08271703068841581, + "grad_norm": 0.7999280967251026, + "learning_rate": 1.985467268615563e-05, + "loss": 0.527, + "step": 4787 + }, + { + "epoch": 0.08273431020182472, + "grad_norm": 0.9074944225443945, + "learning_rate": 1.9854577605617314e-05, + "loss": 0.7792, + "step": 4788 + }, + { + "epoch": 0.08275158971523362, + "grad_norm": 0.9910654775170334, + "learning_rate": 1.985448249421371e-05, + "loss": 0.8121, + "step": 4789 + }, + { + "epoch": 0.08276886922864252, + "grad_norm": 0.8739540447752223, + "learning_rate": 1.985438735194512e-05, + "loss": 0.5726, + "step": 4790 + }, + { + "epoch": 0.08278614874205142, + "grad_norm": 0.931457061457225, + "learning_rate": 1.985429217881184e-05, + "loss": 0.7096, + "step": 4791 + }, + { + "epoch": 0.08280342825546033, + "grad_norm": 1.0435500838343255, + "learning_rate": 1.9854196974814165e-05, + "loss": 0.5423, + "step": 4792 + }, + { + "epoch": 0.08282070776886923, + "grad_norm": 0.7793104679289589, + "learning_rate": 1.98541017399524e-05, + "loss": 0.5354, + "step": 4793 + }, + { + "epoch": 0.08283798728227813, + "grad_norm": 1.314289915396796, + "learning_rate": 1.985400647422684e-05, + "loss": 0.9591, + "step": 4794 + }, + { + "epoch": 0.08285526679568703, + "grad_norm": 1.2830084195200362, + "learning_rate": 1.9853911177637778e-05, + "loss": 0.6984, + "step": 4795 + }, + { + "epoch": 0.08287254630909593, + "grad_norm": 0.7272671955347451, + "learning_rate": 1.985381585018552e-05, + "loss": 0.6233, + "step": 4796 + }, + { + "epoch": 0.08288982582250484, + "grad_norm": 1.248070671193537, + "learning_rate": 1.9853720491870363e-05, + "loss": 0.8524, + "step": 4797 + }, + { + "epoch": 0.08290710533591374, + "grad_norm": 1.2339817210070112, + "learning_rate": 1.98536251026926e-05, + "loss": 0.6197, + "step": 4798 + }, + { + "epoch": 0.08292438484932264, + "grad_norm": 1.1362004792911742, + "learning_rate": 1.985352968265254e-05, + "loss": 0.6369, + "step": 4799 + }, + { + "epoch": 0.08294166436273155, + "grad_norm": 0.8814719185273311, + "learning_rate": 1.9853434231750476e-05, + "loss": 0.541, + "step": 4800 + }, + { + "epoch": 0.08295894387614045, + "grad_norm": 1.1435887434111944, + "learning_rate": 1.9853338749986704e-05, + "loss": 0.8018, + "step": 4801 + }, + { + "epoch": 0.08297622338954935, + "grad_norm": 1.0130266658390774, + "learning_rate": 1.9853243237361528e-05, + "loss": 0.5557, + "step": 4802 + }, + { + "epoch": 0.08299350290295825, + "grad_norm": 0.838035012899496, + "learning_rate": 1.9853147693875245e-05, + "loss": 0.6025, + "step": 4803 + }, + { + "epoch": 0.08301078241636715, + "grad_norm": 0.8259754452174499, + "learning_rate": 1.9853052119528153e-05, + "loss": 0.4844, + "step": 4804 + }, + { + "epoch": 0.08302806192977606, + "grad_norm": 1.2277730404215283, + "learning_rate": 1.9852956514320554e-05, + "loss": 0.8882, + "step": 4805 + }, + { + "epoch": 0.08304534144318496, + "grad_norm": 0.5596568636212026, + "learning_rate": 1.985286087825275e-05, + "loss": 0.4511, + "step": 4806 + }, + { + "epoch": 0.08306262095659386, + "grad_norm": 0.994273909618543, + "learning_rate": 1.9852765211325032e-05, + "loss": 0.6219, + "step": 4807 + }, + { + "epoch": 0.08307990047000277, + "grad_norm": 1.2867662675683216, + "learning_rate": 1.9852669513537708e-05, + "loss": 1.0022, + "step": 4808 + }, + { + "epoch": 0.08309717998341166, + "grad_norm": 0.9016864763630099, + "learning_rate": 1.9852573784891073e-05, + "loss": 0.7468, + "step": 4809 + }, + { + "epoch": 0.08311445949682057, + "grad_norm": 1.4312211070098073, + "learning_rate": 1.9852478025385428e-05, + "loss": 0.9326, + "step": 4810 + }, + { + "epoch": 0.08313173901022947, + "grad_norm": 0.9791663467368812, + "learning_rate": 1.985238223502107e-05, + "loss": 1.0078, + "step": 4811 + }, + { + "epoch": 0.08314901852363837, + "grad_norm": 0.9642978962696303, + "learning_rate": 1.9852286413798307e-05, + "loss": 0.9411, + "step": 4812 + }, + { + "epoch": 0.08316629803704728, + "grad_norm": 0.8885095923377434, + "learning_rate": 1.9852190561717433e-05, + "loss": 0.6039, + "step": 4813 + }, + { + "epoch": 0.08318357755045618, + "grad_norm": 0.7714323185730159, + "learning_rate": 1.9852094678778746e-05, + "loss": 0.7079, + "step": 4814 + }, + { + "epoch": 0.08320085706386508, + "grad_norm": 1.0088429679485107, + "learning_rate": 1.9851998764982553e-05, + "loss": 0.7764, + "step": 4815 + }, + { + "epoch": 0.08321813657727399, + "grad_norm": 1.2230336723867379, + "learning_rate": 1.985190282032915e-05, + "loss": 0.7685, + "step": 4816 + }, + { + "epoch": 0.08323541609068288, + "grad_norm": 1.1011040143106603, + "learning_rate": 1.985180684481884e-05, + "loss": 0.773, + "step": 4817 + }, + { + "epoch": 0.08325269560409179, + "grad_norm": 1.147475862376159, + "learning_rate": 1.985171083845192e-05, + "loss": 0.5374, + "step": 4818 + }, + { + "epoch": 0.08326997511750069, + "grad_norm": 1.148611381911366, + "learning_rate": 1.9851614801228695e-05, + "loss": 0.6622, + "step": 4819 + }, + { + "epoch": 0.0832872546309096, + "grad_norm": 0.9085011517776378, + "learning_rate": 1.9851518733149463e-05, + "loss": 0.8021, + "step": 4820 + }, + { + "epoch": 0.0833045341443185, + "grad_norm": 1.0649008983685, + "learning_rate": 1.9851422634214525e-05, + "loss": 0.8241, + "step": 4821 + }, + { + "epoch": 0.0833218136577274, + "grad_norm": 1.2793074258964463, + "learning_rate": 1.9851326504424185e-05, + "loss": 0.7814, + "step": 4822 + }, + { + "epoch": 0.0833390931711363, + "grad_norm": 1.5389605566999467, + "learning_rate": 1.985123034377874e-05, + "loss": 0.7114, + "step": 4823 + }, + { + "epoch": 0.0833563726845452, + "grad_norm": 1.0838804391338883, + "learning_rate": 1.9851134152278492e-05, + "loss": 0.8227, + "step": 4824 + }, + { + "epoch": 0.0833736521979541, + "grad_norm": 1.2224127302275523, + "learning_rate": 1.9851037929923745e-05, + "loss": 0.7794, + "step": 4825 + }, + { + "epoch": 0.08339093171136301, + "grad_norm": 0.7916471565614445, + "learning_rate": 1.9850941676714797e-05, + "loss": 0.5359, + "step": 4826 + }, + { + "epoch": 0.0834082112247719, + "grad_norm": 0.737397182604464, + "learning_rate": 1.985084539265195e-05, + "loss": 0.5631, + "step": 4827 + }, + { + "epoch": 0.08342549073818081, + "grad_norm": 0.8551583572351605, + "learning_rate": 1.985074907773551e-05, + "loss": 0.5632, + "step": 4828 + }, + { + "epoch": 0.08344277025158972, + "grad_norm": 0.5709558196241007, + "learning_rate": 1.9850652731965773e-05, + "loss": 0.6585, + "step": 4829 + }, + { + "epoch": 0.08346004976499861, + "grad_norm": 1.2118631395837611, + "learning_rate": 1.9850556355343046e-05, + "loss": 0.9366, + "step": 4830 + }, + { + "epoch": 0.08347732927840752, + "grad_norm": 1.0340299007552376, + "learning_rate": 1.9850459947867626e-05, + "loss": 0.6942, + "step": 4831 + }, + { + "epoch": 0.08349460879181642, + "grad_norm": 1.0111883177294663, + "learning_rate": 1.9850363509539813e-05, + "loss": 0.7837, + "step": 4832 + }, + { + "epoch": 0.08351188830522532, + "grad_norm": 0.7757915670015312, + "learning_rate": 1.9850267040359916e-05, + "loss": 0.3898, + "step": 4833 + }, + { + "epoch": 0.08352916781863423, + "grad_norm": 0.9923571858760989, + "learning_rate": 1.9850170540328232e-05, + "loss": 0.8082, + "step": 4834 + }, + { + "epoch": 0.08354644733204313, + "grad_norm": 1.0596508194491645, + "learning_rate": 1.9850074009445072e-05, + "loss": 0.8299, + "step": 4835 + }, + { + "epoch": 0.08356372684545203, + "grad_norm": 0.9274686847154116, + "learning_rate": 1.9849977447710725e-05, + "loss": 0.5597, + "step": 4836 + }, + { + "epoch": 0.08358100635886094, + "grad_norm": 0.7957938258912215, + "learning_rate": 1.98498808551255e-05, + "loss": 0.5678, + "step": 4837 + }, + { + "epoch": 0.08359828587226983, + "grad_norm": 1.0625373478524973, + "learning_rate": 1.9849784231689705e-05, + "loss": 0.6641, + "step": 4838 + }, + { + "epoch": 0.08361556538567874, + "grad_norm": 0.9177514953328816, + "learning_rate": 1.9849687577403632e-05, + "loss": 0.7123, + "step": 4839 + }, + { + "epoch": 0.08363284489908764, + "grad_norm": 1.1464771173156543, + "learning_rate": 1.9849590892267593e-05, + "loss": 0.9149, + "step": 4840 + }, + { + "epoch": 0.08365012441249654, + "grad_norm": 1.273320284466564, + "learning_rate": 1.9849494176281885e-05, + "loss": 0.7347, + "step": 4841 + }, + { + "epoch": 0.08366740392590545, + "grad_norm": 2.428356118810583, + "learning_rate": 1.984939742944681e-05, + "loss": 0.666, + "step": 4842 + }, + { + "epoch": 0.08368468343931434, + "grad_norm": 0.9783409581182735, + "learning_rate": 1.9849300651762678e-05, + "loss": 0.5568, + "step": 4843 + }, + { + "epoch": 0.08370196295272325, + "grad_norm": 1.0106631411418958, + "learning_rate": 1.9849203843229784e-05, + "loss": 0.9312, + "step": 4844 + }, + { + "epoch": 0.08371924246613216, + "grad_norm": 0.8188666787011335, + "learning_rate": 1.984910700384844e-05, + "loss": 0.6144, + "step": 4845 + }, + { + "epoch": 0.08373652197954105, + "grad_norm": 1.1651535295991453, + "learning_rate": 1.984901013361894e-05, + "loss": 0.8508, + "step": 4846 + }, + { + "epoch": 0.08375380149294996, + "grad_norm": 1.1819746355525746, + "learning_rate": 1.9848913232541594e-05, + "loss": 0.8008, + "step": 4847 + }, + { + "epoch": 0.08377108100635886, + "grad_norm": 0.6470028946738423, + "learning_rate": 1.98488163006167e-05, + "loss": 0.8763, + "step": 4848 + }, + { + "epoch": 0.08378836051976776, + "grad_norm": 0.9856083469255633, + "learning_rate": 1.984871933784457e-05, + "loss": 0.6897, + "step": 4849 + }, + { + "epoch": 0.08380564003317667, + "grad_norm": 1.3465984917502696, + "learning_rate": 1.9848622344225498e-05, + "loss": 0.6422, + "step": 4850 + }, + { + "epoch": 0.08382291954658556, + "grad_norm": 0.6786674722089192, + "learning_rate": 1.9848525319759796e-05, + "loss": 0.8653, + "step": 4851 + }, + { + "epoch": 0.08384019905999447, + "grad_norm": 0.7947958843955414, + "learning_rate": 1.984842826444776e-05, + "loss": 0.7847, + "step": 4852 + }, + { + "epoch": 0.08385747857340338, + "grad_norm": 0.9301022673249767, + "learning_rate": 1.98483311782897e-05, + "loss": 0.7982, + "step": 4853 + }, + { + "epoch": 0.08387475808681227, + "grad_norm": 1.1536851924450378, + "learning_rate": 1.984823406128592e-05, + "loss": 0.833, + "step": 4854 + }, + { + "epoch": 0.08389203760022118, + "grad_norm": 0.9115503730578399, + "learning_rate": 1.984813691343672e-05, + "loss": 0.6761, + "step": 4855 + }, + { + "epoch": 0.08390931711363007, + "grad_norm": 1.4059075029289376, + "learning_rate": 1.984803973474241e-05, + "loss": 0.7247, + "step": 4856 + }, + { + "epoch": 0.08392659662703898, + "grad_norm": 0.9502322397226771, + "learning_rate": 1.984794252520329e-05, + "loss": 0.6607, + "step": 4857 + }, + { + "epoch": 0.08394387614044789, + "grad_norm": 0.6301744532416623, + "learning_rate": 1.9847845284819663e-05, + "loss": 0.7909, + "step": 4858 + }, + { + "epoch": 0.08396115565385678, + "grad_norm": 1.1287303313955808, + "learning_rate": 1.9847748013591837e-05, + "loss": 0.9233, + "step": 4859 + }, + { + "epoch": 0.08397843516726569, + "grad_norm": 1.2200869033550228, + "learning_rate": 1.984765071152012e-05, + "loss": 0.8707, + "step": 4860 + }, + { + "epoch": 0.0839957146806746, + "grad_norm": 0.5597054319971776, + "learning_rate": 1.984755337860481e-05, + "loss": 0.624, + "step": 4861 + }, + { + "epoch": 0.08401299419408349, + "grad_norm": 1.0569877999996382, + "learning_rate": 1.9847456014846213e-05, + "loss": 0.9596, + "step": 4862 + }, + { + "epoch": 0.0840302737074924, + "grad_norm": 0.81611809695676, + "learning_rate": 1.9847358620244637e-05, + "loss": 0.6492, + "step": 4863 + }, + { + "epoch": 0.0840475532209013, + "grad_norm": 0.8256082831706567, + "learning_rate": 1.9847261194800382e-05, + "loss": 0.5819, + "step": 4864 + }, + { + "epoch": 0.0840648327343102, + "grad_norm": 1.2513385144427724, + "learning_rate": 1.984716373851376e-05, + "loss": 0.6914, + "step": 4865 + }, + { + "epoch": 0.08408211224771911, + "grad_norm": 0.978837293198348, + "learning_rate": 1.9847066251385073e-05, + "loss": 0.6184, + "step": 4866 + }, + { + "epoch": 0.084099391761128, + "grad_norm": 0.9042605116036372, + "learning_rate": 1.9846968733414625e-05, + "loss": 0.7014, + "step": 4867 + }, + { + "epoch": 0.08411667127453691, + "grad_norm": 0.9656631825487443, + "learning_rate": 1.9846871184602722e-05, + "loss": 0.7768, + "step": 4868 + }, + { + "epoch": 0.0841339507879458, + "grad_norm": 0.6049963903835476, + "learning_rate": 1.984677360494967e-05, + "loss": 0.434, + "step": 4869 + }, + { + "epoch": 0.08415123030135471, + "grad_norm": 0.994744884926927, + "learning_rate": 1.9846675994455778e-05, + "loss": 0.7844, + "step": 4870 + }, + { + "epoch": 0.08416850981476362, + "grad_norm": 0.8421710745068453, + "learning_rate": 1.9846578353121345e-05, + "loss": 0.7661, + "step": 4871 + }, + { + "epoch": 0.08418578932817251, + "grad_norm": 1.0128899197783179, + "learning_rate": 1.9846480680946682e-05, + "loss": 0.7837, + "step": 4872 + }, + { + "epoch": 0.08420306884158142, + "grad_norm": 0.9116827737598692, + "learning_rate": 1.9846382977932092e-05, + "loss": 0.7455, + "step": 4873 + }, + { + "epoch": 0.08422034835499033, + "grad_norm": 0.9922252745515677, + "learning_rate": 1.9846285244077882e-05, + "loss": 0.6042, + "step": 4874 + }, + { + "epoch": 0.08423762786839922, + "grad_norm": 1.3672897883089379, + "learning_rate": 1.984618747938436e-05, + "loss": 0.9114, + "step": 4875 + }, + { + "epoch": 0.08425490738180813, + "grad_norm": 0.9936162049625249, + "learning_rate": 1.984608968385183e-05, + "loss": 0.7625, + "step": 4876 + }, + { + "epoch": 0.08427218689521702, + "grad_norm": 1.3243007864032736, + "learning_rate": 1.98459918574806e-05, + "loss": 0.7969, + "step": 4877 + }, + { + "epoch": 0.08428946640862593, + "grad_norm": 0.951773208153204, + "learning_rate": 1.9845894000270972e-05, + "loss": 0.7121, + "step": 4878 + }, + { + "epoch": 0.08430674592203484, + "grad_norm": 0.7410168826855188, + "learning_rate": 1.9845796112223258e-05, + "loss": 0.7306, + "step": 4879 + }, + { + "epoch": 0.08432402543544373, + "grad_norm": 1.1906294040515657, + "learning_rate": 1.984569819333776e-05, + "loss": 0.6092, + "step": 4880 + }, + { + "epoch": 0.08434130494885264, + "grad_norm": 1.0665311440092793, + "learning_rate": 1.9845600243614792e-05, + "loss": 0.9182, + "step": 4881 + }, + { + "epoch": 0.08435858446226155, + "grad_norm": 0.9250766596535215, + "learning_rate": 1.984550226305465e-05, + "loss": 0.8294, + "step": 4882 + }, + { + "epoch": 0.08437586397567044, + "grad_norm": 0.6174727117233133, + "learning_rate": 1.9845404251657648e-05, + "loss": 1.0146, + "step": 4883 + }, + { + "epoch": 0.08439314348907935, + "grad_norm": 1.0914375014696962, + "learning_rate": 1.9845306209424096e-05, + "loss": 0.9129, + "step": 4884 + }, + { + "epoch": 0.08441042300248824, + "grad_norm": 0.9200954139453347, + "learning_rate": 1.9845208136354293e-05, + "loss": 0.693, + "step": 4885 + }, + { + "epoch": 0.08442770251589715, + "grad_norm": 1.0125387762533076, + "learning_rate": 1.984511003244855e-05, + "loss": 0.7211, + "step": 4886 + }, + { + "epoch": 0.08444498202930606, + "grad_norm": 0.8919399179093652, + "learning_rate": 1.9845011897707175e-05, + "loss": 0.6548, + "step": 4887 + }, + { + "epoch": 0.08446226154271495, + "grad_norm": 1.012035559102411, + "learning_rate": 1.9844913732130474e-05, + "loss": 0.7074, + "step": 4888 + }, + { + "epoch": 0.08447954105612386, + "grad_norm": 0.999611820145154, + "learning_rate": 1.984481553571876e-05, + "loss": 0.6704, + "step": 4889 + }, + { + "epoch": 0.08449682056953277, + "grad_norm": 1.0967651867575006, + "learning_rate": 1.9844717308472326e-05, + "loss": 0.5963, + "step": 4890 + }, + { + "epoch": 0.08451410008294166, + "grad_norm": 1.3011012428918982, + "learning_rate": 1.9844619050391496e-05, + "loss": 0.6513, + "step": 4891 + }, + { + "epoch": 0.08453137959635057, + "grad_norm": 1.0526918648502261, + "learning_rate": 1.984452076147657e-05, + "loss": 0.7804, + "step": 4892 + }, + { + "epoch": 0.08454865910975946, + "grad_norm": 0.7149392572131608, + "learning_rate": 1.9844422441727854e-05, + "loss": 0.6059, + "step": 4893 + }, + { + "epoch": 0.08456593862316837, + "grad_norm": 0.4349598437120831, + "learning_rate": 1.9844324091145663e-05, + "loss": 0.5195, + "step": 4894 + }, + { + "epoch": 0.08458321813657728, + "grad_norm": 1.2807579693393165, + "learning_rate": 1.98442257097303e-05, + "loss": 1.1187, + "step": 4895 + }, + { + "epoch": 0.08460049764998617, + "grad_norm": 0.7596357021980765, + "learning_rate": 1.984412729748207e-05, + "loss": 0.6174, + "step": 4896 + }, + { + "epoch": 0.08461777716339508, + "grad_norm": 1.1159361886362895, + "learning_rate": 1.984402885440129e-05, + "loss": 0.823, + "step": 4897 + }, + { + "epoch": 0.08463505667680399, + "grad_norm": 0.9635852407556531, + "learning_rate": 1.9843930380488257e-05, + "loss": 0.8298, + "step": 4898 + }, + { + "epoch": 0.08465233619021288, + "grad_norm": 1.2205777521061663, + "learning_rate": 1.984383187574329e-05, + "loss": 0.8109, + "step": 4899 + }, + { + "epoch": 0.08466961570362179, + "grad_norm": 0.9630719089993155, + "learning_rate": 1.984373334016669e-05, + "loss": 0.7482, + "step": 4900 + }, + { + "epoch": 0.08468689521703068, + "grad_norm": 1.0818786885992142, + "learning_rate": 1.9843634773758777e-05, + "loss": 0.9507, + "step": 4901 + }, + { + "epoch": 0.08470417473043959, + "grad_norm": 0.9445264898736785, + "learning_rate": 1.984353617651985e-05, + "loss": 0.847, + "step": 4902 + }, + { + "epoch": 0.0847214542438485, + "grad_norm": 1.027450786526861, + "learning_rate": 1.9843437548450217e-05, + "loss": 0.7039, + "step": 4903 + }, + { + "epoch": 0.08473873375725739, + "grad_norm": 0.9981683949408567, + "learning_rate": 1.9843338889550187e-05, + "loss": 0.838, + "step": 4904 + }, + { + "epoch": 0.0847560132706663, + "grad_norm": 0.5169303183073642, + "learning_rate": 1.9843240199820073e-05, + "loss": 0.5356, + "step": 4905 + }, + { + "epoch": 0.0847732927840752, + "grad_norm": 0.8775271161808137, + "learning_rate": 1.9843141479260185e-05, + "loss": 0.7851, + "step": 4906 + }, + { + "epoch": 0.0847905722974841, + "grad_norm": 0.9016608408260934, + "learning_rate": 1.984304272787083e-05, + "loss": 0.8278, + "step": 4907 + }, + { + "epoch": 0.08480785181089301, + "grad_norm": 1.125435750917815, + "learning_rate": 1.9842943945652316e-05, + "loss": 0.6981, + "step": 4908 + }, + { + "epoch": 0.0848251313243019, + "grad_norm": 0.9332753986911314, + "learning_rate": 1.984284513260495e-05, + "loss": 0.7085, + "step": 4909 + }, + { + "epoch": 0.08484241083771081, + "grad_norm": 0.8278999808402645, + "learning_rate": 1.984274628872905e-05, + "loss": 0.8855, + "step": 4910 + }, + { + "epoch": 0.08485969035111972, + "grad_norm": 0.8524487295321704, + "learning_rate": 1.9842647414024918e-05, + "loss": 0.7953, + "step": 4911 + }, + { + "epoch": 0.08487696986452861, + "grad_norm": 0.8320578240454043, + "learning_rate": 1.984254850849287e-05, + "loss": 0.616, + "step": 4912 + }, + { + "epoch": 0.08489424937793752, + "grad_norm": 1.0448486165672206, + "learning_rate": 1.9842449572133208e-05, + "loss": 0.8431, + "step": 4913 + }, + { + "epoch": 0.08491152889134641, + "grad_norm": 1.0179764286866968, + "learning_rate": 1.9842350604946247e-05, + "loss": 0.8604, + "step": 4914 + }, + { + "epoch": 0.08492880840475532, + "grad_norm": 1.1297588119365851, + "learning_rate": 1.9842251606932294e-05, + "loss": 0.8062, + "step": 4915 + }, + { + "epoch": 0.08494608791816423, + "grad_norm": 0.8975218738028704, + "learning_rate": 1.9842152578091665e-05, + "loss": 0.6341, + "step": 4916 + }, + { + "epoch": 0.08496336743157312, + "grad_norm": 0.7638189314573346, + "learning_rate": 1.984205351842466e-05, + "loss": 0.6613, + "step": 4917 + }, + { + "epoch": 0.08498064694498203, + "grad_norm": 1.0448260421801725, + "learning_rate": 1.9841954427931604e-05, + "loss": 0.6929, + "step": 4918 + }, + { + "epoch": 0.08499792645839094, + "grad_norm": 1.0092124951540555, + "learning_rate": 1.9841855306612792e-05, + "loss": 0.6121, + "step": 4919 + }, + { + "epoch": 0.08501520597179983, + "grad_norm": 1.0526913335551307, + "learning_rate": 1.9841756154468547e-05, + "loss": 0.8383, + "step": 4920 + }, + { + "epoch": 0.08503248548520874, + "grad_norm": 0.7044016118082563, + "learning_rate": 1.9841656971499167e-05, + "loss": 0.5757, + "step": 4921 + }, + { + "epoch": 0.08504976499861763, + "grad_norm": 1.128612582496106, + "learning_rate": 1.9841557757704974e-05, + "loss": 0.7449, + "step": 4922 + }, + { + "epoch": 0.08506704451202654, + "grad_norm": 1.0711187188337252, + "learning_rate": 1.9841458513086272e-05, + "loss": 0.6116, + "step": 4923 + }, + { + "epoch": 0.08508432402543545, + "grad_norm": 0.6471757889513382, + "learning_rate": 1.9841359237643375e-05, + "loss": 0.5329, + "step": 4924 + }, + { + "epoch": 0.08510160353884434, + "grad_norm": 0.7571097988367396, + "learning_rate": 1.9841259931376592e-05, + "loss": 0.9866, + "step": 4925 + }, + { + "epoch": 0.08511888305225325, + "grad_norm": 1.0756170077336877, + "learning_rate": 1.9841160594286237e-05, + "loss": 0.7932, + "step": 4926 + }, + { + "epoch": 0.08513616256566216, + "grad_norm": 1.08902001043368, + "learning_rate": 1.9841061226372617e-05, + "loss": 0.5254, + "step": 4927 + }, + { + "epoch": 0.08515344207907105, + "grad_norm": 1.2443810921081278, + "learning_rate": 1.9840961827636047e-05, + "loss": 0.6968, + "step": 4928 + }, + { + "epoch": 0.08517072159247996, + "grad_norm": 0.7321883644889944, + "learning_rate": 1.9840862398076836e-05, + "loss": 0.6031, + "step": 4929 + }, + { + "epoch": 0.08518800110588885, + "grad_norm": 0.8953419120160748, + "learning_rate": 1.9840762937695296e-05, + "loss": 0.5786, + "step": 4930 + }, + { + "epoch": 0.08520528061929776, + "grad_norm": 1.2889565711470954, + "learning_rate": 1.9840663446491738e-05, + "loss": 0.5053, + "step": 4931 + }, + { + "epoch": 0.08522256013270667, + "grad_norm": 0.9714371381552548, + "learning_rate": 1.9840563924466477e-05, + "loss": 0.6018, + "step": 4932 + }, + { + "epoch": 0.08523983964611556, + "grad_norm": 1.0383445720205582, + "learning_rate": 1.984046437161982e-05, + "loss": 0.7259, + "step": 4933 + }, + { + "epoch": 0.08525711915952447, + "grad_norm": 1.0070696405122874, + "learning_rate": 1.984036478795208e-05, + "loss": 0.8305, + "step": 4934 + }, + { + "epoch": 0.08527439867293338, + "grad_norm": 0.9582570910970474, + "learning_rate": 1.984026517346357e-05, + "loss": 0.87, + "step": 4935 + }, + { + "epoch": 0.08529167818634227, + "grad_norm": 0.7989985963698291, + "learning_rate": 1.9840165528154602e-05, + "loss": 0.7194, + "step": 4936 + }, + { + "epoch": 0.08530895769975118, + "grad_norm": 0.9424406664291864, + "learning_rate": 1.9840065852025486e-05, + "loss": 0.6177, + "step": 4937 + }, + { + "epoch": 0.08532623721316007, + "grad_norm": 0.8649130916605076, + "learning_rate": 1.983996614507654e-05, + "loss": 0.906, + "step": 4938 + }, + { + "epoch": 0.08534351672656898, + "grad_norm": 1.1599536627904097, + "learning_rate": 1.983986640730807e-05, + "loss": 0.953, + "step": 4939 + }, + { + "epoch": 0.08536079623997789, + "grad_norm": 1.1202870320318492, + "learning_rate": 1.983976663872039e-05, + "loss": 0.8189, + "step": 4940 + }, + { + "epoch": 0.08537807575338678, + "grad_norm": 1.2811546201457935, + "learning_rate": 1.983966683931381e-05, + "loss": 0.7996, + "step": 4941 + }, + { + "epoch": 0.08539535526679569, + "grad_norm": 0.7479641543724993, + "learning_rate": 1.9839567009088652e-05, + "loss": 0.7747, + "step": 4942 + }, + { + "epoch": 0.08541263478020458, + "grad_norm": 0.8345665366420916, + "learning_rate": 1.983946714804522e-05, + "loss": 0.4582, + "step": 4943 + }, + { + "epoch": 0.08542991429361349, + "grad_norm": 1.039348753938845, + "learning_rate": 1.983936725618383e-05, + "loss": 0.5559, + "step": 4944 + }, + { + "epoch": 0.0854471938070224, + "grad_norm": 1.164010146259243, + "learning_rate": 1.983926733350479e-05, + "loss": 0.6819, + "step": 4945 + }, + { + "epoch": 0.08546447332043129, + "grad_norm": 1.393798648541995, + "learning_rate": 1.983916738000842e-05, + "loss": 1.052, + "step": 4946 + }, + { + "epoch": 0.0854817528338402, + "grad_norm": 1.1199489403664746, + "learning_rate": 1.983906739569503e-05, + "loss": 0.8462, + "step": 4947 + }, + { + "epoch": 0.08549903234724911, + "grad_norm": 0.7934694077702692, + "learning_rate": 1.9838967380564935e-05, + "loss": 0.4838, + "step": 4948 + }, + { + "epoch": 0.085516311860658, + "grad_norm": 1.1192591362425026, + "learning_rate": 1.9838867334618442e-05, + "loss": 0.8002, + "step": 4949 + }, + { + "epoch": 0.08553359137406691, + "grad_norm": 0.5148485693238897, + "learning_rate": 1.983876725785587e-05, + "loss": 0.6263, + "step": 4950 + }, + { + "epoch": 0.0855508708874758, + "grad_norm": 1.08492962887481, + "learning_rate": 1.9838667150277533e-05, + "loss": 0.5461, + "step": 4951 + }, + { + "epoch": 0.08556815040088471, + "grad_norm": 1.052270499897388, + "learning_rate": 1.9838567011883743e-05, + "loss": 0.4918, + "step": 4952 + }, + { + "epoch": 0.08558542991429362, + "grad_norm": 1.77638797088355, + "learning_rate": 1.9838466842674814e-05, + "loss": 0.823, + "step": 4953 + }, + { + "epoch": 0.08560270942770251, + "grad_norm": 1.0088271225475511, + "learning_rate": 1.9838366642651056e-05, + "loss": 0.7056, + "step": 4954 + }, + { + "epoch": 0.08561998894111142, + "grad_norm": 0.6218970457555517, + "learning_rate": 1.9838266411812785e-05, + "loss": 0.6224, + "step": 4955 + }, + { + "epoch": 0.08563726845452033, + "grad_norm": 1.3918264113255143, + "learning_rate": 1.983816615016032e-05, + "loss": 0.7871, + "step": 4956 + }, + { + "epoch": 0.08565454796792922, + "grad_norm": 1.6164486518707963, + "learning_rate": 1.983806585769397e-05, + "loss": 1.0406, + "step": 4957 + }, + { + "epoch": 0.08567182748133813, + "grad_norm": 2.0951416137117325, + "learning_rate": 1.983796553441405e-05, + "loss": 0.8544, + "step": 4958 + }, + { + "epoch": 0.08568910699474702, + "grad_norm": 1.2494724683968346, + "learning_rate": 1.9837865180320873e-05, + "loss": 0.9079, + "step": 4959 + }, + { + "epoch": 0.08570638650815593, + "grad_norm": 0.9914490036330995, + "learning_rate": 1.9837764795414756e-05, + "loss": 0.7766, + "step": 4960 + }, + { + "epoch": 0.08572366602156484, + "grad_norm": 1.0342214841107642, + "learning_rate": 1.983766437969601e-05, + "loss": 0.7727, + "step": 4961 + }, + { + "epoch": 0.08574094553497373, + "grad_norm": 1.0898216968932815, + "learning_rate": 1.9837563933164953e-05, + "loss": 0.9483, + "step": 4962 + }, + { + "epoch": 0.08575822504838264, + "grad_norm": 0.9717455833023758, + "learning_rate": 1.9837463455821897e-05, + "loss": 0.6319, + "step": 4963 + }, + { + "epoch": 0.08577550456179155, + "grad_norm": 0.6133244605809971, + "learning_rate": 1.983736294766716e-05, + "loss": 0.4275, + "step": 4964 + }, + { + "epoch": 0.08579278407520044, + "grad_norm": 0.7670736929518238, + "learning_rate": 1.983726240870105e-05, + "loss": 0.582, + "step": 4965 + }, + { + "epoch": 0.08581006358860935, + "grad_norm": 0.8409733353555168, + "learning_rate": 1.983716183892389e-05, + "loss": 0.7643, + "step": 4966 + }, + { + "epoch": 0.08582734310201824, + "grad_norm": 0.8177186966735744, + "learning_rate": 1.9837061238335996e-05, + "loss": 0.5659, + "step": 4967 + }, + { + "epoch": 0.08584462261542715, + "grad_norm": 0.902699100324888, + "learning_rate": 1.9836960606937674e-05, + "loss": 0.5224, + "step": 4968 + }, + { + "epoch": 0.08586190212883606, + "grad_norm": 1.0901552658021598, + "learning_rate": 1.9836859944729244e-05, + "loss": 0.6536, + "step": 4969 + }, + { + "epoch": 0.08587918164224495, + "grad_norm": 1.9671516870645596, + "learning_rate": 1.983675925171102e-05, + "loss": 1.0064, + "step": 4970 + }, + { + "epoch": 0.08589646115565386, + "grad_norm": 0.86375330910206, + "learning_rate": 1.9836658527883317e-05, + "loss": 0.7731, + "step": 4971 + }, + { + "epoch": 0.08591374066906277, + "grad_norm": 1.5315774268649094, + "learning_rate": 1.983655777324646e-05, + "loss": 0.944, + "step": 4972 + }, + { + "epoch": 0.08593102018247166, + "grad_norm": 1.3119286776651529, + "learning_rate": 1.9836456987800748e-05, + "loss": 1.0921, + "step": 4973 + }, + { + "epoch": 0.08594829969588057, + "grad_norm": 0.9311258561130367, + "learning_rate": 1.9836356171546508e-05, + "loss": 0.6598, + "step": 4974 + }, + { + "epoch": 0.08596557920928946, + "grad_norm": 0.7918065602638754, + "learning_rate": 1.9836255324484053e-05, + "loss": 0.627, + "step": 4975 + }, + { + "epoch": 0.08598285872269837, + "grad_norm": 0.7945787641797188, + "learning_rate": 1.98361544466137e-05, + "loss": 0.5185, + "step": 4976 + }, + { + "epoch": 0.08600013823610728, + "grad_norm": 1.1026116732256022, + "learning_rate": 1.983605353793576e-05, + "loss": 0.6256, + "step": 4977 + }, + { + "epoch": 0.08601741774951617, + "grad_norm": 0.9903553738673209, + "learning_rate": 1.9835952598450556e-05, + "loss": 0.6778, + "step": 4978 + }, + { + "epoch": 0.08603469726292508, + "grad_norm": 0.8387752468573819, + "learning_rate": 1.9835851628158404e-05, + "loss": 0.6392, + "step": 4979 + }, + { + "epoch": 0.08605197677633397, + "grad_norm": 1.0584548613827727, + "learning_rate": 1.983575062705961e-05, + "loss": 0.7018, + "step": 4980 + }, + { + "epoch": 0.08606925628974288, + "grad_norm": 1.0547709704196653, + "learning_rate": 1.98356495951545e-05, + "loss": 0.7241, + "step": 4981 + }, + { + "epoch": 0.08608653580315179, + "grad_norm": 1.048014208887192, + "learning_rate": 1.983554853244339e-05, + "loss": 0.9712, + "step": 4982 + }, + { + "epoch": 0.08610381531656068, + "grad_norm": 1.100278141728839, + "learning_rate": 1.9835447438926593e-05, + "loss": 0.8158, + "step": 4983 + }, + { + "epoch": 0.08612109482996959, + "grad_norm": 1.096696359479499, + "learning_rate": 1.983534631460443e-05, + "loss": 0.5205, + "step": 4984 + }, + { + "epoch": 0.0861383743433785, + "grad_norm": 0.8470328825117797, + "learning_rate": 1.983524515947721e-05, + "loss": 0.5799, + "step": 4985 + }, + { + "epoch": 0.08615565385678739, + "grad_norm": 1.0766210024147054, + "learning_rate": 1.983514397354526e-05, + "loss": 0.7666, + "step": 4986 + }, + { + "epoch": 0.0861729333701963, + "grad_norm": 0.7619434408193855, + "learning_rate": 1.983504275680889e-05, + "loss": 0.5565, + "step": 4987 + }, + { + "epoch": 0.08619021288360519, + "grad_norm": 0.9585376477620885, + "learning_rate": 1.9834941509268416e-05, + "loss": 0.9075, + "step": 4988 + }, + { + "epoch": 0.0862074923970141, + "grad_norm": 0.9163481270011372, + "learning_rate": 1.983484023092416e-05, + "loss": 0.6721, + "step": 4989 + }, + { + "epoch": 0.086224771910423, + "grad_norm": 1.1283017036647516, + "learning_rate": 1.9834738921776435e-05, + "loss": 0.6729, + "step": 4990 + }, + { + "epoch": 0.0862420514238319, + "grad_norm": 0.953197418881404, + "learning_rate": 1.9834637581825565e-05, + "loss": 0.6049, + "step": 4991 + }, + { + "epoch": 0.08625933093724081, + "grad_norm": 0.8711929346683355, + "learning_rate": 1.983453621107186e-05, + "loss": 0.7974, + "step": 4992 + }, + { + "epoch": 0.08627661045064972, + "grad_norm": 1.1646010787761438, + "learning_rate": 1.9834434809515636e-05, + "loss": 0.8138, + "step": 4993 + }, + { + "epoch": 0.08629388996405861, + "grad_norm": 0.9370843221870383, + "learning_rate": 1.983433337715722e-05, + "loss": 0.7106, + "step": 4994 + }, + { + "epoch": 0.08631116947746752, + "grad_norm": 0.9506173399443656, + "learning_rate": 1.983423191399692e-05, + "loss": 0.8379, + "step": 4995 + }, + { + "epoch": 0.08632844899087641, + "grad_norm": 0.7206054903482912, + "learning_rate": 1.9834130420035062e-05, + "loss": 0.6368, + "step": 4996 + }, + { + "epoch": 0.08634572850428532, + "grad_norm": 1.3601326260208728, + "learning_rate": 1.9834028895271958e-05, + "loss": 0.9061, + "step": 4997 + }, + { + "epoch": 0.08636300801769423, + "grad_norm": 0.921223177450464, + "learning_rate": 1.983392733970793e-05, + "loss": 0.7291, + "step": 4998 + }, + { + "epoch": 0.08638028753110312, + "grad_norm": 0.5633729171846235, + "learning_rate": 1.9833825753343295e-05, + "loss": 0.6857, + "step": 4999 + }, + { + "epoch": 0.08639756704451203, + "grad_norm": 0.7764289274303663, + "learning_rate": 1.983372413617837e-05, + "loss": 0.5891, + "step": 5000 + }, + { + "epoch": 0.08641484655792094, + "grad_norm": 0.5027813181520202, + "learning_rate": 1.9833622488213473e-05, + "loss": 0.604, + "step": 5001 + }, + { + "epoch": 0.08643212607132983, + "grad_norm": 0.9307058925377951, + "learning_rate": 1.9833520809448924e-05, + "loss": 0.5811, + "step": 5002 + }, + { + "epoch": 0.08644940558473874, + "grad_norm": 0.9891573236495651, + "learning_rate": 1.9833419099885036e-05, + "loss": 0.6626, + "step": 5003 + }, + { + "epoch": 0.08646668509814763, + "grad_norm": 0.6762499388954428, + "learning_rate": 1.9833317359522137e-05, + "loss": 0.7142, + "step": 5004 + }, + { + "epoch": 0.08648396461155654, + "grad_norm": 1.065959358477689, + "learning_rate": 1.9833215588360537e-05, + "loss": 0.8849, + "step": 5005 + }, + { + "epoch": 0.08650124412496545, + "grad_norm": 0.8699117506783632, + "learning_rate": 1.983311378640056e-05, + "loss": 0.6283, + "step": 5006 + }, + { + "epoch": 0.08651852363837434, + "grad_norm": 0.804301744563569, + "learning_rate": 1.9833011953642525e-05, + "loss": 0.6358, + "step": 5007 + }, + { + "epoch": 0.08653580315178325, + "grad_norm": 1.0954255823701258, + "learning_rate": 1.9832910090086747e-05, + "loss": 0.7116, + "step": 5008 + }, + { + "epoch": 0.08655308266519215, + "grad_norm": 1.1715235698648863, + "learning_rate": 1.9832808195733552e-05, + "loss": 0.8576, + "step": 5009 + }, + { + "epoch": 0.08657036217860105, + "grad_norm": 0.9464384936710639, + "learning_rate": 1.983270627058325e-05, + "loss": 0.6101, + "step": 5010 + }, + { + "epoch": 0.08658764169200996, + "grad_norm": 0.9570876609276747, + "learning_rate": 1.9832604314636166e-05, + "loss": 0.7509, + "step": 5011 + }, + { + "epoch": 0.08660492120541885, + "grad_norm": 1.0056830626963662, + "learning_rate": 1.9832502327892616e-05, + "loss": 0.659, + "step": 5012 + }, + { + "epoch": 0.08662220071882776, + "grad_norm": 0.7130569924339836, + "learning_rate": 1.9832400310352923e-05, + "loss": 0.6383, + "step": 5013 + }, + { + "epoch": 0.08663948023223667, + "grad_norm": 1.4343440920476305, + "learning_rate": 1.9832298262017405e-05, + "loss": 0.7155, + "step": 5014 + }, + { + "epoch": 0.08665675974564556, + "grad_norm": 1.2834784947956273, + "learning_rate": 1.9832196182886382e-05, + "loss": 0.6537, + "step": 5015 + }, + { + "epoch": 0.08667403925905447, + "grad_norm": 1.5232720152429782, + "learning_rate": 1.9832094072960172e-05, + "loss": 0.953, + "step": 5016 + }, + { + "epoch": 0.08669131877246337, + "grad_norm": 1.153630404859813, + "learning_rate": 1.98319919322391e-05, + "loss": 0.6237, + "step": 5017 + }, + { + "epoch": 0.08670859828587227, + "grad_norm": 1.020678146859171, + "learning_rate": 1.9831889760723477e-05, + "loss": 0.6113, + "step": 5018 + }, + { + "epoch": 0.08672587779928118, + "grad_norm": 1.1128729573016458, + "learning_rate": 1.983178755841363e-05, + "loss": 0.9256, + "step": 5019 + }, + { + "epoch": 0.08674315731269007, + "grad_norm": 1.0076222838666244, + "learning_rate": 1.983168532530988e-05, + "loss": 0.7887, + "step": 5020 + }, + { + "epoch": 0.08676043682609898, + "grad_norm": 0.7796287352943889, + "learning_rate": 1.983158306141254e-05, + "loss": 0.6594, + "step": 5021 + }, + { + "epoch": 0.08677771633950788, + "grad_norm": 0.8639242372786273, + "learning_rate": 1.9831480766721935e-05, + "loss": 0.6634, + "step": 5022 + }, + { + "epoch": 0.08679499585291678, + "grad_norm": 0.7981737289050361, + "learning_rate": 1.983137844123839e-05, + "loss": 0.8638, + "step": 5023 + }, + { + "epoch": 0.08681227536632569, + "grad_norm": 0.8796940116193798, + "learning_rate": 1.9831276084962218e-05, + "loss": 0.8678, + "step": 5024 + }, + { + "epoch": 0.08682955487973458, + "grad_norm": 0.6045065940849584, + "learning_rate": 1.983117369789374e-05, + "loss": 0.8384, + "step": 5025 + }, + { + "epoch": 0.08684683439314349, + "grad_norm": 0.9058334632568659, + "learning_rate": 1.983107128003328e-05, + "loss": 0.5871, + "step": 5026 + }, + { + "epoch": 0.0868641139065524, + "grad_norm": 1.0025230337546, + "learning_rate": 1.983096883138116e-05, + "loss": 0.6598, + "step": 5027 + }, + { + "epoch": 0.08688139341996129, + "grad_norm": 0.9974042597862952, + "learning_rate": 1.9830866351937694e-05, + "loss": 0.7989, + "step": 5028 + }, + { + "epoch": 0.0868986729333702, + "grad_norm": 1.2161276507249885, + "learning_rate": 1.9830763841703208e-05, + "loss": 0.768, + "step": 5029 + }, + { + "epoch": 0.0869159524467791, + "grad_norm": 1.261363220936917, + "learning_rate": 1.9830661300678024e-05, + "loss": 0.684, + "step": 5030 + }, + { + "epoch": 0.086933231960188, + "grad_norm": 0.6141560899286113, + "learning_rate": 1.9830558728862464e-05, + "loss": 0.6849, + "step": 5031 + }, + { + "epoch": 0.0869505114735969, + "grad_norm": 0.8776462736144117, + "learning_rate": 1.9830456126256846e-05, + "loss": 0.6677, + "step": 5032 + }, + { + "epoch": 0.0869677909870058, + "grad_norm": 1.0860685944380317, + "learning_rate": 1.9830353492861493e-05, + "loss": 0.7385, + "step": 5033 + }, + { + "epoch": 0.08698507050041471, + "grad_norm": 1.0736530088177805, + "learning_rate": 1.983025082867672e-05, + "loss": 0.5662, + "step": 5034 + }, + { + "epoch": 0.08700235001382361, + "grad_norm": 1.1027023932293407, + "learning_rate": 1.9830148133702856e-05, + "loss": 0.8702, + "step": 5035 + }, + { + "epoch": 0.08701962952723251, + "grad_norm": 0.7586223189268092, + "learning_rate": 1.9830045407940227e-05, + "loss": 0.6308, + "step": 5036 + }, + { + "epoch": 0.08703690904064142, + "grad_norm": 0.8636608183182664, + "learning_rate": 1.9829942651389146e-05, + "loss": 0.7121, + "step": 5037 + }, + { + "epoch": 0.08705418855405032, + "grad_norm": 1.215375460851813, + "learning_rate": 1.9829839864049935e-05, + "loss": 0.7511, + "step": 5038 + }, + { + "epoch": 0.08707146806745922, + "grad_norm": 1.0415534495194028, + "learning_rate": 1.982973704592292e-05, + "loss": 0.817, + "step": 5039 + }, + { + "epoch": 0.08708874758086813, + "grad_norm": 1.7840986908191976, + "learning_rate": 1.9829634197008422e-05, + "loss": 0.866, + "step": 5040 + }, + { + "epoch": 0.08710602709427702, + "grad_norm": 0.8704222298739068, + "learning_rate": 1.9829531317306764e-05, + "loss": 0.4456, + "step": 5041 + }, + { + "epoch": 0.08712330660768593, + "grad_norm": 0.9761077621642114, + "learning_rate": 1.9829428406818266e-05, + "loss": 0.7492, + "step": 5042 + }, + { + "epoch": 0.08714058612109483, + "grad_norm": 1.5356729777924163, + "learning_rate": 1.982932546554325e-05, + "loss": 0.7157, + "step": 5043 + }, + { + "epoch": 0.08715786563450373, + "grad_norm": 0.9198453949245635, + "learning_rate": 1.982922249348204e-05, + "loss": 0.879, + "step": 5044 + }, + { + "epoch": 0.08717514514791264, + "grad_norm": 1.3120802343196964, + "learning_rate": 1.9829119490634956e-05, + "loss": 0.8582, + "step": 5045 + }, + { + "epoch": 0.08719242466132154, + "grad_norm": 0.9661310544254328, + "learning_rate": 1.9829016457002325e-05, + "loss": 0.6488, + "step": 5046 + }, + { + "epoch": 0.08720970417473044, + "grad_norm": 1.090869285221321, + "learning_rate": 1.9828913392584468e-05, + "loss": 0.9551, + "step": 5047 + }, + { + "epoch": 0.08722698368813935, + "grad_norm": 0.9851458305872757, + "learning_rate": 1.9828810297381707e-05, + "loss": 0.7607, + "step": 5048 + }, + { + "epoch": 0.08724426320154824, + "grad_norm": 0.9180962034064183, + "learning_rate": 1.9828707171394367e-05, + "loss": 0.7677, + "step": 5049 + }, + { + "epoch": 0.08726154271495715, + "grad_norm": 0.8485675002883905, + "learning_rate": 1.9828604014622766e-05, + "loss": 0.7612, + "step": 5050 + }, + { + "epoch": 0.08727882222836605, + "grad_norm": 1.0185333330037192, + "learning_rate": 1.982850082706723e-05, + "loss": 0.6353, + "step": 5051 + }, + { + "epoch": 0.08729610174177495, + "grad_norm": 0.8148865285569604, + "learning_rate": 1.9828397608728085e-05, + "loss": 0.7297, + "step": 5052 + }, + { + "epoch": 0.08731338125518386, + "grad_norm": 0.9501977200752544, + "learning_rate": 1.982829435960565e-05, + "loss": 0.765, + "step": 5053 + }, + { + "epoch": 0.08733066076859276, + "grad_norm": 0.5063139102658137, + "learning_rate": 1.982819107970025e-05, + "loss": 0.8349, + "step": 5054 + }, + { + "epoch": 0.08734794028200166, + "grad_norm": 1.1809262695687706, + "learning_rate": 1.9828087769012212e-05, + "loss": 0.6173, + "step": 5055 + }, + { + "epoch": 0.08736521979541056, + "grad_norm": 0.8801171198903123, + "learning_rate": 1.9827984427541852e-05, + "loss": 0.7279, + "step": 5056 + }, + { + "epoch": 0.08738249930881946, + "grad_norm": 1.0817417338372866, + "learning_rate": 1.9827881055289498e-05, + "loss": 0.9053, + "step": 5057 + }, + { + "epoch": 0.08739977882222837, + "grad_norm": 1.230749453332046, + "learning_rate": 1.982777765225548e-05, + "loss": 0.8906, + "step": 5058 + }, + { + "epoch": 0.08741705833563727, + "grad_norm": 0.7832373823766792, + "learning_rate": 1.982767421844011e-05, + "loss": 0.7851, + "step": 5059 + }, + { + "epoch": 0.08743433784904617, + "grad_norm": 1.4524142624296672, + "learning_rate": 1.982757075384372e-05, + "loss": 0.742, + "step": 5060 + }, + { + "epoch": 0.08745161736245508, + "grad_norm": 1.2222681101667445, + "learning_rate": 1.9827467258466624e-05, + "loss": 0.9583, + "step": 5061 + }, + { + "epoch": 0.08746889687586397, + "grad_norm": 1.5440903379902948, + "learning_rate": 1.9827363732309163e-05, + "loss": 0.8353, + "step": 5062 + }, + { + "epoch": 0.08748617638927288, + "grad_norm": 1.106458045820822, + "learning_rate": 1.982726017537165e-05, + "loss": 0.7998, + "step": 5063 + }, + { + "epoch": 0.08750345590268178, + "grad_norm": 1.0099226042811376, + "learning_rate": 1.9827156587654408e-05, + "loss": 0.5807, + "step": 5064 + }, + { + "epoch": 0.08752073541609068, + "grad_norm": 0.8250600866233904, + "learning_rate": 1.9827052969157765e-05, + "loss": 0.647, + "step": 5065 + }, + { + "epoch": 0.08753801492949959, + "grad_norm": 0.9831226762925652, + "learning_rate": 1.9826949319882046e-05, + "loss": 0.7296, + "step": 5066 + }, + { + "epoch": 0.0875552944429085, + "grad_norm": 1.1372340310498303, + "learning_rate": 1.9826845639827578e-05, + "loss": 0.7239, + "step": 5067 + }, + { + "epoch": 0.08757257395631739, + "grad_norm": 1.280736088816927, + "learning_rate": 1.982674192899468e-05, + "loss": 1.0247, + "step": 5068 + }, + { + "epoch": 0.0875898534697263, + "grad_norm": 1.1661138299907698, + "learning_rate": 1.982663818738368e-05, + "loss": 0.886, + "step": 5069 + }, + { + "epoch": 0.08760713298313519, + "grad_norm": 0.9813474523377635, + "learning_rate": 1.98265344149949e-05, + "loss": 0.7053, + "step": 5070 + }, + { + "epoch": 0.0876244124965441, + "grad_norm": 0.8448417399034903, + "learning_rate": 1.9826430611828668e-05, + "loss": 0.561, + "step": 5071 + }, + { + "epoch": 0.087641692009953, + "grad_norm": 0.8965547410090631, + "learning_rate": 1.9826326777885312e-05, + "loss": 0.6502, + "step": 5072 + }, + { + "epoch": 0.0876589715233619, + "grad_norm": 1.1946106272432704, + "learning_rate": 1.9826222913165153e-05, + "loss": 0.8159, + "step": 5073 + }, + { + "epoch": 0.0876762510367708, + "grad_norm": 1.0374357462837327, + "learning_rate": 1.9826119017668516e-05, + "loss": 0.7151, + "step": 5074 + }, + { + "epoch": 0.08769353055017971, + "grad_norm": 0.9991395694801557, + "learning_rate": 1.9826015091395726e-05, + "loss": 0.6298, + "step": 5075 + }, + { + "epoch": 0.0877108100635886, + "grad_norm": 1.1532589845686227, + "learning_rate": 1.9825911134347113e-05, + "loss": 0.881, + "step": 5076 + }, + { + "epoch": 0.08772808957699751, + "grad_norm": 0.8882487275015356, + "learning_rate": 1.9825807146522997e-05, + "loss": 0.6563, + "step": 5077 + }, + { + "epoch": 0.08774536909040641, + "grad_norm": 0.8419854671209493, + "learning_rate": 1.9825703127923707e-05, + "loss": 0.65, + "step": 5078 + }, + { + "epoch": 0.08776264860381532, + "grad_norm": 1.0139025862802247, + "learning_rate": 1.982559907854957e-05, + "loss": 0.7072, + "step": 5079 + }, + { + "epoch": 0.08777992811722422, + "grad_norm": 0.7975220627039877, + "learning_rate": 1.9825494998400905e-05, + "loss": 0.7929, + "step": 5080 + }, + { + "epoch": 0.08779720763063312, + "grad_norm": 1.2343669580625285, + "learning_rate": 1.9825390887478046e-05, + "loss": 0.7275, + "step": 5081 + }, + { + "epoch": 0.08781448714404202, + "grad_norm": 0.5298565923108608, + "learning_rate": 1.9825286745781315e-05, + "loss": 0.5602, + "step": 5082 + }, + { + "epoch": 0.08783176665745093, + "grad_norm": 1.0852121027257549, + "learning_rate": 1.9825182573311044e-05, + "loss": 0.5475, + "step": 5083 + }, + { + "epoch": 0.08784904617085983, + "grad_norm": 0.9903286628919863, + "learning_rate": 1.982507837006755e-05, + "loss": 0.7953, + "step": 5084 + }, + { + "epoch": 0.08786632568426873, + "grad_norm": 0.9759380687167986, + "learning_rate": 1.9824974136051163e-05, + "loss": 0.6682, + "step": 5085 + }, + { + "epoch": 0.08788360519767763, + "grad_norm": 0.6636686982119097, + "learning_rate": 1.9824869871262212e-05, + "loss": 0.5341, + "step": 5086 + }, + { + "epoch": 0.08790088471108654, + "grad_norm": 0.8317082507959108, + "learning_rate": 1.9824765575701024e-05, + "loss": 0.7053, + "step": 5087 + }, + { + "epoch": 0.08791816422449544, + "grad_norm": 0.6299238378685937, + "learning_rate": 1.9824661249367923e-05, + "loss": 0.5232, + "step": 5088 + }, + { + "epoch": 0.08793544373790434, + "grad_norm": 0.9832814631263753, + "learning_rate": 1.9824556892263232e-05, + "loss": 0.7403, + "step": 5089 + }, + { + "epoch": 0.08795272325131324, + "grad_norm": 0.8981770756163214, + "learning_rate": 1.9824452504387285e-05, + "loss": 0.6588, + "step": 5090 + }, + { + "epoch": 0.08797000276472215, + "grad_norm": 1.093876173246968, + "learning_rate": 1.982434808574041e-05, + "loss": 0.7617, + "step": 5091 + }, + { + "epoch": 0.08798728227813105, + "grad_norm": 0.8574604636881876, + "learning_rate": 1.9824243636322924e-05, + "loss": 0.7093, + "step": 5092 + }, + { + "epoch": 0.08800456179153995, + "grad_norm": 1.2362715070752675, + "learning_rate": 1.9824139156135164e-05, + "loss": 0.8682, + "step": 5093 + }, + { + "epoch": 0.08802184130494885, + "grad_norm": 0.9643235517403073, + "learning_rate": 1.982403464517745e-05, + "loss": 0.6726, + "step": 5094 + }, + { + "epoch": 0.08803912081835776, + "grad_norm": 0.48891976191716435, + "learning_rate": 1.9823930103450116e-05, + "loss": 0.6952, + "step": 5095 + }, + { + "epoch": 0.08805640033176666, + "grad_norm": 0.8744996249329201, + "learning_rate": 1.9823825530953485e-05, + "loss": 0.6835, + "step": 5096 + }, + { + "epoch": 0.08807367984517556, + "grad_norm": 1.2069345357263448, + "learning_rate": 1.9823720927687885e-05, + "loss": 0.8772, + "step": 5097 + }, + { + "epoch": 0.08809095935858446, + "grad_norm": 0.7236554022555374, + "learning_rate": 1.9823616293653646e-05, + "loss": 0.6698, + "step": 5098 + }, + { + "epoch": 0.08810823887199336, + "grad_norm": 1.1673215540538868, + "learning_rate": 1.9823511628851095e-05, + "loss": 0.7918, + "step": 5099 + }, + { + "epoch": 0.08812551838540227, + "grad_norm": 1.1339568090365735, + "learning_rate": 1.9823406933280556e-05, + "loss": 0.8478, + "step": 5100 + }, + { + "epoch": 0.08814279789881117, + "grad_norm": 1.1360075855235852, + "learning_rate": 1.9823302206942366e-05, + "loss": 0.8351, + "step": 5101 + }, + { + "epoch": 0.08816007741222007, + "grad_norm": 1.0798124002315985, + "learning_rate": 1.9823197449836842e-05, + "loss": 0.9443, + "step": 5102 + }, + { + "epoch": 0.08817735692562897, + "grad_norm": 0.8157551648652119, + "learning_rate": 1.9823092661964317e-05, + "loss": 0.6524, + "step": 5103 + }, + { + "epoch": 0.08819463643903788, + "grad_norm": 1.0779025094920736, + "learning_rate": 1.9822987843325122e-05, + "loss": 0.8854, + "step": 5104 + }, + { + "epoch": 0.08821191595244678, + "grad_norm": 1.4361898232730346, + "learning_rate": 1.982288299391958e-05, + "loss": 0.6794, + "step": 5105 + }, + { + "epoch": 0.08822919546585568, + "grad_norm": 0.8902761127985176, + "learning_rate": 1.982277811374802e-05, + "loss": 0.661, + "step": 5106 + }, + { + "epoch": 0.08824647497926458, + "grad_norm": 0.7868430334304081, + "learning_rate": 1.9822673202810776e-05, + "loss": 0.6378, + "step": 5107 + }, + { + "epoch": 0.08826375449267349, + "grad_norm": 0.8523869202771637, + "learning_rate": 1.9822568261108173e-05, + "loss": 0.6354, + "step": 5108 + }, + { + "epoch": 0.08828103400608239, + "grad_norm": 0.959256895198271, + "learning_rate": 1.982246328864054e-05, + "loss": 0.8325, + "step": 5109 + }, + { + "epoch": 0.08829831351949129, + "grad_norm": 0.7430755494735274, + "learning_rate": 1.9822358285408202e-05, + "loss": 0.5618, + "step": 5110 + }, + { + "epoch": 0.0883155930329002, + "grad_norm": 0.9421431095061089, + "learning_rate": 1.9822253251411494e-05, + "loss": 0.9092, + "step": 5111 + }, + { + "epoch": 0.0883328725463091, + "grad_norm": 0.7108541312915277, + "learning_rate": 1.9822148186650743e-05, + "loss": 0.6021, + "step": 5112 + }, + { + "epoch": 0.088350152059718, + "grad_norm": 0.8236308058444407, + "learning_rate": 1.9822043091126276e-05, + "loss": 0.555, + "step": 5113 + }, + { + "epoch": 0.0883674315731269, + "grad_norm": 0.768335076903495, + "learning_rate": 1.9821937964838424e-05, + "loss": 0.699, + "step": 5114 + }, + { + "epoch": 0.0883847110865358, + "grad_norm": 0.7881384730035081, + "learning_rate": 1.9821832807787517e-05, + "loss": 0.5528, + "step": 5115 + }, + { + "epoch": 0.0884019905999447, + "grad_norm": 0.8301748887684355, + "learning_rate": 1.9821727619973884e-05, + "loss": 0.824, + "step": 5116 + }, + { + "epoch": 0.08841927011335361, + "grad_norm": 0.7629750167558474, + "learning_rate": 1.982162240139785e-05, + "loss": 0.7288, + "step": 5117 + }, + { + "epoch": 0.0884365496267625, + "grad_norm": 0.868812500886195, + "learning_rate": 1.982151715205975e-05, + "loss": 0.8507, + "step": 5118 + }, + { + "epoch": 0.08845382914017141, + "grad_norm": 0.7505858102893279, + "learning_rate": 1.9821411871959914e-05, + "loss": 0.5667, + "step": 5119 + }, + { + "epoch": 0.08847110865358032, + "grad_norm": 1.172945001048037, + "learning_rate": 1.9821306561098666e-05, + "loss": 0.9992, + "step": 5120 + }, + { + "epoch": 0.08848838816698922, + "grad_norm": 0.8680855042436558, + "learning_rate": 1.9821201219476342e-05, + "loss": 0.7084, + "step": 5121 + }, + { + "epoch": 0.08850566768039812, + "grad_norm": 1.213124167677981, + "learning_rate": 1.9821095847093268e-05, + "loss": 0.5103, + "step": 5122 + }, + { + "epoch": 0.08852294719380702, + "grad_norm": 0.6584162871087672, + "learning_rate": 1.9820990443949776e-05, + "loss": 0.5582, + "step": 5123 + }, + { + "epoch": 0.08854022670721592, + "grad_norm": 1.0601829490651358, + "learning_rate": 1.9820885010046195e-05, + "loss": 0.8134, + "step": 5124 + }, + { + "epoch": 0.08855750622062483, + "grad_norm": 0.9604284793497503, + "learning_rate": 1.9820779545382855e-05, + "loss": 0.4718, + "step": 5125 + }, + { + "epoch": 0.08857478573403373, + "grad_norm": 1.2661194547033607, + "learning_rate": 1.982067404996009e-05, + "loss": 0.9404, + "step": 5126 + }, + { + "epoch": 0.08859206524744263, + "grad_norm": 0.9954739864675218, + "learning_rate": 1.9820568523778226e-05, + "loss": 0.7475, + "step": 5127 + }, + { + "epoch": 0.08860934476085154, + "grad_norm": 1.2289535778773364, + "learning_rate": 1.9820462966837596e-05, + "loss": 0.6615, + "step": 5128 + }, + { + "epoch": 0.08862662427426043, + "grad_norm": 1.1764885498964646, + "learning_rate": 1.9820357379138527e-05, + "loss": 0.8917, + "step": 5129 + }, + { + "epoch": 0.08864390378766934, + "grad_norm": 0.9399186022846778, + "learning_rate": 1.9820251760681353e-05, + "loss": 0.7147, + "step": 5130 + }, + { + "epoch": 0.08866118330107824, + "grad_norm": 1.1510982390402043, + "learning_rate": 1.9820146111466405e-05, + "loss": 0.7985, + "step": 5131 + }, + { + "epoch": 0.08867846281448714, + "grad_norm": 0.8403594992407488, + "learning_rate": 1.9820040431494016e-05, + "loss": 0.5008, + "step": 5132 + }, + { + "epoch": 0.08869574232789605, + "grad_norm": 0.9819825986376426, + "learning_rate": 1.981993472076451e-05, + "loss": 0.7932, + "step": 5133 + }, + { + "epoch": 0.08871302184130495, + "grad_norm": 0.9384366198459823, + "learning_rate": 1.9819828979278225e-05, + "loss": 0.8007, + "step": 5134 + }, + { + "epoch": 0.08873030135471385, + "grad_norm": 0.7616823353661918, + "learning_rate": 1.9819723207035485e-05, + "loss": 0.6229, + "step": 5135 + }, + { + "epoch": 0.08874758086812275, + "grad_norm": 1.207252811016767, + "learning_rate": 1.981961740403663e-05, + "loss": 0.9288, + "step": 5136 + }, + { + "epoch": 0.08876486038153165, + "grad_norm": 0.8662714230873146, + "learning_rate": 1.9819511570281983e-05, + "loss": 0.643, + "step": 5137 + }, + { + "epoch": 0.08878213989494056, + "grad_norm": 1.2093084511497945, + "learning_rate": 1.9819405705771883e-05, + "loss": 0.5834, + "step": 5138 + }, + { + "epoch": 0.08879941940834946, + "grad_norm": 0.8861388684601919, + "learning_rate": 1.981929981050666e-05, + "loss": 0.7483, + "step": 5139 + }, + { + "epoch": 0.08881669892175836, + "grad_norm": 1.1285763220986653, + "learning_rate": 1.981919388448664e-05, + "loss": 0.7716, + "step": 5140 + }, + { + "epoch": 0.08883397843516727, + "grad_norm": 1.4069849752553716, + "learning_rate": 1.9819087927712157e-05, + "loss": 0.6687, + "step": 5141 + }, + { + "epoch": 0.08885125794857616, + "grad_norm": 0.9502015418558528, + "learning_rate": 1.9818981940183545e-05, + "loss": 0.9152, + "step": 5142 + }, + { + "epoch": 0.08886853746198507, + "grad_norm": 1.0233140938862895, + "learning_rate": 1.981887592190114e-05, + "loss": 0.6609, + "step": 5143 + }, + { + "epoch": 0.08888581697539397, + "grad_norm": 0.9363710175474704, + "learning_rate": 1.9818769872865266e-05, + "loss": 0.8915, + "step": 5144 + }, + { + "epoch": 0.08890309648880287, + "grad_norm": 0.9461663453610631, + "learning_rate": 1.9818663793076257e-05, + "loss": 0.6311, + "step": 5145 + }, + { + "epoch": 0.08892037600221178, + "grad_norm": 1.0228698556820266, + "learning_rate": 1.981855768253445e-05, + "loss": 0.6752, + "step": 5146 + }, + { + "epoch": 0.08893765551562068, + "grad_norm": 0.8847833625911123, + "learning_rate": 1.9818451541240175e-05, + "loss": 0.5001, + "step": 5147 + }, + { + "epoch": 0.08895493502902958, + "grad_norm": 0.9205257352463158, + "learning_rate": 1.981834536919376e-05, + "loss": 0.5864, + "step": 5148 + }, + { + "epoch": 0.08897221454243849, + "grad_norm": 0.8746053045081184, + "learning_rate": 1.981823916639554e-05, + "loss": 0.6056, + "step": 5149 + }, + { + "epoch": 0.08898949405584738, + "grad_norm": 0.5924120676866775, + "learning_rate": 1.9818132932845855e-05, + "loss": 0.3837, + "step": 5150 + }, + { + "epoch": 0.08900677356925629, + "grad_norm": 1.2436741345723457, + "learning_rate": 1.9818026668545026e-05, + "loss": 0.8289, + "step": 5151 + }, + { + "epoch": 0.08902405308266519, + "grad_norm": 1.1303603558179012, + "learning_rate": 1.9817920373493395e-05, + "loss": 0.8953, + "step": 5152 + }, + { + "epoch": 0.0890413325960741, + "grad_norm": 0.9108813780937993, + "learning_rate": 1.981781404769129e-05, + "loss": 0.4425, + "step": 5153 + }, + { + "epoch": 0.089058612109483, + "grad_norm": 1.3269090525754335, + "learning_rate": 1.9817707691139045e-05, + "loss": 0.8694, + "step": 5154 + }, + { + "epoch": 0.0890758916228919, + "grad_norm": 1.6003979235292858, + "learning_rate": 1.981760130383699e-05, + "loss": 0.8361, + "step": 5155 + }, + { + "epoch": 0.0890931711363008, + "grad_norm": 1.0981311054789962, + "learning_rate": 1.9817494885785465e-05, + "loss": 0.802, + "step": 5156 + }, + { + "epoch": 0.08911045064970971, + "grad_norm": 0.9379669289894405, + "learning_rate": 1.98173884369848e-05, + "loss": 0.7896, + "step": 5157 + }, + { + "epoch": 0.0891277301631186, + "grad_norm": 0.5776001990986105, + "learning_rate": 1.981728195743533e-05, + "loss": 0.6063, + "step": 5158 + }, + { + "epoch": 0.08914500967652751, + "grad_norm": 0.9979158791512863, + "learning_rate": 1.9817175447137386e-05, + "loss": 0.7317, + "step": 5159 + }, + { + "epoch": 0.0891622891899364, + "grad_norm": 0.9282315042272221, + "learning_rate": 1.9817068906091303e-05, + "loss": 0.6898, + "step": 5160 + }, + { + "epoch": 0.08917956870334531, + "grad_norm": 1.0496677866998485, + "learning_rate": 1.981696233429741e-05, + "loss": 0.8676, + "step": 5161 + }, + { + "epoch": 0.08919684821675422, + "grad_norm": 0.8749453074188126, + "learning_rate": 1.9816855731756046e-05, + "loss": 0.7054, + "step": 5162 + }, + { + "epoch": 0.08921412773016311, + "grad_norm": 0.7424355968884009, + "learning_rate": 1.9816749098467546e-05, + "loss": 0.6026, + "step": 5163 + }, + { + "epoch": 0.08923140724357202, + "grad_norm": 1.072149558434176, + "learning_rate": 1.981664243443224e-05, + "loss": 0.6227, + "step": 5164 + }, + { + "epoch": 0.08924868675698093, + "grad_norm": 1.0380862925133048, + "learning_rate": 1.9816535739650465e-05, + "loss": 0.7695, + "step": 5165 + }, + { + "epoch": 0.08926596627038982, + "grad_norm": 1.0195358367212153, + "learning_rate": 1.9816429014122555e-05, + "loss": 0.6139, + "step": 5166 + }, + { + "epoch": 0.08928324578379873, + "grad_norm": 0.8786695191401211, + "learning_rate": 1.9816322257848842e-05, + "loss": 0.5614, + "step": 5167 + }, + { + "epoch": 0.08930052529720763, + "grad_norm": 0.5756714907245183, + "learning_rate": 1.9816215470829663e-05, + "loss": 0.982, + "step": 5168 + }, + { + "epoch": 0.08931780481061653, + "grad_norm": 1.2162446612177107, + "learning_rate": 1.981610865306535e-05, + "loss": 0.4768, + "step": 5169 + }, + { + "epoch": 0.08933508432402544, + "grad_norm": 1.0565597373010698, + "learning_rate": 1.9816001804556238e-05, + "loss": 0.7066, + "step": 5170 + }, + { + "epoch": 0.08935236383743433, + "grad_norm": 1.0290564090562313, + "learning_rate": 1.9815894925302662e-05, + "loss": 0.7016, + "step": 5171 + }, + { + "epoch": 0.08936964335084324, + "grad_norm": 1.2168553009510503, + "learning_rate": 1.981578801530496e-05, + "loss": 0.9893, + "step": 5172 + }, + { + "epoch": 0.08938692286425214, + "grad_norm": 0.7551586382544399, + "learning_rate": 1.981568107456346e-05, + "loss": 0.8746, + "step": 5173 + }, + { + "epoch": 0.08940420237766104, + "grad_norm": 0.5312610970225126, + "learning_rate": 1.98155741030785e-05, + "loss": 0.665, + "step": 5174 + }, + { + "epoch": 0.08942148189106995, + "grad_norm": 1.1042824968549425, + "learning_rate": 1.9815467100850423e-05, + "loss": 0.7609, + "step": 5175 + }, + { + "epoch": 0.08943876140447884, + "grad_norm": 0.8753556610989502, + "learning_rate": 1.981536006787955e-05, + "loss": 0.7259, + "step": 5176 + }, + { + "epoch": 0.08945604091788775, + "grad_norm": 0.8091993526170798, + "learning_rate": 1.9815253004166228e-05, + "loss": 0.6265, + "step": 5177 + }, + { + "epoch": 0.08947332043129666, + "grad_norm": 1.2425877923483353, + "learning_rate": 1.9815145909710786e-05, + "loss": 0.8359, + "step": 5178 + }, + { + "epoch": 0.08949059994470555, + "grad_norm": 0.8080513099374195, + "learning_rate": 1.9815038784513564e-05, + "loss": 0.5071, + "step": 5179 + }, + { + "epoch": 0.08950787945811446, + "grad_norm": 0.5435284225273241, + "learning_rate": 1.981493162857489e-05, + "loss": 0.6004, + "step": 5180 + }, + { + "epoch": 0.08952515897152336, + "grad_norm": 0.7334620450307121, + "learning_rate": 1.9814824441895104e-05, + "loss": 0.4686, + "step": 5181 + }, + { + "epoch": 0.08954243848493226, + "grad_norm": 1.0897436749304383, + "learning_rate": 1.9814717224474544e-05, + "loss": 0.712, + "step": 5182 + }, + { + "epoch": 0.08955971799834117, + "grad_norm": 0.5177264283447419, + "learning_rate": 1.9814609976313543e-05, + "loss": 0.5591, + "step": 5183 + }, + { + "epoch": 0.08957699751175006, + "grad_norm": 0.9080496722076942, + "learning_rate": 1.9814502697412438e-05, + "loss": 0.6397, + "step": 5184 + }, + { + "epoch": 0.08959427702515897, + "grad_norm": 0.8640585233938213, + "learning_rate": 1.981439538777157e-05, + "loss": 0.7009, + "step": 5185 + }, + { + "epoch": 0.08961155653856788, + "grad_norm": 0.9672516325361272, + "learning_rate": 1.9814288047391265e-05, + "loss": 0.6765, + "step": 5186 + }, + { + "epoch": 0.08962883605197677, + "grad_norm": 0.889534485561657, + "learning_rate": 1.981418067627186e-05, + "loss": 0.5985, + "step": 5187 + }, + { + "epoch": 0.08964611556538568, + "grad_norm": 0.9164894079762256, + "learning_rate": 1.98140732744137e-05, + "loss": 0.6381, + "step": 5188 + }, + { + "epoch": 0.08966339507879457, + "grad_norm": 1.086645623302626, + "learning_rate": 1.9813965841817116e-05, + "loss": 0.6175, + "step": 5189 + }, + { + "epoch": 0.08968067459220348, + "grad_norm": 1.463873801739669, + "learning_rate": 1.9813858378482444e-05, + "loss": 0.9395, + "step": 5190 + }, + { + "epoch": 0.08969795410561239, + "grad_norm": 0.8526362911041189, + "learning_rate": 1.9813750884410024e-05, + "loss": 0.8841, + "step": 5191 + }, + { + "epoch": 0.08971523361902128, + "grad_norm": 0.9883259768945987, + "learning_rate": 1.9813643359600188e-05, + "loss": 0.7975, + "step": 5192 + }, + { + "epoch": 0.08973251313243019, + "grad_norm": 1.0262236131683162, + "learning_rate": 1.9813535804053278e-05, + "loss": 0.7539, + "step": 5193 + }, + { + "epoch": 0.0897497926458391, + "grad_norm": 0.7230196536037893, + "learning_rate": 1.9813428217769625e-05, + "loss": 0.6794, + "step": 5194 + }, + { + "epoch": 0.089767072159248, + "grad_norm": 0.813466274636546, + "learning_rate": 1.981332060074957e-05, + "loss": 0.6287, + "step": 5195 + }, + { + "epoch": 0.0897843516726569, + "grad_norm": 0.6898760660504121, + "learning_rate": 1.981321295299345e-05, + "loss": 0.7203, + "step": 5196 + }, + { + "epoch": 0.0898016311860658, + "grad_norm": 0.9804810391398263, + "learning_rate": 1.9813105274501598e-05, + "loss": 0.6408, + "step": 5197 + }, + { + "epoch": 0.0898189106994747, + "grad_norm": 0.8305125961668389, + "learning_rate": 1.981299756527436e-05, + "loss": 0.661, + "step": 5198 + }, + { + "epoch": 0.08983619021288361, + "grad_norm": 0.8766470515717879, + "learning_rate": 1.9812889825312066e-05, + "loss": 0.7045, + "step": 5199 + }, + { + "epoch": 0.0898534697262925, + "grad_norm": 0.9627360409235625, + "learning_rate": 1.9812782054615053e-05, + "loss": 0.8573, + "step": 5200 + }, + { + "epoch": 0.08987074923970141, + "grad_norm": 1.152614308198107, + "learning_rate": 1.981267425318366e-05, + "loss": 0.7647, + "step": 5201 + }, + { + "epoch": 0.08988802875311032, + "grad_norm": 0.9160991978298613, + "learning_rate": 1.981256642101823e-05, + "loss": 0.7541, + "step": 5202 + }, + { + "epoch": 0.08990530826651921, + "grad_norm": 0.5628412982864216, + "learning_rate": 1.9812458558119095e-05, + "loss": 0.727, + "step": 5203 + }, + { + "epoch": 0.08992258777992812, + "grad_norm": 1.050355816755554, + "learning_rate": 1.981235066448659e-05, + "loss": 0.7228, + "step": 5204 + }, + { + "epoch": 0.08993986729333701, + "grad_norm": 1.0796381808079183, + "learning_rate": 1.981224274012106e-05, + "loss": 0.7721, + "step": 5205 + }, + { + "epoch": 0.08995714680674592, + "grad_norm": 0.6267252944702156, + "learning_rate": 1.981213478502284e-05, + "loss": 0.6404, + "step": 5206 + }, + { + "epoch": 0.08997442632015483, + "grad_norm": 0.9909505738032458, + "learning_rate": 1.981202679919227e-05, + "loss": 0.8392, + "step": 5207 + }, + { + "epoch": 0.08999170583356372, + "grad_norm": 0.9035050302675559, + "learning_rate": 1.9811918782629683e-05, + "loss": 0.6105, + "step": 5208 + }, + { + "epoch": 0.09000898534697263, + "grad_norm": 0.9441877121170252, + "learning_rate": 1.9811810735335424e-05, + "loss": 0.7771, + "step": 5209 + }, + { + "epoch": 0.09002626486038154, + "grad_norm": 0.9423704416946028, + "learning_rate": 1.9811702657309823e-05, + "loss": 0.7659, + "step": 5210 + }, + { + "epoch": 0.09004354437379043, + "grad_norm": 0.5617871194778884, + "learning_rate": 1.9811594548553226e-05, + "loss": 0.8504, + "step": 5211 + }, + { + "epoch": 0.09006082388719934, + "grad_norm": 0.9444921348700931, + "learning_rate": 1.9811486409065973e-05, + "loss": 0.7697, + "step": 5212 + }, + { + "epoch": 0.09007810340060823, + "grad_norm": 1.155376622260576, + "learning_rate": 1.9811378238848395e-05, + "loss": 0.7782, + "step": 5213 + }, + { + "epoch": 0.09009538291401714, + "grad_norm": 0.9020909050703299, + "learning_rate": 1.9811270037900837e-05, + "loss": 0.6826, + "step": 5214 + }, + { + "epoch": 0.09011266242742605, + "grad_norm": 0.8857812872789353, + "learning_rate": 1.9811161806223636e-05, + "loss": 0.5139, + "step": 5215 + }, + { + "epoch": 0.09012994194083494, + "grad_norm": 1.0642577000485938, + "learning_rate": 1.981105354381713e-05, + "loss": 0.7723, + "step": 5216 + }, + { + "epoch": 0.09014722145424385, + "grad_norm": 1.218806506853216, + "learning_rate": 1.9810945250681657e-05, + "loss": 0.6951, + "step": 5217 + }, + { + "epoch": 0.09016450096765274, + "grad_norm": 0.913366186568559, + "learning_rate": 1.9810836926817558e-05, + "loss": 0.595, + "step": 5218 + }, + { + "epoch": 0.09018178048106165, + "grad_norm": 0.8748450123781093, + "learning_rate": 1.9810728572225172e-05, + "loss": 0.7002, + "step": 5219 + }, + { + "epoch": 0.09019905999447056, + "grad_norm": 0.8724963928187739, + "learning_rate": 1.9810620186904838e-05, + "loss": 0.5816, + "step": 5220 + }, + { + "epoch": 0.09021633950787945, + "grad_norm": 0.8971317575892376, + "learning_rate": 1.98105117708569e-05, + "loss": 0.6018, + "step": 5221 + }, + { + "epoch": 0.09023361902128836, + "grad_norm": 1.3074832537052636, + "learning_rate": 1.9810403324081688e-05, + "loss": 0.8398, + "step": 5222 + }, + { + "epoch": 0.09025089853469727, + "grad_norm": 0.8841746565764307, + "learning_rate": 1.9810294846579554e-05, + "loss": 0.7308, + "step": 5223 + }, + { + "epoch": 0.09026817804810616, + "grad_norm": 1.6993929343594234, + "learning_rate": 1.9810186338350825e-05, + "loss": 0.9907, + "step": 5224 + }, + { + "epoch": 0.09028545756151507, + "grad_norm": 0.9899313714468235, + "learning_rate": 1.9810077799395847e-05, + "loss": 0.5859, + "step": 5225 + }, + { + "epoch": 0.09030273707492396, + "grad_norm": 1.1210507417082505, + "learning_rate": 1.9809969229714963e-05, + "loss": 0.9935, + "step": 5226 + }, + { + "epoch": 0.09032001658833287, + "grad_norm": 1.1519915546003017, + "learning_rate": 1.9809860629308507e-05, + "loss": 0.8439, + "step": 5227 + }, + { + "epoch": 0.09033729610174178, + "grad_norm": 0.8117934239389981, + "learning_rate": 1.9809751998176826e-05, + "loss": 0.5889, + "step": 5228 + }, + { + "epoch": 0.09035457561515067, + "grad_norm": 0.6152580087554357, + "learning_rate": 1.980964333632025e-05, + "loss": 0.3951, + "step": 5229 + }, + { + "epoch": 0.09037185512855958, + "grad_norm": 1.337371453986765, + "learning_rate": 1.980953464373913e-05, + "loss": 0.6442, + "step": 5230 + }, + { + "epoch": 0.09038913464196849, + "grad_norm": 1.026627495403736, + "learning_rate": 1.9809425920433806e-05, + "loss": 0.5509, + "step": 5231 + }, + { + "epoch": 0.09040641415537738, + "grad_norm": 0.6679019364876274, + "learning_rate": 1.9809317166404606e-05, + "loss": 0.5539, + "step": 5232 + }, + { + "epoch": 0.09042369366878629, + "grad_norm": 1.1929706681915553, + "learning_rate": 1.9809208381651882e-05, + "loss": 0.9053, + "step": 5233 + }, + { + "epoch": 0.09044097318219518, + "grad_norm": 1.1076588879180953, + "learning_rate": 1.9809099566175975e-05, + "loss": 0.8064, + "step": 5234 + }, + { + "epoch": 0.09045825269560409, + "grad_norm": 0.926742359419167, + "learning_rate": 1.980899071997722e-05, + "loss": 0.6676, + "step": 5235 + }, + { + "epoch": 0.090475532209013, + "grad_norm": 0.9811403431364112, + "learning_rate": 1.980888184305596e-05, + "loss": 0.7891, + "step": 5236 + }, + { + "epoch": 0.09049281172242189, + "grad_norm": 1.07867705664439, + "learning_rate": 1.980877293541254e-05, + "loss": 0.8973, + "step": 5237 + }, + { + "epoch": 0.0905100912358308, + "grad_norm": 0.8675429079605129, + "learning_rate": 1.9808663997047294e-05, + "loss": 0.6236, + "step": 5238 + }, + { + "epoch": 0.09052737074923971, + "grad_norm": 0.9535582593710894, + "learning_rate": 1.980855502796057e-05, + "loss": 0.9011, + "step": 5239 + }, + { + "epoch": 0.0905446502626486, + "grad_norm": 1.2667581714702432, + "learning_rate": 1.9808446028152706e-05, + "loss": 0.7862, + "step": 5240 + }, + { + "epoch": 0.09056192977605751, + "grad_norm": 1.3066141569361032, + "learning_rate": 1.980833699762404e-05, + "loss": 0.8447, + "step": 5241 + }, + { + "epoch": 0.0905792092894664, + "grad_norm": 1.1296427333110024, + "learning_rate": 1.9808227936374918e-05, + "loss": 0.559, + "step": 5242 + }, + { + "epoch": 0.09059648880287531, + "grad_norm": 0.6012996967577727, + "learning_rate": 1.9808118844405685e-05, + "loss": 1.0002, + "step": 5243 + }, + { + "epoch": 0.09061376831628422, + "grad_norm": 0.9716953651092799, + "learning_rate": 1.9808009721716674e-05, + "loss": 0.5529, + "step": 5244 + }, + { + "epoch": 0.09063104782969311, + "grad_norm": 1.2039209306950056, + "learning_rate": 1.9807900568308235e-05, + "loss": 0.7535, + "step": 5245 + }, + { + "epoch": 0.09064832734310202, + "grad_norm": 0.6430336754843694, + "learning_rate": 1.98077913841807e-05, + "loss": 0.6366, + "step": 5246 + }, + { + "epoch": 0.09066560685651093, + "grad_norm": 0.621574459355577, + "learning_rate": 1.9807682169334423e-05, + "loss": 0.6557, + "step": 5247 + }, + { + "epoch": 0.09068288636991982, + "grad_norm": 1.0186371408407247, + "learning_rate": 1.9807572923769738e-05, + "loss": 0.5821, + "step": 5248 + }, + { + "epoch": 0.09070016588332873, + "grad_norm": 0.9325456835267739, + "learning_rate": 1.980746364748699e-05, + "loss": 0.6704, + "step": 5249 + }, + { + "epoch": 0.09071744539673762, + "grad_norm": 1.2418760180177273, + "learning_rate": 1.980735434048652e-05, + "loss": 0.9217, + "step": 5250 + }, + { + "epoch": 0.09073472491014653, + "grad_norm": 1.0248959422832467, + "learning_rate": 1.9807245002768668e-05, + "loss": 0.8108, + "step": 5251 + }, + { + "epoch": 0.09075200442355544, + "grad_norm": 1.0966784561753138, + "learning_rate": 1.9807135634333783e-05, + "loss": 0.6899, + "step": 5252 + }, + { + "epoch": 0.09076928393696433, + "grad_norm": 0.8110213065303471, + "learning_rate": 1.9807026235182204e-05, + "loss": 0.4789, + "step": 5253 + }, + { + "epoch": 0.09078656345037324, + "grad_norm": 0.8724370248350785, + "learning_rate": 1.9806916805314273e-05, + "loss": 0.7641, + "step": 5254 + }, + { + "epoch": 0.09080384296378213, + "grad_norm": 1.004152834226837, + "learning_rate": 1.980680734473033e-05, + "loss": 0.7621, + "step": 5255 + }, + { + "epoch": 0.09082112247719104, + "grad_norm": 0.8842706587297622, + "learning_rate": 1.9806697853430722e-05, + "loss": 0.6777, + "step": 5256 + }, + { + "epoch": 0.09083840199059995, + "grad_norm": 1.037180625733436, + "learning_rate": 1.9806588331415794e-05, + "loss": 0.7444, + "step": 5257 + }, + { + "epoch": 0.09085568150400884, + "grad_norm": 1.1642199833790678, + "learning_rate": 1.9806478778685883e-05, + "loss": 0.8287, + "step": 5258 + }, + { + "epoch": 0.09087296101741775, + "grad_norm": 1.0560449817083994, + "learning_rate": 1.980636919524134e-05, + "loss": 0.6532, + "step": 5259 + }, + { + "epoch": 0.09089024053082666, + "grad_norm": 0.8070120433422101, + "learning_rate": 1.98062595810825e-05, + "loss": 0.857, + "step": 5260 + }, + { + "epoch": 0.09090752004423555, + "grad_norm": 0.8723577293806272, + "learning_rate": 1.9806149936209708e-05, + "loss": 0.7241, + "step": 5261 + }, + { + "epoch": 0.09092479955764446, + "grad_norm": 0.9973745375295195, + "learning_rate": 1.9806040260623313e-05, + "loss": 0.9163, + "step": 5262 + }, + { + "epoch": 0.09094207907105335, + "grad_norm": 0.7520570872553775, + "learning_rate": 1.9805930554323655e-05, + "loss": 0.6507, + "step": 5263 + }, + { + "epoch": 0.09095935858446226, + "grad_norm": 0.9793558932576888, + "learning_rate": 1.9805820817311073e-05, + "loss": 0.6863, + "step": 5264 + }, + { + "epoch": 0.09097663809787117, + "grad_norm": 0.7814234111965833, + "learning_rate": 1.9805711049585918e-05, + "loss": 0.7507, + "step": 5265 + }, + { + "epoch": 0.09099391761128006, + "grad_norm": 1.0798832662107587, + "learning_rate": 1.980560125114853e-05, + "loss": 0.8449, + "step": 5266 + }, + { + "epoch": 0.09101119712468897, + "grad_norm": 0.8028106468399939, + "learning_rate": 1.9805491421999252e-05, + "loss": 0.6816, + "step": 5267 + }, + { + "epoch": 0.09102847663809788, + "grad_norm": 0.8445190233377304, + "learning_rate": 1.980538156213843e-05, + "loss": 0.8234, + "step": 5268 + }, + { + "epoch": 0.09104575615150677, + "grad_norm": 0.8663661304518034, + "learning_rate": 1.9805271671566412e-05, + "loss": 0.6464, + "step": 5269 + }, + { + "epoch": 0.09106303566491568, + "grad_norm": 0.8622542010290088, + "learning_rate": 1.9805161750283534e-05, + "loss": 0.4714, + "step": 5270 + }, + { + "epoch": 0.09108031517832457, + "grad_norm": 1.2154736052671364, + "learning_rate": 1.9805051798290145e-05, + "loss": 0.9933, + "step": 5271 + }, + { + "epoch": 0.09109759469173348, + "grad_norm": 1.001297162352371, + "learning_rate": 1.9804941815586588e-05, + "loss": 0.714, + "step": 5272 + }, + { + "epoch": 0.09111487420514239, + "grad_norm": 1.0524051675565158, + "learning_rate": 1.9804831802173207e-05, + "loss": 0.668, + "step": 5273 + }, + { + "epoch": 0.09113215371855128, + "grad_norm": 1.3403440425971387, + "learning_rate": 1.9804721758050352e-05, + "loss": 0.7153, + "step": 5274 + }, + { + "epoch": 0.09114943323196019, + "grad_norm": 0.6690454756218296, + "learning_rate": 1.980461168321836e-05, + "loss": 0.7123, + "step": 5275 + }, + { + "epoch": 0.0911667127453691, + "grad_norm": 1.0286016627736836, + "learning_rate": 1.980450157767758e-05, + "loss": 0.8561, + "step": 5276 + }, + { + "epoch": 0.09118399225877799, + "grad_norm": 0.9027833450220683, + "learning_rate": 1.9804391441428356e-05, + "loss": 0.6684, + "step": 5277 + }, + { + "epoch": 0.0912012717721869, + "grad_norm": 1.1803104907395299, + "learning_rate": 1.9804281274471032e-05, + "loss": 0.9047, + "step": 5278 + }, + { + "epoch": 0.09121855128559579, + "grad_norm": 1.0006545051589648, + "learning_rate": 1.980417107680596e-05, + "loss": 0.6921, + "step": 5279 + }, + { + "epoch": 0.0912358307990047, + "grad_norm": 1.2025771244269807, + "learning_rate": 1.9804060848433472e-05, + "loss": 0.8052, + "step": 5280 + }, + { + "epoch": 0.09125311031241361, + "grad_norm": 0.9450498057231136, + "learning_rate": 1.980395058935392e-05, + "loss": 0.6816, + "step": 5281 + }, + { + "epoch": 0.0912703898258225, + "grad_norm": 1.0882969533226852, + "learning_rate": 1.980384029956765e-05, + "loss": 0.8075, + "step": 5282 + }, + { + "epoch": 0.09128766933923141, + "grad_norm": 1.3280927172326724, + "learning_rate": 1.9803729979075013e-05, + "loss": 0.7613, + "step": 5283 + }, + { + "epoch": 0.09130494885264032, + "grad_norm": 0.9751779750219502, + "learning_rate": 1.9803619627876345e-05, + "loss": 0.8884, + "step": 5284 + }, + { + "epoch": 0.09132222836604921, + "grad_norm": 1.0373504642663771, + "learning_rate": 1.9803509245971994e-05, + "loss": 0.7071, + "step": 5285 + }, + { + "epoch": 0.09133950787945812, + "grad_norm": 1.0624577468584941, + "learning_rate": 1.980339883336231e-05, + "loss": 0.705, + "step": 5286 + }, + { + "epoch": 0.09135678739286701, + "grad_norm": 0.7249081727815472, + "learning_rate": 1.980328839004763e-05, + "loss": 0.4125, + "step": 5287 + }, + { + "epoch": 0.09137406690627592, + "grad_norm": 0.8236496347369191, + "learning_rate": 1.9803177916028308e-05, + "loss": 0.6228, + "step": 5288 + }, + { + "epoch": 0.09139134641968483, + "grad_norm": 0.9468807029949023, + "learning_rate": 1.980306741130469e-05, + "loss": 0.6445, + "step": 5289 + }, + { + "epoch": 0.09140862593309372, + "grad_norm": 0.7799338824422655, + "learning_rate": 1.9802956875877122e-05, + "loss": 0.5416, + "step": 5290 + }, + { + "epoch": 0.09142590544650263, + "grad_norm": 0.7471513486010422, + "learning_rate": 1.9802846309745944e-05, + "loss": 0.699, + "step": 5291 + }, + { + "epoch": 0.09144318495991152, + "grad_norm": 1.019682046948652, + "learning_rate": 1.9802735712911506e-05, + "loss": 0.6795, + "step": 5292 + }, + { + "epoch": 0.09146046447332043, + "grad_norm": 0.7243447116910752, + "learning_rate": 1.9802625085374156e-05, + "loss": 0.5989, + "step": 5293 + }, + { + "epoch": 0.09147774398672934, + "grad_norm": 0.9058032370123807, + "learning_rate": 1.9802514427134243e-05, + "loss": 0.5688, + "step": 5294 + }, + { + "epoch": 0.09149502350013823, + "grad_norm": 0.9872668116712617, + "learning_rate": 1.9802403738192107e-05, + "loss": 0.7907, + "step": 5295 + }, + { + "epoch": 0.09151230301354714, + "grad_norm": 0.9454572254041578, + "learning_rate": 1.9802293018548096e-05, + "loss": 0.6049, + "step": 5296 + }, + { + "epoch": 0.09152958252695605, + "grad_norm": 0.9679490130325693, + "learning_rate": 1.980218226820256e-05, + "loss": 0.6547, + "step": 5297 + }, + { + "epoch": 0.09154686204036494, + "grad_norm": 0.9722599988745401, + "learning_rate": 1.980207148715584e-05, + "loss": 0.7982, + "step": 5298 + }, + { + "epoch": 0.09156414155377385, + "grad_norm": 1.1213825514755504, + "learning_rate": 1.9801960675408295e-05, + "loss": 0.7052, + "step": 5299 + }, + { + "epoch": 0.09158142106718274, + "grad_norm": 0.8904434667319147, + "learning_rate": 1.9801849832960255e-05, + "loss": 0.5889, + "step": 5300 + }, + { + "epoch": 0.09159870058059165, + "grad_norm": 0.4810097483500218, + "learning_rate": 1.9801738959812082e-05, + "loss": 0.5748, + "step": 5301 + }, + { + "epoch": 0.09161598009400056, + "grad_norm": 1.0045431590809488, + "learning_rate": 1.980162805596412e-05, + "loss": 0.6452, + "step": 5302 + }, + { + "epoch": 0.09163325960740945, + "grad_norm": 0.807603647486478, + "learning_rate": 1.9801517121416708e-05, + "loss": 0.5626, + "step": 5303 + }, + { + "epoch": 0.09165053912081836, + "grad_norm": 0.8584298978267637, + "learning_rate": 1.9801406156170205e-05, + "loss": 0.6096, + "step": 5304 + }, + { + "epoch": 0.09166781863422727, + "grad_norm": 0.5170760140419213, + "learning_rate": 1.9801295160224948e-05, + "loss": 0.8669, + "step": 5305 + }, + { + "epoch": 0.09168509814763616, + "grad_norm": 0.9423511560694617, + "learning_rate": 1.9801184133581293e-05, + "loss": 0.6376, + "step": 5306 + }, + { + "epoch": 0.09170237766104507, + "grad_norm": 0.8971919926225581, + "learning_rate": 1.9801073076239584e-05, + "loss": 0.6547, + "step": 5307 + }, + { + "epoch": 0.09171965717445396, + "grad_norm": 0.858087319730353, + "learning_rate": 1.9800961988200172e-05, + "loss": 0.705, + "step": 5308 + }, + { + "epoch": 0.09173693668786287, + "grad_norm": 1.2710375793839, + "learning_rate": 1.9800850869463398e-05, + "loss": 0.7096, + "step": 5309 + }, + { + "epoch": 0.09175421620127178, + "grad_norm": 1.0606511750790222, + "learning_rate": 1.9800739720029615e-05, + "loss": 0.7953, + "step": 5310 + }, + { + "epoch": 0.09177149571468067, + "grad_norm": 0.5864293942452262, + "learning_rate": 1.9800628539899168e-05, + "loss": 0.6082, + "step": 5311 + }, + { + "epoch": 0.09178877522808958, + "grad_norm": 0.5932114773537095, + "learning_rate": 1.9800517329072413e-05, + "loss": 0.6104, + "step": 5312 + }, + { + "epoch": 0.09180605474149849, + "grad_norm": 1.2248618078512552, + "learning_rate": 1.9800406087549692e-05, + "loss": 0.8557, + "step": 5313 + }, + { + "epoch": 0.09182333425490738, + "grad_norm": 1.002328658074323, + "learning_rate": 1.9800294815331348e-05, + "loss": 0.7848, + "step": 5314 + }, + { + "epoch": 0.09184061376831629, + "grad_norm": 0.8098040029705187, + "learning_rate": 1.9800183512417742e-05, + "loss": 0.5884, + "step": 5315 + }, + { + "epoch": 0.09185789328172518, + "grad_norm": 0.9929620644107441, + "learning_rate": 1.9800072178809215e-05, + "loss": 0.9082, + "step": 5316 + }, + { + "epoch": 0.09187517279513409, + "grad_norm": 0.9701173449679336, + "learning_rate": 1.9799960814506117e-05, + "loss": 0.7011, + "step": 5317 + }, + { + "epoch": 0.091892452308543, + "grad_norm": 0.9722293946825614, + "learning_rate": 1.9799849419508795e-05, + "loss": 0.6847, + "step": 5318 + }, + { + "epoch": 0.09190973182195189, + "grad_norm": 0.7639082915736319, + "learning_rate": 1.9799737993817605e-05, + "loss": 0.5531, + "step": 5319 + }, + { + "epoch": 0.0919270113353608, + "grad_norm": 1.0593698558261977, + "learning_rate": 1.9799626537432888e-05, + "loss": 0.7048, + "step": 5320 + }, + { + "epoch": 0.0919442908487697, + "grad_norm": 0.942620976687859, + "learning_rate": 1.9799515050354998e-05, + "loss": 0.9459, + "step": 5321 + }, + { + "epoch": 0.0919615703621786, + "grad_norm": 0.9181081126934316, + "learning_rate": 1.9799403532584275e-05, + "loss": 0.6891, + "step": 5322 + }, + { + "epoch": 0.0919788498755875, + "grad_norm": 0.5838864234005781, + "learning_rate": 1.979929198412108e-05, + "loss": 0.3647, + "step": 5323 + }, + { + "epoch": 0.0919961293889964, + "grad_norm": 1.2625824630400064, + "learning_rate": 1.979918040496576e-05, + "loss": 0.8926, + "step": 5324 + }, + { + "epoch": 0.09201340890240531, + "grad_norm": 0.7878076031446212, + "learning_rate": 1.9799068795118658e-05, + "loss": 0.7655, + "step": 5325 + }, + { + "epoch": 0.09203068841581422, + "grad_norm": 1.0336527201603145, + "learning_rate": 1.9798957154580132e-05, + "loss": 0.7822, + "step": 5326 + }, + { + "epoch": 0.09204796792922311, + "grad_norm": 0.8513804393795432, + "learning_rate": 1.9798845483350526e-05, + "loss": 0.6418, + "step": 5327 + }, + { + "epoch": 0.09206524744263202, + "grad_norm": 0.9728432407397748, + "learning_rate": 1.979873378143019e-05, + "loss": 0.6912, + "step": 5328 + }, + { + "epoch": 0.09208252695604091, + "grad_norm": 0.9622327943463272, + "learning_rate": 1.9798622048819476e-05, + "loss": 0.8141, + "step": 5329 + }, + { + "epoch": 0.09209980646944982, + "grad_norm": 0.9853161348136977, + "learning_rate": 1.9798510285518734e-05, + "loss": 0.5268, + "step": 5330 + }, + { + "epoch": 0.09211708598285873, + "grad_norm": 0.8222345583717974, + "learning_rate": 1.979839849152831e-05, + "loss": 0.5223, + "step": 5331 + }, + { + "epoch": 0.09213436549626762, + "grad_norm": 0.9656045062128927, + "learning_rate": 1.979828666684856e-05, + "loss": 0.7089, + "step": 5332 + }, + { + "epoch": 0.09215164500967653, + "grad_norm": 1.0900597746231773, + "learning_rate": 1.9798174811479832e-05, + "loss": 0.9508, + "step": 5333 + }, + { + "epoch": 0.09216892452308544, + "grad_norm": 1.0455219002139664, + "learning_rate": 1.9798062925422474e-05, + "loss": 0.8356, + "step": 5334 + }, + { + "epoch": 0.09218620403649433, + "grad_norm": 1.2776362119833933, + "learning_rate": 1.9797951008676838e-05, + "loss": 0.8026, + "step": 5335 + }, + { + "epoch": 0.09220348354990324, + "grad_norm": 1.0888336917017, + "learning_rate": 1.979783906124328e-05, + "loss": 0.8059, + "step": 5336 + }, + { + "epoch": 0.09222076306331213, + "grad_norm": 1.0825460283236872, + "learning_rate": 1.979772708312214e-05, + "loss": 0.9193, + "step": 5337 + }, + { + "epoch": 0.09223804257672104, + "grad_norm": 0.9055539009887063, + "learning_rate": 1.9797615074313774e-05, + "loss": 0.6069, + "step": 5338 + }, + { + "epoch": 0.09225532209012995, + "grad_norm": 1.1331198362940387, + "learning_rate": 1.9797503034818536e-05, + "loss": 0.679, + "step": 5339 + }, + { + "epoch": 0.09227260160353884, + "grad_norm": 1.132204492034364, + "learning_rate": 1.979739096463677e-05, + "loss": 0.6571, + "step": 5340 + }, + { + "epoch": 0.09228988111694775, + "grad_norm": 0.8270453959956692, + "learning_rate": 1.9797278863768835e-05, + "loss": 0.6137, + "step": 5341 + }, + { + "epoch": 0.09230716063035665, + "grad_norm": 1.1646586550259683, + "learning_rate": 1.9797166732215078e-05, + "loss": 0.7608, + "step": 5342 + }, + { + "epoch": 0.09232444014376555, + "grad_norm": 1.0645036681317432, + "learning_rate": 1.9797054569975848e-05, + "loss": 0.6081, + "step": 5343 + }, + { + "epoch": 0.09234171965717446, + "grad_norm": 1.1458824676888049, + "learning_rate": 1.9796942377051497e-05, + "loss": 0.7228, + "step": 5344 + }, + { + "epoch": 0.09235899917058335, + "grad_norm": 1.0274302624204112, + "learning_rate": 1.9796830153442382e-05, + "loss": 0.8245, + "step": 5345 + }, + { + "epoch": 0.09237627868399226, + "grad_norm": 0.9125209465690713, + "learning_rate": 1.979671789914885e-05, + "loss": 0.5664, + "step": 5346 + }, + { + "epoch": 0.09239355819740117, + "grad_norm": 0.9372719831722313, + "learning_rate": 1.979660561417125e-05, + "loss": 0.5988, + "step": 5347 + }, + { + "epoch": 0.09241083771081006, + "grad_norm": 0.8496883297297734, + "learning_rate": 1.979649329850994e-05, + "loss": 0.7552, + "step": 5348 + }, + { + "epoch": 0.09242811722421897, + "grad_norm": 1.049881448953914, + "learning_rate": 1.9796380952165265e-05, + "loss": 0.7017, + "step": 5349 + }, + { + "epoch": 0.09244539673762787, + "grad_norm": 0.8123822670586615, + "learning_rate": 1.979626857513758e-05, + "loss": 0.6456, + "step": 5350 + }, + { + "epoch": 0.09246267625103677, + "grad_norm": 1.1998773945113277, + "learning_rate": 1.979615616742724e-05, + "loss": 0.9636, + "step": 5351 + }, + { + "epoch": 0.09247995576444568, + "grad_norm": 0.6487259607310811, + "learning_rate": 1.9796043729034596e-05, + "loss": 0.4905, + "step": 5352 + }, + { + "epoch": 0.09249723527785457, + "grad_norm": 1.1148083853989967, + "learning_rate": 1.9795931259959995e-05, + "loss": 0.7246, + "step": 5353 + }, + { + "epoch": 0.09251451479126348, + "grad_norm": 1.063723484962461, + "learning_rate": 1.9795818760203797e-05, + "loss": 0.6348, + "step": 5354 + }, + { + "epoch": 0.09253179430467239, + "grad_norm": 0.8109610477021287, + "learning_rate": 1.9795706229766344e-05, + "loss": 0.5856, + "step": 5355 + }, + { + "epoch": 0.09254907381808128, + "grad_norm": 1.045005125913634, + "learning_rate": 1.9795593668648e-05, + "loss": 0.8525, + "step": 5356 + }, + { + "epoch": 0.09256635333149019, + "grad_norm": 0.6982363857907969, + "learning_rate": 1.979548107684911e-05, + "loss": 0.574, + "step": 5357 + }, + { + "epoch": 0.0925836328448991, + "grad_norm": 1.1898979652337733, + "learning_rate": 1.9795368454370028e-05, + "loss": 0.8139, + "step": 5358 + }, + { + "epoch": 0.09260091235830799, + "grad_norm": 0.9005837753526036, + "learning_rate": 1.9795255801211108e-05, + "loss": 0.631, + "step": 5359 + }, + { + "epoch": 0.0926181918717169, + "grad_norm": 0.8925968920681786, + "learning_rate": 1.9795143117372702e-05, + "loss": 0.6706, + "step": 5360 + }, + { + "epoch": 0.09263547138512579, + "grad_norm": 0.8492507302792717, + "learning_rate": 1.9795030402855166e-05, + "loss": 0.5939, + "step": 5361 + }, + { + "epoch": 0.0926527508985347, + "grad_norm": 1.0660259241763146, + "learning_rate": 1.979491765765885e-05, + "loss": 0.8961, + "step": 5362 + }, + { + "epoch": 0.0926700304119436, + "grad_norm": 1.219762995194466, + "learning_rate": 1.9794804881784103e-05, + "loss": 0.7066, + "step": 5363 + }, + { + "epoch": 0.0926873099253525, + "grad_norm": 0.7636244531738053, + "learning_rate": 1.979469207523129e-05, + "loss": 0.6608, + "step": 5364 + }, + { + "epoch": 0.0927045894387614, + "grad_norm": 1.167131152910161, + "learning_rate": 1.9794579238000752e-05, + "loss": 0.8654, + "step": 5365 + }, + { + "epoch": 0.09272186895217031, + "grad_norm": 0.6384580865941509, + "learning_rate": 1.9794466370092847e-05, + "loss": 0.3702, + "step": 5366 + }, + { + "epoch": 0.09273914846557921, + "grad_norm": 1.0078732156545993, + "learning_rate": 1.9794353471507934e-05, + "loss": 0.8923, + "step": 5367 + }, + { + "epoch": 0.09275642797898812, + "grad_norm": 0.9643918095306523, + "learning_rate": 1.9794240542246356e-05, + "loss": 0.9294, + "step": 5368 + }, + { + "epoch": 0.09277370749239701, + "grad_norm": 1.0155845712728955, + "learning_rate": 1.9794127582308474e-05, + "loss": 0.6119, + "step": 5369 + }, + { + "epoch": 0.09279098700580592, + "grad_norm": 0.9478817370115005, + "learning_rate": 1.979401459169464e-05, + "loss": 0.6451, + "step": 5370 + }, + { + "epoch": 0.09280826651921482, + "grad_norm": 0.8032048010000318, + "learning_rate": 1.979390157040521e-05, + "loss": 0.8295, + "step": 5371 + }, + { + "epoch": 0.09282554603262372, + "grad_norm": 0.9799989287176613, + "learning_rate": 1.9793788518440533e-05, + "loss": 0.6121, + "step": 5372 + }, + { + "epoch": 0.09284282554603263, + "grad_norm": 0.6635027241495314, + "learning_rate": 1.979367543580097e-05, + "loss": 0.4678, + "step": 5373 + }, + { + "epoch": 0.09286010505944152, + "grad_norm": 0.8621225156257105, + "learning_rate": 1.979356232248687e-05, + "loss": 0.7472, + "step": 5374 + }, + { + "epoch": 0.09287738457285043, + "grad_norm": 0.928232665753061, + "learning_rate": 1.9793449178498583e-05, + "loss": 0.7434, + "step": 5375 + }, + { + "epoch": 0.09289466408625933, + "grad_norm": 0.9740275659471558, + "learning_rate": 1.9793336003836474e-05, + "loss": 0.7232, + "step": 5376 + }, + { + "epoch": 0.09291194359966823, + "grad_norm": 0.7898900466801344, + "learning_rate": 1.9793222798500892e-05, + "loss": 0.621, + "step": 5377 + }, + { + "epoch": 0.09292922311307714, + "grad_norm": 0.9213410878574999, + "learning_rate": 1.979310956249219e-05, + "loss": 0.715, + "step": 5378 + }, + { + "epoch": 0.09294650262648604, + "grad_norm": 1.0460280716662473, + "learning_rate": 1.979299629581073e-05, + "loss": 0.8112, + "step": 5379 + }, + { + "epoch": 0.09296378213989494, + "grad_norm": 0.8926138332077606, + "learning_rate": 1.9792882998456853e-05, + "loss": 0.6355, + "step": 5380 + }, + { + "epoch": 0.09298106165330385, + "grad_norm": 0.9880217545739927, + "learning_rate": 1.9792769670430928e-05, + "loss": 0.888, + "step": 5381 + }, + { + "epoch": 0.09299834116671274, + "grad_norm": 1.0416598031875504, + "learning_rate": 1.9792656311733306e-05, + "loss": 0.792, + "step": 5382 + }, + { + "epoch": 0.09301562068012165, + "grad_norm": 0.8285646605892513, + "learning_rate": 1.9792542922364336e-05, + "loss": 0.4711, + "step": 5383 + }, + { + "epoch": 0.09303290019353055, + "grad_norm": 0.8559556244414518, + "learning_rate": 1.9792429502324378e-05, + "loss": 0.5903, + "step": 5384 + }, + { + "epoch": 0.09305017970693945, + "grad_norm": 0.8255217320364827, + "learning_rate": 1.9792316051613787e-05, + "loss": 0.5445, + "step": 5385 + }, + { + "epoch": 0.09306745922034836, + "grad_norm": 1.2350448029101089, + "learning_rate": 1.9792202570232916e-05, + "loss": 0.7475, + "step": 5386 + }, + { + "epoch": 0.09308473873375726, + "grad_norm": 0.8855900048557331, + "learning_rate": 1.9792089058182124e-05, + "loss": 0.7407, + "step": 5387 + }, + { + "epoch": 0.09310201824716616, + "grad_norm": 0.9495464239111648, + "learning_rate": 1.9791975515461766e-05, + "loss": 0.6206, + "step": 5388 + }, + { + "epoch": 0.09311929776057506, + "grad_norm": 0.8415193189650797, + "learning_rate": 1.9791861942072198e-05, + "loss": 0.8418, + "step": 5389 + }, + { + "epoch": 0.09313657727398396, + "grad_norm": 0.7181453083058857, + "learning_rate": 1.979174833801377e-05, + "loss": 0.5439, + "step": 5390 + }, + { + "epoch": 0.09315385678739287, + "grad_norm": 0.9957466511151678, + "learning_rate": 1.9791634703286843e-05, + "loss": 0.6654, + "step": 5391 + }, + { + "epoch": 0.09317113630080177, + "grad_norm": 1.0030350543775208, + "learning_rate": 1.9791521037891774e-05, + "loss": 0.8702, + "step": 5392 + }, + { + "epoch": 0.09318841581421067, + "grad_norm": 1.1018170696767426, + "learning_rate": 1.9791407341828915e-05, + "loss": 0.7777, + "step": 5393 + }, + { + "epoch": 0.09320569532761958, + "grad_norm": 0.9318105129898753, + "learning_rate": 1.9791293615098625e-05, + "loss": 0.942, + "step": 5394 + }, + { + "epoch": 0.09322297484102848, + "grad_norm": 0.9451129745531498, + "learning_rate": 1.979117985770126e-05, + "loss": 0.6928, + "step": 5395 + }, + { + "epoch": 0.09324025435443738, + "grad_norm": 0.9659460472414202, + "learning_rate": 1.9791066069637175e-05, + "loss": 0.8143, + "step": 5396 + }, + { + "epoch": 0.09325753386784628, + "grad_norm": 1.165212229872582, + "learning_rate": 1.979095225090673e-05, + "loss": 0.7299, + "step": 5397 + }, + { + "epoch": 0.09327481338125518, + "grad_norm": 0.8526905255268674, + "learning_rate": 1.9790838401510274e-05, + "loss": 0.6874, + "step": 5398 + }, + { + "epoch": 0.09329209289466409, + "grad_norm": 0.8630518175146122, + "learning_rate": 1.979072452144817e-05, + "loss": 0.6767, + "step": 5399 + }, + { + "epoch": 0.093309372408073, + "grad_norm": 1.017286074327677, + "learning_rate": 1.979061061072077e-05, + "loss": 0.8255, + "step": 5400 + }, + { + "epoch": 0.09332665192148189, + "grad_norm": 1.2226050654266696, + "learning_rate": 1.9790496669328436e-05, + "loss": 0.8198, + "step": 5401 + }, + { + "epoch": 0.0933439314348908, + "grad_norm": 0.9391162767653384, + "learning_rate": 1.9790382697271525e-05, + "loss": 0.879, + "step": 5402 + }, + { + "epoch": 0.0933612109482997, + "grad_norm": 0.7325429422035075, + "learning_rate": 1.9790268694550386e-05, + "loss": 0.557, + "step": 5403 + }, + { + "epoch": 0.0933784904617086, + "grad_norm": 0.8424072984129872, + "learning_rate": 1.9790154661165384e-05, + "loss": 0.5339, + "step": 5404 + }, + { + "epoch": 0.0933957699751175, + "grad_norm": 0.8529455412999902, + "learning_rate": 1.9790040597116874e-05, + "loss": 0.7357, + "step": 5405 + }, + { + "epoch": 0.0934130494885264, + "grad_norm": 1.0293421329098593, + "learning_rate": 1.9789926502405212e-05, + "loss": 0.8427, + "step": 5406 + }, + { + "epoch": 0.0934303290019353, + "grad_norm": 1.1374778569643063, + "learning_rate": 1.9789812377030758e-05, + "loss": 0.7225, + "step": 5407 + }, + { + "epoch": 0.09344760851534421, + "grad_norm": 0.9722567323064765, + "learning_rate": 1.9789698220993866e-05, + "loss": 0.7404, + "step": 5408 + }, + { + "epoch": 0.09346488802875311, + "grad_norm": 0.7715262138085884, + "learning_rate": 1.9789584034294894e-05, + "loss": 0.7288, + "step": 5409 + }, + { + "epoch": 0.09348216754216201, + "grad_norm": 1.0426898474337134, + "learning_rate": 1.97894698169342e-05, + "loss": 0.6596, + "step": 5410 + }, + { + "epoch": 0.09349944705557091, + "grad_norm": 0.7560500652416023, + "learning_rate": 1.9789355568912146e-05, + "loss": 0.6037, + "step": 5411 + }, + { + "epoch": 0.09351672656897982, + "grad_norm": 0.8396131640594772, + "learning_rate": 1.9789241290229086e-05, + "loss": 0.5956, + "step": 5412 + }, + { + "epoch": 0.09353400608238872, + "grad_norm": 0.5060210180453959, + "learning_rate": 1.9789126980885377e-05, + "loss": 0.5438, + "step": 5413 + }, + { + "epoch": 0.09355128559579762, + "grad_norm": 0.9492731504145355, + "learning_rate": 1.9789012640881377e-05, + "loss": 0.6117, + "step": 5414 + }, + { + "epoch": 0.09356856510920653, + "grad_norm": 1.2603025895747664, + "learning_rate": 1.9788898270217446e-05, + "loss": 0.7843, + "step": 5415 + }, + { + "epoch": 0.09358584462261543, + "grad_norm": 0.9456291185121467, + "learning_rate": 1.9788783868893942e-05, + "loss": 0.6416, + "step": 5416 + }, + { + "epoch": 0.09360312413602433, + "grad_norm": 0.9561894464738394, + "learning_rate": 1.9788669436911227e-05, + "loss": 0.5296, + "step": 5417 + }, + { + "epoch": 0.09362040364943323, + "grad_norm": 1.0439269589428952, + "learning_rate": 1.9788554974269652e-05, + "loss": 0.7811, + "step": 5418 + }, + { + "epoch": 0.09363768316284213, + "grad_norm": 1.0320709338790794, + "learning_rate": 1.978844048096958e-05, + "loss": 0.6729, + "step": 5419 + }, + { + "epoch": 0.09365496267625104, + "grad_norm": 0.8739367996724677, + "learning_rate": 1.9788325957011366e-05, + "loss": 0.6134, + "step": 5420 + }, + { + "epoch": 0.09367224218965994, + "grad_norm": 0.8073486603331596, + "learning_rate": 1.9788211402395373e-05, + "loss": 0.612, + "step": 5421 + }, + { + "epoch": 0.09368952170306884, + "grad_norm": 1.1536011330121207, + "learning_rate": 1.9788096817121958e-05, + "loss": 0.8156, + "step": 5422 + }, + { + "epoch": 0.09370680121647774, + "grad_norm": 0.9227779265150979, + "learning_rate": 1.9787982201191482e-05, + "loss": 0.7361, + "step": 5423 + }, + { + "epoch": 0.09372408072988665, + "grad_norm": 0.9988829105111027, + "learning_rate": 1.97878675546043e-05, + "loss": 0.6805, + "step": 5424 + }, + { + "epoch": 0.09374136024329555, + "grad_norm": 0.5434739076156613, + "learning_rate": 1.978775287736077e-05, + "loss": 0.7425, + "step": 5425 + }, + { + "epoch": 0.09375863975670445, + "grad_norm": 1.1153996545856313, + "learning_rate": 1.9787638169461258e-05, + "loss": 0.6557, + "step": 5426 + }, + { + "epoch": 0.09377591927011335, + "grad_norm": 1.0877772553629717, + "learning_rate": 1.9787523430906117e-05, + "loss": 0.8465, + "step": 5427 + }, + { + "epoch": 0.09379319878352226, + "grad_norm": 1.0651266400688986, + "learning_rate": 1.978740866169571e-05, + "loss": 0.5933, + "step": 5428 + }, + { + "epoch": 0.09381047829693116, + "grad_norm": 0.9847406199408497, + "learning_rate": 1.9787293861830396e-05, + "loss": 0.8461, + "step": 5429 + }, + { + "epoch": 0.09382775781034006, + "grad_norm": 0.9588112668767744, + "learning_rate": 1.9787179031310534e-05, + "loss": 0.7923, + "step": 5430 + }, + { + "epoch": 0.09384503732374896, + "grad_norm": 1.2465219084399548, + "learning_rate": 1.978706417013648e-05, + "loss": 0.7468, + "step": 5431 + }, + { + "epoch": 0.09386231683715787, + "grad_norm": 0.9732364193885287, + "learning_rate": 1.97869492783086e-05, + "loss": 0.7661, + "step": 5432 + }, + { + "epoch": 0.09387959635056677, + "grad_norm": 0.9525456207980784, + "learning_rate": 1.9786834355827253e-05, + "loss": 0.7861, + "step": 5433 + }, + { + "epoch": 0.09389687586397567, + "grad_norm": 0.9909117399893669, + "learning_rate": 1.9786719402692796e-05, + "loss": 0.5673, + "step": 5434 + }, + { + "epoch": 0.09391415537738457, + "grad_norm": 1.1065300114013195, + "learning_rate": 1.9786604418905588e-05, + "loss": 0.8714, + "step": 5435 + }, + { + "epoch": 0.09393143489079347, + "grad_norm": 0.9303117705222053, + "learning_rate": 1.9786489404465992e-05, + "loss": 0.7402, + "step": 5436 + }, + { + "epoch": 0.09394871440420238, + "grad_norm": 0.9212298019635059, + "learning_rate": 1.9786374359374368e-05, + "loss": 0.5912, + "step": 5437 + }, + { + "epoch": 0.09396599391761128, + "grad_norm": 0.9377426545016254, + "learning_rate": 1.9786259283631075e-05, + "loss": 0.7836, + "step": 5438 + }, + { + "epoch": 0.09398327343102018, + "grad_norm": 1.0194403061283865, + "learning_rate": 1.9786144177236474e-05, + "loss": 0.8131, + "step": 5439 + }, + { + "epoch": 0.09400055294442909, + "grad_norm": 1.0720250891770815, + "learning_rate": 1.9786029040190927e-05, + "loss": 0.8217, + "step": 5440 + }, + { + "epoch": 0.09401783245783799, + "grad_norm": 1.051155427236624, + "learning_rate": 1.9785913872494794e-05, + "loss": 0.8653, + "step": 5441 + }, + { + "epoch": 0.09403511197124689, + "grad_norm": 1.0584954118695076, + "learning_rate": 1.9785798674148434e-05, + "loss": 0.6979, + "step": 5442 + }, + { + "epoch": 0.09405239148465579, + "grad_norm": 0.821917589549538, + "learning_rate": 1.9785683445152205e-05, + "loss": 0.527, + "step": 5443 + }, + { + "epoch": 0.0940696709980647, + "grad_norm": 1.0288369762919551, + "learning_rate": 1.9785568185506475e-05, + "loss": 0.7188, + "step": 5444 + }, + { + "epoch": 0.0940869505114736, + "grad_norm": 0.7724290909468667, + "learning_rate": 1.9785452895211606e-05, + "loss": 0.6604, + "step": 5445 + }, + { + "epoch": 0.0941042300248825, + "grad_norm": 0.9513607125403988, + "learning_rate": 1.9785337574267948e-05, + "loss": 0.5318, + "step": 5446 + }, + { + "epoch": 0.0941215095382914, + "grad_norm": 0.8773450621302052, + "learning_rate": 1.9785222222675877e-05, + "loss": 0.5738, + "step": 5447 + }, + { + "epoch": 0.0941387890517003, + "grad_norm": 0.4947166181203938, + "learning_rate": 1.978510684043574e-05, + "loss": 0.5393, + "step": 5448 + }, + { + "epoch": 0.0941560685651092, + "grad_norm": 1.0944766875948855, + "learning_rate": 1.9784991427547903e-05, + "loss": 0.5914, + "step": 5449 + }, + { + "epoch": 0.09417334807851811, + "grad_norm": 0.9288193518311385, + "learning_rate": 1.9784875984012736e-05, + "loss": 0.5698, + "step": 5450 + }, + { + "epoch": 0.094190627591927, + "grad_norm": 0.9931687172183304, + "learning_rate": 1.9784760509830586e-05, + "loss": 0.6781, + "step": 5451 + }, + { + "epoch": 0.09420790710533591, + "grad_norm": 0.9277227268836039, + "learning_rate": 1.9784645005001828e-05, + "loss": 0.7893, + "step": 5452 + }, + { + "epoch": 0.09422518661874482, + "grad_norm": 0.599029409735491, + "learning_rate": 1.9784529469526815e-05, + "loss": 0.9986, + "step": 5453 + }, + { + "epoch": 0.09424246613215372, + "grad_norm": 1.0058121324169564, + "learning_rate": 1.9784413903405916e-05, + "loss": 0.8786, + "step": 5454 + }, + { + "epoch": 0.09425974564556262, + "grad_norm": 0.8988936705526379, + "learning_rate": 1.9784298306639487e-05, + "loss": 0.7269, + "step": 5455 + }, + { + "epoch": 0.09427702515897152, + "grad_norm": 1.1226750396119376, + "learning_rate": 1.9784182679227894e-05, + "loss": 0.6795, + "step": 5456 + }, + { + "epoch": 0.09429430467238042, + "grad_norm": 1.1093549470521527, + "learning_rate": 1.9784067021171493e-05, + "loss": 0.9164, + "step": 5457 + }, + { + "epoch": 0.09431158418578933, + "grad_norm": 1.0565389701793881, + "learning_rate": 1.9783951332470655e-05, + "loss": 0.6814, + "step": 5458 + }, + { + "epoch": 0.09432886369919823, + "grad_norm": 1.018353057188669, + "learning_rate": 1.9783835613125735e-05, + "loss": 0.7272, + "step": 5459 + }, + { + "epoch": 0.09434614321260713, + "grad_norm": 0.9351339550653097, + "learning_rate": 1.9783719863137097e-05, + "loss": 0.6853, + "step": 5460 + }, + { + "epoch": 0.09436342272601604, + "grad_norm": 1.0785952856801784, + "learning_rate": 1.9783604082505108e-05, + "loss": 0.7748, + "step": 5461 + }, + { + "epoch": 0.09438070223942494, + "grad_norm": 1.0445527209342746, + "learning_rate": 1.9783488271230126e-05, + "loss": 0.7099, + "step": 5462 + }, + { + "epoch": 0.09439798175283384, + "grad_norm": 1.0087659134438465, + "learning_rate": 1.9783372429312515e-05, + "loss": 0.684, + "step": 5463 + }, + { + "epoch": 0.09441526126624274, + "grad_norm": 1.010912105312885, + "learning_rate": 1.978325655675264e-05, + "loss": 0.8026, + "step": 5464 + }, + { + "epoch": 0.09443254077965164, + "grad_norm": 1.1708007774153677, + "learning_rate": 1.9783140653550862e-05, + "loss": 0.6422, + "step": 5465 + }, + { + "epoch": 0.09444982029306055, + "grad_norm": 1.2561851198989156, + "learning_rate": 1.978302471970754e-05, + "loss": 0.824, + "step": 5466 + }, + { + "epoch": 0.09446709980646945, + "grad_norm": 0.5942610528342156, + "learning_rate": 1.9782908755223045e-05, + "loss": 0.7456, + "step": 5467 + }, + { + "epoch": 0.09448437931987835, + "grad_norm": 1.0524330924607916, + "learning_rate": 1.9782792760097735e-05, + "loss": 0.5062, + "step": 5468 + }, + { + "epoch": 0.09450165883328726, + "grad_norm": 0.94803086567649, + "learning_rate": 1.9782676734331975e-05, + "loss": 0.7392, + "step": 5469 + }, + { + "epoch": 0.09451893834669615, + "grad_norm": 1.118860973321022, + "learning_rate": 1.978256067792613e-05, + "loss": 0.6158, + "step": 5470 + }, + { + "epoch": 0.09453621786010506, + "grad_norm": 0.9426977127584872, + "learning_rate": 1.9782444590880558e-05, + "loss": 0.8036, + "step": 5471 + }, + { + "epoch": 0.09455349737351396, + "grad_norm": 0.8210217262750619, + "learning_rate": 1.9782328473195625e-05, + "loss": 0.6071, + "step": 5472 + }, + { + "epoch": 0.09457077688692286, + "grad_norm": 1.1530543554827126, + "learning_rate": 1.97822123248717e-05, + "loss": 0.9762, + "step": 5473 + }, + { + "epoch": 0.09458805640033177, + "grad_norm": 0.8326557545805774, + "learning_rate": 1.978209614590914e-05, + "loss": 0.7102, + "step": 5474 + }, + { + "epoch": 0.09460533591374067, + "grad_norm": 0.8092872209855443, + "learning_rate": 1.9781979936308313e-05, + "loss": 0.5091, + "step": 5475 + }, + { + "epoch": 0.09462261542714957, + "grad_norm": 0.7485193593673495, + "learning_rate": 1.9781863696069582e-05, + "loss": 0.5288, + "step": 5476 + }, + { + "epoch": 0.09463989494055848, + "grad_norm": 0.8491133357014752, + "learning_rate": 1.9781747425193307e-05, + "loss": 0.6977, + "step": 5477 + }, + { + "epoch": 0.09465717445396737, + "grad_norm": 1.022794076674819, + "learning_rate": 1.9781631123679857e-05, + "loss": 0.8632, + "step": 5478 + }, + { + "epoch": 0.09467445396737628, + "grad_norm": 1.1342814293770997, + "learning_rate": 1.9781514791529597e-05, + "loss": 0.7304, + "step": 5479 + }, + { + "epoch": 0.09469173348078518, + "grad_norm": 0.979663690374788, + "learning_rate": 1.978139842874289e-05, + "loss": 0.6373, + "step": 5480 + }, + { + "epoch": 0.09470901299419408, + "grad_norm": 0.7819187231358441, + "learning_rate": 1.9781282035320095e-05, + "loss": 0.696, + "step": 5481 + }, + { + "epoch": 0.09472629250760299, + "grad_norm": 0.8314150485472604, + "learning_rate": 1.9781165611261584e-05, + "loss": 0.5114, + "step": 5482 + }, + { + "epoch": 0.09474357202101188, + "grad_norm": 1.256051425075731, + "learning_rate": 1.978104915656772e-05, + "loss": 0.9076, + "step": 5483 + }, + { + "epoch": 0.09476085153442079, + "grad_norm": 0.914483833654915, + "learning_rate": 1.9780932671238865e-05, + "loss": 0.7863, + "step": 5484 + }, + { + "epoch": 0.09477813104782969, + "grad_norm": 0.9607229511647217, + "learning_rate": 1.9780816155275385e-05, + "loss": 0.674, + "step": 5485 + }, + { + "epoch": 0.0947954105612386, + "grad_norm": 0.5742507797645673, + "learning_rate": 1.9780699608677648e-05, + "loss": 0.6448, + "step": 5486 + }, + { + "epoch": 0.0948126900746475, + "grad_norm": 0.8095947673708223, + "learning_rate": 1.9780583031446015e-05, + "loss": 0.5892, + "step": 5487 + }, + { + "epoch": 0.0948299695880564, + "grad_norm": 1.3340472423185967, + "learning_rate": 1.9780466423580853e-05, + "loss": 0.6975, + "step": 5488 + }, + { + "epoch": 0.0948472491014653, + "grad_norm": 1.247518787927185, + "learning_rate": 1.9780349785082528e-05, + "loss": 0.8574, + "step": 5489 + }, + { + "epoch": 0.09486452861487421, + "grad_norm": 0.866167794766115, + "learning_rate": 1.9780233115951402e-05, + "loss": 0.6444, + "step": 5490 + }, + { + "epoch": 0.0948818081282831, + "grad_norm": 0.7906298582523978, + "learning_rate": 1.9780116416187844e-05, + "loss": 0.7113, + "step": 5491 + }, + { + "epoch": 0.09489908764169201, + "grad_norm": 0.9415726207686392, + "learning_rate": 1.9779999685792217e-05, + "loss": 0.5568, + "step": 5492 + }, + { + "epoch": 0.0949163671551009, + "grad_norm": 0.7850372078240189, + "learning_rate": 1.9779882924764887e-05, + "loss": 0.7167, + "step": 5493 + }, + { + "epoch": 0.09493364666850981, + "grad_norm": 1.0665151848254086, + "learning_rate": 1.9779766133106224e-05, + "loss": 0.8982, + "step": 5494 + }, + { + "epoch": 0.09495092618191872, + "grad_norm": 1.0123543634452727, + "learning_rate": 1.9779649310816585e-05, + "loss": 0.7022, + "step": 5495 + }, + { + "epoch": 0.09496820569532761, + "grad_norm": 0.9817431174600756, + "learning_rate": 1.9779532457896345e-05, + "loss": 0.7542, + "step": 5496 + }, + { + "epoch": 0.09498548520873652, + "grad_norm": 0.6808782176688981, + "learning_rate": 1.9779415574345865e-05, + "loss": 0.7044, + "step": 5497 + }, + { + "epoch": 0.09500276472214543, + "grad_norm": 1.1527382345119739, + "learning_rate": 1.9779298660165516e-05, + "loss": 0.7334, + "step": 5498 + }, + { + "epoch": 0.09502004423555432, + "grad_norm": 1.1092866905665648, + "learning_rate": 1.9779181715355655e-05, + "loss": 0.7743, + "step": 5499 + }, + { + "epoch": 0.09503732374896323, + "grad_norm": 0.9110921606690534, + "learning_rate": 1.9779064739916656e-05, + "loss": 0.6313, + "step": 5500 + }, + { + "epoch": 0.09505460326237213, + "grad_norm": 0.9667466514292705, + "learning_rate": 1.9778947733848884e-05, + "loss": 0.7477, + "step": 5501 + }, + { + "epoch": 0.09507188277578103, + "grad_norm": 0.6816332712483646, + "learning_rate": 1.9778830697152703e-05, + "loss": 0.5723, + "step": 5502 + }, + { + "epoch": 0.09508916228918994, + "grad_norm": 0.7915248207445924, + "learning_rate": 1.977871362982848e-05, + "loss": 0.5669, + "step": 5503 + }, + { + "epoch": 0.09510644180259883, + "grad_norm": 0.9993431286834995, + "learning_rate": 1.9778596531876585e-05, + "loss": 0.9431, + "step": 5504 + }, + { + "epoch": 0.09512372131600774, + "grad_norm": 1.0917047850963209, + "learning_rate": 1.977847940329738e-05, + "loss": 0.8511, + "step": 5505 + }, + { + "epoch": 0.09514100082941665, + "grad_norm": 0.6815741894489196, + "learning_rate": 1.977836224409124e-05, + "loss": 0.328, + "step": 5506 + }, + { + "epoch": 0.09515828034282554, + "grad_norm": 0.6155016350205608, + "learning_rate": 1.977824505425852e-05, + "loss": 0.7021, + "step": 5507 + }, + { + "epoch": 0.09517555985623445, + "grad_norm": 0.8920673100041321, + "learning_rate": 1.9778127833799596e-05, + "loss": 0.8168, + "step": 5508 + }, + { + "epoch": 0.09519283936964335, + "grad_norm": 1.1065224272147165, + "learning_rate": 1.9778010582714832e-05, + "loss": 0.72, + "step": 5509 + }, + { + "epoch": 0.09521011888305225, + "grad_norm": 0.6264653366538697, + "learning_rate": 1.9777893301004596e-05, + "loss": 0.6583, + "step": 5510 + }, + { + "epoch": 0.09522739839646116, + "grad_norm": 0.9306901525205225, + "learning_rate": 1.9777775988669254e-05, + "loss": 0.733, + "step": 5511 + }, + { + "epoch": 0.09524467790987005, + "grad_norm": 0.8827207810170756, + "learning_rate": 1.9777658645709174e-05, + "loss": 0.8125, + "step": 5512 + }, + { + "epoch": 0.09526195742327896, + "grad_norm": 0.9680890891111137, + "learning_rate": 1.9777541272124727e-05, + "loss": 0.7325, + "step": 5513 + }, + { + "epoch": 0.09527923693668787, + "grad_norm": 0.8400809937109133, + "learning_rate": 1.9777423867916278e-05, + "loss": 0.6777, + "step": 5514 + }, + { + "epoch": 0.09529651645009676, + "grad_norm": 0.8452259244220331, + "learning_rate": 1.9777306433084193e-05, + "loss": 0.6497, + "step": 5515 + }, + { + "epoch": 0.09531379596350567, + "grad_norm": 0.9161745133541697, + "learning_rate": 1.977718896762884e-05, + "loss": 0.6907, + "step": 5516 + }, + { + "epoch": 0.09533107547691456, + "grad_norm": 0.788740196533022, + "learning_rate": 1.977707147155059e-05, + "loss": 0.6512, + "step": 5517 + }, + { + "epoch": 0.09534835499032347, + "grad_norm": 1.167595461826422, + "learning_rate": 1.9776953944849805e-05, + "loss": 0.782, + "step": 5518 + }, + { + "epoch": 0.09536563450373238, + "grad_norm": 0.9309688206958241, + "learning_rate": 1.9776836387526862e-05, + "loss": 0.6609, + "step": 5519 + }, + { + "epoch": 0.09538291401714127, + "grad_norm": 1.216866346228178, + "learning_rate": 1.9776718799582123e-05, + "loss": 0.8608, + "step": 5520 + }, + { + "epoch": 0.09540019353055018, + "grad_norm": 0.860514704273944, + "learning_rate": 1.9776601181015958e-05, + "loss": 0.7327, + "step": 5521 + }, + { + "epoch": 0.09541747304395909, + "grad_norm": 0.7723172872542692, + "learning_rate": 1.9776483531828734e-05, + "loss": 0.6941, + "step": 5522 + }, + { + "epoch": 0.09543475255736798, + "grad_norm": 0.8336866646095831, + "learning_rate": 1.977636585202082e-05, + "loss": 0.6231, + "step": 5523 + }, + { + "epoch": 0.09545203207077689, + "grad_norm": 0.8536451948952091, + "learning_rate": 1.977624814159259e-05, + "loss": 0.7268, + "step": 5524 + }, + { + "epoch": 0.09546931158418578, + "grad_norm": 0.5366712204368163, + "learning_rate": 1.9776130400544402e-05, + "loss": 0.62, + "step": 5525 + }, + { + "epoch": 0.09548659109759469, + "grad_norm": 0.7837862306438216, + "learning_rate": 1.977601262887663e-05, + "loss": 0.6212, + "step": 5526 + }, + { + "epoch": 0.0955038706110036, + "grad_norm": 0.9889812711767776, + "learning_rate": 1.977589482658965e-05, + "loss": 0.8087, + "step": 5527 + }, + { + "epoch": 0.0955211501244125, + "grad_norm": 1.317141016593489, + "learning_rate": 1.977577699368382e-05, + "loss": 0.7786, + "step": 5528 + }, + { + "epoch": 0.0955384296378214, + "grad_norm": 0.9538085958439152, + "learning_rate": 1.9775659130159513e-05, + "loss": 0.688, + "step": 5529 + }, + { + "epoch": 0.0955557091512303, + "grad_norm": 1.0785964493622282, + "learning_rate": 1.97755412360171e-05, + "loss": 0.6805, + "step": 5530 + }, + { + "epoch": 0.0955729886646392, + "grad_norm": 0.7475915272252339, + "learning_rate": 1.977542331125695e-05, + "loss": 0.8596, + "step": 5531 + }, + { + "epoch": 0.09559026817804811, + "grad_norm": 0.8180941593691842, + "learning_rate": 1.977530535587943e-05, + "loss": 0.4758, + "step": 5532 + }, + { + "epoch": 0.095607547691457, + "grad_norm": 1.0627698703308623, + "learning_rate": 1.9775187369884913e-05, + "loss": 0.6596, + "step": 5533 + }, + { + "epoch": 0.09562482720486591, + "grad_norm": 1.0129275922588041, + "learning_rate": 1.977506935327376e-05, + "loss": 0.6001, + "step": 5534 + }, + { + "epoch": 0.09564210671827482, + "grad_norm": 0.5546975927788559, + "learning_rate": 1.9774951306046352e-05, + "loss": 0.5897, + "step": 5535 + }, + { + "epoch": 0.09565938623168371, + "grad_norm": 1.259810368281551, + "learning_rate": 1.9774833228203056e-05, + "loss": 0.8238, + "step": 5536 + }, + { + "epoch": 0.09567666574509262, + "grad_norm": 0.7674964342871848, + "learning_rate": 1.9774715119744235e-05, + "loss": 0.7364, + "step": 5537 + }, + { + "epoch": 0.09569394525850151, + "grad_norm": 1.1528926901880177, + "learning_rate": 1.9774596980670265e-05, + "loss": 0.7524, + "step": 5538 + }, + { + "epoch": 0.09571122477191042, + "grad_norm": 1.0806336703201553, + "learning_rate": 1.9774478810981512e-05, + "loss": 0.8781, + "step": 5539 + }, + { + "epoch": 0.09572850428531933, + "grad_norm": 0.990081380365156, + "learning_rate": 1.9774360610678353e-05, + "loss": 0.6768, + "step": 5540 + }, + { + "epoch": 0.09574578379872822, + "grad_norm": 0.8220360434539926, + "learning_rate": 1.977424237976115e-05, + "loss": 0.5985, + "step": 5541 + }, + { + "epoch": 0.09576306331213713, + "grad_norm": 0.9691250614574041, + "learning_rate": 1.977412411823028e-05, + "loss": 0.7138, + "step": 5542 + }, + { + "epoch": 0.09578034282554604, + "grad_norm": 1.002274344883582, + "learning_rate": 1.9774005826086108e-05, + "loss": 0.7656, + "step": 5543 + }, + { + "epoch": 0.09579762233895493, + "grad_norm": 1.1420093991338183, + "learning_rate": 1.977388750332901e-05, + "loss": 0.8945, + "step": 5544 + }, + { + "epoch": 0.09581490185236384, + "grad_norm": 1.0654721403901726, + "learning_rate": 1.977376914995935e-05, + "loss": 1.0498, + "step": 5545 + }, + { + "epoch": 0.09583218136577273, + "grad_norm": 1.0652289021024517, + "learning_rate": 1.97736507659775e-05, + "loss": 0.8446, + "step": 5546 + }, + { + "epoch": 0.09584946087918164, + "grad_norm": 0.837796570893676, + "learning_rate": 1.977353235138384e-05, + "loss": 0.6687, + "step": 5547 + }, + { + "epoch": 0.09586674039259055, + "grad_norm": 0.8241016993489162, + "learning_rate": 1.977341390617873e-05, + "loss": 0.4934, + "step": 5548 + }, + { + "epoch": 0.09588401990599944, + "grad_norm": 0.8215429135539336, + "learning_rate": 1.9773295430362545e-05, + "loss": 0.789, + "step": 5549 + }, + { + "epoch": 0.09590129941940835, + "grad_norm": 0.9770845033923193, + "learning_rate": 1.9773176923935655e-05, + "loss": 0.8106, + "step": 5550 + }, + { + "epoch": 0.09591857893281726, + "grad_norm": 0.8332313029943118, + "learning_rate": 1.9773058386898434e-05, + "loss": 0.7757, + "step": 5551 + }, + { + "epoch": 0.09593585844622615, + "grad_norm": 0.9643561282110207, + "learning_rate": 1.9772939819251247e-05, + "loss": 0.8027, + "step": 5552 + }, + { + "epoch": 0.09595313795963506, + "grad_norm": 0.7489646682598989, + "learning_rate": 1.977282122099448e-05, + "loss": 0.755, + "step": 5553 + }, + { + "epoch": 0.09597041747304395, + "grad_norm": 0.9714341591837621, + "learning_rate": 1.9772702592128485e-05, + "loss": 0.5722, + "step": 5554 + }, + { + "epoch": 0.09598769698645286, + "grad_norm": 1.489772478289579, + "learning_rate": 1.9772583932653648e-05, + "loss": 0.9386, + "step": 5555 + }, + { + "epoch": 0.09600497649986177, + "grad_norm": 0.6935776619827693, + "learning_rate": 1.9772465242570333e-05, + "loss": 0.608, + "step": 5556 + }, + { + "epoch": 0.09602225601327066, + "grad_norm": 1.0860906060560889, + "learning_rate": 1.9772346521878913e-05, + "loss": 0.7054, + "step": 5557 + }, + { + "epoch": 0.09603953552667957, + "grad_norm": 0.6055431637310955, + "learning_rate": 1.977222777057976e-05, + "loss": 0.6051, + "step": 5558 + }, + { + "epoch": 0.09605681504008848, + "grad_norm": 0.8791237876331721, + "learning_rate": 1.977210898867325e-05, + "loss": 0.6591, + "step": 5559 + }, + { + "epoch": 0.09607409455349737, + "grad_norm": 0.9752507746384911, + "learning_rate": 1.9771990176159752e-05, + "loss": 0.753, + "step": 5560 + }, + { + "epoch": 0.09609137406690628, + "grad_norm": 0.7799119861009389, + "learning_rate": 1.977187133303964e-05, + "loss": 0.7349, + "step": 5561 + }, + { + "epoch": 0.09610865358031517, + "grad_norm": 0.7456747536475501, + "learning_rate": 1.9771752459313277e-05, + "loss": 0.6466, + "step": 5562 + }, + { + "epoch": 0.09612593309372408, + "grad_norm": 0.9803953830494977, + "learning_rate": 1.9771633554981048e-05, + "loss": 0.7121, + "step": 5563 + }, + { + "epoch": 0.09614321260713299, + "grad_norm": 1.0327401148759918, + "learning_rate": 1.977151462004332e-05, + "loss": 0.6503, + "step": 5564 + }, + { + "epoch": 0.09616049212054188, + "grad_norm": 0.9805936913375796, + "learning_rate": 1.9771395654500466e-05, + "loss": 0.698, + "step": 5565 + }, + { + "epoch": 0.09617777163395079, + "grad_norm": 1.0208984899492624, + "learning_rate": 1.9771276658352858e-05, + "loss": 0.7956, + "step": 5566 + }, + { + "epoch": 0.09619505114735968, + "grad_norm": 1.0286039712194275, + "learning_rate": 1.9771157631600868e-05, + "loss": 0.7657, + "step": 5567 + }, + { + "epoch": 0.09621233066076859, + "grad_norm": 1.0005354558867623, + "learning_rate": 1.977103857424487e-05, + "loss": 0.7875, + "step": 5568 + }, + { + "epoch": 0.0962296101741775, + "grad_norm": 0.8153274215749956, + "learning_rate": 1.9770919486285236e-05, + "loss": 0.5649, + "step": 5569 + }, + { + "epoch": 0.09624688968758639, + "grad_norm": 1.0955329580668607, + "learning_rate": 1.977080036772234e-05, + "loss": 0.7541, + "step": 5570 + }, + { + "epoch": 0.0962641692009953, + "grad_norm": 1.0425664193115232, + "learning_rate": 1.9770681218556555e-05, + "loss": 0.7267, + "step": 5571 + }, + { + "epoch": 0.09628144871440421, + "grad_norm": 0.8093268728539545, + "learning_rate": 1.9770562038788254e-05, + "loss": 0.6458, + "step": 5572 + }, + { + "epoch": 0.0962987282278131, + "grad_norm": 0.9211573222369984, + "learning_rate": 1.977044282841781e-05, + "loss": 0.6752, + "step": 5573 + }, + { + "epoch": 0.09631600774122201, + "grad_norm": 1.099431715432033, + "learning_rate": 1.9770323587445598e-05, + "loss": 0.8079, + "step": 5574 + }, + { + "epoch": 0.0963332872546309, + "grad_norm": 0.8830065231173209, + "learning_rate": 1.9770204315871986e-05, + "loss": 0.6319, + "step": 5575 + }, + { + "epoch": 0.09635056676803981, + "grad_norm": 0.8797578242033441, + "learning_rate": 1.9770085013697354e-05, + "loss": 0.6582, + "step": 5576 + }, + { + "epoch": 0.09636784628144872, + "grad_norm": 1.104155841891206, + "learning_rate": 1.9769965680922075e-05, + "loss": 0.8437, + "step": 5577 + }, + { + "epoch": 0.09638512579485761, + "grad_norm": 0.9871487570485896, + "learning_rate": 1.9769846317546517e-05, + "loss": 0.7922, + "step": 5578 + }, + { + "epoch": 0.09640240530826652, + "grad_norm": 1.1055018197090798, + "learning_rate": 1.976972692357106e-05, + "loss": 0.9578, + "step": 5579 + }, + { + "epoch": 0.09641968482167543, + "grad_norm": 0.8243692836637123, + "learning_rate": 1.9769607498996078e-05, + "loss": 0.8555, + "step": 5580 + }, + { + "epoch": 0.09643696433508432, + "grad_norm": 0.797707196924356, + "learning_rate": 1.976948804382194e-05, + "loss": 0.6214, + "step": 5581 + }, + { + "epoch": 0.09645424384849323, + "grad_norm": 0.8002948049405237, + "learning_rate": 1.9769368558049026e-05, + "loss": 0.6027, + "step": 5582 + }, + { + "epoch": 0.09647152336190212, + "grad_norm": 0.9209528218800593, + "learning_rate": 1.97692490416777e-05, + "loss": 0.7232, + "step": 5583 + }, + { + "epoch": 0.09648880287531103, + "grad_norm": 0.8238308368349048, + "learning_rate": 1.976912949470835e-05, + "loss": 0.5883, + "step": 5584 + }, + { + "epoch": 0.09650608238871994, + "grad_norm": 1.1300344747563436, + "learning_rate": 1.9769009917141344e-05, + "loss": 0.7701, + "step": 5585 + }, + { + "epoch": 0.09652336190212883, + "grad_norm": 0.8926483124777682, + "learning_rate": 1.9768890308977056e-05, + "loss": 0.7208, + "step": 5586 + }, + { + "epoch": 0.09654064141553774, + "grad_norm": 0.7775805119678311, + "learning_rate": 1.976877067021586e-05, + "loss": 0.7358, + "step": 5587 + }, + { + "epoch": 0.09655792092894665, + "grad_norm": 0.8209095116493885, + "learning_rate": 1.976865100085813e-05, + "loss": 0.6221, + "step": 5588 + }, + { + "epoch": 0.09657520044235554, + "grad_norm": 1.0209700530990033, + "learning_rate": 1.9768531300904247e-05, + "loss": 0.6172, + "step": 5589 + }, + { + "epoch": 0.09659247995576445, + "grad_norm": 0.9682910194000801, + "learning_rate": 1.976841157035458e-05, + "loss": 0.5963, + "step": 5590 + }, + { + "epoch": 0.09660975946917334, + "grad_norm": 0.6622924079678756, + "learning_rate": 1.9768291809209505e-05, + "loss": 0.5588, + "step": 5591 + }, + { + "epoch": 0.09662703898258225, + "grad_norm": 0.9236577163318571, + "learning_rate": 1.9768172017469396e-05, + "loss": 0.7674, + "step": 5592 + }, + { + "epoch": 0.09664431849599116, + "grad_norm": 1.240848876193304, + "learning_rate": 1.9768052195134633e-05, + "loss": 0.8706, + "step": 5593 + }, + { + "epoch": 0.09666159800940005, + "grad_norm": 0.9142913457063027, + "learning_rate": 1.9767932342205585e-05, + "loss": 0.7627, + "step": 5594 + }, + { + "epoch": 0.09667887752280896, + "grad_norm": 1.480597136070952, + "learning_rate": 1.9767812458682632e-05, + "loss": 0.8012, + "step": 5595 + }, + { + "epoch": 0.09669615703621787, + "grad_norm": 0.9725636813934831, + "learning_rate": 1.976769254456615e-05, + "loss": 0.6202, + "step": 5596 + }, + { + "epoch": 0.09671343654962676, + "grad_norm": 0.7253745957026458, + "learning_rate": 1.976757259985651e-05, + "loss": 0.6937, + "step": 5597 + }, + { + "epoch": 0.09673071606303567, + "grad_norm": 0.7338384152223529, + "learning_rate": 1.976745262455409e-05, + "loss": 0.7242, + "step": 5598 + }, + { + "epoch": 0.09674799557644456, + "grad_norm": 1.144423276999765, + "learning_rate": 1.976733261865927e-05, + "loss": 0.7545, + "step": 5599 + }, + { + "epoch": 0.09676527508985347, + "grad_norm": 1.0012651857832284, + "learning_rate": 1.9767212582172418e-05, + "loss": 0.7675, + "step": 5600 + }, + { + "epoch": 0.09678255460326238, + "grad_norm": 0.9004950678001213, + "learning_rate": 1.9767092515093916e-05, + "loss": 0.7119, + "step": 5601 + }, + { + "epoch": 0.09679983411667127, + "grad_norm": 0.7463392956963151, + "learning_rate": 1.9766972417424134e-05, + "loss": 0.7977, + "step": 5602 + }, + { + "epoch": 0.09681711363008018, + "grad_norm": 1.0234070724611268, + "learning_rate": 1.976685228916346e-05, + "loss": 0.8548, + "step": 5603 + }, + { + "epoch": 0.09683439314348907, + "grad_norm": 0.5716220091121005, + "learning_rate": 1.9766732130312257e-05, + "loss": 0.7592, + "step": 5604 + }, + { + "epoch": 0.09685167265689798, + "grad_norm": 1.4072309053027556, + "learning_rate": 1.9766611940870904e-05, + "loss": 0.4812, + "step": 5605 + }, + { + "epoch": 0.09686895217030689, + "grad_norm": 1.0723913791879425, + "learning_rate": 1.9766491720839783e-05, + "loss": 0.7261, + "step": 5606 + }, + { + "epoch": 0.09688623168371578, + "grad_norm": 0.8906830119322232, + "learning_rate": 1.976637147021927e-05, + "loss": 0.4419, + "step": 5607 + }, + { + "epoch": 0.09690351119712469, + "grad_norm": 0.48617493370504294, + "learning_rate": 1.9766251189009737e-05, + "loss": 0.5917, + "step": 5608 + }, + { + "epoch": 0.0969207907105336, + "grad_norm": 1.0644152307574286, + "learning_rate": 1.9766130877211565e-05, + "loss": 0.6682, + "step": 5609 + }, + { + "epoch": 0.09693807022394249, + "grad_norm": 0.8607092479082145, + "learning_rate": 1.9766010534825125e-05, + "loss": 0.7542, + "step": 5610 + }, + { + "epoch": 0.0969553497373514, + "grad_norm": 1.0012722093734845, + "learning_rate": 1.9765890161850804e-05, + "loss": 0.6453, + "step": 5611 + }, + { + "epoch": 0.09697262925076029, + "grad_norm": 0.9084727048890108, + "learning_rate": 1.9765769758288965e-05, + "loss": 0.7793, + "step": 5612 + }, + { + "epoch": 0.0969899087641692, + "grad_norm": 1.0719860453024057, + "learning_rate": 1.976564932414e-05, + "loss": 0.7066, + "step": 5613 + }, + { + "epoch": 0.09700718827757811, + "grad_norm": 0.834126781714576, + "learning_rate": 1.9765528859404276e-05, + "loss": 0.8764, + "step": 5614 + }, + { + "epoch": 0.097024467790987, + "grad_norm": 0.8733266561831989, + "learning_rate": 1.9765408364082175e-05, + "loss": 0.8145, + "step": 5615 + }, + { + "epoch": 0.09704174730439591, + "grad_norm": 1.0134690605862968, + "learning_rate": 1.976528783817407e-05, + "loss": 0.6431, + "step": 5616 + }, + { + "epoch": 0.09705902681780482, + "grad_norm": 0.916514232743495, + "learning_rate": 1.976516728168034e-05, + "loss": 0.6438, + "step": 5617 + }, + { + "epoch": 0.09707630633121371, + "grad_norm": 0.9869435780354415, + "learning_rate": 1.976504669460137e-05, + "loss": 0.7526, + "step": 5618 + }, + { + "epoch": 0.09709358584462262, + "grad_norm": 0.48787118956884973, + "learning_rate": 1.9764926076937526e-05, + "loss": 0.5985, + "step": 5619 + }, + { + "epoch": 0.09711086535803151, + "grad_norm": 0.7288737101298913, + "learning_rate": 1.9764805428689197e-05, + "loss": 0.5922, + "step": 5620 + }, + { + "epoch": 0.09712814487144042, + "grad_norm": 0.771631820414839, + "learning_rate": 1.976468474985675e-05, + "loss": 0.7088, + "step": 5621 + }, + { + "epoch": 0.09714542438484933, + "grad_norm": 0.47302435919098523, + "learning_rate": 1.9764564040440574e-05, + "loss": 0.7506, + "step": 5622 + }, + { + "epoch": 0.09716270389825822, + "grad_norm": 0.760804935950603, + "learning_rate": 1.9764443300441036e-05, + "loss": 0.6401, + "step": 5623 + }, + { + "epoch": 0.09717998341166713, + "grad_norm": 0.9674079696675938, + "learning_rate": 1.9764322529858523e-05, + "loss": 0.7746, + "step": 5624 + }, + { + "epoch": 0.09719726292507604, + "grad_norm": 0.9482989837362389, + "learning_rate": 1.9764201728693407e-05, + "loss": 0.7587, + "step": 5625 + }, + { + "epoch": 0.09721454243848493, + "grad_norm": 1.0757298026427018, + "learning_rate": 1.9764080896946073e-05, + "loss": 0.8357, + "step": 5626 + }, + { + "epoch": 0.09723182195189384, + "grad_norm": 0.9498409659349148, + "learning_rate": 1.976396003461689e-05, + "loss": 0.6862, + "step": 5627 + }, + { + "epoch": 0.09724910146530273, + "grad_norm": 1.244587582917982, + "learning_rate": 1.9763839141706246e-05, + "loss": 0.8902, + "step": 5628 + }, + { + "epoch": 0.09726638097871164, + "grad_norm": 0.9689655147325166, + "learning_rate": 1.9763718218214513e-05, + "loss": 0.7587, + "step": 5629 + }, + { + "epoch": 0.09728366049212055, + "grad_norm": 0.533114199130725, + "learning_rate": 1.9763597264142075e-05, + "loss": 0.5991, + "step": 5630 + }, + { + "epoch": 0.09730094000552944, + "grad_norm": 1.0154115161804698, + "learning_rate": 1.9763476279489308e-05, + "loss": 0.7383, + "step": 5631 + }, + { + "epoch": 0.09731821951893835, + "grad_norm": 0.5994624485929952, + "learning_rate": 1.9763355264256588e-05, + "loss": 0.8805, + "step": 5632 + }, + { + "epoch": 0.09733549903234726, + "grad_norm": 1.143631676986834, + "learning_rate": 1.9763234218444305e-05, + "loss": 0.8378, + "step": 5633 + }, + { + "epoch": 0.09735277854575615, + "grad_norm": 0.9504547978439392, + "learning_rate": 1.9763113142052822e-05, + "loss": 0.7915, + "step": 5634 + }, + { + "epoch": 0.09737005805916506, + "grad_norm": 0.9115499152714714, + "learning_rate": 1.976299203508253e-05, + "loss": 0.6101, + "step": 5635 + }, + { + "epoch": 0.09738733757257395, + "grad_norm": 0.8643413780293399, + "learning_rate": 1.976287089753381e-05, + "loss": 0.7669, + "step": 5636 + }, + { + "epoch": 0.09740461708598286, + "grad_norm": 0.9591609141989069, + "learning_rate": 1.976274972940703e-05, + "loss": 0.6367, + "step": 5637 + }, + { + "epoch": 0.09742189659939177, + "grad_norm": 0.9537010204960324, + "learning_rate": 1.976262853070258e-05, + "loss": 0.6088, + "step": 5638 + }, + { + "epoch": 0.09743917611280066, + "grad_norm": 0.7892467789186581, + "learning_rate": 1.9762507301420833e-05, + "loss": 0.7016, + "step": 5639 + }, + { + "epoch": 0.09745645562620957, + "grad_norm": 1.5554554849579327, + "learning_rate": 1.976238604156217e-05, + "loss": 0.6655, + "step": 5640 + }, + { + "epoch": 0.09747373513961846, + "grad_norm": 0.6248757624638634, + "learning_rate": 1.9762264751126975e-05, + "loss": 0.5889, + "step": 5641 + }, + { + "epoch": 0.09749101465302737, + "grad_norm": 0.8371109818272556, + "learning_rate": 1.9762143430115622e-05, + "loss": 0.7493, + "step": 5642 + }, + { + "epoch": 0.09750829416643628, + "grad_norm": 1.1654036898375033, + "learning_rate": 1.9762022078528494e-05, + "loss": 0.8379, + "step": 5643 + }, + { + "epoch": 0.09752557367984517, + "grad_norm": 2.8823454691802675, + "learning_rate": 1.9761900696365974e-05, + "loss": 0.603, + "step": 5644 + }, + { + "epoch": 0.09754285319325408, + "grad_norm": 1.1275812185817626, + "learning_rate": 1.9761779283628434e-05, + "loss": 0.6237, + "step": 5645 + }, + { + "epoch": 0.09756013270666299, + "grad_norm": 1.054120945100178, + "learning_rate": 1.9761657840316264e-05, + "loss": 0.7247, + "step": 5646 + }, + { + "epoch": 0.09757741222007188, + "grad_norm": 1.780187128484514, + "learning_rate": 1.976153636642984e-05, + "loss": 0.8259, + "step": 5647 + }, + { + "epoch": 0.09759469173348079, + "grad_norm": 0.8219487722089732, + "learning_rate": 1.976141486196954e-05, + "loss": 0.6429, + "step": 5648 + }, + { + "epoch": 0.09761197124688968, + "grad_norm": 1.0432305684387582, + "learning_rate": 1.976129332693575e-05, + "loss": 0.7503, + "step": 5649 + }, + { + "epoch": 0.09762925076029859, + "grad_norm": 0.8267668819998619, + "learning_rate": 1.9761171761328843e-05, + "loss": 0.5707, + "step": 5650 + }, + { + "epoch": 0.0976465302737075, + "grad_norm": 1.0924058895182749, + "learning_rate": 1.9761050165149208e-05, + "loss": 0.7245, + "step": 5651 + }, + { + "epoch": 0.09766380978711639, + "grad_norm": 1.2736928580802886, + "learning_rate": 1.9760928538397217e-05, + "loss": 0.8613, + "step": 5652 + }, + { + "epoch": 0.0976810893005253, + "grad_norm": 0.9809108839297268, + "learning_rate": 1.976080688107326e-05, + "loss": 0.5253, + "step": 5653 + }, + { + "epoch": 0.0976983688139342, + "grad_norm": 1.0682240370821687, + "learning_rate": 1.9760685193177713e-05, + "loss": 0.7506, + "step": 5654 + }, + { + "epoch": 0.0977156483273431, + "grad_norm": 0.6051431183156051, + "learning_rate": 1.976056347471096e-05, + "loss": 0.4643, + "step": 5655 + }, + { + "epoch": 0.097732927840752, + "grad_norm": 0.8370692038569472, + "learning_rate": 1.9760441725673377e-05, + "loss": 0.6506, + "step": 5656 + }, + { + "epoch": 0.0977502073541609, + "grad_norm": 0.9857190720882, + "learning_rate": 1.9760319946065352e-05, + "loss": 0.6704, + "step": 5657 + }, + { + "epoch": 0.09776748686756981, + "grad_norm": 0.9575276694934995, + "learning_rate": 1.9760198135887262e-05, + "loss": 0.6554, + "step": 5658 + }, + { + "epoch": 0.09778476638097872, + "grad_norm": 0.9911602087691771, + "learning_rate": 1.976007629513949e-05, + "loss": 0.7603, + "step": 5659 + }, + { + "epoch": 0.09780204589438761, + "grad_norm": 0.919770619515927, + "learning_rate": 1.975995442382242e-05, + "loss": 0.6405, + "step": 5660 + }, + { + "epoch": 0.09781932540779652, + "grad_norm": 1.3429360565681627, + "learning_rate": 1.9759832521936424e-05, + "loss": 0.6564, + "step": 5661 + }, + { + "epoch": 0.09783660492120543, + "grad_norm": 0.921576754658974, + "learning_rate": 1.9759710589481894e-05, + "loss": 0.6592, + "step": 5662 + }, + { + "epoch": 0.09785388443461432, + "grad_norm": 1.1561479586773244, + "learning_rate": 1.975958862645921e-05, + "loss": 0.7965, + "step": 5663 + }, + { + "epoch": 0.09787116394802323, + "grad_norm": 1.1842901435816389, + "learning_rate": 1.9759466632868754e-05, + "loss": 0.6347, + "step": 5664 + }, + { + "epoch": 0.09788844346143212, + "grad_norm": 0.9828959926431361, + "learning_rate": 1.9759344608710903e-05, + "loss": 0.6938, + "step": 5665 + }, + { + "epoch": 0.09790572297484103, + "grad_norm": 1.051333829006749, + "learning_rate": 1.9759222553986045e-05, + "loss": 0.4903, + "step": 5666 + }, + { + "epoch": 0.09792300248824994, + "grad_norm": 0.9732177193228262, + "learning_rate": 1.9759100468694563e-05, + "loss": 0.7725, + "step": 5667 + }, + { + "epoch": 0.09794028200165883, + "grad_norm": 1.0886606211159358, + "learning_rate": 1.9758978352836834e-05, + "loss": 0.7079, + "step": 5668 + }, + { + "epoch": 0.09795756151506774, + "grad_norm": 0.6355914351509583, + "learning_rate": 1.9758856206413243e-05, + "loss": 0.4682, + "step": 5669 + }, + { + "epoch": 0.09797484102847664, + "grad_norm": 0.5956090317968535, + "learning_rate": 1.975873402942417e-05, + "loss": 0.6442, + "step": 5670 + }, + { + "epoch": 0.09799212054188554, + "grad_norm": 0.4399173425738348, + "learning_rate": 1.9758611821870005e-05, + "loss": 0.593, + "step": 5671 + }, + { + "epoch": 0.09800940005529445, + "grad_norm": 0.8432648133837776, + "learning_rate": 1.9758489583751123e-05, + "loss": 0.7333, + "step": 5672 + }, + { + "epoch": 0.09802667956870334, + "grad_norm": 0.5789869907048516, + "learning_rate": 1.9758367315067915e-05, + "loss": 0.8491, + "step": 5673 + }, + { + "epoch": 0.09804395908211225, + "grad_norm": 1.1219366791875067, + "learning_rate": 1.9758245015820752e-05, + "loss": 0.6899, + "step": 5674 + }, + { + "epoch": 0.09806123859552116, + "grad_norm": 1.4729841830059096, + "learning_rate": 1.975812268601003e-05, + "loss": 0.9618, + "step": 5675 + }, + { + "epoch": 0.09807851810893005, + "grad_norm": 0.5765480310472088, + "learning_rate": 1.975800032563612e-05, + "loss": 0.6598, + "step": 5676 + }, + { + "epoch": 0.09809579762233896, + "grad_norm": 1.1149694457711596, + "learning_rate": 1.9757877934699417e-05, + "loss": 0.7677, + "step": 5677 + }, + { + "epoch": 0.09811307713574786, + "grad_norm": 0.9170629742036329, + "learning_rate": 1.9757755513200298e-05, + "loss": 0.5993, + "step": 5678 + }, + { + "epoch": 0.09813035664915676, + "grad_norm": 1.1353651075692246, + "learning_rate": 1.9757633061139145e-05, + "loss": 0.6177, + "step": 5679 + }, + { + "epoch": 0.09814763616256567, + "grad_norm": 1.0589036349727465, + "learning_rate": 1.9757510578516346e-05, + "loss": 0.8397, + "step": 5680 + }, + { + "epoch": 0.09816491567597456, + "grad_norm": 0.5218522226941307, + "learning_rate": 1.975738806533228e-05, + "loss": 0.8415, + "step": 5681 + }, + { + "epoch": 0.09818219518938347, + "grad_norm": 0.8648833173388112, + "learning_rate": 1.9757265521587333e-05, + "loss": 0.7408, + "step": 5682 + }, + { + "epoch": 0.09819947470279237, + "grad_norm": 0.9071079445290453, + "learning_rate": 1.975714294728189e-05, + "loss": 0.6216, + "step": 5683 + }, + { + "epoch": 0.09821675421620127, + "grad_norm": 1.0278102187308886, + "learning_rate": 1.9757020342416334e-05, + "loss": 0.8911, + "step": 5684 + }, + { + "epoch": 0.09823403372961018, + "grad_norm": 1.3017653446249309, + "learning_rate": 1.9756897706991048e-05, + "loss": 0.7165, + "step": 5685 + }, + { + "epoch": 0.09825131324301907, + "grad_norm": 0.9317886802062558, + "learning_rate": 1.9756775041006415e-05, + "loss": 1.0025, + "step": 5686 + }, + { + "epoch": 0.09826859275642798, + "grad_norm": 0.9004231742211789, + "learning_rate": 1.9756652344462823e-05, + "loss": 0.3693, + "step": 5687 + }, + { + "epoch": 0.09828587226983689, + "grad_norm": 1.133372858740446, + "learning_rate": 1.9756529617360657e-05, + "loss": 0.8774, + "step": 5688 + }, + { + "epoch": 0.09830315178324578, + "grad_norm": 0.6644164822512135, + "learning_rate": 1.9756406859700297e-05, + "loss": 0.5, + "step": 5689 + }, + { + "epoch": 0.09832043129665469, + "grad_norm": 0.6769762028608708, + "learning_rate": 1.975628407148213e-05, + "loss": 0.5096, + "step": 5690 + }, + { + "epoch": 0.0983377108100636, + "grad_norm": 0.9513698564982679, + "learning_rate": 1.9756161252706536e-05, + "loss": 0.5464, + "step": 5691 + }, + { + "epoch": 0.09835499032347249, + "grad_norm": 1.1979630126228824, + "learning_rate": 1.9756038403373904e-05, + "loss": 0.7674, + "step": 5692 + }, + { + "epoch": 0.0983722698368814, + "grad_norm": 0.986570090653141, + "learning_rate": 1.9755915523484623e-05, + "loss": 0.7011, + "step": 5693 + }, + { + "epoch": 0.09838954935029029, + "grad_norm": 1.0090972730513772, + "learning_rate": 1.975579261303907e-05, + "loss": 0.7406, + "step": 5694 + }, + { + "epoch": 0.0984068288636992, + "grad_norm": 0.8601432436988359, + "learning_rate": 1.9755669672037634e-05, + "loss": 0.5424, + "step": 5695 + }, + { + "epoch": 0.0984241083771081, + "grad_norm": 0.9954995244225807, + "learning_rate": 1.97555467004807e-05, + "loss": 0.9128, + "step": 5696 + }, + { + "epoch": 0.098441387890517, + "grad_norm": 1.1338653505794394, + "learning_rate": 1.975542369836865e-05, + "loss": 0.9111, + "step": 5697 + }, + { + "epoch": 0.0984586674039259, + "grad_norm": 0.8826484400250036, + "learning_rate": 1.9755300665701872e-05, + "loss": 0.6356, + "step": 5698 + }, + { + "epoch": 0.09847594691733481, + "grad_norm": 0.9020790492298023, + "learning_rate": 1.9755177602480753e-05, + "loss": 0.584, + "step": 5699 + }, + { + "epoch": 0.09849322643074371, + "grad_norm": 0.8306567518313789, + "learning_rate": 1.9755054508705675e-05, + "loss": 0.5001, + "step": 5700 + }, + { + "epoch": 0.09851050594415262, + "grad_norm": 0.8521235509329427, + "learning_rate": 1.9754931384377024e-05, + "loss": 0.6128, + "step": 5701 + }, + { + "epoch": 0.09852778545756151, + "grad_norm": 0.7770401285747043, + "learning_rate": 1.9754808229495186e-05, + "loss": 0.6229, + "step": 5702 + }, + { + "epoch": 0.09854506497097042, + "grad_norm": 0.6899663895069914, + "learning_rate": 1.975468504406055e-05, + "loss": 0.4288, + "step": 5703 + }, + { + "epoch": 0.09856234448437932, + "grad_norm": 0.8210342408107019, + "learning_rate": 1.9754561828073496e-05, + "loss": 0.9474, + "step": 5704 + }, + { + "epoch": 0.09857962399778822, + "grad_norm": 0.5024129266466522, + "learning_rate": 1.9754438581534416e-05, + "loss": 0.6864, + "step": 5705 + }, + { + "epoch": 0.09859690351119713, + "grad_norm": 0.9247728203956725, + "learning_rate": 1.9754315304443695e-05, + "loss": 0.7288, + "step": 5706 + }, + { + "epoch": 0.09861418302460603, + "grad_norm": 1.0020494447175112, + "learning_rate": 1.975419199680171e-05, + "loss": 0.6765, + "step": 5707 + }, + { + "epoch": 0.09863146253801493, + "grad_norm": 0.9727019900589832, + "learning_rate": 1.975406865860886e-05, + "loss": 0.5897, + "step": 5708 + }, + { + "epoch": 0.09864874205142384, + "grad_norm": 0.930935938665081, + "learning_rate": 1.9753945289865527e-05, + "loss": 0.4729, + "step": 5709 + }, + { + "epoch": 0.09866602156483273, + "grad_norm": 0.9596224727564959, + "learning_rate": 1.9753821890572092e-05, + "loss": 0.5968, + "step": 5710 + }, + { + "epoch": 0.09868330107824164, + "grad_norm": 1.0791805523882043, + "learning_rate": 1.9753698460728947e-05, + "loss": 0.7338, + "step": 5711 + }, + { + "epoch": 0.09870058059165054, + "grad_norm": 1.0354496907892792, + "learning_rate": 1.9753575000336477e-05, + "loss": 0.847, + "step": 5712 + }, + { + "epoch": 0.09871786010505944, + "grad_norm": 1.3216750857918564, + "learning_rate": 1.9753451509395066e-05, + "loss": 0.8371, + "step": 5713 + }, + { + "epoch": 0.09873513961846835, + "grad_norm": 1.2421860958141713, + "learning_rate": 1.9753327987905107e-05, + "loss": 0.8608, + "step": 5714 + }, + { + "epoch": 0.09875241913187725, + "grad_norm": 0.7838680489619708, + "learning_rate": 1.9753204435866987e-05, + "loss": 0.5425, + "step": 5715 + }, + { + "epoch": 0.09876969864528615, + "grad_norm": 1.06944844656662, + "learning_rate": 1.9753080853281083e-05, + "loss": 0.6778, + "step": 5716 + }, + { + "epoch": 0.09878697815869505, + "grad_norm": 0.901536401876688, + "learning_rate": 1.975295724014779e-05, + "loss": 0.4958, + "step": 5717 + }, + { + "epoch": 0.09880425767210395, + "grad_norm": 1.0213678082622817, + "learning_rate": 1.9752833596467495e-05, + "loss": 0.8439, + "step": 5718 + }, + { + "epoch": 0.09882153718551286, + "grad_norm": 1.0704347936802443, + "learning_rate": 1.9752709922240585e-05, + "loss": 0.8464, + "step": 5719 + }, + { + "epoch": 0.09883881669892176, + "grad_norm": 0.9863382700361751, + "learning_rate": 1.9752586217467445e-05, + "loss": 0.7141, + "step": 5720 + }, + { + "epoch": 0.09885609621233066, + "grad_norm": 0.9433173525758922, + "learning_rate": 1.9752462482148464e-05, + "loss": 0.6867, + "step": 5721 + }, + { + "epoch": 0.09887337572573957, + "grad_norm": 1.0158805691173252, + "learning_rate": 1.975233871628403e-05, + "loss": 0.5969, + "step": 5722 + }, + { + "epoch": 0.09889065523914846, + "grad_norm": 0.9454413248391634, + "learning_rate": 1.9752214919874527e-05, + "loss": 0.7103, + "step": 5723 + }, + { + "epoch": 0.09890793475255737, + "grad_norm": 0.5897745597311417, + "learning_rate": 1.975209109292035e-05, + "loss": 0.7971, + "step": 5724 + }, + { + "epoch": 0.09892521426596627, + "grad_norm": 0.5395847402844391, + "learning_rate": 1.975196723542188e-05, + "loss": 0.6401, + "step": 5725 + }, + { + "epoch": 0.09894249377937517, + "grad_norm": 1.0880256225485487, + "learning_rate": 1.9751843347379508e-05, + "loss": 0.8502, + "step": 5726 + }, + { + "epoch": 0.09895977329278408, + "grad_norm": 1.0101159182407013, + "learning_rate": 1.9751719428793622e-05, + "loss": 0.8234, + "step": 5727 + }, + { + "epoch": 0.09897705280619298, + "grad_norm": 0.8268023345745567, + "learning_rate": 1.975159547966461e-05, + "loss": 0.5739, + "step": 5728 + }, + { + "epoch": 0.09899433231960188, + "grad_norm": 1.0469981795999266, + "learning_rate": 1.975147149999286e-05, + "loss": 0.8251, + "step": 5729 + }, + { + "epoch": 0.09901161183301078, + "grad_norm": 1.0439701945209454, + "learning_rate": 1.9751347489778757e-05, + "loss": 0.633, + "step": 5730 + }, + { + "epoch": 0.09902889134641968, + "grad_norm": 1.1075887260892894, + "learning_rate": 1.9751223449022693e-05, + "loss": 0.7634, + "step": 5731 + }, + { + "epoch": 0.09904617085982859, + "grad_norm": 0.8069714366264277, + "learning_rate": 1.975109937772506e-05, + "loss": 0.5952, + "step": 5732 + }, + { + "epoch": 0.0990634503732375, + "grad_norm": 1.0545123408440178, + "learning_rate": 1.975097527588624e-05, + "loss": 0.7042, + "step": 5733 + }, + { + "epoch": 0.09908072988664639, + "grad_norm": 0.9028607786228534, + "learning_rate": 1.9750851143506626e-05, + "loss": 0.6338, + "step": 5734 + }, + { + "epoch": 0.0990980094000553, + "grad_norm": 1.2216115408781292, + "learning_rate": 1.9750726980586602e-05, + "loss": 0.8265, + "step": 5735 + }, + { + "epoch": 0.0991152889134642, + "grad_norm": 1.0731405401661382, + "learning_rate": 1.9750602787126565e-05, + "loss": 0.6946, + "step": 5736 + }, + { + "epoch": 0.0991325684268731, + "grad_norm": 0.895040442235098, + "learning_rate": 1.9750478563126897e-05, + "loss": 1.0648, + "step": 5737 + }, + { + "epoch": 0.099149847940282, + "grad_norm": 0.9602571705969805, + "learning_rate": 1.9750354308587988e-05, + "loss": 0.8927, + "step": 5738 + }, + { + "epoch": 0.0991671274536909, + "grad_norm": 0.9420152478678717, + "learning_rate": 1.9750230023510228e-05, + "loss": 0.6448, + "step": 5739 + }, + { + "epoch": 0.0991844069670998, + "grad_norm": 1.1162840479453442, + "learning_rate": 1.9750105707894006e-05, + "loss": 0.8309, + "step": 5740 + }, + { + "epoch": 0.09920168648050871, + "grad_norm": 0.899478464854989, + "learning_rate": 1.9749981361739715e-05, + "loss": 0.5986, + "step": 5741 + }, + { + "epoch": 0.09921896599391761, + "grad_norm": 1.0619576502458008, + "learning_rate": 1.974985698504774e-05, + "loss": 0.6013, + "step": 5742 + }, + { + "epoch": 0.09923624550732651, + "grad_norm": 0.818297656463707, + "learning_rate": 1.974973257781847e-05, + "loss": 0.656, + "step": 5743 + }, + { + "epoch": 0.09925352502073542, + "grad_norm": 0.7670625002794407, + "learning_rate": 1.97496081400523e-05, + "loss": 0.5631, + "step": 5744 + }, + { + "epoch": 0.09927080453414432, + "grad_norm": 1.318218428043995, + "learning_rate": 1.9749483671749614e-05, + "loss": 0.7115, + "step": 5745 + }, + { + "epoch": 0.09928808404755322, + "grad_norm": 0.980682346540335, + "learning_rate": 1.9749359172910804e-05, + "loss": 0.7144, + "step": 5746 + }, + { + "epoch": 0.09930536356096212, + "grad_norm": 0.9338989849496672, + "learning_rate": 1.9749234643536263e-05, + "loss": 0.5048, + "step": 5747 + }, + { + "epoch": 0.09932264307437103, + "grad_norm": 1.2838174127353226, + "learning_rate": 1.9749110083626375e-05, + "loss": 0.897, + "step": 5748 + }, + { + "epoch": 0.09933992258777993, + "grad_norm": 1.286297666966952, + "learning_rate": 1.9748985493181534e-05, + "loss": 0.8283, + "step": 5749 + }, + { + "epoch": 0.09935720210118883, + "grad_norm": 0.9049633123736319, + "learning_rate": 1.9748860872202132e-05, + "loss": 0.8831, + "step": 5750 + }, + { + "epoch": 0.09937448161459773, + "grad_norm": 0.9162095678771903, + "learning_rate": 1.9748736220688554e-05, + "loss": 0.6019, + "step": 5751 + }, + { + "epoch": 0.09939176112800664, + "grad_norm": 0.9923246074059869, + "learning_rate": 1.9748611538641193e-05, + "loss": 0.9225, + "step": 5752 + }, + { + "epoch": 0.09940904064141554, + "grad_norm": 0.7907315098357824, + "learning_rate": 1.9748486826060443e-05, + "loss": 0.6077, + "step": 5753 + }, + { + "epoch": 0.09942632015482444, + "grad_norm": 1.15910533562063, + "learning_rate": 1.974836208294669e-05, + "loss": 0.798, + "step": 5754 + }, + { + "epoch": 0.09944359966823334, + "grad_norm": 0.822772987096602, + "learning_rate": 1.9748237309300325e-05, + "loss": 0.6574, + "step": 5755 + }, + { + "epoch": 0.09946087918164224, + "grad_norm": 1.222662303720036, + "learning_rate": 1.974811250512174e-05, + "loss": 0.6531, + "step": 5756 + }, + { + "epoch": 0.09947815869505115, + "grad_norm": 0.8308540831562543, + "learning_rate": 1.9747987670411323e-05, + "loss": 0.5385, + "step": 5757 + }, + { + "epoch": 0.09949543820846005, + "grad_norm": 1.1061565467523256, + "learning_rate": 1.9747862805169473e-05, + "loss": 0.8667, + "step": 5758 + }, + { + "epoch": 0.09951271772186895, + "grad_norm": 1.1307215125473955, + "learning_rate": 1.9747737909396573e-05, + "loss": 0.8427, + "step": 5759 + }, + { + "epoch": 0.09952999723527785, + "grad_norm": 0.831266500706884, + "learning_rate": 1.974761298309302e-05, + "loss": 0.6624, + "step": 5760 + }, + { + "epoch": 0.09954727674868676, + "grad_norm": 1.1035768724701311, + "learning_rate": 1.97474880262592e-05, + "loss": 0.8852, + "step": 5761 + }, + { + "epoch": 0.09956455626209566, + "grad_norm": 0.9896147801640681, + "learning_rate": 1.9747363038895508e-05, + "loss": 0.7447, + "step": 5762 + }, + { + "epoch": 0.09958183577550456, + "grad_norm": 0.9226448973196741, + "learning_rate": 1.974723802100233e-05, + "loss": 0.6581, + "step": 5763 + }, + { + "epoch": 0.09959911528891346, + "grad_norm": 0.5153357904350674, + "learning_rate": 1.9747112972580065e-05, + "loss": 0.7636, + "step": 5764 + }, + { + "epoch": 0.09961639480232237, + "grad_norm": 1.0727652524710551, + "learning_rate": 1.9746987893629104e-05, + "loss": 0.8274, + "step": 5765 + }, + { + "epoch": 0.09963367431573127, + "grad_norm": 0.8594142787243726, + "learning_rate": 1.9746862784149833e-05, + "loss": 0.5803, + "step": 5766 + }, + { + "epoch": 0.09965095382914017, + "grad_norm": 0.8701797152822016, + "learning_rate": 1.9746737644142647e-05, + "loss": 0.5941, + "step": 5767 + }, + { + "epoch": 0.09966823334254907, + "grad_norm": 1.0740702286199804, + "learning_rate": 1.974661247360794e-05, + "loss": 0.6573, + "step": 5768 + }, + { + "epoch": 0.09968551285595798, + "grad_norm": 1.1133454763250556, + "learning_rate": 1.9746487272546098e-05, + "loss": 0.7727, + "step": 5769 + }, + { + "epoch": 0.09970279236936688, + "grad_norm": 1.0125263760248016, + "learning_rate": 1.974636204095752e-05, + "loss": 0.5483, + "step": 5770 + }, + { + "epoch": 0.09972007188277578, + "grad_norm": 0.8073265643678839, + "learning_rate": 1.9746236778842596e-05, + "loss": 0.6578, + "step": 5771 + }, + { + "epoch": 0.09973735139618468, + "grad_norm": 0.8569974288460138, + "learning_rate": 1.9746111486201717e-05, + "loss": 0.6383, + "step": 5772 + }, + { + "epoch": 0.09975463090959359, + "grad_norm": 1.0155194469689235, + "learning_rate": 1.9745986163035274e-05, + "loss": 0.8515, + "step": 5773 + }, + { + "epoch": 0.09977191042300249, + "grad_norm": 1.0886607921132772, + "learning_rate": 1.9745860809343665e-05, + "loss": 0.5795, + "step": 5774 + }, + { + "epoch": 0.0997891899364114, + "grad_norm": 1.113252990378143, + "learning_rate": 1.974573542512728e-05, + "loss": 0.7563, + "step": 5775 + }, + { + "epoch": 0.09980646944982029, + "grad_norm": 1.1579470702843908, + "learning_rate": 1.9745610010386508e-05, + "loss": 0.8452, + "step": 5776 + }, + { + "epoch": 0.0998237489632292, + "grad_norm": 0.912420017094919, + "learning_rate": 1.9745484565121743e-05, + "loss": 0.6825, + "step": 5777 + }, + { + "epoch": 0.0998410284766381, + "grad_norm": 0.5243672550520085, + "learning_rate": 1.9745359089333384e-05, + "loss": 0.6746, + "step": 5778 + }, + { + "epoch": 0.099858307990047, + "grad_norm": 1.0200907286631757, + "learning_rate": 1.974523358302182e-05, + "loss": 0.9108, + "step": 5779 + }, + { + "epoch": 0.0998755875034559, + "grad_norm": 0.9390341613012977, + "learning_rate": 1.974510804618744e-05, + "loss": 0.7702, + "step": 5780 + }, + { + "epoch": 0.09989286701686481, + "grad_norm": 0.8015853920670147, + "learning_rate": 1.9744982478830645e-05, + "loss": 0.672, + "step": 5781 + }, + { + "epoch": 0.0999101465302737, + "grad_norm": 1.0149558183327478, + "learning_rate": 1.974485688095182e-05, + "loss": 0.7528, + "step": 5782 + }, + { + "epoch": 0.09992742604368261, + "grad_norm": 0.7818565703650597, + "learning_rate": 1.9744731252551366e-05, + "loss": 0.5664, + "step": 5783 + }, + { + "epoch": 0.0999447055570915, + "grad_norm": 0.9803705856768966, + "learning_rate": 1.9744605593629673e-05, + "loss": 0.5792, + "step": 5784 + }, + { + "epoch": 0.09996198507050041, + "grad_norm": 0.8316545076954878, + "learning_rate": 1.9744479904187132e-05, + "loss": 0.6482, + "step": 5785 + }, + { + "epoch": 0.09997926458390932, + "grad_norm": 1.0726460617490718, + "learning_rate": 1.974435418422414e-05, + "loss": 0.651, + "step": 5786 + }, + { + "epoch": 0.09999654409731822, + "grad_norm": 0.9755187757491105, + "learning_rate": 1.9744228433741093e-05, + "loss": 0.681, + "step": 5787 + }, + { + "epoch": 0.10001382361072712, + "grad_norm": 1.0041179392349255, + "learning_rate": 1.974410265273838e-05, + "loss": 0.5741, + "step": 5788 + }, + { + "epoch": 0.10003110312413603, + "grad_norm": 0.4662476544322082, + "learning_rate": 1.9743976841216396e-05, + "loss": 0.7571, + "step": 5789 + }, + { + "epoch": 0.10004838263754492, + "grad_norm": 0.5963216564407313, + "learning_rate": 1.9743850999175537e-05, + "loss": 0.7474, + "step": 5790 + }, + { + "epoch": 0.10006566215095383, + "grad_norm": 0.9742772170504862, + "learning_rate": 1.9743725126616196e-05, + "loss": 0.6947, + "step": 5791 + }, + { + "epoch": 0.10008294166436273, + "grad_norm": 0.9101552079681638, + "learning_rate": 1.974359922353877e-05, + "loss": 0.5766, + "step": 5792 + }, + { + "epoch": 0.10010022117777163, + "grad_norm": 1.157196415474759, + "learning_rate": 1.9743473289943648e-05, + "loss": 0.9096, + "step": 5793 + }, + { + "epoch": 0.10011750069118054, + "grad_norm": 1.031153157531964, + "learning_rate": 1.9743347325831226e-05, + "loss": 0.7667, + "step": 5794 + }, + { + "epoch": 0.10013478020458944, + "grad_norm": 0.9741135302187091, + "learning_rate": 1.9743221331201905e-05, + "loss": 0.7058, + "step": 5795 + }, + { + "epoch": 0.10015205971799834, + "grad_norm": 1.1549080326646632, + "learning_rate": 1.974309530605607e-05, + "loss": 0.8234, + "step": 5796 + }, + { + "epoch": 0.10016933923140724, + "grad_norm": 1.1023394907867694, + "learning_rate": 1.9742969250394122e-05, + "loss": 0.7187, + "step": 5797 + }, + { + "epoch": 0.10018661874481614, + "grad_norm": 0.9486117004407835, + "learning_rate": 1.974284316421645e-05, + "loss": 0.8504, + "step": 5798 + }, + { + "epoch": 0.10020389825822505, + "grad_norm": 0.9840712806972938, + "learning_rate": 1.9742717047523457e-05, + "loss": 0.7308, + "step": 5799 + }, + { + "epoch": 0.10022117777163395, + "grad_norm": 0.4608386116381351, + "learning_rate": 1.9742590900315533e-05, + "loss": 0.5493, + "step": 5800 + }, + { + "epoch": 0.10023845728504285, + "grad_norm": 0.8238180574029962, + "learning_rate": 1.9742464722593075e-05, + "loss": 0.5567, + "step": 5801 + }, + { + "epoch": 0.10025573679845176, + "grad_norm": 0.818436766273285, + "learning_rate": 1.9742338514356474e-05, + "loss": 0.5332, + "step": 5802 + }, + { + "epoch": 0.10027301631186065, + "grad_norm": 0.8485309256865124, + "learning_rate": 1.974221227560613e-05, + "loss": 0.7245, + "step": 5803 + }, + { + "epoch": 0.10029029582526956, + "grad_norm": 1.2116282779423, + "learning_rate": 1.9742086006342434e-05, + "loss": 0.7402, + "step": 5804 + }, + { + "epoch": 0.10030757533867846, + "grad_norm": 1.1006220855269115, + "learning_rate": 1.9741959706565787e-05, + "loss": 0.9016, + "step": 5805 + }, + { + "epoch": 0.10032485485208736, + "grad_norm": 0.8143642144319468, + "learning_rate": 1.9741833376276584e-05, + "loss": 0.5849, + "step": 5806 + }, + { + "epoch": 0.10034213436549627, + "grad_norm": 1.2542358487833174, + "learning_rate": 1.9741707015475214e-05, + "loss": 0.8212, + "step": 5807 + }, + { + "epoch": 0.10035941387890517, + "grad_norm": 0.8487850510348358, + "learning_rate": 1.974158062416208e-05, + "loss": 0.7679, + "step": 5808 + }, + { + "epoch": 0.10037669339231407, + "grad_norm": 1.1844682892203315, + "learning_rate": 1.9741454202337572e-05, + "loss": 0.875, + "step": 5809 + }, + { + "epoch": 0.10039397290572298, + "grad_norm": 1.0441165516973019, + "learning_rate": 1.9741327750002095e-05, + "loss": 0.8103, + "step": 5810 + }, + { + "epoch": 0.10041125241913187, + "grad_norm": 0.7867438186432741, + "learning_rate": 1.9741201267156037e-05, + "loss": 0.7358, + "step": 5811 + }, + { + "epoch": 0.10042853193254078, + "grad_norm": 1.0119738557079552, + "learning_rate": 1.9741074753799792e-05, + "loss": 0.7947, + "step": 5812 + }, + { + "epoch": 0.10044581144594968, + "grad_norm": 0.8018079794700996, + "learning_rate": 1.9740948209933766e-05, + "loss": 0.6342, + "step": 5813 + }, + { + "epoch": 0.10046309095935858, + "grad_norm": 1.011221568636837, + "learning_rate": 1.9740821635558344e-05, + "loss": 0.8276, + "step": 5814 + }, + { + "epoch": 0.10048037047276749, + "grad_norm": 0.8906688576130568, + "learning_rate": 1.9740695030673935e-05, + "loss": 0.6915, + "step": 5815 + }, + { + "epoch": 0.10049764998617639, + "grad_norm": 0.9598125125108603, + "learning_rate": 1.9740568395280926e-05, + "loss": 0.6843, + "step": 5816 + }, + { + "epoch": 0.10051492949958529, + "grad_norm": 0.9051401829416664, + "learning_rate": 1.9740441729379716e-05, + "loss": 0.744, + "step": 5817 + }, + { + "epoch": 0.1005322090129942, + "grad_norm": 1.0584844413746686, + "learning_rate": 1.97403150329707e-05, + "loss": 0.9698, + "step": 5818 + }, + { + "epoch": 0.1005494885264031, + "grad_norm": 0.9742205631314723, + "learning_rate": 1.9740188306054283e-05, + "loss": 0.9796, + "step": 5819 + }, + { + "epoch": 0.100566768039812, + "grad_norm": 0.8914583707368235, + "learning_rate": 1.974006154863085e-05, + "loss": 0.7413, + "step": 5820 + }, + { + "epoch": 0.1005840475532209, + "grad_norm": 1.0989883522040569, + "learning_rate": 1.973993476070081e-05, + "loss": 0.7989, + "step": 5821 + }, + { + "epoch": 0.1006013270666298, + "grad_norm": 0.8991638144481556, + "learning_rate": 1.973980794226455e-05, + "loss": 0.7036, + "step": 5822 + }, + { + "epoch": 0.10061860658003871, + "grad_norm": 0.9311346227100528, + "learning_rate": 1.9739681093322473e-05, + "loss": 0.6109, + "step": 5823 + }, + { + "epoch": 0.1006358860934476, + "grad_norm": 1.1319061048667556, + "learning_rate": 1.9739554213874973e-05, + "loss": 0.8107, + "step": 5824 + }, + { + "epoch": 0.10065316560685651, + "grad_norm": 0.9893831141913721, + "learning_rate": 1.973942730392245e-05, + "loss": 0.8726, + "step": 5825 + }, + { + "epoch": 0.10067044512026542, + "grad_norm": 1.258639072121597, + "learning_rate": 1.97393003634653e-05, + "loss": 0.7577, + "step": 5826 + }, + { + "epoch": 0.10068772463367431, + "grad_norm": 0.4941878008025301, + "learning_rate": 1.973917339250392e-05, + "loss": 0.7962, + "step": 5827 + }, + { + "epoch": 0.10070500414708322, + "grad_norm": 0.8494500797815352, + "learning_rate": 1.973904639103871e-05, + "loss": 0.7719, + "step": 5828 + }, + { + "epoch": 0.10072228366049212, + "grad_norm": 0.8434331261494993, + "learning_rate": 1.9738919359070067e-05, + "loss": 0.52, + "step": 5829 + }, + { + "epoch": 0.10073956317390102, + "grad_norm": 0.8505480678395139, + "learning_rate": 1.973879229659839e-05, + "loss": 0.6255, + "step": 5830 + }, + { + "epoch": 0.10075684268730993, + "grad_norm": 0.9178208846998753, + "learning_rate": 1.9738665203624073e-05, + "loss": 1.0109, + "step": 5831 + }, + { + "epoch": 0.10077412220071882, + "grad_norm": 1.098744074483806, + "learning_rate": 1.973853808014752e-05, + "loss": 0.8094, + "step": 5832 + }, + { + "epoch": 0.10079140171412773, + "grad_norm": 0.9329608721839415, + "learning_rate": 1.973841092616912e-05, + "loss": 0.771, + "step": 5833 + }, + { + "epoch": 0.10080868122753663, + "grad_norm": 0.8535699079684188, + "learning_rate": 1.973828374168928e-05, + "loss": 0.7517, + "step": 5834 + }, + { + "epoch": 0.10082596074094553, + "grad_norm": 1.065185939596745, + "learning_rate": 1.9738156526708393e-05, + "loss": 0.655, + "step": 5835 + }, + { + "epoch": 0.10084324025435444, + "grad_norm": 1.0824043392699139, + "learning_rate": 1.9738029281226864e-05, + "loss": 0.6981, + "step": 5836 + }, + { + "epoch": 0.10086051976776333, + "grad_norm": 0.8075418005052686, + "learning_rate": 1.9737902005245084e-05, + "loss": 0.7489, + "step": 5837 + }, + { + "epoch": 0.10087779928117224, + "grad_norm": 0.7088568339437586, + "learning_rate": 1.9737774698763453e-05, + "loss": 0.4521, + "step": 5838 + }, + { + "epoch": 0.10089507879458115, + "grad_norm": 0.941467378739186, + "learning_rate": 1.973764736178238e-05, + "loss": 0.6161, + "step": 5839 + }, + { + "epoch": 0.10091235830799004, + "grad_norm": 1.012331791745691, + "learning_rate": 1.9737519994302247e-05, + "loss": 0.6889, + "step": 5840 + }, + { + "epoch": 0.10092963782139895, + "grad_norm": 0.7919880680280487, + "learning_rate": 1.973739259632346e-05, + "loss": 0.6516, + "step": 5841 + }, + { + "epoch": 0.10094691733480785, + "grad_norm": 1.0714691308339082, + "learning_rate": 1.9737265167846425e-05, + "loss": 0.6813, + "step": 5842 + }, + { + "epoch": 0.10096419684821675, + "grad_norm": 0.6704213257815499, + "learning_rate": 1.9737137708871535e-05, + "loss": 0.6221, + "step": 5843 + }, + { + "epoch": 0.10098147636162566, + "grad_norm": 0.9747691832896808, + "learning_rate": 1.9737010219399187e-05, + "loss": 0.6868, + "step": 5844 + }, + { + "epoch": 0.10099875587503455, + "grad_norm": 0.9788607702361222, + "learning_rate": 1.9736882699429787e-05, + "loss": 0.6718, + "step": 5845 + }, + { + "epoch": 0.10101603538844346, + "grad_norm": 1.9268542543152005, + "learning_rate": 1.9736755148963728e-05, + "loss": 0.6435, + "step": 5846 + }, + { + "epoch": 0.10103331490185237, + "grad_norm": 1.1201265339347661, + "learning_rate": 1.9736627568001413e-05, + "loss": 0.789, + "step": 5847 + }, + { + "epoch": 0.10105059441526126, + "grad_norm": 1.0539732971143687, + "learning_rate": 1.973649995654324e-05, + "loss": 0.6845, + "step": 5848 + }, + { + "epoch": 0.10106787392867017, + "grad_norm": 0.9589482311743207, + "learning_rate": 1.973637231458961e-05, + "loss": 0.9283, + "step": 5849 + }, + { + "epoch": 0.10108515344207906, + "grad_norm": 0.6547684444278843, + "learning_rate": 1.9736244642140918e-05, + "loss": 0.5086, + "step": 5850 + }, + { + "epoch": 0.10110243295548797, + "grad_norm": 1.1472996638162034, + "learning_rate": 1.9736116939197568e-05, + "loss": 0.5104, + "step": 5851 + }, + { + "epoch": 0.10111971246889688, + "grad_norm": 1.349507991443247, + "learning_rate": 1.9735989205759965e-05, + "loss": 0.6684, + "step": 5852 + }, + { + "epoch": 0.10113699198230577, + "grad_norm": 0.7974353401833263, + "learning_rate": 1.9735861441828498e-05, + "loss": 0.454, + "step": 5853 + }, + { + "epoch": 0.10115427149571468, + "grad_norm": 0.937379820434993, + "learning_rate": 1.9735733647403577e-05, + "loss": 0.6049, + "step": 5854 + }, + { + "epoch": 0.10117155100912359, + "grad_norm": 0.37734220880696256, + "learning_rate": 1.97356058224856e-05, + "loss": 0.5345, + "step": 5855 + }, + { + "epoch": 0.10118883052253248, + "grad_norm": 0.6284479405185911, + "learning_rate": 1.973547796707496e-05, + "loss": 0.5695, + "step": 5856 + }, + { + "epoch": 0.10120611003594139, + "grad_norm": 0.8295947897975463, + "learning_rate": 1.973535008117207e-05, + "loss": 0.7702, + "step": 5857 + }, + { + "epoch": 0.10122338954935028, + "grad_norm": 1.0979024054090034, + "learning_rate": 1.9735222164777318e-05, + "loss": 0.7339, + "step": 5858 + }, + { + "epoch": 0.10124066906275919, + "grad_norm": 1.606098685156412, + "learning_rate": 1.973509421789111e-05, + "loss": 0.7953, + "step": 5859 + }, + { + "epoch": 0.1012579485761681, + "grad_norm": 0.9518223370150178, + "learning_rate": 1.9734966240513854e-05, + "loss": 0.5594, + "step": 5860 + }, + { + "epoch": 0.101275228089577, + "grad_norm": 1.0066090601751596, + "learning_rate": 1.9734838232645937e-05, + "loss": 0.6625, + "step": 5861 + }, + { + "epoch": 0.1012925076029859, + "grad_norm": 0.9139147518614401, + "learning_rate": 1.973471019428777e-05, + "loss": 0.7649, + "step": 5862 + }, + { + "epoch": 0.10130978711639481, + "grad_norm": 1.0190324477020256, + "learning_rate": 1.973458212543975e-05, + "loss": 0.6762, + "step": 5863 + }, + { + "epoch": 0.1013270666298037, + "grad_norm": 0.8318811304769685, + "learning_rate": 1.973445402610228e-05, + "loss": 0.9228, + "step": 5864 + }, + { + "epoch": 0.10134434614321261, + "grad_norm": 0.9852866353573096, + "learning_rate": 1.9734325896275757e-05, + "loss": 0.6626, + "step": 5865 + }, + { + "epoch": 0.1013616256566215, + "grad_norm": 1.0216110135957102, + "learning_rate": 1.973419773596059e-05, + "loss": 0.801, + "step": 5866 + }, + { + "epoch": 0.10137890517003041, + "grad_norm": 0.823976165436061, + "learning_rate": 1.9734069545157174e-05, + "loss": 0.5637, + "step": 5867 + }, + { + "epoch": 0.10139618468343932, + "grad_norm": 0.8715443469819343, + "learning_rate": 1.973394132386591e-05, + "loss": 0.8446, + "step": 5868 + }, + { + "epoch": 0.10141346419684821, + "grad_norm": 1.1288806277664316, + "learning_rate": 1.9733813072087206e-05, + "loss": 0.8412, + "step": 5869 + }, + { + "epoch": 0.10143074371025712, + "grad_norm": 0.8679230064500083, + "learning_rate": 1.9733684789821455e-05, + "loss": 0.7684, + "step": 5870 + }, + { + "epoch": 0.10144802322366603, + "grad_norm": 0.7240000513096881, + "learning_rate": 1.9733556477069068e-05, + "loss": 0.5276, + "step": 5871 + }, + { + "epoch": 0.10146530273707492, + "grad_norm": 1.299154347542907, + "learning_rate": 1.973342813383044e-05, + "loss": 0.712, + "step": 5872 + }, + { + "epoch": 0.10148258225048383, + "grad_norm": 0.7154070172827103, + "learning_rate": 1.9733299760105978e-05, + "loss": 0.6141, + "step": 5873 + }, + { + "epoch": 0.10149986176389272, + "grad_norm": 0.9156282747912933, + "learning_rate": 1.973317135589608e-05, + "loss": 0.6919, + "step": 5874 + }, + { + "epoch": 0.10151714127730163, + "grad_norm": 1.0023485762300517, + "learning_rate": 1.9733042921201145e-05, + "loss": 0.6803, + "step": 5875 + }, + { + "epoch": 0.10153442079071054, + "grad_norm": 0.9180114949562742, + "learning_rate": 1.9732914456021587e-05, + "loss": 0.5794, + "step": 5876 + }, + { + "epoch": 0.10155170030411943, + "grad_norm": 0.7737736957531144, + "learning_rate": 1.9732785960357798e-05, + "loss": 0.5878, + "step": 5877 + }, + { + "epoch": 0.10156897981752834, + "grad_norm": 1.047656928472248, + "learning_rate": 1.9732657434210184e-05, + "loss": 0.6424, + "step": 5878 + }, + { + "epoch": 0.10158625933093723, + "grad_norm": 1.0305233657264778, + "learning_rate": 1.9732528877579145e-05, + "loss": 0.9213, + "step": 5879 + }, + { + "epoch": 0.10160353884434614, + "grad_norm": 0.5643592184341836, + "learning_rate": 1.973240029046509e-05, + "loss": 0.5535, + "step": 5880 + }, + { + "epoch": 0.10162081835775505, + "grad_norm": 0.8456385508753428, + "learning_rate": 1.9732271672868416e-05, + "loss": 0.9644, + "step": 5881 + }, + { + "epoch": 0.10163809787116394, + "grad_norm": 1.012487331046291, + "learning_rate": 1.973214302478953e-05, + "loss": 0.5029, + "step": 5882 + }, + { + "epoch": 0.10165537738457285, + "grad_norm": 0.9338849933815467, + "learning_rate": 1.973201434622883e-05, + "loss": 0.7458, + "step": 5883 + }, + { + "epoch": 0.10167265689798176, + "grad_norm": 1.2520024934000713, + "learning_rate": 1.9731885637186717e-05, + "loss": 0.8731, + "step": 5884 + }, + { + "epoch": 0.10168993641139065, + "grad_norm": 1.0717367225160634, + "learning_rate": 1.9731756897663606e-05, + "loss": 0.7963, + "step": 5885 + }, + { + "epoch": 0.10170721592479956, + "grad_norm": 1.0095744487293223, + "learning_rate": 1.973162812765989e-05, + "loss": 0.8116, + "step": 5886 + }, + { + "epoch": 0.10172449543820845, + "grad_norm": 1.1486815258729273, + "learning_rate": 1.9731499327175972e-05, + "loss": 0.7918, + "step": 5887 + }, + { + "epoch": 0.10174177495161736, + "grad_norm": 0.8746945970063166, + "learning_rate": 1.9731370496212263e-05, + "loss": 0.8028, + "step": 5888 + }, + { + "epoch": 0.10175905446502627, + "grad_norm": 1.0126537957706478, + "learning_rate": 1.973124163476916e-05, + "loss": 0.7397, + "step": 5889 + }, + { + "epoch": 0.10177633397843516, + "grad_norm": 0.8918101504309726, + "learning_rate": 1.973111274284707e-05, + "loss": 0.5016, + "step": 5890 + }, + { + "epoch": 0.10179361349184407, + "grad_norm": 0.9112301757046848, + "learning_rate": 1.9730983820446394e-05, + "loss": 0.4539, + "step": 5891 + }, + { + "epoch": 0.10181089300525298, + "grad_norm": 1.1025866292372886, + "learning_rate": 1.9730854867567535e-05, + "loss": 0.6186, + "step": 5892 + }, + { + "epoch": 0.10182817251866187, + "grad_norm": 1.0139320407084949, + "learning_rate": 1.9730725884210903e-05, + "loss": 0.7135, + "step": 5893 + }, + { + "epoch": 0.10184545203207078, + "grad_norm": 0.8586829144723368, + "learning_rate": 1.9730596870376897e-05, + "loss": 0.6884, + "step": 5894 + }, + { + "epoch": 0.10186273154547967, + "grad_norm": 0.8013655243465979, + "learning_rate": 1.973046782606592e-05, + "loss": 0.6915, + "step": 5895 + }, + { + "epoch": 0.10188001105888858, + "grad_norm": 0.8823420395903423, + "learning_rate": 1.973033875127838e-05, + "loss": 0.6553, + "step": 5896 + }, + { + "epoch": 0.10189729057229749, + "grad_norm": 1.1120025594836709, + "learning_rate": 1.9730209646014683e-05, + "loss": 0.667, + "step": 5897 + }, + { + "epoch": 0.10191457008570638, + "grad_norm": 1.149745063771268, + "learning_rate": 1.9730080510275224e-05, + "loss": 0.9213, + "step": 5898 + }, + { + "epoch": 0.10193184959911529, + "grad_norm": 0.7852486074763816, + "learning_rate": 1.9729951344060418e-05, + "loss": 0.7271, + "step": 5899 + }, + { + "epoch": 0.1019491291125242, + "grad_norm": 1.0859771386250185, + "learning_rate": 1.972982214737066e-05, + "loss": 0.5283, + "step": 5900 + }, + { + "epoch": 0.10196640862593309, + "grad_norm": 1.045747202111349, + "learning_rate": 1.972969292020636e-05, + "loss": 0.6689, + "step": 5901 + }, + { + "epoch": 0.101983688139342, + "grad_norm": 0.9770507459217058, + "learning_rate": 1.972956366256793e-05, + "loss": 0.8307, + "step": 5902 + }, + { + "epoch": 0.10200096765275089, + "grad_norm": 0.941805709863204, + "learning_rate": 1.9729434374455757e-05, + "loss": 0.6342, + "step": 5903 + }, + { + "epoch": 0.1020182471661598, + "grad_norm": 0.9534304560186442, + "learning_rate": 1.9729305055870264e-05, + "loss": 0.6556, + "step": 5904 + }, + { + "epoch": 0.10203552667956871, + "grad_norm": 0.8381881981797078, + "learning_rate": 1.9729175706811846e-05, + "loss": 0.6158, + "step": 5905 + }, + { + "epoch": 0.1020528061929776, + "grad_norm": 0.9063969859589228, + "learning_rate": 1.9729046327280906e-05, + "loss": 0.6597, + "step": 5906 + }, + { + "epoch": 0.10207008570638651, + "grad_norm": 1.040193570531058, + "learning_rate": 1.9728916917277858e-05, + "loss": 0.6271, + "step": 5907 + }, + { + "epoch": 0.10208736521979542, + "grad_norm": 0.9503349727390605, + "learning_rate": 1.9728787476803104e-05, + "loss": 0.752, + "step": 5908 + }, + { + "epoch": 0.10210464473320431, + "grad_norm": 0.6023415923246876, + "learning_rate": 1.9728658005857045e-05, + "loss": 0.6714, + "step": 5909 + }, + { + "epoch": 0.10212192424661322, + "grad_norm": 0.8415099744905141, + "learning_rate": 1.9728528504440093e-05, + "loss": 0.6571, + "step": 5910 + }, + { + "epoch": 0.10213920376002211, + "grad_norm": 0.8603458627805324, + "learning_rate": 1.9728398972552646e-05, + "loss": 0.6456, + "step": 5911 + }, + { + "epoch": 0.10215648327343102, + "grad_norm": 1.1233397910864442, + "learning_rate": 1.9728269410195116e-05, + "loss": 0.7379, + "step": 5912 + }, + { + "epoch": 0.10217376278683993, + "grad_norm": 0.8781618982015843, + "learning_rate": 1.9728139817367907e-05, + "loss": 0.7763, + "step": 5913 + }, + { + "epoch": 0.10219104230024882, + "grad_norm": 0.8191803857555571, + "learning_rate": 1.9728010194071425e-05, + "loss": 0.6225, + "step": 5914 + }, + { + "epoch": 0.10220832181365773, + "grad_norm": 1.1283370917298505, + "learning_rate": 1.9727880540306078e-05, + "loss": 0.835, + "step": 5915 + }, + { + "epoch": 0.10222560132706662, + "grad_norm": 0.9999691085859705, + "learning_rate": 1.9727750856072264e-05, + "loss": 0.7728, + "step": 5916 + }, + { + "epoch": 0.10224288084047553, + "grad_norm": 0.8628249939251085, + "learning_rate": 1.97276211413704e-05, + "loss": 0.6644, + "step": 5917 + }, + { + "epoch": 0.10226016035388444, + "grad_norm": 0.9777086372675533, + "learning_rate": 1.9727491396200887e-05, + "loss": 0.5519, + "step": 5918 + }, + { + "epoch": 0.10227743986729333, + "grad_norm": 0.9690531986225892, + "learning_rate": 1.972736162056413e-05, + "loss": 0.4612, + "step": 5919 + }, + { + "epoch": 0.10229471938070224, + "grad_norm": 1.1606623287429083, + "learning_rate": 1.9727231814460537e-05, + "loss": 0.7773, + "step": 5920 + }, + { + "epoch": 0.10231199889411115, + "grad_norm": 0.7762982317765952, + "learning_rate": 1.9727101977890517e-05, + "loss": 0.4101, + "step": 5921 + }, + { + "epoch": 0.10232927840752004, + "grad_norm": 0.972634612690972, + "learning_rate": 1.972697211085447e-05, + "loss": 0.7935, + "step": 5922 + }, + { + "epoch": 0.10234655792092895, + "grad_norm": 0.7756202986733496, + "learning_rate": 1.9726842213352814e-05, + "loss": 0.7058, + "step": 5923 + }, + { + "epoch": 0.10236383743433784, + "grad_norm": 0.757523467524333, + "learning_rate": 1.9726712285385944e-05, + "loss": 0.5487, + "step": 5924 + }, + { + "epoch": 0.10238111694774675, + "grad_norm": 0.9829655464331201, + "learning_rate": 1.972658232695427e-05, + "loss": 0.4561, + "step": 5925 + }, + { + "epoch": 0.10239839646115566, + "grad_norm": 1.0368226694709959, + "learning_rate": 1.9726452338058205e-05, + "loss": 0.5038, + "step": 5926 + }, + { + "epoch": 0.10241567597456455, + "grad_norm": 1.2819691250190575, + "learning_rate": 1.972632231869815e-05, + "loss": 0.8895, + "step": 5927 + }, + { + "epoch": 0.10243295548797346, + "grad_norm": 0.9325485340777799, + "learning_rate": 1.9726192268874515e-05, + "loss": 0.5838, + "step": 5928 + }, + { + "epoch": 0.10245023500138237, + "grad_norm": 1.1960909567729303, + "learning_rate": 1.9726062188587708e-05, + "loss": 0.4948, + "step": 5929 + }, + { + "epoch": 0.10246751451479126, + "grad_norm": 0.9242853213742398, + "learning_rate": 1.9725932077838135e-05, + "loss": 0.7686, + "step": 5930 + }, + { + "epoch": 0.10248479402820017, + "grad_norm": 1.0801907911157143, + "learning_rate": 1.97258019366262e-05, + "loss": 0.7889, + "step": 5931 + }, + { + "epoch": 0.10250207354160906, + "grad_norm": 0.9270323026542491, + "learning_rate": 1.9725671764952318e-05, + "loss": 0.657, + "step": 5932 + }, + { + "epoch": 0.10251935305501797, + "grad_norm": 0.9654084298297781, + "learning_rate": 1.972554156281689e-05, + "loss": 0.6859, + "step": 5933 + }, + { + "epoch": 0.10253663256842688, + "grad_norm": 1.3197648585575537, + "learning_rate": 1.972541133022033e-05, + "loss": 0.7988, + "step": 5934 + }, + { + "epoch": 0.10255391208183577, + "grad_norm": 0.8720626275064801, + "learning_rate": 1.9725281067163042e-05, + "loss": 0.9284, + "step": 5935 + }, + { + "epoch": 0.10257119159524468, + "grad_norm": 0.8684835957219829, + "learning_rate": 1.9725150773645435e-05, + "loss": 0.5317, + "step": 5936 + }, + { + "epoch": 0.10258847110865359, + "grad_norm": 1.2408566591042556, + "learning_rate": 1.9725020449667914e-05, + "loss": 0.6235, + "step": 5937 + }, + { + "epoch": 0.10260575062206248, + "grad_norm": 1.0876093994516212, + "learning_rate": 1.9724890095230892e-05, + "loss": 0.5995, + "step": 5938 + }, + { + "epoch": 0.10262303013547139, + "grad_norm": 0.4619415016548727, + "learning_rate": 1.9724759710334775e-05, + "loss": 0.4864, + "step": 5939 + }, + { + "epoch": 0.10264030964888028, + "grad_norm": 1.2518080755382848, + "learning_rate": 1.9724629294979973e-05, + "loss": 0.9464, + "step": 5940 + }, + { + "epoch": 0.10265758916228919, + "grad_norm": 1.0186504396025478, + "learning_rate": 1.9724498849166886e-05, + "loss": 0.8004, + "step": 5941 + }, + { + "epoch": 0.1026748686756981, + "grad_norm": 1.052593797729611, + "learning_rate": 1.9724368372895934e-05, + "loss": 0.9042, + "step": 5942 + }, + { + "epoch": 0.10269214818910699, + "grad_norm": 1.144603693319801, + "learning_rate": 1.9724237866167524e-05, + "loss": 0.6744, + "step": 5943 + }, + { + "epoch": 0.1027094277025159, + "grad_norm": 1.2125467727854713, + "learning_rate": 1.972410732898206e-05, + "loss": 0.5946, + "step": 5944 + }, + { + "epoch": 0.1027267072159248, + "grad_norm": 1.0664158637437826, + "learning_rate": 1.9723976761339953e-05, + "loss": 0.7595, + "step": 5945 + }, + { + "epoch": 0.1027439867293337, + "grad_norm": 0.5788342686741172, + "learning_rate": 1.972384616324161e-05, + "loss": 0.5911, + "step": 5946 + }, + { + "epoch": 0.10276126624274261, + "grad_norm": 1.1057862141001227, + "learning_rate": 1.972371553468744e-05, + "loss": 0.639, + "step": 5947 + }, + { + "epoch": 0.1027785457561515, + "grad_norm": 0.5491956680677289, + "learning_rate": 1.972358487567786e-05, + "loss": 0.5684, + "step": 5948 + }, + { + "epoch": 0.10279582526956041, + "grad_norm": 0.9011931778613562, + "learning_rate": 1.972345418621327e-05, + "loss": 0.5991, + "step": 5949 + }, + { + "epoch": 0.10281310478296932, + "grad_norm": 1.1164651105085686, + "learning_rate": 1.972332346629408e-05, + "loss": 0.6977, + "step": 5950 + }, + { + "epoch": 0.10283038429637821, + "grad_norm": 1.076219367126161, + "learning_rate": 1.972319271592071e-05, + "loss": 0.5131, + "step": 5951 + }, + { + "epoch": 0.10284766380978712, + "grad_norm": 1.1142480733998281, + "learning_rate": 1.9723061935093556e-05, + "loss": 0.842, + "step": 5952 + }, + { + "epoch": 0.10286494332319601, + "grad_norm": 1.1993242201018581, + "learning_rate": 1.972293112381303e-05, + "loss": 0.6993, + "step": 5953 + }, + { + "epoch": 0.10288222283660492, + "grad_norm": 1.2333462310408543, + "learning_rate": 1.972280028207955e-05, + "loss": 0.9076, + "step": 5954 + }, + { + "epoch": 0.10289950235001383, + "grad_norm": 1.3960228066149065, + "learning_rate": 1.972266940989352e-05, + "loss": 0.7055, + "step": 5955 + }, + { + "epoch": 0.10291678186342272, + "grad_norm": 0.9619734756318677, + "learning_rate": 1.972253850725535e-05, + "loss": 0.5796, + "step": 5956 + }, + { + "epoch": 0.10293406137683163, + "grad_norm": 1.636941295645476, + "learning_rate": 1.9722407574165446e-05, + "loss": 0.7887, + "step": 5957 + }, + { + "epoch": 0.10295134089024054, + "grad_norm": 1.0671537532127247, + "learning_rate": 1.972227661062423e-05, + "loss": 0.655, + "step": 5958 + }, + { + "epoch": 0.10296862040364943, + "grad_norm": 0.4758779614838423, + "learning_rate": 1.97221456166321e-05, + "loss": 0.6782, + "step": 5959 + }, + { + "epoch": 0.10298589991705834, + "grad_norm": 0.9350103507047841, + "learning_rate": 1.9722014592189472e-05, + "loss": 0.5708, + "step": 5960 + }, + { + "epoch": 0.10300317943046723, + "grad_norm": 1.2441632025741747, + "learning_rate": 1.9721883537296755e-05, + "loss": 0.7731, + "step": 5961 + }, + { + "epoch": 0.10302045894387614, + "grad_norm": 0.7726589039337507, + "learning_rate": 1.972175245195436e-05, + "loss": 0.4893, + "step": 5962 + }, + { + "epoch": 0.10303773845728505, + "grad_norm": 0.8810459855482454, + "learning_rate": 1.97216213361627e-05, + "loss": 0.701, + "step": 5963 + }, + { + "epoch": 0.10305501797069394, + "grad_norm": 1.0490778352510832, + "learning_rate": 1.972149018992218e-05, + "loss": 0.7353, + "step": 5964 + }, + { + "epoch": 0.10307229748410285, + "grad_norm": 0.5199728775508375, + "learning_rate": 1.9721359013233218e-05, + "loss": 0.5471, + "step": 5965 + }, + { + "epoch": 0.10308957699751176, + "grad_norm": 0.912966329260714, + "learning_rate": 1.972122780609622e-05, + "loss": 0.673, + "step": 5966 + }, + { + "epoch": 0.10310685651092065, + "grad_norm": 1.2743691894653955, + "learning_rate": 1.972109656851159e-05, + "loss": 1.0652, + "step": 5967 + }, + { + "epoch": 0.10312413602432956, + "grad_norm": 0.8570701770404984, + "learning_rate": 1.9720965300479755e-05, + "loss": 0.8346, + "step": 5968 + }, + { + "epoch": 0.10314141553773845, + "grad_norm": 0.9820491287886753, + "learning_rate": 1.9720834002001114e-05, + "loss": 0.4934, + "step": 5969 + }, + { + "epoch": 0.10315869505114736, + "grad_norm": 0.9550751074080832, + "learning_rate": 1.9720702673076085e-05, + "loss": 0.7845, + "step": 5970 + }, + { + "epoch": 0.10317597456455627, + "grad_norm": 1.0184934989722787, + "learning_rate": 1.9720571313705075e-05, + "loss": 0.7539, + "step": 5971 + }, + { + "epoch": 0.10319325407796516, + "grad_norm": 0.8801359430497003, + "learning_rate": 1.9720439923888497e-05, + "loss": 0.756, + "step": 5972 + }, + { + "epoch": 0.10321053359137407, + "grad_norm": 0.9016112432877478, + "learning_rate": 1.9720308503626763e-05, + "loss": 0.8353, + "step": 5973 + }, + { + "epoch": 0.10322781310478298, + "grad_norm": 0.9579957724218475, + "learning_rate": 1.9720177052920283e-05, + "loss": 0.7176, + "step": 5974 + }, + { + "epoch": 0.10324509261819187, + "grad_norm": 0.597234366017357, + "learning_rate": 1.972004557176947e-05, + "loss": 0.6345, + "step": 5975 + }, + { + "epoch": 0.10326237213160078, + "grad_norm": 1.0583664088931652, + "learning_rate": 1.9719914060174737e-05, + "loss": 0.7398, + "step": 5976 + }, + { + "epoch": 0.10327965164500967, + "grad_norm": 1.162392777806644, + "learning_rate": 1.9719782518136493e-05, + "loss": 0.7677, + "step": 5977 + }, + { + "epoch": 0.10329693115841858, + "grad_norm": 0.8525378391622978, + "learning_rate": 1.9719650945655152e-05, + "loss": 0.9126, + "step": 5978 + }, + { + "epoch": 0.10331421067182749, + "grad_norm": 0.788767226899735, + "learning_rate": 1.971951934273112e-05, + "loss": 0.6867, + "step": 5979 + }, + { + "epoch": 0.10333149018523638, + "grad_norm": 0.8764295949908221, + "learning_rate": 1.9719387709364822e-05, + "loss": 0.6342, + "step": 5980 + }, + { + "epoch": 0.10334876969864529, + "grad_norm": 0.8578067005101718, + "learning_rate": 1.9719256045556663e-05, + "loss": 0.5662, + "step": 5981 + }, + { + "epoch": 0.1033660492120542, + "grad_norm": 0.9707309223616234, + "learning_rate": 1.9719124351307052e-05, + "loss": 0.7315, + "step": 5982 + }, + { + "epoch": 0.10338332872546309, + "grad_norm": 1.002053159001335, + "learning_rate": 1.9718992626616408e-05, + "loss": 0.5594, + "step": 5983 + }, + { + "epoch": 0.103400608238872, + "grad_norm": 1.1703145921194402, + "learning_rate": 1.971886087148514e-05, + "loss": 0.7077, + "step": 5984 + }, + { + "epoch": 0.10341788775228089, + "grad_norm": 0.9694569866631688, + "learning_rate": 1.9718729085913658e-05, + "loss": 0.562, + "step": 5985 + }, + { + "epoch": 0.1034351672656898, + "grad_norm": 1.1884825471080118, + "learning_rate": 1.971859726990238e-05, + "loss": 0.7356, + "step": 5986 + }, + { + "epoch": 0.1034524467790987, + "grad_norm": 0.8895548274038154, + "learning_rate": 1.9718465423451718e-05, + "loss": 0.7215, + "step": 5987 + }, + { + "epoch": 0.1034697262925076, + "grad_norm": 0.8835364327499217, + "learning_rate": 1.971833354656208e-05, + "loss": 0.5159, + "step": 5988 + }, + { + "epoch": 0.10348700580591651, + "grad_norm": 0.7628932828754676, + "learning_rate": 1.9718201639233885e-05, + "loss": 0.7324, + "step": 5989 + }, + { + "epoch": 0.1035042853193254, + "grad_norm": 1.227861211974785, + "learning_rate": 1.9718069701467545e-05, + "loss": 0.5624, + "step": 5990 + }, + { + "epoch": 0.10352156483273431, + "grad_norm": 1.0305904668327424, + "learning_rate": 1.971793773326347e-05, + "loss": 0.7026, + "step": 5991 + }, + { + "epoch": 0.10353884434614322, + "grad_norm": 0.8596001413223371, + "learning_rate": 1.971780573462208e-05, + "loss": 0.5829, + "step": 5992 + }, + { + "epoch": 0.10355612385955211, + "grad_norm": 1.1732580322210748, + "learning_rate": 1.971767370554378e-05, + "loss": 0.8125, + "step": 5993 + }, + { + "epoch": 0.10357340337296102, + "grad_norm": 1.006211437485921, + "learning_rate": 1.9717541646028987e-05, + "loss": 0.6452, + "step": 5994 + }, + { + "epoch": 0.10359068288636993, + "grad_norm": 1.118572890402768, + "learning_rate": 1.971740955607812e-05, + "loss": 0.6899, + "step": 5995 + }, + { + "epoch": 0.10360796239977882, + "grad_norm": 0.6122574531993547, + "learning_rate": 1.971727743569158e-05, + "loss": 0.8037, + "step": 5996 + }, + { + "epoch": 0.10362524191318773, + "grad_norm": 1.2267553750341456, + "learning_rate": 1.9717145284869792e-05, + "loss": 0.8008, + "step": 5997 + }, + { + "epoch": 0.10364252142659662, + "grad_norm": 0.987221824201583, + "learning_rate": 1.9717013103613166e-05, + "loss": 0.6366, + "step": 5998 + }, + { + "epoch": 0.10365980094000553, + "grad_norm": 0.97380660885647, + "learning_rate": 1.971688089192212e-05, + "loss": 0.635, + "step": 5999 + }, + { + "epoch": 0.10367708045341444, + "grad_norm": 0.9453349522407306, + "learning_rate": 1.9716748649797058e-05, + "loss": 0.5984, + "step": 6000 + }, + { + "epoch": 0.10369435996682333, + "grad_norm": 1.2350476910113366, + "learning_rate": 1.9716616377238406e-05, + "loss": 0.9905, + "step": 6001 + }, + { + "epoch": 0.10371163948023224, + "grad_norm": 0.9202257232500732, + "learning_rate": 1.9716484074246574e-05, + "loss": 0.6519, + "step": 6002 + }, + { + "epoch": 0.10372891899364114, + "grad_norm": 0.9318340068341977, + "learning_rate": 1.9716351740821974e-05, + "loss": 0.7675, + "step": 6003 + }, + { + "epoch": 0.10374619850705004, + "grad_norm": 0.9289489817831179, + "learning_rate": 1.971621937696502e-05, + "loss": 0.861, + "step": 6004 + }, + { + "epoch": 0.10376347802045895, + "grad_norm": 0.9446369287661797, + "learning_rate": 1.971608698267613e-05, + "loss": 0.7611, + "step": 6005 + }, + { + "epoch": 0.10378075753386784, + "grad_norm": 0.6881300258279793, + "learning_rate": 1.971595455795572e-05, + "loss": 0.5763, + "step": 6006 + }, + { + "epoch": 0.10379803704727675, + "grad_norm": 0.9604073706686751, + "learning_rate": 1.9715822102804198e-05, + "loss": 0.7707, + "step": 6007 + }, + { + "epoch": 0.10381531656068566, + "grad_norm": 0.8762903866405671, + "learning_rate": 1.971568961722198e-05, + "loss": 0.581, + "step": 6008 + }, + { + "epoch": 0.10383259607409455, + "grad_norm": 0.8290777985064823, + "learning_rate": 1.9715557101209487e-05, + "loss": 0.7187, + "step": 6009 + }, + { + "epoch": 0.10384987558750346, + "grad_norm": 1.0286694139105947, + "learning_rate": 1.9715424554767132e-05, + "loss": 0.8514, + "step": 6010 + }, + { + "epoch": 0.10386715510091236, + "grad_norm": 1.0004842469191537, + "learning_rate": 1.971529197789533e-05, + "loss": 0.7327, + "step": 6011 + }, + { + "epoch": 0.10388443461432126, + "grad_norm": 0.9154141383376475, + "learning_rate": 1.9715159370594494e-05, + "loss": 0.8509, + "step": 6012 + }, + { + "epoch": 0.10390171412773017, + "grad_norm": 0.9170238890143356, + "learning_rate": 1.971502673286504e-05, + "loss": 0.7672, + "step": 6013 + }, + { + "epoch": 0.10391899364113906, + "grad_norm": 0.8302545131676915, + "learning_rate": 1.971489406470738e-05, + "loss": 0.7892, + "step": 6014 + }, + { + "epoch": 0.10393627315454797, + "grad_norm": 0.813253773784569, + "learning_rate": 1.971476136612194e-05, + "loss": 0.5354, + "step": 6015 + }, + { + "epoch": 0.10395355266795687, + "grad_norm": 1.030535676046116, + "learning_rate": 1.9714628637109125e-05, + "loss": 0.8262, + "step": 6016 + }, + { + "epoch": 0.10397083218136577, + "grad_norm": 0.552741344181539, + "learning_rate": 1.971449587766936e-05, + "loss": 0.6612, + "step": 6017 + }, + { + "epoch": 0.10398811169477468, + "grad_norm": 0.9201887408780832, + "learning_rate": 1.971436308780305e-05, + "loss": 0.555, + "step": 6018 + }, + { + "epoch": 0.10400539120818358, + "grad_norm": 0.926274881841536, + "learning_rate": 1.971423026751062e-05, + "loss": 0.5907, + "step": 6019 + }, + { + "epoch": 0.10402267072159248, + "grad_norm": 0.7571540911582876, + "learning_rate": 1.971409741679248e-05, + "loss": 0.6858, + "step": 6020 + }, + { + "epoch": 0.10403995023500139, + "grad_norm": 0.9047165694379875, + "learning_rate": 1.971396453564905e-05, + "loss": 0.5243, + "step": 6021 + }, + { + "epoch": 0.10405722974841028, + "grad_norm": 0.6758222801490625, + "learning_rate": 1.9713831624080742e-05, + "loss": 0.6913, + "step": 6022 + }, + { + "epoch": 0.10407450926181919, + "grad_norm": 1.0125170627480262, + "learning_rate": 1.971369868208798e-05, + "loss": 0.8094, + "step": 6023 + }, + { + "epoch": 0.1040917887752281, + "grad_norm": 0.6563143382090163, + "learning_rate": 1.9713565709671174e-05, + "loss": 0.7177, + "step": 6024 + }, + { + "epoch": 0.10410906828863699, + "grad_norm": 0.9742360740905422, + "learning_rate": 1.971343270683074e-05, + "loss": 0.6699, + "step": 6025 + }, + { + "epoch": 0.1041263478020459, + "grad_norm": 0.8086988591202293, + "learning_rate": 1.97132996735671e-05, + "loss": 0.8454, + "step": 6026 + }, + { + "epoch": 0.1041436273154548, + "grad_norm": 1.032639767359549, + "learning_rate": 1.9713166609880663e-05, + "loss": 0.596, + "step": 6027 + }, + { + "epoch": 0.1041609068288637, + "grad_norm": 0.6119694562608092, + "learning_rate": 1.9713033515771853e-05, + "loss": 0.5732, + "step": 6028 + }, + { + "epoch": 0.1041781863422726, + "grad_norm": 1.0980254130507463, + "learning_rate": 1.971290039124108e-05, + "loss": 0.6758, + "step": 6029 + }, + { + "epoch": 0.1041954658556815, + "grad_norm": 1.3925872879557788, + "learning_rate": 1.971276723628877e-05, + "loss": 0.6795, + "step": 6030 + }, + { + "epoch": 0.1042127453690904, + "grad_norm": 0.6448186904901266, + "learning_rate": 1.9712634050915335e-05, + "loss": 0.6896, + "step": 6031 + }, + { + "epoch": 0.10423002488249931, + "grad_norm": 0.744785442383909, + "learning_rate": 1.9712500835121185e-05, + "loss": 0.6375, + "step": 6032 + }, + { + "epoch": 0.10424730439590821, + "grad_norm": 0.8813112839055717, + "learning_rate": 1.9712367588906752e-05, + "loss": 0.8303, + "step": 6033 + }, + { + "epoch": 0.10426458390931712, + "grad_norm": 0.9775268873004117, + "learning_rate": 1.971223431227244e-05, + "loss": 0.5792, + "step": 6034 + }, + { + "epoch": 0.10428186342272601, + "grad_norm": 1.305352136171005, + "learning_rate": 1.9712101005218675e-05, + "loss": 0.8732, + "step": 6035 + }, + { + "epoch": 0.10429914293613492, + "grad_norm": 1.2368715915421207, + "learning_rate": 1.971196766774587e-05, + "loss": 0.8222, + "step": 6036 + }, + { + "epoch": 0.10431642244954382, + "grad_norm": 0.8108464411401146, + "learning_rate": 1.9711834299854447e-05, + "loss": 0.479, + "step": 6037 + }, + { + "epoch": 0.10433370196295272, + "grad_norm": 0.7604330432677261, + "learning_rate": 1.971170090154482e-05, + "loss": 0.5871, + "step": 6038 + }, + { + "epoch": 0.10435098147636163, + "grad_norm": 0.960573552560189, + "learning_rate": 1.9711567472817405e-05, + "loss": 0.6262, + "step": 6039 + }, + { + "epoch": 0.10436826098977053, + "grad_norm": 0.718758674351673, + "learning_rate": 1.9711434013672625e-05, + "loss": 0.6382, + "step": 6040 + }, + { + "epoch": 0.10438554050317943, + "grad_norm": 0.9721681274867282, + "learning_rate": 1.9711300524110895e-05, + "loss": 0.9781, + "step": 6041 + }, + { + "epoch": 0.10440282001658834, + "grad_norm": 0.7573458485504314, + "learning_rate": 1.9711167004132635e-05, + "loss": 0.4466, + "step": 6042 + }, + { + "epoch": 0.10442009952999723, + "grad_norm": 1.0968793939571342, + "learning_rate": 1.971103345373826e-05, + "loss": 0.6725, + "step": 6043 + }, + { + "epoch": 0.10443737904340614, + "grad_norm": 1.0189584608276936, + "learning_rate": 1.971089987292819e-05, + "loss": 0.7073, + "step": 6044 + }, + { + "epoch": 0.10445465855681504, + "grad_norm": 1.2607067379171644, + "learning_rate": 1.9710766261702845e-05, + "loss": 0.6978, + "step": 6045 + }, + { + "epoch": 0.10447193807022394, + "grad_norm": 0.9045280182803843, + "learning_rate": 1.971063262006264e-05, + "loss": 0.6138, + "step": 6046 + }, + { + "epoch": 0.10448921758363285, + "grad_norm": 0.8652988750048475, + "learning_rate": 1.9710498948007997e-05, + "loss": 0.7109, + "step": 6047 + }, + { + "epoch": 0.10450649709704175, + "grad_norm": 0.8833767730470964, + "learning_rate": 1.9710365245539335e-05, + "loss": 0.6638, + "step": 6048 + }, + { + "epoch": 0.10452377661045065, + "grad_norm": 0.8804173735100718, + "learning_rate": 1.971023151265707e-05, + "loss": 0.5249, + "step": 6049 + }, + { + "epoch": 0.10454105612385955, + "grad_norm": 0.9707618628513935, + "learning_rate": 1.971009774936162e-05, + "loss": 0.7593, + "step": 6050 + }, + { + "epoch": 0.10455833563726845, + "grad_norm": 0.9074169389248573, + "learning_rate": 1.970996395565341e-05, + "loss": 0.6375, + "step": 6051 + }, + { + "epoch": 0.10457561515067736, + "grad_norm": 1.2848307318295145, + "learning_rate": 1.970983013153285e-05, + "loss": 0.5893, + "step": 6052 + }, + { + "epoch": 0.10459289466408626, + "grad_norm": 1.024259617457185, + "learning_rate": 1.9709696277000368e-05, + "loss": 0.7335, + "step": 6053 + }, + { + "epoch": 0.10461017417749516, + "grad_norm": 0.9658472808558451, + "learning_rate": 1.9709562392056377e-05, + "loss": 0.6529, + "step": 6054 + }, + { + "epoch": 0.10462745369090407, + "grad_norm": 0.8400399746737892, + "learning_rate": 1.97094284767013e-05, + "loss": 0.6263, + "step": 6055 + }, + { + "epoch": 0.10464473320431297, + "grad_norm": 0.7870108882913032, + "learning_rate": 1.9709294530935557e-05, + "loss": 0.6011, + "step": 6056 + }, + { + "epoch": 0.10466201271772187, + "grad_norm": 0.4729354023004142, + "learning_rate": 1.9709160554759565e-05, + "loss": 0.6649, + "step": 6057 + }, + { + "epoch": 0.10467929223113077, + "grad_norm": 1.3858235392360163, + "learning_rate": 1.9709026548173738e-05, + "loss": 0.6493, + "step": 6058 + }, + { + "epoch": 0.10469657174453967, + "grad_norm": 0.8591979763539134, + "learning_rate": 1.9708892511178507e-05, + "loss": 0.5811, + "step": 6059 + }, + { + "epoch": 0.10471385125794858, + "grad_norm": 0.9069922808832781, + "learning_rate": 1.9708758443774288e-05, + "loss": 0.4952, + "step": 6060 + }, + { + "epoch": 0.10473113077135748, + "grad_norm": 0.7643851375694438, + "learning_rate": 1.97086243459615e-05, + "loss": 0.5713, + "step": 6061 + }, + { + "epoch": 0.10474841028476638, + "grad_norm": 0.8763895615576055, + "learning_rate": 1.9708490217740558e-05, + "loss": 0.7428, + "step": 6062 + }, + { + "epoch": 0.10476568979817528, + "grad_norm": 1.4844687245491446, + "learning_rate": 1.970835605911189e-05, + "loss": 0.9078, + "step": 6063 + }, + { + "epoch": 0.10478296931158419, + "grad_norm": 0.8942282702395157, + "learning_rate": 1.9708221870075916e-05, + "loss": 0.7152, + "step": 6064 + }, + { + "epoch": 0.10480024882499309, + "grad_norm": 1.5041175560340245, + "learning_rate": 1.9708087650633047e-05, + "loss": 0.7414, + "step": 6065 + }, + { + "epoch": 0.104817528338402, + "grad_norm": 0.7145517714314984, + "learning_rate": 1.9707953400783712e-05, + "loss": 0.5575, + "step": 6066 + }, + { + "epoch": 0.10483480785181089, + "grad_norm": 0.9530821718690962, + "learning_rate": 1.970781912052833e-05, + "loss": 0.7861, + "step": 6067 + }, + { + "epoch": 0.1048520873652198, + "grad_norm": 0.5836800646517945, + "learning_rate": 1.970768480986732e-05, + "loss": 0.6516, + "step": 6068 + }, + { + "epoch": 0.1048693668786287, + "grad_norm": 0.9942931377658217, + "learning_rate": 1.9707550468801105e-05, + "loss": 0.7619, + "step": 6069 + }, + { + "epoch": 0.1048866463920376, + "grad_norm": 0.7552230329358989, + "learning_rate": 1.9707416097330104e-05, + "loss": 0.4585, + "step": 6070 + }, + { + "epoch": 0.1049039259054465, + "grad_norm": 1.0606924161810076, + "learning_rate": 1.9707281695454736e-05, + "loss": 0.7979, + "step": 6071 + }, + { + "epoch": 0.1049212054188554, + "grad_norm": 0.988434909496508, + "learning_rate": 1.9707147263175425e-05, + "loss": 0.6531, + "step": 6072 + }, + { + "epoch": 0.1049384849322643, + "grad_norm": 0.9853871043335081, + "learning_rate": 1.9707012800492592e-05, + "loss": 0.6703, + "step": 6073 + }, + { + "epoch": 0.10495576444567321, + "grad_norm": 1.0196076526463287, + "learning_rate": 1.9706878307406657e-05, + "loss": 0.6439, + "step": 6074 + }, + { + "epoch": 0.10497304395908211, + "grad_norm": 1.101522436333347, + "learning_rate": 1.9706743783918042e-05, + "loss": 0.771, + "step": 6075 + }, + { + "epoch": 0.10499032347249102, + "grad_norm": 1.087017712008982, + "learning_rate": 1.970660923002717e-05, + "loss": 0.9667, + "step": 6076 + }, + { + "epoch": 0.10500760298589992, + "grad_norm": 1.1086074949558555, + "learning_rate": 1.9706474645734453e-05, + "loss": 0.7952, + "step": 6077 + }, + { + "epoch": 0.10502488249930882, + "grad_norm": 1.0561906854727445, + "learning_rate": 1.9706340031040323e-05, + "loss": 0.8195, + "step": 6078 + }, + { + "epoch": 0.10504216201271772, + "grad_norm": 0.778049862047129, + "learning_rate": 1.97062053859452e-05, + "loss": 0.6748, + "step": 6079 + }, + { + "epoch": 0.10505944152612662, + "grad_norm": 0.934673283208692, + "learning_rate": 1.9706070710449503e-05, + "loss": 0.5825, + "step": 6080 + }, + { + "epoch": 0.10507672103953553, + "grad_norm": 1.2244101906131633, + "learning_rate": 1.9705936004553655e-05, + "loss": 0.5989, + "step": 6081 + }, + { + "epoch": 0.10509400055294443, + "grad_norm": 0.9397199022911217, + "learning_rate": 1.970580126825808e-05, + "loss": 0.573, + "step": 6082 + }, + { + "epoch": 0.10511128006635333, + "grad_norm": 1.0376143686576493, + "learning_rate": 1.9705666501563194e-05, + "loss": 0.7398, + "step": 6083 + }, + { + "epoch": 0.10512855957976223, + "grad_norm": 1.0624937619046244, + "learning_rate": 1.9705531704469423e-05, + "loss": 0.7575, + "step": 6084 + }, + { + "epoch": 0.10514583909317114, + "grad_norm": 0.9769752382179311, + "learning_rate": 1.9705396876977193e-05, + "loss": 0.781, + "step": 6085 + }, + { + "epoch": 0.10516311860658004, + "grad_norm": 1.1039595559139026, + "learning_rate": 1.970526201908692e-05, + "loss": 0.7183, + "step": 6086 + }, + { + "epoch": 0.10518039811998894, + "grad_norm": 0.7753239609485795, + "learning_rate": 1.9705127130799027e-05, + "loss": 0.5952, + "step": 6087 + }, + { + "epoch": 0.10519767763339784, + "grad_norm": 0.9596057433807847, + "learning_rate": 1.970499221211394e-05, + "loss": 0.8702, + "step": 6088 + }, + { + "epoch": 0.10521495714680675, + "grad_norm": 0.9476979997532184, + "learning_rate": 1.970485726303208e-05, + "loss": 0.6913, + "step": 6089 + }, + { + "epoch": 0.10523223666021565, + "grad_norm": 0.831857698034446, + "learning_rate": 1.9704722283553872e-05, + "loss": 0.5219, + "step": 6090 + }, + { + "epoch": 0.10524951617362455, + "grad_norm": 0.4788494811457818, + "learning_rate": 1.9704587273679736e-05, + "loss": 0.549, + "step": 6091 + }, + { + "epoch": 0.10526679568703345, + "grad_norm": 1.0233568948302745, + "learning_rate": 1.970445223341009e-05, + "loss": 0.765, + "step": 6092 + }, + { + "epoch": 0.10528407520044236, + "grad_norm": 0.9724905544003474, + "learning_rate": 1.9704317162745363e-05, + "loss": 0.7275, + "step": 6093 + }, + { + "epoch": 0.10530135471385126, + "grad_norm": 1.2707461410637053, + "learning_rate": 1.970418206168598e-05, + "loss": 0.7537, + "step": 6094 + }, + { + "epoch": 0.10531863422726016, + "grad_norm": 1.0250511726285363, + "learning_rate": 1.970404693023236e-05, + "loss": 0.7239, + "step": 6095 + }, + { + "epoch": 0.10533591374066906, + "grad_norm": 1.5794405639587537, + "learning_rate": 1.9703911768384928e-05, + "loss": 0.7754, + "step": 6096 + }, + { + "epoch": 0.10535319325407796, + "grad_norm": 1.4730183423225112, + "learning_rate": 1.9703776576144106e-05, + "loss": 0.7597, + "step": 6097 + }, + { + "epoch": 0.10537047276748687, + "grad_norm": 1.061886749769055, + "learning_rate": 1.970364135351032e-05, + "loss": 0.7223, + "step": 6098 + }, + { + "epoch": 0.10538775228089577, + "grad_norm": 1.0979581107835574, + "learning_rate": 1.9703506100483995e-05, + "loss": 0.6529, + "step": 6099 + }, + { + "epoch": 0.10540503179430467, + "grad_norm": 1.6088584845198264, + "learning_rate": 1.9703370817065545e-05, + "loss": 0.5779, + "step": 6100 + }, + { + "epoch": 0.10542231130771358, + "grad_norm": 0.8040450396649917, + "learning_rate": 1.9703235503255404e-05, + "loss": 0.6549, + "step": 6101 + }, + { + "epoch": 0.10543959082112248, + "grad_norm": 0.8738652168059657, + "learning_rate": 1.9703100159053988e-05, + "loss": 0.5303, + "step": 6102 + }, + { + "epoch": 0.10545687033453138, + "grad_norm": 1.0634329172311865, + "learning_rate": 1.9702964784461728e-05, + "loss": 0.7271, + "step": 6103 + }, + { + "epoch": 0.10547414984794028, + "grad_norm": 0.7934582757983188, + "learning_rate": 1.9702829379479044e-05, + "loss": 0.7235, + "step": 6104 + }, + { + "epoch": 0.10549142936134918, + "grad_norm": 1.1624804660850125, + "learning_rate": 1.9702693944106363e-05, + "loss": 0.9379, + "step": 6105 + }, + { + "epoch": 0.10550870887475809, + "grad_norm": 1.0000056879753667, + "learning_rate": 1.9702558478344105e-05, + "loss": 0.7557, + "step": 6106 + }, + { + "epoch": 0.10552598838816699, + "grad_norm": 1.352743431167181, + "learning_rate": 1.9702422982192697e-05, + "loss": 0.8388, + "step": 6107 + }, + { + "epoch": 0.1055432679015759, + "grad_norm": 0.7970500529242537, + "learning_rate": 1.9702287455652565e-05, + "loss": 0.636, + "step": 6108 + }, + { + "epoch": 0.10556054741498479, + "grad_norm": 0.9454176235986125, + "learning_rate": 1.9702151898724127e-05, + "loss": 0.6206, + "step": 6109 + }, + { + "epoch": 0.1055778269283937, + "grad_norm": 1.1564046148672433, + "learning_rate": 1.9702016311407813e-05, + "loss": 0.7152, + "step": 6110 + }, + { + "epoch": 0.1055951064418026, + "grad_norm": 1.1263798884021876, + "learning_rate": 1.970188069370405e-05, + "loss": 0.8597, + "step": 6111 + }, + { + "epoch": 0.1056123859552115, + "grad_norm": 0.9264834464362397, + "learning_rate": 1.9701745045613257e-05, + "loss": 0.5802, + "step": 6112 + }, + { + "epoch": 0.1056296654686204, + "grad_norm": 1.0059964572055373, + "learning_rate": 1.9701609367135863e-05, + "loss": 0.8266, + "step": 6113 + }, + { + "epoch": 0.10564694498202931, + "grad_norm": 0.7848532965569197, + "learning_rate": 1.970147365827229e-05, + "loss": 0.6928, + "step": 6114 + }, + { + "epoch": 0.1056642244954382, + "grad_norm": 0.8786440463681598, + "learning_rate": 1.970133791902296e-05, + "loss": 0.6265, + "step": 6115 + }, + { + "epoch": 0.10568150400884711, + "grad_norm": 1.103422089251677, + "learning_rate": 1.9701202149388307e-05, + "loss": 0.9584, + "step": 6116 + }, + { + "epoch": 0.105698783522256, + "grad_norm": 0.8248286984032946, + "learning_rate": 1.970106634936875e-05, + "loss": 0.4892, + "step": 6117 + }, + { + "epoch": 0.10571606303566491, + "grad_norm": 0.5544259708422663, + "learning_rate": 1.9700930518964715e-05, + "loss": 0.6869, + "step": 6118 + }, + { + "epoch": 0.10573334254907382, + "grad_norm": 1.0898120485004936, + "learning_rate": 1.9700794658176635e-05, + "loss": 0.7295, + "step": 6119 + }, + { + "epoch": 0.10575062206248272, + "grad_norm": 0.629486886708144, + "learning_rate": 1.970065876700492e-05, + "loss": 0.4651, + "step": 6120 + }, + { + "epoch": 0.10576790157589162, + "grad_norm": 1.050663143734162, + "learning_rate": 1.970052284545001e-05, + "loss": 0.6823, + "step": 6121 + }, + { + "epoch": 0.10578518108930053, + "grad_norm": 1.1936759796220198, + "learning_rate": 1.9700386893512325e-05, + "loss": 0.7309, + "step": 6122 + }, + { + "epoch": 0.10580246060270942, + "grad_norm": 1.3991163015241663, + "learning_rate": 1.970025091119229e-05, + "loss": 0.8313, + "step": 6123 + }, + { + "epoch": 0.10581974011611833, + "grad_norm": 0.7898866335268844, + "learning_rate": 1.970011489849033e-05, + "loss": 0.5646, + "step": 6124 + }, + { + "epoch": 0.10583701962952723, + "grad_norm": 1.15118096036068, + "learning_rate": 1.9699978855406875e-05, + "loss": 0.5058, + "step": 6125 + }, + { + "epoch": 0.10585429914293613, + "grad_norm": 0.942344495065348, + "learning_rate": 1.9699842781942347e-05, + "loss": 0.823, + "step": 6126 + }, + { + "epoch": 0.10587157865634504, + "grad_norm": 0.7847321642125634, + "learning_rate": 1.9699706678097176e-05, + "loss": 0.6842, + "step": 6127 + }, + { + "epoch": 0.10588885816975394, + "grad_norm": 1.0152742520402458, + "learning_rate": 1.969957054387179e-05, + "loss": 0.7231, + "step": 6128 + }, + { + "epoch": 0.10590613768316284, + "grad_norm": 0.8334115166816534, + "learning_rate": 1.9699434379266608e-05, + "loss": 0.8068, + "step": 6129 + }, + { + "epoch": 0.10592341719657175, + "grad_norm": 1.4189442688659448, + "learning_rate": 1.9699298184282058e-05, + "loss": 0.5607, + "step": 6130 + }, + { + "epoch": 0.10594069670998064, + "grad_norm": 0.72479671118184, + "learning_rate": 1.9699161958918572e-05, + "loss": 0.4774, + "step": 6131 + }, + { + "epoch": 0.10595797622338955, + "grad_norm": 1.01737660072493, + "learning_rate": 1.9699025703176575e-05, + "loss": 0.8207, + "step": 6132 + }, + { + "epoch": 0.10597525573679845, + "grad_norm": 1.0537818164025137, + "learning_rate": 1.9698889417056487e-05, + "loss": 0.6925, + "step": 6133 + }, + { + "epoch": 0.10599253525020735, + "grad_norm": 0.9578523808856422, + "learning_rate": 1.9698753100558745e-05, + "loss": 0.6087, + "step": 6134 + }, + { + "epoch": 0.10600981476361626, + "grad_norm": 1.0663987928880765, + "learning_rate": 1.9698616753683772e-05, + "loss": 0.8302, + "step": 6135 + }, + { + "epoch": 0.10602709427702516, + "grad_norm": 0.9711043265435098, + "learning_rate": 1.9698480376431992e-05, + "loss": 0.7602, + "step": 6136 + }, + { + "epoch": 0.10604437379043406, + "grad_norm": 0.9016549990145185, + "learning_rate": 1.9698343968803835e-05, + "loss": 0.7722, + "step": 6137 + }, + { + "epoch": 0.10606165330384297, + "grad_norm": 1.0048167988902432, + "learning_rate": 1.9698207530799725e-05, + "loss": 0.7304, + "step": 6138 + }, + { + "epoch": 0.10607893281725186, + "grad_norm": 0.8095594678200059, + "learning_rate": 1.9698071062420095e-05, + "loss": 0.5948, + "step": 6139 + }, + { + "epoch": 0.10609621233066077, + "grad_norm": 0.9859745280606829, + "learning_rate": 1.9697934563665372e-05, + "loss": 0.4672, + "step": 6140 + }, + { + "epoch": 0.10611349184406967, + "grad_norm": 1.0988562850532482, + "learning_rate": 1.9697798034535977e-05, + "loss": 0.5229, + "step": 6141 + }, + { + "epoch": 0.10613077135747857, + "grad_norm": 1.0491550451273146, + "learning_rate": 1.9697661475032343e-05, + "loss": 0.8093, + "step": 6142 + }, + { + "epoch": 0.10614805087088748, + "grad_norm": 0.5799154971113645, + "learning_rate": 1.9697524885154896e-05, + "loss": 0.8478, + "step": 6143 + }, + { + "epoch": 0.10616533038429637, + "grad_norm": 0.8276857137083906, + "learning_rate": 1.969738826490406e-05, + "loss": 0.5589, + "step": 6144 + }, + { + "epoch": 0.10618260989770528, + "grad_norm": 1.6050671123141833, + "learning_rate": 1.9697251614280275e-05, + "loss": 0.5619, + "step": 6145 + }, + { + "epoch": 0.10619988941111418, + "grad_norm": 0.9191777206614109, + "learning_rate": 1.9697114933283954e-05, + "loss": 0.7146, + "step": 6146 + }, + { + "epoch": 0.10621716892452308, + "grad_norm": 1.1483992457656527, + "learning_rate": 1.969697822191554e-05, + "loss": 0.8045, + "step": 6147 + }, + { + "epoch": 0.10623444843793199, + "grad_norm": 1.3102258118805064, + "learning_rate": 1.9696841480175442e-05, + "loss": 0.9149, + "step": 6148 + }, + { + "epoch": 0.10625172795134089, + "grad_norm": 1.2834152041280629, + "learning_rate": 1.9696704708064106e-05, + "loss": 0.8957, + "step": 6149 + }, + { + "epoch": 0.10626900746474979, + "grad_norm": 1.3339541467222324, + "learning_rate": 1.9696567905581955e-05, + "loss": 0.7306, + "step": 6150 + }, + { + "epoch": 0.1062862869781587, + "grad_norm": 1.6450201743960864, + "learning_rate": 1.9696431072729415e-05, + "loss": 0.6778, + "step": 6151 + }, + { + "epoch": 0.1063035664915676, + "grad_norm": 0.9221191701855174, + "learning_rate": 1.9696294209506918e-05, + "loss": 0.4947, + "step": 6152 + }, + { + "epoch": 0.1063208460049765, + "grad_norm": 1.0500001603232325, + "learning_rate": 1.9696157315914885e-05, + "loss": 0.7162, + "step": 6153 + }, + { + "epoch": 0.1063381255183854, + "grad_norm": 1.2600270239828695, + "learning_rate": 1.9696020391953756e-05, + "loss": 0.566, + "step": 6154 + }, + { + "epoch": 0.1063554050317943, + "grad_norm": 0.8703986939951119, + "learning_rate": 1.9695883437623952e-05, + "loss": 0.6432, + "step": 6155 + }, + { + "epoch": 0.10637268454520321, + "grad_norm": 1.2742724789019222, + "learning_rate": 1.9695746452925902e-05, + "loss": 0.4821, + "step": 6156 + }, + { + "epoch": 0.1063899640586121, + "grad_norm": 1.2552039407961597, + "learning_rate": 1.969560943786004e-05, + "loss": 0.6459, + "step": 6157 + }, + { + "epoch": 0.10640724357202101, + "grad_norm": 1.3496896085510397, + "learning_rate": 1.9695472392426795e-05, + "loss": 0.4923, + "step": 6158 + }, + { + "epoch": 0.10642452308542992, + "grad_norm": 1.420836484318776, + "learning_rate": 1.969533531662659e-05, + "loss": 0.6411, + "step": 6159 + }, + { + "epoch": 0.10644180259883881, + "grad_norm": 1.2784152108001798, + "learning_rate": 1.969519821045986e-05, + "loss": 0.6082, + "step": 6160 + }, + { + "epoch": 0.10645908211224772, + "grad_norm": 1.156300449621513, + "learning_rate": 1.969506107392703e-05, + "loss": 0.6802, + "step": 6161 + }, + { + "epoch": 0.10647636162565662, + "grad_norm": 1.2279000341556408, + "learning_rate": 1.969492390702853e-05, + "loss": 0.8592, + "step": 6162 + }, + { + "epoch": 0.10649364113906552, + "grad_norm": 1.0927346421418298, + "learning_rate": 1.96947867097648e-05, + "loss": 0.6226, + "step": 6163 + }, + { + "epoch": 0.10651092065247443, + "grad_norm": 4.993395018136294, + "learning_rate": 1.9694649482136253e-05, + "loss": 0.7051, + "step": 6164 + }, + { + "epoch": 0.10652820016588332, + "grad_norm": 0.6331820832136767, + "learning_rate": 1.969451222414333e-05, + "loss": 0.5134, + "step": 6165 + }, + { + "epoch": 0.10654547967929223, + "grad_norm": 1.1658211659367257, + "learning_rate": 1.9694374935786457e-05, + "loss": 0.5339, + "step": 6166 + }, + { + "epoch": 0.10656275919270114, + "grad_norm": 1.1781078733849955, + "learning_rate": 1.9694237617066067e-05, + "loss": 0.4823, + "step": 6167 + }, + { + "epoch": 0.10658003870611003, + "grad_norm": 1.383010788652952, + "learning_rate": 1.9694100267982585e-05, + "loss": 0.6505, + "step": 6168 + }, + { + "epoch": 0.10659731821951894, + "grad_norm": 1.220876912858899, + "learning_rate": 1.9693962888536448e-05, + "loss": 0.6873, + "step": 6169 + }, + { + "epoch": 0.10661459773292783, + "grad_norm": 0.9771864384479131, + "learning_rate": 1.969382547872808e-05, + "loss": 0.6758, + "step": 6170 + }, + { + "epoch": 0.10663187724633674, + "grad_norm": 0.9529521146335024, + "learning_rate": 1.9693688038557914e-05, + "loss": 0.6437, + "step": 6171 + }, + { + "epoch": 0.10664915675974565, + "grad_norm": 0.6784661677194336, + "learning_rate": 1.969355056802638e-05, + "loss": 0.9215, + "step": 6172 + }, + { + "epoch": 0.10666643627315454, + "grad_norm": 0.711382107386376, + "learning_rate": 1.969341306713391e-05, + "loss": 0.4616, + "step": 6173 + }, + { + "epoch": 0.10668371578656345, + "grad_norm": 1.1324923427262301, + "learning_rate": 1.9693275535880932e-05, + "loss": 0.484, + "step": 6174 + }, + { + "epoch": 0.10670099529997236, + "grad_norm": 1.197950719290667, + "learning_rate": 1.969313797426788e-05, + "loss": 0.9816, + "step": 6175 + }, + { + "epoch": 0.10671827481338125, + "grad_norm": 1.51331931272669, + "learning_rate": 1.9693000382295184e-05, + "loss": 0.6912, + "step": 6176 + }, + { + "epoch": 0.10673555432679016, + "grad_norm": 1.339483485582945, + "learning_rate": 1.9692862759963275e-05, + "loss": 0.588, + "step": 6177 + }, + { + "epoch": 0.10675283384019905, + "grad_norm": 1.0502481985276875, + "learning_rate": 1.969272510727258e-05, + "loss": 0.7781, + "step": 6178 + }, + { + "epoch": 0.10677011335360796, + "grad_norm": 0.9517939788432768, + "learning_rate": 1.9692587424223536e-05, + "loss": 0.6038, + "step": 6179 + }, + { + "epoch": 0.10678739286701687, + "grad_norm": 1.387940740568584, + "learning_rate": 1.9692449710816567e-05, + "loss": 0.6748, + "step": 6180 + }, + { + "epoch": 0.10680467238042576, + "grad_norm": 4.277938181565859, + "learning_rate": 1.9692311967052113e-05, + "loss": 0.5215, + "step": 6181 + }, + { + "epoch": 0.10682195189383467, + "grad_norm": 1.7631704886125799, + "learning_rate": 1.96921741929306e-05, + "loss": 0.9459, + "step": 6182 + }, + { + "epoch": 0.10683923140724358, + "grad_norm": 3.608337481550325, + "learning_rate": 1.9692036388452463e-05, + "loss": 0.9797, + "step": 6183 + }, + { + "epoch": 0.10685651092065247, + "grad_norm": 2.9633933683140494, + "learning_rate": 1.969189855361813e-05, + "loss": 0.569, + "step": 6184 + }, + { + "epoch": 0.10687379043406138, + "grad_norm": 1.369904153324066, + "learning_rate": 1.969176068842803e-05, + "loss": 0.7493, + "step": 6185 + }, + { + "epoch": 0.10689106994747027, + "grad_norm": 1.1917248724133456, + "learning_rate": 1.9691622792882605e-05, + "loss": 0.8402, + "step": 6186 + }, + { + "epoch": 0.10690834946087918, + "grad_norm": 1.2826681446924957, + "learning_rate": 1.969148486698228e-05, + "loss": 0.6259, + "step": 6187 + }, + { + "epoch": 0.10692562897428809, + "grad_norm": 2.9685981393821406, + "learning_rate": 1.9691346910727485e-05, + "loss": 0.6851, + "step": 6188 + }, + { + "epoch": 0.10694290848769698, + "grad_norm": 0.9204147779716402, + "learning_rate": 1.969120892411866e-05, + "loss": 0.5298, + "step": 6189 + }, + { + "epoch": 0.10696018800110589, + "grad_norm": 2.828135718764875, + "learning_rate": 1.9691070907156227e-05, + "loss": 0.6443, + "step": 6190 + }, + { + "epoch": 0.10697746751451478, + "grad_norm": 1.6110985238629325, + "learning_rate": 1.9690932859840627e-05, + "loss": 0.676, + "step": 6191 + }, + { + "epoch": 0.10699474702792369, + "grad_norm": 1.1245467827192253, + "learning_rate": 1.9690794782172285e-05, + "loss": 0.6519, + "step": 6192 + }, + { + "epoch": 0.1070120265413326, + "grad_norm": 2.1320946275261026, + "learning_rate": 1.969065667415164e-05, + "loss": 0.624, + "step": 6193 + }, + { + "epoch": 0.1070293060547415, + "grad_norm": 1.2726858008131423, + "learning_rate": 1.969051853577912e-05, + "loss": 0.6435, + "step": 6194 + }, + { + "epoch": 0.1070465855681504, + "grad_norm": 0.6129651868981421, + "learning_rate": 1.969038036705516e-05, + "loss": 0.7093, + "step": 6195 + }, + { + "epoch": 0.10706386508155931, + "grad_norm": 1.1047177227968457, + "learning_rate": 1.9690242167980194e-05, + "loss": 0.8708, + "step": 6196 + }, + { + "epoch": 0.1070811445949682, + "grad_norm": 1.2182540553175198, + "learning_rate": 1.9690103938554652e-05, + "loss": 0.8071, + "step": 6197 + }, + { + "epoch": 0.10709842410837711, + "grad_norm": 2.3431281965687925, + "learning_rate": 1.9689965678778967e-05, + "loss": 0.457, + "step": 6198 + }, + { + "epoch": 0.107115703621786, + "grad_norm": 1.5179926647500894, + "learning_rate": 1.9689827388653576e-05, + "loss": 0.6359, + "step": 6199 + }, + { + "epoch": 0.10713298313519491, + "grad_norm": 0.6281337307633036, + "learning_rate": 1.9689689068178904e-05, + "loss": 0.6511, + "step": 6200 + }, + { + "epoch": 0.10715026264860382, + "grad_norm": 1.0428212582671348, + "learning_rate": 1.9689550717355393e-05, + "loss": 0.6089, + "step": 6201 + }, + { + "epoch": 0.10716754216201271, + "grad_norm": 0.8663097009994005, + "learning_rate": 1.9689412336183475e-05, + "loss": 0.6676, + "step": 6202 + }, + { + "epoch": 0.10718482167542162, + "grad_norm": 1.20738508159486, + "learning_rate": 1.9689273924663575e-05, + "loss": 0.7559, + "step": 6203 + }, + { + "epoch": 0.10720210118883053, + "grad_norm": 1.3748830764661097, + "learning_rate": 1.968913548279614e-05, + "loss": 0.5909, + "step": 6204 + }, + { + "epoch": 0.10721938070223942, + "grad_norm": 1.0018715340409383, + "learning_rate": 1.9688997010581592e-05, + "loss": 0.6551, + "step": 6205 + }, + { + "epoch": 0.10723666021564833, + "grad_norm": 0.5782711290631309, + "learning_rate": 1.968885850802037e-05, + "loss": 0.8128, + "step": 6206 + }, + { + "epoch": 0.10725393972905722, + "grad_norm": 0.90692815915504, + "learning_rate": 1.9688719975112906e-05, + "loss": 0.7535, + "step": 6207 + }, + { + "epoch": 0.10727121924246613, + "grad_norm": 1.5677834732952476, + "learning_rate": 1.9688581411859635e-05, + "loss": 0.7529, + "step": 6208 + }, + { + "epoch": 0.10728849875587504, + "grad_norm": 1.1158134503988368, + "learning_rate": 1.9688442818260994e-05, + "loss": 0.6028, + "step": 6209 + }, + { + "epoch": 0.10730577826928393, + "grad_norm": 0.5336994483857583, + "learning_rate": 1.968830419431741e-05, + "loss": 0.6434, + "step": 6210 + }, + { + "epoch": 0.10732305778269284, + "grad_norm": 0.768763773584747, + "learning_rate": 1.968816554002932e-05, + "loss": 0.7586, + "step": 6211 + }, + { + "epoch": 0.10734033729610175, + "grad_norm": 0.834298040421381, + "learning_rate": 1.9688026855397163e-05, + "loss": 0.5235, + "step": 6212 + }, + { + "epoch": 0.10735761680951064, + "grad_norm": 0.9636382893767999, + "learning_rate": 1.9687888140421365e-05, + "loss": 0.7391, + "step": 6213 + }, + { + "epoch": 0.10737489632291955, + "grad_norm": 1.1324388397104816, + "learning_rate": 1.968774939510237e-05, + "loss": 0.6612, + "step": 6214 + }, + { + "epoch": 0.10739217583632844, + "grad_norm": 0.9765120266724299, + "learning_rate": 1.9687610619440604e-05, + "loss": 0.9226, + "step": 6215 + }, + { + "epoch": 0.10740945534973735, + "grad_norm": 1.5437695424601097, + "learning_rate": 1.968747181343651e-05, + "loss": 0.702, + "step": 6216 + }, + { + "epoch": 0.10742673486314626, + "grad_norm": 1.0358726921574992, + "learning_rate": 1.968733297709051e-05, + "loss": 0.473, + "step": 6217 + }, + { + "epoch": 0.10744401437655515, + "grad_norm": 1.0289422141703668, + "learning_rate": 1.9687194110403054e-05, + "loss": 0.7372, + "step": 6218 + }, + { + "epoch": 0.10746129388996406, + "grad_norm": 0.991461928468412, + "learning_rate": 1.968705521337457e-05, + "loss": 0.7997, + "step": 6219 + }, + { + "epoch": 0.10747857340337297, + "grad_norm": 1.4238193598392157, + "learning_rate": 1.968691628600549e-05, + "loss": 0.9478, + "step": 6220 + }, + { + "epoch": 0.10749585291678186, + "grad_norm": 1.2544017296748617, + "learning_rate": 1.968677732829625e-05, + "loss": 0.7649, + "step": 6221 + }, + { + "epoch": 0.10751313243019077, + "grad_norm": 0.9844544925297066, + "learning_rate": 1.968663834024729e-05, + "loss": 0.851, + "step": 6222 + }, + { + "epoch": 0.10753041194359966, + "grad_norm": 1.1766488605048642, + "learning_rate": 1.968649932185904e-05, + "loss": 0.9399, + "step": 6223 + }, + { + "epoch": 0.10754769145700857, + "grad_norm": 1.0076853313665675, + "learning_rate": 1.9686360273131943e-05, + "loss": 0.6416, + "step": 6224 + }, + { + "epoch": 0.10756497097041748, + "grad_norm": 0.7544255912845863, + "learning_rate": 1.9686221194066424e-05, + "loss": 0.6841, + "step": 6225 + }, + { + "epoch": 0.10758225048382637, + "grad_norm": 0.865638428348548, + "learning_rate": 1.968608208466293e-05, + "loss": 0.6176, + "step": 6226 + }, + { + "epoch": 0.10759952999723528, + "grad_norm": 0.9135910514016765, + "learning_rate": 1.9685942944921884e-05, + "loss": 0.6565, + "step": 6227 + }, + { + "epoch": 0.10761680951064417, + "grad_norm": 0.8784707704489714, + "learning_rate": 1.9685803774843736e-05, + "loss": 0.7342, + "step": 6228 + }, + { + "epoch": 0.10763408902405308, + "grad_norm": 0.8613319409963975, + "learning_rate": 1.968566457442891e-05, + "loss": 0.6213, + "step": 6229 + }, + { + "epoch": 0.10765136853746199, + "grad_norm": 0.8315628295809421, + "learning_rate": 1.9685525343677847e-05, + "loss": 0.8064, + "step": 6230 + }, + { + "epoch": 0.10766864805087088, + "grad_norm": 0.7773067802697818, + "learning_rate": 1.9685386082590982e-05, + "loss": 0.6266, + "step": 6231 + }, + { + "epoch": 0.10768592756427979, + "grad_norm": 0.9702488920263307, + "learning_rate": 1.9685246791168754e-05, + "loss": 0.6875, + "step": 6232 + }, + { + "epoch": 0.1077032070776887, + "grad_norm": 0.7409206645045329, + "learning_rate": 1.9685107469411597e-05, + "loss": 0.5934, + "step": 6233 + }, + { + "epoch": 0.10772048659109759, + "grad_norm": 1.0423448333309777, + "learning_rate": 1.9684968117319943e-05, + "loss": 0.7279, + "step": 6234 + }, + { + "epoch": 0.1077377661045065, + "grad_norm": 0.8464005819983004, + "learning_rate": 1.9684828734894233e-05, + "loss": 0.8119, + "step": 6235 + }, + { + "epoch": 0.1077550456179154, + "grad_norm": 1.033507333194508, + "learning_rate": 1.968468932213491e-05, + "loss": 0.6936, + "step": 6236 + }, + { + "epoch": 0.1077723251313243, + "grad_norm": 0.9086986296957871, + "learning_rate": 1.96845498790424e-05, + "loss": 0.5963, + "step": 6237 + }, + { + "epoch": 0.10778960464473321, + "grad_norm": 0.6629451824051145, + "learning_rate": 1.968441040561714e-05, + "loss": 0.5297, + "step": 6238 + }, + { + "epoch": 0.1078068841581421, + "grad_norm": 1.011572605043482, + "learning_rate": 1.9684270901859576e-05, + "loss": 0.7049, + "step": 6239 + }, + { + "epoch": 0.10782416367155101, + "grad_norm": 0.8334649003924437, + "learning_rate": 1.9684131367770137e-05, + "loss": 0.7239, + "step": 6240 + }, + { + "epoch": 0.10784144318495992, + "grad_norm": 0.9307428837376598, + "learning_rate": 1.9683991803349265e-05, + "loss": 0.6433, + "step": 6241 + }, + { + "epoch": 0.10785872269836881, + "grad_norm": 1.0380696430337857, + "learning_rate": 1.9683852208597393e-05, + "loss": 0.7179, + "step": 6242 + }, + { + "epoch": 0.10787600221177772, + "grad_norm": 0.9532986184029868, + "learning_rate": 1.968371258351496e-05, + "loss": 0.7125, + "step": 6243 + }, + { + "epoch": 0.10789328172518661, + "grad_norm": 0.8417854328060801, + "learning_rate": 1.9683572928102402e-05, + "loss": 0.5809, + "step": 6244 + }, + { + "epoch": 0.10791056123859552, + "grad_norm": 1.0123849052688145, + "learning_rate": 1.968343324236016e-05, + "loss": 0.8306, + "step": 6245 + }, + { + "epoch": 0.10792784075200443, + "grad_norm": 1.2723823321558725, + "learning_rate": 1.9683293526288668e-05, + "loss": 0.6792, + "step": 6246 + }, + { + "epoch": 0.10794512026541332, + "grad_norm": 0.9950822994332731, + "learning_rate": 1.9683153779888367e-05, + "loss": 0.6954, + "step": 6247 + }, + { + "epoch": 0.10796239977882223, + "grad_norm": 1.0427720449665367, + "learning_rate": 1.968301400315969e-05, + "loss": 0.7192, + "step": 6248 + }, + { + "epoch": 0.10797967929223114, + "grad_norm": 0.9248857860455663, + "learning_rate": 1.9682874196103075e-05, + "loss": 0.771, + "step": 6249 + }, + { + "epoch": 0.10799695880564003, + "grad_norm": 0.4659955139693915, + "learning_rate": 1.9682734358718968e-05, + "loss": 0.5715, + "step": 6250 + }, + { + "epoch": 0.10801423831904894, + "grad_norm": 0.992293676314587, + "learning_rate": 1.96825944910078e-05, + "loss": 0.7257, + "step": 6251 + }, + { + "epoch": 0.10803151783245783, + "grad_norm": 0.9690267723856362, + "learning_rate": 1.9682454592970007e-05, + "loss": 0.6932, + "step": 6252 + }, + { + "epoch": 0.10804879734586674, + "grad_norm": 0.7590704355244201, + "learning_rate": 1.968231466460603e-05, + "loss": 0.8074, + "step": 6253 + }, + { + "epoch": 0.10806607685927565, + "grad_norm": 0.8124861220903008, + "learning_rate": 1.968217470591631e-05, + "loss": 0.5332, + "step": 6254 + }, + { + "epoch": 0.10808335637268454, + "grad_norm": 1.0907549013834914, + "learning_rate": 1.9682034716901285e-05, + "loss": 0.6877, + "step": 6255 + }, + { + "epoch": 0.10810063588609345, + "grad_norm": 0.8789027252371083, + "learning_rate": 1.9681894697561386e-05, + "loss": 0.5614, + "step": 6256 + }, + { + "epoch": 0.10811791539950236, + "grad_norm": 0.7721552696670634, + "learning_rate": 1.968175464789706e-05, + "loss": 0.5692, + "step": 6257 + }, + { + "epoch": 0.10813519491291125, + "grad_norm": 0.7290374126423735, + "learning_rate": 1.9681614567908743e-05, + "loss": 0.7414, + "step": 6258 + }, + { + "epoch": 0.10815247442632016, + "grad_norm": 1.1328792407757307, + "learning_rate": 1.968147445759687e-05, + "loss": 0.6711, + "step": 6259 + }, + { + "epoch": 0.10816975393972905, + "grad_norm": 0.9258108387693584, + "learning_rate": 1.9681334316961888e-05, + "loss": 0.6396, + "step": 6260 + }, + { + "epoch": 0.10818703345313796, + "grad_norm": 0.807390199362556, + "learning_rate": 1.968119414600423e-05, + "loss": 0.5088, + "step": 6261 + }, + { + "epoch": 0.10820431296654687, + "grad_norm": 0.846352670333096, + "learning_rate": 1.9681053944724335e-05, + "loss": 0.4459, + "step": 6262 + }, + { + "epoch": 0.10822159247995576, + "grad_norm": 1.0208444662314642, + "learning_rate": 1.9680913713122644e-05, + "loss": 0.7454, + "step": 6263 + }, + { + "epoch": 0.10823887199336467, + "grad_norm": 0.9140365503435769, + "learning_rate": 1.9680773451199597e-05, + "loss": 0.6746, + "step": 6264 + }, + { + "epoch": 0.10825615150677356, + "grad_norm": 0.906113625050177, + "learning_rate": 1.9680633158955625e-05, + "loss": 0.6989, + "step": 6265 + }, + { + "epoch": 0.10827343102018247, + "grad_norm": 0.8354412092133952, + "learning_rate": 1.968049283639118e-05, + "loss": 0.6636, + "step": 6266 + }, + { + "epoch": 0.10829071053359138, + "grad_norm": 1.0419845194322688, + "learning_rate": 1.96803524835067e-05, + "loss": 0.6384, + "step": 6267 + }, + { + "epoch": 0.10830799004700027, + "grad_norm": 0.7295713420522579, + "learning_rate": 1.968021210030261e-05, + "loss": 0.6388, + "step": 6268 + }, + { + "epoch": 0.10832526956040918, + "grad_norm": 1.0203053920070788, + "learning_rate": 1.9680071686779368e-05, + "loss": 0.7239, + "step": 6269 + }, + { + "epoch": 0.10834254907381809, + "grad_norm": 0.8645841239807929, + "learning_rate": 1.9679931242937404e-05, + "loss": 0.4201, + "step": 6270 + }, + { + "epoch": 0.10835982858722698, + "grad_norm": 0.7714407842114613, + "learning_rate": 1.967979076877716e-05, + "loss": 0.4967, + "step": 6271 + }, + { + "epoch": 0.10837710810063589, + "grad_norm": 1.0383641185855312, + "learning_rate": 1.9679650264299074e-05, + "loss": 0.9891, + "step": 6272 + }, + { + "epoch": 0.10839438761404478, + "grad_norm": 0.6083548679000226, + "learning_rate": 1.9679509729503587e-05, + "loss": 0.7795, + "step": 6273 + }, + { + "epoch": 0.10841166712745369, + "grad_norm": 0.957158204038882, + "learning_rate": 1.9679369164391143e-05, + "loss": 0.7581, + "step": 6274 + }, + { + "epoch": 0.1084289466408626, + "grad_norm": 0.8251078783284815, + "learning_rate": 1.9679228568962178e-05, + "loss": 0.7554, + "step": 6275 + }, + { + "epoch": 0.10844622615427149, + "grad_norm": 0.8261827314556737, + "learning_rate": 1.967908794321713e-05, + "loss": 0.4656, + "step": 6276 + }, + { + "epoch": 0.1084635056676804, + "grad_norm": 0.9426176501910293, + "learning_rate": 1.967894728715645e-05, + "loss": 0.6381, + "step": 6277 + }, + { + "epoch": 0.1084807851810893, + "grad_norm": 0.779622628434959, + "learning_rate": 1.9678806600780566e-05, + "loss": 0.5451, + "step": 6278 + }, + { + "epoch": 0.1084980646944982, + "grad_norm": 1.2828233373828046, + "learning_rate": 1.9678665884089923e-05, + "loss": 0.734, + "step": 6279 + }, + { + "epoch": 0.10851534420790711, + "grad_norm": 1.0841024257066396, + "learning_rate": 1.9678525137084967e-05, + "loss": 0.7167, + "step": 6280 + }, + { + "epoch": 0.108532623721316, + "grad_norm": 0.8537112668157245, + "learning_rate": 1.9678384359766132e-05, + "loss": 0.8267, + "step": 6281 + }, + { + "epoch": 0.10854990323472491, + "grad_norm": 1.0347630147819662, + "learning_rate": 1.9678243552133865e-05, + "loss": 0.7838, + "step": 6282 + }, + { + "epoch": 0.10856718274813382, + "grad_norm": 0.768418735917828, + "learning_rate": 1.9678102714188602e-05, + "loss": 0.5844, + "step": 6283 + }, + { + "epoch": 0.10858446226154271, + "grad_norm": 1.1498907484619496, + "learning_rate": 1.9677961845930785e-05, + "loss": 0.7241, + "step": 6284 + }, + { + "epoch": 0.10860174177495162, + "grad_norm": 1.3297857346101398, + "learning_rate": 1.9677820947360854e-05, + "loss": 0.9452, + "step": 6285 + }, + { + "epoch": 0.10861902128836053, + "grad_norm": 0.555806084522494, + "learning_rate": 1.9677680018479255e-05, + "loss": 0.6405, + "step": 6286 + }, + { + "epoch": 0.10863630080176942, + "grad_norm": 0.8646213436263976, + "learning_rate": 1.9677539059286427e-05, + "loss": 0.666, + "step": 6287 + }, + { + "epoch": 0.10865358031517833, + "grad_norm": 0.8848213940558144, + "learning_rate": 1.9677398069782813e-05, + "loss": 0.638, + "step": 6288 + }, + { + "epoch": 0.10867085982858722, + "grad_norm": 0.7886452569361412, + "learning_rate": 1.9677257049968846e-05, + "loss": 0.7852, + "step": 6289 + }, + { + "epoch": 0.10868813934199613, + "grad_norm": 1.3987745458583312, + "learning_rate": 1.967711599984498e-05, + "loss": 0.5972, + "step": 6290 + }, + { + "epoch": 0.10870541885540504, + "grad_norm": 0.5163280683878079, + "learning_rate": 1.9676974919411652e-05, + "loss": 0.675, + "step": 6291 + }, + { + "epoch": 0.10872269836881393, + "grad_norm": 0.9192982619329462, + "learning_rate": 1.9676833808669302e-05, + "loss": 0.6667, + "step": 6292 + }, + { + "epoch": 0.10873997788222284, + "grad_norm": 0.8940347342797552, + "learning_rate": 1.967669266761837e-05, + "loss": 0.837, + "step": 6293 + }, + { + "epoch": 0.10875725739563175, + "grad_norm": 0.8946280906768176, + "learning_rate": 1.9676551496259307e-05, + "loss": 0.7026, + "step": 6294 + }, + { + "epoch": 0.10877453690904064, + "grad_norm": 0.8973248546432425, + "learning_rate": 1.9676410294592544e-05, + "loss": 0.7154, + "step": 6295 + }, + { + "epoch": 0.10879181642244955, + "grad_norm": 1.1485357606803102, + "learning_rate": 1.967626906261853e-05, + "loss": 0.7446, + "step": 6296 + }, + { + "epoch": 0.10880909593585844, + "grad_norm": 0.9434175193258002, + "learning_rate": 1.9676127800337705e-05, + "loss": 0.5804, + "step": 6297 + }, + { + "epoch": 0.10882637544926735, + "grad_norm": 1.0532795045330618, + "learning_rate": 1.9675986507750517e-05, + "loss": 0.7001, + "step": 6298 + }, + { + "epoch": 0.10884365496267626, + "grad_norm": 1.0818758392812438, + "learning_rate": 1.9675845184857397e-05, + "loss": 0.6767, + "step": 6299 + }, + { + "epoch": 0.10886093447608515, + "grad_norm": 0.9795165550196429, + "learning_rate": 1.9675703831658798e-05, + "loss": 0.6692, + "step": 6300 + }, + { + "epoch": 0.10887821398949406, + "grad_norm": 1.357217376971069, + "learning_rate": 1.9675562448155162e-05, + "loss": 0.742, + "step": 6301 + }, + { + "epoch": 0.10889549350290295, + "grad_norm": 1.2365453443679009, + "learning_rate": 1.9675421034346927e-05, + "loss": 0.7209, + "step": 6302 + }, + { + "epoch": 0.10891277301631186, + "grad_norm": 1.0214412364281276, + "learning_rate": 1.9675279590234537e-05, + "loss": 0.5772, + "step": 6303 + }, + { + "epoch": 0.10893005252972077, + "grad_norm": 1.2973429553060358, + "learning_rate": 1.9675138115818435e-05, + "loss": 0.6819, + "step": 6304 + }, + { + "epoch": 0.10894733204312966, + "grad_norm": 1.205111669739828, + "learning_rate": 1.9674996611099065e-05, + "loss": 0.716, + "step": 6305 + }, + { + "epoch": 0.10896461155653857, + "grad_norm": 0.789532893126986, + "learning_rate": 1.9674855076076875e-05, + "loss": 0.7233, + "step": 6306 + }, + { + "epoch": 0.10898189106994748, + "grad_norm": 0.9011551621241248, + "learning_rate": 1.96747135107523e-05, + "loss": 0.6885, + "step": 6307 + }, + { + "epoch": 0.10899917058335637, + "grad_norm": 1.0579180020067884, + "learning_rate": 1.9674571915125787e-05, + "loss": 0.7417, + "step": 6308 + }, + { + "epoch": 0.10901645009676528, + "grad_norm": 0.9586062752957679, + "learning_rate": 1.967443028919778e-05, + "loss": 0.583, + "step": 6309 + }, + { + "epoch": 0.10903372961017417, + "grad_norm": 1.1057074241440776, + "learning_rate": 1.9674288632968724e-05, + "loss": 0.669, + "step": 6310 + }, + { + "epoch": 0.10905100912358308, + "grad_norm": 0.8430641711963635, + "learning_rate": 1.9674146946439058e-05, + "loss": 0.7356, + "step": 6311 + }, + { + "epoch": 0.10906828863699199, + "grad_norm": 0.8937593505774729, + "learning_rate": 1.967400522960923e-05, + "loss": 0.7175, + "step": 6312 + }, + { + "epoch": 0.10908556815040088, + "grad_norm": 1.0480695623551544, + "learning_rate": 1.9673863482479684e-05, + "loss": 0.6813, + "step": 6313 + }, + { + "epoch": 0.10910284766380979, + "grad_norm": 0.9437285384538464, + "learning_rate": 1.9673721705050857e-05, + "loss": 0.5674, + "step": 6314 + }, + { + "epoch": 0.1091201271772187, + "grad_norm": 0.9108795613794564, + "learning_rate": 1.9673579897323204e-05, + "loss": 0.6624, + "step": 6315 + }, + { + "epoch": 0.10913740669062759, + "grad_norm": 0.4734757966349213, + "learning_rate": 1.967343805929716e-05, + "loss": 0.5645, + "step": 6316 + }, + { + "epoch": 0.1091546862040365, + "grad_norm": 0.8179795987457523, + "learning_rate": 1.9673296190973176e-05, + "loss": 0.651, + "step": 6317 + }, + { + "epoch": 0.10917196571744539, + "grad_norm": 0.823186462012638, + "learning_rate": 1.967315429235169e-05, + "loss": 0.6458, + "step": 6318 + }, + { + "epoch": 0.1091892452308543, + "grad_norm": 1.1654902122640418, + "learning_rate": 1.9673012363433153e-05, + "loss": 0.5936, + "step": 6319 + }, + { + "epoch": 0.1092065247442632, + "grad_norm": 0.979754906422233, + "learning_rate": 1.9672870404218004e-05, + "loss": 0.8075, + "step": 6320 + }, + { + "epoch": 0.1092238042576721, + "grad_norm": 1.0004065353047102, + "learning_rate": 1.9672728414706693e-05, + "loss": 0.8786, + "step": 6321 + }, + { + "epoch": 0.10924108377108101, + "grad_norm": 0.6529976697439368, + "learning_rate": 1.967258639489966e-05, + "loss": 0.5693, + "step": 6322 + }, + { + "epoch": 0.10925836328448991, + "grad_norm": 0.8526812881866058, + "learning_rate": 1.9672444344797347e-05, + "loss": 0.9005, + "step": 6323 + }, + { + "epoch": 0.10927564279789881, + "grad_norm": 0.8507986296319373, + "learning_rate": 1.9672302264400206e-05, + "loss": 0.7646, + "step": 6324 + }, + { + "epoch": 0.10929292231130772, + "grad_norm": 0.9299901941375565, + "learning_rate": 1.967216015370868e-05, + "loss": 0.7736, + "step": 6325 + }, + { + "epoch": 0.10931020182471661, + "grad_norm": 0.9983114182440318, + "learning_rate": 1.9672018012723214e-05, + "loss": 0.8335, + "step": 6326 + }, + { + "epoch": 0.10932748133812552, + "grad_norm": 1.0392691249581507, + "learning_rate": 1.967187584144425e-05, + "loss": 0.8365, + "step": 6327 + }, + { + "epoch": 0.10934476085153443, + "grad_norm": 0.8026609502451926, + "learning_rate": 1.9671733639872236e-05, + "loss": 0.6524, + "step": 6328 + }, + { + "epoch": 0.10936204036494332, + "grad_norm": 1.041961907294484, + "learning_rate": 1.967159140800762e-05, + "loss": 0.7575, + "step": 6329 + }, + { + "epoch": 0.10937931987835223, + "grad_norm": 0.7977641816018768, + "learning_rate": 1.967144914585084e-05, + "loss": 0.7218, + "step": 6330 + }, + { + "epoch": 0.10939659939176113, + "grad_norm": 0.8895704603880152, + "learning_rate": 1.967130685340235e-05, + "loss": 0.6449, + "step": 6331 + }, + { + "epoch": 0.10941387890517003, + "grad_norm": 0.8837834764366637, + "learning_rate": 1.967116453066259e-05, + "loss": 0.8546, + "step": 6332 + }, + { + "epoch": 0.10943115841857894, + "grad_norm": 1.032235098022687, + "learning_rate": 1.967102217763201e-05, + "loss": 0.9309, + "step": 6333 + }, + { + "epoch": 0.10944843793198783, + "grad_norm": 0.7796933636530079, + "learning_rate": 1.9670879794311056e-05, + "loss": 0.5365, + "step": 6334 + }, + { + "epoch": 0.10946571744539674, + "grad_norm": 0.950585128064157, + "learning_rate": 1.9670737380700163e-05, + "loss": 0.7547, + "step": 6335 + }, + { + "epoch": 0.10948299695880565, + "grad_norm": 0.5889131406673254, + "learning_rate": 1.9670594936799793e-05, + "loss": 0.4315, + "step": 6336 + }, + { + "epoch": 0.10950027647221454, + "grad_norm": 1.251386024126278, + "learning_rate": 1.9670452462610384e-05, + "loss": 0.9984, + "step": 6337 + }, + { + "epoch": 0.10951755598562345, + "grad_norm": 0.8260917747276398, + "learning_rate": 1.9670309958132382e-05, + "loss": 0.5429, + "step": 6338 + }, + { + "epoch": 0.10953483549903235, + "grad_norm": 0.9184862013951306, + "learning_rate": 1.967016742336623e-05, + "loss": 0.6879, + "step": 6339 + }, + { + "epoch": 0.10955211501244125, + "grad_norm": 0.7427827379004465, + "learning_rate": 1.9670024858312386e-05, + "loss": 0.8067, + "step": 6340 + }, + { + "epoch": 0.10956939452585016, + "grad_norm": 0.7704344303502036, + "learning_rate": 1.9669882262971286e-05, + "loss": 0.6601, + "step": 6341 + }, + { + "epoch": 0.10958667403925905, + "grad_norm": 1.0582071706032645, + "learning_rate": 1.9669739637343377e-05, + "loss": 0.6951, + "step": 6342 + }, + { + "epoch": 0.10960395355266796, + "grad_norm": 0.7492746806856766, + "learning_rate": 1.966959698142911e-05, + "loss": 0.5271, + "step": 6343 + }, + { + "epoch": 0.10962123306607686, + "grad_norm": 0.8424141686715841, + "learning_rate": 1.9669454295228932e-05, + "loss": 0.5067, + "step": 6344 + }, + { + "epoch": 0.10963851257948576, + "grad_norm": 0.9891465087885356, + "learning_rate": 1.966931157874329e-05, + "loss": 0.9313, + "step": 6345 + }, + { + "epoch": 0.10965579209289467, + "grad_norm": 1.1016616187361234, + "learning_rate": 1.9669168831972624e-05, + "loss": 0.7422, + "step": 6346 + }, + { + "epoch": 0.10967307160630356, + "grad_norm": 1.2786902349455076, + "learning_rate": 1.966902605491739e-05, + "loss": 0.8597, + "step": 6347 + }, + { + "epoch": 0.10969035111971247, + "grad_norm": 1.1547123232473864, + "learning_rate": 1.966888324757803e-05, + "loss": 0.9635, + "step": 6348 + }, + { + "epoch": 0.10970763063312138, + "grad_norm": 1.139932031988, + "learning_rate": 1.9668740409954995e-05, + "loss": 0.6173, + "step": 6349 + }, + { + "epoch": 0.10972491014653027, + "grad_norm": 1.028645858350213, + "learning_rate": 1.9668597542048728e-05, + "loss": 0.7148, + "step": 6350 + }, + { + "epoch": 0.10974218965993918, + "grad_norm": 0.8911952045418573, + "learning_rate": 1.966845464385968e-05, + "loss": 0.4447, + "step": 6351 + }, + { + "epoch": 0.10975946917334808, + "grad_norm": 0.756518395261532, + "learning_rate": 1.96683117153883e-05, + "loss": 0.4919, + "step": 6352 + }, + { + "epoch": 0.10977674868675698, + "grad_norm": 0.8831667193689553, + "learning_rate": 1.9668168756635028e-05, + "loss": 0.6198, + "step": 6353 + }, + { + "epoch": 0.10979402820016589, + "grad_norm": 1.0752858552638567, + "learning_rate": 1.966802576760032e-05, + "loss": 0.7417, + "step": 6354 + }, + { + "epoch": 0.10981130771357478, + "grad_norm": 0.9874352037589074, + "learning_rate": 1.966788274828462e-05, + "loss": 0.6829, + "step": 6355 + }, + { + "epoch": 0.10982858722698369, + "grad_norm": 0.9015414616828599, + "learning_rate": 1.9667739698688376e-05, + "loss": 0.5457, + "step": 6356 + }, + { + "epoch": 0.1098458667403926, + "grad_norm": 1.0006448578473042, + "learning_rate": 1.9667596618812036e-05, + "loss": 0.6944, + "step": 6357 + }, + { + "epoch": 0.10986314625380149, + "grad_norm": 1.0298902442904008, + "learning_rate": 1.9667453508656047e-05, + "loss": 0.7243, + "step": 6358 + }, + { + "epoch": 0.1098804257672104, + "grad_norm": 0.8567785849232137, + "learning_rate": 1.9667310368220863e-05, + "loss": 0.5749, + "step": 6359 + }, + { + "epoch": 0.1098977052806193, + "grad_norm": 0.7713978181704895, + "learning_rate": 1.9667167197506924e-05, + "loss": 0.4295, + "step": 6360 + }, + { + "epoch": 0.1099149847940282, + "grad_norm": 0.9097449907899974, + "learning_rate": 1.9667023996514685e-05, + "loss": 0.614, + "step": 6361 + }, + { + "epoch": 0.1099322643074371, + "grad_norm": 0.9413890511649692, + "learning_rate": 1.9666880765244593e-05, + "loss": 0.5422, + "step": 6362 + }, + { + "epoch": 0.109949543820846, + "grad_norm": 0.8235503959770473, + "learning_rate": 1.9666737503697094e-05, + "loss": 0.5884, + "step": 6363 + }, + { + "epoch": 0.1099668233342549, + "grad_norm": 1.564167583515061, + "learning_rate": 1.966659421187264e-05, + "loss": 0.7503, + "step": 6364 + }, + { + "epoch": 0.10998410284766381, + "grad_norm": 1.1016079892581678, + "learning_rate": 1.9666450889771677e-05, + "loss": 0.6288, + "step": 6365 + }, + { + "epoch": 0.11000138236107271, + "grad_norm": 1.1444672083225276, + "learning_rate": 1.9666307537394657e-05, + "loss": 0.6551, + "step": 6366 + }, + { + "epoch": 0.11001866187448162, + "grad_norm": 0.779870554677779, + "learning_rate": 1.9666164154742023e-05, + "loss": 0.659, + "step": 6367 + }, + { + "epoch": 0.11003594138789052, + "grad_norm": 1.2266351999319711, + "learning_rate": 1.9666020741814232e-05, + "loss": 0.6354, + "step": 6368 + }, + { + "epoch": 0.11005322090129942, + "grad_norm": 0.9397063691867509, + "learning_rate": 1.966587729861173e-05, + "loss": 0.6574, + "step": 6369 + }, + { + "epoch": 0.11007050041470832, + "grad_norm": 1.1619144254240967, + "learning_rate": 1.9665733825134964e-05, + "loss": 0.5698, + "step": 6370 + }, + { + "epoch": 0.11008777992811722, + "grad_norm": 1.0308809368978544, + "learning_rate": 1.9665590321384385e-05, + "loss": 0.8158, + "step": 6371 + }, + { + "epoch": 0.11010505944152613, + "grad_norm": 0.9132237963613788, + "learning_rate": 1.9665446787360444e-05, + "loss": 0.65, + "step": 6372 + }, + { + "epoch": 0.11012233895493503, + "grad_norm": 0.7723176893692578, + "learning_rate": 1.9665303223063587e-05, + "loss": 0.6028, + "step": 6373 + }, + { + "epoch": 0.11013961846834393, + "grad_norm": 0.7498283458228071, + "learning_rate": 1.9665159628494266e-05, + "loss": 0.4216, + "step": 6374 + }, + { + "epoch": 0.11015689798175284, + "grad_norm": 1.084702562141985, + "learning_rate": 1.9665016003652934e-05, + "loss": 0.6889, + "step": 6375 + }, + { + "epoch": 0.11017417749516174, + "grad_norm": 0.9731668837466235, + "learning_rate": 1.9664872348540033e-05, + "loss": 1.0006, + "step": 6376 + }, + { + "epoch": 0.11019145700857064, + "grad_norm": 0.619038341129789, + "learning_rate": 1.966472866315602e-05, + "loss": 0.6984, + "step": 6377 + }, + { + "epoch": 0.11020873652197954, + "grad_norm": 0.9093497989313946, + "learning_rate": 1.9664584947501342e-05, + "loss": 0.6902, + "step": 6378 + }, + { + "epoch": 0.11022601603538844, + "grad_norm": 0.7419265095073898, + "learning_rate": 1.966444120157645e-05, + "loss": 0.5713, + "step": 6379 + }, + { + "epoch": 0.11024329554879735, + "grad_norm": 0.6476044525454318, + "learning_rate": 1.9664297425381792e-05, + "loss": 0.469, + "step": 6380 + }, + { + "epoch": 0.11026057506220625, + "grad_norm": 0.840629871804619, + "learning_rate": 1.9664153618917822e-05, + "loss": 0.7608, + "step": 6381 + }, + { + "epoch": 0.11027785457561515, + "grad_norm": 0.8265618889370278, + "learning_rate": 1.9664009782184984e-05, + "loss": 0.598, + "step": 6382 + }, + { + "epoch": 0.11029513408902406, + "grad_norm": 0.8281216579334901, + "learning_rate": 1.9663865915183735e-05, + "loss": 0.5356, + "step": 6383 + }, + { + "epoch": 0.11031241360243295, + "grad_norm": 0.8531088825657631, + "learning_rate": 1.9663722017914526e-05, + "loss": 0.603, + "step": 6384 + }, + { + "epoch": 0.11032969311584186, + "grad_norm": 0.7389596886656233, + "learning_rate": 1.9663578090377805e-05, + "loss": 0.5721, + "step": 6385 + }, + { + "epoch": 0.11034697262925076, + "grad_norm": 1.8435115336201706, + "learning_rate": 1.966343413257402e-05, + "loss": 0.7797, + "step": 6386 + }, + { + "epoch": 0.11036425214265966, + "grad_norm": 1.0119609656931194, + "learning_rate": 1.9663290144503626e-05, + "loss": 0.7855, + "step": 6387 + }, + { + "epoch": 0.11038153165606857, + "grad_norm": 1.0141398228826808, + "learning_rate": 1.9663146126167073e-05, + "loss": 0.6951, + "step": 6388 + }, + { + "epoch": 0.11039881116947747, + "grad_norm": 0.8661036018288673, + "learning_rate": 1.9663002077564813e-05, + "loss": 0.8066, + "step": 6389 + }, + { + "epoch": 0.11041609068288637, + "grad_norm": 0.9923306356545488, + "learning_rate": 1.9662857998697297e-05, + "loss": 0.902, + "step": 6390 + }, + { + "epoch": 0.11043337019629527, + "grad_norm": 0.9099656035693999, + "learning_rate": 1.966271388956497e-05, + "loss": 0.7097, + "step": 6391 + }, + { + "epoch": 0.11045064970970417, + "grad_norm": 0.8530229930615784, + "learning_rate": 1.9662569750168298e-05, + "loss": 0.5675, + "step": 6392 + }, + { + "epoch": 0.11046792922311308, + "grad_norm": 1.2342932741964083, + "learning_rate": 1.9662425580507717e-05, + "loss": 0.7798, + "step": 6393 + }, + { + "epoch": 0.11048520873652198, + "grad_norm": 0.8775596212211423, + "learning_rate": 1.9662281380583685e-05, + "loss": 0.7648, + "step": 6394 + }, + { + "epoch": 0.11050248824993088, + "grad_norm": 0.5801454210986059, + "learning_rate": 1.9662137150396653e-05, + "loss": 0.736, + "step": 6395 + }, + { + "epoch": 0.11051976776333979, + "grad_norm": 1.1350464079395484, + "learning_rate": 1.9661992889947076e-05, + "loss": 0.6406, + "step": 6396 + }, + { + "epoch": 0.11053704727674869, + "grad_norm": 1.029846846678556, + "learning_rate": 1.96618485992354e-05, + "loss": 0.7876, + "step": 6397 + }, + { + "epoch": 0.11055432679015759, + "grad_norm": 1.0521362321695429, + "learning_rate": 1.966170427826208e-05, + "loss": 0.6287, + "step": 6398 + }, + { + "epoch": 0.1105716063035665, + "grad_norm": 0.8557774718249745, + "learning_rate": 1.9661559927027572e-05, + "loss": 0.5648, + "step": 6399 + }, + { + "epoch": 0.11058888581697539, + "grad_norm": 0.9669143626075033, + "learning_rate": 1.9661415545532317e-05, + "loss": 0.7081, + "step": 6400 + }, + { + "epoch": 0.1106061653303843, + "grad_norm": 1.1177865760923476, + "learning_rate": 1.966127113377678e-05, + "loss": 0.9185, + "step": 6401 + }, + { + "epoch": 0.1106234448437932, + "grad_norm": 0.7508367999186203, + "learning_rate": 1.9661126691761408e-05, + "loss": 0.5084, + "step": 6402 + }, + { + "epoch": 0.1106407243572021, + "grad_norm": 0.9213650011909562, + "learning_rate": 1.966098221948665e-05, + "loss": 0.9184, + "step": 6403 + }, + { + "epoch": 0.110658003870611, + "grad_norm": 0.9061262152968551, + "learning_rate": 1.966083771695296e-05, + "loss": 0.6641, + "step": 6404 + }, + { + "epoch": 0.11067528338401991, + "grad_norm": 0.8000686245036337, + "learning_rate": 1.9660693184160796e-05, + "loss": 0.6883, + "step": 6405 + }, + { + "epoch": 0.1106925628974288, + "grad_norm": 1.089470405570571, + "learning_rate": 1.9660548621110606e-05, + "loss": 0.6278, + "step": 6406 + }, + { + "epoch": 0.11070984241083771, + "grad_norm": 0.894002967050391, + "learning_rate": 1.9660404027802843e-05, + "loss": 0.7761, + "step": 6407 + }, + { + "epoch": 0.11072712192424661, + "grad_norm": 0.9319497636805233, + "learning_rate": 1.966025940423796e-05, + "loss": 0.61, + "step": 6408 + }, + { + "epoch": 0.11074440143765552, + "grad_norm": 0.6688500087405027, + "learning_rate": 1.9660114750416412e-05, + "loss": 0.9033, + "step": 6409 + }, + { + "epoch": 0.11076168095106442, + "grad_norm": 1.3535368776840297, + "learning_rate": 1.9659970066338647e-05, + "loss": 0.8286, + "step": 6410 + }, + { + "epoch": 0.11077896046447332, + "grad_norm": 0.8127761253554763, + "learning_rate": 1.9659825352005124e-05, + "loss": 0.6597, + "step": 6411 + }, + { + "epoch": 0.11079623997788222, + "grad_norm": 0.8945475945165062, + "learning_rate": 1.9659680607416294e-05, + "loss": 0.7257, + "step": 6412 + }, + { + "epoch": 0.11081351949129113, + "grad_norm": 1.1561453276386437, + "learning_rate": 1.965953583257261e-05, + "loss": 0.863, + "step": 6413 + }, + { + "epoch": 0.11083079900470003, + "grad_norm": 1.062074242897667, + "learning_rate": 1.9659391027474526e-05, + "loss": 0.7085, + "step": 6414 + }, + { + "epoch": 0.11084807851810893, + "grad_norm": 1.1088561551616014, + "learning_rate": 1.9659246192122494e-05, + "loss": 0.7453, + "step": 6415 + }, + { + "epoch": 0.11086535803151783, + "grad_norm": 0.9638213513422785, + "learning_rate": 1.965910132651697e-05, + "loss": 0.4781, + "step": 6416 + }, + { + "epoch": 0.11088263754492673, + "grad_norm": 1.0288213644704332, + "learning_rate": 1.9658956430658404e-05, + "loss": 0.778, + "step": 6417 + }, + { + "epoch": 0.11089991705833564, + "grad_norm": 1.1451035125998503, + "learning_rate": 1.9658811504547256e-05, + "loss": 0.6838, + "step": 6418 + }, + { + "epoch": 0.11091719657174454, + "grad_norm": 0.7822049162432069, + "learning_rate": 1.9658666548183974e-05, + "loss": 0.6979, + "step": 6419 + }, + { + "epoch": 0.11093447608515344, + "grad_norm": 1.132952239886426, + "learning_rate": 1.9658521561569016e-05, + "loss": 0.8483, + "step": 6420 + }, + { + "epoch": 0.11095175559856234, + "grad_norm": 1.314088723662556, + "learning_rate": 1.965837654470283e-05, + "loss": 0.6478, + "step": 6421 + }, + { + "epoch": 0.11096903511197125, + "grad_norm": 0.9660041980299932, + "learning_rate": 1.9658231497585882e-05, + "loss": 0.7642, + "step": 6422 + }, + { + "epoch": 0.11098631462538015, + "grad_norm": 0.5478707307201905, + "learning_rate": 1.9658086420218618e-05, + "loss": 0.7479, + "step": 6423 + }, + { + "epoch": 0.11100359413878905, + "grad_norm": 1.1226234778776203, + "learning_rate": 1.9657941312601486e-05, + "loss": 0.7046, + "step": 6424 + }, + { + "epoch": 0.11102087365219795, + "grad_norm": 0.5009981462804871, + "learning_rate": 1.9657796174734954e-05, + "loss": 0.6218, + "step": 6425 + }, + { + "epoch": 0.11103815316560686, + "grad_norm": 1.0472100733302592, + "learning_rate": 1.965765100661947e-05, + "loss": 0.7475, + "step": 6426 + }, + { + "epoch": 0.11105543267901576, + "grad_norm": 0.9107827572618081, + "learning_rate": 1.9657505808255487e-05, + "loss": 0.5572, + "step": 6427 + }, + { + "epoch": 0.11107271219242466, + "grad_norm": 1.0096096980591298, + "learning_rate": 1.9657360579643463e-05, + "loss": 0.8213, + "step": 6428 + }, + { + "epoch": 0.11108999170583356, + "grad_norm": 0.943673896467289, + "learning_rate": 1.9657215320783852e-05, + "loss": 0.8628, + "step": 6429 + }, + { + "epoch": 0.11110727121924246, + "grad_norm": 1.0644855246048237, + "learning_rate": 1.9657070031677108e-05, + "loss": 0.838, + "step": 6430 + }, + { + "epoch": 0.11112455073265137, + "grad_norm": 1.2792121732473105, + "learning_rate": 1.9656924712323687e-05, + "loss": 0.9089, + "step": 6431 + }, + { + "epoch": 0.11114183024606027, + "grad_norm": 0.8894894340929937, + "learning_rate": 1.9656779362724042e-05, + "loss": 0.7203, + "step": 6432 + }, + { + "epoch": 0.11115910975946917, + "grad_norm": 1.0233405582857014, + "learning_rate": 1.9656633982878636e-05, + "loss": 0.646, + "step": 6433 + }, + { + "epoch": 0.11117638927287808, + "grad_norm": 0.8701092214816724, + "learning_rate": 1.9656488572787912e-05, + "loss": 0.7882, + "step": 6434 + }, + { + "epoch": 0.11119366878628698, + "grad_norm": 0.8390724374912726, + "learning_rate": 1.9656343132452335e-05, + "loss": 0.5381, + "step": 6435 + }, + { + "epoch": 0.11121094829969588, + "grad_norm": 0.8443500428370622, + "learning_rate": 1.9656197661872357e-05, + "loss": 0.7466, + "step": 6436 + }, + { + "epoch": 0.11122822781310478, + "grad_norm": 1.0145328894690924, + "learning_rate": 1.9656052161048433e-05, + "loss": 0.5635, + "step": 6437 + }, + { + "epoch": 0.11124550732651368, + "grad_norm": 1.0246530333577688, + "learning_rate": 1.965590662998102e-05, + "loss": 0.679, + "step": 6438 + }, + { + "epoch": 0.11126278683992259, + "grad_norm": 0.6068551383676046, + "learning_rate": 1.9655761068670576e-05, + "loss": 0.6625, + "step": 6439 + }, + { + "epoch": 0.11128006635333149, + "grad_norm": 0.5070228788558022, + "learning_rate": 1.965561547711755e-05, + "loss": 0.6014, + "step": 6440 + }, + { + "epoch": 0.1112973458667404, + "grad_norm": 1.468361549740326, + "learning_rate": 1.9655469855322405e-05, + "loss": 0.7445, + "step": 6441 + }, + { + "epoch": 0.1113146253801493, + "grad_norm": 0.8227597221523588, + "learning_rate": 1.9655324203285597e-05, + "loss": 0.6375, + "step": 6442 + }, + { + "epoch": 0.1113319048935582, + "grad_norm": 0.6596273143898049, + "learning_rate": 1.9655178521007578e-05, + "loss": 0.4627, + "step": 6443 + }, + { + "epoch": 0.1113491844069671, + "grad_norm": 1.2347036645244074, + "learning_rate": 1.9655032808488806e-05, + "loss": 0.9336, + "step": 6444 + }, + { + "epoch": 0.111366463920376, + "grad_norm": 1.1660351891842524, + "learning_rate": 1.9654887065729738e-05, + "loss": 0.7648, + "step": 6445 + }, + { + "epoch": 0.1113837434337849, + "grad_norm": 0.9166980326957385, + "learning_rate": 1.965474129273083e-05, + "loss": 0.809, + "step": 6446 + }, + { + "epoch": 0.11140102294719381, + "grad_norm": 1.267597743216455, + "learning_rate": 1.9654595489492537e-05, + "loss": 0.6522, + "step": 6447 + }, + { + "epoch": 0.1114183024606027, + "grad_norm": 1.0299635836431686, + "learning_rate": 1.965444965601532e-05, + "loss": 0.7937, + "step": 6448 + }, + { + "epoch": 0.11143558197401161, + "grad_norm": 0.9895617184361253, + "learning_rate": 1.965430379229963e-05, + "loss": 0.6011, + "step": 6449 + }, + { + "epoch": 0.11145286148742052, + "grad_norm": 0.9484169864332088, + "learning_rate": 1.9654157898345927e-05, + "loss": 0.6392, + "step": 6450 + }, + { + "epoch": 0.11147014100082941, + "grad_norm": 1.0638872793492813, + "learning_rate": 1.965401197415467e-05, + "loss": 0.7164, + "step": 6451 + }, + { + "epoch": 0.11148742051423832, + "grad_norm": 0.9986882212160042, + "learning_rate": 1.9653866019726312e-05, + "loss": 0.8629, + "step": 6452 + }, + { + "epoch": 0.11150470002764722, + "grad_norm": 0.923327083817913, + "learning_rate": 1.9653720035061312e-05, + "loss": 0.6963, + "step": 6453 + }, + { + "epoch": 0.11152197954105612, + "grad_norm": 0.7224409220993914, + "learning_rate": 1.9653574020160126e-05, + "loss": 0.6958, + "step": 6454 + }, + { + "epoch": 0.11153925905446503, + "grad_norm": 0.8246424159847698, + "learning_rate": 1.9653427975023215e-05, + "loss": 0.6276, + "step": 6455 + }, + { + "epoch": 0.11155653856787393, + "grad_norm": 0.963287890099092, + "learning_rate": 1.9653281899651032e-05, + "loss": 0.6281, + "step": 6456 + }, + { + "epoch": 0.11157381808128283, + "grad_norm": 0.8555890127448618, + "learning_rate": 1.965313579404404e-05, + "loss": 0.7306, + "step": 6457 + }, + { + "epoch": 0.11159109759469173, + "grad_norm": 0.7312413015272912, + "learning_rate": 1.965298965820269e-05, + "loss": 0.4963, + "step": 6458 + }, + { + "epoch": 0.11160837710810063, + "grad_norm": 1.1031902751060272, + "learning_rate": 1.9652843492127444e-05, + "loss": 0.7994, + "step": 6459 + }, + { + "epoch": 0.11162565662150954, + "grad_norm": 0.9560378395143035, + "learning_rate": 1.965269729581876e-05, + "loss": 0.6104, + "step": 6460 + }, + { + "epoch": 0.11164293613491844, + "grad_norm": 0.8814619976527449, + "learning_rate": 1.965255106927709e-05, + "loss": 0.7086, + "step": 6461 + }, + { + "epoch": 0.11166021564832734, + "grad_norm": 0.7576976223464383, + "learning_rate": 1.96524048125029e-05, + "loss": 0.7112, + "step": 6462 + }, + { + "epoch": 0.11167749516173625, + "grad_norm": 1.17441442598826, + "learning_rate": 1.9652258525496645e-05, + "loss": 0.8355, + "step": 6463 + }, + { + "epoch": 0.11169477467514514, + "grad_norm": 1.032436848449869, + "learning_rate": 1.965211220825878e-05, + "loss": 0.6858, + "step": 6464 + }, + { + "epoch": 0.11171205418855405, + "grad_norm": 0.9669238762263339, + "learning_rate": 1.9651965860789767e-05, + "loss": 0.7304, + "step": 6465 + }, + { + "epoch": 0.11172933370196295, + "grad_norm": 0.7636514128723969, + "learning_rate": 1.9651819483090065e-05, + "loss": 0.6261, + "step": 6466 + }, + { + "epoch": 0.11174661321537185, + "grad_norm": 0.9923753502084788, + "learning_rate": 1.965167307516013e-05, + "loss": 0.7646, + "step": 6467 + }, + { + "epoch": 0.11176389272878076, + "grad_norm": 0.939720925664979, + "learning_rate": 1.965152663700042e-05, + "loss": 0.7555, + "step": 6468 + }, + { + "epoch": 0.11178117224218966, + "grad_norm": 0.9026117220547285, + "learning_rate": 1.9651380168611396e-05, + "loss": 0.6409, + "step": 6469 + }, + { + "epoch": 0.11179845175559856, + "grad_norm": 0.7124347319167695, + "learning_rate": 1.9651233669993516e-05, + "loss": 0.2869, + "step": 6470 + }, + { + "epoch": 0.11181573126900747, + "grad_norm": 0.9967557682884487, + "learning_rate": 1.965108714114724e-05, + "loss": 0.7806, + "step": 6471 + }, + { + "epoch": 0.11183301078241636, + "grad_norm": 1.2277615159825712, + "learning_rate": 1.9650940582073023e-05, + "loss": 0.836, + "step": 6472 + }, + { + "epoch": 0.11185029029582527, + "grad_norm": 0.9191486293013656, + "learning_rate": 1.9650793992771328e-05, + "loss": 0.71, + "step": 6473 + }, + { + "epoch": 0.11186756980923417, + "grad_norm": 1.3670753302281218, + "learning_rate": 1.965064737324261e-05, + "loss": 0.9269, + "step": 6474 + }, + { + "epoch": 0.11188484932264307, + "grad_norm": 1.0376888452904691, + "learning_rate": 1.9650500723487335e-05, + "loss": 0.5964, + "step": 6475 + }, + { + "epoch": 0.11190212883605198, + "grad_norm": 0.717036794378354, + "learning_rate": 1.965035404350596e-05, + "loss": 0.4423, + "step": 6476 + }, + { + "epoch": 0.11191940834946087, + "grad_norm": 0.8434244731506638, + "learning_rate": 1.965020733329894e-05, + "loss": 0.6048, + "step": 6477 + }, + { + "epoch": 0.11193668786286978, + "grad_norm": 0.8789855009664771, + "learning_rate": 1.9650060592866736e-05, + "loss": 0.6498, + "step": 6478 + }, + { + "epoch": 0.11195396737627869, + "grad_norm": 0.9508068519998313, + "learning_rate": 1.9649913822209812e-05, + "loss": 0.6331, + "step": 6479 + }, + { + "epoch": 0.11197124688968758, + "grad_norm": 0.9968743015432088, + "learning_rate": 1.964976702132862e-05, + "loss": 0.5806, + "step": 6480 + }, + { + "epoch": 0.11198852640309649, + "grad_norm": 0.7689548538045643, + "learning_rate": 1.9649620190223627e-05, + "loss": 0.6135, + "step": 6481 + }, + { + "epoch": 0.11200580591650539, + "grad_norm": 1.074124900286975, + "learning_rate": 1.964947332889529e-05, + "loss": 0.6395, + "step": 6482 + }, + { + "epoch": 0.1120230854299143, + "grad_norm": 0.9302654236609977, + "learning_rate": 1.964932643734407e-05, + "loss": 0.7538, + "step": 6483 + }, + { + "epoch": 0.1120403649433232, + "grad_norm": 0.7375154583952351, + "learning_rate": 1.9649179515570426e-05, + "loss": 0.7669, + "step": 6484 + }, + { + "epoch": 0.1120576444567321, + "grad_norm": 1.103583665707458, + "learning_rate": 1.9649032563574817e-05, + "loss": 0.7642, + "step": 6485 + }, + { + "epoch": 0.112074923970141, + "grad_norm": 0.9238502006869286, + "learning_rate": 1.9648885581357706e-05, + "loss": 0.7374, + "step": 6486 + }, + { + "epoch": 0.11209220348354991, + "grad_norm": 1.072745205074499, + "learning_rate": 1.9648738568919553e-05, + "loss": 0.7355, + "step": 6487 + }, + { + "epoch": 0.1121094829969588, + "grad_norm": 0.7822118765956055, + "learning_rate": 1.9648591526260813e-05, + "loss": 0.5971, + "step": 6488 + }, + { + "epoch": 0.11212676251036771, + "grad_norm": 1.1657458319955554, + "learning_rate": 1.9648444453381958e-05, + "loss": 0.8848, + "step": 6489 + }, + { + "epoch": 0.1121440420237766, + "grad_norm": 0.9386595297702479, + "learning_rate": 1.9648297350283436e-05, + "loss": 0.6013, + "step": 6490 + }, + { + "epoch": 0.11216132153718551, + "grad_norm": 0.9665443554494973, + "learning_rate": 1.9648150216965717e-05, + "loss": 0.7458, + "step": 6491 + }, + { + "epoch": 0.11217860105059442, + "grad_norm": 0.8666802104168185, + "learning_rate": 1.9648003053429257e-05, + "loss": 0.5803, + "step": 6492 + }, + { + "epoch": 0.11219588056400331, + "grad_norm": 0.8582978012595793, + "learning_rate": 1.9647855859674517e-05, + "loss": 0.5602, + "step": 6493 + }, + { + "epoch": 0.11221316007741222, + "grad_norm": 0.8358940961812, + "learning_rate": 1.964770863570196e-05, + "loss": 0.7632, + "step": 6494 + }, + { + "epoch": 0.11223043959082113, + "grad_norm": 0.9062166714075548, + "learning_rate": 1.9647561381512047e-05, + "loss": 0.7685, + "step": 6495 + }, + { + "epoch": 0.11224771910423002, + "grad_norm": 1.0523030804195952, + "learning_rate": 1.964741409710524e-05, + "loss": 0.5602, + "step": 6496 + }, + { + "epoch": 0.11226499861763893, + "grad_norm": 0.7480748418508533, + "learning_rate": 1.9647266782481994e-05, + "loss": 0.6251, + "step": 6497 + }, + { + "epoch": 0.11228227813104782, + "grad_norm": 0.9450035762512223, + "learning_rate": 1.964711943764278e-05, + "loss": 0.7788, + "step": 6498 + }, + { + "epoch": 0.11229955764445673, + "grad_norm": 0.49703863246333174, + "learning_rate": 1.964697206258805e-05, + "loss": 0.7885, + "step": 6499 + }, + { + "epoch": 0.11231683715786564, + "grad_norm": 0.9983276454482723, + "learning_rate": 1.9646824657318274e-05, + "loss": 0.716, + "step": 6500 + }, + { + "epoch": 0.11233411667127453, + "grad_norm": 0.9235491510464197, + "learning_rate": 1.964667722183391e-05, + "loss": 0.652, + "step": 6501 + }, + { + "epoch": 0.11235139618468344, + "grad_norm": 0.8444054662533423, + "learning_rate": 1.964652975613542e-05, + "loss": 0.7155, + "step": 6502 + }, + { + "epoch": 0.11236867569809234, + "grad_norm": 1.0715734851656555, + "learning_rate": 1.9646382260223266e-05, + "loss": 0.7508, + "step": 6503 + }, + { + "epoch": 0.11238595521150124, + "grad_norm": 1.7176231132294955, + "learning_rate": 1.9646234734097907e-05, + "loss": 0.4486, + "step": 6504 + }, + { + "epoch": 0.11240323472491015, + "grad_norm": 1.006146509343318, + "learning_rate": 1.964608717775981e-05, + "loss": 0.5776, + "step": 6505 + }, + { + "epoch": 0.11242051423831904, + "grad_norm": 1.0893604734440538, + "learning_rate": 1.9645939591209435e-05, + "loss": 0.6814, + "step": 6506 + }, + { + "epoch": 0.11243779375172795, + "grad_norm": 0.9078296618371563, + "learning_rate": 1.9645791974447244e-05, + "loss": 0.673, + "step": 6507 + }, + { + "epoch": 0.11245507326513686, + "grad_norm": 0.7155209564754905, + "learning_rate": 1.96456443274737e-05, + "loss": 0.5065, + "step": 6508 + }, + { + "epoch": 0.11247235277854575, + "grad_norm": 1.0487025309557718, + "learning_rate": 1.9645496650289263e-05, + "loss": 0.7508, + "step": 6509 + }, + { + "epoch": 0.11248963229195466, + "grad_norm": 1.2864736445238334, + "learning_rate": 1.9645348942894394e-05, + "loss": 0.7814, + "step": 6510 + }, + { + "epoch": 0.11250691180536355, + "grad_norm": 0.9715669135151384, + "learning_rate": 1.9645201205289567e-05, + "loss": 0.4948, + "step": 6511 + }, + { + "epoch": 0.11252419131877246, + "grad_norm": 1.3924898772204284, + "learning_rate": 1.9645053437475232e-05, + "loss": 0.8305, + "step": 6512 + }, + { + "epoch": 0.11254147083218137, + "grad_norm": 1.1450008693815585, + "learning_rate": 1.964490563945186e-05, + "loss": 0.7428, + "step": 6513 + }, + { + "epoch": 0.11255875034559026, + "grad_norm": 1.2972615380195847, + "learning_rate": 1.9644757811219904e-05, + "loss": 0.5937, + "step": 6514 + }, + { + "epoch": 0.11257602985899917, + "grad_norm": 1.1416275013530885, + "learning_rate": 1.964460995277984e-05, + "loss": 0.9077, + "step": 6515 + }, + { + "epoch": 0.11259330937240808, + "grad_norm": 0.8573933326049648, + "learning_rate": 1.964446206413212e-05, + "loss": 0.4223, + "step": 6516 + }, + { + "epoch": 0.11261058888581697, + "grad_norm": 0.5402025737227045, + "learning_rate": 1.9644314145277215e-05, + "loss": 0.7587, + "step": 6517 + }, + { + "epoch": 0.11262786839922588, + "grad_norm": 1.0399566224048822, + "learning_rate": 1.9644166196215586e-05, + "loss": 0.6817, + "step": 6518 + }, + { + "epoch": 0.11264514791263477, + "grad_norm": 1.242398630100238, + "learning_rate": 1.9644018216947692e-05, + "loss": 0.8404, + "step": 6519 + }, + { + "epoch": 0.11266242742604368, + "grad_norm": 1.130033045362893, + "learning_rate": 1.9643870207474007e-05, + "loss": 0.7026, + "step": 6520 + }, + { + "epoch": 0.11267970693945259, + "grad_norm": 1.0784498805260865, + "learning_rate": 1.964372216779498e-05, + "loss": 0.7113, + "step": 6521 + }, + { + "epoch": 0.11269698645286148, + "grad_norm": 1.035123730111871, + "learning_rate": 1.9643574097911085e-05, + "loss": 0.719, + "step": 6522 + }, + { + "epoch": 0.11271426596627039, + "grad_norm": 0.7789851020229337, + "learning_rate": 1.9643425997822783e-05, + "loss": 0.4289, + "step": 6523 + }, + { + "epoch": 0.1127315454796793, + "grad_norm": 1.2670865329399874, + "learning_rate": 1.964327786753054e-05, + "loss": 0.7169, + "step": 6524 + }, + { + "epoch": 0.11274882499308819, + "grad_norm": 0.964207702670611, + "learning_rate": 1.9643129707034814e-05, + "loss": 0.7974, + "step": 6525 + }, + { + "epoch": 0.1127661045064971, + "grad_norm": 1.0030394922994739, + "learning_rate": 1.9642981516336075e-05, + "loss": 0.7445, + "step": 6526 + }, + { + "epoch": 0.112783384019906, + "grad_norm": 1.1607439370918928, + "learning_rate": 1.9642833295434783e-05, + "loss": 0.9077, + "step": 6527 + }, + { + "epoch": 0.1128006635333149, + "grad_norm": 0.9338068068841117, + "learning_rate": 1.9642685044331407e-05, + "loss": 0.7699, + "step": 6528 + }, + { + "epoch": 0.11281794304672381, + "grad_norm": 1.0438306683714509, + "learning_rate": 1.964253676302641e-05, + "loss": 0.7473, + "step": 6529 + }, + { + "epoch": 0.1128352225601327, + "grad_norm": 1.2578705088683746, + "learning_rate": 1.9642388451520254e-05, + "loss": 0.924, + "step": 6530 + }, + { + "epoch": 0.11285250207354161, + "grad_norm": 0.9243441173128158, + "learning_rate": 1.9642240109813403e-05, + "loss": 0.5887, + "step": 6531 + }, + { + "epoch": 0.11286978158695052, + "grad_norm": 0.9720970893239788, + "learning_rate": 1.9642091737906323e-05, + "loss": 0.612, + "step": 6532 + }, + { + "epoch": 0.11288706110035941, + "grad_norm": 1.5989794638168615, + "learning_rate": 1.964194333579948e-05, + "loss": 0.8464, + "step": 6533 + }, + { + "epoch": 0.11290434061376832, + "grad_norm": 1.0390271663607482, + "learning_rate": 1.9641794903493334e-05, + "loss": 0.5769, + "step": 6534 + }, + { + "epoch": 0.11292162012717721, + "grad_norm": 0.9413316809800603, + "learning_rate": 1.964164644098836e-05, + "loss": 0.7142, + "step": 6535 + }, + { + "epoch": 0.11293889964058612, + "grad_norm": 0.8213925900505542, + "learning_rate": 1.964149794828501e-05, + "loss": 0.6583, + "step": 6536 + }, + { + "epoch": 0.11295617915399503, + "grad_norm": 0.9267323555477353, + "learning_rate": 1.964134942538376e-05, + "loss": 0.7571, + "step": 6537 + }, + { + "epoch": 0.11297345866740392, + "grad_norm": 1.0312483362117522, + "learning_rate": 1.964120087228507e-05, + "loss": 0.6967, + "step": 6538 + }, + { + "epoch": 0.11299073818081283, + "grad_norm": 0.9475738946826948, + "learning_rate": 1.9641052288989406e-05, + "loss": 0.6205, + "step": 6539 + }, + { + "epoch": 0.11300801769422172, + "grad_norm": 1.0660236489046242, + "learning_rate": 1.9640903675497234e-05, + "loss": 0.8567, + "step": 6540 + }, + { + "epoch": 0.11302529720763063, + "grad_norm": 0.9025180304282476, + "learning_rate": 1.9640755031809018e-05, + "loss": 0.7637, + "step": 6541 + }, + { + "epoch": 0.11304257672103954, + "grad_norm": 0.8864893949769352, + "learning_rate": 1.9640606357925227e-05, + "loss": 0.6741, + "step": 6542 + }, + { + "epoch": 0.11305985623444843, + "grad_norm": 0.7214684967742944, + "learning_rate": 1.9640457653846323e-05, + "loss": 0.4998, + "step": 6543 + }, + { + "epoch": 0.11307713574785734, + "grad_norm": 0.9291469351949817, + "learning_rate": 1.9640308919572772e-05, + "loss": 0.6612, + "step": 6544 + }, + { + "epoch": 0.11309441526126625, + "grad_norm": 1.0891550835958381, + "learning_rate": 1.964016015510504e-05, + "loss": 0.8494, + "step": 6545 + }, + { + "epoch": 0.11311169477467514, + "grad_norm": 1.143666703435216, + "learning_rate": 1.9640011360443597e-05, + "loss": 0.63, + "step": 6546 + }, + { + "epoch": 0.11312897428808405, + "grad_norm": 0.9349936763352973, + "learning_rate": 1.96398625355889e-05, + "loss": 0.6715, + "step": 6547 + }, + { + "epoch": 0.11314625380149294, + "grad_norm": 0.9821389863830029, + "learning_rate": 1.963971368054143e-05, + "loss": 0.6784, + "step": 6548 + }, + { + "epoch": 0.11316353331490185, + "grad_norm": 1.0095131157510493, + "learning_rate": 1.9639564795301635e-05, + "loss": 0.5981, + "step": 6549 + }, + { + "epoch": 0.11318081282831076, + "grad_norm": 1.288005842042417, + "learning_rate": 1.9639415879869994e-05, + "loss": 0.6886, + "step": 6550 + }, + { + "epoch": 0.11319809234171965, + "grad_norm": 1.063435408399848, + "learning_rate": 1.9639266934246973e-05, + "loss": 0.6851, + "step": 6551 + }, + { + "epoch": 0.11321537185512856, + "grad_norm": 0.792465108703563, + "learning_rate": 1.9639117958433033e-05, + "loss": 0.7433, + "step": 6552 + }, + { + "epoch": 0.11323265136853747, + "grad_norm": 0.8988137020165834, + "learning_rate": 1.9638968952428642e-05, + "loss": 0.4335, + "step": 6553 + }, + { + "epoch": 0.11324993088194636, + "grad_norm": 0.8817111817708683, + "learning_rate": 1.963881991623427e-05, + "loss": 0.4923, + "step": 6554 + }, + { + "epoch": 0.11326721039535527, + "grad_norm": 0.948344224955393, + "learning_rate": 1.963867084985038e-05, + "loss": 0.7382, + "step": 6555 + }, + { + "epoch": 0.11328448990876416, + "grad_norm": 0.9061964341746536, + "learning_rate": 1.9638521753277438e-05, + "loss": 0.7243, + "step": 6556 + }, + { + "epoch": 0.11330176942217307, + "grad_norm": 0.7377126128666812, + "learning_rate": 1.9638372626515917e-05, + "loss": 0.5789, + "step": 6557 + }, + { + "epoch": 0.11331904893558198, + "grad_norm": 1.1180557440501255, + "learning_rate": 1.9638223469566278e-05, + "loss": 0.7649, + "step": 6558 + }, + { + "epoch": 0.11333632844899087, + "grad_norm": 1.2404203852180316, + "learning_rate": 1.963807428242899e-05, + "loss": 0.5333, + "step": 6559 + }, + { + "epoch": 0.11335360796239978, + "grad_norm": 0.7678287107918493, + "learning_rate": 1.9637925065104523e-05, + "loss": 0.6305, + "step": 6560 + }, + { + "epoch": 0.11337088747580869, + "grad_norm": 1.0888236127707422, + "learning_rate": 1.9637775817593343e-05, + "loss": 0.7283, + "step": 6561 + }, + { + "epoch": 0.11338816698921758, + "grad_norm": 1.1327400882533072, + "learning_rate": 1.9637626539895915e-05, + "loss": 0.6784, + "step": 6562 + }, + { + "epoch": 0.11340544650262649, + "grad_norm": 1.0905058820543965, + "learning_rate": 1.963747723201271e-05, + "loss": 0.695, + "step": 6563 + }, + { + "epoch": 0.11342272601603538, + "grad_norm": 1.3653906013025807, + "learning_rate": 1.9637327893944193e-05, + "loss": 0.8246, + "step": 6564 + }, + { + "epoch": 0.11344000552944429, + "grad_norm": 1.1017097429887999, + "learning_rate": 1.963717852569083e-05, + "loss": 0.6071, + "step": 6565 + }, + { + "epoch": 0.1134572850428532, + "grad_norm": 1.4128042776262435, + "learning_rate": 1.9637029127253095e-05, + "loss": 1.0522, + "step": 6566 + }, + { + "epoch": 0.11347456455626209, + "grad_norm": 0.6565340830220066, + "learning_rate": 1.963687969863145e-05, + "loss": 0.39, + "step": 6567 + }, + { + "epoch": 0.113491844069671, + "grad_norm": 0.933165908914049, + "learning_rate": 1.9636730239826368e-05, + "loss": 0.7109, + "step": 6568 + }, + { + "epoch": 0.11350912358307991, + "grad_norm": 0.9732988866565906, + "learning_rate": 1.9636580750838312e-05, + "loss": 0.6622, + "step": 6569 + }, + { + "epoch": 0.1135264030964888, + "grad_norm": 1.0148390603267434, + "learning_rate": 1.9636431231667755e-05, + "loss": 0.6024, + "step": 6570 + }, + { + "epoch": 0.11354368260989771, + "grad_norm": 1.2295930915745676, + "learning_rate": 1.963628168231516e-05, + "loss": 0.7046, + "step": 6571 + }, + { + "epoch": 0.1135609621233066, + "grad_norm": 0.9206979082049154, + "learning_rate": 1.9636132102781e-05, + "loss": 0.8131, + "step": 6572 + }, + { + "epoch": 0.11357824163671551, + "grad_norm": 0.5597012740676653, + "learning_rate": 1.9635982493065743e-05, + "loss": 0.6139, + "step": 6573 + }, + { + "epoch": 0.11359552115012442, + "grad_norm": 0.8080076300402699, + "learning_rate": 1.9635832853169857e-05, + "loss": 0.6015, + "step": 6574 + }, + { + "epoch": 0.11361280066353331, + "grad_norm": 0.7745402494559598, + "learning_rate": 1.9635683183093808e-05, + "loss": 0.7676, + "step": 6575 + }, + { + "epoch": 0.11363008017694222, + "grad_norm": 1.2905550271138722, + "learning_rate": 1.963553348283807e-05, + "loss": 0.7196, + "step": 6576 + }, + { + "epoch": 0.11364735969035111, + "grad_norm": 1.2431799641255112, + "learning_rate": 1.9635383752403106e-05, + "loss": 0.686, + "step": 6577 + }, + { + "epoch": 0.11366463920376002, + "grad_norm": 0.9455991674566288, + "learning_rate": 1.963523399178939e-05, + "loss": 0.5848, + "step": 6578 + }, + { + "epoch": 0.11368191871716893, + "grad_norm": 0.9581342661988219, + "learning_rate": 1.9635084200997385e-05, + "loss": 0.7254, + "step": 6579 + }, + { + "epoch": 0.11369919823057782, + "grad_norm": 0.8988101493502335, + "learning_rate": 1.9634934380027566e-05, + "loss": 0.615, + "step": 6580 + }, + { + "epoch": 0.11371647774398673, + "grad_norm": 0.7275697764255029, + "learning_rate": 1.9634784528880403e-05, + "loss": 0.6824, + "step": 6581 + }, + { + "epoch": 0.11373375725739564, + "grad_norm": 0.8152631099740818, + "learning_rate": 1.963463464755636e-05, + "loss": 0.6607, + "step": 6582 + }, + { + "epoch": 0.11375103677080453, + "grad_norm": 0.9850641198647954, + "learning_rate": 1.963448473605591e-05, + "loss": 0.6535, + "step": 6583 + }, + { + "epoch": 0.11376831628421344, + "grad_norm": 0.9389189133069136, + "learning_rate": 1.9634334794379518e-05, + "loss": 0.6158, + "step": 6584 + }, + { + "epoch": 0.11378559579762233, + "grad_norm": 1.0184146452359402, + "learning_rate": 1.9634184822527663e-05, + "loss": 0.7488, + "step": 6585 + }, + { + "epoch": 0.11380287531103124, + "grad_norm": 0.9037123043203275, + "learning_rate": 1.9634034820500804e-05, + "loss": 0.7232, + "step": 6586 + }, + { + "epoch": 0.11382015482444015, + "grad_norm": 0.9192866694445888, + "learning_rate": 1.963388478829942e-05, + "loss": 0.6541, + "step": 6587 + }, + { + "epoch": 0.11383743433784904, + "grad_norm": 1.0246610574880282, + "learning_rate": 1.9633734725923974e-05, + "loss": 0.7966, + "step": 6588 + }, + { + "epoch": 0.11385471385125795, + "grad_norm": 1.064591959816633, + "learning_rate": 1.963358463337494e-05, + "loss": 0.7633, + "step": 6589 + }, + { + "epoch": 0.11387199336466686, + "grad_norm": 0.7095110337443448, + "learning_rate": 1.9633434510652786e-05, + "loss": 0.6767, + "step": 6590 + }, + { + "epoch": 0.11388927287807575, + "grad_norm": 0.8811714979727904, + "learning_rate": 1.9633284357757986e-05, + "loss": 0.638, + "step": 6591 + }, + { + "epoch": 0.11390655239148466, + "grad_norm": 0.8749872271790137, + "learning_rate": 1.9633134174691e-05, + "loss": 0.4552, + "step": 6592 + }, + { + "epoch": 0.11392383190489355, + "grad_norm": 0.772387263516569, + "learning_rate": 1.9632983961452313e-05, + "loss": 0.4197, + "step": 6593 + }, + { + "epoch": 0.11394111141830246, + "grad_norm": 1.445802721881135, + "learning_rate": 1.9632833718042386e-05, + "loss": 0.7588, + "step": 6594 + }, + { + "epoch": 0.11395839093171137, + "grad_norm": 0.8814648850627202, + "learning_rate": 1.963268344446169e-05, + "loss": 0.7371, + "step": 6595 + }, + { + "epoch": 0.11397567044512026, + "grad_norm": 1.0089863786727689, + "learning_rate": 1.9632533140710698e-05, + "loss": 0.7937, + "step": 6596 + }, + { + "epoch": 0.11399294995852917, + "grad_norm": 1.3777697533982427, + "learning_rate": 1.9632382806789883e-05, + "loss": 0.9291, + "step": 6597 + }, + { + "epoch": 0.11401022947193808, + "grad_norm": 1.1630436110840345, + "learning_rate": 1.963223244269971e-05, + "loss": 0.6427, + "step": 6598 + }, + { + "epoch": 0.11402750898534697, + "grad_norm": 0.9889118263663299, + "learning_rate": 1.9632082048440654e-05, + "loss": 0.739, + "step": 6599 + }, + { + "epoch": 0.11404478849875588, + "grad_norm": 0.7369772850519628, + "learning_rate": 1.9631931624013183e-05, + "loss": 0.7495, + "step": 6600 + }, + { + "epoch": 0.11406206801216477, + "grad_norm": 0.8207011376779774, + "learning_rate": 1.9631781169417772e-05, + "loss": 0.6699, + "step": 6601 + }, + { + "epoch": 0.11407934752557368, + "grad_norm": 1.1377525428750115, + "learning_rate": 1.963163068465489e-05, + "loss": 0.6093, + "step": 6602 + }, + { + "epoch": 0.11409662703898259, + "grad_norm": 0.9861916593298063, + "learning_rate": 1.963148016972501e-05, + "loss": 0.5919, + "step": 6603 + }, + { + "epoch": 0.11411390655239148, + "grad_norm": 0.805684015680268, + "learning_rate": 1.96313296246286e-05, + "loss": 0.7419, + "step": 6604 + }, + { + "epoch": 0.11413118606580039, + "grad_norm": 1.1077037848474272, + "learning_rate": 1.9631179049366136e-05, + "loss": 0.871, + "step": 6605 + }, + { + "epoch": 0.1141484655792093, + "grad_norm": 1.0356528571382557, + "learning_rate": 1.9631028443938084e-05, + "loss": 0.7273, + "step": 6606 + }, + { + "epoch": 0.11416574509261819, + "grad_norm": 1.2998025489605352, + "learning_rate": 1.9630877808344923e-05, + "loss": 0.7928, + "step": 6607 + }, + { + "epoch": 0.1141830246060271, + "grad_norm": 0.8780985681402441, + "learning_rate": 1.963072714258712e-05, + "loss": 0.6822, + "step": 6608 + }, + { + "epoch": 0.11420030411943599, + "grad_norm": 1.5586663002660421, + "learning_rate": 1.9630576446665145e-05, + "loss": 0.7509, + "step": 6609 + }, + { + "epoch": 0.1142175836328449, + "grad_norm": 1.069835603266288, + "learning_rate": 1.9630425720579476e-05, + "loss": 0.8854, + "step": 6610 + }, + { + "epoch": 0.1142348631462538, + "grad_norm": 0.8615210637085717, + "learning_rate": 1.9630274964330578e-05, + "loss": 0.5838, + "step": 6611 + }, + { + "epoch": 0.1142521426596627, + "grad_norm": 1.0401670125029574, + "learning_rate": 1.963012417791893e-05, + "loss": 0.7892, + "step": 6612 + }, + { + "epoch": 0.11426942217307161, + "grad_norm": 1.133364130176869, + "learning_rate": 1.9629973361345e-05, + "loss": 0.6814, + "step": 6613 + }, + { + "epoch": 0.1142867016864805, + "grad_norm": 0.7690227911762865, + "learning_rate": 1.9629822514609263e-05, + "loss": 0.7444, + "step": 6614 + }, + { + "epoch": 0.11430398119988941, + "grad_norm": 0.46526133210079057, + "learning_rate": 1.962967163771219e-05, + "loss": 0.5326, + "step": 6615 + }, + { + "epoch": 0.11432126071329832, + "grad_norm": 0.7734175158499983, + "learning_rate": 1.962952073065425e-05, + "loss": 0.5361, + "step": 6616 + }, + { + "epoch": 0.11433854022670721, + "grad_norm": 1.138559587594199, + "learning_rate": 1.9629369793435922e-05, + "loss": 0.7893, + "step": 6617 + }, + { + "epoch": 0.11435581974011612, + "grad_norm": 0.8345827327427014, + "learning_rate": 1.9629218826057675e-05, + "loss": 0.6292, + "step": 6618 + }, + { + "epoch": 0.11437309925352503, + "grad_norm": 1.4569210505549532, + "learning_rate": 1.9629067828519983e-05, + "loss": 0.7111, + "step": 6619 + }, + { + "epoch": 0.11439037876693392, + "grad_norm": 1.1760207822703834, + "learning_rate": 1.962891680082332e-05, + "loss": 0.7302, + "step": 6620 + }, + { + "epoch": 0.11440765828034283, + "grad_norm": 1.1287646800960318, + "learning_rate": 1.9628765742968158e-05, + "loss": 0.7215, + "step": 6621 + }, + { + "epoch": 0.11442493779375172, + "grad_norm": 1.1656578012138357, + "learning_rate": 1.9628614654954967e-05, + "loss": 0.8818, + "step": 6622 + }, + { + "epoch": 0.11444221730716063, + "grad_norm": 1.1141929959675538, + "learning_rate": 1.9628463536784226e-05, + "loss": 0.4725, + "step": 6623 + }, + { + "epoch": 0.11445949682056954, + "grad_norm": 0.9609648109327207, + "learning_rate": 1.9628312388456404e-05, + "loss": 0.7442, + "step": 6624 + }, + { + "epoch": 0.11447677633397843, + "grad_norm": 0.899027297529696, + "learning_rate": 1.9628161209971976e-05, + "loss": 0.6347, + "step": 6625 + }, + { + "epoch": 0.11449405584738734, + "grad_norm": 0.680649357120366, + "learning_rate": 1.9628010001331417e-05, + "loss": 0.4056, + "step": 6626 + }, + { + "epoch": 0.11451133536079625, + "grad_norm": 0.9959765939009011, + "learning_rate": 1.9627858762535198e-05, + "loss": 0.8853, + "step": 6627 + }, + { + "epoch": 0.11452861487420514, + "grad_norm": 0.8834923450346222, + "learning_rate": 1.9627707493583796e-05, + "loss": 0.682, + "step": 6628 + }, + { + "epoch": 0.11454589438761405, + "grad_norm": 0.7550970685726193, + "learning_rate": 1.962755619447768e-05, + "loss": 0.5047, + "step": 6629 + }, + { + "epoch": 0.11456317390102294, + "grad_norm": 1.1928670441158367, + "learning_rate": 1.9627404865217324e-05, + "loss": 0.913, + "step": 6630 + }, + { + "epoch": 0.11458045341443185, + "grad_norm": 1.002951974507783, + "learning_rate": 1.9627253505803206e-05, + "loss": 0.7029, + "step": 6631 + }, + { + "epoch": 0.11459773292784076, + "grad_norm": 0.773553922008983, + "learning_rate": 1.96271021162358e-05, + "loss": 0.5685, + "step": 6632 + }, + { + "epoch": 0.11461501244124965, + "grad_norm": 1.1279303508313374, + "learning_rate": 1.9626950696515576e-05, + "loss": 0.822, + "step": 6633 + }, + { + "epoch": 0.11463229195465856, + "grad_norm": 0.959487385064141, + "learning_rate": 1.9626799246643015e-05, + "loss": 0.6882, + "step": 6634 + }, + { + "epoch": 0.11464957146806747, + "grad_norm": 0.9348380362480092, + "learning_rate": 1.962664776661858e-05, + "loss": 0.6359, + "step": 6635 + }, + { + "epoch": 0.11466685098147636, + "grad_norm": 0.8356076945316482, + "learning_rate": 1.962649625644276e-05, + "loss": 0.5653, + "step": 6636 + }, + { + "epoch": 0.11468413049488527, + "grad_norm": 0.5548769744796929, + "learning_rate": 1.962634471611602e-05, + "loss": 0.3133, + "step": 6637 + }, + { + "epoch": 0.11470141000829416, + "grad_norm": 1.2404663537156309, + "learning_rate": 1.962619314563883e-05, + "loss": 0.8101, + "step": 6638 + }, + { + "epoch": 0.11471868952170307, + "grad_norm": 1.027409177976251, + "learning_rate": 1.962604154501168e-05, + "loss": 0.5708, + "step": 6639 + }, + { + "epoch": 0.11473596903511198, + "grad_norm": 0.5400174237601783, + "learning_rate": 1.9625889914235035e-05, + "loss": 0.9887, + "step": 6640 + }, + { + "epoch": 0.11475324854852087, + "grad_norm": 0.9622322912651934, + "learning_rate": 1.962573825330937e-05, + "loss": 0.5628, + "step": 6641 + }, + { + "epoch": 0.11477052806192978, + "grad_norm": 0.8578930349819582, + "learning_rate": 1.962558656223516e-05, + "loss": 0.718, + "step": 6642 + }, + { + "epoch": 0.11478780757533869, + "grad_norm": 0.726634921008546, + "learning_rate": 1.9625434841012884e-05, + "loss": 0.6011, + "step": 6643 + }, + { + "epoch": 0.11480508708874758, + "grad_norm": 1.1319574791758689, + "learning_rate": 1.962528308964301e-05, + "loss": 0.7293, + "step": 6644 + }, + { + "epoch": 0.11482236660215649, + "grad_norm": 1.0744002642512367, + "learning_rate": 1.9625131308126023e-05, + "loss": 0.7894, + "step": 6645 + }, + { + "epoch": 0.11483964611556538, + "grad_norm": 1.2296030632615071, + "learning_rate": 1.9624979496462394e-05, + "loss": 0.8898, + "step": 6646 + }, + { + "epoch": 0.11485692562897429, + "grad_norm": 0.7481121033057141, + "learning_rate": 1.9624827654652594e-05, + "loss": 0.6297, + "step": 6647 + }, + { + "epoch": 0.1148742051423832, + "grad_norm": 1.0513175559316632, + "learning_rate": 1.9624675782697102e-05, + "loss": 0.7018, + "step": 6648 + }, + { + "epoch": 0.11489148465579209, + "grad_norm": 1.4379509964889563, + "learning_rate": 1.9624523880596395e-05, + "loss": 0.619, + "step": 6649 + }, + { + "epoch": 0.114908764169201, + "grad_norm": 1.2337699898825962, + "learning_rate": 1.9624371948350948e-05, + "loss": 0.6249, + "step": 6650 + }, + { + "epoch": 0.11492604368260989, + "grad_norm": 0.7364652180309239, + "learning_rate": 1.9624219985961238e-05, + "loss": 0.6212, + "step": 6651 + }, + { + "epoch": 0.1149433231960188, + "grad_norm": 0.8275790670041177, + "learning_rate": 1.962406799342774e-05, + "loss": 0.7189, + "step": 6652 + }, + { + "epoch": 0.1149606027094277, + "grad_norm": 1.3661112632870662, + "learning_rate": 1.9623915970750926e-05, + "loss": 0.8898, + "step": 6653 + }, + { + "epoch": 0.1149778822228366, + "grad_norm": 0.8103159422859846, + "learning_rate": 1.9623763917931277e-05, + "loss": 0.6172, + "step": 6654 + }, + { + "epoch": 0.11499516173624551, + "grad_norm": 0.9533956731566257, + "learning_rate": 1.9623611834969273e-05, + "loss": 0.5653, + "step": 6655 + }, + { + "epoch": 0.11501244124965442, + "grad_norm": 1.0349676029258186, + "learning_rate": 1.9623459721865378e-05, + "loss": 0.6287, + "step": 6656 + }, + { + "epoch": 0.11502972076306331, + "grad_norm": 0.8620813841183704, + "learning_rate": 1.962330757862008e-05, + "loss": 0.6364, + "step": 6657 + }, + { + "epoch": 0.11504700027647222, + "grad_norm": 1.106552239879887, + "learning_rate": 1.9623155405233853e-05, + "loss": 0.5903, + "step": 6658 + }, + { + "epoch": 0.11506427978988111, + "grad_norm": 0.8591437347849674, + "learning_rate": 1.9623003201707168e-05, + "loss": 0.6155, + "step": 6659 + }, + { + "epoch": 0.11508155930329002, + "grad_norm": 0.9180639488850617, + "learning_rate": 1.9622850968040506e-05, + "loss": 0.6664, + "step": 6660 + }, + { + "epoch": 0.11509883881669893, + "grad_norm": 1.2839119107737698, + "learning_rate": 1.9622698704234345e-05, + "loss": 0.6786, + "step": 6661 + }, + { + "epoch": 0.11511611833010782, + "grad_norm": 0.797999842450619, + "learning_rate": 1.962254641028916e-05, + "loss": 0.6335, + "step": 6662 + }, + { + "epoch": 0.11513339784351673, + "grad_norm": 1.043292036962474, + "learning_rate": 1.9622394086205427e-05, + "loss": 0.7788, + "step": 6663 + }, + { + "epoch": 0.11515067735692563, + "grad_norm": 0.648119645574848, + "learning_rate": 1.9622241731983625e-05, + "loss": 0.371, + "step": 6664 + }, + { + "epoch": 0.11516795687033453, + "grad_norm": 1.0676779038144835, + "learning_rate": 1.9622089347624232e-05, + "loss": 0.6882, + "step": 6665 + }, + { + "epoch": 0.11518523638374344, + "grad_norm": 1.0450781764919168, + "learning_rate": 1.9621936933127722e-05, + "loss": 0.7875, + "step": 6666 + }, + { + "epoch": 0.11520251589715233, + "grad_norm": 0.8422512409983273, + "learning_rate": 1.9621784488494575e-05, + "loss": 0.6354, + "step": 6667 + }, + { + "epoch": 0.11521979541056124, + "grad_norm": 1.0671749038325857, + "learning_rate": 1.9621632013725267e-05, + "loss": 0.6981, + "step": 6668 + }, + { + "epoch": 0.11523707492397015, + "grad_norm": 0.9765215460056001, + "learning_rate": 1.962147950882028e-05, + "loss": 0.6027, + "step": 6669 + }, + { + "epoch": 0.11525435443737904, + "grad_norm": 0.893937918266038, + "learning_rate": 1.9621326973780083e-05, + "loss": 0.6768, + "step": 6670 + }, + { + "epoch": 0.11527163395078795, + "grad_norm": 0.7548900737009065, + "learning_rate": 1.9621174408605163e-05, + "loss": 0.8568, + "step": 6671 + }, + { + "epoch": 0.11528891346419685, + "grad_norm": 1.1366746492960016, + "learning_rate": 1.9621021813295992e-05, + "loss": 0.7522, + "step": 6672 + }, + { + "epoch": 0.11530619297760575, + "grad_norm": 2.259706632990237, + "learning_rate": 1.962086918785305e-05, + "loss": 0.6573, + "step": 6673 + }, + { + "epoch": 0.11532347249101466, + "grad_norm": 1.111025349923588, + "learning_rate": 1.9620716532276812e-05, + "loss": 0.6462, + "step": 6674 + }, + { + "epoch": 0.11534075200442355, + "grad_norm": 1.2494251720681344, + "learning_rate": 1.962056384656776e-05, + "loss": 0.7696, + "step": 6675 + }, + { + "epoch": 0.11535803151783246, + "grad_norm": 0.9685465897932387, + "learning_rate": 1.9620411130726372e-05, + "loss": 0.7239, + "step": 6676 + }, + { + "epoch": 0.11537531103124136, + "grad_norm": 0.9914754892699236, + "learning_rate": 1.9620258384753126e-05, + "loss": 0.8152, + "step": 6677 + }, + { + "epoch": 0.11539259054465026, + "grad_norm": 0.8414311915564381, + "learning_rate": 1.9620105608648496e-05, + "loss": 0.7444, + "step": 6678 + }, + { + "epoch": 0.11540987005805917, + "grad_norm": 1.144495237467782, + "learning_rate": 1.9619952802412967e-05, + "loss": 0.7601, + "step": 6679 + }, + { + "epoch": 0.11542714957146807, + "grad_norm": 1.0783903420750798, + "learning_rate": 1.9619799966047014e-05, + "loss": 0.5577, + "step": 6680 + }, + { + "epoch": 0.11544442908487697, + "grad_norm": 0.8648449463443867, + "learning_rate": 1.9619647099551118e-05, + "loss": 0.7608, + "step": 6681 + }, + { + "epoch": 0.11546170859828588, + "grad_norm": 0.9314989026689884, + "learning_rate": 1.9619494202925754e-05, + "loss": 0.6794, + "step": 6682 + }, + { + "epoch": 0.11547898811169477, + "grad_norm": 0.871150624058673, + "learning_rate": 1.9619341276171405e-05, + "loss": 0.6224, + "step": 6683 + }, + { + "epoch": 0.11549626762510368, + "grad_norm": 1.148525864680335, + "learning_rate": 1.9619188319288545e-05, + "loss": 0.7666, + "step": 6684 + }, + { + "epoch": 0.11551354713851258, + "grad_norm": 0.9228094422705019, + "learning_rate": 1.9619035332277658e-05, + "loss": 0.6902, + "step": 6685 + }, + { + "epoch": 0.11553082665192148, + "grad_norm": 0.7932679723757716, + "learning_rate": 1.961888231513922e-05, + "loss": 0.669, + "step": 6686 + }, + { + "epoch": 0.11554810616533039, + "grad_norm": 0.839625498524712, + "learning_rate": 1.9618729267873714e-05, + "loss": 0.8736, + "step": 6687 + }, + { + "epoch": 0.1155653856787393, + "grad_norm": 0.873684041738111, + "learning_rate": 1.9618576190481616e-05, + "loss": 0.8341, + "step": 6688 + }, + { + "epoch": 0.11558266519214819, + "grad_norm": 1.16079168890373, + "learning_rate": 1.9618423082963406e-05, + "loss": 0.6335, + "step": 6689 + }, + { + "epoch": 0.1155999447055571, + "grad_norm": 0.9960825314141697, + "learning_rate": 1.9618269945319564e-05, + "loss": 0.684, + "step": 6690 + }, + { + "epoch": 0.11561722421896599, + "grad_norm": 0.9706524876227185, + "learning_rate": 1.9618116777550572e-05, + "loss": 0.5506, + "step": 6691 + }, + { + "epoch": 0.1156345037323749, + "grad_norm": 0.8272958097073455, + "learning_rate": 1.9617963579656904e-05, + "loss": 0.649, + "step": 6692 + }, + { + "epoch": 0.1156517832457838, + "grad_norm": 0.7578444870461148, + "learning_rate": 1.961781035163904e-05, + "loss": 0.6126, + "step": 6693 + }, + { + "epoch": 0.1156690627591927, + "grad_norm": 1.0981895085218156, + "learning_rate": 1.961765709349747e-05, + "loss": 0.7288, + "step": 6694 + }, + { + "epoch": 0.1156863422726016, + "grad_norm": 0.5474347484123054, + "learning_rate": 1.9617503805232665e-05, + "loss": 0.698, + "step": 6695 + }, + { + "epoch": 0.1157036217860105, + "grad_norm": 0.860762315911703, + "learning_rate": 1.9617350486845105e-05, + "loss": 0.5795, + "step": 6696 + }, + { + "epoch": 0.11572090129941941, + "grad_norm": 0.7562433534888813, + "learning_rate": 1.9617197138335274e-05, + "loss": 0.8922, + "step": 6697 + }, + { + "epoch": 0.11573818081282831, + "grad_norm": 0.6904570809500601, + "learning_rate": 1.961704375970365e-05, + "loss": 0.6217, + "step": 6698 + }, + { + "epoch": 0.11575546032623721, + "grad_norm": 0.7591737922616816, + "learning_rate": 1.961689035095072e-05, + "loss": 0.6748, + "step": 6699 + }, + { + "epoch": 0.11577273983964612, + "grad_norm": 0.868428443592686, + "learning_rate": 1.961673691207695e-05, + "loss": 0.6689, + "step": 6700 + }, + { + "epoch": 0.11579001935305502, + "grad_norm": 0.8975763750397928, + "learning_rate": 1.9616583443082834e-05, + "loss": 0.6177, + "step": 6701 + }, + { + "epoch": 0.11580729886646392, + "grad_norm": 0.92507986306015, + "learning_rate": 1.9616429943968845e-05, + "loss": 0.6903, + "step": 6702 + }, + { + "epoch": 0.11582457837987283, + "grad_norm": 0.9889855799835651, + "learning_rate": 1.9616276414735468e-05, + "loss": 0.4986, + "step": 6703 + }, + { + "epoch": 0.11584185789328172, + "grad_norm": 0.8518276463431018, + "learning_rate": 1.961612285538318e-05, + "loss": 0.7011, + "step": 6704 + }, + { + "epoch": 0.11585913740669063, + "grad_norm": 0.997441288467005, + "learning_rate": 1.9615969265912467e-05, + "loss": 0.9892, + "step": 6705 + }, + { + "epoch": 0.11587641692009953, + "grad_norm": 0.9599550754222141, + "learning_rate": 1.961581564632381e-05, + "loss": 0.8435, + "step": 6706 + }, + { + "epoch": 0.11589369643350843, + "grad_norm": 0.934959741479594, + "learning_rate": 1.961566199661768e-05, + "loss": 0.6853, + "step": 6707 + }, + { + "epoch": 0.11591097594691734, + "grad_norm": 0.9697073561671447, + "learning_rate": 1.9615508316794575e-05, + "loss": 0.7288, + "step": 6708 + }, + { + "epoch": 0.11592825546032624, + "grad_norm": 0.9853567317885755, + "learning_rate": 1.9615354606854962e-05, + "loss": 0.6614, + "step": 6709 + }, + { + "epoch": 0.11594553497373514, + "grad_norm": 0.8668384666677722, + "learning_rate": 1.9615200866799327e-05, + "loss": 0.7589, + "step": 6710 + }, + { + "epoch": 0.11596281448714404, + "grad_norm": 0.819675436251484, + "learning_rate": 1.9615047096628156e-05, + "loss": 0.6729, + "step": 6711 + }, + { + "epoch": 0.11598009400055294, + "grad_norm": 1.3702092176636393, + "learning_rate": 1.9614893296341922e-05, + "loss": 0.7687, + "step": 6712 + }, + { + "epoch": 0.11599737351396185, + "grad_norm": 0.8513826346905043, + "learning_rate": 1.9614739465941115e-05, + "loss": 0.6543, + "step": 6713 + }, + { + "epoch": 0.11601465302737075, + "grad_norm": 1.0155995590768323, + "learning_rate": 1.961458560542621e-05, + "loss": 0.968, + "step": 6714 + }, + { + "epoch": 0.11603193254077965, + "grad_norm": 0.9155760741382789, + "learning_rate": 1.9614431714797695e-05, + "loss": 0.5719, + "step": 6715 + }, + { + "epoch": 0.11604921205418856, + "grad_norm": 0.7107121043238014, + "learning_rate": 1.9614277794056047e-05, + "loss": 0.5975, + "step": 6716 + }, + { + "epoch": 0.11606649156759746, + "grad_norm": 0.8946038432299838, + "learning_rate": 1.9614123843201755e-05, + "loss": 0.6149, + "step": 6717 + }, + { + "epoch": 0.11608377108100636, + "grad_norm": 0.9974861834586154, + "learning_rate": 1.9613969862235292e-05, + "loss": 0.6659, + "step": 6718 + }, + { + "epoch": 0.11610105059441526, + "grad_norm": 0.739087063781547, + "learning_rate": 1.9613815851157147e-05, + "loss": 0.5751, + "step": 6719 + }, + { + "epoch": 0.11611833010782416, + "grad_norm": 0.9564341713717514, + "learning_rate": 1.96136618099678e-05, + "loss": 0.7848, + "step": 6720 + }, + { + "epoch": 0.11613560962123307, + "grad_norm": 1.048781384053111, + "learning_rate": 1.961350773866773e-05, + "loss": 0.855, + "step": 6721 + }, + { + "epoch": 0.11615288913464197, + "grad_norm": 1.0780703013507664, + "learning_rate": 1.9613353637257427e-05, + "loss": 0.6818, + "step": 6722 + }, + { + "epoch": 0.11617016864805087, + "grad_norm": 0.8499536468959799, + "learning_rate": 1.9613199505737367e-05, + "loss": 0.7911, + "step": 6723 + }, + { + "epoch": 0.11618744816145977, + "grad_norm": 1.098287720926257, + "learning_rate": 1.961304534410804e-05, + "loss": 0.6496, + "step": 6724 + }, + { + "epoch": 0.11620472767486868, + "grad_norm": 0.5951441339759426, + "learning_rate": 1.961289115236992e-05, + "loss": 0.9543, + "step": 6725 + }, + { + "epoch": 0.11622200718827758, + "grad_norm": 1.130038212676099, + "learning_rate": 1.96127369305235e-05, + "loss": 0.7096, + "step": 6726 + }, + { + "epoch": 0.11623928670168648, + "grad_norm": 0.49221898147268045, + "learning_rate": 1.961258267856925e-05, + "loss": 0.7683, + "step": 6727 + }, + { + "epoch": 0.11625656621509538, + "grad_norm": 0.8985262408274309, + "learning_rate": 1.9612428396507665e-05, + "loss": 0.6061, + "step": 6728 + }, + { + "epoch": 0.11627384572850429, + "grad_norm": 1.2052730047744507, + "learning_rate": 1.961227408433922e-05, + "loss": 0.679, + "step": 6729 + }, + { + "epoch": 0.11629112524191319, + "grad_norm": 0.9889563839325355, + "learning_rate": 1.9612119742064406e-05, + "loss": 0.7432, + "step": 6730 + }, + { + "epoch": 0.11630840475532209, + "grad_norm": 0.8827447506595218, + "learning_rate": 1.96119653696837e-05, + "loss": 0.7898, + "step": 6731 + }, + { + "epoch": 0.116325684268731, + "grad_norm": 1.223002373761608, + "learning_rate": 1.961181096719759e-05, + "loss": 0.5997, + "step": 6732 + }, + { + "epoch": 0.11634296378213989, + "grad_norm": 0.7780549644055805, + "learning_rate": 1.961165653460656e-05, + "loss": 0.4896, + "step": 6733 + }, + { + "epoch": 0.1163602432955488, + "grad_norm": 0.9998783321597383, + "learning_rate": 1.9611502071911087e-05, + "loss": 0.6745, + "step": 6734 + }, + { + "epoch": 0.1163775228089577, + "grad_norm": 0.829295049535748, + "learning_rate": 1.961134757911166e-05, + "loss": 0.9459, + "step": 6735 + }, + { + "epoch": 0.1163948023223666, + "grad_norm": 0.8522471834138725, + "learning_rate": 1.961119305620876e-05, + "loss": 0.7664, + "step": 6736 + }, + { + "epoch": 0.1164120818357755, + "grad_norm": 1.319541710317936, + "learning_rate": 1.9611038503202877e-05, + "loss": 0.8183, + "step": 6737 + }, + { + "epoch": 0.11642936134918441, + "grad_norm": 0.9894237944075922, + "learning_rate": 1.9610883920094488e-05, + "loss": 0.8311, + "step": 6738 + }, + { + "epoch": 0.1164466408625933, + "grad_norm": 0.9607266405063851, + "learning_rate": 1.961072930688408e-05, + "loss": 0.7282, + "step": 6739 + }, + { + "epoch": 0.11646392037600221, + "grad_norm": 1.084402562355748, + "learning_rate": 1.961057466357214e-05, + "loss": 0.7186, + "step": 6740 + }, + { + "epoch": 0.11648119988941111, + "grad_norm": 0.7354309029168751, + "learning_rate": 1.961041999015915e-05, + "loss": 0.4887, + "step": 6741 + }, + { + "epoch": 0.11649847940282002, + "grad_norm": 1.1037587862923783, + "learning_rate": 1.961026528664559e-05, + "loss": 0.8418, + "step": 6742 + }, + { + "epoch": 0.11651575891622892, + "grad_norm": 0.7574042364884013, + "learning_rate": 1.9610110553031953e-05, + "loss": 0.7067, + "step": 6743 + }, + { + "epoch": 0.11653303842963782, + "grad_norm": 1.0690535817270614, + "learning_rate": 1.9609955789318718e-05, + "loss": 0.5927, + "step": 6744 + }, + { + "epoch": 0.11655031794304672, + "grad_norm": 0.9974585575009653, + "learning_rate": 1.960980099550637e-05, + "loss": 0.7333, + "step": 6745 + }, + { + "epoch": 0.11656759745645563, + "grad_norm": 0.8956184331625007, + "learning_rate": 1.9609646171595396e-05, + "loss": 0.6966, + "step": 6746 + }, + { + "epoch": 0.11658487696986453, + "grad_norm": 0.9818998265849223, + "learning_rate": 1.960949131758628e-05, + "loss": 0.8306, + "step": 6747 + }, + { + "epoch": 0.11660215648327343, + "grad_norm": 0.7996210371801814, + "learning_rate": 1.9609336433479506e-05, + "loss": 0.6805, + "step": 6748 + }, + { + "epoch": 0.11661943599668233, + "grad_norm": 1.0918082259707924, + "learning_rate": 1.960918151927556e-05, + "loss": 0.801, + "step": 6749 + }, + { + "epoch": 0.11663671551009124, + "grad_norm": 1.1686261855660816, + "learning_rate": 1.960902657497493e-05, + "loss": 0.5524, + "step": 6750 + }, + { + "epoch": 0.11665399502350014, + "grad_norm": 0.939944996548622, + "learning_rate": 1.9608871600578095e-05, + "loss": 0.6154, + "step": 6751 + }, + { + "epoch": 0.11667127453690904, + "grad_norm": 1.0284259606945512, + "learning_rate": 1.9608716596085547e-05, + "loss": 0.9377, + "step": 6752 + }, + { + "epoch": 0.11668855405031794, + "grad_norm": 1.0821932875901912, + "learning_rate": 1.9608561561497765e-05, + "loss": 0.7226, + "step": 6753 + }, + { + "epoch": 0.11670583356372685, + "grad_norm": 0.9469990822429694, + "learning_rate": 1.960840649681524e-05, + "loss": 0.7203, + "step": 6754 + }, + { + "epoch": 0.11672311307713575, + "grad_norm": 0.8590132208307584, + "learning_rate": 1.9608251402038455e-05, + "loss": 0.5289, + "step": 6755 + }, + { + "epoch": 0.11674039259054465, + "grad_norm": 0.7268184553566159, + "learning_rate": 1.9608096277167895e-05, + "loss": 0.6656, + "step": 6756 + }, + { + "epoch": 0.11675767210395355, + "grad_norm": 1.0919158877681097, + "learning_rate": 1.960794112220405e-05, + "loss": 0.8429, + "step": 6757 + }, + { + "epoch": 0.11677495161736245, + "grad_norm": 0.9202490766098345, + "learning_rate": 1.9607785937147405e-05, + "loss": 0.7099, + "step": 6758 + }, + { + "epoch": 0.11679223113077136, + "grad_norm": 0.9613986599165378, + "learning_rate": 1.960763072199844e-05, + "loss": 0.7384, + "step": 6759 + }, + { + "epoch": 0.11680951064418026, + "grad_norm": 0.7239194354111206, + "learning_rate": 1.9607475476757648e-05, + "loss": 0.5597, + "step": 6760 + }, + { + "epoch": 0.11682679015758916, + "grad_norm": 0.850874861940738, + "learning_rate": 1.960732020142551e-05, + "loss": 0.5989, + "step": 6761 + }, + { + "epoch": 0.11684406967099807, + "grad_norm": 1.2186224861140569, + "learning_rate": 1.9607164896002516e-05, + "loss": 0.7506, + "step": 6762 + }, + { + "epoch": 0.11686134918440697, + "grad_norm": 0.8190105858116917, + "learning_rate": 1.9607009560489153e-05, + "loss": 0.7118, + "step": 6763 + }, + { + "epoch": 0.11687862869781587, + "grad_norm": 1.1289871879078937, + "learning_rate": 1.9606854194885904e-05, + "loss": 0.8322, + "step": 6764 + }, + { + "epoch": 0.11689590821122477, + "grad_norm": 1.2575721538610645, + "learning_rate": 1.960669879919326e-05, + "loss": 0.6761, + "step": 6765 + }, + { + "epoch": 0.11691318772463367, + "grad_norm": 0.9909950106526839, + "learning_rate": 1.96065433734117e-05, + "loss": 0.4878, + "step": 6766 + }, + { + "epoch": 0.11693046723804258, + "grad_norm": 0.5675212387013153, + "learning_rate": 1.9606387917541725e-05, + "loss": 0.7823, + "step": 6767 + }, + { + "epoch": 0.11694774675145148, + "grad_norm": 0.6393813809109158, + "learning_rate": 1.9606232431583808e-05, + "loss": 0.6368, + "step": 6768 + }, + { + "epoch": 0.11696502626486038, + "grad_norm": 0.917058114326013, + "learning_rate": 1.960607691553844e-05, + "loss": 0.7079, + "step": 6769 + }, + { + "epoch": 0.11698230577826928, + "grad_norm": 1.1855967497729822, + "learning_rate": 1.960592136940611e-05, + "loss": 0.7625, + "step": 6770 + }, + { + "epoch": 0.11699958529167818, + "grad_norm": 0.9585575914580752, + "learning_rate": 1.9605765793187302e-05, + "loss": 0.6695, + "step": 6771 + }, + { + "epoch": 0.11701686480508709, + "grad_norm": 0.9426558157103213, + "learning_rate": 1.960561018688251e-05, + "loss": 0.6362, + "step": 6772 + }, + { + "epoch": 0.11703414431849599, + "grad_norm": 1.214106935922389, + "learning_rate": 1.9605454550492213e-05, + "loss": 0.6169, + "step": 6773 + }, + { + "epoch": 0.1170514238319049, + "grad_norm": 1.1026972767817995, + "learning_rate": 1.9605298884016904e-05, + "loss": 0.6251, + "step": 6774 + }, + { + "epoch": 0.1170687033453138, + "grad_norm": 0.9412803623227811, + "learning_rate": 1.960514318745707e-05, + "loss": 0.5502, + "step": 6775 + }, + { + "epoch": 0.1170859828587227, + "grad_norm": 0.7993043280652081, + "learning_rate": 1.9604987460813195e-05, + "loss": 0.7473, + "step": 6776 + }, + { + "epoch": 0.1171032623721316, + "grad_norm": 0.691240248103297, + "learning_rate": 1.960483170408577e-05, + "loss": 0.6011, + "step": 6777 + }, + { + "epoch": 0.1171205418855405, + "grad_norm": 0.5345482865375815, + "learning_rate": 1.9604675917275283e-05, + "loss": 0.8051, + "step": 6778 + }, + { + "epoch": 0.1171378213989494, + "grad_norm": 0.97357877878733, + "learning_rate": 1.960452010038222e-05, + "loss": 0.7019, + "step": 6779 + }, + { + "epoch": 0.11715510091235831, + "grad_norm": 1.077681080916985, + "learning_rate": 1.9604364253407074e-05, + "loss": 0.7862, + "step": 6780 + }, + { + "epoch": 0.1171723804257672, + "grad_norm": 1.6735765546108912, + "learning_rate": 1.9604208376350328e-05, + "loss": 0.858, + "step": 6781 + }, + { + "epoch": 0.11718965993917611, + "grad_norm": 1.1005564178560514, + "learning_rate": 1.9604052469212468e-05, + "loss": 0.7212, + "step": 6782 + }, + { + "epoch": 0.11720693945258502, + "grad_norm": 0.7732896185018266, + "learning_rate": 1.9603896531993987e-05, + "loss": 0.666, + "step": 6783 + }, + { + "epoch": 0.11722421896599391, + "grad_norm": 0.9775478343362425, + "learning_rate": 1.960374056469537e-05, + "loss": 0.5352, + "step": 6784 + }, + { + "epoch": 0.11724149847940282, + "grad_norm": 0.4862994927750952, + "learning_rate": 1.9603584567317114e-05, + "loss": 0.723, + "step": 6785 + }, + { + "epoch": 0.11725877799281172, + "grad_norm": 1.0367351387065007, + "learning_rate": 1.9603428539859697e-05, + "loss": 0.7111, + "step": 6786 + }, + { + "epoch": 0.11727605750622062, + "grad_norm": 1.3005946008198033, + "learning_rate": 1.9603272482323616e-05, + "loss": 0.8323, + "step": 6787 + }, + { + "epoch": 0.11729333701962953, + "grad_norm": 0.6574191624712407, + "learning_rate": 1.960311639470935e-05, + "loss": 0.6142, + "step": 6788 + }, + { + "epoch": 0.11731061653303843, + "grad_norm": 1.3439421212418892, + "learning_rate": 1.9602960277017395e-05, + "loss": 0.632, + "step": 6789 + }, + { + "epoch": 0.11732789604644733, + "grad_norm": 1.0147048398815581, + "learning_rate": 1.960280412924824e-05, + "loss": 0.5911, + "step": 6790 + }, + { + "epoch": 0.11734517555985624, + "grad_norm": 0.8918516798190769, + "learning_rate": 1.9602647951402375e-05, + "loss": 0.8186, + "step": 6791 + }, + { + "epoch": 0.11736245507326513, + "grad_norm": 0.9767305758035532, + "learning_rate": 1.9602491743480282e-05, + "loss": 0.6557, + "step": 6792 + }, + { + "epoch": 0.11737973458667404, + "grad_norm": 0.9880603230324226, + "learning_rate": 1.960233550548246e-05, + "loss": 0.6707, + "step": 6793 + }, + { + "epoch": 0.11739701410008294, + "grad_norm": 0.7760477598957913, + "learning_rate": 1.960217923740939e-05, + "loss": 0.7351, + "step": 6794 + }, + { + "epoch": 0.11741429361349184, + "grad_norm": 1.0274455679119072, + "learning_rate": 1.9602022939261564e-05, + "loss": 0.6339, + "step": 6795 + }, + { + "epoch": 0.11743157312690075, + "grad_norm": 1.0652585954070712, + "learning_rate": 1.9601866611039475e-05, + "loss": 0.7135, + "step": 6796 + }, + { + "epoch": 0.11744885264030965, + "grad_norm": 0.8398492094930129, + "learning_rate": 1.960171025274361e-05, + "loss": 0.6127, + "step": 6797 + }, + { + "epoch": 0.11746613215371855, + "grad_norm": 1.3423825798994737, + "learning_rate": 1.9601553864374456e-05, + "loss": 0.5687, + "step": 6798 + }, + { + "epoch": 0.11748341166712746, + "grad_norm": 1.0640651167557993, + "learning_rate": 1.960139744593251e-05, + "loss": 0.8914, + "step": 6799 + }, + { + "epoch": 0.11750069118053635, + "grad_norm": 1.1407596321296232, + "learning_rate": 1.9601240997418254e-05, + "loss": 0.5206, + "step": 6800 + }, + { + "epoch": 0.11751797069394526, + "grad_norm": 1.4844167393183811, + "learning_rate": 1.9601084518832184e-05, + "loss": 0.818, + "step": 6801 + }, + { + "epoch": 0.11753525020735416, + "grad_norm": 1.1879796727101761, + "learning_rate": 1.9600928010174787e-05, + "loss": 0.7401, + "step": 6802 + }, + { + "epoch": 0.11755252972076306, + "grad_norm": 1.2504754435528334, + "learning_rate": 1.9600771471446552e-05, + "loss": 0.6979, + "step": 6803 + }, + { + "epoch": 0.11756980923417197, + "grad_norm": 1.0311310851064424, + "learning_rate": 1.9600614902647973e-05, + "loss": 0.6428, + "step": 6804 + }, + { + "epoch": 0.11758708874758086, + "grad_norm": 0.9340140713543871, + "learning_rate": 1.960045830377954e-05, + "loss": 0.7439, + "step": 6805 + }, + { + "epoch": 0.11760436826098977, + "grad_norm": 0.44378721406239097, + "learning_rate": 1.9600301674841736e-05, + "loss": 0.5282, + "step": 6806 + }, + { + "epoch": 0.11762164777439867, + "grad_norm": 1.1075485408732304, + "learning_rate": 1.9600145015835063e-05, + "loss": 0.5679, + "step": 6807 + }, + { + "epoch": 0.11763892728780757, + "grad_norm": 0.7375857682283787, + "learning_rate": 1.9599988326760004e-05, + "loss": 0.6008, + "step": 6808 + }, + { + "epoch": 0.11765620680121648, + "grad_norm": 1.0930296214702757, + "learning_rate": 1.959983160761705e-05, + "loss": 0.8541, + "step": 6809 + }, + { + "epoch": 0.11767348631462538, + "grad_norm": 1.0464355947491502, + "learning_rate": 1.95996748584067e-05, + "loss": 0.6725, + "step": 6810 + }, + { + "epoch": 0.11769076582803428, + "grad_norm": 1.0585415191169325, + "learning_rate": 1.9599518079129432e-05, + "loss": 1.0071, + "step": 6811 + }, + { + "epoch": 0.11770804534144319, + "grad_norm": 0.8550920192361782, + "learning_rate": 1.9599361269785747e-05, + "loss": 0.7596, + "step": 6812 + }, + { + "epoch": 0.11772532485485208, + "grad_norm": 1.110337002076908, + "learning_rate": 1.9599204430376134e-05, + "loss": 0.6791, + "step": 6813 + }, + { + "epoch": 0.11774260436826099, + "grad_norm": 1.0712917629022538, + "learning_rate": 1.9599047560901083e-05, + "loss": 0.4563, + "step": 6814 + }, + { + "epoch": 0.11775988388166989, + "grad_norm": 1.0004861186004217, + "learning_rate": 1.9598890661361085e-05, + "loss": 0.4073, + "step": 6815 + }, + { + "epoch": 0.1177771633950788, + "grad_norm": 1.0396414631018172, + "learning_rate": 1.9598733731756628e-05, + "loss": 0.8363, + "step": 6816 + }, + { + "epoch": 0.1177944429084877, + "grad_norm": 1.0342394568557487, + "learning_rate": 1.9598576772088214e-05, + "loss": 0.6355, + "step": 6817 + }, + { + "epoch": 0.1178117224218966, + "grad_norm": 0.6925181918913792, + "learning_rate": 1.9598419782356328e-05, + "loss": 0.6693, + "step": 6818 + }, + { + "epoch": 0.1178290019353055, + "grad_norm": 1.1067420272174302, + "learning_rate": 1.9598262762561456e-05, + "loss": 0.8114, + "step": 6819 + }, + { + "epoch": 0.11784628144871441, + "grad_norm": 0.4579673902719551, + "learning_rate": 1.9598105712704098e-05, + "loss": 0.5899, + "step": 6820 + }, + { + "epoch": 0.1178635609621233, + "grad_norm": 1.0239954346360347, + "learning_rate": 1.9597948632784744e-05, + "loss": 0.8415, + "step": 6821 + }, + { + "epoch": 0.11788084047553221, + "grad_norm": 0.8809583893943779, + "learning_rate": 1.9597791522803886e-05, + "loss": 0.5407, + "step": 6822 + }, + { + "epoch": 0.1178981199889411, + "grad_norm": 1.012681551064543, + "learning_rate": 1.9597634382762015e-05, + "loss": 0.7822, + "step": 6823 + }, + { + "epoch": 0.11791539950235001, + "grad_norm": 0.9326878532551174, + "learning_rate": 1.9597477212659623e-05, + "loss": 0.6025, + "step": 6824 + }, + { + "epoch": 0.11793267901575892, + "grad_norm": 1.344339663720683, + "learning_rate": 1.9597320012497206e-05, + "loss": 0.7828, + "step": 6825 + }, + { + "epoch": 0.11794995852916781, + "grad_norm": 0.9474380499665921, + "learning_rate": 1.959716278227525e-05, + "loss": 0.7698, + "step": 6826 + }, + { + "epoch": 0.11796723804257672, + "grad_norm": 1.3369855818322947, + "learning_rate": 1.959700552199425e-05, + "loss": 0.8098, + "step": 6827 + }, + { + "epoch": 0.11798451755598563, + "grad_norm": 0.7472169856891678, + "learning_rate": 1.9596848231654705e-05, + "loss": 0.5679, + "step": 6828 + }, + { + "epoch": 0.11800179706939452, + "grad_norm": 0.7763079754161868, + "learning_rate": 1.9596690911257097e-05, + "loss": 0.766, + "step": 6829 + }, + { + "epoch": 0.11801907658280343, + "grad_norm": 1.047691992453156, + "learning_rate": 1.9596533560801925e-05, + "loss": 0.7535, + "step": 6830 + }, + { + "epoch": 0.11803635609621232, + "grad_norm": 0.5392917589595256, + "learning_rate": 1.959637618028968e-05, + "loss": 0.7899, + "step": 6831 + }, + { + "epoch": 0.11805363560962123, + "grad_norm": 1.158259392547514, + "learning_rate": 1.9596218769720855e-05, + "loss": 0.9657, + "step": 6832 + }, + { + "epoch": 0.11807091512303014, + "grad_norm": 1.0574132070478204, + "learning_rate": 1.9596061329095946e-05, + "loss": 0.667, + "step": 6833 + }, + { + "epoch": 0.11808819463643903, + "grad_norm": 0.7683793050341317, + "learning_rate": 1.9595903858415445e-05, + "loss": 0.6904, + "step": 6834 + }, + { + "epoch": 0.11810547414984794, + "grad_norm": 0.83048237950079, + "learning_rate": 1.959574635767984e-05, + "loss": 0.6146, + "step": 6835 + }, + { + "epoch": 0.11812275366325685, + "grad_norm": 0.3948713752222298, + "learning_rate": 1.959558882688963e-05, + "loss": 0.4918, + "step": 6836 + }, + { + "epoch": 0.11814003317666574, + "grad_norm": 0.7385478566437917, + "learning_rate": 1.9595431266045307e-05, + "loss": 0.4652, + "step": 6837 + }, + { + "epoch": 0.11815731269007465, + "grad_norm": 1.193955587904695, + "learning_rate": 1.959527367514736e-05, + "loss": 0.5924, + "step": 6838 + }, + { + "epoch": 0.11817459220348354, + "grad_norm": 1.109766778656391, + "learning_rate": 1.959511605419629e-05, + "loss": 0.7975, + "step": 6839 + }, + { + "epoch": 0.11819187171689245, + "grad_norm": 0.7606915956104178, + "learning_rate": 1.959495840319259e-05, + "loss": 0.6721, + "step": 6840 + }, + { + "epoch": 0.11820915123030136, + "grad_norm": 1.4111792651853465, + "learning_rate": 1.9594800722136744e-05, + "loss": 0.8204, + "step": 6841 + }, + { + "epoch": 0.11822643074371025, + "grad_norm": 1.1771323912237515, + "learning_rate": 1.9594643011029258e-05, + "loss": 0.8752, + "step": 6842 + }, + { + "epoch": 0.11824371025711916, + "grad_norm": 1.1462318090176729, + "learning_rate": 1.959448526987062e-05, + "loss": 0.8192, + "step": 6843 + }, + { + "epoch": 0.11826098977052807, + "grad_norm": 0.8608737109563215, + "learning_rate": 1.9594327498661324e-05, + "loss": 0.6828, + "step": 6844 + }, + { + "epoch": 0.11827826928393696, + "grad_norm": 0.6608384445679371, + "learning_rate": 1.9594169697401865e-05, + "loss": 0.7983, + "step": 6845 + }, + { + "epoch": 0.11829554879734587, + "grad_norm": 0.7975333358597204, + "learning_rate": 1.9594011866092743e-05, + "loss": 0.6569, + "step": 6846 + }, + { + "epoch": 0.11831282831075476, + "grad_norm": 1.1566289840632724, + "learning_rate": 1.959385400473444e-05, + "loss": 0.9948, + "step": 6847 + }, + { + "epoch": 0.11833010782416367, + "grad_norm": 1.0059302567446613, + "learning_rate": 1.959369611332746e-05, + "loss": 0.6194, + "step": 6848 + }, + { + "epoch": 0.11834738733757258, + "grad_norm": 0.8193711865191519, + "learning_rate": 1.9593538191872294e-05, + "loss": 0.5892, + "step": 6849 + }, + { + "epoch": 0.11836466685098147, + "grad_norm": 0.9748955472288738, + "learning_rate": 1.9593380240369434e-05, + "loss": 0.7665, + "step": 6850 + }, + { + "epoch": 0.11838194636439038, + "grad_norm": 0.9133114725254957, + "learning_rate": 1.9593222258819385e-05, + "loss": 0.8401, + "step": 6851 + }, + { + "epoch": 0.11839922587779927, + "grad_norm": 1.3764276736899625, + "learning_rate": 1.959306424722263e-05, + "loss": 0.7776, + "step": 6852 + }, + { + "epoch": 0.11841650539120818, + "grad_norm": 1.0303886567364215, + "learning_rate": 1.959290620557967e-05, + "loss": 0.8424, + "step": 6853 + }, + { + "epoch": 0.11843378490461709, + "grad_norm": 1.3375606799736548, + "learning_rate": 1.9592748133890997e-05, + "loss": 0.7247, + "step": 6854 + }, + { + "epoch": 0.11845106441802598, + "grad_norm": 0.7482876980090901, + "learning_rate": 1.959259003215711e-05, + "loss": 0.665, + "step": 6855 + }, + { + "epoch": 0.11846834393143489, + "grad_norm": 0.9040187570981069, + "learning_rate": 1.95924319003785e-05, + "loss": 0.8299, + "step": 6856 + }, + { + "epoch": 0.1184856234448438, + "grad_norm": 0.8286845274190211, + "learning_rate": 1.959227373855567e-05, + "loss": 0.6181, + "step": 6857 + }, + { + "epoch": 0.11850290295825269, + "grad_norm": 0.8658963916505682, + "learning_rate": 1.9592115546689104e-05, + "loss": 0.5738, + "step": 6858 + }, + { + "epoch": 0.1185201824716616, + "grad_norm": 0.7126652189372794, + "learning_rate": 1.9591957324779304e-05, + "loss": 0.6737, + "step": 6859 + }, + { + "epoch": 0.1185374619850705, + "grad_norm": 1.0490720609592428, + "learning_rate": 1.9591799072826766e-05, + "loss": 0.7192, + "step": 6860 + }, + { + "epoch": 0.1185547414984794, + "grad_norm": 0.6872541867416714, + "learning_rate": 1.9591640790831983e-05, + "loss": 0.5067, + "step": 6861 + }, + { + "epoch": 0.11857202101188831, + "grad_norm": 1.039338477156816, + "learning_rate": 1.959148247879545e-05, + "loss": 0.8265, + "step": 6862 + }, + { + "epoch": 0.1185893005252972, + "grad_norm": 1.078552392854555, + "learning_rate": 1.959132413671767e-05, + "loss": 0.8949, + "step": 6863 + }, + { + "epoch": 0.11860658003870611, + "grad_norm": 1.0563513746540156, + "learning_rate": 1.9591165764599132e-05, + "loss": 0.8656, + "step": 6864 + }, + { + "epoch": 0.11862385955211502, + "grad_norm": 1.2510684533810064, + "learning_rate": 1.959100736244033e-05, + "loss": 0.8906, + "step": 6865 + }, + { + "epoch": 0.11864113906552391, + "grad_norm": 1.0987742793972648, + "learning_rate": 1.9590848930241768e-05, + "loss": 0.6225, + "step": 6866 + }, + { + "epoch": 0.11865841857893282, + "grad_norm": 1.1759422006717513, + "learning_rate": 1.959069046800394e-05, + "loss": 0.9567, + "step": 6867 + }, + { + "epoch": 0.11867569809234171, + "grad_norm": 1.0611520792894873, + "learning_rate": 1.9590531975727338e-05, + "loss": 0.6979, + "step": 6868 + }, + { + "epoch": 0.11869297760575062, + "grad_norm": 0.796559619439729, + "learning_rate": 1.9590373453412458e-05, + "loss": 0.7632, + "step": 6869 + }, + { + "epoch": 0.11871025711915953, + "grad_norm": 0.9428557117381623, + "learning_rate": 1.9590214901059805e-05, + "loss": 0.5818, + "step": 6870 + }, + { + "epoch": 0.11872753663256842, + "grad_norm": 0.8156572025032242, + "learning_rate": 1.959005631866987e-05, + "loss": 0.5886, + "step": 6871 + }, + { + "epoch": 0.11874481614597733, + "grad_norm": 0.9431519699932833, + "learning_rate": 1.9589897706243148e-05, + "loss": 0.6411, + "step": 6872 + }, + { + "epoch": 0.11876209565938624, + "grad_norm": 0.7533538288300085, + "learning_rate": 1.9589739063780134e-05, + "loss": 0.7524, + "step": 6873 + }, + { + "epoch": 0.11877937517279513, + "grad_norm": 0.9401753817953212, + "learning_rate": 1.9589580391281332e-05, + "loss": 0.6989, + "step": 6874 + }, + { + "epoch": 0.11879665468620404, + "grad_norm": 0.803034960536361, + "learning_rate": 1.9589421688747236e-05, + "loss": 0.5621, + "step": 6875 + }, + { + "epoch": 0.11881393419961293, + "grad_norm": 0.9941609691865396, + "learning_rate": 1.958926295617834e-05, + "loss": 0.6633, + "step": 6876 + }, + { + "epoch": 0.11883121371302184, + "grad_norm": 1.0707574327422968, + "learning_rate": 1.9589104193575146e-05, + "loss": 0.5617, + "step": 6877 + }, + { + "epoch": 0.11884849322643075, + "grad_norm": 0.9176989997896291, + "learning_rate": 1.9588945400938148e-05, + "loss": 0.5457, + "step": 6878 + }, + { + "epoch": 0.11886577273983964, + "grad_norm": 0.8505842105316179, + "learning_rate": 1.9588786578267848e-05, + "loss": 0.6261, + "step": 6879 + }, + { + "epoch": 0.11888305225324855, + "grad_norm": 0.7054182097362913, + "learning_rate": 1.9588627725564734e-05, + "loss": 0.5133, + "step": 6880 + }, + { + "epoch": 0.11890033176665746, + "grad_norm": 0.39246641376273916, + "learning_rate": 1.9588468842829312e-05, + "loss": 0.5578, + "step": 6881 + }, + { + "epoch": 0.11891761128006635, + "grad_norm": 1.413582305235424, + "learning_rate": 1.9588309930062076e-05, + "loss": 0.7236, + "step": 6882 + }, + { + "epoch": 0.11893489079347526, + "grad_norm": 1.3229880747335647, + "learning_rate": 1.9588150987263524e-05, + "loss": 0.8819, + "step": 6883 + }, + { + "epoch": 0.11895217030688415, + "grad_norm": 1.1253582576443064, + "learning_rate": 1.9587992014434154e-05, + "loss": 0.8422, + "step": 6884 + }, + { + "epoch": 0.11896944982029306, + "grad_norm": 0.8169926235956889, + "learning_rate": 1.9587833011574467e-05, + "loss": 0.7129, + "step": 6885 + }, + { + "epoch": 0.11898672933370197, + "grad_norm": 1.5981339424800167, + "learning_rate": 1.958767397868496e-05, + "loss": 0.6079, + "step": 6886 + }, + { + "epoch": 0.11900400884711086, + "grad_norm": 0.9387240988397144, + "learning_rate": 1.9587514915766124e-05, + "loss": 0.7506, + "step": 6887 + }, + { + "epoch": 0.11902128836051977, + "grad_norm": 0.7836099296324952, + "learning_rate": 1.9587355822818466e-05, + "loss": 0.5423, + "step": 6888 + }, + { + "epoch": 0.11903856787392866, + "grad_norm": 0.6974012576018841, + "learning_rate": 1.958719669984248e-05, + "loss": 0.537, + "step": 6889 + }, + { + "epoch": 0.11905584738733757, + "grad_norm": 1.0545086387374296, + "learning_rate": 1.9587037546838665e-05, + "loss": 0.9062, + "step": 6890 + }, + { + "epoch": 0.11907312690074648, + "grad_norm": 1.3898790772277825, + "learning_rate": 1.958687836380752e-05, + "loss": 0.7813, + "step": 6891 + }, + { + "epoch": 0.11909040641415537, + "grad_norm": 0.4943284465620509, + "learning_rate": 1.9586719150749546e-05, + "loss": 0.779, + "step": 6892 + }, + { + "epoch": 0.11910768592756428, + "grad_norm": 1.316694881454371, + "learning_rate": 1.9586559907665234e-05, + "loss": 0.7533, + "step": 6893 + }, + { + "epoch": 0.11912496544097319, + "grad_norm": 1.7093463579219486, + "learning_rate": 1.958640063455509e-05, + "loss": 0.7885, + "step": 6894 + }, + { + "epoch": 0.11914224495438208, + "grad_norm": 0.653840076518376, + "learning_rate": 1.9586241331419612e-05, + "loss": 0.6417, + "step": 6895 + }, + { + "epoch": 0.11915952446779099, + "grad_norm": 1.1718298137328726, + "learning_rate": 1.9586081998259296e-05, + "loss": 0.7192, + "step": 6896 + }, + { + "epoch": 0.11917680398119988, + "grad_norm": 0.820182942060849, + "learning_rate": 1.9585922635074643e-05, + "loss": 0.6661, + "step": 6897 + }, + { + "epoch": 0.11919408349460879, + "grad_norm": 0.40379240184482246, + "learning_rate": 1.9585763241866155e-05, + "loss": 0.5419, + "step": 6898 + }, + { + "epoch": 0.1192113630080177, + "grad_norm": 0.8861947272331475, + "learning_rate": 1.9585603818634324e-05, + "loss": 0.8091, + "step": 6899 + }, + { + "epoch": 0.11922864252142659, + "grad_norm": 0.971903030759256, + "learning_rate": 1.9585444365379657e-05, + "loss": 0.746, + "step": 6900 + }, + { + "epoch": 0.1192459220348355, + "grad_norm": 1.1885690973058947, + "learning_rate": 1.9585284882102645e-05, + "loss": 0.673, + "step": 6901 + }, + { + "epoch": 0.11926320154824441, + "grad_norm": 0.8195303849936049, + "learning_rate": 1.9585125368803798e-05, + "loss": 0.6924, + "step": 6902 + }, + { + "epoch": 0.1192804810616533, + "grad_norm": 1.3049248652199215, + "learning_rate": 1.9584965825483606e-05, + "loss": 0.6628, + "step": 6903 + }, + { + "epoch": 0.11929776057506221, + "grad_norm": 0.4909753107633891, + "learning_rate": 1.958480625214257e-05, + "loss": 0.7575, + "step": 6904 + }, + { + "epoch": 0.1193150400884711, + "grad_norm": 1.0994736624575723, + "learning_rate": 1.9584646648781198e-05, + "loss": 0.8782, + "step": 6905 + }, + { + "epoch": 0.11933231960188001, + "grad_norm": 1.376390637718582, + "learning_rate": 1.9584487015399984e-05, + "loss": 1.0301, + "step": 6906 + }, + { + "epoch": 0.11934959911528892, + "grad_norm": 0.9724583605901251, + "learning_rate": 1.9584327351999424e-05, + "loss": 0.5982, + "step": 6907 + }, + { + "epoch": 0.11936687862869781, + "grad_norm": 0.7488230530028887, + "learning_rate": 1.9584167658580023e-05, + "loss": 0.5108, + "step": 6908 + }, + { + "epoch": 0.11938415814210672, + "grad_norm": 0.7465396962524233, + "learning_rate": 1.9584007935142283e-05, + "loss": 0.8284, + "step": 6909 + }, + { + "epoch": 0.11940143765551563, + "grad_norm": 1.0476338291643472, + "learning_rate": 1.9583848181686702e-05, + "loss": 0.7666, + "step": 6910 + }, + { + "epoch": 0.11941871716892452, + "grad_norm": 0.8555606107814574, + "learning_rate": 1.958368839821378e-05, + "loss": 0.644, + "step": 6911 + }, + { + "epoch": 0.11943599668233343, + "grad_norm": 1.1978894810061185, + "learning_rate": 1.9583528584724014e-05, + "loss": 0.7912, + "step": 6912 + }, + { + "epoch": 0.11945327619574232, + "grad_norm": 0.8610706823849464, + "learning_rate": 1.958336874121791e-05, + "loss": 0.8031, + "step": 6913 + }, + { + "epoch": 0.11947055570915123, + "grad_norm": 0.8398081191018903, + "learning_rate": 1.958320886769597e-05, + "loss": 0.5917, + "step": 6914 + }, + { + "epoch": 0.11948783522256014, + "grad_norm": 0.8364563456569704, + "learning_rate": 1.9583048964158688e-05, + "loss": 0.6861, + "step": 6915 + }, + { + "epoch": 0.11950511473596903, + "grad_norm": 0.7547921159276298, + "learning_rate": 1.958288903060657e-05, + "loss": 0.7385, + "step": 6916 + }, + { + "epoch": 0.11952239424937794, + "grad_norm": 0.9161545832089882, + "learning_rate": 1.9582729067040117e-05, + "loss": 0.6865, + "step": 6917 + }, + { + "epoch": 0.11953967376278685, + "grad_norm": 0.9369824881043965, + "learning_rate": 1.9582569073459827e-05, + "loss": 0.6639, + "step": 6918 + }, + { + "epoch": 0.11955695327619574, + "grad_norm": 0.8444568518075984, + "learning_rate": 1.95824090498662e-05, + "loss": 0.6286, + "step": 6919 + }, + { + "epoch": 0.11957423278960465, + "grad_norm": 0.6587670037055909, + "learning_rate": 1.958224899625974e-05, + "loss": 0.3559, + "step": 6920 + }, + { + "epoch": 0.11959151230301354, + "grad_norm": 1.0104228838641873, + "learning_rate": 1.9582088912640952e-05, + "loss": 0.6757, + "step": 6921 + }, + { + "epoch": 0.11960879181642245, + "grad_norm": 0.5890217600628045, + "learning_rate": 1.958192879901033e-05, + "loss": 0.7189, + "step": 6922 + }, + { + "epoch": 0.11962607132983136, + "grad_norm": 1.162363822898478, + "learning_rate": 1.9581768655368377e-05, + "loss": 0.7764, + "step": 6923 + }, + { + "epoch": 0.11964335084324025, + "grad_norm": 0.5610270520845977, + "learning_rate": 1.95816084817156e-05, + "loss": 0.7936, + "step": 6924 + }, + { + "epoch": 0.11966063035664916, + "grad_norm": 0.9649390917898822, + "learning_rate": 1.9581448278052493e-05, + "loss": 0.7603, + "step": 6925 + }, + { + "epoch": 0.11967790987005805, + "grad_norm": 0.9875428319895826, + "learning_rate": 1.958128804437957e-05, + "loss": 0.8402, + "step": 6926 + }, + { + "epoch": 0.11969518938346696, + "grad_norm": 0.9352353327052666, + "learning_rate": 1.9581127780697316e-05, + "loss": 0.844, + "step": 6927 + }, + { + "epoch": 0.11971246889687587, + "grad_norm": 1.0920558817934136, + "learning_rate": 1.9580967487006242e-05, + "loss": 0.7156, + "step": 6928 + }, + { + "epoch": 0.11972974841028476, + "grad_norm": 0.8264853149551035, + "learning_rate": 1.9580807163306854e-05, + "loss": 0.5173, + "step": 6929 + }, + { + "epoch": 0.11974702792369367, + "grad_norm": 1.0714729005473582, + "learning_rate": 1.9580646809599647e-05, + "loss": 0.5717, + "step": 6930 + }, + { + "epoch": 0.11976430743710258, + "grad_norm": 0.9432197242184235, + "learning_rate": 1.9580486425885132e-05, + "loss": 0.5914, + "step": 6931 + }, + { + "epoch": 0.11978158695051147, + "grad_norm": 1.0126362466638514, + "learning_rate": 1.9580326012163797e-05, + "loss": 0.8988, + "step": 6932 + }, + { + "epoch": 0.11979886646392038, + "grad_norm": 0.8255042536580256, + "learning_rate": 1.9580165568436157e-05, + "loss": 0.6478, + "step": 6933 + }, + { + "epoch": 0.11981614597732927, + "grad_norm": 0.8160745033224015, + "learning_rate": 1.958000509470271e-05, + "loss": 0.5699, + "step": 6934 + }, + { + "epoch": 0.11983342549073818, + "grad_norm": 1.0110126967266082, + "learning_rate": 1.957984459096396e-05, + "loss": 0.7015, + "step": 6935 + }, + { + "epoch": 0.11985070500414709, + "grad_norm": 0.4681830291625723, + "learning_rate": 1.9579684057220407e-05, + "loss": 0.4895, + "step": 6936 + }, + { + "epoch": 0.11986798451755598, + "grad_norm": 1.0010450510417308, + "learning_rate": 1.957952349347256e-05, + "loss": 0.5495, + "step": 6937 + }, + { + "epoch": 0.11988526403096489, + "grad_norm": 0.4189306702793934, + "learning_rate": 1.957936289972091e-05, + "loss": 0.5896, + "step": 6938 + }, + { + "epoch": 0.1199025435443738, + "grad_norm": 1.1043863274615704, + "learning_rate": 1.9579202275965973e-05, + "loss": 0.6404, + "step": 6939 + }, + { + "epoch": 0.11991982305778269, + "grad_norm": 0.9506682651885241, + "learning_rate": 1.9579041622208244e-05, + "loss": 0.6272, + "step": 6940 + }, + { + "epoch": 0.1199371025711916, + "grad_norm": 1.018383395704644, + "learning_rate": 1.9578880938448228e-05, + "loss": 0.6308, + "step": 6941 + }, + { + "epoch": 0.11995438208460049, + "grad_norm": 1.1954957416835368, + "learning_rate": 1.957872022468643e-05, + "loss": 0.7168, + "step": 6942 + }, + { + "epoch": 0.1199716615980094, + "grad_norm": 1.038896035893174, + "learning_rate": 1.9578559480923353e-05, + "loss": 0.5486, + "step": 6943 + }, + { + "epoch": 0.1199889411114183, + "grad_norm": 0.5098052408437659, + "learning_rate": 1.95783987071595e-05, + "loss": 0.6398, + "step": 6944 + }, + { + "epoch": 0.1200062206248272, + "grad_norm": 1.1139526096891492, + "learning_rate": 1.9578237903395373e-05, + "loss": 0.6771, + "step": 6945 + }, + { + "epoch": 0.12002350013823611, + "grad_norm": 0.8217336440456561, + "learning_rate": 1.9578077069631477e-05, + "loss": 0.5341, + "step": 6946 + }, + { + "epoch": 0.12004077965164502, + "grad_norm": 1.5925871124326438, + "learning_rate": 1.9577916205868317e-05, + "loss": 0.9027, + "step": 6947 + }, + { + "epoch": 0.12005805916505391, + "grad_norm": 1.0649789133158003, + "learning_rate": 1.9577755312106394e-05, + "loss": 0.8571, + "step": 6948 + }, + { + "epoch": 0.12007533867846282, + "grad_norm": 1.2246763783856203, + "learning_rate": 1.9577594388346216e-05, + "loss": 0.7216, + "step": 6949 + }, + { + "epoch": 0.12009261819187171, + "grad_norm": 1.047768544535802, + "learning_rate": 1.9577433434588282e-05, + "loss": 0.6898, + "step": 6950 + }, + { + "epoch": 0.12010989770528062, + "grad_norm": 1.096695538598268, + "learning_rate": 1.95772724508331e-05, + "loss": 0.6788, + "step": 6951 + }, + { + "epoch": 0.12012717721868953, + "grad_norm": 0.8377728812136623, + "learning_rate": 1.957711143708117e-05, + "loss": 0.7179, + "step": 6952 + }, + { + "epoch": 0.12014445673209842, + "grad_norm": 1.0629613241011062, + "learning_rate": 1.9576950393333002e-05, + "loss": 0.6658, + "step": 6953 + }, + { + "epoch": 0.12016173624550733, + "grad_norm": 1.1229138588456713, + "learning_rate": 1.9576789319589097e-05, + "loss": 0.7252, + "step": 6954 + }, + { + "epoch": 0.12017901575891624, + "grad_norm": 0.8679230395841709, + "learning_rate": 1.9576628215849963e-05, + "loss": 0.5023, + "step": 6955 + }, + { + "epoch": 0.12019629527232513, + "grad_norm": 1.0616021359219985, + "learning_rate": 1.9576467082116095e-05, + "loss": 0.7153, + "step": 6956 + }, + { + "epoch": 0.12021357478573404, + "grad_norm": 1.0457799442193405, + "learning_rate": 1.9576305918388007e-05, + "loss": 0.7567, + "step": 6957 + }, + { + "epoch": 0.12023085429914293, + "grad_norm": 1.2255418267842084, + "learning_rate": 1.9576144724666205e-05, + "loss": 0.7277, + "step": 6958 + }, + { + "epoch": 0.12024813381255184, + "grad_norm": 1.1000649670751665, + "learning_rate": 1.9575983500951186e-05, + "loss": 0.6848, + "step": 6959 + }, + { + "epoch": 0.12026541332596075, + "grad_norm": 0.8635237150654488, + "learning_rate": 1.957582224724346e-05, + "loss": 0.6542, + "step": 6960 + }, + { + "epoch": 0.12028269283936964, + "grad_norm": 1.0596557870646488, + "learning_rate": 1.957566096354353e-05, + "loss": 1.0127, + "step": 6961 + }, + { + "epoch": 0.12029997235277855, + "grad_norm": 0.7812409758732562, + "learning_rate": 1.9575499649851903e-05, + "loss": 0.4215, + "step": 6962 + }, + { + "epoch": 0.12031725186618744, + "grad_norm": 1.285015861699527, + "learning_rate": 1.9575338306169084e-05, + "loss": 0.9101, + "step": 6963 + }, + { + "epoch": 0.12033453137959635, + "grad_norm": 0.7693643295718923, + "learning_rate": 1.9575176932495576e-05, + "loss": 0.6937, + "step": 6964 + }, + { + "epoch": 0.12035181089300526, + "grad_norm": 1.2142731842000547, + "learning_rate": 1.9575015528831887e-05, + "loss": 0.5969, + "step": 6965 + }, + { + "epoch": 0.12036909040641415, + "grad_norm": 0.8964368238212107, + "learning_rate": 1.9574854095178523e-05, + "loss": 0.5425, + "step": 6966 + }, + { + "epoch": 0.12038636991982306, + "grad_norm": 0.7181163736378985, + "learning_rate": 1.9574692631535987e-05, + "loss": 0.5561, + "step": 6967 + }, + { + "epoch": 0.12040364943323197, + "grad_norm": 0.9151566629683393, + "learning_rate": 1.9574531137904785e-05, + "loss": 0.6016, + "step": 6968 + }, + { + "epoch": 0.12042092894664086, + "grad_norm": 0.8383060661818186, + "learning_rate": 1.9574369614285426e-05, + "loss": 0.672, + "step": 6969 + }, + { + "epoch": 0.12043820846004977, + "grad_norm": 0.6339781809732528, + "learning_rate": 1.9574208060678413e-05, + "loss": 0.4118, + "step": 6970 + }, + { + "epoch": 0.12045548797345866, + "grad_norm": 0.4390309833104405, + "learning_rate": 1.9574046477084252e-05, + "loss": 0.7003, + "step": 6971 + }, + { + "epoch": 0.12047276748686757, + "grad_norm": 0.8693462662020564, + "learning_rate": 1.957388486350345e-05, + "loss": 0.8161, + "step": 6972 + }, + { + "epoch": 0.12049004700027648, + "grad_norm": 1.2764213979702952, + "learning_rate": 1.9573723219936514e-05, + "loss": 0.8188, + "step": 6973 + }, + { + "epoch": 0.12050732651368537, + "grad_norm": 1.3045740646980197, + "learning_rate": 1.957356154638395e-05, + "loss": 0.6029, + "step": 6974 + }, + { + "epoch": 0.12052460602709428, + "grad_norm": 0.9762986118439911, + "learning_rate": 1.9573399842846262e-05, + "loss": 0.9562, + "step": 6975 + }, + { + "epoch": 0.12054188554050319, + "grad_norm": 0.4523917861679251, + "learning_rate": 1.957323810932396e-05, + "loss": 0.6284, + "step": 6976 + }, + { + "epoch": 0.12055916505391208, + "grad_norm": 0.614904607285931, + "learning_rate": 1.9573076345817547e-05, + "loss": 0.5798, + "step": 6977 + }, + { + "epoch": 0.12057644456732099, + "grad_norm": 1.2207223714344135, + "learning_rate": 1.957291455232753e-05, + "loss": 0.5079, + "step": 6978 + }, + { + "epoch": 0.12059372408072988, + "grad_norm": 1.1017678950130618, + "learning_rate": 1.957275272885442e-05, + "loss": 0.7879, + "step": 6979 + }, + { + "epoch": 0.12061100359413879, + "grad_norm": 0.7994225456564893, + "learning_rate": 1.957259087539872e-05, + "loss": 0.6145, + "step": 6980 + }, + { + "epoch": 0.1206282831075477, + "grad_norm": 0.4568592478996447, + "learning_rate": 1.9572428991960938e-05, + "loss": 0.4947, + "step": 6981 + }, + { + "epoch": 0.12064556262095659, + "grad_norm": 1.0676678967212476, + "learning_rate": 1.957226707854158e-05, + "loss": 0.7603, + "step": 6982 + }, + { + "epoch": 0.1206628421343655, + "grad_norm": 0.9363715892457892, + "learning_rate": 1.9572105135141155e-05, + "loss": 0.6722, + "step": 6983 + }, + { + "epoch": 0.1206801216477744, + "grad_norm": 1.0189398988033769, + "learning_rate": 1.9571943161760168e-05, + "loss": 0.8035, + "step": 6984 + }, + { + "epoch": 0.1206974011611833, + "grad_norm": 0.5749737268218617, + "learning_rate": 1.9571781158399128e-05, + "loss": 0.449, + "step": 6985 + }, + { + "epoch": 0.1207146806745922, + "grad_norm": 0.946308910596186, + "learning_rate": 1.9571619125058542e-05, + "loss": 0.7885, + "step": 6986 + }, + { + "epoch": 0.1207319601880011, + "grad_norm": 0.9236683697555834, + "learning_rate": 1.957145706173892e-05, + "loss": 0.597, + "step": 6987 + }, + { + "epoch": 0.12074923970141001, + "grad_norm": 1.2050360319055229, + "learning_rate": 1.9571294968440765e-05, + "loss": 0.7326, + "step": 6988 + }, + { + "epoch": 0.12076651921481892, + "grad_norm": 1.0154002716136885, + "learning_rate": 1.9571132845164586e-05, + "loss": 0.718, + "step": 6989 + }, + { + "epoch": 0.12078379872822781, + "grad_norm": 0.9136854223015028, + "learning_rate": 1.957097069191089e-05, + "loss": 0.8418, + "step": 6990 + }, + { + "epoch": 0.12080107824163672, + "grad_norm": 0.8071189912989567, + "learning_rate": 1.957080850868019e-05, + "loss": 0.56, + "step": 6991 + }, + { + "epoch": 0.12081835775504562, + "grad_norm": 0.7173709689403929, + "learning_rate": 1.9570646295472988e-05, + "loss": 0.52, + "step": 6992 + }, + { + "epoch": 0.12083563726845452, + "grad_norm": 1.0061639694611986, + "learning_rate": 1.9570484052289798e-05, + "loss": 0.6119, + "step": 6993 + }, + { + "epoch": 0.12085291678186343, + "grad_norm": 0.7766392665169848, + "learning_rate": 1.957032177913112e-05, + "loss": 0.7519, + "step": 6994 + }, + { + "epoch": 0.12087019629527232, + "grad_norm": 0.8498517122967824, + "learning_rate": 1.957015947599747e-05, + "loss": 0.7419, + "step": 6995 + }, + { + "epoch": 0.12088747580868123, + "grad_norm": 0.8307834598690494, + "learning_rate": 1.956999714288935e-05, + "loss": 0.6917, + "step": 6996 + }, + { + "epoch": 0.12090475532209013, + "grad_norm": 0.9671169476665109, + "learning_rate": 1.9569834779807276e-05, + "loss": 0.7312, + "step": 6997 + }, + { + "epoch": 0.12092203483549903, + "grad_norm": 0.8772850820889927, + "learning_rate": 1.956967238675175e-05, + "loss": 0.532, + "step": 6998 + }, + { + "epoch": 0.12093931434890794, + "grad_norm": 0.7605446728264698, + "learning_rate": 1.9569509963723283e-05, + "loss": 0.7137, + "step": 6999 + }, + { + "epoch": 0.12095659386231684, + "grad_norm": 1.3167194713478123, + "learning_rate": 1.9569347510722382e-05, + "loss": 0.7662, + "step": 7000 + }, + { + "epoch": 0.12097387337572574, + "grad_norm": 1.1225353075238629, + "learning_rate": 1.956918502774956e-05, + "loss": 0.8207, + "step": 7001 + }, + { + "epoch": 0.12099115288913465, + "grad_norm": 1.0028257473016826, + "learning_rate": 1.956902251480532e-05, + "loss": 0.6269, + "step": 7002 + }, + { + "epoch": 0.12100843240254354, + "grad_norm": 1.0336841179055114, + "learning_rate": 1.956885997189018e-05, + "loss": 0.7643, + "step": 7003 + }, + { + "epoch": 0.12102571191595245, + "grad_norm": 0.8080684503553278, + "learning_rate": 1.956869739900464e-05, + "loss": 0.6026, + "step": 7004 + }, + { + "epoch": 0.12104299142936135, + "grad_norm": 1.2960640285265113, + "learning_rate": 1.956853479614921e-05, + "loss": 0.877, + "step": 7005 + }, + { + "epoch": 0.12106027094277025, + "grad_norm": 1.0052539880072264, + "learning_rate": 1.9568372163324402e-05, + "loss": 0.6255, + "step": 7006 + }, + { + "epoch": 0.12107755045617916, + "grad_norm": 1.1088035825383027, + "learning_rate": 1.9568209500530727e-05, + "loss": 0.6694, + "step": 7007 + }, + { + "epoch": 0.12109482996958805, + "grad_norm": 1.4206359499580863, + "learning_rate": 1.9568046807768697e-05, + "loss": 0.8884, + "step": 7008 + }, + { + "epoch": 0.12111210948299696, + "grad_norm": 0.8167037533274757, + "learning_rate": 1.956788408503881e-05, + "loss": 0.5433, + "step": 7009 + }, + { + "epoch": 0.12112938899640587, + "grad_norm": 0.8716305930220684, + "learning_rate": 1.9567721332341586e-05, + "loss": 0.9685, + "step": 7010 + }, + { + "epoch": 0.12114666850981476, + "grad_norm": 0.8060526679358622, + "learning_rate": 1.9567558549677536e-05, + "loss": 0.6558, + "step": 7011 + }, + { + "epoch": 0.12116394802322367, + "grad_norm": 1.034692285797358, + "learning_rate": 1.956739573704716e-05, + "loss": 0.6858, + "step": 7012 + }, + { + "epoch": 0.12118122753663257, + "grad_norm": 0.876912687567518, + "learning_rate": 1.9567232894450975e-05, + "loss": 0.4805, + "step": 7013 + }, + { + "epoch": 0.12119850705004147, + "grad_norm": 1.1647456493916768, + "learning_rate": 1.956707002188949e-05, + "loss": 0.8563, + "step": 7014 + }, + { + "epoch": 0.12121578656345038, + "grad_norm": 0.9494348423451272, + "learning_rate": 1.9566907119363212e-05, + "loss": 0.7544, + "step": 7015 + }, + { + "epoch": 0.12123306607685927, + "grad_norm": 1.2325607049699507, + "learning_rate": 1.9566744186872653e-05, + "loss": 0.7087, + "step": 7016 + }, + { + "epoch": 0.12125034559026818, + "grad_norm": 1.1655051997858086, + "learning_rate": 1.9566581224418328e-05, + "loss": 0.6033, + "step": 7017 + }, + { + "epoch": 0.12126762510367708, + "grad_norm": 1.021115971236616, + "learning_rate": 1.9566418232000744e-05, + "loss": 0.5764, + "step": 7018 + }, + { + "epoch": 0.12128490461708598, + "grad_norm": 1.1675500142790776, + "learning_rate": 1.956625520962041e-05, + "loss": 0.6863, + "step": 7019 + }, + { + "epoch": 0.12130218413049489, + "grad_norm": 1.0839900604876018, + "learning_rate": 1.9566092157277837e-05, + "loss": 0.6257, + "step": 7020 + }, + { + "epoch": 0.1213194636439038, + "grad_norm": 0.5540460789330568, + "learning_rate": 1.9565929074973537e-05, + "loss": 0.8684, + "step": 7021 + }, + { + "epoch": 0.12133674315731269, + "grad_norm": 1.2790508233394928, + "learning_rate": 1.9565765962708018e-05, + "loss": 0.7893, + "step": 7022 + }, + { + "epoch": 0.1213540226707216, + "grad_norm": 0.9286905076294241, + "learning_rate": 1.9565602820481795e-05, + "loss": 0.7014, + "step": 7023 + }, + { + "epoch": 0.12137130218413049, + "grad_norm": 1.3641440039652752, + "learning_rate": 1.9565439648295376e-05, + "loss": 0.5665, + "step": 7024 + }, + { + "epoch": 0.1213885816975394, + "grad_norm": 1.2487083981522644, + "learning_rate": 1.9565276446149273e-05, + "loss": 0.6087, + "step": 7025 + }, + { + "epoch": 0.1214058612109483, + "grad_norm": 1.1643283199266703, + "learning_rate": 1.9565113214044e-05, + "loss": 0.8301, + "step": 7026 + }, + { + "epoch": 0.1214231407243572, + "grad_norm": 0.9820184281479316, + "learning_rate": 1.9564949951980062e-05, + "loss": 0.6329, + "step": 7027 + }, + { + "epoch": 0.1214404202377661, + "grad_norm": 0.9250614727511041, + "learning_rate": 1.9564786659957977e-05, + "loss": 0.7602, + "step": 7028 + }, + { + "epoch": 0.12145769975117501, + "grad_norm": 0.9977723303498036, + "learning_rate": 1.9564623337978254e-05, + "loss": 0.5958, + "step": 7029 + }, + { + "epoch": 0.12147497926458391, + "grad_norm": 0.9255267034644722, + "learning_rate": 1.95644599860414e-05, + "loss": 0.6519, + "step": 7030 + }, + { + "epoch": 0.12149225877799281, + "grad_norm": 0.9505807579253086, + "learning_rate": 1.956429660414793e-05, + "loss": 0.7347, + "step": 7031 + }, + { + "epoch": 0.12150953829140171, + "grad_norm": 1.0219147633240666, + "learning_rate": 1.956413319229836e-05, + "loss": 0.7897, + "step": 7032 + }, + { + "epoch": 0.12152681780481062, + "grad_norm": 0.5387801349675433, + "learning_rate": 1.9563969750493193e-05, + "loss": 0.8167, + "step": 7033 + }, + { + "epoch": 0.12154409731821952, + "grad_norm": 1.0916888898205699, + "learning_rate": 1.956380627873295e-05, + "loss": 0.4382, + "step": 7034 + }, + { + "epoch": 0.12156137683162842, + "grad_norm": 1.087773822976527, + "learning_rate": 1.956364277701814e-05, + "loss": 0.6336, + "step": 7035 + }, + { + "epoch": 0.12157865634503733, + "grad_norm": 1.0796748689324007, + "learning_rate": 1.956347924534927e-05, + "loss": 0.6252, + "step": 7036 + }, + { + "epoch": 0.12159593585844623, + "grad_norm": 0.9630731342369272, + "learning_rate": 1.956331568372686e-05, + "loss": 0.6301, + "step": 7037 + }, + { + "epoch": 0.12161321537185513, + "grad_norm": 1.0100793164312731, + "learning_rate": 1.9563152092151416e-05, + "loss": 0.5437, + "step": 7038 + }, + { + "epoch": 0.12163049488526403, + "grad_norm": 0.8230786307089837, + "learning_rate": 1.956298847062345e-05, + "loss": 0.5629, + "step": 7039 + }, + { + "epoch": 0.12164777439867293, + "grad_norm": 1.035313333055272, + "learning_rate": 1.9562824819143484e-05, + "loss": 0.4431, + "step": 7040 + }, + { + "epoch": 0.12166505391208184, + "grad_norm": 1.3058225864604531, + "learning_rate": 1.956266113771202e-05, + "loss": 0.5942, + "step": 7041 + }, + { + "epoch": 0.12168233342549074, + "grad_norm": 0.9524254264736286, + "learning_rate": 1.9562497426329578e-05, + "loss": 0.7303, + "step": 7042 + }, + { + "epoch": 0.12169961293889964, + "grad_norm": 0.775862412810731, + "learning_rate": 1.956233368499666e-05, + "loss": 0.675, + "step": 7043 + }, + { + "epoch": 0.12171689245230854, + "grad_norm": 1.4365277096833013, + "learning_rate": 1.9562169913713792e-05, + "loss": 0.8534, + "step": 7044 + }, + { + "epoch": 0.12173417196571744, + "grad_norm": 0.8328204917289587, + "learning_rate": 1.9562006112481483e-05, + "loss": 0.512, + "step": 7045 + }, + { + "epoch": 0.12175145147912635, + "grad_norm": 1.3800749359205657, + "learning_rate": 1.956184228130024e-05, + "loss": 0.9006, + "step": 7046 + }, + { + "epoch": 0.12176873099253525, + "grad_norm": 0.6986198786487491, + "learning_rate": 1.956167842017058e-05, + "loss": 0.6164, + "step": 7047 + }, + { + "epoch": 0.12178601050594415, + "grad_norm": 1.2680074645859376, + "learning_rate": 1.956151452909302e-05, + "loss": 0.7888, + "step": 7048 + }, + { + "epoch": 0.12180329001935306, + "grad_norm": 0.8105548192072329, + "learning_rate": 1.956135060806807e-05, + "loss": 0.5904, + "step": 7049 + }, + { + "epoch": 0.12182056953276196, + "grad_norm": 0.6883108392573981, + "learning_rate": 1.9561186657096238e-05, + "loss": 0.7299, + "step": 7050 + }, + { + "epoch": 0.12183784904617086, + "grad_norm": 0.8738909792982157, + "learning_rate": 1.9561022676178045e-05, + "loss": 0.56, + "step": 7051 + }, + { + "epoch": 0.12185512855957976, + "grad_norm": 1.1851725863404388, + "learning_rate": 1.9560858665314005e-05, + "loss": 0.5808, + "step": 7052 + }, + { + "epoch": 0.12187240807298866, + "grad_norm": 1.1809968003171085, + "learning_rate": 1.9560694624504626e-05, + "loss": 0.8811, + "step": 7053 + }, + { + "epoch": 0.12188968758639757, + "grad_norm": 0.9227019617721314, + "learning_rate": 1.9560530553750428e-05, + "loss": 0.4894, + "step": 7054 + }, + { + "epoch": 0.12190696709980647, + "grad_norm": 0.9599857512538734, + "learning_rate": 1.956036645305192e-05, + "loss": 0.6542, + "step": 7055 + }, + { + "epoch": 0.12192424661321537, + "grad_norm": 0.8449229503308024, + "learning_rate": 1.956020232240962e-05, + "loss": 0.5911, + "step": 7056 + }, + { + "epoch": 0.12194152612662428, + "grad_norm": 0.6627981467529506, + "learning_rate": 1.9560038161824035e-05, + "loss": 0.5432, + "step": 7057 + }, + { + "epoch": 0.12195880564003318, + "grad_norm": 0.47631108799667193, + "learning_rate": 1.9559873971295684e-05, + "loss": 0.596, + "step": 7058 + }, + { + "epoch": 0.12197608515344208, + "grad_norm": 0.42025323219993027, + "learning_rate": 1.9559709750825085e-05, + "loss": 0.6347, + "step": 7059 + }, + { + "epoch": 0.12199336466685098, + "grad_norm": 0.46108897685366845, + "learning_rate": 1.9559545500412748e-05, + "loss": 0.6759, + "step": 7060 + }, + { + "epoch": 0.12201064418025988, + "grad_norm": 1.0209707387258935, + "learning_rate": 1.9559381220059186e-05, + "loss": 0.6693, + "step": 7061 + }, + { + "epoch": 0.12202792369366879, + "grad_norm": 0.7822928577755144, + "learning_rate": 1.9559216909764917e-05, + "loss": 0.446, + "step": 7062 + }, + { + "epoch": 0.1220452032070777, + "grad_norm": 0.7243898367715269, + "learning_rate": 1.955905256953045e-05, + "loss": 0.4103, + "step": 7063 + }, + { + "epoch": 0.12206248272048659, + "grad_norm": 0.9178232769334588, + "learning_rate": 1.9558888199356313e-05, + "loss": 0.7587, + "step": 7064 + }, + { + "epoch": 0.1220797622338955, + "grad_norm": 0.8872769116706747, + "learning_rate": 1.9558723799243005e-05, + "loss": 0.5303, + "step": 7065 + }, + { + "epoch": 0.1220970417473044, + "grad_norm": 0.782484117162491, + "learning_rate": 1.9558559369191047e-05, + "loss": 0.6364, + "step": 7066 + }, + { + "epoch": 0.1221143212607133, + "grad_norm": 1.1147552322079477, + "learning_rate": 1.955839490920096e-05, + "loss": 0.7986, + "step": 7067 + }, + { + "epoch": 0.1221316007741222, + "grad_norm": 0.9542298045205637, + "learning_rate": 1.955823041927325e-05, + "loss": 0.578, + "step": 7068 + }, + { + "epoch": 0.1221488802875311, + "grad_norm": 0.9501647301230669, + "learning_rate": 1.9558065899408433e-05, + "loss": 0.802, + "step": 7069 + }, + { + "epoch": 0.12216615980094, + "grad_norm": 0.4780180302628736, + "learning_rate": 1.9557901349607032e-05, + "loss": 0.5759, + "step": 7070 + }, + { + "epoch": 0.12218343931434891, + "grad_norm": 0.9994636841865726, + "learning_rate": 1.9557736769869556e-05, + "loss": 0.6086, + "step": 7071 + }, + { + "epoch": 0.1222007188277578, + "grad_norm": 1.0117441878256206, + "learning_rate": 1.9557572160196523e-05, + "loss": 0.6879, + "step": 7072 + }, + { + "epoch": 0.12221799834116671, + "grad_norm": 1.1418294199160843, + "learning_rate": 1.9557407520588448e-05, + "loss": 0.7145, + "step": 7073 + }, + { + "epoch": 0.12223527785457562, + "grad_norm": 1.0981621410069593, + "learning_rate": 1.9557242851045845e-05, + "loss": 0.591, + "step": 7074 + }, + { + "epoch": 0.12225255736798452, + "grad_norm": 1.2188984638319238, + "learning_rate": 1.955707815156923e-05, + "loss": 0.8098, + "step": 7075 + }, + { + "epoch": 0.12226983688139342, + "grad_norm": 0.7577998641681299, + "learning_rate": 1.955691342215912e-05, + "loss": 0.5035, + "step": 7076 + }, + { + "epoch": 0.12228711639480232, + "grad_norm": 1.2782028651627713, + "learning_rate": 1.955674866281603e-05, + "loss": 0.855, + "step": 7077 + }, + { + "epoch": 0.12230439590821122, + "grad_norm": 1.3562992268130971, + "learning_rate": 1.9556583873540483e-05, + "loss": 0.7368, + "step": 7078 + }, + { + "epoch": 0.12232167542162013, + "grad_norm": 0.6335364758329666, + "learning_rate": 1.9556419054332983e-05, + "loss": 0.3941, + "step": 7079 + }, + { + "epoch": 0.12233895493502903, + "grad_norm": 0.47709526160883803, + "learning_rate": 1.9556254205194055e-05, + "loss": 0.4539, + "step": 7080 + }, + { + "epoch": 0.12235623444843793, + "grad_norm": 0.9066990564150393, + "learning_rate": 1.955608932612421e-05, + "loss": 0.4258, + "step": 7081 + }, + { + "epoch": 0.12237351396184683, + "grad_norm": 0.9296387520210093, + "learning_rate": 1.955592441712397e-05, + "loss": 0.5825, + "step": 7082 + }, + { + "epoch": 0.12239079347525574, + "grad_norm": 0.8537314153626102, + "learning_rate": 1.9555759478193848e-05, + "loss": 0.7924, + "step": 7083 + }, + { + "epoch": 0.12240807298866464, + "grad_norm": 0.8992478204673988, + "learning_rate": 1.9555594509334362e-05, + "loss": 0.5872, + "step": 7084 + }, + { + "epoch": 0.12242535250207354, + "grad_norm": 0.8196019009265703, + "learning_rate": 1.9555429510546024e-05, + "loss": 0.6911, + "step": 7085 + }, + { + "epoch": 0.12244263201548244, + "grad_norm": 1.1625170299642937, + "learning_rate": 1.9555264481829356e-05, + "loss": 0.9738, + "step": 7086 + }, + { + "epoch": 0.12245991152889135, + "grad_norm": 0.8019868399381254, + "learning_rate": 1.9555099423184876e-05, + "loss": 0.734, + "step": 7087 + }, + { + "epoch": 0.12247719104230025, + "grad_norm": 0.7444797845393983, + "learning_rate": 1.9554934334613096e-05, + "loss": 0.5992, + "step": 7088 + }, + { + "epoch": 0.12249447055570915, + "grad_norm": 1.2930393845700998, + "learning_rate": 1.9554769216114534e-05, + "loss": 0.7662, + "step": 7089 + }, + { + "epoch": 0.12251175006911805, + "grad_norm": 0.5857779307863226, + "learning_rate": 1.9554604067689715e-05, + "loss": 0.4337, + "step": 7090 + }, + { + "epoch": 0.12252902958252695, + "grad_norm": 0.9939267330315549, + "learning_rate": 1.9554438889339144e-05, + "loss": 0.6754, + "step": 7091 + }, + { + "epoch": 0.12254630909593586, + "grad_norm": 0.6162860718751744, + "learning_rate": 1.9554273681063346e-05, + "loss": 0.4439, + "step": 7092 + }, + { + "epoch": 0.12256358860934476, + "grad_norm": 0.9520072036850423, + "learning_rate": 1.9554108442862836e-05, + "loss": 0.7013, + "step": 7093 + }, + { + "epoch": 0.12258086812275366, + "grad_norm": 0.8283736296633443, + "learning_rate": 1.9553943174738134e-05, + "loss": 0.5723, + "step": 7094 + }, + { + "epoch": 0.12259814763616257, + "grad_norm": 1.021338306414858, + "learning_rate": 1.9553777876689756e-05, + "loss": 0.7279, + "step": 7095 + }, + { + "epoch": 0.12261542714957147, + "grad_norm": 0.9619468244346484, + "learning_rate": 1.9553612548718218e-05, + "loss": 0.8012, + "step": 7096 + }, + { + "epoch": 0.12263270666298037, + "grad_norm": 1.3044093194415576, + "learning_rate": 1.955344719082404e-05, + "loss": 0.7886, + "step": 7097 + }, + { + "epoch": 0.12264998617638927, + "grad_norm": 0.9446259436902579, + "learning_rate": 1.9553281803007738e-05, + "loss": 0.6391, + "step": 7098 + }, + { + "epoch": 0.12266726568979817, + "grad_norm": 0.9093079546756992, + "learning_rate": 1.9553116385269834e-05, + "loss": 0.8187, + "step": 7099 + }, + { + "epoch": 0.12268454520320708, + "grad_norm": 0.8230267559803717, + "learning_rate": 1.9552950937610843e-05, + "loss": 0.7285, + "step": 7100 + }, + { + "epoch": 0.12270182471661598, + "grad_norm": 0.8724710732366774, + "learning_rate": 1.955278546003128e-05, + "loss": 0.5332, + "step": 7101 + }, + { + "epoch": 0.12271910423002488, + "grad_norm": 0.8847667615969708, + "learning_rate": 1.9552619952531672e-05, + "loss": 0.4779, + "step": 7102 + }, + { + "epoch": 0.12273638374343379, + "grad_norm": 0.9129871593501498, + "learning_rate": 1.9552454415112528e-05, + "loss": 0.6602, + "step": 7103 + }, + { + "epoch": 0.12275366325684268, + "grad_norm": 0.9216052228474493, + "learning_rate": 1.9552288847774375e-05, + "loss": 0.7327, + "step": 7104 + }, + { + "epoch": 0.12277094277025159, + "grad_norm": 0.8032643698269702, + "learning_rate": 1.9552123250517724e-05, + "loss": 0.6937, + "step": 7105 + }, + { + "epoch": 0.12278822228366049, + "grad_norm": 0.9871150004859921, + "learning_rate": 1.9551957623343097e-05, + "loss": 0.5582, + "step": 7106 + }, + { + "epoch": 0.1228055017970694, + "grad_norm": 0.9531775065710745, + "learning_rate": 1.9551791966251015e-05, + "loss": 0.6906, + "step": 7107 + }, + { + "epoch": 0.1228227813104783, + "grad_norm": 0.5547399579653398, + "learning_rate": 1.9551626279241994e-05, + "loss": 0.475, + "step": 7108 + }, + { + "epoch": 0.1228400608238872, + "grad_norm": 0.9305341185121386, + "learning_rate": 1.9551460562316555e-05, + "loss": 0.6537, + "step": 7109 + }, + { + "epoch": 0.1228573403372961, + "grad_norm": 0.731827498903062, + "learning_rate": 1.9551294815475214e-05, + "loss": 0.7444, + "step": 7110 + }, + { + "epoch": 0.12287461985070501, + "grad_norm": 0.8954551711866278, + "learning_rate": 1.9551129038718492e-05, + "loss": 0.8184, + "step": 7111 + }, + { + "epoch": 0.1228918993641139, + "grad_norm": 0.9321789524741462, + "learning_rate": 1.955096323204691e-05, + "loss": 0.7274, + "step": 7112 + }, + { + "epoch": 0.12290917887752281, + "grad_norm": 1.24533683133513, + "learning_rate": 1.955079739546098e-05, + "loss": 0.8644, + "step": 7113 + }, + { + "epoch": 0.1229264583909317, + "grad_norm": 0.5384355805420514, + "learning_rate": 1.955063152896123e-05, + "loss": 0.7773, + "step": 7114 + }, + { + "epoch": 0.12294373790434061, + "grad_norm": 0.7532472798986996, + "learning_rate": 1.955046563254818e-05, + "loss": 0.7335, + "step": 7115 + }, + { + "epoch": 0.12296101741774952, + "grad_norm": 0.45319350008292586, + "learning_rate": 1.955029970622234e-05, + "loss": 0.5463, + "step": 7116 + }, + { + "epoch": 0.12297829693115842, + "grad_norm": 0.8012547326824162, + "learning_rate": 1.9550133749984237e-05, + "loss": 0.6204, + "step": 7117 + }, + { + "epoch": 0.12299557644456732, + "grad_norm": 0.754858743229601, + "learning_rate": 1.954996776383439e-05, + "loss": 0.4253, + "step": 7118 + }, + { + "epoch": 0.12301285595797622, + "grad_norm": 1.0635876583022101, + "learning_rate": 1.9549801747773317e-05, + "loss": 0.4731, + "step": 7119 + }, + { + "epoch": 0.12303013547138512, + "grad_norm": 1.0815559976932911, + "learning_rate": 1.954963570180154e-05, + "loss": 0.8962, + "step": 7120 + }, + { + "epoch": 0.12304741498479403, + "grad_norm": 0.9660476006793718, + "learning_rate": 1.954946962591958e-05, + "loss": 0.8229, + "step": 7121 + }, + { + "epoch": 0.12306469449820293, + "grad_norm": 0.8032452811489698, + "learning_rate": 1.9549303520127955e-05, + "loss": 0.7095, + "step": 7122 + }, + { + "epoch": 0.12308197401161183, + "grad_norm": 1.174106803481351, + "learning_rate": 1.954913738442718e-05, + "loss": 0.6684, + "step": 7123 + }, + { + "epoch": 0.12309925352502074, + "grad_norm": 1.0756937844314136, + "learning_rate": 1.9548971218817788e-05, + "loss": 0.5608, + "step": 7124 + }, + { + "epoch": 0.12311653303842963, + "grad_norm": 0.8075283515879813, + "learning_rate": 1.954880502330029e-05, + "loss": 0.465, + "step": 7125 + }, + { + "epoch": 0.12313381255183854, + "grad_norm": 0.7941711612098693, + "learning_rate": 1.954863879787521e-05, + "loss": 0.5745, + "step": 7126 + }, + { + "epoch": 0.12315109206524744, + "grad_norm": 1.0408285043779522, + "learning_rate": 1.9548472542543066e-05, + "loss": 0.6137, + "step": 7127 + }, + { + "epoch": 0.12316837157865634, + "grad_norm": 0.9015326337251585, + "learning_rate": 1.954830625730438e-05, + "loss": 0.9133, + "step": 7128 + }, + { + "epoch": 0.12318565109206525, + "grad_norm": 0.8597838039091805, + "learning_rate": 1.9548139942159674e-05, + "loss": 0.703, + "step": 7129 + }, + { + "epoch": 0.12320293060547415, + "grad_norm": 0.8540178828890423, + "learning_rate": 1.954797359710947e-05, + "loss": 0.5781, + "step": 7130 + }, + { + "epoch": 0.12322021011888305, + "grad_norm": 1.1784298067438683, + "learning_rate": 1.9547807222154284e-05, + "loss": 0.7985, + "step": 7131 + }, + { + "epoch": 0.12323748963229196, + "grad_norm": 0.8028509188106919, + "learning_rate": 1.9547640817294642e-05, + "loss": 0.4655, + "step": 7132 + }, + { + "epoch": 0.12325476914570085, + "grad_norm": 1.0606707182777009, + "learning_rate": 1.954747438253106e-05, + "loss": 0.5581, + "step": 7133 + }, + { + "epoch": 0.12327204865910976, + "grad_norm": 0.7445593737819133, + "learning_rate": 1.9547307917864066e-05, + "loss": 0.4792, + "step": 7134 + }, + { + "epoch": 0.12328932817251866, + "grad_norm": 0.8648218848810535, + "learning_rate": 1.954714142329418e-05, + "loss": 0.7586, + "step": 7135 + }, + { + "epoch": 0.12330660768592756, + "grad_norm": 0.9684325853334428, + "learning_rate": 1.9546974898821917e-05, + "loss": 0.6899, + "step": 7136 + }, + { + "epoch": 0.12332388719933647, + "grad_norm": 0.8789990182917556, + "learning_rate": 1.9546808344447804e-05, + "loss": 0.6796, + "step": 7137 + }, + { + "epoch": 0.12334116671274536, + "grad_norm": 1.0217042876384341, + "learning_rate": 1.9546641760172364e-05, + "loss": 0.9202, + "step": 7138 + }, + { + "epoch": 0.12335844622615427, + "grad_norm": 0.9269411671310539, + "learning_rate": 1.9546475145996115e-05, + "loss": 0.8015, + "step": 7139 + }, + { + "epoch": 0.12337572573956318, + "grad_norm": 0.8729369650865482, + "learning_rate": 1.9546308501919583e-05, + "loss": 0.5959, + "step": 7140 + }, + { + "epoch": 0.12339300525297207, + "grad_norm": 1.0212507174794998, + "learning_rate": 1.9546141827943282e-05, + "loss": 0.6689, + "step": 7141 + }, + { + "epoch": 0.12341028476638098, + "grad_norm": 1.1824713686345094, + "learning_rate": 1.9545975124067742e-05, + "loss": 0.8663, + "step": 7142 + }, + { + "epoch": 0.12342756427978988, + "grad_norm": 0.798231155268612, + "learning_rate": 1.9545808390293484e-05, + "loss": 0.6104, + "step": 7143 + }, + { + "epoch": 0.12344484379319878, + "grad_norm": 1.3839343206820016, + "learning_rate": 1.9545641626621026e-05, + "loss": 0.6671, + "step": 7144 + }, + { + "epoch": 0.12346212330660769, + "grad_norm": 1.0588866249842765, + "learning_rate": 1.9545474833050893e-05, + "loss": 0.6476, + "step": 7145 + }, + { + "epoch": 0.12347940282001658, + "grad_norm": 1.0889632009972707, + "learning_rate": 1.954530800958361e-05, + "loss": 0.7987, + "step": 7146 + }, + { + "epoch": 0.12349668233342549, + "grad_norm": 0.8301433825911475, + "learning_rate": 1.9545141156219695e-05, + "loss": 0.6216, + "step": 7147 + }, + { + "epoch": 0.1235139618468344, + "grad_norm": 0.787439724241921, + "learning_rate": 1.9544974272959673e-05, + "loss": 0.3942, + "step": 7148 + }, + { + "epoch": 0.1235312413602433, + "grad_norm": 0.993642418961941, + "learning_rate": 1.9544807359804067e-05, + "loss": 0.5967, + "step": 7149 + }, + { + "epoch": 0.1235485208736522, + "grad_norm": 1.235863640665552, + "learning_rate": 1.95446404167534e-05, + "loss": 0.7324, + "step": 7150 + }, + { + "epoch": 0.1235658003870611, + "grad_norm": 1.0176915282754886, + "learning_rate": 1.954447344380819e-05, + "loss": 0.7707, + "step": 7151 + }, + { + "epoch": 0.12358307990047, + "grad_norm": 0.7829878572644579, + "learning_rate": 1.9544306440968966e-05, + "loss": 0.6552, + "step": 7152 + }, + { + "epoch": 0.12360035941387891, + "grad_norm": 0.9155755086650648, + "learning_rate": 1.9544139408236248e-05, + "loss": 0.8617, + "step": 7153 + }, + { + "epoch": 0.1236176389272878, + "grad_norm": 1.0177869312366052, + "learning_rate": 1.9543972345610562e-05, + "loss": 0.8703, + "step": 7154 + }, + { + "epoch": 0.12363491844069671, + "grad_norm": 0.6441949112053115, + "learning_rate": 1.9543805253092426e-05, + "loss": 0.4055, + "step": 7155 + }, + { + "epoch": 0.12365219795410562, + "grad_norm": 1.017149441906296, + "learning_rate": 1.954363813068237e-05, + "loss": 0.7027, + "step": 7156 + }, + { + "epoch": 0.12366947746751451, + "grad_norm": 1.050071408264003, + "learning_rate": 1.9543470978380915e-05, + "loss": 0.7744, + "step": 7157 + }, + { + "epoch": 0.12368675698092342, + "grad_norm": 0.9227329014924043, + "learning_rate": 1.954330379618858e-05, + "loss": 0.6788, + "step": 7158 + }, + { + "epoch": 0.12370403649433231, + "grad_norm": 0.781637525902039, + "learning_rate": 1.954313658410589e-05, + "loss": 0.6379, + "step": 7159 + }, + { + "epoch": 0.12372131600774122, + "grad_norm": 0.817307756130623, + "learning_rate": 1.9542969342133376e-05, + "loss": 0.5698, + "step": 7160 + }, + { + "epoch": 0.12373859552115013, + "grad_norm": 0.711498214710959, + "learning_rate": 1.954280207027156e-05, + "loss": 0.5356, + "step": 7161 + }, + { + "epoch": 0.12375587503455902, + "grad_norm": 0.6318648916697187, + "learning_rate": 1.9542634768520953e-05, + "loss": 0.3359, + "step": 7162 + }, + { + "epoch": 0.12377315454796793, + "grad_norm": 1.1496563154939379, + "learning_rate": 1.9542467436882096e-05, + "loss": 0.479, + "step": 7163 + }, + { + "epoch": 0.12379043406137683, + "grad_norm": 1.2851115421325336, + "learning_rate": 1.95423000753555e-05, + "loss": 0.8946, + "step": 7164 + }, + { + "epoch": 0.12380771357478573, + "grad_norm": 0.9731103145978985, + "learning_rate": 1.95421326839417e-05, + "loss": 0.4058, + "step": 7165 + }, + { + "epoch": 0.12382499308819464, + "grad_norm": 1.47718566041892, + "learning_rate": 1.9541965262641215e-05, + "loss": 0.8476, + "step": 7166 + }, + { + "epoch": 0.12384227260160353, + "grad_norm": 1.2250034762753066, + "learning_rate": 1.9541797811454567e-05, + "loss": 0.7795, + "step": 7167 + }, + { + "epoch": 0.12385955211501244, + "grad_norm": 0.8831860268945718, + "learning_rate": 1.9541630330382285e-05, + "loss": 0.8316, + "step": 7168 + }, + { + "epoch": 0.12387683162842135, + "grad_norm": 0.823898748607358, + "learning_rate": 1.9541462819424893e-05, + "loss": 0.5778, + "step": 7169 + }, + { + "epoch": 0.12389411114183024, + "grad_norm": 1.4383162287553093, + "learning_rate": 1.954129527858291e-05, + "loss": 0.6462, + "step": 7170 + }, + { + "epoch": 0.12391139065523915, + "grad_norm": 1.0507701408939427, + "learning_rate": 1.9541127707856868e-05, + "loss": 0.6129, + "step": 7171 + }, + { + "epoch": 0.12392867016864804, + "grad_norm": 1.39984038613333, + "learning_rate": 1.9540960107247292e-05, + "loss": 0.7351, + "step": 7172 + }, + { + "epoch": 0.12394594968205695, + "grad_norm": 1.1951514219518882, + "learning_rate": 1.95407924767547e-05, + "loss": 0.705, + "step": 7173 + }, + { + "epoch": 0.12396322919546586, + "grad_norm": 0.9325134727014849, + "learning_rate": 1.954062481637962e-05, + "loss": 0.5284, + "step": 7174 + }, + { + "epoch": 0.12398050870887475, + "grad_norm": 0.917327523260069, + "learning_rate": 1.954045712612258e-05, + "loss": 0.7643, + "step": 7175 + }, + { + "epoch": 0.12399778822228366, + "grad_norm": 0.8898399176230455, + "learning_rate": 1.95402894059841e-05, + "loss": 0.7727, + "step": 7176 + }, + { + "epoch": 0.12401506773569257, + "grad_norm": 1.0255828364594093, + "learning_rate": 1.9540121655964714e-05, + "loss": 0.8659, + "step": 7177 + }, + { + "epoch": 0.12403234724910146, + "grad_norm": 0.7043736912585918, + "learning_rate": 1.9539953876064938e-05, + "loss": 0.5538, + "step": 7178 + }, + { + "epoch": 0.12404962676251037, + "grad_norm": 0.9061233726437335, + "learning_rate": 1.9539786066285302e-05, + "loss": 0.8264, + "step": 7179 + }, + { + "epoch": 0.12406690627591926, + "grad_norm": 0.8598531173853493, + "learning_rate": 1.9539618226626332e-05, + "loss": 0.5386, + "step": 7180 + }, + { + "epoch": 0.12408418578932817, + "grad_norm": 0.832996929702209, + "learning_rate": 1.9539450357088553e-05, + "loss": 0.6001, + "step": 7181 + }, + { + "epoch": 0.12410146530273708, + "grad_norm": 0.4605726293064703, + "learning_rate": 1.953928245767249e-05, + "loss": 0.5712, + "step": 7182 + }, + { + "epoch": 0.12411874481614597, + "grad_norm": 0.9466876978735339, + "learning_rate": 1.9539114528378667e-05, + "loss": 0.7832, + "step": 7183 + }, + { + "epoch": 0.12413602432955488, + "grad_norm": 0.9370261702711405, + "learning_rate": 1.9538946569207614e-05, + "loss": 0.6384, + "step": 7184 + }, + { + "epoch": 0.12415330384296379, + "grad_norm": 0.7768364842411946, + "learning_rate": 1.953877858015986e-05, + "loss": 0.6709, + "step": 7185 + }, + { + "epoch": 0.12417058335637268, + "grad_norm": 1.0684410712140824, + "learning_rate": 1.953861056123592e-05, + "loss": 0.7332, + "step": 7186 + }, + { + "epoch": 0.12418786286978159, + "grad_norm": 1.0868438000046612, + "learning_rate": 1.953844251243633e-05, + "loss": 0.8188, + "step": 7187 + }, + { + "epoch": 0.12420514238319048, + "grad_norm": 1.0631005593345444, + "learning_rate": 1.953827443376161e-05, + "loss": 0.8402, + "step": 7188 + }, + { + "epoch": 0.12422242189659939, + "grad_norm": 1.1118067285726456, + "learning_rate": 1.953810632521229e-05, + "loss": 0.7972, + "step": 7189 + }, + { + "epoch": 0.1242397014100083, + "grad_norm": 0.9109046170464941, + "learning_rate": 1.95379381867889e-05, + "loss": 0.5832, + "step": 7190 + }, + { + "epoch": 0.12425698092341719, + "grad_norm": 1.0114855026003398, + "learning_rate": 1.953777001849196e-05, + "loss": 0.736, + "step": 7191 + }, + { + "epoch": 0.1242742604368261, + "grad_norm": 0.9933233220828962, + "learning_rate": 1.9537601820322e-05, + "loss": 0.5381, + "step": 7192 + }, + { + "epoch": 0.12429153995023501, + "grad_norm": 0.6564525771441162, + "learning_rate": 1.953743359227955e-05, + "loss": 0.8596, + "step": 7193 + }, + { + "epoch": 0.1243088194636439, + "grad_norm": 1.0034970991979462, + "learning_rate": 1.953726533436513e-05, + "loss": 0.4897, + "step": 7194 + }, + { + "epoch": 0.12432609897705281, + "grad_norm": 1.095901932267099, + "learning_rate": 1.9537097046579268e-05, + "loss": 0.6718, + "step": 7195 + }, + { + "epoch": 0.1243433784904617, + "grad_norm": 0.4607528102331428, + "learning_rate": 1.9536928728922496e-05, + "loss": 0.6602, + "step": 7196 + }, + { + "epoch": 0.12436065800387061, + "grad_norm": 0.8827948773620007, + "learning_rate": 1.9536760381395337e-05, + "loss": 0.5518, + "step": 7197 + }, + { + "epoch": 0.12437793751727952, + "grad_norm": 0.94630513799427, + "learning_rate": 1.953659200399832e-05, + "loss": 0.6198, + "step": 7198 + }, + { + "epoch": 0.12439521703068841, + "grad_norm": 0.7641945246388051, + "learning_rate": 1.953642359673197e-05, + "loss": 0.9242, + "step": 7199 + }, + { + "epoch": 0.12441249654409732, + "grad_norm": 1.0169520661035354, + "learning_rate": 1.953625515959682e-05, + "loss": 0.7585, + "step": 7200 + }, + { + "epoch": 0.12442977605750621, + "grad_norm": 0.7884334673993114, + "learning_rate": 1.9536086692593395e-05, + "loss": 0.5452, + "step": 7201 + }, + { + "epoch": 0.12444705557091512, + "grad_norm": 0.7616820538640502, + "learning_rate": 1.9535918195722222e-05, + "loss": 0.5925, + "step": 7202 + }, + { + "epoch": 0.12446433508432403, + "grad_norm": 0.9851898598117284, + "learning_rate": 1.9535749668983824e-05, + "loss": 0.6557, + "step": 7203 + }, + { + "epoch": 0.12448161459773292, + "grad_norm": 1.0331511433168183, + "learning_rate": 1.9535581112378735e-05, + "loss": 0.7126, + "step": 7204 + }, + { + "epoch": 0.12449889411114183, + "grad_norm": 0.7650444642130408, + "learning_rate": 1.9535412525907485e-05, + "loss": 0.6834, + "step": 7205 + }, + { + "epoch": 0.12451617362455074, + "grad_norm": 0.9320865599262036, + "learning_rate": 1.9535243909570592e-05, + "loss": 0.8955, + "step": 7206 + }, + { + "epoch": 0.12453345313795963, + "grad_norm": 0.8382250054518124, + "learning_rate": 1.9535075263368595e-05, + "loss": 0.6647, + "step": 7207 + }, + { + "epoch": 0.12455073265136854, + "grad_norm": 0.7327642209282759, + "learning_rate": 1.9534906587302015e-05, + "loss": 0.4649, + "step": 7208 + }, + { + "epoch": 0.12456801216477743, + "grad_norm": 0.9455930615282958, + "learning_rate": 1.9534737881371388e-05, + "loss": 0.699, + "step": 7209 + }, + { + "epoch": 0.12458529167818634, + "grad_norm": 1.157908655282555, + "learning_rate": 1.953456914557723e-05, + "loss": 0.7307, + "step": 7210 + }, + { + "epoch": 0.12460257119159525, + "grad_norm": 1.321753075581483, + "learning_rate": 1.953440037992008e-05, + "loss": 0.852, + "step": 7211 + }, + { + "epoch": 0.12461985070500414, + "grad_norm": 0.9658648496789647, + "learning_rate": 1.9534231584400466e-05, + "loss": 0.6126, + "step": 7212 + }, + { + "epoch": 0.12463713021841305, + "grad_norm": 1.2290369852121594, + "learning_rate": 1.9534062759018912e-05, + "loss": 0.8775, + "step": 7213 + }, + { + "epoch": 0.12465440973182196, + "grad_norm": 1.3209527968507988, + "learning_rate": 1.953389390377595e-05, + "loss": 0.7591, + "step": 7214 + }, + { + "epoch": 0.12467168924523085, + "grad_norm": 0.8945967778868392, + "learning_rate": 1.9533725018672103e-05, + "loss": 0.6345, + "step": 7215 + }, + { + "epoch": 0.12468896875863976, + "grad_norm": 1.092838174762863, + "learning_rate": 1.953355610370791e-05, + "loss": 0.6992, + "step": 7216 + }, + { + "epoch": 0.12470624827204865, + "grad_norm": 0.7374869176844366, + "learning_rate": 1.9533387158883895e-05, + "loss": 0.6475, + "step": 7217 + }, + { + "epoch": 0.12472352778545756, + "grad_norm": 0.9120212246252722, + "learning_rate": 1.9533218184200583e-05, + "loss": 0.7931, + "step": 7218 + }, + { + "epoch": 0.12474080729886647, + "grad_norm": 0.9519632799927817, + "learning_rate": 1.953304917965851e-05, + "loss": 0.7939, + "step": 7219 + }, + { + "epoch": 0.12475808681227536, + "grad_norm": 0.8179070451570178, + "learning_rate": 1.95328801452582e-05, + "loss": 0.6599, + "step": 7220 + }, + { + "epoch": 0.12477536632568427, + "grad_norm": 0.9935453698035239, + "learning_rate": 1.953271108100019e-05, + "loss": 0.6209, + "step": 7221 + }, + { + "epoch": 0.12479264583909318, + "grad_norm": 0.884747677139, + "learning_rate": 1.9532541986884997e-05, + "loss": 0.5852, + "step": 7222 + }, + { + "epoch": 0.12480992535250207, + "grad_norm": 0.6812470284062572, + "learning_rate": 1.953237286291316e-05, + "loss": 0.5536, + "step": 7223 + }, + { + "epoch": 0.12482720486591098, + "grad_norm": 0.7105792802138811, + "learning_rate": 1.9532203709085213e-05, + "loss": 0.8089, + "step": 7224 + }, + { + "epoch": 0.12484448437931987, + "grad_norm": 1.0915380191856787, + "learning_rate": 1.9532034525401674e-05, + "loss": 0.7964, + "step": 7225 + }, + { + "epoch": 0.12486176389272878, + "grad_norm": 0.6653870076157902, + "learning_rate": 1.953186531186308e-05, + "loss": 0.5585, + "step": 7226 + }, + { + "epoch": 0.12487904340613769, + "grad_norm": 0.5092000543746554, + "learning_rate": 1.953169606846996e-05, + "loss": 0.9514, + "step": 7227 + }, + { + "epoch": 0.12489632291954658, + "grad_norm": 1.0816788683593854, + "learning_rate": 1.953152679522284e-05, + "loss": 0.8098, + "step": 7228 + }, + { + "epoch": 0.12491360243295549, + "grad_norm": 0.6546325395969737, + "learning_rate": 1.953135749212226e-05, + "loss": 0.5179, + "step": 7229 + }, + { + "epoch": 0.1249308819463644, + "grad_norm": 0.9765044769022209, + "learning_rate": 1.953118815916874e-05, + "loss": 0.788, + "step": 7230 + }, + { + "epoch": 0.12494816145977329, + "grad_norm": 0.908502726040939, + "learning_rate": 1.9531018796362815e-05, + "loss": 0.5707, + "step": 7231 + }, + { + "epoch": 0.1249654409731822, + "grad_norm": 1.1293494467290197, + "learning_rate": 1.9530849403705013e-05, + "loss": 0.7306, + "step": 7232 + }, + { + "epoch": 0.12498272048659109, + "grad_norm": 1.3269401403475318, + "learning_rate": 1.953067998119587e-05, + "loss": 0.6167, + "step": 7233 + }, + { + "epoch": 0.125, + "grad_norm": 0.9434225947258603, + "learning_rate": 1.953051052883591e-05, + "loss": 0.6524, + "step": 7234 + }, + { + "epoch": 0.1250172795134089, + "grad_norm": 1.3084771638907053, + "learning_rate": 1.9530341046625668e-05, + "loss": 0.6482, + "step": 7235 + }, + { + "epoch": 0.12503455902681782, + "grad_norm": 1.0271265955333269, + "learning_rate": 1.9530171534565673e-05, + "loss": 0.7465, + "step": 7236 + }, + { + "epoch": 0.1250518385402267, + "grad_norm": 0.9415220318131761, + "learning_rate": 1.9530001992656458e-05, + "loss": 0.584, + "step": 7237 + }, + { + "epoch": 0.1250691180536356, + "grad_norm": 0.8572613200384611, + "learning_rate": 1.952983242089855e-05, + "loss": 0.6436, + "step": 7238 + }, + { + "epoch": 0.1250863975670445, + "grad_norm": 0.5290455714254048, + "learning_rate": 1.9529662819292482e-05, + "loss": 0.7419, + "step": 7239 + }, + { + "epoch": 0.12510367708045342, + "grad_norm": 0.8030640094140956, + "learning_rate": 1.952949318783879e-05, + "loss": 0.7951, + "step": 7240 + }, + { + "epoch": 0.12512095659386233, + "grad_norm": 1.110663581416493, + "learning_rate": 1.9529323526537997e-05, + "loss": 0.8504, + "step": 7241 + }, + { + "epoch": 0.1251382361072712, + "grad_norm": 0.8555013420701323, + "learning_rate": 1.9529153835390643e-05, + "loss": 0.6818, + "step": 7242 + }, + { + "epoch": 0.1251555156206801, + "grad_norm": 0.7477087613263894, + "learning_rate": 1.952898411439725e-05, + "loss": 0.5734, + "step": 7243 + }, + { + "epoch": 0.12517279513408902, + "grad_norm": 0.9701042714994187, + "learning_rate": 1.9528814363558355e-05, + "loss": 0.6681, + "step": 7244 + }, + { + "epoch": 0.12519007464749793, + "grad_norm": 0.8323684983891856, + "learning_rate": 1.9528644582874493e-05, + "loss": 0.5466, + "step": 7245 + }, + { + "epoch": 0.12520735416090684, + "grad_norm": 0.7472548059278846, + "learning_rate": 1.952847477234619e-05, + "loss": 0.7539, + "step": 7246 + }, + { + "epoch": 0.12522463367431574, + "grad_norm": 0.8352019714295096, + "learning_rate": 1.952830493197398e-05, + "loss": 0.6849, + "step": 7247 + }, + { + "epoch": 0.12524191318772462, + "grad_norm": 0.6644169027971014, + "learning_rate": 1.9528135061758395e-05, + "loss": 0.4725, + "step": 7248 + }, + { + "epoch": 0.12525919270113353, + "grad_norm": 0.42951637050262353, + "learning_rate": 1.952796516169997e-05, + "loss": 0.7044, + "step": 7249 + }, + { + "epoch": 0.12527647221454244, + "grad_norm": 0.8170695113859819, + "learning_rate": 1.9527795231799225e-05, + "loss": 0.8057, + "step": 7250 + }, + { + "epoch": 0.12529375172795135, + "grad_norm": 0.8947835538651263, + "learning_rate": 1.952762527205671e-05, + "loss": 0.6881, + "step": 7251 + }, + { + "epoch": 0.12531103124136025, + "grad_norm": 0.8454199010588107, + "learning_rate": 1.9527455282472948e-05, + "loss": 0.6449, + "step": 7252 + }, + { + "epoch": 0.12532831075476913, + "grad_norm": 1.001753232339178, + "learning_rate": 1.9527285263048468e-05, + "loss": 0.7529, + "step": 7253 + }, + { + "epoch": 0.12534559026817804, + "grad_norm": 0.9819335235779226, + "learning_rate": 1.9527115213783807e-05, + "loss": 0.6933, + "step": 7254 + }, + { + "epoch": 0.12536286978158695, + "grad_norm": 1.1315694746395997, + "learning_rate": 1.9526945134679496e-05, + "loss": 0.6679, + "step": 7255 + }, + { + "epoch": 0.12538014929499586, + "grad_norm": 1.07337336960256, + "learning_rate": 1.9526775025736072e-05, + "loss": 0.766, + "step": 7256 + }, + { + "epoch": 0.12539742880840477, + "grad_norm": 0.5424334858504638, + "learning_rate": 1.9526604886954062e-05, + "loss": 0.3905, + "step": 7257 + }, + { + "epoch": 0.12541470832181364, + "grad_norm": 1.0831292338778253, + "learning_rate": 1.9526434718334005e-05, + "loss": 0.6854, + "step": 7258 + }, + { + "epoch": 0.12543198783522255, + "grad_norm": 0.7666481745336875, + "learning_rate": 1.952626451987643e-05, + "loss": 0.4686, + "step": 7259 + }, + { + "epoch": 0.12544926734863146, + "grad_norm": 0.9065182616362603, + "learning_rate": 1.9526094291581866e-05, + "loss": 0.8265, + "step": 7260 + }, + { + "epoch": 0.12546654686204037, + "grad_norm": 0.7137003917901162, + "learning_rate": 1.9525924033450857e-05, + "loss": 0.3975, + "step": 7261 + }, + { + "epoch": 0.12548382637544928, + "grad_norm": 0.6740086328548747, + "learning_rate": 1.9525753745483926e-05, + "loss": 0.6037, + "step": 7262 + }, + { + "epoch": 0.12550110588885818, + "grad_norm": 1.220402166329426, + "learning_rate": 1.952558342768161e-05, + "loss": 0.6392, + "step": 7263 + }, + { + "epoch": 0.12551838540226706, + "grad_norm": 0.8374381157739855, + "learning_rate": 1.9525413080044444e-05, + "loss": 0.8124, + "step": 7264 + }, + { + "epoch": 0.12553566491567597, + "grad_norm": 0.8476613160769415, + "learning_rate": 1.952524270257296e-05, + "loss": 0.463, + "step": 7265 + }, + { + "epoch": 0.12555294442908488, + "grad_norm": 0.7000314819697305, + "learning_rate": 1.9525072295267697e-05, + "loss": 0.7792, + "step": 7266 + }, + { + "epoch": 0.12557022394249379, + "grad_norm": 0.9657280074000277, + "learning_rate": 1.9524901858129177e-05, + "loss": 0.7478, + "step": 7267 + }, + { + "epoch": 0.1255875034559027, + "grad_norm": 1.0096189310397785, + "learning_rate": 1.952473139115794e-05, + "loss": 0.5984, + "step": 7268 + }, + { + "epoch": 0.12560478296931157, + "grad_norm": 0.9417663168815148, + "learning_rate": 1.9524560894354523e-05, + "loss": 0.8188, + "step": 7269 + }, + { + "epoch": 0.12562206248272048, + "grad_norm": 1.1520720988569098, + "learning_rate": 1.952439036771946e-05, + "loss": 0.6338, + "step": 7270 + }, + { + "epoch": 0.1256393419961294, + "grad_norm": 0.7875203325293731, + "learning_rate": 1.952421981125328e-05, + "loss": 0.5838, + "step": 7271 + }, + { + "epoch": 0.1256566215095383, + "grad_norm": 1.1734588343286643, + "learning_rate": 1.952404922495652e-05, + "loss": 0.8144, + "step": 7272 + }, + { + "epoch": 0.1256739010229472, + "grad_norm": 0.8244344644081915, + "learning_rate": 1.9523878608829713e-05, + "loss": 0.6287, + "step": 7273 + }, + { + "epoch": 0.12569118053635608, + "grad_norm": 0.846960873575706, + "learning_rate": 1.9523707962873397e-05, + "loss": 0.7143, + "step": 7274 + }, + { + "epoch": 0.125708460049765, + "grad_norm": 0.8138757325562208, + "learning_rate": 1.95235372870881e-05, + "loss": 0.7845, + "step": 7275 + }, + { + "epoch": 0.1257257395631739, + "grad_norm": 0.9430793283456307, + "learning_rate": 1.952336658147436e-05, + "loss": 0.7906, + "step": 7276 + }, + { + "epoch": 0.1257430190765828, + "grad_norm": 0.8210353932217082, + "learning_rate": 1.9523195846032716e-05, + "loss": 0.5952, + "step": 7277 + }, + { + "epoch": 0.12576029858999171, + "grad_norm": 1.0884919361959058, + "learning_rate": 1.9523025080763695e-05, + "loss": 0.6959, + "step": 7278 + }, + { + "epoch": 0.1257775781034006, + "grad_norm": 0.8082789511512047, + "learning_rate": 1.952285428566784e-05, + "loss": 0.78, + "step": 7279 + }, + { + "epoch": 0.1257948576168095, + "grad_norm": 0.9846026147591549, + "learning_rate": 1.9522683460745678e-05, + "loss": 0.7337, + "step": 7280 + }, + { + "epoch": 0.1258121371302184, + "grad_norm": 0.9982015219817071, + "learning_rate": 1.9522512605997753e-05, + "loss": 0.7493, + "step": 7281 + }, + { + "epoch": 0.12582941664362732, + "grad_norm": 0.9260032677779727, + "learning_rate": 1.9522341721424588e-05, + "loss": 0.5663, + "step": 7282 + }, + { + "epoch": 0.12584669615703623, + "grad_norm": 1.1924919268442844, + "learning_rate": 1.952217080702673e-05, + "loss": 0.8723, + "step": 7283 + }, + { + "epoch": 0.12586397567044513, + "grad_norm": 0.9129512464739328, + "learning_rate": 1.952199986280471e-05, + "loss": 0.6197, + "step": 7284 + }, + { + "epoch": 0.125881255183854, + "grad_norm": 1.0881352853584214, + "learning_rate": 1.9521828888759057e-05, + "loss": 0.6581, + "step": 7285 + }, + { + "epoch": 0.12589853469726292, + "grad_norm": 0.8086822305116965, + "learning_rate": 1.9521657884890318e-05, + "loss": 0.6164, + "step": 7286 + }, + { + "epoch": 0.12591581421067183, + "grad_norm": 0.995752161737726, + "learning_rate": 1.9521486851199022e-05, + "loss": 0.9642, + "step": 7287 + }, + { + "epoch": 0.12593309372408074, + "grad_norm": 1.1820720005856131, + "learning_rate": 1.9521315787685702e-05, + "loss": 0.9022, + "step": 7288 + }, + { + "epoch": 0.12595037323748964, + "grad_norm": 0.8375699721852603, + "learning_rate": 1.95211446943509e-05, + "loss": 0.5614, + "step": 7289 + }, + { + "epoch": 0.12596765275089852, + "grad_norm": 1.5075876007823064, + "learning_rate": 1.9520973571195148e-05, + "loss": 0.8155, + "step": 7290 + }, + { + "epoch": 0.12598493226430743, + "grad_norm": 1.077465918197904, + "learning_rate": 1.9520802418218984e-05, + "loss": 0.7595, + "step": 7291 + }, + { + "epoch": 0.12600221177771634, + "grad_norm": 0.9425518817661346, + "learning_rate": 1.9520631235422944e-05, + "loss": 0.5439, + "step": 7292 + }, + { + "epoch": 0.12601949129112525, + "grad_norm": 0.9937806238364799, + "learning_rate": 1.9520460022807563e-05, + "loss": 0.7992, + "step": 7293 + }, + { + "epoch": 0.12603677080453415, + "grad_norm": 1.205071885305453, + "learning_rate": 1.9520288780373376e-05, + "loss": 0.4779, + "step": 7294 + }, + { + "epoch": 0.12605405031794303, + "grad_norm": 0.7986292405596939, + "learning_rate": 1.9520117508120923e-05, + "loss": 0.4949, + "step": 7295 + }, + { + "epoch": 0.12607132983135194, + "grad_norm": 0.8390276407105497, + "learning_rate": 1.9519946206050737e-05, + "loss": 0.792, + "step": 7296 + }, + { + "epoch": 0.12608860934476085, + "grad_norm": 0.8521359047040826, + "learning_rate": 1.9519774874163356e-05, + "loss": 0.3885, + "step": 7297 + }, + { + "epoch": 0.12610588885816976, + "grad_norm": 0.6715950797136221, + "learning_rate": 1.951960351245932e-05, + "loss": 0.6057, + "step": 7298 + }, + { + "epoch": 0.12612316837157866, + "grad_norm": 1.2196730803342308, + "learning_rate": 1.951943212093916e-05, + "loss": 0.9329, + "step": 7299 + }, + { + "epoch": 0.12614044788498757, + "grad_norm": 0.7224230226993136, + "learning_rate": 1.9519260699603416e-05, + "loss": 0.522, + "step": 7300 + }, + { + "epoch": 0.12615772739839645, + "grad_norm": 0.891164040584957, + "learning_rate": 1.9519089248452623e-05, + "loss": 0.581, + "step": 7301 + }, + { + "epoch": 0.12617500691180536, + "grad_norm": 1.2087090088491004, + "learning_rate": 1.951891776748732e-05, + "loss": 0.8006, + "step": 7302 + }, + { + "epoch": 0.12619228642521427, + "grad_norm": 1.2331261535075875, + "learning_rate": 1.9518746256708038e-05, + "loss": 0.5614, + "step": 7303 + }, + { + "epoch": 0.12620956593862317, + "grad_norm": 1.12629836345294, + "learning_rate": 1.9518574716115327e-05, + "loss": 0.6954, + "step": 7304 + }, + { + "epoch": 0.12622684545203208, + "grad_norm": 0.96528373624216, + "learning_rate": 1.9518403145709712e-05, + "loss": 0.7824, + "step": 7305 + }, + { + "epoch": 0.12624412496544096, + "grad_norm": 1.1197070186154847, + "learning_rate": 1.9518231545491736e-05, + "loss": 0.4574, + "step": 7306 + }, + { + "epoch": 0.12626140447884987, + "grad_norm": 0.9601626343453907, + "learning_rate": 1.9518059915461937e-05, + "loss": 0.6446, + "step": 7307 + }, + { + "epoch": 0.12627868399225878, + "grad_norm": 0.8681786809469405, + "learning_rate": 1.9517888255620847e-05, + "loss": 0.6051, + "step": 7308 + }, + { + "epoch": 0.12629596350566769, + "grad_norm": 0.7133605060630447, + "learning_rate": 1.9517716565969013e-05, + "loss": 0.4837, + "step": 7309 + }, + { + "epoch": 0.1263132430190766, + "grad_norm": 0.6609992128082628, + "learning_rate": 1.9517544846506965e-05, + "loss": 0.6515, + "step": 7310 + }, + { + "epoch": 0.12633052253248547, + "grad_norm": 1.296414079755841, + "learning_rate": 1.9517373097235242e-05, + "loss": 0.6734, + "step": 7311 + }, + { + "epoch": 0.12634780204589438, + "grad_norm": 1.347706393823481, + "learning_rate": 1.951720131815438e-05, + "loss": 0.6695, + "step": 7312 + }, + { + "epoch": 0.1263650815593033, + "grad_norm": 0.5412493384094643, + "learning_rate": 1.9517029509264928e-05, + "loss": 0.555, + "step": 7313 + }, + { + "epoch": 0.1263823610727122, + "grad_norm": 0.9363937995495403, + "learning_rate": 1.9516857670567414e-05, + "loss": 0.7263, + "step": 7314 + }, + { + "epoch": 0.1263996405861211, + "grad_norm": 0.75686686868655, + "learning_rate": 1.9516685802062377e-05, + "loss": 0.5107, + "step": 7315 + }, + { + "epoch": 0.12641692009952998, + "grad_norm": 0.8879773006846994, + "learning_rate": 1.9516513903750356e-05, + "loss": 0.7232, + "step": 7316 + }, + { + "epoch": 0.1264341996129389, + "grad_norm": 1.2177146137854757, + "learning_rate": 1.951634197563189e-05, + "loss": 0.8198, + "step": 7317 + }, + { + "epoch": 0.1264514791263478, + "grad_norm": 0.9913704244117603, + "learning_rate": 1.9516170017707518e-05, + "loss": 0.9015, + "step": 7318 + }, + { + "epoch": 0.1264687586397567, + "grad_norm": 0.9978097628790547, + "learning_rate": 1.9515998029977782e-05, + "loss": 0.7097, + "step": 7319 + }, + { + "epoch": 0.12648603815316561, + "grad_norm": 1.150541554152204, + "learning_rate": 1.9515826012443214e-05, + "loss": 0.5714, + "step": 7320 + }, + { + "epoch": 0.12650331766657452, + "grad_norm": 0.8143457515111459, + "learning_rate": 1.9515653965104354e-05, + "loss": 0.5655, + "step": 7321 + }, + { + "epoch": 0.1265205971799834, + "grad_norm": 1.0634555741429712, + "learning_rate": 1.9515481887961743e-05, + "loss": 0.7672, + "step": 7322 + }, + { + "epoch": 0.1265378766933923, + "grad_norm": 1.110951591377841, + "learning_rate": 1.951530978101592e-05, + "loss": 0.8005, + "step": 7323 + }, + { + "epoch": 0.12655515620680122, + "grad_norm": 1.152600987117242, + "learning_rate": 1.951513764426743e-05, + "loss": 0.8594, + "step": 7324 + }, + { + "epoch": 0.12657243572021012, + "grad_norm": 0.8768846879361094, + "learning_rate": 1.9514965477716797e-05, + "loss": 0.7333, + "step": 7325 + }, + { + "epoch": 0.12658971523361903, + "grad_norm": 0.827908200501149, + "learning_rate": 1.951479328136457e-05, + "loss": 0.7728, + "step": 7326 + }, + { + "epoch": 0.1266069947470279, + "grad_norm": 1.02167090927598, + "learning_rate": 1.9514621055211293e-05, + "loss": 0.829, + "step": 7327 + }, + { + "epoch": 0.12662427426043682, + "grad_norm": 1.0236433504579516, + "learning_rate": 1.9514448799257496e-05, + "loss": 0.7427, + "step": 7328 + }, + { + "epoch": 0.12664155377384573, + "grad_norm": 0.9954186272804817, + "learning_rate": 1.9514276513503724e-05, + "loss": 0.6879, + "step": 7329 + }, + { + "epoch": 0.12665883328725464, + "grad_norm": 0.865822990563525, + "learning_rate": 1.9514104197950513e-05, + "loss": 0.7428, + "step": 7330 + }, + { + "epoch": 0.12667611280066354, + "grad_norm": 1.0347893102190684, + "learning_rate": 1.9513931852598405e-05, + "loss": 0.8647, + "step": 7331 + }, + { + "epoch": 0.12669339231407242, + "grad_norm": 0.8131717575296948, + "learning_rate": 1.951375947744794e-05, + "loss": 0.5992, + "step": 7332 + }, + { + "epoch": 0.12671067182748133, + "grad_norm": 0.8769963675361556, + "learning_rate": 1.9513587072499657e-05, + "loss": 0.6913, + "step": 7333 + }, + { + "epoch": 0.12672795134089024, + "grad_norm": 0.7909252086324562, + "learning_rate": 1.9513414637754098e-05, + "loss": 0.665, + "step": 7334 + }, + { + "epoch": 0.12674523085429915, + "grad_norm": 0.7110265950021963, + "learning_rate": 1.95132421732118e-05, + "loss": 0.5372, + "step": 7335 + }, + { + "epoch": 0.12676251036770805, + "grad_norm": 0.7734492927942416, + "learning_rate": 1.9513069678873302e-05, + "loss": 0.7579, + "step": 7336 + }, + { + "epoch": 0.12677978988111696, + "grad_norm": 0.8457863891490749, + "learning_rate": 1.951289715473915e-05, + "loss": 0.5197, + "step": 7337 + }, + { + "epoch": 0.12679706939452584, + "grad_norm": 0.921016230514868, + "learning_rate": 1.951272460080988e-05, + "loss": 0.5881, + "step": 7338 + }, + { + "epoch": 0.12681434890793475, + "grad_norm": 1.1047939603254937, + "learning_rate": 1.9512552017086036e-05, + "loss": 0.6041, + "step": 7339 + }, + { + "epoch": 0.12683162842134366, + "grad_norm": 1.0620247765597286, + "learning_rate": 1.9512379403568152e-05, + "loss": 0.408, + "step": 7340 + }, + { + "epoch": 0.12684890793475256, + "grad_norm": 0.69164923172835, + "learning_rate": 1.9512206760256776e-05, + "loss": 0.6725, + "step": 7341 + }, + { + "epoch": 0.12686618744816147, + "grad_norm": 1.1122465095987122, + "learning_rate": 1.9512034087152445e-05, + "loss": 0.7342, + "step": 7342 + }, + { + "epoch": 0.12688346696157035, + "grad_norm": 1.1183003835331038, + "learning_rate": 1.9511861384255696e-05, + "loss": 0.5768, + "step": 7343 + }, + { + "epoch": 0.12690074647497926, + "grad_norm": 0.9588010075404437, + "learning_rate": 1.951168865156708e-05, + "loss": 0.8199, + "step": 7344 + }, + { + "epoch": 0.12691802598838817, + "grad_norm": 1.415575207339034, + "learning_rate": 1.951151588908713e-05, + "loss": 0.9623, + "step": 7345 + }, + { + "epoch": 0.12693530550179707, + "grad_norm": 1.2784243182557935, + "learning_rate": 1.951134309681639e-05, + "loss": 0.7301, + "step": 7346 + }, + { + "epoch": 0.12695258501520598, + "grad_norm": 0.992715138039062, + "learning_rate": 1.95111702747554e-05, + "loss": 0.7221, + "step": 7347 + }, + { + "epoch": 0.12696986452861486, + "grad_norm": 0.9179252509572572, + "learning_rate": 1.9510997422904704e-05, + "loss": 0.7397, + "step": 7348 + }, + { + "epoch": 0.12698714404202377, + "grad_norm": 0.6945021677642609, + "learning_rate": 1.9510824541264836e-05, + "loss": 0.733, + "step": 7349 + }, + { + "epoch": 0.12700442355543268, + "grad_norm": 0.5416816973334376, + "learning_rate": 1.9510651629836348e-05, + "loss": 0.3107, + "step": 7350 + }, + { + "epoch": 0.12702170306884158, + "grad_norm": 0.9797113459152305, + "learning_rate": 1.9510478688619774e-05, + "loss": 0.5944, + "step": 7351 + }, + { + "epoch": 0.1270389825822505, + "grad_norm": 0.5164379135818686, + "learning_rate": 1.951030571761566e-05, + "loss": 0.8023, + "step": 7352 + }, + { + "epoch": 0.12705626209565937, + "grad_norm": 1.1765045268064074, + "learning_rate": 1.951013271682454e-05, + "loss": 0.7178, + "step": 7353 + }, + { + "epoch": 0.12707354160906828, + "grad_norm": 0.9944110555605886, + "learning_rate": 1.950995968624697e-05, + "loss": 0.8184, + "step": 7354 + }, + { + "epoch": 0.1270908211224772, + "grad_norm": 0.7355329360833677, + "learning_rate": 1.9509786625883476e-05, + "loss": 0.7099, + "step": 7355 + }, + { + "epoch": 0.1271081006358861, + "grad_norm": 0.44429504481034027, + "learning_rate": 1.9509613535734613e-05, + "loss": 0.7621, + "step": 7356 + }, + { + "epoch": 0.127125380149295, + "grad_norm": 0.6903072060589873, + "learning_rate": 1.9509440415800914e-05, + "loss": 0.7164, + "step": 7357 + }, + { + "epoch": 0.1271426596627039, + "grad_norm": 0.9456157151145458, + "learning_rate": 1.9509267266082928e-05, + "loss": 0.6198, + "step": 7358 + }, + { + "epoch": 0.1271599391761128, + "grad_norm": 1.16180254104647, + "learning_rate": 1.9509094086581193e-05, + "loss": 0.7747, + "step": 7359 + }, + { + "epoch": 0.1271772186895217, + "grad_norm": 0.6172516270635396, + "learning_rate": 1.9508920877296252e-05, + "loss": 0.5023, + "step": 7360 + }, + { + "epoch": 0.1271944982029306, + "grad_norm": 0.9684892955277853, + "learning_rate": 1.950874763822865e-05, + "loss": 0.5736, + "step": 7361 + }, + { + "epoch": 0.1272117777163395, + "grad_norm": 0.7127510528797789, + "learning_rate": 1.9508574369378922e-05, + "loss": 0.5759, + "step": 7362 + }, + { + "epoch": 0.12722905722974842, + "grad_norm": 0.8699368334850216, + "learning_rate": 1.950840107074762e-05, + "loss": 0.4595, + "step": 7363 + }, + { + "epoch": 0.1272463367431573, + "grad_norm": 1.4027625986385563, + "learning_rate": 1.9508227742335285e-05, + "loss": 0.5544, + "step": 7364 + }, + { + "epoch": 0.1272636162565662, + "grad_norm": 1.0965239442897272, + "learning_rate": 1.950805438414246e-05, + "loss": 0.7904, + "step": 7365 + }, + { + "epoch": 0.12728089576997512, + "grad_norm": 0.7868136183116377, + "learning_rate": 1.950788099616968e-05, + "loss": 0.6359, + "step": 7366 + }, + { + "epoch": 0.12729817528338402, + "grad_norm": 1.1244773661348657, + "learning_rate": 1.9507707578417497e-05, + "loss": 0.8025, + "step": 7367 + }, + { + "epoch": 0.12731545479679293, + "grad_norm": 0.8583286401577137, + "learning_rate": 1.9507534130886454e-05, + "loss": 0.5683, + "step": 7368 + }, + { + "epoch": 0.1273327343102018, + "grad_norm": 1.0168014098148677, + "learning_rate": 1.9507360653577086e-05, + "loss": 0.4613, + "step": 7369 + }, + { + "epoch": 0.12735001382361072, + "grad_norm": 0.47082648515518366, + "learning_rate": 1.9507187146489947e-05, + "loss": 0.5873, + "step": 7370 + }, + { + "epoch": 0.12736729333701963, + "grad_norm": 1.2720459972689846, + "learning_rate": 1.9507013609625574e-05, + "loss": 0.8632, + "step": 7371 + }, + { + "epoch": 0.12738457285042853, + "grad_norm": 0.5876433034997913, + "learning_rate": 1.950684004298451e-05, + "loss": 0.8356, + "step": 7372 + }, + { + "epoch": 0.12740185236383744, + "grad_norm": 1.1156176523095873, + "learning_rate": 1.9506666446567304e-05, + "loss": 0.6074, + "step": 7373 + }, + { + "epoch": 0.12741913187724635, + "grad_norm": 1.3946554307962329, + "learning_rate": 1.9506492820374495e-05, + "loss": 0.7691, + "step": 7374 + }, + { + "epoch": 0.12743641139065523, + "grad_norm": 1.068340731978954, + "learning_rate": 1.9506319164406624e-05, + "loss": 0.6435, + "step": 7375 + }, + { + "epoch": 0.12745369090406414, + "grad_norm": 0.8742302083728511, + "learning_rate": 1.9506145478664244e-05, + "loss": 0.7578, + "step": 7376 + }, + { + "epoch": 0.12747097041747305, + "grad_norm": 0.9937160846474922, + "learning_rate": 1.950597176314789e-05, + "loss": 0.7188, + "step": 7377 + }, + { + "epoch": 0.12748824993088195, + "grad_norm": 1.1208276263128882, + "learning_rate": 1.950579801785811e-05, + "loss": 0.6315, + "step": 7378 + }, + { + "epoch": 0.12750552944429086, + "grad_norm": 1.2719039781634, + "learning_rate": 1.9505624242795453e-05, + "loss": 0.6186, + "step": 7379 + }, + { + "epoch": 0.12752280895769974, + "grad_norm": 0.8247555748018622, + "learning_rate": 1.9505450437960454e-05, + "loss": 0.6679, + "step": 7380 + }, + { + "epoch": 0.12754008847110865, + "grad_norm": 0.8558180988148839, + "learning_rate": 1.9505276603353663e-05, + "loss": 0.8693, + "step": 7381 + }, + { + "epoch": 0.12755736798451756, + "grad_norm": 1.2782926757986197, + "learning_rate": 1.9505102738975625e-05, + "loss": 0.7848, + "step": 7382 + }, + { + "epoch": 0.12757464749792646, + "grad_norm": 1.000679146144122, + "learning_rate": 1.950492884482688e-05, + "loss": 0.6559, + "step": 7383 + }, + { + "epoch": 0.12759192701133537, + "grad_norm": 1.2104440341645208, + "learning_rate": 1.9504754920907977e-05, + "loss": 0.7143, + "step": 7384 + }, + { + "epoch": 0.12760920652474425, + "grad_norm": 0.9514090833066475, + "learning_rate": 1.950458096721946e-05, + "loss": 0.8051, + "step": 7385 + }, + { + "epoch": 0.12762648603815316, + "grad_norm": 0.8510041075879106, + "learning_rate": 1.9504406983761872e-05, + "loss": 0.5381, + "step": 7386 + }, + { + "epoch": 0.12764376555156207, + "grad_norm": 1.1741495462387779, + "learning_rate": 1.9504232970535758e-05, + "loss": 0.8286, + "step": 7387 + }, + { + "epoch": 0.12766104506497097, + "grad_norm": 1.2401448985147485, + "learning_rate": 1.9504058927541667e-05, + "loss": 0.9373, + "step": 7388 + }, + { + "epoch": 0.12767832457837988, + "grad_norm": 1.0137463810731344, + "learning_rate": 1.950388485478014e-05, + "loss": 0.6585, + "step": 7389 + }, + { + "epoch": 0.12769560409178876, + "grad_norm": 0.6904188851644388, + "learning_rate": 1.950371075225172e-05, + "loss": 0.6296, + "step": 7390 + }, + { + "epoch": 0.12771288360519767, + "grad_norm": 0.9233300877712428, + "learning_rate": 1.950353661995696e-05, + "loss": 0.8336, + "step": 7391 + }, + { + "epoch": 0.12773016311860658, + "grad_norm": 0.925873018463342, + "learning_rate": 1.95033624578964e-05, + "loss": 0.6634, + "step": 7392 + }, + { + "epoch": 0.12774744263201548, + "grad_norm": 0.8612877295346212, + "learning_rate": 1.9503188266070586e-05, + "loss": 0.5851, + "step": 7393 + }, + { + "epoch": 0.1277647221454244, + "grad_norm": 1.3125337221452924, + "learning_rate": 1.9503014044480063e-05, + "loss": 0.8585, + "step": 7394 + }, + { + "epoch": 0.1277820016588333, + "grad_norm": 0.9657045641818249, + "learning_rate": 1.950283979312538e-05, + "loss": 0.4509, + "step": 7395 + }, + { + "epoch": 0.12779928117224218, + "grad_norm": 0.8666028033880655, + "learning_rate": 1.950266551200708e-05, + "loss": 0.6498, + "step": 7396 + }, + { + "epoch": 0.1278165606856511, + "grad_norm": 0.6273143741909533, + "learning_rate": 1.950249120112571e-05, + "loss": 0.7656, + "step": 7397 + }, + { + "epoch": 0.12783384019906, + "grad_norm": 0.7963414882867427, + "learning_rate": 1.9502316860481816e-05, + "loss": 0.5883, + "step": 7398 + }, + { + "epoch": 0.1278511197124689, + "grad_norm": 0.9580288947629799, + "learning_rate": 1.9502142490075942e-05, + "loss": 0.7262, + "step": 7399 + }, + { + "epoch": 0.1278683992258778, + "grad_norm": 0.7968212146492666, + "learning_rate": 1.9501968089908634e-05, + "loss": 0.6157, + "step": 7400 + }, + { + "epoch": 0.1278856787392867, + "grad_norm": 0.924097940143885, + "learning_rate": 1.950179365998044e-05, + "loss": 0.7278, + "step": 7401 + }, + { + "epoch": 0.1279029582526956, + "grad_norm": 0.9588118803505005, + "learning_rate": 1.950161920029191e-05, + "loss": 0.6763, + "step": 7402 + }, + { + "epoch": 0.1279202377661045, + "grad_norm": 0.61165259780743, + "learning_rate": 1.950144471084358e-05, + "loss": 0.7908, + "step": 7403 + }, + { + "epoch": 0.1279375172795134, + "grad_norm": 1.5580008215211347, + "learning_rate": 1.950127019163601e-05, + "loss": 0.6175, + "step": 7404 + }, + { + "epoch": 0.12795479679292232, + "grad_norm": 0.8338003733973027, + "learning_rate": 1.9501095642669737e-05, + "loss": 0.6243, + "step": 7405 + }, + { + "epoch": 0.1279720763063312, + "grad_norm": 0.8790988864321229, + "learning_rate": 1.950092106394531e-05, + "loss": 0.6421, + "step": 7406 + }, + { + "epoch": 0.1279893558197401, + "grad_norm": 0.7897295099843654, + "learning_rate": 1.9500746455463274e-05, + "loss": 0.7035, + "step": 7407 + }, + { + "epoch": 0.12800663533314902, + "grad_norm": 0.6817332978738746, + "learning_rate": 1.9500571817224182e-05, + "loss": 0.5688, + "step": 7408 + }, + { + "epoch": 0.12802391484655792, + "grad_norm": 1.195931950602554, + "learning_rate": 1.950039714922858e-05, + "loss": 0.6802, + "step": 7409 + }, + { + "epoch": 0.12804119435996683, + "grad_norm": 1.2735413260444755, + "learning_rate": 1.9500222451477004e-05, + "loss": 0.8081, + "step": 7410 + }, + { + "epoch": 0.12805847387337574, + "grad_norm": 0.7915096886309588, + "learning_rate": 1.9500047723970015e-05, + "loss": 0.7363, + "step": 7411 + }, + { + "epoch": 0.12807575338678462, + "grad_norm": 0.9840561989796771, + "learning_rate": 1.949987296670815e-05, + "loss": 0.7371, + "step": 7412 + }, + { + "epoch": 0.12809303290019353, + "grad_norm": 1.1926714172913404, + "learning_rate": 1.9499698179691965e-05, + "loss": 0.6429, + "step": 7413 + }, + { + "epoch": 0.12811031241360243, + "grad_norm": 0.885449655146865, + "learning_rate": 1.9499523362922e-05, + "loss": 0.6625, + "step": 7414 + }, + { + "epoch": 0.12812759192701134, + "grad_norm": 0.881841023725263, + "learning_rate": 1.949934851639881e-05, + "loss": 0.6176, + "step": 7415 + }, + { + "epoch": 0.12814487144042025, + "grad_norm": 0.8466474220141327, + "learning_rate": 1.9499173640122935e-05, + "loss": 0.8003, + "step": 7416 + }, + { + "epoch": 0.12816215095382913, + "grad_norm": 0.8458635630273054, + "learning_rate": 1.9498998734094926e-05, + "loss": 0.6671, + "step": 7417 + }, + { + "epoch": 0.12817943046723804, + "grad_norm": 0.572867849304249, + "learning_rate": 1.9498823798315332e-05, + "loss": 0.3768, + "step": 7418 + }, + { + "epoch": 0.12819670998064694, + "grad_norm": 0.9212334481389227, + "learning_rate": 1.9498648832784702e-05, + "loss": 0.5524, + "step": 7419 + }, + { + "epoch": 0.12821398949405585, + "grad_norm": 1.0000819641731054, + "learning_rate": 1.949847383750358e-05, + "loss": 0.4644, + "step": 7420 + }, + { + "epoch": 0.12823126900746476, + "grad_norm": 0.7826828354189841, + "learning_rate": 1.9498298812472516e-05, + "loss": 0.6008, + "step": 7421 + }, + { + "epoch": 0.12824854852087364, + "grad_norm": 1.2516539638957034, + "learning_rate": 1.949812375769206e-05, + "loss": 0.875, + "step": 7422 + }, + { + "epoch": 0.12826582803428255, + "grad_norm": 1.1833971316997658, + "learning_rate": 1.9497948673162758e-05, + "loss": 0.6264, + "step": 7423 + }, + { + "epoch": 0.12828310754769146, + "grad_norm": 0.9262601048465353, + "learning_rate": 1.9497773558885157e-05, + "loss": 0.5105, + "step": 7424 + }, + { + "epoch": 0.12830038706110036, + "grad_norm": 0.8562133875496195, + "learning_rate": 1.949759841485981e-05, + "loss": 0.8246, + "step": 7425 + }, + { + "epoch": 0.12831766657450927, + "grad_norm": 0.7477437561497428, + "learning_rate": 1.949742324108726e-05, + "loss": 0.5022, + "step": 7426 + }, + { + "epoch": 0.12833494608791818, + "grad_norm": 0.5930495767893026, + "learning_rate": 1.949724803756806e-05, + "loss": 0.5221, + "step": 7427 + }, + { + "epoch": 0.12835222560132706, + "grad_norm": 0.850032871999131, + "learning_rate": 1.9497072804302758e-05, + "loss": 0.8199, + "step": 7428 + }, + { + "epoch": 0.12836950511473597, + "grad_norm": 0.8207336144044891, + "learning_rate": 1.9496897541291904e-05, + "loss": 0.7449, + "step": 7429 + }, + { + "epoch": 0.12838678462814487, + "grad_norm": 0.9168285735185921, + "learning_rate": 1.9496722248536044e-05, + "loss": 0.4945, + "step": 7430 + }, + { + "epoch": 0.12840406414155378, + "grad_norm": 0.9772845220323612, + "learning_rate": 1.9496546926035725e-05, + "loss": 0.5309, + "step": 7431 + }, + { + "epoch": 0.1284213436549627, + "grad_norm": 1.114598859088872, + "learning_rate": 1.9496371573791505e-05, + "loss": 0.602, + "step": 7432 + }, + { + "epoch": 0.12843862316837157, + "grad_norm": 0.8547821296412643, + "learning_rate": 1.9496196191803923e-05, + "loss": 0.3763, + "step": 7433 + }, + { + "epoch": 0.12845590268178048, + "grad_norm": 0.8624660535565772, + "learning_rate": 1.949602078007353e-05, + "loss": 0.5954, + "step": 7434 + }, + { + "epoch": 0.12847318219518938, + "grad_norm": 0.9199337017731718, + "learning_rate": 1.9495845338600886e-05, + "loss": 0.5386, + "step": 7435 + }, + { + "epoch": 0.1284904617085983, + "grad_norm": 1.1117381488685416, + "learning_rate": 1.9495669867386528e-05, + "loss": 0.6115, + "step": 7436 + }, + { + "epoch": 0.1285077412220072, + "grad_norm": 0.8870819665996259, + "learning_rate": 1.949549436643101e-05, + "loss": 0.7469, + "step": 7437 + }, + { + "epoch": 0.12852502073541608, + "grad_norm": 1.3890359543215147, + "learning_rate": 1.9495318835734885e-05, + "loss": 0.8082, + "step": 7438 + }, + { + "epoch": 0.128542300248825, + "grad_norm": 0.738911196820776, + "learning_rate": 1.9495143275298698e-05, + "loss": 0.8847, + "step": 7439 + }, + { + "epoch": 0.1285595797622339, + "grad_norm": 0.6462978222108143, + "learning_rate": 1.9494967685123e-05, + "loss": 0.4146, + "step": 7440 + }, + { + "epoch": 0.1285768592756428, + "grad_norm": 0.4176620990470352, + "learning_rate": 1.949479206520834e-05, + "loss": 0.739, + "step": 7441 + }, + { + "epoch": 0.1285941387890517, + "grad_norm": 0.7346301105976877, + "learning_rate": 1.9494616415555274e-05, + "loss": 0.3537, + "step": 7442 + }, + { + "epoch": 0.1286114183024606, + "grad_norm": 1.137474338163444, + "learning_rate": 1.9494440736164348e-05, + "loss": 0.802, + "step": 7443 + }, + { + "epoch": 0.1286286978158695, + "grad_norm": 0.8561236802738307, + "learning_rate": 1.9494265027036107e-05, + "loss": 0.7472, + "step": 7444 + }, + { + "epoch": 0.1286459773292784, + "grad_norm": 1.4144028746352155, + "learning_rate": 1.949408928817111e-05, + "loss": 0.6752, + "step": 7445 + }, + { + "epoch": 0.1286632568426873, + "grad_norm": 0.8313729832869245, + "learning_rate": 1.9493913519569904e-05, + "loss": 0.6398, + "step": 7446 + }, + { + "epoch": 0.12868053635609622, + "grad_norm": 1.232532342612904, + "learning_rate": 1.9493737721233038e-05, + "loss": 0.8101, + "step": 7447 + }, + { + "epoch": 0.12869781586950513, + "grad_norm": 1.4064310315238053, + "learning_rate": 1.9493561893161063e-05, + "loss": 0.6147, + "step": 7448 + }, + { + "epoch": 0.128715095382914, + "grad_norm": 0.7960054814366272, + "learning_rate": 1.9493386035354532e-05, + "loss": 0.4597, + "step": 7449 + }, + { + "epoch": 0.12873237489632292, + "grad_norm": 0.7878291424552881, + "learning_rate": 1.9493210147813997e-05, + "loss": 0.4734, + "step": 7450 + }, + { + "epoch": 0.12874965440973182, + "grad_norm": 1.054187234408786, + "learning_rate": 1.9493034230540004e-05, + "loss": 0.6629, + "step": 7451 + }, + { + "epoch": 0.12876693392314073, + "grad_norm": 1.1010605070737445, + "learning_rate": 1.9492858283533108e-05, + "loss": 0.446, + "step": 7452 + }, + { + "epoch": 0.12878421343654964, + "grad_norm": 0.7513108698880244, + "learning_rate": 1.9492682306793854e-05, + "loss": 0.3115, + "step": 7453 + }, + { + "epoch": 0.12880149294995852, + "grad_norm": 1.2917892927547356, + "learning_rate": 1.94925063003228e-05, + "loss": 0.6614, + "step": 7454 + }, + { + "epoch": 0.12881877246336743, + "grad_norm": 0.8885453436185293, + "learning_rate": 1.9492330264120497e-05, + "loss": 0.6116, + "step": 7455 + }, + { + "epoch": 0.12883605197677633, + "grad_norm": 0.8412567315732977, + "learning_rate": 1.9492154198187492e-05, + "loss": 0.4908, + "step": 7456 + }, + { + "epoch": 0.12885333149018524, + "grad_norm": 0.49731828655017724, + "learning_rate": 1.949197810252434e-05, + "loss": 0.5545, + "step": 7457 + }, + { + "epoch": 0.12887061100359415, + "grad_norm": 1.120480185749171, + "learning_rate": 1.9491801977131592e-05, + "loss": 0.7402, + "step": 7458 + }, + { + "epoch": 0.12888789051700303, + "grad_norm": 0.9523119920114472, + "learning_rate": 1.9491625822009795e-05, + "loss": 0.7632, + "step": 7459 + }, + { + "epoch": 0.12890517003041194, + "grad_norm": 1.0769566212100445, + "learning_rate": 1.9491449637159506e-05, + "loss": 0.7691, + "step": 7460 + }, + { + "epoch": 0.12892244954382084, + "grad_norm": 0.9326846927550898, + "learning_rate": 1.9491273422581275e-05, + "loss": 0.857, + "step": 7461 + }, + { + "epoch": 0.12893972905722975, + "grad_norm": 1.2334400567417283, + "learning_rate": 1.9491097178275657e-05, + "loss": 0.6252, + "step": 7462 + }, + { + "epoch": 0.12895700857063866, + "grad_norm": 0.8181696072937301, + "learning_rate": 1.94909209042432e-05, + "loss": 0.6256, + "step": 7463 + }, + { + "epoch": 0.12897428808404757, + "grad_norm": 1.1083285651493706, + "learning_rate": 1.949074460048446e-05, + "loss": 0.7555, + "step": 7464 + }, + { + "epoch": 0.12899156759745645, + "grad_norm": 0.9759665794247824, + "learning_rate": 1.949056826699998e-05, + "loss": 0.6738, + "step": 7465 + }, + { + "epoch": 0.12900884711086535, + "grad_norm": 0.9437051157781315, + "learning_rate": 1.9490391903790322e-05, + "loss": 0.744, + "step": 7466 + }, + { + "epoch": 0.12902612662427426, + "grad_norm": 1.0886945478977985, + "learning_rate": 1.9490215510856036e-05, + "loss": 0.6725, + "step": 7467 + }, + { + "epoch": 0.12904340613768317, + "grad_norm": 1.1783378405256097, + "learning_rate": 1.9490039088197673e-05, + "loss": 0.866, + "step": 7468 + }, + { + "epoch": 0.12906068565109208, + "grad_norm": 0.5872932208306427, + "learning_rate": 1.948986263581579e-05, + "loss": 0.7117, + "step": 7469 + }, + { + "epoch": 0.12907796516450096, + "grad_norm": 0.8434964509623522, + "learning_rate": 1.9489686153710933e-05, + "loss": 0.5998, + "step": 7470 + }, + { + "epoch": 0.12909524467790987, + "grad_norm": 1.1095421930893086, + "learning_rate": 1.9489509641883656e-05, + "loss": 0.5844, + "step": 7471 + }, + { + "epoch": 0.12911252419131877, + "grad_norm": 0.9877447095655415, + "learning_rate": 1.948933310033452e-05, + "loss": 0.7857, + "step": 7472 + }, + { + "epoch": 0.12912980370472768, + "grad_norm": 0.42995129320202496, + "learning_rate": 1.9489156529064066e-05, + "loss": 0.6369, + "step": 7473 + }, + { + "epoch": 0.1291470832181366, + "grad_norm": 0.9260835150525354, + "learning_rate": 1.9488979928072853e-05, + "loss": 0.6412, + "step": 7474 + }, + { + "epoch": 0.12916436273154547, + "grad_norm": 0.9245633741600183, + "learning_rate": 1.9488803297361433e-05, + "loss": 0.509, + "step": 7475 + }, + { + "epoch": 0.12918164224495438, + "grad_norm": 0.9592544907416815, + "learning_rate": 1.9488626636930365e-05, + "loss": 0.6403, + "step": 7476 + }, + { + "epoch": 0.12919892175836328, + "grad_norm": 0.8088562364293582, + "learning_rate": 1.9488449946780193e-05, + "loss": 0.6586, + "step": 7477 + }, + { + "epoch": 0.1292162012717722, + "grad_norm": 1.7353353310463269, + "learning_rate": 1.9488273226911474e-05, + "loss": 0.8889, + "step": 7478 + }, + { + "epoch": 0.1292334807851811, + "grad_norm": 1.0116562927397772, + "learning_rate": 1.9488096477324766e-05, + "loss": 0.7218, + "step": 7479 + }, + { + "epoch": 0.12925076029858998, + "grad_norm": 0.7635800464178643, + "learning_rate": 1.9487919698020613e-05, + "loss": 0.7103, + "step": 7480 + }, + { + "epoch": 0.12926803981199889, + "grad_norm": 1.2667639610413752, + "learning_rate": 1.9487742888999582e-05, + "loss": 0.731, + "step": 7481 + }, + { + "epoch": 0.1292853193254078, + "grad_norm": 0.9074899833702701, + "learning_rate": 1.9487566050262213e-05, + "loss": 0.8485, + "step": 7482 + }, + { + "epoch": 0.1293025988388167, + "grad_norm": 1.1128778545453288, + "learning_rate": 1.948738918180907e-05, + "loss": 0.7505, + "step": 7483 + }, + { + "epoch": 0.1293198783522256, + "grad_norm": 0.8876034352619773, + "learning_rate": 1.94872122836407e-05, + "loss": 0.6782, + "step": 7484 + }, + { + "epoch": 0.12933715786563452, + "grad_norm": 0.868602317803766, + "learning_rate": 1.948703535575766e-05, + "loss": 0.7329, + "step": 7485 + }, + { + "epoch": 0.1293544373790434, + "grad_norm": 0.6978298846503243, + "learning_rate": 1.9486858398160508e-05, + "loss": 0.3379, + "step": 7486 + }, + { + "epoch": 0.1293717168924523, + "grad_norm": 0.9022283685577421, + "learning_rate": 1.948668141084979e-05, + "loss": 0.6254, + "step": 7487 + }, + { + "epoch": 0.1293889964058612, + "grad_norm": 0.8966026090127629, + "learning_rate": 1.948650439382607e-05, + "loss": 0.7301, + "step": 7488 + }, + { + "epoch": 0.12940627591927012, + "grad_norm": 0.8827450546286263, + "learning_rate": 1.9486327347089894e-05, + "loss": 0.6706, + "step": 7489 + }, + { + "epoch": 0.12942355543267903, + "grad_norm": 0.6699096222058466, + "learning_rate": 1.948615027064182e-05, + "loss": 0.5228, + "step": 7490 + }, + { + "epoch": 0.1294408349460879, + "grad_norm": 0.851508365682084, + "learning_rate": 1.94859731644824e-05, + "loss": 0.5169, + "step": 7491 + }, + { + "epoch": 0.12945811445949681, + "grad_norm": 0.913229426346368, + "learning_rate": 1.9485796028612195e-05, + "loss": 0.641, + "step": 7492 + }, + { + "epoch": 0.12947539397290572, + "grad_norm": 0.8794434608967249, + "learning_rate": 1.9485618863031758e-05, + "loss": 0.7048, + "step": 7493 + }, + { + "epoch": 0.12949267348631463, + "grad_norm": 1.2600870701735551, + "learning_rate": 1.9485441667741636e-05, + "loss": 1.0623, + "step": 7494 + }, + { + "epoch": 0.12950995299972354, + "grad_norm": 0.9082640012533253, + "learning_rate": 1.948526444274239e-05, + "loss": 0.7288, + "step": 7495 + }, + { + "epoch": 0.12952723251313242, + "grad_norm": 1.1406516004142915, + "learning_rate": 1.948508718803458e-05, + "loss": 0.7313, + "step": 7496 + }, + { + "epoch": 0.12954451202654133, + "grad_norm": 0.8808968087189091, + "learning_rate": 1.9484909903618755e-05, + "loss": 0.8137, + "step": 7497 + }, + { + "epoch": 0.12956179153995023, + "grad_norm": 1.1687498194513373, + "learning_rate": 1.9484732589495467e-05, + "loss": 0.8542, + "step": 7498 + }, + { + "epoch": 0.12957907105335914, + "grad_norm": 0.8220582680717027, + "learning_rate": 1.9484555245665277e-05, + "loss": 0.6484, + "step": 7499 + }, + { + "epoch": 0.12959635056676805, + "grad_norm": 1.1153181612772016, + "learning_rate": 1.9484377872128744e-05, + "loss": 0.681, + "step": 7500 + }, + { + "epoch": 0.12961363008017696, + "grad_norm": 0.611223213104621, + "learning_rate": 1.9484200468886413e-05, + "loss": 0.5908, + "step": 7501 + }, + { + "epoch": 0.12963090959358584, + "grad_norm": 0.7107496489462392, + "learning_rate": 1.9484023035938847e-05, + "loss": 0.8088, + "step": 7502 + }, + { + "epoch": 0.12964818910699474, + "grad_norm": 0.9860762218680866, + "learning_rate": 1.94838455732866e-05, + "loss": 0.7187, + "step": 7503 + }, + { + "epoch": 0.12966546862040365, + "grad_norm": 0.8003386241019996, + "learning_rate": 1.948366808093023e-05, + "loss": 0.7217, + "step": 7504 + }, + { + "epoch": 0.12968274813381256, + "grad_norm": 1.0591382537475977, + "learning_rate": 1.948349055887029e-05, + "loss": 0.7321, + "step": 7505 + }, + { + "epoch": 0.12970002764722147, + "grad_norm": 1.1004836706360333, + "learning_rate": 1.9483313007107337e-05, + "loss": 0.7038, + "step": 7506 + }, + { + "epoch": 0.12971730716063035, + "grad_norm": 0.8413451204992254, + "learning_rate": 1.9483135425641926e-05, + "loss": 0.5048, + "step": 7507 + }, + { + "epoch": 0.12973458667403925, + "grad_norm": 0.9851967847472665, + "learning_rate": 1.9482957814474616e-05, + "loss": 0.7558, + "step": 7508 + }, + { + "epoch": 0.12975186618744816, + "grad_norm": 0.8463424538738008, + "learning_rate": 1.948278017360596e-05, + "loss": 0.6306, + "step": 7509 + }, + { + "epoch": 0.12976914570085707, + "grad_norm": 0.9282158330687597, + "learning_rate": 1.9482602503036517e-05, + "loss": 0.8452, + "step": 7510 + }, + { + "epoch": 0.12978642521426598, + "grad_norm": 1.1020398295630651, + "learning_rate": 1.948242480276684e-05, + "loss": 0.7986, + "step": 7511 + }, + { + "epoch": 0.12980370472767486, + "grad_norm": 0.6642215238799788, + "learning_rate": 1.9482247072797488e-05, + "loss": 0.5158, + "step": 7512 + }, + { + "epoch": 0.12982098424108376, + "grad_norm": 0.7652412079827886, + "learning_rate": 1.948206931312902e-05, + "loss": 0.6746, + "step": 7513 + }, + { + "epoch": 0.12983826375449267, + "grad_norm": 0.920858674250648, + "learning_rate": 1.9481891523761985e-05, + "loss": 0.5805, + "step": 7514 + }, + { + "epoch": 0.12985554326790158, + "grad_norm": 0.866141709838976, + "learning_rate": 1.948171370469695e-05, + "loss": 0.7087, + "step": 7515 + }, + { + "epoch": 0.1298728227813105, + "grad_norm": 1.0316480795561893, + "learning_rate": 1.948153585593447e-05, + "loss": 0.7447, + "step": 7516 + }, + { + "epoch": 0.12989010229471937, + "grad_norm": 0.8876759672914057, + "learning_rate": 1.948135797747509e-05, + "loss": 0.6835, + "step": 7517 + }, + { + "epoch": 0.12990738180812827, + "grad_norm": 0.7449517759024751, + "learning_rate": 1.9481180069319382e-05, + "loss": 0.5264, + "step": 7518 + }, + { + "epoch": 0.12992466132153718, + "grad_norm": 0.9011993801917565, + "learning_rate": 1.94810021314679e-05, + "loss": 0.6234, + "step": 7519 + }, + { + "epoch": 0.1299419408349461, + "grad_norm": 0.6518618189292229, + "learning_rate": 1.9480824163921194e-05, + "loss": 0.4751, + "step": 7520 + }, + { + "epoch": 0.129959220348355, + "grad_norm": 0.8398398713484504, + "learning_rate": 1.9480646166679828e-05, + "loss": 0.7293, + "step": 7521 + }, + { + "epoch": 0.1299764998617639, + "grad_norm": 0.7083791585782621, + "learning_rate": 1.9480468139744356e-05, + "loss": 0.5757, + "step": 7522 + }, + { + "epoch": 0.12999377937517279, + "grad_norm": 0.8671120072883299, + "learning_rate": 1.948029008311534e-05, + "loss": 0.4564, + "step": 7523 + }, + { + "epoch": 0.1300110588885817, + "grad_norm": 0.974739312120242, + "learning_rate": 1.9480111996793333e-05, + "loss": 0.651, + "step": 7524 + }, + { + "epoch": 0.1300283384019906, + "grad_norm": 1.0099439837224322, + "learning_rate": 1.94799338807789e-05, + "loss": 0.633, + "step": 7525 + }, + { + "epoch": 0.1300456179153995, + "grad_norm": 0.7110682010485017, + "learning_rate": 1.9479755735072586e-05, + "loss": 0.5027, + "step": 7526 + }, + { + "epoch": 0.13006289742880842, + "grad_norm": 1.1796182703136089, + "learning_rate": 1.947957755967496e-05, + "loss": 0.646, + "step": 7527 + }, + { + "epoch": 0.1300801769422173, + "grad_norm": 1.01602167390274, + "learning_rate": 1.9479399354586578e-05, + "loss": 0.7542, + "step": 7528 + }, + { + "epoch": 0.1300974564556262, + "grad_norm": 1.4080253524832276, + "learning_rate": 1.9479221119807995e-05, + "loss": 0.8111, + "step": 7529 + }, + { + "epoch": 0.1301147359690351, + "grad_norm": 1.0876356811264905, + "learning_rate": 1.947904285533977e-05, + "loss": 0.6049, + "step": 7530 + }, + { + "epoch": 0.13013201548244402, + "grad_norm": 1.0808292096581404, + "learning_rate": 1.9478864561182464e-05, + "loss": 0.538, + "step": 7531 + }, + { + "epoch": 0.13014929499585293, + "grad_norm": 0.8595640185461497, + "learning_rate": 1.9478686237336635e-05, + "loss": 0.5105, + "step": 7532 + }, + { + "epoch": 0.1301665745092618, + "grad_norm": 0.9380740846086945, + "learning_rate": 1.9478507883802836e-05, + "loss": 0.6007, + "step": 7533 + }, + { + "epoch": 0.13018385402267071, + "grad_norm": 0.643035074075782, + "learning_rate": 1.9478329500581634e-05, + "loss": 0.4843, + "step": 7534 + }, + { + "epoch": 0.13020113353607962, + "grad_norm": 1.3122093583353822, + "learning_rate": 1.9478151087673584e-05, + "loss": 0.7012, + "step": 7535 + }, + { + "epoch": 0.13021841304948853, + "grad_norm": 0.9499371726649382, + "learning_rate": 1.947797264507924e-05, + "loss": 0.4835, + "step": 7536 + }, + { + "epoch": 0.13023569256289744, + "grad_norm": 1.2067962332048785, + "learning_rate": 1.9477794172799172e-05, + "loss": 0.6966, + "step": 7537 + }, + { + "epoch": 0.13025297207630634, + "grad_norm": 0.9271903632034557, + "learning_rate": 1.947761567083393e-05, + "loss": 0.5838, + "step": 7538 + }, + { + "epoch": 0.13027025158971522, + "grad_norm": 0.9034190825923504, + "learning_rate": 1.9477437139184073e-05, + "loss": 0.3066, + "step": 7539 + }, + { + "epoch": 0.13028753110312413, + "grad_norm": 1.101574664314196, + "learning_rate": 1.9477258577850165e-05, + "loss": 0.6387, + "step": 7540 + }, + { + "epoch": 0.13030481061653304, + "grad_norm": 1.2214471810374294, + "learning_rate": 1.947707998683276e-05, + "loss": 0.5812, + "step": 7541 + }, + { + "epoch": 0.13032209012994195, + "grad_norm": 0.9189391619316499, + "learning_rate": 1.9476901366132423e-05, + "loss": 0.5849, + "step": 7542 + }, + { + "epoch": 0.13033936964335086, + "grad_norm": 0.861337228512501, + "learning_rate": 1.947672271574971e-05, + "loss": 0.5263, + "step": 7543 + }, + { + "epoch": 0.13035664915675974, + "grad_norm": 0.9460692576886642, + "learning_rate": 1.9476544035685182e-05, + "loss": 0.7952, + "step": 7544 + }, + { + "epoch": 0.13037392867016864, + "grad_norm": 0.41356441709518005, + "learning_rate": 1.9476365325939396e-05, + "loss": 0.4936, + "step": 7545 + }, + { + "epoch": 0.13039120818357755, + "grad_norm": 0.9697695839625293, + "learning_rate": 1.9476186586512914e-05, + "loss": 0.9009, + "step": 7546 + }, + { + "epoch": 0.13040848769698646, + "grad_norm": 0.8476535264057411, + "learning_rate": 1.9476007817406297e-05, + "loss": 0.658, + "step": 7547 + }, + { + "epoch": 0.13042576721039537, + "grad_norm": 1.2527277747829415, + "learning_rate": 1.9475829018620102e-05, + "loss": 0.6667, + "step": 7548 + }, + { + "epoch": 0.13044304672380425, + "grad_norm": 1.0654861828641302, + "learning_rate": 1.947565019015489e-05, + "loss": 0.8524, + "step": 7549 + }, + { + "epoch": 0.13046032623721315, + "grad_norm": 1.0296138257659448, + "learning_rate": 1.947547133201122e-05, + "loss": 0.7335, + "step": 7550 + }, + { + "epoch": 0.13047760575062206, + "grad_norm": 0.8734628831674881, + "learning_rate": 1.947529244418966e-05, + "loss": 0.5624, + "step": 7551 + }, + { + "epoch": 0.13049488526403097, + "grad_norm": 1.037698086658281, + "learning_rate": 1.947511352669076e-05, + "loss": 0.7133, + "step": 7552 + }, + { + "epoch": 0.13051216477743988, + "grad_norm": 1.137098166250314, + "learning_rate": 1.9474934579515082e-05, + "loss": 0.8519, + "step": 7553 + }, + { + "epoch": 0.13052944429084876, + "grad_norm": 1.0571837680826643, + "learning_rate": 1.9474755602663192e-05, + "loss": 0.5501, + "step": 7554 + }, + { + "epoch": 0.13054672380425766, + "grad_norm": 0.5125563223976757, + "learning_rate": 1.9474576596135645e-05, + "loss": 0.7646, + "step": 7555 + }, + { + "epoch": 0.13056400331766657, + "grad_norm": 0.7030058299913864, + "learning_rate": 1.9474397559933006e-05, + "loss": 0.5104, + "step": 7556 + }, + { + "epoch": 0.13058128283107548, + "grad_norm": 1.0253547334430242, + "learning_rate": 1.9474218494055836e-05, + "loss": 0.6275, + "step": 7557 + }, + { + "epoch": 0.1305985623444844, + "grad_norm": 0.9682435060461416, + "learning_rate": 1.947403939850469e-05, + "loss": 0.4158, + "step": 7558 + }, + { + "epoch": 0.1306158418578933, + "grad_norm": 0.49817602797821614, + "learning_rate": 1.947386027328013e-05, + "loss": 0.8281, + "step": 7559 + }, + { + "epoch": 0.13063312137130217, + "grad_norm": 0.932344430887623, + "learning_rate": 1.9473681118382725e-05, + "loss": 0.4988, + "step": 7560 + }, + { + "epoch": 0.13065040088471108, + "grad_norm": 1.0373392024546881, + "learning_rate": 1.947350193381303e-05, + "loss": 0.9259, + "step": 7561 + }, + { + "epoch": 0.13066768039812, + "grad_norm": 0.90391587431581, + "learning_rate": 1.9473322719571604e-05, + "loss": 0.5145, + "step": 7562 + }, + { + "epoch": 0.1306849599115289, + "grad_norm": 1.197238828790749, + "learning_rate": 1.9473143475659015e-05, + "loss": 0.7356, + "step": 7563 + }, + { + "epoch": 0.1307022394249378, + "grad_norm": 0.7730297829991922, + "learning_rate": 1.947296420207582e-05, + "loss": 0.699, + "step": 7564 + }, + { + "epoch": 0.13071951893834668, + "grad_norm": 0.9462069966917221, + "learning_rate": 1.947278489882258e-05, + "loss": 0.7329, + "step": 7565 + }, + { + "epoch": 0.1307367984517556, + "grad_norm": 0.9856323449944346, + "learning_rate": 1.9472605565899857e-05, + "loss": 0.7056, + "step": 7566 + }, + { + "epoch": 0.1307540779651645, + "grad_norm": 0.7173274501386194, + "learning_rate": 1.9472426203308214e-05, + "loss": 0.5976, + "step": 7567 + }, + { + "epoch": 0.1307713574785734, + "grad_norm": 0.8132601466707736, + "learning_rate": 1.9472246811048215e-05, + "loss": 0.7054, + "step": 7568 + }, + { + "epoch": 0.13078863699198232, + "grad_norm": 0.9260903242622038, + "learning_rate": 1.9472067389120416e-05, + "loss": 0.5549, + "step": 7569 + }, + { + "epoch": 0.1308059165053912, + "grad_norm": 1.179023882110858, + "learning_rate": 1.9471887937525384e-05, + "loss": 0.7878, + "step": 7570 + }, + { + "epoch": 0.1308231960188001, + "grad_norm": 0.9382514855047486, + "learning_rate": 1.9471708456263676e-05, + "loss": 0.715, + "step": 7571 + }, + { + "epoch": 0.130840475532209, + "grad_norm": 0.9640696317949699, + "learning_rate": 1.947152894533586e-05, + "loss": 0.5994, + "step": 7572 + }, + { + "epoch": 0.13085775504561792, + "grad_norm": 1.2073267876633373, + "learning_rate": 1.9471349404742495e-05, + "loss": 0.7698, + "step": 7573 + }, + { + "epoch": 0.13087503455902683, + "grad_norm": 1.327578505182845, + "learning_rate": 1.9471169834484143e-05, + "loss": 0.7031, + "step": 7574 + }, + { + "epoch": 0.13089231407243573, + "grad_norm": 0.9122479998158393, + "learning_rate": 1.947099023456137e-05, + "loss": 0.7857, + "step": 7575 + }, + { + "epoch": 0.1309095935858446, + "grad_norm": 0.9588733512866893, + "learning_rate": 1.9470810604974733e-05, + "loss": 0.5071, + "step": 7576 + }, + { + "epoch": 0.13092687309925352, + "grad_norm": 0.778469172724355, + "learning_rate": 1.9470630945724796e-05, + "loss": 0.6318, + "step": 7577 + }, + { + "epoch": 0.13094415261266243, + "grad_norm": 0.6212240799542653, + "learning_rate": 1.9470451256812125e-05, + "loss": 0.5684, + "step": 7578 + }, + { + "epoch": 0.13096143212607134, + "grad_norm": 0.9772894844162864, + "learning_rate": 1.947027153823728e-05, + "loss": 0.8134, + "step": 7579 + }, + { + "epoch": 0.13097871163948024, + "grad_norm": 0.7068192882093937, + "learning_rate": 1.947009179000083e-05, + "loss": 0.6307, + "step": 7580 + }, + { + "epoch": 0.13099599115288912, + "grad_norm": 1.0080008857491778, + "learning_rate": 1.9469912012103326e-05, + "loss": 0.617, + "step": 7581 + }, + { + "epoch": 0.13101327066629803, + "grad_norm": 1.1484460523487134, + "learning_rate": 1.9469732204545343e-05, + "loss": 0.4692, + "step": 7582 + }, + { + "epoch": 0.13103055017970694, + "grad_norm": 1.2407292646571166, + "learning_rate": 1.9469552367327438e-05, + "loss": 0.6693, + "step": 7583 + }, + { + "epoch": 0.13104782969311585, + "grad_norm": 0.7170299667117711, + "learning_rate": 1.9469372500450173e-05, + "loss": 0.5659, + "step": 7584 + }, + { + "epoch": 0.13106510920652475, + "grad_norm": 1.0857091320523802, + "learning_rate": 1.9469192603914117e-05, + "loss": 0.7725, + "step": 7585 + }, + { + "epoch": 0.13108238871993363, + "grad_norm": 1.0527031375592386, + "learning_rate": 1.9469012677719825e-05, + "loss": 0.6988, + "step": 7586 + }, + { + "epoch": 0.13109966823334254, + "grad_norm": 0.5947327766993437, + "learning_rate": 1.946883272186787e-05, + "loss": 0.6579, + "step": 7587 + }, + { + "epoch": 0.13111694774675145, + "grad_norm": 1.3076163889977466, + "learning_rate": 1.946865273635881e-05, + "loss": 0.7564, + "step": 7588 + }, + { + "epoch": 0.13113422726016036, + "grad_norm": 0.9790616344559406, + "learning_rate": 1.946847272119321e-05, + "loss": 0.7104, + "step": 7589 + }, + { + "epoch": 0.13115150677356927, + "grad_norm": 1.2300474804822825, + "learning_rate": 1.9468292676371634e-05, + "loss": 0.6246, + "step": 7590 + }, + { + "epoch": 0.13116878628697815, + "grad_norm": 1.3163136800152369, + "learning_rate": 1.9468112601894647e-05, + "loss": 0.6872, + "step": 7591 + }, + { + "epoch": 0.13118606580038705, + "grad_norm": 0.8358929047756026, + "learning_rate": 1.9467932497762806e-05, + "loss": 0.7366, + "step": 7592 + }, + { + "epoch": 0.13120334531379596, + "grad_norm": 0.923031017317864, + "learning_rate": 1.9467752363976687e-05, + "loss": 0.7928, + "step": 7593 + }, + { + "epoch": 0.13122062482720487, + "grad_norm": 0.9643477267759074, + "learning_rate": 1.9467572200536846e-05, + "loss": 0.8306, + "step": 7594 + }, + { + "epoch": 0.13123790434061378, + "grad_norm": 0.994453206598946, + "learning_rate": 1.946739200744385e-05, + "loss": 0.7975, + "step": 7595 + }, + { + "epoch": 0.13125518385402268, + "grad_norm": 0.7009992313256129, + "learning_rate": 1.9467211784698264e-05, + "loss": 0.6242, + "step": 7596 + }, + { + "epoch": 0.13127246336743156, + "grad_norm": 1.0799845168390756, + "learning_rate": 1.946703153230065e-05, + "loss": 0.5379, + "step": 7597 + }, + { + "epoch": 0.13128974288084047, + "grad_norm": 0.7966443172786524, + "learning_rate": 1.9466851250251575e-05, + "loss": 0.5763, + "step": 7598 + }, + { + "epoch": 0.13130702239424938, + "grad_norm": 0.8470583836674443, + "learning_rate": 1.94666709385516e-05, + "loss": 0.4735, + "step": 7599 + }, + { + "epoch": 0.1313243019076583, + "grad_norm": 0.9088012641718008, + "learning_rate": 1.9466490597201294e-05, + "loss": 0.7669, + "step": 7600 + }, + { + "epoch": 0.1313415814210672, + "grad_norm": 0.9884810606747378, + "learning_rate": 1.9466310226201217e-05, + "loss": 0.7515, + "step": 7601 + }, + { + "epoch": 0.13135886093447607, + "grad_norm": 1.023254392610038, + "learning_rate": 1.9466129825551937e-05, + "loss": 0.5894, + "step": 7602 + }, + { + "epoch": 0.13137614044788498, + "grad_norm": 0.8209467892922021, + "learning_rate": 1.9465949395254023e-05, + "loss": 0.6278, + "step": 7603 + }, + { + "epoch": 0.1313934199612939, + "grad_norm": 0.9997843564771844, + "learning_rate": 1.9465768935308036e-05, + "loss": 0.5778, + "step": 7604 + }, + { + "epoch": 0.1314106994747028, + "grad_norm": 1.1056845036871543, + "learning_rate": 1.946558844571454e-05, + "loss": 0.6807, + "step": 7605 + }, + { + "epoch": 0.1314279789881117, + "grad_norm": 0.894302645700879, + "learning_rate": 1.94654079264741e-05, + "loss": 0.8573, + "step": 7606 + }, + { + "epoch": 0.13144525850152058, + "grad_norm": 0.9042396600225645, + "learning_rate": 1.9465227377587283e-05, + "loss": 0.75, + "step": 7607 + }, + { + "epoch": 0.1314625380149295, + "grad_norm": 0.6131858896584391, + "learning_rate": 1.9465046799054657e-05, + "loss": 0.5793, + "step": 7608 + }, + { + "epoch": 0.1314798175283384, + "grad_norm": 1.094232802743568, + "learning_rate": 1.9464866190876782e-05, + "loss": 0.7987, + "step": 7609 + }, + { + "epoch": 0.1314970970417473, + "grad_norm": 0.8298207857407182, + "learning_rate": 1.946468555305423e-05, + "loss": 0.7022, + "step": 7610 + }, + { + "epoch": 0.13151437655515621, + "grad_norm": 1.1084475393784008, + "learning_rate": 1.9464504885587563e-05, + "loss": 0.8104, + "step": 7611 + }, + { + "epoch": 0.13153165606856512, + "grad_norm": 1.5641522033082527, + "learning_rate": 1.9464324188477345e-05, + "loss": 0.7769, + "step": 7612 + }, + { + "epoch": 0.131548935581974, + "grad_norm": 0.7764019695819913, + "learning_rate": 1.9464143461724148e-05, + "loss": 0.646, + "step": 7613 + }, + { + "epoch": 0.1315662150953829, + "grad_norm": 1.2562728217015398, + "learning_rate": 1.946396270532853e-05, + "loss": 0.8028, + "step": 7614 + }, + { + "epoch": 0.13158349460879182, + "grad_norm": 1.1594291130088017, + "learning_rate": 1.9463781919291064e-05, + "loss": 0.6018, + "step": 7615 + }, + { + "epoch": 0.13160077412220073, + "grad_norm": 1.0465536400762008, + "learning_rate": 1.9463601103612316e-05, + "loss": 0.6007, + "step": 7616 + }, + { + "epoch": 0.13161805363560963, + "grad_norm": 1.0835349653042032, + "learning_rate": 1.9463420258292847e-05, + "loss": 0.7535, + "step": 7617 + }, + { + "epoch": 0.1316353331490185, + "grad_norm": 0.8178825559596203, + "learning_rate": 1.9463239383333228e-05, + "loss": 0.6471, + "step": 7618 + }, + { + "epoch": 0.13165261266242742, + "grad_norm": 0.5313750521571247, + "learning_rate": 1.946305847873402e-05, + "loss": 0.6866, + "step": 7619 + }, + { + "epoch": 0.13166989217583633, + "grad_norm": 0.9415948205188039, + "learning_rate": 1.94628775444958e-05, + "loss": 0.579, + "step": 7620 + }, + { + "epoch": 0.13168717168924524, + "grad_norm": 0.6090771710163567, + "learning_rate": 1.946269658061913e-05, + "loss": 0.4935, + "step": 7621 + }, + { + "epoch": 0.13170445120265414, + "grad_norm": 1.1041033291460192, + "learning_rate": 1.9462515587104567e-05, + "loss": 0.8724, + "step": 7622 + }, + { + "epoch": 0.13172173071606302, + "grad_norm": 1.211214639233843, + "learning_rate": 1.946233456395269e-05, + "loss": 0.7087, + "step": 7623 + }, + { + "epoch": 0.13173901022947193, + "grad_norm": 1.316289730970203, + "learning_rate": 1.946215351116406e-05, + "loss": 0.7088, + "step": 7624 + }, + { + "epoch": 0.13175628974288084, + "grad_norm": 1.4650417875554396, + "learning_rate": 1.946197242873925e-05, + "loss": 0.889, + "step": 7625 + }, + { + "epoch": 0.13177356925628975, + "grad_norm": 0.8815443379614031, + "learning_rate": 1.9461791316678818e-05, + "loss": 0.6449, + "step": 7626 + }, + { + "epoch": 0.13179084876969865, + "grad_norm": 0.844084776880672, + "learning_rate": 1.946161017498334e-05, + "loss": 0.4675, + "step": 7627 + }, + { + "epoch": 0.13180812828310753, + "grad_norm": 0.6919082816470439, + "learning_rate": 1.946142900365338e-05, + "loss": 0.6957, + "step": 7628 + }, + { + "epoch": 0.13182540779651644, + "grad_norm": 0.6811271782409352, + "learning_rate": 1.9461247802689507e-05, + "loss": 0.6131, + "step": 7629 + }, + { + "epoch": 0.13184268730992535, + "grad_norm": 2.232119612835119, + "learning_rate": 1.9461066572092283e-05, + "loss": 0.815, + "step": 7630 + }, + { + "epoch": 0.13185996682333426, + "grad_norm": 1.0808044016621678, + "learning_rate": 1.9460885311862282e-05, + "loss": 0.6779, + "step": 7631 + }, + { + "epoch": 0.13187724633674316, + "grad_norm": 1.362956800954851, + "learning_rate": 1.946070402200007e-05, + "loss": 0.8846, + "step": 7632 + }, + { + "epoch": 0.13189452585015207, + "grad_norm": 1.0486785059176627, + "learning_rate": 1.946052270250621e-05, + "loss": 0.7707, + "step": 7633 + }, + { + "epoch": 0.13191180536356095, + "grad_norm": 0.9182857436605075, + "learning_rate": 1.9460341353381278e-05, + "loss": 0.7, + "step": 7634 + }, + { + "epoch": 0.13192908487696986, + "grad_norm": 1.0229847290251732, + "learning_rate": 1.9460159974625835e-05, + "loss": 0.6297, + "step": 7635 + }, + { + "epoch": 0.13194636439037877, + "grad_norm": 0.9858647198380989, + "learning_rate": 1.9459978566240454e-05, + "loss": 0.7583, + "step": 7636 + }, + { + "epoch": 0.13196364390378768, + "grad_norm": 0.7760389219327767, + "learning_rate": 1.9459797128225702e-05, + "loss": 0.5482, + "step": 7637 + }, + { + "epoch": 0.13198092341719658, + "grad_norm": 0.8808629874518848, + "learning_rate": 1.9459615660582144e-05, + "loss": 0.5652, + "step": 7638 + }, + { + "epoch": 0.13199820293060546, + "grad_norm": 1.2918809897811163, + "learning_rate": 1.9459434163310353e-05, + "loss": 0.7847, + "step": 7639 + }, + { + "epoch": 0.13201548244401437, + "grad_norm": 1.0977115139780047, + "learning_rate": 1.9459252636410894e-05, + "loss": 0.6376, + "step": 7640 + }, + { + "epoch": 0.13203276195742328, + "grad_norm": 0.8327499952014866, + "learning_rate": 1.9459071079884337e-05, + "loss": 0.6185, + "step": 7641 + }, + { + "epoch": 0.13205004147083219, + "grad_norm": 0.7445128926330107, + "learning_rate": 1.945888949373125e-05, + "loss": 0.5897, + "step": 7642 + }, + { + "epoch": 0.1320673209842411, + "grad_norm": 1.5884947546843364, + "learning_rate": 1.9458707877952204e-05, + "loss": 0.6012, + "step": 7643 + }, + { + "epoch": 0.13208460049764997, + "grad_norm": 0.6838733067743538, + "learning_rate": 1.9458526232547764e-05, + "loss": 0.6567, + "step": 7644 + }, + { + "epoch": 0.13210188001105888, + "grad_norm": 0.7106003289217068, + "learning_rate": 1.9458344557518502e-05, + "loss": 0.8, + "step": 7645 + }, + { + "epoch": 0.1321191595244678, + "grad_norm": 0.9455633002574166, + "learning_rate": 1.9458162852864985e-05, + "loss": 0.6976, + "step": 7646 + }, + { + "epoch": 0.1321364390378767, + "grad_norm": 0.8093782305431317, + "learning_rate": 1.9457981118587784e-05, + "loss": 0.5281, + "step": 7647 + }, + { + "epoch": 0.1321537185512856, + "grad_norm": 0.7845504150796928, + "learning_rate": 1.945779935468747e-05, + "loss": 0.5086, + "step": 7648 + }, + { + "epoch": 0.1321709980646945, + "grad_norm": 0.7601217017540207, + "learning_rate": 1.9457617561164604e-05, + "loss": 0.74, + "step": 7649 + }, + { + "epoch": 0.1321882775781034, + "grad_norm": 0.9524530092021087, + "learning_rate": 1.9457435738019766e-05, + "loss": 0.6318, + "step": 7650 + }, + { + "epoch": 0.1322055570915123, + "grad_norm": 0.9448446669761228, + "learning_rate": 1.945725388525352e-05, + "loss": 0.5789, + "step": 7651 + }, + { + "epoch": 0.1322228366049212, + "grad_norm": 0.9889914200734181, + "learning_rate": 1.9457072002866435e-05, + "loss": 0.5882, + "step": 7652 + }, + { + "epoch": 0.13224011611833011, + "grad_norm": 0.7323382300243156, + "learning_rate": 1.945689009085908e-05, + "loss": 0.4004, + "step": 7653 + }, + { + "epoch": 0.13225739563173902, + "grad_norm": 1.3477319879093093, + "learning_rate": 1.9456708149232028e-05, + "loss": 0.8483, + "step": 7654 + }, + { + "epoch": 0.1322746751451479, + "grad_norm": 1.2482043722902063, + "learning_rate": 1.9456526177985848e-05, + "loss": 0.5795, + "step": 7655 + }, + { + "epoch": 0.1322919546585568, + "grad_norm": 1.1517622054066448, + "learning_rate": 1.945634417712111e-05, + "loss": 0.5562, + "step": 7656 + }, + { + "epoch": 0.13230923417196572, + "grad_norm": 1.0572623445659712, + "learning_rate": 1.9456162146638382e-05, + "loss": 0.7802, + "step": 7657 + }, + { + "epoch": 0.13232651368537462, + "grad_norm": 0.6398545849275762, + "learning_rate": 1.9455980086538236e-05, + "loss": 0.4632, + "step": 7658 + }, + { + "epoch": 0.13234379319878353, + "grad_norm": 1.3584904471683412, + "learning_rate": 1.9455797996821243e-05, + "loss": 0.8736, + "step": 7659 + }, + { + "epoch": 0.1323610727121924, + "grad_norm": 1.2541049624667076, + "learning_rate": 1.9455615877487968e-05, + "loss": 0.8651, + "step": 7660 + }, + { + "epoch": 0.13237835222560132, + "grad_norm": 0.860536304174311, + "learning_rate": 1.9455433728538988e-05, + "loss": 0.5809, + "step": 7661 + }, + { + "epoch": 0.13239563173901023, + "grad_norm": 0.8812684772972712, + "learning_rate": 1.945525154997487e-05, + "loss": 0.7191, + "step": 7662 + }, + { + "epoch": 0.13241291125241914, + "grad_norm": 1.0455484333387262, + "learning_rate": 1.9455069341796186e-05, + "loss": 0.6405, + "step": 7663 + }, + { + "epoch": 0.13243019076582804, + "grad_norm": 0.9097288382468064, + "learning_rate": 1.945488710400351e-05, + "loss": 0.7023, + "step": 7664 + }, + { + "epoch": 0.13244747027923692, + "grad_norm": 1.8308623184220267, + "learning_rate": 1.9454704836597406e-05, + "loss": 0.7471, + "step": 7665 + }, + { + "epoch": 0.13246474979264583, + "grad_norm": 1.0677293355830988, + "learning_rate": 1.9454522539578448e-05, + "loss": 0.721, + "step": 7666 + }, + { + "epoch": 0.13248202930605474, + "grad_norm": 1.1318390367071889, + "learning_rate": 1.9454340212947204e-05, + "loss": 0.5373, + "step": 7667 + }, + { + "epoch": 0.13249930881946365, + "grad_norm": 0.9872222796275372, + "learning_rate": 1.945415785670425e-05, + "loss": 0.5766, + "step": 7668 + }, + { + "epoch": 0.13251658833287255, + "grad_norm": 0.42357278721862685, + "learning_rate": 1.9453975470850157e-05, + "loss": 0.6373, + "step": 7669 + }, + { + "epoch": 0.13253386784628146, + "grad_norm": 1.159960525186738, + "learning_rate": 1.9453793055385493e-05, + "loss": 0.651, + "step": 7670 + }, + { + "epoch": 0.13255114735969034, + "grad_norm": 0.5929934508844089, + "learning_rate": 1.945361061031083e-05, + "loss": 0.733, + "step": 7671 + }, + { + "epoch": 0.13256842687309925, + "grad_norm": 0.7033493697783848, + "learning_rate": 1.9453428135626742e-05, + "loss": 0.5009, + "step": 7672 + }, + { + "epoch": 0.13258570638650816, + "grad_norm": 0.6909156811309729, + "learning_rate": 1.94532456313338e-05, + "loss": 0.7623, + "step": 7673 + }, + { + "epoch": 0.13260298589991706, + "grad_norm": 1.270149666169947, + "learning_rate": 1.945306309743257e-05, + "loss": 0.66, + "step": 7674 + }, + { + "epoch": 0.13262026541332597, + "grad_norm": 1.0086013553777842, + "learning_rate": 1.9452880533923627e-05, + "loss": 0.6359, + "step": 7675 + }, + { + "epoch": 0.13263754492673485, + "grad_norm": 0.8168505230110932, + "learning_rate": 1.9452697940807548e-05, + "loss": 0.5819, + "step": 7676 + }, + { + "epoch": 0.13265482444014376, + "grad_norm": 1.192958413956073, + "learning_rate": 1.9452515318084902e-05, + "loss": 0.6932, + "step": 7677 + }, + { + "epoch": 0.13267210395355267, + "grad_norm": 1.2297686912027843, + "learning_rate": 1.9452332665756256e-05, + "loss": 0.6113, + "step": 7678 + }, + { + "epoch": 0.13268938346696157, + "grad_norm": 0.8098816023917204, + "learning_rate": 1.9452149983822185e-05, + "loss": 0.7095, + "step": 7679 + }, + { + "epoch": 0.13270666298037048, + "grad_norm": 1.0198915844219953, + "learning_rate": 1.9451967272283263e-05, + "loss": 0.764, + "step": 7680 + }, + { + "epoch": 0.13272394249377936, + "grad_norm": 1.07718029779678, + "learning_rate": 1.9451784531140062e-05, + "loss": 0.3959, + "step": 7681 + }, + { + "epoch": 0.13274122200718827, + "grad_norm": 0.9632679949369626, + "learning_rate": 1.9451601760393154e-05, + "loss": 0.6471, + "step": 7682 + }, + { + "epoch": 0.13275850152059718, + "grad_norm": 1.1268196430621242, + "learning_rate": 1.9451418960043108e-05, + "loss": 0.7094, + "step": 7683 + }, + { + "epoch": 0.13277578103400609, + "grad_norm": 0.7787126541016981, + "learning_rate": 1.9451236130090504e-05, + "loss": 0.5916, + "step": 7684 + }, + { + "epoch": 0.132793060547415, + "grad_norm": 0.7501924645058192, + "learning_rate": 1.945105327053591e-05, + "loss": 0.579, + "step": 7685 + }, + { + "epoch": 0.1328103400608239, + "grad_norm": 0.9709624461895288, + "learning_rate": 1.9450870381379893e-05, + "loss": 0.7173, + "step": 7686 + }, + { + "epoch": 0.13282761957423278, + "grad_norm": 1.1930652110623412, + "learning_rate": 1.9450687462623037e-05, + "loss": 0.5785, + "step": 7687 + }, + { + "epoch": 0.1328448990876417, + "grad_norm": 0.9226257481035286, + "learning_rate": 1.945050451426591e-05, + "loss": 0.5932, + "step": 7688 + }, + { + "epoch": 0.1328621786010506, + "grad_norm": 1.3131474486409507, + "learning_rate": 1.945032153630908e-05, + "loss": 0.7592, + "step": 7689 + }, + { + "epoch": 0.1328794581144595, + "grad_norm": 1.0964190793400124, + "learning_rate": 1.945013852875313e-05, + "loss": 0.7602, + "step": 7690 + }, + { + "epoch": 0.1328967376278684, + "grad_norm": 0.8755241040415846, + "learning_rate": 1.9449955491598626e-05, + "loss": 0.4647, + "step": 7691 + }, + { + "epoch": 0.1329140171412773, + "grad_norm": 1.3314099960819912, + "learning_rate": 1.944977242484614e-05, + "loss": 0.6331, + "step": 7692 + }, + { + "epoch": 0.1329312966546862, + "grad_norm": 0.9631656970550722, + "learning_rate": 1.9449589328496254e-05, + "loss": 0.6203, + "step": 7693 + }, + { + "epoch": 0.1329485761680951, + "grad_norm": 1.2782473633712257, + "learning_rate": 1.9449406202549532e-05, + "loss": 0.8118, + "step": 7694 + }, + { + "epoch": 0.132965855681504, + "grad_norm": 1.417152165935226, + "learning_rate": 1.9449223047006554e-05, + "loss": 0.6445, + "step": 7695 + }, + { + "epoch": 0.13298313519491292, + "grad_norm": 1.374155958502331, + "learning_rate": 1.944903986186789e-05, + "loss": 0.5238, + "step": 7696 + }, + { + "epoch": 0.1330004147083218, + "grad_norm": 1.3143192437739017, + "learning_rate": 1.9448856647134116e-05, + "loss": 0.7239, + "step": 7697 + }, + { + "epoch": 0.1330176942217307, + "grad_norm": 1.2099841791358195, + "learning_rate": 1.9448673402805805e-05, + "loss": 0.5739, + "step": 7698 + }, + { + "epoch": 0.13303497373513962, + "grad_norm": 0.9092364959308631, + "learning_rate": 1.9448490128883528e-05, + "loss": 0.5001, + "step": 7699 + }, + { + "epoch": 0.13305225324854852, + "grad_norm": 1.662382647772854, + "learning_rate": 1.9448306825367865e-05, + "loss": 0.7234, + "step": 7700 + }, + { + "epoch": 0.13306953276195743, + "grad_norm": 0.8213428289056153, + "learning_rate": 1.9448123492259388e-05, + "loss": 0.5217, + "step": 7701 + }, + { + "epoch": 0.1330868122753663, + "grad_norm": 1.0962456533030736, + "learning_rate": 1.9447940129558666e-05, + "loss": 0.6668, + "step": 7702 + }, + { + "epoch": 0.13310409178877522, + "grad_norm": 0.9206819333884975, + "learning_rate": 1.944775673726628e-05, + "loss": 0.5046, + "step": 7703 + }, + { + "epoch": 0.13312137130218413, + "grad_norm": 1.346817273364765, + "learning_rate": 1.94475733153828e-05, + "loss": 0.584, + "step": 7704 + }, + { + "epoch": 0.13313865081559303, + "grad_norm": 1.0957811352247773, + "learning_rate": 1.9447389863908808e-05, + "loss": 0.6981, + "step": 7705 + }, + { + "epoch": 0.13315593032900194, + "grad_norm": 0.9989201667502359, + "learning_rate": 1.9447206382844867e-05, + "loss": 0.6813, + "step": 7706 + }, + { + "epoch": 0.13317320984241085, + "grad_norm": 1.1107276688259857, + "learning_rate": 1.9447022872191563e-05, + "loss": 0.8032, + "step": 7707 + }, + { + "epoch": 0.13319048935581973, + "grad_norm": 1.3338980461536942, + "learning_rate": 1.944683933194946e-05, + "loss": 0.7637, + "step": 7708 + }, + { + "epoch": 0.13320776886922864, + "grad_norm": 1.210702521289039, + "learning_rate": 1.944665576211914e-05, + "loss": 0.8343, + "step": 7709 + }, + { + "epoch": 0.13322504838263755, + "grad_norm": 0.7428959480167994, + "learning_rate": 1.9446472162701176e-05, + "loss": 0.4979, + "step": 7710 + }, + { + "epoch": 0.13324232789604645, + "grad_norm": 0.8354548409026475, + "learning_rate": 1.9446288533696145e-05, + "loss": 0.4714, + "step": 7711 + }, + { + "epoch": 0.13325960740945536, + "grad_norm": 0.8558799542452862, + "learning_rate": 1.944610487510462e-05, + "loss": 0.7244, + "step": 7712 + }, + { + "epoch": 0.13327688692286424, + "grad_norm": 0.6848564301679803, + "learning_rate": 1.944592118692717e-05, + "loss": 0.4431, + "step": 7713 + }, + { + "epoch": 0.13329416643627315, + "grad_norm": 1.136424061808235, + "learning_rate": 1.9445737469164387e-05, + "loss": 0.7281, + "step": 7714 + }, + { + "epoch": 0.13331144594968206, + "grad_norm": 0.7996585202166806, + "learning_rate": 1.9445553721816833e-05, + "loss": 0.6282, + "step": 7715 + }, + { + "epoch": 0.13332872546309096, + "grad_norm": 0.9773331103158516, + "learning_rate": 1.9445369944885084e-05, + "loss": 0.7974, + "step": 7716 + }, + { + "epoch": 0.13334600497649987, + "grad_norm": 1.0608770096664073, + "learning_rate": 1.944518613836972e-05, + "loss": 0.6667, + "step": 7717 + }, + { + "epoch": 0.13336328448990875, + "grad_norm": 0.8826915995655275, + "learning_rate": 1.944500230227132e-05, + "loss": 0.7784, + "step": 7718 + }, + { + "epoch": 0.13338056400331766, + "grad_norm": 5.796163671355427, + "learning_rate": 1.944481843659045e-05, + "loss": 0.7187, + "step": 7719 + }, + { + "epoch": 0.13339784351672657, + "grad_norm": 0.5247467540846384, + "learning_rate": 1.944463454132769e-05, + "loss": 0.4372, + "step": 7720 + }, + { + "epoch": 0.13341512303013547, + "grad_norm": 0.8399025675878046, + "learning_rate": 1.944445061648362e-05, + "loss": 0.7769, + "step": 7721 + }, + { + "epoch": 0.13343240254354438, + "grad_norm": 0.7920093320829725, + "learning_rate": 1.944426666205881e-05, + "loss": 0.5047, + "step": 7722 + }, + { + "epoch": 0.1334496820569533, + "grad_norm": 0.9558310583037801, + "learning_rate": 1.944408267805384e-05, + "loss": 0.7733, + "step": 7723 + }, + { + "epoch": 0.13346696157036217, + "grad_norm": 0.6816604420605363, + "learning_rate": 1.9443898664469286e-05, + "loss": 0.7836, + "step": 7724 + }, + { + "epoch": 0.13348424108377108, + "grad_norm": 0.8145854944123161, + "learning_rate": 1.9443714621305724e-05, + "loss": 0.7326, + "step": 7725 + }, + { + "epoch": 0.13350152059717998, + "grad_norm": 1.1889574728226429, + "learning_rate": 1.944353054856373e-05, + "loss": 0.8811, + "step": 7726 + }, + { + "epoch": 0.1335188001105889, + "grad_norm": 1.0193020915945383, + "learning_rate": 1.9443346446243882e-05, + "loss": 0.765, + "step": 7727 + }, + { + "epoch": 0.1335360796239978, + "grad_norm": 0.8695476359062643, + "learning_rate": 1.9443162314346754e-05, + "loss": 0.8465, + "step": 7728 + }, + { + "epoch": 0.13355335913740668, + "grad_norm": 0.43684945512595313, + "learning_rate": 1.9442978152872923e-05, + "loss": 0.7923, + "step": 7729 + }, + { + "epoch": 0.1335706386508156, + "grad_norm": 0.7343797486599927, + "learning_rate": 1.9442793961822968e-05, + "loss": 0.7687, + "step": 7730 + }, + { + "epoch": 0.1335879181642245, + "grad_norm": 1.8477990630207362, + "learning_rate": 1.944260974119746e-05, + "loss": 0.5965, + "step": 7731 + }, + { + "epoch": 0.1336051976776334, + "grad_norm": 0.9427567353902718, + "learning_rate": 1.9442425490996987e-05, + "loss": 0.7673, + "step": 7732 + }, + { + "epoch": 0.1336224771910423, + "grad_norm": 1.2626435862238743, + "learning_rate": 1.9442241211222116e-05, + "loss": 0.7265, + "step": 7733 + }, + { + "epoch": 0.1336397567044512, + "grad_norm": 0.7709061619014698, + "learning_rate": 1.9442056901873432e-05, + "loss": 0.5173, + "step": 7734 + }, + { + "epoch": 0.1336570362178601, + "grad_norm": 0.8340674571034896, + "learning_rate": 1.9441872562951505e-05, + "loss": 0.8013, + "step": 7735 + }, + { + "epoch": 0.133674315731269, + "grad_norm": 0.6778535720013817, + "learning_rate": 1.944168819445692e-05, + "loss": 0.6089, + "step": 7736 + }, + { + "epoch": 0.1336915952446779, + "grad_norm": 0.9781113022726388, + "learning_rate": 1.9441503796390245e-05, + "loss": 0.7787, + "step": 7737 + }, + { + "epoch": 0.13370887475808682, + "grad_norm": 0.6809401110644799, + "learning_rate": 1.944131936875206e-05, + "loss": 0.5374, + "step": 7738 + }, + { + "epoch": 0.13372615427149573, + "grad_norm": 0.9585774031160956, + "learning_rate": 1.9441134911542952e-05, + "loss": 0.6274, + "step": 7739 + }, + { + "epoch": 0.1337434337849046, + "grad_norm": 0.6985105517484297, + "learning_rate": 1.9440950424763488e-05, + "loss": 0.7442, + "step": 7740 + }, + { + "epoch": 0.13376071329831352, + "grad_norm": 0.933330224534067, + "learning_rate": 1.9440765908414253e-05, + "loss": 0.5129, + "step": 7741 + }, + { + "epoch": 0.13377799281172242, + "grad_norm": 1.0404592954191139, + "learning_rate": 1.9440581362495816e-05, + "loss": 0.6386, + "step": 7742 + }, + { + "epoch": 0.13379527232513133, + "grad_norm": 0.6499840488442181, + "learning_rate": 1.9440396787008767e-05, + "loss": 0.5739, + "step": 7743 + }, + { + "epoch": 0.13381255183854024, + "grad_norm": 0.8526419218372132, + "learning_rate": 1.9440212181953675e-05, + "loss": 0.4793, + "step": 7744 + }, + { + "epoch": 0.13382983135194912, + "grad_norm": 1.0270819331087577, + "learning_rate": 1.944002754733112e-05, + "loss": 0.7788, + "step": 7745 + }, + { + "epoch": 0.13384711086535803, + "grad_norm": 1.024697036923235, + "learning_rate": 1.9439842883141682e-05, + "loss": 0.7531, + "step": 7746 + }, + { + "epoch": 0.13386439037876693, + "grad_norm": 0.7159030009287201, + "learning_rate": 1.943965818938594e-05, + "loss": 0.4913, + "step": 7747 + }, + { + "epoch": 0.13388166989217584, + "grad_norm": 0.9215200291155459, + "learning_rate": 1.943947346606447e-05, + "loss": 0.5931, + "step": 7748 + }, + { + "epoch": 0.13389894940558475, + "grad_norm": 1.0933368809370223, + "learning_rate": 1.9439288713177854e-05, + "loss": 0.9537, + "step": 7749 + }, + { + "epoch": 0.13391622891899363, + "grad_norm": 0.9150358527417335, + "learning_rate": 1.9439103930726663e-05, + "loss": 0.7638, + "step": 7750 + }, + { + "epoch": 0.13393350843240254, + "grad_norm": 0.484340128803897, + "learning_rate": 1.9438919118711484e-05, + "loss": 0.8535, + "step": 7751 + }, + { + "epoch": 0.13395078794581144, + "grad_norm": 1.0191398594972532, + "learning_rate": 1.9438734277132897e-05, + "loss": 0.6952, + "step": 7752 + }, + { + "epoch": 0.13396806745922035, + "grad_norm": 0.7405897688678633, + "learning_rate": 1.943854940599147e-05, + "loss": 0.663, + "step": 7753 + }, + { + "epoch": 0.13398534697262926, + "grad_norm": 0.5398796490394804, + "learning_rate": 1.9438364505287796e-05, + "loss": 0.1922, + "step": 7754 + }, + { + "epoch": 0.13400262648603814, + "grad_norm": 0.9047445667034423, + "learning_rate": 1.943817957502244e-05, + "loss": 0.6938, + "step": 7755 + }, + { + "epoch": 0.13401990599944705, + "grad_norm": 1.308281748126245, + "learning_rate": 1.9437994615195994e-05, + "loss": 0.6831, + "step": 7756 + }, + { + "epoch": 0.13403718551285596, + "grad_norm": 1.6574012403476444, + "learning_rate": 1.943780962580903e-05, + "loss": 0.855, + "step": 7757 + }, + { + "epoch": 0.13405446502626486, + "grad_norm": 0.8272238839373156, + "learning_rate": 1.9437624606862126e-05, + "loss": 0.5165, + "step": 7758 + }, + { + "epoch": 0.13407174453967377, + "grad_norm": 0.634122028079418, + "learning_rate": 1.943743955835587e-05, + "loss": 0.6921, + "step": 7759 + }, + { + "epoch": 0.13408902405308268, + "grad_norm": 0.8526116551114127, + "learning_rate": 1.943725448029083e-05, + "loss": 0.5199, + "step": 7760 + }, + { + "epoch": 0.13410630356649156, + "grad_norm": 0.9161555616284002, + "learning_rate": 1.9437069372667598e-05, + "loss": 0.5531, + "step": 7761 + }, + { + "epoch": 0.13412358307990047, + "grad_norm": 0.8814013106070212, + "learning_rate": 1.9436884235486743e-05, + "loss": 0.7024, + "step": 7762 + }, + { + "epoch": 0.13414086259330937, + "grad_norm": 0.6220211313680549, + "learning_rate": 1.943669906874885e-05, + "loss": 0.4812, + "step": 7763 + }, + { + "epoch": 0.13415814210671828, + "grad_norm": 0.9141765039034396, + "learning_rate": 1.9436513872454498e-05, + "loss": 0.6852, + "step": 7764 + }, + { + "epoch": 0.1341754216201272, + "grad_norm": 0.9774558937546687, + "learning_rate": 1.943632864660427e-05, + "loss": 0.608, + "step": 7765 + }, + { + "epoch": 0.13419270113353607, + "grad_norm": 0.9924644283792077, + "learning_rate": 1.943614339119874e-05, + "loss": 0.6904, + "step": 7766 + }, + { + "epoch": 0.13420998064694498, + "grad_norm": 0.8674902953536662, + "learning_rate": 1.9435958106238494e-05, + "loss": 0.4546, + "step": 7767 + }, + { + "epoch": 0.13422726016035388, + "grad_norm": 0.8542900897968154, + "learning_rate": 1.943577279172411e-05, + "loss": 0.5269, + "step": 7768 + }, + { + "epoch": 0.1342445396737628, + "grad_norm": 0.9164212960513817, + "learning_rate": 1.943558744765617e-05, + "loss": 0.6853, + "step": 7769 + }, + { + "epoch": 0.1342618191871717, + "grad_norm": 0.9615755407252737, + "learning_rate": 1.943540207403525e-05, + "loss": 0.8476, + "step": 7770 + }, + { + "epoch": 0.13427909870058058, + "grad_norm": 0.8040371739891488, + "learning_rate": 1.9435216670861936e-05, + "loss": 0.4985, + "step": 7771 + }, + { + "epoch": 0.1342963782139895, + "grad_norm": 1.13944006524782, + "learning_rate": 1.9435031238136805e-05, + "loss": 0.6271, + "step": 7772 + }, + { + "epoch": 0.1343136577273984, + "grad_norm": 0.6278385997192006, + "learning_rate": 1.9434845775860437e-05, + "loss": 0.4938, + "step": 7773 + }, + { + "epoch": 0.1343309372408073, + "grad_norm": 0.8676420280655337, + "learning_rate": 1.943466028403342e-05, + "loss": 0.8998, + "step": 7774 + }, + { + "epoch": 0.1343482167542162, + "grad_norm": 1.1992335525575888, + "learning_rate": 1.9434474762656327e-05, + "loss": 0.6383, + "step": 7775 + }, + { + "epoch": 0.13436549626762512, + "grad_norm": 0.8287645479924378, + "learning_rate": 1.9434289211729744e-05, + "loss": 0.6514, + "step": 7776 + }, + { + "epoch": 0.134382775781034, + "grad_norm": 1.0523297518600072, + "learning_rate": 1.9434103631254247e-05, + "loss": 0.7475, + "step": 7777 + }, + { + "epoch": 0.1344000552944429, + "grad_norm": 0.8674156318909156, + "learning_rate": 1.9433918021230423e-05, + "loss": 0.6919, + "step": 7778 + }, + { + "epoch": 0.1344173348078518, + "grad_norm": 0.6047527346468878, + "learning_rate": 1.943373238165885e-05, + "loss": 0.7007, + "step": 7779 + }, + { + "epoch": 0.13443461432126072, + "grad_norm": 1.1945971561835445, + "learning_rate": 1.943354671254011e-05, + "loss": 0.9812, + "step": 7780 + }, + { + "epoch": 0.13445189383466963, + "grad_norm": 0.6661331230968378, + "learning_rate": 1.9433361013874785e-05, + "loss": 0.7674, + "step": 7781 + }, + { + "epoch": 0.1344691733480785, + "grad_norm": 1.1214102834937987, + "learning_rate": 1.9433175285663454e-05, + "loss": 0.7006, + "step": 7782 + }, + { + "epoch": 0.13448645286148742, + "grad_norm": 0.9726411138091561, + "learning_rate": 1.9432989527906706e-05, + "loss": 0.9123, + "step": 7783 + }, + { + "epoch": 0.13450373237489632, + "grad_norm": 0.9732642874526473, + "learning_rate": 1.9432803740605114e-05, + "loss": 0.7258, + "step": 7784 + }, + { + "epoch": 0.13452101188830523, + "grad_norm": 0.8381032580516695, + "learning_rate": 1.9432617923759266e-05, + "loss": 0.4803, + "step": 7785 + }, + { + "epoch": 0.13453829140171414, + "grad_norm": 0.8600089276003553, + "learning_rate": 1.9432432077369742e-05, + "loss": 0.6477, + "step": 7786 + }, + { + "epoch": 0.13455557091512302, + "grad_norm": 1.2005654198654965, + "learning_rate": 1.9432246201437123e-05, + "loss": 0.7173, + "step": 7787 + }, + { + "epoch": 0.13457285042853193, + "grad_norm": 1.1363010121556882, + "learning_rate": 1.9432060295961993e-05, + "loss": 0.7092, + "step": 7788 + }, + { + "epoch": 0.13459012994194083, + "grad_norm": 0.7708464202629741, + "learning_rate": 1.9431874360944932e-05, + "loss": 0.6252, + "step": 7789 + }, + { + "epoch": 0.13460740945534974, + "grad_norm": 0.7935344386806, + "learning_rate": 1.9431688396386523e-05, + "loss": 0.4983, + "step": 7790 + }, + { + "epoch": 0.13462468896875865, + "grad_norm": 0.8561224610219014, + "learning_rate": 1.943150240228735e-05, + "loss": 0.7232, + "step": 7791 + }, + { + "epoch": 0.13464196848216753, + "grad_norm": 0.6180598558630565, + "learning_rate": 1.9431316378647995e-05, + "loss": 0.6847, + "step": 7792 + }, + { + "epoch": 0.13465924799557644, + "grad_norm": 0.959735538483396, + "learning_rate": 1.9431130325469042e-05, + "loss": 0.7564, + "step": 7793 + }, + { + "epoch": 0.13467652750898534, + "grad_norm": 0.7110775865905118, + "learning_rate": 1.9430944242751068e-05, + "loss": 0.5784, + "step": 7794 + }, + { + "epoch": 0.13469380702239425, + "grad_norm": 1.070145728008033, + "learning_rate": 1.9430758130494664e-05, + "loss": 0.6855, + "step": 7795 + }, + { + "epoch": 0.13471108653580316, + "grad_norm": 0.9522041344218697, + "learning_rate": 1.9430571988700408e-05, + "loss": 0.6396, + "step": 7796 + }, + { + "epoch": 0.13472836604921207, + "grad_norm": 0.8813585093869307, + "learning_rate": 1.9430385817368883e-05, + "loss": 0.8412, + "step": 7797 + }, + { + "epoch": 0.13474564556262095, + "grad_norm": 0.8274587408452329, + "learning_rate": 1.9430199616500672e-05, + "loss": 0.7586, + "step": 7798 + }, + { + "epoch": 0.13476292507602985, + "grad_norm": 0.9674976538538436, + "learning_rate": 1.9430013386096358e-05, + "loss": 0.6357, + "step": 7799 + }, + { + "epoch": 0.13478020458943876, + "grad_norm": 0.9514477723768717, + "learning_rate": 1.942982712615653e-05, + "loss": 0.5749, + "step": 7800 + }, + { + "epoch": 0.13479748410284767, + "grad_norm": 0.880561137429621, + "learning_rate": 1.942964083668176e-05, + "loss": 0.6938, + "step": 7801 + }, + { + "epoch": 0.13481476361625658, + "grad_norm": 0.42191724552746795, + "learning_rate": 1.9429454517672644e-05, + "loss": 0.6385, + "step": 7802 + }, + { + "epoch": 0.13483204312966546, + "grad_norm": 1.040703196284327, + "learning_rate": 1.9429268169129762e-05, + "loss": 0.6873, + "step": 7803 + }, + { + "epoch": 0.13484932264307437, + "grad_norm": 1.062485284921947, + "learning_rate": 1.942908179105369e-05, + "loss": 0.5187, + "step": 7804 + }, + { + "epoch": 0.13486660215648327, + "grad_norm": 1.2285507799984503, + "learning_rate": 1.9428895383445018e-05, + "loss": 0.6763, + "step": 7805 + }, + { + "epoch": 0.13488388166989218, + "grad_norm": 0.8151023357456403, + "learning_rate": 1.9428708946304332e-05, + "loss": 0.5581, + "step": 7806 + }, + { + "epoch": 0.1349011611833011, + "grad_norm": 0.8072646904002235, + "learning_rate": 1.942852247963221e-05, + "loss": 0.695, + "step": 7807 + }, + { + "epoch": 0.13491844069670997, + "grad_norm": 0.872654729999141, + "learning_rate": 1.942833598342924e-05, + "loss": 0.4086, + "step": 7808 + }, + { + "epoch": 0.13493572021011888, + "grad_norm": 1.296753973215449, + "learning_rate": 1.9428149457696007e-05, + "loss": 0.6353, + "step": 7809 + }, + { + "epoch": 0.13495299972352778, + "grad_norm": 1.1938510532813966, + "learning_rate": 1.942796290243309e-05, + "loss": 0.9228, + "step": 7810 + }, + { + "epoch": 0.1349702792369367, + "grad_norm": 1.1458181959375051, + "learning_rate": 1.9427776317641077e-05, + "loss": 0.5407, + "step": 7811 + }, + { + "epoch": 0.1349875587503456, + "grad_norm": 0.8671670208582043, + "learning_rate": 1.9427589703320557e-05, + "loss": 0.5941, + "step": 7812 + }, + { + "epoch": 0.1350048382637545, + "grad_norm": 0.8931077319575765, + "learning_rate": 1.9427403059472106e-05, + "loss": 0.6226, + "step": 7813 + }, + { + "epoch": 0.1350221177771634, + "grad_norm": 0.9696844298405508, + "learning_rate": 1.9427216386096313e-05, + "loss": 0.5989, + "step": 7814 + }, + { + "epoch": 0.1350393972905723, + "grad_norm": 1.0603878766076755, + "learning_rate": 1.942702968319376e-05, + "loss": 0.6939, + "step": 7815 + }, + { + "epoch": 0.1350566768039812, + "grad_norm": 0.8984235576413477, + "learning_rate": 1.9426842950765034e-05, + "loss": 0.4556, + "step": 7816 + }, + { + "epoch": 0.1350739563173901, + "grad_norm": 0.7722158942989161, + "learning_rate": 1.9426656188810723e-05, + "loss": 0.7095, + "step": 7817 + }, + { + "epoch": 0.13509123583079902, + "grad_norm": 1.2643540053636662, + "learning_rate": 1.9426469397331402e-05, + "loss": 0.7122, + "step": 7818 + }, + { + "epoch": 0.1351085153442079, + "grad_norm": 0.47635234699925827, + "learning_rate": 1.942628257632767e-05, + "loss": 0.6804, + "step": 7819 + }, + { + "epoch": 0.1351257948576168, + "grad_norm": 0.871925600303261, + "learning_rate": 1.94260957258001e-05, + "loss": 0.5675, + "step": 7820 + }, + { + "epoch": 0.1351430743710257, + "grad_norm": 0.7637421238036565, + "learning_rate": 1.942590884574928e-05, + "loss": 0.5923, + "step": 7821 + }, + { + "epoch": 0.13516035388443462, + "grad_norm": 1.0871299226702864, + "learning_rate": 1.94257219361758e-05, + "loss": 0.6839, + "step": 7822 + }, + { + "epoch": 0.13517763339784353, + "grad_norm": 0.8248286405683773, + "learning_rate": 1.942553499708024e-05, + "loss": 0.7913, + "step": 7823 + }, + { + "epoch": 0.1351949129112524, + "grad_norm": 1.126843176477325, + "learning_rate": 1.942534802846319e-05, + "loss": 0.6761, + "step": 7824 + }, + { + "epoch": 0.13521219242466131, + "grad_norm": 0.8666353110482216, + "learning_rate": 1.9425161030325238e-05, + "loss": 0.8894, + "step": 7825 + }, + { + "epoch": 0.13522947193807022, + "grad_norm": 0.6767755551858561, + "learning_rate": 1.9424974002666958e-05, + "loss": 0.5554, + "step": 7826 + }, + { + "epoch": 0.13524675145147913, + "grad_norm": 1.147039362228237, + "learning_rate": 1.942478694548895e-05, + "loss": 0.6412, + "step": 7827 + }, + { + "epoch": 0.13526403096488804, + "grad_norm": 1.116142309793103, + "learning_rate": 1.9424599858791788e-05, + "loss": 0.7846, + "step": 7828 + }, + { + "epoch": 0.13528131047829692, + "grad_norm": 0.8300841170994893, + "learning_rate": 1.9424412742576068e-05, + "loss": 0.5257, + "step": 7829 + }, + { + "epoch": 0.13529858999170583, + "grad_norm": 0.5916220438130929, + "learning_rate": 1.9424225596842367e-05, + "loss": 0.8416, + "step": 7830 + }, + { + "epoch": 0.13531586950511473, + "grad_norm": 0.8224906393582878, + "learning_rate": 1.9424038421591272e-05, + "loss": 0.5632, + "step": 7831 + }, + { + "epoch": 0.13533314901852364, + "grad_norm": 0.9980164702783924, + "learning_rate": 1.9423851216823377e-05, + "loss": 0.6759, + "step": 7832 + }, + { + "epoch": 0.13535042853193255, + "grad_norm": 0.9326489642421346, + "learning_rate": 1.9423663982539263e-05, + "loss": 0.7681, + "step": 7833 + }, + { + "epoch": 0.13536770804534146, + "grad_norm": 0.9207447023306605, + "learning_rate": 1.9423476718739517e-05, + "loss": 0.7047, + "step": 7834 + }, + { + "epoch": 0.13538498755875034, + "grad_norm": 0.7339067366383094, + "learning_rate": 1.9423289425424728e-05, + "loss": 0.486, + "step": 7835 + }, + { + "epoch": 0.13540226707215924, + "grad_norm": 0.855509555923501, + "learning_rate": 1.9423102102595476e-05, + "loss": 0.5491, + "step": 7836 + }, + { + "epoch": 0.13541954658556815, + "grad_norm": 0.8598367474949855, + "learning_rate": 1.9422914750252357e-05, + "loss": 0.5658, + "step": 7837 + }, + { + "epoch": 0.13543682609897706, + "grad_norm": 1.0662016059990966, + "learning_rate": 1.9422727368395947e-05, + "loss": 0.6903, + "step": 7838 + }, + { + "epoch": 0.13545410561238597, + "grad_norm": 0.843885284559195, + "learning_rate": 1.9422539957026846e-05, + "loss": 0.6217, + "step": 7839 + }, + { + "epoch": 0.13547138512579485, + "grad_norm": 0.9655218794449747, + "learning_rate": 1.942235251614563e-05, + "loss": 0.6455, + "step": 7840 + }, + { + "epoch": 0.13548866463920375, + "grad_norm": 1.0701358016665377, + "learning_rate": 1.942216504575289e-05, + "loss": 0.7838, + "step": 7841 + }, + { + "epoch": 0.13550594415261266, + "grad_norm": 1.015656651864538, + "learning_rate": 1.9421977545849212e-05, + "loss": 0.532, + "step": 7842 + }, + { + "epoch": 0.13552322366602157, + "grad_norm": 0.6602764093589452, + "learning_rate": 1.9421790016435184e-05, + "loss": 0.6365, + "step": 7843 + }, + { + "epoch": 0.13554050317943048, + "grad_norm": 0.9342122583198469, + "learning_rate": 1.9421602457511395e-05, + "loss": 0.8195, + "step": 7844 + }, + { + "epoch": 0.13555778269283936, + "grad_norm": 0.987498540691168, + "learning_rate": 1.9421414869078428e-05, + "loss": 0.7138, + "step": 7845 + }, + { + "epoch": 0.13557506220624826, + "grad_norm": 1.0807211039834455, + "learning_rate": 1.9421227251136877e-05, + "loss": 0.6596, + "step": 7846 + }, + { + "epoch": 0.13559234171965717, + "grad_norm": 1.1175137118899259, + "learning_rate": 1.942103960368733e-05, + "loss": 0.9013, + "step": 7847 + }, + { + "epoch": 0.13560962123306608, + "grad_norm": 1.0186322366234446, + "learning_rate": 1.9420851926730364e-05, + "loss": 0.3472, + "step": 7848 + }, + { + "epoch": 0.135626900746475, + "grad_norm": 1.0635538882621114, + "learning_rate": 1.9420664220266578e-05, + "loss": 0.7995, + "step": 7849 + }, + { + "epoch": 0.1356441802598839, + "grad_norm": 1.0877003306218174, + "learning_rate": 1.9420476484296555e-05, + "loss": 0.7083, + "step": 7850 + }, + { + "epoch": 0.13566145977329278, + "grad_norm": 0.847930410255307, + "learning_rate": 1.9420288718820882e-05, + "loss": 0.6002, + "step": 7851 + }, + { + "epoch": 0.13567873928670168, + "grad_norm": 0.8255034466856831, + "learning_rate": 1.942010092384015e-05, + "loss": 0.6025, + "step": 7852 + }, + { + "epoch": 0.1356960188001106, + "grad_norm": 0.8401366955041762, + "learning_rate": 1.9419913099354944e-05, + "loss": 0.6396, + "step": 7853 + }, + { + "epoch": 0.1357132983135195, + "grad_norm": 0.9505935158415016, + "learning_rate": 1.9419725245365856e-05, + "loss": 0.5077, + "step": 7854 + }, + { + "epoch": 0.1357305778269284, + "grad_norm": 0.8133814908870459, + "learning_rate": 1.9419537361873475e-05, + "loss": 0.642, + "step": 7855 + }, + { + "epoch": 0.13574785734033729, + "grad_norm": 1.1350969202901093, + "learning_rate": 1.9419349448878385e-05, + "loss": 0.4875, + "step": 7856 + }, + { + "epoch": 0.1357651368537462, + "grad_norm": 0.7343837319517171, + "learning_rate": 1.9419161506381177e-05, + "loss": 0.5415, + "step": 7857 + }, + { + "epoch": 0.1357824163671551, + "grad_norm": 1.0724351805306922, + "learning_rate": 1.941897353438244e-05, + "loss": 0.8348, + "step": 7858 + }, + { + "epoch": 0.135799695880564, + "grad_norm": 0.9972478272076171, + "learning_rate": 1.941878553288276e-05, + "loss": 0.513, + "step": 7859 + }, + { + "epoch": 0.13581697539397292, + "grad_norm": 0.724565549703929, + "learning_rate": 1.9418597501882728e-05, + "loss": 0.5301, + "step": 7860 + }, + { + "epoch": 0.1358342549073818, + "grad_norm": 0.5476278251740876, + "learning_rate": 1.9418409441382934e-05, + "loss": 0.7697, + "step": 7861 + }, + { + "epoch": 0.1358515344207907, + "grad_norm": 0.6706929212917545, + "learning_rate": 1.9418221351383965e-05, + "loss": 0.4625, + "step": 7862 + }, + { + "epoch": 0.1358688139341996, + "grad_norm": 1.1113958006589684, + "learning_rate": 1.941803323188641e-05, + "loss": 0.7551, + "step": 7863 + }, + { + "epoch": 0.13588609344760852, + "grad_norm": 1.1766819422205124, + "learning_rate": 1.9417845082890863e-05, + "loss": 0.9014, + "step": 7864 + }, + { + "epoch": 0.13590337296101743, + "grad_norm": 0.9862075059257793, + "learning_rate": 1.9417656904397906e-05, + "loss": 0.5969, + "step": 7865 + }, + { + "epoch": 0.1359206524744263, + "grad_norm": 0.9131032526233939, + "learning_rate": 1.9417468696408135e-05, + "loss": 0.531, + "step": 7866 + }, + { + "epoch": 0.13593793198783521, + "grad_norm": 0.7626786202634531, + "learning_rate": 1.9417280458922132e-05, + "loss": 0.5952, + "step": 7867 + }, + { + "epoch": 0.13595521150124412, + "grad_norm": 0.8340838672786814, + "learning_rate": 1.9417092191940494e-05, + "loss": 0.5974, + "step": 7868 + }, + { + "epoch": 0.13597249101465303, + "grad_norm": 1.5994907329608055, + "learning_rate": 1.9416903895463806e-05, + "loss": 0.603, + "step": 7869 + }, + { + "epoch": 0.13598977052806194, + "grad_norm": 1.5136913057442412, + "learning_rate": 1.9416715569492662e-05, + "loss": 0.635, + "step": 7870 + }, + { + "epoch": 0.13600705004147084, + "grad_norm": 0.9959123118839974, + "learning_rate": 1.9416527214027648e-05, + "loss": 0.5799, + "step": 7871 + }, + { + "epoch": 0.13602432955487972, + "grad_norm": 1.1574559537764282, + "learning_rate": 1.9416338829069353e-05, + "loss": 1.0921, + "step": 7872 + }, + { + "epoch": 0.13604160906828863, + "grad_norm": 1.20083058002234, + "learning_rate": 1.941615041461837e-05, + "loss": 0.668, + "step": 7873 + }, + { + "epoch": 0.13605888858169754, + "grad_norm": 1.1083374474484893, + "learning_rate": 1.941596197067529e-05, + "loss": 0.6176, + "step": 7874 + }, + { + "epoch": 0.13607616809510645, + "grad_norm": 0.906727368205391, + "learning_rate": 1.94157734972407e-05, + "loss": 0.7341, + "step": 7875 + }, + { + "epoch": 0.13609344760851536, + "grad_norm": 0.9880348741116579, + "learning_rate": 1.9415584994315192e-05, + "loss": 0.832, + "step": 7876 + }, + { + "epoch": 0.13611072712192424, + "grad_norm": 0.468671455937, + "learning_rate": 1.9415396461899358e-05, + "loss": 0.5603, + "step": 7877 + }, + { + "epoch": 0.13612800663533314, + "grad_norm": 0.9147390807031098, + "learning_rate": 1.9415207899993783e-05, + "loss": 0.5258, + "step": 7878 + }, + { + "epoch": 0.13614528614874205, + "grad_norm": 1.1448519714673104, + "learning_rate": 1.9415019308599065e-05, + "loss": 0.6264, + "step": 7879 + }, + { + "epoch": 0.13616256566215096, + "grad_norm": 0.9591594310959448, + "learning_rate": 1.9414830687715792e-05, + "loss": 0.7526, + "step": 7880 + }, + { + "epoch": 0.13617984517555987, + "grad_norm": 0.9749649623908941, + "learning_rate": 1.941464203734455e-05, + "loss": 0.6392, + "step": 7881 + }, + { + "epoch": 0.13619712468896875, + "grad_norm": 1.0929205429010544, + "learning_rate": 1.9414453357485935e-05, + "loss": 0.8777, + "step": 7882 + }, + { + "epoch": 0.13621440420237765, + "grad_norm": 1.2035599395395211, + "learning_rate": 1.9414264648140534e-05, + "loss": 0.77, + "step": 7883 + }, + { + "epoch": 0.13623168371578656, + "grad_norm": 1.0686405666412164, + "learning_rate": 1.9414075909308943e-05, + "loss": 0.9692, + "step": 7884 + }, + { + "epoch": 0.13624896322919547, + "grad_norm": 0.9775590257555533, + "learning_rate": 1.9413887140991753e-05, + "loss": 0.6917, + "step": 7885 + }, + { + "epoch": 0.13626624274260438, + "grad_norm": 0.9489775139241431, + "learning_rate": 1.9413698343189552e-05, + "loss": 0.589, + "step": 7886 + }, + { + "epoch": 0.13628352225601328, + "grad_norm": 1.1447409966403104, + "learning_rate": 1.9413509515902934e-05, + "loss": 0.514, + "step": 7887 + }, + { + "epoch": 0.13630080176942216, + "grad_norm": 1.030809813248974, + "learning_rate": 1.9413320659132485e-05, + "loss": 0.7407, + "step": 7888 + }, + { + "epoch": 0.13631808128283107, + "grad_norm": 1.1741835994234269, + "learning_rate": 1.94131317728788e-05, + "loss": 0.6131, + "step": 7889 + }, + { + "epoch": 0.13633536079623998, + "grad_norm": 1.1715769882934097, + "learning_rate": 1.9412942857142475e-05, + "loss": 0.5439, + "step": 7890 + }, + { + "epoch": 0.1363526403096489, + "grad_norm": 0.8047307129510717, + "learning_rate": 1.9412753911924094e-05, + "loss": 0.6227, + "step": 7891 + }, + { + "epoch": 0.1363699198230578, + "grad_norm": 1.2065112532711588, + "learning_rate": 1.9412564937224256e-05, + "loss": 0.6505, + "step": 7892 + }, + { + "epoch": 0.13638719933646667, + "grad_norm": 1.5611159056911847, + "learning_rate": 1.9412375933043545e-05, + "loss": 0.7516, + "step": 7893 + }, + { + "epoch": 0.13640447884987558, + "grad_norm": 1.1322236607511356, + "learning_rate": 1.941218689938256e-05, + "loss": 0.8448, + "step": 7894 + }, + { + "epoch": 0.1364217583632845, + "grad_norm": 1.2087687953260697, + "learning_rate": 1.9411997836241888e-05, + "loss": 0.7331, + "step": 7895 + }, + { + "epoch": 0.1364390378766934, + "grad_norm": 1.0565707655313104, + "learning_rate": 1.9411808743622128e-05, + "loss": 0.7022, + "step": 7896 + }, + { + "epoch": 0.1364563173901023, + "grad_norm": 2.0496149803523642, + "learning_rate": 1.941161962152386e-05, + "loss": 1.0046, + "step": 7897 + }, + { + "epoch": 0.13647359690351119, + "grad_norm": 1.010544772803414, + "learning_rate": 1.941143046994769e-05, + "loss": 0.6622, + "step": 7898 + }, + { + "epoch": 0.1364908764169201, + "grad_norm": 1.330618877550906, + "learning_rate": 1.9411241288894204e-05, + "loss": 0.8571, + "step": 7899 + }, + { + "epoch": 0.136508155930329, + "grad_norm": 1.1116174999979658, + "learning_rate": 1.9411052078363995e-05, + "loss": 0.7146, + "step": 7900 + }, + { + "epoch": 0.1365254354437379, + "grad_norm": 1.2489527351118421, + "learning_rate": 1.9410862838357656e-05, + "loss": 0.7083, + "step": 7901 + }, + { + "epoch": 0.13654271495714682, + "grad_norm": 1.0813334622005342, + "learning_rate": 1.9410673568875777e-05, + "loss": 0.9505, + "step": 7902 + }, + { + "epoch": 0.1365599944705557, + "grad_norm": 0.7937684354093189, + "learning_rate": 1.9410484269918957e-05, + "loss": 0.4742, + "step": 7903 + }, + { + "epoch": 0.1365772739839646, + "grad_norm": 0.8347765706770511, + "learning_rate": 1.941029494148778e-05, + "loss": 0.8356, + "step": 7904 + }, + { + "epoch": 0.1365945534973735, + "grad_norm": 0.8470857519786614, + "learning_rate": 1.9410105583582847e-05, + "loss": 0.8453, + "step": 7905 + }, + { + "epoch": 0.13661183301078242, + "grad_norm": 0.6709118239839794, + "learning_rate": 1.940991619620475e-05, + "loss": 0.4764, + "step": 7906 + }, + { + "epoch": 0.13662911252419133, + "grad_norm": 0.9614859464836767, + "learning_rate": 1.940972677935408e-05, + "loss": 0.6469, + "step": 7907 + }, + { + "epoch": 0.13664639203760023, + "grad_norm": 0.6185639324050531, + "learning_rate": 1.9409537333031426e-05, + "loss": 0.4987, + "step": 7908 + }, + { + "epoch": 0.1366636715510091, + "grad_norm": 1.0683427602650828, + "learning_rate": 1.940934785723739e-05, + "loss": 0.8654, + "step": 7909 + }, + { + "epoch": 0.13668095106441802, + "grad_norm": 1.1708680320318376, + "learning_rate": 1.940915835197256e-05, + "loss": 0.6269, + "step": 7910 + }, + { + "epoch": 0.13669823057782693, + "grad_norm": 0.9682937611484401, + "learning_rate": 1.9408968817237532e-05, + "loss": 0.5982, + "step": 7911 + }, + { + "epoch": 0.13671551009123584, + "grad_norm": 1.1084155164926734, + "learning_rate": 1.9408779253032898e-05, + "loss": 0.7767, + "step": 7912 + }, + { + "epoch": 0.13673278960464474, + "grad_norm": 0.8192861525380197, + "learning_rate": 1.940858965935925e-05, + "loss": 0.613, + "step": 7913 + }, + { + "epoch": 0.13675006911805362, + "grad_norm": 0.7833034899620988, + "learning_rate": 1.9408400036217186e-05, + "loss": 0.4413, + "step": 7914 + }, + { + "epoch": 0.13676734863146253, + "grad_norm": 1.0949971972876098, + "learning_rate": 1.94082103836073e-05, + "loss": 0.6538, + "step": 7915 + }, + { + "epoch": 0.13678462814487144, + "grad_norm": 1.0006142466568138, + "learning_rate": 1.9408020701530182e-05, + "loss": 0.6394, + "step": 7916 + }, + { + "epoch": 0.13680190765828035, + "grad_norm": 1.1532595124095006, + "learning_rate": 1.940783098998643e-05, + "loss": 0.5678, + "step": 7917 + }, + { + "epoch": 0.13681918717168925, + "grad_norm": 1.1121906738553946, + "learning_rate": 1.9407641248976637e-05, + "loss": 0.9408, + "step": 7918 + }, + { + "epoch": 0.13683646668509813, + "grad_norm": 1.1437506689382864, + "learning_rate": 1.9407451478501393e-05, + "loss": 0.5257, + "step": 7919 + }, + { + "epoch": 0.13685374619850704, + "grad_norm": 1.1175177553406328, + "learning_rate": 1.94072616785613e-05, + "loss": 0.5429, + "step": 7920 + }, + { + "epoch": 0.13687102571191595, + "grad_norm": 0.8268633867445078, + "learning_rate": 1.9407071849156943e-05, + "loss": 0.3694, + "step": 7921 + }, + { + "epoch": 0.13688830522532486, + "grad_norm": 1.0763856853978289, + "learning_rate": 1.940688199028893e-05, + "loss": 0.5629, + "step": 7922 + }, + { + "epoch": 0.13690558473873377, + "grad_norm": 1.2545418964001087, + "learning_rate": 1.940669210195784e-05, + "loss": 0.6564, + "step": 7923 + }, + { + "epoch": 0.13692286425214267, + "grad_norm": 1.0765352874497889, + "learning_rate": 1.940650218416428e-05, + "loss": 0.6186, + "step": 7924 + }, + { + "epoch": 0.13694014376555155, + "grad_norm": 1.2087281827986458, + "learning_rate": 1.9406312236908837e-05, + "loss": 0.7556, + "step": 7925 + }, + { + "epoch": 0.13695742327896046, + "grad_norm": 1.560308259205251, + "learning_rate": 1.9406122260192113e-05, + "loss": 0.7447, + "step": 7926 + }, + { + "epoch": 0.13697470279236937, + "grad_norm": 1.0064362102336384, + "learning_rate": 1.9405932254014695e-05, + "loss": 0.8474, + "step": 7927 + }, + { + "epoch": 0.13699198230577828, + "grad_norm": 0.9297584288013414, + "learning_rate": 1.9405742218377187e-05, + "loss": 0.6692, + "step": 7928 + }, + { + "epoch": 0.13700926181918718, + "grad_norm": 0.7675723974549349, + "learning_rate": 1.9405552153280175e-05, + "loss": 0.4986, + "step": 7929 + }, + { + "epoch": 0.13702654133259606, + "grad_norm": 1.4721241164480607, + "learning_rate": 1.940536205872426e-05, + "loss": 0.8146, + "step": 7930 + }, + { + "epoch": 0.13704382084600497, + "grad_norm": 1.1058256061481444, + "learning_rate": 1.940517193471004e-05, + "loss": 0.7517, + "step": 7931 + }, + { + "epoch": 0.13706110035941388, + "grad_norm": 1.2695130798576049, + "learning_rate": 1.9404981781238104e-05, + "loss": 0.7006, + "step": 7932 + }, + { + "epoch": 0.1370783798728228, + "grad_norm": 1.2104887581308204, + "learning_rate": 1.940479159830905e-05, + "loss": 0.5709, + "step": 7933 + }, + { + "epoch": 0.1370956593862317, + "grad_norm": 1.602766462572836, + "learning_rate": 1.9404601385923474e-05, + "loss": 0.9711, + "step": 7934 + }, + { + "epoch": 0.13711293889964057, + "grad_norm": 1.4713862096905144, + "learning_rate": 1.940441114408197e-05, + "loss": 0.6406, + "step": 7935 + }, + { + "epoch": 0.13713021841304948, + "grad_norm": 0.9777042614965376, + "learning_rate": 1.940422087278514e-05, + "loss": 0.5935, + "step": 7936 + }, + { + "epoch": 0.1371474979264584, + "grad_norm": 1.0021324211390543, + "learning_rate": 1.9404030572033572e-05, + "loss": 0.5472, + "step": 7937 + }, + { + "epoch": 0.1371647774398673, + "grad_norm": 1.2331864023929542, + "learning_rate": 1.940384024182787e-05, + "loss": 0.8275, + "step": 7938 + }, + { + "epoch": 0.1371820569532762, + "grad_norm": 0.7914404948445297, + "learning_rate": 1.9403649882168622e-05, + "loss": 0.5086, + "step": 7939 + }, + { + "epoch": 0.13719933646668508, + "grad_norm": 1.062355036877656, + "learning_rate": 1.9403459493056428e-05, + "loss": 0.4161, + "step": 7940 + }, + { + "epoch": 0.137216615980094, + "grad_norm": 0.8644684402571674, + "learning_rate": 1.9403269074491885e-05, + "loss": 0.6745, + "step": 7941 + }, + { + "epoch": 0.1372338954935029, + "grad_norm": 1.1350807907487641, + "learning_rate": 1.9403078626475587e-05, + "loss": 0.8557, + "step": 7942 + }, + { + "epoch": 0.1372511750069118, + "grad_norm": 0.6954300359000549, + "learning_rate": 1.9402888149008136e-05, + "loss": 0.708, + "step": 7943 + }, + { + "epoch": 0.13726845452032072, + "grad_norm": 0.8117118053200538, + "learning_rate": 1.940269764209012e-05, + "loss": 0.5003, + "step": 7944 + }, + { + "epoch": 0.13728573403372962, + "grad_norm": 0.5890390470486634, + "learning_rate": 1.9402507105722145e-05, + "loss": 0.6963, + "step": 7945 + }, + { + "epoch": 0.1373030135471385, + "grad_norm": 1.0641445569651364, + "learning_rate": 1.94023165399048e-05, + "loss": 0.5628, + "step": 7946 + }, + { + "epoch": 0.1373202930605474, + "grad_norm": 1.3808910156517504, + "learning_rate": 1.9402125944638682e-05, + "loss": 0.6569, + "step": 7947 + }, + { + "epoch": 0.13733757257395632, + "grad_norm": 0.8557004655104807, + "learning_rate": 1.9401935319924395e-05, + "loss": 0.6485, + "step": 7948 + }, + { + "epoch": 0.13735485208736523, + "grad_norm": 1.1363795459690174, + "learning_rate": 1.9401744665762534e-05, + "loss": 0.684, + "step": 7949 + }, + { + "epoch": 0.13737213160077413, + "grad_norm": 1.0486839410523234, + "learning_rate": 1.940155398215369e-05, + "loss": 0.7426, + "step": 7950 + }, + { + "epoch": 0.137389411114183, + "grad_norm": 1.2697096728279613, + "learning_rate": 1.9401363269098467e-05, + "loss": 0.7129, + "step": 7951 + }, + { + "epoch": 0.13740669062759192, + "grad_norm": 1.0527168271831977, + "learning_rate": 1.940117252659746e-05, + "loss": 0.6671, + "step": 7952 + }, + { + "epoch": 0.13742397014100083, + "grad_norm": 1.1133049828884838, + "learning_rate": 1.9400981754651266e-05, + "loss": 0.6518, + "step": 7953 + }, + { + "epoch": 0.13744124965440974, + "grad_norm": 0.8443675495563079, + "learning_rate": 1.9400790953260482e-05, + "loss": 0.6035, + "step": 7954 + }, + { + "epoch": 0.13745852916781864, + "grad_norm": 1.1392681907523081, + "learning_rate": 1.9400600122425706e-05, + "loss": 0.7133, + "step": 7955 + }, + { + "epoch": 0.13747580868122752, + "grad_norm": 1.5781246553351398, + "learning_rate": 1.940040926214754e-05, + "loss": 0.7541, + "step": 7956 + }, + { + "epoch": 0.13749308819463643, + "grad_norm": 0.7623920345401387, + "learning_rate": 1.940021837242657e-05, + "loss": 0.6582, + "step": 7957 + }, + { + "epoch": 0.13751036770804534, + "grad_norm": 0.8903296310344115, + "learning_rate": 1.9400027453263405e-05, + "loss": 0.7222, + "step": 7958 + }, + { + "epoch": 0.13752764722145425, + "grad_norm": 1.752623570336686, + "learning_rate": 1.9399836504658644e-05, + "loss": 0.7869, + "step": 7959 + }, + { + "epoch": 0.13754492673486315, + "grad_norm": 0.8056189586972577, + "learning_rate": 1.9399645526612875e-05, + "loss": 0.8143, + "step": 7960 + }, + { + "epoch": 0.13756220624827206, + "grad_norm": 0.9812493699882567, + "learning_rate": 1.9399454519126704e-05, + "loss": 0.7072, + "step": 7961 + }, + { + "epoch": 0.13757948576168094, + "grad_norm": 1.132334873934288, + "learning_rate": 1.9399263482200727e-05, + "loss": 0.7136, + "step": 7962 + }, + { + "epoch": 0.13759676527508985, + "grad_norm": 0.6622598717036595, + "learning_rate": 1.9399072415835544e-05, + "loss": 0.549, + "step": 7963 + }, + { + "epoch": 0.13761404478849876, + "grad_norm": 0.9956319980820608, + "learning_rate": 1.939888132003175e-05, + "loss": 0.6244, + "step": 7964 + }, + { + "epoch": 0.13763132430190766, + "grad_norm": 0.9346925848465204, + "learning_rate": 1.9398690194789948e-05, + "loss": 0.5925, + "step": 7965 + }, + { + "epoch": 0.13764860381531657, + "grad_norm": 1.0191386248915846, + "learning_rate": 1.9398499040110728e-05, + "loss": 0.5518, + "step": 7966 + }, + { + "epoch": 0.13766588332872545, + "grad_norm": 1.1550843303751583, + "learning_rate": 1.93983078559947e-05, + "loss": 0.7865, + "step": 7967 + }, + { + "epoch": 0.13768316284213436, + "grad_norm": 0.808302467971841, + "learning_rate": 1.9398116642442456e-05, + "loss": 0.5905, + "step": 7968 + }, + { + "epoch": 0.13770044235554327, + "grad_norm": 1.1577988856745138, + "learning_rate": 1.9397925399454596e-05, + "loss": 0.5461, + "step": 7969 + }, + { + "epoch": 0.13771772186895218, + "grad_norm": 0.8421437376508097, + "learning_rate": 1.9397734127031722e-05, + "loss": 0.6104, + "step": 7970 + }, + { + "epoch": 0.13773500138236108, + "grad_norm": 1.8828499497605127, + "learning_rate": 1.939754282517443e-05, + "loss": 0.8959, + "step": 7971 + }, + { + "epoch": 0.13775228089576996, + "grad_norm": 1.188028651556848, + "learning_rate": 1.939735149388332e-05, + "loss": 0.9507, + "step": 7972 + }, + { + "epoch": 0.13776956040917887, + "grad_norm": 1.1862967052776672, + "learning_rate": 1.9397160133158985e-05, + "loss": 0.7666, + "step": 7973 + }, + { + "epoch": 0.13778683992258778, + "grad_norm": 0.4918797904206478, + "learning_rate": 1.9396968743002034e-05, + "loss": 0.8208, + "step": 7974 + }, + { + "epoch": 0.13780411943599669, + "grad_norm": 1.0408870091710096, + "learning_rate": 1.9396777323413064e-05, + "loss": 0.8685, + "step": 7975 + }, + { + "epoch": 0.1378213989494056, + "grad_norm": 1.1059926278968906, + "learning_rate": 1.9396585874392676e-05, + "loss": 0.8199, + "step": 7976 + }, + { + "epoch": 0.13783867846281447, + "grad_norm": 0.45709243667732713, + "learning_rate": 1.939639439594146e-05, + "loss": 0.6286, + "step": 7977 + }, + { + "epoch": 0.13785595797622338, + "grad_norm": 0.5107900071031798, + "learning_rate": 1.939620288806003e-05, + "loss": 0.555, + "step": 7978 + }, + { + "epoch": 0.1378732374896323, + "grad_norm": 1.0320400262250897, + "learning_rate": 1.9396011350748973e-05, + "loss": 0.6371, + "step": 7979 + }, + { + "epoch": 0.1378905170030412, + "grad_norm": 0.8433307952553657, + "learning_rate": 1.93958197840089e-05, + "loss": 0.6425, + "step": 7980 + }, + { + "epoch": 0.1379077965164501, + "grad_norm": 0.8551971109728037, + "learning_rate": 1.93956281878404e-05, + "loss": 0.7273, + "step": 7981 + }, + { + "epoch": 0.137925076029859, + "grad_norm": 0.7940841257277744, + "learning_rate": 1.9395436562244078e-05, + "loss": 0.7342, + "step": 7982 + }, + { + "epoch": 0.1379423555432679, + "grad_norm": 1.3159668693131366, + "learning_rate": 1.9395244907220538e-05, + "loss": 0.4476, + "step": 7983 + }, + { + "epoch": 0.1379596350566768, + "grad_norm": 1.2176307099492494, + "learning_rate": 1.939505322277037e-05, + "loss": 0.7565, + "step": 7984 + }, + { + "epoch": 0.1379769145700857, + "grad_norm": 0.8574617817853334, + "learning_rate": 1.939486150889419e-05, + "loss": 0.8086, + "step": 7985 + }, + { + "epoch": 0.13799419408349461, + "grad_norm": 1.0250040195806078, + "learning_rate": 1.9394669765592587e-05, + "loss": 0.6953, + "step": 7986 + }, + { + "epoch": 0.13801147359690352, + "grad_norm": 0.9047110286384866, + "learning_rate": 1.9394477992866165e-05, + "loss": 0.5662, + "step": 7987 + }, + { + "epoch": 0.1380287531103124, + "grad_norm": 0.9058827785915323, + "learning_rate": 1.9394286190715523e-05, + "loss": 0.5006, + "step": 7988 + }, + { + "epoch": 0.1380460326237213, + "grad_norm": 1.0593392926539524, + "learning_rate": 1.9394094359141263e-05, + "loss": 0.7286, + "step": 7989 + }, + { + "epoch": 0.13806331213713022, + "grad_norm": 0.632828317096331, + "learning_rate": 1.9393902498143982e-05, + "loss": 0.4357, + "step": 7990 + }, + { + "epoch": 0.13808059165053913, + "grad_norm": 0.8585997251461078, + "learning_rate": 1.939371060772429e-05, + "loss": 0.6798, + "step": 7991 + }, + { + "epoch": 0.13809787116394803, + "grad_norm": 0.5037590503515644, + "learning_rate": 1.939351868788278e-05, + "loss": 0.7506, + "step": 7992 + }, + { + "epoch": 0.1381151506773569, + "grad_norm": 1.3913059411118625, + "learning_rate": 1.9393326738620056e-05, + "loss": 0.8513, + "step": 7993 + }, + { + "epoch": 0.13813243019076582, + "grad_norm": 0.559424958851503, + "learning_rate": 1.939313475993672e-05, + "loss": 0.719, + "step": 7994 + }, + { + "epoch": 0.13814970970417473, + "grad_norm": 0.788215995619502, + "learning_rate": 1.939294275183337e-05, + "loss": 0.7337, + "step": 7995 + }, + { + "epoch": 0.13816698921758364, + "grad_norm": 1.065416158998449, + "learning_rate": 1.9392750714310608e-05, + "loss": 0.7391, + "step": 7996 + }, + { + "epoch": 0.13818426873099254, + "grad_norm": 0.7377778963118738, + "learning_rate": 1.939255864736904e-05, + "loss": 0.432, + "step": 7997 + }, + { + "epoch": 0.13820154824440145, + "grad_norm": 0.905918117681871, + "learning_rate": 1.9392366551009262e-05, + "loss": 0.679, + "step": 7998 + }, + { + "epoch": 0.13821882775781033, + "grad_norm": 0.7513204578639985, + "learning_rate": 1.939217442523188e-05, + "loss": 0.5721, + "step": 7999 + }, + { + "epoch": 0.13823610727121924, + "grad_norm": 1.0408434890639677, + "learning_rate": 1.9391982270037496e-05, + "loss": 0.5762, + "step": 8000 + }, + { + "epoch": 0.13825338678462815, + "grad_norm": 0.823248467619245, + "learning_rate": 1.9391790085426704e-05, + "loss": 0.5232, + "step": 8001 + }, + { + "epoch": 0.13827066629803705, + "grad_norm": 1.198168727837087, + "learning_rate": 1.9391597871400118e-05, + "loss": 0.8141, + "step": 8002 + }, + { + "epoch": 0.13828794581144596, + "grad_norm": 0.507019922308839, + "learning_rate": 1.9391405627958327e-05, + "loss": 0.801, + "step": 8003 + }, + { + "epoch": 0.13830522532485484, + "grad_norm": 1.1634003910748174, + "learning_rate": 1.9391213355101945e-05, + "loss": 0.6418, + "step": 8004 + }, + { + "epoch": 0.13832250483826375, + "grad_norm": 0.926162794677813, + "learning_rate": 1.9391021052831568e-05, + "loss": 0.6041, + "step": 8005 + }, + { + "epoch": 0.13833978435167266, + "grad_norm": 0.8421669196250795, + "learning_rate": 1.9390828721147802e-05, + "loss": 0.6502, + "step": 8006 + }, + { + "epoch": 0.13835706386508156, + "grad_norm": 0.7575698291044372, + "learning_rate": 1.939063636005124e-05, + "loss": 0.3754, + "step": 8007 + }, + { + "epoch": 0.13837434337849047, + "grad_norm": 1.3863706296865221, + "learning_rate": 1.9390443969542495e-05, + "loss": 0.786, + "step": 8008 + }, + { + "epoch": 0.13839162289189935, + "grad_norm": 1.3215322007703607, + "learning_rate": 1.939025154962217e-05, + "loss": 0.7241, + "step": 8009 + }, + { + "epoch": 0.13840890240530826, + "grad_norm": 0.8817267004827425, + "learning_rate": 1.939005910029086e-05, + "loss": 0.723, + "step": 8010 + }, + { + "epoch": 0.13842618191871717, + "grad_norm": 1.0828456881886455, + "learning_rate": 1.9389866621549167e-05, + "loss": 0.7429, + "step": 8011 + }, + { + "epoch": 0.13844346143212607, + "grad_norm": 0.9352251276997738, + "learning_rate": 1.9389674113397702e-05, + "loss": 0.5852, + "step": 8012 + }, + { + "epoch": 0.13846074094553498, + "grad_norm": 0.7116854749178879, + "learning_rate": 1.9389481575837066e-05, + "loss": 0.5808, + "step": 8013 + }, + { + "epoch": 0.13847802045894386, + "grad_norm": 1.0733436291953027, + "learning_rate": 1.9389289008867856e-05, + "loss": 0.6463, + "step": 8014 + }, + { + "epoch": 0.13849529997235277, + "grad_norm": 1.2634541187774821, + "learning_rate": 1.9389096412490682e-05, + "loss": 0.5723, + "step": 8015 + }, + { + "epoch": 0.13851257948576168, + "grad_norm": 1.0636570627162052, + "learning_rate": 1.9388903786706144e-05, + "loss": 0.7389, + "step": 8016 + }, + { + "epoch": 0.13852985899917059, + "grad_norm": 0.99129971407631, + "learning_rate": 1.9388711131514845e-05, + "loss": 0.9103, + "step": 8017 + }, + { + "epoch": 0.1385471385125795, + "grad_norm": 0.5824088832943565, + "learning_rate": 1.938851844691739e-05, + "loss": 0.6324, + "step": 8018 + }, + { + "epoch": 0.1385644180259884, + "grad_norm": 1.1867056867139307, + "learning_rate": 1.938832573291438e-05, + "loss": 0.654, + "step": 8019 + }, + { + "epoch": 0.13858169753939728, + "grad_norm": 0.9616038283040866, + "learning_rate": 1.9388132989506422e-05, + "loss": 0.4705, + "step": 8020 + }, + { + "epoch": 0.1385989770528062, + "grad_norm": 0.9574047200983588, + "learning_rate": 1.938794021669412e-05, + "loss": 0.5283, + "step": 8021 + }, + { + "epoch": 0.1386162565662151, + "grad_norm": 1.344212322766279, + "learning_rate": 1.9387747414478072e-05, + "loss": 0.686, + "step": 8022 + }, + { + "epoch": 0.138633536079624, + "grad_norm": 1.1629219103791824, + "learning_rate": 1.938755458285889e-05, + "loss": 0.6761, + "step": 8023 + }, + { + "epoch": 0.1386508155930329, + "grad_norm": 0.9498581927804675, + "learning_rate": 1.938736172183717e-05, + "loss": 0.5978, + "step": 8024 + }, + { + "epoch": 0.1386680951064418, + "grad_norm": 1.1575682754871175, + "learning_rate": 1.938716883141352e-05, + "loss": 0.5948, + "step": 8025 + }, + { + "epoch": 0.1386853746198507, + "grad_norm": 0.610368174481998, + "learning_rate": 1.9386975911588544e-05, + "loss": 0.6353, + "step": 8026 + }, + { + "epoch": 0.1387026541332596, + "grad_norm": 0.9295668283759336, + "learning_rate": 1.938678296236285e-05, + "loss": 0.5918, + "step": 8027 + }, + { + "epoch": 0.13871993364666851, + "grad_norm": 1.1838358028281113, + "learning_rate": 1.9386589983737033e-05, + "loss": 0.6151, + "step": 8028 + }, + { + "epoch": 0.13873721316007742, + "grad_norm": 0.8081965365822202, + "learning_rate": 1.9386396975711706e-05, + "loss": 0.5393, + "step": 8029 + }, + { + "epoch": 0.1387544926734863, + "grad_norm": 0.9662819236362303, + "learning_rate": 1.9386203938287468e-05, + "loss": 0.5258, + "step": 8030 + }, + { + "epoch": 0.1387717721868952, + "grad_norm": 0.9129099746624643, + "learning_rate": 1.9386010871464926e-05, + "loss": 0.5525, + "step": 8031 + }, + { + "epoch": 0.13878905170030412, + "grad_norm": 0.6988450674660598, + "learning_rate": 1.9385817775244686e-05, + "loss": 0.5212, + "step": 8032 + }, + { + "epoch": 0.13880633121371302, + "grad_norm": 0.9459021797411892, + "learning_rate": 1.9385624649627354e-05, + "loss": 0.5999, + "step": 8033 + }, + { + "epoch": 0.13882361072712193, + "grad_norm": 1.2420307385819218, + "learning_rate": 1.9385431494613526e-05, + "loss": 0.8896, + "step": 8034 + }, + { + "epoch": 0.13884089024053084, + "grad_norm": 0.46358861854214867, + "learning_rate": 1.9385238310203817e-05, + "loss": 0.5218, + "step": 8035 + }, + { + "epoch": 0.13885816975393972, + "grad_norm": 0.5570916615977352, + "learning_rate": 1.938504509639883e-05, + "loss": 0.6151, + "step": 8036 + }, + { + "epoch": 0.13887544926734863, + "grad_norm": 0.8909724434127241, + "learning_rate": 1.9384851853199165e-05, + "loss": 0.7578, + "step": 8037 + }, + { + "epoch": 0.13889272878075754, + "grad_norm": 0.9366581132941846, + "learning_rate": 1.938465858060543e-05, + "loss": 0.5222, + "step": 8038 + }, + { + "epoch": 0.13891000829416644, + "grad_norm": 1.1856608715962886, + "learning_rate": 1.9384465278618235e-05, + "loss": 0.7498, + "step": 8039 + }, + { + "epoch": 0.13892728780757535, + "grad_norm": 0.8275618084475773, + "learning_rate": 1.938427194723818e-05, + "loss": 0.608, + "step": 8040 + }, + { + "epoch": 0.13894456732098423, + "grad_norm": 1.080937692505159, + "learning_rate": 1.9384078586465873e-05, + "loss": 0.8068, + "step": 8041 + }, + { + "epoch": 0.13896184683439314, + "grad_norm": 1.0868384914231735, + "learning_rate": 1.9383885196301917e-05, + "loss": 0.6782, + "step": 8042 + }, + { + "epoch": 0.13897912634780205, + "grad_norm": 0.865298279224519, + "learning_rate": 1.9383691776746923e-05, + "loss": 0.5118, + "step": 8043 + }, + { + "epoch": 0.13899640586121095, + "grad_norm": 0.4847876393254965, + "learning_rate": 1.938349832780149e-05, + "loss": 0.6198, + "step": 8044 + }, + { + "epoch": 0.13901368537461986, + "grad_norm": 0.7719365210892314, + "learning_rate": 1.9383304849466228e-05, + "loss": 0.7304, + "step": 8045 + }, + { + "epoch": 0.13903096488802874, + "grad_norm": 0.7567584128382292, + "learning_rate": 1.938311134174174e-05, + "loss": 0.6109, + "step": 8046 + }, + { + "epoch": 0.13904824440143765, + "grad_norm": 1.6174648222824364, + "learning_rate": 1.9382917804628635e-05, + "loss": 0.914, + "step": 8047 + }, + { + "epoch": 0.13906552391484656, + "grad_norm": 0.9117946591646682, + "learning_rate": 1.938272423812752e-05, + "loss": 0.5327, + "step": 8048 + }, + { + "epoch": 0.13908280342825546, + "grad_norm": 0.6747820031606057, + "learning_rate": 1.9382530642239e-05, + "loss": 0.7123, + "step": 8049 + }, + { + "epoch": 0.13910008294166437, + "grad_norm": 1.2585589690591839, + "learning_rate": 1.9382337016963678e-05, + "loss": 0.7408, + "step": 8050 + }, + { + "epoch": 0.13911736245507325, + "grad_norm": 0.833943125743081, + "learning_rate": 1.9382143362302164e-05, + "loss": 0.6935, + "step": 8051 + }, + { + "epoch": 0.13913464196848216, + "grad_norm": 0.9345327643488596, + "learning_rate": 1.938194967825507e-05, + "loss": 0.6586, + "step": 8052 + }, + { + "epoch": 0.13915192148189107, + "grad_norm": 1.0945298476671101, + "learning_rate": 1.938175596482299e-05, + "loss": 0.7462, + "step": 8053 + }, + { + "epoch": 0.13916920099529997, + "grad_norm": 0.9527680340493276, + "learning_rate": 1.938156222200654e-05, + "loss": 0.6996, + "step": 8054 + }, + { + "epoch": 0.13918648050870888, + "grad_norm": 0.9718543370539117, + "learning_rate": 1.9381368449806323e-05, + "loss": 0.626, + "step": 8055 + }, + { + "epoch": 0.1392037600221178, + "grad_norm": 1.0791314458963963, + "learning_rate": 1.9381174648222946e-05, + "loss": 0.5759, + "step": 8056 + }, + { + "epoch": 0.13922103953552667, + "grad_norm": 0.8713152498855075, + "learning_rate": 1.9380980817257017e-05, + "loss": 0.6153, + "step": 8057 + }, + { + "epoch": 0.13923831904893558, + "grad_norm": 0.8576543034887278, + "learning_rate": 1.9380786956909147e-05, + "loss": 0.5913, + "step": 8058 + }, + { + "epoch": 0.13925559856234448, + "grad_norm": 0.9446665740265718, + "learning_rate": 1.9380593067179934e-05, + "loss": 0.7303, + "step": 8059 + }, + { + "epoch": 0.1392728780757534, + "grad_norm": 1.0864665119019272, + "learning_rate": 1.9380399148069994e-05, + "loss": 0.8414, + "step": 8060 + }, + { + "epoch": 0.1392901575891623, + "grad_norm": 1.028865766373283, + "learning_rate": 1.938020519957993e-05, + "loss": 0.7136, + "step": 8061 + }, + { + "epoch": 0.13930743710257118, + "grad_norm": 1.024678205592293, + "learning_rate": 1.9380011221710353e-05, + "loss": 0.6589, + "step": 8062 + }, + { + "epoch": 0.1393247166159801, + "grad_norm": 0.983596320785342, + "learning_rate": 1.9379817214461868e-05, + "loss": 0.5973, + "step": 8063 + }, + { + "epoch": 0.139341996129389, + "grad_norm": 0.6857920453229495, + "learning_rate": 1.937962317783508e-05, + "loss": 0.667, + "step": 8064 + }, + { + "epoch": 0.1393592756427979, + "grad_norm": 1.3699356409015284, + "learning_rate": 1.9379429111830598e-05, + "loss": 0.7991, + "step": 8065 + }, + { + "epoch": 0.1393765551562068, + "grad_norm": 1.051008594790218, + "learning_rate": 1.9379235016449032e-05, + "loss": 0.578, + "step": 8066 + }, + { + "epoch": 0.1393938346696157, + "grad_norm": 0.918158369925098, + "learning_rate": 1.937904089169099e-05, + "loss": 0.642, + "step": 8067 + }, + { + "epoch": 0.1394111141830246, + "grad_norm": 0.9627529661761667, + "learning_rate": 1.9378846737557084e-05, + "loss": 0.6285, + "step": 8068 + }, + { + "epoch": 0.1394283936964335, + "grad_norm": 1.2681573119816982, + "learning_rate": 1.937865255404791e-05, + "loss": 0.9024, + "step": 8069 + }, + { + "epoch": 0.1394456732098424, + "grad_norm": 0.8230586535889969, + "learning_rate": 1.9378458341164084e-05, + "loss": 0.5779, + "step": 8070 + }, + { + "epoch": 0.13946295272325132, + "grad_norm": 0.694917303420052, + "learning_rate": 1.937826409890622e-05, + "loss": 0.5498, + "step": 8071 + }, + { + "epoch": 0.13948023223666023, + "grad_norm": 1.021414761450313, + "learning_rate": 1.9378069827274913e-05, + "loss": 0.6051, + "step": 8072 + }, + { + "epoch": 0.1394975117500691, + "grad_norm": 1.0162925077293306, + "learning_rate": 1.937787552627078e-05, + "loss": 0.4966, + "step": 8073 + }, + { + "epoch": 0.13951479126347802, + "grad_norm": 1.2302118639219153, + "learning_rate": 1.9377681195894432e-05, + "loss": 0.6287, + "step": 8074 + }, + { + "epoch": 0.13953207077688692, + "grad_norm": 0.9846589057582688, + "learning_rate": 1.9377486836146474e-05, + "loss": 0.5463, + "step": 8075 + }, + { + "epoch": 0.13954935029029583, + "grad_norm": 1.4621574064582468, + "learning_rate": 1.9377292447027513e-05, + "loss": 0.7144, + "step": 8076 + }, + { + "epoch": 0.13956662980370474, + "grad_norm": 0.6500310983630553, + "learning_rate": 1.9377098028538155e-05, + "loss": 0.5619, + "step": 8077 + }, + { + "epoch": 0.13958390931711362, + "grad_norm": 1.0223656684304092, + "learning_rate": 1.9376903580679018e-05, + "loss": 0.5145, + "step": 8078 + }, + { + "epoch": 0.13960118883052253, + "grad_norm": 0.9125596028662795, + "learning_rate": 1.9376709103450704e-05, + "loss": 0.692, + "step": 8079 + }, + { + "epoch": 0.13961846834393143, + "grad_norm": 1.1231733578403087, + "learning_rate": 1.9376514596853826e-05, + "loss": 0.474, + "step": 8080 + }, + { + "epoch": 0.13963574785734034, + "grad_norm": 0.8727569224225737, + "learning_rate": 1.937632006088899e-05, + "loss": 0.63, + "step": 8081 + }, + { + "epoch": 0.13965302737074925, + "grad_norm": 0.40713424986463626, + "learning_rate": 1.937612549555681e-05, + "loss": 0.7569, + "step": 8082 + }, + { + "epoch": 0.13967030688415813, + "grad_norm": 0.9378866480941845, + "learning_rate": 1.937593090085789e-05, + "loss": 0.5869, + "step": 8083 + }, + { + "epoch": 0.13968758639756704, + "grad_norm": 0.5386320531118389, + "learning_rate": 1.9375736276792844e-05, + "loss": 0.7975, + "step": 8084 + }, + { + "epoch": 0.13970486591097594, + "grad_norm": 1.0690109380978174, + "learning_rate": 1.9375541623362276e-05, + "loss": 0.6543, + "step": 8085 + }, + { + "epoch": 0.13972214542438485, + "grad_norm": 1.5814044318781206, + "learning_rate": 1.93753469405668e-05, + "loss": 0.7696, + "step": 8086 + }, + { + "epoch": 0.13973942493779376, + "grad_norm": 0.5559410608629267, + "learning_rate": 1.9375152228407028e-05, + "loss": 0.6312, + "step": 8087 + }, + { + "epoch": 0.13975670445120267, + "grad_norm": 0.5379466951569446, + "learning_rate": 1.9374957486883566e-05, + "loss": 0.6398, + "step": 8088 + }, + { + "epoch": 0.13977398396461155, + "grad_norm": 1.0269241004454384, + "learning_rate": 1.9374762715997024e-05, + "loss": 0.8837, + "step": 8089 + }, + { + "epoch": 0.13979126347802046, + "grad_norm": 1.2849247990629673, + "learning_rate": 1.937456791574801e-05, + "loss": 0.6033, + "step": 8090 + }, + { + "epoch": 0.13980854299142936, + "grad_norm": 0.8587160249520434, + "learning_rate": 1.937437308613714e-05, + "loss": 0.6039, + "step": 8091 + }, + { + "epoch": 0.13982582250483827, + "grad_norm": 0.9729006982804588, + "learning_rate": 1.9374178227165022e-05, + "loss": 0.7575, + "step": 8092 + }, + { + "epoch": 0.13984310201824718, + "grad_norm": 0.7525445719296241, + "learning_rate": 1.9373983338832262e-05, + "loss": 0.4026, + "step": 8093 + }, + { + "epoch": 0.13986038153165606, + "grad_norm": 1.0436366884067458, + "learning_rate": 1.9373788421139477e-05, + "loss": 0.6941, + "step": 8094 + }, + { + "epoch": 0.13987766104506497, + "grad_norm": 1.2379519038835234, + "learning_rate": 1.9373593474087272e-05, + "loss": 0.7061, + "step": 8095 + }, + { + "epoch": 0.13989494055847387, + "grad_norm": 0.7377038747392597, + "learning_rate": 1.937339849767626e-05, + "loss": 0.6138, + "step": 8096 + }, + { + "epoch": 0.13991222007188278, + "grad_norm": 1.431715439465656, + "learning_rate": 1.937320349190705e-05, + "loss": 0.933, + "step": 8097 + }, + { + "epoch": 0.1399294995852917, + "grad_norm": 0.9049289658195474, + "learning_rate": 1.937300845678026e-05, + "loss": 0.5515, + "step": 8098 + }, + { + "epoch": 0.13994677909870057, + "grad_norm": 1.2894426130350478, + "learning_rate": 1.937281339229649e-05, + "loss": 0.7618, + "step": 8099 + }, + { + "epoch": 0.13996405861210948, + "grad_norm": 0.9010031027425135, + "learning_rate": 1.937261829845636e-05, + "loss": 0.4289, + "step": 8100 + }, + { + "epoch": 0.13998133812551838, + "grad_norm": 1.0215385812285127, + "learning_rate": 1.9372423175260472e-05, + "loss": 0.6046, + "step": 8101 + }, + { + "epoch": 0.1399986176389273, + "grad_norm": 0.848514102265315, + "learning_rate": 1.937222802270945e-05, + "loss": 0.5106, + "step": 8102 + }, + { + "epoch": 0.1400158971523362, + "grad_norm": 0.9760689116632946, + "learning_rate": 1.937203284080389e-05, + "loss": 0.7839, + "step": 8103 + }, + { + "epoch": 0.14003317666574508, + "grad_norm": 0.8845654387054486, + "learning_rate": 1.937183762954441e-05, + "loss": 0.4723, + "step": 8104 + }, + { + "epoch": 0.140050456179154, + "grad_norm": 0.7156742847859289, + "learning_rate": 1.9371642388931628e-05, + "loss": 0.5351, + "step": 8105 + }, + { + "epoch": 0.1400677356925629, + "grad_norm": 0.8992561259037509, + "learning_rate": 1.9371447118966148e-05, + "loss": 0.7694, + "step": 8106 + }, + { + "epoch": 0.1400850152059718, + "grad_norm": 0.9854477723388244, + "learning_rate": 1.9371251819648583e-05, + "loss": 0.7853, + "step": 8107 + }, + { + "epoch": 0.1401022947193807, + "grad_norm": 0.6389509161893949, + "learning_rate": 1.937105649097955e-05, + "loss": 0.5457, + "step": 8108 + }, + { + "epoch": 0.14011957423278962, + "grad_norm": 1.1194752773190806, + "learning_rate": 1.9370861132959647e-05, + "loss": 0.6158, + "step": 8109 + }, + { + "epoch": 0.1401368537461985, + "grad_norm": 0.3900421185197607, + "learning_rate": 1.93706657455895e-05, + "loss": 0.5612, + "step": 8110 + }, + { + "epoch": 0.1401541332596074, + "grad_norm": 1.0348366876365682, + "learning_rate": 1.9370470328869715e-05, + "loss": 0.5658, + "step": 8111 + }, + { + "epoch": 0.1401714127730163, + "grad_norm": 0.9206338277860542, + "learning_rate": 1.9370274882800904e-05, + "loss": 0.6695, + "step": 8112 + }, + { + "epoch": 0.14018869228642522, + "grad_norm": 0.8496082442681385, + "learning_rate": 1.9370079407383683e-05, + "loss": 0.6307, + "step": 8113 + }, + { + "epoch": 0.14020597179983413, + "grad_norm": 0.7767314494544172, + "learning_rate": 1.9369883902618657e-05, + "loss": 0.6467, + "step": 8114 + }, + { + "epoch": 0.140223251313243, + "grad_norm": 0.7688205384726806, + "learning_rate": 1.9369688368506443e-05, + "loss": 0.5385, + "step": 8115 + }, + { + "epoch": 0.14024053082665192, + "grad_norm": 0.8738174467664555, + "learning_rate": 1.9369492805047655e-05, + "loss": 0.4696, + "step": 8116 + }, + { + "epoch": 0.14025781034006082, + "grad_norm": 1.0053579963993222, + "learning_rate": 1.9369297212242905e-05, + "loss": 0.7672, + "step": 8117 + }, + { + "epoch": 0.14027508985346973, + "grad_norm": 0.7592777061163298, + "learning_rate": 1.93691015900928e-05, + "loss": 0.6905, + "step": 8118 + }, + { + "epoch": 0.14029236936687864, + "grad_norm": 1.0116092674929837, + "learning_rate": 1.9368905938597958e-05, + "loss": 0.6635, + "step": 8119 + }, + { + "epoch": 0.14030964888028752, + "grad_norm": 0.9021280778052935, + "learning_rate": 1.936871025775899e-05, + "loss": 0.7673, + "step": 8120 + }, + { + "epoch": 0.14032692839369643, + "grad_norm": 1.184270328222237, + "learning_rate": 1.936851454757651e-05, + "loss": 0.622, + "step": 8121 + }, + { + "epoch": 0.14034420790710533, + "grad_norm": 0.6556996639266778, + "learning_rate": 1.9368318808051134e-05, + "loss": 0.4371, + "step": 8122 + }, + { + "epoch": 0.14036148742051424, + "grad_norm": 0.9793428280388492, + "learning_rate": 1.9368123039183468e-05, + "loss": 0.7073, + "step": 8123 + }, + { + "epoch": 0.14037876693392315, + "grad_norm": 0.754290150729145, + "learning_rate": 1.936792724097413e-05, + "loss": 0.5627, + "step": 8124 + }, + { + "epoch": 0.14039604644733206, + "grad_norm": 0.9397154044041828, + "learning_rate": 1.9367731413423733e-05, + "loss": 0.4857, + "step": 8125 + }, + { + "epoch": 0.14041332596074094, + "grad_norm": 1.173662520265809, + "learning_rate": 1.9367535556532887e-05, + "loss": 0.6729, + "step": 8126 + }, + { + "epoch": 0.14043060547414984, + "grad_norm": 0.6377509928928649, + "learning_rate": 1.9367339670302206e-05, + "loss": 0.5968, + "step": 8127 + }, + { + "epoch": 0.14044788498755875, + "grad_norm": 0.8789237813680103, + "learning_rate": 1.9367143754732306e-05, + "loss": 0.5727, + "step": 8128 + }, + { + "epoch": 0.14046516450096766, + "grad_norm": 1.298901138076151, + "learning_rate": 1.9366947809823804e-05, + "loss": 0.8093, + "step": 8129 + }, + { + "epoch": 0.14048244401437657, + "grad_norm": 0.7898678556880356, + "learning_rate": 1.9366751835577306e-05, + "loss": 0.7207, + "step": 8130 + }, + { + "epoch": 0.14049972352778545, + "grad_norm": 0.9242175440210515, + "learning_rate": 1.9366555831993432e-05, + "loss": 0.4775, + "step": 8131 + }, + { + "epoch": 0.14051700304119435, + "grad_norm": 0.5230219341062052, + "learning_rate": 1.9366359799072792e-05, + "loss": 0.7033, + "step": 8132 + }, + { + "epoch": 0.14053428255460326, + "grad_norm": 0.38144517944126555, + "learning_rate": 1.9366163736816e-05, + "loss": 0.4947, + "step": 8133 + }, + { + "epoch": 0.14055156206801217, + "grad_norm": 1.0031662060695992, + "learning_rate": 1.9365967645223672e-05, + "loss": 0.5078, + "step": 8134 + }, + { + "epoch": 0.14056884158142108, + "grad_norm": 0.7938802257468314, + "learning_rate": 1.936577152429642e-05, + "loss": 0.7106, + "step": 8135 + }, + { + "epoch": 0.14058612109482996, + "grad_norm": 0.5088987414789161, + "learning_rate": 1.9365575374034863e-05, + "loss": 0.8572, + "step": 8136 + }, + { + "epoch": 0.14060340060823887, + "grad_norm": 1.1706304841215494, + "learning_rate": 1.936537919443961e-05, + "loss": 0.5142, + "step": 8137 + }, + { + "epoch": 0.14062068012164777, + "grad_norm": 1.4347305099778283, + "learning_rate": 1.936518298551128e-05, + "loss": 0.8545, + "step": 8138 + }, + { + "epoch": 0.14063795963505668, + "grad_norm": 0.9904435336147185, + "learning_rate": 1.9364986747250484e-05, + "loss": 0.6692, + "step": 8139 + }, + { + "epoch": 0.1406552391484656, + "grad_norm": 1.2556835930187322, + "learning_rate": 1.936479047965784e-05, + "loss": 0.7783, + "step": 8140 + }, + { + "epoch": 0.14067251866187447, + "grad_norm": 0.6157839872667948, + "learning_rate": 1.9364594182733956e-05, + "loss": 0.343, + "step": 8141 + }, + { + "epoch": 0.14068979817528338, + "grad_norm": 1.0096254520423924, + "learning_rate": 1.9364397856479454e-05, + "loss": 0.7547, + "step": 8142 + }, + { + "epoch": 0.14070707768869228, + "grad_norm": 1.7414327895886323, + "learning_rate": 1.9364201500894942e-05, + "loss": 0.6193, + "step": 8143 + }, + { + "epoch": 0.1407243572021012, + "grad_norm": 1.0081432265515693, + "learning_rate": 1.9364005115981047e-05, + "loss": 0.618, + "step": 8144 + }, + { + "epoch": 0.1407416367155101, + "grad_norm": 1.5615024912953452, + "learning_rate": 1.936380870173837e-05, + "loss": 0.6232, + "step": 8145 + }, + { + "epoch": 0.140758916228919, + "grad_norm": 1.5423566555309172, + "learning_rate": 1.9363612258167536e-05, + "loss": 0.6943, + "step": 8146 + }, + { + "epoch": 0.1407761957423279, + "grad_norm": 0.5764950712532618, + "learning_rate": 1.9363415785269154e-05, + "loss": 0.6962, + "step": 8147 + }, + { + "epoch": 0.1407934752557368, + "grad_norm": 1.0369991924077175, + "learning_rate": 1.9363219283043845e-05, + "loss": 0.6907, + "step": 8148 + }, + { + "epoch": 0.1408107547691457, + "grad_norm": 1.2646513370928867, + "learning_rate": 1.9363022751492223e-05, + "loss": 0.7751, + "step": 8149 + }, + { + "epoch": 0.1408280342825546, + "grad_norm": 1.226621983094257, + "learning_rate": 1.9362826190614897e-05, + "loss": 0.6948, + "step": 8150 + }, + { + "epoch": 0.14084531379596352, + "grad_norm": 0.6041022510130095, + "learning_rate": 1.936262960041249e-05, + "loss": 0.8553, + "step": 8151 + }, + { + "epoch": 0.1408625933093724, + "grad_norm": 0.9830363371004619, + "learning_rate": 1.9362432980885617e-05, + "loss": 0.6656, + "step": 8152 + }, + { + "epoch": 0.1408798728227813, + "grad_norm": 1.1248224785780074, + "learning_rate": 1.936223633203489e-05, + "loss": 0.582, + "step": 8153 + }, + { + "epoch": 0.1408971523361902, + "grad_norm": 0.8861170618313979, + "learning_rate": 1.936203965386093e-05, + "loss": 0.5634, + "step": 8154 + }, + { + "epoch": 0.14091443184959912, + "grad_norm": 0.5278639654746645, + "learning_rate": 1.9361842946364348e-05, + "loss": 1.0958, + "step": 8155 + }, + { + "epoch": 0.14093171136300803, + "grad_norm": 0.7010123404753786, + "learning_rate": 1.9361646209545763e-05, + "loss": 0.4319, + "step": 8156 + }, + { + "epoch": 0.1409489908764169, + "grad_norm": 1.273669468571195, + "learning_rate": 1.936144944340579e-05, + "loss": 0.8389, + "step": 8157 + }, + { + "epoch": 0.14096627038982582, + "grad_norm": 0.9260158411666307, + "learning_rate": 1.936125264794505e-05, + "loss": 0.4551, + "step": 8158 + }, + { + "epoch": 0.14098354990323472, + "grad_norm": 1.2310210286302639, + "learning_rate": 1.936105582316415e-05, + "loss": 0.5141, + "step": 8159 + }, + { + "epoch": 0.14100082941664363, + "grad_norm": 1.2948592823676366, + "learning_rate": 1.9360858969063715e-05, + "loss": 0.6524, + "step": 8160 + }, + { + "epoch": 0.14101810893005254, + "grad_norm": 0.665377790367767, + "learning_rate": 1.9360662085644354e-05, + "loss": 0.4615, + "step": 8161 + }, + { + "epoch": 0.14103538844346145, + "grad_norm": 0.8662765720898352, + "learning_rate": 1.9360465172906692e-05, + "loss": 0.5702, + "step": 8162 + }, + { + "epoch": 0.14105266795687033, + "grad_norm": 1.251567688489189, + "learning_rate": 1.9360268230851343e-05, + "loss": 0.7464, + "step": 8163 + }, + { + "epoch": 0.14106994747027923, + "grad_norm": 0.713426147464759, + "learning_rate": 1.936007125947892e-05, + "loss": 0.6552, + "step": 8164 + }, + { + "epoch": 0.14108722698368814, + "grad_norm": 1.041810670129672, + "learning_rate": 1.9359874258790042e-05, + "loss": 0.7383, + "step": 8165 + }, + { + "epoch": 0.14110450649709705, + "grad_norm": 1.0203957689493834, + "learning_rate": 1.9359677228785328e-05, + "loss": 0.6933, + "step": 8166 + }, + { + "epoch": 0.14112178601050596, + "grad_norm": 1.1789927078181808, + "learning_rate": 1.9359480169465392e-05, + "loss": 0.6425, + "step": 8167 + }, + { + "epoch": 0.14113906552391484, + "grad_norm": 1.1568506881993106, + "learning_rate": 1.9359283080830856e-05, + "loss": 0.8166, + "step": 8168 + }, + { + "epoch": 0.14115634503732374, + "grad_norm": 0.5503580851980042, + "learning_rate": 1.9359085962882333e-05, + "loss": 0.4709, + "step": 8169 + }, + { + "epoch": 0.14117362455073265, + "grad_norm": 0.9779067511964452, + "learning_rate": 1.9358888815620438e-05, + "loss": 0.6375, + "step": 8170 + }, + { + "epoch": 0.14119090406414156, + "grad_norm": 0.8136497699230909, + "learning_rate": 1.9358691639045796e-05, + "loss": 0.7864, + "step": 8171 + }, + { + "epoch": 0.14120818357755047, + "grad_norm": 1.2688096414756267, + "learning_rate": 1.9358494433159017e-05, + "loss": 0.5804, + "step": 8172 + }, + { + "epoch": 0.14122546309095935, + "grad_norm": 0.9434781208808136, + "learning_rate": 1.9358297197960723e-05, + "loss": 0.5704, + "step": 8173 + }, + { + "epoch": 0.14124274260436825, + "grad_norm": 0.8809096097613142, + "learning_rate": 1.9358099933451532e-05, + "loss": 0.5812, + "step": 8174 + }, + { + "epoch": 0.14126002211777716, + "grad_norm": 0.9633286543661733, + "learning_rate": 1.9357902639632063e-05, + "loss": 0.5415, + "step": 8175 + }, + { + "epoch": 0.14127730163118607, + "grad_norm": 0.9535765707208393, + "learning_rate": 1.935770531650293e-05, + "loss": 0.6584, + "step": 8176 + }, + { + "epoch": 0.14129458114459498, + "grad_norm": 1.204270320462558, + "learning_rate": 1.9357507964064753e-05, + "loss": 0.7901, + "step": 8177 + }, + { + "epoch": 0.14131186065800386, + "grad_norm": 1.0101644440934114, + "learning_rate": 1.935731058231815e-05, + "loss": 0.5625, + "step": 8178 + }, + { + "epoch": 0.14132914017141276, + "grad_norm": 0.9610707340846623, + "learning_rate": 1.9357113171263737e-05, + "loss": 0.6973, + "step": 8179 + }, + { + "epoch": 0.14134641968482167, + "grad_norm": 1.1010859219765095, + "learning_rate": 1.9356915730902134e-05, + "loss": 0.5908, + "step": 8180 + }, + { + "epoch": 0.14136369919823058, + "grad_norm": 1.0832504705006174, + "learning_rate": 1.9356718261233963e-05, + "loss": 0.7184, + "step": 8181 + }, + { + "epoch": 0.1413809787116395, + "grad_norm": 0.893818212940319, + "learning_rate": 1.9356520762259838e-05, + "loss": 0.652, + "step": 8182 + }, + { + "epoch": 0.1413982582250484, + "grad_norm": 1.0003454557008502, + "learning_rate": 1.935632323398038e-05, + "loss": 0.6204, + "step": 8183 + }, + { + "epoch": 0.14141553773845728, + "grad_norm": 0.7455352435243457, + "learning_rate": 1.9356125676396206e-05, + "loss": 0.5877, + "step": 8184 + }, + { + "epoch": 0.14143281725186618, + "grad_norm": 1.2671418088425683, + "learning_rate": 1.9355928089507935e-05, + "loss": 0.7562, + "step": 8185 + }, + { + "epoch": 0.1414500967652751, + "grad_norm": 1.0387655271086949, + "learning_rate": 1.9355730473316187e-05, + "loss": 0.6146, + "step": 8186 + }, + { + "epoch": 0.141467376278684, + "grad_norm": 1.1674347116820554, + "learning_rate": 1.9355532827821578e-05, + "loss": 0.511, + "step": 8187 + }, + { + "epoch": 0.1414846557920929, + "grad_norm": 0.8467090869923233, + "learning_rate": 1.9355335153024728e-05, + "loss": 0.707, + "step": 8188 + }, + { + "epoch": 0.14150193530550179, + "grad_norm": 1.140872627419519, + "learning_rate": 1.935513744892626e-05, + "loss": 0.5995, + "step": 8189 + }, + { + "epoch": 0.1415192148189107, + "grad_norm": 0.8336485543202212, + "learning_rate": 1.935493971552679e-05, + "loss": 0.6119, + "step": 8190 + }, + { + "epoch": 0.1415364943323196, + "grad_norm": 0.9706011573774006, + "learning_rate": 1.9354741952826937e-05, + "loss": 0.7637, + "step": 8191 + }, + { + "epoch": 0.1415537738457285, + "grad_norm": 1.2077701754049097, + "learning_rate": 1.9354544160827324e-05, + "loss": 0.8269, + "step": 8192 + }, + { + "epoch": 0.14157105335913742, + "grad_norm": 0.7883342765891054, + "learning_rate": 1.9354346339528567e-05, + "loss": 0.5489, + "step": 8193 + }, + { + "epoch": 0.1415883328725463, + "grad_norm": 0.7382044232923438, + "learning_rate": 1.9354148488931286e-05, + "loss": 0.692, + "step": 8194 + }, + { + "epoch": 0.1416056123859552, + "grad_norm": 0.9739341540990022, + "learning_rate": 1.93539506090361e-05, + "loss": 0.5454, + "step": 8195 + }, + { + "epoch": 0.1416228918993641, + "grad_norm": 0.7065959618985703, + "learning_rate": 1.9353752699843632e-05, + "loss": 0.53, + "step": 8196 + }, + { + "epoch": 0.14164017141277302, + "grad_norm": 1.2144780793927035, + "learning_rate": 1.93535547613545e-05, + "loss": 0.645, + "step": 8197 + }, + { + "epoch": 0.14165745092618193, + "grad_norm": 0.9791802881805564, + "learning_rate": 1.935335679356932e-05, + "loss": 0.8449, + "step": 8198 + }, + { + "epoch": 0.14167473043959083, + "grad_norm": 0.7776317400573449, + "learning_rate": 1.9353158796488717e-05, + "loss": 0.5157, + "step": 8199 + }, + { + "epoch": 0.14169200995299971, + "grad_norm": 1.3687492079678094, + "learning_rate": 1.935296077011331e-05, + "loss": 0.6664, + "step": 8200 + }, + { + "epoch": 0.14170928946640862, + "grad_norm": 1.173793905934103, + "learning_rate": 1.935276271444372e-05, + "loss": 0.7593, + "step": 8201 + }, + { + "epoch": 0.14172656897981753, + "grad_norm": 0.9783744774806133, + "learning_rate": 1.9352564629480564e-05, + "loss": 0.6092, + "step": 8202 + }, + { + "epoch": 0.14174384849322644, + "grad_norm": 0.5021269631347453, + "learning_rate": 1.9352366515224468e-05, + "loss": 0.7529, + "step": 8203 + }, + { + "epoch": 0.14176112800663535, + "grad_norm": 0.9362614704073703, + "learning_rate": 1.9352168371676048e-05, + "loss": 0.6957, + "step": 8204 + }, + { + "epoch": 0.14177840752004423, + "grad_norm": 0.9565380768425686, + "learning_rate": 1.9351970198835924e-05, + "loss": 0.514, + "step": 8205 + }, + { + "epoch": 0.14179568703345313, + "grad_norm": 0.8788386008304653, + "learning_rate": 1.9351771996704723e-05, + "loss": 0.7164, + "step": 8206 + }, + { + "epoch": 0.14181296654686204, + "grad_norm": 0.9773724474335196, + "learning_rate": 1.935157376528306e-05, + "loss": 0.7295, + "step": 8207 + }, + { + "epoch": 0.14183024606027095, + "grad_norm": 0.8312036081670358, + "learning_rate": 1.935137550457155e-05, + "loss": 0.6015, + "step": 8208 + }, + { + "epoch": 0.14184752557367986, + "grad_norm": 1.1377433974085636, + "learning_rate": 1.9351177214570832e-05, + "loss": 0.6063, + "step": 8209 + }, + { + "epoch": 0.14186480508708874, + "grad_norm": 1.0540810322792042, + "learning_rate": 1.935097889528151e-05, + "loss": 0.6533, + "step": 8210 + }, + { + "epoch": 0.14188208460049764, + "grad_norm": 0.7734071492454079, + "learning_rate": 1.9350780546704212e-05, + "loss": 0.5719, + "step": 8211 + }, + { + "epoch": 0.14189936411390655, + "grad_norm": 0.9152276296907207, + "learning_rate": 1.935058216883956e-05, + "loss": 0.673, + "step": 8212 + }, + { + "epoch": 0.14191664362731546, + "grad_norm": 0.9564421397099695, + "learning_rate": 1.9350383761688175e-05, + "loss": 0.6106, + "step": 8213 + }, + { + "epoch": 0.14193392314072437, + "grad_norm": 1.0431893042978255, + "learning_rate": 1.9350185325250676e-05, + "loss": 0.8138, + "step": 8214 + }, + { + "epoch": 0.14195120265413325, + "grad_norm": 1.1143679105481568, + "learning_rate": 1.9349986859527687e-05, + "loss": 0.5367, + "step": 8215 + }, + { + "epoch": 0.14196848216754215, + "grad_norm": 1.1220770145896515, + "learning_rate": 1.9349788364519825e-05, + "loss": 0.7693, + "step": 8216 + }, + { + "epoch": 0.14198576168095106, + "grad_norm": 0.9241550313781737, + "learning_rate": 1.934958984022772e-05, + "loss": 0.6848, + "step": 8217 + }, + { + "epoch": 0.14200304119435997, + "grad_norm": 1.0538732311791492, + "learning_rate": 1.9349391286651986e-05, + "loss": 0.7103, + "step": 8218 + }, + { + "epoch": 0.14202032070776888, + "grad_norm": 0.8004218363969707, + "learning_rate": 1.934919270379325e-05, + "loss": 0.6526, + "step": 8219 + }, + { + "epoch": 0.14203760022117778, + "grad_norm": 1.1989141356602402, + "learning_rate": 1.934899409165213e-05, + "loss": 0.6822, + "step": 8220 + }, + { + "epoch": 0.14205487973458666, + "grad_norm": 1.2253153975519777, + "learning_rate": 1.9348795450229253e-05, + "loss": 0.7093, + "step": 8221 + }, + { + "epoch": 0.14207215924799557, + "grad_norm": 0.7801752268670721, + "learning_rate": 1.9348596779525237e-05, + "loss": 0.6181, + "step": 8222 + }, + { + "epoch": 0.14208943876140448, + "grad_norm": 0.7141226368552961, + "learning_rate": 1.9348398079540704e-05, + "loss": 0.531, + "step": 8223 + }, + { + "epoch": 0.1421067182748134, + "grad_norm": 1.130925421485454, + "learning_rate": 1.934819935027628e-05, + "loss": 0.7151, + "step": 8224 + }, + { + "epoch": 0.1421239977882223, + "grad_norm": 0.6446011784879766, + "learning_rate": 1.934800059173258e-05, + "loss": 0.5964, + "step": 8225 + }, + { + "epoch": 0.14214127730163117, + "grad_norm": 0.9963952512153184, + "learning_rate": 1.9347801803910236e-05, + "loss": 0.7335, + "step": 8226 + }, + { + "epoch": 0.14215855681504008, + "grad_norm": 0.837924402618428, + "learning_rate": 1.9347602986809866e-05, + "loss": 0.7802, + "step": 8227 + }, + { + "epoch": 0.142175836328449, + "grad_norm": 0.9223710876230036, + "learning_rate": 1.9347404140432094e-05, + "loss": 0.7497, + "step": 8228 + }, + { + "epoch": 0.1421931158418579, + "grad_norm": 0.8173988766088292, + "learning_rate": 1.934720526477754e-05, + "loss": 0.6458, + "step": 8229 + }, + { + "epoch": 0.1422103953552668, + "grad_norm": 1.0820956972526636, + "learning_rate": 1.934700635984683e-05, + "loss": 0.6391, + "step": 8230 + }, + { + "epoch": 0.14222767486867569, + "grad_norm": 1.0983073256931626, + "learning_rate": 1.9346807425640587e-05, + "loss": 0.7002, + "step": 8231 + }, + { + "epoch": 0.1422449543820846, + "grad_norm": 1.016593360366527, + "learning_rate": 1.934660846215943e-05, + "loss": 0.5165, + "step": 8232 + }, + { + "epoch": 0.1422622338954935, + "grad_norm": 1.3103749365606083, + "learning_rate": 1.9346409469403985e-05, + "loss": 0.6202, + "step": 8233 + }, + { + "epoch": 0.1422795134089024, + "grad_norm": 1.196566590280087, + "learning_rate": 1.9346210447374876e-05, + "loss": 0.5021, + "step": 8234 + }, + { + "epoch": 0.14229679292231132, + "grad_norm": 0.9231345950527389, + "learning_rate": 1.9346011396072722e-05, + "loss": 0.8582, + "step": 8235 + }, + { + "epoch": 0.14231407243572022, + "grad_norm": 0.7962392836591861, + "learning_rate": 1.9345812315498156e-05, + "loss": 0.5246, + "step": 8236 + }, + { + "epoch": 0.1423313519491291, + "grad_norm": 1.208817198313515, + "learning_rate": 1.9345613205651796e-05, + "loss": 0.6981, + "step": 8237 + }, + { + "epoch": 0.142348631462538, + "grad_norm": 1.2524384344430735, + "learning_rate": 1.934541406653426e-05, + "loss": 0.4164, + "step": 8238 + }, + { + "epoch": 0.14236591097594692, + "grad_norm": 0.9951403211353348, + "learning_rate": 1.9345214898146182e-05, + "loss": 0.7045, + "step": 8239 + }, + { + "epoch": 0.14238319048935583, + "grad_norm": 0.6953363578424958, + "learning_rate": 1.9345015700488176e-05, + "loss": 0.4804, + "step": 8240 + }, + { + "epoch": 0.14240047000276473, + "grad_norm": 0.7687950722143618, + "learning_rate": 1.9344816473560876e-05, + "loss": 0.4693, + "step": 8241 + }, + { + "epoch": 0.14241774951617361, + "grad_norm": 1.196980157767464, + "learning_rate": 1.9344617217364895e-05, + "loss": 0.6239, + "step": 8242 + }, + { + "epoch": 0.14243502902958252, + "grad_norm": 1.5636866154364308, + "learning_rate": 1.9344417931900865e-05, + "loss": 0.7759, + "step": 8243 + }, + { + "epoch": 0.14245230854299143, + "grad_norm": 1.1304735648114244, + "learning_rate": 1.9344218617169408e-05, + "loss": 0.5705, + "step": 8244 + }, + { + "epoch": 0.14246958805640034, + "grad_norm": 0.7360226704834415, + "learning_rate": 1.934401927317115e-05, + "loss": 0.6009, + "step": 8245 + }, + { + "epoch": 0.14248686756980924, + "grad_norm": 0.8482241534634688, + "learning_rate": 1.934381989990671e-05, + "loss": 0.629, + "step": 8246 + }, + { + "epoch": 0.14250414708321812, + "grad_norm": 0.688870897634342, + "learning_rate": 1.9343620497376717e-05, + "loss": 0.4196, + "step": 8247 + }, + { + "epoch": 0.14252142659662703, + "grad_norm": 1.2833288320702074, + "learning_rate": 1.9343421065581796e-05, + "loss": 0.8104, + "step": 8248 + }, + { + "epoch": 0.14253870611003594, + "grad_norm": 1.0450271105419688, + "learning_rate": 1.9343221604522568e-05, + "loss": 0.6875, + "step": 8249 + }, + { + "epoch": 0.14255598562344485, + "grad_norm": 0.8976715329146188, + "learning_rate": 1.9343022114199663e-05, + "loss": 0.6883, + "step": 8250 + }, + { + "epoch": 0.14257326513685376, + "grad_norm": 1.0946760764400303, + "learning_rate": 1.93428225946137e-05, + "loss": 0.7871, + "step": 8251 + }, + { + "epoch": 0.14259054465026264, + "grad_norm": 1.0805384183037547, + "learning_rate": 1.934262304576531e-05, + "loss": 0.751, + "step": 8252 + }, + { + "epoch": 0.14260782416367154, + "grad_norm": 0.3921674171630003, + "learning_rate": 1.934242346765511e-05, + "loss": 0.6134, + "step": 8253 + }, + { + "epoch": 0.14262510367708045, + "grad_norm": 1.060310674331066, + "learning_rate": 1.934222386028373e-05, + "loss": 0.4578, + "step": 8254 + }, + { + "epoch": 0.14264238319048936, + "grad_norm": 1.1300992033218082, + "learning_rate": 1.9342024223651797e-05, + "loss": 0.78, + "step": 8255 + }, + { + "epoch": 0.14265966270389827, + "grad_norm": 1.5359235868819419, + "learning_rate": 1.9341824557759934e-05, + "loss": 0.6391, + "step": 8256 + }, + { + "epoch": 0.14267694221730717, + "grad_norm": 1.5494363393934152, + "learning_rate": 1.934162486260877e-05, + "loss": 0.6491, + "step": 8257 + }, + { + "epoch": 0.14269422173071605, + "grad_norm": 1.3967486970527285, + "learning_rate": 1.934142513819892e-05, + "loss": 0.5341, + "step": 8258 + }, + { + "epoch": 0.14271150124412496, + "grad_norm": 0.835401589108709, + "learning_rate": 1.9341225384531022e-05, + "loss": 0.6649, + "step": 8259 + }, + { + "epoch": 0.14272878075753387, + "grad_norm": 1.031366943854556, + "learning_rate": 1.9341025601605695e-05, + "loss": 0.7284, + "step": 8260 + }, + { + "epoch": 0.14274606027094278, + "grad_norm": 1.008832555610328, + "learning_rate": 1.9340825789423565e-05, + "loss": 0.6787, + "step": 8261 + }, + { + "epoch": 0.14276333978435168, + "grad_norm": 1.2243791286950714, + "learning_rate": 1.9340625947985255e-05, + "loss": 0.6776, + "step": 8262 + }, + { + "epoch": 0.14278061929776056, + "grad_norm": 0.9629391878415468, + "learning_rate": 1.9340426077291397e-05, + "loss": 0.9293, + "step": 8263 + }, + { + "epoch": 0.14279789881116947, + "grad_norm": 0.7988908335131252, + "learning_rate": 1.9340226177342617e-05, + "loss": 0.7256, + "step": 8264 + }, + { + "epoch": 0.14281517832457838, + "grad_norm": 0.7627513878079833, + "learning_rate": 1.934002624813954e-05, + "loss": 0.437, + "step": 8265 + }, + { + "epoch": 0.1428324578379873, + "grad_norm": 0.9011810728910802, + "learning_rate": 1.933982628968279e-05, + "loss": 0.5661, + "step": 8266 + }, + { + "epoch": 0.1428497373513962, + "grad_norm": 1.1134268258055873, + "learning_rate": 1.933962630197299e-05, + "loss": 0.7364, + "step": 8267 + }, + { + "epoch": 0.14286701686480507, + "grad_norm": 0.9982499229154409, + "learning_rate": 1.9339426285010775e-05, + "loss": 0.6688, + "step": 8268 + }, + { + "epoch": 0.14288429637821398, + "grad_norm": 0.7727729497029341, + "learning_rate": 1.9339226238796766e-05, + "loss": 0.5015, + "step": 8269 + }, + { + "epoch": 0.1429015758916229, + "grad_norm": 1.0737189197953179, + "learning_rate": 1.933902616333159e-05, + "loss": 0.7563, + "step": 8270 + }, + { + "epoch": 0.1429188554050318, + "grad_norm": 0.8604807351949851, + "learning_rate": 1.9338826058615876e-05, + "loss": 0.5386, + "step": 8271 + }, + { + "epoch": 0.1429361349184407, + "grad_norm": 1.3923783051181586, + "learning_rate": 1.933862592465025e-05, + "loss": 0.5869, + "step": 8272 + }, + { + "epoch": 0.1429534144318496, + "grad_norm": 0.6514043613955346, + "learning_rate": 1.9338425761435334e-05, + "loss": 0.5736, + "step": 8273 + }, + { + "epoch": 0.1429706939452585, + "grad_norm": 0.7309929997273313, + "learning_rate": 1.933822556897176e-05, + "loss": 0.3426, + "step": 8274 + }, + { + "epoch": 0.1429879734586674, + "grad_norm": 1.2077935532885873, + "learning_rate": 1.9338025347260156e-05, + "loss": 0.6472, + "step": 8275 + }, + { + "epoch": 0.1430052529720763, + "grad_norm": 0.975526792456135, + "learning_rate": 1.9337825096301146e-05, + "loss": 0.4198, + "step": 8276 + }, + { + "epoch": 0.14302253248548522, + "grad_norm": 1.0519446080220194, + "learning_rate": 1.933762481609536e-05, + "loss": 0.8261, + "step": 8277 + }, + { + "epoch": 0.14303981199889412, + "grad_norm": 0.8545787720743037, + "learning_rate": 1.9337424506643418e-05, + "loss": 0.6004, + "step": 8278 + }, + { + "epoch": 0.143057091512303, + "grad_norm": 0.9110979890288848, + "learning_rate": 1.9337224167945953e-05, + "loss": 0.5921, + "step": 8279 + }, + { + "epoch": 0.1430743710257119, + "grad_norm": 0.8829126701393015, + "learning_rate": 1.9337023800003595e-05, + "loss": 0.4385, + "step": 8280 + }, + { + "epoch": 0.14309165053912082, + "grad_norm": 0.4486845447857611, + "learning_rate": 1.933682340281697e-05, + "loss": 0.6907, + "step": 8281 + }, + { + "epoch": 0.14310893005252973, + "grad_norm": 1.0774390067245594, + "learning_rate": 1.9336622976386704e-05, + "loss": 0.6997, + "step": 8282 + }, + { + "epoch": 0.14312620956593863, + "grad_norm": 0.7985669598964903, + "learning_rate": 1.9336422520713423e-05, + "loss": 0.7383, + "step": 8283 + }, + { + "epoch": 0.1431434890793475, + "grad_norm": 0.3722902306965112, + "learning_rate": 1.933622203579776e-05, + "loss": 0.593, + "step": 8284 + }, + { + "epoch": 0.14316076859275642, + "grad_norm": 1.1563037509354157, + "learning_rate": 1.933602152164034e-05, + "loss": 0.6924, + "step": 8285 + }, + { + "epoch": 0.14317804810616533, + "grad_norm": 0.9259314053977572, + "learning_rate": 1.933582097824179e-05, + "loss": 0.6243, + "step": 8286 + }, + { + "epoch": 0.14319532761957424, + "grad_norm": 1.5004352459346304, + "learning_rate": 1.933562040560274e-05, + "loss": 0.7201, + "step": 8287 + }, + { + "epoch": 0.14321260713298314, + "grad_norm": 0.9925275813622858, + "learning_rate": 1.9335419803723817e-05, + "loss": 0.6262, + "step": 8288 + }, + { + "epoch": 0.14322988664639202, + "grad_norm": 0.8691436301613658, + "learning_rate": 1.933521917260565e-05, + "loss": 0.6806, + "step": 8289 + }, + { + "epoch": 0.14324716615980093, + "grad_norm": 0.9567565448941351, + "learning_rate": 1.9335018512248863e-05, + "loss": 0.6013, + "step": 8290 + }, + { + "epoch": 0.14326444567320984, + "grad_norm": 1.0719349333859753, + "learning_rate": 1.9334817822654095e-05, + "loss": 0.6682, + "step": 8291 + }, + { + "epoch": 0.14328172518661875, + "grad_norm": 0.8229412129448078, + "learning_rate": 1.9334617103821966e-05, + "loss": 0.5479, + "step": 8292 + }, + { + "epoch": 0.14329900470002765, + "grad_norm": 1.067581528661411, + "learning_rate": 1.9334416355753106e-05, + "loss": 0.7587, + "step": 8293 + }, + { + "epoch": 0.14331628421343656, + "grad_norm": 0.8523767930634054, + "learning_rate": 1.9334215578448144e-05, + "loss": 0.659, + "step": 8294 + }, + { + "epoch": 0.14333356372684544, + "grad_norm": 0.8475152305085975, + "learning_rate": 1.933401477190771e-05, + "loss": 0.5865, + "step": 8295 + }, + { + "epoch": 0.14335084324025435, + "grad_norm": 0.8475798244862296, + "learning_rate": 1.9333813936132434e-05, + "loss": 0.5888, + "step": 8296 + }, + { + "epoch": 0.14336812275366326, + "grad_norm": 0.9090910453031716, + "learning_rate": 1.933361307112294e-05, + "loss": 0.634, + "step": 8297 + }, + { + "epoch": 0.14338540226707217, + "grad_norm": 1.0918815086420535, + "learning_rate": 1.9333412176879863e-05, + "loss": 0.6265, + "step": 8298 + }, + { + "epoch": 0.14340268178048107, + "grad_norm": 0.7145125344240751, + "learning_rate": 1.933321125340383e-05, + "loss": 0.4662, + "step": 8299 + }, + { + "epoch": 0.14341996129388995, + "grad_norm": 0.8358102423887901, + "learning_rate": 1.9333010300695466e-05, + "loss": 0.7875, + "step": 8300 + }, + { + "epoch": 0.14343724080729886, + "grad_norm": 1.0076885727943288, + "learning_rate": 1.933280931875541e-05, + "loss": 0.612, + "step": 8301 + }, + { + "epoch": 0.14345452032070777, + "grad_norm": 0.7798137372704177, + "learning_rate": 1.933260830758428e-05, + "loss": 0.4744, + "step": 8302 + }, + { + "epoch": 0.14347179983411668, + "grad_norm": 1.0478043639490633, + "learning_rate": 1.9332407267182717e-05, + "loss": 0.8107, + "step": 8303 + }, + { + "epoch": 0.14348907934752558, + "grad_norm": 0.8210200723629418, + "learning_rate": 1.9332206197551344e-05, + "loss": 0.5868, + "step": 8304 + }, + { + "epoch": 0.14350635886093446, + "grad_norm": 1.1246056504235098, + "learning_rate": 1.9332005098690795e-05, + "loss": 0.734, + "step": 8305 + }, + { + "epoch": 0.14352363837434337, + "grad_norm": 1.0733104248467973, + "learning_rate": 1.933180397060169e-05, + "loss": 0.52, + "step": 8306 + }, + { + "epoch": 0.14354091788775228, + "grad_norm": 0.6316932730173616, + "learning_rate": 1.933160281328467e-05, + "loss": 0.5602, + "step": 8307 + }, + { + "epoch": 0.14355819740116119, + "grad_norm": 1.1641800422489972, + "learning_rate": 1.933140162674036e-05, + "loss": 0.5672, + "step": 8308 + }, + { + "epoch": 0.1435754769145701, + "grad_norm": 0.8863897682438044, + "learning_rate": 1.933120041096939e-05, + "loss": 0.8276, + "step": 8309 + }, + { + "epoch": 0.143592756427979, + "grad_norm": 0.9112683958335672, + "learning_rate": 1.933099916597239e-05, + "loss": 0.5246, + "step": 8310 + }, + { + "epoch": 0.14361003594138788, + "grad_norm": 0.9163275060778299, + "learning_rate": 1.9330797891749995e-05, + "loss": 0.5929, + "step": 8311 + }, + { + "epoch": 0.1436273154547968, + "grad_norm": 0.9891161456805608, + "learning_rate": 1.933059658830283e-05, + "loss": 0.7811, + "step": 8312 + }, + { + "epoch": 0.1436445949682057, + "grad_norm": 0.9577600623360234, + "learning_rate": 1.9330395255631528e-05, + "loss": 0.818, + "step": 8313 + }, + { + "epoch": 0.1436618744816146, + "grad_norm": 1.398876417780483, + "learning_rate": 1.933019389373672e-05, + "loss": 0.4508, + "step": 8314 + }, + { + "epoch": 0.1436791539950235, + "grad_norm": 0.9233103352846934, + "learning_rate": 1.9329992502619032e-05, + "loss": 0.5519, + "step": 8315 + }, + { + "epoch": 0.1436964335084324, + "grad_norm": 1.2740690901059133, + "learning_rate": 1.93297910822791e-05, + "loss": 0.8899, + "step": 8316 + }, + { + "epoch": 0.1437137130218413, + "grad_norm": 0.47773538514818803, + "learning_rate": 1.9329589632717557e-05, + "loss": 0.6415, + "step": 8317 + }, + { + "epoch": 0.1437309925352502, + "grad_norm": 0.7984356285889179, + "learning_rate": 1.9329388153935023e-05, + "loss": 0.46, + "step": 8318 + }, + { + "epoch": 0.14374827204865911, + "grad_norm": 0.8519033071430403, + "learning_rate": 1.9329186645932143e-05, + "loss": 0.6145, + "step": 8319 + }, + { + "epoch": 0.14376555156206802, + "grad_norm": 0.6052080428909705, + "learning_rate": 1.932898510870954e-05, + "loss": 0.6083, + "step": 8320 + }, + { + "epoch": 0.1437828310754769, + "grad_norm": 0.9858732284695894, + "learning_rate": 1.9328783542267844e-05, + "loss": 0.8402, + "step": 8321 + }, + { + "epoch": 0.1438001105888858, + "grad_norm": 0.5783761559556667, + "learning_rate": 1.932858194660769e-05, + "loss": 0.7095, + "step": 8322 + }, + { + "epoch": 0.14381739010229472, + "grad_norm": 0.613529706651439, + "learning_rate": 1.9328380321729708e-05, + "loss": 0.3635, + "step": 8323 + }, + { + "epoch": 0.14383466961570363, + "grad_norm": 0.40927708074678726, + "learning_rate": 1.9328178667634532e-05, + "loss": 0.6711, + "step": 8324 + }, + { + "epoch": 0.14385194912911253, + "grad_norm": 1.0865474424912829, + "learning_rate": 1.932797698432279e-05, + "loss": 0.595, + "step": 8325 + }, + { + "epoch": 0.1438692286425214, + "grad_norm": 1.0988794793595642, + "learning_rate": 1.9327775271795115e-05, + "loss": 0.6623, + "step": 8326 + }, + { + "epoch": 0.14388650815593032, + "grad_norm": 0.8294926676193534, + "learning_rate": 1.932757353005214e-05, + "loss": 0.6873, + "step": 8327 + }, + { + "epoch": 0.14390378766933923, + "grad_norm": 0.843785402443092, + "learning_rate": 1.9327371759094496e-05, + "loss": 0.5274, + "step": 8328 + }, + { + "epoch": 0.14392106718274814, + "grad_norm": 1.0290850366732962, + "learning_rate": 1.9327169958922813e-05, + "loss": 0.6334, + "step": 8329 + }, + { + "epoch": 0.14393834669615704, + "grad_norm": 0.7721788397740473, + "learning_rate": 1.932696812953773e-05, + "loss": 0.5598, + "step": 8330 + }, + { + "epoch": 0.14395562620956595, + "grad_norm": 0.7437093778612628, + "learning_rate": 1.9326766270939866e-05, + "loss": 0.5717, + "step": 8331 + }, + { + "epoch": 0.14397290572297483, + "grad_norm": 1.0427787945260774, + "learning_rate": 1.9326564383129865e-05, + "loss": 0.724, + "step": 8332 + }, + { + "epoch": 0.14399018523638374, + "grad_norm": 0.9712720726020092, + "learning_rate": 1.9326362466108358e-05, + "loss": 0.6022, + "step": 8333 + }, + { + "epoch": 0.14400746474979265, + "grad_norm": 0.9098435267340702, + "learning_rate": 1.932616051987597e-05, + "loss": 0.6981, + "step": 8334 + }, + { + "epoch": 0.14402474426320155, + "grad_norm": 0.436742534375283, + "learning_rate": 1.9325958544433343e-05, + "loss": 0.6277, + "step": 8335 + }, + { + "epoch": 0.14404202377661046, + "grad_norm": 1.026786904053309, + "learning_rate": 1.9325756539781104e-05, + "loss": 0.6902, + "step": 8336 + }, + { + "epoch": 0.14405930329001934, + "grad_norm": 0.9023913253836124, + "learning_rate": 1.9325554505919885e-05, + "loss": 0.7317, + "step": 8337 + }, + { + "epoch": 0.14407658280342825, + "grad_norm": 1.0484868083705685, + "learning_rate": 1.932535244285032e-05, + "loss": 0.6078, + "step": 8338 + }, + { + "epoch": 0.14409386231683716, + "grad_norm": 1.029146610004059, + "learning_rate": 1.9325150350573048e-05, + "loss": 0.5828, + "step": 8339 + }, + { + "epoch": 0.14411114183024606, + "grad_norm": 1.0443904738939613, + "learning_rate": 1.932494822908869e-05, + "loss": 0.6494, + "step": 8340 + }, + { + "epoch": 0.14412842134365497, + "grad_norm": 0.9834530195311662, + "learning_rate": 1.932474607839789e-05, + "loss": 0.6813, + "step": 8341 + }, + { + "epoch": 0.14414570085706385, + "grad_norm": 0.527338424833242, + "learning_rate": 1.9324543898501273e-05, + "loss": 0.8514, + "step": 8342 + }, + { + "epoch": 0.14416298037047276, + "grad_norm": 0.9922716004540841, + "learning_rate": 1.9324341689399478e-05, + "loss": 0.5763, + "step": 8343 + }, + { + "epoch": 0.14418025988388167, + "grad_norm": 0.7304075425267269, + "learning_rate": 1.9324139451093136e-05, + "loss": 0.403, + "step": 8344 + }, + { + "epoch": 0.14419753939729058, + "grad_norm": 1.3299882297981471, + "learning_rate": 1.932393718358288e-05, + "loss": 0.6892, + "step": 8345 + }, + { + "epoch": 0.14421481891069948, + "grad_norm": 1.0738119873744174, + "learning_rate": 1.9323734886869346e-05, + "loss": 0.6471, + "step": 8346 + }, + { + "epoch": 0.1442320984241084, + "grad_norm": 0.8371482705888834, + "learning_rate": 1.9323532560953163e-05, + "loss": 0.3259, + "step": 8347 + }, + { + "epoch": 0.14424937793751727, + "grad_norm": 1.3040691474395247, + "learning_rate": 1.932333020583497e-05, + "loss": 0.5389, + "step": 8348 + }, + { + "epoch": 0.14426665745092618, + "grad_norm": 0.9377940307623086, + "learning_rate": 1.9323127821515397e-05, + "loss": 0.5734, + "step": 8349 + }, + { + "epoch": 0.14428393696433509, + "grad_norm": 0.9649257488205069, + "learning_rate": 1.932292540799508e-05, + "loss": 0.6792, + "step": 8350 + }, + { + "epoch": 0.144301216477744, + "grad_norm": 1.1289350051195852, + "learning_rate": 1.932272296527465e-05, + "loss": 0.6596, + "step": 8351 + }, + { + "epoch": 0.1443184959911529, + "grad_norm": 1.3262649868744703, + "learning_rate": 1.9322520493354745e-05, + "loss": 0.8818, + "step": 8352 + }, + { + "epoch": 0.14433577550456178, + "grad_norm": 1.0535719832179908, + "learning_rate": 1.9322317992235997e-05, + "loss": 0.9164, + "step": 8353 + }, + { + "epoch": 0.1443530550179707, + "grad_norm": 0.8310922845680512, + "learning_rate": 1.932211546191904e-05, + "loss": 0.595, + "step": 8354 + }, + { + "epoch": 0.1443703345313796, + "grad_norm": 1.2328071603092758, + "learning_rate": 1.932191290240451e-05, + "loss": 0.5435, + "step": 8355 + }, + { + "epoch": 0.1443876140447885, + "grad_norm": 0.980794723070376, + "learning_rate": 1.9321710313693037e-05, + "loss": 0.6873, + "step": 8356 + }, + { + "epoch": 0.1444048935581974, + "grad_norm": 1.0383790725341175, + "learning_rate": 1.932150769578526e-05, + "loss": 0.6985, + "step": 8357 + }, + { + "epoch": 0.1444221730716063, + "grad_norm": 1.2481153831343244, + "learning_rate": 1.9321305048681817e-05, + "loss": 0.8332, + "step": 8358 + }, + { + "epoch": 0.1444394525850152, + "grad_norm": 0.8470397204773423, + "learning_rate": 1.9321102372383333e-05, + "loss": 0.6011, + "step": 8359 + }, + { + "epoch": 0.1444567320984241, + "grad_norm": 1.1291616332308658, + "learning_rate": 1.9320899666890453e-05, + "loss": 0.6618, + "step": 8360 + }, + { + "epoch": 0.14447401161183301, + "grad_norm": 0.611782604649459, + "learning_rate": 1.9320696932203803e-05, + "loss": 1.0036, + "step": 8361 + }, + { + "epoch": 0.14449129112524192, + "grad_norm": 1.1953569254674377, + "learning_rate": 1.9320494168324023e-05, + "loss": 0.5562, + "step": 8362 + }, + { + "epoch": 0.1445085706386508, + "grad_norm": 1.644614277494338, + "learning_rate": 1.9320291375251744e-05, + "loss": 0.7571, + "step": 8363 + }, + { + "epoch": 0.1445258501520597, + "grad_norm": 1.8676418806037751, + "learning_rate": 1.932008855298761e-05, + "loss": 0.7379, + "step": 8364 + }, + { + "epoch": 0.14454312966546862, + "grad_norm": 1.137460245063164, + "learning_rate": 1.9319885701532247e-05, + "loss": 0.6613, + "step": 8365 + }, + { + "epoch": 0.14456040917887752, + "grad_norm": 1.1855744695714237, + "learning_rate": 1.9319682820886295e-05, + "loss": 0.6594, + "step": 8366 + }, + { + "epoch": 0.14457768869228643, + "grad_norm": 0.9428802639845545, + "learning_rate": 1.9319479911050386e-05, + "loss": 0.5658, + "step": 8367 + }, + { + "epoch": 0.14459496820569534, + "grad_norm": 0.8389880811560033, + "learning_rate": 1.931927697202516e-05, + "loss": 0.6548, + "step": 8368 + }, + { + "epoch": 0.14461224771910422, + "grad_norm": 1.3461901856005605, + "learning_rate": 1.9319074003811247e-05, + "loss": 0.7011, + "step": 8369 + }, + { + "epoch": 0.14462952723251313, + "grad_norm": 1.0311968896945947, + "learning_rate": 1.9318871006409288e-05, + "loss": 0.6352, + "step": 8370 + }, + { + "epoch": 0.14464680674592204, + "grad_norm": 0.8773782515282195, + "learning_rate": 1.9318667979819915e-05, + "loss": 0.6695, + "step": 8371 + }, + { + "epoch": 0.14466408625933094, + "grad_norm": 0.455422054863626, + "learning_rate": 1.931846492404377e-05, + "loss": 0.6193, + "step": 8372 + }, + { + "epoch": 0.14468136577273985, + "grad_norm": 0.7506537861504604, + "learning_rate": 1.931826183908148e-05, + "loss": 0.5775, + "step": 8373 + }, + { + "epoch": 0.14469864528614873, + "grad_norm": 0.9817363195316607, + "learning_rate": 1.931805872493369e-05, + "loss": 0.6227, + "step": 8374 + }, + { + "epoch": 0.14471592479955764, + "grad_norm": 0.4085618711580353, + "learning_rate": 1.9317855581601028e-05, + "loss": 0.4712, + "step": 8375 + }, + { + "epoch": 0.14473320431296655, + "grad_norm": 0.4125980569072347, + "learning_rate": 1.931765240908414e-05, + "loss": 0.517, + "step": 8376 + }, + { + "epoch": 0.14475048382637545, + "grad_norm": 1.0356308863612576, + "learning_rate": 1.931744920738365e-05, + "loss": 0.7151, + "step": 8377 + }, + { + "epoch": 0.14476776333978436, + "grad_norm": 1.0715156043554788, + "learning_rate": 1.9317245976500204e-05, + "loss": 0.7145, + "step": 8378 + }, + { + "epoch": 0.14478504285319324, + "grad_norm": 1.3930233468070612, + "learning_rate": 1.9317042716434432e-05, + "loss": 0.799, + "step": 8379 + }, + { + "epoch": 0.14480232236660215, + "grad_norm": 0.4691236016149975, + "learning_rate": 1.931683942718698e-05, + "loss": 0.6705, + "step": 8380 + }, + { + "epoch": 0.14481960188001106, + "grad_norm": 0.962227073339789, + "learning_rate": 1.9316636108758474e-05, + "loss": 0.6442, + "step": 8381 + }, + { + "epoch": 0.14483688139341996, + "grad_norm": 1.0015671517103661, + "learning_rate": 1.931643276114956e-05, + "loss": 0.5741, + "step": 8382 + }, + { + "epoch": 0.14485416090682887, + "grad_norm": 0.8892648879904634, + "learning_rate": 1.9316229384360867e-05, + "loss": 0.6171, + "step": 8383 + }, + { + "epoch": 0.14487144042023778, + "grad_norm": 0.9542098943968969, + "learning_rate": 1.9316025978393037e-05, + "loss": 0.8238, + "step": 8384 + }, + { + "epoch": 0.14488871993364666, + "grad_norm": 1.107126655348339, + "learning_rate": 1.9315822543246704e-05, + "loss": 0.6987, + "step": 8385 + }, + { + "epoch": 0.14490599944705557, + "grad_norm": 0.9007353503924104, + "learning_rate": 1.9315619078922512e-05, + "loss": 0.7067, + "step": 8386 + }, + { + "epoch": 0.14492327896046447, + "grad_norm": 1.12585712063893, + "learning_rate": 1.9315415585421088e-05, + "loss": 0.5812, + "step": 8387 + }, + { + "epoch": 0.14494055847387338, + "grad_norm": 1.0490046796678396, + "learning_rate": 1.9315212062743075e-05, + "loss": 0.5128, + "step": 8388 + }, + { + "epoch": 0.1449578379872823, + "grad_norm": 0.9482397519404184, + "learning_rate": 1.9315008510889114e-05, + "loss": 0.7132, + "step": 8389 + }, + { + "epoch": 0.14497511750069117, + "grad_norm": 1.0691877649846033, + "learning_rate": 1.9314804929859835e-05, + "loss": 0.6496, + "step": 8390 + }, + { + "epoch": 0.14499239701410008, + "grad_norm": 0.5427571518489269, + "learning_rate": 1.9314601319655878e-05, + "loss": 0.3569, + "step": 8391 + }, + { + "epoch": 0.14500967652750898, + "grad_norm": 0.8610330133446938, + "learning_rate": 1.9314397680277884e-05, + "loss": 0.6977, + "step": 8392 + }, + { + "epoch": 0.1450269560409179, + "grad_norm": 1.0626431851586289, + "learning_rate": 1.9314194011726486e-05, + "loss": 0.8575, + "step": 8393 + }, + { + "epoch": 0.1450442355543268, + "grad_norm": 0.9362679435866778, + "learning_rate": 1.9313990314002328e-05, + "loss": 0.6209, + "step": 8394 + }, + { + "epoch": 0.14506151506773568, + "grad_norm": 0.9164460704699228, + "learning_rate": 1.931378658710604e-05, + "loss": 0.4809, + "step": 8395 + }, + { + "epoch": 0.1450787945811446, + "grad_norm": 0.8174054780398817, + "learning_rate": 1.9313582831038267e-05, + "loss": 0.775, + "step": 8396 + }, + { + "epoch": 0.1450960740945535, + "grad_norm": 0.9882392393967415, + "learning_rate": 1.9313379045799644e-05, + "loss": 0.4265, + "step": 8397 + }, + { + "epoch": 0.1451133536079624, + "grad_norm": 1.1356134988955013, + "learning_rate": 1.9313175231390812e-05, + "loss": 0.7179, + "step": 8398 + }, + { + "epoch": 0.1451306331213713, + "grad_norm": 1.3734375910847771, + "learning_rate": 1.9312971387812405e-05, + "loss": 0.7419, + "step": 8399 + }, + { + "epoch": 0.14514791263478022, + "grad_norm": 0.9943483478221666, + "learning_rate": 1.9312767515065067e-05, + "loss": 0.9412, + "step": 8400 + }, + { + "epoch": 0.1451651921481891, + "grad_norm": 0.7304243635113284, + "learning_rate": 1.9312563613149435e-05, + "loss": 0.7507, + "step": 8401 + }, + { + "epoch": 0.145182471661598, + "grad_norm": 0.9600875053001737, + "learning_rate": 1.9312359682066138e-05, + "loss": 0.6391, + "step": 8402 + }, + { + "epoch": 0.1451997511750069, + "grad_norm": 0.9144278936834833, + "learning_rate": 1.9312155721815828e-05, + "loss": 0.5827, + "step": 8403 + }, + { + "epoch": 0.14521703068841582, + "grad_norm": 0.9209236037730826, + "learning_rate": 1.931195173239914e-05, + "loss": 0.6732, + "step": 8404 + }, + { + "epoch": 0.14523431020182473, + "grad_norm": 0.9312350728065688, + "learning_rate": 1.931174771381671e-05, + "loss": 0.5988, + "step": 8405 + }, + { + "epoch": 0.1452515897152336, + "grad_norm": 1.0051649727992917, + "learning_rate": 1.9311543666069177e-05, + "loss": 0.4882, + "step": 8406 + }, + { + "epoch": 0.14526886922864252, + "grad_norm": 1.2718575635183116, + "learning_rate": 1.931133958915718e-05, + "loss": 0.6389, + "step": 8407 + }, + { + "epoch": 0.14528614874205142, + "grad_norm": 0.9556379879916269, + "learning_rate": 1.9311135483081363e-05, + "loss": 0.5219, + "step": 8408 + }, + { + "epoch": 0.14530342825546033, + "grad_norm": 1.1276113589276595, + "learning_rate": 1.9310931347842364e-05, + "loss": 0.7219, + "step": 8409 + }, + { + "epoch": 0.14532070776886924, + "grad_norm": 1.6461686076792759, + "learning_rate": 1.9310727183440816e-05, + "loss": 0.7835, + "step": 8410 + }, + { + "epoch": 0.14533798728227812, + "grad_norm": 1.4136793352417079, + "learning_rate": 1.9310522989877364e-05, + "loss": 0.7797, + "step": 8411 + }, + { + "epoch": 0.14535526679568703, + "grad_norm": 0.41687700874229944, + "learning_rate": 1.9310318767152648e-05, + "loss": 0.5174, + "step": 8412 + }, + { + "epoch": 0.14537254630909593, + "grad_norm": 0.9880780748822964, + "learning_rate": 1.9310114515267308e-05, + "loss": 0.6747, + "step": 8413 + }, + { + "epoch": 0.14538982582250484, + "grad_norm": 0.658306000952966, + "learning_rate": 1.9309910234221978e-05, + "loss": 0.5666, + "step": 8414 + }, + { + "epoch": 0.14540710533591375, + "grad_norm": 0.926928418646052, + "learning_rate": 1.93097059240173e-05, + "loss": 0.514, + "step": 8415 + }, + { + "epoch": 0.14542438484932263, + "grad_norm": 1.0539466728953655, + "learning_rate": 1.930950158465392e-05, + "loss": 0.6476, + "step": 8416 + }, + { + "epoch": 0.14544166436273154, + "grad_norm": 0.5918877826711697, + "learning_rate": 1.930929721613247e-05, + "loss": 0.6197, + "step": 8417 + }, + { + "epoch": 0.14545894387614045, + "grad_norm": 1.134024060658997, + "learning_rate": 1.9309092818453597e-05, + "loss": 0.5377, + "step": 8418 + }, + { + "epoch": 0.14547622338954935, + "grad_norm": 1.0398166257212873, + "learning_rate": 1.9308888391617936e-05, + "loss": 0.4558, + "step": 8419 + }, + { + "epoch": 0.14549350290295826, + "grad_norm": 0.7877390987428696, + "learning_rate": 1.930868393562613e-05, + "loss": 0.5865, + "step": 8420 + }, + { + "epoch": 0.14551078241636717, + "grad_norm": 1.2343351882453308, + "learning_rate": 1.9308479450478816e-05, + "loss": 0.7635, + "step": 8421 + }, + { + "epoch": 0.14552806192977605, + "grad_norm": 1.4267971450428854, + "learning_rate": 1.930827493617664e-05, + "loss": 0.7611, + "step": 8422 + }, + { + "epoch": 0.14554534144318496, + "grad_norm": 1.010590429972623, + "learning_rate": 1.9308070392720236e-05, + "loss": 0.5983, + "step": 8423 + }, + { + "epoch": 0.14556262095659386, + "grad_norm": 0.7258966590682846, + "learning_rate": 1.9307865820110252e-05, + "loss": 0.5294, + "step": 8424 + }, + { + "epoch": 0.14557990047000277, + "grad_norm": 0.9904066320178879, + "learning_rate": 1.9307661218347325e-05, + "loss": 0.6072, + "step": 8425 + }, + { + "epoch": 0.14559717998341168, + "grad_norm": 0.436968282303885, + "learning_rate": 1.930745658743209e-05, + "loss": 0.6709, + "step": 8426 + }, + { + "epoch": 0.14561445949682056, + "grad_norm": 1.170604114484384, + "learning_rate": 1.93072519273652e-05, + "loss": 0.64, + "step": 8427 + }, + { + "epoch": 0.14563173901022947, + "grad_norm": 0.8213462930591323, + "learning_rate": 1.9307047238147286e-05, + "loss": 0.4466, + "step": 8428 + }, + { + "epoch": 0.14564901852363837, + "grad_norm": 1.2593407310419247, + "learning_rate": 1.9306842519778994e-05, + "loss": 0.7654, + "step": 8429 + }, + { + "epoch": 0.14566629803704728, + "grad_norm": 1.1838332982523476, + "learning_rate": 1.9306637772260965e-05, + "loss": 0.5357, + "step": 8430 + }, + { + "epoch": 0.1456835775504562, + "grad_norm": 0.4998696156048711, + "learning_rate": 1.930643299559384e-05, + "loss": 0.6801, + "step": 8431 + }, + { + "epoch": 0.14570085706386507, + "grad_norm": 1.101781081774318, + "learning_rate": 1.9306228189778255e-05, + "loss": 0.6277, + "step": 8432 + }, + { + "epoch": 0.14571813657727398, + "grad_norm": 1.0359111921311224, + "learning_rate": 1.930602335481486e-05, + "loss": 0.6654, + "step": 8433 + }, + { + "epoch": 0.14573541609068288, + "grad_norm": 1.041704330238568, + "learning_rate": 1.930581849070429e-05, + "loss": 0.6258, + "step": 8434 + }, + { + "epoch": 0.1457526956040918, + "grad_norm": 0.9601768315809418, + "learning_rate": 1.930561359744719e-05, + "loss": 0.5242, + "step": 8435 + }, + { + "epoch": 0.1457699751175007, + "grad_norm": 1.2349707498421507, + "learning_rate": 1.93054086750442e-05, + "loss": 0.8125, + "step": 8436 + }, + { + "epoch": 0.1457872546309096, + "grad_norm": 0.9995573012074133, + "learning_rate": 1.9305203723495968e-05, + "loss": 0.6711, + "step": 8437 + }, + { + "epoch": 0.1458045341443185, + "grad_norm": 1.0126892546375563, + "learning_rate": 1.9304998742803124e-05, + "loss": 0.5589, + "step": 8438 + }, + { + "epoch": 0.1458218136577274, + "grad_norm": 0.7593385996402597, + "learning_rate": 1.9304793732966323e-05, + "loss": 0.4245, + "step": 8439 + }, + { + "epoch": 0.1458390931711363, + "grad_norm": 1.3395474892343904, + "learning_rate": 1.9304588693986198e-05, + "loss": 0.8856, + "step": 8440 + }, + { + "epoch": 0.1458563726845452, + "grad_norm": 1.2043882262284873, + "learning_rate": 1.9304383625863393e-05, + "loss": 0.6839, + "step": 8441 + }, + { + "epoch": 0.14587365219795412, + "grad_norm": 1.4420802049269776, + "learning_rate": 1.930417852859855e-05, + "loss": 0.7057, + "step": 8442 + }, + { + "epoch": 0.145890931711363, + "grad_norm": 1.3382585155315505, + "learning_rate": 1.9303973402192317e-05, + "loss": 0.9057, + "step": 8443 + }, + { + "epoch": 0.1459082112247719, + "grad_norm": 1.195690780626139, + "learning_rate": 1.930376824664533e-05, + "loss": 0.6048, + "step": 8444 + }, + { + "epoch": 0.1459254907381808, + "grad_norm": 0.4795560922920848, + "learning_rate": 1.9303563061958236e-05, + "loss": 0.3384, + "step": 8445 + }, + { + "epoch": 0.14594277025158972, + "grad_norm": 1.1251490968914586, + "learning_rate": 1.9303357848131676e-05, + "loss": 0.8261, + "step": 8446 + }, + { + "epoch": 0.14596004976499863, + "grad_norm": 0.9024991805108068, + "learning_rate": 1.9303152605166288e-05, + "loss": 0.6481, + "step": 8447 + }, + { + "epoch": 0.1459773292784075, + "grad_norm": 1.0224758343475637, + "learning_rate": 1.930294733306272e-05, + "loss": 0.8432, + "step": 8448 + }, + { + "epoch": 0.14599460879181642, + "grad_norm": 0.4566092023368724, + "learning_rate": 1.9302742031821614e-05, + "loss": 0.6472, + "step": 8449 + }, + { + "epoch": 0.14601188830522532, + "grad_norm": 0.9651448493059316, + "learning_rate": 1.9302536701443615e-05, + "loss": 0.6205, + "step": 8450 + }, + { + "epoch": 0.14602916781863423, + "grad_norm": 1.044983155403616, + "learning_rate": 1.9302331341929364e-05, + "loss": 0.6837, + "step": 8451 + }, + { + "epoch": 0.14604644733204314, + "grad_norm": 1.0805221090550035, + "learning_rate": 1.9302125953279503e-05, + "loss": 0.7624, + "step": 8452 + }, + { + "epoch": 0.14606372684545202, + "grad_norm": 0.9832576145159186, + "learning_rate": 1.9301920535494677e-05, + "loss": 0.9988, + "step": 8453 + }, + { + "epoch": 0.14608100635886093, + "grad_norm": 0.8358127359891736, + "learning_rate": 1.930171508857553e-05, + "loss": 0.6373, + "step": 8454 + }, + { + "epoch": 0.14609828587226983, + "grad_norm": 0.5815990946451893, + "learning_rate": 1.93015096125227e-05, + "loss": 0.3405, + "step": 8455 + }, + { + "epoch": 0.14611556538567874, + "grad_norm": 0.8572013949643429, + "learning_rate": 1.9301304107336837e-05, + "loss": 0.7274, + "step": 8456 + }, + { + "epoch": 0.14613284489908765, + "grad_norm": 1.0755151313547426, + "learning_rate": 1.9301098573018583e-05, + "loss": 0.6182, + "step": 8457 + }, + { + "epoch": 0.14615012441249656, + "grad_norm": 1.2785870778075412, + "learning_rate": 1.9300893009568583e-05, + "loss": 0.6004, + "step": 8458 + }, + { + "epoch": 0.14616740392590544, + "grad_norm": 0.9488634920126527, + "learning_rate": 1.9300687416987477e-05, + "loss": 0.7084, + "step": 8459 + }, + { + "epoch": 0.14618468343931434, + "grad_norm": 0.666996052475612, + "learning_rate": 1.9300481795275914e-05, + "loss": 0.6335, + "step": 8460 + }, + { + "epoch": 0.14620196295272325, + "grad_norm": 1.0694095841500282, + "learning_rate": 1.9300276144434537e-05, + "loss": 0.6536, + "step": 8461 + }, + { + "epoch": 0.14621924246613216, + "grad_norm": 0.8304661327117637, + "learning_rate": 1.9300070464463984e-05, + "loss": 0.5687, + "step": 8462 + }, + { + "epoch": 0.14623652197954107, + "grad_norm": 1.0223227743860102, + "learning_rate": 1.9299864755364903e-05, + "loss": 0.6524, + "step": 8463 + }, + { + "epoch": 0.14625380149294995, + "grad_norm": 0.43071360324752767, + "learning_rate": 1.9299659017137942e-05, + "loss": 0.7068, + "step": 8464 + }, + { + "epoch": 0.14627108100635886, + "grad_norm": 0.6933246110047561, + "learning_rate": 1.929945324978374e-05, + "loss": 0.4368, + "step": 8465 + }, + { + "epoch": 0.14628836051976776, + "grad_norm": 1.2073891749440302, + "learning_rate": 1.929924745330294e-05, + "loss": 0.7154, + "step": 8466 + }, + { + "epoch": 0.14630564003317667, + "grad_norm": 1.0258639261269764, + "learning_rate": 1.9299041627696196e-05, + "loss": 0.4322, + "step": 8467 + }, + { + "epoch": 0.14632291954658558, + "grad_norm": 0.8879166392292231, + "learning_rate": 1.9298835772964146e-05, + "loss": 0.5033, + "step": 8468 + }, + { + "epoch": 0.14634019905999446, + "grad_norm": 0.4436026219045608, + "learning_rate": 1.9298629889107436e-05, + "loss": 0.7294, + "step": 8469 + }, + { + "epoch": 0.14635747857340337, + "grad_norm": 0.7485963727820087, + "learning_rate": 1.9298423976126712e-05, + "loss": 0.5496, + "step": 8470 + }, + { + "epoch": 0.14637475808681227, + "grad_norm": 0.7636847742787641, + "learning_rate": 1.9298218034022613e-05, + "loss": 0.5352, + "step": 8471 + }, + { + "epoch": 0.14639203760022118, + "grad_norm": 1.1574963230490463, + "learning_rate": 1.9298012062795794e-05, + "loss": 0.8058, + "step": 8472 + }, + { + "epoch": 0.1464093171136301, + "grad_norm": 1.0748677055452935, + "learning_rate": 1.9297806062446887e-05, + "loss": 0.7607, + "step": 8473 + }, + { + "epoch": 0.146426596627039, + "grad_norm": 0.7098198668435473, + "learning_rate": 1.929760003297655e-05, + "loss": 0.5224, + "step": 8474 + }, + { + "epoch": 0.14644387614044788, + "grad_norm": 1.2292076386254138, + "learning_rate": 1.929739397438542e-05, + "loss": 0.6544, + "step": 8475 + }, + { + "epoch": 0.14646115565385678, + "grad_norm": 1.5793794828019816, + "learning_rate": 1.929718788667415e-05, + "loss": 0.6985, + "step": 8476 + }, + { + "epoch": 0.1464784351672657, + "grad_norm": 1.0292223291084204, + "learning_rate": 1.929698176984338e-05, + "loss": 0.65, + "step": 8477 + }, + { + "epoch": 0.1464957146806746, + "grad_norm": 0.7600940002046874, + "learning_rate": 1.9296775623893755e-05, + "loss": 0.4612, + "step": 8478 + }, + { + "epoch": 0.1465129941940835, + "grad_norm": 1.004528611579911, + "learning_rate": 1.9296569448825923e-05, + "loss": 0.5952, + "step": 8479 + }, + { + "epoch": 0.1465302737074924, + "grad_norm": 0.8029283405875048, + "learning_rate": 1.929636324464053e-05, + "loss": 0.5612, + "step": 8480 + }, + { + "epoch": 0.1465475532209013, + "grad_norm": 1.07876367651498, + "learning_rate": 1.9296157011338216e-05, + "loss": 0.6335, + "step": 8481 + }, + { + "epoch": 0.1465648327343102, + "grad_norm": 0.8582960037507268, + "learning_rate": 1.9295950748919636e-05, + "loss": 0.5172, + "step": 8482 + }, + { + "epoch": 0.1465821122477191, + "grad_norm": 0.765793463177348, + "learning_rate": 1.929574445738543e-05, + "loss": 0.4996, + "step": 8483 + }, + { + "epoch": 0.14659939176112802, + "grad_norm": 0.4591341632128231, + "learning_rate": 1.9295538136736247e-05, + "loss": 0.6551, + "step": 8484 + }, + { + "epoch": 0.1466166712745369, + "grad_norm": 1.1972386157788937, + "learning_rate": 1.9295331786972734e-05, + "loss": 0.6951, + "step": 8485 + }, + { + "epoch": 0.1466339507879458, + "grad_norm": 0.8897656269605949, + "learning_rate": 1.9295125408095533e-05, + "loss": 0.5507, + "step": 8486 + }, + { + "epoch": 0.1466512303013547, + "grad_norm": 1.0805911290171235, + "learning_rate": 1.929491900010529e-05, + "loss": 0.6214, + "step": 8487 + }, + { + "epoch": 0.14666850981476362, + "grad_norm": 1.1109145732709527, + "learning_rate": 1.929471256300266e-05, + "loss": 0.6688, + "step": 8488 + }, + { + "epoch": 0.14668578932817253, + "grad_norm": 2.1403983400857487, + "learning_rate": 1.9294506096788282e-05, + "loss": 0.7612, + "step": 8489 + }, + { + "epoch": 0.1467030688415814, + "grad_norm": 0.5407217632613324, + "learning_rate": 1.92942996014628e-05, + "loss": 0.8467, + "step": 8490 + }, + { + "epoch": 0.14672034835499032, + "grad_norm": 1.0891546788648667, + "learning_rate": 1.9294093077026872e-05, + "loss": 0.7895, + "step": 8491 + }, + { + "epoch": 0.14673762786839922, + "grad_norm": 1.3022567221570245, + "learning_rate": 1.9293886523481134e-05, + "loss": 0.6834, + "step": 8492 + }, + { + "epoch": 0.14675490738180813, + "grad_norm": 0.9544611975755494, + "learning_rate": 1.929367994082624e-05, + "loss": 0.7246, + "step": 8493 + }, + { + "epoch": 0.14677218689521704, + "grad_norm": 1.1236556642541244, + "learning_rate": 1.929347332906283e-05, + "loss": 0.6735, + "step": 8494 + }, + { + "epoch": 0.14678946640862595, + "grad_norm": 0.7450700261759207, + "learning_rate": 1.929326668819156e-05, + "loss": 0.5775, + "step": 8495 + }, + { + "epoch": 0.14680674592203483, + "grad_norm": 1.1849069970736572, + "learning_rate": 1.929306001821307e-05, + "loss": 0.6974, + "step": 8496 + }, + { + "epoch": 0.14682402543544373, + "grad_norm": 0.547013008143421, + "learning_rate": 1.929285331912801e-05, + "loss": 0.6511, + "step": 8497 + }, + { + "epoch": 0.14684130494885264, + "grad_norm": 1.0141642286723953, + "learning_rate": 1.9292646590937026e-05, + "loss": 0.649, + "step": 8498 + }, + { + "epoch": 0.14685858446226155, + "grad_norm": 1.1446430167472645, + "learning_rate": 1.929243983364077e-05, + "loss": 0.7538, + "step": 8499 + }, + { + "epoch": 0.14687586397567046, + "grad_norm": 1.2065381311229408, + "learning_rate": 1.9292233047239883e-05, + "loss": 0.7587, + "step": 8500 + }, + { + "epoch": 0.14689314348907934, + "grad_norm": 0.4778390851710067, + "learning_rate": 1.9292026231735016e-05, + "loss": 0.6556, + "step": 8501 + }, + { + "epoch": 0.14691042300248824, + "grad_norm": 0.915364628776063, + "learning_rate": 1.929181938712682e-05, + "loss": 0.5665, + "step": 8502 + }, + { + "epoch": 0.14692770251589715, + "grad_norm": 1.004751332858915, + "learning_rate": 1.9291612513415936e-05, + "loss": 0.5796, + "step": 8503 + }, + { + "epoch": 0.14694498202930606, + "grad_norm": 0.8229833151862602, + "learning_rate": 1.9291405610603012e-05, + "loss": 0.8169, + "step": 8504 + }, + { + "epoch": 0.14696226154271497, + "grad_norm": 0.8515276939846397, + "learning_rate": 1.9291198678688707e-05, + "loss": 0.591, + "step": 8505 + }, + { + "epoch": 0.14697954105612385, + "grad_norm": 0.7831584165449776, + "learning_rate": 1.929099171767366e-05, + "loss": 0.3917, + "step": 8506 + }, + { + "epoch": 0.14699682056953275, + "grad_norm": 0.4921052425426306, + "learning_rate": 1.929078472755852e-05, + "loss": 0.3827, + "step": 8507 + }, + { + "epoch": 0.14701410008294166, + "grad_norm": 0.8307192937179702, + "learning_rate": 1.9290577708343935e-05, + "loss": 0.5001, + "step": 8508 + }, + { + "epoch": 0.14703137959635057, + "grad_norm": 0.9943929246959076, + "learning_rate": 1.9290370660030555e-05, + "loss": 0.6242, + "step": 8509 + }, + { + "epoch": 0.14704865910975948, + "grad_norm": 0.9061210461465767, + "learning_rate": 1.929016358261903e-05, + "loss": 0.6076, + "step": 8510 + }, + { + "epoch": 0.14706593862316839, + "grad_norm": 0.7586055603938663, + "learning_rate": 1.9289956476110003e-05, + "loss": 0.5455, + "step": 8511 + }, + { + "epoch": 0.14708321813657727, + "grad_norm": 0.5060344897659178, + "learning_rate": 1.9289749340504128e-05, + "loss": 0.5704, + "step": 8512 + }, + { + "epoch": 0.14710049764998617, + "grad_norm": 0.9268935099295003, + "learning_rate": 1.928954217580205e-05, + "loss": 0.7665, + "step": 8513 + }, + { + "epoch": 0.14711777716339508, + "grad_norm": 1.0088466304580046, + "learning_rate": 1.9289334982004427e-05, + "loss": 0.5892, + "step": 8514 + }, + { + "epoch": 0.147135056676804, + "grad_norm": 1.2583714622150755, + "learning_rate": 1.9289127759111895e-05, + "loss": 0.7742, + "step": 8515 + }, + { + "epoch": 0.1471523361902129, + "grad_norm": 1.177639327744476, + "learning_rate": 1.928892050712511e-05, + "loss": 0.6448, + "step": 8516 + }, + { + "epoch": 0.14716961570362178, + "grad_norm": 0.659387039978929, + "learning_rate": 1.928871322604472e-05, + "loss": 0.6154, + "step": 8517 + }, + { + "epoch": 0.14718689521703068, + "grad_norm": 1.0300788502375597, + "learning_rate": 1.9288505915871373e-05, + "loss": 0.7129, + "step": 8518 + }, + { + "epoch": 0.1472041747304396, + "grad_norm": 0.8145025625634342, + "learning_rate": 1.9288298576605723e-05, + "loss": 0.5412, + "step": 8519 + }, + { + "epoch": 0.1472214542438485, + "grad_norm": 0.6507469622646278, + "learning_rate": 1.928809120824841e-05, + "loss": 0.4391, + "step": 8520 + }, + { + "epoch": 0.1472387337572574, + "grad_norm": 1.1774715064774643, + "learning_rate": 1.9287883810800095e-05, + "loss": 0.4788, + "step": 8521 + }, + { + "epoch": 0.14725601327066629, + "grad_norm": 1.1237326639451706, + "learning_rate": 1.928767638426142e-05, + "loss": 0.666, + "step": 8522 + }, + { + "epoch": 0.1472732927840752, + "grad_norm": 1.2561202683356547, + "learning_rate": 1.928746892863304e-05, + "loss": 0.5154, + "step": 8523 + }, + { + "epoch": 0.1472905722974841, + "grad_norm": 0.9376965778933576, + "learning_rate": 1.92872614439156e-05, + "loss": 0.8091, + "step": 8524 + }, + { + "epoch": 0.147307851810893, + "grad_norm": 1.3247499450826479, + "learning_rate": 1.928705393010975e-05, + "loss": 0.7375, + "step": 8525 + }, + { + "epoch": 0.14732513132430192, + "grad_norm": 0.8887972858593487, + "learning_rate": 1.928684638721614e-05, + "loss": 0.5151, + "step": 8526 + }, + { + "epoch": 0.1473424108377108, + "grad_norm": 0.45677385019498135, + "learning_rate": 1.9286638815235422e-05, + "loss": 0.6879, + "step": 8527 + }, + { + "epoch": 0.1473596903511197, + "grad_norm": 0.789313798546377, + "learning_rate": 1.9286431214168246e-05, + "loss": 0.5475, + "step": 8528 + }, + { + "epoch": 0.1473769698645286, + "grad_norm": 0.8510032196472282, + "learning_rate": 1.9286223584015263e-05, + "loss": 0.5533, + "step": 8529 + }, + { + "epoch": 0.14739424937793752, + "grad_norm": 1.0622641233076808, + "learning_rate": 1.928601592477712e-05, + "loss": 0.6512, + "step": 8530 + }, + { + "epoch": 0.14741152889134643, + "grad_norm": 1.4675210426621361, + "learning_rate": 1.9285808236454468e-05, + "loss": 0.6691, + "step": 8531 + }, + { + "epoch": 0.14742880840475533, + "grad_norm": 0.8674980303461635, + "learning_rate": 1.9285600519047964e-05, + "loss": 0.4789, + "step": 8532 + }, + { + "epoch": 0.14744608791816421, + "grad_norm": 0.6853947535035355, + "learning_rate": 1.9285392772558248e-05, + "loss": 0.3782, + "step": 8533 + }, + { + "epoch": 0.14746336743157312, + "grad_norm": 1.1484268955175403, + "learning_rate": 1.928518499698598e-05, + "loss": 0.5933, + "step": 8534 + }, + { + "epoch": 0.14748064694498203, + "grad_norm": 0.8472109828283374, + "learning_rate": 1.9284977192331807e-05, + "loss": 0.6189, + "step": 8535 + }, + { + "epoch": 0.14749792645839094, + "grad_norm": 1.046695622146941, + "learning_rate": 1.928476935859638e-05, + "loss": 0.6482, + "step": 8536 + }, + { + "epoch": 0.14751520597179985, + "grad_norm": 1.0356095749248742, + "learning_rate": 1.9284561495780346e-05, + "loss": 0.5712, + "step": 8537 + }, + { + "epoch": 0.14753248548520873, + "grad_norm": 0.5379117213105827, + "learning_rate": 1.9284353603884363e-05, + "loss": 0.9407, + "step": 8538 + }, + { + "epoch": 0.14754976499861763, + "grad_norm": 1.136197911350044, + "learning_rate": 1.9284145682909076e-05, + "loss": 0.6343, + "step": 8539 + }, + { + "epoch": 0.14756704451202654, + "grad_norm": 0.7952720469332805, + "learning_rate": 1.9283937732855144e-05, + "loss": 0.5608, + "step": 8540 + }, + { + "epoch": 0.14758432402543545, + "grad_norm": 0.7717876978811536, + "learning_rate": 1.9283729753723207e-05, + "loss": 0.7137, + "step": 8541 + }, + { + "epoch": 0.14760160353884436, + "grad_norm": 1.0103367697478456, + "learning_rate": 1.9283521745513928e-05, + "loss": 0.5967, + "step": 8542 + }, + { + "epoch": 0.14761888305225324, + "grad_norm": 1.4597095451582072, + "learning_rate": 1.928331370822795e-05, + "loss": 0.8049, + "step": 8543 + }, + { + "epoch": 0.14763616256566214, + "grad_norm": 1.1010188772137142, + "learning_rate": 1.9283105641865933e-05, + "loss": 0.7623, + "step": 8544 + }, + { + "epoch": 0.14765344207907105, + "grad_norm": 0.6708748651896443, + "learning_rate": 1.928289754642852e-05, + "loss": 0.5228, + "step": 8545 + }, + { + "epoch": 0.14767072159247996, + "grad_norm": 1.1281760417237043, + "learning_rate": 1.9282689421916368e-05, + "loss": 0.6016, + "step": 8546 + }, + { + "epoch": 0.14768800110588887, + "grad_norm": 0.49772643956230084, + "learning_rate": 1.9282481268330126e-05, + "loss": 0.74, + "step": 8547 + }, + { + "epoch": 0.14770528061929777, + "grad_norm": 1.1002786367496051, + "learning_rate": 1.9282273085670447e-05, + "loss": 0.811, + "step": 8548 + }, + { + "epoch": 0.14772256013270665, + "grad_norm": 1.311254374477782, + "learning_rate": 1.928206487393798e-05, + "loss": 0.7501, + "step": 8549 + }, + { + "epoch": 0.14773983964611556, + "grad_norm": 1.379058158997262, + "learning_rate": 1.9281856633133386e-05, + "loss": 0.9014, + "step": 8550 + }, + { + "epoch": 0.14775711915952447, + "grad_norm": 0.8319012303281172, + "learning_rate": 1.9281648363257313e-05, + "loss": 0.5736, + "step": 8551 + }, + { + "epoch": 0.14777439867293338, + "grad_norm": 0.5107589570303406, + "learning_rate": 1.9281440064310408e-05, + "loss": 0.6205, + "step": 8552 + }, + { + "epoch": 0.14779167818634228, + "grad_norm": 1.2621331816213701, + "learning_rate": 1.9281231736293326e-05, + "loss": 0.4115, + "step": 8553 + }, + { + "epoch": 0.14780895769975116, + "grad_norm": 0.9238959556834718, + "learning_rate": 1.9281023379206723e-05, + "loss": 0.5795, + "step": 8554 + }, + { + "epoch": 0.14782623721316007, + "grad_norm": 0.8161167688612992, + "learning_rate": 1.928081499305125e-05, + "loss": 0.5093, + "step": 8555 + }, + { + "epoch": 0.14784351672656898, + "grad_norm": 1.0603968229944447, + "learning_rate": 1.928060657782756e-05, + "loss": 0.8259, + "step": 8556 + }, + { + "epoch": 0.1478607962399779, + "grad_norm": 0.7086659123958575, + "learning_rate": 1.9280398133536303e-05, + "loss": 0.7051, + "step": 8557 + }, + { + "epoch": 0.1478780757533868, + "grad_norm": 0.984988168968271, + "learning_rate": 1.9280189660178132e-05, + "loss": 0.7458, + "step": 8558 + }, + { + "epoch": 0.14789535526679568, + "grad_norm": 1.1158202437341718, + "learning_rate": 1.9279981157753703e-05, + "loss": 0.7327, + "step": 8559 + }, + { + "epoch": 0.14791263478020458, + "grad_norm": 0.9180588732440803, + "learning_rate": 1.9279772626263667e-05, + "loss": 0.8159, + "step": 8560 + }, + { + "epoch": 0.1479299142936135, + "grad_norm": 0.6400738949787997, + "learning_rate": 1.927956406570868e-05, + "loss": 0.4034, + "step": 8561 + }, + { + "epoch": 0.1479471938070224, + "grad_norm": 1.0201466485214357, + "learning_rate": 1.9279355476089394e-05, + "loss": 0.6262, + "step": 8562 + }, + { + "epoch": 0.1479644733204313, + "grad_norm": 0.9284792419935288, + "learning_rate": 1.927914685740646e-05, + "loss": 0.5693, + "step": 8563 + }, + { + "epoch": 0.14798175283384019, + "grad_norm": 1.0553190119933837, + "learning_rate": 1.9278938209660533e-05, + "loss": 0.7611, + "step": 8564 + }, + { + "epoch": 0.1479990323472491, + "grad_norm": 0.8503992317324793, + "learning_rate": 1.9278729532852267e-05, + "loss": 0.7191, + "step": 8565 + }, + { + "epoch": 0.148016311860658, + "grad_norm": 1.0217565474345611, + "learning_rate": 1.9278520826982315e-05, + "loss": 0.6038, + "step": 8566 + }, + { + "epoch": 0.1480335913740669, + "grad_norm": 1.0423481612162264, + "learning_rate": 1.927831209205133e-05, + "loss": 0.595, + "step": 8567 + }, + { + "epoch": 0.14805087088747582, + "grad_norm": 1.0990158023848966, + "learning_rate": 1.9278103328059966e-05, + "loss": 0.7518, + "step": 8568 + }, + { + "epoch": 0.14806815040088472, + "grad_norm": 0.6750251580186424, + "learning_rate": 1.927789453500888e-05, + "loss": 0.553, + "step": 8569 + }, + { + "epoch": 0.1480854299142936, + "grad_norm": 1.1470027931911744, + "learning_rate": 1.927768571289872e-05, + "loss": 0.6114, + "step": 8570 + }, + { + "epoch": 0.1481027094277025, + "grad_norm": 1.1308777149095146, + "learning_rate": 1.9277476861730142e-05, + "loss": 0.8618, + "step": 8571 + }, + { + "epoch": 0.14811998894111142, + "grad_norm": 0.9135757677960609, + "learning_rate": 1.9277267981503804e-05, + "loss": 0.5456, + "step": 8572 + }, + { + "epoch": 0.14813726845452033, + "grad_norm": 0.6838020966183397, + "learning_rate": 1.9277059072220357e-05, + "loss": 0.4481, + "step": 8573 + }, + { + "epoch": 0.14815454796792923, + "grad_norm": 1.0017382210439527, + "learning_rate": 1.9276850133880454e-05, + "loss": 0.6079, + "step": 8574 + }, + { + "epoch": 0.14817182748133811, + "grad_norm": 1.0030188922961794, + "learning_rate": 1.9276641166484756e-05, + "loss": 0.5836, + "step": 8575 + }, + { + "epoch": 0.14818910699474702, + "grad_norm": 1.0505215931021599, + "learning_rate": 1.9276432170033907e-05, + "loss": 0.5906, + "step": 8576 + }, + { + "epoch": 0.14820638650815593, + "grad_norm": 1.1663342949734408, + "learning_rate": 1.927622314452857e-05, + "loss": 0.7069, + "step": 8577 + }, + { + "epoch": 0.14822366602156484, + "grad_norm": 0.8898613116605343, + "learning_rate": 1.92760140899694e-05, + "loss": 0.6017, + "step": 8578 + }, + { + "epoch": 0.14824094553497374, + "grad_norm": 1.1247601028366585, + "learning_rate": 1.9275805006357046e-05, + "loss": 0.7169, + "step": 8579 + }, + { + "epoch": 0.14825822504838262, + "grad_norm": 1.0255001574037246, + "learning_rate": 1.9275595893692166e-05, + "loss": 0.6757, + "step": 8580 + }, + { + "epoch": 0.14827550456179153, + "grad_norm": 0.509926803973884, + "learning_rate": 1.9275386751975417e-05, + "loss": 0.6903, + "step": 8581 + }, + { + "epoch": 0.14829278407520044, + "grad_norm": 1.224988396863834, + "learning_rate": 1.927517758120745e-05, + "loss": 0.8149, + "step": 8582 + }, + { + "epoch": 0.14831006358860935, + "grad_norm": 1.0803447766261125, + "learning_rate": 1.9274968381388924e-05, + "loss": 0.5627, + "step": 8583 + }, + { + "epoch": 0.14832734310201826, + "grad_norm": 0.7681404937453146, + "learning_rate": 1.9274759152520488e-05, + "loss": 0.4091, + "step": 8584 + }, + { + "epoch": 0.14834462261542716, + "grad_norm": 1.0440770249215225, + "learning_rate": 1.9274549894602806e-05, + "loss": 0.602, + "step": 8585 + }, + { + "epoch": 0.14836190212883604, + "grad_norm": 0.8604113047923938, + "learning_rate": 1.9274340607636522e-05, + "loss": 0.5557, + "step": 8586 + }, + { + "epoch": 0.14837918164224495, + "grad_norm": 1.18940549436768, + "learning_rate": 1.9274131291622304e-05, + "loss": 0.6336, + "step": 8587 + }, + { + "epoch": 0.14839646115565386, + "grad_norm": 1.3860544224370914, + "learning_rate": 1.9273921946560802e-05, + "loss": 0.6469, + "step": 8588 + }, + { + "epoch": 0.14841374066906277, + "grad_norm": 0.9685020031402538, + "learning_rate": 1.927371257245267e-05, + "loss": 0.6386, + "step": 8589 + }, + { + "epoch": 0.14843102018247167, + "grad_norm": 1.3428141019488886, + "learning_rate": 1.9273503169298565e-05, + "loss": 0.6156, + "step": 8590 + }, + { + "epoch": 0.14844829969588055, + "grad_norm": 1.245766565214529, + "learning_rate": 1.9273293737099145e-05, + "loss": 0.7264, + "step": 8591 + }, + { + "epoch": 0.14846557920928946, + "grad_norm": 1.0504608572862446, + "learning_rate": 1.9273084275855063e-05, + "loss": 0.4516, + "step": 8592 + }, + { + "epoch": 0.14848285872269837, + "grad_norm": 1.0187034394641474, + "learning_rate": 1.9272874785566977e-05, + "loss": 0.7328, + "step": 8593 + }, + { + "epoch": 0.14850013823610728, + "grad_norm": 1.140412559691576, + "learning_rate": 1.9272665266235544e-05, + "loss": 0.5638, + "step": 8594 + }, + { + "epoch": 0.14851741774951618, + "grad_norm": 1.3209120729211847, + "learning_rate": 1.9272455717861418e-05, + "loss": 0.6372, + "step": 8595 + }, + { + "epoch": 0.14853469726292506, + "grad_norm": 1.1500086110657883, + "learning_rate": 1.9272246140445253e-05, + "loss": 0.6927, + "step": 8596 + }, + { + "epoch": 0.14855197677633397, + "grad_norm": 1.4873235915334357, + "learning_rate": 1.9272036533987713e-05, + "loss": 0.5322, + "step": 8597 + }, + { + "epoch": 0.14856925628974288, + "grad_norm": 1.6740711140768503, + "learning_rate": 1.9271826898489445e-05, + "loss": 0.6623, + "step": 8598 + }, + { + "epoch": 0.1485865358031518, + "grad_norm": 1.1609812579742986, + "learning_rate": 1.9271617233951113e-05, + "loss": 0.5581, + "step": 8599 + }, + { + "epoch": 0.1486038153165607, + "grad_norm": 1.4117143871962985, + "learning_rate": 1.9271407540373372e-05, + "loss": 0.6391, + "step": 8600 + }, + { + "epoch": 0.14862109482996957, + "grad_norm": 1.2912912964830503, + "learning_rate": 1.9271197817756878e-05, + "loss": 0.6494, + "step": 8601 + }, + { + "epoch": 0.14863837434337848, + "grad_norm": 0.8728687223767598, + "learning_rate": 1.9270988066102285e-05, + "loss": 0.5326, + "step": 8602 + }, + { + "epoch": 0.1486556538567874, + "grad_norm": 1.2210284052266362, + "learning_rate": 1.9270778285410254e-05, + "loss": 0.6171, + "step": 8603 + }, + { + "epoch": 0.1486729333701963, + "grad_norm": 0.9309347325849969, + "learning_rate": 1.9270568475681442e-05, + "loss": 0.6836, + "step": 8604 + }, + { + "epoch": 0.1486902128836052, + "grad_norm": 1.3775994856596108, + "learning_rate": 1.9270358636916507e-05, + "loss": 0.5495, + "step": 8605 + }, + { + "epoch": 0.1487074923970141, + "grad_norm": 1.0025637740869513, + "learning_rate": 1.92701487691161e-05, + "loss": 0.6469, + "step": 8606 + }, + { + "epoch": 0.148724771910423, + "grad_norm": 1.4462838526051056, + "learning_rate": 1.9269938872280886e-05, + "loss": 0.6097, + "step": 8607 + }, + { + "epoch": 0.1487420514238319, + "grad_norm": 0.9048946369734252, + "learning_rate": 1.9269728946411518e-05, + "loss": 0.6194, + "step": 8608 + }, + { + "epoch": 0.1487593309372408, + "grad_norm": 0.5198240087061625, + "learning_rate": 1.9269518991508655e-05, + "loss": 0.6468, + "step": 8609 + }, + { + "epoch": 0.14877661045064972, + "grad_norm": 0.7414013266624293, + "learning_rate": 1.9269309007572952e-05, + "loss": 0.8244, + "step": 8610 + }, + { + "epoch": 0.14879388996405862, + "grad_norm": 0.9619924546729332, + "learning_rate": 1.926909899460507e-05, + "loss": 0.6237, + "step": 8611 + }, + { + "epoch": 0.1488111694774675, + "grad_norm": 0.8918859644808228, + "learning_rate": 1.926888895260566e-05, + "loss": 0.5275, + "step": 8612 + }, + { + "epoch": 0.1488284489908764, + "grad_norm": 1.2301727227912629, + "learning_rate": 1.9268678881575392e-05, + "loss": 0.6881, + "step": 8613 + }, + { + "epoch": 0.14884572850428532, + "grad_norm": 0.8117415705334463, + "learning_rate": 1.9268468781514918e-05, + "loss": 0.681, + "step": 8614 + }, + { + "epoch": 0.14886300801769423, + "grad_norm": 0.9269064481532392, + "learning_rate": 1.9268258652424893e-05, + "loss": 0.5709, + "step": 8615 + }, + { + "epoch": 0.14888028753110313, + "grad_norm": 0.8720754745862792, + "learning_rate": 1.926804849430598e-05, + "loss": 0.4911, + "step": 8616 + }, + { + "epoch": 0.148897567044512, + "grad_norm": 1.0978178601704764, + "learning_rate": 1.9267838307158832e-05, + "loss": 0.6671, + "step": 8617 + }, + { + "epoch": 0.14891484655792092, + "grad_norm": 0.8964660018216938, + "learning_rate": 1.926762809098411e-05, + "loss": 0.6051, + "step": 8618 + }, + { + "epoch": 0.14893212607132983, + "grad_norm": 1.0719596442129598, + "learning_rate": 1.9267417845782474e-05, + "loss": 0.5921, + "step": 8619 + }, + { + "epoch": 0.14894940558473874, + "grad_norm": 1.1180202794241665, + "learning_rate": 1.9267207571554577e-05, + "loss": 0.5517, + "step": 8620 + }, + { + "epoch": 0.14896668509814764, + "grad_norm": 0.7702037421276066, + "learning_rate": 1.9266997268301087e-05, + "loss": 0.597, + "step": 8621 + }, + { + "epoch": 0.14898396461155655, + "grad_norm": 1.0308933511572018, + "learning_rate": 1.9266786936022654e-05, + "loss": 0.667, + "step": 8622 + }, + { + "epoch": 0.14900124412496543, + "grad_norm": 1.2383175420053436, + "learning_rate": 1.9266576574719943e-05, + "loss": 0.8007, + "step": 8623 + }, + { + "epoch": 0.14901852363837434, + "grad_norm": 0.6913281831552389, + "learning_rate": 1.9266366184393607e-05, + "loss": 0.71, + "step": 8624 + }, + { + "epoch": 0.14903580315178325, + "grad_norm": 1.1350036944108366, + "learning_rate": 1.926615576504431e-05, + "loss": 0.6571, + "step": 8625 + }, + { + "epoch": 0.14905308266519215, + "grad_norm": 0.47127392491575787, + "learning_rate": 1.9265945316672704e-05, + "loss": 0.5461, + "step": 8626 + }, + { + "epoch": 0.14907036217860106, + "grad_norm": 0.9417664510770947, + "learning_rate": 1.926573483927946e-05, + "loss": 0.9285, + "step": 8627 + }, + { + "epoch": 0.14908764169200994, + "grad_norm": 0.8672648921058324, + "learning_rate": 1.9265524332865228e-05, + "loss": 0.4394, + "step": 8628 + }, + { + "epoch": 0.14910492120541885, + "grad_norm": 1.1356031195720322, + "learning_rate": 1.9265313797430666e-05, + "loss": 0.7698, + "step": 8629 + }, + { + "epoch": 0.14912220071882776, + "grad_norm": 0.7325390416380194, + "learning_rate": 1.9265103232976444e-05, + "loss": 0.7121, + "step": 8630 + }, + { + "epoch": 0.14913948023223667, + "grad_norm": 0.8468521893915403, + "learning_rate": 1.926489263950321e-05, + "loss": 0.5832, + "step": 8631 + }, + { + "epoch": 0.14915675974564557, + "grad_norm": 1.0120015745571047, + "learning_rate": 1.9264682017011626e-05, + "loss": 0.6107, + "step": 8632 + }, + { + "epoch": 0.14917403925905445, + "grad_norm": 0.8841638273980534, + "learning_rate": 1.9264471365502356e-05, + "loss": 0.5043, + "step": 8633 + }, + { + "epoch": 0.14919131877246336, + "grad_norm": 0.8784592538282848, + "learning_rate": 1.9264260684976058e-05, + "loss": 0.395, + "step": 8634 + }, + { + "epoch": 0.14920859828587227, + "grad_norm": 1.04375121570199, + "learning_rate": 1.9264049975433396e-05, + "loss": 0.6822, + "step": 8635 + }, + { + "epoch": 0.14922587779928118, + "grad_norm": 0.8947381580054335, + "learning_rate": 1.9263839236875017e-05, + "loss": 0.4532, + "step": 8636 + }, + { + "epoch": 0.14924315731269008, + "grad_norm": 1.2225450075297888, + "learning_rate": 1.9263628469301596e-05, + "loss": 0.7145, + "step": 8637 + }, + { + "epoch": 0.14926043682609896, + "grad_norm": 0.8590125623260283, + "learning_rate": 1.9263417672713786e-05, + "loss": 0.6502, + "step": 8638 + }, + { + "epoch": 0.14927771633950787, + "grad_norm": 1.1436027790483394, + "learning_rate": 1.9263206847112245e-05, + "loss": 0.53, + "step": 8639 + }, + { + "epoch": 0.14929499585291678, + "grad_norm": 0.7231689274963435, + "learning_rate": 1.9262995992497633e-05, + "loss": 0.3422, + "step": 8640 + }, + { + "epoch": 0.1493122753663257, + "grad_norm": 0.8585578798342203, + "learning_rate": 1.9262785108870622e-05, + "loss": 0.6983, + "step": 8641 + }, + { + "epoch": 0.1493295548797346, + "grad_norm": 0.7846207659611035, + "learning_rate": 1.926257419623186e-05, + "loss": 0.508, + "step": 8642 + }, + { + "epoch": 0.1493468343931435, + "grad_norm": 1.0938745430177568, + "learning_rate": 1.926236325458201e-05, + "loss": 0.7995, + "step": 8643 + }, + { + "epoch": 0.14936411390655238, + "grad_norm": 1.5378259759503066, + "learning_rate": 1.9262152283921736e-05, + "loss": 0.7613, + "step": 8644 + }, + { + "epoch": 0.1493813934199613, + "grad_norm": 0.7133211904550979, + "learning_rate": 1.92619412842517e-05, + "loss": 0.4668, + "step": 8645 + }, + { + "epoch": 0.1493986729333702, + "grad_norm": 0.9986496934954742, + "learning_rate": 1.9261730255572555e-05, + "loss": 0.6268, + "step": 8646 + }, + { + "epoch": 0.1494159524467791, + "grad_norm": 0.8570338659209605, + "learning_rate": 1.926151919788497e-05, + "loss": 0.5898, + "step": 8647 + }, + { + "epoch": 0.149433231960188, + "grad_norm": 1.9117257454507994, + "learning_rate": 1.92613081111896e-05, + "loss": 0.5944, + "step": 8648 + }, + { + "epoch": 0.1494505114735969, + "grad_norm": 1.2370415875241143, + "learning_rate": 1.9261096995487113e-05, + "loss": 0.681, + "step": 8649 + }, + { + "epoch": 0.1494677909870058, + "grad_norm": 0.7617832068479571, + "learning_rate": 1.926088585077816e-05, + "loss": 0.6619, + "step": 8650 + }, + { + "epoch": 0.1494850705004147, + "grad_norm": 1.2740953665022674, + "learning_rate": 1.9260674677063415e-05, + "loss": 0.8917, + "step": 8651 + }, + { + "epoch": 0.14950235001382361, + "grad_norm": 0.8853973177473791, + "learning_rate": 1.9260463474343534e-05, + "loss": 0.459, + "step": 8652 + }, + { + "epoch": 0.14951962952723252, + "grad_norm": 1.0111211031972656, + "learning_rate": 1.9260252242619172e-05, + "loss": 0.6114, + "step": 8653 + }, + { + "epoch": 0.1495369090406414, + "grad_norm": 0.7988768466752993, + "learning_rate": 1.9260040981891002e-05, + "loss": 0.5323, + "step": 8654 + }, + { + "epoch": 0.1495541885540503, + "grad_norm": 1.4100904594178736, + "learning_rate": 1.925982969215968e-05, + "loss": 0.6626, + "step": 8655 + }, + { + "epoch": 0.14957146806745922, + "grad_norm": 0.9554772019737985, + "learning_rate": 1.9259618373425863e-05, + "loss": 0.5843, + "step": 8656 + }, + { + "epoch": 0.14958874758086813, + "grad_norm": 1.1622625859503695, + "learning_rate": 1.9259407025690218e-05, + "loss": 0.8152, + "step": 8657 + }, + { + "epoch": 0.14960602709427703, + "grad_norm": 1.1828572547187883, + "learning_rate": 1.9259195648953408e-05, + "loss": 0.5509, + "step": 8658 + }, + { + "epoch": 0.14962330660768594, + "grad_norm": 1.1478602380718972, + "learning_rate": 1.925898424321609e-05, + "loss": 0.4749, + "step": 8659 + }, + { + "epoch": 0.14964058612109482, + "grad_norm": 0.8996318570882894, + "learning_rate": 1.9258772808478938e-05, + "loss": 0.75, + "step": 8660 + }, + { + "epoch": 0.14965786563450373, + "grad_norm": 1.1996998648639157, + "learning_rate": 1.9258561344742598e-05, + "loss": 0.6832, + "step": 8661 + }, + { + "epoch": 0.14967514514791264, + "grad_norm": 1.0396006465718177, + "learning_rate": 1.9258349852007743e-05, + "loss": 0.5405, + "step": 8662 + }, + { + "epoch": 0.14969242466132154, + "grad_norm": 1.2413993218283694, + "learning_rate": 1.9258138330275034e-05, + "loss": 0.648, + "step": 8663 + }, + { + "epoch": 0.14970970417473045, + "grad_norm": 1.311723771556206, + "learning_rate": 1.925792677954513e-05, + "loss": 0.6232, + "step": 8664 + }, + { + "epoch": 0.14972698368813933, + "grad_norm": 1.2461474292252672, + "learning_rate": 1.92577151998187e-05, + "loss": 0.8198, + "step": 8665 + }, + { + "epoch": 0.14974426320154824, + "grad_norm": 1.4203803577998648, + "learning_rate": 1.9257503591096397e-05, + "loss": 0.582, + "step": 8666 + }, + { + "epoch": 0.14976154271495715, + "grad_norm": 1.2060833026239293, + "learning_rate": 1.9257291953378894e-05, + "loss": 0.6811, + "step": 8667 + }, + { + "epoch": 0.14977882222836605, + "grad_norm": 1.0623225010970037, + "learning_rate": 1.9257080286666847e-05, + "loss": 0.5188, + "step": 8668 + }, + { + "epoch": 0.14979610174177496, + "grad_norm": 0.773563241711364, + "learning_rate": 1.925686859096092e-05, + "loss": 0.6117, + "step": 8669 + }, + { + "epoch": 0.14981338125518384, + "grad_norm": 0.9916886373937044, + "learning_rate": 1.925665686626178e-05, + "loss": 0.627, + "step": 8670 + }, + { + "epoch": 0.14983066076859275, + "grad_norm": 0.8868123795986391, + "learning_rate": 1.9256445112570083e-05, + "loss": 0.6107, + "step": 8671 + }, + { + "epoch": 0.14984794028200166, + "grad_norm": 0.8507232341766584, + "learning_rate": 1.92562333298865e-05, + "loss": 0.5476, + "step": 8672 + }, + { + "epoch": 0.14986521979541056, + "grad_norm": 0.8371561905746442, + "learning_rate": 1.9256021518211688e-05, + "loss": 0.414, + "step": 8673 + }, + { + "epoch": 0.14988249930881947, + "grad_norm": 1.1218166621160957, + "learning_rate": 1.9255809677546314e-05, + "loss": 0.7258, + "step": 8674 + }, + { + "epoch": 0.14989977882222835, + "grad_norm": 1.071465338589673, + "learning_rate": 1.925559780789104e-05, + "loss": 0.5374, + "step": 8675 + }, + { + "epoch": 0.14991705833563726, + "grad_norm": 1.013102238891569, + "learning_rate": 1.9255385909246532e-05, + "loss": 0.8197, + "step": 8676 + }, + { + "epoch": 0.14993433784904617, + "grad_norm": 0.8651851952106199, + "learning_rate": 1.925517398161345e-05, + "loss": 0.4898, + "step": 8677 + }, + { + "epoch": 0.14995161736245508, + "grad_norm": 1.0179499602521327, + "learning_rate": 1.9254962024992463e-05, + "loss": 0.6469, + "step": 8678 + }, + { + "epoch": 0.14996889687586398, + "grad_norm": 0.8521882898369185, + "learning_rate": 1.925475003938423e-05, + "loss": 0.8149, + "step": 8679 + }, + { + "epoch": 0.1499861763892729, + "grad_norm": 0.9660895913944718, + "learning_rate": 1.9254538024789416e-05, + "loss": 0.4966, + "step": 8680 + }, + { + "epoch": 0.15000345590268177, + "grad_norm": 0.8338923699923748, + "learning_rate": 1.9254325981208687e-05, + "loss": 0.6569, + "step": 8681 + }, + { + "epoch": 0.15002073541609068, + "grad_norm": 1.0585277247480125, + "learning_rate": 1.9254113908642703e-05, + "loss": 0.4756, + "step": 8682 + }, + { + "epoch": 0.15003801492949959, + "grad_norm": 0.9867141176841693, + "learning_rate": 1.9253901807092133e-05, + "loss": 0.7698, + "step": 8683 + }, + { + "epoch": 0.1500552944429085, + "grad_norm": 0.7254457629754563, + "learning_rate": 1.9253689676557638e-05, + "loss": 0.3797, + "step": 8684 + }, + { + "epoch": 0.1500725739563174, + "grad_norm": 1.0164586220105893, + "learning_rate": 1.9253477517039883e-05, + "loss": 0.6766, + "step": 8685 + }, + { + "epoch": 0.15008985346972628, + "grad_norm": 1.2104714429194134, + "learning_rate": 1.9253265328539535e-05, + "loss": 0.5427, + "step": 8686 + }, + { + "epoch": 0.1501071329831352, + "grad_norm": 1.0808891750009542, + "learning_rate": 1.9253053111057256e-05, + "loss": 0.5594, + "step": 8687 + }, + { + "epoch": 0.1501244124965441, + "grad_norm": 1.074330755926273, + "learning_rate": 1.9252840864593715e-05, + "loss": 0.8622, + "step": 8688 + }, + { + "epoch": 0.150141692009953, + "grad_norm": 1.1102098221646894, + "learning_rate": 1.9252628589149568e-05, + "loss": 0.707, + "step": 8689 + }, + { + "epoch": 0.1501589715233619, + "grad_norm": 1.000369173329087, + "learning_rate": 1.9252416284725486e-05, + "loss": 0.5788, + "step": 8690 + }, + { + "epoch": 0.1501762510367708, + "grad_norm": 1.2441432574797175, + "learning_rate": 1.9252203951322134e-05, + "loss": 0.7353, + "step": 8691 + }, + { + "epoch": 0.1501935305501797, + "grad_norm": 0.527794827352565, + "learning_rate": 1.925199158894018e-05, + "loss": 0.5702, + "step": 8692 + }, + { + "epoch": 0.1502108100635886, + "grad_norm": 0.6321953316807051, + "learning_rate": 1.9251779197580278e-05, + "loss": 0.4106, + "step": 8693 + }, + { + "epoch": 0.15022808957699751, + "grad_norm": 0.7971378747253398, + "learning_rate": 1.9251566777243105e-05, + "loss": 0.6641, + "step": 8694 + }, + { + "epoch": 0.15024536909040642, + "grad_norm": 0.7325949704669109, + "learning_rate": 1.9251354327929318e-05, + "loss": 0.6043, + "step": 8695 + }, + { + "epoch": 0.15026264860381533, + "grad_norm": 1.230272420130609, + "learning_rate": 1.925114184963959e-05, + "loss": 0.567, + "step": 8696 + }, + { + "epoch": 0.1502799281172242, + "grad_norm": 0.9669591474963533, + "learning_rate": 1.925092934237458e-05, + "loss": 0.574, + "step": 8697 + }, + { + "epoch": 0.15029720763063312, + "grad_norm": 0.9555927333196476, + "learning_rate": 1.925071680613496e-05, + "loss": 0.4822, + "step": 8698 + }, + { + "epoch": 0.15031448714404202, + "grad_norm": 0.9743732353061733, + "learning_rate": 1.925050424092139e-05, + "loss": 0.757, + "step": 8699 + }, + { + "epoch": 0.15033176665745093, + "grad_norm": 0.900134318945845, + "learning_rate": 1.9250291646734537e-05, + "loss": 0.5869, + "step": 8700 + }, + { + "epoch": 0.15034904617085984, + "grad_norm": 1.3478604405534542, + "learning_rate": 1.9250079023575068e-05, + "loss": 0.6115, + "step": 8701 + }, + { + "epoch": 0.15036632568426872, + "grad_norm": 1.0895576298762588, + "learning_rate": 1.9249866371443646e-05, + "loss": 0.6113, + "step": 8702 + }, + { + "epoch": 0.15038360519767763, + "grad_norm": 0.926919778099718, + "learning_rate": 1.9249653690340944e-05, + "loss": 0.6361, + "step": 8703 + }, + { + "epoch": 0.15040088471108654, + "grad_norm": 1.0406989083544287, + "learning_rate": 1.9249440980267623e-05, + "loss": 0.5394, + "step": 8704 + }, + { + "epoch": 0.15041816422449544, + "grad_norm": 0.7204314394484378, + "learning_rate": 1.9249228241224347e-05, + "loss": 0.6801, + "step": 8705 + }, + { + "epoch": 0.15043544373790435, + "grad_norm": 1.061413944903299, + "learning_rate": 1.9249015473211784e-05, + "loss": 0.7177, + "step": 8706 + }, + { + "epoch": 0.15045272325131323, + "grad_norm": 1.0639116364010675, + "learning_rate": 1.9248802676230607e-05, + "loss": 0.6684, + "step": 8707 + }, + { + "epoch": 0.15047000276472214, + "grad_norm": 0.8262343491480458, + "learning_rate": 1.9248589850281475e-05, + "loss": 0.622, + "step": 8708 + }, + { + "epoch": 0.15048728227813105, + "grad_norm": 0.8251723303374525, + "learning_rate": 1.9248376995365053e-05, + "loss": 0.5007, + "step": 8709 + }, + { + "epoch": 0.15050456179153995, + "grad_norm": 1.4421232500774088, + "learning_rate": 1.9248164111482016e-05, + "loss": 0.7724, + "step": 8710 + }, + { + "epoch": 0.15052184130494886, + "grad_norm": 0.8720733649760248, + "learning_rate": 1.9247951198633026e-05, + "loss": 0.5858, + "step": 8711 + }, + { + "epoch": 0.15053912081835774, + "grad_norm": 1.0504324893601187, + "learning_rate": 1.9247738256818752e-05, + "loss": 0.6097, + "step": 8712 + }, + { + "epoch": 0.15055640033176665, + "grad_norm": 0.8690710161299195, + "learning_rate": 1.9247525286039855e-05, + "loss": 0.601, + "step": 8713 + }, + { + "epoch": 0.15057367984517556, + "grad_norm": 1.2244178151730771, + "learning_rate": 1.9247312286297005e-05, + "loss": 0.5735, + "step": 8714 + }, + { + "epoch": 0.15059095935858446, + "grad_norm": 0.9494089383701054, + "learning_rate": 1.9247099257590872e-05, + "loss": 0.571, + "step": 8715 + }, + { + "epoch": 0.15060823887199337, + "grad_norm": 0.8071520372735738, + "learning_rate": 1.9246886199922122e-05, + "loss": 0.7138, + "step": 8716 + }, + { + "epoch": 0.15062551838540228, + "grad_norm": 0.8388883206932699, + "learning_rate": 1.9246673113291422e-05, + "loss": 0.5256, + "step": 8717 + }, + { + "epoch": 0.15064279789881116, + "grad_norm": 0.4220166118926012, + "learning_rate": 1.9246459997699438e-05, + "loss": 0.556, + "step": 8718 + }, + { + "epoch": 0.15066007741222007, + "grad_norm": 1.4125544107869528, + "learning_rate": 1.9246246853146836e-05, + "loss": 0.5908, + "step": 8719 + }, + { + "epoch": 0.15067735692562897, + "grad_norm": 1.4074073349546234, + "learning_rate": 1.9246033679634292e-05, + "loss": 0.7517, + "step": 8720 + }, + { + "epoch": 0.15069463643903788, + "grad_norm": 0.9449716094772725, + "learning_rate": 1.924582047716246e-05, + "loss": 0.6081, + "step": 8721 + }, + { + "epoch": 0.1507119159524468, + "grad_norm": 1.2994451644330054, + "learning_rate": 1.9245607245732027e-05, + "loss": 0.8212, + "step": 8722 + }, + { + "epoch": 0.15072919546585567, + "grad_norm": 0.5332013369445376, + "learning_rate": 1.924539398534364e-05, + "loss": 0.4273, + "step": 8723 + }, + { + "epoch": 0.15074647497926458, + "grad_norm": 0.8818158418689591, + "learning_rate": 1.924518069599798e-05, + "loss": 0.6883, + "step": 8724 + }, + { + "epoch": 0.15076375449267349, + "grad_norm": 0.9099268746098521, + "learning_rate": 1.924496737769571e-05, + "loss": 0.5869, + "step": 8725 + }, + { + "epoch": 0.1507810340060824, + "grad_norm": 0.970088892455643, + "learning_rate": 1.92447540304375e-05, + "loss": 0.5001, + "step": 8726 + }, + { + "epoch": 0.1507983135194913, + "grad_norm": 1.050262086122474, + "learning_rate": 1.9244540654224018e-05, + "loss": 0.57, + "step": 8727 + }, + { + "epoch": 0.15081559303290018, + "grad_norm": 0.9901293941436468, + "learning_rate": 1.9244327249055932e-05, + "loss": 0.5976, + "step": 8728 + }, + { + "epoch": 0.1508328725463091, + "grad_norm": 0.9424385111495128, + "learning_rate": 1.9244113814933912e-05, + "loss": 0.6203, + "step": 8729 + }, + { + "epoch": 0.150850152059718, + "grad_norm": 1.032123433537072, + "learning_rate": 1.9243900351858622e-05, + "loss": 0.634, + "step": 8730 + }, + { + "epoch": 0.1508674315731269, + "grad_norm": 1.3812620947978465, + "learning_rate": 1.9243686859830736e-05, + "loss": 0.8365, + "step": 8731 + }, + { + "epoch": 0.1508847110865358, + "grad_norm": 1.3151726570128819, + "learning_rate": 1.924347333885092e-05, + "loss": 0.6349, + "step": 8732 + }, + { + "epoch": 0.15090199059994472, + "grad_norm": 1.1150681841240837, + "learning_rate": 1.924325978891984e-05, + "loss": 0.7004, + "step": 8733 + }, + { + "epoch": 0.1509192701133536, + "grad_norm": 1.2808338310451184, + "learning_rate": 1.924304621003817e-05, + "loss": 0.6936, + "step": 8734 + }, + { + "epoch": 0.1509365496267625, + "grad_norm": 0.9271561357542494, + "learning_rate": 1.924283260220658e-05, + "loss": 0.5305, + "step": 8735 + }, + { + "epoch": 0.1509538291401714, + "grad_norm": 0.7761006351899025, + "learning_rate": 1.924261896542573e-05, + "loss": 0.4382, + "step": 8736 + }, + { + "epoch": 0.15097110865358032, + "grad_norm": 1.435440048492759, + "learning_rate": 1.92424052996963e-05, + "loss": 0.6674, + "step": 8737 + }, + { + "epoch": 0.15098838816698923, + "grad_norm": 0.9314038967685451, + "learning_rate": 1.9242191605018953e-05, + "loss": 0.6717, + "step": 8738 + }, + { + "epoch": 0.1510056676803981, + "grad_norm": 1.1123813030548355, + "learning_rate": 1.924197788139436e-05, + "loss": 0.4625, + "step": 8739 + }, + { + "epoch": 0.15102294719380702, + "grad_norm": 0.9095443469007608, + "learning_rate": 1.924176412882319e-05, + "loss": 0.678, + "step": 8740 + }, + { + "epoch": 0.15104022670721592, + "grad_norm": 0.5363871158119699, + "learning_rate": 1.924155034730611e-05, + "loss": 0.5241, + "step": 8741 + }, + { + "epoch": 0.15105750622062483, + "grad_norm": 1.0455760007681842, + "learning_rate": 1.924133653684379e-05, + "loss": 0.7192, + "step": 8742 + }, + { + "epoch": 0.15107478573403374, + "grad_norm": 0.9232897918433225, + "learning_rate": 1.9241122697436908e-05, + "loss": 0.5727, + "step": 8743 + }, + { + "epoch": 0.15109206524744262, + "grad_norm": 0.901808174156652, + "learning_rate": 1.924090882908612e-05, + "loss": 0.6385, + "step": 8744 + }, + { + "epoch": 0.15110934476085153, + "grad_norm": 1.16690807792207, + "learning_rate": 1.9240694931792113e-05, + "loss": 0.7093, + "step": 8745 + }, + { + "epoch": 0.15112662427426043, + "grad_norm": 1.2413935928838358, + "learning_rate": 1.9240481005555538e-05, + "loss": 0.643, + "step": 8746 + }, + { + "epoch": 0.15114390378766934, + "grad_norm": 0.8959495692893263, + "learning_rate": 1.924026705037708e-05, + "loss": 0.5672, + "step": 8747 + }, + { + "epoch": 0.15116118330107825, + "grad_norm": 0.8851071934161732, + "learning_rate": 1.92400530662574e-05, + "loss": 0.6459, + "step": 8748 + }, + { + "epoch": 0.15117846281448716, + "grad_norm": 1.275781911908415, + "learning_rate": 1.9239839053197174e-05, + "loss": 0.7649, + "step": 8749 + }, + { + "epoch": 0.15119574232789604, + "grad_norm": 1.4045683035257062, + "learning_rate": 1.9239625011197067e-05, + "loss": 0.5715, + "step": 8750 + }, + { + "epoch": 0.15121302184130495, + "grad_norm": 0.9702233419360871, + "learning_rate": 1.9239410940257754e-05, + "loss": 0.5792, + "step": 8751 + }, + { + "epoch": 0.15123030135471385, + "grad_norm": 1.564957463234469, + "learning_rate": 1.9239196840379906e-05, + "loss": 0.6927, + "step": 8752 + }, + { + "epoch": 0.15124758086812276, + "grad_norm": 1.0894089559859976, + "learning_rate": 1.9238982711564187e-05, + "loss": 0.6009, + "step": 8753 + }, + { + "epoch": 0.15126486038153167, + "grad_norm": 0.9242420131101201, + "learning_rate": 1.9238768553811275e-05, + "loss": 0.551, + "step": 8754 + }, + { + "epoch": 0.15128213989494055, + "grad_norm": 1.2084383860644692, + "learning_rate": 1.9238554367121836e-05, + "loss": 0.5247, + "step": 8755 + }, + { + "epoch": 0.15129941940834946, + "grad_norm": 1.1670728154821997, + "learning_rate": 1.9238340151496543e-05, + "loss": 0.5043, + "step": 8756 + }, + { + "epoch": 0.15131669892175836, + "grad_norm": 0.9970044795105258, + "learning_rate": 1.923812590693607e-05, + "loss": 0.8757, + "step": 8757 + }, + { + "epoch": 0.15133397843516727, + "grad_norm": 0.9023766467610115, + "learning_rate": 1.923791163344108e-05, + "loss": 0.7257, + "step": 8758 + }, + { + "epoch": 0.15135125794857618, + "grad_norm": 0.729634803347854, + "learning_rate": 1.9237697331012252e-05, + "loss": 0.4703, + "step": 8759 + }, + { + "epoch": 0.15136853746198506, + "grad_norm": 1.0627556614042677, + "learning_rate": 1.9237482999650254e-05, + "loss": 0.5045, + "step": 8760 + }, + { + "epoch": 0.15138581697539397, + "grad_norm": 0.7117212104107771, + "learning_rate": 1.923726863935575e-05, + "loss": 0.4692, + "step": 8761 + }, + { + "epoch": 0.15140309648880287, + "grad_norm": 1.2438323758958696, + "learning_rate": 1.923705425012943e-05, + "loss": 0.4319, + "step": 8762 + }, + { + "epoch": 0.15142037600221178, + "grad_norm": 0.6926201223766383, + "learning_rate": 1.9236839831971944e-05, + "loss": 0.712, + "step": 8763 + }, + { + "epoch": 0.1514376555156207, + "grad_norm": 1.1443916243319643, + "learning_rate": 1.923662538488398e-05, + "loss": 0.6581, + "step": 8764 + }, + { + "epoch": 0.15145493502902957, + "grad_norm": 0.49311274682926626, + "learning_rate": 1.92364109088662e-05, + "loss": 0.758, + "step": 8765 + }, + { + "epoch": 0.15147221454243848, + "grad_norm": 1.0255725967137026, + "learning_rate": 1.9236196403919282e-05, + "loss": 0.6305, + "step": 8766 + }, + { + "epoch": 0.15148949405584738, + "grad_norm": 0.41414068481033994, + "learning_rate": 1.9235981870043893e-05, + "loss": 0.5495, + "step": 8767 + }, + { + "epoch": 0.1515067735692563, + "grad_norm": 0.9885964598401417, + "learning_rate": 1.9235767307240706e-05, + "loss": 0.6895, + "step": 8768 + }, + { + "epoch": 0.1515240530826652, + "grad_norm": 1.3261206019473866, + "learning_rate": 1.9235552715510396e-05, + "loss": 0.7493, + "step": 8769 + }, + { + "epoch": 0.1515413325960741, + "grad_norm": 0.9781884301605361, + "learning_rate": 1.9235338094853632e-05, + "loss": 0.6679, + "step": 8770 + }, + { + "epoch": 0.151558612109483, + "grad_norm": 1.4317256539071124, + "learning_rate": 1.9235123445271083e-05, + "loss": 0.7116, + "step": 8771 + }, + { + "epoch": 0.1515758916228919, + "grad_norm": 0.8896724010673066, + "learning_rate": 1.9234908766763433e-05, + "loss": 0.5961, + "step": 8772 + }, + { + "epoch": 0.1515931711363008, + "grad_norm": 1.7630107712668583, + "learning_rate": 1.9234694059331342e-05, + "loss": 0.8303, + "step": 8773 + }, + { + "epoch": 0.1516104506497097, + "grad_norm": 0.7194558738306254, + "learning_rate": 1.923447932297549e-05, + "loss": 0.4313, + "step": 8774 + }, + { + "epoch": 0.15162773016311862, + "grad_norm": 1.1361013932238422, + "learning_rate": 1.9234264557696546e-05, + "loss": 0.5992, + "step": 8775 + }, + { + "epoch": 0.1516450096765275, + "grad_norm": 1.1003707546256556, + "learning_rate": 1.9234049763495183e-05, + "loss": 0.5018, + "step": 8776 + }, + { + "epoch": 0.1516622891899364, + "grad_norm": 0.8918173148914629, + "learning_rate": 1.9233834940372077e-05, + "loss": 0.3982, + "step": 8777 + }, + { + "epoch": 0.1516795687033453, + "grad_norm": 1.2400915703895141, + "learning_rate": 1.9233620088327894e-05, + "loss": 0.6119, + "step": 8778 + }, + { + "epoch": 0.15169684821675422, + "grad_norm": 1.1233789943041645, + "learning_rate": 1.9233405207363316e-05, + "loss": 0.7538, + "step": 8779 + }, + { + "epoch": 0.15171412773016313, + "grad_norm": 1.1565745444302018, + "learning_rate": 1.9233190297479008e-05, + "loss": 0.6894, + "step": 8780 + }, + { + "epoch": 0.151731407243572, + "grad_norm": 1.0489426127673058, + "learning_rate": 1.9232975358675646e-05, + "loss": 0.8163, + "step": 8781 + }, + { + "epoch": 0.15174868675698092, + "grad_norm": 0.9882955696563934, + "learning_rate": 1.9232760390953905e-05, + "loss": 0.896, + "step": 8782 + }, + { + "epoch": 0.15176596627038982, + "grad_norm": 1.2535432629778098, + "learning_rate": 1.9232545394314456e-05, + "loss": 0.8519, + "step": 8783 + }, + { + "epoch": 0.15178324578379873, + "grad_norm": 1.1760912874085407, + "learning_rate": 1.9232330368757973e-05, + "loss": 0.9207, + "step": 8784 + }, + { + "epoch": 0.15180052529720764, + "grad_norm": 0.4277331308828707, + "learning_rate": 1.923211531428513e-05, + "loss": 0.448, + "step": 8785 + }, + { + "epoch": 0.15181780481061655, + "grad_norm": 0.8899675064423587, + "learning_rate": 1.9231900230896602e-05, + "loss": 0.7031, + "step": 8786 + }, + { + "epoch": 0.15183508432402543, + "grad_norm": 0.9657708155665893, + "learning_rate": 1.923168511859306e-05, + "loss": 0.5429, + "step": 8787 + }, + { + "epoch": 0.15185236383743433, + "grad_norm": 0.8855935240443941, + "learning_rate": 1.9231469977375176e-05, + "loss": 0.4765, + "step": 8788 + }, + { + "epoch": 0.15186964335084324, + "grad_norm": 0.9251935537820712, + "learning_rate": 1.923125480724363e-05, + "loss": 0.7478, + "step": 8789 + }, + { + "epoch": 0.15188692286425215, + "grad_norm": 0.6788904771112154, + "learning_rate": 1.9231039608199088e-05, + "loss": 0.665, + "step": 8790 + }, + { + "epoch": 0.15190420237766106, + "grad_norm": 0.7217702963918121, + "learning_rate": 1.9230824380242232e-05, + "loss": 0.5021, + "step": 8791 + }, + { + "epoch": 0.15192148189106994, + "grad_norm": 0.6846313775350263, + "learning_rate": 1.923060912337373e-05, + "loss": 0.6055, + "step": 8792 + }, + { + "epoch": 0.15193876140447884, + "grad_norm": 0.6878711898731441, + "learning_rate": 1.9230393837594265e-05, + "loss": 0.7341, + "step": 8793 + }, + { + "epoch": 0.15195604091788775, + "grad_norm": 0.9657807028957495, + "learning_rate": 1.9230178522904498e-05, + "loss": 0.5723, + "step": 8794 + }, + { + "epoch": 0.15197332043129666, + "grad_norm": 0.8561302208855384, + "learning_rate": 1.9229963179305114e-05, + "loss": 0.8033, + "step": 8795 + }, + { + "epoch": 0.15199059994470557, + "grad_norm": 0.5757998323852819, + "learning_rate": 1.922974780679678e-05, + "loss": 0.8403, + "step": 8796 + }, + { + "epoch": 0.15200787945811445, + "grad_norm": 1.2239779641492268, + "learning_rate": 1.9229532405380177e-05, + "loss": 0.6389, + "step": 8797 + }, + { + "epoch": 0.15202515897152336, + "grad_norm": 0.7005385470699536, + "learning_rate": 1.9229316975055976e-05, + "loss": 0.5843, + "step": 8798 + }, + { + "epoch": 0.15204243848493226, + "grad_norm": 1.2350309929924315, + "learning_rate": 1.922910151582486e-05, + "loss": 0.5477, + "step": 8799 + }, + { + "epoch": 0.15205971799834117, + "grad_norm": 1.2371841664780352, + "learning_rate": 1.9228886027687488e-05, + "loss": 0.6403, + "step": 8800 + }, + { + "epoch": 0.15207699751175008, + "grad_norm": 1.1765173996857605, + "learning_rate": 1.9228670510644545e-05, + "loss": 0.6476, + "step": 8801 + }, + { + "epoch": 0.15209427702515896, + "grad_norm": 0.7752660154868111, + "learning_rate": 1.9228454964696704e-05, + "loss": 0.5244, + "step": 8802 + }, + { + "epoch": 0.15211155653856787, + "grad_norm": 0.5578126137118684, + "learning_rate": 1.9228239389844644e-05, + "loss": 0.6903, + "step": 8803 + }, + { + "epoch": 0.15212883605197677, + "grad_norm": 0.36576364064617184, + "learning_rate": 1.9228023786089034e-05, + "loss": 0.6513, + "step": 8804 + }, + { + "epoch": 0.15214611556538568, + "grad_norm": 0.7935128415216143, + "learning_rate": 1.9227808153430553e-05, + "loss": 0.4227, + "step": 8805 + }, + { + "epoch": 0.1521633950787946, + "grad_norm": 0.9446520379015878, + "learning_rate": 1.9227592491869878e-05, + "loss": 0.6186, + "step": 8806 + }, + { + "epoch": 0.1521806745922035, + "grad_norm": 1.316802863208861, + "learning_rate": 1.922737680140768e-05, + "loss": 0.5597, + "step": 8807 + }, + { + "epoch": 0.15219795410561238, + "grad_norm": 1.1392045991623898, + "learning_rate": 1.9227161082044636e-05, + "loss": 0.7518, + "step": 8808 + }, + { + "epoch": 0.15221523361902128, + "grad_norm": 0.7039143596277453, + "learning_rate": 1.9226945333781424e-05, + "loss": 0.5351, + "step": 8809 + }, + { + "epoch": 0.1522325131324302, + "grad_norm": 0.7573556599449879, + "learning_rate": 1.922672955661872e-05, + "loss": 0.6775, + "step": 8810 + }, + { + "epoch": 0.1522497926458391, + "grad_norm": 0.7848824109779052, + "learning_rate": 1.922651375055719e-05, + "loss": 0.835, + "step": 8811 + }, + { + "epoch": 0.152267072159248, + "grad_norm": 0.8869252166602493, + "learning_rate": 1.9226297915597524e-05, + "loss": 0.4559, + "step": 8812 + }, + { + "epoch": 0.1522843516726569, + "grad_norm": 0.957827569077347, + "learning_rate": 1.922608205174039e-05, + "loss": 0.7096, + "step": 8813 + }, + { + "epoch": 0.1523016311860658, + "grad_norm": 0.634752062231683, + "learning_rate": 1.9225866158986467e-05, + "loss": 0.4803, + "step": 8814 + }, + { + "epoch": 0.1523189106994747, + "grad_norm": 0.6929948910064289, + "learning_rate": 1.9225650237336427e-05, + "loss": 0.6422, + "step": 8815 + }, + { + "epoch": 0.1523361902128836, + "grad_norm": 1.1018462584387902, + "learning_rate": 1.9225434286790957e-05, + "loss": 0.85, + "step": 8816 + }, + { + "epoch": 0.15235346972629252, + "grad_norm": 0.8529908032921647, + "learning_rate": 1.9225218307350715e-05, + "loss": 0.6191, + "step": 8817 + }, + { + "epoch": 0.1523707492397014, + "grad_norm": 0.9194510881235536, + "learning_rate": 1.9225002299016395e-05, + "loss": 0.488, + "step": 8818 + }, + { + "epoch": 0.1523880287531103, + "grad_norm": 0.9121986643623475, + "learning_rate": 1.9224786261788666e-05, + "loss": 0.6508, + "step": 8819 + }, + { + "epoch": 0.1524053082665192, + "grad_norm": 1.175234265894662, + "learning_rate": 1.9224570195668202e-05, + "loss": 0.5893, + "step": 8820 + }, + { + "epoch": 0.15242258777992812, + "grad_norm": 0.788684426441643, + "learning_rate": 1.922435410065569e-05, + "loss": 0.5392, + "step": 8821 + }, + { + "epoch": 0.15243986729333703, + "grad_norm": 0.6795355856219871, + "learning_rate": 1.9224137976751797e-05, + "loss": 0.5179, + "step": 8822 + }, + { + "epoch": 0.15245714680674594, + "grad_norm": 1.1033537913035703, + "learning_rate": 1.92239218239572e-05, + "loss": 0.7626, + "step": 8823 + }, + { + "epoch": 0.15247442632015482, + "grad_norm": 1.1341852918037094, + "learning_rate": 1.922370564227258e-05, + "loss": 0.7169, + "step": 8824 + }, + { + "epoch": 0.15249170583356372, + "grad_norm": 1.3751413321504644, + "learning_rate": 1.9223489431698615e-05, + "loss": 0.822, + "step": 8825 + }, + { + "epoch": 0.15250898534697263, + "grad_norm": 1.045304769861345, + "learning_rate": 1.9223273192235976e-05, + "loss": 0.669, + "step": 8826 + }, + { + "epoch": 0.15252626486038154, + "grad_norm": 0.7011077657635338, + "learning_rate": 1.922305692388535e-05, + "loss": 0.5545, + "step": 8827 + }, + { + "epoch": 0.15254354437379045, + "grad_norm": 0.9518786302046587, + "learning_rate": 1.9222840626647403e-05, + "loss": 0.8591, + "step": 8828 + }, + { + "epoch": 0.15256082388719933, + "grad_norm": 1.3633221600840255, + "learning_rate": 1.922262430052282e-05, + "loss": 0.9107, + "step": 8829 + }, + { + "epoch": 0.15257810340060823, + "grad_norm": 0.6365797243379848, + "learning_rate": 1.9222407945512277e-05, + "loss": 0.3745, + "step": 8830 + }, + { + "epoch": 0.15259538291401714, + "grad_norm": 0.7946794459029962, + "learning_rate": 1.9222191561616453e-05, + "loss": 0.764, + "step": 8831 + }, + { + "epoch": 0.15261266242742605, + "grad_norm": 1.2724163667034354, + "learning_rate": 1.9221975148836023e-05, + "loss": 0.6273, + "step": 8832 + }, + { + "epoch": 0.15262994194083496, + "grad_norm": 1.0012036489624159, + "learning_rate": 1.9221758707171668e-05, + "loss": 0.5224, + "step": 8833 + }, + { + "epoch": 0.15264722145424384, + "grad_norm": 0.9367445220600625, + "learning_rate": 1.9221542236624062e-05, + "loss": 0.8398, + "step": 8834 + }, + { + "epoch": 0.15266450096765274, + "grad_norm": 0.9416705560837997, + "learning_rate": 1.9221325737193886e-05, + "loss": 0.7088, + "step": 8835 + }, + { + "epoch": 0.15268178048106165, + "grad_norm": 0.6363738168377354, + "learning_rate": 1.9221109208881814e-05, + "loss": 0.6268, + "step": 8836 + }, + { + "epoch": 0.15269905999447056, + "grad_norm": 1.4241863245165203, + "learning_rate": 1.922089265168853e-05, + "loss": 0.7876, + "step": 8837 + }, + { + "epoch": 0.15271633950787947, + "grad_norm": 0.9610323641462302, + "learning_rate": 1.9220676065614707e-05, + "loss": 0.5505, + "step": 8838 + }, + { + "epoch": 0.15273361902128835, + "grad_norm": 0.7959006590523082, + "learning_rate": 1.9220459450661028e-05, + "loss": 0.6523, + "step": 8839 + }, + { + "epoch": 0.15275089853469725, + "grad_norm": 1.4881700019922919, + "learning_rate": 1.922024280682817e-05, + "loss": 0.69, + "step": 8840 + }, + { + "epoch": 0.15276817804810616, + "grad_norm": 0.5839692212907536, + "learning_rate": 1.922002613411681e-05, + "loss": 0.5723, + "step": 8841 + }, + { + "epoch": 0.15278545756151507, + "grad_norm": 1.120503507316995, + "learning_rate": 1.9219809432527627e-05, + "loss": 0.5695, + "step": 8842 + }, + { + "epoch": 0.15280273707492398, + "grad_norm": 0.569504665127084, + "learning_rate": 1.92195927020613e-05, + "loss": 0.8941, + "step": 8843 + }, + { + "epoch": 0.15282001658833289, + "grad_norm": 1.1139737202075104, + "learning_rate": 1.9219375942718508e-05, + "loss": 0.5134, + "step": 8844 + }, + { + "epoch": 0.15283729610174177, + "grad_norm": 0.4173655147638708, + "learning_rate": 1.9219159154499933e-05, + "loss": 0.5063, + "step": 8845 + }, + { + "epoch": 0.15285457561515067, + "grad_norm": 1.9246443229095265, + "learning_rate": 1.9218942337406245e-05, + "loss": 0.643, + "step": 8846 + }, + { + "epoch": 0.15287185512855958, + "grad_norm": 0.8045127272479261, + "learning_rate": 1.9218725491438133e-05, + "loss": 0.5337, + "step": 8847 + }, + { + "epoch": 0.1528891346419685, + "grad_norm": 1.288555293196116, + "learning_rate": 1.921850861659627e-05, + "loss": 0.6076, + "step": 8848 + }, + { + "epoch": 0.1529064141553774, + "grad_norm": 0.8561333014393345, + "learning_rate": 1.9218291712881337e-05, + "loss": 0.5002, + "step": 8849 + }, + { + "epoch": 0.15292369366878628, + "grad_norm": 0.935167827979591, + "learning_rate": 1.921807478029402e-05, + "loss": 0.5282, + "step": 8850 + }, + { + "epoch": 0.15294097318219518, + "grad_norm": 1.5198452088893997, + "learning_rate": 1.9217857818834983e-05, + "loss": 0.5995, + "step": 8851 + }, + { + "epoch": 0.1529582526956041, + "grad_norm": 0.772400096125955, + "learning_rate": 1.921764082850492e-05, + "loss": 0.8615, + "step": 8852 + }, + { + "epoch": 0.152975532209013, + "grad_norm": 0.7366117350268087, + "learning_rate": 1.9217423809304504e-05, + "loss": 0.4293, + "step": 8853 + }, + { + "epoch": 0.1529928117224219, + "grad_norm": 0.9004129619728175, + "learning_rate": 1.9217206761234418e-05, + "loss": 0.5161, + "step": 8854 + }, + { + "epoch": 0.1530100912358308, + "grad_norm": 1.010801441702086, + "learning_rate": 1.921698968429534e-05, + "loss": 0.6937, + "step": 8855 + }, + { + "epoch": 0.1530273707492397, + "grad_norm": 0.8548363698091194, + "learning_rate": 1.9216772578487947e-05, + "loss": 0.5207, + "step": 8856 + }, + { + "epoch": 0.1530446502626486, + "grad_norm": 0.847349422002707, + "learning_rate": 1.9216555443812927e-05, + "loss": 0.4829, + "step": 8857 + }, + { + "epoch": 0.1530619297760575, + "grad_norm": 1.402273018240709, + "learning_rate": 1.921633828027095e-05, + "loss": 0.764, + "step": 8858 + }, + { + "epoch": 0.15307920928946642, + "grad_norm": 1.1329435393693164, + "learning_rate": 1.9216121087862702e-05, + "loss": 0.7982, + "step": 8859 + }, + { + "epoch": 0.15309648880287532, + "grad_norm": 0.9939545408924508, + "learning_rate": 1.9215903866588863e-05, + "loss": 0.6607, + "step": 8860 + }, + { + "epoch": 0.1531137683162842, + "grad_norm": 0.6680280860658433, + "learning_rate": 1.9215686616450113e-05, + "loss": 0.6653, + "step": 8861 + }, + { + "epoch": 0.1531310478296931, + "grad_norm": 0.9231112739966844, + "learning_rate": 1.9215469337447133e-05, + "loss": 0.6863, + "step": 8862 + }, + { + "epoch": 0.15314832734310202, + "grad_norm": 0.8908086737626311, + "learning_rate": 1.9215252029580604e-05, + "loss": 0.618, + "step": 8863 + }, + { + "epoch": 0.15316560685651093, + "grad_norm": 1.0993747954466324, + "learning_rate": 1.9215034692851197e-05, + "loss": 0.5369, + "step": 8864 + }, + { + "epoch": 0.15318288636991984, + "grad_norm": 1.3934367401881287, + "learning_rate": 1.921481732725961e-05, + "loss": 0.7882, + "step": 8865 + }, + { + "epoch": 0.15320016588332871, + "grad_norm": 0.8277368022623995, + "learning_rate": 1.9214599932806513e-05, + "loss": 0.716, + "step": 8866 + }, + { + "epoch": 0.15321744539673762, + "grad_norm": 1.2451407482994754, + "learning_rate": 1.9214382509492585e-05, + "loss": 0.5031, + "step": 8867 + }, + { + "epoch": 0.15323472491014653, + "grad_norm": 0.8968552732013748, + "learning_rate": 1.9214165057318514e-05, + "loss": 0.6477, + "step": 8868 + }, + { + "epoch": 0.15325200442355544, + "grad_norm": 1.0841375810469842, + "learning_rate": 1.9213947576284976e-05, + "loss": 0.5412, + "step": 8869 + }, + { + "epoch": 0.15326928393696435, + "grad_norm": 0.9488464541597249, + "learning_rate": 1.9213730066392656e-05, + "loss": 0.5296, + "step": 8870 + }, + { + "epoch": 0.15328656345037323, + "grad_norm": 1.0840821210992864, + "learning_rate": 1.921351252764223e-05, + "loss": 0.8916, + "step": 8871 + }, + { + "epoch": 0.15330384296378213, + "grad_norm": 1.007734349941042, + "learning_rate": 1.9213294960034384e-05, + "loss": 0.5849, + "step": 8872 + }, + { + "epoch": 0.15332112247719104, + "grad_norm": 0.9380509407243967, + "learning_rate": 1.92130773635698e-05, + "loss": 0.4357, + "step": 8873 + }, + { + "epoch": 0.15333840199059995, + "grad_norm": 0.9725868723933541, + "learning_rate": 1.9212859738249158e-05, + "loss": 0.5684, + "step": 8874 + }, + { + "epoch": 0.15335568150400886, + "grad_norm": 0.9952951343244781, + "learning_rate": 1.9212642084073137e-05, + "loss": 0.6636, + "step": 8875 + }, + { + "epoch": 0.15337296101741774, + "grad_norm": 1.1171552182328788, + "learning_rate": 1.921242440104242e-05, + "loss": 0.5346, + "step": 8876 + }, + { + "epoch": 0.15339024053082664, + "grad_norm": 1.0358200990455966, + "learning_rate": 1.921220668915769e-05, + "loss": 0.6531, + "step": 8877 + }, + { + "epoch": 0.15340752004423555, + "grad_norm": 0.8222385209216609, + "learning_rate": 1.9211988948419632e-05, + "loss": 0.6635, + "step": 8878 + }, + { + "epoch": 0.15342479955764446, + "grad_norm": 0.9111509976198914, + "learning_rate": 1.9211771178828918e-05, + "loss": 0.6056, + "step": 8879 + }, + { + "epoch": 0.15344207907105337, + "grad_norm": 1.1721597747776298, + "learning_rate": 1.9211553380386243e-05, + "loss": 0.8202, + "step": 8880 + }, + { + "epoch": 0.15345935858446227, + "grad_norm": 0.7012202701833051, + "learning_rate": 1.921133555309228e-05, + "loss": 0.6007, + "step": 8881 + }, + { + "epoch": 0.15347663809787115, + "grad_norm": 0.5882816798080234, + "learning_rate": 1.9211117696947714e-05, + "loss": 0.5863, + "step": 8882 + }, + { + "epoch": 0.15349391761128006, + "grad_norm": 1.0932213219995712, + "learning_rate": 1.9210899811953227e-05, + "loss": 0.6069, + "step": 8883 + }, + { + "epoch": 0.15351119712468897, + "grad_norm": 0.707994433368973, + "learning_rate": 1.9210681898109502e-05, + "loss": 0.4658, + "step": 8884 + }, + { + "epoch": 0.15352847663809788, + "grad_norm": 0.9035235394173541, + "learning_rate": 1.9210463955417224e-05, + "loss": 0.6522, + "step": 8885 + }, + { + "epoch": 0.15354575615150678, + "grad_norm": 1.500418645923913, + "learning_rate": 1.921024598387707e-05, + "loss": 0.7092, + "step": 8886 + }, + { + "epoch": 0.15356303566491566, + "grad_norm": 0.447404050810499, + "learning_rate": 1.9210027983489726e-05, + "loss": 0.4822, + "step": 8887 + }, + { + "epoch": 0.15358031517832457, + "grad_norm": 1.1807394998886844, + "learning_rate": 1.9209809954255875e-05, + "loss": 0.6372, + "step": 8888 + }, + { + "epoch": 0.15359759469173348, + "grad_norm": 1.5901705603931955, + "learning_rate": 1.9209591896176198e-05, + "loss": 0.6505, + "step": 8889 + }, + { + "epoch": 0.1536148742051424, + "grad_norm": 0.4001947867269083, + "learning_rate": 1.920937380925138e-05, + "loss": 0.483, + "step": 8890 + }, + { + "epoch": 0.1536321537185513, + "grad_norm": 1.210817662125712, + "learning_rate": 1.9209155693482106e-05, + "loss": 0.8637, + "step": 8891 + }, + { + "epoch": 0.15364943323196018, + "grad_norm": 1.0693529251475082, + "learning_rate": 1.9208937548869052e-05, + "loss": 0.7102, + "step": 8892 + }, + { + "epoch": 0.15366671274536908, + "grad_norm": 0.805593885268236, + "learning_rate": 1.920871937541291e-05, + "loss": 0.575, + "step": 8893 + }, + { + "epoch": 0.153683992258778, + "grad_norm": 0.8844770220411725, + "learning_rate": 1.9208501173114354e-05, + "loss": 0.6002, + "step": 8894 + }, + { + "epoch": 0.1537012717721869, + "grad_norm": 1.238304351860521, + "learning_rate": 1.9208282941974076e-05, + "loss": 0.8598, + "step": 8895 + }, + { + "epoch": 0.1537185512855958, + "grad_norm": 1.1078787139407318, + "learning_rate": 1.9208064681992757e-05, + "loss": 0.4518, + "step": 8896 + }, + { + "epoch": 0.1537358307990047, + "grad_norm": 1.0560052426273987, + "learning_rate": 1.9207846393171076e-05, + "loss": 0.6816, + "step": 8897 + }, + { + "epoch": 0.1537531103124136, + "grad_norm": 0.7650424640472688, + "learning_rate": 1.9207628075509723e-05, + "loss": 0.7434, + "step": 8898 + }, + { + "epoch": 0.1537703898258225, + "grad_norm": 0.6746154472584391, + "learning_rate": 1.920740972900938e-05, + "loss": 0.6089, + "step": 8899 + }, + { + "epoch": 0.1537876693392314, + "grad_norm": 0.7927173384998304, + "learning_rate": 1.920719135367073e-05, + "loss": 0.4659, + "step": 8900 + }, + { + "epoch": 0.15380494885264032, + "grad_norm": 0.9737255953979065, + "learning_rate": 1.9206972949494455e-05, + "loss": 0.6407, + "step": 8901 + }, + { + "epoch": 0.15382222836604922, + "grad_norm": 0.5468940955280369, + "learning_rate": 1.920675451648124e-05, + "loss": 0.677, + "step": 8902 + }, + { + "epoch": 0.1538395078794581, + "grad_norm": 0.6369625222698285, + "learning_rate": 1.9206536054631773e-05, + "loss": 0.5398, + "step": 8903 + }, + { + "epoch": 0.153856787392867, + "grad_norm": 1.3128406647935817, + "learning_rate": 1.9206317563946733e-05, + "loss": 0.6421, + "step": 8904 + }, + { + "epoch": 0.15387406690627592, + "grad_norm": 0.446453957153047, + "learning_rate": 1.9206099044426806e-05, + "loss": 0.7135, + "step": 8905 + }, + { + "epoch": 0.15389134641968483, + "grad_norm": 1.1705908709417379, + "learning_rate": 1.920588049607268e-05, + "loss": 0.6604, + "step": 8906 + }, + { + "epoch": 0.15390862593309373, + "grad_norm": 1.3750953552418685, + "learning_rate": 1.9205661918885036e-05, + "loss": 0.5709, + "step": 8907 + }, + { + "epoch": 0.15392590544650261, + "grad_norm": 1.0619782352985934, + "learning_rate": 1.9205443312864557e-05, + "loss": 0.5922, + "step": 8908 + }, + { + "epoch": 0.15394318495991152, + "grad_norm": 0.8667504791602452, + "learning_rate": 1.9205224678011933e-05, + "loss": 0.6251, + "step": 8909 + }, + { + "epoch": 0.15396046447332043, + "grad_norm": 0.9448169568828669, + "learning_rate": 1.9205006014327847e-05, + "loss": 0.5814, + "step": 8910 + }, + { + "epoch": 0.15397774398672934, + "grad_norm": 0.944391040389232, + "learning_rate": 1.9204787321812978e-05, + "loss": 0.5844, + "step": 8911 + }, + { + "epoch": 0.15399502350013825, + "grad_norm": 1.5148287049875724, + "learning_rate": 1.9204568600468017e-05, + "loss": 0.5088, + "step": 8912 + }, + { + "epoch": 0.15401230301354712, + "grad_norm": 1.0990973342729526, + "learning_rate": 1.9204349850293647e-05, + "loss": 0.7809, + "step": 8913 + }, + { + "epoch": 0.15402958252695603, + "grad_norm": 1.129661493639989, + "learning_rate": 1.9204131071290558e-05, + "loss": 0.7907, + "step": 8914 + }, + { + "epoch": 0.15404686204036494, + "grad_norm": 0.8449418276886774, + "learning_rate": 1.9203912263459427e-05, + "loss": 0.6779, + "step": 8915 + }, + { + "epoch": 0.15406414155377385, + "grad_norm": 0.8916993547554316, + "learning_rate": 1.9203693426800943e-05, + "loss": 0.5045, + "step": 8916 + }, + { + "epoch": 0.15408142106718276, + "grad_norm": 0.8699963829305708, + "learning_rate": 1.9203474561315794e-05, + "loss": 0.5787, + "step": 8917 + }, + { + "epoch": 0.15409870058059166, + "grad_norm": 0.8745767701289391, + "learning_rate": 1.9203255667004663e-05, + "loss": 0.5981, + "step": 8918 + }, + { + "epoch": 0.15411598009400054, + "grad_norm": 0.9898292328688146, + "learning_rate": 1.9203036743868235e-05, + "loss": 0.8619, + "step": 8919 + }, + { + "epoch": 0.15413325960740945, + "grad_norm": 0.6133890209107429, + "learning_rate": 1.9202817791907198e-05, + "loss": 0.6779, + "step": 8920 + }, + { + "epoch": 0.15415053912081836, + "grad_norm": 0.6729413818171173, + "learning_rate": 1.9202598811122236e-05, + "loss": 0.5286, + "step": 8921 + }, + { + "epoch": 0.15416781863422727, + "grad_norm": 0.7750123159930755, + "learning_rate": 1.9202379801514034e-05, + "loss": 0.7102, + "step": 8922 + }, + { + "epoch": 0.15418509814763617, + "grad_norm": 1.0927755357647202, + "learning_rate": 1.9202160763083276e-05, + "loss": 0.6391, + "step": 8923 + }, + { + "epoch": 0.15420237766104505, + "grad_norm": 0.9233603688672722, + "learning_rate": 1.9201941695830656e-05, + "loss": 0.4629, + "step": 8924 + }, + { + "epoch": 0.15421965717445396, + "grad_norm": 1.2309336967479068, + "learning_rate": 1.9201722599756853e-05, + "loss": 0.7592, + "step": 8925 + }, + { + "epoch": 0.15423693668786287, + "grad_norm": 0.45451550575131505, + "learning_rate": 1.9201503474862558e-05, + "loss": 0.7426, + "step": 8926 + }, + { + "epoch": 0.15425421620127178, + "grad_norm": 0.44886995997091333, + "learning_rate": 1.920128432114845e-05, + "loss": 0.5394, + "step": 8927 + }, + { + "epoch": 0.15427149571468068, + "grad_norm": 0.9512568625441623, + "learning_rate": 1.9201065138615223e-05, + "loss": 0.5175, + "step": 8928 + }, + { + "epoch": 0.15428877522808956, + "grad_norm": 1.08359631552478, + "learning_rate": 1.9200845927263563e-05, + "loss": 0.5716, + "step": 8929 + }, + { + "epoch": 0.15430605474149847, + "grad_norm": 0.9837667700240535, + "learning_rate": 1.920062668709415e-05, + "loss": 0.355, + "step": 8930 + }, + { + "epoch": 0.15432333425490738, + "grad_norm": 0.9128118237245192, + "learning_rate": 1.920040741810768e-05, + "loss": 0.746, + "step": 8931 + }, + { + "epoch": 0.1543406137683163, + "grad_norm": 1.1727457659305662, + "learning_rate": 1.920018812030483e-05, + "loss": 0.7015, + "step": 8932 + }, + { + "epoch": 0.1543578932817252, + "grad_norm": 1.1978113840535083, + "learning_rate": 1.9199968793686295e-05, + "loss": 0.7201, + "step": 8933 + }, + { + "epoch": 0.1543751727951341, + "grad_norm": 0.9183017957461818, + "learning_rate": 1.9199749438252756e-05, + "loss": 0.4871, + "step": 8934 + }, + { + "epoch": 0.15439245230854298, + "grad_norm": 1.3532416084573713, + "learning_rate": 1.9199530054004907e-05, + "loss": 0.7004, + "step": 8935 + }, + { + "epoch": 0.1544097318219519, + "grad_norm": 1.1206557769254482, + "learning_rate": 1.9199310640943426e-05, + "loss": 0.8203, + "step": 8936 + }, + { + "epoch": 0.1544270113353608, + "grad_norm": 0.7163534979673057, + "learning_rate": 1.9199091199069005e-05, + "loss": 0.5995, + "step": 8937 + }, + { + "epoch": 0.1544442908487697, + "grad_norm": 1.2647063604093691, + "learning_rate": 1.9198871728382335e-05, + "loss": 0.6854, + "step": 8938 + }, + { + "epoch": 0.1544615703621786, + "grad_norm": 1.0548456154228099, + "learning_rate": 1.91986522288841e-05, + "loss": 0.5923, + "step": 8939 + }, + { + "epoch": 0.1544788498755875, + "grad_norm": 0.7864757667521775, + "learning_rate": 1.9198432700574984e-05, + "loss": 0.7428, + "step": 8940 + }, + { + "epoch": 0.1544961293889964, + "grad_norm": 1.1767276757103702, + "learning_rate": 1.9198213143455676e-05, + "loss": 0.7288, + "step": 8941 + }, + { + "epoch": 0.1545134089024053, + "grad_norm": 1.1201814312035732, + "learning_rate": 1.919799355752687e-05, + "loss": 0.6704, + "step": 8942 + }, + { + "epoch": 0.15453068841581422, + "grad_norm": 1.1291097759778919, + "learning_rate": 1.9197773942789247e-05, + "loss": 0.5475, + "step": 8943 + }, + { + "epoch": 0.15454796792922312, + "grad_norm": 1.1521464393445302, + "learning_rate": 1.9197554299243494e-05, + "loss": 0.5898, + "step": 8944 + }, + { + "epoch": 0.154565247442632, + "grad_norm": 0.4369201483750725, + "learning_rate": 1.9197334626890307e-05, + "loss": 0.8141, + "step": 8945 + }, + { + "epoch": 0.1545825269560409, + "grad_norm": 6.411606992252594, + "learning_rate": 1.9197114925730366e-05, + "loss": 0.7102, + "step": 8946 + }, + { + "epoch": 0.15459980646944982, + "grad_norm": 1.747958008703976, + "learning_rate": 1.9196895195764363e-05, + "loss": 0.8727, + "step": 8947 + }, + { + "epoch": 0.15461708598285873, + "grad_norm": 0.9763927568983367, + "learning_rate": 1.9196675436992985e-05, + "loss": 0.6578, + "step": 8948 + }, + { + "epoch": 0.15463436549626763, + "grad_norm": 0.886900196130787, + "learning_rate": 1.9196455649416923e-05, + "loss": 0.6372, + "step": 8949 + }, + { + "epoch": 0.1546516450096765, + "grad_norm": 1.0069493165953332, + "learning_rate": 1.919623583303686e-05, + "loss": 0.618, + "step": 8950 + }, + { + "epoch": 0.15466892452308542, + "grad_norm": 0.8962109745107409, + "learning_rate": 1.9196015987853488e-05, + "loss": 0.5702, + "step": 8951 + }, + { + "epoch": 0.15468620403649433, + "grad_norm": 1.1609506136637144, + "learning_rate": 1.9195796113867495e-05, + "loss": 0.6782, + "step": 8952 + }, + { + "epoch": 0.15470348354990324, + "grad_norm": 0.8442357281840098, + "learning_rate": 1.919557621107957e-05, + "loss": 0.5209, + "step": 8953 + }, + { + "epoch": 0.15472076306331214, + "grad_norm": 0.8975797540563172, + "learning_rate": 1.9195356279490402e-05, + "loss": 0.7037, + "step": 8954 + }, + { + "epoch": 0.15473804257672105, + "grad_norm": 1.058115495613411, + "learning_rate": 1.9195136319100676e-05, + "loss": 0.577, + "step": 8955 + }, + { + "epoch": 0.15475532209012993, + "grad_norm": 1.1899175883453703, + "learning_rate": 1.9194916329911088e-05, + "loss": 0.6515, + "step": 8956 + }, + { + "epoch": 0.15477260160353884, + "grad_norm": 0.9273901598254765, + "learning_rate": 1.9194696311922322e-05, + "loss": 0.7914, + "step": 8957 + }, + { + "epoch": 0.15478988111694775, + "grad_norm": 1.1430369301725998, + "learning_rate": 1.919447626513507e-05, + "loss": 0.4452, + "step": 8958 + }, + { + "epoch": 0.15480716063035665, + "grad_norm": 1.1161642332871216, + "learning_rate": 1.919425618955002e-05, + "loss": 0.8651, + "step": 8959 + }, + { + "epoch": 0.15482444014376556, + "grad_norm": 0.9548580949769786, + "learning_rate": 1.919403608516786e-05, + "loss": 0.6831, + "step": 8960 + }, + { + "epoch": 0.15484171965717444, + "grad_norm": 0.7522983915536667, + "learning_rate": 1.9193815951989278e-05, + "loss": 0.5609, + "step": 8961 + }, + { + "epoch": 0.15485899917058335, + "grad_norm": 1.0773914763748467, + "learning_rate": 1.9193595790014967e-05, + "loss": 0.5356, + "step": 8962 + }, + { + "epoch": 0.15487627868399226, + "grad_norm": 0.8536863191134788, + "learning_rate": 1.9193375599245615e-05, + "loss": 0.6484, + "step": 8963 + }, + { + "epoch": 0.15489355819740117, + "grad_norm": 0.7565360691278443, + "learning_rate": 1.9193155379681914e-05, + "loss": 0.4747, + "step": 8964 + }, + { + "epoch": 0.15491083771081007, + "grad_norm": 1.4000096207435724, + "learning_rate": 1.919293513132455e-05, + "loss": 0.8381, + "step": 8965 + }, + { + "epoch": 0.15492811722421895, + "grad_norm": 1.0965474645043018, + "learning_rate": 1.9192714854174216e-05, + "loss": 0.7179, + "step": 8966 + }, + { + "epoch": 0.15494539673762786, + "grad_norm": 1.081700050350838, + "learning_rate": 1.9192494548231596e-05, + "loss": 0.8293, + "step": 8967 + }, + { + "epoch": 0.15496267625103677, + "grad_norm": 1.0399302462919582, + "learning_rate": 1.9192274213497387e-05, + "loss": 0.5996, + "step": 8968 + }, + { + "epoch": 0.15497995576444568, + "grad_norm": 0.9726544724010875, + "learning_rate": 1.9192053849972277e-05, + "loss": 0.4538, + "step": 8969 + }, + { + "epoch": 0.15499723527785458, + "grad_norm": 1.0134938855404088, + "learning_rate": 1.9191833457656958e-05, + "loss": 0.4994, + "step": 8970 + }, + { + "epoch": 0.1550145147912635, + "grad_norm": 1.0553693031908868, + "learning_rate": 1.9191613036552118e-05, + "loss": 0.633, + "step": 8971 + }, + { + "epoch": 0.15503179430467237, + "grad_norm": 0.8680888869687368, + "learning_rate": 1.9191392586658443e-05, + "loss": 0.768, + "step": 8972 + }, + { + "epoch": 0.15504907381808128, + "grad_norm": 1.113381419716235, + "learning_rate": 1.919117210797663e-05, + "loss": 0.7461, + "step": 8973 + }, + { + "epoch": 0.1550663533314902, + "grad_norm": 1.1962972178062867, + "learning_rate": 1.9190951600507368e-05, + "loss": 0.6915, + "step": 8974 + }, + { + "epoch": 0.1550836328448991, + "grad_norm": 0.7552765601218907, + "learning_rate": 1.9190731064251346e-05, + "loss": 0.6785, + "step": 8975 + }, + { + "epoch": 0.155100912358308, + "grad_norm": 0.8808440489357614, + "learning_rate": 1.9190510499209258e-05, + "loss": 0.4915, + "step": 8976 + }, + { + "epoch": 0.15511819187171688, + "grad_norm": 0.9250614050174735, + "learning_rate": 1.9190289905381792e-05, + "loss": 0.784, + "step": 8977 + }, + { + "epoch": 0.1551354713851258, + "grad_norm": 1.4211502515230487, + "learning_rate": 1.919006928276964e-05, + "loss": 0.6115, + "step": 8978 + }, + { + "epoch": 0.1551527508985347, + "grad_norm": 0.9387648065644751, + "learning_rate": 1.9189848631373492e-05, + "loss": 0.6673, + "step": 8979 + }, + { + "epoch": 0.1551700304119436, + "grad_norm": 0.7449083458890073, + "learning_rate": 1.918962795119404e-05, + "loss": 0.4631, + "step": 8980 + }, + { + "epoch": 0.1551873099253525, + "grad_norm": 1.2282895050310172, + "learning_rate": 1.9189407242231974e-05, + "loss": 0.8421, + "step": 8981 + }, + { + "epoch": 0.1552045894387614, + "grad_norm": 0.8217466572991331, + "learning_rate": 1.9189186504487986e-05, + "loss": 0.5372, + "step": 8982 + }, + { + "epoch": 0.1552218689521703, + "grad_norm": 1.1835970801147375, + "learning_rate": 1.9188965737962767e-05, + "loss": 0.4069, + "step": 8983 + }, + { + "epoch": 0.1552391484655792, + "grad_norm": 1.1670005549645948, + "learning_rate": 1.918874494265701e-05, + "loss": 0.7051, + "step": 8984 + }, + { + "epoch": 0.15525642797898812, + "grad_norm": 0.8776601476016038, + "learning_rate": 1.9188524118571402e-05, + "loss": 0.6536, + "step": 8985 + }, + { + "epoch": 0.15527370749239702, + "grad_norm": 0.9502602307449084, + "learning_rate": 1.918830326570664e-05, + "loss": 0.5817, + "step": 8986 + }, + { + "epoch": 0.1552909870058059, + "grad_norm": 0.6800427578771927, + "learning_rate": 1.9188082384063418e-05, + "loss": 0.4912, + "step": 8987 + }, + { + "epoch": 0.1553082665192148, + "grad_norm": 1.3036975301704248, + "learning_rate": 1.9187861473642423e-05, + "loss": 0.5024, + "step": 8988 + }, + { + "epoch": 0.15532554603262372, + "grad_norm": 1.0542297298175989, + "learning_rate": 1.9187640534444344e-05, + "loss": 0.5948, + "step": 8989 + }, + { + "epoch": 0.15534282554603263, + "grad_norm": 0.7272910943552757, + "learning_rate": 1.918741956646988e-05, + "loss": 0.5348, + "step": 8990 + }, + { + "epoch": 0.15536010505944153, + "grad_norm": 0.9015064871430986, + "learning_rate": 1.9187198569719716e-05, + "loss": 0.5441, + "step": 8991 + }, + { + "epoch": 0.15537738457285044, + "grad_norm": 1.17168626194824, + "learning_rate": 1.918697754419455e-05, + "loss": 0.5726, + "step": 8992 + }, + { + "epoch": 0.15539466408625932, + "grad_norm": 1.251814550135646, + "learning_rate": 1.918675648989507e-05, + "loss": 0.553, + "step": 8993 + }, + { + "epoch": 0.15541194359966823, + "grad_norm": 1.2100835166951822, + "learning_rate": 1.9186535406821975e-05, + "loss": 0.732, + "step": 8994 + }, + { + "epoch": 0.15542922311307714, + "grad_norm": 1.2061450858407567, + "learning_rate": 1.918631429497595e-05, + "loss": 0.8162, + "step": 8995 + }, + { + "epoch": 0.15544650262648604, + "grad_norm": 1.0857840170411728, + "learning_rate": 1.9186093154357694e-05, + "loss": 0.6152, + "step": 8996 + }, + { + "epoch": 0.15546378213989495, + "grad_norm": 1.6811199006955868, + "learning_rate": 1.918587198496789e-05, + "loss": 0.5645, + "step": 8997 + }, + { + "epoch": 0.15548106165330383, + "grad_norm": 0.8236454679717292, + "learning_rate": 1.918565078680724e-05, + "loss": 0.693, + "step": 8998 + }, + { + "epoch": 0.15549834116671274, + "grad_norm": 0.5303761986001579, + "learning_rate": 1.9185429559876435e-05, + "loss": 0.6314, + "step": 8999 + }, + { + "epoch": 0.15551562068012165, + "grad_norm": 0.700967571796822, + "learning_rate": 1.9185208304176164e-05, + "loss": 0.6503, + "step": 9000 + }, + { + "epoch": 0.15553290019353055, + "grad_norm": 0.7426400103863354, + "learning_rate": 1.9184987019707124e-05, + "loss": 0.5763, + "step": 9001 + }, + { + "epoch": 0.15555017970693946, + "grad_norm": 0.9489848277202902, + "learning_rate": 1.9184765706470005e-05, + "loss": 0.4895, + "step": 9002 + }, + { + "epoch": 0.15556745922034834, + "grad_norm": 1.3018021676703386, + "learning_rate": 1.9184544364465503e-05, + "loss": 0.6577, + "step": 9003 + }, + { + "epoch": 0.15558473873375725, + "grad_norm": 1.1322733656186854, + "learning_rate": 1.9184322993694312e-05, + "loss": 0.7105, + "step": 9004 + }, + { + "epoch": 0.15560201824716616, + "grad_norm": 1.2981123244096822, + "learning_rate": 1.918410159415712e-05, + "loss": 0.7843, + "step": 9005 + }, + { + "epoch": 0.15561929776057506, + "grad_norm": 0.7710404389996831, + "learning_rate": 1.9183880165854628e-05, + "loss": 0.7898, + "step": 9006 + }, + { + "epoch": 0.15563657727398397, + "grad_norm": 1.011898457526224, + "learning_rate": 1.918365870878752e-05, + "loss": 0.7665, + "step": 9007 + }, + { + "epoch": 0.15565385678739288, + "grad_norm": 0.8064397696536464, + "learning_rate": 1.9183437222956498e-05, + "loss": 0.6662, + "step": 9008 + }, + { + "epoch": 0.15567113630080176, + "grad_norm": 1.2041577414834375, + "learning_rate": 1.9183215708362253e-05, + "loss": 0.6129, + "step": 9009 + }, + { + "epoch": 0.15568841581421067, + "grad_norm": 0.9103200388111782, + "learning_rate": 1.9182994165005477e-05, + "loss": 0.5155, + "step": 9010 + }, + { + "epoch": 0.15570569532761958, + "grad_norm": 0.9847956957244888, + "learning_rate": 1.918277259288687e-05, + "loss": 0.602, + "step": 9011 + }, + { + "epoch": 0.15572297484102848, + "grad_norm": 0.9789756080460921, + "learning_rate": 1.9182550992007117e-05, + "loss": 0.7117, + "step": 9012 + }, + { + "epoch": 0.1557402543544374, + "grad_norm": 1.1610257348946091, + "learning_rate": 1.9182329362366916e-05, + "loss": 0.6761, + "step": 9013 + }, + { + "epoch": 0.15575753386784627, + "grad_norm": 0.8107930022790336, + "learning_rate": 1.918210770396696e-05, + "loss": 0.5241, + "step": 9014 + }, + { + "epoch": 0.15577481338125518, + "grad_norm": 0.5614730082384319, + "learning_rate": 1.9181886016807948e-05, + "loss": 0.6338, + "step": 9015 + }, + { + "epoch": 0.15579209289466409, + "grad_norm": 1.1966931207520148, + "learning_rate": 1.9181664300890573e-05, + "loss": 0.6246, + "step": 9016 + }, + { + "epoch": 0.155809372408073, + "grad_norm": 0.8407141867656756, + "learning_rate": 1.9181442556215522e-05, + "loss": 0.628, + "step": 9017 + }, + { + "epoch": 0.1558266519214819, + "grad_norm": 0.4428939317888008, + "learning_rate": 1.91812207827835e-05, + "loss": 0.5901, + "step": 9018 + }, + { + "epoch": 0.15584393143489078, + "grad_norm": 1.0526377004060763, + "learning_rate": 1.9180998980595196e-05, + "loss": 0.6782, + "step": 9019 + }, + { + "epoch": 0.1558612109482997, + "grad_norm": 0.8535799998986359, + "learning_rate": 1.9180777149651306e-05, + "loss": 0.6819, + "step": 9020 + }, + { + "epoch": 0.1558784904617086, + "grad_norm": 1.1979516028204416, + "learning_rate": 1.918055528995252e-05, + "loss": 0.705, + "step": 9021 + }, + { + "epoch": 0.1558957699751175, + "grad_norm": 0.8745741989791124, + "learning_rate": 1.9180333401499543e-05, + "loss": 0.4518, + "step": 9022 + }, + { + "epoch": 0.1559130494885264, + "grad_norm": 0.590948253274818, + "learning_rate": 1.9180111484293063e-05, + "loss": 0.542, + "step": 9023 + }, + { + "epoch": 0.1559303290019353, + "grad_norm": 1.243015618442344, + "learning_rate": 1.917988953833377e-05, + "loss": 0.6636, + "step": 9024 + }, + { + "epoch": 0.1559476085153442, + "grad_norm": 0.825664123856169, + "learning_rate": 1.9179667563622372e-05, + "loss": 0.4898, + "step": 9025 + }, + { + "epoch": 0.1559648880287531, + "grad_norm": 0.8279474029551694, + "learning_rate": 1.9179445560159555e-05, + "loss": 0.5896, + "step": 9026 + }, + { + "epoch": 0.15598216754216201, + "grad_norm": 1.0634440328235097, + "learning_rate": 1.9179223527946018e-05, + "loss": 0.3594, + "step": 9027 + }, + { + "epoch": 0.15599944705557092, + "grad_norm": 1.0323549666720901, + "learning_rate": 1.9179001466982457e-05, + "loss": 0.691, + "step": 9028 + }, + { + "epoch": 0.15601672656897983, + "grad_norm": 1.2181611137688204, + "learning_rate": 1.917877937726956e-05, + "loss": 0.633, + "step": 9029 + }, + { + "epoch": 0.1560340060823887, + "grad_norm": 1.038800390020599, + "learning_rate": 1.9178557258808032e-05, + "loss": 0.5811, + "step": 9030 + }, + { + "epoch": 0.15605128559579762, + "grad_norm": 1.1732508080586412, + "learning_rate": 1.9178335111598568e-05, + "loss": 0.8475, + "step": 9031 + }, + { + "epoch": 0.15606856510920653, + "grad_norm": 0.8113606830242907, + "learning_rate": 1.9178112935641855e-05, + "loss": 0.456, + "step": 9032 + }, + { + "epoch": 0.15608584462261543, + "grad_norm": 0.45001354256843284, + "learning_rate": 1.91778907309386e-05, + "loss": 0.5961, + "step": 9033 + }, + { + "epoch": 0.15610312413602434, + "grad_norm": 0.8107454287702515, + "learning_rate": 1.917766849748949e-05, + "loss": 0.6177, + "step": 9034 + }, + { + "epoch": 0.15612040364943322, + "grad_norm": 0.5904187533907638, + "learning_rate": 1.9177446235295225e-05, + "loss": 0.6468, + "step": 9035 + }, + { + "epoch": 0.15613768316284213, + "grad_norm": 0.6873770116091071, + "learning_rate": 1.9177223944356504e-05, + "loss": 0.5699, + "step": 9036 + }, + { + "epoch": 0.15615496267625104, + "grad_norm": 0.6438030658681522, + "learning_rate": 1.9177001624674017e-05, + "loss": 0.4283, + "step": 9037 + }, + { + "epoch": 0.15617224218965994, + "grad_norm": 0.7940543388939173, + "learning_rate": 1.9176779276248464e-05, + "loss": 0.6426, + "step": 9038 + }, + { + "epoch": 0.15618952170306885, + "grad_norm": 1.0683801050176882, + "learning_rate": 1.9176556899080542e-05, + "loss": 0.6908, + "step": 9039 + }, + { + "epoch": 0.15620680121647773, + "grad_norm": 0.979905461986645, + "learning_rate": 1.917633449317095e-05, + "loss": 0.5166, + "step": 9040 + }, + { + "epoch": 0.15622408072988664, + "grad_norm": 1.2823050910558382, + "learning_rate": 1.9176112058520372e-05, + "loss": 0.6162, + "step": 9041 + }, + { + "epoch": 0.15624136024329555, + "grad_norm": 1.5059050257058544, + "learning_rate": 1.917588959512952e-05, + "loss": 0.7479, + "step": 9042 + }, + { + "epoch": 0.15625863975670445, + "grad_norm": 1.285436307092386, + "learning_rate": 1.9175667102999084e-05, + "loss": 0.6283, + "step": 9043 + }, + { + "epoch": 0.15627591927011336, + "grad_norm": 0.937734493639733, + "learning_rate": 1.9175444582129762e-05, + "loss": 0.8049, + "step": 9044 + }, + { + "epoch": 0.15629319878352227, + "grad_norm": 1.2737125334841068, + "learning_rate": 1.9175222032522246e-05, + "loss": 0.7564, + "step": 9045 + }, + { + "epoch": 0.15631047829693115, + "grad_norm": 1.0419157949999764, + "learning_rate": 1.9174999454177243e-05, + "loss": 0.6787, + "step": 9046 + }, + { + "epoch": 0.15632775781034006, + "grad_norm": 1.1245923956315067, + "learning_rate": 1.9174776847095437e-05, + "loss": 0.6068, + "step": 9047 + }, + { + "epoch": 0.15634503732374896, + "grad_norm": 0.9229880801797506, + "learning_rate": 1.9174554211277537e-05, + "loss": 0.425, + "step": 9048 + }, + { + "epoch": 0.15636231683715787, + "grad_norm": 0.8748595146841996, + "learning_rate": 1.9174331546724238e-05, + "loss": 0.6208, + "step": 9049 + }, + { + "epoch": 0.15637959635056678, + "grad_norm": 0.9937317363831039, + "learning_rate": 1.9174108853436234e-05, + "loss": 0.3561, + "step": 9050 + }, + { + "epoch": 0.15639687586397566, + "grad_norm": 0.8707532125375398, + "learning_rate": 1.9173886131414222e-05, + "loss": 0.5465, + "step": 9051 + }, + { + "epoch": 0.15641415537738457, + "grad_norm": 1.280113777792332, + "learning_rate": 1.9173663380658905e-05, + "loss": 0.709, + "step": 9052 + }, + { + "epoch": 0.15643143489079347, + "grad_norm": 0.7777079254219784, + "learning_rate": 1.9173440601170973e-05, + "loss": 0.8025, + "step": 9053 + }, + { + "epoch": 0.15644871440420238, + "grad_norm": 1.3172574957078098, + "learning_rate": 1.9173217792951128e-05, + "loss": 0.5538, + "step": 9054 + }, + { + "epoch": 0.1564659939176113, + "grad_norm": 0.900304362700449, + "learning_rate": 1.917299495600007e-05, + "loss": 0.5146, + "step": 9055 + }, + { + "epoch": 0.15648327343102017, + "grad_norm": 0.7907510302580175, + "learning_rate": 1.9172772090318497e-05, + "loss": 0.5777, + "step": 9056 + }, + { + "epoch": 0.15650055294442908, + "grad_norm": 1.272810653150416, + "learning_rate": 1.9172549195907103e-05, + "loss": 0.6006, + "step": 9057 + }, + { + "epoch": 0.15651783245783799, + "grad_norm": 0.8998845352060809, + "learning_rate": 1.9172326272766585e-05, + "loss": 0.7012, + "step": 9058 + }, + { + "epoch": 0.1565351119712469, + "grad_norm": 1.7276323742630553, + "learning_rate": 1.9172103320897647e-05, + "loss": 0.9362, + "step": 9059 + }, + { + "epoch": 0.1565523914846558, + "grad_norm": 1.0880614710112813, + "learning_rate": 1.9171880340300983e-05, + "loss": 0.5123, + "step": 9060 + }, + { + "epoch": 0.1565696709980647, + "grad_norm": 1.2588392435541114, + "learning_rate": 1.917165733097729e-05, + "loss": 0.6067, + "step": 9061 + }, + { + "epoch": 0.1565869505114736, + "grad_norm": 1.0702081907922898, + "learning_rate": 1.917143429292727e-05, + "loss": 0.6769, + "step": 9062 + }, + { + "epoch": 0.1566042300248825, + "grad_norm": 1.3172075990506216, + "learning_rate": 1.9171211226151624e-05, + "loss": 0.5032, + "step": 9063 + }, + { + "epoch": 0.1566215095382914, + "grad_norm": 0.7099034916692026, + "learning_rate": 1.9170988130651048e-05, + "loss": 0.5036, + "step": 9064 + }, + { + "epoch": 0.1566387890517003, + "grad_norm": 1.1052123933220763, + "learning_rate": 1.9170765006426236e-05, + "loss": 0.7049, + "step": 9065 + }, + { + "epoch": 0.15665606856510922, + "grad_norm": 1.02635532589175, + "learning_rate": 1.9170541853477897e-05, + "loss": 0.8884, + "step": 9066 + }, + { + "epoch": 0.1566733480785181, + "grad_norm": 0.7460658230034499, + "learning_rate": 1.917031867180672e-05, + "loss": 0.7207, + "step": 9067 + }, + { + "epoch": 0.156690627591927, + "grad_norm": 1.2124611500329372, + "learning_rate": 1.917009546141341e-05, + "loss": 0.6676, + "step": 9068 + }, + { + "epoch": 0.15670790710533591, + "grad_norm": 0.6920351018767744, + "learning_rate": 1.916987222229866e-05, + "loss": 0.492, + "step": 9069 + }, + { + "epoch": 0.15672518661874482, + "grad_norm": 1.4206316417737257, + "learning_rate": 1.9169648954463174e-05, + "loss": 0.5696, + "step": 9070 + }, + { + "epoch": 0.15674246613215373, + "grad_norm": 1.1051903511572783, + "learning_rate": 1.9169425657907656e-05, + "loss": 0.7382, + "step": 9071 + }, + { + "epoch": 0.1567597456455626, + "grad_norm": 1.2526453037327185, + "learning_rate": 1.9169202332632798e-05, + "loss": 0.6027, + "step": 9072 + }, + { + "epoch": 0.15677702515897152, + "grad_norm": 1.118357781407371, + "learning_rate": 1.9168978978639298e-05, + "loss": 0.6697, + "step": 9073 + }, + { + "epoch": 0.15679430467238042, + "grad_norm": 0.8353501620197568, + "learning_rate": 1.916875559592786e-05, + "loss": 0.4866, + "step": 9074 + }, + { + "epoch": 0.15681158418578933, + "grad_norm": 1.2523040564885366, + "learning_rate": 1.9168532184499184e-05, + "loss": 0.7178, + "step": 9075 + }, + { + "epoch": 0.15682886369919824, + "grad_norm": 1.0819054990493304, + "learning_rate": 1.9168308744353966e-05, + "loss": 0.5977, + "step": 9076 + }, + { + "epoch": 0.15684614321260712, + "grad_norm": 0.8937419073544279, + "learning_rate": 1.9168085275492912e-05, + "loss": 0.4987, + "step": 9077 + }, + { + "epoch": 0.15686342272601603, + "grad_norm": 0.8115044353915467, + "learning_rate": 1.9167861777916715e-05, + "loss": 0.7057, + "step": 9078 + }, + { + "epoch": 0.15688070223942494, + "grad_norm": 0.3978892744398999, + "learning_rate": 1.9167638251626082e-05, + "loss": 0.6024, + "step": 9079 + }, + { + "epoch": 0.15689798175283384, + "grad_norm": 1.5132244589356363, + "learning_rate": 1.9167414696621706e-05, + "loss": 0.731, + "step": 9080 + }, + { + "epoch": 0.15691526126624275, + "grad_norm": 1.0429631191467867, + "learning_rate": 1.916719111290429e-05, + "loss": 0.5518, + "step": 9081 + }, + { + "epoch": 0.15693254077965166, + "grad_norm": 0.5280787568012242, + "learning_rate": 1.9166967500474537e-05, + "loss": 0.6704, + "step": 9082 + }, + { + "epoch": 0.15694982029306054, + "grad_norm": 0.922945562985045, + "learning_rate": 1.9166743859333145e-05, + "loss": 0.6431, + "step": 9083 + }, + { + "epoch": 0.15696709980646945, + "grad_norm": 1.5369106086118336, + "learning_rate": 1.9166520189480815e-05, + "loss": 0.5602, + "step": 9084 + }, + { + "epoch": 0.15698437931987835, + "grad_norm": 1.053886618040637, + "learning_rate": 1.9166296490918245e-05, + "loss": 0.7923, + "step": 9085 + }, + { + "epoch": 0.15700165883328726, + "grad_norm": 0.5024659028229521, + "learning_rate": 1.916607276364614e-05, + "loss": 0.6372, + "step": 9086 + }, + { + "epoch": 0.15701893834669617, + "grad_norm": 1.1441317600476777, + "learning_rate": 1.9165849007665197e-05, + "loss": 0.6895, + "step": 9087 + }, + { + "epoch": 0.15703621786010505, + "grad_norm": 1.1074153219430871, + "learning_rate": 1.9165625222976118e-05, + "loss": 0.5378, + "step": 9088 + }, + { + "epoch": 0.15705349737351396, + "grad_norm": 1.9602171033900813, + "learning_rate": 1.9165401409579604e-05, + "loss": 0.9138, + "step": 9089 + }, + { + "epoch": 0.15707077688692286, + "grad_norm": 0.5837873045418266, + "learning_rate": 1.9165177567476358e-05, + "loss": 0.6054, + "step": 9090 + }, + { + "epoch": 0.15708805640033177, + "grad_norm": 0.6991169182919826, + "learning_rate": 1.9164953696667077e-05, + "loss": 0.5674, + "step": 9091 + }, + { + "epoch": 0.15710533591374068, + "grad_norm": 1.238748086538391, + "learning_rate": 1.9164729797152467e-05, + "loss": 0.6495, + "step": 9092 + }, + { + "epoch": 0.15712261542714956, + "grad_norm": 1.3885792197538134, + "learning_rate": 1.9164505868933228e-05, + "loss": 0.6429, + "step": 9093 + }, + { + "epoch": 0.15713989494055847, + "grad_norm": 0.6031908019746413, + "learning_rate": 1.9164281912010058e-05, + "loss": 0.5518, + "step": 9094 + }, + { + "epoch": 0.15715717445396737, + "grad_norm": 0.8802751597209733, + "learning_rate": 1.916405792638366e-05, + "loss": 0.7842, + "step": 9095 + }, + { + "epoch": 0.15717445396737628, + "grad_norm": 0.5916424399921683, + "learning_rate": 1.9163833912054738e-05, + "loss": 0.341, + "step": 9096 + }, + { + "epoch": 0.1571917334807852, + "grad_norm": 0.9264326547906355, + "learning_rate": 1.9163609869023988e-05, + "loss": 0.3933, + "step": 9097 + }, + { + "epoch": 0.1572090129941941, + "grad_norm": 1.3481161816705576, + "learning_rate": 1.916338579729212e-05, + "loss": 0.6954, + "step": 9098 + }, + { + "epoch": 0.15722629250760298, + "grad_norm": 1.0843207694249968, + "learning_rate": 1.9163161696859826e-05, + "loss": 0.5761, + "step": 9099 + }, + { + "epoch": 0.15724357202101188, + "grad_norm": 1.2093503156971455, + "learning_rate": 1.9162937567727818e-05, + "loss": 0.6094, + "step": 9100 + }, + { + "epoch": 0.1572608515344208, + "grad_norm": 1.0153844570957962, + "learning_rate": 1.9162713409896788e-05, + "loss": 0.6336, + "step": 9101 + }, + { + "epoch": 0.1572781310478297, + "grad_norm": 0.7793733792829184, + "learning_rate": 1.9162489223367448e-05, + "loss": 0.7141, + "step": 9102 + }, + { + "epoch": 0.1572954105612386, + "grad_norm": 1.3558074814114855, + "learning_rate": 1.9162265008140494e-05, + "loss": 0.6296, + "step": 9103 + }, + { + "epoch": 0.1573126900746475, + "grad_norm": 0.7994812373601514, + "learning_rate": 1.9162040764216632e-05, + "loss": 0.6347, + "step": 9104 + }, + { + "epoch": 0.1573299695880564, + "grad_norm": 1.299791590704345, + "learning_rate": 1.9161816491596558e-05, + "loss": 0.6094, + "step": 9105 + }, + { + "epoch": 0.1573472491014653, + "grad_norm": 1.0349708027385924, + "learning_rate": 1.9161592190280982e-05, + "loss": 0.617, + "step": 9106 + }, + { + "epoch": 0.1573645286148742, + "grad_norm": 1.1187344314713052, + "learning_rate": 1.91613678602706e-05, + "loss": 0.7233, + "step": 9107 + }, + { + "epoch": 0.15738180812828312, + "grad_norm": 1.025303638711317, + "learning_rate": 1.916114350156612e-05, + "loss": 0.675, + "step": 9108 + }, + { + "epoch": 0.157399087641692, + "grad_norm": 1.0473446613427442, + "learning_rate": 1.916091911416824e-05, + "loss": 0.5112, + "step": 9109 + }, + { + "epoch": 0.1574163671551009, + "grad_norm": 0.9239488510860587, + "learning_rate": 1.9160694698077668e-05, + "loss": 0.7726, + "step": 9110 + }, + { + "epoch": 0.1574336466685098, + "grad_norm": 1.0651896553912958, + "learning_rate": 1.91604702532951e-05, + "loss": 0.5392, + "step": 9111 + }, + { + "epoch": 0.15745092618191872, + "grad_norm": 1.0651454544443169, + "learning_rate": 1.916024577982125e-05, + "loss": 0.7085, + "step": 9112 + }, + { + "epoch": 0.15746820569532763, + "grad_norm": 0.8798353869495752, + "learning_rate": 1.9160021277656807e-05, + "loss": 0.4623, + "step": 9113 + }, + { + "epoch": 0.1574854852087365, + "grad_norm": 1.623460776816398, + "learning_rate": 1.9159796746802483e-05, + "loss": 0.5254, + "step": 9114 + }, + { + "epoch": 0.15750276472214542, + "grad_norm": 0.8902190097087399, + "learning_rate": 1.915957218725898e-05, + "loss": 0.6758, + "step": 9115 + }, + { + "epoch": 0.15752004423555432, + "grad_norm": 0.8651217227416476, + "learning_rate": 1.9159347599027003e-05, + "loss": 0.6129, + "step": 9116 + }, + { + "epoch": 0.15753732374896323, + "grad_norm": 1.2686657234390444, + "learning_rate": 1.9159122982107253e-05, + "loss": 0.6856, + "step": 9117 + }, + { + "epoch": 0.15755460326237214, + "grad_norm": 0.7425150470696265, + "learning_rate": 1.9158898336500428e-05, + "loss": 0.6108, + "step": 9118 + }, + { + "epoch": 0.15757188277578105, + "grad_norm": 1.533217477960349, + "learning_rate": 1.9158673662207244e-05, + "loss": 0.8522, + "step": 9119 + }, + { + "epoch": 0.15758916228918993, + "grad_norm": 1.037113450753751, + "learning_rate": 1.9158448959228394e-05, + "loss": 0.5475, + "step": 9120 + }, + { + "epoch": 0.15760644180259883, + "grad_norm": 1.6319428219996672, + "learning_rate": 1.9158224227564588e-05, + "loss": 0.7467, + "step": 9121 + }, + { + "epoch": 0.15762372131600774, + "grad_norm": 1.2855107448257723, + "learning_rate": 1.9157999467216525e-05, + "loss": 0.6658, + "step": 9122 + }, + { + "epoch": 0.15764100082941665, + "grad_norm": 1.1268480334633786, + "learning_rate": 1.9157774678184915e-05, + "loss": 0.7508, + "step": 9123 + }, + { + "epoch": 0.15765828034282556, + "grad_norm": 1.5241827388075988, + "learning_rate": 1.915754986047046e-05, + "loss": 0.6838, + "step": 9124 + }, + { + "epoch": 0.15767555985623444, + "grad_norm": 2.259288049274935, + "learning_rate": 1.9157325014073858e-05, + "loss": 0.7745, + "step": 9125 + }, + { + "epoch": 0.15769283936964335, + "grad_norm": 1.4176341375741985, + "learning_rate": 1.915710013899582e-05, + "loss": 0.8246, + "step": 9126 + }, + { + "epoch": 0.15771011888305225, + "grad_norm": 0.6438530396221545, + "learning_rate": 1.9156875235237053e-05, + "loss": 0.6428, + "step": 9127 + }, + { + "epoch": 0.15772739839646116, + "grad_norm": 1.2172465316621033, + "learning_rate": 1.915665030279825e-05, + "loss": 0.5774, + "step": 9128 + }, + { + "epoch": 0.15774467790987007, + "grad_norm": 0.8639321885674722, + "learning_rate": 1.9156425341680125e-05, + "loss": 0.6847, + "step": 9129 + }, + { + "epoch": 0.15776195742327895, + "grad_norm": 1.0437095071950562, + "learning_rate": 1.915620035188338e-05, + "loss": 0.6293, + "step": 9130 + }, + { + "epoch": 0.15777923693668786, + "grad_norm": 1.1492577515256703, + "learning_rate": 1.915597533340872e-05, + "loss": 0.6291, + "step": 9131 + }, + { + "epoch": 0.15779651645009676, + "grad_norm": 1.1308905079931821, + "learning_rate": 1.9155750286256848e-05, + "loss": 0.5378, + "step": 9132 + }, + { + "epoch": 0.15781379596350567, + "grad_norm": 0.7078098451429313, + "learning_rate": 1.915552521042847e-05, + "loss": 0.5164, + "step": 9133 + }, + { + "epoch": 0.15783107547691458, + "grad_norm": 1.2173186696476475, + "learning_rate": 1.91553001059243e-05, + "loss": 0.5903, + "step": 9134 + }, + { + "epoch": 0.1578483549903235, + "grad_norm": 1.1303482806660652, + "learning_rate": 1.915507497274502e-05, + "loss": 0.6939, + "step": 9135 + }, + { + "epoch": 0.15786563450373237, + "grad_norm": 1.0549490075453865, + "learning_rate": 1.915484981089136e-05, + "loss": 0.6331, + "step": 9136 + }, + { + "epoch": 0.15788291401714127, + "grad_norm": 0.8492292544361469, + "learning_rate": 1.915462462036401e-05, + "loss": 0.5675, + "step": 9137 + }, + { + "epoch": 0.15790019353055018, + "grad_norm": 1.1167703825459223, + "learning_rate": 1.9154399401163682e-05, + "loss": 0.5716, + "step": 9138 + }, + { + "epoch": 0.1579174730439591, + "grad_norm": 1.0645603324820876, + "learning_rate": 1.915417415329108e-05, + "loss": 0.6922, + "step": 9139 + }, + { + "epoch": 0.157934752557368, + "grad_norm": 0.8122535974328589, + "learning_rate": 1.9153948876746905e-05, + "loss": 0.647, + "step": 9140 + }, + { + "epoch": 0.15795203207077688, + "grad_norm": 1.159157652586335, + "learning_rate": 1.9153723571531868e-05, + "loss": 0.7111, + "step": 9141 + }, + { + "epoch": 0.15796931158418578, + "grad_norm": 1.0753763272875558, + "learning_rate": 1.9153498237646674e-05, + "loss": 0.7322, + "step": 9142 + }, + { + "epoch": 0.1579865910975947, + "grad_norm": 0.8604261254388221, + "learning_rate": 1.9153272875092027e-05, + "loss": 0.5185, + "step": 9143 + }, + { + "epoch": 0.1580038706110036, + "grad_norm": 0.9635623424913442, + "learning_rate": 1.9153047483868638e-05, + "loss": 0.7989, + "step": 9144 + }, + { + "epoch": 0.1580211501244125, + "grad_norm": 0.8655508206498461, + "learning_rate": 1.9152822063977202e-05, + "loss": 0.556, + "step": 9145 + }, + { + "epoch": 0.1580384296378214, + "grad_norm": 0.7394879121877719, + "learning_rate": 1.9152596615418435e-05, + "loss": 0.6385, + "step": 9146 + }, + { + "epoch": 0.1580557091512303, + "grad_norm": 0.6529335994027505, + "learning_rate": 1.9152371138193043e-05, + "loss": 0.3883, + "step": 9147 + }, + { + "epoch": 0.1580729886646392, + "grad_norm": 0.5088794928460401, + "learning_rate": 1.9152145632301722e-05, + "loss": 0.7151, + "step": 9148 + }, + { + "epoch": 0.1580902681780481, + "grad_norm": 0.6727764191623826, + "learning_rate": 1.915192009774519e-05, + "loss": 0.4883, + "step": 9149 + }, + { + "epoch": 0.15810754769145702, + "grad_norm": 1.0089581016865286, + "learning_rate": 1.9151694534524148e-05, + "loss": 0.7176, + "step": 9150 + }, + { + "epoch": 0.1581248272048659, + "grad_norm": 0.8052224501018783, + "learning_rate": 1.91514689426393e-05, + "loss": 0.713, + "step": 9151 + }, + { + "epoch": 0.1581421067182748, + "grad_norm": 0.829062728636526, + "learning_rate": 1.915124332209136e-05, + "loss": 0.6882, + "step": 9152 + }, + { + "epoch": 0.1581593862316837, + "grad_norm": 0.8251321568260941, + "learning_rate": 1.9151017672881032e-05, + "loss": 0.5608, + "step": 9153 + }, + { + "epoch": 0.15817666574509262, + "grad_norm": 1.4311092512539243, + "learning_rate": 1.915079199500902e-05, + "loss": 0.583, + "step": 9154 + }, + { + "epoch": 0.15819394525850153, + "grad_norm": 0.8785529193609884, + "learning_rate": 1.915056628847603e-05, + "loss": 0.7785, + "step": 9155 + }, + { + "epoch": 0.15821122477191044, + "grad_norm": 1.2766745597158935, + "learning_rate": 1.915034055328277e-05, + "loss": 0.8244, + "step": 9156 + }, + { + "epoch": 0.15822850428531932, + "grad_norm": 0.9424071358128758, + "learning_rate": 1.915011478942995e-05, + "loss": 0.5329, + "step": 9157 + }, + { + "epoch": 0.15824578379872822, + "grad_norm": 1.115836396524397, + "learning_rate": 1.9149888996918275e-05, + "loss": 0.6975, + "step": 9158 + }, + { + "epoch": 0.15826306331213713, + "grad_norm": 0.5100409955719105, + "learning_rate": 1.914966317574845e-05, + "loss": 0.7599, + "step": 9159 + }, + { + "epoch": 0.15828034282554604, + "grad_norm": 1.2149058617094277, + "learning_rate": 1.9149437325921186e-05, + "loss": 0.5689, + "step": 9160 + }, + { + "epoch": 0.15829762233895495, + "grad_norm": 0.9884158827680323, + "learning_rate": 1.9149211447437192e-05, + "loss": 0.6616, + "step": 9161 + }, + { + "epoch": 0.15831490185236383, + "grad_norm": 1.7319566658222294, + "learning_rate": 1.914898554029717e-05, + "loss": 0.7344, + "step": 9162 + }, + { + "epoch": 0.15833218136577273, + "grad_norm": 0.7876541520274414, + "learning_rate": 1.9148759604501827e-05, + "loss": 0.5369, + "step": 9163 + }, + { + "epoch": 0.15834946087918164, + "grad_norm": 0.8511307587189327, + "learning_rate": 1.914853364005188e-05, + "loss": 0.5598, + "step": 9164 + }, + { + "epoch": 0.15836674039259055, + "grad_norm": 0.43768435616867085, + "learning_rate": 1.9148307646948024e-05, + "loss": 0.6161, + "step": 9165 + }, + { + "epoch": 0.15838401990599946, + "grad_norm": 1.5339716944965576, + "learning_rate": 1.9148081625190978e-05, + "loss": 0.6832, + "step": 9166 + }, + { + "epoch": 0.15840129941940834, + "grad_norm": 0.7094389047331987, + "learning_rate": 1.914785557478144e-05, + "loss": 0.6402, + "step": 9167 + }, + { + "epoch": 0.15841857893281724, + "grad_norm": 1.2024466500478757, + "learning_rate": 1.914762949572013e-05, + "loss": 0.6745, + "step": 9168 + }, + { + "epoch": 0.15843585844622615, + "grad_norm": 1.3764507594028732, + "learning_rate": 1.9147403388007745e-05, + "loss": 0.8146, + "step": 9169 + }, + { + "epoch": 0.15845313795963506, + "grad_norm": 0.9402134670275806, + "learning_rate": 1.9147177251644997e-05, + "loss": 0.5181, + "step": 9170 + }, + { + "epoch": 0.15847041747304397, + "grad_norm": 1.153880926362882, + "learning_rate": 1.9146951086632596e-05, + "loss": 0.6236, + "step": 9171 + }, + { + "epoch": 0.15848769698645288, + "grad_norm": 1.0582198852428766, + "learning_rate": 1.9146724892971247e-05, + "loss": 0.6673, + "step": 9172 + }, + { + "epoch": 0.15850497649986175, + "grad_norm": 0.7453647023639633, + "learning_rate": 1.914649867066166e-05, + "loss": 0.7816, + "step": 9173 + }, + { + "epoch": 0.15852225601327066, + "grad_norm": 1.0969841877920923, + "learning_rate": 1.9146272419704548e-05, + "loss": 0.5952, + "step": 9174 + }, + { + "epoch": 0.15853953552667957, + "grad_norm": 0.9157542551326457, + "learning_rate": 1.9146046140100613e-05, + "loss": 0.8093, + "step": 9175 + }, + { + "epoch": 0.15855681504008848, + "grad_norm": 0.4357434245117882, + "learning_rate": 1.9145819831850567e-05, + "loss": 0.6504, + "step": 9176 + }, + { + "epoch": 0.15857409455349739, + "grad_norm": 0.8442664796985687, + "learning_rate": 1.914559349495512e-05, + "loss": 0.5978, + "step": 9177 + }, + { + "epoch": 0.15859137406690627, + "grad_norm": 0.9767919535817524, + "learning_rate": 1.9145367129414977e-05, + "loss": 0.4146, + "step": 9178 + }, + { + "epoch": 0.15860865358031517, + "grad_norm": 1.3705344350959976, + "learning_rate": 1.9145140735230853e-05, + "loss": 0.6186, + "step": 9179 + }, + { + "epoch": 0.15862593309372408, + "grad_norm": 1.0815789349288305, + "learning_rate": 1.914491431240345e-05, + "loss": 0.7754, + "step": 9180 + }, + { + "epoch": 0.158643212607133, + "grad_norm": 1.3050358895373164, + "learning_rate": 1.9144687860933477e-05, + "loss": 0.6579, + "step": 9181 + }, + { + "epoch": 0.1586604921205419, + "grad_norm": 0.8563872024592883, + "learning_rate": 1.9144461380821652e-05, + "loss": 0.8232, + "step": 9182 + }, + { + "epoch": 0.15867777163395078, + "grad_norm": 0.6746961114381272, + "learning_rate": 1.9144234872068678e-05, + "loss": 0.4514, + "step": 9183 + }, + { + "epoch": 0.15869505114735968, + "grad_norm": 0.995710787581776, + "learning_rate": 1.9144008334675264e-05, + "loss": 0.4914, + "step": 9184 + }, + { + "epoch": 0.1587123306607686, + "grad_norm": 1.1896373354638548, + "learning_rate": 1.914378176864212e-05, + "loss": 0.6668, + "step": 9185 + }, + { + "epoch": 0.1587296101741775, + "grad_norm": 0.5343028885542827, + "learning_rate": 1.9143555173969957e-05, + "loss": 0.8247, + "step": 9186 + }, + { + "epoch": 0.1587468896875864, + "grad_norm": 0.9290442695490656, + "learning_rate": 1.9143328550659487e-05, + "loss": 0.6005, + "step": 9187 + }, + { + "epoch": 0.1587641692009953, + "grad_norm": 1.1391759578118594, + "learning_rate": 1.9143101898711416e-05, + "loss": 0.6928, + "step": 9188 + }, + { + "epoch": 0.1587814487144042, + "grad_norm": 1.003033236313211, + "learning_rate": 1.914287521812645e-05, + "loss": 0.7943, + "step": 9189 + }, + { + "epoch": 0.1587987282278131, + "grad_norm": 0.9689592507094732, + "learning_rate": 1.9142648508905313e-05, + "loss": 0.5782, + "step": 9190 + }, + { + "epoch": 0.158816007741222, + "grad_norm": 0.7658244147197036, + "learning_rate": 1.91424217710487e-05, + "loss": 0.7765, + "step": 9191 + }, + { + "epoch": 0.15883328725463092, + "grad_norm": 0.8971259280668964, + "learning_rate": 1.914219500455733e-05, + "loss": 0.5146, + "step": 9192 + }, + { + "epoch": 0.15885056676803982, + "grad_norm": 1.0041957481474681, + "learning_rate": 1.9141968209431908e-05, + "loss": 0.5175, + "step": 9193 + }, + { + "epoch": 0.1588678462814487, + "grad_norm": 1.0694714912012937, + "learning_rate": 1.9141741385673148e-05, + "loss": 0.9168, + "step": 9194 + }, + { + "epoch": 0.1588851257948576, + "grad_norm": 0.8000167672388446, + "learning_rate": 1.914151453328176e-05, + "loss": 0.6214, + "step": 9195 + }, + { + "epoch": 0.15890240530826652, + "grad_norm": 0.9460462172922005, + "learning_rate": 1.9141287652258453e-05, + "loss": 0.7767, + "step": 9196 + }, + { + "epoch": 0.15891968482167543, + "grad_norm": 0.7259942891500287, + "learning_rate": 1.914106074260394e-05, + "loss": 0.6833, + "step": 9197 + }, + { + "epoch": 0.15893696433508434, + "grad_norm": 0.8857939112893495, + "learning_rate": 1.9140833804318924e-05, + "loss": 0.5067, + "step": 9198 + }, + { + "epoch": 0.15895424384849322, + "grad_norm": 0.8634583614759405, + "learning_rate": 1.9140606837404128e-05, + "loss": 0.4804, + "step": 9199 + }, + { + "epoch": 0.15897152336190212, + "grad_norm": 1.1631126734044335, + "learning_rate": 1.9140379841860256e-05, + "loss": 0.7711, + "step": 9200 + }, + { + "epoch": 0.15898880287531103, + "grad_norm": 0.7455872998246968, + "learning_rate": 1.9140152817688018e-05, + "loss": 0.3862, + "step": 9201 + }, + { + "epoch": 0.15900608238871994, + "grad_norm": 1.001368523253349, + "learning_rate": 1.9139925764888127e-05, + "loss": 0.6078, + "step": 9202 + }, + { + "epoch": 0.15902336190212885, + "grad_norm": 1.3223024678587822, + "learning_rate": 1.9139698683461297e-05, + "loss": 0.8623, + "step": 9203 + }, + { + "epoch": 0.15904064141553773, + "grad_norm": 0.6536577250471536, + "learning_rate": 1.913947157340823e-05, + "loss": 0.317, + "step": 9204 + }, + { + "epoch": 0.15905792092894663, + "grad_norm": 1.0093385081085664, + "learning_rate": 1.913924443472965e-05, + "loss": 0.5948, + "step": 9205 + }, + { + "epoch": 0.15907520044235554, + "grad_norm": 0.9798717395857203, + "learning_rate": 1.9139017267426256e-05, + "loss": 0.6162, + "step": 9206 + }, + { + "epoch": 0.15909247995576445, + "grad_norm": 0.9490727689345342, + "learning_rate": 1.913879007149877e-05, + "loss": 0.5686, + "step": 9207 + }, + { + "epoch": 0.15910975946917336, + "grad_norm": 0.998376834093781, + "learning_rate": 1.9138562846947898e-05, + "loss": 0.5077, + "step": 9208 + }, + { + "epoch": 0.15912703898258226, + "grad_norm": 1.1547089616343045, + "learning_rate": 1.913833559377435e-05, + "loss": 0.6023, + "step": 9209 + }, + { + "epoch": 0.15914431849599114, + "grad_norm": 1.0515639952407838, + "learning_rate": 1.9138108311978844e-05, + "loss": 0.7818, + "step": 9210 + }, + { + "epoch": 0.15916159800940005, + "grad_norm": 1.239098874491792, + "learning_rate": 1.9137881001562088e-05, + "loss": 0.7743, + "step": 9211 + }, + { + "epoch": 0.15917887752280896, + "grad_norm": 1.2952103560062551, + "learning_rate": 1.913765366252479e-05, + "loss": 0.4615, + "step": 9212 + }, + { + "epoch": 0.15919615703621787, + "grad_norm": 1.2941229022503082, + "learning_rate": 1.9137426294867668e-05, + "loss": 0.8064, + "step": 9213 + }, + { + "epoch": 0.15921343654962677, + "grad_norm": 1.0820517288253315, + "learning_rate": 1.9137198898591434e-05, + "loss": 0.7451, + "step": 9214 + }, + { + "epoch": 0.15923071606303565, + "grad_norm": 0.8088123511155549, + "learning_rate": 1.9136971473696796e-05, + "loss": 0.5749, + "step": 9215 + }, + { + "epoch": 0.15924799557644456, + "grad_norm": 0.8318623808528983, + "learning_rate": 1.9136744020184473e-05, + "loss": 0.5101, + "step": 9216 + }, + { + "epoch": 0.15926527508985347, + "grad_norm": 1.1118909625920537, + "learning_rate": 1.913651653805517e-05, + "loss": 0.7506, + "step": 9217 + }, + { + "epoch": 0.15928255460326238, + "grad_norm": 0.9997910698885586, + "learning_rate": 1.9136289027309604e-05, + "loss": 0.5425, + "step": 9218 + }, + { + "epoch": 0.15929983411667129, + "grad_norm": 0.8520933918346616, + "learning_rate": 1.913606148794849e-05, + "loss": 0.6667, + "step": 9219 + }, + { + "epoch": 0.15931711363008016, + "grad_norm": 0.9856832939451469, + "learning_rate": 1.9135833919972534e-05, + "loss": 0.4983, + "step": 9220 + }, + { + "epoch": 0.15933439314348907, + "grad_norm": 0.8958386348963806, + "learning_rate": 1.913560632338245e-05, + "loss": 0.6881, + "step": 9221 + }, + { + "epoch": 0.15935167265689798, + "grad_norm": 1.0797936139883129, + "learning_rate": 1.9135378698178955e-05, + "loss": 0.7913, + "step": 9222 + }, + { + "epoch": 0.1593689521703069, + "grad_norm": 1.0509582650361047, + "learning_rate": 1.913515104436276e-05, + "loss": 0.868, + "step": 9223 + }, + { + "epoch": 0.1593862316837158, + "grad_norm": 0.9667353355377901, + "learning_rate": 1.9134923361934575e-05, + "loss": 0.6886, + "step": 9224 + }, + { + "epoch": 0.15940351119712468, + "grad_norm": 1.3807538693181889, + "learning_rate": 1.9134695650895117e-05, + "loss": 0.4633, + "step": 9225 + }, + { + "epoch": 0.15942079071053358, + "grad_norm": 1.1316598156143791, + "learning_rate": 1.91344679112451e-05, + "loss": 0.5271, + "step": 9226 + }, + { + "epoch": 0.1594380702239425, + "grad_norm": 1.3211870137132473, + "learning_rate": 1.9134240142985233e-05, + "loss": 0.736, + "step": 9227 + }, + { + "epoch": 0.1594553497373514, + "grad_norm": 0.752561381950695, + "learning_rate": 1.9134012346116232e-05, + "loss": 0.4921, + "step": 9228 + }, + { + "epoch": 0.1594726292507603, + "grad_norm": 1.3074912668103382, + "learning_rate": 1.9133784520638812e-05, + "loss": 0.7837, + "step": 9229 + }, + { + "epoch": 0.1594899087641692, + "grad_norm": 1.0770737692868637, + "learning_rate": 1.9133556666553684e-05, + "loss": 0.5702, + "step": 9230 + }, + { + "epoch": 0.1595071882775781, + "grad_norm": 1.2083602310047814, + "learning_rate": 1.913332878386156e-05, + "loss": 0.603, + "step": 9231 + }, + { + "epoch": 0.159524467790987, + "grad_norm": 0.918741696052362, + "learning_rate": 1.913310087256316e-05, + "loss": 0.5731, + "step": 9232 + }, + { + "epoch": 0.1595417473043959, + "grad_norm": 0.4087683800410573, + "learning_rate": 1.913287293265919e-05, + "loss": 0.5558, + "step": 9233 + }, + { + "epoch": 0.15955902681780482, + "grad_norm": 1.014971047920252, + "learning_rate": 1.913264496415037e-05, + "loss": 0.4259, + "step": 9234 + }, + { + "epoch": 0.15957630633121372, + "grad_norm": 0.9043588279957804, + "learning_rate": 1.913241696703741e-05, + "loss": 0.4247, + "step": 9235 + }, + { + "epoch": 0.1595935858446226, + "grad_norm": 1.3655848215430708, + "learning_rate": 1.913218894132103e-05, + "loss": 0.6131, + "step": 9236 + }, + { + "epoch": 0.1596108653580315, + "grad_norm": 1.2744118780747828, + "learning_rate": 1.9131960887001938e-05, + "loss": 0.5267, + "step": 9237 + }, + { + "epoch": 0.15962814487144042, + "grad_norm": 0.9057072478738504, + "learning_rate": 1.913173280408085e-05, + "loss": 0.5083, + "step": 9238 + }, + { + "epoch": 0.15964542438484933, + "grad_norm": 1.095417660177624, + "learning_rate": 1.9131504692558484e-05, + "loss": 0.5996, + "step": 9239 + }, + { + "epoch": 0.15966270389825823, + "grad_norm": 0.9812635729704439, + "learning_rate": 1.9131276552435547e-05, + "loss": 0.6864, + "step": 9240 + }, + { + "epoch": 0.15967998341166711, + "grad_norm": 1.2121322117225757, + "learning_rate": 1.913104838371276e-05, + "loss": 0.6898, + "step": 9241 + }, + { + "epoch": 0.15969726292507602, + "grad_norm": 1.3609380327635368, + "learning_rate": 1.9130820186390835e-05, + "loss": 0.5203, + "step": 9242 + }, + { + "epoch": 0.15971454243848493, + "grad_norm": 0.9075419644983695, + "learning_rate": 1.9130591960470488e-05, + "loss": 0.3912, + "step": 9243 + }, + { + "epoch": 0.15973182195189384, + "grad_norm": 1.511820834693825, + "learning_rate": 1.913036370595243e-05, + "loss": 0.736, + "step": 9244 + }, + { + "epoch": 0.15974910146530275, + "grad_norm": 1.6460156508303492, + "learning_rate": 1.9130135422837384e-05, + "loss": 0.7881, + "step": 9245 + }, + { + "epoch": 0.15976638097871165, + "grad_norm": 1.0403623403371351, + "learning_rate": 1.9129907111126054e-05, + "loss": 0.6004, + "step": 9246 + }, + { + "epoch": 0.15978366049212053, + "grad_norm": 0.9478519989841687, + "learning_rate": 1.9129678770819166e-05, + "loss": 0.6814, + "step": 9247 + }, + { + "epoch": 0.15980094000552944, + "grad_norm": 0.8965426480111113, + "learning_rate": 1.9129450401917432e-05, + "loss": 0.6448, + "step": 9248 + }, + { + "epoch": 0.15981821951893835, + "grad_norm": 1.2955510465607931, + "learning_rate": 1.912922200442156e-05, + "loss": 0.7202, + "step": 9249 + }, + { + "epoch": 0.15983549903234726, + "grad_norm": 1.2097789663719725, + "learning_rate": 1.9128993578332277e-05, + "loss": 0.4818, + "step": 9250 + }, + { + "epoch": 0.15985277854575616, + "grad_norm": 1.1320842305030696, + "learning_rate": 1.9128765123650286e-05, + "loss": 0.4839, + "step": 9251 + }, + { + "epoch": 0.15987005805916504, + "grad_norm": 1.218002936481032, + "learning_rate": 1.912853664037631e-05, + "loss": 0.5619, + "step": 9252 + }, + { + "epoch": 0.15988733757257395, + "grad_norm": 0.6851980177239948, + "learning_rate": 1.9128308128511066e-05, + "loss": 0.6356, + "step": 9253 + }, + { + "epoch": 0.15990461708598286, + "grad_norm": 1.1821904822272293, + "learning_rate": 1.9128079588055266e-05, + "loss": 0.417, + "step": 9254 + }, + { + "epoch": 0.15992189659939177, + "grad_norm": 1.1217052719486822, + "learning_rate": 1.9127851019009628e-05, + "loss": 0.4464, + "step": 9255 + }, + { + "epoch": 0.15993917611280067, + "grad_norm": 0.9498910445331101, + "learning_rate": 1.9127622421374866e-05, + "loss": 0.6959, + "step": 9256 + }, + { + "epoch": 0.15995645562620955, + "grad_norm": 0.961347011397225, + "learning_rate": 1.9127393795151695e-05, + "loss": 0.4806, + "step": 9257 + }, + { + "epoch": 0.15997373513961846, + "grad_norm": 0.7748843139527284, + "learning_rate": 1.9127165140340837e-05, + "loss": 0.5879, + "step": 9258 + }, + { + "epoch": 0.15999101465302737, + "grad_norm": 1.0625752173946297, + "learning_rate": 1.9126936456943003e-05, + "loss": 0.626, + "step": 9259 + }, + { + "epoch": 0.16000829416643628, + "grad_norm": 0.8601831664407604, + "learning_rate": 1.9126707744958908e-05, + "loss": 0.7912, + "step": 9260 + }, + { + "epoch": 0.16002557367984518, + "grad_norm": 0.8145647420885259, + "learning_rate": 1.9126479004389274e-05, + "loss": 0.6206, + "step": 9261 + }, + { + "epoch": 0.16004285319325406, + "grad_norm": 1.078390990797045, + "learning_rate": 1.912625023523481e-05, + "loss": 0.6616, + "step": 9262 + }, + { + "epoch": 0.16006013270666297, + "grad_norm": 0.9164612864649402, + "learning_rate": 1.912602143749624e-05, + "loss": 0.5695, + "step": 9263 + }, + { + "epoch": 0.16007741222007188, + "grad_norm": 0.7495278768501389, + "learning_rate": 1.9125792611174276e-05, + "loss": 0.6038, + "step": 9264 + }, + { + "epoch": 0.1600946917334808, + "grad_norm": 0.8633015260433188, + "learning_rate": 1.9125563756269637e-05, + "loss": 0.723, + "step": 9265 + }, + { + "epoch": 0.1601119712468897, + "grad_norm": 1.183018093685401, + "learning_rate": 1.9125334872783038e-05, + "loss": 0.4275, + "step": 9266 + }, + { + "epoch": 0.1601292507602986, + "grad_norm": 1.4288232343395433, + "learning_rate": 1.9125105960715195e-05, + "loss": 0.8392, + "step": 9267 + }, + { + "epoch": 0.16014653027370748, + "grad_norm": 0.8144257129404295, + "learning_rate": 1.912487702006683e-05, + "loss": 0.6527, + "step": 9268 + }, + { + "epoch": 0.1601638097871164, + "grad_norm": 1.2085199558069317, + "learning_rate": 1.9124648050838654e-05, + "loss": 0.4135, + "step": 9269 + }, + { + "epoch": 0.1601810893005253, + "grad_norm": 0.7984645540064156, + "learning_rate": 1.9124419053031393e-05, + "loss": 0.5447, + "step": 9270 + }, + { + "epoch": 0.1601983688139342, + "grad_norm": 0.8021696669596466, + "learning_rate": 1.912419002664575e-05, + "loss": 0.6887, + "step": 9271 + }, + { + "epoch": 0.1602156483273431, + "grad_norm": 1.0384443274650241, + "learning_rate": 1.9123960971682454e-05, + "loss": 0.6681, + "step": 9272 + }, + { + "epoch": 0.160232927840752, + "grad_norm": 0.8362549669050412, + "learning_rate": 1.9123731888142217e-05, + "loss": 0.5965, + "step": 9273 + }, + { + "epoch": 0.1602502073541609, + "grad_norm": 1.2884981111960814, + "learning_rate": 1.912350277602576e-05, + "loss": 0.7239, + "step": 9274 + }, + { + "epoch": 0.1602674868675698, + "grad_norm": 0.9102121421006508, + "learning_rate": 1.91232736353338e-05, + "loss": 0.6799, + "step": 9275 + }, + { + "epoch": 0.16028476638097872, + "grad_norm": 0.8647874271830838, + "learning_rate": 1.9123044466067052e-05, + "loss": 0.5104, + "step": 9276 + }, + { + "epoch": 0.16030204589438762, + "grad_norm": 1.4224127958652215, + "learning_rate": 1.9122815268226237e-05, + "loss": 0.5625, + "step": 9277 + }, + { + "epoch": 0.1603193254077965, + "grad_norm": 1.1261347622376794, + "learning_rate": 1.912258604181207e-05, + "loss": 0.6473, + "step": 9278 + }, + { + "epoch": 0.1603366049212054, + "grad_norm": 0.884900077469437, + "learning_rate": 1.9122356786825267e-05, + "loss": 0.521, + "step": 9279 + }, + { + "epoch": 0.16035388443461432, + "grad_norm": 0.9521795036549345, + "learning_rate": 1.9122127503266555e-05, + "loss": 0.7039, + "step": 9280 + }, + { + "epoch": 0.16037116394802323, + "grad_norm": 0.966707722242361, + "learning_rate": 1.9121898191136642e-05, + "loss": 0.5489, + "step": 9281 + }, + { + "epoch": 0.16038844346143213, + "grad_norm": 0.7709232193842788, + "learning_rate": 1.9121668850436254e-05, + "loss": 0.8655, + "step": 9282 + }, + { + "epoch": 0.16040572297484104, + "grad_norm": 1.072626085792077, + "learning_rate": 1.9121439481166105e-05, + "loss": 0.6637, + "step": 9283 + }, + { + "epoch": 0.16042300248824992, + "grad_norm": 0.8366278342414983, + "learning_rate": 1.9121210083326917e-05, + "loss": 0.5424, + "step": 9284 + }, + { + "epoch": 0.16044028200165883, + "grad_norm": 0.4477603018658289, + "learning_rate": 1.9120980656919398e-05, + "loss": 0.6987, + "step": 9285 + }, + { + "epoch": 0.16045756151506774, + "grad_norm": 0.7783739510122905, + "learning_rate": 1.912075120194428e-05, + "loss": 0.4942, + "step": 9286 + }, + { + "epoch": 0.16047484102847664, + "grad_norm": 1.3663951872296596, + "learning_rate": 1.9120521718402275e-05, + "loss": 0.7095, + "step": 9287 + }, + { + "epoch": 0.16049212054188555, + "grad_norm": 0.95318900045324, + "learning_rate": 1.9120292206294104e-05, + "loss": 0.7085, + "step": 9288 + }, + { + "epoch": 0.16050940005529443, + "grad_norm": 1.3510762738594224, + "learning_rate": 1.9120062665620485e-05, + "loss": 0.8049, + "step": 9289 + }, + { + "epoch": 0.16052667956870334, + "grad_norm": 0.8254619817594286, + "learning_rate": 1.9119833096382134e-05, + "loss": 0.4488, + "step": 9290 + }, + { + "epoch": 0.16054395908211225, + "grad_norm": 1.2231834726381194, + "learning_rate": 1.9119603498579775e-05, + "loss": 0.798, + "step": 9291 + }, + { + "epoch": 0.16056123859552116, + "grad_norm": 1.1113088930976056, + "learning_rate": 1.9119373872214124e-05, + "loss": 0.6516, + "step": 9292 + }, + { + "epoch": 0.16057851810893006, + "grad_norm": 0.5994487156524612, + "learning_rate": 1.9119144217285898e-05, + "loss": 0.7996, + "step": 9293 + }, + { + "epoch": 0.16059579762233894, + "grad_norm": 1.7254701145575424, + "learning_rate": 1.9118914533795822e-05, + "loss": 0.4982, + "step": 9294 + }, + { + "epoch": 0.16061307713574785, + "grad_norm": 0.5767250814314255, + "learning_rate": 1.9118684821744616e-05, + "loss": 0.7849, + "step": 9295 + }, + { + "epoch": 0.16063035664915676, + "grad_norm": 0.7741986708148605, + "learning_rate": 1.911845508113299e-05, + "loss": 0.5561, + "step": 9296 + }, + { + "epoch": 0.16064763616256567, + "grad_norm": 0.9013050340191759, + "learning_rate": 1.9118225311961673e-05, + "loss": 0.6767, + "step": 9297 + }, + { + "epoch": 0.16066491567597457, + "grad_norm": 0.747542075382423, + "learning_rate": 1.9117995514231383e-05, + "loss": 0.5253, + "step": 9298 + }, + { + "epoch": 0.16068219518938345, + "grad_norm": 0.615820198593228, + "learning_rate": 1.9117765687942836e-05, + "loss": 0.4413, + "step": 9299 + }, + { + "epoch": 0.16069947470279236, + "grad_norm": 1.0265847955446454, + "learning_rate": 1.9117535833096755e-05, + "loss": 0.5617, + "step": 9300 + }, + { + "epoch": 0.16071675421620127, + "grad_norm": 0.8294805039233326, + "learning_rate": 1.9117305949693857e-05, + "loss": 0.598, + "step": 9301 + }, + { + "epoch": 0.16073403372961018, + "grad_norm": 1.092356781793977, + "learning_rate": 1.9117076037734862e-05, + "loss": 0.6141, + "step": 9302 + }, + { + "epoch": 0.16075131324301908, + "grad_norm": 0.7337323870065589, + "learning_rate": 1.9116846097220496e-05, + "loss": 0.5614, + "step": 9303 + }, + { + "epoch": 0.160768592756428, + "grad_norm": 0.920165056640426, + "learning_rate": 1.9116616128151473e-05, + "loss": 0.5225, + "step": 9304 + }, + { + "epoch": 0.16078587226983687, + "grad_norm": 0.3866813595257686, + "learning_rate": 1.9116386130528516e-05, + "loss": 0.5259, + "step": 9305 + }, + { + "epoch": 0.16080315178324578, + "grad_norm": 0.9775452180815679, + "learning_rate": 1.911615610435235e-05, + "loss": 0.7169, + "step": 9306 + }, + { + "epoch": 0.1608204312966547, + "grad_norm": 0.8885493748144092, + "learning_rate": 1.911592604962368e-05, + "loss": 0.5622, + "step": 9307 + }, + { + "epoch": 0.1608377108100636, + "grad_norm": 0.9708431812193936, + "learning_rate": 1.9115695966343244e-05, + "loss": 0.4872, + "step": 9308 + }, + { + "epoch": 0.1608549903234725, + "grad_norm": 0.7785127337182788, + "learning_rate": 1.9115465854511755e-05, + "loss": 0.7213, + "step": 9309 + }, + { + "epoch": 0.16087226983688138, + "grad_norm": 1.1625924473990856, + "learning_rate": 1.9115235714129932e-05, + "loss": 0.5026, + "step": 9310 + }, + { + "epoch": 0.1608895493502903, + "grad_norm": 1.007967007066349, + "learning_rate": 1.91150055451985e-05, + "loss": 0.5416, + "step": 9311 + }, + { + "epoch": 0.1609068288636992, + "grad_norm": 0.9285071436792884, + "learning_rate": 1.9114775347718175e-05, + "loss": 0.5551, + "step": 9312 + }, + { + "epoch": 0.1609241083771081, + "grad_norm": 1.3818501699922343, + "learning_rate": 1.9114545121689684e-05, + "loss": 0.7295, + "step": 9313 + }, + { + "epoch": 0.160941387890517, + "grad_norm": 0.724724669514166, + "learning_rate": 1.9114314867113743e-05, + "loss": 0.601, + "step": 9314 + }, + { + "epoch": 0.1609586674039259, + "grad_norm": 0.9426187873632761, + "learning_rate": 1.9114084583991074e-05, + "loss": 0.5757, + "step": 9315 + }, + { + "epoch": 0.1609759469173348, + "grad_norm": 0.804170122459693, + "learning_rate": 1.9113854272322405e-05, + "loss": 0.4975, + "step": 9316 + }, + { + "epoch": 0.1609932264307437, + "grad_norm": 1.2670776946353046, + "learning_rate": 1.9113623932108448e-05, + "loss": 0.565, + "step": 9317 + }, + { + "epoch": 0.16101050594415262, + "grad_norm": 1.1951156577958404, + "learning_rate": 1.9113393563349927e-05, + "loss": 0.6548, + "step": 9318 + }, + { + "epoch": 0.16102778545756152, + "grad_norm": 1.4596982324774948, + "learning_rate": 1.9113163166047566e-05, + "loss": 0.7765, + "step": 9319 + }, + { + "epoch": 0.16104506497097043, + "grad_norm": 0.7950076494814975, + "learning_rate": 1.9112932740202084e-05, + "loss": 0.808, + "step": 9320 + }, + { + "epoch": 0.1610623444843793, + "grad_norm": 1.145413322641878, + "learning_rate": 1.9112702285814205e-05, + "loss": 0.6863, + "step": 9321 + }, + { + "epoch": 0.16107962399778822, + "grad_norm": 0.829635665853639, + "learning_rate": 1.9112471802884653e-05, + "loss": 0.4526, + "step": 9322 + }, + { + "epoch": 0.16109690351119713, + "grad_norm": 0.9210457517394718, + "learning_rate": 1.9112241291414145e-05, + "loss": 0.5921, + "step": 9323 + }, + { + "epoch": 0.16111418302460603, + "grad_norm": 1.0846598665084972, + "learning_rate": 1.9112010751403405e-05, + "loss": 0.7105, + "step": 9324 + }, + { + "epoch": 0.16113146253801494, + "grad_norm": 1.1609825034721755, + "learning_rate": 1.9111780182853154e-05, + "loss": 0.6865, + "step": 9325 + }, + { + "epoch": 0.16114874205142382, + "grad_norm": 0.8628758236759878, + "learning_rate": 1.9111549585764112e-05, + "loss": 0.3715, + "step": 9326 + }, + { + "epoch": 0.16116602156483273, + "grad_norm": 1.5503391740532637, + "learning_rate": 1.911131896013701e-05, + "loss": 0.6985, + "step": 9327 + }, + { + "epoch": 0.16118330107824164, + "grad_norm": 0.7849677329749586, + "learning_rate": 1.911108830597256e-05, + "loss": 0.7167, + "step": 9328 + }, + { + "epoch": 0.16120058059165054, + "grad_norm": 1.2476425074073534, + "learning_rate": 1.911085762327149e-05, + "loss": 0.724, + "step": 9329 + }, + { + "epoch": 0.16121786010505945, + "grad_norm": 0.9231348824354783, + "learning_rate": 1.911062691203452e-05, + "loss": 0.6907, + "step": 9330 + }, + { + "epoch": 0.16123513961846833, + "grad_norm": 1.618849544047857, + "learning_rate": 1.911039617226238e-05, + "loss": 0.5295, + "step": 9331 + }, + { + "epoch": 0.16125241913187724, + "grad_norm": 1.8594468891991864, + "learning_rate": 1.9110165403955782e-05, + "loss": 0.6627, + "step": 9332 + }, + { + "epoch": 0.16126969864528615, + "grad_norm": 1.1061866574558425, + "learning_rate": 1.9109934607115454e-05, + "loss": 0.7552, + "step": 9333 + }, + { + "epoch": 0.16128697815869505, + "grad_norm": 1.2197528342165889, + "learning_rate": 1.9109703781742122e-05, + "loss": 0.7061, + "step": 9334 + }, + { + "epoch": 0.16130425767210396, + "grad_norm": 0.8681326388132337, + "learning_rate": 1.91094729278365e-05, + "loss": 0.5239, + "step": 9335 + }, + { + "epoch": 0.16132153718551284, + "grad_norm": 0.4708045824845099, + "learning_rate": 1.9109242045399324e-05, + "loss": 0.6299, + "step": 9336 + }, + { + "epoch": 0.16133881669892175, + "grad_norm": 0.9482721624401954, + "learning_rate": 1.91090111344313e-05, + "loss": 0.6218, + "step": 9337 + }, + { + "epoch": 0.16135609621233066, + "grad_norm": 0.9557232872087816, + "learning_rate": 1.910878019493317e-05, + "loss": 0.5169, + "step": 9338 + }, + { + "epoch": 0.16137337572573957, + "grad_norm": 0.8995923392014832, + "learning_rate": 1.910854922690564e-05, + "loss": 0.3688, + "step": 9339 + }, + { + "epoch": 0.16139065523914847, + "grad_norm": 1.1299868369771946, + "learning_rate": 1.910831823034945e-05, + "loss": 0.5903, + "step": 9340 + }, + { + "epoch": 0.16140793475255738, + "grad_norm": 0.7179001882755458, + "learning_rate": 1.9108087205265308e-05, + "loss": 0.4885, + "step": 9341 + }, + { + "epoch": 0.16142521426596626, + "grad_norm": 1.0225630742466483, + "learning_rate": 1.9107856151653948e-05, + "loss": 0.7325, + "step": 9342 + }, + { + "epoch": 0.16144249377937517, + "grad_norm": 1.189137075107693, + "learning_rate": 1.910762506951609e-05, + "loss": 0.839, + "step": 9343 + }, + { + "epoch": 0.16145977329278408, + "grad_norm": 1.266529172390625, + "learning_rate": 1.9107393958852457e-05, + "loss": 0.7263, + "step": 9344 + }, + { + "epoch": 0.16147705280619298, + "grad_norm": 0.716498756905699, + "learning_rate": 1.9107162819663776e-05, + "loss": 0.3749, + "step": 9345 + }, + { + "epoch": 0.1614943323196019, + "grad_norm": 1.192441859763073, + "learning_rate": 1.9106931651950766e-05, + "loss": 0.5894, + "step": 9346 + }, + { + "epoch": 0.16151161183301077, + "grad_norm": 0.8945085509132901, + "learning_rate": 1.910670045571416e-05, + "loss": 0.8369, + "step": 9347 + }, + { + "epoch": 0.16152889134641968, + "grad_norm": 0.7351059671070143, + "learning_rate": 1.9106469230954666e-05, + "loss": 0.3251, + "step": 9348 + }, + { + "epoch": 0.1615461708598286, + "grad_norm": 1.9072198004221992, + "learning_rate": 1.910623797767303e-05, + "loss": 0.6531, + "step": 9349 + }, + { + "epoch": 0.1615634503732375, + "grad_norm": 0.8962134414233346, + "learning_rate": 1.9106006695869953e-05, + "loss": 0.4358, + "step": 9350 + }, + { + "epoch": 0.1615807298866464, + "grad_norm": 1.0297594701897541, + "learning_rate": 1.910577538554618e-05, + "loss": 0.5564, + "step": 9351 + }, + { + "epoch": 0.16159800940005528, + "grad_norm": 1.0526787596432485, + "learning_rate": 1.910554404670242e-05, + "loss": 0.5579, + "step": 9352 + }, + { + "epoch": 0.1616152889134642, + "grad_norm": 0.6054561855602751, + "learning_rate": 1.910531267933941e-05, + "loss": 0.5119, + "step": 9353 + }, + { + "epoch": 0.1616325684268731, + "grad_norm": 1.3752259788825123, + "learning_rate": 1.9105081283457867e-05, + "loss": 0.6967, + "step": 9354 + }, + { + "epoch": 0.161649847940282, + "grad_norm": 0.8155819112285826, + "learning_rate": 1.9104849859058516e-05, + "loss": 0.5481, + "step": 9355 + }, + { + "epoch": 0.1616671274536909, + "grad_norm": 1.0462593176895778, + "learning_rate": 1.9104618406142082e-05, + "loss": 0.6484, + "step": 9356 + }, + { + "epoch": 0.16168440696709982, + "grad_norm": 1.0864691955790011, + "learning_rate": 1.9104386924709292e-05, + "loss": 0.5667, + "step": 9357 + }, + { + "epoch": 0.1617016864805087, + "grad_norm": 1.2503104776976484, + "learning_rate": 1.9104155414760873e-05, + "loss": 0.7695, + "step": 9358 + }, + { + "epoch": 0.1617189659939176, + "grad_norm": 1.050483152811433, + "learning_rate": 1.9103923876297544e-05, + "loss": 0.4389, + "step": 9359 + }, + { + "epoch": 0.16173624550732651, + "grad_norm": 0.6133434810134675, + "learning_rate": 1.9103692309320037e-05, + "loss": 0.5003, + "step": 9360 + }, + { + "epoch": 0.16175352502073542, + "grad_norm": 0.8200372830223628, + "learning_rate": 1.9103460713829073e-05, + "loss": 0.6068, + "step": 9361 + }, + { + "epoch": 0.16177080453414433, + "grad_norm": 0.8786048645222317, + "learning_rate": 1.9103229089825377e-05, + "loss": 0.5232, + "step": 9362 + }, + { + "epoch": 0.1617880840475532, + "grad_norm": 1.3591921781091143, + "learning_rate": 1.9102997437309675e-05, + "loss": 0.5829, + "step": 9363 + }, + { + "epoch": 0.16180536356096212, + "grad_norm": 0.8961753594666867, + "learning_rate": 1.910276575628269e-05, + "loss": 0.6103, + "step": 9364 + }, + { + "epoch": 0.16182264307437103, + "grad_norm": 1.1597863966755022, + "learning_rate": 1.9102534046745158e-05, + "loss": 0.6842, + "step": 9365 + }, + { + "epoch": 0.16183992258777993, + "grad_norm": 1.2807267278494747, + "learning_rate": 1.9102302308697794e-05, + "loss": 0.7442, + "step": 9366 + }, + { + "epoch": 0.16185720210118884, + "grad_norm": 0.9805261103821612, + "learning_rate": 1.910207054214133e-05, + "loss": 0.5567, + "step": 9367 + }, + { + "epoch": 0.16187448161459772, + "grad_norm": 0.8973974183553616, + "learning_rate": 1.9101838747076488e-05, + "loss": 0.5868, + "step": 9368 + }, + { + "epoch": 0.16189176112800663, + "grad_norm": 0.553286354095106, + "learning_rate": 1.9101606923503993e-05, + "loss": 0.8304, + "step": 9369 + }, + { + "epoch": 0.16190904064141554, + "grad_norm": 0.8221017789168341, + "learning_rate": 1.9101375071424574e-05, + "loss": 0.5018, + "step": 9370 + }, + { + "epoch": 0.16192632015482444, + "grad_norm": 0.8216444699983849, + "learning_rate": 1.910114319083896e-05, + "loss": 0.5424, + "step": 9371 + }, + { + "epoch": 0.16194359966823335, + "grad_norm": 1.2738555506809788, + "learning_rate": 1.910091128174787e-05, + "loss": 0.701, + "step": 9372 + }, + { + "epoch": 0.16196087918164226, + "grad_norm": 1.3632450960248734, + "learning_rate": 1.9100679344152035e-05, + "loss": 0.6346, + "step": 9373 + }, + { + "epoch": 0.16197815869505114, + "grad_norm": 0.6754832912002401, + "learning_rate": 1.910044737805218e-05, + "loss": 0.4205, + "step": 9374 + }, + { + "epoch": 0.16199543820846005, + "grad_norm": 1.6337700934725878, + "learning_rate": 1.9100215383449035e-05, + "loss": 0.4968, + "step": 9375 + }, + { + "epoch": 0.16201271772186895, + "grad_norm": 1.0730610574806931, + "learning_rate": 1.9099983360343323e-05, + "loss": 0.8439, + "step": 9376 + }, + { + "epoch": 0.16202999723527786, + "grad_norm": 0.9449795722128933, + "learning_rate": 1.909975130873577e-05, + "loss": 0.7112, + "step": 9377 + }, + { + "epoch": 0.16204727674868677, + "grad_norm": 1.0905708661184728, + "learning_rate": 1.9099519228627106e-05, + "loss": 0.7735, + "step": 9378 + }, + { + "epoch": 0.16206455626209565, + "grad_norm": 1.1810854440795602, + "learning_rate": 1.9099287120018056e-05, + "loss": 0.4758, + "step": 9379 + }, + { + "epoch": 0.16208183577550456, + "grad_norm": 0.8795600771350612, + "learning_rate": 1.9099054982909347e-05, + "loss": 0.4614, + "step": 9380 + }, + { + "epoch": 0.16209911528891346, + "grad_norm": 1.297503245966534, + "learning_rate": 1.9098822817301706e-05, + "loss": 0.6116, + "step": 9381 + }, + { + "epoch": 0.16211639480232237, + "grad_norm": 0.9116872685239006, + "learning_rate": 1.9098590623195858e-05, + "loss": 0.7452, + "step": 9382 + }, + { + "epoch": 0.16213367431573128, + "grad_norm": 0.9225931029856845, + "learning_rate": 1.9098358400592535e-05, + "loss": 0.9099, + "step": 9383 + }, + { + "epoch": 0.16215095382914016, + "grad_norm": 0.9271379058478095, + "learning_rate": 1.9098126149492462e-05, + "loss": 0.5334, + "step": 9384 + }, + { + "epoch": 0.16216823334254907, + "grad_norm": 0.92133114240838, + "learning_rate": 1.909789386989637e-05, + "loss": 0.8422, + "step": 9385 + }, + { + "epoch": 0.16218551285595798, + "grad_norm": 2.1468968443497336, + "learning_rate": 1.909766156180498e-05, + "loss": 0.6377, + "step": 9386 + }, + { + "epoch": 0.16220279236936688, + "grad_norm": 0.9484551429906718, + "learning_rate": 1.909742922521902e-05, + "loss": 0.69, + "step": 9387 + }, + { + "epoch": 0.1622200718827758, + "grad_norm": 0.6925491676312856, + "learning_rate": 1.9097196860139223e-05, + "loss": 0.6339, + "step": 9388 + }, + { + "epoch": 0.16223735139618467, + "grad_norm": 1.0722948793997535, + "learning_rate": 1.9096964466566312e-05, + "loss": 0.6563, + "step": 9389 + }, + { + "epoch": 0.16225463090959358, + "grad_norm": 1.3136107716089944, + "learning_rate": 1.909673204450102e-05, + "loss": 0.7653, + "step": 9390 + }, + { + "epoch": 0.16227191042300249, + "grad_norm": 1.0263560660562216, + "learning_rate": 1.909649959394407e-05, + "loss": 0.7536, + "step": 9391 + }, + { + "epoch": 0.1622891899364114, + "grad_norm": 0.6567160741967615, + "learning_rate": 1.9096267114896193e-05, + "loss": 0.5349, + "step": 9392 + }, + { + "epoch": 0.1623064694498203, + "grad_norm": 1.0219282919432553, + "learning_rate": 1.9096034607358115e-05, + "loss": 0.5857, + "step": 9393 + }, + { + "epoch": 0.1623237489632292, + "grad_norm": 1.3726670302692485, + "learning_rate": 1.9095802071330565e-05, + "loss": 0.712, + "step": 9394 + }, + { + "epoch": 0.1623410284766381, + "grad_norm": 1.066391440589379, + "learning_rate": 1.9095569506814272e-05, + "loss": 0.6028, + "step": 9395 + }, + { + "epoch": 0.162358307990047, + "grad_norm": 0.9024042147613863, + "learning_rate": 1.9095336913809964e-05, + "loss": 0.6496, + "step": 9396 + }, + { + "epoch": 0.1623755875034559, + "grad_norm": 0.4243058000477803, + "learning_rate": 1.9095104292318372e-05, + "loss": 0.5754, + "step": 9397 + }, + { + "epoch": 0.1623928670168648, + "grad_norm": 0.8082188188167843, + "learning_rate": 1.9094871642340222e-05, + "loss": 0.6578, + "step": 9398 + }, + { + "epoch": 0.16241014653027372, + "grad_norm": 0.9184128601427206, + "learning_rate": 1.909463896387624e-05, + "loss": 0.5636, + "step": 9399 + }, + { + "epoch": 0.1624274260436826, + "grad_norm": 1.3333702943076633, + "learning_rate": 1.9094406256927158e-05, + "loss": 0.7015, + "step": 9400 + }, + { + "epoch": 0.1624447055570915, + "grad_norm": 1.2910473220684935, + "learning_rate": 1.9094173521493707e-05, + "loss": 0.5301, + "step": 9401 + }, + { + "epoch": 0.16246198507050041, + "grad_norm": 1.2674649920361913, + "learning_rate": 1.909394075757661e-05, + "loss": 0.7682, + "step": 9402 + }, + { + "epoch": 0.16247926458390932, + "grad_norm": 1.2572753695604006, + "learning_rate": 1.90937079651766e-05, + "loss": 0.7767, + "step": 9403 + }, + { + "epoch": 0.16249654409731823, + "grad_norm": 0.775738005348973, + "learning_rate": 1.9093475144294408e-05, + "loss": 0.5291, + "step": 9404 + }, + { + "epoch": 0.1625138236107271, + "grad_norm": 0.7389392578365852, + "learning_rate": 1.9093242294930757e-05, + "loss": 0.5309, + "step": 9405 + }, + { + "epoch": 0.16253110312413602, + "grad_norm": 1.0659191872877876, + "learning_rate": 1.9093009417086382e-05, + "loss": 0.6786, + "step": 9406 + }, + { + "epoch": 0.16254838263754492, + "grad_norm": 0.9859934325845258, + "learning_rate": 1.909277651076201e-05, + "loss": 0.6812, + "step": 9407 + }, + { + "epoch": 0.16256566215095383, + "grad_norm": 1.1139986042655836, + "learning_rate": 1.9092543575958374e-05, + "loss": 0.4925, + "step": 9408 + }, + { + "epoch": 0.16258294166436274, + "grad_norm": 0.7245287542556824, + "learning_rate": 1.90923106126762e-05, + "loss": 0.8384, + "step": 9409 + }, + { + "epoch": 0.16260022117777165, + "grad_norm": 0.8247167848595277, + "learning_rate": 1.9092077620916215e-05, + "loss": 0.6553, + "step": 9410 + }, + { + "epoch": 0.16261750069118053, + "grad_norm": 1.1185005882943508, + "learning_rate": 1.9091844600679153e-05, + "loss": 0.9546, + "step": 9411 + }, + { + "epoch": 0.16263478020458944, + "grad_norm": 0.656403239939647, + "learning_rate": 1.9091611551965746e-05, + "loss": 0.4439, + "step": 9412 + }, + { + "epoch": 0.16265205971799834, + "grad_norm": 0.8628265586387691, + "learning_rate": 1.9091378474776717e-05, + "loss": 0.5416, + "step": 9413 + }, + { + "epoch": 0.16266933923140725, + "grad_norm": 1.089825855967675, + "learning_rate": 1.90911453691128e-05, + "loss": 0.5292, + "step": 9414 + }, + { + "epoch": 0.16268661874481616, + "grad_norm": 1.0571881843954591, + "learning_rate": 1.9090912234974722e-05, + "loss": 0.5648, + "step": 9415 + }, + { + "epoch": 0.16270389825822504, + "grad_norm": 0.843383330957166, + "learning_rate": 1.909067907236322e-05, + "loss": 0.8973, + "step": 9416 + }, + { + "epoch": 0.16272117777163395, + "grad_norm": 0.7074032105944636, + "learning_rate": 1.9090445881279024e-05, + "loss": 0.7095, + "step": 9417 + }, + { + "epoch": 0.16273845728504285, + "grad_norm": 0.7058251835389565, + "learning_rate": 1.9090212661722852e-05, + "loss": 0.6071, + "step": 9418 + }, + { + "epoch": 0.16275573679845176, + "grad_norm": 0.8650964917251889, + "learning_rate": 1.908997941369545e-05, + "loss": 0.6006, + "step": 9419 + }, + { + "epoch": 0.16277301631186067, + "grad_norm": 0.9726882816596111, + "learning_rate": 1.908974613719754e-05, + "loss": 0.5077, + "step": 9420 + }, + { + "epoch": 0.16279029582526955, + "grad_norm": 0.9895973818232572, + "learning_rate": 1.9089512832229854e-05, + "loss": 0.6104, + "step": 9421 + }, + { + "epoch": 0.16280757533867846, + "grad_norm": 0.6880426543623097, + "learning_rate": 1.908927949879312e-05, + "loss": 0.4737, + "step": 9422 + }, + { + "epoch": 0.16282485485208736, + "grad_norm": 0.6875130970766697, + "learning_rate": 1.9089046136888075e-05, + "loss": 0.5406, + "step": 9423 + }, + { + "epoch": 0.16284213436549627, + "grad_norm": 0.9066414322855945, + "learning_rate": 1.9088812746515444e-05, + "loss": 0.568, + "step": 9424 + }, + { + "epoch": 0.16285941387890518, + "grad_norm": 1.0236292502244084, + "learning_rate": 1.9088579327675964e-05, + "loss": 0.5396, + "step": 9425 + }, + { + "epoch": 0.16287669339231406, + "grad_norm": 0.9871427707496204, + "learning_rate": 1.9088345880370363e-05, + "loss": 0.5925, + "step": 9426 + }, + { + "epoch": 0.16289397290572297, + "grad_norm": 1.1649518620858736, + "learning_rate": 1.908811240459937e-05, + "loss": 0.5677, + "step": 9427 + }, + { + "epoch": 0.16291125241913187, + "grad_norm": 0.6729992626838351, + "learning_rate": 1.9087878900363718e-05, + "loss": 0.7143, + "step": 9428 + }, + { + "epoch": 0.16292853193254078, + "grad_norm": 0.9449360566958191, + "learning_rate": 1.9087645367664137e-05, + "loss": 0.6, + "step": 9429 + }, + { + "epoch": 0.1629458114459497, + "grad_norm": 0.5949681976074221, + "learning_rate": 1.9087411806501363e-05, + "loss": 0.7373, + "step": 9430 + }, + { + "epoch": 0.1629630909593586, + "grad_norm": 1.0170156029350603, + "learning_rate": 1.9087178216876124e-05, + "loss": 0.5794, + "step": 9431 + }, + { + "epoch": 0.16298037047276748, + "grad_norm": 0.8560592733203488, + "learning_rate": 1.9086944598789155e-05, + "loss": 0.6395, + "step": 9432 + }, + { + "epoch": 0.16299764998617639, + "grad_norm": 0.6530051971629983, + "learning_rate": 1.908671095224118e-05, + "loss": 0.3809, + "step": 9433 + }, + { + "epoch": 0.1630149294995853, + "grad_norm": 0.7973441412525272, + "learning_rate": 1.908647727723294e-05, + "loss": 0.6403, + "step": 9434 + }, + { + "epoch": 0.1630322090129942, + "grad_norm": 0.7514698022472363, + "learning_rate": 1.908624357376516e-05, + "loss": 0.8885, + "step": 9435 + }, + { + "epoch": 0.1630494885264031, + "grad_norm": 1.4994590407678954, + "learning_rate": 1.9086009841838576e-05, + "loss": 0.7694, + "step": 9436 + }, + { + "epoch": 0.163066768039812, + "grad_norm": 0.5655776396139462, + "learning_rate": 1.9085776081453918e-05, + "loss": 0.4829, + "step": 9437 + }, + { + "epoch": 0.1630840475532209, + "grad_norm": 1.0026499636272053, + "learning_rate": 1.908554229261192e-05, + "loss": 0.5355, + "step": 9438 + }, + { + "epoch": 0.1631013270666298, + "grad_norm": 1.3156870496751019, + "learning_rate": 1.9085308475313308e-05, + "loss": 0.555, + "step": 9439 + }, + { + "epoch": 0.1631186065800387, + "grad_norm": 0.9397299959536297, + "learning_rate": 1.9085074629558824e-05, + "loss": 0.7309, + "step": 9440 + }, + { + "epoch": 0.16313588609344762, + "grad_norm": 1.351106599906419, + "learning_rate": 1.9084840755349196e-05, + "loss": 0.7733, + "step": 9441 + }, + { + "epoch": 0.1631531656068565, + "grad_norm": 0.9819976451190628, + "learning_rate": 1.9084606852685157e-05, + "loss": 0.5918, + "step": 9442 + }, + { + "epoch": 0.1631704451202654, + "grad_norm": 1.3068585503138055, + "learning_rate": 1.9084372921567435e-05, + "loss": 0.9614, + "step": 9443 + }, + { + "epoch": 0.1631877246336743, + "grad_norm": 0.4913172574851518, + "learning_rate": 1.9084138961996768e-05, + "loss": 0.5713, + "step": 9444 + }, + { + "epoch": 0.16320500414708322, + "grad_norm": 0.9515641171905033, + "learning_rate": 1.9083904973973892e-05, + "loss": 0.5459, + "step": 9445 + }, + { + "epoch": 0.16322228366049213, + "grad_norm": 1.2698113018327353, + "learning_rate": 1.908367095749953e-05, + "loss": 0.6466, + "step": 9446 + }, + { + "epoch": 0.16323956317390104, + "grad_norm": 1.069249815761162, + "learning_rate": 1.908343691257442e-05, + "loss": 0.5995, + "step": 9447 + }, + { + "epoch": 0.16325684268730992, + "grad_norm": 0.6680576085939447, + "learning_rate": 1.90832028391993e-05, + "loss": 0.6006, + "step": 9448 + }, + { + "epoch": 0.16327412220071882, + "grad_norm": 1.1944524898128723, + "learning_rate": 1.9082968737374896e-05, + "loss": 0.4955, + "step": 9449 + }, + { + "epoch": 0.16329140171412773, + "grad_norm": 1.1365097887663933, + "learning_rate": 1.908273460710194e-05, + "loss": 0.5876, + "step": 9450 + }, + { + "epoch": 0.16330868122753664, + "grad_norm": 0.5525145370847335, + "learning_rate": 1.9082500448381172e-05, + "loss": 0.7314, + "step": 9451 + }, + { + "epoch": 0.16332596074094555, + "grad_norm": 0.3548501259059124, + "learning_rate": 1.908226626121332e-05, + "loss": 0.4618, + "step": 9452 + }, + { + "epoch": 0.16334324025435443, + "grad_norm": 1.3320909632390923, + "learning_rate": 1.9082032045599126e-05, + "loss": 0.7854, + "step": 9453 + }, + { + "epoch": 0.16336051976776333, + "grad_norm": 1.2177433057310616, + "learning_rate": 1.9081797801539315e-05, + "loss": 0.5249, + "step": 9454 + }, + { + "epoch": 0.16337779928117224, + "grad_norm": 1.328765912483243, + "learning_rate": 1.908156352903462e-05, + "loss": 0.7704, + "step": 9455 + }, + { + "epoch": 0.16339507879458115, + "grad_norm": 1.2808104005585226, + "learning_rate": 1.9081329228085782e-05, + "loss": 0.673, + "step": 9456 + }, + { + "epoch": 0.16341235830799006, + "grad_norm": 1.1623301887047515, + "learning_rate": 1.9081094898693527e-05, + "loss": 0.6862, + "step": 9457 + }, + { + "epoch": 0.16342963782139894, + "grad_norm": 0.40536962293344686, + "learning_rate": 1.9080860540858593e-05, + "loss": 0.6925, + "step": 9458 + }, + { + "epoch": 0.16344691733480785, + "grad_norm": 1.127002001731189, + "learning_rate": 1.9080626154581717e-05, + "loss": 0.5857, + "step": 9459 + }, + { + "epoch": 0.16346419684821675, + "grad_norm": 1.4090548153583056, + "learning_rate": 1.9080391739863625e-05, + "loss": 0.7744, + "step": 9460 + }, + { + "epoch": 0.16348147636162566, + "grad_norm": 1.0715614181880768, + "learning_rate": 1.908015729670506e-05, + "loss": 0.5893, + "step": 9461 + }, + { + "epoch": 0.16349875587503457, + "grad_norm": 0.8606219831730617, + "learning_rate": 1.907992282510675e-05, + "loss": 0.632, + "step": 9462 + }, + { + "epoch": 0.16351603538844345, + "grad_norm": 1.2935187028306152, + "learning_rate": 1.9079688325069433e-05, + "loss": 0.6255, + "step": 9463 + }, + { + "epoch": 0.16353331490185236, + "grad_norm": 1.056166920528558, + "learning_rate": 1.907945379659384e-05, + "loss": 0.618, + "step": 9464 + }, + { + "epoch": 0.16355059441526126, + "grad_norm": 0.815723661472038, + "learning_rate": 1.9079219239680712e-05, + "loss": 0.5561, + "step": 9465 + }, + { + "epoch": 0.16356787392867017, + "grad_norm": 1.3633237386052648, + "learning_rate": 1.9078984654330774e-05, + "loss": 0.799, + "step": 9466 + }, + { + "epoch": 0.16358515344207908, + "grad_norm": 1.0451209046052523, + "learning_rate": 1.9078750040544768e-05, + "loss": 0.6509, + "step": 9467 + }, + { + "epoch": 0.163602432955488, + "grad_norm": 0.7335247170285195, + "learning_rate": 1.9078515398323428e-05, + "loss": 0.713, + "step": 9468 + }, + { + "epoch": 0.16361971246889687, + "grad_norm": 1.0847918046442928, + "learning_rate": 1.9078280727667487e-05, + "loss": 0.5734, + "step": 9469 + }, + { + "epoch": 0.16363699198230577, + "grad_norm": 0.7708038780216081, + "learning_rate": 1.9078046028577684e-05, + "loss": 0.7906, + "step": 9470 + }, + { + "epoch": 0.16365427149571468, + "grad_norm": 0.7879638479378811, + "learning_rate": 1.9077811301054746e-05, + "loss": 0.6122, + "step": 9471 + }, + { + "epoch": 0.1636715510091236, + "grad_norm": 1.021148975350108, + "learning_rate": 1.9077576545099416e-05, + "loss": 0.7641, + "step": 9472 + }, + { + "epoch": 0.1636888305225325, + "grad_norm": 1.0354512539018934, + "learning_rate": 1.9077341760712426e-05, + "loss": 0.6232, + "step": 9473 + }, + { + "epoch": 0.16370611003594138, + "grad_norm": 1.091431261900071, + "learning_rate": 1.9077106947894514e-05, + "loss": 0.683, + "step": 9474 + }, + { + "epoch": 0.16372338954935028, + "grad_norm": 0.4663471795024406, + "learning_rate": 1.9076872106646405e-05, + "loss": 0.6156, + "step": 9475 + }, + { + "epoch": 0.1637406690627592, + "grad_norm": 1.2912959910425204, + "learning_rate": 1.9076637236968852e-05, + "loss": 0.7507, + "step": 9476 + }, + { + "epoch": 0.1637579485761681, + "grad_norm": 1.192520312795895, + "learning_rate": 1.9076402338862576e-05, + "loss": 0.8743, + "step": 9477 + }, + { + "epoch": 0.163775228089577, + "grad_norm": 0.8134355566179773, + "learning_rate": 1.9076167412328322e-05, + "loss": 0.5016, + "step": 9478 + }, + { + "epoch": 0.1637925076029859, + "grad_norm": 1.1439622865004186, + "learning_rate": 1.9075932457366816e-05, + "loss": 0.6208, + "step": 9479 + }, + { + "epoch": 0.1638097871163948, + "grad_norm": 1.1307762333301752, + "learning_rate": 1.9075697473978806e-05, + "loss": 0.7261, + "step": 9480 + }, + { + "epoch": 0.1638270666298037, + "grad_norm": 1.1198990289923327, + "learning_rate": 1.9075462462165018e-05, + "loss": 0.6173, + "step": 9481 + }, + { + "epoch": 0.1638443461432126, + "grad_norm": 0.8174543994357052, + "learning_rate": 1.9075227421926193e-05, + "loss": 0.7941, + "step": 9482 + }, + { + "epoch": 0.16386162565662152, + "grad_norm": 1.1806868956891827, + "learning_rate": 1.9074992353263067e-05, + "loss": 0.8813, + "step": 9483 + }, + { + "epoch": 0.16387890517003043, + "grad_norm": 0.5767695905688376, + "learning_rate": 1.9074757256176375e-05, + "loss": 0.5866, + "step": 9484 + }, + { + "epoch": 0.1638961846834393, + "grad_norm": 0.5556739199902648, + "learning_rate": 1.9074522130666852e-05, + "loss": 0.7932, + "step": 9485 + }, + { + "epoch": 0.1639134641968482, + "grad_norm": 1.0368047852699058, + "learning_rate": 1.907428697673524e-05, + "loss": 0.6441, + "step": 9486 + }, + { + "epoch": 0.16393074371025712, + "grad_norm": 1.135582058806331, + "learning_rate": 1.9074051794382267e-05, + "loss": 0.5318, + "step": 9487 + }, + { + "epoch": 0.16394802322366603, + "grad_norm": 2.4924959106473037, + "learning_rate": 1.9073816583608676e-05, + "loss": 0.6029, + "step": 9488 + }, + { + "epoch": 0.16396530273707494, + "grad_norm": 1.124176886452355, + "learning_rate": 1.9073581344415202e-05, + "loss": 0.6594, + "step": 9489 + }, + { + "epoch": 0.16398258225048382, + "grad_norm": 1.443455362038527, + "learning_rate": 1.907334607680258e-05, + "loss": 0.7381, + "step": 9490 + }, + { + "epoch": 0.16399986176389272, + "grad_norm": 1.0138019625009294, + "learning_rate": 1.907311078077155e-05, + "loss": 0.6241, + "step": 9491 + }, + { + "epoch": 0.16401714127730163, + "grad_norm": 1.0473181688169557, + "learning_rate": 1.907287545632285e-05, + "loss": 0.5184, + "step": 9492 + }, + { + "epoch": 0.16403442079071054, + "grad_norm": 1.2622985525571206, + "learning_rate": 1.907264010345721e-05, + "loss": 0.7894, + "step": 9493 + }, + { + "epoch": 0.16405170030411945, + "grad_norm": 1.1131953107901533, + "learning_rate": 1.9072404722175376e-05, + "loss": 0.8331, + "step": 9494 + }, + { + "epoch": 0.16406897981752833, + "grad_norm": 1.1736177169120485, + "learning_rate": 1.9072169312478076e-05, + "loss": 0.6141, + "step": 9495 + }, + { + "epoch": 0.16408625933093723, + "grad_norm": 0.7088009891941675, + "learning_rate": 1.9071933874366056e-05, + "loss": 0.4415, + "step": 9496 + }, + { + "epoch": 0.16410353884434614, + "grad_norm": 0.6707980714356307, + "learning_rate": 1.907169840784005e-05, + "loss": 0.5604, + "step": 9497 + }, + { + "epoch": 0.16412081835775505, + "grad_norm": 1.1384157126132786, + "learning_rate": 1.9071462912900792e-05, + "loss": 0.6263, + "step": 9498 + }, + { + "epoch": 0.16413809787116396, + "grad_norm": 1.1834771908472983, + "learning_rate": 1.9071227389549024e-05, + "loss": 0.703, + "step": 9499 + }, + { + "epoch": 0.16415537738457284, + "grad_norm": 1.6386883404896464, + "learning_rate": 1.907099183778548e-05, + "loss": 0.6346, + "step": 9500 + }, + { + "epoch": 0.16417265689798174, + "grad_norm": 0.8161753415746095, + "learning_rate": 1.9070756257610905e-05, + "loss": 0.4495, + "step": 9501 + }, + { + "epoch": 0.16418993641139065, + "grad_norm": 0.4134974246387441, + "learning_rate": 1.907052064902603e-05, + "loss": 0.598, + "step": 9502 + }, + { + "epoch": 0.16420721592479956, + "grad_norm": 0.9362723276748754, + "learning_rate": 1.9070285012031596e-05, + "loss": 0.7184, + "step": 9503 + }, + { + "epoch": 0.16422449543820847, + "grad_norm": 1.0621849661460008, + "learning_rate": 1.9070049346628338e-05, + "loss": 0.469, + "step": 9504 + }, + { + "epoch": 0.16424177495161738, + "grad_norm": 0.7208859862233901, + "learning_rate": 1.9069813652816997e-05, + "loss": 0.4484, + "step": 9505 + }, + { + "epoch": 0.16425905446502626, + "grad_norm": 0.6671337459026923, + "learning_rate": 1.9069577930598312e-05, + "loss": 0.5285, + "step": 9506 + }, + { + "epoch": 0.16427633397843516, + "grad_norm": 2.1200327162500576, + "learning_rate": 1.906934217997302e-05, + "loss": 0.6346, + "step": 9507 + }, + { + "epoch": 0.16429361349184407, + "grad_norm": 1.006443026013467, + "learning_rate": 1.9069106400941856e-05, + "loss": 0.6427, + "step": 9508 + }, + { + "epoch": 0.16431089300525298, + "grad_norm": 0.8224512293085743, + "learning_rate": 1.9068870593505565e-05, + "loss": 0.4527, + "step": 9509 + }, + { + "epoch": 0.16432817251866189, + "grad_norm": 0.8091016363005065, + "learning_rate": 1.9068634757664878e-05, + "loss": 0.5556, + "step": 9510 + }, + { + "epoch": 0.16434545203207077, + "grad_norm": 1.3608082633173053, + "learning_rate": 1.906839889342054e-05, + "loss": 0.7215, + "step": 9511 + }, + { + "epoch": 0.16436273154547967, + "grad_norm": 1.2574941452281898, + "learning_rate": 1.9068163000773284e-05, + "loss": 0.4904, + "step": 9512 + }, + { + "epoch": 0.16438001105888858, + "grad_norm": 1.4698771073275037, + "learning_rate": 1.9067927079723857e-05, + "loss": 0.6221, + "step": 9513 + }, + { + "epoch": 0.1643972905722975, + "grad_norm": 0.9917247488736043, + "learning_rate": 1.9067691130272996e-05, + "loss": 0.6036, + "step": 9514 + }, + { + "epoch": 0.1644145700857064, + "grad_norm": 1.0135619511618705, + "learning_rate": 1.906745515242143e-05, + "loss": 0.4701, + "step": 9515 + }, + { + "epoch": 0.16443184959911528, + "grad_norm": 0.951195888115265, + "learning_rate": 1.906721914616991e-05, + "loss": 0.4972, + "step": 9516 + }, + { + "epoch": 0.16444912911252418, + "grad_norm": 1.4444691132949732, + "learning_rate": 1.9066983111519168e-05, + "loss": 0.5463, + "step": 9517 + }, + { + "epoch": 0.1644664086259331, + "grad_norm": 1.2444065522350545, + "learning_rate": 1.9066747048469946e-05, + "loss": 0.965, + "step": 9518 + }, + { + "epoch": 0.164483688139342, + "grad_norm": 0.9944310787324221, + "learning_rate": 1.9066510957022983e-05, + "loss": 0.7188, + "step": 9519 + }, + { + "epoch": 0.1645009676527509, + "grad_norm": 0.9080783203320577, + "learning_rate": 1.9066274837179023e-05, + "loss": 0.4008, + "step": 9520 + }, + { + "epoch": 0.16451824716615981, + "grad_norm": 0.6979435772369802, + "learning_rate": 1.90660386889388e-05, + "loss": 0.6677, + "step": 9521 + }, + { + "epoch": 0.1645355266795687, + "grad_norm": 0.9423786411100803, + "learning_rate": 1.906580251230305e-05, + "loss": 0.5744, + "step": 9522 + }, + { + "epoch": 0.1645528061929776, + "grad_norm": 0.9095821729287681, + "learning_rate": 1.906556630727252e-05, + "loss": 0.5547, + "step": 9523 + }, + { + "epoch": 0.1645700857063865, + "grad_norm": 1.2926429905265417, + "learning_rate": 1.9065330073847948e-05, + "loss": 0.6849, + "step": 9524 + }, + { + "epoch": 0.16458736521979542, + "grad_norm": 1.200540598959015, + "learning_rate": 1.9065093812030072e-05, + "loss": 0.5963, + "step": 9525 + }, + { + "epoch": 0.16460464473320432, + "grad_norm": 1.136667438139675, + "learning_rate": 1.9064857521819635e-05, + "loss": 0.5981, + "step": 9526 + }, + { + "epoch": 0.1646219242466132, + "grad_norm": 0.5662916250656014, + "learning_rate": 1.9064621203217374e-05, + "loss": 0.3098, + "step": 9527 + }, + { + "epoch": 0.1646392037600221, + "grad_norm": 1.6229152933808932, + "learning_rate": 1.906438485622403e-05, + "loss": 0.6369, + "step": 9528 + }, + { + "epoch": 0.16465648327343102, + "grad_norm": 1.60896223457799, + "learning_rate": 1.9064148480840344e-05, + "loss": 0.754, + "step": 9529 + }, + { + "epoch": 0.16467376278683993, + "grad_norm": 1.1350011348069133, + "learning_rate": 1.9063912077067057e-05, + "loss": 0.4365, + "step": 9530 + }, + { + "epoch": 0.16469104230024884, + "grad_norm": 1.7040504457771017, + "learning_rate": 1.906367564490491e-05, + "loss": 0.755, + "step": 9531 + }, + { + "epoch": 0.16470832181365772, + "grad_norm": 0.8679787663197082, + "learning_rate": 1.906343918435464e-05, + "loss": 0.5261, + "step": 9532 + }, + { + "epoch": 0.16472560132706662, + "grad_norm": 1.7311271481727744, + "learning_rate": 1.906320269541699e-05, + "loss": 0.5604, + "step": 9533 + }, + { + "epoch": 0.16474288084047553, + "grad_norm": 1.3176841254401952, + "learning_rate": 1.90629661780927e-05, + "loss": 0.5902, + "step": 9534 + }, + { + "epoch": 0.16476016035388444, + "grad_norm": 1.9708596593914156, + "learning_rate": 1.9062729632382512e-05, + "loss": 0.5536, + "step": 9535 + }, + { + "epoch": 0.16477743986729335, + "grad_norm": 1.6026156327336547, + "learning_rate": 1.9062493058287166e-05, + "loss": 0.4701, + "step": 9536 + }, + { + "epoch": 0.16479471938070223, + "grad_norm": 2.361709092467311, + "learning_rate": 1.90622564558074e-05, + "loss": 0.6564, + "step": 9537 + }, + { + "epoch": 0.16481199889411113, + "grad_norm": 2.8366329417161875, + "learning_rate": 1.906201982494396e-05, + "loss": 0.6899, + "step": 9538 + }, + { + "epoch": 0.16482927840752004, + "grad_norm": 1.6230018751427122, + "learning_rate": 1.9061783165697586e-05, + "loss": 0.6015, + "step": 9539 + }, + { + "epoch": 0.16484655792092895, + "grad_norm": 2.0295464899591162, + "learning_rate": 1.906154647806902e-05, + "loss": 0.7839, + "step": 9540 + }, + { + "epoch": 0.16486383743433786, + "grad_norm": 0.754846024086044, + "learning_rate": 1.9061309762059003e-05, + "loss": 0.3329, + "step": 9541 + }, + { + "epoch": 0.16488111694774676, + "grad_norm": 1.1423851865554806, + "learning_rate": 1.9061073017668273e-05, + "loss": 0.6428, + "step": 9542 + }, + { + "epoch": 0.16489839646115564, + "grad_norm": 0.6189322685007554, + "learning_rate": 1.906083624489757e-05, + "loss": 0.6473, + "step": 9543 + }, + { + "epoch": 0.16491567597456455, + "grad_norm": 1.1431199675833972, + "learning_rate": 1.906059944374764e-05, + "loss": 0.3648, + "step": 9544 + }, + { + "epoch": 0.16493295548797346, + "grad_norm": 1.4559487918544152, + "learning_rate": 1.906036261421923e-05, + "loss": 0.7436, + "step": 9545 + }, + { + "epoch": 0.16495023500138237, + "grad_norm": 1.1353145910193998, + "learning_rate": 1.906012575631307e-05, + "loss": 0.813, + "step": 9546 + }, + { + "epoch": 0.16496751451479127, + "grad_norm": 1.2297323914801646, + "learning_rate": 1.9059888870029913e-05, + "loss": 0.8401, + "step": 9547 + }, + { + "epoch": 0.16498479402820015, + "grad_norm": 1.130127066015725, + "learning_rate": 1.905965195537049e-05, + "loss": 0.4432, + "step": 9548 + }, + { + "epoch": 0.16500207354160906, + "grad_norm": 1.1780362890258715, + "learning_rate": 1.905941501233555e-05, + "loss": 0.7469, + "step": 9549 + }, + { + "epoch": 0.16501935305501797, + "grad_norm": 1.10658309535021, + "learning_rate": 1.9059178040925835e-05, + "loss": 0.8738, + "step": 9550 + }, + { + "epoch": 0.16503663256842688, + "grad_norm": 0.7245166357297346, + "learning_rate": 1.9058941041142088e-05, + "loss": 0.5958, + "step": 9551 + }, + { + "epoch": 0.16505391208183579, + "grad_norm": 1.3167111564656926, + "learning_rate": 1.9058704012985044e-05, + "loss": 0.7545, + "step": 9552 + }, + { + "epoch": 0.16507119159524467, + "grad_norm": 0.7492280602039856, + "learning_rate": 1.9058466956455456e-05, + "loss": 0.6156, + "step": 9553 + }, + { + "epoch": 0.16508847110865357, + "grad_norm": 0.5547854515470689, + "learning_rate": 1.9058229871554056e-05, + "loss": 0.3696, + "step": 9554 + }, + { + "epoch": 0.16510575062206248, + "grad_norm": 0.9685515062698239, + "learning_rate": 1.9057992758281595e-05, + "loss": 0.5243, + "step": 9555 + }, + { + "epoch": 0.1651230301354714, + "grad_norm": 0.7929105538364609, + "learning_rate": 1.9057755616638813e-05, + "loss": 0.6335, + "step": 9556 + }, + { + "epoch": 0.1651403096488803, + "grad_norm": 1.2099504212009438, + "learning_rate": 1.9057518446626447e-05, + "loss": 0.8263, + "step": 9557 + }, + { + "epoch": 0.1651575891622892, + "grad_norm": 0.7137707635159549, + "learning_rate": 1.905728124824525e-05, + "loss": 0.6647, + "step": 9558 + }, + { + "epoch": 0.16517486867569808, + "grad_norm": 1.1736821032654237, + "learning_rate": 1.9057044021495956e-05, + "loss": 0.5831, + "step": 9559 + }, + { + "epoch": 0.165192148189107, + "grad_norm": 1.0156402114044265, + "learning_rate": 1.9056806766379314e-05, + "loss": 0.6514, + "step": 9560 + }, + { + "epoch": 0.1652094277025159, + "grad_norm": 0.6330143182050441, + "learning_rate": 1.9056569482896067e-05, + "loss": 0.7019, + "step": 9561 + }, + { + "epoch": 0.1652267072159248, + "grad_norm": 0.9473315361893296, + "learning_rate": 1.905633217104695e-05, + "loss": 0.5634, + "step": 9562 + }, + { + "epoch": 0.16524398672933371, + "grad_norm": 1.0858971512499003, + "learning_rate": 1.905609483083272e-05, + "loss": 0.7129, + "step": 9563 + }, + { + "epoch": 0.1652612662427426, + "grad_norm": 0.8068661149114329, + "learning_rate": 1.9055857462254107e-05, + "loss": 0.5983, + "step": 9564 + }, + { + "epoch": 0.1652785457561515, + "grad_norm": 0.8257329982042386, + "learning_rate": 1.905562006531186e-05, + "loss": 0.6761, + "step": 9565 + }, + { + "epoch": 0.1652958252695604, + "grad_norm": 1.2628877014424427, + "learning_rate": 1.905538264000673e-05, + "loss": 0.6212, + "step": 9566 + }, + { + "epoch": 0.16531310478296932, + "grad_norm": 0.9573483121429986, + "learning_rate": 1.9055145186339445e-05, + "loss": 0.7248, + "step": 9567 + }, + { + "epoch": 0.16533038429637822, + "grad_norm": 0.7937174311862789, + "learning_rate": 1.905490770431076e-05, + "loss": 0.6338, + "step": 9568 + }, + { + "epoch": 0.1653476638097871, + "grad_norm": 0.49486395654932414, + "learning_rate": 1.9054670193921417e-05, + "loss": 0.6627, + "step": 9569 + }, + { + "epoch": 0.165364943323196, + "grad_norm": 0.9508674443947858, + "learning_rate": 1.905443265517216e-05, + "loss": 0.6691, + "step": 9570 + }, + { + "epoch": 0.16538222283660492, + "grad_norm": 0.9025549862670162, + "learning_rate": 1.905419508806373e-05, + "loss": 0.5098, + "step": 9571 + }, + { + "epoch": 0.16539950235001383, + "grad_norm": 1.0539760446255066, + "learning_rate": 1.9053957492596872e-05, + "loss": 0.7267, + "step": 9572 + }, + { + "epoch": 0.16541678186342273, + "grad_norm": 0.733673900272091, + "learning_rate": 1.905371986877233e-05, + "loss": 0.499, + "step": 9573 + }, + { + "epoch": 0.16543406137683161, + "grad_norm": 0.994703229036738, + "learning_rate": 1.9053482216590852e-05, + "loss": 0.5182, + "step": 9574 + }, + { + "epoch": 0.16545134089024052, + "grad_norm": 1.1876889532724173, + "learning_rate": 1.905324453605318e-05, + "loss": 0.6212, + "step": 9575 + }, + { + "epoch": 0.16546862040364943, + "grad_norm": 1.1684499589218824, + "learning_rate": 1.9053006827160055e-05, + "loss": 0.5785, + "step": 9576 + }, + { + "epoch": 0.16548589991705834, + "grad_norm": 1.0993946440979456, + "learning_rate": 1.905276908991223e-05, + "loss": 0.4284, + "step": 9577 + }, + { + "epoch": 0.16550317943046725, + "grad_norm": 0.5018402684007633, + "learning_rate": 1.905253132431044e-05, + "loss": 0.8128, + "step": 9578 + }, + { + "epoch": 0.16552045894387615, + "grad_norm": 0.9119168477805843, + "learning_rate": 1.9052293530355434e-05, + "loss": 0.5668, + "step": 9579 + }, + { + "epoch": 0.16553773845728503, + "grad_norm": 0.6879591691615249, + "learning_rate": 1.9052055708047956e-05, + "loss": 0.5203, + "step": 9580 + }, + { + "epoch": 0.16555501797069394, + "grad_norm": 0.8438715360569656, + "learning_rate": 1.905181785738875e-05, + "loss": 0.7132, + "step": 9581 + }, + { + "epoch": 0.16557229748410285, + "grad_norm": 1.3564648922382279, + "learning_rate": 1.905157997837857e-05, + "loss": 0.6575, + "step": 9582 + }, + { + "epoch": 0.16558957699751176, + "grad_norm": 1.280523223187124, + "learning_rate": 1.905134207101815e-05, + "loss": 0.6872, + "step": 9583 + }, + { + "epoch": 0.16560685651092066, + "grad_norm": 0.8209253632578983, + "learning_rate": 1.9051104135308237e-05, + "loss": 0.6214, + "step": 9584 + }, + { + "epoch": 0.16562413602432954, + "grad_norm": 1.1746485088890328, + "learning_rate": 1.905086617124958e-05, + "loss": 0.4398, + "step": 9585 + }, + { + "epoch": 0.16564141553773845, + "grad_norm": 1.1350832638051267, + "learning_rate": 1.905062817884292e-05, + "loss": 0.5355, + "step": 9586 + }, + { + "epoch": 0.16565869505114736, + "grad_norm": 1.0427600710182947, + "learning_rate": 1.9050390158089005e-05, + "loss": 0.5751, + "step": 9587 + }, + { + "epoch": 0.16567597456455627, + "grad_norm": 0.9195555005519596, + "learning_rate": 1.905015210898858e-05, + "loss": 0.6214, + "step": 9588 + }, + { + "epoch": 0.16569325407796517, + "grad_norm": 1.2750517916996147, + "learning_rate": 1.9049914031542393e-05, + "loss": 0.6522, + "step": 9589 + }, + { + "epoch": 0.16571053359137405, + "grad_norm": 1.276251846717555, + "learning_rate": 1.904967592575119e-05, + "loss": 0.6959, + "step": 9590 + }, + { + "epoch": 0.16572781310478296, + "grad_norm": 0.6062878357762457, + "learning_rate": 1.9049437791615707e-05, + "loss": 0.6243, + "step": 9591 + }, + { + "epoch": 0.16574509261819187, + "grad_norm": 0.9545003180951089, + "learning_rate": 1.9049199629136704e-05, + "loss": 0.5253, + "step": 9592 + }, + { + "epoch": 0.16576237213160078, + "grad_norm": 0.9973964393772047, + "learning_rate": 1.9048961438314915e-05, + "loss": 0.6251, + "step": 9593 + }, + { + "epoch": 0.16577965164500968, + "grad_norm": 1.4917988443795587, + "learning_rate": 1.9048723219151092e-05, + "loss": 0.638, + "step": 9594 + }, + { + "epoch": 0.1657969311584186, + "grad_norm": 1.0230230216054632, + "learning_rate": 1.9048484971645983e-05, + "loss": 0.5423, + "step": 9595 + }, + { + "epoch": 0.16581421067182747, + "grad_norm": 0.8750832921665618, + "learning_rate": 1.904824669580033e-05, + "loss": 0.6306, + "step": 9596 + }, + { + "epoch": 0.16583149018523638, + "grad_norm": 1.111704909918761, + "learning_rate": 1.9048008391614878e-05, + "loss": 0.679, + "step": 9597 + }, + { + "epoch": 0.1658487696986453, + "grad_norm": 0.7956870413312526, + "learning_rate": 1.904777005909038e-05, + "loss": 0.5787, + "step": 9598 + }, + { + "epoch": 0.1658660492120542, + "grad_norm": 1.3164793255584026, + "learning_rate": 1.9047531698227576e-05, + "loss": 0.5182, + "step": 9599 + }, + { + "epoch": 0.1658833287254631, + "grad_norm": 0.6131891382790111, + "learning_rate": 1.9047293309027215e-05, + "loss": 0.6806, + "step": 9600 + }, + { + "epoch": 0.16590060823887198, + "grad_norm": 0.9769069680594222, + "learning_rate": 1.9047054891490044e-05, + "loss": 0.5243, + "step": 9601 + }, + { + "epoch": 0.1659178877522809, + "grad_norm": 1.0966900903263725, + "learning_rate": 1.9046816445616813e-05, + "loss": 0.5272, + "step": 9602 + }, + { + "epoch": 0.1659351672656898, + "grad_norm": 1.2520105489533837, + "learning_rate": 1.9046577971408262e-05, + "loss": 0.6861, + "step": 9603 + }, + { + "epoch": 0.1659524467790987, + "grad_norm": 1.2787722970161337, + "learning_rate": 1.904633946886514e-05, + "loss": 0.5967, + "step": 9604 + }, + { + "epoch": 0.1659697262925076, + "grad_norm": 1.0779401783955695, + "learning_rate": 1.90461009379882e-05, + "loss": 0.4775, + "step": 9605 + }, + { + "epoch": 0.1659870058059165, + "grad_norm": 0.8812859315110467, + "learning_rate": 1.904586237877818e-05, + "loss": 0.762, + "step": 9606 + }, + { + "epoch": 0.1660042853193254, + "grad_norm": 1.1540468508746047, + "learning_rate": 1.9045623791235833e-05, + "loss": 0.703, + "step": 9607 + }, + { + "epoch": 0.1660215648327343, + "grad_norm": 0.8632321441936641, + "learning_rate": 1.9045385175361907e-05, + "loss": 0.6365, + "step": 9608 + }, + { + "epoch": 0.16603884434614322, + "grad_norm": 0.9088994089990003, + "learning_rate": 1.9045146531157144e-05, + "loss": 0.6479, + "step": 9609 + }, + { + "epoch": 0.16605612385955212, + "grad_norm": 1.2872172423492854, + "learning_rate": 1.9044907858622296e-05, + "loss": 0.7098, + "step": 9610 + }, + { + "epoch": 0.166073403372961, + "grad_norm": 1.119507380594907, + "learning_rate": 1.904466915775811e-05, + "loss": 0.5936, + "step": 9611 + }, + { + "epoch": 0.1660906828863699, + "grad_norm": 0.9881126000976512, + "learning_rate": 1.904443042856533e-05, + "loss": 0.578, + "step": 9612 + }, + { + "epoch": 0.16610796239977882, + "grad_norm": 1.3276500235637954, + "learning_rate": 1.904419167104471e-05, + "loss": 0.6895, + "step": 9613 + }, + { + "epoch": 0.16612524191318773, + "grad_norm": 1.1443897724746137, + "learning_rate": 1.9043952885196994e-05, + "loss": 0.7238, + "step": 9614 + }, + { + "epoch": 0.16614252142659663, + "grad_norm": 1.2223294376434481, + "learning_rate": 1.9043714071022927e-05, + "loss": 0.7358, + "step": 9615 + }, + { + "epoch": 0.16615980094000554, + "grad_norm": 1.385336502502727, + "learning_rate": 1.9043475228523265e-05, + "loss": 0.6931, + "step": 9616 + }, + { + "epoch": 0.16617708045341442, + "grad_norm": 1.5113034548611466, + "learning_rate": 1.9043236357698747e-05, + "loss": 0.8175, + "step": 9617 + }, + { + "epoch": 0.16619435996682333, + "grad_norm": 1.4037989747313095, + "learning_rate": 1.9042997458550128e-05, + "loss": 0.6807, + "step": 9618 + }, + { + "epoch": 0.16621163948023224, + "grad_norm": 1.1086282267802363, + "learning_rate": 1.9042758531078154e-05, + "loss": 0.5164, + "step": 9619 + }, + { + "epoch": 0.16622891899364114, + "grad_norm": 1.0857749642252672, + "learning_rate": 1.9042519575283573e-05, + "loss": 0.5619, + "step": 9620 + }, + { + "epoch": 0.16624619850705005, + "grad_norm": 1.3805361959988716, + "learning_rate": 1.9042280591167127e-05, + "loss": 0.6368, + "step": 9621 + }, + { + "epoch": 0.16626347802045893, + "grad_norm": 0.970922592756435, + "learning_rate": 1.904204157872958e-05, + "loss": 0.4749, + "step": 9622 + }, + { + "epoch": 0.16628075753386784, + "grad_norm": 1.2775355493316092, + "learning_rate": 1.904180253797167e-05, + "loss": 0.6811, + "step": 9623 + }, + { + "epoch": 0.16629803704727675, + "grad_norm": 1.1989371187741462, + "learning_rate": 1.9041563468894142e-05, + "loss": 0.7163, + "step": 9624 + }, + { + "epoch": 0.16631531656068566, + "grad_norm": 1.0604293852423166, + "learning_rate": 1.9041324371497754e-05, + "loss": 0.5088, + "step": 9625 + }, + { + "epoch": 0.16633259607409456, + "grad_norm": 0.967987029023926, + "learning_rate": 1.904108524578325e-05, + "loss": 0.3653, + "step": 9626 + }, + { + "epoch": 0.16634987558750344, + "grad_norm": 0.8516845406498597, + "learning_rate": 1.9040846091751383e-05, + "loss": 0.5308, + "step": 9627 + }, + { + "epoch": 0.16636715510091235, + "grad_norm": 0.9838320779146408, + "learning_rate": 1.9040606909402895e-05, + "loss": 0.6692, + "step": 9628 + }, + { + "epoch": 0.16638443461432126, + "grad_norm": 0.7573866412831063, + "learning_rate": 1.9040367698738537e-05, + "loss": 0.6182, + "step": 9629 + }, + { + "epoch": 0.16640171412773017, + "grad_norm": 1.3177316891968316, + "learning_rate": 1.9040128459759066e-05, + "loss": 0.5232, + "step": 9630 + }, + { + "epoch": 0.16641899364113907, + "grad_norm": 0.7928570085262673, + "learning_rate": 1.9039889192465222e-05, + "loss": 0.7777, + "step": 9631 + }, + { + "epoch": 0.16643627315454798, + "grad_norm": 1.175062682899613, + "learning_rate": 1.903964989685776e-05, + "loss": 0.7886, + "step": 9632 + }, + { + "epoch": 0.16645355266795686, + "grad_norm": 1.084221568916313, + "learning_rate": 1.9039410572937426e-05, + "loss": 0.6311, + "step": 9633 + }, + { + "epoch": 0.16647083218136577, + "grad_norm": 1.5485748665304622, + "learning_rate": 1.9039171220704974e-05, + "loss": 0.6833, + "step": 9634 + }, + { + "epoch": 0.16648811169477468, + "grad_norm": 0.7161479294585478, + "learning_rate": 1.9038931840161148e-05, + "loss": 0.7208, + "step": 9635 + }, + { + "epoch": 0.16650539120818358, + "grad_norm": 0.9846767895478481, + "learning_rate": 1.9038692431306703e-05, + "loss": 0.6678, + "step": 9636 + }, + { + "epoch": 0.1665226707215925, + "grad_norm": 1.4352589318656925, + "learning_rate": 1.9038452994142383e-05, + "loss": 0.5998, + "step": 9637 + }, + { + "epoch": 0.16653995023500137, + "grad_norm": 0.4224618033022145, + "learning_rate": 1.9038213528668942e-05, + "loss": 0.707, + "step": 9638 + }, + { + "epoch": 0.16655722974841028, + "grad_norm": 0.9957433827001471, + "learning_rate": 1.903797403488713e-05, + "loss": 0.7484, + "step": 9639 + }, + { + "epoch": 0.1665745092618192, + "grad_norm": 0.9504736295105274, + "learning_rate": 1.90377345127977e-05, + "loss": 0.7018, + "step": 9640 + }, + { + "epoch": 0.1665917887752281, + "grad_norm": 1.646048530606829, + "learning_rate": 1.903749496240139e-05, + "loss": 0.819, + "step": 9641 + }, + { + "epoch": 0.166609068288637, + "grad_norm": 0.792017661596574, + "learning_rate": 1.9037255383698968e-05, + "loss": 0.6102, + "step": 9642 + }, + { + "epoch": 0.16662634780204588, + "grad_norm": 1.3320866016577728, + "learning_rate": 1.903701577669117e-05, + "loss": 0.6099, + "step": 9643 + }, + { + "epoch": 0.1666436273154548, + "grad_norm": 2.020544965945595, + "learning_rate": 1.9036776141378755e-05, + "loss": 0.6065, + "step": 9644 + }, + { + "epoch": 0.1666609068288637, + "grad_norm": 0.9750616705865242, + "learning_rate": 1.9036536477762468e-05, + "loss": 0.6853, + "step": 9645 + }, + { + "epoch": 0.1666781863422726, + "grad_norm": 1.250562918701468, + "learning_rate": 1.9036296785843062e-05, + "loss": 0.5145, + "step": 9646 + }, + { + "epoch": 0.1666954658556815, + "grad_norm": 1.232874811138715, + "learning_rate": 1.903605706562129e-05, + "loss": 0.7087, + "step": 9647 + }, + { + "epoch": 0.1667127453690904, + "grad_norm": 0.6812001960079935, + "learning_rate": 1.90358173170979e-05, + "loss": 0.4276, + "step": 9648 + }, + { + "epoch": 0.1667300248824993, + "grad_norm": 0.8706370725250628, + "learning_rate": 1.903557754027364e-05, + "loss": 0.4759, + "step": 9649 + }, + { + "epoch": 0.1667473043959082, + "grad_norm": 1.173625490508971, + "learning_rate": 1.9035337735149268e-05, + "loss": 0.6283, + "step": 9650 + }, + { + "epoch": 0.16676458390931712, + "grad_norm": 1.2608742929561896, + "learning_rate": 1.903509790172553e-05, + "loss": 0.4952, + "step": 9651 + }, + { + "epoch": 0.16678186342272602, + "grad_norm": 1.1504571772009098, + "learning_rate": 1.9034858040003182e-05, + "loss": 0.7047, + "step": 9652 + }, + { + "epoch": 0.16679914293613493, + "grad_norm": 1.051880015800091, + "learning_rate": 1.903461814998297e-05, + "loss": 0.6422, + "step": 9653 + }, + { + "epoch": 0.1668164224495438, + "grad_norm": 1.1300046637176493, + "learning_rate": 1.9034378231665647e-05, + "loss": 0.8757, + "step": 9654 + }, + { + "epoch": 0.16683370196295272, + "grad_norm": 0.411048081469146, + "learning_rate": 1.9034138285051962e-05, + "loss": 0.6092, + "step": 9655 + }, + { + "epoch": 0.16685098147636163, + "grad_norm": 1.136987119469017, + "learning_rate": 1.9033898310142674e-05, + "loss": 0.6419, + "step": 9656 + }, + { + "epoch": 0.16686826098977053, + "grad_norm": 1.37191744623956, + "learning_rate": 1.903365830693853e-05, + "loss": 0.7917, + "step": 9657 + }, + { + "epoch": 0.16688554050317944, + "grad_norm": 0.879136023108597, + "learning_rate": 1.9033418275440277e-05, + "loss": 0.6561, + "step": 9658 + }, + { + "epoch": 0.16690282001658832, + "grad_norm": 1.249655254730336, + "learning_rate": 1.9033178215648672e-05, + "loss": 0.5814, + "step": 9659 + }, + { + "epoch": 0.16692009952999723, + "grad_norm": 0.8585421635062183, + "learning_rate": 1.903293812756447e-05, + "loss": 0.5805, + "step": 9660 + }, + { + "epoch": 0.16693737904340614, + "grad_norm": 1.376040129422869, + "learning_rate": 1.9032698011188417e-05, + "loss": 0.6174, + "step": 9661 + }, + { + "epoch": 0.16695465855681504, + "grad_norm": 0.853905386774101, + "learning_rate": 1.9032457866521268e-05, + "loss": 0.6105, + "step": 9662 + }, + { + "epoch": 0.16697193807022395, + "grad_norm": 0.8633716433540152, + "learning_rate": 1.9032217693563774e-05, + "loss": 0.5924, + "step": 9663 + }, + { + "epoch": 0.16698921758363283, + "grad_norm": 0.34729632754051826, + "learning_rate": 1.9031977492316688e-05, + "loss": 0.5139, + "step": 9664 + }, + { + "epoch": 0.16700649709704174, + "grad_norm": 0.822168506063308, + "learning_rate": 1.903173726278076e-05, + "loss": 0.7957, + "step": 9665 + }, + { + "epoch": 0.16702377661045065, + "grad_norm": 0.823319914124472, + "learning_rate": 1.903149700495675e-05, + "loss": 0.4503, + "step": 9666 + }, + { + "epoch": 0.16704105612385955, + "grad_norm": 1.1115728458699434, + "learning_rate": 1.90312567188454e-05, + "loss": 0.595, + "step": 9667 + }, + { + "epoch": 0.16705833563726846, + "grad_norm": 0.9644378839882423, + "learning_rate": 1.903101640444747e-05, + "loss": 0.6373, + "step": 9668 + }, + { + "epoch": 0.16707561515067737, + "grad_norm": 0.9871885817957894, + "learning_rate": 1.9030776061763713e-05, + "loss": 0.6135, + "step": 9669 + }, + { + "epoch": 0.16709289466408625, + "grad_norm": 0.45945838511568626, + "learning_rate": 1.9030535690794873e-05, + "loss": 0.5843, + "step": 9670 + }, + { + "epoch": 0.16711017417749516, + "grad_norm": 1.339068807317333, + "learning_rate": 1.9030295291541713e-05, + "loss": 0.4651, + "step": 9671 + }, + { + "epoch": 0.16712745369090407, + "grad_norm": 0.9787408936968989, + "learning_rate": 1.903005486400498e-05, + "loss": 0.6466, + "step": 9672 + }, + { + "epoch": 0.16714473320431297, + "grad_norm": 1.7513187041638727, + "learning_rate": 1.9029814408185428e-05, + "loss": 0.9157, + "step": 9673 + }, + { + "epoch": 0.16716201271772188, + "grad_norm": 1.0653842513541292, + "learning_rate": 1.9029573924083812e-05, + "loss": 0.5667, + "step": 9674 + }, + { + "epoch": 0.16717929223113076, + "grad_norm": 1.4996913408642163, + "learning_rate": 1.9029333411700887e-05, + "loss": 0.5555, + "step": 9675 + }, + { + "epoch": 0.16719657174453967, + "grad_norm": 0.6143858259761822, + "learning_rate": 1.9029092871037402e-05, + "loss": 0.4004, + "step": 9676 + }, + { + "epoch": 0.16721385125794858, + "grad_norm": 1.4896969601977914, + "learning_rate": 1.902885230209411e-05, + "loss": 0.7209, + "step": 9677 + }, + { + "epoch": 0.16723113077135748, + "grad_norm": 0.5842516865848485, + "learning_rate": 1.902861170487177e-05, + "loss": 0.6339, + "step": 9678 + }, + { + "epoch": 0.1672484102847664, + "grad_norm": 1.7132300761205985, + "learning_rate": 1.9028371079371127e-05, + "loss": 0.6254, + "step": 9679 + }, + { + "epoch": 0.16726568979817527, + "grad_norm": 0.7204429038294186, + "learning_rate": 1.9028130425592945e-05, + "loss": 0.6869, + "step": 9680 + }, + { + "epoch": 0.16728296931158418, + "grad_norm": 1.504879664258771, + "learning_rate": 1.9027889743537967e-05, + "loss": 0.6235, + "step": 9681 + }, + { + "epoch": 0.1673002488249931, + "grad_norm": 1.001797858613142, + "learning_rate": 1.9027649033206956e-05, + "loss": 0.7171, + "step": 9682 + }, + { + "epoch": 0.167317528338402, + "grad_norm": 1.1789165795507457, + "learning_rate": 1.902740829460066e-05, + "loss": 0.6152, + "step": 9683 + }, + { + "epoch": 0.1673348078518109, + "grad_norm": 1.4231596473482147, + "learning_rate": 1.9027167527719835e-05, + "loss": 0.5448, + "step": 9684 + }, + { + "epoch": 0.16735208736521978, + "grad_norm": 0.6576760828691415, + "learning_rate": 1.9026926732565237e-05, + "loss": 0.6044, + "step": 9685 + }, + { + "epoch": 0.1673693668786287, + "grad_norm": 0.9793022132912712, + "learning_rate": 1.9026685909137617e-05, + "loss": 0.5837, + "step": 9686 + }, + { + "epoch": 0.1673866463920376, + "grad_norm": 0.8336795759192138, + "learning_rate": 1.9026445057437732e-05, + "loss": 0.5848, + "step": 9687 + }, + { + "epoch": 0.1674039259054465, + "grad_norm": 0.8280671454919973, + "learning_rate": 1.9026204177466337e-05, + "loss": 0.632, + "step": 9688 + }, + { + "epoch": 0.1674212054188554, + "grad_norm": 0.7032444375097512, + "learning_rate": 1.902596326922418e-05, + "loss": 0.5132, + "step": 9689 + }, + { + "epoch": 0.16743848493226432, + "grad_norm": 1.1323834413977332, + "learning_rate": 1.9025722332712024e-05, + "loss": 0.7103, + "step": 9690 + }, + { + "epoch": 0.1674557644456732, + "grad_norm": 1.053358945801383, + "learning_rate": 1.9025481367930618e-05, + "loss": 0.7668, + "step": 9691 + }, + { + "epoch": 0.1674730439590821, + "grad_norm": 0.5257022167942809, + "learning_rate": 1.902524037488072e-05, + "loss": 0.6664, + "step": 9692 + }, + { + "epoch": 0.16749032347249102, + "grad_norm": 0.9424526870278808, + "learning_rate": 1.902499935356308e-05, + "loss": 0.6521, + "step": 9693 + }, + { + "epoch": 0.16750760298589992, + "grad_norm": 0.8775764493047665, + "learning_rate": 1.9024758303978457e-05, + "loss": 0.7497, + "step": 9694 + }, + { + "epoch": 0.16752488249930883, + "grad_norm": 1.0803382381119981, + "learning_rate": 1.902451722612761e-05, + "loss": 0.6063, + "step": 9695 + }, + { + "epoch": 0.1675421620127177, + "grad_norm": 0.7186556721232753, + "learning_rate": 1.9024276120011285e-05, + "loss": 0.3678, + "step": 9696 + }, + { + "epoch": 0.16755944152612662, + "grad_norm": 1.070842519217463, + "learning_rate": 1.902403498563024e-05, + "loss": 0.6328, + "step": 9697 + }, + { + "epoch": 0.16757672103953553, + "grad_norm": 1.0381590572820143, + "learning_rate": 1.902379382298523e-05, + "loss": 0.6553, + "step": 9698 + }, + { + "epoch": 0.16759400055294443, + "grad_norm": 1.7751956163032268, + "learning_rate": 1.902355263207702e-05, + "loss": 0.7266, + "step": 9699 + }, + { + "epoch": 0.16761128006635334, + "grad_norm": 0.758079469634016, + "learning_rate": 1.902331141290635e-05, + "loss": 0.4808, + "step": 9700 + }, + { + "epoch": 0.16762855957976222, + "grad_norm": 0.8799413708455356, + "learning_rate": 1.9023070165473987e-05, + "loss": 0.5841, + "step": 9701 + }, + { + "epoch": 0.16764583909317113, + "grad_norm": 0.42613846459569327, + "learning_rate": 1.902282888978068e-05, + "loss": 0.5961, + "step": 9702 + }, + { + "epoch": 0.16766311860658004, + "grad_norm": 0.6151206742767971, + "learning_rate": 1.9022587585827188e-05, + "loss": 0.4235, + "step": 9703 + }, + { + "epoch": 0.16768039811998894, + "grad_norm": 0.8252815078042898, + "learning_rate": 1.9022346253614267e-05, + "loss": 0.5156, + "step": 9704 + }, + { + "epoch": 0.16769767763339785, + "grad_norm": 0.8658065440528426, + "learning_rate": 1.9022104893142673e-05, + "loss": 0.4807, + "step": 9705 + }, + { + "epoch": 0.16771495714680676, + "grad_norm": 1.1209259691055604, + "learning_rate": 1.9021863504413155e-05, + "loss": 0.5049, + "step": 9706 + }, + { + "epoch": 0.16773223666021564, + "grad_norm": 1.5923527824315553, + "learning_rate": 1.902162208742648e-05, + "loss": 0.8023, + "step": 9707 + }, + { + "epoch": 0.16774951617362455, + "grad_norm": 0.4497992498108821, + "learning_rate": 1.90213806421834e-05, + "loss": 0.5707, + "step": 9708 + }, + { + "epoch": 0.16776679568703345, + "grad_norm": 1.4730504857310376, + "learning_rate": 1.9021139168684667e-05, + "loss": 0.7424, + "step": 9709 + }, + { + "epoch": 0.16778407520044236, + "grad_norm": 1.436294716726172, + "learning_rate": 1.9020897666931043e-05, + "loss": 0.5983, + "step": 9710 + }, + { + "epoch": 0.16780135471385127, + "grad_norm": 1.0305607009887283, + "learning_rate": 1.902065613692328e-05, + "loss": 0.5637, + "step": 9711 + }, + { + "epoch": 0.16781863422726015, + "grad_norm": 0.9311769909732013, + "learning_rate": 1.9020414578662135e-05, + "loss": 0.4668, + "step": 9712 + }, + { + "epoch": 0.16783591374066906, + "grad_norm": 1.0652936484956026, + "learning_rate": 1.9020172992148368e-05, + "loss": 0.6744, + "step": 9713 + }, + { + "epoch": 0.16785319325407796, + "grad_norm": 0.8983151717429179, + "learning_rate": 1.9019931377382735e-05, + "loss": 0.8185, + "step": 9714 + }, + { + "epoch": 0.16787047276748687, + "grad_norm": 1.580315282175921, + "learning_rate": 1.901968973436599e-05, + "loss": 0.7809, + "step": 9715 + }, + { + "epoch": 0.16788775228089578, + "grad_norm": 1.1488975327980993, + "learning_rate": 1.901944806309889e-05, + "loss": 0.7547, + "step": 9716 + }, + { + "epoch": 0.16790503179430466, + "grad_norm": 0.9673888452006381, + "learning_rate": 1.9019206363582197e-05, + "loss": 0.6128, + "step": 9717 + }, + { + "epoch": 0.16792231130771357, + "grad_norm": 1.1267855680377823, + "learning_rate": 1.9018964635816663e-05, + "loss": 0.9047, + "step": 9718 + }, + { + "epoch": 0.16793959082112248, + "grad_norm": 1.1055066118369699, + "learning_rate": 1.901872287980304e-05, + "loss": 0.8305, + "step": 9719 + }, + { + "epoch": 0.16795687033453138, + "grad_norm": 0.9058441396465959, + "learning_rate": 1.90184810955421e-05, + "loss": 0.6493, + "step": 9720 + }, + { + "epoch": 0.1679741498479403, + "grad_norm": 0.4639159989275797, + "learning_rate": 1.901823928303459e-05, + "loss": 0.7878, + "step": 9721 + }, + { + "epoch": 0.1679914293613492, + "grad_norm": 0.869611758906172, + "learning_rate": 1.9017997442281267e-05, + "loss": 0.5507, + "step": 9722 + }, + { + "epoch": 0.16800870887475808, + "grad_norm": 0.7863353376550207, + "learning_rate": 1.9017755573282893e-05, + "loss": 0.558, + "step": 9723 + }, + { + "epoch": 0.16802598838816699, + "grad_norm": 0.8261850215547774, + "learning_rate": 1.901751367604022e-05, + "loss": 0.6874, + "step": 9724 + }, + { + "epoch": 0.1680432679015759, + "grad_norm": 0.7134521911622885, + "learning_rate": 1.901727175055401e-05, + "loss": 0.5696, + "step": 9725 + }, + { + "epoch": 0.1680605474149848, + "grad_norm": 1.0248623349795565, + "learning_rate": 1.9017029796825022e-05, + "loss": 0.6843, + "step": 9726 + }, + { + "epoch": 0.1680778269283937, + "grad_norm": 0.6641012284116877, + "learning_rate": 1.9016787814854008e-05, + "loss": 0.4619, + "step": 9727 + }, + { + "epoch": 0.1680951064418026, + "grad_norm": 0.6345291486574239, + "learning_rate": 1.901654580464173e-05, + "loss": 0.3555, + "step": 9728 + }, + { + "epoch": 0.1681123859552115, + "grad_norm": 0.8422687997294969, + "learning_rate": 1.9016303766188946e-05, + "loss": 0.5111, + "step": 9729 + }, + { + "epoch": 0.1681296654686204, + "grad_norm": 1.1832796514541908, + "learning_rate": 1.9016061699496412e-05, + "loss": 0.8076, + "step": 9730 + }, + { + "epoch": 0.1681469449820293, + "grad_norm": 0.6924595105702077, + "learning_rate": 1.901581960456489e-05, + "loss": 0.6077, + "step": 9731 + }, + { + "epoch": 0.16816422449543822, + "grad_norm": 0.9988360902814798, + "learning_rate": 1.9015577481395135e-05, + "loss": 0.7098, + "step": 9732 + }, + { + "epoch": 0.1681815040088471, + "grad_norm": 1.0368693151307948, + "learning_rate": 1.901533532998791e-05, + "loss": 0.5886, + "step": 9733 + }, + { + "epoch": 0.168198783522256, + "grad_norm": 0.8789176477766435, + "learning_rate": 1.9015093150343965e-05, + "loss": 0.5889, + "step": 9734 + }, + { + "epoch": 0.16821606303566491, + "grad_norm": 1.305191302167308, + "learning_rate": 1.901485094246406e-05, + "loss": 0.6543, + "step": 9735 + }, + { + "epoch": 0.16823334254907382, + "grad_norm": 0.5060104402099536, + "learning_rate": 1.901460870634896e-05, + "loss": 0.6642, + "step": 9736 + }, + { + "epoch": 0.16825062206248273, + "grad_norm": 1.2575186126441145, + "learning_rate": 1.9014366441999423e-05, + "loss": 0.6558, + "step": 9737 + }, + { + "epoch": 0.1682679015758916, + "grad_norm": 0.8185063490056281, + "learning_rate": 1.9014124149416206e-05, + "loss": 0.521, + "step": 9738 + }, + { + "epoch": 0.16828518108930052, + "grad_norm": 0.462199499696039, + "learning_rate": 1.9013881828600063e-05, + "loss": 0.7113, + "step": 9739 + }, + { + "epoch": 0.16830246060270942, + "grad_norm": 1.2860344307168663, + "learning_rate": 1.901363947955176e-05, + "loss": 0.6175, + "step": 9740 + }, + { + "epoch": 0.16831974011611833, + "grad_norm": 0.8688175934992345, + "learning_rate": 1.9013397102272047e-05, + "loss": 0.6169, + "step": 9741 + }, + { + "epoch": 0.16833701962952724, + "grad_norm": 1.0286723069987365, + "learning_rate": 1.9013154696761696e-05, + "loss": 0.5271, + "step": 9742 + }, + { + "epoch": 0.16835429914293615, + "grad_norm": 1.0880087963548142, + "learning_rate": 1.901291226302146e-05, + "loss": 0.696, + "step": 9743 + }, + { + "epoch": 0.16837157865634503, + "grad_norm": 1.2559152838928829, + "learning_rate": 1.9012669801052093e-05, + "loss": 0.6422, + "step": 9744 + }, + { + "epoch": 0.16838885816975394, + "grad_norm": 0.9012621177693039, + "learning_rate": 1.901242731085436e-05, + "loss": 0.7505, + "step": 9745 + }, + { + "epoch": 0.16840613768316284, + "grad_norm": 0.3843540684860686, + "learning_rate": 1.901218479242902e-05, + "loss": 0.6671, + "step": 9746 + }, + { + "epoch": 0.16842341719657175, + "grad_norm": 1.3120810927583182, + "learning_rate": 1.9011942245776837e-05, + "loss": 0.7015, + "step": 9747 + }, + { + "epoch": 0.16844069670998066, + "grad_norm": 1.0482707191802303, + "learning_rate": 1.901169967089856e-05, + "loss": 0.8465, + "step": 9748 + }, + { + "epoch": 0.16845797622338954, + "grad_norm": 0.6548857744335258, + "learning_rate": 1.9011457067794957e-05, + "loss": 0.5433, + "step": 9749 + }, + { + "epoch": 0.16847525573679845, + "grad_norm": 1.1558624066551983, + "learning_rate": 1.9011214436466788e-05, + "loss": 0.7735, + "step": 9750 + }, + { + "epoch": 0.16849253525020735, + "grad_norm": 0.8846412664722287, + "learning_rate": 1.901097177691481e-05, + "loss": 0.5178, + "step": 9751 + }, + { + "epoch": 0.16850981476361626, + "grad_norm": 0.7009241424610692, + "learning_rate": 1.9010729089139782e-05, + "loss": 0.528, + "step": 9752 + }, + { + "epoch": 0.16852709427702517, + "grad_norm": 1.5119199608670901, + "learning_rate": 1.9010486373142464e-05, + "loss": 0.4169, + "step": 9753 + }, + { + "epoch": 0.16854437379043405, + "grad_norm": 1.230345802546257, + "learning_rate": 1.901024362892362e-05, + "loss": 0.3772, + "step": 9754 + }, + { + "epoch": 0.16856165330384296, + "grad_norm": 1.2247161912826345, + "learning_rate": 1.9010000856484008e-05, + "loss": 0.6339, + "step": 9755 + }, + { + "epoch": 0.16857893281725186, + "grad_norm": 0.7972463712487299, + "learning_rate": 1.9009758055824385e-05, + "loss": 0.5561, + "step": 9756 + }, + { + "epoch": 0.16859621233066077, + "grad_norm": 0.7550346219220128, + "learning_rate": 1.900951522694552e-05, + "loss": 0.6719, + "step": 9757 + }, + { + "epoch": 0.16861349184406968, + "grad_norm": 1.3410981568579488, + "learning_rate": 1.900927236984817e-05, + "loss": 0.7324, + "step": 9758 + }, + { + "epoch": 0.1686307713574786, + "grad_norm": 0.7465127128068304, + "learning_rate": 1.9009029484533088e-05, + "loss": 0.5531, + "step": 9759 + }, + { + "epoch": 0.16864805087088747, + "grad_norm": 0.8728597771751713, + "learning_rate": 1.9008786571001046e-05, + "loss": 0.6805, + "step": 9760 + }, + { + "epoch": 0.16866533038429637, + "grad_norm": 0.8603500077015777, + "learning_rate": 1.9008543629252798e-05, + "loss": 0.6191, + "step": 9761 + }, + { + "epoch": 0.16868260989770528, + "grad_norm": 0.9085151374578466, + "learning_rate": 1.9008300659289106e-05, + "loss": 0.4471, + "step": 9762 + }, + { + "epoch": 0.1686998894111142, + "grad_norm": 1.072330776029681, + "learning_rate": 1.9008057661110734e-05, + "loss": 0.7804, + "step": 9763 + }, + { + "epoch": 0.1687171689245231, + "grad_norm": 0.9910974579373789, + "learning_rate": 1.900781463471844e-05, + "loss": 0.7541, + "step": 9764 + }, + { + "epoch": 0.16873444843793198, + "grad_norm": 1.2118085056161725, + "learning_rate": 1.9007571580112985e-05, + "loss": 0.7102, + "step": 9765 + }, + { + "epoch": 0.16875172795134089, + "grad_norm": 0.9691483934951683, + "learning_rate": 1.9007328497295134e-05, + "loss": 0.5445, + "step": 9766 + }, + { + "epoch": 0.1687690074647498, + "grad_norm": 1.121613128371932, + "learning_rate": 1.900708538626564e-05, + "loss": 0.648, + "step": 9767 + }, + { + "epoch": 0.1687862869781587, + "grad_norm": 0.965788283451513, + "learning_rate": 1.9006842247025274e-05, + "loss": 0.4569, + "step": 9768 + }, + { + "epoch": 0.1688035664915676, + "grad_norm": 1.410705923747869, + "learning_rate": 1.9006599079574794e-05, + "loss": 0.6716, + "step": 9769 + }, + { + "epoch": 0.1688208460049765, + "grad_norm": 1.2904455813707303, + "learning_rate": 1.900635588391496e-05, + "loss": 0.6369, + "step": 9770 + }, + { + "epoch": 0.1688381255183854, + "grad_norm": 1.6615990508681073, + "learning_rate": 1.9006112660046535e-05, + "loss": 0.6075, + "step": 9771 + }, + { + "epoch": 0.1688554050317943, + "grad_norm": 1.0272355216362359, + "learning_rate": 1.900586940797028e-05, + "loss": 0.6312, + "step": 9772 + }, + { + "epoch": 0.1688726845452032, + "grad_norm": 1.4556645128155534, + "learning_rate": 1.900562612768696e-05, + "loss": 0.6902, + "step": 9773 + }, + { + "epoch": 0.16888996405861212, + "grad_norm": 0.8765321998426151, + "learning_rate": 1.900538281919733e-05, + "loss": 0.5875, + "step": 9774 + }, + { + "epoch": 0.168907243572021, + "grad_norm": 1.2797518631435734, + "learning_rate": 1.900513948250216e-05, + "loss": 0.6305, + "step": 9775 + }, + { + "epoch": 0.1689245230854299, + "grad_norm": 0.9304209221771651, + "learning_rate": 1.9004896117602208e-05, + "loss": 0.6976, + "step": 9776 + }, + { + "epoch": 0.16894180259883881, + "grad_norm": 0.8594785792756159, + "learning_rate": 1.9004652724498237e-05, + "loss": 0.6764, + "step": 9777 + }, + { + "epoch": 0.16895908211224772, + "grad_norm": 0.9805454764760744, + "learning_rate": 1.900440930319101e-05, + "loss": 0.514, + "step": 9778 + }, + { + "epoch": 0.16897636162565663, + "grad_norm": 1.1134748929480573, + "learning_rate": 1.900416585368129e-05, + "loss": 0.54, + "step": 9779 + }, + { + "epoch": 0.16899364113906554, + "grad_norm": 0.4208094988854051, + "learning_rate": 1.9003922375969837e-05, + "loss": 0.7031, + "step": 9780 + }, + { + "epoch": 0.16901092065247442, + "grad_norm": 1.1988934779160416, + "learning_rate": 1.9003678870057412e-05, + "loss": 0.4452, + "step": 9781 + }, + { + "epoch": 0.16902820016588332, + "grad_norm": 1.4701646855385984, + "learning_rate": 1.9003435335944784e-05, + "loss": 0.5986, + "step": 9782 + }, + { + "epoch": 0.16904547967929223, + "grad_norm": 0.7240698238528537, + "learning_rate": 1.900319177363271e-05, + "loss": 0.7251, + "step": 9783 + }, + { + "epoch": 0.16906275919270114, + "grad_norm": 0.830312538223356, + "learning_rate": 1.900294818312196e-05, + "loss": 0.659, + "step": 9784 + }, + { + "epoch": 0.16908003870611005, + "grad_norm": 1.1620241181556048, + "learning_rate": 1.9002704564413287e-05, + "loss": 0.5986, + "step": 9785 + }, + { + "epoch": 0.16909731821951893, + "grad_norm": 1.2020430630624315, + "learning_rate": 1.900246091750746e-05, + "loss": 0.7609, + "step": 9786 + }, + { + "epoch": 0.16911459773292783, + "grad_norm": 1.0455957941315155, + "learning_rate": 1.9002217242405243e-05, + "loss": 0.5288, + "step": 9787 + }, + { + "epoch": 0.16913187724633674, + "grad_norm": 1.1435268628375852, + "learning_rate": 1.9001973539107395e-05, + "loss": 0.3218, + "step": 9788 + }, + { + "epoch": 0.16914915675974565, + "grad_norm": 0.9628052065954223, + "learning_rate": 1.9001729807614687e-05, + "loss": 0.6032, + "step": 9789 + }, + { + "epoch": 0.16916643627315456, + "grad_norm": 0.8520422150678192, + "learning_rate": 1.900148604792787e-05, + "loss": 0.7141, + "step": 9790 + }, + { + "epoch": 0.16918371578656344, + "grad_norm": 1.618922076291769, + "learning_rate": 1.900124226004772e-05, + "loss": 0.7559, + "step": 9791 + }, + { + "epoch": 0.16920099529997235, + "grad_norm": 1.237530371263672, + "learning_rate": 1.900099844397499e-05, + "loss": 0.6076, + "step": 9792 + }, + { + "epoch": 0.16921827481338125, + "grad_norm": 0.8819488149813576, + "learning_rate": 1.9000754599710454e-05, + "loss": 0.764, + "step": 9793 + }, + { + "epoch": 0.16923555432679016, + "grad_norm": 1.273645952845669, + "learning_rate": 1.9000510727254868e-05, + "loss": 0.5926, + "step": 9794 + }, + { + "epoch": 0.16925283384019907, + "grad_norm": 1.3822293625274484, + "learning_rate": 1.9000266826609e-05, + "loss": 0.7095, + "step": 9795 + }, + { + "epoch": 0.16927011335360798, + "grad_norm": 0.6648091618070285, + "learning_rate": 1.900002289777361e-05, + "loss": 0.6652, + "step": 9796 + }, + { + "epoch": 0.16928739286701686, + "grad_norm": 1.0234086980929906, + "learning_rate": 1.8999778940749466e-05, + "loss": 0.4944, + "step": 9797 + }, + { + "epoch": 0.16930467238042576, + "grad_norm": 1.036452154922898, + "learning_rate": 1.899953495553733e-05, + "loss": 0.6491, + "step": 9798 + }, + { + "epoch": 0.16932195189383467, + "grad_norm": 0.8889003841637256, + "learning_rate": 1.8999290942137968e-05, + "loss": 0.5335, + "step": 9799 + }, + { + "epoch": 0.16933923140724358, + "grad_norm": 0.4667602914293389, + "learning_rate": 1.899904690055214e-05, + "loss": 0.6898, + "step": 9800 + }, + { + "epoch": 0.1693565109206525, + "grad_norm": 1.0780457751845718, + "learning_rate": 1.8998802830780615e-05, + "loss": 0.6007, + "step": 9801 + }, + { + "epoch": 0.16937379043406137, + "grad_norm": 0.5397874287856557, + "learning_rate": 1.8998558732824154e-05, + "loss": 0.9411, + "step": 9802 + }, + { + "epoch": 0.16939106994747027, + "grad_norm": 1.0094285480657885, + "learning_rate": 1.8998314606683522e-05, + "loss": 0.6806, + "step": 9803 + }, + { + "epoch": 0.16940834946087918, + "grad_norm": 0.8837800491634326, + "learning_rate": 1.899807045235949e-05, + "loss": 0.7672, + "step": 9804 + }, + { + "epoch": 0.1694256289742881, + "grad_norm": 0.9066065500054352, + "learning_rate": 1.8997826269852816e-05, + "loss": 0.5386, + "step": 9805 + }, + { + "epoch": 0.169442908487697, + "grad_norm": 0.5870056880986767, + "learning_rate": 1.8997582059164262e-05, + "loss": 0.5857, + "step": 9806 + }, + { + "epoch": 0.16946018800110588, + "grad_norm": 0.6751963511001157, + "learning_rate": 1.89973378202946e-05, + "loss": 0.7967, + "step": 9807 + }, + { + "epoch": 0.16947746751451478, + "grad_norm": 1.4378323590336892, + "learning_rate": 1.8997093553244593e-05, + "loss": 0.7756, + "step": 9808 + }, + { + "epoch": 0.1694947470279237, + "grad_norm": 0.95837678494874, + "learning_rate": 1.8996849258015006e-05, + "loss": 0.4255, + "step": 9809 + }, + { + "epoch": 0.1695120265413326, + "grad_norm": 1.1793725476978727, + "learning_rate": 1.89966049346066e-05, + "loss": 0.6631, + "step": 9810 + }, + { + "epoch": 0.1695293060547415, + "grad_norm": 1.1725409542804208, + "learning_rate": 1.8996360583020147e-05, + "loss": 0.7532, + "step": 9811 + }, + { + "epoch": 0.1695465855681504, + "grad_norm": 1.4676697180647515, + "learning_rate": 1.8996116203256408e-05, + "loss": 0.5685, + "step": 9812 + }, + { + "epoch": 0.1695638650815593, + "grad_norm": 0.5081978972689033, + "learning_rate": 1.899587179531615e-05, + "loss": 0.5943, + "step": 9813 + }, + { + "epoch": 0.1695811445949682, + "grad_norm": 1.0424629177347644, + "learning_rate": 1.8995627359200136e-05, + "loss": 0.7673, + "step": 9814 + }, + { + "epoch": 0.1695984241083771, + "grad_norm": 0.435184131505006, + "learning_rate": 1.8995382894909135e-05, + "loss": 0.6511, + "step": 9815 + }, + { + "epoch": 0.16961570362178602, + "grad_norm": 0.9822674358627156, + "learning_rate": 1.8995138402443908e-05, + "loss": 0.6565, + "step": 9816 + }, + { + "epoch": 0.16963298313519493, + "grad_norm": 1.3840372806707102, + "learning_rate": 1.8994893881805226e-05, + "loss": 0.6847, + "step": 9817 + }, + { + "epoch": 0.1696502626486038, + "grad_norm": 0.90365081080804, + "learning_rate": 1.8994649332993856e-05, + "loss": 0.7731, + "step": 9818 + }, + { + "epoch": 0.1696675421620127, + "grad_norm": 0.3649202800120703, + "learning_rate": 1.899440475601056e-05, + "loss": 0.5483, + "step": 9819 + }, + { + "epoch": 0.16968482167542162, + "grad_norm": 1.0871080725255016, + "learning_rate": 1.89941601508561e-05, + "loss": 0.3984, + "step": 9820 + }, + { + "epoch": 0.16970210118883053, + "grad_norm": 1.0952629512181296, + "learning_rate": 1.8993915517531248e-05, + "loss": 0.5459, + "step": 9821 + }, + { + "epoch": 0.16971938070223944, + "grad_norm": 1.4420496338213125, + "learning_rate": 1.899367085603677e-05, + "loss": 0.7199, + "step": 9822 + }, + { + "epoch": 0.16973666021564832, + "grad_norm": 1.2415129114352321, + "learning_rate": 1.8993426166373433e-05, + "loss": 0.6307, + "step": 9823 + }, + { + "epoch": 0.16975393972905722, + "grad_norm": 1.2534050381000832, + "learning_rate": 1.8993181448542e-05, + "loss": 0.6638, + "step": 9824 + }, + { + "epoch": 0.16977121924246613, + "grad_norm": 0.7523783645431886, + "learning_rate": 1.8992936702543242e-05, + "loss": 0.5891, + "step": 9825 + }, + { + "epoch": 0.16978849875587504, + "grad_norm": 1.0459325184279504, + "learning_rate": 1.899269192837792e-05, + "loss": 0.5948, + "step": 9826 + }, + { + "epoch": 0.16980577826928395, + "grad_norm": 1.0084662284641925, + "learning_rate": 1.8992447126046803e-05, + "loss": 0.4771, + "step": 9827 + }, + { + "epoch": 0.16982305778269283, + "grad_norm": 1.5189213109889614, + "learning_rate": 1.899220229555066e-05, + "loss": 0.8771, + "step": 9828 + }, + { + "epoch": 0.16984033729610173, + "grad_norm": 3.3579526746745993, + "learning_rate": 1.8991957436890256e-05, + "loss": 0.6494, + "step": 9829 + }, + { + "epoch": 0.16985761680951064, + "grad_norm": 1.6184387538866492, + "learning_rate": 1.8991712550066356e-05, + "loss": 0.6029, + "step": 9830 + }, + { + "epoch": 0.16987489632291955, + "grad_norm": 1.1172060076161845, + "learning_rate": 1.899146763507973e-05, + "loss": 0.5073, + "step": 9831 + }, + { + "epoch": 0.16989217583632846, + "grad_norm": 0.813181443142801, + "learning_rate": 1.8991222691931147e-05, + "loss": 0.5658, + "step": 9832 + }, + { + "epoch": 0.16990945534973736, + "grad_norm": 1.3574866017856326, + "learning_rate": 1.899097772062137e-05, + "loss": 0.5418, + "step": 9833 + }, + { + "epoch": 0.16992673486314624, + "grad_norm": 0.9405885587404137, + "learning_rate": 1.8990732721151165e-05, + "loss": 0.5083, + "step": 9834 + }, + { + "epoch": 0.16994401437655515, + "grad_norm": 0.9594015571388361, + "learning_rate": 1.8990487693521302e-05, + "loss": 0.5619, + "step": 9835 + }, + { + "epoch": 0.16996129388996406, + "grad_norm": 1.860487805929258, + "learning_rate": 1.8990242637732547e-05, + "loss": 0.6028, + "step": 9836 + }, + { + "epoch": 0.16997857340337297, + "grad_norm": 1.0435092566151358, + "learning_rate": 1.8989997553785673e-05, + "loss": 0.8317, + "step": 9837 + }, + { + "epoch": 0.16999585291678188, + "grad_norm": 0.9199525822857274, + "learning_rate": 1.8989752441681442e-05, + "loss": 0.73, + "step": 9838 + }, + { + "epoch": 0.17001313243019076, + "grad_norm": 1.1194839748361338, + "learning_rate": 1.8989507301420622e-05, + "loss": 0.765, + "step": 9839 + }, + { + "epoch": 0.17003041194359966, + "grad_norm": 0.6965223555784382, + "learning_rate": 1.8989262133003984e-05, + "loss": 0.379, + "step": 9840 + }, + { + "epoch": 0.17004769145700857, + "grad_norm": 0.7321563477129589, + "learning_rate": 1.8989016936432292e-05, + "loss": 0.4308, + "step": 9841 + }, + { + "epoch": 0.17006497097041748, + "grad_norm": 0.9918998598995635, + "learning_rate": 1.8988771711706316e-05, + "loss": 0.8149, + "step": 9842 + }, + { + "epoch": 0.17008225048382639, + "grad_norm": 0.9829758999950976, + "learning_rate": 1.8988526458826825e-05, + "loss": 0.9695, + "step": 9843 + }, + { + "epoch": 0.17009952999723527, + "grad_norm": 0.9940661527861064, + "learning_rate": 1.8988281177794584e-05, + "loss": 0.8036, + "step": 9844 + }, + { + "epoch": 0.17011680951064417, + "grad_norm": 0.4920102687902948, + "learning_rate": 1.8988035868610364e-05, + "loss": 0.9641, + "step": 9845 + }, + { + "epoch": 0.17013408902405308, + "grad_norm": 0.9848684513068516, + "learning_rate": 1.8987790531274935e-05, + "loss": 0.6596, + "step": 9846 + }, + { + "epoch": 0.170151368537462, + "grad_norm": 0.9930982833248516, + "learning_rate": 1.8987545165789058e-05, + "loss": 0.667, + "step": 9847 + }, + { + "epoch": 0.1701686480508709, + "grad_norm": 0.4396153634429599, + "learning_rate": 1.8987299772153508e-05, + "loss": 0.5198, + "step": 9848 + }, + { + "epoch": 0.17018592756427978, + "grad_norm": 0.9532334192003572, + "learning_rate": 1.8987054350369057e-05, + "loss": 0.6292, + "step": 9849 + }, + { + "epoch": 0.17020320707768868, + "grad_norm": 1.085465022701408, + "learning_rate": 1.8986808900436465e-05, + "loss": 0.4832, + "step": 9850 + }, + { + "epoch": 0.1702204865910976, + "grad_norm": 1.22002079682904, + "learning_rate": 1.8986563422356507e-05, + "loss": 0.524, + "step": 9851 + }, + { + "epoch": 0.1702377661045065, + "grad_norm": 0.5379374770629114, + "learning_rate": 1.8986317916129942e-05, + "loss": 0.7797, + "step": 9852 + }, + { + "epoch": 0.1702550456179154, + "grad_norm": 1.005834176250527, + "learning_rate": 1.8986072381757554e-05, + "loss": 0.6358, + "step": 9853 + }, + { + "epoch": 0.17027232513132431, + "grad_norm": 1.4538587218187036, + "learning_rate": 1.8985826819240104e-05, + "loss": 0.6245, + "step": 9854 + }, + { + "epoch": 0.1702896046447332, + "grad_norm": 1.105647135832389, + "learning_rate": 1.898558122857836e-05, + "loss": 0.711, + "step": 9855 + }, + { + "epoch": 0.1703068841581421, + "grad_norm": 1.4294577802834598, + "learning_rate": 1.898533560977309e-05, + "loss": 0.6497, + "step": 9856 + }, + { + "epoch": 0.170324163671551, + "grad_norm": 0.44338781741744215, + "learning_rate": 1.898508996282507e-05, + "loss": 0.735, + "step": 9857 + }, + { + "epoch": 0.17034144318495992, + "grad_norm": 0.7208839485209793, + "learning_rate": 1.8984844287735065e-05, + "loss": 0.4212, + "step": 9858 + }, + { + "epoch": 0.17035872269836883, + "grad_norm": 0.8486325567429267, + "learning_rate": 1.8984598584503843e-05, + "loss": 0.612, + "step": 9859 + }, + { + "epoch": 0.1703760022117777, + "grad_norm": 1.1574218976416257, + "learning_rate": 1.8984352853132177e-05, + "loss": 0.664, + "step": 9860 + }, + { + "epoch": 0.1703932817251866, + "grad_norm": 1.1920758743051898, + "learning_rate": 1.8984107093620833e-05, + "loss": 0.6846, + "step": 9861 + }, + { + "epoch": 0.17041056123859552, + "grad_norm": 0.8895976849220358, + "learning_rate": 1.8983861305970584e-05, + "loss": 0.5811, + "step": 9862 + }, + { + "epoch": 0.17042784075200443, + "grad_norm": 0.35643635546884, + "learning_rate": 1.89836154901822e-05, + "loss": 0.5124, + "step": 9863 + }, + { + "epoch": 0.17044512026541334, + "grad_norm": 0.9897023778608475, + "learning_rate": 1.8983369646256446e-05, + "loss": 0.6799, + "step": 9864 + }, + { + "epoch": 0.17046239977882222, + "grad_norm": 1.353766588076828, + "learning_rate": 1.8983123774194098e-05, + "loss": 0.5841, + "step": 9865 + }, + { + "epoch": 0.17047967929223112, + "grad_norm": 1.3700793948556638, + "learning_rate": 1.898287787399592e-05, + "loss": 0.5622, + "step": 9866 + }, + { + "epoch": 0.17049695880564003, + "grad_norm": 1.6194729617144703, + "learning_rate": 1.8982631945662692e-05, + "loss": 0.4221, + "step": 9867 + }, + { + "epoch": 0.17051423831904894, + "grad_norm": 0.5977211718955205, + "learning_rate": 1.8982385989195175e-05, + "loss": 0.5054, + "step": 9868 + }, + { + "epoch": 0.17053151783245785, + "grad_norm": 1.3232103971043714, + "learning_rate": 1.8982140004594143e-05, + "loss": 0.547, + "step": 9869 + }, + { + "epoch": 0.17054879734586675, + "grad_norm": 1.228908884589464, + "learning_rate": 1.8981893991860365e-05, + "loss": 0.7523, + "step": 9870 + }, + { + "epoch": 0.17056607685927563, + "grad_norm": 1.010434913749734, + "learning_rate": 1.8981647950994613e-05, + "loss": 0.5385, + "step": 9871 + }, + { + "epoch": 0.17058335637268454, + "grad_norm": 0.9527741827316609, + "learning_rate": 1.8981401881997658e-05, + "loss": 0.6157, + "step": 9872 + }, + { + "epoch": 0.17060063588609345, + "grad_norm": 1.137155811477217, + "learning_rate": 1.8981155784870267e-05, + "loss": 0.557, + "step": 9873 + }, + { + "epoch": 0.17061791539950236, + "grad_norm": 0.8775607843760569, + "learning_rate": 1.8980909659613217e-05, + "loss": 0.4726, + "step": 9874 + }, + { + "epoch": 0.17063519491291126, + "grad_norm": 1.168096918118031, + "learning_rate": 1.8980663506227274e-05, + "loss": 0.619, + "step": 9875 + }, + { + "epoch": 0.17065247442632014, + "grad_norm": 1.4633737402025506, + "learning_rate": 1.898041732471321e-05, + "loss": 0.5822, + "step": 9876 + }, + { + "epoch": 0.17066975393972905, + "grad_norm": 1.2836596677739514, + "learning_rate": 1.8980171115071797e-05, + "loss": 0.8029, + "step": 9877 + }, + { + "epoch": 0.17068703345313796, + "grad_norm": 1.3652801490435187, + "learning_rate": 1.8979924877303808e-05, + "loss": 0.5614, + "step": 9878 + }, + { + "epoch": 0.17070431296654687, + "grad_norm": 0.9923507173924951, + "learning_rate": 1.8979678611410005e-05, + "loss": 0.592, + "step": 9879 + }, + { + "epoch": 0.17072159247995577, + "grad_norm": 1.4393331452079656, + "learning_rate": 1.897943231739117e-05, + "loss": 0.6673, + "step": 9880 + }, + { + "epoch": 0.17073887199336465, + "grad_norm": 0.41243090654249615, + "learning_rate": 1.8979185995248072e-05, + "loss": 0.6013, + "step": 9881 + }, + { + "epoch": 0.17075615150677356, + "grad_norm": 0.4225717167438823, + "learning_rate": 1.897893964498148e-05, + "loss": 0.7507, + "step": 9882 + }, + { + "epoch": 0.17077343102018247, + "grad_norm": 1.1703668456948666, + "learning_rate": 1.8978693266592164e-05, + "loss": 0.5628, + "step": 9883 + }, + { + "epoch": 0.17079071053359138, + "grad_norm": 1.4405876065799867, + "learning_rate": 1.89784468600809e-05, + "loss": 0.6145, + "step": 9884 + }, + { + "epoch": 0.17080799004700029, + "grad_norm": 1.1854301314923055, + "learning_rate": 1.897820042544846e-05, + "loss": 0.5967, + "step": 9885 + }, + { + "epoch": 0.17082526956040917, + "grad_norm": 1.1373184761088713, + "learning_rate": 1.8977953962695613e-05, + "loss": 0.7982, + "step": 9886 + }, + { + "epoch": 0.17084254907381807, + "grad_norm": 1.367664740375872, + "learning_rate": 1.8977707471823134e-05, + "loss": 0.9667, + "step": 9887 + }, + { + "epoch": 0.17085982858722698, + "grad_norm": 0.6235352365039417, + "learning_rate": 1.897746095283179e-05, + "loss": 0.6096, + "step": 9888 + }, + { + "epoch": 0.1708771081006359, + "grad_norm": 0.8060130569241354, + "learning_rate": 1.8977214405722355e-05, + "loss": 0.6668, + "step": 9889 + }, + { + "epoch": 0.1708943876140448, + "grad_norm": 0.4854000897858558, + "learning_rate": 1.8976967830495606e-05, + "loss": 0.6348, + "step": 9890 + }, + { + "epoch": 0.1709116671274537, + "grad_norm": 0.6854062117573745, + "learning_rate": 1.897672122715231e-05, + "loss": 0.3831, + "step": 9891 + }, + { + "epoch": 0.17092894664086258, + "grad_norm": 0.9622286591828103, + "learning_rate": 1.897647459569324e-05, + "loss": 0.5627, + "step": 9892 + }, + { + "epoch": 0.1709462261542715, + "grad_norm": 1.1520294697336604, + "learning_rate": 1.897622793611917e-05, + "loss": 0.5773, + "step": 9893 + }, + { + "epoch": 0.1709635056676804, + "grad_norm": 0.6889783253574749, + "learning_rate": 1.897598124843087e-05, + "loss": 0.486, + "step": 9894 + }, + { + "epoch": 0.1709807851810893, + "grad_norm": 0.9892019426457077, + "learning_rate": 1.8975734532629118e-05, + "loss": 0.5878, + "step": 9895 + }, + { + "epoch": 0.17099806469449821, + "grad_norm": 0.9909261887113794, + "learning_rate": 1.897548778871468e-05, + "loss": 0.7031, + "step": 9896 + }, + { + "epoch": 0.1710153442079071, + "grad_norm": 1.083583762141789, + "learning_rate": 1.8975241016688334e-05, + "loss": 0.5214, + "step": 9897 + }, + { + "epoch": 0.171032623721316, + "grad_norm": 1.282619466199437, + "learning_rate": 1.897499421655085e-05, + "loss": 0.6782, + "step": 9898 + }, + { + "epoch": 0.1710499032347249, + "grad_norm": 1.206653780737325, + "learning_rate": 1.8974747388303005e-05, + "loss": 0.4359, + "step": 9899 + }, + { + "epoch": 0.17106718274813382, + "grad_norm": 1.1659596180209253, + "learning_rate": 1.8974500531945567e-05, + "loss": 0.6265, + "step": 9900 + }, + { + "epoch": 0.17108446226154272, + "grad_norm": 0.6954990216923782, + "learning_rate": 1.897425364747931e-05, + "loss": 0.4854, + "step": 9901 + }, + { + "epoch": 0.1711017417749516, + "grad_norm": 1.2461789667929042, + "learning_rate": 1.897400673490501e-05, + "loss": 0.56, + "step": 9902 + }, + { + "epoch": 0.1711190212883605, + "grad_norm": 0.850469828611147, + "learning_rate": 1.8973759794223437e-05, + "loss": 0.5497, + "step": 9903 + }, + { + "epoch": 0.17113630080176942, + "grad_norm": 0.38122879610242527, + "learning_rate": 1.897351282543537e-05, + "loss": 0.5245, + "step": 9904 + }, + { + "epoch": 0.17115358031517833, + "grad_norm": 0.4089424283146597, + "learning_rate": 1.8973265828541575e-05, + "loss": 0.6436, + "step": 9905 + }, + { + "epoch": 0.17117085982858724, + "grad_norm": 1.318435950521534, + "learning_rate": 1.8973018803542832e-05, + "loss": 0.808, + "step": 9906 + }, + { + "epoch": 0.17118813934199614, + "grad_norm": 1.1280526012933323, + "learning_rate": 1.8972771750439912e-05, + "loss": 0.4167, + "step": 9907 + }, + { + "epoch": 0.17120541885540502, + "grad_norm": 1.1902090380749961, + "learning_rate": 1.897252466923359e-05, + "loss": 0.7256, + "step": 9908 + }, + { + "epoch": 0.17122269836881393, + "grad_norm": 0.8456661897762512, + "learning_rate": 1.8972277559924636e-05, + "loss": 0.431, + "step": 9909 + }, + { + "epoch": 0.17123997788222284, + "grad_norm": 1.0844006692107817, + "learning_rate": 1.8972030422513827e-05, + "loss": 0.7726, + "step": 9910 + }, + { + "epoch": 0.17125725739563175, + "grad_norm": 1.556393645919273, + "learning_rate": 1.897178325700194e-05, + "loss": 0.6108, + "step": 9911 + }, + { + "epoch": 0.17127453690904065, + "grad_norm": 1.1626628911571635, + "learning_rate": 1.8971536063389745e-05, + "loss": 0.779, + "step": 9912 + }, + { + "epoch": 0.17129181642244953, + "grad_norm": 1.1849686258264127, + "learning_rate": 1.8971288841678015e-05, + "loss": 0.8701, + "step": 9913 + }, + { + "epoch": 0.17130909593585844, + "grad_norm": 1.2605783003560966, + "learning_rate": 1.897104159186753e-05, + "loss": 0.5385, + "step": 9914 + }, + { + "epoch": 0.17132637544926735, + "grad_norm": 0.8706307822182862, + "learning_rate": 1.8970794313959055e-05, + "loss": 0.4608, + "step": 9915 + }, + { + "epoch": 0.17134365496267626, + "grad_norm": 1.1083656945506646, + "learning_rate": 1.8970547007953378e-05, + "loss": 0.6223, + "step": 9916 + }, + { + "epoch": 0.17136093447608516, + "grad_norm": 0.9260602904285011, + "learning_rate": 1.897029967385126e-05, + "loss": 0.6535, + "step": 9917 + }, + { + "epoch": 0.17137821398949404, + "grad_norm": 0.9224507917654412, + "learning_rate": 1.8970052311653483e-05, + "loss": 0.4388, + "step": 9918 + }, + { + "epoch": 0.17139549350290295, + "grad_norm": 1.1164165005915625, + "learning_rate": 1.8969804921360824e-05, + "loss": 0.558, + "step": 9919 + }, + { + "epoch": 0.17141277301631186, + "grad_norm": 0.6739357616987044, + "learning_rate": 1.896955750297405e-05, + "loss": 0.7307, + "step": 9920 + }, + { + "epoch": 0.17143005252972077, + "grad_norm": 0.9075888171803563, + "learning_rate": 1.8969310056493942e-05, + "loss": 0.6321, + "step": 9921 + }, + { + "epoch": 0.17144733204312967, + "grad_norm": 0.9454096401396608, + "learning_rate": 1.8969062581921274e-05, + "loss": 0.8149, + "step": 9922 + }, + { + "epoch": 0.17146461155653855, + "grad_norm": 0.7052025747073702, + "learning_rate": 1.896881507925682e-05, + "loss": 0.4591, + "step": 9923 + }, + { + "epoch": 0.17148189106994746, + "grad_norm": 1.1927698715638626, + "learning_rate": 1.8968567548501355e-05, + "loss": 0.5417, + "step": 9924 + }, + { + "epoch": 0.17149917058335637, + "grad_norm": 1.302494176882143, + "learning_rate": 1.8968319989655655e-05, + "loss": 0.6331, + "step": 9925 + }, + { + "epoch": 0.17151645009676528, + "grad_norm": 0.500587702236543, + "learning_rate": 1.8968072402720496e-05, + "loss": 0.7507, + "step": 9926 + }, + { + "epoch": 0.17153372961017418, + "grad_norm": 0.9591905144760192, + "learning_rate": 1.896782478769665e-05, + "loss": 0.4328, + "step": 9927 + }, + { + "epoch": 0.1715510091235831, + "grad_norm": 1.2160074412869382, + "learning_rate": 1.89675771445849e-05, + "loss": 0.6655, + "step": 9928 + }, + { + "epoch": 0.17156828863699197, + "grad_norm": 1.3112649805123453, + "learning_rate": 1.8967329473386013e-05, + "loss": 0.6284, + "step": 9929 + }, + { + "epoch": 0.17158556815040088, + "grad_norm": 0.7877284649032278, + "learning_rate": 1.896708177410077e-05, + "loss": 0.7336, + "step": 9930 + }, + { + "epoch": 0.1716028476638098, + "grad_norm": 1.1728583682086253, + "learning_rate": 1.8966834046729943e-05, + "loss": 0.6628, + "step": 9931 + }, + { + "epoch": 0.1716201271772187, + "grad_norm": 0.6211677741604259, + "learning_rate": 1.8966586291274313e-05, + "loss": 0.408, + "step": 9932 + }, + { + "epoch": 0.1716374066906276, + "grad_norm": 1.422467075771894, + "learning_rate": 1.8966338507734655e-05, + "loss": 0.7764, + "step": 9933 + }, + { + "epoch": 0.17165468620403648, + "grad_norm": 1.2111162709537486, + "learning_rate": 1.896609069611174e-05, + "loss": 0.502, + "step": 9934 + }, + { + "epoch": 0.1716719657174454, + "grad_norm": 0.6428466634743842, + "learning_rate": 1.8965842856406348e-05, + "loss": 0.5219, + "step": 9935 + }, + { + "epoch": 0.1716892452308543, + "grad_norm": 1.24347154141259, + "learning_rate": 1.8965594988619255e-05, + "loss": 0.5657, + "step": 9936 + }, + { + "epoch": 0.1717065247442632, + "grad_norm": 1.1957381748908884, + "learning_rate": 1.8965347092751237e-05, + "loss": 0.4366, + "step": 9937 + }, + { + "epoch": 0.1717238042576721, + "grad_norm": 0.572534074523913, + "learning_rate": 1.8965099168803075e-05, + "loss": 0.5472, + "step": 9938 + }, + { + "epoch": 0.171741083771081, + "grad_norm": 0.925396996791698, + "learning_rate": 1.8964851216775536e-05, + "loss": 0.4216, + "step": 9939 + }, + { + "epoch": 0.1717583632844899, + "grad_norm": 1.6651635640003672, + "learning_rate": 1.89646032366694e-05, + "loss": 0.8428, + "step": 9940 + }, + { + "epoch": 0.1717756427978988, + "grad_norm": 0.9737807292051939, + "learning_rate": 1.8964355228485448e-05, + "loss": 0.6542, + "step": 9941 + }, + { + "epoch": 0.17179292231130772, + "grad_norm": 0.9977725808390167, + "learning_rate": 1.8964107192224457e-05, + "loss": 0.8338, + "step": 9942 + }, + { + "epoch": 0.17181020182471662, + "grad_norm": 1.011846601270162, + "learning_rate": 1.8963859127887197e-05, + "loss": 0.5518, + "step": 9943 + }, + { + "epoch": 0.17182748133812553, + "grad_norm": 1.4627726719993501, + "learning_rate": 1.896361103547445e-05, + "loss": 0.8744, + "step": 9944 + }, + { + "epoch": 0.1718447608515344, + "grad_norm": 1.0691511775901879, + "learning_rate": 1.8963362914986994e-05, + "loss": 0.574, + "step": 9945 + }, + { + "epoch": 0.17186204036494332, + "grad_norm": 2.353706084307166, + "learning_rate": 1.8963114766425602e-05, + "loss": 0.7053, + "step": 9946 + }, + { + "epoch": 0.17187931987835223, + "grad_norm": 1.3502985638112306, + "learning_rate": 1.8962866589791055e-05, + "loss": 0.7769, + "step": 9947 + }, + { + "epoch": 0.17189659939176113, + "grad_norm": 1.2982561006503832, + "learning_rate": 1.8962618385084125e-05, + "loss": 0.5102, + "step": 9948 + }, + { + "epoch": 0.17191387890517004, + "grad_norm": 0.7981812725264853, + "learning_rate": 1.89623701523056e-05, + "loss": 0.6085, + "step": 9949 + }, + { + "epoch": 0.17193115841857892, + "grad_norm": 1.6032285751220066, + "learning_rate": 1.8962121891456244e-05, + "loss": 0.6056, + "step": 9950 + }, + { + "epoch": 0.17194843793198783, + "grad_norm": 0.7284497809264279, + "learning_rate": 1.896187360253684e-05, + "loss": 0.4047, + "step": 9951 + }, + { + "epoch": 0.17196571744539674, + "grad_norm": 0.9189906384614971, + "learning_rate": 1.8961625285548173e-05, + "loss": 0.6249, + "step": 9952 + }, + { + "epoch": 0.17198299695880565, + "grad_norm": 0.9932851378363743, + "learning_rate": 1.8961376940491014e-05, + "loss": 0.5677, + "step": 9953 + }, + { + "epoch": 0.17200027647221455, + "grad_norm": 1.0158429919264718, + "learning_rate": 1.8961128567366138e-05, + "loss": 0.6444, + "step": 9954 + }, + { + "epoch": 0.17201755598562343, + "grad_norm": 0.6263330800940377, + "learning_rate": 1.8960880166174327e-05, + "loss": 0.6539, + "step": 9955 + }, + { + "epoch": 0.17203483549903234, + "grad_norm": 1.3325250181590185, + "learning_rate": 1.896063173691636e-05, + "loss": 0.7755, + "step": 9956 + }, + { + "epoch": 0.17205211501244125, + "grad_norm": 1.1522764288831466, + "learning_rate": 1.8960383279593012e-05, + "loss": 0.6527, + "step": 9957 + }, + { + "epoch": 0.17206939452585016, + "grad_norm": 1.0955680087365889, + "learning_rate": 1.8960134794205062e-05, + "loss": 0.7961, + "step": 9958 + }, + { + "epoch": 0.17208667403925906, + "grad_norm": 1.0096754950513318, + "learning_rate": 1.895988628075329e-05, + "loss": 0.4578, + "step": 9959 + }, + { + "epoch": 0.17210395355266794, + "grad_norm": 0.7049029779587578, + "learning_rate": 1.8959637739238472e-05, + "loss": 0.4781, + "step": 9960 + }, + { + "epoch": 0.17212123306607685, + "grad_norm": 1.0787040902790574, + "learning_rate": 1.8959389169661387e-05, + "loss": 0.5934, + "step": 9961 + }, + { + "epoch": 0.17213851257948576, + "grad_norm": 0.4722251062269479, + "learning_rate": 1.895914057202282e-05, + "loss": 0.6361, + "step": 9962 + }, + { + "epoch": 0.17215579209289467, + "grad_norm": 1.3116310787323187, + "learning_rate": 1.895889194632354e-05, + "loss": 0.8786, + "step": 9963 + }, + { + "epoch": 0.17217307160630357, + "grad_norm": 0.9372226777033514, + "learning_rate": 1.8958643292564326e-05, + "loss": 0.7168, + "step": 9964 + }, + { + "epoch": 0.17219035111971248, + "grad_norm": 1.1326285358183832, + "learning_rate": 1.8958394610745966e-05, + "loss": 0.5502, + "step": 9965 + }, + { + "epoch": 0.17220763063312136, + "grad_norm": 0.9999049548101125, + "learning_rate": 1.895814590086923e-05, + "loss": 0.8143, + "step": 9966 + }, + { + "epoch": 0.17222491014653027, + "grad_norm": 0.7629341440719761, + "learning_rate": 1.89578971629349e-05, + "loss": 0.4751, + "step": 9967 + }, + { + "epoch": 0.17224218965993918, + "grad_norm": 0.9652536481460283, + "learning_rate": 1.8957648396943757e-05, + "loss": 0.7601, + "step": 9968 + }, + { + "epoch": 0.17225946917334808, + "grad_norm": 0.8319390718143932, + "learning_rate": 1.895739960289658e-05, + "loss": 0.6661, + "step": 9969 + }, + { + "epoch": 0.172276748686757, + "grad_norm": 0.8244790812386064, + "learning_rate": 1.8957150780794143e-05, + "loss": 0.6704, + "step": 9970 + }, + { + "epoch": 0.17229402820016587, + "grad_norm": 1.0349746374931859, + "learning_rate": 1.895690193063723e-05, + "loss": 0.7072, + "step": 9971 + }, + { + "epoch": 0.17231130771357478, + "grad_norm": 0.8322293541776128, + "learning_rate": 1.8956653052426622e-05, + "loss": 0.5226, + "step": 9972 + }, + { + "epoch": 0.1723285872269837, + "grad_norm": 1.375243846417541, + "learning_rate": 1.8956404146163095e-05, + "loss": 0.8999, + "step": 9973 + }, + { + "epoch": 0.1723458667403926, + "grad_norm": 0.6952154492588297, + "learning_rate": 1.8956155211847426e-05, + "loss": 0.5064, + "step": 9974 + }, + { + "epoch": 0.1723631462538015, + "grad_norm": 0.7982575016569196, + "learning_rate": 1.89559062494804e-05, + "loss": 0.4843, + "step": 9975 + }, + { + "epoch": 0.17238042576721038, + "grad_norm": 0.8496166536347717, + "learning_rate": 1.89556572590628e-05, + "loss": 0.5784, + "step": 9976 + }, + { + "epoch": 0.1723977052806193, + "grad_norm": 0.9007828510568662, + "learning_rate": 1.8955408240595396e-05, + "loss": 0.6107, + "step": 9977 + }, + { + "epoch": 0.1724149847940282, + "grad_norm": 0.9704663927419621, + "learning_rate": 1.895515919407897e-05, + "loss": 0.5502, + "step": 9978 + }, + { + "epoch": 0.1724322643074371, + "grad_norm": 0.8847049784467019, + "learning_rate": 1.895491011951431e-05, + "loss": 0.6949, + "step": 9979 + }, + { + "epoch": 0.172449543820846, + "grad_norm": 0.955655253279275, + "learning_rate": 1.895466101690219e-05, + "loss": 0.4655, + "step": 9980 + }, + { + "epoch": 0.17246682333425492, + "grad_norm": 0.9928818181735933, + "learning_rate": 1.8954411886243393e-05, + "loss": 0.6841, + "step": 9981 + }, + { + "epoch": 0.1724841028476638, + "grad_norm": 0.9713688024328233, + "learning_rate": 1.8954162727538694e-05, + "loss": 0.515, + "step": 9982 + }, + { + "epoch": 0.1725013823610727, + "grad_norm": 1.450647331562238, + "learning_rate": 1.8953913540788877e-05, + "loss": 0.6123, + "step": 9983 + }, + { + "epoch": 0.17251866187448162, + "grad_norm": 0.8962623986613377, + "learning_rate": 1.8953664325994724e-05, + "loss": 0.6982, + "step": 9984 + }, + { + "epoch": 0.17253594138789052, + "grad_norm": 0.8501274895638247, + "learning_rate": 1.895341508315701e-05, + "loss": 0.6175, + "step": 9985 + }, + { + "epoch": 0.17255322090129943, + "grad_norm": 0.8039474777636142, + "learning_rate": 1.8953165812276527e-05, + "loss": 0.3984, + "step": 9986 + }, + { + "epoch": 0.1725705004147083, + "grad_norm": 0.9461860592070463, + "learning_rate": 1.895291651335404e-05, + "loss": 0.7317, + "step": 9987 + }, + { + "epoch": 0.17258777992811722, + "grad_norm": 1.0135486847658195, + "learning_rate": 1.8952667186390344e-05, + "loss": 0.7769, + "step": 9988 + }, + { + "epoch": 0.17260505944152613, + "grad_norm": 1.1209349890679554, + "learning_rate": 1.895241783138621e-05, + "loss": 0.8015, + "step": 9989 + }, + { + "epoch": 0.17262233895493503, + "grad_norm": 1.3953334001099018, + "learning_rate": 1.8952168448342427e-05, + "loss": 0.5951, + "step": 9990 + }, + { + "epoch": 0.17263961846834394, + "grad_norm": 0.8443645549244587, + "learning_rate": 1.895191903725977e-05, + "loss": 0.6004, + "step": 9991 + }, + { + "epoch": 0.17265689798175282, + "grad_norm": 0.40523265726097457, + "learning_rate": 1.895166959813902e-05, + "loss": 0.8195, + "step": 9992 + }, + { + "epoch": 0.17267417749516173, + "grad_norm": 1.1894742377446788, + "learning_rate": 1.8951420130980964e-05, + "loss": 0.6049, + "step": 9993 + }, + { + "epoch": 0.17269145700857064, + "grad_norm": 0.6288405443051092, + "learning_rate": 1.895117063578638e-05, + "loss": 0.4067, + "step": 9994 + }, + { + "epoch": 0.17270873652197954, + "grad_norm": 0.9758671479894451, + "learning_rate": 1.8950921112556048e-05, + "loss": 0.4328, + "step": 9995 + }, + { + "epoch": 0.17272601603538845, + "grad_norm": 0.9294374111289356, + "learning_rate": 1.895067156129075e-05, + "loss": 0.4375, + "step": 9996 + }, + { + "epoch": 0.17274329554879733, + "grad_norm": 0.8339952163089415, + "learning_rate": 1.8950421981991267e-05, + "loss": 0.6721, + "step": 9997 + }, + { + "epoch": 0.17276057506220624, + "grad_norm": 0.7547063768720278, + "learning_rate": 1.8950172374658384e-05, + "loss": 0.6668, + "step": 9998 + }, + { + "epoch": 0.17277785457561515, + "grad_norm": 1.0204641933534577, + "learning_rate": 1.8949922739292884e-05, + "loss": 0.7384, + "step": 9999 + }, + { + "epoch": 0.17279513408902406, + "grad_norm": 0.9831422547829578, + "learning_rate": 1.8949673075895543e-05, + "loss": 0.5643, + "step": 10000 + }, + { + "epoch": 0.17281241360243296, + "grad_norm": 0.9223944388546698, + "learning_rate": 1.8949423384467145e-05, + "loss": 0.5721, + "step": 10001 + }, + { + "epoch": 0.17282969311584187, + "grad_norm": 0.9239577526352228, + "learning_rate": 1.8949173665008475e-05, + "loss": 0.57, + "step": 10002 + }, + { + "epoch": 0.17284697262925075, + "grad_norm": 1.0352334383755133, + "learning_rate": 1.894892391752031e-05, + "loss": 0.6866, + "step": 10003 + }, + { + "epoch": 0.17286425214265966, + "grad_norm": 1.0948076682548817, + "learning_rate": 1.894867414200344e-05, + "loss": 0.5317, + "step": 10004 + }, + { + "epoch": 0.17288153165606857, + "grad_norm": 0.8024141605004474, + "learning_rate": 1.8948424338458637e-05, + "loss": 0.5126, + "step": 10005 + }, + { + "epoch": 0.17289881116947747, + "grad_norm": 0.7993896649920251, + "learning_rate": 1.8948174506886692e-05, + "loss": 0.6159, + "step": 10006 + }, + { + "epoch": 0.17291609068288638, + "grad_norm": 1.2667358012284227, + "learning_rate": 1.8947924647288383e-05, + "loss": 0.7507, + "step": 10007 + }, + { + "epoch": 0.17293337019629526, + "grad_norm": 0.4195868335511444, + "learning_rate": 1.8947674759664494e-05, + "loss": 0.6589, + "step": 10008 + }, + { + "epoch": 0.17295064970970417, + "grad_norm": 1.5742029339040404, + "learning_rate": 1.894742484401581e-05, + "loss": 0.6477, + "step": 10009 + }, + { + "epoch": 0.17296792922311308, + "grad_norm": 0.9181172976599802, + "learning_rate": 1.894717490034311e-05, + "loss": 0.583, + "step": 10010 + }, + { + "epoch": 0.17298520873652198, + "grad_norm": 1.1286138800411976, + "learning_rate": 1.8946924928647176e-05, + "loss": 0.6297, + "step": 10011 + }, + { + "epoch": 0.1730024882499309, + "grad_norm": 1.233827442446061, + "learning_rate": 1.8946674928928797e-05, + "loss": 0.5201, + "step": 10012 + }, + { + "epoch": 0.17301976776333977, + "grad_norm": 1.10909441535072, + "learning_rate": 1.894642490118875e-05, + "loss": 0.7438, + "step": 10013 + }, + { + "epoch": 0.17303704727674868, + "grad_norm": 1.057869073535463, + "learning_rate": 1.8946174845427817e-05, + "loss": 0.7189, + "step": 10014 + }, + { + "epoch": 0.1730543267901576, + "grad_norm": 0.5661652197136947, + "learning_rate": 1.894592476164679e-05, + "loss": 0.5419, + "step": 10015 + }, + { + "epoch": 0.1730716063035665, + "grad_norm": 0.5291719068163402, + "learning_rate": 1.8945674649846445e-05, + "loss": 0.6371, + "step": 10016 + }, + { + "epoch": 0.1730888858169754, + "grad_norm": 0.8759719870520724, + "learning_rate": 1.894542451002757e-05, + "loss": 0.5113, + "step": 10017 + }, + { + "epoch": 0.1731061653303843, + "grad_norm": 0.3881194308245975, + "learning_rate": 1.894517434219094e-05, + "loss": 0.5242, + "step": 10018 + }, + { + "epoch": 0.1731234448437932, + "grad_norm": 1.3290246801484495, + "learning_rate": 1.8944924146337347e-05, + "loss": 0.4992, + "step": 10019 + }, + { + "epoch": 0.1731407243572021, + "grad_norm": 0.755230576706186, + "learning_rate": 1.894467392246757e-05, + "loss": 0.4417, + "step": 10020 + }, + { + "epoch": 0.173158003870611, + "grad_norm": 0.4228095779754084, + "learning_rate": 1.89444236705824e-05, + "loss": 0.7121, + "step": 10021 + }, + { + "epoch": 0.1731752833840199, + "grad_norm": 0.8209133504280015, + "learning_rate": 1.894417339068261e-05, + "loss": 0.6161, + "step": 10022 + }, + { + "epoch": 0.17319256289742882, + "grad_norm": 1.3916241218440633, + "learning_rate": 1.8943923082768993e-05, + "loss": 0.5995, + "step": 10023 + }, + { + "epoch": 0.1732098424108377, + "grad_norm": 1.0359230853946892, + "learning_rate": 1.8943672746842322e-05, + "loss": 0.4278, + "step": 10024 + }, + { + "epoch": 0.1732271219242466, + "grad_norm": 0.8037061424917861, + "learning_rate": 1.8943422382903394e-05, + "loss": 0.8879, + "step": 10025 + }, + { + "epoch": 0.17324440143765552, + "grad_norm": 1.0178903258851975, + "learning_rate": 1.894317199095299e-05, + "loss": 0.5298, + "step": 10026 + }, + { + "epoch": 0.17326168095106442, + "grad_norm": 0.8789792698296552, + "learning_rate": 1.8942921570991885e-05, + "loss": 0.6564, + "step": 10027 + }, + { + "epoch": 0.17327896046447333, + "grad_norm": 0.6158757891110859, + "learning_rate": 1.8942671123020875e-05, + "loss": 0.4525, + "step": 10028 + }, + { + "epoch": 0.1732962399778822, + "grad_norm": 1.0404340634478089, + "learning_rate": 1.8942420647040738e-05, + "loss": 0.4885, + "step": 10029 + }, + { + "epoch": 0.17331351949129112, + "grad_norm": 1.0516260816863021, + "learning_rate": 1.8942170143052258e-05, + "loss": 0.4222, + "step": 10030 + }, + { + "epoch": 0.17333079900470003, + "grad_norm": 0.7236919428895668, + "learning_rate": 1.8941919611056227e-05, + "loss": 0.5518, + "step": 10031 + }, + { + "epoch": 0.17334807851810893, + "grad_norm": 0.9962367316059457, + "learning_rate": 1.8941669051053417e-05, + "loss": 0.6458, + "step": 10032 + }, + { + "epoch": 0.17336535803151784, + "grad_norm": 1.1027572477697365, + "learning_rate": 1.8941418463044625e-05, + "loss": 0.6509, + "step": 10033 + }, + { + "epoch": 0.17338263754492675, + "grad_norm": 0.8335578120393248, + "learning_rate": 1.8941167847030627e-05, + "loss": 0.5983, + "step": 10034 + }, + { + "epoch": 0.17339991705833563, + "grad_norm": 0.9002754599126577, + "learning_rate": 1.8940917203012215e-05, + "loss": 0.4987, + "step": 10035 + }, + { + "epoch": 0.17341719657174454, + "grad_norm": 1.5667725111197675, + "learning_rate": 1.894066653099017e-05, + "loss": 0.607, + "step": 10036 + }, + { + "epoch": 0.17343447608515344, + "grad_norm": 1.2340218576583115, + "learning_rate": 1.8940415830965278e-05, + "loss": 0.685, + "step": 10037 + }, + { + "epoch": 0.17345175559856235, + "grad_norm": 0.582661977236805, + "learning_rate": 1.8940165102938325e-05, + "loss": 0.5037, + "step": 10038 + }, + { + "epoch": 0.17346903511197126, + "grad_norm": 0.9281648385244979, + "learning_rate": 1.8939914346910092e-05, + "loss": 0.8544, + "step": 10039 + }, + { + "epoch": 0.17348631462538014, + "grad_norm": 0.6546984663864975, + "learning_rate": 1.8939663562881372e-05, + "loss": 0.5588, + "step": 10040 + }, + { + "epoch": 0.17350359413878905, + "grad_norm": 1.2863664464131326, + "learning_rate": 1.8939412750852945e-05, + "loss": 0.6945, + "step": 10041 + }, + { + "epoch": 0.17352087365219795, + "grad_norm": 1.3894466993654018, + "learning_rate": 1.8939161910825597e-05, + "loss": 0.7045, + "step": 10042 + }, + { + "epoch": 0.17353815316560686, + "grad_norm": 1.2739084344371199, + "learning_rate": 1.8938911042800113e-05, + "loss": 0.5336, + "step": 10043 + }, + { + "epoch": 0.17355543267901577, + "grad_norm": 0.9187641679605425, + "learning_rate": 1.8938660146777284e-05, + "loss": 0.3861, + "step": 10044 + }, + { + "epoch": 0.17357271219242465, + "grad_norm": 1.1681848673680972, + "learning_rate": 1.893840922275789e-05, + "loss": 0.6588, + "step": 10045 + }, + { + "epoch": 0.17358999170583356, + "grad_norm": 0.9020551082665369, + "learning_rate": 1.893815827074272e-05, + "loss": 0.6852, + "step": 10046 + }, + { + "epoch": 0.17360727121924246, + "grad_norm": 1.1458947357632916, + "learning_rate": 1.893790729073256e-05, + "loss": 0.5754, + "step": 10047 + }, + { + "epoch": 0.17362455073265137, + "grad_norm": 1.136023626332277, + "learning_rate": 1.8937656282728193e-05, + "loss": 0.6196, + "step": 10048 + }, + { + "epoch": 0.17364183024606028, + "grad_norm": 1.0452447835536134, + "learning_rate": 1.8937405246730407e-05, + "loss": 0.448, + "step": 10049 + }, + { + "epoch": 0.17365910975946916, + "grad_norm": 0.9942518588708643, + "learning_rate": 1.893715418273999e-05, + "loss": 0.6232, + "step": 10050 + }, + { + "epoch": 0.17367638927287807, + "grad_norm": 0.4387026696941811, + "learning_rate": 1.8936903090757727e-05, + "loss": 0.5503, + "step": 10051 + }, + { + "epoch": 0.17369366878628698, + "grad_norm": 1.7218163045646115, + "learning_rate": 1.8936651970784404e-05, + "loss": 0.5327, + "step": 10052 + }, + { + "epoch": 0.17371094829969588, + "grad_norm": 1.027194317293667, + "learning_rate": 1.8936400822820806e-05, + "loss": 0.5658, + "step": 10053 + }, + { + "epoch": 0.1737282278131048, + "grad_norm": 0.6017563614088347, + "learning_rate": 1.8936149646867722e-05, + "loss": 0.7628, + "step": 10054 + }, + { + "epoch": 0.1737455073265137, + "grad_norm": 1.148085575740711, + "learning_rate": 1.893589844292594e-05, + "loss": 0.6492, + "step": 10055 + }, + { + "epoch": 0.17376278683992258, + "grad_norm": 0.6574115022138806, + "learning_rate": 1.8935647210996243e-05, + "loss": 0.4242, + "step": 10056 + }, + { + "epoch": 0.17378006635333149, + "grad_norm": 1.1683253288702025, + "learning_rate": 1.8935395951079423e-05, + "loss": 0.8854, + "step": 10057 + }, + { + "epoch": 0.1737973458667404, + "grad_norm": 1.112268539074781, + "learning_rate": 1.8935144663176262e-05, + "loss": 0.8165, + "step": 10058 + }, + { + "epoch": 0.1738146253801493, + "grad_norm": 0.41125160393760857, + "learning_rate": 1.893489334728755e-05, + "loss": 0.6227, + "step": 10059 + }, + { + "epoch": 0.1738319048935582, + "grad_norm": 0.8191564573431094, + "learning_rate": 1.893464200341407e-05, + "loss": 0.5787, + "step": 10060 + }, + { + "epoch": 0.1738491844069671, + "grad_norm": 0.42182108736898033, + "learning_rate": 1.893439063155661e-05, + "loss": 0.5371, + "step": 10061 + }, + { + "epoch": 0.173866463920376, + "grad_norm": 1.4862773911887566, + "learning_rate": 1.8934139231715965e-05, + "loss": 0.5938, + "step": 10062 + }, + { + "epoch": 0.1738837434337849, + "grad_norm": 0.964111992464752, + "learning_rate": 1.893388780389292e-05, + "loss": 0.4417, + "step": 10063 + }, + { + "epoch": 0.1739010229471938, + "grad_norm": 0.7423816619885958, + "learning_rate": 1.893363634808825e-05, + "loss": 0.7362, + "step": 10064 + }, + { + "epoch": 0.17391830246060272, + "grad_norm": 0.7024351001092775, + "learning_rate": 1.8933384864302757e-05, + "loss": 0.5182, + "step": 10065 + }, + { + "epoch": 0.1739355819740116, + "grad_norm": 0.9508613285783023, + "learning_rate": 1.8933133352537224e-05, + "loss": 0.5723, + "step": 10066 + }, + { + "epoch": 0.1739528614874205, + "grad_norm": 1.4726239124882063, + "learning_rate": 1.8932881812792438e-05, + "loss": 0.687, + "step": 10067 + }, + { + "epoch": 0.17397014100082941, + "grad_norm": 0.9748149650005352, + "learning_rate": 1.893263024506919e-05, + "loss": 0.4615, + "step": 10068 + }, + { + "epoch": 0.17398742051423832, + "grad_norm": 0.5709236011157979, + "learning_rate": 1.8932378649368263e-05, + "loss": 0.6322, + "step": 10069 + }, + { + "epoch": 0.17400470002764723, + "grad_norm": 0.44293834968349133, + "learning_rate": 1.8932127025690447e-05, + "loss": 0.8125, + "step": 10070 + }, + { + "epoch": 0.17402197954105614, + "grad_norm": 1.604728102571535, + "learning_rate": 1.893187537403653e-05, + "loss": 0.7384, + "step": 10071 + }, + { + "epoch": 0.17403925905446502, + "grad_norm": 1.012791556177986, + "learning_rate": 1.89316236944073e-05, + "loss": 0.6442, + "step": 10072 + }, + { + "epoch": 0.17405653856787393, + "grad_norm": 1.1644349325585197, + "learning_rate": 1.8931371986803545e-05, + "loss": 0.6247, + "step": 10073 + }, + { + "epoch": 0.17407381808128283, + "grad_norm": 1.2411011642993799, + "learning_rate": 1.8931120251226056e-05, + "loss": 0.6817, + "step": 10074 + }, + { + "epoch": 0.17409109759469174, + "grad_norm": 1.0117685496699251, + "learning_rate": 1.893086848767562e-05, + "loss": 0.6357, + "step": 10075 + }, + { + "epoch": 0.17410837710810065, + "grad_norm": 0.7491879162807019, + "learning_rate": 1.8930616696153023e-05, + "loss": 0.4728, + "step": 10076 + }, + { + "epoch": 0.17412565662150953, + "grad_norm": 1.0821166310396422, + "learning_rate": 1.893036487665906e-05, + "loss": 0.6525, + "step": 10077 + }, + { + "epoch": 0.17414293613491844, + "grad_norm": 1.5503945558329244, + "learning_rate": 1.893011302919451e-05, + "loss": 0.8355, + "step": 10078 + }, + { + "epoch": 0.17416021564832734, + "grad_norm": 1.1797084589556586, + "learning_rate": 1.892986115376017e-05, + "loss": 0.7208, + "step": 10079 + }, + { + "epoch": 0.17417749516173625, + "grad_norm": 1.2578402465973533, + "learning_rate": 1.8929609250356827e-05, + "loss": 0.565, + "step": 10080 + }, + { + "epoch": 0.17419477467514516, + "grad_norm": 0.8240697097918201, + "learning_rate": 1.8929357318985267e-05, + "loss": 0.4833, + "step": 10081 + }, + { + "epoch": 0.17421205418855404, + "grad_norm": 1.6486356251217484, + "learning_rate": 1.8929105359646285e-05, + "loss": 0.6889, + "step": 10082 + }, + { + "epoch": 0.17422933370196295, + "grad_norm": 0.8849064015933817, + "learning_rate": 1.8928853372340663e-05, + "loss": 0.5093, + "step": 10083 + }, + { + "epoch": 0.17424661321537185, + "grad_norm": 0.9852864426352325, + "learning_rate": 1.8928601357069195e-05, + "loss": 0.5243, + "step": 10084 + }, + { + "epoch": 0.17426389272878076, + "grad_norm": 0.8858152637498563, + "learning_rate": 1.892834931383267e-05, + "loss": 0.6721, + "step": 10085 + }, + { + "epoch": 0.17428117224218967, + "grad_norm": 0.8641581471874041, + "learning_rate": 1.8928097242631874e-05, + "loss": 0.7089, + "step": 10086 + }, + { + "epoch": 0.17429845175559855, + "grad_norm": 1.3130595514266066, + "learning_rate": 1.8927845143467603e-05, + "loss": 0.5789, + "step": 10087 + }, + { + "epoch": 0.17431573126900746, + "grad_norm": 1.6652957669263515, + "learning_rate": 1.892759301634064e-05, + "loss": 0.5042, + "step": 10088 + }, + { + "epoch": 0.17433301078241636, + "grad_norm": 1.4513575314149096, + "learning_rate": 1.8927340861251775e-05, + "loss": 0.7089, + "step": 10089 + }, + { + "epoch": 0.17435029029582527, + "grad_norm": 1.2534338023692349, + "learning_rate": 1.8927088678201802e-05, + "loss": 0.7581, + "step": 10090 + }, + { + "epoch": 0.17436756980923418, + "grad_norm": 1.3532205783383946, + "learning_rate": 1.8926836467191507e-05, + "loss": 0.5619, + "step": 10091 + }, + { + "epoch": 0.1743848493226431, + "grad_norm": 0.8277970056740286, + "learning_rate": 1.8926584228221686e-05, + "loss": 0.2941, + "step": 10092 + }, + { + "epoch": 0.17440212883605197, + "grad_norm": 1.0841392966476728, + "learning_rate": 1.892633196129312e-05, + "loss": 0.5388, + "step": 10093 + }, + { + "epoch": 0.17441940834946087, + "grad_norm": 0.9029073089873646, + "learning_rate": 1.8926079666406608e-05, + "loss": 0.6427, + "step": 10094 + }, + { + "epoch": 0.17443668786286978, + "grad_norm": 0.9045728830016841, + "learning_rate": 1.8925827343562935e-05, + "loss": 0.66, + "step": 10095 + }, + { + "epoch": 0.1744539673762787, + "grad_norm": 0.8492865826853059, + "learning_rate": 1.8925574992762888e-05, + "loss": 0.4837, + "step": 10096 + }, + { + "epoch": 0.1744712468896876, + "grad_norm": 1.1977687488231543, + "learning_rate": 1.892532261400727e-05, + "loss": 0.7017, + "step": 10097 + }, + { + "epoch": 0.17448852640309648, + "grad_norm": 0.8630225709470731, + "learning_rate": 1.8925070207296857e-05, + "loss": 0.7134, + "step": 10098 + }, + { + "epoch": 0.17450580591650539, + "grad_norm": 0.5502859162891396, + "learning_rate": 1.892481777263245e-05, + "loss": 0.5093, + "step": 10099 + }, + { + "epoch": 0.1745230854299143, + "grad_norm": 0.7372990381952761, + "learning_rate": 1.892456531001483e-05, + "loss": 0.6263, + "step": 10100 + }, + { + "epoch": 0.1745403649433232, + "grad_norm": 0.9723047525386704, + "learning_rate": 1.8924312819444797e-05, + "loss": 0.5462, + "step": 10101 + }, + { + "epoch": 0.1745576444567321, + "grad_norm": 1.0478537932860592, + "learning_rate": 1.892406030092314e-05, + "loss": 0.6807, + "step": 10102 + }, + { + "epoch": 0.174574923970141, + "grad_norm": 1.4885617241467217, + "learning_rate": 1.8923807754450646e-05, + "loss": 0.5952, + "step": 10103 + }, + { + "epoch": 0.1745922034835499, + "grad_norm": 0.9759242300152418, + "learning_rate": 1.8923555180028105e-05, + "loss": 0.5713, + "step": 10104 + }, + { + "epoch": 0.1746094829969588, + "grad_norm": 1.090186148161928, + "learning_rate": 1.8923302577656312e-05, + "loss": 0.8439, + "step": 10105 + }, + { + "epoch": 0.1746267625103677, + "grad_norm": 1.0387336277870127, + "learning_rate": 1.892304994733606e-05, + "loss": 0.7476, + "step": 10106 + }, + { + "epoch": 0.17464404202377662, + "grad_norm": 0.9715568921964125, + "learning_rate": 1.8922797289068136e-05, + "loss": 0.4935, + "step": 10107 + }, + { + "epoch": 0.17466132153718553, + "grad_norm": 0.8149556151460519, + "learning_rate": 1.8922544602853333e-05, + "loss": 0.6471, + "step": 10108 + }, + { + "epoch": 0.1746786010505944, + "grad_norm": 1.065645208420729, + "learning_rate": 1.8922291888692442e-05, + "loss": 0.5558, + "step": 10109 + }, + { + "epoch": 0.17469588056400331, + "grad_norm": 0.7515526970107043, + "learning_rate": 1.8922039146586254e-05, + "loss": 0.5465, + "step": 10110 + }, + { + "epoch": 0.17471316007741222, + "grad_norm": 1.1969602848164913, + "learning_rate": 1.892178637653556e-05, + "loss": 0.3323, + "step": 10111 + }, + { + "epoch": 0.17473043959082113, + "grad_norm": 0.9193567234050414, + "learning_rate": 1.8921533578541157e-05, + "loss": 0.3875, + "step": 10112 + }, + { + "epoch": 0.17474771910423004, + "grad_norm": 1.080545598449655, + "learning_rate": 1.892128075260383e-05, + "loss": 0.6968, + "step": 10113 + }, + { + "epoch": 0.17476499861763892, + "grad_norm": 1.1072981464198721, + "learning_rate": 1.8921027898724374e-05, + "loss": 0.6457, + "step": 10114 + }, + { + "epoch": 0.17478227813104782, + "grad_norm": 0.9179340579156609, + "learning_rate": 1.8920775016903584e-05, + "loss": 0.5898, + "step": 10115 + }, + { + "epoch": 0.17479955764445673, + "grad_norm": 0.9247997586211614, + "learning_rate": 1.8920522107142244e-05, + "loss": 0.6128, + "step": 10116 + }, + { + "epoch": 0.17481683715786564, + "grad_norm": 1.0124248110492873, + "learning_rate": 1.8920269169441153e-05, + "loss": 0.5394, + "step": 10117 + }, + { + "epoch": 0.17483411667127455, + "grad_norm": 1.244498731752675, + "learning_rate": 1.8920016203801103e-05, + "loss": 0.8387, + "step": 10118 + }, + { + "epoch": 0.17485139618468343, + "grad_norm": 1.1922773195252332, + "learning_rate": 1.891976321022288e-05, + "loss": 0.6338, + "step": 10119 + }, + { + "epoch": 0.17486867569809234, + "grad_norm": 0.8860277058876939, + "learning_rate": 1.8919510188707284e-05, + "loss": 0.7798, + "step": 10120 + }, + { + "epoch": 0.17488595521150124, + "grad_norm": 0.38633317622646535, + "learning_rate": 1.89192571392551e-05, + "loss": 0.654, + "step": 10121 + }, + { + "epoch": 0.17490323472491015, + "grad_norm": 0.9788885133229507, + "learning_rate": 1.891900406186713e-05, + "loss": 0.6203, + "step": 10122 + }, + { + "epoch": 0.17492051423831906, + "grad_norm": 1.1354950487331998, + "learning_rate": 1.891875095654416e-05, + "loss": 0.5969, + "step": 10123 + }, + { + "epoch": 0.17493779375172794, + "grad_norm": 1.1523304467949673, + "learning_rate": 1.8918497823286982e-05, + "loss": 0.5664, + "step": 10124 + }, + { + "epoch": 0.17495507326513685, + "grad_norm": 0.6547116101755004, + "learning_rate": 1.8918244662096393e-05, + "loss": 0.4444, + "step": 10125 + }, + { + "epoch": 0.17497235277854575, + "grad_norm": 0.7697725702918237, + "learning_rate": 1.8917991472973182e-05, + "loss": 0.4344, + "step": 10126 + }, + { + "epoch": 0.17498963229195466, + "grad_norm": 1.1010390185704286, + "learning_rate": 1.8917738255918148e-05, + "loss": 0.9049, + "step": 10127 + }, + { + "epoch": 0.17500691180536357, + "grad_norm": 1.046951531627786, + "learning_rate": 1.8917485010932077e-05, + "loss": 0.6836, + "step": 10128 + }, + { + "epoch": 0.17502419131877248, + "grad_norm": 1.2718579645622137, + "learning_rate": 1.8917231738015765e-05, + "loss": 0.657, + "step": 10129 + }, + { + "epoch": 0.17504147083218136, + "grad_norm": 0.9046951465318501, + "learning_rate": 1.8916978437170006e-05, + "loss": 0.5193, + "step": 10130 + }, + { + "epoch": 0.17505875034559026, + "grad_norm": 1.3581943214090055, + "learning_rate": 1.8916725108395595e-05, + "loss": 0.8965, + "step": 10131 + }, + { + "epoch": 0.17507602985899917, + "grad_norm": 1.1233565446769846, + "learning_rate": 1.891647175169332e-05, + "loss": 0.6479, + "step": 10132 + }, + { + "epoch": 0.17509330937240808, + "grad_norm": 0.39830074437143365, + "learning_rate": 1.891621836706398e-05, + "loss": 0.6047, + "step": 10133 + }, + { + "epoch": 0.175110588885817, + "grad_norm": 1.0049340033718805, + "learning_rate": 1.8915964954508365e-05, + "loss": 0.429, + "step": 10134 + }, + { + "epoch": 0.17512786839922587, + "grad_norm": 1.0411248305836758, + "learning_rate": 1.8915711514027273e-05, + "loss": 0.5441, + "step": 10135 + }, + { + "epoch": 0.17514514791263477, + "grad_norm": 0.9284264380435605, + "learning_rate": 1.8915458045621493e-05, + "loss": 0.4076, + "step": 10136 + }, + { + "epoch": 0.17516242742604368, + "grad_norm": 1.3144344285622704, + "learning_rate": 1.891520454929182e-05, + "loss": 0.559, + "step": 10137 + }, + { + "epoch": 0.1751797069394526, + "grad_norm": 0.7989172751740387, + "learning_rate": 1.891495102503905e-05, + "loss": 0.6436, + "step": 10138 + }, + { + "epoch": 0.1751969864528615, + "grad_norm": 0.8504913831875757, + "learning_rate": 1.8914697472863975e-05, + "loss": 0.5342, + "step": 10139 + }, + { + "epoch": 0.17521426596627038, + "grad_norm": 1.2443867515413023, + "learning_rate": 1.8914443892767387e-05, + "loss": 0.6182, + "step": 10140 + }, + { + "epoch": 0.17523154547967928, + "grad_norm": 1.5918380394139076, + "learning_rate": 1.891419028475009e-05, + "loss": 0.7964, + "step": 10141 + }, + { + "epoch": 0.1752488249930882, + "grad_norm": 0.8305893252643278, + "learning_rate": 1.891393664881287e-05, + "loss": 0.553, + "step": 10142 + }, + { + "epoch": 0.1752661045064971, + "grad_norm": 1.0545614720563858, + "learning_rate": 1.8913682984956517e-05, + "loss": 0.509, + "step": 10143 + }, + { + "epoch": 0.175283384019906, + "grad_norm": 0.8750294067146047, + "learning_rate": 1.8913429293181837e-05, + "loss": 0.5895, + "step": 10144 + }, + { + "epoch": 0.17530066353331492, + "grad_norm": 1.0024485029592642, + "learning_rate": 1.891317557348962e-05, + "loss": 0.4952, + "step": 10145 + }, + { + "epoch": 0.1753179430467238, + "grad_norm": 0.37076274794844694, + "learning_rate": 1.8912921825880658e-05, + "loss": 0.6788, + "step": 10146 + }, + { + "epoch": 0.1753352225601327, + "grad_norm": 0.8029865439304055, + "learning_rate": 1.8912668050355748e-05, + "loss": 0.6232, + "step": 10147 + }, + { + "epoch": 0.1753525020735416, + "grad_norm": 0.609617729894948, + "learning_rate": 1.891241424691568e-05, + "loss": 0.6639, + "step": 10148 + }, + { + "epoch": 0.17536978158695052, + "grad_norm": 1.0813732650290449, + "learning_rate": 1.8912160415561258e-05, + "loss": 0.5999, + "step": 10149 + }, + { + "epoch": 0.17538706110035943, + "grad_norm": 0.7452682115887467, + "learning_rate": 1.891190655629327e-05, + "loss": 0.3573, + "step": 10150 + }, + { + "epoch": 0.1754043406137683, + "grad_norm": 0.38960687668718114, + "learning_rate": 1.8911652669112514e-05, + "loss": 0.6156, + "step": 10151 + }, + { + "epoch": 0.1754216201271772, + "grad_norm": 0.6762532638049407, + "learning_rate": 1.8911398754019786e-05, + "loss": 0.6075, + "step": 10152 + }, + { + "epoch": 0.17543889964058612, + "grad_norm": 0.3830066081065792, + "learning_rate": 1.891114481101588e-05, + "loss": 0.6283, + "step": 10153 + }, + { + "epoch": 0.17545617915399503, + "grad_norm": 1.227030059560766, + "learning_rate": 1.891089084010159e-05, + "loss": 0.4705, + "step": 10154 + }, + { + "epoch": 0.17547345866740394, + "grad_norm": 0.7705086582661127, + "learning_rate": 1.891063684127771e-05, + "loss": 0.5611, + "step": 10155 + }, + { + "epoch": 0.17549073818081282, + "grad_norm": 0.7846385241168738, + "learning_rate": 1.8910382814545044e-05, + "loss": 0.5083, + "step": 10156 + }, + { + "epoch": 0.17550801769422172, + "grad_norm": 1.049263458928275, + "learning_rate": 1.8910128759904377e-05, + "loss": 0.5052, + "step": 10157 + }, + { + "epoch": 0.17552529720763063, + "grad_norm": 1.0245867485575837, + "learning_rate": 1.8909874677356512e-05, + "loss": 0.5134, + "step": 10158 + }, + { + "epoch": 0.17554257672103954, + "grad_norm": 0.7378930794133685, + "learning_rate": 1.8909620566902242e-05, + "loss": 0.5925, + "step": 10159 + }, + { + "epoch": 0.17555985623444845, + "grad_norm": 0.9843473618157599, + "learning_rate": 1.8909366428542363e-05, + "loss": 0.713, + "step": 10160 + }, + { + "epoch": 0.17557713574785733, + "grad_norm": 1.1615913296419185, + "learning_rate": 1.890911226227767e-05, + "loss": 0.7076, + "step": 10161 + }, + { + "epoch": 0.17559441526126623, + "grad_norm": 1.3546542445881948, + "learning_rate": 1.8908858068108963e-05, + "loss": 0.8011, + "step": 10162 + }, + { + "epoch": 0.17561169477467514, + "grad_norm": 1.0497302101754669, + "learning_rate": 1.8908603846037032e-05, + "loss": 0.4969, + "step": 10163 + }, + { + "epoch": 0.17562897428808405, + "grad_norm": 0.8617498296535474, + "learning_rate": 1.890834959606268e-05, + "loss": 0.4339, + "step": 10164 + }, + { + "epoch": 0.17564625380149296, + "grad_norm": 1.3505742999933692, + "learning_rate": 1.8908095318186703e-05, + "loss": 0.6251, + "step": 10165 + }, + { + "epoch": 0.17566353331490187, + "grad_norm": 1.3363867621936063, + "learning_rate": 1.8907841012409887e-05, + "loss": 0.6067, + "step": 10166 + }, + { + "epoch": 0.17568081282831075, + "grad_norm": 0.899756198642657, + "learning_rate": 1.890758667873304e-05, + "loss": 0.5165, + "step": 10167 + }, + { + "epoch": 0.17569809234171965, + "grad_norm": 0.8681367991904788, + "learning_rate": 1.8907332317156954e-05, + "loss": 0.642, + "step": 10168 + }, + { + "epoch": 0.17571537185512856, + "grad_norm": 0.6949174237753372, + "learning_rate": 1.8907077927682428e-05, + "loss": 0.525, + "step": 10169 + }, + { + "epoch": 0.17573265136853747, + "grad_norm": 1.0150141892272537, + "learning_rate": 1.8906823510310252e-05, + "loss": 0.5674, + "step": 10170 + }, + { + "epoch": 0.17574993088194638, + "grad_norm": 0.3817443790011404, + "learning_rate": 1.8906569065041232e-05, + "loss": 0.7416, + "step": 10171 + }, + { + "epoch": 0.17576721039535526, + "grad_norm": 1.0615081230814147, + "learning_rate": 1.890631459187616e-05, + "loss": 0.5637, + "step": 10172 + }, + { + "epoch": 0.17578448990876416, + "grad_norm": 1.0360310312730987, + "learning_rate": 1.8906060090815832e-05, + "loss": 0.6218, + "step": 10173 + }, + { + "epoch": 0.17580176942217307, + "grad_norm": 0.8444951278172973, + "learning_rate": 1.890580556186105e-05, + "loss": 0.6487, + "step": 10174 + }, + { + "epoch": 0.17581904893558198, + "grad_norm": 1.3057340662235706, + "learning_rate": 1.890555100501261e-05, + "loss": 0.634, + "step": 10175 + }, + { + "epoch": 0.1758363284489909, + "grad_norm": 0.7833710632416012, + "learning_rate": 1.8905296420271303e-05, + "loss": 0.6337, + "step": 10176 + }, + { + "epoch": 0.17585360796239977, + "grad_norm": 1.0202395889134181, + "learning_rate": 1.890504180763793e-05, + "loss": 0.7541, + "step": 10177 + }, + { + "epoch": 0.17587088747580867, + "grad_norm": 1.031681496724382, + "learning_rate": 1.890478716711329e-05, + "loss": 0.5846, + "step": 10178 + }, + { + "epoch": 0.17588816698921758, + "grad_norm": 1.1281240544768378, + "learning_rate": 1.8904532498698182e-05, + "loss": 0.5144, + "step": 10179 + }, + { + "epoch": 0.1759054465026265, + "grad_norm": 0.7269651370847564, + "learning_rate": 1.89042778023934e-05, + "loss": 0.4767, + "step": 10180 + }, + { + "epoch": 0.1759227260160354, + "grad_norm": 0.7405111033594566, + "learning_rate": 1.8904023078199745e-05, + "loss": 0.5256, + "step": 10181 + }, + { + "epoch": 0.1759400055294443, + "grad_norm": 0.9813773447282739, + "learning_rate": 1.8903768326118014e-05, + "loss": 0.6155, + "step": 10182 + }, + { + "epoch": 0.17595728504285318, + "grad_norm": 0.762953474784634, + "learning_rate": 1.8903513546149e-05, + "loss": 0.4023, + "step": 10183 + }, + { + "epoch": 0.1759745645562621, + "grad_norm": 0.7434690508169428, + "learning_rate": 1.8903258738293508e-05, + "loss": 0.66, + "step": 10184 + }, + { + "epoch": 0.175991844069671, + "grad_norm": 0.8361349083728347, + "learning_rate": 1.890300390255233e-05, + "loss": 0.5436, + "step": 10185 + }, + { + "epoch": 0.1760091235830799, + "grad_norm": 0.8511360850561592, + "learning_rate": 1.890274903892627e-05, + "loss": 0.478, + "step": 10186 + }, + { + "epoch": 0.17602640309648881, + "grad_norm": 0.9271614967721183, + "learning_rate": 1.890249414741612e-05, + "loss": 0.7087, + "step": 10187 + }, + { + "epoch": 0.1760436826098977, + "grad_norm": 1.066302265478755, + "learning_rate": 1.890223922802269e-05, + "loss": 0.6993, + "step": 10188 + }, + { + "epoch": 0.1760609621233066, + "grad_norm": 1.0875060965887444, + "learning_rate": 1.8901984280746762e-05, + "loss": 0.8363, + "step": 10189 + }, + { + "epoch": 0.1760782416367155, + "grad_norm": 0.7765662169120023, + "learning_rate": 1.8901729305589143e-05, + "loss": 0.5408, + "step": 10190 + }, + { + "epoch": 0.17609552115012442, + "grad_norm": 1.3331541798106126, + "learning_rate": 1.8901474302550635e-05, + "loss": 0.5497, + "step": 10191 + }, + { + "epoch": 0.17611280066353333, + "grad_norm": 0.40371883325498487, + "learning_rate": 1.890121927163203e-05, + "loss": 0.8165, + "step": 10192 + }, + { + "epoch": 0.1761300801769422, + "grad_norm": 1.2821291265531936, + "learning_rate": 1.8900964212834134e-05, + "loss": 0.4621, + "step": 10193 + }, + { + "epoch": 0.1761473596903511, + "grad_norm": 0.8492492525523412, + "learning_rate": 1.8900709126157736e-05, + "loss": 0.4956, + "step": 10194 + }, + { + "epoch": 0.17616463920376002, + "grad_norm": 0.9352855546044585, + "learning_rate": 1.8900454011603644e-05, + "loss": 0.643, + "step": 10195 + }, + { + "epoch": 0.17618191871716893, + "grad_norm": 0.43183698363360257, + "learning_rate": 1.890019886917265e-05, + "loss": 0.5329, + "step": 10196 + }, + { + "epoch": 0.17619919823057784, + "grad_norm": 0.6823778372956841, + "learning_rate": 1.8899943698865563e-05, + "loss": 0.5325, + "step": 10197 + }, + { + "epoch": 0.17621647774398672, + "grad_norm": 0.9211831429024826, + "learning_rate": 1.8899688500683173e-05, + "loss": 0.6653, + "step": 10198 + }, + { + "epoch": 0.17623375725739562, + "grad_norm": 1.3466594467440514, + "learning_rate": 1.8899433274626284e-05, + "loss": 0.7879, + "step": 10199 + }, + { + "epoch": 0.17625103677080453, + "grad_norm": 1.1554216947226223, + "learning_rate": 1.8899178020695688e-05, + "loss": 0.4157, + "step": 10200 + }, + { + "epoch": 0.17626831628421344, + "grad_norm": 0.8760289418791732, + "learning_rate": 1.8898922738892196e-05, + "loss": 0.5922, + "step": 10201 + }, + { + "epoch": 0.17628559579762235, + "grad_norm": 0.9476146497077627, + "learning_rate": 1.88986674292166e-05, + "loss": 0.5387, + "step": 10202 + }, + { + "epoch": 0.17630287531103125, + "grad_norm": 1.2037842566366999, + "learning_rate": 1.88984120916697e-05, + "loss": 0.6303, + "step": 10203 + }, + { + "epoch": 0.17632015482444013, + "grad_norm": 0.4532887977587702, + "learning_rate": 1.8898156726252298e-05, + "loss": 0.6033, + "step": 10204 + }, + { + "epoch": 0.17633743433784904, + "grad_norm": 1.052647214210843, + "learning_rate": 1.8897901332965192e-05, + "loss": 0.5899, + "step": 10205 + }, + { + "epoch": 0.17635471385125795, + "grad_norm": 1.4184855175972257, + "learning_rate": 1.8897645911809185e-05, + "loss": 0.8246, + "step": 10206 + }, + { + "epoch": 0.17637199336466686, + "grad_norm": 0.6775300054429803, + "learning_rate": 1.8897390462785076e-05, + "loss": 0.2911, + "step": 10207 + }, + { + "epoch": 0.17638927287807576, + "grad_norm": 1.006995534932627, + "learning_rate": 1.889713498589366e-05, + "loss": 0.6024, + "step": 10208 + }, + { + "epoch": 0.17640655239148464, + "grad_norm": 1.1924418677483979, + "learning_rate": 1.889687948113574e-05, + "loss": 0.7158, + "step": 10209 + }, + { + "epoch": 0.17642383190489355, + "grad_norm": 0.8997775983704689, + "learning_rate": 1.889662394851212e-05, + "loss": 0.7254, + "step": 10210 + }, + { + "epoch": 0.17644111141830246, + "grad_norm": 0.9167811539677528, + "learning_rate": 1.8896368388023598e-05, + "loss": 0.7408, + "step": 10211 + }, + { + "epoch": 0.17645839093171137, + "grad_norm": 0.8790601196986598, + "learning_rate": 1.8896112799670974e-05, + "loss": 0.6276, + "step": 10212 + }, + { + "epoch": 0.17647567044512028, + "grad_norm": 0.94230598103939, + "learning_rate": 1.889585718345505e-05, + "loss": 0.6389, + "step": 10213 + }, + { + "epoch": 0.17649294995852916, + "grad_norm": 0.5113391915106591, + "learning_rate": 1.8895601539376624e-05, + "loss": 0.9194, + "step": 10214 + }, + { + "epoch": 0.17651022947193806, + "grad_norm": 0.7254507198255113, + "learning_rate": 1.8895345867436495e-05, + "loss": 0.5035, + "step": 10215 + }, + { + "epoch": 0.17652750898534697, + "grad_norm": 1.0423233668116394, + "learning_rate": 1.8895090167635468e-05, + "loss": 0.6065, + "step": 10216 + }, + { + "epoch": 0.17654478849875588, + "grad_norm": 1.0710423512306322, + "learning_rate": 1.8894834439974345e-05, + "loss": 0.5247, + "step": 10217 + }, + { + "epoch": 0.17656206801216479, + "grad_norm": 2.137460863666746, + "learning_rate": 1.8894578684453925e-05, + "loss": 0.7894, + "step": 10218 + }, + { + "epoch": 0.1765793475255737, + "grad_norm": 1.3472063505563223, + "learning_rate": 1.8894322901075004e-05, + "loss": 0.6473, + "step": 10219 + }, + { + "epoch": 0.17659662703898257, + "grad_norm": 0.4864940815313646, + "learning_rate": 1.889406708983839e-05, + "loss": 0.624, + "step": 10220 + }, + { + "epoch": 0.17661390655239148, + "grad_norm": 0.8733039197892208, + "learning_rate": 1.8893811250744885e-05, + "loss": 0.7018, + "step": 10221 + }, + { + "epoch": 0.1766311860658004, + "grad_norm": 1.0454921458240964, + "learning_rate": 1.8893555383795282e-05, + "loss": 0.8418, + "step": 10222 + }, + { + "epoch": 0.1766484655792093, + "grad_norm": 1.027058436091154, + "learning_rate": 1.889329948899039e-05, + "loss": 0.6116, + "step": 10223 + }, + { + "epoch": 0.1766657450926182, + "grad_norm": 0.9579085206917923, + "learning_rate": 1.889304356633101e-05, + "loss": 0.711, + "step": 10224 + }, + { + "epoch": 0.17668302460602708, + "grad_norm": 0.9236203495319113, + "learning_rate": 1.8892787615817938e-05, + "loss": 0.5937, + "step": 10225 + }, + { + "epoch": 0.176700304119436, + "grad_norm": 0.7282273725082384, + "learning_rate": 1.8892531637451982e-05, + "loss": 0.6337, + "step": 10226 + }, + { + "epoch": 0.1767175836328449, + "grad_norm": 1.2223908949908855, + "learning_rate": 1.8892275631233943e-05, + "loss": 0.6684, + "step": 10227 + }, + { + "epoch": 0.1767348631462538, + "grad_norm": 0.9499227425271909, + "learning_rate": 1.8892019597164618e-05, + "loss": 0.5345, + "step": 10228 + }, + { + "epoch": 0.17675214265966271, + "grad_norm": 0.9444001006745765, + "learning_rate": 1.889176353524481e-05, + "loss": 0.5263, + "step": 10229 + }, + { + "epoch": 0.1767694221730716, + "grad_norm": 1.3319682618731499, + "learning_rate": 1.8891507445475324e-05, + "loss": 0.6301, + "step": 10230 + }, + { + "epoch": 0.1767867016864805, + "grad_norm": 0.4692255913650443, + "learning_rate": 1.8891251327856966e-05, + "loss": 0.6804, + "step": 10231 + }, + { + "epoch": 0.1768039811998894, + "grad_norm": 1.0499937666433428, + "learning_rate": 1.889099518239053e-05, + "loss": 0.6034, + "step": 10232 + }, + { + "epoch": 0.17682126071329832, + "grad_norm": 0.8503301923750487, + "learning_rate": 1.889073900907682e-05, + "loss": 0.4877, + "step": 10233 + }, + { + "epoch": 0.17683854022670722, + "grad_norm": 1.0740942150885582, + "learning_rate": 1.8890482807916643e-05, + "loss": 0.7845, + "step": 10234 + }, + { + "epoch": 0.1768558197401161, + "grad_norm": 0.7262607847552346, + "learning_rate": 1.8890226578910792e-05, + "loss": 0.4538, + "step": 10235 + }, + { + "epoch": 0.176873099253525, + "grad_norm": 1.5021765729406655, + "learning_rate": 1.8889970322060082e-05, + "loss": 0.8397, + "step": 10236 + }, + { + "epoch": 0.17689037876693392, + "grad_norm": 0.40941349055358833, + "learning_rate": 1.8889714037365305e-05, + "loss": 0.5741, + "step": 10237 + }, + { + "epoch": 0.17690765828034283, + "grad_norm": 0.86194341330368, + "learning_rate": 1.888945772482727e-05, + "loss": 0.6455, + "step": 10238 + }, + { + "epoch": 0.17692493779375174, + "grad_norm": 0.8443828552646754, + "learning_rate": 1.888920138444678e-05, + "loss": 0.5459, + "step": 10239 + }, + { + "epoch": 0.17694221730716064, + "grad_norm": 0.8429980307343701, + "learning_rate": 1.8888945016224633e-05, + "loss": 0.5954, + "step": 10240 + }, + { + "epoch": 0.17695949682056952, + "grad_norm": 0.620393191514158, + "learning_rate": 1.8888688620161635e-05, + "loss": 0.3904, + "step": 10241 + }, + { + "epoch": 0.17697677633397843, + "grad_norm": 1.4041181892035608, + "learning_rate": 1.8888432196258585e-05, + "loss": 0.6102, + "step": 10242 + }, + { + "epoch": 0.17699405584738734, + "grad_norm": 1.1802543427043928, + "learning_rate": 1.8888175744516294e-05, + "loss": 0.502, + "step": 10243 + }, + { + "epoch": 0.17701133536079625, + "grad_norm": 1.2344312946340894, + "learning_rate": 1.8887919264935563e-05, + "loss": 0.439, + "step": 10244 + }, + { + "epoch": 0.17702861487420515, + "grad_norm": 1.0316621256319973, + "learning_rate": 1.888766275751719e-05, + "loss": 0.5248, + "step": 10245 + }, + { + "epoch": 0.17704589438761403, + "grad_norm": 0.9615577193102133, + "learning_rate": 1.8887406222261985e-05, + "loss": 0.543, + "step": 10246 + }, + { + "epoch": 0.17706317390102294, + "grad_norm": 0.9298713712754092, + "learning_rate": 1.8887149659170745e-05, + "loss": 0.5777, + "step": 10247 + }, + { + "epoch": 0.17708045341443185, + "grad_norm": 0.9806154440236411, + "learning_rate": 1.8886893068244277e-05, + "loss": 0.5265, + "step": 10248 + }, + { + "epoch": 0.17709773292784076, + "grad_norm": 1.229279857799624, + "learning_rate": 1.8886636449483385e-05, + "loss": 0.6725, + "step": 10249 + }, + { + "epoch": 0.17711501244124966, + "grad_norm": 0.9471126792263534, + "learning_rate": 1.8886379802888875e-05, + "loss": 0.5902, + "step": 10250 + }, + { + "epoch": 0.17713229195465854, + "grad_norm": 0.9745815459175026, + "learning_rate": 1.8886123128461547e-05, + "loss": 0.6189, + "step": 10251 + }, + { + "epoch": 0.17714957146806745, + "grad_norm": 0.567729296663901, + "learning_rate": 1.8885866426202204e-05, + "loss": 0.863, + "step": 10252 + }, + { + "epoch": 0.17716685098147636, + "grad_norm": 0.8497261537506201, + "learning_rate": 1.8885609696111655e-05, + "loss": 0.6471, + "step": 10253 + }, + { + "epoch": 0.17718413049488527, + "grad_norm": 2.3142399708613235, + "learning_rate": 1.8885352938190697e-05, + "loss": 0.5968, + "step": 10254 + }, + { + "epoch": 0.17720141000829417, + "grad_norm": 1.0011112370083206, + "learning_rate": 1.8885096152440142e-05, + "loss": 0.632, + "step": 10255 + }, + { + "epoch": 0.17721868952170308, + "grad_norm": 1.0315677791363007, + "learning_rate": 1.8884839338860788e-05, + "loss": 0.6846, + "step": 10256 + }, + { + "epoch": 0.17723596903511196, + "grad_norm": 0.9871247152549698, + "learning_rate": 1.8884582497453444e-05, + "loss": 0.6693, + "step": 10257 + }, + { + "epoch": 0.17725324854852087, + "grad_norm": 1.0132236807240942, + "learning_rate": 1.8884325628218912e-05, + "loss": 0.4653, + "step": 10258 + }, + { + "epoch": 0.17727052806192978, + "grad_norm": 0.7327511811783199, + "learning_rate": 1.8884068731157997e-05, + "loss": 0.4382, + "step": 10259 + }, + { + "epoch": 0.17728780757533869, + "grad_norm": 1.02368400353296, + "learning_rate": 1.8883811806271503e-05, + "loss": 0.3349, + "step": 10260 + }, + { + "epoch": 0.1773050870887476, + "grad_norm": 0.7379241394082545, + "learning_rate": 1.8883554853560235e-05, + "loss": 0.3466, + "step": 10261 + }, + { + "epoch": 0.17732236660215647, + "grad_norm": 1.0466806610942847, + "learning_rate": 1.8883297873024998e-05, + "loss": 0.5919, + "step": 10262 + }, + { + "epoch": 0.17733964611556538, + "grad_norm": 1.3009015706960418, + "learning_rate": 1.88830408646666e-05, + "loss": 0.8014, + "step": 10263 + }, + { + "epoch": 0.1773569256289743, + "grad_norm": 1.2379268634717628, + "learning_rate": 1.8882783828485838e-05, + "loss": 0.6764, + "step": 10264 + }, + { + "epoch": 0.1773742051423832, + "grad_norm": 0.8501702175153916, + "learning_rate": 1.888252676448352e-05, + "loss": 0.8994, + "step": 10265 + }, + { + "epoch": 0.1773914846557921, + "grad_norm": 1.2469038632343716, + "learning_rate": 1.888226967266046e-05, + "loss": 0.8346, + "step": 10266 + }, + { + "epoch": 0.17740876416920098, + "grad_norm": 1.404305695649132, + "learning_rate": 1.8882012553017456e-05, + "loss": 0.8323, + "step": 10267 + }, + { + "epoch": 0.1774260436826099, + "grad_norm": 1.0556865397992743, + "learning_rate": 1.8881755405555312e-05, + "loss": 0.6127, + "step": 10268 + }, + { + "epoch": 0.1774433231960188, + "grad_norm": 1.1021729787795926, + "learning_rate": 1.8881498230274834e-05, + "loss": 0.5878, + "step": 10269 + }, + { + "epoch": 0.1774606027094277, + "grad_norm": 1.2512739942498288, + "learning_rate": 1.888124102717683e-05, + "loss": 0.625, + "step": 10270 + }, + { + "epoch": 0.1774778822228366, + "grad_norm": 1.4448601480346757, + "learning_rate": 1.88809837962621e-05, + "loss": 0.7121, + "step": 10271 + }, + { + "epoch": 0.1774951617362455, + "grad_norm": 0.9084110179890017, + "learning_rate": 1.8880726537531457e-05, + "loss": 0.4234, + "step": 10272 + }, + { + "epoch": 0.1775124412496544, + "grad_norm": 1.5347963801451878, + "learning_rate": 1.8880469250985703e-05, + "loss": 0.7183, + "step": 10273 + }, + { + "epoch": 0.1775297207630633, + "grad_norm": 1.06695874753716, + "learning_rate": 1.8880211936625648e-05, + "loss": 0.5246, + "step": 10274 + }, + { + "epoch": 0.17754700027647222, + "grad_norm": 0.972335719540554, + "learning_rate": 1.8879954594452093e-05, + "loss": 0.6189, + "step": 10275 + }, + { + "epoch": 0.17756427978988112, + "grad_norm": 1.4451017440208929, + "learning_rate": 1.8879697224465842e-05, + "loss": 0.7246, + "step": 10276 + }, + { + "epoch": 0.17758155930329003, + "grad_norm": 1.2316178434240679, + "learning_rate": 1.8879439826667708e-05, + "loss": 0.5264, + "step": 10277 + }, + { + "epoch": 0.1775988388166989, + "grad_norm": 1.0807603987633612, + "learning_rate": 1.887918240105849e-05, + "loss": 0.5941, + "step": 10278 + }, + { + "epoch": 0.17761611833010782, + "grad_norm": 0.9945973328879474, + "learning_rate": 1.8878924947639e-05, + "loss": 0.5891, + "step": 10279 + }, + { + "epoch": 0.17763339784351673, + "grad_norm": 0.4452424183713855, + "learning_rate": 1.887866746641004e-05, + "loss": 0.4607, + "step": 10280 + }, + { + "epoch": 0.17765067735692563, + "grad_norm": 1.3533606182248203, + "learning_rate": 1.8878409957372423e-05, + "loss": 0.7268, + "step": 10281 + }, + { + "epoch": 0.17766795687033454, + "grad_norm": 1.187496134475983, + "learning_rate": 1.887815242052695e-05, + "loss": 0.8577, + "step": 10282 + }, + { + "epoch": 0.17768523638374342, + "grad_norm": 1.2497213040718096, + "learning_rate": 1.8877894855874424e-05, + "loss": 0.5605, + "step": 10283 + }, + { + "epoch": 0.17770251589715233, + "grad_norm": 1.3384465242624413, + "learning_rate": 1.887763726341566e-05, + "loss": 0.5912, + "step": 10284 + }, + { + "epoch": 0.17771979541056124, + "grad_norm": 0.9122003820238553, + "learning_rate": 1.8877379643151465e-05, + "loss": 0.6738, + "step": 10285 + }, + { + "epoch": 0.17773707492397015, + "grad_norm": 0.6732650331950397, + "learning_rate": 1.8877121995082638e-05, + "loss": 0.6228, + "step": 10286 + }, + { + "epoch": 0.17775435443737905, + "grad_norm": 0.8686272203829645, + "learning_rate": 1.8876864319209994e-05, + "loss": 0.5317, + "step": 10287 + }, + { + "epoch": 0.17777163395078793, + "grad_norm": 0.7637095173794175, + "learning_rate": 1.8876606615534334e-05, + "loss": 0.5311, + "step": 10288 + }, + { + "epoch": 0.17778891346419684, + "grad_norm": 1.2048937335345493, + "learning_rate": 1.8876348884056468e-05, + "loss": 0.7193, + "step": 10289 + }, + { + "epoch": 0.17780619297760575, + "grad_norm": 0.7785672571828125, + "learning_rate": 1.8876091124777202e-05, + "loss": 0.6873, + "step": 10290 + }, + { + "epoch": 0.17782347249101466, + "grad_norm": 1.0001014121149252, + "learning_rate": 1.8875833337697345e-05, + "loss": 0.5244, + "step": 10291 + }, + { + "epoch": 0.17784075200442356, + "grad_norm": 0.792581645329038, + "learning_rate": 1.88755755228177e-05, + "loss": 0.7859, + "step": 10292 + }, + { + "epoch": 0.17785803151783247, + "grad_norm": 1.4140491791286003, + "learning_rate": 1.8875317680139083e-05, + "loss": 0.62, + "step": 10293 + }, + { + "epoch": 0.17787531103124135, + "grad_norm": 0.4057604596824873, + "learning_rate": 1.8875059809662294e-05, + "loss": 0.5637, + "step": 10294 + }, + { + "epoch": 0.17789259054465026, + "grad_norm": 0.3988741573364346, + "learning_rate": 1.8874801911388143e-05, + "loss": 0.5082, + "step": 10295 + }, + { + "epoch": 0.17790987005805917, + "grad_norm": 0.8017515697737413, + "learning_rate": 1.8874543985317437e-05, + "loss": 0.6119, + "step": 10296 + }, + { + "epoch": 0.17792714957146807, + "grad_norm": 0.9754725764494042, + "learning_rate": 1.887428603145099e-05, + "loss": 0.7107, + "step": 10297 + }, + { + "epoch": 0.17794442908487698, + "grad_norm": 0.4800487263893525, + "learning_rate": 1.88740280497896e-05, + "loss": 0.7768, + "step": 10298 + }, + { + "epoch": 0.17796170859828586, + "grad_norm": 0.882540165971099, + "learning_rate": 1.887377004033408e-05, + "loss": 0.6867, + "step": 10299 + }, + { + "epoch": 0.17797898811169477, + "grad_norm": 1.0812026203986282, + "learning_rate": 1.8873512003085237e-05, + "loss": 0.5911, + "step": 10300 + }, + { + "epoch": 0.17799626762510368, + "grad_norm": 1.180228453032613, + "learning_rate": 1.8873253938043882e-05, + "loss": 0.6038, + "step": 10301 + }, + { + "epoch": 0.17801354713851258, + "grad_norm": 0.7347210858571075, + "learning_rate": 1.887299584521082e-05, + "loss": 0.7128, + "step": 10302 + }, + { + "epoch": 0.1780308266519215, + "grad_norm": 1.4471407563444056, + "learning_rate": 1.887273772458686e-05, + "loss": 0.6438, + "step": 10303 + }, + { + "epoch": 0.17804810616533037, + "grad_norm": 1.0210260455901217, + "learning_rate": 1.8872479576172816e-05, + "loss": 0.4911, + "step": 10304 + }, + { + "epoch": 0.17806538567873928, + "grad_norm": 1.3110358992343678, + "learning_rate": 1.887222139996949e-05, + "loss": 0.688, + "step": 10305 + }, + { + "epoch": 0.1780826651921482, + "grad_norm": 1.4783983257050335, + "learning_rate": 1.8871963195977688e-05, + "loss": 0.5148, + "step": 10306 + }, + { + "epoch": 0.1780999447055571, + "grad_norm": 1.5532755046682865, + "learning_rate": 1.8871704964198226e-05, + "loss": 0.6007, + "step": 10307 + }, + { + "epoch": 0.178117224218966, + "grad_norm": 0.6667155708367518, + "learning_rate": 1.887144670463191e-05, + "loss": 0.5797, + "step": 10308 + }, + { + "epoch": 0.17813450373237488, + "grad_norm": 0.9485402369927692, + "learning_rate": 1.8871188417279547e-05, + "loss": 0.6485, + "step": 10309 + }, + { + "epoch": 0.1781517832457838, + "grad_norm": 0.9498531880008467, + "learning_rate": 1.887093010214195e-05, + "loss": 0.5719, + "step": 10310 + }, + { + "epoch": 0.1781690627591927, + "grad_norm": 1.1932948599125608, + "learning_rate": 1.8870671759219923e-05, + "loss": 0.4795, + "step": 10311 + }, + { + "epoch": 0.1781863422726016, + "grad_norm": 1.1441506670017785, + "learning_rate": 1.887041338851428e-05, + "loss": 0.6924, + "step": 10312 + }, + { + "epoch": 0.1782036217860105, + "grad_norm": 1.00229312998292, + "learning_rate": 1.887015499002583e-05, + "loss": 0.4837, + "step": 10313 + }, + { + "epoch": 0.17822090129941942, + "grad_norm": 0.41283423661437946, + "learning_rate": 1.8869896563755375e-05, + "loss": 0.6215, + "step": 10314 + }, + { + "epoch": 0.1782381808128283, + "grad_norm": 1.4094441999459895, + "learning_rate": 1.8869638109703733e-05, + "loss": 0.7612, + "step": 10315 + }, + { + "epoch": 0.1782554603262372, + "grad_norm": 1.1822075992862542, + "learning_rate": 1.8869379627871712e-05, + "loss": 0.7657, + "step": 10316 + }, + { + "epoch": 0.17827273983964612, + "grad_norm": 1.3746624425622527, + "learning_rate": 1.8869121118260117e-05, + "loss": 0.688, + "step": 10317 + }, + { + "epoch": 0.17829001935305502, + "grad_norm": 1.2364133204311147, + "learning_rate": 1.886886258086976e-05, + "loss": 0.5897, + "step": 10318 + }, + { + "epoch": 0.17830729886646393, + "grad_norm": 1.152255383086027, + "learning_rate": 1.886860401570146e-05, + "loss": 0.5562, + "step": 10319 + }, + { + "epoch": 0.1783245783798728, + "grad_norm": 0.4153104937621673, + "learning_rate": 1.8868345422756007e-05, + "loss": 0.7243, + "step": 10320 + }, + { + "epoch": 0.17834185789328172, + "grad_norm": 1.2166226967070404, + "learning_rate": 1.8868086802034225e-05, + "loss": 0.5703, + "step": 10321 + }, + { + "epoch": 0.17835913740669063, + "grad_norm": 1.1709613677350859, + "learning_rate": 1.8867828153536925e-05, + "loss": 0.4529, + "step": 10322 + }, + { + "epoch": 0.17837641692009953, + "grad_norm": 1.0668364321440356, + "learning_rate": 1.886756947726491e-05, + "loss": 0.6995, + "step": 10323 + }, + { + "epoch": 0.17839369643350844, + "grad_norm": 1.1836214440473243, + "learning_rate": 1.8867310773218995e-05, + "loss": 0.7036, + "step": 10324 + }, + { + "epoch": 0.17841097594691732, + "grad_norm": 1.5579118398668899, + "learning_rate": 1.886705204139999e-05, + "loss": 0.5997, + "step": 10325 + }, + { + "epoch": 0.17842825546032623, + "grad_norm": 0.9371181339640532, + "learning_rate": 1.8866793281808703e-05, + "loss": 0.4026, + "step": 10326 + }, + { + "epoch": 0.17844553497373514, + "grad_norm": 1.5471690535071938, + "learning_rate": 1.8866534494445945e-05, + "loss": 0.6072, + "step": 10327 + }, + { + "epoch": 0.17846281448714404, + "grad_norm": 1.1569575145569684, + "learning_rate": 1.8866275679312527e-05, + "loss": 0.6384, + "step": 10328 + }, + { + "epoch": 0.17848009400055295, + "grad_norm": 1.113690804469848, + "learning_rate": 1.886601683640926e-05, + "loss": 0.4918, + "step": 10329 + }, + { + "epoch": 0.17849737351396186, + "grad_norm": 0.651278292952535, + "learning_rate": 1.8865757965736955e-05, + "loss": 0.5761, + "step": 10330 + }, + { + "epoch": 0.17851465302737074, + "grad_norm": 0.43315042870658305, + "learning_rate": 1.8865499067296424e-05, + "loss": 0.7192, + "step": 10331 + }, + { + "epoch": 0.17853193254077965, + "grad_norm": 0.9922396698145632, + "learning_rate": 1.8865240141088472e-05, + "loss": 0.6156, + "step": 10332 + }, + { + "epoch": 0.17854921205418856, + "grad_norm": 0.7126683185986272, + "learning_rate": 1.8864981187113916e-05, + "loss": 0.5468, + "step": 10333 + }, + { + "epoch": 0.17856649156759746, + "grad_norm": 0.8184214629606734, + "learning_rate": 1.8864722205373566e-05, + "loss": 0.5403, + "step": 10334 + }, + { + "epoch": 0.17858377108100637, + "grad_norm": 0.7514849707534452, + "learning_rate": 1.886446319586823e-05, + "loss": 0.5512, + "step": 10335 + }, + { + "epoch": 0.17860105059441525, + "grad_norm": 1.0415923852003481, + "learning_rate": 1.8864204158598726e-05, + "loss": 0.7147, + "step": 10336 + }, + { + "epoch": 0.17861833010782416, + "grad_norm": 0.8257986281688521, + "learning_rate": 1.8863945093565855e-05, + "loss": 0.7005, + "step": 10337 + }, + { + "epoch": 0.17863560962123307, + "grad_norm": 1.3847502057479302, + "learning_rate": 1.886368600077044e-05, + "loss": 0.6759, + "step": 10338 + }, + { + "epoch": 0.17865288913464197, + "grad_norm": 1.5670408229450317, + "learning_rate": 1.8863426880213285e-05, + "loss": 0.6628, + "step": 10339 + }, + { + "epoch": 0.17867016864805088, + "grad_norm": 1.0138206375654468, + "learning_rate": 1.88631677318952e-05, + "loss": 0.6314, + "step": 10340 + }, + { + "epoch": 0.17868744816145976, + "grad_norm": 1.0055681269560768, + "learning_rate": 1.8862908555817007e-05, + "loss": 0.4486, + "step": 10341 + }, + { + "epoch": 0.17870472767486867, + "grad_norm": 1.1036979057463343, + "learning_rate": 1.8862649351979505e-05, + "loss": 0.7883, + "step": 10342 + }, + { + "epoch": 0.17872200718827758, + "grad_norm": 1.322320724797442, + "learning_rate": 1.886239012038351e-05, + "loss": 0.5458, + "step": 10343 + }, + { + "epoch": 0.17873928670168648, + "grad_norm": 0.9968622334689765, + "learning_rate": 1.886213086102984e-05, + "loss": 0.8468, + "step": 10344 + }, + { + "epoch": 0.1787565662150954, + "grad_norm": 1.075954572354264, + "learning_rate": 1.8861871573919303e-05, + "loss": 0.5675, + "step": 10345 + }, + { + "epoch": 0.17877384572850427, + "grad_norm": 1.1503620644626291, + "learning_rate": 1.886161225905271e-05, + "loss": 0.8311, + "step": 10346 + }, + { + "epoch": 0.17879112524191318, + "grad_norm": 1.0710203682588664, + "learning_rate": 1.8861352916430872e-05, + "loss": 0.6462, + "step": 10347 + }, + { + "epoch": 0.1788084047553221, + "grad_norm": 2.6390625136461514, + "learning_rate": 1.8861093546054605e-05, + "loss": 0.7759, + "step": 10348 + }, + { + "epoch": 0.178825684268731, + "grad_norm": 1.3689160572244654, + "learning_rate": 1.886083414792472e-05, + "loss": 0.6522, + "step": 10349 + }, + { + "epoch": 0.1788429637821399, + "grad_norm": 1.3116605466494442, + "learning_rate": 1.8860574722042027e-05, + "loss": 0.5432, + "step": 10350 + }, + { + "epoch": 0.1788602432955488, + "grad_norm": 0.6529507971066809, + "learning_rate": 1.8860315268407344e-05, + "loss": 0.498, + "step": 10351 + }, + { + "epoch": 0.1788775228089577, + "grad_norm": 0.8744770447436927, + "learning_rate": 1.8860055787021476e-05, + "loss": 0.5942, + "step": 10352 + }, + { + "epoch": 0.1788948023223666, + "grad_norm": 0.9555209667994952, + "learning_rate": 1.8859796277885243e-05, + "loss": 0.5062, + "step": 10353 + }, + { + "epoch": 0.1789120818357755, + "grad_norm": 1.355597420400863, + "learning_rate": 1.8859536740999455e-05, + "loss": 0.6063, + "step": 10354 + }, + { + "epoch": 0.1789293613491844, + "grad_norm": 1.2843603157494112, + "learning_rate": 1.8859277176364922e-05, + "loss": 0.5338, + "step": 10355 + }, + { + "epoch": 0.17894664086259332, + "grad_norm": 1.2288060589003815, + "learning_rate": 1.885901758398246e-05, + "loss": 0.5625, + "step": 10356 + }, + { + "epoch": 0.1789639203760022, + "grad_norm": 1.1342990263083095, + "learning_rate": 1.8858757963852884e-05, + "loss": 0.5146, + "step": 10357 + }, + { + "epoch": 0.1789811998894111, + "grad_norm": 1.0822592150450954, + "learning_rate": 1.8858498315977005e-05, + "loss": 0.726, + "step": 10358 + }, + { + "epoch": 0.17899847940282002, + "grad_norm": 1.00701728576453, + "learning_rate": 1.8858238640355637e-05, + "loss": 0.6361, + "step": 10359 + }, + { + "epoch": 0.17901575891622892, + "grad_norm": 0.5248949690887318, + "learning_rate": 1.8857978936989586e-05, + "loss": 0.7354, + "step": 10360 + }, + { + "epoch": 0.17903303842963783, + "grad_norm": 1.4276139741829152, + "learning_rate": 1.8857719205879675e-05, + "loss": 0.7313, + "step": 10361 + }, + { + "epoch": 0.1790503179430467, + "grad_norm": 0.9298412116840762, + "learning_rate": 1.8857459447026715e-05, + "loss": 0.5845, + "step": 10362 + }, + { + "epoch": 0.17906759745645562, + "grad_norm": 0.8356700439945822, + "learning_rate": 1.885719966043152e-05, + "loss": 0.5463, + "step": 10363 + }, + { + "epoch": 0.17908487696986453, + "grad_norm": 0.9881494531418229, + "learning_rate": 1.88569398460949e-05, + "loss": 0.7457, + "step": 10364 + }, + { + "epoch": 0.17910215648327343, + "grad_norm": 1.9078301300408105, + "learning_rate": 1.8856680004017674e-05, + "loss": 0.722, + "step": 10365 + }, + { + "epoch": 0.17911943599668234, + "grad_norm": 0.7025269498840413, + "learning_rate": 1.8856420134200654e-05, + "loss": 0.6259, + "step": 10366 + }, + { + "epoch": 0.17913671551009125, + "grad_norm": 1.1105502959536726, + "learning_rate": 1.885616023664465e-05, + "loss": 0.4718, + "step": 10367 + }, + { + "epoch": 0.17915399502350013, + "grad_norm": 1.4316220540702091, + "learning_rate": 1.885590031135048e-05, + "loss": 0.6791, + "step": 10368 + }, + { + "epoch": 0.17917127453690904, + "grad_norm": 1.281989537637094, + "learning_rate": 1.8855640358318957e-05, + "loss": 0.3721, + "step": 10369 + }, + { + "epoch": 0.17918855405031794, + "grad_norm": 0.6337009931504415, + "learning_rate": 1.8855380377550893e-05, + "loss": 0.6202, + "step": 10370 + }, + { + "epoch": 0.17920583356372685, + "grad_norm": 0.9830036030136672, + "learning_rate": 1.885512036904711e-05, + "loss": 0.6776, + "step": 10371 + }, + { + "epoch": 0.17922311307713576, + "grad_norm": 1.1315463001907333, + "learning_rate": 1.8854860332808412e-05, + "loss": 0.9639, + "step": 10372 + }, + { + "epoch": 0.17924039259054464, + "grad_norm": 1.1376666935529491, + "learning_rate": 1.8854600268835624e-05, + "loss": 0.5051, + "step": 10373 + }, + { + "epoch": 0.17925767210395355, + "grad_norm": 1.2420020632052757, + "learning_rate": 1.885434017712955e-05, + "loss": 0.8821, + "step": 10374 + }, + { + "epoch": 0.17927495161736245, + "grad_norm": 1.2276135040538736, + "learning_rate": 1.8854080057691012e-05, + "loss": 0.6876, + "step": 10375 + }, + { + "epoch": 0.17929223113077136, + "grad_norm": 0.6997216867798394, + "learning_rate": 1.8853819910520822e-05, + "loss": 0.4175, + "step": 10376 + }, + { + "epoch": 0.17930951064418027, + "grad_norm": 0.9001759413061582, + "learning_rate": 1.8853559735619796e-05, + "loss": 0.5485, + "step": 10377 + }, + { + "epoch": 0.17932679015758915, + "grad_norm": 0.8881832306571975, + "learning_rate": 1.885329953298875e-05, + "loss": 0.6159, + "step": 10378 + }, + { + "epoch": 0.17934406967099806, + "grad_norm": 1.0648808223636799, + "learning_rate": 1.885303930262849e-05, + "loss": 0.5542, + "step": 10379 + }, + { + "epoch": 0.17936134918440697, + "grad_norm": 1.409299924181543, + "learning_rate": 1.885277904453984e-05, + "loss": 0.604, + "step": 10380 + }, + { + "epoch": 0.17937862869781587, + "grad_norm": 1.1702317810531646, + "learning_rate": 1.885251875872362e-05, + "loss": 0.7264, + "step": 10381 + }, + { + "epoch": 0.17939590821122478, + "grad_norm": 1.3370706207431797, + "learning_rate": 1.8852258445180633e-05, + "loss": 0.7855, + "step": 10382 + }, + { + "epoch": 0.1794131877246337, + "grad_norm": 0.9831542782376033, + "learning_rate": 1.88519981039117e-05, + "loss": 0.6245, + "step": 10383 + }, + { + "epoch": 0.17943046723804257, + "grad_norm": 0.668114094348709, + "learning_rate": 1.8851737734917636e-05, + "loss": 0.6256, + "step": 10384 + }, + { + "epoch": 0.17944774675145148, + "grad_norm": 0.9097928798502376, + "learning_rate": 1.8851477338199257e-05, + "loss": 0.538, + "step": 10385 + }, + { + "epoch": 0.17946502626486038, + "grad_norm": 0.8481854403002896, + "learning_rate": 1.885121691375738e-05, + "loss": 0.49, + "step": 10386 + }, + { + "epoch": 0.1794823057782693, + "grad_norm": 1.1848706023772593, + "learning_rate": 1.885095646159282e-05, + "loss": 0.5795, + "step": 10387 + }, + { + "epoch": 0.1794995852916782, + "grad_norm": 1.3069047178404938, + "learning_rate": 1.8850695981706387e-05, + "loss": 0.6167, + "step": 10388 + }, + { + "epoch": 0.17951686480508708, + "grad_norm": 1.2067229206594838, + "learning_rate": 1.8850435474098903e-05, + "loss": 0.5068, + "step": 10389 + }, + { + "epoch": 0.179534144318496, + "grad_norm": 1.4165272416570607, + "learning_rate": 1.8850174938771186e-05, + "loss": 0.6126, + "step": 10390 + }, + { + "epoch": 0.1795514238319049, + "grad_norm": 1.254195439736711, + "learning_rate": 1.8849914375724044e-05, + "loss": 0.6446, + "step": 10391 + }, + { + "epoch": 0.1795687033453138, + "grad_norm": 1.0973850936964613, + "learning_rate": 1.8849653784958302e-05, + "loss": 0.8322, + "step": 10392 + }, + { + "epoch": 0.1795859828587227, + "grad_norm": 1.3261263591833015, + "learning_rate": 1.8849393166474768e-05, + "loss": 0.6589, + "step": 10393 + }, + { + "epoch": 0.1796032623721316, + "grad_norm": 0.907558309072148, + "learning_rate": 1.8849132520274267e-05, + "loss": 0.4408, + "step": 10394 + }, + { + "epoch": 0.1796205418855405, + "grad_norm": 0.36991155161291595, + "learning_rate": 1.8848871846357607e-05, + "loss": 0.6745, + "step": 10395 + }, + { + "epoch": 0.1796378213989494, + "grad_norm": 1.0751145977752792, + "learning_rate": 1.884861114472561e-05, + "loss": 0.7266, + "step": 10396 + }, + { + "epoch": 0.1796551009123583, + "grad_norm": 0.9864357305847055, + "learning_rate": 1.8848350415379086e-05, + "loss": 0.6626, + "step": 10397 + }, + { + "epoch": 0.17967238042576722, + "grad_norm": 1.4285691759781856, + "learning_rate": 1.884808965831886e-05, + "loss": 0.9566, + "step": 10398 + }, + { + "epoch": 0.1796896599391761, + "grad_norm": 1.183505249887094, + "learning_rate": 1.8847828873545742e-05, + "loss": 0.7651, + "step": 10399 + }, + { + "epoch": 0.179706939452585, + "grad_norm": 0.9250385009937446, + "learning_rate": 1.8847568061060553e-05, + "loss": 0.4345, + "step": 10400 + }, + { + "epoch": 0.17972421896599391, + "grad_norm": 1.1296232748274526, + "learning_rate": 1.8847307220864106e-05, + "loss": 0.7777, + "step": 10401 + }, + { + "epoch": 0.17974149847940282, + "grad_norm": 1.4911051326187679, + "learning_rate": 1.8847046352957223e-05, + "loss": 0.7866, + "step": 10402 + }, + { + "epoch": 0.17975877799281173, + "grad_norm": 0.42997004437733305, + "learning_rate": 1.8846785457340718e-05, + "loss": 0.8258, + "step": 10403 + }, + { + "epoch": 0.17977605750622064, + "grad_norm": 1.299611060736757, + "learning_rate": 1.884652453401541e-05, + "loss": 0.5477, + "step": 10404 + }, + { + "epoch": 0.17979333701962952, + "grad_norm": 1.3532200976202031, + "learning_rate": 1.8846263582982113e-05, + "loss": 0.6144, + "step": 10405 + }, + { + "epoch": 0.17981061653303843, + "grad_norm": 0.6986040732364972, + "learning_rate": 1.884600260424165e-05, + "loss": 0.3023, + "step": 10406 + }, + { + "epoch": 0.17982789604644733, + "grad_norm": 1.1716847966201915, + "learning_rate": 1.8845741597794828e-05, + "loss": 0.6196, + "step": 10407 + }, + { + "epoch": 0.17984517555985624, + "grad_norm": 1.1522320024700154, + "learning_rate": 1.8845480563642475e-05, + "loss": 0.5398, + "step": 10408 + }, + { + "epoch": 0.17986245507326515, + "grad_norm": 1.5637962243884205, + "learning_rate": 1.8845219501785404e-05, + "loss": 0.5173, + "step": 10409 + }, + { + "epoch": 0.17987973458667403, + "grad_norm": 0.7691669081947818, + "learning_rate": 1.8844958412224432e-05, + "loss": 0.5793, + "step": 10410 + }, + { + "epoch": 0.17989701410008294, + "grad_norm": 1.1114112567841141, + "learning_rate": 1.884469729496038e-05, + "loss": 0.7942, + "step": 10411 + }, + { + "epoch": 0.17991429361349184, + "grad_norm": 0.9085787774127038, + "learning_rate": 1.8844436149994062e-05, + "loss": 0.4897, + "step": 10412 + }, + { + "epoch": 0.17993157312690075, + "grad_norm": 0.7314521701552129, + "learning_rate": 1.88441749773263e-05, + "loss": 0.3764, + "step": 10413 + }, + { + "epoch": 0.17994885264030966, + "grad_norm": 1.5951894696176765, + "learning_rate": 1.884391377695791e-05, + "loss": 0.5423, + "step": 10414 + }, + { + "epoch": 0.17996613215371854, + "grad_norm": 1.039763382378518, + "learning_rate": 1.884365254888971e-05, + "loss": 0.618, + "step": 10415 + }, + { + "epoch": 0.17998341166712745, + "grad_norm": 0.7142127210580539, + "learning_rate": 1.8843391293122516e-05, + "loss": 0.5975, + "step": 10416 + }, + { + "epoch": 0.18000069118053635, + "grad_norm": 1.227174414448629, + "learning_rate": 1.884313000965715e-05, + "loss": 0.5324, + "step": 10417 + }, + { + "epoch": 0.18001797069394526, + "grad_norm": 0.8144578894149246, + "learning_rate": 1.884286869849443e-05, + "loss": 0.5588, + "step": 10418 + }, + { + "epoch": 0.18003525020735417, + "grad_norm": 0.7709507927189656, + "learning_rate": 1.8842607359635167e-05, + "loss": 0.6349, + "step": 10419 + }, + { + "epoch": 0.18005252972076308, + "grad_norm": 1.0371788206197112, + "learning_rate": 1.884234599308019e-05, + "loss": 0.4453, + "step": 10420 + }, + { + "epoch": 0.18006980923417196, + "grad_norm": 0.9526828863273831, + "learning_rate": 1.8842084598830315e-05, + "loss": 0.5961, + "step": 10421 + }, + { + "epoch": 0.18008708874758086, + "grad_norm": 0.7296610900334907, + "learning_rate": 1.8841823176886357e-05, + "loss": 0.6671, + "step": 10422 + }, + { + "epoch": 0.18010436826098977, + "grad_norm": 1.0080920763120553, + "learning_rate": 1.884156172724914e-05, + "loss": 0.7134, + "step": 10423 + }, + { + "epoch": 0.18012164777439868, + "grad_norm": 1.1631807311763407, + "learning_rate": 1.8841300249919475e-05, + "loss": 0.651, + "step": 10424 + }, + { + "epoch": 0.1801389272878076, + "grad_norm": 0.9214094937934245, + "learning_rate": 1.884103874489819e-05, + "loss": 0.4405, + "step": 10425 + }, + { + "epoch": 0.18015620680121647, + "grad_norm": 1.046295514269549, + "learning_rate": 1.8840777212186098e-05, + "loss": 0.6651, + "step": 10426 + }, + { + "epoch": 0.18017348631462538, + "grad_norm": 0.38668751776005134, + "learning_rate": 1.884051565178402e-05, + "loss": 0.6261, + "step": 10427 + }, + { + "epoch": 0.18019076582803428, + "grad_norm": 0.4273303822951309, + "learning_rate": 1.8840254063692772e-05, + "loss": 0.6811, + "step": 10428 + }, + { + "epoch": 0.1802080453414432, + "grad_norm": 0.4304279735453938, + "learning_rate": 1.883999244791318e-05, + "loss": 0.5344, + "step": 10429 + }, + { + "epoch": 0.1802253248548521, + "grad_norm": 0.8721262729780219, + "learning_rate": 1.883973080444606e-05, + "loss": 0.6597, + "step": 10430 + }, + { + "epoch": 0.18024260436826098, + "grad_norm": 0.974720660109067, + "learning_rate": 1.883946913329223e-05, + "loss": 0.5257, + "step": 10431 + }, + { + "epoch": 0.18025988388166989, + "grad_norm": 0.7381422598466651, + "learning_rate": 1.8839207434452513e-05, + "loss": 0.4539, + "step": 10432 + }, + { + "epoch": 0.1802771633950788, + "grad_norm": 1.0088061591949904, + "learning_rate": 1.883894570792772e-05, + "loss": 0.6843, + "step": 10433 + }, + { + "epoch": 0.1802944429084877, + "grad_norm": 1.185307339116191, + "learning_rate": 1.8838683953718685e-05, + "loss": 0.6954, + "step": 10434 + }, + { + "epoch": 0.1803117224218966, + "grad_norm": 1.2199033541190638, + "learning_rate": 1.8838422171826213e-05, + "loss": 0.668, + "step": 10435 + }, + { + "epoch": 0.1803290019353055, + "grad_norm": 0.861617782304025, + "learning_rate": 1.8838160362251135e-05, + "loss": 0.6425, + "step": 10436 + }, + { + "epoch": 0.1803462814487144, + "grad_norm": 0.9600628686025876, + "learning_rate": 1.883789852499427e-05, + "loss": 0.6146, + "step": 10437 + }, + { + "epoch": 0.1803635609621233, + "grad_norm": 1.2665412412403432, + "learning_rate": 1.8837636660056434e-05, + "loss": 0.6031, + "step": 10438 + }, + { + "epoch": 0.1803808404755322, + "grad_norm": 0.6748840386617735, + "learning_rate": 1.8837374767438446e-05, + "loss": 0.5068, + "step": 10439 + }, + { + "epoch": 0.18039811998894112, + "grad_norm": 1.733232752680639, + "learning_rate": 1.883711284714113e-05, + "loss": 0.5133, + "step": 10440 + }, + { + "epoch": 0.18041539950235003, + "grad_norm": 1.2516766715208987, + "learning_rate": 1.8836850899165303e-05, + "loss": 0.6011, + "step": 10441 + }, + { + "epoch": 0.1804326790157589, + "grad_norm": 1.0563745714167825, + "learning_rate": 1.8836588923511788e-05, + "loss": 0.8781, + "step": 10442 + }, + { + "epoch": 0.18044995852916781, + "grad_norm": 0.8062125716807771, + "learning_rate": 1.8836326920181407e-05, + "loss": 0.4884, + "step": 10443 + }, + { + "epoch": 0.18046723804257672, + "grad_norm": 1.1658638112114412, + "learning_rate": 1.8836064889174977e-05, + "loss": 0.5975, + "step": 10444 + }, + { + "epoch": 0.18048451755598563, + "grad_norm": 0.854993646109842, + "learning_rate": 1.883580283049332e-05, + "loss": 0.5666, + "step": 10445 + }, + { + "epoch": 0.18050179706939454, + "grad_norm": 1.1053612479914054, + "learning_rate": 1.8835540744137257e-05, + "loss": 0.6331, + "step": 10446 + }, + { + "epoch": 0.18051907658280342, + "grad_norm": 1.7091123147989118, + "learning_rate": 1.8835278630107612e-05, + "loss": 0.8174, + "step": 10447 + }, + { + "epoch": 0.18053635609621232, + "grad_norm": 1.2533651938264152, + "learning_rate": 1.88350164884052e-05, + "loss": 0.7296, + "step": 10448 + }, + { + "epoch": 0.18055363560962123, + "grad_norm": 0.4703885390440788, + "learning_rate": 1.8834754319030845e-05, + "loss": 0.7343, + "step": 10449 + }, + { + "epoch": 0.18057091512303014, + "grad_norm": 1.0954683030424202, + "learning_rate": 1.8834492121985367e-05, + "loss": 0.8119, + "step": 10450 + }, + { + "epoch": 0.18058819463643905, + "grad_norm": 0.6388843847844383, + "learning_rate": 1.883422989726959e-05, + "loss": 0.5672, + "step": 10451 + }, + { + "epoch": 0.18060547414984793, + "grad_norm": 1.1293922873380196, + "learning_rate": 1.8833967644884336e-05, + "loss": 0.3707, + "step": 10452 + }, + { + "epoch": 0.18062275366325684, + "grad_norm": 0.9110144576778769, + "learning_rate": 1.883370536483042e-05, + "loss": 0.5034, + "step": 10453 + }, + { + "epoch": 0.18064003317666574, + "grad_norm": 1.1048186630106627, + "learning_rate": 1.8833443057108668e-05, + "loss": 0.61, + "step": 10454 + }, + { + "epoch": 0.18065731269007465, + "grad_norm": 1.3356345390620645, + "learning_rate": 1.8833180721719898e-05, + "loss": 0.5938, + "step": 10455 + }, + { + "epoch": 0.18067459220348356, + "grad_norm": 1.1051699566846813, + "learning_rate": 1.883291835866494e-05, + "loss": 0.7256, + "step": 10456 + }, + { + "epoch": 0.18069187171689247, + "grad_norm": 0.8489055592430808, + "learning_rate": 1.8832655967944607e-05, + "loss": 0.522, + "step": 10457 + }, + { + "epoch": 0.18070915123030135, + "grad_norm": 1.2817319829165847, + "learning_rate": 1.883239354955973e-05, + "loss": 0.6686, + "step": 10458 + }, + { + "epoch": 0.18072643074371025, + "grad_norm": 0.8683067855437763, + "learning_rate": 1.883213110351112e-05, + "loss": 0.6128, + "step": 10459 + }, + { + "epoch": 0.18074371025711916, + "grad_norm": 0.9661861059791801, + "learning_rate": 1.8831868629799602e-05, + "loss": 0.516, + "step": 10460 + }, + { + "epoch": 0.18076098977052807, + "grad_norm": 1.0370090218866432, + "learning_rate": 1.8831606128426005e-05, + "loss": 0.6514, + "step": 10461 + }, + { + "epoch": 0.18077826928393698, + "grad_norm": 1.1409098636094044, + "learning_rate": 1.8831343599391147e-05, + "loss": 0.7588, + "step": 10462 + }, + { + "epoch": 0.18079554879734586, + "grad_norm": 1.8019153575015243, + "learning_rate": 1.8831081042695846e-05, + "loss": 0.694, + "step": 10463 + }, + { + "epoch": 0.18081282831075476, + "grad_norm": 1.744819363122119, + "learning_rate": 1.883081845834093e-05, + "loss": 0.7669, + "step": 10464 + }, + { + "epoch": 0.18083010782416367, + "grad_norm": 1.0499629315484054, + "learning_rate": 1.8830555846327222e-05, + "loss": 0.7005, + "step": 10465 + }, + { + "epoch": 0.18084738733757258, + "grad_norm": 1.1653101873644638, + "learning_rate": 1.8830293206655537e-05, + "loss": 0.6338, + "step": 10466 + }, + { + "epoch": 0.1808646668509815, + "grad_norm": 1.6104500570047142, + "learning_rate": 1.8830030539326704e-05, + "loss": 0.7123, + "step": 10467 + }, + { + "epoch": 0.18088194636439037, + "grad_norm": 0.9452471832845215, + "learning_rate": 1.8829767844341548e-05, + "loss": 0.5599, + "step": 10468 + }, + { + "epoch": 0.18089922587779927, + "grad_norm": 1.5130971247728702, + "learning_rate": 1.8829505121700884e-05, + "loss": 0.7986, + "step": 10469 + }, + { + "epoch": 0.18091650539120818, + "grad_norm": 1.333715718872149, + "learning_rate": 1.882924237140554e-05, + "loss": 0.5247, + "step": 10470 + }, + { + "epoch": 0.1809337849046171, + "grad_norm": 1.4110181871379035, + "learning_rate": 1.8828979593456336e-05, + "loss": 0.5404, + "step": 10471 + }, + { + "epoch": 0.180951064418026, + "grad_norm": 1.2079471450238897, + "learning_rate": 1.88287167878541e-05, + "loss": 0.6248, + "step": 10472 + }, + { + "epoch": 0.18096834393143488, + "grad_norm": 0.6603479134886173, + "learning_rate": 1.882845395459965e-05, + "loss": 0.4609, + "step": 10473 + }, + { + "epoch": 0.18098562344484379, + "grad_norm": 0.757520271301044, + "learning_rate": 1.8828191093693813e-05, + "loss": 0.4124, + "step": 10474 + }, + { + "epoch": 0.1810029029582527, + "grad_norm": 0.47242036048727176, + "learning_rate": 1.882792820513741e-05, + "loss": 0.7882, + "step": 10475 + }, + { + "epoch": 0.1810201824716616, + "grad_norm": 0.8763312094896517, + "learning_rate": 1.8827665288931267e-05, + "loss": 0.5527, + "step": 10476 + }, + { + "epoch": 0.1810374619850705, + "grad_norm": 1.1913103658995199, + "learning_rate": 1.8827402345076202e-05, + "loss": 0.6532, + "step": 10477 + }, + { + "epoch": 0.18105474149847942, + "grad_norm": 1.0951171008089566, + "learning_rate": 1.882713937357304e-05, + "loss": 0.6517, + "step": 10478 + }, + { + "epoch": 0.1810720210118883, + "grad_norm": 1.1151122423651978, + "learning_rate": 1.8826876374422612e-05, + "loss": 0.7107, + "step": 10479 + }, + { + "epoch": 0.1810893005252972, + "grad_norm": 0.43815340503451283, + "learning_rate": 1.8826613347625736e-05, + "loss": 0.7548, + "step": 10480 + }, + { + "epoch": 0.1811065800387061, + "grad_norm": 1.3105390292203989, + "learning_rate": 1.8826350293183233e-05, + "loss": 0.5831, + "step": 10481 + }, + { + "epoch": 0.18112385955211502, + "grad_norm": 1.522890409232001, + "learning_rate": 1.8826087211095932e-05, + "loss": 0.7167, + "step": 10482 + }, + { + "epoch": 0.18114113906552393, + "grad_norm": 1.095119482477966, + "learning_rate": 1.8825824101364657e-05, + "loss": 0.5059, + "step": 10483 + }, + { + "epoch": 0.1811584185789328, + "grad_norm": 1.3105559680224135, + "learning_rate": 1.8825560963990225e-05, + "loss": 0.6864, + "step": 10484 + }, + { + "epoch": 0.1811756980923417, + "grad_norm": 0.5202549589367069, + "learning_rate": 1.8825297798973466e-05, + "loss": 0.8213, + "step": 10485 + }, + { + "epoch": 0.18119297760575062, + "grad_norm": 1.1275308595142124, + "learning_rate": 1.882503460631521e-05, + "loss": 0.5828, + "step": 10486 + }, + { + "epoch": 0.18121025711915953, + "grad_norm": 1.0133136966607885, + "learning_rate": 1.882477138601627e-05, + "loss": 0.7417, + "step": 10487 + }, + { + "epoch": 0.18122753663256844, + "grad_norm": 1.2629530924178798, + "learning_rate": 1.8824508138077474e-05, + "loss": 0.5715, + "step": 10488 + }, + { + "epoch": 0.18124481614597732, + "grad_norm": 1.2670251213832224, + "learning_rate": 1.8824244862499652e-05, + "loss": 0.6574, + "step": 10489 + }, + { + "epoch": 0.18126209565938622, + "grad_norm": 1.3311225812309773, + "learning_rate": 1.8823981559283623e-05, + "loss": 0.5428, + "step": 10490 + }, + { + "epoch": 0.18127937517279513, + "grad_norm": 1.1362329101526467, + "learning_rate": 1.882371822843021e-05, + "loss": 0.6426, + "step": 10491 + }, + { + "epoch": 0.18129665468620404, + "grad_norm": 0.8771831058520392, + "learning_rate": 1.8823454869940243e-05, + "loss": 0.618, + "step": 10492 + }, + { + "epoch": 0.18131393419961295, + "grad_norm": 0.9219101480962213, + "learning_rate": 1.8823191483814545e-05, + "loss": 0.7882, + "step": 10493 + }, + { + "epoch": 0.18133121371302185, + "grad_norm": 1.0217062139013942, + "learning_rate": 1.8822928070053943e-05, + "loss": 0.6564, + "step": 10494 + }, + { + "epoch": 0.18134849322643073, + "grad_norm": 0.838968472009333, + "learning_rate": 1.8822664628659258e-05, + "loss": 0.4011, + "step": 10495 + }, + { + "epoch": 0.18136577273983964, + "grad_norm": 0.9075301799992176, + "learning_rate": 1.8822401159631315e-05, + "loss": 0.7086, + "step": 10496 + }, + { + "epoch": 0.18138305225324855, + "grad_norm": 1.1877841182374567, + "learning_rate": 1.8822137662970945e-05, + "loss": 0.391, + "step": 10497 + }, + { + "epoch": 0.18140033176665746, + "grad_norm": 1.2834811340023224, + "learning_rate": 1.8821874138678967e-05, + "loss": 0.6102, + "step": 10498 + }, + { + "epoch": 0.18141761128006637, + "grad_norm": 1.4874984216896499, + "learning_rate": 1.882161058675621e-05, + "loss": 0.7564, + "step": 10499 + }, + { + "epoch": 0.18143489079347525, + "grad_norm": 0.560133499320026, + "learning_rate": 1.8821347007203498e-05, + "loss": 0.691, + "step": 10500 + }, + { + "epoch": 0.18145217030688415, + "grad_norm": 0.7101253976722136, + "learning_rate": 1.8821083400021652e-05, + "loss": 0.5923, + "step": 10501 + }, + { + "epoch": 0.18146944982029306, + "grad_norm": 1.1992002547229386, + "learning_rate": 1.8820819765211506e-05, + "loss": 0.4683, + "step": 10502 + }, + { + "epoch": 0.18148672933370197, + "grad_norm": 2.7271314133554747, + "learning_rate": 1.8820556102773882e-05, + "loss": 0.7378, + "step": 10503 + }, + { + "epoch": 0.18150400884711088, + "grad_norm": 0.7241073496362581, + "learning_rate": 1.882029241270961e-05, + "loss": 0.353, + "step": 10504 + }, + { + "epoch": 0.18152128836051976, + "grad_norm": 1.0319077202286717, + "learning_rate": 1.8820028695019504e-05, + "loss": 0.6979, + "step": 10505 + }, + { + "epoch": 0.18153856787392866, + "grad_norm": 0.873711191431909, + "learning_rate": 1.8819764949704403e-05, + "loss": 0.7425, + "step": 10506 + }, + { + "epoch": 0.18155584738733757, + "grad_norm": 0.8243615700210263, + "learning_rate": 1.8819501176765128e-05, + "loss": 0.4951, + "step": 10507 + }, + { + "epoch": 0.18157312690074648, + "grad_norm": 0.9564057890299061, + "learning_rate": 1.8819237376202503e-05, + "loss": 0.4521, + "step": 10508 + }, + { + "epoch": 0.1815904064141554, + "grad_norm": 1.1069870077247126, + "learning_rate": 1.8818973548017357e-05, + "loss": 0.6121, + "step": 10509 + }, + { + "epoch": 0.18160768592756427, + "grad_norm": 0.820535368070443, + "learning_rate": 1.8818709692210518e-05, + "loss": 0.8477, + "step": 10510 + }, + { + "epoch": 0.18162496544097317, + "grad_norm": 0.9640411594626586, + "learning_rate": 1.881844580878281e-05, + "loss": 0.4417, + "step": 10511 + }, + { + "epoch": 0.18164224495438208, + "grad_norm": 0.39619455888081906, + "learning_rate": 1.8818181897735056e-05, + "loss": 0.5374, + "step": 10512 + }, + { + "epoch": 0.181659524467791, + "grad_norm": 0.8000434318268823, + "learning_rate": 1.881791795906809e-05, + "loss": 0.5373, + "step": 10513 + }, + { + "epoch": 0.1816768039811999, + "grad_norm": 0.9306378266407633, + "learning_rate": 1.881765399278273e-05, + "loss": 0.5686, + "step": 10514 + }, + { + "epoch": 0.1816940834946088, + "grad_norm": 1.0414685949512534, + "learning_rate": 1.881738999887981e-05, + "loss": 0.7307, + "step": 10515 + }, + { + "epoch": 0.18171136300801768, + "grad_norm": 0.7651645591333254, + "learning_rate": 1.8817125977360152e-05, + "loss": 0.5524, + "step": 10516 + }, + { + "epoch": 0.1817286425214266, + "grad_norm": 0.8488491252747992, + "learning_rate": 1.8816861928224592e-05, + "loss": 0.4411, + "step": 10517 + }, + { + "epoch": 0.1817459220348355, + "grad_norm": 1.0942416315288157, + "learning_rate": 1.8816597851473945e-05, + "loss": 0.725, + "step": 10518 + }, + { + "epoch": 0.1817632015482444, + "grad_norm": 1.5826294761180768, + "learning_rate": 1.8816333747109045e-05, + "loss": 0.6538, + "step": 10519 + }, + { + "epoch": 0.18178048106165332, + "grad_norm": 0.8993981620876456, + "learning_rate": 1.8816069615130718e-05, + "loss": 0.6406, + "step": 10520 + }, + { + "epoch": 0.1817977605750622, + "grad_norm": 1.1565435745854042, + "learning_rate": 1.8815805455539788e-05, + "loss": 0.4738, + "step": 10521 + }, + { + "epoch": 0.1818150400884711, + "grad_norm": 1.180224301310766, + "learning_rate": 1.881554126833709e-05, + "loss": 0.5768, + "step": 10522 + }, + { + "epoch": 0.18183231960188, + "grad_norm": 0.943640666372471, + "learning_rate": 1.8815277053523446e-05, + "loss": 0.6284, + "step": 10523 + }, + { + "epoch": 0.18184959911528892, + "grad_norm": 1.1096103238396633, + "learning_rate": 1.881501281109968e-05, + "loss": 0.5643, + "step": 10524 + }, + { + "epoch": 0.18186687862869783, + "grad_norm": 0.7839506711930979, + "learning_rate": 1.8814748541066628e-05, + "loss": 0.5752, + "step": 10525 + }, + { + "epoch": 0.1818841581421067, + "grad_norm": 0.7850932721993071, + "learning_rate": 1.8814484243425113e-05, + "loss": 0.8779, + "step": 10526 + }, + { + "epoch": 0.1819014376555156, + "grad_norm": 0.9275424489168624, + "learning_rate": 1.881421991817596e-05, + "loss": 0.3831, + "step": 10527 + }, + { + "epoch": 0.18191871716892452, + "grad_norm": 1.1017275619083386, + "learning_rate": 1.8813955565320004e-05, + "loss": 0.662, + "step": 10528 + }, + { + "epoch": 0.18193599668233343, + "grad_norm": 1.3376540659418221, + "learning_rate": 1.881369118485807e-05, + "loss": 0.6132, + "step": 10529 + }, + { + "epoch": 0.18195327619574234, + "grad_norm": 1.012789727143777, + "learning_rate": 1.8813426776790985e-05, + "loss": 0.469, + "step": 10530 + }, + { + "epoch": 0.18197055570915124, + "grad_norm": 0.919750868036052, + "learning_rate": 1.8813162341119576e-05, + "loss": 0.6206, + "step": 10531 + }, + { + "epoch": 0.18198783522256012, + "grad_norm": 1.3288399357413172, + "learning_rate": 1.8812897877844675e-05, + "loss": 0.645, + "step": 10532 + }, + { + "epoch": 0.18200511473596903, + "grad_norm": 0.9411632722692002, + "learning_rate": 1.8812633386967104e-05, + "loss": 0.6567, + "step": 10533 + }, + { + "epoch": 0.18202239424937794, + "grad_norm": 0.9625542678263286, + "learning_rate": 1.8812368868487694e-05, + "loss": 0.5582, + "step": 10534 + }, + { + "epoch": 0.18203967376278685, + "grad_norm": 0.9992081652643514, + "learning_rate": 1.8812104322407282e-05, + "loss": 0.733, + "step": 10535 + }, + { + "epoch": 0.18205695327619575, + "grad_norm": 0.9774294404859561, + "learning_rate": 1.8811839748726685e-05, + "loss": 0.4831, + "step": 10536 + }, + { + "epoch": 0.18207423278960463, + "grad_norm": 0.8873895158003231, + "learning_rate": 1.881157514744674e-05, + "loss": 0.5489, + "step": 10537 + }, + { + "epoch": 0.18209151230301354, + "grad_norm": 0.47317102526202276, + "learning_rate": 1.881131051856827e-05, + "loss": 0.3927, + "step": 10538 + }, + { + "epoch": 0.18210879181642245, + "grad_norm": 1.172673810176075, + "learning_rate": 1.8811045862092105e-05, + "loss": 0.6798, + "step": 10539 + }, + { + "epoch": 0.18212607132983136, + "grad_norm": 1.0920715542723787, + "learning_rate": 1.8810781178019076e-05, + "loss": 0.5195, + "step": 10540 + }, + { + "epoch": 0.18214335084324026, + "grad_norm": 1.1208126469286241, + "learning_rate": 1.881051646635001e-05, + "loss": 0.6206, + "step": 10541 + }, + { + "epoch": 0.18216063035664914, + "grad_norm": 0.712086343753124, + "learning_rate": 1.8810251727085734e-05, + "loss": 0.5749, + "step": 10542 + }, + { + "epoch": 0.18217790987005805, + "grad_norm": 0.9483993248679319, + "learning_rate": 1.8809986960227086e-05, + "loss": 0.7242, + "step": 10543 + }, + { + "epoch": 0.18219518938346696, + "grad_norm": 0.7749206830259158, + "learning_rate": 1.8809722165774886e-05, + "loss": 0.5233, + "step": 10544 + }, + { + "epoch": 0.18221246889687587, + "grad_norm": 0.5051485007419514, + "learning_rate": 1.8809457343729967e-05, + "loss": 0.7763, + "step": 10545 + }, + { + "epoch": 0.18222974841028478, + "grad_norm": 0.8976313347401841, + "learning_rate": 1.8809192494093157e-05, + "loss": 0.4612, + "step": 10546 + }, + { + "epoch": 0.18224702792369366, + "grad_norm": 1.035238914202285, + "learning_rate": 1.880892761686529e-05, + "loss": 0.7326, + "step": 10547 + }, + { + "epoch": 0.18226430743710256, + "grad_norm": 0.6040189483867967, + "learning_rate": 1.8808662712047186e-05, + "loss": 0.6905, + "step": 10548 + }, + { + "epoch": 0.18228158695051147, + "grad_norm": 0.9399852203136086, + "learning_rate": 1.8808397779639685e-05, + "loss": 0.7035, + "step": 10549 + }, + { + "epoch": 0.18229886646392038, + "grad_norm": 0.8682871092878525, + "learning_rate": 1.8808132819643616e-05, + "loss": 0.525, + "step": 10550 + }, + { + "epoch": 0.18231614597732929, + "grad_norm": 0.43323596754969435, + "learning_rate": 1.88078678320598e-05, + "loss": 0.749, + "step": 10551 + }, + { + "epoch": 0.1823334254907382, + "grad_norm": 1.066963652807637, + "learning_rate": 1.8807602816889076e-05, + "loss": 0.4527, + "step": 10552 + }, + { + "epoch": 0.18235070500414707, + "grad_norm": 0.7087623991167782, + "learning_rate": 1.880733777413227e-05, + "loss": 0.6424, + "step": 10553 + }, + { + "epoch": 0.18236798451755598, + "grad_norm": 0.9050052404930545, + "learning_rate": 1.880707270379021e-05, + "loss": 0.5843, + "step": 10554 + }, + { + "epoch": 0.1823852640309649, + "grad_norm": 1.3431843665260188, + "learning_rate": 1.8806807605863734e-05, + "loss": 0.7783, + "step": 10555 + }, + { + "epoch": 0.1824025435443738, + "grad_norm": 1.50363385374276, + "learning_rate": 1.8806542480353664e-05, + "loss": 0.6494, + "step": 10556 + }, + { + "epoch": 0.1824198230577827, + "grad_norm": 0.8216722669413113, + "learning_rate": 1.8806277327260832e-05, + "loss": 0.6601, + "step": 10557 + }, + { + "epoch": 0.18243710257119158, + "grad_norm": 0.8719752794241722, + "learning_rate": 1.8806012146586074e-05, + "loss": 0.5647, + "step": 10558 + }, + { + "epoch": 0.1824543820846005, + "grad_norm": 0.85326399982681, + "learning_rate": 1.8805746938330215e-05, + "loss": 0.5652, + "step": 10559 + }, + { + "epoch": 0.1824716615980094, + "grad_norm": 1.0606749203224362, + "learning_rate": 1.8805481702494086e-05, + "loss": 0.6595, + "step": 10560 + }, + { + "epoch": 0.1824889411114183, + "grad_norm": 1.1471545343559673, + "learning_rate": 1.880521643907852e-05, + "loss": 0.6811, + "step": 10561 + }, + { + "epoch": 0.18250622062482721, + "grad_norm": 1.3529300268135924, + "learning_rate": 1.8804951148084347e-05, + "loss": 0.6425, + "step": 10562 + }, + { + "epoch": 0.1825235001382361, + "grad_norm": 0.9615666145945623, + "learning_rate": 1.8804685829512398e-05, + "loss": 0.5702, + "step": 10563 + }, + { + "epoch": 0.182540779651645, + "grad_norm": 0.9022307927387224, + "learning_rate": 1.8804420483363506e-05, + "loss": 0.7181, + "step": 10564 + }, + { + "epoch": 0.1825580591650539, + "grad_norm": 0.9386834043647008, + "learning_rate": 1.8804155109638497e-05, + "loss": 0.5074, + "step": 10565 + }, + { + "epoch": 0.18257533867846282, + "grad_norm": 0.45028422091419296, + "learning_rate": 1.8803889708338205e-05, + "loss": 0.7178, + "step": 10566 + }, + { + "epoch": 0.18259261819187173, + "grad_norm": 0.7840505709556169, + "learning_rate": 1.8803624279463464e-05, + "loss": 0.5575, + "step": 10567 + }, + { + "epoch": 0.18260989770528063, + "grad_norm": 1.0165079869770857, + "learning_rate": 1.88033588230151e-05, + "loss": 0.6898, + "step": 10568 + }, + { + "epoch": 0.1826271772186895, + "grad_norm": 1.4102609508620292, + "learning_rate": 1.880309333899395e-05, + "loss": 0.5992, + "step": 10569 + }, + { + "epoch": 0.18264445673209842, + "grad_norm": 1.0749209316999615, + "learning_rate": 1.880282782740084e-05, + "loss": 0.7433, + "step": 10570 + }, + { + "epoch": 0.18266173624550733, + "grad_norm": 1.0438762153054972, + "learning_rate": 1.880256228823661e-05, + "loss": 0.6651, + "step": 10571 + }, + { + "epoch": 0.18267901575891624, + "grad_norm": 0.9603942490228333, + "learning_rate": 1.8802296721502075e-05, + "loss": 0.5074, + "step": 10572 + }, + { + "epoch": 0.18269629527232514, + "grad_norm": 1.2664959884925588, + "learning_rate": 1.8802031127198086e-05, + "loss": 0.4109, + "step": 10573 + }, + { + "epoch": 0.18271357478573402, + "grad_norm": 0.7362452232454775, + "learning_rate": 1.8801765505325465e-05, + "loss": 0.6225, + "step": 10574 + }, + { + "epoch": 0.18273085429914293, + "grad_norm": 1.4486442853495434, + "learning_rate": 1.8801499855885045e-05, + "loss": 0.6147, + "step": 10575 + }, + { + "epoch": 0.18274813381255184, + "grad_norm": 1.3648853989022869, + "learning_rate": 1.880123417887766e-05, + "loss": 0.7633, + "step": 10576 + }, + { + "epoch": 0.18276541332596075, + "grad_norm": 1.3697676934441005, + "learning_rate": 1.8800968474304136e-05, + "loss": 0.6516, + "step": 10577 + }, + { + "epoch": 0.18278269283936965, + "grad_norm": 0.8592206610103805, + "learning_rate": 1.8800702742165315e-05, + "loss": 0.4845, + "step": 10578 + }, + { + "epoch": 0.18279997235277853, + "grad_norm": 0.3939888420159318, + "learning_rate": 1.8800436982462022e-05, + "loss": 0.7467, + "step": 10579 + }, + { + "epoch": 0.18281725186618744, + "grad_norm": 0.5355999636047674, + "learning_rate": 1.880017119519509e-05, + "loss": 0.3583, + "step": 10580 + }, + { + "epoch": 0.18283453137959635, + "grad_norm": 0.6991575861959436, + "learning_rate": 1.879990538036536e-05, + "loss": 0.4495, + "step": 10581 + }, + { + "epoch": 0.18285181089300526, + "grad_norm": 0.87368793128766, + "learning_rate": 1.879963953797365e-05, + "loss": 0.4617, + "step": 10582 + }, + { + "epoch": 0.18286909040641416, + "grad_norm": 0.8501955150771427, + "learning_rate": 1.8799373668020805e-05, + "loss": 0.4877, + "step": 10583 + }, + { + "epoch": 0.18288636991982304, + "grad_norm": 0.8638131977903541, + "learning_rate": 1.8799107770507647e-05, + "loss": 0.5995, + "step": 10584 + }, + { + "epoch": 0.18290364943323195, + "grad_norm": 1.261531481653048, + "learning_rate": 1.879884184543502e-05, + "loss": 0.5808, + "step": 10585 + }, + { + "epoch": 0.18292092894664086, + "grad_norm": 1.244925973434227, + "learning_rate": 1.879857589280375e-05, + "loss": 0.5771, + "step": 10586 + }, + { + "epoch": 0.18293820846004977, + "grad_norm": 1.0689564935243314, + "learning_rate": 1.879830991261467e-05, + "loss": 0.5509, + "step": 10587 + }, + { + "epoch": 0.18295548797345867, + "grad_norm": 1.0669793349498224, + "learning_rate": 1.8798043904868617e-05, + "loss": 0.5557, + "step": 10588 + }, + { + "epoch": 0.18297276748686758, + "grad_norm": 0.3868366185592999, + "learning_rate": 1.879777786956642e-05, + "loss": 0.6186, + "step": 10589 + }, + { + "epoch": 0.18299004700027646, + "grad_norm": 1.1792587471866123, + "learning_rate": 1.8797511806708918e-05, + "loss": 0.4856, + "step": 10590 + }, + { + "epoch": 0.18300732651368537, + "grad_norm": 1.3338348060897138, + "learning_rate": 1.8797245716296935e-05, + "loss": 0.768, + "step": 10591 + }, + { + "epoch": 0.18302460602709428, + "grad_norm": 1.5031045868511985, + "learning_rate": 1.8796979598331317e-05, + "loss": 0.665, + "step": 10592 + }, + { + "epoch": 0.18304188554050319, + "grad_norm": 0.6127473537600284, + "learning_rate": 1.879671345281288e-05, + "loss": 0.4518, + "step": 10593 + }, + { + "epoch": 0.1830591650539121, + "grad_norm": 1.179748968567265, + "learning_rate": 1.8796447279742477e-05, + "loss": 0.7948, + "step": 10594 + }, + { + "epoch": 0.18307644456732097, + "grad_norm": 0.7076768065306017, + "learning_rate": 1.879618107912093e-05, + "loss": 0.6665, + "step": 10595 + }, + { + "epoch": 0.18309372408072988, + "grad_norm": 0.440993403590456, + "learning_rate": 1.8795914850949074e-05, + "loss": 0.7564, + "step": 10596 + }, + { + "epoch": 0.1831110035941388, + "grad_norm": 1.4952049327283834, + "learning_rate": 1.8795648595227746e-05, + "loss": 0.4043, + "step": 10597 + }, + { + "epoch": 0.1831282831075477, + "grad_norm": 0.9424311433716863, + "learning_rate": 1.879538231195778e-05, + "loss": 0.7893, + "step": 10598 + }, + { + "epoch": 0.1831455626209566, + "grad_norm": 1.0093624435035795, + "learning_rate": 1.8795116001140004e-05, + "loss": 0.4756, + "step": 10599 + }, + { + "epoch": 0.18316284213436548, + "grad_norm": 1.0379811432801669, + "learning_rate": 1.8794849662775257e-05, + "loss": 0.84, + "step": 10600 + }, + { + "epoch": 0.1831801216477744, + "grad_norm": 1.5427866380004487, + "learning_rate": 1.8794583296864372e-05, + "loss": 0.8108, + "step": 10601 + }, + { + "epoch": 0.1831974011611833, + "grad_norm": 0.6202590241845741, + "learning_rate": 1.879431690340819e-05, + "loss": 0.5489, + "step": 10602 + }, + { + "epoch": 0.1832146806745922, + "grad_norm": 1.3521107039150715, + "learning_rate": 1.879405048240753e-05, + "loss": 0.6441, + "step": 10603 + }, + { + "epoch": 0.18323196018800111, + "grad_norm": 1.1402879229858693, + "learning_rate": 1.879378403386324e-05, + "loss": 0.67, + "step": 10604 + }, + { + "epoch": 0.18324923970141002, + "grad_norm": 1.4713775486490142, + "learning_rate": 1.879351755777615e-05, + "loss": 0.6638, + "step": 10605 + }, + { + "epoch": 0.1832665192148189, + "grad_norm": 0.8195382080020931, + "learning_rate": 1.8793251054147094e-05, + "loss": 0.5632, + "step": 10606 + }, + { + "epoch": 0.1832837987282278, + "grad_norm": 1.273190217382984, + "learning_rate": 1.879298452297691e-05, + "loss": 0.5014, + "step": 10607 + }, + { + "epoch": 0.18330107824163672, + "grad_norm": 1.4648751587357014, + "learning_rate": 1.8792717964266427e-05, + "loss": 0.7278, + "step": 10608 + }, + { + "epoch": 0.18331835775504562, + "grad_norm": 1.420340416678003, + "learning_rate": 1.8792451378016485e-05, + "loss": 0.6052, + "step": 10609 + }, + { + "epoch": 0.18333563726845453, + "grad_norm": 0.43396233944075124, + "learning_rate": 1.8792184764227913e-05, + "loss": 0.6405, + "step": 10610 + }, + { + "epoch": 0.1833529167818634, + "grad_norm": 0.9030058284971363, + "learning_rate": 1.8791918122901555e-05, + "loss": 0.4952, + "step": 10611 + }, + { + "epoch": 0.18337019629527232, + "grad_norm": 1.4299845523480037, + "learning_rate": 1.8791651454038243e-05, + "loss": 0.5653, + "step": 10612 + }, + { + "epoch": 0.18338747580868123, + "grad_norm": 0.7664049981795683, + "learning_rate": 1.8791384757638805e-05, + "loss": 0.5199, + "step": 10613 + }, + { + "epoch": 0.18340475532209013, + "grad_norm": 0.914539853699702, + "learning_rate": 1.8791118033704084e-05, + "loss": 0.6612, + "step": 10614 + }, + { + "epoch": 0.18342203483549904, + "grad_norm": 0.8322412501484225, + "learning_rate": 1.8790851282234914e-05, + "loss": 0.6845, + "step": 10615 + }, + { + "epoch": 0.18343931434890792, + "grad_norm": 1.4600920617076267, + "learning_rate": 1.879058450323213e-05, + "loss": 0.7612, + "step": 10616 + }, + { + "epoch": 0.18345659386231683, + "grad_norm": 1.4285494832623011, + "learning_rate": 1.8790317696696564e-05, + "loss": 0.6739, + "step": 10617 + }, + { + "epoch": 0.18347387337572574, + "grad_norm": 1.0680471890118863, + "learning_rate": 1.8790050862629058e-05, + "loss": 0.6965, + "step": 10618 + }, + { + "epoch": 0.18349115288913465, + "grad_norm": 0.4258695497667565, + "learning_rate": 1.8789784001030443e-05, + "loss": 0.5971, + "step": 10619 + }, + { + "epoch": 0.18350843240254355, + "grad_norm": 1.039050426199225, + "learning_rate": 1.878951711190156e-05, + "loss": 0.5327, + "step": 10620 + }, + { + "epoch": 0.18352571191595243, + "grad_norm": 0.8416496639574806, + "learning_rate": 1.878925019524324e-05, + "loss": 0.5475, + "step": 10621 + }, + { + "epoch": 0.18354299142936134, + "grad_norm": 1.1067307059878277, + "learning_rate": 1.8788983251056316e-05, + "loss": 0.5775, + "step": 10622 + }, + { + "epoch": 0.18356027094277025, + "grad_norm": 0.5371601062216557, + "learning_rate": 1.8788716279341633e-05, + "loss": 1.0109, + "step": 10623 + }, + { + "epoch": 0.18357755045617916, + "grad_norm": 1.1938870263202603, + "learning_rate": 1.878844928010002e-05, + "loss": 0.5454, + "step": 10624 + }, + { + "epoch": 0.18359482996958806, + "grad_norm": 0.42059545700898054, + "learning_rate": 1.878818225333232e-05, + "loss": 0.5517, + "step": 10625 + }, + { + "epoch": 0.18361210948299697, + "grad_norm": 1.0518767034108882, + "learning_rate": 1.878791519903936e-05, + "loss": 0.5661, + "step": 10626 + }, + { + "epoch": 0.18362938899640585, + "grad_norm": 0.9587198737001565, + "learning_rate": 1.8787648117221985e-05, + "loss": 0.6002, + "step": 10627 + }, + { + "epoch": 0.18364666850981476, + "grad_norm": 1.0546172783392975, + "learning_rate": 1.8787381007881025e-05, + "loss": 0.6496, + "step": 10628 + }, + { + "epoch": 0.18366394802322367, + "grad_norm": 1.2233548937938716, + "learning_rate": 1.8787113871017326e-05, + "loss": 0.5599, + "step": 10629 + }, + { + "epoch": 0.18368122753663257, + "grad_norm": 1.0509193005692454, + "learning_rate": 1.8786846706631717e-05, + "loss": 0.569, + "step": 10630 + }, + { + "epoch": 0.18369850705004148, + "grad_norm": 0.818733514819908, + "learning_rate": 1.8786579514725032e-05, + "loss": 0.526, + "step": 10631 + }, + { + "epoch": 0.18371578656345036, + "grad_norm": 0.7159089299084277, + "learning_rate": 1.8786312295298118e-05, + "loss": 0.384, + "step": 10632 + }, + { + "epoch": 0.18373306607685927, + "grad_norm": 1.134040071823688, + "learning_rate": 1.87860450483518e-05, + "loss": 0.7965, + "step": 10633 + }, + { + "epoch": 0.18375034559026818, + "grad_norm": 1.4516319123187993, + "learning_rate": 1.8785777773886925e-05, + "loss": 0.4804, + "step": 10634 + }, + { + "epoch": 0.18376762510367708, + "grad_norm": 1.5129021971624048, + "learning_rate": 1.8785510471904324e-05, + "loss": 0.5033, + "step": 10635 + }, + { + "epoch": 0.183784904617086, + "grad_norm": 0.9609966973411329, + "learning_rate": 1.8785243142404838e-05, + "loss": 0.3746, + "step": 10636 + }, + { + "epoch": 0.18380218413049487, + "grad_norm": 1.2154352280238994, + "learning_rate": 1.8784975785389303e-05, + "loss": 0.7114, + "step": 10637 + }, + { + "epoch": 0.18381946364390378, + "grad_norm": 0.92626235194496, + "learning_rate": 1.8784708400858558e-05, + "loss": 0.4293, + "step": 10638 + }, + { + "epoch": 0.1838367431573127, + "grad_norm": 1.2576920859991179, + "learning_rate": 1.8784440988813434e-05, + "loss": 0.5242, + "step": 10639 + }, + { + "epoch": 0.1838540226707216, + "grad_norm": 1.4076191618404061, + "learning_rate": 1.878417354925478e-05, + "loss": 0.5478, + "step": 10640 + }, + { + "epoch": 0.1838713021841305, + "grad_norm": 0.8739253890498571, + "learning_rate": 1.878390608218342e-05, + "loss": 0.8951, + "step": 10641 + }, + { + "epoch": 0.1838885816975394, + "grad_norm": 1.256564617425176, + "learning_rate": 1.87836385876002e-05, + "loss": 0.7788, + "step": 10642 + }, + { + "epoch": 0.1839058612109483, + "grad_norm": 1.016490721993514, + "learning_rate": 1.8783371065505958e-05, + "loss": 0.7087, + "step": 10643 + }, + { + "epoch": 0.1839231407243572, + "grad_norm": 1.105457762720377, + "learning_rate": 1.8783103515901532e-05, + "loss": 0.6766, + "step": 10644 + }, + { + "epoch": 0.1839404202377661, + "grad_norm": 0.868943145409582, + "learning_rate": 1.8782835938787756e-05, + "loss": 0.6928, + "step": 10645 + }, + { + "epoch": 0.183957699751175, + "grad_norm": 1.0567712641523912, + "learning_rate": 1.878256833416547e-05, + "loss": 0.4149, + "step": 10646 + }, + { + "epoch": 0.18397497926458392, + "grad_norm": 1.9036254467387268, + "learning_rate": 1.8782300702035514e-05, + "loss": 0.7146, + "step": 10647 + }, + { + "epoch": 0.1839922587779928, + "grad_norm": 1.5021464609812296, + "learning_rate": 1.8782033042398723e-05, + "loss": 0.5791, + "step": 10648 + }, + { + "epoch": 0.1840095382914017, + "grad_norm": 0.8290297412942113, + "learning_rate": 1.878176535525594e-05, + "loss": 0.5568, + "step": 10649 + }, + { + "epoch": 0.18402681780481062, + "grad_norm": 0.4671551251501542, + "learning_rate": 1.8781497640607997e-05, + "loss": 0.6731, + "step": 10650 + }, + { + "epoch": 0.18404409731821952, + "grad_norm": 0.8928811530027639, + "learning_rate": 1.8781229898455736e-05, + "loss": 0.5035, + "step": 10651 + }, + { + "epoch": 0.18406137683162843, + "grad_norm": 1.003296023312497, + "learning_rate": 1.8780962128799997e-05, + "loss": 0.5448, + "step": 10652 + }, + { + "epoch": 0.1840786563450373, + "grad_norm": 0.46009562119624087, + "learning_rate": 1.878069433164162e-05, + "loss": 0.7187, + "step": 10653 + }, + { + "epoch": 0.18409593585844622, + "grad_norm": 0.9622792568184496, + "learning_rate": 1.878042650698144e-05, + "loss": 0.6925, + "step": 10654 + }, + { + "epoch": 0.18411321537185513, + "grad_norm": 0.7960567632090875, + "learning_rate": 1.8780158654820292e-05, + "loss": 0.6257, + "step": 10655 + }, + { + "epoch": 0.18413049488526403, + "grad_norm": 0.9406513318474446, + "learning_rate": 1.8779890775159026e-05, + "loss": 0.6671, + "step": 10656 + }, + { + "epoch": 0.18414777439867294, + "grad_norm": 1.0424846075749943, + "learning_rate": 1.8779622867998472e-05, + "loss": 0.5343, + "step": 10657 + }, + { + "epoch": 0.18416505391208182, + "grad_norm": 0.8303066056793798, + "learning_rate": 1.877935493333947e-05, + "loss": 0.6595, + "step": 10658 + }, + { + "epoch": 0.18418233342549073, + "grad_norm": 1.3516395565882062, + "learning_rate": 1.8779086971182864e-05, + "loss": 0.7536, + "step": 10659 + }, + { + "epoch": 0.18419961293889964, + "grad_norm": 0.5979904970256602, + "learning_rate": 1.877881898152949e-05, + "loss": 0.4061, + "step": 10660 + }, + { + "epoch": 0.18421689245230854, + "grad_norm": 0.8687973751364757, + "learning_rate": 1.877855096438019e-05, + "loss": 0.5518, + "step": 10661 + }, + { + "epoch": 0.18423417196571745, + "grad_norm": 1.0717258922646844, + "learning_rate": 1.8778282919735798e-05, + "loss": 0.6348, + "step": 10662 + }, + { + "epoch": 0.18425145147912636, + "grad_norm": 1.0539969322391947, + "learning_rate": 1.8778014847597156e-05, + "loss": 0.6275, + "step": 10663 + }, + { + "epoch": 0.18426873099253524, + "grad_norm": 1.0556319574303645, + "learning_rate": 1.8777746747965103e-05, + "loss": 0.5392, + "step": 10664 + }, + { + "epoch": 0.18428601050594415, + "grad_norm": 1.179843205792114, + "learning_rate": 1.8777478620840487e-05, + "loss": 0.5831, + "step": 10665 + }, + { + "epoch": 0.18430329001935306, + "grad_norm": 1.167118409540653, + "learning_rate": 1.8777210466224133e-05, + "loss": 0.4638, + "step": 10666 + }, + { + "epoch": 0.18432056953276196, + "grad_norm": 1.8954332849261104, + "learning_rate": 1.8776942284116895e-05, + "loss": 0.6848, + "step": 10667 + }, + { + "epoch": 0.18433784904617087, + "grad_norm": 2.0306794513028104, + "learning_rate": 1.8776674074519605e-05, + "loss": 0.6529, + "step": 10668 + }, + { + "epoch": 0.18435512855957975, + "grad_norm": 0.8841887102292569, + "learning_rate": 1.8776405837433102e-05, + "loss": 0.5103, + "step": 10669 + }, + { + "epoch": 0.18437240807298866, + "grad_norm": 1.1551399506494238, + "learning_rate": 1.8776137572858234e-05, + "loss": 0.543, + "step": 10670 + }, + { + "epoch": 0.18438968758639757, + "grad_norm": 1.2416581676276404, + "learning_rate": 1.877586928079583e-05, + "loss": 0.967, + "step": 10671 + }, + { + "epoch": 0.18440696709980647, + "grad_norm": 1.0182486241155617, + "learning_rate": 1.877560096124674e-05, + "loss": 0.6049, + "step": 10672 + }, + { + "epoch": 0.18442424661321538, + "grad_norm": 0.9403845637304594, + "learning_rate": 1.87753326142118e-05, + "loss": 0.5105, + "step": 10673 + }, + { + "epoch": 0.18444152612662426, + "grad_norm": 0.5412204577414761, + "learning_rate": 1.8775064239691856e-05, + "loss": 1.3591, + "step": 10674 + }, + { + "epoch": 0.18445880564003317, + "grad_norm": 1.244117921438986, + "learning_rate": 1.877479583768774e-05, + "loss": 0.5843, + "step": 10675 + }, + { + "epoch": 0.18447608515344208, + "grad_norm": 0.8523925171195135, + "learning_rate": 1.8774527408200294e-05, + "loss": 0.7329, + "step": 10676 + }, + { + "epoch": 0.18449336466685098, + "grad_norm": 0.9733761310009307, + "learning_rate": 1.8774258951230366e-05, + "loss": 0.4426, + "step": 10677 + }, + { + "epoch": 0.1845106441802599, + "grad_norm": 1.8376696553531573, + "learning_rate": 1.877399046677879e-05, + "loss": 0.8139, + "step": 10678 + }, + { + "epoch": 0.1845279236936688, + "grad_norm": 0.9825141201273526, + "learning_rate": 1.8773721954846408e-05, + "loss": 0.3965, + "step": 10679 + }, + { + "epoch": 0.18454520320707768, + "grad_norm": 1.2434571351546277, + "learning_rate": 1.8773453415434062e-05, + "loss": 0.5229, + "step": 10680 + }, + { + "epoch": 0.1845624827204866, + "grad_norm": 1.4524460401358448, + "learning_rate": 1.8773184848542595e-05, + "loss": 0.5845, + "step": 10681 + }, + { + "epoch": 0.1845797622338955, + "grad_norm": 1.0810089515518804, + "learning_rate": 1.8772916254172844e-05, + "loss": 0.6473, + "step": 10682 + }, + { + "epoch": 0.1845970417473044, + "grad_norm": 1.0325122776279458, + "learning_rate": 1.8772647632325656e-05, + "loss": 0.582, + "step": 10683 + }, + { + "epoch": 0.1846143212607133, + "grad_norm": 1.3269549065775275, + "learning_rate": 1.8772378983001864e-05, + "loss": 0.5752, + "step": 10684 + }, + { + "epoch": 0.1846316007741222, + "grad_norm": 0.6949690804482538, + "learning_rate": 1.877211030620232e-05, + "loss": 0.8195, + "step": 10685 + }, + { + "epoch": 0.1846488802875311, + "grad_norm": 0.9164359027304226, + "learning_rate": 1.8771841601927854e-05, + "loss": 0.5766, + "step": 10686 + }, + { + "epoch": 0.18466615980094, + "grad_norm": 0.7032518362542622, + "learning_rate": 1.8771572870179317e-05, + "loss": 0.3943, + "step": 10687 + }, + { + "epoch": 0.1846834393143489, + "grad_norm": 0.8168937070986545, + "learning_rate": 1.877130411095755e-05, + "loss": 0.6913, + "step": 10688 + }, + { + "epoch": 0.18470071882775782, + "grad_norm": 1.212591621695587, + "learning_rate": 1.877103532426339e-05, + "loss": 0.5575, + "step": 10689 + }, + { + "epoch": 0.1847179983411667, + "grad_norm": 1.0236794302806325, + "learning_rate": 1.8770766510097678e-05, + "loss": 0.6902, + "step": 10690 + }, + { + "epoch": 0.1847352778545756, + "grad_norm": 0.8525947652272483, + "learning_rate": 1.877049766846126e-05, + "loss": 0.652, + "step": 10691 + }, + { + "epoch": 0.18475255736798452, + "grad_norm": 1.1411772357968755, + "learning_rate": 1.8770228799354976e-05, + "loss": 0.4246, + "step": 10692 + }, + { + "epoch": 0.18476983688139342, + "grad_norm": 1.1592259754506395, + "learning_rate": 1.8769959902779668e-05, + "loss": 0.5912, + "step": 10693 + }, + { + "epoch": 0.18478711639480233, + "grad_norm": 0.7769460648489355, + "learning_rate": 1.8769690978736183e-05, + "loss": 0.5696, + "step": 10694 + }, + { + "epoch": 0.18480439590821124, + "grad_norm": 1.2296071947365244, + "learning_rate": 1.876942202722536e-05, + "loss": 0.6488, + "step": 10695 + }, + { + "epoch": 0.18482167542162012, + "grad_norm": 1.1635357421173378, + "learning_rate": 1.8769153048248038e-05, + "loss": 0.715, + "step": 10696 + }, + { + "epoch": 0.18483895493502903, + "grad_norm": 0.7709913995806552, + "learning_rate": 1.876888404180506e-05, + "loss": 0.336, + "step": 10697 + }, + { + "epoch": 0.18485623444843793, + "grad_norm": 0.9796731213820751, + "learning_rate": 1.8768615007897274e-05, + "loss": 0.5585, + "step": 10698 + }, + { + "epoch": 0.18487351396184684, + "grad_norm": 0.7026888680222022, + "learning_rate": 1.876834594652552e-05, + "loss": 0.6317, + "step": 10699 + }, + { + "epoch": 0.18489079347525575, + "grad_norm": 0.9120791165897868, + "learning_rate": 1.876807685769064e-05, + "loss": 0.4955, + "step": 10700 + }, + { + "epoch": 0.18490807298866463, + "grad_norm": 0.7439940753613528, + "learning_rate": 1.876780774139347e-05, + "loss": 0.6071, + "step": 10701 + }, + { + "epoch": 0.18492535250207354, + "grad_norm": 1.3723056434951475, + "learning_rate": 1.8767538597634867e-05, + "loss": 0.769, + "step": 10702 + }, + { + "epoch": 0.18494263201548244, + "grad_norm": 0.8472703220715926, + "learning_rate": 1.8767269426415665e-05, + "loss": 0.616, + "step": 10703 + }, + { + "epoch": 0.18495991152889135, + "grad_norm": 1.2495784295381638, + "learning_rate": 1.8767000227736712e-05, + "loss": 0.6546, + "step": 10704 + }, + { + "epoch": 0.18497719104230026, + "grad_norm": 1.2467859696732717, + "learning_rate": 1.8766731001598845e-05, + "loss": 0.636, + "step": 10705 + }, + { + "epoch": 0.18499447055570914, + "grad_norm": 1.6510142707749431, + "learning_rate": 1.8766461748002907e-05, + "loss": 0.6552, + "step": 10706 + }, + { + "epoch": 0.18501175006911805, + "grad_norm": 0.9575790513333731, + "learning_rate": 1.876619246694975e-05, + "loss": 0.672, + "step": 10707 + }, + { + "epoch": 0.18502902958252695, + "grad_norm": 1.2780703458287894, + "learning_rate": 1.876592315844021e-05, + "loss": 0.4825, + "step": 10708 + }, + { + "epoch": 0.18504630909593586, + "grad_norm": 0.9391289902863407, + "learning_rate": 1.876565382247513e-05, + "loss": 0.554, + "step": 10709 + }, + { + "epoch": 0.18506358860934477, + "grad_norm": 0.5636022782895529, + "learning_rate": 1.876538445905536e-05, + "loss": 0.7238, + "step": 10710 + }, + { + "epoch": 0.18508086812275365, + "grad_norm": 1.0989340191732384, + "learning_rate": 1.876511506818174e-05, + "loss": 0.7053, + "step": 10711 + }, + { + "epoch": 0.18509814763616256, + "grad_norm": 0.3763602251172995, + "learning_rate": 1.876484564985511e-05, + "loss": 0.5942, + "step": 10712 + }, + { + "epoch": 0.18511542714957147, + "grad_norm": 0.634335839579713, + "learning_rate": 1.876457620407632e-05, + "loss": 0.4128, + "step": 10713 + }, + { + "epoch": 0.18513270666298037, + "grad_norm": 1.1753785346261996, + "learning_rate": 1.876430673084621e-05, + "loss": 0.4658, + "step": 10714 + }, + { + "epoch": 0.18514998617638928, + "grad_norm": 1.0781893564949632, + "learning_rate": 1.8764037230165625e-05, + "loss": 0.6716, + "step": 10715 + }, + { + "epoch": 0.1851672656897982, + "grad_norm": 0.7238854527918293, + "learning_rate": 1.8763767702035413e-05, + "loss": 0.4542, + "step": 10716 + }, + { + "epoch": 0.18518454520320707, + "grad_norm": 1.2284797909993155, + "learning_rate": 1.8763498146456407e-05, + "loss": 0.5342, + "step": 10717 + }, + { + "epoch": 0.18520182471661598, + "grad_norm": 1.5013817308847388, + "learning_rate": 1.8763228563429464e-05, + "loss": 0.5114, + "step": 10718 + }, + { + "epoch": 0.18521910423002488, + "grad_norm": 0.4249570789688032, + "learning_rate": 1.8762958952955425e-05, + "loss": 0.7518, + "step": 10719 + }, + { + "epoch": 0.1852363837434338, + "grad_norm": 0.8522373861617127, + "learning_rate": 1.8762689315035127e-05, + "loss": 0.4508, + "step": 10720 + }, + { + "epoch": 0.1852536632568427, + "grad_norm": 1.2748598541920189, + "learning_rate": 1.8762419649669423e-05, + "loss": 0.5693, + "step": 10721 + }, + { + "epoch": 0.18527094277025158, + "grad_norm": 1.0572408662768107, + "learning_rate": 1.8762149956859156e-05, + "loss": 0.4715, + "step": 10722 + }, + { + "epoch": 0.1852882222836605, + "grad_norm": 0.908545553951558, + "learning_rate": 1.876188023660517e-05, + "loss": 0.4827, + "step": 10723 + }, + { + "epoch": 0.1853055017970694, + "grad_norm": 0.8539366279891151, + "learning_rate": 1.8761610488908306e-05, + "loss": 0.3707, + "step": 10724 + }, + { + "epoch": 0.1853227813104783, + "grad_norm": 0.825127983897492, + "learning_rate": 1.8761340713769415e-05, + "loss": 0.5384, + "step": 10725 + }, + { + "epoch": 0.1853400608238872, + "grad_norm": 1.0456861223814125, + "learning_rate": 1.8761070911189337e-05, + "loss": 0.4817, + "step": 10726 + }, + { + "epoch": 0.1853573403372961, + "grad_norm": 0.8301132248254816, + "learning_rate": 1.876080108116892e-05, + "loss": 0.608, + "step": 10727 + }, + { + "epoch": 0.185374619850705, + "grad_norm": 1.2714686322978306, + "learning_rate": 1.8760531223709007e-05, + "loss": 0.6854, + "step": 10728 + }, + { + "epoch": 0.1853918993641139, + "grad_norm": 1.016982378185161, + "learning_rate": 1.8760261338810445e-05, + "loss": 0.5042, + "step": 10729 + }, + { + "epoch": 0.1854091788775228, + "grad_norm": 1.1892179961477678, + "learning_rate": 1.875999142647408e-05, + "loss": 0.5255, + "step": 10730 + }, + { + "epoch": 0.18542645839093172, + "grad_norm": 1.508584579419394, + "learning_rate": 1.8759721486700754e-05, + "loss": 0.5596, + "step": 10731 + }, + { + "epoch": 0.18544373790434063, + "grad_norm": 0.43353737810070875, + "learning_rate": 1.8759451519491315e-05, + "loss": 0.4853, + "step": 10732 + }, + { + "epoch": 0.1854610174177495, + "grad_norm": 1.066092762092257, + "learning_rate": 1.875918152484661e-05, + "loss": 0.3415, + "step": 10733 + }, + { + "epoch": 0.18547829693115842, + "grad_norm": 0.7301388128563624, + "learning_rate": 1.8758911502767483e-05, + "loss": 0.4323, + "step": 10734 + }, + { + "epoch": 0.18549557644456732, + "grad_norm": 0.353189409983984, + "learning_rate": 1.8758641453254776e-05, + "loss": 0.7681, + "step": 10735 + }, + { + "epoch": 0.18551285595797623, + "grad_norm": 1.2986971974934904, + "learning_rate": 1.8758371376309343e-05, + "loss": 0.7711, + "step": 10736 + }, + { + "epoch": 0.18553013547138514, + "grad_norm": 1.0830261273538826, + "learning_rate": 1.8758101271932022e-05, + "loss": 0.4596, + "step": 10737 + }, + { + "epoch": 0.18554741498479402, + "grad_norm": 1.0473543974789143, + "learning_rate": 1.875783114012366e-05, + "loss": 0.5925, + "step": 10738 + }, + { + "epoch": 0.18556469449820293, + "grad_norm": 1.405054078688357, + "learning_rate": 1.875756098088511e-05, + "loss": 0.664, + "step": 10739 + }, + { + "epoch": 0.18558197401161183, + "grad_norm": 0.9749532192127282, + "learning_rate": 1.875729079421721e-05, + "loss": 0.6467, + "step": 10740 + }, + { + "epoch": 0.18559925352502074, + "grad_norm": 1.1481205253173137, + "learning_rate": 1.8757020580120816e-05, + "loss": 0.7091, + "step": 10741 + }, + { + "epoch": 0.18561653303842965, + "grad_norm": 0.9040809230557137, + "learning_rate": 1.875675033859676e-05, + "loss": 0.4531, + "step": 10742 + }, + { + "epoch": 0.18563381255183853, + "grad_norm": 1.3115431919581297, + "learning_rate": 1.8756480069645903e-05, + "loss": 0.5558, + "step": 10743 + }, + { + "epoch": 0.18565109206524744, + "grad_norm": 0.8396769727702033, + "learning_rate": 1.875620977326908e-05, + "loss": 0.6441, + "step": 10744 + }, + { + "epoch": 0.18566837157865634, + "grad_norm": 1.232015314399842, + "learning_rate": 1.8755939449467145e-05, + "loss": 0.5751, + "step": 10745 + }, + { + "epoch": 0.18568565109206525, + "grad_norm": 0.7180289861334301, + "learning_rate": 1.875566909824094e-05, + "loss": 0.638, + "step": 10746 + }, + { + "epoch": 0.18570293060547416, + "grad_norm": 1.1582801515686667, + "learning_rate": 1.875539871959132e-05, + "loss": 0.7203, + "step": 10747 + }, + { + "epoch": 0.18572021011888304, + "grad_norm": 0.4089053055509718, + "learning_rate": 1.875512831351912e-05, + "loss": 0.6025, + "step": 10748 + }, + { + "epoch": 0.18573748963229195, + "grad_norm": 1.2715246115307668, + "learning_rate": 1.8754857880025194e-05, + "loss": 0.6942, + "step": 10749 + }, + { + "epoch": 0.18575476914570085, + "grad_norm": 0.7711118909631554, + "learning_rate": 1.8754587419110387e-05, + "loss": 0.592, + "step": 10750 + }, + { + "epoch": 0.18577204865910976, + "grad_norm": 0.6250387335667064, + "learning_rate": 1.875431693077555e-05, + "loss": 0.6819, + "step": 10751 + }, + { + "epoch": 0.18578932817251867, + "grad_norm": 1.1828126470610913, + "learning_rate": 1.8754046415021525e-05, + "loss": 0.3456, + "step": 10752 + }, + { + "epoch": 0.18580660768592758, + "grad_norm": 1.2183591545069326, + "learning_rate": 1.875377587184916e-05, + "loss": 0.685, + "step": 10753 + }, + { + "epoch": 0.18582388719933646, + "grad_norm": 1.0204555883486797, + "learning_rate": 1.8753505301259305e-05, + "loss": 0.6394, + "step": 10754 + }, + { + "epoch": 0.18584116671274536, + "grad_norm": 0.9328720097712389, + "learning_rate": 1.8753234703252806e-05, + "loss": 0.447, + "step": 10755 + }, + { + "epoch": 0.18585844622615427, + "grad_norm": 0.9141742424618398, + "learning_rate": 1.875296407783051e-05, + "loss": 0.433, + "step": 10756 + }, + { + "epoch": 0.18587572573956318, + "grad_norm": 0.7594312172474992, + "learning_rate": 1.8752693424993267e-05, + "loss": 0.6577, + "step": 10757 + }, + { + "epoch": 0.1858930052529721, + "grad_norm": 1.003010015596076, + "learning_rate": 1.8752422744741922e-05, + "loss": 0.5882, + "step": 10758 + }, + { + "epoch": 0.18591028476638097, + "grad_norm": 0.8894803902695004, + "learning_rate": 1.8752152037077324e-05, + "loss": 0.362, + "step": 10759 + }, + { + "epoch": 0.18592756427978988, + "grad_norm": 1.132930494289897, + "learning_rate": 1.875188130200032e-05, + "loss": 0.4274, + "step": 10760 + }, + { + "epoch": 0.18594484379319878, + "grad_norm": 0.9767738977158185, + "learning_rate": 1.875161053951176e-05, + "loss": 0.708, + "step": 10761 + }, + { + "epoch": 0.1859621233066077, + "grad_norm": 0.5542836079762538, + "learning_rate": 1.875133974961249e-05, + "loss": 0.3934, + "step": 10762 + }, + { + "epoch": 0.1859794028200166, + "grad_norm": 0.8835214717755261, + "learning_rate": 1.8751068932303358e-05, + "loss": 0.5554, + "step": 10763 + }, + { + "epoch": 0.18599668233342548, + "grad_norm": 1.2861171225922776, + "learning_rate": 1.8750798087585217e-05, + "loss": 0.5535, + "step": 10764 + }, + { + "epoch": 0.18601396184683439, + "grad_norm": 1.6117708878788854, + "learning_rate": 1.8750527215458907e-05, + "loss": 0.59, + "step": 10765 + }, + { + "epoch": 0.1860312413602433, + "grad_norm": 0.7975858255706788, + "learning_rate": 1.875025631592528e-05, + "loss": 0.6991, + "step": 10766 + }, + { + "epoch": 0.1860485208736522, + "grad_norm": 0.8799555397435379, + "learning_rate": 1.874998538898519e-05, + "loss": 0.5633, + "step": 10767 + }, + { + "epoch": 0.1860658003870611, + "grad_norm": 0.9786808369482926, + "learning_rate": 1.8749714434639476e-05, + "loss": 0.5582, + "step": 10768 + }, + { + "epoch": 0.18608307990047002, + "grad_norm": 1.0338139143044365, + "learning_rate": 1.8749443452888996e-05, + "loss": 0.6436, + "step": 10769 + }, + { + "epoch": 0.1861003594138789, + "grad_norm": 0.9018360338547945, + "learning_rate": 1.8749172443734588e-05, + "loss": 0.5659, + "step": 10770 + }, + { + "epoch": 0.1861176389272878, + "grad_norm": 1.1058375681963333, + "learning_rate": 1.8748901407177112e-05, + "loss": 0.5497, + "step": 10771 + }, + { + "epoch": 0.1861349184406967, + "grad_norm": 0.9408275198054527, + "learning_rate": 1.8748630343217412e-05, + "loss": 0.5022, + "step": 10772 + }, + { + "epoch": 0.18615219795410562, + "grad_norm": 0.6498866718583507, + "learning_rate": 1.8748359251856336e-05, + "loss": 0.4938, + "step": 10773 + }, + { + "epoch": 0.18616947746751453, + "grad_norm": 1.0608239393560215, + "learning_rate": 1.874808813309473e-05, + "loss": 0.6434, + "step": 10774 + }, + { + "epoch": 0.1861867569809234, + "grad_norm": 1.3593370863952734, + "learning_rate": 1.8747816986933453e-05, + "loss": 0.6057, + "step": 10775 + }, + { + "epoch": 0.18620403649433231, + "grad_norm": 1.369838800468863, + "learning_rate": 1.8747545813373345e-05, + "loss": 0.6136, + "step": 10776 + }, + { + "epoch": 0.18622131600774122, + "grad_norm": 1.1984138863725555, + "learning_rate": 1.874727461241526e-05, + "loss": 0.3968, + "step": 10777 + }, + { + "epoch": 0.18623859552115013, + "grad_norm": 0.9505014417875569, + "learning_rate": 1.8747003384060046e-05, + "loss": 0.6685, + "step": 10778 + }, + { + "epoch": 0.18625587503455904, + "grad_norm": 0.4494169727468484, + "learning_rate": 1.8746732128308553e-05, + "loss": 1.0105, + "step": 10779 + }, + { + "epoch": 0.18627315454796792, + "grad_norm": 1.1716211094176152, + "learning_rate": 1.874646084516163e-05, + "loss": 0.4438, + "step": 10780 + }, + { + "epoch": 0.18629043406137683, + "grad_norm": 1.066389081828966, + "learning_rate": 1.8746189534620125e-05, + "loss": 0.5424, + "step": 10781 + }, + { + "epoch": 0.18630771357478573, + "grad_norm": 0.6842704641310889, + "learning_rate": 1.874591819668489e-05, + "loss": 0.4196, + "step": 10782 + }, + { + "epoch": 0.18632499308819464, + "grad_norm": 0.8120669011261213, + "learning_rate": 1.8745646831356778e-05, + "loss": 0.615, + "step": 10783 + }, + { + "epoch": 0.18634227260160355, + "grad_norm": 1.2518103299075856, + "learning_rate": 1.8745375438636632e-05, + "loss": 0.4985, + "step": 10784 + }, + { + "epoch": 0.18635955211501243, + "grad_norm": 0.7325633129090634, + "learning_rate": 1.8745104018525307e-05, + "loss": 0.4372, + "step": 10785 + }, + { + "epoch": 0.18637683162842134, + "grad_norm": 1.0438027165166925, + "learning_rate": 1.874483257102365e-05, + "loss": 0.3936, + "step": 10786 + }, + { + "epoch": 0.18639411114183024, + "grad_norm": 1.0781553400794328, + "learning_rate": 1.8744561096132517e-05, + "loss": 0.3891, + "step": 10787 + }, + { + "epoch": 0.18641139065523915, + "grad_norm": 1.0803784300872823, + "learning_rate": 1.874428959385275e-05, + "loss": 0.4964, + "step": 10788 + }, + { + "epoch": 0.18642867016864806, + "grad_norm": 1.250090461163916, + "learning_rate": 1.8744018064185207e-05, + "loss": 0.5664, + "step": 10789 + }, + { + "epoch": 0.18644594968205697, + "grad_norm": 1.2711457538163002, + "learning_rate": 1.874374650713073e-05, + "loss": 0.6599, + "step": 10790 + }, + { + "epoch": 0.18646322919546585, + "grad_norm": 0.6259000290125702, + "learning_rate": 1.874347492269018e-05, + "loss": 0.4427, + "step": 10791 + }, + { + "epoch": 0.18648050870887475, + "grad_norm": 1.0623164617645788, + "learning_rate": 1.8743203310864397e-05, + "loss": 0.4909, + "step": 10792 + }, + { + "epoch": 0.18649778822228366, + "grad_norm": 1.0794470810258527, + "learning_rate": 1.8742931671654238e-05, + "loss": 0.7187, + "step": 10793 + }, + { + "epoch": 0.18651506773569257, + "grad_norm": 0.6564019375769368, + "learning_rate": 1.8742660005060554e-05, + "loss": 0.4361, + "step": 10794 + }, + { + "epoch": 0.18653234724910148, + "grad_norm": 1.2629761217376738, + "learning_rate": 1.8742388311084196e-05, + "loss": 0.5774, + "step": 10795 + }, + { + "epoch": 0.18654962676251036, + "grad_norm": 0.9585227957266905, + "learning_rate": 1.874211658972601e-05, + "loss": 0.4011, + "step": 10796 + }, + { + "epoch": 0.18656690627591926, + "grad_norm": 1.24024172718633, + "learning_rate": 1.874184484098685e-05, + "loss": 0.4586, + "step": 10797 + }, + { + "epoch": 0.18658418578932817, + "grad_norm": 0.41606652163621377, + "learning_rate": 1.8741573064867572e-05, + "loss": 0.7905, + "step": 10798 + }, + { + "epoch": 0.18660146530273708, + "grad_norm": 1.1944661391693299, + "learning_rate": 1.874130126136902e-05, + "loss": 0.5633, + "step": 10799 + }, + { + "epoch": 0.186618744816146, + "grad_norm": 0.9126180191367489, + "learning_rate": 1.874102943049205e-05, + "loss": 0.3028, + "step": 10800 + }, + { + "epoch": 0.18663602432955487, + "grad_norm": 1.0058733504598074, + "learning_rate": 1.874075757223751e-05, + "loss": 0.7134, + "step": 10801 + }, + { + "epoch": 0.18665330384296377, + "grad_norm": 1.257247174687733, + "learning_rate": 1.8740485686606252e-05, + "loss": 0.7637, + "step": 10802 + }, + { + "epoch": 0.18667058335637268, + "grad_norm": 1.5564443434042636, + "learning_rate": 1.8740213773599127e-05, + "loss": 0.6983, + "step": 10803 + }, + { + "epoch": 0.1866878628697816, + "grad_norm": 1.1006566395168687, + "learning_rate": 1.873994183321699e-05, + "loss": 0.618, + "step": 10804 + }, + { + "epoch": 0.1867051423831905, + "grad_norm": 0.8670871501306112, + "learning_rate": 1.8739669865460693e-05, + "loss": 0.6486, + "step": 10805 + }, + { + "epoch": 0.1867224218965994, + "grad_norm": 0.6295955041254778, + "learning_rate": 1.8739397870331084e-05, + "loss": 0.4808, + "step": 10806 + }, + { + "epoch": 0.18673970141000829, + "grad_norm": 1.3011995361367406, + "learning_rate": 1.8739125847829016e-05, + "loss": 0.7157, + "step": 10807 + }, + { + "epoch": 0.1867569809234172, + "grad_norm": 1.1728949677233778, + "learning_rate": 1.8738853797955346e-05, + "loss": 0.4884, + "step": 10808 + }, + { + "epoch": 0.1867742604368261, + "grad_norm": 0.6246475905864981, + "learning_rate": 1.8738581720710918e-05, + "loss": 0.5427, + "step": 10809 + }, + { + "epoch": 0.186791539950235, + "grad_norm": 0.8530479691190372, + "learning_rate": 1.8738309616096588e-05, + "loss": 0.596, + "step": 10810 + }, + { + "epoch": 0.18680881946364392, + "grad_norm": 0.5999687463816137, + "learning_rate": 1.873803748411321e-05, + "loss": 0.4704, + "step": 10811 + }, + { + "epoch": 0.1868260989770528, + "grad_norm": 1.051651459217579, + "learning_rate": 1.873776532476163e-05, + "loss": 0.4761, + "step": 10812 + }, + { + "epoch": 0.1868433784904617, + "grad_norm": 0.8334952532524882, + "learning_rate": 1.873749313804271e-05, + "loss": 0.6293, + "step": 10813 + }, + { + "epoch": 0.1868606580038706, + "grad_norm": 1.121807081941914, + "learning_rate": 1.8737220923957297e-05, + "loss": 0.7068, + "step": 10814 + }, + { + "epoch": 0.18687793751727952, + "grad_norm": 1.2418485811839894, + "learning_rate": 1.873694868250624e-05, + "loss": 0.7314, + "step": 10815 + }, + { + "epoch": 0.18689521703068843, + "grad_norm": 0.875540135237933, + "learning_rate": 1.87366764136904e-05, + "loss": 0.6497, + "step": 10816 + }, + { + "epoch": 0.1869124965440973, + "grad_norm": 1.0257957604750743, + "learning_rate": 1.8736404117510626e-05, + "loss": 0.9411, + "step": 10817 + }, + { + "epoch": 0.18692977605750621, + "grad_norm": 1.3199194651371042, + "learning_rate": 1.873613179396777e-05, + "loss": 0.5248, + "step": 10818 + }, + { + "epoch": 0.18694705557091512, + "grad_norm": 1.4772309761389, + "learning_rate": 1.873585944306268e-05, + "loss": 0.708, + "step": 10819 + }, + { + "epoch": 0.18696433508432403, + "grad_norm": 1.2974527021523934, + "learning_rate": 1.873558706479622e-05, + "loss": 0.7315, + "step": 10820 + }, + { + "epoch": 0.18698161459773294, + "grad_norm": 0.716989800203041, + "learning_rate": 1.8735314659169234e-05, + "loss": 0.4694, + "step": 10821 + }, + { + "epoch": 0.18699889411114182, + "grad_norm": 0.6700653455301995, + "learning_rate": 1.8735042226182582e-05, + "loss": 0.7022, + "step": 10822 + }, + { + "epoch": 0.18701617362455072, + "grad_norm": 1.9558580279598687, + "learning_rate": 1.8734769765837112e-05, + "loss": 0.7926, + "step": 10823 + }, + { + "epoch": 0.18703345313795963, + "grad_norm": 1.217220817616852, + "learning_rate": 1.873449727813368e-05, + "loss": 0.6335, + "step": 10824 + }, + { + "epoch": 0.18705073265136854, + "grad_norm": 0.3592102549069781, + "learning_rate": 1.8734224763073134e-05, + "loss": 0.5831, + "step": 10825 + }, + { + "epoch": 0.18706801216477745, + "grad_norm": 1.0786754665038982, + "learning_rate": 1.873395222065634e-05, + "loss": 0.4452, + "step": 10826 + }, + { + "epoch": 0.18708529167818636, + "grad_norm": 0.8062189341908856, + "learning_rate": 1.8733679650884138e-05, + "loss": 0.7039, + "step": 10827 + }, + { + "epoch": 0.18710257119159523, + "grad_norm": 0.8360566439960231, + "learning_rate": 1.873340705375739e-05, + "loss": 0.4692, + "step": 10828 + }, + { + "epoch": 0.18711985070500414, + "grad_norm": 0.7617691047956894, + "learning_rate": 1.8733134429276946e-05, + "loss": 0.6883, + "step": 10829 + }, + { + "epoch": 0.18713713021841305, + "grad_norm": 1.8526244101879012, + "learning_rate": 1.8732861777443663e-05, + "loss": 0.6822, + "step": 10830 + }, + { + "epoch": 0.18715440973182196, + "grad_norm": 1.4160999302491062, + "learning_rate": 1.8732589098258394e-05, + "loss": 0.7514, + "step": 10831 + }, + { + "epoch": 0.18717168924523087, + "grad_norm": 0.8492031244324737, + "learning_rate": 1.8732316391721988e-05, + "loss": 0.5614, + "step": 10832 + }, + { + "epoch": 0.18718896875863975, + "grad_norm": 0.3758056743553001, + "learning_rate": 1.873204365783531e-05, + "loss": 0.6331, + "step": 10833 + }, + { + "epoch": 0.18720624827204865, + "grad_norm": 0.8867181888785851, + "learning_rate": 1.87317708965992e-05, + "loss": 0.5744, + "step": 10834 + }, + { + "epoch": 0.18722352778545756, + "grad_norm": 1.3503411210953826, + "learning_rate": 1.8731498108014525e-05, + "loss": 0.7067, + "step": 10835 + }, + { + "epoch": 0.18724080729886647, + "grad_norm": 1.0695829761869042, + "learning_rate": 1.8731225292082134e-05, + "loss": 0.5723, + "step": 10836 + }, + { + "epoch": 0.18725808681227538, + "grad_norm": 0.9793968021312288, + "learning_rate": 1.873095244880288e-05, + "loss": 0.6171, + "step": 10837 + }, + { + "epoch": 0.18727536632568426, + "grad_norm": 0.6453415634206016, + "learning_rate": 1.8730679578177625e-05, + "loss": 0.4262, + "step": 10838 + }, + { + "epoch": 0.18729264583909316, + "grad_norm": 0.8447969939011974, + "learning_rate": 1.8730406680207212e-05, + "loss": 0.7432, + "step": 10839 + }, + { + "epoch": 0.18730992535250207, + "grad_norm": 0.8361198591396809, + "learning_rate": 1.8730133754892502e-05, + "loss": 0.3535, + "step": 10840 + }, + { + "epoch": 0.18732720486591098, + "grad_norm": 0.7139707728134337, + "learning_rate": 1.8729860802234353e-05, + "loss": 0.8367, + "step": 10841 + }, + { + "epoch": 0.1873444843793199, + "grad_norm": 1.0920560910636044, + "learning_rate": 1.8729587822233614e-05, + "loss": 0.6918, + "step": 10842 + }, + { + "epoch": 0.1873617638927288, + "grad_norm": 0.6993283539895374, + "learning_rate": 1.8729314814891142e-05, + "loss": 0.5177, + "step": 10843 + }, + { + "epoch": 0.18737904340613767, + "grad_norm": 1.5981271945541433, + "learning_rate": 1.8729041780207793e-05, + "loss": 0.6943, + "step": 10844 + }, + { + "epoch": 0.18739632291954658, + "grad_norm": 1.2908423925565882, + "learning_rate": 1.8728768718184425e-05, + "loss": 0.3995, + "step": 10845 + }, + { + "epoch": 0.1874136024329555, + "grad_norm": 1.206242217785307, + "learning_rate": 1.8728495628821886e-05, + "loss": 0.6126, + "step": 10846 + }, + { + "epoch": 0.1874308819463644, + "grad_norm": 1.0324505204594894, + "learning_rate": 1.8728222512121038e-05, + "loss": 0.6271, + "step": 10847 + }, + { + "epoch": 0.1874481614597733, + "grad_norm": 1.1001458359635847, + "learning_rate": 1.8727949368082732e-05, + "loss": 0.5222, + "step": 10848 + }, + { + "epoch": 0.18746544097318218, + "grad_norm": 1.0002561814689677, + "learning_rate": 1.8727676196707825e-05, + "loss": 0.7478, + "step": 10849 + }, + { + "epoch": 0.1874827204865911, + "grad_norm": 0.7525535531051195, + "learning_rate": 1.8727402997997175e-05, + "loss": 0.5757, + "step": 10850 + }, + { + "epoch": 0.1875, + "grad_norm": 0.426493093556987, + "learning_rate": 1.8727129771951637e-05, + "loss": 0.5975, + "step": 10851 + }, + { + "epoch": 0.1875172795134089, + "grad_norm": 0.4318800199781055, + "learning_rate": 1.8726856518572058e-05, + "loss": 0.7152, + "step": 10852 + }, + { + "epoch": 0.18753455902681782, + "grad_norm": 1.0594731137814084, + "learning_rate": 1.8726583237859304e-05, + "loss": 0.5187, + "step": 10853 + }, + { + "epoch": 0.1875518385402267, + "grad_norm": 1.103103435437888, + "learning_rate": 1.872630992981423e-05, + "loss": 0.6253, + "step": 10854 + }, + { + "epoch": 0.1875691180536356, + "grad_norm": 1.2039200505757344, + "learning_rate": 1.8726036594437692e-05, + "loss": 0.8376, + "step": 10855 + }, + { + "epoch": 0.1875863975670445, + "grad_norm": 1.2367513921879283, + "learning_rate": 1.8725763231730544e-05, + "loss": 0.789, + "step": 10856 + }, + { + "epoch": 0.18760367708045342, + "grad_norm": 1.1094457777425284, + "learning_rate": 1.872548984169364e-05, + "loss": 0.6783, + "step": 10857 + }, + { + "epoch": 0.18762095659386233, + "grad_norm": 1.077857364961103, + "learning_rate": 1.872521642432784e-05, + "loss": 0.5275, + "step": 10858 + }, + { + "epoch": 0.1876382361072712, + "grad_norm": 1.0892722373147998, + "learning_rate": 1.8724942979634e-05, + "loss": 0.5422, + "step": 10859 + }, + { + "epoch": 0.1876555156206801, + "grad_norm": 0.9008013637153905, + "learning_rate": 1.872466950761297e-05, + "loss": 0.7508, + "step": 10860 + }, + { + "epoch": 0.18767279513408902, + "grad_norm": 1.172006562070508, + "learning_rate": 1.8724396008265617e-05, + "loss": 0.4499, + "step": 10861 + }, + { + "epoch": 0.18769007464749793, + "grad_norm": 1.0135772526729996, + "learning_rate": 1.8724122481592792e-05, + "loss": 0.5586, + "step": 10862 + }, + { + "epoch": 0.18770735416090684, + "grad_norm": 1.1173383211904249, + "learning_rate": 1.8723848927595352e-05, + "loss": 0.7053, + "step": 10863 + }, + { + "epoch": 0.18772463367431574, + "grad_norm": 1.1395017111690584, + "learning_rate": 1.8723575346274153e-05, + "loss": 0.599, + "step": 10864 + }, + { + "epoch": 0.18774191318772462, + "grad_norm": 0.9429328925457736, + "learning_rate": 1.8723301737630057e-05, + "loss": 0.3477, + "step": 10865 + }, + { + "epoch": 0.18775919270113353, + "grad_norm": 0.9322394111344315, + "learning_rate": 1.8723028101663913e-05, + "loss": 0.6503, + "step": 10866 + }, + { + "epoch": 0.18777647221454244, + "grad_norm": 0.8475120341736861, + "learning_rate": 1.8722754438376583e-05, + "loss": 0.6729, + "step": 10867 + }, + { + "epoch": 0.18779375172795135, + "grad_norm": 1.1651389021813983, + "learning_rate": 1.8722480747768923e-05, + "loss": 0.5122, + "step": 10868 + }, + { + "epoch": 0.18781103124136025, + "grad_norm": 0.631678494927428, + "learning_rate": 1.872220702984179e-05, + "loss": 0.4787, + "step": 10869 + }, + { + "epoch": 0.18782831075476913, + "grad_norm": 1.1238751382128405, + "learning_rate": 1.8721933284596045e-05, + "loss": 0.7494, + "step": 10870 + }, + { + "epoch": 0.18784559026817804, + "grad_norm": 0.8291998665301012, + "learning_rate": 1.872165951203254e-05, + "loss": 0.7996, + "step": 10871 + }, + { + "epoch": 0.18786286978158695, + "grad_norm": 1.4113272336480684, + "learning_rate": 1.8721385712152135e-05, + "loss": 0.6678, + "step": 10872 + }, + { + "epoch": 0.18788014929499586, + "grad_norm": 1.0473181536237035, + "learning_rate": 1.8721111884955686e-05, + "loss": 0.8213, + "step": 10873 + }, + { + "epoch": 0.18789742880840477, + "grad_norm": 0.7950535523587527, + "learning_rate": 1.8720838030444054e-05, + "loss": 0.4627, + "step": 10874 + }, + { + "epoch": 0.18791470832181364, + "grad_norm": 1.4082491831285437, + "learning_rate": 1.8720564148618093e-05, + "loss": 0.5588, + "step": 10875 + }, + { + "epoch": 0.18793198783522255, + "grad_norm": 0.5300584303360679, + "learning_rate": 1.872029023947866e-05, + "loss": 0.5752, + "step": 10876 + }, + { + "epoch": 0.18794926734863146, + "grad_norm": 1.0723531887584086, + "learning_rate": 1.8720016303026617e-05, + "loss": 0.6663, + "step": 10877 + }, + { + "epoch": 0.18796654686204037, + "grad_norm": 0.9276845573032206, + "learning_rate": 1.871974233926282e-05, + "loss": 0.5698, + "step": 10878 + }, + { + "epoch": 0.18798382637544928, + "grad_norm": 1.0469947708345855, + "learning_rate": 1.8719468348188128e-05, + "loss": 0.488, + "step": 10879 + }, + { + "epoch": 0.18800110588885818, + "grad_norm": 0.6448631559110725, + "learning_rate": 1.87191943298034e-05, + "loss": 0.7282, + "step": 10880 + }, + { + "epoch": 0.18801838540226706, + "grad_norm": 1.2028421791813968, + "learning_rate": 1.8718920284109493e-05, + "loss": 0.6619, + "step": 10881 + }, + { + "epoch": 0.18803566491567597, + "grad_norm": 1.184050365035473, + "learning_rate": 1.871864621110726e-05, + "loss": 0.7748, + "step": 10882 + }, + { + "epoch": 0.18805294442908488, + "grad_norm": 1.1140951432223467, + "learning_rate": 1.8718372110797568e-05, + "loss": 0.7022, + "step": 10883 + }, + { + "epoch": 0.18807022394249379, + "grad_norm": 0.8673864184359702, + "learning_rate": 1.871809798318127e-05, + "loss": 0.7251, + "step": 10884 + }, + { + "epoch": 0.1880875034559027, + "grad_norm": 1.215492797897851, + "learning_rate": 1.8717823828259233e-05, + "loss": 0.6199, + "step": 10885 + }, + { + "epoch": 0.18810478296931157, + "grad_norm": 1.0532356748001175, + "learning_rate": 1.87175496460323e-05, + "loss": 0.6003, + "step": 10886 + }, + { + "epoch": 0.18812206248272048, + "grad_norm": 0.6764136582336538, + "learning_rate": 1.8717275436501346e-05, + "loss": 0.4681, + "step": 10887 + }, + { + "epoch": 0.1881393419961294, + "grad_norm": 0.9394922861785772, + "learning_rate": 1.871700119966722e-05, + "loss": 0.5281, + "step": 10888 + }, + { + "epoch": 0.1881566215095383, + "grad_norm": 0.6892487613796751, + "learning_rate": 1.8716726935530785e-05, + "loss": 0.4632, + "step": 10889 + }, + { + "epoch": 0.1881739010229472, + "grad_norm": 1.0138046578474358, + "learning_rate": 1.8716452644092898e-05, + "loss": 0.5828, + "step": 10890 + }, + { + "epoch": 0.18819118053635608, + "grad_norm": 0.7652436196199156, + "learning_rate": 1.8716178325354417e-05, + "loss": 0.504, + "step": 10891 + }, + { + "epoch": 0.188208460049765, + "grad_norm": 0.8959479038405047, + "learning_rate": 1.8715903979316207e-05, + "loss": 0.4098, + "step": 10892 + }, + { + "epoch": 0.1882257395631739, + "grad_norm": 1.4421324267000006, + "learning_rate": 1.871562960597912e-05, + "loss": 0.6012, + "step": 10893 + }, + { + "epoch": 0.1882430190765828, + "grad_norm": 0.626099969460801, + "learning_rate": 1.871535520534402e-05, + "loss": 0.6719, + "step": 10894 + }, + { + "epoch": 0.18826029858999171, + "grad_norm": 1.2587018404121697, + "learning_rate": 1.8715080777411764e-05, + "loss": 0.6481, + "step": 10895 + }, + { + "epoch": 0.1882775781034006, + "grad_norm": 0.8514339175646252, + "learning_rate": 1.8714806322183214e-05, + "loss": 0.5462, + "step": 10896 + }, + { + "epoch": 0.1882948576168095, + "grad_norm": 0.605434520834835, + "learning_rate": 1.871453183965923e-05, + "loss": 0.7926, + "step": 10897 + }, + { + "epoch": 0.1883121371302184, + "grad_norm": 1.1722050762719345, + "learning_rate": 1.8714257329840664e-05, + "loss": 0.7279, + "step": 10898 + }, + { + "epoch": 0.18832941664362732, + "grad_norm": 1.3785175533990872, + "learning_rate": 1.871398279272839e-05, + "loss": 0.7013, + "step": 10899 + }, + { + "epoch": 0.18834669615703623, + "grad_norm": 0.6720742780485373, + "learning_rate": 1.8713708228323252e-05, + "loss": 0.4498, + "step": 10900 + }, + { + "epoch": 0.18836397567044513, + "grad_norm": 1.7162834479266096, + "learning_rate": 1.8713433636626124e-05, + "loss": 0.4981, + "step": 10901 + }, + { + "epoch": 0.188381255183854, + "grad_norm": 1.2918824057036393, + "learning_rate": 1.8713159017637855e-05, + "loss": 0.4632, + "step": 10902 + }, + { + "epoch": 0.18839853469726292, + "grad_norm": 1.418252623838427, + "learning_rate": 1.8712884371359313e-05, + "loss": 0.7599, + "step": 10903 + }, + { + "epoch": 0.18841581421067183, + "grad_norm": 1.2881618632872827, + "learning_rate": 1.871260969779135e-05, + "loss": 0.7087, + "step": 10904 + }, + { + "epoch": 0.18843309372408074, + "grad_norm": 0.9374439306638511, + "learning_rate": 1.8712334996934838e-05, + "loss": 0.3606, + "step": 10905 + }, + { + "epoch": 0.18845037323748964, + "grad_norm": 1.3711772252943615, + "learning_rate": 1.8712060268790626e-05, + "loss": 0.8181, + "step": 10906 + }, + { + "epoch": 0.18846765275089852, + "grad_norm": 1.1873419326184405, + "learning_rate": 1.8711785513359583e-05, + "loss": 0.875, + "step": 10907 + }, + { + "epoch": 0.18848493226430743, + "grad_norm": 0.9079194268709123, + "learning_rate": 1.8711510730642563e-05, + "loss": 0.6153, + "step": 10908 + }, + { + "epoch": 0.18850221177771634, + "grad_norm": 0.7508010417474245, + "learning_rate": 1.871123592064043e-05, + "loss": 0.6248, + "step": 10909 + }, + { + "epoch": 0.18851949129112525, + "grad_norm": 0.8039619896436118, + "learning_rate": 1.8710961083354043e-05, + "loss": 0.6433, + "step": 10910 + }, + { + "epoch": 0.18853677080453415, + "grad_norm": 0.5667054781177641, + "learning_rate": 1.8710686218784265e-05, + "loss": 0.6622, + "step": 10911 + }, + { + "epoch": 0.18855405031794303, + "grad_norm": 1.120862750174819, + "learning_rate": 1.8710411326931956e-05, + "loss": 0.636, + "step": 10912 + }, + { + "epoch": 0.18857132983135194, + "grad_norm": 1.095039904502282, + "learning_rate": 1.8710136407797976e-05, + "loss": 0.3522, + "step": 10913 + }, + { + "epoch": 0.18858860934476085, + "grad_norm": 1.6978247962295026, + "learning_rate": 1.870986146138319e-05, + "loss": 0.6507, + "step": 10914 + }, + { + "epoch": 0.18860588885816976, + "grad_norm": 1.0637863350695362, + "learning_rate": 1.8709586487688448e-05, + "loss": 0.6448, + "step": 10915 + }, + { + "epoch": 0.18862316837157866, + "grad_norm": 1.496198882268548, + "learning_rate": 1.8709311486714626e-05, + "loss": 0.6796, + "step": 10916 + }, + { + "epoch": 0.18864044788498757, + "grad_norm": 0.970235491788182, + "learning_rate": 1.8709036458462578e-05, + "loss": 0.7759, + "step": 10917 + }, + { + "epoch": 0.18865772739839645, + "grad_norm": 1.330935348386325, + "learning_rate": 1.8708761402933163e-05, + "loss": 0.5713, + "step": 10918 + }, + { + "epoch": 0.18867500691180536, + "grad_norm": 1.4518609654557448, + "learning_rate": 1.8708486320127246e-05, + "loss": 0.658, + "step": 10919 + }, + { + "epoch": 0.18869228642521427, + "grad_norm": 1.3344826146299944, + "learning_rate": 1.870821121004569e-05, + "loss": 0.5602, + "step": 10920 + }, + { + "epoch": 0.18870956593862317, + "grad_norm": 0.8552768113442838, + "learning_rate": 1.8707936072689355e-05, + "loss": 0.7575, + "step": 10921 + }, + { + "epoch": 0.18872684545203208, + "grad_norm": 1.2159687647574087, + "learning_rate": 1.87076609080591e-05, + "loss": 0.5425, + "step": 10922 + }, + { + "epoch": 0.18874412496544096, + "grad_norm": 1.1420569928154074, + "learning_rate": 1.870738571615579e-05, + "loss": 0.6021, + "step": 10923 + }, + { + "epoch": 0.18876140447884987, + "grad_norm": 0.7588749421199206, + "learning_rate": 1.8707110496980288e-05, + "loss": 0.8333, + "step": 10924 + }, + { + "epoch": 0.18877868399225878, + "grad_norm": 2.31999898889693, + "learning_rate": 1.870683525053345e-05, + "loss": 0.8145, + "step": 10925 + }, + { + "epoch": 0.18879596350566769, + "grad_norm": 1.0338972061380167, + "learning_rate": 1.8706559976816148e-05, + "loss": 0.5205, + "step": 10926 + }, + { + "epoch": 0.1888132430190766, + "grad_norm": 1.0692070250424976, + "learning_rate": 1.8706284675829235e-05, + "loss": 0.6404, + "step": 10927 + }, + { + "epoch": 0.18883052253248547, + "grad_norm": 0.9151410227429977, + "learning_rate": 1.8706009347573578e-05, + "loss": 0.7052, + "step": 10928 + }, + { + "epoch": 0.18884780204589438, + "grad_norm": 0.8665297649144023, + "learning_rate": 1.8705733992050036e-05, + "loss": 0.5114, + "step": 10929 + }, + { + "epoch": 0.1888650815593033, + "grad_norm": 1.0420305327277675, + "learning_rate": 1.8705458609259473e-05, + "loss": 0.507, + "step": 10930 + }, + { + "epoch": 0.1888823610727122, + "grad_norm": 2.425494632066467, + "learning_rate": 1.8705183199202757e-05, + "loss": 0.4926, + "step": 10931 + }, + { + "epoch": 0.1888996405861211, + "grad_norm": 0.6084370302154726, + "learning_rate": 1.8704907761880746e-05, + "loss": 0.3888, + "step": 10932 + }, + { + "epoch": 0.18891692009952998, + "grad_norm": 0.9669922116046008, + "learning_rate": 1.8704632297294295e-05, + "loss": 0.5678, + "step": 10933 + }, + { + "epoch": 0.1889341996129389, + "grad_norm": 1.243852800341985, + "learning_rate": 1.8704356805444283e-05, + "loss": 0.521, + "step": 10934 + }, + { + "epoch": 0.1889514791263478, + "grad_norm": 1.1998954842050498, + "learning_rate": 1.8704081286331557e-05, + "loss": 0.7136, + "step": 10935 + }, + { + "epoch": 0.1889687586397567, + "grad_norm": 1.1599900455167516, + "learning_rate": 1.8703805739956987e-05, + "loss": 0.6735, + "step": 10936 + }, + { + "epoch": 0.18898603815316561, + "grad_norm": 1.2718802704584078, + "learning_rate": 1.8703530166321438e-05, + "loss": 0.541, + "step": 10937 + }, + { + "epoch": 0.18900331766657452, + "grad_norm": 0.8814608838745601, + "learning_rate": 1.8703254565425773e-05, + "loss": 0.4925, + "step": 10938 + }, + { + "epoch": 0.1890205971799834, + "grad_norm": 0.8942084320031725, + "learning_rate": 1.8702978937270854e-05, + "loss": 0.374, + "step": 10939 + }, + { + "epoch": 0.1890378766933923, + "grad_norm": 1.1471543590975481, + "learning_rate": 1.870270328185754e-05, + "loss": 0.5928, + "step": 10940 + }, + { + "epoch": 0.18905515620680122, + "grad_norm": 1.1293064805856183, + "learning_rate": 1.8702427599186695e-05, + "loss": 0.451, + "step": 10941 + }, + { + "epoch": 0.18907243572021012, + "grad_norm": 0.7865305170157404, + "learning_rate": 1.870215188925919e-05, + "loss": 0.6953, + "step": 10942 + }, + { + "epoch": 0.18908971523361903, + "grad_norm": 1.187676827289656, + "learning_rate": 1.8701876152075888e-05, + "loss": 0.7364, + "step": 10943 + }, + { + "epoch": 0.1891069947470279, + "grad_norm": 0.8263953756870724, + "learning_rate": 1.8701600387637643e-05, + "loss": 0.5564, + "step": 10944 + }, + { + "epoch": 0.18912427426043682, + "grad_norm": 1.1637497299325925, + "learning_rate": 1.8701324595945323e-05, + "loss": 0.4985, + "step": 10945 + }, + { + "epoch": 0.18914155377384573, + "grad_norm": 0.7914008532326006, + "learning_rate": 1.8701048776999796e-05, + "loss": 0.4131, + "step": 10946 + }, + { + "epoch": 0.18915883328725464, + "grad_norm": 1.0902546716280532, + "learning_rate": 1.870077293080192e-05, + "loss": 0.4923, + "step": 10947 + }, + { + "epoch": 0.18917611280066354, + "grad_norm": 0.9341384157400024, + "learning_rate": 1.8700497057352564e-05, + "loss": 0.7033, + "step": 10948 + }, + { + "epoch": 0.18919339231407242, + "grad_norm": 1.0209972391031317, + "learning_rate": 1.8700221156652593e-05, + "loss": 0.6308, + "step": 10949 + }, + { + "epoch": 0.18921067182748133, + "grad_norm": 1.4484294374846505, + "learning_rate": 1.8699945228702863e-05, + "loss": 0.5593, + "step": 10950 + }, + { + "epoch": 0.18922795134089024, + "grad_norm": 1.1551437976495373, + "learning_rate": 1.8699669273504247e-05, + "loss": 0.6789, + "step": 10951 + }, + { + "epoch": 0.18924523085429915, + "grad_norm": 1.0186034610930041, + "learning_rate": 1.8699393291057607e-05, + "loss": 0.4578, + "step": 10952 + }, + { + "epoch": 0.18926251036770805, + "grad_norm": 1.0872481723349292, + "learning_rate": 1.86991172813638e-05, + "loss": 0.7625, + "step": 10953 + }, + { + "epoch": 0.18927978988111696, + "grad_norm": 1.0527858059124682, + "learning_rate": 1.8698841244423703e-05, + "loss": 0.4855, + "step": 10954 + }, + { + "epoch": 0.18929706939452584, + "grad_norm": 0.6364766714355411, + "learning_rate": 1.8698565180238167e-05, + "loss": 0.6127, + "step": 10955 + }, + { + "epoch": 0.18931434890793475, + "grad_norm": 0.3644060830427456, + "learning_rate": 1.8698289088808067e-05, + "loss": 0.6676, + "step": 10956 + }, + { + "epoch": 0.18933162842134366, + "grad_norm": 1.3475293459069098, + "learning_rate": 1.8698012970134267e-05, + "loss": 0.7496, + "step": 10957 + }, + { + "epoch": 0.18934890793475256, + "grad_norm": 1.7693464455162455, + "learning_rate": 1.869773682421763e-05, + "loss": 0.5921, + "step": 10958 + }, + { + "epoch": 0.18936618744816147, + "grad_norm": 0.943365446667999, + "learning_rate": 1.869746065105902e-05, + "loss": 0.6049, + "step": 10959 + }, + { + "epoch": 0.18938346696157035, + "grad_norm": 1.3505116295430186, + "learning_rate": 1.86971844506593e-05, + "loss": 0.9067, + "step": 10960 + }, + { + "epoch": 0.18940074647497926, + "grad_norm": 1.3205383331072058, + "learning_rate": 1.8696908223019336e-05, + "loss": 0.745, + "step": 10961 + }, + { + "epoch": 0.18941802598838817, + "grad_norm": 1.1331758492953667, + "learning_rate": 1.8696631968139995e-05, + "loss": 0.6559, + "step": 10962 + }, + { + "epoch": 0.18943530550179707, + "grad_norm": 0.8634061553190442, + "learning_rate": 1.8696355686022145e-05, + "loss": 0.4642, + "step": 10963 + }, + { + "epoch": 0.18945258501520598, + "grad_norm": 1.2844274432175244, + "learning_rate": 1.8696079376666646e-05, + "loss": 0.4779, + "step": 10964 + }, + { + "epoch": 0.18946986452861486, + "grad_norm": 0.7192607542033779, + "learning_rate": 1.869580304007436e-05, + "loss": 0.402, + "step": 10965 + }, + { + "epoch": 0.18948714404202377, + "grad_norm": 0.9665061722659402, + "learning_rate": 1.8695526676246167e-05, + "loss": 0.4954, + "step": 10966 + }, + { + "epoch": 0.18950442355543268, + "grad_norm": 1.4416591943826451, + "learning_rate": 1.8695250285182922e-05, + "loss": 0.791, + "step": 10967 + }, + { + "epoch": 0.18952170306884158, + "grad_norm": 1.0477448317089666, + "learning_rate": 1.8694973866885488e-05, + "loss": 0.5404, + "step": 10968 + }, + { + "epoch": 0.1895389825822505, + "grad_norm": 0.8857612012197444, + "learning_rate": 1.8694697421354738e-05, + "loss": 0.4438, + "step": 10969 + }, + { + "epoch": 0.18955626209565937, + "grad_norm": 1.3666953098705028, + "learning_rate": 1.8694420948591537e-05, + "loss": 0.5874, + "step": 10970 + }, + { + "epoch": 0.18957354160906828, + "grad_norm": 0.9075216900426252, + "learning_rate": 1.8694144448596745e-05, + "loss": 0.6079, + "step": 10971 + }, + { + "epoch": 0.1895908211224772, + "grad_norm": 1.0562246333677772, + "learning_rate": 1.8693867921371232e-05, + "loss": 0.5467, + "step": 10972 + }, + { + "epoch": 0.1896081006358861, + "grad_norm": 1.0544126758061214, + "learning_rate": 1.8693591366915867e-05, + "loss": 0.6665, + "step": 10973 + }, + { + "epoch": 0.189625380149295, + "grad_norm": 0.9198081302633555, + "learning_rate": 1.869331478523151e-05, + "loss": 0.6633, + "step": 10974 + }, + { + "epoch": 0.1896426596627039, + "grad_norm": 1.127709483225516, + "learning_rate": 1.869303817631903e-05, + "loss": 0.2655, + "step": 10975 + }, + { + "epoch": 0.1896599391761128, + "grad_norm": 0.8329692901589661, + "learning_rate": 1.86927615401793e-05, + "loss": 0.5012, + "step": 10976 + }, + { + "epoch": 0.1896772186895217, + "grad_norm": 1.1013765167004623, + "learning_rate": 1.8692484876813176e-05, + "loss": 0.5821, + "step": 10977 + }, + { + "epoch": 0.1896944982029306, + "grad_norm": 1.2348920377654717, + "learning_rate": 1.8692208186221527e-05, + "loss": 0.5554, + "step": 10978 + }, + { + "epoch": 0.1897117777163395, + "grad_norm": 0.9798820844120695, + "learning_rate": 1.8691931468405223e-05, + "loss": 0.4048, + "step": 10979 + }, + { + "epoch": 0.18972905722974842, + "grad_norm": 1.0545866441057277, + "learning_rate": 1.8691654723365127e-05, + "loss": 0.6018, + "step": 10980 + }, + { + "epoch": 0.1897463367431573, + "grad_norm": 1.411445527297226, + "learning_rate": 1.869137795110211e-05, + "loss": 0.5691, + "step": 10981 + }, + { + "epoch": 0.1897636162565662, + "grad_norm": 1.6085316958622669, + "learning_rate": 1.8691101151617038e-05, + "loss": 0.6766, + "step": 10982 + }, + { + "epoch": 0.18978089576997512, + "grad_norm": 1.1451513946628349, + "learning_rate": 1.8690824324910776e-05, + "loss": 0.7428, + "step": 10983 + }, + { + "epoch": 0.18979817528338402, + "grad_norm": 1.0477851517591006, + "learning_rate": 1.8690547470984192e-05, + "loss": 0.5952, + "step": 10984 + }, + { + "epoch": 0.18981545479679293, + "grad_norm": 1.0508366535177642, + "learning_rate": 1.869027058983815e-05, + "loss": 0.5157, + "step": 10985 + }, + { + "epoch": 0.1898327343102018, + "grad_norm": 0.3939084475081123, + "learning_rate": 1.868999368147352e-05, + "loss": 0.6148, + "step": 10986 + }, + { + "epoch": 0.18985001382361072, + "grad_norm": 0.605132136427716, + "learning_rate": 1.8689716745891174e-05, + "loss": 0.506, + "step": 10987 + }, + { + "epoch": 0.18986729333701963, + "grad_norm": 0.7721755983591242, + "learning_rate": 1.8689439783091973e-05, + "loss": 0.5845, + "step": 10988 + }, + { + "epoch": 0.18988457285042853, + "grad_norm": 0.8129859343399164, + "learning_rate": 1.8689162793076782e-05, + "loss": 0.5918, + "step": 10989 + }, + { + "epoch": 0.18990185236383744, + "grad_norm": 1.0223020678764143, + "learning_rate": 1.8688885775846478e-05, + "loss": 0.4825, + "step": 10990 + }, + { + "epoch": 0.18991913187724635, + "grad_norm": 0.8766819910209572, + "learning_rate": 1.868860873140192e-05, + "loss": 0.8024, + "step": 10991 + }, + { + "epoch": 0.18993641139065523, + "grad_norm": 1.154584098364537, + "learning_rate": 1.8688331659743978e-05, + "loss": 0.6576, + "step": 10992 + }, + { + "epoch": 0.18995369090406414, + "grad_norm": 1.3005209071404942, + "learning_rate": 1.8688054560873524e-05, + "loss": 0.4457, + "step": 10993 + }, + { + "epoch": 0.18997097041747305, + "grad_norm": 1.148042958605852, + "learning_rate": 1.8687777434791425e-05, + "loss": 0.6143, + "step": 10994 + }, + { + "epoch": 0.18998824993088195, + "grad_norm": 1.0598395121681996, + "learning_rate": 1.8687500281498546e-05, + "loss": 0.7398, + "step": 10995 + }, + { + "epoch": 0.19000552944429086, + "grad_norm": 1.0880780403762174, + "learning_rate": 1.868722310099575e-05, + "loss": 0.7662, + "step": 10996 + }, + { + "epoch": 0.19002280895769974, + "grad_norm": 0.9508585714903905, + "learning_rate": 1.8686945893283916e-05, + "loss": 0.5505, + "step": 10997 + }, + { + "epoch": 0.19004008847110865, + "grad_norm": 1.12752829306331, + "learning_rate": 1.8686668658363908e-05, + "loss": 0.5127, + "step": 10998 + }, + { + "epoch": 0.19005736798451756, + "grad_norm": 1.4340155915460384, + "learning_rate": 1.868639139623659e-05, + "loss": 0.6964, + "step": 10999 + }, + { + "epoch": 0.19007464749792646, + "grad_norm": 1.2771830097407926, + "learning_rate": 1.8686114106902838e-05, + "loss": 0.7465, + "step": 11000 + }, + { + "epoch": 0.19009192701133537, + "grad_norm": 0.9782312545448169, + "learning_rate": 1.8685836790363514e-05, + "loss": 0.4558, + "step": 11001 + }, + { + "epoch": 0.19010920652474425, + "grad_norm": 0.8150630120174852, + "learning_rate": 1.868555944661949e-05, + "loss": 0.418, + "step": 11002 + }, + { + "epoch": 0.19012648603815316, + "grad_norm": 0.8745635540279497, + "learning_rate": 1.8685282075671633e-05, + "loss": 0.5686, + "step": 11003 + }, + { + "epoch": 0.19014376555156207, + "grad_norm": 0.7079870360934429, + "learning_rate": 1.8685004677520813e-05, + "loss": 0.5293, + "step": 11004 + }, + { + "epoch": 0.19016104506497097, + "grad_norm": 0.7110019368347081, + "learning_rate": 1.8684727252167897e-05, + "loss": 0.5344, + "step": 11005 + }, + { + "epoch": 0.19017832457837988, + "grad_norm": 0.8898339605582494, + "learning_rate": 1.8684449799613757e-05, + "loss": 0.5535, + "step": 11006 + }, + { + "epoch": 0.19019560409178876, + "grad_norm": 1.37753646721033, + "learning_rate": 1.8684172319859258e-05, + "loss": 0.5867, + "step": 11007 + }, + { + "epoch": 0.19021288360519767, + "grad_norm": 0.981118317408321, + "learning_rate": 1.8683894812905278e-05, + "loss": 0.5957, + "step": 11008 + }, + { + "epoch": 0.19023016311860658, + "grad_norm": 0.9792908180408139, + "learning_rate": 1.868361727875267e-05, + "loss": 0.623, + "step": 11009 + }, + { + "epoch": 0.19024744263201548, + "grad_norm": 0.8341873131609685, + "learning_rate": 1.8683339717402318e-05, + "loss": 0.4139, + "step": 11010 + }, + { + "epoch": 0.1902647221454244, + "grad_norm": 0.5899481896693042, + "learning_rate": 1.8683062128855084e-05, + "loss": 0.3465, + "step": 11011 + }, + { + "epoch": 0.1902820016588333, + "grad_norm": 1.391115034147478, + "learning_rate": 1.8682784513111844e-05, + "loss": 0.7991, + "step": 11012 + }, + { + "epoch": 0.19029928117224218, + "grad_norm": 0.5723657358373856, + "learning_rate": 1.868250687017346e-05, + "loss": 0.4291, + "step": 11013 + }, + { + "epoch": 0.1903165606856511, + "grad_norm": 0.9188152858211157, + "learning_rate": 1.8682229200040805e-05, + "loss": 0.4848, + "step": 11014 + }, + { + "epoch": 0.19033384019906, + "grad_norm": 1.2612045564368193, + "learning_rate": 1.8681951502714746e-05, + "loss": 0.61, + "step": 11015 + }, + { + "epoch": 0.1903511197124689, + "grad_norm": 0.9272670282271259, + "learning_rate": 1.8681673778196156e-05, + "loss": 0.6352, + "step": 11016 + }, + { + "epoch": 0.1903683992258778, + "grad_norm": 0.9588455652545188, + "learning_rate": 1.8681396026485907e-05, + "loss": 0.847, + "step": 11017 + }, + { + "epoch": 0.1903856787392867, + "grad_norm": 0.9066644438666609, + "learning_rate": 1.8681118247584863e-05, + "loss": 0.593, + "step": 11018 + }, + { + "epoch": 0.1904029582526956, + "grad_norm": 0.874466894874248, + "learning_rate": 1.8680840441493896e-05, + "loss": 0.6524, + "step": 11019 + }, + { + "epoch": 0.1904202377661045, + "grad_norm": 0.9507559406632977, + "learning_rate": 1.868056260821388e-05, + "loss": 0.8948, + "step": 11020 + }, + { + "epoch": 0.1904375172795134, + "grad_norm": 0.7907073743151708, + "learning_rate": 1.868028474774568e-05, + "loss": 0.4063, + "step": 11021 + }, + { + "epoch": 0.19045479679292232, + "grad_norm": 1.538371190955015, + "learning_rate": 1.8680006860090167e-05, + "loss": 0.6038, + "step": 11022 + }, + { + "epoch": 0.1904720763063312, + "grad_norm": 0.990972779093139, + "learning_rate": 1.867972894524822e-05, + "loss": 0.703, + "step": 11023 + }, + { + "epoch": 0.1904893558197401, + "grad_norm": 1.1221594347587094, + "learning_rate": 1.8679451003220696e-05, + "loss": 0.4717, + "step": 11024 + }, + { + "epoch": 0.19050663533314902, + "grad_norm": 0.3848368295086006, + "learning_rate": 1.867917303400847e-05, + "loss": 0.5726, + "step": 11025 + }, + { + "epoch": 0.19052391484655792, + "grad_norm": 0.8501596848288763, + "learning_rate": 1.867889503761242e-05, + "loss": 0.5282, + "step": 11026 + }, + { + "epoch": 0.19054119435996683, + "grad_norm": 0.4005622123370333, + "learning_rate": 1.867861701403341e-05, + "loss": 0.7208, + "step": 11027 + }, + { + "epoch": 0.19055847387337574, + "grad_norm": 1.5445648048896305, + "learning_rate": 1.867833896327231e-05, + "loss": 0.9071, + "step": 11028 + }, + { + "epoch": 0.19057575338678462, + "grad_norm": 1.3731922483095342, + "learning_rate": 1.8678060885329994e-05, + "loss": 0.6399, + "step": 11029 + }, + { + "epoch": 0.19059303290019353, + "grad_norm": 1.1384816264112965, + "learning_rate": 1.8677782780207333e-05, + "loss": 0.6597, + "step": 11030 + }, + { + "epoch": 0.19061031241360243, + "grad_norm": 0.7379137610433764, + "learning_rate": 1.8677504647905194e-05, + "loss": 0.445, + "step": 11031 + }, + { + "epoch": 0.19062759192701134, + "grad_norm": 1.1518561833753074, + "learning_rate": 1.867722648842445e-05, + "loss": 0.7199, + "step": 11032 + }, + { + "epoch": 0.19064487144042025, + "grad_norm": 0.8701070108481637, + "learning_rate": 1.867694830176598e-05, + "loss": 0.6372, + "step": 11033 + }, + { + "epoch": 0.19066215095382913, + "grad_norm": 0.9011339662142364, + "learning_rate": 1.867667008793064e-05, + "loss": 0.5214, + "step": 11034 + }, + { + "epoch": 0.19067943046723804, + "grad_norm": 1.2222268388384125, + "learning_rate": 1.8676391846919318e-05, + "loss": 0.6802, + "step": 11035 + }, + { + "epoch": 0.19069670998064694, + "grad_norm": 0.996909626366523, + "learning_rate": 1.867611357873287e-05, + "loss": 0.5801, + "step": 11036 + }, + { + "epoch": 0.19071398949405585, + "grad_norm": 0.8563572874411266, + "learning_rate": 1.867583528337218e-05, + "loss": 0.5968, + "step": 11037 + }, + { + "epoch": 0.19073126900746476, + "grad_norm": 0.7567441123508319, + "learning_rate": 1.867555696083811e-05, + "loss": 0.5746, + "step": 11038 + }, + { + "epoch": 0.19074854852087364, + "grad_norm": 1.315041978769046, + "learning_rate": 1.867527861113154e-05, + "loss": 0.5769, + "step": 11039 + }, + { + "epoch": 0.19076582803428255, + "grad_norm": 0.9619841157292515, + "learning_rate": 1.867500023425334e-05, + "loss": 0.7013, + "step": 11040 + }, + { + "epoch": 0.19078310754769146, + "grad_norm": 1.6410016862794004, + "learning_rate": 1.8674721830204375e-05, + "loss": 0.4592, + "step": 11041 + }, + { + "epoch": 0.19080038706110036, + "grad_norm": 1.309848003810354, + "learning_rate": 1.8674443398985525e-05, + "loss": 0.5509, + "step": 11042 + }, + { + "epoch": 0.19081766657450927, + "grad_norm": 1.3151115371468671, + "learning_rate": 1.8674164940597656e-05, + "loss": 0.5831, + "step": 11043 + }, + { + "epoch": 0.19083494608791818, + "grad_norm": 0.7644232175201108, + "learning_rate": 1.8673886455041647e-05, + "loss": 0.3649, + "step": 11044 + }, + { + "epoch": 0.19085222560132706, + "grad_norm": 1.1225011528930402, + "learning_rate": 1.8673607942318366e-05, + "loss": 0.5009, + "step": 11045 + }, + { + "epoch": 0.19086950511473597, + "grad_norm": 0.8699490022384313, + "learning_rate": 1.8673329402428683e-05, + "loss": 0.6069, + "step": 11046 + }, + { + "epoch": 0.19088678462814487, + "grad_norm": 1.7348304247018913, + "learning_rate": 1.8673050835373473e-05, + "loss": 0.8013, + "step": 11047 + }, + { + "epoch": 0.19090406414155378, + "grad_norm": 2.8184260105950423, + "learning_rate": 1.8672772241153613e-05, + "loss": 0.4878, + "step": 11048 + }, + { + "epoch": 0.1909213436549627, + "grad_norm": 0.9143973862818665, + "learning_rate": 1.8672493619769968e-05, + "loss": 0.7411, + "step": 11049 + }, + { + "epoch": 0.19093862316837157, + "grad_norm": 0.9123005537129443, + "learning_rate": 1.8672214971223416e-05, + "loss": 0.632, + "step": 11050 + }, + { + "epoch": 0.19095590268178048, + "grad_norm": 0.7949037569970941, + "learning_rate": 1.8671936295514825e-05, + "loss": 0.4586, + "step": 11051 + }, + { + "epoch": 0.19097318219518938, + "grad_norm": 1.390602510246046, + "learning_rate": 1.867165759264507e-05, + "loss": 0.729, + "step": 11052 + }, + { + "epoch": 0.1909904617085983, + "grad_norm": 0.6617083124907365, + "learning_rate": 1.8671378862615025e-05, + "loss": 0.5857, + "step": 11053 + }, + { + "epoch": 0.1910077412220072, + "grad_norm": 0.8351376870725766, + "learning_rate": 1.8671100105425564e-05, + "loss": 0.5363, + "step": 11054 + }, + { + "epoch": 0.19102502073541608, + "grad_norm": 0.4783272626678557, + "learning_rate": 1.8670821321077557e-05, + "loss": 0.8694, + "step": 11055 + }, + { + "epoch": 0.191042300248825, + "grad_norm": 1.065851779658054, + "learning_rate": 1.8670542509571882e-05, + "loss": 0.4983, + "step": 11056 + }, + { + "epoch": 0.1910595797622339, + "grad_norm": 1.0855410877128504, + "learning_rate": 1.8670263670909405e-05, + "loss": 0.7605, + "step": 11057 + }, + { + "epoch": 0.1910768592756428, + "grad_norm": 0.4690244051483713, + "learning_rate": 1.8669984805091005e-05, + "loss": 0.714, + "step": 11058 + }, + { + "epoch": 0.1910941387890517, + "grad_norm": 0.3954447579898164, + "learning_rate": 1.8669705912117555e-05, + "loss": 0.8015, + "step": 11059 + }, + { + "epoch": 0.1911114183024606, + "grad_norm": 0.66947553064533, + "learning_rate": 1.8669426991989926e-05, + "loss": 0.6157, + "step": 11060 + }, + { + "epoch": 0.1911286978158695, + "grad_norm": 0.8139427003920853, + "learning_rate": 1.8669148044708993e-05, + "loss": 0.5744, + "step": 11061 + }, + { + "epoch": 0.1911459773292784, + "grad_norm": 0.8186652636355346, + "learning_rate": 1.8668869070275625e-05, + "loss": 0.4427, + "step": 11062 + }, + { + "epoch": 0.1911632568426873, + "grad_norm": 0.7481985374638603, + "learning_rate": 1.8668590068690705e-05, + "loss": 0.5123, + "step": 11063 + }, + { + "epoch": 0.19118053635609622, + "grad_norm": 0.730533926750656, + "learning_rate": 1.8668311039955102e-05, + "loss": 0.5259, + "step": 11064 + }, + { + "epoch": 0.19119781586950513, + "grad_norm": 1.3850809635010612, + "learning_rate": 1.866803198406969e-05, + "loss": 0.6706, + "step": 11065 + }, + { + "epoch": 0.191215095382914, + "grad_norm": 1.3334732671305705, + "learning_rate": 1.8667752901035343e-05, + "loss": 0.7446, + "step": 11066 + }, + { + "epoch": 0.19123237489632292, + "grad_norm": 0.9775056989315952, + "learning_rate": 1.8667473790852935e-05, + "loss": 0.6227, + "step": 11067 + }, + { + "epoch": 0.19124965440973182, + "grad_norm": 1.3916810436094043, + "learning_rate": 1.866719465352334e-05, + "loss": 0.5616, + "step": 11068 + }, + { + "epoch": 0.19126693392314073, + "grad_norm": 0.45561425492998503, + "learning_rate": 1.8666915489047432e-05, + "loss": 0.6404, + "step": 11069 + }, + { + "epoch": 0.19128421343654964, + "grad_norm": 1.1933084651258612, + "learning_rate": 1.866663629742609e-05, + "loss": 0.5854, + "step": 11070 + }, + { + "epoch": 0.19130149294995852, + "grad_norm": 1.07429272999934, + "learning_rate": 1.866635707866018e-05, + "loss": 0.4848, + "step": 11071 + }, + { + "epoch": 0.19131877246336743, + "grad_norm": 1.1731122049386395, + "learning_rate": 1.866607783275058e-05, + "loss": 0.7137, + "step": 11072 + }, + { + "epoch": 0.19133605197677633, + "grad_norm": 1.8325627140784362, + "learning_rate": 1.8665798559698167e-05, + "loss": 0.7799, + "step": 11073 + }, + { + "epoch": 0.19135333149018524, + "grad_norm": 0.9107464212039348, + "learning_rate": 1.866551925950382e-05, + "loss": 0.4933, + "step": 11074 + }, + { + "epoch": 0.19137061100359415, + "grad_norm": 0.8559104347037801, + "learning_rate": 1.86652399321684e-05, + "loss": 0.4781, + "step": 11075 + }, + { + "epoch": 0.19138789051700303, + "grad_norm": 1.1157467093557987, + "learning_rate": 1.8664960577692793e-05, + "loss": 0.5363, + "step": 11076 + }, + { + "epoch": 0.19140517003041194, + "grad_norm": 0.7537294863718784, + "learning_rate": 1.866468119607787e-05, + "loss": 0.6492, + "step": 11077 + }, + { + "epoch": 0.19142244954382084, + "grad_norm": 0.5048274803160433, + "learning_rate": 1.866440178732451e-05, + "loss": 0.5432, + "step": 11078 + }, + { + "epoch": 0.19143972905722975, + "grad_norm": 1.0685325138860264, + "learning_rate": 1.8664122351433583e-05, + "loss": 0.6583, + "step": 11079 + }, + { + "epoch": 0.19145700857063866, + "grad_norm": 1.3400398257680453, + "learning_rate": 1.8663842888405967e-05, + "loss": 0.7103, + "step": 11080 + }, + { + "epoch": 0.19147428808404757, + "grad_norm": 1.1293864489510343, + "learning_rate": 1.8663563398242536e-05, + "loss": 0.7799, + "step": 11081 + }, + { + "epoch": 0.19149156759745645, + "grad_norm": 0.9479446896374442, + "learning_rate": 1.8663283880944164e-05, + "loss": 0.7488, + "step": 11082 + }, + { + "epoch": 0.19150884711086535, + "grad_norm": 0.9635961508417286, + "learning_rate": 1.866300433651173e-05, + "loss": 0.5955, + "step": 11083 + }, + { + "epoch": 0.19152612662427426, + "grad_norm": 1.2892940191039788, + "learning_rate": 1.8662724764946113e-05, + "loss": 0.8582, + "step": 11084 + }, + { + "epoch": 0.19154340613768317, + "grad_norm": 0.9704909589744274, + "learning_rate": 1.866244516624818e-05, + "loss": 0.2902, + "step": 11085 + }, + { + "epoch": 0.19156068565109208, + "grad_norm": 0.974869613520594, + "learning_rate": 1.8662165540418808e-05, + "loss": 0.517, + "step": 11086 + }, + { + "epoch": 0.19157796516450096, + "grad_norm": 1.005056714724434, + "learning_rate": 1.866188588745888e-05, + "loss": 0.6114, + "step": 11087 + }, + { + "epoch": 0.19159524467790987, + "grad_norm": 0.9614650016677375, + "learning_rate": 1.8661606207369262e-05, + "loss": 0.5415, + "step": 11088 + }, + { + "epoch": 0.19161252419131877, + "grad_norm": 0.8925727599216924, + "learning_rate": 1.866132650015084e-05, + "loss": 0.5467, + "step": 11089 + }, + { + "epoch": 0.19162980370472768, + "grad_norm": 1.0757000062321649, + "learning_rate": 1.8661046765804482e-05, + "loss": 0.4746, + "step": 11090 + }, + { + "epoch": 0.1916470832181366, + "grad_norm": 1.376030438783221, + "learning_rate": 1.8660767004331068e-05, + "loss": 0.6611, + "step": 11091 + }, + { + "epoch": 0.19166436273154547, + "grad_norm": 0.774441700300555, + "learning_rate": 1.866048721573147e-05, + "loss": 0.4722, + "step": 11092 + }, + { + "epoch": 0.19168164224495438, + "grad_norm": 1.0538779085067802, + "learning_rate": 1.8660207400006573e-05, + "loss": 0.5685, + "step": 11093 + }, + { + "epoch": 0.19169892175836328, + "grad_norm": 0.9758410584122494, + "learning_rate": 1.8659927557157247e-05, + "loss": 0.5636, + "step": 11094 + }, + { + "epoch": 0.1917162012717722, + "grad_norm": 1.2961561035393165, + "learning_rate": 1.865964768718437e-05, + "loss": 0.507, + "step": 11095 + }, + { + "epoch": 0.1917334807851811, + "grad_norm": 1.1114303761050761, + "learning_rate": 1.865936779008882e-05, + "loss": 0.6644, + "step": 11096 + }, + { + "epoch": 0.19175076029858998, + "grad_norm": 1.0351937487699576, + "learning_rate": 1.8659087865871468e-05, + "loss": 0.5818, + "step": 11097 + }, + { + "epoch": 0.19176803981199889, + "grad_norm": 1.2954666961473325, + "learning_rate": 1.8658807914533197e-05, + "loss": 0.5291, + "step": 11098 + }, + { + "epoch": 0.1917853193254078, + "grad_norm": 0.7730172563291434, + "learning_rate": 1.865852793607488e-05, + "loss": 0.4977, + "step": 11099 + }, + { + "epoch": 0.1918025988388167, + "grad_norm": 0.9921273296868581, + "learning_rate": 1.8658247930497398e-05, + "loss": 0.5827, + "step": 11100 + }, + { + "epoch": 0.1918198783522256, + "grad_norm": 0.38225948071059046, + "learning_rate": 1.865796789780162e-05, + "loss": 0.5229, + "step": 11101 + }, + { + "epoch": 0.19183715786563452, + "grad_norm": 0.9852095048605352, + "learning_rate": 1.8657687837988435e-05, + "loss": 0.7485, + "step": 11102 + }, + { + "epoch": 0.1918544373790434, + "grad_norm": 0.38883492781488616, + "learning_rate": 1.865740775105871e-05, + "loss": 0.5716, + "step": 11103 + }, + { + "epoch": 0.1918717168924523, + "grad_norm": 0.8467680044997566, + "learning_rate": 1.865712763701333e-05, + "loss": 0.4486, + "step": 11104 + }, + { + "epoch": 0.1918889964058612, + "grad_norm": 1.234439048515658, + "learning_rate": 1.8656847495853164e-05, + "loss": 0.6026, + "step": 11105 + }, + { + "epoch": 0.19190627591927012, + "grad_norm": 0.9842396908925698, + "learning_rate": 1.8656567327579096e-05, + "loss": 0.3805, + "step": 11106 + }, + { + "epoch": 0.19192355543267903, + "grad_norm": 1.4135042768635033, + "learning_rate": 1.8656287132192e-05, + "loss": 0.6136, + "step": 11107 + }, + { + "epoch": 0.1919408349460879, + "grad_norm": 1.0688565105174244, + "learning_rate": 1.8656006909692757e-05, + "loss": 0.57, + "step": 11108 + }, + { + "epoch": 0.19195811445949681, + "grad_norm": 0.7163840768502756, + "learning_rate": 1.865572666008224e-05, + "loss": 0.6643, + "step": 11109 + }, + { + "epoch": 0.19197539397290572, + "grad_norm": 1.202290104564563, + "learning_rate": 1.8655446383361332e-05, + "loss": 0.6993, + "step": 11110 + }, + { + "epoch": 0.19199267348631463, + "grad_norm": 0.8529231918770925, + "learning_rate": 1.8655166079530906e-05, + "loss": 0.5912, + "step": 11111 + }, + { + "epoch": 0.19200995299972354, + "grad_norm": 0.7754198578983965, + "learning_rate": 1.865488574859184e-05, + "loss": 0.5413, + "step": 11112 + }, + { + "epoch": 0.19202723251313242, + "grad_norm": 0.44371922123522994, + "learning_rate": 1.8654605390545018e-05, + "loss": 0.6489, + "step": 11113 + }, + { + "epoch": 0.19204451202654133, + "grad_norm": 0.6854477576408213, + "learning_rate": 1.8654325005391314e-05, + "loss": 0.5749, + "step": 11114 + }, + { + "epoch": 0.19206179153995023, + "grad_norm": 0.9345151808851232, + "learning_rate": 1.8654044593131604e-05, + "loss": 0.5417, + "step": 11115 + }, + { + "epoch": 0.19207907105335914, + "grad_norm": 1.1505397822029604, + "learning_rate": 1.8653764153766772e-05, + "loss": 0.7304, + "step": 11116 + }, + { + "epoch": 0.19209635056676805, + "grad_norm": 1.1958542853827037, + "learning_rate": 1.865348368729769e-05, + "loss": 0.6114, + "step": 11117 + }, + { + "epoch": 0.19211363008017696, + "grad_norm": 1.2582895356439943, + "learning_rate": 1.865320319372524e-05, + "loss": 0.5677, + "step": 11118 + }, + { + "epoch": 0.19213090959358584, + "grad_norm": 0.8706813269342729, + "learning_rate": 1.86529226730503e-05, + "loss": 0.5464, + "step": 11119 + }, + { + "epoch": 0.19214818910699474, + "grad_norm": 0.8163183521356349, + "learning_rate": 1.8652642125273747e-05, + "loss": 0.7728, + "step": 11120 + }, + { + "epoch": 0.19216546862040365, + "grad_norm": 0.9529559062938976, + "learning_rate": 1.8652361550396464e-05, + "loss": 0.6619, + "step": 11121 + }, + { + "epoch": 0.19218274813381256, + "grad_norm": 0.8638847628741807, + "learning_rate": 1.8652080948419328e-05, + "loss": 0.4529, + "step": 11122 + }, + { + "epoch": 0.19220002764722147, + "grad_norm": 1.0028739306833794, + "learning_rate": 1.8651800319343215e-05, + "loss": 0.3584, + "step": 11123 + }, + { + "epoch": 0.19221730716063035, + "grad_norm": 0.4198658792721772, + "learning_rate": 1.8651519663169004e-05, + "loss": 0.651, + "step": 11124 + }, + { + "epoch": 0.19223458667403925, + "grad_norm": 1.2109244898465394, + "learning_rate": 1.865123897989758e-05, + "loss": 0.7973, + "step": 11125 + }, + { + "epoch": 0.19225186618744816, + "grad_norm": 0.8377592615087781, + "learning_rate": 1.8650958269529814e-05, + "loss": 0.5083, + "step": 11126 + }, + { + "epoch": 0.19226914570085707, + "grad_norm": 0.9147582749566192, + "learning_rate": 1.8650677532066593e-05, + "loss": 0.7059, + "step": 11127 + }, + { + "epoch": 0.19228642521426598, + "grad_norm": 0.539007691758768, + "learning_rate": 1.865039676750879e-05, + "loss": 0.67, + "step": 11128 + }, + { + "epoch": 0.19230370472767486, + "grad_norm": 1.085006341878403, + "learning_rate": 1.8650115975857284e-05, + "loss": 0.5089, + "step": 11129 + }, + { + "epoch": 0.19232098424108376, + "grad_norm": 0.9610111957820324, + "learning_rate": 1.8649835157112965e-05, + "loss": 0.638, + "step": 11130 + }, + { + "epoch": 0.19233826375449267, + "grad_norm": 1.2486827007893633, + "learning_rate": 1.86495543112767e-05, + "loss": 0.528, + "step": 11131 + }, + { + "epoch": 0.19235554326790158, + "grad_norm": 1.1134699078885564, + "learning_rate": 1.8649273438349373e-05, + "loss": 0.7845, + "step": 11132 + }, + { + "epoch": 0.1923728227813105, + "grad_norm": 1.2115905652648715, + "learning_rate": 1.864899253833187e-05, + "loss": 0.4481, + "step": 11133 + }, + { + "epoch": 0.19239010229471937, + "grad_norm": 0.8136451877098846, + "learning_rate": 1.864871161122506e-05, + "loss": 0.6788, + "step": 11134 + }, + { + "epoch": 0.19240738180812827, + "grad_norm": 1.3801779387011341, + "learning_rate": 1.8648430657029828e-05, + "loss": 0.5897, + "step": 11135 + }, + { + "epoch": 0.19242466132153718, + "grad_norm": 1.281766134861547, + "learning_rate": 1.8648149675747055e-05, + "loss": 0.649, + "step": 11136 + }, + { + "epoch": 0.1924419408349461, + "grad_norm": 1.5340188329263318, + "learning_rate": 1.864786866737762e-05, + "loss": 0.6572, + "step": 11137 + }, + { + "epoch": 0.192459220348355, + "grad_norm": 1.0502935345469082, + "learning_rate": 1.86475876319224e-05, + "loss": 0.479, + "step": 11138 + }, + { + "epoch": 0.1924764998617639, + "grad_norm": 0.7760878250764176, + "learning_rate": 1.8647306569382283e-05, + "loss": 0.4486, + "step": 11139 + }, + { + "epoch": 0.19249377937517279, + "grad_norm": 1.1255804572471457, + "learning_rate": 1.8647025479758145e-05, + "loss": 0.5446, + "step": 11140 + }, + { + "epoch": 0.1925110588885817, + "grad_norm": 0.8863784676750769, + "learning_rate": 1.864674436305086e-05, + "loss": 0.6903, + "step": 11141 + }, + { + "epoch": 0.1925283384019906, + "grad_norm": 1.2898914795044882, + "learning_rate": 1.8646463219261323e-05, + "loss": 0.5686, + "step": 11142 + }, + { + "epoch": 0.1925456179153995, + "grad_norm": 0.8462816153251994, + "learning_rate": 1.8646182048390398e-05, + "loss": 0.4357, + "step": 11143 + }, + { + "epoch": 0.19256289742880842, + "grad_norm": 1.323879530697762, + "learning_rate": 1.864590085043898e-05, + "loss": 0.5893, + "step": 11144 + }, + { + "epoch": 0.1925801769422173, + "grad_norm": 1.199764611021899, + "learning_rate": 1.8645619625407937e-05, + "loss": 0.9531, + "step": 11145 + }, + { + "epoch": 0.1925974564556262, + "grad_norm": 1.515626805311002, + "learning_rate": 1.864533837329816e-05, + "loss": 0.6989, + "step": 11146 + }, + { + "epoch": 0.1926147359690351, + "grad_norm": 1.8227879649636314, + "learning_rate": 1.864505709411053e-05, + "loss": 0.8466, + "step": 11147 + }, + { + "epoch": 0.19263201548244402, + "grad_norm": 1.0250939885476986, + "learning_rate": 1.864477578784592e-05, + "loss": 0.5323, + "step": 11148 + }, + { + "epoch": 0.19264929499585293, + "grad_norm": 0.792028333546285, + "learning_rate": 1.8644494454505216e-05, + "loss": 0.4977, + "step": 11149 + }, + { + "epoch": 0.1926665745092618, + "grad_norm": 0.8584473444599757, + "learning_rate": 1.8644213094089296e-05, + "loss": 0.5007, + "step": 11150 + }, + { + "epoch": 0.19268385402267071, + "grad_norm": 1.5764842167108746, + "learning_rate": 1.8643931706599046e-05, + "loss": 0.7459, + "step": 11151 + }, + { + "epoch": 0.19270113353607962, + "grad_norm": 0.9850088313242907, + "learning_rate": 1.8643650292035343e-05, + "loss": 0.498, + "step": 11152 + }, + { + "epoch": 0.19271841304948853, + "grad_norm": 1.1674624766251946, + "learning_rate": 1.8643368850399074e-05, + "loss": 0.634, + "step": 11153 + }, + { + "epoch": 0.19273569256289744, + "grad_norm": 0.6886959085824061, + "learning_rate": 1.8643087381691117e-05, + "loss": 0.5452, + "step": 11154 + }, + { + "epoch": 0.19275297207630634, + "grad_norm": 1.0619963880231518, + "learning_rate": 1.864280588591235e-05, + "loss": 0.6046, + "step": 11155 + }, + { + "epoch": 0.19277025158971522, + "grad_norm": 1.284016320491135, + "learning_rate": 1.864252436306366e-05, + "loss": 0.5643, + "step": 11156 + }, + { + "epoch": 0.19278753110312413, + "grad_norm": 1.2441456777638429, + "learning_rate": 1.8642242813145924e-05, + "loss": 0.4673, + "step": 11157 + }, + { + "epoch": 0.19280481061653304, + "grad_norm": 1.3003574573215848, + "learning_rate": 1.864196123616003e-05, + "loss": 0.6009, + "step": 11158 + }, + { + "epoch": 0.19282209012994195, + "grad_norm": 1.3370629288446305, + "learning_rate": 1.864167963210686e-05, + "loss": 0.6302, + "step": 11159 + }, + { + "epoch": 0.19283936964335086, + "grad_norm": 1.053969263968773, + "learning_rate": 1.8641398000987287e-05, + "loss": 0.5035, + "step": 11160 + }, + { + "epoch": 0.19285664915675974, + "grad_norm": 1.1012355828972613, + "learning_rate": 1.86411163428022e-05, + "loss": 0.4978, + "step": 11161 + }, + { + "epoch": 0.19287392867016864, + "grad_norm": 1.159546860908361, + "learning_rate": 1.864083465755248e-05, + "loss": 0.5579, + "step": 11162 + }, + { + "epoch": 0.19289120818357755, + "grad_norm": 0.975460448233361, + "learning_rate": 1.864055294523901e-05, + "loss": 0.3326, + "step": 11163 + }, + { + "epoch": 0.19290848769698646, + "grad_norm": 1.0289393472470196, + "learning_rate": 1.8640271205862666e-05, + "loss": 0.6859, + "step": 11164 + }, + { + "epoch": 0.19292576721039537, + "grad_norm": 1.1686128428360623, + "learning_rate": 1.8639989439424343e-05, + "loss": 0.7843, + "step": 11165 + }, + { + "epoch": 0.19294304672380425, + "grad_norm": 1.2397582405868526, + "learning_rate": 1.8639707645924913e-05, + "loss": 0.4648, + "step": 11166 + }, + { + "epoch": 0.19296032623721315, + "grad_norm": 1.0028916758382298, + "learning_rate": 1.8639425825365263e-05, + "loss": 0.8127, + "step": 11167 + }, + { + "epoch": 0.19297760575062206, + "grad_norm": 0.8863698278422524, + "learning_rate": 1.8639143977746276e-05, + "loss": 0.6733, + "step": 11168 + }, + { + "epoch": 0.19299488526403097, + "grad_norm": 0.8080941928835316, + "learning_rate": 1.8638862103068834e-05, + "loss": 0.5915, + "step": 11169 + }, + { + "epoch": 0.19301216477743988, + "grad_norm": 0.8119131345037234, + "learning_rate": 1.863858020133382e-05, + "loss": 0.7495, + "step": 11170 + }, + { + "epoch": 0.19302944429084876, + "grad_norm": 1.4908506478596337, + "learning_rate": 1.863829827254211e-05, + "loss": 0.6298, + "step": 11171 + }, + { + "epoch": 0.19304672380425766, + "grad_norm": 0.8646636729749495, + "learning_rate": 1.8638016316694598e-05, + "loss": 0.4789, + "step": 11172 + }, + { + "epoch": 0.19306400331766657, + "grad_norm": 0.933434626034401, + "learning_rate": 1.8637734333792163e-05, + "loss": 0.4836, + "step": 11173 + }, + { + "epoch": 0.19308128283107548, + "grad_norm": 0.7459120487367058, + "learning_rate": 1.8637452323835686e-05, + "loss": 0.5928, + "step": 11174 + }, + { + "epoch": 0.1930985623444844, + "grad_norm": 0.9488758427414978, + "learning_rate": 1.863717028682605e-05, + "loss": 0.6045, + "step": 11175 + }, + { + "epoch": 0.1931158418578933, + "grad_norm": 1.1097378893899394, + "learning_rate": 1.8636888222764144e-05, + "loss": 0.7293, + "step": 11176 + }, + { + "epoch": 0.19313312137130217, + "grad_norm": 0.7847805691048836, + "learning_rate": 1.8636606131650848e-05, + "loss": 0.6196, + "step": 11177 + }, + { + "epoch": 0.19315040088471108, + "grad_norm": 1.208855545508496, + "learning_rate": 1.8636324013487043e-05, + "loss": 0.7272, + "step": 11178 + }, + { + "epoch": 0.19316768039812, + "grad_norm": 1.1210252195964867, + "learning_rate": 1.8636041868273618e-05, + "loss": 0.6057, + "step": 11179 + }, + { + "epoch": 0.1931849599115289, + "grad_norm": 1.2107471007735864, + "learning_rate": 1.863575969601145e-05, + "loss": 0.5307, + "step": 11180 + }, + { + "epoch": 0.1932022394249378, + "grad_norm": 1.2308855182949343, + "learning_rate": 1.8635477496701427e-05, + "loss": 0.6553, + "step": 11181 + }, + { + "epoch": 0.19321951893834668, + "grad_norm": 0.7839372620292647, + "learning_rate": 1.863519527034443e-05, + "loss": 0.4983, + "step": 11182 + }, + { + "epoch": 0.1932367984517556, + "grad_norm": 1.225366407136006, + "learning_rate": 1.863491301694135e-05, + "loss": 0.6413, + "step": 11183 + }, + { + "epoch": 0.1932540779651645, + "grad_norm": 1.4594072474957083, + "learning_rate": 1.8634630736493065e-05, + "loss": 0.8141, + "step": 11184 + }, + { + "epoch": 0.1932713574785734, + "grad_norm": 0.8338899820230021, + "learning_rate": 1.8634348429000456e-05, + "loss": 0.3942, + "step": 11185 + }, + { + "epoch": 0.19328863699198232, + "grad_norm": 0.7282677422845741, + "learning_rate": 1.8634066094464416e-05, + "loss": 0.5006, + "step": 11186 + }, + { + "epoch": 0.1933059165053912, + "grad_norm": 1.2990393564071994, + "learning_rate": 1.8633783732885823e-05, + "loss": 0.6863, + "step": 11187 + }, + { + "epoch": 0.1933231960188001, + "grad_norm": 0.8183638516274854, + "learning_rate": 1.8633501344265567e-05, + "loss": 0.4289, + "step": 11188 + }, + { + "epoch": 0.193340475532209, + "grad_norm": 1.186923945958819, + "learning_rate": 1.8633218928604523e-05, + "loss": 0.6457, + "step": 11189 + }, + { + "epoch": 0.19335775504561792, + "grad_norm": 1.0870653834633153, + "learning_rate": 1.8632936485903585e-05, + "loss": 0.5542, + "step": 11190 + }, + { + "epoch": 0.19337503455902683, + "grad_norm": 0.7780655356152648, + "learning_rate": 1.863265401616363e-05, + "loss": 0.7001, + "step": 11191 + }, + { + "epoch": 0.19339231407243573, + "grad_norm": 0.6763630265586856, + "learning_rate": 1.8632371519385552e-05, + "loss": 0.5396, + "step": 11192 + }, + { + "epoch": 0.1934095935858446, + "grad_norm": 0.9570480526641613, + "learning_rate": 1.863208899557023e-05, + "loss": 0.5795, + "step": 11193 + }, + { + "epoch": 0.19342687309925352, + "grad_norm": 0.7583604326077701, + "learning_rate": 1.8631806444718545e-05, + "loss": 0.6198, + "step": 11194 + }, + { + "epoch": 0.19344415261266243, + "grad_norm": 1.0795378916076956, + "learning_rate": 1.863152386683139e-05, + "loss": 0.5101, + "step": 11195 + }, + { + "epoch": 0.19346143212607134, + "grad_norm": 1.3313131721914402, + "learning_rate": 1.863124126190964e-05, + "loss": 0.5825, + "step": 11196 + }, + { + "epoch": 0.19347871163948024, + "grad_norm": 0.7267643388921141, + "learning_rate": 1.863095862995419e-05, + "loss": 0.3778, + "step": 11197 + }, + { + "epoch": 0.19349599115288912, + "grad_norm": 1.034201320649152, + "learning_rate": 1.8630675970965925e-05, + "loss": 0.5448, + "step": 11198 + }, + { + "epoch": 0.19351327066629803, + "grad_norm": 1.2219802718608577, + "learning_rate": 1.8630393284945723e-05, + "loss": 0.6481, + "step": 11199 + }, + { + "epoch": 0.19353055017970694, + "grad_norm": 1.1791153883149768, + "learning_rate": 1.8630110571894478e-05, + "loss": 0.5709, + "step": 11200 + }, + { + "epoch": 0.19354782969311585, + "grad_norm": 0.9642065898740101, + "learning_rate": 1.8629827831813066e-05, + "loss": 0.5648, + "step": 11201 + }, + { + "epoch": 0.19356510920652475, + "grad_norm": 1.2637924061320878, + "learning_rate": 1.862954506470238e-05, + "loss": 0.5936, + "step": 11202 + }, + { + "epoch": 0.19358238871993363, + "grad_norm": 0.829795128817997, + "learning_rate": 1.8629262270563303e-05, + "loss": 0.6204, + "step": 11203 + }, + { + "epoch": 0.19359966823334254, + "grad_norm": 1.3720763475900646, + "learning_rate": 1.8628979449396718e-05, + "loss": 0.5849, + "step": 11204 + }, + { + "epoch": 0.19361694774675145, + "grad_norm": 1.4979836153682868, + "learning_rate": 1.8628696601203514e-05, + "loss": 0.5115, + "step": 11205 + }, + { + "epoch": 0.19363422726016036, + "grad_norm": 0.7883415445910185, + "learning_rate": 1.862841372598458e-05, + "loss": 0.3925, + "step": 11206 + }, + { + "epoch": 0.19365150677356927, + "grad_norm": 0.7390789309533015, + "learning_rate": 1.8628130823740792e-05, + "loss": 0.3279, + "step": 11207 + }, + { + "epoch": 0.19366878628697815, + "grad_norm": 1.0443682300872557, + "learning_rate": 1.862784789447305e-05, + "loss": 0.6445, + "step": 11208 + }, + { + "epoch": 0.19368606580038705, + "grad_norm": 0.8072751209898045, + "learning_rate": 1.862756493818223e-05, + "loss": 0.7506, + "step": 11209 + }, + { + "epoch": 0.19370334531379596, + "grad_norm": 1.4744547917534045, + "learning_rate": 1.8627281954869222e-05, + "loss": 0.5984, + "step": 11210 + }, + { + "epoch": 0.19372062482720487, + "grad_norm": 1.3483126236563383, + "learning_rate": 1.8626998944534907e-05, + "loss": 0.5074, + "step": 11211 + }, + { + "epoch": 0.19373790434061378, + "grad_norm": 0.9405357188898149, + "learning_rate": 1.8626715907180178e-05, + "loss": 0.6044, + "step": 11212 + }, + { + "epoch": 0.19375518385402268, + "grad_norm": 0.790952880191289, + "learning_rate": 1.862643284280592e-05, + "loss": 0.5036, + "step": 11213 + }, + { + "epoch": 0.19377246336743156, + "grad_norm": 0.9606615561744777, + "learning_rate": 1.862614975141302e-05, + "loss": 0.4686, + "step": 11214 + }, + { + "epoch": 0.19378974288084047, + "grad_norm": 2.202763398124543, + "learning_rate": 1.862586663300236e-05, + "loss": 0.921, + "step": 11215 + }, + { + "epoch": 0.19380702239424938, + "grad_norm": 1.1414727359403993, + "learning_rate": 1.862558348757483e-05, + "loss": 0.6663, + "step": 11216 + }, + { + "epoch": 0.1938243019076583, + "grad_norm": 0.7735070906134437, + "learning_rate": 1.862530031513132e-05, + "loss": 0.7977, + "step": 11217 + }, + { + "epoch": 0.1938415814210672, + "grad_norm": 0.38654426307639395, + "learning_rate": 1.8625017115672715e-05, + "loss": 0.5283, + "step": 11218 + }, + { + "epoch": 0.19385886093447607, + "grad_norm": 0.6489138142580911, + "learning_rate": 1.86247338891999e-05, + "loss": 0.5446, + "step": 11219 + }, + { + "epoch": 0.19387614044788498, + "grad_norm": 0.9389704346734079, + "learning_rate": 1.862445063571376e-05, + "loss": 0.5553, + "step": 11220 + }, + { + "epoch": 0.1938934199612939, + "grad_norm": 1.3921424458145506, + "learning_rate": 1.8624167355215186e-05, + "loss": 0.5021, + "step": 11221 + }, + { + "epoch": 0.1939106994747028, + "grad_norm": 0.6259853847482678, + "learning_rate": 1.8623884047705064e-05, + "loss": 0.6475, + "step": 11222 + }, + { + "epoch": 0.1939279789881117, + "grad_norm": 0.4650868285715431, + "learning_rate": 1.8623600713184285e-05, + "loss": 0.6436, + "step": 11223 + }, + { + "epoch": 0.19394525850152058, + "grad_norm": 1.2066828988559068, + "learning_rate": 1.862331735165373e-05, + "loss": 0.5954, + "step": 11224 + }, + { + "epoch": 0.1939625380149295, + "grad_norm": 1.1183993808797883, + "learning_rate": 1.862303396311429e-05, + "loss": 0.4859, + "step": 11225 + }, + { + "epoch": 0.1939798175283384, + "grad_norm": 0.501625890327175, + "learning_rate": 1.8622750547566855e-05, + "loss": 0.7272, + "step": 11226 + }, + { + "epoch": 0.1939970970417473, + "grad_norm": 1.0325139012198257, + "learning_rate": 1.862246710501231e-05, + "loss": 0.5563, + "step": 11227 + }, + { + "epoch": 0.19401437655515621, + "grad_norm": 0.911273614470737, + "learning_rate": 1.862218363545154e-05, + "loss": 0.5593, + "step": 11228 + }, + { + "epoch": 0.19403165606856512, + "grad_norm": 1.0615978128278587, + "learning_rate": 1.8621900138885433e-05, + "loss": 0.4877, + "step": 11229 + }, + { + "epoch": 0.194048935581974, + "grad_norm": 1.1552980541776505, + "learning_rate": 1.8621616615314886e-05, + "loss": 0.6339, + "step": 11230 + }, + { + "epoch": 0.1940662150953829, + "grad_norm": 1.9404718539267873, + "learning_rate": 1.8621333064740776e-05, + "loss": 0.7354, + "step": 11231 + }, + { + "epoch": 0.19408349460879182, + "grad_norm": 1.0421480357467672, + "learning_rate": 1.8621049487163997e-05, + "loss": 0.6238, + "step": 11232 + }, + { + "epoch": 0.19410077412220073, + "grad_norm": 0.926191876645984, + "learning_rate": 1.8620765882585438e-05, + "loss": 0.8183, + "step": 11233 + }, + { + "epoch": 0.19411805363560963, + "grad_norm": 0.7713864531753447, + "learning_rate": 1.8620482251005983e-05, + "loss": 0.4001, + "step": 11234 + }, + { + "epoch": 0.1941353331490185, + "grad_norm": 0.9647977617032276, + "learning_rate": 1.8620198592426525e-05, + "loss": 0.7643, + "step": 11235 + }, + { + "epoch": 0.19415261266242742, + "grad_norm": 1.3298074367746782, + "learning_rate": 1.8619914906847943e-05, + "loss": 0.6457, + "step": 11236 + }, + { + "epoch": 0.19416989217583633, + "grad_norm": 1.2612687759705907, + "learning_rate": 1.861963119427114e-05, + "loss": 0.6375, + "step": 11237 + }, + { + "epoch": 0.19418717168924524, + "grad_norm": 0.8266055818007993, + "learning_rate": 1.861934745469699e-05, + "loss": 0.5203, + "step": 11238 + }, + { + "epoch": 0.19420445120265414, + "grad_norm": 1.5093418376495076, + "learning_rate": 1.8619063688126397e-05, + "loss": 0.7641, + "step": 11239 + }, + { + "epoch": 0.19422173071606302, + "grad_norm": 1.139951666760941, + "learning_rate": 1.8618779894560238e-05, + "loss": 0.4763, + "step": 11240 + }, + { + "epoch": 0.19423901022947193, + "grad_norm": 1.0224928289230337, + "learning_rate": 1.86184960739994e-05, + "loss": 0.6526, + "step": 11241 + }, + { + "epoch": 0.19425628974288084, + "grad_norm": 1.2550909224104658, + "learning_rate": 1.8618212226444785e-05, + "loss": 0.4824, + "step": 11242 + }, + { + "epoch": 0.19427356925628975, + "grad_norm": 0.9438757764303012, + "learning_rate": 1.8617928351897273e-05, + "loss": 0.6185, + "step": 11243 + }, + { + "epoch": 0.19429084876969865, + "grad_norm": 1.4273916963190652, + "learning_rate": 1.861764445035775e-05, + "loss": 0.5237, + "step": 11244 + }, + { + "epoch": 0.19430812828310753, + "grad_norm": 1.0012614056842981, + "learning_rate": 1.861736052182711e-05, + "loss": 0.6628, + "step": 11245 + }, + { + "epoch": 0.19432540779651644, + "grad_norm": 0.3958042134280378, + "learning_rate": 1.8617076566306248e-05, + "loss": 0.7442, + "step": 11246 + }, + { + "epoch": 0.19434268730992535, + "grad_norm": 1.2576514948977826, + "learning_rate": 1.861679258379604e-05, + "loss": 0.4907, + "step": 11247 + }, + { + "epoch": 0.19435996682333426, + "grad_norm": 1.1355040527624412, + "learning_rate": 1.861650857429739e-05, + "loss": 0.6481, + "step": 11248 + }, + { + "epoch": 0.19437724633674316, + "grad_norm": 0.6975921718846686, + "learning_rate": 1.8616224537811175e-05, + "loss": 0.4362, + "step": 11249 + }, + { + "epoch": 0.19439452585015207, + "grad_norm": 1.013560099075536, + "learning_rate": 1.8615940474338293e-05, + "loss": 0.6246, + "step": 11250 + }, + { + "epoch": 0.19441180536356095, + "grad_norm": 0.8338108144743946, + "learning_rate": 1.861565638387963e-05, + "loss": 0.5295, + "step": 11251 + }, + { + "epoch": 0.19442908487696986, + "grad_norm": 0.5588253239237828, + "learning_rate": 1.8615372266436075e-05, + "loss": 0.4044, + "step": 11252 + }, + { + "epoch": 0.19444636439037877, + "grad_norm": 0.8023264593644936, + "learning_rate": 1.861508812200852e-05, + "loss": 0.3451, + "step": 11253 + }, + { + "epoch": 0.19446364390378768, + "grad_norm": 0.8779068267355554, + "learning_rate": 1.8614803950597855e-05, + "loss": 0.4546, + "step": 11254 + }, + { + "epoch": 0.19448092341719658, + "grad_norm": 0.836960325169707, + "learning_rate": 1.8614519752204967e-05, + "loss": 0.6024, + "step": 11255 + }, + { + "epoch": 0.19449820293060546, + "grad_norm": 0.7949695573734002, + "learning_rate": 1.8614235526830754e-05, + "loss": 0.5256, + "step": 11256 + }, + { + "epoch": 0.19451548244401437, + "grad_norm": 1.2748500167620387, + "learning_rate": 1.8613951274476094e-05, + "loss": 0.5658, + "step": 11257 + }, + { + "epoch": 0.19453276195742328, + "grad_norm": 0.7264664447664694, + "learning_rate": 1.8613666995141887e-05, + "loss": 0.6016, + "step": 11258 + }, + { + "epoch": 0.19455004147083219, + "grad_norm": 0.9481009432433588, + "learning_rate": 1.861338268882902e-05, + "loss": 0.5458, + "step": 11259 + }, + { + "epoch": 0.1945673209842411, + "grad_norm": 1.1829448252876034, + "learning_rate": 1.8613098355538387e-05, + "loss": 0.5684, + "step": 11260 + }, + { + "epoch": 0.19458460049764997, + "grad_norm": 0.8427723678150512, + "learning_rate": 1.861281399527087e-05, + "loss": 0.483, + "step": 11261 + }, + { + "epoch": 0.19460188001105888, + "grad_norm": 1.4833658399328935, + "learning_rate": 1.861252960802737e-05, + "loss": 0.4534, + "step": 11262 + }, + { + "epoch": 0.1946191595244678, + "grad_norm": 1.5999220276273547, + "learning_rate": 1.8612245193808766e-05, + "loss": 0.6902, + "step": 11263 + }, + { + "epoch": 0.1946364390378767, + "grad_norm": 0.79975802211684, + "learning_rate": 1.861196075261596e-05, + "loss": 0.4671, + "step": 11264 + }, + { + "epoch": 0.1946537185512856, + "grad_norm": 0.9790592695942796, + "learning_rate": 1.8611676284449836e-05, + "loss": 0.4477, + "step": 11265 + }, + { + "epoch": 0.1946709980646945, + "grad_norm": 1.2893828533042742, + "learning_rate": 1.8611391789311292e-05, + "loss": 0.5192, + "step": 11266 + }, + { + "epoch": 0.1946882775781034, + "grad_norm": 1.2900148559031945, + "learning_rate": 1.8611107267201213e-05, + "loss": 0.7828, + "step": 11267 + }, + { + "epoch": 0.1947055570915123, + "grad_norm": 1.0230865543968728, + "learning_rate": 1.861082271812049e-05, + "loss": 0.5209, + "step": 11268 + }, + { + "epoch": 0.1947228366049212, + "grad_norm": 1.8082191185713543, + "learning_rate": 1.8610538142070015e-05, + "loss": 0.7549, + "step": 11269 + }, + { + "epoch": 0.19474011611833011, + "grad_norm": 1.2426733232841627, + "learning_rate": 1.8610253539050682e-05, + "loss": 0.6464, + "step": 11270 + }, + { + "epoch": 0.19475739563173902, + "grad_norm": 1.6167990408378448, + "learning_rate": 1.860996890906338e-05, + "loss": 0.7348, + "step": 11271 + }, + { + "epoch": 0.1947746751451479, + "grad_norm": 1.6207853108334134, + "learning_rate": 1.8609684252109e-05, + "loss": 0.6979, + "step": 11272 + }, + { + "epoch": 0.1947919546585568, + "grad_norm": 0.4487849140683452, + "learning_rate": 1.860939956818844e-05, + "loss": 0.5868, + "step": 11273 + }, + { + "epoch": 0.19480923417196572, + "grad_norm": 0.8449331283378602, + "learning_rate": 1.860911485730258e-05, + "loss": 0.3882, + "step": 11274 + }, + { + "epoch": 0.19482651368537462, + "grad_norm": 1.547734439291306, + "learning_rate": 1.8608830119452322e-05, + "loss": 0.5739, + "step": 11275 + }, + { + "epoch": 0.19484379319878353, + "grad_norm": 0.9470727792741686, + "learning_rate": 1.8608545354638553e-05, + "loss": 0.5871, + "step": 11276 + }, + { + "epoch": 0.1948610727121924, + "grad_norm": 0.8835105754297105, + "learning_rate": 1.8608260562862162e-05, + "loss": 0.6859, + "step": 11277 + }, + { + "epoch": 0.19487835222560132, + "grad_norm": 0.48857722671587006, + "learning_rate": 1.860797574412405e-05, + "loss": 0.774, + "step": 11278 + }, + { + "epoch": 0.19489563173901023, + "grad_norm": 1.5828414236072732, + "learning_rate": 1.86076908984251e-05, + "loss": 0.6916, + "step": 11279 + }, + { + "epoch": 0.19491291125241914, + "grad_norm": 0.4581624494969497, + "learning_rate": 1.8607406025766207e-05, + "loss": 0.555, + "step": 11280 + }, + { + "epoch": 0.19493019076582804, + "grad_norm": 0.8319931277429068, + "learning_rate": 1.860712112614827e-05, + "loss": 0.4967, + "step": 11281 + }, + { + "epoch": 0.19494747027923692, + "grad_norm": 0.7861499413622278, + "learning_rate": 1.8606836199572168e-05, + "loss": 0.3472, + "step": 11282 + }, + { + "epoch": 0.19496474979264583, + "grad_norm": 0.8500497315604275, + "learning_rate": 1.8606551246038807e-05, + "loss": 0.4067, + "step": 11283 + }, + { + "epoch": 0.19498202930605474, + "grad_norm": 0.3909990179878982, + "learning_rate": 1.860626626554907e-05, + "loss": 0.5968, + "step": 11284 + }, + { + "epoch": 0.19499930881946365, + "grad_norm": 1.551934488264648, + "learning_rate": 1.8605981258103854e-05, + "loss": 0.8729, + "step": 11285 + }, + { + "epoch": 0.19501658833287255, + "grad_norm": 1.407087989273539, + "learning_rate": 1.860569622370405e-05, + "loss": 0.7257, + "step": 11286 + }, + { + "epoch": 0.19503386784628146, + "grad_norm": 1.241824009564278, + "learning_rate": 1.8605411162350554e-05, + "loss": 0.6752, + "step": 11287 + }, + { + "epoch": 0.19505114735969034, + "grad_norm": 1.2347196933818079, + "learning_rate": 1.8605126074044256e-05, + "loss": 0.5669, + "step": 11288 + }, + { + "epoch": 0.19506842687309925, + "grad_norm": 0.44704123562887627, + "learning_rate": 1.8604840958786047e-05, + "loss": 0.8671, + "step": 11289 + }, + { + "epoch": 0.19508570638650816, + "grad_norm": 1.0678728693118753, + "learning_rate": 1.8604555816576822e-05, + "loss": 0.513, + "step": 11290 + }, + { + "epoch": 0.19510298589991706, + "grad_norm": 1.6113941177887947, + "learning_rate": 1.8604270647417475e-05, + "loss": 0.5443, + "step": 11291 + }, + { + "epoch": 0.19512026541332597, + "grad_norm": 0.8858475016508414, + "learning_rate": 1.8603985451308898e-05, + "loss": 0.7853, + "step": 11292 + }, + { + "epoch": 0.19513754492673485, + "grad_norm": 0.9417364945939517, + "learning_rate": 1.8603700228251987e-05, + "loss": 0.6274, + "step": 11293 + }, + { + "epoch": 0.19515482444014376, + "grad_norm": 0.8683145606756879, + "learning_rate": 1.8603414978247628e-05, + "loss": 0.558, + "step": 11294 + }, + { + "epoch": 0.19517210395355267, + "grad_norm": 0.9314042235387442, + "learning_rate": 1.8603129701296725e-05, + "loss": 0.4329, + "step": 11295 + }, + { + "epoch": 0.19518938346696157, + "grad_norm": 0.7218547204812231, + "learning_rate": 1.8602844397400165e-05, + "loss": 0.5986, + "step": 11296 + }, + { + "epoch": 0.19520666298037048, + "grad_norm": 0.7996823044353479, + "learning_rate": 1.8602559066558838e-05, + "loss": 0.5384, + "step": 11297 + }, + { + "epoch": 0.19522394249377936, + "grad_norm": 1.112828495052793, + "learning_rate": 1.8602273708773646e-05, + "loss": 0.5273, + "step": 11298 + }, + { + "epoch": 0.19524122200718827, + "grad_norm": 0.9129516850195696, + "learning_rate": 1.860198832404548e-05, + "loss": 0.4504, + "step": 11299 + }, + { + "epoch": 0.19525850152059718, + "grad_norm": 0.7995921504557865, + "learning_rate": 1.860170291237523e-05, + "loss": 0.5407, + "step": 11300 + }, + { + "epoch": 0.19527578103400609, + "grad_norm": 1.324052719780753, + "learning_rate": 1.8601417473763794e-05, + "loss": 0.4452, + "step": 11301 + }, + { + "epoch": 0.195293060547415, + "grad_norm": 0.6443345374659247, + "learning_rate": 1.8601132008212067e-05, + "loss": 0.4284, + "step": 11302 + }, + { + "epoch": 0.1953103400608239, + "grad_norm": 1.2742201502899586, + "learning_rate": 1.860084651572094e-05, + "loss": 0.6899, + "step": 11303 + }, + { + "epoch": 0.19532761957423278, + "grad_norm": 0.36383851792069577, + "learning_rate": 1.8600560996291306e-05, + "loss": 0.4311, + "step": 11304 + }, + { + "epoch": 0.1953448990876417, + "grad_norm": 1.0703045754824327, + "learning_rate": 1.860027544992406e-05, + "loss": 0.5945, + "step": 11305 + }, + { + "epoch": 0.1953621786010506, + "grad_norm": 1.6102850961647788, + "learning_rate": 1.85999898766201e-05, + "loss": 0.5523, + "step": 11306 + }, + { + "epoch": 0.1953794581144595, + "grad_norm": 0.8895359278739541, + "learning_rate": 1.859970427638032e-05, + "loss": 0.5217, + "step": 11307 + }, + { + "epoch": 0.1953967376278684, + "grad_norm": 0.8620502547731, + "learning_rate": 1.8599418649205607e-05, + "loss": 0.4537, + "step": 11308 + }, + { + "epoch": 0.1954140171412773, + "grad_norm": 1.178150018061513, + "learning_rate": 1.8599132995096868e-05, + "loss": 0.5764, + "step": 11309 + }, + { + "epoch": 0.1954312966546862, + "grad_norm": 0.9934145909386601, + "learning_rate": 1.8598847314054986e-05, + "loss": 0.5961, + "step": 11310 + }, + { + "epoch": 0.1954485761680951, + "grad_norm": 0.9894529013880144, + "learning_rate": 1.859856160608086e-05, + "loss": 0.6232, + "step": 11311 + }, + { + "epoch": 0.195465855681504, + "grad_norm": 0.9820521706032876, + "learning_rate": 1.859827587117539e-05, + "loss": 0.4501, + "step": 11312 + }, + { + "epoch": 0.19548313519491292, + "grad_norm": 0.9516888820889529, + "learning_rate": 1.8597990109339465e-05, + "loss": 0.3933, + "step": 11313 + }, + { + "epoch": 0.1955004147083218, + "grad_norm": 1.5958403940502253, + "learning_rate": 1.8597704320573982e-05, + "loss": 0.712, + "step": 11314 + }, + { + "epoch": 0.1955176942217307, + "grad_norm": 0.5786207449864287, + "learning_rate": 1.8597418504879835e-05, + "loss": 0.4167, + "step": 11315 + }, + { + "epoch": 0.19553497373513962, + "grad_norm": 0.7876746907727753, + "learning_rate": 1.859713266225792e-05, + "loss": 0.433, + "step": 11316 + }, + { + "epoch": 0.19555225324854852, + "grad_norm": 1.0873392574308844, + "learning_rate": 1.859684679270913e-05, + "loss": 0.5031, + "step": 11317 + }, + { + "epoch": 0.19556953276195743, + "grad_norm": 1.0843814712589583, + "learning_rate": 1.8596560896234365e-05, + "loss": 0.7479, + "step": 11318 + }, + { + "epoch": 0.1955868122753663, + "grad_norm": 0.9584859809573163, + "learning_rate": 1.859627497283452e-05, + "loss": 0.4414, + "step": 11319 + }, + { + "epoch": 0.19560409178877522, + "grad_norm": 0.8108659507542847, + "learning_rate": 1.8595989022510483e-05, + "loss": 0.542, + "step": 11320 + }, + { + "epoch": 0.19562137130218413, + "grad_norm": 0.36550535934799994, + "learning_rate": 1.859570304526316e-05, + "loss": 0.8363, + "step": 11321 + }, + { + "epoch": 0.19563865081559303, + "grad_norm": 0.9663958020465918, + "learning_rate": 1.859541704109344e-05, + "loss": 0.4912, + "step": 11322 + }, + { + "epoch": 0.19565593032900194, + "grad_norm": 0.39042188395905475, + "learning_rate": 1.8595131010002218e-05, + "loss": 0.6614, + "step": 11323 + }, + { + "epoch": 0.19567320984241085, + "grad_norm": 1.2089244528963596, + "learning_rate": 1.8594844951990396e-05, + "loss": 0.6791, + "step": 11324 + }, + { + "epoch": 0.19569048935581973, + "grad_norm": 1.5411329498218673, + "learning_rate": 1.8594558867058868e-05, + "loss": 0.7883, + "step": 11325 + }, + { + "epoch": 0.19570776886922864, + "grad_norm": 1.1793465676354349, + "learning_rate": 1.8594272755208524e-05, + "loss": 0.6637, + "step": 11326 + }, + { + "epoch": 0.19572504838263755, + "grad_norm": 0.45698833898322794, + "learning_rate": 1.8593986616440266e-05, + "loss": 0.6082, + "step": 11327 + }, + { + "epoch": 0.19574232789604645, + "grad_norm": 1.235967472647359, + "learning_rate": 1.8593700450754988e-05, + "loss": 0.6309, + "step": 11328 + }, + { + "epoch": 0.19575960740945536, + "grad_norm": 1.349734314209965, + "learning_rate": 1.8593414258153588e-05, + "loss": 0.6283, + "step": 11329 + }, + { + "epoch": 0.19577688692286424, + "grad_norm": 1.03878541020657, + "learning_rate": 1.8593128038636964e-05, + "loss": 0.3651, + "step": 11330 + }, + { + "epoch": 0.19579416643627315, + "grad_norm": 1.2131407013345632, + "learning_rate": 1.8592841792206004e-05, + "loss": 0.6246, + "step": 11331 + }, + { + "epoch": 0.19581144594968206, + "grad_norm": 0.840707713764356, + "learning_rate": 1.8592555518861618e-05, + "loss": 0.4731, + "step": 11332 + }, + { + "epoch": 0.19582872546309096, + "grad_norm": 0.6212236103034576, + "learning_rate": 1.859226921860469e-05, + "loss": 0.4114, + "step": 11333 + }, + { + "epoch": 0.19584600497649987, + "grad_norm": 1.1103179996787051, + "learning_rate": 1.8591982891436124e-05, + "loss": 0.5751, + "step": 11334 + }, + { + "epoch": 0.19586328448990875, + "grad_norm": 1.3801128565479903, + "learning_rate": 1.8591696537356813e-05, + "loss": 0.6117, + "step": 11335 + }, + { + "epoch": 0.19588056400331766, + "grad_norm": 0.7392842618919914, + "learning_rate": 1.8591410156367656e-05, + "loss": 0.5738, + "step": 11336 + }, + { + "epoch": 0.19589784351672657, + "grad_norm": 0.895322093727919, + "learning_rate": 1.859112374846955e-05, + "loss": 0.6695, + "step": 11337 + }, + { + "epoch": 0.19591512303013547, + "grad_norm": 1.1176073146519718, + "learning_rate": 1.859083731366339e-05, + "loss": 0.383, + "step": 11338 + }, + { + "epoch": 0.19593240254354438, + "grad_norm": 0.9123135886742423, + "learning_rate": 1.8590550851950076e-05, + "loss": 0.6031, + "step": 11339 + }, + { + "epoch": 0.1959496820569533, + "grad_norm": 1.4406682181800416, + "learning_rate": 1.8590264363330502e-05, + "loss": 0.4873, + "step": 11340 + }, + { + "epoch": 0.19596696157036217, + "grad_norm": 1.1377747764595, + "learning_rate": 1.858997784780557e-05, + "loss": 0.7484, + "step": 11341 + }, + { + "epoch": 0.19598424108377108, + "grad_norm": 1.23514651656243, + "learning_rate": 1.8589691305376175e-05, + "loss": 0.5442, + "step": 11342 + }, + { + "epoch": 0.19600152059717998, + "grad_norm": 1.162209642347435, + "learning_rate": 1.8589404736043214e-05, + "loss": 0.4122, + "step": 11343 + }, + { + "epoch": 0.1960188001105889, + "grad_norm": 0.6634535348324602, + "learning_rate": 1.858911813980758e-05, + "loss": 0.6495, + "step": 11344 + }, + { + "epoch": 0.1960360796239978, + "grad_norm": 1.729470879599982, + "learning_rate": 1.8588831516670178e-05, + "loss": 0.8137, + "step": 11345 + }, + { + "epoch": 0.19605335913740668, + "grad_norm": 0.9879564114318664, + "learning_rate": 1.8588544866631904e-05, + "loss": 0.552, + "step": 11346 + }, + { + "epoch": 0.1960706386508156, + "grad_norm": 1.2122175891213933, + "learning_rate": 1.8588258189693653e-05, + "loss": 0.7017, + "step": 11347 + }, + { + "epoch": 0.1960879181642245, + "grad_norm": 0.9400978629028874, + "learning_rate": 1.8587971485856326e-05, + "loss": 0.671, + "step": 11348 + }, + { + "epoch": 0.1961051976776334, + "grad_norm": 0.7137648639160276, + "learning_rate": 1.8587684755120817e-05, + "loss": 0.3676, + "step": 11349 + }, + { + "epoch": 0.1961224771910423, + "grad_norm": 0.9139707926581597, + "learning_rate": 1.858739799748803e-05, + "loss": 0.49, + "step": 11350 + }, + { + "epoch": 0.1961397567044512, + "grad_norm": 1.0637206296587371, + "learning_rate": 1.858711121295886e-05, + "loss": 0.6701, + "step": 11351 + }, + { + "epoch": 0.1961570362178601, + "grad_norm": 0.7543177503855397, + "learning_rate": 1.8586824401534205e-05, + "loss": 0.709, + "step": 11352 + }, + { + "epoch": 0.196174315731269, + "grad_norm": 1.2500875619247955, + "learning_rate": 1.858653756321496e-05, + "loss": 0.5315, + "step": 11353 + }, + { + "epoch": 0.1961915952446779, + "grad_norm": 0.5096504969691679, + "learning_rate": 1.8586250698002032e-05, + "loss": 0.8948, + "step": 11354 + }, + { + "epoch": 0.19620887475808682, + "grad_norm": 1.5022110539525457, + "learning_rate": 1.858596380589631e-05, + "loss": 0.5918, + "step": 11355 + }, + { + "epoch": 0.19622615427149573, + "grad_norm": 1.3028308203032253, + "learning_rate": 1.85856768868987e-05, + "loss": 0.53, + "step": 11356 + }, + { + "epoch": 0.1962434337849046, + "grad_norm": 1.1841119611771547, + "learning_rate": 1.8585389941010094e-05, + "loss": 0.6417, + "step": 11357 + }, + { + "epoch": 0.19626071329831352, + "grad_norm": 1.1339511509392963, + "learning_rate": 1.8585102968231397e-05, + "loss": 0.752, + "step": 11358 + }, + { + "epoch": 0.19627799281172242, + "grad_norm": 1.0558455335669923, + "learning_rate": 1.8584815968563506e-05, + "loss": 0.6069, + "step": 11359 + }, + { + "epoch": 0.19629527232513133, + "grad_norm": 0.9606681075336085, + "learning_rate": 1.8584528942007317e-05, + "loss": 0.4618, + "step": 11360 + }, + { + "epoch": 0.19631255183854024, + "grad_norm": 1.1165069584111205, + "learning_rate": 1.8584241888563728e-05, + "loss": 0.6729, + "step": 11361 + }, + { + "epoch": 0.19632983135194912, + "grad_norm": 1.3244785524679064, + "learning_rate": 1.8583954808233645e-05, + "loss": 0.5964, + "step": 11362 + }, + { + "epoch": 0.19634711086535803, + "grad_norm": 1.1655542541040171, + "learning_rate": 1.858366770101796e-05, + "loss": 0.5574, + "step": 11363 + }, + { + "epoch": 0.19636439037876693, + "grad_norm": 0.9334406389218596, + "learning_rate": 1.858338056691758e-05, + "loss": 0.4503, + "step": 11364 + }, + { + "epoch": 0.19638166989217584, + "grad_norm": 0.7108767021146456, + "learning_rate": 1.8583093405933397e-05, + "loss": 0.4861, + "step": 11365 + }, + { + "epoch": 0.19639894940558475, + "grad_norm": 1.2366772993533672, + "learning_rate": 1.8582806218066312e-05, + "loss": 0.7265, + "step": 11366 + }, + { + "epoch": 0.19641622891899363, + "grad_norm": 1.3350423434031815, + "learning_rate": 1.8582519003317227e-05, + "loss": 0.9524, + "step": 11367 + }, + { + "epoch": 0.19643350843240254, + "grad_norm": 0.9219893837177848, + "learning_rate": 1.858223176168704e-05, + "loss": 0.4251, + "step": 11368 + }, + { + "epoch": 0.19645078794581144, + "grad_norm": 1.1029012691960862, + "learning_rate": 1.8581944493176652e-05, + "loss": 0.4925, + "step": 11369 + }, + { + "epoch": 0.19646806745922035, + "grad_norm": 0.7850801117162761, + "learning_rate": 1.858165719778696e-05, + "loss": 0.5641, + "step": 11370 + }, + { + "epoch": 0.19648534697262926, + "grad_norm": 1.1798123378840002, + "learning_rate": 1.858136987551887e-05, + "loss": 0.8341, + "step": 11371 + }, + { + "epoch": 0.19650262648603814, + "grad_norm": 0.40693707170436655, + "learning_rate": 1.858108252637327e-05, + "loss": 0.493, + "step": 11372 + }, + { + "epoch": 0.19651990599944705, + "grad_norm": 1.2788067517436772, + "learning_rate": 1.8580795150351072e-05, + "loss": 0.6407, + "step": 11373 + }, + { + "epoch": 0.19653718551285596, + "grad_norm": 0.9793705712628135, + "learning_rate": 1.8580507747453174e-05, + "loss": 0.7993, + "step": 11374 + }, + { + "epoch": 0.19655446502626486, + "grad_norm": 1.19693798954006, + "learning_rate": 1.858022031768047e-05, + "loss": 0.7352, + "step": 11375 + }, + { + "epoch": 0.19657174453967377, + "grad_norm": 0.9643250808620123, + "learning_rate": 1.8579932861033866e-05, + "loss": 0.5564, + "step": 11376 + }, + { + "epoch": 0.19658902405308268, + "grad_norm": 1.4570864184735577, + "learning_rate": 1.8579645377514254e-05, + "loss": 0.6097, + "step": 11377 + }, + { + "epoch": 0.19660630356649156, + "grad_norm": 1.0548681415412398, + "learning_rate": 1.857935786712255e-05, + "loss": 0.5611, + "step": 11378 + }, + { + "epoch": 0.19662358307990047, + "grad_norm": 1.048452079713609, + "learning_rate": 1.857907032985964e-05, + "loss": 0.7155, + "step": 11379 + }, + { + "epoch": 0.19664086259330937, + "grad_norm": 0.8297407732299255, + "learning_rate": 1.8578782765726427e-05, + "loss": 0.6577, + "step": 11380 + }, + { + "epoch": 0.19665814210671828, + "grad_norm": 0.542888398082709, + "learning_rate": 1.857849517472382e-05, + "loss": 0.4804, + "step": 11381 + }, + { + "epoch": 0.1966754216201272, + "grad_norm": 1.0296675206948338, + "learning_rate": 1.8578207556852707e-05, + "loss": 0.6327, + "step": 11382 + }, + { + "epoch": 0.19669270113353607, + "grad_norm": 0.7859380016530407, + "learning_rate": 1.8577919912114e-05, + "loss": 0.6064, + "step": 11383 + }, + { + "epoch": 0.19670998064694498, + "grad_norm": 1.0555471252225699, + "learning_rate": 1.85776322405086e-05, + "loss": 0.7064, + "step": 11384 + }, + { + "epoch": 0.19672726016035388, + "grad_norm": 1.3502454647036572, + "learning_rate": 1.85773445420374e-05, + "loss": 0.5928, + "step": 11385 + }, + { + "epoch": 0.1967445396737628, + "grad_norm": 1.2801105338180403, + "learning_rate": 1.8577056816701306e-05, + "loss": 0.7448, + "step": 11386 + }, + { + "epoch": 0.1967618191871717, + "grad_norm": 0.7074950108060155, + "learning_rate": 1.8576769064501217e-05, + "loss": 0.4685, + "step": 11387 + }, + { + "epoch": 0.19677909870058058, + "grad_norm": 0.4167913430775289, + "learning_rate": 1.8576481285438033e-05, + "loss": 0.6419, + "step": 11388 + }, + { + "epoch": 0.1967963782139895, + "grad_norm": 0.4158855359426039, + "learning_rate": 1.8576193479512664e-05, + "loss": 0.5973, + "step": 11389 + }, + { + "epoch": 0.1968136577273984, + "grad_norm": 0.943860783518415, + "learning_rate": 1.8575905646726003e-05, + "loss": 0.4495, + "step": 11390 + }, + { + "epoch": 0.1968309372408073, + "grad_norm": 1.368548577595285, + "learning_rate": 1.857561778707895e-05, + "loss": 0.7483, + "step": 11391 + }, + { + "epoch": 0.1968482167542162, + "grad_norm": 1.0346115350174108, + "learning_rate": 1.8575329900572417e-05, + "loss": 0.683, + "step": 11392 + }, + { + "epoch": 0.19686549626762512, + "grad_norm": 0.9249694577895844, + "learning_rate": 1.8575041987207294e-05, + "loss": 0.6145, + "step": 11393 + }, + { + "epoch": 0.196882775781034, + "grad_norm": 0.7214220704569453, + "learning_rate": 1.857475404698449e-05, + "loss": 0.5909, + "step": 11394 + }, + { + "epoch": 0.1969000552944429, + "grad_norm": 1.4014088565049385, + "learning_rate": 1.8574466079904904e-05, + "loss": 0.738, + "step": 11395 + }, + { + "epoch": 0.1969173348078518, + "grad_norm": 1.2127658842117859, + "learning_rate": 1.857417808596944e-05, + "loss": 0.6577, + "step": 11396 + }, + { + "epoch": 0.19693461432126072, + "grad_norm": 0.6453214696560429, + "learning_rate": 1.8573890065178997e-05, + "loss": 0.5709, + "step": 11397 + }, + { + "epoch": 0.19695189383466963, + "grad_norm": 1.170765288002417, + "learning_rate": 1.8573602017534476e-05, + "loss": 0.6798, + "step": 11398 + }, + { + "epoch": 0.1969691733480785, + "grad_norm": 1.2209004437023963, + "learning_rate": 1.8573313943036786e-05, + "loss": 0.5662, + "step": 11399 + }, + { + "epoch": 0.19698645286148742, + "grad_norm": 0.9787067818779777, + "learning_rate": 1.8573025841686823e-05, + "loss": 0.6174, + "step": 11400 + }, + { + "epoch": 0.19700373237489632, + "grad_norm": 1.4704215196583001, + "learning_rate": 1.8572737713485495e-05, + "loss": 0.6559, + "step": 11401 + }, + { + "epoch": 0.19702101188830523, + "grad_norm": 0.7918109737540183, + "learning_rate": 1.8572449558433697e-05, + "loss": 0.5761, + "step": 11402 + }, + { + "epoch": 0.19703829140171414, + "grad_norm": 0.6338656472709797, + "learning_rate": 1.8572161376532337e-05, + "loss": 0.6062, + "step": 11403 + }, + { + "epoch": 0.19705557091512302, + "grad_norm": 1.0086168313345956, + "learning_rate": 1.8571873167782316e-05, + "loss": 0.552, + "step": 11404 + }, + { + "epoch": 0.19707285042853193, + "grad_norm": 1.055103499811031, + "learning_rate": 1.857158493218454e-05, + "loss": 0.6327, + "step": 11405 + }, + { + "epoch": 0.19709012994194083, + "grad_norm": 0.7696619425257432, + "learning_rate": 1.8571296669739904e-05, + "loss": 0.4302, + "step": 11406 + }, + { + "epoch": 0.19710740945534974, + "grad_norm": 1.5139788012672222, + "learning_rate": 1.8571008380449315e-05, + "loss": 0.6963, + "step": 11407 + }, + { + "epoch": 0.19712468896875865, + "grad_norm": 1.2283165252395576, + "learning_rate": 1.857072006431368e-05, + "loss": 0.6478, + "step": 11408 + }, + { + "epoch": 0.19714196848216753, + "grad_norm": 1.118937158620546, + "learning_rate": 1.8570431721333894e-05, + "loss": 0.4736, + "step": 11409 + }, + { + "epoch": 0.19715924799557644, + "grad_norm": 1.058468200749685, + "learning_rate": 1.8570143351510868e-05, + "loss": 0.5739, + "step": 11410 + }, + { + "epoch": 0.19717652750898534, + "grad_norm": 1.049626682224361, + "learning_rate": 1.8569854954845504e-05, + "loss": 0.6315, + "step": 11411 + }, + { + "epoch": 0.19719380702239425, + "grad_norm": 0.49025950687418374, + "learning_rate": 1.8569566531338695e-05, + "loss": 0.8706, + "step": 11412 + }, + { + "epoch": 0.19721108653580316, + "grad_norm": 1.2955217174488296, + "learning_rate": 1.856927808099136e-05, + "loss": 0.6614, + "step": 11413 + }, + { + "epoch": 0.19722836604921207, + "grad_norm": 1.251308843433828, + "learning_rate": 1.8568989603804392e-05, + "loss": 0.707, + "step": 11414 + }, + { + "epoch": 0.19724564556262095, + "grad_norm": 1.6085529169681188, + "learning_rate": 1.85687010997787e-05, + "loss": 0.614, + "step": 11415 + }, + { + "epoch": 0.19726292507602985, + "grad_norm": 0.6723074404714322, + "learning_rate": 1.856841256891518e-05, + "loss": 0.5634, + "step": 11416 + }, + { + "epoch": 0.19728020458943876, + "grad_norm": 1.2304584988042424, + "learning_rate": 1.856812401121474e-05, + "loss": 0.5691, + "step": 11417 + }, + { + "epoch": 0.19729748410284767, + "grad_norm": 0.8505139003459561, + "learning_rate": 1.8567835426678293e-05, + "loss": 0.7187, + "step": 11418 + }, + { + "epoch": 0.19731476361625658, + "grad_norm": 0.9822480437261547, + "learning_rate": 1.8567546815306726e-05, + "loss": 0.6192, + "step": 11419 + }, + { + "epoch": 0.19733204312966546, + "grad_norm": 1.5163668345652166, + "learning_rate": 1.8567258177100955e-05, + "loss": 0.6359, + "step": 11420 + }, + { + "epoch": 0.19734932264307437, + "grad_norm": 1.2754402284735893, + "learning_rate": 1.8566969512061878e-05, + "loss": 0.5789, + "step": 11421 + }, + { + "epoch": 0.19736660215648327, + "grad_norm": 0.8646150621278137, + "learning_rate": 1.8566680820190404e-05, + "loss": 0.4499, + "step": 11422 + }, + { + "epoch": 0.19738388166989218, + "grad_norm": 1.1837103096031223, + "learning_rate": 1.8566392101487433e-05, + "loss": 0.4382, + "step": 11423 + }, + { + "epoch": 0.1974011611833011, + "grad_norm": 0.6964748167559333, + "learning_rate": 1.856610335595387e-05, + "loss": 0.5616, + "step": 11424 + }, + { + "epoch": 0.19741844069670997, + "grad_norm": 0.7289331903320261, + "learning_rate": 1.8565814583590626e-05, + "loss": 0.5433, + "step": 11425 + }, + { + "epoch": 0.19743572021011888, + "grad_norm": 0.8809984286138806, + "learning_rate": 1.8565525784398593e-05, + "loss": 0.5032, + "step": 11426 + }, + { + "epoch": 0.19745299972352778, + "grad_norm": 0.7303925358794462, + "learning_rate": 1.8565236958378686e-05, + "loss": 0.4415, + "step": 11427 + }, + { + "epoch": 0.1974702792369367, + "grad_norm": 0.7317110265693008, + "learning_rate": 1.8564948105531805e-05, + "loss": 0.5163, + "step": 11428 + }, + { + "epoch": 0.1974875587503456, + "grad_norm": 0.9888566268072098, + "learning_rate": 1.8564659225858852e-05, + "loss": 0.6019, + "step": 11429 + }, + { + "epoch": 0.1975048382637545, + "grad_norm": 0.9563673159602093, + "learning_rate": 1.856437031936074e-05, + "loss": 0.461, + "step": 11430 + }, + { + "epoch": 0.1975221177771634, + "grad_norm": 0.4814619835923934, + "learning_rate": 1.8564081386038368e-05, + "loss": 0.7287, + "step": 11431 + }, + { + "epoch": 0.1975393972905723, + "grad_norm": 1.0502825187830516, + "learning_rate": 1.8563792425892643e-05, + "loss": 0.4626, + "step": 11432 + }, + { + "epoch": 0.1975566768039812, + "grad_norm": 1.1294937252182333, + "learning_rate": 1.8563503438924465e-05, + "loss": 0.6285, + "step": 11433 + }, + { + "epoch": 0.1975739563173901, + "grad_norm": 1.5588307409732547, + "learning_rate": 1.8563214425134747e-05, + "loss": 0.6095, + "step": 11434 + }, + { + "epoch": 0.19759123583079902, + "grad_norm": 0.6950909724525567, + "learning_rate": 1.8562925384524393e-05, + "loss": 0.5042, + "step": 11435 + }, + { + "epoch": 0.1976085153442079, + "grad_norm": 1.5011887676506308, + "learning_rate": 1.8562636317094305e-05, + "loss": 0.4665, + "step": 11436 + }, + { + "epoch": 0.1976257948576168, + "grad_norm": 0.7328255319057869, + "learning_rate": 1.856234722284539e-05, + "loss": 0.4609, + "step": 11437 + }, + { + "epoch": 0.1976430743710257, + "grad_norm": 0.37767120756831823, + "learning_rate": 1.856205810177855e-05, + "loss": 0.6297, + "step": 11438 + }, + { + "epoch": 0.19766035388443462, + "grad_norm": 1.4037748193476407, + "learning_rate": 1.856176895389469e-05, + "loss": 0.4659, + "step": 11439 + }, + { + "epoch": 0.19767763339784353, + "grad_norm": 1.7624695336391858, + "learning_rate": 1.8561479779194723e-05, + "loss": 0.7915, + "step": 11440 + }, + { + "epoch": 0.1976949129112524, + "grad_norm": 0.903919930016482, + "learning_rate": 1.856119057767955e-05, + "loss": 0.6066, + "step": 11441 + }, + { + "epoch": 0.19771219242466131, + "grad_norm": 1.1225060763574863, + "learning_rate": 1.856090134935008e-05, + "loss": 0.7544, + "step": 11442 + }, + { + "epoch": 0.19772947193807022, + "grad_norm": 1.1529044474814485, + "learning_rate": 1.8560612094207214e-05, + "loss": 0.5693, + "step": 11443 + }, + { + "epoch": 0.19774675145147913, + "grad_norm": 1.1540787692671355, + "learning_rate": 1.856032281225186e-05, + "loss": 0.4309, + "step": 11444 + }, + { + "epoch": 0.19776403096488804, + "grad_norm": 0.6801254387268704, + "learning_rate": 1.8560033503484924e-05, + "loss": 0.6782, + "step": 11445 + }, + { + "epoch": 0.19778131047829692, + "grad_norm": 0.9101079961992583, + "learning_rate": 1.8559744167907316e-05, + "loss": 0.4685, + "step": 11446 + }, + { + "epoch": 0.19779858999170583, + "grad_norm": 0.428875045326553, + "learning_rate": 1.8559454805519935e-05, + "loss": 0.9787, + "step": 11447 + }, + { + "epoch": 0.19781586950511473, + "grad_norm": 0.3833798964316069, + "learning_rate": 1.8559165416323695e-05, + "loss": 0.6411, + "step": 11448 + }, + { + "epoch": 0.19783314901852364, + "grad_norm": 1.087644642895304, + "learning_rate": 1.8558876000319495e-05, + "loss": 0.6488, + "step": 11449 + }, + { + "epoch": 0.19785042853193255, + "grad_norm": 1.1429667145546107, + "learning_rate": 1.8558586557508243e-05, + "loss": 0.6322, + "step": 11450 + }, + { + "epoch": 0.19786770804534146, + "grad_norm": 0.9392193561571075, + "learning_rate": 1.8558297087890852e-05, + "loss": 0.4779, + "step": 11451 + }, + { + "epoch": 0.19788498755875034, + "grad_norm": 0.5395110059152096, + "learning_rate": 1.8558007591468223e-05, + "loss": 0.4094, + "step": 11452 + }, + { + "epoch": 0.19790226707215924, + "grad_norm": 0.7490486314023982, + "learning_rate": 1.8557718068241263e-05, + "loss": 0.3779, + "step": 11453 + }, + { + "epoch": 0.19791954658556815, + "grad_norm": 1.1359582724520187, + "learning_rate": 1.8557428518210877e-05, + "loss": 0.4799, + "step": 11454 + }, + { + "epoch": 0.19793682609897706, + "grad_norm": 1.0681036505586188, + "learning_rate": 1.855713894137798e-05, + "loss": 0.5182, + "step": 11455 + }, + { + "epoch": 0.19795410561238597, + "grad_norm": 0.7432896573658179, + "learning_rate": 1.8556849337743474e-05, + "loss": 0.6335, + "step": 11456 + }, + { + "epoch": 0.19797138512579485, + "grad_norm": 1.393113873779193, + "learning_rate": 1.855655970730826e-05, + "loss": 0.4834, + "step": 11457 + }, + { + "epoch": 0.19798866463920375, + "grad_norm": 0.9287135052693628, + "learning_rate": 1.8556270050073256e-05, + "loss": 0.5131, + "step": 11458 + }, + { + "epoch": 0.19800594415261266, + "grad_norm": 0.782283779642612, + "learning_rate": 1.8555980366039357e-05, + "loss": 0.5329, + "step": 11459 + }, + { + "epoch": 0.19802322366602157, + "grad_norm": 1.479678206768957, + "learning_rate": 1.855569065520748e-05, + "loss": 0.6509, + "step": 11460 + }, + { + "epoch": 0.19804050317943048, + "grad_norm": 0.6991431537025248, + "learning_rate": 1.8555400917578535e-05, + "loss": 0.452, + "step": 11461 + }, + { + "epoch": 0.19805778269283936, + "grad_norm": 1.4777366248623496, + "learning_rate": 1.855511115315342e-05, + "loss": 0.4824, + "step": 11462 + }, + { + "epoch": 0.19807506220624826, + "grad_norm": 0.9820918934917628, + "learning_rate": 1.8554821361933046e-05, + "loss": 0.5984, + "step": 11463 + }, + { + "epoch": 0.19809234171965717, + "grad_norm": 1.850658298588655, + "learning_rate": 1.8554531543918322e-05, + "loss": 0.781, + "step": 11464 + }, + { + "epoch": 0.19810962123306608, + "grad_norm": 1.2936608594997112, + "learning_rate": 1.8554241699110157e-05, + "loss": 0.5714, + "step": 11465 + }, + { + "epoch": 0.198126900746475, + "grad_norm": 1.1422062710427645, + "learning_rate": 1.8553951827509453e-05, + "loss": 0.5831, + "step": 11466 + }, + { + "epoch": 0.1981441802598839, + "grad_norm": 1.2274673986843323, + "learning_rate": 1.8553661929117126e-05, + "loss": 0.6881, + "step": 11467 + }, + { + "epoch": 0.19816145977329278, + "grad_norm": 0.4646920887298064, + "learning_rate": 1.855337200393408e-05, + "loss": 0.7644, + "step": 11468 + }, + { + "epoch": 0.19817873928670168, + "grad_norm": 1.3161325167067452, + "learning_rate": 1.8553082051961217e-05, + "loss": 0.6663, + "step": 11469 + }, + { + "epoch": 0.1981960188001106, + "grad_norm": 0.9813671196574718, + "learning_rate": 1.8552792073199457e-05, + "loss": 0.6062, + "step": 11470 + }, + { + "epoch": 0.1982132983135195, + "grad_norm": 1.2351330848650368, + "learning_rate": 1.85525020676497e-05, + "loss": 0.5992, + "step": 11471 + }, + { + "epoch": 0.1982305778269284, + "grad_norm": 1.0401033248089828, + "learning_rate": 1.8552212035312856e-05, + "loss": 0.7451, + "step": 11472 + }, + { + "epoch": 0.19824785734033729, + "grad_norm": 1.1451823946948498, + "learning_rate": 1.8551921976189834e-05, + "loss": 0.6132, + "step": 11473 + }, + { + "epoch": 0.1982651368537462, + "grad_norm": 0.9277536729069857, + "learning_rate": 1.8551631890281543e-05, + "loss": 0.5634, + "step": 11474 + }, + { + "epoch": 0.1982824163671551, + "grad_norm": 1.0540226574165616, + "learning_rate": 1.8551341777588892e-05, + "loss": 0.5902, + "step": 11475 + }, + { + "epoch": 0.198299695880564, + "grad_norm": 0.8090898201386331, + "learning_rate": 1.8551051638112784e-05, + "loss": 0.5042, + "step": 11476 + }, + { + "epoch": 0.19831697539397292, + "grad_norm": 1.365814391438844, + "learning_rate": 1.855076147185414e-05, + "loss": 0.5433, + "step": 11477 + }, + { + "epoch": 0.1983342549073818, + "grad_norm": 1.2194159473959911, + "learning_rate": 1.8550471278813856e-05, + "loss": 0.702, + "step": 11478 + }, + { + "epoch": 0.1983515344207907, + "grad_norm": 0.8621538744985254, + "learning_rate": 1.855018105899285e-05, + "loss": 0.863, + "step": 11479 + }, + { + "epoch": 0.1983688139341996, + "grad_norm": 0.9196066932675269, + "learning_rate": 1.8549890812392023e-05, + "loss": 0.6583, + "step": 11480 + }, + { + "epoch": 0.19838609344760852, + "grad_norm": 0.8413076613442637, + "learning_rate": 1.854960053901229e-05, + "loss": 0.5002, + "step": 11481 + }, + { + "epoch": 0.19840337296101743, + "grad_norm": 0.5506416544187552, + "learning_rate": 1.8549310238854556e-05, + "loss": 0.298, + "step": 11482 + }, + { + "epoch": 0.1984206524744263, + "grad_norm": 0.990750828405168, + "learning_rate": 1.8549019911919733e-05, + "loss": 0.6126, + "step": 11483 + }, + { + "epoch": 0.19843793198783521, + "grad_norm": 0.8821645061829521, + "learning_rate": 1.8548729558208736e-05, + "loss": 0.5733, + "step": 11484 + }, + { + "epoch": 0.19845521150124412, + "grad_norm": 1.0398815015261862, + "learning_rate": 1.854843917772246e-05, + "loss": 0.5271, + "step": 11485 + }, + { + "epoch": 0.19847249101465303, + "grad_norm": 1.0081849757351764, + "learning_rate": 1.854814877046183e-05, + "loss": 0.5769, + "step": 11486 + }, + { + "epoch": 0.19848977052806194, + "grad_norm": 1.4648901256418416, + "learning_rate": 1.8547858336427745e-05, + "loss": 0.6624, + "step": 11487 + }, + { + "epoch": 0.19850705004147084, + "grad_norm": 0.7753993419009333, + "learning_rate": 1.8547567875621117e-05, + "loss": 0.2981, + "step": 11488 + }, + { + "epoch": 0.19852432955487972, + "grad_norm": 0.7978753789781694, + "learning_rate": 1.854727738804286e-05, + "loss": 0.4577, + "step": 11489 + }, + { + "epoch": 0.19854160906828863, + "grad_norm": 1.1670677874800537, + "learning_rate": 1.8546986873693877e-05, + "loss": 0.6546, + "step": 11490 + }, + { + "epoch": 0.19855888858169754, + "grad_norm": 0.6757121202454844, + "learning_rate": 1.8546696332575085e-05, + "loss": 0.5912, + "step": 11491 + }, + { + "epoch": 0.19857616809510645, + "grad_norm": 1.2087916085543144, + "learning_rate": 1.854640576468739e-05, + "loss": 0.6703, + "step": 11492 + }, + { + "epoch": 0.19859344760851536, + "grad_norm": 1.545534391869337, + "learning_rate": 1.85461151700317e-05, + "loss": 0.9009, + "step": 11493 + }, + { + "epoch": 0.19861072712192424, + "grad_norm": 0.6735642920418654, + "learning_rate": 1.854582454860893e-05, + "loss": 0.4123, + "step": 11494 + }, + { + "epoch": 0.19862800663533314, + "grad_norm": 1.4856034116099326, + "learning_rate": 1.8545533900419985e-05, + "loss": 0.6178, + "step": 11495 + }, + { + "epoch": 0.19864528614874205, + "grad_norm": 1.5008230675857244, + "learning_rate": 1.8545243225465782e-05, + "loss": 0.6834, + "step": 11496 + }, + { + "epoch": 0.19866256566215096, + "grad_norm": 1.2038935576287055, + "learning_rate": 1.8544952523747226e-05, + "loss": 0.4729, + "step": 11497 + }, + { + "epoch": 0.19867984517555987, + "grad_norm": 1.6646381286616705, + "learning_rate": 1.854466179526523e-05, + "loss": 0.5193, + "step": 11498 + }, + { + "epoch": 0.19869712468896875, + "grad_norm": 1.3948601066362505, + "learning_rate": 1.8544371040020706e-05, + "loss": 0.5394, + "step": 11499 + }, + { + "epoch": 0.19871440420237765, + "grad_norm": 1.3184478526288228, + "learning_rate": 1.854408025801456e-05, + "loss": 0.3707, + "step": 11500 + }, + { + "epoch": 0.19873168371578656, + "grad_norm": 0.865276784919097, + "learning_rate": 1.8543789449247705e-05, + "loss": 0.58, + "step": 11501 + }, + { + "epoch": 0.19874896322919547, + "grad_norm": 1.17004699876306, + "learning_rate": 1.8543498613721053e-05, + "loss": 0.6789, + "step": 11502 + }, + { + "epoch": 0.19876624274260438, + "grad_norm": 0.7591836824902786, + "learning_rate": 1.8543207751435513e-05, + "loss": 0.558, + "step": 11503 + }, + { + "epoch": 0.19878352225601328, + "grad_norm": 1.4180935579137106, + "learning_rate": 1.8542916862391998e-05, + "loss": 0.7571, + "step": 11504 + }, + { + "epoch": 0.19880080176942216, + "grad_norm": 0.9429990316191514, + "learning_rate": 1.854262594659142e-05, + "loss": 0.4561, + "step": 11505 + }, + { + "epoch": 0.19881808128283107, + "grad_norm": 1.0962265712859887, + "learning_rate": 1.8542335004034685e-05, + "loss": 0.7636, + "step": 11506 + }, + { + "epoch": 0.19883536079623998, + "grad_norm": 1.0364842545121713, + "learning_rate": 1.854204403472271e-05, + "loss": 0.6019, + "step": 11507 + }, + { + "epoch": 0.1988526403096489, + "grad_norm": 1.0788824432903512, + "learning_rate": 1.8541753038656402e-05, + "loss": 0.487, + "step": 11508 + }, + { + "epoch": 0.1988699198230578, + "grad_norm": 1.234232072479849, + "learning_rate": 1.8541462015836675e-05, + "loss": 0.5938, + "step": 11509 + }, + { + "epoch": 0.19888719933646667, + "grad_norm": 1.087625890983638, + "learning_rate": 1.8541170966264438e-05, + "loss": 0.4364, + "step": 11510 + }, + { + "epoch": 0.19890447884987558, + "grad_norm": 1.0645211789412579, + "learning_rate": 1.8540879889940607e-05, + "loss": 0.6296, + "step": 11511 + }, + { + "epoch": 0.1989217583632845, + "grad_norm": 1.2505539509807149, + "learning_rate": 1.854058878686609e-05, + "loss": 0.7254, + "step": 11512 + }, + { + "epoch": 0.1989390378766934, + "grad_norm": 0.861568295665428, + "learning_rate": 1.8540297657041805e-05, + "loss": 0.4408, + "step": 11513 + }, + { + "epoch": 0.1989563173901023, + "grad_norm": 1.696571070267252, + "learning_rate": 1.8540006500468653e-05, + "loss": 0.6516, + "step": 11514 + }, + { + "epoch": 0.19897359690351119, + "grad_norm": 1.3378626865101102, + "learning_rate": 1.8539715317147552e-05, + "loss": 0.6612, + "step": 11515 + }, + { + "epoch": 0.1989908764169201, + "grad_norm": 1.1738353689596464, + "learning_rate": 1.8539424107079415e-05, + "loss": 0.5027, + "step": 11516 + }, + { + "epoch": 0.199008155930329, + "grad_norm": 1.183919513932262, + "learning_rate": 1.853913287026515e-05, + "loss": 0.6126, + "step": 11517 + }, + { + "epoch": 0.1990254354437379, + "grad_norm": 0.7177925684171237, + "learning_rate": 1.8538841606705676e-05, + "loss": 0.4572, + "step": 11518 + }, + { + "epoch": 0.19904271495714682, + "grad_norm": 0.9268457851832604, + "learning_rate": 1.85385503164019e-05, + "loss": 0.677, + "step": 11519 + }, + { + "epoch": 0.1990599944705557, + "grad_norm": 1.2651959641537875, + "learning_rate": 1.8538258999354733e-05, + "loss": 0.6695, + "step": 11520 + }, + { + "epoch": 0.1990772739839646, + "grad_norm": 0.8887275245553001, + "learning_rate": 1.853796765556509e-05, + "loss": 0.764, + "step": 11521 + }, + { + "epoch": 0.1990945534973735, + "grad_norm": 1.7894486795774838, + "learning_rate": 1.8537676285033886e-05, + "loss": 0.5985, + "step": 11522 + }, + { + "epoch": 0.19911183301078242, + "grad_norm": 1.1377377715316597, + "learning_rate": 1.853738488776203e-05, + "loss": 0.6748, + "step": 11523 + }, + { + "epoch": 0.19912911252419133, + "grad_norm": 1.3475985594409272, + "learning_rate": 1.8537093463750437e-05, + "loss": 0.6294, + "step": 11524 + }, + { + "epoch": 0.19914639203760023, + "grad_norm": 0.9428689676484052, + "learning_rate": 1.853680201300002e-05, + "loss": 0.698, + "step": 11525 + }, + { + "epoch": 0.1991636715510091, + "grad_norm": 0.737872190513295, + "learning_rate": 1.8536510535511683e-05, + "loss": 0.53, + "step": 11526 + }, + { + "epoch": 0.19918095106441802, + "grad_norm": 0.7058339637290036, + "learning_rate": 1.8536219031286354e-05, + "loss": 0.5063, + "step": 11527 + }, + { + "epoch": 0.19919823057782693, + "grad_norm": 0.8535308542817477, + "learning_rate": 1.8535927500324934e-05, + "loss": 0.4381, + "step": 11528 + }, + { + "epoch": 0.19921551009123584, + "grad_norm": 1.1821605527248664, + "learning_rate": 1.8535635942628342e-05, + "loss": 0.5532, + "step": 11529 + }, + { + "epoch": 0.19923278960464474, + "grad_norm": 0.7270715687150786, + "learning_rate": 1.853534435819749e-05, + "loss": 0.3376, + "step": 11530 + }, + { + "epoch": 0.19925006911805362, + "grad_norm": 1.1907666274704019, + "learning_rate": 1.8535052747033288e-05, + "loss": 0.6119, + "step": 11531 + }, + { + "epoch": 0.19926734863146253, + "grad_norm": 0.9091062569310541, + "learning_rate": 1.8534761109136655e-05, + "loss": 0.7271, + "step": 11532 + }, + { + "epoch": 0.19928462814487144, + "grad_norm": 0.935239488131879, + "learning_rate": 1.85344694445085e-05, + "loss": 0.5013, + "step": 11533 + }, + { + "epoch": 0.19930190765828035, + "grad_norm": 0.8529243453422258, + "learning_rate": 1.853417775314974e-05, + "loss": 0.4384, + "step": 11534 + }, + { + "epoch": 0.19931918717168925, + "grad_norm": 0.6693302432879026, + "learning_rate": 1.8533886035061286e-05, + "loss": 0.6766, + "step": 11535 + }, + { + "epoch": 0.19933646668509813, + "grad_norm": 0.8635586971005167, + "learning_rate": 1.853359429024405e-05, + "loss": 0.5194, + "step": 11536 + }, + { + "epoch": 0.19935374619850704, + "grad_norm": 0.9180855771770784, + "learning_rate": 1.853330251869895e-05, + "loss": 0.6572, + "step": 11537 + }, + { + "epoch": 0.19937102571191595, + "grad_norm": 0.8284548367154042, + "learning_rate": 1.85330107204269e-05, + "loss": 0.5027, + "step": 11538 + }, + { + "epoch": 0.19938830522532486, + "grad_norm": 1.0741242300719889, + "learning_rate": 1.8532718895428807e-05, + "loss": 0.5488, + "step": 11539 + }, + { + "epoch": 0.19940558473873377, + "grad_norm": 0.43450403924647885, + "learning_rate": 1.8532427043705595e-05, + "loss": 0.5967, + "step": 11540 + }, + { + "epoch": 0.19942286425214267, + "grad_norm": 1.3498276213601272, + "learning_rate": 1.8532135165258172e-05, + "loss": 0.6855, + "step": 11541 + }, + { + "epoch": 0.19944014376555155, + "grad_norm": 0.8796568284231935, + "learning_rate": 1.853184326008745e-05, + "loss": 0.4194, + "step": 11542 + }, + { + "epoch": 0.19945742327896046, + "grad_norm": 1.4992613006181517, + "learning_rate": 1.8531551328194347e-05, + "loss": 0.6426, + "step": 11543 + }, + { + "epoch": 0.19947470279236937, + "grad_norm": 1.650900014707884, + "learning_rate": 1.8531259369579778e-05, + "loss": 0.5143, + "step": 11544 + }, + { + "epoch": 0.19949198230577828, + "grad_norm": 0.9876134356213342, + "learning_rate": 1.853096738424466e-05, + "loss": 0.7831, + "step": 11545 + }, + { + "epoch": 0.19950926181918718, + "grad_norm": 1.2450793115089192, + "learning_rate": 1.8530675372189903e-05, + "loss": 0.7047, + "step": 11546 + }, + { + "epoch": 0.19952654133259606, + "grad_norm": 0.69224628016206, + "learning_rate": 1.853038333341642e-05, + "loss": 0.5034, + "step": 11547 + }, + { + "epoch": 0.19954382084600497, + "grad_norm": 0.7803101863703359, + "learning_rate": 1.8530091267925126e-05, + "loss": 0.5865, + "step": 11548 + }, + { + "epoch": 0.19956110035941388, + "grad_norm": 1.210544652316521, + "learning_rate": 1.8529799175716938e-05, + "loss": 0.6003, + "step": 11549 + }, + { + "epoch": 0.1995783798728228, + "grad_norm": 1.1141554229707629, + "learning_rate": 1.852950705679277e-05, + "loss": 0.525, + "step": 11550 + }, + { + "epoch": 0.1995956593862317, + "grad_norm": 0.9386856320480662, + "learning_rate": 1.8529214911153545e-05, + "loss": 0.7015, + "step": 11551 + }, + { + "epoch": 0.19961293889964057, + "grad_norm": 0.36867142566863953, + "learning_rate": 1.8528922738800166e-05, + "loss": 0.5804, + "step": 11552 + }, + { + "epoch": 0.19963021841304948, + "grad_norm": 1.500210752496783, + "learning_rate": 1.852863053973355e-05, + "loss": 0.4663, + "step": 11553 + }, + { + "epoch": 0.1996474979264584, + "grad_norm": 0.9031045396124436, + "learning_rate": 1.8528338313954623e-05, + "loss": 0.5355, + "step": 11554 + }, + { + "epoch": 0.1996647774398673, + "grad_norm": 1.0813818998017035, + "learning_rate": 1.8528046061464287e-05, + "loss": 0.5862, + "step": 11555 + }, + { + "epoch": 0.1996820569532762, + "grad_norm": 0.7709205130622127, + "learning_rate": 1.8527753782263462e-05, + "loss": 0.6142, + "step": 11556 + }, + { + "epoch": 0.19969933646668508, + "grad_norm": 1.569010830473755, + "learning_rate": 1.8527461476353062e-05, + "loss": 0.6257, + "step": 11557 + }, + { + "epoch": 0.199716615980094, + "grad_norm": 1.9841881698810797, + "learning_rate": 1.852716914373401e-05, + "loss": 0.6687, + "step": 11558 + }, + { + "epoch": 0.1997338954935029, + "grad_norm": 1.1715125491498053, + "learning_rate": 1.8526876784407215e-05, + "loss": 0.4937, + "step": 11559 + }, + { + "epoch": 0.1997511750069118, + "grad_norm": 1.18655596949317, + "learning_rate": 1.852658439837359e-05, + "loss": 0.2932, + "step": 11560 + }, + { + "epoch": 0.19976845452032072, + "grad_norm": 1.4118847137689972, + "learning_rate": 1.8526291985634058e-05, + "loss": 0.5183, + "step": 11561 + }, + { + "epoch": 0.19978573403372962, + "grad_norm": 1.4736782916565376, + "learning_rate": 1.852599954618953e-05, + "loss": 0.4984, + "step": 11562 + }, + { + "epoch": 0.1998030135471385, + "grad_norm": 0.8178230571473379, + "learning_rate": 1.8525707080040924e-05, + "loss": 0.5468, + "step": 11563 + }, + { + "epoch": 0.1998202930605474, + "grad_norm": 1.0215473343469939, + "learning_rate": 1.852541458718916e-05, + "loss": 0.5992, + "step": 11564 + }, + { + "epoch": 0.19983757257395632, + "grad_norm": 1.2843536342130162, + "learning_rate": 1.8525122067635144e-05, + "loss": 0.6584, + "step": 11565 + }, + { + "epoch": 0.19985485208736523, + "grad_norm": 1.2203246473802905, + "learning_rate": 1.85248295213798e-05, + "loss": 0.5745, + "step": 11566 + }, + { + "epoch": 0.19987213160077413, + "grad_norm": 0.6775146498086785, + "learning_rate": 1.8524536948424038e-05, + "loss": 0.5872, + "step": 11567 + }, + { + "epoch": 0.199889411114183, + "grad_norm": 0.7060465943440375, + "learning_rate": 1.8524244348768782e-05, + "loss": 0.491, + "step": 11568 + }, + { + "epoch": 0.19990669062759192, + "grad_norm": 1.0153591093590977, + "learning_rate": 1.8523951722414946e-05, + "loss": 0.6891, + "step": 11569 + }, + { + "epoch": 0.19992397014100083, + "grad_norm": 0.4687051319117614, + "learning_rate": 1.852365906936344e-05, + "loss": 0.8021, + "step": 11570 + }, + { + "epoch": 0.19994124965440974, + "grad_norm": 1.1385860745484988, + "learning_rate": 1.8523366389615193e-05, + "loss": 0.6436, + "step": 11571 + }, + { + "epoch": 0.19995852916781864, + "grad_norm": 1.1953514897716595, + "learning_rate": 1.8523073683171112e-05, + "loss": 0.5363, + "step": 11572 + }, + { + "epoch": 0.19997580868122752, + "grad_norm": 1.232888835173566, + "learning_rate": 1.8522780950032117e-05, + "loss": 0.5304, + "step": 11573 + }, + { + "epoch": 0.19999308819463643, + "grad_norm": 0.5593531192570033, + "learning_rate": 1.8522488190199125e-05, + "loss": 0.6972, + "step": 11574 + }, + { + "epoch": 0.20001036770804534, + "grad_norm": 0.6952181315403765, + "learning_rate": 1.852219540367305e-05, + "loss": 0.5579, + "step": 11575 + }, + { + "epoch": 0.20002764722145425, + "grad_norm": 0.9782724317704369, + "learning_rate": 1.852190259045481e-05, + "loss": 0.4034, + "step": 11576 + }, + { + "epoch": 0.20004492673486315, + "grad_norm": 0.38360139487067074, + "learning_rate": 1.8521609750545327e-05, + "loss": 0.6251, + "step": 11577 + }, + { + "epoch": 0.20006220624827206, + "grad_norm": 0.9398158476692624, + "learning_rate": 1.8521316883945514e-05, + "loss": 0.5855, + "step": 11578 + }, + { + "epoch": 0.20007948576168094, + "grad_norm": 0.8903369029296196, + "learning_rate": 1.8521023990656285e-05, + "loss": 0.4527, + "step": 11579 + }, + { + "epoch": 0.20009676527508985, + "grad_norm": 1.0278695221554703, + "learning_rate": 1.852073107067857e-05, + "loss": 0.5254, + "step": 11580 + }, + { + "epoch": 0.20011404478849876, + "grad_norm": 1.1297894655492717, + "learning_rate": 1.8520438124013267e-05, + "loss": 0.5255, + "step": 11581 + }, + { + "epoch": 0.20013132430190766, + "grad_norm": 1.8998014999478245, + "learning_rate": 1.8520145150661312e-05, + "loss": 0.6682, + "step": 11582 + }, + { + "epoch": 0.20014860381531657, + "grad_norm": 0.9911065165048079, + "learning_rate": 1.8519852150623614e-05, + "loss": 0.5926, + "step": 11583 + }, + { + "epoch": 0.20016588332872545, + "grad_norm": 0.8452714153423894, + "learning_rate": 1.851955912390109e-05, + "loss": 0.4368, + "step": 11584 + }, + { + "epoch": 0.20018316284213436, + "grad_norm": 1.5999847933780142, + "learning_rate": 1.8519266070494657e-05, + "loss": 0.5794, + "step": 11585 + }, + { + "epoch": 0.20020044235554327, + "grad_norm": 1.1433038364362975, + "learning_rate": 1.8518972990405237e-05, + "loss": 0.5302, + "step": 11586 + }, + { + "epoch": 0.20021772186895218, + "grad_norm": 1.0602734958954558, + "learning_rate": 1.8518679883633748e-05, + "loss": 0.5914, + "step": 11587 + }, + { + "epoch": 0.20023500138236108, + "grad_norm": 1.1171337840261624, + "learning_rate": 1.8518386750181107e-05, + "loss": 0.6076, + "step": 11588 + }, + { + "epoch": 0.20025228089576996, + "grad_norm": 0.8883610125770333, + "learning_rate": 1.851809359004823e-05, + "loss": 0.5685, + "step": 11589 + }, + { + "epoch": 0.20026956040917887, + "grad_norm": 1.1046637044981416, + "learning_rate": 1.8517800403236036e-05, + "loss": 0.6922, + "step": 11590 + }, + { + "epoch": 0.20028683992258778, + "grad_norm": 1.0790113060617774, + "learning_rate": 1.8517507189745442e-05, + "loss": 0.4964, + "step": 11591 + }, + { + "epoch": 0.20030411943599669, + "grad_norm": 1.8918279454497047, + "learning_rate": 1.851721394957737e-05, + "loss": 0.7253, + "step": 11592 + }, + { + "epoch": 0.2003213989494056, + "grad_norm": 0.7870384730545558, + "learning_rate": 1.8516920682732737e-05, + "loss": 0.597, + "step": 11593 + }, + { + "epoch": 0.20033867846281447, + "grad_norm": 0.47681268080786054, + "learning_rate": 1.8516627389212458e-05, + "loss": 0.6989, + "step": 11594 + }, + { + "epoch": 0.20035595797622338, + "grad_norm": 1.2801228029047158, + "learning_rate": 1.8516334069017458e-05, + "loss": 0.581, + "step": 11595 + }, + { + "epoch": 0.2003732374896323, + "grad_norm": 1.1365087436096835, + "learning_rate": 1.8516040722148652e-05, + "loss": 0.725, + "step": 11596 + }, + { + "epoch": 0.2003905170030412, + "grad_norm": 1.2645561985151987, + "learning_rate": 1.8515747348606958e-05, + "loss": 0.489, + "step": 11597 + }, + { + "epoch": 0.2004077965164501, + "grad_norm": 0.5141768454517176, + "learning_rate": 1.8515453948393298e-05, + "loss": 0.663, + "step": 11598 + }, + { + "epoch": 0.200425076029859, + "grad_norm": 0.458880551869916, + "learning_rate": 1.851516052150859e-05, + "loss": 0.5909, + "step": 11599 + }, + { + "epoch": 0.2004423555432679, + "grad_norm": 1.1607537477105825, + "learning_rate": 1.8514867067953748e-05, + "loss": 0.6688, + "step": 11600 + }, + { + "epoch": 0.2004596350566768, + "grad_norm": 1.1605948939169424, + "learning_rate": 1.85145735877297e-05, + "loss": 0.5977, + "step": 11601 + }, + { + "epoch": 0.2004769145700857, + "grad_norm": 0.43419199557596433, + "learning_rate": 1.8514280080837357e-05, + "loss": 0.4545, + "step": 11602 + }, + { + "epoch": 0.20049419408349461, + "grad_norm": 1.4577410459560691, + "learning_rate": 1.851398654727764e-05, + "loss": 0.6492, + "step": 11603 + }, + { + "epoch": 0.20051147359690352, + "grad_norm": 0.941286548983822, + "learning_rate": 1.8513692987051475e-05, + "loss": 0.633, + "step": 11604 + }, + { + "epoch": 0.2005287531103124, + "grad_norm": 0.817965427303748, + "learning_rate": 1.8513399400159776e-05, + "loss": 0.664, + "step": 11605 + }, + { + "epoch": 0.2005460326237213, + "grad_norm": 0.8725331336866968, + "learning_rate": 1.8513105786603463e-05, + "loss": 0.3744, + "step": 11606 + }, + { + "epoch": 0.20056331213713022, + "grad_norm": 1.1510518790194222, + "learning_rate": 1.851281214638345e-05, + "loss": 0.6169, + "step": 11607 + }, + { + "epoch": 0.20058059165053913, + "grad_norm": 0.6755140628133406, + "learning_rate": 1.8512518479500668e-05, + "loss": 0.5641, + "step": 11608 + }, + { + "epoch": 0.20059787116394803, + "grad_norm": 0.699505951910465, + "learning_rate": 1.851222478595603e-05, + "loss": 0.5631, + "step": 11609 + }, + { + "epoch": 0.2006151506773569, + "grad_norm": 1.5629756020994487, + "learning_rate": 1.8511931065750458e-05, + "loss": 0.7016, + "step": 11610 + }, + { + "epoch": 0.20063243019076582, + "grad_norm": 1.0829799240595914, + "learning_rate": 1.851163731888487e-05, + "loss": 0.7262, + "step": 11611 + }, + { + "epoch": 0.20064970970417473, + "grad_norm": 0.6924033825168533, + "learning_rate": 1.8511343545360186e-05, + "loss": 0.4909, + "step": 11612 + }, + { + "epoch": 0.20066698921758364, + "grad_norm": 1.0836804730292091, + "learning_rate": 1.8511049745177327e-05, + "loss": 0.6131, + "step": 11613 + }, + { + "epoch": 0.20068426873099254, + "grad_norm": 1.257250393984529, + "learning_rate": 1.8510755918337217e-05, + "loss": 0.5623, + "step": 11614 + }, + { + "epoch": 0.20070154824440145, + "grad_norm": 0.7439189561161302, + "learning_rate": 1.851046206484077e-05, + "loss": 0.3833, + "step": 11615 + }, + { + "epoch": 0.20071882775781033, + "grad_norm": 0.4729855420039105, + "learning_rate": 1.851016818468891e-05, + "loss": 0.6543, + "step": 11616 + }, + { + "epoch": 0.20073610727121924, + "grad_norm": 0.9377116604165971, + "learning_rate": 1.8509874277882554e-05, + "loss": 0.6289, + "step": 11617 + }, + { + "epoch": 0.20075338678462815, + "grad_norm": 0.8074365675619062, + "learning_rate": 1.850958034442263e-05, + "loss": 0.3676, + "step": 11618 + }, + { + "epoch": 0.20077066629803705, + "grad_norm": 0.9218958147083023, + "learning_rate": 1.8509286384310047e-05, + "loss": 0.3991, + "step": 11619 + }, + { + "epoch": 0.20078794581144596, + "grad_norm": 1.063895529566932, + "learning_rate": 1.850899239754574e-05, + "loss": 0.4864, + "step": 11620 + }, + { + "epoch": 0.20080522532485484, + "grad_norm": 0.8726105772649877, + "learning_rate": 1.8508698384130616e-05, + "loss": 0.5769, + "step": 11621 + }, + { + "epoch": 0.20082250483826375, + "grad_norm": 0.5061450263331665, + "learning_rate": 1.85084043440656e-05, + "loss": 0.4809, + "step": 11622 + }, + { + "epoch": 0.20083978435167266, + "grad_norm": 0.48727833048291147, + "learning_rate": 1.8508110277351622e-05, + "loss": 0.6904, + "step": 11623 + }, + { + "epoch": 0.20085706386508156, + "grad_norm": 1.5293935469278312, + "learning_rate": 1.8507816183989593e-05, + "loss": 0.7527, + "step": 11624 + }, + { + "epoch": 0.20087434337849047, + "grad_norm": 1.6092254948672957, + "learning_rate": 1.8507522063980436e-05, + "loss": 0.6983, + "step": 11625 + }, + { + "epoch": 0.20089162289189935, + "grad_norm": 0.7803225112064767, + "learning_rate": 1.8507227917325076e-05, + "loss": 0.5047, + "step": 11626 + }, + { + "epoch": 0.20090890240530826, + "grad_norm": 1.2824757757483252, + "learning_rate": 1.8506933744024428e-05, + "loss": 0.7223, + "step": 11627 + }, + { + "epoch": 0.20092618191871717, + "grad_norm": 1.809088260972928, + "learning_rate": 1.850663954407942e-05, + "loss": 0.6529, + "step": 11628 + }, + { + "epoch": 0.20094346143212607, + "grad_norm": 0.8151539049796119, + "learning_rate": 1.850634531749097e-05, + "loss": 0.6305, + "step": 11629 + }, + { + "epoch": 0.20096074094553498, + "grad_norm": 1.1951008148057092, + "learning_rate": 1.850605106426e-05, + "loss": 0.5557, + "step": 11630 + }, + { + "epoch": 0.20097802045894386, + "grad_norm": 1.4419536931721086, + "learning_rate": 1.8505756784387432e-05, + "loss": 0.4735, + "step": 11631 + }, + { + "epoch": 0.20099529997235277, + "grad_norm": 0.9296534709186108, + "learning_rate": 1.8505462477874187e-05, + "loss": 0.5929, + "step": 11632 + }, + { + "epoch": 0.20101257948576168, + "grad_norm": 1.2880550653574687, + "learning_rate": 1.8505168144721186e-05, + "loss": 0.6273, + "step": 11633 + }, + { + "epoch": 0.20102985899917059, + "grad_norm": 0.6171695928045854, + "learning_rate": 1.850487378492935e-05, + "loss": 0.3812, + "step": 11634 + }, + { + "epoch": 0.2010471385125795, + "grad_norm": 1.0304643339367492, + "learning_rate": 1.8504579398499608e-05, + "loss": 0.5962, + "step": 11635 + }, + { + "epoch": 0.2010644180259884, + "grad_norm": 0.8263533637441978, + "learning_rate": 1.8504284985432874e-05, + "loss": 0.6812, + "step": 11636 + }, + { + "epoch": 0.20108169753939728, + "grad_norm": 1.1032550612911864, + "learning_rate": 1.8503990545730074e-05, + "loss": 0.5078, + "step": 11637 + }, + { + "epoch": 0.2010989770528062, + "grad_norm": 1.5841756191396663, + "learning_rate": 1.850369607939213e-05, + "loss": 0.6294, + "step": 11638 + }, + { + "epoch": 0.2011162565662151, + "grad_norm": 0.8624396489491664, + "learning_rate": 1.8503401586419963e-05, + "loss": 0.3889, + "step": 11639 + }, + { + "epoch": 0.201133536079624, + "grad_norm": 1.4812422312284643, + "learning_rate": 1.8503107066814492e-05, + "loss": 0.5232, + "step": 11640 + }, + { + "epoch": 0.2011508155930329, + "grad_norm": 0.7626113357766204, + "learning_rate": 1.850281252057665e-05, + "loss": 0.3816, + "step": 11641 + }, + { + "epoch": 0.2011680951064418, + "grad_norm": 1.0683145444127085, + "learning_rate": 1.850251794770735e-05, + "loss": 0.6791, + "step": 11642 + }, + { + "epoch": 0.2011853746198507, + "grad_norm": 1.5963251237360654, + "learning_rate": 1.8502223348207516e-05, + "loss": 0.5168, + "step": 11643 + }, + { + "epoch": 0.2012026541332596, + "grad_norm": 0.8999987542977594, + "learning_rate": 1.8501928722078077e-05, + "loss": 0.4817, + "step": 11644 + }, + { + "epoch": 0.20121993364666851, + "grad_norm": 1.4183746793523797, + "learning_rate": 1.8501634069319945e-05, + "loss": 0.5835, + "step": 11645 + }, + { + "epoch": 0.20123721316007742, + "grad_norm": 1.1686803780859287, + "learning_rate": 1.8501339389934055e-05, + "loss": 0.727, + "step": 11646 + }, + { + "epoch": 0.2012544926734863, + "grad_norm": 1.0220308579275257, + "learning_rate": 1.8501044683921322e-05, + "loss": 0.5711, + "step": 11647 + }, + { + "epoch": 0.2012717721868952, + "grad_norm": 1.0442685489102121, + "learning_rate": 1.8500749951282667e-05, + "loss": 0.583, + "step": 11648 + }, + { + "epoch": 0.20128905170030412, + "grad_norm": 1.4409254800202542, + "learning_rate": 1.8500455192019022e-05, + "loss": 0.7557, + "step": 11649 + }, + { + "epoch": 0.20130633121371302, + "grad_norm": 0.6264799948186509, + "learning_rate": 1.8500160406131305e-05, + "loss": 0.7234, + "step": 11650 + }, + { + "epoch": 0.20132361072712193, + "grad_norm": 0.7598374304295213, + "learning_rate": 1.849986559362044e-05, + "loss": 0.5936, + "step": 11651 + }, + { + "epoch": 0.20134089024053084, + "grad_norm": 0.6593408132961629, + "learning_rate": 1.8499570754487347e-05, + "loss": 0.2703, + "step": 11652 + }, + { + "epoch": 0.20135816975393972, + "grad_norm": 1.1218765444345415, + "learning_rate": 1.849927588873295e-05, + "loss": 0.5579, + "step": 11653 + }, + { + "epoch": 0.20137544926734863, + "grad_norm": 1.1968697016565693, + "learning_rate": 1.849898099635818e-05, + "loss": 0.5651, + "step": 11654 + }, + { + "epoch": 0.20139272878075754, + "grad_norm": 0.8945963372555644, + "learning_rate": 1.8498686077363955e-05, + "loss": 0.7796, + "step": 11655 + }, + { + "epoch": 0.20141000829416644, + "grad_norm": 0.7463944562226793, + "learning_rate": 1.84983911317512e-05, + "loss": 0.4435, + "step": 11656 + }, + { + "epoch": 0.20142728780757535, + "grad_norm": 1.0307483960751478, + "learning_rate": 1.8498096159520835e-05, + "loss": 0.6832, + "step": 11657 + }, + { + "epoch": 0.20144456732098423, + "grad_norm": 0.864161088973285, + "learning_rate": 1.849780116067379e-05, + "loss": 0.5978, + "step": 11658 + }, + { + "epoch": 0.20146184683439314, + "grad_norm": 0.43474822651632583, + "learning_rate": 1.8497506135210982e-05, + "loss": 0.7082, + "step": 11659 + }, + { + "epoch": 0.20147912634780205, + "grad_norm": 1.2641952709428188, + "learning_rate": 1.8497211083133343e-05, + "loss": 0.7038, + "step": 11660 + }, + { + "epoch": 0.20149640586121095, + "grad_norm": 1.0132869997883218, + "learning_rate": 1.8496916004441793e-05, + "loss": 0.5871, + "step": 11661 + }, + { + "epoch": 0.20151368537461986, + "grad_norm": 1.3749280883557131, + "learning_rate": 1.8496620899137254e-05, + "loss": 0.4728, + "step": 11662 + }, + { + "epoch": 0.20153096488802874, + "grad_norm": 0.5454894790893093, + "learning_rate": 1.8496325767220654e-05, + "loss": 0.3773, + "step": 11663 + }, + { + "epoch": 0.20154824440143765, + "grad_norm": 1.236922005528746, + "learning_rate": 1.8496030608692915e-05, + "loss": 0.5476, + "step": 11664 + }, + { + "epoch": 0.20156552391484656, + "grad_norm": 1.110254492402957, + "learning_rate": 1.8495735423554965e-05, + "loss": 0.6032, + "step": 11665 + }, + { + "epoch": 0.20158280342825546, + "grad_norm": 1.2886412275585124, + "learning_rate": 1.8495440211807722e-05, + "loss": 0.7792, + "step": 11666 + }, + { + "epoch": 0.20160008294166437, + "grad_norm": 1.315762645262715, + "learning_rate": 1.849514497345212e-05, + "loss": 0.5588, + "step": 11667 + }, + { + "epoch": 0.20161736245507325, + "grad_norm": 1.3015330591917376, + "learning_rate": 1.8494849708489076e-05, + "loss": 0.514, + "step": 11668 + }, + { + "epoch": 0.20163464196848216, + "grad_norm": 0.6273766984123998, + "learning_rate": 1.8494554416919515e-05, + "loss": 0.5024, + "step": 11669 + }, + { + "epoch": 0.20165192148189107, + "grad_norm": 1.2446620564621136, + "learning_rate": 1.8494259098744365e-05, + "loss": 0.5381, + "step": 11670 + }, + { + "epoch": 0.20166920099529997, + "grad_norm": 1.0304054469606436, + "learning_rate": 1.849396375396455e-05, + "loss": 0.5821, + "step": 11671 + }, + { + "epoch": 0.20168648050870888, + "grad_norm": 0.8324161473096662, + "learning_rate": 1.8493668382580992e-05, + "loss": 0.5735, + "step": 11672 + }, + { + "epoch": 0.2017037600221178, + "grad_norm": 1.3506732485580508, + "learning_rate": 1.8493372984594627e-05, + "loss": 0.5644, + "step": 11673 + }, + { + "epoch": 0.20172103953552667, + "grad_norm": 1.1782629693095643, + "learning_rate": 1.8493077560006368e-05, + "loss": 0.7021, + "step": 11674 + }, + { + "epoch": 0.20173831904893558, + "grad_norm": 0.9436867592119941, + "learning_rate": 1.8492782108817145e-05, + "loss": 0.4621, + "step": 11675 + }, + { + "epoch": 0.20175559856234448, + "grad_norm": 1.5846643099218094, + "learning_rate": 1.849248663102788e-05, + "loss": 0.4926, + "step": 11676 + }, + { + "epoch": 0.2017728780757534, + "grad_norm": 1.2300092965666465, + "learning_rate": 1.8492191126639504e-05, + "loss": 0.6715, + "step": 11677 + }, + { + "epoch": 0.2017901575891623, + "grad_norm": 1.2405302871657498, + "learning_rate": 1.8491895595652943e-05, + "loss": 0.5505, + "step": 11678 + }, + { + "epoch": 0.20180743710257118, + "grad_norm": 1.073506792121608, + "learning_rate": 1.8491600038069114e-05, + "loss": 0.5514, + "step": 11679 + }, + { + "epoch": 0.2018247166159801, + "grad_norm": 1.1426799669321717, + "learning_rate": 1.849130445388895e-05, + "loss": 0.4688, + "step": 11680 + }, + { + "epoch": 0.201841996129389, + "grad_norm": 1.3441238233739783, + "learning_rate": 1.8491008843113375e-05, + "loss": 0.528, + "step": 11681 + }, + { + "epoch": 0.2018592756427979, + "grad_norm": 1.2250493112574115, + "learning_rate": 1.8490713205743318e-05, + "loss": 0.4042, + "step": 11682 + }, + { + "epoch": 0.2018765551562068, + "grad_norm": 1.0663258447324113, + "learning_rate": 1.84904175417797e-05, + "loss": 0.7305, + "step": 11683 + }, + { + "epoch": 0.2018938346696157, + "grad_norm": 0.7416458671102244, + "learning_rate": 1.8490121851223448e-05, + "loss": 0.4612, + "step": 11684 + }, + { + "epoch": 0.2019111141830246, + "grad_norm": 1.2310538519877448, + "learning_rate": 1.848982613407549e-05, + "loss": 0.6758, + "step": 11685 + }, + { + "epoch": 0.2019283936964335, + "grad_norm": 0.7749957825206029, + "learning_rate": 1.848953039033675e-05, + "loss": 0.3979, + "step": 11686 + }, + { + "epoch": 0.2019456732098424, + "grad_norm": 0.6615192305038958, + "learning_rate": 1.848923462000816e-05, + "loss": 0.4401, + "step": 11687 + }, + { + "epoch": 0.20196295272325132, + "grad_norm": 0.9359227299199018, + "learning_rate": 1.848893882309064e-05, + "loss": 0.4567, + "step": 11688 + }, + { + "epoch": 0.20198023223666023, + "grad_norm": 1.2604465650114842, + "learning_rate": 1.848864299958512e-05, + "loss": 0.5776, + "step": 11689 + }, + { + "epoch": 0.2019975117500691, + "grad_norm": 0.7839689964336108, + "learning_rate": 1.8488347149492522e-05, + "loss": 0.7252, + "step": 11690 + }, + { + "epoch": 0.20201479126347802, + "grad_norm": 1.4144214573710967, + "learning_rate": 1.848805127281377e-05, + "loss": 0.7479, + "step": 11691 + }, + { + "epoch": 0.20203207077688692, + "grad_norm": 1.5102134849700024, + "learning_rate": 1.8487755369549805e-05, + "loss": 0.5731, + "step": 11692 + }, + { + "epoch": 0.20204935029029583, + "grad_norm": 1.2598907236759032, + "learning_rate": 1.8487459439701542e-05, + "loss": 0.5335, + "step": 11693 + }, + { + "epoch": 0.20206662980370474, + "grad_norm": 1.0769204059914332, + "learning_rate": 1.8487163483269915e-05, + "loss": 0.6976, + "step": 11694 + }, + { + "epoch": 0.20208390931711362, + "grad_norm": 1.3522633103919488, + "learning_rate": 1.8486867500255843e-05, + "loss": 0.9119, + "step": 11695 + }, + { + "epoch": 0.20210118883052253, + "grad_norm": 1.624329628559573, + "learning_rate": 1.8486571490660258e-05, + "loss": 0.6564, + "step": 11696 + }, + { + "epoch": 0.20211846834393143, + "grad_norm": 1.0489090971247301, + "learning_rate": 1.8486275454484084e-05, + "loss": 0.5559, + "step": 11697 + }, + { + "epoch": 0.20213574785734034, + "grad_norm": 0.9247173920127838, + "learning_rate": 1.8485979391728254e-05, + "loss": 0.4447, + "step": 11698 + }, + { + "epoch": 0.20215302737074925, + "grad_norm": 0.7669776390381883, + "learning_rate": 1.8485683302393688e-05, + "loss": 0.5418, + "step": 11699 + }, + { + "epoch": 0.20217030688415813, + "grad_norm": 0.9342150039647672, + "learning_rate": 1.848538718648132e-05, + "loss": 0.5564, + "step": 11700 + }, + { + "epoch": 0.20218758639756704, + "grad_norm": 0.7248536365007158, + "learning_rate": 1.848509104399207e-05, + "loss": 0.6127, + "step": 11701 + }, + { + "epoch": 0.20220486591097594, + "grad_norm": 1.56727383466266, + "learning_rate": 1.8484794874926874e-05, + "loss": 0.7539, + "step": 11702 + }, + { + "epoch": 0.20222214542438485, + "grad_norm": 1.0360148195214385, + "learning_rate": 1.8484498679286658e-05, + "loss": 0.4701, + "step": 11703 + }, + { + "epoch": 0.20223942493779376, + "grad_norm": 0.8309188184093713, + "learning_rate": 1.848420245707234e-05, + "loss": 0.3698, + "step": 11704 + }, + { + "epoch": 0.20225670445120267, + "grad_norm": 0.9317293205912492, + "learning_rate": 1.8483906208284864e-05, + "loss": 0.4285, + "step": 11705 + }, + { + "epoch": 0.20227398396461155, + "grad_norm": 1.1916517868181542, + "learning_rate": 1.8483609932925142e-05, + "loss": 0.6542, + "step": 11706 + }, + { + "epoch": 0.20229126347802046, + "grad_norm": 1.1371236597645602, + "learning_rate": 1.8483313630994113e-05, + "loss": 0.5086, + "step": 11707 + }, + { + "epoch": 0.20230854299142936, + "grad_norm": 1.2865358705294567, + "learning_rate": 1.84830173024927e-05, + "loss": 0.4581, + "step": 11708 + }, + { + "epoch": 0.20232582250483827, + "grad_norm": 1.119516339252898, + "learning_rate": 1.848272094742183e-05, + "loss": 0.5208, + "step": 11709 + }, + { + "epoch": 0.20234310201824718, + "grad_norm": 1.4234811197738857, + "learning_rate": 1.8482424565782437e-05, + "loss": 0.4567, + "step": 11710 + }, + { + "epoch": 0.20236038153165606, + "grad_norm": 1.8350509699879505, + "learning_rate": 1.848212815757544e-05, + "loss": 0.8929, + "step": 11711 + }, + { + "epoch": 0.20237766104506497, + "grad_norm": 1.170864513834337, + "learning_rate": 1.848183172280178e-05, + "loss": 0.4941, + "step": 11712 + }, + { + "epoch": 0.20239494055847387, + "grad_norm": 1.2488913211744468, + "learning_rate": 1.8481535261462372e-05, + "loss": 0.6841, + "step": 11713 + }, + { + "epoch": 0.20241222007188278, + "grad_norm": 1.6549784717275784, + "learning_rate": 1.8481238773558154e-05, + "loss": 0.7509, + "step": 11714 + }, + { + "epoch": 0.2024294995852917, + "grad_norm": 1.2800516190134226, + "learning_rate": 1.848094225909005e-05, + "loss": 0.6402, + "step": 11715 + }, + { + "epoch": 0.20244677909870057, + "grad_norm": 2.047228326253168, + "learning_rate": 1.8480645718058995e-05, + "loss": 0.6376, + "step": 11716 + }, + { + "epoch": 0.20246405861210948, + "grad_norm": 1.5875583935944746, + "learning_rate": 1.8480349150465907e-05, + "loss": 0.8149, + "step": 11717 + }, + { + "epoch": 0.20248133812551838, + "grad_norm": 1.0663848189092213, + "learning_rate": 1.8480052556311726e-05, + "loss": 0.6791, + "step": 11718 + }, + { + "epoch": 0.2024986176389273, + "grad_norm": 1.200049073423201, + "learning_rate": 1.8479755935597373e-05, + "loss": 0.5089, + "step": 11719 + }, + { + "epoch": 0.2025158971523362, + "grad_norm": 0.8880205422691925, + "learning_rate": 1.8479459288323784e-05, + "loss": 0.6959, + "step": 11720 + }, + { + "epoch": 0.20253317666574508, + "grad_norm": 1.2472967395334906, + "learning_rate": 1.847916261449188e-05, + "loss": 0.5571, + "step": 11721 + }, + { + "epoch": 0.202550456179154, + "grad_norm": 0.9325462203152853, + "learning_rate": 1.8478865914102596e-05, + "loss": 0.5792, + "step": 11722 + }, + { + "epoch": 0.2025677356925629, + "grad_norm": 1.1672385542184622, + "learning_rate": 1.8478569187156858e-05, + "loss": 0.5728, + "step": 11723 + }, + { + "epoch": 0.2025850152059718, + "grad_norm": 0.9950768875763344, + "learning_rate": 1.8478272433655596e-05, + "loss": 0.7621, + "step": 11724 + }, + { + "epoch": 0.2026022947193807, + "grad_norm": 0.9267078399101011, + "learning_rate": 1.8477975653599743e-05, + "loss": 0.6606, + "step": 11725 + }, + { + "epoch": 0.20261957423278962, + "grad_norm": 1.1960474965211978, + "learning_rate": 1.8477678846990224e-05, + "loss": 0.5288, + "step": 11726 + }, + { + "epoch": 0.2026368537461985, + "grad_norm": 1.3291100475231934, + "learning_rate": 1.8477382013827973e-05, + "loss": 0.5664, + "step": 11727 + }, + { + "epoch": 0.2026541332596074, + "grad_norm": 0.8677821098054411, + "learning_rate": 1.8477085154113913e-05, + "loss": 0.499, + "step": 11728 + }, + { + "epoch": 0.2026714127730163, + "grad_norm": 1.1285254572991323, + "learning_rate": 1.8476788267848982e-05, + "loss": 0.6144, + "step": 11729 + }, + { + "epoch": 0.20268869228642522, + "grad_norm": 1.0596353221392687, + "learning_rate": 1.8476491355034104e-05, + "loss": 0.6938, + "step": 11730 + }, + { + "epoch": 0.20270597179983413, + "grad_norm": 0.8388171600836782, + "learning_rate": 1.8476194415670216e-05, + "loss": 0.5237, + "step": 11731 + }, + { + "epoch": 0.202723251313243, + "grad_norm": 1.216746611345208, + "learning_rate": 1.8475897449758234e-05, + "loss": 0.6541, + "step": 11732 + }, + { + "epoch": 0.20274053082665192, + "grad_norm": 1.0501807320666738, + "learning_rate": 1.8475600457299102e-05, + "loss": 0.5488, + "step": 11733 + }, + { + "epoch": 0.20275781034006082, + "grad_norm": 1.5145474527643727, + "learning_rate": 1.8475303438293747e-05, + "loss": 0.6098, + "step": 11734 + }, + { + "epoch": 0.20277508985346973, + "grad_norm": 0.8716759759406171, + "learning_rate": 1.8475006392743097e-05, + "loss": 0.4697, + "step": 11735 + }, + { + "epoch": 0.20279236936687864, + "grad_norm": 0.9766544467080646, + "learning_rate": 1.8474709320648082e-05, + "loss": 0.4601, + "step": 11736 + }, + { + "epoch": 0.20280964888028752, + "grad_norm": 1.480623423836775, + "learning_rate": 1.847441222200963e-05, + "loss": 0.5868, + "step": 11737 + }, + { + "epoch": 0.20282692839369643, + "grad_norm": 0.6347074506582283, + "learning_rate": 1.8474115096828677e-05, + "loss": 0.5032, + "step": 11738 + }, + { + "epoch": 0.20284420790710533, + "grad_norm": 0.92009155058051, + "learning_rate": 1.8473817945106155e-05, + "loss": 0.5493, + "step": 11739 + }, + { + "epoch": 0.20286148742051424, + "grad_norm": 0.7896437663752826, + "learning_rate": 1.847352076684299e-05, + "loss": 0.5952, + "step": 11740 + }, + { + "epoch": 0.20287876693392315, + "grad_norm": 1.0719935967771674, + "learning_rate": 1.847322356204011e-05, + "loss": 0.8123, + "step": 11741 + }, + { + "epoch": 0.20289604644733206, + "grad_norm": 1.4134987373994201, + "learning_rate": 1.847292633069845e-05, + "loss": 0.7925, + "step": 11742 + }, + { + "epoch": 0.20291332596074094, + "grad_norm": 0.7817555071020579, + "learning_rate": 1.8472629072818947e-05, + "loss": 0.5297, + "step": 11743 + }, + { + "epoch": 0.20293060547414984, + "grad_norm": 0.7975737106111548, + "learning_rate": 1.847233178840252e-05, + "loss": 0.4376, + "step": 11744 + }, + { + "epoch": 0.20294788498755875, + "grad_norm": 0.9536250415449503, + "learning_rate": 1.847203447745011e-05, + "loss": 0.6464, + "step": 11745 + }, + { + "epoch": 0.20296516450096766, + "grad_norm": 0.9493912761286407, + "learning_rate": 1.847173713996264e-05, + "loss": 0.4315, + "step": 11746 + }, + { + "epoch": 0.20298244401437657, + "grad_norm": 1.1054694966777605, + "learning_rate": 1.847143977594105e-05, + "loss": 0.7675, + "step": 11747 + }, + { + "epoch": 0.20299972352778545, + "grad_norm": 0.5982974653531082, + "learning_rate": 1.8471142385386264e-05, + "loss": 0.7935, + "step": 11748 + }, + { + "epoch": 0.20301700304119435, + "grad_norm": 0.947460859196146, + "learning_rate": 1.8470844968299215e-05, + "loss": 0.7735, + "step": 11749 + }, + { + "epoch": 0.20303428255460326, + "grad_norm": 0.9691494014586389, + "learning_rate": 1.8470547524680838e-05, + "loss": 0.657, + "step": 11750 + }, + { + "epoch": 0.20305156206801217, + "grad_norm": 1.4287091788507056, + "learning_rate": 1.847025005453206e-05, + "loss": 0.6418, + "step": 11751 + }, + { + "epoch": 0.20306884158142108, + "grad_norm": 1.0336713312292007, + "learning_rate": 1.8469952557853815e-05, + "loss": 0.6859, + "step": 11752 + }, + { + "epoch": 0.20308612109482996, + "grad_norm": 0.953306190056365, + "learning_rate": 1.846965503464704e-05, + "loss": 0.6166, + "step": 11753 + }, + { + "epoch": 0.20310340060823887, + "grad_norm": 0.8920949037881205, + "learning_rate": 1.846935748491266e-05, + "loss": 0.4226, + "step": 11754 + }, + { + "epoch": 0.20312068012164777, + "grad_norm": 0.9534580544395282, + "learning_rate": 1.8469059908651606e-05, + "loss": 0.3964, + "step": 11755 + }, + { + "epoch": 0.20313795963505668, + "grad_norm": 1.2525323823827061, + "learning_rate": 1.8468762305864815e-05, + "loss": 0.4169, + "step": 11756 + }, + { + "epoch": 0.2031552391484656, + "grad_norm": 1.0588452334547827, + "learning_rate": 1.8468464676553214e-05, + "loss": 0.7231, + "step": 11757 + }, + { + "epoch": 0.20317251866187447, + "grad_norm": 0.4669844996479945, + "learning_rate": 1.8468167020717738e-05, + "loss": 0.5129, + "step": 11758 + }, + { + "epoch": 0.20318979817528338, + "grad_norm": 0.8563796539281558, + "learning_rate": 1.8467869338359323e-05, + "loss": 0.6996, + "step": 11759 + }, + { + "epoch": 0.20320707768869228, + "grad_norm": 1.0379780559252627, + "learning_rate": 1.8467571629478898e-05, + "loss": 0.7252, + "step": 11760 + }, + { + "epoch": 0.2032243572021012, + "grad_norm": 1.2122170347802792, + "learning_rate": 1.8467273894077392e-05, + "loss": 0.5167, + "step": 11761 + }, + { + "epoch": 0.2032416367155101, + "grad_norm": 1.1963443129591183, + "learning_rate": 1.8466976132155743e-05, + "loss": 0.6814, + "step": 11762 + }, + { + "epoch": 0.203258916228919, + "grad_norm": 0.7686553179356979, + "learning_rate": 1.8466678343714878e-05, + "loss": 0.4351, + "step": 11763 + }, + { + "epoch": 0.2032761957423279, + "grad_norm": 1.0585843787227833, + "learning_rate": 1.8466380528755735e-05, + "loss": 0.6676, + "step": 11764 + }, + { + "epoch": 0.2032934752557368, + "grad_norm": 1.026960895856095, + "learning_rate": 1.8466082687279247e-05, + "loss": 0.5957, + "step": 11765 + }, + { + "epoch": 0.2033107547691457, + "grad_norm": 0.42369182718660936, + "learning_rate": 1.846578481928634e-05, + "loss": 0.5351, + "step": 11766 + }, + { + "epoch": 0.2033280342825546, + "grad_norm": 1.0899884700273497, + "learning_rate": 1.8465486924777954e-05, + "loss": 0.5326, + "step": 11767 + }, + { + "epoch": 0.20334531379596352, + "grad_norm": 0.41337711876275524, + "learning_rate": 1.846518900375502e-05, + "loss": 0.5783, + "step": 11768 + }, + { + "epoch": 0.2033625933093724, + "grad_norm": 0.7830580290548368, + "learning_rate": 1.846489105621847e-05, + "loss": 0.4462, + "step": 11769 + }, + { + "epoch": 0.2033798728227813, + "grad_norm": 0.9738108855666624, + "learning_rate": 1.846459308216924e-05, + "loss": 0.6261, + "step": 11770 + }, + { + "epoch": 0.2033971523361902, + "grad_norm": 1.1656415246323621, + "learning_rate": 1.846429508160826e-05, + "loss": 0.5708, + "step": 11771 + }, + { + "epoch": 0.20341443184959912, + "grad_norm": 1.2364792155825015, + "learning_rate": 1.8463997054536464e-05, + "loss": 0.6584, + "step": 11772 + }, + { + "epoch": 0.20343171136300803, + "grad_norm": 0.9925075821219845, + "learning_rate": 1.8463699000954784e-05, + "loss": 0.609, + "step": 11773 + }, + { + "epoch": 0.2034489908764169, + "grad_norm": 0.8459644393075291, + "learning_rate": 1.8463400920864157e-05, + "loss": 0.4691, + "step": 11774 + }, + { + "epoch": 0.20346627038982582, + "grad_norm": 0.8085531760824207, + "learning_rate": 1.8463102814265517e-05, + "loss": 0.4043, + "step": 11775 + }, + { + "epoch": 0.20348354990323472, + "grad_norm": 1.1169957905047514, + "learning_rate": 1.8462804681159793e-05, + "loss": 0.4657, + "step": 11776 + }, + { + "epoch": 0.20350082941664363, + "grad_norm": 0.7027847283013935, + "learning_rate": 1.8462506521547925e-05, + "loss": 0.4337, + "step": 11777 + }, + { + "epoch": 0.20351810893005254, + "grad_norm": 0.52189337722394, + "learning_rate": 1.846220833543084e-05, + "loss": 0.5866, + "step": 11778 + }, + { + "epoch": 0.20353538844346145, + "grad_norm": 0.8417443748569853, + "learning_rate": 1.8461910122809475e-05, + "loss": 0.4363, + "step": 11779 + }, + { + "epoch": 0.20355266795687033, + "grad_norm": 1.16937220408016, + "learning_rate": 1.846161188368477e-05, + "loss": 0.7069, + "step": 11780 + }, + { + "epoch": 0.20356994747027923, + "grad_norm": 0.9247711643834887, + "learning_rate": 1.846131361805765e-05, + "loss": 0.577, + "step": 11781 + }, + { + "epoch": 0.20358722698368814, + "grad_norm": 1.1836087314187935, + "learning_rate": 1.846101532592905e-05, + "loss": 0.7181, + "step": 11782 + }, + { + "epoch": 0.20360450649709705, + "grad_norm": 1.1411910710878146, + "learning_rate": 1.846071700729991e-05, + "loss": 0.4654, + "step": 11783 + }, + { + "epoch": 0.20362178601050596, + "grad_norm": 0.8943344865665672, + "learning_rate": 1.846041866217116e-05, + "loss": 0.5042, + "step": 11784 + }, + { + "epoch": 0.20363906552391484, + "grad_norm": 0.8017280496795126, + "learning_rate": 1.8460120290543737e-05, + "loss": 0.4773, + "step": 11785 + }, + { + "epoch": 0.20365634503732374, + "grad_norm": 1.3317247157863028, + "learning_rate": 1.8459821892418575e-05, + "loss": 0.6357, + "step": 11786 + }, + { + "epoch": 0.20367362455073265, + "grad_norm": 1.071924328938915, + "learning_rate": 1.8459523467796606e-05, + "loss": 0.4416, + "step": 11787 + }, + { + "epoch": 0.20369090406414156, + "grad_norm": 0.9759927276109186, + "learning_rate": 1.8459225016678766e-05, + "loss": 0.6969, + "step": 11788 + }, + { + "epoch": 0.20370818357755047, + "grad_norm": 0.8620844465034807, + "learning_rate": 1.8458926539065993e-05, + "loss": 0.5417, + "step": 11789 + }, + { + "epoch": 0.20372546309095935, + "grad_norm": 0.6524609410085813, + "learning_rate": 1.8458628034959218e-05, + "loss": 0.6608, + "step": 11790 + }, + { + "epoch": 0.20374274260436825, + "grad_norm": 0.8769500254190205, + "learning_rate": 1.8458329504359377e-05, + "loss": 0.6025, + "step": 11791 + }, + { + "epoch": 0.20376002211777716, + "grad_norm": 1.415521560391579, + "learning_rate": 1.8458030947267405e-05, + "loss": 0.3445, + "step": 11792 + }, + { + "epoch": 0.20377730163118607, + "grad_norm": 0.9904064196003273, + "learning_rate": 1.845773236368424e-05, + "loss": 0.6235, + "step": 11793 + }, + { + "epoch": 0.20379458114459498, + "grad_norm": 0.8237237685214076, + "learning_rate": 1.845743375361081e-05, + "loss": 0.6007, + "step": 11794 + }, + { + "epoch": 0.20381186065800386, + "grad_norm": 1.0301628096764313, + "learning_rate": 1.8457135117048056e-05, + "loss": 0.6781, + "step": 11795 + }, + { + "epoch": 0.20382914017141276, + "grad_norm": 0.9596008139475123, + "learning_rate": 1.8456836453996915e-05, + "loss": 0.5361, + "step": 11796 + }, + { + "epoch": 0.20384641968482167, + "grad_norm": 0.9219250543466846, + "learning_rate": 1.8456537764458317e-05, + "loss": 0.4985, + "step": 11797 + }, + { + "epoch": 0.20386369919823058, + "grad_norm": 1.3280013086833744, + "learning_rate": 1.84562390484332e-05, + "loss": 0.5141, + "step": 11798 + }, + { + "epoch": 0.2038809787116395, + "grad_norm": 0.6391963709272854, + "learning_rate": 1.84559403059225e-05, + "loss": 0.5506, + "step": 11799 + }, + { + "epoch": 0.2038982582250484, + "grad_norm": 1.2044128362199031, + "learning_rate": 1.845564153692715e-05, + "loss": 0.5562, + "step": 11800 + }, + { + "epoch": 0.20391553773845728, + "grad_norm": 0.9641630618032662, + "learning_rate": 1.8455342741448096e-05, + "loss": 0.5402, + "step": 11801 + }, + { + "epoch": 0.20393281725186618, + "grad_norm": 1.0311537391252599, + "learning_rate": 1.8455043919486258e-05, + "loss": 0.5867, + "step": 11802 + }, + { + "epoch": 0.2039500967652751, + "grad_norm": 1.4132873953740928, + "learning_rate": 1.8454745071042582e-05, + "loss": 0.5512, + "step": 11803 + }, + { + "epoch": 0.203967376278684, + "grad_norm": 0.8009682052105922, + "learning_rate": 1.8454446196118004e-05, + "loss": 0.6385, + "step": 11804 + }, + { + "epoch": 0.2039846557920929, + "grad_norm": 1.030813139228772, + "learning_rate": 1.8454147294713454e-05, + "loss": 0.4878, + "step": 11805 + }, + { + "epoch": 0.20400193530550179, + "grad_norm": 0.7018801663283176, + "learning_rate": 1.8453848366829875e-05, + "loss": 0.5626, + "step": 11806 + }, + { + "epoch": 0.2040192148189107, + "grad_norm": 1.2011255094006177, + "learning_rate": 1.8453549412468202e-05, + "loss": 0.554, + "step": 11807 + }, + { + "epoch": 0.2040364943323196, + "grad_norm": 1.4297537685078932, + "learning_rate": 1.8453250431629367e-05, + "loss": 0.617, + "step": 11808 + }, + { + "epoch": 0.2040537738457285, + "grad_norm": 1.3754949001710761, + "learning_rate": 1.8452951424314308e-05, + "loss": 0.4955, + "step": 11809 + }, + { + "epoch": 0.20407105335913742, + "grad_norm": 0.8798442441387708, + "learning_rate": 1.8452652390523967e-05, + "loss": 0.5895, + "step": 11810 + }, + { + "epoch": 0.2040883328725463, + "grad_norm": 1.2304996145948566, + "learning_rate": 1.8452353330259272e-05, + "loss": 0.6358, + "step": 11811 + }, + { + "epoch": 0.2041056123859552, + "grad_norm": 1.0018587630695341, + "learning_rate": 1.8452054243521166e-05, + "loss": 0.5655, + "step": 11812 + }, + { + "epoch": 0.2041228918993641, + "grad_norm": 0.9466278957989981, + "learning_rate": 1.8451755130310583e-05, + "loss": 0.5163, + "step": 11813 + }, + { + "epoch": 0.20414017141277302, + "grad_norm": 0.3343777004625958, + "learning_rate": 1.8451455990628462e-05, + "loss": 0.5613, + "step": 11814 + }, + { + "epoch": 0.20415745092618193, + "grad_norm": 1.2259206194000773, + "learning_rate": 1.8451156824475738e-05, + "loss": 0.5349, + "step": 11815 + }, + { + "epoch": 0.20417473043959083, + "grad_norm": 1.0578105681220638, + "learning_rate": 1.845085763185335e-05, + "loss": 0.6693, + "step": 11816 + }, + { + "epoch": 0.20419200995299971, + "grad_norm": 1.0797756884467993, + "learning_rate": 1.8450558412762233e-05, + "loss": 0.4364, + "step": 11817 + }, + { + "epoch": 0.20420928946640862, + "grad_norm": 1.1386891012960338, + "learning_rate": 1.8450259167203324e-05, + "loss": 0.6002, + "step": 11818 + }, + { + "epoch": 0.20422656897981753, + "grad_norm": 1.2153447258774865, + "learning_rate": 1.8449959895177557e-05, + "loss": 0.5309, + "step": 11819 + }, + { + "epoch": 0.20424384849322644, + "grad_norm": 1.0719653255454449, + "learning_rate": 1.844966059668588e-05, + "loss": 0.7028, + "step": 11820 + }, + { + "epoch": 0.20426112800663535, + "grad_norm": 0.9411451186057319, + "learning_rate": 1.8449361271729223e-05, + "loss": 0.4725, + "step": 11821 + }, + { + "epoch": 0.20427840752004423, + "grad_norm": 0.8397931805322724, + "learning_rate": 1.8449061920308523e-05, + "loss": 0.488, + "step": 11822 + }, + { + "epoch": 0.20429568703345313, + "grad_norm": 1.305750851890066, + "learning_rate": 1.844876254242472e-05, + "loss": 0.6651, + "step": 11823 + }, + { + "epoch": 0.20431296654686204, + "grad_norm": 0.9898238896914533, + "learning_rate": 1.8448463138078744e-05, + "loss": 0.6997, + "step": 11824 + }, + { + "epoch": 0.20433024606027095, + "grad_norm": 1.2123487171814802, + "learning_rate": 1.8448163707271548e-05, + "loss": 0.6221, + "step": 11825 + }, + { + "epoch": 0.20434752557367986, + "grad_norm": 0.858810075131546, + "learning_rate": 1.8447864250004056e-05, + "loss": 0.8193, + "step": 11826 + }, + { + "epoch": 0.20436480508708874, + "grad_norm": 0.6517117553929437, + "learning_rate": 1.8447564766277217e-05, + "loss": 0.4237, + "step": 11827 + }, + { + "epoch": 0.20438208460049764, + "grad_norm": 0.7372369936724541, + "learning_rate": 1.8447265256091954e-05, + "loss": 0.3753, + "step": 11828 + }, + { + "epoch": 0.20439936411390655, + "grad_norm": 0.7578624617635983, + "learning_rate": 1.8446965719449222e-05, + "loss": 0.6935, + "step": 11829 + }, + { + "epoch": 0.20441664362731546, + "grad_norm": 0.878145611343442, + "learning_rate": 1.8446666156349947e-05, + "loss": 0.436, + "step": 11830 + }, + { + "epoch": 0.20443392314072437, + "grad_norm": 1.1449298365768266, + "learning_rate": 1.8446366566795075e-05, + "loss": 0.5177, + "step": 11831 + }, + { + "epoch": 0.20445120265413325, + "grad_norm": 0.7820109927297967, + "learning_rate": 1.8446066950785542e-05, + "loss": 0.4621, + "step": 11832 + }, + { + "epoch": 0.20446848216754215, + "grad_norm": 1.0447653634468965, + "learning_rate": 1.8445767308322282e-05, + "loss": 0.8519, + "step": 11833 + }, + { + "epoch": 0.20448576168095106, + "grad_norm": 0.8975919705406468, + "learning_rate": 1.8445467639406237e-05, + "loss": 0.5771, + "step": 11834 + }, + { + "epoch": 0.20450304119435997, + "grad_norm": 1.3552285606710999, + "learning_rate": 1.8445167944038346e-05, + "loss": 0.5058, + "step": 11835 + }, + { + "epoch": 0.20452032070776888, + "grad_norm": 0.8015800939654474, + "learning_rate": 1.844486822221955e-05, + "loss": 0.621, + "step": 11836 + }, + { + "epoch": 0.20453760022117778, + "grad_norm": 0.6480520798199175, + "learning_rate": 1.8444568473950782e-05, + "loss": 0.4521, + "step": 11837 + }, + { + "epoch": 0.20455487973458666, + "grad_norm": 0.6536741222885112, + "learning_rate": 1.8444268699232984e-05, + "loss": 0.3056, + "step": 11838 + }, + { + "epoch": 0.20457215924799557, + "grad_norm": 0.8822564735077072, + "learning_rate": 1.8443968898067096e-05, + "loss": 0.4144, + "step": 11839 + }, + { + "epoch": 0.20458943876140448, + "grad_norm": 1.4637706723836807, + "learning_rate": 1.8443669070454055e-05, + "loss": 0.5623, + "step": 11840 + }, + { + "epoch": 0.2046067182748134, + "grad_norm": 0.9338376744768719, + "learning_rate": 1.84433692163948e-05, + "loss": 0.5798, + "step": 11841 + }, + { + "epoch": 0.2046239977882223, + "grad_norm": 0.8152465823269697, + "learning_rate": 1.844306933589027e-05, + "loss": 0.6593, + "step": 11842 + }, + { + "epoch": 0.20464127730163117, + "grad_norm": 1.0393701717387265, + "learning_rate": 1.844276942894141e-05, + "loss": 0.5166, + "step": 11843 + }, + { + "epoch": 0.20465855681504008, + "grad_norm": 0.9226219711378825, + "learning_rate": 1.844246949554915e-05, + "loss": 0.4361, + "step": 11844 + }, + { + "epoch": 0.204675836328449, + "grad_norm": 0.7228617544825496, + "learning_rate": 1.8442169535714437e-05, + "loss": 0.3776, + "step": 11845 + }, + { + "epoch": 0.2046931158418579, + "grad_norm": 1.2658941465597886, + "learning_rate": 1.8441869549438203e-05, + "loss": 0.4757, + "step": 11846 + }, + { + "epoch": 0.2047103953552668, + "grad_norm": 1.0661743578290401, + "learning_rate": 1.8441569536721395e-05, + "loss": 0.6821, + "step": 11847 + }, + { + "epoch": 0.20472767486867569, + "grad_norm": 1.081283816788204, + "learning_rate": 1.8441269497564952e-05, + "loss": 0.7105, + "step": 11848 + }, + { + "epoch": 0.2047449543820846, + "grad_norm": 0.8246482237026934, + "learning_rate": 1.8440969431969805e-05, + "loss": 0.5205, + "step": 11849 + }, + { + "epoch": 0.2047622338954935, + "grad_norm": 1.1646807933247685, + "learning_rate": 1.8440669339936907e-05, + "loss": 0.8693, + "step": 11850 + }, + { + "epoch": 0.2047795134089024, + "grad_norm": 2.026251843402623, + "learning_rate": 1.844036922146719e-05, + "loss": 0.58, + "step": 11851 + }, + { + "epoch": 0.20479679292231132, + "grad_norm": 1.0064074084924173, + "learning_rate": 1.8440069076561592e-05, + "loss": 0.5638, + "step": 11852 + }, + { + "epoch": 0.20481407243572022, + "grad_norm": 1.0895566525081402, + "learning_rate": 1.8439768905221056e-05, + "loss": 0.7057, + "step": 11853 + }, + { + "epoch": 0.2048313519491291, + "grad_norm": 0.5114794413711933, + "learning_rate": 1.8439468707446523e-05, + "loss": 0.8246, + "step": 11854 + }, + { + "epoch": 0.204848631462538, + "grad_norm": 0.44094791165998165, + "learning_rate": 1.8439168483238934e-05, + "loss": 0.6592, + "step": 11855 + }, + { + "epoch": 0.20486591097594692, + "grad_norm": 1.2767562560555326, + "learning_rate": 1.8438868232599228e-05, + "loss": 0.4735, + "step": 11856 + }, + { + "epoch": 0.20488319048935583, + "grad_norm": 1.1440840612181946, + "learning_rate": 1.8438567955528343e-05, + "loss": 0.6284, + "step": 11857 + }, + { + "epoch": 0.20490047000276473, + "grad_norm": 0.7149385993806823, + "learning_rate": 1.8438267652027225e-05, + "loss": 0.4435, + "step": 11858 + }, + { + "epoch": 0.20491774951617361, + "grad_norm": 0.43299690694677573, + "learning_rate": 1.843796732209681e-05, + "loss": 0.5036, + "step": 11859 + }, + { + "epoch": 0.20493502902958252, + "grad_norm": 0.6823009133028631, + "learning_rate": 1.843766696573804e-05, + "loss": 0.4785, + "step": 11860 + }, + { + "epoch": 0.20495230854299143, + "grad_norm": 1.0683382436397264, + "learning_rate": 1.8437366582951857e-05, + "loss": 0.4046, + "step": 11861 + }, + { + "epoch": 0.20496958805640034, + "grad_norm": 0.5809667115283873, + "learning_rate": 1.8437066173739197e-05, + "loss": 0.4265, + "step": 11862 + }, + { + "epoch": 0.20498686756980924, + "grad_norm": 1.2902019918931706, + "learning_rate": 1.843676573810101e-05, + "loss": 0.565, + "step": 11863 + }, + { + "epoch": 0.20500414708321812, + "grad_norm": 0.7127530342806166, + "learning_rate": 1.843646527603823e-05, + "loss": 0.5293, + "step": 11864 + }, + { + "epoch": 0.20502142659662703, + "grad_norm": 1.159792448264049, + "learning_rate": 1.8436164787551796e-05, + "loss": 0.6309, + "step": 11865 + }, + { + "epoch": 0.20503870611003594, + "grad_norm": 1.3101359562926238, + "learning_rate": 1.8435864272642656e-05, + "loss": 0.7627, + "step": 11866 + }, + { + "epoch": 0.20505598562344485, + "grad_norm": 1.0460182114948646, + "learning_rate": 1.843556373131175e-05, + "loss": 0.7004, + "step": 11867 + }, + { + "epoch": 0.20507326513685376, + "grad_norm": 0.9111818209155842, + "learning_rate": 1.8435263163560014e-05, + "loss": 0.5168, + "step": 11868 + }, + { + "epoch": 0.20509054465026264, + "grad_norm": 0.9889301086899326, + "learning_rate": 1.843496256938839e-05, + "loss": 0.6576, + "step": 11869 + }, + { + "epoch": 0.20510782416367154, + "grad_norm": 1.7136626519088847, + "learning_rate": 1.8434661948797825e-05, + "loss": 0.6663, + "step": 11870 + }, + { + "epoch": 0.20512510367708045, + "grad_norm": 0.9397326072299942, + "learning_rate": 1.843436130178926e-05, + "loss": 0.4857, + "step": 11871 + }, + { + "epoch": 0.20514238319048936, + "grad_norm": 0.9132789540249096, + "learning_rate": 1.8434060628363634e-05, + "loss": 0.5976, + "step": 11872 + }, + { + "epoch": 0.20515966270389827, + "grad_norm": 0.8692375497534608, + "learning_rate": 1.8433759928521885e-05, + "loss": 0.4206, + "step": 11873 + }, + { + "epoch": 0.20517694221730717, + "grad_norm": 0.8898672989507654, + "learning_rate": 1.8433459202264963e-05, + "loss": 0.5733, + "step": 11874 + }, + { + "epoch": 0.20519422173071605, + "grad_norm": 1.1780977711220602, + "learning_rate": 1.8433158449593803e-05, + "loss": 0.6075, + "step": 11875 + }, + { + "epoch": 0.20521150124412496, + "grad_norm": 0.8218236644482332, + "learning_rate": 1.8432857670509353e-05, + "loss": 0.4853, + "step": 11876 + }, + { + "epoch": 0.20522878075753387, + "grad_norm": 0.8395603239010241, + "learning_rate": 1.843255686501255e-05, + "loss": 0.5031, + "step": 11877 + }, + { + "epoch": 0.20524606027094278, + "grad_norm": 0.6605569933162947, + "learning_rate": 1.8432256033104337e-05, + "loss": 0.7513, + "step": 11878 + }, + { + "epoch": 0.20526333978435168, + "grad_norm": 0.913547113284591, + "learning_rate": 1.843195517478566e-05, + "loss": 0.5126, + "step": 11879 + }, + { + "epoch": 0.20528061929776056, + "grad_norm": 1.3299716047349524, + "learning_rate": 1.8431654290057457e-05, + "loss": 0.5549, + "step": 11880 + }, + { + "epoch": 0.20529789881116947, + "grad_norm": 1.4687643503917318, + "learning_rate": 1.8431353378920673e-05, + "loss": 0.4722, + "step": 11881 + }, + { + "epoch": 0.20531517832457838, + "grad_norm": 0.9770506138183085, + "learning_rate": 1.8431052441376247e-05, + "loss": 0.6294, + "step": 11882 + }, + { + "epoch": 0.2053324578379873, + "grad_norm": 1.1448648499824907, + "learning_rate": 1.8430751477425126e-05, + "loss": 0.6166, + "step": 11883 + }, + { + "epoch": 0.2053497373513962, + "grad_norm": 1.724476819102054, + "learning_rate": 1.843045048706825e-05, + "loss": 0.6032, + "step": 11884 + }, + { + "epoch": 0.20536701686480507, + "grad_norm": 0.9842011249100528, + "learning_rate": 1.8430149470306566e-05, + "loss": 0.417, + "step": 11885 + }, + { + "epoch": 0.20538429637821398, + "grad_norm": 0.9774541883550341, + "learning_rate": 1.8429848427141008e-05, + "loss": 0.6238, + "step": 11886 + }, + { + "epoch": 0.2054015758916229, + "grad_norm": 1.5304593336948868, + "learning_rate": 1.8429547357572525e-05, + "loss": 0.5077, + "step": 11887 + }, + { + "epoch": 0.2054188554050318, + "grad_norm": 1.7337581013298335, + "learning_rate": 1.8429246261602062e-05, + "loss": 0.7605, + "step": 11888 + }, + { + "epoch": 0.2054361349184407, + "grad_norm": 0.7375355109484132, + "learning_rate": 1.8428945139230555e-05, + "loss": 0.4845, + "step": 11889 + }, + { + "epoch": 0.2054534144318496, + "grad_norm": 0.8738461076811808, + "learning_rate": 1.8428643990458955e-05, + "loss": 0.5965, + "step": 11890 + }, + { + "epoch": 0.2054706939452585, + "grad_norm": 0.5793688325168882, + "learning_rate": 1.8428342815288198e-05, + "loss": 0.541, + "step": 11891 + }, + { + "epoch": 0.2054879734586674, + "grad_norm": 1.5496284533345048, + "learning_rate": 1.8428041613719232e-05, + "loss": 0.4926, + "step": 11892 + }, + { + "epoch": 0.2055052529720763, + "grad_norm": 1.0938625990239783, + "learning_rate": 1.8427740385753e-05, + "loss": 0.6394, + "step": 11893 + }, + { + "epoch": 0.20552253248548522, + "grad_norm": 1.2500792984559148, + "learning_rate": 1.8427439131390445e-05, + "loss": 0.5208, + "step": 11894 + }, + { + "epoch": 0.20553981199889412, + "grad_norm": 1.008048318556466, + "learning_rate": 1.8427137850632512e-05, + "loss": 0.6627, + "step": 11895 + }, + { + "epoch": 0.205557091512303, + "grad_norm": 0.829952954646651, + "learning_rate": 1.8426836543480137e-05, + "loss": 0.6117, + "step": 11896 + }, + { + "epoch": 0.2055743710257119, + "grad_norm": 0.6037437284305248, + "learning_rate": 1.8426535209934276e-05, + "loss": 0.4763, + "step": 11897 + }, + { + "epoch": 0.20559165053912082, + "grad_norm": 1.0543022242139264, + "learning_rate": 1.8426233849995862e-05, + "loss": 0.5098, + "step": 11898 + }, + { + "epoch": 0.20560893005252973, + "grad_norm": 0.7915813762713299, + "learning_rate": 1.8425932463665846e-05, + "loss": 0.6546, + "step": 11899 + }, + { + "epoch": 0.20562620956593863, + "grad_norm": 0.8595646871991999, + "learning_rate": 1.8425631050945166e-05, + "loss": 0.6078, + "step": 11900 + }, + { + "epoch": 0.2056434890793475, + "grad_norm": 0.8138166004176541, + "learning_rate": 1.842532961183477e-05, + "loss": 0.5655, + "step": 11901 + }, + { + "epoch": 0.20566076859275642, + "grad_norm": 0.9403021018885442, + "learning_rate": 1.8425028146335605e-05, + "loss": 0.8226, + "step": 11902 + }, + { + "epoch": 0.20567804810616533, + "grad_norm": 0.6259192921856193, + "learning_rate": 1.8424726654448607e-05, + "loss": 0.5446, + "step": 11903 + }, + { + "epoch": 0.20569532761957424, + "grad_norm": 0.6008793198310888, + "learning_rate": 1.842442513617473e-05, + "loss": 0.438, + "step": 11904 + }, + { + "epoch": 0.20571260713298314, + "grad_norm": 1.0425954275250853, + "learning_rate": 1.8424123591514906e-05, + "loss": 0.6804, + "step": 11905 + }, + { + "epoch": 0.20572988664639202, + "grad_norm": 1.0167064344056034, + "learning_rate": 1.842382202047009e-05, + "loss": 0.6409, + "step": 11906 + }, + { + "epoch": 0.20574716615980093, + "grad_norm": 1.1035671003631722, + "learning_rate": 1.8423520423041226e-05, + "loss": 0.7201, + "step": 11907 + }, + { + "epoch": 0.20576444567320984, + "grad_norm": 0.46072455990559597, + "learning_rate": 1.8423218799229253e-05, + "loss": 0.6047, + "step": 11908 + }, + { + "epoch": 0.20578172518661875, + "grad_norm": 1.4048311467598396, + "learning_rate": 1.842291714903512e-05, + "loss": 0.5469, + "step": 11909 + }, + { + "epoch": 0.20579900470002765, + "grad_norm": 1.294257063018877, + "learning_rate": 1.842261547245977e-05, + "loss": 0.6609, + "step": 11910 + }, + { + "epoch": 0.20581628421343656, + "grad_norm": 0.9379186167705887, + "learning_rate": 1.8422313769504144e-05, + "loss": 0.5568, + "step": 11911 + }, + { + "epoch": 0.20583356372684544, + "grad_norm": 0.9975841668890646, + "learning_rate": 1.8422012040169196e-05, + "loss": 0.6437, + "step": 11912 + }, + { + "epoch": 0.20585084324025435, + "grad_norm": 1.0421635665774762, + "learning_rate": 1.8421710284455867e-05, + "loss": 0.7234, + "step": 11913 + }, + { + "epoch": 0.20586812275366326, + "grad_norm": 1.3078436257101889, + "learning_rate": 1.84214085023651e-05, + "loss": 0.5842, + "step": 11914 + }, + { + "epoch": 0.20588540226707217, + "grad_norm": 1.1178592721717882, + "learning_rate": 1.8421106693897842e-05, + "loss": 0.6503, + "step": 11915 + }, + { + "epoch": 0.20590268178048107, + "grad_norm": 0.5558195928521924, + "learning_rate": 1.8420804859055037e-05, + "loss": 0.4556, + "step": 11916 + }, + { + "epoch": 0.20591996129388995, + "grad_norm": 1.0180867955950839, + "learning_rate": 1.842050299783763e-05, + "loss": 0.593, + "step": 11917 + }, + { + "epoch": 0.20593724080729886, + "grad_norm": 0.3534357739631043, + "learning_rate": 1.842020111024657e-05, + "loss": 0.5561, + "step": 11918 + }, + { + "epoch": 0.20595452032070777, + "grad_norm": 0.901340265443266, + "learning_rate": 1.84198991962828e-05, + "loss": 0.6303, + "step": 11919 + }, + { + "epoch": 0.20597179983411668, + "grad_norm": 0.8044882282552179, + "learning_rate": 1.8419597255947262e-05, + "loss": 0.4883, + "step": 11920 + }, + { + "epoch": 0.20598907934752558, + "grad_norm": 1.090374159068048, + "learning_rate": 1.841929528924091e-05, + "loss": 0.7801, + "step": 11921 + }, + { + "epoch": 0.20600635886093446, + "grad_norm": 1.458779855775318, + "learning_rate": 1.8418993296164686e-05, + "loss": 0.5256, + "step": 11922 + }, + { + "epoch": 0.20602363837434337, + "grad_norm": 0.38998435541912024, + "learning_rate": 1.841869127671953e-05, + "loss": 0.571, + "step": 11923 + }, + { + "epoch": 0.20604091788775228, + "grad_norm": 1.4194581548876661, + "learning_rate": 1.84183892309064e-05, + "loss": 0.6119, + "step": 11924 + }, + { + "epoch": 0.20605819740116119, + "grad_norm": 1.1208963372587968, + "learning_rate": 1.841808715872623e-05, + "loss": 0.6435, + "step": 11925 + }, + { + "epoch": 0.2060754769145701, + "grad_norm": 1.0220032673635189, + "learning_rate": 1.841778506017997e-05, + "loss": 0.5731, + "step": 11926 + }, + { + "epoch": 0.206092756427979, + "grad_norm": 1.2990450629249297, + "learning_rate": 1.841748293526857e-05, + "loss": 0.7538, + "step": 11927 + }, + { + "epoch": 0.20611003594138788, + "grad_norm": 0.8014431874245728, + "learning_rate": 1.8417180783992977e-05, + "loss": 0.7749, + "step": 11928 + }, + { + "epoch": 0.2061273154547968, + "grad_norm": 1.4029163555015067, + "learning_rate": 1.841687860635413e-05, + "loss": 0.5888, + "step": 11929 + }, + { + "epoch": 0.2061445949682057, + "grad_norm": 1.0331409546067465, + "learning_rate": 1.841657640235298e-05, + "loss": 0.5014, + "step": 11930 + }, + { + "epoch": 0.2061618744816146, + "grad_norm": 0.7663153523834397, + "learning_rate": 1.8416274171990474e-05, + "loss": 0.5577, + "step": 11931 + }, + { + "epoch": 0.2061791539950235, + "grad_norm": 1.3269256298722178, + "learning_rate": 1.8415971915267555e-05, + "loss": 0.5911, + "step": 11932 + }, + { + "epoch": 0.2061964335084324, + "grad_norm": 1.0493570139178774, + "learning_rate": 1.8415669632185177e-05, + "loss": 0.5504, + "step": 11933 + }, + { + "epoch": 0.2062137130218413, + "grad_norm": 1.3386545313922453, + "learning_rate": 1.841536732274428e-05, + "loss": 0.6095, + "step": 11934 + }, + { + "epoch": 0.2062309925352502, + "grad_norm": 1.4129519356977296, + "learning_rate": 1.8415064986945813e-05, + "loss": 0.4217, + "step": 11935 + }, + { + "epoch": 0.20624827204865911, + "grad_norm": 0.45713851010796236, + "learning_rate": 1.841476262479072e-05, + "loss": 0.6437, + "step": 11936 + }, + { + "epoch": 0.20626555156206802, + "grad_norm": 1.1184080845680908, + "learning_rate": 1.8414460236279958e-05, + "loss": 0.8218, + "step": 11937 + }, + { + "epoch": 0.2062828310754769, + "grad_norm": 1.218731403755841, + "learning_rate": 1.841415782141446e-05, + "loss": 0.61, + "step": 11938 + }, + { + "epoch": 0.2063001105888858, + "grad_norm": 0.4240610929809856, + "learning_rate": 1.8413855380195184e-05, + "loss": 0.6339, + "step": 11939 + }, + { + "epoch": 0.20631739010229472, + "grad_norm": 0.6135140310375432, + "learning_rate": 1.841355291262307e-05, + "loss": 0.5787, + "step": 11940 + }, + { + "epoch": 0.20633466961570363, + "grad_norm": 0.43814338911281775, + "learning_rate": 1.8413250418699074e-05, + "loss": 0.531, + "step": 11941 + }, + { + "epoch": 0.20635194912911253, + "grad_norm": 1.302658622744064, + "learning_rate": 1.8412947898424133e-05, + "loss": 0.4997, + "step": 11942 + }, + { + "epoch": 0.2063692286425214, + "grad_norm": 1.1149548467809893, + "learning_rate": 1.8412645351799204e-05, + "loss": 0.7895, + "step": 11943 + }, + { + "epoch": 0.20638650815593032, + "grad_norm": 0.7959963620375996, + "learning_rate": 1.841234277882523e-05, + "loss": 0.3287, + "step": 11944 + }, + { + "epoch": 0.20640378766933923, + "grad_norm": 1.0748119600410595, + "learning_rate": 1.841204017950316e-05, + "loss": 0.5087, + "step": 11945 + }, + { + "epoch": 0.20642106718274814, + "grad_norm": 1.3379755258881703, + "learning_rate": 1.8411737553833936e-05, + "loss": 0.6577, + "step": 11946 + }, + { + "epoch": 0.20643834669615704, + "grad_norm": 1.0367578050384707, + "learning_rate": 1.8411434901818514e-05, + "loss": 0.4011, + "step": 11947 + }, + { + "epoch": 0.20645562620956595, + "grad_norm": 0.7970202713682566, + "learning_rate": 1.8411132223457837e-05, + "loss": 0.6241, + "step": 11948 + }, + { + "epoch": 0.20647290572297483, + "grad_norm": 0.6992573874669279, + "learning_rate": 1.8410829518752856e-05, + "loss": 0.3686, + "step": 11949 + }, + { + "epoch": 0.20649018523638374, + "grad_norm": 1.8418656714394184, + "learning_rate": 1.841052678770452e-05, + "loss": 0.5511, + "step": 11950 + }, + { + "epoch": 0.20650746474979265, + "grad_norm": 1.357841563425019, + "learning_rate": 1.841022403031377e-05, + "loss": 0.6029, + "step": 11951 + }, + { + "epoch": 0.20652474426320155, + "grad_norm": 0.4795526231977654, + "learning_rate": 1.8409921246581563e-05, + "loss": 0.8023, + "step": 11952 + }, + { + "epoch": 0.20654202377661046, + "grad_norm": 0.8540303681200948, + "learning_rate": 1.8409618436508844e-05, + "loss": 0.6246, + "step": 11953 + }, + { + "epoch": 0.20655930329001934, + "grad_norm": 1.016188082596859, + "learning_rate": 1.8409315600096556e-05, + "loss": 0.589, + "step": 11954 + }, + { + "epoch": 0.20657658280342825, + "grad_norm": 0.9764534277746805, + "learning_rate": 1.8409012737345656e-05, + "loss": 0.4318, + "step": 11955 + }, + { + "epoch": 0.20659386231683716, + "grad_norm": 0.4549967703590385, + "learning_rate": 1.840870984825709e-05, + "loss": 0.6733, + "step": 11956 + }, + { + "epoch": 0.20661114183024606, + "grad_norm": 1.1594137940241696, + "learning_rate": 1.8408406932831802e-05, + "loss": 0.6724, + "step": 11957 + }, + { + "epoch": 0.20662842134365497, + "grad_norm": 1.0375304458879873, + "learning_rate": 1.840810399107075e-05, + "loss": 0.6135, + "step": 11958 + }, + { + "epoch": 0.20664570085706385, + "grad_norm": 0.8377257104835626, + "learning_rate": 1.840780102297487e-05, + "loss": 0.3714, + "step": 11959 + }, + { + "epoch": 0.20666298037047276, + "grad_norm": 1.0173541003454565, + "learning_rate": 1.8407498028545126e-05, + "loss": 0.5491, + "step": 11960 + }, + { + "epoch": 0.20668025988388167, + "grad_norm": 1.0660244682150368, + "learning_rate": 1.8407195007782456e-05, + "loss": 0.4261, + "step": 11961 + }, + { + "epoch": 0.20669753939729058, + "grad_norm": 1.0485963446540374, + "learning_rate": 1.840689196068781e-05, + "loss": 0.5753, + "step": 11962 + }, + { + "epoch": 0.20671481891069948, + "grad_norm": 1.0102446505751033, + "learning_rate": 1.840658888726214e-05, + "loss": 0.5111, + "step": 11963 + }, + { + "epoch": 0.2067320984241084, + "grad_norm": 1.1979027528422368, + "learning_rate": 1.8406285787506398e-05, + "loss": 0.5641, + "step": 11964 + }, + { + "epoch": 0.20674937793751727, + "grad_norm": 0.7314489462670605, + "learning_rate": 1.840598266142153e-05, + "loss": 0.422, + "step": 11965 + }, + { + "epoch": 0.20676665745092618, + "grad_norm": 0.6552297603944082, + "learning_rate": 1.8405679509008483e-05, + "loss": 0.3531, + "step": 11966 + }, + { + "epoch": 0.20678393696433509, + "grad_norm": 1.3336437154540697, + "learning_rate": 1.8405376330268208e-05, + "loss": 0.666, + "step": 11967 + }, + { + "epoch": 0.206801216477744, + "grad_norm": 1.2721879147137116, + "learning_rate": 1.840507312520166e-05, + "loss": 0.7104, + "step": 11968 + }, + { + "epoch": 0.2068184959911529, + "grad_norm": 1.1330556623385462, + "learning_rate": 1.8404769893809778e-05, + "loss": 0.4871, + "step": 11969 + }, + { + "epoch": 0.20683577550456178, + "grad_norm": 1.363977585440166, + "learning_rate": 1.8404466636093524e-05, + "loss": 0.4681, + "step": 11970 + }, + { + "epoch": 0.2068530550179707, + "grad_norm": 1.3461996734148862, + "learning_rate": 1.840416335205384e-05, + "loss": 0.4266, + "step": 11971 + }, + { + "epoch": 0.2068703345313796, + "grad_norm": 1.132137551648303, + "learning_rate": 1.8403860041691676e-05, + "loss": 0.494, + "step": 11972 + }, + { + "epoch": 0.2068876140447885, + "grad_norm": 0.8819538065603734, + "learning_rate": 1.8403556705007983e-05, + "loss": 0.7577, + "step": 11973 + }, + { + "epoch": 0.2069048935581974, + "grad_norm": 1.2411869591498133, + "learning_rate": 1.8403253342003717e-05, + "loss": 0.4795, + "step": 11974 + }, + { + "epoch": 0.2069221730716063, + "grad_norm": 1.666443114616024, + "learning_rate": 1.8402949952679816e-05, + "loss": 0.5922, + "step": 11975 + }, + { + "epoch": 0.2069394525850152, + "grad_norm": 1.3825422115805859, + "learning_rate": 1.840264653703724e-05, + "loss": 0.3741, + "step": 11976 + }, + { + "epoch": 0.2069567320984241, + "grad_norm": 1.168183292894785, + "learning_rate": 1.8402343095076937e-05, + "loss": 0.4457, + "step": 11977 + }, + { + "epoch": 0.20697401161183301, + "grad_norm": 1.3269925065371404, + "learning_rate": 1.8402039626799855e-05, + "loss": 0.6451, + "step": 11978 + }, + { + "epoch": 0.20699129112524192, + "grad_norm": 0.8699547686073993, + "learning_rate": 1.840173613220695e-05, + "loss": 0.5637, + "step": 11979 + }, + { + "epoch": 0.2070085706386508, + "grad_norm": 1.115113429687021, + "learning_rate": 1.840143261129917e-05, + "loss": 0.5161, + "step": 11980 + }, + { + "epoch": 0.2070258501520597, + "grad_norm": 0.551601047238453, + "learning_rate": 1.8401129064077458e-05, + "loss": 0.5694, + "step": 11981 + }, + { + "epoch": 0.20704312966546862, + "grad_norm": 0.8191630155768514, + "learning_rate": 1.8400825490542774e-05, + "loss": 0.6158, + "step": 11982 + }, + { + "epoch": 0.20706040917887752, + "grad_norm": 2.7183282522074927, + "learning_rate": 1.8400521890696068e-05, + "loss": 0.4858, + "step": 11983 + }, + { + "epoch": 0.20707768869228643, + "grad_norm": 0.752001639710631, + "learning_rate": 1.840021826453829e-05, + "loss": 0.6551, + "step": 11984 + }, + { + "epoch": 0.20709496820569534, + "grad_norm": 0.9671911229399788, + "learning_rate": 1.839991461207039e-05, + "loss": 0.5425, + "step": 11985 + }, + { + "epoch": 0.20711224771910422, + "grad_norm": 1.222445657742924, + "learning_rate": 1.8399610933293312e-05, + "loss": 0.5826, + "step": 11986 + }, + { + "epoch": 0.20712952723251313, + "grad_norm": 0.8077229462734884, + "learning_rate": 1.839930722820802e-05, + "loss": 0.505, + "step": 11987 + }, + { + "epoch": 0.20714680674592204, + "grad_norm": 1.747769962863262, + "learning_rate": 1.839900349681546e-05, + "loss": 0.8113, + "step": 11988 + }, + { + "epoch": 0.20716408625933094, + "grad_norm": 0.6895151423844548, + "learning_rate": 1.839869973911658e-05, + "loss": 0.7224, + "step": 11989 + }, + { + "epoch": 0.20718136577273985, + "grad_norm": 0.5662840441156827, + "learning_rate": 1.839839595511234e-05, + "loss": 0.6257, + "step": 11990 + }, + { + "epoch": 0.20719864528614873, + "grad_norm": 1.5189835284906748, + "learning_rate": 1.8398092144803678e-05, + "loss": 0.6963, + "step": 11991 + }, + { + "epoch": 0.20721592479955764, + "grad_norm": 1.3938909184365251, + "learning_rate": 1.8397788308191558e-05, + "loss": 0.5872, + "step": 11992 + }, + { + "epoch": 0.20723320431296655, + "grad_norm": 0.7876619239155117, + "learning_rate": 1.8397484445276924e-05, + "loss": 0.4638, + "step": 11993 + }, + { + "epoch": 0.20725048382637545, + "grad_norm": 1.419508194753247, + "learning_rate": 1.839718055606073e-05, + "loss": 0.7158, + "step": 11994 + }, + { + "epoch": 0.20726776333978436, + "grad_norm": 1.576152151513001, + "learning_rate": 1.839687664054393e-05, + "loss": 0.6208, + "step": 11995 + }, + { + "epoch": 0.20728504285319324, + "grad_norm": 0.8818402334371949, + "learning_rate": 1.8396572698727476e-05, + "loss": 0.6719, + "step": 11996 + }, + { + "epoch": 0.20730232236660215, + "grad_norm": 1.2135503500972133, + "learning_rate": 1.8396268730612315e-05, + "loss": 0.4978, + "step": 11997 + }, + { + "epoch": 0.20731960188001106, + "grad_norm": 1.1506136143386132, + "learning_rate": 1.8395964736199404e-05, + "loss": 0.5152, + "step": 11998 + }, + { + "epoch": 0.20733688139341996, + "grad_norm": 0.7439257426911575, + "learning_rate": 1.8395660715489694e-05, + "loss": 0.6086, + "step": 11999 + }, + { + "epoch": 0.20735416090682887, + "grad_norm": 1.0087778160241951, + "learning_rate": 1.8395356668484138e-05, + "loss": 0.4498, + "step": 12000 + }, + { + "epoch": 0.20737144042023778, + "grad_norm": 0.45615854268900324, + "learning_rate": 1.8395052595183684e-05, + "loss": 0.5969, + "step": 12001 + }, + { + "epoch": 0.20738871993364666, + "grad_norm": 1.2503646072770902, + "learning_rate": 1.8394748495589287e-05, + "loss": 0.793, + "step": 12002 + }, + { + "epoch": 0.20740599944705557, + "grad_norm": 1.1753942637379629, + "learning_rate": 1.83944443697019e-05, + "loss": 0.4697, + "step": 12003 + }, + { + "epoch": 0.20742327896046447, + "grad_norm": 0.924505184961157, + "learning_rate": 1.8394140217522476e-05, + "loss": 0.4636, + "step": 12004 + }, + { + "epoch": 0.20744055847387338, + "grad_norm": 0.8274452110428403, + "learning_rate": 1.8393836039051968e-05, + "loss": 0.722, + "step": 12005 + }, + { + "epoch": 0.2074578379872823, + "grad_norm": 1.185713569682252, + "learning_rate": 1.8393531834291326e-05, + "loss": 0.6167, + "step": 12006 + }, + { + "epoch": 0.20747511750069117, + "grad_norm": 1.0891461300530791, + "learning_rate": 1.8393227603241502e-05, + "loss": 0.5353, + "step": 12007 + }, + { + "epoch": 0.20749239701410008, + "grad_norm": 1.6165806942717371, + "learning_rate": 1.839292334590346e-05, + "loss": 0.739, + "step": 12008 + }, + { + "epoch": 0.20750967652750898, + "grad_norm": 1.2586629357088517, + "learning_rate": 1.8392619062278134e-05, + "loss": 0.6363, + "step": 12009 + }, + { + "epoch": 0.2075269560409179, + "grad_norm": 1.1278192297225151, + "learning_rate": 1.839231475236649e-05, + "loss": 0.4665, + "step": 12010 + }, + { + "epoch": 0.2075442355543268, + "grad_norm": 0.4464636873056422, + "learning_rate": 1.8392010416169483e-05, + "loss": 0.7854, + "step": 12011 + }, + { + "epoch": 0.20756151506773568, + "grad_norm": 0.915110696073618, + "learning_rate": 1.8391706053688057e-05, + "loss": 0.5224, + "step": 12012 + }, + { + "epoch": 0.2075787945811446, + "grad_norm": 0.37293948056336296, + "learning_rate": 1.8391401664923172e-05, + "loss": 0.5596, + "step": 12013 + }, + { + "epoch": 0.2075960740945535, + "grad_norm": 1.060162655993583, + "learning_rate": 1.839109724987578e-05, + "loss": 0.5261, + "step": 12014 + }, + { + "epoch": 0.2076133536079624, + "grad_norm": 0.41103486735776984, + "learning_rate": 1.839079280854683e-05, + "loss": 0.5884, + "step": 12015 + }, + { + "epoch": 0.2076306331213713, + "grad_norm": 0.39186174669748647, + "learning_rate": 1.8390488340937283e-05, + "loss": 0.6218, + "step": 12016 + }, + { + "epoch": 0.20764791263478022, + "grad_norm": 1.0602094178608548, + "learning_rate": 1.8390183847048088e-05, + "loss": 0.5225, + "step": 12017 + }, + { + "epoch": 0.2076651921481891, + "grad_norm": 1.2646924110264275, + "learning_rate": 1.8389879326880196e-05, + "loss": 0.7289, + "step": 12018 + }, + { + "epoch": 0.207682471661598, + "grad_norm": 1.1224398724175422, + "learning_rate": 1.838957478043457e-05, + "loss": 0.5979, + "step": 12019 + }, + { + "epoch": 0.2076997511750069, + "grad_norm": 1.3972094282125183, + "learning_rate": 1.8389270207712155e-05, + "loss": 0.573, + "step": 12020 + }, + { + "epoch": 0.20771703068841582, + "grad_norm": 1.4989185055793814, + "learning_rate": 1.838896560871391e-05, + "loss": 0.5494, + "step": 12021 + }, + { + "epoch": 0.20773431020182473, + "grad_norm": 1.5019801139438926, + "learning_rate": 1.8388660983440786e-05, + "loss": 0.4936, + "step": 12022 + }, + { + "epoch": 0.2077515897152336, + "grad_norm": 1.1090901886808282, + "learning_rate": 1.838835633189374e-05, + "loss": 0.5572, + "step": 12023 + }, + { + "epoch": 0.20776886922864252, + "grad_norm": 1.1063250788055803, + "learning_rate": 1.8388051654073723e-05, + "loss": 0.372, + "step": 12024 + }, + { + "epoch": 0.20778614874205142, + "grad_norm": 1.165271281480767, + "learning_rate": 1.838774694998169e-05, + "loss": 0.6835, + "step": 12025 + }, + { + "epoch": 0.20780342825546033, + "grad_norm": 1.5816348590425855, + "learning_rate": 1.8387442219618595e-05, + "loss": 0.6752, + "step": 12026 + }, + { + "epoch": 0.20782070776886924, + "grad_norm": 1.7804540376956586, + "learning_rate": 1.8387137462985393e-05, + "loss": 0.7297, + "step": 12027 + }, + { + "epoch": 0.20783798728227812, + "grad_norm": 0.9136468455635047, + "learning_rate": 1.8386832680083044e-05, + "loss": 0.4773, + "step": 12028 + }, + { + "epoch": 0.20785526679568703, + "grad_norm": 0.6454074367324617, + "learning_rate": 1.8386527870912495e-05, + "loss": 0.4868, + "step": 12029 + }, + { + "epoch": 0.20787254630909593, + "grad_norm": 0.8048516931124762, + "learning_rate": 1.8386223035474705e-05, + "loss": 0.5464, + "step": 12030 + }, + { + "epoch": 0.20788982582250484, + "grad_norm": 1.7236521208696847, + "learning_rate": 1.8385918173770624e-05, + "loss": 0.5862, + "step": 12031 + }, + { + "epoch": 0.20790710533591375, + "grad_norm": 0.9021106948679345, + "learning_rate": 1.838561328580121e-05, + "loss": 0.5038, + "step": 12032 + }, + { + "epoch": 0.20792438484932263, + "grad_norm": 1.0389040984547169, + "learning_rate": 1.8385308371567418e-05, + "loss": 0.5782, + "step": 12033 + }, + { + "epoch": 0.20794166436273154, + "grad_norm": 1.031355002181952, + "learning_rate": 1.8385003431070204e-05, + "loss": 0.506, + "step": 12034 + }, + { + "epoch": 0.20795894387614045, + "grad_norm": 0.9130541410655778, + "learning_rate": 1.8384698464310524e-05, + "loss": 0.466, + "step": 12035 + }, + { + "epoch": 0.20797622338954935, + "grad_norm": 0.9661964503236143, + "learning_rate": 1.838439347128933e-05, + "loss": 0.3529, + "step": 12036 + }, + { + "epoch": 0.20799350290295826, + "grad_norm": 0.9592174563396406, + "learning_rate": 1.838408845200758e-05, + "loss": 0.4485, + "step": 12037 + }, + { + "epoch": 0.20801078241636717, + "grad_norm": 1.1159925516294054, + "learning_rate": 1.838378340646622e-05, + "loss": 0.4956, + "step": 12038 + }, + { + "epoch": 0.20802806192977605, + "grad_norm": 1.1385793660439532, + "learning_rate": 1.8383478334666222e-05, + "loss": 0.5397, + "step": 12039 + }, + { + "epoch": 0.20804534144318496, + "grad_norm": 1.5694924644503427, + "learning_rate": 1.838317323660853e-05, + "loss": 0.8036, + "step": 12040 + }, + { + "epoch": 0.20806262095659386, + "grad_norm": 1.0529763622304074, + "learning_rate": 1.83828681122941e-05, + "loss": 0.4912, + "step": 12041 + }, + { + "epoch": 0.20807990047000277, + "grad_norm": 0.9300095916383704, + "learning_rate": 1.8382562961723894e-05, + "loss": 0.5717, + "step": 12042 + }, + { + "epoch": 0.20809717998341168, + "grad_norm": 1.3184266428309737, + "learning_rate": 1.838225778489886e-05, + "loss": 0.591, + "step": 12043 + }, + { + "epoch": 0.20811445949682056, + "grad_norm": 1.6466236595624058, + "learning_rate": 1.8381952581819962e-05, + "loss": 0.5511, + "step": 12044 + }, + { + "epoch": 0.20813173901022947, + "grad_norm": 1.1824200335932509, + "learning_rate": 1.838164735248815e-05, + "loss": 0.5732, + "step": 12045 + }, + { + "epoch": 0.20814901852363837, + "grad_norm": 1.1045361473264341, + "learning_rate": 1.838134209690438e-05, + "loss": 0.579, + "step": 12046 + }, + { + "epoch": 0.20816629803704728, + "grad_norm": 1.043012997404713, + "learning_rate": 1.8381036815069608e-05, + "loss": 0.4271, + "step": 12047 + }, + { + "epoch": 0.2081835775504562, + "grad_norm": 0.8029354182124119, + "learning_rate": 1.8380731506984794e-05, + "loss": 0.6155, + "step": 12048 + }, + { + "epoch": 0.20820085706386507, + "grad_norm": 0.8150829591279186, + "learning_rate": 1.838042617265089e-05, + "loss": 0.5232, + "step": 12049 + }, + { + "epoch": 0.20821813657727398, + "grad_norm": 1.0351097984617295, + "learning_rate": 1.838012081206886e-05, + "loss": 0.6707, + "step": 12050 + }, + { + "epoch": 0.20823541609068288, + "grad_norm": 1.3194253149476545, + "learning_rate": 1.8379815425239648e-05, + "loss": 0.5487, + "step": 12051 + }, + { + "epoch": 0.2082526956040918, + "grad_norm": 1.0292361541334303, + "learning_rate": 1.837951001216422e-05, + "loss": 0.6005, + "step": 12052 + }, + { + "epoch": 0.2082699751175007, + "grad_norm": 1.3012365685343676, + "learning_rate": 1.8379204572843526e-05, + "loss": 0.4493, + "step": 12053 + }, + { + "epoch": 0.2082872546309096, + "grad_norm": 2.092105429235061, + "learning_rate": 1.8378899107278534e-05, + "loss": 0.7521, + "step": 12054 + }, + { + "epoch": 0.2083045341443185, + "grad_norm": 1.2858341055739149, + "learning_rate": 1.837859361547019e-05, + "loss": 0.7367, + "step": 12055 + }, + { + "epoch": 0.2083218136577274, + "grad_norm": 0.9815762946543267, + "learning_rate": 1.8378288097419452e-05, + "loss": 0.5789, + "step": 12056 + }, + { + "epoch": 0.2083390931711363, + "grad_norm": 0.5928197650069121, + "learning_rate": 1.837798255312728e-05, + "loss": 0.457, + "step": 12057 + }, + { + "epoch": 0.2083563726845452, + "grad_norm": 1.167175179145593, + "learning_rate": 1.837767698259463e-05, + "loss": 0.5059, + "step": 12058 + }, + { + "epoch": 0.20837365219795412, + "grad_norm": 0.9855090792231608, + "learning_rate": 1.837737138582246e-05, + "loss": 0.4979, + "step": 12059 + }, + { + "epoch": 0.208390931711363, + "grad_norm": 1.3349807324147454, + "learning_rate": 1.8377065762811724e-05, + "loss": 0.7504, + "step": 12060 + }, + { + "epoch": 0.2084082112247719, + "grad_norm": 0.9827280311651199, + "learning_rate": 1.8376760113563385e-05, + "loss": 0.7131, + "step": 12061 + }, + { + "epoch": 0.2084254907381808, + "grad_norm": 0.8802466947249045, + "learning_rate": 1.8376454438078397e-05, + "loss": 0.5858, + "step": 12062 + }, + { + "epoch": 0.20844277025158972, + "grad_norm": 0.9476071626462931, + "learning_rate": 1.837614873635771e-05, + "loss": 0.5383, + "step": 12063 + }, + { + "epoch": 0.20846004976499863, + "grad_norm": 1.2429831475245976, + "learning_rate": 1.8375843008402298e-05, + "loss": 0.4821, + "step": 12064 + }, + { + "epoch": 0.2084773292784075, + "grad_norm": 1.0084434956755786, + "learning_rate": 1.8375537254213105e-05, + "loss": 0.51, + "step": 12065 + }, + { + "epoch": 0.20849460879181642, + "grad_norm": 0.9141978726409073, + "learning_rate": 1.8375231473791092e-05, + "loss": 0.7295, + "step": 12066 + }, + { + "epoch": 0.20851188830522532, + "grad_norm": 1.0210921589569366, + "learning_rate": 1.8374925667137214e-05, + "loss": 0.6003, + "step": 12067 + }, + { + "epoch": 0.20852916781863423, + "grad_norm": 0.7397300558653385, + "learning_rate": 1.8374619834252437e-05, + "loss": 0.4865, + "step": 12068 + }, + { + "epoch": 0.20854644733204314, + "grad_norm": 1.4331860171766415, + "learning_rate": 1.8374313975137716e-05, + "loss": 0.5303, + "step": 12069 + }, + { + "epoch": 0.20856372684545202, + "grad_norm": 0.9144587833996338, + "learning_rate": 1.8374008089794006e-05, + "loss": 0.5536, + "step": 12070 + }, + { + "epoch": 0.20858100635886093, + "grad_norm": 0.9003715718885216, + "learning_rate": 1.837370217822226e-05, + "loss": 0.482, + "step": 12071 + }, + { + "epoch": 0.20859828587226983, + "grad_norm": 1.15315964185601, + "learning_rate": 1.837339624042345e-05, + "loss": 0.4797, + "step": 12072 + }, + { + "epoch": 0.20861556538567874, + "grad_norm": 0.8585345728258283, + "learning_rate": 1.8373090276398524e-05, + "loss": 0.4844, + "step": 12073 + }, + { + "epoch": 0.20863284489908765, + "grad_norm": 0.3809825428883405, + "learning_rate": 1.8372784286148446e-05, + "loss": 0.741, + "step": 12074 + }, + { + "epoch": 0.20865012441249656, + "grad_norm": 0.87458567441409, + "learning_rate": 1.8372478269674166e-05, + "loss": 0.4677, + "step": 12075 + }, + { + "epoch": 0.20866740392590544, + "grad_norm": 1.158374828916517, + "learning_rate": 1.837217222697665e-05, + "loss": 0.4986, + "step": 12076 + }, + { + "epoch": 0.20868468343931434, + "grad_norm": 1.0085753884837676, + "learning_rate": 1.8371866158056853e-05, + "loss": 0.8047, + "step": 12077 + }, + { + "epoch": 0.20870196295272325, + "grad_norm": 0.7168805560470712, + "learning_rate": 1.837156006291574e-05, + "loss": 0.4965, + "step": 12078 + }, + { + "epoch": 0.20871924246613216, + "grad_norm": 1.2064239422893106, + "learning_rate": 1.8371253941554258e-05, + "loss": 0.6986, + "step": 12079 + }, + { + "epoch": 0.20873652197954107, + "grad_norm": 1.268702242121435, + "learning_rate": 1.8370947793973376e-05, + "loss": 0.6826, + "step": 12080 + }, + { + "epoch": 0.20875380149294995, + "grad_norm": 1.0337413320330056, + "learning_rate": 1.8370641620174046e-05, + "loss": 0.5529, + "step": 12081 + }, + { + "epoch": 0.20877108100635886, + "grad_norm": 0.8356940260773242, + "learning_rate": 1.8370335420157234e-05, + "loss": 0.7904, + "step": 12082 + }, + { + "epoch": 0.20878836051976776, + "grad_norm": 0.7161802100537761, + "learning_rate": 1.8370029193923895e-05, + "loss": 0.4164, + "step": 12083 + }, + { + "epoch": 0.20880564003317667, + "grad_norm": 0.8779262450020122, + "learning_rate": 1.8369722941474988e-05, + "loss": 0.5128, + "step": 12084 + }, + { + "epoch": 0.20882291954658558, + "grad_norm": 1.1232797138833421, + "learning_rate": 1.836941666281147e-05, + "loss": 0.546, + "step": 12085 + }, + { + "epoch": 0.20884019905999446, + "grad_norm": 1.1779040144906714, + "learning_rate": 1.8369110357934308e-05, + "loss": 0.6806, + "step": 12086 + }, + { + "epoch": 0.20885747857340337, + "grad_norm": 0.9256968409902574, + "learning_rate": 1.8368804026844453e-05, + "loss": 0.5785, + "step": 12087 + }, + { + "epoch": 0.20887475808681227, + "grad_norm": 0.7290554402950402, + "learning_rate": 1.8368497669542872e-05, + "loss": 0.4702, + "step": 12088 + }, + { + "epoch": 0.20889203760022118, + "grad_norm": 1.0649741725520667, + "learning_rate": 1.8368191286030514e-05, + "loss": 0.6209, + "step": 12089 + }, + { + "epoch": 0.2089093171136301, + "grad_norm": 1.1973664942228714, + "learning_rate": 1.8367884876308348e-05, + "loss": 0.6121, + "step": 12090 + }, + { + "epoch": 0.208926596627039, + "grad_norm": 0.5155372992271968, + "learning_rate": 1.8367578440377332e-05, + "loss": 0.5513, + "step": 12091 + }, + { + "epoch": 0.20894387614044788, + "grad_norm": 1.1322502630848523, + "learning_rate": 1.8367271978238422e-05, + "loss": 0.5523, + "step": 12092 + }, + { + "epoch": 0.20896115565385678, + "grad_norm": 0.992574577652377, + "learning_rate": 1.836696548989258e-05, + "loss": 0.5067, + "step": 12093 + }, + { + "epoch": 0.2089784351672657, + "grad_norm": 0.7778568891752743, + "learning_rate": 1.836665897534077e-05, + "loss": 0.6975, + "step": 12094 + }, + { + "epoch": 0.2089957146806746, + "grad_norm": 0.6464496376017517, + "learning_rate": 1.8366352434583945e-05, + "loss": 0.6141, + "step": 12095 + }, + { + "epoch": 0.2090129941940835, + "grad_norm": 0.40267505267595766, + "learning_rate": 1.8366045867623068e-05, + "loss": 0.8249, + "step": 12096 + }, + { + "epoch": 0.2090302737074924, + "grad_norm": 0.415715590007029, + "learning_rate": 1.83657392744591e-05, + "loss": 0.642, + "step": 12097 + }, + { + "epoch": 0.2090475532209013, + "grad_norm": 1.3731599510007249, + "learning_rate": 1.8365432655093004e-05, + "loss": 0.5801, + "step": 12098 + }, + { + "epoch": 0.2090648327343102, + "grad_norm": 1.4787173483961233, + "learning_rate": 1.836512600952573e-05, + "loss": 0.7397, + "step": 12099 + }, + { + "epoch": 0.2090821122477191, + "grad_norm": 0.4511688607283296, + "learning_rate": 1.8364819337758253e-05, + "loss": 0.7387, + "step": 12100 + }, + { + "epoch": 0.20909939176112802, + "grad_norm": 0.9903209476177129, + "learning_rate": 1.8364512639791523e-05, + "loss": 0.6065, + "step": 12101 + }, + { + "epoch": 0.2091166712745369, + "grad_norm": 1.0266429604262306, + "learning_rate": 1.8364205915626505e-05, + "loss": 0.542, + "step": 12102 + }, + { + "epoch": 0.2091339507879458, + "grad_norm": 1.081877472906891, + "learning_rate": 1.8363899165264157e-05, + "loss": 0.6276, + "step": 12103 + }, + { + "epoch": 0.2091512303013547, + "grad_norm": 0.5825200800731617, + "learning_rate": 1.8363592388705442e-05, + "loss": 0.3418, + "step": 12104 + }, + { + "epoch": 0.20916850981476362, + "grad_norm": 1.5751231933753382, + "learning_rate": 1.836328558595132e-05, + "loss": 0.5422, + "step": 12105 + }, + { + "epoch": 0.20918578932817253, + "grad_norm": 0.7040135834535313, + "learning_rate": 1.836297875700275e-05, + "loss": 0.6292, + "step": 12106 + }, + { + "epoch": 0.2092030688415814, + "grad_norm": 1.3540277080914265, + "learning_rate": 1.8362671901860696e-05, + "loss": 0.4404, + "step": 12107 + }, + { + "epoch": 0.20922034835499032, + "grad_norm": 0.9521250313651364, + "learning_rate": 1.8362365020526122e-05, + "loss": 0.5075, + "step": 12108 + }, + { + "epoch": 0.20923762786839922, + "grad_norm": 0.8458840283621798, + "learning_rate": 1.8362058112999982e-05, + "loss": 0.7744, + "step": 12109 + }, + { + "epoch": 0.20925490738180813, + "grad_norm": 1.3460715152460798, + "learning_rate": 1.8361751179283238e-05, + "loss": 0.7963, + "step": 12110 + }, + { + "epoch": 0.20927218689521704, + "grad_norm": 1.2878631210918396, + "learning_rate": 1.8361444219376855e-05, + "loss": 0.5116, + "step": 12111 + }, + { + "epoch": 0.20928946640862595, + "grad_norm": 0.8433559006280219, + "learning_rate": 1.8361137233281795e-05, + "loss": 0.4907, + "step": 12112 + }, + { + "epoch": 0.20930674592203483, + "grad_norm": 1.4624472117072649, + "learning_rate": 1.8360830220999018e-05, + "loss": 0.683, + "step": 12113 + }, + { + "epoch": 0.20932402543544373, + "grad_norm": 1.2773559736242026, + "learning_rate": 1.8360523182529482e-05, + "loss": 0.5547, + "step": 12114 + }, + { + "epoch": 0.20934130494885264, + "grad_norm": 1.2289754487370812, + "learning_rate": 1.8360216117874157e-05, + "loss": 0.5399, + "step": 12115 + }, + { + "epoch": 0.20935858446226155, + "grad_norm": 0.8425491560782179, + "learning_rate": 1.8359909027033997e-05, + "loss": 0.4808, + "step": 12116 + }, + { + "epoch": 0.20937586397567046, + "grad_norm": 0.9368977892416971, + "learning_rate": 1.8359601910009968e-05, + "loss": 0.6611, + "step": 12117 + }, + { + "epoch": 0.20939314348907934, + "grad_norm": 1.1227714035434841, + "learning_rate": 1.835929476680303e-05, + "loss": 0.5624, + "step": 12118 + }, + { + "epoch": 0.20941042300248824, + "grad_norm": 0.5207069435402467, + "learning_rate": 1.8358987597414143e-05, + "loss": 0.6828, + "step": 12119 + }, + { + "epoch": 0.20942770251589715, + "grad_norm": 0.8586369090047797, + "learning_rate": 1.8358680401844275e-05, + "loss": 0.4885, + "step": 12120 + }, + { + "epoch": 0.20944498202930606, + "grad_norm": 0.950537584197576, + "learning_rate": 1.8358373180094383e-05, + "loss": 0.6909, + "step": 12121 + }, + { + "epoch": 0.20946226154271497, + "grad_norm": 0.8500636539987194, + "learning_rate": 1.835806593216543e-05, + "loss": 0.4009, + "step": 12122 + }, + { + "epoch": 0.20947954105612385, + "grad_norm": 0.727854155976513, + "learning_rate": 1.8357758658058383e-05, + "loss": 0.4212, + "step": 12123 + }, + { + "epoch": 0.20949682056953275, + "grad_norm": 0.8481330495543157, + "learning_rate": 1.8357451357774195e-05, + "loss": 0.4998, + "step": 12124 + }, + { + "epoch": 0.20951410008294166, + "grad_norm": 1.3224228765637136, + "learning_rate": 1.8357144031313837e-05, + "loss": 0.6337, + "step": 12125 + }, + { + "epoch": 0.20953137959635057, + "grad_norm": 1.0749432971947435, + "learning_rate": 1.8356836678678273e-05, + "loss": 0.7762, + "step": 12126 + }, + { + "epoch": 0.20954865910975948, + "grad_norm": 1.095038174156272, + "learning_rate": 1.8356529299868456e-05, + "loss": 0.5687, + "step": 12127 + }, + { + "epoch": 0.20956593862316839, + "grad_norm": 1.598574667699593, + "learning_rate": 1.8356221894885355e-05, + "loss": 0.75, + "step": 12128 + }, + { + "epoch": 0.20958321813657727, + "grad_norm": 1.2049838333637775, + "learning_rate": 1.8355914463729935e-05, + "loss": 0.6265, + "step": 12129 + }, + { + "epoch": 0.20960049764998617, + "grad_norm": 0.9012535746099847, + "learning_rate": 1.8355607006403152e-05, + "loss": 0.664, + "step": 12130 + }, + { + "epoch": 0.20961777716339508, + "grad_norm": 1.1532169437853474, + "learning_rate": 1.8355299522905975e-05, + "loss": 0.4864, + "step": 12131 + }, + { + "epoch": 0.209635056676804, + "grad_norm": 0.9944856824345631, + "learning_rate": 1.8354992013239363e-05, + "loss": 0.7024, + "step": 12132 + }, + { + "epoch": 0.2096523361902129, + "grad_norm": 1.2690068497365068, + "learning_rate": 1.8354684477404285e-05, + "loss": 0.7639, + "step": 12133 + }, + { + "epoch": 0.20966961570362178, + "grad_norm": 1.6357818063258724, + "learning_rate": 1.8354376915401695e-05, + "loss": 0.5396, + "step": 12134 + }, + { + "epoch": 0.20968689521703068, + "grad_norm": 0.9513620694668072, + "learning_rate": 1.8354069327232568e-05, + "loss": 0.4534, + "step": 12135 + }, + { + "epoch": 0.2097041747304396, + "grad_norm": 0.9493861618529653, + "learning_rate": 1.8353761712897857e-05, + "loss": 0.5701, + "step": 12136 + }, + { + "epoch": 0.2097214542438485, + "grad_norm": 1.0338721834775961, + "learning_rate": 1.835345407239853e-05, + "loss": 0.3502, + "step": 12137 + }, + { + "epoch": 0.2097387337572574, + "grad_norm": 0.589179013523167, + "learning_rate": 1.835314640573555e-05, + "loss": 0.4954, + "step": 12138 + }, + { + "epoch": 0.20975601327066629, + "grad_norm": 0.6714727127859434, + "learning_rate": 1.835283871290988e-05, + "loss": 0.5847, + "step": 12139 + }, + { + "epoch": 0.2097732927840752, + "grad_norm": 0.941451674121286, + "learning_rate": 1.8352530993922483e-05, + "loss": 0.6531, + "step": 12140 + }, + { + "epoch": 0.2097905722974841, + "grad_norm": 0.4836187498957086, + "learning_rate": 1.8352223248774325e-05, + "loss": 0.565, + "step": 12141 + }, + { + "epoch": 0.209807851810893, + "grad_norm": 1.210868047904471, + "learning_rate": 1.835191547746637e-05, + "loss": 0.4423, + "step": 12142 + }, + { + "epoch": 0.20982513132430192, + "grad_norm": 1.5443039969566636, + "learning_rate": 1.8351607679999583e-05, + "loss": 0.4785, + "step": 12143 + }, + { + "epoch": 0.2098424108377108, + "grad_norm": 1.3923797526553678, + "learning_rate": 1.8351299856374923e-05, + "loss": 0.6934, + "step": 12144 + }, + { + "epoch": 0.2098596903511197, + "grad_norm": 1.4040933243233071, + "learning_rate": 1.835099200659336e-05, + "loss": 0.7042, + "step": 12145 + }, + { + "epoch": 0.2098769698645286, + "grad_norm": 1.027256693274816, + "learning_rate": 1.8350684130655855e-05, + "loss": 0.4908, + "step": 12146 + }, + { + "epoch": 0.20989424937793752, + "grad_norm": 1.9975466185515143, + "learning_rate": 1.8350376228563368e-05, + "loss": 0.7655, + "step": 12147 + }, + { + "epoch": 0.20991152889134643, + "grad_norm": 1.216101113872734, + "learning_rate": 1.8350068300316875e-05, + "loss": 0.55, + "step": 12148 + }, + { + "epoch": 0.20992880840475533, + "grad_norm": 1.1185970962024137, + "learning_rate": 1.8349760345917328e-05, + "loss": 0.6554, + "step": 12149 + }, + { + "epoch": 0.20994608791816421, + "grad_norm": 1.0449602041764836, + "learning_rate": 1.83494523653657e-05, + "loss": 0.6641, + "step": 12150 + }, + { + "epoch": 0.20996336743157312, + "grad_norm": 1.3903682060981895, + "learning_rate": 1.8349144358662953e-05, + "loss": 0.4555, + "step": 12151 + }, + { + "epoch": 0.20998064694498203, + "grad_norm": 1.2293016180225345, + "learning_rate": 1.8348836325810045e-05, + "loss": 0.5798, + "step": 12152 + }, + { + "epoch": 0.20999792645839094, + "grad_norm": 0.8781607649974814, + "learning_rate": 1.8348528266807954e-05, + "loss": 0.6747, + "step": 12153 + }, + { + "epoch": 0.21001520597179985, + "grad_norm": 1.5220559857075306, + "learning_rate": 1.8348220181657637e-05, + "loss": 0.8482, + "step": 12154 + }, + { + "epoch": 0.21003248548520873, + "grad_norm": 0.6269506091051305, + "learning_rate": 1.834791207036006e-05, + "loss": 0.8056, + "step": 12155 + }, + { + "epoch": 0.21004976499861763, + "grad_norm": 1.712203256575628, + "learning_rate": 1.8347603932916184e-05, + "loss": 0.6352, + "step": 12156 + }, + { + "epoch": 0.21006704451202654, + "grad_norm": 1.002113793949251, + "learning_rate": 1.8347295769326984e-05, + "loss": 0.6297, + "step": 12157 + }, + { + "epoch": 0.21008432402543545, + "grad_norm": 1.192791031684358, + "learning_rate": 1.8346987579593418e-05, + "loss": 0.4799, + "step": 12158 + }, + { + "epoch": 0.21010160353884436, + "grad_norm": 1.2653160165341153, + "learning_rate": 1.834667936371645e-05, + "loss": 0.5956, + "step": 12159 + }, + { + "epoch": 0.21011888305225324, + "grad_norm": 0.9881784603741018, + "learning_rate": 1.8346371121697047e-05, + "loss": 0.5706, + "step": 12160 + }, + { + "epoch": 0.21013616256566214, + "grad_norm": 1.289164901372841, + "learning_rate": 1.8346062853536178e-05, + "loss": 0.6129, + "step": 12161 + }, + { + "epoch": 0.21015344207907105, + "grad_norm": 1.3243231851109287, + "learning_rate": 1.8345754559234805e-05, + "loss": 0.6158, + "step": 12162 + }, + { + "epoch": 0.21017072159247996, + "grad_norm": 1.2290975885099933, + "learning_rate": 1.8345446238793893e-05, + "loss": 0.7113, + "step": 12163 + }, + { + "epoch": 0.21018800110588887, + "grad_norm": 0.7800589518308672, + "learning_rate": 1.834513789221441e-05, + "loss": 0.4795, + "step": 12164 + }, + { + "epoch": 0.21020528061929777, + "grad_norm": 0.8609074646649022, + "learning_rate": 1.834482951949732e-05, + "loss": 0.4109, + "step": 12165 + }, + { + "epoch": 0.21022256013270665, + "grad_norm": 1.2015626986973946, + "learning_rate": 1.8344521120643595e-05, + "loss": 0.628, + "step": 12166 + }, + { + "epoch": 0.21023983964611556, + "grad_norm": 1.0277411478298535, + "learning_rate": 1.834421269565419e-05, + "loss": 0.4687, + "step": 12167 + }, + { + "epoch": 0.21025711915952447, + "grad_norm": 0.7820187069778307, + "learning_rate": 1.834390424453008e-05, + "loss": 0.4243, + "step": 12168 + }, + { + "epoch": 0.21027439867293338, + "grad_norm": 0.6548399458633454, + "learning_rate": 1.8343595767272227e-05, + "loss": 0.597, + "step": 12169 + }, + { + "epoch": 0.21029167818634228, + "grad_norm": 0.8571980076783374, + "learning_rate": 1.8343287263881597e-05, + "loss": 0.5779, + "step": 12170 + }, + { + "epoch": 0.21030895769975116, + "grad_norm": 1.2282577303896607, + "learning_rate": 1.8342978734359154e-05, + "loss": 0.6876, + "step": 12171 + }, + { + "epoch": 0.21032623721316007, + "grad_norm": 0.505827411962836, + "learning_rate": 1.8342670178705874e-05, + "loss": 0.6361, + "step": 12172 + }, + { + "epoch": 0.21034351672656898, + "grad_norm": 1.008708423147947, + "learning_rate": 1.8342361596922714e-05, + "loss": 0.5149, + "step": 12173 + }, + { + "epoch": 0.2103607962399779, + "grad_norm": 1.4243724226600076, + "learning_rate": 1.8342052989010645e-05, + "loss": 0.6458, + "step": 12174 + }, + { + "epoch": 0.2103780757533868, + "grad_norm": 1.3264214485662393, + "learning_rate": 1.834174435497063e-05, + "loss": 0.5898, + "step": 12175 + }, + { + "epoch": 0.21039535526679568, + "grad_norm": 1.2908482847872782, + "learning_rate": 1.834143569480364e-05, + "loss": 0.5811, + "step": 12176 + }, + { + "epoch": 0.21041263478020458, + "grad_norm": 0.7449828972216869, + "learning_rate": 1.8341127008510638e-05, + "loss": 0.4481, + "step": 12177 + }, + { + "epoch": 0.2104299142936135, + "grad_norm": 1.709712522261858, + "learning_rate": 1.834081829609259e-05, + "loss": 0.602, + "step": 12178 + }, + { + "epoch": 0.2104471938070224, + "grad_norm": 1.0981571154354952, + "learning_rate": 1.834050955755047e-05, + "loss": 0.5756, + "step": 12179 + }, + { + "epoch": 0.2104644733204313, + "grad_norm": 0.8690943632970136, + "learning_rate": 1.8340200792885235e-05, + "loss": 0.522, + "step": 12180 + }, + { + "epoch": 0.21048175283384019, + "grad_norm": 1.0637257866575125, + "learning_rate": 1.8339892002097862e-05, + "loss": 0.8216, + "step": 12181 + }, + { + "epoch": 0.2104990323472491, + "grad_norm": 1.1223103519985147, + "learning_rate": 1.833958318518931e-05, + "loss": 0.5705, + "step": 12182 + }, + { + "epoch": 0.210516311860658, + "grad_norm": 1.0445029108105714, + "learning_rate": 1.8339274342160553e-05, + "loss": 0.5873, + "step": 12183 + }, + { + "epoch": 0.2105335913740669, + "grad_norm": 0.7098480358287595, + "learning_rate": 1.8338965473012552e-05, + "loss": 0.647, + "step": 12184 + }, + { + "epoch": 0.21055087088747582, + "grad_norm": 0.8019587571989284, + "learning_rate": 1.833865657774628e-05, + "loss": 0.5846, + "step": 12185 + }, + { + "epoch": 0.21056815040088472, + "grad_norm": 1.5225867841799838, + "learning_rate": 1.83383476563627e-05, + "loss": 0.7396, + "step": 12186 + }, + { + "epoch": 0.2105854299142936, + "grad_norm": 1.3232313056270033, + "learning_rate": 1.8338038708862778e-05, + "loss": 0.4493, + "step": 12187 + }, + { + "epoch": 0.2106027094277025, + "grad_norm": 0.8440571842809814, + "learning_rate": 1.833772973524749e-05, + "loss": 0.6, + "step": 12188 + }, + { + "epoch": 0.21061998894111142, + "grad_norm": 0.38898806005497616, + "learning_rate": 1.83374207355178e-05, + "loss": 0.5696, + "step": 12189 + }, + { + "epoch": 0.21063726845452033, + "grad_norm": 1.0250165870067653, + "learning_rate": 1.833711170967467e-05, + "loss": 0.4544, + "step": 12190 + }, + { + "epoch": 0.21065454796792923, + "grad_norm": 0.7513814471954886, + "learning_rate": 1.8336802657719077e-05, + "loss": 0.4722, + "step": 12191 + }, + { + "epoch": 0.21067182748133811, + "grad_norm": 1.1390186262409436, + "learning_rate": 1.8336493579651982e-05, + "loss": 0.6779, + "step": 12192 + }, + { + "epoch": 0.21068910699474702, + "grad_norm": 0.4126297703164855, + "learning_rate": 1.8336184475474358e-05, + "loss": 0.697, + "step": 12193 + }, + { + "epoch": 0.21070638650815593, + "grad_norm": 1.676158491836303, + "learning_rate": 1.8335875345187166e-05, + "loss": 0.6911, + "step": 12194 + }, + { + "epoch": 0.21072366602156484, + "grad_norm": 1.166517150711738, + "learning_rate": 1.8335566188791384e-05, + "loss": 0.4791, + "step": 12195 + }, + { + "epoch": 0.21074094553497374, + "grad_norm": 0.9470100474288256, + "learning_rate": 1.8335257006287972e-05, + "loss": 0.4903, + "step": 12196 + }, + { + "epoch": 0.21075822504838262, + "grad_norm": 0.7728232478966679, + "learning_rate": 1.83349477976779e-05, + "loss": 0.3997, + "step": 12197 + }, + { + "epoch": 0.21077550456179153, + "grad_norm": 1.3929172478396266, + "learning_rate": 1.8334638562962142e-05, + "loss": 0.6913, + "step": 12198 + }, + { + "epoch": 0.21079278407520044, + "grad_norm": 1.6887365347924497, + "learning_rate": 1.833432930214166e-05, + "loss": 0.4277, + "step": 12199 + }, + { + "epoch": 0.21081006358860935, + "grad_norm": 1.5465562885658235, + "learning_rate": 1.8334020015217427e-05, + "loss": 0.4911, + "step": 12200 + }, + { + "epoch": 0.21082734310201826, + "grad_norm": 0.7804549974660633, + "learning_rate": 1.833371070219041e-05, + "loss": 0.6703, + "step": 12201 + }, + { + "epoch": 0.21084462261542716, + "grad_norm": 1.1354874368263472, + "learning_rate": 1.833340136306158e-05, + "loss": 0.726, + "step": 12202 + }, + { + "epoch": 0.21086190212883604, + "grad_norm": 1.127049943442908, + "learning_rate": 1.8333091997831898e-05, + "loss": 0.5391, + "step": 12203 + }, + { + "epoch": 0.21087918164224495, + "grad_norm": 0.9261372761647257, + "learning_rate": 1.833278260650234e-05, + "loss": 0.4295, + "step": 12204 + }, + { + "epoch": 0.21089646115565386, + "grad_norm": 0.5398087612331777, + "learning_rate": 1.8332473189073875e-05, + "loss": 0.4222, + "step": 12205 + }, + { + "epoch": 0.21091374066906277, + "grad_norm": 0.6201717466075393, + "learning_rate": 1.8332163745547473e-05, + "loss": 0.5717, + "step": 12206 + }, + { + "epoch": 0.21093102018247167, + "grad_norm": 1.2586303691118461, + "learning_rate": 1.8331854275924097e-05, + "loss": 0.5589, + "step": 12207 + }, + { + "epoch": 0.21094829969588055, + "grad_norm": 0.9742192125209859, + "learning_rate": 1.8331544780204724e-05, + "loss": 0.4837, + "step": 12208 + }, + { + "epoch": 0.21096557920928946, + "grad_norm": 1.2159540990713817, + "learning_rate": 1.8331235258390315e-05, + "loss": 0.5727, + "step": 12209 + }, + { + "epoch": 0.21098285872269837, + "grad_norm": 0.8028922733950026, + "learning_rate": 1.8330925710481847e-05, + "loss": 1.154, + "step": 12210 + }, + { + "epoch": 0.21100013823610728, + "grad_norm": 0.37105354971575494, + "learning_rate": 1.8330616136480285e-05, + "loss": 0.4911, + "step": 12211 + }, + { + "epoch": 0.21101741774951618, + "grad_norm": 0.9588753219303893, + "learning_rate": 1.83303065363866e-05, + "loss": 0.468, + "step": 12212 + }, + { + "epoch": 0.21103469726292506, + "grad_norm": 1.4888813926925453, + "learning_rate": 1.8329996910201762e-05, + "loss": 0.428, + "step": 12213 + }, + { + "epoch": 0.21105197677633397, + "grad_norm": 0.8011279468892649, + "learning_rate": 1.8329687257926743e-05, + "loss": 0.4939, + "step": 12214 + }, + { + "epoch": 0.21106925628974288, + "grad_norm": 1.2518553655552924, + "learning_rate": 1.832937757956251e-05, + "loss": 0.677, + "step": 12215 + }, + { + "epoch": 0.2110865358031518, + "grad_norm": 1.173832818455124, + "learning_rate": 1.8329067875110033e-05, + "loss": 0.6124, + "step": 12216 + }, + { + "epoch": 0.2111038153165607, + "grad_norm": 1.3399573101859379, + "learning_rate": 1.832875814457028e-05, + "loss": 0.5601, + "step": 12217 + }, + { + "epoch": 0.21112109482996957, + "grad_norm": 0.9885349291308372, + "learning_rate": 1.8328448387944227e-05, + "loss": 0.6962, + "step": 12218 + }, + { + "epoch": 0.21113837434337848, + "grad_norm": 0.7354993633851448, + "learning_rate": 1.8328138605232836e-05, + "loss": 0.544, + "step": 12219 + }, + { + "epoch": 0.2111556538567874, + "grad_norm": 0.7980620657139248, + "learning_rate": 1.8327828796437085e-05, + "loss": 0.4847, + "step": 12220 + }, + { + "epoch": 0.2111729333701963, + "grad_norm": 1.4224129274842998, + "learning_rate": 1.832751896155794e-05, + "loss": 0.5309, + "step": 12221 + }, + { + "epoch": 0.2111902128836052, + "grad_norm": 0.5877792138688902, + "learning_rate": 1.8327209100596378e-05, + "loss": 0.6193, + "step": 12222 + }, + { + "epoch": 0.2112074923970141, + "grad_norm": 1.0206057227328875, + "learning_rate": 1.832689921355336e-05, + "loss": 0.5139, + "step": 12223 + }, + { + "epoch": 0.211224771910423, + "grad_norm": 1.4144450878438908, + "learning_rate": 1.832658930042986e-05, + "loss": 0.4228, + "step": 12224 + }, + { + "epoch": 0.2112420514238319, + "grad_norm": 1.135633525752151, + "learning_rate": 1.8326279361226853e-05, + "loss": 0.5292, + "step": 12225 + }, + { + "epoch": 0.2112593309372408, + "grad_norm": 1.5534858120844366, + "learning_rate": 1.8325969395945303e-05, + "loss": 0.6043, + "step": 12226 + }, + { + "epoch": 0.21127661045064972, + "grad_norm": 1.1336344811732446, + "learning_rate": 1.8325659404586186e-05, + "loss": 0.555, + "step": 12227 + }, + { + "epoch": 0.21129388996405862, + "grad_norm": 1.0726956530056289, + "learning_rate": 1.832534938715047e-05, + "loss": 0.5218, + "step": 12228 + }, + { + "epoch": 0.2113111694774675, + "grad_norm": 0.8307927813599998, + "learning_rate": 1.8325039343639127e-05, + "loss": 0.4002, + "step": 12229 + }, + { + "epoch": 0.2113284489908764, + "grad_norm": 0.8844607191803056, + "learning_rate": 1.832472927405313e-05, + "loss": 0.6578, + "step": 12230 + }, + { + "epoch": 0.21134572850428532, + "grad_norm": 0.7154856127319627, + "learning_rate": 1.8324419178393445e-05, + "loss": 0.4646, + "step": 12231 + }, + { + "epoch": 0.21136300801769423, + "grad_norm": 0.9367227302249704, + "learning_rate": 1.8324109056661047e-05, + "loss": 0.443, + "step": 12232 + }, + { + "epoch": 0.21138028753110313, + "grad_norm": 1.302407015032014, + "learning_rate": 1.8323798908856906e-05, + "loss": 0.6484, + "step": 12233 + }, + { + "epoch": 0.211397567044512, + "grad_norm": 1.1564754116837495, + "learning_rate": 1.8323488734981997e-05, + "loss": 0.5018, + "step": 12234 + }, + { + "epoch": 0.21141484655792092, + "grad_norm": 0.9362053563128697, + "learning_rate": 1.8323178535037286e-05, + "loss": 0.707, + "step": 12235 + }, + { + "epoch": 0.21143212607132983, + "grad_norm": 1.1587062560610677, + "learning_rate": 1.8322868309023748e-05, + "loss": 0.5476, + "step": 12236 + }, + { + "epoch": 0.21144940558473874, + "grad_norm": 1.3000763900467167, + "learning_rate": 1.8322558056942357e-05, + "loss": 0.8001, + "step": 12237 + }, + { + "epoch": 0.21146668509814764, + "grad_norm": 0.9227646360868101, + "learning_rate": 1.832224777879408e-05, + "loss": 0.5115, + "step": 12238 + }, + { + "epoch": 0.21148396461155655, + "grad_norm": 0.6657769396943023, + "learning_rate": 1.8321937474579888e-05, + "loss": 0.5078, + "step": 12239 + }, + { + "epoch": 0.21150124412496543, + "grad_norm": 1.1105579848531981, + "learning_rate": 1.832162714430076e-05, + "loss": 0.4551, + "step": 12240 + }, + { + "epoch": 0.21151852363837434, + "grad_norm": 1.3960253040754398, + "learning_rate": 1.8321316787957658e-05, + "loss": 0.6519, + "step": 12241 + }, + { + "epoch": 0.21153580315178325, + "grad_norm": 1.2383049231647532, + "learning_rate": 1.8321006405551565e-05, + "loss": 0.5101, + "step": 12242 + }, + { + "epoch": 0.21155308266519215, + "grad_norm": 1.5682438995509178, + "learning_rate": 1.8320695997083443e-05, + "loss": 0.5789, + "step": 12243 + }, + { + "epoch": 0.21157036217860106, + "grad_norm": 1.166040822792692, + "learning_rate": 1.832038556255427e-05, + "loss": 0.631, + "step": 12244 + }, + { + "epoch": 0.21158764169200994, + "grad_norm": 1.0563137690163311, + "learning_rate": 1.832007510196502e-05, + "loss": 0.5236, + "step": 12245 + }, + { + "epoch": 0.21160492120541885, + "grad_norm": 1.075904005109306, + "learning_rate": 1.831976461531666e-05, + "loss": 0.5144, + "step": 12246 + }, + { + "epoch": 0.21162220071882776, + "grad_norm": 1.0176087252869463, + "learning_rate": 1.8319454102610165e-05, + "loss": 0.6689, + "step": 12247 + }, + { + "epoch": 0.21163948023223667, + "grad_norm": 1.321299701249749, + "learning_rate": 1.8319143563846505e-05, + "loss": 0.613, + "step": 12248 + }, + { + "epoch": 0.21165675974564557, + "grad_norm": 1.3633519326724135, + "learning_rate": 1.831883299902666e-05, + "loss": 0.6777, + "step": 12249 + }, + { + "epoch": 0.21167403925905445, + "grad_norm": 1.8021812055309387, + "learning_rate": 1.8318522408151595e-05, + "loss": 0.6667, + "step": 12250 + }, + { + "epoch": 0.21169131877246336, + "grad_norm": 0.7620695725348261, + "learning_rate": 1.8318211791222285e-05, + "loss": 0.4367, + "step": 12251 + }, + { + "epoch": 0.21170859828587227, + "grad_norm": 1.2090459085872616, + "learning_rate": 1.8317901148239706e-05, + "loss": 0.8138, + "step": 12252 + }, + { + "epoch": 0.21172587779928118, + "grad_norm": 0.6892076571901968, + "learning_rate": 1.831759047920483e-05, + "loss": 0.4703, + "step": 12253 + }, + { + "epoch": 0.21174315731269008, + "grad_norm": 0.8366710346545657, + "learning_rate": 1.8317279784118624e-05, + "loss": 0.479, + "step": 12254 + }, + { + "epoch": 0.21176043682609896, + "grad_norm": 0.446455814486482, + "learning_rate": 1.831696906298207e-05, + "loss": 0.8578, + "step": 12255 + }, + { + "epoch": 0.21177771633950787, + "grad_norm": 0.9766897307070962, + "learning_rate": 1.831665831579613e-05, + "loss": 0.706, + "step": 12256 + }, + { + "epoch": 0.21179499585291678, + "grad_norm": 1.1895973750730313, + "learning_rate": 1.831634754256179e-05, + "loss": 0.4996, + "step": 12257 + }, + { + "epoch": 0.2118122753663257, + "grad_norm": 1.3620342659062226, + "learning_rate": 1.8316036743280014e-05, + "loss": 0.4863, + "step": 12258 + }, + { + "epoch": 0.2118295548797346, + "grad_norm": 1.1058054316828243, + "learning_rate": 1.831572591795178e-05, + "loss": 0.4887, + "step": 12259 + }, + { + "epoch": 0.2118468343931435, + "grad_norm": 1.5875147682999187, + "learning_rate": 1.8315415066578062e-05, + "loss": 0.801, + "step": 12260 + }, + { + "epoch": 0.21186411390655238, + "grad_norm": 0.9613540341535376, + "learning_rate": 1.8315104189159833e-05, + "loss": 0.4722, + "step": 12261 + }, + { + "epoch": 0.2118813934199613, + "grad_norm": 1.3907698112501494, + "learning_rate": 1.8314793285698064e-05, + "loss": 0.584, + "step": 12262 + }, + { + "epoch": 0.2118986729333702, + "grad_norm": 1.0755162807527752, + "learning_rate": 1.831448235619373e-05, + "loss": 0.5438, + "step": 12263 + }, + { + "epoch": 0.2119159524467791, + "grad_norm": 1.01043422426236, + "learning_rate": 1.8314171400647806e-05, + "loss": 0.7401, + "step": 12264 + }, + { + "epoch": 0.211933231960188, + "grad_norm": 1.0288354549935945, + "learning_rate": 1.8313860419061265e-05, + "loss": 0.4808, + "step": 12265 + }, + { + "epoch": 0.2119505114735969, + "grad_norm": 1.1988226808165814, + "learning_rate": 1.831354941143508e-05, + "loss": 0.6165, + "step": 12266 + }, + { + "epoch": 0.2119677909870058, + "grad_norm": 1.5393233671039934, + "learning_rate": 1.831323837777023e-05, + "loss": 0.6841, + "step": 12267 + }, + { + "epoch": 0.2119850705004147, + "grad_norm": 1.5498379536548952, + "learning_rate": 1.831292731806768e-05, + "loss": 0.6305, + "step": 12268 + }, + { + "epoch": 0.21200235001382361, + "grad_norm": 0.8467518683616576, + "learning_rate": 1.8312616232328414e-05, + "loss": 0.4828, + "step": 12269 + }, + { + "epoch": 0.21201962952723252, + "grad_norm": 1.0704028189936297, + "learning_rate": 1.8312305120553403e-05, + "loss": 0.4411, + "step": 12270 + }, + { + "epoch": 0.2120369090406414, + "grad_norm": 0.9705414303204883, + "learning_rate": 1.831199398274362e-05, + "loss": 0.6371, + "step": 12271 + }, + { + "epoch": 0.2120541885540503, + "grad_norm": 0.989949321104073, + "learning_rate": 1.831168281890004e-05, + "loss": 0.4988, + "step": 12272 + }, + { + "epoch": 0.21207146806745922, + "grad_norm": 0.8975463569393495, + "learning_rate": 1.8311371629023637e-05, + "loss": 0.4116, + "step": 12273 + }, + { + "epoch": 0.21208874758086813, + "grad_norm": 0.7924438194370874, + "learning_rate": 1.8311060413115386e-05, + "loss": 0.5409, + "step": 12274 + }, + { + "epoch": 0.21210602709427703, + "grad_norm": 1.3879711656190779, + "learning_rate": 1.8310749171176263e-05, + "loss": 0.5504, + "step": 12275 + }, + { + "epoch": 0.21212330660768594, + "grad_norm": 1.0058314644893118, + "learning_rate": 1.8310437903207242e-05, + "loss": 0.6503, + "step": 12276 + }, + { + "epoch": 0.21214058612109482, + "grad_norm": 0.6752095472823083, + "learning_rate": 1.8310126609209295e-05, + "loss": 0.616, + "step": 12277 + }, + { + "epoch": 0.21215786563450373, + "grad_norm": 1.0306011430892665, + "learning_rate": 1.8309815289183404e-05, + "loss": 0.8156, + "step": 12278 + }, + { + "epoch": 0.21217514514791264, + "grad_norm": 1.0609209755007103, + "learning_rate": 1.830950394313054e-05, + "loss": 0.4792, + "step": 12279 + }, + { + "epoch": 0.21219242466132154, + "grad_norm": 0.8322238233732121, + "learning_rate": 1.830919257105168e-05, + "loss": 0.5765, + "step": 12280 + }, + { + "epoch": 0.21220970417473045, + "grad_norm": 0.9974167519730589, + "learning_rate": 1.8308881172947792e-05, + "loss": 0.55, + "step": 12281 + }, + { + "epoch": 0.21222698368813933, + "grad_norm": 1.2388466648439391, + "learning_rate": 1.8308569748819858e-05, + "loss": 0.4639, + "step": 12282 + }, + { + "epoch": 0.21224426320154824, + "grad_norm": 1.2659233265257699, + "learning_rate": 1.830825829866885e-05, + "loss": 0.6827, + "step": 12283 + }, + { + "epoch": 0.21226154271495715, + "grad_norm": 0.7040333222629196, + "learning_rate": 1.8307946822495752e-05, + "loss": 0.5377, + "step": 12284 + }, + { + "epoch": 0.21227882222836605, + "grad_norm": 1.2147243686053188, + "learning_rate": 1.8307635320301528e-05, + "loss": 0.4829, + "step": 12285 + }, + { + "epoch": 0.21229610174177496, + "grad_norm": 1.288368724850856, + "learning_rate": 1.8307323792087163e-05, + "loss": 0.4112, + "step": 12286 + }, + { + "epoch": 0.21231338125518384, + "grad_norm": 1.0330747679802164, + "learning_rate": 1.8307012237853623e-05, + "loss": 0.6034, + "step": 12287 + }, + { + "epoch": 0.21233066076859275, + "grad_norm": 1.2529127373193107, + "learning_rate": 1.8306700657601895e-05, + "loss": 0.5355, + "step": 12288 + }, + { + "epoch": 0.21234794028200166, + "grad_norm": 0.6946658477751546, + "learning_rate": 1.8306389051332946e-05, + "loss": 0.3795, + "step": 12289 + }, + { + "epoch": 0.21236521979541056, + "grad_norm": 0.864861739631645, + "learning_rate": 1.8306077419047756e-05, + "loss": 0.4823, + "step": 12290 + }, + { + "epoch": 0.21238249930881947, + "grad_norm": 0.7156175973470924, + "learning_rate": 1.83057657607473e-05, + "loss": 0.6731, + "step": 12291 + }, + { + "epoch": 0.21239977882222835, + "grad_norm": 1.0234529102543723, + "learning_rate": 1.8305454076432557e-05, + "loss": 0.5352, + "step": 12292 + }, + { + "epoch": 0.21241705833563726, + "grad_norm": 1.3124391255271615, + "learning_rate": 1.8305142366104496e-05, + "loss": 0.5756, + "step": 12293 + }, + { + "epoch": 0.21243433784904617, + "grad_norm": 1.0064358685928727, + "learning_rate": 1.83048306297641e-05, + "loss": 0.4858, + "step": 12294 + }, + { + "epoch": 0.21245161736245508, + "grad_norm": 1.1598916059267816, + "learning_rate": 1.8304518867412343e-05, + "loss": 0.5246, + "step": 12295 + }, + { + "epoch": 0.21246889687586398, + "grad_norm": 1.099506398732932, + "learning_rate": 1.8304207079050203e-05, + "loss": 0.4578, + "step": 12296 + }, + { + "epoch": 0.2124861763892729, + "grad_norm": 0.6328299927667123, + "learning_rate": 1.8303895264678655e-05, + "loss": 0.5258, + "step": 12297 + }, + { + "epoch": 0.21250345590268177, + "grad_norm": 0.933296808999032, + "learning_rate": 1.8303583424298677e-05, + "loss": 0.588, + "step": 12298 + }, + { + "epoch": 0.21252073541609068, + "grad_norm": 0.7543255232815179, + "learning_rate": 1.8303271557911243e-05, + "loss": 0.7675, + "step": 12299 + }, + { + "epoch": 0.21253801492949959, + "grad_norm": 0.7048198855383694, + "learning_rate": 1.830295966551733e-05, + "loss": 0.4224, + "step": 12300 + }, + { + "epoch": 0.2125552944429085, + "grad_norm": 1.7327714271156243, + "learning_rate": 1.830264774711792e-05, + "loss": 0.6229, + "step": 12301 + }, + { + "epoch": 0.2125725739563174, + "grad_norm": 1.0781888291067676, + "learning_rate": 1.8302335802713983e-05, + "loss": 0.5516, + "step": 12302 + }, + { + "epoch": 0.21258985346972628, + "grad_norm": 1.0376120980167567, + "learning_rate": 1.8302023832306502e-05, + "loss": 0.4714, + "step": 12303 + }, + { + "epoch": 0.2126071329831352, + "grad_norm": 0.9910467406010715, + "learning_rate": 1.830171183589645e-05, + "loss": 0.5666, + "step": 12304 + }, + { + "epoch": 0.2126244124965441, + "grad_norm": 0.8620204856304072, + "learning_rate": 1.8301399813484803e-05, + "loss": 0.5177, + "step": 12305 + }, + { + "epoch": 0.212641692009953, + "grad_norm": 1.3882832331997474, + "learning_rate": 1.8301087765072546e-05, + "loss": 0.5355, + "step": 12306 + }, + { + "epoch": 0.2126589715233619, + "grad_norm": 0.842219698221819, + "learning_rate": 1.8300775690660647e-05, + "loss": 0.5373, + "step": 12307 + }, + { + "epoch": 0.2126762510367708, + "grad_norm": 1.4402070205404363, + "learning_rate": 1.8300463590250092e-05, + "loss": 0.5328, + "step": 12308 + }, + { + "epoch": 0.2126935305501797, + "grad_norm": 0.8748399943193335, + "learning_rate": 1.830015146384185e-05, + "loss": 0.5543, + "step": 12309 + }, + { + "epoch": 0.2127108100635886, + "grad_norm": 0.5313705847579923, + "learning_rate": 1.8299839311436905e-05, + "loss": 0.5822, + "step": 12310 + }, + { + "epoch": 0.21272808957699751, + "grad_norm": 1.12280601416813, + "learning_rate": 1.8299527133036235e-05, + "loss": 0.6197, + "step": 12311 + }, + { + "epoch": 0.21274536909040642, + "grad_norm": 0.7816651133609006, + "learning_rate": 1.829921492864081e-05, + "loss": 0.6795, + "step": 12312 + }, + { + "epoch": 0.21276264860381533, + "grad_norm": 0.9338599009797787, + "learning_rate": 1.8298902698251618e-05, + "loss": 0.3982, + "step": 12313 + }, + { + "epoch": 0.2127799281172242, + "grad_norm": 1.4088301885924326, + "learning_rate": 1.829859044186963e-05, + "loss": 0.6385, + "step": 12314 + }, + { + "epoch": 0.21279720763063312, + "grad_norm": 0.9260100759758244, + "learning_rate": 1.829827815949582e-05, + "loss": 0.494, + "step": 12315 + }, + { + "epoch": 0.21281448714404202, + "grad_norm": 1.295357001395221, + "learning_rate": 1.829796585113118e-05, + "loss": 0.4679, + "step": 12316 + }, + { + "epoch": 0.21283176665745093, + "grad_norm": 0.8639078829453892, + "learning_rate": 1.829765351677668e-05, + "loss": 0.6863, + "step": 12317 + }, + { + "epoch": 0.21284904617085984, + "grad_norm": 0.5125554687689567, + "learning_rate": 1.8297341156433295e-05, + "loss": 0.649, + "step": 12318 + }, + { + "epoch": 0.21286632568426872, + "grad_norm": 1.353651372033938, + "learning_rate": 1.829702877010201e-05, + "loss": 0.7148, + "step": 12319 + }, + { + "epoch": 0.21288360519767763, + "grad_norm": 1.1487939531401483, + "learning_rate": 1.82967163577838e-05, + "loss": 0.4507, + "step": 12320 + }, + { + "epoch": 0.21290088471108654, + "grad_norm": 1.3520153992131554, + "learning_rate": 1.8296403919479643e-05, + "loss": 0.6638, + "step": 12321 + }, + { + "epoch": 0.21291816422449544, + "grad_norm": 1.7607400821370818, + "learning_rate": 1.8296091455190516e-05, + "loss": 0.5726, + "step": 12322 + }, + { + "epoch": 0.21293544373790435, + "grad_norm": 0.8212433771761063, + "learning_rate": 1.8295778964917404e-05, + "loss": 0.575, + "step": 12323 + }, + { + "epoch": 0.21295272325131323, + "grad_norm": 1.2893892830680964, + "learning_rate": 1.829546644866128e-05, + "loss": 0.5982, + "step": 12324 + }, + { + "epoch": 0.21297000276472214, + "grad_norm": 0.7572024415772203, + "learning_rate": 1.8295153906423125e-05, + "loss": 0.4937, + "step": 12325 + }, + { + "epoch": 0.21298728227813105, + "grad_norm": 1.5737112551825705, + "learning_rate": 1.8294841338203917e-05, + "loss": 0.5723, + "step": 12326 + }, + { + "epoch": 0.21300456179153995, + "grad_norm": 0.7051364462387829, + "learning_rate": 1.8294528744004636e-05, + "loss": 1.0746, + "step": 12327 + }, + { + "epoch": 0.21302184130494886, + "grad_norm": 1.7206656593875342, + "learning_rate": 1.829421612382626e-05, + "loss": 0.5574, + "step": 12328 + }, + { + "epoch": 0.21303912081835774, + "grad_norm": 0.6484594229545099, + "learning_rate": 1.829390347766977e-05, + "loss": 0.5762, + "step": 12329 + }, + { + "epoch": 0.21305640033176665, + "grad_norm": 1.1392546165009532, + "learning_rate": 1.8293590805536142e-05, + "loss": 0.5903, + "step": 12330 + }, + { + "epoch": 0.21307367984517556, + "grad_norm": 1.0722404072222285, + "learning_rate": 1.829327810742636e-05, + "loss": 0.3909, + "step": 12331 + }, + { + "epoch": 0.21309095935858446, + "grad_norm": 1.284234967813193, + "learning_rate": 1.82929653833414e-05, + "loss": 0.5845, + "step": 12332 + }, + { + "epoch": 0.21310823887199337, + "grad_norm": 1.1931837716372304, + "learning_rate": 1.8292652633282242e-05, + "loss": 0.5714, + "step": 12333 + }, + { + "epoch": 0.21312551838540228, + "grad_norm": 1.204271661817866, + "learning_rate": 1.8292339857249866e-05, + "loss": 0.5149, + "step": 12334 + }, + { + "epoch": 0.21314279789881116, + "grad_norm": 0.734367278786092, + "learning_rate": 1.8292027055245248e-05, + "loss": 0.4534, + "step": 12335 + }, + { + "epoch": 0.21316007741222007, + "grad_norm": 1.5324570679081237, + "learning_rate": 1.8291714227269373e-05, + "loss": 0.9177, + "step": 12336 + }, + { + "epoch": 0.21317735692562897, + "grad_norm": 0.9969014553087326, + "learning_rate": 1.829140137332322e-05, + "loss": 0.5882, + "step": 12337 + }, + { + "epoch": 0.21319463643903788, + "grad_norm": 1.2686456714144594, + "learning_rate": 1.8291088493407766e-05, + "loss": 0.6662, + "step": 12338 + }, + { + "epoch": 0.2132119159524468, + "grad_norm": 0.5564406113282574, + "learning_rate": 1.8290775587523997e-05, + "loss": 0.3599, + "step": 12339 + }, + { + "epoch": 0.21322919546585567, + "grad_norm": 1.0132293024550811, + "learning_rate": 1.8290462655672887e-05, + "loss": 0.6908, + "step": 12340 + }, + { + "epoch": 0.21324647497926458, + "grad_norm": 0.6670154712278108, + "learning_rate": 1.8290149697855416e-05, + "loss": 0.4434, + "step": 12341 + }, + { + "epoch": 0.21326375449267349, + "grad_norm": 0.6069890210547239, + "learning_rate": 1.8289836714072567e-05, + "loss": 0.6585, + "step": 12342 + }, + { + "epoch": 0.2132810340060824, + "grad_norm": 0.7746829025151697, + "learning_rate": 1.828952370432532e-05, + "loss": 0.4117, + "step": 12343 + }, + { + "epoch": 0.2132983135194913, + "grad_norm": 0.8956327821655629, + "learning_rate": 1.828921066861465e-05, + "loss": 0.521, + "step": 12344 + }, + { + "epoch": 0.21331559303290018, + "grad_norm": 0.9573351482269543, + "learning_rate": 1.828889760694155e-05, + "loss": 0.7374, + "step": 12345 + }, + { + "epoch": 0.2133328725463091, + "grad_norm": 1.0319494522021182, + "learning_rate": 1.8288584519306985e-05, + "loss": 0.5116, + "step": 12346 + }, + { + "epoch": 0.213350152059718, + "grad_norm": 1.370430602781659, + "learning_rate": 1.8288271405711953e-05, + "loss": 0.8347, + "step": 12347 + }, + { + "epoch": 0.2133674315731269, + "grad_norm": 0.8184679812803171, + "learning_rate": 1.8287958266157417e-05, + "loss": 0.5655, + "step": 12348 + }, + { + "epoch": 0.2133847110865358, + "grad_norm": 0.6463336393081842, + "learning_rate": 1.8287645100644367e-05, + "loss": 0.443, + "step": 12349 + }, + { + "epoch": 0.21340199059994472, + "grad_norm": 1.0171396949390599, + "learning_rate": 1.8287331909173786e-05, + "loss": 0.6703, + "step": 12350 + }, + { + "epoch": 0.2134192701133536, + "grad_norm": 0.5101448935230879, + "learning_rate": 1.828701869174665e-05, + "loss": 0.5608, + "step": 12351 + }, + { + "epoch": 0.2134365496267625, + "grad_norm": 1.1777120502640461, + "learning_rate": 1.828670544836394e-05, + "loss": 0.4406, + "step": 12352 + }, + { + "epoch": 0.2134538291401714, + "grad_norm": 0.4389998895107797, + "learning_rate": 1.828639217902664e-05, + "loss": 0.5072, + "step": 12353 + }, + { + "epoch": 0.21347110865358032, + "grad_norm": 0.7827228372729891, + "learning_rate": 1.8286078883735733e-05, + "loss": 0.3406, + "step": 12354 + }, + { + "epoch": 0.21348838816698923, + "grad_norm": 0.9192636979435074, + "learning_rate": 1.8285765562492193e-05, + "loss": 0.5268, + "step": 12355 + }, + { + "epoch": 0.2135056676803981, + "grad_norm": 1.1713093274766626, + "learning_rate": 1.8285452215297007e-05, + "loss": 0.4673, + "step": 12356 + }, + { + "epoch": 0.21352294719380702, + "grad_norm": 1.2427072647445576, + "learning_rate": 1.8285138842151157e-05, + "loss": 0.4926, + "step": 12357 + }, + { + "epoch": 0.21354022670721592, + "grad_norm": 1.332872218870722, + "learning_rate": 1.828482544305562e-05, + "loss": 0.5868, + "step": 12358 + }, + { + "epoch": 0.21355750622062483, + "grad_norm": 1.0086688929800947, + "learning_rate": 1.8284512018011377e-05, + "loss": 0.3295, + "step": 12359 + }, + { + "epoch": 0.21357478573403374, + "grad_norm": 0.7553293986570957, + "learning_rate": 1.8284198567019418e-05, + "loss": 0.429, + "step": 12360 + }, + { + "epoch": 0.21359206524744262, + "grad_norm": 0.6151331263864878, + "learning_rate": 1.8283885090080716e-05, + "loss": 0.4798, + "step": 12361 + }, + { + "epoch": 0.21360934476085153, + "grad_norm": 1.8837416972047443, + "learning_rate": 1.828357158719626e-05, + "loss": 0.727, + "step": 12362 + }, + { + "epoch": 0.21362662427426043, + "grad_norm": 0.6654034366725736, + "learning_rate": 1.8283258058367024e-05, + "loss": 0.5644, + "step": 12363 + }, + { + "epoch": 0.21364390378766934, + "grad_norm": 1.0295741647687824, + "learning_rate": 1.8282944503593997e-05, + "loss": 0.6834, + "step": 12364 + }, + { + "epoch": 0.21366118330107825, + "grad_norm": 0.9145327210328809, + "learning_rate": 1.828263092287816e-05, + "loss": 0.6997, + "step": 12365 + }, + { + "epoch": 0.21367846281448716, + "grad_norm": 1.117637308694418, + "learning_rate": 1.8282317316220492e-05, + "loss": 0.7403, + "step": 12366 + }, + { + "epoch": 0.21369574232789604, + "grad_norm": 1.0973730663380608, + "learning_rate": 1.8282003683621976e-05, + "loss": 0.4919, + "step": 12367 + }, + { + "epoch": 0.21371302184130495, + "grad_norm": 1.006594732060123, + "learning_rate": 1.8281690025083594e-05, + "loss": 0.6265, + "step": 12368 + }, + { + "epoch": 0.21373030135471385, + "grad_norm": 1.2449053343560104, + "learning_rate": 1.828137634060633e-05, + "loss": 0.5744, + "step": 12369 + }, + { + "epoch": 0.21374758086812276, + "grad_norm": 0.9131489015103822, + "learning_rate": 1.8281062630191167e-05, + "loss": 0.5692, + "step": 12370 + }, + { + "epoch": 0.21376486038153167, + "grad_norm": 0.997255412707175, + "learning_rate": 1.8280748893839087e-05, + "loss": 0.6482, + "step": 12371 + }, + { + "epoch": 0.21378213989494055, + "grad_norm": 0.9257170360642051, + "learning_rate": 1.8280435131551072e-05, + "loss": 0.4215, + "step": 12372 + }, + { + "epoch": 0.21379941940834946, + "grad_norm": 1.5040078777007264, + "learning_rate": 1.8280121343328104e-05, + "loss": 0.7434, + "step": 12373 + }, + { + "epoch": 0.21381669892175836, + "grad_norm": 1.076288150573106, + "learning_rate": 1.8279807529171165e-05, + "loss": 0.5693, + "step": 12374 + }, + { + "epoch": 0.21383397843516727, + "grad_norm": 1.2054410044718051, + "learning_rate": 1.8279493689081243e-05, + "loss": 0.6242, + "step": 12375 + }, + { + "epoch": 0.21385125794857618, + "grad_norm": 1.1292595018570668, + "learning_rate": 1.8279179823059318e-05, + "loss": 0.5217, + "step": 12376 + }, + { + "epoch": 0.21386853746198506, + "grad_norm": 0.7585342775773886, + "learning_rate": 1.8278865931106368e-05, + "loss": 0.3898, + "step": 12377 + }, + { + "epoch": 0.21388581697539397, + "grad_norm": 0.8375894837199614, + "learning_rate": 1.8278552013223384e-05, + "loss": 0.4941, + "step": 12378 + }, + { + "epoch": 0.21390309648880287, + "grad_norm": 1.1037500781604082, + "learning_rate": 1.8278238069411346e-05, + "loss": 0.5539, + "step": 12379 + }, + { + "epoch": 0.21392037600221178, + "grad_norm": 0.8727048120782418, + "learning_rate": 1.8277924099671237e-05, + "loss": 0.4347, + "step": 12380 + }, + { + "epoch": 0.2139376555156207, + "grad_norm": 0.9302407754009365, + "learning_rate": 1.827761010400404e-05, + "loss": 0.6541, + "step": 12381 + }, + { + "epoch": 0.21395493502902957, + "grad_norm": 1.7103162330264468, + "learning_rate": 1.8277296082410737e-05, + "loss": 0.5394, + "step": 12382 + }, + { + "epoch": 0.21397221454243848, + "grad_norm": 0.9931695921747837, + "learning_rate": 1.8276982034892318e-05, + "loss": 0.5969, + "step": 12383 + }, + { + "epoch": 0.21398949405584738, + "grad_norm": 0.8368436579728352, + "learning_rate": 1.827666796144976e-05, + "loss": 0.5165, + "step": 12384 + }, + { + "epoch": 0.2140067735692563, + "grad_norm": 1.1749105297627531, + "learning_rate": 1.827635386208405e-05, + "loss": 0.7786, + "step": 12385 + }, + { + "epoch": 0.2140240530826652, + "grad_norm": 0.7949606876721744, + "learning_rate": 1.827603973679617e-05, + "loss": 0.469, + "step": 12386 + }, + { + "epoch": 0.2140413325960741, + "grad_norm": 1.1896780971385839, + "learning_rate": 1.82757255855871e-05, + "loss": 0.5393, + "step": 12387 + }, + { + "epoch": 0.214058612109483, + "grad_norm": 0.6681475830251102, + "learning_rate": 1.8275411408457835e-05, + "loss": 0.4717, + "step": 12388 + }, + { + "epoch": 0.2140758916228919, + "grad_norm": 1.5072442181906742, + "learning_rate": 1.827509720540935e-05, + "loss": 0.5306, + "step": 12389 + }, + { + "epoch": 0.2140931711363008, + "grad_norm": 1.5304561368272769, + "learning_rate": 1.8274782976442635e-05, + "loss": 0.8149, + "step": 12390 + }, + { + "epoch": 0.2141104506497097, + "grad_norm": 1.1180868398901176, + "learning_rate": 1.8274468721558665e-05, + "loss": 0.5635, + "step": 12391 + }, + { + "epoch": 0.21412773016311862, + "grad_norm": 0.8262578796013749, + "learning_rate": 1.8274154440758432e-05, + "loss": 0.6, + "step": 12392 + }, + { + "epoch": 0.2141450096765275, + "grad_norm": 1.2421055142595758, + "learning_rate": 1.827384013404292e-05, + "loss": 0.5329, + "step": 12393 + }, + { + "epoch": 0.2141622891899364, + "grad_norm": 0.9296366546800036, + "learning_rate": 1.8273525801413114e-05, + "loss": 0.6257, + "step": 12394 + }, + { + "epoch": 0.2141795687033453, + "grad_norm": 1.192440577487927, + "learning_rate": 1.827321144286999e-05, + "loss": 0.5987, + "step": 12395 + }, + { + "epoch": 0.21419684821675422, + "grad_norm": 0.8127697105291976, + "learning_rate": 1.8272897058414544e-05, + "loss": 0.5495, + "step": 12396 + }, + { + "epoch": 0.21421412773016313, + "grad_norm": 0.7043649990189141, + "learning_rate": 1.8272582648047756e-05, + "loss": 0.5343, + "step": 12397 + }, + { + "epoch": 0.214231407243572, + "grad_norm": 1.1501125088501907, + "learning_rate": 1.8272268211770607e-05, + "loss": 0.5067, + "step": 12398 + }, + { + "epoch": 0.21424868675698092, + "grad_norm": 0.4563497588333089, + "learning_rate": 1.827195374958409e-05, + "loss": 0.6052, + "step": 12399 + }, + { + "epoch": 0.21426596627038982, + "grad_norm": 1.8904737459580252, + "learning_rate": 1.8271639261489184e-05, + "loss": 0.7769, + "step": 12400 + }, + { + "epoch": 0.21428324578379873, + "grad_norm": 1.1517991147011832, + "learning_rate": 1.827132474748687e-05, + "loss": 0.5363, + "step": 12401 + }, + { + "epoch": 0.21430052529720764, + "grad_norm": 0.4210688794105964, + "learning_rate": 1.8271010207578142e-05, + "loss": 0.6527, + "step": 12402 + }, + { + "epoch": 0.21431780481061655, + "grad_norm": 1.3174308369493766, + "learning_rate": 1.8270695641763983e-05, + "loss": 0.5984, + "step": 12403 + }, + { + "epoch": 0.21433508432402543, + "grad_norm": 1.4475201119252925, + "learning_rate": 1.8270381050045375e-05, + "loss": 0.7377, + "step": 12404 + }, + { + "epoch": 0.21435236383743433, + "grad_norm": 1.161609077462307, + "learning_rate": 1.8270066432423306e-05, + "loss": 0.6605, + "step": 12405 + }, + { + "epoch": 0.21436964335084324, + "grad_norm": 0.8789585941661562, + "learning_rate": 1.826975178889876e-05, + "loss": 0.5506, + "step": 12406 + }, + { + "epoch": 0.21438692286425215, + "grad_norm": 0.8060528668923805, + "learning_rate": 1.8269437119472723e-05, + "loss": 0.5164, + "step": 12407 + }, + { + "epoch": 0.21440420237766106, + "grad_norm": 0.817517152796597, + "learning_rate": 1.826912242414618e-05, + "loss": 0.3815, + "step": 12408 + }, + { + "epoch": 0.21442148189106994, + "grad_norm": 1.1676347236019702, + "learning_rate": 1.8268807702920116e-05, + "loss": 0.6352, + "step": 12409 + }, + { + "epoch": 0.21443876140447884, + "grad_norm": 0.9197723223021211, + "learning_rate": 1.826849295579552e-05, + "loss": 0.59, + "step": 12410 + }, + { + "epoch": 0.21445604091788775, + "grad_norm": 1.0178806844269188, + "learning_rate": 1.826817818277337e-05, + "loss": 0.6768, + "step": 12411 + }, + { + "epoch": 0.21447332043129666, + "grad_norm": 0.6318835608363359, + "learning_rate": 1.8267863383854665e-05, + "loss": 0.3385, + "step": 12412 + }, + { + "epoch": 0.21449059994470557, + "grad_norm": 1.0447499817542187, + "learning_rate": 1.8267548559040382e-05, + "loss": 0.6148, + "step": 12413 + }, + { + "epoch": 0.21450787945811445, + "grad_norm": 1.1220151308674653, + "learning_rate": 1.826723370833151e-05, + "loss": 0.539, + "step": 12414 + }, + { + "epoch": 0.21452515897152336, + "grad_norm": 0.9073824951488959, + "learning_rate": 1.826691883172903e-05, + "loss": 0.5405, + "step": 12415 + }, + { + "epoch": 0.21454243848493226, + "grad_norm": 1.083389547442195, + "learning_rate": 1.826660392923393e-05, + "loss": 0.8, + "step": 12416 + }, + { + "epoch": 0.21455971799834117, + "grad_norm": 1.0452588422233426, + "learning_rate": 1.82662890008472e-05, + "loss": 0.3432, + "step": 12417 + }, + { + "epoch": 0.21457699751175008, + "grad_norm": 0.9478816784761027, + "learning_rate": 1.8265974046569827e-05, + "loss": 0.7516, + "step": 12418 + }, + { + "epoch": 0.21459427702515896, + "grad_norm": 0.9796913120091983, + "learning_rate": 1.8265659066402794e-05, + "loss": 0.4364, + "step": 12419 + }, + { + "epoch": 0.21461155653856787, + "grad_norm": 1.1823278630567777, + "learning_rate": 1.8265344060347093e-05, + "loss": 0.6601, + "step": 12420 + }, + { + "epoch": 0.21462883605197677, + "grad_norm": 1.5064483305230851, + "learning_rate": 1.82650290284037e-05, + "loss": 0.728, + "step": 12421 + }, + { + "epoch": 0.21464611556538568, + "grad_norm": 1.2998356073549295, + "learning_rate": 1.826471397057361e-05, + "loss": 0.6977, + "step": 12422 + }, + { + "epoch": 0.2146633950787946, + "grad_norm": 0.8354030964409381, + "learning_rate": 1.826439888685781e-05, + "loss": 0.3469, + "step": 12423 + }, + { + "epoch": 0.2146806745922035, + "grad_norm": 0.9228848735798874, + "learning_rate": 1.826408377725728e-05, + "loss": 0.5304, + "step": 12424 + }, + { + "epoch": 0.21469795410561238, + "grad_norm": 0.8039178616192499, + "learning_rate": 1.8263768641773014e-05, + "loss": 0.4634, + "step": 12425 + }, + { + "epoch": 0.21471523361902128, + "grad_norm": 0.3770316229510607, + "learning_rate": 1.8263453480405995e-05, + "loss": 0.6191, + "step": 12426 + }, + { + "epoch": 0.2147325131324302, + "grad_norm": 1.0681435842127853, + "learning_rate": 1.8263138293157214e-05, + "loss": 0.5969, + "step": 12427 + }, + { + "epoch": 0.2147497926458391, + "grad_norm": 0.9998739167590546, + "learning_rate": 1.8262823080027656e-05, + "loss": 0.4269, + "step": 12428 + }, + { + "epoch": 0.214767072159248, + "grad_norm": 1.297726962621721, + "learning_rate": 1.8262507841018308e-05, + "loss": 0.5981, + "step": 12429 + }, + { + "epoch": 0.2147843516726569, + "grad_norm": 1.1170530996064612, + "learning_rate": 1.8262192576130155e-05, + "loss": 0.6272, + "step": 12430 + }, + { + "epoch": 0.2148016311860658, + "grad_norm": 1.3897284348721946, + "learning_rate": 1.826187728536419e-05, + "loss": 0.5819, + "step": 12431 + }, + { + "epoch": 0.2148189106994747, + "grad_norm": 0.9663427078558199, + "learning_rate": 1.8261561968721395e-05, + "loss": 0.6275, + "step": 12432 + }, + { + "epoch": 0.2148361902128836, + "grad_norm": 1.3575305654719527, + "learning_rate": 1.8261246626202764e-05, + "loss": 0.7265, + "step": 12433 + }, + { + "epoch": 0.21485346972629252, + "grad_norm": 1.2348525103972605, + "learning_rate": 1.8260931257809275e-05, + "loss": 0.6999, + "step": 12434 + }, + { + "epoch": 0.2148707492397014, + "grad_norm": 1.4287688560026637, + "learning_rate": 1.8260615863541925e-05, + "loss": 0.7415, + "step": 12435 + }, + { + "epoch": 0.2148880287531103, + "grad_norm": 1.3228646508995003, + "learning_rate": 1.82603004434017e-05, + "loss": 0.5909, + "step": 12436 + }, + { + "epoch": 0.2149053082665192, + "grad_norm": 0.8615918461909496, + "learning_rate": 1.825998499738958e-05, + "loss": 0.5092, + "step": 12437 + }, + { + "epoch": 0.21492258777992812, + "grad_norm": 1.15508168917988, + "learning_rate": 1.8259669525506565e-05, + "loss": 0.6629, + "step": 12438 + }, + { + "epoch": 0.21493986729333703, + "grad_norm": 1.900117273919235, + "learning_rate": 1.825935402775364e-05, + "loss": 0.5606, + "step": 12439 + }, + { + "epoch": 0.21495714680674594, + "grad_norm": 1.2062950836970405, + "learning_rate": 1.8259038504131784e-05, + "loss": 0.5643, + "step": 12440 + }, + { + "epoch": 0.21497442632015482, + "grad_norm": 1.115564623698523, + "learning_rate": 1.8258722954641994e-05, + "loss": 0.5316, + "step": 12441 + }, + { + "epoch": 0.21499170583356372, + "grad_norm": 1.0700316609505203, + "learning_rate": 1.8258407379285254e-05, + "loss": 0.477, + "step": 12442 + }, + { + "epoch": 0.21500898534697263, + "grad_norm": 0.7076025990260352, + "learning_rate": 1.8258091778062557e-05, + "loss": 0.5642, + "step": 12443 + }, + { + "epoch": 0.21502626486038154, + "grad_norm": 0.35722444662474134, + "learning_rate": 1.825777615097489e-05, + "loss": 0.5042, + "step": 12444 + }, + { + "epoch": 0.21504354437379045, + "grad_norm": 0.6182830557730692, + "learning_rate": 1.825746049802324e-05, + "loss": 0.4378, + "step": 12445 + }, + { + "epoch": 0.21506082388719933, + "grad_norm": 0.9691065646591067, + "learning_rate": 1.825714481920859e-05, + "loss": 0.7451, + "step": 12446 + }, + { + "epoch": 0.21507810340060823, + "grad_norm": 0.7396004410760849, + "learning_rate": 1.8256829114531943e-05, + "loss": 0.4546, + "step": 12447 + }, + { + "epoch": 0.21509538291401714, + "grad_norm": 0.42642328847153277, + "learning_rate": 1.8256513383994275e-05, + "loss": 0.5164, + "step": 12448 + }, + { + "epoch": 0.21511266242742605, + "grad_norm": 0.9750855353785601, + "learning_rate": 1.8256197627596582e-05, + "loss": 0.6088, + "step": 12449 + }, + { + "epoch": 0.21512994194083496, + "grad_norm": 0.8947457757343932, + "learning_rate": 1.825588184533985e-05, + "loss": 0.6066, + "step": 12450 + }, + { + "epoch": 0.21514722145424384, + "grad_norm": 0.9715046587228875, + "learning_rate": 1.825556603722507e-05, + "loss": 0.6725, + "step": 12451 + }, + { + "epoch": 0.21516450096765274, + "grad_norm": 1.4493403676092462, + "learning_rate": 1.8255250203253228e-05, + "loss": 0.8346, + "step": 12452 + }, + { + "epoch": 0.21518178048106165, + "grad_norm": 1.5439920259629545, + "learning_rate": 1.8254934343425314e-05, + "loss": 0.5709, + "step": 12453 + }, + { + "epoch": 0.21519905999447056, + "grad_norm": 0.7145247600047757, + "learning_rate": 1.825461845774232e-05, + "loss": 0.5789, + "step": 12454 + }, + { + "epoch": 0.21521633950787947, + "grad_norm": 1.5681065420227083, + "learning_rate": 1.8254302546205233e-05, + "loss": 0.5773, + "step": 12455 + }, + { + "epoch": 0.21523361902128835, + "grad_norm": 1.5854773992055484, + "learning_rate": 1.8253986608815045e-05, + "loss": 0.7473, + "step": 12456 + }, + { + "epoch": 0.21525089853469725, + "grad_norm": 0.9417149471115203, + "learning_rate": 1.825367064557274e-05, + "loss": 0.5689, + "step": 12457 + }, + { + "epoch": 0.21526817804810616, + "grad_norm": 0.6831536197061862, + "learning_rate": 1.8253354656479314e-05, + "loss": 0.4972, + "step": 12458 + }, + { + "epoch": 0.21528545756151507, + "grad_norm": 1.0530996292152217, + "learning_rate": 1.8253038641535755e-05, + "loss": 0.5729, + "step": 12459 + }, + { + "epoch": 0.21530273707492398, + "grad_norm": 0.780933937538489, + "learning_rate": 1.825272260074305e-05, + "loss": 0.5593, + "step": 12460 + }, + { + "epoch": 0.21532001658833289, + "grad_norm": 0.8158522733830447, + "learning_rate": 1.825240653410219e-05, + "loss": 0.5887, + "step": 12461 + }, + { + "epoch": 0.21533729610174177, + "grad_norm": 1.2141369952890138, + "learning_rate": 1.8252090441614166e-05, + "loss": 0.5818, + "step": 12462 + }, + { + "epoch": 0.21535457561515067, + "grad_norm": 1.0959266817975462, + "learning_rate": 1.8251774323279966e-05, + "loss": 0.4009, + "step": 12463 + }, + { + "epoch": 0.21537185512855958, + "grad_norm": 0.9303461714093814, + "learning_rate": 1.8251458179100585e-05, + "loss": 0.3416, + "step": 12464 + }, + { + "epoch": 0.2153891346419685, + "grad_norm": 0.7443059642055044, + "learning_rate": 1.825114200907701e-05, + "loss": 0.5854, + "step": 12465 + }, + { + "epoch": 0.2154064141553774, + "grad_norm": 0.5986890691359097, + "learning_rate": 1.8250825813210227e-05, + "loss": 0.4669, + "step": 12466 + }, + { + "epoch": 0.21542369366878628, + "grad_norm": 1.0923076736129076, + "learning_rate": 1.8250509591501236e-05, + "loss": 0.4729, + "step": 12467 + }, + { + "epoch": 0.21544097318219518, + "grad_norm": 0.8396447819818542, + "learning_rate": 1.8250193343951018e-05, + "loss": 0.483, + "step": 12468 + }, + { + "epoch": 0.2154582526956041, + "grad_norm": 1.1649794056283271, + "learning_rate": 1.8249877070560565e-05, + "loss": 0.5009, + "step": 12469 + }, + { + "epoch": 0.215475532209013, + "grad_norm": 0.8260472997872618, + "learning_rate": 1.8249560771330878e-05, + "loss": 0.6334, + "step": 12470 + }, + { + "epoch": 0.2154928117224219, + "grad_norm": 1.1996185238720019, + "learning_rate": 1.8249244446262935e-05, + "loss": 0.578, + "step": 12471 + }, + { + "epoch": 0.2155100912358308, + "grad_norm": 0.41846879466520287, + "learning_rate": 1.824892809535773e-05, + "loss": 0.6324, + "step": 12472 + }, + { + "epoch": 0.2155273707492397, + "grad_norm": 0.9271322979935539, + "learning_rate": 1.8248611718616258e-05, + "loss": 0.4609, + "step": 12473 + }, + { + "epoch": 0.2155446502626486, + "grad_norm": 1.710035723575337, + "learning_rate": 1.8248295316039505e-05, + "loss": 0.6765, + "step": 12474 + }, + { + "epoch": 0.2155619297760575, + "grad_norm": 1.5176900496016588, + "learning_rate": 1.8247978887628467e-05, + "loss": 0.5916, + "step": 12475 + }, + { + "epoch": 0.21557920928946642, + "grad_norm": 0.5609382055956252, + "learning_rate": 1.824766243338413e-05, + "loss": 0.5107, + "step": 12476 + }, + { + "epoch": 0.21559648880287532, + "grad_norm": 1.2034206119127517, + "learning_rate": 1.8247345953307488e-05, + "loss": 0.5301, + "step": 12477 + }, + { + "epoch": 0.2156137683162842, + "grad_norm": 1.0569611671309072, + "learning_rate": 1.8247029447399532e-05, + "loss": 0.4767, + "step": 12478 + }, + { + "epoch": 0.2156310478296931, + "grad_norm": 1.034048540261938, + "learning_rate": 1.8246712915661253e-05, + "loss": 0.4221, + "step": 12479 + }, + { + "epoch": 0.21564832734310202, + "grad_norm": 0.9817145255862738, + "learning_rate": 1.8246396358093642e-05, + "loss": 0.6252, + "step": 12480 + }, + { + "epoch": 0.21566560685651093, + "grad_norm": 0.9985887667423494, + "learning_rate": 1.8246079774697693e-05, + "loss": 0.5223, + "step": 12481 + }, + { + "epoch": 0.21568288636991984, + "grad_norm": 0.48331933829970963, + "learning_rate": 1.8245763165474394e-05, + "loss": 0.5862, + "step": 12482 + }, + { + "epoch": 0.21570016588332871, + "grad_norm": 1.3738583552957493, + "learning_rate": 1.8245446530424736e-05, + "loss": 0.4655, + "step": 12483 + }, + { + "epoch": 0.21571744539673762, + "grad_norm": 1.0079232239201779, + "learning_rate": 1.8245129869549717e-05, + "loss": 0.5465, + "step": 12484 + }, + { + "epoch": 0.21573472491014653, + "grad_norm": 1.1535981785066578, + "learning_rate": 1.824481318285032e-05, + "loss": 0.6793, + "step": 12485 + }, + { + "epoch": 0.21575200442355544, + "grad_norm": 0.9697888031586006, + "learning_rate": 1.8244496470327546e-05, + "loss": 0.5082, + "step": 12486 + }, + { + "epoch": 0.21576928393696435, + "grad_norm": 0.7362277351209132, + "learning_rate": 1.824417973198238e-05, + "loss": 0.4452, + "step": 12487 + }, + { + "epoch": 0.21578656345037323, + "grad_norm": 0.9231753085289977, + "learning_rate": 1.8243862967815816e-05, + "loss": 0.6357, + "step": 12488 + }, + { + "epoch": 0.21580384296378213, + "grad_norm": 1.7105836024410015, + "learning_rate": 1.824354617782885e-05, + "loss": 0.4123, + "step": 12489 + }, + { + "epoch": 0.21582112247719104, + "grad_norm": 1.4768702874300073, + "learning_rate": 1.8243229362022466e-05, + "loss": 0.5541, + "step": 12490 + }, + { + "epoch": 0.21583840199059995, + "grad_norm": 0.9702388818751495, + "learning_rate": 1.8242912520397667e-05, + "loss": 0.5065, + "step": 12491 + }, + { + "epoch": 0.21585568150400886, + "grad_norm": 0.4822216125216524, + "learning_rate": 1.8242595652955436e-05, + "loss": 0.6319, + "step": 12492 + }, + { + "epoch": 0.21587296101741774, + "grad_norm": 0.8703053647664535, + "learning_rate": 1.824227875969677e-05, + "loss": 0.6557, + "step": 12493 + }, + { + "epoch": 0.21589024053082664, + "grad_norm": 1.5993585089937137, + "learning_rate": 1.824196184062266e-05, + "loss": 0.7145, + "step": 12494 + }, + { + "epoch": 0.21590752004423555, + "grad_norm": 0.866173361467329, + "learning_rate": 1.8241644895734098e-05, + "loss": 0.44, + "step": 12495 + }, + { + "epoch": 0.21592479955764446, + "grad_norm": 1.0752021275800323, + "learning_rate": 1.8241327925032076e-05, + "loss": 0.479, + "step": 12496 + }, + { + "epoch": 0.21594207907105337, + "grad_norm": 1.5959215322708131, + "learning_rate": 1.8241010928517592e-05, + "loss": 0.4424, + "step": 12497 + }, + { + "epoch": 0.21595935858446227, + "grad_norm": 0.5470928473004246, + "learning_rate": 1.8240693906191636e-05, + "loss": 0.5102, + "step": 12498 + }, + { + "epoch": 0.21597663809787115, + "grad_norm": 0.6314535584020735, + "learning_rate": 1.8240376858055197e-05, + "loss": 0.7297, + "step": 12499 + }, + { + "epoch": 0.21599391761128006, + "grad_norm": 0.7614475380303652, + "learning_rate": 1.8240059784109275e-05, + "loss": 0.5535, + "step": 12500 + }, + { + "epoch": 0.21601119712468897, + "grad_norm": 1.0282575059125802, + "learning_rate": 1.8239742684354858e-05, + "loss": 0.4898, + "step": 12501 + }, + { + "epoch": 0.21602847663809788, + "grad_norm": 0.6281295725951463, + "learning_rate": 1.823942555879294e-05, + "loss": 0.5465, + "step": 12502 + }, + { + "epoch": 0.21604575615150678, + "grad_norm": 2.1469586679554014, + "learning_rate": 1.8239108407424516e-05, + "loss": 0.8756, + "step": 12503 + }, + { + "epoch": 0.21606303566491566, + "grad_norm": 1.0052612039702955, + "learning_rate": 1.823879123025058e-05, + "loss": 0.5848, + "step": 12504 + }, + { + "epoch": 0.21608031517832457, + "grad_norm": 1.348027584379469, + "learning_rate": 1.823847402727212e-05, + "loss": 0.6794, + "step": 12505 + }, + { + "epoch": 0.21609759469173348, + "grad_norm": 0.6508669288989655, + "learning_rate": 1.8238156798490137e-05, + "loss": 0.6678, + "step": 12506 + }, + { + "epoch": 0.2161148742051424, + "grad_norm": 0.8298108143728998, + "learning_rate": 1.823783954390562e-05, + "loss": 0.3934, + "step": 12507 + }, + { + "epoch": 0.2161321537185513, + "grad_norm": 1.2701440610073336, + "learning_rate": 1.8237522263519563e-05, + "loss": 0.6198, + "step": 12508 + }, + { + "epoch": 0.21614943323196018, + "grad_norm": 0.9145188536366533, + "learning_rate": 1.8237204957332957e-05, + "loss": 0.5409, + "step": 12509 + }, + { + "epoch": 0.21616671274536908, + "grad_norm": 0.7647986393937208, + "learning_rate": 1.8236887625346803e-05, + "loss": 0.4176, + "step": 12510 + }, + { + "epoch": 0.216183992258778, + "grad_norm": 0.8345377282695763, + "learning_rate": 1.8236570267562088e-05, + "loss": 0.3854, + "step": 12511 + }, + { + "epoch": 0.2162012717721869, + "grad_norm": 1.3412195671400955, + "learning_rate": 1.8236252883979812e-05, + "loss": 0.4816, + "step": 12512 + }, + { + "epoch": 0.2162185512855958, + "grad_norm": 1.1830410080920335, + "learning_rate": 1.8235935474600968e-05, + "loss": 0.5565, + "step": 12513 + }, + { + "epoch": 0.2162358307990047, + "grad_norm": 1.2318039463596928, + "learning_rate": 1.8235618039426542e-05, + "loss": 0.6907, + "step": 12514 + }, + { + "epoch": 0.2162531103124136, + "grad_norm": 1.0227962368466488, + "learning_rate": 1.8235300578457537e-05, + "loss": 0.5557, + "step": 12515 + }, + { + "epoch": 0.2162703898258225, + "grad_norm": 1.3381411763869016, + "learning_rate": 1.8234983091694946e-05, + "loss": 0.5665, + "step": 12516 + }, + { + "epoch": 0.2162876693392314, + "grad_norm": 0.6835130189686245, + "learning_rate": 1.8234665579139765e-05, + "loss": 0.4653, + "step": 12517 + }, + { + "epoch": 0.21630494885264032, + "grad_norm": 1.0225554594112976, + "learning_rate": 1.823434804079298e-05, + "loss": 0.7632, + "step": 12518 + }, + { + "epoch": 0.21632222836604922, + "grad_norm": 1.4937068748050033, + "learning_rate": 1.823403047665559e-05, + "loss": 0.4935, + "step": 12519 + }, + { + "epoch": 0.2163395078794581, + "grad_norm": 1.528483218346633, + "learning_rate": 1.8233712886728595e-05, + "loss": 0.5799, + "step": 12520 + }, + { + "epoch": 0.216356787392867, + "grad_norm": 1.131916430317824, + "learning_rate": 1.8233395271012986e-05, + "loss": 0.4485, + "step": 12521 + }, + { + "epoch": 0.21637406690627592, + "grad_norm": 1.3805241621347075, + "learning_rate": 1.8233077629509754e-05, + "loss": 0.6417, + "step": 12522 + }, + { + "epoch": 0.21639134641968483, + "grad_norm": 1.4794771678153056, + "learning_rate": 1.82327599622199e-05, + "loss": 0.7642, + "step": 12523 + }, + { + "epoch": 0.21640862593309373, + "grad_norm": 0.652172849009569, + "learning_rate": 1.8232442269144414e-05, + "loss": 0.4008, + "step": 12524 + }, + { + "epoch": 0.21642590544650261, + "grad_norm": 0.805327865718135, + "learning_rate": 1.8232124550284295e-05, + "loss": 0.4489, + "step": 12525 + }, + { + "epoch": 0.21644318495991152, + "grad_norm": 1.037353283931845, + "learning_rate": 1.8231806805640534e-05, + "loss": 0.7973, + "step": 12526 + }, + { + "epoch": 0.21646046447332043, + "grad_norm": 1.2286967835382883, + "learning_rate": 1.823148903521413e-05, + "loss": 0.5576, + "step": 12527 + }, + { + "epoch": 0.21647774398672934, + "grad_norm": 0.6660721487368081, + "learning_rate": 1.8231171239006077e-05, + "loss": 0.3852, + "step": 12528 + }, + { + "epoch": 0.21649502350013825, + "grad_norm": 1.1654725158082515, + "learning_rate": 1.8230853417017373e-05, + "loss": 0.5654, + "step": 12529 + }, + { + "epoch": 0.21651230301354712, + "grad_norm": 0.8423360386853974, + "learning_rate": 1.8230535569249004e-05, + "loss": 0.5899, + "step": 12530 + }, + { + "epoch": 0.21652958252695603, + "grad_norm": 0.9425341607276524, + "learning_rate": 1.8230217695701977e-05, + "loss": 0.4913, + "step": 12531 + }, + { + "epoch": 0.21654686204036494, + "grad_norm": 1.0092004666410483, + "learning_rate": 1.822989979637728e-05, + "loss": 0.5172, + "step": 12532 + }, + { + "epoch": 0.21656414155377385, + "grad_norm": 0.8447797102426824, + "learning_rate": 1.822958187127591e-05, + "loss": 0.3713, + "step": 12533 + }, + { + "epoch": 0.21658142106718276, + "grad_norm": 1.1571289660520494, + "learning_rate": 1.822926392039887e-05, + "loss": 0.786, + "step": 12534 + }, + { + "epoch": 0.21659870058059166, + "grad_norm": 0.9254813820462044, + "learning_rate": 1.8228945943747146e-05, + "loss": 0.6255, + "step": 12535 + }, + { + "epoch": 0.21661598009400054, + "grad_norm": 1.1105723884797112, + "learning_rate": 1.822862794132174e-05, + "loss": 0.682, + "step": 12536 + }, + { + "epoch": 0.21663325960740945, + "grad_norm": 0.999391946970058, + "learning_rate": 1.8228309913123646e-05, + "loss": 0.5179, + "step": 12537 + }, + { + "epoch": 0.21665053912081836, + "grad_norm": 0.9367041449746543, + "learning_rate": 1.8227991859153858e-05, + "loss": 0.6743, + "step": 12538 + }, + { + "epoch": 0.21666781863422727, + "grad_norm": 1.1625366927216272, + "learning_rate": 1.8227673779413373e-05, + "loss": 0.5177, + "step": 12539 + }, + { + "epoch": 0.21668509814763617, + "grad_norm": 0.4532137652716884, + "learning_rate": 1.8227355673903192e-05, + "loss": 0.8155, + "step": 12540 + }, + { + "epoch": 0.21670237766104505, + "grad_norm": 1.7239620306538384, + "learning_rate": 1.8227037542624303e-05, + "loss": 0.4811, + "step": 12541 + }, + { + "epoch": 0.21671965717445396, + "grad_norm": 1.485585949291182, + "learning_rate": 1.822671938557771e-05, + "loss": 0.6502, + "step": 12542 + }, + { + "epoch": 0.21673693668786287, + "grad_norm": 1.112242962501111, + "learning_rate": 1.822640120276441e-05, + "loss": 0.6292, + "step": 12543 + }, + { + "epoch": 0.21675421620127178, + "grad_norm": 0.7325870476590121, + "learning_rate": 1.822608299418539e-05, + "loss": 0.4234, + "step": 12544 + }, + { + "epoch": 0.21677149571468068, + "grad_norm": 0.9052461814922986, + "learning_rate": 1.822576475984166e-05, + "loss": 0.7294, + "step": 12545 + }, + { + "epoch": 0.21678877522808956, + "grad_norm": 0.7790650504973543, + "learning_rate": 1.82254464997342e-05, + "loss": 0.6253, + "step": 12546 + }, + { + "epoch": 0.21680605474149847, + "grad_norm": 0.8600077676369807, + "learning_rate": 1.8225128213864024e-05, + "loss": 0.521, + "step": 12547 + }, + { + "epoch": 0.21682333425490738, + "grad_norm": 0.8062013218648324, + "learning_rate": 1.8224809902232123e-05, + "loss": 0.4392, + "step": 12548 + }, + { + "epoch": 0.2168406137683163, + "grad_norm": 1.4244434630983294, + "learning_rate": 1.8224491564839486e-05, + "loss": 0.8319, + "step": 12549 + }, + { + "epoch": 0.2168578932817252, + "grad_norm": 1.0125971120258555, + "learning_rate": 1.822417320168712e-05, + "loss": 0.6675, + "step": 12550 + }, + { + "epoch": 0.2168751727951341, + "grad_norm": 1.311285322030121, + "learning_rate": 1.8223854812776015e-05, + "loss": 0.677, + "step": 12551 + }, + { + "epoch": 0.21689245230854298, + "grad_norm": 1.07432894376356, + "learning_rate": 1.8223536398107177e-05, + "loss": 0.5217, + "step": 12552 + }, + { + "epoch": 0.2169097318219519, + "grad_norm": 0.42036843539088736, + "learning_rate": 1.8223217957681595e-05, + "loss": 0.5663, + "step": 12553 + }, + { + "epoch": 0.2169270113353608, + "grad_norm": 1.3639266691380318, + "learning_rate": 1.822289949150027e-05, + "loss": 0.4685, + "step": 12554 + }, + { + "epoch": 0.2169442908487697, + "grad_norm": 0.8681824824437812, + "learning_rate": 1.82225809995642e-05, + "loss": 0.5456, + "step": 12555 + }, + { + "epoch": 0.2169615703621786, + "grad_norm": 1.3065280791509624, + "learning_rate": 1.822226248187438e-05, + "loss": 0.5241, + "step": 12556 + }, + { + "epoch": 0.2169788498755875, + "grad_norm": 0.9665825402052631, + "learning_rate": 1.822194393843181e-05, + "loss": 0.5526, + "step": 12557 + }, + { + "epoch": 0.2169961293889964, + "grad_norm": 1.019035879772206, + "learning_rate": 1.8221625369237485e-05, + "loss": 0.7748, + "step": 12558 + }, + { + "epoch": 0.2170134089024053, + "grad_norm": 1.0676330880456248, + "learning_rate": 1.8221306774292405e-05, + "loss": 0.6211, + "step": 12559 + }, + { + "epoch": 0.21703068841581422, + "grad_norm": 1.1533728872854074, + "learning_rate": 1.8220988153597568e-05, + "loss": 0.509, + "step": 12560 + }, + { + "epoch": 0.21704796792922312, + "grad_norm": 1.0639497311474764, + "learning_rate": 1.8220669507153972e-05, + "loss": 0.41, + "step": 12561 + }, + { + "epoch": 0.217065247442632, + "grad_norm": 1.3909776781946102, + "learning_rate": 1.822035083496261e-05, + "loss": 0.5364, + "step": 12562 + }, + { + "epoch": 0.2170825269560409, + "grad_norm": 0.9440589125534298, + "learning_rate": 1.8220032137024492e-05, + "loss": 0.3129, + "step": 12563 + }, + { + "epoch": 0.21709980646944982, + "grad_norm": 0.9722622550846088, + "learning_rate": 1.8219713413340603e-05, + "loss": 0.4012, + "step": 12564 + }, + { + "epoch": 0.21711708598285873, + "grad_norm": 1.353061710548333, + "learning_rate": 1.821939466391195e-05, + "loss": 0.6083, + "step": 12565 + }, + { + "epoch": 0.21713436549626763, + "grad_norm": 1.2945130777120915, + "learning_rate": 1.8219075888739526e-05, + "loss": 0.4597, + "step": 12566 + }, + { + "epoch": 0.2171516450096765, + "grad_norm": 1.0650368008852626, + "learning_rate": 1.821875708782433e-05, + "loss": 0.4958, + "step": 12567 + }, + { + "epoch": 0.21716892452308542, + "grad_norm": 1.5521019531521034, + "learning_rate": 1.8218438261167365e-05, + "loss": 0.6795, + "step": 12568 + }, + { + "epoch": 0.21718620403649433, + "grad_norm": 0.6453734506603535, + "learning_rate": 1.8218119408769626e-05, + "loss": 0.4506, + "step": 12569 + }, + { + "epoch": 0.21720348354990324, + "grad_norm": 1.329056175642255, + "learning_rate": 1.8217800530632115e-05, + "loss": 0.7054, + "step": 12570 + }, + { + "epoch": 0.21722076306331214, + "grad_norm": 0.9162358942305805, + "learning_rate": 1.8217481626755827e-05, + "loss": 0.4655, + "step": 12571 + }, + { + "epoch": 0.21723804257672105, + "grad_norm": 1.064241997868609, + "learning_rate": 1.8217162697141758e-05, + "loss": 0.7047, + "step": 12572 + }, + { + "epoch": 0.21725532209012993, + "grad_norm": 1.3113824064804722, + "learning_rate": 1.821684374179092e-05, + "loss": 0.5123, + "step": 12573 + }, + { + "epoch": 0.21727260160353884, + "grad_norm": 0.6270443408833353, + "learning_rate": 1.8216524760704294e-05, + "loss": 0.4641, + "step": 12574 + }, + { + "epoch": 0.21728988111694775, + "grad_norm": 0.8687984059236467, + "learning_rate": 1.8216205753882888e-05, + "loss": 0.5444, + "step": 12575 + }, + { + "epoch": 0.21730716063035665, + "grad_norm": 0.7981727079403643, + "learning_rate": 1.8215886721327706e-05, + "loss": 0.4375, + "step": 12576 + }, + { + "epoch": 0.21732444014376556, + "grad_norm": 1.4155687286469203, + "learning_rate": 1.821556766303974e-05, + "loss": 0.7688, + "step": 12577 + }, + { + "epoch": 0.21734171965717444, + "grad_norm": 1.6393878532026676, + "learning_rate": 1.8215248579019996e-05, + "loss": 0.748, + "step": 12578 + }, + { + "epoch": 0.21735899917058335, + "grad_norm": 0.3999382961576353, + "learning_rate": 1.8214929469269463e-05, + "loss": 0.5135, + "step": 12579 + }, + { + "epoch": 0.21737627868399226, + "grad_norm": 0.762761680801337, + "learning_rate": 1.821461033378915e-05, + "loss": 0.4154, + "step": 12580 + }, + { + "epoch": 0.21739355819740117, + "grad_norm": 1.1191428694388563, + "learning_rate": 1.821429117258005e-05, + "loss": 0.7377, + "step": 12581 + }, + { + "epoch": 0.21741083771081007, + "grad_norm": 0.8242712808313133, + "learning_rate": 1.821397198564317e-05, + "loss": 0.5513, + "step": 12582 + }, + { + "epoch": 0.21742811722421895, + "grad_norm": 1.0307391474702892, + "learning_rate": 1.8213652772979503e-05, + "loss": 0.6111, + "step": 12583 + }, + { + "epoch": 0.21744539673762786, + "grad_norm": 0.5808811737401787, + "learning_rate": 1.8213333534590053e-05, + "loss": 0.6148, + "step": 12584 + }, + { + "epoch": 0.21746267625103677, + "grad_norm": 0.9739572220264129, + "learning_rate": 1.8213014270475816e-05, + "loss": 0.6706, + "step": 12585 + }, + { + "epoch": 0.21747995576444568, + "grad_norm": 1.1315638158042356, + "learning_rate": 1.8212694980637798e-05, + "loss": 0.6153, + "step": 12586 + }, + { + "epoch": 0.21749723527785458, + "grad_norm": 1.0737100910140494, + "learning_rate": 1.8212375665076992e-05, + "loss": 0.6198, + "step": 12587 + }, + { + "epoch": 0.2175145147912635, + "grad_norm": 0.8280573952682851, + "learning_rate": 1.82120563237944e-05, + "loss": 0.6531, + "step": 12588 + }, + { + "epoch": 0.21753179430467237, + "grad_norm": 1.637137682383167, + "learning_rate": 1.8211736956791025e-05, + "loss": 0.5178, + "step": 12589 + }, + { + "epoch": 0.21754907381808128, + "grad_norm": 0.844684328482705, + "learning_rate": 1.8211417564067866e-05, + "loss": 0.5584, + "step": 12590 + }, + { + "epoch": 0.2175663533314902, + "grad_norm": 1.3441420819209964, + "learning_rate": 1.8211098145625923e-05, + "loss": 0.559, + "step": 12591 + }, + { + "epoch": 0.2175836328448991, + "grad_norm": 1.3746497291347428, + "learning_rate": 1.8210778701466197e-05, + "loss": 0.58, + "step": 12592 + }, + { + "epoch": 0.217600912358308, + "grad_norm": 1.3612586277678769, + "learning_rate": 1.8210459231589686e-05, + "loss": 0.5624, + "step": 12593 + }, + { + "epoch": 0.21761819187171688, + "grad_norm": 0.7402466255361219, + "learning_rate": 1.8210139735997398e-05, + "loss": 0.5565, + "step": 12594 + }, + { + "epoch": 0.2176354713851258, + "grad_norm": 1.0457674753132218, + "learning_rate": 1.8209820214690324e-05, + "loss": 0.4645, + "step": 12595 + }, + { + "epoch": 0.2176527508985347, + "grad_norm": 1.3647655101639256, + "learning_rate": 1.8209500667669468e-05, + "loss": 0.6955, + "step": 12596 + }, + { + "epoch": 0.2176700304119436, + "grad_norm": 1.195481487758162, + "learning_rate": 1.8209181094935835e-05, + "loss": 0.7431, + "step": 12597 + }, + { + "epoch": 0.2176873099253525, + "grad_norm": 0.9702252741528942, + "learning_rate": 1.820886149649042e-05, + "loss": 0.6767, + "step": 12598 + }, + { + "epoch": 0.2177045894387614, + "grad_norm": 1.4391951625050707, + "learning_rate": 1.820854187233423e-05, + "loss": 0.5789, + "step": 12599 + }, + { + "epoch": 0.2177218689521703, + "grad_norm": 0.651775063512161, + "learning_rate": 1.820822222246826e-05, + "loss": 0.4147, + "step": 12600 + }, + { + "epoch": 0.2177391484655792, + "grad_norm": 0.758850996304714, + "learning_rate": 1.820790254689352e-05, + "loss": 0.3752, + "step": 12601 + }, + { + "epoch": 0.21775642797898812, + "grad_norm": 0.5427004299568339, + "learning_rate": 1.8207582845611e-05, + "loss": 0.4223, + "step": 12602 + }, + { + "epoch": 0.21777370749239702, + "grad_norm": 1.4430555842127193, + "learning_rate": 1.820726311862171e-05, + "loss": 0.7425, + "step": 12603 + }, + { + "epoch": 0.2177909870058059, + "grad_norm": 1.6857732897297775, + "learning_rate": 1.8206943365926647e-05, + "loss": 0.5614, + "step": 12604 + }, + { + "epoch": 0.2178082665192148, + "grad_norm": 1.1615101614174683, + "learning_rate": 1.8206623587526812e-05, + "loss": 0.5079, + "step": 12605 + }, + { + "epoch": 0.21782554603262372, + "grad_norm": 1.4010326545271192, + "learning_rate": 1.820630378342321e-05, + "loss": 0.6636, + "step": 12606 + }, + { + "epoch": 0.21784282554603263, + "grad_norm": 1.4200849497990002, + "learning_rate": 1.820598395361684e-05, + "loss": 0.4405, + "step": 12607 + }, + { + "epoch": 0.21786010505944153, + "grad_norm": 0.7363630742202725, + "learning_rate": 1.8205664098108705e-05, + "loss": 0.4704, + "step": 12608 + }, + { + "epoch": 0.21787738457285044, + "grad_norm": 1.684308921114416, + "learning_rate": 1.8205344216899805e-05, + "loss": 0.7763, + "step": 12609 + }, + { + "epoch": 0.21789466408625932, + "grad_norm": 1.8774364386446376, + "learning_rate": 1.8205024309991146e-05, + "loss": 0.5568, + "step": 12610 + }, + { + "epoch": 0.21791194359966823, + "grad_norm": 1.6083215154854367, + "learning_rate": 1.8204704377383725e-05, + "loss": 0.5411, + "step": 12611 + }, + { + "epoch": 0.21792922311307714, + "grad_norm": 1.2274632617496293, + "learning_rate": 1.8204384419078547e-05, + "loss": 0.7764, + "step": 12612 + }, + { + "epoch": 0.21794650262648604, + "grad_norm": 0.4287180935673713, + "learning_rate": 1.8204064435076616e-05, + "loss": 0.676, + "step": 12613 + }, + { + "epoch": 0.21796378213989495, + "grad_norm": 1.222030349460491, + "learning_rate": 1.820374442537893e-05, + "loss": 0.5286, + "step": 12614 + }, + { + "epoch": 0.21798106165330383, + "grad_norm": 1.1584249821257255, + "learning_rate": 1.820342438998649e-05, + "loss": 0.7321, + "step": 12615 + }, + { + "epoch": 0.21799834116671274, + "grad_norm": 0.8501091686100684, + "learning_rate": 1.8203104328900304e-05, + "loss": 0.3647, + "step": 12616 + }, + { + "epoch": 0.21801562068012165, + "grad_norm": 1.3606599104752621, + "learning_rate": 1.820278424212137e-05, + "loss": 0.6861, + "step": 12617 + }, + { + "epoch": 0.21803290019353055, + "grad_norm": 0.926732363518417, + "learning_rate": 1.8202464129650697e-05, + "loss": 0.5731, + "step": 12618 + }, + { + "epoch": 0.21805017970693946, + "grad_norm": 0.8968231732817425, + "learning_rate": 1.820214399148928e-05, + "loss": 0.5504, + "step": 12619 + }, + { + "epoch": 0.21806745922034834, + "grad_norm": 0.7482894557049583, + "learning_rate": 1.8201823827638124e-05, + "loss": 0.2994, + "step": 12620 + }, + { + "epoch": 0.21808473873375725, + "grad_norm": 1.0469781773638729, + "learning_rate": 1.8201503638098234e-05, + "loss": 0.5478, + "step": 12621 + }, + { + "epoch": 0.21810201824716616, + "grad_norm": 0.9947442826043886, + "learning_rate": 1.820118342287061e-05, + "loss": 0.6583, + "step": 12622 + }, + { + "epoch": 0.21811929776057506, + "grad_norm": 0.7657553881698002, + "learning_rate": 1.820086318195626e-05, + "loss": 0.5907, + "step": 12623 + }, + { + "epoch": 0.21813657727398397, + "grad_norm": 1.1959579016757311, + "learning_rate": 1.8200542915356178e-05, + "loss": 0.5871, + "step": 12624 + }, + { + "epoch": 0.21815385678739288, + "grad_norm": 1.2759038522094415, + "learning_rate": 1.8200222623071374e-05, + "loss": 0.5915, + "step": 12625 + }, + { + "epoch": 0.21817113630080176, + "grad_norm": 0.9068597061047197, + "learning_rate": 1.819990230510285e-05, + "loss": 0.4703, + "step": 12626 + }, + { + "epoch": 0.21818841581421067, + "grad_norm": 0.7024534633712556, + "learning_rate": 1.819958196145161e-05, + "loss": 0.3859, + "step": 12627 + }, + { + "epoch": 0.21820569532761958, + "grad_norm": 1.5009118356018973, + "learning_rate": 1.8199261592118653e-05, + "loss": 0.6276, + "step": 12628 + }, + { + "epoch": 0.21822297484102848, + "grad_norm": 0.9371660131226025, + "learning_rate": 1.819894119710499e-05, + "loss": 0.6916, + "step": 12629 + }, + { + "epoch": 0.2182402543544374, + "grad_norm": 1.0222810533794586, + "learning_rate": 1.8198620776411616e-05, + "loss": 0.3732, + "step": 12630 + }, + { + "epoch": 0.21825753386784627, + "grad_norm": 1.4254204870969596, + "learning_rate": 1.819830033003954e-05, + "loss": 0.6302, + "step": 12631 + }, + { + "epoch": 0.21827481338125518, + "grad_norm": 1.0098834060713842, + "learning_rate": 1.8197979857989766e-05, + "loss": 0.5653, + "step": 12632 + }, + { + "epoch": 0.21829209289466409, + "grad_norm": 1.0038004839631258, + "learning_rate": 1.8197659360263294e-05, + "loss": 0.5658, + "step": 12633 + }, + { + "epoch": 0.218309372408073, + "grad_norm": 0.9016973903972707, + "learning_rate": 1.8197338836861132e-05, + "loss": 0.3387, + "step": 12634 + }, + { + "epoch": 0.2183266519214819, + "grad_norm": 1.1481421151063167, + "learning_rate": 1.819701828778428e-05, + "loss": 0.745, + "step": 12635 + }, + { + "epoch": 0.21834393143489078, + "grad_norm": 0.9952016679779317, + "learning_rate": 1.8196697713033744e-05, + "loss": 0.4281, + "step": 12636 + }, + { + "epoch": 0.2183612109482997, + "grad_norm": 0.8322542535564649, + "learning_rate": 1.8196377112610524e-05, + "loss": 0.7918, + "step": 12637 + }, + { + "epoch": 0.2183784904617086, + "grad_norm": 0.9047075763480728, + "learning_rate": 1.8196056486515633e-05, + "loss": 0.4979, + "step": 12638 + }, + { + "epoch": 0.2183957699751175, + "grad_norm": 0.9986163266432949, + "learning_rate": 1.8195735834750073e-05, + "loss": 0.5433, + "step": 12639 + }, + { + "epoch": 0.2184130494885264, + "grad_norm": 1.6666137034977155, + "learning_rate": 1.8195415157314838e-05, + "loss": 0.5422, + "step": 12640 + }, + { + "epoch": 0.2184303290019353, + "grad_norm": 1.1618823343037983, + "learning_rate": 1.8195094454210943e-05, + "loss": 0.607, + "step": 12641 + }, + { + "epoch": 0.2184476085153442, + "grad_norm": 1.2025430444888145, + "learning_rate": 1.8194773725439393e-05, + "loss": 0.6749, + "step": 12642 + }, + { + "epoch": 0.2184648880287531, + "grad_norm": 1.0620224460488816, + "learning_rate": 1.8194452971001185e-05, + "loss": 0.6497, + "step": 12643 + }, + { + "epoch": 0.21848216754216201, + "grad_norm": 0.41761511919944105, + "learning_rate": 1.8194132190897326e-05, + "loss": 0.6221, + "step": 12644 + }, + { + "epoch": 0.21849944705557092, + "grad_norm": 0.3698527552978401, + "learning_rate": 1.8193811385128827e-05, + "loss": 0.4994, + "step": 12645 + }, + { + "epoch": 0.21851672656897983, + "grad_norm": 1.3855381538407943, + "learning_rate": 1.8193490553696687e-05, + "loss": 0.5267, + "step": 12646 + }, + { + "epoch": 0.2185340060823887, + "grad_norm": 0.7559040848901455, + "learning_rate": 1.8193169696601907e-05, + "loss": 0.5881, + "step": 12647 + }, + { + "epoch": 0.21855128559579762, + "grad_norm": 0.8423564478545854, + "learning_rate": 1.8192848813845503e-05, + "loss": 0.3997, + "step": 12648 + }, + { + "epoch": 0.21856856510920653, + "grad_norm": 1.1218121872330824, + "learning_rate": 1.819252790542847e-05, + "loss": 0.5067, + "step": 12649 + }, + { + "epoch": 0.21858584462261543, + "grad_norm": 0.8427734596004699, + "learning_rate": 1.819220697135182e-05, + "loss": 0.4345, + "step": 12650 + }, + { + "epoch": 0.21860312413602434, + "grad_norm": 1.5053676500986315, + "learning_rate": 1.819188601161655e-05, + "loss": 0.5786, + "step": 12651 + }, + { + "epoch": 0.21862040364943322, + "grad_norm": 1.5182336896473145, + "learning_rate": 1.8191565026223677e-05, + "loss": 0.5171, + "step": 12652 + }, + { + "epoch": 0.21863768316284213, + "grad_norm": 0.7822816537496026, + "learning_rate": 1.8191244015174195e-05, + "loss": 0.6998, + "step": 12653 + }, + { + "epoch": 0.21865496267625104, + "grad_norm": 0.8112354529672408, + "learning_rate": 1.8190922978469116e-05, + "loss": 0.5002, + "step": 12654 + }, + { + "epoch": 0.21867224218965994, + "grad_norm": 1.0130247585305296, + "learning_rate": 1.8190601916109444e-05, + "loss": 0.5592, + "step": 12655 + }, + { + "epoch": 0.21868952170306885, + "grad_norm": 1.1499561012518245, + "learning_rate": 1.8190280828096184e-05, + "loss": 0.5131, + "step": 12656 + }, + { + "epoch": 0.21870680121647773, + "grad_norm": 1.3674382624769783, + "learning_rate": 1.8189959714430344e-05, + "loss": 0.7817, + "step": 12657 + }, + { + "epoch": 0.21872408072988664, + "grad_norm": 0.9132974449228874, + "learning_rate": 1.8189638575112925e-05, + "loss": 0.5612, + "step": 12658 + }, + { + "epoch": 0.21874136024329555, + "grad_norm": 1.1498048193069157, + "learning_rate": 1.8189317410144936e-05, + "loss": 0.735, + "step": 12659 + }, + { + "epoch": 0.21875863975670445, + "grad_norm": 0.47902844346323764, + "learning_rate": 1.8188996219527383e-05, + "loss": 0.5348, + "step": 12660 + }, + { + "epoch": 0.21877591927011336, + "grad_norm": 1.4591622386179364, + "learning_rate": 1.8188675003261272e-05, + "loss": 0.7455, + "step": 12661 + }, + { + "epoch": 0.21879319878352227, + "grad_norm": 1.5992840053746649, + "learning_rate": 1.8188353761347608e-05, + "loss": 0.4925, + "step": 12662 + }, + { + "epoch": 0.21881047829693115, + "grad_norm": 0.5457323668005697, + "learning_rate": 1.8188032493787398e-05, + "loss": 0.5702, + "step": 12663 + }, + { + "epoch": 0.21882775781034006, + "grad_norm": 0.6498437770086324, + "learning_rate": 1.818771120058165e-05, + "loss": 0.5904, + "step": 12664 + }, + { + "epoch": 0.21884503732374896, + "grad_norm": 1.3816006088814958, + "learning_rate": 1.8187389881731362e-05, + "loss": 0.5323, + "step": 12665 + }, + { + "epoch": 0.21886231683715787, + "grad_norm": 1.3469643079882812, + "learning_rate": 1.818706853723755e-05, + "loss": 0.8492, + "step": 12666 + }, + { + "epoch": 0.21887959635056678, + "grad_norm": 1.6215736141759027, + "learning_rate": 1.8186747167101218e-05, + "loss": 0.5185, + "step": 12667 + }, + { + "epoch": 0.21889687586397566, + "grad_norm": 1.34620220620333, + "learning_rate": 1.818642577132337e-05, + "loss": 0.752, + "step": 12668 + }, + { + "epoch": 0.21891415537738457, + "grad_norm": 1.527660345425114, + "learning_rate": 1.8186104349905017e-05, + "loss": 0.4288, + "step": 12669 + }, + { + "epoch": 0.21893143489079347, + "grad_norm": 0.6432955399973251, + "learning_rate": 1.8185782902847166e-05, + "loss": 0.6193, + "step": 12670 + }, + { + "epoch": 0.21894871440420238, + "grad_norm": 0.7868409674737045, + "learning_rate": 1.8185461430150812e-05, + "loss": 0.4762, + "step": 12671 + }, + { + "epoch": 0.2189659939176113, + "grad_norm": 1.5018318284251397, + "learning_rate": 1.8185139931816978e-05, + "loss": 0.8031, + "step": 12672 + }, + { + "epoch": 0.21898327343102017, + "grad_norm": 0.7078125645097242, + "learning_rate": 1.818481840784666e-05, + "loss": 0.2869, + "step": 12673 + }, + { + "epoch": 0.21900055294442908, + "grad_norm": 1.1141652334383336, + "learning_rate": 1.818449685824087e-05, + "loss": 0.7451, + "step": 12674 + }, + { + "epoch": 0.21901783245783799, + "grad_norm": 0.8892486700457145, + "learning_rate": 1.8184175283000612e-05, + "loss": 0.6819, + "step": 12675 + }, + { + "epoch": 0.2190351119712469, + "grad_norm": 0.9672011188741378, + "learning_rate": 1.81838536821269e-05, + "loss": 0.5963, + "step": 12676 + }, + { + "epoch": 0.2190523914846558, + "grad_norm": 1.1670415258584261, + "learning_rate": 1.818353205562073e-05, + "loss": 0.5343, + "step": 12677 + }, + { + "epoch": 0.2190696709980647, + "grad_norm": 0.8989649766945415, + "learning_rate": 1.818321040348312e-05, + "loss": 0.7594, + "step": 12678 + }, + { + "epoch": 0.2190869505114736, + "grad_norm": 1.5121609360388177, + "learning_rate": 1.818288872571507e-05, + "loss": 0.6369, + "step": 12679 + }, + { + "epoch": 0.2191042300248825, + "grad_norm": 1.0251829387148992, + "learning_rate": 1.8182567022317594e-05, + "loss": 0.5762, + "step": 12680 + }, + { + "epoch": 0.2191215095382914, + "grad_norm": 0.8779370778378917, + "learning_rate": 1.8182245293291694e-05, + "loss": 0.3232, + "step": 12681 + }, + { + "epoch": 0.2191387890517003, + "grad_norm": 0.8254872727824305, + "learning_rate": 1.818192353863838e-05, + "loss": 0.5609, + "step": 12682 + }, + { + "epoch": 0.21915606856510922, + "grad_norm": 0.8536586907631164, + "learning_rate": 1.8181601758358662e-05, + "loss": 0.8117, + "step": 12683 + }, + { + "epoch": 0.2191733480785181, + "grad_norm": 1.624533976937255, + "learning_rate": 1.8181279952453544e-05, + "loss": 0.7463, + "step": 12684 + }, + { + "epoch": 0.219190627591927, + "grad_norm": 0.4852329015713962, + "learning_rate": 1.8180958120924036e-05, + "loss": 0.6704, + "step": 12685 + }, + { + "epoch": 0.21920790710533591, + "grad_norm": 0.6704913723350107, + "learning_rate": 1.8180636263771144e-05, + "loss": 0.367, + "step": 12686 + }, + { + "epoch": 0.21922518661874482, + "grad_norm": 0.7526057206424623, + "learning_rate": 1.8180314380995878e-05, + "loss": 0.6397, + "step": 12687 + }, + { + "epoch": 0.21924246613215373, + "grad_norm": 1.08559852020478, + "learning_rate": 1.8179992472599245e-05, + "loss": 0.4038, + "step": 12688 + }, + { + "epoch": 0.2192597456455626, + "grad_norm": 1.365447718686803, + "learning_rate": 1.8179670538582253e-05, + "loss": 0.7147, + "step": 12689 + }, + { + "epoch": 0.21927702515897152, + "grad_norm": 0.6386093395626085, + "learning_rate": 1.8179348578945917e-05, + "loss": 0.498, + "step": 12690 + }, + { + "epoch": 0.21929430467238042, + "grad_norm": 0.9533764707792931, + "learning_rate": 1.8179026593691235e-05, + "loss": 0.5685, + "step": 12691 + }, + { + "epoch": 0.21931158418578933, + "grad_norm": 0.7554224778784855, + "learning_rate": 1.817870458281922e-05, + "loss": 0.4614, + "step": 12692 + }, + { + "epoch": 0.21932886369919824, + "grad_norm": 1.0653700031225222, + "learning_rate": 1.817838254633088e-05, + "loss": 0.5256, + "step": 12693 + }, + { + "epoch": 0.21934614321260712, + "grad_norm": 1.1454180818967197, + "learning_rate": 1.817806048422723e-05, + "loss": 0.5522, + "step": 12694 + }, + { + "epoch": 0.21936342272601603, + "grad_norm": 0.9067002196117082, + "learning_rate": 1.8177738396509266e-05, + "loss": 0.5074, + "step": 12695 + }, + { + "epoch": 0.21938070223942494, + "grad_norm": 1.232965852158414, + "learning_rate": 1.8177416283178005e-05, + "loss": 0.4393, + "step": 12696 + }, + { + "epoch": 0.21939798175283384, + "grad_norm": 1.3954462948632067, + "learning_rate": 1.8177094144234457e-05, + "loss": 0.5967, + "step": 12697 + }, + { + "epoch": 0.21941526126624275, + "grad_norm": 0.9534195071325472, + "learning_rate": 1.8176771979679627e-05, + "loss": 0.5081, + "step": 12698 + }, + { + "epoch": 0.21943254077965166, + "grad_norm": 0.9274396588024798, + "learning_rate": 1.817644978951453e-05, + "loss": 0.6778, + "step": 12699 + }, + { + "epoch": 0.21944982029306054, + "grad_norm": 1.0761533488901862, + "learning_rate": 1.8176127573740166e-05, + "loss": 0.4487, + "step": 12700 + }, + { + "epoch": 0.21946709980646945, + "grad_norm": 1.1298458602691799, + "learning_rate": 1.817580533235755e-05, + "loss": 0.4793, + "step": 12701 + }, + { + "epoch": 0.21948437931987835, + "grad_norm": 0.8233646535109972, + "learning_rate": 1.8175483065367693e-05, + "loss": 0.4645, + "step": 12702 + }, + { + "epoch": 0.21950165883328726, + "grad_norm": 1.2576611795424089, + "learning_rate": 1.81751607727716e-05, + "loss": 0.6335, + "step": 12703 + }, + { + "epoch": 0.21951893834669617, + "grad_norm": 0.9544877455479661, + "learning_rate": 1.8174838454570282e-05, + "loss": 0.3154, + "step": 12704 + }, + { + "epoch": 0.21953621786010505, + "grad_norm": 1.965307982560754, + "learning_rate": 1.817451611076475e-05, + "loss": 0.8592, + "step": 12705 + }, + { + "epoch": 0.21955349737351396, + "grad_norm": 1.2860777983209437, + "learning_rate": 1.817419374135601e-05, + "loss": 0.5544, + "step": 12706 + }, + { + "epoch": 0.21957077688692286, + "grad_norm": 0.9907597562788164, + "learning_rate": 1.8173871346345077e-05, + "loss": 0.6255, + "step": 12707 + }, + { + "epoch": 0.21958805640033177, + "grad_norm": 0.5359963321673603, + "learning_rate": 1.8173548925732953e-05, + "loss": 0.6284, + "step": 12708 + }, + { + "epoch": 0.21960533591374068, + "grad_norm": 1.4398772175696932, + "learning_rate": 1.8173226479520658e-05, + "loss": 0.4814, + "step": 12709 + }, + { + "epoch": 0.21962261542714956, + "grad_norm": 0.7534986559464241, + "learning_rate": 1.817290400770919e-05, + "loss": 0.5558, + "step": 12710 + }, + { + "epoch": 0.21963989494055847, + "grad_norm": 0.7994641067622458, + "learning_rate": 1.817258151029957e-05, + "loss": 0.5592, + "step": 12711 + }, + { + "epoch": 0.21965717445396737, + "grad_norm": 0.9678185924920576, + "learning_rate": 1.8172258987292805e-05, + "loss": 0.6011, + "step": 12712 + }, + { + "epoch": 0.21967445396737628, + "grad_norm": 1.0785278563899383, + "learning_rate": 1.8171936438689903e-05, + "loss": 0.522, + "step": 12713 + }, + { + "epoch": 0.2196917334807852, + "grad_norm": 0.9928133647589935, + "learning_rate": 1.817161386449187e-05, + "loss": 0.5951, + "step": 12714 + }, + { + "epoch": 0.2197090129941941, + "grad_norm": 0.9260859614408514, + "learning_rate": 1.817129126469973e-05, + "loss": 0.3661, + "step": 12715 + }, + { + "epoch": 0.21972629250760298, + "grad_norm": 1.116180243303828, + "learning_rate": 1.8170968639314477e-05, + "loss": 0.5242, + "step": 12716 + }, + { + "epoch": 0.21974357202101188, + "grad_norm": 0.9606827970937153, + "learning_rate": 1.8170645988337133e-05, + "loss": 0.475, + "step": 12717 + }, + { + "epoch": 0.2197608515344208, + "grad_norm": 0.7758566562164091, + "learning_rate": 1.8170323311768705e-05, + "loss": 0.5566, + "step": 12718 + }, + { + "epoch": 0.2197781310478297, + "grad_norm": 1.0970487767343293, + "learning_rate": 1.8170000609610202e-05, + "loss": 0.6328, + "step": 12719 + }, + { + "epoch": 0.2197954105612386, + "grad_norm": 1.1687370388918303, + "learning_rate": 1.8169677881862637e-05, + "loss": 0.5899, + "step": 12720 + }, + { + "epoch": 0.2198126900746475, + "grad_norm": 0.9529398113906439, + "learning_rate": 1.816935512852702e-05, + "loss": 0.4625, + "step": 12721 + }, + { + "epoch": 0.2198299695880564, + "grad_norm": 0.8557050888964793, + "learning_rate": 1.816903234960436e-05, + "loss": 0.4833, + "step": 12722 + }, + { + "epoch": 0.2198472491014653, + "grad_norm": 1.4108537206817808, + "learning_rate": 1.8168709545095672e-05, + "loss": 0.5407, + "step": 12723 + }, + { + "epoch": 0.2198645286148742, + "grad_norm": 0.9474414331213933, + "learning_rate": 1.8168386715001967e-05, + "loss": 0.4808, + "step": 12724 + }, + { + "epoch": 0.21988180812828312, + "grad_norm": 1.0280268433758146, + "learning_rate": 1.816806385932425e-05, + "loss": 0.8318, + "step": 12725 + }, + { + "epoch": 0.219899087641692, + "grad_norm": 1.3069261870277904, + "learning_rate": 1.816774097806354e-05, + "loss": 0.5296, + "step": 12726 + }, + { + "epoch": 0.2199163671551009, + "grad_norm": 0.927543338833829, + "learning_rate": 1.816741807122084e-05, + "loss": 0.5402, + "step": 12727 + }, + { + "epoch": 0.2199336466685098, + "grad_norm": 0.7495874234632877, + "learning_rate": 1.8167095138797167e-05, + "loss": 0.3921, + "step": 12728 + }, + { + "epoch": 0.21995092618191872, + "grad_norm": 1.065556049401326, + "learning_rate": 1.816677218079353e-05, + "loss": 0.5647, + "step": 12729 + }, + { + "epoch": 0.21996820569532763, + "grad_norm": 1.3176287584725916, + "learning_rate": 1.8166449197210948e-05, + "loss": 0.4916, + "step": 12730 + }, + { + "epoch": 0.2199854852087365, + "grad_norm": 1.1344227009807653, + "learning_rate": 1.816612618805042e-05, + "loss": 0.5388, + "step": 12731 + }, + { + "epoch": 0.22000276472214542, + "grad_norm": 0.7818256247299555, + "learning_rate": 1.816580315331297e-05, + "loss": 0.5729, + "step": 12732 + }, + { + "epoch": 0.22002004423555432, + "grad_norm": 1.6683923185720477, + "learning_rate": 1.8165480092999602e-05, + "loss": 0.857, + "step": 12733 + }, + { + "epoch": 0.22003732374896323, + "grad_norm": 1.0974387094377762, + "learning_rate": 1.816515700711133e-05, + "loss": 0.476, + "step": 12734 + }, + { + "epoch": 0.22005460326237214, + "grad_norm": 1.305732029942653, + "learning_rate": 1.8164833895649162e-05, + "loss": 0.7851, + "step": 12735 + }, + { + "epoch": 0.22007188277578105, + "grad_norm": 1.022188358365669, + "learning_rate": 1.8164510758614115e-05, + "loss": 0.653, + "step": 12736 + }, + { + "epoch": 0.22008916228918993, + "grad_norm": 0.4000508049507549, + "learning_rate": 1.81641875960072e-05, + "loss": 0.576, + "step": 12737 + }, + { + "epoch": 0.22010644180259883, + "grad_norm": 1.0836768599618347, + "learning_rate": 1.8163864407829432e-05, + "loss": 0.5199, + "step": 12738 + }, + { + "epoch": 0.22012372131600774, + "grad_norm": 1.0911011836978168, + "learning_rate": 1.8163541194081817e-05, + "loss": 0.7104, + "step": 12739 + }, + { + "epoch": 0.22014100082941665, + "grad_norm": 0.49555543116249945, + "learning_rate": 1.8163217954765373e-05, + "loss": 0.7736, + "step": 12740 + }, + { + "epoch": 0.22015828034282556, + "grad_norm": 0.40345340345271075, + "learning_rate": 1.816289468988111e-05, + "loss": 0.7107, + "step": 12741 + }, + { + "epoch": 0.22017555985623444, + "grad_norm": 1.0677436313173811, + "learning_rate": 1.8162571399430037e-05, + "loss": 0.5447, + "step": 12742 + }, + { + "epoch": 0.22019283936964335, + "grad_norm": 0.5269408059293619, + "learning_rate": 1.816224808341317e-05, + "loss": 0.8155, + "step": 12743 + }, + { + "epoch": 0.22021011888305225, + "grad_norm": 1.290657996837656, + "learning_rate": 1.816192474183153e-05, + "loss": 0.8342, + "step": 12744 + }, + { + "epoch": 0.22022739839646116, + "grad_norm": 1.435294510053657, + "learning_rate": 1.8161601374686114e-05, + "loss": 0.6347, + "step": 12745 + }, + { + "epoch": 0.22024467790987007, + "grad_norm": 1.17336912903045, + "learning_rate": 1.8161277981977942e-05, + "loss": 0.6013, + "step": 12746 + }, + { + "epoch": 0.22026195742327895, + "grad_norm": 1.2096968419554204, + "learning_rate": 1.8160954563708028e-05, + "loss": 0.5646, + "step": 12747 + }, + { + "epoch": 0.22027923693668786, + "grad_norm": 1.1141380381990316, + "learning_rate": 1.8160631119877387e-05, + "loss": 0.4858, + "step": 12748 + }, + { + "epoch": 0.22029651645009676, + "grad_norm": 0.9547683855526604, + "learning_rate": 1.8160307650487028e-05, + "loss": 0.5879, + "step": 12749 + }, + { + "epoch": 0.22031379596350567, + "grad_norm": 1.0883265354387717, + "learning_rate": 1.8159984155537965e-05, + "loss": 0.5302, + "step": 12750 + }, + { + "epoch": 0.22033107547691458, + "grad_norm": 0.3888815583766802, + "learning_rate": 1.8159660635031208e-05, + "loss": 0.593, + "step": 12751 + }, + { + "epoch": 0.2203483549903235, + "grad_norm": 1.322604163495417, + "learning_rate": 1.8159337088967776e-05, + "loss": 0.6149, + "step": 12752 + }, + { + "epoch": 0.22036563450373237, + "grad_norm": 1.413351884228647, + "learning_rate": 1.8159013517348682e-05, + "loss": 0.6299, + "step": 12753 + }, + { + "epoch": 0.22038291401714127, + "grad_norm": 1.4609030141039214, + "learning_rate": 1.8158689920174936e-05, + "loss": 0.5093, + "step": 12754 + }, + { + "epoch": 0.22040019353055018, + "grad_norm": 0.8775054155121788, + "learning_rate": 1.8158366297447555e-05, + "loss": 0.5238, + "step": 12755 + }, + { + "epoch": 0.2204174730439591, + "grad_norm": 1.429860250416508, + "learning_rate": 1.8158042649167548e-05, + "loss": 0.4633, + "step": 12756 + }, + { + "epoch": 0.220434752557368, + "grad_norm": 1.0358120335887633, + "learning_rate": 1.815771897533593e-05, + "loss": 0.5947, + "step": 12757 + }, + { + "epoch": 0.22045203207077688, + "grad_norm": 0.7772479342866321, + "learning_rate": 1.8157395275953722e-05, + "loss": 0.5268, + "step": 12758 + }, + { + "epoch": 0.22046931158418578, + "grad_norm": 1.3351290406672083, + "learning_rate": 1.815707155102193e-05, + "loss": 0.5817, + "step": 12759 + }, + { + "epoch": 0.2204865910975947, + "grad_norm": 0.9271936633304041, + "learning_rate": 1.815674780054157e-05, + "loss": 0.6898, + "step": 12760 + }, + { + "epoch": 0.2205038706110036, + "grad_norm": 0.5211041158950072, + "learning_rate": 1.8156424024513652e-05, + "loss": 0.8756, + "step": 12761 + }, + { + "epoch": 0.2205211501244125, + "grad_norm": 1.3773286569169652, + "learning_rate": 1.8156100222939198e-05, + "loss": 0.5418, + "step": 12762 + }, + { + "epoch": 0.2205384296378214, + "grad_norm": 1.0906361940565663, + "learning_rate": 1.8155776395819215e-05, + "loss": 0.6554, + "step": 12763 + }, + { + "epoch": 0.2205557091512303, + "grad_norm": 1.035558681550035, + "learning_rate": 1.8155452543154725e-05, + "loss": 0.5516, + "step": 12764 + }, + { + "epoch": 0.2205729886646392, + "grad_norm": 1.0106576013446757, + "learning_rate": 1.8155128664946734e-05, + "loss": 0.6514, + "step": 12765 + }, + { + "epoch": 0.2205902681780481, + "grad_norm": 1.0929016074574887, + "learning_rate": 1.8154804761196263e-05, + "loss": 0.6368, + "step": 12766 + }, + { + "epoch": 0.22060754769145702, + "grad_norm": 0.74589468902421, + "learning_rate": 1.8154480831904325e-05, + "loss": 0.6771, + "step": 12767 + }, + { + "epoch": 0.2206248272048659, + "grad_norm": 0.6111628198552775, + "learning_rate": 1.815415687707193e-05, + "loss": 0.4725, + "step": 12768 + }, + { + "epoch": 0.2206421067182748, + "grad_norm": 0.903721153288426, + "learning_rate": 1.8153832896700096e-05, + "loss": 0.6092, + "step": 12769 + }, + { + "epoch": 0.2206593862316837, + "grad_norm": 0.9866233016536584, + "learning_rate": 1.815350889078984e-05, + "loss": 0.5052, + "step": 12770 + }, + { + "epoch": 0.22067666574509262, + "grad_norm": 1.1746477470348267, + "learning_rate": 1.815318485934217e-05, + "loss": 0.6615, + "step": 12771 + }, + { + "epoch": 0.22069394525850153, + "grad_norm": 1.38313433282816, + "learning_rate": 1.815286080235811e-05, + "loss": 0.5438, + "step": 12772 + }, + { + "epoch": 0.22071122477191044, + "grad_norm": 1.0988672002401203, + "learning_rate": 1.8152536719838666e-05, + "loss": 0.5996, + "step": 12773 + }, + { + "epoch": 0.22072850428531932, + "grad_norm": 1.1363423489478588, + "learning_rate": 1.8152212611784862e-05, + "loss": 0.587, + "step": 12774 + }, + { + "epoch": 0.22074578379872822, + "grad_norm": 1.5428977379468045, + "learning_rate": 1.815188847819771e-05, + "loss": 0.4754, + "step": 12775 + }, + { + "epoch": 0.22076306331213713, + "grad_norm": 1.1031069873281854, + "learning_rate": 1.8151564319078218e-05, + "loss": 0.6566, + "step": 12776 + }, + { + "epoch": 0.22078034282554604, + "grad_norm": 1.1289369087510943, + "learning_rate": 1.815124013442741e-05, + "loss": 0.6557, + "step": 12777 + }, + { + "epoch": 0.22079762233895495, + "grad_norm": 0.8785707903198372, + "learning_rate": 1.8150915924246296e-05, + "loss": 0.6359, + "step": 12778 + }, + { + "epoch": 0.22081490185236383, + "grad_norm": 0.7693089414545716, + "learning_rate": 1.8150591688535896e-05, + "loss": 0.5368, + "step": 12779 + }, + { + "epoch": 0.22083218136577273, + "grad_norm": 1.008358200237401, + "learning_rate": 1.8150267427297227e-05, + "loss": 0.4318, + "step": 12780 + }, + { + "epoch": 0.22084946087918164, + "grad_norm": 0.8577155640656396, + "learning_rate": 1.8149943140531295e-05, + "loss": 0.4352, + "step": 12781 + }, + { + "epoch": 0.22086674039259055, + "grad_norm": 0.9347348185928075, + "learning_rate": 1.8149618828239125e-05, + "loss": 0.6609, + "step": 12782 + }, + { + "epoch": 0.22088401990599946, + "grad_norm": 0.9181575430069454, + "learning_rate": 1.814929449042173e-05, + "loss": 0.4865, + "step": 12783 + }, + { + "epoch": 0.22090129941940834, + "grad_norm": 1.3668365085043535, + "learning_rate": 1.8148970127080122e-05, + "loss": 0.6164, + "step": 12784 + }, + { + "epoch": 0.22091857893281724, + "grad_norm": 0.6802774641270092, + "learning_rate": 1.8148645738215323e-05, + "loss": 0.5713, + "step": 12785 + }, + { + "epoch": 0.22093585844622615, + "grad_norm": 1.297172626224323, + "learning_rate": 1.8148321323828347e-05, + "loss": 0.5157, + "step": 12786 + }, + { + "epoch": 0.22095313795963506, + "grad_norm": 1.2015261892242766, + "learning_rate": 1.8147996883920208e-05, + "loss": 0.3957, + "step": 12787 + }, + { + "epoch": 0.22097041747304397, + "grad_norm": 0.46844843703481553, + "learning_rate": 1.814767241849192e-05, + "loss": 0.7116, + "step": 12788 + }, + { + "epoch": 0.22098769698645288, + "grad_norm": 1.0198690713207244, + "learning_rate": 1.8147347927544506e-05, + "loss": 0.4715, + "step": 12789 + }, + { + "epoch": 0.22100497649986175, + "grad_norm": 1.2227660783412684, + "learning_rate": 1.814702341107898e-05, + "loss": 0.3791, + "step": 12790 + }, + { + "epoch": 0.22102225601327066, + "grad_norm": 1.0503851549122503, + "learning_rate": 1.8146698869096358e-05, + "loss": 0.4664, + "step": 12791 + }, + { + "epoch": 0.22103953552667957, + "grad_norm": 0.9601459956633508, + "learning_rate": 1.8146374301597657e-05, + "loss": 0.6979, + "step": 12792 + }, + { + "epoch": 0.22105681504008848, + "grad_norm": 2.015982781855685, + "learning_rate": 1.8146049708583887e-05, + "loss": 0.5606, + "step": 12793 + }, + { + "epoch": 0.22107409455349739, + "grad_norm": 0.5143958900977698, + "learning_rate": 1.8145725090056075e-05, + "loss": 0.4974, + "step": 12794 + }, + { + "epoch": 0.22109137406690627, + "grad_norm": 1.0807082258599008, + "learning_rate": 1.814540044601523e-05, + "loss": 0.6403, + "step": 12795 + }, + { + "epoch": 0.22110865358031517, + "grad_norm": 1.3630656844188886, + "learning_rate": 1.8145075776462375e-05, + "loss": 0.5669, + "step": 12796 + }, + { + "epoch": 0.22112593309372408, + "grad_norm": 1.765317193804435, + "learning_rate": 1.8144751081398523e-05, + "loss": 0.6466, + "step": 12797 + }, + { + "epoch": 0.221143212607133, + "grad_norm": 0.40170422111882037, + "learning_rate": 1.814442636082469e-05, + "loss": 0.8038, + "step": 12798 + }, + { + "epoch": 0.2211604921205419, + "grad_norm": 1.4456799965023814, + "learning_rate": 1.8144101614741894e-05, + "loss": 0.3712, + "step": 12799 + }, + { + "epoch": 0.22117777163395078, + "grad_norm": 0.8072319311239357, + "learning_rate": 1.8143776843151156e-05, + "loss": 0.5807, + "step": 12800 + }, + { + "epoch": 0.22119505114735968, + "grad_norm": 0.8939708571087264, + "learning_rate": 1.814345204605349e-05, + "loss": 0.6214, + "step": 12801 + }, + { + "epoch": 0.2212123306607686, + "grad_norm": 1.3314988662181177, + "learning_rate": 1.8143127223449907e-05, + "loss": 0.6924, + "step": 12802 + }, + { + "epoch": 0.2212296101741775, + "grad_norm": 1.027857835994457, + "learning_rate": 1.8142802375341438e-05, + "loss": 0.4847, + "step": 12803 + }, + { + "epoch": 0.2212468896875864, + "grad_norm": 0.9556366134974372, + "learning_rate": 1.814247750172909e-05, + "loss": 0.6272, + "step": 12804 + }, + { + "epoch": 0.2212641692009953, + "grad_norm": 1.164190672397921, + "learning_rate": 1.8142152602613885e-05, + "loss": 0.4485, + "step": 12805 + }, + { + "epoch": 0.2212814487144042, + "grad_norm": 1.2532372153287676, + "learning_rate": 1.8141827677996837e-05, + "loss": 0.5581, + "step": 12806 + }, + { + "epoch": 0.2212987282278131, + "grad_norm": 1.111185885706173, + "learning_rate": 1.8141502727878967e-05, + "loss": 0.8535, + "step": 12807 + }, + { + "epoch": 0.221316007741222, + "grad_norm": 1.5254792947871831, + "learning_rate": 1.8141177752261293e-05, + "loss": 0.4591, + "step": 12808 + }, + { + "epoch": 0.22133328725463092, + "grad_norm": 0.7328069236496169, + "learning_rate": 1.814085275114483e-05, + "loss": 0.6231, + "step": 12809 + }, + { + "epoch": 0.22135056676803982, + "grad_norm": 0.3827881578334393, + "learning_rate": 1.8140527724530598e-05, + "loss": 0.629, + "step": 12810 + }, + { + "epoch": 0.2213678462814487, + "grad_norm": 0.9148448086280982, + "learning_rate": 1.8140202672419614e-05, + "loss": 0.4196, + "step": 12811 + }, + { + "epoch": 0.2213851257948576, + "grad_norm": 1.2709780832479467, + "learning_rate": 1.8139877594812895e-05, + "loss": 0.639, + "step": 12812 + }, + { + "epoch": 0.22140240530826652, + "grad_norm": 1.817046652046089, + "learning_rate": 1.8139552491711466e-05, + "loss": 0.5618, + "step": 12813 + }, + { + "epoch": 0.22141968482167543, + "grad_norm": 0.968224084236616, + "learning_rate": 1.8139227363116334e-05, + "loss": 0.6014, + "step": 12814 + }, + { + "epoch": 0.22143696433508434, + "grad_norm": 0.9977446928406474, + "learning_rate": 1.813890220902853e-05, + "loss": 0.5618, + "step": 12815 + }, + { + "epoch": 0.22145424384849322, + "grad_norm": 0.8593966778568335, + "learning_rate": 1.8138577029449062e-05, + "loss": 0.3275, + "step": 12816 + }, + { + "epoch": 0.22147152336190212, + "grad_norm": 0.8087682670222995, + "learning_rate": 1.813825182437895e-05, + "loss": 0.5023, + "step": 12817 + }, + { + "epoch": 0.22148880287531103, + "grad_norm": 1.2904597108580722, + "learning_rate": 1.8137926593819216e-05, + "loss": 0.5671, + "step": 12818 + }, + { + "epoch": 0.22150608238871994, + "grad_norm": 1.0932357001062676, + "learning_rate": 1.813760133777088e-05, + "loss": 0.5772, + "step": 12819 + }, + { + "epoch": 0.22152336190212885, + "grad_norm": 1.2644738824404012, + "learning_rate": 1.8137276056234955e-05, + "loss": 0.7534, + "step": 12820 + }, + { + "epoch": 0.22154064141553773, + "grad_norm": 0.9647368650133542, + "learning_rate": 1.813695074921247e-05, + "loss": 0.778, + "step": 12821 + }, + { + "epoch": 0.22155792092894663, + "grad_norm": 0.6947129415460589, + "learning_rate": 1.8136625416704428e-05, + "loss": 0.3785, + "step": 12822 + }, + { + "epoch": 0.22157520044235554, + "grad_norm": 1.3400461976497102, + "learning_rate": 1.813630005871186e-05, + "loss": 0.659, + "step": 12823 + }, + { + "epoch": 0.22159247995576445, + "grad_norm": 0.7083014963693786, + "learning_rate": 1.8135974675235783e-05, + "loss": 0.6404, + "step": 12824 + }, + { + "epoch": 0.22160975946917336, + "grad_norm": 1.0841163609643625, + "learning_rate": 1.8135649266277215e-05, + "loss": 0.4468, + "step": 12825 + }, + { + "epoch": 0.22162703898258226, + "grad_norm": 1.5722965323283837, + "learning_rate": 1.813532383183717e-05, + "loss": 0.5557, + "step": 12826 + }, + { + "epoch": 0.22164431849599114, + "grad_norm": 0.9922010896009147, + "learning_rate": 1.813499837191668e-05, + "loss": 0.4526, + "step": 12827 + }, + { + "epoch": 0.22166159800940005, + "grad_norm": 1.1079593015895504, + "learning_rate": 1.8134672886516752e-05, + "loss": 0.6584, + "step": 12828 + }, + { + "epoch": 0.22167887752280896, + "grad_norm": 0.863138534742054, + "learning_rate": 1.8134347375638413e-05, + "loss": 0.6624, + "step": 12829 + }, + { + "epoch": 0.22169615703621787, + "grad_norm": 1.4033790850730938, + "learning_rate": 1.8134021839282677e-05, + "loss": 0.6951, + "step": 12830 + }, + { + "epoch": 0.22171343654962677, + "grad_norm": 1.5862001534946018, + "learning_rate": 1.8133696277450573e-05, + "loss": 0.7118, + "step": 12831 + }, + { + "epoch": 0.22173071606303565, + "grad_norm": 1.4623823881669227, + "learning_rate": 1.8133370690143105e-05, + "loss": 0.5093, + "step": 12832 + }, + { + "epoch": 0.22174799557644456, + "grad_norm": 1.0433236388370177, + "learning_rate": 1.813304507736131e-05, + "loss": 0.6083, + "step": 12833 + }, + { + "epoch": 0.22176527508985347, + "grad_norm": 0.7647816468804652, + "learning_rate": 1.8132719439106195e-05, + "loss": 0.5321, + "step": 12834 + }, + { + "epoch": 0.22178255460326238, + "grad_norm": 0.9220217734461321, + "learning_rate": 1.8132393775378785e-05, + "loss": 0.5003, + "step": 12835 + }, + { + "epoch": 0.22179983411667129, + "grad_norm": 1.1366817607487079, + "learning_rate": 1.81320680861801e-05, + "loss": 0.5511, + "step": 12836 + }, + { + "epoch": 0.22181711363008016, + "grad_norm": 1.0041825548855206, + "learning_rate": 1.813174237151116e-05, + "loss": 0.5713, + "step": 12837 + }, + { + "epoch": 0.22183439314348907, + "grad_norm": 1.4060225969460827, + "learning_rate": 1.8131416631372983e-05, + "loss": 0.5814, + "step": 12838 + }, + { + "epoch": 0.22185167265689798, + "grad_norm": 0.7911924627374411, + "learning_rate": 1.8131090865766595e-05, + "loss": 0.3955, + "step": 12839 + }, + { + "epoch": 0.2218689521703069, + "grad_norm": 1.428693951951923, + "learning_rate": 1.8130765074693012e-05, + "loss": 0.7967, + "step": 12840 + }, + { + "epoch": 0.2218862316837158, + "grad_norm": 1.2181365496356444, + "learning_rate": 1.8130439258153255e-05, + "loss": 0.5254, + "step": 12841 + }, + { + "epoch": 0.22190351119712468, + "grad_norm": 0.39605601049513767, + "learning_rate": 1.8130113416148344e-05, + "loss": 0.47, + "step": 12842 + }, + { + "epoch": 0.22192079071053358, + "grad_norm": 1.3086508058392128, + "learning_rate": 1.8129787548679295e-05, + "loss": 0.5963, + "step": 12843 + }, + { + "epoch": 0.2219380702239425, + "grad_norm": 0.9321858625532768, + "learning_rate": 1.8129461655747137e-05, + "loss": 0.5185, + "step": 12844 + }, + { + "epoch": 0.2219553497373514, + "grad_norm": 0.9374094047002652, + "learning_rate": 1.812913573735289e-05, + "loss": 0.4596, + "step": 12845 + }, + { + "epoch": 0.2219726292507603, + "grad_norm": 1.0880195721885069, + "learning_rate": 1.8128809793497568e-05, + "loss": 0.7374, + "step": 12846 + }, + { + "epoch": 0.2219899087641692, + "grad_norm": 1.675197965328831, + "learning_rate": 1.81284838241822e-05, + "loss": 0.5453, + "step": 12847 + }, + { + "epoch": 0.2220071882775781, + "grad_norm": 0.8595366046252866, + "learning_rate": 1.81281578294078e-05, + "loss": 0.5291, + "step": 12848 + }, + { + "epoch": 0.222024467790987, + "grad_norm": 0.9034223274279235, + "learning_rate": 1.812783180917539e-05, + "loss": 0.3448, + "step": 12849 + }, + { + "epoch": 0.2220417473043959, + "grad_norm": 0.9610392656588599, + "learning_rate": 1.8127505763485994e-05, + "loss": 0.5763, + "step": 12850 + }, + { + "epoch": 0.22205902681780482, + "grad_norm": 1.0426471101064358, + "learning_rate": 1.8127179692340635e-05, + "loss": 0.6913, + "step": 12851 + }, + { + "epoch": 0.22207630633121372, + "grad_norm": 0.880371866479067, + "learning_rate": 1.812685359574033e-05, + "loss": 0.5653, + "step": 12852 + }, + { + "epoch": 0.2220935858446226, + "grad_norm": 1.0141604775796071, + "learning_rate": 1.8126527473686103e-05, + "loss": 0.6845, + "step": 12853 + }, + { + "epoch": 0.2221108653580315, + "grad_norm": 0.8734487718166931, + "learning_rate": 1.8126201326178977e-05, + "loss": 0.4289, + "step": 12854 + }, + { + "epoch": 0.22212814487144042, + "grad_norm": 1.1998392614539564, + "learning_rate": 1.8125875153219966e-05, + "loss": 0.4725, + "step": 12855 + }, + { + "epoch": 0.22214542438484933, + "grad_norm": 0.8295016215093993, + "learning_rate": 1.8125548954810096e-05, + "loss": 0.6603, + "step": 12856 + }, + { + "epoch": 0.22216270389825823, + "grad_norm": 1.146163227283165, + "learning_rate": 1.8125222730950393e-05, + "loss": 0.6417, + "step": 12857 + }, + { + "epoch": 0.22217998341166711, + "grad_norm": 1.0514964803294622, + "learning_rate": 1.8124896481641872e-05, + "loss": 0.5121, + "step": 12858 + }, + { + "epoch": 0.22219726292507602, + "grad_norm": 1.6092557300556516, + "learning_rate": 1.8124570206885562e-05, + "loss": 0.6086, + "step": 12859 + }, + { + "epoch": 0.22221454243848493, + "grad_norm": 0.7164718677346024, + "learning_rate": 1.8124243906682477e-05, + "loss": 0.5215, + "step": 12860 + }, + { + "epoch": 0.22223182195189384, + "grad_norm": 1.2334352084447697, + "learning_rate": 1.8123917581033642e-05, + "loss": 0.6012, + "step": 12861 + }, + { + "epoch": 0.22224910146530275, + "grad_norm": 1.0546210635586724, + "learning_rate": 1.8123591229940083e-05, + "loss": 0.5374, + "step": 12862 + }, + { + "epoch": 0.22226638097871165, + "grad_norm": 1.2782565732152131, + "learning_rate": 1.8123264853402816e-05, + "loss": 0.6639, + "step": 12863 + }, + { + "epoch": 0.22228366049212053, + "grad_norm": 1.3132417788677333, + "learning_rate": 1.8122938451422865e-05, + "loss": 0.753, + "step": 12864 + }, + { + "epoch": 0.22230094000552944, + "grad_norm": 1.372414469978802, + "learning_rate": 1.8122612024001256e-05, + "loss": 0.556, + "step": 12865 + }, + { + "epoch": 0.22231821951893835, + "grad_norm": 1.0091738015728977, + "learning_rate": 1.812228557113901e-05, + "loss": 0.5841, + "step": 12866 + }, + { + "epoch": 0.22233549903234726, + "grad_norm": 1.3984612802241259, + "learning_rate": 1.8121959092837147e-05, + "loss": 0.6507, + "step": 12867 + }, + { + "epoch": 0.22235277854575616, + "grad_norm": 1.0045070173851007, + "learning_rate": 1.812163258909669e-05, + "loss": 0.6079, + "step": 12868 + }, + { + "epoch": 0.22237005805916504, + "grad_norm": 1.2814568479928845, + "learning_rate": 1.8121306059918662e-05, + "loss": 0.4618, + "step": 12869 + }, + { + "epoch": 0.22238733757257395, + "grad_norm": 0.9030987660195582, + "learning_rate": 1.812097950530409e-05, + "loss": 0.5619, + "step": 12870 + }, + { + "epoch": 0.22240461708598286, + "grad_norm": 0.9480704330174636, + "learning_rate": 1.8120652925253986e-05, + "loss": 0.4254, + "step": 12871 + }, + { + "epoch": 0.22242189659939177, + "grad_norm": 1.6382335281702438, + "learning_rate": 1.8120326319769386e-05, + "loss": 0.6483, + "step": 12872 + }, + { + "epoch": 0.22243917611280067, + "grad_norm": 0.7085459513827483, + "learning_rate": 1.8119999688851303e-05, + "loss": 0.7514, + "step": 12873 + }, + { + "epoch": 0.22245645562620955, + "grad_norm": 1.024019646112797, + "learning_rate": 1.8119673032500767e-05, + "loss": 0.5132, + "step": 12874 + }, + { + "epoch": 0.22247373513961846, + "grad_norm": 0.9338847091351768, + "learning_rate": 1.8119346350718796e-05, + "loss": 0.4519, + "step": 12875 + }, + { + "epoch": 0.22249101465302737, + "grad_norm": 1.054850305215315, + "learning_rate": 1.8119019643506416e-05, + "loss": 0.3363, + "step": 12876 + }, + { + "epoch": 0.22250829416643628, + "grad_norm": 1.376695002608079, + "learning_rate": 1.811869291086465e-05, + "loss": 0.6401, + "step": 12877 + }, + { + "epoch": 0.22252557367984518, + "grad_norm": 1.0566956946030035, + "learning_rate": 1.811836615279452e-05, + "loss": 0.5178, + "step": 12878 + }, + { + "epoch": 0.22254285319325406, + "grad_norm": 1.0111412764198078, + "learning_rate": 1.8118039369297053e-05, + "loss": 0.7193, + "step": 12879 + }, + { + "epoch": 0.22256013270666297, + "grad_norm": 1.2322340020920632, + "learning_rate": 1.8117712560373265e-05, + "loss": 0.4371, + "step": 12880 + }, + { + "epoch": 0.22257741222007188, + "grad_norm": 1.5420561574071794, + "learning_rate": 1.8117385726024187e-05, + "loss": 0.417, + "step": 12881 + }, + { + "epoch": 0.2225946917334808, + "grad_norm": 1.5360480324307608, + "learning_rate": 1.8117058866250842e-05, + "loss": 0.5567, + "step": 12882 + }, + { + "epoch": 0.2226119712468897, + "grad_norm": 1.1578579798909414, + "learning_rate": 1.8116731981054248e-05, + "loss": 0.4454, + "step": 12883 + }, + { + "epoch": 0.2226292507602986, + "grad_norm": 0.5633948772646055, + "learning_rate": 1.8116405070435433e-05, + "loss": 0.8061, + "step": 12884 + }, + { + "epoch": 0.22264653027370748, + "grad_norm": 1.5538060782246434, + "learning_rate": 1.8116078134395422e-05, + "loss": 0.6001, + "step": 12885 + }, + { + "epoch": 0.2226638097871164, + "grad_norm": 1.0327189329674544, + "learning_rate": 1.8115751172935237e-05, + "loss": 0.5635, + "step": 12886 + }, + { + "epoch": 0.2226810893005253, + "grad_norm": 0.5078822935666856, + "learning_rate": 1.81154241860559e-05, + "loss": 0.5739, + "step": 12887 + }, + { + "epoch": 0.2226983688139342, + "grad_norm": 0.955744007479332, + "learning_rate": 1.8115097173758442e-05, + "loss": 0.6601, + "step": 12888 + }, + { + "epoch": 0.2227156483273431, + "grad_norm": 1.1719520625724624, + "learning_rate": 1.8114770136043882e-05, + "loss": 0.5739, + "step": 12889 + }, + { + "epoch": 0.222732927840752, + "grad_norm": 1.2445524937069892, + "learning_rate": 1.8114443072913245e-05, + "loss": 0.5777, + "step": 12890 + }, + { + "epoch": 0.2227502073541609, + "grad_norm": 0.9150369704916889, + "learning_rate": 1.8114115984367557e-05, + "loss": 0.5111, + "step": 12891 + }, + { + "epoch": 0.2227674868675698, + "grad_norm": 1.218099094227752, + "learning_rate": 1.8113788870407834e-05, + "loss": 0.4523, + "step": 12892 + }, + { + "epoch": 0.22278476638097872, + "grad_norm": 1.3369468279824759, + "learning_rate": 1.8113461731035114e-05, + "loss": 0.4745, + "step": 12893 + }, + { + "epoch": 0.22280204589438762, + "grad_norm": 1.2429664697422, + "learning_rate": 1.8113134566250413e-05, + "loss": 0.6058, + "step": 12894 + }, + { + "epoch": 0.2228193254077965, + "grad_norm": 1.241703828024639, + "learning_rate": 1.811280737605476e-05, + "loss": 0.6121, + "step": 12895 + }, + { + "epoch": 0.2228366049212054, + "grad_norm": 0.8541370265753794, + "learning_rate": 1.8112480160449177e-05, + "loss": 0.5519, + "step": 12896 + }, + { + "epoch": 0.22285388443461432, + "grad_norm": 1.112335966114035, + "learning_rate": 1.811215291943469e-05, + "loss": 0.605, + "step": 12897 + }, + { + "epoch": 0.22287116394802323, + "grad_norm": 0.567930481349934, + "learning_rate": 1.8111825653012322e-05, + "loss": 0.6289, + "step": 12898 + }, + { + "epoch": 0.22288844346143213, + "grad_norm": 1.145298373868534, + "learning_rate": 1.81114983611831e-05, + "loss": 0.694, + "step": 12899 + }, + { + "epoch": 0.22290572297484104, + "grad_norm": 1.4038533131925244, + "learning_rate": 1.8111171043948048e-05, + "loss": 0.543, + "step": 12900 + }, + { + "epoch": 0.22292300248824992, + "grad_norm": 1.8042236669559526, + "learning_rate": 1.8110843701308197e-05, + "loss": 0.659, + "step": 12901 + }, + { + "epoch": 0.22294028200165883, + "grad_norm": 0.7993752707270347, + "learning_rate": 1.811051633326456e-05, + "loss": 0.4204, + "step": 12902 + }, + { + "epoch": 0.22295756151506774, + "grad_norm": 1.227206049392944, + "learning_rate": 1.8110188939818175e-05, + "loss": 0.72, + "step": 12903 + }, + { + "epoch": 0.22297484102847664, + "grad_norm": 1.293543824860225, + "learning_rate": 1.8109861520970058e-05, + "loss": 0.5672, + "step": 12904 + }, + { + "epoch": 0.22299212054188555, + "grad_norm": 0.8645149593312218, + "learning_rate": 1.810953407672124e-05, + "loss": 0.7576, + "step": 12905 + }, + { + "epoch": 0.22300940005529443, + "grad_norm": 0.8380837031614463, + "learning_rate": 1.8109206607072744e-05, + "loss": 0.6249, + "step": 12906 + }, + { + "epoch": 0.22302667956870334, + "grad_norm": 0.7919683780896764, + "learning_rate": 1.8108879112025597e-05, + "loss": 0.4686, + "step": 12907 + }, + { + "epoch": 0.22304395908211225, + "grad_norm": 0.9417245701843248, + "learning_rate": 1.8108551591580825e-05, + "loss": 0.5167, + "step": 12908 + }, + { + "epoch": 0.22306123859552116, + "grad_norm": 0.9792922263366817, + "learning_rate": 1.8108224045739453e-05, + "loss": 0.6026, + "step": 12909 + }, + { + "epoch": 0.22307851810893006, + "grad_norm": 1.321424256266359, + "learning_rate": 1.8107896474502506e-05, + "loss": 0.7368, + "step": 12910 + }, + { + "epoch": 0.22309579762233894, + "grad_norm": 1.0171892817977375, + "learning_rate": 1.8107568877871013e-05, + "loss": 0.2939, + "step": 12911 + }, + { + "epoch": 0.22311307713574785, + "grad_norm": 1.461328638378573, + "learning_rate": 1.8107241255845997e-05, + "loss": 0.2761, + "step": 12912 + }, + { + "epoch": 0.22313035664915676, + "grad_norm": 1.1299374215702036, + "learning_rate": 1.8106913608428486e-05, + "loss": 0.6902, + "step": 12913 + }, + { + "epoch": 0.22314763616256567, + "grad_norm": 1.206119425119758, + "learning_rate": 1.8106585935619505e-05, + "loss": 0.6576, + "step": 12914 + }, + { + "epoch": 0.22316491567597457, + "grad_norm": 1.0958365131881125, + "learning_rate": 1.810625823742008e-05, + "loss": 0.5113, + "step": 12915 + }, + { + "epoch": 0.22318219518938345, + "grad_norm": 1.7271618100712065, + "learning_rate": 1.810593051383124e-05, + "loss": 0.6564, + "step": 12916 + }, + { + "epoch": 0.22319947470279236, + "grad_norm": 1.51060680263719, + "learning_rate": 1.8105602764854008e-05, + "loss": 0.53, + "step": 12917 + }, + { + "epoch": 0.22321675421620127, + "grad_norm": 0.4704787835725754, + "learning_rate": 1.8105274990489415e-05, + "loss": 0.7228, + "step": 12918 + }, + { + "epoch": 0.22323403372961018, + "grad_norm": 0.5865190438610661, + "learning_rate": 1.810494719073848e-05, + "loss": 0.4194, + "step": 12919 + }, + { + "epoch": 0.22325131324301908, + "grad_norm": 1.2766070555285887, + "learning_rate": 1.8104619365602237e-05, + "loss": 0.5033, + "step": 12920 + }, + { + "epoch": 0.223268592756428, + "grad_norm": 1.2575180890191642, + "learning_rate": 1.8104291515081712e-05, + "loss": 0.4127, + "step": 12921 + }, + { + "epoch": 0.22328587226983687, + "grad_norm": 0.7287173828648734, + "learning_rate": 1.8103963639177927e-05, + "loss": 0.5242, + "step": 12922 + }, + { + "epoch": 0.22330315178324578, + "grad_norm": 1.0880273798685791, + "learning_rate": 1.810363573789191e-05, + "loss": 0.3529, + "step": 12923 + }, + { + "epoch": 0.2233204312966547, + "grad_norm": 1.278962857702272, + "learning_rate": 1.8103307811224696e-05, + "loss": 0.5721, + "step": 12924 + }, + { + "epoch": 0.2233377108100636, + "grad_norm": 1.0379404444467675, + "learning_rate": 1.8102979859177304e-05, + "loss": 0.5721, + "step": 12925 + }, + { + "epoch": 0.2233549903234725, + "grad_norm": 1.5994733516777364, + "learning_rate": 1.8102651881750762e-05, + "loss": 0.6427, + "step": 12926 + }, + { + "epoch": 0.22337226983688138, + "grad_norm": 1.0295018150507522, + "learning_rate": 1.8102323878946097e-05, + "loss": 0.5642, + "step": 12927 + }, + { + "epoch": 0.2233895493502903, + "grad_norm": 0.8623629701636614, + "learning_rate": 1.810199585076434e-05, + "loss": 0.6341, + "step": 12928 + }, + { + "epoch": 0.2234068288636992, + "grad_norm": 1.4184165212663757, + "learning_rate": 1.8101667797206518e-05, + "loss": 0.6352, + "step": 12929 + }, + { + "epoch": 0.2234241083771081, + "grad_norm": 1.213208988292386, + "learning_rate": 1.8101339718273655e-05, + "loss": 0.5709, + "step": 12930 + }, + { + "epoch": 0.223441387890517, + "grad_norm": 1.1362105096623265, + "learning_rate": 1.810101161396678e-05, + "loss": 0.6459, + "step": 12931 + }, + { + "epoch": 0.2234586674039259, + "grad_norm": 1.0388738355494027, + "learning_rate": 1.810068348428692e-05, + "loss": 0.6867, + "step": 12932 + }, + { + "epoch": 0.2234759469173348, + "grad_norm": 1.470793575580204, + "learning_rate": 1.8100355329235104e-05, + "loss": 0.648, + "step": 12933 + }, + { + "epoch": 0.2234932264307437, + "grad_norm": 1.8110074074858353, + "learning_rate": 1.8100027148812362e-05, + "loss": 0.6805, + "step": 12934 + }, + { + "epoch": 0.22351050594415262, + "grad_norm": 1.0629253845088673, + "learning_rate": 1.809969894301972e-05, + "loss": 0.49, + "step": 12935 + }, + { + "epoch": 0.22352778545756152, + "grad_norm": 0.8550862531469177, + "learning_rate": 1.80993707118582e-05, + "loss": 0.6524, + "step": 12936 + }, + { + "epoch": 0.22354506497097043, + "grad_norm": 1.1613518214195966, + "learning_rate": 1.8099042455328838e-05, + "loss": 0.4771, + "step": 12937 + }, + { + "epoch": 0.2235623444843793, + "grad_norm": 1.000286973701249, + "learning_rate": 1.809871417343266e-05, + "loss": 0.7506, + "step": 12938 + }, + { + "epoch": 0.22357962399778822, + "grad_norm": 1.2738671650772166, + "learning_rate": 1.8098385866170696e-05, + "loss": 0.6417, + "step": 12939 + }, + { + "epoch": 0.22359690351119713, + "grad_norm": 1.2284096328554663, + "learning_rate": 1.809805753354397e-05, + "loss": 0.4824, + "step": 12940 + }, + { + "epoch": 0.22361418302460603, + "grad_norm": 0.6962770580518044, + "learning_rate": 1.809772917555351e-05, + "loss": 0.5312, + "step": 12941 + }, + { + "epoch": 0.22363146253801494, + "grad_norm": 1.4650812308613015, + "learning_rate": 1.809740079220035e-05, + "loss": 0.5983, + "step": 12942 + }, + { + "epoch": 0.22364874205142382, + "grad_norm": 1.4447702504408941, + "learning_rate": 1.8097072383485514e-05, + "loss": 0.5353, + "step": 12943 + }, + { + "epoch": 0.22366602156483273, + "grad_norm": 0.8648602670736438, + "learning_rate": 1.8096743949410035e-05, + "loss": 0.4588, + "step": 12944 + }, + { + "epoch": 0.22368330107824164, + "grad_norm": 1.1425487069785334, + "learning_rate": 1.8096415489974936e-05, + "loss": 0.4901, + "step": 12945 + }, + { + "epoch": 0.22370058059165054, + "grad_norm": 1.3742494259744467, + "learning_rate": 1.8096087005181245e-05, + "loss": 0.6201, + "step": 12946 + }, + { + "epoch": 0.22371786010505945, + "grad_norm": 1.2024628582932273, + "learning_rate": 1.8095758495029997e-05, + "loss": 0.4465, + "step": 12947 + }, + { + "epoch": 0.22373513961846833, + "grad_norm": 1.203399168878399, + "learning_rate": 1.809542995952222e-05, + "loss": 0.5237, + "step": 12948 + }, + { + "epoch": 0.22375241913187724, + "grad_norm": 1.2303343710707633, + "learning_rate": 1.809510139865894e-05, + "loss": 0.5837, + "step": 12949 + }, + { + "epoch": 0.22376969864528615, + "grad_norm": 1.3735010430719279, + "learning_rate": 1.8094772812441187e-05, + "loss": 0.5778, + "step": 12950 + }, + { + "epoch": 0.22378697815869505, + "grad_norm": 0.9999453881276781, + "learning_rate": 1.8094444200869992e-05, + "loss": 0.4056, + "step": 12951 + }, + { + "epoch": 0.22380425767210396, + "grad_norm": 1.0189342212515025, + "learning_rate": 1.809411556394638e-05, + "loss": 0.2971, + "step": 12952 + }, + { + "epoch": 0.22382153718551284, + "grad_norm": 0.9494985770692492, + "learning_rate": 1.8093786901671385e-05, + "loss": 0.577, + "step": 12953 + }, + { + "epoch": 0.22383881669892175, + "grad_norm": 1.310254593434691, + "learning_rate": 1.8093458214046033e-05, + "loss": 0.7795, + "step": 12954 + }, + { + "epoch": 0.22385609621233066, + "grad_norm": 0.8267624530846659, + "learning_rate": 1.8093129501071356e-05, + "loss": 0.4291, + "step": 12955 + }, + { + "epoch": 0.22387337572573957, + "grad_norm": 1.1178898767783159, + "learning_rate": 1.809280076274838e-05, + "loss": 0.6305, + "step": 12956 + }, + { + "epoch": 0.22389065523914847, + "grad_norm": 1.0285180129760767, + "learning_rate": 1.8092471999078137e-05, + "loss": 0.6369, + "step": 12957 + }, + { + "epoch": 0.22390793475255738, + "grad_norm": 1.0849651613259572, + "learning_rate": 1.809214321006166e-05, + "loss": 0.5269, + "step": 12958 + }, + { + "epoch": 0.22392521426596626, + "grad_norm": 1.5814904422437475, + "learning_rate": 1.8091814395699973e-05, + "loss": 0.693, + "step": 12959 + }, + { + "epoch": 0.22394249377937517, + "grad_norm": 1.0864775020425796, + "learning_rate": 1.8091485555994107e-05, + "loss": 0.4034, + "step": 12960 + }, + { + "epoch": 0.22395977329278408, + "grad_norm": 1.7343517834191047, + "learning_rate": 1.8091156690945097e-05, + "loss": 0.2922, + "step": 12961 + }, + { + "epoch": 0.22397705280619298, + "grad_norm": 1.1930018481781661, + "learning_rate": 1.8090827800553966e-05, + "loss": 0.5323, + "step": 12962 + }, + { + "epoch": 0.2239943323196019, + "grad_norm": 0.5123335917092928, + "learning_rate": 1.809049888482175e-05, + "loss": 0.481, + "step": 12963 + }, + { + "epoch": 0.22401161183301077, + "grad_norm": 0.426089808356666, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.6088, + "step": 12964 + }, + { + "epoch": 0.22402889134641968, + "grad_norm": 0.38469815361006965, + "learning_rate": 1.8089840977338172e-05, + "loss": 0.5678, + "step": 12965 + }, + { + "epoch": 0.2240461708598286, + "grad_norm": 0.752849888189397, + "learning_rate": 1.8089511985588876e-05, + "loss": 0.3694, + "step": 12966 + }, + { + "epoch": 0.2240634503732375, + "grad_norm": 0.8765343772539752, + "learning_rate": 1.8089182968502612e-05, + "loss": 0.5803, + "step": 12967 + }, + { + "epoch": 0.2240807298866464, + "grad_norm": 0.9550327343999151, + "learning_rate": 1.808885392608041e-05, + "loss": 0.7404, + "step": 12968 + }, + { + "epoch": 0.22409800940005528, + "grad_norm": 0.7219478280122367, + "learning_rate": 1.8088524858323304e-05, + "loss": 0.5354, + "step": 12969 + }, + { + "epoch": 0.2241152889134642, + "grad_norm": 0.7352727430558191, + "learning_rate": 1.8088195765232326e-05, + "loss": 0.5107, + "step": 12970 + }, + { + "epoch": 0.2241325684268731, + "grad_norm": 1.0569864443114643, + "learning_rate": 1.80878666468085e-05, + "loss": 0.4973, + "step": 12971 + }, + { + "epoch": 0.224149847940282, + "grad_norm": 0.9556568011448532, + "learning_rate": 1.8087537503052864e-05, + "loss": 0.4637, + "step": 12972 + }, + { + "epoch": 0.2241671274536909, + "grad_norm": 1.4153520240243525, + "learning_rate": 1.8087208333966443e-05, + "loss": 0.4919, + "step": 12973 + }, + { + "epoch": 0.22418440696709982, + "grad_norm": 0.4671144678157269, + "learning_rate": 1.8086879139550274e-05, + "loss": 0.5142, + "step": 12974 + }, + { + "epoch": 0.2242016864805087, + "grad_norm": 1.2650997774546997, + "learning_rate": 1.8086549919805384e-05, + "loss": 0.526, + "step": 12975 + }, + { + "epoch": 0.2242189659939176, + "grad_norm": 0.7473724480124798, + "learning_rate": 1.8086220674732806e-05, + "loss": 0.4321, + "step": 12976 + }, + { + "epoch": 0.22423624550732651, + "grad_norm": 1.005648413843174, + "learning_rate": 1.808589140433357e-05, + "loss": 0.4611, + "step": 12977 + }, + { + "epoch": 0.22425352502073542, + "grad_norm": 0.4954626051484521, + "learning_rate": 1.8085562108608704e-05, + "loss": 0.631, + "step": 12978 + }, + { + "epoch": 0.22427080453414433, + "grad_norm": 1.1420216313592546, + "learning_rate": 1.808523278755925e-05, + "loss": 0.5976, + "step": 12979 + }, + { + "epoch": 0.2242880840475532, + "grad_norm": 1.0288360753500547, + "learning_rate": 1.8084903441186226e-05, + "loss": 0.5094, + "step": 12980 + }, + { + "epoch": 0.22430536356096212, + "grad_norm": 1.3526803063359596, + "learning_rate": 1.8084574069490673e-05, + "loss": 0.592, + "step": 12981 + }, + { + "epoch": 0.22432264307437103, + "grad_norm": 1.167933358939171, + "learning_rate": 1.808424467247362e-05, + "loss": 0.5083, + "step": 12982 + }, + { + "epoch": 0.22433992258777993, + "grad_norm": 1.1478427929644643, + "learning_rate": 1.8083915250136096e-05, + "loss": 0.5655, + "step": 12983 + }, + { + "epoch": 0.22435720210118884, + "grad_norm": 1.0312337765797719, + "learning_rate": 1.8083585802479136e-05, + "loss": 0.593, + "step": 12984 + }, + { + "epoch": 0.22437448161459772, + "grad_norm": 1.4823876868862864, + "learning_rate": 1.808325632950377e-05, + "loss": 0.5059, + "step": 12985 + }, + { + "epoch": 0.22439176112800663, + "grad_norm": 0.9319383652136989, + "learning_rate": 1.8082926831211033e-05, + "loss": 0.3547, + "step": 12986 + }, + { + "epoch": 0.22440904064141554, + "grad_norm": 0.4551225303293179, + "learning_rate": 1.8082597307601953e-05, + "loss": 0.7224, + "step": 12987 + }, + { + "epoch": 0.22442632015482444, + "grad_norm": 0.5971573445332135, + "learning_rate": 1.8082267758677566e-05, + "loss": 1.0236, + "step": 12988 + }, + { + "epoch": 0.22444359966823335, + "grad_norm": 1.065948947980253, + "learning_rate": 1.80819381844389e-05, + "loss": 0.5074, + "step": 12989 + }, + { + "epoch": 0.22446087918164226, + "grad_norm": 0.8732484215586219, + "learning_rate": 1.8081608584886992e-05, + "loss": 0.5391, + "step": 12990 + }, + { + "epoch": 0.22447815869505114, + "grad_norm": 1.4874183348066699, + "learning_rate": 1.808127896002287e-05, + "loss": 0.4527, + "step": 12991 + }, + { + "epoch": 0.22449543820846005, + "grad_norm": 1.0574041143807729, + "learning_rate": 1.8080949309847568e-05, + "loss": 0.5625, + "step": 12992 + }, + { + "epoch": 0.22451271772186895, + "grad_norm": 1.1337886582751988, + "learning_rate": 1.8080619634362117e-05, + "loss": 0.5973, + "step": 12993 + }, + { + "epoch": 0.22452999723527786, + "grad_norm": 0.9049480437786832, + "learning_rate": 1.8080289933567555e-05, + "loss": 0.4374, + "step": 12994 + }, + { + "epoch": 0.22454727674868677, + "grad_norm": 0.9701423871124925, + "learning_rate": 1.807996020746491e-05, + "loss": 0.7154, + "step": 12995 + }, + { + "epoch": 0.22456455626209565, + "grad_norm": 0.984913741832412, + "learning_rate": 1.807963045605521e-05, + "loss": 0.6119, + "step": 12996 + }, + { + "epoch": 0.22458183577550456, + "grad_norm": 1.8240634301476764, + "learning_rate": 1.80793006793395e-05, + "loss": 0.4711, + "step": 12997 + }, + { + "epoch": 0.22459911528891346, + "grad_norm": 1.246188422170359, + "learning_rate": 1.8078970877318806e-05, + "loss": 0.4097, + "step": 12998 + }, + { + "epoch": 0.22461639480232237, + "grad_norm": 1.2886206833914051, + "learning_rate": 1.807864104999416e-05, + "loss": 0.6631, + "step": 12999 + }, + { + "epoch": 0.22463367431573128, + "grad_norm": 0.9231908175114065, + "learning_rate": 1.8078311197366594e-05, + "loss": 0.6979, + "step": 13000 + }, + { + "epoch": 0.22465095382914016, + "grad_norm": 1.0675099725216708, + "learning_rate": 1.8077981319437148e-05, + "loss": 0.5027, + "step": 13001 + }, + { + "epoch": 0.22466823334254907, + "grad_norm": 1.0051123756632252, + "learning_rate": 1.8077651416206848e-05, + "loss": 0.5265, + "step": 13002 + }, + { + "epoch": 0.22468551285595798, + "grad_norm": 0.9098087012083873, + "learning_rate": 1.8077321487676727e-05, + "loss": 0.5295, + "step": 13003 + }, + { + "epoch": 0.22470279236936688, + "grad_norm": 0.845997122477809, + "learning_rate": 1.8076991533847826e-05, + "loss": 0.4216, + "step": 13004 + }, + { + "epoch": 0.2247200718827758, + "grad_norm": 0.9751060404476198, + "learning_rate": 1.807666155472117e-05, + "loss": 0.5989, + "step": 13005 + }, + { + "epoch": 0.22473735139618467, + "grad_norm": 0.8237567943167666, + "learning_rate": 1.80763315502978e-05, + "loss": 0.5868, + "step": 13006 + }, + { + "epoch": 0.22475463090959358, + "grad_norm": 1.5099060574112062, + "learning_rate": 1.8076001520578742e-05, + "loss": 0.7528, + "step": 13007 + }, + { + "epoch": 0.22477191042300249, + "grad_norm": 1.1341681443305367, + "learning_rate": 1.8075671465565037e-05, + "loss": 0.5082, + "step": 13008 + }, + { + "epoch": 0.2247891899364114, + "grad_norm": 1.2267729265121823, + "learning_rate": 1.807534138525771e-05, + "loss": 0.4961, + "step": 13009 + }, + { + "epoch": 0.2248064694498203, + "grad_norm": 0.6471376087549874, + "learning_rate": 1.8075011279657806e-05, + "loss": 0.4838, + "step": 13010 + }, + { + "epoch": 0.2248237489632292, + "grad_norm": 1.1848405920814415, + "learning_rate": 1.807468114876635e-05, + "loss": 0.6812, + "step": 13011 + }, + { + "epoch": 0.2248410284766381, + "grad_norm": 0.3982835318797687, + "learning_rate": 1.807435099258438e-05, + "loss": 0.5274, + "step": 13012 + }, + { + "epoch": 0.224858307990047, + "grad_norm": 0.9330645581115514, + "learning_rate": 1.8074020811112928e-05, + "loss": 0.4082, + "step": 13013 + }, + { + "epoch": 0.2248755875034559, + "grad_norm": 1.4666872343200164, + "learning_rate": 1.807369060435303e-05, + "loss": 0.4738, + "step": 13014 + }, + { + "epoch": 0.2248928670168648, + "grad_norm": 1.3343302428439587, + "learning_rate": 1.8073360372305718e-05, + "loss": 0.6185, + "step": 13015 + }, + { + "epoch": 0.22491014653027372, + "grad_norm": 0.457912596806763, + "learning_rate": 1.807303011497203e-05, + "loss": 0.8152, + "step": 13016 + }, + { + "epoch": 0.2249274260436826, + "grad_norm": 0.8747521932336028, + "learning_rate": 1.8072699832352997e-05, + "loss": 0.476, + "step": 13017 + }, + { + "epoch": 0.2249447055570915, + "grad_norm": 1.3595819424341729, + "learning_rate": 1.8072369524449653e-05, + "loss": 0.4919, + "step": 13018 + }, + { + "epoch": 0.22496198507050041, + "grad_norm": 1.4249561456022035, + "learning_rate": 1.8072039191263037e-05, + "loss": 0.4714, + "step": 13019 + }, + { + "epoch": 0.22497926458390932, + "grad_norm": 0.9651173313007341, + "learning_rate": 1.8071708832794178e-05, + "loss": 0.7971, + "step": 13020 + }, + { + "epoch": 0.22499654409731823, + "grad_norm": 1.2468239374448244, + "learning_rate": 1.8071378449044113e-05, + "loss": 0.6955, + "step": 13021 + }, + { + "epoch": 0.2250138236107271, + "grad_norm": 1.8987852565390695, + "learning_rate": 1.8071048040013882e-05, + "loss": 0.6956, + "step": 13022 + }, + { + "epoch": 0.22503110312413602, + "grad_norm": 1.043874281375659, + "learning_rate": 1.8070717605704512e-05, + "loss": 0.4376, + "step": 13023 + }, + { + "epoch": 0.22504838263754492, + "grad_norm": 0.9935369277861943, + "learning_rate": 1.8070387146117043e-05, + "loss": 0.5325, + "step": 13024 + }, + { + "epoch": 0.22506566215095383, + "grad_norm": 1.6333957860591293, + "learning_rate": 1.8070056661252508e-05, + "loss": 0.5515, + "step": 13025 + }, + { + "epoch": 0.22508294166436274, + "grad_norm": 0.8969089201571364, + "learning_rate": 1.806972615111194e-05, + "loss": 0.4832, + "step": 13026 + }, + { + "epoch": 0.22510022117777165, + "grad_norm": 0.8427469051969301, + "learning_rate": 1.806939561569638e-05, + "loss": 0.5897, + "step": 13027 + }, + { + "epoch": 0.22511750069118053, + "grad_norm": 2.111470699023131, + "learning_rate": 1.8069065055006855e-05, + "loss": 0.7608, + "step": 13028 + }, + { + "epoch": 0.22513478020458944, + "grad_norm": 0.47373955606023443, + "learning_rate": 1.8068734469044408e-05, + "loss": 0.7616, + "step": 13029 + }, + { + "epoch": 0.22515205971799834, + "grad_norm": 0.9508885553540336, + "learning_rate": 1.8068403857810072e-05, + "loss": 0.5214, + "step": 13030 + }, + { + "epoch": 0.22516933923140725, + "grad_norm": 0.771652059452212, + "learning_rate": 1.8068073221304878e-05, + "loss": 0.5239, + "step": 13031 + }, + { + "epoch": 0.22518661874481616, + "grad_norm": 1.0497545423008017, + "learning_rate": 1.8067742559529868e-05, + "loss": 0.4944, + "step": 13032 + }, + { + "epoch": 0.22520389825822504, + "grad_norm": 1.194953735343156, + "learning_rate": 1.8067411872486077e-05, + "loss": 0.7306, + "step": 13033 + }, + { + "epoch": 0.22522117777163395, + "grad_norm": 0.7361079674485449, + "learning_rate": 1.8067081160174537e-05, + "loss": 0.5713, + "step": 13034 + }, + { + "epoch": 0.22523845728504285, + "grad_norm": 0.6652606943828849, + "learning_rate": 1.8066750422596285e-05, + "loss": 0.2644, + "step": 13035 + }, + { + "epoch": 0.22525573679845176, + "grad_norm": 0.7989637995230057, + "learning_rate": 1.806641965975236e-05, + "loss": 0.4808, + "step": 13036 + }, + { + "epoch": 0.22527301631186067, + "grad_norm": 0.9533406211845821, + "learning_rate": 1.806608887164379e-05, + "loss": 0.5063, + "step": 13037 + }, + { + "epoch": 0.22529029582526955, + "grad_norm": 1.6607641879918473, + "learning_rate": 1.8065758058271622e-05, + "loss": 0.554, + "step": 13038 + }, + { + "epoch": 0.22530757533867846, + "grad_norm": 1.1157939138615447, + "learning_rate": 1.8065427219636885e-05, + "loss": 0.6227, + "step": 13039 + }, + { + "epoch": 0.22532485485208736, + "grad_norm": 0.8161716951403856, + "learning_rate": 1.806509635574062e-05, + "loss": 0.5009, + "step": 13040 + }, + { + "epoch": 0.22534213436549627, + "grad_norm": 0.9338058561864337, + "learning_rate": 1.8064765466583858e-05, + "loss": 0.4142, + "step": 13041 + }, + { + "epoch": 0.22535941387890518, + "grad_norm": 1.3141288494750305, + "learning_rate": 1.8064434552167638e-05, + "loss": 0.6413, + "step": 13042 + }, + { + "epoch": 0.22537669339231406, + "grad_norm": 0.9576375140922759, + "learning_rate": 1.8064103612492997e-05, + "loss": 0.446, + "step": 13043 + }, + { + "epoch": 0.22539397290572297, + "grad_norm": 1.354140277193867, + "learning_rate": 1.8063772647560966e-05, + "loss": 0.7189, + "step": 13044 + }, + { + "epoch": 0.22541125241913187, + "grad_norm": 0.8874462553266047, + "learning_rate": 1.806344165737259e-05, + "loss": 0.4739, + "step": 13045 + }, + { + "epoch": 0.22542853193254078, + "grad_norm": 1.0039914759560082, + "learning_rate": 1.8063110641928904e-05, + "loss": 0.4842, + "step": 13046 + }, + { + "epoch": 0.2254458114459497, + "grad_norm": 0.7622792963392764, + "learning_rate": 1.8062779601230937e-05, + "loss": 0.31, + "step": 13047 + }, + { + "epoch": 0.2254630909593586, + "grad_norm": 0.8983293023400231, + "learning_rate": 1.8062448535279736e-05, + "loss": 0.5037, + "step": 13048 + }, + { + "epoch": 0.22548037047276748, + "grad_norm": 1.5733969727930588, + "learning_rate": 1.8062117444076332e-05, + "loss": 0.5901, + "step": 13049 + }, + { + "epoch": 0.22549764998617639, + "grad_norm": 0.910576178356451, + "learning_rate": 1.806178632762176e-05, + "loss": 0.4341, + "step": 13050 + }, + { + "epoch": 0.2255149294995853, + "grad_norm": 0.9166785513010915, + "learning_rate": 1.8061455185917067e-05, + "loss": 0.6277, + "step": 13051 + }, + { + "epoch": 0.2255322090129942, + "grad_norm": 0.9021947795008136, + "learning_rate": 1.806112401896328e-05, + "loss": 0.5822, + "step": 13052 + }, + { + "epoch": 0.2255494885264031, + "grad_norm": 0.7177327499684678, + "learning_rate": 1.8060792826761443e-05, + "loss": 0.5974, + "step": 13053 + }, + { + "epoch": 0.225566768039812, + "grad_norm": 0.892668516302323, + "learning_rate": 1.8060461609312586e-05, + "loss": 0.4562, + "step": 13054 + }, + { + "epoch": 0.2255840475532209, + "grad_norm": 1.1564674907959334, + "learning_rate": 1.8060130366617752e-05, + "loss": 0.5136, + "step": 13055 + }, + { + "epoch": 0.2256013270666298, + "grad_norm": 0.7204426595513655, + "learning_rate": 1.805979909867798e-05, + "loss": 0.4968, + "step": 13056 + }, + { + "epoch": 0.2256186065800387, + "grad_norm": 1.6116578556456813, + "learning_rate": 1.80594678054943e-05, + "loss": 0.5384, + "step": 13057 + }, + { + "epoch": 0.22563588609344762, + "grad_norm": 1.2200922791583404, + "learning_rate": 1.805913648706776e-05, + "loss": 0.5568, + "step": 13058 + }, + { + "epoch": 0.2256531656068565, + "grad_norm": 0.43952862305700774, + "learning_rate": 1.805880514339939e-05, + "loss": 0.6297, + "step": 13059 + }, + { + "epoch": 0.2256704451202654, + "grad_norm": 1.1683159233387777, + "learning_rate": 1.805847377449023e-05, + "loss": 0.6721, + "step": 13060 + }, + { + "epoch": 0.2256877246336743, + "grad_norm": 0.870289075827681, + "learning_rate": 1.8058142380341314e-05, + "loss": 0.576, + "step": 13061 + }, + { + "epoch": 0.22570500414708322, + "grad_norm": 0.8909105830033192, + "learning_rate": 1.8057810960953687e-05, + "loss": 0.5829, + "step": 13062 + }, + { + "epoch": 0.22572228366049213, + "grad_norm": 0.6618506919121117, + "learning_rate": 1.805747951632838e-05, + "loss": 0.5456, + "step": 13063 + }, + { + "epoch": 0.22573956317390104, + "grad_norm": 0.8387125797398477, + "learning_rate": 1.805714804646644e-05, + "loss": 0.643, + "step": 13064 + }, + { + "epoch": 0.22575684268730992, + "grad_norm": 1.6170156109194396, + "learning_rate": 1.8056816551368897e-05, + "loss": 0.559, + "step": 13065 + }, + { + "epoch": 0.22577412220071882, + "grad_norm": 1.0571655458745328, + "learning_rate": 1.8056485031036793e-05, + "loss": 0.5223, + "step": 13066 + }, + { + "epoch": 0.22579140171412773, + "grad_norm": 0.9762508609269566, + "learning_rate": 1.8056153485471167e-05, + "loss": 0.4665, + "step": 13067 + }, + { + "epoch": 0.22580868122753664, + "grad_norm": 1.6838861602392576, + "learning_rate": 1.8055821914673056e-05, + "loss": 0.8058, + "step": 13068 + }, + { + "epoch": 0.22582596074094555, + "grad_norm": 0.9530756696274398, + "learning_rate": 1.8055490318643494e-05, + "loss": 0.6611, + "step": 13069 + }, + { + "epoch": 0.22584324025435443, + "grad_norm": 0.761236124686957, + "learning_rate": 1.8055158697383528e-05, + "loss": 0.6311, + "step": 13070 + }, + { + "epoch": 0.22586051976776333, + "grad_norm": 2.130886205329016, + "learning_rate": 1.805482705089419e-05, + "loss": 0.6616, + "step": 13071 + }, + { + "epoch": 0.22587779928117224, + "grad_norm": 1.2948674694762874, + "learning_rate": 1.8054495379176526e-05, + "loss": 0.5666, + "step": 13072 + }, + { + "epoch": 0.22589507879458115, + "grad_norm": 1.4104748886194782, + "learning_rate": 1.8054163682231567e-05, + "loss": 0.4588, + "step": 13073 + }, + { + "epoch": 0.22591235830799006, + "grad_norm": 1.4350224789100487, + "learning_rate": 1.8053831960060358e-05, + "loss": 0.4697, + "step": 13074 + }, + { + "epoch": 0.22592963782139894, + "grad_norm": 0.605103353773405, + "learning_rate": 1.805350021266393e-05, + "loss": 0.5307, + "step": 13075 + }, + { + "epoch": 0.22594691733480785, + "grad_norm": 0.8722398718525567, + "learning_rate": 1.805316844004333e-05, + "loss": 0.3855, + "step": 13076 + }, + { + "epoch": 0.22596419684821675, + "grad_norm": 0.7037222800055708, + "learning_rate": 1.8052836642199597e-05, + "loss": 0.5746, + "step": 13077 + }, + { + "epoch": 0.22598147636162566, + "grad_norm": 1.2921614506813026, + "learning_rate": 1.8052504819133767e-05, + "loss": 0.5172, + "step": 13078 + }, + { + "epoch": 0.22599875587503457, + "grad_norm": 1.471647802670365, + "learning_rate": 1.805217297084688e-05, + "loss": 0.5785, + "step": 13079 + }, + { + "epoch": 0.22601603538844345, + "grad_norm": 1.1109310094967535, + "learning_rate": 1.8051841097339974e-05, + "loss": 0.562, + "step": 13080 + }, + { + "epoch": 0.22603331490185236, + "grad_norm": 0.518793521974812, + "learning_rate": 1.8051509198614087e-05, + "loss": 0.3901, + "step": 13081 + }, + { + "epoch": 0.22605059441526126, + "grad_norm": 0.6886426745958822, + "learning_rate": 1.8051177274670265e-05, + "loss": 0.5457, + "step": 13082 + }, + { + "epoch": 0.22606787392867017, + "grad_norm": 0.9962945800514417, + "learning_rate": 1.8050845325509544e-05, + "loss": 0.5443, + "step": 13083 + }, + { + "epoch": 0.22608515344207908, + "grad_norm": 1.0985708479987022, + "learning_rate": 1.805051335113296e-05, + "loss": 0.4955, + "step": 13084 + }, + { + "epoch": 0.226102432955488, + "grad_norm": 1.2214678578485962, + "learning_rate": 1.805018135154156e-05, + "loss": 0.7184, + "step": 13085 + }, + { + "epoch": 0.22611971246889687, + "grad_norm": 0.9430170646718986, + "learning_rate": 1.804984932673638e-05, + "loss": 0.694, + "step": 13086 + }, + { + "epoch": 0.22613699198230577, + "grad_norm": 0.45801041284100247, + "learning_rate": 1.804951727671846e-05, + "loss": 0.5078, + "step": 13087 + }, + { + "epoch": 0.22615427149571468, + "grad_norm": 0.9717688323163681, + "learning_rate": 1.804918520148884e-05, + "loss": 0.7134, + "step": 13088 + }, + { + "epoch": 0.2261715510091236, + "grad_norm": 0.8954812065038489, + "learning_rate": 1.8048853101048558e-05, + "loss": 0.5013, + "step": 13089 + }, + { + "epoch": 0.2261888305225325, + "grad_norm": 0.8289784272481682, + "learning_rate": 1.804852097539866e-05, + "loss": 0.6048, + "step": 13090 + }, + { + "epoch": 0.22620611003594138, + "grad_norm": 0.7115335868868334, + "learning_rate": 1.804818882454018e-05, + "loss": 0.6758, + "step": 13091 + }, + { + "epoch": 0.22622338954935028, + "grad_norm": 0.4246638874562209, + "learning_rate": 1.8047856648474164e-05, + "loss": 0.505, + "step": 13092 + }, + { + "epoch": 0.2262406690627592, + "grad_norm": 1.312739960133374, + "learning_rate": 1.8047524447201645e-05, + "loss": 0.682, + "step": 13093 + }, + { + "epoch": 0.2262579485761681, + "grad_norm": 1.2649064798169702, + "learning_rate": 1.804719222072367e-05, + "loss": 0.5602, + "step": 13094 + }, + { + "epoch": 0.226275228089577, + "grad_norm": 0.661792250479656, + "learning_rate": 1.8046859969041277e-05, + "loss": 0.4868, + "step": 13095 + }, + { + "epoch": 0.2262925076029859, + "grad_norm": 0.763607999657789, + "learning_rate": 1.804652769215551e-05, + "loss": 0.6126, + "step": 13096 + }, + { + "epoch": 0.2263097871163948, + "grad_norm": 0.37613783512544346, + "learning_rate": 1.80461953900674e-05, + "loss": 0.5052, + "step": 13097 + }, + { + "epoch": 0.2263270666298037, + "grad_norm": 0.8455129499696828, + "learning_rate": 1.8045863062778e-05, + "loss": 0.5067, + "step": 13098 + }, + { + "epoch": 0.2263443461432126, + "grad_norm": 0.9844090590605503, + "learning_rate": 1.8045530710288345e-05, + "loss": 0.4277, + "step": 13099 + }, + { + "epoch": 0.22636162565662152, + "grad_norm": 0.8640340581883547, + "learning_rate": 1.8045198332599473e-05, + "loss": 0.664, + "step": 13100 + }, + { + "epoch": 0.22637890517003043, + "grad_norm": 1.3531721250940218, + "learning_rate": 1.804486592971243e-05, + "loss": 0.7595, + "step": 13101 + }, + { + "epoch": 0.2263961846834393, + "grad_norm": 0.9230167256855031, + "learning_rate": 1.804453350162826e-05, + "loss": 0.5535, + "step": 13102 + }, + { + "epoch": 0.2264134641968482, + "grad_norm": 1.1036611833123948, + "learning_rate": 1.8044201048347995e-05, + "loss": 0.6047, + "step": 13103 + }, + { + "epoch": 0.22643074371025712, + "grad_norm": 0.9859465062719169, + "learning_rate": 1.8043868569872685e-05, + "loss": 0.5393, + "step": 13104 + }, + { + "epoch": 0.22644802322366603, + "grad_norm": 0.8761766555352019, + "learning_rate": 1.8043536066203363e-05, + "loss": 0.356, + "step": 13105 + }, + { + "epoch": 0.22646530273707494, + "grad_norm": 1.4112013399504424, + "learning_rate": 1.8043203537341075e-05, + "loss": 0.7389, + "step": 13106 + }, + { + "epoch": 0.22648258225048382, + "grad_norm": 1.2584503070837874, + "learning_rate": 1.8042870983286865e-05, + "loss": 0.8863, + "step": 13107 + }, + { + "epoch": 0.22649986176389272, + "grad_norm": 0.8267608644949196, + "learning_rate": 1.8042538404041766e-05, + "loss": 0.5088, + "step": 13108 + }, + { + "epoch": 0.22651714127730163, + "grad_norm": 1.5959729371982712, + "learning_rate": 1.8042205799606832e-05, + "loss": 0.8173, + "step": 13109 + }, + { + "epoch": 0.22653442079071054, + "grad_norm": 1.1980206633856867, + "learning_rate": 1.8041873169983097e-05, + "loss": 0.541, + "step": 13110 + }, + { + "epoch": 0.22655170030411945, + "grad_norm": 1.2302259058695624, + "learning_rate": 1.8041540515171604e-05, + "loss": 0.6164, + "step": 13111 + }, + { + "epoch": 0.22656897981752833, + "grad_norm": 1.2988577912150883, + "learning_rate": 1.8041207835173393e-05, + "loss": 0.5623, + "step": 13112 + }, + { + "epoch": 0.22658625933093723, + "grad_norm": 0.9905092383013407, + "learning_rate": 1.8040875129989512e-05, + "loss": 0.5104, + "step": 13113 + }, + { + "epoch": 0.22660353884434614, + "grad_norm": 0.7919014177218511, + "learning_rate": 1.8040542399620996e-05, + "loss": 0.5618, + "step": 13114 + }, + { + "epoch": 0.22662081835775505, + "grad_norm": 1.1709865083733424, + "learning_rate": 1.8040209644068886e-05, + "loss": 0.7328, + "step": 13115 + }, + { + "epoch": 0.22663809787116396, + "grad_norm": 0.816032757074883, + "learning_rate": 1.8039876863334233e-05, + "loss": 0.4807, + "step": 13116 + }, + { + "epoch": 0.22665537738457284, + "grad_norm": 0.719422151384506, + "learning_rate": 1.8039544057418076e-05, + "loss": 0.4325, + "step": 13117 + }, + { + "epoch": 0.22667265689798174, + "grad_norm": 0.55486892252633, + "learning_rate": 1.8039211226321452e-05, + "loss": 0.4417, + "step": 13118 + }, + { + "epoch": 0.22668993641139065, + "grad_norm": 1.0426927583353938, + "learning_rate": 1.8038878370045412e-05, + "loss": 0.5985, + "step": 13119 + }, + { + "epoch": 0.22670721592479956, + "grad_norm": 0.7845381925678605, + "learning_rate": 1.803854548859099e-05, + "loss": 0.5641, + "step": 13120 + }, + { + "epoch": 0.22672449543820847, + "grad_norm": 0.7360476093572252, + "learning_rate": 1.8038212581959237e-05, + "loss": 0.5694, + "step": 13121 + }, + { + "epoch": 0.22674177495161738, + "grad_norm": 1.121710949419178, + "learning_rate": 1.8037879650151185e-05, + "loss": 0.4827, + "step": 13122 + }, + { + "epoch": 0.22675905446502626, + "grad_norm": 3.011882444812052, + "learning_rate": 1.8037546693167888e-05, + "loss": 0.6168, + "step": 13123 + }, + { + "epoch": 0.22677633397843516, + "grad_norm": 1.371275160611332, + "learning_rate": 1.8037213711010386e-05, + "loss": 0.5679, + "step": 13124 + }, + { + "epoch": 0.22679361349184407, + "grad_norm": 0.9966885665022825, + "learning_rate": 1.8036880703679714e-05, + "loss": 0.5089, + "step": 13125 + }, + { + "epoch": 0.22681089300525298, + "grad_norm": 1.3891789797677185, + "learning_rate": 1.803654767117692e-05, + "loss": 0.401, + "step": 13126 + }, + { + "epoch": 0.22682817251866189, + "grad_norm": 1.1266396347993042, + "learning_rate": 1.8036214613503053e-05, + "loss": 0.6214, + "step": 13127 + }, + { + "epoch": 0.22684545203207077, + "grad_norm": 0.8760218307504131, + "learning_rate": 1.803588153065915e-05, + "loss": 0.6256, + "step": 13128 + }, + { + "epoch": 0.22686273154547967, + "grad_norm": 0.7638423179996229, + "learning_rate": 1.8035548422646254e-05, + "loss": 0.6194, + "step": 13129 + }, + { + "epoch": 0.22688001105888858, + "grad_norm": 0.969657339435446, + "learning_rate": 1.8035215289465413e-05, + "loss": 0.3102, + "step": 13130 + }, + { + "epoch": 0.2268972905722975, + "grad_norm": 0.4250888321201171, + "learning_rate": 1.8034882131117664e-05, + "loss": 0.5999, + "step": 13131 + }, + { + "epoch": 0.2269145700857064, + "grad_norm": 3.2293336737712797, + "learning_rate": 1.8034548947604055e-05, + "loss": 0.3394, + "step": 13132 + }, + { + "epoch": 0.22693184959911528, + "grad_norm": 0.9268770944802439, + "learning_rate": 1.8034215738925626e-05, + "loss": 0.3859, + "step": 13133 + }, + { + "epoch": 0.22694912911252418, + "grad_norm": 1.413602105757449, + "learning_rate": 1.8033882505083423e-05, + "loss": 0.6995, + "step": 13134 + }, + { + "epoch": 0.2269664086259331, + "grad_norm": 1.3594448745225152, + "learning_rate": 1.803354924607849e-05, + "loss": 0.5063, + "step": 13135 + }, + { + "epoch": 0.226983688139342, + "grad_norm": 0.7869042729628164, + "learning_rate": 1.8033215961911872e-05, + "loss": 0.4566, + "step": 13136 + }, + { + "epoch": 0.2270009676527509, + "grad_norm": 0.7222162173407987, + "learning_rate": 1.803288265258461e-05, + "loss": 0.3626, + "step": 13137 + }, + { + "epoch": 0.22701824716615981, + "grad_norm": 0.8187412552528891, + "learning_rate": 1.8032549318097746e-05, + "loss": 0.4425, + "step": 13138 + }, + { + "epoch": 0.2270355266795687, + "grad_norm": 1.3062904738964116, + "learning_rate": 1.803221595845233e-05, + "loss": 0.5596, + "step": 13139 + }, + { + "epoch": 0.2270528061929776, + "grad_norm": 1.5936540980935585, + "learning_rate": 1.8031882573649408e-05, + "loss": 0.4916, + "step": 13140 + }, + { + "epoch": 0.2270700857063865, + "grad_norm": 0.8851721976398004, + "learning_rate": 1.803154916369001e-05, + "loss": 0.5956, + "step": 13141 + }, + { + "epoch": 0.22708736521979542, + "grad_norm": 0.7743861292877549, + "learning_rate": 1.8031215728575192e-05, + "loss": 0.5006, + "step": 13142 + }, + { + "epoch": 0.22710464473320432, + "grad_norm": 1.1421060198952435, + "learning_rate": 1.8030882268306002e-05, + "loss": 0.6145, + "step": 13143 + }, + { + "epoch": 0.2271219242466132, + "grad_norm": 0.8460090471909304, + "learning_rate": 1.8030548782883474e-05, + "loss": 0.3523, + "step": 13144 + }, + { + "epoch": 0.2271392037600221, + "grad_norm": 1.1986298753652695, + "learning_rate": 1.8030215272308655e-05, + "loss": 0.7233, + "step": 13145 + }, + { + "epoch": 0.22715648327343102, + "grad_norm": 1.286656390825339, + "learning_rate": 1.8029881736582595e-05, + "loss": 0.5659, + "step": 13146 + }, + { + "epoch": 0.22717376278683993, + "grad_norm": 0.814014274921849, + "learning_rate": 1.802954817570633e-05, + "loss": 0.5334, + "step": 13147 + }, + { + "epoch": 0.22719104230024884, + "grad_norm": 1.4982139886999393, + "learning_rate": 1.8029214589680917e-05, + "loss": 0.5236, + "step": 13148 + }, + { + "epoch": 0.22720832181365772, + "grad_norm": 1.7516629845870004, + "learning_rate": 1.802888097850739e-05, + "loss": 0.7101, + "step": 13149 + }, + { + "epoch": 0.22722560132706662, + "grad_norm": 0.7224941301136553, + "learning_rate": 1.8028547342186803e-05, + "loss": 0.5851, + "step": 13150 + }, + { + "epoch": 0.22724288084047553, + "grad_norm": 0.5723951653772757, + "learning_rate": 1.8028213680720186e-05, + "loss": 0.6231, + "step": 13151 + }, + { + "epoch": 0.22726016035388444, + "grad_norm": 1.2558609323447083, + "learning_rate": 1.80278799941086e-05, + "loss": 0.5285, + "step": 13152 + }, + { + "epoch": 0.22727743986729335, + "grad_norm": 1.0111845548811484, + "learning_rate": 1.8027546282353082e-05, + "loss": 0.572, + "step": 13153 + }, + { + "epoch": 0.22729471938070223, + "grad_norm": 1.205587814815362, + "learning_rate": 1.802721254545468e-05, + "loss": 0.6383, + "step": 13154 + }, + { + "epoch": 0.22731199889411113, + "grad_norm": 1.4689513152883937, + "learning_rate": 1.8026878783414435e-05, + "loss": 0.6498, + "step": 13155 + }, + { + "epoch": 0.22732927840752004, + "grad_norm": 0.8797908584680272, + "learning_rate": 1.80265449962334e-05, + "loss": 0.4851, + "step": 13156 + }, + { + "epoch": 0.22734655792092895, + "grad_norm": 0.7352716396259333, + "learning_rate": 1.8026211183912612e-05, + "loss": 0.6127, + "step": 13157 + }, + { + "epoch": 0.22736383743433786, + "grad_norm": 0.981436508917084, + "learning_rate": 1.802587734645312e-05, + "loss": 0.4679, + "step": 13158 + }, + { + "epoch": 0.22738111694774676, + "grad_norm": 1.1675409110754045, + "learning_rate": 1.8025543483855977e-05, + "loss": 0.4944, + "step": 13159 + }, + { + "epoch": 0.22739839646115564, + "grad_norm": 1.0815320593095792, + "learning_rate": 1.8025209596122218e-05, + "loss": 0.595, + "step": 13160 + }, + { + "epoch": 0.22741567597456455, + "grad_norm": 1.166910610536576, + "learning_rate": 1.8024875683252895e-05, + "loss": 0.5828, + "step": 13161 + }, + { + "epoch": 0.22743295548797346, + "grad_norm": 1.1432392653038441, + "learning_rate": 1.8024541745249046e-05, + "loss": 0.4843, + "step": 13162 + }, + { + "epoch": 0.22745023500138237, + "grad_norm": 1.3286813081916642, + "learning_rate": 1.8024207782111726e-05, + "loss": 0.483, + "step": 13163 + }, + { + "epoch": 0.22746751451479127, + "grad_norm": 0.5440768686234025, + "learning_rate": 1.8023873793841977e-05, + "loss": 0.7474, + "step": 13164 + }, + { + "epoch": 0.22748479402820015, + "grad_norm": 1.041834628416562, + "learning_rate": 1.8023539780440844e-05, + "loss": 0.5668, + "step": 13165 + }, + { + "epoch": 0.22750207354160906, + "grad_norm": 1.1728178624697159, + "learning_rate": 1.8023205741909376e-05, + "loss": 0.7187, + "step": 13166 + }, + { + "epoch": 0.22751935305501797, + "grad_norm": 1.1119006603274975, + "learning_rate": 1.802287167824862e-05, + "loss": 0.5041, + "step": 13167 + }, + { + "epoch": 0.22753663256842688, + "grad_norm": 1.2645097501469662, + "learning_rate": 1.8022537589459617e-05, + "loss": 0.5172, + "step": 13168 + }, + { + "epoch": 0.22755391208183579, + "grad_norm": 1.4402239289227303, + "learning_rate": 1.8022203475543416e-05, + "loss": 0.454, + "step": 13169 + }, + { + "epoch": 0.22757119159524467, + "grad_norm": 1.0765003741624164, + "learning_rate": 1.8021869336501066e-05, + "loss": 0.5725, + "step": 13170 + }, + { + "epoch": 0.22758847110865357, + "grad_norm": 0.8580627803070309, + "learning_rate": 1.802153517233361e-05, + "loss": 0.5274, + "step": 13171 + }, + { + "epoch": 0.22760575062206248, + "grad_norm": 0.8675592932198873, + "learning_rate": 1.8021200983042098e-05, + "loss": 0.533, + "step": 13172 + }, + { + "epoch": 0.2276230301354714, + "grad_norm": 1.2543533693845665, + "learning_rate": 1.8020866768627575e-05, + "loss": 0.6565, + "step": 13173 + }, + { + "epoch": 0.2276403096488803, + "grad_norm": 1.1191823963891534, + "learning_rate": 1.8020532529091086e-05, + "loss": 0.5495, + "step": 13174 + }, + { + "epoch": 0.2276575891622892, + "grad_norm": 0.7183307070186709, + "learning_rate": 1.8020198264433683e-05, + "loss": 0.4871, + "step": 13175 + }, + { + "epoch": 0.22767486867569808, + "grad_norm": 0.5951464065318014, + "learning_rate": 1.8019863974656403e-05, + "loss": 0.7321, + "step": 13176 + }, + { + "epoch": 0.227692148189107, + "grad_norm": 1.44602701736756, + "learning_rate": 1.8019529659760304e-05, + "loss": 0.5322, + "step": 13177 + }, + { + "epoch": 0.2277094277025159, + "grad_norm": 0.7715200866282652, + "learning_rate": 1.8019195319746428e-05, + "loss": 0.4622, + "step": 13178 + }, + { + "epoch": 0.2277267072159248, + "grad_norm": 0.7921755890330755, + "learning_rate": 1.8018860954615823e-05, + "loss": 0.5529, + "step": 13179 + }, + { + "epoch": 0.22774398672933371, + "grad_norm": 1.0343907249450948, + "learning_rate": 1.801852656436954e-05, + "loss": 0.5243, + "step": 13180 + }, + { + "epoch": 0.2277612662427426, + "grad_norm": 0.826888480711513, + "learning_rate": 1.8018192149008618e-05, + "loss": 0.449, + "step": 13181 + }, + { + "epoch": 0.2277785457561515, + "grad_norm": 0.9676388878998228, + "learning_rate": 1.8017857708534107e-05, + "loss": 0.5607, + "step": 13182 + }, + { + "epoch": 0.2277958252695604, + "grad_norm": 1.2351870646197325, + "learning_rate": 1.801752324294706e-05, + "loss": 0.5016, + "step": 13183 + }, + { + "epoch": 0.22781310478296932, + "grad_norm": 1.3423240046394749, + "learning_rate": 1.801718875224852e-05, + "loss": 0.6747, + "step": 13184 + }, + { + "epoch": 0.22783038429637822, + "grad_norm": 1.1027396104055118, + "learning_rate": 1.8016854236439535e-05, + "loss": 0.6685, + "step": 13185 + }, + { + "epoch": 0.2278476638097871, + "grad_norm": 1.4880578564643308, + "learning_rate": 1.8016519695521153e-05, + "loss": 0.4153, + "step": 13186 + }, + { + "epoch": 0.227864943323196, + "grad_norm": 0.9616740003769243, + "learning_rate": 1.8016185129494425e-05, + "loss": 0.6311, + "step": 13187 + }, + { + "epoch": 0.22788222283660492, + "grad_norm": 1.4185236985286094, + "learning_rate": 1.801585053836039e-05, + "loss": 0.5445, + "step": 13188 + }, + { + "epoch": 0.22789950235001383, + "grad_norm": 1.0990195537617062, + "learning_rate": 1.8015515922120106e-05, + "loss": 0.6047, + "step": 13189 + }, + { + "epoch": 0.22791678186342273, + "grad_norm": 0.893172288856475, + "learning_rate": 1.8015181280774618e-05, + "loss": 0.6576, + "step": 13190 + }, + { + "epoch": 0.22793406137683161, + "grad_norm": 1.6749839656710914, + "learning_rate": 1.801484661432497e-05, + "loss": 0.5123, + "step": 13191 + }, + { + "epoch": 0.22795134089024052, + "grad_norm": 0.9919827531895956, + "learning_rate": 1.8014511922772216e-05, + "loss": 0.3694, + "step": 13192 + }, + { + "epoch": 0.22796862040364943, + "grad_norm": 0.7493137151889827, + "learning_rate": 1.80141772061174e-05, + "loss": 0.369, + "step": 13193 + }, + { + "epoch": 0.22798589991705834, + "grad_norm": 0.7553749029142797, + "learning_rate": 1.801384246436157e-05, + "loss": 0.4813, + "step": 13194 + }, + { + "epoch": 0.22800317943046725, + "grad_norm": 1.0303632888920384, + "learning_rate": 1.801350769750578e-05, + "loss": 0.6139, + "step": 13195 + }, + { + "epoch": 0.22802045894387615, + "grad_norm": 1.2121409538593304, + "learning_rate": 1.801317290555107e-05, + "loss": 0.4998, + "step": 13196 + }, + { + "epoch": 0.22803773845728503, + "grad_norm": 0.8194144556423424, + "learning_rate": 1.8012838088498502e-05, + "loss": 0.556, + "step": 13197 + }, + { + "epoch": 0.22805501797069394, + "grad_norm": 0.760378434307342, + "learning_rate": 1.801250324634911e-05, + "loss": 0.4683, + "step": 13198 + }, + { + "epoch": 0.22807229748410285, + "grad_norm": 1.0463552509925262, + "learning_rate": 1.801216837910395e-05, + "loss": 0.5181, + "step": 13199 + }, + { + "epoch": 0.22808957699751176, + "grad_norm": 0.8890919807988688, + "learning_rate": 1.801183348676407e-05, + "loss": 0.5413, + "step": 13200 + }, + { + "epoch": 0.22810685651092066, + "grad_norm": 0.657599456369669, + "learning_rate": 1.801149856933052e-05, + "loss": 0.4259, + "step": 13201 + }, + { + "epoch": 0.22812413602432954, + "grad_norm": 0.670643022643907, + "learning_rate": 1.8011163626804347e-05, + "loss": 0.5758, + "step": 13202 + }, + { + "epoch": 0.22814141553773845, + "grad_norm": 0.8954568945520494, + "learning_rate": 1.80108286591866e-05, + "loss": 0.6694, + "step": 13203 + }, + { + "epoch": 0.22815869505114736, + "grad_norm": 1.2879316037574962, + "learning_rate": 1.8010493666478327e-05, + "loss": 0.5719, + "step": 13204 + }, + { + "epoch": 0.22817597456455627, + "grad_norm": 1.1930264360823994, + "learning_rate": 1.801015864868058e-05, + "loss": 0.6568, + "step": 13205 + }, + { + "epoch": 0.22819325407796517, + "grad_norm": 0.69453290345546, + "learning_rate": 1.800982360579441e-05, + "loss": 0.5957, + "step": 13206 + }, + { + "epoch": 0.22821053359137405, + "grad_norm": 0.9966048947686005, + "learning_rate": 1.800948853782086e-05, + "loss": 0.4819, + "step": 13207 + }, + { + "epoch": 0.22822781310478296, + "grad_norm": 0.539419533024076, + "learning_rate": 1.8009153444760987e-05, + "loss": 0.481, + "step": 13208 + }, + { + "epoch": 0.22824509261819187, + "grad_norm": 1.0075684056053635, + "learning_rate": 1.8008818326615832e-05, + "loss": 0.5306, + "step": 13209 + }, + { + "epoch": 0.22826237213160078, + "grad_norm": 1.2286108762761585, + "learning_rate": 1.800848318338645e-05, + "loss": 0.8673, + "step": 13210 + }, + { + "epoch": 0.22827965164500968, + "grad_norm": 1.0973710144184101, + "learning_rate": 1.8008148015073892e-05, + "loss": 0.5641, + "step": 13211 + }, + { + "epoch": 0.2282969311584186, + "grad_norm": 0.438609841591915, + "learning_rate": 1.8007812821679208e-05, + "loss": 0.6276, + "step": 13212 + }, + { + "epoch": 0.22831421067182747, + "grad_norm": 0.7656678763773666, + "learning_rate": 1.8007477603203445e-05, + "loss": 0.5366, + "step": 13213 + }, + { + "epoch": 0.22833149018523638, + "grad_norm": 0.9766491714294983, + "learning_rate": 1.8007142359647652e-05, + "loss": 0.4546, + "step": 13214 + }, + { + "epoch": 0.2283487696986453, + "grad_norm": 1.2111210106008374, + "learning_rate": 1.800680709101288e-05, + "loss": 0.495, + "step": 13215 + }, + { + "epoch": 0.2283660492120542, + "grad_norm": 0.5058780471412088, + "learning_rate": 1.8006471797300182e-05, + "loss": 0.6041, + "step": 13216 + }, + { + "epoch": 0.2283833287254631, + "grad_norm": 0.895013136970317, + "learning_rate": 1.80061364785106e-05, + "loss": 0.5251, + "step": 13217 + }, + { + "epoch": 0.22840060823887198, + "grad_norm": 0.8399827384826365, + "learning_rate": 1.8005801134645197e-05, + "loss": 0.4034, + "step": 13218 + }, + { + "epoch": 0.2284178877522809, + "grad_norm": 1.2004294990721134, + "learning_rate": 1.8005465765705014e-05, + "loss": 0.6409, + "step": 13219 + }, + { + "epoch": 0.2284351672656898, + "grad_norm": 1.571816707504039, + "learning_rate": 1.8005130371691105e-05, + "loss": 0.5255, + "step": 13220 + }, + { + "epoch": 0.2284524467790987, + "grad_norm": 0.9681111661016916, + "learning_rate": 1.800479495260452e-05, + "loss": 0.67, + "step": 13221 + }, + { + "epoch": 0.2284697262925076, + "grad_norm": 0.825678793300528, + "learning_rate": 1.8004459508446307e-05, + "loss": 0.5159, + "step": 13222 + }, + { + "epoch": 0.2284870058059165, + "grad_norm": 1.1457620000366386, + "learning_rate": 1.800412403921752e-05, + "loss": 0.6524, + "step": 13223 + }, + { + "epoch": 0.2285042853193254, + "grad_norm": 0.7406706906795048, + "learning_rate": 1.80037885449192e-05, + "loss": 0.52, + "step": 13224 + }, + { + "epoch": 0.2285215648327343, + "grad_norm": 0.7295864908463439, + "learning_rate": 1.8003453025552415e-05, + "loss": 0.612, + "step": 13225 + }, + { + "epoch": 0.22853884434614322, + "grad_norm": 1.2089195776430401, + "learning_rate": 1.8003117481118204e-05, + "loss": 0.3825, + "step": 13226 + }, + { + "epoch": 0.22855612385955212, + "grad_norm": 0.9789755387128432, + "learning_rate": 1.800278191161762e-05, + "loss": 0.5567, + "step": 13227 + }, + { + "epoch": 0.228573403372961, + "grad_norm": 1.0593876365219845, + "learning_rate": 1.800244631705172e-05, + "loss": 0.6925, + "step": 13228 + }, + { + "epoch": 0.2285906828863699, + "grad_norm": 1.2814823679502303, + "learning_rate": 1.8002110697421544e-05, + "loss": 0.4002, + "step": 13229 + }, + { + "epoch": 0.22860796239977882, + "grad_norm": 1.4532997833497587, + "learning_rate": 1.8001775052728153e-05, + "loss": 0.6179, + "step": 13230 + }, + { + "epoch": 0.22862524191318773, + "grad_norm": 0.8301814627817812, + "learning_rate": 1.800143938297259e-05, + "loss": 0.4795, + "step": 13231 + }, + { + "epoch": 0.22864252142659663, + "grad_norm": 1.1666602927923764, + "learning_rate": 1.8001103688155917e-05, + "loss": 0.7002, + "step": 13232 + }, + { + "epoch": 0.22865980094000554, + "grad_norm": 1.110601978128734, + "learning_rate": 1.8000767968279176e-05, + "loss": 0.6728, + "step": 13233 + }, + { + "epoch": 0.22867708045341442, + "grad_norm": 1.356463452884251, + "learning_rate": 1.800043222334342e-05, + "loss": 0.5487, + "step": 13234 + }, + { + "epoch": 0.22869435996682333, + "grad_norm": 0.7807678823538095, + "learning_rate": 1.8000096453349702e-05, + "loss": 0.3085, + "step": 13235 + }, + { + "epoch": 0.22871163948023224, + "grad_norm": 0.7986285208035471, + "learning_rate": 1.799976065829908e-05, + "loss": 0.5256, + "step": 13236 + }, + { + "epoch": 0.22872891899364114, + "grad_norm": 1.0107460594022177, + "learning_rate": 1.799942483819259e-05, + "loss": 0.3425, + "step": 13237 + }, + { + "epoch": 0.22874619850705005, + "grad_norm": 0.7522531662747178, + "learning_rate": 1.7999088993031302e-05, + "loss": 0.4403, + "step": 13238 + }, + { + "epoch": 0.22876347802045893, + "grad_norm": 1.5575974635325291, + "learning_rate": 1.7998753122816256e-05, + "loss": 0.519, + "step": 13239 + }, + { + "epoch": 0.22878075753386784, + "grad_norm": 0.830062115362034, + "learning_rate": 1.7998417227548507e-05, + "loss": 0.8118, + "step": 13240 + }, + { + "epoch": 0.22879803704727675, + "grad_norm": 1.6333303249825621, + "learning_rate": 1.799808130722911e-05, + "loss": 0.5373, + "step": 13241 + }, + { + "epoch": 0.22881531656068566, + "grad_norm": 1.0568739158395142, + "learning_rate": 1.799774536185911e-05, + "loss": 0.515, + "step": 13242 + }, + { + "epoch": 0.22883259607409456, + "grad_norm": 1.358550996952437, + "learning_rate": 1.799740939143957e-05, + "loss": 0.6111, + "step": 13243 + }, + { + "epoch": 0.22884987558750344, + "grad_norm": 0.9907500151058073, + "learning_rate": 1.799707339597153e-05, + "loss": 0.4366, + "step": 13244 + }, + { + "epoch": 0.22886715510091235, + "grad_norm": 0.8620434767381316, + "learning_rate": 1.799673737545605e-05, + "loss": 0.452, + "step": 13245 + }, + { + "epoch": 0.22888443461432126, + "grad_norm": 0.7859542842483783, + "learning_rate": 1.7996401329894178e-05, + "loss": 0.4927, + "step": 13246 + }, + { + "epoch": 0.22890171412773017, + "grad_norm": 0.8081885298464361, + "learning_rate": 1.7996065259286973e-05, + "loss": 0.7173, + "step": 13247 + }, + { + "epoch": 0.22891899364113907, + "grad_norm": 1.2145399795660403, + "learning_rate": 1.7995729163635487e-05, + "loss": 0.5026, + "step": 13248 + }, + { + "epoch": 0.22893627315454798, + "grad_norm": 1.0588533055867895, + "learning_rate": 1.7995393042940767e-05, + "loss": 0.6318, + "step": 13249 + }, + { + "epoch": 0.22895355266795686, + "grad_norm": 0.9091410518902817, + "learning_rate": 1.7995056897203867e-05, + "loss": 0.5129, + "step": 13250 + }, + { + "epoch": 0.22897083218136577, + "grad_norm": 1.1510792504718403, + "learning_rate": 1.7994720726425837e-05, + "loss": 0.5581, + "step": 13251 + }, + { + "epoch": 0.22898811169477468, + "grad_norm": 1.08363380986893, + "learning_rate": 1.799438453060774e-05, + "loss": 0.6078, + "step": 13252 + }, + { + "epoch": 0.22900539120818358, + "grad_norm": 1.262227131934332, + "learning_rate": 1.7994048309750623e-05, + "loss": 0.3811, + "step": 13253 + }, + { + "epoch": 0.2290226707215925, + "grad_norm": 0.9785045485067326, + "learning_rate": 1.799371206385554e-05, + "loss": 0.6881, + "step": 13254 + }, + { + "epoch": 0.22903995023500137, + "grad_norm": 1.171225175818238, + "learning_rate": 1.799337579292354e-05, + "loss": 0.5791, + "step": 13255 + }, + { + "epoch": 0.22905722974841028, + "grad_norm": 1.1899420378528087, + "learning_rate": 1.7993039496955682e-05, + "loss": 0.5466, + "step": 13256 + }, + { + "epoch": 0.2290745092618192, + "grad_norm": 0.8714608334927818, + "learning_rate": 1.7992703175953015e-05, + "loss": 0.5685, + "step": 13257 + }, + { + "epoch": 0.2290917887752281, + "grad_norm": 1.0295799369573744, + "learning_rate": 1.7992366829916594e-05, + "loss": 0.3783, + "step": 13258 + }, + { + "epoch": 0.229109068288637, + "grad_norm": 0.8545220735390409, + "learning_rate": 1.7992030458847477e-05, + "loss": 0.583, + "step": 13259 + }, + { + "epoch": 0.22912634780204588, + "grad_norm": 1.0742860698385182, + "learning_rate": 1.7991694062746706e-05, + "loss": 0.4497, + "step": 13260 + }, + { + "epoch": 0.2291436273154548, + "grad_norm": 1.3143305342101481, + "learning_rate": 1.799135764161535e-05, + "loss": 0.4719, + "step": 13261 + }, + { + "epoch": 0.2291609068288637, + "grad_norm": 1.921179581845693, + "learning_rate": 1.7991021195454447e-05, + "loss": 0.4998, + "step": 13262 + }, + { + "epoch": 0.2291781863422726, + "grad_norm": 1.2490398009518575, + "learning_rate": 1.7990684724265062e-05, + "loss": 0.5929, + "step": 13263 + }, + { + "epoch": 0.2291954658556815, + "grad_norm": 0.9532971040041304, + "learning_rate": 1.7990348228048247e-05, + "loss": 0.3583, + "step": 13264 + }, + { + "epoch": 0.2292127453690904, + "grad_norm": 0.8458097086482306, + "learning_rate": 1.799001170680505e-05, + "loss": 0.5171, + "step": 13265 + }, + { + "epoch": 0.2292300248824993, + "grad_norm": 0.8052067672781275, + "learning_rate": 1.798967516053653e-05, + "loss": 0.5262, + "step": 13266 + }, + { + "epoch": 0.2292473043959082, + "grad_norm": 1.2049621063938831, + "learning_rate": 1.7989338589243743e-05, + "loss": 0.6164, + "step": 13267 + }, + { + "epoch": 0.22926458390931712, + "grad_norm": 0.9208477694546386, + "learning_rate": 1.7989001992927738e-05, + "loss": 0.6572, + "step": 13268 + }, + { + "epoch": 0.22928186342272602, + "grad_norm": 1.0053537136565598, + "learning_rate": 1.7988665371589573e-05, + "loss": 0.5063, + "step": 13269 + }, + { + "epoch": 0.22929914293613493, + "grad_norm": 1.0339694518675828, + "learning_rate": 1.79883287252303e-05, + "loss": 0.6013, + "step": 13270 + }, + { + "epoch": 0.2293164224495438, + "grad_norm": 1.0827240299764873, + "learning_rate": 1.7987992053850973e-05, + "loss": 0.5707, + "step": 13271 + }, + { + "epoch": 0.22933370196295272, + "grad_norm": 0.9025856522755242, + "learning_rate": 1.798765535745265e-05, + "loss": 0.4114, + "step": 13272 + }, + { + "epoch": 0.22935098147636163, + "grad_norm": 1.6652219667695378, + "learning_rate": 1.7987318636036382e-05, + "loss": 0.5804, + "step": 13273 + }, + { + "epoch": 0.22936826098977053, + "grad_norm": 0.8249900433290102, + "learning_rate": 1.7986981889603224e-05, + "loss": 0.5655, + "step": 13274 + }, + { + "epoch": 0.22938554050317944, + "grad_norm": 1.1010666672821556, + "learning_rate": 1.7986645118154237e-05, + "loss": 0.6136, + "step": 13275 + }, + { + "epoch": 0.22940282001658832, + "grad_norm": 0.7333620557431327, + "learning_rate": 1.7986308321690465e-05, + "loss": 0.7143, + "step": 13276 + }, + { + "epoch": 0.22942009952999723, + "grad_norm": 1.4036927793689458, + "learning_rate": 1.798597150021297e-05, + "loss": 0.6792, + "step": 13277 + }, + { + "epoch": 0.22943737904340614, + "grad_norm": 1.1494505863822335, + "learning_rate": 1.7985634653722806e-05, + "loss": 0.496, + "step": 13278 + }, + { + "epoch": 0.22945465855681504, + "grad_norm": 1.3831408623915966, + "learning_rate": 1.7985297782221026e-05, + "loss": 0.6961, + "step": 13279 + }, + { + "epoch": 0.22947193807022395, + "grad_norm": 1.0113570449074512, + "learning_rate": 1.798496088570869e-05, + "loss": 0.5937, + "step": 13280 + }, + { + "epoch": 0.22948921758363283, + "grad_norm": 1.4869593162029762, + "learning_rate": 1.7984623964186847e-05, + "loss": 0.6586, + "step": 13281 + }, + { + "epoch": 0.22950649709704174, + "grad_norm": 1.3330788380847607, + "learning_rate": 1.7984287017656555e-05, + "loss": 0.563, + "step": 13282 + }, + { + "epoch": 0.22952377661045065, + "grad_norm": 1.4169136547610635, + "learning_rate": 1.798395004611887e-05, + "loss": 0.6002, + "step": 13283 + }, + { + "epoch": 0.22954105612385955, + "grad_norm": 0.9237352226500819, + "learning_rate": 1.7983613049574847e-05, + "loss": 0.6549, + "step": 13284 + }, + { + "epoch": 0.22955833563726846, + "grad_norm": 1.0449095410052331, + "learning_rate": 1.7983276028025537e-05, + "loss": 0.4341, + "step": 13285 + }, + { + "epoch": 0.22957561515067737, + "grad_norm": 1.350148763170627, + "learning_rate": 1.7982938981472005e-05, + "loss": 0.4734, + "step": 13286 + }, + { + "epoch": 0.22959289466408625, + "grad_norm": 1.1778331535475268, + "learning_rate": 1.79826019099153e-05, + "loss": 0.5413, + "step": 13287 + }, + { + "epoch": 0.22961017417749516, + "grad_norm": 0.3722882910776771, + "learning_rate": 1.798226481335648e-05, + "loss": 0.4659, + "step": 13288 + }, + { + "epoch": 0.22962745369090407, + "grad_norm": 0.8234359678522757, + "learning_rate": 1.7981927691796598e-05, + "loss": 0.4713, + "step": 13289 + }, + { + "epoch": 0.22964473320431297, + "grad_norm": 1.2161274571241627, + "learning_rate": 1.798159054523671e-05, + "loss": 0.7212, + "step": 13290 + }, + { + "epoch": 0.22966201271772188, + "grad_norm": 0.8623031320744156, + "learning_rate": 1.798125337367788e-05, + "loss": 0.5433, + "step": 13291 + }, + { + "epoch": 0.22967929223113076, + "grad_norm": 1.180583316295919, + "learning_rate": 1.7980916177121152e-05, + "loss": 0.6818, + "step": 13292 + }, + { + "epoch": 0.22969657174453967, + "grad_norm": 0.8105733666174026, + "learning_rate": 1.798057895556759e-05, + "loss": 0.3545, + "step": 13293 + }, + { + "epoch": 0.22971385125794858, + "grad_norm": 0.9520049622900264, + "learning_rate": 1.798024170901825e-05, + "loss": 0.6668, + "step": 13294 + }, + { + "epoch": 0.22973113077135748, + "grad_norm": 0.8967373529825909, + "learning_rate": 1.7979904437474185e-05, + "loss": 0.4957, + "step": 13295 + }, + { + "epoch": 0.2297484102847664, + "grad_norm": 0.6810598907889306, + "learning_rate": 1.7979567140936453e-05, + "loss": 0.5995, + "step": 13296 + }, + { + "epoch": 0.22976568979817527, + "grad_norm": 0.42154592141435965, + "learning_rate": 1.797922981940611e-05, + "loss": 0.8046, + "step": 13297 + }, + { + "epoch": 0.22978296931158418, + "grad_norm": 1.239751568036535, + "learning_rate": 1.7978892472884213e-05, + "loss": 0.6495, + "step": 13298 + }, + { + "epoch": 0.2298002488249931, + "grad_norm": 1.1421706575348227, + "learning_rate": 1.7978555101371817e-05, + "loss": 0.6509, + "step": 13299 + }, + { + "epoch": 0.229817528338402, + "grad_norm": 1.019747403176936, + "learning_rate": 1.7978217704869983e-05, + "loss": 0.5564, + "step": 13300 + }, + { + "epoch": 0.2298348078518109, + "grad_norm": 1.283648778985201, + "learning_rate": 1.7977880283379763e-05, + "loss": 0.5461, + "step": 13301 + }, + { + "epoch": 0.22985208736521978, + "grad_norm": 1.030531177830489, + "learning_rate": 1.7977542836902217e-05, + "loss": 0.5663, + "step": 13302 + }, + { + "epoch": 0.2298693668786287, + "grad_norm": 1.2184616880792611, + "learning_rate": 1.7977205365438397e-05, + "loss": 0.7105, + "step": 13303 + }, + { + "epoch": 0.2298866463920376, + "grad_norm": 1.1435791211634139, + "learning_rate": 1.7976867868989366e-05, + "loss": 0.4842, + "step": 13304 + }, + { + "epoch": 0.2299039259054465, + "grad_norm": 0.9530130384361082, + "learning_rate": 1.797653034755618e-05, + "loss": 0.5017, + "step": 13305 + }, + { + "epoch": 0.2299212054188554, + "grad_norm": 1.2161839997043342, + "learning_rate": 1.7976192801139893e-05, + "loss": 0.5342, + "step": 13306 + }, + { + "epoch": 0.22993848493226432, + "grad_norm": 1.1634423995783134, + "learning_rate": 1.7975855229741565e-05, + "loss": 0.6201, + "step": 13307 + }, + { + "epoch": 0.2299557644456732, + "grad_norm": 1.3381994173749245, + "learning_rate": 1.797551763336225e-05, + "loss": 0.5756, + "step": 13308 + }, + { + "epoch": 0.2299730439590821, + "grad_norm": 1.2493820524354717, + "learning_rate": 1.797518001200301e-05, + "loss": 0.6009, + "step": 13309 + }, + { + "epoch": 0.22999032347249102, + "grad_norm": 1.010474578063057, + "learning_rate": 1.79748423656649e-05, + "loss": 0.4929, + "step": 13310 + }, + { + "epoch": 0.23000760298589992, + "grad_norm": 1.3061332140270394, + "learning_rate": 1.7974504694348977e-05, + "loss": 0.4708, + "step": 13311 + }, + { + "epoch": 0.23002488249930883, + "grad_norm": 1.0685311329229927, + "learning_rate": 1.7974166998056298e-05, + "loss": 0.4494, + "step": 13312 + }, + { + "epoch": 0.2300421620127177, + "grad_norm": 1.3066618069414142, + "learning_rate": 1.7973829276787924e-05, + "loss": 0.8305, + "step": 13313 + }, + { + "epoch": 0.23005944152612662, + "grad_norm": 0.8667567402521688, + "learning_rate": 1.797349153054491e-05, + "loss": 0.4161, + "step": 13314 + }, + { + "epoch": 0.23007672103953553, + "grad_norm": 1.6310761829288203, + "learning_rate": 1.7973153759328314e-05, + "loss": 0.5192, + "step": 13315 + }, + { + "epoch": 0.23009400055294443, + "grad_norm": 0.7440788008015641, + "learning_rate": 1.7972815963139195e-05, + "loss": 0.4913, + "step": 13316 + }, + { + "epoch": 0.23011128006635334, + "grad_norm": 1.2908199575303925, + "learning_rate": 1.7972478141978613e-05, + "loss": 0.5712, + "step": 13317 + }, + { + "epoch": 0.23012855957976222, + "grad_norm": 0.8667913492091909, + "learning_rate": 1.797214029584762e-05, + "loss": 0.4503, + "step": 13318 + }, + { + "epoch": 0.23014583909317113, + "grad_norm": 0.41992065473117285, + "learning_rate": 1.797180242474728e-05, + "loss": 0.6129, + "step": 13319 + }, + { + "epoch": 0.23016311860658004, + "grad_norm": 0.9671119702893062, + "learning_rate": 1.7971464528678647e-05, + "loss": 0.6303, + "step": 13320 + }, + { + "epoch": 0.23018039811998894, + "grad_norm": 0.7782737124120798, + "learning_rate": 1.797112660764278e-05, + "loss": 0.4584, + "step": 13321 + }, + { + "epoch": 0.23019767763339785, + "grad_norm": 1.1695995323303168, + "learning_rate": 1.7970788661640743e-05, + "loss": 0.3993, + "step": 13322 + }, + { + "epoch": 0.23021495714680676, + "grad_norm": 0.38762292582201435, + "learning_rate": 1.797045069067359e-05, + "loss": 0.5838, + "step": 13323 + }, + { + "epoch": 0.23023223666021564, + "grad_norm": 0.7446383958892023, + "learning_rate": 1.7970112694742375e-05, + "loss": 0.5676, + "step": 13324 + }, + { + "epoch": 0.23024951617362455, + "grad_norm": 1.2359799003718221, + "learning_rate": 1.7969774673848165e-05, + "loss": 0.6076, + "step": 13325 + }, + { + "epoch": 0.23026679568703345, + "grad_norm": 0.6978667424839308, + "learning_rate": 1.7969436627992012e-05, + "loss": 0.5312, + "step": 13326 + }, + { + "epoch": 0.23028407520044236, + "grad_norm": 1.477287846857098, + "learning_rate": 1.796909855717498e-05, + "loss": 0.6653, + "step": 13327 + }, + { + "epoch": 0.23030135471385127, + "grad_norm": 1.2710950430785655, + "learning_rate": 1.796876046139813e-05, + "loss": 0.4523, + "step": 13328 + }, + { + "epoch": 0.23031863422726015, + "grad_norm": 0.6483977062053631, + "learning_rate": 1.796842234066251e-05, + "loss": 0.3313, + "step": 13329 + }, + { + "epoch": 0.23033591374066906, + "grad_norm": 1.1301321900483392, + "learning_rate": 1.7968084194969188e-05, + "loss": 0.5628, + "step": 13330 + }, + { + "epoch": 0.23035319325407796, + "grad_norm": 1.5085188597150143, + "learning_rate": 1.796774602431922e-05, + "loss": 0.4985, + "step": 13331 + }, + { + "epoch": 0.23037047276748687, + "grad_norm": 0.9029620233573813, + "learning_rate": 1.7967407828713667e-05, + "loss": 0.5544, + "step": 13332 + }, + { + "epoch": 0.23038775228089578, + "grad_norm": 0.7897365610904747, + "learning_rate": 1.7967069608153584e-05, + "loss": 0.5195, + "step": 13333 + }, + { + "epoch": 0.23040503179430466, + "grad_norm": 0.9149898195255197, + "learning_rate": 1.7966731362640036e-05, + "loss": 0.4201, + "step": 13334 + }, + { + "epoch": 0.23042231130771357, + "grad_norm": 0.8672989690981469, + "learning_rate": 1.796639309217408e-05, + "loss": 0.5274, + "step": 13335 + }, + { + "epoch": 0.23043959082112248, + "grad_norm": 1.3136943757445223, + "learning_rate": 1.7966054796756776e-05, + "loss": 0.59, + "step": 13336 + }, + { + "epoch": 0.23045687033453138, + "grad_norm": 1.6980975916283108, + "learning_rate": 1.7965716476389182e-05, + "loss": 0.6461, + "step": 13337 + }, + { + "epoch": 0.2304741498479403, + "grad_norm": 1.0454306180536506, + "learning_rate": 1.796537813107236e-05, + "loss": 0.6289, + "step": 13338 + }, + { + "epoch": 0.2304914293613492, + "grad_norm": 1.6664394422431228, + "learning_rate": 1.7965039760807367e-05, + "loss": 0.4623, + "step": 13339 + }, + { + "epoch": 0.23050870887475808, + "grad_norm": 0.6342755980136434, + "learning_rate": 1.7964701365595267e-05, + "loss": 0.3976, + "step": 13340 + }, + { + "epoch": 0.23052598838816699, + "grad_norm": 1.0541840817489179, + "learning_rate": 1.796436294543711e-05, + "loss": 0.5637, + "step": 13341 + }, + { + "epoch": 0.2305432679015759, + "grad_norm": 0.7987821844587283, + "learning_rate": 1.796402450033397e-05, + "loss": 0.45, + "step": 13342 + }, + { + "epoch": 0.2305605474149848, + "grad_norm": 1.5869415245412868, + "learning_rate": 1.7963686030286897e-05, + "loss": 0.6239, + "step": 13343 + }, + { + "epoch": 0.2305778269283937, + "grad_norm": 1.3196791931287304, + "learning_rate": 1.7963347535296955e-05, + "loss": 0.6993, + "step": 13344 + }, + { + "epoch": 0.2305951064418026, + "grad_norm": 1.1705591425861301, + "learning_rate": 1.7963009015365203e-05, + "loss": 0.621, + "step": 13345 + }, + { + "epoch": 0.2306123859552115, + "grad_norm": 0.39937467729142945, + "learning_rate": 1.7962670470492702e-05, + "loss": 0.6191, + "step": 13346 + }, + { + "epoch": 0.2306296654686204, + "grad_norm": 0.7692852900643955, + "learning_rate": 1.796233190068051e-05, + "loss": 0.4778, + "step": 13347 + }, + { + "epoch": 0.2306469449820293, + "grad_norm": 1.135177648912212, + "learning_rate": 1.7961993305929692e-05, + "loss": 0.5092, + "step": 13348 + }, + { + "epoch": 0.23066422449543822, + "grad_norm": 0.8009313531777366, + "learning_rate": 1.7961654686241305e-05, + "loss": 0.3552, + "step": 13349 + }, + { + "epoch": 0.2306815040088471, + "grad_norm": 0.6674226151162613, + "learning_rate": 1.7961316041616413e-05, + "loss": 0.2599, + "step": 13350 + }, + { + "epoch": 0.230698783522256, + "grad_norm": 1.169430730532712, + "learning_rate": 1.796097737205607e-05, + "loss": 0.4736, + "step": 13351 + }, + { + "epoch": 0.23071606303566491, + "grad_norm": 0.9623664258412492, + "learning_rate": 1.7960638677561342e-05, + "loss": 0.5102, + "step": 13352 + }, + { + "epoch": 0.23073334254907382, + "grad_norm": 0.977334411885055, + "learning_rate": 1.796029995813329e-05, + "loss": 0.4977, + "step": 13353 + }, + { + "epoch": 0.23075062206248273, + "grad_norm": 1.1406562944389367, + "learning_rate": 1.7959961213772973e-05, + "loss": 0.4541, + "step": 13354 + }, + { + "epoch": 0.2307679015758916, + "grad_norm": 0.8068677062122959, + "learning_rate": 1.795962244448145e-05, + "loss": 0.6443, + "step": 13355 + }, + { + "epoch": 0.23078518108930052, + "grad_norm": 1.04474510911772, + "learning_rate": 1.795928365025979e-05, + "loss": 0.5222, + "step": 13356 + }, + { + "epoch": 0.23080246060270942, + "grad_norm": 1.5524682266697465, + "learning_rate": 1.7958944831109045e-05, + "loss": 0.7545, + "step": 13357 + }, + { + "epoch": 0.23081974011611833, + "grad_norm": 0.868650110752681, + "learning_rate": 1.7958605987030276e-05, + "loss": 0.5107, + "step": 13358 + }, + { + "epoch": 0.23083701962952724, + "grad_norm": 1.1163932606408138, + "learning_rate": 1.7958267118024555e-05, + "loss": 0.6912, + "step": 13359 + }, + { + "epoch": 0.23085429914293615, + "grad_norm": 1.5926925941590457, + "learning_rate": 1.7957928224092933e-05, + "loss": 0.4981, + "step": 13360 + }, + { + "epoch": 0.23087157865634503, + "grad_norm": 1.097797967578673, + "learning_rate": 1.7957589305236477e-05, + "loss": 0.6083, + "step": 13361 + }, + { + "epoch": 0.23088885816975394, + "grad_norm": 0.8108963785454621, + "learning_rate": 1.7957250361456248e-05, + "loss": 0.4893, + "step": 13362 + }, + { + "epoch": 0.23090613768316284, + "grad_norm": 1.027582986652156, + "learning_rate": 1.7956911392753302e-05, + "loss": 0.611, + "step": 13363 + }, + { + "epoch": 0.23092341719657175, + "grad_norm": 0.967860316008929, + "learning_rate": 1.7956572399128705e-05, + "loss": 0.6287, + "step": 13364 + }, + { + "epoch": 0.23094069670998066, + "grad_norm": 0.8099267144149384, + "learning_rate": 1.795623338058352e-05, + "loss": 0.6968, + "step": 13365 + }, + { + "epoch": 0.23095797622338954, + "grad_norm": 1.1861182235877874, + "learning_rate": 1.795589433711881e-05, + "loss": 0.4582, + "step": 13366 + }, + { + "epoch": 0.23097525573679845, + "grad_norm": 0.774649500718825, + "learning_rate": 1.795555526873563e-05, + "loss": 0.3708, + "step": 13367 + }, + { + "epoch": 0.23099253525020735, + "grad_norm": 1.128571760387209, + "learning_rate": 1.795521617543505e-05, + "loss": 0.4972, + "step": 13368 + }, + { + "epoch": 0.23100981476361626, + "grad_norm": 1.1700901483997777, + "learning_rate": 1.7954877057218126e-05, + "loss": 0.4064, + "step": 13369 + }, + { + "epoch": 0.23102709427702517, + "grad_norm": 1.499446078554853, + "learning_rate": 1.7954537914085922e-05, + "loss": 0.4387, + "step": 13370 + }, + { + "epoch": 0.23104437379043405, + "grad_norm": 0.7679460069536328, + "learning_rate": 1.79541987460395e-05, + "loss": 0.3796, + "step": 13371 + }, + { + "epoch": 0.23106165330384296, + "grad_norm": 1.0998259681335403, + "learning_rate": 1.795385955307993e-05, + "loss": 0.4848, + "step": 13372 + }, + { + "epoch": 0.23107893281725186, + "grad_norm": 1.3681263088159632, + "learning_rate": 1.7953520335208262e-05, + "loss": 0.3631, + "step": 13373 + }, + { + "epoch": 0.23109621233066077, + "grad_norm": 0.7961651791216828, + "learning_rate": 1.7953181092425563e-05, + "loss": 0.8089, + "step": 13374 + }, + { + "epoch": 0.23111349184406968, + "grad_norm": 1.3162464292865599, + "learning_rate": 1.7952841824732895e-05, + "loss": 0.7489, + "step": 13375 + }, + { + "epoch": 0.2311307713574786, + "grad_norm": 0.61140641864497, + "learning_rate": 1.7952502532131326e-05, + "loss": 0.4573, + "step": 13376 + }, + { + "epoch": 0.23114805087088747, + "grad_norm": 0.9646683016999592, + "learning_rate": 1.7952163214621912e-05, + "loss": 0.5838, + "step": 13377 + }, + { + "epoch": 0.23116533038429637, + "grad_norm": 0.8268976817651096, + "learning_rate": 1.7951823872205722e-05, + "loss": 0.6715, + "step": 13378 + }, + { + "epoch": 0.23118260989770528, + "grad_norm": 1.1470803255869222, + "learning_rate": 1.7951484504883812e-05, + "loss": 0.8207, + "step": 13379 + }, + { + "epoch": 0.2311998894111142, + "grad_norm": 1.6822399182909378, + "learning_rate": 1.795114511265725e-05, + "loss": 0.7233, + "step": 13380 + }, + { + "epoch": 0.2312171689245231, + "grad_norm": 2.0130549489500433, + "learning_rate": 1.7950805695527096e-05, + "loss": 0.7339, + "step": 13381 + }, + { + "epoch": 0.23123444843793198, + "grad_norm": 1.4892951993900054, + "learning_rate": 1.7950466253494414e-05, + "loss": 0.3935, + "step": 13382 + }, + { + "epoch": 0.23125172795134089, + "grad_norm": 0.5428941022716604, + "learning_rate": 1.795012678656027e-05, + "loss": 0.3105, + "step": 13383 + }, + { + "epoch": 0.2312690074647498, + "grad_norm": 0.5418857869454643, + "learning_rate": 1.7949787294725724e-05, + "loss": 0.3822, + "step": 13384 + }, + { + "epoch": 0.2312862869781587, + "grad_norm": 1.3289745589201563, + "learning_rate": 1.7949447777991837e-05, + "loss": 0.6514, + "step": 13385 + }, + { + "epoch": 0.2313035664915676, + "grad_norm": 0.9412268835306307, + "learning_rate": 1.7949108236359678e-05, + "loss": 0.5754, + "step": 13386 + }, + { + "epoch": 0.2313208460049765, + "grad_norm": 0.8829388604617576, + "learning_rate": 1.7948768669830306e-05, + "loss": 0.4566, + "step": 13387 + }, + { + "epoch": 0.2313381255183854, + "grad_norm": 0.7887189051529395, + "learning_rate": 1.794842907840479e-05, + "loss": 0.4413, + "step": 13388 + }, + { + "epoch": 0.2313554050317943, + "grad_norm": 1.082408710206543, + "learning_rate": 1.7948089462084186e-05, + "loss": 0.4745, + "step": 13389 + }, + { + "epoch": 0.2313726845452032, + "grad_norm": 0.906297924192528, + "learning_rate": 1.7947749820869565e-05, + "loss": 0.6588, + "step": 13390 + }, + { + "epoch": 0.23138996405861212, + "grad_norm": 0.8832015168983215, + "learning_rate": 1.7947410154761984e-05, + "loss": 0.4961, + "step": 13391 + }, + { + "epoch": 0.231407243572021, + "grad_norm": 0.94888479744503, + "learning_rate": 1.7947070463762513e-05, + "loss": 0.8176, + "step": 13392 + }, + { + "epoch": 0.2314245230854299, + "grad_norm": 1.081122740678963, + "learning_rate": 1.7946730747872214e-05, + "loss": 0.5371, + "step": 13393 + }, + { + "epoch": 0.23144180259883881, + "grad_norm": 0.8256174315042961, + "learning_rate": 1.794639100709215e-05, + "loss": 0.5688, + "step": 13394 + }, + { + "epoch": 0.23145908211224772, + "grad_norm": 0.666816140284934, + "learning_rate": 1.7946051241423384e-05, + "loss": 0.5893, + "step": 13395 + }, + { + "epoch": 0.23147636162565663, + "grad_norm": 0.4546785695912671, + "learning_rate": 1.794571145086698e-05, + "loss": 0.5792, + "step": 13396 + }, + { + "epoch": 0.23149364113906554, + "grad_norm": 1.0771823644034706, + "learning_rate": 1.794537163542401e-05, + "loss": 0.625, + "step": 13397 + }, + { + "epoch": 0.23151092065247442, + "grad_norm": 0.9673979006702903, + "learning_rate": 1.794503179509553e-05, + "loss": 0.4526, + "step": 13398 + }, + { + "epoch": 0.23152820016588332, + "grad_norm": 1.1231449007307808, + "learning_rate": 1.7944691929882602e-05, + "loss": 0.5918, + "step": 13399 + }, + { + "epoch": 0.23154547967929223, + "grad_norm": 0.9594134568553127, + "learning_rate": 1.79443520397863e-05, + "loss": 0.4922, + "step": 13400 + }, + { + "epoch": 0.23156275919270114, + "grad_norm": 1.0548025348809176, + "learning_rate": 1.7944012124807678e-05, + "loss": 0.7287, + "step": 13401 + }, + { + "epoch": 0.23158003870611005, + "grad_norm": 1.1077772866067286, + "learning_rate": 1.7943672184947813e-05, + "loss": 0.4633, + "step": 13402 + }, + { + "epoch": 0.23159731821951893, + "grad_norm": 0.9311852496253974, + "learning_rate": 1.7943332220207762e-05, + "loss": 0.5098, + "step": 13403 + }, + { + "epoch": 0.23161459773292783, + "grad_norm": 0.7409637209858205, + "learning_rate": 1.7942992230588588e-05, + "loss": 0.3736, + "step": 13404 + }, + { + "epoch": 0.23163187724633674, + "grad_norm": 0.8417870573539002, + "learning_rate": 1.7942652216091358e-05, + "loss": 0.7803, + "step": 13405 + }, + { + "epoch": 0.23164915675974565, + "grad_norm": 0.8718979404374043, + "learning_rate": 1.794231217671714e-05, + "loss": 0.3283, + "step": 13406 + }, + { + "epoch": 0.23166643627315456, + "grad_norm": 1.0968392098549269, + "learning_rate": 1.7941972112466997e-05, + "loss": 0.5946, + "step": 13407 + }, + { + "epoch": 0.23168371578656344, + "grad_norm": 0.8654423531483557, + "learning_rate": 1.7941632023341994e-05, + "loss": 0.7253, + "step": 13408 + }, + { + "epoch": 0.23170099529997235, + "grad_norm": 0.5515098779230154, + "learning_rate": 1.7941291909343194e-05, + "loss": 0.4747, + "step": 13409 + }, + { + "epoch": 0.23171827481338125, + "grad_norm": 0.7903741083200806, + "learning_rate": 1.7940951770471664e-05, + "loss": 0.7868, + "step": 13410 + }, + { + "epoch": 0.23173555432679016, + "grad_norm": 1.2242041450564842, + "learning_rate": 1.794061160672847e-05, + "loss": 0.7221, + "step": 13411 + }, + { + "epoch": 0.23175283384019907, + "grad_norm": 0.9653571847692022, + "learning_rate": 1.794027141811468e-05, + "loss": 0.7206, + "step": 13412 + }, + { + "epoch": 0.23177011335360798, + "grad_norm": 0.9136253987137914, + "learning_rate": 1.7939931204631355e-05, + "loss": 0.5465, + "step": 13413 + }, + { + "epoch": 0.23178739286701686, + "grad_norm": 0.6203739377535404, + "learning_rate": 1.793959096627956e-05, + "loss": 0.4056, + "step": 13414 + }, + { + "epoch": 0.23180467238042576, + "grad_norm": 1.1896234405478994, + "learning_rate": 1.7939250703060365e-05, + "loss": 0.6925, + "step": 13415 + }, + { + "epoch": 0.23182195189383467, + "grad_norm": 0.9323780009003062, + "learning_rate": 1.793891041497483e-05, + "loss": 0.5366, + "step": 13416 + }, + { + "epoch": 0.23183923140724358, + "grad_norm": 0.733759192469226, + "learning_rate": 1.7938570102024027e-05, + "loss": 0.6295, + "step": 13417 + }, + { + "epoch": 0.2318565109206525, + "grad_norm": 1.052680805055962, + "learning_rate": 1.793822976420902e-05, + "loss": 0.4764, + "step": 13418 + }, + { + "epoch": 0.23187379043406137, + "grad_norm": 2.094332924228009, + "learning_rate": 1.7937889401530873e-05, + "loss": 0.5501, + "step": 13419 + }, + { + "epoch": 0.23189106994747027, + "grad_norm": 0.9184230199381599, + "learning_rate": 1.7937549013990653e-05, + "loss": 0.4877, + "step": 13420 + }, + { + "epoch": 0.23190834946087918, + "grad_norm": 0.884590867192395, + "learning_rate": 1.7937208601589423e-05, + "loss": 0.507, + "step": 13421 + }, + { + "epoch": 0.2319256289742881, + "grad_norm": 1.2616039113167183, + "learning_rate": 1.7936868164328258e-05, + "loss": 0.4348, + "step": 13422 + }, + { + "epoch": 0.231942908487697, + "grad_norm": 0.8466234999147834, + "learning_rate": 1.7936527702208217e-05, + "loss": 0.569, + "step": 13423 + }, + { + "epoch": 0.23196018800110588, + "grad_norm": 0.7744469150788703, + "learning_rate": 1.7936187215230364e-05, + "loss": 0.3312, + "step": 13424 + }, + { + "epoch": 0.23197746751451478, + "grad_norm": 1.0085635396615196, + "learning_rate": 1.7935846703395774e-05, + "loss": 0.5446, + "step": 13425 + }, + { + "epoch": 0.2319947470279237, + "grad_norm": 0.7741597583056605, + "learning_rate": 1.7935506166705507e-05, + "loss": 0.6199, + "step": 13426 + }, + { + "epoch": 0.2320120265413326, + "grad_norm": 0.7799071184159753, + "learning_rate": 1.7935165605160634e-05, + "loss": 0.7328, + "step": 13427 + }, + { + "epoch": 0.2320293060547415, + "grad_norm": 1.0411003656940037, + "learning_rate": 1.7934825018762215e-05, + "loss": 0.4035, + "step": 13428 + }, + { + "epoch": 0.2320465855681504, + "grad_norm": 0.7785157346374593, + "learning_rate": 1.7934484407511327e-05, + "loss": 0.4567, + "step": 13429 + }, + { + "epoch": 0.2320638650815593, + "grad_norm": 0.7518301016402866, + "learning_rate": 1.793414377140903e-05, + "loss": 0.439, + "step": 13430 + }, + { + "epoch": 0.2320811445949682, + "grad_norm": 1.852587532952193, + "learning_rate": 1.7933803110456385e-05, + "loss": 0.7388, + "step": 13431 + }, + { + "epoch": 0.2320984241083771, + "grad_norm": 1.058680309817126, + "learning_rate": 1.7933462424654472e-05, + "loss": 0.6214, + "step": 13432 + }, + { + "epoch": 0.23211570362178602, + "grad_norm": 0.6193245589985519, + "learning_rate": 1.793312171400435e-05, + "loss": 0.821, + "step": 13433 + }, + { + "epoch": 0.23213298313519493, + "grad_norm": 1.7282895403329288, + "learning_rate": 1.7932780978507088e-05, + "loss": 0.6684, + "step": 13434 + }, + { + "epoch": 0.2321502626486038, + "grad_norm": 0.7558127182707698, + "learning_rate": 1.793244021816375e-05, + "loss": 0.5865, + "step": 13435 + }, + { + "epoch": 0.2321675421620127, + "grad_norm": 0.457812552628474, + "learning_rate": 1.793209943297541e-05, + "loss": 0.4465, + "step": 13436 + }, + { + "epoch": 0.23218482167542162, + "grad_norm": 0.9886492298748294, + "learning_rate": 1.7931758622943132e-05, + "loss": 0.5884, + "step": 13437 + }, + { + "epoch": 0.23220210118883053, + "grad_norm": 1.1784493649319654, + "learning_rate": 1.793141778806798e-05, + "loss": 0.5001, + "step": 13438 + }, + { + "epoch": 0.23221938070223944, + "grad_norm": 0.9580585039388949, + "learning_rate": 1.793107692835103e-05, + "loss": 0.481, + "step": 13439 + }, + { + "epoch": 0.23223666021564832, + "grad_norm": 0.47575943164052686, + "learning_rate": 1.793073604379334e-05, + "loss": 0.8265, + "step": 13440 + }, + { + "epoch": 0.23225393972905722, + "grad_norm": 0.8853816900617771, + "learning_rate": 1.7930395134395983e-05, + "loss": 0.5968, + "step": 13441 + }, + { + "epoch": 0.23227121924246613, + "grad_norm": 0.7120355798523333, + "learning_rate": 1.7930054200160025e-05, + "loss": 0.7398, + "step": 13442 + }, + { + "epoch": 0.23228849875587504, + "grad_norm": 1.4861104562967626, + "learning_rate": 1.7929713241086537e-05, + "loss": 0.4834, + "step": 13443 + }, + { + "epoch": 0.23230577826928395, + "grad_norm": 1.0991239359773728, + "learning_rate": 1.7929372257176582e-05, + "loss": 0.4672, + "step": 13444 + }, + { + "epoch": 0.23232305778269283, + "grad_norm": 1.2144033256754365, + "learning_rate": 1.7929031248431235e-05, + "loss": 0.5388, + "step": 13445 + }, + { + "epoch": 0.23234033729610173, + "grad_norm": 0.8918577147463645, + "learning_rate": 1.7928690214851553e-05, + "loss": 0.3862, + "step": 13446 + }, + { + "epoch": 0.23235761680951064, + "grad_norm": 0.4652719035550301, + "learning_rate": 1.7928349156438613e-05, + "loss": 0.5932, + "step": 13447 + }, + { + "epoch": 0.23237489632291955, + "grad_norm": 1.1375656385930928, + "learning_rate": 1.7928008073193483e-05, + "loss": 0.6769, + "step": 13448 + }, + { + "epoch": 0.23239217583632846, + "grad_norm": 1.5588906410594134, + "learning_rate": 1.792766696511723e-05, + "loss": 0.7146, + "step": 13449 + }, + { + "epoch": 0.23240945534973736, + "grad_norm": 1.4267699423052609, + "learning_rate": 1.7927325832210916e-05, + "loss": 0.5527, + "step": 13450 + }, + { + "epoch": 0.23242673486314624, + "grad_norm": 1.0030971529302386, + "learning_rate": 1.792698467447562e-05, + "loss": 0.4491, + "step": 13451 + }, + { + "epoch": 0.23244401437655515, + "grad_norm": 0.8509024500900064, + "learning_rate": 1.7926643491912402e-05, + "loss": 0.6409, + "step": 13452 + }, + { + "epoch": 0.23246129388996406, + "grad_norm": 1.3522952108863153, + "learning_rate": 1.7926302284522338e-05, + "loss": 0.5636, + "step": 13453 + }, + { + "epoch": 0.23247857340337297, + "grad_norm": 0.8556167097456383, + "learning_rate": 1.7925961052306494e-05, + "loss": 0.5129, + "step": 13454 + }, + { + "epoch": 0.23249585291678188, + "grad_norm": 0.8850753933383206, + "learning_rate": 1.7925619795265934e-05, + "loss": 0.6505, + "step": 13455 + }, + { + "epoch": 0.23251313243019076, + "grad_norm": 1.375696129771798, + "learning_rate": 1.792527851340173e-05, + "loss": 0.5871, + "step": 13456 + }, + { + "epoch": 0.23253041194359966, + "grad_norm": 0.6691466582618498, + "learning_rate": 1.7924937206714955e-05, + "loss": 0.5081, + "step": 13457 + }, + { + "epoch": 0.23254769145700857, + "grad_norm": 1.3976919071632736, + "learning_rate": 1.792459587520667e-05, + "loss": 0.7202, + "step": 13458 + }, + { + "epoch": 0.23256497097041748, + "grad_norm": 0.7558905857633905, + "learning_rate": 1.792425451887795e-05, + "loss": 0.4043, + "step": 13459 + }, + { + "epoch": 0.23258225048382639, + "grad_norm": 1.1851803191782462, + "learning_rate": 1.7923913137729863e-05, + "loss": 0.4319, + "step": 13460 + }, + { + "epoch": 0.23259952999723527, + "grad_norm": 1.6011737575578235, + "learning_rate": 1.792357173176348e-05, + "loss": 0.5352, + "step": 13461 + }, + { + "epoch": 0.23261680951064417, + "grad_norm": 0.8628604003996823, + "learning_rate": 1.7923230300979866e-05, + "loss": 0.5618, + "step": 13462 + }, + { + "epoch": 0.23263408902405308, + "grad_norm": 0.6261559008156098, + "learning_rate": 1.7922888845380093e-05, + "loss": 0.7014, + "step": 13463 + }, + { + "epoch": 0.232651368537462, + "grad_norm": 0.8129482762799445, + "learning_rate": 1.7922547364965228e-05, + "loss": 0.4883, + "step": 13464 + }, + { + "epoch": 0.2326686480508709, + "grad_norm": 0.7376575623150188, + "learning_rate": 1.7922205859736347e-05, + "loss": 0.6401, + "step": 13465 + }, + { + "epoch": 0.23268592756427978, + "grad_norm": 0.863217130443693, + "learning_rate": 1.792186432969451e-05, + "loss": 0.3804, + "step": 13466 + }, + { + "epoch": 0.23270320707768868, + "grad_norm": 0.9996048681875664, + "learning_rate": 1.7921522774840795e-05, + "loss": 0.5608, + "step": 13467 + }, + { + "epoch": 0.2327204865910976, + "grad_norm": 1.1771151779683973, + "learning_rate": 1.7921181195176268e-05, + "loss": 0.4799, + "step": 13468 + }, + { + "epoch": 0.2327377661045065, + "grad_norm": 0.9423395517450629, + "learning_rate": 1.7920839590701997e-05, + "loss": 0.4141, + "step": 13469 + }, + { + "epoch": 0.2327550456179154, + "grad_norm": 0.89391805625526, + "learning_rate": 1.792049796141906e-05, + "loss": 0.3509, + "step": 13470 + }, + { + "epoch": 0.23277232513132431, + "grad_norm": 0.9999479485809993, + "learning_rate": 1.7920156307328515e-05, + "loss": 0.5404, + "step": 13471 + }, + { + "epoch": 0.2327896046447332, + "grad_norm": 1.1366209930811886, + "learning_rate": 1.7919814628431446e-05, + "loss": 0.5988, + "step": 13472 + }, + { + "epoch": 0.2328068841581421, + "grad_norm": 0.751950237230961, + "learning_rate": 1.7919472924728908e-05, + "loss": 0.5249, + "step": 13473 + }, + { + "epoch": 0.232824163671551, + "grad_norm": 0.735143948868983, + "learning_rate": 1.7919131196221983e-05, + "loss": 0.5146, + "step": 13474 + }, + { + "epoch": 0.23284144318495992, + "grad_norm": 1.0493854142983579, + "learning_rate": 1.7918789442911738e-05, + "loss": 0.5496, + "step": 13475 + }, + { + "epoch": 0.23285872269836883, + "grad_norm": 0.8057609718379617, + "learning_rate": 1.7918447664799243e-05, + "loss": 0.5163, + "step": 13476 + }, + { + "epoch": 0.2328760022117777, + "grad_norm": 1.082320711341505, + "learning_rate": 1.7918105861885566e-05, + "loss": 0.4863, + "step": 13477 + }, + { + "epoch": 0.2328932817251866, + "grad_norm": 1.1593821310839338, + "learning_rate": 1.791776403417178e-05, + "loss": 0.7748, + "step": 13478 + }, + { + "epoch": 0.23291056123859552, + "grad_norm": 1.06057595630546, + "learning_rate": 1.7917422181658954e-05, + "loss": 0.5785, + "step": 13479 + }, + { + "epoch": 0.23292784075200443, + "grad_norm": 0.972784486371836, + "learning_rate": 1.7917080304348164e-05, + "loss": 0.4277, + "step": 13480 + }, + { + "epoch": 0.23294512026541334, + "grad_norm": 1.1683469918171894, + "learning_rate": 1.791673840224047e-05, + "loss": 0.5159, + "step": 13481 + }, + { + "epoch": 0.23296239977882222, + "grad_norm": 0.9439704659598832, + "learning_rate": 1.7916396475336956e-05, + "loss": 0.5998, + "step": 13482 + }, + { + "epoch": 0.23297967929223112, + "grad_norm": 0.9546564009962832, + "learning_rate": 1.7916054523638685e-05, + "loss": 0.369, + "step": 13483 + }, + { + "epoch": 0.23299695880564003, + "grad_norm": 1.3442610197731373, + "learning_rate": 1.791571254714673e-05, + "loss": 0.6019, + "step": 13484 + }, + { + "epoch": 0.23301423831904894, + "grad_norm": 1.00864001119423, + "learning_rate": 1.7915370545862162e-05, + "loss": 0.6511, + "step": 13485 + }, + { + "epoch": 0.23303151783245785, + "grad_norm": 0.9217175270792528, + "learning_rate": 1.791502851978605e-05, + "loss": 0.4278, + "step": 13486 + }, + { + "epoch": 0.23304879734586675, + "grad_norm": 0.9969302074591586, + "learning_rate": 1.7914686468919467e-05, + "loss": 0.3403, + "step": 13487 + }, + { + "epoch": 0.23306607685927563, + "grad_norm": 0.908728234263159, + "learning_rate": 1.791434439326349e-05, + "loss": 0.5023, + "step": 13488 + }, + { + "epoch": 0.23308335637268454, + "grad_norm": 1.7211321787624965, + "learning_rate": 1.791400229281918e-05, + "loss": 0.8098, + "step": 13489 + }, + { + "epoch": 0.23310063588609345, + "grad_norm": 1.6578213095314174, + "learning_rate": 1.791366016758761e-05, + "loss": 0.499, + "step": 13490 + }, + { + "epoch": 0.23311791539950236, + "grad_norm": 1.465412832721183, + "learning_rate": 1.791331801756986e-05, + "loss": 0.5308, + "step": 13491 + }, + { + "epoch": 0.23313519491291126, + "grad_norm": 1.6969013338721939, + "learning_rate": 1.7912975842767e-05, + "loss": 0.5434, + "step": 13492 + }, + { + "epoch": 0.23315247442632014, + "grad_norm": 0.9819518532431356, + "learning_rate": 1.791263364318009e-05, + "loss": 0.5118, + "step": 13493 + }, + { + "epoch": 0.23316975393972905, + "grad_norm": 1.7380142804418297, + "learning_rate": 1.7912291418810216e-05, + "loss": 0.5434, + "step": 13494 + }, + { + "epoch": 0.23318703345313796, + "grad_norm": 0.5995420396784483, + "learning_rate": 1.7911949169658444e-05, + "loss": 0.3117, + "step": 13495 + }, + { + "epoch": 0.23320431296654687, + "grad_norm": 1.472064729359647, + "learning_rate": 1.7911606895725842e-05, + "loss": 0.5892, + "step": 13496 + }, + { + "epoch": 0.23322159247995577, + "grad_norm": 1.9598296402984852, + "learning_rate": 1.7911264597013484e-05, + "loss": 0.5967, + "step": 13497 + }, + { + "epoch": 0.23323887199336465, + "grad_norm": 0.8797197598142764, + "learning_rate": 1.791092227352245e-05, + "loss": 0.4907, + "step": 13498 + }, + { + "epoch": 0.23325615150677356, + "grad_norm": 0.9838769337126793, + "learning_rate": 1.7910579925253804e-05, + "loss": 0.6577, + "step": 13499 + }, + { + "epoch": 0.23327343102018247, + "grad_norm": 0.990599770339331, + "learning_rate": 1.791023755220862e-05, + "loss": 0.5438, + "step": 13500 + }, + { + "epoch": 0.23329071053359138, + "grad_norm": 0.7487229292890445, + "learning_rate": 1.7909895154387973e-05, + "loss": 0.6049, + "step": 13501 + }, + { + "epoch": 0.23330799004700029, + "grad_norm": 1.0044655447895297, + "learning_rate": 1.790955273179293e-05, + "loss": 0.7335, + "step": 13502 + }, + { + "epoch": 0.23332526956040917, + "grad_norm": 0.4368856537214698, + "learning_rate": 1.790921028442457e-05, + "loss": 0.5341, + "step": 13503 + }, + { + "epoch": 0.23334254907381807, + "grad_norm": 1.1307645976023026, + "learning_rate": 1.790886781228396e-05, + "loss": 0.6112, + "step": 13504 + }, + { + "epoch": 0.23335982858722698, + "grad_norm": 1.1135985104401445, + "learning_rate": 1.7908525315372174e-05, + "loss": 0.4709, + "step": 13505 + }, + { + "epoch": 0.2333771081006359, + "grad_norm": 0.7349392362807574, + "learning_rate": 1.7908182793690288e-05, + "loss": 0.3893, + "step": 13506 + }, + { + "epoch": 0.2333943876140448, + "grad_norm": 0.6737823283442315, + "learning_rate": 1.790784024723937e-05, + "loss": 0.6685, + "step": 13507 + }, + { + "epoch": 0.2334116671274537, + "grad_norm": 1.4003352598779975, + "learning_rate": 1.79074976760205e-05, + "loss": 0.5771, + "step": 13508 + }, + { + "epoch": 0.23342894664086258, + "grad_norm": 0.5809924663743793, + "learning_rate": 1.790715508003474e-05, + "loss": 0.6431, + "step": 13509 + }, + { + "epoch": 0.2334462261542715, + "grad_norm": 1.0688498791420813, + "learning_rate": 1.7906812459283174e-05, + "loss": 0.493, + "step": 13510 + }, + { + "epoch": 0.2334635056676804, + "grad_norm": 1.0743620835172731, + "learning_rate": 1.790646981376687e-05, + "loss": 0.6204, + "step": 13511 + }, + { + "epoch": 0.2334807851810893, + "grad_norm": 1.2624830198443588, + "learning_rate": 1.79061271434869e-05, + "loss": 0.5512, + "step": 13512 + }, + { + "epoch": 0.23349806469449821, + "grad_norm": 0.8031942245424645, + "learning_rate": 1.790578444844434e-05, + "loss": 0.4182, + "step": 13513 + }, + { + "epoch": 0.2335153442079071, + "grad_norm": 0.9666623693281015, + "learning_rate": 1.7905441728640264e-05, + "loss": 0.625, + "step": 13514 + }, + { + "epoch": 0.233532623721316, + "grad_norm": 1.0388447438734316, + "learning_rate": 1.790509898407574e-05, + "loss": 0.6881, + "step": 13515 + }, + { + "epoch": 0.2335499032347249, + "grad_norm": 1.6868758446670937, + "learning_rate": 1.790475621475185e-05, + "loss": 0.5177, + "step": 13516 + }, + { + "epoch": 0.23356718274813382, + "grad_norm": 0.9562444114514228, + "learning_rate": 1.7904413420669657e-05, + "loss": 0.6288, + "step": 13517 + }, + { + "epoch": 0.23358446226154272, + "grad_norm": 0.6109923452746004, + "learning_rate": 1.7904070601830246e-05, + "loss": 0.5267, + "step": 13518 + }, + { + "epoch": 0.2336017417749516, + "grad_norm": 1.3358934250657688, + "learning_rate": 1.790372775823468e-05, + "loss": 0.4889, + "step": 13519 + }, + { + "epoch": 0.2336190212883605, + "grad_norm": 1.3075888529492348, + "learning_rate": 1.790338488988404e-05, + "loss": 0.6059, + "step": 13520 + }, + { + "epoch": 0.23363630080176942, + "grad_norm": 0.4220100719348645, + "learning_rate": 1.79030419967794e-05, + "loss": 0.7123, + "step": 13521 + }, + { + "epoch": 0.23365358031517833, + "grad_norm": 1.1333468832254434, + "learning_rate": 1.7902699078921832e-05, + "loss": 0.7318, + "step": 13522 + }, + { + "epoch": 0.23367085982858724, + "grad_norm": 0.9582539790793653, + "learning_rate": 1.7902356136312405e-05, + "loss": 0.4605, + "step": 13523 + }, + { + "epoch": 0.23368813934199614, + "grad_norm": 1.4226449645012536, + "learning_rate": 1.79020131689522e-05, + "loss": 0.4832, + "step": 13524 + }, + { + "epoch": 0.23370541885540502, + "grad_norm": 1.37522830977507, + "learning_rate": 1.790167017684229e-05, + "loss": 0.5771, + "step": 13525 + }, + { + "epoch": 0.23372269836881393, + "grad_norm": 0.4997237945014826, + "learning_rate": 1.7901327159983753e-05, + "loss": 0.6601, + "step": 13526 + }, + { + "epoch": 0.23373997788222284, + "grad_norm": 1.3001583192239428, + "learning_rate": 1.7900984118377652e-05, + "loss": 0.4695, + "step": 13527 + }, + { + "epoch": 0.23375725739563175, + "grad_norm": 0.9900228809212903, + "learning_rate": 1.7900641052025073e-05, + "loss": 0.5827, + "step": 13528 + }, + { + "epoch": 0.23377453690904065, + "grad_norm": 1.0087490032976048, + "learning_rate": 1.7900297960927085e-05, + "loss": 0.7701, + "step": 13529 + }, + { + "epoch": 0.23379181642244953, + "grad_norm": 1.018080334558973, + "learning_rate": 1.789995484508476e-05, + "loss": 0.5837, + "step": 13530 + }, + { + "epoch": 0.23380909593585844, + "grad_norm": 0.4161605968252808, + "learning_rate": 1.7899611704499177e-05, + "loss": 0.5354, + "step": 13531 + }, + { + "epoch": 0.23382637544926735, + "grad_norm": 1.4670868455860389, + "learning_rate": 1.7899268539171415e-05, + "loss": 0.5952, + "step": 13532 + }, + { + "epoch": 0.23384365496267626, + "grad_norm": 1.5238215279116933, + "learning_rate": 1.789892534910254e-05, + "loss": 0.5714, + "step": 13533 + }, + { + "epoch": 0.23386093447608516, + "grad_norm": 1.6198228921617293, + "learning_rate": 1.7898582134293627e-05, + "loss": 0.5495, + "step": 13534 + }, + { + "epoch": 0.23387821398949404, + "grad_norm": 1.4705835891903776, + "learning_rate": 1.789823889474576e-05, + "loss": 0.581, + "step": 13535 + }, + { + "epoch": 0.23389549350290295, + "grad_norm": 1.9412727230119735, + "learning_rate": 1.7897895630460006e-05, + "loss": 0.6735, + "step": 13536 + }, + { + "epoch": 0.23391277301631186, + "grad_norm": 1.302108443429933, + "learning_rate": 1.789755234143744e-05, + "loss": 0.5834, + "step": 13537 + }, + { + "epoch": 0.23393005252972077, + "grad_norm": 0.8255454625340687, + "learning_rate": 1.7897209027679146e-05, + "loss": 0.6536, + "step": 13538 + }, + { + "epoch": 0.23394733204312967, + "grad_norm": 1.1261305238651729, + "learning_rate": 1.7896865689186188e-05, + "loss": 0.8145, + "step": 13539 + }, + { + "epoch": 0.23396461155653855, + "grad_norm": 0.9871891291287167, + "learning_rate": 1.7896522325959648e-05, + "loss": 0.4639, + "step": 13540 + }, + { + "epoch": 0.23398189106994746, + "grad_norm": 0.42127783342802644, + "learning_rate": 1.7896178938000603e-05, + "loss": 0.5998, + "step": 13541 + }, + { + "epoch": 0.23399917058335637, + "grad_norm": 1.272041397015203, + "learning_rate": 1.789583552531012e-05, + "loss": 0.7533, + "step": 13542 + }, + { + "epoch": 0.23401645009676528, + "grad_norm": 0.661323858823962, + "learning_rate": 1.7895492087889287e-05, + "loss": 0.4284, + "step": 13543 + }, + { + "epoch": 0.23403372961017418, + "grad_norm": 1.028617398301747, + "learning_rate": 1.7895148625739165e-05, + "loss": 0.4053, + "step": 13544 + }, + { + "epoch": 0.2340510091235831, + "grad_norm": 1.0933192387493962, + "learning_rate": 1.7894805138860844e-05, + "loss": 0.4817, + "step": 13545 + }, + { + "epoch": 0.23406828863699197, + "grad_norm": 1.0952050886323765, + "learning_rate": 1.7894461627255388e-05, + "loss": 0.4592, + "step": 13546 + }, + { + "epoch": 0.23408556815040088, + "grad_norm": 1.0413250150012496, + "learning_rate": 1.7894118090923882e-05, + "loss": 0.5748, + "step": 13547 + }, + { + "epoch": 0.2341028476638098, + "grad_norm": 1.125900906740278, + "learning_rate": 1.78937745298674e-05, + "loss": 0.4731, + "step": 13548 + }, + { + "epoch": 0.2341201271772187, + "grad_norm": 1.0087050619982834, + "learning_rate": 1.789343094408701e-05, + "loss": 0.7274, + "step": 13549 + }, + { + "epoch": 0.2341374066906276, + "grad_norm": 0.9277924705849057, + "learning_rate": 1.7893087333583795e-05, + "loss": 0.7121, + "step": 13550 + }, + { + "epoch": 0.23415468620403648, + "grad_norm": 0.8732335447513283, + "learning_rate": 1.7892743698358838e-05, + "loss": 0.5625, + "step": 13551 + }, + { + "epoch": 0.2341719657174454, + "grad_norm": 1.3832192329790496, + "learning_rate": 1.7892400038413203e-05, + "loss": 0.4607, + "step": 13552 + }, + { + "epoch": 0.2341892452308543, + "grad_norm": 1.2507866265506469, + "learning_rate": 1.789205635374797e-05, + "loss": 0.7517, + "step": 13553 + }, + { + "epoch": 0.2342065247442632, + "grad_norm": 1.0940225199043496, + "learning_rate": 1.7891712644364217e-05, + "loss": 0.6681, + "step": 13554 + }, + { + "epoch": 0.2342238042576721, + "grad_norm": 0.9396392358371276, + "learning_rate": 1.7891368910263024e-05, + "loss": 0.5543, + "step": 13555 + }, + { + "epoch": 0.234241083771081, + "grad_norm": 0.8276195233467101, + "learning_rate": 1.789102515144546e-05, + "loss": 0.5655, + "step": 13556 + }, + { + "epoch": 0.2342583632844899, + "grad_norm": 0.8916270783152773, + "learning_rate": 1.7890681367912606e-05, + "loss": 0.6865, + "step": 13557 + }, + { + "epoch": 0.2342756427978988, + "grad_norm": 1.0503058015673465, + "learning_rate": 1.789033755966554e-05, + "loss": 0.5106, + "step": 13558 + }, + { + "epoch": 0.23429292231130772, + "grad_norm": 0.9738966076237994, + "learning_rate": 1.7889993726705338e-05, + "loss": 0.6064, + "step": 13559 + }, + { + "epoch": 0.23431020182471662, + "grad_norm": 0.9416158511264558, + "learning_rate": 1.7889649869033073e-05, + "loss": 0.5708, + "step": 13560 + }, + { + "epoch": 0.23432748133812553, + "grad_norm": 1.2650844882439913, + "learning_rate": 1.7889305986649824e-05, + "loss": 0.5297, + "step": 13561 + }, + { + "epoch": 0.2343447608515344, + "grad_norm": 0.5017170088303766, + "learning_rate": 1.7888962079556672e-05, + "loss": 0.6698, + "step": 13562 + }, + { + "epoch": 0.23436204036494332, + "grad_norm": 0.670877213762548, + "learning_rate": 1.7888618147754692e-05, + "loss": 0.4736, + "step": 13563 + }, + { + "epoch": 0.23437931987835223, + "grad_norm": 1.0641022848668773, + "learning_rate": 1.7888274191244957e-05, + "loss": 0.3836, + "step": 13564 + }, + { + "epoch": 0.23439659939176113, + "grad_norm": 1.1616595028190382, + "learning_rate": 1.7887930210028552e-05, + "loss": 0.5989, + "step": 13565 + }, + { + "epoch": 0.23441387890517004, + "grad_norm": 1.112534060568682, + "learning_rate": 1.7887586204106547e-05, + "loss": 0.488, + "step": 13566 + }, + { + "epoch": 0.23443115841857892, + "grad_norm": 1.110855761839331, + "learning_rate": 1.7887242173480024e-05, + "loss": 0.5795, + "step": 13567 + }, + { + "epoch": 0.23444843793198783, + "grad_norm": 0.8201226988169396, + "learning_rate": 1.788689811815006e-05, + "loss": 0.5833, + "step": 13568 + }, + { + "epoch": 0.23446571744539674, + "grad_norm": 1.094407869113263, + "learning_rate": 1.788655403811773e-05, + "loss": 0.5127, + "step": 13569 + }, + { + "epoch": 0.23448299695880565, + "grad_norm": 0.6030306759719278, + "learning_rate": 1.788620993338411e-05, + "loss": 0.4169, + "step": 13570 + }, + { + "epoch": 0.23450027647221455, + "grad_norm": 0.7258666980993416, + "learning_rate": 1.7885865803950285e-05, + "loss": 0.5339, + "step": 13571 + }, + { + "epoch": 0.23451755598562343, + "grad_norm": 0.8465508287978618, + "learning_rate": 1.788552164981733e-05, + "loss": 0.459, + "step": 13572 + }, + { + "epoch": 0.23453483549903234, + "grad_norm": 0.9237727833699971, + "learning_rate": 1.788517747098632e-05, + "loss": 0.4887, + "step": 13573 + }, + { + "epoch": 0.23455211501244125, + "grad_norm": 0.7882462419776078, + "learning_rate": 1.7884833267458336e-05, + "loss": 0.4863, + "step": 13574 + }, + { + "epoch": 0.23456939452585016, + "grad_norm": 0.6650178652831613, + "learning_rate": 1.7884489039234456e-05, + "loss": 0.4527, + "step": 13575 + }, + { + "epoch": 0.23458667403925906, + "grad_norm": 0.7896651732378941, + "learning_rate": 1.7884144786315756e-05, + "loss": 0.5045, + "step": 13576 + }, + { + "epoch": 0.23460395355266794, + "grad_norm": 1.5070096647535633, + "learning_rate": 1.7883800508703312e-05, + "loss": 0.4812, + "step": 13577 + }, + { + "epoch": 0.23462123306607685, + "grad_norm": 0.4314844002846643, + "learning_rate": 1.788345620639821e-05, + "loss": 0.598, + "step": 13578 + }, + { + "epoch": 0.23463851257948576, + "grad_norm": 1.2891767605257134, + "learning_rate": 1.7883111879401524e-05, + "loss": 0.4405, + "step": 13579 + }, + { + "epoch": 0.23465579209289467, + "grad_norm": 1.1869133989977203, + "learning_rate": 1.788276752771433e-05, + "loss": 0.6134, + "step": 13580 + }, + { + "epoch": 0.23467307160630357, + "grad_norm": 1.1024940198886728, + "learning_rate": 1.7882423151337713e-05, + "loss": 0.6536, + "step": 13581 + }, + { + "epoch": 0.23469035111971248, + "grad_norm": 0.8517257178472977, + "learning_rate": 1.788207875027274e-05, + "loss": 0.594, + "step": 13582 + }, + { + "epoch": 0.23470763063312136, + "grad_norm": 0.38833221728907064, + "learning_rate": 1.788173432452051e-05, + "loss": 0.7284, + "step": 13583 + }, + { + "epoch": 0.23472491014653027, + "grad_norm": 1.1450890097060975, + "learning_rate": 1.7881389874082078e-05, + "loss": 0.4954, + "step": 13584 + }, + { + "epoch": 0.23474218965993918, + "grad_norm": 0.8776115556606825, + "learning_rate": 1.7881045398958536e-05, + "loss": 0.5995, + "step": 13585 + }, + { + "epoch": 0.23475946917334808, + "grad_norm": 0.8461260914202222, + "learning_rate": 1.7880700899150962e-05, + "loss": 0.5587, + "step": 13586 + }, + { + "epoch": 0.234776748686757, + "grad_norm": 0.8043267868430883, + "learning_rate": 1.7880356374660435e-05, + "loss": 0.6139, + "step": 13587 + }, + { + "epoch": 0.23479402820016587, + "grad_norm": 0.4362536778591543, + "learning_rate": 1.7880011825488032e-05, + "loss": 0.6815, + "step": 13588 + }, + { + "epoch": 0.23481130771357478, + "grad_norm": 0.6991608046620699, + "learning_rate": 1.7879667251634834e-05, + "loss": 0.361, + "step": 13589 + }, + { + "epoch": 0.2348285872269837, + "grad_norm": 1.4937231849999946, + "learning_rate": 1.787932265310192e-05, + "loss": 0.392, + "step": 13590 + }, + { + "epoch": 0.2348458667403926, + "grad_norm": 1.495202346563411, + "learning_rate": 1.7878978029890366e-05, + "loss": 0.6151, + "step": 13591 + }, + { + "epoch": 0.2348631462538015, + "grad_norm": 1.354808969439757, + "learning_rate": 1.7878633382001257e-05, + "loss": 0.4528, + "step": 13592 + }, + { + "epoch": 0.23488042576721038, + "grad_norm": 1.2010308744495217, + "learning_rate": 1.7878288709435665e-05, + "loss": 0.6546, + "step": 13593 + }, + { + "epoch": 0.2348977052806193, + "grad_norm": 1.6358301031033913, + "learning_rate": 1.7877944012194676e-05, + "loss": 0.558, + "step": 13594 + }, + { + "epoch": 0.2349149847940282, + "grad_norm": 1.5981933242931337, + "learning_rate": 1.7877599290279367e-05, + "loss": 0.6531, + "step": 13595 + }, + { + "epoch": 0.2349322643074371, + "grad_norm": 1.2889120352432004, + "learning_rate": 1.787725454369082e-05, + "loss": 0.4692, + "step": 13596 + }, + { + "epoch": 0.234949543820846, + "grad_norm": 0.6605498969829378, + "learning_rate": 1.7876909772430117e-05, + "loss": 0.4286, + "step": 13597 + }, + { + "epoch": 0.23496682333425492, + "grad_norm": 1.7355254609336535, + "learning_rate": 1.7876564976498327e-05, + "loss": 0.6808, + "step": 13598 + }, + { + "epoch": 0.2349841028476638, + "grad_norm": 0.8418949301091517, + "learning_rate": 1.787622015589654e-05, + "loss": 0.6925, + "step": 13599 + }, + { + "epoch": 0.2350013823610727, + "grad_norm": 0.8854113999998258, + "learning_rate": 1.787587531062583e-05, + "loss": 0.4157, + "step": 13600 + }, + { + "epoch": 0.23501866187448162, + "grad_norm": 1.254102987707146, + "learning_rate": 1.787553044068729e-05, + "loss": 0.4954, + "step": 13601 + }, + { + "epoch": 0.23503594138789052, + "grad_norm": 1.4101764729633814, + "learning_rate": 1.787518554608198e-05, + "loss": 0.4421, + "step": 13602 + }, + { + "epoch": 0.23505322090129943, + "grad_norm": 1.253332185181399, + "learning_rate": 1.787484062681099e-05, + "loss": 0.575, + "step": 13603 + }, + { + "epoch": 0.2350705004147083, + "grad_norm": 0.49132306588487873, + "learning_rate": 1.7874495682875407e-05, + "loss": 0.8368, + "step": 13604 + }, + { + "epoch": 0.23508777992811722, + "grad_norm": 0.8846873253972168, + "learning_rate": 1.78741507142763e-05, + "loss": 0.6332, + "step": 13605 + }, + { + "epoch": 0.23510505944152613, + "grad_norm": 0.8286528875356778, + "learning_rate": 1.7873805721014755e-05, + "loss": 0.4561, + "step": 13606 + }, + { + "epoch": 0.23512233895493503, + "grad_norm": 0.8612239683305122, + "learning_rate": 1.7873460703091856e-05, + "loss": 0.6411, + "step": 13607 + }, + { + "epoch": 0.23513961846834394, + "grad_norm": 1.7925904336503113, + "learning_rate": 1.7873115660508677e-05, + "loss": 0.5532, + "step": 13608 + }, + { + "epoch": 0.23515689798175282, + "grad_norm": 0.7972099063741902, + "learning_rate": 1.7872770593266302e-05, + "loss": 0.5362, + "step": 13609 + }, + { + "epoch": 0.23517417749516173, + "grad_norm": 0.8514299501924228, + "learning_rate": 1.787242550136581e-05, + "loss": 0.5754, + "step": 13610 + }, + { + "epoch": 0.23519145700857064, + "grad_norm": 1.3953755921891369, + "learning_rate": 1.7872080384808284e-05, + "loss": 0.4523, + "step": 13611 + }, + { + "epoch": 0.23520873652197954, + "grad_norm": 0.6897257616628725, + "learning_rate": 1.7871735243594806e-05, + "loss": 0.369, + "step": 13612 + }, + { + "epoch": 0.23522601603538845, + "grad_norm": 0.6069384421049409, + "learning_rate": 1.7871390077726453e-05, + "loss": 0.3835, + "step": 13613 + }, + { + "epoch": 0.23524329554879733, + "grad_norm": 0.3429530877273976, + "learning_rate": 1.7871044887204307e-05, + "loss": 0.597, + "step": 13614 + }, + { + "epoch": 0.23526057506220624, + "grad_norm": 0.8520633273340389, + "learning_rate": 1.787069967202945e-05, + "loss": 0.6792, + "step": 13615 + }, + { + "epoch": 0.23527785457561515, + "grad_norm": 1.4001556378292566, + "learning_rate": 1.7870354432202964e-05, + "loss": 0.7633, + "step": 13616 + }, + { + "epoch": 0.23529513408902406, + "grad_norm": 0.8086818819781155, + "learning_rate": 1.7870009167725932e-05, + "loss": 0.5743, + "step": 13617 + }, + { + "epoch": 0.23531241360243296, + "grad_norm": 1.0909451217957893, + "learning_rate": 1.786966387859943e-05, + "loss": 0.5707, + "step": 13618 + }, + { + "epoch": 0.23532969311584187, + "grad_norm": 0.8200220626425153, + "learning_rate": 1.7869318564824545e-05, + "loss": 0.4496, + "step": 13619 + }, + { + "epoch": 0.23534697262925075, + "grad_norm": 1.0083255992121873, + "learning_rate": 1.7868973226402356e-05, + "loss": 0.5426, + "step": 13620 + }, + { + "epoch": 0.23536425214265966, + "grad_norm": 0.9898004949544161, + "learning_rate": 1.7868627863333945e-05, + "loss": 0.431, + "step": 13621 + }, + { + "epoch": 0.23538153165606857, + "grad_norm": 0.9850641370300911, + "learning_rate": 1.7868282475620395e-05, + "loss": 0.4741, + "step": 13622 + }, + { + "epoch": 0.23539881116947747, + "grad_norm": 1.885996162964344, + "learning_rate": 1.786793706326278e-05, + "loss": 0.5202, + "step": 13623 + }, + { + "epoch": 0.23541609068288638, + "grad_norm": 1.4625378109500176, + "learning_rate": 1.7867591626262192e-05, + "loss": 0.62, + "step": 13624 + }, + { + "epoch": 0.23543337019629526, + "grad_norm": 0.8801552903776644, + "learning_rate": 1.786724616461971e-05, + "loss": 0.4451, + "step": 13625 + }, + { + "epoch": 0.23545064970970417, + "grad_norm": 1.1040554096234558, + "learning_rate": 1.7866900678336415e-05, + "loss": 0.661, + "step": 13626 + }, + { + "epoch": 0.23546792922311308, + "grad_norm": 1.2025161081246314, + "learning_rate": 1.786655516741339e-05, + "loss": 0.4748, + "step": 13627 + }, + { + "epoch": 0.23548520873652198, + "grad_norm": 0.8320922066991743, + "learning_rate": 1.7866209631851715e-05, + "loss": 0.6138, + "step": 13628 + }, + { + "epoch": 0.2355024882499309, + "grad_norm": 0.7256956219649574, + "learning_rate": 1.786586407165247e-05, + "loss": 0.3797, + "step": 13629 + }, + { + "epoch": 0.23551976776333977, + "grad_norm": 1.0341142643265986, + "learning_rate": 1.786551848681675e-05, + "loss": 0.6109, + "step": 13630 + }, + { + "epoch": 0.23553704727674868, + "grad_norm": 1.5825208893260565, + "learning_rate": 1.7865172877345624e-05, + "loss": 0.5328, + "step": 13631 + }, + { + "epoch": 0.2355543267901576, + "grad_norm": 1.5600821167204904, + "learning_rate": 1.7864827243240177e-05, + "loss": 0.5837, + "step": 13632 + }, + { + "epoch": 0.2355716063035665, + "grad_norm": 0.9738268537404594, + "learning_rate": 1.7864481584501492e-05, + "loss": 0.7784, + "step": 13633 + }, + { + "epoch": 0.2355888858169754, + "grad_norm": 1.3795084701995284, + "learning_rate": 1.7864135901130658e-05, + "loss": 0.4611, + "step": 13634 + }, + { + "epoch": 0.2356061653303843, + "grad_norm": 0.6064928375417449, + "learning_rate": 1.7863790193128746e-05, + "loss": 0.4772, + "step": 13635 + }, + { + "epoch": 0.2356234448437932, + "grad_norm": 0.49302044755863295, + "learning_rate": 1.786344446049685e-05, + "loss": 0.6432, + "step": 13636 + }, + { + "epoch": 0.2356407243572021, + "grad_norm": 1.4614573989552295, + "learning_rate": 1.786309870323605e-05, + "loss": 0.5117, + "step": 13637 + }, + { + "epoch": 0.235658003870611, + "grad_norm": 1.2140358842448042, + "learning_rate": 1.7862752921347427e-05, + "loss": 0.5282, + "step": 13638 + }, + { + "epoch": 0.2356752833840199, + "grad_norm": 0.8837360838878443, + "learning_rate": 1.786240711483206e-05, + "loss": 0.3162, + "step": 13639 + }, + { + "epoch": 0.23569256289742882, + "grad_norm": 1.1335637341630886, + "learning_rate": 1.786206128369104e-05, + "loss": 0.7288, + "step": 13640 + }, + { + "epoch": 0.2357098424108377, + "grad_norm": 1.2291271574966909, + "learning_rate": 1.7861715427925443e-05, + "loss": 0.637, + "step": 13641 + }, + { + "epoch": 0.2357271219242466, + "grad_norm": 1.048138116995435, + "learning_rate": 1.7861369547536358e-05, + "loss": 0.7504, + "step": 13642 + }, + { + "epoch": 0.23574440143765552, + "grad_norm": 1.10530096713791, + "learning_rate": 1.7861023642524865e-05, + "loss": 0.579, + "step": 13643 + }, + { + "epoch": 0.23576168095106442, + "grad_norm": 1.311975266446901, + "learning_rate": 1.7860677712892053e-05, + "loss": 0.4954, + "step": 13644 + }, + { + "epoch": 0.23577896046447333, + "grad_norm": 0.8916064859583317, + "learning_rate": 1.7860331758639e-05, + "loss": 0.4177, + "step": 13645 + }, + { + "epoch": 0.2357962399778822, + "grad_norm": 1.1110690799744654, + "learning_rate": 1.7859985779766785e-05, + "loss": 0.4418, + "step": 13646 + }, + { + "epoch": 0.23581351949129112, + "grad_norm": 1.6878276250191535, + "learning_rate": 1.78596397762765e-05, + "loss": 0.528, + "step": 13647 + }, + { + "epoch": 0.23583079900470003, + "grad_norm": 1.6189646990936526, + "learning_rate": 1.7859293748169228e-05, + "loss": 0.4937, + "step": 13648 + }, + { + "epoch": 0.23584807851810893, + "grad_norm": 1.2654693660555971, + "learning_rate": 1.785894769544605e-05, + "loss": 0.3632, + "step": 13649 + }, + { + "epoch": 0.23586535803151784, + "grad_norm": 1.2929238190722594, + "learning_rate": 1.7858601618108048e-05, + "loss": 0.4819, + "step": 13650 + }, + { + "epoch": 0.23588263754492675, + "grad_norm": 0.959654264133489, + "learning_rate": 1.785825551615631e-05, + "loss": 0.4002, + "step": 13651 + }, + { + "epoch": 0.23589991705833563, + "grad_norm": 0.3980074241655995, + "learning_rate": 1.785790938959192e-05, + "loss": 0.4789, + "step": 13652 + }, + { + "epoch": 0.23591719657174454, + "grad_norm": 0.7854965925227444, + "learning_rate": 1.785756323841596e-05, + "loss": 0.511, + "step": 13653 + }, + { + "epoch": 0.23593447608515344, + "grad_norm": 1.447314125179313, + "learning_rate": 1.7857217062629513e-05, + "loss": 0.8144, + "step": 13654 + }, + { + "epoch": 0.23595175559856235, + "grad_norm": 0.8053572468159927, + "learning_rate": 1.7856870862233665e-05, + "loss": 0.4751, + "step": 13655 + }, + { + "epoch": 0.23596903511197126, + "grad_norm": 1.0431713663445428, + "learning_rate": 1.7856524637229503e-05, + "loss": 0.3889, + "step": 13656 + }, + { + "epoch": 0.23598631462538014, + "grad_norm": 1.2120684458660747, + "learning_rate": 1.785617838761811e-05, + "loss": 0.74, + "step": 13657 + }, + { + "epoch": 0.23600359413878905, + "grad_norm": 0.8119991632454943, + "learning_rate": 1.7855832113400568e-05, + "loss": 0.4572, + "step": 13658 + }, + { + "epoch": 0.23602087365219795, + "grad_norm": 0.7873242710735004, + "learning_rate": 1.7855485814577956e-05, + "loss": 0.5559, + "step": 13659 + }, + { + "epoch": 0.23603815316560686, + "grad_norm": 0.7706694538744475, + "learning_rate": 1.785513949115137e-05, + "loss": 0.3806, + "step": 13660 + }, + { + "epoch": 0.23605543267901577, + "grad_norm": 1.057035215083655, + "learning_rate": 1.7854793143121895e-05, + "loss": 0.4809, + "step": 13661 + }, + { + "epoch": 0.23607271219242465, + "grad_norm": 0.671087983756798, + "learning_rate": 1.7854446770490607e-05, + "loss": 0.3621, + "step": 13662 + }, + { + "epoch": 0.23608999170583356, + "grad_norm": 0.9943707919552551, + "learning_rate": 1.7854100373258593e-05, + "loss": 0.4485, + "step": 13663 + }, + { + "epoch": 0.23610727121924246, + "grad_norm": 0.9728543845749289, + "learning_rate": 1.7853753951426942e-05, + "loss": 0.3597, + "step": 13664 + }, + { + "epoch": 0.23612455073265137, + "grad_norm": 1.5120964805278958, + "learning_rate": 1.7853407504996737e-05, + "loss": 0.5796, + "step": 13665 + }, + { + "epoch": 0.23614183024606028, + "grad_norm": 1.1560289695505368, + "learning_rate": 1.785306103396906e-05, + "loss": 0.6086, + "step": 13666 + }, + { + "epoch": 0.23615910975946916, + "grad_norm": 1.1103910324219775, + "learning_rate": 1.7852714538345e-05, + "loss": 0.5603, + "step": 13667 + }, + { + "epoch": 0.23617638927287807, + "grad_norm": 0.846225831465253, + "learning_rate": 1.7852368018125646e-05, + "loss": 0.5702, + "step": 13668 + }, + { + "epoch": 0.23619366878628698, + "grad_norm": 0.8830248568306238, + "learning_rate": 1.7852021473312072e-05, + "loss": 0.5071, + "step": 13669 + }, + { + "epoch": 0.23621094829969588, + "grad_norm": 1.0488331698723357, + "learning_rate": 1.7851674903905375e-05, + "loss": 0.5022, + "step": 13670 + }, + { + "epoch": 0.2362282278131048, + "grad_norm": 1.1567576165256073, + "learning_rate": 1.7851328309906634e-05, + "loss": 0.5386, + "step": 13671 + }, + { + "epoch": 0.2362455073265137, + "grad_norm": 0.9004788169472122, + "learning_rate": 1.7850981691316935e-05, + "loss": 0.3787, + "step": 13672 + }, + { + "epoch": 0.23626278683992258, + "grad_norm": 0.5993190300635595, + "learning_rate": 1.7850635048137367e-05, + "loss": 0.3856, + "step": 13673 + }, + { + "epoch": 0.23628006635333149, + "grad_norm": 0.8147340506865786, + "learning_rate": 1.785028838036901e-05, + "loss": 0.6437, + "step": 13674 + }, + { + "epoch": 0.2362973458667404, + "grad_norm": 1.02550255493561, + "learning_rate": 1.7849941688012956e-05, + "loss": 0.5209, + "step": 13675 + }, + { + "epoch": 0.2363146253801493, + "grad_norm": 0.9633216322443271, + "learning_rate": 1.7849594971070287e-05, + "loss": 0.5381, + "step": 13676 + }, + { + "epoch": 0.2363319048935582, + "grad_norm": 0.9388455963574459, + "learning_rate": 1.784924822954209e-05, + "loss": 0.2905, + "step": 13677 + }, + { + "epoch": 0.2363491844069671, + "grad_norm": 1.3686729410587548, + "learning_rate": 1.784890146342945e-05, + "loss": 0.605, + "step": 13678 + }, + { + "epoch": 0.236366463920376, + "grad_norm": 1.6488162272441835, + "learning_rate": 1.7848554672733453e-05, + "loss": 0.7674, + "step": 13679 + }, + { + "epoch": 0.2363837434337849, + "grad_norm": 1.466821631831707, + "learning_rate": 1.7848207857455192e-05, + "loss": 0.3462, + "step": 13680 + }, + { + "epoch": 0.2364010229471938, + "grad_norm": 1.7522076759687961, + "learning_rate": 1.7847861017595743e-05, + "loss": 0.725, + "step": 13681 + }, + { + "epoch": 0.23641830246060272, + "grad_norm": 1.9554529656913415, + "learning_rate": 1.7847514153156198e-05, + "loss": 0.4217, + "step": 13682 + }, + { + "epoch": 0.2364355819740116, + "grad_norm": 0.7280924217560306, + "learning_rate": 1.784716726413764e-05, + "loss": 0.5374, + "step": 13683 + }, + { + "epoch": 0.2364528614874205, + "grad_norm": 0.9092148499690268, + "learning_rate": 1.784682035054116e-05, + "loss": 0.6477, + "step": 13684 + }, + { + "epoch": 0.23647014100082941, + "grad_norm": 0.4999790908162109, + "learning_rate": 1.7846473412367845e-05, + "loss": 0.8271, + "step": 13685 + }, + { + "epoch": 0.23648742051423832, + "grad_norm": 0.9080161580568371, + "learning_rate": 1.7846126449618776e-05, + "loss": 0.4401, + "step": 13686 + }, + { + "epoch": 0.23650470002764723, + "grad_norm": 0.6930803925915112, + "learning_rate": 1.784577946229504e-05, + "loss": 0.4639, + "step": 13687 + }, + { + "epoch": 0.23652197954105614, + "grad_norm": 0.9305070512723298, + "learning_rate": 1.784543245039773e-05, + "loss": 0.5358, + "step": 13688 + }, + { + "epoch": 0.23653925905446502, + "grad_norm": 0.7238673094943965, + "learning_rate": 1.784508541392793e-05, + "loss": 0.4843, + "step": 13689 + }, + { + "epoch": 0.23655653856787393, + "grad_norm": 1.2631976592788503, + "learning_rate": 1.7844738352886722e-05, + "loss": 0.4821, + "step": 13690 + }, + { + "epoch": 0.23657381808128283, + "grad_norm": 0.7423816239930331, + "learning_rate": 1.7844391267275202e-05, + "loss": 0.6978, + "step": 13691 + }, + { + "epoch": 0.23659109759469174, + "grad_norm": 1.4518085367363638, + "learning_rate": 1.784404415709445e-05, + "loss": 0.6496, + "step": 13692 + }, + { + "epoch": 0.23660837710810065, + "grad_norm": 1.3441462571314984, + "learning_rate": 1.7843697022345558e-05, + "loss": 0.5882, + "step": 13693 + }, + { + "epoch": 0.23662565662150953, + "grad_norm": 0.49089340125082587, + "learning_rate": 1.7843349863029608e-05, + "loss": 0.6459, + "step": 13694 + }, + { + "epoch": 0.23664293613491844, + "grad_norm": 1.8836914215165572, + "learning_rate": 1.784300267914769e-05, + "loss": 0.4171, + "step": 13695 + }, + { + "epoch": 0.23666021564832734, + "grad_norm": 0.8120345267591624, + "learning_rate": 1.7842655470700893e-05, + "loss": 0.8849, + "step": 13696 + }, + { + "epoch": 0.23667749516173625, + "grad_norm": 0.7046406085499357, + "learning_rate": 1.7842308237690305e-05, + "loss": 0.4334, + "step": 13697 + }, + { + "epoch": 0.23669477467514516, + "grad_norm": 1.094850601950661, + "learning_rate": 1.784196098011701e-05, + "loss": 0.3367, + "step": 13698 + }, + { + "epoch": 0.23671205418855404, + "grad_norm": 1.0781091259162778, + "learning_rate": 1.7841613697982095e-05, + "loss": 0.4855, + "step": 13699 + }, + { + "epoch": 0.23672933370196295, + "grad_norm": 0.6376841396076366, + "learning_rate": 1.784126639128665e-05, + "loss": 0.4575, + "step": 13700 + }, + { + "epoch": 0.23674661321537185, + "grad_norm": 1.5882810364116406, + "learning_rate": 1.784091906003177e-05, + "loss": 0.6114, + "step": 13701 + }, + { + "epoch": 0.23676389272878076, + "grad_norm": 0.8605999429461951, + "learning_rate": 1.7840571704218524e-05, + "loss": 0.5614, + "step": 13702 + }, + { + "epoch": 0.23678117224218967, + "grad_norm": 0.6380407172290437, + "learning_rate": 1.784022432384802e-05, + "loss": 0.6282, + "step": 13703 + }, + { + "epoch": 0.23679845175559855, + "grad_norm": 1.5369463253075852, + "learning_rate": 1.7839876918921334e-05, + "loss": 0.6416, + "step": 13704 + }, + { + "epoch": 0.23681573126900746, + "grad_norm": 0.6841866658690717, + "learning_rate": 1.7839529489439557e-05, + "loss": 0.343, + "step": 13705 + }, + { + "epoch": 0.23683301078241636, + "grad_norm": 1.015575926637437, + "learning_rate": 1.7839182035403783e-05, + "loss": 0.5024, + "step": 13706 + }, + { + "epoch": 0.23685029029582527, + "grad_norm": 1.3844265614068665, + "learning_rate": 1.783883455681509e-05, + "loss": 0.4477, + "step": 13707 + }, + { + "epoch": 0.23686756980923418, + "grad_norm": 0.9527285829261654, + "learning_rate": 1.7838487053674574e-05, + "loss": 0.3537, + "step": 13708 + }, + { + "epoch": 0.2368848493226431, + "grad_norm": 1.149897313683006, + "learning_rate": 1.7838139525983318e-05, + "loss": 0.5172, + "step": 13709 + }, + { + "epoch": 0.23690212883605197, + "grad_norm": 0.9496567775490086, + "learning_rate": 1.7837791973742414e-05, + "loss": 0.5397, + "step": 13710 + }, + { + "epoch": 0.23691940834946087, + "grad_norm": 0.9816032009042166, + "learning_rate": 1.7837444396952955e-05, + "loss": 0.5858, + "step": 13711 + }, + { + "epoch": 0.23693668786286978, + "grad_norm": 1.2071062045907648, + "learning_rate": 1.783709679561602e-05, + "loss": 0.4072, + "step": 13712 + }, + { + "epoch": 0.2369539673762787, + "grad_norm": 1.431818386604391, + "learning_rate": 1.7836749169732703e-05, + "loss": 0.6123, + "step": 13713 + }, + { + "epoch": 0.2369712468896876, + "grad_norm": 0.7211118355216692, + "learning_rate": 1.7836401519304093e-05, + "loss": 0.5886, + "step": 13714 + }, + { + "epoch": 0.23698852640309648, + "grad_norm": 1.0834097247011645, + "learning_rate": 1.7836053844331273e-05, + "loss": 0.7163, + "step": 13715 + }, + { + "epoch": 0.23700580591650539, + "grad_norm": 1.242052720003912, + "learning_rate": 1.7835706144815345e-05, + "loss": 0.5501, + "step": 13716 + }, + { + "epoch": 0.2370230854299143, + "grad_norm": 1.0046304855714225, + "learning_rate": 1.7835358420757383e-05, + "loss": 0.5313, + "step": 13717 + }, + { + "epoch": 0.2370403649433232, + "grad_norm": 0.8751623093990176, + "learning_rate": 1.7835010672158488e-05, + "loss": 0.3575, + "step": 13718 + }, + { + "epoch": 0.2370576444567321, + "grad_norm": 0.7648094301774371, + "learning_rate": 1.783466289901974e-05, + "loss": 0.4015, + "step": 13719 + }, + { + "epoch": 0.237074923970141, + "grad_norm": 0.9090619644002348, + "learning_rate": 1.7834315101342234e-05, + "loss": 0.4955, + "step": 13720 + }, + { + "epoch": 0.2370922034835499, + "grad_norm": 1.3235440048229998, + "learning_rate": 1.783396727912706e-05, + "loss": 0.6128, + "step": 13721 + }, + { + "epoch": 0.2371094829969588, + "grad_norm": 0.46946638307802824, + "learning_rate": 1.7833619432375303e-05, + "loss": 0.7938, + "step": 13722 + }, + { + "epoch": 0.2371267625103677, + "grad_norm": 1.2090887438177398, + "learning_rate": 1.7833271561088053e-05, + "loss": 0.6187, + "step": 13723 + }, + { + "epoch": 0.23714404202377662, + "grad_norm": 1.053772775930864, + "learning_rate": 1.7832923665266402e-05, + "loss": 0.5921, + "step": 13724 + }, + { + "epoch": 0.23716132153718553, + "grad_norm": 1.4071228599823815, + "learning_rate": 1.783257574491144e-05, + "loss": 0.7187, + "step": 13725 + }, + { + "epoch": 0.2371786010505944, + "grad_norm": 1.2793740256867525, + "learning_rate": 1.7832227800024254e-05, + "loss": 0.579, + "step": 13726 + }, + { + "epoch": 0.23719588056400331, + "grad_norm": 0.6549527557811972, + "learning_rate": 1.783187983060594e-05, + "loss": 0.4054, + "step": 13727 + }, + { + "epoch": 0.23721316007741222, + "grad_norm": 1.0985475557755024, + "learning_rate": 1.7831531836657575e-05, + "loss": 0.4054, + "step": 13728 + }, + { + "epoch": 0.23723043959082113, + "grad_norm": 1.1135360065720166, + "learning_rate": 1.783118381818026e-05, + "loss": 0.7471, + "step": 13729 + }, + { + "epoch": 0.23724771910423004, + "grad_norm": 0.9810208248291653, + "learning_rate": 1.7830835775175085e-05, + "loss": 0.5094, + "step": 13730 + }, + { + "epoch": 0.23726499861763892, + "grad_norm": 3.7039455731858104, + "learning_rate": 1.7830487707643138e-05, + "loss": 0.5088, + "step": 13731 + }, + { + "epoch": 0.23728227813104782, + "grad_norm": 0.966993127592698, + "learning_rate": 1.7830139615585508e-05, + "loss": 0.4871, + "step": 13732 + }, + { + "epoch": 0.23729955764445673, + "grad_norm": 1.135028390659974, + "learning_rate": 1.7829791499003283e-05, + "loss": 0.5385, + "step": 13733 + }, + { + "epoch": 0.23731683715786564, + "grad_norm": 1.2598085349647858, + "learning_rate": 1.7829443357897556e-05, + "loss": 0.58, + "step": 13734 + }, + { + "epoch": 0.23733411667127455, + "grad_norm": 1.4013527217319277, + "learning_rate": 1.7829095192269416e-05, + "loss": 0.7333, + "step": 13735 + }, + { + "epoch": 0.23735139618468343, + "grad_norm": 1.7307679978407378, + "learning_rate": 1.782874700211996e-05, + "loss": 0.646, + "step": 13736 + }, + { + "epoch": 0.23736867569809234, + "grad_norm": 0.40240759109514085, + "learning_rate": 1.7828398787450273e-05, + "loss": 0.575, + "step": 13737 + }, + { + "epoch": 0.23738595521150124, + "grad_norm": 1.1531502202422046, + "learning_rate": 1.782805054826144e-05, + "loss": 0.4903, + "step": 13738 + }, + { + "epoch": 0.23740323472491015, + "grad_norm": 0.9131014855390522, + "learning_rate": 1.7827702284554563e-05, + "loss": 0.5459, + "step": 13739 + }, + { + "epoch": 0.23742051423831906, + "grad_norm": 1.0580625806834667, + "learning_rate": 1.7827353996330727e-05, + "loss": 0.5122, + "step": 13740 + }, + { + "epoch": 0.23743779375172794, + "grad_norm": 0.731728571940131, + "learning_rate": 1.7827005683591022e-05, + "loss": 0.5341, + "step": 13741 + }, + { + "epoch": 0.23745507326513685, + "grad_norm": 1.1755092108447383, + "learning_rate": 1.7826657346336538e-05, + "loss": 0.8212, + "step": 13742 + }, + { + "epoch": 0.23747235277854575, + "grad_norm": 1.2983532973917575, + "learning_rate": 1.782630898456837e-05, + "loss": 0.4285, + "step": 13743 + }, + { + "epoch": 0.23748963229195466, + "grad_norm": 0.9678577580233108, + "learning_rate": 1.782596059828761e-05, + "loss": 0.6529, + "step": 13744 + }, + { + "epoch": 0.23750691180536357, + "grad_norm": 1.0055365224984891, + "learning_rate": 1.7825612187495348e-05, + "loss": 0.7023, + "step": 13745 + }, + { + "epoch": 0.23752419131877248, + "grad_norm": 1.5421962238284284, + "learning_rate": 1.782526375219267e-05, + "loss": 0.5767, + "step": 13746 + }, + { + "epoch": 0.23754147083218136, + "grad_norm": 1.328609520234532, + "learning_rate": 1.7824915292380673e-05, + "loss": 0.6402, + "step": 13747 + }, + { + "epoch": 0.23755875034559026, + "grad_norm": 1.0905787294030147, + "learning_rate": 1.7824566808060445e-05, + "loss": 0.4902, + "step": 13748 + }, + { + "epoch": 0.23757602985899917, + "grad_norm": 0.7068665312243496, + "learning_rate": 1.782421829923308e-05, + "loss": 0.4624, + "step": 13749 + }, + { + "epoch": 0.23759330937240808, + "grad_norm": 0.9368614393800166, + "learning_rate": 1.7823869765899672e-05, + "loss": 0.5094, + "step": 13750 + }, + { + "epoch": 0.237610588885817, + "grad_norm": 0.5570253412778956, + "learning_rate": 1.7823521208061304e-05, + "loss": 0.6521, + "step": 13751 + }, + { + "epoch": 0.23762786839922587, + "grad_norm": 0.5626872085734498, + "learning_rate": 1.7823172625719073e-05, + "loss": 0.3343, + "step": 13752 + }, + { + "epoch": 0.23764514791263477, + "grad_norm": 0.9853678928019224, + "learning_rate": 1.7822824018874073e-05, + "loss": 0.5852, + "step": 13753 + }, + { + "epoch": 0.23766242742604368, + "grad_norm": 0.47213725035521553, + "learning_rate": 1.7822475387527398e-05, + "loss": 0.8073, + "step": 13754 + }, + { + "epoch": 0.2376797069394526, + "grad_norm": 0.37351743070651366, + "learning_rate": 1.782212673168013e-05, + "loss": 0.6459, + "step": 13755 + }, + { + "epoch": 0.2376969864528615, + "grad_norm": 0.8364203283426088, + "learning_rate": 1.7821778051333367e-05, + "loss": 0.7391, + "step": 13756 + }, + { + "epoch": 0.23771426596627038, + "grad_norm": 1.3548914820774391, + "learning_rate": 1.7821429346488203e-05, + "loss": 0.4955, + "step": 13757 + }, + { + "epoch": 0.23773154547967928, + "grad_norm": 1.0066659737641956, + "learning_rate": 1.782108061714573e-05, + "loss": 0.6532, + "step": 13758 + }, + { + "epoch": 0.2377488249930882, + "grad_norm": 1.0428386278878714, + "learning_rate": 1.782073186330703e-05, + "loss": 0.604, + "step": 13759 + }, + { + "epoch": 0.2377661045064971, + "grad_norm": 1.3997105856179761, + "learning_rate": 1.7820383084973207e-05, + "loss": 0.4458, + "step": 13760 + }, + { + "epoch": 0.237783384019906, + "grad_norm": 1.2776831113926594, + "learning_rate": 1.7820034282145352e-05, + "loss": 0.531, + "step": 13761 + }, + { + "epoch": 0.23780066353331492, + "grad_norm": 0.8330063283364828, + "learning_rate": 1.7819685454824553e-05, + "loss": 0.4499, + "step": 13762 + }, + { + "epoch": 0.2378179430467238, + "grad_norm": 0.9719940282299363, + "learning_rate": 1.781933660301191e-05, + "loss": 0.4431, + "step": 13763 + }, + { + "epoch": 0.2378352225601327, + "grad_norm": 0.8811287516840066, + "learning_rate": 1.7818987726708506e-05, + "loss": 0.6473, + "step": 13764 + }, + { + "epoch": 0.2378525020735416, + "grad_norm": 0.5303763308346, + "learning_rate": 1.7818638825915436e-05, + "loss": 0.8234, + "step": 13765 + }, + { + "epoch": 0.23786978158695052, + "grad_norm": 0.5589427872859408, + "learning_rate": 1.7818289900633797e-05, + "loss": 0.6323, + "step": 13766 + }, + { + "epoch": 0.23788706110035943, + "grad_norm": 1.1914025047524206, + "learning_rate": 1.7817940950864682e-05, + "loss": 0.4611, + "step": 13767 + }, + { + "epoch": 0.2379043406137683, + "grad_norm": 0.8633330277607302, + "learning_rate": 1.781759197660918e-05, + "loss": 0.4283, + "step": 13768 + }, + { + "epoch": 0.2379216201271772, + "grad_norm": 1.4317096975784882, + "learning_rate": 1.7817242977868388e-05, + "loss": 0.6036, + "step": 13769 + }, + { + "epoch": 0.23793889964058612, + "grad_norm": 0.9121456859311051, + "learning_rate": 1.7816893954643394e-05, + "loss": 0.6538, + "step": 13770 + }, + { + "epoch": 0.23795617915399503, + "grad_norm": 1.0975471772640066, + "learning_rate": 1.7816544906935295e-05, + "loss": 0.6642, + "step": 13771 + }, + { + "epoch": 0.23797345866740394, + "grad_norm": 0.7557765748818598, + "learning_rate": 1.7816195834745186e-05, + "loss": 0.5737, + "step": 13772 + }, + { + "epoch": 0.23799073818081282, + "grad_norm": 1.1235549304333803, + "learning_rate": 1.7815846738074155e-05, + "loss": 0.4039, + "step": 13773 + }, + { + "epoch": 0.23800801769422172, + "grad_norm": 1.03412754740054, + "learning_rate": 1.78154976169233e-05, + "loss": 0.5403, + "step": 13774 + }, + { + "epoch": 0.23802529720763063, + "grad_norm": 0.8733099034644786, + "learning_rate": 1.781514847129371e-05, + "loss": 0.5941, + "step": 13775 + }, + { + "epoch": 0.23804257672103954, + "grad_norm": 1.0580951807025527, + "learning_rate": 1.781479930118648e-05, + "loss": 0.5986, + "step": 13776 + }, + { + "epoch": 0.23805985623444845, + "grad_norm": 1.1488024287276195, + "learning_rate": 1.781445010660271e-05, + "loss": 0.7068, + "step": 13777 + }, + { + "epoch": 0.23807713574785733, + "grad_norm": 0.7449451774283935, + "learning_rate": 1.7814100887543483e-05, + "loss": 0.4404, + "step": 13778 + }, + { + "epoch": 0.23809441526126623, + "grad_norm": 1.2628493607756703, + "learning_rate": 1.7813751644009902e-05, + "loss": 0.4403, + "step": 13779 + }, + { + "epoch": 0.23811169477467514, + "grad_norm": 0.4448246923731807, + "learning_rate": 1.781340237600305e-05, + "loss": 0.6526, + "step": 13780 + }, + { + "epoch": 0.23812897428808405, + "grad_norm": 0.9538322972963439, + "learning_rate": 1.7813053083524034e-05, + "loss": 0.4218, + "step": 13781 + }, + { + "epoch": 0.23814625380149296, + "grad_norm": 1.5377604589706952, + "learning_rate": 1.7812703766573943e-05, + "loss": 0.5006, + "step": 13782 + }, + { + "epoch": 0.23816353331490187, + "grad_norm": 0.5464111635432709, + "learning_rate": 1.781235442515387e-05, + "loss": 0.7922, + "step": 13783 + }, + { + "epoch": 0.23818081282831075, + "grad_norm": 1.3258674531684072, + "learning_rate": 1.7812005059264905e-05, + "loss": 0.5907, + "step": 13784 + }, + { + "epoch": 0.23819809234171965, + "grad_norm": 0.9178816628031919, + "learning_rate": 1.781165566890815e-05, + "loss": 0.4531, + "step": 13785 + }, + { + "epoch": 0.23821537185512856, + "grad_norm": 1.0165700208914035, + "learning_rate": 1.7811306254084693e-05, + "loss": 0.7564, + "step": 13786 + }, + { + "epoch": 0.23823265136853747, + "grad_norm": 0.7196923637320669, + "learning_rate": 1.781095681479563e-05, + "loss": 0.4841, + "step": 13787 + }, + { + "epoch": 0.23824993088194638, + "grad_norm": 1.2309000374865875, + "learning_rate": 1.7810607351042062e-05, + "loss": 0.5631, + "step": 13788 + }, + { + "epoch": 0.23826721039535526, + "grad_norm": 1.3900748922522488, + "learning_rate": 1.7810257862825077e-05, + "loss": 0.5049, + "step": 13789 + }, + { + "epoch": 0.23828448990876416, + "grad_norm": 0.49598084776269596, + "learning_rate": 1.7809908350145767e-05, + "loss": 0.8144, + "step": 13790 + }, + { + "epoch": 0.23830176942217307, + "grad_norm": 1.3562564389448357, + "learning_rate": 1.7809558813005233e-05, + "loss": 0.5815, + "step": 13791 + }, + { + "epoch": 0.23831904893558198, + "grad_norm": 1.2697738129343326, + "learning_rate": 1.780920925140457e-05, + "loss": 0.6777, + "step": 13792 + }, + { + "epoch": 0.2383363284489909, + "grad_norm": 0.9966163789187215, + "learning_rate": 1.7808859665344867e-05, + "loss": 0.6405, + "step": 13793 + }, + { + "epoch": 0.23835360796239977, + "grad_norm": 0.9256468678315511, + "learning_rate": 1.780851005482722e-05, + "loss": 0.497, + "step": 13794 + }, + { + "epoch": 0.23837088747580867, + "grad_norm": 1.1655940766764612, + "learning_rate": 1.7808160419852728e-05, + "loss": 0.5988, + "step": 13795 + }, + { + "epoch": 0.23838816698921758, + "grad_norm": 0.9890125295083841, + "learning_rate": 1.7807810760422487e-05, + "loss": 0.701, + "step": 13796 + }, + { + "epoch": 0.2384054465026265, + "grad_norm": 0.895477035803931, + "learning_rate": 1.7807461076537587e-05, + "loss": 0.4942, + "step": 13797 + }, + { + "epoch": 0.2384227260160354, + "grad_norm": 1.1739362058326304, + "learning_rate": 1.7807111368199125e-05, + "loss": 0.6429, + "step": 13798 + }, + { + "epoch": 0.2384400055294443, + "grad_norm": 0.8643103299553271, + "learning_rate": 1.78067616354082e-05, + "loss": 0.5238, + "step": 13799 + }, + { + "epoch": 0.23845728504285318, + "grad_norm": 1.475490490184712, + "learning_rate": 1.78064118781659e-05, + "loss": 0.8018, + "step": 13800 + }, + { + "epoch": 0.2384745645562621, + "grad_norm": 0.8526366187612792, + "learning_rate": 1.7806062096473327e-05, + "loss": 0.5434, + "step": 13801 + }, + { + "epoch": 0.238491844069671, + "grad_norm": 0.8416349340607377, + "learning_rate": 1.7805712290331572e-05, + "loss": 0.5315, + "step": 13802 + }, + { + "epoch": 0.2385091235830799, + "grad_norm": 1.0858859932263893, + "learning_rate": 1.780536245974174e-05, + "loss": 0.5616, + "step": 13803 + }, + { + "epoch": 0.23852640309648881, + "grad_norm": 0.6863209642209378, + "learning_rate": 1.780501260470491e-05, + "loss": 0.3929, + "step": 13804 + }, + { + "epoch": 0.2385436826098977, + "grad_norm": 0.7625062050358946, + "learning_rate": 1.780466272522219e-05, + "loss": 0.4147, + "step": 13805 + }, + { + "epoch": 0.2385609621233066, + "grad_norm": 1.009094621274436, + "learning_rate": 1.7804312821294677e-05, + "loss": 0.6184, + "step": 13806 + }, + { + "epoch": 0.2385782416367155, + "grad_norm": 0.6375577291372446, + "learning_rate": 1.780396289292346e-05, + "loss": 0.3738, + "step": 13807 + }, + { + "epoch": 0.23859552115012442, + "grad_norm": 1.253534272265565, + "learning_rate": 1.7803612940109637e-05, + "loss": 0.7183, + "step": 13808 + }, + { + "epoch": 0.23861280066353333, + "grad_norm": 0.7682750472382183, + "learning_rate": 1.780326296285431e-05, + "loss": 0.5967, + "step": 13809 + }, + { + "epoch": 0.2386300801769422, + "grad_norm": 0.71692827724057, + "learning_rate": 1.780291296115857e-05, + "loss": 0.3931, + "step": 13810 + }, + { + "epoch": 0.2386473596903511, + "grad_norm": 0.832345417761658, + "learning_rate": 1.780256293502351e-05, + "loss": 0.6152, + "step": 13811 + }, + { + "epoch": 0.23866463920376002, + "grad_norm": 0.756319202930976, + "learning_rate": 1.780221288445023e-05, + "loss": 0.5329, + "step": 13812 + }, + { + "epoch": 0.23868191871716893, + "grad_norm": 1.3197628326653068, + "learning_rate": 1.780186280943983e-05, + "loss": 0.6862, + "step": 13813 + }, + { + "epoch": 0.23869919823057784, + "grad_norm": 0.8044363579343586, + "learning_rate": 1.7801512709993396e-05, + "loss": 0.4983, + "step": 13814 + }, + { + "epoch": 0.23871647774398672, + "grad_norm": 0.5649794261514846, + "learning_rate": 1.7801162586112038e-05, + "loss": 0.42, + "step": 13815 + }, + { + "epoch": 0.23873375725739562, + "grad_norm": 1.0451552305081449, + "learning_rate": 1.7800812437796843e-05, + "loss": 0.6051, + "step": 13816 + }, + { + "epoch": 0.23875103677080453, + "grad_norm": 0.3991052773669273, + "learning_rate": 1.780046226504891e-05, + "loss": 0.6301, + "step": 13817 + }, + { + "epoch": 0.23876831628421344, + "grad_norm": 1.4629475269191987, + "learning_rate": 1.780011206786934e-05, + "loss": 0.5618, + "step": 13818 + }, + { + "epoch": 0.23878559579762235, + "grad_norm": 0.9095899311877033, + "learning_rate": 1.7799761846259222e-05, + "loss": 0.604, + "step": 13819 + }, + { + "epoch": 0.23880287531103125, + "grad_norm": 0.7673056110592936, + "learning_rate": 1.779941160021966e-05, + "loss": 0.5459, + "step": 13820 + }, + { + "epoch": 0.23882015482444013, + "grad_norm": 0.49021218565304836, + "learning_rate": 1.7799061329751746e-05, + "loss": 0.6968, + "step": 13821 + }, + { + "epoch": 0.23883743433784904, + "grad_norm": 0.7167340641312188, + "learning_rate": 1.779871103485658e-05, + "loss": 0.4646, + "step": 13822 + }, + { + "epoch": 0.23885471385125795, + "grad_norm": 1.2997041941594498, + "learning_rate": 1.779836071553526e-05, + "loss": 0.6273, + "step": 13823 + }, + { + "epoch": 0.23887199336466686, + "grad_norm": 0.6760108478312282, + "learning_rate": 1.779801037178888e-05, + "loss": 0.6708, + "step": 13824 + }, + { + "epoch": 0.23888927287807576, + "grad_norm": 1.0952675519538653, + "learning_rate": 1.7797660003618536e-05, + "loss": 0.4242, + "step": 13825 + }, + { + "epoch": 0.23890655239148464, + "grad_norm": 0.7815781237492987, + "learning_rate": 1.779730961102533e-05, + "loss": 0.6857, + "step": 13826 + }, + { + "epoch": 0.23892383190489355, + "grad_norm": 0.45129089030196967, + "learning_rate": 1.7796959194010363e-05, + "loss": 0.8447, + "step": 13827 + }, + { + "epoch": 0.23894111141830246, + "grad_norm": 0.9083979172051935, + "learning_rate": 1.7796608752574723e-05, + "loss": 0.6824, + "step": 13828 + }, + { + "epoch": 0.23895839093171137, + "grad_norm": 1.4730688244570187, + "learning_rate": 1.779625828671951e-05, + "loss": 0.5805, + "step": 13829 + }, + { + "epoch": 0.23897567044512028, + "grad_norm": 0.935442602108919, + "learning_rate": 1.7795907796445825e-05, + "loss": 0.5961, + "step": 13830 + }, + { + "epoch": 0.23899294995852916, + "grad_norm": 1.2069307916311174, + "learning_rate": 1.7795557281754768e-05, + "loss": 0.5651, + "step": 13831 + }, + { + "epoch": 0.23901022947193806, + "grad_norm": 1.308429104041792, + "learning_rate": 1.7795206742647428e-05, + "loss": 0.5943, + "step": 13832 + }, + { + "epoch": 0.23902750898534697, + "grad_norm": 0.9921439497839, + "learning_rate": 1.779485617912491e-05, + "loss": 0.5223, + "step": 13833 + }, + { + "epoch": 0.23904478849875588, + "grad_norm": 0.8762552777108514, + "learning_rate": 1.779450559118831e-05, + "loss": 0.6072, + "step": 13834 + }, + { + "epoch": 0.23906206801216479, + "grad_norm": 1.3252768040966936, + "learning_rate": 1.779415497883873e-05, + "loss": 0.5395, + "step": 13835 + }, + { + "epoch": 0.2390793475255737, + "grad_norm": 1.2512782296479974, + "learning_rate": 1.7793804342077258e-05, + "loss": 0.4118, + "step": 13836 + }, + { + "epoch": 0.23909662703898257, + "grad_norm": 0.9147117476513544, + "learning_rate": 1.7793453680905002e-05, + "loss": 0.4769, + "step": 13837 + }, + { + "epoch": 0.23911390655239148, + "grad_norm": 1.5834720699160854, + "learning_rate": 1.7793102995323056e-05, + "loss": 0.5843, + "step": 13838 + }, + { + "epoch": 0.2391311860658004, + "grad_norm": 0.7812686627474037, + "learning_rate": 1.7792752285332517e-05, + "loss": 0.4005, + "step": 13839 + }, + { + "epoch": 0.2391484655792093, + "grad_norm": 0.974327529137748, + "learning_rate": 1.779240155093449e-05, + "loss": 0.3951, + "step": 13840 + }, + { + "epoch": 0.2391657450926182, + "grad_norm": 1.499951590137346, + "learning_rate": 1.7792050792130066e-05, + "loss": 0.5674, + "step": 13841 + }, + { + "epoch": 0.23918302460602708, + "grad_norm": 1.030655486969483, + "learning_rate": 1.7791700008920346e-05, + "loss": 0.6396, + "step": 13842 + }, + { + "epoch": 0.239200304119436, + "grad_norm": 0.9536526610113515, + "learning_rate": 1.7791349201306434e-05, + "loss": 0.3203, + "step": 13843 + }, + { + "epoch": 0.2392175836328449, + "grad_norm": 1.1986212412637356, + "learning_rate": 1.779099836928942e-05, + "loss": 0.7072, + "step": 13844 + }, + { + "epoch": 0.2392348631462538, + "grad_norm": 0.9660509304109816, + "learning_rate": 1.7790647512870407e-05, + "loss": 0.5231, + "step": 13845 + }, + { + "epoch": 0.23925214265966271, + "grad_norm": 2.268758927155082, + "learning_rate": 1.7790296632050497e-05, + "loss": 0.6347, + "step": 13846 + }, + { + "epoch": 0.2392694221730716, + "grad_norm": 0.41803656476579876, + "learning_rate": 1.7789945726830783e-05, + "loss": 0.5627, + "step": 13847 + }, + { + "epoch": 0.2392867016864805, + "grad_norm": 1.3770123253941269, + "learning_rate": 1.7789594797212367e-05, + "loss": 0.4844, + "step": 13848 + }, + { + "epoch": 0.2393039811998894, + "grad_norm": 1.0769374616098353, + "learning_rate": 1.778924384319635e-05, + "loss": 0.5943, + "step": 13849 + }, + { + "epoch": 0.23932126071329832, + "grad_norm": 1.1219534301032938, + "learning_rate": 1.778889286478383e-05, + "loss": 0.6996, + "step": 13850 + }, + { + "epoch": 0.23933854022670722, + "grad_norm": 1.7181889852036727, + "learning_rate": 1.7788541861975902e-05, + "loss": 0.3843, + "step": 13851 + }, + { + "epoch": 0.2393558197401161, + "grad_norm": 1.2322239052884916, + "learning_rate": 1.778819083477367e-05, + "loss": 0.5935, + "step": 13852 + }, + { + "epoch": 0.239373099253525, + "grad_norm": 0.913894510532316, + "learning_rate": 1.7787839783178236e-05, + "loss": 0.4067, + "step": 13853 + }, + { + "epoch": 0.23939037876693392, + "grad_norm": 0.9664609529405079, + "learning_rate": 1.7787488707190694e-05, + "loss": 0.5106, + "step": 13854 + }, + { + "epoch": 0.23940765828034283, + "grad_norm": 0.9479373327507951, + "learning_rate": 1.7787137606812143e-05, + "loss": 0.3164, + "step": 13855 + }, + { + "epoch": 0.23942493779375174, + "grad_norm": 0.962220101993663, + "learning_rate": 1.7786786482043688e-05, + "loss": 0.6071, + "step": 13856 + }, + { + "epoch": 0.23944221730716064, + "grad_norm": 0.8089025626095423, + "learning_rate": 1.7786435332886424e-05, + "loss": 0.5332, + "step": 13857 + }, + { + "epoch": 0.23945949682056952, + "grad_norm": 0.6543779457946576, + "learning_rate": 1.7786084159341455e-05, + "loss": 0.4254, + "step": 13858 + }, + { + "epoch": 0.23947677633397843, + "grad_norm": 0.970086587174185, + "learning_rate": 1.7785732961409875e-05, + "loss": 0.5236, + "step": 13859 + }, + { + "epoch": 0.23949405584738734, + "grad_norm": 1.5022238676568234, + "learning_rate": 1.7785381739092793e-05, + "loss": 0.4396, + "step": 13860 + }, + { + "epoch": 0.23951133536079625, + "grad_norm": 0.7432780535914525, + "learning_rate": 1.77850304923913e-05, + "loss": 0.421, + "step": 13861 + }, + { + "epoch": 0.23952861487420515, + "grad_norm": 1.3175624693454169, + "learning_rate": 1.7784679221306502e-05, + "loss": 0.4684, + "step": 13862 + }, + { + "epoch": 0.23954589438761403, + "grad_norm": 0.7893372881489414, + "learning_rate": 1.7784327925839496e-05, + "loss": 0.7686, + "step": 13863 + }, + { + "epoch": 0.23956317390102294, + "grad_norm": 1.6109423252159782, + "learning_rate": 1.778397660599138e-05, + "loss": 0.5742, + "step": 13864 + }, + { + "epoch": 0.23958045341443185, + "grad_norm": 0.7719195941612295, + "learning_rate": 1.7783625261763263e-05, + "loss": 0.3547, + "step": 13865 + }, + { + "epoch": 0.23959773292784076, + "grad_norm": 1.1897475683123724, + "learning_rate": 1.778327389315624e-05, + "loss": 0.3876, + "step": 13866 + }, + { + "epoch": 0.23961501244124966, + "grad_norm": 0.4302702299848043, + "learning_rate": 1.778292250017141e-05, + "loss": 0.4547, + "step": 13867 + }, + { + "epoch": 0.23963229195465854, + "grad_norm": 1.2254524747069022, + "learning_rate": 1.7782571082809873e-05, + "loss": 0.492, + "step": 13868 + }, + { + "epoch": 0.23964957146806745, + "grad_norm": 1.1045746093972941, + "learning_rate": 1.7782219641072735e-05, + "loss": 0.5906, + "step": 13869 + }, + { + "epoch": 0.23966685098147636, + "grad_norm": 1.3829813136218714, + "learning_rate": 1.7781868174961092e-05, + "loss": 0.665, + "step": 13870 + }, + { + "epoch": 0.23968413049488527, + "grad_norm": 1.1125427111014843, + "learning_rate": 1.778151668447605e-05, + "loss": 0.6317, + "step": 13871 + }, + { + "epoch": 0.23970141000829417, + "grad_norm": 1.5536999250842753, + "learning_rate": 1.7781165169618703e-05, + "loss": 0.5656, + "step": 13872 + }, + { + "epoch": 0.23971868952170308, + "grad_norm": 1.1385138389060228, + "learning_rate": 1.7780813630390152e-05, + "loss": 0.5047, + "step": 13873 + }, + { + "epoch": 0.23973596903511196, + "grad_norm": 0.9899096163864005, + "learning_rate": 1.7780462066791505e-05, + "loss": 0.526, + "step": 13874 + }, + { + "epoch": 0.23975324854852087, + "grad_norm": 0.843890950030548, + "learning_rate": 1.778011047882386e-05, + "loss": 0.756, + "step": 13875 + }, + { + "epoch": 0.23977052806192978, + "grad_norm": 1.7121176049092437, + "learning_rate": 1.777975886648832e-05, + "loss": 0.5738, + "step": 13876 + }, + { + "epoch": 0.23978780757533869, + "grad_norm": 1.2943767523678693, + "learning_rate": 1.777940722978598e-05, + "loss": 0.6052, + "step": 13877 + }, + { + "epoch": 0.2398050870887476, + "grad_norm": 1.058096085403308, + "learning_rate": 1.7779055568717945e-05, + "loss": 0.5564, + "step": 13878 + }, + { + "epoch": 0.23982236660215647, + "grad_norm": 1.0417270970929375, + "learning_rate": 1.777870388328532e-05, + "loss": 0.6324, + "step": 13879 + }, + { + "epoch": 0.23983964611556538, + "grad_norm": 0.7096371986471446, + "learning_rate": 1.77783521734892e-05, + "loss": 0.3309, + "step": 13880 + }, + { + "epoch": 0.2398569256289743, + "grad_norm": 0.45881531515746915, + "learning_rate": 1.7778000439330692e-05, + "loss": 0.7085, + "step": 13881 + }, + { + "epoch": 0.2398742051423832, + "grad_norm": 1.0982836795647577, + "learning_rate": 1.77776486808109e-05, + "loss": 0.6032, + "step": 13882 + }, + { + "epoch": 0.2398914846557921, + "grad_norm": 0.9621473964625933, + "learning_rate": 1.7777296897930912e-05, + "loss": 0.6101, + "step": 13883 + }, + { + "epoch": 0.23990876416920098, + "grad_norm": 1.3444166756846887, + "learning_rate": 1.7776945090691844e-05, + "loss": 0.6465, + "step": 13884 + }, + { + "epoch": 0.2399260436826099, + "grad_norm": 1.2168713895105736, + "learning_rate": 1.777659325909479e-05, + "loss": 0.6035, + "step": 13885 + }, + { + "epoch": 0.2399433231960188, + "grad_norm": 1.4462187699177649, + "learning_rate": 1.7776241403140862e-05, + "loss": 0.7588, + "step": 13886 + }, + { + "epoch": 0.2399606027094277, + "grad_norm": 1.2978209422025928, + "learning_rate": 1.7775889522831148e-05, + "loss": 0.4394, + "step": 13887 + }, + { + "epoch": 0.2399778822228366, + "grad_norm": 1.0090601200872242, + "learning_rate": 1.777553761816676e-05, + "loss": 0.4927, + "step": 13888 + }, + { + "epoch": 0.2399951617362455, + "grad_norm": 1.1773751133033505, + "learning_rate": 1.7775185689148795e-05, + "loss": 0.661, + "step": 13889 + }, + { + "epoch": 0.2400124412496544, + "grad_norm": 0.8385076445181446, + "learning_rate": 1.777483373577836e-05, + "loss": 0.6549, + "step": 13890 + }, + { + "epoch": 0.2400297207630633, + "grad_norm": 1.0830065500606934, + "learning_rate": 1.7774481758056553e-05, + "loss": 0.4876, + "step": 13891 + }, + { + "epoch": 0.24004700027647222, + "grad_norm": 1.5216974303838353, + "learning_rate": 1.777412975598448e-05, + "loss": 0.5118, + "step": 13892 + }, + { + "epoch": 0.24006427978988112, + "grad_norm": 0.7819843521815084, + "learning_rate": 1.7773777729563243e-05, + "loss": 0.5998, + "step": 13893 + }, + { + "epoch": 0.24008155930329003, + "grad_norm": 0.8984826487970639, + "learning_rate": 1.7773425678793942e-05, + "loss": 0.4878, + "step": 13894 + }, + { + "epoch": 0.2400988388166989, + "grad_norm": 1.0097248061033166, + "learning_rate": 1.7773073603677678e-05, + "loss": 0.4026, + "step": 13895 + }, + { + "epoch": 0.24011611833010782, + "grad_norm": 0.461098598029719, + "learning_rate": 1.777272150421556e-05, + "loss": 0.6623, + "step": 13896 + }, + { + "epoch": 0.24013339784351673, + "grad_norm": 0.3666155256800063, + "learning_rate": 1.7772369380408688e-05, + "loss": 0.5695, + "step": 13897 + }, + { + "epoch": 0.24015067735692563, + "grad_norm": 1.1266353690896376, + "learning_rate": 1.777201723225816e-05, + "loss": 0.314, + "step": 13898 + }, + { + "epoch": 0.24016795687033454, + "grad_norm": 1.1439330033139608, + "learning_rate": 1.777166505976509e-05, + "loss": 0.5832, + "step": 13899 + }, + { + "epoch": 0.24018523638374342, + "grad_norm": 1.1892288406091553, + "learning_rate": 1.777131286293057e-05, + "loss": 0.717, + "step": 13900 + }, + { + "epoch": 0.24020251589715233, + "grad_norm": 1.1178436628405797, + "learning_rate": 1.777096064175571e-05, + "loss": 0.6937, + "step": 13901 + }, + { + "epoch": 0.24021979541056124, + "grad_norm": 0.7615315829864123, + "learning_rate": 1.7770608396241607e-05, + "loss": 0.5471, + "step": 13902 + }, + { + "epoch": 0.24023707492397015, + "grad_norm": 1.2359765905390443, + "learning_rate": 1.7770256126389372e-05, + "loss": 0.7174, + "step": 13903 + }, + { + "epoch": 0.24025435443737905, + "grad_norm": 1.1603517485053207, + "learning_rate": 1.77699038322001e-05, + "loss": 0.4479, + "step": 13904 + }, + { + "epoch": 0.24027163395078793, + "grad_norm": 0.9458991053305903, + "learning_rate": 1.7769551513674904e-05, + "loss": 0.6881, + "step": 13905 + }, + { + "epoch": 0.24028891346419684, + "grad_norm": 0.357507441917608, + "learning_rate": 1.7769199170814876e-05, + "loss": 0.4969, + "step": 13906 + }, + { + "epoch": 0.24030619297760575, + "grad_norm": 1.32773777806017, + "learning_rate": 1.776884680362113e-05, + "loss": 0.6454, + "step": 13907 + }, + { + "epoch": 0.24032347249101466, + "grad_norm": 1.113797620140278, + "learning_rate": 1.7768494412094764e-05, + "loss": 0.5705, + "step": 13908 + }, + { + "epoch": 0.24034075200442356, + "grad_norm": 1.1599096456113718, + "learning_rate": 1.7768141996236885e-05, + "loss": 0.5391, + "step": 13909 + }, + { + "epoch": 0.24035803151783247, + "grad_norm": 1.3586267262439102, + "learning_rate": 1.7767789556048594e-05, + "loss": 0.488, + "step": 13910 + }, + { + "epoch": 0.24037531103124135, + "grad_norm": 1.1751043900614795, + "learning_rate": 1.7767437091530993e-05, + "loss": 0.464, + "step": 13911 + }, + { + "epoch": 0.24039259054465026, + "grad_norm": 0.8953588048449497, + "learning_rate": 1.7767084602685193e-05, + "loss": 0.6394, + "step": 13912 + }, + { + "epoch": 0.24040987005805917, + "grad_norm": 2.1699974981013543, + "learning_rate": 1.7766732089512292e-05, + "loss": 0.7124, + "step": 13913 + }, + { + "epoch": 0.24042714957146807, + "grad_norm": 1.1392688202188772, + "learning_rate": 1.7766379552013397e-05, + "loss": 0.6101, + "step": 13914 + }, + { + "epoch": 0.24044442908487698, + "grad_norm": 0.8577021472343554, + "learning_rate": 1.7766026990189606e-05, + "loss": 0.4221, + "step": 13915 + }, + { + "epoch": 0.24046170859828586, + "grad_norm": 1.97821739061017, + "learning_rate": 1.7765674404042035e-05, + "loss": 0.5045, + "step": 13916 + }, + { + "epoch": 0.24047898811169477, + "grad_norm": 0.803750417221399, + "learning_rate": 1.7765321793571777e-05, + "loss": 0.6562, + "step": 13917 + }, + { + "epoch": 0.24049626762510368, + "grad_norm": 1.235891683392304, + "learning_rate": 1.7764969158779943e-05, + "loss": 0.5468, + "step": 13918 + }, + { + "epoch": 0.24051354713851258, + "grad_norm": 1.043190106287117, + "learning_rate": 1.7764616499667636e-05, + "loss": 0.4024, + "step": 13919 + }, + { + "epoch": 0.2405308266519215, + "grad_norm": 0.6829429063651717, + "learning_rate": 1.776426381623596e-05, + "loss": 0.2962, + "step": 13920 + }, + { + "epoch": 0.24054810616533037, + "grad_norm": 0.7248416105039773, + "learning_rate": 1.7763911108486016e-05, + "loss": 0.4384, + "step": 13921 + }, + { + "epoch": 0.24056538567873928, + "grad_norm": 1.0699644638100605, + "learning_rate": 1.7763558376418917e-05, + "loss": 0.5706, + "step": 13922 + }, + { + "epoch": 0.2405826651921482, + "grad_norm": 1.0715956627287087, + "learning_rate": 1.776320562003576e-05, + "loss": 0.5547, + "step": 13923 + }, + { + "epoch": 0.2405999447055571, + "grad_norm": 2.101254822292212, + "learning_rate": 1.7762852839337654e-05, + "loss": 0.6757, + "step": 13924 + }, + { + "epoch": 0.240617224218966, + "grad_norm": 0.8458829155778272, + "learning_rate": 1.7762500034325703e-05, + "loss": 0.3343, + "step": 13925 + }, + { + "epoch": 0.24063450373237488, + "grad_norm": 1.2631516213687495, + "learning_rate": 1.7762147205001014e-05, + "loss": 0.5937, + "step": 13926 + }, + { + "epoch": 0.2406517832457838, + "grad_norm": 0.9714692608936788, + "learning_rate": 1.7761794351364686e-05, + "loss": 0.5824, + "step": 13927 + }, + { + "epoch": 0.2406690627591927, + "grad_norm": 1.5264465813272299, + "learning_rate": 1.7761441473417832e-05, + "loss": 0.692, + "step": 13928 + }, + { + "epoch": 0.2406863422726016, + "grad_norm": 1.1850808407185793, + "learning_rate": 1.7761088571161552e-05, + "loss": 0.471, + "step": 13929 + }, + { + "epoch": 0.2407036217860105, + "grad_norm": 1.3531360582318412, + "learning_rate": 1.7760735644596952e-05, + "loss": 0.7014, + "step": 13930 + }, + { + "epoch": 0.24072090129941942, + "grad_norm": 1.1643863247213944, + "learning_rate": 1.7760382693725138e-05, + "loss": 0.5521, + "step": 13931 + }, + { + "epoch": 0.2407381808128283, + "grad_norm": 1.0510822191548772, + "learning_rate": 1.776002971854722e-05, + "loss": 0.6865, + "step": 13932 + }, + { + "epoch": 0.2407554603262372, + "grad_norm": 0.6942690914776293, + "learning_rate": 1.7759676719064294e-05, + "loss": 0.5998, + "step": 13933 + }, + { + "epoch": 0.24077273983964612, + "grad_norm": 1.3445380377980913, + "learning_rate": 1.775932369527747e-05, + "loss": 0.6591, + "step": 13934 + }, + { + "epoch": 0.24079001935305502, + "grad_norm": 0.7547897455709076, + "learning_rate": 1.7758970647187858e-05, + "loss": 0.6554, + "step": 13935 + }, + { + "epoch": 0.24080729886646393, + "grad_norm": 0.8359359968057581, + "learning_rate": 1.775861757479656e-05, + "loss": 0.5046, + "step": 13936 + }, + { + "epoch": 0.2408245783798728, + "grad_norm": 0.7890154362595253, + "learning_rate": 1.7758264478104678e-05, + "loss": 0.5197, + "step": 13937 + }, + { + "epoch": 0.24084185789328172, + "grad_norm": 0.7056007668216536, + "learning_rate": 1.7757911357113327e-05, + "loss": 0.5533, + "step": 13938 + }, + { + "epoch": 0.24085913740669063, + "grad_norm": 0.4029067100191667, + "learning_rate": 1.7757558211823607e-05, + "loss": 0.821, + "step": 13939 + }, + { + "epoch": 0.24087641692009953, + "grad_norm": 0.9500884247842835, + "learning_rate": 1.7757205042236624e-05, + "loss": 0.6954, + "step": 13940 + }, + { + "epoch": 0.24089369643350844, + "grad_norm": 1.1118971131602002, + "learning_rate": 1.7756851848353487e-05, + "loss": 0.5421, + "step": 13941 + }, + { + "epoch": 0.24091097594691732, + "grad_norm": 0.8736825462007802, + "learning_rate": 1.77564986301753e-05, + "loss": 0.4614, + "step": 13942 + }, + { + "epoch": 0.24092825546032623, + "grad_norm": 0.8215821773821985, + "learning_rate": 1.775614538770317e-05, + "loss": 0.4901, + "step": 13943 + }, + { + "epoch": 0.24094553497373514, + "grad_norm": 0.6915416499685133, + "learning_rate": 1.77557921209382e-05, + "loss": 0.5312, + "step": 13944 + }, + { + "epoch": 0.24096281448714404, + "grad_norm": 0.5870191529625057, + "learning_rate": 1.7755438829881503e-05, + "loss": 0.4344, + "step": 13945 + }, + { + "epoch": 0.24098009400055295, + "grad_norm": 1.4998425662141395, + "learning_rate": 1.775508551453418e-05, + "loss": 0.5717, + "step": 13946 + }, + { + "epoch": 0.24099737351396186, + "grad_norm": 1.3099944772999284, + "learning_rate": 1.7754732174897344e-05, + "loss": 0.7042, + "step": 13947 + }, + { + "epoch": 0.24101465302737074, + "grad_norm": 0.7818855849369103, + "learning_rate": 1.7754378810972092e-05, + "loss": 0.4417, + "step": 13948 + }, + { + "epoch": 0.24103193254077965, + "grad_norm": 1.6451036475592915, + "learning_rate": 1.7754025422759536e-05, + "loss": 0.4449, + "step": 13949 + }, + { + "epoch": 0.24104921205418856, + "grad_norm": 1.2166270683676874, + "learning_rate": 1.775367201026079e-05, + "loss": 0.4095, + "step": 13950 + }, + { + "epoch": 0.24106649156759746, + "grad_norm": 1.8154451300873493, + "learning_rate": 1.7753318573476946e-05, + "loss": 0.4671, + "step": 13951 + }, + { + "epoch": 0.24108377108100637, + "grad_norm": 1.100038162062855, + "learning_rate": 1.7752965112409128e-05, + "loss": 0.4597, + "step": 13952 + }, + { + "epoch": 0.24110105059441525, + "grad_norm": 1.1472687637276777, + "learning_rate": 1.7752611627058427e-05, + "loss": 0.4799, + "step": 13953 + }, + { + "epoch": 0.24111833010782416, + "grad_norm": 1.2018869350374843, + "learning_rate": 1.7752258117425957e-05, + "loss": 0.521, + "step": 13954 + }, + { + "epoch": 0.24113560962123307, + "grad_norm": 0.9554459145775992, + "learning_rate": 1.7751904583512828e-05, + "loss": 0.5628, + "step": 13955 + }, + { + "epoch": 0.24115288913464197, + "grad_norm": 0.7597353176968231, + "learning_rate": 1.7751551025320146e-05, + "loss": 0.3999, + "step": 13956 + }, + { + "epoch": 0.24117016864805088, + "grad_norm": 1.0159454661552698, + "learning_rate": 1.7751197442849017e-05, + "loss": 0.3655, + "step": 13957 + }, + { + "epoch": 0.24118744816145976, + "grad_norm": 1.0141211000859875, + "learning_rate": 1.7750843836100544e-05, + "loss": 0.6473, + "step": 13958 + }, + { + "epoch": 0.24120472767486867, + "grad_norm": 0.832683223497829, + "learning_rate": 1.7750490205075843e-05, + "loss": 0.3884, + "step": 13959 + }, + { + "epoch": 0.24122200718827758, + "grad_norm": 1.5238894631915012, + "learning_rate": 1.7750136549776014e-05, + "loss": 0.7152, + "step": 13960 + }, + { + "epoch": 0.24123928670168648, + "grad_norm": 1.3004050259766555, + "learning_rate": 1.7749782870202173e-05, + "loss": 0.6392, + "step": 13961 + }, + { + "epoch": 0.2412565662150954, + "grad_norm": 1.2850739599314107, + "learning_rate": 1.774942916635542e-05, + "loss": 0.4137, + "step": 13962 + }, + { + "epoch": 0.24127384572850427, + "grad_norm": 0.7060139092128929, + "learning_rate": 1.774907543823687e-05, + "loss": 0.5389, + "step": 13963 + }, + { + "epoch": 0.24129112524191318, + "grad_norm": 0.8707880186532755, + "learning_rate": 1.774872168584762e-05, + "loss": 0.3776, + "step": 13964 + }, + { + "epoch": 0.2413084047553221, + "grad_norm": 1.254141155046156, + "learning_rate": 1.774836790918879e-05, + "loss": 0.5485, + "step": 13965 + }, + { + "epoch": 0.241325684268731, + "grad_norm": 0.871676571060705, + "learning_rate": 1.7748014108261482e-05, + "loss": 0.5164, + "step": 13966 + }, + { + "epoch": 0.2413429637821399, + "grad_norm": 1.7283411720508117, + "learning_rate": 1.7747660283066803e-05, + "loss": 0.4567, + "step": 13967 + }, + { + "epoch": 0.2413602432955488, + "grad_norm": 0.8418875508944864, + "learning_rate": 1.7747306433605868e-05, + "loss": 0.5835, + "step": 13968 + }, + { + "epoch": 0.2413775228089577, + "grad_norm": 0.7058543874260211, + "learning_rate": 1.7746952559879776e-05, + "loss": 0.3797, + "step": 13969 + }, + { + "epoch": 0.2413948023223666, + "grad_norm": 1.2547256351289877, + "learning_rate": 1.774659866188964e-05, + "loss": 0.6848, + "step": 13970 + }, + { + "epoch": 0.2414120818357755, + "grad_norm": 0.9846282720836952, + "learning_rate": 1.774624473963657e-05, + "loss": 0.6, + "step": 13971 + }, + { + "epoch": 0.2414293613491844, + "grad_norm": 0.9152173036242837, + "learning_rate": 1.774589079312167e-05, + "loss": 0.6579, + "step": 13972 + }, + { + "epoch": 0.24144664086259332, + "grad_norm": 0.7576738626989261, + "learning_rate": 1.7745536822346053e-05, + "loss": 0.6281, + "step": 13973 + }, + { + "epoch": 0.2414639203760022, + "grad_norm": 1.017226249873109, + "learning_rate": 1.7745182827310827e-05, + "loss": 0.6177, + "step": 13974 + }, + { + "epoch": 0.2414811998894111, + "grad_norm": 0.7112660060765523, + "learning_rate": 1.7744828808017102e-05, + "loss": 0.3761, + "step": 13975 + }, + { + "epoch": 0.24149847940282002, + "grad_norm": 1.3414983884266398, + "learning_rate": 1.7744474764465983e-05, + "loss": 0.5361, + "step": 13976 + }, + { + "epoch": 0.24151575891622892, + "grad_norm": 1.242674494859236, + "learning_rate": 1.7744120696658575e-05, + "loss": 0.4916, + "step": 13977 + }, + { + "epoch": 0.24153303842963783, + "grad_norm": 1.0364013569292558, + "learning_rate": 1.7743766604596e-05, + "loss": 0.712, + "step": 13978 + }, + { + "epoch": 0.2415503179430467, + "grad_norm": 1.9990494535439758, + "learning_rate": 1.7743412488279355e-05, + "loss": 0.7532, + "step": 13979 + }, + { + "epoch": 0.24156759745645562, + "grad_norm": 0.6545948313122845, + "learning_rate": 1.7743058347709754e-05, + "loss": 0.4575, + "step": 13980 + }, + { + "epoch": 0.24158487696986453, + "grad_norm": 1.1325645395657489, + "learning_rate": 1.7742704182888308e-05, + "loss": 0.7035, + "step": 13981 + }, + { + "epoch": 0.24160215648327343, + "grad_norm": 1.2083130602465344, + "learning_rate": 1.7742349993816122e-05, + "loss": 0.4672, + "step": 13982 + }, + { + "epoch": 0.24161943599668234, + "grad_norm": 0.6136661299580575, + "learning_rate": 1.7741995780494308e-05, + "loss": 0.3609, + "step": 13983 + }, + { + "epoch": 0.24163671551009125, + "grad_norm": 1.6427664165617124, + "learning_rate": 1.7741641542923974e-05, + "loss": 0.5037, + "step": 13984 + }, + { + "epoch": 0.24165399502350013, + "grad_norm": 1.8323120300630662, + "learning_rate": 1.774128728110623e-05, + "loss": 0.47, + "step": 13985 + }, + { + "epoch": 0.24167127453690904, + "grad_norm": 0.8313982657811085, + "learning_rate": 1.774093299504219e-05, + "loss": 0.3702, + "step": 13986 + }, + { + "epoch": 0.24168855405031794, + "grad_norm": 0.9632392825421048, + "learning_rate": 1.7740578684732955e-05, + "loss": 0.6181, + "step": 13987 + }, + { + "epoch": 0.24170583356372685, + "grad_norm": 0.8150001294641182, + "learning_rate": 1.7740224350179642e-05, + "loss": 0.4312, + "step": 13988 + }, + { + "epoch": 0.24172311307713576, + "grad_norm": 0.654644489323144, + "learning_rate": 1.7739869991383355e-05, + "loss": 0.6115, + "step": 13989 + }, + { + "epoch": 0.24174039259054464, + "grad_norm": 0.7399647026208335, + "learning_rate": 1.773951560834521e-05, + "loss": 0.503, + "step": 13990 + }, + { + "epoch": 0.24175767210395355, + "grad_norm": 0.4149307536531977, + "learning_rate": 1.7739161201066314e-05, + "loss": 0.6651, + "step": 13991 + }, + { + "epoch": 0.24177495161736245, + "grad_norm": 0.6369984321929831, + "learning_rate": 1.7738806769547776e-05, + "loss": 0.3708, + "step": 13992 + }, + { + "epoch": 0.24179223113077136, + "grad_norm": 1.235985740781452, + "learning_rate": 1.773845231379071e-05, + "loss": 0.4193, + "step": 13993 + }, + { + "epoch": 0.24180951064418027, + "grad_norm": 0.7270855658942397, + "learning_rate": 1.7738097833796218e-05, + "loss": 0.5072, + "step": 13994 + }, + { + "epoch": 0.24182679015758915, + "grad_norm": 0.9438847972145428, + "learning_rate": 1.7737743329565414e-05, + "loss": 0.4574, + "step": 13995 + }, + { + "epoch": 0.24184406967099806, + "grad_norm": 0.8351977046108446, + "learning_rate": 1.773738880109942e-05, + "loss": 0.356, + "step": 13996 + }, + { + "epoch": 0.24186134918440697, + "grad_norm": 1.4614458342887529, + "learning_rate": 1.7737034248399326e-05, + "loss": 0.7532, + "step": 13997 + }, + { + "epoch": 0.24187862869781587, + "grad_norm": 1.0552353634938099, + "learning_rate": 1.7736679671466258e-05, + "loss": 0.6647, + "step": 13998 + }, + { + "epoch": 0.24189590821122478, + "grad_norm": 1.2930670196419107, + "learning_rate": 1.773632507030132e-05, + "loss": 0.6111, + "step": 13999 + }, + { + "epoch": 0.2419131877246337, + "grad_norm": 0.8775739604228137, + "learning_rate": 1.7735970444905623e-05, + "loss": 0.4414, + "step": 14000 + }, + { + "epoch": 0.24193046723804257, + "grad_norm": 0.7448406563688842, + "learning_rate": 1.773561579528028e-05, + "loss": 0.6514, + "step": 14001 + }, + { + "epoch": 0.24194774675145148, + "grad_norm": 0.8407995195657124, + "learning_rate": 1.77352611214264e-05, + "loss": 0.664, + "step": 14002 + }, + { + "epoch": 0.24196502626486038, + "grad_norm": 1.389446444018631, + "learning_rate": 1.7734906423345097e-05, + "loss": 0.6901, + "step": 14003 + }, + { + "epoch": 0.2419823057782693, + "grad_norm": 0.8320317370391761, + "learning_rate": 1.7734551701037475e-05, + "loss": 0.4013, + "step": 14004 + }, + { + "epoch": 0.2419995852916782, + "grad_norm": 0.8698270482115531, + "learning_rate": 1.773419695450465e-05, + "loss": 0.5337, + "step": 14005 + }, + { + "epoch": 0.24201686480508708, + "grad_norm": 0.8339987534411009, + "learning_rate": 1.7733842183747732e-05, + "loss": 0.3796, + "step": 14006 + }, + { + "epoch": 0.242034144318496, + "grad_norm": 1.0726687958629726, + "learning_rate": 1.7733487388767835e-05, + "loss": 0.3833, + "step": 14007 + }, + { + "epoch": 0.2420514238319049, + "grad_norm": 1.1057284640402818, + "learning_rate": 1.7733132569566067e-05, + "loss": 0.393, + "step": 14008 + }, + { + "epoch": 0.2420687033453138, + "grad_norm": 0.4915395288727515, + "learning_rate": 1.773277772614354e-05, + "loss": 0.6089, + "step": 14009 + }, + { + "epoch": 0.2420859828587227, + "grad_norm": 1.0075080276112454, + "learning_rate": 1.7732422858501363e-05, + "loss": 0.5105, + "step": 14010 + }, + { + "epoch": 0.2421032623721316, + "grad_norm": 1.0827660159393087, + "learning_rate": 1.7732067966640653e-05, + "loss": 0.5231, + "step": 14011 + }, + { + "epoch": 0.2421205418855405, + "grad_norm": 0.8614026964576035, + "learning_rate": 1.7731713050562518e-05, + "loss": 0.6512, + "step": 14012 + }, + { + "epoch": 0.2421378213989494, + "grad_norm": 1.3153653095483802, + "learning_rate": 1.773135811026807e-05, + "loss": 0.4998, + "step": 14013 + }, + { + "epoch": 0.2421551009123583, + "grad_norm": 1.1090477919532225, + "learning_rate": 1.7731003145758417e-05, + "loss": 0.3672, + "step": 14014 + }, + { + "epoch": 0.24217238042576722, + "grad_norm": 1.2920083705150391, + "learning_rate": 1.773064815703468e-05, + "loss": 0.6631, + "step": 14015 + }, + { + "epoch": 0.2421896599391761, + "grad_norm": 0.8879690900860533, + "learning_rate": 1.7730293144097966e-05, + "loss": 0.302, + "step": 14016 + }, + { + "epoch": 0.242206939452585, + "grad_norm": 1.4772804236370782, + "learning_rate": 1.7729938106949384e-05, + "loss": 0.7092, + "step": 14017 + }, + { + "epoch": 0.24222421896599391, + "grad_norm": 1.5323523091442586, + "learning_rate": 1.7729583045590048e-05, + "loss": 0.7161, + "step": 14018 + }, + { + "epoch": 0.24224149847940282, + "grad_norm": 1.0924144148344581, + "learning_rate": 1.7729227960021068e-05, + "loss": 0.3592, + "step": 14019 + }, + { + "epoch": 0.24225877799281173, + "grad_norm": 0.7891815853208103, + "learning_rate": 1.7728872850243564e-05, + "loss": 0.5174, + "step": 14020 + }, + { + "epoch": 0.24227605750622064, + "grad_norm": 1.4549309157223784, + "learning_rate": 1.772851771625864e-05, + "loss": 0.6318, + "step": 14021 + }, + { + "epoch": 0.24229333701962952, + "grad_norm": 1.3160414438249166, + "learning_rate": 1.7728162558067414e-05, + "loss": 0.5666, + "step": 14022 + }, + { + "epoch": 0.24231061653303843, + "grad_norm": 1.2430344825119846, + "learning_rate": 1.772780737567099e-05, + "loss": 0.5769, + "step": 14023 + }, + { + "epoch": 0.24232789604644733, + "grad_norm": 1.1093340867682342, + "learning_rate": 1.7727452169070488e-05, + "loss": 0.6523, + "step": 14024 + }, + { + "epoch": 0.24234517555985624, + "grad_norm": 1.1132553313985587, + "learning_rate": 1.7727096938267023e-05, + "loss": 0.5644, + "step": 14025 + }, + { + "epoch": 0.24236245507326515, + "grad_norm": 1.0606034038854455, + "learning_rate": 1.7726741683261698e-05, + "loss": 0.5863, + "step": 14026 + }, + { + "epoch": 0.24237973458667403, + "grad_norm": 1.0558299306826162, + "learning_rate": 1.7726386404055633e-05, + "loss": 0.6053, + "step": 14027 + }, + { + "epoch": 0.24239701410008294, + "grad_norm": 0.9374728806200225, + "learning_rate": 1.7726031100649938e-05, + "loss": 0.5375, + "step": 14028 + }, + { + "epoch": 0.24241429361349184, + "grad_norm": 0.9928605531589324, + "learning_rate": 1.7725675773045728e-05, + "loss": 0.3645, + "step": 14029 + }, + { + "epoch": 0.24243157312690075, + "grad_norm": 0.5773909108648103, + "learning_rate": 1.772532042124411e-05, + "loss": 0.911, + "step": 14030 + }, + { + "epoch": 0.24244885264030966, + "grad_norm": 0.9752331519376364, + "learning_rate": 1.7724965045246208e-05, + "loss": 0.5797, + "step": 14031 + }, + { + "epoch": 0.24246613215371854, + "grad_norm": 0.7753663205785112, + "learning_rate": 1.7724609645053124e-05, + "loss": 0.5631, + "step": 14032 + }, + { + "epoch": 0.24248341166712745, + "grad_norm": 1.1852594718214375, + "learning_rate": 1.7724254220665974e-05, + "loss": 0.475, + "step": 14033 + }, + { + "epoch": 0.24250069118053635, + "grad_norm": 0.830413602959384, + "learning_rate": 1.7723898772085876e-05, + "loss": 0.6629, + "step": 14034 + }, + { + "epoch": 0.24251797069394526, + "grad_norm": 0.7730730999009179, + "learning_rate": 1.772354329931394e-05, + "loss": 0.5974, + "step": 14035 + }, + { + "epoch": 0.24253525020735417, + "grad_norm": 1.4263098671644092, + "learning_rate": 1.772318780235128e-05, + "loss": 0.6815, + "step": 14036 + }, + { + "epoch": 0.24255252972076308, + "grad_norm": 0.544448205661149, + "learning_rate": 1.7722832281199007e-05, + "loss": 0.4325, + "step": 14037 + }, + { + "epoch": 0.24256980923417196, + "grad_norm": 1.8407648148159217, + "learning_rate": 1.7722476735858233e-05, + "loss": 0.4747, + "step": 14038 + }, + { + "epoch": 0.24258708874758086, + "grad_norm": 1.3712787692430184, + "learning_rate": 1.7722121166330083e-05, + "loss": 0.6393, + "step": 14039 + }, + { + "epoch": 0.24260436826098977, + "grad_norm": 0.9932166299350571, + "learning_rate": 1.7721765572615656e-05, + "loss": 0.411, + "step": 14040 + }, + { + "epoch": 0.24262164777439868, + "grad_norm": 0.9281115774460472, + "learning_rate": 1.7721409954716077e-05, + "loss": 0.4439, + "step": 14041 + }, + { + "epoch": 0.2426389272878076, + "grad_norm": 1.0949337930089642, + "learning_rate": 1.772105431263245e-05, + "loss": 0.5511, + "step": 14042 + }, + { + "epoch": 0.24265620680121647, + "grad_norm": 0.7810915341818688, + "learning_rate": 1.7720698646365898e-05, + "loss": 0.5658, + "step": 14043 + }, + { + "epoch": 0.24267348631462538, + "grad_norm": 0.3958206687626405, + "learning_rate": 1.772034295591753e-05, + "loss": 0.7041, + "step": 14044 + }, + { + "epoch": 0.24269076582803428, + "grad_norm": 1.5139465871293967, + "learning_rate": 1.7719987241288463e-05, + "loss": 0.6604, + "step": 14045 + }, + { + "epoch": 0.2427080453414432, + "grad_norm": 0.5221536545772233, + "learning_rate": 1.771963150247981e-05, + "loss": 0.4493, + "step": 14046 + }, + { + "epoch": 0.2427253248548521, + "grad_norm": 1.2461020089780652, + "learning_rate": 1.7719275739492682e-05, + "loss": 0.6796, + "step": 14047 + }, + { + "epoch": 0.24274260436826098, + "grad_norm": 1.1849615054703873, + "learning_rate": 1.771891995232819e-05, + "loss": 0.6317, + "step": 14048 + }, + { + "epoch": 0.24275988388166989, + "grad_norm": 1.619156004504215, + "learning_rate": 1.7718564140987463e-05, + "loss": 0.7819, + "step": 14049 + }, + { + "epoch": 0.2427771633950788, + "grad_norm": 0.7550589928988877, + "learning_rate": 1.77182083054716e-05, + "loss": 0.4611, + "step": 14050 + }, + { + "epoch": 0.2427944429084877, + "grad_norm": 1.0925474990737245, + "learning_rate": 1.7717852445781728e-05, + "loss": 0.5065, + "step": 14051 + }, + { + "epoch": 0.2428117224218966, + "grad_norm": 0.911736942435686, + "learning_rate": 1.7717496561918954e-05, + "loss": 0.5723, + "step": 14052 + }, + { + "epoch": 0.2428290019353055, + "grad_norm": 0.5854176206145387, + "learning_rate": 1.7717140653884393e-05, + "loss": 0.2645, + "step": 14053 + }, + { + "epoch": 0.2428462814487144, + "grad_norm": 1.4675750968170187, + "learning_rate": 1.771678472167916e-05, + "loss": 0.792, + "step": 14054 + }, + { + "epoch": 0.2428635609621233, + "grad_norm": 1.1450602986116991, + "learning_rate": 1.7716428765304373e-05, + "loss": 0.6896, + "step": 14055 + }, + { + "epoch": 0.2428808404755322, + "grad_norm": 2.0770287014769013, + "learning_rate": 1.7716072784761143e-05, + "loss": 0.7835, + "step": 14056 + }, + { + "epoch": 0.24289811998894112, + "grad_norm": 0.7285396248778969, + "learning_rate": 1.771571678005059e-05, + "loss": 0.4269, + "step": 14057 + }, + { + "epoch": 0.24291539950235003, + "grad_norm": 0.7918912750881542, + "learning_rate": 1.771536075117382e-05, + "loss": 0.3789, + "step": 14058 + }, + { + "epoch": 0.2429326790157589, + "grad_norm": 0.899956111381204, + "learning_rate": 1.7715004698131958e-05, + "loss": 0.5763, + "step": 14059 + }, + { + "epoch": 0.24294995852916781, + "grad_norm": 0.7278582482116752, + "learning_rate": 1.7714648620926117e-05, + "loss": 0.4073, + "step": 14060 + }, + { + "epoch": 0.24296723804257672, + "grad_norm": 1.3154358359159237, + "learning_rate": 1.7714292519557405e-05, + "loss": 0.607, + "step": 14061 + }, + { + "epoch": 0.24298451755598563, + "grad_norm": 1.7159949056777672, + "learning_rate": 1.7713936394026945e-05, + "loss": 0.6504, + "step": 14062 + }, + { + "epoch": 0.24300179706939454, + "grad_norm": 0.8845748767905034, + "learning_rate": 1.7713580244335847e-05, + "loss": 0.4237, + "step": 14063 + }, + { + "epoch": 0.24301907658280342, + "grad_norm": 1.099765118630514, + "learning_rate": 1.7713224070485234e-05, + "loss": 0.6499, + "step": 14064 + }, + { + "epoch": 0.24303635609621232, + "grad_norm": 1.4161695776429624, + "learning_rate": 1.7712867872476215e-05, + "loss": 0.6353, + "step": 14065 + }, + { + "epoch": 0.24305363560962123, + "grad_norm": 0.8875706834501472, + "learning_rate": 1.7712511650309908e-05, + "loss": 0.6484, + "step": 14066 + }, + { + "epoch": 0.24307091512303014, + "grad_norm": 1.0742463295867557, + "learning_rate": 1.771215540398743e-05, + "loss": 0.4214, + "step": 14067 + }, + { + "epoch": 0.24308819463643905, + "grad_norm": 0.9540174203542474, + "learning_rate": 1.771179913350989e-05, + "loss": 0.5597, + "step": 14068 + }, + { + "epoch": 0.24310547414984793, + "grad_norm": 0.4764380708860749, + "learning_rate": 1.7711442838878415e-05, + "loss": 0.6429, + "step": 14069 + }, + { + "epoch": 0.24312275366325684, + "grad_norm": 0.7084239764290912, + "learning_rate": 1.771108652009411e-05, + "loss": 0.4217, + "step": 14070 + }, + { + "epoch": 0.24314003317666574, + "grad_norm": 1.7725759938287302, + "learning_rate": 1.7710730177158098e-05, + "loss": 0.6211, + "step": 14071 + }, + { + "epoch": 0.24315731269007465, + "grad_norm": 0.9251292999629654, + "learning_rate": 1.7710373810071494e-05, + "loss": 0.6293, + "step": 14072 + }, + { + "epoch": 0.24317459220348356, + "grad_norm": 1.553161111367125, + "learning_rate": 1.7710017418835415e-05, + "loss": 0.6558, + "step": 14073 + }, + { + "epoch": 0.24319187171689247, + "grad_norm": 0.9185812978182801, + "learning_rate": 1.7709661003450972e-05, + "loss": 0.6165, + "step": 14074 + }, + { + "epoch": 0.24320915123030135, + "grad_norm": 0.7092225307868373, + "learning_rate": 1.7709304563919287e-05, + "loss": 0.343, + "step": 14075 + }, + { + "epoch": 0.24322643074371025, + "grad_norm": 0.4513878827450507, + "learning_rate": 1.770894810024147e-05, + "loss": 0.5905, + "step": 14076 + }, + { + "epoch": 0.24324371025711916, + "grad_norm": 0.6975349679763637, + "learning_rate": 1.7708591612418645e-05, + "loss": 0.3972, + "step": 14077 + }, + { + "epoch": 0.24326098977052807, + "grad_norm": 0.668926176100715, + "learning_rate": 1.7708235100451928e-05, + "loss": 0.372, + "step": 14078 + }, + { + "epoch": 0.24327826928393698, + "grad_norm": 0.7227347759926208, + "learning_rate": 1.770787856434243e-05, + "loss": 0.3985, + "step": 14079 + }, + { + "epoch": 0.24329554879734586, + "grad_norm": 0.8782856799583594, + "learning_rate": 1.770752200409127e-05, + "loss": 0.4412, + "step": 14080 + }, + { + "epoch": 0.24331282831075476, + "grad_norm": 1.1279319674084305, + "learning_rate": 1.7707165419699567e-05, + "loss": 0.5782, + "step": 14081 + }, + { + "epoch": 0.24333010782416367, + "grad_norm": 1.2649391652894248, + "learning_rate": 1.7706808811168434e-05, + "loss": 0.5314, + "step": 14082 + }, + { + "epoch": 0.24334738733757258, + "grad_norm": 1.3696915212945635, + "learning_rate": 1.7706452178498993e-05, + "loss": 0.601, + "step": 14083 + }, + { + "epoch": 0.2433646668509815, + "grad_norm": 1.5818348912893068, + "learning_rate": 1.7706095521692355e-05, + "loss": 0.5211, + "step": 14084 + }, + { + "epoch": 0.24338194636439037, + "grad_norm": 0.8076092435211265, + "learning_rate": 1.7705738840749643e-05, + "loss": 0.5686, + "step": 14085 + }, + { + "epoch": 0.24339922587779927, + "grad_norm": 1.7206395453092704, + "learning_rate": 1.7705382135671967e-05, + "loss": 0.5212, + "step": 14086 + }, + { + "epoch": 0.24341650539120818, + "grad_norm": 0.9334480778260297, + "learning_rate": 1.770502540646045e-05, + "loss": 0.3398, + "step": 14087 + }, + { + "epoch": 0.2434337849046171, + "grad_norm": 1.3585286303651716, + "learning_rate": 1.7704668653116212e-05, + "loss": 0.4889, + "step": 14088 + }, + { + "epoch": 0.243451064418026, + "grad_norm": 0.9933163898040152, + "learning_rate": 1.7704311875640363e-05, + "loss": 0.556, + "step": 14089 + }, + { + "epoch": 0.24346834393143488, + "grad_norm": 2.287168025462239, + "learning_rate": 1.7703955074034022e-05, + "loss": 0.5089, + "step": 14090 + }, + { + "epoch": 0.24348562344484379, + "grad_norm": 1.256430620445216, + "learning_rate": 1.7703598248298312e-05, + "loss": 0.5743, + "step": 14091 + }, + { + "epoch": 0.2435029029582527, + "grad_norm": 0.7641173585581754, + "learning_rate": 1.7703241398434346e-05, + "loss": 0.5598, + "step": 14092 + }, + { + "epoch": 0.2435201824716616, + "grad_norm": 1.3483185434441, + "learning_rate": 1.7702884524443244e-05, + "loss": 0.6936, + "step": 14093 + }, + { + "epoch": 0.2435374619850705, + "grad_norm": 0.9525216537396077, + "learning_rate": 1.7702527626326118e-05, + "loss": 0.3685, + "step": 14094 + }, + { + "epoch": 0.24355474149847942, + "grad_norm": 0.8656548792930963, + "learning_rate": 1.7702170704084092e-05, + "loss": 0.5304, + "step": 14095 + }, + { + "epoch": 0.2435720210118883, + "grad_norm": 1.1828970864230943, + "learning_rate": 1.7701813757718283e-05, + "loss": 0.6375, + "step": 14096 + }, + { + "epoch": 0.2435893005252972, + "grad_norm": 0.8092002456716278, + "learning_rate": 1.7701456787229805e-05, + "loss": 0.5135, + "step": 14097 + }, + { + "epoch": 0.2436065800387061, + "grad_norm": 1.0097157382818394, + "learning_rate": 1.7701099792619784e-05, + "loss": 0.5432, + "step": 14098 + }, + { + "epoch": 0.24362385955211502, + "grad_norm": 0.8264187761549627, + "learning_rate": 1.770074277388933e-05, + "loss": 0.4255, + "step": 14099 + }, + { + "epoch": 0.24364113906552393, + "grad_norm": 1.195061767965784, + "learning_rate": 1.7700385731039564e-05, + "loss": 0.5236, + "step": 14100 + }, + { + "epoch": 0.2436584185789328, + "grad_norm": 1.1108971494331172, + "learning_rate": 1.7700028664071605e-05, + "loss": 0.501, + "step": 14101 + }, + { + "epoch": 0.2436756980923417, + "grad_norm": 1.0472482074397411, + "learning_rate": 1.769967157298657e-05, + "loss": 0.5366, + "step": 14102 + }, + { + "epoch": 0.24369297760575062, + "grad_norm": 0.8397385590725649, + "learning_rate": 1.7699314457785584e-05, + "loss": 0.615, + "step": 14103 + }, + { + "epoch": 0.24371025711915953, + "grad_norm": 1.7297464116639745, + "learning_rate": 1.7698957318469755e-05, + "loss": 0.4793, + "step": 14104 + }, + { + "epoch": 0.24372753663256844, + "grad_norm": 1.1923109610374278, + "learning_rate": 1.7698600155040206e-05, + "loss": 0.5257, + "step": 14105 + }, + { + "epoch": 0.24374481614597732, + "grad_norm": 0.7046589310847869, + "learning_rate": 1.769824296749806e-05, + "loss": 0.5504, + "step": 14106 + }, + { + "epoch": 0.24376209565938622, + "grad_norm": 1.2086069982585326, + "learning_rate": 1.7697885755844427e-05, + "loss": 0.4358, + "step": 14107 + }, + { + "epoch": 0.24377937517279513, + "grad_norm": 1.0706338289598576, + "learning_rate": 1.7697528520080433e-05, + "loss": 0.5729, + "step": 14108 + }, + { + "epoch": 0.24379665468620404, + "grad_norm": 0.8650204057729876, + "learning_rate": 1.7697171260207198e-05, + "loss": 0.3956, + "step": 14109 + }, + { + "epoch": 0.24381393419961295, + "grad_norm": 1.1039346596173856, + "learning_rate": 1.7696813976225833e-05, + "loss": 0.549, + "step": 14110 + }, + { + "epoch": 0.24383121371302185, + "grad_norm": 0.8006494758289917, + "learning_rate": 1.7696456668137463e-05, + "loss": 0.4407, + "step": 14111 + }, + { + "epoch": 0.24384849322643073, + "grad_norm": 1.0894901536410504, + "learning_rate": 1.7696099335943206e-05, + "loss": 0.582, + "step": 14112 + }, + { + "epoch": 0.24386577273983964, + "grad_norm": 1.1946931473047653, + "learning_rate": 1.7695741979644184e-05, + "loss": 0.3906, + "step": 14113 + }, + { + "epoch": 0.24388305225324855, + "grad_norm": 1.2298393201031252, + "learning_rate": 1.7695384599241512e-05, + "loss": 0.4786, + "step": 14114 + }, + { + "epoch": 0.24390033176665746, + "grad_norm": 1.0933066375250262, + "learning_rate": 1.7695027194736306e-05, + "loss": 0.4806, + "step": 14115 + }, + { + "epoch": 0.24391761128006637, + "grad_norm": 0.8366638496122051, + "learning_rate": 1.7694669766129693e-05, + "loss": 0.5362, + "step": 14116 + }, + { + "epoch": 0.24393489079347525, + "grad_norm": 0.9899647762830817, + "learning_rate": 1.769431231342279e-05, + "loss": 0.4646, + "step": 14117 + }, + { + "epoch": 0.24395217030688415, + "grad_norm": 0.8705743015631343, + "learning_rate": 1.7693954836616717e-05, + "loss": 0.4347, + "step": 14118 + }, + { + "epoch": 0.24396944982029306, + "grad_norm": 3.9412677385962938, + "learning_rate": 1.7693597335712592e-05, + "loss": 1.236, + "step": 14119 + }, + { + "epoch": 0.24398672933370197, + "grad_norm": 0.6063519335023615, + "learning_rate": 1.7693239810711536e-05, + "loss": 0.4971, + "step": 14120 + }, + { + "epoch": 0.24400400884711088, + "grad_norm": 1.5012212068602284, + "learning_rate": 1.7692882261614666e-05, + "loss": 0.5382, + "step": 14121 + }, + { + "epoch": 0.24402128836051976, + "grad_norm": 1.2956919107846068, + "learning_rate": 1.7692524688423107e-05, + "loss": 0.5248, + "step": 14122 + }, + { + "epoch": 0.24403856787392866, + "grad_norm": 1.408560063442554, + "learning_rate": 1.7692167091137973e-05, + "loss": 0.6688, + "step": 14123 + }, + { + "epoch": 0.24405584738733757, + "grad_norm": 1.5198434533942256, + "learning_rate": 1.769180946976039e-05, + "loss": 0.5052, + "step": 14124 + }, + { + "epoch": 0.24407312690074648, + "grad_norm": 0.5942626047110356, + "learning_rate": 1.7691451824291476e-05, + "loss": 0.8368, + "step": 14125 + }, + { + "epoch": 0.2440904064141554, + "grad_norm": 0.797482818534826, + "learning_rate": 1.7691094154732344e-05, + "loss": 0.6408, + "step": 14126 + }, + { + "epoch": 0.24410768592756427, + "grad_norm": 1.4166541862559834, + "learning_rate": 1.7690736461084125e-05, + "loss": 0.657, + "step": 14127 + }, + { + "epoch": 0.24412496544097317, + "grad_norm": 0.9737363180302333, + "learning_rate": 1.7690378743347935e-05, + "loss": 0.436, + "step": 14128 + }, + { + "epoch": 0.24414224495438208, + "grad_norm": 1.0928193820781866, + "learning_rate": 1.7690021001524896e-05, + "loss": 0.6052, + "step": 14129 + }, + { + "epoch": 0.244159524467791, + "grad_norm": 0.8517697718920919, + "learning_rate": 1.7689663235616122e-05, + "loss": 0.4985, + "step": 14130 + }, + { + "epoch": 0.2441768039811999, + "grad_norm": 1.1174045565092399, + "learning_rate": 1.7689305445622743e-05, + "loss": 0.5814, + "step": 14131 + }, + { + "epoch": 0.2441940834946088, + "grad_norm": 1.241268273279682, + "learning_rate": 1.7688947631545873e-05, + "loss": 0.5834, + "step": 14132 + }, + { + "epoch": 0.24421136300801768, + "grad_norm": 0.9620004770360978, + "learning_rate": 1.7688589793386636e-05, + "loss": 0.3359, + "step": 14133 + }, + { + "epoch": 0.2442286425214266, + "grad_norm": 1.0196832260411688, + "learning_rate": 1.768823193114615e-05, + "loss": 0.5802, + "step": 14134 + }, + { + "epoch": 0.2442459220348355, + "grad_norm": 1.1303241561301132, + "learning_rate": 1.768787404482554e-05, + "loss": 0.5281, + "step": 14135 + }, + { + "epoch": 0.2442632015482444, + "grad_norm": 0.7173163103147961, + "learning_rate": 1.7687516134425918e-05, + "loss": 0.4384, + "step": 14136 + }, + { + "epoch": 0.24428048106165332, + "grad_norm": 0.9888643767104781, + "learning_rate": 1.7687158199948416e-05, + "loss": 0.3178, + "step": 14137 + }, + { + "epoch": 0.2442977605750622, + "grad_norm": 0.6849052191822631, + "learning_rate": 1.768680024139415e-05, + "loss": 0.6004, + "step": 14138 + }, + { + "epoch": 0.2443150400884711, + "grad_norm": 1.6631972507704427, + "learning_rate": 1.768644225876424e-05, + "loss": 0.6683, + "step": 14139 + }, + { + "epoch": 0.24433231960188, + "grad_norm": 1.0258251824991798, + "learning_rate": 1.768608425205981e-05, + "loss": 0.3531, + "step": 14140 + }, + { + "epoch": 0.24434959911528892, + "grad_norm": 1.0077044972490752, + "learning_rate": 1.7685726221281977e-05, + "loss": 0.6343, + "step": 14141 + }, + { + "epoch": 0.24436687862869783, + "grad_norm": 0.486643972228634, + "learning_rate": 1.768536816643187e-05, + "loss": 0.6794, + "step": 14142 + }, + { + "epoch": 0.2443841581421067, + "grad_norm": 1.354682059752617, + "learning_rate": 1.7685010087510604e-05, + "loss": 0.446, + "step": 14143 + }, + { + "epoch": 0.2444014376555156, + "grad_norm": 1.210476581623412, + "learning_rate": 1.76846519845193e-05, + "loss": 0.2666, + "step": 14144 + }, + { + "epoch": 0.24441871716892452, + "grad_norm": 0.7136391942779773, + "learning_rate": 1.7684293857459086e-05, + "loss": 0.6458, + "step": 14145 + }, + { + "epoch": 0.24443599668233343, + "grad_norm": 1.6038700163899473, + "learning_rate": 1.7683935706331075e-05, + "loss": 0.6419, + "step": 14146 + }, + { + "epoch": 0.24445327619574234, + "grad_norm": 0.5057026772411171, + "learning_rate": 1.76835775311364e-05, + "loss": 0.7696, + "step": 14147 + }, + { + "epoch": 0.24447055570915124, + "grad_norm": 1.4201771996355013, + "learning_rate": 1.7683219331876175e-05, + "loss": 0.571, + "step": 14148 + }, + { + "epoch": 0.24448783522256012, + "grad_norm": 1.2239941070560898, + "learning_rate": 1.7682861108551517e-05, + "loss": 0.4389, + "step": 14149 + }, + { + "epoch": 0.24450511473596903, + "grad_norm": 1.085605562499256, + "learning_rate": 1.768250286116356e-05, + "loss": 0.5007, + "step": 14150 + }, + { + "epoch": 0.24452239424937794, + "grad_norm": 0.5888456008079431, + "learning_rate": 1.7682144589713416e-05, + "loss": 0.3634, + "step": 14151 + }, + { + "epoch": 0.24453967376278685, + "grad_norm": 0.7298872354051176, + "learning_rate": 1.7681786294202212e-05, + "loss": 0.7231, + "step": 14152 + }, + { + "epoch": 0.24455695327619575, + "grad_norm": 1.1293750625648384, + "learning_rate": 1.7681427974631075e-05, + "loss": 0.4202, + "step": 14153 + }, + { + "epoch": 0.24457423278960463, + "grad_norm": 1.5039299693704795, + "learning_rate": 1.7681069631001115e-05, + "loss": 0.4511, + "step": 14154 + }, + { + "epoch": 0.24459151230301354, + "grad_norm": 1.2619105766176828, + "learning_rate": 1.7680711263313465e-05, + "loss": 0.5084, + "step": 14155 + }, + { + "epoch": 0.24460879181642245, + "grad_norm": 0.9488162722087933, + "learning_rate": 1.768035287156924e-05, + "loss": 0.5803, + "step": 14156 + }, + { + "epoch": 0.24462607132983136, + "grad_norm": 1.1237113613742797, + "learning_rate": 1.7679994455769568e-05, + "loss": 0.5474, + "step": 14157 + }, + { + "epoch": 0.24464335084324026, + "grad_norm": 1.02435103866726, + "learning_rate": 1.767963601591557e-05, + "loss": 0.6143, + "step": 14158 + }, + { + "epoch": 0.24466063035664914, + "grad_norm": 1.1595789522727418, + "learning_rate": 1.767927755200837e-05, + "loss": 0.4328, + "step": 14159 + }, + { + "epoch": 0.24467790987005805, + "grad_norm": 1.8651188428036678, + "learning_rate": 1.7678919064049087e-05, + "loss": 0.6288, + "step": 14160 + }, + { + "epoch": 0.24469518938346696, + "grad_norm": 1.0907346366266253, + "learning_rate": 1.7678560552038847e-05, + "loss": 0.569, + "step": 14161 + }, + { + "epoch": 0.24471246889687587, + "grad_norm": 0.3730839022861574, + "learning_rate": 1.767820201597877e-05, + "loss": 0.4637, + "step": 14162 + }, + { + "epoch": 0.24472974841028478, + "grad_norm": 0.710299817492714, + "learning_rate": 1.7677843455869984e-05, + "loss": 0.5302, + "step": 14163 + }, + { + "epoch": 0.24474702792369366, + "grad_norm": 2.2951748291678618, + "learning_rate": 1.7677484871713605e-05, + "loss": 0.504, + "step": 14164 + }, + { + "epoch": 0.24476430743710256, + "grad_norm": 1.7717476380841215, + "learning_rate": 1.7677126263510766e-05, + "loss": 0.5153, + "step": 14165 + }, + { + "epoch": 0.24478158695051147, + "grad_norm": 1.5124142900478408, + "learning_rate": 1.7676767631262576e-05, + "loss": 0.3566, + "step": 14166 + }, + { + "epoch": 0.24479886646392038, + "grad_norm": 2.2006660547095844, + "learning_rate": 1.7676408974970173e-05, + "loss": 0.4851, + "step": 14167 + }, + { + "epoch": 0.24481614597732929, + "grad_norm": 0.7765606869934283, + "learning_rate": 1.767605029463467e-05, + "loss": 0.3862, + "step": 14168 + }, + { + "epoch": 0.2448334254907382, + "grad_norm": 0.8604074013138403, + "learning_rate": 1.7675691590257192e-05, + "loss": 0.5364, + "step": 14169 + }, + { + "epoch": 0.24485070500414707, + "grad_norm": 1.3317926002422773, + "learning_rate": 1.767533286183887e-05, + "loss": 0.4385, + "step": 14170 + }, + { + "epoch": 0.24486798451755598, + "grad_norm": 0.9513221319989068, + "learning_rate": 1.767497410938082e-05, + "loss": 0.3129, + "step": 14171 + }, + { + "epoch": 0.2448852640309649, + "grad_norm": 0.7985848593921542, + "learning_rate": 1.7674615332884168e-05, + "loss": 0.3583, + "step": 14172 + }, + { + "epoch": 0.2449025435443738, + "grad_norm": 0.8942967288565705, + "learning_rate": 1.7674256532350037e-05, + "loss": 0.534, + "step": 14173 + }, + { + "epoch": 0.2449198230577827, + "grad_norm": 0.8380049092242905, + "learning_rate": 1.767389770777955e-05, + "loss": 0.5266, + "step": 14174 + }, + { + "epoch": 0.24493710257119158, + "grad_norm": 0.7911721850937545, + "learning_rate": 1.7673538859173836e-05, + "loss": 0.4659, + "step": 14175 + }, + { + "epoch": 0.2449543820846005, + "grad_norm": 0.8765118131573952, + "learning_rate": 1.7673179986534013e-05, + "loss": 0.4948, + "step": 14176 + }, + { + "epoch": 0.2449716615980094, + "grad_norm": 1.3042621191847872, + "learning_rate": 1.7672821089861206e-05, + "loss": 0.8206, + "step": 14177 + }, + { + "epoch": 0.2449889411114183, + "grad_norm": 1.4320444249395612, + "learning_rate": 1.7672462169156542e-05, + "loss": 0.4916, + "step": 14178 + }, + { + "epoch": 0.24500622062482721, + "grad_norm": 1.2085688177562888, + "learning_rate": 1.7672103224421144e-05, + "loss": 0.6028, + "step": 14179 + }, + { + "epoch": 0.2450235001382361, + "grad_norm": 1.3723199516278102, + "learning_rate": 1.7671744255656135e-05, + "loss": 0.5356, + "step": 14180 + }, + { + "epoch": 0.245040779651645, + "grad_norm": 1.0059933032141615, + "learning_rate": 1.7671385262862635e-05, + "loss": 0.515, + "step": 14181 + }, + { + "epoch": 0.2450580591650539, + "grad_norm": 1.6949239005562193, + "learning_rate": 1.767102624604178e-05, + "loss": 0.5456, + "step": 14182 + }, + { + "epoch": 0.24507533867846282, + "grad_norm": 1.0972968844878046, + "learning_rate": 1.7670667205194683e-05, + "loss": 0.5147, + "step": 14183 + }, + { + "epoch": 0.24509261819187173, + "grad_norm": 0.8474719775432792, + "learning_rate": 1.7670308140322478e-05, + "loss": 0.3685, + "step": 14184 + }, + { + "epoch": 0.24510989770528063, + "grad_norm": 1.097524743297652, + "learning_rate": 1.7669949051426286e-05, + "loss": 0.621, + "step": 14185 + }, + { + "epoch": 0.2451271772186895, + "grad_norm": 1.2329556319493449, + "learning_rate": 1.7669589938507224e-05, + "loss": 0.579, + "step": 14186 + }, + { + "epoch": 0.24514445673209842, + "grad_norm": 0.8875566204273143, + "learning_rate": 1.7669230801566427e-05, + "loss": 0.4156, + "step": 14187 + }, + { + "epoch": 0.24516173624550733, + "grad_norm": 0.7204036689533583, + "learning_rate": 1.7668871640605018e-05, + "loss": 0.621, + "step": 14188 + }, + { + "epoch": 0.24517901575891624, + "grad_norm": 0.9717985883634392, + "learning_rate": 1.766851245562412e-05, + "loss": 0.5492, + "step": 14189 + }, + { + "epoch": 0.24519629527232514, + "grad_norm": 0.9546589922443123, + "learning_rate": 1.7668153246624853e-05, + "loss": 0.5905, + "step": 14190 + }, + { + "epoch": 0.24521357478573402, + "grad_norm": 0.6572389180777533, + "learning_rate": 1.766779401360835e-05, + "loss": 0.6332, + "step": 14191 + }, + { + "epoch": 0.24523085429914293, + "grad_norm": 1.4341857177710573, + "learning_rate": 1.7667434756575737e-05, + "loss": 0.6246, + "step": 14192 + }, + { + "epoch": 0.24524813381255184, + "grad_norm": 1.1015522160203721, + "learning_rate": 1.7667075475528133e-05, + "loss": 0.5263, + "step": 14193 + }, + { + "epoch": 0.24526541332596075, + "grad_norm": 1.5167750964289437, + "learning_rate": 1.7666716170466663e-05, + "loss": 0.7782, + "step": 14194 + }, + { + "epoch": 0.24528269283936965, + "grad_norm": 0.914266736459215, + "learning_rate": 1.766635684139246e-05, + "loss": 0.547, + "step": 14195 + }, + { + "epoch": 0.24529997235277853, + "grad_norm": 1.4060333469144335, + "learning_rate": 1.7665997488306642e-05, + "loss": 0.4504, + "step": 14196 + }, + { + "epoch": 0.24531725186618744, + "grad_norm": 0.9424259818439648, + "learning_rate": 1.766563811121034e-05, + "loss": 0.3806, + "step": 14197 + }, + { + "epoch": 0.24533453137959635, + "grad_norm": 1.2557073734462916, + "learning_rate": 1.7665278710104676e-05, + "loss": 0.7025, + "step": 14198 + }, + { + "epoch": 0.24535181089300526, + "grad_norm": 0.612357065773229, + "learning_rate": 1.7664919284990774e-05, + "loss": 0.7278, + "step": 14199 + }, + { + "epoch": 0.24536909040641416, + "grad_norm": 1.0485103249351106, + "learning_rate": 1.7664559835869763e-05, + "loss": 0.6868, + "step": 14200 + }, + { + "epoch": 0.24538636991982304, + "grad_norm": 1.3321361090273327, + "learning_rate": 1.766420036274277e-05, + "loss": 0.5462, + "step": 14201 + }, + { + "epoch": 0.24540364943323195, + "grad_norm": 1.0182316422693884, + "learning_rate": 1.766384086561092e-05, + "loss": 0.5002, + "step": 14202 + }, + { + "epoch": 0.24542092894664086, + "grad_norm": 0.7704921139569928, + "learning_rate": 1.7663481344475336e-05, + "loss": 0.7418, + "step": 14203 + }, + { + "epoch": 0.24543820846004977, + "grad_norm": 0.9129131939045498, + "learning_rate": 1.7663121799337143e-05, + "loss": 0.4307, + "step": 14204 + }, + { + "epoch": 0.24545548797345867, + "grad_norm": 0.930669095042831, + "learning_rate": 1.7662762230197476e-05, + "loss": 0.6809, + "step": 14205 + }, + { + "epoch": 0.24547276748686758, + "grad_norm": 0.8818550870737494, + "learning_rate": 1.766240263705745e-05, + "loss": 0.5588, + "step": 14206 + }, + { + "epoch": 0.24549004700027646, + "grad_norm": 0.7503844060488564, + "learning_rate": 1.76620430199182e-05, + "loss": 0.419, + "step": 14207 + }, + { + "epoch": 0.24550732651368537, + "grad_norm": 1.331015681456067, + "learning_rate": 1.7661683378780846e-05, + "loss": 0.6028, + "step": 14208 + }, + { + "epoch": 0.24552460602709428, + "grad_norm": 0.9331658864747594, + "learning_rate": 1.766132371364652e-05, + "loss": 0.4676, + "step": 14209 + }, + { + "epoch": 0.24554188554050319, + "grad_norm": 1.1824389538347184, + "learning_rate": 1.7660964024516346e-05, + "loss": 0.416, + "step": 14210 + }, + { + "epoch": 0.2455591650539121, + "grad_norm": 0.6687160119785228, + "learning_rate": 1.766060431139145e-05, + "loss": 0.6526, + "step": 14211 + }, + { + "epoch": 0.24557644456732097, + "grad_norm": 0.8573611795449193, + "learning_rate": 1.766024457427296e-05, + "loss": 0.6102, + "step": 14212 + }, + { + "epoch": 0.24559372408072988, + "grad_norm": 1.0302904523666365, + "learning_rate": 1.7659884813162e-05, + "loss": 0.5513, + "step": 14213 + }, + { + "epoch": 0.2456110035941388, + "grad_norm": 1.6373867901299446, + "learning_rate": 1.76595250280597e-05, + "loss": 0.518, + "step": 14214 + }, + { + "epoch": 0.2456282831075477, + "grad_norm": 1.1902176552804569, + "learning_rate": 1.7659165218967185e-05, + "loss": 0.4474, + "step": 14215 + }, + { + "epoch": 0.2456455626209566, + "grad_norm": 0.8251712894828229, + "learning_rate": 1.7658805385885583e-05, + "loss": 0.5522, + "step": 14216 + }, + { + "epoch": 0.24566284213436548, + "grad_norm": 0.8054946605952588, + "learning_rate": 1.7658445528816018e-05, + "loss": 0.6425, + "step": 14217 + }, + { + "epoch": 0.2456801216477744, + "grad_norm": 0.7571638220341985, + "learning_rate": 1.7658085647759626e-05, + "loss": 0.4287, + "step": 14218 + }, + { + "epoch": 0.2456974011611833, + "grad_norm": 0.7378847606277139, + "learning_rate": 1.765772574271752e-05, + "loss": 0.5051, + "step": 14219 + }, + { + "epoch": 0.2457146806745922, + "grad_norm": 0.728298671670178, + "learning_rate": 1.7657365813690836e-05, + "loss": 0.5783, + "step": 14220 + }, + { + "epoch": 0.24573196018800111, + "grad_norm": 1.2835254936570872, + "learning_rate": 1.7657005860680706e-05, + "loss": 0.475, + "step": 14221 + }, + { + "epoch": 0.24574923970141002, + "grad_norm": 0.9972095057430745, + "learning_rate": 1.7656645883688247e-05, + "loss": 0.3412, + "step": 14222 + }, + { + "epoch": 0.2457665192148189, + "grad_norm": 1.6669579956379086, + "learning_rate": 1.765628588271459e-05, + "loss": 0.5778, + "step": 14223 + }, + { + "epoch": 0.2457837987282278, + "grad_norm": 1.0316958735857737, + "learning_rate": 1.7655925857760867e-05, + "loss": 0.7255, + "step": 14224 + }, + { + "epoch": 0.24580107824163672, + "grad_norm": 0.8101496049358065, + "learning_rate": 1.7655565808828202e-05, + "loss": 0.6048, + "step": 14225 + }, + { + "epoch": 0.24581835775504562, + "grad_norm": 1.5301007555099404, + "learning_rate": 1.7655205735917726e-05, + "loss": 0.4998, + "step": 14226 + }, + { + "epoch": 0.24583563726845453, + "grad_norm": 1.0572054734207892, + "learning_rate": 1.765484563903056e-05, + "loss": 0.5427, + "step": 14227 + }, + { + "epoch": 0.2458529167818634, + "grad_norm": 1.0719039426837342, + "learning_rate": 1.7654485518167836e-05, + "loss": 0.5538, + "step": 14228 + }, + { + "epoch": 0.24587019629527232, + "grad_norm": 1.0811589128458237, + "learning_rate": 1.765412537333068e-05, + "loss": 0.6674, + "step": 14229 + }, + { + "epoch": 0.24588747580868123, + "grad_norm": 0.7490094806149715, + "learning_rate": 1.7653765204520223e-05, + "loss": 0.3107, + "step": 14230 + }, + { + "epoch": 0.24590475532209013, + "grad_norm": 1.1755986077009555, + "learning_rate": 1.7653405011737593e-05, + "loss": 0.5733, + "step": 14231 + }, + { + "epoch": 0.24592203483549904, + "grad_norm": 1.037249775500211, + "learning_rate": 1.7653044794983917e-05, + "loss": 0.3834, + "step": 14232 + }, + { + "epoch": 0.24593931434890792, + "grad_norm": 0.8741941758496472, + "learning_rate": 1.7652684554260323e-05, + "loss": 0.7143, + "step": 14233 + }, + { + "epoch": 0.24595659386231683, + "grad_norm": 1.7256213848614852, + "learning_rate": 1.7652324289567936e-05, + "loss": 0.5091, + "step": 14234 + }, + { + "epoch": 0.24597387337572574, + "grad_norm": 1.7291501469515318, + "learning_rate": 1.7651964000907892e-05, + "loss": 0.554, + "step": 14235 + }, + { + "epoch": 0.24599115288913465, + "grad_norm": 0.9554604315598478, + "learning_rate": 1.7651603688281314e-05, + "loss": 0.579, + "step": 14236 + }, + { + "epoch": 0.24600843240254355, + "grad_norm": 0.8738222515354642, + "learning_rate": 1.7651243351689332e-05, + "loss": 0.5153, + "step": 14237 + }, + { + "epoch": 0.24602571191595243, + "grad_norm": 1.2392963305699984, + "learning_rate": 1.765088299113307e-05, + "loss": 0.6843, + "step": 14238 + }, + { + "epoch": 0.24604299142936134, + "grad_norm": 0.908917856141407, + "learning_rate": 1.7650522606613667e-05, + "loss": 0.5042, + "step": 14239 + }, + { + "epoch": 0.24606027094277025, + "grad_norm": 0.4188045417555236, + "learning_rate": 1.7650162198132243e-05, + "loss": 0.6699, + "step": 14240 + }, + { + "epoch": 0.24607755045617916, + "grad_norm": 0.8227984601053543, + "learning_rate": 1.764980176568993e-05, + "loss": 0.5529, + "step": 14241 + }, + { + "epoch": 0.24609482996958806, + "grad_norm": 0.7813732578923495, + "learning_rate": 1.764944130928786e-05, + "loss": 0.5237, + "step": 14242 + }, + { + "epoch": 0.24611210948299697, + "grad_norm": 1.2558095846361332, + "learning_rate": 1.7649080828927153e-05, + "loss": 0.5097, + "step": 14243 + }, + { + "epoch": 0.24612938899640585, + "grad_norm": 1.0091951819781821, + "learning_rate": 1.7648720324608945e-05, + "loss": 0.5171, + "step": 14244 + }, + { + "epoch": 0.24614666850981476, + "grad_norm": 0.6971516327906873, + "learning_rate": 1.7648359796334366e-05, + "loss": 0.3926, + "step": 14245 + }, + { + "epoch": 0.24616394802322367, + "grad_norm": 0.38347282828046353, + "learning_rate": 1.764799924410454e-05, + "loss": 0.6003, + "step": 14246 + }, + { + "epoch": 0.24618122753663257, + "grad_norm": 0.7126958881433209, + "learning_rate": 1.7647638667920598e-05, + "loss": 0.358, + "step": 14247 + }, + { + "epoch": 0.24619850705004148, + "grad_norm": 1.2748412979151431, + "learning_rate": 1.7647278067783675e-05, + "loss": 0.597, + "step": 14248 + }, + { + "epoch": 0.24621578656345036, + "grad_norm": 1.4787446543343128, + "learning_rate": 1.7646917443694894e-05, + "loss": 0.5048, + "step": 14249 + }, + { + "epoch": 0.24623306607685927, + "grad_norm": 0.8246745041778168, + "learning_rate": 1.7646556795655386e-05, + "loss": 0.6321, + "step": 14250 + }, + { + "epoch": 0.24625034559026818, + "grad_norm": 0.903286366062915, + "learning_rate": 1.764619612366628e-05, + "loss": 0.5838, + "step": 14251 + }, + { + "epoch": 0.24626762510367708, + "grad_norm": 0.8833464966270904, + "learning_rate": 1.764583542772871e-05, + "loss": 0.5215, + "step": 14252 + }, + { + "epoch": 0.246284904617086, + "grad_norm": 0.9043185600371452, + "learning_rate": 1.76454747078438e-05, + "loss": 0.6294, + "step": 14253 + }, + { + "epoch": 0.24630218413049487, + "grad_norm": 1.1598186193506241, + "learning_rate": 1.764511396401268e-05, + "loss": 0.3915, + "step": 14254 + }, + { + "epoch": 0.24631946364390378, + "grad_norm": 0.4682813995283456, + "learning_rate": 1.7644753196236486e-05, + "loss": 0.5671, + "step": 14255 + }, + { + "epoch": 0.2463367431573127, + "grad_norm": 0.8484399582686742, + "learning_rate": 1.764439240451634e-05, + "loss": 0.5058, + "step": 14256 + }, + { + "epoch": 0.2463540226707216, + "grad_norm": 1.0260708988585732, + "learning_rate": 1.7644031588853382e-05, + "loss": 0.6028, + "step": 14257 + }, + { + "epoch": 0.2463713021841305, + "grad_norm": 1.0051197324065195, + "learning_rate": 1.7643670749248733e-05, + "loss": 0.4949, + "step": 14258 + }, + { + "epoch": 0.2463885816975394, + "grad_norm": 1.0401136852027173, + "learning_rate": 1.7643309885703527e-05, + "loss": 0.491, + "step": 14259 + }, + { + "epoch": 0.2464058612109483, + "grad_norm": 1.1235923315960916, + "learning_rate": 1.764294899821889e-05, + "loss": 0.5564, + "step": 14260 + }, + { + "epoch": 0.2464231407243572, + "grad_norm": 1.0755575648069324, + "learning_rate": 1.7642588086795957e-05, + "loss": 0.3351, + "step": 14261 + }, + { + "epoch": 0.2464404202377661, + "grad_norm": 0.9810489758882508, + "learning_rate": 1.764222715143586e-05, + "loss": 0.4413, + "step": 14262 + }, + { + "epoch": 0.246457699751175, + "grad_norm": 0.4147602283670578, + "learning_rate": 1.7641866192139725e-05, + "loss": 0.6112, + "step": 14263 + }, + { + "epoch": 0.24647497926458392, + "grad_norm": 0.4805870734816246, + "learning_rate": 1.7641505208908683e-05, + "loss": 0.8224, + "step": 14264 + }, + { + "epoch": 0.2464922587779928, + "grad_norm": 1.5980903618967086, + "learning_rate": 1.764114420174387e-05, + "loss": 0.5707, + "step": 14265 + }, + { + "epoch": 0.2465095382914017, + "grad_norm": 1.7178145856148213, + "learning_rate": 1.764078317064641e-05, + "loss": 0.6619, + "step": 14266 + }, + { + "epoch": 0.24652681780481062, + "grad_norm": 1.2131187339956273, + "learning_rate": 1.7640422115617437e-05, + "loss": 0.6271, + "step": 14267 + }, + { + "epoch": 0.24654409731821952, + "grad_norm": 1.4603199853100461, + "learning_rate": 1.7640061036658082e-05, + "loss": 0.6056, + "step": 14268 + }, + { + "epoch": 0.24656137683162843, + "grad_norm": 0.49253528873968055, + "learning_rate": 1.7639699933769474e-05, + "loss": 0.8046, + "step": 14269 + }, + { + "epoch": 0.2465786563450373, + "grad_norm": 0.9663269085816186, + "learning_rate": 1.7639338806952744e-05, + "loss": 0.5519, + "step": 14270 + }, + { + "epoch": 0.24659593585844622, + "grad_norm": 0.4651207756299592, + "learning_rate": 1.763897765620903e-05, + "loss": 0.6754, + "step": 14271 + }, + { + "epoch": 0.24661321537185513, + "grad_norm": 1.6464435671637077, + "learning_rate": 1.763861648153945e-05, + "loss": 0.6981, + "step": 14272 + }, + { + "epoch": 0.24663049488526403, + "grad_norm": 1.2413788139215893, + "learning_rate": 1.7638255282945146e-05, + "loss": 0.5676, + "step": 14273 + }, + { + "epoch": 0.24664777439867294, + "grad_norm": 1.1062869177149415, + "learning_rate": 1.7637894060427245e-05, + "loss": 0.6297, + "step": 14274 + }, + { + "epoch": 0.24666505391208182, + "grad_norm": 2.060777209531104, + "learning_rate": 1.763753281398688e-05, + "loss": 0.7142, + "step": 14275 + }, + { + "epoch": 0.24668233342549073, + "grad_norm": 1.5796175918990447, + "learning_rate": 1.763717154362518e-05, + "loss": 0.5156, + "step": 14276 + }, + { + "epoch": 0.24669961293889964, + "grad_norm": 1.0189191544781022, + "learning_rate": 1.7636810249343285e-05, + "loss": 0.5199, + "step": 14277 + }, + { + "epoch": 0.24671689245230854, + "grad_norm": 0.9997196160988716, + "learning_rate": 1.7636448931142313e-05, + "loss": 0.5412, + "step": 14278 + }, + { + "epoch": 0.24673417196571745, + "grad_norm": 1.02332769933104, + "learning_rate": 1.7636087589023408e-05, + "loss": 0.4904, + "step": 14279 + }, + { + "epoch": 0.24675145147912636, + "grad_norm": 1.0756774963804439, + "learning_rate": 1.763572622298769e-05, + "loss": 0.5964, + "step": 14280 + }, + { + "epoch": 0.24676873099253524, + "grad_norm": 1.271201990044235, + "learning_rate": 1.7635364833036303e-05, + "loss": 0.4808, + "step": 14281 + }, + { + "epoch": 0.24678601050594415, + "grad_norm": 0.851409170314416, + "learning_rate": 1.763500341917037e-05, + "loss": 0.5322, + "step": 14282 + }, + { + "epoch": 0.24680329001935306, + "grad_norm": 1.4498848257070833, + "learning_rate": 1.7634641981391025e-05, + "loss": 0.526, + "step": 14283 + }, + { + "epoch": 0.24682056953276196, + "grad_norm": 1.074323308321541, + "learning_rate": 1.7634280519699403e-05, + "loss": 0.6687, + "step": 14284 + }, + { + "epoch": 0.24683784904617087, + "grad_norm": 0.8197175673739657, + "learning_rate": 1.7633919034096633e-05, + "loss": 0.4914, + "step": 14285 + }, + { + "epoch": 0.24685512855957975, + "grad_norm": 1.3766343741759814, + "learning_rate": 1.7633557524583848e-05, + "loss": 0.6427, + "step": 14286 + }, + { + "epoch": 0.24687240807298866, + "grad_norm": 0.4127859779469925, + "learning_rate": 1.7633195991162185e-05, + "loss": 0.5879, + "step": 14287 + }, + { + "epoch": 0.24688968758639757, + "grad_norm": 1.833070538062973, + "learning_rate": 1.7632834433832767e-05, + "loss": 0.6186, + "step": 14288 + }, + { + "epoch": 0.24690696709980647, + "grad_norm": 0.8927613416835697, + "learning_rate": 1.7632472852596735e-05, + "loss": 0.5623, + "step": 14289 + }, + { + "epoch": 0.24692424661321538, + "grad_norm": 0.9169848322716754, + "learning_rate": 1.7632111247455217e-05, + "loss": 0.5574, + "step": 14290 + }, + { + "epoch": 0.24694152612662426, + "grad_norm": 0.7413384071234603, + "learning_rate": 1.7631749618409343e-05, + "loss": 0.597, + "step": 14291 + }, + { + "epoch": 0.24695880564003317, + "grad_norm": 0.9545950251913519, + "learning_rate": 1.7631387965460255e-05, + "loss": 0.5282, + "step": 14292 + }, + { + "epoch": 0.24697608515344208, + "grad_norm": 0.6473980708851997, + "learning_rate": 1.7631026288609078e-05, + "loss": 0.6426, + "step": 14293 + }, + { + "epoch": 0.24699336466685098, + "grad_norm": 1.0803359682673415, + "learning_rate": 1.7630664587856944e-05, + "loss": 0.4467, + "step": 14294 + }, + { + "epoch": 0.2470106441802599, + "grad_norm": 0.6193141815549775, + "learning_rate": 1.763030286320499e-05, + "loss": 0.4368, + "step": 14295 + }, + { + "epoch": 0.2470279236936688, + "grad_norm": 0.843357228693139, + "learning_rate": 1.7629941114654347e-05, + "loss": 0.4922, + "step": 14296 + }, + { + "epoch": 0.24704520320707768, + "grad_norm": 0.45447138921572694, + "learning_rate": 1.7629579342206153e-05, + "loss": 0.6325, + "step": 14297 + }, + { + "epoch": 0.2470624827204866, + "grad_norm": 0.7110922999216932, + "learning_rate": 1.7629217545861533e-05, + "loss": 0.5744, + "step": 14298 + }, + { + "epoch": 0.2470797622338955, + "grad_norm": 0.8007168221549157, + "learning_rate": 1.7628855725621623e-05, + "loss": 0.592, + "step": 14299 + }, + { + "epoch": 0.2470970417473044, + "grad_norm": 1.3117522819667178, + "learning_rate": 1.7628493881487557e-05, + "loss": 0.5547, + "step": 14300 + }, + { + "epoch": 0.2471143212607133, + "grad_norm": 1.0176740871418295, + "learning_rate": 1.762813201346047e-05, + "loss": 0.4724, + "step": 14301 + }, + { + "epoch": 0.2471316007741222, + "grad_norm": 0.8514175025958126, + "learning_rate": 1.7627770121541495e-05, + "loss": 0.6756, + "step": 14302 + }, + { + "epoch": 0.2471488802875311, + "grad_norm": 1.1694379112955287, + "learning_rate": 1.7627408205731762e-05, + "loss": 0.5942, + "step": 14303 + }, + { + "epoch": 0.24716615980094, + "grad_norm": 0.8768149816334105, + "learning_rate": 1.762704626603241e-05, + "loss": 0.6316, + "step": 14304 + }, + { + "epoch": 0.2471834393143489, + "grad_norm": 0.7363522096530797, + "learning_rate": 1.7626684302444567e-05, + "loss": 0.5185, + "step": 14305 + }, + { + "epoch": 0.24720071882775782, + "grad_norm": 1.050509119053578, + "learning_rate": 1.762632231496937e-05, + "loss": 0.5456, + "step": 14306 + }, + { + "epoch": 0.2472179983411667, + "grad_norm": 1.0722839512973497, + "learning_rate": 1.7625960303607946e-05, + "loss": 0.6936, + "step": 14307 + }, + { + "epoch": 0.2472352778545756, + "grad_norm": 1.066111111042907, + "learning_rate": 1.762559826836144e-05, + "loss": 0.29, + "step": 14308 + }, + { + "epoch": 0.24725255736798452, + "grad_norm": 0.42093366221977924, + "learning_rate": 1.7625236209230982e-05, + "loss": 0.6044, + "step": 14309 + }, + { + "epoch": 0.24726983688139342, + "grad_norm": 1.2628219435169583, + "learning_rate": 1.7624874126217704e-05, + "loss": 0.5951, + "step": 14310 + }, + { + "epoch": 0.24728711639480233, + "grad_norm": 1.3346925747304466, + "learning_rate": 1.7624512019322742e-05, + "loss": 0.5682, + "step": 14311 + }, + { + "epoch": 0.24730439590821124, + "grad_norm": 0.7850829161019188, + "learning_rate": 1.7624149888547225e-05, + "loss": 0.6657, + "step": 14312 + }, + { + "epoch": 0.24732167542162012, + "grad_norm": 0.6676931854215526, + "learning_rate": 1.7623787733892293e-05, + "loss": 0.363, + "step": 14313 + }, + { + "epoch": 0.24733895493502903, + "grad_norm": 0.6235999959845847, + "learning_rate": 1.762342555535908e-05, + "loss": 0.5872, + "step": 14314 + }, + { + "epoch": 0.24735623444843793, + "grad_norm": 1.3470895446988589, + "learning_rate": 1.762306335294872e-05, + "loss": 0.5387, + "step": 14315 + }, + { + "epoch": 0.24737351396184684, + "grad_norm": 0.7065781706614885, + "learning_rate": 1.762270112666234e-05, + "loss": 0.6267, + "step": 14316 + }, + { + "epoch": 0.24739079347525575, + "grad_norm": 1.1583112802594318, + "learning_rate": 1.7622338876501084e-05, + "loss": 0.5621, + "step": 14317 + }, + { + "epoch": 0.24740807298866463, + "grad_norm": 1.7842742835124465, + "learning_rate": 1.7621976602466085e-05, + "loss": 0.6085, + "step": 14318 + }, + { + "epoch": 0.24742535250207354, + "grad_norm": 0.8254977999487911, + "learning_rate": 1.7621614304558474e-05, + "loss": 0.6956, + "step": 14319 + }, + { + "epoch": 0.24744263201548244, + "grad_norm": 0.8463760243248791, + "learning_rate": 1.762125198277939e-05, + "loss": 0.6087, + "step": 14320 + }, + { + "epoch": 0.24745991152889135, + "grad_norm": 0.997901260629938, + "learning_rate": 1.7620889637129963e-05, + "loss": 0.3826, + "step": 14321 + }, + { + "epoch": 0.24747719104230026, + "grad_norm": 1.4125667530281334, + "learning_rate": 1.762052726761133e-05, + "loss": 0.4909, + "step": 14322 + }, + { + "epoch": 0.24749447055570914, + "grad_norm": 1.3184212624861322, + "learning_rate": 1.762016487422463e-05, + "loss": 0.8324, + "step": 14323 + }, + { + "epoch": 0.24751175006911805, + "grad_norm": 0.90754617298113, + "learning_rate": 1.761980245697099e-05, + "loss": 0.6366, + "step": 14324 + }, + { + "epoch": 0.24752902958252695, + "grad_norm": 0.7660811659156166, + "learning_rate": 1.7619440015851553e-05, + "loss": 0.3508, + "step": 14325 + }, + { + "epoch": 0.24754630909593586, + "grad_norm": 1.4980473963125507, + "learning_rate": 1.7619077550867453e-05, + "loss": 0.4312, + "step": 14326 + }, + { + "epoch": 0.24756358860934477, + "grad_norm": 1.1622473207926936, + "learning_rate": 1.761871506201982e-05, + "loss": 0.4359, + "step": 14327 + }, + { + "epoch": 0.24758086812275365, + "grad_norm": 1.349265844261684, + "learning_rate": 1.7618352549309793e-05, + "loss": 0.4471, + "step": 14328 + }, + { + "epoch": 0.24759814763616256, + "grad_norm": 1.1606901897892703, + "learning_rate": 1.7617990012738504e-05, + "loss": 0.6049, + "step": 14329 + }, + { + "epoch": 0.24761542714957147, + "grad_norm": 1.0272286904536296, + "learning_rate": 1.7617627452307092e-05, + "loss": 0.4413, + "step": 14330 + }, + { + "epoch": 0.24763270666298037, + "grad_norm": 0.40234408759772755, + "learning_rate": 1.7617264868016697e-05, + "loss": 0.6689, + "step": 14331 + }, + { + "epoch": 0.24764998617638928, + "grad_norm": 0.7664381265061325, + "learning_rate": 1.7616902259868444e-05, + "loss": 0.527, + "step": 14332 + }, + { + "epoch": 0.2476672656897982, + "grad_norm": 1.337334745837268, + "learning_rate": 1.7616539627863477e-05, + "loss": 0.6547, + "step": 14333 + }, + { + "epoch": 0.24768454520320707, + "grad_norm": 0.9485962062661951, + "learning_rate": 1.7616176972002928e-05, + "loss": 0.4278, + "step": 14334 + }, + { + "epoch": 0.24770182471661598, + "grad_norm": 1.4150810250113126, + "learning_rate": 1.7615814292287936e-05, + "loss": 0.5101, + "step": 14335 + }, + { + "epoch": 0.24771910423002488, + "grad_norm": 1.836543464581302, + "learning_rate": 1.7615451588719632e-05, + "loss": 0.5869, + "step": 14336 + }, + { + "epoch": 0.2477363837434338, + "grad_norm": 1.0844509005804115, + "learning_rate": 1.7615088861299158e-05, + "loss": 0.5155, + "step": 14337 + }, + { + "epoch": 0.2477536632568427, + "grad_norm": 1.2062046217541695, + "learning_rate": 1.7614726110027646e-05, + "loss": 0.4412, + "step": 14338 + }, + { + "epoch": 0.24777094277025158, + "grad_norm": 0.8197988044298246, + "learning_rate": 1.761436333490623e-05, + "loss": 0.5371, + "step": 14339 + }, + { + "epoch": 0.2477882222836605, + "grad_norm": 1.5086929030183585, + "learning_rate": 1.761400053593605e-05, + "loss": 0.5109, + "step": 14340 + }, + { + "epoch": 0.2478055017970694, + "grad_norm": 0.8923971562795717, + "learning_rate": 1.7613637713118246e-05, + "loss": 0.5002, + "step": 14341 + }, + { + "epoch": 0.2478227813104783, + "grad_norm": 0.4479728713503142, + "learning_rate": 1.7613274866453944e-05, + "loss": 0.5603, + "step": 14342 + }, + { + "epoch": 0.2478400608238872, + "grad_norm": 0.46258513545562546, + "learning_rate": 1.761291199594429e-05, + "loss": 0.6245, + "step": 14343 + }, + { + "epoch": 0.2478573403372961, + "grad_norm": 1.299667135516721, + "learning_rate": 1.761254910159042e-05, + "loss": 0.4948, + "step": 14344 + }, + { + "epoch": 0.247874619850705, + "grad_norm": 0.7075952701578688, + "learning_rate": 1.7612186183393464e-05, + "loss": 0.3885, + "step": 14345 + }, + { + "epoch": 0.2478918993641139, + "grad_norm": 0.5385006747514794, + "learning_rate": 1.7611823241354563e-05, + "loss": 0.8338, + "step": 14346 + }, + { + "epoch": 0.2479091788775228, + "grad_norm": 0.6730444545426759, + "learning_rate": 1.7611460275474853e-05, + "loss": 0.5141, + "step": 14347 + }, + { + "epoch": 0.24792645839093172, + "grad_norm": 1.0959939535814531, + "learning_rate": 1.761109728575547e-05, + "loss": 0.4975, + "step": 14348 + }, + { + "epoch": 0.24794373790434063, + "grad_norm": 1.3111237857052944, + "learning_rate": 1.7610734272197555e-05, + "loss": 0.6978, + "step": 14349 + }, + { + "epoch": 0.2479610174177495, + "grad_norm": 1.393339736448536, + "learning_rate": 1.761037123480224e-05, + "loss": 0.4703, + "step": 14350 + }, + { + "epoch": 0.24797829693115842, + "grad_norm": 0.5062085766813914, + "learning_rate": 1.7610008173570662e-05, + "loss": 0.6159, + "step": 14351 + }, + { + "epoch": 0.24799557644456732, + "grad_norm": 1.0177358361463504, + "learning_rate": 1.760964508850396e-05, + "loss": 0.7298, + "step": 14352 + }, + { + "epoch": 0.24801285595797623, + "grad_norm": 0.5207027897198104, + "learning_rate": 1.7609281979603276e-05, + "loss": 0.3756, + "step": 14353 + }, + { + "epoch": 0.24803013547138514, + "grad_norm": 0.9102250516520733, + "learning_rate": 1.7608918846869737e-05, + "loss": 0.4529, + "step": 14354 + }, + { + "epoch": 0.24804741498479402, + "grad_norm": 0.5248072286911093, + "learning_rate": 1.760855569030449e-05, + "loss": 0.3335, + "step": 14355 + }, + { + "epoch": 0.24806469449820293, + "grad_norm": 1.1476501599632152, + "learning_rate": 1.7608192509908667e-05, + "loss": 0.7002, + "step": 14356 + }, + { + "epoch": 0.24808197401161183, + "grad_norm": 0.8103048353463576, + "learning_rate": 1.7607829305683407e-05, + "loss": 0.5581, + "step": 14357 + }, + { + "epoch": 0.24809925352502074, + "grad_norm": 1.1115789706011587, + "learning_rate": 1.7607466077629845e-05, + "loss": 0.4748, + "step": 14358 + }, + { + "epoch": 0.24811653303842965, + "grad_norm": 0.828833226999873, + "learning_rate": 1.7607102825749125e-05, + "loss": 0.5006, + "step": 14359 + }, + { + "epoch": 0.24813381255183853, + "grad_norm": 1.585825980010744, + "learning_rate": 1.7606739550042378e-05, + "loss": 0.5309, + "step": 14360 + }, + { + "epoch": 0.24815109206524744, + "grad_norm": 1.5191246652511523, + "learning_rate": 1.7606376250510745e-05, + "loss": 0.4965, + "step": 14361 + }, + { + "epoch": 0.24816837157865634, + "grad_norm": 0.9295043089241352, + "learning_rate": 1.7606012927155366e-05, + "loss": 0.743, + "step": 14362 + }, + { + "epoch": 0.24818565109206525, + "grad_norm": 0.6622419121587464, + "learning_rate": 1.7605649579977374e-05, + "loss": 0.4675, + "step": 14363 + }, + { + "epoch": 0.24820293060547416, + "grad_norm": 0.3730831643559471, + "learning_rate": 1.760528620897791e-05, + "loss": 0.7021, + "step": 14364 + }, + { + "epoch": 0.24822021011888304, + "grad_norm": 0.9660370448917358, + "learning_rate": 1.7604922814158113e-05, + "loss": 0.6093, + "step": 14365 + }, + { + "epoch": 0.24823748963229195, + "grad_norm": 1.7090912631432011, + "learning_rate": 1.760455939551912e-05, + "loss": 0.6944, + "step": 14366 + }, + { + "epoch": 0.24825476914570085, + "grad_norm": 0.9184750832916297, + "learning_rate": 1.7604195953062068e-05, + "loss": 0.4222, + "step": 14367 + }, + { + "epoch": 0.24827204865910976, + "grad_norm": 1.1994490449673425, + "learning_rate": 1.7603832486788097e-05, + "loss": 0.5618, + "step": 14368 + }, + { + "epoch": 0.24828932817251867, + "grad_norm": 1.4264276894884926, + "learning_rate": 1.760346899669834e-05, + "loss": 0.5125, + "step": 14369 + }, + { + "epoch": 0.24830660768592758, + "grad_norm": 0.9603747191140609, + "learning_rate": 1.7603105482793945e-05, + "loss": 0.6111, + "step": 14370 + }, + { + "epoch": 0.24832388719933646, + "grad_norm": 1.3546327756715153, + "learning_rate": 1.7602741945076048e-05, + "loss": 0.6143, + "step": 14371 + }, + { + "epoch": 0.24834116671274536, + "grad_norm": 1.8924277246100154, + "learning_rate": 1.760237838354578e-05, + "loss": 0.7212, + "step": 14372 + }, + { + "epoch": 0.24835844622615427, + "grad_norm": 1.057299491380126, + "learning_rate": 1.7602014798204286e-05, + "loss": 0.4109, + "step": 14373 + }, + { + "epoch": 0.24837572573956318, + "grad_norm": 1.5092177121435446, + "learning_rate": 1.7601651189052706e-05, + "loss": 0.4972, + "step": 14374 + }, + { + "epoch": 0.2483930052529721, + "grad_norm": 1.223779573987761, + "learning_rate": 1.7601287556092178e-05, + "loss": 0.6079, + "step": 14375 + }, + { + "epoch": 0.24841028476638097, + "grad_norm": 1.02306165679524, + "learning_rate": 1.7600923899323838e-05, + "loss": 0.5552, + "step": 14376 + }, + { + "epoch": 0.24842756427978988, + "grad_norm": 0.739193180086789, + "learning_rate": 1.7600560218748826e-05, + "loss": 0.5926, + "step": 14377 + }, + { + "epoch": 0.24844484379319878, + "grad_norm": 0.44862141735501027, + "learning_rate": 1.7600196514368283e-05, + "loss": 0.7968, + "step": 14378 + }, + { + "epoch": 0.2484621233066077, + "grad_norm": 0.7374062781703412, + "learning_rate": 1.7599832786183345e-05, + "loss": 0.4307, + "step": 14379 + }, + { + "epoch": 0.2484794028200166, + "grad_norm": 1.0057460496660955, + "learning_rate": 1.7599469034195155e-05, + "loss": 0.3098, + "step": 14380 + }, + { + "epoch": 0.24849668233342548, + "grad_norm": 1.0374184752520643, + "learning_rate": 1.759910525840485e-05, + "loss": 0.637, + "step": 14381 + }, + { + "epoch": 0.24851396184683439, + "grad_norm": 1.104194059433293, + "learning_rate": 1.759874145881357e-05, + "loss": 0.457, + "step": 14382 + }, + { + "epoch": 0.2485312413602433, + "grad_norm": 1.0968311018368258, + "learning_rate": 1.759837763542245e-05, + "loss": 0.611, + "step": 14383 + }, + { + "epoch": 0.2485485208736522, + "grad_norm": 0.5504656030406964, + "learning_rate": 1.759801378823264e-05, + "loss": 0.7398, + "step": 14384 + }, + { + "epoch": 0.2485658003870611, + "grad_norm": 0.8036199119228901, + "learning_rate": 1.7597649917245273e-05, + "loss": 0.456, + "step": 14385 + }, + { + "epoch": 0.24858307990047002, + "grad_norm": 1.543336913948174, + "learning_rate": 1.7597286022461488e-05, + "loss": 0.7036, + "step": 14386 + }, + { + "epoch": 0.2486003594138789, + "grad_norm": 0.9191301841043908, + "learning_rate": 1.7596922103882422e-05, + "loss": 0.3978, + "step": 14387 + }, + { + "epoch": 0.2486176389272878, + "grad_norm": 0.915186027661779, + "learning_rate": 1.7596558161509222e-05, + "loss": 0.5501, + "step": 14388 + }, + { + "epoch": 0.2486349184406967, + "grad_norm": 0.6806932708587945, + "learning_rate": 1.7596194195343022e-05, + "loss": 0.4917, + "step": 14389 + }, + { + "epoch": 0.24865219795410562, + "grad_norm": 1.3005744993041914, + "learning_rate": 1.759583020538497e-05, + "loss": 0.5053, + "step": 14390 + }, + { + "epoch": 0.24866947746751453, + "grad_norm": 1.4109327796846964, + "learning_rate": 1.7595466191636198e-05, + "loss": 0.4299, + "step": 14391 + }, + { + "epoch": 0.2486867569809234, + "grad_norm": 1.4144757464051, + "learning_rate": 1.7595102154097847e-05, + "loss": 0.5264, + "step": 14392 + }, + { + "epoch": 0.24870403649433231, + "grad_norm": 0.5341304605298592, + "learning_rate": 1.7594738092771057e-05, + "loss": 0.7451, + "step": 14393 + }, + { + "epoch": 0.24872131600774122, + "grad_norm": 1.0108639981149246, + "learning_rate": 1.7594374007656976e-05, + "loss": 0.561, + "step": 14394 + }, + { + "epoch": 0.24873859552115013, + "grad_norm": 1.218906838457813, + "learning_rate": 1.7594009898756736e-05, + "loss": 0.652, + "step": 14395 + }, + { + "epoch": 0.24875587503455904, + "grad_norm": 0.7599182489385905, + "learning_rate": 1.7593645766071475e-05, + "loss": 0.4876, + "step": 14396 + }, + { + "epoch": 0.24877315454796792, + "grad_norm": 1.003946118686551, + "learning_rate": 1.7593281609602346e-05, + "loss": 0.5682, + "step": 14397 + }, + { + "epoch": 0.24879043406137683, + "grad_norm": 1.0028378913425948, + "learning_rate": 1.759291742935048e-05, + "loss": 0.6875, + "step": 14398 + }, + { + "epoch": 0.24880771357478573, + "grad_norm": 0.44737052302197944, + "learning_rate": 1.7592553225317015e-05, + "loss": 0.805, + "step": 14399 + }, + { + "epoch": 0.24882499308819464, + "grad_norm": 1.3733313158734517, + "learning_rate": 1.75921889975031e-05, + "loss": 0.5006, + "step": 14400 + }, + { + "epoch": 0.24884227260160355, + "grad_norm": 0.9005317336381354, + "learning_rate": 1.759182474590987e-05, + "loss": 0.3984, + "step": 14401 + }, + { + "epoch": 0.24885955211501243, + "grad_norm": 1.3796554510783008, + "learning_rate": 1.759146047053847e-05, + "loss": 0.4388, + "step": 14402 + }, + { + "epoch": 0.24887683162842134, + "grad_norm": 0.9166970164594206, + "learning_rate": 1.759109617139004e-05, + "loss": 0.5282, + "step": 14403 + }, + { + "epoch": 0.24889411114183024, + "grad_norm": 0.9057498468983496, + "learning_rate": 1.7590731848465716e-05, + "loss": 0.5715, + "step": 14404 + }, + { + "epoch": 0.24891139065523915, + "grad_norm": 1.3741220397156657, + "learning_rate": 1.7590367501766644e-05, + "loss": 0.5602, + "step": 14405 + }, + { + "epoch": 0.24892867016864806, + "grad_norm": 1.4589472809750768, + "learning_rate": 1.7590003131293967e-05, + "loss": 0.5861, + "step": 14406 + }, + { + "epoch": 0.24894594968205697, + "grad_norm": 1.2473460536209375, + "learning_rate": 1.758963873704882e-05, + "loss": 0.6563, + "step": 14407 + }, + { + "epoch": 0.24896322919546585, + "grad_norm": 1.0199697656545212, + "learning_rate": 1.758927431903235e-05, + "loss": 0.551, + "step": 14408 + }, + { + "epoch": 0.24898050870887475, + "grad_norm": 0.5230962241508701, + "learning_rate": 1.7588909877245696e-05, + "loss": 0.6963, + "step": 14409 + }, + { + "epoch": 0.24899778822228366, + "grad_norm": 1.468783205189493, + "learning_rate": 1.758854541169e-05, + "loss": 0.5374, + "step": 14410 + }, + { + "epoch": 0.24901506773569257, + "grad_norm": 1.213450164221872, + "learning_rate": 1.75881809223664e-05, + "loss": 0.4896, + "step": 14411 + }, + { + "epoch": 0.24903234724910148, + "grad_norm": 0.5580839082864272, + "learning_rate": 1.7587816409276046e-05, + "loss": 0.5138, + "step": 14412 + }, + { + "epoch": 0.24904962676251036, + "grad_norm": 1.468615445035804, + "learning_rate": 1.7587451872420066e-05, + "loss": 0.6708, + "step": 14413 + }, + { + "epoch": 0.24906690627591926, + "grad_norm": 0.42162325240381243, + "learning_rate": 1.758708731179962e-05, + "loss": 0.7343, + "step": 14414 + }, + { + "epoch": 0.24908418578932817, + "grad_norm": 0.8042929914478049, + "learning_rate": 1.758672272741583e-05, + "loss": 0.4568, + "step": 14415 + }, + { + "epoch": 0.24910146530273708, + "grad_norm": 0.6782484702770701, + "learning_rate": 1.7586358119269852e-05, + "loss": 0.3882, + "step": 14416 + }, + { + "epoch": 0.249118744816146, + "grad_norm": 0.8860245076705959, + "learning_rate": 1.758599348736283e-05, + "loss": 0.4297, + "step": 14417 + }, + { + "epoch": 0.24913602432955487, + "grad_norm": 1.252629699778785, + "learning_rate": 1.758562883169589e-05, + "loss": 0.7884, + "step": 14418 + }, + { + "epoch": 0.24915330384296377, + "grad_norm": 0.6834114812308074, + "learning_rate": 1.758526415227019e-05, + "loss": 0.3365, + "step": 14419 + }, + { + "epoch": 0.24917058335637268, + "grad_norm": 1.2742717435955855, + "learning_rate": 1.7584899449086866e-05, + "loss": 0.5091, + "step": 14420 + }, + { + "epoch": 0.2491878628697816, + "grad_norm": 1.3684734277504436, + "learning_rate": 1.758453472214706e-05, + "loss": 0.6311, + "step": 14421 + }, + { + "epoch": 0.2492051423831905, + "grad_norm": 1.0205098984766783, + "learning_rate": 1.7584169971451915e-05, + "loss": 0.7501, + "step": 14422 + }, + { + "epoch": 0.2492224218965994, + "grad_norm": 1.2740973007632388, + "learning_rate": 1.758380519700257e-05, + "loss": 0.7126, + "step": 14423 + }, + { + "epoch": 0.24923970141000829, + "grad_norm": 1.206966150568114, + "learning_rate": 1.7583440398800172e-05, + "loss": 0.618, + "step": 14424 + }, + { + "epoch": 0.2492569809234172, + "grad_norm": 1.2373726215783782, + "learning_rate": 1.7583075576845865e-05, + "loss": 0.4765, + "step": 14425 + }, + { + "epoch": 0.2492742604368261, + "grad_norm": 1.140342670196425, + "learning_rate": 1.7582710731140786e-05, + "loss": 0.5137, + "step": 14426 + }, + { + "epoch": 0.249291539950235, + "grad_norm": 1.3330694236257552, + "learning_rate": 1.7582345861686084e-05, + "loss": 0.5965, + "step": 14427 + }, + { + "epoch": 0.24930881946364392, + "grad_norm": 1.069063161533541, + "learning_rate": 1.7581980968482897e-05, + "loss": 0.4579, + "step": 14428 + }, + { + "epoch": 0.2493260989770528, + "grad_norm": 1.2630824408768615, + "learning_rate": 1.7581616051532368e-05, + "loss": 0.4311, + "step": 14429 + }, + { + "epoch": 0.2493433784904617, + "grad_norm": 0.4852485631287693, + "learning_rate": 1.7581251110835643e-05, + "loss": 0.5904, + "step": 14430 + }, + { + "epoch": 0.2493606580038706, + "grad_norm": 1.4403290943859404, + "learning_rate": 1.7580886146393864e-05, + "loss": 0.7464, + "step": 14431 + }, + { + "epoch": 0.24937793751727952, + "grad_norm": 1.1935022294704503, + "learning_rate": 1.7580521158208173e-05, + "loss": 0.7521, + "step": 14432 + }, + { + "epoch": 0.24939521703068843, + "grad_norm": 1.862021518945196, + "learning_rate": 1.758015614627971e-05, + "loss": 0.4532, + "step": 14433 + }, + { + "epoch": 0.2494124965440973, + "grad_norm": 1.437265947920549, + "learning_rate": 1.7579791110609624e-05, + "loss": 0.573, + "step": 14434 + }, + { + "epoch": 0.24942977605750621, + "grad_norm": 1.0245003694807597, + "learning_rate": 1.7579426051199056e-05, + "loss": 0.4085, + "step": 14435 + }, + { + "epoch": 0.24944705557091512, + "grad_norm": 1.073369490647519, + "learning_rate": 1.7579060968049154e-05, + "loss": 0.5237, + "step": 14436 + }, + { + "epoch": 0.24946433508432403, + "grad_norm": 1.8649459831709718, + "learning_rate": 1.757869586116105e-05, + "loss": 0.5974, + "step": 14437 + }, + { + "epoch": 0.24948161459773294, + "grad_norm": 1.1961832210475383, + "learning_rate": 1.75783307305359e-05, + "loss": 0.6557, + "step": 14438 + }, + { + "epoch": 0.24949889411114182, + "grad_norm": 0.8399579819281769, + "learning_rate": 1.757796557617484e-05, + "loss": 0.602, + "step": 14439 + }, + { + "epoch": 0.24951617362455072, + "grad_norm": 1.1029328842205814, + "learning_rate": 1.7577600398079016e-05, + "loss": 0.6653, + "step": 14440 + }, + { + "epoch": 0.24953345313795963, + "grad_norm": 0.6096896075597232, + "learning_rate": 1.7577235196249573e-05, + "loss": 0.7611, + "step": 14441 + }, + { + "epoch": 0.24955073265136854, + "grad_norm": 0.7444937095043066, + "learning_rate": 1.7576869970687655e-05, + "loss": 0.4705, + "step": 14442 + }, + { + "epoch": 0.24956801216477745, + "grad_norm": 1.0079199501319234, + "learning_rate": 1.75765047213944e-05, + "loss": 0.5451, + "step": 14443 + }, + { + "epoch": 0.24958529167818636, + "grad_norm": 0.4920636226346579, + "learning_rate": 1.757613944837096e-05, + "loss": 0.6413, + "step": 14444 + }, + { + "epoch": 0.24960257119159523, + "grad_norm": 0.5790102189366347, + "learning_rate": 1.757577415161847e-05, + "loss": 0.6196, + "step": 14445 + }, + { + "epoch": 0.24961985070500414, + "grad_norm": 0.9851084377664964, + "learning_rate": 1.7575408831138086e-05, + "loss": 0.4911, + "step": 14446 + }, + { + "epoch": 0.24963713021841305, + "grad_norm": 0.43325798556347456, + "learning_rate": 1.7575043486930945e-05, + "loss": 0.3013, + "step": 14447 + }, + { + "epoch": 0.24965440973182196, + "grad_norm": 0.8891357368371107, + "learning_rate": 1.757467811899819e-05, + "loss": 0.6623, + "step": 14448 + }, + { + "epoch": 0.24967168924523087, + "grad_norm": 0.8835655331904742, + "learning_rate": 1.7574312727340966e-05, + "loss": 0.5592, + "step": 14449 + }, + { + "epoch": 0.24968896875863975, + "grad_norm": 1.1794028265856422, + "learning_rate": 1.757394731196042e-05, + "loss": 0.4818, + "step": 14450 + }, + { + "epoch": 0.24970624827204865, + "grad_norm": 0.7416040802123851, + "learning_rate": 1.75735818728577e-05, + "loss": 0.4998, + "step": 14451 + }, + { + "epoch": 0.24972352778545756, + "grad_norm": 1.1065451888840416, + "learning_rate": 1.7573216410033942e-05, + "loss": 0.3706, + "step": 14452 + }, + { + "epoch": 0.24974080729886647, + "grad_norm": 1.2825295948129751, + "learning_rate": 1.7572850923490294e-05, + "loss": 0.5894, + "step": 14453 + }, + { + "epoch": 0.24975808681227538, + "grad_norm": 0.401291605036772, + "learning_rate": 1.7572485413227904e-05, + "loss": 0.6729, + "step": 14454 + }, + { + "epoch": 0.24977536632568426, + "grad_norm": 0.9270110617003083, + "learning_rate": 1.757211987924791e-05, + "loss": 0.6462, + "step": 14455 + }, + { + "epoch": 0.24979264583909316, + "grad_norm": 1.0477715778823298, + "learning_rate": 1.7571754321551465e-05, + "loss": 0.4536, + "step": 14456 + }, + { + "epoch": 0.24980992535250207, + "grad_norm": 0.8162707837628704, + "learning_rate": 1.757138874013971e-05, + "loss": 0.2667, + "step": 14457 + }, + { + "epoch": 0.24982720486591098, + "grad_norm": 0.7188707921649322, + "learning_rate": 1.7571023135013787e-05, + "loss": 0.6458, + "step": 14458 + }, + { + "epoch": 0.2498444843793199, + "grad_norm": 1.2128217528853653, + "learning_rate": 1.7570657506174848e-05, + "loss": 0.7224, + "step": 14459 + }, + { + "epoch": 0.2498617638927288, + "grad_norm": 0.7666605008460518, + "learning_rate": 1.757029185362403e-05, + "loss": 0.2837, + "step": 14460 + }, + { + "epoch": 0.24987904340613767, + "grad_norm": 1.0808081072421214, + "learning_rate": 1.7569926177362486e-05, + "loss": 0.5248, + "step": 14461 + }, + { + "epoch": 0.24989632291954658, + "grad_norm": 1.15676679394471, + "learning_rate": 1.7569560477391356e-05, + "loss": 0.6681, + "step": 14462 + }, + { + "epoch": 0.2499136024329555, + "grad_norm": 1.482115806806773, + "learning_rate": 1.7569194753711788e-05, + "loss": 0.6563, + "step": 14463 + }, + { + "epoch": 0.2499308819463644, + "grad_norm": 1.4754167009200638, + "learning_rate": 1.7568829006324927e-05, + "loss": 0.5764, + "step": 14464 + }, + { + "epoch": 0.2499481614597733, + "grad_norm": 1.1062784811875865, + "learning_rate": 1.7568463235231917e-05, + "loss": 0.3653, + "step": 14465 + }, + { + "epoch": 0.24996544097318218, + "grad_norm": 1.335736013448704, + "learning_rate": 1.7568097440433907e-05, + "loss": 0.4763, + "step": 14466 + }, + { + "epoch": 0.2499827204865911, + "grad_norm": 0.674924509014645, + "learning_rate": 1.756773162193204e-05, + "loss": 0.4479, + "step": 14467 + }, + { + "epoch": 0.25, + "grad_norm": 1.1786486911206255, + "learning_rate": 1.7567365779727463e-05, + "loss": 0.5599, + "step": 14468 + }, + { + "epoch": 0.2500172795134089, + "grad_norm": 1.2630995221673866, + "learning_rate": 1.756699991382132e-05, + "loss": 0.5527, + "step": 14469 + }, + { + "epoch": 0.2500345590268178, + "grad_norm": 1.065010908865613, + "learning_rate": 1.756663402421476e-05, + "loss": 0.5069, + "step": 14470 + }, + { + "epoch": 0.2500518385402267, + "grad_norm": 1.119576579129707, + "learning_rate": 1.7566268110908926e-05, + "loss": 0.6625, + "step": 14471 + }, + { + "epoch": 0.25006911805363563, + "grad_norm": 1.1939524422487549, + "learning_rate": 1.7565902173904964e-05, + "loss": 0.6693, + "step": 14472 + }, + { + "epoch": 0.25008639756704454, + "grad_norm": 0.9580423709355611, + "learning_rate": 1.7565536213204022e-05, + "loss": 0.6056, + "step": 14473 + }, + { + "epoch": 0.2501036770804534, + "grad_norm": 1.00502296074213, + "learning_rate": 1.7565170228807245e-05, + "loss": 0.5285, + "step": 14474 + }, + { + "epoch": 0.2501209565938623, + "grad_norm": 0.8014009120543569, + "learning_rate": 1.7564804220715782e-05, + "loss": 0.4177, + "step": 14475 + }, + { + "epoch": 0.2501382361072712, + "grad_norm": 0.9906394554543001, + "learning_rate": 1.7564438188930778e-05, + "loss": 0.553, + "step": 14476 + }, + { + "epoch": 0.2501555156206801, + "grad_norm": 0.7875783070183442, + "learning_rate": 1.756407213345338e-05, + "loss": 0.4024, + "step": 14477 + }, + { + "epoch": 0.250172795134089, + "grad_norm": 1.92689647294607, + "learning_rate": 1.7563706054284724e-05, + "loss": 0.6581, + "step": 14478 + }, + { + "epoch": 0.25019007464749793, + "grad_norm": 0.7029065502316006, + "learning_rate": 1.7563339951425975e-05, + "loss": 0.562, + "step": 14479 + }, + { + "epoch": 0.25020735416090684, + "grad_norm": 1.2549333427246554, + "learning_rate": 1.756297382487827e-05, + "loss": 0.4607, + "step": 14480 + }, + { + "epoch": 0.25022463367431574, + "grad_norm": 1.1061237165227842, + "learning_rate": 1.756260767464275e-05, + "loss": 0.5989, + "step": 14481 + }, + { + "epoch": 0.25024191318772465, + "grad_norm": 1.2743438713443584, + "learning_rate": 1.756224150072057e-05, + "loss": 0.5964, + "step": 14482 + }, + { + "epoch": 0.25025919270113356, + "grad_norm": 0.6777190901433127, + "learning_rate": 1.7561875303112882e-05, + "loss": 0.5191, + "step": 14483 + }, + { + "epoch": 0.2502764722145424, + "grad_norm": 1.0591858992895398, + "learning_rate": 1.756150908182082e-05, + "loss": 0.8102, + "step": 14484 + }, + { + "epoch": 0.2502937517279513, + "grad_norm": 1.2040596483850918, + "learning_rate": 1.7561142836845535e-05, + "loss": 0.4949, + "step": 14485 + }, + { + "epoch": 0.2503110312413602, + "grad_norm": 1.1356017068271473, + "learning_rate": 1.756077656818818e-05, + "loss": 0.5255, + "step": 14486 + }, + { + "epoch": 0.25032831075476913, + "grad_norm": 1.0072179359166618, + "learning_rate": 1.7560410275849896e-05, + "loss": 0.4952, + "step": 14487 + }, + { + "epoch": 0.25034559026817804, + "grad_norm": 1.062890802613147, + "learning_rate": 1.7560043959831832e-05, + "loss": 0.5677, + "step": 14488 + }, + { + "epoch": 0.25036286978158695, + "grad_norm": 1.6856085747538776, + "learning_rate": 1.755967762013514e-05, + "loss": 0.6018, + "step": 14489 + }, + { + "epoch": 0.25038014929499586, + "grad_norm": 1.6458531267192031, + "learning_rate": 1.7559311256760958e-05, + "loss": 0.6155, + "step": 14490 + }, + { + "epoch": 0.25039742880840477, + "grad_norm": 0.8108765908517642, + "learning_rate": 1.755894486971044e-05, + "loss": 0.2888, + "step": 14491 + }, + { + "epoch": 0.2504147083218137, + "grad_norm": 0.8802427566704419, + "learning_rate": 1.7558578458984733e-05, + "loss": 0.5498, + "step": 14492 + }, + { + "epoch": 0.2504319878352226, + "grad_norm": 2.4331875312592715, + "learning_rate": 1.7558212024584986e-05, + "loss": 0.3904, + "step": 14493 + }, + { + "epoch": 0.2504492673486315, + "grad_norm": 1.944225332271197, + "learning_rate": 1.7557845566512345e-05, + "loss": 0.664, + "step": 14494 + }, + { + "epoch": 0.25046654686204034, + "grad_norm": 0.8915580138411888, + "learning_rate": 1.7557479084767952e-05, + "loss": 0.4424, + "step": 14495 + }, + { + "epoch": 0.25048382637544925, + "grad_norm": 0.6843544828303317, + "learning_rate": 1.7557112579352963e-05, + "loss": 0.4443, + "step": 14496 + }, + { + "epoch": 0.25050110588885816, + "grad_norm": 0.7899801942317805, + "learning_rate": 1.7556746050268522e-05, + "loss": 0.5995, + "step": 14497 + }, + { + "epoch": 0.25051838540226706, + "grad_norm": 0.45526848107949947, + "learning_rate": 1.7556379497515782e-05, + "loss": 0.69, + "step": 14498 + }, + { + "epoch": 0.25053566491567597, + "grad_norm": 1.52114383472435, + "learning_rate": 1.755601292109588e-05, + "loss": 0.4883, + "step": 14499 + }, + { + "epoch": 0.2505529444290849, + "grad_norm": 1.4305035246408775, + "learning_rate": 1.7555646321009977e-05, + "loss": 0.6897, + "step": 14500 + }, + { + "epoch": 0.2505702239424938, + "grad_norm": 0.5650028050129493, + "learning_rate": 1.7555279697259213e-05, + "loss": 0.5858, + "step": 14501 + }, + { + "epoch": 0.2505875034559027, + "grad_norm": 0.48436353892174977, + "learning_rate": 1.755491304984474e-05, + "loss": 0.6855, + "step": 14502 + }, + { + "epoch": 0.2506047829693116, + "grad_norm": 1.0523742510502954, + "learning_rate": 1.7554546378767705e-05, + "loss": 0.7023, + "step": 14503 + }, + { + "epoch": 0.2506220624827205, + "grad_norm": 0.9948654782992115, + "learning_rate": 1.7554179684029257e-05, + "loss": 0.4948, + "step": 14504 + }, + { + "epoch": 0.2506393419961294, + "grad_norm": 0.8719493986598388, + "learning_rate": 1.7553812965630544e-05, + "loss": 0.4631, + "step": 14505 + }, + { + "epoch": 0.25065662150953827, + "grad_norm": 2.082391911361976, + "learning_rate": 1.7553446223572716e-05, + "loss": 0.5247, + "step": 14506 + }, + { + "epoch": 0.2506739010229472, + "grad_norm": 0.8020802265072513, + "learning_rate": 1.755307945785692e-05, + "loss": 0.5413, + "step": 14507 + }, + { + "epoch": 0.2506911805363561, + "grad_norm": 0.47230143299603783, + "learning_rate": 1.7552712668484302e-05, + "loss": 0.7934, + "step": 14508 + }, + { + "epoch": 0.250708460049765, + "grad_norm": 0.8743371383657759, + "learning_rate": 1.7552345855456017e-05, + "loss": 0.5503, + "step": 14509 + }, + { + "epoch": 0.2507257395631739, + "grad_norm": 0.6503071486765222, + "learning_rate": 1.7551979018773208e-05, + "loss": 0.4351, + "step": 14510 + }, + { + "epoch": 0.2507430190765828, + "grad_norm": 1.3501741880622316, + "learning_rate": 1.7551612158437028e-05, + "loss": 0.5562, + "step": 14511 + }, + { + "epoch": 0.2507602985899917, + "grad_norm": 0.6154203260025375, + "learning_rate": 1.7551245274448625e-05, + "loss": 0.42, + "step": 14512 + }, + { + "epoch": 0.2507775781034006, + "grad_norm": 1.1480625453586344, + "learning_rate": 1.755087836680915e-05, + "loss": 0.5802, + "step": 14513 + }, + { + "epoch": 0.25079485761680953, + "grad_norm": 1.0531620286130308, + "learning_rate": 1.7550511435519748e-05, + "loss": 0.5016, + "step": 14514 + }, + { + "epoch": 0.25081213713021844, + "grad_norm": 0.3929692096464447, + "learning_rate": 1.755014448058157e-05, + "loss": 0.621, + "step": 14515 + }, + { + "epoch": 0.2508294166436273, + "grad_norm": 0.9204883316880526, + "learning_rate": 1.7549777501995767e-05, + "loss": 0.4785, + "step": 14516 + }, + { + "epoch": 0.2508466961570362, + "grad_norm": 1.3552002610094454, + "learning_rate": 1.7549410499763485e-05, + "loss": 0.6807, + "step": 14517 + }, + { + "epoch": 0.2508639756704451, + "grad_norm": 1.1776763087950053, + "learning_rate": 1.754904347388588e-05, + "loss": 0.4876, + "step": 14518 + }, + { + "epoch": 0.250881255183854, + "grad_norm": 1.0087865043146171, + "learning_rate": 1.7548676424364094e-05, + "loss": 0.5955, + "step": 14519 + }, + { + "epoch": 0.2508985346972629, + "grad_norm": 0.9681505084382427, + "learning_rate": 1.754830935119928e-05, + "loss": 0.4077, + "step": 14520 + }, + { + "epoch": 0.25091581421067183, + "grad_norm": 1.4988151628439041, + "learning_rate": 1.7547942254392587e-05, + "loss": 0.6273, + "step": 14521 + }, + { + "epoch": 0.25093309372408074, + "grad_norm": 1.2037597683061447, + "learning_rate": 1.7547575133945165e-05, + "loss": 0.5077, + "step": 14522 + }, + { + "epoch": 0.25095037323748964, + "grad_norm": 19.78088707862915, + "learning_rate": 1.7547207989858165e-05, + "loss": 0.3491, + "step": 14523 + }, + { + "epoch": 0.25096765275089855, + "grad_norm": 14.62793742640225, + "learning_rate": 1.7546840822132735e-05, + "loss": 0.6763, + "step": 14524 + }, + { + "epoch": 0.25098493226430746, + "grad_norm": 0.9960985875517634, + "learning_rate": 1.7546473630770028e-05, + "loss": 0.3581, + "step": 14525 + }, + { + "epoch": 0.25100221177771637, + "grad_norm": 1.2233641920534502, + "learning_rate": 1.7546106415771192e-05, + "loss": 0.5699, + "step": 14526 + }, + { + "epoch": 0.2510194912911252, + "grad_norm": 1.0771545385934056, + "learning_rate": 1.7545739177137375e-05, + "loss": 0.6895, + "step": 14527 + }, + { + "epoch": 0.2510367708045341, + "grad_norm": 1.3444544666845093, + "learning_rate": 1.754537191486973e-05, + "loss": 0.7164, + "step": 14528 + }, + { + "epoch": 0.25105405031794303, + "grad_norm": 0.9054439781157239, + "learning_rate": 1.7545004628969407e-05, + "loss": 0.4223, + "step": 14529 + }, + { + "epoch": 0.25107132983135194, + "grad_norm": 1.0122412293337282, + "learning_rate": 1.7544637319437557e-05, + "loss": 0.635, + "step": 14530 + }, + { + "epoch": 0.25108860934476085, + "grad_norm": 0.5348515432029735, + "learning_rate": 1.7544269986275327e-05, + "loss": 0.3079, + "step": 14531 + }, + { + "epoch": 0.25110588885816976, + "grad_norm": 0.8655404951475149, + "learning_rate": 1.7543902629483873e-05, + "loss": 0.4198, + "step": 14532 + }, + { + "epoch": 0.25112316837157866, + "grad_norm": 1.2595929507398436, + "learning_rate": 1.7543535249064344e-05, + "loss": 0.6045, + "step": 14533 + }, + { + "epoch": 0.25114044788498757, + "grad_norm": 0.9114688746605831, + "learning_rate": 1.7543167845017884e-05, + "loss": 0.6639, + "step": 14534 + }, + { + "epoch": 0.2511577273983965, + "grad_norm": 1.3766617931982952, + "learning_rate": 1.7542800417345653e-05, + "loss": 0.6283, + "step": 14535 + }, + { + "epoch": 0.2511750069118054, + "grad_norm": 0.7536859341732406, + "learning_rate": 1.7542432966048795e-05, + "loss": 0.4473, + "step": 14536 + }, + { + "epoch": 0.25119228642521424, + "grad_norm": 5.281202134104472, + "learning_rate": 1.754206549112847e-05, + "loss": 0.4342, + "step": 14537 + }, + { + "epoch": 0.25120956593862315, + "grad_norm": 1.3419201282070479, + "learning_rate": 1.7541697992585815e-05, + "loss": 0.6039, + "step": 14538 + }, + { + "epoch": 0.25122684545203205, + "grad_norm": 0.9943324319988042, + "learning_rate": 1.754133047042199e-05, + "loss": 0.5963, + "step": 14539 + }, + { + "epoch": 0.25124412496544096, + "grad_norm": 1.4390476921772415, + "learning_rate": 1.754096292463815e-05, + "loss": 0.7732, + "step": 14540 + }, + { + "epoch": 0.25126140447884987, + "grad_norm": 1.5641600883541382, + "learning_rate": 1.7540595355235436e-05, + "loss": 0.425, + "step": 14541 + }, + { + "epoch": 0.2512786839922588, + "grad_norm": 1.0086606596918746, + "learning_rate": 1.754022776221501e-05, + "loss": 0.7287, + "step": 14542 + }, + { + "epoch": 0.2512959635056677, + "grad_norm": 1.1090659639495786, + "learning_rate": 1.7539860145578013e-05, + "loss": 0.6442, + "step": 14543 + }, + { + "epoch": 0.2513132430190766, + "grad_norm": 1.3366214165067714, + "learning_rate": 1.75394925053256e-05, + "loss": 0.6462, + "step": 14544 + }, + { + "epoch": 0.2513305225324855, + "grad_norm": 0.4669341347750054, + "learning_rate": 1.7539124841458927e-05, + "loss": 0.6702, + "step": 14545 + }, + { + "epoch": 0.2513478020458944, + "grad_norm": 1.2815719761916908, + "learning_rate": 1.753875715397914e-05, + "loss": 0.3529, + "step": 14546 + }, + { + "epoch": 0.2513650815593033, + "grad_norm": 1.95713799796268, + "learning_rate": 1.7538389442887393e-05, + "loss": 0.5007, + "step": 14547 + }, + { + "epoch": 0.25138236107271217, + "grad_norm": 1.2155878653988386, + "learning_rate": 1.7538021708184838e-05, + "loss": 0.3783, + "step": 14548 + }, + { + "epoch": 0.2513996405861211, + "grad_norm": 1.5449830061288203, + "learning_rate": 1.7537653949872625e-05, + "loss": 0.4188, + "step": 14549 + }, + { + "epoch": 0.25141692009953, + "grad_norm": 1.5812925512849711, + "learning_rate": 1.753728616795191e-05, + "loss": 0.4557, + "step": 14550 + }, + { + "epoch": 0.2514341996129389, + "grad_norm": 0.9123116848900319, + "learning_rate": 1.7536918362423837e-05, + "loss": 0.6201, + "step": 14551 + }, + { + "epoch": 0.2514514791263478, + "grad_norm": 0.6021349064368404, + "learning_rate": 1.7536550533289566e-05, + "loss": 0.656, + "step": 14552 + }, + { + "epoch": 0.2514687586397567, + "grad_norm": 1.5954897936150556, + "learning_rate": 1.7536182680550245e-05, + "loss": 0.5857, + "step": 14553 + }, + { + "epoch": 0.2514860381531656, + "grad_norm": 1.2735881694536169, + "learning_rate": 1.7535814804207027e-05, + "loss": 0.7599, + "step": 14554 + }, + { + "epoch": 0.2515033176665745, + "grad_norm": 1.3897388205013403, + "learning_rate": 1.7535446904261065e-05, + "loss": 0.5837, + "step": 14555 + }, + { + "epoch": 0.25152059717998343, + "grad_norm": 0.4651055179626021, + "learning_rate": 1.753507898071351e-05, + "loss": 0.8092, + "step": 14556 + }, + { + "epoch": 0.25153787669339234, + "grad_norm": 0.7685826206600196, + "learning_rate": 1.7534711033565517e-05, + "loss": 0.5837, + "step": 14557 + }, + { + "epoch": 0.2515551562068012, + "grad_norm": 1.2004265183899554, + "learning_rate": 1.7534343062818234e-05, + "loss": 0.5946, + "step": 14558 + }, + { + "epoch": 0.2515724357202101, + "grad_norm": 1.0400295268857849, + "learning_rate": 1.7533975068472815e-05, + "loss": 0.4953, + "step": 14559 + }, + { + "epoch": 0.251589715233619, + "grad_norm": 1.1365995865953162, + "learning_rate": 1.7533607050530418e-05, + "loss": 0.5601, + "step": 14560 + }, + { + "epoch": 0.2516069947470279, + "grad_norm": 0.8681984855267978, + "learning_rate": 1.7533239008992187e-05, + "loss": 0.5641, + "step": 14561 + }, + { + "epoch": 0.2516242742604368, + "grad_norm": 1.1487846829610304, + "learning_rate": 1.753287094385928e-05, + "loss": 0.5671, + "step": 14562 + }, + { + "epoch": 0.2516415537738457, + "grad_norm": 0.8552500200254585, + "learning_rate": 1.753250285513285e-05, + "loss": 0.6146, + "step": 14563 + }, + { + "epoch": 0.25165883328725464, + "grad_norm": 0.8613128101602038, + "learning_rate": 1.7532134742814048e-05, + "loss": 0.7483, + "step": 14564 + }, + { + "epoch": 0.25167611280066354, + "grad_norm": 1.2217659358906676, + "learning_rate": 1.7531766606904024e-05, + "loss": 0.7837, + "step": 14565 + }, + { + "epoch": 0.25169339231407245, + "grad_norm": 0.768477437459257, + "learning_rate": 1.753139844740394e-05, + "loss": 0.6614, + "step": 14566 + }, + { + "epoch": 0.25171067182748136, + "grad_norm": 0.3748836580374003, + "learning_rate": 1.753103026431494e-05, + "loss": 0.7454, + "step": 14567 + }, + { + "epoch": 0.25172795134089027, + "grad_norm": 0.8426920446908613, + "learning_rate": 1.7530662057638184e-05, + "loss": 0.4783, + "step": 14568 + }, + { + "epoch": 0.2517452308542991, + "grad_norm": 1.3090216411849263, + "learning_rate": 1.753029382737482e-05, + "loss": 0.6836, + "step": 14569 + }, + { + "epoch": 0.251762510367708, + "grad_norm": 1.042284252068064, + "learning_rate": 1.7529925573526007e-05, + "loss": 0.5126, + "step": 14570 + }, + { + "epoch": 0.25177978988111693, + "grad_norm": 1.1070702908756351, + "learning_rate": 1.7529557296092887e-05, + "loss": 0.5508, + "step": 14571 + }, + { + "epoch": 0.25179706939452584, + "grad_norm": 1.567990773640594, + "learning_rate": 1.752918899507663e-05, + "loss": 0.4897, + "step": 14572 + }, + { + "epoch": 0.25181434890793475, + "grad_norm": 0.6431094112411202, + "learning_rate": 1.7528820670478378e-05, + "loss": 0.7516, + "step": 14573 + }, + { + "epoch": 0.25183162842134366, + "grad_norm": 1.1929386501459829, + "learning_rate": 1.7528452322299284e-05, + "loss": 0.714, + "step": 14574 + }, + { + "epoch": 0.25184890793475256, + "grad_norm": 0.42765161364951837, + "learning_rate": 1.7528083950540508e-05, + "loss": 0.6351, + "step": 14575 + }, + { + "epoch": 0.25186618744816147, + "grad_norm": 1.5039379815995604, + "learning_rate": 1.75277155552032e-05, + "loss": 0.5972, + "step": 14576 + }, + { + "epoch": 0.2518834669615704, + "grad_norm": 1.4548919020733673, + "learning_rate": 1.7527347136288515e-05, + "loss": 0.6989, + "step": 14577 + }, + { + "epoch": 0.2519007464749793, + "grad_norm": 0.9552426229776959, + "learning_rate": 1.7526978693797606e-05, + "loss": 0.692, + "step": 14578 + }, + { + "epoch": 0.2519180259883882, + "grad_norm": 1.558350359401104, + "learning_rate": 1.752661022773163e-05, + "loss": 0.5325, + "step": 14579 + }, + { + "epoch": 0.25193530550179705, + "grad_norm": 0.6227355552980084, + "learning_rate": 1.7526241738091737e-05, + "loss": 0.4818, + "step": 14580 + }, + { + "epoch": 0.25195258501520595, + "grad_norm": 0.8130268534814331, + "learning_rate": 1.7525873224879083e-05, + "loss": 0.4946, + "step": 14581 + }, + { + "epoch": 0.25196986452861486, + "grad_norm": 1.1818466909731629, + "learning_rate": 1.7525504688094822e-05, + "loss": 0.5194, + "step": 14582 + }, + { + "epoch": 0.25198714404202377, + "grad_norm": 0.8704886288438448, + "learning_rate": 1.752513612774011e-05, + "loss": 0.4608, + "step": 14583 + }, + { + "epoch": 0.2520044235554327, + "grad_norm": 0.9715323561653412, + "learning_rate": 1.75247675438161e-05, + "loss": 0.4363, + "step": 14584 + }, + { + "epoch": 0.2520217030688416, + "grad_norm": 1.3205679522599036, + "learning_rate": 1.7524398936323944e-05, + "loss": 0.4866, + "step": 14585 + }, + { + "epoch": 0.2520389825822505, + "grad_norm": 0.664626023240692, + "learning_rate": 1.75240303052648e-05, + "loss": 0.4041, + "step": 14586 + }, + { + "epoch": 0.2520562620956594, + "grad_norm": 0.9763504044766959, + "learning_rate": 1.7523661650639823e-05, + "loss": 0.5284, + "step": 14587 + }, + { + "epoch": 0.2520735416090683, + "grad_norm": 0.8421232177282385, + "learning_rate": 1.7523292972450165e-05, + "loss": 0.3902, + "step": 14588 + }, + { + "epoch": 0.2520908211224772, + "grad_norm": 0.6880140519014868, + "learning_rate": 1.7522924270696978e-05, + "loss": 0.4783, + "step": 14589 + }, + { + "epoch": 0.25210810063588607, + "grad_norm": 1.1657487299366895, + "learning_rate": 1.7522555545381422e-05, + "loss": 0.4872, + "step": 14590 + }, + { + "epoch": 0.252125380149295, + "grad_norm": 1.2018301646900067, + "learning_rate": 1.7522186796504655e-05, + "loss": 0.4693, + "step": 14591 + }, + { + "epoch": 0.2521426596627039, + "grad_norm": 1.5139584661787726, + "learning_rate": 1.7521818024067823e-05, + "loss": 0.4979, + "step": 14592 + }, + { + "epoch": 0.2521599391761128, + "grad_norm": 0.8004400777675222, + "learning_rate": 1.7521449228072083e-05, + "loss": 0.4071, + "step": 14593 + }, + { + "epoch": 0.2521772186895217, + "grad_norm": 0.7478376138327225, + "learning_rate": 1.7521080408518596e-05, + "loss": 0.4195, + "step": 14594 + }, + { + "epoch": 0.2521944982029306, + "grad_norm": 1.3871745294065794, + "learning_rate": 1.7520711565408516e-05, + "loss": 0.6123, + "step": 14595 + }, + { + "epoch": 0.2522117777163395, + "grad_norm": 1.096619659690265, + "learning_rate": 1.752034269874299e-05, + "loss": 0.4928, + "step": 14596 + }, + { + "epoch": 0.2522290572297484, + "grad_norm": 1.0260175351876064, + "learning_rate": 1.751997380852318e-05, + "loss": 0.5616, + "step": 14597 + }, + { + "epoch": 0.25224633674315733, + "grad_norm": 1.117633291472542, + "learning_rate": 1.7519604894750245e-05, + "loss": 0.4126, + "step": 14598 + }, + { + "epoch": 0.25226361625656624, + "grad_norm": 1.5342100193997323, + "learning_rate": 1.7519235957425334e-05, + "loss": 0.5919, + "step": 14599 + }, + { + "epoch": 0.25228089576997514, + "grad_norm": 1.2980872748310783, + "learning_rate": 1.7518866996549603e-05, + "loss": 0.5772, + "step": 14600 + }, + { + "epoch": 0.252298175283384, + "grad_norm": 1.282755480773523, + "learning_rate": 1.751849801212421e-05, + "loss": 0.5063, + "step": 14601 + }, + { + "epoch": 0.2523154547967929, + "grad_norm": 2.8843828425641416, + "learning_rate": 1.7518129004150313e-05, + "loss": 0.5438, + "step": 14602 + }, + { + "epoch": 0.2523327343102018, + "grad_norm": 0.8238637682278792, + "learning_rate": 1.751775997262906e-05, + "loss": 0.5079, + "step": 14603 + }, + { + "epoch": 0.2523500138236107, + "grad_norm": 0.4091639826099683, + "learning_rate": 1.7517390917561612e-05, + "loss": 0.6506, + "step": 14604 + }, + { + "epoch": 0.2523672933370196, + "grad_norm": 0.9407667725788207, + "learning_rate": 1.7517021838949125e-05, + "loss": 0.5337, + "step": 14605 + }, + { + "epoch": 0.25238457285042853, + "grad_norm": 1.4982775458906183, + "learning_rate": 1.7516652736792758e-05, + "loss": 0.5758, + "step": 14606 + }, + { + "epoch": 0.25240185236383744, + "grad_norm": 1.428008708612437, + "learning_rate": 1.751628361109366e-05, + "loss": 0.5356, + "step": 14607 + }, + { + "epoch": 0.25241913187724635, + "grad_norm": 0.8552385924175895, + "learning_rate": 1.751591446185299e-05, + "loss": 0.6629, + "step": 14608 + }, + { + "epoch": 0.25243641139065526, + "grad_norm": 0.5942109091490821, + "learning_rate": 1.7515545289071903e-05, + "loss": 0.5084, + "step": 14609 + }, + { + "epoch": 0.25245369090406417, + "grad_norm": 1.123587839847969, + "learning_rate": 1.751517609275156e-05, + "loss": 0.6099, + "step": 14610 + }, + { + "epoch": 0.252470970417473, + "grad_norm": 1.1015476011829635, + "learning_rate": 1.7514806872893114e-05, + "loss": 0.6897, + "step": 14611 + }, + { + "epoch": 0.2524882499308819, + "grad_norm": 1.5760427481537518, + "learning_rate": 1.751443762949772e-05, + "loss": 0.6483, + "step": 14612 + }, + { + "epoch": 0.25250552944429083, + "grad_norm": 1.918044924380789, + "learning_rate": 1.7514068362566542e-05, + "loss": 0.617, + "step": 14613 + }, + { + "epoch": 0.25252280895769974, + "grad_norm": 1.4498066207058844, + "learning_rate": 1.7513699072100724e-05, + "loss": 0.7035, + "step": 14614 + }, + { + "epoch": 0.25254008847110865, + "grad_norm": 0.45707029195452087, + "learning_rate": 1.7513329758101433e-05, + "loss": 0.8402, + "step": 14615 + }, + { + "epoch": 0.25255736798451756, + "grad_norm": 0.9127066702698059, + "learning_rate": 1.751296042056982e-05, + "loss": 0.4342, + "step": 14616 + }, + { + "epoch": 0.25257464749792646, + "grad_norm": 0.8381704152724286, + "learning_rate": 1.7512591059507044e-05, + "loss": 0.5011, + "step": 14617 + }, + { + "epoch": 0.25259192701133537, + "grad_norm": 1.201929926473634, + "learning_rate": 1.7512221674914266e-05, + "loss": 0.5103, + "step": 14618 + }, + { + "epoch": 0.2526092065247443, + "grad_norm": 1.2121740210971452, + "learning_rate": 1.7511852266792633e-05, + "loss": 0.4807, + "step": 14619 + }, + { + "epoch": 0.2526264860381532, + "grad_norm": 0.7458628520099676, + "learning_rate": 1.7511482835143313e-05, + "loss": 0.5584, + "step": 14620 + }, + { + "epoch": 0.2526437655515621, + "grad_norm": 1.5753201926065794, + "learning_rate": 1.7511113379967455e-05, + "loss": 0.5874, + "step": 14621 + }, + { + "epoch": 0.25266104506497095, + "grad_norm": 1.4627209237888859, + "learning_rate": 1.751074390126622e-05, + "loss": 0.6638, + "step": 14622 + }, + { + "epoch": 0.25267832457837985, + "grad_norm": 1.0741346529950027, + "learning_rate": 1.7510374399040765e-05, + "loss": 0.6431, + "step": 14623 + }, + { + "epoch": 0.25269560409178876, + "grad_norm": 1.7773297222410034, + "learning_rate": 1.7510004873292247e-05, + "loss": 0.6955, + "step": 14624 + }, + { + "epoch": 0.25271288360519767, + "grad_norm": 0.9037255847499028, + "learning_rate": 1.750963532402182e-05, + "loss": 0.4091, + "step": 14625 + }, + { + "epoch": 0.2527301631186066, + "grad_norm": 1.303665834512623, + "learning_rate": 1.7509265751230647e-05, + "loss": 0.4828, + "step": 14626 + }, + { + "epoch": 0.2527474426320155, + "grad_norm": 0.7556721384289189, + "learning_rate": 1.750889615491988e-05, + "loss": 0.59, + "step": 14627 + }, + { + "epoch": 0.2527647221454244, + "grad_norm": 0.8697339547613856, + "learning_rate": 1.750852653509068e-05, + "loss": 0.3692, + "step": 14628 + }, + { + "epoch": 0.2527820016588333, + "grad_norm": 1.1479681800862038, + "learning_rate": 1.7508156891744207e-05, + "loss": 0.5819, + "step": 14629 + }, + { + "epoch": 0.2527992811722422, + "grad_norm": 2.076500421991425, + "learning_rate": 1.7507787224881613e-05, + "loss": 0.5775, + "step": 14630 + }, + { + "epoch": 0.2528165606856511, + "grad_norm": 0.6328069352303781, + "learning_rate": 1.7507417534504062e-05, + "loss": 0.3608, + "step": 14631 + }, + { + "epoch": 0.25283384019905997, + "grad_norm": 0.9646033872719504, + "learning_rate": 1.7507047820612708e-05, + "loss": 0.5753, + "step": 14632 + }, + { + "epoch": 0.2528511197124689, + "grad_norm": 1.1640391856091423, + "learning_rate": 1.750667808320871e-05, + "loss": 0.4419, + "step": 14633 + }, + { + "epoch": 0.2528683992258778, + "grad_norm": 0.38091307886588577, + "learning_rate": 1.7506308322293225e-05, + "loss": 0.5418, + "step": 14634 + }, + { + "epoch": 0.2528856787392867, + "grad_norm": 0.6746780540988329, + "learning_rate": 1.7505938537867414e-05, + "loss": 0.7008, + "step": 14635 + }, + { + "epoch": 0.2529029582526956, + "grad_norm": 0.6145452663668622, + "learning_rate": 1.7505568729932428e-05, + "loss": 0.3264, + "step": 14636 + }, + { + "epoch": 0.2529202377661045, + "grad_norm": 1.0662826654468727, + "learning_rate": 1.7505198898489434e-05, + "loss": 0.5074, + "step": 14637 + }, + { + "epoch": 0.2529375172795134, + "grad_norm": 1.188367165507153, + "learning_rate": 1.7504829043539586e-05, + "loss": 0.5071, + "step": 14638 + }, + { + "epoch": 0.2529547967929223, + "grad_norm": 1.1970329016183088, + "learning_rate": 1.750445916508404e-05, + "loss": 0.7181, + "step": 14639 + }, + { + "epoch": 0.25297207630633123, + "grad_norm": 0.6430355952334369, + "learning_rate": 1.7504089263123965e-05, + "loss": 0.4505, + "step": 14640 + }, + { + "epoch": 0.25298935581974014, + "grad_norm": 0.8128191425158471, + "learning_rate": 1.7503719337660508e-05, + "loss": 0.4629, + "step": 14641 + }, + { + "epoch": 0.25300663533314904, + "grad_norm": 0.6032817989266392, + "learning_rate": 1.750334938869483e-05, + "loss": 0.8343, + "step": 14642 + }, + { + "epoch": 0.2530239148465579, + "grad_norm": 0.6912361214938018, + "learning_rate": 1.750297941622809e-05, + "loss": 0.4498, + "step": 14643 + }, + { + "epoch": 0.2530411943599668, + "grad_norm": 1.5520302494352578, + "learning_rate": 1.7502609420261453e-05, + "loss": 0.5213, + "step": 14644 + }, + { + "epoch": 0.2530584738733757, + "grad_norm": 0.41850346605988564, + "learning_rate": 1.7502239400796072e-05, + "loss": 0.5087, + "step": 14645 + }, + { + "epoch": 0.2530757533867846, + "grad_norm": 1.811671865169588, + "learning_rate": 1.750186935783311e-05, + "loss": 0.6828, + "step": 14646 + }, + { + "epoch": 0.2530930329001935, + "grad_norm": 0.8151768908292925, + "learning_rate": 1.7501499291373715e-05, + "loss": 0.3627, + "step": 14647 + }, + { + "epoch": 0.25311031241360243, + "grad_norm": 1.4141472405889195, + "learning_rate": 1.750112920141906e-05, + "loss": 0.3574, + "step": 14648 + }, + { + "epoch": 0.25312759192701134, + "grad_norm": 1.375547991044132, + "learning_rate": 1.7500759087970295e-05, + "loss": 0.5291, + "step": 14649 + }, + { + "epoch": 0.25314487144042025, + "grad_norm": 0.9358739001568329, + "learning_rate": 1.7500388951028586e-05, + "loss": 0.49, + "step": 14650 + }, + { + "epoch": 0.25316215095382916, + "grad_norm": 0.973150306479791, + "learning_rate": 1.7500018790595085e-05, + "loss": 0.5426, + "step": 14651 + }, + { + "epoch": 0.25317943046723806, + "grad_norm": 1.1288408605567153, + "learning_rate": 1.749964860667096e-05, + "loss": 0.5903, + "step": 14652 + }, + { + "epoch": 0.253196709980647, + "grad_norm": 1.0719942551025574, + "learning_rate": 1.7499278399257362e-05, + "loss": 0.572, + "step": 14653 + }, + { + "epoch": 0.2532139894940558, + "grad_norm": 0.9202364509420011, + "learning_rate": 1.7498908168355457e-05, + "loss": 0.5963, + "step": 14654 + }, + { + "epoch": 0.25323126900746473, + "grad_norm": 1.056313105819651, + "learning_rate": 1.74985379139664e-05, + "loss": 0.4951, + "step": 14655 + }, + { + "epoch": 0.25324854852087364, + "grad_norm": 0.5315001309105559, + "learning_rate": 1.7498167636091353e-05, + "loss": 0.4066, + "step": 14656 + }, + { + "epoch": 0.25326582803428255, + "grad_norm": 0.9110784125456662, + "learning_rate": 1.749779733473147e-05, + "loss": 0.3675, + "step": 14657 + }, + { + "epoch": 0.25328310754769146, + "grad_norm": 0.5299004555159109, + "learning_rate": 1.749742700988792e-05, + "loss": 0.2811, + "step": 14658 + }, + { + "epoch": 0.25330038706110036, + "grad_norm": 0.8273708650889406, + "learning_rate": 1.749705666156186e-05, + "loss": 0.3783, + "step": 14659 + }, + { + "epoch": 0.25331766657450927, + "grad_norm": 0.528226658474169, + "learning_rate": 1.749668628975445e-05, + "loss": 0.7173, + "step": 14660 + }, + { + "epoch": 0.2533349460879182, + "grad_norm": 0.7151887355364326, + "learning_rate": 1.7496315894466845e-05, + "loss": 0.5907, + "step": 14661 + }, + { + "epoch": 0.2533522256013271, + "grad_norm": 0.9740512894640673, + "learning_rate": 1.749594547570021e-05, + "loss": 0.3536, + "step": 14662 + }, + { + "epoch": 0.253369505114736, + "grad_norm": 1.5124025417062361, + "learning_rate": 1.7495575033455705e-05, + "loss": 0.5662, + "step": 14663 + }, + { + "epoch": 0.25338678462814485, + "grad_norm": 0.7833311304292729, + "learning_rate": 1.749520456773449e-05, + "loss": 0.7119, + "step": 14664 + }, + { + "epoch": 0.25340406414155375, + "grad_norm": 1.8442806851492746, + "learning_rate": 1.7494834078537722e-05, + "loss": 0.8118, + "step": 14665 + }, + { + "epoch": 0.25342134365496266, + "grad_norm": 1.1434161319523752, + "learning_rate": 1.7494463565866566e-05, + "loss": 0.4222, + "step": 14666 + }, + { + "epoch": 0.25343862316837157, + "grad_norm": 1.8805670444012947, + "learning_rate": 1.7494093029722183e-05, + "loss": 0.6356, + "step": 14667 + }, + { + "epoch": 0.2534559026817805, + "grad_norm": 0.906186056038859, + "learning_rate": 1.7493722470105727e-05, + "loss": 0.5576, + "step": 14668 + }, + { + "epoch": 0.2534731821951894, + "grad_norm": 1.1846754092528486, + "learning_rate": 1.7493351887018364e-05, + "loss": 0.5319, + "step": 14669 + }, + { + "epoch": 0.2534904617085983, + "grad_norm": 0.7903510404453191, + "learning_rate": 1.7492981280461253e-05, + "loss": 0.6065, + "step": 14670 + }, + { + "epoch": 0.2535077412220072, + "grad_norm": 1.9723348430506684, + "learning_rate": 1.7492610650435557e-05, + "loss": 0.7261, + "step": 14671 + }, + { + "epoch": 0.2535250207354161, + "grad_norm": 0.7915519249859215, + "learning_rate": 1.7492239996942432e-05, + "loss": 0.6447, + "step": 14672 + }, + { + "epoch": 0.253542300248825, + "grad_norm": 1.4207736643474205, + "learning_rate": 1.7491869319983043e-05, + "loss": 0.5692, + "step": 14673 + }, + { + "epoch": 0.2535595797622339, + "grad_norm": 1.4312043005290755, + "learning_rate": 1.7491498619558554e-05, + "loss": 0.3863, + "step": 14674 + }, + { + "epoch": 0.2535768592756428, + "grad_norm": 1.0103190993355142, + "learning_rate": 1.7491127895670118e-05, + "loss": 0.5538, + "step": 14675 + }, + { + "epoch": 0.2535941387890517, + "grad_norm": 0.6904012926943409, + "learning_rate": 1.7490757148318902e-05, + "loss": 0.5657, + "step": 14676 + }, + { + "epoch": 0.2536114183024606, + "grad_norm": 5.0924284081423155, + "learning_rate": 1.7490386377506064e-05, + "loss": 0.5679, + "step": 14677 + }, + { + "epoch": 0.2536286978158695, + "grad_norm": 4.774997802153511, + "learning_rate": 1.7490015583232764e-05, + "loss": 0.5889, + "step": 14678 + }, + { + "epoch": 0.2536459773292784, + "grad_norm": 1.2533395610061984, + "learning_rate": 1.7489644765500168e-05, + "loss": 0.5181, + "step": 14679 + }, + { + "epoch": 0.2536632568426873, + "grad_norm": 1.3552324018083954, + "learning_rate": 1.748927392430944e-05, + "loss": 0.8291, + "step": 14680 + }, + { + "epoch": 0.2536805363560962, + "grad_norm": 1.0420524690359303, + "learning_rate": 1.748890305966173e-05, + "loss": 0.4929, + "step": 14681 + }, + { + "epoch": 0.2536978158695051, + "grad_norm": 0.9165106673454186, + "learning_rate": 1.748853217155821e-05, + "loss": 0.5555, + "step": 14682 + }, + { + "epoch": 0.25371509538291404, + "grad_norm": 0.9786830908568811, + "learning_rate": 1.748816126000004e-05, + "loss": 0.5508, + "step": 14683 + }, + { + "epoch": 0.25373237489632294, + "grad_norm": 0.6483536123979599, + "learning_rate": 1.7487790324988373e-05, + "loss": 0.7468, + "step": 14684 + }, + { + "epoch": 0.2537496544097318, + "grad_norm": 0.8964832833572239, + "learning_rate": 1.7487419366524383e-05, + "loss": 0.5191, + "step": 14685 + }, + { + "epoch": 0.2537669339231407, + "grad_norm": 1.2043079004544066, + "learning_rate": 1.7487048384609227e-05, + "loss": 0.7338, + "step": 14686 + }, + { + "epoch": 0.2537842134365496, + "grad_norm": 0.9641139610041217, + "learning_rate": 1.7486677379244067e-05, + "loss": 0.476, + "step": 14687 + }, + { + "epoch": 0.2538014929499585, + "grad_norm": 1.1737033482497787, + "learning_rate": 1.748630635043006e-05, + "loss": 0.603, + "step": 14688 + }, + { + "epoch": 0.2538187724633674, + "grad_norm": 1.0503053944105372, + "learning_rate": 1.7485935298168378e-05, + "loss": 0.5978, + "step": 14689 + }, + { + "epoch": 0.25383605197677633, + "grad_norm": 0.36444503012578294, + "learning_rate": 1.7485564222460172e-05, + "loss": 0.6097, + "step": 14690 + }, + { + "epoch": 0.25385333149018524, + "grad_norm": 1.239712864140911, + "learning_rate": 1.7485193123306615e-05, + "loss": 0.5418, + "step": 14691 + }, + { + "epoch": 0.25387061100359415, + "grad_norm": 1.2654413537784603, + "learning_rate": 1.748482200070886e-05, + "loss": 0.4516, + "step": 14692 + }, + { + "epoch": 0.25388789051700306, + "grad_norm": 1.152858047154314, + "learning_rate": 1.7484450854668077e-05, + "loss": 0.5704, + "step": 14693 + }, + { + "epoch": 0.25390517003041196, + "grad_norm": 1.344353406765502, + "learning_rate": 1.7484079685185422e-05, + "loss": 0.5421, + "step": 14694 + }, + { + "epoch": 0.25392244954382087, + "grad_norm": 0.7943875594790196, + "learning_rate": 1.7483708492262063e-05, + "loss": 0.4429, + "step": 14695 + }, + { + "epoch": 0.2539397290572297, + "grad_norm": 1.108890237529332, + "learning_rate": 1.748333727589916e-05, + "loss": 0.7467, + "step": 14696 + }, + { + "epoch": 0.25395700857063863, + "grad_norm": 0.6594055712499154, + "learning_rate": 1.7482966036097877e-05, + "loss": 0.5417, + "step": 14697 + }, + { + "epoch": 0.25397428808404754, + "grad_norm": 0.916258360331099, + "learning_rate": 1.7482594772859374e-05, + "loss": 0.5751, + "step": 14698 + }, + { + "epoch": 0.25399156759745645, + "grad_norm": 0.7200782613021403, + "learning_rate": 1.7482223486184816e-05, + "loss": 0.4297, + "step": 14699 + }, + { + "epoch": 0.25400884711086535, + "grad_norm": 0.8812004037011888, + "learning_rate": 1.7481852176075366e-05, + "loss": 0.442, + "step": 14700 + }, + { + "epoch": 0.25402612662427426, + "grad_norm": 1.3668771178004235, + "learning_rate": 1.7481480842532187e-05, + "loss": 0.5533, + "step": 14701 + }, + { + "epoch": 0.25404340613768317, + "grad_norm": 0.7973638573210188, + "learning_rate": 1.748110948555644e-05, + "loss": 0.5425, + "step": 14702 + }, + { + "epoch": 0.2540606856510921, + "grad_norm": 1.9202019956905463, + "learning_rate": 1.748073810514929e-05, + "loss": 0.6684, + "step": 14703 + }, + { + "epoch": 0.254077965164501, + "grad_norm": 1.232173494725194, + "learning_rate": 1.7480366701311896e-05, + "loss": 0.6822, + "step": 14704 + }, + { + "epoch": 0.2540952446779099, + "grad_norm": 1.2786358496967298, + "learning_rate": 1.747999527404543e-05, + "loss": 0.5391, + "step": 14705 + }, + { + "epoch": 0.25411252419131874, + "grad_norm": 1.7011291500085315, + "learning_rate": 1.747962382335105e-05, + "loss": 0.6879, + "step": 14706 + }, + { + "epoch": 0.25412980370472765, + "grad_norm": 0.8839055557081471, + "learning_rate": 1.7479252349229918e-05, + "loss": 0.4134, + "step": 14707 + }, + { + "epoch": 0.25414708321813656, + "grad_norm": 0.9686119103761004, + "learning_rate": 1.74788808516832e-05, + "loss": 0.4828, + "step": 14708 + }, + { + "epoch": 0.25416436273154547, + "grad_norm": 0.49713206958384054, + "learning_rate": 1.7478509330712058e-05, + "loss": 0.7559, + "step": 14709 + }, + { + "epoch": 0.2541816422449544, + "grad_norm": 1.0100289216152083, + "learning_rate": 1.747813778631766e-05, + "loss": 0.5272, + "step": 14710 + }, + { + "epoch": 0.2541989217583633, + "grad_norm": 3.1605565606072785, + "learning_rate": 1.747776621850116e-05, + "loss": 0.5976, + "step": 14711 + }, + { + "epoch": 0.2542162012717722, + "grad_norm": 1.3267006063086577, + "learning_rate": 1.7477394627263734e-05, + "loss": 0.5538, + "step": 14712 + }, + { + "epoch": 0.2542334807851811, + "grad_norm": 0.6061374513387652, + "learning_rate": 1.7477023012606535e-05, + "loss": 0.3408, + "step": 14713 + }, + { + "epoch": 0.25425076029859, + "grad_norm": 1.1416670811362961, + "learning_rate": 1.7476651374530733e-05, + "loss": 0.524, + "step": 14714 + }, + { + "epoch": 0.2542680398119989, + "grad_norm": 1.0542366549899793, + "learning_rate": 1.747627971303749e-05, + "loss": 0.6158, + "step": 14715 + }, + { + "epoch": 0.2542853193254078, + "grad_norm": 0.9276957547698454, + "learning_rate": 1.7475908028127974e-05, + "loss": 0.3382, + "step": 14716 + }, + { + "epoch": 0.2543025988388167, + "grad_norm": 1.5595335830003987, + "learning_rate": 1.7475536319803345e-05, + "loss": 0.556, + "step": 14717 + }, + { + "epoch": 0.2543198783522256, + "grad_norm": 0.8400961415453332, + "learning_rate": 1.7475164588064767e-05, + "loss": 0.7152, + "step": 14718 + }, + { + "epoch": 0.2543371578656345, + "grad_norm": 0.8426034174445148, + "learning_rate": 1.7474792832913405e-05, + "loss": 0.5666, + "step": 14719 + }, + { + "epoch": 0.2543544373790434, + "grad_norm": 0.8413590438499837, + "learning_rate": 1.7474421054350422e-05, + "loss": 0.4796, + "step": 14720 + }, + { + "epoch": 0.2543717168924523, + "grad_norm": 1.1363353440373316, + "learning_rate": 1.747404925237699e-05, + "loss": 0.5219, + "step": 14721 + }, + { + "epoch": 0.2543889964058612, + "grad_norm": 0.8461790017523766, + "learning_rate": 1.747367742699426e-05, + "loss": 0.4861, + "step": 14722 + }, + { + "epoch": 0.2544062759192701, + "grad_norm": 0.9074080450524323, + "learning_rate": 1.747330557820341e-05, + "loss": 0.5798, + "step": 14723 + }, + { + "epoch": 0.254423555432679, + "grad_norm": 0.9986686885493649, + "learning_rate": 1.7472933706005597e-05, + "loss": 0.5073, + "step": 14724 + }, + { + "epoch": 0.25444083494608793, + "grad_norm": 0.8565734790035474, + "learning_rate": 1.747256181040199e-05, + "loss": 0.5402, + "step": 14725 + }, + { + "epoch": 0.25445811445949684, + "grad_norm": 0.9396830666648678, + "learning_rate": 1.7472189891393752e-05, + "loss": 0.4885, + "step": 14726 + }, + { + "epoch": 0.25447539397290575, + "grad_norm": 0.9710734756585492, + "learning_rate": 1.747181794898204e-05, + "loss": 0.4663, + "step": 14727 + }, + { + "epoch": 0.2544926734863146, + "grad_norm": 1.4885364546753588, + "learning_rate": 1.7471445983168035e-05, + "loss": 0.5897, + "step": 14728 + }, + { + "epoch": 0.2545099529997235, + "grad_norm": 1.170004543388864, + "learning_rate": 1.7471073993952888e-05, + "loss": 0.7268, + "step": 14729 + }, + { + "epoch": 0.2545272325131324, + "grad_norm": 1.054361903055343, + "learning_rate": 1.747070198133777e-05, + "loss": 0.7119, + "step": 14730 + }, + { + "epoch": 0.2545445120265413, + "grad_norm": 1.053421491735641, + "learning_rate": 1.747032994532385e-05, + "loss": 0.4308, + "step": 14731 + }, + { + "epoch": 0.25456179153995023, + "grad_norm": 0.758727334581687, + "learning_rate": 1.7469957885912284e-05, + "loss": 0.2856, + "step": 14732 + }, + { + "epoch": 0.25457907105335914, + "grad_norm": 0.6212489609497936, + "learning_rate": 1.7469585803104245e-05, + "loss": 0.3896, + "step": 14733 + }, + { + "epoch": 0.25459635056676805, + "grad_norm": 0.9355552857558286, + "learning_rate": 1.7469213696900895e-05, + "loss": 0.4702, + "step": 14734 + }, + { + "epoch": 0.25461363008017696, + "grad_norm": 0.89028376096738, + "learning_rate": 1.74688415673034e-05, + "loss": 0.5201, + "step": 14735 + }, + { + "epoch": 0.25463090959358586, + "grad_norm": 0.8465432223317333, + "learning_rate": 1.7468469414312924e-05, + "loss": 0.3775, + "step": 14736 + }, + { + "epoch": 0.25464818910699477, + "grad_norm": 0.9296678659566386, + "learning_rate": 1.7468097237930638e-05, + "loss": 0.5783, + "step": 14737 + }, + { + "epoch": 0.2546654686204036, + "grad_norm": 1.3411760319755328, + "learning_rate": 1.74677250381577e-05, + "loss": 0.4279, + "step": 14738 + }, + { + "epoch": 0.25468274813381253, + "grad_norm": 1.0946355756952604, + "learning_rate": 1.746735281499528e-05, + "loss": 0.5814, + "step": 14739 + }, + { + "epoch": 0.25470002764722144, + "grad_norm": 1.7847257829247387, + "learning_rate": 1.7466980568444545e-05, + "loss": 1.0336, + "step": 14740 + }, + { + "epoch": 0.25471730716063035, + "grad_norm": 1.0097012046273248, + "learning_rate": 1.746660829850666e-05, + "loss": 0.4103, + "step": 14741 + }, + { + "epoch": 0.25473458667403925, + "grad_norm": 0.9303895117751089, + "learning_rate": 1.746623600518279e-05, + "loss": 0.63, + "step": 14742 + }, + { + "epoch": 0.25475186618744816, + "grad_norm": 1.373967915718538, + "learning_rate": 1.74658636884741e-05, + "loss": 0.5501, + "step": 14743 + }, + { + "epoch": 0.25476914570085707, + "grad_norm": 0.5901239531084731, + "learning_rate": 1.7465491348381757e-05, + "loss": 0.6115, + "step": 14744 + }, + { + "epoch": 0.254786425214266, + "grad_norm": 0.6298568748970008, + "learning_rate": 1.7465118984906932e-05, + "loss": 0.4462, + "step": 14745 + }, + { + "epoch": 0.2548037047276749, + "grad_norm": 1.7350045530425315, + "learning_rate": 1.7464746598050785e-05, + "loss": 0.4734, + "step": 14746 + }, + { + "epoch": 0.2548209842410838, + "grad_norm": 1.18748331238059, + "learning_rate": 1.7464374187814483e-05, + "loss": 0.3617, + "step": 14747 + }, + { + "epoch": 0.2548382637544927, + "grad_norm": 0.3752636022945686, + "learning_rate": 1.746400175419919e-05, + "loss": 0.6959, + "step": 14748 + }, + { + "epoch": 0.25485554326790155, + "grad_norm": 1.2915400293834383, + "learning_rate": 1.7463629297206083e-05, + "loss": 0.6926, + "step": 14749 + }, + { + "epoch": 0.25487282278131046, + "grad_norm": 1.3856085573186445, + "learning_rate": 1.7463256816836316e-05, + "loss": 0.7266, + "step": 14750 + }, + { + "epoch": 0.25489010229471937, + "grad_norm": 0.9085059143515987, + "learning_rate": 1.7462884313091066e-05, + "loss": 0.3458, + "step": 14751 + }, + { + "epoch": 0.2549073818081283, + "grad_norm": 1.0830750473887911, + "learning_rate": 1.7462511785971495e-05, + "loss": 0.6157, + "step": 14752 + }, + { + "epoch": 0.2549246613215372, + "grad_norm": 1.2104830218350955, + "learning_rate": 1.746213923547877e-05, + "loss": 0.5334, + "step": 14753 + }, + { + "epoch": 0.2549419408349461, + "grad_norm": 0.9618161015380134, + "learning_rate": 1.7461766661614053e-05, + "loss": 0.3049, + "step": 14754 + }, + { + "epoch": 0.254959220348355, + "grad_norm": 0.9008235748641211, + "learning_rate": 1.7461394064378524e-05, + "loss": 0.5395, + "step": 14755 + }, + { + "epoch": 0.2549764998617639, + "grad_norm": 1.3499278970605737, + "learning_rate": 1.7461021443773333e-05, + "loss": 0.4762, + "step": 14756 + }, + { + "epoch": 0.2549937793751728, + "grad_norm": 0.9637805810929473, + "learning_rate": 1.746064879979966e-05, + "loss": 0.5485, + "step": 14757 + }, + { + "epoch": 0.2550110588885817, + "grad_norm": 0.4277019134296577, + "learning_rate": 1.7460276132458667e-05, + "loss": 0.6794, + "step": 14758 + }, + { + "epoch": 0.2550283384019906, + "grad_norm": 0.7673702322785174, + "learning_rate": 1.7459903441751528e-05, + "loss": 0.4365, + "step": 14759 + }, + { + "epoch": 0.2550456179153995, + "grad_norm": 0.9038527839603611, + "learning_rate": 1.7459530727679398e-05, + "loss": 0.5677, + "step": 14760 + }, + { + "epoch": 0.2550628974288084, + "grad_norm": 0.8188284103287307, + "learning_rate": 1.7459157990243453e-05, + "loss": 0.3562, + "step": 14761 + }, + { + "epoch": 0.2550801769422173, + "grad_norm": 1.0713041935165117, + "learning_rate": 1.745878522944486e-05, + "loss": 0.6543, + "step": 14762 + }, + { + "epoch": 0.2550974564556262, + "grad_norm": 1.3957951291940065, + "learning_rate": 1.745841244528478e-05, + "loss": 0.5762, + "step": 14763 + }, + { + "epoch": 0.2551147359690351, + "grad_norm": 1.4460925204446151, + "learning_rate": 1.745803963776439e-05, + "loss": 0.6803, + "step": 14764 + }, + { + "epoch": 0.255132015482444, + "grad_norm": 1.0309539927503286, + "learning_rate": 1.745766680688485e-05, + "loss": 0.5538, + "step": 14765 + }, + { + "epoch": 0.2551492949958529, + "grad_norm": 1.3942014223269148, + "learning_rate": 1.745729395264733e-05, + "loss": 0.5361, + "step": 14766 + }, + { + "epoch": 0.25516657450926183, + "grad_norm": 0.4687706147464086, + "learning_rate": 1.7456921075053e-05, + "loss": 0.6406, + "step": 14767 + }, + { + "epoch": 0.25518385402267074, + "grad_norm": 1.134463659526909, + "learning_rate": 1.7456548174103027e-05, + "loss": 0.549, + "step": 14768 + }, + { + "epoch": 0.25520113353607965, + "grad_norm": 1.279347426869645, + "learning_rate": 1.745617524979858e-05, + "loss": 0.7106, + "step": 14769 + }, + { + "epoch": 0.2552184130494885, + "grad_norm": 1.0284913293394833, + "learning_rate": 1.7455802302140825e-05, + "loss": 0.5491, + "step": 14770 + }, + { + "epoch": 0.2552356925628974, + "grad_norm": 0.718158571029076, + "learning_rate": 1.7455429331130927e-05, + "loss": 0.4094, + "step": 14771 + }, + { + "epoch": 0.2552529720763063, + "grad_norm": 0.7431803330021061, + "learning_rate": 1.7455056336770062e-05, + "loss": 0.5824, + "step": 14772 + }, + { + "epoch": 0.2552702515897152, + "grad_norm": 1.128692104857476, + "learning_rate": 1.7454683319059392e-05, + "loss": 0.4249, + "step": 14773 + }, + { + "epoch": 0.25528753110312413, + "grad_norm": 1.3827905274021053, + "learning_rate": 1.7454310278000087e-05, + "loss": 0.5874, + "step": 14774 + }, + { + "epoch": 0.25530481061653304, + "grad_norm": 1.048553465902051, + "learning_rate": 1.7453937213593314e-05, + "loss": 0.5294, + "step": 14775 + }, + { + "epoch": 0.25532209012994195, + "grad_norm": 1.3040876413217743, + "learning_rate": 1.745356412584025e-05, + "loss": 0.6372, + "step": 14776 + }, + { + "epoch": 0.25533936964335086, + "grad_norm": 2.455800537613332, + "learning_rate": 1.745319101474205e-05, + "loss": 0.6017, + "step": 14777 + }, + { + "epoch": 0.25535664915675976, + "grad_norm": 0.8231443594699366, + "learning_rate": 1.745281788029989e-05, + "loss": 0.6644, + "step": 14778 + }, + { + "epoch": 0.25537392867016867, + "grad_norm": 1.0200282590110354, + "learning_rate": 1.745244472251494e-05, + "loss": 0.5032, + "step": 14779 + }, + { + "epoch": 0.2553912081835775, + "grad_norm": 1.1380092299573121, + "learning_rate": 1.7452071541388368e-05, + "loss": 0.5843, + "step": 14780 + }, + { + "epoch": 0.25540848769698643, + "grad_norm": 1.1353063414424847, + "learning_rate": 1.745169833692134e-05, + "loss": 0.7153, + "step": 14781 + }, + { + "epoch": 0.25542576721039534, + "grad_norm": 1.0614462399755649, + "learning_rate": 1.7451325109115023e-05, + "loss": 0.657, + "step": 14782 + }, + { + "epoch": 0.25544304672380425, + "grad_norm": 1.1495488548580655, + "learning_rate": 1.7450951857970594e-05, + "loss": 0.6633, + "step": 14783 + }, + { + "epoch": 0.25546032623721315, + "grad_norm": 1.1439622374832312, + "learning_rate": 1.7450578583489215e-05, + "loss": 0.5831, + "step": 14784 + }, + { + "epoch": 0.25547760575062206, + "grad_norm": 0.9753076009623838, + "learning_rate": 1.7450205285672058e-05, + "loss": 0.5009, + "step": 14785 + }, + { + "epoch": 0.25549488526403097, + "grad_norm": 0.9985206717495195, + "learning_rate": 1.7449831964520295e-05, + "loss": 0.6972, + "step": 14786 + }, + { + "epoch": 0.2555121647774399, + "grad_norm": 1.0054819233501613, + "learning_rate": 1.7449458620035088e-05, + "loss": 0.5105, + "step": 14787 + }, + { + "epoch": 0.2555294442908488, + "grad_norm": 0.36958134829496575, + "learning_rate": 1.7449085252217614e-05, + "loss": 0.6665, + "step": 14788 + }, + { + "epoch": 0.2555467238042577, + "grad_norm": 0.6110109800919358, + "learning_rate": 1.7448711861069037e-05, + "loss": 0.3386, + "step": 14789 + }, + { + "epoch": 0.2555640033176666, + "grad_norm": 1.0072939284501865, + "learning_rate": 1.744833844659053e-05, + "loss": 0.721, + "step": 14790 + }, + { + "epoch": 0.25558128283107545, + "grad_norm": 1.3180288747806266, + "learning_rate": 1.7447965008783258e-05, + "loss": 0.7866, + "step": 14791 + }, + { + "epoch": 0.25559856234448436, + "grad_norm": 1.1718347459835012, + "learning_rate": 1.7447591547648394e-05, + "loss": 0.4634, + "step": 14792 + }, + { + "epoch": 0.25561584185789327, + "grad_norm": 1.2816807507019519, + "learning_rate": 1.744721806318711e-05, + "loss": 0.631, + "step": 14793 + }, + { + "epoch": 0.2556331213713022, + "grad_norm": 0.458905538378025, + "learning_rate": 1.744684455540057e-05, + "loss": 0.7586, + "step": 14794 + }, + { + "epoch": 0.2556504008847111, + "grad_norm": 1.4011035281714441, + "learning_rate": 1.744647102428995e-05, + "loss": 0.792, + "step": 14795 + }, + { + "epoch": 0.25566768039812, + "grad_norm": 1.3910132273598304, + "learning_rate": 1.7446097469856417e-05, + "loss": 0.5192, + "step": 14796 + }, + { + "epoch": 0.2556849599115289, + "grad_norm": 1.511773501035948, + "learning_rate": 1.7445723892101135e-05, + "loss": 0.4463, + "step": 14797 + }, + { + "epoch": 0.2557022394249378, + "grad_norm": 1.2830740855489646, + "learning_rate": 1.7445350291025285e-05, + "loss": 0.4933, + "step": 14798 + }, + { + "epoch": 0.2557195189383467, + "grad_norm": 0.742918540591407, + "learning_rate": 1.744497666663003e-05, + "loss": 0.434, + "step": 14799 + }, + { + "epoch": 0.2557367984517556, + "grad_norm": 1.275749005238826, + "learning_rate": 1.7444603018916547e-05, + "loss": 0.5281, + "step": 14800 + }, + { + "epoch": 0.25575407796516453, + "grad_norm": 1.1600119448123771, + "learning_rate": 1.7444229347885997e-05, + "loss": 0.5165, + "step": 14801 + }, + { + "epoch": 0.2557713574785734, + "grad_norm": 0.376885755129281, + "learning_rate": 1.7443855653539557e-05, + "loss": 0.5539, + "step": 14802 + }, + { + "epoch": 0.2557886369919823, + "grad_norm": 0.7023438952144124, + "learning_rate": 1.7443481935878394e-05, + "loss": 0.423, + "step": 14803 + }, + { + "epoch": 0.2558059165053912, + "grad_norm": 1.0733542675364425, + "learning_rate": 1.7443108194903678e-05, + "loss": 0.4663, + "step": 14804 + }, + { + "epoch": 0.2558231960188001, + "grad_norm": 0.9471676646314643, + "learning_rate": 1.7442734430616583e-05, + "loss": 0.5477, + "step": 14805 + }, + { + "epoch": 0.255840475532209, + "grad_norm": 0.9104514294103857, + "learning_rate": 1.744236064301828e-05, + "loss": 0.3637, + "step": 14806 + }, + { + "epoch": 0.2558577550456179, + "grad_norm": 1.075457381795993, + "learning_rate": 1.7441986832109936e-05, + "loss": 0.4063, + "step": 14807 + }, + { + "epoch": 0.2558750345590268, + "grad_norm": 1.1997483548002443, + "learning_rate": 1.7441612997892722e-05, + "loss": 0.4849, + "step": 14808 + }, + { + "epoch": 0.25589231407243573, + "grad_norm": 1.0886509647090012, + "learning_rate": 1.7441239140367815e-05, + "loss": 0.722, + "step": 14809 + }, + { + "epoch": 0.25590959358584464, + "grad_norm": 0.983372770356866, + "learning_rate": 1.744086525953638e-05, + "loss": 0.4443, + "step": 14810 + }, + { + "epoch": 0.25592687309925355, + "grad_norm": 0.6968402767183596, + "learning_rate": 1.744049135539959e-05, + "loss": 0.4734, + "step": 14811 + }, + { + "epoch": 0.2559441526126624, + "grad_norm": 0.35676145910291884, + "learning_rate": 1.7440117427958615e-05, + "loss": 0.5662, + "step": 14812 + }, + { + "epoch": 0.2559614321260713, + "grad_norm": 1.0761959597853696, + "learning_rate": 1.7439743477214625e-05, + "loss": 0.5332, + "step": 14813 + }, + { + "epoch": 0.2559787116394802, + "grad_norm": 1.0328801423470606, + "learning_rate": 1.7439369503168792e-05, + "loss": 0.5333, + "step": 14814 + }, + { + "epoch": 0.2559959911528891, + "grad_norm": 1.2594311673756926, + "learning_rate": 1.7438995505822292e-05, + "loss": 0.4933, + "step": 14815 + }, + { + "epoch": 0.25601327066629803, + "grad_norm": 1.063302941001259, + "learning_rate": 1.743862148517629e-05, + "loss": 0.5339, + "step": 14816 + }, + { + "epoch": 0.25603055017970694, + "grad_norm": 0.8928847867079678, + "learning_rate": 1.743824744123196e-05, + "loss": 0.5388, + "step": 14817 + }, + { + "epoch": 0.25604782969311585, + "grad_norm": 0.7207822299416682, + "learning_rate": 1.7437873373990478e-05, + "loss": 0.5018, + "step": 14818 + }, + { + "epoch": 0.25606510920652475, + "grad_norm": 0.7049488186511149, + "learning_rate": 1.743749928345301e-05, + "loss": 0.4095, + "step": 14819 + }, + { + "epoch": 0.25608238871993366, + "grad_norm": 1.1667051652102178, + "learning_rate": 1.7437125169620724e-05, + "loss": 0.5023, + "step": 14820 + }, + { + "epoch": 0.25609966823334257, + "grad_norm": 1.1908813556028401, + "learning_rate": 1.7436751032494804e-05, + "loss": 0.5195, + "step": 14821 + }, + { + "epoch": 0.2561169477467515, + "grad_norm": 1.105624660959022, + "learning_rate": 1.7436376872076407e-05, + "loss": 0.503, + "step": 14822 + }, + { + "epoch": 0.25613422726016033, + "grad_norm": 1.3497553940907905, + "learning_rate": 1.7436002688366718e-05, + "loss": 0.3823, + "step": 14823 + }, + { + "epoch": 0.25615150677356924, + "grad_norm": 0.37559859897915776, + "learning_rate": 1.7435628481366902e-05, + "loss": 0.5947, + "step": 14824 + }, + { + "epoch": 0.25616878628697815, + "grad_norm": 1.2177959185034941, + "learning_rate": 1.7435254251078133e-05, + "loss": 0.4255, + "step": 14825 + }, + { + "epoch": 0.25618606580038705, + "grad_norm": 1.0903431790149454, + "learning_rate": 1.743487999750158e-05, + "loss": 0.6233, + "step": 14826 + }, + { + "epoch": 0.25620334531379596, + "grad_norm": 1.5152638193007273, + "learning_rate": 1.743450572063842e-05, + "loss": 0.5126, + "step": 14827 + }, + { + "epoch": 0.25622062482720487, + "grad_norm": 1.0328992589783632, + "learning_rate": 1.7434131420489823e-05, + "loss": 0.5202, + "step": 14828 + }, + { + "epoch": 0.2562379043406138, + "grad_norm": 1.301417247184678, + "learning_rate": 1.7433757097056963e-05, + "loss": 0.3562, + "step": 14829 + }, + { + "epoch": 0.2562551838540227, + "grad_norm": 0.4713131003790436, + "learning_rate": 1.743338275034101e-05, + "loss": 0.5935, + "step": 14830 + }, + { + "epoch": 0.2562724633674316, + "grad_norm": 0.44974897177971357, + "learning_rate": 1.7433008380343136e-05, + "loss": 0.7122, + "step": 14831 + }, + { + "epoch": 0.2562897428808405, + "grad_norm": 1.0290099129135235, + "learning_rate": 1.7432633987064512e-05, + "loss": 0.5069, + "step": 14832 + }, + { + "epoch": 0.25630702239424935, + "grad_norm": 0.7681743408112642, + "learning_rate": 1.743225957050632e-05, + "loss": 0.456, + "step": 14833 + }, + { + "epoch": 0.25632430190765826, + "grad_norm": 0.8560859559937514, + "learning_rate": 1.743188513066972e-05, + "loss": 0.545, + "step": 14834 + }, + { + "epoch": 0.25634158142106717, + "grad_norm": 1.376424427641178, + "learning_rate": 1.7431510667555896e-05, + "loss": 0.6079, + "step": 14835 + }, + { + "epoch": 0.2563588609344761, + "grad_norm": 0.7063895756894019, + "learning_rate": 1.7431136181166014e-05, + "loss": 0.5484, + "step": 14836 + }, + { + "epoch": 0.256376140447885, + "grad_norm": 0.8187390321991832, + "learning_rate": 1.7430761671501248e-05, + "loss": 0.4048, + "step": 14837 + }, + { + "epoch": 0.2563934199612939, + "grad_norm": 0.890496739687621, + "learning_rate": 1.7430387138562772e-05, + "loss": 0.4323, + "step": 14838 + }, + { + "epoch": 0.2564106994747028, + "grad_norm": 0.425407040000163, + "learning_rate": 1.743001258235176e-05, + "loss": 0.6423, + "step": 14839 + }, + { + "epoch": 0.2564279789881117, + "grad_norm": 0.7712891179898372, + "learning_rate": 1.7429638002869382e-05, + "loss": 0.4066, + "step": 14840 + }, + { + "epoch": 0.2564452585015206, + "grad_norm": 0.8454885513203758, + "learning_rate": 1.7429263400116814e-05, + "loss": 0.5064, + "step": 14841 + }, + { + "epoch": 0.2564625380149295, + "grad_norm": 1.1771886492045145, + "learning_rate": 1.742888877409523e-05, + "loss": 0.5204, + "step": 14842 + }, + { + "epoch": 0.2564798175283384, + "grad_norm": 0.8100786613492271, + "learning_rate": 1.74285141248058e-05, + "loss": 0.3077, + "step": 14843 + }, + { + "epoch": 0.2564970970417473, + "grad_norm": 1.0793732558765674, + "learning_rate": 1.7428139452249698e-05, + "loss": 0.4158, + "step": 14844 + }, + { + "epoch": 0.2565143765551562, + "grad_norm": 1.0265257642941195, + "learning_rate": 1.7427764756428104e-05, + "loss": 0.5238, + "step": 14845 + }, + { + "epoch": 0.2565316560685651, + "grad_norm": 0.9460771535395158, + "learning_rate": 1.7427390037342184e-05, + "loss": 0.5573, + "step": 14846 + }, + { + "epoch": 0.256548935581974, + "grad_norm": 0.9578640799189726, + "learning_rate": 1.742701529499311e-05, + "loss": 0.4825, + "step": 14847 + }, + { + "epoch": 0.2565662150953829, + "grad_norm": 0.8438755328034616, + "learning_rate": 1.7426640529382063e-05, + "loss": 0.4712, + "step": 14848 + }, + { + "epoch": 0.2565834946087918, + "grad_norm": 0.7339872981941021, + "learning_rate": 1.742626574051021e-05, + "loss": 0.3615, + "step": 14849 + }, + { + "epoch": 0.2566007741222007, + "grad_norm": 1.3575469147511194, + "learning_rate": 1.7425890928378733e-05, + "loss": 0.515, + "step": 14850 + }, + { + "epoch": 0.25661805363560963, + "grad_norm": 0.9957060801374765, + "learning_rate": 1.7425516092988796e-05, + "loss": 0.6674, + "step": 14851 + }, + { + "epoch": 0.25663533314901854, + "grad_norm": 0.7716598944730538, + "learning_rate": 1.7425141234341584e-05, + "loss": 0.5976, + "step": 14852 + }, + { + "epoch": 0.25665261266242745, + "grad_norm": 1.2837766593986517, + "learning_rate": 1.7424766352438263e-05, + "loss": 0.4042, + "step": 14853 + }, + { + "epoch": 0.25666989217583636, + "grad_norm": 1.2410648791220498, + "learning_rate": 1.7424391447280007e-05, + "loss": 0.6728, + "step": 14854 + }, + { + "epoch": 0.2566871716892452, + "grad_norm": 1.5827338647434974, + "learning_rate": 1.7424016518867997e-05, + "loss": 0.6326, + "step": 14855 + }, + { + "epoch": 0.2567044512026541, + "grad_norm": 1.0958815680165115, + "learning_rate": 1.74236415672034e-05, + "loss": 0.5566, + "step": 14856 + }, + { + "epoch": 0.256721730716063, + "grad_norm": 1.128913680385398, + "learning_rate": 1.7423266592287395e-05, + "loss": 0.5034, + "step": 14857 + }, + { + "epoch": 0.25673901022947193, + "grad_norm": 1.7417974929998645, + "learning_rate": 1.7422891594121155e-05, + "loss": 0.9151, + "step": 14858 + }, + { + "epoch": 0.25675628974288084, + "grad_norm": 0.945490758062159, + "learning_rate": 1.7422516572705855e-05, + "loss": 0.6682, + "step": 14859 + }, + { + "epoch": 0.25677356925628975, + "grad_norm": 0.9411969533763671, + "learning_rate": 1.742214152804267e-05, + "loss": 0.6148, + "step": 14860 + }, + { + "epoch": 0.25679084876969865, + "grad_norm": 1.3158134451607368, + "learning_rate": 1.742176646013277e-05, + "loss": 0.507, + "step": 14861 + }, + { + "epoch": 0.25680812828310756, + "grad_norm": 1.5272526119680554, + "learning_rate": 1.7421391368977334e-05, + "loss": 0.5312, + "step": 14862 + }, + { + "epoch": 0.25682540779651647, + "grad_norm": 0.8733928678903224, + "learning_rate": 1.7421016254577536e-05, + "loss": 0.6307, + "step": 14863 + }, + { + "epoch": 0.2568426873099254, + "grad_norm": 0.8952884377154803, + "learning_rate": 1.7420641116934555e-05, + "loss": 0.7238, + "step": 14864 + }, + { + "epoch": 0.25685996682333423, + "grad_norm": 0.7372102867198205, + "learning_rate": 1.7420265956049558e-05, + "loss": 0.4046, + "step": 14865 + }, + { + "epoch": 0.25687724633674314, + "grad_norm": 1.0962784820270544, + "learning_rate": 1.7419890771923725e-05, + "loss": 0.4749, + "step": 14866 + }, + { + "epoch": 0.25689452585015204, + "grad_norm": 0.9410057374536395, + "learning_rate": 1.7419515564558233e-05, + "loss": 0.566, + "step": 14867 + }, + { + "epoch": 0.25691180536356095, + "grad_norm": 0.8745057445332265, + "learning_rate": 1.7419140333954252e-05, + "loss": 0.5679, + "step": 14868 + }, + { + "epoch": 0.25692908487696986, + "grad_norm": 0.986058848695566, + "learning_rate": 1.741876508011296e-05, + "loss": 0.6054, + "step": 14869 + }, + { + "epoch": 0.25694636439037877, + "grad_norm": 1.313743830829201, + "learning_rate": 1.741838980303553e-05, + "loss": 0.4844, + "step": 14870 + }, + { + "epoch": 0.2569636439037877, + "grad_norm": 0.8834617440640125, + "learning_rate": 1.741801450272314e-05, + "loss": 0.5159, + "step": 14871 + }, + { + "epoch": 0.2569809234171966, + "grad_norm": 1.113395486979188, + "learning_rate": 1.741763917917697e-05, + "loss": 0.4401, + "step": 14872 + }, + { + "epoch": 0.2569982029306055, + "grad_norm": 1.1989058130120995, + "learning_rate": 1.7417263832398182e-05, + "loss": 0.4237, + "step": 14873 + }, + { + "epoch": 0.2570154824440144, + "grad_norm": 1.0359217204995104, + "learning_rate": 1.7416888462387964e-05, + "loss": 0.3479, + "step": 14874 + }, + { + "epoch": 0.2570327619574233, + "grad_norm": 0.9723916716623558, + "learning_rate": 1.7416513069147488e-05, + "loss": 0.4535, + "step": 14875 + }, + { + "epoch": 0.25705004147083216, + "grad_norm": 1.2787831109023495, + "learning_rate": 1.7416137652677933e-05, + "loss": 0.5854, + "step": 14876 + }, + { + "epoch": 0.25706732098424107, + "grad_norm": 1.1571342109546792, + "learning_rate": 1.7415762212980464e-05, + "loss": 0.5357, + "step": 14877 + }, + { + "epoch": 0.25708460049765, + "grad_norm": 0.8963045251805584, + "learning_rate": 1.7415386750056268e-05, + "loss": 0.4599, + "step": 14878 + }, + { + "epoch": 0.2571018800110589, + "grad_norm": 1.2589468728911164, + "learning_rate": 1.7415011263906517e-05, + "loss": 0.3835, + "step": 14879 + }, + { + "epoch": 0.2571191595244678, + "grad_norm": 1.175806427513574, + "learning_rate": 1.741463575453239e-05, + "loss": 0.5743, + "step": 14880 + }, + { + "epoch": 0.2571364390378767, + "grad_norm": 0.9480855492695176, + "learning_rate": 1.7414260221935054e-05, + "loss": 0.3916, + "step": 14881 + }, + { + "epoch": 0.2571537185512856, + "grad_norm": 1.2934552678291455, + "learning_rate": 1.7413884666115697e-05, + "loss": 0.4694, + "step": 14882 + }, + { + "epoch": 0.2571709980646945, + "grad_norm": 1.3496450380139624, + "learning_rate": 1.7413509087075488e-05, + "loss": 0.7312, + "step": 14883 + }, + { + "epoch": 0.2571882775781034, + "grad_norm": 1.5005132449392597, + "learning_rate": 1.7413133484815606e-05, + "loss": 0.4685, + "step": 14884 + }, + { + "epoch": 0.2572055570915123, + "grad_norm": 1.49486269971843, + "learning_rate": 1.7412757859337225e-05, + "loss": 0.621, + "step": 14885 + }, + { + "epoch": 0.2572228366049212, + "grad_norm": 0.7211169112221183, + "learning_rate": 1.7412382210641526e-05, + "loss": 0.4613, + "step": 14886 + }, + { + "epoch": 0.2572401161183301, + "grad_norm": 1.0608087051789399, + "learning_rate": 1.7412006538729677e-05, + "loss": 0.5048, + "step": 14887 + }, + { + "epoch": 0.257257395631739, + "grad_norm": 0.7746482320598094, + "learning_rate": 1.7411630843602863e-05, + "loss": 0.4148, + "step": 14888 + }, + { + "epoch": 0.2572746751451479, + "grad_norm": 1.2144472571859959, + "learning_rate": 1.741125512526226e-05, + "loss": 0.736, + "step": 14889 + }, + { + "epoch": 0.2572919546585568, + "grad_norm": 1.028725016268501, + "learning_rate": 1.741087938370904e-05, + "loss": 0.5629, + "step": 14890 + }, + { + "epoch": 0.2573092341719657, + "grad_norm": 1.0640332486807715, + "learning_rate": 1.7410503618944383e-05, + "loss": 0.7172, + "step": 14891 + }, + { + "epoch": 0.2573265136853746, + "grad_norm": 1.4484289632840037, + "learning_rate": 1.7410127830969464e-05, + "loss": 0.6145, + "step": 14892 + }, + { + "epoch": 0.25734379319878353, + "grad_norm": 1.2218603017753311, + "learning_rate": 1.7409752019785466e-05, + "loss": 0.6095, + "step": 14893 + }, + { + "epoch": 0.25736107271219244, + "grad_norm": 1.2769075241998527, + "learning_rate": 1.740937618539356e-05, + "loss": 0.4532, + "step": 14894 + }, + { + "epoch": 0.25737835222560135, + "grad_norm": 0.8864038362482891, + "learning_rate": 1.7409000327794922e-05, + "loss": 0.5294, + "step": 14895 + }, + { + "epoch": 0.25739563173901026, + "grad_norm": 0.948121396700009, + "learning_rate": 1.7408624446990735e-05, + "loss": 0.5018, + "step": 14896 + }, + { + "epoch": 0.2574129112524191, + "grad_norm": 1.6044170106582631, + "learning_rate": 1.7408248542982172e-05, + "loss": 0.7087, + "step": 14897 + }, + { + "epoch": 0.257430190765828, + "grad_norm": 1.4584029236335903, + "learning_rate": 1.740787261577041e-05, + "loss": 0.7436, + "step": 14898 + }, + { + "epoch": 0.2574474702792369, + "grad_norm": 0.8729768638739126, + "learning_rate": 1.740749666535663e-05, + "loss": 0.6001, + "step": 14899 + }, + { + "epoch": 0.25746474979264583, + "grad_norm": 1.0849618655385302, + "learning_rate": 1.740712069174201e-05, + "loss": 0.4591, + "step": 14900 + }, + { + "epoch": 0.25748202930605474, + "grad_norm": 0.7168921710209955, + "learning_rate": 1.7406744694927718e-05, + "loss": 0.7928, + "step": 14901 + }, + { + "epoch": 0.25749930881946365, + "grad_norm": 0.4892825547796686, + "learning_rate": 1.7406368674914947e-05, + "loss": 0.7814, + "step": 14902 + }, + { + "epoch": 0.25751658833287255, + "grad_norm": 1.185206457253177, + "learning_rate": 1.740599263170486e-05, + "loss": 0.6753, + "step": 14903 + }, + { + "epoch": 0.25753386784628146, + "grad_norm": 1.3882465692558406, + "learning_rate": 1.7405616565298644e-05, + "loss": 0.6136, + "step": 14904 + }, + { + "epoch": 0.25755114735969037, + "grad_norm": 1.5065947771503512, + "learning_rate": 1.740524047569747e-05, + "loss": 0.5291, + "step": 14905 + }, + { + "epoch": 0.2575684268730993, + "grad_norm": 1.1183294121234426, + "learning_rate": 1.7404864362902526e-05, + "loss": 0.5501, + "step": 14906 + }, + { + "epoch": 0.25758570638650813, + "grad_norm": 0.9120259630681128, + "learning_rate": 1.7404488226914983e-05, + "loss": 0.5491, + "step": 14907 + }, + { + "epoch": 0.25760298589991704, + "grad_norm": 1.4098676686288516, + "learning_rate": 1.740411206773602e-05, + "loss": 0.5608, + "step": 14908 + }, + { + "epoch": 0.25762026541332594, + "grad_norm": 1.2031104321048864, + "learning_rate": 1.7403735885366812e-05, + "loss": 0.3779, + "step": 14909 + }, + { + "epoch": 0.25763754492673485, + "grad_norm": 1.0469061346615882, + "learning_rate": 1.7403359679808543e-05, + "loss": 0.4477, + "step": 14910 + }, + { + "epoch": 0.25765482444014376, + "grad_norm": 0.7652324544313025, + "learning_rate": 1.740298345106239e-05, + "loss": 0.5178, + "step": 14911 + }, + { + "epoch": 0.25767210395355267, + "grad_norm": 1.003153152680316, + "learning_rate": 1.7402607199129525e-05, + "loss": 0.6595, + "step": 14912 + }, + { + "epoch": 0.2576893834669616, + "grad_norm": 1.1441480844106966, + "learning_rate": 1.740223092401114e-05, + "loss": 0.5506, + "step": 14913 + }, + { + "epoch": 0.2577066629803705, + "grad_norm": 0.5838126530223937, + "learning_rate": 1.74018546257084e-05, + "loss": 0.5491, + "step": 14914 + }, + { + "epoch": 0.2577239424937794, + "grad_norm": 1.634905874823978, + "learning_rate": 1.7401478304222487e-05, + "loss": 0.6285, + "step": 14915 + }, + { + "epoch": 0.2577412220071883, + "grad_norm": 1.2631620987901109, + "learning_rate": 1.7401101959554584e-05, + "loss": 0.5229, + "step": 14916 + }, + { + "epoch": 0.2577585015205972, + "grad_norm": 1.5053037915392962, + "learning_rate": 1.7400725591705866e-05, + "loss": 0.4691, + "step": 14917 + }, + { + "epoch": 0.25777578103400606, + "grad_norm": 1.6550941486146553, + "learning_rate": 1.7400349200677515e-05, + "loss": 0.5985, + "step": 14918 + }, + { + "epoch": 0.25779306054741497, + "grad_norm": 1.0959506725354702, + "learning_rate": 1.7399972786470708e-05, + "loss": 0.5391, + "step": 14919 + }, + { + "epoch": 0.2578103400608239, + "grad_norm": 0.7964005141815057, + "learning_rate": 1.739959634908662e-05, + "loss": 0.3549, + "step": 14920 + }, + { + "epoch": 0.2578276195742328, + "grad_norm": 1.0437297348300556, + "learning_rate": 1.7399219888526438e-05, + "loss": 0.3818, + "step": 14921 + }, + { + "epoch": 0.2578448990876417, + "grad_norm": 0.7401123813845556, + "learning_rate": 1.7398843404791338e-05, + "loss": 0.4575, + "step": 14922 + }, + { + "epoch": 0.2578621786010506, + "grad_norm": 1.1977768556662607, + "learning_rate": 1.7398466897882493e-05, + "loss": 0.4867, + "step": 14923 + }, + { + "epoch": 0.2578794581144595, + "grad_norm": 1.543273634027802, + "learning_rate": 1.739809036780109e-05, + "loss": 0.5896, + "step": 14924 + }, + { + "epoch": 0.2578967376278684, + "grad_norm": 1.221797554984194, + "learning_rate": 1.7397713814548305e-05, + "loss": 0.422, + "step": 14925 + }, + { + "epoch": 0.2579140171412773, + "grad_norm": 0.680579835423493, + "learning_rate": 1.739733723812532e-05, + "loss": 0.3487, + "step": 14926 + }, + { + "epoch": 0.2579312966546862, + "grad_norm": 1.1146348825936296, + "learning_rate": 1.7396960638533314e-05, + "loss": 0.586, + "step": 14927 + }, + { + "epoch": 0.25794857616809513, + "grad_norm": 0.8842922763257763, + "learning_rate": 1.7396584015773463e-05, + "loss": 0.4892, + "step": 14928 + }, + { + "epoch": 0.257965855681504, + "grad_norm": 0.809166056487766, + "learning_rate": 1.739620736984695e-05, + "loss": 0.4412, + "step": 14929 + }, + { + "epoch": 0.2579831351949129, + "grad_norm": 0.8169566112604592, + "learning_rate": 1.739583070075495e-05, + "loss": 0.546, + "step": 14930 + }, + { + "epoch": 0.2580004147083218, + "grad_norm": 0.979574147490972, + "learning_rate": 1.7395454008498648e-05, + "loss": 0.5357, + "step": 14931 + }, + { + "epoch": 0.2580176942217307, + "grad_norm": 0.4206839025953276, + "learning_rate": 1.7395077293079223e-05, + "loss": 0.5834, + "step": 14932 + }, + { + "epoch": 0.2580349737351396, + "grad_norm": 0.36474491451083296, + "learning_rate": 1.7394700554497852e-05, + "loss": 0.5186, + "step": 14933 + }, + { + "epoch": 0.2580522532485485, + "grad_norm": 1.182288560857285, + "learning_rate": 1.739432379275572e-05, + "loss": 0.4222, + "step": 14934 + }, + { + "epoch": 0.25806953276195743, + "grad_norm": 0.9634933430142384, + "learning_rate": 1.7393947007854e-05, + "loss": 0.357, + "step": 14935 + }, + { + "epoch": 0.25808681227536634, + "grad_norm": 0.7967004029562297, + "learning_rate": 1.7393570199793876e-05, + "loss": 0.4588, + "step": 14936 + }, + { + "epoch": 0.25810409178877525, + "grad_norm": 0.8076155321132864, + "learning_rate": 1.7393193368576533e-05, + "loss": 0.5197, + "step": 14937 + }, + { + "epoch": 0.25812137130218415, + "grad_norm": 1.1266549692408079, + "learning_rate": 1.7392816514203142e-05, + "loss": 0.6395, + "step": 14938 + }, + { + "epoch": 0.258138650815593, + "grad_norm": 1.2787746264078454, + "learning_rate": 1.7392439636674893e-05, + "loss": 0.4865, + "step": 14939 + }, + { + "epoch": 0.2581559303290019, + "grad_norm": 1.5039149302619772, + "learning_rate": 1.7392062735992957e-05, + "loss": 0.5756, + "step": 14940 + }, + { + "epoch": 0.2581732098424108, + "grad_norm": 0.860873685342524, + "learning_rate": 1.7391685812158518e-05, + "loss": 0.5401, + "step": 14941 + }, + { + "epoch": 0.25819048935581973, + "grad_norm": 1.6115833675595685, + "learning_rate": 1.739130886517276e-05, + "loss": 0.7169, + "step": 14942 + }, + { + "epoch": 0.25820776886922864, + "grad_norm": 1.3757083121068137, + "learning_rate": 1.739093189503686e-05, + "loss": 0.5672, + "step": 14943 + }, + { + "epoch": 0.25822504838263755, + "grad_norm": 1.191856580010132, + "learning_rate": 1.7390554901751997e-05, + "loss": 0.4223, + "step": 14944 + }, + { + "epoch": 0.25824232789604645, + "grad_norm": 0.9729638440607977, + "learning_rate": 1.739017788531936e-05, + "loss": 0.6577, + "step": 14945 + }, + { + "epoch": 0.25825960740945536, + "grad_norm": 0.43768501456611913, + "learning_rate": 1.7389800845740118e-05, + "loss": 0.7459, + "step": 14946 + }, + { + "epoch": 0.25827688692286427, + "grad_norm": 1.2752336507235467, + "learning_rate": 1.7389423783015464e-05, + "loss": 0.4563, + "step": 14947 + }, + { + "epoch": 0.2582941664362732, + "grad_norm": 1.1473277836606024, + "learning_rate": 1.738904669714657e-05, + "loss": 0.3823, + "step": 14948 + }, + { + "epoch": 0.2583114459496821, + "grad_norm": 0.4159372089388834, + "learning_rate": 1.738866958813462e-05, + "loss": 0.6112, + "step": 14949 + }, + { + "epoch": 0.25832872546309094, + "grad_norm": 0.7270897574774249, + "learning_rate": 1.7388292455980796e-05, + "loss": 0.4734, + "step": 14950 + }, + { + "epoch": 0.25834600497649984, + "grad_norm": 0.37910656262039616, + "learning_rate": 1.738791530068628e-05, + "loss": 0.6171, + "step": 14951 + }, + { + "epoch": 0.25836328448990875, + "grad_norm": 0.767470243261094, + "learning_rate": 1.7387538122252248e-05, + "loss": 0.4013, + "step": 14952 + }, + { + "epoch": 0.25838056400331766, + "grad_norm": 1.062679882511362, + "learning_rate": 1.7387160920679886e-05, + "loss": 0.5431, + "step": 14953 + }, + { + "epoch": 0.25839784351672657, + "grad_norm": 0.857228698518464, + "learning_rate": 1.738678369597038e-05, + "loss": 0.5063, + "step": 14954 + }, + { + "epoch": 0.2584151230301355, + "grad_norm": 0.7045464057866526, + "learning_rate": 1.7386406448124898e-05, + "loss": 0.588, + "step": 14955 + }, + { + "epoch": 0.2584324025435444, + "grad_norm": 0.8921429927668106, + "learning_rate": 1.7386029177144634e-05, + "loss": 0.6377, + "step": 14956 + }, + { + "epoch": 0.2584496820569533, + "grad_norm": 2.431071613352951, + "learning_rate": 1.7385651883030768e-05, + "loss": 0.6452, + "step": 14957 + }, + { + "epoch": 0.2584669615703622, + "grad_norm": 1.0095684654898593, + "learning_rate": 1.7385274565784476e-05, + "loss": 0.4922, + "step": 14958 + }, + { + "epoch": 0.2584842410837711, + "grad_norm": 0.8994950539903535, + "learning_rate": 1.738489722540694e-05, + "loss": 0.474, + "step": 14959 + }, + { + "epoch": 0.25850152059717996, + "grad_norm": 1.38840439805591, + "learning_rate": 1.7384519861899346e-05, + "loss": 0.4789, + "step": 14960 + }, + { + "epoch": 0.25851880011058886, + "grad_norm": 1.2349752078210416, + "learning_rate": 1.738414247526288e-05, + "loss": 0.5504, + "step": 14961 + }, + { + "epoch": 0.25853607962399777, + "grad_norm": 1.059448880609154, + "learning_rate": 1.7383765065498712e-05, + "loss": 0.5789, + "step": 14962 + }, + { + "epoch": 0.2585533591374067, + "grad_norm": 1.20596906564312, + "learning_rate": 1.7383387632608032e-05, + "loss": 0.4726, + "step": 14963 + }, + { + "epoch": 0.2585706386508156, + "grad_norm": 1.3200518372153627, + "learning_rate": 1.7383010176592024e-05, + "loss": 0.5588, + "step": 14964 + }, + { + "epoch": 0.2585879181642245, + "grad_norm": 1.2446890694007153, + "learning_rate": 1.7382632697451868e-05, + "loss": 0.4784, + "step": 14965 + }, + { + "epoch": 0.2586051976776334, + "grad_norm": 1.5516612645134866, + "learning_rate": 1.738225519518874e-05, + "loss": 0.5898, + "step": 14966 + }, + { + "epoch": 0.2586224771910423, + "grad_norm": 1.1866427134567514, + "learning_rate": 1.738187766980383e-05, + "loss": 0.4515, + "step": 14967 + }, + { + "epoch": 0.2586397567044512, + "grad_norm": 1.3059459774927198, + "learning_rate": 1.738150012129832e-05, + "loss": 0.4107, + "step": 14968 + }, + { + "epoch": 0.2586570362178601, + "grad_norm": 1.251129646393444, + "learning_rate": 1.7381122549673387e-05, + "loss": 0.571, + "step": 14969 + }, + { + "epoch": 0.25867431573126903, + "grad_norm": 0.6034409941938625, + "learning_rate": 1.738074495493022e-05, + "loss": 0.244, + "step": 14970 + }, + { + "epoch": 0.2586915952446779, + "grad_norm": 0.7422950668900632, + "learning_rate": 1.738036733707e-05, + "loss": 0.7302, + "step": 14971 + }, + { + "epoch": 0.2587088747580868, + "grad_norm": 1.221840362650537, + "learning_rate": 1.7379989696093905e-05, + "loss": 0.601, + "step": 14972 + }, + { + "epoch": 0.2587261542714957, + "grad_norm": 0.44967915045819534, + "learning_rate": 1.7379612032003122e-05, + "loss": 0.6848, + "step": 14973 + }, + { + "epoch": 0.2587434337849046, + "grad_norm": 1.130748635017061, + "learning_rate": 1.7379234344798836e-05, + "loss": 0.4031, + "step": 14974 + }, + { + "epoch": 0.2587607132983135, + "grad_norm": 1.211812543984831, + "learning_rate": 1.7378856634482227e-05, + "loss": 0.5848, + "step": 14975 + }, + { + "epoch": 0.2587779928117224, + "grad_norm": 1.170350989971116, + "learning_rate": 1.7378478901054475e-05, + "loss": 0.5653, + "step": 14976 + }, + { + "epoch": 0.25879527232513133, + "grad_norm": 1.1291983320529557, + "learning_rate": 1.737810114451677e-05, + "loss": 0.5512, + "step": 14977 + }, + { + "epoch": 0.25881255183854024, + "grad_norm": 0.8113683598806214, + "learning_rate": 1.737772336487029e-05, + "loss": 0.5149, + "step": 14978 + }, + { + "epoch": 0.25882983135194915, + "grad_norm": 1.158638310670996, + "learning_rate": 1.7377345562116218e-05, + "loss": 0.6264, + "step": 14979 + }, + { + "epoch": 0.25884711086535805, + "grad_norm": 1.0778149196999933, + "learning_rate": 1.7376967736255742e-05, + "loss": 0.5476, + "step": 14980 + }, + { + "epoch": 0.2588643903787669, + "grad_norm": 0.8417693661366152, + "learning_rate": 1.7376589887290037e-05, + "loss": 0.5388, + "step": 14981 + }, + { + "epoch": 0.2588816698921758, + "grad_norm": 0.7453399303706645, + "learning_rate": 1.73762120152203e-05, + "loss": 0.4507, + "step": 14982 + }, + { + "epoch": 0.2588989494055847, + "grad_norm": 0.7714416255788386, + "learning_rate": 1.73758341200477e-05, + "loss": 0.5522, + "step": 14983 + }, + { + "epoch": 0.25891622891899363, + "grad_norm": 1.3132883830660687, + "learning_rate": 1.737545620177343e-05, + "loss": 0.5309, + "step": 14984 + }, + { + "epoch": 0.25893350843240254, + "grad_norm": 1.5117659824141791, + "learning_rate": 1.737507826039867e-05, + "loss": 0.5047, + "step": 14985 + }, + { + "epoch": 0.25895078794581144, + "grad_norm": 0.7721686544826621, + "learning_rate": 1.7374700295924602e-05, + "loss": 0.4978, + "step": 14986 + }, + { + "epoch": 0.25896806745922035, + "grad_norm": 0.8758552317945801, + "learning_rate": 1.7374322308352415e-05, + "loss": 0.5832, + "step": 14987 + }, + { + "epoch": 0.25898534697262926, + "grad_norm": 0.40895090128015615, + "learning_rate": 1.737394429768329e-05, + "loss": 0.6664, + "step": 14988 + }, + { + "epoch": 0.25900262648603817, + "grad_norm": 0.8592285698438397, + "learning_rate": 1.737356626391841e-05, + "loss": 0.6451, + "step": 14989 + }, + { + "epoch": 0.2590199059994471, + "grad_norm": 0.4307962316227998, + "learning_rate": 1.737318820705896e-05, + "loss": 0.6789, + "step": 14990 + }, + { + "epoch": 0.259037185512856, + "grad_norm": 1.2372830200003468, + "learning_rate": 1.7372810127106125e-05, + "loss": 0.7353, + "step": 14991 + }, + { + "epoch": 0.25905446502626484, + "grad_norm": 1.2467647063627998, + "learning_rate": 1.737243202406109e-05, + "loss": 0.4224, + "step": 14992 + }, + { + "epoch": 0.25907174453967374, + "grad_norm": 0.8846689542916435, + "learning_rate": 1.7372053897925035e-05, + "loss": 0.4306, + "step": 14993 + }, + { + "epoch": 0.25908902405308265, + "grad_norm": 1.323892886664995, + "learning_rate": 1.7371675748699148e-05, + "loss": 0.6632, + "step": 14994 + }, + { + "epoch": 0.25910630356649156, + "grad_norm": 0.6951917757343352, + "learning_rate": 1.7371297576384615e-05, + "loss": 0.4566, + "step": 14995 + }, + { + "epoch": 0.25912358307990047, + "grad_norm": 1.4882755556532814, + "learning_rate": 1.7370919380982615e-05, + "loss": 0.5409, + "step": 14996 + }, + { + "epoch": 0.2591408625933094, + "grad_norm": 1.3268774066511118, + "learning_rate": 1.7370541162494334e-05, + "loss": 0.6921, + "step": 14997 + }, + { + "epoch": 0.2591581421067183, + "grad_norm": 1.1042836395153641, + "learning_rate": 1.7370162920920957e-05, + "loss": 0.4676, + "step": 14998 + }, + { + "epoch": 0.2591754216201272, + "grad_norm": 1.3474069490036222, + "learning_rate": 1.7369784656263672e-05, + "loss": 0.5807, + "step": 14999 + }, + { + "epoch": 0.2591927011335361, + "grad_norm": 1.2977352160781412, + "learning_rate": 1.7369406368523662e-05, + "loss": 0.799, + "step": 15000 + }, + { + "epoch": 0.259209980646945, + "grad_norm": 1.1347641166214877, + "learning_rate": 1.736902805770211e-05, + "loss": 0.4749, + "step": 15001 + }, + { + "epoch": 0.2592272601603539, + "grad_norm": 0.7551585908636725, + "learning_rate": 1.7368649723800203e-05, + "loss": 0.4624, + "step": 15002 + }, + { + "epoch": 0.25924453967376276, + "grad_norm": 1.2167097727058702, + "learning_rate": 1.7368271366819126e-05, + "loss": 0.8212, + "step": 15003 + }, + { + "epoch": 0.25926181918717167, + "grad_norm": 0.7467467811690541, + "learning_rate": 1.7367892986760063e-05, + "loss": 0.394, + "step": 15004 + }, + { + "epoch": 0.2592790987005806, + "grad_norm": 0.7851714882658282, + "learning_rate": 1.7367514583624197e-05, + "loss": 0.6449, + "step": 15005 + }, + { + "epoch": 0.2592963782139895, + "grad_norm": 1.1555394106470636, + "learning_rate": 1.7367136157412715e-05, + "loss": 0.6296, + "step": 15006 + }, + { + "epoch": 0.2593136577273984, + "grad_norm": 1.1004482923808496, + "learning_rate": 1.7366757708126805e-05, + "loss": 0.6372, + "step": 15007 + }, + { + "epoch": 0.2593309372408073, + "grad_norm": 0.40262440320981874, + "learning_rate": 1.7366379235767646e-05, + "loss": 0.5844, + "step": 15008 + }, + { + "epoch": 0.2593482167542162, + "grad_norm": 0.8363596193922888, + "learning_rate": 1.736600074033643e-05, + "loss": 0.5688, + "step": 15009 + }, + { + "epoch": 0.2593654962676251, + "grad_norm": 0.7129671410537207, + "learning_rate": 1.7365622221834343e-05, + "loss": 0.5624, + "step": 15010 + }, + { + "epoch": 0.259382775781034, + "grad_norm": 0.7614642819801, + "learning_rate": 1.7365243680262564e-05, + "loss": 0.3719, + "step": 15011 + }, + { + "epoch": 0.25940005529444293, + "grad_norm": 1.3141284274418943, + "learning_rate": 1.7364865115622282e-05, + "loss": 0.5851, + "step": 15012 + }, + { + "epoch": 0.2594173348078518, + "grad_norm": 1.2582627324776028, + "learning_rate": 1.7364486527914685e-05, + "loss": 0.5319, + "step": 15013 + }, + { + "epoch": 0.2594346143212607, + "grad_norm": 0.9427471746059249, + "learning_rate": 1.7364107917140954e-05, + "loss": 0.381, + "step": 15014 + }, + { + "epoch": 0.2594518938346696, + "grad_norm": 1.2786962092823373, + "learning_rate": 1.736372928330228e-05, + "loss": 0.4919, + "step": 15015 + }, + { + "epoch": 0.2594691733480785, + "grad_norm": 1.0380748799310935, + "learning_rate": 1.7363350626399845e-05, + "loss": 0.4905, + "step": 15016 + }, + { + "epoch": 0.2594864528614874, + "grad_norm": 1.2896007845860993, + "learning_rate": 1.7362971946434835e-05, + "loss": 0.4832, + "step": 15017 + }, + { + "epoch": 0.2595037323748963, + "grad_norm": 1.1158358871152723, + "learning_rate": 1.7362593243408436e-05, + "loss": 0.3814, + "step": 15018 + }, + { + "epoch": 0.25952101188830523, + "grad_norm": 0.8577781999498852, + "learning_rate": 1.736221451732184e-05, + "loss": 0.3888, + "step": 15019 + }, + { + "epoch": 0.25953829140171414, + "grad_norm": 0.7945435492439347, + "learning_rate": 1.7361835768176226e-05, + "loss": 0.4694, + "step": 15020 + }, + { + "epoch": 0.25955557091512305, + "grad_norm": 1.1231440482981796, + "learning_rate": 1.7361456995972784e-05, + "loss": 0.6467, + "step": 15021 + }, + { + "epoch": 0.25957285042853195, + "grad_norm": 1.2465016878566026, + "learning_rate": 1.73610782007127e-05, + "loss": 0.2998, + "step": 15022 + }, + { + "epoch": 0.25959012994194086, + "grad_norm": 1.5198973391033885, + "learning_rate": 1.7360699382397156e-05, + "loss": 0.5653, + "step": 15023 + }, + { + "epoch": 0.2596074094553497, + "grad_norm": 0.7669937650861345, + "learning_rate": 1.7360320541027342e-05, + "loss": 0.4646, + "step": 15024 + }, + { + "epoch": 0.2596246889687586, + "grad_norm": 0.6947931529533461, + "learning_rate": 1.735994167660445e-05, + "loss": 0.4896, + "step": 15025 + }, + { + "epoch": 0.25964196848216753, + "grad_norm": 1.0408535164740909, + "learning_rate": 1.7359562789129655e-05, + "loss": 0.4438, + "step": 15026 + }, + { + "epoch": 0.25965924799557644, + "grad_norm": 0.9444940476193312, + "learning_rate": 1.7359183878604157e-05, + "loss": 0.568, + "step": 15027 + }, + { + "epoch": 0.25967652750898534, + "grad_norm": 1.6198056542252286, + "learning_rate": 1.7358804945029128e-05, + "loss": 0.6379, + "step": 15028 + }, + { + "epoch": 0.25969380702239425, + "grad_norm": 1.733400265757401, + "learning_rate": 1.7358425988405766e-05, + "loss": 0.4955, + "step": 15029 + }, + { + "epoch": 0.25971108653580316, + "grad_norm": 0.7209031263377972, + "learning_rate": 1.735804700873526e-05, + "loss": 0.4974, + "step": 15030 + }, + { + "epoch": 0.25972836604921207, + "grad_norm": 1.0173886752174488, + "learning_rate": 1.7357668006018786e-05, + "loss": 0.4925, + "step": 15031 + }, + { + "epoch": 0.259745645562621, + "grad_norm": 0.9588213738328725, + "learning_rate": 1.7357288980257536e-05, + "loss": 0.5379, + "step": 15032 + }, + { + "epoch": 0.2597629250760299, + "grad_norm": 0.42187373913699355, + "learning_rate": 1.73569099314527e-05, + "loss": 0.707, + "step": 15033 + }, + { + "epoch": 0.25978020458943873, + "grad_norm": 1.1171641675584651, + "learning_rate": 1.735653085960546e-05, + "loss": 0.5067, + "step": 15034 + }, + { + "epoch": 0.25979748410284764, + "grad_norm": 1.3215813510051024, + "learning_rate": 1.7356151764717012e-05, + "loss": 0.4269, + "step": 15035 + }, + { + "epoch": 0.25981476361625655, + "grad_norm": 0.8030377853180567, + "learning_rate": 1.7355772646788532e-05, + "loss": 0.4534, + "step": 15036 + }, + { + "epoch": 0.25983204312966546, + "grad_norm": 0.7695339952795589, + "learning_rate": 1.7355393505821216e-05, + "loss": 0.433, + "step": 15037 + }, + { + "epoch": 0.25984932264307437, + "grad_norm": 1.1489727713227678, + "learning_rate": 1.7355014341816247e-05, + "loss": 0.3721, + "step": 15038 + }, + { + "epoch": 0.2598666021564833, + "grad_norm": 0.9532371537916428, + "learning_rate": 1.7354635154774814e-05, + "loss": 0.5148, + "step": 15039 + }, + { + "epoch": 0.2598838816698922, + "grad_norm": 0.8114249324903022, + "learning_rate": 1.7354255944698107e-05, + "loss": 0.5977, + "step": 15040 + }, + { + "epoch": 0.2599011611833011, + "grad_norm": 0.7103277312792021, + "learning_rate": 1.735387671158731e-05, + "loss": 0.403, + "step": 15041 + }, + { + "epoch": 0.25991844069671, + "grad_norm": 0.367913191782964, + "learning_rate": 1.7353497455443613e-05, + "loss": 0.4989, + "step": 15042 + }, + { + "epoch": 0.2599357202101189, + "grad_norm": 0.7468229603273103, + "learning_rate": 1.73531181762682e-05, + "loss": 0.3798, + "step": 15043 + }, + { + "epoch": 0.2599529997235278, + "grad_norm": 0.7376420113787563, + "learning_rate": 1.7352738874062268e-05, + "loss": 0.5339, + "step": 15044 + }, + { + "epoch": 0.25997027923693666, + "grad_norm": 1.0738875015303049, + "learning_rate": 1.7352359548826997e-05, + "loss": 0.4441, + "step": 15045 + }, + { + "epoch": 0.25998755875034557, + "grad_norm": 1.1870320831052128, + "learning_rate": 1.7351980200563573e-05, + "loss": 0.5294, + "step": 15046 + }, + { + "epoch": 0.2600048382637545, + "grad_norm": 0.8419878778958451, + "learning_rate": 1.7351600829273196e-05, + "loss": 0.5784, + "step": 15047 + }, + { + "epoch": 0.2600221177771634, + "grad_norm": 1.065179817769287, + "learning_rate": 1.7351221434957038e-05, + "loss": 0.4603, + "step": 15048 + }, + { + "epoch": 0.2600393972905723, + "grad_norm": 0.9006436587816209, + "learning_rate": 1.7350842017616298e-05, + "loss": 0.5361, + "step": 15049 + }, + { + "epoch": 0.2600566768039812, + "grad_norm": 1.181637515480485, + "learning_rate": 1.7350462577252166e-05, + "loss": 0.5481, + "step": 15050 + }, + { + "epoch": 0.2600739563173901, + "grad_norm": 1.3111456710248912, + "learning_rate": 1.7350083113865825e-05, + "loss": 0.7216, + "step": 15051 + }, + { + "epoch": 0.260091235830799, + "grad_norm": 0.4517453273548631, + "learning_rate": 1.7349703627458462e-05, + "loss": 0.6562, + "step": 15052 + }, + { + "epoch": 0.2601085153442079, + "grad_norm": 0.8110528851074719, + "learning_rate": 1.734932411803127e-05, + "loss": 0.5519, + "step": 15053 + }, + { + "epoch": 0.26012579485761683, + "grad_norm": 1.3422269932949906, + "learning_rate": 1.734894458558544e-05, + "loss": 0.6085, + "step": 15054 + }, + { + "epoch": 0.2601430743710257, + "grad_norm": 0.8799150414600359, + "learning_rate": 1.7348565030122155e-05, + "loss": 0.6269, + "step": 15055 + }, + { + "epoch": 0.2601603538844346, + "grad_norm": 1.0079050081784828, + "learning_rate": 1.7348185451642602e-05, + "loss": 0.8225, + "step": 15056 + }, + { + "epoch": 0.2601776333978435, + "grad_norm": 1.7616626545359662, + "learning_rate": 1.734780585014798e-05, + "loss": 0.564, + "step": 15057 + }, + { + "epoch": 0.2601949129112524, + "grad_norm": 0.8126831657507141, + "learning_rate": 1.7347426225639464e-05, + "loss": 0.5037, + "step": 15058 + }, + { + "epoch": 0.2602121924246613, + "grad_norm": 1.1797203486357568, + "learning_rate": 1.734704657811826e-05, + "loss": 0.4599, + "step": 15059 + }, + { + "epoch": 0.2602294719380702, + "grad_norm": 0.4887722915396237, + "learning_rate": 1.7346666907585542e-05, + "loss": 0.6064, + "step": 15060 + }, + { + "epoch": 0.26024675145147913, + "grad_norm": 0.9679937872428916, + "learning_rate": 1.7346287214042506e-05, + "loss": 0.736, + "step": 15061 + }, + { + "epoch": 0.26026403096488804, + "grad_norm": 1.2409522814463212, + "learning_rate": 1.734590749749034e-05, + "loss": 0.4555, + "step": 15062 + }, + { + "epoch": 0.26028131047829695, + "grad_norm": 0.877181583188905, + "learning_rate": 1.734552775793023e-05, + "loss": 0.6639, + "step": 15063 + }, + { + "epoch": 0.26029858999170585, + "grad_norm": 0.8859442125704777, + "learning_rate": 1.7345147995363374e-05, + "loss": 0.6009, + "step": 15064 + }, + { + "epoch": 0.26031586950511476, + "grad_norm": 0.9762049097577806, + "learning_rate": 1.7344768209790955e-05, + "loss": 0.4053, + "step": 15065 + }, + { + "epoch": 0.2603331490185236, + "grad_norm": 1.4698185074768224, + "learning_rate": 1.7344388401214164e-05, + "loss": 0.4808, + "step": 15066 + }, + { + "epoch": 0.2603504285319325, + "grad_norm": 0.48757913381663276, + "learning_rate": 1.734400856963419e-05, + "loss": 0.683, + "step": 15067 + }, + { + "epoch": 0.26036770804534143, + "grad_norm": 1.7698033831101367, + "learning_rate": 1.734362871505222e-05, + "loss": 0.7152, + "step": 15068 + }, + { + "epoch": 0.26038498755875034, + "grad_norm": 0.8892434835732776, + "learning_rate": 1.7343248837469452e-05, + "loss": 0.3967, + "step": 15069 + }, + { + "epoch": 0.26040226707215924, + "grad_norm": 0.9857907546370476, + "learning_rate": 1.734286893688707e-05, + "loss": 0.5006, + "step": 15070 + }, + { + "epoch": 0.26041954658556815, + "grad_norm": 1.2072992592408205, + "learning_rate": 1.734248901330626e-05, + "loss": 0.5793, + "step": 15071 + }, + { + "epoch": 0.26043682609897706, + "grad_norm": 0.6215644091479445, + "learning_rate": 1.7342109066728218e-05, + "loss": 0.7055, + "step": 15072 + }, + { + "epoch": 0.26045410561238597, + "grad_norm": 0.5271595657571845, + "learning_rate": 1.7341729097154133e-05, + "loss": 0.5608, + "step": 15073 + }, + { + "epoch": 0.2604713851257949, + "grad_norm": 2.2051720672596935, + "learning_rate": 1.7341349104585198e-05, + "loss": 0.6947, + "step": 15074 + }, + { + "epoch": 0.2604886646392038, + "grad_norm": 1.123522787590689, + "learning_rate": 1.7340969089022594e-05, + "loss": 0.5413, + "step": 15075 + }, + { + "epoch": 0.2605059441526127, + "grad_norm": 1.22691629524684, + "learning_rate": 1.734058905046752e-05, + "loss": 0.543, + "step": 15076 + }, + { + "epoch": 0.26052322366602154, + "grad_norm": 1.5144720214250627, + "learning_rate": 1.7340208988921167e-05, + "loss": 0.6361, + "step": 15077 + }, + { + "epoch": 0.26054050317943045, + "grad_norm": 1.1174450771983835, + "learning_rate": 1.7339828904384714e-05, + "loss": 0.5087, + "step": 15078 + }, + { + "epoch": 0.26055778269283936, + "grad_norm": 0.7127054389144366, + "learning_rate": 1.733944879685936e-05, + "loss": 0.4685, + "step": 15079 + }, + { + "epoch": 0.26057506220624826, + "grad_norm": 0.7224084454153521, + "learning_rate": 1.73390686663463e-05, + "loss": 0.4978, + "step": 15080 + }, + { + "epoch": 0.2605923417196572, + "grad_norm": 1.264742876665891, + "learning_rate": 1.7338688512846714e-05, + "loss": 0.5664, + "step": 15081 + }, + { + "epoch": 0.2606096212330661, + "grad_norm": 1.0857766537695925, + "learning_rate": 1.7338308336361803e-05, + "loss": 0.4164, + "step": 15082 + }, + { + "epoch": 0.260626900746475, + "grad_norm": 1.2165050292198507, + "learning_rate": 1.7337928136892747e-05, + "loss": 0.6011, + "step": 15083 + }, + { + "epoch": 0.2606441802598839, + "grad_norm": 1.2811833865073803, + "learning_rate": 1.7337547914440747e-05, + "loss": 0.5669, + "step": 15084 + }, + { + "epoch": 0.2606614597732928, + "grad_norm": 1.4316587625249861, + "learning_rate": 1.7337167669006988e-05, + "loss": 0.4776, + "step": 15085 + }, + { + "epoch": 0.2606787392867017, + "grad_norm": 1.4314832430835813, + "learning_rate": 1.733678740059266e-05, + "loss": 0.6622, + "step": 15086 + }, + { + "epoch": 0.26069601880011056, + "grad_norm": 0.7071139172190574, + "learning_rate": 1.7336407109198957e-05, + "loss": 0.4231, + "step": 15087 + }, + { + "epoch": 0.26071329831351947, + "grad_norm": 1.4325909688331044, + "learning_rate": 1.7336026794827074e-05, + "loss": 0.7737, + "step": 15088 + }, + { + "epoch": 0.2607305778269284, + "grad_norm": 1.0796119767830443, + "learning_rate": 1.733564645747819e-05, + "loss": 0.6557, + "step": 15089 + }, + { + "epoch": 0.2607478573403373, + "grad_norm": 0.3461100819088546, + "learning_rate": 1.733526609715351e-05, + "loss": 0.5537, + "step": 15090 + }, + { + "epoch": 0.2607651368537462, + "grad_norm": 1.5533567145737317, + "learning_rate": 1.7334885713854216e-05, + "loss": 0.5819, + "step": 15091 + }, + { + "epoch": 0.2607824163671551, + "grad_norm": 1.2205922380738798, + "learning_rate": 1.7334505307581504e-05, + "loss": 0.4303, + "step": 15092 + }, + { + "epoch": 0.260799695880564, + "grad_norm": 1.2498443510973862, + "learning_rate": 1.7334124878336564e-05, + "loss": 0.5712, + "step": 15093 + }, + { + "epoch": 0.2608169753939729, + "grad_norm": 0.926230962473468, + "learning_rate": 1.7333744426120585e-05, + "loss": 0.6399, + "step": 15094 + }, + { + "epoch": 0.2608342549073818, + "grad_norm": 0.38922551442825576, + "learning_rate": 1.7333363950934763e-05, + "loss": 0.4985, + "step": 15095 + }, + { + "epoch": 0.26085153442079073, + "grad_norm": 1.1707583952864595, + "learning_rate": 1.7332983452780288e-05, + "loss": 0.3765, + "step": 15096 + }, + { + "epoch": 0.26086881393419964, + "grad_norm": 0.9634066343130954, + "learning_rate": 1.7332602931658352e-05, + "loss": 0.927, + "step": 15097 + }, + { + "epoch": 0.2608860934476085, + "grad_norm": 0.9547421652474285, + "learning_rate": 1.7332222387570147e-05, + "loss": 0.5087, + "step": 15098 + }, + { + "epoch": 0.2609033729610174, + "grad_norm": 0.7307918335250675, + "learning_rate": 1.733184182051686e-05, + "loss": 0.4252, + "step": 15099 + }, + { + "epoch": 0.2609206524744263, + "grad_norm": 0.8677787583049397, + "learning_rate": 1.733146123049969e-05, + "loss": 0.5626, + "step": 15100 + }, + { + "epoch": 0.2609379319878352, + "grad_norm": 0.7481177890278154, + "learning_rate": 1.733108061751983e-05, + "loss": 0.3856, + "step": 15101 + }, + { + "epoch": 0.2609552115012441, + "grad_norm": 0.6994891551502564, + "learning_rate": 1.7330699981578463e-05, + "loss": 0.6327, + "step": 15102 + }, + { + "epoch": 0.26097249101465303, + "grad_norm": 0.644308651097396, + "learning_rate": 1.733031932267679e-05, + "loss": 0.6074, + "step": 15103 + }, + { + "epoch": 0.26098977052806194, + "grad_norm": 1.0021974086266856, + "learning_rate": 1.7329938640815995e-05, + "loss": 0.4341, + "step": 15104 + }, + { + "epoch": 0.26100705004147084, + "grad_norm": 0.6710694315447847, + "learning_rate": 1.732955793599728e-05, + "loss": 0.3335, + "step": 15105 + }, + { + "epoch": 0.26102432955487975, + "grad_norm": 0.6239695866815993, + "learning_rate": 1.732917720822183e-05, + "loss": 0.5498, + "step": 15106 + }, + { + "epoch": 0.26104160906828866, + "grad_norm": 0.789510311926446, + "learning_rate": 1.732879645749084e-05, + "loss": 0.4765, + "step": 15107 + }, + { + "epoch": 0.2610588885816975, + "grad_norm": 1.178435858815146, + "learning_rate": 1.7328415683805503e-05, + "loss": 0.594, + "step": 15108 + }, + { + "epoch": 0.2610761680951064, + "grad_norm": 0.7823381998500222, + "learning_rate": 1.7328034887167012e-05, + "loss": 0.456, + "step": 15109 + }, + { + "epoch": 0.2610934476085153, + "grad_norm": 1.544398293890562, + "learning_rate": 1.7327654067576557e-05, + "loss": 0.6398, + "step": 15110 + }, + { + "epoch": 0.26111072712192424, + "grad_norm": 0.7236828375747026, + "learning_rate": 1.7327273225035335e-05, + "loss": 0.4932, + "step": 15111 + }, + { + "epoch": 0.26112800663533314, + "grad_norm": 1.4186911302705854, + "learning_rate": 1.7326892359544537e-05, + "loss": 0.7505, + "step": 15112 + }, + { + "epoch": 0.26114528614874205, + "grad_norm": 1.303737585117447, + "learning_rate": 1.732651147110535e-05, + "loss": 0.4754, + "step": 15113 + }, + { + "epoch": 0.26116256566215096, + "grad_norm": 1.553883363613303, + "learning_rate": 1.7326130559718977e-05, + "loss": 0.5142, + "step": 15114 + }, + { + "epoch": 0.26117984517555987, + "grad_norm": 1.5578753132299348, + "learning_rate": 1.7325749625386607e-05, + "loss": 0.7272, + "step": 15115 + }, + { + "epoch": 0.2611971246889688, + "grad_norm": 1.3426855924043388, + "learning_rate": 1.732536866810943e-05, + "loss": 0.6986, + "step": 15116 + }, + { + "epoch": 0.2612144042023777, + "grad_norm": 0.8522793063680831, + "learning_rate": 1.7324987687888642e-05, + "loss": 0.4512, + "step": 15117 + }, + { + "epoch": 0.2612316837157866, + "grad_norm": 1.7671692009434607, + "learning_rate": 1.7324606684725432e-05, + "loss": 0.7957, + "step": 15118 + }, + { + "epoch": 0.26124896322919544, + "grad_norm": 1.146693728860995, + "learning_rate": 1.7324225658621003e-05, + "loss": 0.3437, + "step": 15119 + }, + { + "epoch": 0.26126624274260435, + "grad_norm": 1.4619535662310652, + "learning_rate": 1.7323844609576543e-05, + "loss": 0.5561, + "step": 15120 + }, + { + "epoch": 0.26128352225601326, + "grad_norm": 1.2594303872938193, + "learning_rate": 1.732346353759324e-05, + "loss": 0.5778, + "step": 15121 + }, + { + "epoch": 0.26130080176942216, + "grad_norm": 0.7879121653821749, + "learning_rate": 1.7323082442672298e-05, + "loss": 0.4294, + "step": 15122 + }, + { + "epoch": 0.26131808128283107, + "grad_norm": 0.8687308620237154, + "learning_rate": 1.73227013248149e-05, + "loss": 0.6418, + "step": 15123 + }, + { + "epoch": 0.26133536079624, + "grad_norm": 0.7775697127396973, + "learning_rate": 1.732232018402225e-05, + "loss": 0.3132, + "step": 15124 + }, + { + "epoch": 0.2613526403096489, + "grad_norm": 1.7272345372420985, + "learning_rate": 1.7321939020295534e-05, + "loss": 0.6346, + "step": 15125 + }, + { + "epoch": 0.2613699198230578, + "grad_norm": 0.8640634571742086, + "learning_rate": 1.7321557833635948e-05, + "loss": 0.4767, + "step": 15126 + }, + { + "epoch": 0.2613871993364667, + "grad_norm": 1.1473306206467724, + "learning_rate": 1.732117662404469e-05, + "loss": 0.8119, + "step": 15127 + }, + { + "epoch": 0.2614044788498756, + "grad_norm": 0.9586384788120202, + "learning_rate": 1.7320795391522946e-05, + "loss": 0.368, + "step": 15128 + }, + { + "epoch": 0.2614217583632845, + "grad_norm": 1.1586966600143747, + "learning_rate": 1.7320414136071915e-05, + "loss": 0.6434, + "step": 15129 + }, + { + "epoch": 0.26143903787669337, + "grad_norm": 0.8150443822771386, + "learning_rate": 1.7320032857692794e-05, + "loss": 0.5888, + "step": 15130 + }, + { + "epoch": 0.2614563173901023, + "grad_norm": 1.318839693335485, + "learning_rate": 1.731965155638677e-05, + "loss": 0.6654, + "step": 15131 + }, + { + "epoch": 0.2614735969035112, + "grad_norm": 0.48794938221032735, + "learning_rate": 1.7319270232155044e-05, + "loss": 0.7719, + "step": 15132 + }, + { + "epoch": 0.2614908764169201, + "grad_norm": 0.9234339644344832, + "learning_rate": 1.7318888884998805e-05, + "loss": 0.6882, + "step": 15133 + }, + { + "epoch": 0.261508155930329, + "grad_norm": 0.9549256897827193, + "learning_rate": 1.7318507514919253e-05, + "loss": 0.6704, + "step": 15134 + }, + { + "epoch": 0.2615254354437379, + "grad_norm": 0.8958415368928182, + "learning_rate": 1.7318126121917576e-05, + "loss": 0.8862, + "step": 15135 + }, + { + "epoch": 0.2615427149571468, + "grad_norm": 0.8708280650832072, + "learning_rate": 1.7317744705994974e-05, + "loss": 0.5013, + "step": 15136 + }, + { + "epoch": 0.2615599944705557, + "grad_norm": 1.0105048380719315, + "learning_rate": 1.731736326715264e-05, + "loss": 0.5882, + "step": 15137 + }, + { + "epoch": 0.26157727398396463, + "grad_norm": 0.9074727440698248, + "learning_rate": 1.7316981805391762e-05, + "loss": 0.653, + "step": 15138 + }, + { + "epoch": 0.26159455349737354, + "grad_norm": 0.8878132250761094, + "learning_rate": 1.7316600320713548e-05, + "loss": 0.583, + "step": 15139 + }, + { + "epoch": 0.2616118330107824, + "grad_norm": 0.7161111448678371, + "learning_rate": 1.7316218813119182e-05, + "loss": 0.5567, + "step": 15140 + }, + { + "epoch": 0.2616291125241913, + "grad_norm": 0.8681858245273261, + "learning_rate": 1.7315837282609865e-05, + "loss": 0.5514, + "step": 15141 + }, + { + "epoch": 0.2616463920376002, + "grad_norm": 0.8805529562189455, + "learning_rate": 1.731545572918679e-05, + "loss": 0.4584, + "step": 15142 + }, + { + "epoch": 0.2616636715510091, + "grad_norm": 0.7353539941275498, + "learning_rate": 1.731507415285115e-05, + "loss": 0.4336, + "step": 15143 + }, + { + "epoch": 0.261680951064418, + "grad_norm": 0.5628907998968222, + "learning_rate": 1.7314692553604143e-05, + "loss": 0.7026, + "step": 15144 + }, + { + "epoch": 0.26169823057782693, + "grad_norm": 1.1925947846121745, + "learning_rate": 1.7314310931446962e-05, + "loss": 0.6258, + "step": 15145 + }, + { + "epoch": 0.26171551009123584, + "grad_norm": 0.9033616068920004, + "learning_rate": 1.7313929286380802e-05, + "loss": 0.5539, + "step": 15146 + }, + { + "epoch": 0.26173278960464474, + "grad_norm": 0.41641641927079515, + "learning_rate": 1.7313547618406864e-05, + "loss": 0.6598, + "step": 15147 + }, + { + "epoch": 0.26175006911805365, + "grad_norm": 0.8486378048005254, + "learning_rate": 1.7313165927526336e-05, + "loss": 0.477, + "step": 15148 + }, + { + "epoch": 0.26176734863146256, + "grad_norm": 0.8515635430112177, + "learning_rate": 1.7312784213740413e-05, + "loss": 0.4451, + "step": 15149 + }, + { + "epoch": 0.26178462814487147, + "grad_norm": 1.2699746739345927, + "learning_rate": 1.73124024770503e-05, + "loss": 0.786, + "step": 15150 + }, + { + "epoch": 0.2618019076582803, + "grad_norm": 0.9410987287850244, + "learning_rate": 1.7312020717457185e-05, + "loss": 0.5185, + "step": 15151 + }, + { + "epoch": 0.2618191871716892, + "grad_norm": 1.0404137456770484, + "learning_rate": 1.7311638934962265e-05, + "loss": 0.6723, + "step": 15152 + }, + { + "epoch": 0.26183646668509813, + "grad_norm": 1.0915750308824033, + "learning_rate": 1.7311257129566734e-05, + "loss": 0.4392, + "step": 15153 + }, + { + "epoch": 0.26185374619850704, + "grad_norm": 0.8287859498879206, + "learning_rate": 1.7310875301271793e-05, + "loss": 0.4033, + "step": 15154 + }, + { + "epoch": 0.26187102571191595, + "grad_norm": 1.0749959562605997, + "learning_rate": 1.7310493450078635e-05, + "loss": 0.5284, + "step": 15155 + }, + { + "epoch": 0.26188830522532486, + "grad_norm": 1.043836961247553, + "learning_rate": 1.7310111575988454e-05, + "loss": 0.5225, + "step": 15156 + }, + { + "epoch": 0.26190558473873377, + "grad_norm": 1.1650440943099731, + "learning_rate": 1.7309729679002448e-05, + "loss": 0.5234, + "step": 15157 + }, + { + "epoch": 0.2619228642521427, + "grad_norm": 1.2706950291464238, + "learning_rate": 1.7309347759121816e-05, + "loss": 0.4581, + "step": 15158 + }, + { + "epoch": 0.2619401437655516, + "grad_norm": 1.228219915422441, + "learning_rate": 1.730896581634775e-05, + "loss": 0.5693, + "step": 15159 + }, + { + "epoch": 0.2619574232789605, + "grad_norm": 0.9443486653351633, + "learning_rate": 1.7308583850681446e-05, + "loss": 0.4738, + "step": 15160 + }, + { + "epoch": 0.26197470279236934, + "grad_norm": 1.3452557432949162, + "learning_rate": 1.7308201862124104e-05, + "loss": 0.5983, + "step": 15161 + }, + { + "epoch": 0.26199198230577825, + "grad_norm": 0.8913809749227544, + "learning_rate": 1.7307819850676916e-05, + "loss": 0.5491, + "step": 15162 + }, + { + "epoch": 0.26200926181918716, + "grad_norm": 0.4109232995318948, + "learning_rate": 1.7307437816341082e-05, + "loss": 0.6034, + "step": 15163 + }, + { + "epoch": 0.26202654133259606, + "grad_norm": 0.7723651135418212, + "learning_rate": 1.7307055759117798e-05, + "loss": 0.4888, + "step": 15164 + }, + { + "epoch": 0.26204382084600497, + "grad_norm": 1.4274988687649788, + "learning_rate": 1.730667367900826e-05, + "loss": 0.6951, + "step": 15165 + }, + { + "epoch": 0.2620611003594139, + "grad_norm": 1.330553687215635, + "learning_rate": 1.7306291576013662e-05, + "loss": 0.6018, + "step": 15166 + }, + { + "epoch": 0.2620783798728228, + "grad_norm": 0.993202134881518, + "learning_rate": 1.7305909450135207e-05, + "loss": 0.3312, + "step": 15167 + }, + { + "epoch": 0.2620956593862317, + "grad_norm": 0.9840156950773786, + "learning_rate": 1.7305527301374088e-05, + "loss": 0.5664, + "step": 15168 + }, + { + "epoch": 0.2621129388996406, + "grad_norm": 1.0207869170424495, + "learning_rate": 1.7305145129731503e-05, + "loss": 0.5997, + "step": 15169 + }, + { + "epoch": 0.2621302184130495, + "grad_norm": 0.6424751461881637, + "learning_rate": 1.7304762935208647e-05, + "loss": 0.494, + "step": 15170 + }, + { + "epoch": 0.2621474979264584, + "grad_norm": 0.8537173070108885, + "learning_rate": 1.7304380717806716e-05, + "loss": 0.3853, + "step": 15171 + }, + { + "epoch": 0.26216477743986727, + "grad_norm": 0.9001593785048952, + "learning_rate": 1.7303998477526913e-05, + "loss": 0.6501, + "step": 15172 + }, + { + "epoch": 0.2621820569532762, + "grad_norm": 1.373922742993178, + "learning_rate": 1.730361621437043e-05, + "loss": 0.64, + "step": 15173 + }, + { + "epoch": 0.2621993364666851, + "grad_norm": 1.3812832046250596, + "learning_rate": 1.730323392833847e-05, + "loss": 0.6211, + "step": 15174 + }, + { + "epoch": 0.262216615980094, + "grad_norm": 1.4081914738307078, + "learning_rate": 1.730285161943222e-05, + "loss": 0.5557, + "step": 15175 + }, + { + "epoch": 0.2622338954935029, + "grad_norm": 1.0431976290956118, + "learning_rate": 1.7302469287652885e-05, + "loss": 0.5393, + "step": 15176 + }, + { + "epoch": 0.2622511750069118, + "grad_norm": 1.0915489476641467, + "learning_rate": 1.7302086933001665e-05, + "loss": 0.3846, + "step": 15177 + }, + { + "epoch": 0.2622684545203207, + "grad_norm": 1.0941368458902003, + "learning_rate": 1.7301704555479753e-05, + "loss": 0.5683, + "step": 15178 + }, + { + "epoch": 0.2622857340337296, + "grad_norm": 1.382098407690067, + "learning_rate": 1.7301322155088345e-05, + "loss": 0.5087, + "step": 15179 + }, + { + "epoch": 0.26230301354713853, + "grad_norm": 0.9319749945596201, + "learning_rate": 1.7300939731828642e-05, + "loss": 0.6881, + "step": 15180 + }, + { + "epoch": 0.26232029306054744, + "grad_norm": 1.1295584012756261, + "learning_rate": 1.730055728570184e-05, + "loss": 0.4227, + "step": 15181 + }, + { + "epoch": 0.2623375725739563, + "grad_norm": 0.9387995540486503, + "learning_rate": 1.730017481670914e-05, + "loss": 0.6361, + "step": 15182 + }, + { + "epoch": 0.2623548520873652, + "grad_norm": 0.4558447606633431, + "learning_rate": 1.7299792324851736e-05, + "loss": 0.6004, + "step": 15183 + }, + { + "epoch": 0.2623721316007741, + "grad_norm": 0.9286368816746967, + "learning_rate": 1.729940981013083e-05, + "loss": 0.4578, + "step": 15184 + }, + { + "epoch": 0.262389411114183, + "grad_norm": 0.433320116799537, + "learning_rate": 1.7299027272547615e-05, + "loss": 0.7903, + "step": 15185 + }, + { + "epoch": 0.2624066906275919, + "grad_norm": 0.7502388313761644, + "learning_rate": 1.7298644712103293e-05, + "loss": 0.4521, + "step": 15186 + }, + { + "epoch": 0.26242397014100083, + "grad_norm": 0.7145895005365234, + "learning_rate": 1.729826212879906e-05, + "loss": 0.6226, + "step": 15187 + }, + { + "epoch": 0.26244124965440974, + "grad_norm": 1.0117557891045814, + "learning_rate": 1.7297879522636115e-05, + "loss": 0.5215, + "step": 15188 + }, + { + "epoch": 0.26245852916781864, + "grad_norm": 0.6220117051646612, + "learning_rate": 1.7297496893615657e-05, + "loss": 0.5229, + "step": 15189 + }, + { + "epoch": 0.26247580868122755, + "grad_norm": 1.1181705832065347, + "learning_rate": 1.7297114241738886e-05, + "loss": 0.5568, + "step": 15190 + }, + { + "epoch": 0.26249308819463646, + "grad_norm": 0.8873132872112143, + "learning_rate": 1.7296731567006997e-05, + "loss": 0.4558, + "step": 15191 + }, + { + "epoch": 0.26251036770804537, + "grad_norm": 0.4078085966095842, + "learning_rate": 1.729634886942119e-05, + "loss": 0.6339, + "step": 15192 + }, + { + "epoch": 0.2625276472214542, + "grad_norm": 1.2639609497574142, + "learning_rate": 1.7295966148982665e-05, + "loss": 0.4373, + "step": 15193 + }, + { + "epoch": 0.2625449267348631, + "grad_norm": 0.9436909027853569, + "learning_rate": 1.729558340569262e-05, + "loss": 0.4646, + "step": 15194 + }, + { + "epoch": 0.26256220624827203, + "grad_norm": 1.0937512422550038, + "learning_rate": 1.729520063955225e-05, + "loss": 0.6076, + "step": 15195 + }, + { + "epoch": 0.26257948576168094, + "grad_norm": 0.38238566257393247, + "learning_rate": 1.7294817850562763e-05, + "loss": 0.6089, + "step": 15196 + }, + { + "epoch": 0.26259676527508985, + "grad_norm": 0.5924746411144354, + "learning_rate": 1.7294435038725348e-05, + "loss": 0.5721, + "step": 15197 + }, + { + "epoch": 0.26261404478849876, + "grad_norm": 1.0775382632325652, + "learning_rate": 1.7294052204041207e-05, + "loss": 0.5018, + "step": 15198 + }, + { + "epoch": 0.26263132430190766, + "grad_norm": 0.94171336117329, + "learning_rate": 1.7293669346511545e-05, + "loss": 0.4661, + "step": 15199 + }, + { + "epoch": 0.2626486038153166, + "grad_norm": 0.8478059503421678, + "learning_rate": 1.7293286466137553e-05, + "loss": 0.4469, + "step": 15200 + }, + { + "epoch": 0.2626658833287255, + "grad_norm": 1.1945937162442173, + "learning_rate": 1.7292903562920434e-05, + "loss": 0.6497, + "step": 15201 + }, + { + "epoch": 0.2626831628421344, + "grad_norm": 1.4433606947444726, + "learning_rate": 1.7292520636861385e-05, + "loss": 0.6878, + "step": 15202 + }, + { + "epoch": 0.2627004423555433, + "grad_norm": 1.1870498583542326, + "learning_rate": 1.729213768796161e-05, + "loss": 0.5869, + "step": 15203 + }, + { + "epoch": 0.26271772186895215, + "grad_norm": 0.7272120120511477, + "learning_rate": 1.7291754716222305e-05, + "loss": 0.4024, + "step": 15204 + }, + { + "epoch": 0.26273500138236106, + "grad_norm": 0.837066831494282, + "learning_rate": 1.729137172164467e-05, + "loss": 0.4854, + "step": 15205 + }, + { + "epoch": 0.26275228089576996, + "grad_norm": 1.5957216505955572, + "learning_rate": 1.7290988704229902e-05, + "loss": 0.6692, + "step": 15206 + }, + { + "epoch": 0.26276956040917887, + "grad_norm": 1.5267523816341808, + "learning_rate": 1.7290605663979207e-05, + "loss": 0.4968, + "step": 15207 + }, + { + "epoch": 0.2627868399225878, + "grad_norm": 0.6388864178173359, + "learning_rate": 1.7290222600893778e-05, + "loss": 0.5851, + "step": 15208 + }, + { + "epoch": 0.2628041194359967, + "grad_norm": 0.8417647685954086, + "learning_rate": 1.728983951497482e-05, + "loss": 0.6599, + "step": 15209 + }, + { + "epoch": 0.2628213989494056, + "grad_norm": 1.1527077807753274, + "learning_rate": 1.728945640622353e-05, + "loss": 0.3437, + "step": 15210 + }, + { + "epoch": 0.2628386784628145, + "grad_norm": 1.161753532761548, + "learning_rate": 1.728907327464111e-05, + "loss": 0.4296, + "step": 15211 + }, + { + "epoch": 0.2628559579762234, + "grad_norm": 1.4354381910649836, + "learning_rate": 1.7288690120228756e-05, + "loss": 0.5586, + "step": 15212 + }, + { + "epoch": 0.2628732374896323, + "grad_norm": 0.7730092993593946, + "learning_rate": 1.728830694298767e-05, + "loss": 0.4555, + "step": 15213 + }, + { + "epoch": 0.26289051700304117, + "grad_norm": 0.4011668227481283, + "learning_rate": 1.7287923742919056e-05, + "loss": 0.5713, + "step": 15214 + }, + { + "epoch": 0.2629077965164501, + "grad_norm": 0.7131721749554227, + "learning_rate": 1.7287540520024107e-05, + "loss": 0.5903, + "step": 15215 + }, + { + "epoch": 0.262925076029859, + "grad_norm": 1.376777855435342, + "learning_rate": 1.7287157274304032e-05, + "loss": 0.6162, + "step": 15216 + }, + { + "epoch": 0.2629423555432679, + "grad_norm": 1.0170448667809975, + "learning_rate": 1.7286774005760025e-05, + "loss": 0.5738, + "step": 15217 + }, + { + "epoch": 0.2629596350566768, + "grad_norm": 1.3120905699441707, + "learning_rate": 1.7286390714393287e-05, + "loss": 0.766, + "step": 15218 + }, + { + "epoch": 0.2629769145700857, + "grad_norm": 0.892952159159918, + "learning_rate": 1.728600740020502e-05, + "loss": 0.4128, + "step": 15219 + }, + { + "epoch": 0.2629941940834946, + "grad_norm": 0.8160289306236554, + "learning_rate": 1.7285624063196422e-05, + "loss": 0.4561, + "step": 15220 + }, + { + "epoch": 0.2630114735969035, + "grad_norm": 1.1834044958662908, + "learning_rate": 1.72852407033687e-05, + "loss": 0.5372, + "step": 15221 + }, + { + "epoch": 0.26302875311031243, + "grad_norm": 1.389687665654405, + "learning_rate": 1.728485732072305e-05, + "loss": 0.4949, + "step": 15222 + }, + { + "epoch": 0.26304603262372134, + "grad_norm": 1.1228456654926264, + "learning_rate": 1.7284473915260668e-05, + "loss": 0.5974, + "step": 15223 + }, + { + "epoch": 0.26306331213713025, + "grad_norm": 1.0939126852077512, + "learning_rate": 1.7284090486982764e-05, + "loss": 0.5733, + "step": 15224 + }, + { + "epoch": 0.2630805916505391, + "grad_norm": 0.9164464593739325, + "learning_rate": 1.7283707035890532e-05, + "loss": 0.6349, + "step": 15225 + }, + { + "epoch": 0.263097871163948, + "grad_norm": 0.7951034825142164, + "learning_rate": 1.728332356198518e-05, + "loss": 0.5163, + "step": 15226 + }, + { + "epoch": 0.2631151506773569, + "grad_norm": 1.0144894390100214, + "learning_rate": 1.7282940065267903e-05, + "loss": 0.8409, + "step": 15227 + }, + { + "epoch": 0.2631324301907658, + "grad_norm": 0.6746157263245618, + "learning_rate": 1.7282556545739905e-05, + "loss": 0.2843, + "step": 15228 + }, + { + "epoch": 0.26314970970417473, + "grad_norm": 1.4690502742776252, + "learning_rate": 1.7282173003402384e-05, + "loss": 0.6182, + "step": 15229 + }, + { + "epoch": 0.26316698921758364, + "grad_norm": 1.0755379646575856, + "learning_rate": 1.728178943825655e-05, + "loss": 0.7518, + "step": 15230 + }, + { + "epoch": 0.26318426873099254, + "grad_norm": 1.2525840346630939, + "learning_rate": 1.7281405850303593e-05, + "loss": 0.5552, + "step": 15231 + }, + { + "epoch": 0.26320154824440145, + "grad_norm": 0.6607175458487089, + "learning_rate": 1.7281022239544722e-05, + "loss": 0.4035, + "step": 15232 + }, + { + "epoch": 0.26321882775781036, + "grad_norm": 0.8487769044994306, + "learning_rate": 1.7280638605981132e-05, + "loss": 0.5938, + "step": 15233 + }, + { + "epoch": 0.26323610727121927, + "grad_norm": 0.8126960428572518, + "learning_rate": 1.7280254949614033e-05, + "loss": 0.7527, + "step": 15234 + }, + { + "epoch": 0.2632533867846281, + "grad_norm": 1.2237280322834796, + "learning_rate": 1.727987127044462e-05, + "loss": 0.6891, + "step": 15235 + }, + { + "epoch": 0.263270666298037, + "grad_norm": 1.1708902098310934, + "learning_rate": 1.7279487568474098e-05, + "loss": 0.3456, + "step": 15236 + }, + { + "epoch": 0.26328794581144593, + "grad_norm": 2.011475351538925, + "learning_rate": 1.7279103843703667e-05, + "loss": 0.5739, + "step": 15237 + }, + { + "epoch": 0.26330522532485484, + "grad_norm": 0.6315861715668679, + "learning_rate": 1.727872009613453e-05, + "loss": 0.6257, + "step": 15238 + }, + { + "epoch": 0.26332250483826375, + "grad_norm": 1.1528278718916485, + "learning_rate": 1.7278336325767888e-05, + "loss": 0.5554, + "step": 15239 + }, + { + "epoch": 0.26333978435167266, + "grad_norm": 1.257411347180205, + "learning_rate": 1.7277952532604944e-05, + "loss": 0.6701, + "step": 15240 + }, + { + "epoch": 0.26335706386508156, + "grad_norm": 1.1617653084345665, + "learning_rate": 1.7277568716646897e-05, + "loss": 0.6027, + "step": 15241 + }, + { + "epoch": 0.26337434337849047, + "grad_norm": 1.3482419203580085, + "learning_rate": 1.7277184877894956e-05, + "loss": 0.4777, + "step": 15242 + }, + { + "epoch": 0.2633916228918994, + "grad_norm": 0.5778647009468768, + "learning_rate": 1.7276801016350314e-05, + "loss": 0.4443, + "step": 15243 + }, + { + "epoch": 0.2634089024053083, + "grad_norm": 0.7582026010944941, + "learning_rate": 1.7276417132014184e-05, + "loss": 0.4166, + "step": 15244 + }, + { + "epoch": 0.2634261819187172, + "grad_norm": 1.327441249851333, + "learning_rate": 1.7276033224887755e-05, + "loss": 0.6651, + "step": 15245 + }, + { + "epoch": 0.26344346143212605, + "grad_norm": 0.46472318725914913, + "learning_rate": 1.7275649294972243e-05, + "loss": 0.6128, + "step": 15246 + }, + { + "epoch": 0.26346074094553495, + "grad_norm": 0.9797251824619178, + "learning_rate": 1.7275265342268844e-05, + "loss": 0.5233, + "step": 15247 + }, + { + "epoch": 0.26347802045894386, + "grad_norm": 0.6945203354135496, + "learning_rate": 1.727488136677876e-05, + "loss": 0.5117, + "step": 15248 + }, + { + "epoch": 0.26349529997235277, + "grad_norm": 1.365879770009351, + "learning_rate": 1.7274497368503192e-05, + "loss": 0.6375, + "step": 15249 + }, + { + "epoch": 0.2635125794857617, + "grad_norm": 0.8347703648388018, + "learning_rate": 1.727411334744335e-05, + "loss": 0.4477, + "step": 15250 + }, + { + "epoch": 0.2635298589991706, + "grad_norm": 0.8215128047907998, + "learning_rate": 1.7273729303600433e-05, + "loss": 0.4631, + "step": 15251 + }, + { + "epoch": 0.2635471385125795, + "grad_norm": 0.9371677611263705, + "learning_rate": 1.7273345236975637e-05, + "loss": 0.4912, + "step": 15252 + }, + { + "epoch": 0.2635644180259884, + "grad_norm": 0.7604467322295134, + "learning_rate": 1.7272961147570177e-05, + "loss": 0.415, + "step": 15253 + }, + { + "epoch": 0.2635816975393973, + "grad_norm": 0.7940209164222073, + "learning_rate": 1.7272577035385247e-05, + "loss": 0.496, + "step": 15254 + }, + { + "epoch": 0.2635989770528062, + "grad_norm": 0.7924972899229671, + "learning_rate": 1.7272192900422052e-05, + "loss": 0.38, + "step": 15255 + }, + { + "epoch": 0.26361625656621507, + "grad_norm": 1.0092695897256587, + "learning_rate": 1.7271808742681796e-05, + "loss": 0.4227, + "step": 15256 + }, + { + "epoch": 0.263633536079624, + "grad_norm": 0.9690660411659502, + "learning_rate": 1.7271424562165685e-05, + "loss": 0.5276, + "step": 15257 + }, + { + "epoch": 0.2636508155930329, + "grad_norm": 1.2671916300717259, + "learning_rate": 1.727104035887492e-05, + "loss": 0.4265, + "step": 15258 + }, + { + "epoch": 0.2636680951064418, + "grad_norm": 0.6627357375607206, + "learning_rate": 1.72706561328107e-05, + "loss": 0.269, + "step": 15259 + }, + { + "epoch": 0.2636853746198507, + "grad_norm": 1.5018503325314057, + "learning_rate": 1.7270271883974236e-05, + "loss": 0.4747, + "step": 15260 + }, + { + "epoch": 0.2637026541332596, + "grad_norm": 1.3627599442278648, + "learning_rate": 1.7269887612366728e-05, + "loss": 0.5775, + "step": 15261 + }, + { + "epoch": 0.2637199336466685, + "grad_norm": 0.9219855461536585, + "learning_rate": 1.7269503317989378e-05, + "loss": 0.7367, + "step": 15262 + }, + { + "epoch": 0.2637372131600774, + "grad_norm": 1.4540898800124507, + "learning_rate": 1.726911900084339e-05, + "loss": 0.7427, + "step": 15263 + }, + { + "epoch": 0.26375449267348633, + "grad_norm": 0.47408810911366095, + "learning_rate": 1.7268734660929972e-05, + "loss": 0.518, + "step": 15264 + }, + { + "epoch": 0.26377177218689524, + "grad_norm": 1.172183493770127, + "learning_rate": 1.726835029825032e-05, + "loss": 0.6143, + "step": 15265 + }, + { + "epoch": 0.26378905170030414, + "grad_norm": 0.9483005983086535, + "learning_rate": 1.7267965912805647e-05, + "loss": 0.6571, + "step": 15266 + }, + { + "epoch": 0.263806331213713, + "grad_norm": 0.9043653102000881, + "learning_rate": 1.7267581504597153e-05, + "loss": 0.7812, + "step": 15267 + }, + { + "epoch": 0.2638236107271219, + "grad_norm": 0.592434996682576, + "learning_rate": 1.726719707362604e-05, + "loss": 0.4243, + "step": 15268 + }, + { + "epoch": 0.2638408902405308, + "grad_norm": 1.0605958827767898, + "learning_rate": 1.7266812619893512e-05, + "loss": 0.4146, + "step": 15269 + }, + { + "epoch": 0.2638581697539397, + "grad_norm": 0.8263938629801926, + "learning_rate": 1.7266428143400775e-05, + "loss": 0.4263, + "step": 15270 + }, + { + "epoch": 0.2638754492673486, + "grad_norm": 1.191269572968865, + "learning_rate": 1.7266043644149032e-05, + "loss": 0.5906, + "step": 15271 + }, + { + "epoch": 0.26389272878075754, + "grad_norm": 1.1674444230287866, + "learning_rate": 1.7265659122139492e-05, + "loss": 0.5248, + "step": 15272 + }, + { + "epoch": 0.26391000829416644, + "grad_norm": 1.2534357823200541, + "learning_rate": 1.7265274577373352e-05, + "loss": 0.6159, + "step": 15273 + }, + { + "epoch": 0.26392728780757535, + "grad_norm": 0.6275616917349398, + "learning_rate": 1.726489000985182e-05, + "loss": 0.5902, + "step": 15274 + }, + { + "epoch": 0.26394456732098426, + "grad_norm": 1.2041500944852555, + "learning_rate": 1.72645054195761e-05, + "loss": 0.4324, + "step": 15275 + }, + { + "epoch": 0.26396184683439317, + "grad_norm": 0.7985610525662143, + "learning_rate": 1.72641208065474e-05, + "loss": 0.5227, + "step": 15276 + }, + { + "epoch": 0.2639791263478021, + "grad_norm": 1.777882615626039, + "learning_rate": 1.7263736170766918e-05, + "loss": 0.5029, + "step": 15277 + }, + { + "epoch": 0.2639964058612109, + "grad_norm": 0.8514587544906669, + "learning_rate": 1.7263351512235864e-05, + "loss": 0.3419, + "step": 15278 + }, + { + "epoch": 0.26401368537461983, + "grad_norm": 0.45509150528964704, + "learning_rate": 1.7262966830955443e-05, + "loss": 0.606, + "step": 15279 + }, + { + "epoch": 0.26403096488802874, + "grad_norm": 1.0606723528373974, + "learning_rate": 1.7262582126926854e-05, + "loss": 0.3778, + "step": 15280 + }, + { + "epoch": 0.26404824440143765, + "grad_norm": 1.2740170181875679, + "learning_rate": 1.7262197400151308e-05, + "loss": 0.4876, + "step": 15281 + }, + { + "epoch": 0.26406552391484656, + "grad_norm": 1.5271375156183418, + "learning_rate": 1.726181265063001e-05, + "loss": 0.6372, + "step": 15282 + }, + { + "epoch": 0.26408280342825546, + "grad_norm": 0.5553306116627214, + "learning_rate": 1.7261427878364158e-05, + "loss": 0.35, + "step": 15283 + }, + { + "epoch": 0.26410008294166437, + "grad_norm": 1.5263199542474584, + "learning_rate": 1.7261043083354968e-05, + "loss": 0.4678, + "step": 15284 + }, + { + "epoch": 0.2641173624550733, + "grad_norm": 0.614778617949595, + "learning_rate": 1.726065826560363e-05, + "loss": 0.4669, + "step": 15285 + }, + { + "epoch": 0.2641346419684822, + "grad_norm": 1.4696619048654544, + "learning_rate": 1.7260273425111366e-05, + "loss": 0.5152, + "step": 15286 + }, + { + "epoch": 0.2641519214818911, + "grad_norm": 1.742244549530184, + "learning_rate": 1.7259888561879374e-05, + "loss": 0.4505, + "step": 15287 + }, + { + "epoch": 0.26416920099529995, + "grad_norm": 1.6444880744306045, + "learning_rate": 1.7259503675908857e-05, + "loss": 0.6434, + "step": 15288 + }, + { + "epoch": 0.26418648050870885, + "grad_norm": 0.7518789422460224, + "learning_rate": 1.7259118767201025e-05, + "loss": 0.5459, + "step": 15289 + }, + { + "epoch": 0.26420376002211776, + "grad_norm": 0.8853177540067129, + "learning_rate": 1.7258733835757077e-05, + "loss": 0.6643, + "step": 15290 + }, + { + "epoch": 0.26422103953552667, + "grad_norm": 0.9377492543014729, + "learning_rate": 1.7258348881578226e-05, + "loss": 0.4128, + "step": 15291 + }, + { + "epoch": 0.2642383190489356, + "grad_norm": 0.41739916835005675, + "learning_rate": 1.7257963904665674e-05, + "loss": 0.7509, + "step": 15292 + }, + { + "epoch": 0.2642555985623445, + "grad_norm": 1.0977868284586842, + "learning_rate": 1.725757890502063e-05, + "loss": 0.5259, + "step": 15293 + }, + { + "epoch": 0.2642728780757534, + "grad_norm": 0.6365650022064385, + "learning_rate": 1.7257193882644295e-05, + "loss": 0.5291, + "step": 15294 + }, + { + "epoch": 0.2642901575891623, + "grad_norm": 0.749496409080053, + "learning_rate": 1.7256808837537876e-05, + "loss": 0.326, + "step": 15295 + }, + { + "epoch": 0.2643074371025712, + "grad_norm": 0.7645205121435154, + "learning_rate": 1.7256423769702582e-05, + "loss": 0.5295, + "step": 15296 + }, + { + "epoch": 0.2643247166159801, + "grad_norm": 1.5359884865044109, + "learning_rate": 1.725603867913962e-05, + "loss": 0.5212, + "step": 15297 + }, + { + "epoch": 0.264341996129389, + "grad_norm": 0.750758912007651, + "learning_rate": 1.7255653565850185e-05, + "loss": 0.7229, + "step": 15298 + }, + { + "epoch": 0.2643592756427979, + "grad_norm": 1.3470324345183247, + "learning_rate": 1.72552684298355e-05, + "loss": 0.5175, + "step": 15299 + }, + { + "epoch": 0.2643765551562068, + "grad_norm": 1.151241593634772, + "learning_rate": 1.7254883271096757e-05, + "loss": 0.436, + "step": 15300 + }, + { + "epoch": 0.2643938346696157, + "grad_norm": 1.277158984921713, + "learning_rate": 1.725449808963517e-05, + "loss": 0.6108, + "step": 15301 + }, + { + "epoch": 0.2644111141830246, + "grad_norm": 0.9261667308964459, + "learning_rate": 1.7254112885451947e-05, + "loss": 0.6806, + "step": 15302 + }, + { + "epoch": 0.2644283936964335, + "grad_norm": 1.0753449847028314, + "learning_rate": 1.7253727658548287e-05, + "loss": 0.6012, + "step": 15303 + }, + { + "epoch": 0.2644456732098424, + "grad_norm": 0.8456904333136914, + "learning_rate": 1.7253342408925404e-05, + "loss": 0.6605, + "step": 15304 + }, + { + "epoch": 0.2644629527232513, + "grad_norm": 0.919623339323188, + "learning_rate": 1.7252957136584498e-05, + "loss": 0.4651, + "step": 15305 + }, + { + "epoch": 0.26448023223666023, + "grad_norm": 1.070962517308401, + "learning_rate": 1.725257184152678e-05, + "loss": 0.7285, + "step": 15306 + }, + { + "epoch": 0.26449751175006914, + "grad_norm": 0.7634535474990923, + "learning_rate": 1.7252186523753457e-05, + "loss": 0.4186, + "step": 15307 + }, + { + "epoch": 0.26451479126347804, + "grad_norm": 1.6074321181268523, + "learning_rate": 1.7251801183265735e-05, + "loss": 0.5242, + "step": 15308 + }, + { + "epoch": 0.2645320707768869, + "grad_norm": 0.9268601725362154, + "learning_rate": 1.725141582006482e-05, + "loss": 0.7442, + "step": 15309 + }, + { + "epoch": 0.2645493502902958, + "grad_norm": 0.3862341161612959, + "learning_rate": 1.725103043415192e-05, + "loss": 0.6126, + "step": 15310 + }, + { + "epoch": 0.2645666298037047, + "grad_norm": 1.3170243129306414, + "learning_rate": 1.7250645025528242e-05, + "loss": 0.5452, + "step": 15311 + }, + { + "epoch": 0.2645839093171136, + "grad_norm": 1.0766124593207524, + "learning_rate": 1.7250259594194992e-05, + "loss": 0.4897, + "step": 15312 + }, + { + "epoch": 0.2646011888305225, + "grad_norm": 1.1348323019044124, + "learning_rate": 1.7249874140153376e-05, + "loss": 0.6058, + "step": 15313 + }, + { + "epoch": 0.26461846834393143, + "grad_norm": 0.9076292609670014, + "learning_rate": 1.724948866340461e-05, + "loss": 0.3869, + "step": 15314 + }, + { + "epoch": 0.26463574785734034, + "grad_norm": 1.4045422288507414, + "learning_rate": 1.7249103163949887e-05, + "loss": 0.4746, + "step": 15315 + }, + { + "epoch": 0.26465302737074925, + "grad_norm": 1.227765481420623, + "learning_rate": 1.7248717641790423e-05, + "loss": 0.5469, + "step": 15316 + }, + { + "epoch": 0.26467030688415816, + "grad_norm": 1.0873251722697408, + "learning_rate": 1.7248332096927427e-05, + "loss": 0.4828, + "step": 15317 + }, + { + "epoch": 0.26468758639756707, + "grad_norm": 1.2688447673769292, + "learning_rate": 1.7247946529362104e-05, + "loss": 0.4449, + "step": 15318 + }, + { + "epoch": 0.264704865910976, + "grad_norm": 1.7903687320741304, + "learning_rate": 1.724756093909566e-05, + "loss": 0.6019, + "step": 15319 + }, + { + "epoch": 0.2647221454243848, + "grad_norm": 0.7264698587263722, + "learning_rate": 1.7247175326129302e-05, + "loss": 0.8254, + "step": 15320 + }, + { + "epoch": 0.26473942493779373, + "grad_norm": 0.659927749761475, + "learning_rate": 1.7246789690464245e-05, + "loss": 0.3806, + "step": 15321 + }, + { + "epoch": 0.26475670445120264, + "grad_norm": 0.9127424966659723, + "learning_rate": 1.724640403210169e-05, + "loss": 0.3908, + "step": 15322 + }, + { + "epoch": 0.26477398396461155, + "grad_norm": 0.999738592324007, + "learning_rate": 1.7246018351042847e-05, + "loss": 0.462, + "step": 15323 + }, + { + "epoch": 0.26479126347802046, + "grad_norm": 0.949414299959487, + "learning_rate": 1.7245632647288925e-05, + "loss": 0.5185, + "step": 15324 + }, + { + "epoch": 0.26480854299142936, + "grad_norm": 0.7826248149938683, + "learning_rate": 1.724524692084113e-05, + "loss": 0.4376, + "step": 15325 + }, + { + "epoch": 0.26482582250483827, + "grad_norm": 0.6755949705494343, + "learning_rate": 1.724486117170067e-05, + "loss": 0.5406, + "step": 15326 + }, + { + "epoch": 0.2648431020182472, + "grad_norm": 0.8967559591100155, + "learning_rate": 1.7244475399868753e-05, + "loss": 0.4732, + "step": 15327 + }, + { + "epoch": 0.2648603815316561, + "grad_norm": 0.8365518761787539, + "learning_rate": 1.7244089605346593e-05, + "loss": 0.5455, + "step": 15328 + }, + { + "epoch": 0.264877661045065, + "grad_norm": 1.206823718463061, + "learning_rate": 1.7243703788135393e-05, + "loss": 0.5484, + "step": 15329 + }, + { + "epoch": 0.26489494055847385, + "grad_norm": 1.7033510218720964, + "learning_rate": 1.7243317948236357e-05, + "loss": 0.6295, + "step": 15330 + }, + { + "epoch": 0.26491222007188275, + "grad_norm": 1.8160125003427001, + "learning_rate": 1.7242932085650706e-05, + "loss": 0.6224, + "step": 15331 + }, + { + "epoch": 0.26492949958529166, + "grad_norm": 0.7884451373889942, + "learning_rate": 1.7242546200379635e-05, + "loss": 0.3976, + "step": 15332 + }, + { + "epoch": 0.26494677909870057, + "grad_norm": 1.2227688755050763, + "learning_rate": 1.7242160292424362e-05, + "loss": 0.8149, + "step": 15333 + }, + { + "epoch": 0.2649640586121095, + "grad_norm": 1.5536773331985492, + "learning_rate": 1.7241774361786092e-05, + "loss": 0.4658, + "step": 15334 + }, + { + "epoch": 0.2649813381255184, + "grad_norm": 1.2341608123209287, + "learning_rate": 1.7241388408466034e-05, + "loss": 0.3148, + "step": 15335 + }, + { + "epoch": 0.2649986176389273, + "grad_norm": 1.2058757633551276, + "learning_rate": 1.72410024324654e-05, + "loss": 0.4501, + "step": 15336 + }, + { + "epoch": 0.2650158971523362, + "grad_norm": 0.9897007004591408, + "learning_rate": 1.7240616433785392e-05, + "loss": 0.5523, + "step": 15337 + }, + { + "epoch": 0.2650331766657451, + "grad_norm": 0.771258242549107, + "learning_rate": 1.7240230412427223e-05, + "loss": 0.3529, + "step": 15338 + }, + { + "epoch": 0.265050456179154, + "grad_norm": 0.8478888427106043, + "learning_rate": 1.7239844368392106e-05, + "loss": 0.7102, + "step": 15339 + }, + { + "epoch": 0.2650677356925629, + "grad_norm": 1.2639435928532823, + "learning_rate": 1.723945830168124e-05, + "loss": 0.5401, + "step": 15340 + }, + { + "epoch": 0.2650850152059718, + "grad_norm": 1.3408199524061504, + "learning_rate": 1.7239072212295848e-05, + "loss": 0.6194, + "step": 15341 + }, + { + "epoch": 0.2651022947193807, + "grad_norm": 0.5890169196469244, + "learning_rate": 1.7238686100237127e-05, + "loss": 0.3605, + "step": 15342 + }, + { + "epoch": 0.2651195742327896, + "grad_norm": 0.76517623668224, + "learning_rate": 1.723829996550629e-05, + "loss": 0.6427, + "step": 15343 + }, + { + "epoch": 0.2651368537461985, + "grad_norm": 1.3927169519820004, + "learning_rate": 1.723791380810455e-05, + "loss": 0.5791, + "step": 15344 + }, + { + "epoch": 0.2651541332596074, + "grad_norm": 0.9969180045059203, + "learning_rate": 1.7237527628033113e-05, + "loss": 0.5535, + "step": 15345 + }, + { + "epoch": 0.2651714127730163, + "grad_norm": 1.097252808585259, + "learning_rate": 1.7237141425293193e-05, + "loss": 0.5261, + "step": 15346 + }, + { + "epoch": 0.2651886922864252, + "grad_norm": 0.9606951707334604, + "learning_rate": 1.723675519988599e-05, + "loss": 0.4567, + "step": 15347 + }, + { + "epoch": 0.26520597179983413, + "grad_norm": 1.213262613241413, + "learning_rate": 1.7236368951812724e-05, + "loss": 0.3692, + "step": 15348 + }, + { + "epoch": 0.26522325131324304, + "grad_norm": 0.5486702485807171, + "learning_rate": 1.7235982681074598e-05, + "loss": 0.268, + "step": 15349 + }, + { + "epoch": 0.26524053082665194, + "grad_norm": 1.0447587053154443, + "learning_rate": 1.7235596387672825e-05, + "loss": 0.5187, + "step": 15350 + }, + { + "epoch": 0.26525781034006085, + "grad_norm": 1.064494709679941, + "learning_rate": 1.7235210071608617e-05, + "loss": 0.4837, + "step": 15351 + }, + { + "epoch": 0.2652750898534697, + "grad_norm": 1.473602823646776, + "learning_rate": 1.7234823732883178e-05, + "loss": 0.4593, + "step": 15352 + }, + { + "epoch": 0.2652923693668786, + "grad_norm": 0.9440832950419676, + "learning_rate": 1.7234437371497724e-05, + "loss": 0.6229, + "step": 15353 + }, + { + "epoch": 0.2653096488802875, + "grad_norm": 1.0873341727321695, + "learning_rate": 1.7234050987453457e-05, + "loss": 0.4146, + "step": 15354 + }, + { + "epoch": 0.2653269283936964, + "grad_norm": 0.7028537530406913, + "learning_rate": 1.7233664580751598e-05, + "loss": 0.4642, + "step": 15355 + }, + { + "epoch": 0.26534420790710533, + "grad_norm": 0.6639040366988964, + "learning_rate": 1.7233278151393353e-05, + "loss": 0.4874, + "step": 15356 + }, + { + "epoch": 0.26536148742051424, + "grad_norm": 1.3649611955025756, + "learning_rate": 1.7232891699379927e-05, + "loss": 0.4981, + "step": 15357 + }, + { + "epoch": 0.26537876693392315, + "grad_norm": 0.416308076956182, + "learning_rate": 1.7232505224712534e-05, + "loss": 0.7032, + "step": 15358 + }, + { + "epoch": 0.26539604644733206, + "grad_norm": 0.4542966393203689, + "learning_rate": 1.723211872739239e-05, + "loss": 0.8552, + "step": 15359 + }, + { + "epoch": 0.26541332596074096, + "grad_norm": 0.7407107117623378, + "learning_rate": 1.7231732207420693e-05, + "loss": 0.4577, + "step": 15360 + }, + { + "epoch": 0.26543060547414987, + "grad_norm": 0.7695358347155808, + "learning_rate": 1.7231345664798666e-05, + "loss": 0.4587, + "step": 15361 + }, + { + "epoch": 0.2654478849875587, + "grad_norm": 0.834843685595701, + "learning_rate": 1.7230959099527512e-05, + "loss": 0.7259, + "step": 15362 + }, + { + "epoch": 0.26546516450096763, + "grad_norm": 0.6065189124026052, + "learning_rate": 1.7230572511608446e-05, + "loss": 0.545, + "step": 15363 + }, + { + "epoch": 0.26548244401437654, + "grad_norm": 0.6949842850236152, + "learning_rate": 1.723018590104268e-05, + "loss": 0.3717, + "step": 15364 + }, + { + "epoch": 0.26549972352778545, + "grad_norm": 1.4225476072510472, + "learning_rate": 1.722979926783142e-05, + "loss": 0.6525, + "step": 15365 + }, + { + "epoch": 0.26551700304119435, + "grad_norm": 0.8617617717399628, + "learning_rate": 1.722941261197588e-05, + "loss": 0.5029, + "step": 15366 + }, + { + "epoch": 0.26553428255460326, + "grad_norm": 1.1365993555616374, + "learning_rate": 1.722902593347727e-05, + "loss": 0.4602, + "step": 15367 + }, + { + "epoch": 0.26555156206801217, + "grad_norm": 1.272701916669702, + "learning_rate": 1.72286392323368e-05, + "loss": 0.7401, + "step": 15368 + }, + { + "epoch": 0.2655688415814211, + "grad_norm": 1.2687182424916872, + "learning_rate": 1.7228252508555682e-05, + "loss": 0.4395, + "step": 15369 + }, + { + "epoch": 0.26558612109483, + "grad_norm": 0.9876921086146099, + "learning_rate": 1.7227865762135132e-05, + "loss": 0.358, + "step": 15370 + }, + { + "epoch": 0.2656034006082389, + "grad_norm": 1.1706288780063414, + "learning_rate": 1.7227478993076353e-05, + "loss": 0.606, + "step": 15371 + }, + { + "epoch": 0.2656206801216478, + "grad_norm": 0.8656461038742863, + "learning_rate": 1.7227092201380563e-05, + "loss": 0.4488, + "step": 15372 + }, + { + "epoch": 0.26563795963505665, + "grad_norm": 1.197468534466042, + "learning_rate": 1.722670538704897e-05, + "loss": 0.6149, + "step": 15373 + }, + { + "epoch": 0.26565523914846556, + "grad_norm": 0.6674552066394094, + "learning_rate": 1.7226318550082787e-05, + "loss": 0.5179, + "step": 15374 + }, + { + "epoch": 0.26567251866187447, + "grad_norm": 0.9885958359061521, + "learning_rate": 1.7225931690483226e-05, + "loss": 0.4637, + "step": 15375 + }, + { + "epoch": 0.2656897981752834, + "grad_norm": 0.8376535773958843, + "learning_rate": 1.7225544808251496e-05, + "loss": 0.5046, + "step": 15376 + }, + { + "epoch": 0.2657070776886923, + "grad_norm": 0.866982954942823, + "learning_rate": 1.7225157903388812e-05, + "loss": 0.4781, + "step": 15377 + }, + { + "epoch": 0.2657243572021012, + "grad_norm": 1.1209315979883705, + "learning_rate": 1.7224770975896382e-05, + "loss": 0.6052, + "step": 15378 + }, + { + "epoch": 0.2657416367155101, + "grad_norm": 1.0285383215125377, + "learning_rate": 1.722438402577542e-05, + "loss": 0.4849, + "step": 15379 + }, + { + "epoch": 0.265758916228919, + "grad_norm": 1.6440341087012396, + "learning_rate": 1.722399705302714e-05, + "loss": 0.7755, + "step": 15380 + }, + { + "epoch": 0.2657761957423279, + "grad_norm": 1.2776541346732069, + "learning_rate": 1.722361005765275e-05, + "loss": 0.3843, + "step": 15381 + }, + { + "epoch": 0.2657934752557368, + "grad_norm": 0.7679322348448921, + "learning_rate": 1.722322303965347e-05, + "loss": 0.4189, + "step": 15382 + }, + { + "epoch": 0.2658107547691457, + "grad_norm": 0.8441355770404517, + "learning_rate": 1.7222835999030502e-05, + "loss": 0.4313, + "step": 15383 + }, + { + "epoch": 0.2658280342825546, + "grad_norm": 1.1428619149552597, + "learning_rate": 1.7222448935785064e-05, + "loss": 0.3707, + "step": 15384 + }, + { + "epoch": 0.2658453137959635, + "grad_norm": 0.8668097306603726, + "learning_rate": 1.7222061849918364e-05, + "loss": 0.4302, + "step": 15385 + }, + { + "epoch": 0.2658625933093724, + "grad_norm": 1.163565225285994, + "learning_rate": 1.7221674741431626e-05, + "loss": 0.5749, + "step": 15386 + }, + { + "epoch": 0.2658798728227813, + "grad_norm": 1.0906383499823173, + "learning_rate": 1.7221287610326045e-05, + "loss": 0.5745, + "step": 15387 + }, + { + "epoch": 0.2658971523361902, + "grad_norm": 1.0725760461095053, + "learning_rate": 1.7220900456602844e-05, + "loss": 0.4482, + "step": 15388 + }, + { + "epoch": 0.2659144318495991, + "grad_norm": 0.861003433964517, + "learning_rate": 1.7220513280263237e-05, + "loss": 0.3973, + "step": 15389 + }, + { + "epoch": 0.265931711363008, + "grad_norm": 1.1952833964750478, + "learning_rate": 1.7220126081308433e-05, + "loss": 0.6553, + "step": 15390 + }, + { + "epoch": 0.26594899087641694, + "grad_norm": 1.0450013641413716, + "learning_rate": 1.7219738859739642e-05, + "loss": 0.5068, + "step": 15391 + }, + { + "epoch": 0.26596627038982584, + "grad_norm": 1.0204032895542812, + "learning_rate": 1.7219351615558083e-05, + "loss": 0.598, + "step": 15392 + }, + { + "epoch": 0.26598354990323475, + "grad_norm": 1.4183008754183106, + "learning_rate": 1.721896434876497e-05, + "loss": 0.5919, + "step": 15393 + }, + { + "epoch": 0.2660008294166436, + "grad_norm": 1.5964584879391444, + "learning_rate": 1.7218577059361504e-05, + "loss": 0.4766, + "step": 15394 + }, + { + "epoch": 0.2660181089300525, + "grad_norm": 1.1439287931348296, + "learning_rate": 1.721818974734891e-05, + "loss": 0.5516, + "step": 15395 + }, + { + "epoch": 0.2660353884434614, + "grad_norm": 0.979445958925556, + "learning_rate": 1.72178024127284e-05, + "loss": 0.5735, + "step": 15396 + }, + { + "epoch": 0.2660526679568703, + "grad_norm": 1.0198913965903746, + "learning_rate": 1.721741505550118e-05, + "loss": 0.4825, + "step": 15397 + }, + { + "epoch": 0.26606994747027923, + "grad_norm": 0.6709630981633339, + "learning_rate": 1.7217027675668467e-05, + "loss": 0.4233, + "step": 15398 + }, + { + "epoch": 0.26608722698368814, + "grad_norm": 0.954975989226908, + "learning_rate": 1.721664027323148e-05, + "loss": 0.7424, + "step": 15399 + }, + { + "epoch": 0.26610450649709705, + "grad_norm": 1.4068821563789862, + "learning_rate": 1.7216252848191423e-05, + "loss": 0.5655, + "step": 15400 + }, + { + "epoch": 0.26612178601050596, + "grad_norm": 1.2370513354673742, + "learning_rate": 1.7215865400549514e-05, + "loss": 0.5282, + "step": 15401 + }, + { + "epoch": 0.26613906552391486, + "grad_norm": 0.9945196251368905, + "learning_rate": 1.7215477930306964e-05, + "loss": 0.5216, + "step": 15402 + }, + { + "epoch": 0.26615634503732377, + "grad_norm": 1.5661658562283451, + "learning_rate": 1.7215090437464995e-05, + "loss": 0.6487, + "step": 15403 + }, + { + "epoch": 0.2661736245507326, + "grad_norm": 1.212721471048529, + "learning_rate": 1.721470292202481e-05, + "loss": 0.4743, + "step": 15404 + }, + { + "epoch": 0.26619090406414153, + "grad_norm": 1.0472306407251335, + "learning_rate": 1.721431538398763e-05, + "loss": 0.3875, + "step": 15405 + }, + { + "epoch": 0.26620818357755044, + "grad_norm": 0.4595603622352799, + "learning_rate": 1.721392782335466e-05, + "loss": 0.8059, + "step": 15406 + }, + { + "epoch": 0.26622546309095935, + "grad_norm": 1.2580214295030936, + "learning_rate": 1.7213540240127125e-05, + "loss": 0.6039, + "step": 15407 + }, + { + "epoch": 0.26624274260436825, + "grad_norm": 0.9851038533529363, + "learning_rate": 1.7213152634306235e-05, + "loss": 0.4653, + "step": 15408 + }, + { + "epoch": 0.26626002211777716, + "grad_norm": 1.7333020738594251, + "learning_rate": 1.72127650058932e-05, + "loss": 0.641, + "step": 15409 + }, + { + "epoch": 0.26627730163118607, + "grad_norm": 0.8805004701170643, + "learning_rate": 1.7212377354889236e-05, + "loss": 0.6507, + "step": 15410 + }, + { + "epoch": 0.266294581144595, + "grad_norm": 1.1413777143880395, + "learning_rate": 1.721198968129556e-05, + "loss": 0.5212, + "step": 15411 + }, + { + "epoch": 0.2663118606580039, + "grad_norm": 1.969155259575624, + "learning_rate": 1.7211601985113384e-05, + "loss": 0.6541, + "step": 15412 + }, + { + "epoch": 0.2663291401714128, + "grad_norm": 0.8949406959803954, + "learning_rate": 1.721121426634392e-05, + "loss": 0.5584, + "step": 15413 + }, + { + "epoch": 0.2663464196848217, + "grad_norm": 1.0870316064347936, + "learning_rate": 1.7210826524988387e-05, + "loss": 0.5328, + "step": 15414 + }, + { + "epoch": 0.26636369919823055, + "grad_norm": 0.6022943320425055, + "learning_rate": 1.7210438761047998e-05, + "loss": 0.8575, + "step": 15415 + }, + { + "epoch": 0.26638097871163946, + "grad_norm": 0.7219076076773088, + "learning_rate": 1.721005097452397e-05, + "loss": 0.224, + "step": 15416 + }, + { + "epoch": 0.26639825822504837, + "grad_norm": 1.1427525268812422, + "learning_rate": 1.7209663165417506e-05, + "loss": 0.4764, + "step": 15417 + }, + { + "epoch": 0.2664155377384573, + "grad_norm": 0.7657343644705055, + "learning_rate": 1.7209275333729832e-05, + "loss": 0.5989, + "step": 15418 + }, + { + "epoch": 0.2664328172518662, + "grad_norm": 1.5729831872611075, + "learning_rate": 1.7208887479462165e-05, + "loss": 0.7307, + "step": 15419 + }, + { + "epoch": 0.2664500967652751, + "grad_norm": 0.9052526581759637, + "learning_rate": 1.720849960261571e-05, + "loss": 0.598, + "step": 15420 + }, + { + "epoch": 0.266467376278684, + "grad_norm": 0.6655483845345146, + "learning_rate": 1.7208111703191688e-05, + "loss": 0.5903, + "step": 15421 + }, + { + "epoch": 0.2664846557920929, + "grad_norm": 1.1193080223262326, + "learning_rate": 1.720772378119131e-05, + "loss": 0.6616, + "step": 15422 + }, + { + "epoch": 0.2665019353055018, + "grad_norm": 1.0055924412685844, + "learning_rate": 1.7207335836615797e-05, + "loss": 0.7349, + "step": 15423 + }, + { + "epoch": 0.2665192148189107, + "grad_norm": 1.1142702946583916, + "learning_rate": 1.720694786946636e-05, + "loss": 0.5663, + "step": 15424 + }, + { + "epoch": 0.26653649433231963, + "grad_norm": 0.8279197623315858, + "learning_rate": 1.7206559879744208e-05, + "loss": 0.5267, + "step": 15425 + }, + { + "epoch": 0.2665537738457285, + "grad_norm": 0.4071453262162129, + "learning_rate": 1.720617186745057e-05, + "loss": 0.5685, + "step": 15426 + }, + { + "epoch": 0.2665710533591374, + "grad_norm": 1.2402800184489566, + "learning_rate": 1.7205783832586652e-05, + "loss": 0.6315, + "step": 15427 + }, + { + "epoch": 0.2665883328725463, + "grad_norm": 0.7277075035628499, + "learning_rate": 1.720539577515367e-05, + "loss": 0.5211, + "step": 15428 + }, + { + "epoch": 0.2666056123859552, + "grad_norm": 0.8469248690238662, + "learning_rate": 1.7205007695152843e-05, + "loss": 0.5449, + "step": 15429 + }, + { + "epoch": 0.2666228918993641, + "grad_norm": 1.0104403357867382, + "learning_rate": 1.7204619592585385e-05, + "loss": 0.624, + "step": 15430 + }, + { + "epoch": 0.266640171412773, + "grad_norm": 0.8054361920889219, + "learning_rate": 1.7204231467452508e-05, + "loss": 0.5004, + "step": 15431 + }, + { + "epoch": 0.2666574509261819, + "grad_norm": 0.8205801752519498, + "learning_rate": 1.7203843319755433e-05, + "loss": 0.3936, + "step": 15432 + }, + { + "epoch": 0.26667473043959083, + "grad_norm": 0.8329438217613909, + "learning_rate": 1.7203455149495368e-05, + "loss": 0.3841, + "step": 15433 + }, + { + "epoch": 0.26669200995299974, + "grad_norm": 1.070177630832039, + "learning_rate": 1.720306695667354e-05, + "loss": 0.5272, + "step": 15434 + }, + { + "epoch": 0.26670928946640865, + "grad_norm": 1.1558422034705307, + "learning_rate": 1.7202678741291157e-05, + "loss": 0.4711, + "step": 15435 + }, + { + "epoch": 0.2667265689798175, + "grad_norm": 0.7815092932306322, + "learning_rate": 1.7202290503349436e-05, + "loss": 0.447, + "step": 15436 + }, + { + "epoch": 0.2667438484932264, + "grad_norm": 1.5774634375702625, + "learning_rate": 1.72019022428496e-05, + "loss": 0.6201, + "step": 15437 + }, + { + "epoch": 0.2667611280066353, + "grad_norm": 1.8884898496066393, + "learning_rate": 1.720151395979285e-05, + "loss": 0.6647, + "step": 15438 + }, + { + "epoch": 0.2667784075200442, + "grad_norm": 0.9466610973562578, + "learning_rate": 1.7201125654180413e-05, + "loss": 0.5092, + "step": 15439 + }, + { + "epoch": 0.26679568703345313, + "grad_norm": 0.7685467507008114, + "learning_rate": 1.7200737326013506e-05, + "loss": 0.3759, + "step": 15440 + }, + { + "epoch": 0.26681296654686204, + "grad_norm": 0.593533400074049, + "learning_rate": 1.7200348975293343e-05, + "loss": 0.3285, + "step": 15441 + }, + { + "epoch": 0.26683024606027095, + "grad_norm": 0.8947680680698833, + "learning_rate": 1.7199960602021137e-05, + "loss": 0.4599, + "step": 15442 + }, + { + "epoch": 0.26684752557367986, + "grad_norm": 0.7519287148646402, + "learning_rate": 1.7199572206198106e-05, + "loss": 0.4215, + "step": 15443 + }, + { + "epoch": 0.26686480508708876, + "grad_norm": 0.9933340179797997, + "learning_rate": 1.719918378782547e-05, + "loss": 0.5323, + "step": 15444 + }, + { + "epoch": 0.26688208460049767, + "grad_norm": 0.6889264618129505, + "learning_rate": 1.7198795346904446e-05, + "loss": 0.764, + "step": 15445 + }, + { + "epoch": 0.2668993641139066, + "grad_norm": 0.9413199305931702, + "learning_rate": 1.7198406883436242e-05, + "loss": 0.607, + "step": 15446 + }, + { + "epoch": 0.26691664362731543, + "grad_norm": 1.5050956869617735, + "learning_rate": 1.7198018397422085e-05, + "loss": 0.5412, + "step": 15447 + }, + { + "epoch": 0.26693392314072434, + "grad_norm": 1.0271394474737474, + "learning_rate": 1.7197629888863187e-05, + "loss": 0.3977, + "step": 15448 + }, + { + "epoch": 0.26695120265413325, + "grad_norm": 1.3851876382187283, + "learning_rate": 1.719724135776076e-05, + "loss": 0.4861, + "step": 15449 + }, + { + "epoch": 0.26696848216754215, + "grad_norm": 0.8843988611533204, + "learning_rate": 1.719685280411603e-05, + "loss": 0.4701, + "step": 15450 + }, + { + "epoch": 0.26698576168095106, + "grad_norm": 0.7678706653960421, + "learning_rate": 1.7196464227930212e-05, + "loss": 0.6918, + "step": 15451 + }, + { + "epoch": 0.26700304119435997, + "grad_norm": 0.8094817653214714, + "learning_rate": 1.719607562920452e-05, + "loss": 0.5571, + "step": 15452 + }, + { + "epoch": 0.2670203207077689, + "grad_norm": 1.5357989305317372, + "learning_rate": 1.719568700794017e-05, + "loss": 0.4802, + "step": 15453 + }, + { + "epoch": 0.2670376002211778, + "grad_norm": 1.3521071435958878, + "learning_rate": 1.7195298364138386e-05, + "loss": 0.5645, + "step": 15454 + }, + { + "epoch": 0.2670548797345867, + "grad_norm": 1.0201300530091493, + "learning_rate": 1.7194909697800377e-05, + "loss": 0.4549, + "step": 15455 + }, + { + "epoch": 0.2670721592479956, + "grad_norm": 1.6688409788512415, + "learning_rate": 1.7194521008927365e-05, + "loss": 0.6001, + "step": 15456 + }, + { + "epoch": 0.26708943876140445, + "grad_norm": 0.7115629410813662, + "learning_rate": 1.7194132297520566e-05, + "loss": 0.2591, + "step": 15457 + }, + { + "epoch": 0.26710671827481336, + "grad_norm": 0.8853072206010266, + "learning_rate": 1.7193743563581197e-05, + "loss": 0.6709, + "step": 15458 + }, + { + "epoch": 0.26712399778822227, + "grad_norm": 1.6263043028502437, + "learning_rate": 1.719335480711048e-05, + "loss": 0.6087, + "step": 15459 + }, + { + "epoch": 0.2671412773016312, + "grad_norm": 1.2376039398181635, + "learning_rate": 1.7192966028109625e-05, + "loss": 0.5023, + "step": 15460 + }, + { + "epoch": 0.2671585568150401, + "grad_norm": 0.9101969511772122, + "learning_rate": 1.7192577226579856e-05, + "loss": 0.432, + "step": 15461 + }, + { + "epoch": 0.267175836328449, + "grad_norm": 0.45560525501377747, + "learning_rate": 1.7192188402522388e-05, + "loss": 0.7029, + "step": 15462 + }, + { + "epoch": 0.2671931158418579, + "grad_norm": 0.6823992215963169, + "learning_rate": 1.7191799555938438e-05, + "loss": 0.3773, + "step": 15463 + }, + { + "epoch": 0.2672103953552668, + "grad_norm": 1.2565835001987267, + "learning_rate": 1.7191410686829225e-05, + "loss": 0.4952, + "step": 15464 + }, + { + "epoch": 0.2672276748686757, + "grad_norm": 0.891222724541366, + "learning_rate": 1.719102179519597e-05, + "loss": 0.4005, + "step": 15465 + }, + { + "epoch": 0.2672449543820846, + "grad_norm": 0.9244478600316987, + "learning_rate": 1.7190632881039885e-05, + "loss": 0.402, + "step": 15466 + }, + { + "epoch": 0.26726223389549353, + "grad_norm": 1.135502251966693, + "learning_rate": 1.7190243944362194e-05, + "loss": 0.6859, + "step": 15467 + }, + { + "epoch": 0.2672795134089024, + "grad_norm": 0.9219599062508099, + "learning_rate": 1.7189854985164113e-05, + "loss": 0.5295, + "step": 15468 + }, + { + "epoch": 0.2672967929223113, + "grad_norm": 0.6786648836245157, + "learning_rate": 1.718946600344686e-05, + "loss": 0.4357, + "step": 15469 + }, + { + "epoch": 0.2673140724357202, + "grad_norm": 1.4122613252338079, + "learning_rate": 1.718907699921165e-05, + "loss": 0.4586, + "step": 15470 + }, + { + "epoch": 0.2673313519491291, + "grad_norm": 1.1911188808701398, + "learning_rate": 1.7188687972459707e-05, + "loss": 0.6522, + "step": 15471 + }, + { + "epoch": 0.267348631462538, + "grad_norm": 0.9533629029691585, + "learning_rate": 1.7188298923192245e-05, + "loss": 0.5913, + "step": 15472 + }, + { + "epoch": 0.2673659109759469, + "grad_norm": 0.9309468458996165, + "learning_rate": 1.7187909851410488e-05, + "loss": 0.3991, + "step": 15473 + }, + { + "epoch": 0.2673831904893558, + "grad_norm": 0.4979630641775095, + "learning_rate": 1.718752075711565e-05, + "loss": 0.8017, + "step": 15474 + }, + { + "epoch": 0.26740047000276473, + "grad_norm": 0.43929389383916045, + "learning_rate": 1.7187131640308947e-05, + "loss": 0.6806, + "step": 15475 + }, + { + "epoch": 0.26741774951617364, + "grad_norm": 1.2632768741671498, + "learning_rate": 1.7186742500991603e-05, + "loss": 0.5051, + "step": 15476 + }, + { + "epoch": 0.26743502902958255, + "grad_norm": 0.7449342782511491, + "learning_rate": 1.7186353339164837e-05, + "loss": 0.5525, + "step": 15477 + }, + { + "epoch": 0.26745230854299146, + "grad_norm": 0.9950708882949328, + "learning_rate": 1.7185964154829867e-05, + "loss": 0.3949, + "step": 15478 + }, + { + "epoch": 0.2674695880564003, + "grad_norm": 0.7911230498362268, + "learning_rate": 1.718557494798791e-05, + "loss": 0.3676, + "step": 15479 + }, + { + "epoch": 0.2674868675698092, + "grad_norm": 0.39287533073382724, + "learning_rate": 1.7185185718640186e-05, + "loss": 0.5646, + "step": 15480 + }, + { + "epoch": 0.2675041470832181, + "grad_norm": 0.9064398659288514, + "learning_rate": 1.718479646678791e-05, + "loss": 0.6207, + "step": 15481 + }, + { + "epoch": 0.26752142659662703, + "grad_norm": 0.9105308365338028, + "learning_rate": 1.718440719243231e-05, + "loss": 0.4957, + "step": 15482 + }, + { + "epoch": 0.26753870611003594, + "grad_norm": 0.8445436838437604, + "learning_rate": 1.71840178955746e-05, + "loss": 0.4477, + "step": 15483 + }, + { + "epoch": 0.26755598562344485, + "grad_norm": 0.7910176377105318, + "learning_rate": 1.7183628576216e-05, + "loss": 0.5197, + "step": 15484 + }, + { + "epoch": 0.26757326513685376, + "grad_norm": 1.7715252652330185, + "learning_rate": 1.718323923435773e-05, + "loss": 0.6525, + "step": 15485 + }, + { + "epoch": 0.26759054465026266, + "grad_norm": 1.538665636640024, + "learning_rate": 1.718284987000101e-05, + "loss": 0.614, + "step": 15486 + }, + { + "epoch": 0.26760782416367157, + "grad_norm": 0.76728645678048, + "learning_rate": 1.7182460483147055e-05, + "loss": 0.6375, + "step": 15487 + }, + { + "epoch": 0.2676251036770805, + "grad_norm": 0.7633588743007982, + "learning_rate": 1.718207107379709e-05, + "loss": 0.5037, + "step": 15488 + }, + { + "epoch": 0.26764238319048933, + "grad_norm": 1.1383449472911533, + "learning_rate": 1.7181681641952335e-05, + "loss": 0.4648, + "step": 15489 + }, + { + "epoch": 0.26765966270389824, + "grad_norm": 0.5951698991541403, + "learning_rate": 1.7181292187614002e-05, + "loss": 0.6847, + "step": 15490 + }, + { + "epoch": 0.26767694221730715, + "grad_norm": 0.9610789295200619, + "learning_rate": 1.718090271078332e-05, + "loss": 0.6141, + "step": 15491 + }, + { + "epoch": 0.26769422173071605, + "grad_norm": 0.9582349485954006, + "learning_rate": 1.7180513211461502e-05, + "loss": 0.5306, + "step": 15492 + }, + { + "epoch": 0.26771150124412496, + "grad_norm": 1.4895642928573696, + "learning_rate": 1.718012368964977e-05, + "loss": 0.6579, + "step": 15493 + }, + { + "epoch": 0.26772878075753387, + "grad_norm": 0.8540230112638494, + "learning_rate": 1.717973414534935e-05, + "loss": 0.4924, + "step": 15494 + }, + { + "epoch": 0.2677460602709428, + "grad_norm": 0.829253992777169, + "learning_rate": 1.7179344578561457e-05, + "loss": 0.4128, + "step": 15495 + }, + { + "epoch": 0.2677633397843517, + "grad_norm": 0.7351574604737077, + "learning_rate": 1.7178954989287306e-05, + "loss": 0.5846, + "step": 15496 + }, + { + "epoch": 0.2677806192977606, + "grad_norm": 0.4337911256894302, + "learning_rate": 1.7178565377528124e-05, + "loss": 0.6369, + "step": 15497 + }, + { + "epoch": 0.2677978988111695, + "grad_norm": 1.1260114487323387, + "learning_rate": 1.7178175743285132e-05, + "loss": 0.5084, + "step": 15498 + }, + { + "epoch": 0.2678151783245784, + "grad_norm": 1.076382881348536, + "learning_rate": 1.7177786086559546e-05, + "loss": 0.5904, + "step": 15499 + }, + { + "epoch": 0.26783245783798726, + "grad_norm": 1.0689609459221567, + "learning_rate": 1.717739640735259e-05, + "loss": 0.659, + "step": 15500 + }, + { + "epoch": 0.26784973735139617, + "grad_norm": 0.5962776366830576, + "learning_rate": 1.717700670566548e-05, + "loss": 0.4641, + "step": 15501 + }, + { + "epoch": 0.2678670168648051, + "grad_norm": 2.102647531173859, + "learning_rate": 1.7176616981499442e-05, + "loss": 0.8362, + "step": 15502 + }, + { + "epoch": 0.267884296378214, + "grad_norm": 0.8469924300871478, + "learning_rate": 1.7176227234855698e-05, + "loss": 0.6282, + "step": 15503 + }, + { + "epoch": 0.2679015758916229, + "grad_norm": 1.0964390751300674, + "learning_rate": 1.7175837465735462e-05, + "loss": 0.5604, + "step": 15504 + }, + { + "epoch": 0.2679188554050318, + "grad_norm": 0.9039521872539935, + "learning_rate": 1.7175447674139953e-05, + "loss": 0.2856, + "step": 15505 + }, + { + "epoch": 0.2679361349184407, + "grad_norm": 1.3089139192009862, + "learning_rate": 1.71750578600704e-05, + "loss": 0.5809, + "step": 15506 + }, + { + "epoch": 0.2679534144318496, + "grad_norm": 0.6468848283296732, + "learning_rate": 1.717466802352802e-05, + "loss": 0.4006, + "step": 15507 + }, + { + "epoch": 0.2679706939452585, + "grad_norm": 0.9919864896019654, + "learning_rate": 1.7174278164514038e-05, + "loss": 0.5036, + "step": 15508 + }, + { + "epoch": 0.26798797345866743, + "grad_norm": 0.4538378656971789, + "learning_rate": 1.7173888283029668e-05, + "loss": 0.6749, + "step": 15509 + }, + { + "epoch": 0.2680052529720763, + "grad_norm": 0.9180693658645548, + "learning_rate": 1.7173498379076137e-05, + "loss": 0.5339, + "step": 15510 + }, + { + "epoch": 0.2680225324854852, + "grad_norm": 0.7374604987348482, + "learning_rate": 1.717310845265466e-05, + "loss": 0.5426, + "step": 15511 + }, + { + "epoch": 0.2680398119988941, + "grad_norm": 0.8366813369668572, + "learning_rate": 1.7172718503766467e-05, + "loss": 0.3691, + "step": 15512 + }, + { + "epoch": 0.268057091512303, + "grad_norm": 1.0276998870003191, + "learning_rate": 1.717232853241277e-05, + "loss": 0.5823, + "step": 15513 + }, + { + "epoch": 0.2680743710257119, + "grad_norm": 1.0457424674816445, + "learning_rate": 1.7171938538594798e-05, + "loss": 0.378, + "step": 15514 + }, + { + "epoch": 0.2680916505391208, + "grad_norm": 1.0685476295284975, + "learning_rate": 1.717154852231377e-05, + "loss": 0.5417, + "step": 15515 + }, + { + "epoch": 0.2681089300525297, + "grad_norm": 0.7566722238777376, + "learning_rate": 1.7171158483570904e-05, + "loss": 0.4601, + "step": 15516 + }, + { + "epoch": 0.26812620956593863, + "grad_norm": 0.41048140494629015, + "learning_rate": 1.7170768422367427e-05, + "loss": 0.5503, + "step": 15517 + }, + { + "epoch": 0.26814348907934754, + "grad_norm": 0.4751045882602818, + "learning_rate": 1.7170378338704556e-05, + "loss": 0.4594, + "step": 15518 + }, + { + "epoch": 0.26816076859275645, + "grad_norm": 0.9419438578154368, + "learning_rate": 1.7169988232583518e-05, + "loss": 0.3702, + "step": 15519 + }, + { + "epoch": 0.26817804810616536, + "grad_norm": 0.8574360934974822, + "learning_rate": 1.716959810400553e-05, + "loss": 0.4381, + "step": 15520 + }, + { + "epoch": 0.2681953276195742, + "grad_norm": 1.4169481099983519, + "learning_rate": 1.7169207952971816e-05, + "loss": 0.6736, + "step": 15521 + }, + { + "epoch": 0.2682126071329831, + "grad_norm": 0.8615356708313405, + "learning_rate": 1.7168817779483598e-05, + "loss": 0.5205, + "step": 15522 + }, + { + "epoch": 0.268229886646392, + "grad_norm": 0.8208096891083101, + "learning_rate": 1.71684275835421e-05, + "loss": 0.4457, + "step": 15523 + }, + { + "epoch": 0.26824716615980093, + "grad_norm": 0.6720745416019192, + "learning_rate": 1.7168037365148538e-05, + "loss": 0.3479, + "step": 15524 + }, + { + "epoch": 0.26826444567320984, + "grad_norm": 1.4392579053042915, + "learning_rate": 1.716764712430414e-05, + "loss": 0.5554, + "step": 15525 + }, + { + "epoch": 0.26828172518661875, + "grad_norm": 0.9262053500991231, + "learning_rate": 1.716725686101013e-05, + "loss": 0.289, + "step": 15526 + }, + { + "epoch": 0.26829900470002765, + "grad_norm": 0.9055312668769305, + "learning_rate": 1.7166866575267722e-05, + "loss": 0.7518, + "step": 15527 + }, + { + "epoch": 0.26831628421343656, + "grad_norm": 0.9667524946064405, + "learning_rate": 1.7166476267078145e-05, + "loss": 0.5618, + "step": 15528 + }, + { + "epoch": 0.26833356372684547, + "grad_norm": 0.7735044402158202, + "learning_rate": 1.7166085936442622e-05, + "loss": 0.421, + "step": 15529 + }, + { + "epoch": 0.2683508432402544, + "grad_norm": 0.8004005454353218, + "learning_rate": 1.716569558336237e-05, + "loss": 0.3371, + "step": 15530 + }, + { + "epoch": 0.26836812275366323, + "grad_norm": 0.6669590005576166, + "learning_rate": 1.7165305207838612e-05, + "loss": 0.4118, + "step": 15531 + }, + { + "epoch": 0.26838540226707214, + "grad_norm": 1.0362710484374051, + "learning_rate": 1.716491480987258e-05, + "loss": 0.4638, + "step": 15532 + }, + { + "epoch": 0.26840268178048104, + "grad_norm": 0.7631170082728406, + "learning_rate": 1.7164524389465485e-05, + "loss": 0.4353, + "step": 15533 + }, + { + "epoch": 0.26841996129388995, + "grad_norm": 1.3178452518255537, + "learning_rate": 1.716413394661856e-05, + "loss": 0.515, + "step": 15534 + }, + { + "epoch": 0.26843724080729886, + "grad_norm": 1.418944082301075, + "learning_rate": 1.7163743481333022e-05, + "loss": 0.4814, + "step": 15535 + }, + { + "epoch": 0.26845452032070777, + "grad_norm": 1.1091974995599876, + "learning_rate": 1.7163352993610092e-05, + "loss": 0.5228, + "step": 15536 + }, + { + "epoch": 0.2684717998341167, + "grad_norm": 1.1662079423408787, + "learning_rate": 1.7162962483450998e-05, + "loss": 0.7036, + "step": 15537 + }, + { + "epoch": 0.2684890793475256, + "grad_norm": 1.285233705575625, + "learning_rate": 1.7162571950856962e-05, + "loss": 0.4957, + "step": 15538 + }, + { + "epoch": 0.2685063588609345, + "grad_norm": 1.6682755358060497, + "learning_rate": 1.7162181395829204e-05, + "loss": 0.6191, + "step": 15539 + }, + { + "epoch": 0.2685236383743434, + "grad_norm": 0.742711992605654, + "learning_rate": 1.716179081836895e-05, + "loss": 0.6696, + "step": 15540 + }, + { + "epoch": 0.2685409178877523, + "grad_norm": 0.8931227173705144, + "learning_rate": 1.7161400218477425e-05, + "loss": 0.3384, + "step": 15541 + }, + { + "epoch": 0.26855819740116116, + "grad_norm": 1.5767956307306357, + "learning_rate": 1.7161009596155847e-05, + "loss": 0.5706, + "step": 15542 + }, + { + "epoch": 0.26857547691457007, + "grad_norm": 0.8143334756060949, + "learning_rate": 1.7160618951405447e-05, + "loss": 0.4556, + "step": 15543 + }, + { + "epoch": 0.268592756427979, + "grad_norm": 1.276093419721041, + "learning_rate": 1.716022828422744e-05, + "loss": 0.3517, + "step": 15544 + }, + { + "epoch": 0.2686100359413879, + "grad_norm": 1.5539460528906535, + "learning_rate": 1.7159837594623057e-05, + "loss": 0.4096, + "step": 15545 + }, + { + "epoch": 0.2686273154547968, + "grad_norm": 1.3959377600971763, + "learning_rate": 1.7159446882593518e-05, + "loss": 0.3665, + "step": 15546 + }, + { + "epoch": 0.2686445949682057, + "grad_norm": 1.2195552769790214, + "learning_rate": 1.7159056148140045e-05, + "loss": 0.5752, + "step": 15547 + }, + { + "epoch": 0.2686618744816146, + "grad_norm": 2.209739949835928, + "learning_rate": 1.7158665391263868e-05, + "loss": 0.7798, + "step": 15548 + }, + { + "epoch": 0.2686791539950235, + "grad_norm": 0.9600422097578731, + "learning_rate": 1.7158274611966204e-05, + "loss": 0.6242, + "step": 15549 + }, + { + "epoch": 0.2686964335084324, + "grad_norm": 0.814935501310396, + "learning_rate": 1.7157883810248282e-05, + "loss": 0.3636, + "step": 15550 + }, + { + "epoch": 0.2687137130218413, + "grad_norm": 0.8622217805703062, + "learning_rate": 1.715749298611132e-05, + "loss": 0.355, + "step": 15551 + }, + { + "epoch": 0.26873099253525023, + "grad_norm": 1.1776884994064707, + "learning_rate": 1.715710213955655e-05, + "loss": 0.3297, + "step": 15552 + }, + { + "epoch": 0.2687482720486591, + "grad_norm": 0.9886393701953888, + "learning_rate": 1.7156711270585188e-05, + "loss": 0.6462, + "step": 15553 + }, + { + "epoch": 0.268765551562068, + "grad_norm": 0.6565592674268435, + "learning_rate": 1.7156320379198468e-05, + "loss": 0.7091, + "step": 15554 + }, + { + "epoch": 0.2687828310754769, + "grad_norm": 0.9875044005550845, + "learning_rate": 1.7155929465397607e-05, + "loss": 0.4135, + "step": 15555 + }, + { + "epoch": 0.2688001105888858, + "grad_norm": 0.8876841255316992, + "learning_rate": 1.715553852918383e-05, + "loss": 0.387, + "step": 15556 + }, + { + "epoch": 0.2688173901022947, + "grad_norm": 0.9133490598135905, + "learning_rate": 1.715514757055836e-05, + "loss": 0.4235, + "step": 15557 + }, + { + "epoch": 0.2688346696157036, + "grad_norm": 0.8219497612078273, + "learning_rate": 1.715475658952243e-05, + "loss": 0.5582, + "step": 15558 + }, + { + "epoch": 0.26885194912911253, + "grad_norm": 0.9928164590538333, + "learning_rate": 1.7154365586077252e-05, + "loss": 0.3938, + "step": 15559 + }, + { + "epoch": 0.26886922864252144, + "grad_norm": 0.7809203886314618, + "learning_rate": 1.7153974560224062e-05, + "loss": 0.5085, + "step": 15560 + }, + { + "epoch": 0.26888650815593035, + "grad_norm": 0.709985674805758, + "learning_rate": 1.7153583511964076e-05, + "loss": 0.4055, + "step": 15561 + }, + { + "epoch": 0.26890378766933926, + "grad_norm": 1.1811528750223639, + "learning_rate": 1.7153192441298526e-05, + "loss": 0.5662, + "step": 15562 + }, + { + "epoch": 0.2689210671827481, + "grad_norm": 1.2764886172984025, + "learning_rate": 1.715280134822863e-05, + "loss": 0.6284, + "step": 15563 + }, + { + "epoch": 0.268938346696157, + "grad_norm": 0.9462078796303839, + "learning_rate": 1.715241023275562e-05, + "loss": 0.4484, + "step": 15564 + }, + { + "epoch": 0.2689556262095659, + "grad_norm": 0.7463195612608398, + "learning_rate": 1.7152019094880717e-05, + "loss": 0.2797, + "step": 15565 + }, + { + "epoch": 0.26897290572297483, + "grad_norm": 1.2266354243474962, + "learning_rate": 1.7151627934605147e-05, + "loss": 0.6147, + "step": 15566 + }, + { + "epoch": 0.26899018523638374, + "grad_norm": 1.478766570604198, + "learning_rate": 1.7151236751930132e-05, + "loss": 0.4117, + "step": 15567 + }, + { + "epoch": 0.26900746474979265, + "grad_norm": 1.5736535869031574, + "learning_rate": 1.7150845546856903e-05, + "loss": 0.5011, + "step": 15568 + }, + { + "epoch": 0.26902474426320155, + "grad_norm": 0.6330992257746578, + "learning_rate": 1.7150454319386684e-05, + "loss": 0.5258, + "step": 15569 + }, + { + "epoch": 0.26904202377661046, + "grad_norm": 0.750493329774007, + "learning_rate": 1.7150063069520696e-05, + "loss": 0.4136, + "step": 15570 + }, + { + "epoch": 0.26905930329001937, + "grad_norm": 0.5513536720545538, + "learning_rate": 1.7149671797260167e-05, + "loss": 0.4763, + "step": 15571 + }, + { + "epoch": 0.2690765828034283, + "grad_norm": 1.1407492407265947, + "learning_rate": 1.7149280502606326e-05, + "loss": 0.2971, + "step": 15572 + }, + { + "epoch": 0.2690938623168372, + "grad_norm": 1.2458526168090043, + "learning_rate": 1.714888918556039e-05, + "loss": 0.5266, + "step": 15573 + }, + { + "epoch": 0.26911114183024604, + "grad_norm": 1.5995352000600593, + "learning_rate": 1.714849784612359e-05, + "loss": 0.6606, + "step": 15574 + }, + { + "epoch": 0.26912842134365494, + "grad_norm": 1.1136119262815696, + "learning_rate": 1.7148106484297155e-05, + "loss": 0.4475, + "step": 15575 + }, + { + "epoch": 0.26914570085706385, + "grad_norm": 1.1124011389578665, + "learning_rate": 1.7147715100082306e-05, + "loss": 0.6752, + "step": 15576 + }, + { + "epoch": 0.26916298037047276, + "grad_norm": 0.4151957681036119, + "learning_rate": 1.714732369348027e-05, + "loss": 0.6095, + "step": 15577 + }, + { + "epoch": 0.26918025988388167, + "grad_norm": 0.7853210570671497, + "learning_rate": 1.7146932264492273e-05, + "loss": 0.4554, + "step": 15578 + }, + { + "epoch": 0.2691975393972906, + "grad_norm": 1.0602390331972282, + "learning_rate": 1.7146540813119542e-05, + "loss": 0.5141, + "step": 15579 + }, + { + "epoch": 0.2692148189106995, + "grad_norm": 1.490467573234477, + "learning_rate": 1.71461493393633e-05, + "loss": 0.4832, + "step": 15580 + }, + { + "epoch": 0.2692320984241084, + "grad_norm": 1.214817456311995, + "learning_rate": 1.714575784322478e-05, + "loss": 0.4377, + "step": 15581 + }, + { + "epoch": 0.2692493779375173, + "grad_norm": 0.7752577525167169, + "learning_rate": 1.7145366324705195e-05, + "loss": 0.4251, + "step": 15582 + }, + { + "epoch": 0.2692666574509262, + "grad_norm": 0.8664933160155116, + "learning_rate": 1.7144974783805785e-05, + "loss": 0.4559, + "step": 15583 + }, + { + "epoch": 0.26928393696433506, + "grad_norm": 1.235377182296535, + "learning_rate": 1.714458322052777e-05, + "loss": 0.5262, + "step": 15584 + }, + { + "epoch": 0.26930121647774397, + "grad_norm": 0.9749547824971289, + "learning_rate": 1.7144191634872376e-05, + "loss": 0.4184, + "step": 15585 + }, + { + "epoch": 0.2693184959911529, + "grad_norm": 1.1632403610998838, + "learning_rate": 1.7143800026840834e-05, + "loss": 0.4641, + "step": 15586 + }, + { + "epoch": 0.2693357755045618, + "grad_norm": 0.7213109118495298, + "learning_rate": 1.7143408396434365e-05, + "loss": 0.5127, + "step": 15587 + }, + { + "epoch": 0.2693530550179707, + "grad_norm": 1.4031935429994185, + "learning_rate": 1.71430167436542e-05, + "loss": 0.553, + "step": 15588 + }, + { + "epoch": 0.2693703345313796, + "grad_norm": 1.8669623260476118, + "learning_rate": 1.714262506850156e-05, + "loss": 0.6824, + "step": 15589 + }, + { + "epoch": 0.2693876140447885, + "grad_norm": 1.0817500061257348, + "learning_rate": 1.7142233370977676e-05, + "loss": 0.5012, + "step": 15590 + }, + { + "epoch": 0.2694048935581974, + "grad_norm": 0.8329383463357618, + "learning_rate": 1.714184165108378e-05, + "loss": 0.405, + "step": 15591 + }, + { + "epoch": 0.2694221730716063, + "grad_norm": 1.4087184181098769, + "learning_rate": 1.714144990882109e-05, + "loss": 0.6544, + "step": 15592 + }, + { + "epoch": 0.2694394525850152, + "grad_norm": 1.0816924815722762, + "learning_rate": 1.7141058144190835e-05, + "loss": 0.4976, + "step": 15593 + }, + { + "epoch": 0.26945673209842413, + "grad_norm": 0.909936457264399, + "learning_rate": 1.714066635719424e-05, + "loss": 0.5444, + "step": 15594 + }, + { + "epoch": 0.269474011611833, + "grad_norm": 1.186239618179911, + "learning_rate": 1.714027454783254e-05, + "loss": 0.6498, + "step": 15595 + }, + { + "epoch": 0.2694912911252419, + "grad_norm": 0.8375794515235004, + "learning_rate": 1.713988271610696e-05, + "loss": 0.536, + "step": 15596 + }, + { + "epoch": 0.2695085706386508, + "grad_norm": 0.882682471218498, + "learning_rate": 1.713949086201872e-05, + "loss": 0.6635, + "step": 15597 + }, + { + "epoch": 0.2695258501520597, + "grad_norm": 0.6343086793545847, + "learning_rate": 1.7139098985569053e-05, + "loss": 0.5461, + "step": 15598 + }, + { + "epoch": 0.2695431296654686, + "grad_norm": 1.084240337262233, + "learning_rate": 1.7138707086759184e-05, + "loss": 0.4442, + "step": 15599 + }, + { + "epoch": 0.2695604091788775, + "grad_norm": 1.2965251857298814, + "learning_rate": 1.7138315165590346e-05, + "loss": 0.6464, + "step": 15600 + }, + { + "epoch": 0.26957768869228643, + "grad_norm": 0.5294267690319316, + "learning_rate": 1.713792322206376e-05, + "loss": 0.3298, + "step": 15601 + }, + { + "epoch": 0.26959496820569534, + "grad_norm": 0.9249372513285866, + "learning_rate": 1.7137531256180654e-05, + "loss": 0.3764, + "step": 15602 + }, + { + "epoch": 0.26961224771910425, + "grad_norm": 0.4086916343233211, + "learning_rate": 1.713713926794226e-05, + "loss": 0.7687, + "step": 15603 + }, + { + "epoch": 0.26962952723251316, + "grad_norm": 0.38032401356483664, + "learning_rate": 1.7136747257349806e-05, + "loss": 0.6056, + "step": 15604 + }, + { + "epoch": 0.269646806745922, + "grad_norm": 1.2171996363214566, + "learning_rate": 1.713635522440451e-05, + "loss": 0.4253, + "step": 15605 + }, + { + "epoch": 0.2696640862593309, + "grad_norm": 0.9234366395436969, + "learning_rate": 1.7135963169107613e-05, + "loss": 0.5396, + "step": 15606 + }, + { + "epoch": 0.2696813657727398, + "grad_norm": 1.2431959132314403, + "learning_rate": 1.7135571091460334e-05, + "loss": 0.6419, + "step": 15607 + }, + { + "epoch": 0.26969864528614873, + "grad_norm": 1.0888843266631278, + "learning_rate": 1.7135178991463908e-05, + "loss": 0.4388, + "step": 15608 + }, + { + "epoch": 0.26971592479955764, + "grad_norm": 1.0043621494764268, + "learning_rate": 1.7134786869119554e-05, + "loss": 0.4792, + "step": 15609 + }, + { + "epoch": 0.26973320431296655, + "grad_norm": 0.4516739419206151, + "learning_rate": 1.713439472442851e-05, + "loss": 0.7484, + "step": 15610 + }, + { + "epoch": 0.26975048382637545, + "grad_norm": 0.46020223466924076, + "learning_rate": 1.7134002557391997e-05, + "loss": 0.7207, + "step": 15611 + }, + { + "epoch": 0.26976776333978436, + "grad_norm": 1.145510264260845, + "learning_rate": 1.7133610368011247e-05, + "loss": 0.7523, + "step": 15612 + }, + { + "epoch": 0.26978504285319327, + "grad_norm": 1.3843851147548805, + "learning_rate": 1.7133218156287485e-05, + "loss": 0.4163, + "step": 15613 + }, + { + "epoch": 0.2698023223666022, + "grad_norm": 1.2690129087872453, + "learning_rate": 1.7132825922221946e-05, + "loss": 0.5425, + "step": 15614 + }, + { + "epoch": 0.2698196018800111, + "grad_norm": 0.9075774162972113, + "learning_rate": 1.7132433665815848e-05, + "loss": 0.5979, + "step": 15615 + }, + { + "epoch": 0.26983688139341994, + "grad_norm": 1.499350069771829, + "learning_rate": 1.713204138707043e-05, + "loss": 0.5445, + "step": 15616 + }, + { + "epoch": 0.26985416090682884, + "grad_norm": 1.3374168744057058, + "learning_rate": 1.7131649085986913e-05, + "loss": 0.5854, + "step": 15617 + }, + { + "epoch": 0.26987144042023775, + "grad_norm": 0.9543809267222323, + "learning_rate": 1.7131256762566533e-05, + "loss": 0.4551, + "step": 15618 + }, + { + "epoch": 0.26988871993364666, + "grad_norm": 0.4447977788445848, + "learning_rate": 1.7130864416810514e-05, + "loss": 0.6417, + "step": 15619 + }, + { + "epoch": 0.26990599944705557, + "grad_norm": 0.8476259730340022, + "learning_rate": 1.7130472048720083e-05, + "loss": 0.4576, + "step": 15620 + }, + { + "epoch": 0.2699232789604645, + "grad_norm": 1.5125343075494317, + "learning_rate": 1.7130079658296476e-05, + "loss": 0.4665, + "step": 15621 + }, + { + "epoch": 0.2699405584738734, + "grad_norm": 1.1788551161546674, + "learning_rate": 1.7129687245540917e-05, + "loss": 0.3278, + "step": 15622 + }, + { + "epoch": 0.2699578379872823, + "grad_norm": 0.7053416671357365, + "learning_rate": 1.712929481045463e-05, + "loss": 0.4687, + "step": 15623 + }, + { + "epoch": 0.2699751175006912, + "grad_norm": 0.9042302453675765, + "learning_rate": 1.7128902353038857e-05, + "loss": 0.4084, + "step": 15624 + }, + { + "epoch": 0.2699923970141001, + "grad_norm": 0.7269509272227789, + "learning_rate": 1.7128509873294812e-05, + "loss": 0.5052, + "step": 15625 + }, + { + "epoch": 0.270009676527509, + "grad_norm": 1.3545503797701446, + "learning_rate": 1.712811737122374e-05, + "loss": 0.4905, + "step": 15626 + }, + { + "epoch": 0.27002695604091786, + "grad_norm": 0.9420087345512521, + "learning_rate": 1.712772484682686e-05, + "loss": 0.461, + "step": 15627 + }, + { + "epoch": 0.2700442355543268, + "grad_norm": 0.7310630463245475, + "learning_rate": 1.7127332300105405e-05, + "loss": 0.3428, + "step": 15628 + }, + { + "epoch": 0.2700615150677357, + "grad_norm": 1.0145280469418563, + "learning_rate": 1.71269397310606e-05, + "loss": 0.5659, + "step": 15629 + }, + { + "epoch": 0.2700787945811446, + "grad_norm": 1.1002553232835437, + "learning_rate": 1.7126547139693685e-05, + "loss": 0.8466, + "step": 15630 + }, + { + "epoch": 0.2700960740945535, + "grad_norm": 0.940487116619856, + "learning_rate": 1.7126154526005878e-05, + "loss": 0.6797, + "step": 15631 + }, + { + "epoch": 0.2701133536079624, + "grad_norm": 1.1546277438832948, + "learning_rate": 1.712576188999841e-05, + "loss": 0.6598, + "step": 15632 + }, + { + "epoch": 0.2701306331213713, + "grad_norm": 1.4081309596537326, + "learning_rate": 1.712536923167252e-05, + "loss": 0.5199, + "step": 15633 + }, + { + "epoch": 0.2701479126347802, + "grad_norm": 0.920077595071302, + "learning_rate": 1.712497655102943e-05, + "loss": 0.4927, + "step": 15634 + }, + { + "epoch": 0.2701651921481891, + "grad_norm": 0.9847535346728218, + "learning_rate": 1.712458384807037e-05, + "loss": 0.5613, + "step": 15635 + }, + { + "epoch": 0.27018247166159803, + "grad_norm": 0.758067722255109, + "learning_rate": 1.7124191122796575e-05, + "loss": 0.4223, + "step": 15636 + }, + { + "epoch": 0.2701997511750069, + "grad_norm": 1.0083913978205183, + "learning_rate": 1.712379837520927e-05, + "loss": 0.5004, + "step": 15637 + }, + { + "epoch": 0.2702170306884158, + "grad_norm": 0.39955706540224095, + "learning_rate": 1.712340560530969e-05, + "loss": 0.5363, + "step": 15638 + }, + { + "epoch": 0.2702343102018247, + "grad_norm": 0.8913819329834647, + "learning_rate": 1.712301281309906e-05, + "loss": 0.5321, + "step": 15639 + }, + { + "epoch": 0.2702515897152336, + "grad_norm": 1.1994605619522383, + "learning_rate": 1.7122619998578613e-05, + "loss": 0.6007, + "step": 15640 + }, + { + "epoch": 0.2702688692286425, + "grad_norm": 0.640484956766864, + "learning_rate": 1.712222716174958e-05, + "loss": 0.595, + "step": 15641 + }, + { + "epoch": 0.2702861487420514, + "grad_norm": 1.115909830564747, + "learning_rate": 1.712183430261319e-05, + "loss": 0.4602, + "step": 15642 + }, + { + "epoch": 0.27030342825546033, + "grad_norm": 1.1643972190764047, + "learning_rate": 1.712144142117067e-05, + "loss": 0.5225, + "step": 15643 + }, + { + "epoch": 0.27032070776886924, + "grad_norm": 0.7075413477877931, + "learning_rate": 1.7121048517423258e-05, + "loss": 0.3341, + "step": 15644 + }, + { + "epoch": 0.27033798728227815, + "grad_norm": 1.3207083176492582, + "learning_rate": 1.712065559137218e-05, + "loss": 0.5577, + "step": 15645 + }, + { + "epoch": 0.27035526679568705, + "grad_norm": 0.6547580670046163, + "learning_rate": 1.7120262643018667e-05, + "loss": 0.3797, + "step": 15646 + }, + { + "epoch": 0.27037254630909596, + "grad_norm": 0.8857598894796215, + "learning_rate": 1.7119869672363953e-05, + "loss": 0.3846, + "step": 15647 + }, + { + "epoch": 0.2703898258225048, + "grad_norm": 1.7470465304289666, + "learning_rate": 1.7119476679409263e-05, + "loss": 0.6806, + "step": 15648 + }, + { + "epoch": 0.2704071053359137, + "grad_norm": 0.4773559555390689, + "learning_rate": 1.711908366415583e-05, + "loss": 0.6192, + "step": 15649 + }, + { + "epoch": 0.27042438484932263, + "grad_norm": 0.7763253025838731, + "learning_rate": 1.711869062660489e-05, + "loss": 0.5649, + "step": 15650 + }, + { + "epoch": 0.27044166436273154, + "grad_norm": 1.4662740305698114, + "learning_rate": 1.7118297566757667e-05, + "loss": 0.6231, + "step": 15651 + }, + { + "epoch": 0.27045894387614045, + "grad_norm": 1.1476873478839644, + "learning_rate": 1.7117904484615395e-05, + "loss": 0.4722, + "step": 15652 + }, + { + "epoch": 0.27047622338954935, + "grad_norm": 0.9973212563819326, + "learning_rate": 1.7117511380179307e-05, + "loss": 0.5436, + "step": 15653 + }, + { + "epoch": 0.27049350290295826, + "grad_norm": 0.9845440663549107, + "learning_rate": 1.7117118253450633e-05, + "loss": 0.6208, + "step": 15654 + }, + { + "epoch": 0.27051078241636717, + "grad_norm": 0.6512565063015803, + "learning_rate": 1.71167251044306e-05, + "loss": 0.2859, + "step": 15655 + }, + { + "epoch": 0.2705280619297761, + "grad_norm": 0.5544825520713405, + "learning_rate": 1.711633193312045e-05, + "loss": 0.7104, + "step": 15656 + }, + { + "epoch": 0.270545341443185, + "grad_norm": 1.4458772233931612, + "learning_rate": 1.7115938739521397e-05, + "loss": 0.6505, + "step": 15657 + }, + { + "epoch": 0.27056262095659384, + "grad_norm": 1.7924425706913836, + "learning_rate": 1.7115545523634693e-05, + "loss": 0.5234, + "step": 15658 + }, + { + "epoch": 0.27057990047000274, + "grad_norm": 1.1139934941438911, + "learning_rate": 1.7115152285461552e-05, + "loss": 0.5052, + "step": 15659 + }, + { + "epoch": 0.27059717998341165, + "grad_norm": 1.5199138792423117, + "learning_rate": 1.711475902500322e-05, + "loss": 0.4761, + "step": 15660 + }, + { + "epoch": 0.27061445949682056, + "grad_norm": 1.0806960588190282, + "learning_rate": 1.7114365742260915e-05, + "loss": 0.4083, + "step": 15661 + }, + { + "epoch": 0.27063173901022947, + "grad_norm": 1.2711457763034575, + "learning_rate": 1.7113972437235882e-05, + "loss": 0.5242, + "step": 15662 + }, + { + "epoch": 0.2706490185236384, + "grad_norm": 1.1983785756054628, + "learning_rate": 1.7113579109929346e-05, + "loss": 0.4428, + "step": 15663 + }, + { + "epoch": 0.2706662980370473, + "grad_norm": 1.0361101398883417, + "learning_rate": 1.7113185760342537e-05, + "loss": 0.538, + "step": 15664 + }, + { + "epoch": 0.2706835775504562, + "grad_norm": 0.9640148757844322, + "learning_rate": 1.711279238847669e-05, + "loss": 0.4943, + "step": 15665 + }, + { + "epoch": 0.2707008570638651, + "grad_norm": 1.3654671189976795, + "learning_rate": 1.7112398994333037e-05, + "loss": 0.5991, + "step": 15666 + }, + { + "epoch": 0.270718136577274, + "grad_norm": 0.7561795268298143, + "learning_rate": 1.711200557791281e-05, + "loss": 0.5858, + "step": 15667 + }, + { + "epoch": 0.2707354160906829, + "grad_norm": 1.9032502345730788, + "learning_rate": 1.711161213921724e-05, + "loss": 0.6084, + "step": 15668 + }, + { + "epoch": 0.27075269560409176, + "grad_norm": 1.3893266573712995, + "learning_rate": 1.7111218678247564e-05, + "loss": 0.5358, + "step": 15669 + }, + { + "epoch": 0.27076997511750067, + "grad_norm": 1.3723912718908036, + "learning_rate": 1.7110825195005007e-05, + "loss": 0.6341, + "step": 15670 + }, + { + "epoch": 0.2707872546309096, + "grad_norm": 0.47317917095727713, + "learning_rate": 1.711043168949081e-05, + "loss": 0.4566, + "step": 15671 + }, + { + "epoch": 0.2708045341443185, + "grad_norm": 1.5610074108184715, + "learning_rate": 1.7110038161706194e-05, + "loss": 0.5838, + "step": 15672 + }, + { + "epoch": 0.2708218136577274, + "grad_norm": 1.0690870433486979, + "learning_rate": 1.71096446116524e-05, + "loss": 0.4752, + "step": 15673 + }, + { + "epoch": 0.2708390931711363, + "grad_norm": 1.7325334938362944, + "learning_rate": 1.7109251039330662e-05, + "loss": 0.5753, + "step": 15674 + }, + { + "epoch": 0.2708563726845452, + "grad_norm": 1.5974822938281346, + "learning_rate": 1.7108857444742205e-05, + "loss": 0.596, + "step": 15675 + }, + { + "epoch": 0.2708736521979541, + "grad_norm": 0.7798724248531479, + "learning_rate": 1.7108463827888267e-05, + "loss": 0.686, + "step": 15676 + }, + { + "epoch": 0.270890931711363, + "grad_norm": 1.1595998327531003, + "learning_rate": 1.7108070188770086e-05, + "loss": 0.6142, + "step": 15677 + }, + { + "epoch": 0.27090821122477193, + "grad_norm": 0.8970824320778087, + "learning_rate": 1.7107676527388883e-05, + "loss": 0.5511, + "step": 15678 + }, + { + "epoch": 0.2709254907381808, + "grad_norm": 1.1410330221807339, + "learning_rate": 1.71072828437459e-05, + "loss": 0.653, + "step": 15679 + }, + { + "epoch": 0.2709427702515897, + "grad_norm": 0.7770449664963293, + "learning_rate": 1.710688913784236e-05, + "loss": 0.5467, + "step": 15680 + }, + { + "epoch": 0.2709600497649986, + "grad_norm": 1.3166282244194427, + "learning_rate": 1.7106495409679512e-05, + "loss": 0.5819, + "step": 15681 + }, + { + "epoch": 0.2709773292784075, + "grad_norm": 0.8776521358520345, + "learning_rate": 1.7106101659258575e-05, + "loss": 0.6632, + "step": 15682 + }, + { + "epoch": 0.2709946087918164, + "grad_norm": 1.0044381096912958, + "learning_rate": 1.710570788658079e-05, + "loss": 0.4966, + "step": 15683 + }, + { + "epoch": 0.2710118883052253, + "grad_norm": 1.0384700135985387, + "learning_rate": 1.7105314091647388e-05, + "loss": 0.3539, + "step": 15684 + }, + { + "epoch": 0.27102916781863423, + "grad_norm": 0.9191612138160129, + "learning_rate": 1.71049202744596e-05, + "loss": 0.3628, + "step": 15685 + }, + { + "epoch": 0.27104644733204314, + "grad_norm": 0.8175764489518932, + "learning_rate": 1.7104526435018666e-05, + "loss": 0.5801, + "step": 15686 + }, + { + "epoch": 0.27106372684545205, + "grad_norm": 1.4054917809199188, + "learning_rate": 1.7104132573325813e-05, + "loss": 0.4199, + "step": 15687 + }, + { + "epoch": 0.27108100635886095, + "grad_norm": 1.295711403641967, + "learning_rate": 1.7103738689382275e-05, + "loss": 0.4655, + "step": 15688 + }, + { + "epoch": 0.27109828587226986, + "grad_norm": 1.3225749555020332, + "learning_rate": 1.7103344783189292e-05, + "loss": 0.5052, + "step": 15689 + }, + { + "epoch": 0.2711155653856787, + "grad_norm": 1.405738493249582, + "learning_rate": 1.7102950854748088e-05, + "loss": 0.5972, + "step": 15690 + }, + { + "epoch": 0.2711328448990876, + "grad_norm": 0.7604111763342111, + "learning_rate": 1.7102556904059905e-05, + "loss": 0.4084, + "step": 15691 + }, + { + "epoch": 0.27115012441249653, + "grad_norm": 1.0427410580581578, + "learning_rate": 1.7102162931125975e-05, + "loss": 0.7319, + "step": 15692 + }, + { + "epoch": 0.27116740392590544, + "grad_norm": 0.9033633419297481, + "learning_rate": 1.7101768935947526e-05, + "loss": 0.6367, + "step": 15693 + }, + { + "epoch": 0.27118468343931434, + "grad_norm": 0.9859515477643697, + "learning_rate": 1.7101374918525802e-05, + "loss": 0.5473, + "step": 15694 + }, + { + "epoch": 0.27120196295272325, + "grad_norm": 1.0804674371944623, + "learning_rate": 1.7100980878862033e-05, + "loss": 0.5092, + "step": 15695 + }, + { + "epoch": 0.27121924246613216, + "grad_norm": 1.503573342827126, + "learning_rate": 1.710058681695745e-05, + "loss": 0.4549, + "step": 15696 + }, + { + "epoch": 0.27123652197954107, + "grad_norm": 1.0407043843903492, + "learning_rate": 1.710019273281329e-05, + "loss": 0.3656, + "step": 15697 + }, + { + "epoch": 0.27125380149295, + "grad_norm": 1.059924176446345, + "learning_rate": 1.709979862643078e-05, + "loss": 0.3755, + "step": 15698 + }, + { + "epoch": 0.2712710810063589, + "grad_norm": 0.9742558856430614, + "learning_rate": 1.709940449781117e-05, + "loss": 0.6131, + "step": 15699 + }, + { + "epoch": 0.2712883605197678, + "grad_norm": 1.0195067235240678, + "learning_rate": 1.7099010346955684e-05, + "loss": 0.4448, + "step": 15700 + }, + { + "epoch": 0.27130564003317664, + "grad_norm": 1.0821543400377864, + "learning_rate": 1.7098616173865554e-05, + "loss": 0.5116, + "step": 15701 + }, + { + "epoch": 0.27132291954658555, + "grad_norm": 0.7471872297252261, + "learning_rate": 1.709822197854202e-05, + "loss": 0.575, + "step": 15702 + }, + { + "epoch": 0.27134019905999446, + "grad_norm": 0.9486717701442097, + "learning_rate": 1.7097827760986317e-05, + "loss": 0.4241, + "step": 15703 + }, + { + "epoch": 0.27135747857340337, + "grad_norm": 0.5383750499645447, + "learning_rate": 1.709743352119968e-05, + "loss": 0.4607, + "step": 15704 + }, + { + "epoch": 0.2713747580868123, + "grad_norm": 0.9937397562698994, + "learning_rate": 1.709703925918334e-05, + "loss": 0.5231, + "step": 15705 + }, + { + "epoch": 0.2713920376002212, + "grad_norm": 0.8813027412099058, + "learning_rate": 1.7096644974938536e-05, + "loss": 0.6052, + "step": 15706 + }, + { + "epoch": 0.2714093171136301, + "grad_norm": 1.425809411913656, + "learning_rate": 1.7096250668466497e-05, + "loss": 0.4924, + "step": 15707 + }, + { + "epoch": 0.271426596627039, + "grad_norm": 1.404350754862615, + "learning_rate": 1.709585633976846e-05, + "loss": 0.3327, + "step": 15708 + }, + { + "epoch": 0.2714438761404479, + "grad_norm": 1.0093316409417914, + "learning_rate": 1.7095461988845666e-05, + "loss": 0.6477, + "step": 15709 + }, + { + "epoch": 0.2714611556538568, + "grad_norm": 1.365420964355088, + "learning_rate": 1.709506761569934e-05, + "loss": 0.442, + "step": 15710 + }, + { + "epoch": 0.27147843516726566, + "grad_norm": 1.164992777167769, + "learning_rate": 1.709467322033073e-05, + "loss": 0.4783, + "step": 15711 + }, + { + "epoch": 0.27149571468067457, + "grad_norm": 1.166325036946599, + "learning_rate": 1.709427880274106e-05, + "loss": 0.585, + "step": 15712 + }, + { + "epoch": 0.2715129941940835, + "grad_norm": 0.8757548759773203, + "learning_rate": 1.7093884362931573e-05, + "loss": 0.8027, + "step": 15713 + }, + { + "epoch": 0.2715302737074924, + "grad_norm": 0.41080876142631956, + "learning_rate": 1.70934899009035e-05, + "loss": 0.5422, + "step": 15714 + }, + { + "epoch": 0.2715475532209013, + "grad_norm": 0.8887381068511492, + "learning_rate": 1.7093095416658075e-05, + "loss": 0.4408, + "step": 15715 + }, + { + "epoch": 0.2715648327343102, + "grad_norm": 0.724801548297017, + "learning_rate": 1.709270091019654e-05, + "loss": 0.3501, + "step": 15716 + }, + { + "epoch": 0.2715821122477191, + "grad_norm": 0.9306059803657856, + "learning_rate": 1.709230638152012e-05, + "loss": 0.4554, + "step": 15717 + }, + { + "epoch": 0.271599391761128, + "grad_norm": 2.0362778195057665, + "learning_rate": 1.7091911830630064e-05, + "loss": 0.5723, + "step": 15718 + }, + { + "epoch": 0.2716166712745369, + "grad_norm": 0.48953671784420455, + "learning_rate": 1.7091517257527598e-05, + "loss": 0.6311, + "step": 15719 + }, + { + "epoch": 0.27163395078794583, + "grad_norm": 1.125244401145553, + "learning_rate": 1.7091122662213963e-05, + "loss": 0.5116, + "step": 15720 + }, + { + "epoch": 0.27165123030135474, + "grad_norm": 1.3247771301591351, + "learning_rate": 1.7090728044690392e-05, + "loss": 0.2834, + "step": 15721 + }, + { + "epoch": 0.2716685098147636, + "grad_norm": 1.2453955448694132, + "learning_rate": 1.7090333404958122e-05, + "loss": 0.4325, + "step": 15722 + }, + { + "epoch": 0.2716857893281725, + "grad_norm": 1.0974189015865934, + "learning_rate": 1.7089938743018387e-05, + "loss": 0.5343, + "step": 15723 + }, + { + "epoch": 0.2717030688415814, + "grad_norm": 1.1417065337982515, + "learning_rate": 1.708954405887243e-05, + "loss": 0.3135, + "step": 15724 + }, + { + "epoch": 0.2717203483549903, + "grad_norm": 0.5074354627265579, + "learning_rate": 1.7089149352521478e-05, + "loss": 0.2759, + "step": 15725 + }, + { + "epoch": 0.2717376278683992, + "grad_norm": 1.1501776179663732, + "learning_rate": 1.708875462396677e-05, + "loss": 0.3712, + "step": 15726 + }, + { + "epoch": 0.27175490738180813, + "grad_norm": 0.4239851218427717, + "learning_rate": 1.7088359873209547e-05, + "loss": 0.6649, + "step": 15727 + }, + { + "epoch": 0.27177218689521704, + "grad_norm": 0.7502612571016968, + "learning_rate": 1.7087965100251043e-05, + "loss": 0.5379, + "step": 15728 + }, + { + "epoch": 0.27178946640862595, + "grad_norm": 1.1717283469257103, + "learning_rate": 1.7087570305092492e-05, + "loss": 0.3497, + "step": 15729 + }, + { + "epoch": 0.27180674592203485, + "grad_norm": 1.420979645370319, + "learning_rate": 1.708717548773513e-05, + "loss": 0.4899, + "step": 15730 + }, + { + "epoch": 0.27182402543544376, + "grad_norm": 0.7669775191378977, + "learning_rate": 1.70867806481802e-05, + "loss": 0.5683, + "step": 15731 + }, + { + "epoch": 0.2718413049488526, + "grad_norm": 1.3892796851067226, + "learning_rate": 1.708638578642893e-05, + "loss": 0.4776, + "step": 15732 + }, + { + "epoch": 0.2718585844622615, + "grad_norm": 0.4137262283350101, + "learning_rate": 1.7085990902482565e-05, + "loss": 0.8134, + "step": 15733 + }, + { + "epoch": 0.27187586397567043, + "grad_norm": 0.8054270324845701, + "learning_rate": 1.7085595996342336e-05, + "loss": 0.3251, + "step": 15734 + }, + { + "epoch": 0.27189314348907934, + "grad_norm": 1.4195995714700331, + "learning_rate": 1.7085201068009485e-05, + "loss": 0.6498, + "step": 15735 + }, + { + "epoch": 0.27191042300248824, + "grad_norm": 0.525831131223519, + "learning_rate": 1.708480611748524e-05, + "loss": 0.583, + "step": 15736 + }, + { + "epoch": 0.27192770251589715, + "grad_norm": 1.140561437528728, + "learning_rate": 1.708441114477085e-05, + "loss": 0.6161, + "step": 15737 + }, + { + "epoch": 0.27194498202930606, + "grad_norm": 1.0449151040333302, + "learning_rate": 1.7084016149867543e-05, + "loss": 0.5972, + "step": 15738 + }, + { + "epoch": 0.27196226154271497, + "grad_norm": 1.7428688199190427, + "learning_rate": 1.708362113277656e-05, + "loss": 0.6142, + "step": 15739 + }, + { + "epoch": 0.2719795410561239, + "grad_norm": 1.2035130235512392, + "learning_rate": 1.7083226093499136e-05, + "loss": 0.4755, + "step": 15740 + }, + { + "epoch": 0.2719968205695328, + "grad_norm": 0.5701376397560761, + "learning_rate": 1.7082831032036507e-05, + "loss": 0.3972, + "step": 15741 + }, + { + "epoch": 0.2720141000829417, + "grad_norm": 1.045879113092827, + "learning_rate": 1.7082435948389916e-05, + "loss": 0.5572, + "step": 15742 + }, + { + "epoch": 0.27203137959635054, + "grad_norm": 0.7982854718302067, + "learning_rate": 1.70820408425606e-05, + "loss": 0.4737, + "step": 15743 + }, + { + "epoch": 0.27204865910975945, + "grad_norm": 0.6841414122375303, + "learning_rate": 1.708164571454979e-05, + "loss": 0.3052, + "step": 15744 + }, + { + "epoch": 0.27206593862316836, + "grad_norm": 1.1962432086096684, + "learning_rate": 1.708125056435873e-05, + "loss": 0.3743, + "step": 15745 + }, + { + "epoch": 0.27208321813657727, + "grad_norm": 1.4497669664366488, + "learning_rate": 1.708085539198865e-05, + "loss": 0.4749, + "step": 15746 + }, + { + "epoch": 0.2721004976499862, + "grad_norm": 1.4564635502595367, + "learning_rate": 1.70804601974408e-05, + "loss": 0.5273, + "step": 15747 + }, + { + "epoch": 0.2721177771633951, + "grad_norm": 0.7358497487096214, + "learning_rate": 1.7080064980716408e-05, + "loss": 0.3988, + "step": 15748 + }, + { + "epoch": 0.272135056676804, + "grad_norm": 1.1544158682256307, + "learning_rate": 1.707966974181671e-05, + "loss": 0.5057, + "step": 15749 + }, + { + "epoch": 0.2721523361902129, + "grad_norm": 1.3160144131321942, + "learning_rate": 1.7079274480742954e-05, + "loss": 0.5779, + "step": 15750 + }, + { + "epoch": 0.2721696157036218, + "grad_norm": 1.3122369771188482, + "learning_rate": 1.707887919749637e-05, + "loss": 0.5403, + "step": 15751 + }, + { + "epoch": 0.2721868952170307, + "grad_norm": 1.240714027793169, + "learning_rate": 1.7078483892078196e-05, + "loss": 0.4421, + "step": 15752 + }, + { + "epoch": 0.27220417473043956, + "grad_norm": 1.2720618216977109, + "learning_rate": 1.7078088564489674e-05, + "loss": 0.6688, + "step": 15753 + }, + { + "epoch": 0.27222145424384847, + "grad_norm": 0.9484504358216739, + "learning_rate": 1.7077693214732042e-05, + "loss": 0.3777, + "step": 15754 + }, + { + "epoch": 0.2722387337572574, + "grad_norm": 0.9475276943795434, + "learning_rate": 1.7077297842806536e-05, + "loss": 0.511, + "step": 15755 + }, + { + "epoch": 0.2722560132706663, + "grad_norm": 1.1771091032527146, + "learning_rate": 1.7076902448714397e-05, + "loss": 0.5758, + "step": 15756 + }, + { + "epoch": 0.2722732927840752, + "grad_norm": 0.9379869421235026, + "learning_rate": 1.707650703245686e-05, + "loss": 0.6981, + "step": 15757 + }, + { + "epoch": 0.2722905722974841, + "grad_norm": 1.2360926038159756, + "learning_rate": 1.7076111594035167e-05, + "loss": 0.6571, + "step": 15758 + }, + { + "epoch": 0.272307851810893, + "grad_norm": 1.0331031617975943, + "learning_rate": 1.7075716133450553e-05, + "loss": 0.4592, + "step": 15759 + }, + { + "epoch": 0.2723251313243019, + "grad_norm": 1.245074124431137, + "learning_rate": 1.707532065070426e-05, + "loss": 0.6225, + "step": 15760 + }, + { + "epoch": 0.2723424108377108, + "grad_norm": 1.4423039299649363, + "learning_rate": 1.7074925145797525e-05, + "loss": 0.4511, + "step": 15761 + }, + { + "epoch": 0.27235969035111973, + "grad_norm": 1.015979259379023, + "learning_rate": 1.7074529618731584e-05, + "loss": 0.4233, + "step": 15762 + }, + { + "epoch": 0.27237696986452864, + "grad_norm": 0.6322862324358377, + "learning_rate": 1.707413406950768e-05, + "loss": 0.5809, + "step": 15763 + }, + { + "epoch": 0.2723942493779375, + "grad_norm": 1.5928841572297345, + "learning_rate": 1.7073738498127052e-05, + "loss": 0.5513, + "step": 15764 + }, + { + "epoch": 0.2724115288913464, + "grad_norm": 1.62462755321103, + "learning_rate": 1.7073342904590933e-05, + "loss": 0.3775, + "step": 15765 + }, + { + "epoch": 0.2724288084047553, + "grad_norm": 1.0007468511121638, + "learning_rate": 1.707294728890057e-05, + "loss": 0.4484, + "step": 15766 + }, + { + "epoch": 0.2724460879181642, + "grad_norm": 1.1969549678059435, + "learning_rate": 1.70725516510572e-05, + "loss": 0.6179, + "step": 15767 + }, + { + "epoch": 0.2724633674315731, + "grad_norm": 0.595507203517466, + "learning_rate": 1.707215599106206e-05, + "loss": 0.4373, + "step": 15768 + }, + { + "epoch": 0.27248064694498203, + "grad_norm": 0.93021453132007, + "learning_rate": 1.7071760308916387e-05, + "loss": 0.3462, + "step": 15769 + }, + { + "epoch": 0.27249792645839094, + "grad_norm": 0.4789766523711591, + "learning_rate": 1.7071364604621427e-05, + "loss": 0.6672, + "step": 15770 + }, + { + "epoch": 0.27251520597179985, + "grad_norm": 1.097513453091549, + "learning_rate": 1.7070968878178412e-05, + "loss": 0.4697, + "step": 15771 + }, + { + "epoch": 0.27253248548520875, + "grad_norm": 1.0431980828026206, + "learning_rate": 1.707057312958859e-05, + "loss": 0.5252, + "step": 15772 + }, + { + "epoch": 0.27254976499861766, + "grad_norm": 1.210243210329744, + "learning_rate": 1.7070177358853187e-05, + "loss": 0.4058, + "step": 15773 + }, + { + "epoch": 0.27256704451202657, + "grad_norm": 1.4760528086310538, + "learning_rate": 1.7069781565973458e-05, + "loss": 0.467, + "step": 15774 + }, + { + "epoch": 0.2725843240254354, + "grad_norm": 1.0385947247330214, + "learning_rate": 1.7069385750950634e-05, + "loss": 0.5555, + "step": 15775 + }, + { + "epoch": 0.27260160353884433, + "grad_norm": 0.876569062235307, + "learning_rate": 1.7068989913785956e-05, + "loss": 0.4993, + "step": 15776 + }, + { + "epoch": 0.27261888305225324, + "grad_norm": 0.7130107951255062, + "learning_rate": 1.7068594054480666e-05, + "loss": 0.4126, + "step": 15777 + }, + { + "epoch": 0.27263616256566214, + "grad_norm": 0.8237560624261401, + "learning_rate": 1.7068198173036005e-05, + "loss": 0.4901, + "step": 15778 + }, + { + "epoch": 0.27265344207907105, + "grad_norm": 1.9193355331990678, + "learning_rate": 1.7067802269453207e-05, + "loss": 0.429, + "step": 15779 + }, + { + "epoch": 0.27267072159247996, + "grad_norm": 0.9738967064209025, + "learning_rate": 1.7067406343733514e-05, + "loss": 0.5223, + "step": 15780 + }, + { + "epoch": 0.27268800110588887, + "grad_norm": 1.3547365633941786, + "learning_rate": 1.7067010395878166e-05, + "loss": 0.4288, + "step": 15781 + }, + { + "epoch": 0.2727052806192978, + "grad_norm": 1.3011273402569572, + "learning_rate": 1.7066614425888408e-05, + "loss": 0.5144, + "step": 15782 + }, + { + "epoch": 0.2727225601327067, + "grad_norm": 1.0391763968034347, + "learning_rate": 1.7066218433765476e-05, + "loss": 0.5149, + "step": 15783 + }, + { + "epoch": 0.2727398396461156, + "grad_norm": 1.7164038481455939, + "learning_rate": 1.706582241951061e-05, + "loss": 0.6856, + "step": 15784 + }, + { + "epoch": 0.27275711915952444, + "grad_norm": 1.3334017425888827, + "learning_rate": 1.706542638312505e-05, + "loss": 0.4685, + "step": 15785 + }, + { + "epoch": 0.27277439867293335, + "grad_norm": 1.3798420150379251, + "learning_rate": 1.7065030324610038e-05, + "loss": 0.3884, + "step": 15786 + }, + { + "epoch": 0.27279167818634226, + "grad_norm": 1.0509821211128154, + "learning_rate": 1.7064634243966816e-05, + "loss": 0.5715, + "step": 15787 + }, + { + "epoch": 0.27280895769975116, + "grad_norm": 0.6668543153035634, + "learning_rate": 1.706423814119662e-05, + "loss": 0.5152, + "step": 15788 + }, + { + "epoch": 0.27282623721316007, + "grad_norm": 1.6706739799527919, + "learning_rate": 1.7063842016300694e-05, + "loss": 0.4581, + "step": 15789 + }, + { + "epoch": 0.272843516726569, + "grad_norm": 0.8710221139329665, + "learning_rate": 1.706344586928028e-05, + "loss": 0.6785, + "step": 15790 + }, + { + "epoch": 0.2728607962399779, + "grad_norm": 1.1380905116404294, + "learning_rate": 1.7063049700136617e-05, + "loss": 0.452, + "step": 15791 + }, + { + "epoch": 0.2728780757533868, + "grad_norm": 0.9441713740883731, + "learning_rate": 1.7062653508870943e-05, + "loss": 0.4772, + "step": 15792 + }, + { + "epoch": 0.2728953552667957, + "grad_norm": 1.3504329742630325, + "learning_rate": 1.7062257295484503e-05, + "loss": 0.5014, + "step": 15793 + }, + { + "epoch": 0.2729126347802046, + "grad_norm": 1.3908115479518655, + "learning_rate": 1.706186105997854e-05, + "loss": 0.6238, + "step": 15794 + }, + { + "epoch": 0.2729299142936135, + "grad_norm": 0.8407209391902619, + "learning_rate": 1.7061464802354284e-05, + "loss": 0.4525, + "step": 15795 + }, + { + "epoch": 0.27294719380702237, + "grad_norm": 1.3880849762118066, + "learning_rate": 1.7061068522612987e-05, + "loss": 0.5277, + "step": 15796 + }, + { + "epoch": 0.2729644733204313, + "grad_norm": 0.7812511943482121, + "learning_rate": 1.7060672220755886e-05, + "loss": 0.5175, + "step": 15797 + }, + { + "epoch": 0.2729817528338402, + "grad_norm": 1.1767141019369516, + "learning_rate": 1.7060275896784225e-05, + "loss": 0.5391, + "step": 15798 + }, + { + "epoch": 0.2729990323472491, + "grad_norm": 1.2011374626879492, + "learning_rate": 1.705987955069924e-05, + "loss": 0.5843, + "step": 15799 + }, + { + "epoch": 0.273016311860658, + "grad_norm": 0.8874727561243306, + "learning_rate": 1.705948318250218e-05, + "loss": 0.5377, + "step": 15800 + }, + { + "epoch": 0.2730335913740669, + "grad_norm": 1.180615819407357, + "learning_rate": 1.705908679219428e-05, + "loss": 0.6481, + "step": 15801 + }, + { + "epoch": 0.2730508708874758, + "grad_norm": 0.4960824879304714, + "learning_rate": 1.7058690379776782e-05, + "loss": 0.2624, + "step": 15802 + }, + { + "epoch": 0.2730681504008847, + "grad_norm": 0.42806923308578415, + "learning_rate": 1.7058293945250934e-05, + "loss": 0.7707, + "step": 15803 + }, + { + "epoch": 0.27308542991429363, + "grad_norm": 1.4258960453360374, + "learning_rate": 1.705789748861797e-05, + "loss": 0.7564, + "step": 15804 + }, + { + "epoch": 0.27310270942770254, + "grad_norm": 0.5636976099854162, + "learning_rate": 1.7057501009879135e-05, + "loss": 0.6237, + "step": 15805 + }, + { + "epoch": 0.2731199889411114, + "grad_norm": 0.943390893118493, + "learning_rate": 1.705710450903567e-05, + "loss": 0.4294, + "step": 15806 + }, + { + "epoch": 0.2731372684545203, + "grad_norm": 0.4793637029480378, + "learning_rate": 1.705670798608882e-05, + "loss": 0.488, + "step": 15807 + }, + { + "epoch": 0.2731545479679292, + "grad_norm": 1.0227507051496119, + "learning_rate": 1.7056311441039818e-05, + "loss": 0.606, + "step": 15808 + }, + { + "epoch": 0.2731718274813381, + "grad_norm": 1.3659849312218149, + "learning_rate": 1.7055914873889915e-05, + "loss": 0.7111, + "step": 15809 + }, + { + "epoch": 0.273189106994747, + "grad_norm": 1.3570777812973818, + "learning_rate": 1.7055518284640355e-05, + "loss": 0.6358, + "step": 15810 + }, + { + "epoch": 0.27320638650815593, + "grad_norm": 1.543569769205413, + "learning_rate": 1.705512167329237e-05, + "loss": 0.6715, + "step": 15811 + }, + { + "epoch": 0.27322366602156484, + "grad_norm": 0.9053111252413099, + "learning_rate": 1.705472503984721e-05, + "loss": 0.6959, + "step": 15812 + }, + { + "epoch": 0.27324094553497374, + "grad_norm": 1.0074398110789298, + "learning_rate": 1.7054328384306116e-05, + "loss": 0.4433, + "step": 15813 + }, + { + "epoch": 0.27325822504838265, + "grad_norm": 0.8958556691138214, + "learning_rate": 1.705393170667033e-05, + "loss": 0.4026, + "step": 15814 + }, + { + "epoch": 0.27327550456179156, + "grad_norm": 1.0251647171494396, + "learning_rate": 1.705353500694109e-05, + "loss": 0.4119, + "step": 15815 + }, + { + "epoch": 0.27329278407520047, + "grad_norm": 1.507963937296549, + "learning_rate": 1.7053138285119644e-05, + "loss": 0.5687, + "step": 15816 + }, + { + "epoch": 0.2733100635886093, + "grad_norm": 1.3740277050927514, + "learning_rate": 1.7052741541207235e-05, + "loss": 0.5816, + "step": 15817 + }, + { + "epoch": 0.2733273431020182, + "grad_norm": 0.6709862577247203, + "learning_rate": 1.70523447752051e-05, + "loss": 0.6046, + "step": 15818 + }, + { + "epoch": 0.27334462261542714, + "grad_norm": 0.7849236798784829, + "learning_rate": 1.7051947987114487e-05, + "loss": 0.4852, + "step": 15819 + }, + { + "epoch": 0.27336190212883604, + "grad_norm": 1.223741602475088, + "learning_rate": 1.7051551176936638e-05, + "loss": 0.662, + "step": 15820 + }, + { + "epoch": 0.27337918164224495, + "grad_norm": 0.8313375718749618, + "learning_rate": 1.7051154344672795e-05, + "loss": 0.4842, + "step": 15821 + }, + { + "epoch": 0.27339646115565386, + "grad_norm": 1.365092101423622, + "learning_rate": 1.7050757490324197e-05, + "loss": 0.4905, + "step": 15822 + }, + { + "epoch": 0.27341374066906277, + "grad_norm": 1.5528852551386696, + "learning_rate": 1.7050360613892095e-05, + "loss": 0.6403, + "step": 15823 + }, + { + "epoch": 0.2734310201824717, + "grad_norm": 1.1014751830421452, + "learning_rate": 1.7049963715377725e-05, + "loss": 0.552, + "step": 15824 + }, + { + "epoch": 0.2734482996958806, + "grad_norm": 0.5645484642202054, + "learning_rate": 1.7049566794782335e-05, + "loss": 0.9105, + "step": 15825 + }, + { + "epoch": 0.2734655792092895, + "grad_norm": 0.9147186351355222, + "learning_rate": 1.7049169852107164e-05, + "loss": 0.5662, + "step": 15826 + }, + { + "epoch": 0.2734828587226984, + "grad_norm": 0.6608054289778261, + "learning_rate": 1.704877288735346e-05, + "loss": 0.3764, + "step": 15827 + }, + { + "epoch": 0.27350013823610725, + "grad_norm": 1.2996149552057488, + "learning_rate": 1.7048375900522457e-05, + "loss": 0.4465, + "step": 15828 + }, + { + "epoch": 0.27351741774951616, + "grad_norm": 0.819901248318996, + "learning_rate": 1.704797889161541e-05, + "loss": 0.61, + "step": 15829 + }, + { + "epoch": 0.27353469726292506, + "grad_norm": 0.8048553949661017, + "learning_rate": 1.704758186063356e-05, + "loss": 0.6333, + "step": 15830 + }, + { + "epoch": 0.27355197677633397, + "grad_norm": 2.0012370346298063, + "learning_rate": 1.7047184807578147e-05, + "loss": 0.6015, + "step": 15831 + }, + { + "epoch": 0.2735692562897429, + "grad_norm": 1.1119927163647045, + "learning_rate": 1.7046787732450415e-05, + "loss": 0.6252, + "step": 15832 + }, + { + "epoch": 0.2735865358031518, + "grad_norm": 1.008984060214732, + "learning_rate": 1.7046390635251603e-05, + "loss": 0.5542, + "step": 15833 + }, + { + "epoch": 0.2736038153165607, + "grad_norm": 0.8404198645400238, + "learning_rate": 1.7045993515982968e-05, + "loss": 0.644, + "step": 15834 + }, + { + "epoch": 0.2736210948299696, + "grad_norm": 0.8423610876839357, + "learning_rate": 1.704559637464574e-05, + "loss": 0.4396, + "step": 15835 + }, + { + "epoch": 0.2736383743433785, + "grad_norm": 0.6320887562445879, + "learning_rate": 1.704519921124117e-05, + "loss": 0.4391, + "step": 15836 + }, + { + "epoch": 0.2736556538567874, + "grad_norm": 1.179145904466074, + "learning_rate": 1.7044802025770502e-05, + "loss": 0.4307, + "step": 15837 + }, + { + "epoch": 0.27367293337019627, + "grad_norm": 1.1410032923084914, + "learning_rate": 1.7044404818234976e-05, + "loss": 0.544, + "step": 15838 + }, + { + "epoch": 0.2736902128836052, + "grad_norm": 0.9331801970283478, + "learning_rate": 1.7044007588635844e-05, + "loss": 0.4637, + "step": 15839 + }, + { + "epoch": 0.2737074923970141, + "grad_norm": 0.8166972121373401, + "learning_rate": 1.704361033697434e-05, + "loss": 0.7875, + "step": 15840 + }, + { + "epoch": 0.273724771910423, + "grad_norm": 0.9463504416003294, + "learning_rate": 1.7043213063251716e-05, + "loss": 0.49, + "step": 15841 + }, + { + "epoch": 0.2737420514238319, + "grad_norm": 1.062830413260909, + "learning_rate": 1.704281576746921e-05, + "loss": 0.5843, + "step": 15842 + }, + { + "epoch": 0.2737593309372408, + "grad_norm": 1.0224150878760891, + "learning_rate": 1.7042418449628074e-05, + "loss": 0.4529, + "step": 15843 + }, + { + "epoch": 0.2737766104506497, + "grad_norm": 0.8946019463143599, + "learning_rate": 1.7042021109729545e-05, + "loss": 0.398, + "step": 15844 + }, + { + "epoch": 0.2737938899640586, + "grad_norm": 1.089935826993671, + "learning_rate": 1.7041623747774875e-05, + "loss": 0.3392, + "step": 15845 + }, + { + "epoch": 0.27381116947746753, + "grad_norm": 0.9032398147367021, + "learning_rate": 1.70412263637653e-05, + "loss": 0.4296, + "step": 15846 + }, + { + "epoch": 0.27382844899087644, + "grad_norm": 0.9491852494145625, + "learning_rate": 1.7040828957702072e-05, + "loss": 0.3013, + "step": 15847 + }, + { + "epoch": 0.27384572850428535, + "grad_norm": 0.8056979710733969, + "learning_rate": 1.7040431529586427e-05, + "loss": 0.5392, + "step": 15848 + }, + { + "epoch": 0.2738630080176942, + "grad_norm": 1.2973143909194476, + "learning_rate": 1.704003407941962e-05, + "loss": 0.5506, + "step": 15849 + }, + { + "epoch": 0.2738802875311031, + "grad_norm": 0.6501364296769465, + "learning_rate": 1.703963660720289e-05, + "loss": 0.6673, + "step": 15850 + }, + { + "epoch": 0.273897567044512, + "grad_norm": 1.2744117680785347, + "learning_rate": 1.7039239112937486e-05, + "loss": 0.4348, + "step": 15851 + }, + { + "epoch": 0.2739148465579209, + "grad_norm": 1.3390191384575725, + "learning_rate": 1.7038841596624647e-05, + "loss": 0.4862, + "step": 15852 + }, + { + "epoch": 0.27393212607132983, + "grad_norm": 0.980713479004205, + "learning_rate": 1.703844405826562e-05, + "loss": 0.5879, + "step": 15853 + }, + { + "epoch": 0.27394940558473874, + "grad_norm": 0.9547964730582434, + "learning_rate": 1.7038046497861654e-05, + "loss": 0.4223, + "step": 15854 + }, + { + "epoch": 0.27396668509814764, + "grad_norm": 0.9507165409822134, + "learning_rate": 1.703764891541399e-05, + "loss": 0.5319, + "step": 15855 + }, + { + "epoch": 0.27398396461155655, + "grad_norm": 1.2824303880137677, + "learning_rate": 1.7037251310923877e-05, + "loss": 0.6616, + "step": 15856 + }, + { + "epoch": 0.27400124412496546, + "grad_norm": 1.1251943068472845, + "learning_rate": 1.703685368439256e-05, + "loss": 0.576, + "step": 15857 + }, + { + "epoch": 0.27401852363837437, + "grad_norm": 0.9679529157135283, + "learning_rate": 1.7036456035821276e-05, + "loss": 0.3945, + "step": 15858 + }, + { + "epoch": 0.2740358031517832, + "grad_norm": 1.264773197317788, + "learning_rate": 1.7036058365211282e-05, + "loss": 0.5903, + "step": 15859 + }, + { + "epoch": 0.2740530826651921, + "grad_norm": 0.5747244771262644, + "learning_rate": 1.7035660672563815e-05, + "loss": 0.2713, + "step": 15860 + }, + { + "epoch": 0.27407036217860103, + "grad_norm": 1.2713707123842044, + "learning_rate": 1.7035262957880126e-05, + "loss": 0.5309, + "step": 15861 + }, + { + "epoch": 0.27408764169200994, + "grad_norm": 0.6955608069950059, + "learning_rate": 1.7034865221161457e-05, + "loss": 0.382, + "step": 15862 + }, + { + "epoch": 0.27410492120541885, + "grad_norm": 1.0826150303409103, + "learning_rate": 1.7034467462409056e-05, + "loss": 0.4933, + "step": 15863 + }, + { + "epoch": 0.27412220071882776, + "grad_norm": 0.7261584108152636, + "learning_rate": 1.7034069681624172e-05, + "loss": 0.4117, + "step": 15864 + }, + { + "epoch": 0.27413948023223667, + "grad_norm": 0.5072645410425032, + "learning_rate": 1.7033671878808043e-05, + "loss": 1.0843, + "step": 15865 + }, + { + "epoch": 0.2741567597456456, + "grad_norm": 0.6248061900565817, + "learning_rate": 1.7033274053961922e-05, + "loss": 0.3849, + "step": 15866 + }, + { + "epoch": 0.2741740392590545, + "grad_norm": 1.0567727329559666, + "learning_rate": 1.703287620708705e-05, + "loss": 0.8337, + "step": 15867 + }, + { + "epoch": 0.2741913187724634, + "grad_norm": 0.9992425586990294, + "learning_rate": 1.7032478338184673e-05, + "loss": 0.5756, + "step": 15868 + }, + { + "epoch": 0.2742085982858723, + "grad_norm": 1.1696501116866307, + "learning_rate": 1.7032080447256044e-05, + "loss": 0.4183, + "step": 15869 + }, + { + "epoch": 0.27422587779928115, + "grad_norm": 0.7595593750936023, + "learning_rate": 1.7031682534302402e-05, + "loss": 0.4488, + "step": 15870 + }, + { + "epoch": 0.27424315731269006, + "grad_norm": 1.802098684489561, + "learning_rate": 1.7031284599324996e-05, + "loss": 0.6315, + "step": 15871 + }, + { + "epoch": 0.27426043682609896, + "grad_norm": 0.9854258098218055, + "learning_rate": 1.7030886642325075e-05, + "loss": 0.5072, + "step": 15872 + }, + { + "epoch": 0.27427771633950787, + "grad_norm": 1.6141496710193912, + "learning_rate": 1.703048866330388e-05, + "loss": 0.6005, + "step": 15873 + }, + { + "epoch": 0.2742949958529168, + "grad_norm": 0.5587809516966147, + "learning_rate": 1.703009066226266e-05, + "loss": 0.3995, + "step": 15874 + }, + { + "epoch": 0.2743122753663257, + "grad_norm": 1.345698434315308, + "learning_rate": 1.7029692639202665e-05, + "loss": 0.5708, + "step": 15875 + }, + { + "epoch": 0.2743295548797346, + "grad_norm": 0.9094393897864594, + "learning_rate": 1.7029294594125135e-05, + "loss": 0.501, + "step": 15876 + }, + { + "epoch": 0.2743468343931435, + "grad_norm": 2.013743762424641, + "learning_rate": 1.702889652703132e-05, + "loss": 0.6129, + "step": 15877 + }, + { + "epoch": 0.2743641139065524, + "grad_norm": 1.0925623928126098, + "learning_rate": 1.702849843792247e-05, + "loss": 0.4487, + "step": 15878 + }, + { + "epoch": 0.2743813934199613, + "grad_norm": 1.3076664490299634, + "learning_rate": 1.7028100326799826e-05, + "loss": 0.4409, + "step": 15879 + }, + { + "epoch": 0.27439867293337017, + "grad_norm": 1.263325864841382, + "learning_rate": 1.702770219366464e-05, + "loss": 0.5444, + "step": 15880 + }, + { + "epoch": 0.2744159524467791, + "grad_norm": 0.9184692073864982, + "learning_rate": 1.7027304038518156e-05, + "loss": 0.6047, + "step": 15881 + }, + { + "epoch": 0.274433231960188, + "grad_norm": 0.49330402614471397, + "learning_rate": 1.702690586136162e-05, + "loss": 0.7156, + "step": 15882 + }, + { + "epoch": 0.2744505114735969, + "grad_norm": 1.039356983296152, + "learning_rate": 1.702650766219628e-05, + "loss": 0.4272, + "step": 15883 + }, + { + "epoch": 0.2744677909870058, + "grad_norm": 1.3251539630313571, + "learning_rate": 1.7026109441023385e-05, + "loss": 0.6892, + "step": 15884 + }, + { + "epoch": 0.2744850705004147, + "grad_norm": 1.068961751397645, + "learning_rate": 1.7025711197844185e-05, + "loss": 0.5316, + "step": 15885 + }, + { + "epoch": 0.2745023500138236, + "grad_norm": 1.151408542997164, + "learning_rate": 1.7025312932659922e-05, + "loss": 0.507, + "step": 15886 + }, + { + "epoch": 0.2745196295272325, + "grad_norm": 1.2320849602906234, + "learning_rate": 1.702491464547184e-05, + "loss": 0.4198, + "step": 15887 + }, + { + "epoch": 0.27453690904064143, + "grad_norm": 1.039260009481052, + "learning_rate": 1.7024516336281197e-05, + "loss": 0.6941, + "step": 15888 + }, + { + "epoch": 0.27455418855405034, + "grad_norm": 0.5657303379245799, + "learning_rate": 1.7024118005089233e-05, + "loss": 0.4152, + "step": 15889 + }, + { + "epoch": 0.27457146806745925, + "grad_norm": 0.5238353439757406, + "learning_rate": 1.7023719651897196e-05, + "loss": 0.333, + "step": 15890 + }, + { + "epoch": 0.2745887475808681, + "grad_norm": 1.246227910332796, + "learning_rate": 1.702332127670634e-05, + "loss": 0.5299, + "step": 15891 + }, + { + "epoch": 0.274606027094277, + "grad_norm": 0.9560856655218297, + "learning_rate": 1.7022922879517904e-05, + "loss": 0.3881, + "step": 15892 + }, + { + "epoch": 0.2746233066076859, + "grad_norm": 1.2946953814314175, + "learning_rate": 1.702252446033314e-05, + "loss": 0.5935, + "step": 15893 + }, + { + "epoch": 0.2746405861210948, + "grad_norm": 0.5457395263659013, + "learning_rate": 1.7022126019153294e-05, + "loss": 0.8044, + "step": 15894 + }, + { + "epoch": 0.27465786563450373, + "grad_norm": 0.4590715081043756, + "learning_rate": 1.702172755597962e-05, + "loss": 0.6545, + "step": 15895 + }, + { + "epoch": 0.27467514514791264, + "grad_norm": 0.7809974606439328, + "learning_rate": 1.7021329070813362e-05, + "loss": 0.5588, + "step": 15896 + }, + { + "epoch": 0.27469242466132154, + "grad_norm": 1.2909444962791314, + "learning_rate": 1.7020930563655764e-05, + "loss": 0.4051, + "step": 15897 + }, + { + "epoch": 0.27470970417473045, + "grad_norm": 0.9341234194546312, + "learning_rate": 1.702053203450808e-05, + "loss": 0.4797, + "step": 15898 + }, + { + "epoch": 0.27472698368813936, + "grad_norm": 1.1955221272481895, + "learning_rate": 1.7020133483371556e-05, + "loss": 0.4743, + "step": 15899 + }, + { + "epoch": 0.27474426320154827, + "grad_norm": 0.8058935759210353, + "learning_rate": 1.701973491024744e-05, + "loss": 0.4003, + "step": 15900 + }, + { + "epoch": 0.2747615427149572, + "grad_norm": 1.0118603443688112, + "learning_rate": 1.701933631513698e-05, + "loss": 0.4906, + "step": 15901 + }, + { + "epoch": 0.274778822228366, + "grad_norm": 1.5094900761607124, + "learning_rate": 1.7018937698041426e-05, + "loss": 0.6194, + "step": 15902 + }, + { + "epoch": 0.27479610174177493, + "grad_norm": 1.049797572072347, + "learning_rate": 1.7018539058962024e-05, + "loss": 0.5326, + "step": 15903 + }, + { + "epoch": 0.27481338125518384, + "grad_norm": 0.9970681085087156, + "learning_rate": 1.7018140397900027e-05, + "loss": 0.6212, + "step": 15904 + }, + { + "epoch": 0.27483066076859275, + "grad_norm": 0.8649597583354913, + "learning_rate": 1.7017741714856678e-05, + "loss": 0.6279, + "step": 15905 + }, + { + "epoch": 0.27484794028200166, + "grad_norm": 1.1924456469346516, + "learning_rate": 1.7017343009833234e-05, + "loss": 0.5934, + "step": 15906 + }, + { + "epoch": 0.27486521979541056, + "grad_norm": 1.622197331436577, + "learning_rate": 1.7016944282830935e-05, + "loss": 0.7233, + "step": 15907 + }, + { + "epoch": 0.2748824993088195, + "grad_norm": 0.4220852515035679, + "learning_rate": 1.701654553385103e-05, + "loss": 0.6134, + "step": 15908 + }, + { + "epoch": 0.2748997788222284, + "grad_norm": 1.2028245850472288, + "learning_rate": 1.7016146762894776e-05, + "loss": 0.3903, + "step": 15909 + }, + { + "epoch": 0.2749170583356373, + "grad_norm": 0.7603501526855734, + "learning_rate": 1.7015747969963414e-05, + "loss": 0.5537, + "step": 15910 + }, + { + "epoch": 0.2749343378490462, + "grad_norm": 1.0871253637035836, + "learning_rate": 1.7015349155058197e-05, + "loss": 0.394, + "step": 15911 + }, + { + "epoch": 0.27495161736245505, + "grad_norm": 0.8878546785636523, + "learning_rate": 1.7014950318180377e-05, + "loss": 0.5546, + "step": 15912 + }, + { + "epoch": 0.27496889687586396, + "grad_norm": 0.9785786925662643, + "learning_rate": 1.7014551459331196e-05, + "loss": 0.4862, + "step": 15913 + }, + { + "epoch": 0.27498617638927286, + "grad_norm": 1.1609292094723642, + "learning_rate": 1.7014152578511908e-05, + "loss": 0.5165, + "step": 15914 + }, + { + "epoch": 0.27500345590268177, + "grad_norm": 0.9234399905748611, + "learning_rate": 1.701375367572376e-05, + "loss": 0.6835, + "step": 15915 + }, + { + "epoch": 0.2750207354160907, + "grad_norm": 0.3979923722836957, + "learning_rate": 1.7013354750968e-05, + "loss": 0.4889, + "step": 15916 + }, + { + "epoch": 0.2750380149294996, + "grad_norm": 1.2509276401757838, + "learning_rate": 1.7012955804245886e-05, + "loss": 0.4965, + "step": 15917 + }, + { + "epoch": 0.2750552944429085, + "grad_norm": 1.1981782244174586, + "learning_rate": 1.7012556835558656e-05, + "loss": 0.4843, + "step": 15918 + }, + { + "epoch": 0.2750725739563174, + "grad_norm": 0.40651149492488764, + "learning_rate": 1.7012157844907567e-05, + "loss": 0.8199, + "step": 15919 + }, + { + "epoch": 0.2750898534697263, + "grad_norm": 0.6296804467189122, + "learning_rate": 1.7011758832293866e-05, + "loss": 0.3945, + "step": 15920 + }, + { + "epoch": 0.2751071329831352, + "grad_norm": 0.6958996863894101, + "learning_rate": 1.7011359797718807e-05, + "loss": 0.434, + "step": 15921 + }, + { + "epoch": 0.2751244124965441, + "grad_norm": 1.259546603816148, + "learning_rate": 1.7010960741183632e-05, + "loss": 0.4527, + "step": 15922 + }, + { + "epoch": 0.275141692009953, + "grad_norm": 1.148798913556311, + "learning_rate": 1.7010561662689595e-05, + "loss": 0.6446, + "step": 15923 + }, + { + "epoch": 0.2751589715233619, + "grad_norm": 0.9375163941388529, + "learning_rate": 1.7010162562237947e-05, + "loss": 0.5206, + "step": 15924 + }, + { + "epoch": 0.2751762510367708, + "grad_norm": 0.9083687707149738, + "learning_rate": 1.7009763439829942e-05, + "loss": 0.6726, + "step": 15925 + }, + { + "epoch": 0.2751935305501797, + "grad_norm": 1.0694701853427089, + "learning_rate": 1.700936429546682e-05, + "loss": 0.5604, + "step": 15926 + }, + { + "epoch": 0.2752108100635886, + "grad_norm": 1.2746850739456221, + "learning_rate": 1.7008965129149838e-05, + "loss": 0.5763, + "step": 15927 + }, + { + "epoch": 0.2752280895769975, + "grad_norm": 0.7603408859274924, + "learning_rate": 1.7008565940880243e-05, + "loss": 0.5595, + "step": 15928 + }, + { + "epoch": 0.2752453690904064, + "grad_norm": 0.9435959653166934, + "learning_rate": 1.7008166730659285e-05, + "loss": 0.4078, + "step": 15929 + }, + { + "epoch": 0.27526264860381533, + "grad_norm": 1.8265508275047797, + "learning_rate": 1.700776749848822e-05, + "loss": 0.5958, + "step": 15930 + }, + { + "epoch": 0.27527992811722424, + "grad_norm": 0.8568656026938396, + "learning_rate": 1.7007368244368295e-05, + "loss": 0.4093, + "step": 15931 + }, + { + "epoch": 0.27529720763063315, + "grad_norm": 0.7367811289355152, + "learning_rate": 1.7006968968300758e-05, + "loss": 0.7429, + "step": 15932 + }, + { + "epoch": 0.275314487144042, + "grad_norm": 0.8014810874889639, + "learning_rate": 1.700656967028686e-05, + "loss": 0.3749, + "step": 15933 + }, + { + "epoch": 0.2753317666574509, + "grad_norm": 2.0151814405499824, + "learning_rate": 1.700617035032786e-05, + "loss": 0.6582, + "step": 15934 + }, + { + "epoch": 0.2753490461708598, + "grad_norm": 1.1249514760637183, + "learning_rate": 1.7005771008424996e-05, + "loss": 0.5983, + "step": 15935 + }, + { + "epoch": 0.2753663256842687, + "grad_norm": 0.4615606719531353, + "learning_rate": 1.7005371644579525e-05, + "loss": 0.6279, + "step": 15936 + }, + { + "epoch": 0.27538360519767763, + "grad_norm": 1.2712644570159168, + "learning_rate": 1.70049722587927e-05, + "loss": 0.5311, + "step": 15937 + }, + { + "epoch": 0.27540088471108654, + "grad_norm": 1.0599315564525769, + "learning_rate": 1.700457285106577e-05, + "loss": 0.423, + "step": 15938 + }, + { + "epoch": 0.27541816422449544, + "grad_norm": 0.7522187275255088, + "learning_rate": 1.7004173421399984e-05, + "loss": 0.3036, + "step": 15939 + }, + { + "epoch": 0.27543544373790435, + "grad_norm": 0.8765611191250333, + "learning_rate": 1.7003773969796594e-05, + "loss": 0.4182, + "step": 15940 + }, + { + "epoch": 0.27545272325131326, + "grad_norm": 0.7430848030191093, + "learning_rate": 1.700337449625685e-05, + "loss": 0.5002, + "step": 15941 + }, + { + "epoch": 0.27547000276472217, + "grad_norm": 1.272237600702662, + "learning_rate": 1.700297500078201e-05, + "loss": 0.3337, + "step": 15942 + }, + { + "epoch": 0.2754872822781311, + "grad_norm": 1.098655355818428, + "learning_rate": 1.7002575483373314e-05, + "loss": 0.4868, + "step": 15943 + }, + { + "epoch": 0.2755045617915399, + "grad_norm": 0.9503721248824087, + "learning_rate": 1.7002175944032026e-05, + "loss": 0.4007, + "step": 15944 + }, + { + "epoch": 0.27552184130494883, + "grad_norm": 0.5762651597417388, + "learning_rate": 1.7001776382759388e-05, + "loss": 0.4695, + "step": 15945 + }, + { + "epoch": 0.27553912081835774, + "grad_norm": 0.8024019096915757, + "learning_rate": 1.7001376799556653e-05, + "loss": 0.5414, + "step": 15946 + }, + { + "epoch": 0.27555640033176665, + "grad_norm": 1.1033583988899638, + "learning_rate": 1.7000977194425075e-05, + "loss": 0.4139, + "step": 15947 + }, + { + "epoch": 0.27557367984517556, + "grad_norm": 0.6187262267795711, + "learning_rate": 1.7000577567365902e-05, + "loss": 0.4956, + "step": 15948 + }, + { + "epoch": 0.27559095935858446, + "grad_norm": 0.48546301246331247, + "learning_rate": 1.7000177918380393e-05, + "loss": 0.8238, + "step": 15949 + }, + { + "epoch": 0.27560823887199337, + "grad_norm": 1.2631600765131072, + "learning_rate": 1.699977824746979e-05, + "loss": 0.5258, + "step": 15950 + }, + { + "epoch": 0.2756255183854023, + "grad_norm": 0.7058956298761244, + "learning_rate": 1.699937855463535e-05, + "loss": 0.3529, + "step": 15951 + }, + { + "epoch": 0.2756427978988112, + "grad_norm": 0.9004939100429266, + "learning_rate": 1.6998978839878325e-05, + "loss": 0.4069, + "step": 15952 + }, + { + "epoch": 0.2756600774122201, + "grad_norm": 0.7791425543563628, + "learning_rate": 1.699857910319997e-05, + "loss": 0.6112, + "step": 15953 + }, + { + "epoch": 0.27567735692562895, + "grad_norm": 1.0386767931484477, + "learning_rate": 1.699817934460153e-05, + "loss": 0.4421, + "step": 15954 + }, + { + "epoch": 0.27569463643903785, + "grad_norm": 1.415765558862855, + "learning_rate": 1.699777956408426e-05, + "loss": 0.5401, + "step": 15955 + }, + { + "epoch": 0.27571191595244676, + "grad_norm": 1.5021029110016073, + "learning_rate": 1.6997379761649414e-05, + "loss": 0.3244, + "step": 15956 + }, + { + "epoch": 0.27572919546585567, + "grad_norm": 1.2393810315329752, + "learning_rate": 1.6996979937298242e-05, + "loss": 0.5052, + "step": 15957 + }, + { + "epoch": 0.2757464749792646, + "grad_norm": 1.2863224918630363, + "learning_rate": 1.6996580091031994e-05, + "loss": 0.5778, + "step": 15958 + }, + { + "epoch": 0.2757637544926735, + "grad_norm": 0.9650077272767716, + "learning_rate": 1.6996180222851933e-05, + "loss": 0.4236, + "step": 15959 + }, + { + "epoch": 0.2757810340060824, + "grad_norm": 1.0853823892003667, + "learning_rate": 1.69957803327593e-05, + "loss": 0.5396, + "step": 15960 + }, + { + "epoch": 0.2757983135194913, + "grad_norm": 0.6503168460243455, + "learning_rate": 1.6995380420755348e-05, + "loss": 0.3545, + "step": 15961 + }, + { + "epoch": 0.2758155930329002, + "grad_norm": 1.2872533599465987, + "learning_rate": 1.699498048684134e-05, + "loss": 0.4665, + "step": 15962 + }, + { + "epoch": 0.2758328725463091, + "grad_norm": 1.2250208822794044, + "learning_rate": 1.6994580531018514e-05, + "loss": 0.425, + "step": 15963 + }, + { + "epoch": 0.275850152059718, + "grad_norm": 0.8805446093124834, + "learning_rate": 1.6994180553288134e-05, + "loss": 0.4722, + "step": 15964 + }, + { + "epoch": 0.2758674315731269, + "grad_norm": 0.7786023697209153, + "learning_rate": 1.699378055365145e-05, + "loss": 0.5351, + "step": 15965 + }, + { + "epoch": 0.2758847110865358, + "grad_norm": 0.43741564664080407, + "learning_rate": 1.699338053210971e-05, + "loss": 0.637, + "step": 15966 + }, + { + "epoch": 0.2759019905999447, + "grad_norm": 1.0891574485392548, + "learning_rate": 1.6992980488664173e-05, + "loss": 0.5842, + "step": 15967 + }, + { + "epoch": 0.2759192701133536, + "grad_norm": 0.7494784289959725, + "learning_rate": 1.6992580423316092e-05, + "loss": 0.3232, + "step": 15968 + }, + { + "epoch": 0.2759365496267625, + "grad_norm": 0.9938148137452398, + "learning_rate": 1.6992180336066714e-05, + "loss": 0.5834, + "step": 15969 + }, + { + "epoch": 0.2759538291401714, + "grad_norm": 0.9554663187369604, + "learning_rate": 1.6991780226917298e-05, + "loss": 0.3547, + "step": 15970 + }, + { + "epoch": 0.2759711086535803, + "grad_norm": 0.9419373034765611, + "learning_rate": 1.6991380095869093e-05, + "loss": 0.6263, + "step": 15971 + }, + { + "epoch": 0.27598838816698923, + "grad_norm": 1.332833237581187, + "learning_rate": 1.6990979942923358e-05, + "loss": 0.6841, + "step": 15972 + }, + { + "epoch": 0.27600566768039814, + "grad_norm": 1.7491722467820097, + "learning_rate": 1.699057976808134e-05, + "loss": 0.5041, + "step": 15973 + }, + { + "epoch": 0.27602294719380704, + "grad_norm": 1.1418183239074353, + "learning_rate": 1.699017957134429e-05, + "loss": 0.5545, + "step": 15974 + }, + { + "epoch": 0.27604022670721595, + "grad_norm": 1.1094772910294306, + "learning_rate": 1.6989779352713474e-05, + "loss": 0.6491, + "step": 15975 + }, + { + "epoch": 0.2760575062206248, + "grad_norm": 1.0725239614851207, + "learning_rate": 1.6989379112190137e-05, + "loss": 0.5512, + "step": 15976 + }, + { + "epoch": 0.2760747857340337, + "grad_norm": 0.9760172782309433, + "learning_rate": 1.698897884977553e-05, + "loss": 0.4028, + "step": 15977 + }, + { + "epoch": 0.2760920652474426, + "grad_norm": 1.2990882958863426, + "learning_rate": 1.6988578565470913e-05, + "loss": 0.4071, + "step": 15978 + }, + { + "epoch": 0.2761093447608515, + "grad_norm": 1.6459122878129617, + "learning_rate": 1.6988178259277535e-05, + "loss": 0.561, + "step": 15979 + }, + { + "epoch": 0.27612662427426043, + "grad_norm": 1.0345341555149206, + "learning_rate": 1.6987777931196652e-05, + "loss": 0.4448, + "step": 15980 + }, + { + "epoch": 0.27614390378766934, + "grad_norm": 0.8655825653176434, + "learning_rate": 1.6987377581229516e-05, + "loss": 0.5154, + "step": 15981 + }, + { + "epoch": 0.27616118330107825, + "grad_norm": 1.3617963358327116, + "learning_rate": 1.6986977209377385e-05, + "loss": 0.4037, + "step": 15982 + }, + { + "epoch": 0.27617846281448716, + "grad_norm": 0.9115735322281477, + "learning_rate": 1.698657681564151e-05, + "loss": 0.2715, + "step": 15983 + }, + { + "epoch": 0.27619574232789607, + "grad_norm": 1.58790659247638, + "learning_rate": 1.6986176400023145e-05, + "loss": 0.586, + "step": 15984 + }, + { + "epoch": 0.276213021841305, + "grad_norm": 0.7567906698095359, + "learning_rate": 1.6985775962523546e-05, + "loss": 0.3547, + "step": 15985 + }, + { + "epoch": 0.2762303013547138, + "grad_norm": 1.1996945152347869, + "learning_rate": 1.698537550314396e-05, + "loss": 0.3555, + "step": 15986 + }, + { + "epoch": 0.27624758086812273, + "grad_norm": 0.8996788097072819, + "learning_rate": 1.6984975021885653e-05, + "loss": 0.8247, + "step": 15987 + }, + { + "epoch": 0.27626486038153164, + "grad_norm": 0.9305566987588322, + "learning_rate": 1.698457451874987e-05, + "loss": 0.3734, + "step": 15988 + }, + { + "epoch": 0.27628213989494055, + "grad_norm": 1.2824899157680683, + "learning_rate": 1.698417399373787e-05, + "loss": 0.524, + "step": 15989 + }, + { + "epoch": 0.27629941940834946, + "grad_norm": 1.2322910368702114, + "learning_rate": 1.6983773446850908e-05, + "loss": 0.3495, + "step": 15990 + }, + { + "epoch": 0.27631669892175836, + "grad_norm": 0.8401407274939041, + "learning_rate": 1.6983372878090234e-05, + "loss": 0.5687, + "step": 15991 + }, + { + "epoch": 0.27633397843516727, + "grad_norm": 1.0555601760993019, + "learning_rate": 1.698297228745711e-05, + "loss": 0.4115, + "step": 15992 + }, + { + "epoch": 0.2763512579485762, + "grad_norm": 0.7456189463692869, + "learning_rate": 1.6982571674952783e-05, + "loss": 0.3534, + "step": 15993 + }, + { + "epoch": 0.2763685374619851, + "grad_norm": 1.018623247536452, + "learning_rate": 1.698217104057851e-05, + "loss": 0.7323, + "step": 15994 + }, + { + "epoch": 0.276385816975394, + "grad_norm": 0.9894078942899356, + "learning_rate": 1.6981770384335547e-05, + "loss": 0.6332, + "step": 15995 + }, + { + "epoch": 0.2764030964888029, + "grad_norm": 0.8119641099483909, + "learning_rate": 1.698136970622515e-05, + "loss": 0.3457, + "step": 15996 + }, + { + "epoch": 0.27642037600221175, + "grad_norm": 1.3864424702869573, + "learning_rate": 1.698096900624857e-05, + "loss": 0.593, + "step": 15997 + }, + { + "epoch": 0.27643765551562066, + "grad_norm": 0.7200265092977685, + "learning_rate": 1.6980568284407067e-05, + "loss": 0.4458, + "step": 15998 + }, + { + "epoch": 0.27645493502902957, + "grad_norm": 1.398180730513096, + "learning_rate": 1.6980167540701893e-05, + "loss": 0.5159, + "step": 15999 + }, + { + "epoch": 0.2764722145424385, + "grad_norm": 1.3152746615911757, + "learning_rate": 1.6979766775134304e-05, + "loss": 0.5799, + "step": 16000 + }, + { + "epoch": 0.2764894940558474, + "grad_norm": 1.1268863597388328, + "learning_rate": 1.6979365987705553e-05, + "loss": 0.3963, + "step": 16001 + }, + { + "epoch": 0.2765067735692563, + "grad_norm": 1.4531321480330488, + "learning_rate": 1.69789651784169e-05, + "loss": 0.6257, + "step": 16002 + }, + { + "epoch": 0.2765240530826652, + "grad_norm": 0.8581608832869397, + "learning_rate": 1.69785643472696e-05, + "loss": 0.3344, + "step": 16003 + }, + { + "epoch": 0.2765413325960741, + "grad_norm": 0.36145596456328366, + "learning_rate": 1.6978163494264895e-05, + "loss": 0.6015, + "step": 16004 + }, + { + "epoch": 0.276558612109483, + "grad_norm": 1.4597266557693998, + "learning_rate": 1.6977762619404062e-05, + "loss": 0.5934, + "step": 16005 + }, + { + "epoch": 0.2765758916228919, + "grad_norm": 0.7916339810191356, + "learning_rate": 1.6977361722688342e-05, + "loss": 0.4922, + "step": 16006 + }, + { + "epoch": 0.2765931711363008, + "grad_norm": 1.1090744013660716, + "learning_rate": 1.6976960804119e-05, + "loss": 0.5428, + "step": 16007 + }, + { + "epoch": 0.2766104506497097, + "grad_norm": 0.7092565435419851, + "learning_rate": 1.697655986369728e-05, + "loss": 0.525, + "step": 16008 + }, + { + "epoch": 0.2766277301631186, + "grad_norm": 1.8410039354015586, + "learning_rate": 1.697615890142445e-05, + "loss": 0.6182, + "step": 16009 + }, + { + "epoch": 0.2766450096765275, + "grad_norm": 1.476536623369969, + "learning_rate": 1.6975757917301757e-05, + "loss": 0.5791, + "step": 16010 + }, + { + "epoch": 0.2766622891899364, + "grad_norm": 1.2548633111517988, + "learning_rate": 1.697535691133046e-05, + "loss": 0.3813, + "step": 16011 + }, + { + "epoch": 0.2766795687033453, + "grad_norm": 0.8535709600462197, + "learning_rate": 1.6974955883511817e-05, + "loss": 0.3971, + "step": 16012 + }, + { + "epoch": 0.2766968482167542, + "grad_norm": 0.8005621215144372, + "learning_rate": 1.6974554833847076e-05, + "loss": 0.418, + "step": 16013 + }, + { + "epoch": 0.27671412773016313, + "grad_norm": 0.8724785539550151, + "learning_rate": 1.6974153762337505e-05, + "loss": 0.3928, + "step": 16014 + }, + { + "epoch": 0.27673140724357204, + "grad_norm": 1.1596117004383275, + "learning_rate": 1.6973752668984354e-05, + "loss": 0.6574, + "step": 16015 + }, + { + "epoch": 0.27674868675698094, + "grad_norm": 0.9017878322620833, + "learning_rate": 1.697335155378888e-05, + "loss": 0.5456, + "step": 16016 + }, + { + "epoch": 0.27676596627038985, + "grad_norm": 0.7674886756088872, + "learning_rate": 1.6972950416752338e-05, + "loss": 0.6293, + "step": 16017 + }, + { + "epoch": 0.2767832457837987, + "grad_norm": 0.3992624332267506, + "learning_rate": 1.6972549257875984e-05, + "loss": 0.6692, + "step": 16018 + }, + { + "epoch": 0.2768005252972076, + "grad_norm": 0.9990391687342839, + "learning_rate": 1.6972148077161077e-05, + "loss": 0.5036, + "step": 16019 + }, + { + "epoch": 0.2768178048106165, + "grad_norm": 0.8168981310500247, + "learning_rate": 1.6971746874608872e-05, + "loss": 0.6124, + "step": 16020 + }, + { + "epoch": 0.2768350843240254, + "grad_norm": 1.5711552947590917, + "learning_rate": 1.6971345650220628e-05, + "loss": 0.4113, + "step": 16021 + }, + { + "epoch": 0.27685236383743433, + "grad_norm": 1.4571261633196735, + "learning_rate": 1.6970944403997596e-05, + "loss": 0.4098, + "step": 16022 + }, + { + "epoch": 0.27686964335084324, + "grad_norm": 0.8796909784294547, + "learning_rate": 1.6970543135941037e-05, + "loss": 0.4908, + "step": 16023 + }, + { + "epoch": 0.27688692286425215, + "grad_norm": 1.0005613606289325, + "learning_rate": 1.697014184605221e-05, + "loss": 0.5057, + "step": 16024 + }, + { + "epoch": 0.27690420237766106, + "grad_norm": 1.0503855960643622, + "learning_rate": 1.6969740534332365e-05, + "loss": 0.3692, + "step": 16025 + }, + { + "epoch": 0.27692148189106996, + "grad_norm": 0.6807399307836125, + "learning_rate": 1.6969339200782767e-05, + "loss": 0.583, + "step": 16026 + }, + { + "epoch": 0.2769387614044789, + "grad_norm": 1.0609676406200186, + "learning_rate": 1.6968937845404666e-05, + "loss": 0.6243, + "step": 16027 + }, + { + "epoch": 0.2769560409178877, + "grad_norm": 0.8204924798986916, + "learning_rate": 1.6968536468199325e-05, + "loss": 0.3639, + "step": 16028 + }, + { + "epoch": 0.27697332043129663, + "grad_norm": 0.8005998026941189, + "learning_rate": 1.6968135069168e-05, + "loss": 0.3735, + "step": 16029 + }, + { + "epoch": 0.27699059994470554, + "grad_norm": 1.0467119192069623, + "learning_rate": 1.696773364831194e-05, + "loss": 0.5265, + "step": 16030 + }, + { + "epoch": 0.27700787945811445, + "grad_norm": 0.7843076642141286, + "learning_rate": 1.6967332205632413e-05, + "loss": 0.3693, + "step": 16031 + }, + { + "epoch": 0.27702515897152336, + "grad_norm": 0.7701312860640559, + "learning_rate": 1.6966930741130668e-05, + "loss": 0.5543, + "step": 16032 + }, + { + "epoch": 0.27704243848493226, + "grad_norm": 1.1849606698646729, + "learning_rate": 1.6966529254807972e-05, + "loss": 0.4773, + "step": 16033 + }, + { + "epoch": 0.27705971799834117, + "grad_norm": 1.1244819424700028, + "learning_rate": 1.696612774666557e-05, + "loss": 0.7059, + "step": 16034 + }, + { + "epoch": 0.2770769975117501, + "grad_norm": 1.537010048072724, + "learning_rate": 1.6965726216704733e-05, + "loss": 0.5291, + "step": 16035 + }, + { + "epoch": 0.277094277025159, + "grad_norm": 0.9551923337837404, + "learning_rate": 1.6965324664926712e-05, + "loss": 0.6017, + "step": 16036 + }, + { + "epoch": 0.2771115565385679, + "grad_norm": 0.9332134678152791, + "learning_rate": 1.696492309133276e-05, + "loss": 0.5231, + "step": 16037 + }, + { + "epoch": 0.2771288360519768, + "grad_norm": 1.0300810375114355, + "learning_rate": 1.6964521495924144e-05, + "loss": 0.4358, + "step": 16038 + }, + { + "epoch": 0.27714611556538565, + "grad_norm": 1.3075778256046307, + "learning_rate": 1.6964119878702114e-05, + "loss": 0.6914, + "step": 16039 + }, + { + "epoch": 0.27716339507879456, + "grad_norm": 0.6332744272871675, + "learning_rate": 1.6963718239667933e-05, + "loss": 0.6087, + "step": 16040 + }, + { + "epoch": 0.27718067459220347, + "grad_norm": 0.90253227460236, + "learning_rate": 1.6963316578822856e-05, + "loss": 0.5565, + "step": 16041 + }, + { + "epoch": 0.2771979541056124, + "grad_norm": 1.2224587395574267, + "learning_rate": 1.6962914896168144e-05, + "loss": 0.6782, + "step": 16042 + }, + { + "epoch": 0.2772152336190213, + "grad_norm": 1.2150328775499792, + "learning_rate": 1.696251319170505e-05, + "loss": 0.4661, + "step": 16043 + }, + { + "epoch": 0.2772325131324302, + "grad_norm": 1.4761159895577751, + "learning_rate": 1.696211146543484e-05, + "loss": 0.5361, + "step": 16044 + }, + { + "epoch": 0.2772497926458391, + "grad_norm": 0.43452899979147547, + "learning_rate": 1.6961709717358768e-05, + "loss": 0.6741, + "step": 16045 + }, + { + "epoch": 0.277267072159248, + "grad_norm": 1.5229117580154152, + "learning_rate": 1.6961307947478087e-05, + "loss": 0.5823, + "step": 16046 + }, + { + "epoch": 0.2772843516726569, + "grad_norm": 0.8573474649836844, + "learning_rate": 1.6960906155794067e-05, + "loss": 0.4478, + "step": 16047 + }, + { + "epoch": 0.2773016311860658, + "grad_norm": 1.020244452163036, + "learning_rate": 1.6960504342307954e-05, + "loss": 0.554, + "step": 16048 + }, + { + "epoch": 0.27731891069947473, + "grad_norm": 0.740348832741542, + "learning_rate": 1.6960102507021016e-05, + "loss": 0.5211, + "step": 16049 + }, + { + "epoch": 0.2773361902128836, + "grad_norm": 1.1930025899371304, + "learning_rate": 1.6959700649934506e-05, + "loss": 0.754, + "step": 16050 + }, + { + "epoch": 0.2773534697262925, + "grad_norm": 0.9364427673598599, + "learning_rate": 1.6959298771049687e-05, + "loss": 0.5508, + "step": 16051 + }, + { + "epoch": 0.2773707492397014, + "grad_norm": 1.4860049329412286, + "learning_rate": 1.6958896870367814e-05, + "loss": 0.5769, + "step": 16052 + }, + { + "epoch": 0.2773880287531103, + "grad_norm": 0.7915259387195165, + "learning_rate": 1.6958494947890147e-05, + "loss": 0.4356, + "step": 16053 + }, + { + "epoch": 0.2774053082665192, + "grad_norm": 0.7697944710676768, + "learning_rate": 1.6958093003617942e-05, + "loss": 0.6255, + "step": 16054 + }, + { + "epoch": 0.2774225877799281, + "grad_norm": 1.2215503716826608, + "learning_rate": 1.6957691037552468e-05, + "loss": 0.3603, + "step": 16055 + }, + { + "epoch": 0.27743986729333703, + "grad_norm": 1.0104651111786724, + "learning_rate": 1.695728904969497e-05, + "loss": 0.4124, + "step": 16056 + }, + { + "epoch": 0.27745714680674594, + "grad_norm": 1.0138736723486226, + "learning_rate": 1.695688704004672e-05, + "loss": 0.4969, + "step": 16057 + }, + { + "epoch": 0.27747442632015484, + "grad_norm": 4.156852168950099, + "learning_rate": 1.6956485008608967e-05, + "loss": 0.4468, + "step": 16058 + }, + { + "epoch": 0.27749170583356375, + "grad_norm": 0.8656594902581974, + "learning_rate": 1.6956082955382975e-05, + "loss": 0.419, + "step": 16059 + }, + { + "epoch": 0.2775089853469726, + "grad_norm": 0.9977712192101448, + "learning_rate": 1.6955680880370002e-05, + "loss": 0.3984, + "step": 16060 + }, + { + "epoch": 0.2775262648603815, + "grad_norm": 1.4790046999237219, + "learning_rate": 1.695527878357131e-05, + "loss": 0.6605, + "step": 16061 + }, + { + "epoch": 0.2775435443737904, + "grad_norm": 1.0472705429486466, + "learning_rate": 1.6954876664988152e-05, + "loss": 0.5558, + "step": 16062 + }, + { + "epoch": 0.2775608238871993, + "grad_norm": 1.1641936270123954, + "learning_rate": 1.6954474524621796e-05, + "loss": 0.3853, + "step": 16063 + }, + { + "epoch": 0.27757810340060823, + "grad_norm": 0.7989607980929855, + "learning_rate": 1.6954072362473497e-05, + "loss": 0.7532, + "step": 16064 + }, + { + "epoch": 0.27759538291401714, + "grad_norm": 0.6339602078356132, + "learning_rate": 1.695367017854451e-05, + "loss": 0.3581, + "step": 16065 + }, + { + "epoch": 0.27761266242742605, + "grad_norm": 1.5542152846197042, + "learning_rate": 1.6953267972836106e-05, + "loss": 0.5417, + "step": 16066 + }, + { + "epoch": 0.27762994194083496, + "grad_norm": 1.445677210345405, + "learning_rate": 1.6952865745349538e-05, + "loss": 0.4869, + "step": 16067 + }, + { + "epoch": 0.27764722145424386, + "grad_norm": 0.8915859473630737, + "learning_rate": 1.695246349608606e-05, + "loss": 0.5287, + "step": 16068 + }, + { + "epoch": 0.27766450096765277, + "grad_norm": 1.1239024470288337, + "learning_rate": 1.6952061225046944e-05, + "loss": 0.4479, + "step": 16069 + }, + { + "epoch": 0.2776817804810617, + "grad_norm": 0.9876368957627488, + "learning_rate": 1.695165893223344e-05, + "loss": 0.4618, + "step": 16070 + }, + { + "epoch": 0.27769905999447053, + "grad_norm": 1.3089564182497608, + "learning_rate": 1.6951256617646814e-05, + "loss": 0.4863, + "step": 16071 + }, + { + "epoch": 0.27771633950787944, + "grad_norm": 0.975019002766589, + "learning_rate": 1.6950854281288324e-05, + "loss": 0.4743, + "step": 16072 + }, + { + "epoch": 0.27773361902128835, + "grad_norm": 0.9898151920414819, + "learning_rate": 1.695045192315923e-05, + "loss": 0.6153, + "step": 16073 + }, + { + "epoch": 0.27775089853469725, + "grad_norm": 1.3005359567850543, + "learning_rate": 1.6950049543260792e-05, + "loss": 0.4812, + "step": 16074 + }, + { + "epoch": 0.27776817804810616, + "grad_norm": 0.816382846711414, + "learning_rate": 1.694964714159427e-05, + "loss": 0.5372, + "step": 16075 + }, + { + "epoch": 0.27778545756151507, + "grad_norm": 0.8638520588557972, + "learning_rate": 1.6949244718160924e-05, + "loss": 0.3291, + "step": 16076 + }, + { + "epoch": 0.277802737074924, + "grad_norm": 1.205313411518972, + "learning_rate": 1.6948842272962016e-05, + "loss": 0.6843, + "step": 16077 + }, + { + "epoch": 0.2778200165883329, + "grad_norm": 1.2259367867300988, + "learning_rate": 1.694843980599881e-05, + "loss": 0.4241, + "step": 16078 + }, + { + "epoch": 0.2778372961017418, + "grad_norm": 1.032428310940565, + "learning_rate": 1.694803731727256e-05, + "loss": 0.4823, + "step": 16079 + }, + { + "epoch": 0.2778545756151507, + "grad_norm": 1.000415324218178, + "learning_rate": 1.6947634806784527e-05, + "loss": 0.5572, + "step": 16080 + }, + { + "epoch": 0.27787185512855955, + "grad_norm": 1.1252406500730707, + "learning_rate": 1.6947232274535975e-05, + "loss": 0.3943, + "step": 16081 + }, + { + "epoch": 0.27788913464196846, + "grad_norm": 1.7701318007927174, + "learning_rate": 1.6946829720528164e-05, + "loss": 0.5424, + "step": 16082 + }, + { + "epoch": 0.27790641415537737, + "grad_norm": 1.1052881541423247, + "learning_rate": 1.6946427144762354e-05, + "loss": 0.5918, + "step": 16083 + }, + { + "epoch": 0.2779236936687863, + "grad_norm": 0.817609281900633, + "learning_rate": 1.6946024547239807e-05, + "loss": 0.618, + "step": 16084 + }, + { + "epoch": 0.2779409731821952, + "grad_norm": 0.6518361980047377, + "learning_rate": 1.6945621927961782e-05, + "loss": 0.51, + "step": 16085 + }, + { + "epoch": 0.2779582526956041, + "grad_norm": 0.6874277488400581, + "learning_rate": 1.694521928692954e-05, + "loss": 0.3806, + "step": 16086 + }, + { + "epoch": 0.277975532209013, + "grad_norm": 1.5008111695909796, + "learning_rate": 1.6944816624144346e-05, + "loss": 0.5575, + "step": 16087 + }, + { + "epoch": 0.2779928117224219, + "grad_norm": 1.272480653505792, + "learning_rate": 1.694441393960746e-05, + "loss": 0.7304, + "step": 16088 + }, + { + "epoch": 0.2780100912358308, + "grad_norm": 1.2833316034657658, + "learning_rate": 1.694401123332014e-05, + "loss": 0.4785, + "step": 16089 + }, + { + "epoch": 0.2780273707492397, + "grad_norm": 1.1542294238327058, + "learning_rate": 1.6943608505283647e-05, + "loss": 0.4181, + "step": 16090 + }, + { + "epoch": 0.27804465026264863, + "grad_norm": 2.115413267437555, + "learning_rate": 1.6943205755499248e-05, + "loss": 0.5695, + "step": 16091 + }, + { + "epoch": 0.2780619297760575, + "grad_norm": 0.5190955947881014, + "learning_rate": 1.69428029839682e-05, + "loss": 0.3517, + "step": 16092 + }, + { + "epoch": 0.2780792092894664, + "grad_norm": 0.792693239484432, + "learning_rate": 1.6942400190691764e-05, + "loss": 0.4891, + "step": 16093 + }, + { + "epoch": 0.2780964888028753, + "grad_norm": 0.8510309317656424, + "learning_rate": 1.69419973756712e-05, + "loss": 0.5902, + "step": 16094 + }, + { + "epoch": 0.2781137683162842, + "grad_norm": 0.3857226273220678, + "learning_rate": 1.694159453890778e-05, + "loss": 0.671, + "step": 16095 + }, + { + "epoch": 0.2781310478296931, + "grad_norm": 1.0176968491514167, + "learning_rate": 1.6941191680402754e-05, + "loss": 0.5364, + "step": 16096 + }, + { + "epoch": 0.278148327343102, + "grad_norm": 1.0661264390757235, + "learning_rate": 1.6940788800157388e-05, + "loss": 0.548, + "step": 16097 + }, + { + "epoch": 0.2781656068565109, + "grad_norm": 0.6820862186593943, + "learning_rate": 1.6940385898172948e-05, + "loss": 0.4652, + "step": 16098 + }, + { + "epoch": 0.27818288636991984, + "grad_norm": 1.1276115106056595, + "learning_rate": 1.6939982974450685e-05, + "loss": 0.8133, + "step": 16099 + }, + { + "epoch": 0.27820016588332874, + "grad_norm": 1.1139666992549122, + "learning_rate": 1.6939580028991873e-05, + "loss": 0.4767, + "step": 16100 + }, + { + "epoch": 0.27821744539673765, + "grad_norm": 0.6200194952002144, + "learning_rate": 1.6939177061797768e-05, + "loss": 0.465, + "step": 16101 + }, + { + "epoch": 0.2782347249101465, + "grad_norm": 1.5792986175249932, + "learning_rate": 1.693877407286963e-05, + "loss": 0.6136, + "step": 16102 + }, + { + "epoch": 0.2782520044235554, + "grad_norm": 1.06459697453303, + "learning_rate": 1.693837106220873e-05, + "loss": 0.6377, + "step": 16103 + }, + { + "epoch": 0.2782692839369643, + "grad_norm": 1.2334439692763175, + "learning_rate": 1.6937968029816317e-05, + "loss": 0.5282, + "step": 16104 + }, + { + "epoch": 0.2782865634503732, + "grad_norm": 0.5148470603832297, + "learning_rate": 1.6937564975693667e-05, + "loss": 0.6715, + "step": 16105 + }, + { + "epoch": 0.27830384296378213, + "grad_norm": 0.7671670847758638, + "learning_rate": 1.6937161899842034e-05, + "loss": 0.5424, + "step": 16106 + }, + { + "epoch": 0.27832112247719104, + "grad_norm": 1.1174496276716688, + "learning_rate": 1.6936758802262684e-05, + "loss": 0.4855, + "step": 16107 + }, + { + "epoch": 0.27833840199059995, + "grad_norm": 1.2717484986197478, + "learning_rate": 1.6936355682956878e-05, + "loss": 0.421, + "step": 16108 + }, + { + "epoch": 0.27835568150400886, + "grad_norm": 0.7887087671683277, + "learning_rate": 1.693595254192588e-05, + "loss": 0.4881, + "step": 16109 + }, + { + "epoch": 0.27837296101741776, + "grad_norm": 0.7249574625802004, + "learning_rate": 1.693554937917095e-05, + "loss": 0.3705, + "step": 16110 + }, + { + "epoch": 0.27839024053082667, + "grad_norm": 0.7588865698879064, + "learning_rate": 1.693514619469335e-05, + "loss": 0.4657, + "step": 16111 + }, + { + "epoch": 0.2784075200442356, + "grad_norm": 0.3579719185813975, + "learning_rate": 1.6934742988494348e-05, + "loss": 0.5021, + "step": 16112 + }, + { + "epoch": 0.27842479955764443, + "grad_norm": 1.2498783506908349, + "learning_rate": 1.69343397605752e-05, + "loss": 0.4581, + "step": 16113 + }, + { + "epoch": 0.27844207907105334, + "grad_norm": 1.3944292795408215, + "learning_rate": 1.6933936510937178e-05, + "loss": 0.3706, + "step": 16114 + }, + { + "epoch": 0.27845935858446225, + "grad_norm": 1.2865116658988394, + "learning_rate": 1.6933533239581538e-05, + "loss": 0.5838, + "step": 16115 + }, + { + "epoch": 0.27847663809787115, + "grad_norm": 0.515093747200972, + "learning_rate": 1.6933129946509545e-05, + "loss": 0.6879, + "step": 16116 + }, + { + "epoch": 0.27849391761128006, + "grad_norm": 1.0725613619057413, + "learning_rate": 1.693272663172246e-05, + "loss": 0.5677, + "step": 16117 + }, + { + "epoch": 0.27851119712468897, + "grad_norm": 0.6791726278346739, + "learning_rate": 1.693232329522155e-05, + "loss": 0.2916, + "step": 16118 + }, + { + "epoch": 0.2785284766380979, + "grad_norm": 0.7744243718741038, + "learning_rate": 1.6931919937008082e-05, + "loss": 0.6529, + "step": 16119 + }, + { + "epoch": 0.2785457561515068, + "grad_norm": 1.090050009597204, + "learning_rate": 1.6931516557083306e-05, + "loss": 0.4591, + "step": 16120 + }, + { + "epoch": 0.2785630356649157, + "grad_norm": 0.8310152704934356, + "learning_rate": 1.6931113155448496e-05, + "loss": 0.4706, + "step": 16121 + }, + { + "epoch": 0.2785803151783246, + "grad_norm": 0.37486359664264896, + "learning_rate": 1.6930709732104917e-05, + "loss": 0.4897, + "step": 16122 + }, + { + "epoch": 0.2785975946917335, + "grad_norm": 0.8197905163015746, + "learning_rate": 1.693030628705382e-05, + "loss": 0.3849, + "step": 16123 + }, + { + "epoch": 0.27861487420514236, + "grad_norm": 1.3011046204547994, + "learning_rate": 1.6929902820296482e-05, + "loss": 0.721, + "step": 16124 + }, + { + "epoch": 0.27863215371855127, + "grad_norm": 1.2167409276060983, + "learning_rate": 1.692949933183416e-05, + "loss": 0.3152, + "step": 16125 + }, + { + "epoch": 0.2786494332319602, + "grad_norm": 0.9257169571381209, + "learning_rate": 1.6929095821668122e-05, + "loss": 0.438, + "step": 16126 + }, + { + "epoch": 0.2786667127453691, + "grad_norm": 1.45101953763966, + "learning_rate": 1.692869228979963e-05, + "loss": 0.4038, + "step": 16127 + }, + { + "epoch": 0.278683992258778, + "grad_norm": 0.9084953771457978, + "learning_rate": 1.6928288736229944e-05, + "loss": 0.5388, + "step": 16128 + }, + { + "epoch": 0.2787012717721869, + "grad_norm": 0.965090001850519, + "learning_rate": 1.6927885160960333e-05, + "loss": 0.3999, + "step": 16129 + }, + { + "epoch": 0.2787185512855958, + "grad_norm": 1.1223643424606715, + "learning_rate": 1.6927481563992058e-05, + "loss": 0.4066, + "step": 16130 + }, + { + "epoch": 0.2787358307990047, + "grad_norm": 1.009884565479002, + "learning_rate": 1.6927077945326384e-05, + "loss": 0.4747, + "step": 16131 + }, + { + "epoch": 0.2787531103124136, + "grad_norm": 1.5845982533996852, + "learning_rate": 1.6926674304964577e-05, + "loss": 0.5832, + "step": 16132 + }, + { + "epoch": 0.27877038982582253, + "grad_norm": 1.029761286924572, + "learning_rate": 1.69262706429079e-05, + "loss": 0.4205, + "step": 16133 + }, + { + "epoch": 0.2787876693392314, + "grad_norm": 0.9043607076259694, + "learning_rate": 1.6925866959157616e-05, + "loss": 0.6364, + "step": 16134 + }, + { + "epoch": 0.2788049488526403, + "grad_norm": 0.7217051562932979, + "learning_rate": 1.692546325371499e-05, + "loss": 0.5476, + "step": 16135 + }, + { + "epoch": 0.2788222283660492, + "grad_norm": 0.9102398702650019, + "learning_rate": 1.6925059526581287e-05, + "loss": 0.4049, + "step": 16136 + }, + { + "epoch": 0.2788395078794581, + "grad_norm": 1.0979295410957646, + "learning_rate": 1.692465577775777e-05, + "loss": 0.4211, + "step": 16137 + }, + { + "epoch": 0.278856787392867, + "grad_norm": 0.664474793214843, + "learning_rate": 1.6924252007245705e-05, + "loss": 0.3759, + "step": 16138 + }, + { + "epoch": 0.2788740669062759, + "grad_norm": 0.7453973463759045, + "learning_rate": 1.6923848215046358e-05, + "loss": 0.6208, + "step": 16139 + }, + { + "epoch": 0.2788913464196848, + "grad_norm": 1.0131764056378094, + "learning_rate": 1.6923444401160992e-05, + "loss": 0.4542, + "step": 16140 + }, + { + "epoch": 0.27890862593309373, + "grad_norm": 0.9911888851686131, + "learning_rate": 1.6923040565590875e-05, + "loss": 0.5463, + "step": 16141 + }, + { + "epoch": 0.27892590544650264, + "grad_norm": 1.2084480460737406, + "learning_rate": 1.6922636708337263e-05, + "loss": 0.6824, + "step": 16142 + }, + { + "epoch": 0.27894318495991155, + "grad_norm": 1.5871997992375593, + "learning_rate": 1.6922232829401427e-05, + "loss": 0.7032, + "step": 16143 + }, + { + "epoch": 0.27896046447332046, + "grad_norm": 0.7669697771919932, + "learning_rate": 1.6921828928784635e-05, + "loss": 0.4862, + "step": 16144 + }, + { + "epoch": 0.2789777439867293, + "grad_norm": 0.9494470885174916, + "learning_rate": 1.692142500648815e-05, + "loss": 0.7413, + "step": 16145 + }, + { + "epoch": 0.2789950235001382, + "grad_norm": 1.3006864849546913, + "learning_rate": 1.692102106251323e-05, + "loss": 0.4173, + "step": 16146 + }, + { + "epoch": 0.2790123030135471, + "grad_norm": 1.1425028018070436, + "learning_rate": 1.692061709686115e-05, + "loss": 0.6517, + "step": 16147 + }, + { + "epoch": 0.27902958252695603, + "grad_norm": 0.7722767236285237, + "learning_rate": 1.692021310953317e-05, + "loss": 0.3438, + "step": 16148 + }, + { + "epoch": 0.27904686204036494, + "grad_norm": 0.9285021701741702, + "learning_rate": 1.691980910053056e-05, + "loss": 0.5702, + "step": 16149 + }, + { + "epoch": 0.27906414155377385, + "grad_norm": 0.7993840726930594, + "learning_rate": 1.691940506985458e-05, + "loss": 0.7735, + "step": 16150 + }, + { + "epoch": 0.27908142106718276, + "grad_norm": 0.722118142863905, + "learning_rate": 1.69190010175065e-05, + "loss": 0.3805, + "step": 16151 + }, + { + "epoch": 0.27909870058059166, + "grad_norm": 1.0261692524017014, + "learning_rate": 1.6918596943487577e-05, + "loss": 0.453, + "step": 16152 + }, + { + "epoch": 0.27911598009400057, + "grad_norm": 0.7056697172096158, + "learning_rate": 1.6918192847799087e-05, + "loss": 0.5771, + "step": 16153 + }, + { + "epoch": 0.2791332596074095, + "grad_norm": 0.8968690473245674, + "learning_rate": 1.6917788730442287e-05, + "loss": 0.3964, + "step": 16154 + }, + { + "epoch": 0.27915053912081833, + "grad_norm": 0.6846165283129749, + "learning_rate": 1.691738459141845e-05, + "loss": 0.6171, + "step": 16155 + }, + { + "epoch": 0.27916781863422724, + "grad_norm": 0.7184241809393844, + "learning_rate": 1.691698043072884e-05, + "loss": 0.3591, + "step": 16156 + }, + { + "epoch": 0.27918509814763615, + "grad_norm": 1.2818567425741931, + "learning_rate": 1.691657624837472e-05, + "loss": 0.4781, + "step": 16157 + }, + { + "epoch": 0.27920237766104505, + "grad_norm": 0.9670280964834341, + "learning_rate": 1.691617204435736e-05, + "loss": 0.5959, + "step": 16158 + }, + { + "epoch": 0.27921965717445396, + "grad_norm": 1.37974695750549, + "learning_rate": 1.691576781867802e-05, + "loss": 0.4531, + "step": 16159 + }, + { + "epoch": 0.27923693668786287, + "grad_norm": 0.8364206694186123, + "learning_rate": 1.691536357133797e-05, + "loss": 0.391, + "step": 16160 + }, + { + "epoch": 0.2792542162012718, + "grad_norm": 0.892709388944977, + "learning_rate": 1.6914959302338478e-05, + "loss": 0.6881, + "step": 16161 + }, + { + "epoch": 0.2792714957146807, + "grad_norm": 1.6677604714078247, + "learning_rate": 1.6914555011680807e-05, + "loss": 0.5153, + "step": 16162 + }, + { + "epoch": 0.2792887752280896, + "grad_norm": 2.070983130341557, + "learning_rate": 1.6914150699366222e-05, + "loss": 0.4146, + "step": 16163 + }, + { + "epoch": 0.2793060547414985, + "grad_norm": 0.4104505628375095, + "learning_rate": 1.6913746365395996e-05, + "loss": 0.5611, + "step": 16164 + }, + { + "epoch": 0.2793233342549074, + "grad_norm": 1.0912830135490381, + "learning_rate": 1.6913342009771387e-05, + "loss": 0.5443, + "step": 16165 + }, + { + "epoch": 0.27934061376831626, + "grad_norm": 1.5214061059227273, + "learning_rate": 1.6912937632493664e-05, + "loss": 0.4759, + "step": 16166 + }, + { + "epoch": 0.27935789328172517, + "grad_norm": 0.8152856068795721, + "learning_rate": 1.69125332335641e-05, + "loss": 0.4338, + "step": 16167 + }, + { + "epoch": 0.2793751727951341, + "grad_norm": 1.1870952263317802, + "learning_rate": 1.6912128812983954e-05, + "loss": 0.6049, + "step": 16168 + }, + { + "epoch": 0.279392452308543, + "grad_norm": 0.6942117203944111, + "learning_rate": 1.6911724370754495e-05, + "loss": 0.381, + "step": 16169 + }, + { + "epoch": 0.2794097318219519, + "grad_norm": 1.078645600291893, + "learning_rate": 1.691131990687699e-05, + "loss": 0.559, + "step": 16170 + }, + { + "epoch": 0.2794270113353608, + "grad_norm": 0.9377302011251146, + "learning_rate": 1.6910915421352706e-05, + "loss": 0.6371, + "step": 16171 + }, + { + "epoch": 0.2794442908487697, + "grad_norm": 1.5990107887919909, + "learning_rate": 1.691051091418291e-05, + "loss": 0.607, + "step": 16172 + }, + { + "epoch": 0.2794615703621786, + "grad_norm": 0.5470969133872756, + "learning_rate": 1.691010638536887e-05, + "loss": 0.4293, + "step": 16173 + }, + { + "epoch": 0.2794788498755875, + "grad_norm": 0.9816950032597878, + "learning_rate": 1.6909701834911853e-05, + "loss": 0.4315, + "step": 16174 + }, + { + "epoch": 0.27949612938899643, + "grad_norm": 1.1449808051281118, + "learning_rate": 1.690929726281312e-05, + "loss": 0.6691, + "step": 16175 + }, + { + "epoch": 0.27951340890240534, + "grad_norm": 0.9263546582373064, + "learning_rate": 1.6908892669073946e-05, + "loss": 0.4548, + "step": 16176 + }, + { + "epoch": 0.2795306884158142, + "grad_norm": 1.21354575733892, + "learning_rate": 1.6908488053695593e-05, + "loss": 0.4507, + "step": 16177 + }, + { + "epoch": 0.2795479679292231, + "grad_norm": 1.0168657366769749, + "learning_rate": 1.6908083416679334e-05, + "loss": 0.4401, + "step": 16178 + }, + { + "epoch": 0.279565247442632, + "grad_norm": 1.121430662485246, + "learning_rate": 1.6907678758026428e-05, + "loss": 0.486, + "step": 16179 + }, + { + "epoch": 0.2795825269560409, + "grad_norm": 1.1611216327639857, + "learning_rate": 1.690727407773815e-05, + "loss": 0.5252, + "step": 16180 + }, + { + "epoch": 0.2795998064694498, + "grad_norm": 1.1435249228671132, + "learning_rate": 1.6906869375815763e-05, + "loss": 0.5343, + "step": 16181 + }, + { + "epoch": 0.2796170859828587, + "grad_norm": 1.266451142753878, + "learning_rate": 1.690646465226054e-05, + "loss": 0.7104, + "step": 16182 + }, + { + "epoch": 0.27963436549626763, + "grad_norm": 1.2757416946279436, + "learning_rate": 1.690605990707374e-05, + "loss": 0.6942, + "step": 16183 + }, + { + "epoch": 0.27965164500967654, + "grad_norm": 0.6458272827147872, + "learning_rate": 1.690565514025664e-05, + "loss": 0.7705, + "step": 16184 + }, + { + "epoch": 0.27966892452308545, + "grad_norm": 1.538843069808386, + "learning_rate": 1.69052503518105e-05, + "loss": 0.5174, + "step": 16185 + }, + { + "epoch": 0.27968620403649436, + "grad_norm": 1.0493871751428228, + "learning_rate": 1.6904845541736592e-05, + "loss": 0.7328, + "step": 16186 + }, + { + "epoch": 0.2797034835499032, + "grad_norm": 0.9916554096218632, + "learning_rate": 1.690444071003618e-05, + "loss": 0.6134, + "step": 16187 + }, + { + "epoch": 0.2797207630633121, + "grad_norm": 0.946326925982613, + "learning_rate": 1.6904035856710538e-05, + "loss": 0.5439, + "step": 16188 + }, + { + "epoch": 0.279738042576721, + "grad_norm": 1.3967365129146505, + "learning_rate": 1.690363098176093e-05, + "loss": 0.6611, + "step": 16189 + }, + { + "epoch": 0.27975532209012993, + "grad_norm": 0.5982069817465614, + "learning_rate": 1.6903226085188626e-05, + "loss": 0.3465, + "step": 16190 + }, + { + "epoch": 0.27977260160353884, + "grad_norm": 0.9914186839192441, + "learning_rate": 1.6902821166994895e-05, + "loss": 0.4774, + "step": 16191 + }, + { + "epoch": 0.27978988111694775, + "grad_norm": 0.7829669256868181, + "learning_rate": 1.6902416227181002e-05, + "loss": 0.3857, + "step": 16192 + }, + { + "epoch": 0.27980716063035665, + "grad_norm": 1.2922201047986437, + "learning_rate": 1.690201126574822e-05, + "loss": 0.639, + "step": 16193 + }, + { + "epoch": 0.27982444014376556, + "grad_norm": 0.545324245572541, + "learning_rate": 1.690160628269781e-05, + "loss": 0.3548, + "step": 16194 + }, + { + "epoch": 0.27984171965717447, + "grad_norm": 0.7029036077947309, + "learning_rate": 1.6901201278031046e-05, + "loss": 0.569, + "step": 16195 + }, + { + "epoch": 0.2798589991705834, + "grad_norm": 0.6933080173357352, + "learning_rate": 1.6900796251749195e-05, + "loss": 0.3996, + "step": 16196 + }, + { + "epoch": 0.2798762786839923, + "grad_norm": 1.1001318065179106, + "learning_rate": 1.6900391203853522e-05, + "loss": 0.4082, + "step": 16197 + }, + { + "epoch": 0.27989355819740114, + "grad_norm": 1.2033960501451422, + "learning_rate": 1.6899986134345306e-05, + "loss": 0.5033, + "step": 16198 + }, + { + "epoch": 0.27991083771081005, + "grad_norm": 0.8108857230610474, + "learning_rate": 1.6899581043225804e-05, + "loss": 0.6127, + "step": 16199 + }, + { + "epoch": 0.27992811722421895, + "grad_norm": 0.849986251300564, + "learning_rate": 1.689917593049629e-05, + "loss": 0.5228, + "step": 16200 + }, + { + "epoch": 0.27994539673762786, + "grad_norm": 1.7219763445348293, + "learning_rate": 1.6898770796158034e-05, + "loss": 0.5094, + "step": 16201 + }, + { + "epoch": 0.27996267625103677, + "grad_norm": 0.7861090246428972, + "learning_rate": 1.6898365640212308e-05, + "loss": 0.4164, + "step": 16202 + }, + { + "epoch": 0.2799799557644457, + "grad_norm": 0.8255606978291746, + "learning_rate": 1.689796046266037e-05, + "loss": 0.7134, + "step": 16203 + }, + { + "epoch": 0.2799972352778546, + "grad_norm": 0.8677016351114725, + "learning_rate": 1.68975552635035e-05, + "loss": 0.4088, + "step": 16204 + }, + { + "epoch": 0.2800145147912635, + "grad_norm": 0.7630526647811335, + "learning_rate": 1.6897150042742958e-05, + "loss": 0.6058, + "step": 16205 + }, + { + "epoch": 0.2800317943046724, + "grad_norm": 0.8764201131062497, + "learning_rate": 1.689674480038002e-05, + "loss": 0.5023, + "step": 16206 + }, + { + "epoch": 0.2800490738180813, + "grad_norm": 0.38349979593498346, + "learning_rate": 1.6896339536415953e-05, + "loss": 0.5186, + "step": 16207 + }, + { + "epoch": 0.28006635333149016, + "grad_norm": 0.9674862328594013, + "learning_rate": 1.6895934250852028e-05, + "loss": 0.4749, + "step": 16208 + }, + { + "epoch": 0.28008363284489907, + "grad_norm": 1.026696387282813, + "learning_rate": 1.6895528943689514e-05, + "loss": 0.6478, + "step": 16209 + }, + { + "epoch": 0.280100912358308, + "grad_norm": 1.153699850247785, + "learning_rate": 1.6895123614929677e-05, + "loss": 0.5549, + "step": 16210 + }, + { + "epoch": 0.2801181918717169, + "grad_norm": 1.2926038054416906, + "learning_rate": 1.689471826457379e-05, + "loss": 0.3657, + "step": 16211 + }, + { + "epoch": 0.2801354713851258, + "grad_norm": 1.6715487550745762, + "learning_rate": 1.6894312892623118e-05, + "loss": 0.3985, + "step": 16212 + }, + { + "epoch": 0.2801527508985347, + "grad_norm": 0.7680913186939202, + "learning_rate": 1.6893907499078938e-05, + "loss": 0.7738, + "step": 16213 + }, + { + "epoch": 0.2801700304119436, + "grad_norm": 1.176312425970188, + "learning_rate": 1.6893502083942516e-05, + "loss": 0.2478, + "step": 16214 + }, + { + "epoch": 0.2801873099253525, + "grad_norm": 0.8269662262270477, + "learning_rate": 1.689309664721512e-05, + "loss": 0.5486, + "step": 16215 + }, + { + "epoch": 0.2802045894387614, + "grad_norm": 1.3311338871201155, + "learning_rate": 1.689269118889802e-05, + "loss": 0.4804, + "step": 16216 + }, + { + "epoch": 0.2802218689521703, + "grad_norm": 1.2548073448344135, + "learning_rate": 1.689228570899249e-05, + "loss": 0.5315, + "step": 16217 + }, + { + "epoch": 0.28023914846557924, + "grad_norm": 1.151729804363667, + "learning_rate": 1.6891880207499794e-05, + "loss": 0.4914, + "step": 16218 + }, + { + "epoch": 0.2802564279789881, + "grad_norm": 0.9179502822375903, + "learning_rate": 1.689147468442121e-05, + "loss": 0.3582, + "step": 16219 + }, + { + "epoch": 0.280273707492397, + "grad_norm": 1.197096417018342, + "learning_rate": 1.6891069139757997e-05, + "loss": 0.4816, + "step": 16220 + }, + { + "epoch": 0.2802909870058059, + "grad_norm": 1.108007083199302, + "learning_rate": 1.689066357351144e-05, + "loss": 0.4179, + "step": 16221 + }, + { + "epoch": 0.2803082665192148, + "grad_norm": 1.1585358137637627, + "learning_rate": 1.6890257985682797e-05, + "loss": 0.7008, + "step": 16222 + }, + { + "epoch": 0.2803255460326237, + "grad_norm": 0.7677326835924837, + "learning_rate": 1.688985237627334e-05, + "loss": 0.4611, + "step": 16223 + }, + { + "epoch": 0.2803428255460326, + "grad_norm": 4.844638242123295, + "learning_rate": 1.6889446745284345e-05, + "loss": 0.4638, + "step": 16224 + }, + { + "epoch": 0.28036010505944153, + "grad_norm": 1.0535414237569891, + "learning_rate": 1.688904109271708e-05, + "loss": 0.4732, + "step": 16225 + }, + { + "epoch": 0.28037738457285044, + "grad_norm": 1.1154938679117237, + "learning_rate": 1.6888635418572812e-05, + "loss": 0.571, + "step": 16226 + }, + { + "epoch": 0.28039466408625935, + "grad_norm": 0.9511819456869519, + "learning_rate": 1.6888229722852816e-05, + "loss": 0.4638, + "step": 16227 + }, + { + "epoch": 0.28041194359966826, + "grad_norm": 1.3116918488949014, + "learning_rate": 1.688782400555836e-05, + "loss": 0.4093, + "step": 16228 + }, + { + "epoch": 0.2804292231130771, + "grad_norm": 1.3975404549068173, + "learning_rate": 1.6887418266690715e-05, + "loss": 0.4807, + "step": 16229 + }, + { + "epoch": 0.280446502626486, + "grad_norm": 0.8800869414359068, + "learning_rate": 1.6887012506251154e-05, + "loss": 0.3895, + "step": 16230 + }, + { + "epoch": 0.2804637821398949, + "grad_norm": 0.7036323297775942, + "learning_rate": 1.6886606724240947e-05, + "loss": 0.4421, + "step": 16231 + }, + { + "epoch": 0.28048106165330383, + "grad_norm": 1.6550477098667569, + "learning_rate": 1.6886200920661365e-05, + "loss": 0.5459, + "step": 16232 + }, + { + "epoch": 0.28049834116671274, + "grad_norm": 0.8297975039322514, + "learning_rate": 1.6885795095513678e-05, + "loss": 0.6357, + "step": 16233 + }, + { + "epoch": 0.28051562068012165, + "grad_norm": 1.2897694690854657, + "learning_rate": 1.6885389248799153e-05, + "loss": 0.406, + "step": 16234 + }, + { + "epoch": 0.28053290019353055, + "grad_norm": 0.9510909435812505, + "learning_rate": 1.688498338051907e-05, + "loss": 0.5762, + "step": 16235 + }, + { + "epoch": 0.28055017970693946, + "grad_norm": 0.8627375966261521, + "learning_rate": 1.6884577490674695e-05, + "loss": 0.5895, + "step": 16236 + }, + { + "epoch": 0.28056745922034837, + "grad_norm": 0.9394050186921087, + "learning_rate": 1.68841715792673e-05, + "loss": 0.5832, + "step": 16237 + }, + { + "epoch": 0.2805847387337573, + "grad_norm": 0.9833804758065097, + "learning_rate": 1.6883765646298157e-05, + "loss": 0.6522, + "step": 16238 + }, + { + "epoch": 0.2806020182471662, + "grad_norm": 1.1439418992010701, + "learning_rate": 1.6883359691768535e-05, + "loss": 0.4962, + "step": 16239 + }, + { + "epoch": 0.28061929776057504, + "grad_norm": 0.9293089814974269, + "learning_rate": 1.6882953715679708e-05, + "loss": 0.6092, + "step": 16240 + }, + { + "epoch": 0.28063657727398394, + "grad_norm": 0.9318413221422363, + "learning_rate": 1.6882547718032948e-05, + "loss": 0.4271, + "step": 16241 + }, + { + "epoch": 0.28065385678739285, + "grad_norm": 1.3348856318137114, + "learning_rate": 1.6882141698829525e-05, + "loss": 0.4908, + "step": 16242 + }, + { + "epoch": 0.28067113630080176, + "grad_norm": 1.3654447371143148, + "learning_rate": 1.6881735658070712e-05, + "loss": 0.3089, + "step": 16243 + }, + { + "epoch": 0.28068841581421067, + "grad_norm": 0.7544417238973737, + "learning_rate": 1.688132959575778e-05, + "loss": 0.4494, + "step": 16244 + }, + { + "epoch": 0.2807056953276196, + "grad_norm": 1.6093681211718673, + "learning_rate": 1.6880923511891998e-05, + "loss": 0.4572, + "step": 16245 + }, + { + "epoch": 0.2807229748410285, + "grad_norm": 1.427312256414054, + "learning_rate": 1.6880517406474644e-05, + "loss": 0.532, + "step": 16246 + }, + { + "epoch": 0.2807402543544374, + "grad_norm": 0.9175845269463847, + "learning_rate": 1.6880111279506983e-05, + "loss": 0.6556, + "step": 16247 + }, + { + "epoch": 0.2807575338678463, + "grad_norm": 1.954940582732475, + "learning_rate": 1.6879705130990295e-05, + "loss": 0.5825, + "step": 16248 + }, + { + "epoch": 0.2807748133812552, + "grad_norm": 1.2302548621761498, + "learning_rate": 1.687929896092584e-05, + "loss": 0.4231, + "step": 16249 + }, + { + "epoch": 0.2807920928946641, + "grad_norm": 0.7618171854491024, + "learning_rate": 1.6878892769314906e-05, + "loss": 0.4743, + "step": 16250 + }, + { + "epoch": 0.28080937240807297, + "grad_norm": 1.2219020031911028, + "learning_rate": 1.6878486556158756e-05, + "loss": 0.474, + "step": 16251 + }, + { + "epoch": 0.2808266519214819, + "grad_norm": 1.3891986293745817, + "learning_rate": 1.6878080321458662e-05, + "loss": 0.4896, + "step": 16252 + }, + { + "epoch": 0.2808439314348908, + "grad_norm": 1.0998654178772966, + "learning_rate": 1.6877674065215897e-05, + "loss": 0.3822, + "step": 16253 + }, + { + "epoch": 0.2808612109482997, + "grad_norm": 0.7250446778649148, + "learning_rate": 1.6877267787431733e-05, + "loss": 0.5298, + "step": 16254 + }, + { + "epoch": 0.2808784904617086, + "grad_norm": 0.9988189649287625, + "learning_rate": 1.6876861488107446e-05, + "loss": 0.6132, + "step": 16255 + }, + { + "epoch": 0.2808957699751175, + "grad_norm": 0.7759421998070459, + "learning_rate": 1.6876455167244304e-05, + "loss": 0.351, + "step": 16256 + }, + { + "epoch": 0.2809130494885264, + "grad_norm": 0.45027746115068923, + "learning_rate": 1.6876048824843585e-05, + "loss": 0.716, + "step": 16257 + }, + { + "epoch": 0.2809303290019353, + "grad_norm": 1.576267627209688, + "learning_rate": 1.6875642460906557e-05, + "loss": 0.4564, + "step": 16258 + }, + { + "epoch": 0.2809476085153442, + "grad_norm": 1.1573858634266925, + "learning_rate": 1.687523607543449e-05, + "loss": 0.497, + "step": 16259 + }, + { + "epoch": 0.28096488802875313, + "grad_norm": 1.2446083717328837, + "learning_rate": 1.6874829668428667e-05, + "loss": 0.6185, + "step": 16260 + }, + { + "epoch": 0.280982167542162, + "grad_norm": 0.9353522532296003, + "learning_rate": 1.687442323989035e-05, + "loss": 0.5123, + "step": 16261 + }, + { + "epoch": 0.2809994470555709, + "grad_norm": 0.7844418541404878, + "learning_rate": 1.687401678982082e-05, + "loss": 0.5034, + "step": 16262 + }, + { + "epoch": 0.2810167265689798, + "grad_norm": 1.265719149479558, + "learning_rate": 1.687361031822135e-05, + "loss": 0.5538, + "step": 16263 + }, + { + "epoch": 0.2810340060823887, + "grad_norm": 1.0073486164410934, + "learning_rate": 1.6873203825093206e-05, + "loss": 0.6533, + "step": 16264 + }, + { + "epoch": 0.2810512855957976, + "grad_norm": 1.1409823631254756, + "learning_rate": 1.6872797310437667e-05, + "loss": 0.3876, + "step": 16265 + }, + { + "epoch": 0.2810685651092065, + "grad_norm": 0.690525025962618, + "learning_rate": 1.6872390774256006e-05, + "loss": 0.4046, + "step": 16266 + }, + { + "epoch": 0.28108584462261543, + "grad_norm": 1.071074605667334, + "learning_rate": 1.6871984216549492e-05, + "loss": 0.4804, + "step": 16267 + }, + { + "epoch": 0.28110312413602434, + "grad_norm": 0.700961345105089, + "learning_rate": 1.68715776373194e-05, + "loss": 0.3438, + "step": 16268 + }, + { + "epoch": 0.28112040364943325, + "grad_norm": 1.2683543381196911, + "learning_rate": 1.6871171036567008e-05, + "loss": 0.6471, + "step": 16269 + }, + { + "epoch": 0.28113768316284216, + "grad_norm": 0.8665895975736936, + "learning_rate": 1.6870764414293582e-05, + "loss": 0.4495, + "step": 16270 + }, + { + "epoch": 0.28115496267625106, + "grad_norm": 0.9787048303481302, + "learning_rate": 1.6870357770500403e-05, + "loss": 0.4469, + "step": 16271 + }, + { + "epoch": 0.2811722421896599, + "grad_norm": 1.2555754262560384, + "learning_rate": 1.686995110518874e-05, + "loss": 0.5245, + "step": 16272 + }, + { + "epoch": 0.2811895217030688, + "grad_norm": 0.7573552764259464, + "learning_rate": 1.686954441835987e-05, + "loss": 0.3773, + "step": 16273 + }, + { + "epoch": 0.28120680121647773, + "grad_norm": 0.9444594167198171, + "learning_rate": 1.6869137710015063e-05, + "loss": 0.5389, + "step": 16274 + }, + { + "epoch": 0.28122408072988664, + "grad_norm": 1.0692465379913156, + "learning_rate": 1.6868730980155595e-05, + "loss": 0.6933, + "step": 16275 + }, + { + "epoch": 0.28124136024329555, + "grad_norm": 1.170145263437241, + "learning_rate": 1.6868324228782743e-05, + "loss": 0.3958, + "step": 16276 + }, + { + "epoch": 0.28125863975670445, + "grad_norm": 1.7564088035091352, + "learning_rate": 1.6867917455897772e-05, + "loss": 0.5791, + "step": 16277 + }, + { + "epoch": 0.28127591927011336, + "grad_norm": 0.865566404051238, + "learning_rate": 1.6867510661501966e-05, + "loss": 0.4672, + "step": 16278 + }, + { + "epoch": 0.28129319878352227, + "grad_norm": 1.2239791238880975, + "learning_rate": 1.686710384559659e-05, + "loss": 0.4856, + "step": 16279 + }, + { + "epoch": 0.2813104782969312, + "grad_norm": 1.2538694218462632, + "learning_rate": 1.6866697008182925e-05, + "loss": 0.6177, + "step": 16280 + }, + { + "epoch": 0.2813277578103401, + "grad_norm": 1.0449774138648993, + "learning_rate": 1.6866290149262243e-05, + "loss": 0.6412, + "step": 16281 + }, + { + "epoch": 0.28134503732374894, + "grad_norm": 1.2276109947977278, + "learning_rate": 1.686588326883582e-05, + "loss": 0.5096, + "step": 16282 + }, + { + "epoch": 0.28136231683715784, + "grad_norm": 0.986526766008239, + "learning_rate": 1.6865476366904926e-05, + "loss": 0.4923, + "step": 16283 + }, + { + "epoch": 0.28137959635056675, + "grad_norm": 0.8080758388062234, + "learning_rate": 1.6865069443470843e-05, + "loss": 0.4801, + "step": 16284 + }, + { + "epoch": 0.28139687586397566, + "grad_norm": 0.7539037733803441, + "learning_rate": 1.6864662498534835e-05, + "loss": 0.3983, + "step": 16285 + }, + { + "epoch": 0.28141415537738457, + "grad_norm": 0.929781723863623, + "learning_rate": 1.6864255532098187e-05, + "loss": 0.5581, + "step": 16286 + }, + { + "epoch": 0.2814314348907935, + "grad_norm": 0.4598536115498151, + "learning_rate": 1.6863848544162165e-05, + "loss": 0.6332, + "step": 16287 + }, + { + "epoch": 0.2814487144042024, + "grad_norm": 0.6701328940886334, + "learning_rate": 1.6863441534728055e-05, + "loss": 0.2969, + "step": 16288 + }, + { + "epoch": 0.2814659939176113, + "grad_norm": 1.208313964580344, + "learning_rate": 1.6863034503797115e-05, + "loss": 0.5001, + "step": 16289 + }, + { + "epoch": 0.2814832734310202, + "grad_norm": 0.6793036208485187, + "learning_rate": 1.6862627451370634e-05, + "loss": 0.3408, + "step": 16290 + }, + { + "epoch": 0.2815005529444291, + "grad_norm": 0.9568042045417404, + "learning_rate": 1.6862220377449886e-05, + "loss": 0.4877, + "step": 16291 + }, + { + "epoch": 0.281517832457838, + "grad_norm": 0.9592663445935123, + "learning_rate": 1.6861813282036138e-05, + "loss": 0.4681, + "step": 16292 + }, + { + "epoch": 0.28153511197124687, + "grad_norm": 0.8212495358546061, + "learning_rate": 1.6861406165130666e-05, + "loss": 0.4041, + "step": 16293 + }, + { + "epoch": 0.2815523914846558, + "grad_norm": 0.8181799564953071, + "learning_rate": 1.6860999026734756e-05, + "loss": 0.6167, + "step": 16294 + }, + { + "epoch": 0.2815696709980647, + "grad_norm": 1.3582891340347267, + "learning_rate": 1.686059186684967e-05, + "loss": 0.5782, + "step": 16295 + }, + { + "epoch": 0.2815869505114736, + "grad_norm": 1.1769070380044846, + "learning_rate": 1.686018468547669e-05, + "loss": 0.5393, + "step": 16296 + }, + { + "epoch": 0.2816042300248825, + "grad_norm": 0.8634260640112005, + "learning_rate": 1.685977748261709e-05, + "loss": 0.4982, + "step": 16297 + }, + { + "epoch": 0.2816215095382914, + "grad_norm": 1.6115831050771194, + "learning_rate": 1.6859370258272148e-05, + "loss": 0.515, + "step": 16298 + }, + { + "epoch": 0.2816387890517003, + "grad_norm": 0.4499489078209517, + "learning_rate": 1.6858963012443135e-05, + "loss": 0.6742, + "step": 16299 + }, + { + "epoch": 0.2816560685651092, + "grad_norm": 0.9067290559556792, + "learning_rate": 1.6858555745131328e-05, + "loss": 0.6043, + "step": 16300 + }, + { + "epoch": 0.2816733480785181, + "grad_norm": 0.9321194017008361, + "learning_rate": 1.6858148456338006e-05, + "loss": 0.454, + "step": 16301 + }, + { + "epoch": 0.28169062759192703, + "grad_norm": 0.5873236480568169, + "learning_rate": 1.685774114606444e-05, + "loss": 0.9092, + "step": 16302 + }, + { + "epoch": 0.2817079071053359, + "grad_norm": 0.9762492389767032, + "learning_rate": 1.6857333814311908e-05, + "loss": 0.4942, + "step": 16303 + }, + { + "epoch": 0.2817251866187448, + "grad_norm": 1.199125588920059, + "learning_rate": 1.6856926461081682e-05, + "loss": 0.633, + "step": 16304 + }, + { + "epoch": 0.2817424661321537, + "grad_norm": 0.9153716885244648, + "learning_rate": 1.685651908637505e-05, + "loss": 0.5088, + "step": 16305 + }, + { + "epoch": 0.2817597456455626, + "grad_norm": 1.2690419252352287, + "learning_rate": 1.685611169019327e-05, + "loss": 0.4422, + "step": 16306 + }, + { + "epoch": 0.2817770251589715, + "grad_norm": 0.7936620881914349, + "learning_rate": 1.685570427253763e-05, + "loss": 0.3072, + "step": 16307 + }, + { + "epoch": 0.2817943046723804, + "grad_norm": 0.435438494905927, + "learning_rate": 1.68552968334094e-05, + "loss": 0.65, + "step": 16308 + }, + { + "epoch": 0.28181158418578933, + "grad_norm": 1.0064685596284835, + "learning_rate": 1.6854889372809866e-05, + "loss": 0.6026, + "step": 16309 + }, + { + "epoch": 0.28182886369919824, + "grad_norm": 1.1147833190160024, + "learning_rate": 1.6854481890740295e-05, + "loss": 0.6564, + "step": 16310 + }, + { + "epoch": 0.28184614321260715, + "grad_norm": 1.7357699427030222, + "learning_rate": 1.6854074387201963e-05, + "loss": 0.532, + "step": 16311 + }, + { + "epoch": 0.28186342272601606, + "grad_norm": 0.7836825398503615, + "learning_rate": 1.6853666862196153e-05, + "loss": 0.375, + "step": 16312 + }, + { + "epoch": 0.28188070223942496, + "grad_norm": 1.2760708364343214, + "learning_rate": 1.6853259315724136e-05, + "loss": 0.8222, + "step": 16313 + }, + { + "epoch": 0.2818979817528338, + "grad_norm": 1.119000029740949, + "learning_rate": 1.685285174778719e-05, + "loss": 0.7757, + "step": 16314 + }, + { + "epoch": 0.2819152612662427, + "grad_norm": 0.5784713252648548, + "learning_rate": 1.6852444158386594e-05, + "loss": 0.6623, + "step": 16315 + }, + { + "epoch": 0.28193254077965163, + "grad_norm": 1.437316683026923, + "learning_rate": 1.6852036547523617e-05, + "loss": 0.5803, + "step": 16316 + }, + { + "epoch": 0.28194982029306054, + "grad_norm": 1.127506448841093, + "learning_rate": 1.6851628915199546e-05, + "loss": 0.585, + "step": 16317 + }, + { + "epoch": 0.28196709980646945, + "grad_norm": 1.0899140585675395, + "learning_rate": 1.6851221261415648e-05, + "loss": 0.3995, + "step": 16318 + }, + { + "epoch": 0.28198437931987835, + "grad_norm": 0.8451171042259752, + "learning_rate": 1.6850813586173207e-05, + "loss": 0.4822, + "step": 16319 + }, + { + "epoch": 0.28200165883328726, + "grad_norm": 1.1063291568567586, + "learning_rate": 1.6850405889473497e-05, + "loss": 0.4682, + "step": 16320 + }, + { + "epoch": 0.28201893834669617, + "grad_norm": 0.7328646322908122, + "learning_rate": 1.6849998171317795e-05, + "loss": 0.463, + "step": 16321 + }, + { + "epoch": 0.2820362178601051, + "grad_norm": 1.0043419544139975, + "learning_rate": 1.6849590431707375e-05, + "loss": 0.4973, + "step": 16322 + }, + { + "epoch": 0.282053497373514, + "grad_norm": 1.2184538117024986, + "learning_rate": 1.6849182670643522e-05, + "loss": 0.4922, + "step": 16323 + }, + { + "epoch": 0.2820707768869229, + "grad_norm": 1.0977906281428769, + "learning_rate": 1.6848774888127505e-05, + "loss": 0.341, + "step": 16324 + }, + { + "epoch": 0.28208805640033174, + "grad_norm": 0.7443767396505837, + "learning_rate": 1.684836708416061e-05, + "loss": 0.5617, + "step": 16325 + }, + { + "epoch": 0.28210533591374065, + "grad_norm": 1.414539642217534, + "learning_rate": 1.6847959258744103e-05, + "loss": 0.3668, + "step": 16326 + }, + { + "epoch": 0.28212261542714956, + "grad_norm": 1.3701945947139451, + "learning_rate": 1.6847551411879268e-05, + "loss": 0.5808, + "step": 16327 + }, + { + "epoch": 0.28213989494055847, + "grad_norm": 0.9221257444880678, + "learning_rate": 1.6847143543567383e-05, + "loss": 0.3776, + "step": 16328 + }, + { + "epoch": 0.2821571744539674, + "grad_norm": 0.9456419077440634, + "learning_rate": 1.6846735653809724e-05, + "loss": 0.4855, + "step": 16329 + }, + { + "epoch": 0.2821744539673763, + "grad_norm": 0.7029498804317975, + "learning_rate": 1.6846327742607567e-05, + "loss": 0.4662, + "step": 16330 + }, + { + "epoch": 0.2821917334807852, + "grad_norm": 0.9431267107033493, + "learning_rate": 1.6845919809962194e-05, + "loss": 0.4013, + "step": 16331 + }, + { + "epoch": 0.2822090129941941, + "grad_norm": 0.9481208942735224, + "learning_rate": 1.6845511855874877e-05, + "loss": 0.6963, + "step": 16332 + }, + { + "epoch": 0.282226292507603, + "grad_norm": 2.2129147509364424, + "learning_rate": 1.68451038803469e-05, + "loss": 0.5298, + "step": 16333 + }, + { + "epoch": 0.2822435720210119, + "grad_norm": 1.1018629333118026, + "learning_rate": 1.6844695883379535e-05, + "loss": 0.3194, + "step": 16334 + }, + { + "epoch": 0.28226085153442076, + "grad_norm": 1.33363643044996, + "learning_rate": 1.6844287864974062e-05, + "loss": 0.7107, + "step": 16335 + }, + { + "epoch": 0.2822781310478297, + "grad_norm": 1.1037493354997934, + "learning_rate": 1.684387982513176e-05, + "loss": 0.4549, + "step": 16336 + }, + { + "epoch": 0.2822954105612386, + "grad_norm": 1.2846248931479016, + "learning_rate": 1.6843471763853908e-05, + "loss": 0.5023, + "step": 16337 + }, + { + "epoch": 0.2823126900746475, + "grad_norm": 0.7693052347922974, + "learning_rate": 1.684306368114178e-05, + "loss": 0.3841, + "step": 16338 + }, + { + "epoch": 0.2823299695880564, + "grad_norm": 0.83236827556159, + "learning_rate": 1.6842655576996656e-05, + "loss": 0.671, + "step": 16339 + }, + { + "epoch": 0.2823472491014653, + "grad_norm": 0.9237735952748157, + "learning_rate": 1.6842247451419814e-05, + "loss": 0.4558, + "step": 16340 + }, + { + "epoch": 0.2823645286148742, + "grad_norm": 1.373723163326703, + "learning_rate": 1.6841839304412534e-05, + "loss": 0.4675, + "step": 16341 + }, + { + "epoch": 0.2823818081282831, + "grad_norm": 0.41364040005045816, + "learning_rate": 1.6841431135976096e-05, + "loss": 0.5617, + "step": 16342 + }, + { + "epoch": 0.282399087641692, + "grad_norm": 1.079446532374757, + "learning_rate": 1.6841022946111774e-05, + "loss": 0.4427, + "step": 16343 + }, + { + "epoch": 0.28241636715510093, + "grad_norm": 1.4357479588469262, + "learning_rate": 1.6840614734820846e-05, + "loss": 0.4639, + "step": 16344 + }, + { + "epoch": 0.28243364666850984, + "grad_norm": 1.605037304884471, + "learning_rate": 1.6840206502104595e-05, + "loss": 0.6291, + "step": 16345 + }, + { + "epoch": 0.2824509261819187, + "grad_norm": 1.49196773442289, + "learning_rate": 1.68397982479643e-05, + "loss": 0.6511, + "step": 16346 + }, + { + "epoch": 0.2824682056953276, + "grad_norm": 1.4450338936764757, + "learning_rate": 1.6839389972401233e-05, + "loss": 0.6212, + "step": 16347 + }, + { + "epoch": 0.2824854852087365, + "grad_norm": 1.5910077598291408, + "learning_rate": 1.683898167541668e-05, + "loss": 0.5403, + "step": 16348 + }, + { + "epoch": 0.2825027647221454, + "grad_norm": 1.069677801121025, + "learning_rate": 1.6838573357011913e-05, + "loss": 0.4716, + "step": 16349 + }, + { + "epoch": 0.2825200442355543, + "grad_norm": 0.8774222559709349, + "learning_rate": 1.6838165017188215e-05, + "loss": 0.4005, + "step": 16350 + }, + { + "epoch": 0.28253732374896323, + "grad_norm": 0.4854731308819206, + "learning_rate": 1.6837756655946868e-05, + "loss": 0.6149, + "step": 16351 + }, + { + "epoch": 0.28255460326237214, + "grad_norm": 1.2171291277484846, + "learning_rate": 1.6837348273289146e-05, + "loss": 0.4268, + "step": 16352 + }, + { + "epoch": 0.28257188277578105, + "grad_norm": 0.8357144937710312, + "learning_rate": 1.6836939869216328e-05, + "loss": 0.3447, + "step": 16353 + }, + { + "epoch": 0.28258916228918995, + "grad_norm": 0.876529615220662, + "learning_rate": 1.6836531443729697e-05, + "loss": 0.5627, + "step": 16354 + }, + { + "epoch": 0.28260644180259886, + "grad_norm": 0.8364192548924829, + "learning_rate": 1.6836122996830527e-05, + "loss": 0.487, + "step": 16355 + }, + { + "epoch": 0.2826237213160077, + "grad_norm": 0.9953998563312948, + "learning_rate": 1.6835714528520103e-05, + "loss": 0.421, + "step": 16356 + }, + { + "epoch": 0.2826410008294166, + "grad_norm": 1.0235538441770529, + "learning_rate": 1.68353060387997e-05, + "loss": 0.5065, + "step": 16357 + }, + { + "epoch": 0.28265828034282553, + "grad_norm": 1.0564036918964557, + "learning_rate": 1.6834897527670598e-05, + "loss": 0.4011, + "step": 16358 + }, + { + "epoch": 0.28267555985623444, + "grad_norm": 0.8106980273487339, + "learning_rate": 1.6834488995134083e-05, + "loss": 0.5614, + "step": 16359 + }, + { + "epoch": 0.28269283936964335, + "grad_norm": 1.0664252267139331, + "learning_rate": 1.6834080441191423e-05, + "loss": 0.679, + "step": 16360 + }, + { + "epoch": 0.28271011888305225, + "grad_norm": 0.4108788221775374, + "learning_rate": 1.6833671865843904e-05, + "loss": 0.6299, + "step": 16361 + }, + { + "epoch": 0.28272739839646116, + "grad_norm": 1.4600411511785985, + "learning_rate": 1.683326326909281e-05, + "loss": 0.5205, + "step": 16362 + }, + { + "epoch": 0.28274467790987007, + "grad_norm": 0.7378198576583758, + "learning_rate": 1.683285465093941e-05, + "loss": 0.3829, + "step": 16363 + }, + { + "epoch": 0.282761957423279, + "grad_norm": 1.2513065683981741, + "learning_rate": 1.6832446011384994e-05, + "loss": 0.5472, + "step": 16364 + }, + { + "epoch": 0.2827792369366879, + "grad_norm": 0.6443762792890324, + "learning_rate": 1.6832037350430838e-05, + "loss": 0.4333, + "step": 16365 + }, + { + "epoch": 0.2827965164500968, + "grad_norm": 1.4579425606973206, + "learning_rate": 1.6831628668078223e-05, + "loss": 0.5551, + "step": 16366 + }, + { + "epoch": 0.28281379596350564, + "grad_norm": 0.8954435743187044, + "learning_rate": 1.6831219964328424e-05, + "loss": 0.629, + "step": 16367 + }, + { + "epoch": 0.28283107547691455, + "grad_norm": 0.9475471728307611, + "learning_rate": 1.6830811239182726e-05, + "loss": 0.5761, + "step": 16368 + }, + { + "epoch": 0.28284835499032346, + "grad_norm": 1.1134775236672636, + "learning_rate": 1.6830402492642412e-05, + "loss": 0.4659, + "step": 16369 + }, + { + "epoch": 0.28286563450373237, + "grad_norm": 0.844770370224043, + "learning_rate": 1.6829993724708753e-05, + "loss": 0.3829, + "step": 16370 + }, + { + "epoch": 0.2828829140171413, + "grad_norm": 1.402801202912495, + "learning_rate": 1.6829584935383036e-05, + "loss": 0.7319, + "step": 16371 + }, + { + "epoch": 0.2829001935305502, + "grad_norm": 0.7729394120299996, + "learning_rate": 1.682917612466654e-05, + "loss": 0.6756, + "step": 16372 + }, + { + "epoch": 0.2829174730439591, + "grad_norm": 0.7601453596147058, + "learning_rate": 1.6828767292560545e-05, + "loss": 0.6869, + "step": 16373 + }, + { + "epoch": 0.282934752557368, + "grad_norm": 1.6951187900357687, + "learning_rate": 1.6828358439066333e-05, + "loss": 0.6218, + "step": 16374 + }, + { + "epoch": 0.2829520320707769, + "grad_norm": 1.190472601709234, + "learning_rate": 1.6827949564185183e-05, + "loss": 0.5374, + "step": 16375 + }, + { + "epoch": 0.2829693115841858, + "grad_norm": 0.5685443567442383, + "learning_rate": 1.6827540667918378e-05, + "loss": 0.3532, + "step": 16376 + }, + { + "epoch": 0.28298659109759466, + "grad_norm": 1.1035881937050476, + "learning_rate": 1.6827131750267197e-05, + "loss": 0.5375, + "step": 16377 + }, + { + "epoch": 0.28300387061100357, + "grad_norm": 1.2713282434222628, + "learning_rate": 1.6826722811232916e-05, + "loss": 0.4885, + "step": 16378 + }, + { + "epoch": 0.2830211501244125, + "grad_norm": 1.4775067380987204, + "learning_rate": 1.6826313850816824e-05, + "loss": 0.461, + "step": 16379 + }, + { + "epoch": 0.2830384296378214, + "grad_norm": 1.6655754771105455, + "learning_rate": 1.6825904869020197e-05, + "loss": 0.6492, + "step": 16380 + }, + { + "epoch": 0.2830557091512303, + "grad_norm": 0.7673125252686557, + "learning_rate": 1.6825495865844315e-05, + "loss": 0.3451, + "step": 16381 + }, + { + "epoch": 0.2830729886646392, + "grad_norm": 0.8733274174680387, + "learning_rate": 1.6825086841290465e-05, + "loss": 0.5452, + "step": 16382 + }, + { + "epoch": 0.2830902681780481, + "grad_norm": 0.9136988087068314, + "learning_rate": 1.682467779535992e-05, + "loss": 0.4653, + "step": 16383 + }, + { + "epoch": 0.283107547691457, + "grad_norm": 0.9486750097224391, + "learning_rate": 1.682426872805397e-05, + "loss": 0.4763, + "step": 16384 + }, + { + "epoch": 0.2831248272048659, + "grad_norm": 1.0405999720091168, + "learning_rate": 1.682385963937389e-05, + "loss": 0.3555, + "step": 16385 + }, + { + "epoch": 0.28314210671827483, + "grad_norm": 0.4713152087828211, + "learning_rate": 1.6823450529320964e-05, + "loss": 0.8105, + "step": 16386 + }, + { + "epoch": 0.28315938623168374, + "grad_norm": 0.9931772077612382, + "learning_rate": 1.6823041397896475e-05, + "loss": 0.44, + "step": 16387 + }, + { + "epoch": 0.2831766657450926, + "grad_norm": 1.0611764284452945, + "learning_rate": 1.6822632245101697e-05, + "loss": 0.4949, + "step": 16388 + }, + { + "epoch": 0.2831939452585015, + "grad_norm": 0.441745195686093, + "learning_rate": 1.6822223070937918e-05, + "loss": 0.4929, + "step": 16389 + }, + { + "epoch": 0.2832112247719104, + "grad_norm": 1.0392203755571954, + "learning_rate": 1.6821813875406414e-05, + "loss": 0.7981, + "step": 16390 + }, + { + "epoch": 0.2832285042853193, + "grad_norm": 1.4627922862033622, + "learning_rate": 1.6821404658508474e-05, + "loss": 0.645, + "step": 16391 + }, + { + "epoch": 0.2832457837987282, + "grad_norm": 0.8199020961955928, + "learning_rate": 1.6820995420245378e-05, + "loss": 0.4424, + "step": 16392 + }, + { + "epoch": 0.28326306331213713, + "grad_norm": 0.5824425805480471, + "learning_rate": 1.6820586160618404e-05, + "loss": 0.2921, + "step": 16393 + }, + { + "epoch": 0.28328034282554604, + "grad_norm": 1.1606263776594576, + "learning_rate": 1.6820176879628835e-05, + "loss": 0.493, + "step": 16394 + }, + { + "epoch": 0.28329762233895495, + "grad_norm": 1.3425653945170632, + "learning_rate": 1.6819767577277955e-05, + "loss": 0.5883, + "step": 16395 + }, + { + "epoch": 0.28331490185236385, + "grad_norm": 0.7091071551783005, + "learning_rate": 1.6819358253567043e-05, + "loss": 0.4421, + "step": 16396 + }, + { + "epoch": 0.28333218136577276, + "grad_norm": 0.4128607174704922, + "learning_rate": 1.6818948908497385e-05, + "loss": 0.5941, + "step": 16397 + }, + { + "epoch": 0.28334946087918167, + "grad_norm": 1.4794998796480414, + "learning_rate": 1.6818539542070263e-05, + "loss": 0.371, + "step": 16398 + }, + { + "epoch": 0.2833667403925905, + "grad_norm": 1.0805877338144876, + "learning_rate": 1.6818130154286953e-05, + "loss": 0.3986, + "step": 16399 + }, + { + "epoch": 0.28338401990599943, + "grad_norm": 0.8072440791393686, + "learning_rate": 1.681772074514874e-05, + "loss": 0.4392, + "step": 16400 + }, + { + "epoch": 0.28340129941940834, + "grad_norm": 0.7153885008288479, + "learning_rate": 1.681731131465691e-05, + "loss": 0.5546, + "step": 16401 + }, + { + "epoch": 0.28341857893281724, + "grad_norm": 1.3827983890149749, + "learning_rate": 1.6816901862812742e-05, + "loss": 0.5041, + "step": 16402 + }, + { + "epoch": 0.28343585844622615, + "grad_norm": 0.7614142951027844, + "learning_rate": 1.681649238961752e-05, + "loss": 0.4339, + "step": 16403 + }, + { + "epoch": 0.28345313795963506, + "grad_norm": 1.640009271208085, + "learning_rate": 1.6816082895072526e-05, + "loss": 0.708, + "step": 16404 + }, + { + "epoch": 0.28347041747304397, + "grad_norm": 1.1924237367692132, + "learning_rate": 1.681567337917904e-05, + "loss": 0.6366, + "step": 16405 + }, + { + "epoch": 0.2834876969864529, + "grad_norm": 1.7509819379454687, + "learning_rate": 1.681526384193835e-05, + "loss": 0.5555, + "step": 16406 + }, + { + "epoch": 0.2835049764998618, + "grad_norm": 1.0396491298471202, + "learning_rate": 1.6814854283351736e-05, + "loss": 0.5583, + "step": 16407 + }, + { + "epoch": 0.2835222560132707, + "grad_norm": 1.0259320498048392, + "learning_rate": 1.681444470342048e-05, + "loss": 0.541, + "step": 16408 + }, + { + "epoch": 0.28353953552667954, + "grad_norm": 1.2219372397763175, + "learning_rate": 1.6814035102145862e-05, + "loss": 0.5712, + "step": 16409 + }, + { + "epoch": 0.28355681504008845, + "grad_norm": 1.0564743631363884, + "learning_rate": 1.681362547952917e-05, + "loss": 0.4594, + "step": 16410 + }, + { + "epoch": 0.28357409455349736, + "grad_norm": 0.5803109261360455, + "learning_rate": 1.6813215835571685e-05, + "loss": 0.5081, + "step": 16411 + }, + { + "epoch": 0.28359137406690627, + "grad_norm": 0.7740824449243833, + "learning_rate": 1.6812806170274692e-05, + "loss": 0.2546, + "step": 16412 + }, + { + "epoch": 0.2836086535803152, + "grad_norm": 1.2965635429842763, + "learning_rate": 1.681239648363947e-05, + "loss": 0.5751, + "step": 16413 + }, + { + "epoch": 0.2836259330937241, + "grad_norm": 1.8842351481388555, + "learning_rate": 1.6811986775667307e-05, + "loss": 0.7804, + "step": 16414 + }, + { + "epoch": 0.283643212607133, + "grad_norm": 1.2225476160483024, + "learning_rate": 1.6811577046359483e-05, + "loss": 0.5523, + "step": 16415 + }, + { + "epoch": 0.2836604921205419, + "grad_norm": 0.6389469502319742, + "learning_rate": 1.681116729571728e-05, + "loss": 0.3699, + "step": 16416 + }, + { + "epoch": 0.2836777716339508, + "grad_norm": 0.8284448763175646, + "learning_rate": 1.6810757523741987e-05, + "loss": 0.5169, + "step": 16417 + }, + { + "epoch": 0.2836950511473597, + "grad_norm": 0.7277433994169832, + "learning_rate": 1.681034773043488e-05, + "loss": 0.4212, + "step": 16418 + }, + { + "epoch": 0.2837123306607686, + "grad_norm": 0.8334217884855213, + "learning_rate": 1.680993791579725e-05, + "loss": 0.3894, + "step": 16419 + }, + { + "epoch": 0.28372961017417747, + "grad_norm": 0.442417334850148, + "learning_rate": 1.6809528079830374e-05, + "loss": 0.6202, + "step": 16420 + }, + { + "epoch": 0.2837468896875864, + "grad_norm": 1.2858756233628788, + "learning_rate": 1.680911822253554e-05, + "loss": 0.604, + "step": 16421 + }, + { + "epoch": 0.2837641692009953, + "grad_norm": 0.8748385051557583, + "learning_rate": 1.6808708343914027e-05, + "loss": 0.5958, + "step": 16422 + }, + { + "epoch": 0.2837814487144042, + "grad_norm": 0.40100922275767115, + "learning_rate": 1.6808298443967126e-05, + "loss": 0.5144, + "step": 16423 + }, + { + "epoch": 0.2837987282278131, + "grad_norm": 1.010697778495894, + "learning_rate": 1.6807888522696116e-05, + "loss": 0.4941, + "step": 16424 + }, + { + "epoch": 0.283816007741222, + "grad_norm": 1.374588492210313, + "learning_rate": 1.6807478580102282e-05, + "loss": 0.4421, + "step": 16425 + }, + { + "epoch": 0.2838332872546309, + "grad_norm": 1.2104195468461212, + "learning_rate": 1.6807068616186907e-05, + "loss": 0.4973, + "step": 16426 + }, + { + "epoch": 0.2838505667680398, + "grad_norm": 1.1720490234707996, + "learning_rate": 1.6806658630951276e-05, + "loss": 0.527, + "step": 16427 + }, + { + "epoch": 0.28386784628144873, + "grad_norm": 1.0779699319617466, + "learning_rate": 1.6806248624396673e-05, + "loss": 0.4216, + "step": 16428 + }, + { + "epoch": 0.28388512579485764, + "grad_norm": 0.8610632742348053, + "learning_rate": 1.680583859652438e-05, + "loss": 0.6112, + "step": 16429 + }, + { + "epoch": 0.2839024053082665, + "grad_norm": 1.2143709307178638, + "learning_rate": 1.6805428547335685e-05, + "loss": 0.5568, + "step": 16430 + }, + { + "epoch": 0.2839196848216754, + "grad_norm": 0.765168330843782, + "learning_rate": 1.6805018476831873e-05, + "loss": 0.4838, + "step": 16431 + }, + { + "epoch": 0.2839369643350843, + "grad_norm": 0.45378428312740915, + "learning_rate": 1.6804608385014222e-05, + "loss": 0.8138, + "step": 16432 + }, + { + "epoch": 0.2839542438484932, + "grad_norm": 1.5789042447854063, + "learning_rate": 1.6804198271884022e-05, + "loss": 0.7093, + "step": 16433 + }, + { + "epoch": 0.2839715233619021, + "grad_norm": 0.6854786505100264, + "learning_rate": 1.6803788137442558e-05, + "loss": 0.588, + "step": 16434 + }, + { + "epoch": 0.28398880287531103, + "grad_norm": 1.1363849069465866, + "learning_rate": 1.680337798169111e-05, + "loss": 0.6672, + "step": 16435 + }, + { + "epoch": 0.28400608238871994, + "grad_norm": 1.2514897585451101, + "learning_rate": 1.6802967804630964e-05, + "loss": 0.5272, + "step": 16436 + }, + { + "epoch": 0.28402336190212885, + "grad_norm": 1.2024415866105815, + "learning_rate": 1.680255760626341e-05, + "loss": 0.5588, + "step": 16437 + }, + { + "epoch": 0.28404064141553775, + "grad_norm": 1.0391235251271853, + "learning_rate": 1.6802147386589728e-05, + "loss": 0.5328, + "step": 16438 + }, + { + "epoch": 0.28405792092894666, + "grad_norm": 2.0724347013325053, + "learning_rate": 1.6801737145611198e-05, + "loss": 0.6772, + "step": 16439 + }, + { + "epoch": 0.28407520044235557, + "grad_norm": 0.767876731018408, + "learning_rate": 1.6801326883329114e-05, + "loss": 0.6553, + "step": 16440 + }, + { + "epoch": 0.2840924799557644, + "grad_norm": 1.425359761328466, + "learning_rate": 1.6800916599744756e-05, + "loss": 0.4373, + "step": 16441 + }, + { + "epoch": 0.28410975946917333, + "grad_norm": 1.056168799828037, + "learning_rate": 1.6800506294859412e-05, + "loss": 0.6816, + "step": 16442 + }, + { + "epoch": 0.28412703898258224, + "grad_norm": 0.3651221741818122, + "learning_rate": 1.6800095968674365e-05, + "loss": 0.4941, + "step": 16443 + }, + { + "epoch": 0.28414431849599114, + "grad_norm": 1.0936655881870756, + "learning_rate": 1.67996856211909e-05, + "loss": 0.4156, + "step": 16444 + }, + { + "epoch": 0.28416159800940005, + "grad_norm": 1.0943698463391112, + "learning_rate": 1.6799275252410304e-05, + "loss": 0.6074, + "step": 16445 + }, + { + "epoch": 0.28417887752280896, + "grad_norm": 1.260199515485936, + "learning_rate": 1.679886486233386e-05, + "loss": 0.567, + "step": 16446 + }, + { + "epoch": 0.28419615703621787, + "grad_norm": 0.46247874258824107, + "learning_rate": 1.6798454450962854e-05, + "loss": 0.5125, + "step": 16447 + }, + { + "epoch": 0.2842134365496268, + "grad_norm": 0.8233570239459018, + "learning_rate": 1.679804401829858e-05, + "loss": 0.4006, + "step": 16448 + }, + { + "epoch": 0.2842307160630357, + "grad_norm": 1.2977344417961385, + "learning_rate": 1.6797633564342305e-05, + "loss": 0.6295, + "step": 16449 + }, + { + "epoch": 0.2842479955764446, + "grad_norm": 0.7813655016817386, + "learning_rate": 1.679722308909533e-05, + "loss": 0.5461, + "step": 16450 + }, + { + "epoch": 0.2842652750898535, + "grad_norm": 1.6970280764703523, + "learning_rate": 1.6796812592558935e-05, + "loss": 0.5414, + "step": 16451 + }, + { + "epoch": 0.28428255460326235, + "grad_norm": 1.0411439282308144, + "learning_rate": 1.6796402074734404e-05, + "loss": 0.5712, + "step": 16452 + }, + { + "epoch": 0.28429983411667126, + "grad_norm": 1.1214499661004453, + "learning_rate": 1.6795991535623028e-05, + "loss": 0.3406, + "step": 16453 + }, + { + "epoch": 0.28431711363008016, + "grad_norm": 0.9027156909155932, + "learning_rate": 1.679558097522609e-05, + "loss": 0.4921, + "step": 16454 + }, + { + "epoch": 0.2843343931434891, + "grad_norm": 0.6961145628837868, + "learning_rate": 1.6795170393544876e-05, + "loss": 0.4886, + "step": 16455 + }, + { + "epoch": 0.284351672656898, + "grad_norm": 0.6248158340092482, + "learning_rate": 1.679475979058067e-05, + "loss": 0.5894, + "step": 16456 + }, + { + "epoch": 0.2843689521703069, + "grad_norm": 1.381524315034476, + "learning_rate": 1.679434916633476e-05, + "loss": 0.6372, + "step": 16457 + }, + { + "epoch": 0.2843862316837158, + "grad_norm": 0.8406323074555724, + "learning_rate": 1.6793938520808432e-05, + "loss": 0.5477, + "step": 16458 + }, + { + "epoch": 0.2844035111971247, + "grad_norm": 1.688570508450639, + "learning_rate": 1.6793527854002976e-05, + "loss": 0.3971, + "step": 16459 + }, + { + "epoch": 0.2844207907105336, + "grad_norm": 1.0132634737396125, + "learning_rate": 1.679311716591967e-05, + "loss": 0.5387, + "step": 16460 + }, + { + "epoch": 0.2844380702239425, + "grad_norm": 1.2492931325641772, + "learning_rate": 1.6792706456559807e-05, + "loss": 0.546, + "step": 16461 + }, + { + "epoch": 0.28445534973735137, + "grad_norm": 0.7892314541948557, + "learning_rate": 1.6792295725924673e-05, + "loss": 0.4152, + "step": 16462 + }, + { + "epoch": 0.2844726292507603, + "grad_norm": 0.962386444076648, + "learning_rate": 1.6791884974015546e-05, + "loss": 0.5816, + "step": 16463 + }, + { + "epoch": 0.2844899087641692, + "grad_norm": 1.5247961093453313, + "learning_rate": 1.6791474200833727e-05, + "loss": 0.5844, + "step": 16464 + }, + { + "epoch": 0.2845071882775781, + "grad_norm": 1.2562357045725614, + "learning_rate": 1.679106340638049e-05, + "loss": 0.6182, + "step": 16465 + }, + { + "epoch": 0.284524467790987, + "grad_norm": 0.9277618679201707, + "learning_rate": 1.6790652590657125e-05, + "loss": 0.5577, + "step": 16466 + }, + { + "epoch": 0.2845417473043959, + "grad_norm": 1.3906445682385187, + "learning_rate": 1.6790241753664925e-05, + "loss": 0.6549, + "step": 16467 + }, + { + "epoch": 0.2845590268178048, + "grad_norm": 0.9804012804052544, + "learning_rate": 1.6789830895405168e-05, + "loss": 0.4088, + "step": 16468 + }, + { + "epoch": 0.2845763063312137, + "grad_norm": 0.8704363980966899, + "learning_rate": 1.6789420015879147e-05, + "loss": 0.5953, + "step": 16469 + }, + { + "epoch": 0.28459358584462263, + "grad_norm": 0.7950633343785544, + "learning_rate": 1.6789009115088145e-05, + "loss": 0.7526, + "step": 16470 + }, + { + "epoch": 0.28461086535803154, + "grad_norm": 1.3158462174047714, + "learning_rate": 1.678859819303345e-05, + "loss": 0.3752, + "step": 16471 + }, + { + "epoch": 0.28462814487144045, + "grad_norm": 1.3734766729288996, + "learning_rate": 1.678818724971635e-05, + "loss": 0.6867, + "step": 16472 + }, + { + "epoch": 0.2846454243848493, + "grad_norm": 1.2062393977042125, + "learning_rate": 1.6787776285138134e-05, + "loss": 0.4565, + "step": 16473 + }, + { + "epoch": 0.2846627038982582, + "grad_norm": 1.2053576355387672, + "learning_rate": 1.6787365299300083e-05, + "loss": 0.4465, + "step": 16474 + }, + { + "epoch": 0.2846799834116671, + "grad_norm": 0.7562019901488324, + "learning_rate": 1.678695429220349e-05, + "loss": 0.3908, + "step": 16475 + }, + { + "epoch": 0.284697262925076, + "grad_norm": 1.2271111846614586, + "learning_rate": 1.678654326384964e-05, + "loss": 0.5785, + "step": 16476 + }, + { + "epoch": 0.28471454243848493, + "grad_norm": 1.413675609500313, + "learning_rate": 1.6786132214239822e-05, + "loss": 0.4928, + "step": 16477 + }, + { + "epoch": 0.28473182195189384, + "grad_norm": 1.1602928747322747, + "learning_rate": 1.678572114337532e-05, + "loss": 0.5065, + "step": 16478 + }, + { + "epoch": 0.28474910146530275, + "grad_norm": 0.7210262416641414, + "learning_rate": 1.6785310051257427e-05, + "loss": 0.2974, + "step": 16479 + }, + { + "epoch": 0.28476638097871165, + "grad_norm": 1.2138433362708354, + "learning_rate": 1.6784898937887426e-05, + "loss": 0.5859, + "step": 16480 + }, + { + "epoch": 0.28478366049212056, + "grad_norm": 1.178951914157048, + "learning_rate": 1.6784487803266602e-05, + "loss": 0.4356, + "step": 16481 + }, + { + "epoch": 0.28480094000552947, + "grad_norm": 1.378142079531548, + "learning_rate": 1.6784076647396252e-05, + "loss": 0.5696, + "step": 16482 + }, + { + "epoch": 0.2848182195189383, + "grad_norm": 1.046451939645522, + "learning_rate": 1.6783665470277653e-05, + "loss": 0.5197, + "step": 16483 + }, + { + "epoch": 0.28483549903234723, + "grad_norm": 0.7804884232192154, + "learning_rate": 1.6783254271912104e-05, + "loss": 0.4192, + "step": 16484 + }, + { + "epoch": 0.28485277854575614, + "grad_norm": 0.6580413397168936, + "learning_rate": 1.6782843052300882e-05, + "loss": 0.5728, + "step": 16485 + }, + { + "epoch": 0.28487005805916504, + "grad_norm": 0.6305650394097236, + "learning_rate": 1.6782431811445283e-05, + "loss": 0.3814, + "step": 16486 + }, + { + "epoch": 0.28488733757257395, + "grad_norm": 1.621644284823565, + "learning_rate": 1.6782020549346594e-05, + "loss": 0.6595, + "step": 16487 + }, + { + "epoch": 0.28490461708598286, + "grad_norm": 0.8928833388228179, + "learning_rate": 1.67816092660061e-05, + "loss": 0.6874, + "step": 16488 + }, + { + "epoch": 0.28492189659939177, + "grad_norm": 1.0427436981109859, + "learning_rate": 1.6781197961425085e-05, + "loss": 0.4506, + "step": 16489 + }, + { + "epoch": 0.2849391761128007, + "grad_norm": 0.9436133813641517, + "learning_rate": 1.678078663560485e-05, + "loss": 0.4813, + "step": 16490 + }, + { + "epoch": 0.2849564556262096, + "grad_norm": 1.019605753219257, + "learning_rate": 1.678037528854667e-05, + "loss": 0.8023, + "step": 16491 + }, + { + "epoch": 0.2849737351396185, + "grad_norm": 0.9746107455940842, + "learning_rate": 1.677996392025184e-05, + "loss": 0.581, + "step": 16492 + }, + { + "epoch": 0.2849910146530274, + "grad_norm": 1.5691405162343162, + "learning_rate": 1.6779552530721652e-05, + "loss": 0.5576, + "step": 16493 + }, + { + "epoch": 0.28500829416643625, + "grad_norm": 0.9396679739874679, + "learning_rate": 1.677914111995739e-05, + "loss": 0.6598, + "step": 16494 + }, + { + "epoch": 0.28502557367984516, + "grad_norm": 1.0115922650460012, + "learning_rate": 1.677872968796034e-05, + "loss": 0.3734, + "step": 16495 + }, + { + "epoch": 0.28504285319325406, + "grad_norm": 0.8711940395729807, + "learning_rate": 1.6778318234731794e-05, + "loss": 0.5222, + "step": 16496 + }, + { + "epoch": 0.28506013270666297, + "grad_norm": 0.7969588703458423, + "learning_rate": 1.677790676027304e-05, + "loss": 0.6263, + "step": 16497 + }, + { + "epoch": 0.2850774122200719, + "grad_norm": 1.0355769253593339, + "learning_rate": 1.6777495264585367e-05, + "loss": 0.5816, + "step": 16498 + }, + { + "epoch": 0.2850946917334808, + "grad_norm": 1.0122392556963848, + "learning_rate": 1.6777083747670063e-05, + "loss": 0.7741, + "step": 16499 + }, + { + "epoch": 0.2851119712468897, + "grad_norm": 1.1505985539826955, + "learning_rate": 1.677667220952842e-05, + "loss": 0.6663, + "step": 16500 + }, + { + "epoch": 0.2851292507602986, + "grad_norm": 1.1696355533134208, + "learning_rate": 1.6776260650161726e-05, + "loss": 0.398, + "step": 16501 + }, + { + "epoch": 0.2851465302737075, + "grad_norm": 1.2745785052265723, + "learning_rate": 1.6775849069571265e-05, + "loss": 0.7332, + "step": 16502 + }, + { + "epoch": 0.2851638097871164, + "grad_norm": 0.9015431010795563, + "learning_rate": 1.677543746775833e-05, + "loss": 0.5994, + "step": 16503 + }, + { + "epoch": 0.28518108930052527, + "grad_norm": 0.720169778825214, + "learning_rate": 1.677502584472421e-05, + "loss": 0.4214, + "step": 16504 + }, + { + "epoch": 0.2851983688139342, + "grad_norm": 1.2100404384455659, + "learning_rate": 1.6774614200470197e-05, + "loss": 0.3887, + "step": 16505 + }, + { + "epoch": 0.2852156483273431, + "grad_norm": 0.9676977956162914, + "learning_rate": 1.6774202534997575e-05, + "loss": 0.4652, + "step": 16506 + }, + { + "epoch": 0.285232927840752, + "grad_norm": 1.0285737545393718, + "learning_rate": 1.6773790848307635e-05, + "loss": 0.5844, + "step": 16507 + }, + { + "epoch": 0.2852502073541609, + "grad_norm": 0.4288125064059061, + "learning_rate": 1.6773379140401668e-05, + "loss": 0.5638, + "step": 16508 + }, + { + "epoch": 0.2852674868675698, + "grad_norm": 1.7889168115837624, + "learning_rate": 1.6772967411280964e-05, + "loss": 0.4776, + "step": 16509 + }, + { + "epoch": 0.2852847663809787, + "grad_norm": 1.0696924755700117, + "learning_rate": 1.677255566094681e-05, + "loss": 0.5661, + "step": 16510 + }, + { + "epoch": 0.2853020458943876, + "grad_norm": 1.2398436186245085, + "learning_rate": 1.6772143889400498e-05, + "loss": 0.6086, + "step": 16511 + }, + { + "epoch": 0.28531932540779653, + "grad_norm": 0.8699197578616712, + "learning_rate": 1.6771732096643317e-05, + "loss": 0.3398, + "step": 16512 + }, + { + "epoch": 0.28533660492120544, + "grad_norm": 1.3428886544010215, + "learning_rate": 1.6771320282676556e-05, + "loss": 0.6536, + "step": 16513 + }, + { + "epoch": 0.28535388443461435, + "grad_norm": 1.3708467427580047, + "learning_rate": 1.6770908447501507e-05, + "loss": 0.5228, + "step": 16514 + }, + { + "epoch": 0.2853711639480232, + "grad_norm": 1.8380229456841652, + "learning_rate": 1.6770496591119453e-05, + "loss": 0.7663, + "step": 16515 + }, + { + "epoch": 0.2853884434614321, + "grad_norm": 0.804229565742061, + "learning_rate": 1.6770084713531695e-05, + "loss": 0.579, + "step": 16516 + }, + { + "epoch": 0.285405722974841, + "grad_norm": 1.3674203062428327, + "learning_rate": 1.6769672814739514e-05, + "loss": 0.4904, + "step": 16517 + }, + { + "epoch": 0.2854230024882499, + "grad_norm": 0.6515132340085106, + "learning_rate": 1.6769260894744202e-05, + "loss": 0.7327, + "step": 16518 + }, + { + "epoch": 0.28544028200165883, + "grad_norm": 0.9128276216811267, + "learning_rate": 1.676884895354705e-05, + "loss": 0.4376, + "step": 16519 + }, + { + "epoch": 0.28545756151506774, + "grad_norm": 1.1276348294563985, + "learning_rate": 1.6768436991149354e-05, + "loss": 0.4953, + "step": 16520 + }, + { + "epoch": 0.28547484102847664, + "grad_norm": 1.1007075010491407, + "learning_rate": 1.6768025007552394e-05, + "loss": 0.5684, + "step": 16521 + }, + { + "epoch": 0.28549212054188555, + "grad_norm": 0.7734808731809923, + "learning_rate": 1.676761300275747e-05, + "loss": 0.4882, + "step": 16522 + }, + { + "epoch": 0.28550940005529446, + "grad_norm": 1.1598347635697104, + "learning_rate": 1.6767200976765863e-05, + "loss": 0.3609, + "step": 16523 + }, + { + "epoch": 0.28552667956870337, + "grad_norm": 0.9634382002837967, + "learning_rate": 1.6766788929578873e-05, + "loss": 0.3158, + "step": 16524 + }, + { + "epoch": 0.2855439590821123, + "grad_norm": 1.4543227428483005, + "learning_rate": 1.6766376861197778e-05, + "loss": 0.6246, + "step": 16525 + }, + { + "epoch": 0.2855612385955211, + "grad_norm": 1.0124864852234376, + "learning_rate": 1.6765964771623884e-05, + "loss": 0.5261, + "step": 16526 + }, + { + "epoch": 0.28557851810893004, + "grad_norm": 0.7067467781161733, + "learning_rate": 1.6765552660858473e-05, + "loss": 0.4737, + "step": 16527 + }, + { + "epoch": 0.28559579762233894, + "grad_norm": 1.0137367301411737, + "learning_rate": 1.6765140528902835e-05, + "loss": 0.632, + "step": 16528 + }, + { + "epoch": 0.28561307713574785, + "grad_norm": 0.6256018655287736, + "learning_rate": 1.676472837575826e-05, + "loss": 0.3836, + "step": 16529 + }, + { + "epoch": 0.28563035664915676, + "grad_norm": 0.8653402348366841, + "learning_rate": 1.6764316201426046e-05, + "loss": 0.5559, + "step": 16530 + }, + { + "epoch": 0.28564763616256567, + "grad_norm": 0.9292422580574434, + "learning_rate": 1.676390400590748e-05, + "loss": 0.5887, + "step": 16531 + }, + { + "epoch": 0.2856649156759746, + "grad_norm": 0.8405312589728756, + "learning_rate": 1.6763491789203848e-05, + "loss": 0.4851, + "step": 16532 + }, + { + "epoch": 0.2856821951893835, + "grad_norm": 0.7422762470565171, + "learning_rate": 1.676307955131645e-05, + "loss": 0.5939, + "step": 16533 + }, + { + "epoch": 0.2856994747027924, + "grad_norm": 1.1475982387276054, + "learning_rate": 1.676266729224657e-05, + "loss": 0.684, + "step": 16534 + }, + { + "epoch": 0.2857167542162013, + "grad_norm": 0.7406765339304834, + "learning_rate": 1.6762255011995505e-05, + "loss": 0.618, + "step": 16535 + }, + { + "epoch": 0.28573403372961015, + "grad_norm": 0.8536429685594294, + "learning_rate": 1.6761842710564543e-05, + "loss": 0.2998, + "step": 16536 + }, + { + "epoch": 0.28575131324301906, + "grad_norm": 1.030133038195429, + "learning_rate": 1.6761430387954975e-05, + "loss": 0.5242, + "step": 16537 + }, + { + "epoch": 0.28576859275642796, + "grad_norm": 0.7923034265903645, + "learning_rate": 1.6761018044168093e-05, + "loss": 0.5142, + "step": 16538 + }, + { + "epoch": 0.28578587226983687, + "grad_norm": 1.1017009075037039, + "learning_rate": 1.6760605679205187e-05, + "loss": 0.6482, + "step": 16539 + }, + { + "epoch": 0.2858031517832458, + "grad_norm": 0.5880312807336131, + "learning_rate": 1.6760193293067555e-05, + "loss": 0.8214, + "step": 16540 + }, + { + "epoch": 0.2858204312966547, + "grad_norm": 0.8633785482910329, + "learning_rate": 1.6759780885756482e-05, + "loss": 0.612, + "step": 16541 + }, + { + "epoch": 0.2858377108100636, + "grad_norm": 0.45186009444646097, + "learning_rate": 1.6759368457273257e-05, + "loss": 0.7694, + "step": 16542 + }, + { + "epoch": 0.2858549903234725, + "grad_norm": 0.6934296192692426, + "learning_rate": 1.675895600761918e-05, + "loss": 0.2937, + "step": 16543 + }, + { + "epoch": 0.2858722698368814, + "grad_norm": 1.0296955815166573, + "learning_rate": 1.675854353679554e-05, + "loss": 0.4806, + "step": 16544 + }, + { + "epoch": 0.2858895493502903, + "grad_norm": 1.068520103944973, + "learning_rate": 1.6758131044803624e-05, + "loss": 0.5779, + "step": 16545 + }, + { + "epoch": 0.2859068288636992, + "grad_norm": 1.4309079448677435, + "learning_rate": 1.6757718531644732e-05, + "loss": 0.5044, + "step": 16546 + }, + { + "epoch": 0.2859241083771081, + "grad_norm": 1.3096623712710034, + "learning_rate": 1.675730599732015e-05, + "loss": 0.6109, + "step": 16547 + }, + { + "epoch": 0.285941387890517, + "grad_norm": 0.438101313155241, + "learning_rate": 1.6756893441831175e-05, + "loss": 0.7745, + "step": 16548 + }, + { + "epoch": 0.2859586674039259, + "grad_norm": 1.0711533376351168, + "learning_rate": 1.6756480865179093e-05, + "loss": 0.4706, + "step": 16549 + }, + { + "epoch": 0.2859759469173348, + "grad_norm": 1.0184354906035678, + "learning_rate": 1.67560682673652e-05, + "loss": 0.4202, + "step": 16550 + }, + { + "epoch": 0.2859932264307437, + "grad_norm": 0.6766025963691418, + "learning_rate": 1.675565564839079e-05, + "loss": 0.5387, + "step": 16551 + }, + { + "epoch": 0.2860105059441526, + "grad_norm": 1.1729205637820306, + "learning_rate": 1.675524300825715e-05, + "loss": 0.6231, + "step": 16552 + }, + { + "epoch": 0.2860277854575615, + "grad_norm": 1.0030710796796931, + "learning_rate": 1.6754830346965576e-05, + "loss": 0.6672, + "step": 16553 + }, + { + "epoch": 0.28604506497097043, + "grad_norm": 1.092619984872091, + "learning_rate": 1.675441766451736e-05, + "loss": 0.5721, + "step": 16554 + }, + { + "epoch": 0.28606234448437934, + "grad_norm": 0.7515867616696921, + "learning_rate": 1.6754004960913796e-05, + "loss": 0.4706, + "step": 16555 + }, + { + "epoch": 0.28607962399778825, + "grad_norm": 0.6360240138942259, + "learning_rate": 1.6753592236156177e-05, + "loss": 0.3401, + "step": 16556 + }, + { + "epoch": 0.2860969035111971, + "grad_norm": 0.7028032872297735, + "learning_rate": 1.675317949024579e-05, + "loss": 0.6625, + "step": 16557 + }, + { + "epoch": 0.286114183024606, + "grad_norm": 1.2914140859637486, + "learning_rate": 1.6752766723183934e-05, + "loss": 0.5201, + "step": 16558 + }, + { + "epoch": 0.2861314625380149, + "grad_norm": 0.8139614073243584, + "learning_rate": 1.6752353934971898e-05, + "loss": 0.553, + "step": 16559 + }, + { + "epoch": 0.2861487420514238, + "grad_norm": 2.2154355056139763, + "learning_rate": 1.6751941125610978e-05, + "loss": 0.4184, + "step": 16560 + }, + { + "epoch": 0.28616602156483273, + "grad_norm": 0.8474045432922604, + "learning_rate": 1.675152829510246e-05, + "loss": 0.536, + "step": 16561 + }, + { + "epoch": 0.28618330107824164, + "grad_norm": 0.839994901410622, + "learning_rate": 1.675111544344765e-05, + "loss": 0.4153, + "step": 16562 + }, + { + "epoch": 0.28620058059165054, + "grad_norm": 0.9276008094372323, + "learning_rate": 1.675070257064783e-05, + "loss": 0.4415, + "step": 16563 + }, + { + "epoch": 0.28621786010505945, + "grad_norm": 0.971225656458752, + "learning_rate": 1.6750289676704296e-05, + "loss": 0.5153, + "step": 16564 + }, + { + "epoch": 0.28623513961846836, + "grad_norm": 1.270957878017811, + "learning_rate": 1.674987676161834e-05, + "loss": 0.529, + "step": 16565 + }, + { + "epoch": 0.28625241913187727, + "grad_norm": 1.9568116240253906, + "learning_rate": 1.674946382539126e-05, + "loss": 0.5037, + "step": 16566 + }, + { + "epoch": 0.2862696986452862, + "grad_norm": 1.0165270634854295, + "learning_rate": 1.6749050868024347e-05, + "loss": 0.4261, + "step": 16567 + }, + { + "epoch": 0.286286978158695, + "grad_norm": 0.897110887498184, + "learning_rate": 1.6748637889518892e-05, + "loss": 0.5101, + "step": 16568 + }, + { + "epoch": 0.28630425767210393, + "grad_norm": 0.5694888908570577, + "learning_rate": 1.6748224889876188e-05, + "loss": 0.3956, + "step": 16569 + }, + { + "epoch": 0.28632153718551284, + "grad_norm": 1.1826054211583386, + "learning_rate": 1.6747811869097535e-05, + "loss": 0.6618, + "step": 16570 + }, + { + "epoch": 0.28633881669892175, + "grad_norm": 1.2669937038972867, + "learning_rate": 1.6747398827184217e-05, + "loss": 0.4106, + "step": 16571 + }, + { + "epoch": 0.28635609621233066, + "grad_norm": 1.1319405306897057, + "learning_rate": 1.674698576413754e-05, + "loss": 0.5355, + "step": 16572 + }, + { + "epoch": 0.28637337572573957, + "grad_norm": 0.8474532568364116, + "learning_rate": 1.6746572679958785e-05, + "loss": 0.6574, + "step": 16573 + }, + { + "epoch": 0.2863906552391485, + "grad_norm": 1.4358044074915703, + "learning_rate": 1.6746159574649256e-05, + "loss": 0.5547, + "step": 16574 + }, + { + "epoch": 0.2864079347525574, + "grad_norm": 0.968923993176041, + "learning_rate": 1.674574644821024e-05, + "loss": 0.5902, + "step": 16575 + }, + { + "epoch": 0.2864252142659663, + "grad_norm": 1.065710252745448, + "learning_rate": 1.6745333300643036e-05, + "loss": 0.564, + "step": 16576 + }, + { + "epoch": 0.2864424937793752, + "grad_norm": 0.6038317873262528, + "learning_rate": 1.6744920131948933e-05, + "loss": 0.4895, + "step": 16577 + }, + { + "epoch": 0.28645977329278405, + "grad_norm": 1.155750452082665, + "learning_rate": 1.6744506942129226e-05, + "loss": 0.558, + "step": 16578 + }, + { + "epoch": 0.28647705280619296, + "grad_norm": 0.7273046861531454, + "learning_rate": 1.6744093731185212e-05, + "loss": 0.541, + "step": 16579 + }, + { + "epoch": 0.28649433231960186, + "grad_norm": 1.2798279727869457, + "learning_rate": 1.6743680499118188e-05, + "loss": 0.5685, + "step": 16580 + }, + { + "epoch": 0.28651161183301077, + "grad_norm": 1.5060896751958168, + "learning_rate": 1.674326724592944e-05, + "loss": 0.6281, + "step": 16581 + }, + { + "epoch": 0.2865288913464197, + "grad_norm": 0.7267696486046219, + "learning_rate": 1.674285397162027e-05, + "loss": 0.5744, + "step": 16582 + }, + { + "epoch": 0.2865461708598286, + "grad_norm": 1.186161856094309, + "learning_rate": 1.6742440676191965e-05, + "loss": 0.78, + "step": 16583 + }, + { + "epoch": 0.2865634503732375, + "grad_norm": 1.359560133451885, + "learning_rate": 1.6742027359645823e-05, + "loss": 0.5922, + "step": 16584 + }, + { + "epoch": 0.2865807298866464, + "grad_norm": 1.5680737655443757, + "learning_rate": 1.674161402198314e-05, + "loss": 0.5771, + "step": 16585 + }, + { + "epoch": 0.2865980094000553, + "grad_norm": 0.82129011382061, + "learning_rate": 1.674120066320521e-05, + "loss": 0.5832, + "step": 16586 + }, + { + "epoch": 0.2866152889134642, + "grad_norm": 0.7424060563639199, + "learning_rate": 1.674078728331333e-05, + "loss": 0.3869, + "step": 16587 + }, + { + "epoch": 0.2866325684268731, + "grad_norm": 1.1955355240012377, + "learning_rate": 1.6740373882308786e-05, + "loss": 0.6259, + "step": 16588 + }, + { + "epoch": 0.286649847940282, + "grad_norm": 0.7527380364779728, + "learning_rate": 1.6739960460192885e-05, + "loss": 0.4183, + "step": 16589 + }, + { + "epoch": 0.2866671274536909, + "grad_norm": 1.0589290106755471, + "learning_rate": 1.673954701696691e-05, + "loss": 0.4693, + "step": 16590 + }, + { + "epoch": 0.2866844069670998, + "grad_norm": 0.9185054994050541, + "learning_rate": 1.6739133552632166e-05, + "loss": 0.8283, + "step": 16591 + }, + { + "epoch": 0.2867016864805087, + "grad_norm": 0.7238927826041255, + "learning_rate": 1.673872006718994e-05, + "loss": 0.4278, + "step": 16592 + }, + { + "epoch": 0.2867189659939176, + "grad_norm": 1.1918179782310867, + "learning_rate": 1.6738306560641535e-05, + "loss": 0.7015, + "step": 16593 + }, + { + "epoch": 0.2867362455073265, + "grad_norm": 1.663986577160554, + "learning_rate": 1.673789303298824e-05, + "loss": 0.5733, + "step": 16594 + }, + { + "epoch": 0.2867535250207354, + "grad_norm": 0.6899680013029683, + "learning_rate": 1.673747948423135e-05, + "loss": 0.2637, + "step": 16595 + }, + { + "epoch": 0.28677080453414433, + "grad_norm": 1.246880764107364, + "learning_rate": 1.6737065914372165e-05, + "loss": 0.4748, + "step": 16596 + }, + { + "epoch": 0.28678808404755324, + "grad_norm": 1.6785885888411287, + "learning_rate": 1.6736652323411972e-05, + "loss": 0.7385, + "step": 16597 + }, + { + "epoch": 0.28680536356096215, + "grad_norm": 0.5260826738810571, + "learning_rate": 1.6736238711352077e-05, + "loss": 0.5703, + "step": 16598 + }, + { + "epoch": 0.28682264307437105, + "grad_norm": 1.0665831008706863, + "learning_rate": 1.673582507819377e-05, + "loss": 0.7511, + "step": 16599 + }, + { + "epoch": 0.2868399225877799, + "grad_norm": 0.688003609174661, + "learning_rate": 1.6735411423938346e-05, + "loss": 0.386, + "step": 16600 + }, + { + "epoch": 0.2868572021011888, + "grad_norm": 0.724119304665136, + "learning_rate": 1.6734997748587102e-05, + "loss": 0.4929, + "step": 16601 + }, + { + "epoch": 0.2868744816145977, + "grad_norm": 0.8579078184426036, + "learning_rate": 1.6734584052141333e-05, + "loss": 0.509, + "step": 16602 + }, + { + "epoch": 0.28689176112800663, + "grad_norm": 0.946893891309093, + "learning_rate": 1.6734170334602335e-05, + "loss": 0.5774, + "step": 16603 + }, + { + "epoch": 0.28690904064141554, + "grad_norm": 1.0589724684797743, + "learning_rate": 1.6733756595971404e-05, + "loss": 0.5091, + "step": 16604 + }, + { + "epoch": 0.28692632015482444, + "grad_norm": 1.043623810234183, + "learning_rate": 1.6733342836249834e-05, + "loss": 0.5163, + "step": 16605 + }, + { + "epoch": 0.28694359966823335, + "grad_norm": 1.1933507253041689, + "learning_rate": 1.6732929055438926e-05, + "loss": 0.675, + "step": 16606 + }, + { + "epoch": 0.28696087918164226, + "grad_norm": 0.9358250601324278, + "learning_rate": 1.673251525353997e-05, + "loss": 0.506, + "step": 16607 + }, + { + "epoch": 0.28697815869505117, + "grad_norm": 0.6501702247854848, + "learning_rate": 1.6732101430554265e-05, + "loss": 0.5541, + "step": 16608 + }, + { + "epoch": 0.2869954382084601, + "grad_norm": 1.9075460094041015, + "learning_rate": 1.6731687586483104e-05, + "loss": 0.6545, + "step": 16609 + }, + { + "epoch": 0.2870127177218689, + "grad_norm": 1.516244297692633, + "learning_rate": 1.673127372132779e-05, + "loss": 0.5006, + "step": 16610 + }, + { + "epoch": 0.28702999723527783, + "grad_norm": 0.9370877686610066, + "learning_rate": 1.6730859835089614e-05, + "loss": 0.6118, + "step": 16611 + }, + { + "epoch": 0.28704727674868674, + "grad_norm": 0.36680279093720697, + "learning_rate": 1.6730445927769873e-05, + "loss": 0.6279, + "step": 16612 + }, + { + "epoch": 0.28706455626209565, + "grad_norm": 0.8996591770185103, + "learning_rate": 1.6730031999369863e-05, + "loss": 0.5439, + "step": 16613 + }, + { + "epoch": 0.28708183577550456, + "grad_norm": 1.1118627191395054, + "learning_rate": 1.6729618049890883e-05, + "loss": 0.3155, + "step": 16614 + }, + { + "epoch": 0.28709911528891346, + "grad_norm": 1.283777399493391, + "learning_rate": 1.6729204079334227e-05, + "loss": 0.5036, + "step": 16615 + }, + { + "epoch": 0.28711639480232237, + "grad_norm": 0.9772483911357813, + "learning_rate": 1.6728790087701196e-05, + "loss": 0.6358, + "step": 16616 + }, + { + "epoch": 0.2871336743157313, + "grad_norm": 0.800186230733227, + "learning_rate": 1.6728376074993077e-05, + "loss": 0.5912, + "step": 16617 + }, + { + "epoch": 0.2871509538291402, + "grad_norm": 1.3965599819407732, + "learning_rate": 1.6727962041211174e-05, + "loss": 0.6723, + "step": 16618 + }, + { + "epoch": 0.2871682333425491, + "grad_norm": 0.8731629545323476, + "learning_rate": 1.6727547986356786e-05, + "loss": 0.3881, + "step": 16619 + }, + { + "epoch": 0.287185512855958, + "grad_norm": 0.6470838702852415, + "learning_rate": 1.6727133910431203e-05, + "loss": 0.5378, + "step": 16620 + }, + { + "epoch": 0.28720279236936685, + "grad_norm": 1.1929679449435289, + "learning_rate": 1.6726719813435727e-05, + "loss": 0.5247, + "step": 16621 + }, + { + "epoch": 0.28722007188277576, + "grad_norm": 0.7203009328746333, + "learning_rate": 1.6726305695371654e-05, + "loss": 0.4721, + "step": 16622 + }, + { + "epoch": 0.28723735139618467, + "grad_norm": 0.9711239464607253, + "learning_rate": 1.672589155624028e-05, + "loss": 0.4185, + "step": 16623 + }, + { + "epoch": 0.2872546309095936, + "grad_norm": 1.084837013534949, + "learning_rate": 1.67254773960429e-05, + "loss": 0.4974, + "step": 16624 + }, + { + "epoch": 0.2872719104230025, + "grad_norm": 0.9352283728399605, + "learning_rate": 1.6725063214780815e-05, + "loss": 0.4054, + "step": 16625 + }, + { + "epoch": 0.2872891899364114, + "grad_norm": 1.0629609843936292, + "learning_rate": 1.6724649012455325e-05, + "loss": 0.4882, + "step": 16626 + }, + { + "epoch": 0.2873064694498203, + "grad_norm": 1.0633915348833372, + "learning_rate": 1.672423478906772e-05, + "loss": 0.5603, + "step": 16627 + }, + { + "epoch": 0.2873237489632292, + "grad_norm": 1.3205846290264018, + "learning_rate": 1.6723820544619297e-05, + "loss": 0.5162, + "step": 16628 + }, + { + "epoch": 0.2873410284766381, + "grad_norm": 0.39573048952527506, + "learning_rate": 1.6723406279111362e-05, + "loss": 0.6524, + "step": 16629 + }, + { + "epoch": 0.287358307990047, + "grad_norm": 0.740166215193328, + "learning_rate": 1.6722991992545206e-05, + "loss": 0.4961, + "step": 16630 + }, + { + "epoch": 0.2873755875034559, + "grad_norm": 1.1689842729413094, + "learning_rate": 1.672257768492213e-05, + "loss": 0.5165, + "step": 16631 + }, + { + "epoch": 0.2873928670168648, + "grad_norm": 1.0375415013826366, + "learning_rate": 1.6722163356243426e-05, + "loss": 0.5692, + "step": 16632 + }, + { + "epoch": 0.2874101465302737, + "grad_norm": 1.1966991398931026, + "learning_rate": 1.6721749006510397e-05, + "loss": 0.4709, + "step": 16633 + }, + { + "epoch": 0.2874274260436826, + "grad_norm": 0.8306773197539176, + "learning_rate": 1.672133463572434e-05, + "loss": 0.5201, + "step": 16634 + }, + { + "epoch": 0.2874447055570915, + "grad_norm": 0.9211323697774, + "learning_rate": 1.6720920243886555e-05, + "loss": 0.4642, + "step": 16635 + }, + { + "epoch": 0.2874619850705004, + "grad_norm": 1.0625308570424215, + "learning_rate": 1.672050583099833e-05, + "loss": 0.5486, + "step": 16636 + }, + { + "epoch": 0.2874792645839093, + "grad_norm": 0.6782612689576316, + "learning_rate": 1.6720091397060976e-05, + "loss": 0.4226, + "step": 16637 + }, + { + "epoch": 0.28749654409731823, + "grad_norm": 1.1632595757742163, + "learning_rate": 1.6719676942075783e-05, + "loss": 0.4165, + "step": 16638 + }, + { + "epoch": 0.28751382361072714, + "grad_norm": 1.0986775726367537, + "learning_rate": 1.671926246604405e-05, + "loss": 0.5151, + "step": 16639 + }, + { + "epoch": 0.28753110312413604, + "grad_norm": 0.7343329056192672, + "learning_rate": 1.671884796896708e-05, + "loss": 0.3944, + "step": 16640 + }, + { + "epoch": 0.28754838263754495, + "grad_norm": 0.903434596647272, + "learning_rate": 1.6718433450846164e-05, + "loss": 0.5098, + "step": 16641 + }, + { + "epoch": 0.2875656621509538, + "grad_norm": 1.1541523857527327, + "learning_rate": 1.6718018911682606e-05, + "loss": 0.641, + "step": 16642 + }, + { + "epoch": 0.2875829416643627, + "grad_norm": 1.6552822811590708, + "learning_rate": 1.67176043514777e-05, + "loss": 0.5858, + "step": 16643 + }, + { + "epoch": 0.2876002211777716, + "grad_norm": 0.6261168602977214, + "learning_rate": 1.6717189770232747e-05, + "loss": 0.7056, + "step": 16644 + }, + { + "epoch": 0.2876175006911805, + "grad_norm": 0.5564247597598044, + "learning_rate": 1.6716775167949046e-05, + "loss": 0.3985, + "step": 16645 + }, + { + "epoch": 0.28763478020458944, + "grad_norm": 1.6229328963125638, + "learning_rate": 1.6716360544627893e-05, + "loss": 0.4644, + "step": 16646 + }, + { + "epoch": 0.28765205971799834, + "grad_norm": 0.5189663041242057, + "learning_rate": 1.671594590027059e-05, + "loss": 0.3379, + "step": 16647 + }, + { + "epoch": 0.28766933923140725, + "grad_norm": 0.9944660212389677, + "learning_rate": 1.6715531234878436e-05, + "loss": 0.4862, + "step": 16648 + }, + { + "epoch": 0.28768661874481616, + "grad_norm": 0.7069311265646177, + "learning_rate": 1.6715116548452724e-05, + "loss": 0.3439, + "step": 16649 + }, + { + "epoch": 0.28770389825822507, + "grad_norm": 1.2030923882542142, + "learning_rate": 1.671470184099476e-05, + "loss": 0.4293, + "step": 16650 + }, + { + "epoch": 0.287721177771634, + "grad_norm": 1.7714528870452964, + "learning_rate": 1.6714287112505835e-05, + "loss": 0.4712, + "step": 16651 + }, + { + "epoch": 0.2877384572850428, + "grad_norm": 1.3458039747727792, + "learning_rate": 1.6713872362987258e-05, + "loss": 0.4045, + "step": 16652 + }, + { + "epoch": 0.28775573679845173, + "grad_norm": 0.8932046947930885, + "learning_rate": 1.671345759244032e-05, + "loss": 0.3836, + "step": 16653 + }, + { + "epoch": 0.28777301631186064, + "grad_norm": 1.042649111981114, + "learning_rate": 1.6713042800866323e-05, + "loss": 0.4427, + "step": 16654 + }, + { + "epoch": 0.28779029582526955, + "grad_norm": 0.9100398958083984, + "learning_rate": 1.6712627988266564e-05, + "loss": 0.5167, + "step": 16655 + }, + { + "epoch": 0.28780757533867846, + "grad_norm": 1.0897556546413838, + "learning_rate": 1.6712213154642346e-05, + "loss": 0.3933, + "step": 16656 + }, + { + "epoch": 0.28782485485208736, + "grad_norm": 0.8402474154891054, + "learning_rate": 1.671179829999497e-05, + "loss": 0.4174, + "step": 16657 + }, + { + "epoch": 0.28784213436549627, + "grad_norm": 0.5430724951107955, + "learning_rate": 1.6711383424325723e-05, + "loss": 0.7365, + "step": 16658 + }, + { + "epoch": 0.2878594138789052, + "grad_norm": 1.012920381682582, + "learning_rate": 1.6710968527635917e-05, + "loss": 0.4867, + "step": 16659 + }, + { + "epoch": 0.2878766933923141, + "grad_norm": 0.8278821217381125, + "learning_rate": 1.6710553609926847e-05, + "loss": 0.5298, + "step": 16660 + }, + { + "epoch": 0.287893972905723, + "grad_norm": 0.7605062974119559, + "learning_rate": 1.6710138671199816e-05, + "loss": 0.6481, + "step": 16661 + }, + { + "epoch": 0.2879112524191319, + "grad_norm": 1.008776583704615, + "learning_rate": 1.670972371145612e-05, + "loss": 0.5946, + "step": 16662 + }, + { + "epoch": 0.28792853193254075, + "grad_norm": 1.0453756088547295, + "learning_rate": 1.670930873069706e-05, + "loss": 0.4955, + "step": 16663 + }, + { + "epoch": 0.28794581144594966, + "grad_norm": 0.9134402364128008, + "learning_rate": 1.670889372892393e-05, + "loss": 0.5447, + "step": 16664 + }, + { + "epoch": 0.28796309095935857, + "grad_norm": 1.2116412690912914, + "learning_rate": 1.670847870613804e-05, + "loss": 0.4552, + "step": 16665 + }, + { + "epoch": 0.2879803704727675, + "grad_norm": 0.762111818220867, + "learning_rate": 1.6708063662340685e-05, + "loss": 0.6513, + "step": 16666 + }, + { + "epoch": 0.2879976499861764, + "grad_norm": 1.5976255835982123, + "learning_rate": 1.670764859753316e-05, + "loss": 0.6006, + "step": 16667 + }, + { + "epoch": 0.2880149294995853, + "grad_norm": 1.7639182861647977, + "learning_rate": 1.6707233511716773e-05, + "loss": 0.8312, + "step": 16668 + }, + { + "epoch": 0.2880322090129942, + "grad_norm": 0.7538825155516442, + "learning_rate": 1.670681840489282e-05, + "loss": 0.5218, + "step": 16669 + }, + { + "epoch": 0.2880494885264031, + "grad_norm": 0.7407348258643083, + "learning_rate": 1.67064032770626e-05, + "loss": 0.4103, + "step": 16670 + }, + { + "epoch": 0.288066768039812, + "grad_norm": 1.1464260176018393, + "learning_rate": 1.670598812822742e-05, + "loss": 0.7558, + "step": 16671 + }, + { + "epoch": 0.2880840475532209, + "grad_norm": 0.43941768464605546, + "learning_rate": 1.6705572958388576e-05, + "loss": 0.5472, + "step": 16672 + }, + { + "epoch": 0.28810132706662983, + "grad_norm": 0.7640768582773928, + "learning_rate": 1.6705157767547365e-05, + "loss": 0.6409, + "step": 16673 + }, + { + "epoch": 0.2881186065800387, + "grad_norm": 1.0514951037750793, + "learning_rate": 1.670474255570509e-05, + "loss": 0.5298, + "step": 16674 + }, + { + "epoch": 0.2881358860934476, + "grad_norm": 1.644403938176876, + "learning_rate": 1.6704327322863053e-05, + "loss": 0.4864, + "step": 16675 + }, + { + "epoch": 0.2881531656068565, + "grad_norm": 0.9519876392003256, + "learning_rate": 1.6703912069022554e-05, + "loss": 0.732, + "step": 16676 + }, + { + "epoch": 0.2881704451202654, + "grad_norm": 0.7906182441362211, + "learning_rate": 1.670349679418489e-05, + "loss": 0.4028, + "step": 16677 + }, + { + "epoch": 0.2881877246336743, + "grad_norm": 1.2007897630631692, + "learning_rate": 1.6703081498351368e-05, + "loss": 0.5383, + "step": 16678 + }, + { + "epoch": 0.2882050041470832, + "grad_norm": 1.126178486437957, + "learning_rate": 1.6702666181523283e-05, + "loss": 0.5291, + "step": 16679 + }, + { + "epoch": 0.28822228366049213, + "grad_norm": 1.120577121506314, + "learning_rate": 1.670225084370194e-05, + "loss": 0.6237, + "step": 16680 + }, + { + "epoch": 0.28823956317390104, + "grad_norm": 0.3677790687179988, + "learning_rate": 1.6701835484888634e-05, + "loss": 0.6828, + "step": 16681 + }, + { + "epoch": 0.28825684268730994, + "grad_norm": 0.8067008416193364, + "learning_rate": 1.6701420105084673e-05, + "loss": 0.4984, + "step": 16682 + }, + { + "epoch": 0.28827412220071885, + "grad_norm": 0.8860600538570474, + "learning_rate": 1.6701004704291354e-05, + "loss": 0.5382, + "step": 16683 + }, + { + "epoch": 0.2882914017141277, + "grad_norm": 0.7956015960717194, + "learning_rate": 1.670058928250998e-05, + "loss": 0.5847, + "step": 16684 + }, + { + "epoch": 0.2883086812275366, + "grad_norm": 0.4034796215660213, + "learning_rate": 1.6700173839741853e-05, + "loss": 0.5245, + "step": 16685 + }, + { + "epoch": 0.2883259607409455, + "grad_norm": 1.2887632215260822, + "learning_rate": 1.669975837598827e-05, + "loss": 0.6673, + "step": 16686 + }, + { + "epoch": 0.2883432402543544, + "grad_norm": 1.683586754096161, + "learning_rate": 1.6699342891250536e-05, + "loss": 0.617, + "step": 16687 + }, + { + "epoch": 0.28836051976776333, + "grad_norm": 1.2807605325220175, + "learning_rate": 1.6698927385529947e-05, + "loss": 0.4284, + "step": 16688 + }, + { + "epoch": 0.28837779928117224, + "grad_norm": 0.5889426739614764, + "learning_rate": 1.6698511858827813e-05, + "loss": 0.4351, + "step": 16689 + }, + { + "epoch": 0.28839507879458115, + "grad_norm": 1.1276042040970418, + "learning_rate": 1.6698096311145428e-05, + "loss": 0.4246, + "step": 16690 + }, + { + "epoch": 0.28841235830799006, + "grad_norm": 0.740708830583945, + "learning_rate": 1.6697680742484097e-05, + "loss": 0.7121, + "step": 16691 + }, + { + "epoch": 0.28842963782139897, + "grad_norm": 0.6282822217557527, + "learning_rate": 1.6697265152845125e-05, + "loss": 0.6118, + "step": 16692 + }, + { + "epoch": 0.2884469173348079, + "grad_norm": 1.2272361195646861, + "learning_rate": 1.66968495422298e-05, + "loss": 0.4503, + "step": 16693 + }, + { + "epoch": 0.2884641968482168, + "grad_norm": 0.8692474076411746, + "learning_rate": 1.669643391063944e-05, + "loss": 0.4903, + "step": 16694 + }, + { + "epoch": 0.28848147636162563, + "grad_norm": 1.6010825954941597, + "learning_rate": 1.669601825807534e-05, + "loss": 0.5382, + "step": 16695 + }, + { + "epoch": 0.28849875587503454, + "grad_norm": 1.0144825747890693, + "learning_rate": 1.6695602584538804e-05, + "loss": 0.4658, + "step": 16696 + }, + { + "epoch": 0.28851603538844345, + "grad_norm": 0.7776052039767692, + "learning_rate": 1.669518689003113e-05, + "loss": 0.3952, + "step": 16697 + }, + { + "epoch": 0.28853331490185236, + "grad_norm": 1.158586089913446, + "learning_rate": 1.6694771174553618e-05, + "loss": 0.5201, + "step": 16698 + }, + { + "epoch": 0.28855059441526126, + "grad_norm": 0.9401800423072613, + "learning_rate": 1.6694355438107576e-05, + "loss": 0.4617, + "step": 16699 + }, + { + "epoch": 0.28856787392867017, + "grad_norm": 0.7052826962270454, + "learning_rate": 1.6693939680694304e-05, + "loss": 0.3052, + "step": 16700 + }, + { + "epoch": 0.2885851534420791, + "grad_norm": 0.8475932065692158, + "learning_rate": 1.6693523902315106e-05, + "loss": 0.5097, + "step": 16701 + }, + { + "epoch": 0.288602432955488, + "grad_norm": 0.39110781963029445, + "learning_rate": 1.669310810297128e-05, + "loss": 0.589, + "step": 16702 + }, + { + "epoch": 0.2886197124688969, + "grad_norm": 0.767388336632427, + "learning_rate": 1.6692692282664132e-05, + "loss": 0.4038, + "step": 16703 + }, + { + "epoch": 0.2886369919823058, + "grad_norm": 1.033660723021026, + "learning_rate": 1.6692276441394965e-05, + "loss": 0.2875, + "step": 16704 + }, + { + "epoch": 0.28865427149571465, + "grad_norm": 0.665300520734648, + "learning_rate": 1.6691860579165075e-05, + "loss": 0.6147, + "step": 16705 + }, + { + "epoch": 0.28867155100912356, + "grad_norm": 0.9809458491038472, + "learning_rate": 1.6691444695975776e-05, + "loss": 0.7896, + "step": 16706 + }, + { + "epoch": 0.28868883052253247, + "grad_norm": 1.1310096940151229, + "learning_rate": 1.6691028791828357e-05, + "loss": 0.4613, + "step": 16707 + }, + { + "epoch": 0.2887061100359414, + "grad_norm": 0.519234802998271, + "learning_rate": 1.669061286672413e-05, + "loss": 0.5699, + "step": 16708 + }, + { + "epoch": 0.2887233895493503, + "grad_norm": 0.8342035184176353, + "learning_rate": 1.6690196920664397e-05, + "loss": 0.5424, + "step": 16709 + }, + { + "epoch": 0.2887406690627592, + "grad_norm": 1.6738976722788332, + "learning_rate": 1.6689780953650454e-05, + "loss": 0.4215, + "step": 16710 + }, + { + "epoch": 0.2887579485761681, + "grad_norm": 0.5461460127560707, + "learning_rate": 1.6689364965683613e-05, + "loss": 0.5342, + "step": 16711 + }, + { + "epoch": 0.288775228089577, + "grad_norm": 0.614660550008966, + "learning_rate": 1.668894895676517e-05, + "loss": 0.7159, + "step": 16712 + }, + { + "epoch": 0.2887925076029859, + "grad_norm": 0.9753222906712459, + "learning_rate": 1.6688532926896433e-05, + "loss": 0.4311, + "step": 16713 + }, + { + "epoch": 0.2888097871163948, + "grad_norm": 0.4897277161385666, + "learning_rate": 1.66881168760787e-05, + "loss": 0.5875, + "step": 16714 + }, + { + "epoch": 0.28882706662980373, + "grad_norm": 1.1331623618795668, + "learning_rate": 1.6687700804313277e-05, + "loss": 0.6087, + "step": 16715 + }, + { + "epoch": 0.2888443461432126, + "grad_norm": 1.115957832436394, + "learning_rate": 1.6687284711601468e-05, + "loss": 0.4161, + "step": 16716 + }, + { + "epoch": 0.2888616256566215, + "grad_norm": 0.7970646797889092, + "learning_rate": 1.6686868597944576e-05, + "loss": 0.6207, + "step": 16717 + }, + { + "epoch": 0.2888789051700304, + "grad_norm": 0.8424866600735353, + "learning_rate": 1.66864524633439e-05, + "loss": 0.4885, + "step": 16718 + }, + { + "epoch": 0.2888961846834393, + "grad_norm": 0.9713821491006157, + "learning_rate": 1.668603630780075e-05, + "loss": 0.4138, + "step": 16719 + }, + { + "epoch": 0.2889134641968482, + "grad_norm": 1.9915633891557354, + "learning_rate": 1.6685620131316425e-05, + "loss": 0.4205, + "step": 16720 + }, + { + "epoch": 0.2889307437102571, + "grad_norm": 1.4489332027287296, + "learning_rate": 1.6685203933892228e-05, + "loss": 0.5559, + "step": 16721 + }, + { + "epoch": 0.28894802322366603, + "grad_norm": 0.8862187117499332, + "learning_rate": 1.6684787715529467e-05, + "loss": 0.5522, + "step": 16722 + }, + { + "epoch": 0.28896530273707494, + "grad_norm": 0.7205087691194215, + "learning_rate": 1.668437147622944e-05, + "loss": 0.2653, + "step": 16723 + }, + { + "epoch": 0.28898258225048384, + "grad_norm": 0.8220274346192001, + "learning_rate": 1.6683955215993454e-05, + "loss": 0.399, + "step": 16724 + }, + { + "epoch": 0.28899986176389275, + "grad_norm": 0.40959406270929183, + "learning_rate": 1.6683538934822818e-05, + "loss": 0.5488, + "step": 16725 + }, + { + "epoch": 0.2890171412773016, + "grad_norm": 0.8135296197232873, + "learning_rate": 1.6683122632718826e-05, + "loss": 0.4685, + "step": 16726 + }, + { + "epoch": 0.2890344207907105, + "grad_norm": 0.5092468684251735, + "learning_rate": 1.6682706309682783e-05, + "loss": 0.6092, + "step": 16727 + }, + { + "epoch": 0.2890517003041194, + "grad_norm": 1.1282322196741086, + "learning_rate": 1.6682289965716e-05, + "loss": 0.554, + "step": 16728 + }, + { + "epoch": 0.2890689798175283, + "grad_norm": 0.9816818800194773, + "learning_rate": 1.6681873600819775e-05, + "loss": 0.5647, + "step": 16729 + }, + { + "epoch": 0.28908625933093723, + "grad_norm": 1.3975948798095494, + "learning_rate": 1.6681457214995416e-05, + "loss": 0.7473, + "step": 16730 + }, + { + "epoch": 0.28910353884434614, + "grad_norm": 0.9586913756363479, + "learning_rate": 1.6681040808244223e-05, + "loss": 0.7161, + "step": 16731 + }, + { + "epoch": 0.28912081835775505, + "grad_norm": 0.8597740490773471, + "learning_rate": 1.6680624380567502e-05, + "loss": 0.3175, + "step": 16732 + }, + { + "epoch": 0.28913809787116396, + "grad_norm": 0.7983404426249244, + "learning_rate": 1.668020793196656e-05, + "loss": 0.3571, + "step": 16733 + }, + { + "epoch": 0.28915537738457286, + "grad_norm": 0.828386905998363, + "learning_rate": 1.6679791462442697e-05, + "loss": 0.4077, + "step": 16734 + }, + { + "epoch": 0.2891726568979818, + "grad_norm": 0.41660445362732645, + "learning_rate": 1.6679374971997223e-05, + "loss": 0.632, + "step": 16735 + }, + { + "epoch": 0.2891899364113907, + "grad_norm": 1.1090095024872573, + "learning_rate": 1.6678958460631434e-05, + "loss": 0.4987, + "step": 16736 + }, + { + "epoch": 0.28920721592479953, + "grad_norm": 1.0553653922441262, + "learning_rate": 1.667854192834664e-05, + "loss": 0.4722, + "step": 16737 + }, + { + "epoch": 0.28922449543820844, + "grad_norm": 0.9550675741968112, + "learning_rate": 1.667812537514415e-05, + "loss": 0.354, + "step": 16738 + }, + { + "epoch": 0.28924177495161735, + "grad_norm": 0.9673285408460419, + "learning_rate": 1.667770880102526e-05, + "loss": 0.6901, + "step": 16739 + }, + { + "epoch": 0.28925905446502626, + "grad_norm": 1.3685805592107925, + "learning_rate": 1.667729220599128e-05, + "loss": 0.6279, + "step": 16740 + }, + { + "epoch": 0.28927633397843516, + "grad_norm": 1.0394640779187465, + "learning_rate": 1.6676875590043515e-05, + "loss": 0.4893, + "step": 16741 + }, + { + "epoch": 0.28929361349184407, + "grad_norm": 1.2409756583694704, + "learning_rate": 1.6676458953183266e-05, + "loss": 0.4046, + "step": 16742 + }, + { + "epoch": 0.289310893005253, + "grad_norm": 0.6650740013521115, + "learning_rate": 1.667604229541184e-05, + "loss": 0.6428, + "step": 16743 + }, + { + "epoch": 0.2893281725186619, + "grad_norm": 0.7290258185812101, + "learning_rate": 1.6675625616730542e-05, + "loss": 0.3987, + "step": 16744 + }, + { + "epoch": 0.2893454520320708, + "grad_norm": 1.3469077837271206, + "learning_rate": 1.6675208917140676e-05, + "loss": 0.734, + "step": 16745 + }, + { + "epoch": 0.2893627315454797, + "grad_norm": 0.949032916657817, + "learning_rate": 1.667479219664355e-05, + "loss": 0.3931, + "step": 16746 + }, + { + "epoch": 0.2893800110588886, + "grad_norm": 2.2141625746042286, + "learning_rate": 1.667437545524047e-05, + "loss": 0.6828, + "step": 16747 + }, + { + "epoch": 0.28939729057229746, + "grad_norm": 1.1482841475880312, + "learning_rate": 1.6673958692932736e-05, + "loss": 0.4179, + "step": 16748 + }, + { + "epoch": 0.28941457008570637, + "grad_norm": 0.9620131323295509, + "learning_rate": 1.6673541909721657e-05, + "loss": 0.4081, + "step": 16749 + }, + { + "epoch": 0.2894318495991153, + "grad_norm": 1.4674639981084725, + "learning_rate": 1.6673125105608535e-05, + "loss": 0.4854, + "step": 16750 + }, + { + "epoch": 0.2894491291125242, + "grad_norm": 1.5652980756574721, + "learning_rate": 1.667270828059468e-05, + "loss": 0.4248, + "step": 16751 + }, + { + "epoch": 0.2894664086259331, + "grad_norm": 0.7009888748396401, + "learning_rate": 1.66722914346814e-05, + "loss": 0.3932, + "step": 16752 + }, + { + "epoch": 0.289483688139342, + "grad_norm": 0.7789229474061641, + "learning_rate": 1.6671874567869988e-05, + "loss": 0.5774, + "step": 16753 + }, + { + "epoch": 0.2895009676527509, + "grad_norm": 0.8110293513598895, + "learning_rate": 1.667145768016176e-05, + "loss": 0.4943, + "step": 16754 + }, + { + "epoch": 0.2895182471661598, + "grad_norm": 1.2569999418431859, + "learning_rate": 1.6671040771558025e-05, + "loss": 0.6462, + "step": 16755 + }, + { + "epoch": 0.2895355266795687, + "grad_norm": 1.0810048239278287, + "learning_rate": 1.667062384206008e-05, + "loss": 0.5747, + "step": 16756 + }, + { + "epoch": 0.28955280619297763, + "grad_norm": 1.9341069225366454, + "learning_rate": 1.6670206891669234e-05, + "loss": 0.5095, + "step": 16757 + }, + { + "epoch": 0.2895700857063865, + "grad_norm": 1.0711213669159272, + "learning_rate": 1.666978992038679e-05, + "loss": 0.5292, + "step": 16758 + }, + { + "epoch": 0.2895873652197954, + "grad_norm": 1.070201994009315, + "learning_rate": 1.6669372928214062e-05, + "loss": 0.3807, + "step": 16759 + }, + { + "epoch": 0.2896046447332043, + "grad_norm": 0.921814449790304, + "learning_rate": 1.666895591515235e-05, + "loss": 0.4822, + "step": 16760 + }, + { + "epoch": 0.2896219242466132, + "grad_norm": 0.6983292784527272, + "learning_rate": 1.6668538881202956e-05, + "loss": 0.6385, + "step": 16761 + }, + { + "epoch": 0.2896392037600221, + "grad_norm": 1.3942203830123752, + "learning_rate": 1.6668121826367198e-05, + "loss": 0.3866, + "step": 16762 + }, + { + "epoch": 0.289656483273431, + "grad_norm": 1.020234415825812, + "learning_rate": 1.6667704750646375e-05, + "loss": 0.404, + "step": 16763 + }, + { + "epoch": 0.28967376278683993, + "grad_norm": 1.0554417186655793, + "learning_rate": 1.6667287654041793e-05, + "loss": 0.575, + "step": 16764 + }, + { + "epoch": 0.28969104230024884, + "grad_norm": 1.3539130851208305, + "learning_rate": 1.6666870536554758e-05, + "loss": 0.4978, + "step": 16765 + }, + { + "epoch": 0.28970832181365774, + "grad_norm": 1.3519599868636931, + "learning_rate": 1.666645339818658e-05, + "loss": 0.6251, + "step": 16766 + }, + { + "epoch": 0.28972560132706665, + "grad_norm": 0.7768025760132571, + "learning_rate": 1.666603623893856e-05, + "loss": 0.6662, + "step": 16767 + }, + { + "epoch": 0.28974288084047556, + "grad_norm": 1.0723063669139545, + "learning_rate": 1.6665619058812012e-05, + "loss": 0.4064, + "step": 16768 + }, + { + "epoch": 0.2897601603538844, + "grad_norm": 0.7559555888051719, + "learning_rate": 1.6665201857808237e-05, + "loss": 0.3532, + "step": 16769 + }, + { + "epoch": 0.2897774398672933, + "grad_norm": 1.2334843952432124, + "learning_rate": 1.666478463592854e-05, + "loss": 0.445, + "step": 16770 + }, + { + "epoch": 0.2897947193807022, + "grad_norm": 1.050627333141298, + "learning_rate": 1.6664367393174235e-05, + "loss": 0.7093, + "step": 16771 + }, + { + "epoch": 0.28981199889411113, + "grad_norm": 0.8973828016848757, + "learning_rate": 1.6663950129546623e-05, + "loss": 0.5176, + "step": 16772 + }, + { + "epoch": 0.28982927840752004, + "grad_norm": 1.1180903328596783, + "learning_rate": 1.6663532845047012e-05, + "loss": 0.4322, + "step": 16773 + }, + { + "epoch": 0.28984655792092895, + "grad_norm": 0.9637653995424937, + "learning_rate": 1.6663115539676713e-05, + "loss": 0.6109, + "step": 16774 + }, + { + "epoch": 0.28986383743433786, + "grad_norm": 0.5469600884447605, + "learning_rate": 1.666269821343703e-05, + "loss": 0.5586, + "step": 16775 + }, + { + "epoch": 0.28988111694774676, + "grad_norm": 1.7116791227835682, + "learning_rate": 1.6662280866329266e-05, + "loss": 0.3777, + "step": 16776 + }, + { + "epoch": 0.28989839646115567, + "grad_norm": 0.8178390806860325, + "learning_rate": 1.6661863498354734e-05, + "loss": 0.6118, + "step": 16777 + }, + { + "epoch": 0.2899156759745646, + "grad_norm": 0.7879878834526737, + "learning_rate": 1.6661446109514738e-05, + "loss": 0.4548, + "step": 16778 + }, + { + "epoch": 0.28993295548797343, + "grad_norm": 0.65713887344146, + "learning_rate": 1.666102869981059e-05, + "loss": 0.4551, + "step": 16779 + }, + { + "epoch": 0.28995023500138234, + "grad_norm": 1.0259634553703054, + "learning_rate": 1.666061126924359e-05, + "loss": 0.4943, + "step": 16780 + }, + { + "epoch": 0.28996751451479125, + "grad_norm": 1.2090152614084462, + "learning_rate": 1.6660193817815048e-05, + "loss": 0.6019, + "step": 16781 + }, + { + "epoch": 0.28998479402820015, + "grad_norm": 1.3136632451544412, + "learning_rate": 1.6659776345526278e-05, + "loss": 0.5946, + "step": 16782 + }, + { + "epoch": 0.29000207354160906, + "grad_norm": 0.7490323687924056, + "learning_rate": 1.665935885237858e-05, + "loss": 0.4488, + "step": 16783 + }, + { + "epoch": 0.29001935305501797, + "grad_norm": 1.7316335923505572, + "learning_rate": 1.6658941338373263e-05, + "loss": 0.5975, + "step": 16784 + }, + { + "epoch": 0.2900366325684269, + "grad_norm": 1.180095011261754, + "learning_rate": 1.6658523803511638e-05, + "loss": 0.595, + "step": 16785 + }, + { + "epoch": 0.2900539120818358, + "grad_norm": 1.6635126139143237, + "learning_rate": 1.665810624779501e-05, + "loss": 0.4365, + "step": 16786 + }, + { + "epoch": 0.2900711915952447, + "grad_norm": 1.6910478757890863, + "learning_rate": 1.665768867122468e-05, + "loss": 0.6895, + "step": 16787 + }, + { + "epoch": 0.2900884711086536, + "grad_norm": 0.827747268528612, + "learning_rate": 1.6657271073801972e-05, + "loss": 0.4872, + "step": 16788 + }, + { + "epoch": 0.2901057506220625, + "grad_norm": 1.1081132726143352, + "learning_rate": 1.6656853455528184e-05, + "loss": 0.4892, + "step": 16789 + }, + { + "epoch": 0.29012303013547136, + "grad_norm": 0.8360595577659483, + "learning_rate": 1.6656435816404623e-05, + "loss": 0.5234, + "step": 16790 + }, + { + "epoch": 0.29014030964888027, + "grad_norm": 0.7357010080904548, + "learning_rate": 1.66560181564326e-05, + "loss": 0.4085, + "step": 16791 + }, + { + "epoch": 0.2901575891622892, + "grad_norm": 0.8710763436688094, + "learning_rate": 1.665560047561342e-05, + "loss": 0.5363, + "step": 16792 + }, + { + "epoch": 0.2901748686756981, + "grad_norm": 0.8567011617616619, + "learning_rate": 1.6655182773948397e-05, + "loss": 0.4583, + "step": 16793 + }, + { + "epoch": 0.290192148189107, + "grad_norm": 0.8328862967852517, + "learning_rate": 1.6654765051438833e-05, + "loss": 0.5907, + "step": 16794 + }, + { + "epoch": 0.2902094277025159, + "grad_norm": 1.460682050997852, + "learning_rate": 1.6654347308086038e-05, + "loss": 0.5728, + "step": 16795 + }, + { + "epoch": 0.2902267072159248, + "grad_norm": 0.4248997934760208, + "learning_rate": 1.6653929543891323e-05, + "loss": 0.655, + "step": 16796 + }, + { + "epoch": 0.2902439867293337, + "grad_norm": 0.8275515835314031, + "learning_rate": 1.6653511758855997e-05, + "loss": 0.4845, + "step": 16797 + }, + { + "epoch": 0.2902612662427426, + "grad_norm": 1.3796635242354647, + "learning_rate": 1.665309395298137e-05, + "loss": 0.7986, + "step": 16798 + }, + { + "epoch": 0.29027854575615153, + "grad_norm": 0.8901368627967762, + "learning_rate": 1.6652676126268738e-05, + "loss": 0.3596, + "step": 16799 + }, + { + "epoch": 0.29029582526956044, + "grad_norm": 0.8326652070630876, + "learning_rate": 1.6652258278719425e-05, + "loss": 0.6313, + "step": 16800 + }, + { + "epoch": 0.2903131047829693, + "grad_norm": 0.8571767564896393, + "learning_rate": 1.665184041033473e-05, + "loss": 0.4396, + "step": 16801 + }, + { + "epoch": 0.2903303842963782, + "grad_norm": 1.233311940257715, + "learning_rate": 1.665142252111597e-05, + "loss": 0.4917, + "step": 16802 + }, + { + "epoch": 0.2903476638097871, + "grad_norm": 0.8179399887507984, + "learning_rate": 1.6651004611064447e-05, + "loss": 0.381, + "step": 16803 + }, + { + "epoch": 0.290364943323196, + "grad_norm": 0.8212355923203214, + "learning_rate": 1.6650586680181468e-05, + "loss": 0.5478, + "step": 16804 + }, + { + "epoch": 0.2903822228366049, + "grad_norm": 1.565796593087223, + "learning_rate": 1.665016872846835e-05, + "loss": 0.6636, + "step": 16805 + }, + { + "epoch": 0.2903995023500138, + "grad_norm": 1.2445406827604002, + "learning_rate": 1.6649750755926396e-05, + "loss": 0.6707, + "step": 16806 + }, + { + "epoch": 0.29041678186342273, + "grad_norm": 1.5984545967257007, + "learning_rate": 1.664933276255692e-05, + "loss": 0.5177, + "step": 16807 + }, + { + "epoch": 0.29043406137683164, + "grad_norm": 0.6064358602423047, + "learning_rate": 1.6648914748361226e-05, + "loss": 0.3767, + "step": 16808 + }, + { + "epoch": 0.29045134089024055, + "grad_norm": 1.0058693991146546, + "learning_rate": 1.664849671334063e-05, + "loss": 0.4098, + "step": 16809 + }, + { + "epoch": 0.29046862040364946, + "grad_norm": 0.8141699309380243, + "learning_rate": 1.6648078657496434e-05, + "loss": 0.3551, + "step": 16810 + }, + { + "epoch": 0.2904858999170583, + "grad_norm": 1.0954600980421865, + "learning_rate": 1.6647660580829947e-05, + "loss": 0.4703, + "step": 16811 + }, + { + "epoch": 0.2905031794304672, + "grad_norm": 1.284799092420258, + "learning_rate": 1.6647242483342486e-05, + "loss": 0.8314, + "step": 16812 + }, + { + "epoch": 0.2905204589438761, + "grad_norm": 0.6357622590109636, + "learning_rate": 1.6646824365035357e-05, + "loss": 0.7608, + "step": 16813 + }, + { + "epoch": 0.29053773845728503, + "grad_norm": 1.3969010064600524, + "learning_rate": 1.6646406225909867e-05, + "loss": 0.4946, + "step": 16814 + }, + { + "epoch": 0.29055501797069394, + "grad_norm": 1.0894078170554775, + "learning_rate": 1.664598806596733e-05, + "loss": 0.6721, + "step": 16815 + }, + { + "epoch": 0.29057229748410285, + "grad_norm": 0.9107221192193099, + "learning_rate": 1.664556988520905e-05, + "loss": 0.5372, + "step": 16816 + }, + { + "epoch": 0.29058957699751176, + "grad_norm": 0.8515893595945733, + "learning_rate": 1.664515168363634e-05, + "loss": 0.3149, + "step": 16817 + }, + { + "epoch": 0.29060685651092066, + "grad_norm": 1.212639789793006, + "learning_rate": 1.664473346125051e-05, + "loss": 0.5404, + "step": 16818 + }, + { + "epoch": 0.29062413602432957, + "grad_norm": 0.7651534673600405, + "learning_rate": 1.6644315218052874e-05, + "loss": 0.3582, + "step": 16819 + }, + { + "epoch": 0.2906414155377385, + "grad_norm": 1.1161016407541382, + "learning_rate": 1.6643896954044736e-05, + "loss": 0.4893, + "step": 16820 + }, + { + "epoch": 0.2906586950511474, + "grad_norm": 1.2042801253852118, + "learning_rate": 1.6643478669227407e-05, + "loss": 0.7349, + "step": 16821 + }, + { + "epoch": 0.29067597456455624, + "grad_norm": 1.2188674375505435, + "learning_rate": 1.66430603636022e-05, + "loss": 0.4391, + "step": 16822 + }, + { + "epoch": 0.29069325407796515, + "grad_norm": 0.38561222755444907, + "learning_rate": 1.664264203717042e-05, + "loss": 0.6812, + "step": 16823 + }, + { + "epoch": 0.29071053359137405, + "grad_norm": 0.753806695860578, + "learning_rate": 1.6642223689933378e-05, + "loss": 0.726, + "step": 16824 + }, + { + "epoch": 0.29072781310478296, + "grad_norm": 0.7903246233814729, + "learning_rate": 1.664180532189239e-05, + "loss": 0.6342, + "step": 16825 + }, + { + "epoch": 0.29074509261819187, + "grad_norm": 0.8567341321254113, + "learning_rate": 1.664138693304876e-05, + "loss": 0.5211, + "step": 16826 + }, + { + "epoch": 0.2907623721316008, + "grad_norm": 0.9166942939834052, + "learning_rate": 1.6640968523403808e-05, + "loss": 0.5942, + "step": 16827 + }, + { + "epoch": 0.2907796516450097, + "grad_norm": 0.675629869281226, + "learning_rate": 1.664055009295883e-05, + "loss": 0.3181, + "step": 16828 + }, + { + "epoch": 0.2907969311584186, + "grad_norm": 0.4941741790390984, + "learning_rate": 1.6640131641715154e-05, + "loss": 0.7302, + "step": 16829 + }, + { + "epoch": 0.2908142106718275, + "grad_norm": 0.9645247829182915, + "learning_rate": 1.6639713169674074e-05, + "loss": 0.4439, + "step": 16830 + }, + { + "epoch": 0.2908314901852364, + "grad_norm": 1.2413135930846542, + "learning_rate": 1.663929467683691e-05, + "loss": 0.4719, + "step": 16831 + }, + { + "epoch": 0.29084876969864526, + "grad_norm": 1.6264365688769775, + "learning_rate": 1.663887616320497e-05, + "loss": 0.6643, + "step": 16832 + }, + { + "epoch": 0.29086604921205417, + "grad_norm": 0.41238706322598384, + "learning_rate": 1.6638457628779563e-05, + "loss": 0.5821, + "step": 16833 + }, + { + "epoch": 0.2908833287254631, + "grad_norm": 1.4307293039190068, + "learning_rate": 1.6638039073562005e-05, + "loss": 0.5156, + "step": 16834 + }, + { + "epoch": 0.290900608238872, + "grad_norm": 1.3188936908174553, + "learning_rate": 1.6637620497553604e-05, + "loss": 0.5265, + "step": 16835 + }, + { + "epoch": 0.2909178877522809, + "grad_norm": 1.1141844269945276, + "learning_rate": 1.663720190075567e-05, + "loss": 0.8634, + "step": 16836 + }, + { + "epoch": 0.2909351672656898, + "grad_norm": 0.8973707982474526, + "learning_rate": 1.6636783283169514e-05, + "loss": 0.5539, + "step": 16837 + }, + { + "epoch": 0.2909524467790987, + "grad_norm": 0.5470021144566599, + "learning_rate": 1.6636364644796452e-05, + "loss": 0.6752, + "step": 16838 + }, + { + "epoch": 0.2909697262925076, + "grad_norm": 0.8805269265441749, + "learning_rate": 1.6635945985637788e-05, + "loss": 0.4821, + "step": 16839 + }, + { + "epoch": 0.2909870058059165, + "grad_norm": 0.746049180080876, + "learning_rate": 1.6635527305694842e-05, + "loss": 0.5781, + "step": 16840 + }, + { + "epoch": 0.29100428531932543, + "grad_norm": 0.7641254643236716, + "learning_rate": 1.6635108604968916e-05, + "loss": 0.6025, + "step": 16841 + }, + { + "epoch": 0.29102156483273434, + "grad_norm": 0.782974961354384, + "learning_rate": 1.6634689883461325e-05, + "loss": 0.4001, + "step": 16842 + }, + { + "epoch": 0.2910388443461432, + "grad_norm": 1.179242219553561, + "learning_rate": 1.6634271141173384e-05, + "loss": 0.5918, + "step": 16843 + }, + { + "epoch": 0.2910561238595521, + "grad_norm": 1.043117125064096, + "learning_rate": 1.6633852378106403e-05, + "loss": 0.5429, + "step": 16844 + }, + { + "epoch": 0.291073403372961, + "grad_norm": 1.0841389855287542, + "learning_rate": 1.6633433594261686e-05, + "loss": 0.4607, + "step": 16845 + }, + { + "epoch": 0.2910906828863699, + "grad_norm": 1.0285220720059616, + "learning_rate": 1.6633014789640553e-05, + "loss": 0.3955, + "step": 16846 + }, + { + "epoch": 0.2911079623997788, + "grad_norm": 0.550311197643335, + "learning_rate": 1.6632595964244316e-05, + "loss": 0.2309, + "step": 16847 + }, + { + "epoch": 0.2911252419131877, + "grad_norm": 1.345159807203212, + "learning_rate": 1.6632177118074282e-05, + "loss": 0.4468, + "step": 16848 + }, + { + "epoch": 0.29114252142659663, + "grad_norm": 0.5767077527492919, + "learning_rate": 1.6631758251131768e-05, + "loss": 0.6939, + "step": 16849 + }, + { + "epoch": 0.29115980094000554, + "grad_norm": 0.6812730897963098, + "learning_rate": 1.663133936341808e-05, + "loss": 0.4879, + "step": 16850 + }, + { + "epoch": 0.29117708045341445, + "grad_norm": 0.8809545712867171, + "learning_rate": 1.6630920454934537e-05, + "loss": 0.7539, + "step": 16851 + }, + { + "epoch": 0.29119435996682336, + "grad_norm": 1.3114189360483985, + "learning_rate": 1.6630501525682446e-05, + "loss": 0.6015, + "step": 16852 + }, + { + "epoch": 0.2912116394802322, + "grad_norm": 0.9261377208824085, + "learning_rate": 1.663008257566312e-05, + "loss": 0.4568, + "step": 16853 + }, + { + "epoch": 0.2912289189936411, + "grad_norm": 1.3320886333582438, + "learning_rate": 1.662966360487787e-05, + "loss": 0.6899, + "step": 16854 + }, + { + "epoch": 0.29124619850705, + "grad_norm": 1.2444899146920207, + "learning_rate": 1.662924461332801e-05, + "loss": 0.4004, + "step": 16855 + }, + { + "epoch": 0.29126347802045893, + "grad_norm": 0.9307076760108446, + "learning_rate": 1.662882560101485e-05, + "loss": 0.6414, + "step": 16856 + }, + { + "epoch": 0.29128075753386784, + "grad_norm": 0.44815832153378915, + "learning_rate": 1.662840656793971e-05, + "loss": 0.806, + "step": 16857 + }, + { + "epoch": 0.29129803704727675, + "grad_norm": 1.2140895733819832, + "learning_rate": 1.662798751410389e-05, + "loss": 0.3444, + "step": 16858 + }, + { + "epoch": 0.29131531656068566, + "grad_norm": 1.8466155553777637, + "learning_rate": 1.6627568439508716e-05, + "loss": 0.5716, + "step": 16859 + }, + { + "epoch": 0.29133259607409456, + "grad_norm": 0.8762926686174606, + "learning_rate": 1.6627149344155493e-05, + "loss": 0.4324, + "step": 16860 + }, + { + "epoch": 0.29134987558750347, + "grad_norm": 0.9958717615987335, + "learning_rate": 1.6626730228045532e-05, + "loss": 0.6931, + "step": 16861 + }, + { + "epoch": 0.2913671551009124, + "grad_norm": 1.0770964341688787, + "learning_rate": 1.662631109118015e-05, + "loss": 0.4227, + "step": 16862 + }, + { + "epoch": 0.2913844346143213, + "grad_norm": 1.118309512799184, + "learning_rate": 1.6625891933560658e-05, + "loss": 0.5258, + "step": 16863 + }, + { + "epoch": 0.29140171412773014, + "grad_norm": 0.8154990690567967, + "learning_rate": 1.6625472755188364e-05, + "loss": 0.5582, + "step": 16864 + }, + { + "epoch": 0.29141899364113905, + "grad_norm": 0.5850477408806092, + "learning_rate": 1.6625053556064593e-05, + "loss": 0.8032, + "step": 16865 + }, + { + "epoch": 0.29143627315454795, + "grad_norm": 0.7815206110054644, + "learning_rate": 1.6624634336190646e-05, + "loss": 0.3593, + "step": 16866 + }, + { + "epoch": 0.29145355266795686, + "grad_norm": 1.0106086102920426, + "learning_rate": 1.662421509556784e-05, + "loss": 0.6114, + "step": 16867 + }, + { + "epoch": 0.29147083218136577, + "grad_norm": 0.9513633083191919, + "learning_rate": 1.6623795834197496e-05, + "loss": 0.5485, + "step": 16868 + }, + { + "epoch": 0.2914881116947747, + "grad_norm": 1.122970147780308, + "learning_rate": 1.6623376552080915e-05, + "loss": 0.575, + "step": 16869 + }, + { + "epoch": 0.2915053912081836, + "grad_norm": 1.772267084553012, + "learning_rate": 1.6622957249219416e-05, + "loss": 0.3749, + "step": 16870 + }, + { + "epoch": 0.2915226707215925, + "grad_norm": 0.8805395209555861, + "learning_rate": 1.662253792561431e-05, + "loss": 0.6173, + "step": 16871 + }, + { + "epoch": 0.2915399502350014, + "grad_norm": 0.9367782135385408, + "learning_rate": 1.6622118581266915e-05, + "loss": 0.396, + "step": 16872 + }, + { + "epoch": 0.2915572297484103, + "grad_norm": 2.3054114998338724, + "learning_rate": 1.662169921617854e-05, + "loss": 0.4815, + "step": 16873 + }, + { + "epoch": 0.2915745092618192, + "grad_norm": 2.182613032869182, + "learning_rate": 1.66212798303505e-05, + "loss": 0.459, + "step": 16874 + }, + { + "epoch": 0.29159178877522807, + "grad_norm": 1.3407303516583973, + "learning_rate": 1.6620860423784105e-05, + "loss": 0.3752, + "step": 16875 + }, + { + "epoch": 0.291609068288637, + "grad_norm": 1.2737504970555937, + "learning_rate": 1.6620440996480676e-05, + "loss": 0.5495, + "step": 16876 + }, + { + "epoch": 0.2916263478020459, + "grad_norm": 1.4981271713645352, + "learning_rate": 1.6620021548441522e-05, + "loss": 0.475, + "step": 16877 + }, + { + "epoch": 0.2916436273154548, + "grad_norm": 0.8356547172747135, + "learning_rate": 1.6619602079667956e-05, + "loss": 0.2954, + "step": 16878 + }, + { + "epoch": 0.2916609068288637, + "grad_norm": 0.7837614668602791, + "learning_rate": 1.6619182590161294e-05, + "loss": 0.4494, + "step": 16879 + }, + { + "epoch": 0.2916781863422726, + "grad_norm": 0.9297760911911327, + "learning_rate": 1.661876307992285e-05, + "loss": 0.5337, + "step": 16880 + }, + { + "epoch": 0.2916954658556815, + "grad_norm": 1.1582486867514257, + "learning_rate": 1.6618343548953936e-05, + "loss": 0.4123, + "step": 16881 + }, + { + "epoch": 0.2917127453690904, + "grad_norm": 0.8052729483870433, + "learning_rate": 1.661792399725587e-05, + "loss": 0.5207, + "step": 16882 + }, + { + "epoch": 0.29173002488249933, + "grad_norm": 0.7522339737035767, + "learning_rate": 1.6617504424829956e-05, + "loss": 0.3785, + "step": 16883 + }, + { + "epoch": 0.29174730439590824, + "grad_norm": 1.2524659212022635, + "learning_rate": 1.661708483167752e-05, + "loss": 0.4672, + "step": 16884 + }, + { + "epoch": 0.2917645839093171, + "grad_norm": 0.6446348975685249, + "learning_rate": 1.6616665217799873e-05, + "loss": 0.5407, + "step": 16885 + }, + { + "epoch": 0.291781863422726, + "grad_norm": 0.7941214380761918, + "learning_rate": 1.6616245583198324e-05, + "loss": 0.62, + "step": 16886 + }, + { + "epoch": 0.2917991429361349, + "grad_norm": 0.9132575859539225, + "learning_rate": 1.6615825927874195e-05, + "loss": 0.5541, + "step": 16887 + }, + { + "epoch": 0.2918164224495438, + "grad_norm": 1.4233364223041873, + "learning_rate": 1.6615406251828794e-05, + "loss": 0.4247, + "step": 16888 + }, + { + "epoch": 0.2918337019629527, + "grad_norm": 1.1802905676943927, + "learning_rate": 1.6614986555063436e-05, + "loss": 0.6055, + "step": 16889 + }, + { + "epoch": 0.2918509814763616, + "grad_norm": 1.42353820168992, + "learning_rate": 1.661456683757944e-05, + "loss": 0.5756, + "step": 16890 + }, + { + "epoch": 0.29186826098977053, + "grad_norm": 0.8644370711424137, + "learning_rate": 1.661414709937812e-05, + "loss": 0.4556, + "step": 16891 + }, + { + "epoch": 0.29188554050317944, + "grad_norm": 0.9838508909788218, + "learning_rate": 1.6613727340460784e-05, + "loss": 0.4708, + "step": 16892 + }, + { + "epoch": 0.29190282001658835, + "grad_norm": 0.8891435887598826, + "learning_rate": 1.6613307560828756e-05, + "loss": 0.4057, + "step": 16893 + }, + { + "epoch": 0.29192009952999726, + "grad_norm": 0.6370703854690897, + "learning_rate": 1.661288776048334e-05, + "loss": 0.2617, + "step": 16894 + }, + { + "epoch": 0.29193737904340616, + "grad_norm": 1.3486583530013114, + "learning_rate": 1.661246793942586e-05, + "loss": 0.433, + "step": 16895 + }, + { + "epoch": 0.291954658556815, + "grad_norm": 1.0805452008180452, + "learning_rate": 1.661204809765763e-05, + "loss": 0.5371, + "step": 16896 + }, + { + "epoch": 0.2919719380702239, + "grad_norm": 0.8472955536822352, + "learning_rate": 1.661162823517996e-05, + "loss": 0.5527, + "step": 16897 + }, + { + "epoch": 0.29198921758363283, + "grad_norm": 0.8935259014716028, + "learning_rate": 1.661120835199417e-05, + "loss": 0.4887, + "step": 16898 + }, + { + "epoch": 0.29200649709704174, + "grad_norm": 1.3669214270473073, + "learning_rate": 1.6610788448101575e-05, + "loss": 0.5255, + "step": 16899 + }, + { + "epoch": 0.29202377661045065, + "grad_norm": 1.3329563216777884, + "learning_rate": 1.661036852350348e-05, + "loss": 0.4837, + "step": 16900 + }, + { + "epoch": 0.29204105612385955, + "grad_norm": 0.9960260991088159, + "learning_rate": 1.6609948578201217e-05, + "loss": 0.7103, + "step": 16901 + }, + { + "epoch": 0.29205833563726846, + "grad_norm": 0.5932109063031201, + "learning_rate": 1.6609528612196092e-05, + "loss": 0.7199, + "step": 16902 + }, + { + "epoch": 0.29207561515067737, + "grad_norm": 1.0324348203683305, + "learning_rate": 1.6609108625489415e-05, + "loss": 0.4911, + "step": 16903 + }, + { + "epoch": 0.2920928946640863, + "grad_norm": 1.1206050596297632, + "learning_rate": 1.6608688618082513e-05, + "loss": 0.4024, + "step": 16904 + }, + { + "epoch": 0.2921101741774952, + "grad_norm": 0.5314355787733981, + "learning_rate": 1.6608268589976694e-05, + "loss": 0.8071, + "step": 16905 + }, + { + "epoch": 0.29212745369090404, + "grad_norm": 1.009827558882171, + "learning_rate": 1.6607848541173272e-05, + "loss": 0.5987, + "step": 16906 + }, + { + "epoch": 0.29214473320431295, + "grad_norm": 0.9056120294472724, + "learning_rate": 1.6607428471673573e-05, + "loss": 0.5543, + "step": 16907 + }, + { + "epoch": 0.29216201271772185, + "grad_norm": 0.7212370036765496, + "learning_rate": 1.6607008381478903e-05, + "loss": 0.5819, + "step": 16908 + }, + { + "epoch": 0.29217929223113076, + "grad_norm": 1.2686911652783222, + "learning_rate": 1.660658827059058e-05, + "loss": 0.4997, + "step": 16909 + }, + { + "epoch": 0.29219657174453967, + "grad_norm": 0.8348943979628249, + "learning_rate": 1.6606168139009917e-05, + "loss": 0.6176, + "step": 16910 + }, + { + "epoch": 0.2922138512579486, + "grad_norm": 1.3351377800288673, + "learning_rate": 1.660574798673824e-05, + "loss": 0.5542, + "step": 16911 + }, + { + "epoch": 0.2922311307713575, + "grad_norm": 0.9409958609591, + "learning_rate": 1.6605327813776857e-05, + "loss": 0.6387, + "step": 16912 + }, + { + "epoch": 0.2922484102847664, + "grad_norm": 1.179409933922404, + "learning_rate": 1.660490762012708e-05, + "loss": 0.4161, + "step": 16913 + }, + { + "epoch": 0.2922656897981753, + "grad_norm": 0.7236285296331971, + "learning_rate": 1.6604487405790237e-05, + "loss": 0.5098, + "step": 16914 + }, + { + "epoch": 0.2922829693115842, + "grad_norm": 1.5219888745014056, + "learning_rate": 1.6604067170767635e-05, + "loss": 0.3709, + "step": 16915 + }, + { + "epoch": 0.2923002488249931, + "grad_norm": 0.5557420899613532, + "learning_rate": 1.6603646915060593e-05, + "loss": 0.321, + "step": 16916 + }, + { + "epoch": 0.29231752833840197, + "grad_norm": 1.4691305329895066, + "learning_rate": 1.6603226638670423e-05, + "loss": 0.5277, + "step": 16917 + }, + { + "epoch": 0.2923348078518109, + "grad_norm": 0.9053736076502779, + "learning_rate": 1.660280634159845e-05, + "loss": 0.565, + "step": 16918 + }, + { + "epoch": 0.2923520873652198, + "grad_norm": 0.81167670063717, + "learning_rate": 1.6602386023845984e-05, + "loss": 0.5671, + "step": 16919 + }, + { + "epoch": 0.2923693668786287, + "grad_norm": 0.5787357015762109, + "learning_rate": 1.6601965685414343e-05, + "loss": 0.3965, + "step": 16920 + }, + { + "epoch": 0.2923866463920376, + "grad_norm": 0.4792956754469158, + "learning_rate": 1.6601545326304843e-05, + "loss": 0.8187, + "step": 16921 + }, + { + "epoch": 0.2924039259054465, + "grad_norm": 1.2029672160114695, + "learning_rate": 1.660112494651881e-05, + "loss": 0.4794, + "step": 16922 + }, + { + "epoch": 0.2924212054188554, + "grad_norm": 1.085206660999293, + "learning_rate": 1.660070454605754e-05, + "loss": 0.687, + "step": 16923 + }, + { + "epoch": 0.2924384849322643, + "grad_norm": 0.8483976374278941, + "learning_rate": 1.6600284124922364e-05, + "loss": 0.5024, + "step": 16924 + }, + { + "epoch": 0.2924557644456732, + "grad_norm": 0.7084718344963628, + "learning_rate": 1.65998636831146e-05, + "loss": 0.4692, + "step": 16925 + }, + { + "epoch": 0.29247304395908214, + "grad_norm": 0.42280798840738937, + "learning_rate": 1.6599443220635562e-05, + "loss": 0.6141, + "step": 16926 + }, + { + "epoch": 0.292490323472491, + "grad_norm": 1.1302602855535249, + "learning_rate": 1.6599022737486564e-05, + "loss": 0.4596, + "step": 16927 + }, + { + "epoch": 0.2925076029858999, + "grad_norm": 0.606488256181879, + "learning_rate": 1.6598602233668925e-05, + "loss": 0.4357, + "step": 16928 + }, + { + "epoch": 0.2925248824993088, + "grad_norm": 1.1356777112076546, + "learning_rate": 1.6598181709183964e-05, + "loss": 0.4975, + "step": 16929 + }, + { + "epoch": 0.2925421620127177, + "grad_norm": 1.009286747966413, + "learning_rate": 1.6597761164032994e-05, + "loss": 0.6113, + "step": 16930 + }, + { + "epoch": 0.2925594415261266, + "grad_norm": 1.019956772656267, + "learning_rate": 1.6597340598217338e-05, + "loss": 0.2604, + "step": 16931 + }, + { + "epoch": 0.2925767210395355, + "grad_norm": 1.103185932461887, + "learning_rate": 1.659692001173831e-05, + "loss": 0.4327, + "step": 16932 + }, + { + "epoch": 0.29259400055294443, + "grad_norm": 1.2673548619116208, + "learning_rate": 1.6596499404597224e-05, + "loss": 0.4793, + "step": 16933 + }, + { + "epoch": 0.29261128006635334, + "grad_norm": 0.7600295637386737, + "learning_rate": 1.65960787767954e-05, + "loss": 0.739, + "step": 16934 + }, + { + "epoch": 0.29262855957976225, + "grad_norm": 0.8700365029426222, + "learning_rate": 1.659565812833416e-05, + "loss": 0.526, + "step": 16935 + }, + { + "epoch": 0.29264583909317116, + "grad_norm": 1.2383724158068827, + "learning_rate": 1.6595237459214814e-05, + "loss": 0.469, + "step": 16936 + }, + { + "epoch": 0.29266311860658006, + "grad_norm": 1.477338644021086, + "learning_rate": 1.6594816769438684e-05, + "loss": 0.7043, + "step": 16937 + }, + { + "epoch": 0.2926803981199889, + "grad_norm": 0.8166647229843289, + "learning_rate": 1.6594396059007087e-05, + "loss": 0.5871, + "step": 16938 + }, + { + "epoch": 0.2926976776333978, + "grad_norm": 1.1465482478813622, + "learning_rate": 1.659397532792134e-05, + "loss": 0.6478, + "step": 16939 + }, + { + "epoch": 0.29271495714680673, + "grad_norm": 0.4286775655762313, + "learning_rate": 1.6593554576182762e-05, + "loss": 0.7819, + "step": 16940 + }, + { + "epoch": 0.29273223666021564, + "grad_norm": 0.644778892243342, + "learning_rate": 1.659313380379267e-05, + "loss": 0.4095, + "step": 16941 + }, + { + "epoch": 0.29274951617362455, + "grad_norm": 0.4770516420488351, + "learning_rate": 1.6592713010752382e-05, + "loss": 0.7486, + "step": 16942 + }, + { + "epoch": 0.29276679568703345, + "grad_norm": 1.263135216340717, + "learning_rate": 1.6592292197063217e-05, + "loss": 0.626, + "step": 16943 + }, + { + "epoch": 0.29278407520044236, + "grad_norm": 0.7878838196831514, + "learning_rate": 1.6591871362726488e-05, + "loss": 0.5773, + "step": 16944 + }, + { + "epoch": 0.29280135471385127, + "grad_norm": 1.1676516231092668, + "learning_rate": 1.659145050774352e-05, + "loss": 0.6411, + "step": 16945 + }, + { + "epoch": 0.2928186342272602, + "grad_norm": 1.5442914203536933, + "learning_rate": 1.6591029632115627e-05, + "loss": 0.4323, + "step": 16946 + }, + { + "epoch": 0.2928359137406691, + "grad_norm": 0.8085524430727338, + "learning_rate": 1.6590608735844127e-05, + "loss": 0.3985, + "step": 16947 + }, + { + "epoch": 0.292853193254078, + "grad_norm": 0.9021361121325916, + "learning_rate": 1.6590187818930343e-05, + "loss": 0.3619, + "step": 16948 + }, + { + "epoch": 0.29287047276748684, + "grad_norm": 1.2382288799643275, + "learning_rate": 1.6589766881375585e-05, + "loss": 0.8328, + "step": 16949 + }, + { + "epoch": 0.29288775228089575, + "grad_norm": 0.6405175885341952, + "learning_rate": 1.6589345923181182e-05, + "loss": 0.3126, + "step": 16950 + }, + { + "epoch": 0.29290503179430466, + "grad_norm": 1.2468543714737617, + "learning_rate": 1.6588924944348443e-05, + "loss": 0.5584, + "step": 16951 + }, + { + "epoch": 0.29292231130771357, + "grad_norm": 0.7687931088254777, + "learning_rate": 1.658850394487869e-05, + "loss": 0.398, + "step": 16952 + }, + { + "epoch": 0.2929395908211225, + "grad_norm": 0.840613647011466, + "learning_rate": 1.6588082924773247e-05, + "loss": 0.5076, + "step": 16953 + }, + { + "epoch": 0.2929568703345314, + "grad_norm": 1.1712502447236492, + "learning_rate": 1.6587661884033424e-05, + "loss": 0.3022, + "step": 16954 + }, + { + "epoch": 0.2929741498479403, + "grad_norm": 0.7420833100405281, + "learning_rate": 1.6587240822660542e-05, + "loss": 0.6696, + "step": 16955 + }, + { + "epoch": 0.2929914293613492, + "grad_norm": 1.9881129623391456, + "learning_rate": 1.6586819740655923e-05, + "loss": 0.7477, + "step": 16956 + }, + { + "epoch": 0.2930087088747581, + "grad_norm": 0.8061699778202939, + "learning_rate": 1.658639863802088e-05, + "loss": 0.5101, + "step": 16957 + }, + { + "epoch": 0.293025988388167, + "grad_norm": 1.234035738248148, + "learning_rate": 1.6585977514756742e-05, + "loss": 0.5316, + "step": 16958 + }, + { + "epoch": 0.29304326790157587, + "grad_norm": 0.6758569439510835, + "learning_rate": 1.6585556370864816e-05, + "loss": 0.6072, + "step": 16959 + }, + { + "epoch": 0.2930605474149848, + "grad_norm": 1.0628552570677265, + "learning_rate": 1.6585135206346432e-05, + "loss": 0.5383, + "step": 16960 + }, + { + "epoch": 0.2930778269283937, + "grad_norm": 1.235126677747917, + "learning_rate": 1.65847140212029e-05, + "loss": 0.6074, + "step": 16961 + }, + { + "epoch": 0.2930951064418026, + "grad_norm": 0.6092893355181587, + "learning_rate": 1.6584292815435547e-05, + "loss": 0.3945, + "step": 16962 + }, + { + "epoch": 0.2931123859552115, + "grad_norm": 1.1358705120689947, + "learning_rate": 1.658387158904569e-05, + "loss": 0.485, + "step": 16963 + }, + { + "epoch": 0.2931296654686204, + "grad_norm": 0.9263534515919816, + "learning_rate": 1.6583450342034643e-05, + "loss": 0.3482, + "step": 16964 + }, + { + "epoch": 0.2931469449820293, + "grad_norm": 1.3664808164718738, + "learning_rate": 1.6583029074403733e-05, + "loss": 0.5111, + "step": 16965 + }, + { + "epoch": 0.2931642244954382, + "grad_norm": 0.8755307875866202, + "learning_rate": 1.658260778615427e-05, + "loss": 0.451, + "step": 16966 + }, + { + "epoch": 0.2931815040088471, + "grad_norm": 1.4448624931171845, + "learning_rate": 1.6582186477287582e-05, + "loss": 0.6153, + "step": 16967 + }, + { + "epoch": 0.29319878352225603, + "grad_norm": 0.724125228444274, + "learning_rate": 1.6581765147804985e-05, + "loss": 0.6862, + "step": 16968 + }, + { + "epoch": 0.29321606303566494, + "grad_norm": 0.9499351671789557, + "learning_rate": 1.6581343797707797e-05, + "loss": 0.3706, + "step": 16969 + }, + { + "epoch": 0.2932333425490738, + "grad_norm": 0.8994158559243585, + "learning_rate": 1.6580922426997345e-05, + "loss": 0.4295, + "step": 16970 + }, + { + "epoch": 0.2932506220624827, + "grad_norm": 1.040242775904712, + "learning_rate": 1.658050103567494e-05, + "loss": 0.7419, + "step": 16971 + }, + { + "epoch": 0.2932679015758916, + "grad_norm": 0.9213736813742696, + "learning_rate": 1.658007962374191e-05, + "loss": 0.5087, + "step": 16972 + }, + { + "epoch": 0.2932851810893005, + "grad_norm": 0.9347440801179755, + "learning_rate": 1.6579658191199566e-05, + "loss": 0.58, + "step": 16973 + }, + { + "epoch": 0.2933024606027094, + "grad_norm": 0.7206161771870254, + "learning_rate": 1.6579236738049237e-05, + "loss": 0.4937, + "step": 16974 + }, + { + "epoch": 0.29331974011611833, + "grad_norm": 0.6853174101902135, + "learning_rate": 1.6578815264292236e-05, + "loss": 0.3307, + "step": 16975 + }, + { + "epoch": 0.29333701962952724, + "grad_norm": 1.2904798959272152, + "learning_rate": 1.6578393769929887e-05, + "loss": 0.7088, + "step": 16976 + }, + { + "epoch": 0.29335429914293615, + "grad_norm": 0.7681034412694767, + "learning_rate": 1.6577972254963507e-05, + "loss": 0.6126, + "step": 16977 + }, + { + "epoch": 0.29337157865634506, + "grad_norm": 1.1857481591450103, + "learning_rate": 1.657755071939442e-05, + "loss": 0.5403, + "step": 16978 + }, + { + "epoch": 0.29338885816975396, + "grad_norm": 0.8507327949042585, + "learning_rate": 1.657712916322394e-05, + "loss": 0.6276, + "step": 16979 + }, + { + "epoch": 0.2934061376831628, + "grad_norm": 1.2623893606584835, + "learning_rate": 1.6576707586453394e-05, + "loss": 0.4594, + "step": 16980 + }, + { + "epoch": 0.2934234171965717, + "grad_norm": 1.20574522330501, + "learning_rate": 1.65762859890841e-05, + "loss": 0.4686, + "step": 16981 + }, + { + "epoch": 0.29344069670998063, + "grad_norm": 1.0097560725237043, + "learning_rate": 1.657586437111738e-05, + "loss": 0.5681, + "step": 16982 + }, + { + "epoch": 0.29345797622338954, + "grad_norm": 0.7481567982512641, + "learning_rate": 1.657544273255455e-05, + "loss": 0.4114, + "step": 16983 + }, + { + "epoch": 0.29347525573679845, + "grad_norm": 1.221351326710251, + "learning_rate": 1.6575021073396937e-05, + "loss": 0.4844, + "step": 16984 + }, + { + "epoch": 0.29349253525020735, + "grad_norm": 1.1452599177973213, + "learning_rate": 1.6574599393645856e-05, + "loss": 0.5367, + "step": 16985 + }, + { + "epoch": 0.29350981476361626, + "grad_norm": 1.0086712370407718, + "learning_rate": 1.6574177693302632e-05, + "loss": 0.511, + "step": 16986 + }, + { + "epoch": 0.29352709427702517, + "grad_norm": 1.1460845159868969, + "learning_rate": 1.6573755972368583e-05, + "loss": 0.4735, + "step": 16987 + }, + { + "epoch": 0.2935443737904341, + "grad_norm": 0.8978865562227478, + "learning_rate": 1.6573334230845032e-05, + "loss": 0.4268, + "step": 16988 + }, + { + "epoch": 0.293561653303843, + "grad_norm": 0.9998108156814771, + "learning_rate": 1.6572912468733296e-05, + "loss": 0.3871, + "step": 16989 + }, + { + "epoch": 0.2935789328172519, + "grad_norm": 1.267726271388006, + "learning_rate": 1.65724906860347e-05, + "loss": 0.4931, + "step": 16990 + }, + { + "epoch": 0.29359621233066074, + "grad_norm": 1.218778164044485, + "learning_rate": 1.6572068882750564e-05, + "loss": 0.3807, + "step": 16991 + }, + { + "epoch": 0.29361349184406965, + "grad_norm": 1.3113906380117872, + "learning_rate": 1.6571647058882208e-05, + "loss": 0.4571, + "step": 16992 + }, + { + "epoch": 0.29363077135747856, + "grad_norm": 0.6482807378324783, + "learning_rate": 1.6571225214430954e-05, + "loss": 0.3392, + "step": 16993 + }, + { + "epoch": 0.29364805087088747, + "grad_norm": 1.3680999173353523, + "learning_rate": 1.657080334939812e-05, + "loss": 0.486, + "step": 16994 + }, + { + "epoch": 0.2936653303842964, + "grad_norm": 1.3702355785923837, + "learning_rate": 1.6570381463785038e-05, + "loss": 0.5424, + "step": 16995 + }, + { + "epoch": 0.2936826098977053, + "grad_norm": 0.7206301816879553, + "learning_rate": 1.6569959557593017e-05, + "loss": 0.4663, + "step": 16996 + }, + { + "epoch": 0.2936998894111142, + "grad_norm": 0.8739660804638416, + "learning_rate": 1.6569537630823385e-05, + "loss": 0.5311, + "step": 16997 + }, + { + "epoch": 0.2937171689245231, + "grad_norm": 0.7189271301873905, + "learning_rate": 1.656911568347746e-05, + "loss": 0.5163, + "step": 16998 + }, + { + "epoch": 0.293734448437932, + "grad_norm": 0.39108172512613154, + "learning_rate": 1.6568693715556565e-05, + "loss": 0.6454, + "step": 16999 + }, + { + "epoch": 0.2937517279513409, + "grad_norm": 0.3591528726840231, + "learning_rate": 1.6568271727062026e-05, + "loss": 0.6513, + "step": 17000 + }, + { + "epoch": 0.29376900746474977, + "grad_norm": 0.9201310710817037, + "learning_rate": 1.6567849717995157e-05, + "loss": 0.6196, + "step": 17001 + }, + { + "epoch": 0.2937862869781587, + "grad_norm": 1.076071532941433, + "learning_rate": 1.6567427688357283e-05, + "loss": 0.7454, + "step": 17002 + }, + { + "epoch": 0.2938035664915676, + "grad_norm": 0.9249438040667236, + "learning_rate": 1.656700563814973e-05, + "loss": 0.4324, + "step": 17003 + }, + { + "epoch": 0.2938208460049765, + "grad_norm": 1.184924860285277, + "learning_rate": 1.6566583567373817e-05, + "loss": 0.4388, + "step": 17004 + }, + { + "epoch": 0.2938381255183854, + "grad_norm": 1.0443272867542577, + "learning_rate": 1.6566161476030864e-05, + "loss": 0.3012, + "step": 17005 + }, + { + "epoch": 0.2938554050317943, + "grad_norm": 1.3098675298585416, + "learning_rate": 1.656573936412219e-05, + "loss": 0.4421, + "step": 17006 + }, + { + "epoch": 0.2938726845452032, + "grad_norm": 1.387497413867958, + "learning_rate": 1.6565317231649124e-05, + "loss": 0.4181, + "step": 17007 + }, + { + "epoch": 0.2938899640586121, + "grad_norm": 1.0238093214544517, + "learning_rate": 1.6564895078612986e-05, + "loss": 0.5759, + "step": 17008 + }, + { + "epoch": 0.293907243572021, + "grad_norm": 0.8932546446448325, + "learning_rate": 1.6564472905015097e-05, + "loss": 0.4655, + "step": 17009 + }, + { + "epoch": 0.29392452308542993, + "grad_norm": 1.1153643353366018, + "learning_rate": 1.656405071085678e-05, + "loss": 0.6173, + "step": 17010 + }, + { + "epoch": 0.29394180259883884, + "grad_norm": 1.2087163699911982, + "learning_rate": 1.656362849613936e-05, + "loss": 0.5284, + "step": 17011 + }, + { + "epoch": 0.2939590821122477, + "grad_norm": 0.8415272751716091, + "learning_rate": 1.6563206260864153e-05, + "loss": 0.574, + "step": 17012 + }, + { + "epoch": 0.2939763616256566, + "grad_norm": 1.073516216421483, + "learning_rate": 1.656278400503249e-05, + "loss": 0.4898, + "step": 17013 + }, + { + "epoch": 0.2939936411390655, + "grad_norm": 1.5691347919633412, + "learning_rate": 1.6562361728645682e-05, + "loss": 0.4649, + "step": 17014 + }, + { + "epoch": 0.2940109206524744, + "grad_norm": 1.0122458892216664, + "learning_rate": 1.6561939431705064e-05, + "loss": 0.4734, + "step": 17015 + }, + { + "epoch": 0.2940282001658833, + "grad_norm": 1.2780306840383584, + "learning_rate": 1.656151711421195e-05, + "loss": 0.5583, + "step": 17016 + }, + { + "epoch": 0.29404547967929223, + "grad_norm": 1.7250667015949754, + "learning_rate": 1.6561094776167667e-05, + "loss": 0.7265, + "step": 17017 + }, + { + "epoch": 0.29406275919270114, + "grad_norm": 0.5411183511057708, + "learning_rate": 1.6560672417573537e-05, + "loss": 0.906, + "step": 17018 + }, + { + "epoch": 0.29408003870611005, + "grad_norm": 1.511527148053247, + "learning_rate": 1.6560250038430882e-05, + "loss": 0.4323, + "step": 17019 + }, + { + "epoch": 0.29409731821951896, + "grad_norm": 0.7793588631258966, + "learning_rate": 1.6559827638741024e-05, + "loss": 0.4177, + "step": 17020 + }, + { + "epoch": 0.29411459773292786, + "grad_norm": 0.4682764863662154, + "learning_rate": 1.6559405218505288e-05, + "loss": 0.4288, + "step": 17021 + }, + { + "epoch": 0.29413187724633677, + "grad_norm": 1.5724550055319904, + "learning_rate": 1.6558982777724997e-05, + "loss": 0.4024, + "step": 17022 + }, + { + "epoch": 0.2941491567597456, + "grad_norm": 0.4700821782282531, + "learning_rate": 1.655856031640147e-05, + "loss": 0.5709, + "step": 17023 + }, + { + "epoch": 0.29416643627315453, + "grad_norm": 0.5635094168680785, + "learning_rate": 1.655813783453604e-05, + "loss": 0.4163, + "step": 17024 + }, + { + "epoch": 0.29418371578656344, + "grad_norm": 1.2284766549744452, + "learning_rate": 1.655771533213002e-05, + "loss": 0.4028, + "step": 17025 + }, + { + "epoch": 0.29420099529997235, + "grad_norm": 0.550509153121899, + "learning_rate": 1.655729280918474e-05, + "loss": 0.6997, + "step": 17026 + }, + { + "epoch": 0.29421827481338125, + "grad_norm": 1.1865123701971665, + "learning_rate": 1.655687026570152e-05, + "loss": 0.6953, + "step": 17027 + }, + { + "epoch": 0.29423555432679016, + "grad_norm": 0.94470110333593, + "learning_rate": 1.655644770168168e-05, + "loss": 0.5143, + "step": 17028 + }, + { + "epoch": 0.29425283384019907, + "grad_norm": 0.9610641183812243, + "learning_rate": 1.655602511712655e-05, + "loss": 0.5003, + "step": 17029 + }, + { + "epoch": 0.294270113353608, + "grad_norm": 0.979390136725412, + "learning_rate": 1.655560251203745e-05, + "loss": 0.4139, + "step": 17030 + }, + { + "epoch": 0.2942873928670169, + "grad_norm": 0.9344715148399395, + "learning_rate": 1.6555179886415706e-05, + "loss": 0.6142, + "step": 17031 + }, + { + "epoch": 0.2943046723804258, + "grad_norm": 0.7709760481895221, + "learning_rate": 1.655475724026264e-05, + "loss": 0.4657, + "step": 17032 + }, + { + "epoch": 0.29432195189383464, + "grad_norm": 1.4001969802398997, + "learning_rate": 1.6554334573579576e-05, + "loss": 0.555, + "step": 17033 + }, + { + "epoch": 0.29433923140724355, + "grad_norm": 0.6713576810467214, + "learning_rate": 1.655391188636784e-05, + "loss": 0.2997, + "step": 17034 + }, + { + "epoch": 0.29435651092065246, + "grad_norm": 1.1840602918536451, + "learning_rate": 1.6553489178628752e-05, + "loss": 0.4727, + "step": 17035 + }, + { + "epoch": 0.29437379043406137, + "grad_norm": 1.3108002913493264, + "learning_rate": 1.655306645036364e-05, + "loss": 0.4379, + "step": 17036 + }, + { + "epoch": 0.2943910699474703, + "grad_norm": 0.933311983949498, + "learning_rate": 1.655264370157382e-05, + "loss": 0.5663, + "step": 17037 + }, + { + "epoch": 0.2944083494608792, + "grad_norm": 0.8400449238514429, + "learning_rate": 1.655222093226063e-05, + "loss": 0.6216, + "step": 17038 + }, + { + "epoch": 0.2944256289742881, + "grad_norm": 0.6524241679758728, + "learning_rate": 1.655179814242538e-05, + "loss": 0.574, + "step": 17039 + }, + { + "epoch": 0.294442908487697, + "grad_norm": 1.1606525173810738, + "learning_rate": 1.6551375332069405e-05, + "loss": 0.4403, + "step": 17040 + }, + { + "epoch": 0.2944601880011059, + "grad_norm": 1.3494214006021816, + "learning_rate": 1.655095250119402e-05, + "loss": 0.5006, + "step": 17041 + }, + { + "epoch": 0.2944774675145148, + "grad_norm": 0.6911722665625222, + "learning_rate": 1.6550529649800556e-05, + "loss": 0.2226, + "step": 17042 + }, + { + "epoch": 0.2944947470279237, + "grad_norm": 1.1068855094577934, + "learning_rate": 1.6550106777890335e-05, + "loss": 0.5891, + "step": 17043 + }, + { + "epoch": 0.29451202654133257, + "grad_norm": 0.9329043769245216, + "learning_rate": 1.6549683885464684e-05, + "loss": 0.6022, + "step": 17044 + }, + { + "epoch": 0.2945293060547415, + "grad_norm": 1.4980780135131202, + "learning_rate": 1.6549260972524922e-05, + "loss": 0.4441, + "step": 17045 + }, + { + "epoch": 0.2945465855681504, + "grad_norm": 1.5674182317214536, + "learning_rate": 1.6548838039072377e-05, + "loss": 0.5557, + "step": 17046 + }, + { + "epoch": 0.2945638650815593, + "grad_norm": 1.09162484395382, + "learning_rate": 1.6548415085108373e-05, + "loss": 0.443, + "step": 17047 + }, + { + "epoch": 0.2945811445949682, + "grad_norm": 1.0667048973068134, + "learning_rate": 1.654799211063424e-05, + "loss": 0.4888, + "step": 17048 + }, + { + "epoch": 0.2945984241083771, + "grad_norm": 1.060139523018831, + "learning_rate": 1.6547569115651293e-05, + "loss": 0.5513, + "step": 17049 + }, + { + "epoch": 0.294615703621786, + "grad_norm": 1.2328826530487207, + "learning_rate": 1.6547146100160865e-05, + "loss": 0.6501, + "step": 17050 + }, + { + "epoch": 0.2946329831351949, + "grad_norm": 1.4853473380510547, + "learning_rate": 1.6546723064164275e-05, + "loss": 0.5301, + "step": 17051 + }, + { + "epoch": 0.29465026264860383, + "grad_norm": 1.0844153352080494, + "learning_rate": 1.6546300007662854e-05, + "loss": 0.589, + "step": 17052 + }, + { + "epoch": 0.29466754216201274, + "grad_norm": 1.1746213876107803, + "learning_rate": 1.6545876930657923e-05, + "loss": 0.3965, + "step": 17053 + }, + { + "epoch": 0.2946848216754216, + "grad_norm": 0.6629230828345034, + "learning_rate": 1.654545383315081e-05, + "loss": 0.4449, + "step": 17054 + }, + { + "epoch": 0.2947021011888305, + "grad_norm": 0.5553591431687288, + "learning_rate": 1.654503071514283e-05, + "loss": 0.9163, + "step": 17055 + }, + { + "epoch": 0.2947193807022394, + "grad_norm": 1.7786492181931113, + "learning_rate": 1.6544607576635328e-05, + "loss": 0.785, + "step": 17056 + }, + { + "epoch": 0.2947366602156483, + "grad_norm": 0.9181349174802125, + "learning_rate": 1.654418441762961e-05, + "loss": 0.5513, + "step": 17057 + }, + { + "epoch": 0.2947539397290572, + "grad_norm": 1.1661550094342028, + "learning_rate": 1.6543761238127012e-05, + "loss": 0.4516, + "step": 17058 + }, + { + "epoch": 0.29477121924246613, + "grad_norm": 1.0533552666167951, + "learning_rate": 1.6543338038128855e-05, + "loss": 0.4605, + "step": 17059 + }, + { + "epoch": 0.29478849875587504, + "grad_norm": 1.2560696928235007, + "learning_rate": 1.6542914817636468e-05, + "loss": 0.5875, + "step": 17060 + }, + { + "epoch": 0.29480577826928395, + "grad_norm": 0.5472099052346525, + "learning_rate": 1.6542491576651172e-05, + "loss": 0.7037, + "step": 17061 + }, + { + "epoch": 0.29482305778269285, + "grad_norm": 0.7384154040971039, + "learning_rate": 1.6542068315174296e-05, + "loss": 0.3491, + "step": 17062 + }, + { + "epoch": 0.29484033729610176, + "grad_norm": 1.4049012552965543, + "learning_rate": 1.6541645033207162e-05, + "loss": 0.5947, + "step": 17063 + }, + { + "epoch": 0.29485761680951067, + "grad_norm": 0.7405972976857166, + "learning_rate": 1.6541221730751103e-05, + "loss": 0.4683, + "step": 17064 + }, + { + "epoch": 0.2948748963229195, + "grad_norm": 2.148442586997462, + "learning_rate": 1.654079840780744e-05, + "loss": 0.6367, + "step": 17065 + }, + { + "epoch": 0.29489217583632843, + "grad_norm": 0.715594977084567, + "learning_rate": 1.65403750643775e-05, + "loss": 0.4619, + "step": 17066 + }, + { + "epoch": 0.29490945534973734, + "grad_norm": 0.7855645986241895, + "learning_rate": 1.6539951700462603e-05, + "loss": 0.4535, + "step": 17067 + }, + { + "epoch": 0.29492673486314624, + "grad_norm": 0.7766766365159876, + "learning_rate": 1.6539528316064086e-05, + "loss": 0.3831, + "step": 17068 + }, + { + "epoch": 0.29494401437655515, + "grad_norm": 1.0368997370099522, + "learning_rate": 1.6539104911183265e-05, + "loss": 0.4767, + "step": 17069 + }, + { + "epoch": 0.29496129388996406, + "grad_norm": 0.8172350647677563, + "learning_rate": 1.6538681485821472e-05, + "loss": 0.4962, + "step": 17070 + }, + { + "epoch": 0.29497857340337297, + "grad_norm": 0.9798646121922264, + "learning_rate": 1.6538258039980032e-05, + "loss": 0.3966, + "step": 17071 + }, + { + "epoch": 0.2949958529167819, + "grad_norm": 0.9558147173393039, + "learning_rate": 1.6537834573660273e-05, + "loss": 0.3479, + "step": 17072 + }, + { + "epoch": 0.2950131324301908, + "grad_norm": 1.293762236170651, + "learning_rate": 1.653741108686352e-05, + "loss": 0.5193, + "step": 17073 + }, + { + "epoch": 0.2950304119435997, + "grad_norm": 0.9193908458418749, + "learning_rate": 1.6536987579591092e-05, + "loss": 0.4617, + "step": 17074 + }, + { + "epoch": 0.29504769145700854, + "grad_norm": 1.1350385302322499, + "learning_rate": 1.6536564051844327e-05, + "loss": 0.5702, + "step": 17075 + }, + { + "epoch": 0.29506497097041745, + "grad_norm": 1.068719268962507, + "learning_rate": 1.6536140503624545e-05, + "loss": 0.5759, + "step": 17076 + }, + { + "epoch": 0.29508225048382636, + "grad_norm": 0.7023529193061214, + "learning_rate": 1.653571693493308e-05, + "loss": 0.5186, + "step": 17077 + }, + { + "epoch": 0.29509952999723527, + "grad_norm": 0.8850760031887125, + "learning_rate": 1.6535293345771244e-05, + "loss": 0.5583, + "step": 17078 + }, + { + "epoch": 0.2951168095106442, + "grad_norm": 1.346162828601912, + "learning_rate": 1.6534869736140375e-05, + "loss": 0.8336, + "step": 17079 + }, + { + "epoch": 0.2951340890240531, + "grad_norm": 1.0943936810233479, + "learning_rate": 1.65344461060418e-05, + "loss": 0.7298, + "step": 17080 + }, + { + "epoch": 0.295151368537462, + "grad_norm": 0.7700230085213361, + "learning_rate": 1.6534022455476844e-05, + "loss": 0.5298, + "step": 17081 + }, + { + "epoch": 0.2951686480508709, + "grad_norm": 1.5697114858765302, + "learning_rate": 1.653359878444683e-05, + "loss": 0.5935, + "step": 17082 + }, + { + "epoch": 0.2951859275642798, + "grad_norm": 0.9005861759042155, + "learning_rate": 1.6533175092953085e-05, + "loss": 0.4423, + "step": 17083 + }, + { + "epoch": 0.2952032070776887, + "grad_norm": 1.057068088172174, + "learning_rate": 1.653275138099695e-05, + "loss": 0.3721, + "step": 17084 + }, + { + "epoch": 0.2952204865910976, + "grad_norm": 1.0666224784710536, + "learning_rate": 1.6532327648579733e-05, + "loss": 0.513, + "step": 17085 + }, + { + "epoch": 0.29523776610450647, + "grad_norm": 0.6861859944120524, + "learning_rate": 1.653190389570277e-05, + "loss": 0.4155, + "step": 17086 + }, + { + "epoch": 0.2952550456179154, + "grad_norm": 1.3960272410731593, + "learning_rate": 1.653148012236739e-05, + "loss": 0.4329, + "step": 17087 + }, + { + "epoch": 0.2952723251313243, + "grad_norm": 0.899597298043293, + "learning_rate": 1.6531056328574913e-05, + "loss": 0.455, + "step": 17088 + }, + { + "epoch": 0.2952896046447332, + "grad_norm": 0.8663659497605678, + "learning_rate": 1.6530632514326677e-05, + "loss": 0.4764, + "step": 17089 + }, + { + "epoch": 0.2953068841581421, + "grad_norm": 1.2423934649886283, + "learning_rate": 1.6530208679624e-05, + "loss": 0.5297, + "step": 17090 + }, + { + "epoch": 0.295324163671551, + "grad_norm": 1.1516753361674215, + "learning_rate": 1.6529784824468215e-05, + "loss": 0.6751, + "step": 17091 + }, + { + "epoch": 0.2953414431849599, + "grad_norm": 0.9481110843367183, + "learning_rate": 1.6529360948860648e-05, + "loss": 0.4437, + "step": 17092 + }, + { + "epoch": 0.2953587226983688, + "grad_norm": 1.6249524531492456, + "learning_rate": 1.6528937052802626e-05, + "loss": 0.4399, + "step": 17093 + }, + { + "epoch": 0.29537600221177773, + "grad_norm": 1.06162874796712, + "learning_rate": 1.6528513136295473e-05, + "loss": 0.4607, + "step": 17094 + }, + { + "epoch": 0.29539328172518664, + "grad_norm": 0.9244810132595925, + "learning_rate": 1.6528089199340526e-05, + "loss": 0.4869, + "step": 17095 + }, + { + "epoch": 0.29541056123859555, + "grad_norm": 1.1125739872705183, + "learning_rate": 1.6527665241939103e-05, + "loss": 0.7134, + "step": 17096 + }, + { + "epoch": 0.2954278407520044, + "grad_norm": 1.1697591751266438, + "learning_rate": 1.652724126409254e-05, + "loss": 0.5779, + "step": 17097 + }, + { + "epoch": 0.2954451202654133, + "grad_norm": 0.8967778376312864, + "learning_rate": 1.6526817265802163e-05, + "loss": 0.4691, + "step": 17098 + }, + { + "epoch": 0.2954623997788222, + "grad_norm": 0.9035408262153637, + "learning_rate": 1.652639324706929e-05, + "loss": 0.4313, + "step": 17099 + }, + { + "epoch": 0.2954796792922311, + "grad_norm": 0.8694721804081779, + "learning_rate": 1.6525969207895264e-05, + "loss": 0.5844, + "step": 17100 + }, + { + "epoch": 0.29549695880564003, + "grad_norm": 0.8973536636725452, + "learning_rate": 1.6525545148281408e-05, + "loss": 0.4391, + "step": 17101 + }, + { + "epoch": 0.29551423831904894, + "grad_norm": 0.4693138834956013, + "learning_rate": 1.6525121068229045e-05, + "loss": 0.8449, + "step": 17102 + }, + { + "epoch": 0.29553151783245785, + "grad_norm": 0.8934856992249846, + "learning_rate": 1.652469696773951e-05, + "loss": 0.4306, + "step": 17103 + }, + { + "epoch": 0.29554879734586675, + "grad_norm": 1.104100885167561, + "learning_rate": 1.6524272846814125e-05, + "loss": 0.5231, + "step": 17104 + }, + { + "epoch": 0.29556607685927566, + "grad_norm": 0.8150640703823818, + "learning_rate": 1.6523848705454223e-05, + "loss": 0.6853, + "step": 17105 + }, + { + "epoch": 0.29558335637268457, + "grad_norm": 0.7791973691837202, + "learning_rate": 1.652342454366113e-05, + "loss": 0.5535, + "step": 17106 + }, + { + "epoch": 0.2956006358860934, + "grad_norm": 1.51362461239857, + "learning_rate": 1.652300036143618e-05, + "loss": 0.4129, + "step": 17107 + }, + { + "epoch": 0.29561791539950233, + "grad_norm": 0.9913925654015974, + "learning_rate": 1.652257615878069e-05, + "loss": 0.3581, + "step": 17108 + }, + { + "epoch": 0.29563519491291124, + "grad_norm": 1.3338192699187672, + "learning_rate": 1.6522151935696e-05, + "loss": 0.5476, + "step": 17109 + }, + { + "epoch": 0.29565247442632014, + "grad_norm": 0.8692137730200449, + "learning_rate": 1.6521727692183434e-05, + "loss": 0.5337, + "step": 17110 + }, + { + "epoch": 0.29566975393972905, + "grad_norm": 1.2107696395433742, + "learning_rate": 1.6521303428244322e-05, + "loss": 0.4343, + "step": 17111 + }, + { + "epoch": 0.29568703345313796, + "grad_norm": 1.0375357989869205, + "learning_rate": 1.652087914387999e-05, + "loss": 0.4609, + "step": 17112 + }, + { + "epoch": 0.29570431296654687, + "grad_norm": 1.2852222321038862, + "learning_rate": 1.652045483909177e-05, + "loss": 0.5017, + "step": 17113 + }, + { + "epoch": 0.2957215924799558, + "grad_norm": 1.378454034344812, + "learning_rate": 1.652003051388099e-05, + "loss": 0.4478, + "step": 17114 + }, + { + "epoch": 0.2957388719933647, + "grad_norm": 0.8981007795855873, + "learning_rate": 1.651960616824898e-05, + "loss": 0.5848, + "step": 17115 + }, + { + "epoch": 0.2957561515067736, + "grad_norm": 1.0022882024012392, + "learning_rate": 1.6519181802197065e-05, + "loss": 0.3527, + "step": 17116 + }, + { + "epoch": 0.2957734310201825, + "grad_norm": 1.009892014630168, + "learning_rate": 1.651875741572658e-05, + "loss": 0.525, + "step": 17117 + }, + { + "epoch": 0.29579071053359135, + "grad_norm": 0.6975297606904811, + "learning_rate": 1.651833300883885e-05, + "loss": 0.2885, + "step": 17118 + }, + { + "epoch": 0.29580799004700026, + "grad_norm": 0.7804401596653029, + "learning_rate": 1.651790858153521e-05, + "loss": 0.3809, + "step": 17119 + }, + { + "epoch": 0.29582526956040917, + "grad_norm": 1.0220158860201964, + "learning_rate": 1.6517484133816977e-05, + "loss": 0.356, + "step": 17120 + }, + { + "epoch": 0.2958425490738181, + "grad_norm": 1.0631120602598603, + "learning_rate": 1.6517059665685496e-05, + "loss": 0.6016, + "step": 17121 + }, + { + "epoch": 0.295859828587227, + "grad_norm": 0.7844878852560582, + "learning_rate": 1.6516635177142083e-05, + "loss": 0.5395, + "step": 17122 + }, + { + "epoch": 0.2958771081006359, + "grad_norm": 0.9934328905500833, + "learning_rate": 1.6516210668188077e-05, + "loss": 0.4169, + "step": 17123 + }, + { + "epoch": 0.2958943876140448, + "grad_norm": 0.37998456455820756, + "learning_rate": 1.65157861388248e-05, + "loss": 0.7241, + "step": 17124 + }, + { + "epoch": 0.2959116671274537, + "grad_norm": 1.1005533382940749, + "learning_rate": 1.6515361589053587e-05, + "loss": 0.6805, + "step": 17125 + }, + { + "epoch": 0.2959289466408626, + "grad_norm": 0.8413111437263844, + "learning_rate": 1.6514937018875768e-05, + "loss": 0.4587, + "step": 17126 + }, + { + "epoch": 0.2959462261542715, + "grad_norm": 1.3750888023212195, + "learning_rate": 1.6514512428292674e-05, + "loss": 0.5022, + "step": 17127 + }, + { + "epoch": 0.29596350566768037, + "grad_norm": 0.9502643170960766, + "learning_rate": 1.6514087817305627e-05, + "loss": 0.5205, + "step": 17128 + }, + { + "epoch": 0.2959807851810893, + "grad_norm": 0.8664090962438623, + "learning_rate": 1.6513663185915966e-05, + "loss": 0.4974, + "step": 17129 + }, + { + "epoch": 0.2959980646944982, + "grad_norm": 0.9979611671911603, + "learning_rate": 1.6513238534125014e-05, + "loss": 0.4686, + "step": 17130 + }, + { + "epoch": 0.2960153442079071, + "grad_norm": 1.489784045800018, + "learning_rate": 1.6512813861934104e-05, + "loss": 0.4839, + "step": 17131 + }, + { + "epoch": 0.296032623721316, + "grad_norm": 0.8161085848937921, + "learning_rate": 1.6512389169344568e-05, + "loss": 0.5068, + "step": 17132 + }, + { + "epoch": 0.2960499032347249, + "grad_norm": 1.2392519149229346, + "learning_rate": 1.651196445635773e-05, + "loss": 0.3996, + "step": 17133 + }, + { + "epoch": 0.2960671827481338, + "grad_norm": 0.8798681097168994, + "learning_rate": 1.651153972297493e-05, + "loss": 0.441, + "step": 17134 + }, + { + "epoch": 0.2960844622615427, + "grad_norm": 0.95986901979317, + "learning_rate": 1.6511114969197487e-05, + "loss": 0.3547, + "step": 17135 + }, + { + "epoch": 0.29610174177495163, + "grad_norm": 0.8162399590006929, + "learning_rate": 1.651069019502674e-05, + "loss": 0.5042, + "step": 17136 + }, + { + "epoch": 0.29611902128836054, + "grad_norm": 0.9832534991605197, + "learning_rate": 1.651026540046402e-05, + "loss": 0.4524, + "step": 17137 + }, + { + "epoch": 0.29613630080176945, + "grad_norm": 1.2042667039974986, + "learning_rate": 1.650984058551065e-05, + "loss": 0.7261, + "step": 17138 + }, + { + "epoch": 0.2961535803151783, + "grad_norm": 1.4992227138343956, + "learning_rate": 1.6509415750167963e-05, + "loss": 0.5209, + "step": 17139 + }, + { + "epoch": 0.2961708598285872, + "grad_norm": 1.2540655378067007, + "learning_rate": 1.6508990894437292e-05, + "loss": 0.7217, + "step": 17140 + }, + { + "epoch": 0.2961881393419961, + "grad_norm": 1.6504811793420122, + "learning_rate": 1.6508566018319968e-05, + "loss": 0.4353, + "step": 17141 + }, + { + "epoch": 0.296205418855405, + "grad_norm": 0.9628796131443108, + "learning_rate": 1.650814112181732e-05, + "loss": 0.4975, + "step": 17142 + }, + { + "epoch": 0.29622269836881393, + "grad_norm": 1.4888471560964855, + "learning_rate": 1.6507716204930682e-05, + "loss": 0.4893, + "step": 17143 + }, + { + "epoch": 0.29623997788222284, + "grad_norm": 0.7335495334198332, + "learning_rate": 1.650729126766138e-05, + "loss": 0.3247, + "step": 17144 + }, + { + "epoch": 0.29625725739563175, + "grad_norm": 0.8598688122373686, + "learning_rate": 1.6506866310010748e-05, + "loss": 0.6044, + "step": 17145 + }, + { + "epoch": 0.29627453690904065, + "grad_norm": 0.7159527049558034, + "learning_rate": 1.6506441331980114e-05, + "loss": 0.6262, + "step": 17146 + }, + { + "epoch": 0.29629181642244956, + "grad_norm": 0.7401612690310473, + "learning_rate": 1.6506016333570815e-05, + "loss": 0.5536, + "step": 17147 + }, + { + "epoch": 0.29630909593585847, + "grad_norm": 1.1663350672057524, + "learning_rate": 1.6505591314784174e-05, + "loss": 0.567, + "step": 17148 + }, + { + "epoch": 0.2963263754492674, + "grad_norm": 1.0481433574934296, + "learning_rate": 1.6505166275621532e-05, + "loss": 0.3991, + "step": 17149 + }, + { + "epoch": 0.29634365496267623, + "grad_norm": 0.9013682094716413, + "learning_rate": 1.6504741216084214e-05, + "loss": 0.3865, + "step": 17150 + }, + { + "epoch": 0.29636093447608514, + "grad_norm": 1.284272156513719, + "learning_rate": 1.6504316136173548e-05, + "loss": 0.672, + "step": 17151 + }, + { + "epoch": 0.29637821398949404, + "grad_norm": 0.8278510693503663, + "learning_rate": 1.650389103589087e-05, + "loss": 0.5383, + "step": 17152 + }, + { + "epoch": 0.29639549350290295, + "grad_norm": 1.1187626620823115, + "learning_rate": 1.6503465915237514e-05, + "loss": 0.4609, + "step": 17153 + }, + { + "epoch": 0.29641277301631186, + "grad_norm": 0.7389514014619365, + "learning_rate": 1.6503040774214806e-05, + "loss": 0.3267, + "step": 17154 + }, + { + "epoch": 0.29643005252972077, + "grad_norm": 1.2451886205354246, + "learning_rate": 1.6502615612824083e-05, + "loss": 0.4989, + "step": 17155 + }, + { + "epoch": 0.2964473320431297, + "grad_norm": 0.8044173578571772, + "learning_rate": 1.650219043106667e-05, + "loss": 0.6088, + "step": 17156 + }, + { + "epoch": 0.2964646115565386, + "grad_norm": 1.2043085765128365, + "learning_rate": 1.6501765228943904e-05, + "loss": 0.4799, + "step": 17157 + }, + { + "epoch": 0.2964818910699475, + "grad_norm": 0.963988625888671, + "learning_rate": 1.6501340006457115e-05, + "loss": 0.433, + "step": 17158 + }, + { + "epoch": 0.2964991705833564, + "grad_norm": 0.818355413866762, + "learning_rate": 1.6500914763607637e-05, + "loss": 0.53, + "step": 17159 + }, + { + "epoch": 0.29651645009676525, + "grad_norm": 1.0610013867365686, + "learning_rate": 1.6500489500396798e-05, + "loss": 0.4048, + "step": 17160 + }, + { + "epoch": 0.29653372961017416, + "grad_norm": 1.2238485322132666, + "learning_rate": 1.6500064216825933e-05, + "loss": 0.5666, + "step": 17161 + }, + { + "epoch": 0.29655100912358306, + "grad_norm": 1.2263752947931557, + "learning_rate": 1.6499638912896374e-05, + "loss": 0.7218, + "step": 17162 + }, + { + "epoch": 0.296568288636992, + "grad_norm": 0.8248787065752214, + "learning_rate": 1.6499213588609445e-05, + "loss": 0.7357, + "step": 17163 + }, + { + "epoch": 0.2965855681504009, + "grad_norm": 0.578538250322971, + "learning_rate": 1.6498788243966494e-05, + "loss": 0.4153, + "step": 17164 + }, + { + "epoch": 0.2966028476638098, + "grad_norm": 1.0233572641133848, + "learning_rate": 1.6498362878968838e-05, + "loss": 0.4304, + "step": 17165 + }, + { + "epoch": 0.2966201271772187, + "grad_norm": 0.8759874404926498, + "learning_rate": 1.649793749361782e-05, + "loss": 0.3776, + "step": 17166 + }, + { + "epoch": 0.2966374066906276, + "grad_norm": 1.2669508721949032, + "learning_rate": 1.6497512087914768e-05, + "loss": 0.5257, + "step": 17167 + }, + { + "epoch": 0.2966546862040365, + "grad_norm": 0.9111115647860837, + "learning_rate": 1.649708666186101e-05, + "loss": 0.4056, + "step": 17168 + }, + { + "epoch": 0.2966719657174454, + "grad_norm": 0.6756035491014639, + "learning_rate": 1.6496661215457884e-05, + "loss": 0.4869, + "step": 17169 + }, + { + "epoch": 0.2966892452308543, + "grad_norm": 1.3381772936522898, + "learning_rate": 1.6496235748706726e-05, + "loss": 0.5963, + "step": 17170 + }, + { + "epoch": 0.2967065247442632, + "grad_norm": 1.6183724451622374, + "learning_rate": 1.649581026160886e-05, + "loss": 0.5992, + "step": 17171 + }, + { + "epoch": 0.2967238042576721, + "grad_norm": 1.021495893476829, + "learning_rate": 1.6495384754165625e-05, + "loss": 0.4944, + "step": 17172 + }, + { + "epoch": 0.296741083771081, + "grad_norm": 0.46036704244284576, + "learning_rate": 1.6494959226378346e-05, + "loss": 0.7948, + "step": 17173 + }, + { + "epoch": 0.2967583632844899, + "grad_norm": 1.1644401233340227, + "learning_rate": 1.6494533678248364e-05, + "loss": 0.5225, + "step": 17174 + }, + { + "epoch": 0.2967756427978988, + "grad_norm": 0.4329391543172947, + "learning_rate": 1.6494108109777012e-05, + "loss": 0.6707, + "step": 17175 + }, + { + "epoch": 0.2967929223113077, + "grad_norm": 1.380884196422507, + "learning_rate": 1.6493682520965614e-05, + "loss": 0.5521, + "step": 17176 + }, + { + "epoch": 0.2968102018247166, + "grad_norm": 0.6285313126413239, + "learning_rate": 1.6493256911815514e-05, + "loss": 0.5336, + "step": 17177 + }, + { + "epoch": 0.29682748133812553, + "grad_norm": 1.166376128846442, + "learning_rate": 1.649283128232804e-05, + "loss": 0.5343, + "step": 17178 + }, + { + "epoch": 0.29684476085153444, + "grad_norm": 1.4522992504548176, + "learning_rate": 1.649240563250452e-05, + "loss": 0.6336, + "step": 17179 + }, + { + "epoch": 0.29686204036494335, + "grad_norm": 1.1428427858747512, + "learning_rate": 1.6491979962346298e-05, + "loss": 0.5045, + "step": 17180 + }, + { + "epoch": 0.2968793198783522, + "grad_norm": 0.9198933961103007, + "learning_rate": 1.64915542718547e-05, + "loss": 0.456, + "step": 17181 + }, + { + "epoch": 0.2968965993917611, + "grad_norm": 1.5571196886862853, + "learning_rate": 1.6491128561031058e-05, + "loss": 0.6517, + "step": 17182 + }, + { + "epoch": 0.29691387890517, + "grad_norm": 0.5502568236325949, + "learning_rate": 1.6490702829876708e-05, + "loss": 0.7876, + "step": 17183 + }, + { + "epoch": 0.2969311584185789, + "grad_norm": 1.0857033317899463, + "learning_rate": 1.6490277078392988e-05, + "loss": 0.5211, + "step": 17184 + }, + { + "epoch": 0.29694843793198783, + "grad_norm": 0.6890278029244569, + "learning_rate": 1.6489851306581223e-05, + "loss": 0.5605, + "step": 17185 + }, + { + "epoch": 0.29696571744539674, + "grad_norm": 0.8849260760472262, + "learning_rate": 1.648942551444275e-05, + "loss": 0.4648, + "step": 17186 + }, + { + "epoch": 0.29698299695880565, + "grad_norm": 1.045377138577949, + "learning_rate": 1.6488999701978905e-05, + "loss": 0.4358, + "step": 17187 + }, + { + "epoch": 0.29700027647221455, + "grad_norm": 0.8202851592182582, + "learning_rate": 1.6488573869191017e-05, + "loss": 0.5018, + "step": 17188 + }, + { + "epoch": 0.29701755598562346, + "grad_norm": 0.87888239195963, + "learning_rate": 1.6488148016080426e-05, + "loss": 0.5958, + "step": 17189 + }, + { + "epoch": 0.29703483549903237, + "grad_norm": 0.8110518456247569, + "learning_rate": 1.648772214264846e-05, + "loss": 0.5771, + "step": 17190 + }, + { + "epoch": 0.2970521150124413, + "grad_norm": 1.305095618315536, + "learning_rate": 1.6487296248896457e-05, + "loss": 0.6294, + "step": 17191 + }, + { + "epoch": 0.29706939452585013, + "grad_norm": 1.478677211268199, + "learning_rate": 1.648687033482575e-05, + "loss": 0.5102, + "step": 17192 + }, + { + "epoch": 0.29708667403925904, + "grad_norm": 1.2864173622620727, + "learning_rate": 1.6486444400437667e-05, + "loss": 0.4319, + "step": 17193 + }, + { + "epoch": 0.29710395355266794, + "grad_norm": 1.2320417224857012, + "learning_rate": 1.6486018445733555e-05, + "loss": 0.5207, + "step": 17194 + }, + { + "epoch": 0.29712123306607685, + "grad_norm": 0.44001723007266114, + "learning_rate": 1.6485592470714737e-05, + "loss": 0.6156, + "step": 17195 + }, + { + "epoch": 0.29713851257948576, + "grad_norm": 1.12867689962819, + "learning_rate": 1.6485166475382547e-05, + "loss": 0.4582, + "step": 17196 + }, + { + "epoch": 0.29715579209289467, + "grad_norm": 1.211131763945401, + "learning_rate": 1.6484740459738328e-05, + "loss": 0.4178, + "step": 17197 + }, + { + "epoch": 0.2971730716063036, + "grad_norm": 1.2540617025926883, + "learning_rate": 1.6484314423783403e-05, + "loss": 0.4226, + "step": 17198 + }, + { + "epoch": 0.2971903511197125, + "grad_norm": 1.320688115111217, + "learning_rate": 1.6483888367519116e-05, + "loss": 0.6434, + "step": 17199 + }, + { + "epoch": 0.2972076306331214, + "grad_norm": 1.1648186621975938, + "learning_rate": 1.64834622909468e-05, + "loss": 0.4203, + "step": 17200 + }, + { + "epoch": 0.2972249101465303, + "grad_norm": 0.8260644888683748, + "learning_rate": 1.6483036194067784e-05, + "loss": 0.4145, + "step": 17201 + }, + { + "epoch": 0.29724218965993915, + "grad_norm": 1.1963213675386386, + "learning_rate": 1.6482610076883406e-05, + "loss": 0.5713, + "step": 17202 + }, + { + "epoch": 0.29725946917334806, + "grad_norm": 1.0783828867680847, + "learning_rate": 1.6482183939395002e-05, + "loss": 0.5812, + "step": 17203 + }, + { + "epoch": 0.29727674868675696, + "grad_norm": 0.9652946171023192, + "learning_rate": 1.6481757781603905e-05, + "loss": 0.392, + "step": 17204 + }, + { + "epoch": 0.29729402820016587, + "grad_norm": 1.1166019787330903, + "learning_rate": 1.648133160351145e-05, + "loss": 0.517, + "step": 17205 + }, + { + "epoch": 0.2973113077135748, + "grad_norm": 1.2183513239390935, + "learning_rate": 1.648090540511897e-05, + "loss": 0.5155, + "step": 17206 + }, + { + "epoch": 0.2973285872269837, + "grad_norm": 0.868459717443102, + "learning_rate": 1.6480479186427806e-05, + "loss": 0.5377, + "step": 17207 + }, + { + "epoch": 0.2973458667403926, + "grad_norm": 0.7887871291725388, + "learning_rate": 1.6480052947439284e-05, + "loss": 0.5134, + "step": 17208 + }, + { + "epoch": 0.2973631462538015, + "grad_norm": 0.49014984292969654, + "learning_rate": 1.647962668815475e-05, + "loss": 0.5863, + "step": 17209 + }, + { + "epoch": 0.2973804257672104, + "grad_norm": 0.806989572972634, + "learning_rate": 1.6479200408575527e-05, + "loss": 0.5045, + "step": 17210 + }, + { + "epoch": 0.2973977052806193, + "grad_norm": 0.7124559898708306, + "learning_rate": 1.6478774108702958e-05, + "loss": 0.4221, + "step": 17211 + }, + { + "epoch": 0.2974149847940282, + "grad_norm": 1.1460574295870916, + "learning_rate": 1.6478347788538376e-05, + "loss": 0.6704, + "step": 17212 + }, + { + "epoch": 0.2974322643074371, + "grad_norm": 0.8692339077244917, + "learning_rate": 1.6477921448083114e-05, + "loss": 0.4678, + "step": 17213 + }, + { + "epoch": 0.297449543820846, + "grad_norm": 1.1990147291211661, + "learning_rate": 1.647749508733851e-05, + "loss": 0.5924, + "step": 17214 + }, + { + "epoch": 0.2974668233342549, + "grad_norm": 0.9345387027597692, + "learning_rate": 1.64770687063059e-05, + "loss": 0.4783, + "step": 17215 + }, + { + "epoch": 0.2974841028476638, + "grad_norm": 0.7502145817723002, + "learning_rate": 1.6476642304986622e-05, + "loss": 0.3799, + "step": 17216 + }, + { + "epoch": 0.2975013823610727, + "grad_norm": 0.9456385038870063, + "learning_rate": 1.6476215883382005e-05, + "loss": 0.6673, + "step": 17217 + }, + { + "epoch": 0.2975186618744816, + "grad_norm": 1.0515215074589856, + "learning_rate": 1.6475789441493388e-05, + "loss": 0.4525, + "step": 17218 + }, + { + "epoch": 0.2975359413878905, + "grad_norm": 0.8914669718334917, + "learning_rate": 1.6475362979322107e-05, + "loss": 0.5631, + "step": 17219 + }, + { + "epoch": 0.29755322090129943, + "grad_norm": 0.7288417530674919, + "learning_rate": 1.6474936496869495e-05, + "loss": 0.3857, + "step": 17220 + }, + { + "epoch": 0.29757050041470834, + "grad_norm": 0.9660765551776059, + "learning_rate": 1.6474509994136892e-05, + "loss": 0.8047, + "step": 17221 + }, + { + "epoch": 0.29758777992811725, + "grad_norm": 1.1503269517811157, + "learning_rate": 1.6474083471125633e-05, + "loss": 0.4517, + "step": 17222 + }, + { + "epoch": 0.29760505944152615, + "grad_norm": 0.9464932227092532, + "learning_rate": 1.647365692783705e-05, + "loss": 0.3916, + "step": 17223 + }, + { + "epoch": 0.297622338954935, + "grad_norm": 1.2241118365951298, + "learning_rate": 1.6473230364272484e-05, + "loss": 0.5719, + "step": 17224 + }, + { + "epoch": 0.2976396184683439, + "grad_norm": 1.3668940308604987, + "learning_rate": 1.6472803780433265e-05, + "loss": 0.4532, + "step": 17225 + }, + { + "epoch": 0.2976568979817528, + "grad_norm": 0.6654775561056577, + "learning_rate": 1.6472377176320735e-05, + "loss": 0.3458, + "step": 17226 + }, + { + "epoch": 0.29767417749516173, + "grad_norm": 1.1489307991493403, + "learning_rate": 1.6471950551936227e-05, + "loss": 0.4913, + "step": 17227 + }, + { + "epoch": 0.29769145700857064, + "grad_norm": 0.6816253076542437, + "learning_rate": 1.647152390728108e-05, + "loss": 0.2722, + "step": 17228 + }, + { + "epoch": 0.29770873652197954, + "grad_norm": 0.869806073337143, + "learning_rate": 1.6471097242356625e-05, + "loss": 0.6637, + "step": 17229 + }, + { + "epoch": 0.29772601603538845, + "grad_norm": 0.7265474499670151, + "learning_rate": 1.6470670557164204e-05, + "loss": 0.416, + "step": 17230 + }, + { + "epoch": 0.29774329554879736, + "grad_norm": 1.2949241337862614, + "learning_rate": 1.647024385170515e-05, + "loss": 0.6627, + "step": 17231 + }, + { + "epoch": 0.29776057506220627, + "grad_norm": 0.5140804275037555, + "learning_rate": 1.6469817125980802e-05, + "loss": 0.6792, + "step": 17232 + }, + { + "epoch": 0.2977778545756152, + "grad_norm": 1.0951698716071654, + "learning_rate": 1.646939037999249e-05, + "loss": 0.664, + "step": 17233 + }, + { + "epoch": 0.297795134089024, + "grad_norm": 0.7098110782181971, + "learning_rate": 1.646896361374156e-05, + "loss": 0.5808, + "step": 17234 + }, + { + "epoch": 0.29781241360243293, + "grad_norm": 0.5912755431798983, + "learning_rate": 1.6468536827229346e-05, + "loss": 0.6165, + "step": 17235 + }, + { + "epoch": 0.29782969311584184, + "grad_norm": 0.9692258715742267, + "learning_rate": 1.646811002045718e-05, + "loss": 0.5287, + "step": 17236 + }, + { + "epoch": 0.29784697262925075, + "grad_norm": 1.2426539943905248, + "learning_rate": 1.6467683193426403e-05, + "loss": 0.4804, + "step": 17237 + }, + { + "epoch": 0.29786425214265966, + "grad_norm": 0.9681425083757653, + "learning_rate": 1.646725634613835e-05, + "loss": 0.422, + "step": 17238 + }, + { + "epoch": 0.29788153165606857, + "grad_norm": 1.1163530076289112, + "learning_rate": 1.646682947859436e-05, + "loss": 0.3944, + "step": 17239 + }, + { + "epoch": 0.2978988111694775, + "grad_norm": 0.5842934015084127, + "learning_rate": 1.6466402590795768e-05, + "loss": 0.4251, + "step": 17240 + }, + { + "epoch": 0.2979160906828864, + "grad_norm": 1.3477802229573521, + "learning_rate": 1.646597568274391e-05, + "loss": 0.4707, + "step": 17241 + }, + { + "epoch": 0.2979333701962953, + "grad_norm": 2.6474282487909346, + "learning_rate": 1.6465548754440125e-05, + "loss": 0.5817, + "step": 17242 + }, + { + "epoch": 0.2979506497097042, + "grad_norm": 0.7269755818121718, + "learning_rate": 1.6465121805885754e-05, + "loss": 0.5585, + "step": 17243 + }, + { + "epoch": 0.2979679292231131, + "grad_norm": 1.0868335048593778, + "learning_rate": 1.6464694837082125e-05, + "loss": 0.4707, + "step": 17244 + }, + { + "epoch": 0.29798520873652196, + "grad_norm": 0.9789922398923866, + "learning_rate": 1.6464267848030583e-05, + "loss": 0.3749, + "step": 17245 + }, + { + "epoch": 0.29800248824993086, + "grad_norm": 1.115438789025867, + "learning_rate": 1.646384083873246e-05, + "loss": 0.4867, + "step": 17246 + }, + { + "epoch": 0.29801976776333977, + "grad_norm": 0.5449237396417186, + "learning_rate": 1.6463413809189102e-05, + "loss": 0.4002, + "step": 17247 + }, + { + "epoch": 0.2980370472767487, + "grad_norm": 0.9442017182186668, + "learning_rate": 1.646298675940184e-05, + "loss": 0.318, + "step": 17248 + }, + { + "epoch": 0.2980543267901576, + "grad_norm": 1.8552880794399125, + "learning_rate": 1.6462559689372006e-05, + "loss": 0.5116, + "step": 17249 + }, + { + "epoch": 0.2980716063035665, + "grad_norm": 1.1777031194206942, + "learning_rate": 1.646213259910095e-05, + "loss": 0.4821, + "step": 17250 + }, + { + "epoch": 0.2980888858169754, + "grad_norm": 0.41393436793722294, + "learning_rate": 1.646170548859e-05, + "loss": 0.7048, + "step": 17251 + }, + { + "epoch": 0.2981061653303843, + "grad_norm": 1.1819495346509885, + "learning_rate": 1.64612783578405e-05, + "loss": 0.6256, + "step": 17252 + }, + { + "epoch": 0.2981234448437932, + "grad_norm": 1.2817936072248899, + "learning_rate": 1.6460851206853783e-05, + "loss": 0.4331, + "step": 17253 + }, + { + "epoch": 0.2981407243572021, + "grad_norm": 0.7011251210305405, + "learning_rate": 1.646042403563119e-05, + "loss": 0.3987, + "step": 17254 + }, + { + "epoch": 0.298158003870611, + "grad_norm": 0.8493744487957909, + "learning_rate": 1.645999684417406e-05, + "loss": 0.7036, + "step": 17255 + }, + { + "epoch": 0.2981752833840199, + "grad_norm": 1.2263193587550374, + "learning_rate": 1.6459569632483723e-05, + "loss": 0.5771, + "step": 17256 + }, + { + "epoch": 0.2981925628974288, + "grad_norm": 0.6797551733435593, + "learning_rate": 1.6459142400561526e-05, + "loss": 0.3938, + "step": 17257 + }, + { + "epoch": 0.2982098424108377, + "grad_norm": 1.0852899487235281, + "learning_rate": 1.6458715148408805e-05, + "loss": 0.5362, + "step": 17258 + }, + { + "epoch": 0.2982271219242466, + "grad_norm": 0.7762389261606928, + "learning_rate": 1.6458287876026898e-05, + "loss": 0.5821, + "step": 17259 + }, + { + "epoch": 0.2982444014376555, + "grad_norm": 1.0774144140357464, + "learning_rate": 1.6457860583417142e-05, + "loss": 0.4829, + "step": 17260 + }, + { + "epoch": 0.2982616809510644, + "grad_norm": 1.0016426876352946, + "learning_rate": 1.6457433270580873e-05, + "loss": 0.3438, + "step": 17261 + }, + { + "epoch": 0.29827896046447333, + "grad_norm": 1.470247456477531, + "learning_rate": 1.645700593751944e-05, + "loss": 0.5063, + "step": 17262 + }, + { + "epoch": 0.29829623997788224, + "grad_norm": 0.840520788560117, + "learning_rate": 1.6456578584234166e-05, + "loss": 0.6815, + "step": 17263 + }, + { + "epoch": 0.29831351949129115, + "grad_norm": 0.8217280943969173, + "learning_rate": 1.6456151210726398e-05, + "loss": 0.4449, + "step": 17264 + }, + { + "epoch": 0.29833079900470005, + "grad_norm": 0.9335534513682453, + "learning_rate": 1.6455723816997476e-05, + "loss": 0.4376, + "step": 17265 + }, + { + "epoch": 0.2983480785181089, + "grad_norm": 0.680453398365576, + "learning_rate": 1.6455296403048736e-05, + "loss": 0.4006, + "step": 17266 + }, + { + "epoch": 0.2983653580315178, + "grad_norm": 1.2814524506774907, + "learning_rate": 1.645486896888152e-05, + "loss": 0.4039, + "step": 17267 + }, + { + "epoch": 0.2983826375449267, + "grad_norm": 1.1947650556117382, + "learning_rate": 1.645444151449716e-05, + "loss": 0.4441, + "step": 17268 + }, + { + "epoch": 0.29839991705833563, + "grad_norm": 1.2412280612115325, + "learning_rate": 1.6454014039896997e-05, + "loss": 0.4624, + "step": 17269 + }, + { + "epoch": 0.29841719657174454, + "grad_norm": 0.6953203699262726, + "learning_rate": 1.6453586545082374e-05, + "loss": 0.6481, + "step": 17270 + }, + { + "epoch": 0.29843447608515344, + "grad_norm": 1.2003224903384337, + "learning_rate": 1.6453159030054628e-05, + "loss": 0.5598, + "step": 17271 + }, + { + "epoch": 0.29845175559856235, + "grad_norm": 1.1451505375206654, + "learning_rate": 1.64527314948151e-05, + "loss": 0.5218, + "step": 17272 + }, + { + "epoch": 0.29846903511197126, + "grad_norm": 0.9215746070555701, + "learning_rate": 1.6452303939365122e-05, + "loss": 0.315, + "step": 17273 + }, + { + "epoch": 0.29848631462538017, + "grad_norm": 1.5235530523538037, + "learning_rate": 1.645187636370604e-05, + "loss": 0.2391, + "step": 17274 + }, + { + "epoch": 0.2985035941387891, + "grad_norm": 1.5111410003658894, + "learning_rate": 1.645144876783919e-05, + "loss": 0.517, + "step": 17275 + }, + { + "epoch": 0.2985208736521979, + "grad_norm": 1.0776987340668422, + "learning_rate": 1.645102115176591e-05, + "loss": 0.4771, + "step": 17276 + }, + { + "epoch": 0.29853815316560683, + "grad_norm": 1.0996365126615584, + "learning_rate": 1.6450593515487544e-05, + "loss": 0.4653, + "step": 17277 + }, + { + "epoch": 0.29855543267901574, + "grad_norm": 1.7164699223131066, + "learning_rate": 1.645016585900543e-05, + "loss": 0.6214, + "step": 17278 + }, + { + "epoch": 0.29857271219242465, + "grad_norm": 0.9813230361711834, + "learning_rate": 1.6449738182320904e-05, + "loss": 0.5545, + "step": 17279 + }, + { + "epoch": 0.29858999170583356, + "grad_norm": 1.258193066049479, + "learning_rate": 1.644931048543531e-05, + "loss": 0.6358, + "step": 17280 + }, + { + "epoch": 0.29860727121924246, + "grad_norm": 0.7429715228332354, + "learning_rate": 1.644888276834998e-05, + "loss": 0.4839, + "step": 17281 + }, + { + "epoch": 0.2986245507326514, + "grad_norm": 1.348322628944918, + "learning_rate": 1.644845503106627e-05, + "loss": 0.5674, + "step": 17282 + }, + { + "epoch": 0.2986418302460603, + "grad_norm": 1.1312113434612527, + "learning_rate": 1.64480272735855e-05, + "loss": 0.4499, + "step": 17283 + }, + { + "epoch": 0.2986591097594692, + "grad_norm": 1.719238158312419, + "learning_rate": 1.644759949590902e-05, + "loss": 0.6929, + "step": 17284 + }, + { + "epoch": 0.2986763892728781, + "grad_norm": 0.8770067460446258, + "learning_rate": 1.644717169803817e-05, + "loss": 0.4397, + "step": 17285 + }, + { + "epoch": 0.298693668786287, + "grad_norm": 1.062742545479404, + "learning_rate": 1.6446743879974286e-05, + "loss": 0.537, + "step": 17286 + }, + { + "epoch": 0.29871094829969586, + "grad_norm": 0.6683744144802072, + "learning_rate": 1.6446316041718713e-05, + "loss": 0.4445, + "step": 17287 + }, + { + "epoch": 0.29872822781310476, + "grad_norm": 0.7156028989383367, + "learning_rate": 1.6445888183272788e-05, + "loss": 0.5684, + "step": 17288 + }, + { + "epoch": 0.29874550732651367, + "grad_norm": 0.9342476472800004, + "learning_rate": 1.6445460304637847e-05, + "loss": 0.5974, + "step": 17289 + }, + { + "epoch": 0.2987627868399226, + "grad_norm": 0.8809876447182282, + "learning_rate": 1.644503240581524e-05, + "loss": 0.5625, + "step": 17290 + }, + { + "epoch": 0.2987800663533315, + "grad_norm": 1.590992023585226, + "learning_rate": 1.6444604486806297e-05, + "loss": 0.6419, + "step": 17291 + }, + { + "epoch": 0.2987973458667404, + "grad_norm": 0.7789846642425771, + "learning_rate": 1.6444176547612366e-05, + "loss": 0.8116, + "step": 17292 + }, + { + "epoch": 0.2988146253801493, + "grad_norm": 0.8049835428694192, + "learning_rate": 1.6443748588234784e-05, + "loss": 0.5307, + "step": 17293 + }, + { + "epoch": 0.2988319048935582, + "grad_norm": 0.620976142169009, + "learning_rate": 1.6443320608674895e-05, + "loss": 0.4311, + "step": 17294 + }, + { + "epoch": 0.2988491844069671, + "grad_norm": 0.947791848977851, + "learning_rate": 1.644289260893403e-05, + "loss": 0.5476, + "step": 17295 + }, + { + "epoch": 0.298866463920376, + "grad_norm": 1.292309428579424, + "learning_rate": 1.6442464589013537e-05, + "loss": 0.6237, + "step": 17296 + }, + { + "epoch": 0.29888374343378493, + "grad_norm": 0.9963712246360954, + "learning_rate": 1.6442036548914756e-05, + "loss": 0.8361, + "step": 17297 + }, + { + "epoch": 0.2989010229471938, + "grad_norm": 0.7893995601784718, + "learning_rate": 1.6441608488639027e-05, + "loss": 0.3865, + "step": 17298 + }, + { + "epoch": 0.2989183024606027, + "grad_norm": 1.0160107052661227, + "learning_rate": 1.644118040818769e-05, + "loss": 0.5895, + "step": 17299 + }, + { + "epoch": 0.2989355819740116, + "grad_norm": 0.8562215882196683, + "learning_rate": 1.644075230756209e-05, + "loss": 0.4614, + "step": 17300 + }, + { + "epoch": 0.2989528614874205, + "grad_norm": 1.3148952666069693, + "learning_rate": 1.644032418676356e-05, + "loss": 0.4085, + "step": 17301 + }, + { + "epoch": 0.2989701410008294, + "grad_norm": 0.6660878619196265, + "learning_rate": 1.6439896045793448e-05, + "loss": 0.4287, + "step": 17302 + }, + { + "epoch": 0.2989874205142383, + "grad_norm": 1.16288822985814, + "learning_rate": 1.6439467884653093e-05, + "loss": 0.5547, + "step": 17303 + }, + { + "epoch": 0.29900470002764723, + "grad_norm": 0.9603459583675756, + "learning_rate": 1.6439039703343836e-05, + "loss": 0.4864, + "step": 17304 + }, + { + "epoch": 0.29902197954105614, + "grad_norm": 0.9249532426645242, + "learning_rate": 1.6438611501867013e-05, + "loss": 0.5215, + "step": 17305 + }, + { + "epoch": 0.29903925905446505, + "grad_norm": 1.2528298897241, + "learning_rate": 1.643818328022397e-05, + "loss": 0.5832, + "step": 17306 + }, + { + "epoch": 0.29905653856787395, + "grad_norm": 1.0991532740719636, + "learning_rate": 1.6437755038416052e-05, + "loss": 0.5438, + "step": 17307 + }, + { + "epoch": 0.2990738180812828, + "grad_norm": 1.01229639855345, + "learning_rate": 1.643732677644459e-05, + "loss": 0.369, + "step": 17308 + }, + { + "epoch": 0.2990910975946917, + "grad_norm": 0.8475745071886686, + "learning_rate": 1.6436898494310935e-05, + "loss": 0.5145, + "step": 17309 + }, + { + "epoch": 0.2991083771081006, + "grad_norm": 0.9125761046005312, + "learning_rate": 1.6436470192016422e-05, + "loss": 0.4581, + "step": 17310 + }, + { + "epoch": 0.29912565662150953, + "grad_norm": 0.9495731052077836, + "learning_rate": 1.64360418695624e-05, + "loss": 0.3883, + "step": 17311 + }, + { + "epoch": 0.29914293613491844, + "grad_norm": 0.4454360787174164, + "learning_rate": 1.64356135269502e-05, + "loss": 0.7244, + "step": 17312 + }, + { + "epoch": 0.29916021564832734, + "grad_norm": 0.9813470899955272, + "learning_rate": 1.6435185164181176e-05, + "loss": 0.3315, + "step": 17313 + }, + { + "epoch": 0.29917749516173625, + "grad_norm": 1.565822513132526, + "learning_rate": 1.6434756781256658e-05, + "loss": 0.4731, + "step": 17314 + }, + { + "epoch": 0.29919477467514516, + "grad_norm": 1.3695584553312123, + "learning_rate": 1.6434328378177994e-05, + "loss": 0.7418, + "step": 17315 + }, + { + "epoch": 0.29921205418855407, + "grad_norm": 1.2528778978551487, + "learning_rate": 1.6433899954946523e-05, + "loss": 0.4471, + "step": 17316 + }, + { + "epoch": 0.299229333701963, + "grad_norm": 0.9333646789389342, + "learning_rate": 1.643347151156359e-05, + "loss": 0.3005, + "step": 17317 + }, + { + "epoch": 0.2992466132153719, + "grad_norm": 0.6878955451210994, + "learning_rate": 1.6433043048030537e-05, + "loss": 0.4909, + "step": 17318 + }, + { + "epoch": 0.29926389272878073, + "grad_norm": 0.8925093690718046, + "learning_rate": 1.6432614564348702e-05, + "loss": 0.623, + "step": 17319 + }, + { + "epoch": 0.29928117224218964, + "grad_norm": 0.9165131000493741, + "learning_rate": 1.6432186060519427e-05, + "loss": 0.3452, + "step": 17320 + }, + { + "epoch": 0.29929845175559855, + "grad_norm": 0.4727422806376258, + "learning_rate": 1.643175753654406e-05, + "loss": 0.8199, + "step": 17321 + }, + { + "epoch": 0.29931573126900746, + "grad_norm": 1.5895934523398405, + "learning_rate": 1.643132899242394e-05, + "loss": 0.5106, + "step": 17322 + }, + { + "epoch": 0.29933301078241636, + "grad_norm": 0.8572054917793276, + "learning_rate": 1.6430900428160405e-05, + "loss": 0.5708, + "step": 17323 + }, + { + "epoch": 0.29935029029582527, + "grad_norm": 1.1247471108591063, + "learning_rate": 1.6430471843754806e-05, + "loss": 0.5504, + "step": 17324 + }, + { + "epoch": 0.2993675698092342, + "grad_norm": 1.479553175071495, + "learning_rate": 1.643004323920848e-05, + "loss": 0.5856, + "step": 17325 + }, + { + "epoch": 0.2993848493226431, + "grad_norm": 0.8841095226118052, + "learning_rate": 1.6429614614522767e-05, + "loss": 0.3704, + "step": 17326 + }, + { + "epoch": 0.299402128836052, + "grad_norm": 0.8897622725006661, + "learning_rate": 1.6429185969699016e-05, + "loss": 0.381, + "step": 17327 + }, + { + "epoch": 0.2994194083494609, + "grad_norm": 0.42564932443686043, + "learning_rate": 1.642875730473856e-05, + "loss": 0.7438, + "step": 17328 + }, + { + "epoch": 0.29943668786286975, + "grad_norm": 1.016338945824097, + "learning_rate": 1.642832861964275e-05, + "loss": 0.7428, + "step": 17329 + }, + { + "epoch": 0.29945396737627866, + "grad_norm": 1.2258162945371887, + "learning_rate": 1.6427899914412927e-05, + "loss": 0.4537, + "step": 17330 + }, + { + "epoch": 0.29947124688968757, + "grad_norm": 1.3271886240014463, + "learning_rate": 1.6427471189050438e-05, + "loss": 0.5063, + "step": 17331 + }, + { + "epoch": 0.2994885264030965, + "grad_norm": 1.1062487051816228, + "learning_rate": 1.6427042443556614e-05, + "loss": 0.524, + "step": 17332 + }, + { + "epoch": 0.2995058059165054, + "grad_norm": 1.4629688822154427, + "learning_rate": 1.6426613677932806e-05, + "loss": 0.3961, + "step": 17333 + }, + { + "epoch": 0.2995230854299143, + "grad_norm": 0.8925555755622298, + "learning_rate": 1.6426184892180357e-05, + "loss": 0.5307, + "step": 17334 + }, + { + "epoch": 0.2995403649433232, + "grad_norm": 0.36390007083164777, + "learning_rate": 1.6425756086300607e-05, + "loss": 0.6209, + "step": 17335 + }, + { + "epoch": 0.2995576444567321, + "grad_norm": 1.190025612679174, + "learning_rate": 1.6425327260294902e-05, + "loss": 0.4361, + "step": 17336 + }, + { + "epoch": 0.299574923970141, + "grad_norm": 1.0709539867106121, + "learning_rate": 1.6424898414164583e-05, + "loss": 0.4724, + "step": 17337 + }, + { + "epoch": 0.2995922034835499, + "grad_norm": 1.3357435379328715, + "learning_rate": 1.6424469547910994e-05, + "loss": 0.4772, + "step": 17338 + }, + { + "epoch": 0.29960948299695883, + "grad_norm": 0.7243850755226398, + "learning_rate": 1.642404066153548e-05, + "loss": 0.354, + "step": 17339 + }, + { + "epoch": 0.2996267625103677, + "grad_norm": 0.8447104049044678, + "learning_rate": 1.642361175503938e-05, + "loss": 0.5428, + "step": 17340 + }, + { + "epoch": 0.2996440420237766, + "grad_norm": 0.8997199998326773, + "learning_rate": 1.642318282842404e-05, + "loss": 0.3845, + "step": 17341 + }, + { + "epoch": 0.2996613215371855, + "grad_norm": 1.365326864644494, + "learning_rate": 1.6422753881690804e-05, + "loss": 0.4823, + "step": 17342 + }, + { + "epoch": 0.2996786010505944, + "grad_norm": 1.3257093527607944, + "learning_rate": 1.6422324914841014e-05, + "loss": 0.4572, + "step": 17343 + }, + { + "epoch": 0.2996958805640033, + "grad_norm": 0.40352839789436123, + "learning_rate": 1.6421895927876014e-05, + "loss": 0.4853, + "step": 17344 + }, + { + "epoch": 0.2997131600774122, + "grad_norm": 1.0692923752014045, + "learning_rate": 1.642146692079715e-05, + "loss": 0.4494, + "step": 17345 + }, + { + "epoch": 0.29973043959082113, + "grad_norm": 1.323940466844376, + "learning_rate": 1.642103789360576e-05, + "loss": 0.4508, + "step": 17346 + }, + { + "epoch": 0.29974771910423004, + "grad_norm": 0.8763738649627613, + "learning_rate": 1.6420608846303193e-05, + "loss": 0.3772, + "step": 17347 + }, + { + "epoch": 0.29976499861763894, + "grad_norm": 0.7527969290969896, + "learning_rate": 1.642017977889079e-05, + "loss": 0.2864, + "step": 17348 + }, + { + "epoch": 0.29978227813104785, + "grad_norm": 0.5824863499045373, + "learning_rate": 1.64197506913699e-05, + "loss": 0.7417, + "step": 17349 + }, + { + "epoch": 0.2997995576444567, + "grad_norm": 1.1062041928388588, + "learning_rate": 1.641932158374186e-05, + "loss": 0.3526, + "step": 17350 + }, + { + "epoch": 0.2998168371578656, + "grad_norm": 0.48798197046596054, + "learning_rate": 1.6418892456008018e-05, + "loss": 0.5984, + "step": 17351 + }, + { + "epoch": 0.2998341166712745, + "grad_norm": 0.8654838635329355, + "learning_rate": 1.6418463308169717e-05, + "loss": 0.517, + "step": 17352 + }, + { + "epoch": 0.2998513961846834, + "grad_norm": 0.5098001911180938, + "learning_rate": 1.6418034140228297e-05, + "loss": 1.0297, + "step": 17353 + }, + { + "epoch": 0.29986867569809234, + "grad_norm": 1.7210996271973409, + "learning_rate": 1.6417604952185113e-05, + "loss": 0.4814, + "step": 17354 + }, + { + "epoch": 0.29988595521150124, + "grad_norm": 1.0983817559032882, + "learning_rate": 1.64171757440415e-05, + "loss": 0.4393, + "step": 17355 + }, + { + "epoch": 0.29990323472491015, + "grad_norm": 0.7425509503015132, + "learning_rate": 1.6416746515798806e-05, + "loss": 0.3186, + "step": 17356 + }, + { + "epoch": 0.29992051423831906, + "grad_norm": 2.0924747848052543, + "learning_rate": 1.6416317267458372e-05, + "loss": 0.629, + "step": 17357 + }, + { + "epoch": 0.29993779375172797, + "grad_norm": 0.75962292697345, + "learning_rate": 1.6415887999021545e-05, + "loss": 0.562, + "step": 17358 + }, + { + "epoch": 0.2999550732651369, + "grad_norm": 1.523504113797781, + "learning_rate": 1.6415458710489668e-05, + "loss": 0.5183, + "step": 17359 + }, + { + "epoch": 0.2999723527785458, + "grad_norm": 0.9898222364027733, + "learning_rate": 1.641502940186409e-05, + "loss": 0.6078, + "step": 17360 + }, + { + "epoch": 0.29998963229195463, + "grad_norm": 0.804322676931206, + "learning_rate": 1.641460007314615e-05, + "loss": 0.4488, + "step": 17361 + }, + { + "epoch": 0.30000691180536354, + "grad_norm": 1.4083020029386712, + "learning_rate": 1.6414170724337197e-05, + "loss": 0.4126, + "step": 17362 + }, + { + "epoch": 0.30002419131877245, + "grad_norm": 1.0753868304801655, + "learning_rate": 1.641374135543857e-05, + "loss": 0.5541, + "step": 17363 + }, + { + "epoch": 0.30004147083218136, + "grad_norm": 0.44543785506645, + "learning_rate": 1.641331196645162e-05, + "loss": 0.6144, + "step": 17364 + }, + { + "epoch": 0.30005875034559026, + "grad_norm": 0.8417934749383245, + "learning_rate": 1.6412882557377692e-05, + "loss": 0.4854, + "step": 17365 + }, + { + "epoch": 0.30007602985899917, + "grad_norm": 1.1535010035728106, + "learning_rate": 1.6412453128218126e-05, + "loss": 0.4532, + "step": 17366 + }, + { + "epoch": 0.3000933093724081, + "grad_norm": 0.9145063883809176, + "learning_rate": 1.641202367897427e-05, + "loss": 0.6873, + "step": 17367 + }, + { + "epoch": 0.300110588885817, + "grad_norm": 0.7200352225651669, + "learning_rate": 1.641159420964747e-05, + "loss": 0.4892, + "step": 17368 + }, + { + "epoch": 0.3001278683992259, + "grad_norm": 1.1100992524245563, + "learning_rate": 1.6411164720239065e-05, + "loss": 0.6021, + "step": 17369 + }, + { + "epoch": 0.3001451479126348, + "grad_norm": 0.9240395634357738, + "learning_rate": 1.6410735210750408e-05, + "loss": 0.6046, + "step": 17370 + }, + { + "epoch": 0.3001624274260437, + "grad_norm": 0.557737496783254, + "learning_rate": 1.641030568118284e-05, + "loss": 0.5567, + "step": 17371 + }, + { + "epoch": 0.30017970693945256, + "grad_norm": 1.2591639319601247, + "learning_rate": 1.640987613153771e-05, + "loss": 0.5038, + "step": 17372 + }, + { + "epoch": 0.30019698645286147, + "grad_norm": 0.69251395161469, + "learning_rate": 1.640944656181636e-05, + "loss": 0.4239, + "step": 17373 + }, + { + "epoch": 0.3002142659662704, + "grad_norm": 1.4922906031545775, + "learning_rate": 1.640901697202013e-05, + "loss": 0.4612, + "step": 17374 + }, + { + "epoch": 0.3002315454796793, + "grad_norm": 1.195108527488242, + "learning_rate": 1.6408587362150382e-05, + "loss": 0.4681, + "step": 17375 + }, + { + "epoch": 0.3002488249930882, + "grad_norm": 0.8520534247904095, + "learning_rate": 1.640815773220845e-05, + "loss": 0.4158, + "step": 17376 + }, + { + "epoch": 0.3002661045064971, + "grad_norm": 1.058821556794575, + "learning_rate": 1.6407728082195676e-05, + "loss": 0.4312, + "step": 17377 + }, + { + "epoch": 0.300283384019906, + "grad_norm": 1.0747226510209005, + "learning_rate": 1.640729841211341e-05, + "loss": 0.344, + "step": 17378 + }, + { + "epoch": 0.3003006635333149, + "grad_norm": 1.0800638458365126, + "learning_rate": 1.6406868721963003e-05, + "loss": 0.4334, + "step": 17379 + }, + { + "epoch": 0.3003179430467238, + "grad_norm": 0.6919164301196981, + "learning_rate": 1.640643901174579e-05, + "loss": 0.3715, + "step": 17380 + }, + { + "epoch": 0.30033522256013273, + "grad_norm": 0.7430762686463787, + "learning_rate": 1.640600928146313e-05, + "loss": 0.3886, + "step": 17381 + }, + { + "epoch": 0.3003525020735416, + "grad_norm": 1.0006826519324947, + "learning_rate": 1.6405579531116357e-05, + "loss": 0.5332, + "step": 17382 + }, + { + "epoch": 0.3003697815869505, + "grad_norm": 0.8393269602664358, + "learning_rate": 1.6405149760706826e-05, + "loss": 0.4792, + "step": 17383 + }, + { + "epoch": 0.3003870611003594, + "grad_norm": 1.1067935381481309, + "learning_rate": 1.6404719970235876e-05, + "loss": 0.4185, + "step": 17384 + }, + { + "epoch": 0.3004043406137683, + "grad_norm": 0.7033495578885295, + "learning_rate": 1.6404290159704858e-05, + "loss": 0.4056, + "step": 17385 + }, + { + "epoch": 0.3004216201271772, + "grad_norm": 0.4752688664160469, + "learning_rate": 1.6403860329115117e-05, + "loss": 0.6207, + "step": 17386 + }, + { + "epoch": 0.3004388996405861, + "grad_norm": 1.605549626957958, + "learning_rate": 1.6403430478468e-05, + "loss": 0.5344, + "step": 17387 + }, + { + "epoch": 0.30045617915399503, + "grad_norm": 1.4052750883618648, + "learning_rate": 1.6403000607764848e-05, + "loss": 0.519, + "step": 17388 + }, + { + "epoch": 0.30047345866740394, + "grad_norm": 1.5692676686163145, + "learning_rate": 1.6402570717007016e-05, + "loss": 0.4715, + "step": 17389 + }, + { + "epoch": 0.30049073818081284, + "grad_norm": 0.9997136458852319, + "learning_rate": 1.6402140806195842e-05, + "loss": 0.5492, + "step": 17390 + }, + { + "epoch": 0.30050801769422175, + "grad_norm": 0.9462401452712191, + "learning_rate": 1.6401710875332677e-05, + "loss": 0.321, + "step": 17391 + }, + { + "epoch": 0.30052529720763066, + "grad_norm": 0.7991705567655844, + "learning_rate": 1.640128092441887e-05, + "loss": 0.3956, + "step": 17392 + }, + { + "epoch": 0.3005425767210395, + "grad_norm": 1.090112084319564, + "learning_rate": 1.640085095345576e-05, + "loss": 0.6272, + "step": 17393 + }, + { + "epoch": 0.3005598562344484, + "grad_norm": 1.2764823694380463, + "learning_rate": 1.64004209624447e-05, + "loss": 0.4384, + "step": 17394 + }, + { + "epoch": 0.3005771357478573, + "grad_norm": 0.9600467111816332, + "learning_rate": 1.639999095138704e-05, + "loss": 0.3863, + "step": 17395 + }, + { + "epoch": 0.30059441526126623, + "grad_norm": 1.0715897102751932, + "learning_rate": 1.6399560920284118e-05, + "loss": 0.3294, + "step": 17396 + }, + { + "epoch": 0.30061169477467514, + "grad_norm": 1.2818000647202348, + "learning_rate": 1.6399130869137284e-05, + "loss": 0.5501, + "step": 17397 + }, + { + "epoch": 0.30062897428808405, + "grad_norm": 1.1088072203132833, + "learning_rate": 1.639870079794789e-05, + "loss": 0.7205, + "step": 17398 + }, + { + "epoch": 0.30064625380149296, + "grad_norm": 1.1308750332357076, + "learning_rate": 1.639827070671727e-05, + "loss": 0.4006, + "step": 17399 + }, + { + "epoch": 0.30066353331490187, + "grad_norm": 1.1785708396260441, + "learning_rate": 1.6397840595446788e-05, + "loss": 0.4497, + "step": 17400 + }, + { + "epoch": 0.3006808128283108, + "grad_norm": 0.8309189183036862, + "learning_rate": 1.6397410464137782e-05, + "loss": 0.3912, + "step": 17401 + }, + { + "epoch": 0.3006980923417197, + "grad_norm": 0.6773423432601408, + "learning_rate": 1.6396980312791596e-05, + "loss": 0.5736, + "step": 17402 + }, + { + "epoch": 0.30071537185512853, + "grad_norm": 1.5017039870215663, + "learning_rate": 1.6396550141409587e-05, + "loss": 0.6392, + "step": 17403 + }, + { + "epoch": 0.30073265136853744, + "grad_norm": 1.4090053750634612, + "learning_rate": 1.639611994999309e-05, + "loss": 0.5767, + "step": 17404 + }, + { + "epoch": 0.30074993088194635, + "grad_norm": 0.9445456387672667, + "learning_rate": 1.639568973854347e-05, + "loss": 0.4389, + "step": 17405 + }, + { + "epoch": 0.30076721039535526, + "grad_norm": 1.2085388048608279, + "learning_rate": 1.6395259507062054e-05, + "loss": 0.6539, + "step": 17406 + }, + { + "epoch": 0.30078448990876416, + "grad_norm": 0.8782973206227512, + "learning_rate": 1.6394829255550203e-05, + "loss": 0.4049, + "step": 17407 + }, + { + "epoch": 0.30080176942217307, + "grad_norm": 0.8712095667723275, + "learning_rate": 1.639439898400926e-05, + "loss": 0.7075, + "step": 17408 + }, + { + "epoch": 0.300819048935582, + "grad_norm": 1.039292254805304, + "learning_rate": 1.6393968692440573e-05, + "loss": 0.4858, + "step": 17409 + }, + { + "epoch": 0.3008363284489909, + "grad_norm": 0.7994836918440194, + "learning_rate": 1.6393538380845487e-05, + "loss": 0.5444, + "step": 17410 + }, + { + "epoch": 0.3008536079623998, + "grad_norm": 0.8389730352627883, + "learning_rate": 1.639310804922536e-05, + "loss": 0.431, + "step": 17411 + }, + { + "epoch": 0.3008708874758087, + "grad_norm": 0.7044038602345309, + "learning_rate": 1.639267769758153e-05, + "loss": 0.5306, + "step": 17412 + }, + { + "epoch": 0.3008881669892176, + "grad_norm": 1.0406327941052709, + "learning_rate": 1.6392247325915348e-05, + "loss": 0.5504, + "step": 17413 + }, + { + "epoch": 0.30090544650262646, + "grad_norm": 1.6761959361544259, + "learning_rate": 1.6391816934228157e-05, + "loss": 0.7055, + "step": 17414 + }, + { + "epoch": 0.30092272601603537, + "grad_norm": 1.0065511883026872, + "learning_rate": 1.6391386522521315e-05, + "loss": 0.4911, + "step": 17415 + }, + { + "epoch": 0.3009400055294443, + "grad_norm": 0.751425912960536, + "learning_rate": 1.6390956090796164e-05, + "loss": 0.5546, + "step": 17416 + }, + { + "epoch": 0.3009572850428532, + "grad_norm": 0.7273244973046514, + "learning_rate": 1.639052563905405e-05, + "loss": 0.3811, + "step": 17417 + }, + { + "epoch": 0.3009745645562621, + "grad_norm": 0.8550963584637872, + "learning_rate": 1.6390095167296326e-05, + "loss": 0.3243, + "step": 17418 + }, + { + "epoch": 0.300991844069671, + "grad_norm": 0.9995193257898267, + "learning_rate": 1.638966467552434e-05, + "loss": 0.4284, + "step": 17419 + }, + { + "epoch": 0.3010091235830799, + "grad_norm": 1.720442603456618, + "learning_rate": 1.6389234163739433e-05, + "loss": 0.4795, + "step": 17420 + }, + { + "epoch": 0.3010264030964888, + "grad_norm": 0.8337136858546199, + "learning_rate": 1.6388803631942963e-05, + "loss": 0.542, + "step": 17421 + }, + { + "epoch": 0.3010436826098977, + "grad_norm": 0.5746299589655668, + "learning_rate": 1.6388373080136273e-05, + "loss": 0.3816, + "step": 17422 + }, + { + "epoch": 0.30106096212330663, + "grad_norm": 0.895274374605064, + "learning_rate": 1.6387942508320715e-05, + "loss": 0.6671, + "step": 17423 + }, + { + "epoch": 0.3010782416367155, + "grad_norm": 0.6482958690163781, + "learning_rate": 1.6387511916497633e-05, + "loss": 0.8269, + "step": 17424 + }, + { + "epoch": 0.3010955211501244, + "grad_norm": 1.184571253337483, + "learning_rate": 1.6387081304668382e-05, + "loss": 0.6369, + "step": 17425 + }, + { + "epoch": 0.3011128006635333, + "grad_norm": 1.629328328658854, + "learning_rate": 1.6386650672834306e-05, + "loss": 0.7789, + "step": 17426 + }, + { + "epoch": 0.3011300801769422, + "grad_norm": 1.941660721364817, + "learning_rate": 1.6386220020996755e-05, + "loss": 0.538, + "step": 17427 + }, + { + "epoch": 0.3011473596903511, + "grad_norm": 0.35939689096459954, + "learning_rate": 1.6385789349157076e-05, + "loss": 0.5837, + "step": 17428 + }, + { + "epoch": 0.30116463920376, + "grad_norm": 1.5681413191889269, + "learning_rate": 1.6385358657316618e-05, + "loss": 0.5013, + "step": 17429 + }, + { + "epoch": 0.30118191871716893, + "grad_norm": 0.6287781311786672, + "learning_rate": 1.6384927945476733e-05, + "loss": 0.3191, + "step": 17430 + }, + { + "epoch": 0.30119919823057784, + "grad_norm": 0.9085378278325373, + "learning_rate": 1.638449721363877e-05, + "loss": 0.4462, + "step": 17431 + }, + { + "epoch": 0.30121647774398674, + "grad_norm": 1.4333204484751692, + "learning_rate": 1.6384066461804072e-05, + "loss": 0.3439, + "step": 17432 + }, + { + "epoch": 0.30123375725739565, + "grad_norm": 0.9910530049750362, + "learning_rate": 1.6383635689973997e-05, + "loss": 0.6474, + "step": 17433 + }, + { + "epoch": 0.30125103677080456, + "grad_norm": 0.9475679071312865, + "learning_rate": 1.6383204898149888e-05, + "loss": 0.5033, + "step": 17434 + }, + { + "epoch": 0.3012683162842134, + "grad_norm": 0.38526707837798474, + "learning_rate": 1.6382774086333095e-05, + "loss": 0.6354, + "step": 17435 + }, + { + "epoch": 0.3012855957976223, + "grad_norm": 1.2419826160179377, + "learning_rate": 1.6382343254524973e-05, + "loss": 0.4713, + "step": 17436 + }, + { + "epoch": 0.3013028753110312, + "grad_norm": 1.316326492278742, + "learning_rate": 1.6381912402726862e-05, + "loss": 0.4376, + "step": 17437 + }, + { + "epoch": 0.30132015482444013, + "grad_norm": 1.6802471706294324, + "learning_rate": 1.638148153094012e-05, + "loss": 0.5869, + "step": 17438 + }, + { + "epoch": 0.30133743433784904, + "grad_norm": 0.5267614706012341, + "learning_rate": 1.6381050639166093e-05, + "loss": 0.7527, + "step": 17439 + }, + { + "epoch": 0.30135471385125795, + "grad_norm": 1.5256880871690608, + "learning_rate": 1.6380619727406127e-05, + "loss": 0.5192, + "step": 17440 + }, + { + "epoch": 0.30137199336466686, + "grad_norm": 0.6965310345188794, + "learning_rate": 1.6380188795661575e-05, + "loss": 0.5776, + "step": 17441 + }, + { + "epoch": 0.30138927287807576, + "grad_norm": 0.7510909744449714, + "learning_rate": 1.637975784393379e-05, + "loss": 0.4056, + "step": 17442 + }, + { + "epoch": 0.3014065523914847, + "grad_norm": 0.7545257232561161, + "learning_rate": 1.6379326872224116e-05, + "loss": 0.3812, + "step": 17443 + }, + { + "epoch": 0.3014238319048936, + "grad_norm": 1.4867591625882468, + "learning_rate": 1.637889588053391e-05, + "loss": 0.464, + "step": 17444 + }, + { + "epoch": 0.3014411114183025, + "grad_norm": 0.8251272709358798, + "learning_rate": 1.637846486886451e-05, + "loss": 0.4484, + "step": 17445 + }, + { + "epoch": 0.30145839093171134, + "grad_norm": 0.6898732062259352, + "learning_rate": 1.6378033837217277e-05, + "loss": 0.4641, + "step": 17446 + }, + { + "epoch": 0.30147567044512025, + "grad_norm": 1.022739244755983, + "learning_rate": 1.6377602785593564e-05, + "loss": 0.5432, + "step": 17447 + }, + { + "epoch": 0.30149294995852916, + "grad_norm": 0.8420272916977906, + "learning_rate": 1.6377171713994707e-05, + "loss": 0.3092, + "step": 17448 + }, + { + "epoch": 0.30151022947193806, + "grad_norm": 1.049906108168355, + "learning_rate": 1.6376740622422064e-05, + "loss": 0.4034, + "step": 17449 + }, + { + "epoch": 0.30152750898534697, + "grad_norm": 0.8885243582061676, + "learning_rate": 1.637630951087698e-05, + "loss": 0.5213, + "step": 17450 + }, + { + "epoch": 0.3015447884987559, + "grad_norm": 1.0502126416520927, + "learning_rate": 1.637587837936082e-05, + "loss": 0.503, + "step": 17451 + }, + { + "epoch": 0.3015620680121648, + "grad_norm": 1.0811012204385297, + "learning_rate": 1.637544722787492e-05, + "loss": 0.5407, + "step": 17452 + }, + { + "epoch": 0.3015793475255737, + "grad_norm": 1.2117790489989386, + "learning_rate": 1.6375016056420633e-05, + "loss": 0.445, + "step": 17453 + }, + { + "epoch": 0.3015966270389826, + "grad_norm": 0.9202696232783136, + "learning_rate": 1.6374584864999313e-05, + "loss": 0.3582, + "step": 17454 + }, + { + "epoch": 0.3016139065523915, + "grad_norm": 0.4003146268706638, + "learning_rate": 1.6374153653612308e-05, + "loss": 0.5506, + "step": 17455 + }, + { + "epoch": 0.30163118606580036, + "grad_norm": 1.0016377609595342, + "learning_rate": 1.637372242226097e-05, + "loss": 0.3941, + "step": 17456 + }, + { + "epoch": 0.30164846557920927, + "grad_norm": 0.8358027301392488, + "learning_rate": 1.6373291170946648e-05, + "loss": 0.5762, + "step": 17457 + }, + { + "epoch": 0.3016657450926182, + "grad_norm": 1.1123791441459374, + "learning_rate": 1.6372859899670697e-05, + "loss": 0.3541, + "step": 17458 + }, + { + "epoch": 0.3016830246060271, + "grad_norm": 1.3425356153518153, + "learning_rate": 1.637242860843446e-05, + "loss": 0.584, + "step": 17459 + }, + { + "epoch": 0.301700304119436, + "grad_norm": 1.2694780309950564, + "learning_rate": 1.6371997297239294e-05, + "loss": 0.4746, + "step": 17460 + }, + { + "epoch": 0.3017175836328449, + "grad_norm": 0.9788648593540975, + "learning_rate": 1.637156596608655e-05, + "loss": 0.4302, + "step": 17461 + }, + { + "epoch": 0.3017348631462538, + "grad_norm": 1.1830432384908545, + "learning_rate": 1.6371134614977575e-05, + "loss": 0.4015, + "step": 17462 + }, + { + "epoch": 0.3017521426596627, + "grad_norm": 0.36525914415742267, + "learning_rate": 1.6370703243913722e-05, + "loss": 0.4445, + "step": 17463 + }, + { + "epoch": 0.3017694221730716, + "grad_norm": 1.2609995192458188, + "learning_rate": 1.6370271852896342e-05, + "loss": 0.48, + "step": 17464 + }, + { + "epoch": 0.30178670168648053, + "grad_norm": 1.124612647976228, + "learning_rate": 1.636984044192679e-05, + "loss": 0.5606, + "step": 17465 + }, + { + "epoch": 0.30180398119988944, + "grad_norm": 1.1472987283833396, + "learning_rate": 1.636940901100641e-05, + "loss": 0.4737, + "step": 17466 + }, + { + "epoch": 0.3018212607132983, + "grad_norm": 1.3024526799605138, + "learning_rate": 1.6368977560136558e-05, + "loss": 0.4173, + "step": 17467 + }, + { + "epoch": 0.3018385402267072, + "grad_norm": 0.8835597135176093, + "learning_rate": 1.6368546089318584e-05, + "loss": 0.7395, + "step": 17468 + }, + { + "epoch": 0.3018558197401161, + "grad_norm": 0.8410653494696629, + "learning_rate": 1.636811459855384e-05, + "loss": 0.555, + "step": 17469 + }, + { + "epoch": 0.301873099253525, + "grad_norm": 0.9147683600895187, + "learning_rate": 1.636768308784368e-05, + "loss": 0.5548, + "step": 17470 + }, + { + "epoch": 0.3018903787669339, + "grad_norm": 0.9543501750306108, + "learning_rate": 1.6367251557189448e-05, + "loss": 0.6674, + "step": 17471 + }, + { + "epoch": 0.3019076582803428, + "grad_norm": 0.898397590032721, + "learning_rate": 1.63668200065925e-05, + "loss": 0.4942, + "step": 17472 + }, + { + "epoch": 0.30192493779375174, + "grad_norm": 0.8855666631497033, + "learning_rate": 1.636638843605419e-05, + "loss": 0.4682, + "step": 17473 + }, + { + "epoch": 0.30194221730716064, + "grad_norm": 1.2102208412526003, + "learning_rate": 1.636595684557587e-05, + "loss": 0.4059, + "step": 17474 + }, + { + "epoch": 0.30195949682056955, + "grad_norm": 1.3997626629570459, + "learning_rate": 1.6365525235158887e-05, + "loss": 0.5181, + "step": 17475 + }, + { + "epoch": 0.30197677633397846, + "grad_norm": 1.1160655693672754, + "learning_rate": 1.6365093604804595e-05, + "loss": 0.843, + "step": 17476 + }, + { + "epoch": 0.3019940558473873, + "grad_norm": 1.065109149833049, + "learning_rate": 1.6364661954514346e-05, + "loss": 0.5232, + "step": 17477 + }, + { + "epoch": 0.3020113353607962, + "grad_norm": 0.7540783440777861, + "learning_rate": 1.6364230284289492e-05, + "loss": 0.4299, + "step": 17478 + }, + { + "epoch": 0.3020286148742051, + "grad_norm": 1.1159968273327083, + "learning_rate": 1.6363798594131385e-05, + "loss": 0.5489, + "step": 17479 + }, + { + "epoch": 0.30204589438761403, + "grad_norm": 0.6753836379966274, + "learning_rate": 1.636336688404138e-05, + "loss": 0.3452, + "step": 17480 + }, + { + "epoch": 0.30206317390102294, + "grad_norm": 1.351674309777307, + "learning_rate": 1.6362935154020823e-05, + "loss": 0.5486, + "step": 17481 + }, + { + "epoch": 0.30208045341443185, + "grad_norm": 1.3871518846757362, + "learning_rate": 1.636250340407107e-05, + "loss": 0.4203, + "step": 17482 + }, + { + "epoch": 0.30209773292784076, + "grad_norm": 0.9190104494348683, + "learning_rate": 1.6362071634193474e-05, + "loss": 0.6182, + "step": 17483 + }, + { + "epoch": 0.30211501244124966, + "grad_norm": 1.1166870754453893, + "learning_rate": 1.6361639844389384e-05, + "loss": 0.3613, + "step": 17484 + }, + { + "epoch": 0.30213229195465857, + "grad_norm": 0.5141959216938987, + "learning_rate": 1.6361208034660155e-05, + "loss": 0.8173, + "step": 17485 + }, + { + "epoch": 0.3021495714680675, + "grad_norm": 1.2678452586811766, + "learning_rate": 1.636077620500714e-05, + "loss": 0.3716, + "step": 17486 + }, + { + "epoch": 0.3021668509814764, + "grad_norm": 1.1857606502197682, + "learning_rate": 1.6360344355431688e-05, + "loss": 0.5499, + "step": 17487 + }, + { + "epoch": 0.30218413049488524, + "grad_norm": 1.1028368971174431, + "learning_rate": 1.635991248593516e-05, + "loss": 0.6398, + "step": 17488 + }, + { + "epoch": 0.30220141000829415, + "grad_norm": 1.554985925365247, + "learning_rate": 1.6359480596518898e-05, + "loss": 0.539, + "step": 17489 + }, + { + "epoch": 0.30221868952170305, + "grad_norm": 0.6578939734346104, + "learning_rate": 1.6359048687184262e-05, + "loss": 0.2935, + "step": 17490 + }, + { + "epoch": 0.30223596903511196, + "grad_norm": 0.3973591795465896, + "learning_rate": 1.63586167579326e-05, + "loss": 0.715, + "step": 17491 + }, + { + "epoch": 0.30225324854852087, + "grad_norm": 1.2558217511646412, + "learning_rate": 1.6358184808765267e-05, + "loss": 0.4076, + "step": 17492 + }, + { + "epoch": 0.3022705280619298, + "grad_norm": 1.0437121294952945, + "learning_rate": 1.6357752839683615e-05, + "loss": 0.6363, + "step": 17493 + }, + { + "epoch": 0.3022878075753387, + "grad_norm": 0.9623359812512208, + "learning_rate": 1.6357320850689e-05, + "loss": 0.5859, + "step": 17494 + }, + { + "epoch": 0.3023050870887476, + "grad_norm": 1.5387156559615833, + "learning_rate": 1.6356888841782776e-05, + "loss": 0.7156, + "step": 17495 + }, + { + "epoch": 0.3023223666021565, + "grad_norm": 0.4599296446029423, + "learning_rate": 1.635645681296629e-05, + "loss": 0.693, + "step": 17496 + }, + { + "epoch": 0.3023396461155654, + "grad_norm": 1.1204149476110972, + "learning_rate": 1.6356024764240897e-05, + "loss": 0.6494, + "step": 17497 + }, + { + "epoch": 0.3023569256289743, + "grad_norm": 0.8549793459951401, + "learning_rate": 1.6355592695607954e-05, + "loss": 0.3277, + "step": 17498 + }, + { + "epoch": 0.30237420514238317, + "grad_norm": 1.0833948298376035, + "learning_rate": 1.635516060706881e-05, + "loss": 0.5587, + "step": 17499 + }, + { + "epoch": 0.3023914846557921, + "grad_norm": 0.8074422691424357, + "learning_rate": 1.6354728498624817e-05, + "loss": 0.5025, + "step": 17500 + }, + { + "epoch": 0.302408764169201, + "grad_norm": 1.226022876883569, + "learning_rate": 1.6354296370277335e-05, + "loss": 0.7829, + "step": 17501 + }, + { + "epoch": 0.3024260436826099, + "grad_norm": 0.6890069086726195, + "learning_rate": 1.6353864222027716e-05, + "loss": 0.6006, + "step": 17502 + }, + { + "epoch": 0.3024433231960188, + "grad_norm": 0.9648344587566334, + "learning_rate": 1.635343205387731e-05, + "loss": 0.6169, + "step": 17503 + }, + { + "epoch": 0.3024606027094277, + "grad_norm": 1.2630768349204424, + "learning_rate": 1.6352999865827468e-05, + "loss": 0.4852, + "step": 17504 + }, + { + "epoch": 0.3024778822228366, + "grad_norm": 0.7364920322762892, + "learning_rate": 1.635256765787955e-05, + "loss": 0.4037, + "step": 17505 + }, + { + "epoch": 0.3024951617362455, + "grad_norm": 0.7390541275037705, + "learning_rate": 1.635213543003491e-05, + "loss": 0.6211, + "step": 17506 + }, + { + "epoch": 0.30251244124965443, + "grad_norm": 0.9153392611592307, + "learning_rate": 1.6351703182294897e-05, + "loss": 0.559, + "step": 17507 + }, + { + "epoch": 0.30252972076306334, + "grad_norm": 0.8287275017230848, + "learning_rate": 1.6351270914660866e-05, + "loss": 0.5676, + "step": 17508 + }, + { + "epoch": 0.3025470002764722, + "grad_norm": 0.7277192871385012, + "learning_rate": 1.635083862713417e-05, + "loss": 0.5318, + "step": 17509 + }, + { + "epoch": 0.3025642797898811, + "grad_norm": 0.6891673766085623, + "learning_rate": 1.635040631971617e-05, + "loss": 0.3228, + "step": 17510 + }, + { + "epoch": 0.30258155930329, + "grad_norm": 1.0337719583971543, + "learning_rate": 1.6349973992408213e-05, + "loss": 0.4891, + "step": 17511 + }, + { + "epoch": 0.3025988388166989, + "grad_norm": 0.6538478184192089, + "learning_rate": 1.6349541645211652e-05, + "loss": 0.3969, + "step": 17512 + }, + { + "epoch": 0.3026161183301078, + "grad_norm": 1.554224900984469, + "learning_rate": 1.6349109278127846e-05, + "loss": 0.5504, + "step": 17513 + }, + { + "epoch": 0.3026333978435167, + "grad_norm": 1.5720690254980612, + "learning_rate": 1.6348676891158145e-05, + "loss": 0.4324, + "step": 17514 + }, + { + "epoch": 0.30265067735692563, + "grad_norm": 1.1110226354473547, + "learning_rate": 1.634824448430391e-05, + "loss": 0.5453, + "step": 17515 + }, + { + "epoch": 0.30266795687033454, + "grad_norm": 0.9114244901044022, + "learning_rate": 1.634781205756649e-05, + "loss": 0.5048, + "step": 17516 + }, + { + "epoch": 0.30268523638374345, + "grad_norm": 0.8648688049247988, + "learning_rate": 1.6347379610947238e-05, + "loss": 0.5722, + "step": 17517 + }, + { + "epoch": 0.30270251589715236, + "grad_norm": 1.3501010612322892, + "learning_rate": 1.6346947144447513e-05, + "loss": 0.6029, + "step": 17518 + }, + { + "epoch": 0.30271979541056127, + "grad_norm": 0.42518937431416, + "learning_rate": 1.6346514658068664e-05, + "loss": 0.658, + "step": 17519 + }, + { + "epoch": 0.3027370749239701, + "grad_norm": 1.2901147310086682, + "learning_rate": 1.634608215181205e-05, + "loss": 0.5107, + "step": 17520 + }, + { + "epoch": 0.302754354437379, + "grad_norm": 1.2268190250783608, + "learning_rate": 1.6345649625679025e-05, + "loss": 0.4303, + "step": 17521 + }, + { + "epoch": 0.30277163395078793, + "grad_norm": 1.468500469955968, + "learning_rate": 1.6345217079670944e-05, + "loss": 0.4774, + "step": 17522 + }, + { + "epoch": 0.30278891346419684, + "grad_norm": 1.1847970121872462, + "learning_rate": 1.634478451378916e-05, + "loss": 0.5181, + "step": 17523 + }, + { + "epoch": 0.30280619297760575, + "grad_norm": 0.9286818277277306, + "learning_rate": 1.6344351928035028e-05, + "loss": 0.58, + "step": 17524 + }, + { + "epoch": 0.30282347249101466, + "grad_norm": 1.1081124899329526, + "learning_rate": 1.63439193224099e-05, + "loss": 0.5397, + "step": 17525 + }, + { + "epoch": 0.30284075200442356, + "grad_norm": 1.5242574201780439, + "learning_rate": 1.634348669691514e-05, + "loss": 0.3556, + "step": 17526 + }, + { + "epoch": 0.30285803151783247, + "grad_norm": 0.6299512108521707, + "learning_rate": 1.63430540515521e-05, + "loss": 0.6947, + "step": 17527 + }, + { + "epoch": 0.3028753110312414, + "grad_norm": 0.9606806951589946, + "learning_rate": 1.6342621386322128e-05, + "loss": 0.6437, + "step": 17528 + }, + { + "epoch": 0.3028925905446503, + "grad_norm": 0.8374529581059788, + "learning_rate": 1.634218870122658e-05, + "loss": 0.5932, + "step": 17529 + }, + { + "epoch": 0.30290987005805914, + "grad_norm": 1.3019191887415509, + "learning_rate": 1.6341755996266827e-05, + "loss": 0.4221, + "step": 17530 + }, + { + "epoch": 0.30292714957146805, + "grad_norm": 1.225996340134685, + "learning_rate": 1.6341323271444202e-05, + "loss": 0.4545, + "step": 17531 + }, + { + "epoch": 0.30294442908487695, + "grad_norm": 1.253413641857932, + "learning_rate": 1.6340890526760073e-05, + "loss": 0.6484, + "step": 17532 + }, + { + "epoch": 0.30296170859828586, + "grad_norm": 1.1837391501250922, + "learning_rate": 1.6340457762215797e-05, + "loss": 0.5216, + "step": 17533 + }, + { + "epoch": 0.30297898811169477, + "grad_norm": 0.7930500645156702, + "learning_rate": 1.634002497781272e-05, + "loss": 0.4632, + "step": 17534 + }, + { + "epoch": 0.3029962676251037, + "grad_norm": 1.2924562306550385, + "learning_rate": 1.6339592173552206e-05, + "loss": 0.5754, + "step": 17535 + }, + { + "epoch": 0.3030135471385126, + "grad_norm": 0.985684337307427, + "learning_rate": 1.6339159349435604e-05, + "loss": 0.5269, + "step": 17536 + }, + { + "epoch": 0.3030308266519215, + "grad_norm": 0.5774536901646269, + "learning_rate": 1.6338726505464276e-05, + "loss": 0.5565, + "step": 17537 + }, + { + "epoch": 0.3030481061653304, + "grad_norm": 1.2717244059620743, + "learning_rate": 1.6338293641639573e-05, + "loss": 0.6297, + "step": 17538 + }, + { + "epoch": 0.3030653856787393, + "grad_norm": 0.7583948911833945, + "learning_rate": 1.6337860757962854e-05, + "loss": 0.5083, + "step": 17539 + }, + { + "epoch": 0.3030826651921482, + "grad_norm": 0.5296489846663899, + "learning_rate": 1.6337427854435474e-05, + "loss": 0.4452, + "step": 17540 + }, + { + "epoch": 0.30309994470555707, + "grad_norm": 1.1079033472563447, + "learning_rate": 1.6336994931058788e-05, + "loss": 0.3562, + "step": 17541 + }, + { + "epoch": 0.303117224218966, + "grad_norm": 0.6804747216040474, + "learning_rate": 1.6336561987834155e-05, + "loss": 0.4269, + "step": 17542 + }, + { + "epoch": 0.3031345037323749, + "grad_norm": 1.198081755423432, + "learning_rate": 1.6336129024762924e-05, + "loss": 0.584, + "step": 17543 + }, + { + "epoch": 0.3031517832457838, + "grad_norm": 0.7076005019766637, + "learning_rate": 1.6335696041846456e-05, + "loss": 0.582, + "step": 17544 + }, + { + "epoch": 0.3031690627591927, + "grad_norm": 0.9773412427767378, + "learning_rate": 1.6335263039086106e-05, + "loss": 0.6393, + "step": 17545 + }, + { + "epoch": 0.3031863422726016, + "grad_norm": 0.9763459739514555, + "learning_rate": 1.6334830016483234e-05, + "loss": 0.5254, + "step": 17546 + }, + { + "epoch": 0.3032036217860105, + "grad_norm": 1.25619665080796, + "learning_rate": 1.633439697403919e-05, + "loss": 0.5991, + "step": 17547 + }, + { + "epoch": 0.3032209012994194, + "grad_norm": 1.2876761781411834, + "learning_rate": 1.6333963911755336e-05, + "loss": 0.4416, + "step": 17548 + }, + { + "epoch": 0.30323818081282833, + "grad_norm": 0.5560655676102769, + "learning_rate": 1.633353082963302e-05, + "loss": 0.8445, + "step": 17549 + }, + { + "epoch": 0.30325546032623724, + "grad_norm": 1.3593614101264053, + "learning_rate": 1.633309772767361e-05, + "loss": 0.4983, + "step": 17550 + }, + { + "epoch": 0.3032727398396461, + "grad_norm": 1.2602952377279886, + "learning_rate": 1.6332664605878458e-05, + "loss": 0.4319, + "step": 17551 + }, + { + "epoch": 0.303290019353055, + "grad_norm": 1.326537131089692, + "learning_rate": 1.6332231464248917e-05, + "loss": 0.5346, + "step": 17552 + }, + { + "epoch": 0.3033072988664639, + "grad_norm": 0.9822795667630017, + "learning_rate": 1.633179830278634e-05, + "loss": 0.5752, + "step": 17553 + }, + { + "epoch": 0.3033245783798728, + "grad_norm": 0.675229199777019, + "learning_rate": 1.6331365121492096e-05, + "loss": 0.4254, + "step": 17554 + }, + { + "epoch": 0.3033418578932817, + "grad_norm": 1.296651644237458, + "learning_rate": 1.6330931920367533e-05, + "loss": 0.5696, + "step": 17555 + }, + { + "epoch": 0.3033591374066906, + "grad_norm": 1.1765911030434888, + "learning_rate": 1.633049869941401e-05, + "loss": 0.7474, + "step": 17556 + }, + { + "epoch": 0.30337641692009953, + "grad_norm": 1.4684211193158732, + "learning_rate": 1.6330065458632883e-05, + "loss": 0.5388, + "step": 17557 + }, + { + "epoch": 0.30339369643350844, + "grad_norm": 0.9366468199657982, + "learning_rate": 1.6329632198025513e-05, + "loss": 0.6096, + "step": 17558 + }, + { + "epoch": 0.30341097594691735, + "grad_norm": 1.2562489527197827, + "learning_rate": 1.6329198917593256e-05, + "loss": 0.6635, + "step": 17559 + }, + { + "epoch": 0.30342825546032626, + "grad_norm": 0.6699365212353293, + "learning_rate": 1.632876561733746e-05, + "loss": 0.3978, + "step": 17560 + }, + { + "epoch": 0.30344553497373516, + "grad_norm": 0.8620681125305327, + "learning_rate": 1.6328332297259493e-05, + "loss": 0.374, + "step": 17561 + }, + { + "epoch": 0.303462814487144, + "grad_norm": 1.1053458146107566, + "learning_rate": 1.6327898957360708e-05, + "loss": 0.62, + "step": 17562 + }, + { + "epoch": 0.3034800940005529, + "grad_norm": 1.4799940183068108, + "learning_rate": 1.6327465597642463e-05, + "loss": 0.3346, + "step": 17563 + }, + { + "epoch": 0.30349737351396183, + "grad_norm": 1.484484656880501, + "learning_rate": 1.6327032218106114e-05, + "loss": 0.5379, + "step": 17564 + }, + { + "epoch": 0.30351465302737074, + "grad_norm": 0.8306879926096037, + "learning_rate": 1.632659881875302e-05, + "loss": 0.2776, + "step": 17565 + }, + { + "epoch": 0.30353193254077965, + "grad_norm": 1.6917835396226353, + "learning_rate": 1.6326165399584537e-05, + "loss": 0.5881, + "step": 17566 + }, + { + "epoch": 0.30354921205418856, + "grad_norm": 1.169911299312229, + "learning_rate": 1.6325731960602024e-05, + "loss": 0.535, + "step": 17567 + }, + { + "epoch": 0.30356649156759746, + "grad_norm": 2.8485251303285635, + "learning_rate": 1.6325298501806837e-05, + "loss": 0.591, + "step": 17568 + }, + { + "epoch": 0.30358377108100637, + "grad_norm": 0.9581287648398407, + "learning_rate": 1.6324865023200335e-05, + "loss": 0.6157, + "step": 17569 + }, + { + "epoch": 0.3036010505944153, + "grad_norm": 1.305884046541763, + "learning_rate": 1.632443152478387e-05, + "loss": 0.3557, + "step": 17570 + }, + { + "epoch": 0.3036183301078242, + "grad_norm": 0.4117293365128586, + "learning_rate": 1.6323998006558813e-05, + "loss": 0.4794, + "step": 17571 + }, + { + "epoch": 0.3036356096212331, + "grad_norm": 0.8719062820260107, + "learning_rate": 1.6323564468526512e-05, + "loss": 0.6049, + "step": 17572 + }, + { + "epoch": 0.30365288913464195, + "grad_norm": 0.7363000118414142, + "learning_rate": 1.6323130910688322e-05, + "loss": 0.3802, + "step": 17573 + }, + { + "epoch": 0.30367016864805085, + "grad_norm": 1.4821550447709329, + "learning_rate": 1.6322697333045607e-05, + "loss": 0.4942, + "step": 17574 + }, + { + "epoch": 0.30368744816145976, + "grad_norm": 0.8427614476371096, + "learning_rate": 1.6322263735599725e-05, + "loss": 0.5231, + "step": 17575 + }, + { + "epoch": 0.30370472767486867, + "grad_norm": 1.1422008035950446, + "learning_rate": 1.632183011835203e-05, + "loss": 0.5268, + "step": 17576 + }, + { + "epoch": 0.3037220071882776, + "grad_norm": 1.4078011769261911, + "learning_rate": 1.6321396481303885e-05, + "loss": 0.4717, + "step": 17577 + }, + { + "epoch": 0.3037392867016865, + "grad_norm": 1.494673513303057, + "learning_rate": 1.6320962824456647e-05, + "loss": 0.673, + "step": 17578 + }, + { + "epoch": 0.3037565662150954, + "grad_norm": 0.8979618165123999, + "learning_rate": 1.632052914781167e-05, + "loss": 0.4966, + "step": 17579 + }, + { + "epoch": 0.3037738457285043, + "grad_norm": 1.611129468606265, + "learning_rate": 1.6320095451370318e-05, + "loss": 0.5398, + "step": 17580 + }, + { + "epoch": 0.3037911252419132, + "grad_norm": 1.144648482186094, + "learning_rate": 1.6319661735133948e-05, + "loss": 0.4835, + "step": 17581 + }, + { + "epoch": 0.3038084047553221, + "grad_norm": 0.4419885481100324, + "learning_rate": 1.631922799910391e-05, + "loss": 0.4942, + "step": 17582 + }, + { + "epoch": 0.30382568426873097, + "grad_norm": 1.0282687881108536, + "learning_rate": 1.6318794243281578e-05, + "loss": 0.5471, + "step": 17583 + }, + { + "epoch": 0.3038429637821399, + "grad_norm": 0.7229497782577511, + "learning_rate": 1.63183604676683e-05, + "loss": 0.3182, + "step": 17584 + }, + { + "epoch": 0.3038602432955488, + "grad_norm": 1.2966443703329578, + "learning_rate": 1.6317926672265435e-05, + "loss": 0.4086, + "step": 17585 + }, + { + "epoch": 0.3038775228089577, + "grad_norm": 0.7805476104875837, + "learning_rate": 1.6317492857074346e-05, + "loss": 0.5215, + "step": 17586 + }, + { + "epoch": 0.3038948023223666, + "grad_norm": 1.1302412482951134, + "learning_rate": 1.6317059022096384e-05, + "loss": 0.5759, + "step": 17587 + }, + { + "epoch": 0.3039120818357755, + "grad_norm": 1.3884548771271534, + "learning_rate": 1.6316625167332915e-05, + "loss": 0.4848, + "step": 17588 + }, + { + "epoch": 0.3039293613491844, + "grad_norm": 1.241261099776799, + "learning_rate": 1.63161912927853e-05, + "loss": 0.4651, + "step": 17589 + }, + { + "epoch": 0.3039466408625933, + "grad_norm": 1.1847465841382856, + "learning_rate": 1.631575739845489e-05, + "loss": 0.5578, + "step": 17590 + }, + { + "epoch": 0.30396392037600223, + "grad_norm": 0.5370170260665469, + "learning_rate": 1.6315323484343047e-05, + "loss": 0.4643, + "step": 17591 + }, + { + "epoch": 0.30398119988941114, + "grad_norm": 1.0267363552441042, + "learning_rate": 1.6314889550451136e-05, + "loss": 0.596, + "step": 17592 + }, + { + "epoch": 0.30399847940282004, + "grad_norm": 0.7382999507420425, + "learning_rate": 1.6314455596780507e-05, + "loss": 0.5068, + "step": 17593 + }, + { + "epoch": 0.3040157589162289, + "grad_norm": 0.5398226331666117, + "learning_rate": 1.6314021623332522e-05, + "loss": 0.285, + "step": 17594 + }, + { + "epoch": 0.3040330384296378, + "grad_norm": 1.0112704631200549, + "learning_rate": 1.6313587630108545e-05, + "loss": 0.3948, + "step": 17595 + }, + { + "epoch": 0.3040503179430467, + "grad_norm": 0.9138793476161886, + "learning_rate": 1.6313153617109933e-05, + "loss": 0.5623, + "step": 17596 + }, + { + "epoch": 0.3040675974564556, + "grad_norm": 0.9957272667066868, + "learning_rate": 1.6312719584338038e-05, + "loss": 0.4261, + "step": 17597 + }, + { + "epoch": 0.3040848769698645, + "grad_norm": 1.0986617092546545, + "learning_rate": 1.631228553179423e-05, + "loss": 0.3951, + "step": 17598 + }, + { + "epoch": 0.30410215648327343, + "grad_norm": 0.8497085932890899, + "learning_rate": 1.631185145947986e-05, + "loss": 0.5945, + "step": 17599 + }, + { + "epoch": 0.30411943599668234, + "grad_norm": 0.7464811447336872, + "learning_rate": 1.6311417367396293e-05, + "loss": 0.5734, + "step": 17600 + }, + { + "epoch": 0.30413671551009125, + "grad_norm": 0.992366820928322, + "learning_rate": 1.631098325554489e-05, + "loss": 0.4487, + "step": 17601 + }, + { + "epoch": 0.30415399502350016, + "grad_norm": 1.0430969570120634, + "learning_rate": 1.631054912392701e-05, + "loss": 0.4092, + "step": 17602 + }, + { + "epoch": 0.30417127453690906, + "grad_norm": 1.0588290126710875, + "learning_rate": 1.6310114972544006e-05, + "loss": 0.5826, + "step": 17603 + }, + { + "epoch": 0.3041885540503179, + "grad_norm": 0.9311914966669659, + "learning_rate": 1.630968080139724e-05, + "loss": 0.4915, + "step": 17604 + }, + { + "epoch": 0.3042058335637268, + "grad_norm": 1.7041773363657315, + "learning_rate": 1.6309246610488082e-05, + "loss": 0.5128, + "step": 17605 + }, + { + "epoch": 0.30422311307713573, + "grad_norm": 1.4323675540052054, + "learning_rate": 1.6308812399817876e-05, + "loss": 0.5007, + "step": 17606 + }, + { + "epoch": 0.30424039259054464, + "grad_norm": 0.9801119526693082, + "learning_rate": 1.6308378169387996e-05, + "loss": 0.4188, + "step": 17607 + }, + { + "epoch": 0.30425767210395355, + "grad_norm": 1.2439511643516687, + "learning_rate": 1.630794391919979e-05, + "loss": 0.5669, + "step": 17608 + }, + { + "epoch": 0.30427495161736245, + "grad_norm": 1.3188071704172553, + "learning_rate": 1.630750964925463e-05, + "loss": 0.4398, + "step": 17609 + }, + { + "epoch": 0.30429223113077136, + "grad_norm": 0.9888425664576108, + "learning_rate": 1.630707535955387e-05, + "loss": 0.5055, + "step": 17610 + }, + { + "epoch": 0.30430951064418027, + "grad_norm": 1.4461486035150424, + "learning_rate": 1.6306641050098868e-05, + "loss": 0.571, + "step": 17611 + }, + { + "epoch": 0.3043267901575892, + "grad_norm": 1.5713078211484948, + "learning_rate": 1.6306206720890988e-05, + "loss": 0.6205, + "step": 17612 + }, + { + "epoch": 0.3043440696709981, + "grad_norm": 1.3610273813127287, + "learning_rate": 1.6305772371931594e-05, + "loss": 0.5824, + "step": 17613 + }, + { + "epoch": 0.304361349184407, + "grad_norm": 0.8082407679631627, + "learning_rate": 1.6305338003222034e-05, + "loss": 0.4759, + "step": 17614 + }, + { + "epoch": 0.30437862869781585, + "grad_norm": 0.9502768296643272, + "learning_rate": 1.6304903614763677e-05, + "loss": 0.3021, + "step": 17615 + }, + { + "epoch": 0.30439590821122475, + "grad_norm": 0.8845757264630418, + "learning_rate": 1.6304469206557886e-05, + "loss": 0.6128, + "step": 17616 + }, + { + "epoch": 0.30441318772463366, + "grad_norm": 0.6493612088536552, + "learning_rate": 1.6304034778606015e-05, + "loss": 0.4396, + "step": 17617 + }, + { + "epoch": 0.30443046723804257, + "grad_norm": 1.1993958554718909, + "learning_rate": 1.6303600330909434e-05, + "loss": 0.6709, + "step": 17618 + }, + { + "epoch": 0.3044477467514515, + "grad_norm": 0.5308862755728733, + "learning_rate": 1.6303165863469494e-05, + "loss": 0.5657, + "step": 17619 + }, + { + "epoch": 0.3044650262648604, + "grad_norm": 0.8777494724834533, + "learning_rate": 1.630273137628756e-05, + "loss": 0.4548, + "step": 17620 + }, + { + "epoch": 0.3044823057782693, + "grad_norm": 0.6984088661811886, + "learning_rate": 1.6302296869364992e-05, + "loss": 0.4898, + "step": 17621 + }, + { + "epoch": 0.3044995852916782, + "grad_norm": 1.0550955179577406, + "learning_rate": 1.630186234270315e-05, + "loss": 0.4918, + "step": 17622 + }, + { + "epoch": 0.3045168648050871, + "grad_norm": 1.3210489577112208, + "learning_rate": 1.63014277963034e-05, + "loss": 0.501, + "step": 17623 + }, + { + "epoch": 0.304534144318496, + "grad_norm": 0.9283246487867571, + "learning_rate": 1.6300993230167095e-05, + "loss": 0.4569, + "step": 17624 + }, + { + "epoch": 0.30455142383190487, + "grad_norm": 1.164726274054453, + "learning_rate": 1.63005586442956e-05, + "loss": 0.6279, + "step": 17625 + }, + { + "epoch": 0.3045687033453138, + "grad_norm": 0.6296272716528944, + "learning_rate": 1.630012403869028e-05, + "loss": 0.4165, + "step": 17626 + }, + { + "epoch": 0.3045859828587227, + "grad_norm": 1.2131804102340666, + "learning_rate": 1.6299689413352487e-05, + "loss": 0.4052, + "step": 17627 + }, + { + "epoch": 0.3046032623721316, + "grad_norm": 1.1706534632143846, + "learning_rate": 1.6299254768283595e-05, + "loss": 0.5555, + "step": 17628 + }, + { + "epoch": 0.3046205418855405, + "grad_norm": 0.6652308983643053, + "learning_rate": 1.6298820103484955e-05, + "loss": 0.3781, + "step": 17629 + }, + { + "epoch": 0.3046378213989494, + "grad_norm": 1.598375163110005, + "learning_rate": 1.629838541895793e-05, + "loss": 0.502, + "step": 17630 + }, + { + "epoch": 0.3046551009123583, + "grad_norm": 1.26909214336249, + "learning_rate": 1.629795071470389e-05, + "loss": 0.4765, + "step": 17631 + }, + { + "epoch": 0.3046723804257672, + "grad_norm": 1.5059852969901384, + "learning_rate": 1.629751599072418e-05, + "loss": 0.4214, + "step": 17632 + }, + { + "epoch": 0.3046896599391761, + "grad_norm": 1.066858184829033, + "learning_rate": 1.6297081247020183e-05, + "loss": 0.5278, + "step": 17633 + }, + { + "epoch": 0.30470693945258503, + "grad_norm": 1.1901184509481173, + "learning_rate": 1.6296646483593243e-05, + "loss": 0.5933, + "step": 17634 + }, + { + "epoch": 0.30472421896599394, + "grad_norm": 0.7329995952342235, + "learning_rate": 1.6296211700444726e-05, + "loss": 0.6093, + "step": 17635 + }, + { + "epoch": 0.3047414984794028, + "grad_norm": 1.1385268402887296, + "learning_rate": 1.6295776897575995e-05, + "loss": 0.5934, + "step": 17636 + }, + { + "epoch": 0.3047587779928117, + "grad_norm": 0.93681339155284, + "learning_rate": 1.6295342074988414e-05, + "loss": 0.596, + "step": 17637 + }, + { + "epoch": 0.3047760575062206, + "grad_norm": 0.788576852804126, + "learning_rate": 1.6294907232683343e-05, + "loss": 0.4444, + "step": 17638 + }, + { + "epoch": 0.3047933370196295, + "grad_norm": 0.7613041677699373, + "learning_rate": 1.6294472370662144e-05, + "loss": 0.3987, + "step": 17639 + }, + { + "epoch": 0.3048106165330384, + "grad_norm": 0.8162838477590241, + "learning_rate": 1.629403748892618e-05, + "loss": 0.581, + "step": 17640 + }, + { + "epoch": 0.30482789604644733, + "grad_norm": 0.8348878465406219, + "learning_rate": 1.6293602587476812e-05, + "loss": 0.3727, + "step": 17641 + }, + { + "epoch": 0.30484517555985624, + "grad_norm": 0.6706088849491787, + "learning_rate": 1.62931676663154e-05, + "loss": 0.4235, + "step": 17642 + }, + { + "epoch": 0.30486245507326515, + "grad_norm": 1.2903183084389247, + "learning_rate": 1.629273272544331e-05, + "loss": 0.5615, + "step": 17643 + }, + { + "epoch": 0.30487973458667406, + "grad_norm": 1.115802344724427, + "learning_rate": 1.6292297764861907e-05, + "loss": 0.5218, + "step": 17644 + }, + { + "epoch": 0.30489701410008296, + "grad_norm": 1.1268423641042247, + "learning_rate": 1.6291862784572546e-05, + "loss": 0.6172, + "step": 17645 + }, + { + "epoch": 0.30491429361349187, + "grad_norm": 1.968089348743592, + "learning_rate": 1.6291427784576592e-05, + "loss": 0.6142, + "step": 17646 + }, + { + "epoch": 0.3049315731269007, + "grad_norm": 1.0586722256819914, + "learning_rate": 1.6290992764875405e-05, + "loss": 0.6276, + "step": 17647 + }, + { + "epoch": 0.30494885264030963, + "grad_norm": 0.9163201927399243, + "learning_rate": 1.6290557725470355e-05, + "loss": 0.4645, + "step": 17648 + }, + { + "epoch": 0.30496613215371854, + "grad_norm": 0.619415768423083, + "learning_rate": 1.62901226663628e-05, + "loss": 0.4577, + "step": 17649 + }, + { + "epoch": 0.30498341166712745, + "grad_norm": 0.8322005072524106, + "learning_rate": 1.6289687587554103e-05, + "loss": 0.4679, + "step": 17650 + }, + { + "epoch": 0.30500069118053635, + "grad_norm": 0.7649533776970768, + "learning_rate": 1.6289252489045625e-05, + "loss": 0.5119, + "step": 17651 + }, + { + "epoch": 0.30501797069394526, + "grad_norm": 0.964245627869302, + "learning_rate": 1.628881737083873e-05, + "loss": 0.416, + "step": 17652 + }, + { + "epoch": 0.30503525020735417, + "grad_norm": 1.0443584594621467, + "learning_rate": 1.6288382232934784e-05, + "loss": 0.5609, + "step": 17653 + }, + { + "epoch": 0.3050525297207631, + "grad_norm": 0.8572507651670209, + "learning_rate": 1.6287947075335143e-05, + "loss": 0.3803, + "step": 17654 + }, + { + "epoch": 0.305069809234172, + "grad_norm": 1.6339338153373093, + "learning_rate": 1.6287511898041176e-05, + "loss": 0.6764, + "step": 17655 + }, + { + "epoch": 0.3050870887475809, + "grad_norm": 0.4487400773330741, + "learning_rate": 1.6287076701054242e-05, + "loss": 0.6158, + "step": 17656 + }, + { + "epoch": 0.30510436826098974, + "grad_norm": 0.688302459855739, + "learning_rate": 1.628664148437571e-05, + "loss": 0.6714, + "step": 17657 + }, + { + "epoch": 0.30512164777439865, + "grad_norm": 0.9780341110027352, + "learning_rate": 1.6286206248006934e-05, + "loss": 0.4302, + "step": 17658 + }, + { + "epoch": 0.30513892728780756, + "grad_norm": 1.0078252080496481, + "learning_rate": 1.6285770991949287e-05, + "loss": 0.4768, + "step": 17659 + }, + { + "epoch": 0.30515620680121647, + "grad_norm": 1.0920023838223303, + "learning_rate": 1.628533571620412e-05, + "loss": 0.34, + "step": 17660 + }, + { + "epoch": 0.3051734863146254, + "grad_norm": 1.392642117122366, + "learning_rate": 1.6284900420772813e-05, + "loss": 0.6153, + "step": 17661 + }, + { + "epoch": 0.3051907658280343, + "grad_norm": 1.0727884461015573, + "learning_rate": 1.6284465105656715e-05, + "loss": 0.678, + "step": 17662 + }, + { + "epoch": 0.3052080453414432, + "grad_norm": 1.0793765728426223, + "learning_rate": 1.6284029770857195e-05, + "loss": 0.6573, + "step": 17663 + }, + { + "epoch": 0.3052253248548521, + "grad_norm": 1.3275298237974864, + "learning_rate": 1.6283594416375618e-05, + "loss": 0.6173, + "step": 17664 + }, + { + "epoch": 0.305242604368261, + "grad_norm": 0.7485283421269802, + "learning_rate": 1.628315904221335e-05, + "loss": 0.4993, + "step": 17665 + }, + { + "epoch": 0.3052598838816699, + "grad_norm": 0.8912444342292788, + "learning_rate": 1.6282723648371743e-05, + "loss": 0.5646, + "step": 17666 + }, + { + "epoch": 0.3052771633950788, + "grad_norm": 0.753674053827744, + "learning_rate": 1.6282288234852172e-05, + "loss": 0.5076, + "step": 17667 + }, + { + "epoch": 0.3052944429084877, + "grad_norm": 1.3856501864745416, + "learning_rate": 1.6281852801655996e-05, + "loss": 0.4919, + "step": 17668 + }, + { + "epoch": 0.3053117224218966, + "grad_norm": 0.8489358841219206, + "learning_rate": 1.6281417348784576e-05, + "loss": 0.3779, + "step": 17669 + }, + { + "epoch": 0.3053290019353055, + "grad_norm": 0.5708277225326102, + "learning_rate": 1.628098187623928e-05, + "loss": 0.5011, + "step": 17670 + }, + { + "epoch": 0.3053462814487144, + "grad_norm": 0.748462452071948, + "learning_rate": 1.6280546384021476e-05, + "loss": 0.462, + "step": 17671 + }, + { + "epoch": 0.3053635609621233, + "grad_norm": 0.9706675247003724, + "learning_rate": 1.6280110872132523e-05, + "loss": 0.3649, + "step": 17672 + }, + { + "epoch": 0.3053808404755322, + "grad_norm": 0.8110651065410116, + "learning_rate": 1.6279675340573782e-05, + "loss": 0.3905, + "step": 17673 + }, + { + "epoch": 0.3053981199889411, + "grad_norm": 1.0919994040930385, + "learning_rate": 1.6279239789346624e-05, + "loss": 0.4522, + "step": 17674 + }, + { + "epoch": 0.30541539950235, + "grad_norm": 0.682381087064296, + "learning_rate": 1.6278804218452408e-05, + "loss": 0.6354, + "step": 17675 + }, + { + "epoch": 0.30543267901575893, + "grad_norm": 0.9344289952875467, + "learning_rate": 1.62783686278925e-05, + "loss": 0.379, + "step": 17676 + }, + { + "epoch": 0.30544995852916784, + "grad_norm": 2.0645579236771403, + "learning_rate": 1.6277933017668263e-05, + "loss": 0.5185, + "step": 17677 + }, + { + "epoch": 0.3054672380425767, + "grad_norm": 1.06208060994131, + "learning_rate": 1.6277497387781065e-05, + "loss": 0.5209, + "step": 17678 + }, + { + "epoch": 0.3054845175559856, + "grad_norm": 0.7619462523348725, + "learning_rate": 1.6277061738232267e-05, + "loss": 0.5913, + "step": 17679 + }, + { + "epoch": 0.3055017970693945, + "grad_norm": 0.8555967970880369, + "learning_rate": 1.6276626069023235e-05, + "loss": 0.4113, + "step": 17680 + }, + { + "epoch": 0.3055190765828034, + "grad_norm": 0.4246008727972863, + "learning_rate": 1.6276190380155334e-05, + "loss": 0.5674, + "step": 17681 + }, + { + "epoch": 0.3055363560962123, + "grad_norm": 1.2892755083343228, + "learning_rate": 1.6275754671629925e-05, + "loss": 0.6664, + "step": 17682 + }, + { + "epoch": 0.30555363560962123, + "grad_norm": 0.9454470913348924, + "learning_rate": 1.6275318943448376e-05, + "loss": 0.5661, + "step": 17683 + }, + { + "epoch": 0.30557091512303014, + "grad_norm": 1.2344245368843612, + "learning_rate": 1.6274883195612053e-05, + "loss": 0.6398, + "step": 17684 + }, + { + "epoch": 0.30558819463643905, + "grad_norm": 1.930762039717557, + "learning_rate": 1.627444742812232e-05, + "loss": 0.466, + "step": 17685 + }, + { + "epoch": 0.30560547414984796, + "grad_norm": 0.9808379081592455, + "learning_rate": 1.6274011640980538e-05, + "loss": 0.5186, + "step": 17686 + }, + { + "epoch": 0.30562275366325686, + "grad_norm": 0.40873056634139016, + "learning_rate": 1.6273575834188077e-05, + "loss": 0.7723, + "step": 17687 + }, + { + "epoch": 0.30564003317666577, + "grad_norm": 1.633786800327449, + "learning_rate": 1.62731400077463e-05, + "loss": 0.4954, + "step": 17688 + }, + { + "epoch": 0.3056573126900746, + "grad_norm": 1.1945738389099787, + "learning_rate": 1.627270416165657e-05, + "loss": 0.4704, + "step": 17689 + }, + { + "epoch": 0.30567459220348353, + "grad_norm": 0.8475952396229697, + "learning_rate": 1.6272268295920253e-05, + "loss": 0.5506, + "step": 17690 + }, + { + "epoch": 0.30569187171689244, + "grad_norm": 0.836052535803253, + "learning_rate": 1.6271832410538716e-05, + "loss": 0.6545, + "step": 17691 + }, + { + "epoch": 0.30570915123030135, + "grad_norm": 1.2394387822993937, + "learning_rate": 1.6271396505513322e-05, + "loss": 0.4054, + "step": 17692 + }, + { + "epoch": 0.30572643074371025, + "grad_norm": 1.4566743379388885, + "learning_rate": 1.6270960580845438e-05, + "loss": 0.5745, + "step": 17693 + }, + { + "epoch": 0.30574371025711916, + "grad_norm": 1.257710593642285, + "learning_rate": 1.6270524636536432e-05, + "loss": 0.4986, + "step": 17694 + }, + { + "epoch": 0.30576098977052807, + "grad_norm": 1.1183449714183173, + "learning_rate": 1.6270088672587666e-05, + "loss": 0.4993, + "step": 17695 + }, + { + "epoch": 0.305778269283937, + "grad_norm": 0.9456038515204066, + "learning_rate": 1.6269652689000502e-05, + "loss": 0.6331, + "step": 17696 + }, + { + "epoch": 0.3057955487973459, + "grad_norm": 1.0289852514287012, + "learning_rate": 1.6269216685776313e-05, + "loss": 0.7331, + "step": 17697 + }, + { + "epoch": 0.3058128283107548, + "grad_norm": 1.3280551480908143, + "learning_rate": 1.626878066291646e-05, + "loss": 0.6475, + "step": 17698 + }, + { + "epoch": 0.30583010782416364, + "grad_norm": 0.9712763349320067, + "learning_rate": 1.6268344620422308e-05, + "loss": 0.4053, + "step": 17699 + }, + { + "epoch": 0.30584738733757255, + "grad_norm": 0.9326364979534607, + "learning_rate": 1.6267908558295224e-05, + "loss": 0.3422, + "step": 17700 + }, + { + "epoch": 0.30586466685098146, + "grad_norm": 0.7448735765878374, + "learning_rate": 1.6267472476536575e-05, + "loss": 0.4324, + "step": 17701 + }, + { + "epoch": 0.30588194636439037, + "grad_norm": 0.9604697745065833, + "learning_rate": 1.6267036375147728e-05, + "loss": 0.5488, + "step": 17702 + }, + { + "epoch": 0.3058992258777993, + "grad_norm": 0.9910064679879055, + "learning_rate": 1.626660025413004e-05, + "loss": 0.6112, + "step": 17703 + }, + { + "epoch": 0.3059165053912082, + "grad_norm": 0.7559228415426495, + "learning_rate": 1.626616411348489e-05, + "loss": 0.4523, + "step": 17704 + }, + { + "epoch": 0.3059337849046171, + "grad_norm": 1.0447052698242634, + "learning_rate": 1.626572795321364e-05, + "loss": 0.5909, + "step": 17705 + }, + { + "epoch": 0.305951064418026, + "grad_norm": 1.4910631719088705, + "learning_rate": 1.626529177331765e-05, + "loss": 0.6738, + "step": 17706 + }, + { + "epoch": 0.3059683439314349, + "grad_norm": 1.0208947922042437, + "learning_rate": 1.6264855573798287e-05, + "loss": 0.4892, + "step": 17707 + }, + { + "epoch": 0.3059856234448438, + "grad_norm": 1.2362357251652656, + "learning_rate": 1.6264419354656925e-05, + "loss": 0.5553, + "step": 17708 + }, + { + "epoch": 0.3060029029582527, + "grad_norm": 0.9189430692849647, + "learning_rate": 1.6263983115894923e-05, + "loss": 0.5696, + "step": 17709 + }, + { + "epoch": 0.3060201824716616, + "grad_norm": 1.025615985524474, + "learning_rate": 1.626354685751365e-05, + "loss": 0.547, + "step": 17710 + }, + { + "epoch": 0.3060374619850705, + "grad_norm": 1.2034586443453068, + "learning_rate": 1.6263110579514475e-05, + "loss": 0.5371, + "step": 17711 + }, + { + "epoch": 0.3060547414984794, + "grad_norm": 0.9551964764026325, + "learning_rate": 1.6262674281898757e-05, + "loss": 0.5997, + "step": 17712 + }, + { + "epoch": 0.3060720210118883, + "grad_norm": 1.4821140584406018, + "learning_rate": 1.626223796466787e-05, + "loss": 0.5286, + "step": 17713 + }, + { + "epoch": 0.3060893005252972, + "grad_norm": 1.2149053419524103, + "learning_rate": 1.626180162782318e-05, + "loss": 0.521, + "step": 17714 + }, + { + "epoch": 0.3061065800387061, + "grad_norm": 0.8954790199224676, + "learning_rate": 1.626136527136605e-05, + "loss": 0.4639, + "step": 17715 + }, + { + "epoch": 0.306123859552115, + "grad_norm": 1.1418002755776089, + "learning_rate": 1.6260928895297846e-05, + "loss": 0.3979, + "step": 17716 + }, + { + "epoch": 0.3061411390655239, + "grad_norm": 1.0306732710866076, + "learning_rate": 1.6260492499619937e-05, + "loss": 0.5724, + "step": 17717 + }, + { + "epoch": 0.30615841857893283, + "grad_norm": 1.1250366043079465, + "learning_rate": 1.6260056084333694e-05, + "loss": 0.4656, + "step": 17718 + }, + { + "epoch": 0.30617569809234174, + "grad_norm": 1.3301840937295748, + "learning_rate": 1.6259619649440474e-05, + "loss": 0.6011, + "step": 17719 + }, + { + "epoch": 0.30619297760575065, + "grad_norm": 1.3673243629960612, + "learning_rate": 1.625918319494165e-05, + "loss": 0.5649, + "step": 17720 + }, + { + "epoch": 0.3062102571191595, + "grad_norm": 1.5527732951961764, + "learning_rate": 1.625874672083859e-05, + "loss": 0.4445, + "step": 17721 + }, + { + "epoch": 0.3062275366325684, + "grad_norm": 1.2578408977303808, + "learning_rate": 1.625831022713266e-05, + "loss": 0.3175, + "step": 17722 + }, + { + "epoch": 0.3062448161459773, + "grad_norm": 0.7782524614191777, + "learning_rate": 1.6257873713825227e-05, + "loss": 0.4954, + "step": 17723 + }, + { + "epoch": 0.3062620956593862, + "grad_norm": 1.3767405539883568, + "learning_rate": 1.6257437180917658e-05, + "loss": 0.5013, + "step": 17724 + }, + { + "epoch": 0.30627937517279513, + "grad_norm": 1.5216098538067486, + "learning_rate": 1.625700062841132e-05, + "loss": 0.413, + "step": 17725 + }, + { + "epoch": 0.30629665468620404, + "grad_norm": 0.8230781398593612, + "learning_rate": 1.6256564056307583e-05, + "loss": 0.5129, + "step": 17726 + }, + { + "epoch": 0.30631393419961295, + "grad_norm": 1.4849306968021472, + "learning_rate": 1.6256127464607806e-05, + "loss": 0.4648, + "step": 17727 + }, + { + "epoch": 0.30633121371302185, + "grad_norm": 0.8219555940189727, + "learning_rate": 1.6255690853313368e-05, + "loss": 0.4521, + "step": 17728 + }, + { + "epoch": 0.30634849322643076, + "grad_norm": 0.6685214185622137, + "learning_rate": 1.625525422242563e-05, + "loss": 0.3403, + "step": 17729 + }, + { + "epoch": 0.30636577273983967, + "grad_norm": 1.084913586683744, + "learning_rate": 1.625481757194596e-05, + "loss": 0.4026, + "step": 17730 + }, + { + "epoch": 0.3063830522532485, + "grad_norm": 0.8657509963695132, + "learning_rate": 1.6254380901875722e-05, + "loss": 0.3989, + "step": 17731 + }, + { + "epoch": 0.30640033176665743, + "grad_norm": 1.1028239416158863, + "learning_rate": 1.625394421221629e-05, + "loss": 0.5053, + "step": 17732 + }, + { + "epoch": 0.30641761128006634, + "grad_norm": 0.9262827475734494, + "learning_rate": 1.625350750296903e-05, + "loss": 0.6081, + "step": 17733 + }, + { + "epoch": 0.30643489079347525, + "grad_norm": 0.8806952283695652, + "learning_rate": 1.625307077413531e-05, + "loss": 0.5484, + "step": 17734 + }, + { + "epoch": 0.30645217030688415, + "grad_norm": 1.3671361235927622, + "learning_rate": 1.6252634025716497e-05, + "loss": 0.6409, + "step": 17735 + }, + { + "epoch": 0.30646944982029306, + "grad_norm": 0.4132617046228935, + "learning_rate": 1.625219725771396e-05, + "loss": 0.6691, + "step": 17736 + }, + { + "epoch": 0.30648672933370197, + "grad_norm": 1.6803218584653707, + "learning_rate": 1.6251760470129062e-05, + "loss": 0.487, + "step": 17737 + }, + { + "epoch": 0.3065040088471109, + "grad_norm": 1.045445434703112, + "learning_rate": 1.6251323662963182e-05, + "loss": 0.2918, + "step": 17738 + }, + { + "epoch": 0.3065212883605198, + "grad_norm": 1.2189932732293742, + "learning_rate": 1.6250886836217674e-05, + "loss": 0.4505, + "step": 17739 + }, + { + "epoch": 0.3065385678739287, + "grad_norm": 1.4426821545141801, + "learning_rate": 1.6250449989893917e-05, + "loss": 0.6504, + "step": 17740 + }, + { + "epoch": 0.3065558473873376, + "grad_norm": 0.9066804086627985, + "learning_rate": 1.6250013123993275e-05, + "loss": 0.3189, + "step": 17741 + }, + { + "epoch": 0.30657312690074645, + "grad_norm": 0.715934670376151, + "learning_rate": 1.6249576238517114e-05, + "loss": 0.4783, + "step": 17742 + }, + { + "epoch": 0.30659040641415536, + "grad_norm": 1.099608532875196, + "learning_rate": 1.624913933346681e-05, + "loss": 0.361, + "step": 17743 + }, + { + "epoch": 0.30660768592756427, + "grad_norm": 0.8479846260581804, + "learning_rate": 1.6248702408843723e-05, + "loss": 0.3879, + "step": 17744 + }, + { + "epoch": 0.3066249654409732, + "grad_norm": 0.7758666072541459, + "learning_rate": 1.624826546464923e-05, + "loss": 0.4438, + "step": 17745 + }, + { + "epoch": 0.3066422449543821, + "grad_norm": 0.899358559668658, + "learning_rate": 1.624782850088469e-05, + "loss": 0.4702, + "step": 17746 + }, + { + "epoch": 0.306659524467791, + "grad_norm": 1.2910460529122358, + "learning_rate": 1.6247391517551478e-05, + "loss": 0.5635, + "step": 17747 + }, + { + "epoch": 0.3066768039811999, + "grad_norm": 0.6865761357589646, + "learning_rate": 1.624695451465096e-05, + "loss": 0.5643, + "step": 17748 + }, + { + "epoch": 0.3066940834946088, + "grad_norm": 1.0881845500641067, + "learning_rate": 1.6246517492184507e-05, + "loss": 0.5364, + "step": 17749 + }, + { + "epoch": 0.3067113630080177, + "grad_norm": 0.9140634516264856, + "learning_rate": 1.6246080450153487e-05, + "loss": 0.607, + "step": 17750 + }, + { + "epoch": 0.3067286425214266, + "grad_norm": 1.0553944453575326, + "learning_rate": 1.6245643388559268e-05, + "loss": 0.4394, + "step": 17751 + }, + { + "epoch": 0.30674592203483547, + "grad_norm": 0.8368359226907928, + "learning_rate": 1.624520630740322e-05, + "loss": 0.3214, + "step": 17752 + }, + { + "epoch": 0.3067632015482444, + "grad_norm": 0.7713018002094756, + "learning_rate": 1.6244769206686708e-05, + "loss": 0.5263, + "step": 17753 + }, + { + "epoch": 0.3067804810616533, + "grad_norm": 1.106031821596216, + "learning_rate": 1.6244332086411107e-05, + "loss": 0.5414, + "step": 17754 + }, + { + "epoch": 0.3067977605750622, + "grad_norm": 1.0155535025646711, + "learning_rate": 1.624389494657778e-05, + "loss": 0.3627, + "step": 17755 + }, + { + "epoch": 0.3068150400884711, + "grad_norm": 1.1304976690715007, + "learning_rate": 1.6243457787188105e-05, + "loss": 0.4812, + "step": 17756 + }, + { + "epoch": 0.30683231960188, + "grad_norm": 1.0604062782828785, + "learning_rate": 1.624302060824344e-05, + "loss": 0.4199, + "step": 17757 + }, + { + "epoch": 0.3068495991152889, + "grad_norm": 1.3279803200977611, + "learning_rate": 1.6242583409745164e-05, + "loss": 0.3956, + "step": 17758 + }, + { + "epoch": 0.3068668786286978, + "grad_norm": 0.6858054929795492, + "learning_rate": 1.6242146191694643e-05, + "loss": 0.4626, + "step": 17759 + }, + { + "epoch": 0.30688415814210673, + "grad_norm": 0.7791074406726584, + "learning_rate": 1.6241708954093242e-05, + "loss": 0.3326, + "step": 17760 + }, + { + "epoch": 0.30690143765551564, + "grad_norm": 0.5320962780146381, + "learning_rate": 1.6241271696942335e-05, + "loss": 0.7248, + "step": 17761 + }, + { + "epoch": 0.30691871716892455, + "grad_norm": 0.916466011518348, + "learning_rate": 1.6240834420243293e-05, + "loss": 0.3474, + "step": 17762 + }, + { + "epoch": 0.3069359966823334, + "grad_norm": 0.8929307483880761, + "learning_rate": 1.624039712399748e-05, + "loss": 0.5364, + "step": 17763 + }, + { + "epoch": 0.3069532761957423, + "grad_norm": 1.1250868942376286, + "learning_rate": 1.6239959808206273e-05, + "loss": 0.3619, + "step": 17764 + }, + { + "epoch": 0.3069705557091512, + "grad_norm": 0.7508153024600617, + "learning_rate": 1.6239522472871033e-05, + "loss": 0.4662, + "step": 17765 + }, + { + "epoch": 0.3069878352225601, + "grad_norm": 1.449709345341424, + "learning_rate": 1.6239085117993137e-05, + "loss": 0.763, + "step": 17766 + }, + { + "epoch": 0.30700511473596903, + "grad_norm": 2.2440725850545573, + "learning_rate": 1.6238647743573957e-05, + "loss": 0.7184, + "step": 17767 + }, + { + "epoch": 0.30702239424937794, + "grad_norm": 0.6291897098708934, + "learning_rate": 1.6238210349614852e-05, + "loss": 0.4178, + "step": 17768 + }, + { + "epoch": 0.30703967376278685, + "grad_norm": 0.8553701760790282, + "learning_rate": 1.62377729361172e-05, + "loss": 0.6829, + "step": 17769 + }, + { + "epoch": 0.30705695327619575, + "grad_norm": 1.1604921452409271, + "learning_rate": 1.623733550308237e-05, + "loss": 0.4806, + "step": 17770 + }, + { + "epoch": 0.30707423278960466, + "grad_norm": 0.7901434421781579, + "learning_rate": 1.623689805051173e-05, + "loss": 0.5171, + "step": 17771 + }, + { + "epoch": 0.30709151230301357, + "grad_norm": 1.2118899264569258, + "learning_rate": 1.623646057840665e-05, + "loss": 0.4071, + "step": 17772 + }, + { + "epoch": 0.3071087918164225, + "grad_norm": 0.6307340421833211, + "learning_rate": 1.62360230867685e-05, + "loss": 0.3695, + "step": 17773 + }, + { + "epoch": 0.30712607132983133, + "grad_norm": 3.7485787154881853, + "learning_rate": 1.6235585575598657e-05, + "loss": 0.6588, + "step": 17774 + }, + { + "epoch": 0.30714335084324024, + "grad_norm": 0.6824987246139559, + "learning_rate": 1.6235148044898483e-05, + "loss": 0.4057, + "step": 17775 + }, + { + "epoch": 0.30716063035664914, + "grad_norm": 1.065807555903142, + "learning_rate": 1.623471049466935e-05, + "loss": 0.3659, + "step": 17776 + }, + { + "epoch": 0.30717790987005805, + "grad_norm": 0.9049659501536297, + "learning_rate": 1.6234272924912638e-05, + "loss": 0.39, + "step": 17777 + }, + { + "epoch": 0.30719518938346696, + "grad_norm": 0.7288947417185048, + "learning_rate": 1.6233835335629703e-05, + "loss": 0.4161, + "step": 17778 + }, + { + "epoch": 0.30721246889687587, + "grad_norm": 1.0507077021606357, + "learning_rate": 1.623339772682192e-05, + "loss": 0.6256, + "step": 17779 + }, + { + "epoch": 0.3072297484102848, + "grad_norm": 1.1732243685850943, + "learning_rate": 1.6232960098490666e-05, + "loss": 0.6613, + "step": 17780 + }, + { + "epoch": 0.3072470279236937, + "grad_norm": 0.8692708460014835, + "learning_rate": 1.6232522450637304e-05, + "loss": 0.4749, + "step": 17781 + }, + { + "epoch": 0.3072643074371026, + "grad_norm": 1.069578617747468, + "learning_rate": 1.6232084783263207e-05, + "loss": 0.4529, + "step": 17782 + }, + { + "epoch": 0.3072815869505115, + "grad_norm": 1.0836665270367587, + "learning_rate": 1.623164709636975e-05, + "loss": 0.5643, + "step": 17783 + }, + { + "epoch": 0.30729886646392035, + "grad_norm": 0.9938679884863171, + "learning_rate": 1.62312093899583e-05, + "loss": 0.5415, + "step": 17784 + }, + { + "epoch": 0.30731614597732926, + "grad_norm": 0.6584651893413394, + "learning_rate": 1.623077166403023e-05, + "loss": 0.4852, + "step": 17785 + }, + { + "epoch": 0.30733342549073817, + "grad_norm": 1.0292380596703345, + "learning_rate": 1.6230333918586906e-05, + "loss": 0.4841, + "step": 17786 + }, + { + "epoch": 0.3073507050041471, + "grad_norm": 1.0709489517909079, + "learning_rate": 1.6229896153629707e-05, + "loss": 0.2646, + "step": 17787 + }, + { + "epoch": 0.307367984517556, + "grad_norm": 1.2654769264406727, + "learning_rate": 1.6229458369159997e-05, + "loss": 0.425, + "step": 17788 + }, + { + "epoch": 0.3073852640309649, + "grad_norm": 0.8828315612647123, + "learning_rate": 1.622902056517915e-05, + "loss": 0.46, + "step": 17789 + }, + { + "epoch": 0.3074025435443738, + "grad_norm": 1.358540103129944, + "learning_rate": 1.622858274168854e-05, + "loss": 0.5501, + "step": 17790 + }, + { + "epoch": 0.3074198230577827, + "grad_norm": 0.9105663147052612, + "learning_rate": 1.622814489868953e-05, + "loss": 0.4705, + "step": 17791 + }, + { + "epoch": 0.3074371025711916, + "grad_norm": 0.7770170115266134, + "learning_rate": 1.6227707036183502e-05, + "loss": 0.5266, + "step": 17792 + }, + { + "epoch": 0.3074543820846005, + "grad_norm": 0.7488110886057706, + "learning_rate": 1.622726915417182e-05, + "loss": 0.5006, + "step": 17793 + }, + { + "epoch": 0.3074716615980094, + "grad_norm": 0.8084827069915606, + "learning_rate": 1.622683125265586e-05, + "loss": 0.4568, + "step": 17794 + }, + { + "epoch": 0.3074889411114183, + "grad_norm": 1.2673544471449634, + "learning_rate": 1.622639333163699e-05, + "loss": 0.4596, + "step": 17795 + }, + { + "epoch": 0.3075062206248272, + "grad_norm": 1.0738365350078174, + "learning_rate": 1.6225955391116583e-05, + "loss": 0.3676, + "step": 17796 + }, + { + "epoch": 0.3075235001382361, + "grad_norm": 0.8836721524507045, + "learning_rate": 1.6225517431096008e-05, + "loss": 0.5407, + "step": 17797 + }, + { + "epoch": 0.307540779651645, + "grad_norm": 1.3976839848402025, + "learning_rate": 1.6225079451576643e-05, + "loss": 0.741, + "step": 17798 + }, + { + "epoch": 0.3075580591650539, + "grad_norm": 1.0175475010374395, + "learning_rate": 1.6224641452559854e-05, + "loss": 0.7708, + "step": 17799 + }, + { + "epoch": 0.3075753386784628, + "grad_norm": 0.5060549619716489, + "learning_rate": 1.6224203434047012e-05, + "loss": 0.7564, + "step": 17800 + }, + { + "epoch": 0.3075926181918717, + "grad_norm": 0.8127691337805778, + "learning_rate": 1.6223765396039497e-05, + "loss": 0.5102, + "step": 17801 + }, + { + "epoch": 0.30760989770528063, + "grad_norm": 0.7369729661801175, + "learning_rate": 1.6223327338538674e-05, + "loss": 0.4653, + "step": 17802 + }, + { + "epoch": 0.30762717721868954, + "grad_norm": 0.9985919060260662, + "learning_rate": 1.6222889261545915e-05, + "loss": 0.4515, + "step": 17803 + }, + { + "epoch": 0.30764445673209845, + "grad_norm": 0.61451501462957, + "learning_rate": 1.6222451165062596e-05, + "loss": 0.3603, + "step": 17804 + }, + { + "epoch": 0.3076617362455073, + "grad_norm": 0.6518277714596954, + "learning_rate": 1.6222013049090086e-05, + "loss": 0.3026, + "step": 17805 + }, + { + "epoch": 0.3076790157589162, + "grad_norm": 1.0975045700150707, + "learning_rate": 1.6221574913629757e-05, + "loss": 0.3263, + "step": 17806 + }, + { + "epoch": 0.3076962952723251, + "grad_norm": 1.0687892721590757, + "learning_rate": 1.6221136758682984e-05, + "loss": 0.4835, + "step": 17807 + }, + { + "epoch": 0.307713574785734, + "grad_norm": 1.1772688577183523, + "learning_rate": 1.6220698584251143e-05, + "loss": 0.4581, + "step": 17808 + }, + { + "epoch": 0.30773085429914293, + "grad_norm": 1.1301660307201133, + "learning_rate": 1.6220260390335596e-05, + "loss": 0.4015, + "step": 17809 + }, + { + "epoch": 0.30774813381255184, + "grad_norm": 1.1062851622095873, + "learning_rate": 1.6219822176937722e-05, + "loss": 0.3877, + "step": 17810 + }, + { + "epoch": 0.30776541332596075, + "grad_norm": 1.2372882394719225, + "learning_rate": 1.621938394405889e-05, + "loss": 0.528, + "step": 17811 + }, + { + "epoch": 0.30778269283936965, + "grad_norm": 0.5408079090176267, + "learning_rate": 1.6218945691700476e-05, + "loss": 0.7697, + "step": 17812 + }, + { + "epoch": 0.30779997235277856, + "grad_norm": 1.3234013536978715, + "learning_rate": 1.621850741986385e-05, + "loss": 0.4675, + "step": 17813 + }, + { + "epoch": 0.30781725186618747, + "grad_norm": 1.8235611513926038, + "learning_rate": 1.621806912855039e-05, + "loss": 0.6306, + "step": 17814 + }, + { + "epoch": 0.3078345313795964, + "grad_norm": 0.7862817137944146, + "learning_rate": 1.621763081776146e-05, + "loss": 0.5226, + "step": 17815 + }, + { + "epoch": 0.30785181089300523, + "grad_norm": 0.9763358504769051, + "learning_rate": 1.6217192487498445e-05, + "loss": 0.7521, + "step": 17816 + }, + { + "epoch": 0.30786909040641414, + "grad_norm": 0.6995364934749917, + "learning_rate": 1.6216754137762703e-05, + "loss": 0.3947, + "step": 17817 + }, + { + "epoch": 0.30788636991982304, + "grad_norm": 1.9109447272932982, + "learning_rate": 1.621631576855562e-05, + "loss": 0.5961, + "step": 17818 + }, + { + "epoch": 0.30790364943323195, + "grad_norm": 0.8460117344405417, + "learning_rate": 1.621587737987856e-05, + "loss": 0.4629, + "step": 17819 + }, + { + "epoch": 0.30792092894664086, + "grad_norm": 0.7267709475429951, + "learning_rate": 1.6215438971732905e-05, + "loss": 0.4163, + "step": 17820 + }, + { + "epoch": 0.30793820846004977, + "grad_norm": 0.9200797698244527, + "learning_rate": 1.621500054412002e-05, + "loss": 0.4755, + "step": 17821 + }, + { + "epoch": 0.3079554879734587, + "grad_norm": 1.016302141064899, + "learning_rate": 1.6214562097041277e-05, + "loss": 0.48, + "step": 17822 + }, + { + "epoch": 0.3079727674868676, + "grad_norm": 1.0992028296567822, + "learning_rate": 1.6214123630498055e-05, + "loss": 0.4427, + "step": 17823 + }, + { + "epoch": 0.3079900470002765, + "grad_norm": 1.4396954588571074, + "learning_rate": 1.6213685144491728e-05, + "loss": 0.4402, + "step": 17824 + }, + { + "epoch": 0.3080073265136854, + "grad_norm": 1.3737484605262187, + "learning_rate": 1.6213246639023666e-05, + "loss": 0.4357, + "step": 17825 + }, + { + "epoch": 0.30802460602709425, + "grad_norm": 0.9063755775371757, + "learning_rate": 1.6212808114095243e-05, + "loss": 0.6954, + "step": 17826 + }, + { + "epoch": 0.30804188554050316, + "grad_norm": 1.4341130447663295, + "learning_rate": 1.6212369569707834e-05, + "loss": 0.6536, + "step": 17827 + }, + { + "epoch": 0.30805916505391207, + "grad_norm": 1.117069001154784, + "learning_rate": 1.6211931005862808e-05, + "loss": 0.532, + "step": 17828 + }, + { + "epoch": 0.308076444567321, + "grad_norm": 0.9937707246784164, + "learning_rate": 1.6211492422561543e-05, + "loss": 0.606, + "step": 17829 + }, + { + "epoch": 0.3080937240807299, + "grad_norm": 0.8540408449089835, + "learning_rate": 1.6211053819805415e-05, + "loss": 0.4774, + "step": 17830 + }, + { + "epoch": 0.3081110035941388, + "grad_norm": 1.4047079527958062, + "learning_rate": 1.6210615197595793e-05, + "loss": 0.4634, + "step": 17831 + }, + { + "epoch": 0.3081282831075477, + "grad_norm": 1.3363532403083733, + "learning_rate": 1.621017655593405e-05, + "loss": 0.3258, + "step": 17832 + }, + { + "epoch": 0.3081455626209566, + "grad_norm": 1.0434834875507843, + "learning_rate": 1.6209737894821563e-05, + "loss": 0.6911, + "step": 17833 + }, + { + "epoch": 0.3081628421343655, + "grad_norm": 1.5160242595348294, + "learning_rate": 1.6209299214259706e-05, + "loss": 0.3817, + "step": 17834 + }, + { + "epoch": 0.3081801216477744, + "grad_norm": 0.9954302222756591, + "learning_rate": 1.620886051424985e-05, + "loss": 0.5484, + "step": 17835 + }, + { + "epoch": 0.3081974011611833, + "grad_norm": 1.729354624027233, + "learning_rate": 1.6208421794793376e-05, + "loss": 0.3965, + "step": 17836 + }, + { + "epoch": 0.3082146806745922, + "grad_norm": 0.6636540418421885, + "learning_rate": 1.6207983055891647e-05, + "loss": 0.6094, + "step": 17837 + }, + { + "epoch": 0.3082319601880011, + "grad_norm": 1.2946514294710445, + "learning_rate": 1.6207544297546046e-05, + "loss": 0.5833, + "step": 17838 + }, + { + "epoch": 0.30824923970141, + "grad_norm": 0.7853600016825493, + "learning_rate": 1.6207105519757942e-05, + "loss": 0.616, + "step": 17839 + }, + { + "epoch": 0.3082665192148189, + "grad_norm": 1.359697067977732, + "learning_rate": 1.6206666722528715e-05, + "loss": 0.4616, + "step": 17840 + }, + { + "epoch": 0.3082837987282278, + "grad_norm": 0.7488765593319585, + "learning_rate": 1.6206227905859736e-05, + "loss": 0.4848, + "step": 17841 + }, + { + "epoch": 0.3083010782416367, + "grad_norm": 1.1551115419951252, + "learning_rate": 1.620578906975238e-05, + "loss": 0.4081, + "step": 17842 + }, + { + "epoch": 0.3083183577550456, + "grad_norm": 1.5077972703371105, + "learning_rate": 1.620535021420802e-05, + "loss": 0.3884, + "step": 17843 + }, + { + "epoch": 0.30833563726845453, + "grad_norm": 1.1262486300242995, + "learning_rate": 1.620491133922803e-05, + "loss": 0.4206, + "step": 17844 + }, + { + "epoch": 0.30835291678186344, + "grad_norm": 1.2906175768079455, + "learning_rate": 1.620447244481379e-05, + "loss": 0.4516, + "step": 17845 + }, + { + "epoch": 0.30837019629527235, + "grad_norm": 0.8471802458231831, + "learning_rate": 1.620403353096667e-05, + "loss": 0.343, + "step": 17846 + }, + { + "epoch": 0.30838747580868126, + "grad_norm": 1.0194819553065528, + "learning_rate": 1.6203594597688042e-05, + "loss": 0.4703, + "step": 17847 + }, + { + "epoch": 0.3084047553220901, + "grad_norm": 0.7430817059801499, + "learning_rate": 1.6203155644979283e-05, + "loss": 0.6465, + "step": 17848 + }, + { + "epoch": 0.308422034835499, + "grad_norm": 1.175686269216393, + "learning_rate": 1.6202716672841777e-05, + "loss": 0.6078, + "step": 17849 + }, + { + "epoch": 0.3084393143489079, + "grad_norm": 1.097145902548682, + "learning_rate": 1.6202277681276886e-05, + "loss": 0.3923, + "step": 17850 + }, + { + "epoch": 0.30845659386231683, + "grad_norm": 1.2090387197005976, + "learning_rate": 1.620183867028599e-05, + "loss": 0.452, + "step": 17851 + }, + { + "epoch": 0.30847387337572574, + "grad_norm": 1.1462314838674021, + "learning_rate": 1.620139963987046e-05, + "loss": 0.447, + "step": 17852 + }, + { + "epoch": 0.30849115288913465, + "grad_norm": 0.9688034199244185, + "learning_rate": 1.620096059003168e-05, + "loss": 0.3756, + "step": 17853 + }, + { + "epoch": 0.30850843240254355, + "grad_norm": 0.735059871487954, + "learning_rate": 1.620052152077102e-05, + "loss": 0.3975, + "step": 17854 + }, + { + "epoch": 0.30852571191595246, + "grad_norm": 1.8619715580585285, + "learning_rate": 1.6200082432089855e-05, + "loss": 0.733, + "step": 17855 + }, + { + "epoch": 0.30854299142936137, + "grad_norm": 1.1554693767993212, + "learning_rate": 1.619964332398956e-05, + "loss": 0.5678, + "step": 17856 + }, + { + "epoch": 0.3085602709427703, + "grad_norm": 0.42414284984606254, + "learning_rate": 1.619920419647151e-05, + "loss": 0.669, + "step": 17857 + }, + { + "epoch": 0.30857755045617913, + "grad_norm": 1.4561397602676203, + "learning_rate": 1.6198765049537085e-05, + "loss": 0.4857, + "step": 17858 + }, + { + "epoch": 0.30859482996958804, + "grad_norm": 1.0669532297920856, + "learning_rate": 1.6198325883187656e-05, + "loss": 0.4278, + "step": 17859 + }, + { + "epoch": 0.30861210948299694, + "grad_norm": 1.1207747542529953, + "learning_rate": 1.6197886697424595e-05, + "loss": 0.4968, + "step": 17860 + }, + { + "epoch": 0.30862938899640585, + "grad_norm": 0.8462912196508848, + "learning_rate": 1.6197447492249284e-05, + "loss": 0.462, + "step": 17861 + }, + { + "epoch": 0.30864666850981476, + "grad_norm": 1.2246373023798214, + "learning_rate": 1.6197008267663095e-05, + "loss": 0.4643, + "step": 17862 + }, + { + "epoch": 0.30866394802322367, + "grad_norm": 0.8593177214061242, + "learning_rate": 1.619656902366741e-05, + "loss": 0.3581, + "step": 17863 + }, + { + "epoch": 0.3086812275366326, + "grad_norm": 0.8052849700159759, + "learning_rate": 1.61961297602636e-05, + "loss": 0.7197, + "step": 17864 + }, + { + "epoch": 0.3086985070500415, + "grad_norm": 0.5349572625608184, + "learning_rate": 1.6195690477453035e-05, + "loss": 0.6725, + "step": 17865 + }, + { + "epoch": 0.3087157865634504, + "grad_norm": 0.8940120398755045, + "learning_rate": 1.61952511752371e-05, + "loss": 0.5389, + "step": 17866 + }, + { + "epoch": 0.3087330660768593, + "grad_norm": 1.180584178347893, + "learning_rate": 1.6194811853617168e-05, + "loss": 0.6321, + "step": 17867 + }, + { + "epoch": 0.3087503455902682, + "grad_norm": 0.9921067794861206, + "learning_rate": 1.619437251259461e-05, + "loss": 0.4639, + "step": 17868 + }, + { + "epoch": 0.30876762510367706, + "grad_norm": 1.1484160156683791, + "learning_rate": 1.6193933152170812e-05, + "loss": 0.4081, + "step": 17869 + }, + { + "epoch": 0.30878490461708596, + "grad_norm": 1.1862209956723584, + "learning_rate": 1.6193493772347143e-05, + "loss": 0.6121, + "step": 17870 + }, + { + "epoch": 0.3088021841304949, + "grad_norm": 0.9347243452544468, + "learning_rate": 1.619305437312498e-05, + "loss": 0.355, + "step": 17871 + }, + { + "epoch": 0.3088194636439038, + "grad_norm": 0.9396168160496255, + "learning_rate": 1.61926149545057e-05, + "loss": 0.4302, + "step": 17872 + }, + { + "epoch": 0.3088367431573127, + "grad_norm": 0.9114779265829352, + "learning_rate": 1.6192175516490678e-05, + "loss": 0.3079, + "step": 17873 + }, + { + "epoch": 0.3088540226707216, + "grad_norm": 0.4696974149806278, + "learning_rate": 1.6191736059081295e-05, + "loss": 0.6402, + "step": 17874 + }, + { + "epoch": 0.3088713021841305, + "grad_norm": 0.6973246496219749, + "learning_rate": 1.6191296582278923e-05, + "loss": 0.4802, + "step": 17875 + }, + { + "epoch": 0.3088885816975394, + "grad_norm": 0.558545775508904, + "learning_rate": 1.6190857086084938e-05, + "loss": 0.6729, + "step": 17876 + }, + { + "epoch": 0.3089058612109483, + "grad_norm": 0.8381444232922719, + "learning_rate": 1.619041757050072e-05, + "loss": 0.4617, + "step": 17877 + }, + { + "epoch": 0.3089231407243572, + "grad_norm": 0.8821832166697166, + "learning_rate": 1.618997803552764e-05, + "loss": 0.3183, + "step": 17878 + }, + { + "epoch": 0.3089404202377661, + "grad_norm": 0.8137942691798569, + "learning_rate": 1.6189538481167083e-05, + "loss": 0.4775, + "step": 17879 + }, + { + "epoch": 0.308957699751175, + "grad_norm": 1.0404380558513886, + "learning_rate": 1.6189098907420418e-05, + "loss": 0.3975, + "step": 17880 + }, + { + "epoch": 0.3089749792645839, + "grad_norm": 0.9153353412057089, + "learning_rate": 1.6188659314289028e-05, + "loss": 0.4374, + "step": 17881 + }, + { + "epoch": 0.3089922587779928, + "grad_norm": 1.0139971943288526, + "learning_rate": 1.6188219701774285e-05, + "loss": 0.4889, + "step": 17882 + }, + { + "epoch": 0.3090095382914017, + "grad_norm": 0.8899005273839494, + "learning_rate": 1.6187780069877565e-05, + "loss": 0.7546, + "step": 17883 + }, + { + "epoch": 0.3090268178048106, + "grad_norm": 1.1266851955418449, + "learning_rate": 1.6187340418600253e-05, + "loss": 0.422, + "step": 17884 + }, + { + "epoch": 0.3090440973182195, + "grad_norm": 0.7897988176592355, + "learning_rate": 1.6186900747943715e-05, + "loss": 0.5048, + "step": 17885 + }, + { + "epoch": 0.30906137683162843, + "grad_norm": 1.9193461028366348, + "learning_rate": 1.6186461057909337e-05, + "loss": 0.3491, + "step": 17886 + }, + { + "epoch": 0.30907865634503734, + "grad_norm": 1.0508227564107733, + "learning_rate": 1.6186021348498494e-05, + "loss": 0.572, + "step": 17887 + }, + { + "epoch": 0.30909593585844625, + "grad_norm": 1.1070807108632144, + "learning_rate": 1.6185581619712558e-05, + "loss": 0.6561, + "step": 17888 + }, + { + "epoch": 0.30911321537185515, + "grad_norm": 1.628897666145265, + "learning_rate": 1.6185141871552912e-05, + "loss": 0.5314, + "step": 17889 + }, + { + "epoch": 0.309130494885264, + "grad_norm": 0.8075566134143622, + "learning_rate": 1.6184702104020935e-05, + "loss": 0.5156, + "step": 17890 + }, + { + "epoch": 0.3091477743986729, + "grad_norm": 1.400100940595674, + "learning_rate": 1.6184262317117995e-05, + "loss": 0.3571, + "step": 17891 + }, + { + "epoch": 0.3091650539120818, + "grad_norm": 0.9859992805236696, + "learning_rate": 1.6183822510845478e-05, + "loss": 0.5845, + "step": 17892 + }, + { + "epoch": 0.30918233342549073, + "grad_norm": 1.5139908973528384, + "learning_rate": 1.6183382685204764e-05, + "loss": 0.4986, + "step": 17893 + }, + { + "epoch": 0.30919961293889964, + "grad_norm": 0.38107383769402614, + "learning_rate": 1.6182942840197217e-05, + "loss": 0.4712, + "step": 17894 + }, + { + "epoch": 0.30921689245230854, + "grad_norm": 0.6045171895872912, + "learning_rate": 1.6182502975824227e-05, + "loss": 0.3004, + "step": 17895 + }, + { + "epoch": 0.30923417196571745, + "grad_norm": 0.8726019486925637, + "learning_rate": 1.6182063092087166e-05, + "loss": 0.4099, + "step": 17896 + }, + { + "epoch": 0.30925145147912636, + "grad_norm": 1.406593289203663, + "learning_rate": 1.618162318898742e-05, + "loss": 0.6482, + "step": 17897 + }, + { + "epoch": 0.30926873099253527, + "grad_norm": 0.5092532215607334, + "learning_rate": 1.618118326652635e-05, + "loss": 0.4438, + "step": 17898 + }, + { + "epoch": 0.3092860105059442, + "grad_norm": 1.1586161156870383, + "learning_rate": 1.6180743324705354e-05, + "loss": 0.4128, + "step": 17899 + }, + { + "epoch": 0.309303290019353, + "grad_norm": 1.3440707575891933, + "learning_rate": 1.61803033635258e-05, + "loss": 0.5813, + "step": 17900 + }, + { + "epoch": 0.30932056953276194, + "grad_norm": 1.2951592664493832, + "learning_rate": 1.617986338298906e-05, + "loss": 0.6179, + "step": 17901 + }, + { + "epoch": 0.30933784904617084, + "grad_norm": 0.7752290709390083, + "learning_rate": 1.617942338309652e-05, + "loss": 0.4114, + "step": 17902 + }, + { + "epoch": 0.30935512855957975, + "grad_norm": 0.9842503581720707, + "learning_rate": 1.6178983363849557e-05, + "loss": 0.3325, + "step": 17903 + }, + { + "epoch": 0.30937240807298866, + "grad_norm": 1.0376664425126023, + "learning_rate": 1.617854332524955e-05, + "loss": 0.454, + "step": 17904 + }, + { + "epoch": 0.30938968758639757, + "grad_norm": 0.8050909955580405, + "learning_rate": 1.6178103267297874e-05, + "loss": 0.3371, + "step": 17905 + }, + { + "epoch": 0.3094069670998065, + "grad_norm": 1.114601646134647, + "learning_rate": 1.617766318999591e-05, + "loss": 0.2481, + "step": 17906 + }, + { + "epoch": 0.3094242466132154, + "grad_norm": 1.103070383040746, + "learning_rate": 1.6177223093345034e-05, + "loss": 0.544, + "step": 17907 + }, + { + "epoch": 0.3094415261266243, + "grad_norm": 1.1670449390833222, + "learning_rate": 1.6176782977346626e-05, + "loss": 0.5021, + "step": 17908 + }, + { + "epoch": 0.3094588056400332, + "grad_norm": 0.6989222862007022, + "learning_rate": 1.6176342842002064e-05, + "loss": 0.3531, + "step": 17909 + }, + { + "epoch": 0.3094760851534421, + "grad_norm": 1.4980826554949624, + "learning_rate": 1.617590268731273e-05, + "loss": 0.4786, + "step": 17910 + }, + { + "epoch": 0.30949336466685096, + "grad_norm": 1.48994806513043, + "learning_rate": 1.6175462513279998e-05, + "loss": 0.5589, + "step": 17911 + }, + { + "epoch": 0.30951064418025986, + "grad_norm": 1.4207434454098058, + "learning_rate": 1.6175022319905246e-05, + "loss": 0.5878, + "step": 17912 + }, + { + "epoch": 0.30952792369366877, + "grad_norm": 2.1976514524533424, + "learning_rate": 1.6174582107189854e-05, + "loss": 0.32, + "step": 17913 + }, + { + "epoch": 0.3095452032070777, + "grad_norm": 1.5150072326376764, + "learning_rate": 1.6174141875135204e-05, + "loss": 0.5004, + "step": 17914 + }, + { + "epoch": 0.3095624827204866, + "grad_norm": 1.123552528793738, + "learning_rate": 1.6173701623742672e-05, + "loss": 0.5351, + "step": 17915 + }, + { + "epoch": 0.3095797622338955, + "grad_norm": 0.7660428267546333, + "learning_rate": 1.617326135301364e-05, + "loss": 0.5567, + "step": 17916 + }, + { + "epoch": 0.3095970417473044, + "grad_norm": 0.9207538466833849, + "learning_rate": 1.617282106294948e-05, + "loss": 0.6203, + "step": 17917 + }, + { + "epoch": 0.3096143212607133, + "grad_norm": 0.5070880620251691, + "learning_rate": 1.617238075355158e-05, + "loss": 0.8714, + "step": 17918 + }, + { + "epoch": 0.3096316007741222, + "grad_norm": 0.9636985752791364, + "learning_rate": 1.6171940424821307e-05, + "loss": 0.5092, + "step": 17919 + }, + { + "epoch": 0.3096488802875311, + "grad_norm": 0.6728622037464563, + "learning_rate": 1.6171500076760053e-05, + "loss": 0.496, + "step": 17920 + }, + { + "epoch": 0.30966615980094003, + "grad_norm": 0.9195075486561878, + "learning_rate": 1.617105970936919e-05, + "loss": 0.44, + "step": 17921 + }, + { + "epoch": 0.3096834393143489, + "grad_norm": 0.9689095116670668, + "learning_rate": 1.6170619322650102e-05, + "loss": 0.4329, + "step": 17922 + }, + { + "epoch": 0.3097007188277578, + "grad_norm": 0.8141034276406448, + "learning_rate": 1.617017891660416e-05, + "loss": 0.6747, + "step": 17923 + }, + { + "epoch": 0.3097179983411667, + "grad_norm": 1.794417692707149, + "learning_rate": 1.6169738491232752e-05, + "loss": 0.5102, + "step": 17924 + }, + { + "epoch": 0.3097352778545756, + "grad_norm": 0.5944979373514752, + "learning_rate": 1.6169298046537253e-05, + "loss": 0.5313, + "step": 17925 + }, + { + "epoch": 0.3097525573679845, + "grad_norm": 1.183750518138164, + "learning_rate": 1.6168857582519048e-05, + "loss": 0.5429, + "step": 17926 + }, + { + "epoch": 0.3097698368813934, + "grad_norm": 0.6731387434388687, + "learning_rate": 1.6168417099179507e-05, + "loss": 0.7667, + "step": 17927 + }, + { + "epoch": 0.30978711639480233, + "grad_norm": 0.5559265817762473, + "learning_rate": 1.6167976596520015e-05, + "loss": 0.4227, + "step": 17928 + }, + { + "epoch": 0.30980439590821124, + "grad_norm": 1.0251630224735073, + "learning_rate": 1.6167536074541958e-05, + "loss": 0.3919, + "step": 17929 + }, + { + "epoch": 0.30982167542162015, + "grad_norm": 1.187379638538534, + "learning_rate": 1.61670955332467e-05, + "loss": 0.5311, + "step": 17930 + }, + { + "epoch": 0.30983895493502905, + "grad_norm": 0.9879481014075737, + "learning_rate": 1.6166654972635638e-05, + "loss": 0.5849, + "step": 17931 + }, + { + "epoch": 0.3098562344484379, + "grad_norm": 0.9234932202547956, + "learning_rate": 1.616621439271014e-05, + "loss": 0.6451, + "step": 17932 + }, + { + "epoch": 0.3098735139618468, + "grad_norm": 0.9784645146698863, + "learning_rate": 1.616577379347159e-05, + "loss": 0.4673, + "step": 17933 + }, + { + "epoch": 0.3098907934752557, + "grad_norm": 1.5156533449577794, + "learning_rate": 1.6165333174921366e-05, + "loss": 0.5402, + "step": 17934 + }, + { + "epoch": 0.30990807298866463, + "grad_norm": 1.142398860998641, + "learning_rate": 1.6164892537060854e-05, + "loss": 0.7318, + "step": 17935 + }, + { + "epoch": 0.30992535250207354, + "grad_norm": 0.7793121059429974, + "learning_rate": 1.616445187989143e-05, + "loss": 0.4602, + "step": 17936 + }, + { + "epoch": 0.30994263201548244, + "grad_norm": 1.2729467727771908, + "learning_rate": 1.616401120341447e-05, + "loss": 0.4519, + "step": 17937 + }, + { + "epoch": 0.30995991152889135, + "grad_norm": 1.7528298695377276, + "learning_rate": 1.6163570507631362e-05, + "loss": 0.4789, + "step": 17938 + }, + { + "epoch": 0.30997719104230026, + "grad_norm": 0.9623045820038342, + "learning_rate": 1.616312979254348e-05, + "loss": 0.4788, + "step": 17939 + }, + { + "epoch": 0.30999447055570917, + "grad_norm": 0.8585154760499878, + "learning_rate": 1.616268905815221e-05, + "loss": 0.5108, + "step": 17940 + }, + { + "epoch": 0.3100117500691181, + "grad_norm": 0.3298680663168058, + "learning_rate": 1.616224830445893e-05, + "loss": 0.3725, + "step": 17941 + }, + { + "epoch": 0.310029029582527, + "grad_norm": 0.8705485216097755, + "learning_rate": 1.616180753146502e-05, + "loss": 0.5353, + "step": 17942 + }, + { + "epoch": 0.31004630909593583, + "grad_norm": 2.045508898917465, + "learning_rate": 1.6161366739171858e-05, + "loss": 0.8474, + "step": 17943 + }, + { + "epoch": 0.31006358860934474, + "grad_norm": 1.281247030059028, + "learning_rate": 1.616092592758083e-05, + "loss": 0.4668, + "step": 17944 + }, + { + "epoch": 0.31008086812275365, + "grad_norm": 1.5095389675805189, + "learning_rate": 1.616048509669331e-05, + "loss": 0.4336, + "step": 17945 + }, + { + "epoch": 0.31009814763616256, + "grad_norm": 0.653951295991101, + "learning_rate": 1.6160044246510686e-05, + "loss": 0.4312, + "step": 17946 + }, + { + "epoch": 0.31011542714957147, + "grad_norm": 0.9803030470223386, + "learning_rate": 1.6159603377034333e-05, + "loss": 0.4369, + "step": 17947 + }, + { + "epoch": 0.3101327066629804, + "grad_norm": 1.3032536418319378, + "learning_rate": 1.6159162488265633e-05, + "loss": 0.6355, + "step": 17948 + }, + { + "epoch": 0.3101499861763893, + "grad_norm": 1.5837205220320845, + "learning_rate": 1.615872158020597e-05, + "loss": 0.5673, + "step": 17949 + }, + { + "epoch": 0.3101672656897982, + "grad_norm": 0.8880987236854296, + "learning_rate": 1.6158280652856722e-05, + "loss": 0.4624, + "step": 17950 + }, + { + "epoch": 0.3101845452032071, + "grad_norm": 1.6917638763931806, + "learning_rate": 1.6157839706219273e-05, + "loss": 0.6506, + "step": 17951 + }, + { + "epoch": 0.310201824716616, + "grad_norm": 0.8427968499052865, + "learning_rate": 1.6157398740295002e-05, + "loss": 0.5306, + "step": 17952 + }, + { + "epoch": 0.31021910423002486, + "grad_norm": 0.6467469057534373, + "learning_rate": 1.615695775508529e-05, + "loss": 0.5612, + "step": 17953 + }, + { + "epoch": 0.31023638374343376, + "grad_norm": 0.7277895648420328, + "learning_rate": 1.6156516750591515e-05, + "loss": 0.3974, + "step": 17954 + }, + { + "epoch": 0.31025366325684267, + "grad_norm": 1.1296822970162574, + "learning_rate": 1.6156075726815066e-05, + "loss": 0.5971, + "step": 17955 + }, + { + "epoch": 0.3102709427702516, + "grad_norm": 0.9964417679490285, + "learning_rate": 1.615563468375732e-05, + "loss": 0.4948, + "step": 17956 + }, + { + "epoch": 0.3102882222836605, + "grad_norm": 0.9750808281158452, + "learning_rate": 1.6155193621419655e-05, + "loss": 0.6582, + "step": 17957 + }, + { + "epoch": 0.3103055017970694, + "grad_norm": 1.4657313189591226, + "learning_rate": 1.6154752539803454e-05, + "loss": 0.7416, + "step": 17958 + }, + { + "epoch": 0.3103227813104783, + "grad_norm": 1.353272231895131, + "learning_rate": 1.6154311438910104e-05, + "loss": 0.4893, + "step": 17959 + }, + { + "epoch": 0.3103400608238872, + "grad_norm": 0.9445247031749643, + "learning_rate": 1.6153870318740984e-05, + "loss": 0.5047, + "step": 17960 + }, + { + "epoch": 0.3103573403372961, + "grad_norm": 1.505744556062421, + "learning_rate": 1.615342917929747e-05, + "loss": 0.5147, + "step": 17961 + }, + { + "epoch": 0.310374619850705, + "grad_norm": 1.394102684937657, + "learning_rate": 1.6152988020580952e-05, + "loss": 0.6865, + "step": 17962 + }, + { + "epoch": 0.31039189936411393, + "grad_norm": 0.8125718156812167, + "learning_rate": 1.6152546842592808e-05, + "loss": 0.6241, + "step": 17963 + }, + { + "epoch": 0.3104091788775228, + "grad_norm": 1.303992795931992, + "learning_rate": 1.615210564533442e-05, + "loss": 0.7358, + "step": 17964 + }, + { + "epoch": 0.3104264583909317, + "grad_norm": 0.8390464436287626, + "learning_rate": 1.6151664428807165e-05, + "loss": 0.7725, + "step": 17965 + }, + { + "epoch": 0.3104437379043406, + "grad_norm": 1.0464985153021205, + "learning_rate": 1.6151223193012434e-05, + "loss": 0.4972, + "step": 17966 + }, + { + "epoch": 0.3104610174177495, + "grad_norm": 1.2055243932823587, + "learning_rate": 1.6150781937951606e-05, + "loss": 0.6329, + "step": 17967 + }, + { + "epoch": 0.3104782969311584, + "grad_norm": 0.9718248688018268, + "learning_rate": 1.6150340663626055e-05, + "loss": 0.5779, + "step": 17968 + }, + { + "epoch": 0.3104955764445673, + "grad_norm": 0.9126965799506432, + "learning_rate": 1.6149899370037175e-05, + "loss": 0.519, + "step": 17969 + }, + { + "epoch": 0.31051285595797623, + "grad_norm": 0.8829410414049443, + "learning_rate": 1.6149458057186342e-05, + "loss": 0.49, + "step": 17970 + }, + { + "epoch": 0.31053013547138514, + "grad_norm": 0.8037907977092178, + "learning_rate": 1.6149016725074936e-05, + "loss": 0.3847, + "step": 17971 + }, + { + "epoch": 0.31054741498479405, + "grad_norm": 0.7164630465346639, + "learning_rate": 1.6148575373704348e-05, + "loss": 0.5119, + "step": 17972 + }, + { + "epoch": 0.31056469449820295, + "grad_norm": 0.8583787506477247, + "learning_rate": 1.614813400307595e-05, + "loss": 0.5331, + "step": 17973 + }, + { + "epoch": 0.3105819740116118, + "grad_norm": 1.0603667540704431, + "learning_rate": 1.614769261319113e-05, + "loss": 0.4275, + "step": 17974 + }, + { + "epoch": 0.3105992535250207, + "grad_norm": 1.3793073457129252, + "learning_rate": 1.614725120405127e-05, + "loss": 0.4999, + "step": 17975 + }, + { + "epoch": 0.3106165330384296, + "grad_norm": 0.8362844426792357, + "learning_rate": 1.614680977565775e-05, + "loss": 0.5552, + "step": 17976 + }, + { + "epoch": 0.31063381255183853, + "grad_norm": 1.0087386481620753, + "learning_rate": 1.6146368328011958e-05, + "loss": 0.5836, + "step": 17977 + }, + { + "epoch": 0.31065109206524744, + "grad_norm": 0.9275154070852346, + "learning_rate": 1.614592686111527e-05, + "loss": 0.5636, + "step": 17978 + }, + { + "epoch": 0.31066837157865634, + "grad_norm": 0.9432289548903079, + "learning_rate": 1.6145485374969077e-05, + "loss": 0.4736, + "step": 17979 + }, + { + "epoch": 0.31068565109206525, + "grad_norm": 0.588608554685626, + "learning_rate": 1.6145043869574754e-05, + "loss": 0.7598, + "step": 17980 + }, + { + "epoch": 0.31070293060547416, + "grad_norm": 1.2231772623660377, + "learning_rate": 1.6144602344933683e-05, + "loss": 0.6307, + "step": 17981 + }, + { + "epoch": 0.31072021011888307, + "grad_norm": 1.0807192846221718, + "learning_rate": 1.6144160801047255e-05, + "loss": 0.6922, + "step": 17982 + }, + { + "epoch": 0.310737489632292, + "grad_norm": 1.0243077075207405, + "learning_rate": 1.6143719237916845e-05, + "loss": 0.566, + "step": 17983 + }, + { + "epoch": 0.3107547691457009, + "grad_norm": 1.578936568044859, + "learning_rate": 1.6143277655543842e-05, + "loss": 0.3571, + "step": 17984 + }, + { + "epoch": 0.31077204865910973, + "grad_norm": 0.4734928416201508, + "learning_rate": 1.6142836053929628e-05, + "loss": 0.7419, + "step": 17985 + }, + { + "epoch": 0.31078932817251864, + "grad_norm": 0.9727780020680298, + "learning_rate": 1.6142394433075583e-05, + "loss": 0.5364, + "step": 17986 + }, + { + "epoch": 0.31080660768592755, + "grad_norm": 0.5870791960042618, + "learning_rate": 1.614195279298309e-05, + "loss": 0.3733, + "step": 17987 + }, + { + "epoch": 0.31082388719933646, + "grad_norm": 0.940405770514384, + "learning_rate": 1.6141511133653533e-05, + "loss": 0.3957, + "step": 17988 + }, + { + "epoch": 0.31084116671274536, + "grad_norm": 0.8604072734983645, + "learning_rate": 1.61410694550883e-05, + "loss": 0.4654, + "step": 17989 + }, + { + "epoch": 0.3108584462261543, + "grad_norm": 1.0378824991929612, + "learning_rate": 1.614062775728877e-05, + "loss": 0.5446, + "step": 17990 + }, + { + "epoch": 0.3108757257395632, + "grad_norm": 1.0996856913872954, + "learning_rate": 1.614018604025633e-05, + "loss": 0.5628, + "step": 17991 + }, + { + "epoch": 0.3108930052529721, + "grad_norm": 0.6864898542496485, + "learning_rate": 1.6139744303992357e-05, + "loss": 0.4315, + "step": 17992 + }, + { + "epoch": 0.310910284766381, + "grad_norm": 1.0612312325631952, + "learning_rate": 1.6139302548498237e-05, + "loss": 0.5874, + "step": 17993 + }, + { + "epoch": 0.3109275642797899, + "grad_norm": 0.7197598109725547, + "learning_rate": 1.6138860773775354e-05, + "loss": 0.3889, + "step": 17994 + }, + { + "epoch": 0.3109448437931988, + "grad_norm": 0.7813005514157204, + "learning_rate": 1.6138418979825098e-05, + "loss": 0.5097, + "step": 17995 + }, + { + "epoch": 0.31096212330660766, + "grad_norm": 1.182008499586003, + "learning_rate": 1.6137977166648844e-05, + "loss": 0.5977, + "step": 17996 + }, + { + "epoch": 0.31097940282001657, + "grad_norm": 0.4001817121619927, + "learning_rate": 1.613753533424798e-05, + "loss": 0.4999, + "step": 17997 + }, + { + "epoch": 0.3109966823334255, + "grad_norm": 0.9632889632222086, + "learning_rate": 1.613709348262389e-05, + "loss": 0.3373, + "step": 17998 + }, + { + "epoch": 0.3110139618468344, + "grad_norm": 1.665097731316228, + "learning_rate": 1.6136651611777952e-05, + "loss": 0.4847, + "step": 17999 + }, + { + "epoch": 0.3110312413602433, + "grad_norm": 0.7250895150257037, + "learning_rate": 1.6136209721711557e-05, + "loss": 0.2993, + "step": 18000 + }, + { + "epoch": 0.3110485208736522, + "grad_norm": 0.451778707241669, + "learning_rate": 1.613576781242609e-05, + "loss": 0.6068, + "step": 18001 + }, + { + "epoch": 0.3110658003870611, + "grad_norm": 1.2508677239434236, + "learning_rate": 1.613532588392293e-05, + "loss": 0.3417, + "step": 18002 + }, + { + "epoch": 0.31108307990047, + "grad_norm": 1.3008987305220459, + "learning_rate": 1.6134883936203464e-05, + "loss": 0.5396, + "step": 18003 + }, + { + "epoch": 0.3111003594138789, + "grad_norm": 1.0212667798034953, + "learning_rate": 1.6134441969269073e-05, + "loss": 0.4716, + "step": 18004 + }, + { + "epoch": 0.31111763892728783, + "grad_norm": 1.1563067314114837, + "learning_rate": 1.6133999983121142e-05, + "loss": 0.6835, + "step": 18005 + }, + { + "epoch": 0.3111349184406967, + "grad_norm": 0.49553741984725386, + "learning_rate": 1.613355797776106e-05, + "loss": 0.7699, + "step": 18006 + }, + { + "epoch": 0.3111521979541056, + "grad_norm": 1.905210619429263, + "learning_rate": 1.613311595319021e-05, + "loss": 0.5931, + "step": 18007 + }, + { + "epoch": 0.3111694774675145, + "grad_norm": 0.8143304423391461, + "learning_rate": 1.613267390940997e-05, + "loss": 0.7566, + "step": 18008 + }, + { + "epoch": 0.3111867569809234, + "grad_norm": 0.9073803004415717, + "learning_rate": 1.6132231846421734e-05, + "loss": 0.7396, + "step": 18009 + }, + { + "epoch": 0.3112040364943323, + "grad_norm": 0.7578040384942462, + "learning_rate": 1.6131789764226878e-05, + "loss": 0.6677, + "step": 18010 + }, + { + "epoch": 0.3112213160077412, + "grad_norm": 1.037881195380615, + "learning_rate": 1.6131347662826793e-05, + "loss": 0.4831, + "step": 18011 + }, + { + "epoch": 0.31123859552115013, + "grad_norm": 0.9718783629388471, + "learning_rate": 1.613090554222286e-05, + "loss": 0.4474, + "step": 18012 + }, + { + "epoch": 0.31125587503455904, + "grad_norm": 0.4818786136505316, + "learning_rate": 1.6130463402416462e-05, + "loss": 0.9704, + "step": 18013 + }, + { + "epoch": 0.31127315454796795, + "grad_norm": 0.8909542838728154, + "learning_rate": 1.6130021243408992e-05, + "loss": 0.4969, + "step": 18014 + }, + { + "epoch": 0.31129043406137685, + "grad_norm": 0.6804770711688974, + "learning_rate": 1.612957906520183e-05, + "loss": 0.5375, + "step": 18015 + }, + { + "epoch": 0.31130771357478576, + "grad_norm": 0.7906571970048005, + "learning_rate": 1.6129136867796357e-05, + "loss": 0.701, + "step": 18016 + }, + { + "epoch": 0.3113249930881946, + "grad_norm": 0.8989895606379005, + "learning_rate": 1.612869465119396e-05, + "loss": 0.3834, + "step": 18017 + }, + { + "epoch": 0.3113422726016035, + "grad_norm": 0.9801072167465383, + "learning_rate": 1.612825241539603e-05, + "loss": 0.7263, + "step": 18018 + }, + { + "epoch": 0.31135955211501243, + "grad_norm": 1.3106806969053122, + "learning_rate": 1.6127810160403946e-05, + "loss": 0.5525, + "step": 18019 + }, + { + "epoch": 0.31137683162842134, + "grad_norm": 1.2212536960153373, + "learning_rate": 1.612736788621909e-05, + "loss": 0.5126, + "step": 18020 + }, + { + "epoch": 0.31139411114183024, + "grad_norm": 0.4856666137321323, + "learning_rate": 1.612692559284286e-05, + "loss": 0.5921, + "step": 18021 + }, + { + "epoch": 0.31141139065523915, + "grad_norm": 0.4633210432243372, + "learning_rate": 1.612648328027663e-05, + "loss": 0.7297, + "step": 18022 + }, + { + "epoch": 0.31142867016864806, + "grad_norm": 0.9675253433874985, + "learning_rate": 1.612604094852179e-05, + "loss": 0.4792, + "step": 18023 + }, + { + "epoch": 0.31144594968205697, + "grad_norm": 1.5655591758543004, + "learning_rate": 1.6125598597579723e-05, + "loss": 0.5332, + "step": 18024 + }, + { + "epoch": 0.3114632291954659, + "grad_norm": 1.5835488928896648, + "learning_rate": 1.6125156227451815e-05, + "loss": 0.6808, + "step": 18025 + }, + { + "epoch": 0.3114805087088748, + "grad_norm": 0.47021551392206595, + "learning_rate": 1.6124713838139454e-05, + "loss": 0.7096, + "step": 18026 + }, + { + "epoch": 0.31149778822228363, + "grad_norm": 0.9686348444234268, + "learning_rate": 1.6124271429644023e-05, + "loss": 0.5696, + "step": 18027 + }, + { + "epoch": 0.31151506773569254, + "grad_norm": 1.1550754026875052, + "learning_rate": 1.6123829001966908e-05, + "loss": 0.5932, + "step": 18028 + }, + { + "epoch": 0.31153234724910145, + "grad_norm": 1.0774847208501508, + "learning_rate": 1.6123386555109494e-05, + "loss": 0.4223, + "step": 18029 + }, + { + "epoch": 0.31154962676251036, + "grad_norm": 0.7357010432091021, + "learning_rate": 1.612294408907317e-05, + "loss": 0.6628, + "step": 18030 + }, + { + "epoch": 0.31156690627591926, + "grad_norm": 1.4968360177606068, + "learning_rate": 1.612250160385932e-05, + "loss": 0.4503, + "step": 18031 + }, + { + "epoch": 0.31158418578932817, + "grad_norm": 0.8787662048273863, + "learning_rate": 1.612205909946933e-05, + "loss": 0.5042, + "step": 18032 + }, + { + "epoch": 0.3116014653027371, + "grad_norm": 0.9184402490298695, + "learning_rate": 1.612161657590458e-05, + "loss": 0.534, + "step": 18033 + }, + { + "epoch": 0.311618744816146, + "grad_norm": 1.2656885422304733, + "learning_rate": 1.612117403316647e-05, + "loss": 0.4497, + "step": 18034 + }, + { + "epoch": 0.3116360243295549, + "grad_norm": 1.3685600183990718, + "learning_rate": 1.6120731471256373e-05, + "loss": 0.5876, + "step": 18035 + }, + { + "epoch": 0.3116533038429638, + "grad_norm": 1.22276325118434, + "learning_rate": 1.6120288890175682e-05, + "loss": 0.3948, + "step": 18036 + }, + { + "epoch": 0.3116705833563727, + "grad_norm": 0.4409425638025452, + "learning_rate": 1.6119846289925778e-05, + "loss": 0.7627, + "step": 18037 + }, + { + "epoch": 0.31168786286978156, + "grad_norm": 1.0242599077072172, + "learning_rate": 1.611940367050805e-05, + "loss": 0.3255, + "step": 18038 + }, + { + "epoch": 0.31170514238319047, + "grad_norm": 1.4039987556941556, + "learning_rate": 1.611896103192389e-05, + "loss": 0.645, + "step": 18039 + }, + { + "epoch": 0.3117224218965994, + "grad_norm": 0.9388432147671465, + "learning_rate": 1.6118518374174674e-05, + "loss": 0.4251, + "step": 18040 + }, + { + "epoch": 0.3117397014100083, + "grad_norm": 1.1018980146995991, + "learning_rate": 1.6118075697261793e-05, + "loss": 0.4579, + "step": 18041 + }, + { + "epoch": 0.3117569809234172, + "grad_norm": 0.8234386053143526, + "learning_rate": 1.611763300118664e-05, + "loss": 0.6626, + "step": 18042 + }, + { + "epoch": 0.3117742604368261, + "grad_norm": 1.2495597672039254, + "learning_rate": 1.611719028595059e-05, + "loss": 0.3604, + "step": 18043 + }, + { + "epoch": 0.311791539950235, + "grad_norm": 1.755643519752953, + "learning_rate": 1.6116747551555037e-05, + "loss": 0.4032, + "step": 18044 + }, + { + "epoch": 0.3118088194636439, + "grad_norm": 1.3088910632195012, + "learning_rate": 1.6116304798001365e-05, + "loss": 0.4953, + "step": 18045 + }, + { + "epoch": 0.3118260989770528, + "grad_norm": 1.3992497343320438, + "learning_rate": 1.6115862025290966e-05, + "loss": 0.6161, + "step": 18046 + }, + { + "epoch": 0.31184337849046173, + "grad_norm": 1.8707723858446346, + "learning_rate": 1.6115419233425214e-05, + "loss": 0.5988, + "step": 18047 + }, + { + "epoch": 0.3118606580038706, + "grad_norm": 0.7863283838926775, + "learning_rate": 1.611497642240551e-05, + "loss": 0.3701, + "step": 18048 + }, + { + "epoch": 0.3118779375172795, + "grad_norm": 0.8850543496577378, + "learning_rate": 1.6114533592233236e-05, + "loss": 0.5784, + "step": 18049 + }, + { + "epoch": 0.3118952170306884, + "grad_norm": 1.5313907861022484, + "learning_rate": 1.6114090742909777e-05, + "loss": 0.5631, + "step": 18050 + }, + { + "epoch": 0.3119124965440973, + "grad_norm": 1.2903197387657168, + "learning_rate": 1.611364787443652e-05, + "loss": 0.7371, + "step": 18051 + }, + { + "epoch": 0.3119297760575062, + "grad_norm": 1.058592172224824, + "learning_rate": 1.6113204986814852e-05, + "loss": 0.5647, + "step": 18052 + }, + { + "epoch": 0.3119470555709151, + "grad_norm": 1.2487304923675113, + "learning_rate": 1.6112762080046164e-05, + "loss": 0.4117, + "step": 18053 + }, + { + "epoch": 0.31196433508432403, + "grad_norm": 0.9679179891080276, + "learning_rate": 1.6112319154131838e-05, + "loss": 0.3938, + "step": 18054 + }, + { + "epoch": 0.31198161459773294, + "grad_norm": 1.169708573571263, + "learning_rate": 1.6111876209073266e-05, + "loss": 0.3355, + "step": 18055 + }, + { + "epoch": 0.31199889411114184, + "grad_norm": 1.072046654280959, + "learning_rate": 1.611143324487183e-05, + "loss": 0.3354, + "step": 18056 + }, + { + "epoch": 0.31201617362455075, + "grad_norm": 0.5691585546797405, + "learning_rate": 1.6110990261528925e-05, + "loss": 0.5347, + "step": 18057 + }, + { + "epoch": 0.31203345313795966, + "grad_norm": 0.6506681091728872, + "learning_rate": 1.611054725904593e-05, + "loss": 0.8882, + "step": 18058 + }, + { + "epoch": 0.3120507326513685, + "grad_norm": 0.5585069518444211, + "learning_rate": 1.611010423742424e-05, + "loss": 0.4407, + "step": 18059 + }, + { + "epoch": 0.3120680121647774, + "grad_norm": 1.2767056165880808, + "learning_rate": 1.6109661196665237e-05, + "loss": 0.452, + "step": 18060 + }, + { + "epoch": 0.3120852916781863, + "grad_norm": 1.4025682315410088, + "learning_rate": 1.6109218136770316e-05, + "loss": 0.6354, + "step": 18061 + }, + { + "epoch": 0.31210257119159523, + "grad_norm": 0.8575941138578436, + "learning_rate": 1.6108775057740853e-05, + "loss": 0.4843, + "step": 18062 + }, + { + "epoch": 0.31211985070500414, + "grad_norm": 1.0014686584392678, + "learning_rate": 1.6108331959578243e-05, + "loss": 0.6227, + "step": 18063 + }, + { + "epoch": 0.31213713021841305, + "grad_norm": 0.7110447695220018, + "learning_rate": 1.6107888842283873e-05, + "loss": 0.4415, + "step": 18064 + }, + { + "epoch": 0.31215440973182196, + "grad_norm": 1.4641510107970528, + "learning_rate": 1.6107445705859136e-05, + "loss": 0.5543, + "step": 18065 + }, + { + "epoch": 0.31217168924523087, + "grad_norm": 0.8680665314116492, + "learning_rate": 1.6107002550305412e-05, + "loss": 0.5325, + "step": 18066 + }, + { + "epoch": 0.3121889687586398, + "grad_norm": 0.9677495625119289, + "learning_rate": 1.6106559375624093e-05, + "loss": 0.7005, + "step": 18067 + }, + { + "epoch": 0.3122062482720487, + "grad_norm": 0.5135191555711079, + "learning_rate": 1.6106116181816563e-05, + "loss": 0.6042, + "step": 18068 + }, + { + "epoch": 0.3122235277854576, + "grad_norm": 0.8050795102202865, + "learning_rate": 1.6105672968884214e-05, + "loss": 0.4009, + "step": 18069 + }, + { + "epoch": 0.31224080729886644, + "grad_norm": 1.0257191460000765, + "learning_rate": 1.6105229736828434e-05, + "loss": 0.4287, + "step": 18070 + }, + { + "epoch": 0.31225808681227535, + "grad_norm": 0.8606545744594936, + "learning_rate": 1.610478648565061e-05, + "loss": 0.4675, + "step": 18071 + }, + { + "epoch": 0.31227536632568426, + "grad_norm": 0.6548320640885759, + "learning_rate": 1.610434321535213e-05, + "loss": 0.3489, + "step": 18072 + }, + { + "epoch": 0.31229264583909316, + "grad_norm": 0.9050706518039199, + "learning_rate": 1.6103899925934382e-05, + "loss": 0.5413, + "step": 18073 + }, + { + "epoch": 0.31230992535250207, + "grad_norm": 1.5389853622260925, + "learning_rate": 1.610345661739876e-05, + "loss": 0.7123, + "step": 18074 + }, + { + "epoch": 0.312327204865911, + "grad_norm": 0.9808383952844861, + "learning_rate": 1.6103013289746648e-05, + "loss": 0.6663, + "step": 18075 + }, + { + "epoch": 0.3123444843793199, + "grad_norm": 0.618653129635532, + "learning_rate": 1.610256994297943e-05, + "loss": 0.4441, + "step": 18076 + }, + { + "epoch": 0.3123617638927288, + "grad_norm": 0.5489870342431634, + "learning_rate": 1.6102126577098503e-05, + "loss": 0.9343, + "step": 18077 + }, + { + "epoch": 0.3123790434061377, + "grad_norm": 0.9244022666143048, + "learning_rate": 1.6101683192105252e-05, + "loss": 0.5124, + "step": 18078 + }, + { + "epoch": 0.3123963229195466, + "grad_norm": 1.2086896763499784, + "learning_rate": 1.610123978800106e-05, + "loss": 0.5558, + "step": 18079 + }, + { + "epoch": 0.31241360243295546, + "grad_norm": 0.7467703167293888, + "learning_rate": 1.6100796364787325e-05, + "loss": 0.3884, + "step": 18080 + }, + { + "epoch": 0.31243088194636437, + "grad_norm": 1.0439094635903845, + "learning_rate": 1.6100352922465434e-05, + "loss": 0.5415, + "step": 18081 + }, + { + "epoch": 0.3124481614597733, + "grad_norm": 0.8478445339641062, + "learning_rate": 1.609990946103677e-05, + "loss": 0.3827, + "step": 18082 + }, + { + "epoch": 0.3124654409731822, + "grad_norm": 0.941203446525795, + "learning_rate": 1.609946598050273e-05, + "loss": 0.5379, + "step": 18083 + }, + { + "epoch": 0.3124827204865911, + "grad_norm": 1.926168000236055, + "learning_rate": 1.6099022480864697e-05, + "loss": 0.6022, + "step": 18084 + }, + { + "epoch": 0.3125, + "grad_norm": 0.7195004176221678, + "learning_rate": 1.609857896212406e-05, + "loss": 0.4713, + "step": 18085 + }, + { + "epoch": 0.3125172795134089, + "grad_norm": 1.0017646541429626, + "learning_rate": 1.6098135424282215e-05, + "loss": 0.3969, + "step": 18086 + }, + { + "epoch": 0.3125345590268178, + "grad_norm": 0.7674059156775611, + "learning_rate": 1.6097691867340547e-05, + "loss": 0.4235, + "step": 18087 + }, + { + "epoch": 0.3125518385402267, + "grad_norm": 0.8365301178365169, + "learning_rate": 1.6097248291300438e-05, + "loss": 0.4436, + "step": 18088 + }, + { + "epoch": 0.31256911805363563, + "grad_norm": 1.2221988060049351, + "learning_rate": 1.609680469616329e-05, + "loss": 0.6027, + "step": 18089 + }, + { + "epoch": 0.31258639756704454, + "grad_norm": 1.079906437447672, + "learning_rate": 1.6096361081930482e-05, + "loss": 0.4735, + "step": 18090 + }, + { + "epoch": 0.3126036770804534, + "grad_norm": 1.4383407388701797, + "learning_rate": 1.609591744860341e-05, + "loss": 0.5414, + "step": 18091 + }, + { + "epoch": 0.3126209565938623, + "grad_norm": 0.6966339428449605, + "learning_rate": 1.609547379618346e-05, + "loss": 0.6533, + "step": 18092 + }, + { + "epoch": 0.3126382361072712, + "grad_norm": 0.9657277796604431, + "learning_rate": 1.6095030124672025e-05, + "loss": 0.5272, + "step": 18093 + }, + { + "epoch": 0.3126555156206801, + "grad_norm": 0.41958267147266626, + "learning_rate": 1.6094586434070492e-05, + "loss": 0.5488, + "step": 18094 + }, + { + "epoch": 0.312672795134089, + "grad_norm": 0.36141314775857114, + "learning_rate": 1.609414272438025e-05, + "loss": 0.5072, + "step": 18095 + }, + { + "epoch": 0.31269007464749793, + "grad_norm": 0.9103477097606631, + "learning_rate": 1.609369899560269e-05, + "loss": 0.3804, + "step": 18096 + }, + { + "epoch": 0.31270735416090684, + "grad_norm": 1.005214455445248, + "learning_rate": 1.60932552477392e-05, + "loss": 0.6777, + "step": 18097 + }, + { + "epoch": 0.31272463367431574, + "grad_norm": 0.7217665826623914, + "learning_rate": 1.6092811480791174e-05, + "loss": 0.4036, + "step": 18098 + }, + { + "epoch": 0.31274191318772465, + "grad_norm": 0.6444860818962426, + "learning_rate": 1.609236769476e-05, + "loss": 0.5929, + "step": 18099 + }, + { + "epoch": 0.31275919270113356, + "grad_norm": 0.616820900503866, + "learning_rate": 1.6091923889647067e-05, + "loss": 0.3215, + "step": 18100 + }, + { + "epoch": 0.3127764722145424, + "grad_norm": 0.9697085789756501, + "learning_rate": 1.609148006545376e-05, + "loss": 0.5473, + "step": 18101 + }, + { + "epoch": 0.3127937517279513, + "grad_norm": 0.9064238830699524, + "learning_rate": 1.6091036222181483e-05, + "loss": 0.467, + "step": 18102 + }, + { + "epoch": 0.3128110312413602, + "grad_norm": 0.7719892301851867, + "learning_rate": 1.6090592359831613e-05, + "loss": 0.2568, + "step": 18103 + }, + { + "epoch": 0.31282831075476913, + "grad_norm": 0.7282289021360207, + "learning_rate": 1.6090148478405545e-05, + "loss": 0.5676, + "step": 18104 + }, + { + "epoch": 0.31284559026817804, + "grad_norm": 1.4015888544470865, + "learning_rate": 1.608970457790467e-05, + "loss": 0.4483, + "step": 18105 + }, + { + "epoch": 0.31286286978158695, + "grad_norm": 0.8512561507869096, + "learning_rate": 1.608926065833038e-05, + "loss": 0.5034, + "step": 18106 + }, + { + "epoch": 0.31288014929499586, + "grad_norm": 1.1657133599182645, + "learning_rate": 1.6088816719684056e-05, + "loss": 0.4693, + "step": 18107 + }, + { + "epoch": 0.31289742880840477, + "grad_norm": 0.9105919174875177, + "learning_rate": 1.6088372761967102e-05, + "loss": 0.4572, + "step": 18108 + }, + { + "epoch": 0.3129147083218137, + "grad_norm": 1.1219553841769476, + "learning_rate": 1.6087928785180896e-05, + "loss": 0.4504, + "step": 18109 + }, + { + "epoch": 0.3129319878352226, + "grad_norm": 1.4184508688659883, + "learning_rate": 1.6087484789326843e-05, + "loss": 0.3937, + "step": 18110 + }, + { + "epoch": 0.3129492673486315, + "grad_norm": 0.7968889640776142, + "learning_rate": 1.6087040774406316e-05, + "loss": 0.5763, + "step": 18111 + }, + { + "epoch": 0.31296654686204034, + "grad_norm": 0.8834783467744813, + "learning_rate": 1.608659674042072e-05, + "loss": 0.5029, + "step": 18112 + }, + { + "epoch": 0.31298382637544925, + "grad_norm": 1.3243938345524753, + "learning_rate": 1.6086152687371435e-05, + "loss": 0.4762, + "step": 18113 + }, + { + "epoch": 0.31300110588885816, + "grad_norm": 0.9930254115572369, + "learning_rate": 1.6085708615259863e-05, + "loss": 0.6281, + "step": 18114 + }, + { + "epoch": 0.31301838540226706, + "grad_norm": 1.2097891754554844, + "learning_rate": 1.6085264524087387e-05, + "loss": 0.6171, + "step": 18115 + }, + { + "epoch": 0.31303566491567597, + "grad_norm": 1.0062307449585102, + "learning_rate": 1.60848204138554e-05, + "loss": 0.5645, + "step": 18116 + }, + { + "epoch": 0.3130529444290849, + "grad_norm": 0.7354304649952892, + "learning_rate": 1.6084376284565295e-05, + "loss": 0.3641, + "step": 18117 + }, + { + "epoch": 0.3130702239424938, + "grad_norm": 0.6588119589034328, + "learning_rate": 1.6083932136218457e-05, + "loss": 0.4156, + "step": 18118 + }, + { + "epoch": 0.3130875034559027, + "grad_norm": 0.6257272733171367, + "learning_rate": 1.6083487968816285e-05, + "loss": 0.4085, + "step": 18119 + }, + { + "epoch": 0.3131047829693116, + "grad_norm": 0.48348719331925766, + "learning_rate": 1.6083043782360165e-05, + "loss": 0.7614, + "step": 18120 + }, + { + "epoch": 0.3131220624827205, + "grad_norm": 0.7256761077914552, + "learning_rate": 1.608259957685149e-05, + "loss": 0.4692, + "step": 18121 + }, + { + "epoch": 0.3131393419961294, + "grad_norm": 0.8258886146759835, + "learning_rate": 1.608215535229165e-05, + "loss": 0.4086, + "step": 18122 + }, + { + "epoch": 0.31315662150953827, + "grad_norm": 0.9348314510326524, + "learning_rate": 1.6081711108682037e-05, + "loss": 0.6503, + "step": 18123 + }, + { + "epoch": 0.3131739010229472, + "grad_norm": 0.3983478105340604, + "learning_rate": 1.608126684602404e-05, + "loss": 0.6423, + "step": 18124 + }, + { + "epoch": 0.3131911805363561, + "grad_norm": 0.6979692623682227, + "learning_rate": 1.6080822564319056e-05, + "loss": 0.5047, + "step": 18125 + }, + { + "epoch": 0.313208460049765, + "grad_norm": 1.3295978701192288, + "learning_rate": 1.6080378263568473e-05, + "loss": 0.4521, + "step": 18126 + }, + { + "epoch": 0.3132257395631739, + "grad_norm": 1.348630308693543, + "learning_rate": 1.6079933943773687e-05, + "loss": 0.609, + "step": 18127 + }, + { + "epoch": 0.3132430190765828, + "grad_norm": 0.762446215817846, + "learning_rate": 1.607948960493608e-05, + "loss": 0.5551, + "step": 18128 + }, + { + "epoch": 0.3132602985899917, + "grad_norm": 0.8941352385845767, + "learning_rate": 1.6079045247057054e-05, + "loss": 0.6576, + "step": 18129 + }, + { + "epoch": 0.3132775781034006, + "grad_norm": 0.9832958364354041, + "learning_rate": 1.6078600870137996e-05, + "loss": 0.5763, + "step": 18130 + }, + { + "epoch": 0.31329485761680953, + "grad_norm": 0.4778028682907217, + "learning_rate": 1.6078156474180297e-05, + "loss": 0.6552, + "step": 18131 + }, + { + "epoch": 0.31331213713021844, + "grad_norm": 1.25735051492554, + "learning_rate": 1.607771205918535e-05, + "loss": 0.5397, + "step": 18132 + }, + { + "epoch": 0.3133294166436273, + "grad_norm": 0.7205154652233825, + "learning_rate": 1.6077267625154546e-05, + "loss": 0.6371, + "step": 18133 + }, + { + "epoch": 0.3133466961570362, + "grad_norm": 1.2512982977183447, + "learning_rate": 1.6076823172089276e-05, + "loss": 0.6626, + "step": 18134 + }, + { + "epoch": 0.3133639756704451, + "grad_norm": 2.23477208223869, + "learning_rate": 1.607637869999094e-05, + "loss": 0.8073, + "step": 18135 + }, + { + "epoch": 0.313381255183854, + "grad_norm": 0.7204941057702374, + "learning_rate": 1.607593420886092e-05, + "loss": 0.3475, + "step": 18136 + }, + { + "epoch": 0.3133985346972629, + "grad_norm": 1.111608604731477, + "learning_rate": 1.6075489698700614e-05, + "loss": 0.4023, + "step": 18137 + }, + { + "epoch": 0.31341581421067183, + "grad_norm": 1.0927985576947516, + "learning_rate": 1.607504516951141e-05, + "loss": 0.4778, + "step": 18138 + }, + { + "epoch": 0.31343309372408074, + "grad_norm": 0.828807418178851, + "learning_rate": 1.6074600621294703e-05, + "loss": 0.4667, + "step": 18139 + }, + { + "epoch": 0.31345037323748964, + "grad_norm": 0.9188636763683613, + "learning_rate": 1.607415605405189e-05, + "loss": 0.4765, + "step": 18140 + }, + { + "epoch": 0.31346765275089855, + "grad_norm": 0.7788622820840935, + "learning_rate": 1.6073711467784352e-05, + "loss": 0.4277, + "step": 18141 + }, + { + "epoch": 0.31348493226430746, + "grad_norm": 0.6478896392360716, + "learning_rate": 1.6073266862493492e-05, + "loss": 0.3768, + "step": 18142 + }, + { + "epoch": 0.31350221177771637, + "grad_norm": 1.1790152640285254, + "learning_rate": 1.6072822238180698e-05, + "loss": 0.7406, + "step": 18143 + }, + { + "epoch": 0.3135194912911252, + "grad_norm": 1.0472887980046328, + "learning_rate": 1.6072377594847362e-05, + "loss": 0.4339, + "step": 18144 + }, + { + "epoch": 0.3135367708045341, + "grad_norm": 0.8599515490703863, + "learning_rate": 1.607193293249488e-05, + "loss": 0.6795, + "step": 18145 + }, + { + "epoch": 0.31355405031794303, + "grad_norm": 0.9726628414549184, + "learning_rate": 1.607148825112464e-05, + "loss": 0.6687, + "step": 18146 + }, + { + "epoch": 0.31357132983135194, + "grad_norm": 0.986877766072759, + "learning_rate": 1.607104355073804e-05, + "loss": 0.4601, + "step": 18147 + }, + { + "epoch": 0.31358860934476085, + "grad_norm": 1.2508645768213682, + "learning_rate": 1.607059883133647e-05, + "loss": 0.3876, + "step": 18148 + }, + { + "epoch": 0.31360588885816976, + "grad_norm": 0.7326062677087769, + "learning_rate": 1.6070154092921325e-05, + "loss": 0.2623, + "step": 18149 + }, + { + "epoch": 0.31362316837157866, + "grad_norm": 0.9218824090957589, + "learning_rate": 1.606970933549399e-05, + "loss": 0.3535, + "step": 18150 + }, + { + "epoch": 0.31364044788498757, + "grad_norm": 0.7479154443102207, + "learning_rate": 1.6069264559055867e-05, + "loss": 0.5128, + "step": 18151 + }, + { + "epoch": 0.3136577273983965, + "grad_norm": 1.1636711579047783, + "learning_rate": 1.6068819763608347e-05, + "loss": 0.5405, + "step": 18152 + }, + { + "epoch": 0.3136750069118054, + "grad_norm": 1.1681451563634906, + "learning_rate": 1.6068374949152824e-05, + "loss": 0.7435, + "step": 18153 + }, + { + "epoch": 0.31369228642521424, + "grad_norm": 0.8022287094802112, + "learning_rate": 1.6067930115690685e-05, + "loss": 0.402, + "step": 18154 + }, + { + "epoch": 0.31370956593862315, + "grad_norm": 1.3870290739255813, + "learning_rate": 1.606748526322333e-05, + "loss": 0.5304, + "step": 18155 + }, + { + "epoch": 0.31372684545203205, + "grad_norm": 0.7533695074490717, + "learning_rate": 1.606704039175215e-05, + "loss": 0.5902, + "step": 18156 + }, + { + "epoch": 0.31374412496544096, + "grad_norm": 0.8694570839923048, + "learning_rate": 1.606659550127854e-05, + "loss": 0.5284, + "step": 18157 + }, + { + "epoch": 0.31376140447884987, + "grad_norm": 1.3026675813830422, + "learning_rate": 1.606615059180389e-05, + "loss": 0.6277, + "step": 18158 + }, + { + "epoch": 0.3137786839922588, + "grad_norm": 1.2937566774851799, + "learning_rate": 1.60657056633296e-05, + "loss": 0.4472, + "step": 18159 + }, + { + "epoch": 0.3137959635056677, + "grad_norm": 0.667840837916599, + "learning_rate": 1.6065260715857052e-05, + "loss": 0.4708, + "step": 18160 + }, + { + "epoch": 0.3138132430190766, + "grad_norm": 1.4240888064257715, + "learning_rate": 1.6064815749387647e-05, + "loss": 0.7401, + "step": 18161 + }, + { + "epoch": 0.3138305225324855, + "grad_norm": 1.5780166133091178, + "learning_rate": 1.6064370763922782e-05, + "loss": 0.4056, + "step": 18162 + }, + { + "epoch": 0.3138478020458944, + "grad_norm": 0.4081218106446281, + "learning_rate": 1.6063925759463844e-05, + "loss": 0.7729, + "step": 18163 + }, + { + "epoch": 0.3138650815593033, + "grad_norm": 0.8840366981361739, + "learning_rate": 1.6063480736012232e-05, + "loss": 0.3993, + "step": 18164 + }, + { + "epoch": 0.31388236107271217, + "grad_norm": 1.1960522208360727, + "learning_rate": 1.6063035693569335e-05, + "loss": 0.4234, + "step": 18165 + }, + { + "epoch": 0.3138996405861211, + "grad_norm": 0.7411543645623989, + "learning_rate": 1.606259063213655e-05, + "loss": 0.346, + "step": 18166 + }, + { + "epoch": 0.31391692009953, + "grad_norm": 0.6180261932617727, + "learning_rate": 1.6062145551715272e-05, + "loss": 0.3518, + "step": 18167 + }, + { + "epoch": 0.3139341996129389, + "grad_norm": 1.1502371242606926, + "learning_rate": 1.6061700452306896e-05, + "loss": 0.5218, + "step": 18168 + }, + { + "epoch": 0.3139514791263478, + "grad_norm": 0.7240979706752052, + "learning_rate": 1.6061255333912807e-05, + "loss": 0.469, + "step": 18169 + }, + { + "epoch": 0.3139687586397567, + "grad_norm": 1.107403925256729, + "learning_rate": 1.606081019653441e-05, + "loss": 0.3732, + "step": 18170 + }, + { + "epoch": 0.3139860381531656, + "grad_norm": 0.6805739405728912, + "learning_rate": 1.6060365040173093e-05, + "loss": 0.2262, + "step": 18171 + }, + { + "epoch": 0.3140033176665745, + "grad_norm": 0.9862429806198915, + "learning_rate": 1.6059919864830252e-05, + "loss": 0.5846, + "step": 18172 + }, + { + "epoch": 0.31402059717998343, + "grad_norm": 0.7949999804957338, + "learning_rate": 1.6059474670507283e-05, + "loss": 0.4245, + "step": 18173 + }, + { + "epoch": 0.31403787669339234, + "grad_norm": 0.480133233106044, + "learning_rate": 1.605902945720558e-05, + "loss": 0.6355, + "step": 18174 + }, + { + "epoch": 0.3140551562068012, + "grad_norm": 0.5092203294117215, + "learning_rate": 1.605858422492653e-05, + "loss": 0.4546, + "step": 18175 + }, + { + "epoch": 0.3140724357202101, + "grad_norm": 1.7442979407295542, + "learning_rate": 1.605813897367154e-05, + "loss": 0.6584, + "step": 18176 + }, + { + "epoch": 0.314089715233619, + "grad_norm": 0.9241707763043733, + "learning_rate": 1.6057693703441995e-05, + "loss": 0.53, + "step": 18177 + }, + { + "epoch": 0.3141069947470279, + "grad_norm": 0.44103430922289644, + "learning_rate": 1.60572484142393e-05, + "loss": 0.6541, + "step": 18178 + }, + { + "epoch": 0.3141242742604368, + "grad_norm": 0.8139866075614093, + "learning_rate": 1.6056803106064832e-05, + "loss": 0.601, + "step": 18179 + }, + { + "epoch": 0.3141415537738457, + "grad_norm": 0.7930930081169574, + "learning_rate": 1.6056357778920006e-05, + "loss": 0.501, + "step": 18180 + }, + { + "epoch": 0.31415883328725464, + "grad_norm": 0.9257660110976365, + "learning_rate": 1.60559124328062e-05, + "loss": 0.4336, + "step": 18181 + }, + { + "epoch": 0.31417611280066354, + "grad_norm": 1.3883217855610843, + "learning_rate": 1.605546706772482e-05, + "loss": 0.6181, + "step": 18182 + }, + { + "epoch": 0.31419339231407245, + "grad_norm": 1.2547908196260615, + "learning_rate": 1.6055021683677255e-05, + "loss": 0.5364, + "step": 18183 + }, + { + "epoch": 0.31421067182748136, + "grad_norm": 0.8847957807066745, + "learning_rate": 1.60545762806649e-05, + "loss": 0.5402, + "step": 18184 + }, + { + "epoch": 0.31422795134089027, + "grad_norm": 0.7585967348888853, + "learning_rate": 1.6054130858689155e-05, + "loss": 0.501, + "step": 18185 + }, + { + "epoch": 0.3142452308542991, + "grad_norm": 0.7755182158471032, + "learning_rate": 1.6053685417751412e-05, + "loss": 0.4995, + "step": 18186 + }, + { + "epoch": 0.314262510367708, + "grad_norm": 1.2279349762489458, + "learning_rate": 1.6053239957853067e-05, + "loss": 0.3572, + "step": 18187 + }, + { + "epoch": 0.31427978988111693, + "grad_norm": 0.99173305695541, + "learning_rate": 1.6052794478995512e-05, + "loss": 0.4403, + "step": 18188 + }, + { + "epoch": 0.31429706939452584, + "grad_norm": 1.0422180452180558, + "learning_rate": 1.6052348981180147e-05, + "loss": 0.5924, + "step": 18189 + }, + { + "epoch": 0.31431434890793475, + "grad_norm": 1.081033785849585, + "learning_rate": 1.6051903464408364e-05, + "loss": 0.3704, + "step": 18190 + }, + { + "epoch": 0.31433162842134366, + "grad_norm": 0.7697877679363172, + "learning_rate": 1.605145792868156e-05, + "loss": 0.5335, + "step": 18191 + }, + { + "epoch": 0.31434890793475256, + "grad_norm": 0.8797422298874134, + "learning_rate": 1.605101237400113e-05, + "loss": 0.4687, + "step": 18192 + }, + { + "epoch": 0.31436618744816147, + "grad_norm": 0.6339484586865739, + "learning_rate": 1.6050566800368468e-05, + "loss": 0.2939, + "step": 18193 + }, + { + "epoch": 0.3143834669615704, + "grad_norm": 0.9508970794596889, + "learning_rate": 1.605012120778497e-05, + "loss": 0.5119, + "step": 18194 + }, + { + "epoch": 0.3144007464749793, + "grad_norm": 1.0530482319197556, + "learning_rate": 1.6049675596252036e-05, + "loss": 0.4878, + "step": 18195 + }, + { + "epoch": 0.3144180259883882, + "grad_norm": 0.42323959986118526, + "learning_rate": 1.6049229965771054e-05, + "loss": 0.62, + "step": 18196 + }, + { + "epoch": 0.31443530550179705, + "grad_norm": 0.6841252108143684, + "learning_rate": 1.6048784316343425e-05, + "loss": 0.3364, + "step": 18197 + }, + { + "epoch": 0.31445258501520595, + "grad_norm": 0.7590530855141713, + "learning_rate": 1.6048338647970546e-05, + "loss": 0.4105, + "step": 18198 + }, + { + "epoch": 0.31446986452861486, + "grad_norm": 0.9663634047159946, + "learning_rate": 1.604789296065381e-05, + "loss": 0.5533, + "step": 18199 + }, + { + "epoch": 0.31448714404202377, + "grad_norm": 0.822506998168344, + "learning_rate": 1.6047447254394616e-05, + "loss": 0.5326, + "step": 18200 + }, + { + "epoch": 0.3145044235554327, + "grad_norm": 1.3986162402187772, + "learning_rate": 1.6047001529194354e-05, + "loss": 0.394, + "step": 18201 + }, + { + "epoch": 0.3145217030688416, + "grad_norm": 0.7656371802073645, + "learning_rate": 1.6046555785054425e-05, + "loss": 0.4107, + "step": 18202 + }, + { + "epoch": 0.3145389825822505, + "grad_norm": 0.7687960634422489, + "learning_rate": 1.6046110021976225e-05, + "loss": 0.3609, + "step": 18203 + }, + { + "epoch": 0.3145562620956594, + "grad_norm": 0.5255944205510079, + "learning_rate": 1.6045664239961143e-05, + "loss": 0.3316, + "step": 18204 + }, + { + "epoch": 0.3145735416090683, + "grad_norm": 1.1653945627436357, + "learning_rate": 1.604521843901059e-05, + "loss": 0.3603, + "step": 18205 + }, + { + "epoch": 0.3145908211224772, + "grad_norm": 0.7185458634856856, + "learning_rate": 1.6044772619125945e-05, + "loss": 0.5151, + "step": 18206 + }, + { + "epoch": 0.31460810063588607, + "grad_norm": 0.9262255771058578, + "learning_rate": 1.6044326780308617e-05, + "loss": 0.6132, + "step": 18207 + }, + { + "epoch": 0.314625380149295, + "grad_norm": 0.9091274325486561, + "learning_rate": 1.604388092256e-05, + "loss": 0.6622, + "step": 18208 + }, + { + "epoch": 0.3146426596627039, + "grad_norm": 0.7790896264675413, + "learning_rate": 1.6043435045881486e-05, + "loss": 0.2876, + "step": 18209 + }, + { + "epoch": 0.3146599391761128, + "grad_norm": 0.9033664807780941, + "learning_rate": 1.604298915027448e-05, + "loss": 0.5107, + "step": 18210 + }, + { + "epoch": 0.3146772186895217, + "grad_norm": 1.085508683779767, + "learning_rate": 1.604254323574036e-05, + "loss": 0.4887, + "step": 18211 + }, + { + "epoch": 0.3146944982029306, + "grad_norm": 1.1205521153847473, + "learning_rate": 1.6042097302280545e-05, + "loss": 0.6066, + "step": 18212 + }, + { + "epoch": 0.3147117777163395, + "grad_norm": 1.8847928189952738, + "learning_rate": 1.604165134989642e-05, + "loss": 0.3639, + "step": 18213 + }, + { + "epoch": 0.3147290572297484, + "grad_norm": 0.763813090573216, + "learning_rate": 1.6041205378589384e-05, + "loss": 0.5396, + "step": 18214 + }, + { + "epoch": 0.31474633674315733, + "grad_norm": 0.7157624639527984, + "learning_rate": 1.6040759388360834e-05, + "loss": 0.3867, + "step": 18215 + }, + { + "epoch": 0.31476361625656624, + "grad_norm": 1.0389318704946864, + "learning_rate": 1.6040313379212163e-05, + "loss": 0.4757, + "step": 18216 + }, + { + "epoch": 0.31478089576997514, + "grad_norm": 1.5556083170802728, + "learning_rate": 1.6039867351144778e-05, + "loss": 0.48, + "step": 18217 + }, + { + "epoch": 0.314798175283384, + "grad_norm": 0.42223428123658985, + "learning_rate": 1.6039421304160067e-05, + "loss": 0.7104, + "step": 18218 + }, + { + "epoch": 0.3148154547967929, + "grad_norm": 0.9251440677474473, + "learning_rate": 1.6038975238259426e-05, + "loss": 0.3215, + "step": 18219 + }, + { + "epoch": 0.3148327343102018, + "grad_norm": 1.6618628610101673, + "learning_rate": 1.603852915344426e-05, + "loss": 0.6012, + "step": 18220 + }, + { + "epoch": 0.3148500138236107, + "grad_norm": 1.102188706047671, + "learning_rate": 1.6038083049715957e-05, + "loss": 0.6598, + "step": 18221 + }, + { + "epoch": 0.3148672933370196, + "grad_norm": 0.7212114802145333, + "learning_rate": 1.6037636927075923e-05, + "loss": 0.5281, + "step": 18222 + }, + { + "epoch": 0.31488457285042853, + "grad_norm": 1.042009026890193, + "learning_rate": 1.603719078552555e-05, + "loss": 0.4563, + "step": 18223 + }, + { + "epoch": 0.31490185236383744, + "grad_norm": 1.7993279869592944, + "learning_rate": 1.6036744625066238e-05, + "loss": 0.4947, + "step": 18224 + }, + { + "epoch": 0.31491913187724635, + "grad_norm": 1.6230581593560076, + "learning_rate": 1.603629844569938e-05, + "loss": 0.4664, + "step": 18225 + }, + { + "epoch": 0.31493641139065526, + "grad_norm": 1.900439672039645, + "learning_rate": 1.6035852247426376e-05, + "loss": 0.6068, + "step": 18226 + }, + { + "epoch": 0.31495369090406417, + "grad_norm": 0.9286379719533148, + "learning_rate": 1.603540603024863e-05, + "loss": 0.5593, + "step": 18227 + }, + { + "epoch": 0.314970970417473, + "grad_norm": 1.1404139729286407, + "learning_rate": 1.603495979416753e-05, + "loss": 0.3584, + "step": 18228 + }, + { + "epoch": 0.3149882499308819, + "grad_norm": 0.931345977016377, + "learning_rate": 1.6034513539184477e-05, + "loss": 0.4555, + "step": 18229 + }, + { + "epoch": 0.31500552944429083, + "grad_norm": 0.6531592341040895, + "learning_rate": 1.603406726530087e-05, + "loss": 0.9351, + "step": 18230 + }, + { + "epoch": 0.31502280895769974, + "grad_norm": 1.1757748161762058, + "learning_rate": 1.6033620972518103e-05, + "loss": 0.4429, + "step": 18231 + }, + { + "epoch": 0.31504008847110865, + "grad_norm": 0.7437908307685923, + "learning_rate": 1.6033174660837582e-05, + "loss": 0.5224, + "step": 18232 + }, + { + "epoch": 0.31505736798451756, + "grad_norm": 1.2691607864549541, + "learning_rate": 1.603272833026069e-05, + "loss": 0.6013, + "step": 18233 + }, + { + "epoch": 0.31507464749792646, + "grad_norm": 1.246047532268928, + "learning_rate": 1.6032281980788842e-05, + "loss": 0.4035, + "step": 18234 + }, + { + "epoch": 0.31509192701133537, + "grad_norm": 0.7950284065419498, + "learning_rate": 1.603183561242343e-05, + "loss": 0.4522, + "step": 18235 + }, + { + "epoch": 0.3151092065247443, + "grad_norm": 1.0022527222320845, + "learning_rate": 1.6031389225165847e-05, + "loss": 0.5574, + "step": 18236 + }, + { + "epoch": 0.3151264860381532, + "grad_norm": 0.5305245062349294, + "learning_rate": 1.6030942819017493e-05, + "loss": 0.6512, + "step": 18237 + }, + { + "epoch": 0.3151437655515621, + "grad_norm": 0.9538361543975811, + "learning_rate": 1.6030496393979768e-05, + "loss": 0.4275, + "step": 18238 + }, + { + "epoch": 0.31516104506497095, + "grad_norm": 1.2751912637739826, + "learning_rate": 1.6030049950054073e-05, + "loss": 0.2943, + "step": 18239 + }, + { + "epoch": 0.31517832457837985, + "grad_norm": 0.628624476439443, + "learning_rate": 1.6029603487241803e-05, + "loss": 0.3345, + "step": 18240 + }, + { + "epoch": 0.31519560409178876, + "grad_norm": 1.1427222291500359, + "learning_rate": 1.6029157005544354e-05, + "loss": 0.4021, + "step": 18241 + }, + { + "epoch": 0.31521288360519767, + "grad_norm": 1.3477670758643725, + "learning_rate": 1.602871050496313e-05, + "loss": 0.4021, + "step": 18242 + }, + { + "epoch": 0.3152301631186066, + "grad_norm": 1.5021710327629767, + "learning_rate": 1.6028263985499526e-05, + "loss": 0.6271, + "step": 18243 + }, + { + "epoch": 0.3152474426320155, + "grad_norm": 1.2487626813861208, + "learning_rate": 1.602781744715494e-05, + "loss": 0.5615, + "step": 18244 + }, + { + "epoch": 0.3152647221454244, + "grad_norm": 0.8899394438356373, + "learning_rate": 1.602737088993077e-05, + "loss": 0.4707, + "step": 18245 + }, + { + "epoch": 0.3152820016588333, + "grad_norm": 0.9961126856970784, + "learning_rate": 1.6026924313828417e-05, + "loss": 0.5274, + "step": 18246 + }, + { + "epoch": 0.3152992811722422, + "grad_norm": 0.7532032464747037, + "learning_rate": 1.6026477718849283e-05, + "loss": 0.4998, + "step": 18247 + }, + { + "epoch": 0.3153165606856511, + "grad_norm": 0.8620618578971484, + "learning_rate": 1.602603110499476e-05, + "loss": 0.6549, + "step": 18248 + }, + { + "epoch": 0.31533384019905997, + "grad_norm": 0.8918742036125389, + "learning_rate": 1.602558447226625e-05, + "loss": 0.6471, + "step": 18249 + }, + { + "epoch": 0.3153511197124689, + "grad_norm": 1.3807186239270943, + "learning_rate": 1.602513782066515e-05, + "loss": 0.4665, + "step": 18250 + }, + { + "epoch": 0.3153683992258778, + "grad_norm": 1.0076912649697332, + "learning_rate": 1.602469115019286e-05, + "loss": 0.4897, + "step": 18251 + }, + { + "epoch": 0.3153856787392867, + "grad_norm": 0.6571877942991255, + "learning_rate": 1.6024244460850784e-05, + "loss": 0.4101, + "step": 18252 + }, + { + "epoch": 0.3154029582526956, + "grad_norm": 1.0587837597871173, + "learning_rate": 1.6023797752640313e-05, + "loss": 0.5896, + "step": 18253 + }, + { + "epoch": 0.3154202377661045, + "grad_norm": 0.8775317617463163, + "learning_rate": 1.602335102556285e-05, + "loss": 0.7779, + "step": 18254 + }, + { + "epoch": 0.3154375172795134, + "grad_norm": 0.8591494164482244, + "learning_rate": 1.6022904279619792e-05, + "loss": 0.4909, + "step": 18255 + }, + { + "epoch": 0.3154547967929223, + "grad_norm": 1.1279865268084512, + "learning_rate": 1.602245751481254e-05, + "loss": 0.375, + "step": 18256 + }, + { + "epoch": 0.31547207630633123, + "grad_norm": 1.587213471968141, + "learning_rate": 1.60220107311425e-05, + "loss": 0.5898, + "step": 18257 + }, + { + "epoch": 0.31548935581974014, + "grad_norm": 1.0353481146402435, + "learning_rate": 1.6021563928611058e-05, + "loss": 0.5376, + "step": 18258 + }, + { + "epoch": 0.31550663533314904, + "grad_norm": 0.7287686809890295, + "learning_rate": 1.602111710721962e-05, + "loss": 0.3706, + "step": 18259 + }, + { + "epoch": 0.3155239148465579, + "grad_norm": 0.7985366340312186, + "learning_rate": 1.6020670266969588e-05, + "loss": 0.4662, + "step": 18260 + }, + { + "epoch": 0.3155411943599668, + "grad_norm": 0.4456523381040884, + "learning_rate": 1.6020223407862362e-05, + "loss": 0.8468, + "step": 18261 + }, + { + "epoch": 0.3155584738733757, + "grad_norm": 0.901214562743724, + "learning_rate": 1.6019776529899336e-05, + "loss": 0.6685, + "step": 18262 + }, + { + "epoch": 0.3155757533867846, + "grad_norm": 1.147725607792426, + "learning_rate": 1.6019329633081912e-05, + "loss": 0.323, + "step": 18263 + }, + { + "epoch": 0.3155930329001935, + "grad_norm": 1.174236364758104, + "learning_rate": 1.601888271741149e-05, + "loss": 0.4283, + "step": 18264 + }, + { + "epoch": 0.31561031241360243, + "grad_norm": 0.996485289075517, + "learning_rate": 1.601843578288947e-05, + "loss": 0.4023, + "step": 18265 + }, + { + "epoch": 0.31562759192701134, + "grad_norm": 1.2056583218710741, + "learning_rate": 1.6017988829517253e-05, + "loss": 0.3533, + "step": 18266 + }, + { + "epoch": 0.31564487144042025, + "grad_norm": 0.7220720436173755, + "learning_rate": 1.6017541857296235e-05, + "loss": 0.529, + "step": 18267 + }, + { + "epoch": 0.31566215095382916, + "grad_norm": 0.7568827069741285, + "learning_rate": 1.601709486622782e-05, + "loss": 0.4941, + "step": 18268 + }, + { + "epoch": 0.31567943046723806, + "grad_norm": 0.7430290309130454, + "learning_rate": 1.601664785631341e-05, + "loss": 0.4671, + "step": 18269 + }, + { + "epoch": 0.315696709980647, + "grad_norm": 0.5909670592682745, + "learning_rate": 1.6016200827554396e-05, + "loss": 0.4879, + "step": 18270 + }, + { + "epoch": 0.3157139894940558, + "grad_norm": 1.0420258928699044, + "learning_rate": 1.6015753779952188e-05, + "loss": 0.3392, + "step": 18271 + }, + { + "epoch": 0.31573126900746473, + "grad_norm": 0.9710530939108125, + "learning_rate": 1.601530671350818e-05, + "loss": 0.6099, + "step": 18272 + }, + { + "epoch": 0.31574854852087364, + "grad_norm": 0.9157496218011754, + "learning_rate": 1.6014859628223774e-05, + "loss": 0.5816, + "step": 18273 + }, + { + "epoch": 0.31576582803428255, + "grad_norm": 0.9507976299367521, + "learning_rate": 1.6014412524100372e-05, + "loss": 0.4383, + "step": 18274 + }, + { + "epoch": 0.31578310754769146, + "grad_norm": 1.8770295061072755, + "learning_rate": 1.601396540113937e-05, + "loss": 0.5414, + "step": 18275 + }, + { + "epoch": 0.31580038706110036, + "grad_norm": 0.9927545441171031, + "learning_rate": 1.6013518259342174e-05, + "loss": 0.6292, + "step": 18276 + }, + { + "epoch": 0.31581766657450927, + "grad_norm": 1.3329599837791284, + "learning_rate": 1.6013071098710182e-05, + "loss": 0.5386, + "step": 18277 + }, + { + "epoch": 0.3158349460879182, + "grad_norm": 1.727397076090137, + "learning_rate": 1.6012623919244796e-05, + "loss": 0.4245, + "step": 18278 + }, + { + "epoch": 0.3158522256013271, + "grad_norm": 1.4066602909198958, + "learning_rate": 1.601217672094741e-05, + "loss": 0.5361, + "step": 18279 + }, + { + "epoch": 0.315869505114736, + "grad_norm": 0.948436203977254, + "learning_rate": 1.601172950381943e-05, + "loss": 0.4327, + "step": 18280 + }, + { + "epoch": 0.31588678462814485, + "grad_norm": 0.760903762492966, + "learning_rate": 1.601128226786226e-05, + "loss": 0.6977, + "step": 18281 + }, + { + "epoch": 0.31590406414155375, + "grad_norm": 0.8218781362977657, + "learning_rate": 1.6010835013077296e-05, + "loss": 0.4343, + "step": 18282 + }, + { + "epoch": 0.31592134365496266, + "grad_norm": 0.6341771750946201, + "learning_rate": 1.601038773946594e-05, + "loss": 0.5344, + "step": 18283 + }, + { + "epoch": 0.31593862316837157, + "grad_norm": 1.040150870845263, + "learning_rate": 1.6009940447029593e-05, + "loss": 0.4243, + "step": 18284 + }, + { + "epoch": 0.3159559026817805, + "grad_norm": 0.8098718073858778, + "learning_rate": 1.6009493135769655e-05, + "loss": 0.4896, + "step": 18285 + }, + { + "epoch": 0.3159731821951894, + "grad_norm": 0.7123702528652738, + "learning_rate": 1.6009045805687526e-05, + "loss": 0.5081, + "step": 18286 + }, + { + "epoch": 0.3159904617085983, + "grad_norm": 1.2513067736255639, + "learning_rate": 1.600859845678461e-05, + "loss": 0.4789, + "step": 18287 + }, + { + "epoch": 0.3160077412220072, + "grad_norm": 1.3480510293187802, + "learning_rate": 1.6008151089062305e-05, + "loss": 0.7027, + "step": 18288 + }, + { + "epoch": 0.3160250207354161, + "grad_norm": 0.7961190811935764, + "learning_rate": 1.600770370252202e-05, + "loss": 0.479, + "step": 18289 + }, + { + "epoch": 0.316042300248825, + "grad_norm": 1.4498475389926306, + "learning_rate": 1.600725629716515e-05, + "loss": 0.5222, + "step": 18290 + }, + { + "epoch": 0.3160595797622339, + "grad_norm": 0.9081421214037844, + "learning_rate": 1.600680887299309e-05, + "loss": 0.2839, + "step": 18291 + }, + { + "epoch": 0.3160768592756428, + "grad_norm": 1.0096761127020866, + "learning_rate": 1.6006361430007253e-05, + "loss": 0.6427, + "step": 18292 + }, + { + "epoch": 0.3160941387890517, + "grad_norm": 1.3881061281501892, + "learning_rate": 1.6005913968209034e-05, + "loss": 0.6203, + "step": 18293 + }, + { + "epoch": 0.3161114183024606, + "grad_norm": 0.9547542252501308, + "learning_rate": 1.6005466487599837e-05, + "loss": 0.6216, + "step": 18294 + }, + { + "epoch": 0.3161286978158695, + "grad_norm": 1.3696142962441702, + "learning_rate": 1.6005018988181063e-05, + "loss": 0.5373, + "step": 18295 + }, + { + "epoch": 0.3161459773292784, + "grad_norm": 0.905489868628869, + "learning_rate": 1.600457146995411e-05, + "loss": 0.468, + "step": 18296 + }, + { + "epoch": 0.3161632568426873, + "grad_norm": 0.8669090174905, + "learning_rate": 1.600412393292039e-05, + "loss": 0.3488, + "step": 18297 + }, + { + "epoch": 0.3161805363560962, + "grad_norm": 0.6728063558782161, + "learning_rate": 1.600367637708129e-05, + "loss": 0.5972, + "step": 18298 + }, + { + "epoch": 0.3161978158695051, + "grad_norm": 0.7607349958003253, + "learning_rate": 1.600322880243822e-05, + "loss": 0.4184, + "step": 18299 + }, + { + "epoch": 0.31621509538291404, + "grad_norm": 1.0508677052290858, + "learning_rate": 1.6002781208992586e-05, + "loss": 0.3467, + "step": 18300 + }, + { + "epoch": 0.31623237489632294, + "grad_norm": 0.7971979044179163, + "learning_rate": 1.600233359674578e-05, + "loss": 0.5186, + "step": 18301 + }, + { + "epoch": 0.3162496544097318, + "grad_norm": 0.5581711074983281, + "learning_rate": 1.6001885965699213e-05, + "loss": 0.4736, + "step": 18302 + }, + { + "epoch": 0.3162669339231407, + "grad_norm": 1.1275111596817744, + "learning_rate": 1.6001438315854283e-05, + "loss": 0.5951, + "step": 18303 + }, + { + "epoch": 0.3162842134365496, + "grad_norm": 0.9401010781963913, + "learning_rate": 1.600099064721239e-05, + "loss": 0.5005, + "step": 18304 + }, + { + "epoch": 0.3163014929499585, + "grad_norm": 0.8715539775604093, + "learning_rate": 1.600054295977494e-05, + "loss": 0.5638, + "step": 18305 + }, + { + "epoch": 0.3163187724633674, + "grad_norm": 1.3867019328122467, + "learning_rate": 1.600009525354333e-05, + "loss": 0.5277, + "step": 18306 + }, + { + "epoch": 0.31633605197677633, + "grad_norm": 1.1735230804830554, + "learning_rate": 1.5999647528518967e-05, + "loss": 0.5069, + "step": 18307 + }, + { + "epoch": 0.31635333149018524, + "grad_norm": 1.0869490271073836, + "learning_rate": 1.5999199784703253e-05, + "loss": 0.487, + "step": 18308 + }, + { + "epoch": 0.31637061100359415, + "grad_norm": 1.0730944040962582, + "learning_rate": 1.5998752022097587e-05, + "loss": 0.4314, + "step": 18309 + }, + { + "epoch": 0.31638789051700306, + "grad_norm": 0.8871881606602313, + "learning_rate": 1.599830424070338e-05, + "loss": 0.647, + "step": 18310 + }, + { + "epoch": 0.31640517003041196, + "grad_norm": 0.9725955157973797, + "learning_rate": 1.5997856440522023e-05, + "loss": 0.335, + "step": 18311 + }, + { + "epoch": 0.31642244954382087, + "grad_norm": 0.5242223772647594, + "learning_rate": 1.5997408621554927e-05, + "loss": 0.637, + "step": 18312 + }, + { + "epoch": 0.3164397290572297, + "grad_norm": 0.919390334725225, + "learning_rate": 1.599696078380349e-05, + "loss": 0.3657, + "step": 18313 + }, + { + "epoch": 0.31645700857063863, + "grad_norm": 1.4179573899194415, + "learning_rate": 1.5996512927269113e-05, + "loss": 0.5623, + "step": 18314 + }, + { + "epoch": 0.31647428808404754, + "grad_norm": 1.4176755012850584, + "learning_rate": 1.5996065051953205e-05, + "loss": 0.4467, + "step": 18315 + }, + { + "epoch": 0.31649156759745645, + "grad_norm": 0.6598652549022678, + "learning_rate": 1.5995617157857163e-05, + "loss": 0.359, + "step": 18316 + }, + { + "epoch": 0.31650884711086535, + "grad_norm": 0.5596098889465995, + "learning_rate": 1.5995169244982393e-05, + "loss": 0.509, + "step": 18317 + }, + { + "epoch": 0.31652612662427426, + "grad_norm": 0.37153074428381205, + "learning_rate": 1.59947213133303e-05, + "loss": 0.5598, + "step": 18318 + }, + { + "epoch": 0.31654340613768317, + "grad_norm": 1.0701062933133407, + "learning_rate": 1.599427336290228e-05, + "loss": 0.4895, + "step": 18319 + }, + { + "epoch": 0.3165606856510921, + "grad_norm": 1.6117464507486465, + "learning_rate": 1.5993825393699746e-05, + "loss": 0.5593, + "step": 18320 + }, + { + "epoch": 0.316577965164501, + "grad_norm": 1.1108751659763425, + "learning_rate": 1.599337740572409e-05, + "loss": 0.3906, + "step": 18321 + }, + { + "epoch": 0.3165952446779099, + "grad_norm": 1.3872046528833333, + "learning_rate": 1.5992929398976723e-05, + "loss": 0.674, + "step": 18322 + }, + { + "epoch": 0.31661252419131874, + "grad_norm": 1.4760166690387375, + "learning_rate": 1.5992481373459047e-05, + "loss": 0.6459, + "step": 18323 + }, + { + "epoch": 0.31662980370472765, + "grad_norm": 0.945782659875184, + "learning_rate": 1.5992033329172466e-05, + "loss": 0.5565, + "step": 18324 + }, + { + "epoch": 0.31664708321813656, + "grad_norm": 1.2277689132090583, + "learning_rate": 1.5991585266118375e-05, + "loss": 0.482, + "step": 18325 + }, + { + "epoch": 0.31666436273154547, + "grad_norm": 1.3742015535726855, + "learning_rate": 1.5991137184298186e-05, + "loss": 0.5994, + "step": 18326 + }, + { + "epoch": 0.3166816422449544, + "grad_norm": 0.9562819534644101, + "learning_rate": 1.5990689083713302e-05, + "loss": 0.388, + "step": 18327 + }, + { + "epoch": 0.3166989217583633, + "grad_norm": 1.8336313405996763, + "learning_rate": 1.599024096436512e-05, + "loss": 0.5811, + "step": 18328 + }, + { + "epoch": 0.3167162012717722, + "grad_norm": 0.4151765680121013, + "learning_rate": 1.598979282625505e-05, + "loss": 0.4815, + "step": 18329 + }, + { + "epoch": 0.3167334807851811, + "grad_norm": 0.8877832991287021, + "learning_rate": 1.5989344669384497e-05, + "loss": 0.4942, + "step": 18330 + }, + { + "epoch": 0.31675076029859, + "grad_norm": 1.1867866410775234, + "learning_rate": 1.598889649375486e-05, + "loss": 0.5554, + "step": 18331 + }, + { + "epoch": 0.3167680398119989, + "grad_norm": 1.1121418093105018, + "learning_rate": 1.598844829936754e-05, + "loss": 0.4156, + "step": 18332 + }, + { + "epoch": 0.3167853193254078, + "grad_norm": 0.9177595181624654, + "learning_rate": 1.5988000086223953e-05, + "loss": 0.4489, + "step": 18333 + }, + { + "epoch": 0.3168025988388167, + "grad_norm": 0.5698203433140495, + "learning_rate": 1.5987551854325486e-05, + "loss": 0.5881, + "step": 18334 + }, + { + "epoch": 0.3168198783522256, + "grad_norm": 1.258774703320697, + "learning_rate": 1.5987103603673557e-05, + "loss": 0.4663, + "step": 18335 + }, + { + "epoch": 0.3168371578656345, + "grad_norm": 0.6843241556094455, + "learning_rate": 1.598665533426956e-05, + "loss": 0.4026, + "step": 18336 + }, + { + "epoch": 0.3168544373790434, + "grad_norm": 1.0229946133381986, + "learning_rate": 1.598620704611491e-05, + "loss": 0.6433, + "step": 18337 + }, + { + "epoch": 0.3168717168924523, + "grad_norm": 1.065015994288638, + "learning_rate": 1.5985758739211e-05, + "loss": 0.5607, + "step": 18338 + }, + { + "epoch": 0.3168889964058612, + "grad_norm": 0.786001514358412, + "learning_rate": 1.5985310413559237e-05, + "loss": 0.4637, + "step": 18339 + }, + { + "epoch": 0.3169062759192701, + "grad_norm": 0.6715294388690671, + "learning_rate": 1.598486206916103e-05, + "loss": 0.4025, + "step": 18340 + }, + { + "epoch": 0.316923555432679, + "grad_norm": 0.6806370279104927, + "learning_rate": 1.598441370601778e-05, + "loss": 0.5153, + "step": 18341 + }, + { + "epoch": 0.31694083494608793, + "grad_norm": 1.5871885320179142, + "learning_rate": 1.5983965324130892e-05, + "loss": 0.3123, + "step": 18342 + }, + { + "epoch": 0.31695811445949684, + "grad_norm": 0.7965907784992963, + "learning_rate": 1.5983516923501768e-05, + "loss": 0.4491, + "step": 18343 + }, + { + "epoch": 0.31697539397290575, + "grad_norm": 0.7230545242638572, + "learning_rate": 1.5983068504131813e-05, + "loss": 0.3332, + "step": 18344 + }, + { + "epoch": 0.3169926734863146, + "grad_norm": 0.8772420023941386, + "learning_rate": 1.5982620066022437e-05, + "loss": 0.5054, + "step": 18345 + }, + { + "epoch": 0.3170099529997235, + "grad_norm": 0.9086858540902217, + "learning_rate": 1.598217160917504e-05, + "loss": 0.3359, + "step": 18346 + }, + { + "epoch": 0.3170272325131324, + "grad_norm": 0.7430721456552617, + "learning_rate": 1.5981723133591024e-05, + "loss": 0.5703, + "step": 18347 + }, + { + "epoch": 0.3170445120265413, + "grad_norm": 0.7677768645002779, + "learning_rate": 1.5981274639271796e-05, + "loss": 0.4555, + "step": 18348 + }, + { + "epoch": 0.31706179153995023, + "grad_norm": 0.8770763525524159, + "learning_rate": 1.5980826126218764e-05, + "loss": 0.3956, + "step": 18349 + }, + { + "epoch": 0.31707907105335914, + "grad_norm": 1.2299551123248558, + "learning_rate": 1.5980377594433324e-05, + "loss": 0.5567, + "step": 18350 + }, + { + "epoch": 0.31709635056676805, + "grad_norm": 0.838323264547013, + "learning_rate": 1.597992904391689e-05, + "loss": 0.3311, + "step": 18351 + }, + { + "epoch": 0.31711363008017696, + "grad_norm": 1.206043469584223, + "learning_rate": 1.597948047467087e-05, + "loss": 0.4726, + "step": 18352 + }, + { + "epoch": 0.31713090959358586, + "grad_norm": 1.2448542183416225, + "learning_rate": 1.597903188669666e-05, + "loss": 0.4472, + "step": 18353 + }, + { + "epoch": 0.31714818910699477, + "grad_norm": 1.0040107007462997, + "learning_rate": 1.597858327999566e-05, + "loss": 0.525, + "step": 18354 + }, + { + "epoch": 0.3171654686204036, + "grad_norm": 0.7695067695819144, + "learning_rate": 1.5978134654569296e-05, + "loss": 0.4236, + "step": 18355 + }, + { + "epoch": 0.31718274813381253, + "grad_norm": 0.7704188826666627, + "learning_rate": 1.597768601041895e-05, + "loss": 0.5478, + "step": 18356 + }, + { + "epoch": 0.31720002764722144, + "grad_norm": 1.3146403065867447, + "learning_rate": 1.597723734754604e-05, + "loss": 0.3617, + "step": 18357 + }, + { + "epoch": 0.31721730716063035, + "grad_norm": 0.4733402263296577, + "learning_rate": 1.5976788665951967e-05, + "loss": 0.6756, + "step": 18358 + }, + { + "epoch": 0.31723458667403925, + "grad_norm": 1.3404661681125045, + "learning_rate": 1.5976339965638138e-05, + "loss": 0.6937, + "step": 18359 + }, + { + "epoch": 0.31725186618744816, + "grad_norm": 1.1190474707356746, + "learning_rate": 1.5975891246605958e-05, + "loss": 0.5654, + "step": 18360 + }, + { + "epoch": 0.31726914570085707, + "grad_norm": 0.738670181255221, + "learning_rate": 1.5975442508856837e-05, + "loss": 0.3278, + "step": 18361 + }, + { + "epoch": 0.317286425214266, + "grad_norm": 0.6886382620326443, + "learning_rate": 1.597499375239217e-05, + "loss": 0.5077, + "step": 18362 + }, + { + "epoch": 0.3173037047276749, + "grad_norm": 0.7186038139142882, + "learning_rate": 1.597454497721337e-05, + "loss": 0.5625, + "step": 18363 + }, + { + "epoch": 0.3173209842410838, + "grad_norm": 0.7444953537181838, + "learning_rate": 1.5974096183321846e-05, + "loss": 0.5136, + "step": 18364 + }, + { + "epoch": 0.3173382637544927, + "grad_norm": 0.9917932500772666, + "learning_rate": 1.5973647370718993e-05, + "loss": 0.5014, + "step": 18365 + }, + { + "epoch": 0.31735554326790155, + "grad_norm": 1.5012737645020262, + "learning_rate": 1.5973198539406227e-05, + "loss": 0.4208, + "step": 18366 + }, + { + "epoch": 0.31737282278131046, + "grad_norm": 0.9584933224229957, + "learning_rate": 1.5972749689384945e-05, + "loss": 0.5557, + "step": 18367 + }, + { + "epoch": 0.31739010229471937, + "grad_norm": 1.387905847106702, + "learning_rate": 1.5972300820656558e-05, + "loss": 0.5094, + "step": 18368 + }, + { + "epoch": 0.3174073818081283, + "grad_norm": 1.0278943720205005, + "learning_rate": 1.5971851933222468e-05, + "loss": 0.4668, + "step": 18369 + }, + { + "epoch": 0.3174246613215372, + "grad_norm": 0.9398259510295487, + "learning_rate": 1.5971403027084088e-05, + "loss": 0.6904, + "step": 18370 + }, + { + "epoch": 0.3174419408349461, + "grad_norm": 1.8205341956688144, + "learning_rate": 1.597095410224282e-05, + "loss": 0.6849, + "step": 18371 + }, + { + "epoch": 0.317459220348355, + "grad_norm": 1.2470253908548028, + "learning_rate": 1.597050515870007e-05, + "loss": 0.5725, + "step": 18372 + }, + { + "epoch": 0.3174764998617639, + "grad_norm": 1.1132338228594723, + "learning_rate": 1.597005619645724e-05, + "loss": 0.5694, + "step": 18373 + }, + { + "epoch": 0.3174937793751728, + "grad_norm": 0.9089964287913667, + "learning_rate": 1.596960721551575e-05, + "loss": 0.5347, + "step": 18374 + }, + { + "epoch": 0.3175110588885817, + "grad_norm": 0.7427923709790503, + "learning_rate": 1.5969158215876987e-05, + "loss": 0.4144, + "step": 18375 + }, + { + "epoch": 0.3175283384019906, + "grad_norm": 1.1968193752288814, + "learning_rate": 1.596870919754237e-05, + "loss": 0.5337, + "step": 18376 + }, + { + "epoch": 0.3175456179153995, + "grad_norm": 0.7257680535293198, + "learning_rate": 1.5968260160513304e-05, + "loss": 0.4613, + "step": 18377 + }, + { + "epoch": 0.3175628974288084, + "grad_norm": 0.7085935132185762, + "learning_rate": 1.596781110479119e-05, + "loss": 0.3017, + "step": 18378 + }, + { + "epoch": 0.3175801769422173, + "grad_norm": 0.7695106588618909, + "learning_rate": 1.596736203037744e-05, + "loss": 0.3354, + "step": 18379 + }, + { + "epoch": 0.3175974564556262, + "grad_norm": 1.2405069442371741, + "learning_rate": 1.5966912937273456e-05, + "loss": 0.4282, + "step": 18380 + }, + { + "epoch": 0.3176147359690351, + "grad_norm": 1.058068028084635, + "learning_rate": 1.596646382548065e-05, + "loss": 0.3207, + "step": 18381 + }, + { + "epoch": 0.317632015482444, + "grad_norm": 0.9498325405418782, + "learning_rate": 1.5966014695000427e-05, + "loss": 0.5578, + "step": 18382 + }, + { + "epoch": 0.3176492949958529, + "grad_norm": 1.6280752588893217, + "learning_rate": 1.596556554583419e-05, + "loss": 0.5612, + "step": 18383 + }, + { + "epoch": 0.31766657450926183, + "grad_norm": 1.1040669266737186, + "learning_rate": 1.5965116377983348e-05, + "loss": 0.4045, + "step": 18384 + }, + { + "epoch": 0.31768385402267074, + "grad_norm": 1.6556620783308948, + "learning_rate": 1.5964667191449314e-05, + "loss": 0.6057, + "step": 18385 + }, + { + "epoch": 0.31770113353607965, + "grad_norm": 1.1721338717174559, + "learning_rate": 1.5964217986233483e-05, + "loss": 0.7125, + "step": 18386 + }, + { + "epoch": 0.3177184130494885, + "grad_norm": 0.9693871043036981, + "learning_rate": 1.596376876233727e-05, + "loss": 0.5212, + "step": 18387 + }, + { + "epoch": 0.3177356925628974, + "grad_norm": 0.9873906120050194, + "learning_rate": 1.596331951976208e-05, + "loss": 0.4256, + "step": 18388 + }, + { + "epoch": 0.3177529720763063, + "grad_norm": 0.8727318384976399, + "learning_rate": 1.5962870258509316e-05, + "loss": 0.7031, + "step": 18389 + }, + { + "epoch": 0.3177702515897152, + "grad_norm": 1.4426800188205968, + "learning_rate": 1.5962420978580392e-05, + "loss": 0.6368, + "step": 18390 + }, + { + "epoch": 0.31778753110312413, + "grad_norm": 0.9959913069923059, + "learning_rate": 1.5961971679976716e-05, + "loss": 0.7469, + "step": 18391 + }, + { + "epoch": 0.31780481061653304, + "grad_norm": 1.0418134224125677, + "learning_rate": 1.5961522362699687e-05, + "loss": 0.3181, + "step": 18392 + }, + { + "epoch": 0.31782209012994195, + "grad_norm": 1.3977002314196834, + "learning_rate": 1.5961073026750718e-05, + "loss": 0.5153, + "step": 18393 + }, + { + "epoch": 0.31783936964335086, + "grad_norm": 1.3792191542080157, + "learning_rate": 1.596062367213122e-05, + "loss": 0.572, + "step": 18394 + }, + { + "epoch": 0.31785664915675976, + "grad_norm": 0.821236162496169, + "learning_rate": 1.596017429884259e-05, + "loss": 0.3625, + "step": 18395 + }, + { + "epoch": 0.31787392867016867, + "grad_norm": 1.1720452537502104, + "learning_rate": 1.595972490688624e-05, + "loss": 0.4019, + "step": 18396 + }, + { + "epoch": 0.3178912081835775, + "grad_norm": 0.725558287097334, + "learning_rate": 1.5959275496263583e-05, + "loss": 0.4381, + "step": 18397 + }, + { + "epoch": 0.31790848769698643, + "grad_norm": 1.5709963222894352, + "learning_rate": 1.595882606697602e-05, + "loss": 0.5648, + "step": 18398 + }, + { + "epoch": 0.31792576721039534, + "grad_norm": 1.266440880298971, + "learning_rate": 1.5958376619024963e-05, + "loss": 0.4833, + "step": 18399 + }, + { + "epoch": 0.31794304672380425, + "grad_norm": 1.2269106671268426, + "learning_rate": 1.5957927152411818e-05, + "loss": 0.4225, + "step": 18400 + }, + { + "epoch": 0.31796032623721315, + "grad_norm": 0.6918896927365383, + "learning_rate": 1.595747766713799e-05, + "loss": 0.5397, + "step": 18401 + }, + { + "epoch": 0.31797760575062206, + "grad_norm": 1.0478315475959272, + "learning_rate": 1.595702816320489e-05, + "loss": 0.4498, + "step": 18402 + }, + { + "epoch": 0.31799488526403097, + "grad_norm": 0.7128531817047832, + "learning_rate": 1.5956578640613924e-05, + "loss": 0.4733, + "step": 18403 + }, + { + "epoch": 0.3180121647774399, + "grad_norm": 0.8400239502633816, + "learning_rate": 1.5956129099366503e-05, + "loss": 0.4798, + "step": 18404 + }, + { + "epoch": 0.3180294442908488, + "grad_norm": 1.2269004814250661, + "learning_rate": 1.5955679539464032e-05, + "loss": 0.6209, + "step": 18405 + }, + { + "epoch": 0.3180467238042577, + "grad_norm": 0.880732306132638, + "learning_rate": 1.595522996090792e-05, + "loss": 0.555, + "step": 18406 + }, + { + "epoch": 0.3180640033176666, + "grad_norm": 0.8578082312947742, + "learning_rate": 1.5954780363699573e-05, + "loss": 0.4882, + "step": 18407 + }, + { + "epoch": 0.31808128283107545, + "grad_norm": 1.0102156602353354, + "learning_rate": 1.5954330747840405e-05, + "loss": 0.3981, + "step": 18408 + }, + { + "epoch": 0.31809856234448436, + "grad_norm": 0.8042704916363043, + "learning_rate": 1.5953881113331817e-05, + "loss": 0.4914, + "step": 18409 + }, + { + "epoch": 0.31811584185789327, + "grad_norm": 0.8653900698804728, + "learning_rate": 1.5953431460175223e-05, + "loss": 0.4808, + "step": 18410 + }, + { + "epoch": 0.3181331213713022, + "grad_norm": 0.9320624382186524, + "learning_rate": 1.5952981788372032e-05, + "loss": 0.492, + "step": 18411 + }, + { + "epoch": 0.3181504008847111, + "grad_norm": 0.9210644329427602, + "learning_rate": 1.5952532097923644e-05, + "loss": 0.5014, + "step": 18412 + }, + { + "epoch": 0.31816768039812, + "grad_norm": 1.0633730206090064, + "learning_rate": 1.5952082388831477e-05, + "loss": 0.363, + "step": 18413 + }, + { + "epoch": 0.3181849599115289, + "grad_norm": 1.01635852070252, + "learning_rate": 1.5951632661096932e-05, + "loss": 0.5389, + "step": 18414 + }, + { + "epoch": 0.3182022394249378, + "grad_norm": 0.571628326727797, + "learning_rate": 1.595118291472143e-05, + "loss": 0.5758, + "step": 18415 + }, + { + "epoch": 0.3182195189383467, + "grad_norm": 0.5583461689284657, + "learning_rate": 1.595073314970636e-05, + "loss": 0.2764, + "step": 18416 + }, + { + "epoch": 0.3182367984517556, + "grad_norm": 0.9054972117869993, + "learning_rate": 1.5950283366053148e-05, + "loss": 0.7547, + "step": 18417 + }, + { + "epoch": 0.31825407796516453, + "grad_norm": 1.717049183311608, + "learning_rate": 1.5949833563763193e-05, + "loss": 0.5224, + "step": 18418 + }, + { + "epoch": 0.3182713574785734, + "grad_norm": 2.6535468799525215, + "learning_rate": 1.5949383742837907e-05, + "loss": 0.5077, + "step": 18419 + }, + { + "epoch": 0.3182886369919823, + "grad_norm": 1.144802400299295, + "learning_rate": 1.59489339032787e-05, + "loss": 0.4919, + "step": 18420 + }, + { + "epoch": 0.3183059165053912, + "grad_norm": 1.0158285145089125, + "learning_rate": 1.5948484045086982e-05, + "loss": 0.5613, + "step": 18421 + }, + { + "epoch": 0.3183231960188001, + "grad_norm": 1.1911995743086996, + "learning_rate": 1.594803416826416e-05, + "loss": 0.4157, + "step": 18422 + }, + { + "epoch": 0.318340475532209, + "grad_norm": 1.2079264574751378, + "learning_rate": 1.594758427281164e-05, + "loss": 0.4406, + "step": 18423 + }, + { + "epoch": 0.3183577550456179, + "grad_norm": 1.0137589845152526, + "learning_rate": 1.5947134358730835e-05, + "loss": 0.4596, + "step": 18424 + }, + { + "epoch": 0.3183750345590268, + "grad_norm": 0.7581901177568321, + "learning_rate": 1.5946684426023153e-05, + "loss": 0.4837, + "step": 18425 + }, + { + "epoch": 0.31839231407243573, + "grad_norm": 1.406143743211544, + "learning_rate": 1.5946234474690006e-05, + "loss": 0.7492, + "step": 18426 + }, + { + "epoch": 0.31840959358584464, + "grad_norm": 0.6595035930135431, + "learning_rate": 1.5945784504732797e-05, + "loss": 0.5897, + "step": 18427 + }, + { + "epoch": 0.31842687309925355, + "grad_norm": 0.9177741869148972, + "learning_rate": 1.594533451615294e-05, + "loss": 0.4355, + "step": 18428 + }, + { + "epoch": 0.3184441526126624, + "grad_norm": 1.1997589234901778, + "learning_rate": 1.5944884508951845e-05, + "loss": 0.4495, + "step": 18429 + }, + { + "epoch": 0.3184614321260713, + "grad_norm": 1.2384753988236448, + "learning_rate": 1.594443448313092e-05, + "loss": 0.4003, + "step": 18430 + }, + { + "epoch": 0.3184787116394802, + "grad_norm": 0.891813235871613, + "learning_rate": 1.5943984438691572e-05, + "loss": 0.3945, + "step": 18431 + }, + { + "epoch": 0.3184959911528891, + "grad_norm": 0.9108648408372558, + "learning_rate": 1.5943534375635216e-05, + "loss": 0.6051, + "step": 18432 + }, + { + "epoch": 0.31851327066629803, + "grad_norm": 1.4515433747916067, + "learning_rate": 1.5943084293963255e-05, + "loss": 0.4911, + "step": 18433 + }, + { + "epoch": 0.31853055017970694, + "grad_norm": 1.211780671494904, + "learning_rate": 1.5942634193677107e-05, + "loss": 0.6499, + "step": 18434 + }, + { + "epoch": 0.31854782969311585, + "grad_norm": 1.4116093936632634, + "learning_rate": 1.5942184074778177e-05, + "loss": 0.3973, + "step": 18435 + }, + { + "epoch": 0.31856510920652475, + "grad_norm": 1.1876594347098006, + "learning_rate": 1.594173393726787e-05, + "loss": 0.3828, + "step": 18436 + }, + { + "epoch": 0.31858238871993366, + "grad_norm": 0.8987672723992338, + "learning_rate": 1.5941283781147604e-05, + "loss": 0.6056, + "step": 18437 + }, + { + "epoch": 0.31859966823334257, + "grad_norm": 0.6631309599955726, + "learning_rate": 1.5940833606418787e-05, + "loss": 0.3965, + "step": 18438 + }, + { + "epoch": 0.3186169477467515, + "grad_norm": 0.9176451304030464, + "learning_rate": 1.5940383413082824e-05, + "loss": 0.5249, + "step": 18439 + }, + { + "epoch": 0.31863422726016033, + "grad_norm": 0.8242759755423025, + "learning_rate": 1.593993320114113e-05, + "loss": 0.3469, + "step": 18440 + }, + { + "epoch": 0.31865150677356924, + "grad_norm": 0.8471139662704621, + "learning_rate": 1.5939482970595115e-05, + "loss": 0.5832, + "step": 18441 + }, + { + "epoch": 0.31866878628697815, + "grad_norm": 0.9494792817654555, + "learning_rate": 1.5939032721446186e-05, + "loss": 0.3765, + "step": 18442 + }, + { + "epoch": 0.31868606580038705, + "grad_norm": 1.1940195919452743, + "learning_rate": 1.5938582453695754e-05, + "loss": 0.6725, + "step": 18443 + }, + { + "epoch": 0.31870334531379596, + "grad_norm": 1.3177039655854919, + "learning_rate": 1.5938132167345233e-05, + "loss": 0.4783, + "step": 18444 + }, + { + "epoch": 0.31872062482720487, + "grad_norm": 0.40349947929871516, + "learning_rate": 1.593768186239603e-05, + "loss": 0.5038, + "step": 18445 + }, + { + "epoch": 0.3187379043406138, + "grad_norm": 0.9481431165097982, + "learning_rate": 1.593723153884956e-05, + "loss": 0.3588, + "step": 18446 + }, + { + "epoch": 0.3187551838540227, + "grad_norm": 1.3695456323548698, + "learning_rate": 1.5936781196707225e-05, + "loss": 0.5352, + "step": 18447 + }, + { + "epoch": 0.3187724633674316, + "grad_norm": 0.7818894922787853, + "learning_rate": 1.593633083597044e-05, + "loss": 0.4918, + "step": 18448 + }, + { + "epoch": 0.3187897428808405, + "grad_norm": 1.8256456813015913, + "learning_rate": 1.5935880456640618e-05, + "loss": 0.4148, + "step": 18449 + }, + { + "epoch": 0.31880702239424935, + "grad_norm": 0.699995746408933, + "learning_rate": 1.5935430058719165e-05, + "loss": 0.4224, + "step": 18450 + }, + { + "epoch": 0.31882430190765826, + "grad_norm": 1.482246127768185, + "learning_rate": 1.5934979642207497e-05, + "loss": 0.5135, + "step": 18451 + }, + { + "epoch": 0.31884158142106717, + "grad_norm": 1.1400803430623356, + "learning_rate": 1.593452920710702e-05, + "loss": 0.5981, + "step": 18452 + }, + { + "epoch": 0.3188588609344761, + "grad_norm": 1.9442361672118529, + "learning_rate": 1.5934078753419147e-05, + "loss": 0.5104, + "step": 18453 + }, + { + "epoch": 0.318876140447885, + "grad_norm": 0.9971486626135078, + "learning_rate": 1.5933628281145285e-05, + "loss": 0.3461, + "step": 18454 + }, + { + "epoch": 0.3188934199612939, + "grad_norm": 0.9266344667547531, + "learning_rate": 1.593317779028685e-05, + "loss": 0.4694, + "step": 18455 + }, + { + "epoch": 0.3189106994747028, + "grad_norm": 0.9909126489341692, + "learning_rate": 1.5932727280845254e-05, + "loss": 0.3873, + "step": 18456 + }, + { + "epoch": 0.3189279789881117, + "grad_norm": 1.3547595449050476, + "learning_rate": 1.5932276752821905e-05, + "loss": 0.4694, + "step": 18457 + }, + { + "epoch": 0.3189452585015206, + "grad_norm": 1.4187760231847824, + "learning_rate": 1.5931826206218214e-05, + "loss": 0.5043, + "step": 18458 + }, + { + "epoch": 0.3189625380149295, + "grad_norm": 1.443247526667254, + "learning_rate": 1.593137564103559e-05, + "loss": 0.7724, + "step": 18459 + }, + { + "epoch": 0.3189798175283384, + "grad_norm": 0.9443444133796561, + "learning_rate": 1.5930925057275448e-05, + "loss": 0.6783, + "step": 18460 + }, + { + "epoch": 0.3189970970417473, + "grad_norm": 2.0518405869812035, + "learning_rate": 1.59304744549392e-05, + "loss": 0.5095, + "step": 18461 + }, + { + "epoch": 0.3190143765551562, + "grad_norm": 1.0766500768228828, + "learning_rate": 1.5930023834028255e-05, + "loss": 0.4375, + "step": 18462 + }, + { + "epoch": 0.3190316560685651, + "grad_norm": 0.8702906320538304, + "learning_rate": 1.592957319454402e-05, + "loss": 0.5464, + "step": 18463 + }, + { + "epoch": 0.319048935581974, + "grad_norm": 1.1242578177685998, + "learning_rate": 1.5929122536487914e-05, + "loss": 0.4986, + "step": 18464 + }, + { + "epoch": 0.3190662150953829, + "grad_norm": 0.8487392525762691, + "learning_rate": 1.5928671859861345e-05, + "loss": 0.4358, + "step": 18465 + }, + { + "epoch": 0.3190834946087918, + "grad_norm": 1.0609053385719793, + "learning_rate": 1.5928221164665728e-05, + "loss": 0.3177, + "step": 18466 + }, + { + "epoch": 0.3191007741222007, + "grad_norm": 0.9727771428241399, + "learning_rate": 1.592777045090247e-05, + "loss": 0.4343, + "step": 18467 + }, + { + "epoch": 0.31911805363560963, + "grad_norm": 1.0789650587004211, + "learning_rate": 1.5927319718572985e-05, + "loss": 0.5353, + "step": 18468 + }, + { + "epoch": 0.31913533314901854, + "grad_norm": 1.1866574054160983, + "learning_rate": 1.5926868967678687e-05, + "loss": 0.4151, + "step": 18469 + }, + { + "epoch": 0.31915261266242745, + "grad_norm": 0.6691749368110863, + "learning_rate": 1.592641819822098e-05, + "loss": 0.6823, + "step": 18470 + }, + { + "epoch": 0.31916989217583636, + "grad_norm": 1.044512101499023, + "learning_rate": 1.592596741020128e-05, + "loss": 0.4138, + "step": 18471 + }, + { + "epoch": 0.3191871716892452, + "grad_norm": 1.3411993648145335, + "learning_rate": 1.5925516603621003e-05, + "loss": 0.6725, + "step": 18472 + }, + { + "epoch": 0.3192044512026541, + "grad_norm": 1.010426794919708, + "learning_rate": 1.5925065778481556e-05, + "loss": 0.4059, + "step": 18473 + }, + { + "epoch": 0.319221730716063, + "grad_norm": 1.1840323588703348, + "learning_rate": 1.5924614934784356e-05, + "loss": 0.4415, + "step": 18474 + }, + { + "epoch": 0.31923901022947193, + "grad_norm": 1.0054302856673718, + "learning_rate": 1.592416407253081e-05, + "loss": 0.4968, + "step": 18475 + }, + { + "epoch": 0.31925628974288084, + "grad_norm": 0.8796896503043271, + "learning_rate": 1.5923713191722327e-05, + "loss": 0.3796, + "step": 18476 + }, + { + "epoch": 0.31927356925628975, + "grad_norm": 0.8607159305054322, + "learning_rate": 1.592326229236033e-05, + "loss": 0.4033, + "step": 18477 + }, + { + "epoch": 0.31929084876969865, + "grad_norm": 1.252050429066383, + "learning_rate": 1.5922811374446225e-05, + "loss": 0.3599, + "step": 18478 + }, + { + "epoch": 0.31930812828310756, + "grad_norm": 1.155885013009303, + "learning_rate": 1.5922360437981424e-05, + "loss": 0.5192, + "step": 18479 + }, + { + "epoch": 0.31932540779651647, + "grad_norm": 1.5451257820112398, + "learning_rate": 1.5921909482967336e-05, + "loss": 0.5483, + "step": 18480 + }, + { + "epoch": 0.3193426873099254, + "grad_norm": 0.6136835509569342, + "learning_rate": 1.5921458509405382e-05, + "loss": 0.8288, + "step": 18481 + }, + { + "epoch": 0.31935996682333423, + "grad_norm": 1.511578810094618, + "learning_rate": 1.5921007517296968e-05, + "loss": 0.4102, + "step": 18482 + }, + { + "epoch": 0.31937724633674314, + "grad_norm": 1.366533530788906, + "learning_rate": 1.592055650664351e-05, + "loss": 0.5292, + "step": 18483 + }, + { + "epoch": 0.31939452585015204, + "grad_norm": 0.9438814976941554, + "learning_rate": 1.5920105477446418e-05, + "loss": 0.4771, + "step": 18484 + }, + { + "epoch": 0.31941180536356095, + "grad_norm": 1.1527119749388957, + "learning_rate": 1.5919654429707106e-05, + "loss": 0.4545, + "step": 18485 + }, + { + "epoch": 0.31942908487696986, + "grad_norm": 1.4789424417294152, + "learning_rate": 1.5919203363426987e-05, + "loss": 0.4065, + "step": 18486 + }, + { + "epoch": 0.31944636439037877, + "grad_norm": 1.1975342486261746, + "learning_rate": 1.5918752278607472e-05, + "loss": 0.5232, + "step": 18487 + }, + { + "epoch": 0.3194636439037877, + "grad_norm": 1.1228162514536535, + "learning_rate": 1.5918301175249977e-05, + "loss": 0.4739, + "step": 18488 + }, + { + "epoch": 0.3194809234171966, + "grad_norm": 1.5257671664842911, + "learning_rate": 1.5917850053355912e-05, + "loss": 0.4804, + "step": 18489 + }, + { + "epoch": 0.3194982029306055, + "grad_norm": 0.9208460462345134, + "learning_rate": 1.591739891292669e-05, + "loss": 0.587, + "step": 18490 + }, + { + "epoch": 0.3195154824440144, + "grad_norm": 0.44069287717668415, + "learning_rate": 1.5916947753963724e-05, + "loss": 0.6127, + "step": 18491 + }, + { + "epoch": 0.3195327619574233, + "grad_norm": 1.1859023961193562, + "learning_rate": 1.5916496576468433e-05, + "loss": 0.6271, + "step": 18492 + }, + { + "epoch": 0.31955004147083216, + "grad_norm": 0.7784191021040315, + "learning_rate": 1.591604538044222e-05, + "loss": 0.7403, + "step": 18493 + }, + { + "epoch": 0.31956732098424107, + "grad_norm": 1.0275431535707198, + "learning_rate": 1.5915594165886504e-05, + "loss": 0.5056, + "step": 18494 + }, + { + "epoch": 0.31958460049765, + "grad_norm": 1.1526069944623343, + "learning_rate": 1.5915142932802695e-05, + "loss": 0.6958, + "step": 18495 + }, + { + "epoch": 0.3196018800110589, + "grad_norm": 0.4615753901478301, + "learning_rate": 1.5914691681192215e-05, + "loss": 0.6312, + "step": 18496 + }, + { + "epoch": 0.3196191595244678, + "grad_norm": 1.80765720208255, + "learning_rate": 1.591424041105647e-05, + "loss": 0.4694, + "step": 18497 + }, + { + "epoch": 0.3196364390378767, + "grad_norm": 1.5274752729914638, + "learning_rate": 1.591378912239687e-05, + "loss": 0.5331, + "step": 18498 + }, + { + "epoch": 0.3196537185512856, + "grad_norm": 1.791614349411475, + "learning_rate": 1.591333781521484e-05, + "loss": 0.6267, + "step": 18499 + }, + { + "epoch": 0.3196709980646945, + "grad_norm": 1.85983453839525, + "learning_rate": 1.591288648951178e-05, + "loss": 0.5804, + "step": 18500 + }, + { + "epoch": 0.3196882775781034, + "grad_norm": 1.3107234634654334, + "learning_rate": 1.591243514528911e-05, + "loss": 0.4225, + "step": 18501 + }, + { + "epoch": 0.3197055570915123, + "grad_norm": 0.7534159484926278, + "learning_rate": 1.5911983782548247e-05, + "loss": 0.4945, + "step": 18502 + }, + { + "epoch": 0.3197228366049212, + "grad_norm": 1.4496651382493735, + "learning_rate": 1.5911532401290603e-05, + "loss": 0.4281, + "step": 18503 + }, + { + "epoch": 0.3197401161183301, + "grad_norm": 0.9104380269619333, + "learning_rate": 1.5911081001517587e-05, + "loss": 0.4452, + "step": 18504 + }, + { + "epoch": 0.319757395631739, + "grad_norm": 1.1326367385153746, + "learning_rate": 1.5910629583230617e-05, + "loss": 0.6321, + "step": 18505 + }, + { + "epoch": 0.3197746751451479, + "grad_norm": 1.049068852002027, + "learning_rate": 1.5910178146431104e-05, + "loss": 0.6701, + "step": 18506 + }, + { + "epoch": 0.3197919546585568, + "grad_norm": 2.1066966396646514, + "learning_rate": 1.590972669112047e-05, + "loss": 0.4281, + "step": 18507 + }, + { + "epoch": 0.3198092341719657, + "grad_norm": 1.1039189736443247, + "learning_rate": 1.590927521730012e-05, + "loss": 0.4979, + "step": 18508 + }, + { + "epoch": 0.3198265136853746, + "grad_norm": 0.9635355455397768, + "learning_rate": 1.5908823724971466e-05, + "loss": 0.3284, + "step": 18509 + }, + { + "epoch": 0.31984379319878353, + "grad_norm": 1.2787182615865174, + "learning_rate": 1.590837221413593e-05, + "loss": 0.6044, + "step": 18510 + }, + { + "epoch": 0.31986107271219244, + "grad_norm": 1.5152144758692456, + "learning_rate": 1.5907920684794925e-05, + "loss": 0.4855, + "step": 18511 + }, + { + "epoch": 0.31987835222560135, + "grad_norm": 0.9479105129926709, + "learning_rate": 1.5907469136949864e-05, + "loss": 0.6004, + "step": 18512 + }, + { + "epoch": 0.31989563173901026, + "grad_norm": 0.9727147737888712, + "learning_rate": 1.5907017570602158e-05, + "loss": 0.398, + "step": 18513 + }, + { + "epoch": 0.3199129112524191, + "grad_norm": 0.8248074180595008, + "learning_rate": 1.5906565985753226e-05, + "loss": 0.4867, + "step": 18514 + }, + { + "epoch": 0.319930190765828, + "grad_norm": 1.2224070878993345, + "learning_rate": 1.590611438240448e-05, + "loss": 0.4153, + "step": 18515 + }, + { + "epoch": 0.3199474702792369, + "grad_norm": 0.7650792948606329, + "learning_rate": 1.5905662760557333e-05, + "loss": 0.3305, + "step": 18516 + }, + { + "epoch": 0.31996474979264583, + "grad_norm": 0.8220609398398776, + "learning_rate": 1.5905211120213204e-05, + "loss": 0.6785, + "step": 18517 + }, + { + "epoch": 0.31998202930605474, + "grad_norm": 1.2432250877260602, + "learning_rate": 1.5904759461373505e-05, + "loss": 0.3178, + "step": 18518 + }, + { + "epoch": 0.31999930881946365, + "grad_norm": 1.290441909978106, + "learning_rate": 1.590430778403965e-05, + "loss": 0.5697, + "step": 18519 + }, + { + "epoch": 0.32001658833287255, + "grad_norm": 1.137149607680307, + "learning_rate": 1.5903856088213055e-05, + "loss": 0.4828, + "step": 18520 + }, + { + "epoch": 0.32003386784628146, + "grad_norm": 0.9454784080766782, + "learning_rate": 1.590340437389513e-05, + "loss": 0.5102, + "step": 18521 + }, + { + "epoch": 0.32005114735969037, + "grad_norm": 0.778308171297146, + "learning_rate": 1.59029526410873e-05, + "loss": 0.4607, + "step": 18522 + }, + { + "epoch": 0.3200684268730993, + "grad_norm": 0.679128331459289, + "learning_rate": 1.590250088979097e-05, + "loss": 0.3875, + "step": 18523 + }, + { + "epoch": 0.32008570638650813, + "grad_norm": 0.6171034091944195, + "learning_rate": 1.590204912000756e-05, + "loss": 0.5001, + "step": 18524 + }, + { + "epoch": 0.32010298589991704, + "grad_norm": 1.3376405407673237, + "learning_rate": 1.590159733173848e-05, + "loss": 0.4656, + "step": 18525 + }, + { + "epoch": 0.32012026541332594, + "grad_norm": 1.163112489888366, + "learning_rate": 1.5901145524985155e-05, + "loss": 0.4659, + "step": 18526 + }, + { + "epoch": 0.32013754492673485, + "grad_norm": 0.846378905123399, + "learning_rate": 1.5900693699748987e-05, + "loss": 0.4791, + "step": 18527 + }, + { + "epoch": 0.32015482444014376, + "grad_norm": 0.8340815061511107, + "learning_rate": 1.59002418560314e-05, + "loss": 0.6478, + "step": 18528 + }, + { + "epoch": 0.32017210395355267, + "grad_norm": 0.9152893816388803, + "learning_rate": 1.589978999383381e-05, + "loss": 0.2931, + "step": 18529 + }, + { + "epoch": 0.3201893834669616, + "grad_norm": 1.292846160199408, + "learning_rate": 1.5899338113157628e-05, + "loss": 0.62, + "step": 18530 + }, + { + "epoch": 0.3202066629803705, + "grad_norm": 0.9534645214174299, + "learning_rate": 1.589888621400427e-05, + "loss": 0.6119, + "step": 18531 + }, + { + "epoch": 0.3202239424937794, + "grad_norm": 1.0555625605779704, + "learning_rate": 1.589843429637515e-05, + "loss": 0.5006, + "step": 18532 + }, + { + "epoch": 0.3202412220071883, + "grad_norm": 0.9365045570496023, + "learning_rate": 1.589798236027169e-05, + "loss": 0.3994, + "step": 18533 + }, + { + "epoch": 0.3202585015205972, + "grad_norm": 1.0088699639035192, + "learning_rate": 1.58975304056953e-05, + "loss": 0.4014, + "step": 18534 + }, + { + "epoch": 0.32027578103400606, + "grad_norm": 1.0733224126611873, + "learning_rate": 1.589707843264739e-05, + "loss": 0.4372, + "step": 18535 + }, + { + "epoch": 0.32029306054741497, + "grad_norm": 0.7987101658305383, + "learning_rate": 1.5896626441129393e-05, + "loss": 0.3955, + "step": 18536 + }, + { + "epoch": 0.3203103400608239, + "grad_norm": 0.7989150544061726, + "learning_rate": 1.589617443114271e-05, + "loss": 0.6112, + "step": 18537 + }, + { + "epoch": 0.3203276195742328, + "grad_norm": 0.6660379111455365, + "learning_rate": 1.5895722402688755e-05, + "loss": 0.3714, + "step": 18538 + }, + { + "epoch": 0.3203448990876417, + "grad_norm": 1.369871357361048, + "learning_rate": 1.5895270355768957e-05, + "loss": 0.4768, + "step": 18539 + }, + { + "epoch": 0.3203621786010506, + "grad_norm": 0.6322935187776252, + "learning_rate": 1.589481829038472e-05, + "loss": 0.4711, + "step": 18540 + }, + { + "epoch": 0.3203794581144595, + "grad_norm": 1.1585347441548726, + "learning_rate": 1.5894366206537463e-05, + "loss": 0.4989, + "step": 18541 + }, + { + "epoch": 0.3203967376278684, + "grad_norm": 0.8273928502872553, + "learning_rate": 1.5893914104228606e-05, + "loss": 0.4217, + "step": 18542 + }, + { + "epoch": 0.3204140171412773, + "grad_norm": 1.0988851659584002, + "learning_rate": 1.5893461983459563e-05, + "loss": 0.5886, + "step": 18543 + }, + { + "epoch": 0.3204312966546862, + "grad_norm": 1.1228340402684007, + "learning_rate": 1.5893009844231745e-05, + "loss": 0.5886, + "step": 18544 + }, + { + "epoch": 0.32044857616809513, + "grad_norm": 1.1392218985963969, + "learning_rate": 1.5892557686546574e-05, + "loss": 0.4893, + "step": 18545 + }, + { + "epoch": 0.320465855681504, + "grad_norm": 0.7351460455111013, + "learning_rate": 1.5892105510405466e-05, + "loss": 0.4049, + "step": 18546 + }, + { + "epoch": 0.3204831351949129, + "grad_norm": 1.302394412398452, + "learning_rate": 1.5891653315809835e-05, + "loss": 0.2968, + "step": 18547 + }, + { + "epoch": 0.3205004147083218, + "grad_norm": 1.432033587972339, + "learning_rate": 1.5891201102761096e-05, + "loss": 0.3674, + "step": 18548 + }, + { + "epoch": 0.3205176942217307, + "grad_norm": 0.6228059748505163, + "learning_rate": 1.589074887126067e-05, + "loss": 0.5346, + "step": 18549 + }, + { + "epoch": 0.3205349737351396, + "grad_norm": 1.4399730240487865, + "learning_rate": 1.589029662130997e-05, + "loss": 0.508, + "step": 18550 + }, + { + "epoch": 0.3205522532485485, + "grad_norm": 0.4067744125302086, + "learning_rate": 1.5889844352910415e-05, + "loss": 0.7232, + "step": 18551 + }, + { + "epoch": 0.32056953276195743, + "grad_norm": 1.0849354849478787, + "learning_rate": 1.588939206606342e-05, + "loss": 0.3935, + "step": 18552 + }, + { + "epoch": 0.32058681227536634, + "grad_norm": 1.2107519999165255, + "learning_rate": 1.58889397607704e-05, + "loss": 0.5171, + "step": 18553 + }, + { + "epoch": 0.32060409178877525, + "grad_norm": 0.8819825213445673, + "learning_rate": 1.588848743703277e-05, + "loss": 0.4198, + "step": 18554 + }, + { + "epoch": 0.32062137130218415, + "grad_norm": 1.0709853451806681, + "learning_rate": 1.5888035094851956e-05, + "loss": 0.453, + "step": 18555 + }, + { + "epoch": 0.320638650815593, + "grad_norm": 0.8378376785680623, + "learning_rate": 1.5887582734229363e-05, + "loss": 0.6601, + "step": 18556 + }, + { + "epoch": 0.3206559303290019, + "grad_norm": 1.9675294260008065, + "learning_rate": 1.5887130355166415e-05, + "loss": 0.7363, + "step": 18557 + }, + { + "epoch": 0.3206732098424108, + "grad_norm": 1.0833650274458417, + "learning_rate": 1.588667795766453e-05, + "loss": 0.5754, + "step": 18558 + }, + { + "epoch": 0.32069048935581973, + "grad_norm": 0.9981378315949544, + "learning_rate": 1.588622554172512e-05, + "loss": 0.4126, + "step": 18559 + }, + { + "epoch": 0.32070776886922864, + "grad_norm": 0.6891205108898489, + "learning_rate": 1.5885773107349604e-05, + "loss": 0.4877, + "step": 18560 + }, + { + "epoch": 0.32072504838263755, + "grad_norm": 0.5875082216826226, + "learning_rate": 1.58853206545394e-05, + "loss": 0.4569, + "step": 18561 + }, + { + "epoch": 0.32074232789604645, + "grad_norm": 1.3293284324008665, + "learning_rate": 1.5884868183295924e-05, + "loss": 0.6885, + "step": 18562 + }, + { + "epoch": 0.32075960740945536, + "grad_norm": 1.7874132565437708, + "learning_rate": 1.5884415693620594e-05, + "loss": 0.5661, + "step": 18563 + }, + { + "epoch": 0.32077688692286427, + "grad_norm": 0.7966949938679032, + "learning_rate": 1.588396318551483e-05, + "loss": 0.3374, + "step": 18564 + }, + { + "epoch": 0.3207941664362732, + "grad_norm": 0.9071438564341919, + "learning_rate": 1.588351065898004e-05, + "loss": 0.4012, + "step": 18565 + }, + { + "epoch": 0.3208114459496821, + "grad_norm": 1.0776529285224181, + "learning_rate": 1.588305811401765e-05, + "loss": 0.5903, + "step": 18566 + }, + { + "epoch": 0.32082872546309094, + "grad_norm": 1.1502878222469357, + "learning_rate": 1.5882605550629078e-05, + "loss": 0.4893, + "step": 18567 + }, + { + "epoch": 0.32084600497649984, + "grad_norm": 1.2067776094521563, + "learning_rate": 1.5882152968815733e-05, + "loss": 0.484, + "step": 18568 + }, + { + "epoch": 0.32086328448990875, + "grad_norm": 0.9558717433782604, + "learning_rate": 1.5881700368579042e-05, + "loss": 0.339, + "step": 18569 + }, + { + "epoch": 0.32088056400331766, + "grad_norm": 0.8980075887491237, + "learning_rate": 1.5881247749920418e-05, + "loss": 0.5464, + "step": 18570 + }, + { + "epoch": 0.32089784351672657, + "grad_norm": 1.2216164444535904, + "learning_rate": 1.5880795112841277e-05, + "loss": 0.4432, + "step": 18571 + }, + { + "epoch": 0.3209151230301355, + "grad_norm": 1.1651968945272264, + "learning_rate": 1.5880342457343042e-05, + "loss": 0.5196, + "step": 18572 + }, + { + "epoch": 0.3209324025435444, + "grad_norm": 1.2278108218983665, + "learning_rate": 1.5879889783427126e-05, + "loss": 0.4153, + "step": 18573 + }, + { + "epoch": 0.3209496820569533, + "grad_norm": 0.9489508933587394, + "learning_rate": 1.5879437091094945e-05, + "loss": 0.5029, + "step": 18574 + }, + { + "epoch": 0.3209669615703622, + "grad_norm": 0.8859752091183241, + "learning_rate": 1.5878984380347925e-05, + "loss": 0.4734, + "step": 18575 + }, + { + "epoch": 0.3209842410837711, + "grad_norm": 1.4247619051861022, + "learning_rate": 1.5878531651187477e-05, + "loss": 0.6837, + "step": 18576 + }, + { + "epoch": 0.32100152059717996, + "grad_norm": 1.5718570965945549, + "learning_rate": 1.5878078903615022e-05, + "loss": 0.4382, + "step": 18577 + }, + { + "epoch": 0.32101880011058886, + "grad_norm": 1.177527083133673, + "learning_rate": 1.5877626137631974e-05, + "loss": 0.5969, + "step": 18578 + }, + { + "epoch": 0.32103607962399777, + "grad_norm": 1.1090926947967459, + "learning_rate": 1.587717335323976e-05, + "loss": 0.5957, + "step": 18579 + }, + { + "epoch": 0.3210533591374067, + "grad_norm": 0.45265179233421665, + "learning_rate": 1.5876720550439788e-05, + "loss": 0.5823, + "step": 18580 + }, + { + "epoch": 0.3210706386508156, + "grad_norm": 1.238178991514086, + "learning_rate": 1.5876267729233482e-05, + "loss": 0.4387, + "step": 18581 + }, + { + "epoch": 0.3210879181642245, + "grad_norm": 1.7715104429209372, + "learning_rate": 1.5875814889622255e-05, + "loss": 0.6543, + "step": 18582 + }, + { + "epoch": 0.3211051976776334, + "grad_norm": 0.8892402646690162, + "learning_rate": 1.5875362031607536e-05, + "loss": 0.2928, + "step": 18583 + }, + { + "epoch": 0.3211224771910423, + "grad_norm": 0.47098079775497004, + "learning_rate": 1.5874909155190735e-05, + "loss": 0.7222, + "step": 18584 + }, + { + "epoch": 0.3211397567044512, + "grad_norm": 2.0724033711908527, + "learning_rate": 1.5874456260373267e-05, + "loss": 0.6009, + "step": 18585 + }, + { + "epoch": 0.3211570362178601, + "grad_norm": 0.809501173107802, + "learning_rate": 1.5874003347156557e-05, + "loss": 0.6817, + "step": 18586 + }, + { + "epoch": 0.32117431573126903, + "grad_norm": 0.8367877598030907, + "learning_rate": 1.5873550415542028e-05, + "loss": 0.6482, + "step": 18587 + }, + { + "epoch": 0.3211915952446779, + "grad_norm": 0.8130634881652873, + "learning_rate": 1.5873097465531087e-05, + "loss": 0.5433, + "step": 18588 + }, + { + "epoch": 0.3212088747580868, + "grad_norm": 0.9388429021104902, + "learning_rate": 1.587264449712516e-05, + "loss": 0.6003, + "step": 18589 + }, + { + "epoch": 0.3212261542714957, + "grad_norm": 0.642111131023274, + "learning_rate": 1.5872191510325667e-05, + "loss": 0.3849, + "step": 18590 + }, + { + "epoch": 0.3212434337849046, + "grad_norm": 0.3805370428703603, + "learning_rate": 1.587173850513402e-05, + "loss": 0.5927, + "step": 18591 + }, + { + "epoch": 0.3212607132983135, + "grad_norm": 1.269896401051164, + "learning_rate": 1.5871285481551645e-05, + "loss": 0.4909, + "step": 18592 + }, + { + "epoch": 0.3212779928117224, + "grad_norm": 1.0687696221017267, + "learning_rate": 1.5870832439579953e-05, + "loss": 0.3818, + "step": 18593 + }, + { + "epoch": 0.32129527232513133, + "grad_norm": 0.9825818430853422, + "learning_rate": 1.587037937922037e-05, + "loss": 0.3634, + "step": 18594 + }, + { + "epoch": 0.32131255183854024, + "grad_norm": 0.8519962262886869, + "learning_rate": 1.5869926300474316e-05, + "loss": 0.4903, + "step": 18595 + }, + { + "epoch": 0.32132983135194915, + "grad_norm": 1.1435785727107957, + "learning_rate": 1.5869473203343206e-05, + "loss": 0.6667, + "step": 18596 + }, + { + "epoch": 0.32134711086535805, + "grad_norm": 1.1647639825137874, + "learning_rate": 1.5869020087828455e-05, + "loss": 0.7208, + "step": 18597 + }, + { + "epoch": 0.3213643903787669, + "grad_norm": 1.0773942468561006, + "learning_rate": 1.586856695393149e-05, + "loss": 0.351, + "step": 18598 + }, + { + "epoch": 0.3213816698921758, + "grad_norm": 1.6244484897304814, + "learning_rate": 1.5868113801653727e-05, + "loss": 0.486, + "step": 18599 + }, + { + "epoch": 0.3213989494055847, + "grad_norm": 1.000594760179212, + "learning_rate": 1.5867660630996585e-05, + "loss": 0.4913, + "step": 18600 + }, + { + "epoch": 0.32141622891899363, + "grad_norm": 0.669302536611916, + "learning_rate": 1.5867207441961485e-05, + "loss": 0.4677, + "step": 18601 + }, + { + "epoch": 0.32143350843240254, + "grad_norm": 1.1575250143011, + "learning_rate": 1.5866754234549846e-05, + "loss": 0.5548, + "step": 18602 + }, + { + "epoch": 0.32145078794581144, + "grad_norm": 1.740156056245214, + "learning_rate": 1.5866301008763085e-05, + "loss": 0.6902, + "step": 18603 + }, + { + "epoch": 0.32146806745922035, + "grad_norm": 1.1570688912792069, + "learning_rate": 1.5865847764602623e-05, + "loss": 0.505, + "step": 18604 + }, + { + "epoch": 0.32148534697262926, + "grad_norm": 1.4943068048542794, + "learning_rate": 1.586539450206988e-05, + "loss": 0.3171, + "step": 18605 + }, + { + "epoch": 0.32150262648603817, + "grad_norm": 1.2294978898294588, + "learning_rate": 1.5864941221166283e-05, + "loss": 0.3277, + "step": 18606 + }, + { + "epoch": 0.3215199059994471, + "grad_norm": 1.2530430617714161, + "learning_rate": 1.5864487921893236e-05, + "loss": 0.4198, + "step": 18607 + }, + { + "epoch": 0.321537185512856, + "grad_norm": 1.3722046124610952, + "learning_rate": 1.5864034604252168e-05, + "loss": 0.7505, + "step": 18608 + }, + { + "epoch": 0.32155446502626484, + "grad_norm": 1.023817797480943, + "learning_rate": 1.58635812682445e-05, + "loss": 0.4878, + "step": 18609 + }, + { + "epoch": 0.32157174453967374, + "grad_norm": 1.4446438093929712, + "learning_rate": 1.5863127913871647e-05, + "loss": 0.3843, + "step": 18610 + }, + { + "epoch": 0.32158902405308265, + "grad_norm": 1.4090996189070106, + "learning_rate": 1.5862674541135037e-05, + "loss": 0.5611, + "step": 18611 + }, + { + "epoch": 0.32160630356649156, + "grad_norm": 0.5805222843214787, + "learning_rate": 1.586222115003608e-05, + "loss": 0.3895, + "step": 18612 + }, + { + "epoch": 0.32162358307990047, + "grad_norm": 0.900317469908939, + "learning_rate": 1.5861767740576204e-05, + "loss": 0.4797, + "step": 18613 + }, + { + "epoch": 0.3216408625933094, + "grad_norm": 1.1146992660829984, + "learning_rate": 1.5861314312756824e-05, + "loss": 0.5377, + "step": 18614 + }, + { + "epoch": 0.3216581421067183, + "grad_norm": 1.0743145660778484, + "learning_rate": 1.5860860866579364e-05, + "loss": 0.4374, + "step": 18615 + }, + { + "epoch": 0.3216754216201272, + "grad_norm": 0.9594655283949377, + "learning_rate": 1.5860407402045243e-05, + "loss": 0.5041, + "step": 18616 + }, + { + "epoch": 0.3216927011335361, + "grad_norm": 1.1684154750116738, + "learning_rate": 1.5859953919155877e-05, + "loss": 0.3823, + "step": 18617 + }, + { + "epoch": 0.321709980646945, + "grad_norm": 1.0897881162099257, + "learning_rate": 1.5859500417912693e-05, + "loss": 0.5174, + "step": 18618 + }, + { + "epoch": 0.3217272601603539, + "grad_norm": 0.967461726215648, + "learning_rate": 1.5859046898317107e-05, + "loss": 0.6244, + "step": 18619 + }, + { + "epoch": 0.32174453967376276, + "grad_norm": 1.0701156290522358, + "learning_rate": 1.585859336037054e-05, + "loss": 0.6114, + "step": 18620 + }, + { + "epoch": 0.32176181918717167, + "grad_norm": 0.7375971474438329, + "learning_rate": 1.5858139804074414e-05, + "loss": 0.3666, + "step": 18621 + }, + { + "epoch": 0.3217790987005806, + "grad_norm": 1.3372351231932653, + "learning_rate": 1.585768622943015e-05, + "loss": 0.7055, + "step": 18622 + }, + { + "epoch": 0.3217963782139895, + "grad_norm": 0.4286143491455017, + "learning_rate": 1.585723263643917e-05, + "loss": 0.8058, + "step": 18623 + }, + { + "epoch": 0.3218136577273984, + "grad_norm": 1.106680211886444, + "learning_rate": 1.5856779025102885e-05, + "loss": 0.4378, + "step": 18624 + }, + { + "epoch": 0.3218309372408073, + "grad_norm": 1.614501697373286, + "learning_rate": 1.5856325395422726e-05, + "loss": 0.544, + "step": 18625 + }, + { + "epoch": 0.3218482167542162, + "grad_norm": 1.018766568399605, + "learning_rate": 1.5855871747400113e-05, + "loss": 0.5066, + "step": 18626 + }, + { + "epoch": 0.3218654962676251, + "grad_norm": 1.1900318411711994, + "learning_rate": 1.5855418081036465e-05, + "loss": 0.5869, + "step": 18627 + }, + { + "epoch": 0.321882775781034, + "grad_norm": 0.8658810491324289, + "learning_rate": 1.5854964396333202e-05, + "loss": 0.4028, + "step": 18628 + }, + { + "epoch": 0.32190005529444293, + "grad_norm": 0.8969407594522133, + "learning_rate": 1.5854510693291744e-05, + "loss": 0.5526, + "step": 18629 + }, + { + "epoch": 0.3219173348078518, + "grad_norm": 1.288015319669288, + "learning_rate": 1.5854056971913513e-05, + "loss": 0.4302, + "step": 18630 + }, + { + "epoch": 0.3219346143212607, + "grad_norm": 1.5423868425745249, + "learning_rate": 1.5853603232199932e-05, + "loss": 0.4357, + "step": 18631 + }, + { + "epoch": 0.3219518938346696, + "grad_norm": 1.0745584009581992, + "learning_rate": 1.585314947415242e-05, + "loss": 0.3849, + "step": 18632 + }, + { + "epoch": 0.3219691733480785, + "grad_norm": 0.42213550781491554, + "learning_rate": 1.58526956977724e-05, + "loss": 0.519, + "step": 18633 + }, + { + "epoch": 0.3219864528614874, + "grad_norm": 1.0865781021253473, + "learning_rate": 1.5852241903061293e-05, + "loss": 0.5774, + "step": 18634 + }, + { + "epoch": 0.3220037323748963, + "grad_norm": 0.8999252563747062, + "learning_rate": 1.585178809002052e-05, + "loss": 0.4133, + "step": 18635 + }, + { + "epoch": 0.32202101188830523, + "grad_norm": 1.4444684421007306, + "learning_rate": 1.58513342586515e-05, + "loss": 0.6059, + "step": 18636 + }, + { + "epoch": 0.32203829140171414, + "grad_norm": 0.7273773610946452, + "learning_rate": 1.585088040895566e-05, + "loss": 0.5247, + "step": 18637 + }, + { + "epoch": 0.32205557091512305, + "grad_norm": 0.4209144965571641, + "learning_rate": 1.5850426540934415e-05, + "loss": 0.6087, + "step": 18638 + }, + { + "epoch": 0.32207285042853195, + "grad_norm": 0.7005348217141685, + "learning_rate": 1.584997265458919e-05, + "loss": 0.4721, + "step": 18639 + }, + { + "epoch": 0.32209012994194086, + "grad_norm": 0.6435283457772512, + "learning_rate": 1.5849518749921403e-05, + "loss": 0.2971, + "step": 18640 + }, + { + "epoch": 0.3221074094553497, + "grad_norm": 1.0676042061818696, + "learning_rate": 1.5849064826932478e-05, + "loss": 0.486, + "step": 18641 + }, + { + "epoch": 0.3221246889687586, + "grad_norm": 0.7680216016900895, + "learning_rate": 1.5848610885623843e-05, + "loss": 0.5448, + "step": 18642 + }, + { + "epoch": 0.32214196848216753, + "grad_norm": 0.7883830762010425, + "learning_rate": 1.5848156925996914e-05, + "loss": 0.5182, + "step": 18643 + }, + { + "epoch": 0.32215924799557644, + "grad_norm": 0.63310271604046, + "learning_rate": 1.5847702948053108e-05, + "loss": 0.3608, + "step": 18644 + }, + { + "epoch": 0.32217652750898534, + "grad_norm": 0.9933841472707952, + "learning_rate": 1.5847248951793858e-05, + "loss": 0.453, + "step": 18645 + }, + { + "epoch": 0.32219380702239425, + "grad_norm": 1.5511214646962685, + "learning_rate": 1.5846794937220576e-05, + "loss": 0.732, + "step": 18646 + }, + { + "epoch": 0.32221108653580316, + "grad_norm": 1.4003304024656829, + "learning_rate": 1.5846340904334686e-05, + "loss": 0.3766, + "step": 18647 + }, + { + "epoch": 0.32222836604921207, + "grad_norm": 1.611730606930057, + "learning_rate": 1.5845886853137614e-05, + "loss": 0.423, + "step": 18648 + }, + { + "epoch": 0.322245645562621, + "grad_norm": 1.231646099931083, + "learning_rate": 1.5845432783630786e-05, + "loss": 0.3759, + "step": 18649 + }, + { + "epoch": 0.3222629250760299, + "grad_norm": 0.9269876315424006, + "learning_rate": 1.5844978695815612e-05, + "loss": 0.3886, + "step": 18650 + }, + { + "epoch": 0.32228020458943873, + "grad_norm": 1.2485384217140392, + "learning_rate": 1.5844524589693522e-05, + "loss": 0.4031, + "step": 18651 + }, + { + "epoch": 0.32229748410284764, + "grad_norm": 1.657662424176107, + "learning_rate": 1.5844070465265936e-05, + "loss": 0.5467, + "step": 18652 + }, + { + "epoch": 0.32231476361625655, + "grad_norm": 1.5271332620649434, + "learning_rate": 1.584361632253428e-05, + "loss": 0.665, + "step": 18653 + }, + { + "epoch": 0.32233204312966546, + "grad_norm": 0.930120524437654, + "learning_rate": 1.584316216149997e-05, + "loss": 0.4739, + "step": 18654 + }, + { + "epoch": 0.32234932264307437, + "grad_norm": 0.9166252940376787, + "learning_rate": 1.5842707982164432e-05, + "loss": 0.3727, + "step": 18655 + }, + { + "epoch": 0.3223666021564833, + "grad_norm": 0.8090348224543046, + "learning_rate": 1.5842253784529092e-05, + "loss": 0.3366, + "step": 18656 + }, + { + "epoch": 0.3223838816698922, + "grad_norm": 1.0175511390066136, + "learning_rate": 1.5841799568595364e-05, + "loss": 0.2791, + "step": 18657 + }, + { + "epoch": 0.3224011611833011, + "grad_norm": 0.9463286320659325, + "learning_rate": 1.584134533436468e-05, + "loss": 0.3602, + "step": 18658 + }, + { + "epoch": 0.32241844069671, + "grad_norm": 1.2253387693510474, + "learning_rate": 1.5840891081838458e-05, + "loss": 0.6585, + "step": 18659 + }, + { + "epoch": 0.3224357202101189, + "grad_norm": 1.4165004740071516, + "learning_rate": 1.584043681101812e-05, + "loss": 0.4594, + "step": 18660 + }, + { + "epoch": 0.3224529997235278, + "grad_norm": 0.8418717115071606, + "learning_rate": 1.5839982521905088e-05, + "loss": 0.6646, + "step": 18661 + }, + { + "epoch": 0.32247027923693666, + "grad_norm": 0.9270395329871477, + "learning_rate": 1.5839528214500794e-05, + "loss": 0.5932, + "step": 18662 + }, + { + "epoch": 0.32248755875034557, + "grad_norm": 1.0527054066795196, + "learning_rate": 1.583907388880665e-05, + "loss": 0.6861, + "step": 18663 + }, + { + "epoch": 0.3225048382637545, + "grad_norm": 0.8156079047404751, + "learning_rate": 1.583861954482408e-05, + "loss": 0.5044, + "step": 18664 + }, + { + "epoch": 0.3225221177771634, + "grad_norm": 0.4500232286565845, + "learning_rate": 1.5838165182554514e-05, + "loss": 0.6631, + "step": 18665 + }, + { + "epoch": 0.3225393972905723, + "grad_norm": 1.5506761842185208, + "learning_rate": 1.5837710801999366e-05, + "loss": 0.5371, + "step": 18666 + }, + { + "epoch": 0.3225566768039812, + "grad_norm": 1.2532244848033744, + "learning_rate": 1.5837256403160068e-05, + "loss": 0.7535, + "step": 18667 + }, + { + "epoch": 0.3225739563173901, + "grad_norm": 0.48109084443839983, + "learning_rate": 1.583680198603804e-05, + "loss": 0.8874, + "step": 18668 + }, + { + "epoch": 0.322591235830799, + "grad_norm": 1.1128119257673652, + "learning_rate": 1.58363475506347e-05, + "loss": 0.4879, + "step": 18669 + }, + { + "epoch": 0.3226085153442079, + "grad_norm": 0.6668623581959302, + "learning_rate": 1.583589309695148e-05, + "loss": 0.2329, + "step": 18670 + }, + { + "epoch": 0.32262579485761683, + "grad_norm": 0.931616912597217, + "learning_rate": 1.5835438624989797e-05, + "loss": 0.347, + "step": 18671 + }, + { + "epoch": 0.3226430743710257, + "grad_norm": 1.8355496933124529, + "learning_rate": 1.583498413475108e-05, + "loss": 0.5646, + "step": 18672 + }, + { + "epoch": 0.3226603538844346, + "grad_norm": 1.3324344539227457, + "learning_rate": 1.583452962623675e-05, + "loss": 0.4859, + "step": 18673 + }, + { + "epoch": 0.3226776333978435, + "grad_norm": 1.0499772320066754, + "learning_rate": 1.5834075099448224e-05, + "loss": 0.4712, + "step": 18674 + }, + { + "epoch": 0.3226949129112524, + "grad_norm": 1.1761913502758583, + "learning_rate": 1.5833620554386935e-05, + "loss": 0.503, + "step": 18675 + }, + { + "epoch": 0.3227121924246613, + "grad_norm": 0.4133048958366817, + "learning_rate": 1.5833165991054302e-05, + "loss": 0.5777, + "step": 18676 + }, + { + "epoch": 0.3227294719380702, + "grad_norm": 0.6507263372046656, + "learning_rate": 1.583271140945175e-05, + "loss": 0.686, + "step": 18677 + }, + { + "epoch": 0.32274675145147913, + "grad_norm": 1.5022090192117314, + "learning_rate": 1.5832256809580705e-05, + "loss": 0.4882, + "step": 18678 + }, + { + "epoch": 0.32276403096488804, + "grad_norm": 1.5817923845792885, + "learning_rate": 1.5831802191442584e-05, + "loss": 0.5174, + "step": 18679 + }, + { + "epoch": 0.32278131047829695, + "grad_norm": 0.9360201143470488, + "learning_rate": 1.5831347555038817e-05, + "loss": 0.3507, + "step": 18680 + }, + { + "epoch": 0.32279858999170585, + "grad_norm": 0.7704934462733397, + "learning_rate": 1.5830892900370828e-05, + "loss": 0.4459, + "step": 18681 + }, + { + "epoch": 0.32281586950511476, + "grad_norm": 0.7503324546371894, + "learning_rate": 1.583043822744004e-05, + "loss": 0.5842, + "step": 18682 + }, + { + "epoch": 0.3228331490185236, + "grad_norm": 1.1335663214516054, + "learning_rate": 1.5829983536247875e-05, + "loss": 0.4143, + "step": 18683 + }, + { + "epoch": 0.3228504285319325, + "grad_norm": 0.8560567597508215, + "learning_rate": 1.582952882679576e-05, + "loss": 0.4283, + "step": 18684 + }, + { + "epoch": 0.32286770804534143, + "grad_norm": 1.0759365456970589, + "learning_rate": 1.5829074099085114e-05, + "loss": 0.6653, + "step": 18685 + }, + { + "epoch": 0.32288498755875034, + "grad_norm": 0.5934564204762146, + "learning_rate": 1.5828619353117368e-05, + "loss": 0.3639, + "step": 18686 + }, + { + "epoch": 0.32290226707215924, + "grad_norm": 1.1615177598580984, + "learning_rate": 1.582816458889394e-05, + "loss": 0.4007, + "step": 18687 + }, + { + "epoch": 0.32291954658556815, + "grad_norm": 1.4133647929785957, + "learning_rate": 1.582770980641626e-05, + "loss": 0.5007, + "step": 18688 + }, + { + "epoch": 0.32293682609897706, + "grad_norm": 0.891194493593443, + "learning_rate": 1.582725500568575e-05, + "loss": 0.5744, + "step": 18689 + }, + { + "epoch": 0.32295410561238597, + "grad_norm": 0.7696570057808028, + "learning_rate": 1.5826800186703835e-05, + "loss": 0.5454, + "step": 18690 + }, + { + "epoch": 0.3229713851257949, + "grad_norm": 1.1307363870383595, + "learning_rate": 1.5826345349471938e-05, + "loss": 0.5657, + "step": 18691 + }, + { + "epoch": 0.3229886646392038, + "grad_norm": 0.9218635596244238, + "learning_rate": 1.5825890493991485e-05, + "loss": 0.5308, + "step": 18692 + }, + { + "epoch": 0.3230059441526127, + "grad_norm": 0.8743963919953106, + "learning_rate": 1.58254356202639e-05, + "loss": 0.6328, + "step": 18693 + }, + { + "epoch": 0.32302322366602154, + "grad_norm": 0.7480719327195909, + "learning_rate": 1.5824980728290607e-05, + "loss": 0.5249, + "step": 18694 + }, + { + "epoch": 0.32304050317943045, + "grad_norm": 1.059318834506743, + "learning_rate": 1.5824525818073032e-05, + "loss": 0.774, + "step": 18695 + }, + { + "epoch": 0.32305778269283936, + "grad_norm": 0.8802286602194637, + "learning_rate": 1.58240708896126e-05, + "loss": 0.3285, + "step": 18696 + }, + { + "epoch": 0.32307506220624826, + "grad_norm": 0.8293568886895382, + "learning_rate": 1.5823615942910734e-05, + "loss": 0.2953, + "step": 18697 + }, + { + "epoch": 0.3230923417196572, + "grad_norm": 0.5886761039048911, + "learning_rate": 1.5823160977968864e-05, + "loss": 0.4926, + "step": 18698 + }, + { + "epoch": 0.3231096212330661, + "grad_norm": 1.2458104944078836, + "learning_rate": 1.5822705994788406e-05, + "loss": 0.5576, + "step": 18699 + }, + { + "epoch": 0.323126900746475, + "grad_norm": 0.8481548834471814, + "learning_rate": 1.5822250993370797e-05, + "loss": 0.5015, + "step": 18700 + }, + { + "epoch": 0.3231441802598839, + "grad_norm": 1.057484658140707, + "learning_rate": 1.5821795973717453e-05, + "loss": 0.4523, + "step": 18701 + }, + { + "epoch": 0.3231614597732928, + "grad_norm": 1.315405296549893, + "learning_rate": 1.5821340935829796e-05, + "loss": 0.4002, + "step": 18702 + }, + { + "epoch": 0.3231787392867017, + "grad_norm": 1.1510782571206502, + "learning_rate": 1.582088587970926e-05, + "loss": 0.5903, + "step": 18703 + }, + { + "epoch": 0.32319601880011056, + "grad_norm": 2.006620466666476, + "learning_rate": 1.5820430805357273e-05, + "loss": 0.4081, + "step": 18704 + }, + { + "epoch": 0.32321329831351947, + "grad_norm": 0.9601418252992359, + "learning_rate": 1.581997571277525e-05, + "loss": 0.4385, + "step": 18705 + }, + { + "epoch": 0.3232305778269284, + "grad_norm": 1.1066526579224074, + "learning_rate": 1.581952060196462e-05, + "loss": 0.4777, + "step": 18706 + }, + { + "epoch": 0.3232478573403373, + "grad_norm": 0.7974784726527111, + "learning_rate": 1.5819065472926806e-05, + "loss": 0.3877, + "step": 18707 + }, + { + "epoch": 0.3232651368537462, + "grad_norm": 1.2002305329260274, + "learning_rate": 1.581861032566324e-05, + "loss": 0.4793, + "step": 18708 + }, + { + "epoch": 0.3232824163671551, + "grad_norm": 1.439064088231366, + "learning_rate": 1.5818155160175347e-05, + "loss": 0.6165, + "step": 18709 + }, + { + "epoch": 0.323299695880564, + "grad_norm": 1.3972312021986173, + "learning_rate": 1.5817699976464545e-05, + "loss": 0.6331, + "step": 18710 + }, + { + "epoch": 0.3233169753939729, + "grad_norm": 1.0110679183677256, + "learning_rate": 1.581724477453227e-05, + "loss": 0.4427, + "step": 18711 + }, + { + "epoch": 0.3233342549073818, + "grad_norm": 0.9289833687536286, + "learning_rate": 1.5816789554379937e-05, + "loss": 0.6242, + "step": 18712 + }, + { + "epoch": 0.32335153442079073, + "grad_norm": 1.1367722812800438, + "learning_rate": 1.5816334316008978e-05, + "loss": 0.4233, + "step": 18713 + }, + { + "epoch": 0.32336881393419964, + "grad_norm": 1.1701508977710278, + "learning_rate": 1.5815879059420823e-05, + "loss": 0.5857, + "step": 18714 + }, + { + "epoch": 0.3233860934476085, + "grad_norm": 1.4081340573571401, + "learning_rate": 1.5815423784616886e-05, + "loss": 0.6941, + "step": 18715 + }, + { + "epoch": 0.3234033729610174, + "grad_norm": 0.7576913013614207, + "learning_rate": 1.5814968491598603e-05, + "loss": 0.4213, + "step": 18716 + }, + { + "epoch": 0.3234206524744263, + "grad_norm": 1.1500536115317965, + "learning_rate": 1.5814513180367397e-05, + "loss": 0.3597, + "step": 18717 + }, + { + "epoch": 0.3234379319878352, + "grad_norm": 1.4016365722307693, + "learning_rate": 1.581405785092469e-05, + "loss": 0.7893, + "step": 18718 + }, + { + "epoch": 0.3234552115012441, + "grad_norm": 1.0588561636481706, + "learning_rate": 1.5813602503271916e-05, + "loss": 0.2661, + "step": 18719 + }, + { + "epoch": 0.32347249101465303, + "grad_norm": 0.816902506715783, + "learning_rate": 1.5813147137410497e-05, + "loss": 0.5284, + "step": 18720 + }, + { + "epoch": 0.32348977052806194, + "grad_norm": 1.4504723129556911, + "learning_rate": 1.5812691753341858e-05, + "loss": 0.5192, + "step": 18721 + }, + { + "epoch": 0.32350705004147084, + "grad_norm": 1.2627519664858515, + "learning_rate": 1.581223635106743e-05, + "loss": 0.4847, + "step": 18722 + }, + { + "epoch": 0.32352432955487975, + "grad_norm": 0.7451462271996586, + "learning_rate": 1.5811780930588633e-05, + "loss": 0.4037, + "step": 18723 + }, + { + "epoch": 0.32354160906828866, + "grad_norm": 1.0546196433178687, + "learning_rate": 1.5811325491906897e-05, + "loss": 0.4607, + "step": 18724 + }, + { + "epoch": 0.3235588885816975, + "grad_norm": 1.2280233874548208, + "learning_rate": 1.5810870035023646e-05, + "loss": 0.4066, + "step": 18725 + }, + { + "epoch": 0.3235761680951064, + "grad_norm": 0.9767287872208719, + "learning_rate": 1.581041455994031e-05, + "loss": 0.5762, + "step": 18726 + }, + { + "epoch": 0.3235934476085153, + "grad_norm": 0.8465707636589821, + "learning_rate": 1.5809959066658315e-05, + "loss": 0.3754, + "step": 18727 + }, + { + "epoch": 0.32361072712192424, + "grad_norm": 2.164081163627006, + "learning_rate": 1.5809503555179085e-05, + "loss": 0.6865, + "step": 18728 + }, + { + "epoch": 0.32362800663533314, + "grad_norm": 1.069126238291855, + "learning_rate": 1.580904802550405e-05, + "loss": 0.5068, + "step": 18729 + }, + { + "epoch": 0.32364528614874205, + "grad_norm": 1.171221862747813, + "learning_rate": 1.5808592477634633e-05, + "loss": 0.5519, + "step": 18730 + }, + { + "epoch": 0.32366256566215096, + "grad_norm": 1.2006809126662323, + "learning_rate": 1.5808136911572264e-05, + "loss": 0.7253, + "step": 18731 + }, + { + "epoch": 0.32367984517555987, + "grad_norm": 0.921228321512227, + "learning_rate": 1.5807681327318372e-05, + "loss": 0.5364, + "step": 18732 + }, + { + "epoch": 0.3236971246889688, + "grad_norm": 0.604092372192916, + "learning_rate": 1.580722572487438e-05, + "loss": 0.4683, + "step": 18733 + }, + { + "epoch": 0.3237144042023777, + "grad_norm": 0.9827492058421167, + "learning_rate": 1.580677010424171e-05, + "loss": 0.5743, + "step": 18734 + }, + { + "epoch": 0.3237316837157866, + "grad_norm": 0.4231865047281401, + "learning_rate": 1.58063144654218e-05, + "loss": 0.6742, + "step": 18735 + }, + { + "epoch": 0.32374896322919544, + "grad_norm": 0.9805356204556334, + "learning_rate": 1.580585880841607e-05, + "loss": 0.5209, + "step": 18736 + }, + { + "epoch": 0.32376624274260435, + "grad_norm": 0.6747831633692578, + "learning_rate": 1.5805403133225947e-05, + "loss": 0.4178, + "step": 18737 + }, + { + "epoch": 0.32378352225601326, + "grad_norm": 0.7202715038976273, + "learning_rate": 1.5804947439852863e-05, + "loss": 0.4817, + "step": 18738 + }, + { + "epoch": 0.32380080176942216, + "grad_norm": 1.0353158068802246, + "learning_rate": 1.580449172829824e-05, + "loss": 0.4422, + "step": 18739 + }, + { + "epoch": 0.32381808128283107, + "grad_norm": 1.0987556751064138, + "learning_rate": 1.5804035998563508e-05, + "loss": 0.5523, + "step": 18740 + }, + { + "epoch": 0.32383536079624, + "grad_norm": 0.8175538620420894, + "learning_rate": 1.5803580250650098e-05, + "loss": 0.5118, + "step": 18741 + }, + { + "epoch": 0.3238526403096489, + "grad_norm": 0.9493694886978468, + "learning_rate": 1.580312448455943e-05, + "loss": 0.5027, + "step": 18742 + }, + { + "epoch": 0.3238699198230578, + "grad_norm": 0.7491124605753414, + "learning_rate": 1.580266870029294e-05, + "loss": 0.5161, + "step": 18743 + }, + { + "epoch": 0.3238871993364667, + "grad_norm": 0.8370913783942359, + "learning_rate": 1.5802212897852043e-05, + "loss": 0.4713, + "step": 18744 + }, + { + "epoch": 0.3239044788498756, + "grad_norm": 0.7686667026323826, + "learning_rate": 1.580175707723818e-05, + "loss": 0.4922, + "step": 18745 + }, + { + "epoch": 0.3239217583632845, + "grad_norm": 1.091154096298181, + "learning_rate": 1.580130123845277e-05, + "loss": 0.533, + "step": 18746 + }, + { + "epoch": 0.32393903787669337, + "grad_norm": 1.2724459238319843, + "learning_rate": 1.580084538149725e-05, + "loss": 0.4407, + "step": 18747 + }, + { + "epoch": 0.3239563173901023, + "grad_norm": 1.002803184522077, + "learning_rate": 1.5800389506373032e-05, + "loss": 0.5306, + "step": 18748 + }, + { + "epoch": 0.3239735969035112, + "grad_norm": 0.7279699949825554, + "learning_rate": 1.579993361308156e-05, + "loss": 0.3943, + "step": 18749 + }, + { + "epoch": 0.3239908764169201, + "grad_norm": 0.9229609117206881, + "learning_rate": 1.5799477701624252e-05, + "loss": 0.3044, + "step": 18750 + }, + { + "epoch": 0.324008155930329, + "grad_norm": 0.9844633449114992, + "learning_rate": 1.5799021772002538e-05, + "loss": 0.596, + "step": 18751 + }, + { + "epoch": 0.3240254354437379, + "grad_norm": 1.68818570265512, + "learning_rate": 1.579856582421785e-05, + "loss": 0.5539, + "step": 18752 + }, + { + "epoch": 0.3240427149571468, + "grad_norm": 1.2198136723919821, + "learning_rate": 1.579810985827161e-05, + "loss": 0.3886, + "step": 18753 + }, + { + "epoch": 0.3240599944705557, + "grad_norm": 0.9415526555468289, + "learning_rate": 1.579765387416525e-05, + "loss": 0.5299, + "step": 18754 + }, + { + "epoch": 0.32407727398396463, + "grad_norm": 0.6146164168570221, + "learning_rate": 1.5797197871900202e-05, + "loss": 0.3913, + "step": 18755 + }, + { + "epoch": 0.32409455349737354, + "grad_norm": 0.9379588794475597, + "learning_rate": 1.5796741851477884e-05, + "loss": 0.3675, + "step": 18756 + }, + { + "epoch": 0.3241118330107824, + "grad_norm": 0.8082556213518161, + "learning_rate": 1.579628581289973e-05, + "loss": 0.4801, + "step": 18757 + }, + { + "epoch": 0.3241291125241913, + "grad_norm": 1.0546455231933538, + "learning_rate": 1.5795829756167174e-05, + "loss": 0.4603, + "step": 18758 + }, + { + "epoch": 0.3241463920376002, + "grad_norm": 0.9186692312061809, + "learning_rate": 1.5795373681281638e-05, + "loss": 0.6806, + "step": 18759 + }, + { + "epoch": 0.3241636715510091, + "grad_norm": 0.9311411314050567, + "learning_rate": 1.5794917588244544e-05, + "loss": 0.4867, + "step": 18760 + }, + { + "epoch": 0.324180951064418, + "grad_norm": 1.692264410436793, + "learning_rate": 1.5794461477057334e-05, + "loss": 0.4291, + "step": 18761 + }, + { + "epoch": 0.32419823057782693, + "grad_norm": 0.7288193795549217, + "learning_rate": 1.579400534772143e-05, + "loss": 0.4868, + "step": 18762 + }, + { + "epoch": 0.32421551009123584, + "grad_norm": 1.1695071447821792, + "learning_rate": 1.5793549200238258e-05, + "loss": 0.5847, + "step": 18763 + }, + { + "epoch": 0.32423278960464474, + "grad_norm": 1.114215521341549, + "learning_rate": 1.5793093034609252e-05, + "loss": 0.4076, + "step": 18764 + }, + { + "epoch": 0.32425006911805365, + "grad_norm": 0.5964264250085672, + "learning_rate": 1.5792636850835838e-05, + "loss": 0.3762, + "step": 18765 + }, + { + "epoch": 0.32426734863146256, + "grad_norm": 1.2355067450272272, + "learning_rate": 1.5792180648919445e-05, + "loss": 0.5096, + "step": 18766 + }, + { + "epoch": 0.32428462814487147, + "grad_norm": 0.49748354805332073, + "learning_rate": 1.57917244288615e-05, + "loss": 0.8395, + "step": 18767 + }, + { + "epoch": 0.3243019076582803, + "grad_norm": 0.8337536877233989, + "learning_rate": 1.5791268190663435e-05, + "loss": 0.6523, + "step": 18768 + }, + { + "epoch": 0.3243191871716892, + "grad_norm": 1.0968022192506042, + "learning_rate": 1.579081193432668e-05, + "loss": 0.6107, + "step": 18769 + }, + { + "epoch": 0.32433646668509813, + "grad_norm": 1.0465512389536773, + "learning_rate": 1.579035565985266e-05, + "loss": 0.4099, + "step": 18770 + }, + { + "epoch": 0.32435374619850704, + "grad_norm": 1.1252072377342046, + "learning_rate": 1.5789899367242806e-05, + "loss": 0.534, + "step": 18771 + }, + { + "epoch": 0.32437102571191595, + "grad_norm": 0.7542451299209063, + "learning_rate": 1.5789443056498547e-05, + "loss": 0.4745, + "step": 18772 + }, + { + "epoch": 0.32438830522532486, + "grad_norm": 0.7097747410423968, + "learning_rate": 1.5788986727621315e-05, + "loss": 0.3294, + "step": 18773 + }, + { + "epoch": 0.32440558473873377, + "grad_norm": 0.6704428341554726, + "learning_rate": 1.5788530380612535e-05, + "loss": 0.4196, + "step": 18774 + }, + { + "epoch": 0.3244228642521427, + "grad_norm": 0.6248304599532651, + "learning_rate": 1.5788074015473636e-05, + "loss": 0.3084, + "step": 18775 + }, + { + "epoch": 0.3244401437655516, + "grad_norm": 1.0450864709915049, + "learning_rate": 1.578761763220605e-05, + "loss": 0.5902, + "step": 18776 + }, + { + "epoch": 0.3244574232789605, + "grad_norm": 1.0070096093858931, + "learning_rate": 1.578716123081121e-05, + "loss": 0.5108, + "step": 18777 + }, + { + "epoch": 0.32447470279236934, + "grad_norm": 1.4719052242614545, + "learning_rate": 1.5786704811290535e-05, + "loss": 0.5166, + "step": 18778 + }, + { + "epoch": 0.32449198230577825, + "grad_norm": 1.0451349218361232, + "learning_rate": 1.5786248373645467e-05, + "loss": 0.2409, + "step": 18779 + }, + { + "epoch": 0.32450926181918716, + "grad_norm": 1.2278278935061302, + "learning_rate": 1.5785791917877424e-05, + "loss": 0.4675, + "step": 18780 + }, + { + "epoch": 0.32452654133259606, + "grad_norm": 1.0123683346990293, + "learning_rate": 1.5785335443987842e-05, + "loss": 0.4414, + "step": 18781 + }, + { + "epoch": 0.32454382084600497, + "grad_norm": 0.8987466308648446, + "learning_rate": 1.578487895197815e-05, + "loss": 0.6761, + "step": 18782 + }, + { + "epoch": 0.3245611003594139, + "grad_norm": 1.3020473413091829, + "learning_rate": 1.578442244184978e-05, + "loss": 0.5026, + "step": 18783 + }, + { + "epoch": 0.3245783798728228, + "grad_norm": 1.3064908406152989, + "learning_rate": 1.5783965913604154e-05, + "loss": 0.3313, + "step": 18784 + }, + { + "epoch": 0.3245956593862317, + "grad_norm": 1.3923013016735821, + "learning_rate": 1.5783509367242712e-05, + "loss": 0.5941, + "step": 18785 + }, + { + "epoch": 0.3246129388996406, + "grad_norm": 0.7062939381313427, + "learning_rate": 1.578305280276688e-05, + "loss": 0.5799, + "step": 18786 + }, + { + "epoch": 0.3246302184130495, + "grad_norm": 0.943077692436282, + "learning_rate": 1.5782596220178086e-05, + "loss": 0.38, + "step": 18787 + }, + { + "epoch": 0.3246474979264584, + "grad_norm": 0.5418352781878559, + "learning_rate": 1.578213961947776e-05, + "loss": 0.6734, + "step": 18788 + }, + { + "epoch": 0.32466477743986727, + "grad_norm": 0.988841297962206, + "learning_rate": 1.5781683000667332e-05, + "loss": 0.468, + "step": 18789 + }, + { + "epoch": 0.3246820569532762, + "grad_norm": 0.9642024478840763, + "learning_rate": 1.5781226363748235e-05, + "loss": 0.4444, + "step": 18790 + }, + { + "epoch": 0.3246993364666851, + "grad_norm": 0.5131859862928658, + "learning_rate": 1.5780769708721898e-05, + "loss": 0.7875, + "step": 18791 + }, + { + "epoch": 0.324716615980094, + "grad_norm": 1.1079648687218204, + "learning_rate": 1.578031303558975e-05, + "loss": 0.4968, + "step": 18792 + }, + { + "epoch": 0.3247338954935029, + "grad_norm": 1.2038503913458012, + "learning_rate": 1.5779856344353222e-05, + "loss": 0.4356, + "step": 18793 + }, + { + "epoch": 0.3247511750069118, + "grad_norm": 0.7794481217520343, + "learning_rate": 1.5779399635013747e-05, + "loss": 0.4002, + "step": 18794 + }, + { + "epoch": 0.3247684545203207, + "grad_norm": 1.0514895813045884, + "learning_rate": 1.577894290757275e-05, + "loss": 0.6498, + "step": 18795 + }, + { + "epoch": 0.3247857340337296, + "grad_norm": 1.4433178061831373, + "learning_rate": 1.5778486162031667e-05, + "loss": 0.5314, + "step": 18796 + }, + { + "epoch": 0.32480301354713853, + "grad_norm": 1.551256219201194, + "learning_rate": 1.5778029398391924e-05, + "loss": 0.4782, + "step": 18797 + }, + { + "epoch": 0.32482029306054744, + "grad_norm": 1.2673526890495936, + "learning_rate": 1.5777572616654957e-05, + "loss": 0.5746, + "step": 18798 + }, + { + "epoch": 0.3248375725739563, + "grad_norm": 0.9925340632853906, + "learning_rate": 1.5777115816822192e-05, + "loss": 0.6301, + "step": 18799 + }, + { + "epoch": 0.3248548520873652, + "grad_norm": 0.8780794057831601, + "learning_rate": 1.577665899889506e-05, + "loss": 0.4574, + "step": 18800 + }, + { + "epoch": 0.3248721316007741, + "grad_norm": 0.6202104530003717, + "learning_rate": 1.577620216287499e-05, + "loss": 0.3996, + "step": 18801 + }, + { + "epoch": 0.324889411114183, + "grad_norm": 1.465315558125806, + "learning_rate": 1.5775745308763423e-05, + "loss": 0.6603, + "step": 18802 + }, + { + "epoch": 0.3249066906275919, + "grad_norm": 0.7064031209813335, + "learning_rate": 1.577528843656178e-05, + "loss": 0.3389, + "step": 18803 + }, + { + "epoch": 0.32492397014100083, + "grad_norm": 0.8923553900932744, + "learning_rate": 1.5774831546271495e-05, + "loss": 0.4521, + "step": 18804 + }, + { + "epoch": 0.32494124965440974, + "grad_norm": 0.8743959917523206, + "learning_rate": 1.5774374637893995e-05, + "loss": 0.565, + "step": 18805 + }, + { + "epoch": 0.32495852916781864, + "grad_norm": 0.9446897361471089, + "learning_rate": 1.577391771143072e-05, + "loss": 0.3598, + "step": 18806 + }, + { + "epoch": 0.32497580868122755, + "grad_norm": 1.1087927618593758, + "learning_rate": 1.5773460766883096e-05, + "loss": 0.6687, + "step": 18807 + }, + { + "epoch": 0.32499308819463646, + "grad_norm": 1.3822318588308087, + "learning_rate": 1.577300380425255e-05, + "loss": 0.5901, + "step": 18808 + }, + { + "epoch": 0.32501036770804537, + "grad_norm": 0.7422586271733781, + "learning_rate": 1.577254682354052e-05, + "loss": 0.581, + "step": 18809 + }, + { + "epoch": 0.3250276472214542, + "grad_norm": 0.89178463516734, + "learning_rate": 1.577208982474843e-05, + "loss": 0.4333, + "step": 18810 + }, + { + "epoch": 0.3250449267348631, + "grad_norm": 1.261460239427618, + "learning_rate": 1.5771632807877722e-05, + "loss": 0.4103, + "step": 18811 + }, + { + "epoch": 0.32506220624827203, + "grad_norm": 1.2572783659258393, + "learning_rate": 1.577117577292982e-05, + "loss": 0.49, + "step": 18812 + }, + { + "epoch": 0.32507948576168094, + "grad_norm": 0.9985460903844732, + "learning_rate": 1.5770718719906154e-05, + "loss": 0.4187, + "step": 18813 + }, + { + "epoch": 0.32509676527508985, + "grad_norm": 1.3633382624961177, + "learning_rate": 1.577026164880816e-05, + "loss": 0.421, + "step": 18814 + }, + { + "epoch": 0.32511404478849876, + "grad_norm": 0.9705508406429028, + "learning_rate": 1.576980455963727e-05, + "loss": 0.5959, + "step": 18815 + }, + { + "epoch": 0.32513132430190766, + "grad_norm": 0.5015762707768986, + "learning_rate": 1.5769347452394914e-05, + "loss": 0.8533, + "step": 18816 + }, + { + "epoch": 0.3251486038153166, + "grad_norm": 1.8442410628725066, + "learning_rate": 1.576889032708252e-05, + "loss": 0.5853, + "step": 18817 + }, + { + "epoch": 0.3251658833287255, + "grad_norm": 1.146390937513221, + "learning_rate": 1.5768433183701524e-05, + "loss": 0.3987, + "step": 18818 + }, + { + "epoch": 0.3251831628421344, + "grad_norm": 1.0986208744873387, + "learning_rate": 1.576797602225336e-05, + "loss": 0.3961, + "step": 18819 + }, + { + "epoch": 0.3252004423555433, + "grad_norm": 1.4503054356054128, + "learning_rate": 1.576751884273945e-05, + "loss": 0.6389, + "step": 18820 + }, + { + "epoch": 0.32521772186895215, + "grad_norm": 1.510937429021272, + "learning_rate": 1.5767061645161235e-05, + "loss": 0.3623, + "step": 18821 + }, + { + "epoch": 0.32523500138236106, + "grad_norm": 1.2501913340363056, + "learning_rate": 1.5766604429520152e-05, + "loss": 0.4876, + "step": 18822 + }, + { + "epoch": 0.32525228089576996, + "grad_norm": 1.146265383324323, + "learning_rate": 1.576614719581762e-05, + "loss": 0.5085, + "step": 18823 + }, + { + "epoch": 0.32526956040917887, + "grad_norm": 1.385223078899145, + "learning_rate": 1.5765689944055073e-05, + "loss": 0.6531, + "step": 18824 + }, + { + "epoch": 0.3252868399225878, + "grad_norm": 1.342005837137266, + "learning_rate": 1.5765232674233954e-05, + "loss": 0.5063, + "step": 18825 + }, + { + "epoch": 0.3253041194359967, + "grad_norm": 0.8997937214002295, + "learning_rate": 1.5764775386355685e-05, + "loss": 0.4365, + "step": 18826 + }, + { + "epoch": 0.3253213989494056, + "grad_norm": 0.4318345815133044, + "learning_rate": 1.5764318080421702e-05, + "loss": 0.6121, + "step": 18827 + }, + { + "epoch": 0.3253386784628145, + "grad_norm": 1.1586984638345175, + "learning_rate": 1.5763860756433436e-05, + "loss": 0.3064, + "step": 18828 + }, + { + "epoch": 0.3253559579762234, + "grad_norm": 0.8876937073240091, + "learning_rate": 1.5763403414392316e-05, + "loss": 0.4555, + "step": 18829 + }, + { + "epoch": 0.3253732374896323, + "grad_norm": 1.5254083529357492, + "learning_rate": 1.5762946054299786e-05, + "loss": 0.5685, + "step": 18830 + }, + { + "epoch": 0.32539051700304117, + "grad_norm": 0.5286751266699757, + "learning_rate": 1.5762488676157265e-05, + "loss": 0.8641, + "step": 18831 + }, + { + "epoch": 0.3254077965164501, + "grad_norm": 1.199907980170665, + "learning_rate": 1.5762031279966196e-05, + "loss": 0.3421, + "step": 18832 + }, + { + "epoch": 0.325425076029859, + "grad_norm": 1.7881326069305243, + "learning_rate": 1.5761573865728002e-05, + "loss": 0.675, + "step": 18833 + }, + { + "epoch": 0.3254423555432679, + "grad_norm": 1.3470712621239722, + "learning_rate": 1.5761116433444124e-05, + "loss": 0.4681, + "step": 18834 + }, + { + "epoch": 0.3254596350566768, + "grad_norm": 1.0410261305570476, + "learning_rate": 1.576065898311599e-05, + "loss": 0.5196, + "step": 18835 + }, + { + "epoch": 0.3254769145700857, + "grad_norm": 0.9564237861292463, + "learning_rate": 1.576020151474504e-05, + "loss": 0.5605, + "step": 18836 + }, + { + "epoch": 0.3254941940834946, + "grad_norm": 1.3634127979342456, + "learning_rate": 1.5759744028332694e-05, + "loss": 0.5388, + "step": 18837 + }, + { + "epoch": 0.3255114735969035, + "grad_norm": 1.7277418989156608, + "learning_rate": 1.57592865238804e-05, + "loss": 0.4965, + "step": 18838 + }, + { + "epoch": 0.32552875311031243, + "grad_norm": 0.8357940505086924, + "learning_rate": 1.5758829001389576e-05, + "loss": 0.5102, + "step": 18839 + }, + { + "epoch": 0.32554603262372134, + "grad_norm": 0.6811747840605212, + "learning_rate": 1.5758371460861664e-05, + "loss": 0.585, + "step": 18840 + }, + { + "epoch": 0.32556331213713025, + "grad_norm": 0.33906777468580457, + "learning_rate": 1.5757913902298096e-05, + "loss": 0.4839, + "step": 18841 + }, + { + "epoch": 0.3255805916505391, + "grad_norm": 0.7460778614156739, + "learning_rate": 1.57574563257003e-05, + "loss": 0.5345, + "step": 18842 + }, + { + "epoch": 0.325597871163948, + "grad_norm": 0.9634339374389053, + "learning_rate": 1.5756998731069718e-05, + "loss": 0.5093, + "step": 18843 + }, + { + "epoch": 0.3256151506773569, + "grad_norm": 4.163176047618017, + "learning_rate": 1.5756541118407775e-05, + "loss": 0.4543, + "step": 18844 + }, + { + "epoch": 0.3256324301907658, + "grad_norm": 1.0017840206217845, + "learning_rate": 1.575608348771591e-05, + "loss": 0.548, + "step": 18845 + }, + { + "epoch": 0.32564970970417473, + "grad_norm": 1.448072120115142, + "learning_rate": 1.575562583899555e-05, + "loss": 0.533, + "step": 18846 + }, + { + "epoch": 0.32566698921758364, + "grad_norm": 0.9087892281495957, + "learning_rate": 1.5755168172248138e-05, + "loss": 0.4396, + "step": 18847 + }, + { + "epoch": 0.32568426873099254, + "grad_norm": 1.1467907206266046, + "learning_rate": 1.57547104874751e-05, + "loss": 0.3816, + "step": 18848 + }, + { + "epoch": 0.32570154824440145, + "grad_norm": 1.4414819769226899, + "learning_rate": 1.5754252784677873e-05, + "loss": 0.6557, + "step": 18849 + }, + { + "epoch": 0.32571882775781036, + "grad_norm": 0.8953066152619256, + "learning_rate": 1.5753795063857886e-05, + "loss": 0.4874, + "step": 18850 + }, + { + "epoch": 0.32573610727121927, + "grad_norm": 1.0086941358118897, + "learning_rate": 1.575333732501658e-05, + "loss": 0.5144, + "step": 18851 + }, + { + "epoch": 0.3257533867846281, + "grad_norm": 0.8482769514361413, + "learning_rate": 1.5752879568155378e-05, + "loss": 0.4916, + "step": 18852 + }, + { + "epoch": 0.325770666298037, + "grad_norm": 1.3864447096318309, + "learning_rate": 1.5752421793275723e-05, + "loss": 0.4927, + "step": 18853 + }, + { + "epoch": 0.32578794581144593, + "grad_norm": 0.8068531911027113, + "learning_rate": 1.5751964000379043e-05, + "loss": 0.4859, + "step": 18854 + }, + { + "epoch": 0.32580522532485484, + "grad_norm": 1.0151845105589692, + "learning_rate": 1.575150618946678e-05, + "loss": 0.4901, + "step": 18855 + }, + { + "epoch": 0.32582250483826375, + "grad_norm": 0.3975301581822878, + "learning_rate": 1.575104836054036e-05, + "loss": 0.603, + "step": 18856 + }, + { + "epoch": 0.32583978435167266, + "grad_norm": 0.7707941961398458, + "learning_rate": 1.5750590513601217e-05, + "loss": 0.4204, + "step": 18857 + }, + { + "epoch": 0.32585706386508156, + "grad_norm": 1.4983290147683321, + "learning_rate": 1.575013264865079e-05, + "loss": 0.4245, + "step": 18858 + }, + { + "epoch": 0.32587434337849047, + "grad_norm": 1.1810731971368102, + "learning_rate": 1.574967476569051e-05, + "loss": 0.602, + "step": 18859 + }, + { + "epoch": 0.3258916228918994, + "grad_norm": 0.8261084801547847, + "learning_rate": 1.5749216864721813e-05, + "loss": 0.3547, + "step": 18860 + }, + { + "epoch": 0.3259089024053083, + "grad_norm": 1.4009091687386777, + "learning_rate": 1.5748758945746132e-05, + "loss": 0.5299, + "step": 18861 + }, + { + "epoch": 0.3259261819187172, + "grad_norm": 0.9235629883227993, + "learning_rate": 1.57483010087649e-05, + "loss": 0.603, + "step": 18862 + }, + { + "epoch": 0.32594346143212605, + "grad_norm": 1.2831446893780678, + "learning_rate": 1.574784305377955e-05, + "loss": 0.4534, + "step": 18863 + }, + { + "epoch": 0.32596074094553495, + "grad_norm": 0.8857268905321266, + "learning_rate": 1.574738508079152e-05, + "loss": 0.4266, + "step": 18864 + }, + { + "epoch": 0.32597802045894386, + "grad_norm": 1.2163278724625626, + "learning_rate": 1.5746927089802247e-05, + "loss": 0.5312, + "step": 18865 + }, + { + "epoch": 0.32599529997235277, + "grad_norm": 0.860638811820134, + "learning_rate": 1.5746469080813155e-05, + "loss": 0.4681, + "step": 18866 + }, + { + "epoch": 0.3260125794857617, + "grad_norm": 1.479098384102378, + "learning_rate": 1.574601105382569e-05, + "loss": 0.5462, + "step": 18867 + }, + { + "epoch": 0.3260298589991706, + "grad_norm": 0.6178204081259725, + "learning_rate": 1.574555300884128e-05, + "loss": 0.6177, + "step": 18868 + }, + { + "epoch": 0.3260471385125795, + "grad_norm": 0.7080488771407419, + "learning_rate": 1.5745094945861366e-05, + "loss": 0.4009, + "step": 18869 + }, + { + "epoch": 0.3260644180259884, + "grad_norm": 1.7102721415365092, + "learning_rate": 1.5744636864887372e-05, + "loss": 0.4438, + "step": 18870 + }, + { + "epoch": 0.3260816975393973, + "grad_norm": 0.7409981203401761, + "learning_rate": 1.574417876592074e-05, + "loss": 0.3932, + "step": 18871 + }, + { + "epoch": 0.3260989770528062, + "grad_norm": 1.0341840560960323, + "learning_rate": 1.5743720648962906e-05, + "loss": 0.3436, + "step": 18872 + }, + { + "epoch": 0.32611625656621507, + "grad_norm": 0.8931484596290549, + "learning_rate": 1.57432625140153e-05, + "loss": 0.4682, + "step": 18873 + }, + { + "epoch": 0.326133536079624, + "grad_norm": 0.9941833881210572, + "learning_rate": 1.574280436107936e-05, + "loss": 0.4816, + "step": 18874 + }, + { + "epoch": 0.3261508155930329, + "grad_norm": 0.9206658541667646, + "learning_rate": 1.5742346190156517e-05, + "loss": 0.5974, + "step": 18875 + }, + { + "epoch": 0.3261680951064418, + "grad_norm": 0.39050511920108577, + "learning_rate": 1.5741888001248215e-05, + "loss": 0.4997, + "step": 18876 + }, + { + "epoch": 0.3261853746198507, + "grad_norm": 0.6219022806502845, + "learning_rate": 1.574142979435588e-05, + "loss": 0.3865, + "step": 18877 + }, + { + "epoch": 0.3262026541332596, + "grad_norm": 1.0935744343040115, + "learning_rate": 1.5740971569480953e-05, + "loss": 0.363, + "step": 18878 + }, + { + "epoch": 0.3262199336466685, + "grad_norm": 1.2617825479068674, + "learning_rate": 1.5740513326624868e-05, + "loss": 0.4256, + "step": 18879 + }, + { + "epoch": 0.3262372131600774, + "grad_norm": 0.5470181154507651, + "learning_rate": 1.5740055065789055e-05, + "loss": 0.8768, + "step": 18880 + }, + { + "epoch": 0.32625449267348633, + "grad_norm": 1.2279783270836055, + "learning_rate": 1.5739596786974954e-05, + "loss": 0.5604, + "step": 18881 + }, + { + "epoch": 0.32627177218689524, + "grad_norm": 1.33539615950358, + "learning_rate": 1.5739138490184e-05, + "loss": 0.5922, + "step": 18882 + }, + { + "epoch": 0.32628905170030414, + "grad_norm": 0.5529896355668972, + "learning_rate": 1.5738680175417627e-05, + "loss": 0.7933, + "step": 18883 + }, + { + "epoch": 0.326306331213713, + "grad_norm": 1.3680276725077536, + "learning_rate": 1.5738221842677272e-05, + "loss": 0.5707, + "step": 18884 + }, + { + "epoch": 0.3263236107271219, + "grad_norm": 1.2362844944444598, + "learning_rate": 1.5737763491964372e-05, + "loss": 0.3664, + "step": 18885 + }, + { + "epoch": 0.3263408902405308, + "grad_norm": 1.307697069530335, + "learning_rate": 1.573730512328036e-05, + "loss": 0.3574, + "step": 18886 + }, + { + "epoch": 0.3263581697539397, + "grad_norm": 0.846340173877558, + "learning_rate": 1.5736846736626672e-05, + "loss": 0.3679, + "step": 18887 + }, + { + "epoch": 0.3263754492673486, + "grad_norm": 0.9025463468128748, + "learning_rate": 1.5736388332004743e-05, + "loss": 0.6653, + "step": 18888 + }, + { + "epoch": 0.32639272878075754, + "grad_norm": 1.0635123687651566, + "learning_rate": 1.5735929909416015e-05, + "loss": 0.5627, + "step": 18889 + }, + { + "epoch": 0.32641000829416644, + "grad_norm": 0.989269681634943, + "learning_rate": 1.5735471468861914e-05, + "loss": 0.4731, + "step": 18890 + }, + { + "epoch": 0.32642728780757535, + "grad_norm": 1.180055398943403, + "learning_rate": 1.573501301034388e-05, + "loss": 0.5959, + "step": 18891 + }, + { + "epoch": 0.32644456732098426, + "grad_norm": 2.439261853042115, + "learning_rate": 1.573455453386335e-05, + "loss": 0.606, + "step": 18892 + }, + { + "epoch": 0.32646184683439317, + "grad_norm": 1.4043336414285252, + "learning_rate": 1.573409603942176e-05, + "loss": 0.671, + "step": 18893 + }, + { + "epoch": 0.3264791263478021, + "grad_norm": 0.7947868886310474, + "learning_rate": 1.5733637527020548e-05, + "loss": 0.4091, + "step": 18894 + }, + { + "epoch": 0.3264964058612109, + "grad_norm": 0.8006979448934587, + "learning_rate": 1.573317899666114e-05, + "loss": 0.5301, + "step": 18895 + }, + { + "epoch": 0.32651368537461983, + "grad_norm": 1.718744988979289, + "learning_rate": 1.5732720448344985e-05, + "loss": 0.6783, + "step": 18896 + }, + { + "epoch": 0.32653096488802874, + "grad_norm": 0.6726024329349225, + "learning_rate": 1.5732261882073513e-05, + "loss": 0.3929, + "step": 18897 + }, + { + "epoch": 0.32654824440143765, + "grad_norm": 0.4156652219102762, + "learning_rate": 1.5731803297848164e-05, + "loss": 0.8538, + "step": 18898 + }, + { + "epoch": 0.32656552391484656, + "grad_norm": 0.9551465302402606, + "learning_rate": 1.573134469567037e-05, + "loss": 0.3449, + "step": 18899 + }, + { + "epoch": 0.32658280342825546, + "grad_norm": 0.6883632768988125, + "learning_rate": 1.5730886075541567e-05, + "loss": 0.3666, + "step": 18900 + }, + { + "epoch": 0.32660008294166437, + "grad_norm": 0.681923330587272, + "learning_rate": 1.5730427437463195e-05, + "loss": 0.3516, + "step": 18901 + }, + { + "epoch": 0.3266173624550733, + "grad_norm": 1.082126531755124, + "learning_rate": 1.5729968781436686e-05, + "loss": 0.4702, + "step": 18902 + }, + { + "epoch": 0.3266346419684822, + "grad_norm": 0.7446405853792745, + "learning_rate": 1.5729510107463483e-05, + "loss": 0.5195, + "step": 18903 + }, + { + "epoch": 0.3266519214818911, + "grad_norm": 1.1907433244669496, + "learning_rate": 1.5729051415545017e-05, + "loss": 0.5817, + "step": 18904 + }, + { + "epoch": 0.32666920099529995, + "grad_norm": 1.2987696721007413, + "learning_rate": 1.5728592705682727e-05, + "loss": 0.7281, + "step": 18905 + }, + { + "epoch": 0.32668648050870885, + "grad_norm": 1.3986297388034101, + "learning_rate": 1.5728133977878046e-05, + "loss": 0.503, + "step": 18906 + }, + { + "epoch": 0.32670376002211776, + "grad_norm": 1.3844416128787533, + "learning_rate": 1.572767523213242e-05, + "loss": 0.3971, + "step": 18907 + }, + { + "epoch": 0.32672103953552667, + "grad_norm": 0.7606246212622287, + "learning_rate": 1.5727216468447277e-05, + "loss": 0.4093, + "step": 18908 + }, + { + "epoch": 0.3267383190489356, + "grad_norm": 0.8611748941453977, + "learning_rate": 1.5726757686824054e-05, + "loss": 0.4204, + "step": 18909 + }, + { + "epoch": 0.3267555985623445, + "grad_norm": 0.8387391305033443, + "learning_rate": 1.5726298887264193e-05, + "loss": 0.4579, + "step": 18910 + }, + { + "epoch": 0.3267728780757534, + "grad_norm": 0.7462836569086798, + "learning_rate": 1.572584006976913e-05, + "loss": 0.522, + "step": 18911 + }, + { + "epoch": 0.3267901575891623, + "grad_norm": 0.8352664029794875, + "learning_rate": 1.57253812343403e-05, + "loss": 0.5303, + "step": 18912 + }, + { + "epoch": 0.3268074371025712, + "grad_norm": 1.101960657085605, + "learning_rate": 1.572492238097914e-05, + "loss": 0.3343, + "step": 18913 + }, + { + "epoch": 0.3268247166159801, + "grad_norm": 0.5654853577625432, + "learning_rate": 1.5724463509687087e-05, + "loss": 0.2988, + "step": 18914 + }, + { + "epoch": 0.326841996129389, + "grad_norm": 1.124896241409548, + "learning_rate": 1.5724004620465584e-05, + "loss": 0.6369, + "step": 18915 + }, + { + "epoch": 0.3268592756427979, + "grad_norm": 1.3818039284604882, + "learning_rate": 1.572354571331606e-05, + "loss": 0.487, + "step": 18916 + }, + { + "epoch": 0.3268765551562068, + "grad_norm": 0.7181631390010019, + "learning_rate": 1.5723086788239954e-05, + "loss": 0.427, + "step": 18917 + }, + { + "epoch": 0.3268938346696157, + "grad_norm": 0.7094917074144016, + "learning_rate": 1.572262784523871e-05, + "loss": 0.6614, + "step": 18918 + }, + { + "epoch": 0.3269111141830246, + "grad_norm": 0.853670604772854, + "learning_rate": 1.5722168884313756e-05, + "loss": 0.3971, + "step": 18919 + }, + { + "epoch": 0.3269283936964335, + "grad_norm": 1.1540447908565064, + "learning_rate": 1.572170990546654e-05, + "loss": 0.5069, + "step": 18920 + }, + { + "epoch": 0.3269456732098424, + "grad_norm": 0.7112744469435099, + "learning_rate": 1.572125090869849e-05, + "loss": 0.3901, + "step": 18921 + }, + { + "epoch": 0.3269629527232513, + "grad_norm": 1.0493701375215192, + "learning_rate": 1.5720791894011047e-05, + "loss": 0.3838, + "step": 18922 + }, + { + "epoch": 0.32698023223666023, + "grad_norm": 0.7214358655636509, + "learning_rate": 1.572033286140565e-05, + "loss": 0.3947, + "step": 18923 + }, + { + "epoch": 0.32699751175006914, + "grad_norm": 1.0542138080650563, + "learning_rate": 1.5719873810883734e-05, + "loss": 0.5031, + "step": 18924 + }, + { + "epoch": 0.32701479126347804, + "grad_norm": 0.9538720903982413, + "learning_rate": 1.5719414742446742e-05, + "loss": 0.4946, + "step": 18925 + }, + { + "epoch": 0.3270320707768869, + "grad_norm": 1.0876958373555252, + "learning_rate": 1.5718955656096105e-05, + "loss": 0.4889, + "step": 18926 + }, + { + "epoch": 0.3270493502902958, + "grad_norm": 0.9396370774715789, + "learning_rate": 1.5718496551833262e-05, + "loss": 0.4685, + "step": 18927 + }, + { + "epoch": 0.3270666298037047, + "grad_norm": 0.9161696151695985, + "learning_rate": 1.5718037429659657e-05, + "loss": 0.6792, + "step": 18928 + }, + { + "epoch": 0.3270839093171136, + "grad_norm": 1.000143288514427, + "learning_rate": 1.5717578289576726e-05, + "loss": 0.4694, + "step": 18929 + }, + { + "epoch": 0.3271011888305225, + "grad_norm": 0.8592964055283997, + "learning_rate": 1.5717119131585902e-05, + "loss": 0.3669, + "step": 18930 + }, + { + "epoch": 0.32711846834393143, + "grad_norm": 0.6740410429435766, + "learning_rate": 1.571665995568863e-05, + "loss": 0.4585, + "step": 18931 + }, + { + "epoch": 0.32713574785734034, + "grad_norm": 1.6232724587700196, + "learning_rate": 1.571620076188634e-05, + "loss": 0.5555, + "step": 18932 + }, + { + "epoch": 0.32715302737074925, + "grad_norm": 0.9008447747419391, + "learning_rate": 1.5715741550180478e-05, + "loss": 0.4531, + "step": 18933 + }, + { + "epoch": 0.32717030688415816, + "grad_norm": 0.9578213636174852, + "learning_rate": 1.5715282320572476e-05, + "loss": 0.4499, + "step": 18934 + }, + { + "epoch": 0.32718758639756707, + "grad_norm": 0.48205202060566527, + "learning_rate": 1.5714823073063778e-05, + "loss": 0.8325, + "step": 18935 + }, + { + "epoch": 0.327204865910976, + "grad_norm": 0.523943539311662, + "learning_rate": 1.571436380765582e-05, + "loss": 0.6699, + "step": 18936 + }, + { + "epoch": 0.3272221454243848, + "grad_norm": 0.9899434404508168, + "learning_rate": 1.571390452435004e-05, + "loss": 0.5192, + "step": 18937 + }, + { + "epoch": 0.32723942493779373, + "grad_norm": 1.1380224628357538, + "learning_rate": 1.5713445223147876e-05, + "loss": 0.48, + "step": 18938 + }, + { + "epoch": 0.32725670445120264, + "grad_norm": 0.38655074704114556, + "learning_rate": 1.5712985904050766e-05, + "loss": 0.6012, + "step": 18939 + }, + { + "epoch": 0.32727398396461155, + "grad_norm": 0.8376850609106247, + "learning_rate": 1.5712526567060152e-05, + "loss": 0.5186, + "step": 18940 + }, + { + "epoch": 0.32729126347802046, + "grad_norm": 1.038688814010796, + "learning_rate": 1.5712067212177472e-05, + "loss": 0.3675, + "step": 18941 + }, + { + "epoch": 0.32730854299142936, + "grad_norm": 2.2067365460758035, + "learning_rate": 1.571160783940416e-05, + "loss": 0.6641, + "step": 18942 + }, + { + "epoch": 0.32732582250483827, + "grad_norm": 1.0992043220093695, + "learning_rate": 1.571114844874166e-05, + "loss": 0.7478, + "step": 18943 + }, + { + "epoch": 0.3273431020182472, + "grad_norm": 0.8925641111397065, + "learning_rate": 1.571068904019141e-05, + "loss": 0.4163, + "step": 18944 + }, + { + "epoch": 0.3273603815316561, + "grad_norm": 0.9512429444406875, + "learning_rate": 1.571022961375485e-05, + "loss": 0.4033, + "step": 18945 + }, + { + "epoch": 0.327377661045065, + "grad_norm": 0.6730980574762688, + "learning_rate": 1.570977016943341e-05, + "loss": 0.4457, + "step": 18946 + }, + { + "epoch": 0.32739494055847385, + "grad_norm": 1.8081480276346447, + "learning_rate": 1.570931070722854e-05, + "loss": 0.6217, + "step": 18947 + }, + { + "epoch": 0.32741222007188275, + "grad_norm": 1.2761274702774095, + "learning_rate": 1.5708851227141674e-05, + "loss": 0.4972, + "step": 18948 + }, + { + "epoch": 0.32742949958529166, + "grad_norm": 1.0854095177762302, + "learning_rate": 1.5708391729174254e-05, + "loss": 0.6003, + "step": 18949 + }, + { + "epoch": 0.32744677909870057, + "grad_norm": 1.365782406725158, + "learning_rate": 1.5707932213327714e-05, + "loss": 0.5085, + "step": 18950 + }, + { + "epoch": 0.3274640586121095, + "grad_norm": 1.4756209452007594, + "learning_rate": 1.57074726796035e-05, + "loss": 0.5511, + "step": 18951 + }, + { + "epoch": 0.3274813381255184, + "grad_norm": 1.1761839454190284, + "learning_rate": 1.5707013128003046e-05, + "loss": 0.5918, + "step": 18952 + }, + { + "epoch": 0.3274986176389273, + "grad_norm": 0.9119144222474209, + "learning_rate": 1.5706553558527794e-05, + "loss": 0.4368, + "step": 18953 + }, + { + "epoch": 0.3275158971523362, + "grad_norm": 0.9341225168490328, + "learning_rate": 1.570609397117918e-05, + "loss": 0.4714, + "step": 18954 + }, + { + "epoch": 0.3275331766657451, + "grad_norm": 1.2453537911293249, + "learning_rate": 1.570563436595865e-05, + "loss": 0.5414, + "step": 18955 + }, + { + "epoch": 0.327550456179154, + "grad_norm": 0.7901360721090228, + "learning_rate": 1.570517474286764e-05, + "loss": 0.5834, + "step": 18956 + }, + { + "epoch": 0.3275677356925629, + "grad_norm": 0.7062262313102835, + "learning_rate": 1.5704715101907584e-05, + "loss": 0.3763, + "step": 18957 + }, + { + "epoch": 0.3275850152059718, + "grad_norm": 0.9992264512516449, + "learning_rate": 1.570425544307993e-05, + "loss": 0.4386, + "step": 18958 + }, + { + "epoch": 0.3276022947193807, + "grad_norm": 2.264771227824978, + "learning_rate": 1.5703795766386114e-05, + "loss": 0.4904, + "step": 18959 + }, + { + "epoch": 0.3276195742327896, + "grad_norm": 1.3878697262382707, + "learning_rate": 1.5703336071827575e-05, + "loss": 0.5479, + "step": 18960 + }, + { + "epoch": 0.3276368537461985, + "grad_norm": 0.7863755063722324, + "learning_rate": 1.5702876359405755e-05, + "loss": 0.6266, + "step": 18961 + }, + { + "epoch": 0.3276541332596074, + "grad_norm": 1.2828420909297082, + "learning_rate": 1.5702416629122095e-05, + "loss": 0.631, + "step": 18962 + }, + { + "epoch": 0.3276714127730163, + "grad_norm": 0.9184310886815189, + "learning_rate": 1.5701956880978033e-05, + "loss": 0.6627, + "step": 18963 + }, + { + "epoch": 0.3276886922864252, + "grad_norm": 0.9655016794048473, + "learning_rate": 1.5701497114975003e-05, + "loss": 0.5126, + "step": 18964 + }, + { + "epoch": 0.32770597179983413, + "grad_norm": 0.3976278424901494, + "learning_rate": 1.5701037331114455e-05, + "loss": 0.7678, + "step": 18965 + }, + { + "epoch": 0.32772325131324304, + "grad_norm": 1.2152563546946447, + "learning_rate": 1.5700577529397825e-05, + "loss": 0.5613, + "step": 18966 + }, + { + "epoch": 0.32774053082665194, + "grad_norm": 0.39265444756987833, + "learning_rate": 1.570011770982655e-05, + "loss": 0.5721, + "step": 18967 + }, + { + "epoch": 0.32775781034006085, + "grad_norm": 1.00133140025656, + "learning_rate": 1.5699657872402076e-05, + "loss": 0.4607, + "step": 18968 + }, + { + "epoch": 0.3277750898534697, + "grad_norm": 1.1596475692467736, + "learning_rate": 1.569919801712584e-05, + "loss": 0.399, + "step": 18969 + }, + { + "epoch": 0.3277923693668786, + "grad_norm": 1.3028573241476757, + "learning_rate": 1.569873814399928e-05, + "loss": 0.4467, + "step": 18970 + }, + { + "epoch": 0.3278096488802875, + "grad_norm": 1.1110631653652498, + "learning_rate": 1.5698278253023844e-05, + "loss": 0.5745, + "step": 18971 + }, + { + "epoch": 0.3278269283936964, + "grad_norm": 0.7154643167625093, + "learning_rate": 1.5697818344200966e-05, + "loss": 0.5291, + "step": 18972 + }, + { + "epoch": 0.32784420790710533, + "grad_norm": 0.9035462063753119, + "learning_rate": 1.5697358417532086e-05, + "loss": 0.5642, + "step": 18973 + }, + { + "epoch": 0.32786148742051424, + "grad_norm": 1.1059174591324359, + "learning_rate": 1.5696898473018644e-05, + "loss": 0.5722, + "step": 18974 + }, + { + "epoch": 0.32787876693392315, + "grad_norm": 0.8089675224209242, + "learning_rate": 1.5696438510662088e-05, + "loss": 0.5365, + "step": 18975 + }, + { + "epoch": 0.32789604644733206, + "grad_norm": 1.2733708537058723, + "learning_rate": 1.569597853046385e-05, + "loss": 0.5897, + "step": 18976 + }, + { + "epoch": 0.32791332596074096, + "grad_norm": 0.8886495659241564, + "learning_rate": 1.5695518532425376e-05, + "loss": 0.5017, + "step": 18977 + }, + { + "epoch": 0.32793060547414987, + "grad_norm": 2.022615758084837, + "learning_rate": 1.5695058516548108e-05, + "loss": 0.5447, + "step": 18978 + }, + { + "epoch": 0.3279478849875587, + "grad_norm": 1.0845477532839711, + "learning_rate": 1.5694598482833478e-05, + "loss": 0.5516, + "step": 18979 + }, + { + "epoch": 0.32796516450096763, + "grad_norm": 1.3014466650309244, + "learning_rate": 1.5694138431282936e-05, + "loss": 0.4484, + "step": 18980 + }, + { + "epoch": 0.32798244401437654, + "grad_norm": 1.4615730196233194, + "learning_rate": 1.569367836189792e-05, + "loss": 0.5382, + "step": 18981 + }, + { + "epoch": 0.32799972352778545, + "grad_norm": 1.0318470212217927, + "learning_rate": 1.5693218274679873e-05, + "loss": 0.5212, + "step": 18982 + }, + { + "epoch": 0.32801700304119435, + "grad_norm": 2.4565117439028827, + "learning_rate": 1.5692758169630228e-05, + "loss": 0.543, + "step": 18983 + }, + { + "epoch": 0.32803428255460326, + "grad_norm": 0.847968043156922, + "learning_rate": 1.5692298046750435e-05, + "loss": 0.3692, + "step": 18984 + }, + { + "epoch": 0.32805156206801217, + "grad_norm": 0.8660925613947327, + "learning_rate": 1.5691837906041934e-05, + "loss": 0.4645, + "step": 18985 + }, + { + "epoch": 0.3280688415814211, + "grad_norm": 0.868594690045874, + "learning_rate": 1.569137774750616e-05, + "loss": 0.3582, + "step": 18986 + }, + { + "epoch": 0.32808612109483, + "grad_norm": 1.869525636410325, + "learning_rate": 1.569091757114456e-05, + "loss": 0.5054, + "step": 18987 + }, + { + "epoch": 0.3281034006082389, + "grad_norm": 0.7679832331649791, + "learning_rate": 1.5690457376958573e-05, + "loss": 0.6595, + "step": 18988 + }, + { + "epoch": 0.3281206801216478, + "grad_norm": 0.9729261122415469, + "learning_rate": 1.5689997164949643e-05, + "loss": 0.5073, + "step": 18989 + }, + { + "epoch": 0.32813795963505665, + "grad_norm": 0.8993275498964036, + "learning_rate": 1.5689536935119208e-05, + "loss": 0.4022, + "step": 18990 + }, + { + "epoch": 0.32815523914846556, + "grad_norm": 0.8489963895700283, + "learning_rate": 1.568907668746871e-05, + "loss": 0.3633, + "step": 18991 + }, + { + "epoch": 0.32817251866187447, + "grad_norm": 1.2244328834085405, + "learning_rate": 1.5688616421999595e-05, + "loss": 0.6223, + "step": 18992 + }, + { + "epoch": 0.3281897981752834, + "grad_norm": 0.6421578281912503, + "learning_rate": 1.5688156138713297e-05, + "loss": 0.7479, + "step": 18993 + }, + { + "epoch": 0.3282070776886923, + "grad_norm": 0.8897702758327949, + "learning_rate": 1.5687695837611266e-05, + "loss": 0.5103, + "step": 18994 + }, + { + "epoch": 0.3282243572021012, + "grad_norm": 1.8799918125429993, + "learning_rate": 1.5687235518694937e-05, + "loss": 0.6378, + "step": 18995 + }, + { + "epoch": 0.3282416367155101, + "grad_norm": 0.7960501887716379, + "learning_rate": 1.5686775181965752e-05, + "loss": 0.5491, + "step": 18996 + }, + { + "epoch": 0.328258916228919, + "grad_norm": 0.8034483832072237, + "learning_rate": 1.568631482742516e-05, + "loss": 0.541, + "step": 18997 + }, + { + "epoch": 0.3282761957423279, + "grad_norm": 0.7667335904264723, + "learning_rate": 1.5685854455074593e-05, + "loss": 0.5092, + "step": 18998 + }, + { + "epoch": 0.3282934752557368, + "grad_norm": 0.7289170673735679, + "learning_rate": 1.5685394064915498e-05, + "loss": 0.5249, + "step": 18999 + }, + { + "epoch": 0.3283107547691457, + "grad_norm": 0.9490724451378764, + "learning_rate": 1.5684933656949318e-05, + "loss": 0.7651, + "step": 19000 + }, + { + "epoch": 0.3283280342825546, + "grad_norm": 1.1893267701277799, + "learning_rate": 1.5684473231177495e-05, + "loss": 0.545, + "step": 19001 + }, + { + "epoch": 0.3283453137959635, + "grad_norm": 0.8345867775538451, + "learning_rate": 1.568401278760147e-05, + "loss": 0.3195, + "step": 19002 + }, + { + "epoch": 0.3283625933093724, + "grad_norm": 1.0559215854438682, + "learning_rate": 1.5683552326222683e-05, + "loss": 0.5631, + "step": 19003 + }, + { + "epoch": 0.3283798728227813, + "grad_norm": 0.7934386615952695, + "learning_rate": 1.568309184704258e-05, + "loss": 0.3783, + "step": 19004 + }, + { + "epoch": 0.3283971523361902, + "grad_norm": 0.4024310036269581, + "learning_rate": 1.56826313500626e-05, + "loss": 0.6847, + "step": 19005 + }, + { + "epoch": 0.3284144318495991, + "grad_norm": 0.907663404411865, + "learning_rate": 1.568217083528419e-05, + "loss": 0.3913, + "step": 19006 + }, + { + "epoch": 0.328431711363008, + "grad_norm": 0.9904884441070301, + "learning_rate": 1.5681710302708782e-05, + "loss": 0.4831, + "step": 19007 + }, + { + "epoch": 0.32844899087641694, + "grad_norm": 1.0094665444664506, + "learning_rate": 1.568124975233783e-05, + "loss": 0.4986, + "step": 19008 + }, + { + "epoch": 0.32846627038982584, + "grad_norm": 1.859082347244339, + "learning_rate": 1.5680789184172773e-05, + "loss": 0.5658, + "step": 19009 + }, + { + "epoch": 0.32848354990323475, + "grad_norm": 1.289978408826619, + "learning_rate": 1.568032859821505e-05, + "loss": 0.5483, + "step": 19010 + }, + { + "epoch": 0.3285008294166436, + "grad_norm": 0.8684928692802893, + "learning_rate": 1.5679867994466108e-05, + "loss": 0.5752, + "step": 19011 + }, + { + "epoch": 0.3285181089300525, + "grad_norm": 0.4449742639966061, + "learning_rate": 1.5679407372927387e-05, + "loss": 0.5897, + "step": 19012 + }, + { + "epoch": 0.3285353884434614, + "grad_norm": 0.7844674318101739, + "learning_rate": 1.567894673360033e-05, + "loss": 0.5306, + "step": 19013 + }, + { + "epoch": 0.3285526679568703, + "grad_norm": 0.6385381358882843, + "learning_rate": 1.5678486076486385e-05, + "loss": 0.4604, + "step": 19014 + }, + { + "epoch": 0.32856994747027923, + "grad_norm": 0.7165379706649843, + "learning_rate": 1.5678025401586983e-05, + "loss": 0.4898, + "step": 19015 + }, + { + "epoch": 0.32858722698368814, + "grad_norm": 0.7857225184065939, + "learning_rate": 1.5677564708903577e-05, + "loss": 0.3593, + "step": 19016 + }, + { + "epoch": 0.32860450649709705, + "grad_norm": 0.8462253558474587, + "learning_rate": 1.5677103998437607e-05, + "loss": 0.4109, + "step": 19017 + }, + { + "epoch": 0.32862178601050596, + "grad_norm": 0.9960338878423515, + "learning_rate": 1.5676643270190516e-05, + "loss": 0.4099, + "step": 19018 + }, + { + "epoch": 0.32863906552391486, + "grad_norm": 1.1310143826161232, + "learning_rate": 1.5676182524163746e-05, + "loss": 0.4996, + "step": 19019 + }, + { + "epoch": 0.32865634503732377, + "grad_norm": 1.003498253198888, + "learning_rate": 1.5675721760358743e-05, + "loss": 0.5059, + "step": 19020 + }, + { + "epoch": 0.3286736245507326, + "grad_norm": 0.745789930569426, + "learning_rate": 1.5675260978776945e-05, + "loss": 0.4432, + "step": 19021 + }, + { + "epoch": 0.32869090406414153, + "grad_norm": 0.5896907422097445, + "learning_rate": 1.5674800179419797e-05, + "loss": 0.5692, + "step": 19022 + }, + { + "epoch": 0.32870818357755044, + "grad_norm": 1.0562927466618828, + "learning_rate": 1.567433936228875e-05, + "loss": 0.5505, + "step": 19023 + }, + { + "epoch": 0.32872546309095935, + "grad_norm": 0.6666372815497246, + "learning_rate": 1.5673878527385236e-05, + "loss": 0.4727, + "step": 19024 + }, + { + "epoch": 0.32874274260436825, + "grad_norm": 0.3834738538989741, + "learning_rate": 1.5673417674710706e-05, + "loss": 0.5124, + "step": 19025 + }, + { + "epoch": 0.32876002211777716, + "grad_norm": 1.2540621566223042, + "learning_rate": 1.56729568042666e-05, + "loss": 0.5519, + "step": 19026 + }, + { + "epoch": 0.32877730163118607, + "grad_norm": 1.1219999100187443, + "learning_rate": 1.567249591605436e-05, + "loss": 0.7039, + "step": 19027 + }, + { + "epoch": 0.328794581144595, + "grad_norm": 0.7658543828484399, + "learning_rate": 1.5672035010075434e-05, + "loss": 0.43, + "step": 19028 + }, + { + "epoch": 0.3288118606580039, + "grad_norm": 1.3388626427588382, + "learning_rate": 1.567157408633126e-05, + "loss": 0.549, + "step": 19029 + }, + { + "epoch": 0.3288291401714128, + "grad_norm": 1.1881771323592902, + "learning_rate": 1.567111314482329e-05, + "loss": 0.7319, + "step": 19030 + }, + { + "epoch": 0.3288464196848217, + "grad_norm": 1.3828056974994578, + "learning_rate": 1.567065218555296e-05, + "loss": 0.5299, + "step": 19031 + }, + { + "epoch": 0.32886369919823055, + "grad_norm": 1.1756683990025816, + "learning_rate": 1.5670191208521717e-05, + "loss": 0.4753, + "step": 19032 + }, + { + "epoch": 0.32888097871163946, + "grad_norm": 0.959074566298396, + "learning_rate": 1.5669730213731002e-05, + "loss": 0.3734, + "step": 19033 + }, + { + "epoch": 0.32889825822504837, + "grad_norm": 0.8711237112046445, + "learning_rate": 1.5669269201182262e-05, + "loss": 0.484, + "step": 19034 + }, + { + "epoch": 0.3289155377384573, + "grad_norm": 1.4323357643491945, + "learning_rate": 1.5668808170876944e-05, + "loss": 0.4171, + "step": 19035 + }, + { + "epoch": 0.3289328172518662, + "grad_norm": 0.7434989766899127, + "learning_rate": 1.5668347122816483e-05, + "loss": 0.5323, + "step": 19036 + }, + { + "epoch": 0.3289500967652751, + "grad_norm": 0.6412832733180027, + "learning_rate": 1.566788605700233e-05, + "loss": 0.4629, + "step": 19037 + }, + { + "epoch": 0.328967376278684, + "grad_norm": 1.523693172943965, + "learning_rate": 1.566742497343593e-05, + "loss": 0.6362, + "step": 19038 + }, + { + "epoch": 0.3289846557920929, + "grad_norm": 0.7993691954722332, + "learning_rate": 1.566696387211872e-05, + "loss": 0.7582, + "step": 19039 + }, + { + "epoch": 0.3290019353055018, + "grad_norm": 0.7434836312637647, + "learning_rate": 1.5666502753052147e-05, + "loss": 0.2838, + "step": 19040 + }, + { + "epoch": 0.3290192148189107, + "grad_norm": 0.89726908575935, + "learning_rate": 1.566604161623766e-05, + "loss": 0.4192, + "step": 19041 + }, + { + "epoch": 0.32903649433231963, + "grad_norm": 1.0058003604456944, + "learning_rate": 1.56655804616767e-05, + "loss": 0.5387, + "step": 19042 + }, + { + "epoch": 0.3290537738457285, + "grad_norm": 1.232686601988503, + "learning_rate": 1.5665119289370713e-05, + "loss": 0.4531, + "step": 19043 + }, + { + "epoch": 0.3290710533591374, + "grad_norm": 1.0803118474224382, + "learning_rate": 1.5664658099321136e-05, + "loss": 0.6318, + "step": 19044 + }, + { + "epoch": 0.3290883328725463, + "grad_norm": 1.157500875273629, + "learning_rate": 1.5664196891529425e-05, + "loss": 0.6362, + "step": 19045 + }, + { + "epoch": 0.3291056123859552, + "grad_norm": 0.8301017151772796, + "learning_rate": 1.5663735665997015e-05, + "loss": 0.394, + "step": 19046 + }, + { + "epoch": 0.3291228918993641, + "grad_norm": 0.9359523034899608, + "learning_rate": 1.566327442272536e-05, + "loss": 0.5756, + "step": 19047 + }, + { + "epoch": 0.329140171412773, + "grad_norm": 0.8124894119794183, + "learning_rate": 1.5662813161715894e-05, + "loss": 0.3728, + "step": 19048 + }, + { + "epoch": 0.3291574509261819, + "grad_norm": 1.4607083824215796, + "learning_rate": 1.566235188297007e-05, + "loss": 0.7297, + "step": 19049 + }, + { + "epoch": 0.32917473043959083, + "grad_norm": 0.7906718977664107, + "learning_rate": 1.5661890586489325e-05, + "loss": 0.4868, + "step": 19050 + }, + { + "epoch": 0.32919200995299974, + "grad_norm": 1.0938948437205176, + "learning_rate": 1.566142927227511e-05, + "loss": 0.2803, + "step": 19051 + }, + { + "epoch": 0.32920928946640865, + "grad_norm": 0.7968192835110398, + "learning_rate": 1.566096794032887e-05, + "loss": 0.4736, + "step": 19052 + }, + { + "epoch": 0.3292265689798175, + "grad_norm": 0.8639835397243196, + "learning_rate": 1.566050659065205e-05, + "loss": 0.574, + "step": 19053 + }, + { + "epoch": 0.3292438484932264, + "grad_norm": 0.7988029074631413, + "learning_rate": 1.566004522324609e-05, + "loss": 0.4969, + "step": 19054 + }, + { + "epoch": 0.3292611280066353, + "grad_norm": 0.9983466175511819, + "learning_rate": 1.565958383811244e-05, + "loss": 0.3784, + "step": 19055 + }, + { + "epoch": 0.3292784075200442, + "grad_norm": 0.5343858558020027, + "learning_rate": 1.5659122435252543e-05, + "loss": 0.5718, + "step": 19056 + }, + { + "epoch": 0.32929568703345313, + "grad_norm": 1.1356689237581126, + "learning_rate": 1.5658661014667842e-05, + "loss": 0.602, + "step": 19057 + }, + { + "epoch": 0.32931296654686204, + "grad_norm": 1.2015458211418872, + "learning_rate": 1.5658199576359787e-05, + "loss": 0.486, + "step": 19058 + }, + { + "epoch": 0.32933024606027095, + "grad_norm": 1.219281082135424, + "learning_rate": 1.565773812032982e-05, + "loss": 0.5107, + "step": 19059 + }, + { + "epoch": 0.32934752557367986, + "grad_norm": 1.3428919314384284, + "learning_rate": 1.565727664657939e-05, + "loss": 0.5576, + "step": 19060 + }, + { + "epoch": 0.32936480508708876, + "grad_norm": 0.7921876800048111, + "learning_rate": 1.5656815155109937e-05, + "loss": 0.3974, + "step": 19061 + }, + { + "epoch": 0.32938208460049767, + "grad_norm": 0.9695582035766835, + "learning_rate": 1.5656353645922907e-05, + "loss": 0.4643, + "step": 19062 + }, + { + "epoch": 0.3293993641139066, + "grad_norm": 1.1731172055243861, + "learning_rate": 1.565589211901975e-05, + "loss": 0.514, + "step": 19063 + }, + { + "epoch": 0.32941664362731543, + "grad_norm": 0.9681458229018328, + "learning_rate": 1.565543057440191e-05, + "loss": 0.581, + "step": 19064 + }, + { + "epoch": 0.32943392314072434, + "grad_norm": 0.8448016992181164, + "learning_rate": 1.565496901207083e-05, + "loss": 0.414, + "step": 19065 + }, + { + "epoch": 0.32945120265413325, + "grad_norm": 0.7507312402420084, + "learning_rate": 1.565450743202796e-05, + "loss": 0.2573, + "step": 19066 + }, + { + "epoch": 0.32946848216754215, + "grad_norm": 1.3489225961620668, + "learning_rate": 1.565404583427474e-05, + "loss": 0.6176, + "step": 19067 + }, + { + "epoch": 0.32948576168095106, + "grad_norm": 0.7447543361017864, + "learning_rate": 1.565358421881262e-05, + "loss": 0.4582, + "step": 19068 + }, + { + "epoch": 0.32950304119435997, + "grad_norm": 1.1464500037034073, + "learning_rate": 1.5653122585643043e-05, + "loss": 0.6199, + "step": 19069 + }, + { + "epoch": 0.3295203207077689, + "grad_norm": 0.8784552040628311, + "learning_rate": 1.565266093476746e-05, + "loss": 0.5109, + "step": 19070 + }, + { + "epoch": 0.3295376002211778, + "grad_norm": 0.8848137521616771, + "learning_rate": 1.5652199266187306e-05, + "loss": 0.5553, + "step": 19071 + }, + { + "epoch": 0.3295548797345867, + "grad_norm": 1.8357389274650413, + "learning_rate": 1.565173757990404e-05, + "loss": 0.7603, + "step": 19072 + }, + { + "epoch": 0.3295721592479956, + "grad_norm": 0.8154564333695796, + "learning_rate": 1.5651275875919104e-05, + "loss": 0.4047, + "step": 19073 + }, + { + "epoch": 0.32958943876140445, + "grad_norm": 2.18688422236032, + "learning_rate": 1.565081415423394e-05, + "loss": 0.6235, + "step": 19074 + }, + { + "epoch": 0.32960671827481336, + "grad_norm": 1.0646905347974913, + "learning_rate": 1.5650352414849996e-05, + "loss": 0.3166, + "step": 19075 + }, + { + "epoch": 0.32962399778822227, + "grad_norm": 1.128488947452573, + "learning_rate": 1.564989065776872e-05, + "loss": 0.6416, + "step": 19076 + }, + { + "epoch": 0.3296412773016312, + "grad_norm": 1.101850542474198, + "learning_rate": 1.564942888299156e-05, + "loss": 0.3688, + "step": 19077 + }, + { + "epoch": 0.3296585568150401, + "grad_norm": 1.5971813942493185, + "learning_rate": 1.5648967090519953e-05, + "loss": 0.5645, + "step": 19078 + }, + { + "epoch": 0.329675836328449, + "grad_norm": 0.9056774228630913, + "learning_rate": 1.5648505280355354e-05, + "loss": 0.611, + "step": 19079 + }, + { + "epoch": 0.3296931158418579, + "grad_norm": 0.9444715397456658, + "learning_rate": 1.5648043452499208e-05, + "loss": 0.4361, + "step": 19080 + }, + { + "epoch": 0.3297103953552668, + "grad_norm": 1.410396027752461, + "learning_rate": 1.564758160695296e-05, + "loss": 0.3838, + "step": 19081 + }, + { + "epoch": 0.3297276748686757, + "grad_norm": 1.4989789944751768, + "learning_rate": 1.564711974371806e-05, + "loss": 0.7139, + "step": 19082 + }, + { + "epoch": 0.3297449543820846, + "grad_norm": 0.8697639701325709, + "learning_rate": 1.564665786279595e-05, + "loss": 0.5961, + "step": 19083 + }, + { + "epoch": 0.32976223389549353, + "grad_norm": 1.6703148954957465, + "learning_rate": 1.564619596418808e-05, + "loss": 0.6123, + "step": 19084 + }, + { + "epoch": 0.3297795134089024, + "grad_norm": 1.2057105015851857, + "learning_rate": 1.5645734047895893e-05, + "loss": 0.4897, + "step": 19085 + }, + { + "epoch": 0.3297967929223113, + "grad_norm": 0.9052093502606241, + "learning_rate": 1.5645272113920837e-05, + "loss": 0.6136, + "step": 19086 + }, + { + "epoch": 0.3298140724357202, + "grad_norm": 1.1482669976206752, + "learning_rate": 1.564481016226436e-05, + "loss": 0.4162, + "step": 19087 + }, + { + "epoch": 0.3298313519491291, + "grad_norm": 0.9099497560738445, + "learning_rate": 1.5644348192927908e-05, + "loss": 0.5929, + "step": 19088 + }, + { + "epoch": 0.329848631462538, + "grad_norm": 0.6501387229120446, + "learning_rate": 1.5643886205912933e-05, + "loss": 0.4655, + "step": 19089 + }, + { + "epoch": 0.3298659109759469, + "grad_norm": 1.26053289178646, + "learning_rate": 1.5643424201220875e-05, + "loss": 0.4711, + "step": 19090 + }, + { + "epoch": 0.3298831904893558, + "grad_norm": 0.699715789003099, + "learning_rate": 1.564296217885318e-05, + "loss": 0.403, + "step": 19091 + }, + { + "epoch": 0.32990047000276473, + "grad_norm": 1.058802146573416, + "learning_rate": 1.5642500138811303e-05, + "loss": 0.5077, + "step": 19092 + }, + { + "epoch": 0.32991774951617364, + "grad_norm": 0.9003061285651013, + "learning_rate": 1.5642038081096684e-05, + "loss": 0.2904, + "step": 19093 + }, + { + "epoch": 0.32993502902958255, + "grad_norm": 0.4114069006693756, + "learning_rate": 1.5641576005710778e-05, + "loss": 0.5942, + "step": 19094 + }, + { + "epoch": 0.32995230854299146, + "grad_norm": 0.8040722596440721, + "learning_rate": 1.5641113912655023e-05, + "loss": 0.4292, + "step": 19095 + }, + { + "epoch": 0.3299695880564003, + "grad_norm": 0.8963058704614605, + "learning_rate": 1.5640651801930872e-05, + "loss": 0.4645, + "step": 19096 + }, + { + "epoch": 0.3299868675698092, + "grad_norm": 1.40476140481263, + "learning_rate": 1.5640189673539766e-05, + "loss": 0.3636, + "step": 19097 + }, + { + "epoch": 0.3300041470832181, + "grad_norm": 0.502449314617788, + "learning_rate": 1.5639727527483164e-05, + "loss": 0.6947, + "step": 19098 + }, + { + "epoch": 0.33002142659662703, + "grad_norm": 0.888758041359734, + "learning_rate": 1.5639265363762503e-05, + "loss": 0.383, + "step": 19099 + }, + { + "epoch": 0.33003870611003594, + "grad_norm": 1.2571990935302106, + "learning_rate": 1.5638803182379233e-05, + "loss": 0.5856, + "step": 19100 + }, + { + "epoch": 0.33005598562344485, + "grad_norm": 0.5717606832231134, + "learning_rate": 1.5638340983334808e-05, + "loss": 0.7636, + "step": 19101 + }, + { + "epoch": 0.33007326513685376, + "grad_norm": 0.9981564917743633, + "learning_rate": 1.5637878766630667e-05, + "loss": 0.4017, + "step": 19102 + }, + { + "epoch": 0.33009054465026266, + "grad_norm": 1.0858451212874471, + "learning_rate": 1.563741653226826e-05, + "loss": 0.6304, + "step": 19103 + }, + { + "epoch": 0.33010782416367157, + "grad_norm": 1.0307938349023182, + "learning_rate": 1.563695428024904e-05, + "loss": 0.5485, + "step": 19104 + }, + { + "epoch": 0.3301251036770805, + "grad_norm": 1.4312487285738533, + "learning_rate": 1.5636492010574447e-05, + "loss": 0.5058, + "step": 19105 + }, + { + "epoch": 0.33014238319048933, + "grad_norm": 0.8252909533587881, + "learning_rate": 1.5636029723245936e-05, + "loss": 0.2472, + "step": 19106 + }, + { + "epoch": 0.33015966270389824, + "grad_norm": 1.2150868717735788, + "learning_rate": 1.5635567418264954e-05, + "loss": 0.4279, + "step": 19107 + }, + { + "epoch": 0.33017694221730715, + "grad_norm": 0.47997245717543346, + "learning_rate": 1.563510509563294e-05, + "loss": 0.6201, + "step": 19108 + }, + { + "epoch": 0.33019422173071605, + "grad_norm": 0.9610957024271667, + "learning_rate": 1.563464275535135e-05, + "loss": 0.5258, + "step": 19109 + }, + { + "epoch": 0.33021150124412496, + "grad_norm": 1.0339816460183873, + "learning_rate": 1.5634180397421634e-05, + "loss": 0.5554, + "step": 19110 + }, + { + "epoch": 0.33022878075753387, + "grad_norm": 0.5507462011738795, + "learning_rate": 1.5633718021845236e-05, + "loss": 0.6761, + "step": 19111 + }, + { + "epoch": 0.3302460602709428, + "grad_norm": 0.6398018197679627, + "learning_rate": 1.5633255628623603e-05, + "loss": 0.36, + "step": 19112 + }, + { + "epoch": 0.3302633397843517, + "grad_norm": 1.4762526810962135, + "learning_rate": 1.5632793217758182e-05, + "loss": 0.6485, + "step": 19113 + }, + { + "epoch": 0.3302806192977606, + "grad_norm": 0.7126756288553872, + "learning_rate": 1.5632330789250428e-05, + "loss": 0.4479, + "step": 19114 + }, + { + "epoch": 0.3302978988111695, + "grad_norm": 1.4304802183575456, + "learning_rate": 1.563186834310179e-05, + "loss": 0.7509, + "step": 19115 + }, + { + "epoch": 0.3303151783245784, + "grad_norm": 0.8113556485134964, + "learning_rate": 1.563140587931371e-05, + "loss": 0.4208, + "step": 19116 + }, + { + "epoch": 0.33033245783798726, + "grad_norm": 0.836218870239186, + "learning_rate": 1.5630943397887636e-05, + "loss": 0.4867, + "step": 19117 + }, + { + "epoch": 0.33034973735139617, + "grad_norm": 0.9662766888130803, + "learning_rate": 1.563048089882502e-05, + "loss": 0.5128, + "step": 19118 + }, + { + "epoch": 0.3303670168648051, + "grad_norm": 0.8327165218355944, + "learning_rate": 1.5630018382127314e-05, + "loss": 0.3784, + "step": 19119 + }, + { + "epoch": 0.330384296378214, + "grad_norm": 2.1624904856939478, + "learning_rate": 1.5629555847795955e-05, + "loss": 0.4049, + "step": 19120 + }, + { + "epoch": 0.3304015758916229, + "grad_norm": 0.4446979764317114, + "learning_rate": 1.5629093295832406e-05, + "loss": 0.6278, + "step": 19121 + }, + { + "epoch": 0.3304188554050318, + "grad_norm": 0.8519952387573463, + "learning_rate": 1.5628630726238107e-05, + "loss": 0.3137, + "step": 19122 + }, + { + "epoch": 0.3304361349184407, + "grad_norm": 1.1420076538876112, + "learning_rate": 1.5628168139014508e-05, + "loss": 0.4201, + "step": 19123 + }, + { + "epoch": 0.3304534144318496, + "grad_norm": 1.8065191798329698, + "learning_rate": 1.562770553416306e-05, + "loss": 0.4975, + "step": 19124 + }, + { + "epoch": 0.3304706939452585, + "grad_norm": 0.5114189712749281, + "learning_rate": 1.5627242911685207e-05, + "loss": 0.7332, + "step": 19125 + }, + { + "epoch": 0.33048797345866743, + "grad_norm": 0.8677725847101576, + "learning_rate": 1.5626780271582408e-05, + "loss": 0.6665, + "step": 19126 + }, + { + "epoch": 0.3305052529720763, + "grad_norm": 1.1530536096813129, + "learning_rate": 1.56263176138561e-05, + "loss": 0.6727, + "step": 19127 + }, + { + "epoch": 0.3305225324854852, + "grad_norm": 0.7897615532961063, + "learning_rate": 1.562585493850774e-05, + "loss": 0.3639, + "step": 19128 + }, + { + "epoch": 0.3305398119988941, + "grad_norm": 0.4629792652888118, + "learning_rate": 1.5625392245538774e-05, + "loss": 0.6443, + "step": 19129 + }, + { + "epoch": 0.330557091512303, + "grad_norm": 0.7933716346795731, + "learning_rate": 1.5624929534950654e-05, + "loss": 0.5428, + "step": 19130 + }, + { + "epoch": 0.3305743710257119, + "grad_norm": 0.5883733756123808, + "learning_rate": 1.5624466806744824e-05, + "loss": 0.4786, + "step": 19131 + }, + { + "epoch": 0.3305916505391208, + "grad_norm": 0.8412610350869766, + "learning_rate": 1.562400406092274e-05, + "loss": 0.484, + "step": 19132 + }, + { + "epoch": 0.3306089300525297, + "grad_norm": 0.956506673343634, + "learning_rate": 1.5623541297485844e-05, + "loss": 0.6231, + "step": 19133 + }, + { + "epoch": 0.33062620956593863, + "grad_norm": 0.7825786636191421, + "learning_rate": 1.562307851643559e-05, + "loss": 0.7303, + "step": 19134 + }, + { + "epoch": 0.33064348907934754, + "grad_norm": 0.5291418486541268, + "learning_rate": 1.562261571777343e-05, + "loss": 0.74, + "step": 19135 + }, + { + "epoch": 0.33066076859275645, + "grad_norm": 1.3685717751222792, + "learning_rate": 1.5622152901500808e-05, + "loss": 0.4283, + "step": 19136 + }, + { + "epoch": 0.33067804810616536, + "grad_norm": 1.9664682441709878, + "learning_rate": 1.5621690067619175e-05, + "loss": 0.4425, + "step": 19137 + }, + { + "epoch": 0.3306953276195742, + "grad_norm": 0.5644340873619134, + "learning_rate": 1.5621227216129983e-05, + "loss": 0.3342, + "step": 19138 + }, + { + "epoch": 0.3307126071329831, + "grad_norm": 1.1339490225847773, + "learning_rate": 1.5620764347034677e-05, + "loss": 0.5148, + "step": 19139 + }, + { + "epoch": 0.330729886646392, + "grad_norm": 1.0368027781070357, + "learning_rate": 1.5620301460334715e-05, + "loss": 0.3405, + "step": 19140 + }, + { + "epoch": 0.33074716615980093, + "grad_norm": 1.3847193780837586, + "learning_rate": 1.561983855603154e-05, + "loss": 0.6212, + "step": 19141 + }, + { + "epoch": 0.33076444567320984, + "grad_norm": 1.1306585265646982, + "learning_rate": 1.56193756341266e-05, + "loss": 0.4178, + "step": 19142 + }, + { + "epoch": 0.33078172518661875, + "grad_norm": 1.0973891473531099, + "learning_rate": 1.5618912694621352e-05, + "loss": 0.3133, + "step": 19143 + }, + { + "epoch": 0.33079900470002765, + "grad_norm": 1.9002058407902425, + "learning_rate": 1.5618449737517242e-05, + "loss": 0.5454, + "step": 19144 + }, + { + "epoch": 0.33081628421343656, + "grad_norm": 0.415055408502737, + "learning_rate": 1.561798676281572e-05, + "loss": 0.6202, + "step": 19145 + }, + { + "epoch": 0.33083356372684547, + "grad_norm": 1.5407953886731616, + "learning_rate": 1.561752377051824e-05, + "loss": 0.5334, + "step": 19146 + }, + { + "epoch": 0.3308508432402544, + "grad_norm": 0.8722700317224099, + "learning_rate": 1.5617060760626243e-05, + "loss": 0.5355, + "step": 19147 + }, + { + "epoch": 0.33086812275366323, + "grad_norm": 1.435542750106848, + "learning_rate": 1.561659773314119e-05, + "loss": 0.6557, + "step": 19148 + }, + { + "epoch": 0.33088540226707214, + "grad_norm": 1.2292666956746834, + "learning_rate": 1.561613468806452e-05, + "loss": 0.6513, + "step": 19149 + }, + { + "epoch": 0.33090268178048104, + "grad_norm": 0.8859178353434847, + "learning_rate": 1.5615671625397694e-05, + "loss": 0.577, + "step": 19150 + }, + { + "epoch": 0.33091996129388995, + "grad_norm": 1.1829989985080647, + "learning_rate": 1.5615208545142156e-05, + "loss": 0.6377, + "step": 19151 + }, + { + "epoch": 0.33093724080729886, + "grad_norm": 0.8111408932477039, + "learning_rate": 1.5614745447299358e-05, + "loss": 0.4585, + "step": 19152 + }, + { + "epoch": 0.33095452032070777, + "grad_norm": 0.7712167328059875, + "learning_rate": 1.561428233187075e-05, + "loss": 0.3561, + "step": 19153 + }, + { + "epoch": 0.3309717998341167, + "grad_norm": 1.026669228940893, + "learning_rate": 1.5613819198857784e-05, + "loss": 0.6478, + "step": 19154 + }, + { + "epoch": 0.3309890793475256, + "grad_norm": 0.6819601804375394, + "learning_rate": 1.561335604826191e-05, + "loss": 0.4079, + "step": 19155 + }, + { + "epoch": 0.3310063588609345, + "grad_norm": 1.0627356768901493, + "learning_rate": 1.5612892880084575e-05, + "loss": 0.4518, + "step": 19156 + }, + { + "epoch": 0.3310236383743434, + "grad_norm": 0.664751031504422, + "learning_rate": 1.561242969432724e-05, + "loss": 0.7064, + "step": 19157 + }, + { + "epoch": 0.3310409178877523, + "grad_norm": 1.4178582554695502, + "learning_rate": 1.5611966490991344e-05, + "loss": 0.5969, + "step": 19158 + }, + { + "epoch": 0.33105819740116116, + "grad_norm": 0.9236490327584936, + "learning_rate": 1.5611503270078342e-05, + "loss": 0.3273, + "step": 19159 + }, + { + "epoch": 0.33107547691457007, + "grad_norm": 0.9909498635278171, + "learning_rate": 1.5611040031589687e-05, + "loss": 0.5073, + "step": 19160 + }, + { + "epoch": 0.331092756427979, + "grad_norm": 1.1991908147911734, + "learning_rate": 1.5610576775526826e-05, + "loss": 0.4172, + "step": 19161 + }, + { + "epoch": 0.3311100359413879, + "grad_norm": 1.370889756954429, + "learning_rate": 1.5610113501891213e-05, + "loss": 0.5374, + "step": 19162 + }, + { + "epoch": 0.3311273154547968, + "grad_norm": 0.8287096310462464, + "learning_rate": 1.5609650210684298e-05, + "loss": 0.469, + "step": 19163 + }, + { + "epoch": 0.3311445949682057, + "grad_norm": 1.7000801715099758, + "learning_rate": 1.5609186901907534e-05, + "loss": 0.6774, + "step": 19164 + }, + { + "epoch": 0.3311618744816146, + "grad_norm": 0.9763638465920357, + "learning_rate": 1.560872357556237e-05, + "loss": 0.447, + "step": 19165 + }, + { + "epoch": 0.3311791539950235, + "grad_norm": 0.9096351759982685, + "learning_rate": 1.5608260231650255e-05, + "loss": 0.58, + "step": 19166 + }, + { + "epoch": 0.3311964335084324, + "grad_norm": 0.8311531804888616, + "learning_rate": 1.5607796870172646e-05, + "loss": 0.3174, + "step": 19167 + }, + { + "epoch": 0.3312137130218413, + "grad_norm": 0.987548928482391, + "learning_rate": 1.560733349113099e-05, + "loss": 0.4883, + "step": 19168 + }, + { + "epoch": 0.33123099253525023, + "grad_norm": 1.2157090687719831, + "learning_rate": 1.5606870094526738e-05, + "loss": 0.5604, + "step": 19169 + }, + { + "epoch": 0.3312482720486591, + "grad_norm": 0.9268733165255936, + "learning_rate": 1.5606406680361342e-05, + "loss": 0.4029, + "step": 19170 + }, + { + "epoch": 0.331265551562068, + "grad_norm": 1.094013168576485, + "learning_rate": 1.560594324863626e-05, + "loss": 0.4774, + "step": 19171 + }, + { + "epoch": 0.3312828310754769, + "grad_norm": 1.242807693358037, + "learning_rate": 1.560547979935293e-05, + "loss": 0.6562, + "step": 19172 + }, + { + "epoch": 0.3313001105888858, + "grad_norm": 0.9607118805940956, + "learning_rate": 1.5605016332512817e-05, + "loss": 0.6498, + "step": 19173 + }, + { + "epoch": 0.3313173901022947, + "grad_norm": 1.0790287291444356, + "learning_rate": 1.560455284811736e-05, + "loss": 0.4521, + "step": 19174 + }, + { + "epoch": 0.3313346696157036, + "grad_norm": 0.8298805780464601, + "learning_rate": 1.5604089346168025e-05, + "loss": 0.6142, + "step": 19175 + }, + { + "epoch": 0.33135194912911253, + "grad_norm": 0.9400576076453353, + "learning_rate": 1.5603625826666253e-05, + "loss": 0.6359, + "step": 19176 + }, + { + "epoch": 0.33136922864252144, + "grad_norm": 1.0836370338078036, + "learning_rate": 1.5603162289613503e-05, + "loss": 0.4725, + "step": 19177 + }, + { + "epoch": 0.33138650815593035, + "grad_norm": 0.9784912321785165, + "learning_rate": 1.5602698735011217e-05, + "loss": 0.5245, + "step": 19178 + }, + { + "epoch": 0.33140378766933926, + "grad_norm": 0.8878539412126375, + "learning_rate": 1.5602235162860858e-05, + "loss": 0.4349, + "step": 19179 + }, + { + "epoch": 0.3314210671827481, + "grad_norm": 1.0711364555270062, + "learning_rate": 1.5601771573163866e-05, + "loss": 0.3873, + "step": 19180 + }, + { + "epoch": 0.331438346696157, + "grad_norm": 0.6334506112064935, + "learning_rate": 1.5601307965921707e-05, + "loss": 0.4446, + "step": 19181 + }, + { + "epoch": 0.3314556262095659, + "grad_norm": 0.9705459467307131, + "learning_rate": 1.560084434113582e-05, + "loss": 0.4701, + "step": 19182 + }, + { + "epoch": 0.33147290572297483, + "grad_norm": 0.8113910526234928, + "learning_rate": 1.5600380698807666e-05, + "loss": 0.4912, + "step": 19183 + }, + { + "epoch": 0.33149018523638374, + "grad_norm": 0.4209290816699102, + "learning_rate": 1.5599917038938693e-05, + "loss": 0.6319, + "step": 19184 + }, + { + "epoch": 0.33150746474979265, + "grad_norm": 1.5167919442396223, + "learning_rate": 1.5599453361530355e-05, + "loss": 0.6487, + "step": 19185 + }, + { + "epoch": 0.33152474426320155, + "grad_norm": 0.7999694505278304, + "learning_rate": 1.5598989666584105e-05, + "loss": 0.3841, + "step": 19186 + }, + { + "epoch": 0.33154202377661046, + "grad_norm": 0.43200346291540576, + "learning_rate": 1.559852595410139e-05, + "loss": 0.8081, + "step": 19187 + }, + { + "epoch": 0.33155930329001937, + "grad_norm": 1.1793689669336198, + "learning_rate": 1.559806222408367e-05, + "loss": 0.5561, + "step": 19188 + }, + { + "epoch": 0.3315765828034283, + "grad_norm": 1.1796119339681905, + "learning_rate": 1.559759847653239e-05, + "loss": 0.4362, + "step": 19189 + }, + { + "epoch": 0.3315938623168372, + "grad_norm": 0.636688402025416, + "learning_rate": 1.559713471144901e-05, + "loss": 0.5002, + "step": 19190 + }, + { + "epoch": 0.33161114183024604, + "grad_norm": 0.8530926361295076, + "learning_rate": 1.5596670928834975e-05, + "loss": 0.4423, + "step": 19191 + }, + { + "epoch": 0.33162842134365494, + "grad_norm": 0.8583786519884745, + "learning_rate": 1.5596207128691742e-05, + "loss": 0.5345, + "step": 19192 + }, + { + "epoch": 0.33164570085706385, + "grad_norm": 1.1738232018358716, + "learning_rate": 1.5595743311020766e-05, + "loss": 0.4633, + "step": 19193 + }, + { + "epoch": 0.33166298037047276, + "grad_norm": 0.9094225687644817, + "learning_rate": 1.559527947582349e-05, + "loss": 0.3166, + "step": 19194 + }, + { + "epoch": 0.33168025988388167, + "grad_norm": 1.2773415581436214, + "learning_rate": 1.559481562310138e-05, + "loss": 0.6041, + "step": 19195 + }, + { + "epoch": 0.3316975393972906, + "grad_norm": 0.7311204114500057, + "learning_rate": 1.559435175285588e-05, + "loss": 0.5286, + "step": 19196 + }, + { + "epoch": 0.3317148189106995, + "grad_norm": 1.2069247443116704, + "learning_rate": 1.5593887865088445e-05, + "loss": 0.6388, + "step": 19197 + }, + { + "epoch": 0.3317320984241084, + "grad_norm": 0.7984740764636036, + "learning_rate": 1.5593423959800532e-05, + "loss": 0.3837, + "step": 19198 + }, + { + "epoch": 0.3317493779375173, + "grad_norm": 1.2618704704185186, + "learning_rate": 1.5592960036993586e-05, + "loss": 0.3946, + "step": 19199 + }, + { + "epoch": 0.3317666574509262, + "grad_norm": 0.688291268748216, + "learning_rate": 1.5592496096669064e-05, + "loss": 0.5138, + "step": 19200 + }, + { + "epoch": 0.33178393696433506, + "grad_norm": 1.245074701869332, + "learning_rate": 1.5592032138828422e-05, + "loss": 0.5682, + "step": 19201 + }, + { + "epoch": 0.33180121647774397, + "grad_norm": 0.9401754420117411, + "learning_rate": 1.559156816347311e-05, + "loss": 0.6387, + "step": 19202 + }, + { + "epoch": 0.3318184959911529, + "grad_norm": 0.9320592937186969, + "learning_rate": 1.559110417060458e-05, + "loss": 0.4266, + "step": 19203 + }, + { + "epoch": 0.3318357755045618, + "grad_norm": 0.8745068454885451, + "learning_rate": 1.5590640160224287e-05, + "loss": 0.5454, + "step": 19204 + }, + { + "epoch": 0.3318530550179707, + "grad_norm": 0.923433145248076, + "learning_rate": 1.559017613233368e-05, + "loss": 0.4405, + "step": 19205 + }, + { + "epoch": 0.3318703345313796, + "grad_norm": 1.2118590080788756, + "learning_rate": 1.5589712086934224e-05, + "loss": 0.3985, + "step": 19206 + }, + { + "epoch": 0.3318876140447885, + "grad_norm": 1.2107020292978876, + "learning_rate": 1.558924802402736e-05, + "loss": 0.6855, + "step": 19207 + }, + { + "epoch": 0.3319048935581974, + "grad_norm": 0.7150966989806996, + "learning_rate": 1.558878394361455e-05, + "loss": 0.5075, + "step": 19208 + }, + { + "epoch": 0.3319221730716063, + "grad_norm": 1.2162597106429724, + "learning_rate": 1.558831984569724e-05, + "loss": 0.4764, + "step": 19209 + }, + { + "epoch": 0.3319394525850152, + "grad_norm": 1.3062003855604187, + "learning_rate": 1.558785573027689e-05, + "loss": 0.5184, + "step": 19210 + }, + { + "epoch": 0.33195673209842413, + "grad_norm": 0.9752092493407952, + "learning_rate": 1.558739159735495e-05, + "loss": 0.5331, + "step": 19211 + }, + { + "epoch": 0.331974011611833, + "grad_norm": 1.543050606587518, + "learning_rate": 1.5586927446932874e-05, + "loss": 0.6703, + "step": 19212 + }, + { + "epoch": 0.3319912911252419, + "grad_norm": 0.8182788596506821, + "learning_rate": 1.558646327901212e-05, + "loss": 0.5574, + "step": 19213 + }, + { + "epoch": 0.3320085706386508, + "grad_norm": 0.8664712912741664, + "learning_rate": 1.558599909359414e-05, + "loss": 0.468, + "step": 19214 + }, + { + "epoch": 0.3320258501520597, + "grad_norm": 1.1236695063924942, + "learning_rate": 1.5585534890680377e-05, + "loss": 0.5889, + "step": 19215 + }, + { + "epoch": 0.3320431296654686, + "grad_norm": 0.40798018319952195, + "learning_rate": 1.55850706702723e-05, + "loss": 0.5789, + "step": 19216 + }, + { + "epoch": 0.3320604091788775, + "grad_norm": 0.9458114620866779, + "learning_rate": 1.558460643237136e-05, + "loss": 0.5351, + "step": 19217 + }, + { + "epoch": 0.33207768869228643, + "grad_norm": 0.8275827105707461, + "learning_rate": 1.5584142176979003e-05, + "loss": 0.4975, + "step": 19218 + }, + { + "epoch": 0.33209496820569534, + "grad_norm": 0.9923208980018533, + "learning_rate": 1.5583677904096695e-05, + "loss": 0.5362, + "step": 19219 + }, + { + "epoch": 0.33211224771910425, + "grad_norm": 0.4166169127252687, + "learning_rate": 1.558321361372588e-05, + "loss": 0.4889, + "step": 19220 + }, + { + "epoch": 0.33212952723251316, + "grad_norm": 1.003103325641582, + "learning_rate": 1.5582749305868015e-05, + "loss": 0.5406, + "step": 19221 + }, + { + "epoch": 0.332146806745922, + "grad_norm": 0.6733976870190622, + "learning_rate": 1.558228498052455e-05, + "loss": 0.3465, + "step": 19222 + }, + { + "epoch": 0.3321640862593309, + "grad_norm": 1.3044467349555284, + "learning_rate": 1.5581820637696952e-05, + "loss": 0.467, + "step": 19223 + }, + { + "epoch": 0.3321813657727398, + "grad_norm": 1.0306924472923578, + "learning_rate": 1.5581356277386664e-05, + "loss": 0.5223, + "step": 19224 + }, + { + "epoch": 0.33219864528614873, + "grad_norm": 0.8164662560721421, + "learning_rate": 1.5580891899595144e-05, + "loss": 0.5387, + "step": 19225 + }, + { + "epoch": 0.33221592479955764, + "grad_norm": 1.1915712811161232, + "learning_rate": 1.558042750432385e-05, + "loss": 0.4038, + "step": 19226 + }, + { + "epoch": 0.33223320431296655, + "grad_norm": 0.8594341325665422, + "learning_rate": 1.5579963091574228e-05, + "loss": 0.3139, + "step": 19227 + }, + { + "epoch": 0.33225048382637545, + "grad_norm": 1.0116839340181425, + "learning_rate": 1.557949866134774e-05, + "loss": 0.5264, + "step": 19228 + }, + { + "epoch": 0.33226776333978436, + "grad_norm": 1.1772607017431704, + "learning_rate": 1.5579034213645837e-05, + "loss": 0.5425, + "step": 19229 + }, + { + "epoch": 0.33228504285319327, + "grad_norm": 1.3648733793599666, + "learning_rate": 1.5578569748469976e-05, + "loss": 0.3914, + "step": 19230 + }, + { + "epoch": 0.3323023223666022, + "grad_norm": 0.9680986606966857, + "learning_rate": 1.5578105265821612e-05, + "loss": 0.405, + "step": 19231 + }, + { + "epoch": 0.3323196018800111, + "grad_norm": 1.056055941048769, + "learning_rate": 1.5577640765702196e-05, + "loss": 0.3126, + "step": 19232 + }, + { + "epoch": 0.33233688139341994, + "grad_norm": 0.7574707675415306, + "learning_rate": 1.5577176248113187e-05, + "loss": 0.4963, + "step": 19233 + }, + { + "epoch": 0.33235416090682884, + "grad_norm": 0.7032103616386512, + "learning_rate": 1.5576711713056038e-05, + "loss": 0.3906, + "step": 19234 + }, + { + "epoch": 0.33237144042023775, + "grad_norm": 0.8686687069712182, + "learning_rate": 1.5576247160532202e-05, + "loss": 0.2852, + "step": 19235 + }, + { + "epoch": 0.33238871993364666, + "grad_norm": 0.6280011044248328, + "learning_rate": 1.5575782590543136e-05, + "loss": 0.4674, + "step": 19236 + }, + { + "epoch": 0.33240599944705557, + "grad_norm": 1.3029835113884312, + "learning_rate": 1.5575318003090294e-05, + "loss": 0.3246, + "step": 19237 + }, + { + "epoch": 0.3324232789604645, + "grad_norm": 0.9903822862162098, + "learning_rate": 1.5574853398175135e-05, + "loss": 0.6108, + "step": 19238 + }, + { + "epoch": 0.3324405584738734, + "grad_norm": 0.43528523703725686, + "learning_rate": 1.5574388775799115e-05, + "loss": 0.7793, + "step": 19239 + }, + { + "epoch": 0.3324578379872823, + "grad_norm": 0.8884505334048323, + "learning_rate": 1.557392413596368e-05, + "loss": 0.4236, + "step": 19240 + }, + { + "epoch": 0.3324751175006912, + "grad_norm": 0.38650012537278555, + "learning_rate": 1.5573459478670293e-05, + "loss": 0.5366, + "step": 19241 + }, + { + "epoch": 0.3324923970141001, + "grad_norm": 0.8870994311802941, + "learning_rate": 1.557299480392041e-05, + "loss": 0.3754, + "step": 19242 + }, + { + "epoch": 0.332509676527509, + "grad_norm": 0.7102777801193678, + "learning_rate": 1.5572530111715476e-05, + "loss": 0.4891, + "step": 19243 + }, + { + "epoch": 0.33252695604091786, + "grad_norm": 1.3049911170134938, + "learning_rate": 1.5572065402056962e-05, + "loss": 0.556, + "step": 19244 + }, + { + "epoch": 0.3325442355543268, + "grad_norm": 1.8412363960161011, + "learning_rate": 1.557160067494631e-05, + "loss": 0.6026, + "step": 19245 + }, + { + "epoch": 0.3325615150677357, + "grad_norm": 1.5581800529060639, + "learning_rate": 1.5571135930384987e-05, + "loss": 0.6347, + "step": 19246 + }, + { + "epoch": 0.3325787945811446, + "grad_norm": 1.154808171697698, + "learning_rate": 1.557067116837444e-05, + "loss": 0.5657, + "step": 19247 + }, + { + "epoch": 0.3325960740945535, + "grad_norm": 0.8065401067412029, + "learning_rate": 1.5570206388916128e-05, + "loss": 0.4347, + "step": 19248 + }, + { + "epoch": 0.3326133536079624, + "grad_norm": 0.5072867372379724, + "learning_rate": 1.5569741592011506e-05, + "loss": 0.4064, + "step": 19249 + }, + { + "epoch": 0.3326306331213713, + "grad_norm": 1.4613623499421016, + "learning_rate": 1.5569276777662027e-05, + "loss": 0.503, + "step": 19250 + }, + { + "epoch": 0.3326479126347802, + "grad_norm": 1.0514964830582745, + "learning_rate": 1.5568811945869152e-05, + "loss": 0.6875, + "step": 19251 + }, + { + "epoch": 0.3326651921481891, + "grad_norm": 0.8947499123416487, + "learning_rate": 1.5568347096634336e-05, + "loss": 0.4646, + "step": 19252 + }, + { + "epoch": 0.33268247166159803, + "grad_norm": 1.6634296915193245, + "learning_rate": 1.556788222995903e-05, + "loss": 0.9601, + "step": 19253 + }, + { + "epoch": 0.3326997511750069, + "grad_norm": 0.8082128170060721, + "learning_rate": 1.5567417345844704e-05, + "loss": 0.4478, + "step": 19254 + }, + { + "epoch": 0.3327170306884158, + "grad_norm": 0.9800500034950627, + "learning_rate": 1.5566952444292796e-05, + "loss": 0.4942, + "step": 19255 + }, + { + "epoch": 0.3327343102018247, + "grad_norm": 0.8291392968710086, + "learning_rate": 1.5566487525304767e-05, + "loss": 0.6513, + "step": 19256 + }, + { + "epoch": 0.3327515897152336, + "grad_norm": 0.8608078702626996, + "learning_rate": 1.5566022588882082e-05, + "loss": 0.4983, + "step": 19257 + }, + { + "epoch": 0.3327688692286425, + "grad_norm": 0.8382512754798402, + "learning_rate": 1.556555763502619e-05, + "loss": 0.4094, + "step": 19258 + }, + { + "epoch": 0.3327861487420514, + "grad_norm": 1.046142003860321, + "learning_rate": 1.5565092663738547e-05, + "loss": 0.703, + "step": 19259 + }, + { + "epoch": 0.33280342825546033, + "grad_norm": 0.9508518163046513, + "learning_rate": 1.556462767502061e-05, + "loss": 0.4227, + "step": 19260 + }, + { + "epoch": 0.33282070776886924, + "grad_norm": 0.8778309204147102, + "learning_rate": 1.556416266887384e-05, + "loss": 0.4546, + "step": 19261 + }, + { + "epoch": 0.33283798728227815, + "grad_norm": 0.8321890316359947, + "learning_rate": 1.5563697645299684e-05, + "loss": 0.3902, + "step": 19262 + }, + { + "epoch": 0.33285526679568705, + "grad_norm": 0.5928502537444249, + "learning_rate": 1.5563232604299608e-05, + "loss": 0.8146, + "step": 19263 + }, + { + "epoch": 0.33287254630909596, + "grad_norm": 0.5547596048877386, + "learning_rate": 1.5562767545875064e-05, + "loss": 0.4774, + "step": 19264 + }, + { + "epoch": 0.3328898258225048, + "grad_norm": 1.0324182768241077, + "learning_rate": 1.5562302470027507e-05, + "loss": 0.6539, + "step": 19265 + }, + { + "epoch": 0.3329071053359137, + "grad_norm": 1.130306868822214, + "learning_rate": 1.55618373767584e-05, + "loss": 0.3916, + "step": 19266 + }, + { + "epoch": 0.33292438484932263, + "grad_norm": 0.7124788463972657, + "learning_rate": 1.5561372266069193e-05, + "loss": 0.4627, + "step": 19267 + }, + { + "epoch": 0.33294166436273154, + "grad_norm": 2.1930499428650894, + "learning_rate": 1.5560907137961346e-05, + "loss": 0.3998, + "step": 19268 + }, + { + "epoch": 0.33295894387614045, + "grad_norm": 2.3463601997742014, + "learning_rate": 1.5560441992436313e-05, + "loss": 0.5119, + "step": 19269 + }, + { + "epoch": 0.33297622338954935, + "grad_norm": 1.4097308566382951, + "learning_rate": 1.555997682949556e-05, + "loss": 0.4728, + "step": 19270 + }, + { + "epoch": 0.33299350290295826, + "grad_norm": 0.8469718907125274, + "learning_rate": 1.5559511649140534e-05, + "loss": 0.4761, + "step": 19271 + }, + { + "epoch": 0.33301078241636717, + "grad_norm": 0.9466648153117293, + "learning_rate": 1.555904645137269e-05, + "loss": 0.597, + "step": 19272 + }, + { + "epoch": 0.3330280619297761, + "grad_norm": 0.6977995454201592, + "learning_rate": 1.5558581236193496e-05, + "loss": 0.4072, + "step": 19273 + }, + { + "epoch": 0.333045341443185, + "grad_norm": 1.0995002424348954, + "learning_rate": 1.55581160036044e-05, + "loss": 0.4138, + "step": 19274 + }, + { + "epoch": 0.33306262095659384, + "grad_norm": 0.5492568794514437, + "learning_rate": 1.5557650753606865e-05, + "loss": 0.4547, + "step": 19275 + }, + { + "epoch": 0.33307990047000274, + "grad_norm": 1.156689316397144, + "learning_rate": 1.5557185486202343e-05, + "loss": 0.484, + "step": 19276 + }, + { + "epoch": 0.33309717998341165, + "grad_norm": 1.0505223219255182, + "learning_rate": 1.5556720201392293e-05, + "loss": 0.4725, + "step": 19277 + }, + { + "epoch": 0.33311445949682056, + "grad_norm": 1.0317288677415795, + "learning_rate": 1.5556254899178172e-05, + "loss": 0.5293, + "step": 19278 + }, + { + "epoch": 0.33313173901022947, + "grad_norm": 1.0565365048619542, + "learning_rate": 1.555578957956144e-05, + "loss": 0.3637, + "step": 19279 + }, + { + "epoch": 0.3331490185236384, + "grad_norm": 1.238982814790882, + "learning_rate": 1.5555324242543557e-05, + "loss": 0.4901, + "step": 19280 + }, + { + "epoch": 0.3331662980370473, + "grad_norm": 1.0705609684102386, + "learning_rate": 1.5554858888125976e-05, + "loss": 0.3682, + "step": 19281 + }, + { + "epoch": 0.3331835775504562, + "grad_norm": 1.2310644170396945, + "learning_rate": 1.5554393516310148e-05, + "loss": 0.5217, + "step": 19282 + }, + { + "epoch": 0.3332008570638651, + "grad_norm": 1.1274293010013003, + "learning_rate": 1.555392812709754e-05, + "loss": 0.6086, + "step": 19283 + }, + { + "epoch": 0.333218136577274, + "grad_norm": 1.5709287914258496, + "learning_rate": 1.555346272048961e-05, + "loss": 0.653, + "step": 19284 + }, + { + "epoch": 0.3332354160906829, + "grad_norm": 0.9250072139363914, + "learning_rate": 1.555299729648781e-05, + "loss": 0.5057, + "step": 19285 + }, + { + "epoch": 0.33325269560409176, + "grad_norm": 0.9523872791270872, + "learning_rate": 1.55525318550936e-05, + "loss": 0.4465, + "step": 19286 + }, + { + "epoch": 0.33326997511750067, + "grad_norm": 1.0692345422442995, + "learning_rate": 1.555206639630844e-05, + "loss": 0.339, + "step": 19287 + }, + { + "epoch": 0.3332872546309096, + "grad_norm": 1.1498759582027613, + "learning_rate": 1.5551600920133783e-05, + "loss": 0.4395, + "step": 19288 + }, + { + "epoch": 0.3333045341443185, + "grad_norm": 0.8864779342713514, + "learning_rate": 1.5551135426571095e-05, + "loss": 0.7398, + "step": 19289 + }, + { + "epoch": 0.3333218136577274, + "grad_norm": 0.5606545930751426, + "learning_rate": 1.555066991562183e-05, + "loss": 0.4787, + "step": 19290 + }, + { + "epoch": 0.3333390931711363, + "grad_norm": 1.368531661934795, + "learning_rate": 1.555020438728744e-05, + "loss": 0.5259, + "step": 19291 + }, + { + "epoch": 0.3333563726845452, + "grad_norm": 1.7641876712199172, + "learning_rate": 1.554973884156939e-05, + "loss": 0.7071, + "step": 19292 + }, + { + "epoch": 0.3333736521979541, + "grad_norm": 0.7065923278774267, + "learning_rate": 1.5549273278469135e-05, + "loss": 0.5308, + "step": 19293 + }, + { + "epoch": 0.333390931711363, + "grad_norm": 0.626922618678877, + "learning_rate": 1.5548807697988134e-05, + "loss": 0.5138, + "step": 19294 + }, + { + "epoch": 0.33340821122477193, + "grad_norm": 0.8637360450571016, + "learning_rate": 1.5548342100127846e-05, + "loss": 0.4844, + "step": 19295 + }, + { + "epoch": 0.3334254907381808, + "grad_norm": 1.3558937091443775, + "learning_rate": 1.554787648488973e-05, + "loss": 0.4938, + "step": 19296 + }, + { + "epoch": 0.3334427702515897, + "grad_norm": 1.1039970921329434, + "learning_rate": 1.554741085227524e-05, + "loss": 0.4903, + "step": 19297 + }, + { + "epoch": 0.3334600497649986, + "grad_norm": 0.8405724945819245, + "learning_rate": 1.5546945202285845e-05, + "loss": 0.4222, + "step": 19298 + }, + { + "epoch": 0.3334773292784075, + "grad_norm": 0.7858483508726403, + "learning_rate": 1.554647953492299e-05, + "loss": 0.475, + "step": 19299 + }, + { + "epoch": 0.3334946087918164, + "grad_norm": 1.1272119874332254, + "learning_rate": 1.554601385018814e-05, + "loss": 0.3253, + "step": 19300 + }, + { + "epoch": 0.3335118883052253, + "grad_norm": 0.8036772142749423, + "learning_rate": 1.5545548148082755e-05, + "loss": 0.4213, + "step": 19301 + }, + { + "epoch": 0.33352916781863423, + "grad_norm": 1.1833240386310533, + "learning_rate": 1.5545082428608292e-05, + "loss": 0.5528, + "step": 19302 + }, + { + "epoch": 0.33354644733204314, + "grad_norm": 0.8493284349142759, + "learning_rate": 1.5544616691766208e-05, + "loss": 0.7462, + "step": 19303 + }, + { + "epoch": 0.33356372684545205, + "grad_norm": 1.2136283486639758, + "learning_rate": 1.5544150937557963e-05, + "loss": 0.6165, + "step": 19304 + }, + { + "epoch": 0.33358100635886095, + "grad_norm": 0.7815468318470964, + "learning_rate": 1.5543685165985017e-05, + "loss": 0.5183, + "step": 19305 + }, + { + "epoch": 0.33359828587226986, + "grad_norm": 0.8922804957131961, + "learning_rate": 1.5543219377048825e-05, + "loss": 0.4236, + "step": 19306 + }, + { + "epoch": 0.3336155653856787, + "grad_norm": 0.4418403080917693, + "learning_rate": 1.5542753570750852e-05, + "loss": 0.9661, + "step": 19307 + }, + { + "epoch": 0.3336328448990876, + "grad_norm": 0.8960081528774876, + "learning_rate": 1.5542287747092553e-05, + "loss": 0.3897, + "step": 19308 + }, + { + "epoch": 0.33365012441249653, + "grad_norm": 2.040652265786083, + "learning_rate": 1.5541821906075387e-05, + "loss": 0.6387, + "step": 19309 + }, + { + "epoch": 0.33366740392590544, + "grad_norm": 1.1749447351756426, + "learning_rate": 1.5541356047700812e-05, + "loss": 0.3919, + "step": 19310 + }, + { + "epoch": 0.33368468343931434, + "grad_norm": 0.4732755455362933, + "learning_rate": 1.5540890171970292e-05, + "loss": 0.5998, + "step": 19311 + }, + { + "epoch": 0.33370196295272325, + "grad_norm": 0.6940787159305905, + "learning_rate": 1.5540424278885283e-05, + "loss": 0.3886, + "step": 19312 + }, + { + "epoch": 0.33371924246613216, + "grad_norm": 0.939394529787345, + "learning_rate": 1.5539958368447242e-05, + "loss": 0.6377, + "step": 19313 + }, + { + "epoch": 0.33373652197954107, + "grad_norm": 0.8468190749715095, + "learning_rate": 1.553949244065763e-05, + "loss": 0.4361, + "step": 19314 + }, + { + "epoch": 0.33375380149295, + "grad_norm": 0.985603025081494, + "learning_rate": 1.5539026495517905e-05, + "loss": 0.636, + "step": 19315 + }, + { + "epoch": 0.3337710810063589, + "grad_norm": 1.4099026014396745, + "learning_rate": 1.5538560533029534e-05, + "loss": 0.4762, + "step": 19316 + }, + { + "epoch": 0.3337883605197678, + "grad_norm": 0.9226596677414933, + "learning_rate": 1.5538094553193965e-05, + "loss": 0.6118, + "step": 19317 + }, + { + "epoch": 0.33380564003317664, + "grad_norm": 0.8305659122747039, + "learning_rate": 1.5537628556012666e-05, + "loss": 0.5693, + "step": 19318 + }, + { + "epoch": 0.33382291954658555, + "grad_norm": 1.5184790632864913, + "learning_rate": 1.5537162541487093e-05, + "loss": 0.3746, + "step": 19319 + }, + { + "epoch": 0.33384019905999446, + "grad_norm": 0.9156302314831996, + "learning_rate": 1.5536696509618707e-05, + "loss": 0.4753, + "step": 19320 + }, + { + "epoch": 0.33385747857340337, + "grad_norm": 0.9252114573943431, + "learning_rate": 1.553623046040896e-05, + "loss": 0.3276, + "step": 19321 + }, + { + "epoch": 0.3338747580868123, + "grad_norm": 0.8668154094046817, + "learning_rate": 1.553576439385933e-05, + "loss": 0.4248, + "step": 19322 + }, + { + "epoch": 0.3338920376002212, + "grad_norm": 1.1134953287673353, + "learning_rate": 1.5535298309971258e-05, + "loss": 0.4839, + "step": 19323 + }, + { + "epoch": 0.3339093171136301, + "grad_norm": 1.2044121810541022, + "learning_rate": 1.553483220874621e-05, + "loss": 0.4762, + "step": 19324 + }, + { + "epoch": 0.333926596627039, + "grad_norm": 0.971912968985608, + "learning_rate": 1.553436609018565e-05, + "loss": 0.5492, + "step": 19325 + }, + { + "epoch": 0.3339438761404479, + "grad_norm": 0.8956213096951218, + "learning_rate": 1.5533899954291033e-05, + "loss": 0.4057, + "step": 19326 + }, + { + "epoch": 0.3339611556538568, + "grad_norm": 0.5645919420141158, + "learning_rate": 1.5533433801063822e-05, + "loss": 0.9237, + "step": 19327 + }, + { + "epoch": 0.33397843516726566, + "grad_norm": 0.7224193907025012, + "learning_rate": 1.5532967630505473e-05, + "loss": 0.4555, + "step": 19328 + }, + { + "epoch": 0.33399571468067457, + "grad_norm": 0.8029397955970712, + "learning_rate": 1.5532501442617453e-05, + "loss": 0.3791, + "step": 19329 + }, + { + "epoch": 0.3340129941940835, + "grad_norm": 1.0289804811207812, + "learning_rate": 1.5532035237401216e-05, + "loss": 0.7115, + "step": 19330 + }, + { + "epoch": 0.3340302737074924, + "grad_norm": 1.7599849239298748, + "learning_rate": 1.5531569014858226e-05, + "loss": 0.529, + "step": 19331 + }, + { + "epoch": 0.3340475532209013, + "grad_norm": 0.8179292348557173, + "learning_rate": 1.553110277498994e-05, + "loss": 0.5331, + "step": 19332 + }, + { + "epoch": 0.3340648327343102, + "grad_norm": 0.7354646955585586, + "learning_rate": 1.553063651779782e-05, + "loss": 0.3973, + "step": 19333 + }, + { + "epoch": 0.3340821122477191, + "grad_norm": 0.8223665373240934, + "learning_rate": 1.5530170243283323e-05, + "loss": 0.4779, + "step": 19334 + }, + { + "epoch": 0.334099391761128, + "grad_norm": 1.1161261966091836, + "learning_rate": 1.5529703951447918e-05, + "loss": 0.5155, + "step": 19335 + }, + { + "epoch": 0.3341166712745369, + "grad_norm": 0.7527511470324831, + "learning_rate": 1.5529237642293054e-05, + "loss": 0.5129, + "step": 19336 + }, + { + "epoch": 0.33413395078794583, + "grad_norm": 0.802496088682318, + "learning_rate": 1.5528771315820202e-05, + "loss": 0.3666, + "step": 19337 + }, + { + "epoch": 0.33415123030135474, + "grad_norm": 1.6930543169443155, + "learning_rate": 1.5528304972030815e-05, + "loss": 0.7242, + "step": 19338 + }, + { + "epoch": 0.3341685098147636, + "grad_norm": 0.5084610092012286, + "learning_rate": 1.5527838610926363e-05, + "loss": 0.641, + "step": 19339 + }, + { + "epoch": 0.3341857893281725, + "grad_norm": 0.8153461098671928, + "learning_rate": 1.5527372232508294e-05, + "loss": 0.4566, + "step": 19340 + }, + { + "epoch": 0.3342030688415814, + "grad_norm": 0.6912161473223567, + "learning_rate": 1.5526905836778078e-05, + "loss": 0.5387, + "step": 19341 + }, + { + "epoch": 0.3342203483549903, + "grad_norm": 0.9772294182220487, + "learning_rate": 1.552643942373717e-05, + "loss": 0.5387, + "step": 19342 + }, + { + "epoch": 0.3342376278683992, + "grad_norm": 1.369877026874337, + "learning_rate": 1.5525972993387037e-05, + "loss": 0.6186, + "step": 19343 + }, + { + "epoch": 0.33425490738180813, + "grad_norm": 0.9970930415813678, + "learning_rate": 1.5525506545729137e-05, + "loss": 0.5487, + "step": 19344 + }, + { + "epoch": 0.33427218689521704, + "grad_norm": 0.3992230615920003, + "learning_rate": 1.5525040080764928e-05, + "loss": 0.5438, + "step": 19345 + }, + { + "epoch": 0.33428946640862595, + "grad_norm": 1.1640339950653207, + "learning_rate": 1.5524573598495876e-05, + "loss": 0.4461, + "step": 19346 + }, + { + "epoch": 0.33430674592203485, + "grad_norm": 1.0322002128868863, + "learning_rate": 1.5524107098923438e-05, + "loss": 0.473, + "step": 19347 + }, + { + "epoch": 0.33432402543544376, + "grad_norm": 0.6249885781193868, + "learning_rate": 1.5523640582049075e-05, + "loss": 0.53, + "step": 19348 + }, + { + "epoch": 0.3343413049488526, + "grad_norm": 0.3956055468646621, + "learning_rate": 1.552317404787425e-05, + "loss": 0.476, + "step": 19349 + }, + { + "epoch": 0.3343585844622615, + "grad_norm": 1.4081930218859886, + "learning_rate": 1.5522707496400425e-05, + "loss": 0.4881, + "step": 19350 + }, + { + "epoch": 0.33437586397567043, + "grad_norm": 1.1833547659448997, + "learning_rate": 1.5522240927629062e-05, + "loss": 0.5461, + "step": 19351 + }, + { + "epoch": 0.33439314348907934, + "grad_norm": 0.8712842635522742, + "learning_rate": 1.552177434156162e-05, + "loss": 0.5826, + "step": 19352 + }, + { + "epoch": 0.33441042300248824, + "grad_norm": 0.6349296705078168, + "learning_rate": 1.5521307738199563e-05, + "loss": 0.4016, + "step": 19353 + }, + { + "epoch": 0.33442770251589715, + "grad_norm": 1.9030585556273614, + "learning_rate": 1.552084111754435e-05, + "loss": 0.6758, + "step": 19354 + }, + { + "epoch": 0.33444498202930606, + "grad_norm": 1.2585881363044176, + "learning_rate": 1.552037447959744e-05, + "loss": 0.4093, + "step": 19355 + }, + { + "epoch": 0.33446226154271497, + "grad_norm": 0.7636270886603231, + "learning_rate": 1.55199078243603e-05, + "loss": 0.4637, + "step": 19356 + }, + { + "epoch": 0.3344795410561239, + "grad_norm": 0.9008653087739655, + "learning_rate": 1.5519441151834387e-05, + "loss": 0.4606, + "step": 19357 + }, + { + "epoch": 0.3344968205695328, + "grad_norm": 1.1166947175642143, + "learning_rate": 1.551897446202117e-05, + "loss": 0.6452, + "step": 19358 + }, + { + "epoch": 0.3345141000829417, + "grad_norm": 0.9602277036403812, + "learning_rate": 1.5518507754922102e-05, + "loss": 0.4405, + "step": 19359 + }, + { + "epoch": 0.33453137959635054, + "grad_norm": 0.593944739268247, + "learning_rate": 1.5518041030538646e-05, + "loss": 0.3461, + "step": 19360 + }, + { + "epoch": 0.33454865910975945, + "grad_norm": 1.1342785112500648, + "learning_rate": 1.551757428887227e-05, + "loss": 0.397, + "step": 19361 + }, + { + "epoch": 0.33456593862316836, + "grad_norm": 0.7910698938131777, + "learning_rate": 1.551710752992443e-05, + "loss": 0.6126, + "step": 19362 + }, + { + "epoch": 0.33458321813657727, + "grad_norm": 0.6693484067725306, + "learning_rate": 1.5516640753696594e-05, + "loss": 0.4593, + "step": 19363 + }, + { + "epoch": 0.3346004976499862, + "grad_norm": 1.0214588109009932, + "learning_rate": 1.551617396019021e-05, + "loss": 0.4364, + "step": 19364 + }, + { + "epoch": 0.3346177771633951, + "grad_norm": 1.0640124260664139, + "learning_rate": 1.551570714940676e-05, + "loss": 0.4275, + "step": 19365 + }, + { + "epoch": 0.334635056676804, + "grad_norm": 1.3306203849472298, + "learning_rate": 1.551524032134769e-05, + "loss": 0.5616, + "step": 19366 + }, + { + "epoch": 0.3346523361902129, + "grad_norm": 1.019234814281084, + "learning_rate": 1.551477347601447e-05, + "loss": 0.737, + "step": 19367 + }, + { + "epoch": 0.3346696157036218, + "grad_norm": 1.3042871655415713, + "learning_rate": 1.5514306613408563e-05, + "loss": 0.5088, + "step": 19368 + }, + { + "epoch": 0.3346868952170307, + "grad_norm": 0.9162679101303375, + "learning_rate": 1.5513839733531427e-05, + "loss": 0.3616, + "step": 19369 + }, + { + "epoch": 0.33470417473043956, + "grad_norm": 0.716549511870551, + "learning_rate": 1.5513372836384523e-05, + "loss": 0.3179, + "step": 19370 + }, + { + "epoch": 0.33472145424384847, + "grad_norm": 1.1007497471108774, + "learning_rate": 1.5512905921969317e-05, + "loss": 0.4396, + "step": 19371 + }, + { + "epoch": 0.3347387337572574, + "grad_norm": 0.8190404211626802, + "learning_rate": 1.5512438990287274e-05, + "loss": 0.4548, + "step": 19372 + }, + { + "epoch": 0.3347560132706663, + "grad_norm": 0.7651230849689845, + "learning_rate": 1.5511972041339853e-05, + "loss": 0.3943, + "step": 19373 + }, + { + "epoch": 0.3347732927840752, + "grad_norm": 0.8863794656338505, + "learning_rate": 1.5511505075128515e-05, + "loss": 0.5943, + "step": 19374 + }, + { + "epoch": 0.3347905722974841, + "grad_norm": 1.4641368716605425, + "learning_rate": 1.5511038091654723e-05, + "loss": 0.6442, + "step": 19375 + }, + { + "epoch": 0.334807851810893, + "grad_norm": 1.0802431894340934, + "learning_rate": 1.5510571090919945e-05, + "loss": 0.8232, + "step": 19376 + }, + { + "epoch": 0.3348251313243019, + "grad_norm": 1.7113423812383224, + "learning_rate": 1.5510104072925637e-05, + "loss": 0.4535, + "step": 19377 + }, + { + "epoch": 0.3348424108377108, + "grad_norm": 0.7921642829743204, + "learning_rate": 1.5509637037673263e-05, + "loss": 0.3792, + "step": 19378 + }, + { + "epoch": 0.33485969035111973, + "grad_norm": 1.1647374769227288, + "learning_rate": 1.5509169985164288e-05, + "loss": 0.475, + "step": 19379 + }, + { + "epoch": 0.33487696986452864, + "grad_norm": 1.1796842868479187, + "learning_rate": 1.5508702915400173e-05, + "loss": 0.5603, + "step": 19380 + }, + { + "epoch": 0.3348942493779375, + "grad_norm": 0.7124324462163134, + "learning_rate": 1.550823582838238e-05, + "loss": 0.4673, + "step": 19381 + }, + { + "epoch": 0.3349115288913464, + "grad_norm": 0.8305199344657552, + "learning_rate": 1.550776872411238e-05, + "loss": 0.4551, + "step": 19382 + }, + { + "epoch": 0.3349288084047553, + "grad_norm": 0.4398135133104757, + "learning_rate": 1.5507301602591628e-05, + "loss": 0.8067, + "step": 19383 + }, + { + "epoch": 0.3349460879181642, + "grad_norm": 1.0962317242460944, + "learning_rate": 1.5506834463821586e-05, + "loss": 0.5403, + "step": 19384 + }, + { + "epoch": 0.3349633674315731, + "grad_norm": 1.2035046717636075, + "learning_rate": 1.550636730780372e-05, + "loss": 0.4614, + "step": 19385 + }, + { + "epoch": 0.33498064694498203, + "grad_norm": 0.8273420236041031, + "learning_rate": 1.5505900134539495e-05, + "loss": 0.5782, + "step": 19386 + }, + { + "epoch": 0.33499792645839094, + "grad_norm": 0.38128561607026956, + "learning_rate": 1.5505432944030373e-05, + "loss": 0.4893, + "step": 19387 + }, + { + "epoch": 0.33501520597179985, + "grad_norm": 1.2705964259978901, + "learning_rate": 1.550496573627781e-05, + "loss": 0.3499, + "step": 19388 + }, + { + "epoch": 0.33503248548520875, + "grad_norm": 0.5197439450671717, + "learning_rate": 1.5504498511283285e-05, + "loss": 0.8445, + "step": 19389 + }, + { + "epoch": 0.33504976499861766, + "grad_norm": 0.8470360409968785, + "learning_rate": 1.5504031269048246e-05, + "loss": 0.5236, + "step": 19390 + }, + { + "epoch": 0.33506704451202657, + "grad_norm": 0.9539335027796199, + "learning_rate": 1.550356400957417e-05, + "loss": 0.4508, + "step": 19391 + }, + { + "epoch": 0.3350843240254354, + "grad_norm": 0.8954768951277763, + "learning_rate": 1.5503096732862508e-05, + "loss": 0.3601, + "step": 19392 + }, + { + "epoch": 0.33510160353884433, + "grad_norm": 0.7288069721690796, + "learning_rate": 1.550262943891473e-05, + "loss": 0.4756, + "step": 19393 + }, + { + "epoch": 0.33511888305225324, + "grad_norm": 2.498340535221878, + "learning_rate": 1.5502162127732295e-05, + "loss": 0.5558, + "step": 19394 + }, + { + "epoch": 0.33513616256566214, + "grad_norm": 1.0525051120662232, + "learning_rate": 1.5501694799316672e-05, + "loss": 0.623, + "step": 19395 + }, + { + "epoch": 0.33515344207907105, + "grad_norm": 1.4333221171730637, + "learning_rate": 1.550122745366933e-05, + "loss": 0.4182, + "step": 19396 + }, + { + "epoch": 0.33517072159247996, + "grad_norm": 1.2033074526657639, + "learning_rate": 1.5500760090791716e-05, + "loss": 0.6847, + "step": 19397 + }, + { + "epoch": 0.33518800110588887, + "grad_norm": 1.237791041788109, + "learning_rate": 1.5500292710685308e-05, + "loss": 0.3676, + "step": 19398 + }, + { + "epoch": 0.3352052806192978, + "grad_norm": 0.7920223981616938, + "learning_rate": 1.5499825313351564e-05, + "loss": 0.4119, + "step": 19399 + }, + { + "epoch": 0.3352225601327067, + "grad_norm": 0.449317525799259, + "learning_rate": 1.5499357898791948e-05, + "loss": 0.5296, + "step": 19400 + }, + { + "epoch": 0.3352398396461156, + "grad_norm": 1.017819560630844, + "learning_rate": 1.5498890467007927e-05, + "loss": 0.6762, + "step": 19401 + }, + { + "epoch": 0.33525711915952444, + "grad_norm": 0.7007615021820893, + "learning_rate": 1.549842301800096e-05, + "loss": 0.4761, + "step": 19402 + }, + { + "epoch": 0.33527439867293335, + "grad_norm": 1.276721767602126, + "learning_rate": 1.5497955551772515e-05, + "loss": 0.7222, + "step": 19403 + }, + { + "epoch": 0.33529167818634226, + "grad_norm": 1.3775322595795159, + "learning_rate": 1.5497488068324062e-05, + "loss": 0.5638, + "step": 19404 + }, + { + "epoch": 0.33530895769975116, + "grad_norm": 0.5788316743491881, + "learning_rate": 1.5497020567657052e-05, + "loss": 0.4397, + "step": 19405 + }, + { + "epoch": 0.33532623721316007, + "grad_norm": 1.1152027428370712, + "learning_rate": 1.5496553049772955e-05, + "loss": 0.4501, + "step": 19406 + }, + { + "epoch": 0.335343516726569, + "grad_norm": 1.5638889242718752, + "learning_rate": 1.549608551467324e-05, + "loss": 0.5694, + "step": 19407 + }, + { + "epoch": 0.3353607962399779, + "grad_norm": 0.8973572594157926, + "learning_rate": 1.5495617962359367e-05, + "loss": 0.4622, + "step": 19408 + }, + { + "epoch": 0.3353780757533868, + "grad_norm": 0.6237950447091644, + "learning_rate": 1.5495150392832797e-05, + "loss": 0.4295, + "step": 19409 + }, + { + "epoch": 0.3353953552667957, + "grad_norm": 0.727383306467064, + "learning_rate": 1.5494682806095e-05, + "loss": 0.769, + "step": 19410 + }, + { + "epoch": 0.3354126347802046, + "grad_norm": 0.8034496161519542, + "learning_rate": 1.549421520214744e-05, + "loss": 0.5224, + "step": 19411 + }, + { + "epoch": 0.3354299142936135, + "grad_norm": 0.9848362316859218, + "learning_rate": 1.5493747580991583e-05, + "loss": 0.4644, + "step": 19412 + }, + { + "epoch": 0.33544719380702237, + "grad_norm": 0.7799529820518409, + "learning_rate": 1.5493279942628888e-05, + "loss": 0.3738, + "step": 19413 + }, + { + "epoch": 0.3354644733204313, + "grad_norm": 0.8914004806223662, + "learning_rate": 1.5492812287060822e-05, + "loss": 0.4306, + "step": 19414 + }, + { + "epoch": 0.3354817528338402, + "grad_norm": 0.7760326065799539, + "learning_rate": 1.5492344614288854e-05, + "loss": 0.4592, + "step": 19415 + }, + { + "epoch": 0.3354990323472491, + "grad_norm": 0.9938585497503261, + "learning_rate": 1.5491876924314442e-05, + "loss": 0.5334, + "step": 19416 + }, + { + "epoch": 0.335516311860658, + "grad_norm": 1.2011122230328615, + "learning_rate": 1.5491409217139058e-05, + "loss": 0.446, + "step": 19417 + }, + { + "epoch": 0.3355335913740669, + "grad_norm": 0.4527490001212144, + "learning_rate": 1.5490941492764158e-05, + "loss": 0.5577, + "step": 19418 + }, + { + "epoch": 0.3355508708874758, + "grad_norm": 1.0286534658245539, + "learning_rate": 1.5490473751191216e-05, + "loss": 0.4415, + "step": 19419 + }, + { + "epoch": 0.3355681504008847, + "grad_norm": 0.8569702080859581, + "learning_rate": 1.549000599242169e-05, + "loss": 0.4376, + "step": 19420 + }, + { + "epoch": 0.33558542991429363, + "grad_norm": 0.7891825740222712, + "learning_rate": 1.548953821645705e-05, + "loss": 0.5041, + "step": 19421 + }, + { + "epoch": 0.33560270942770254, + "grad_norm": 0.6859671902541457, + "learning_rate": 1.5489070423298754e-05, + "loss": 0.4495, + "step": 19422 + }, + { + "epoch": 0.3356199889411114, + "grad_norm": 1.2187549759552507, + "learning_rate": 1.548860261294828e-05, + "loss": 0.6187, + "step": 19423 + }, + { + "epoch": 0.3356372684545203, + "grad_norm": 0.9375832308937931, + "learning_rate": 1.548813478540708e-05, + "loss": 0.4279, + "step": 19424 + }, + { + "epoch": 0.3356545479679292, + "grad_norm": 0.8233983223611481, + "learning_rate": 1.5487666940676625e-05, + "loss": 0.3436, + "step": 19425 + }, + { + "epoch": 0.3356718274813381, + "grad_norm": 0.9843965542198314, + "learning_rate": 1.5487199078758384e-05, + "loss": 0.5243, + "step": 19426 + }, + { + "epoch": 0.335689106994747, + "grad_norm": 0.7649094087488973, + "learning_rate": 1.5486731199653818e-05, + "loss": 0.3867, + "step": 19427 + }, + { + "epoch": 0.33570638650815593, + "grad_norm": 0.6216673744263632, + "learning_rate": 1.548626330336439e-05, + "loss": 0.2418, + "step": 19428 + }, + { + "epoch": 0.33572366602156484, + "grad_norm": 0.46733786588647974, + "learning_rate": 1.548579538989157e-05, + "loss": 0.7629, + "step": 19429 + }, + { + "epoch": 0.33574094553497374, + "grad_norm": 1.3831515539705537, + "learning_rate": 1.548532745923682e-05, + "loss": 0.2951, + "step": 19430 + }, + { + "epoch": 0.33575822504838265, + "grad_norm": 0.6667590985792019, + "learning_rate": 1.548485951140161e-05, + "loss": 0.5506, + "step": 19431 + }, + { + "epoch": 0.33577550456179156, + "grad_norm": 1.6404054362468914, + "learning_rate": 1.5484391546387404e-05, + "loss": 0.5278, + "step": 19432 + }, + { + "epoch": 0.33579278407520047, + "grad_norm": 0.9720353544907245, + "learning_rate": 1.548392356419566e-05, + "loss": 0.4383, + "step": 19433 + }, + { + "epoch": 0.3358100635886093, + "grad_norm": 0.6488505345669169, + "learning_rate": 1.5483455564827856e-05, + "loss": 0.3521, + "step": 19434 + }, + { + "epoch": 0.3358273431020182, + "grad_norm": 0.8566070269740099, + "learning_rate": 1.5482987548285455e-05, + "loss": 0.4287, + "step": 19435 + }, + { + "epoch": 0.33584462261542714, + "grad_norm": 1.0185273473888197, + "learning_rate": 1.548251951456992e-05, + "loss": 0.4985, + "step": 19436 + }, + { + "epoch": 0.33586190212883604, + "grad_norm": 0.8824155065913328, + "learning_rate": 1.5482051463682713e-05, + "loss": 0.4479, + "step": 19437 + }, + { + "epoch": 0.33587918164224495, + "grad_norm": 0.745993194608821, + "learning_rate": 1.5481583395625306e-05, + "loss": 0.4515, + "step": 19438 + }, + { + "epoch": 0.33589646115565386, + "grad_norm": 0.7781585011333589, + "learning_rate": 1.548111531039916e-05, + "loss": 0.4711, + "step": 19439 + }, + { + "epoch": 0.33591374066906277, + "grad_norm": 0.978310115445103, + "learning_rate": 1.548064720800575e-05, + "loss": 0.3633, + "step": 19440 + }, + { + "epoch": 0.3359310201824717, + "grad_norm": 0.6765297647781578, + "learning_rate": 1.548017908844653e-05, + "loss": 0.3165, + "step": 19441 + }, + { + "epoch": 0.3359482996958806, + "grad_norm": 0.9338170708828276, + "learning_rate": 1.5479710951722978e-05, + "loss": 0.429, + "step": 19442 + }, + { + "epoch": 0.3359655792092895, + "grad_norm": 0.8937369803631064, + "learning_rate": 1.5479242797836556e-05, + "loss": 0.5233, + "step": 19443 + }, + { + "epoch": 0.3359828587226984, + "grad_norm": 0.9834450324513132, + "learning_rate": 1.5478774626788725e-05, + "loss": 0.3392, + "step": 19444 + }, + { + "epoch": 0.33600013823610725, + "grad_norm": 1.0787758692167768, + "learning_rate": 1.5478306438580958e-05, + "loss": 0.4488, + "step": 19445 + }, + { + "epoch": 0.33601741774951616, + "grad_norm": 0.8361707054177443, + "learning_rate": 1.547783823321472e-05, + "loss": 0.3742, + "step": 19446 + }, + { + "epoch": 0.33603469726292506, + "grad_norm": 0.6093595755847364, + "learning_rate": 1.5477370010691473e-05, + "loss": 0.3444, + "step": 19447 + }, + { + "epoch": 0.33605197677633397, + "grad_norm": 1.3164182838968361, + "learning_rate": 1.5476901771012686e-05, + "loss": 0.4934, + "step": 19448 + }, + { + "epoch": 0.3360692562897429, + "grad_norm": 0.8559896865423535, + "learning_rate": 1.547643351417983e-05, + "loss": 0.4521, + "step": 19449 + }, + { + "epoch": 0.3360865358031518, + "grad_norm": 1.0431415203933336, + "learning_rate": 1.5475965240194366e-05, + "loss": 0.6207, + "step": 19450 + }, + { + "epoch": 0.3361038153165607, + "grad_norm": 0.9928330272708061, + "learning_rate": 1.547549694905776e-05, + "loss": 0.5304, + "step": 19451 + }, + { + "epoch": 0.3361210948299696, + "grad_norm": 0.9173719543024459, + "learning_rate": 1.5475028640771487e-05, + "loss": 0.5183, + "step": 19452 + }, + { + "epoch": 0.3361383743433785, + "grad_norm": 0.9886649501265222, + "learning_rate": 1.5474560315337007e-05, + "loss": 0.3235, + "step": 19453 + }, + { + "epoch": 0.3361556538567874, + "grad_norm": 1.1985013962262177, + "learning_rate": 1.5474091972755788e-05, + "loss": 0.588, + "step": 19454 + }, + { + "epoch": 0.33617293337019627, + "grad_norm": 0.8092926632132209, + "learning_rate": 1.5473623613029298e-05, + "loss": 0.4236, + "step": 19455 + }, + { + "epoch": 0.3361902128836052, + "grad_norm": 0.5646036443107315, + "learning_rate": 1.5473155236159e-05, + "loss": 0.4273, + "step": 19456 + }, + { + "epoch": 0.3362074923970141, + "grad_norm": 1.0964600420282176, + "learning_rate": 1.547268684214636e-05, + "loss": 0.6296, + "step": 19457 + }, + { + "epoch": 0.336224771910423, + "grad_norm": 1.0765075966037319, + "learning_rate": 1.5472218430992857e-05, + "loss": 0.4087, + "step": 19458 + }, + { + "epoch": 0.3362420514238319, + "grad_norm": 1.148065988240884, + "learning_rate": 1.5471750002699944e-05, + "loss": 0.5802, + "step": 19459 + }, + { + "epoch": 0.3362593309372408, + "grad_norm": 1.1773778551461738, + "learning_rate": 1.54712815572691e-05, + "loss": 0.5402, + "step": 19460 + }, + { + "epoch": 0.3362766104506497, + "grad_norm": 1.8593120631949902, + "learning_rate": 1.547081309470178e-05, + "loss": 0.6746, + "step": 19461 + }, + { + "epoch": 0.3362938899640586, + "grad_norm": 1.5704714210041033, + "learning_rate": 1.547034461499946e-05, + "loss": 0.5662, + "step": 19462 + }, + { + "epoch": 0.33631116947746753, + "grad_norm": 1.3498031593829722, + "learning_rate": 1.5469876118163603e-05, + "loss": 0.4887, + "step": 19463 + }, + { + "epoch": 0.33632844899087644, + "grad_norm": 0.7378428565092381, + "learning_rate": 1.5469407604195683e-05, + "loss": 0.4779, + "step": 19464 + }, + { + "epoch": 0.33634572850428535, + "grad_norm": 1.0831723773917374, + "learning_rate": 1.546893907309716e-05, + "loss": 0.7191, + "step": 19465 + }, + { + "epoch": 0.3363630080176942, + "grad_norm": 1.1093017131004248, + "learning_rate": 1.546847052486951e-05, + "loss": 0.5392, + "step": 19466 + }, + { + "epoch": 0.3363802875311031, + "grad_norm": 0.8572639846394067, + "learning_rate": 1.5468001959514187e-05, + "loss": 0.5867, + "step": 19467 + }, + { + "epoch": 0.336397567044512, + "grad_norm": 0.7997756702935672, + "learning_rate": 1.546753337703267e-05, + "loss": 0.6074, + "step": 19468 + }, + { + "epoch": 0.3364148465579209, + "grad_norm": 1.2290310261018431, + "learning_rate": 1.546706477742642e-05, + "loss": 0.721, + "step": 19469 + }, + { + "epoch": 0.33643212607132983, + "grad_norm": 0.8800289793896973, + "learning_rate": 1.546659616069691e-05, + "loss": 0.5244, + "step": 19470 + }, + { + "epoch": 0.33644940558473874, + "grad_norm": 1.0451379879262612, + "learning_rate": 1.54661275268456e-05, + "loss": 0.6591, + "step": 19471 + }, + { + "epoch": 0.33646668509814764, + "grad_norm": 0.9468834524186465, + "learning_rate": 1.5465658875873974e-05, + "loss": 0.426, + "step": 19472 + }, + { + "epoch": 0.33648396461155655, + "grad_norm": 0.7463610800667598, + "learning_rate": 1.546519020778348e-05, + "loss": 0.468, + "step": 19473 + }, + { + "epoch": 0.33650124412496546, + "grad_norm": 0.8906158239152722, + "learning_rate": 1.5464721522575596e-05, + "loss": 0.3392, + "step": 19474 + }, + { + "epoch": 0.33651852363837437, + "grad_norm": 1.2522651186319205, + "learning_rate": 1.5464252820251794e-05, + "loss": 0.8089, + "step": 19475 + }, + { + "epoch": 0.3365358031517832, + "grad_norm": 0.9662936893030877, + "learning_rate": 1.5463784100813535e-05, + "loss": 0.5968, + "step": 19476 + }, + { + "epoch": 0.3365530826651921, + "grad_norm": 1.511872034722206, + "learning_rate": 1.5463315364262286e-05, + "loss": 0.4634, + "step": 19477 + }, + { + "epoch": 0.33657036217860103, + "grad_norm": 1.0001460091348382, + "learning_rate": 1.546284661059952e-05, + "loss": 0.4879, + "step": 19478 + }, + { + "epoch": 0.33658764169200994, + "grad_norm": 1.1054514133430258, + "learning_rate": 1.5462377839826703e-05, + "loss": 0.5038, + "step": 19479 + }, + { + "epoch": 0.33660492120541885, + "grad_norm": 1.0616245604229315, + "learning_rate": 1.54619090519453e-05, + "loss": 0.5663, + "step": 19480 + }, + { + "epoch": 0.33662220071882776, + "grad_norm": 0.7091844399472614, + "learning_rate": 1.546144024695679e-05, + "loss": 0.4149, + "step": 19481 + }, + { + "epoch": 0.33663948023223667, + "grad_norm": 1.0302874133090623, + "learning_rate": 1.546097142486263e-05, + "loss": 0.4665, + "step": 19482 + }, + { + "epoch": 0.3366567597456456, + "grad_norm": 1.1745836330701032, + "learning_rate": 1.5460502585664293e-05, + "loss": 0.4113, + "step": 19483 + }, + { + "epoch": 0.3366740392590545, + "grad_norm": 1.1951081291067231, + "learning_rate": 1.5460033729363243e-05, + "loss": 0.4432, + "step": 19484 + }, + { + "epoch": 0.3366913187724634, + "grad_norm": 0.8458228270409219, + "learning_rate": 1.5459564855960957e-05, + "loss": 0.3287, + "step": 19485 + }, + { + "epoch": 0.3367085982858723, + "grad_norm": 0.7953007552024014, + "learning_rate": 1.5459095965458903e-05, + "loss": 0.3256, + "step": 19486 + }, + { + "epoch": 0.33672587779928115, + "grad_norm": 1.008785890063799, + "learning_rate": 1.545862705785854e-05, + "loss": 0.4589, + "step": 19487 + }, + { + "epoch": 0.33674315731269006, + "grad_norm": 0.47293420128626074, + "learning_rate": 1.545815813316134e-05, + "loss": 0.6265, + "step": 19488 + }, + { + "epoch": 0.33676043682609896, + "grad_norm": 0.8792271578758836, + "learning_rate": 1.5457689191368778e-05, + "loss": 0.3274, + "step": 19489 + }, + { + "epoch": 0.33677771633950787, + "grad_norm": 1.0020453795289146, + "learning_rate": 1.5457220232482317e-05, + "loss": 0.3823, + "step": 19490 + }, + { + "epoch": 0.3367949958529168, + "grad_norm": 0.9822702008975661, + "learning_rate": 1.545675125650343e-05, + "loss": 0.5636, + "step": 19491 + }, + { + "epoch": 0.3368122753663257, + "grad_norm": 0.8440946735612062, + "learning_rate": 1.545628226343358e-05, + "loss": 0.3353, + "step": 19492 + }, + { + "epoch": 0.3368295548797346, + "grad_norm": 0.6001596819087452, + "learning_rate": 1.545581325327424e-05, + "loss": 0.5626, + "step": 19493 + }, + { + "epoch": 0.3368468343931435, + "grad_norm": 0.6595805822148929, + "learning_rate": 1.5455344226026877e-05, + "loss": 0.6915, + "step": 19494 + }, + { + "epoch": 0.3368641139065524, + "grad_norm": 0.9402009734425715, + "learning_rate": 1.5454875181692967e-05, + "loss": 0.6691, + "step": 19495 + }, + { + "epoch": 0.3368813934199613, + "grad_norm": 0.7585593865233108, + "learning_rate": 1.5454406120273964e-05, + "loss": 0.3456, + "step": 19496 + }, + { + "epoch": 0.33689867293337017, + "grad_norm": 1.3364504275061277, + "learning_rate": 1.5453937041771358e-05, + "loss": 0.3972, + "step": 19497 + }, + { + "epoch": 0.3369159524467791, + "grad_norm": 1.048546602646986, + "learning_rate": 1.5453467946186597e-05, + "loss": 0.6613, + "step": 19498 + }, + { + "epoch": 0.336933231960188, + "grad_norm": 1.022828030736519, + "learning_rate": 1.5452998833521163e-05, + "loss": 0.4884, + "step": 19499 + }, + { + "epoch": 0.3369505114735969, + "grad_norm": 1.0248222190282061, + "learning_rate": 1.5452529703776526e-05, + "loss": 0.481, + "step": 19500 + }, + { + "epoch": 0.3369677909870058, + "grad_norm": 0.8177010337993843, + "learning_rate": 1.5452060556954146e-05, + "loss": 0.2978, + "step": 19501 + }, + { + "epoch": 0.3369850705004147, + "grad_norm": 0.7475721584540211, + "learning_rate": 1.5451591393055504e-05, + "loss": 0.4646, + "step": 19502 + }, + { + "epoch": 0.3370023500138236, + "grad_norm": 1.3582085770733092, + "learning_rate": 1.5451122212082056e-05, + "loss": 0.4632, + "step": 19503 + }, + { + "epoch": 0.3370196295272325, + "grad_norm": 0.9344147027667208, + "learning_rate": 1.5450653014035288e-05, + "loss": 0.6174, + "step": 19504 + }, + { + "epoch": 0.33703690904064143, + "grad_norm": 1.6262341889719298, + "learning_rate": 1.545018379891665e-05, + "loss": 0.5426, + "step": 19505 + }, + { + "epoch": 0.33705418855405034, + "grad_norm": 1.15110181387395, + "learning_rate": 1.5449714566727628e-05, + "loss": 0.5332, + "step": 19506 + }, + { + "epoch": 0.33707146806745925, + "grad_norm": 1.0525597836660638, + "learning_rate": 1.5449245317469688e-05, + "loss": 0.531, + "step": 19507 + }, + { + "epoch": 0.3370887475808681, + "grad_norm": 1.1743224363083042, + "learning_rate": 1.5448776051144294e-05, + "loss": 0.4866, + "step": 19508 + }, + { + "epoch": 0.337106027094277, + "grad_norm": 0.8580857369788136, + "learning_rate": 1.5448306767752922e-05, + "loss": 0.5807, + "step": 19509 + }, + { + "epoch": 0.3371233066076859, + "grad_norm": 0.7943769166727197, + "learning_rate": 1.5447837467297038e-05, + "loss": 0.2974, + "step": 19510 + }, + { + "epoch": 0.3371405861210948, + "grad_norm": 1.4429458511796442, + "learning_rate": 1.544736814977811e-05, + "loss": 0.5514, + "step": 19511 + }, + { + "epoch": 0.33715786563450373, + "grad_norm": 1.3451498864555735, + "learning_rate": 1.5446898815197614e-05, + "loss": 0.576, + "step": 19512 + }, + { + "epoch": 0.33717514514791264, + "grad_norm": 1.0871844086992843, + "learning_rate": 1.5446429463557016e-05, + "loss": 0.3313, + "step": 19513 + }, + { + "epoch": 0.33719242466132154, + "grad_norm": 1.4039550421516198, + "learning_rate": 1.5445960094857788e-05, + "loss": 0.4263, + "step": 19514 + }, + { + "epoch": 0.33720970417473045, + "grad_norm": 1.0581637058604676, + "learning_rate": 1.54454907091014e-05, + "loss": 0.3305, + "step": 19515 + }, + { + "epoch": 0.33722698368813936, + "grad_norm": 0.9699677975198615, + "learning_rate": 1.544502130628932e-05, + "loss": 0.5291, + "step": 19516 + }, + { + "epoch": 0.33724426320154827, + "grad_norm": 1.5048691999051043, + "learning_rate": 1.544455188642302e-05, + "loss": 0.7013, + "step": 19517 + }, + { + "epoch": 0.3372615427149572, + "grad_norm": 0.723709601165505, + "learning_rate": 1.5444082449503967e-05, + "loss": 0.4486, + "step": 19518 + }, + { + "epoch": 0.337278822228366, + "grad_norm": 0.9638465666736789, + "learning_rate": 1.5443612995533637e-05, + "loss": 0.5232, + "step": 19519 + }, + { + "epoch": 0.33729610174177493, + "grad_norm": 0.8980426372860263, + "learning_rate": 1.5443143524513496e-05, + "loss": 0.566, + "step": 19520 + }, + { + "epoch": 0.33731338125518384, + "grad_norm": 0.781385545577763, + "learning_rate": 1.5442674036445015e-05, + "loss": 0.6169, + "step": 19521 + }, + { + "epoch": 0.33733066076859275, + "grad_norm": 1.2330756522431745, + "learning_rate": 1.5442204531329667e-05, + "loss": 0.5405, + "step": 19522 + }, + { + "epoch": 0.33734794028200166, + "grad_norm": 1.1813182322307247, + "learning_rate": 1.544173500916892e-05, + "loss": 0.4443, + "step": 19523 + }, + { + "epoch": 0.33736521979541056, + "grad_norm": 0.6286258695363585, + "learning_rate": 1.5441265469964245e-05, + "loss": 0.4274, + "step": 19524 + }, + { + "epoch": 0.3373824993088195, + "grad_norm": 1.0298593797517823, + "learning_rate": 1.5440795913717116e-05, + "loss": 0.637, + "step": 19525 + }, + { + "epoch": 0.3373997788222284, + "grad_norm": 0.7501162703304068, + "learning_rate": 1.5440326340428996e-05, + "loss": 0.4888, + "step": 19526 + }, + { + "epoch": 0.3374170583356373, + "grad_norm": 0.6545674161241296, + "learning_rate": 1.5439856750101364e-05, + "loss": 0.6667, + "step": 19527 + }, + { + "epoch": 0.3374343378490462, + "grad_norm": 0.4161420675741692, + "learning_rate": 1.5439387142735688e-05, + "loss": 0.6448, + "step": 19528 + }, + { + "epoch": 0.33745161736245505, + "grad_norm": 0.7887733749930412, + "learning_rate": 1.5438917518333435e-05, + "loss": 0.685, + "step": 19529 + }, + { + "epoch": 0.33746889687586396, + "grad_norm": 1.4593413746643926, + "learning_rate": 1.543844787689608e-05, + "loss": 0.5653, + "step": 19530 + }, + { + "epoch": 0.33748617638927286, + "grad_norm": 0.6918249220111858, + "learning_rate": 1.5437978218425094e-05, + "loss": 0.3958, + "step": 19531 + }, + { + "epoch": 0.33750345590268177, + "grad_norm": 0.536294223966993, + "learning_rate": 1.5437508542921947e-05, + "loss": 0.3018, + "step": 19532 + }, + { + "epoch": 0.3375207354160907, + "grad_norm": 1.00546797856856, + "learning_rate": 1.5437038850388108e-05, + "loss": 0.4341, + "step": 19533 + }, + { + "epoch": 0.3375380149294996, + "grad_norm": 0.7320163447459568, + "learning_rate": 1.5436569140825047e-05, + "loss": 0.5139, + "step": 19534 + }, + { + "epoch": 0.3375552944429085, + "grad_norm": 0.7969756085972233, + "learning_rate": 1.5436099414234246e-05, + "loss": 0.4377, + "step": 19535 + }, + { + "epoch": 0.3375725739563174, + "grad_norm": 0.9284245725132378, + "learning_rate": 1.543562967061716e-05, + "loss": 0.5726, + "step": 19536 + }, + { + "epoch": 0.3375898534697263, + "grad_norm": 1.1604543058341708, + "learning_rate": 1.5435159909975276e-05, + "loss": 0.4969, + "step": 19537 + }, + { + "epoch": 0.3376071329831352, + "grad_norm": 0.9193364690863504, + "learning_rate": 1.543469013231006e-05, + "loss": 0.5026, + "step": 19538 + }, + { + "epoch": 0.3376244124965441, + "grad_norm": 1.0602244343028229, + "learning_rate": 1.5434220337622976e-05, + "loss": 0.2936, + "step": 19539 + }, + { + "epoch": 0.337641692009953, + "grad_norm": 0.9359307920895578, + "learning_rate": 1.54337505259155e-05, + "loss": 0.5448, + "step": 19540 + }, + { + "epoch": 0.3376589715233619, + "grad_norm": 0.8336907643556776, + "learning_rate": 1.5433280697189108e-05, + "loss": 0.5595, + "step": 19541 + }, + { + "epoch": 0.3376762510367708, + "grad_norm": 1.2636400538129935, + "learning_rate": 1.5432810851445267e-05, + "loss": 0.4351, + "step": 19542 + }, + { + "epoch": 0.3376935305501797, + "grad_norm": 0.8500409345042342, + "learning_rate": 1.543234098868545e-05, + "loss": 0.3331, + "step": 19543 + }, + { + "epoch": 0.3377108100635886, + "grad_norm": 1.198804937085327, + "learning_rate": 1.5431871108911125e-05, + "loss": 0.5063, + "step": 19544 + }, + { + "epoch": 0.3377280895769975, + "grad_norm": 1.3050991188263612, + "learning_rate": 1.543140121212377e-05, + "loss": 0.4002, + "step": 19545 + }, + { + "epoch": 0.3377453690904064, + "grad_norm": 1.480137603750765, + "learning_rate": 1.5430931298324855e-05, + "loss": 0.6487, + "step": 19546 + }, + { + "epoch": 0.33776264860381533, + "grad_norm": 0.8307732615447506, + "learning_rate": 1.5430461367515847e-05, + "loss": 0.4351, + "step": 19547 + }, + { + "epoch": 0.33777992811722424, + "grad_norm": 1.3156524814321489, + "learning_rate": 1.5429991419698224e-05, + "loss": 0.5509, + "step": 19548 + }, + { + "epoch": 0.33779720763063315, + "grad_norm": 0.7310682990269307, + "learning_rate": 1.5429521454873456e-05, + "loss": 0.4871, + "step": 19549 + }, + { + "epoch": 0.337814487144042, + "grad_norm": 1.4139523346811664, + "learning_rate": 1.5429051473043012e-05, + "loss": 0.6652, + "step": 19550 + }, + { + "epoch": 0.3378317666574509, + "grad_norm": 1.0602728524615277, + "learning_rate": 1.5428581474208364e-05, + "loss": 0.3077, + "step": 19551 + }, + { + "epoch": 0.3378490461708598, + "grad_norm": 0.8304079604540248, + "learning_rate": 1.542811145837099e-05, + "loss": 0.4584, + "step": 19552 + }, + { + "epoch": 0.3378663256842687, + "grad_norm": 0.7761249670751762, + "learning_rate": 1.5427641425532354e-05, + "loss": 0.4249, + "step": 19553 + }, + { + "epoch": 0.33788360519767763, + "grad_norm": 0.9539425708659727, + "learning_rate": 1.5427171375693933e-05, + "loss": 0.334, + "step": 19554 + }, + { + "epoch": 0.33790088471108654, + "grad_norm": 0.46512614539877534, + "learning_rate": 1.5426701308857203e-05, + "loss": 0.6299, + "step": 19555 + }, + { + "epoch": 0.33791816422449544, + "grad_norm": 0.979599189940788, + "learning_rate": 1.542623122502363e-05, + "loss": 0.5101, + "step": 19556 + }, + { + "epoch": 0.33793544373790435, + "grad_norm": 1.1038783141120883, + "learning_rate": 1.542576112419469e-05, + "loss": 0.4566, + "step": 19557 + }, + { + "epoch": 0.33795272325131326, + "grad_norm": 0.6432963916271278, + "learning_rate": 1.5425291006371854e-05, + "loss": 0.4494, + "step": 19558 + }, + { + "epoch": 0.33797000276472217, + "grad_norm": 0.5436193589166894, + "learning_rate": 1.542482087155659e-05, + "loss": 0.6648, + "step": 19559 + }, + { + "epoch": 0.3379872822781311, + "grad_norm": 0.9261700764473976, + "learning_rate": 1.5424350719750378e-05, + "loss": 0.3587, + "step": 19560 + }, + { + "epoch": 0.3380045617915399, + "grad_norm": 1.3320125569480143, + "learning_rate": 1.5423880550954686e-05, + "loss": 0.5903, + "step": 19561 + }, + { + "epoch": 0.33802184130494883, + "grad_norm": 1.209567261301572, + "learning_rate": 1.5423410365170994e-05, + "loss": 0.422, + "step": 19562 + }, + { + "epoch": 0.33803912081835774, + "grad_norm": 1.3733100762826518, + "learning_rate": 1.542294016240076e-05, + "loss": 0.6703, + "step": 19563 + }, + { + "epoch": 0.33805640033176665, + "grad_norm": 1.756382266376087, + "learning_rate": 1.542246994264547e-05, + "loss": 0.5444, + "step": 19564 + }, + { + "epoch": 0.33807367984517556, + "grad_norm": 1.4394171697970886, + "learning_rate": 1.542199970590659e-05, + "loss": 0.4739, + "step": 19565 + }, + { + "epoch": 0.33809095935858446, + "grad_norm": 0.9447856450241005, + "learning_rate": 1.5421529452185592e-05, + "loss": 0.3094, + "step": 19566 + }, + { + "epoch": 0.33810823887199337, + "grad_norm": 0.8882522566531038, + "learning_rate": 1.5421059181483956e-05, + "loss": 0.5019, + "step": 19567 + }, + { + "epoch": 0.3381255183854023, + "grad_norm": 0.36106828259356566, + "learning_rate": 1.5420588893803152e-05, + "loss": 0.6536, + "step": 19568 + }, + { + "epoch": 0.3381427978988112, + "grad_norm": 0.8188848968164704, + "learning_rate": 1.542011858914465e-05, + "loss": 0.531, + "step": 19569 + }, + { + "epoch": 0.3381600774122201, + "grad_norm": 0.624374995964751, + "learning_rate": 1.5419648267509924e-05, + "loss": 0.4524, + "step": 19570 + }, + { + "epoch": 0.33817735692562895, + "grad_norm": 0.8370438562045223, + "learning_rate": 1.5419177928900447e-05, + "loss": 0.4374, + "step": 19571 + }, + { + "epoch": 0.33819463643903785, + "grad_norm": 0.392928643837109, + "learning_rate": 1.5418707573317694e-05, + "loss": 0.6446, + "step": 19572 + }, + { + "epoch": 0.33821191595244676, + "grad_norm": 1.0864094423288086, + "learning_rate": 1.5418237200763138e-05, + "loss": 0.4023, + "step": 19573 + }, + { + "epoch": 0.33822919546585567, + "grad_norm": 0.96137715445322, + "learning_rate": 1.541776681123825e-05, + "loss": 0.3138, + "step": 19574 + }, + { + "epoch": 0.3382464749792646, + "grad_norm": 0.5326814903532183, + "learning_rate": 1.5417296404744507e-05, + "loss": 0.4874, + "step": 19575 + }, + { + "epoch": 0.3382637544926735, + "grad_norm": 0.525570032201478, + "learning_rate": 1.5416825981283373e-05, + "loss": 0.7126, + "step": 19576 + }, + { + "epoch": 0.3382810340060824, + "grad_norm": 1.091624958533379, + "learning_rate": 1.5416355540856335e-05, + "loss": 0.6952, + "step": 19577 + }, + { + "epoch": 0.3382983135194913, + "grad_norm": 0.5504910053752108, + "learning_rate": 1.541588508346486e-05, + "loss": 0.373, + "step": 19578 + }, + { + "epoch": 0.3383155930329002, + "grad_norm": 1.2295206370705645, + "learning_rate": 1.5415414609110418e-05, + "loss": 0.6765, + "step": 19579 + }, + { + "epoch": 0.3383328725463091, + "grad_norm": 0.7138037589597168, + "learning_rate": 1.5414944117794488e-05, + "loss": 0.3957, + "step": 19580 + }, + { + "epoch": 0.338350152059718, + "grad_norm": 0.8822641529095399, + "learning_rate": 1.541447360951854e-05, + "loss": 0.6597, + "step": 19581 + }, + { + "epoch": 0.3383674315731269, + "grad_norm": 0.830301635916222, + "learning_rate": 1.541400308428405e-05, + "loss": 0.476, + "step": 19582 + }, + { + "epoch": 0.3383847110865358, + "grad_norm": 1.421615401627912, + "learning_rate": 1.5413532542092492e-05, + "loss": 0.5586, + "step": 19583 + }, + { + "epoch": 0.3384019905999447, + "grad_norm": 0.46212008798482684, + "learning_rate": 1.5413061982945336e-05, + "loss": 0.5108, + "step": 19584 + }, + { + "epoch": 0.3384192701133536, + "grad_norm": 1.2847922909950675, + "learning_rate": 1.541259140684406e-05, + "loss": 0.7009, + "step": 19585 + }, + { + "epoch": 0.3384365496267625, + "grad_norm": 1.1337099036110625, + "learning_rate": 1.5412120813790135e-05, + "loss": 0.4708, + "step": 19586 + }, + { + "epoch": 0.3384538291401714, + "grad_norm": 0.5871590160296504, + "learning_rate": 1.5411650203785038e-05, + "loss": 0.3657, + "step": 19587 + }, + { + "epoch": 0.3384711086535803, + "grad_norm": 1.141914423748576, + "learning_rate": 1.5411179576830242e-05, + "loss": 0.4671, + "step": 19588 + }, + { + "epoch": 0.33848838816698923, + "grad_norm": 1.116709587565821, + "learning_rate": 1.541070893292722e-05, + "loss": 0.3943, + "step": 19589 + }, + { + "epoch": 0.33850566768039814, + "grad_norm": 1.4665536401574042, + "learning_rate": 1.5410238272077446e-05, + "loss": 0.6787, + "step": 19590 + }, + { + "epoch": 0.33852294719380704, + "grad_norm": 1.0362187301443875, + "learning_rate": 1.540976759428239e-05, + "loss": 0.4147, + "step": 19591 + }, + { + "epoch": 0.33854022670721595, + "grad_norm": 0.5626043147600839, + "learning_rate": 1.540929689954354e-05, + "loss": 0.4992, + "step": 19592 + }, + { + "epoch": 0.3385575062206248, + "grad_norm": 1.0770713704798174, + "learning_rate": 1.5408826187862355e-05, + "loss": 0.5331, + "step": 19593 + }, + { + "epoch": 0.3385747857340337, + "grad_norm": 0.502653483035869, + "learning_rate": 1.5408355459240315e-05, + "loss": 0.5066, + "step": 19594 + }, + { + "epoch": 0.3385920652474426, + "grad_norm": 0.998932118822669, + "learning_rate": 1.5407884713678897e-05, + "loss": 0.3891, + "step": 19595 + }, + { + "epoch": 0.3386093447608515, + "grad_norm": 0.5540616375425795, + "learning_rate": 1.540741395117957e-05, + "loss": 0.6208, + "step": 19596 + }, + { + "epoch": 0.33862662427426043, + "grad_norm": 1.3203940684448574, + "learning_rate": 1.5406943171743815e-05, + "loss": 0.5106, + "step": 19597 + }, + { + "epoch": 0.33864390378766934, + "grad_norm": 0.9930455743607477, + "learning_rate": 1.5406472375373102e-05, + "loss": 0.5714, + "step": 19598 + }, + { + "epoch": 0.33866118330107825, + "grad_norm": 1.5264278274659147, + "learning_rate": 1.5406001562068906e-05, + "loss": 0.6361, + "step": 19599 + }, + { + "epoch": 0.33867846281448716, + "grad_norm": 1.336379176475601, + "learning_rate": 1.5405530731832704e-05, + "loss": 0.5442, + "step": 19600 + }, + { + "epoch": 0.33869574232789607, + "grad_norm": 1.456136421648971, + "learning_rate": 1.5405059884665968e-05, + "loss": 0.3793, + "step": 19601 + }, + { + "epoch": 0.338713021841305, + "grad_norm": 1.3200127737568121, + "learning_rate": 1.5404589020570173e-05, + "loss": 0.4537, + "step": 19602 + }, + { + "epoch": 0.3387303013547138, + "grad_norm": 1.115107885635727, + "learning_rate": 1.5404118139546794e-05, + "loss": 0.3379, + "step": 19603 + }, + { + "epoch": 0.33874758086812273, + "grad_norm": 0.7902785511161746, + "learning_rate": 1.5403647241597308e-05, + "loss": 0.3766, + "step": 19604 + }, + { + "epoch": 0.33876486038153164, + "grad_norm": 1.1429459537193944, + "learning_rate": 1.5403176326723186e-05, + "loss": 0.5379, + "step": 19605 + }, + { + "epoch": 0.33878213989494055, + "grad_norm": 1.0946006406378903, + "learning_rate": 1.5402705394925908e-05, + "loss": 0.5718, + "step": 19606 + }, + { + "epoch": 0.33879941940834946, + "grad_norm": 1.1202467937152354, + "learning_rate": 1.5402234446206944e-05, + "loss": 0.3409, + "step": 19607 + }, + { + "epoch": 0.33881669892175836, + "grad_norm": 0.7761840812248871, + "learning_rate": 1.5401763480567772e-05, + "loss": 0.3273, + "step": 19608 + }, + { + "epoch": 0.33883397843516727, + "grad_norm": 1.0771069169935987, + "learning_rate": 1.5401292498009866e-05, + "loss": 0.6143, + "step": 19609 + }, + { + "epoch": 0.3388512579485762, + "grad_norm": 0.7353643039260214, + "learning_rate": 1.54008214985347e-05, + "loss": 0.369, + "step": 19610 + }, + { + "epoch": 0.3388685374619851, + "grad_norm": 0.9764140582334547, + "learning_rate": 1.5400350482143753e-05, + "loss": 0.5186, + "step": 19611 + }, + { + "epoch": 0.338885816975394, + "grad_norm": 0.8085543097441517, + "learning_rate": 1.5399879448838494e-05, + "loss": 0.4033, + "step": 19612 + }, + { + "epoch": 0.3389030964888029, + "grad_norm": 0.7673885851491161, + "learning_rate": 1.539940839862041e-05, + "loss": 0.5179, + "step": 19613 + }, + { + "epoch": 0.33892037600221175, + "grad_norm": 0.7936155475066167, + "learning_rate": 1.539893733149096e-05, + "loss": 0.4674, + "step": 19614 + }, + { + "epoch": 0.33893765551562066, + "grad_norm": 1.3041542313131071, + "learning_rate": 1.539846624745163e-05, + "loss": 0.6179, + "step": 19615 + }, + { + "epoch": 0.33895493502902957, + "grad_norm": 1.2173185605444419, + "learning_rate": 1.5397995146503896e-05, + "loss": 0.4567, + "step": 19616 + }, + { + "epoch": 0.3389722145424385, + "grad_norm": 0.8731311408503926, + "learning_rate": 1.5397524028649228e-05, + "loss": 0.5047, + "step": 19617 + }, + { + "epoch": 0.3389894940558474, + "grad_norm": 0.8236688292414389, + "learning_rate": 1.539705289388911e-05, + "loss": 0.4116, + "step": 19618 + }, + { + "epoch": 0.3390067735692563, + "grad_norm": 0.6764917486891241, + "learning_rate": 1.5396581742225006e-05, + "loss": 0.5177, + "step": 19619 + }, + { + "epoch": 0.3390240530826652, + "grad_norm": 0.9969042119930696, + "learning_rate": 1.5396110573658404e-05, + "loss": 0.3037, + "step": 19620 + }, + { + "epoch": 0.3390413325960741, + "grad_norm": 0.6734911087043317, + "learning_rate": 1.539563938819077e-05, + "loss": 0.3556, + "step": 19621 + }, + { + "epoch": 0.339058612109483, + "grad_norm": 0.9475218162825049, + "learning_rate": 1.5395168185823583e-05, + "loss": 0.4515, + "step": 19622 + }, + { + "epoch": 0.3390758916228919, + "grad_norm": 1.008744050558373, + "learning_rate": 1.5394696966558318e-05, + "loss": 0.4738, + "step": 19623 + }, + { + "epoch": 0.3390931711363008, + "grad_norm": 1.061577270893314, + "learning_rate": 1.5394225730396454e-05, + "loss": 0.5613, + "step": 19624 + }, + { + "epoch": 0.3391104506497097, + "grad_norm": 0.6939009600379037, + "learning_rate": 1.5393754477339464e-05, + "loss": 0.529, + "step": 19625 + }, + { + "epoch": 0.3391277301631186, + "grad_norm": 0.8797470155149949, + "learning_rate": 1.5393283207388825e-05, + "loss": 0.4434, + "step": 19626 + }, + { + "epoch": 0.3391450096765275, + "grad_norm": 0.5719255209228638, + "learning_rate": 1.5392811920546015e-05, + "loss": 0.2851, + "step": 19627 + }, + { + "epoch": 0.3391622891899364, + "grad_norm": 0.985443647130421, + "learning_rate": 1.5392340616812507e-05, + "loss": 0.5362, + "step": 19628 + }, + { + "epoch": 0.3391795687033453, + "grad_norm": 0.6698920139466451, + "learning_rate": 1.539186929618978e-05, + "loss": 0.3666, + "step": 19629 + }, + { + "epoch": 0.3391968482167542, + "grad_norm": 0.8012994688815257, + "learning_rate": 1.5391397958679308e-05, + "loss": 0.5251, + "step": 19630 + }, + { + "epoch": 0.33921412773016313, + "grad_norm": 1.0592206424646926, + "learning_rate": 1.5390926604282567e-05, + "loss": 0.3515, + "step": 19631 + }, + { + "epoch": 0.33923140724357204, + "grad_norm": 0.43618934842096907, + "learning_rate": 1.5390455233001035e-05, + "loss": 0.5113, + "step": 19632 + }, + { + "epoch": 0.33924868675698094, + "grad_norm": 1.2280047820443978, + "learning_rate": 1.5389983844836188e-05, + "loss": 0.5648, + "step": 19633 + }, + { + "epoch": 0.33926596627038985, + "grad_norm": 0.9926294993949766, + "learning_rate": 1.53895124397895e-05, + "loss": 0.5981, + "step": 19634 + }, + { + "epoch": 0.3392832457837987, + "grad_norm": 1.1592745286069084, + "learning_rate": 1.538904101786245e-05, + "loss": 0.4771, + "step": 19635 + }, + { + "epoch": 0.3393005252972076, + "grad_norm": 0.682688217390908, + "learning_rate": 1.5388569579056513e-05, + "loss": 0.4937, + "step": 19636 + }, + { + "epoch": 0.3393178048106165, + "grad_norm": 2.463369938285794, + "learning_rate": 1.5388098123373168e-05, + "loss": 0.5619, + "step": 19637 + }, + { + "epoch": 0.3393350843240254, + "grad_norm": 1.2749499232557784, + "learning_rate": 1.5387626650813888e-05, + "loss": 0.5579, + "step": 19638 + }, + { + "epoch": 0.33935236383743433, + "grad_norm": 1.246455639086736, + "learning_rate": 1.5387155161380157e-05, + "loss": 0.4825, + "step": 19639 + }, + { + "epoch": 0.33936964335084324, + "grad_norm": 0.9359072877054334, + "learning_rate": 1.538668365507344e-05, + "loss": 0.4434, + "step": 19640 + }, + { + "epoch": 0.33938692286425215, + "grad_norm": 1.0209540602481226, + "learning_rate": 1.5386212131895225e-05, + "loss": 0.5865, + "step": 19641 + }, + { + "epoch": 0.33940420237766106, + "grad_norm": 0.8758057609846017, + "learning_rate": 1.5385740591846982e-05, + "loss": 0.5199, + "step": 19642 + }, + { + "epoch": 0.33942148189106996, + "grad_norm": 0.8125789867799571, + "learning_rate": 1.5385269034930194e-05, + "loss": 0.5273, + "step": 19643 + }, + { + "epoch": 0.3394387614044789, + "grad_norm": 1.119862147991427, + "learning_rate": 1.538479746114633e-05, + "loss": 0.4365, + "step": 19644 + }, + { + "epoch": 0.3394560409178877, + "grad_norm": 0.9225929235276964, + "learning_rate": 1.5384325870496874e-05, + "loss": 0.4086, + "step": 19645 + }, + { + "epoch": 0.33947332043129663, + "grad_norm": 0.5248272100006511, + "learning_rate": 1.5383854262983297e-05, + "loss": 0.4773, + "step": 19646 + }, + { + "epoch": 0.33949059994470554, + "grad_norm": 1.2179710938990571, + "learning_rate": 1.5383382638607084e-05, + "loss": 0.5897, + "step": 19647 + }, + { + "epoch": 0.33950787945811445, + "grad_norm": 1.080434417056331, + "learning_rate": 1.5382910997369703e-05, + "loss": 0.5592, + "step": 19648 + }, + { + "epoch": 0.33952515897152336, + "grad_norm": 0.675436192511413, + "learning_rate": 1.5382439339272637e-05, + "loss": 0.3595, + "step": 19649 + }, + { + "epoch": 0.33954243848493226, + "grad_norm": 1.1861513737439844, + "learning_rate": 1.5381967664317362e-05, + "loss": 0.2516, + "step": 19650 + }, + { + "epoch": 0.33955971799834117, + "grad_norm": 0.7925708167011297, + "learning_rate": 1.5381495972505355e-05, + "loss": 0.4314, + "step": 19651 + }, + { + "epoch": 0.3395769975117501, + "grad_norm": 0.6862566850789721, + "learning_rate": 1.5381024263838094e-05, + "loss": 0.4301, + "step": 19652 + }, + { + "epoch": 0.339594277025159, + "grad_norm": 1.3819858987893283, + "learning_rate": 1.538055253831706e-05, + "loss": 0.522, + "step": 19653 + }, + { + "epoch": 0.3396115565385679, + "grad_norm": 1.3553119330837238, + "learning_rate": 1.538008079594372e-05, + "loss": 0.3456, + "step": 19654 + }, + { + "epoch": 0.3396288360519768, + "grad_norm": 1.633141935972842, + "learning_rate": 1.537960903671956e-05, + "loss": 0.4951, + "step": 19655 + }, + { + "epoch": 0.33964611556538565, + "grad_norm": 0.653095976377142, + "learning_rate": 1.537913726064606e-05, + "loss": 0.5539, + "step": 19656 + }, + { + "epoch": 0.33966339507879456, + "grad_norm": 1.003592876970413, + "learning_rate": 1.5378665467724685e-05, + "loss": 0.6039, + "step": 19657 + }, + { + "epoch": 0.33968067459220347, + "grad_norm": 1.1266929788979625, + "learning_rate": 1.5378193657956927e-05, + "loss": 0.5548, + "step": 19658 + }, + { + "epoch": 0.3396979541056124, + "grad_norm": 1.1245391020977435, + "learning_rate": 1.5377721831344258e-05, + "loss": 0.4576, + "step": 19659 + }, + { + "epoch": 0.3397152336190213, + "grad_norm": 0.7348446354985843, + "learning_rate": 1.5377249987888153e-05, + "loss": 0.2825, + "step": 19660 + }, + { + "epoch": 0.3397325131324302, + "grad_norm": 0.8551826873436609, + "learning_rate": 1.5376778127590096e-05, + "loss": 0.6973, + "step": 19661 + }, + { + "epoch": 0.3397497926458391, + "grad_norm": 0.9663736669703483, + "learning_rate": 1.5376306250451552e-05, + "loss": 0.5837, + "step": 19662 + }, + { + "epoch": 0.339767072159248, + "grad_norm": 0.7081842344581398, + "learning_rate": 1.5375834356474018e-05, + "loss": 0.7018, + "step": 19663 + }, + { + "epoch": 0.3397843516726569, + "grad_norm": 0.9132999637024706, + "learning_rate": 1.537536244565896e-05, + "loss": 0.29, + "step": 19664 + }, + { + "epoch": 0.3398016311860658, + "grad_norm": 0.9403273688203373, + "learning_rate": 1.5374890518007854e-05, + "loss": 0.4456, + "step": 19665 + }, + { + "epoch": 0.33981891069947473, + "grad_norm": 1.043537917078634, + "learning_rate": 1.537441857352219e-05, + "loss": 0.3877, + "step": 19666 + }, + { + "epoch": 0.3398361902128836, + "grad_norm": 1.9860776543077001, + "learning_rate": 1.5373946612203435e-05, + "loss": 0.5438, + "step": 19667 + }, + { + "epoch": 0.3398534697262925, + "grad_norm": 0.9914353753790754, + "learning_rate": 1.537347463405307e-05, + "loss": 0.6408, + "step": 19668 + }, + { + "epoch": 0.3398707492397014, + "grad_norm": 0.7500371795539813, + "learning_rate": 1.537300263907257e-05, + "loss": 0.7205, + "step": 19669 + }, + { + "epoch": 0.3398880287531103, + "grad_norm": 0.7999697871841812, + "learning_rate": 1.5372530627263423e-05, + "loss": 0.596, + "step": 19670 + }, + { + "epoch": 0.3399053082665192, + "grad_norm": 0.9375702822006962, + "learning_rate": 1.53720585986271e-05, + "loss": 0.3445, + "step": 19671 + }, + { + "epoch": 0.3399225877799281, + "grad_norm": 0.5810951029464233, + "learning_rate": 1.5371586553165087e-05, + "loss": 0.4268, + "step": 19672 + }, + { + "epoch": 0.33993986729333703, + "grad_norm": 1.1439147707921962, + "learning_rate": 1.537111449087885e-05, + "loss": 0.6303, + "step": 19673 + }, + { + "epoch": 0.33995714680674594, + "grad_norm": 0.812188138470762, + "learning_rate": 1.5370642411769875e-05, + "loss": 0.7073, + "step": 19674 + }, + { + "epoch": 0.33997442632015484, + "grad_norm": 1.7149633862938842, + "learning_rate": 1.5370170315839638e-05, + "loss": 0.4938, + "step": 19675 + }, + { + "epoch": 0.33999170583356375, + "grad_norm": 1.7586058761258838, + "learning_rate": 1.536969820308962e-05, + "loss": 0.5619, + "step": 19676 + }, + { + "epoch": 0.3400089853469726, + "grad_norm": 1.4489675400868665, + "learning_rate": 1.53692260735213e-05, + "loss": 0.3879, + "step": 19677 + }, + { + "epoch": 0.3400262648603815, + "grad_norm": 0.8793666299802385, + "learning_rate": 1.536875392713616e-05, + "loss": 0.4015, + "step": 19678 + }, + { + "epoch": 0.3400435443737904, + "grad_norm": 1.1248789615810673, + "learning_rate": 1.5368281763935672e-05, + "loss": 0.4543, + "step": 19679 + }, + { + "epoch": 0.3400608238871993, + "grad_norm": 0.9716492168959663, + "learning_rate": 1.5367809583921315e-05, + "loss": 0.4925, + "step": 19680 + }, + { + "epoch": 0.34007810340060823, + "grad_norm": 0.9745905422245527, + "learning_rate": 1.5367337387094574e-05, + "loss": 0.5021, + "step": 19681 + }, + { + "epoch": 0.34009538291401714, + "grad_norm": 0.7507739849952759, + "learning_rate": 1.5366865173456923e-05, + "loss": 0.5848, + "step": 19682 + }, + { + "epoch": 0.34011266242742605, + "grad_norm": 1.6771025214091713, + "learning_rate": 1.536639294300984e-05, + "loss": 0.7625, + "step": 19683 + }, + { + "epoch": 0.34012994194083496, + "grad_norm": 1.1143248225408613, + "learning_rate": 1.5365920695754814e-05, + "loss": 0.3759, + "step": 19684 + }, + { + "epoch": 0.34014722145424386, + "grad_norm": 1.1936446000304202, + "learning_rate": 1.5365448431693308e-05, + "loss": 0.4926, + "step": 19685 + }, + { + "epoch": 0.34016450096765277, + "grad_norm": 1.5781078146033933, + "learning_rate": 1.5364976150826817e-05, + "loss": 0.6072, + "step": 19686 + }, + { + "epoch": 0.3401817804810617, + "grad_norm": 1.2790606494173546, + "learning_rate": 1.5364503853156806e-05, + "loss": 0.4628, + "step": 19687 + }, + { + "epoch": 0.34019905999447053, + "grad_norm": 1.0476687945183445, + "learning_rate": 1.5364031538684764e-05, + "loss": 0.4581, + "step": 19688 + }, + { + "epoch": 0.34021633950787944, + "grad_norm": 1.582127395003925, + "learning_rate": 1.536355920741217e-05, + "loss": 0.5673, + "step": 19689 + }, + { + "epoch": 0.34023361902128835, + "grad_norm": 0.7673787931574192, + "learning_rate": 1.5363086859340496e-05, + "loss": 0.4637, + "step": 19690 + }, + { + "epoch": 0.34025089853469725, + "grad_norm": 1.7958191458770163, + "learning_rate": 1.536261449447123e-05, + "loss": 0.491, + "step": 19691 + }, + { + "epoch": 0.34026817804810616, + "grad_norm": 0.5962278274841464, + "learning_rate": 1.536214211280585e-05, + "loss": 0.4594, + "step": 19692 + }, + { + "epoch": 0.34028545756151507, + "grad_norm": 1.7045364996835437, + "learning_rate": 1.536166971434583e-05, + "loss": 0.5804, + "step": 19693 + }, + { + "epoch": 0.340302737074924, + "grad_norm": 0.6637325882731144, + "learning_rate": 1.5361197299092653e-05, + "loss": 0.3422, + "step": 19694 + }, + { + "epoch": 0.3403200165883329, + "grad_norm": 1.2351774582878945, + "learning_rate": 1.53607248670478e-05, + "loss": 0.4455, + "step": 19695 + }, + { + "epoch": 0.3403372961017418, + "grad_norm": 0.8322036470552109, + "learning_rate": 1.5360252418212748e-05, + "loss": 0.5714, + "step": 19696 + }, + { + "epoch": 0.3403545756151507, + "grad_norm": 1.1432201517686709, + "learning_rate": 1.5359779952588974e-05, + "loss": 0.6212, + "step": 19697 + }, + { + "epoch": 0.34037185512855955, + "grad_norm": 0.7820031843469284, + "learning_rate": 1.535930747017797e-05, + "loss": 0.3528, + "step": 19698 + }, + { + "epoch": 0.34038913464196846, + "grad_norm": 1.0019421280630612, + "learning_rate": 1.5358834970981205e-05, + "loss": 0.486, + "step": 19699 + }, + { + "epoch": 0.34040641415537737, + "grad_norm": 1.7025773080735944, + "learning_rate": 1.5358362455000158e-05, + "loss": 0.5118, + "step": 19700 + }, + { + "epoch": 0.3404236936687863, + "grad_norm": 0.9582393223188034, + "learning_rate": 1.5357889922236314e-05, + "loss": 0.4363, + "step": 19701 + }, + { + "epoch": 0.3404409731821952, + "grad_norm": 1.9627802617675674, + "learning_rate": 1.5357417372691156e-05, + "loss": 0.43, + "step": 19702 + }, + { + "epoch": 0.3404582526956041, + "grad_norm": 0.4318668261818711, + "learning_rate": 1.535694480636616e-05, + "loss": 0.56, + "step": 19703 + }, + { + "epoch": 0.340475532209013, + "grad_norm": 1.527131609347791, + "learning_rate": 1.53564722232628e-05, + "loss": 0.5969, + "step": 19704 + }, + { + "epoch": 0.3404928117224219, + "grad_norm": 0.9243322313173624, + "learning_rate": 1.535599962338256e-05, + "loss": 0.3935, + "step": 19705 + }, + { + "epoch": 0.3405100912358308, + "grad_norm": 1.347712690246987, + "learning_rate": 1.535552700672693e-05, + "loss": 0.476, + "step": 19706 + }, + { + "epoch": 0.3405273707492397, + "grad_norm": 1.1028625262488532, + "learning_rate": 1.535505437329738e-05, + "loss": 0.6666, + "step": 19707 + }, + { + "epoch": 0.34054465026264863, + "grad_norm": 1.344132533008428, + "learning_rate": 1.535458172309539e-05, + "loss": 0.4035, + "step": 19708 + }, + { + "epoch": 0.3405619297760575, + "grad_norm": 0.9050456047843507, + "learning_rate": 1.5354109056122445e-05, + "loss": 0.3752, + "step": 19709 + }, + { + "epoch": 0.3405792092894664, + "grad_norm": 0.9965970183725157, + "learning_rate": 1.5353636372380024e-05, + "loss": 0.5051, + "step": 19710 + }, + { + "epoch": 0.3405964888028753, + "grad_norm": 1.4814646163851697, + "learning_rate": 1.5353163671869603e-05, + "loss": 0.5105, + "step": 19711 + }, + { + "epoch": 0.3406137683162842, + "grad_norm": 0.9939951395654113, + "learning_rate": 1.535269095459267e-05, + "loss": 0.525, + "step": 19712 + }, + { + "epoch": 0.3406310478296931, + "grad_norm": 0.7503490705661817, + "learning_rate": 1.5352218220550704e-05, + "loss": 0.4629, + "step": 19713 + }, + { + "epoch": 0.340648327343102, + "grad_norm": 0.5012453737710373, + "learning_rate": 1.535174546974518e-05, + "loss": 0.7574, + "step": 19714 + }, + { + "epoch": 0.3406656068565109, + "grad_norm": 1.2088146616126445, + "learning_rate": 1.5351272702177587e-05, + "loss": 0.3818, + "step": 19715 + }, + { + "epoch": 0.34068288636991984, + "grad_norm": 0.7252678482477747, + "learning_rate": 1.5350799917849397e-05, + "loss": 0.4003, + "step": 19716 + }, + { + "epoch": 0.34070016588332874, + "grad_norm": 0.9181651471502468, + "learning_rate": 1.5350327116762094e-05, + "loss": 0.5179, + "step": 19717 + }, + { + "epoch": 0.34071744539673765, + "grad_norm": 1.1454120436109239, + "learning_rate": 1.5349854298917162e-05, + "loss": 0.4144, + "step": 19718 + }, + { + "epoch": 0.3407347249101465, + "grad_norm": 0.6724149723256392, + "learning_rate": 1.5349381464316083e-05, + "loss": 0.3086, + "step": 19719 + }, + { + "epoch": 0.3407520044235554, + "grad_norm": 1.0994631815759408, + "learning_rate": 1.534890861296033e-05, + "loss": 0.5467, + "step": 19720 + }, + { + "epoch": 0.3407692839369643, + "grad_norm": 0.5978351048347205, + "learning_rate": 1.534843574485139e-05, + "loss": 0.7233, + "step": 19721 + }, + { + "epoch": 0.3407865634503732, + "grad_norm": 0.8794514152517731, + "learning_rate": 1.5347962859990744e-05, + "loss": 0.5433, + "step": 19722 + }, + { + "epoch": 0.34080384296378213, + "grad_norm": 1.2269717163855591, + "learning_rate": 1.534748995837987e-05, + "loss": 0.4042, + "step": 19723 + }, + { + "epoch": 0.34082112247719104, + "grad_norm": 0.7776865612198413, + "learning_rate": 1.5347017040020254e-05, + "loss": 0.4781, + "step": 19724 + }, + { + "epoch": 0.34083840199059995, + "grad_norm": 0.7870972289759083, + "learning_rate": 1.534654410491337e-05, + "loss": 0.4249, + "step": 19725 + }, + { + "epoch": 0.34085568150400886, + "grad_norm": 1.0228209090949691, + "learning_rate": 1.5346071153060706e-05, + "loss": 0.5513, + "step": 19726 + }, + { + "epoch": 0.34087296101741776, + "grad_norm": 0.7077950983236252, + "learning_rate": 1.534559818446374e-05, + "loss": 0.5411, + "step": 19727 + }, + { + "epoch": 0.34089024053082667, + "grad_norm": 1.252189332663816, + "learning_rate": 1.5345125199123956e-05, + "loss": 0.4607, + "step": 19728 + }, + { + "epoch": 0.3409075200442356, + "grad_norm": 1.1886673094268225, + "learning_rate": 1.5344652197042833e-05, + "loss": 0.4362, + "step": 19729 + }, + { + "epoch": 0.34092479955764443, + "grad_norm": 0.8218121177449215, + "learning_rate": 1.5344179178221855e-05, + "loss": 0.4226, + "step": 19730 + }, + { + "epoch": 0.34094207907105334, + "grad_norm": 0.6560900410845544, + "learning_rate": 1.5343706142662496e-05, + "loss": 0.2364, + "step": 19731 + }, + { + "epoch": 0.34095935858446225, + "grad_norm": 0.8750634538620935, + "learning_rate": 1.5343233090366248e-05, + "loss": 0.3971, + "step": 19732 + }, + { + "epoch": 0.34097663809787115, + "grad_norm": 1.1837389953472062, + "learning_rate": 1.5342760021334587e-05, + "loss": 0.5016, + "step": 19733 + }, + { + "epoch": 0.34099391761128006, + "grad_norm": 1.1322864030647057, + "learning_rate": 1.5342286935568994e-05, + "loss": 0.5016, + "step": 19734 + }, + { + "epoch": 0.34101119712468897, + "grad_norm": 1.661630547209475, + "learning_rate": 1.5341813833070956e-05, + "loss": 0.468, + "step": 19735 + }, + { + "epoch": 0.3410284766380979, + "grad_norm": 1.359037820933317, + "learning_rate": 1.5341340713841947e-05, + "loss": 0.6097, + "step": 19736 + }, + { + "epoch": 0.3410457561515068, + "grad_norm": 0.8680409388159164, + "learning_rate": 1.5340867577883455e-05, + "loss": 0.4245, + "step": 19737 + }, + { + "epoch": 0.3410630356649157, + "grad_norm": 1.168041291351976, + "learning_rate": 1.534039442519696e-05, + "loss": 0.5863, + "step": 19738 + }, + { + "epoch": 0.3410803151783246, + "grad_norm": 2.26208848402289, + "learning_rate": 1.5339921255783943e-05, + "loss": 0.5065, + "step": 19739 + }, + { + "epoch": 0.3410975946917335, + "grad_norm": 1.294353274067216, + "learning_rate": 1.5339448069645887e-05, + "loss": 0.5505, + "step": 19740 + }, + { + "epoch": 0.34111487420514236, + "grad_norm": 0.9251664491846059, + "learning_rate": 1.5338974866784274e-05, + "loss": 0.4653, + "step": 19741 + }, + { + "epoch": 0.34113215371855127, + "grad_norm": 1.1149020558842968, + "learning_rate": 1.5338501647200587e-05, + "loss": 0.3652, + "step": 19742 + }, + { + "epoch": 0.3411494332319602, + "grad_norm": 0.88792709908228, + "learning_rate": 1.53380284108963e-05, + "loss": 0.5988, + "step": 19743 + }, + { + "epoch": 0.3411667127453691, + "grad_norm": 1.7353123380880962, + "learning_rate": 1.5337555157872915e-05, + "loss": 0.4739, + "step": 19744 + }, + { + "epoch": 0.341183992258778, + "grad_norm": 1.0501548881527285, + "learning_rate": 1.5337081888131894e-05, + "loss": 0.2909, + "step": 19745 + }, + { + "epoch": 0.3412012717721869, + "grad_norm": 1.0093698488791696, + "learning_rate": 1.5336608601674726e-05, + "loss": 0.7025, + "step": 19746 + }, + { + "epoch": 0.3412185512855958, + "grad_norm": 1.7036117738690526, + "learning_rate": 1.5336135298502897e-05, + "loss": 0.4778, + "step": 19747 + }, + { + "epoch": 0.3412358307990047, + "grad_norm": 1.281324376608407, + "learning_rate": 1.5335661978617885e-05, + "loss": 0.5341, + "step": 19748 + }, + { + "epoch": 0.3412531103124136, + "grad_norm": 1.8319268319285185, + "learning_rate": 1.5335188642021174e-05, + "loss": 0.5115, + "step": 19749 + }, + { + "epoch": 0.34127038982582253, + "grad_norm": 1.2207339294491502, + "learning_rate": 1.533471528871425e-05, + "loss": 0.4632, + "step": 19750 + }, + { + "epoch": 0.3412876693392314, + "grad_norm": 1.5149759312018902, + "learning_rate": 1.533424191869859e-05, + "loss": 0.5494, + "step": 19751 + }, + { + "epoch": 0.3413049488526403, + "grad_norm": 0.9656292276676256, + "learning_rate": 1.533376853197568e-05, + "loss": 0.4741, + "step": 19752 + }, + { + "epoch": 0.3413222283660492, + "grad_norm": 0.8258623123484053, + "learning_rate": 1.5333295128546998e-05, + "loss": 0.5792, + "step": 19753 + }, + { + "epoch": 0.3413395078794581, + "grad_norm": 2.2989964496779836, + "learning_rate": 1.5332821708414034e-05, + "loss": 0.4571, + "step": 19754 + }, + { + "epoch": 0.341356787392867, + "grad_norm": 1.0571156205979169, + "learning_rate": 1.5332348271578265e-05, + "loss": 0.4157, + "step": 19755 + }, + { + "epoch": 0.3413740669062759, + "grad_norm": 0.9485980220261045, + "learning_rate": 1.5331874818041178e-05, + "loss": 0.4424, + "step": 19756 + }, + { + "epoch": 0.3413913464196848, + "grad_norm": 0.5749909420340269, + "learning_rate": 1.5331401347804254e-05, + "loss": 0.6963, + "step": 19757 + }, + { + "epoch": 0.34140862593309373, + "grad_norm": 0.7726796092682342, + "learning_rate": 1.5330927860868973e-05, + "loss": 0.4095, + "step": 19758 + }, + { + "epoch": 0.34142590544650264, + "grad_norm": 0.795949403831066, + "learning_rate": 1.5330454357236822e-05, + "loss": 0.4756, + "step": 19759 + }, + { + "epoch": 0.34144318495991155, + "grad_norm": 0.7998580325873392, + "learning_rate": 1.5329980836909283e-05, + "loss": 0.5694, + "step": 19760 + }, + { + "epoch": 0.34146046447332046, + "grad_norm": 1.0950086935850505, + "learning_rate": 1.5329507299887843e-05, + "loss": 0.5109, + "step": 19761 + }, + { + "epoch": 0.3414777439867293, + "grad_norm": 0.7704466110337728, + "learning_rate": 1.5329033746173975e-05, + "loss": 0.5267, + "step": 19762 + }, + { + "epoch": 0.3414950235001382, + "grad_norm": 0.8204733498217346, + "learning_rate": 1.532856017576917e-05, + "loss": 0.5558, + "step": 19763 + }, + { + "epoch": 0.3415123030135471, + "grad_norm": 1.0182821495585674, + "learning_rate": 1.5328086588674913e-05, + "loss": 0.416, + "step": 19764 + }, + { + "epoch": 0.34152958252695603, + "grad_norm": 1.0182700417208979, + "learning_rate": 1.532761298489268e-05, + "loss": 0.4752, + "step": 19765 + }, + { + "epoch": 0.34154686204036494, + "grad_norm": 1.3463575735298725, + "learning_rate": 1.532713936442396e-05, + "loss": 0.5576, + "step": 19766 + }, + { + "epoch": 0.34156414155377385, + "grad_norm": 1.0206170363337335, + "learning_rate": 1.5326665727270235e-05, + "loss": 0.4433, + "step": 19767 + }, + { + "epoch": 0.34158142106718276, + "grad_norm": 0.6883052182232602, + "learning_rate": 1.5326192073432987e-05, + "loss": 0.4285, + "step": 19768 + }, + { + "epoch": 0.34159870058059166, + "grad_norm": 1.1123447715275958, + "learning_rate": 1.53257184029137e-05, + "loss": 0.5018, + "step": 19769 + }, + { + "epoch": 0.34161598009400057, + "grad_norm": 1.1642807296040643, + "learning_rate": 1.532524471571386e-05, + "loss": 0.4498, + "step": 19770 + }, + { + "epoch": 0.3416332596074095, + "grad_norm": 1.205907863383448, + "learning_rate": 1.5324771011834948e-05, + "loss": 0.6041, + "step": 19771 + }, + { + "epoch": 0.34165053912081833, + "grad_norm": 1.727195992354214, + "learning_rate": 1.5324297291278446e-05, + "loss": 0.4331, + "step": 19772 + }, + { + "epoch": 0.34166781863422724, + "grad_norm": 0.7855380144780136, + "learning_rate": 1.5323823554045843e-05, + "loss": 0.5408, + "step": 19773 + }, + { + "epoch": 0.34168509814763615, + "grad_norm": 1.757191966257566, + "learning_rate": 1.532334980013862e-05, + "loss": 0.5281, + "step": 19774 + }, + { + "epoch": 0.34170237766104505, + "grad_norm": 1.1407955497979032, + "learning_rate": 1.532287602955826e-05, + "loss": 0.4729, + "step": 19775 + }, + { + "epoch": 0.34171965717445396, + "grad_norm": 0.7285447213279539, + "learning_rate": 1.5322402242306253e-05, + "loss": 0.4629, + "step": 19776 + }, + { + "epoch": 0.34173693668786287, + "grad_norm": 1.0870324287542141, + "learning_rate": 1.532192843838407e-05, + "loss": 0.3083, + "step": 19777 + }, + { + "epoch": 0.3417542162012718, + "grad_norm": 1.1477250897997138, + "learning_rate": 1.5321454617793207e-05, + "loss": 0.7483, + "step": 19778 + }, + { + "epoch": 0.3417714957146807, + "grad_norm": 1.0268826246411198, + "learning_rate": 1.532098078053514e-05, + "loss": 0.5084, + "step": 19779 + }, + { + "epoch": 0.3417887752280896, + "grad_norm": 1.3757113676347184, + "learning_rate": 1.532050692661136e-05, + "loss": 0.4786, + "step": 19780 + }, + { + "epoch": 0.3418060547414985, + "grad_norm": 1.5448795925206515, + "learning_rate": 1.5320033056023348e-05, + "loss": 0.4373, + "step": 19781 + }, + { + "epoch": 0.3418233342549074, + "grad_norm": 0.8394755864190129, + "learning_rate": 1.5319559168772588e-05, + "loss": 0.4127, + "step": 19782 + }, + { + "epoch": 0.34184061376831626, + "grad_norm": 1.11081230592985, + "learning_rate": 1.531908526486056e-05, + "loss": 0.3983, + "step": 19783 + }, + { + "epoch": 0.34185789328172517, + "grad_norm": 1.0760645965276456, + "learning_rate": 1.531861134428876e-05, + "loss": 0.4935, + "step": 19784 + }, + { + "epoch": 0.3418751727951341, + "grad_norm": 2.1285664087016474, + "learning_rate": 1.5318137407058664e-05, + "loss": 0.5709, + "step": 19785 + }, + { + "epoch": 0.341892452308543, + "grad_norm": 1.0688964152424465, + "learning_rate": 1.531766345317175e-05, + "loss": 0.3773, + "step": 19786 + }, + { + "epoch": 0.3419097318219519, + "grad_norm": 0.8219655512096519, + "learning_rate": 1.531718948262952e-05, + "loss": 0.3866, + "step": 19787 + }, + { + "epoch": 0.3419270113353608, + "grad_norm": 0.9290227426406186, + "learning_rate": 1.531671549543344e-05, + "loss": 0.49, + "step": 19788 + }, + { + "epoch": 0.3419442908487697, + "grad_norm": 0.5237314338816814, + "learning_rate": 1.5316241491585007e-05, + "loss": 0.7045, + "step": 19789 + }, + { + "epoch": 0.3419615703621786, + "grad_norm": 0.9673395094058694, + "learning_rate": 1.53157674710857e-05, + "loss": 0.5382, + "step": 19790 + }, + { + "epoch": 0.3419788498755875, + "grad_norm": 1.292107718919949, + "learning_rate": 1.5315293433937005e-05, + "loss": 0.3804, + "step": 19791 + }, + { + "epoch": 0.34199612938899643, + "grad_norm": 0.9467268406366847, + "learning_rate": 1.531481938014041e-05, + "loss": 0.5415, + "step": 19792 + }, + { + "epoch": 0.34201340890240534, + "grad_norm": 0.7760005947374784, + "learning_rate": 1.5314345309697397e-05, + "loss": 0.6596, + "step": 19793 + }, + { + "epoch": 0.3420306884158142, + "grad_norm": 1.1866478977037935, + "learning_rate": 1.5313871222609447e-05, + "loss": 0.49, + "step": 19794 + }, + { + "epoch": 0.3420479679292231, + "grad_norm": 0.6199181575806372, + "learning_rate": 1.5313397118878052e-05, + "loss": 0.3084, + "step": 19795 + }, + { + "epoch": 0.342065247442632, + "grad_norm": 1.050073613526179, + "learning_rate": 1.5312922998504692e-05, + "loss": 0.5287, + "step": 19796 + }, + { + "epoch": 0.3420825269560409, + "grad_norm": 0.8860510403377779, + "learning_rate": 1.5312448861490855e-05, + "loss": 0.4479, + "step": 19797 + }, + { + "epoch": 0.3420998064694498, + "grad_norm": 1.0724812164070319, + "learning_rate": 1.531197470783802e-05, + "loss": 0.6267, + "step": 19798 + }, + { + "epoch": 0.3421170859828587, + "grad_norm": 1.0139006993456112, + "learning_rate": 1.5311500537547682e-05, + "loss": 0.575, + "step": 19799 + }, + { + "epoch": 0.34213436549626763, + "grad_norm": 1.1294306740501268, + "learning_rate": 1.5311026350621314e-05, + "loss": 0.3449, + "step": 19800 + }, + { + "epoch": 0.34215164500967654, + "grad_norm": 1.0287775474462648, + "learning_rate": 1.5310552147060414e-05, + "loss": 0.4699, + "step": 19801 + }, + { + "epoch": 0.34216892452308545, + "grad_norm": 0.9855615463772802, + "learning_rate": 1.5310077926866458e-05, + "loss": 0.7273, + "step": 19802 + }, + { + "epoch": 0.34218620403649436, + "grad_norm": 0.7722736077120873, + "learning_rate": 1.5309603690040935e-05, + "loss": 0.4587, + "step": 19803 + }, + { + "epoch": 0.3422034835499032, + "grad_norm": 0.4527125920520629, + "learning_rate": 1.530912943658533e-05, + "loss": 0.5861, + "step": 19804 + }, + { + "epoch": 0.3422207630633121, + "grad_norm": 1.2382889867595792, + "learning_rate": 1.5308655166501126e-05, + "loss": 0.5541, + "step": 19805 + }, + { + "epoch": 0.342238042576721, + "grad_norm": 1.214842061963578, + "learning_rate": 1.5308180879789814e-05, + "loss": 0.5514, + "step": 19806 + }, + { + "epoch": 0.34225532209012993, + "grad_norm": 0.727870005367404, + "learning_rate": 1.5307706576452876e-05, + "loss": 0.3816, + "step": 19807 + }, + { + "epoch": 0.34227260160353884, + "grad_norm": 0.8412206687700083, + "learning_rate": 1.5307232256491794e-05, + "loss": 0.5486, + "step": 19808 + }, + { + "epoch": 0.34228988111694775, + "grad_norm": 1.1134115665157815, + "learning_rate": 1.530675791990806e-05, + "loss": 0.4666, + "step": 19809 + }, + { + "epoch": 0.34230716063035665, + "grad_norm": 1.062091837861213, + "learning_rate": 1.5306283566703158e-05, + "loss": 0.471, + "step": 19810 + }, + { + "epoch": 0.34232444014376556, + "grad_norm": 0.5059692353749868, + "learning_rate": 1.5305809196878568e-05, + "loss": 0.2206, + "step": 19811 + }, + { + "epoch": 0.34234171965717447, + "grad_norm": 0.9148897753257614, + "learning_rate": 1.5305334810435785e-05, + "loss": 0.5395, + "step": 19812 + }, + { + "epoch": 0.3423589991705834, + "grad_norm": 0.8715177963789061, + "learning_rate": 1.5304860407376288e-05, + "loss": 0.6537, + "step": 19813 + }, + { + "epoch": 0.3423762786839923, + "grad_norm": 1.5807451829699697, + "learning_rate": 1.5304385987701563e-05, + "loss": 0.5849, + "step": 19814 + }, + { + "epoch": 0.34239355819740114, + "grad_norm": 1.1592094983285328, + "learning_rate": 1.5303911551413103e-05, + "loss": 0.3973, + "step": 19815 + }, + { + "epoch": 0.34241083771081005, + "grad_norm": 1.6432017470844094, + "learning_rate": 1.530343709851239e-05, + "loss": 0.6518, + "step": 19816 + }, + { + "epoch": 0.34242811722421895, + "grad_norm": 0.7882175733102054, + "learning_rate": 1.5302962629000902e-05, + "loss": 0.4817, + "step": 19817 + }, + { + "epoch": 0.34244539673762786, + "grad_norm": 1.0146431146577324, + "learning_rate": 1.5302488142880135e-05, + "loss": 0.4962, + "step": 19818 + }, + { + "epoch": 0.34246267625103677, + "grad_norm": 1.4926171635229897, + "learning_rate": 1.5302013640151573e-05, + "loss": 0.629, + "step": 19819 + }, + { + "epoch": 0.3424799557644457, + "grad_norm": 0.9085356597350295, + "learning_rate": 1.5301539120816703e-05, + "loss": 0.3656, + "step": 19820 + }, + { + "epoch": 0.3424972352778546, + "grad_norm": 1.0865469053149086, + "learning_rate": 1.5301064584877005e-05, + "loss": 0.4918, + "step": 19821 + }, + { + "epoch": 0.3425145147912635, + "grad_norm": 0.5345689508423709, + "learning_rate": 1.530059003233397e-05, + "loss": 1.004, + "step": 19822 + }, + { + "epoch": 0.3425317943046724, + "grad_norm": 0.9452646437422465, + "learning_rate": 1.530011546318909e-05, + "loss": 0.5478, + "step": 19823 + }, + { + "epoch": 0.3425490738180813, + "grad_norm": 1.0598885433678424, + "learning_rate": 1.5299640877443844e-05, + "loss": 0.3504, + "step": 19824 + }, + { + "epoch": 0.34256635333149016, + "grad_norm": 1.2476198272267267, + "learning_rate": 1.5299166275099718e-05, + "loss": 0.4797, + "step": 19825 + }, + { + "epoch": 0.34258363284489907, + "grad_norm": 1.0743467920757435, + "learning_rate": 1.52986916561582e-05, + "loss": 0.4534, + "step": 19826 + }, + { + "epoch": 0.342600912358308, + "grad_norm": 0.9098348759130999, + "learning_rate": 1.529821702062078e-05, + "loss": 0.48, + "step": 19827 + }, + { + "epoch": 0.3426181918717169, + "grad_norm": 1.1973994604509492, + "learning_rate": 1.5297742368488937e-05, + "loss": 0.6331, + "step": 19828 + }, + { + "epoch": 0.3426354713851258, + "grad_norm": 0.9599819537491342, + "learning_rate": 1.5297267699764163e-05, + "loss": 0.6693, + "step": 19829 + }, + { + "epoch": 0.3426527508985347, + "grad_norm": 1.9074760807100084, + "learning_rate": 1.529679301444795e-05, + "loss": 0.6862, + "step": 19830 + }, + { + "epoch": 0.3426700304119436, + "grad_norm": 1.2028527072587036, + "learning_rate": 1.5296318312541768e-05, + "loss": 0.5809, + "step": 19831 + }, + { + "epoch": 0.3426873099253525, + "grad_norm": 1.1136473775860294, + "learning_rate": 1.529584359404712e-05, + "loss": 0.5622, + "step": 19832 + }, + { + "epoch": 0.3427045894387614, + "grad_norm": 0.5480576997824261, + "learning_rate": 1.5295368858965495e-05, + "loss": 0.6439, + "step": 19833 + }, + { + "epoch": 0.3427218689521703, + "grad_norm": 1.0612148962348134, + "learning_rate": 1.529489410729836e-05, + "loss": 0.3973, + "step": 19834 + }, + { + "epoch": 0.34273914846557924, + "grad_norm": 0.8911454111508343, + "learning_rate": 1.5294419339047223e-05, + "loss": 0.3189, + "step": 19835 + }, + { + "epoch": 0.3427564279789881, + "grad_norm": 1.125067839321607, + "learning_rate": 1.529394455421356e-05, + "loss": 0.6586, + "step": 19836 + }, + { + "epoch": 0.342773707492397, + "grad_norm": 1.7389464188873764, + "learning_rate": 1.529346975279886e-05, + "loss": 0.5897, + "step": 19837 + }, + { + "epoch": 0.3427909870058059, + "grad_norm": 0.9725488221883614, + "learning_rate": 1.529299493480461e-05, + "loss": 0.3588, + "step": 19838 + }, + { + "epoch": 0.3428082665192148, + "grad_norm": 1.2014598677610975, + "learning_rate": 1.5292520100232296e-05, + "loss": 0.4574, + "step": 19839 + }, + { + "epoch": 0.3428255460326237, + "grad_norm": 0.6963351655609112, + "learning_rate": 1.5292045249083406e-05, + "loss": 0.4285, + "step": 19840 + }, + { + "epoch": 0.3428428255460326, + "grad_norm": 1.0069444261134506, + "learning_rate": 1.529157038135943e-05, + "loss": 0.3227, + "step": 19841 + }, + { + "epoch": 0.34286010505944153, + "grad_norm": 0.8920663380426369, + "learning_rate": 1.5291095497061857e-05, + "loss": 0.4706, + "step": 19842 + }, + { + "epoch": 0.34287738457285044, + "grad_norm": 0.9683879067134186, + "learning_rate": 1.5290620596192163e-05, + "loss": 0.5223, + "step": 19843 + }, + { + "epoch": 0.34289466408625935, + "grad_norm": 0.41176968591805446, + "learning_rate": 1.5290145678751852e-05, + "loss": 0.6707, + "step": 19844 + }, + { + "epoch": 0.34291194359966826, + "grad_norm": 0.8683360232117754, + "learning_rate": 1.52896707447424e-05, + "loss": 0.615, + "step": 19845 + }, + { + "epoch": 0.3429292231130771, + "grad_norm": 0.7806962543624817, + "learning_rate": 1.5289195794165293e-05, + "loss": 0.3652, + "step": 19846 + }, + { + "epoch": 0.342946502626486, + "grad_norm": 0.7935451582607032, + "learning_rate": 1.5288720827022027e-05, + "loss": 0.4685, + "step": 19847 + }, + { + "epoch": 0.3429637821398949, + "grad_norm": 1.1882423779126892, + "learning_rate": 1.5288245843314083e-05, + "loss": 0.4713, + "step": 19848 + }, + { + "epoch": 0.34298106165330383, + "grad_norm": 1.261376001537292, + "learning_rate": 1.5287770843042952e-05, + "loss": 0.4998, + "step": 19849 + }, + { + "epoch": 0.34299834116671274, + "grad_norm": 0.7397669092043445, + "learning_rate": 1.528729582621012e-05, + "loss": 0.4613, + "step": 19850 + }, + { + "epoch": 0.34301562068012165, + "grad_norm": 0.850973888491216, + "learning_rate": 1.5286820792817078e-05, + "loss": 0.4934, + "step": 19851 + }, + { + "epoch": 0.34303290019353055, + "grad_norm": 0.8449443576592771, + "learning_rate": 1.5286345742865313e-05, + "loss": 0.548, + "step": 19852 + }, + { + "epoch": 0.34305017970693946, + "grad_norm": 0.9202367782121873, + "learning_rate": 1.5285870676356307e-05, + "loss": 0.294, + "step": 19853 + }, + { + "epoch": 0.34306745922034837, + "grad_norm": 1.159996252014003, + "learning_rate": 1.5285395593291554e-05, + "loss": 0.4933, + "step": 19854 + }, + { + "epoch": 0.3430847387337573, + "grad_norm": 0.8360902786886784, + "learning_rate": 1.5284920493672538e-05, + "loss": 0.6736, + "step": 19855 + }, + { + "epoch": 0.3431020182471662, + "grad_norm": 0.8400958977298932, + "learning_rate": 1.528444537750075e-05, + "loss": 0.4317, + "step": 19856 + }, + { + "epoch": 0.34311929776057504, + "grad_norm": 0.9930729655049528, + "learning_rate": 1.5283970244777684e-05, + "loss": 0.3894, + "step": 19857 + }, + { + "epoch": 0.34313657727398394, + "grad_norm": 1.5577644065165868, + "learning_rate": 1.5283495095504815e-05, + "loss": 0.4043, + "step": 19858 + }, + { + "epoch": 0.34315385678739285, + "grad_norm": 0.5090788030205786, + "learning_rate": 1.5283019929683642e-05, + "loss": 0.7023, + "step": 19859 + }, + { + "epoch": 0.34317113630080176, + "grad_norm": 0.889298415831224, + "learning_rate": 1.5282544747315643e-05, + "loss": 0.5092, + "step": 19860 + }, + { + "epoch": 0.34318841581421067, + "grad_norm": 0.6708858500862726, + "learning_rate": 1.528206954840232e-05, + "loss": 0.4401, + "step": 19861 + }, + { + "epoch": 0.3432056953276196, + "grad_norm": 1.5426715109601083, + "learning_rate": 1.5281594332945148e-05, + "loss": 0.5874, + "step": 19862 + }, + { + "epoch": 0.3432229748410285, + "grad_norm": 0.8938534313780239, + "learning_rate": 1.528111910094562e-05, + "loss": 0.432, + "step": 19863 + }, + { + "epoch": 0.3432402543544374, + "grad_norm": 1.0664121128959245, + "learning_rate": 1.528064385240523e-05, + "loss": 0.4349, + "step": 19864 + }, + { + "epoch": 0.3432575338678463, + "grad_norm": 1.4477331124390527, + "learning_rate": 1.5280168587325462e-05, + "loss": 0.5877, + "step": 19865 + }, + { + "epoch": 0.3432748133812552, + "grad_norm": 0.9873190551702671, + "learning_rate": 1.52796933057078e-05, + "loss": 0.5095, + "step": 19866 + }, + { + "epoch": 0.3432920928946641, + "grad_norm": 0.8147360648558047, + "learning_rate": 1.5279218007553745e-05, + "loss": 0.4578, + "step": 19867 + }, + { + "epoch": 0.34330937240807297, + "grad_norm": 0.5019637763171092, + "learning_rate": 1.5278742692864773e-05, + "loss": 0.5277, + "step": 19868 + }, + { + "epoch": 0.3433266519214819, + "grad_norm": 1.2854461798939825, + "learning_rate": 1.527826736164238e-05, + "loss": 0.3123, + "step": 19869 + }, + { + "epoch": 0.3433439314348908, + "grad_norm": 0.8857835161088901, + "learning_rate": 1.527779201388805e-05, + "loss": 0.4591, + "step": 19870 + }, + { + "epoch": 0.3433612109482997, + "grad_norm": 0.9554238956081152, + "learning_rate": 1.5277316649603273e-05, + "loss": 0.3844, + "step": 19871 + }, + { + "epoch": 0.3433784904617086, + "grad_norm": 0.9964198593130446, + "learning_rate": 1.5276841268789542e-05, + "loss": 0.4286, + "step": 19872 + }, + { + "epoch": 0.3433957699751175, + "grad_norm": 0.9179209884105634, + "learning_rate": 1.5276365871448346e-05, + "loss": 0.5223, + "step": 19873 + }, + { + "epoch": 0.3434130494885264, + "grad_norm": 0.41588066514839167, + "learning_rate": 1.5275890457581164e-05, + "loss": 0.711, + "step": 19874 + }, + { + "epoch": 0.3434303290019353, + "grad_norm": 1.603431527379459, + "learning_rate": 1.5275415027189494e-05, + "loss": 0.5094, + "step": 19875 + }, + { + "epoch": 0.3434476085153442, + "grad_norm": 0.8115380579308756, + "learning_rate": 1.5274939580274827e-05, + "loss": 0.4629, + "step": 19876 + }, + { + "epoch": 0.34346488802875313, + "grad_norm": 1.0768256885673486, + "learning_rate": 1.5274464116838646e-05, + "loss": 0.5211, + "step": 19877 + }, + { + "epoch": 0.343482167542162, + "grad_norm": 0.44548777636547604, + "learning_rate": 1.5273988636882444e-05, + "loss": 0.4922, + "step": 19878 + }, + { + "epoch": 0.3434994470555709, + "grad_norm": 0.678685115802046, + "learning_rate": 1.5273513140407706e-05, + "loss": 0.3988, + "step": 19879 + }, + { + "epoch": 0.3435167265689798, + "grad_norm": 0.6692271296160841, + "learning_rate": 1.5273037627415924e-05, + "loss": 0.4546, + "step": 19880 + }, + { + "epoch": 0.3435340060823887, + "grad_norm": 1.1130584804256363, + "learning_rate": 1.5272562097908592e-05, + "loss": 0.3646, + "step": 19881 + }, + { + "epoch": 0.3435512855957976, + "grad_norm": 1.0668574818452232, + "learning_rate": 1.5272086551887194e-05, + "loss": 0.3446, + "step": 19882 + }, + { + "epoch": 0.3435685651092065, + "grad_norm": 1.238450015531959, + "learning_rate": 1.5271610989353217e-05, + "loss": 0.4962, + "step": 19883 + }, + { + "epoch": 0.34358584462261543, + "grad_norm": 1.2472357046656635, + "learning_rate": 1.5271135410308153e-05, + "loss": 0.2839, + "step": 19884 + }, + { + "epoch": 0.34360312413602434, + "grad_norm": 0.8178864155213789, + "learning_rate": 1.5270659814753495e-05, + "loss": 0.5002, + "step": 19885 + }, + { + "epoch": 0.34362040364943325, + "grad_norm": 0.7550641288549935, + "learning_rate": 1.527018420269073e-05, + "loss": 0.3089, + "step": 19886 + }, + { + "epoch": 0.34363768316284216, + "grad_norm": 0.6534660483782625, + "learning_rate": 1.5269708574121346e-05, + "loss": 0.2976, + "step": 19887 + }, + { + "epoch": 0.34365496267625106, + "grad_norm": 1.0150466666943936, + "learning_rate": 1.5269232929046835e-05, + "loss": 0.5129, + "step": 19888 + }, + { + "epoch": 0.3436722421896599, + "grad_norm": 0.6789626098539333, + "learning_rate": 1.5268757267468686e-05, + "loss": 0.2918, + "step": 19889 + }, + { + "epoch": 0.3436895217030688, + "grad_norm": 2.021485247740015, + "learning_rate": 1.5268281589388393e-05, + "loss": 0.6613, + "step": 19890 + }, + { + "epoch": 0.34370680121647773, + "grad_norm": 0.7043970136707282, + "learning_rate": 1.5267805894807435e-05, + "loss": 0.3519, + "step": 19891 + }, + { + "epoch": 0.34372408072988664, + "grad_norm": 1.8982068870981061, + "learning_rate": 1.5267330183727312e-05, + "loss": 0.6239, + "step": 19892 + }, + { + "epoch": 0.34374136024329555, + "grad_norm": 1.0109968489028862, + "learning_rate": 1.5266854456149508e-05, + "loss": 0.4466, + "step": 19893 + }, + { + "epoch": 0.34375863975670445, + "grad_norm": 0.8536170363227484, + "learning_rate": 1.5266378712075516e-05, + "loss": 0.4423, + "step": 19894 + }, + { + "epoch": 0.34377591927011336, + "grad_norm": 0.7588545862399836, + "learning_rate": 1.526590295150683e-05, + "loss": 0.3264, + "step": 19895 + }, + { + "epoch": 0.34379319878352227, + "grad_norm": 1.4333881101949024, + "learning_rate": 1.526542717444493e-05, + "loss": 0.5192, + "step": 19896 + }, + { + "epoch": 0.3438104782969312, + "grad_norm": 1.2757442456727446, + "learning_rate": 1.5264951380891316e-05, + "loss": 0.4401, + "step": 19897 + }, + { + "epoch": 0.3438277578103401, + "grad_norm": 0.7829952464538776, + "learning_rate": 1.5264475570847474e-05, + "loss": 0.3881, + "step": 19898 + }, + { + "epoch": 0.34384503732374894, + "grad_norm": 0.6517047529300356, + "learning_rate": 1.5263999744314894e-05, + "loss": 0.4022, + "step": 19899 + }, + { + "epoch": 0.34386231683715784, + "grad_norm": 0.5487322388181675, + "learning_rate": 1.5263523901295064e-05, + "loss": 0.327, + "step": 19900 + }, + { + "epoch": 0.34387959635056675, + "grad_norm": 0.9027785471970992, + "learning_rate": 1.526304804178948e-05, + "loss": 0.4425, + "step": 19901 + }, + { + "epoch": 0.34389687586397566, + "grad_norm": 1.360962933404798, + "learning_rate": 1.5262572165799627e-05, + "loss": 0.738, + "step": 19902 + }, + { + "epoch": 0.34391415537738457, + "grad_norm": 1.654753244099419, + "learning_rate": 1.5262096273326998e-05, + "loss": 0.4732, + "step": 19903 + }, + { + "epoch": 0.3439314348907935, + "grad_norm": 0.8326718917677622, + "learning_rate": 1.5261620364373087e-05, + "loss": 0.4494, + "step": 19904 + }, + { + "epoch": 0.3439487144042024, + "grad_norm": 0.4493248155065272, + "learning_rate": 1.526114443893938e-05, + "loss": 0.6085, + "step": 19905 + }, + { + "epoch": 0.3439659939176113, + "grad_norm": 0.6721979564713857, + "learning_rate": 1.5260668497027367e-05, + "loss": 0.3565, + "step": 19906 + }, + { + "epoch": 0.3439832734310202, + "grad_norm": 0.7878145256508982, + "learning_rate": 1.5260192538638542e-05, + "loss": 0.4882, + "step": 19907 + }, + { + "epoch": 0.3440005529444291, + "grad_norm": 0.571039070864117, + "learning_rate": 1.5259716563774395e-05, + "loss": 0.5381, + "step": 19908 + }, + { + "epoch": 0.344017832457838, + "grad_norm": 1.218493668454387, + "learning_rate": 1.5259240572436413e-05, + "loss": 0.4442, + "step": 19909 + }, + { + "epoch": 0.34403511197124687, + "grad_norm": 1.1045486342787931, + "learning_rate": 1.5258764564626095e-05, + "loss": 0.5617, + "step": 19910 + }, + { + "epoch": 0.3440523914846558, + "grad_norm": 1.0018656493223725, + "learning_rate": 1.525828854034492e-05, + "loss": 0.4388, + "step": 19911 + }, + { + "epoch": 0.3440696709980647, + "grad_norm": 0.7825173205800358, + "learning_rate": 1.5257812499594389e-05, + "loss": 0.4474, + "step": 19912 + }, + { + "epoch": 0.3440869505114736, + "grad_norm": 0.7586029564714263, + "learning_rate": 1.5257336442375991e-05, + "loss": 0.4435, + "step": 19913 + }, + { + "epoch": 0.3441042300248825, + "grad_norm": 1.2724696204741455, + "learning_rate": 1.5256860368691216e-05, + "loss": 0.4166, + "step": 19914 + }, + { + "epoch": 0.3441215095382914, + "grad_norm": 0.9531130043209891, + "learning_rate": 1.5256384278541551e-05, + "loss": 0.6251, + "step": 19915 + }, + { + "epoch": 0.3441387890517003, + "grad_norm": 1.2028243060582726, + "learning_rate": 1.5255908171928492e-05, + "loss": 0.4967, + "step": 19916 + }, + { + "epoch": 0.3441560685651092, + "grad_norm": 1.5554087341850413, + "learning_rate": 1.525543204885353e-05, + "loss": 0.5295, + "step": 19917 + }, + { + "epoch": 0.3441733480785181, + "grad_norm": 0.7211901827836811, + "learning_rate": 1.5254955909318159e-05, + "loss": 0.4445, + "step": 19918 + }, + { + "epoch": 0.34419062759192703, + "grad_norm": 1.0379072870090968, + "learning_rate": 1.5254479753323863e-05, + "loss": 0.4715, + "step": 19919 + }, + { + "epoch": 0.3442079071053359, + "grad_norm": 1.1944888078609448, + "learning_rate": 1.5254003580872137e-05, + "loss": 0.3692, + "step": 19920 + }, + { + "epoch": 0.3442251866187448, + "grad_norm": 0.7075499239430539, + "learning_rate": 1.5253527391964475e-05, + "loss": 0.2804, + "step": 19921 + }, + { + "epoch": 0.3442424661321537, + "grad_norm": 0.8491229578295639, + "learning_rate": 1.5253051186602364e-05, + "loss": 0.6637, + "step": 19922 + }, + { + "epoch": 0.3442597456455626, + "grad_norm": 0.8641550545139263, + "learning_rate": 1.5252574964787297e-05, + "loss": 0.5668, + "step": 19923 + }, + { + "epoch": 0.3442770251589715, + "grad_norm": 0.3989567525872835, + "learning_rate": 1.5252098726520768e-05, + "loss": 0.6744, + "step": 19924 + }, + { + "epoch": 0.3442943046723804, + "grad_norm": 1.3537413238117395, + "learning_rate": 1.5251622471804263e-05, + "loss": 0.6205, + "step": 19925 + }, + { + "epoch": 0.34431158418578933, + "grad_norm": 0.9016587975432013, + "learning_rate": 1.5251146200639279e-05, + "loss": 0.6614, + "step": 19926 + }, + { + "epoch": 0.34432886369919824, + "grad_norm": 1.2937258379938628, + "learning_rate": 1.5250669913027307e-05, + "loss": 0.6421, + "step": 19927 + }, + { + "epoch": 0.34434614321260715, + "grad_norm": 0.7826646295735694, + "learning_rate": 1.5250193608969839e-05, + "loss": 0.4164, + "step": 19928 + }, + { + "epoch": 0.34436342272601606, + "grad_norm": 0.8300314665439091, + "learning_rate": 1.5249717288468363e-05, + "loss": 0.3414, + "step": 19929 + }, + { + "epoch": 0.34438070223942496, + "grad_norm": 0.8182045288733836, + "learning_rate": 1.5249240951524376e-05, + "loss": 0.448, + "step": 19930 + }, + { + "epoch": 0.3443979817528338, + "grad_norm": 0.6562018472991739, + "learning_rate": 1.5248764598139365e-05, + "loss": 0.4907, + "step": 19931 + }, + { + "epoch": 0.3444152612662427, + "grad_norm": 0.9343162596975617, + "learning_rate": 1.5248288228314825e-05, + "loss": 0.4421, + "step": 19932 + }, + { + "epoch": 0.34443254077965163, + "grad_norm": 0.9879834262342224, + "learning_rate": 1.524781184205225e-05, + "loss": 0.5651, + "step": 19933 + }, + { + "epoch": 0.34444982029306054, + "grad_norm": 0.762398968184122, + "learning_rate": 1.5247335439353123e-05, + "loss": 0.3529, + "step": 19934 + }, + { + "epoch": 0.34446709980646945, + "grad_norm": 1.200067819688811, + "learning_rate": 1.524685902021895e-05, + "loss": 0.6225, + "step": 19935 + }, + { + "epoch": 0.34448437931987835, + "grad_norm": 1.165705019724458, + "learning_rate": 1.5246382584651211e-05, + "loss": 0.4498, + "step": 19936 + }, + { + "epoch": 0.34450165883328726, + "grad_norm": 1.9170311365391877, + "learning_rate": 1.5245906132651405e-05, + "loss": 0.3912, + "step": 19937 + }, + { + "epoch": 0.34451893834669617, + "grad_norm": 1.316908671246818, + "learning_rate": 1.5245429664221024e-05, + "loss": 0.5729, + "step": 19938 + }, + { + "epoch": 0.3445362178601051, + "grad_norm": 0.8599460168842388, + "learning_rate": 1.5244953179361556e-05, + "loss": 0.5093, + "step": 19939 + }, + { + "epoch": 0.344553497373514, + "grad_norm": 1.2064946449861993, + "learning_rate": 1.5244476678074496e-05, + "loss": 0.4089, + "step": 19940 + }, + { + "epoch": 0.3445707768869229, + "grad_norm": 0.5364894584462367, + "learning_rate": 1.5244000160361338e-05, + "loss": 0.5557, + "step": 19941 + }, + { + "epoch": 0.34458805640033174, + "grad_norm": 0.8643479062512236, + "learning_rate": 1.524352362622357e-05, + "loss": 0.5783, + "step": 19942 + }, + { + "epoch": 0.34460533591374065, + "grad_norm": 1.1470456246451175, + "learning_rate": 1.5243047075662692e-05, + "loss": 0.6829, + "step": 19943 + }, + { + "epoch": 0.34462261542714956, + "grad_norm": 0.78731987674439, + "learning_rate": 1.5242570508680188e-05, + "loss": 0.4106, + "step": 19944 + }, + { + "epoch": 0.34463989494055847, + "grad_norm": 1.3843405538843596, + "learning_rate": 1.5242093925277557e-05, + "loss": 0.6591, + "step": 19945 + }, + { + "epoch": 0.3446571744539674, + "grad_norm": 0.7825386474623203, + "learning_rate": 1.5241617325456289e-05, + "loss": 0.3959, + "step": 19946 + }, + { + "epoch": 0.3446744539673763, + "grad_norm": 0.838254904899328, + "learning_rate": 1.5241140709217876e-05, + "loss": 0.3037, + "step": 19947 + }, + { + "epoch": 0.3446917334807852, + "grad_norm": 1.0126127637508329, + "learning_rate": 1.5240664076563816e-05, + "loss": 0.3777, + "step": 19948 + }, + { + "epoch": 0.3447090129941941, + "grad_norm": 0.7984685758412308, + "learning_rate": 1.5240187427495592e-05, + "loss": 0.3201, + "step": 19949 + }, + { + "epoch": 0.344726292507603, + "grad_norm": 0.4853133186934657, + "learning_rate": 1.5239710762014709e-05, + "loss": 0.5804, + "step": 19950 + }, + { + "epoch": 0.3447435720210119, + "grad_norm": 1.7594920980800253, + "learning_rate": 1.5239234080122648e-05, + "loss": 0.4643, + "step": 19951 + }, + { + "epoch": 0.34476085153442076, + "grad_norm": 0.944970350783746, + "learning_rate": 1.523875738182091e-05, + "loss": 0.5864, + "step": 19952 + }, + { + "epoch": 0.3447781310478297, + "grad_norm": 1.0253346424136234, + "learning_rate": 1.5238280667110987e-05, + "loss": 0.4319, + "step": 19953 + }, + { + "epoch": 0.3447954105612386, + "grad_norm": 0.8277339028671287, + "learning_rate": 1.5237803935994368e-05, + "loss": 0.4658, + "step": 19954 + }, + { + "epoch": 0.3448126900746475, + "grad_norm": 0.5795697472734236, + "learning_rate": 1.523732718847255e-05, + "loss": 0.6112, + "step": 19955 + }, + { + "epoch": 0.3448299695880564, + "grad_norm": 0.8092518431085071, + "learning_rate": 1.5236850424547028e-05, + "loss": 0.4977, + "step": 19956 + }, + { + "epoch": 0.3448472491014653, + "grad_norm": 0.8585924070466112, + "learning_rate": 1.5236373644219288e-05, + "loss": 0.6043, + "step": 19957 + }, + { + "epoch": 0.3448645286148742, + "grad_norm": 1.1843441441484674, + "learning_rate": 1.5235896847490831e-05, + "loss": 0.5277, + "step": 19958 + }, + { + "epoch": 0.3448818081282831, + "grad_norm": 0.9189370542555971, + "learning_rate": 1.5235420034363149e-05, + "loss": 0.4908, + "step": 19959 + }, + { + "epoch": 0.344899087641692, + "grad_norm": 0.6775019918920671, + "learning_rate": 1.5234943204837728e-05, + "loss": 0.4111, + "step": 19960 + }, + { + "epoch": 0.34491636715510093, + "grad_norm": 1.3158017171535905, + "learning_rate": 1.5234466358916074e-05, + "loss": 0.5903, + "step": 19961 + }, + { + "epoch": 0.34493364666850984, + "grad_norm": 0.7950441778335223, + "learning_rate": 1.5233989496599669e-05, + "loss": 0.5971, + "step": 19962 + }, + { + "epoch": 0.3449509261819187, + "grad_norm": 2.2904376747994224, + "learning_rate": 1.523351261789001e-05, + "loss": 0.7511, + "step": 19963 + }, + { + "epoch": 0.3449682056953276, + "grad_norm": 0.8120533124477101, + "learning_rate": 1.5233035722788593e-05, + "loss": 0.4836, + "step": 19964 + }, + { + "epoch": 0.3449854852087365, + "grad_norm": 2.783173374836526, + "learning_rate": 1.523255881129691e-05, + "loss": 0.5618, + "step": 19965 + }, + { + "epoch": 0.3450027647221454, + "grad_norm": 0.9527421569132173, + "learning_rate": 1.5232081883416457e-05, + "loss": 0.3996, + "step": 19966 + }, + { + "epoch": 0.3450200442355543, + "grad_norm": 0.9805114370139842, + "learning_rate": 1.5231604939148725e-05, + "loss": 0.4886, + "step": 19967 + }, + { + "epoch": 0.34503732374896323, + "grad_norm": 0.9632029936507198, + "learning_rate": 1.5231127978495208e-05, + "loss": 0.4302, + "step": 19968 + }, + { + "epoch": 0.34505460326237214, + "grad_norm": 1.1574262628033605, + "learning_rate": 1.5230651001457402e-05, + "loss": 0.3635, + "step": 19969 + }, + { + "epoch": 0.34507188277578105, + "grad_norm": 1.3934507388318547, + "learning_rate": 1.52301740080368e-05, + "loss": 0.6326, + "step": 19970 + }, + { + "epoch": 0.34508916228918995, + "grad_norm": 1.084278358743273, + "learning_rate": 1.5229696998234893e-05, + "loss": 0.4857, + "step": 19971 + }, + { + "epoch": 0.34510644180259886, + "grad_norm": 0.49788700477018066, + "learning_rate": 1.5229219972053177e-05, + "loss": 0.7706, + "step": 19972 + }, + { + "epoch": 0.3451237213160077, + "grad_norm": 0.706711236878866, + "learning_rate": 1.522874292949315e-05, + "loss": 0.6088, + "step": 19973 + }, + { + "epoch": 0.3451410008294166, + "grad_norm": 1.1580948194172151, + "learning_rate": 1.52282658705563e-05, + "loss": 0.4384, + "step": 19974 + }, + { + "epoch": 0.34515828034282553, + "grad_norm": 1.368650831563279, + "learning_rate": 1.5227788795244127e-05, + "loss": 0.4932, + "step": 19975 + }, + { + "epoch": 0.34517555985623444, + "grad_norm": 0.9165452485585113, + "learning_rate": 1.5227311703558122e-05, + "loss": 0.4347, + "step": 19976 + }, + { + "epoch": 0.34519283936964335, + "grad_norm": 1.2200921106939566, + "learning_rate": 1.5226834595499776e-05, + "loss": 0.6985, + "step": 19977 + }, + { + "epoch": 0.34521011888305225, + "grad_norm": 0.556220746358606, + "learning_rate": 1.5226357471070587e-05, + "loss": 0.2638, + "step": 19978 + }, + { + "epoch": 0.34522739839646116, + "grad_norm": 0.7510649612073033, + "learning_rate": 1.5225880330272054e-05, + "loss": 0.408, + "step": 19979 + }, + { + "epoch": 0.34524467790987007, + "grad_norm": 0.8235137711372457, + "learning_rate": 1.5225403173105662e-05, + "loss": 0.3435, + "step": 19980 + }, + { + "epoch": 0.345261957423279, + "grad_norm": 0.7558721233974659, + "learning_rate": 1.5224925999572912e-05, + "loss": 0.5792, + "step": 19981 + }, + { + "epoch": 0.3452792369366879, + "grad_norm": 1.240110680861136, + "learning_rate": 1.5224448809675299e-05, + "loss": 0.4487, + "step": 19982 + }, + { + "epoch": 0.3452965164500968, + "grad_norm": 1.1409254676211726, + "learning_rate": 1.522397160341431e-05, + "loss": 0.4419, + "step": 19983 + }, + { + "epoch": 0.34531379596350564, + "grad_norm": 0.8686527221788903, + "learning_rate": 1.5223494380791448e-05, + "loss": 0.3759, + "step": 19984 + }, + { + "epoch": 0.34533107547691455, + "grad_norm": 1.2896860066418563, + "learning_rate": 1.5223017141808206e-05, + "loss": 0.4235, + "step": 19985 + }, + { + "epoch": 0.34534835499032346, + "grad_norm": 1.2331291069115922, + "learning_rate": 1.5222539886466075e-05, + "loss": 0.4976, + "step": 19986 + }, + { + "epoch": 0.34536563450373237, + "grad_norm": 1.0467316187021478, + "learning_rate": 1.5222062614766553e-05, + "loss": 0.5431, + "step": 19987 + }, + { + "epoch": 0.3453829140171413, + "grad_norm": 1.3329078525486024, + "learning_rate": 1.5221585326711132e-05, + "loss": 0.5465, + "step": 19988 + }, + { + "epoch": 0.3454001935305502, + "grad_norm": 0.5767702473704992, + "learning_rate": 1.522110802230131e-05, + "loss": 0.8123, + "step": 19989 + }, + { + "epoch": 0.3454174730439591, + "grad_norm": 1.4036811113407266, + "learning_rate": 1.522063070153858e-05, + "loss": 0.6317, + "step": 19990 + }, + { + "epoch": 0.345434752557368, + "grad_norm": 0.7875422823571441, + "learning_rate": 1.5220153364424443e-05, + "loss": 0.5411, + "step": 19991 + }, + { + "epoch": 0.3454520320707769, + "grad_norm": 0.8668331810076144, + "learning_rate": 1.5219676010960385e-05, + "loss": 0.3829, + "step": 19992 + }, + { + "epoch": 0.3454693115841858, + "grad_norm": 1.0146807650804293, + "learning_rate": 1.5219198641147906e-05, + "loss": 0.3729, + "step": 19993 + }, + { + "epoch": 0.34548659109759466, + "grad_norm": 1.1222170285322952, + "learning_rate": 1.5218721254988498e-05, + "loss": 0.42, + "step": 19994 + }, + { + "epoch": 0.34550387061100357, + "grad_norm": 0.678580156940426, + "learning_rate": 1.5218243852483662e-05, + "loss": 0.517, + "step": 19995 + }, + { + "epoch": 0.3455211501244125, + "grad_norm": 0.7388402736326719, + "learning_rate": 1.521776643363489e-05, + "loss": 0.3046, + "step": 19996 + }, + { + "epoch": 0.3455384296378214, + "grad_norm": 0.7842182477604467, + "learning_rate": 1.5217288998443671e-05, + "loss": 0.3857, + "step": 19997 + }, + { + "epoch": 0.3455557091512303, + "grad_norm": 1.1301487499264118, + "learning_rate": 1.5216811546911512e-05, + "loss": 0.6429, + "step": 19998 + }, + { + "epoch": 0.3455729886646392, + "grad_norm": 1.7293356086967486, + "learning_rate": 1.5216334079039902e-05, + "loss": 0.5054, + "step": 19999 + }, + { + "epoch": 0.3455902681780481, + "grad_norm": 0.923097100197611, + "learning_rate": 1.5215856594830333e-05, + "loss": 0.5111, + "step": 20000 + }, + { + "epoch": 0.345607547691457, + "grad_norm": 0.9603460349348637, + "learning_rate": 1.5215379094284312e-05, + "loss": 0.433, + "step": 20001 + }, + { + "epoch": 0.3456248272048659, + "grad_norm": 1.1963655771372146, + "learning_rate": 1.5214901577403326e-05, + "loss": 0.4797, + "step": 20002 + }, + { + "epoch": 0.34564210671827483, + "grad_norm": 0.7806405330210934, + "learning_rate": 1.5214424044188865e-05, + "loss": 0.5549, + "step": 20003 + }, + { + "epoch": 0.34565938623168374, + "grad_norm": 0.6747756667505984, + "learning_rate": 1.5213946494642438e-05, + "loss": 0.5499, + "step": 20004 + }, + { + "epoch": 0.3456766657450926, + "grad_norm": 1.8720841431615418, + "learning_rate": 1.5213468928765531e-05, + "loss": 0.5423, + "step": 20005 + }, + { + "epoch": 0.3456939452585015, + "grad_norm": 0.6374261937921035, + "learning_rate": 1.5212991346559643e-05, + "loss": 0.4545, + "step": 20006 + }, + { + "epoch": 0.3457112247719104, + "grad_norm": 0.8322868092927017, + "learning_rate": 1.5212513748026272e-05, + "loss": 0.4904, + "step": 20007 + }, + { + "epoch": 0.3457285042853193, + "grad_norm": 0.6659135649752187, + "learning_rate": 1.5212036133166911e-05, + "loss": 0.4028, + "step": 20008 + }, + { + "epoch": 0.3457457837987282, + "grad_norm": 1.2281431150197513, + "learning_rate": 1.5211558501983055e-05, + "loss": 0.5, + "step": 20009 + }, + { + "epoch": 0.34576306331213713, + "grad_norm": 0.9333861186484372, + "learning_rate": 1.5211080854476206e-05, + "loss": 0.2718, + "step": 20010 + }, + { + "epoch": 0.34578034282554604, + "grad_norm": 1.4341651715182442, + "learning_rate": 1.5210603190647853e-05, + "loss": 0.6665, + "step": 20011 + }, + { + "epoch": 0.34579762233895495, + "grad_norm": 0.4367906857887706, + "learning_rate": 1.5210125510499497e-05, + "loss": 0.5525, + "step": 20012 + }, + { + "epoch": 0.34581490185236385, + "grad_norm": 0.5107310890422249, + "learning_rate": 1.520964781403263e-05, + "loss": 0.3441, + "step": 20013 + }, + { + "epoch": 0.34583218136577276, + "grad_norm": 1.3758207263421827, + "learning_rate": 1.5209170101248746e-05, + "loss": 0.3957, + "step": 20014 + }, + { + "epoch": 0.34584946087918167, + "grad_norm": 0.838162902530185, + "learning_rate": 1.5208692372149352e-05, + "loss": 0.3852, + "step": 20015 + }, + { + "epoch": 0.3458667403925905, + "grad_norm": 1.3052039402702793, + "learning_rate": 1.5208214626735935e-05, + "loss": 0.4318, + "step": 20016 + }, + { + "epoch": 0.34588401990599943, + "grad_norm": 1.1670688266667886, + "learning_rate": 1.5207736865009992e-05, + "loss": 0.576, + "step": 20017 + }, + { + "epoch": 0.34590129941940834, + "grad_norm": 0.4963294117613155, + "learning_rate": 1.5207259086973025e-05, + "loss": 0.2456, + "step": 20018 + }, + { + "epoch": 0.34591857893281724, + "grad_norm": 0.5539411319150449, + "learning_rate": 1.5206781292626525e-05, + "loss": 0.3872, + "step": 20019 + }, + { + "epoch": 0.34593585844622615, + "grad_norm": 1.3401303846790769, + "learning_rate": 1.520630348197199e-05, + "loss": 0.6677, + "step": 20020 + }, + { + "epoch": 0.34595313795963506, + "grad_norm": 0.9749413568418231, + "learning_rate": 1.5205825655010918e-05, + "loss": 0.5179, + "step": 20021 + }, + { + "epoch": 0.34597041747304397, + "grad_norm": 1.1963083232360763, + "learning_rate": 1.5205347811744804e-05, + "loss": 0.4364, + "step": 20022 + }, + { + "epoch": 0.3459876969864529, + "grad_norm": 0.5232981302173038, + "learning_rate": 1.5204869952175145e-05, + "loss": 0.7115, + "step": 20023 + }, + { + "epoch": 0.3460049764998618, + "grad_norm": 0.48374216086795924, + "learning_rate": 1.5204392076303438e-05, + "loss": 0.7177, + "step": 20024 + }, + { + "epoch": 0.3460222560132707, + "grad_norm": 1.0592946370865113, + "learning_rate": 1.5203914184131182e-05, + "loss": 0.5613, + "step": 20025 + }, + { + "epoch": 0.34603953552667954, + "grad_norm": 1.4878768016442878, + "learning_rate": 1.5203436275659864e-05, + "loss": 0.4663, + "step": 20026 + }, + { + "epoch": 0.34605681504008845, + "grad_norm": 0.6494018930314381, + "learning_rate": 1.5202958350890993e-05, + "loss": 0.6016, + "step": 20027 + }, + { + "epoch": 0.34607409455349736, + "grad_norm": 0.6846443659607035, + "learning_rate": 1.5202480409826062e-05, + "loss": 0.65, + "step": 20028 + }, + { + "epoch": 0.34609137406690627, + "grad_norm": 1.1261070752370954, + "learning_rate": 1.5202002452466564e-05, + "loss": 0.4637, + "step": 20029 + }, + { + "epoch": 0.3461086535803152, + "grad_norm": 1.115094119156181, + "learning_rate": 1.5201524478814e-05, + "loss": 0.4761, + "step": 20030 + }, + { + "epoch": 0.3461259330937241, + "grad_norm": 0.8211576085712355, + "learning_rate": 1.5201046488869868e-05, + "loss": 0.3814, + "step": 20031 + }, + { + "epoch": 0.346143212607133, + "grad_norm": 1.109015988044909, + "learning_rate": 1.5200568482635662e-05, + "loss": 0.512, + "step": 20032 + }, + { + "epoch": 0.3461604921205419, + "grad_norm": 0.988648348838836, + "learning_rate": 1.5200090460112882e-05, + "loss": 0.4943, + "step": 20033 + }, + { + "epoch": 0.3461777716339508, + "grad_norm": 0.6616231489380967, + "learning_rate": 1.5199612421303021e-05, + "loss": 0.4839, + "step": 20034 + }, + { + "epoch": 0.3461950511473597, + "grad_norm": 0.702538860541657, + "learning_rate": 1.5199134366207578e-05, + "loss": 0.4065, + "step": 20035 + }, + { + "epoch": 0.3462123306607686, + "grad_norm": 0.9536812604086908, + "learning_rate": 1.5198656294828054e-05, + "loss": 0.4919, + "step": 20036 + }, + { + "epoch": 0.34622961017417747, + "grad_norm": 1.026988853716331, + "learning_rate": 1.5198178207165939e-05, + "loss": 0.4841, + "step": 20037 + }, + { + "epoch": 0.3462468896875864, + "grad_norm": 0.9071865175724811, + "learning_rate": 1.5197700103222737e-05, + "loss": 0.3889, + "step": 20038 + }, + { + "epoch": 0.3462641692009953, + "grad_norm": 1.05686571527896, + "learning_rate": 1.5197221982999946e-05, + "loss": 0.6401, + "step": 20039 + }, + { + "epoch": 0.3462814487144042, + "grad_norm": 1.743905130013485, + "learning_rate": 1.5196743846499056e-05, + "loss": 0.6095, + "step": 20040 + }, + { + "epoch": 0.3462987282278131, + "grad_norm": 1.0288559011991083, + "learning_rate": 1.5196265693721573e-05, + "loss": 0.5436, + "step": 20041 + }, + { + "epoch": 0.346316007741222, + "grad_norm": 1.3932297782462026, + "learning_rate": 1.5195787524668993e-05, + "loss": 0.4857, + "step": 20042 + }, + { + "epoch": 0.3463332872546309, + "grad_norm": 1.3289706467229323, + "learning_rate": 1.5195309339342805e-05, + "loss": 0.3272, + "step": 20043 + }, + { + "epoch": 0.3463505667680398, + "grad_norm": 1.2980923021578226, + "learning_rate": 1.5194831137744519e-05, + "loss": 0.4827, + "step": 20044 + }, + { + "epoch": 0.34636784628144873, + "grad_norm": 0.42379517581566095, + "learning_rate": 1.5194352919875624e-05, + "loss": 0.7291, + "step": 20045 + }, + { + "epoch": 0.34638512579485764, + "grad_norm": 0.428675561564992, + "learning_rate": 1.5193874685737622e-05, + "loss": 0.7515, + "step": 20046 + }, + { + "epoch": 0.3464024053082665, + "grad_norm": 0.7688340367263758, + "learning_rate": 1.5193396435332009e-05, + "loss": 0.4313, + "step": 20047 + }, + { + "epoch": 0.3464196848216754, + "grad_norm": 0.9814602467462643, + "learning_rate": 1.5192918168660285e-05, + "loss": 0.3398, + "step": 20048 + }, + { + "epoch": 0.3464369643350843, + "grad_norm": 0.948281464355487, + "learning_rate": 1.5192439885723942e-05, + "loss": 0.5021, + "step": 20049 + }, + { + "epoch": 0.3464542438484932, + "grad_norm": 1.023830565805674, + "learning_rate": 1.519196158652449e-05, + "loss": 0.5544, + "step": 20050 + }, + { + "epoch": 0.3464715233619021, + "grad_norm": 0.6869809266875984, + "learning_rate": 1.5191483271063417e-05, + "loss": 0.4064, + "step": 20051 + }, + { + "epoch": 0.34648880287531103, + "grad_norm": 0.548396673351666, + "learning_rate": 1.519100493934222e-05, + "loss": 0.2671, + "step": 20052 + }, + { + "epoch": 0.34650608238871994, + "grad_norm": 1.0700892065701155, + "learning_rate": 1.5190526591362407e-05, + "loss": 0.4472, + "step": 20053 + }, + { + "epoch": 0.34652336190212885, + "grad_norm": 1.6058982856293256, + "learning_rate": 1.5190048227125464e-05, + "loss": 0.4421, + "step": 20054 + }, + { + "epoch": 0.34654064141553775, + "grad_norm": 1.3760458706301268, + "learning_rate": 1.5189569846632898e-05, + "loss": 0.7051, + "step": 20055 + }, + { + "epoch": 0.34655792092894666, + "grad_norm": 0.5494915325678622, + "learning_rate": 1.5189091449886209e-05, + "loss": 0.3683, + "step": 20056 + }, + { + "epoch": 0.34657520044235557, + "grad_norm": 0.9430855329943761, + "learning_rate": 1.5188613036886888e-05, + "loss": 0.4729, + "step": 20057 + }, + { + "epoch": 0.3465924799557644, + "grad_norm": 0.8302856553596538, + "learning_rate": 1.5188134607636436e-05, + "loss": 0.3586, + "step": 20058 + }, + { + "epoch": 0.34660975946917333, + "grad_norm": 0.9006300242335586, + "learning_rate": 1.5187656162136353e-05, + "loss": 0.5419, + "step": 20059 + }, + { + "epoch": 0.34662703898258224, + "grad_norm": 0.846376887253268, + "learning_rate": 1.5187177700388133e-05, + "loss": 0.5478, + "step": 20060 + }, + { + "epoch": 0.34664431849599114, + "grad_norm": 1.2705669715118453, + "learning_rate": 1.5186699222393281e-05, + "loss": 0.4541, + "step": 20061 + }, + { + "epoch": 0.34666159800940005, + "grad_norm": 0.9922158880688301, + "learning_rate": 1.5186220728153296e-05, + "loss": 0.5151, + "step": 20062 + }, + { + "epoch": 0.34667887752280896, + "grad_norm": 1.3140639867779043, + "learning_rate": 1.5185742217669672e-05, + "loss": 0.5684, + "step": 20063 + }, + { + "epoch": 0.34669615703621787, + "grad_norm": 0.8102474339614377, + "learning_rate": 1.5185263690943908e-05, + "loss": 0.5274, + "step": 20064 + }, + { + "epoch": 0.3467134365496268, + "grad_norm": 0.8426842447580167, + "learning_rate": 1.5184785147977506e-05, + "loss": 0.48, + "step": 20065 + }, + { + "epoch": 0.3467307160630357, + "grad_norm": 1.7213454343906063, + "learning_rate": 1.518430658877196e-05, + "loss": 0.4261, + "step": 20066 + }, + { + "epoch": 0.3467479955764446, + "grad_norm": 0.49244970485231715, + "learning_rate": 1.5183828013328773e-05, + "loss": 0.6609, + "step": 20067 + }, + { + "epoch": 0.3467652750898535, + "grad_norm": 0.4871392548998066, + "learning_rate": 1.5183349421649442e-05, + "loss": 0.5712, + "step": 20068 + }, + { + "epoch": 0.34678255460326235, + "grad_norm": 1.0800157745708705, + "learning_rate": 1.5182870813735467e-05, + "loss": 0.6507, + "step": 20069 + }, + { + "epoch": 0.34679983411667126, + "grad_norm": 1.070415408405932, + "learning_rate": 1.5182392189588346e-05, + "loss": 0.4519, + "step": 20070 + }, + { + "epoch": 0.34681711363008016, + "grad_norm": 1.3483209761568178, + "learning_rate": 1.5181913549209582e-05, + "loss": 0.5504, + "step": 20071 + }, + { + "epoch": 0.3468343931434891, + "grad_norm": 0.9292866731077305, + "learning_rate": 1.5181434892600667e-05, + "loss": 0.3966, + "step": 20072 + }, + { + "epoch": 0.346851672656898, + "grad_norm": 1.3908128491330045, + "learning_rate": 1.5180956219763107e-05, + "loss": 0.3274, + "step": 20073 + }, + { + "epoch": 0.3468689521703069, + "grad_norm": 0.8186508004732933, + "learning_rate": 1.5180477530698397e-05, + "loss": 0.4359, + "step": 20074 + }, + { + "epoch": 0.3468862316837158, + "grad_norm": 1.407824301142296, + "learning_rate": 1.5179998825408038e-05, + "loss": 0.5437, + "step": 20075 + }, + { + "epoch": 0.3469035111971247, + "grad_norm": 1.4128351036511955, + "learning_rate": 1.5179520103893533e-05, + "loss": 0.5223, + "step": 20076 + }, + { + "epoch": 0.3469207907105336, + "grad_norm": 0.7426481270624077, + "learning_rate": 1.5179041366156368e-05, + "loss": 0.7467, + "step": 20077 + }, + { + "epoch": 0.3469380702239425, + "grad_norm": 1.7446591111280072, + "learning_rate": 1.517856261219806e-05, + "loss": 0.5129, + "step": 20078 + }, + { + "epoch": 0.34695534973735137, + "grad_norm": 1.1360389346361721, + "learning_rate": 1.51780838420201e-05, + "loss": 0.5016, + "step": 20079 + }, + { + "epoch": 0.3469726292507603, + "grad_norm": 1.0122231290587558, + "learning_rate": 1.5177605055623983e-05, + "loss": 0.3283, + "step": 20080 + }, + { + "epoch": 0.3469899087641692, + "grad_norm": 0.7552109674053208, + "learning_rate": 1.5177126253011217e-05, + "loss": 0.2836, + "step": 20081 + }, + { + "epoch": 0.3470071882775781, + "grad_norm": 0.749716303025382, + "learning_rate": 1.5176647434183297e-05, + "loss": 0.402, + "step": 20082 + }, + { + "epoch": 0.347024467790987, + "grad_norm": 0.4347118806407909, + "learning_rate": 1.5176168599141723e-05, + "loss": 0.7866, + "step": 20083 + }, + { + "epoch": 0.3470417473043959, + "grad_norm": 0.489044897510769, + "learning_rate": 1.5175689747887998e-05, + "loss": 0.7375, + "step": 20084 + }, + { + "epoch": 0.3470590268178048, + "grad_norm": 0.8933770340094338, + "learning_rate": 1.5175210880423619e-05, + "loss": 0.5846, + "step": 20085 + }, + { + "epoch": 0.3470763063312137, + "grad_norm": 0.4083154577505365, + "learning_rate": 1.5174731996750083e-05, + "loss": 0.508, + "step": 20086 + }, + { + "epoch": 0.34709358584462263, + "grad_norm": 0.43664697597589486, + "learning_rate": 1.5174253096868896e-05, + "loss": 0.6774, + "step": 20087 + }, + { + "epoch": 0.34711086535803154, + "grad_norm": 1.2497455183013437, + "learning_rate": 1.5173774180781555e-05, + "loss": 0.5278, + "step": 20088 + }, + { + "epoch": 0.34712814487144045, + "grad_norm": 1.481239073239463, + "learning_rate": 1.5173295248489557e-05, + "loss": 0.5069, + "step": 20089 + }, + { + "epoch": 0.3471454243848493, + "grad_norm": 1.2530610435723868, + "learning_rate": 1.5172816299994408e-05, + "loss": 0.4952, + "step": 20090 + }, + { + "epoch": 0.3471627038982582, + "grad_norm": 1.3374406780072945, + "learning_rate": 1.5172337335297606e-05, + "loss": 0.4751, + "step": 20091 + }, + { + "epoch": 0.3471799834116671, + "grad_norm": 1.736476267069056, + "learning_rate": 1.5171858354400648e-05, + "loss": 0.4626, + "step": 20092 + }, + { + "epoch": 0.347197262925076, + "grad_norm": 0.7932042988409607, + "learning_rate": 1.5171379357305039e-05, + "loss": 0.505, + "step": 20093 + }, + { + "epoch": 0.34721454243848493, + "grad_norm": 0.8397285801674877, + "learning_rate": 1.5170900344012273e-05, + "loss": 0.6497, + "step": 20094 + }, + { + "epoch": 0.34723182195189384, + "grad_norm": 0.9774579993770364, + "learning_rate": 1.517042131452386e-05, + "loss": 0.3172, + "step": 20095 + }, + { + "epoch": 0.34724910146530275, + "grad_norm": 0.9294208262250728, + "learning_rate": 1.5169942268841292e-05, + "loss": 0.4538, + "step": 20096 + }, + { + "epoch": 0.34726638097871165, + "grad_norm": 1.0909282419094604, + "learning_rate": 1.5169463206966068e-05, + "loss": 0.4473, + "step": 20097 + }, + { + "epoch": 0.34728366049212056, + "grad_norm": 1.3004223423789074, + "learning_rate": 1.5168984128899698e-05, + "loss": 0.5549, + "step": 20098 + }, + { + "epoch": 0.34730094000552947, + "grad_norm": 0.8150242655980137, + "learning_rate": 1.5168505034643674e-05, + "loss": 0.4851, + "step": 20099 + }, + { + "epoch": 0.3473182195189383, + "grad_norm": 1.846972831500736, + "learning_rate": 1.5168025924199498e-05, + "loss": 0.4519, + "step": 20100 + }, + { + "epoch": 0.34733549903234723, + "grad_norm": 0.9117797384238632, + "learning_rate": 1.5167546797568675e-05, + "loss": 0.4531, + "step": 20101 + }, + { + "epoch": 0.34735277854575614, + "grad_norm": 1.0656206474787409, + "learning_rate": 1.5167067654752701e-05, + "loss": 0.5523, + "step": 20102 + }, + { + "epoch": 0.34737005805916504, + "grad_norm": 1.5333732597328094, + "learning_rate": 1.5166588495753078e-05, + "loss": 0.4638, + "step": 20103 + }, + { + "epoch": 0.34738733757257395, + "grad_norm": 1.1686910017888636, + "learning_rate": 1.5166109320571311e-05, + "loss": 0.4719, + "step": 20104 + }, + { + "epoch": 0.34740461708598286, + "grad_norm": 0.8540102664414259, + "learning_rate": 1.5165630129208894e-05, + "loss": 0.5242, + "step": 20105 + }, + { + "epoch": 0.34742189659939177, + "grad_norm": 1.072787852932956, + "learning_rate": 1.5165150921667331e-05, + "loss": 0.526, + "step": 20106 + }, + { + "epoch": 0.3474391761128007, + "grad_norm": 1.149441839142896, + "learning_rate": 1.5164671697948121e-05, + "loss": 0.5646, + "step": 20107 + }, + { + "epoch": 0.3474564556262096, + "grad_norm": 0.6897968820968675, + "learning_rate": 1.5164192458052771e-05, + "loss": 0.512, + "step": 20108 + }, + { + "epoch": 0.3474737351396185, + "grad_norm": 0.7450413215216578, + "learning_rate": 1.5163713201982773e-05, + "loss": 0.3116, + "step": 20109 + }, + { + "epoch": 0.3474910146530274, + "grad_norm": 1.2659703665987434, + "learning_rate": 1.5163233929739636e-05, + "loss": 0.5776, + "step": 20110 + }, + { + "epoch": 0.34750829416643625, + "grad_norm": 0.9052913485519435, + "learning_rate": 1.5162754641324857e-05, + "loss": 0.4254, + "step": 20111 + }, + { + "epoch": 0.34752557367984516, + "grad_norm": 0.908049983380105, + "learning_rate": 1.5162275336739937e-05, + "loss": 0.4433, + "step": 20112 + }, + { + "epoch": 0.34754285319325406, + "grad_norm": 1.1657466352808084, + "learning_rate": 1.5161796015986382e-05, + "loss": 0.3899, + "step": 20113 + }, + { + "epoch": 0.34756013270666297, + "grad_norm": 1.1471641159043964, + "learning_rate": 1.5161316679065686e-05, + "loss": 0.4761, + "step": 20114 + }, + { + "epoch": 0.3475774122200719, + "grad_norm": 0.701754978581685, + "learning_rate": 1.5160837325979356e-05, + "loss": 0.4477, + "step": 20115 + }, + { + "epoch": 0.3475946917334808, + "grad_norm": 0.86852683858627, + "learning_rate": 1.516035795672889e-05, + "loss": 0.3884, + "step": 20116 + }, + { + "epoch": 0.3476119712468897, + "grad_norm": 0.7242927057338024, + "learning_rate": 1.5159878571315791e-05, + "loss": 0.266, + "step": 20117 + }, + { + "epoch": 0.3476292507602986, + "grad_norm": 0.7914104218760968, + "learning_rate": 1.515939916974156e-05, + "loss": 0.4783, + "step": 20118 + }, + { + "epoch": 0.3476465302737075, + "grad_norm": 0.6486652066888928, + "learning_rate": 1.5158919752007699e-05, + "loss": 0.4733, + "step": 20119 + }, + { + "epoch": 0.3476638097871164, + "grad_norm": 0.8362136137270064, + "learning_rate": 1.5158440318115708e-05, + "loss": 0.4087, + "step": 20120 + }, + { + "epoch": 0.34768108930052527, + "grad_norm": 0.5991285809508933, + "learning_rate": 1.5157960868067091e-05, + "loss": 0.4869, + "step": 20121 + }, + { + "epoch": 0.3476983688139342, + "grad_norm": 0.9125898801390455, + "learning_rate": 1.515748140186335e-05, + "loss": 0.3576, + "step": 20122 + }, + { + "epoch": 0.3477156483273431, + "grad_norm": 1.0259296275955327, + "learning_rate": 1.5157001919505983e-05, + "loss": 0.6374, + "step": 20123 + }, + { + "epoch": 0.347732927840752, + "grad_norm": 1.063910603829095, + "learning_rate": 1.5156522420996496e-05, + "loss": 0.5652, + "step": 20124 + }, + { + "epoch": 0.3477502073541609, + "grad_norm": 1.6323235131900702, + "learning_rate": 1.5156042906336387e-05, + "loss": 0.4285, + "step": 20125 + }, + { + "epoch": 0.3477674868675698, + "grad_norm": 1.3365523698617852, + "learning_rate": 1.5155563375527163e-05, + "loss": 0.3357, + "step": 20126 + }, + { + "epoch": 0.3477847663809787, + "grad_norm": 1.1229041630230177, + "learning_rate": 1.5155083828570321e-05, + "loss": 0.365, + "step": 20127 + }, + { + "epoch": 0.3478020458943876, + "grad_norm": 1.1570666578465814, + "learning_rate": 1.5154604265467363e-05, + "loss": 0.6278, + "step": 20128 + }, + { + "epoch": 0.34781932540779653, + "grad_norm": 0.8781922888659427, + "learning_rate": 1.5154124686219792e-05, + "loss": 0.4353, + "step": 20129 + }, + { + "epoch": 0.34783660492120544, + "grad_norm": 1.4822038920242726, + "learning_rate": 1.5153645090829115e-05, + "loss": 0.3781, + "step": 20130 + }, + { + "epoch": 0.34785388443461435, + "grad_norm": 0.8815413581490994, + "learning_rate": 1.5153165479296826e-05, + "loss": 0.4093, + "step": 20131 + }, + { + "epoch": 0.3478711639480232, + "grad_norm": 0.9113829276946978, + "learning_rate": 1.5152685851624432e-05, + "loss": 0.5507, + "step": 20132 + }, + { + "epoch": 0.3478884434614321, + "grad_norm": 0.7750114419976625, + "learning_rate": 1.5152206207813435e-05, + "loss": 0.4463, + "step": 20133 + }, + { + "epoch": 0.347905722974841, + "grad_norm": 0.8783463044219235, + "learning_rate": 1.5151726547865337e-05, + "loss": 0.3404, + "step": 20134 + }, + { + "epoch": 0.3479230024882499, + "grad_norm": 0.8706779412907795, + "learning_rate": 1.515124687178164e-05, + "loss": 0.5643, + "step": 20135 + }, + { + "epoch": 0.34794028200165883, + "grad_norm": 0.6982206453805898, + "learning_rate": 1.5150767179563848e-05, + "loss": 0.38, + "step": 20136 + }, + { + "epoch": 0.34795756151506774, + "grad_norm": 1.1789610344762589, + "learning_rate": 1.5150287471213457e-05, + "loss": 0.5112, + "step": 20137 + }, + { + "epoch": 0.34797484102847664, + "grad_norm": 0.9326438162558804, + "learning_rate": 1.5149807746731977e-05, + "loss": 0.4854, + "step": 20138 + }, + { + "epoch": 0.34799212054188555, + "grad_norm": 0.8291886145736785, + "learning_rate": 1.514932800612091e-05, + "loss": 0.4323, + "step": 20139 + }, + { + "epoch": 0.34800940005529446, + "grad_norm": 0.8570245557367272, + "learning_rate": 1.514884824938175e-05, + "loss": 0.4276, + "step": 20140 + }, + { + "epoch": 0.34802667956870337, + "grad_norm": 1.4141265359591624, + "learning_rate": 1.514836847651601e-05, + "loss": 0.4998, + "step": 20141 + }, + { + "epoch": 0.3480439590821123, + "grad_norm": 1.017175794838654, + "learning_rate": 1.514788868752519e-05, + "loss": 0.4137, + "step": 20142 + }, + { + "epoch": 0.3480612385955211, + "grad_norm": 1.0676886563790569, + "learning_rate": 1.5147408882410788e-05, + "loss": 0.484, + "step": 20143 + }, + { + "epoch": 0.34807851810893004, + "grad_norm": 0.9613671931593333, + "learning_rate": 1.5146929061174312e-05, + "loss": 0.449, + "step": 20144 + }, + { + "epoch": 0.34809579762233894, + "grad_norm": 1.098720580035972, + "learning_rate": 1.5146449223817266e-05, + "loss": 0.4968, + "step": 20145 + }, + { + "epoch": 0.34811307713574785, + "grad_norm": 1.4797529027742742, + "learning_rate": 1.5145969370341145e-05, + "loss": 0.324, + "step": 20146 + }, + { + "epoch": 0.34813035664915676, + "grad_norm": 1.3069565218698236, + "learning_rate": 1.5145489500747461e-05, + "loss": 0.4225, + "step": 20147 + }, + { + "epoch": 0.34814763616256567, + "grad_norm": 0.698175327404703, + "learning_rate": 1.5145009615037709e-05, + "loss": 0.4441, + "step": 20148 + }, + { + "epoch": 0.3481649156759746, + "grad_norm": 0.7965662324705699, + "learning_rate": 1.5144529713213398e-05, + "loss": 0.6722, + "step": 20149 + }, + { + "epoch": 0.3481821951893835, + "grad_norm": 1.0892346459697129, + "learning_rate": 1.514404979527603e-05, + "loss": 0.5349, + "step": 20150 + }, + { + "epoch": 0.3481994747027924, + "grad_norm": 0.8494459582057091, + "learning_rate": 1.5143569861227106e-05, + "loss": 0.4271, + "step": 20151 + }, + { + "epoch": 0.3482167542162013, + "grad_norm": 1.4663100228096773, + "learning_rate": 1.5143089911068127e-05, + "loss": 0.3649, + "step": 20152 + }, + { + "epoch": 0.34823403372961015, + "grad_norm": 0.8036559780100903, + "learning_rate": 1.5142609944800603e-05, + "loss": 0.7252, + "step": 20153 + }, + { + "epoch": 0.34825131324301906, + "grad_norm": 1.17049420705558, + "learning_rate": 1.5142129962426032e-05, + "loss": 0.7096, + "step": 20154 + }, + { + "epoch": 0.34826859275642796, + "grad_norm": 1.1219446153256052, + "learning_rate": 1.5141649963945922e-05, + "loss": 0.3363, + "step": 20155 + }, + { + "epoch": 0.34828587226983687, + "grad_norm": 0.9938622344846089, + "learning_rate": 1.5141169949361773e-05, + "loss": 0.5157, + "step": 20156 + }, + { + "epoch": 0.3483031517832458, + "grad_norm": 1.3495376134810697, + "learning_rate": 1.5140689918675086e-05, + "loss": 0.5668, + "step": 20157 + }, + { + "epoch": 0.3483204312966547, + "grad_norm": 0.9595030203018144, + "learning_rate": 1.514020987188737e-05, + "loss": 0.4744, + "step": 20158 + }, + { + "epoch": 0.3483377108100636, + "grad_norm": 0.7012044088601377, + "learning_rate": 1.5139729809000127e-05, + "loss": 0.4737, + "step": 20159 + }, + { + "epoch": 0.3483549903234725, + "grad_norm": 0.7021198510244739, + "learning_rate": 1.5139249730014857e-05, + "loss": 0.3554, + "step": 20160 + }, + { + "epoch": 0.3483722698368814, + "grad_norm": 0.4613260759151114, + "learning_rate": 1.5138769634933068e-05, + "loss": 0.569, + "step": 20161 + }, + { + "epoch": 0.3483895493502903, + "grad_norm": 2.009301142183007, + "learning_rate": 1.5138289523756262e-05, + "loss": 0.544, + "step": 20162 + }, + { + "epoch": 0.3484068288636992, + "grad_norm": 1.4935806097080033, + "learning_rate": 1.5137809396485944e-05, + "loss": 0.613, + "step": 20163 + }, + { + "epoch": 0.3484241083771081, + "grad_norm": 1.114045215410246, + "learning_rate": 1.5137329253123612e-05, + "loss": 0.37, + "step": 20164 + }, + { + "epoch": 0.348441387890517, + "grad_norm": 1.6291268928981901, + "learning_rate": 1.5136849093670778e-05, + "loss": 0.7007, + "step": 20165 + }, + { + "epoch": 0.3484586674039259, + "grad_norm": 0.8832231219585177, + "learning_rate": 1.5136368918128943e-05, + "loss": 0.5145, + "step": 20166 + }, + { + "epoch": 0.3484759469173348, + "grad_norm": 0.7772637341642249, + "learning_rate": 1.5135888726499607e-05, + "loss": 0.5077, + "step": 20167 + }, + { + "epoch": 0.3484932264307437, + "grad_norm": 1.2823048068792169, + "learning_rate": 1.5135408518784281e-05, + "loss": 0.5501, + "step": 20168 + }, + { + "epoch": 0.3485105059441526, + "grad_norm": 1.0892671575518797, + "learning_rate": 1.5134928294984462e-05, + "loss": 0.3294, + "step": 20169 + }, + { + "epoch": 0.3485277854575615, + "grad_norm": 1.1350709976016577, + "learning_rate": 1.5134448055101658e-05, + "loss": 0.4898, + "step": 20170 + }, + { + "epoch": 0.34854506497097043, + "grad_norm": 1.1040025336626411, + "learning_rate": 1.5133967799137375e-05, + "loss": 0.3373, + "step": 20171 + }, + { + "epoch": 0.34856234448437934, + "grad_norm": 1.487396283301959, + "learning_rate": 1.5133487527093109e-05, + "loss": 0.4206, + "step": 20172 + }, + { + "epoch": 0.34857962399778825, + "grad_norm": 1.1451559080279083, + "learning_rate": 1.5133007238970374e-05, + "loss": 0.4722, + "step": 20173 + }, + { + "epoch": 0.3485969035111971, + "grad_norm": 0.8215643754930908, + "learning_rate": 1.513252693477067e-05, + "loss": 0.6512, + "step": 20174 + }, + { + "epoch": 0.348614183024606, + "grad_norm": 1.195282301138609, + "learning_rate": 1.5132046614495499e-05, + "loss": 0.3377, + "step": 20175 + }, + { + "epoch": 0.3486314625380149, + "grad_norm": 1.0973770395022124, + "learning_rate": 1.5131566278146373e-05, + "loss": 0.7673, + "step": 20176 + }, + { + "epoch": 0.3486487420514238, + "grad_norm": 0.6169917525170738, + "learning_rate": 1.5131085925724787e-05, + "loss": 0.376, + "step": 20177 + }, + { + "epoch": 0.34866602156483273, + "grad_norm": 0.969862815831669, + "learning_rate": 1.5130605557232252e-05, + "loss": 0.3191, + "step": 20178 + }, + { + "epoch": 0.34868330107824164, + "grad_norm": 1.3929160664817073, + "learning_rate": 1.5130125172670271e-05, + "loss": 0.5423, + "step": 20179 + }, + { + "epoch": 0.34870058059165054, + "grad_norm": 0.8320521940363081, + "learning_rate": 1.5129644772040347e-05, + "loss": 0.7505, + "step": 20180 + }, + { + "epoch": 0.34871786010505945, + "grad_norm": 0.8060412112741346, + "learning_rate": 1.5129164355343986e-05, + "loss": 0.5694, + "step": 20181 + }, + { + "epoch": 0.34873513961846836, + "grad_norm": 1.4646136538379417, + "learning_rate": 1.5128683922582692e-05, + "loss": 0.3793, + "step": 20182 + }, + { + "epoch": 0.34875241913187727, + "grad_norm": 0.9057352306202622, + "learning_rate": 1.512820347375797e-05, + "loss": 0.5973, + "step": 20183 + }, + { + "epoch": 0.3487696986452862, + "grad_norm": 0.8348432664000723, + "learning_rate": 1.5127723008871327e-05, + "loss": 0.5359, + "step": 20184 + }, + { + "epoch": 0.348786978158695, + "grad_norm": 1.6729177158880242, + "learning_rate": 1.5127242527924262e-05, + "loss": 0.5032, + "step": 20185 + }, + { + "epoch": 0.34880425767210393, + "grad_norm": 1.5500612599117878, + "learning_rate": 1.5126762030918286e-05, + "loss": 0.6099, + "step": 20186 + }, + { + "epoch": 0.34882153718551284, + "grad_norm": 0.9288190975603687, + "learning_rate": 1.5126281517854905e-05, + "loss": 0.4602, + "step": 20187 + }, + { + "epoch": 0.34883881669892175, + "grad_norm": 1.0825883561525953, + "learning_rate": 1.5125800988735617e-05, + "loss": 0.4366, + "step": 20188 + }, + { + "epoch": 0.34885609621233066, + "grad_norm": 1.8751310757389386, + "learning_rate": 1.512532044356193e-05, + "loss": 0.514, + "step": 20189 + }, + { + "epoch": 0.34887337572573957, + "grad_norm": 0.9764547978603814, + "learning_rate": 1.5124839882335352e-05, + "loss": 0.5804, + "step": 20190 + }, + { + "epoch": 0.3488906552391485, + "grad_norm": 1.1784723803632533, + "learning_rate": 1.5124359305057386e-05, + "loss": 0.3707, + "step": 20191 + }, + { + "epoch": 0.3489079347525574, + "grad_norm": 0.4626567032891688, + "learning_rate": 1.5123878711729533e-05, + "loss": 0.5799, + "step": 20192 + }, + { + "epoch": 0.3489252142659663, + "grad_norm": 0.8853508681915742, + "learning_rate": 1.5123398102353308e-05, + "loss": 0.5518, + "step": 20193 + }, + { + "epoch": 0.3489424937793752, + "grad_norm": 1.7428847724679453, + "learning_rate": 1.5122917476930207e-05, + "loss": 0.399, + "step": 20194 + }, + { + "epoch": 0.34895977329278405, + "grad_norm": 0.5593740501535764, + "learning_rate": 1.5122436835461742e-05, + "loss": 0.4521, + "step": 20195 + }, + { + "epoch": 0.34897705280619296, + "grad_norm": 2.0407516320341506, + "learning_rate": 1.5121956177949413e-05, + "loss": 0.5366, + "step": 20196 + }, + { + "epoch": 0.34899433231960186, + "grad_norm": 1.154970771527214, + "learning_rate": 1.512147550439473e-05, + "loss": 0.4655, + "step": 20197 + }, + { + "epoch": 0.34901161183301077, + "grad_norm": 0.8021216231465051, + "learning_rate": 1.5120994814799197e-05, + "loss": 0.7263, + "step": 20198 + }, + { + "epoch": 0.3490288913464197, + "grad_norm": 0.6690547971872797, + "learning_rate": 1.5120514109164317e-05, + "loss": 0.3468, + "step": 20199 + }, + { + "epoch": 0.3490461708598286, + "grad_norm": 1.3101757986430704, + "learning_rate": 1.51200333874916e-05, + "loss": 0.3445, + "step": 20200 + }, + { + "epoch": 0.3490634503732375, + "grad_norm": 0.8944073415889727, + "learning_rate": 1.5119552649782545e-05, + "loss": 0.3857, + "step": 20201 + }, + { + "epoch": 0.3490807298866464, + "grad_norm": 0.9720815897048183, + "learning_rate": 1.5119071896038668e-05, + "loss": 0.4603, + "step": 20202 + }, + { + "epoch": 0.3490980094000553, + "grad_norm": 0.8790026031582493, + "learning_rate": 1.5118591126261465e-05, + "loss": 0.5627, + "step": 20203 + }, + { + "epoch": 0.3491152889134642, + "grad_norm": 0.7188726379754171, + "learning_rate": 1.5118110340452446e-05, + "loss": 0.4253, + "step": 20204 + }, + { + "epoch": 0.3491325684268731, + "grad_norm": 1.2363784584399498, + "learning_rate": 1.5117629538613115e-05, + "loss": 0.7854, + "step": 20205 + }, + { + "epoch": 0.349149847940282, + "grad_norm": 0.919639623487414, + "learning_rate": 1.5117148720744983e-05, + "loss": 0.3722, + "step": 20206 + }, + { + "epoch": 0.3491671274536909, + "grad_norm": 1.0580827144681015, + "learning_rate": 1.5116667886849552e-05, + "loss": 0.4828, + "step": 20207 + }, + { + "epoch": 0.3491844069670998, + "grad_norm": 0.9002297009092887, + "learning_rate": 1.5116187036928325e-05, + "loss": 0.4579, + "step": 20208 + }, + { + "epoch": 0.3492016864805087, + "grad_norm": 1.1203462786594827, + "learning_rate": 1.5115706170982814e-05, + "loss": 0.2821, + "step": 20209 + }, + { + "epoch": 0.3492189659939176, + "grad_norm": 0.8199117054974094, + "learning_rate": 1.5115225289014518e-05, + "loss": 0.3918, + "step": 20210 + }, + { + "epoch": 0.3492362455073265, + "grad_norm": 1.3883444080417613, + "learning_rate": 1.5114744391024954e-05, + "loss": 0.3785, + "step": 20211 + }, + { + "epoch": 0.3492535250207354, + "grad_norm": 0.891579555965675, + "learning_rate": 1.5114263477015616e-05, + "loss": 0.4663, + "step": 20212 + }, + { + "epoch": 0.34927080453414433, + "grad_norm": 1.0672009522958519, + "learning_rate": 1.5113782546988022e-05, + "loss": 0.7282, + "step": 20213 + }, + { + "epoch": 0.34928808404755324, + "grad_norm": 0.6475906330326913, + "learning_rate": 1.5113301600943668e-05, + "loss": 0.309, + "step": 20214 + }, + { + "epoch": 0.34930536356096215, + "grad_norm": 1.2016579626339126, + "learning_rate": 1.5112820638884067e-05, + "loss": 0.6358, + "step": 20215 + }, + { + "epoch": 0.34932264307437105, + "grad_norm": 0.9780010507880303, + "learning_rate": 1.5112339660810724e-05, + "loss": 0.5637, + "step": 20216 + }, + { + "epoch": 0.3493399225877799, + "grad_norm": 1.166481372758446, + "learning_rate": 1.511185866672514e-05, + "loss": 0.4074, + "step": 20217 + }, + { + "epoch": 0.3493572021011888, + "grad_norm": 0.9791822414155746, + "learning_rate": 1.5111377656628832e-05, + "loss": 0.423, + "step": 20218 + }, + { + "epoch": 0.3493744816145977, + "grad_norm": 1.067253680089418, + "learning_rate": 1.5110896630523298e-05, + "loss": 0.5156, + "step": 20219 + }, + { + "epoch": 0.34939176112800663, + "grad_norm": 1.0770506851381605, + "learning_rate": 1.5110415588410047e-05, + "loss": 0.4004, + "step": 20220 + }, + { + "epoch": 0.34940904064141554, + "grad_norm": 1.2270513808333798, + "learning_rate": 1.5109934530290588e-05, + "loss": 0.5858, + "step": 20221 + }, + { + "epoch": 0.34942632015482444, + "grad_norm": 1.1639570517450482, + "learning_rate": 1.5109453456166426e-05, + "loss": 0.4738, + "step": 20222 + }, + { + "epoch": 0.34944359966823335, + "grad_norm": 0.4601969033689078, + "learning_rate": 1.5108972366039063e-05, + "loss": 0.4952, + "step": 20223 + }, + { + "epoch": 0.34946087918164226, + "grad_norm": 1.067637855323236, + "learning_rate": 1.5108491259910012e-05, + "loss": 0.6906, + "step": 20224 + }, + { + "epoch": 0.34947815869505117, + "grad_norm": 0.8926247387210224, + "learning_rate": 1.510801013778078e-05, + "loss": 0.6299, + "step": 20225 + }, + { + "epoch": 0.3494954382084601, + "grad_norm": 0.6552745908411567, + "learning_rate": 1.5107528999652872e-05, + "loss": 0.5315, + "step": 20226 + }, + { + "epoch": 0.3495127177218689, + "grad_norm": 0.8790181409995029, + "learning_rate": 1.5107047845527793e-05, + "loss": 0.5679, + "step": 20227 + }, + { + "epoch": 0.34952999723527783, + "grad_norm": 1.6647497471599972, + "learning_rate": 1.5106566675407054e-05, + "loss": 0.4907, + "step": 20228 + }, + { + "epoch": 0.34954727674868674, + "grad_norm": 0.9535200980485008, + "learning_rate": 1.510608548929216e-05, + "loss": 0.5187, + "step": 20229 + }, + { + "epoch": 0.34956455626209565, + "grad_norm": 1.7504453280165564, + "learning_rate": 1.5105604287184616e-05, + "loss": 0.4536, + "step": 20230 + }, + { + "epoch": 0.34958183577550456, + "grad_norm": 1.0439298957728287, + "learning_rate": 1.5105123069085933e-05, + "loss": 0.7003, + "step": 20231 + }, + { + "epoch": 0.34959911528891346, + "grad_norm": 1.5712439476474196, + "learning_rate": 1.5104641834997615e-05, + "loss": 0.5292, + "step": 20232 + }, + { + "epoch": 0.34961639480232237, + "grad_norm": 0.9481060735137308, + "learning_rate": 1.5104160584921172e-05, + "loss": 0.4243, + "step": 20233 + }, + { + "epoch": 0.3496336743157313, + "grad_norm": 0.6550399577385915, + "learning_rate": 1.5103679318858113e-05, + "loss": 0.28, + "step": 20234 + }, + { + "epoch": 0.3496509538291402, + "grad_norm": 0.8459396742183767, + "learning_rate": 1.5103198036809939e-05, + "loss": 0.365, + "step": 20235 + }, + { + "epoch": 0.3496682333425491, + "grad_norm": 1.0879259842284763, + "learning_rate": 1.5102716738778163e-05, + "loss": 0.3436, + "step": 20236 + }, + { + "epoch": 0.349685512855958, + "grad_norm": 0.6492283982803915, + "learning_rate": 1.5102235424764289e-05, + "loss": 0.4156, + "step": 20237 + }, + { + "epoch": 0.34970279236936685, + "grad_norm": 1.2307348180122966, + "learning_rate": 1.5101754094769825e-05, + "loss": 0.5467, + "step": 20238 + }, + { + "epoch": 0.34972007188277576, + "grad_norm": 0.9876601535069951, + "learning_rate": 1.5101272748796283e-05, + "loss": 0.583, + "step": 20239 + }, + { + "epoch": 0.34973735139618467, + "grad_norm": 1.188651095081443, + "learning_rate": 1.5100791386845161e-05, + "loss": 0.3509, + "step": 20240 + }, + { + "epoch": 0.3497546309095936, + "grad_norm": 0.9455889858224926, + "learning_rate": 1.510031000891798e-05, + "loss": 0.4954, + "step": 20241 + }, + { + "epoch": 0.3497719104230025, + "grad_norm": 1.117715932432754, + "learning_rate": 1.5099828615016234e-05, + "loss": 0.5597, + "step": 20242 + }, + { + "epoch": 0.3497891899364114, + "grad_norm": 0.9251644744567703, + "learning_rate": 1.5099347205141444e-05, + "loss": 0.4728, + "step": 20243 + }, + { + "epoch": 0.3498064694498203, + "grad_norm": 1.3338303066747266, + "learning_rate": 1.5098865779295104e-05, + "loss": 0.498, + "step": 20244 + }, + { + "epoch": 0.3498237489632292, + "grad_norm": 0.7836538541418077, + "learning_rate": 1.5098384337478732e-05, + "loss": 0.367, + "step": 20245 + }, + { + "epoch": 0.3498410284766381, + "grad_norm": 0.6540915475363721, + "learning_rate": 1.5097902879693833e-05, + "loss": 0.4283, + "step": 20246 + }, + { + "epoch": 0.349858307990047, + "grad_norm": 1.3070718638998466, + "learning_rate": 1.5097421405941912e-05, + "loss": 0.5089, + "step": 20247 + }, + { + "epoch": 0.3498755875034559, + "grad_norm": 2.243642827645055, + "learning_rate": 1.5096939916224486e-05, + "loss": 0.4482, + "step": 20248 + }, + { + "epoch": 0.3498928670168648, + "grad_norm": 1.3426816134529704, + "learning_rate": 1.5096458410543051e-05, + "loss": 0.6851, + "step": 20249 + }, + { + "epoch": 0.3499101465302737, + "grad_norm": 0.5106861714550563, + "learning_rate": 1.5095976888899125e-05, + "loss": 0.6102, + "step": 20250 + }, + { + "epoch": 0.3499274260436826, + "grad_norm": 0.7830046823912327, + "learning_rate": 1.5095495351294209e-05, + "loss": 0.4837, + "step": 20251 + }, + { + "epoch": 0.3499447055570915, + "grad_norm": 0.8249312738955501, + "learning_rate": 1.5095013797729813e-05, + "loss": 0.4168, + "step": 20252 + }, + { + "epoch": 0.3499619850705004, + "grad_norm": 1.2214983083898967, + "learning_rate": 1.5094532228207452e-05, + "loss": 0.5467, + "step": 20253 + }, + { + "epoch": 0.3499792645839093, + "grad_norm": 1.5701991875110979, + "learning_rate": 1.5094050642728626e-05, + "loss": 0.5365, + "step": 20254 + }, + { + "epoch": 0.34999654409731823, + "grad_norm": 1.442762716088335, + "learning_rate": 1.5093569041294844e-05, + "loss": 0.5411, + "step": 20255 + }, + { + "epoch": 0.35001382361072714, + "grad_norm": 0.592544622496863, + "learning_rate": 1.509308742390762e-05, + "loss": 0.2627, + "step": 20256 + }, + { + "epoch": 0.35003110312413604, + "grad_norm": 1.360873387672361, + "learning_rate": 1.5092605790568458e-05, + "loss": 0.4753, + "step": 20257 + }, + { + "epoch": 0.35004838263754495, + "grad_norm": 1.2258264981511566, + "learning_rate": 1.5092124141278866e-05, + "loss": 0.4074, + "step": 20258 + }, + { + "epoch": 0.3500656621509538, + "grad_norm": 1.5097359151639946, + "learning_rate": 1.5091642476040358e-05, + "loss": 0.4043, + "step": 20259 + }, + { + "epoch": 0.3500829416643627, + "grad_norm": 1.5237958768713877, + "learning_rate": 1.5091160794854436e-05, + "loss": 0.4394, + "step": 20260 + }, + { + "epoch": 0.3501002211777716, + "grad_norm": 1.1009868868674062, + "learning_rate": 1.5090679097722612e-05, + "loss": 0.5988, + "step": 20261 + }, + { + "epoch": 0.3501175006911805, + "grad_norm": 1.5288716350601712, + "learning_rate": 1.5090197384646394e-05, + "loss": 0.5662, + "step": 20262 + }, + { + "epoch": 0.35013478020458944, + "grad_norm": 1.3773640136767153, + "learning_rate": 1.5089715655627293e-05, + "loss": 0.4953, + "step": 20263 + }, + { + "epoch": 0.35015205971799834, + "grad_norm": 0.9198234692322552, + "learning_rate": 1.5089233910666812e-05, + "loss": 0.3603, + "step": 20264 + }, + { + "epoch": 0.35016933923140725, + "grad_norm": 1.0235177164812501, + "learning_rate": 1.5088752149766466e-05, + "loss": 0.4003, + "step": 20265 + }, + { + "epoch": 0.35018661874481616, + "grad_norm": 0.815880552386645, + "learning_rate": 1.5088270372927762e-05, + "loss": 0.35, + "step": 20266 + }, + { + "epoch": 0.35020389825822507, + "grad_norm": 1.2323547646401352, + "learning_rate": 1.5087788580152207e-05, + "loss": 0.4971, + "step": 20267 + }, + { + "epoch": 0.350221177771634, + "grad_norm": 0.8951556609370912, + "learning_rate": 1.508730677144131e-05, + "loss": 0.4808, + "step": 20268 + }, + { + "epoch": 0.3502384572850428, + "grad_norm": 1.053677359727644, + "learning_rate": 1.5086824946796586e-05, + "loss": 0.3373, + "step": 20269 + }, + { + "epoch": 0.35025573679845173, + "grad_norm": 0.7594227762314983, + "learning_rate": 1.5086343106219538e-05, + "loss": 0.4976, + "step": 20270 + }, + { + "epoch": 0.35027301631186064, + "grad_norm": 0.7138853233299913, + "learning_rate": 1.5085861249711675e-05, + "loss": 0.8437, + "step": 20271 + }, + { + "epoch": 0.35029029582526955, + "grad_norm": 1.4790560408744404, + "learning_rate": 1.508537937727451e-05, + "loss": 0.4219, + "step": 20272 + }, + { + "epoch": 0.35030757533867846, + "grad_norm": 0.7170788437003753, + "learning_rate": 1.5084897488909549e-05, + "loss": 0.5245, + "step": 20273 + }, + { + "epoch": 0.35032485485208736, + "grad_norm": 1.2538589839251675, + "learning_rate": 1.5084415584618303e-05, + "loss": 0.4735, + "step": 20274 + }, + { + "epoch": 0.35034213436549627, + "grad_norm": 0.5010044172202707, + "learning_rate": 1.508393366440228e-05, + "loss": 0.5885, + "step": 20275 + }, + { + "epoch": 0.3503594138789052, + "grad_norm": 2.024373259462249, + "learning_rate": 1.5083451728262995e-05, + "loss": 0.4602, + "step": 20276 + }, + { + "epoch": 0.3503766933923141, + "grad_norm": 1.2102344322030154, + "learning_rate": 1.5082969776201948e-05, + "loss": 0.627, + "step": 20277 + }, + { + "epoch": 0.350393972905723, + "grad_norm": 1.0893260801149087, + "learning_rate": 1.5082487808220654e-05, + "loss": 0.5297, + "step": 20278 + }, + { + "epoch": 0.3504112524191319, + "grad_norm": 1.756344509923082, + "learning_rate": 1.5082005824320625e-05, + "loss": 0.4716, + "step": 20279 + }, + { + "epoch": 0.35042853193254075, + "grad_norm": 1.118026629531127, + "learning_rate": 1.5081523824503364e-05, + "loss": 0.5562, + "step": 20280 + }, + { + "epoch": 0.35044581144594966, + "grad_norm": 0.6259869051469236, + "learning_rate": 1.5081041808770388e-05, + "loss": 0.396, + "step": 20281 + }, + { + "epoch": 0.35046309095935857, + "grad_norm": 0.6995632854928601, + "learning_rate": 1.5080559777123199e-05, + "loss": 0.4724, + "step": 20282 + }, + { + "epoch": 0.3504803704727675, + "grad_norm": 1.163852903399435, + "learning_rate": 1.5080077729563311e-05, + "loss": 0.4473, + "step": 20283 + }, + { + "epoch": 0.3504976499861764, + "grad_norm": 0.9400890788930155, + "learning_rate": 1.5079595666092232e-05, + "loss": 0.4947, + "step": 20284 + }, + { + "epoch": 0.3505149294995853, + "grad_norm": 0.7304320626469605, + "learning_rate": 1.5079113586711477e-05, + "loss": 0.4181, + "step": 20285 + }, + { + "epoch": 0.3505322090129942, + "grad_norm": 0.7214358561288422, + "learning_rate": 1.5078631491422552e-05, + "loss": 0.5846, + "step": 20286 + }, + { + "epoch": 0.3505494885264031, + "grad_norm": 1.0147518483966402, + "learning_rate": 1.5078149380226967e-05, + "loss": 0.3573, + "step": 20287 + }, + { + "epoch": 0.350566768039812, + "grad_norm": 1.759879727215666, + "learning_rate": 1.5077667253126231e-05, + "loss": 0.5237, + "step": 20288 + }, + { + "epoch": 0.3505840475532209, + "grad_norm": 0.8762650328167694, + "learning_rate": 1.5077185110121857e-05, + "loss": 0.7282, + "step": 20289 + }, + { + "epoch": 0.35060132706662983, + "grad_norm": 1.1054343784768033, + "learning_rate": 1.5076702951215352e-05, + "loss": 0.5103, + "step": 20290 + }, + { + "epoch": 0.3506186065800387, + "grad_norm": 1.2411350267419412, + "learning_rate": 1.5076220776408227e-05, + "loss": 0.4614, + "step": 20291 + }, + { + "epoch": 0.3506358860934476, + "grad_norm": 1.015070231163087, + "learning_rate": 1.5075738585701993e-05, + "loss": 0.5707, + "step": 20292 + }, + { + "epoch": 0.3506531656068565, + "grad_norm": 1.1627705219235531, + "learning_rate": 1.507525637909816e-05, + "loss": 0.5176, + "step": 20293 + }, + { + "epoch": 0.3506704451202654, + "grad_norm": 0.9066263228135879, + "learning_rate": 1.5074774156598242e-05, + "loss": 0.4096, + "step": 20294 + }, + { + "epoch": 0.3506877246336743, + "grad_norm": 1.0957515955654458, + "learning_rate": 1.5074291918203741e-05, + "loss": 0.3251, + "step": 20295 + }, + { + "epoch": 0.3507050041470832, + "grad_norm": 1.1792973894164145, + "learning_rate": 1.5073809663916175e-05, + "loss": 0.5924, + "step": 20296 + }, + { + "epoch": 0.35072228366049213, + "grad_norm": 0.7097688657540071, + "learning_rate": 1.5073327393737049e-05, + "loss": 0.9484, + "step": 20297 + }, + { + "epoch": 0.35073956317390104, + "grad_norm": 0.8871036625215927, + "learning_rate": 1.5072845107667875e-05, + "loss": 0.6, + "step": 20298 + }, + { + "epoch": 0.35075684268730994, + "grad_norm": 1.4488314854574622, + "learning_rate": 1.5072362805710168e-05, + "loss": 0.3712, + "step": 20299 + }, + { + "epoch": 0.35077412220071885, + "grad_norm": 0.985039999394801, + "learning_rate": 1.5071880487865435e-05, + "loss": 0.5483, + "step": 20300 + }, + { + "epoch": 0.3507914017141277, + "grad_norm": 0.5800944212079348, + "learning_rate": 1.5071398154135186e-05, + "loss": 0.3728, + "step": 20301 + }, + { + "epoch": 0.3508086812275366, + "grad_norm": 1.2105287543260832, + "learning_rate": 1.5070915804520932e-05, + "loss": 0.338, + "step": 20302 + }, + { + "epoch": 0.3508259607409455, + "grad_norm": 1.3401090476823834, + "learning_rate": 1.5070433439024186e-05, + "loss": 0.4326, + "step": 20303 + }, + { + "epoch": 0.3508432402543544, + "grad_norm": 0.8058434467573088, + "learning_rate": 1.5069951057646454e-05, + "loss": 0.4543, + "step": 20304 + }, + { + "epoch": 0.35086051976776333, + "grad_norm": 0.7266544281626938, + "learning_rate": 1.5069468660389252e-05, + "loss": 0.3958, + "step": 20305 + }, + { + "epoch": 0.35087779928117224, + "grad_norm": 0.5446674188260694, + "learning_rate": 1.5068986247254089e-05, + "loss": 0.2996, + "step": 20306 + }, + { + "epoch": 0.35089507879458115, + "grad_norm": 0.9353872619228137, + "learning_rate": 1.5068503818242475e-05, + "loss": 0.3701, + "step": 20307 + }, + { + "epoch": 0.35091235830799006, + "grad_norm": 1.0600282243272117, + "learning_rate": 1.5068021373355924e-05, + "loss": 0.5511, + "step": 20308 + }, + { + "epoch": 0.35092963782139897, + "grad_norm": 1.0875888374256208, + "learning_rate": 1.5067538912595942e-05, + "loss": 0.5591, + "step": 20309 + }, + { + "epoch": 0.3509469173348079, + "grad_norm": 1.650869998469292, + "learning_rate": 1.5067056435964044e-05, + "loss": 0.5581, + "step": 20310 + }, + { + "epoch": 0.3509641968482168, + "grad_norm": 1.4399327519307985, + "learning_rate": 1.5066573943461741e-05, + "loss": 0.374, + "step": 20311 + }, + { + "epoch": 0.35098147636162563, + "grad_norm": 0.8629896978747903, + "learning_rate": 1.5066091435090542e-05, + "loss": 0.4471, + "step": 20312 + }, + { + "epoch": 0.35099875587503454, + "grad_norm": 0.8501150108359858, + "learning_rate": 1.5065608910851962e-05, + "loss": 0.4476, + "step": 20313 + }, + { + "epoch": 0.35101603538844345, + "grad_norm": 1.0430848492872342, + "learning_rate": 1.5065126370747509e-05, + "loss": 0.5255, + "step": 20314 + }, + { + "epoch": 0.35103331490185236, + "grad_norm": 1.197791444694849, + "learning_rate": 1.5064643814778692e-05, + "loss": 0.61, + "step": 20315 + }, + { + "epoch": 0.35105059441526126, + "grad_norm": 0.408976565695153, + "learning_rate": 1.5064161242947029e-05, + "loss": 0.5868, + "step": 20316 + }, + { + "epoch": 0.35106787392867017, + "grad_norm": 1.0472350957944991, + "learning_rate": 1.506367865525403e-05, + "loss": 0.6485, + "step": 20317 + }, + { + "epoch": 0.3510851534420791, + "grad_norm": 0.8408506924413888, + "learning_rate": 1.50631960517012e-05, + "loss": 0.3956, + "step": 20318 + }, + { + "epoch": 0.351102432955488, + "grad_norm": 0.8635771869648043, + "learning_rate": 1.5062713432290057e-05, + "loss": 0.364, + "step": 20319 + }, + { + "epoch": 0.3511197124688969, + "grad_norm": 0.7521546774270265, + "learning_rate": 1.5062230797022111e-05, + "loss": 0.5706, + "step": 20320 + }, + { + "epoch": 0.3511369919823058, + "grad_norm": 0.4830319786668684, + "learning_rate": 1.5061748145898874e-05, + "loss": 0.6771, + "step": 20321 + }, + { + "epoch": 0.35115427149571465, + "grad_norm": 1.0349345098885403, + "learning_rate": 1.5061265478921858e-05, + "loss": 0.7203, + "step": 20322 + }, + { + "epoch": 0.35117155100912356, + "grad_norm": 0.6109564510767002, + "learning_rate": 1.5060782796092573e-05, + "loss": 0.3116, + "step": 20323 + }, + { + "epoch": 0.35118883052253247, + "grad_norm": 1.6165015238526959, + "learning_rate": 1.506030009741253e-05, + "loss": 0.5373, + "step": 20324 + }, + { + "epoch": 0.3512061100359414, + "grad_norm": 0.5253539657379728, + "learning_rate": 1.5059817382883244e-05, + "loss": 0.4509, + "step": 20325 + }, + { + "epoch": 0.3512233895493503, + "grad_norm": 0.5763699666960953, + "learning_rate": 1.5059334652506225e-05, + "loss": 0.8545, + "step": 20326 + }, + { + "epoch": 0.3512406690627592, + "grad_norm": 0.8941626093687108, + "learning_rate": 1.5058851906282983e-05, + "loss": 0.426, + "step": 20327 + }, + { + "epoch": 0.3512579485761681, + "grad_norm": 1.244982900979936, + "learning_rate": 1.5058369144215037e-05, + "loss": 0.5041, + "step": 20328 + }, + { + "epoch": 0.351275228089577, + "grad_norm": 0.6404997866629699, + "learning_rate": 1.5057886366303893e-05, + "loss": 0.2905, + "step": 20329 + }, + { + "epoch": 0.3512925076029859, + "grad_norm": 0.9779063290884707, + "learning_rate": 1.5057403572551064e-05, + "loss": 0.4497, + "step": 20330 + }, + { + "epoch": 0.3513097871163948, + "grad_norm": 1.1446568872979819, + "learning_rate": 1.5056920762958062e-05, + "loss": 0.5744, + "step": 20331 + }, + { + "epoch": 0.35132706662980373, + "grad_norm": 1.1372522689238072, + "learning_rate": 1.50564379375264e-05, + "loss": 0.4038, + "step": 20332 + }, + { + "epoch": 0.3513443461432126, + "grad_norm": 0.9180371588286776, + "learning_rate": 1.5055955096257591e-05, + "loss": 0.316, + "step": 20333 + }, + { + "epoch": 0.3513616256566215, + "grad_norm": 0.7697901497638526, + "learning_rate": 1.5055472239153146e-05, + "loss": 0.4445, + "step": 20334 + }, + { + "epoch": 0.3513789051700304, + "grad_norm": 1.286192216176821, + "learning_rate": 1.5054989366214577e-05, + "loss": 0.4445, + "step": 20335 + }, + { + "epoch": 0.3513961846834393, + "grad_norm": 0.9779726700334839, + "learning_rate": 1.5054506477443398e-05, + "loss": 0.528, + "step": 20336 + }, + { + "epoch": 0.3514134641968482, + "grad_norm": 0.9236123769821347, + "learning_rate": 1.505402357284112e-05, + "loss": 0.3853, + "step": 20337 + }, + { + "epoch": 0.3514307437102571, + "grad_norm": 0.8418223462539401, + "learning_rate": 1.5053540652409255e-05, + "loss": 0.2919, + "step": 20338 + }, + { + "epoch": 0.35144802322366603, + "grad_norm": 1.5429463149526836, + "learning_rate": 1.505305771614932e-05, + "loss": 0.5276, + "step": 20339 + }, + { + "epoch": 0.35146530273707494, + "grad_norm": 1.4238552121972414, + "learning_rate": 1.5052574764062822e-05, + "loss": 0.4985, + "step": 20340 + }, + { + "epoch": 0.35148258225048384, + "grad_norm": 0.9306827891014668, + "learning_rate": 1.5052091796151276e-05, + "loss": 0.3684, + "step": 20341 + }, + { + "epoch": 0.35149986176389275, + "grad_norm": 1.1489451672500184, + "learning_rate": 1.5051608812416193e-05, + "loss": 0.4721, + "step": 20342 + }, + { + "epoch": 0.3515171412773016, + "grad_norm": 1.182924617623958, + "learning_rate": 1.5051125812859094e-05, + "loss": 0.3451, + "step": 20343 + }, + { + "epoch": 0.3515344207907105, + "grad_norm": 0.935298493194416, + "learning_rate": 1.5050642797481477e-05, + "loss": 0.6049, + "step": 20344 + }, + { + "epoch": 0.3515517003041194, + "grad_norm": 0.7042561552734771, + "learning_rate": 1.5050159766284864e-05, + "loss": 0.4927, + "step": 20345 + }, + { + "epoch": 0.3515689798175283, + "grad_norm": 1.1112498698540825, + "learning_rate": 1.5049676719270769e-05, + "loss": 0.3257, + "step": 20346 + }, + { + "epoch": 0.35158625933093723, + "grad_norm": 0.8947461256313307, + "learning_rate": 1.5049193656440704e-05, + "loss": 0.4463, + "step": 20347 + }, + { + "epoch": 0.35160353884434614, + "grad_norm": 1.315902466324806, + "learning_rate": 1.5048710577796177e-05, + "loss": 0.6338, + "step": 20348 + }, + { + "epoch": 0.35162081835775505, + "grad_norm": 1.3848853276848108, + "learning_rate": 1.5048227483338709e-05, + "loss": 0.4213, + "step": 20349 + }, + { + "epoch": 0.35163809787116396, + "grad_norm": 0.8066605477910086, + "learning_rate": 1.5047744373069806e-05, + "loss": 0.6533, + "step": 20350 + }, + { + "epoch": 0.35165537738457286, + "grad_norm": 1.0309079162079307, + "learning_rate": 1.5047261246990985e-05, + "loss": 0.4661, + "step": 20351 + }, + { + "epoch": 0.3516726568979818, + "grad_norm": 0.8320385886820804, + "learning_rate": 1.504677810510376e-05, + "loss": 0.4696, + "step": 20352 + }, + { + "epoch": 0.3516899364113907, + "grad_norm": 0.6295731009666424, + "learning_rate": 1.5046294947409639e-05, + "loss": 0.2908, + "step": 20353 + }, + { + "epoch": 0.35170721592479953, + "grad_norm": 0.952492777500314, + "learning_rate": 1.5045811773910139e-05, + "loss": 0.2813, + "step": 20354 + }, + { + "epoch": 0.35172449543820844, + "grad_norm": 0.6487934414891685, + "learning_rate": 1.5045328584606775e-05, + "loss": 0.409, + "step": 20355 + }, + { + "epoch": 0.35174177495161735, + "grad_norm": 1.1811986502450897, + "learning_rate": 1.5044845379501059e-05, + "loss": 0.4059, + "step": 20356 + }, + { + "epoch": 0.35175905446502626, + "grad_norm": 0.6244922893832745, + "learning_rate": 1.50443621585945e-05, + "loss": 0.6577, + "step": 20357 + }, + { + "epoch": 0.35177633397843516, + "grad_norm": 0.8632625890448539, + "learning_rate": 1.504387892188862e-05, + "loss": 0.4034, + "step": 20358 + }, + { + "epoch": 0.35179361349184407, + "grad_norm": 0.9174173375907175, + "learning_rate": 1.5043395669384926e-05, + "loss": 0.4305, + "step": 20359 + }, + { + "epoch": 0.351810893005253, + "grad_norm": 1.4405167180467386, + "learning_rate": 1.5042912401084933e-05, + "loss": 0.343, + "step": 20360 + }, + { + "epoch": 0.3518281725186619, + "grad_norm": 0.8926029702166562, + "learning_rate": 1.5042429116990154e-05, + "loss": 0.485, + "step": 20361 + }, + { + "epoch": 0.3518454520320708, + "grad_norm": 0.4235375221556237, + "learning_rate": 1.5041945817102109e-05, + "loss": 0.59, + "step": 20362 + }, + { + "epoch": 0.3518627315454797, + "grad_norm": 0.8335299519878299, + "learning_rate": 1.5041462501422301e-05, + "loss": 0.3946, + "step": 20363 + }, + { + "epoch": 0.3518800110588886, + "grad_norm": 1.189425678812305, + "learning_rate": 1.504097916995225e-05, + "loss": 0.5956, + "step": 20364 + }, + { + "epoch": 0.35189729057229746, + "grad_norm": 1.028466854766274, + "learning_rate": 1.5040495822693473e-05, + "loss": 0.5743, + "step": 20365 + }, + { + "epoch": 0.35191457008570637, + "grad_norm": 1.102707967055327, + "learning_rate": 1.5040012459647475e-05, + "loss": 0.5091, + "step": 20366 + }, + { + "epoch": 0.3519318495991153, + "grad_norm": 0.46190764573042, + "learning_rate": 1.5039529080815776e-05, + "loss": 0.8288, + "step": 20367 + }, + { + "epoch": 0.3519491291125242, + "grad_norm": 1.2893622337620598, + "learning_rate": 1.503904568619989e-05, + "loss": 0.6136, + "step": 20368 + }, + { + "epoch": 0.3519664086259331, + "grad_norm": 1.4920540781186702, + "learning_rate": 1.5038562275801332e-05, + "loss": 0.4308, + "step": 20369 + }, + { + "epoch": 0.351983688139342, + "grad_norm": 0.3849840912516157, + "learning_rate": 1.5038078849621611e-05, + "loss": 0.534, + "step": 20370 + }, + { + "epoch": 0.3520009676527509, + "grad_norm": 1.4434955518478345, + "learning_rate": 1.5037595407662244e-05, + "loss": 0.6431, + "step": 20371 + }, + { + "epoch": 0.3520182471661598, + "grad_norm": 0.6009812707112726, + "learning_rate": 1.5037111949924747e-05, + "loss": 0.3738, + "step": 20372 + }, + { + "epoch": 0.3520355266795687, + "grad_norm": 1.4009903166131816, + "learning_rate": 1.5036628476410634e-05, + "loss": 0.5168, + "step": 20373 + }, + { + "epoch": 0.35205280619297763, + "grad_norm": 0.9125147644079452, + "learning_rate": 1.5036144987121418e-05, + "loss": 0.4133, + "step": 20374 + }, + { + "epoch": 0.3520700857063865, + "grad_norm": 1.2835424406279983, + "learning_rate": 1.5035661482058608e-05, + "loss": 0.5862, + "step": 20375 + }, + { + "epoch": 0.3520873652197954, + "grad_norm": 0.7805424745594112, + "learning_rate": 1.5035177961223727e-05, + "loss": 0.4075, + "step": 20376 + }, + { + "epoch": 0.3521046447332043, + "grad_norm": 0.8165184323807018, + "learning_rate": 1.5034694424618286e-05, + "loss": 0.5831, + "step": 20377 + }, + { + "epoch": 0.3521219242466132, + "grad_norm": 1.1945384858893946, + "learning_rate": 1.5034210872243797e-05, + "loss": 0.5663, + "step": 20378 + }, + { + "epoch": 0.3521392037600221, + "grad_norm": 1.2012337053601563, + "learning_rate": 1.5033727304101783e-05, + "loss": 0.4889, + "step": 20379 + }, + { + "epoch": 0.352156483273431, + "grad_norm": 1.2057676963835862, + "learning_rate": 1.5033243720193746e-05, + "loss": 0.587, + "step": 20380 + }, + { + "epoch": 0.35217376278683993, + "grad_norm": 1.1218662729589781, + "learning_rate": 1.5032760120521211e-05, + "loss": 0.4473, + "step": 20381 + }, + { + "epoch": 0.35219104230024884, + "grad_norm": 0.8685104883874746, + "learning_rate": 1.5032276505085688e-05, + "loss": 0.5008, + "step": 20382 + }, + { + "epoch": 0.35220832181365774, + "grad_norm": 0.9246065056389663, + "learning_rate": 1.5031792873888693e-05, + "loss": 0.6006, + "step": 20383 + }, + { + "epoch": 0.35222560132706665, + "grad_norm": 1.2338012159345995, + "learning_rate": 1.5031309226931739e-05, + "loss": 0.558, + "step": 20384 + }, + { + "epoch": 0.35224288084047556, + "grad_norm": 0.4248803731829133, + "learning_rate": 1.5030825564216343e-05, + "loss": 0.5146, + "step": 20385 + }, + { + "epoch": 0.3522601603538844, + "grad_norm": 0.5002202802015647, + "learning_rate": 1.503034188574402e-05, + "loss": 0.5207, + "step": 20386 + }, + { + "epoch": 0.3522774398672933, + "grad_norm": 0.9604735984379457, + "learning_rate": 1.502985819151628e-05, + "loss": 0.4239, + "step": 20387 + }, + { + "epoch": 0.3522947193807022, + "grad_norm": 1.0566381699430012, + "learning_rate": 1.5029374481534644e-05, + "loss": 0.459, + "step": 20388 + }, + { + "epoch": 0.35231199889411113, + "grad_norm": 0.9910080230566443, + "learning_rate": 1.5028890755800626e-05, + "loss": 0.4823, + "step": 20389 + }, + { + "epoch": 0.35232927840752004, + "grad_norm": 0.8999514397475636, + "learning_rate": 1.5028407014315739e-05, + "loss": 0.7046, + "step": 20390 + }, + { + "epoch": 0.35234655792092895, + "grad_norm": 1.1802474690024476, + "learning_rate": 1.5027923257081498e-05, + "loss": 0.7512, + "step": 20391 + }, + { + "epoch": 0.35236383743433786, + "grad_norm": 1.8947358418901357, + "learning_rate": 1.5027439484099424e-05, + "loss": 0.4131, + "step": 20392 + }, + { + "epoch": 0.35238111694774676, + "grad_norm": 0.8347504972738928, + "learning_rate": 1.5026955695371024e-05, + "loss": 0.519, + "step": 20393 + }, + { + "epoch": 0.35239839646115567, + "grad_norm": 0.9507238367872246, + "learning_rate": 1.5026471890897817e-05, + "loss": 0.4143, + "step": 20394 + }, + { + "epoch": 0.3524156759745646, + "grad_norm": 1.022736184367532, + "learning_rate": 1.5025988070681317e-05, + "loss": 0.5956, + "step": 20395 + }, + { + "epoch": 0.35243295548797343, + "grad_norm": 1.2228063059615855, + "learning_rate": 1.5025504234723041e-05, + "loss": 0.6132, + "step": 20396 + }, + { + "epoch": 0.35245023500138234, + "grad_norm": 0.9336625445218635, + "learning_rate": 1.5025020383024505e-05, + "loss": 0.4346, + "step": 20397 + }, + { + "epoch": 0.35246751451479125, + "grad_norm": 1.1064651152347105, + "learning_rate": 1.502453651558722e-05, + "loss": 0.6002, + "step": 20398 + }, + { + "epoch": 0.35248479402820015, + "grad_norm": 0.9147672127186778, + "learning_rate": 1.5024052632412707e-05, + "loss": 0.3478, + "step": 20399 + }, + { + "epoch": 0.35250207354160906, + "grad_norm": 1.175656746585799, + "learning_rate": 1.502356873350248e-05, + "loss": 0.3123, + "step": 20400 + }, + { + "epoch": 0.35251935305501797, + "grad_norm": 1.056510149133773, + "learning_rate": 1.5023084818858053e-05, + "loss": 0.5875, + "step": 20401 + }, + { + "epoch": 0.3525366325684269, + "grad_norm": 0.6750576688425449, + "learning_rate": 1.5022600888480945e-05, + "loss": 0.4109, + "step": 20402 + }, + { + "epoch": 0.3525539120818358, + "grad_norm": 0.9510646292513777, + "learning_rate": 1.5022116942372669e-05, + "loss": 0.5637, + "step": 20403 + }, + { + "epoch": 0.3525711915952447, + "grad_norm": 0.6694611165912466, + "learning_rate": 1.5021632980534738e-05, + "loss": 0.3207, + "step": 20404 + }, + { + "epoch": 0.3525884711086536, + "grad_norm": 0.41902483394235224, + "learning_rate": 1.502114900296867e-05, + "loss": 0.4423, + "step": 20405 + }, + { + "epoch": 0.3526057506220625, + "grad_norm": 0.8396028478863418, + "learning_rate": 1.5020665009675987e-05, + "loss": 0.4207, + "step": 20406 + }, + { + "epoch": 0.35262303013547136, + "grad_norm": 1.1567145865076383, + "learning_rate": 1.5020181000658194e-05, + "loss": 0.4474, + "step": 20407 + }, + { + "epoch": 0.35264030964888027, + "grad_norm": 1.240880713365084, + "learning_rate": 1.5019696975916818e-05, + "loss": 0.3118, + "step": 20408 + }, + { + "epoch": 0.3526575891622892, + "grad_norm": 0.9165846522301504, + "learning_rate": 1.5019212935453364e-05, + "loss": 0.3967, + "step": 20409 + }, + { + "epoch": 0.3526748686756981, + "grad_norm": 0.8927813936126945, + "learning_rate": 1.5018728879269358e-05, + "loss": 0.3022, + "step": 20410 + }, + { + "epoch": 0.352692148189107, + "grad_norm": 0.4524453534223058, + "learning_rate": 1.5018244807366311e-05, + "loss": 0.6049, + "step": 20411 + }, + { + "epoch": 0.3527094277025159, + "grad_norm": 1.053995830236578, + "learning_rate": 1.501776071974574e-05, + "loss": 0.436, + "step": 20412 + }, + { + "epoch": 0.3527267072159248, + "grad_norm": 1.1241155573081338, + "learning_rate": 1.5017276616409158e-05, + "loss": 0.5573, + "step": 20413 + }, + { + "epoch": 0.3527439867293337, + "grad_norm": 0.7922377190111013, + "learning_rate": 1.5016792497358088e-05, + "loss": 0.308, + "step": 20414 + }, + { + "epoch": 0.3527612662427426, + "grad_norm": 0.6048427425646627, + "learning_rate": 1.5016308362594041e-05, + "loss": 0.4372, + "step": 20415 + }, + { + "epoch": 0.35277854575615153, + "grad_norm": 0.9692173863394618, + "learning_rate": 1.5015824212118534e-05, + "loss": 0.4631, + "step": 20416 + }, + { + "epoch": 0.35279582526956044, + "grad_norm": 0.9339164212252493, + "learning_rate": 1.5015340045933087e-05, + "loss": 0.3635, + "step": 20417 + }, + { + "epoch": 0.3528131047829693, + "grad_norm": 1.2932713676720389, + "learning_rate": 1.501485586403921e-05, + "loss": 0.6542, + "step": 20418 + }, + { + "epoch": 0.3528303842963782, + "grad_norm": 1.266220228421209, + "learning_rate": 1.5014371666438425e-05, + "loss": 0.5839, + "step": 20419 + }, + { + "epoch": 0.3528476638097871, + "grad_norm": 0.8712695097771387, + "learning_rate": 1.501388745313225e-05, + "loss": 0.4796, + "step": 20420 + }, + { + "epoch": 0.352864943323196, + "grad_norm": 1.0920032895753569, + "learning_rate": 1.5013403224122195e-05, + "loss": 0.5364, + "step": 20421 + }, + { + "epoch": 0.3528822228366049, + "grad_norm": 0.7021027556448651, + "learning_rate": 1.5012918979409781e-05, + "loss": 0.3488, + "step": 20422 + }, + { + "epoch": 0.3528995023500138, + "grad_norm": 0.8335312393452176, + "learning_rate": 1.5012434718996525e-05, + "loss": 0.5886, + "step": 20423 + }, + { + "epoch": 0.35291678186342273, + "grad_norm": 0.49770535029317403, + "learning_rate": 1.501195044288394e-05, + "loss": 0.996, + "step": 20424 + }, + { + "epoch": 0.35293406137683164, + "grad_norm": 0.4625547789510445, + "learning_rate": 1.5011466151073548e-05, + "loss": 0.6786, + "step": 20425 + }, + { + "epoch": 0.35295134089024055, + "grad_norm": 0.42832079297588704, + "learning_rate": 1.5010981843566863e-05, + "loss": 0.746, + "step": 20426 + }, + { + "epoch": 0.35296862040364946, + "grad_norm": 0.9608530848018115, + "learning_rate": 1.5010497520365399e-05, + "loss": 0.5091, + "step": 20427 + }, + { + "epoch": 0.3529858999170583, + "grad_norm": 1.5522952989902412, + "learning_rate": 1.5010013181470679e-05, + "loss": 0.5, + "step": 20428 + }, + { + "epoch": 0.3530031794304672, + "grad_norm": 1.1119933618948807, + "learning_rate": 1.5009528826884215e-05, + "loss": 0.461, + "step": 20429 + }, + { + "epoch": 0.3530204589438761, + "grad_norm": 0.8385203561071326, + "learning_rate": 1.5009044456607527e-05, + "loss": 0.4613, + "step": 20430 + }, + { + "epoch": 0.35303773845728503, + "grad_norm": 0.9509068686519448, + "learning_rate": 1.500856007064213e-05, + "loss": 0.4788, + "step": 20431 + }, + { + "epoch": 0.35305501797069394, + "grad_norm": 0.8418442931151422, + "learning_rate": 1.5008075668989545e-05, + "loss": 0.3775, + "step": 20432 + }, + { + "epoch": 0.35307229748410285, + "grad_norm": 1.3245776225447665, + "learning_rate": 1.5007591251651282e-05, + "loss": 0.7561, + "step": 20433 + }, + { + "epoch": 0.35308957699751176, + "grad_norm": 0.8721722719764597, + "learning_rate": 1.500710681862887e-05, + "loss": 0.4907, + "step": 20434 + }, + { + "epoch": 0.35310685651092066, + "grad_norm": 0.8811483921637389, + "learning_rate": 1.5006622369923811e-05, + "loss": 0.4502, + "step": 20435 + }, + { + "epoch": 0.35312413602432957, + "grad_norm": 0.7286297395498449, + "learning_rate": 1.5006137905537635e-05, + "loss": 0.5378, + "step": 20436 + }, + { + "epoch": 0.3531414155377385, + "grad_norm": 0.6851723763499721, + "learning_rate": 1.5005653425471853e-05, + "loss": 0.2939, + "step": 20437 + }, + { + "epoch": 0.3531586950511474, + "grad_norm": 1.7056551790575036, + "learning_rate": 1.5005168929727982e-05, + "loss": 0.5627, + "step": 20438 + }, + { + "epoch": 0.35317597456455624, + "grad_norm": 1.1986047382020573, + "learning_rate": 1.5004684418307545e-05, + "loss": 0.3828, + "step": 20439 + }, + { + "epoch": 0.35319325407796515, + "grad_norm": 1.4010225384257138, + "learning_rate": 1.5004199891212055e-05, + "loss": 0.6045, + "step": 20440 + }, + { + "epoch": 0.35321053359137405, + "grad_norm": 0.7536098854727353, + "learning_rate": 1.5003715348443028e-05, + "loss": 0.3781, + "step": 20441 + }, + { + "epoch": 0.35322781310478296, + "grad_norm": 0.7485037324626451, + "learning_rate": 1.5003230790001988e-05, + "loss": 0.7797, + "step": 20442 + }, + { + "epoch": 0.35324509261819187, + "grad_norm": 0.8263099703546735, + "learning_rate": 1.5002746215890448e-05, + "loss": 0.5636, + "step": 20443 + }, + { + "epoch": 0.3532623721316008, + "grad_norm": 1.3231087919346989, + "learning_rate": 1.5002261626109926e-05, + "loss": 0.5782, + "step": 20444 + }, + { + "epoch": 0.3532796516450097, + "grad_norm": 1.0717151490611851, + "learning_rate": 1.500177702066194e-05, + "loss": 0.6006, + "step": 20445 + }, + { + "epoch": 0.3532969311584186, + "grad_norm": 1.216923961615278, + "learning_rate": 1.500129239954801e-05, + "loss": 0.4173, + "step": 20446 + }, + { + "epoch": 0.3533142106718275, + "grad_norm": 0.7910966477173316, + "learning_rate": 1.5000807762769652e-05, + "loss": 0.5322, + "step": 20447 + }, + { + "epoch": 0.3533314901852364, + "grad_norm": 0.7812838566251435, + "learning_rate": 1.5000323110328383e-05, + "loss": 0.4339, + "step": 20448 + }, + { + "epoch": 0.35334876969864526, + "grad_norm": 0.8896636718420944, + "learning_rate": 1.4999838442225721e-05, + "loss": 0.3796, + "step": 20449 + }, + { + "epoch": 0.35336604921205417, + "grad_norm": 0.5926487327562135, + "learning_rate": 1.4999353758463185e-05, + "loss": 0.2653, + "step": 20450 + }, + { + "epoch": 0.3533833287254631, + "grad_norm": 0.8436338702270211, + "learning_rate": 1.4998869059042294e-05, + "loss": 0.5579, + "step": 20451 + }, + { + "epoch": 0.353400608238872, + "grad_norm": 0.46681129442185115, + "learning_rate": 1.4998384343964567e-05, + "loss": 0.8904, + "step": 20452 + }, + { + "epoch": 0.3534178877522809, + "grad_norm": 1.3493268377811585, + "learning_rate": 1.4997899613231516e-05, + "loss": 0.6144, + "step": 20453 + }, + { + "epoch": 0.3534351672656898, + "grad_norm": 1.3638610923748422, + "learning_rate": 1.4997414866844668e-05, + "loss": 0.4941, + "step": 20454 + }, + { + "epoch": 0.3534524467790987, + "grad_norm": 1.0608953964582497, + "learning_rate": 1.4996930104805535e-05, + "loss": 0.6575, + "step": 20455 + }, + { + "epoch": 0.3534697262925076, + "grad_norm": 0.6749285914354903, + "learning_rate": 1.4996445327115638e-05, + "loss": 0.3484, + "step": 20456 + }, + { + "epoch": 0.3534870058059165, + "grad_norm": 0.9598147346363749, + "learning_rate": 1.4995960533776495e-05, + "loss": 0.3818, + "step": 20457 + }, + { + "epoch": 0.35350428531932543, + "grad_norm": 0.6531750637231339, + "learning_rate": 1.499547572478962e-05, + "loss": 0.6326, + "step": 20458 + }, + { + "epoch": 0.35352156483273434, + "grad_norm": 1.040840121406762, + "learning_rate": 1.4994990900156538e-05, + "loss": 0.522, + "step": 20459 + }, + { + "epoch": 0.3535388443461432, + "grad_norm": 0.7770406842194704, + "learning_rate": 1.4994506059878766e-05, + "loss": 0.4437, + "step": 20460 + }, + { + "epoch": 0.3535561238595521, + "grad_norm": 1.2657382239309753, + "learning_rate": 1.4994021203957819e-05, + "loss": 0.5929, + "step": 20461 + }, + { + "epoch": 0.353573403372961, + "grad_norm": 0.8837888167305578, + "learning_rate": 1.499353633239522e-05, + "loss": 0.5649, + "step": 20462 + }, + { + "epoch": 0.3535906828863699, + "grad_norm": 0.8125367282835981, + "learning_rate": 1.4993051445192485e-05, + "loss": 0.5234, + "step": 20463 + }, + { + "epoch": 0.3536079623997788, + "grad_norm": 1.2535084006067898, + "learning_rate": 1.4992566542351132e-05, + "loss": 0.5485, + "step": 20464 + }, + { + "epoch": 0.3536252419131877, + "grad_norm": 0.6643891347077169, + "learning_rate": 1.4992081623872688e-05, + "loss": 0.4602, + "step": 20465 + }, + { + "epoch": 0.35364252142659663, + "grad_norm": 0.6745950428765424, + "learning_rate": 1.4991596689758658e-05, + "loss": 0.3004, + "step": 20466 + }, + { + "epoch": 0.35365980094000554, + "grad_norm": 0.8859324371275881, + "learning_rate": 1.499111174001057e-05, + "loss": 0.3935, + "step": 20467 + }, + { + "epoch": 0.35367708045341445, + "grad_norm": 0.6271975996518073, + "learning_rate": 1.499062677462994e-05, + "loss": 0.5239, + "step": 20468 + }, + { + "epoch": 0.35369435996682336, + "grad_norm": 0.8652014415654317, + "learning_rate": 1.4990141793618288e-05, + "loss": 0.4909, + "step": 20469 + }, + { + "epoch": 0.3537116394802322, + "grad_norm": 1.238120257870155, + "learning_rate": 1.4989656796977131e-05, + "loss": 0.4041, + "step": 20470 + }, + { + "epoch": 0.3537289189936411, + "grad_norm": 0.7171042055173906, + "learning_rate": 1.4989171784707991e-05, + "loss": 0.3482, + "step": 20471 + }, + { + "epoch": 0.35374619850705, + "grad_norm": 1.1401932908084877, + "learning_rate": 1.498868675681239e-05, + "loss": 0.4223, + "step": 20472 + }, + { + "epoch": 0.35376347802045893, + "grad_norm": 0.8994012629962802, + "learning_rate": 1.4988201713291836e-05, + "loss": 0.5083, + "step": 20473 + }, + { + "epoch": 0.35378075753386784, + "grad_norm": 0.7710395189879903, + "learning_rate": 1.4987716654147859e-05, + "loss": 0.5268, + "step": 20474 + }, + { + "epoch": 0.35379803704727675, + "grad_norm": 1.4127552528993337, + "learning_rate": 1.4987231579381973e-05, + "loss": 0.2283, + "step": 20475 + }, + { + "epoch": 0.35381531656068566, + "grad_norm": 1.033968971601005, + "learning_rate": 1.4986746488995701e-05, + "loss": 0.5121, + "step": 20476 + }, + { + "epoch": 0.35383259607409456, + "grad_norm": 0.9897005508558857, + "learning_rate": 1.498626138299056e-05, + "loss": 0.4969, + "step": 20477 + }, + { + "epoch": 0.35384987558750347, + "grad_norm": 1.140941604509571, + "learning_rate": 1.4985776261368066e-05, + "loss": 0.327, + "step": 20478 + }, + { + "epoch": 0.3538671551009124, + "grad_norm": 0.6178105368864539, + "learning_rate": 1.4985291124129743e-05, + "loss": 0.4824, + "step": 20479 + }, + { + "epoch": 0.3538844346143213, + "grad_norm": 1.247712576955136, + "learning_rate": 1.498480597127711e-05, + "loss": 0.3896, + "step": 20480 + }, + { + "epoch": 0.35390171412773014, + "grad_norm": 1.106604102309115, + "learning_rate": 1.4984320802811684e-05, + "loss": 0.636, + "step": 20481 + }, + { + "epoch": 0.35391899364113905, + "grad_norm": 1.1742131971861154, + "learning_rate": 1.498383561873499e-05, + "loss": 0.4998, + "step": 20482 + }, + { + "epoch": 0.35393627315454795, + "grad_norm": 0.8594562313379337, + "learning_rate": 1.4983350419048544e-05, + "loss": 0.5675, + "step": 20483 + }, + { + "epoch": 0.35395355266795686, + "grad_norm": 1.3202591842308766, + "learning_rate": 1.4982865203753863e-05, + "loss": 0.4536, + "step": 20484 + }, + { + "epoch": 0.35397083218136577, + "grad_norm": 1.558965773681652, + "learning_rate": 1.4982379972852471e-05, + "loss": 0.6109, + "step": 20485 + }, + { + "epoch": 0.3539881116947747, + "grad_norm": 0.6357704213601646, + "learning_rate": 1.4981894726345887e-05, + "loss": 0.4112, + "step": 20486 + }, + { + "epoch": 0.3540053912081836, + "grad_norm": 0.7154126946851436, + "learning_rate": 1.4981409464235627e-05, + "loss": 0.4743, + "step": 20487 + }, + { + "epoch": 0.3540226707215925, + "grad_norm": 1.4518218693992808, + "learning_rate": 1.4980924186523216e-05, + "loss": 0.6602, + "step": 20488 + }, + { + "epoch": 0.3540399502350014, + "grad_norm": 1.2636246770415054, + "learning_rate": 1.4980438893210174e-05, + "loss": 0.5878, + "step": 20489 + }, + { + "epoch": 0.3540572297484103, + "grad_norm": 0.9565482962040848, + "learning_rate": 1.4979953584298015e-05, + "loss": 0.5448, + "step": 20490 + }, + { + "epoch": 0.3540745092618192, + "grad_norm": 0.8490951451449453, + "learning_rate": 1.4979468259788266e-05, + "loss": 0.4121, + "step": 20491 + }, + { + "epoch": 0.35409178877522807, + "grad_norm": 0.4341394866777759, + "learning_rate": 1.497898291968244e-05, + "loss": 0.4081, + "step": 20492 + }, + { + "epoch": 0.354109068288637, + "grad_norm": 0.7421746295569503, + "learning_rate": 1.4978497563982064e-05, + "loss": 0.4416, + "step": 20493 + }, + { + "epoch": 0.3541263478020459, + "grad_norm": 0.8195637872319175, + "learning_rate": 1.4978012192688656e-05, + "loss": 0.4648, + "step": 20494 + }, + { + "epoch": 0.3541436273154548, + "grad_norm": 1.261604156166245, + "learning_rate": 1.4977526805803735e-05, + "loss": 0.461, + "step": 20495 + }, + { + "epoch": 0.3541609068288637, + "grad_norm": 0.7713333010673177, + "learning_rate": 1.4977041403328822e-05, + "loss": 0.5554, + "step": 20496 + }, + { + "epoch": 0.3541781863422726, + "grad_norm": 0.921619804946482, + "learning_rate": 1.4976555985265438e-05, + "loss": 0.5617, + "step": 20497 + }, + { + "epoch": 0.3541954658556815, + "grad_norm": 0.44987792471344007, + "learning_rate": 1.4976070551615098e-05, + "loss": 0.634, + "step": 20498 + }, + { + "epoch": 0.3542127453690904, + "grad_norm": 0.9889904662398606, + "learning_rate": 1.497558510237933e-05, + "loss": 0.2411, + "step": 20499 + }, + { + "epoch": 0.35423002488249933, + "grad_norm": 0.7900972544173556, + "learning_rate": 1.4975099637559652e-05, + "loss": 0.3918, + "step": 20500 + }, + { + "epoch": 0.35424730439590824, + "grad_norm": 1.447484160405377, + "learning_rate": 1.497461415715758e-05, + "loss": 0.3481, + "step": 20501 + }, + { + "epoch": 0.3542645839093171, + "grad_norm": 0.46302794784387596, + "learning_rate": 1.4974128661174643e-05, + "loss": 0.6202, + "step": 20502 + }, + { + "epoch": 0.354281863422726, + "grad_norm": 0.448291746899025, + "learning_rate": 1.4973643149612355e-05, + "loss": 0.7334, + "step": 20503 + }, + { + "epoch": 0.3542991429361349, + "grad_norm": 0.4440775342813521, + "learning_rate": 1.497315762247224e-05, + "loss": 0.8397, + "step": 20504 + }, + { + "epoch": 0.3543164224495438, + "grad_norm": 0.8034335579597889, + "learning_rate": 1.4972672079755818e-05, + "loss": 0.6698, + "step": 20505 + }, + { + "epoch": 0.3543337019629527, + "grad_norm": 0.6277774137252237, + "learning_rate": 1.4972186521464606e-05, + "loss": 0.4268, + "step": 20506 + }, + { + "epoch": 0.3543509814763616, + "grad_norm": 0.8803896444608826, + "learning_rate": 1.497170094760013e-05, + "loss": 0.5152, + "step": 20507 + }, + { + "epoch": 0.35436826098977053, + "grad_norm": 0.9309744517379858, + "learning_rate": 1.4971215358163908e-05, + "loss": 0.2992, + "step": 20508 + }, + { + "epoch": 0.35438554050317944, + "grad_norm": 1.1086069496828634, + "learning_rate": 1.4970729753157463e-05, + "loss": 0.4175, + "step": 20509 + }, + { + "epoch": 0.35440282001658835, + "grad_norm": 0.7284327443686057, + "learning_rate": 1.4970244132582313e-05, + "loss": 0.3429, + "step": 20510 + }, + { + "epoch": 0.35442009952999726, + "grad_norm": 0.9222414120341669, + "learning_rate": 1.4969758496439982e-05, + "loss": 0.4438, + "step": 20511 + }, + { + "epoch": 0.35443737904340616, + "grad_norm": 1.023356382467732, + "learning_rate": 1.4969272844731988e-05, + "loss": 0.4644, + "step": 20512 + }, + { + "epoch": 0.354454658556815, + "grad_norm": 1.2145823534274325, + "learning_rate": 1.4968787177459854e-05, + "loss": 0.4088, + "step": 20513 + }, + { + "epoch": 0.3544719380702239, + "grad_norm": 0.8948972313973601, + "learning_rate": 1.4968301494625101e-05, + "loss": 0.6368, + "step": 20514 + }, + { + "epoch": 0.35448921758363283, + "grad_norm": 0.8742352741076489, + "learning_rate": 1.4967815796229253e-05, + "loss": 0.4147, + "step": 20515 + }, + { + "epoch": 0.35450649709704174, + "grad_norm": 0.8509226317889361, + "learning_rate": 1.4967330082273825e-05, + "loss": 0.5193, + "step": 20516 + }, + { + "epoch": 0.35452377661045065, + "grad_norm": 1.001885444120917, + "learning_rate": 1.4966844352760343e-05, + "loss": 0.5359, + "step": 20517 + }, + { + "epoch": 0.35454105612385955, + "grad_norm": 1.1949645815088081, + "learning_rate": 1.4966358607690325e-05, + "loss": 0.3281, + "step": 20518 + }, + { + "epoch": 0.35455833563726846, + "grad_norm": 1.131376537624057, + "learning_rate": 1.4965872847065299e-05, + "loss": 0.4304, + "step": 20519 + }, + { + "epoch": 0.35457561515067737, + "grad_norm": 1.0314228358061512, + "learning_rate": 1.4965387070886778e-05, + "loss": 0.44, + "step": 20520 + }, + { + "epoch": 0.3545928946640863, + "grad_norm": 0.8817090474262328, + "learning_rate": 1.4964901279156288e-05, + "loss": 0.8249, + "step": 20521 + }, + { + "epoch": 0.3546101741774952, + "grad_norm": 0.6236485194313143, + "learning_rate": 1.4964415471875351e-05, + "loss": 0.4271, + "step": 20522 + }, + { + "epoch": 0.35462745369090404, + "grad_norm": 1.1029888407898094, + "learning_rate": 1.4963929649045488e-05, + "loss": 0.6037, + "step": 20523 + }, + { + "epoch": 0.35464473320431295, + "grad_norm": 0.9138371321156037, + "learning_rate": 1.4963443810668216e-05, + "loss": 0.4198, + "step": 20524 + }, + { + "epoch": 0.35466201271772185, + "grad_norm": 2.2011022159621247, + "learning_rate": 1.4962957956745066e-05, + "loss": 0.4421, + "step": 20525 + }, + { + "epoch": 0.35467929223113076, + "grad_norm": 0.7499392289668395, + "learning_rate": 1.4962472087277553e-05, + "loss": 0.422, + "step": 20526 + }, + { + "epoch": 0.35469657174453967, + "grad_norm": 1.6489428405216557, + "learning_rate": 1.4961986202267197e-05, + "loss": 0.6587, + "step": 20527 + }, + { + "epoch": 0.3547138512579486, + "grad_norm": 0.8982537924197941, + "learning_rate": 1.4961500301715528e-05, + "loss": 0.669, + "step": 20528 + }, + { + "epoch": 0.3547311307713575, + "grad_norm": 0.9697078000539514, + "learning_rate": 1.496101438562406e-05, + "loss": 0.6332, + "step": 20529 + }, + { + "epoch": 0.3547484102847664, + "grad_norm": 0.8490010693236916, + "learning_rate": 1.4960528453994318e-05, + "loss": 0.4329, + "step": 20530 + }, + { + "epoch": 0.3547656897981753, + "grad_norm": 1.6891242247067815, + "learning_rate": 1.4960042506827826e-05, + "loss": 0.5277, + "step": 20531 + }, + { + "epoch": 0.3547829693115842, + "grad_norm": 1.6409881733518288, + "learning_rate": 1.4959556544126101e-05, + "loss": 0.4154, + "step": 20532 + }, + { + "epoch": 0.3548002488249931, + "grad_norm": 0.39279706462436953, + "learning_rate": 1.4959070565890667e-05, + "loss": 0.6069, + "step": 20533 + }, + { + "epoch": 0.35481752833840197, + "grad_norm": 1.1460577689820428, + "learning_rate": 1.4958584572123052e-05, + "loss": 0.5728, + "step": 20534 + }, + { + "epoch": 0.3548348078518109, + "grad_norm": 1.076052677621583, + "learning_rate": 1.495809856282477e-05, + "loss": 0.409, + "step": 20535 + }, + { + "epoch": 0.3548520873652198, + "grad_norm": 0.41831788371059436, + "learning_rate": 1.4957612537997346e-05, + "loss": 0.5984, + "step": 20536 + }, + { + "epoch": 0.3548693668786287, + "grad_norm": 1.0843935190598577, + "learning_rate": 1.4957126497642306e-05, + "loss": 0.7784, + "step": 20537 + }, + { + "epoch": 0.3548866463920376, + "grad_norm": 1.059291901638322, + "learning_rate": 1.4956640441761168e-05, + "loss": 0.4133, + "step": 20538 + }, + { + "epoch": 0.3549039259054465, + "grad_norm": 0.888945212642086, + "learning_rate": 1.4956154370355453e-05, + "loss": 0.5108, + "step": 20539 + }, + { + "epoch": 0.3549212054188554, + "grad_norm": 1.2638347215731391, + "learning_rate": 1.495566828342669e-05, + "loss": 0.4064, + "step": 20540 + }, + { + "epoch": 0.3549384849322643, + "grad_norm": 0.4584464781474096, + "learning_rate": 1.4955182180976393e-05, + "loss": 0.6709, + "step": 20541 + }, + { + "epoch": 0.3549557644456732, + "grad_norm": 0.9919092924060996, + "learning_rate": 1.4954696063006088e-05, + "loss": 0.5648, + "step": 20542 + }, + { + "epoch": 0.35497304395908214, + "grad_norm": 0.627153772206581, + "learning_rate": 1.4954209929517305e-05, + "loss": 0.5322, + "step": 20543 + }, + { + "epoch": 0.354990323472491, + "grad_norm": 1.2663317726491874, + "learning_rate": 1.4953723780511554e-05, + "loss": 0.4412, + "step": 20544 + }, + { + "epoch": 0.3550076029858999, + "grad_norm": 1.133857595698741, + "learning_rate": 1.4953237615990366e-05, + "loss": 0.4568, + "step": 20545 + }, + { + "epoch": 0.3550248824993088, + "grad_norm": 0.9252259459633747, + "learning_rate": 1.4952751435955262e-05, + "loss": 0.3919, + "step": 20546 + }, + { + "epoch": 0.3550421620127177, + "grad_norm": 0.9495569709207334, + "learning_rate": 1.4952265240407761e-05, + "loss": 0.5859, + "step": 20547 + }, + { + "epoch": 0.3550594415261266, + "grad_norm": 0.47369612477736545, + "learning_rate": 1.4951779029349394e-05, + "loss": 0.8222, + "step": 20548 + }, + { + "epoch": 0.3550767210395355, + "grad_norm": 0.7580949078650037, + "learning_rate": 1.4951292802781674e-05, + "loss": 0.426, + "step": 20549 + }, + { + "epoch": 0.35509400055294443, + "grad_norm": 0.7634275179452901, + "learning_rate": 1.495080656070613e-05, + "loss": 0.4438, + "step": 20550 + }, + { + "epoch": 0.35511128006635334, + "grad_norm": 0.5096631497456451, + "learning_rate": 1.4950320303124284e-05, + "loss": 0.5852, + "step": 20551 + }, + { + "epoch": 0.35512855957976225, + "grad_norm": 0.9261287571133824, + "learning_rate": 1.494983403003766e-05, + "loss": 0.4488, + "step": 20552 + }, + { + "epoch": 0.35514583909317116, + "grad_norm": 1.1139705406707634, + "learning_rate": 1.4949347741447774e-05, + "loss": 0.5758, + "step": 20553 + }, + { + "epoch": 0.35516311860658006, + "grad_norm": 1.233274918983584, + "learning_rate": 1.4948861437356162e-05, + "loss": 0.5087, + "step": 20554 + }, + { + "epoch": 0.3551803981199889, + "grad_norm": 0.7334455898074131, + "learning_rate": 1.4948375117764338e-05, + "loss": 0.3584, + "step": 20555 + }, + { + "epoch": 0.3551976776333978, + "grad_norm": 1.1365215710073975, + "learning_rate": 1.4947888782673822e-05, + "loss": 0.3479, + "step": 20556 + }, + { + "epoch": 0.35521495714680673, + "grad_norm": 0.9772692941993997, + "learning_rate": 1.4947402432086149e-05, + "loss": 0.5133, + "step": 20557 + }, + { + "epoch": 0.35523223666021564, + "grad_norm": 0.7334887809857777, + "learning_rate": 1.4946916066002833e-05, + "loss": 0.5236, + "step": 20558 + }, + { + "epoch": 0.35524951617362455, + "grad_norm": 0.7865455022630132, + "learning_rate": 1.4946429684425399e-05, + "loss": 0.5545, + "step": 20559 + }, + { + "epoch": 0.35526679568703345, + "grad_norm": 2.692921934409806, + "learning_rate": 1.4945943287355373e-05, + "loss": 0.4795, + "step": 20560 + }, + { + "epoch": 0.35528407520044236, + "grad_norm": 0.9193230600274989, + "learning_rate": 1.4945456874794275e-05, + "loss": 0.3184, + "step": 20561 + }, + { + "epoch": 0.35530135471385127, + "grad_norm": 0.8194326347268485, + "learning_rate": 1.494497044674363e-05, + "loss": 0.2832, + "step": 20562 + }, + { + "epoch": 0.3553186342272602, + "grad_norm": 0.7031365994658, + "learning_rate": 1.4944484003204965e-05, + "loss": 0.4809, + "step": 20563 + }, + { + "epoch": 0.3553359137406691, + "grad_norm": 0.4064577101146231, + "learning_rate": 1.4943997544179796e-05, + "loss": 0.6325, + "step": 20564 + }, + { + "epoch": 0.355353193254078, + "grad_norm": 0.8506064166609447, + "learning_rate": 1.4943511069669654e-05, + "loss": 0.5514, + "step": 20565 + }, + { + "epoch": 0.35537047276748684, + "grad_norm": 0.45537259898467597, + "learning_rate": 1.494302457967606e-05, + "loss": 0.8543, + "step": 20566 + }, + { + "epoch": 0.35538775228089575, + "grad_norm": 0.7240359570234032, + "learning_rate": 1.4942538074200532e-05, + "loss": 0.4891, + "step": 20567 + }, + { + "epoch": 0.35540503179430466, + "grad_norm": 0.7203760838649541, + "learning_rate": 1.4942051553244607e-05, + "loss": 0.4723, + "step": 20568 + }, + { + "epoch": 0.35542231130771357, + "grad_norm": 0.9350857298642716, + "learning_rate": 1.4941565016809798e-05, + "loss": 0.4522, + "step": 20569 + }, + { + "epoch": 0.3554395908211225, + "grad_norm": 0.5723265773885629, + "learning_rate": 1.494107846489763e-05, + "loss": 0.4428, + "step": 20570 + }, + { + "epoch": 0.3554568703345314, + "grad_norm": 0.7985179818284781, + "learning_rate": 1.4940591897509629e-05, + "loss": 0.7074, + "step": 20571 + }, + { + "epoch": 0.3554741498479403, + "grad_norm": 0.8345785616948475, + "learning_rate": 1.494010531464732e-05, + "loss": 0.4235, + "step": 20572 + }, + { + "epoch": 0.3554914293613492, + "grad_norm": 1.0283572842322721, + "learning_rate": 1.4939618716312225e-05, + "loss": 0.4916, + "step": 20573 + }, + { + "epoch": 0.3555087088747581, + "grad_norm": 0.9067974754498305, + "learning_rate": 1.4939132102505869e-05, + "loss": 0.51, + "step": 20574 + }, + { + "epoch": 0.355525988388167, + "grad_norm": 1.1167765659377042, + "learning_rate": 1.4938645473229775e-05, + "loss": 0.4289, + "step": 20575 + }, + { + "epoch": 0.35554326790157587, + "grad_norm": 1.5415838904248509, + "learning_rate": 1.4938158828485467e-05, + "loss": 0.468, + "step": 20576 + }, + { + "epoch": 0.3555605474149848, + "grad_norm": 1.1665781995598103, + "learning_rate": 1.4937672168274473e-05, + "loss": 0.5503, + "step": 20577 + }, + { + "epoch": 0.3555778269283937, + "grad_norm": 1.186897010703586, + "learning_rate": 1.4937185492598315e-05, + "loss": 0.4822, + "step": 20578 + }, + { + "epoch": 0.3555951064418026, + "grad_norm": 1.147177844063545, + "learning_rate": 1.4936698801458516e-05, + "loss": 0.6129, + "step": 20579 + }, + { + "epoch": 0.3556123859552115, + "grad_norm": 0.5510710273038828, + "learning_rate": 1.4936212094856601e-05, + "loss": 0.8429, + "step": 20580 + }, + { + "epoch": 0.3556296654686204, + "grad_norm": 0.899491460765301, + "learning_rate": 1.4935725372794093e-05, + "loss": 0.67, + "step": 20581 + }, + { + "epoch": 0.3556469449820293, + "grad_norm": 0.6449797253370467, + "learning_rate": 1.493523863527252e-05, + "loss": 0.6267, + "step": 20582 + }, + { + "epoch": 0.3556642244954382, + "grad_norm": 0.5207776522998793, + "learning_rate": 1.4934751882293403e-05, + "loss": 0.4446, + "step": 20583 + }, + { + "epoch": 0.3556815040088471, + "grad_norm": 0.6807820045853982, + "learning_rate": 1.4934265113858268e-05, + "loss": 0.4402, + "step": 20584 + }, + { + "epoch": 0.35569878352225603, + "grad_norm": 0.7240576705066747, + "learning_rate": 1.4933778329968643e-05, + "loss": 0.2984, + "step": 20585 + }, + { + "epoch": 0.35571606303566494, + "grad_norm": 0.9381899328374503, + "learning_rate": 1.4933291530626047e-05, + "loss": 0.5016, + "step": 20586 + }, + { + "epoch": 0.3557333425490738, + "grad_norm": 1.0581875073398903, + "learning_rate": 1.4932804715832004e-05, + "loss": 0.4359, + "step": 20587 + }, + { + "epoch": 0.3557506220624827, + "grad_norm": 1.0550665362477194, + "learning_rate": 1.4932317885588048e-05, + "loss": 0.484, + "step": 20588 + }, + { + "epoch": 0.3557679015758916, + "grad_norm": 0.9606517597399941, + "learning_rate": 1.4931831039895694e-05, + "loss": 0.4727, + "step": 20589 + }, + { + "epoch": 0.3557851810893005, + "grad_norm": 0.8844601901878274, + "learning_rate": 1.4931344178756469e-05, + "loss": 0.4472, + "step": 20590 + }, + { + "epoch": 0.3558024606027094, + "grad_norm": 1.104589171035512, + "learning_rate": 1.4930857302171901e-05, + "loss": 0.3671, + "step": 20591 + }, + { + "epoch": 0.35581974011611833, + "grad_norm": 0.983143468791271, + "learning_rate": 1.4930370410143514e-05, + "loss": 0.5523, + "step": 20592 + }, + { + "epoch": 0.35583701962952724, + "grad_norm": 1.0439675109401576, + "learning_rate": 1.4929883502672828e-05, + "loss": 0.4984, + "step": 20593 + }, + { + "epoch": 0.35585429914293615, + "grad_norm": 0.8819277820962379, + "learning_rate": 1.4929396579761378e-05, + "loss": 0.4382, + "step": 20594 + }, + { + "epoch": 0.35587157865634506, + "grad_norm": 0.7293896252352121, + "learning_rate": 1.492890964141068e-05, + "loss": 0.5064, + "step": 20595 + }, + { + "epoch": 0.35588885816975396, + "grad_norm": 0.8851723789597051, + "learning_rate": 1.492842268762226e-05, + "loss": 0.3301, + "step": 20596 + }, + { + "epoch": 0.3559061376831628, + "grad_norm": 1.0847305717676508, + "learning_rate": 1.4927935718397652e-05, + "loss": 0.4512, + "step": 20597 + }, + { + "epoch": 0.3559234171965717, + "grad_norm": 0.9983489387740905, + "learning_rate": 1.4927448733738372e-05, + "loss": 0.5817, + "step": 20598 + }, + { + "epoch": 0.35594069670998063, + "grad_norm": 1.08520925925094, + "learning_rate": 1.4926961733645949e-05, + "loss": 0.5632, + "step": 20599 + }, + { + "epoch": 0.35595797622338954, + "grad_norm": 1.5804426825698374, + "learning_rate": 1.4926474718121906e-05, + "loss": 0.501, + "step": 20600 + }, + { + "epoch": 0.35597525573679845, + "grad_norm": 1.0105379005372672, + "learning_rate": 1.4925987687167767e-05, + "loss": 0.4729, + "step": 20601 + }, + { + "epoch": 0.35599253525020735, + "grad_norm": 0.89124781606838, + "learning_rate": 1.4925500640785064e-05, + "loss": 0.4492, + "step": 20602 + }, + { + "epoch": 0.35600981476361626, + "grad_norm": 0.895203890842344, + "learning_rate": 1.4925013578975318e-05, + "loss": 0.5565, + "step": 20603 + }, + { + "epoch": 0.35602709427702517, + "grad_norm": 0.3850766188020686, + "learning_rate": 1.4924526501740053e-05, + "loss": 0.5114, + "step": 20604 + }, + { + "epoch": 0.3560443737904341, + "grad_norm": 0.8925853461139416, + "learning_rate": 1.49240394090808e-05, + "loss": 0.4359, + "step": 20605 + }, + { + "epoch": 0.356061653303843, + "grad_norm": 0.8745892296930745, + "learning_rate": 1.492355230099908e-05, + "loss": 0.1983, + "step": 20606 + }, + { + "epoch": 0.3560789328172519, + "grad_norm": 0.9320244272588517, + "learning_rate": 1.4923065177496419e-05, + "loss": 0.6129, + "step": 20607 + }, + { + "epoch": 0.35609621233066074, + "grad_norm": 0.8285883440348683, + "learning_rate": 1.4922578038574344e-05, + "loss": 0.4852, + "step": 20608 + }, + { + "epoch": 0.35611349184406965, + "grad_norm": 1.1549322648449065, + "learning_rate": 1.4922090884234384e-05, + "loss": 0.5161, + "step": 20609 + }, + { + "epoch": 0.35613077135747856, + "grad_norm": 0.7874947819532663, + "learning_rate": 1.4921603714478057e-05, + "loss": 0.4218, + "step": 20610 + }, + { + "epoch": 0.35614805087088747, + "grad_norm": 1.4495119672170351, + "learning_rate": 1.4921116529306896e-05, + "loss": 0.5169, + "step": 20611 + }, + { + "epoch": 0.3561653303842964, + "grad_norm": 0.9112812341073979, + "learning_rate": 1.4920629328722423e-05, + "loss": 0.4877, + "step": 20612 + }, + { + "epoch": 0.3561826098977053, + "grad_norm": 1.1138530782812233, + "learning_rate": 1.4920142112726163e-05, + "loss": 0.5615, + "step": 20613 + }, + { + "epoch": 0.3561998894111142, + "grad_norm": 1.1374436861573174, + "learning_rate": 1.4919654881319648e-05, + "loss": 0.5072, + "step": 20614 + }, + { + "epoch": 0.3562171689245231, + "grad_norm": 0.7634056451394318, + "learning_rate": 1.4919167634504397e-05, + "loss": 0.4214, + "step": 20615 + }, + { + "epoch": 0.356234448437932, + "grad_norm": 1.5620836712342796, + "learning_rate": 1.4918680372281938e-05, + "loss": 0.5615, + "step": 20616 + }, + { + "epoch": 0.3562517279513409, + "grad_norm": 1.9993037959551516, + "learning_rate": 1.49181930946538e-05, + "loss": 0.5082, + "step": 20617 + }, + { + "epoch": 0.35626900746474977, + "grad_norm": 1.316862596169418, + "learning_rate": 1.4917705801621507e-05, + "loss": 0.6289, + "step": 20618 + }, + { + "epoch": 0.3562862869781587, + "grad_norm": 0.9861724744792235, + "learning_rate": 1.4917218493186586e-05, + "loss": 0.3741, + "step": 20619 + }, + { + "epoch": 0.3563035664915676, + "grad_norm": 1.066356652056141, + "learning_rate": 1.4916731169350564e-05, + "loss": 0.4877, + "step": 20620 + }, + { + "epoch": 0.3563208460049765, + "grad_norm": 1.8458122411609161, + "learning_rate": 1.4916243830114967e-05, + "loss": 0.4875, + "step": 20621 + }, + { + "epoch": 0.3563381255183854, + "grad_norm": 0.6500405718999949, + "learning_rate": 1.4915756475481318e-05, + "loss": 0.3683, + "step": 20622 + }, + { + "epoch": 0.3563554050317943, + "grad_norm": 1.1435537454234481, + "learning_rate": 1.491526910545115e-05, + "loss": 0.6128, + "step": 20623 + }, + { + "epoch": 0.3563726845452032, + "grad_norm": 0.426807931998511, + "learning_rate": 1.4914781720025979e-05, + "loss": 0.4745, + "step": 20624 + }, + { + "epoch": 0.3563899640586121, + "grad_norm": 1.2305133323569206, + "learning_rate": 1.4914294319207343e-05, + "loss": 0.5488, + "step": 20625 + }, + { + "epoch": 0.356407243572021, + "grad_norm": 1.4081278487723747, + "learning_rate": 1.4913806902996764e-05, + "loss": 0.3508, + "step": 20626 + }, + { + "epoch": 0.35642452308542993, + "grad_norm": 1.133469298430784, + "learning_rate": 1.4913319471395767e-05, + "loss": 0.5793, + "step": 20627 + }, + { + "epoch": 0.35644180259883884, + "grad_norm": 0.3821297759073899, + "learning_rate": 1.4912832024405879e-05, + "loss": 0.3861, + "step": 20628 + }, + { + "epoch": 0.3564590821122477, + "grad_norm": 0.9643300311438032, + "learning_rate": 1.491234456202863e-05, + "loss": 0.4664, + "step": 20629 + }, + { + "epoch": 0.3564763616256566, + "grad_norm": 1.1479082687537285, + "learning_rate": 1.4911857084265544e-05, + "loss": 0.4141, + "step": 20630 + }, + { + "epoch": 0.3564936411390655, + "grad_norm": 0.6513966017102303, + "learning_rate": 1.4911369591118147e-05, + "loss": 0.3936, + "step": 20631 + }, + { + "epoch": 0.3565109206524744, + "grad_norm": 0.8706563160871293, + "learning_rate": 1.491088208258797e-05, + "loss": 0.5846, + "step": 20632 + }, + { + "epoch": 0.3565282001658833, + "grad_norm": 1.2193480783407475, + "learning_rate": 1.4910394558676532e-05, + "loss": 0.3924, + "step": 20633 + }, + { + "epoch": 0.35654547967929223, + "grad_norm": 0.9746365030788747, + "learning_rate": 1.490990701938537e-05, + "loss": 0.2975, + "step": 20634 + }, + { + "epoch": 0.35656275919270114, + "grad_norm": 0.5740481414641251, + "learning_rate": 1.4909419464716004e-05, + "loss": 0.7482, + "step": 20635 + }, + { + "epoch": 0.35658003870611005, + "grad_norm": 1.33838054403668, + "learning_rate": 1.4908931894669962e-05, + "loss": 0.549, + "step": 20636 + }, + { + "epoch": 0.35659731821951896, + "grad_norm": 1.2299112623344801, + "learning_rate": 1.4908444309248773e-05, + "loss": 0.4507, + "step": 20637 + }, + { + "epoch": 0.35661459773292786, + "grad_norm": 1.3414797577389173, + "learning_rate": 1.4907956708453966e-05, + "loss": 0.479, + "step": 20638 + }, + { + "epoch": 0.35663187724633677, + "grad_norm": 0.6529340359456858, + "learning_rate": 1.4907469092287061e-05, + "loss": 0.4044, + "step": 20639 + }, + { + "epoch": 0.3566491567597456, + "grad_norm": 0.7095778502625733, + "learning_rate": 1.4906981460749595e-05, + "loss": 0.3266, + "step": 20640 + }, + { + "epoch": 0.35666643627315453, + "grad_norm": 1.0470944986201929, + "learning_rate": 1.4906493813843086e-05, + "loss": 0.5509, + "step": 20641 + }, + { + "epoch": 0.35668371578656344, + "grad_norm": 1.1637838527630857, + "learning_rate": 1.4906006151569067e-05, + "loss": 0.4425, + "step": 20642 + }, + { + "epoch": 0.35670099529997235, + "grad_norm": 0.7409237538244646, + "learning_rate": 1.4905518473929067e-05, + "loss": 0.3168, + "step": 20643 + }, + { + "epoch": 0.35671827481338125, + "grad_norm": 1.2601614713720444, + "learning_rate": 1.4905030780924605e-05, + "loss": 0.3928, + "step": 20644 + }, + { + "epoch": 0.35673555432679016, + "grad_norm": 0.7436185110381652, + "learning_rate": 1.4904543072557215e-05, + "loss": 0.364, + "step": 20645 + }, + { + "epoch": 0.35675283384019907, + "grad_norm": 0.49644646511450324, + "learning_rate": 1.4904055348828426e-05, + "loss": 0.7809, + "step": 20646 + }, + { + "epoch": 0.356770113353608, + "grad_norm": 1.9414918058552364, + "learning_rate": 1.490356760973976e-05, + "loss": 0.5242, + "step": 20647 + }, + { + "epoch": 0.3567873928670169, + "grad_norm": 0.9765926049502368, + "learning_rate": 1.490307985529275e-05, + "loss": 0.677, + "step": 20648 + }, + { + "epoch": 0.3568046723804258, + "grad_norm": 1.19661351027201, + "learning_rate": 1.4902592085488921e-05, + "loss": 0.5755, + "step": 20649 + }, + { + "epoch": 0.35682195189383464, + "grad_norm": 0.8739380240624083, + "learning_rate": 1.49021043003298e-05, + "loss": 0.4642, + "step": 20650 + }, + { + "epoch": 0.35683923140724355, + "grad_norm": 1.2042291294359226, + "learning_rate": 1.4901616499816917e-05, + "loss": 0.4073, + "step": 20651 + }, + { + "epoch": 0.35685651092065246, + "grad_norm": 0.9332056099949175, + "learning_rate": 1.4901128683951797e-05, + "loss": 0.556, + "step": 20652 + }, + { + "epoch": 0.35687379043406137, + "grad_norm": 0.9250760566623611, + "learning_rate": 1.490064085273597e-05, + "loss": 0.6356, + "step": 20653 + }, + { + "epoch": 0.3568910699474703, + "grad_norm": 1.8963531205266042, + "learning_rate": 1.4900153006170964e-05, + "loss": 0.5737, + "step": 20654 + }, + { + "epoch": 0.3569083494608792, + "grad_norm": 0.9405568322410506, + "learning_rate": 1.4899665144258307e-05, + "loss": 0.5973, + "step": 20655 + }, + { + "epoch": 0.3569256289742881, + "grad_norm": 1.328588999469816, + "learning_rate": 1.4899177266999523e-05, + "loss": 0.4269, + "step": 20656 + }, + { + "epoch": 0.356942908487697, + "grad_norm": 1.2184297001422795, + "learning_rate": 1.4898689374396147e-05, + "loss": 0.6305, + "step": 20657 + }, + { + "epoch": 0.3569601880011059, + "grad_norm": 1.0064308059567437, + "learning_rate": 1.4898201466449702e-05, + "loss": 0.4734, + "step": 20658 + }, + { + "epoch": 0.3569774675145148, + "grad_norm": 0.7258301669164287, + "learning_rate": 1.4897713543161717e-05, + "loss": 0.2585, + "step": 20659 + }, + { + "epoch": 0.3569947470279237, + "grad_norm": 1.0903255818081214, + "learning_rate": 1.4897225604533725e-05, + "loss": 0.3998, + "step": 20660 + }, + { + "epoch": 0.35701202654133257, + "grad_norm": 0.8096140252517795, + "learning_rate": 1.4896737650567245e-05, + "loss": 0.5785, + "step": 20661 + }, + { + "epoch": 0.3570293060547415, + "grad_norm": 1.1047997238265523, + "learning_rate": 1.4896249681263814e-05, + "loss": 0.3994, + "step": 20662 + }, + { + "epoch": 0.3570465855681504, + "grad_norm": 1.8369889895839202, + "learning_rate": 1.4895761696624957e-05, + "loss": 0.5159, + "step": 20663 + }, + { + "epoch": 0.3570638650815593, + "grad_norm": 0.9704681197967056, + "learning_rate": 1.4895273696652199e-05, + "loss": 0.4958, + "step": 20664 + }, + { + "epoch": 0.3570811445949682, + "grad_norm": 0.9976794186683008, + "learning_rate": 1.4894785681347074e-05, + "loss": 0.4083, + "step": 20665 + }, + { + "epoch": 0.3570984241083771, + "grad_norm": 1.000706429905043, + "learning_rate": 1.489429765071111e-05, + "loss": 0.3651, + "step": 20666 + }, + { + "epoch": 0.357115703621786, + "grad_norm": 1.0868144587766342, + "learning_rate": 1.489380960474583e-05, + "loss": 0.3799, + "step": 20667 + }, + { + "epoch": 0.3571329831351949, + "grad_norm": 1.418745856639499, + "learning_rate": 1.4893321543452767e-05, + "loss": 0.443, + "step": 20668 + }, + { + "epoch": 0.35715026264860383, + "grad_norm": 0.542594429448805, + "learning_rate": 1.4892833466833452e-05, + "loss": 0.3846, + "step": 20669 + }, + { + "epoch": 0.35716754216201274, + "grad_norm": 1.365485642181616, + "learning_rate": 1.4892345374889408e-05, + "loss": 0.4875, + "step": 20670 + }, + { + "epoch": 0.3571848216754216, + "grad_norm": 1.8095851392216873, + "learning_rate": 1.4891857267622172e-05, + "loss": 0.8216, + "step": 20671 + }, + { + "epoch": 0.3572021011888305, + "grad_norm": 0.8745990069972194, + "learning_rate": 1.4891369145033263e-05, + "loss": 0.3297, + "step": 20672 + }, + { + "epoch": 0.3572193807022394, + "grad_norm": 0.8917012422927575, + "learning_rate": 1.4890881007124211e-05, + "loss": 0.7581, + "step": 20673 + }, + { + "epoch": 0.3572366602156483, + "grad_norm": 0.9540714068927854, + "learning_rate": 1.4890392853896552e-05, + "loss": 0.4497, + "step": 20674 + }, + { + "epoch": 0.3572539397290572, + "grad_norm": 1.500621797933716, + "learning_rate": 1.488990468535181e-05, + "loss": 0.5128, + "step": 20675 + }, + { + "epoch": 0.35727121924246613, + "grad_norm": 1.3330883217279184, + "learning_rate": 1.4889416501491513e-05, + "loss": 0.5141, + "step": 20676 + }, + { + "epoch": 0.35728849875587504, + "grad_norm": 0.7977436864370223, + "learning_rate": 1.4888928302317196e-05, + "loss": 0.342, + "step": 20677 + }, + { + "epoch": 0.35730577826928395, + "grad_norm": 0.9953278729641076, + "learning_rate": 1.4888440087830382e-05, + "loss": 0.5859, + "step": 20678 + }, + { + "epoch": 0.35732305778269285, + "grad_norm": 0.8868124777073632, + "learning_rate": 1.4887951858032602e-05, + "loss": 0.3262, + "step": 20679 + }, + { + "epoch": 0.35734033729610176, + "grad_norm": 0.8848662100575857, + "learning_rate": 1.4887463612925383e-05, + "loss": 0.4075, + "step": 20680 + }, + { + "epoch": 0.35735761680951067, + "grad_norm": 1.1776490842951317, + "learning_rate": 1.488697535251026e-05, + "loss": 0.3212, + "step": 20681 + }, + { + "epoch": 0.3573748963229195, + "grad_norm": 0.9465376607050144, + "learning_rate": 1.4886487076788758e-05, + "loss": 0.6595, + "step": 20682 + }, + { + "epoch": 0.35739217583632843, + "grad_norm": 0.6947471447578505, + "learning_rate": 1.4885998785762408e-05, + "loss": 0.3646, + "step": 20683 + }, + { + "epoch": 0.35740945534973734, + "grad_norm": 0.8717080646031824, + "learning_rate": 1.4885510479432734e-05, + "loss": 0.5288, + "step": 20684 + }, + { + "epoch": 0.35742673486314624, + "grad_norm": 0.7240626047668524, + "learning_rate": 1.4885022157801275e-05, + "loss": 0.4195, + "step": 20685 + }, + { + "epoch": 0.35744401437655515, + "grad_norm": 1.0467355858896903, + "learning_rate": 1.4884533820869553e-05, + "loss": 0.5983, + "step": 20686 + }, + { + "epoch": 0.35746129388996406, + "grad_norm": 0.9747335823772302, + "learning_rate": 1.48840454686391e-05, + "loss": 0.5535, + "step": 20687 + }, + { + "epoch": 0.35747857340337297, + "grad_norm": 0.7336004935463678, + "learning_rate": 1.4883557101111446e-05, + "loss": 0.4943, + "step": 20688 + }, + { + "epoch": 0.3574958529167819, + "grad_norm": 1.0044729865914317, + "learning_rate": 1.4883068718288121e-05, + "loss": 0.5071, + "step": 20689 + }, + { + "epoch": 0.3575131324301908, + "grad_norm": 1.1376099903777708, + "learning_rate": 1.488258032017065e-05, + "loss": 0.5979, + "step": 20690 + }, + { + "epoch": 0.3575304119435997, + "grad_norm": 1.275160942933061, + "learning_rate": 1.4882091906760572e-05, + "loss": 0.3982, + "step": 20691 + }, + { + "epoch": 0.35754769145700854, + "grad_norm": 0.9538436854481988, + "learning_rate": 1.4881603478059408e-05, + "loss": 0.3546, + "step": 20692 + }, + { + "epoch": 0.35756497097041745, + "grad_norm": 0.9995597914238475, + "learning_rate": 1.488111503406869e-05, + "loss": 0.4516, + "step": 20693 + }, + { + "epoch": 0.35758225048382636, + "grad_norm": 0.6749304190736367, + "learning_rate": 1.488062657478995e-05, + "loss": 0.3744, + "step": 20694 + }, + { + "epoch": 0.35759952999723527, + "grad_norm": 0.8148096354193417, + "learning_rate": 1.4880138100224715e-05, + "loss": 0.4529, + "step": 20695 + }, + { + "epoch": 0.3576168095106442, + "grad_norm": 0.7595738928095747, + "learning_rate": 1.4879649610374518e-05, + "loss": 0.6454, + "step": 20696 + }, + { + "epoch": 0.3576340890240531, + "grad_norm": 0.9110952200766234, + "learning_rate": 1.4879161105240887e-05, + "loss": 0.3602, + "step": 20697 + }, + { + "epoch": 0.357651368537462, + "grad_norm": 0.9652785277158522, + "learning_rate": 1.4878672584825354e-05, + "loss": 0.3628, + "step": 20698 + }, + { + "epoch": 0.3576686480508709, + "grad_norm": 0.8338504208929226, + "learning_rate": 1.4878184049129445e-05, + "loss": 0.3592, + "step": 20699 + }, + { + "epoch": 0.3576859275642798, + "grad_norm": 0.7858816173450749, + "learning_rate": 1.4877695498154694e-05, + "loss": 0.4271, + "step": 20700 + }, + { + "epoch": 0.3577032070776887, + "grad_norm": 1.6879799680470067, + "learning_rate": 1.487720693190263e-05, + "loss": 0.5245, + "step": 20701 + }, + { + "epoch": 0.3577204865910976, + "grad_norm": 1.0220705632325369, + "learning_rate": 1.4876718350374786e-05, + "loss": 0.5148, + "step": 20702 + }, + { + "epoch": 0.35773776610450647, + "grad_norm": 0.9484615405007203, + "learning_rate": 1.4876229753572688e-05, + "loss": 0.3988, + "step": 20703 + }, + { + "epoch": 0.3577550456179154, + "grad_norm": 0.9372281079959202, + "learning_rate": 1.4875741141497866e-05, + "loss": 0.4104, + "step": 20704 + }, + { + "epoch": 0.3577723251313243, + "grad_norm": 1.6821315176051694, + "learning_rate": 1.4875252514151852e-05, + "loss": 0.4722, + "step": 20705 + }, + { + "epoch": 0.3577896046447332, + "grad_norm": 0.44736944008935514, + "learning_rate": 1.487476387153618e-05, + "loss": 0.6838, + "step": 20706 + }, + { + "epoch": 0.3578068841581421, + "grad_norm": 1.1093697548832409, + "learning_rate": 1.4874275213652374e-05, + "loss": 0.4376, + "step": 20707 + }, + { + "epoch": 0.357824163671551, + "grad_norm": 0.8412405763193131, + "learning_rate": 1.487378654050197e-05, + "loss": 0.4872, + "step": 20708 + }, + { + "epoch": 0.3578414431849599, + "grad_norm": 0.46841311220055404, + "learning_rate": 1.4873297852086492e-05, + "loss": 0.5569, + "step": 20709 + }, + { + "epoch": 0.3578587226983688, + "grad_norm": 1.5178955545602435, + "learning_rate": 1.487280914840748e-05, + "loss": 0.4549, + "step": 20710 + }, + { + "epoch": 0.35787600221177773, + "grad_norm": 0.759693608968993, + "learning_rate": 1.4872320429466456e-05, + "loss": 0.3637, + "step": 20711 + }, + { + "epoch": 0.35789328172518664, + "grad_norm": 0.8307512440938579, + "learning_rate": 1.4871831695264955e-05, + "loss": 0.4041, + "step": 20712 + }, + { + "epoch": 0.35791056123859555, + "grad_norm": 1.100029418675344, + "learning_rate": 1.4871342945804508e-05, + "loss": 0.5947, + "step": 20713 + }, + { + "epoch": 0.3579278407520044, + "grad_norm": 0.8011370634674664, + "learning_rate": 1.4870854181086643e-05, + "loss": 0.3899, + "step": 20714 + }, + { + "epoch": 0.3579451202654133, + "grad_norm": 1.032571511529072, + "learning_rate": 1.4870365401112894e-05, + "loss": 0.591, + "step": 20715 + }, + { + "epoch": 0.3579623997788222, + "grad_norm": 0.8602168906911564, + "learning_rate": 1.4869876605884787e-05, + "loss": 0.34, + "step": 20716 + }, + { + "epoch": 0.3579796792922311, + "grad_norm": 0.8787457729790598, + "learning_rate": 1.486938779540386e-05, + "loss": 0.47, + "step": 20717 + }, + { + "epoch": 0.35799695880564003, + "grad_norm": 0.7926408410524295, + "learning_rate": 1.4868898969671641e-05, + "loss": 0.4475, + "step": 20718 + }, + { + "epoch": 0.35801423831904894, + "grad_norm": 1.1131147814373545, + "learning_rate": 1.4868410128689657e-05, + "loss": 0.8456, + "step": 20719 + }, + { + "epoch": 0.35803151783245785, + "grad_norm": 0.9266857430998606, + "learning_rate": 1.4867921272459445e-05, + "loss": 0.5682, + "step": 20720 + }, + { + "epoch": 0.35804879734586675, + "grad_norm": 0.7926260431568402, + "learning_rate": 1.4867432400982533e-05, + "loss": 0.559, + "step": 20721 + }, + { + "epoch": 0.35806607685927566, + "grad_norm": 1.4618033184488586, + "learning_rate": 1.4866943514260452e-05, + "loss": 0.4922, + "step": 20722 + }, + { + "epoch": 0.35808335637268457, + "grad_norm": 0.46367755462009336, + "learning_rate": 1.4866454612294737e-05, + "loss": 0.6358, + "step": 20723 + }, + { + "epoch": 0.3581006358860934, + "grad_norm": 0.8958632300539895, + "learning_rate": 1.4865965695086913e-05, + "loss": 0.414, + "step": 20724 + }, + { + "epoch": 0.35811791539950233, + "grad_norm": 1.3456164502772658, + "learning_rate": 1.4865476762638516e-05, + "loss": 0.4549, + "step": 20725 + }, + { + "epoch": 0.35813519491291124, + "grad_norm": 1.1293147703684048, + "learning_rate": 1.4864987814951075e-05, + "loss": 0.7718, + "step": 20726 + }, + { + "epoch": 0.35815247442632014, + "grad_norm": 0.8155667596961159, + "learning_rate": 1.4864498852026121e-05, + "loss": 0.5557, + "step": 20727 + }, + { + "epoch": 0.35816975393972905, + "grad_norm": 0.6628765820176348, + "learning_rate": 1.486400987386519e-05, + "loss": 0.3587, + "step": 20728 + }, + { + "epoch": 0.35818703345313796, + "grad_norm": 1.876901415199191, + "learning_rate": 1.486352088046981e-05, + "loss": 0.6507, + "step": 20729 + }, + { + "epoch": 0.35820431296654687, + "grad_norm": 0.9518520858233516, + "learning_rate": 1.4863031871841514e-05, + "loss": 0.3357, + "step": 20730 + }, + { + "epoch": 0.3582215924799558, + "grad_norm": 1.1188815987674232, + "learning_rate": 1.486254284798183e-05, + "loss": 0.395, + "step": 20731 + }, + { + "epoch": 0.3582388719933647, + "grad_norm": 0.6396384778203497, + "learning_rate": 1.4862053808892296e-05, + "loss": 0.5528, + "step": 20732 + }, + { + "epoch": 0.3582561515067736, + "grad_norm": 1.323374223402587, + "learning_rate": 1.4861564754574436e-05, + "loss": 0.5148, + "step": 20733 + }, + { + "epoch": 0.3582734310201825, + "grad_norm": 0.9132912753504587, + "learning_rate": 1.4861075685029787e-05, + "loss": 0.2907, + "step": 20734 + }, + { + "epoch": 0.35829071053359135, + "grad_norm": 0.7305766007433063, + "learning_rate": 1.4860586600259881e-05, + "loss": 0.3641, + "step": 20735 + }, + { + "epoch": 0.35830799004700026, + "grad_norm": 0.9995932893494163, + "learning_rate": 1.4860097500266245e-05, + "loss": 0.5387, + "step": 20736 + }, + { + "epoch": 0.35832526956040917, + "grad_norm": 1.329964854234184, + "learning_rate": 1.4859608385050416e-05, + "loss": 0.6568, + "step": 20737 + }, + { + "epoch": 0.3583425490738181, + "grad_norm": 1.1989921330253086, + "learning_rate": 1.4859119254613925e-05, + "loss": 0.5729, + "step": 20738 + }, + { + "epoch": 0.358359828587227, + "grad_norm": 0.8241451150029424, + "learning_rate": 1.48586301089583e-05, + "loss": 0.456, + "step": 20739 + }, + { + "epoch": 0.3583771081006359, + "grad_norm": 0.9716909923071008, + "learning_rate": 1.4858140948085081e-05, + "loss": 0.6862, + "step": 20740 + }, + { + "epoch": 0.3583943876140448, + "grad_norm": 1.2514721060365863, + "learning_rate": 1.4857651771995794e-05, + "loss": 0.4897, + "step": 20741 + }, + { + "epoch": 0.3584116671274537, + "grad_norm": 0.7047482055776526, + "learning_rate": 1.4857162580691968e-05, + "loss": 0.6347, + "step": 20742 + }, + { + "epoch": 0.3584289466408626, + "grad_norm": 0.8692523654864603, + "learning_rate": 1.4856673374175148e-05, + "loss": 0.2633, + "step": 20743 + }, + { + "epoch": 0.3584462261542715, + "grad_norm": 1.4267526930672145, + "learning_rate": 1.4856184152446852e-05, + "loss": 0.4509, + "step": 20744 + }, + { + "epoch": 0.35846350566768037, + "grad_norm": 0.8437927782594253, + "learning_rate": 1.4855694915508618e-05, + "loss": 0.4447, + "step": 20745 + }, + { + "epoch": 0.3584807851810893, + "grad_norm": 1.6451901479593571, + "learning_rate": 1.4855205663361982e-05, + "loss": 0.7203, + "step": 20746 + }, + { + "epoch": 0.3584980646944982, + "grad_norm": 0.8909916567983263, + "learning_rate": 1.4854716396008471e-05, + "loss": 0.4396, + "step": 20747 + }, + { + "epoch": 0.3585153442079071, + "grad_norm": 0.7516563217433903, + "learning_rate": 1.4854227113449617e-05, + "loss": 0.3771, + "step": 20748 + }, + { + "epoch": 0.358532623721316, + "grad_norm": 0.777153840510611, + "learning_rate": 1.4853737815686958e-05, + "loss": 0.4693, + "step": 20749 + }, + { + "epoch": 0.3585499032347249, + "grad_norm": 0.8090618285790423, + "learning_rate": 1.4853248502722021e-05, + "loss": 0.4824, + "step": 20750 + }, + { + "epoch": 0.3585671827481338, + "grad_norm": 1.251529123646651, + "learning_rate": 1.4852759174556344e-05, + "loss": 0.4639, + "step": 20751 + }, + { + "epoch": 0.3585844622615427, + "grad_norm": 0.8464963375889939, + "learning_rate": 1.4852269831191453e-05, + "loss": 0.6509, + "step": 20752 + }, + { + "epoch": 0.35860174177495163, + "grad_norm": 1.7822069447585158, + "learning_rate": 1.4851780472628887e-05, + "loss": 0.5574, + "step": 20753 + }, + { + "epoch": 0.35861902128836054, + "grad_norm": 1.42601496610187, + "learning_rate": 1.4851291098870175e-05, + "loss": 0.5806, + "step": 20754 + }, + { + "epoch": 0.35863630080176945, + "grad_norm": 0.8904707940960339, + "learning_rate": 1.485080170991685e-05, + "loss": 0.554, + "step": 20755 + }, + { + "epoch": 0.3586535803151783, + "grad_norm": 0.959106116314052, + "learning_rate": 1.4850312305770444e-05, + "loss": 0.5429, + "step": 20756 + }, + { + "epoch": 0.3586708598285872, + "grad_norm": 1.406836185419696, + "learning_rate": 1.4849822886432495e-05, + "loss": 0.5611, + "step": 20757 + }, + { + "epoch": 0.3586881393419961, + "grad_norm": 1.1958135572324644, + "learning_rate": 1.4849333451904531e-05, + "loss": 0.4039, + "step": 20758 + }, + { + "epoch": 0.358705418855405, + "grad_norm": 1.0661900111827816, + "learning_rate": 1.484884400218808e-05, + "loss": 0.3097, + "step": 20759 + }, + { + "epoch": 0.35872269836881393, + "grad_norm": 0.9600018929662625, + "learning_rate": 1.4848354537284688e-05, + "loss": 0.3058, + "step": 20760 + }, + { + "epoch": 0.35873997788222284, + "grad_norm": 0.8565284996727662, + "learning_rate": 1.4847865057195881e-05, + "loss": 0.4058, + "step": 20761 + }, + { + "epoch": 0.35875725739563175, + "grad_norm": 0.47449813208221814, + "learning_rate": 1.4847375561923188e-05, + "loss": 0.5792, + "step": 20762 + }, + { + "epoch": 0.35877453690904065, + "grad_norm": 1.1090420264850307, + "learning_rate": 1.4846886051468153e-05, + "loss": 0.4945, + "step": 20763 + }, + { + "epoch": 0.35879181642244956, + "grad_norm": 1.1372067410391824, + "learning_rate": 1.4846396525832294e-05, + "loss": 0.3361, + "step": 20764 + }, + { + "epoch": 0.35880909593585847, + "grad_norm": 0.8515828004340278, + "learning_rate": 1.484590698501716e-05, + "loss": 0.4279, + "step": 20765 + }, + { + "epoch": 0.3588263754492674, + "grad_norm": 0.874780100640578, + "learning_rate": 1.4845417429024273e-05, + "loss": 0.5584, + "step": 20766 + }, + { + "epoch": 0.35884365496267623, + "grad_norm": 0.9773390085229707, + "learning_rate": 1.4844927857855172e-05, + "loss": 0.493, + "step": 20767 + }, + { + "epoch": 0.35886093447608514, + "grad_norm": 1.137171053671914, + "learning_rate": 1.4844438271511387e-05, + "loss": 0.435, + "step": 20768 + }, + { + "epoch": 0.35887821398949404, + "grad_norm": 0.9032521604812158, + "learning_rate": 1.4843948669994455e-05, + "loss": 0.603, + "step": 20769 + }, + { + "epoch": 0.35889549350290295, + "grad_norm": 0.7497873873285781, + "learning_rate": 1.4843459053305907e-05, + "loss": 0.5543, + "step": 20770 + }, + { + "epoch": 0.35891277301631186, + "grad_norm": 1.2921555415166888, + "learning_rate": 1.4842969421447277e-05, + "loss": 0.4288, + "step": 20771 + }, + { + "epoch": 0.35893005252972077, + "grad_norm": 1.015151604165804, + "learning_rate": 1.48424797744201e-05, + "loss": 0.523, + "step": 20772 + }, + { + "epoch": 0.3589473320431297, + "grad_norm": 1.6622115400057718, + "learning_rate": 1.4841990112225903e-05, + "loss": 0.5622, + "step": 20773 + }, + { + "epoch": 0.3589646115565386, + "grad_norm": 1.3246862919246754, + "learning_rate": 1.4841500434866234e-05, + "loss": 0.4702, + "step": 20774 + }, + { + "epoch": 0.3589818910699475, + "grad_norm": 0.7291510599202525, + "learning_rate": 1.4841010742342614e-05, + "loss": 0.4885, + "step": 20775 + }, + { + "epoch": 0.3589991705833564, + "grad_norm": 0.7978261569278309, + "learning_rate": 1.4840521034656577e-05, + "loss": 0.5824, + "step": 20776 + }, + { + "epoch": 0.35901645009676525, + "grad_norm": 1.0663491639376992, + "learning_rate": 1.4840031311809664e-05, + "loss": 0.3786, + "step": 20777 + }, + { + "epoch": 0.35903372961017416, + "grad_norm": 0.5678208057187694, + "learning_rate": 1.4839541573803401e-05, + "loss": 0.6107, + "step": 20778 + }, + { + "epoch": 0.35905100912358306, + "grad_norm": 1.0966941135482366, + "learning_rate": 1.483905182063933e-05, + "loss": 0.3855, + "step": 20779 + }, + { + "epoch": 0.359068288636992, + "grad_norm": 1.272815855718021, + "learning_rate": 1.483856205231898e-05, + "loss": 0.5773, + "step": 20780 + }, + { + "epoch": 0.3590855681504009, + "grad_norm": 1.0414277694860827, + "learning_rate": 1.4838072268843886e-05, + "loss": 0.3921, + "step": 20781 + }, + { + "epoch": 0.3591028476638098, + "grad_norm": 0.5571519505745379, + "learning_rate": 1.483758247021558e-05, + "loss": 0.64, + "step": 20782 + }, + { + "epoch": 0.3591201271772187, + "grad_norm": 1.0715909238320074, + "learning_rate": 1.4837092656435601e-05, + "loss": 0.49, + "step": 20783 + }, + { + "epoch": 0.3591374066906276, + "grad_norm": 1.5202711214734939, + "learning_rate": 1.483660282750548e-05, + "loss": 0.5048, + "step": 20784 + }, + { + "epoch": 0.3591546862040365, + "grad_norm": 0.9336603289506261, + "learning_rate": 1.4836112983426749e-05, + "loss": 0.5336, + "step": 20785 + }, + { + "epoch": 0.3591719657174454, + "grad_norm": 2.1215374213841995, + "learning_rate": 1.4835623124200946e-05, + "loss": 0.5841, + "step": 20786 + }, + { + "epoch": 0.3591892452308543, + "grad_norm": 0.8200751809965575, + "learning_rate": 1.4835133249829603e-05, + "loss": 0.4823, + "step": 20787 + }, + { + "epoch": 0.3592065247442632, + "grad_norm": 1.3279974772043674, + "learning_rate": 1.4834643360314256e-05, + "loss": 0.4556, + "step": 20788 + }, + { + "epoch": 0.3592238042576721, + "grad_norm": 0.7871601198745541, + "learning_rate": 1.483415345565644e-05, + "loss": 0.4245, + "step": 20789 + }, + { + "epoch": 0.359241083771081, + "grad_norm": 1.0524954535286792, + "learning_rate": 1.4833663535857685e-05, + "loss": 0.4601, + "step": 20790 + }, + { + "epoch": 0.3592583632844899, + "grad_norm": 0.8111792893392049, + "learning_rate": 1.483317360091953e-05, + "loss": 0.4239, + "step": 20791 + }, + { + "epoch": 0.3592756427978988, + "grad_norm": 0.9392602375563233, + "learning_rate": 1.483268365084351e-05, + "loss": 0.3838, + "step": 20792 + }, + { + "epoch": 0.3592929223113077, + "grad_norm": 1.8462094371343476, + "learning_rate": 1.4832193685631154e-05, + "loss": 0.5816, + "step": 20793 + }, + { + "epoch": 0.3593102018247166, + "grad_norm": 0.9399342285380888, + "learning_rate": 1.4831703705284003e-05, + "loss": 0.4669, + "step": 20794 + }, + { + "epoch": 0.35932748133812553, + "grad_norm": 1.796224222061458, + "learning_rate": 1.4831213709803586e-05, + "loss": 0.5833, + "step": 20795 + }, + { + "epoch": 0.35934476085153444, + "grad_norm": 0.8040733087094488, + "learning_rate": 1.4830723699191442e-05, + "loss": 0.3693, + "step": 20796 + }, + { + "epoch": 0.35936204036494335, + "grad_norm": 0.48558047880922955, + "learning_rate": 1.4830233673449104e-05, + "loss": 1.0922, + "step": 20797 + }, + { + "epoch": 0.3593793198783522, + "grad_norm": 0.6152255670575141, + "learning_rate": 1.4829743632578109e-05, + "loss": 0.3924, + "step": 20798 + }, + { + "epoch": 0.3593965993917611, + "grad_norm": 1.2857951234128515, + "learning_rate": 1.4829253576579988e-05, + "loss": 0.439, + "step": 20799 + }, + { + "epoch": 0.35941387890517, + "grad_norm": 1.2602822745793534, + "learning_rate": 1.4828763505456276e-05, + "loss": 0.4423, + "step": 20800 + }, + { + "epoch": 0.3594311584185789, + "grad_norm": 0.5855671981260738, + "learning_rate": 1.4828273419208513e-05, + "loss": 0.339, + "step": 20801 + }, + { + "epoch": 0.35944843793198783, + "grad_norm": 0.7699534254767267, + "learning_rate": 1.482778331783823e-05, + "loss": 0.5308, + "step": 20802 + }, + { + "epoch": 0.35946571744539674, + "grad_norm": 0.9652762432801417, + "learning_rate": 1.4827293201346962e-05, + "loss": 0.4556, + "step": 20803 + }, + { + "epoch": 0.35948299695880565, + "grad_norm": 1.824420324176112, + "learning_rate": 1.4826803069736245e-05, + "loss": 0.5161, + "step": 20804 + }, + { + "epoch": 0.35950027647221455, + "grad_norm": 1.0611013635905275, + "learning_rate": 1.4826312923007613e-05, + "loss": 0.4197, + "step": 20805 + }, + { + "epoch": 0.35951755598562346, + "grad_norm": 1.217595039291246, + "learning_rate": 1.4825822761162605e-05, + "loss": 0.5935, + "step": 20806 + }, + { + "epoch": 0.35953483549903237, + "grad_norm": 1.02736879083315, + "learning_rate": 1.4825332584202748e-05, + "loss": 0.4785, + "step": 20807 + }, + { + "epoch": 0.3595521150124413, + "grad_norm": 1.4180907886444492, + "learning_rate": 1.4824842392129589e-05, + "loss": 0.4267, + "step": 20808 + }, + { + "epoch": 0.35956939452585013, + "grad_norm": 0.9044601158657657, + "learning_rate": 1.4824352184944652e-05, + "loss": 0.3775, + "step": 20809 + }, + { + "epoch": 0.35958667403925904, + "grad_norm": 1.030487101451847, + "learning_rate": 1.4823861962649479e-05, + "loss": 0.4965, + "step": 20810 + }, + { + "epoch": 0.35960395355266794, + "grad_norm": 0.9109905476067427, + "learning_rate": 1.4823371725245603e-05, + "loss": 0.5065, + "step": 20811 + }, + { + "epoch": 0.35962123306607685, + "grad_norm": 0.8567820083648889, + "learning_rate": 1.4822881472734563e-05, + "loss": 0.3768, + "step": 20812 + }, + { + "epoch": 0.35963851257948576, + "grad_norm": 0.4309893130144156, + "learning_rate": 1.4822391205117887e-05, + "loss": 0.8666, + "step": 20813 + }, + { + "epoch": 0.35965579209289467, + "grad_norm": 1.8768889445706436, + "learning_rate": 1.482190092239712e-05, + "loss": 0.7062, + "step": 20814 + }, + { + "epoch": 0.3596730716063036, + "grad_norm": 1.3498700130932217, + "learning_rate": 1.4821410624573793e-05, + "loss": 0.5759, + "step": 20815 + }, + { + "epoch": 0.3596903511197125, + "grad_norm": 1.1979154214668755, + "learning_rate": 1.4820920311649437e-05, + "loss": 0.4516, + "step": 20816 + }, + { + "epoch": 0.3597076306331214, + "grad_norm": 1.0823928576493327, + "learning_rate": 1.4820429983625598e-05, + "loss": 0.3575, + "step": 20817 + }, + { + "epoch": 0.3597249101465303, + "grad_norm": 1.033326932801651, + "learning_rate": 1.4819939640503802e-05, + "loss": 0.4292, + "step": 20818 + }, + { + "epoch": 0.35974218965993915, + "grad_norm": 1.042479500213679, + "learning_rate": 1.4819449282285588e-05, + "loss": 0.6818, + "step": 20819 + }, + { + "epoch": 0.35975946917334806, + "grad_norm": 0.7663524165653319, + "learning_rate": 1.4818958908972494e-05, + "loss": 0.4401, + "step": 20820 + }, + { + "epoch": 0.35977674868675696, + "grad_norm": 0.7955094819546762, + "learning_rate": 1.4818468520566056e-05, + "loss": 0.5224, + "step": 20821 + }, + { + "epoch": 0.35979402820016587, + "grad_norm": 0.6422050253349615, + "learning_rate": 1.4817978117067805e-05, + "loss": 0.2901, + "step": 20822 + }, + { + "epoch": 0.3598113077135748, + "grad_norm": 0.8962410905458201, + "learning_rate": 1.4817487698479282e-05, + "loss": 0.3603, + "step": 20823 + }, + { + "epoch": 0.3598285872269837, + "grad_norm": 0.9061088294989562, + "learning_rate": 1.4816997264802022e-05, + "loss": 0.5805, + "step": 20824 + }, + { + "epoch": 0.3598458667403926, + "grad_norm": 0.6946360927425032, + "learning_rate": 1.4816506816037561e-05, + "loss": 0.3126, + "step": 20825 + }, + { + "epoch": 0.3598631462538015, + "grad_norm": 0.887635057883516, + "learning_rate": 1.4816016352187435e-05, + "loss": 0.4265, + "step": 20826 + }, + { + "epoch": 0.3598804257672104, + "grad_norm": 0.8127093721798698, + "learning_rate": 1.4815525873253178e-05, + "loss": 0.4628, + "step": 20827 + }, + { + "epoch": 0.3598977052806193, + "grad_norm": 0.5640653517757865, + "learning_rate": 1.4815035379236329e-05, + "loss": 0.847, + "step": 20828 + }, + { + "epoch": 0.3599149847940282, + "grad_norm": 1.3422012594291246, + "learning_rate": 1.4814544870138423e-05, + "loss": 0.4582, + "step": 20829 + }, + { + "epoch": 0.3599322643074371, + "grad_norm": 1.2275988047690467, + "learning_rate": 1.4814054345960995e-05, + "loss": 0.4854, + "step": 20830 + }, + { + "epoch": 0.359949543820846, + "grad_norm": 0.7714903614137597, + "learning_rate": 1.4813563806705584e-05, + "loss": 0.5341, + "step": 20831 + }, + { + "epoch": 0.3599668233342549, + "grad_norm": 1.0163627088726164, + "learning_rate": 1.4813073252373726e-05, + "loss": 0.512, + "step": 20832 + }, + { + "epoch": 0.3599841028476638, + "grad_norm": 1.094755980311084, + "learning_rate": 1.4812582682966958e-05, + "loss": 0.404, + "step": 20833 + }, + { + "epoch": 0.3600013823610727, + "grad_norm": 0.9477848445077959, + "learning_rate": 1.4812092098486813e-05, + "loss": 0.4955, + "step": 20834 + }, + { + "epoch": 0.3600186618744816, + "grad_norm": 0.9753458584687626, + "learning_rate": 1.481160149893483e-05, + "loss": 0.4897, + "step": 20835 + }, + { + "epoch": 0.3600359413878905, + "grad_norm": 1.359117325473993, + "learning_rate": 1.4811110884312548e-05, + "loss": 0.4526, + "step": 20836 + }, + { + "epoch": 0.36005322090129943, + "grad_norm": 1.0843807631386928, + "learning_rate": 1.4810620254621499e-05, + "loss": 0.3173, + "step": 20837 + }, + { + "epoch": 0.36007050041470834, + "grad_norm": 0.6951284235733273, + "learning_rate": 1.4810129609863222e-05, + "loss": 0.436, + "step": 20838 + }, + { + "epoch": 0.36008777992811725, + "grad_norm": 0.662918961957121, + "learning_rate": 1.4809638950039252e-05, + "loss": 0.3482, + "step": 20839 + }, + { + "epoch": 0.36010505944152615, + "grad_norm": 0.9931311338913293, + "learning_rate": 1.4809148275151128e-05, + "loss": 0.5201, + "step": 20840 + }, + { + "epoch": 0.360122338954935, + "grad_norm": 0.9855386348619989, + "learning_rate": 1.4808657585200388e-05, + "loss": 0.4884, + "step": 20841 + }, + { + "epoch": 0.3601396184683439, + "grad_norm": 0.7290842098311129, + "learning_rate": 1.4808166880188567e-05, + "loss": 0.4591, + "step": 20842 + }, + { + "epoch": 0.3601568979817528, + "grad_norm": 0.7021198259721962, + "learning_rate": 1.48076761601172e-05, + "loss": 0.3658, + "step": 20843 + }, + { + "epoch": 0.36017417749516173, + "grad_norm": 1.1798584568582593, + "learning_rate": 1.4807185424987826e-05, + "loss": 0.5689, + "step": 20844 + }, + { + "epoch": 0.36019145700857064, + "grad_norm": 0.42684978860940803, + "learning_rate": 1.4806694674801981e-05, + "loss": 0.5583, + "step": 20845 + }, + { + "epoch": 0.36020873652197954, + "grad_norm": 0.8717796185902374, + "learning_rate": 1.4806203909561208e-05, + "loss": 0.346, + "step": 20846 + }, + { + "epoch": 0.36022601603538845, + "grad_norm": 0.4339000149850759, + "learning_rate": 1.4805713129267035e-05, + "loss": 0.7198, + "step": 20847 + }, + { + "epoch": 0.36024329554879736, + "grad_norm": 0.6272042338074151, + "learning_rate": 1.4805222333921003e-05, + "loss": 0.2451, + "step": 20848 + }, + { + "epoch": 0.36026057506220627, + "grad_norm": 1.1765882269117633, + "learning_rate": 1.4804731523524651e-05, + "loss": 0.6365, + "step": 20849 + }, + { + "epoch": 0.3602778545756152, + "grad_norm": 0.9898003897464707, + "learning_rate": 1.4804240698079512e-05, + "loss": 0.4011, + "step": 20850 + }, + { + "epoch": 0.360295134089024, + "grad_norm": 1.6669828967486062, + "learning_rate": 1.4803749857587128e-05, + "loss": 0.5234, + "step": 20851 + }, + { + "epoch": 0.36031241360243293, + "grad_norm": 0.6833795647388345, + "learning_rate": 1.4803259002049038e-05, + "loss": 0.3546, + "step": 20852 + }, + { + "epoch": 0.36032969311584184, + "grad_norm": 0.8361761346681803, + "learning_rate": 1.4802768131466772e-05, + "loss": 0.4873, + "step": 20853 + }, + { + "epoch": 0.36034697262925075, + "grad_norm": 1.1215223671392123, + "learning_rate": 1.480227724584187e-05, + "loss": 0.5187, + "step": 20854 + }, + { + "epoch": 0.36036425214265966, + "grad_norm": 1.0653575415531258, + "learning_rate": 1.4801786345175873e-05, + "loss": 0.5994, + "step": 20855 + }, + { + "epoch": 0.36038153165606857, + "grad_norm": 0.8380710763846125, + "learning_rate": 1.4801295429470315e-05, + "loss": 0.4759, + "step": 20856 + }, + { + "epoch": 0.3603988111694775, + "grad_norm": 1.3882473807641236, + "learning_rate": 1.4800804498726735e-05, + "loss": 0.6672, + "step": 20857 + }, + { + "epoch": 0.3604160906828864, + "grad_norm": 1.9458399754151867, + "learning_rate": 1.4800313552946672e-05, + "loss": 0.5696, + "step": 20858 + }, + { + "epoch": 0.3604333701962953, + "grad_norm": 0.525419469415639, + "learning_rate": 1.4799822592131659e-05, + "loss": 0.7015, + "step": 20859 + }, + { + "epoch": 0.3604506497097042, + "grad_norm": 0.9623484764230317, + "learning_rate": 1.479933161628324e-05, + "loss": 0.5445, + "step": 20860 + }, + { + "epoch": 0.3604679292231131, + "grad_norm": 0.5832117741964025, + "learning_rate": 1.4798840625402949e-05, + "loss": 0.3735, + "step": 20861 + }, + { + "epoch": 0.36048520873652196, + "grad_norm": 1.3414342371005894, + "learning_rate": 1.4798349619492323e-05, + "loss": 0.6537, + "step": 20862 + }, + { + "epoch": 0.36050248824993086, + "grad_norm": 1.3087257731801782, + "learning_rate": 1.4797858598552901e-05, + "loss": 0.6462, + "step": 20863 + }, + { + "epoch": 0.36051976776333977, + "grad_norm": 0.7727374658726877, + "learning_rate": 1.4797367562586224e-05, + "loss": 0.457, + "step": 20864 + }, + { + "epoch": 0.3605370472767487, + "grad_norm": 0.7790327819645676, + "learning_rate": 1.4796876511593822e-05, + "loss": 0.6375, + "step": 20865 + }, + { + "epoch": 0.3605543267901576, + "grad_norm": 0.8381901384047812, + "learning_rate": 1.4796385445577245e-05, + "loss": 0.5497, + "step": 20866 + }, + { + "epoch": 0.3605716063035665, + "grad_norm": 0.7652202920520965, + "learning_rate": 1.4795894364538019e-05, + "loss": 0.3552, + "step": 20867 + }, + { + "epoch": 0.3605888858169754, + "grad_norm": 1.4851998369053283, + "learning_rate": 1.4795403268477691e-05, + "loss": 0.4136, + "step": 20868 + }, + { + "epoch": 0.3606061653303843, + "grad_norm": 0.807503690749522, + "learning_rate": 1.4794912157397794e-05, + "loss": 0.4376, + "step": 20869 + }, + { + "epoch": 0.3606234448437932, + "grad_norm": 0.8214597710357465, + "learning_rate": 1.4794421031299866e-05, + "loss": 0.4004, + "step": 20870 + }, + { + "epoch": 0.3606407243572021, + "grad_norm": 0.723410736981108, + "learning_rate": 1.479392989018545e-05, + "loss": 0.4271, + "step": 20871 + }, + { + "epoch": 0.360658003870611, + "grad_norm": 1.1320279211406947, + "learning_rate": 1.4793438734056081e-05, + "loss": 0.4398, + "step": 20872 + }, + { + "epoch": 0.3606752833840199, + "grad_norm": 1.7013905320442932, + "learning_rate": 1.4792947562913296e-05, + "loss": 0.4684, + "step": 20873 + }, + { + "epoch": 0.3606925628974288, + "grad_norm": 1.1194367707565602, + "learning_rate": 1.4792456376758634e-05, + "loss": 0.5262, + "step": 20874 + }, + { + "epoch": 0.3607098424108377, + "grad_norm": 0.8157298392947434, + "learning_rate": 1.4791965175593636e-05, + "loss": 0.4421, + "step": 20875 + }, + { + "epoch": 0.3607271219242466, + "grad_norm": 0.752062183896501, + "learning_rate": 1.479147395941984e-05, + "loss": 0.3395, + "step": 20876 + }, + { + "epoch": 0.3607444014376555, + "grad_norm": 0.4713676476917215, + "learning_rate": 1.4790982728238782e-05, + "loss": 0.7709, + "step": 20877 + }, + { + "epoch": 0.3607616809510644, + "grad_norm": 1.0999196383406276, + "learning_rate": 1.4790491482052006e-05, + "loss": 0.4992, + "step": 20878 + }, + { + "epoch": 0.36077896046447333, + "grad_norm": 1.0015519123820709, + "learning_rate": 1.4790000220861038e-05, + "loss": 0.6382, + "step": 20879 + }, + { + "epoch": 0.36079623997788224, + "grad_norm": 1.0617011300785386, + "learning_rate": 1.4789508944667433e-05, + "loss": 0.4699, + "step": 20880 + }, + { + "epoch": 0.36081351949129115, + "grad_norm": 0.7672566894031769, + "learning_rate": 1.4789017653472719e-05, + "loss": 0.5789, + "step": 20881 + }, + { + "epoch": 0.36083079900470005, + "grad_norm": 0.9091912730020965, + "learning_rate": 1.4788526347278435e-05, + "loss": 0.4616, + "step": 20882 + }, + { + "epoch": 0.3608480785181089, + "grad_norm": 0.8062857847782027, + "learning_rate": 1.4788035026086127e-05, + "loss": 0.3344, + "step": 20883 + }, + { + "epoch": 0.3608653580315178, + "grad_norm": 0.9161206705772459, + "learning_rate": 1.478754368989733e-05, + "loss": 0.3455, + "step": 20884 + }, + { + "epoch": 0.3608826375449267, + "grad_norm": 0.8234222556013557, + "learning_rate": 1.4787052338713576e-05, + "loss": 0.5379, + "step": 20885 + }, + { + "epoch": 0.36089991705833563, + "grad_norm": 0.8548768329965029, + "learning_rate": 1.4786560972536417e-05, + "loss": 0.4583, + "step": 20886 + }, + { + "epoch": 0.36091719657174454, + "grad_norm": 0.6082280408167074, + "learning_rate": 1.4786069591367383e-05, + "loss": 0.4067, + "step": 20887 + }, + { + "epoch": 0.36093447608515344, + "grad_norm": 1.0060171889294827, + "learning_rate": 1.4785578195208015e-05, + "loss": 0.7338, + "step": 20888 + }, + { + "epoch": 0.36095175559856235, + "grad_norm": 0.9806957881553281, + "learning_rate": 1.4785086784059854e-05, + "loss": 0.5327, + "step": 20889 + }, + { + "epoch": 0.36096903511197126, + "grad_norm": 1.1125686933424404, + "learning_rate": 1.4784595357924435e-05, + "loss": 0.5533, + "step": 20890 + }, + { + "epoch": 0.36098631462538017, + "grad_norm": 1.0103042256605652, + "learning_rate": 1.4784103916803301e-05, + "loss": 0.5455, + "step": 20891 + }, + { + "epoch": 0.3610035941387891, + "grad_norm": 1.549935812885402, + "learning_rate": 1.4783612460697989e-05, + "loss": 0.5394, + "step": 20892 + }, + { + "epoch": 0.3610208736521979, + "grad_norm": 1.390206493584713, + "learning_rate": 1.478312098961004e-05, + "loss": 0.5312, + "step": 20893 + }, + { + "epoch": 0.36103815316560683, + "grad_norm": 0.8546410973768801, + "learning_rate": 1.4782629503540991e-05, + "loss": 0.3958, + "step": 20894 + }, + { + "epoch": 0.36105543267901574, + "grad_norm": 0.9813478965114598, + "learning_rate": 1.4782138002492385e-05, + "loss": 0.4734, + "step": 20895 + }, + { + "epoch": 0.36107271219242465, + "grad_norm": 1.2881009841604198, + "learning_rate": 1.478164648646576e-05, + "loss": 0.5726, + "step": 20896 + }, + { + "epoch": 0.36108999170583356, + "grad_norm": 0.7431570552045419, + "learning_rate": 1.4781154955462654e-05, + "loss": 0.8347, + "step": 20897 + }, + { + "epoch": 0.36110727121924246, + "grad_norm": 0.7348299656970714, + "learning_rate": 1.4780663409484608e-05, + "loss": 0.3324, + "step": 20898 + }, + { + "epoch": 0.3611245507326514, + "grad_norm": 1.2611125461910313, + "learning_rate": 1.4780171848533158e-05, + "loss": 0.4919, + "step": 20899 + }, + { + "epoch": 0.3611418302460603, + "grad_norm": 1.5638180315407426, + "learning_rate": 1.4779680272609848e-05, + "loss": 0.4124, + "step": 20900 + }, + { + "epoch": 0.3611591097594692, + "grad_norm": 1.528413755981818, + "learning_rate": 1.4779188681716217e-05, + "loss": 0.3683, + "step": 20901 + }, + { + "epoch": 0.3611763892728781, + "grad_norm": 1.173553000171291, + "learning_rate": 1.4778697075853803e-05, + "loss": 0.377, + "step": 20902 + }, + { + "epoch": 0.361193668786287, + "grad_norm": 1.0725463696421325, + "learning_rate": 1.4778205455024147e-05, + "loss": 0.3755, + "step": 20903 + }, + { + "epoch": 0.36121094829969586, + "grad_norm": 0.7431923298677675, + "learning_rate": 1.4777713819228789e-05, + "loss": 0.6454, + "step": 20904 + }, + { + "epoch": 0.36122822781310476, + "grad_norm": 0.9513179722225983, + "learning_rate": 1.4777222168469264e-05, + "loss": 0.6151, + "step": 20905 + }, + { + "epoch": 0.36124550732651367, + "grad_norm": 1.5236183121918954, + "learning_rate": 1.4776730502747121e-05, + "loss": 0.5533, + "step": 20906 + }, + { + "epoch": 0.3612627868399226, + "grad_norm": 1.6669219258136618, + "learning_rate": 1.4776238822063895e-05, + "loss": 0.3898, + "step": 20907 + }, + { + "epoch": 0.3612800663533315, + "grad_norm": 1.1655677096487642, + "learning_rate": 1.4775747126421124e-05, + "loss": 0.5071, + "step": 20908 + }, + { + "epoch": 0.3612973458667404, + "grad_norm": 1.0632838661624713, + "learning_rate": 1.4775255415820353e-05, + "loss": 0.4842, + "step": 20909 + }, + { + "epoch": 0.3613146253801493, + "grad_norm": 0.7386132643293314, + "learning_rate": 1.4774763690263114e-05, + "loss": 0.3212, + "step": 20910 + }, + { + "epoch": 0.3613319048935582, + "grad_norm": 0.8044550277539639, + "learning_rate": 1.4774271949750956e-05, + "loss": 0.738, + "step": 20911 + }, + { + "epoch": 0.3613491844069671, + "grad_norm": 0.7724169540253408, + "learning_rate": 1.4773780194285414e-05, + "loss": 0.5273, + "step": 20912 + }, + { + "epoch": 0.361366463920376, + "grad_norm": 0.969146791439077, + "learning_rate": 1.4773288423868031e-05, + "loss": 0.3107, + "step": 20913 + }, + { + "epoch": 0.36138374343378493, + "grad_norm": 0.8104554740163816, + "learning_rate": 1.4772796638500343e-05, + "loss": 0.2894, + "step": 20914 + }, + { + "epoch": 0.3614010229471938, + "grad_norm": 0.9740128757920765, + "learning_rate": 1.4772304838183898e-05, + "loss": 0.5561, + "step": 20915 + }, + { + "epoch": 0.3614183024606027, + "grad_norm": 0.7739087839339455, + "learning_rate": 1.4771813022920229e-05, + "loss": 0.515, + "step": 20916 + }, + { + "epoch": 0.3614355819740116, + "grad_norm": 1.2904976409380415, + "learning_rate": 1.4771321192710878e-05, + "loss": 0.4076, + "step": 20917 + }, + { + "epoch": 0.3614528614874205, + "grad_norm": 0.9850600562219638, + "learning_rate": 1.477082934755739e-05, + "loss": 0.6672, + "step": 20918 + }, + { + "epoch": 0.3614701410008294, + "grad_norm": 0.8597970029206109, + "learning_rate": 1.4770337487461298e-05, + "loss": 0.4993, + "step": 20919 + }, + { + "epoch": 0.3614874205142383, + "grad_norm": 1.089064102586451, + "learning_rate": 1.4769845612424145e-05, + "loss": 0.6065, + "step": 20920 + }, + { + "epoch": 0.36150470002764723, + "grad_norm": 0.9268797769664481, + "learning_rate": 1.4769353722447477e-05, + "loss": 0.4008, + "step": 20921 + }, + { + "epoch": 0.36152197954105614, + "grad_norm": 1.0575751790364878, + "learning_rate": 1.4768861817532826e-05, + "loss": 0.3794, + "step": 20922 + }, + { + "epoch": 0.36153925905446505, + "grad_norm": 1.2303847785685567, + "learning_rate": 1.4768369897681742e-05, + "loss": 0.3717, + "step": 20923 + }, + { + "epoch": 0.36155653856787395, + "grad_norm": 0.6220868378540162, + "learning_rate": 1.476787796289576e-05, + "loss": 0.5669, + "step": 20924 + }, + { + "epoch": 0.3615738180812828, + "grad_norm": 0.7820473753108745, + "learning_rate": 1.4767386013176421e-05, + "loss": 0.4941, + "step": 20925 + }, + { + "epoch": 0.3615910975946917, + "grad_norm": 1.0825239766921269, + "learning_rate": 1.4766894048525268e-05, + "loss": 0.6487, + "step": 20926 + }, + { + "epoch": 0.3616083771081006, + "grad_norm": 1.3997931577889904, + "learning_rate": 1.4766402068943842e-05, + "loss": 0.4038, + "step": 20927 + }, + { + "epoch": 0.36162565662150953, + "grad_norm": 1.5371278829740405, + "learning_rate": 1.476591007443368e-05, + "loss": 0.3938, + "step": 20928 + }, + { + "epoch": 0.36164293613491844, + "grad_norm": 0.7137461215534787, + "learning_rate": 1.4765418064996326e-05, + "loss": 0.7027, + "step": 20929 + }, + { + "epoch": 0.36166021564832734, + "grad_norm": 0.6933610838090729, + "learning_rate": 1.4764926040633318e-05, + "loss": 0.4188, + "step": 20930 + }, + { + "epoch": 0.36167749516173625, + "grad_norm": 0.6575830199966821, + "learning_rate": 1.4764434001346203e-05, + "loss": 0.42, + "step": 20931 + }, + { + "epoch": 0.36169477467514516, + "grad_norm": 0.8341357523313702, + "learning_rate": 1.4763941947136517e-05, + "loss": 0.5504, + "step": 20932 + }, + { + "epoch": 0.36171205418855407, + "grad_norm": 1.168240583898527, + "learning_rate": 1.4763449878005804e-05, + "loss": 0.5183, + "step": 20933 + }, + { + "epoch": 0.361729333701963, + "grad_norm": 1.0198663821941816, + "learning_rate": 1.4762957793955601e-05, + "loss": 0.5879, + "step": 20934 + }, + { + "epoch": 0.3617466132153719, + "grad_norm": 0.884622908305971, + "learning_rate": 1.4762465694987456e-05, + "loss": 0.4159, + "step": 20935 + }, + { + "epoch": 0.36176389272878073, + "grad_norm": 1.033901389475161, + "learning_rate": 1.4761973581102906e-05, + "loss": 0.7571, + "step": 20936 + }, + { + "epoch": 0.36178117224218964, + "grad_norm": 0.951734323313489, + "learning_rate": 1.476148145230349e-05, + "loss": 0.4891, + "step": 20937 + }, + { + "epoch": 0.36179845175559855, + "grad_norm": 1.2529999249557817, + "learning_rate": 1.4760989308590757e-05, + "loss": 0.5048, + "step": 20938 + }, + { + "epoch": 0.36181573126900746, + "grad_norm": 1.3911422355103378, + "learning_rate": 1.476049714996624e-05, + "loss": 0.6121, + "step": 20939 + }, + { + "epoch": 0.36183301078241636, + "grad_norm": 0.7943556183999598, + "learning_rate": 1.4760004976431484e-05, + "loss": 0.57, + "step": 20940 + }, + { + "epoch": 0.36185029029582527, + "grad_norm": 0.899530027499673, + "learning_rate": 1.4759512787988033e-05, + "loss": 0.5672, + "step": 20941 + }, + { + "epoch": 0.3618675698092342, + "grad_norm": 1.1852573624185472, + "learning_rate": 1.4759020584637423e-05, + "loss": 0.394, + "step": 20942 + }, + { + "epoch": 0.3618848493226431, + "grad_norm": 0.7348482307826263, + "learning_rate": 1.47585283663812e-05, + "loss": 0.473, + "step": 20943 + }, + { + "epoch": 0.361902128836052, + "grad_norm": 1.0903878445810824, + "learning_rate": 1.4758036133220905e-05, + "loss": 0.3771, + "step": 20944 + }, + { + "epoch": 0.3619194083494609, + "grad_norm": 0.9515915037429395, + "learning_rate": 1.4757543885158078e-05, + "loss": 0.5694, + "step": 20945 + }, + { + "epoch": 0.36193668786286975, + "grad_norm": 1.0455781154708663, + "learning_rate": 1.4757051622194264e-05, + "loss": 0.5903, + "step": 20946 + }, + { + "epoch": 0.36195396737627866, + "grad_norm": 1.1757589520558047, + "learning_rate": 1.4756559344331004e-05, + "loss": 0.5565, + "step": 20947 + }, + { + "epoch": 0.36197124688968757, + "grad_norm": 0.7770223655226995, + "learning_rate": 1.4756067051569836e-05, + "loss": 0.4306, + "step": 20948 + }, + { + "epoch": 0.3619885264030965, + "grad_norm": 1.0937133587659587, + "learning_rate": 1.4755574743912307e-05, + "loss": 0.5647, + "step": 20949 + }, + { + "epoch": 0.3620058059165054, + "grad_norm": 1.1686616535833658, + "learning_rate": 1.4755082421359954e-05, + "loss": 0.4181, + "step": 20950 + }, + { + "epoch": 0.3620230854299143, + "grad_norm": 0.836948708322006, + "learning_rate": 1.4754590083914321e-05, + "loss": 0.4067, + "step": 20951 + }, + { + "epoch": 0.3620403649433232, + "grad_norm": 1.0610140723704695, + "learning_rate": 1.4754097731576953e-05, + "loss": 0.5656, + "step": 20952 + }, + { + "epoch": 0.3620576444567321, + "grad_norm": 0.9499218359328256, + "learning_rate": 1.4753605364349388e-05, + "loss": 0.4189, + "step": 20953 + }, + { + "epoch": 0.362074923970141, + "grad_norm": 0.9017806931622424, + "learning_rate": 1.4753112982233169e-05, + "loss": 0.6078, + "step": 20954 + }, + { + "epoch": 0.3620922034835499, + "grad_norm": 1.493873568323115, + "learning_rate": 1.4752620585229842e-05, + "loss": 0.4507, + "step": 20955 + }, + { + "epoch": 0.36210948299695883, + "grad_norm": 0.97272042953381, + "learning_rate": 1.4752128173340943e-05, + "loss": 0.6295, + "step": 20956 + }, + { + "epoch": 0.3621267625103677, + "grad_norm": 0.9422937245616498, + "learning_rate": 1.4751635746568018e-05, + "loss": 0.4526, + "step": 20957 + }, + { + "epoch": 0.3621440420237766, + "grad_norm": 0.9135663122798056, + "learning_rate": 1.4751143304912609e-05, + "loss": 0.4829, + "step": 20958 + }, + { + "epoch": 0.3621613215371855, + "grad_norm": 1.0091329068096928, + "learning_rate": 1.475065084837626e-05, + "loss": 0.4691, + "step": 20959 + }, + { + "epoch": 0.3621786010505944, + "grad_norm": 0.5895173034109813, + "learning_rate": 1.475015837696051e-05, + "loss": 0.533, + "step": 20960 + }, + { + "epoch": 0.3621958805640033, + "grad_norm": 1.3473085399758773, + "learning_rate": 1.4749665890666904e-05, + "loss": 0.4327, + "step": 20961 + }, + { + "epoch": 0.3622131600774122, + "grad_norm": 1.092406937051651, + "learning_rate": 1.474917338949698e-05, + "loss": 0.4551, + "step": 20962 + }, + { + "epoch": 0.36223043959082113, + "grad_norm": 0.9810977631591318, + "learning_rate": 1.4748680873452288e-05, + "loss": 0.5381, + "step": 20963 + }, + { + "epoch": 0.36224771910423004, + "grad_norm": 1.1935037859552426, + "learning_rate": 1.4748188342534363e-05, + "loss": 0.4215, + "step": 20964 + }, + { + "epoch": 0.36226499861763894, + "grad_norm": 1.5232490359103337, + "learning_rate": 1.4747695796744754e-05, + "loss": 0.4584, + "step": 20965 + }, + { + "epoch": 0.36228227813104785, + "grad_norm": 0.5692828920063517, + "learning_rate": 1.4747203236085001e-05, + "loss": 0.4564, + "step": 20966 + }, + { + "epoch": 0.3622995576444567, + "grad_norm": 1.137434095013084, + "learning_rate": 1.4746710660556645e-05, + "loss": 0.5043, + "step": 20967 + }, + { + "epoch": 0.3623168371578656, + "grad_norm": 0.44830406040679843, + "learning_rate": 1.4746218070161232e-05, + "loss": 0.5893, + "step": 20968 + }, + { + "epoch": 0.3623341166712745, + "grad_norm": 0.6033314833693841, + "learning_rate": 1.4745725464900305e-05, + "loss": 0.577, + "step": 20969 + }, + { + "epoch": 0.3623513961846834, + "grad_norm": 1.0641732057601572, + "learning_rate": 1.4745232844775403e-05, + "loss": 0.3657, + "step": 20970 + }, + { + "epoch": 0.36236867569809234, + "grad_norm": 0.965140804917045, + "learning_rate": 1.474474020978807e-05, + "loss": 0.4336, + "step": 20971 + }, + { + "epoch": 0.36238595521150124, + "grad_norm": 1.276917311207439, + "learning_rate": 1.4744247559939851e-05, + "loss": 0.4584, + "step": 20972 + }, + { + "epoch": 0.36240323472491015, + "grad_norm": 0.8461733214997901, + "learning_rate": 1.474375489523229e-05, + "loss": 0.6239, + "step": 20973 + }, + { + "epoch": 0.36242051423831906, + "grad_norm": 0.8997030288088563, + "learning_rate": 1.4743262215666922e-05, + "loss": 0.5239, + "step": 20974 + }, + { + "epoch": 0.36243779375172797, + "grad_norm": 0.9115671416709651, + "learning_rate": 1.4742769521245301e-05, + "loss": 0.4254, + "step": 20975 + }, + { + "epoch": 0.3624550732651369, + "grad_norm": 0.6794185291183684, + "learning_rate": 1.4742276811968967e-05, + "loss": 0.3521, + "step": 20976 + }, + { + "epoch": 0.3624723527785458, + "grad_norm": 1.1699750425594082, + "learning_rate": 1.4741784087839458e-05, + "loss": 0.4744, + "step": 20977 + }, + { + "epoch": 0.36248963229195463, + "grad_norm": 0.9694705537605876, + "learning_rate": 1.4741291348858322e-05, + "loss": 0.4176, + "step": 20978 + }, + { + "epoch": 0.36250691180536354, + "grad_norm": 1.1414433476441501, + "learning_rate": 1.4740798595027104e-05, + "loss": 0.4138, + "step": 20979 + }, + { + "epoch": 0.36252419131877245, + "grad_norm": 1.1738715883093507, + "learning_rate": 1.4740305826347343e-05, + "loss": 0.4157, + "step": 20980 + }, + { + "epoch": 0.36254147083218136, + "grad_norm": 1.3958531134636272, + "learning_rate": 1.4739813042820581e-05, + "loss": 0.7585, + "step": 20981 + }, + { + "epoch": 0.36255875034559026, + "grad_norm": 1.2803926447627068, + "learning_rate": 1.4739320244448365e-05, + "loss": 0.3564, + "step": 20982 + }, + { + "epoch": 0.36257602985899917, + "grad_norm": 1.0885538997984583, + "learning_rate": 1.4738827431232242e-05, + "loss": 0.4619, + "step": 20983 + }, + { + "epoch": 0.3625933093724081, + "grad_norm": 0.6317102537585922, + "learning_rate": 1.4738334603173748e-05, + "loss": 0.3752, + "step": 20984 + }, + { + "epoch": 0.362610588885817, + "grad_norm": 0.8429969406751353, + "learning_rate": 1.4737841760274429e-05, + "loss": 0.3999, + "step": 20985 + }, + { + "epoch": 0.3626278683992259, + "grad_norm": 1.336813677460136, + "learning_rate": 1.4737348902535833e-05, + "loss": 0.6687, + "step": 20986 + }, + { + "epoch": 0.3626451479126348, + "grad_norm": 1.3461366541761055, + "learning_rate": 1.4736856029959499e-05, + "loss": 0.3154, + "step": 20987 + }, + { + "epoch": 0.3626624274260437, + "grad_norm": 0.9723746659030891, + "learning_rate": 1.4736363142546969e-05, + "loss": 0.3615, + "step": 20988 + }, + { + "epoch": 0.36267970693945256, + "grad_norm": 1.2210302468472787, + "learning_rate": 1.4735870240299791e-05, + "loss": 0.7006, + "step": 20989 + }, + { + "epoch": 0.36269698645286147, + "grad_norm": 1.235619195500849, + "learning_rate": 1.473537732321951e-05, + "loss": 0.4697, + "step": 20990 + }, + { + "epoch": 0.3627142659662704, + "grad_norm": 1.7984871697892526, + "learning_rate": 1.4734884391307666e-05, + "loss": 0.3159, + "step": 20991 + }, + { + "epoch": 0.3627315454796793, + "grad_norm": 0.9928701869489054, + "learning_rate": 1.4734391444565806e-05, + "loss": 0.4375, + "step": 20992 + }, + { + "epoch": 0.3627488249930882, + "grad_norm": 0.8412899348748548, + "learning_rate": 1.4733898482995469e-05, + "loss": 0.5382, + "step": 20993 + }, + { + "epoch": 0.3627661045064971, + "grad_norm": 0.8258919198197044, + "learning_rate": 1.4733405506598202e-05, + "loss": 0.399, + "step": 20994 + }, + { + "epoch": 0.362783384019906, + "grad_norm": 0.9311804485910448, + "learning_rate": 1.473291251537555e-05, + "loss": 0.4256, + "step": 20995 + }, + { + "epoch": 0.3628006635333149, + "grad_norm": 0.8720806001547197, + "learning_rate": 1.473241950932906e-05, + "loss": 0.3641, + "step": 20996 + }, + { + "epoch": 0.3628179430467238, + "grad_norm": 0.4760777557249783, + "learning_rate": 1.4731926488460267e-05, + "loss": 1.0509, + "step": 20997 + }, + { + "epoch": 0.36283522256013273, + "grad_norm": 1.2676371293621904, + "learning_rate": 1.4731433452770723e-05, + "loss": 0.3678, + "step": 20998 + }, + { + "epoch": 0.3628525020735416, + "grad_norm": 0.6023145225252627, + "learning_rate": 1.473094040226197e-05, + "loss": 0.5771, + "step": 20999 + }, + { + "epoch": 0.3628697815869505, + "grad_norm": 1.3899055824008815, + "learning_rate": 1.4730447336935551e-05, + "loss": 0.4541, + "step": 21000 + }, + { + "epoch": 0.3628870611003594, + "grad_norm": 1.209446164560581, + "learning_rate": 1.4729954256793014e-05, + "loss": 0.469, + "step": 21001 + }, + { + "epoch": 0.3629043406137683, + "grad_norm": 1.6004513832821023, + "learning_rate": 1.4729461161835899e-05, + "loss": 0.6064, + "step": 21002 + }, + { + "epoch": 0.3629216201271772, + "grad_norm": 0.9668031584301403, + "learning_rate": 1.4728968052065753e-05, + "loss": 0.559, + "step": 21003 + }, + { + "epoch": 0.3629388996405861, + "grad_norm": 1.14741618172089, + "learning_rate": 1.4728474927484118e-05, + "loss": 0.4795, + "step": 21004 + }, + { + "epoch": 0.36295617915399503, + "grad_norm": 2.2171791145164277, + "learning_rate": 1.472798178809254e-05, + "loss": 0.4194, + "step": 21005 + }, + { + "epoch": 0.36297345866740394, + "grad_norm": 1.2011367517099918, + "learning_rate": 1.4727488633892566e-05, + "loss": 0.6925, + "step": 21006 + }, + { + "epoch": 0.36299073818081284, + "grad_norm": 0.7926976611761088, + "learning_rate": 1.4726995464885738e-05, + "loss": 0.3969, + "step": 21007 + }, + { + "epoch": 0.36300801769422175, + "grad_norm": 1.223401310432225, + "learning_rate": 1.4726502281073598e-05, + "loss": 0.5044, + "step": 21008 + }, + { + "epoch": 0.36302529720763066, + "grad_norm": 0.9130822417974337, + "learning_rate": 1.4726009082457697e-05, + "loss": 0.4291, + "step": 21009 + }, + { + "epoch": 0.3630425767210395, + "grad_norm": 0.8992301934402228, + "learning_rate": 1.4725515869039577e-05, + "loss": 0.5063, + "step": 21010 + }, + { + "epoch": 0.3630598562344484, + "grad_norm": 0.7223284994460474, + "learning_rate": 1.4725022640820781e-05, + "loss": 0.3074, + "step": 21011 + }, + { + "epoch": 0.3630771357478573, + "grad_norm": 1.0397132708035366, + "learning_rate": 1.4724529397802854e-05, + "loss": 0.2995, + "step": 21012 + }, + { + "epoch": 0.36309441526126623, + "grad_norm": 0.9991013685149376, + "learning_rate": 1.4724036139987341e-05, + "loss": 0.3989, + "step": 21013 + }, + { + "epoch": 0.36311169477467514, + "grad_norm": 1.3647104813397826, + "learning_rate": 1.4723542867375789e-05, + "loss": 0.5876, + "step": 21014 + }, + { + "epoch": 0.36312897428808405, + "grad_norm": 0.796954266615066, + "learning_rate": 1.472304957996974e-05, + "loss": 0.2976, + "step": 21015 + }, + { + "epoch": 0.36314625380149296, + "grad_norm": 0.7120634349035705, + "learning_rate": 1.4722556277770745e-05, + "loss": 0.6338, + "step": 21016 + }, + { + "epoch": 0.36316353331490187, + "grad_norm": 1.2132105342511796, + "learning_rate": 1.4722062960780341e-05, + "loss": 0.5132, + "step": 21017 + }, + { + "epoch": 0.3631808128283108, + "grad_norm": 0.9193165229908663, + "learning_rate": 1.4721569629000078e-05, + "loss": 0.3779, + "step": 21018 + }, + { + "epoch": 0.3631980923417197, + "grad_norm": 0.9382263405780208, + "learning_rate": 1.4721076282431503e-05, + "loss": 0.545, + "step": 21019 + }, + { + "epoch": 0.36321537185512853, + "grad_norm": 1.4077530108931684, + "learning_rate": 1.4720582921076155e-05, + "loss": 0.533, + "step": 21020 + }, + { + "epoch": 0.36323265136853744, + "grad_norm": 0.42847205606659494, + "learning_rate": 1.4720089544935585e-05, + "loss": 0.8298, + "step": 21021 + }, + { + "epoch": 0.36324993088194635, + "grad_norm": 1.2185165711785748, + "learning_rate": 1.4719596154011333e-05, + "loss": 0.5329, + "step": 21022 + }, + { + "epoch": 0.36326721039535526, + "grad_norm": 0.5026910694756341, + "learning_rate": 1.4719102748304947e-05, + "loss": 0.8728, + "step": 21023 + }, + { + "epoch": 0.36328448990876416, + "grad_norm": 1.7024579674577829, + "learning_rate": 1.4718609327817976e-05, + "loss": 0.5752, + "step": 21024 + }, + { + "epoch": 0.36330176942217307, + "grad_norm": 0.8951229449728821, + "learning_rate": 1.4718115892551959e-05, + "loss": 0.3892, + "step": 21025 + }, + { + "epoch": 0.363319048935582, + "grad_norm": 0.7731686651419638, + "learning_rate": 1.4717622442508446e-05, + "loss": 0.4498, + "step": 21026 + }, + { + "epoch": 0.3633363284489909, + "grad_norm": 1.0496710490241272, + "learning_rate": 1.4717128977688982e-05, + "loss": 0.5515, + "step": 21027 + }, + { + "epoch": 0.3633536079623998, + "grad_norm": 1.5468423574320298, + "learning_rate": 1.4716635498095107e-05, + "loss": 0.5121, + "step": 21028 + }, + { + "epoch": 0.3633708874758087, + "grad_norm": 1.0974301762245975, + "learning_rate": 1.4716142003728376e-05, + "loss": 0.6008, + "step": 21029 + }, + { + "epoch": 0.3633881669892176, + "grad_norm": 0.7276404692139705, + "learning_rate": 1.4715648494590327e-05, + "loss": 0.5774, + "step": 21030 + }, + { + "epoch": 0.36340544650262646, + "grad_norm": 0.6408580390167625, + "learning_rate": 1.471515497068251e-05, + "loss": 0.3764, + "step": 21031 + }, + { + "epoch": 0.36342272601603537, + "grad_norm": 0.8134795004951247, + "learning_rate": 1.471466143200647e-05, + "loss": 0.4747, + "step": 21032 + }, + { + "epoch": 0.3634400055294443, + "grad_norm": 0.598391370876586, + "learning_rate": 1.471416787856375e-05, + "loss": 0.7677, + "step": 21033 + }, + { + "epoch": 0.3634572850428532, + "grad_norm": 0.9370407936142487, + "learning_rate": 1.4713674310355899e-05, + "loss": 0.4089, + "step": 21034 + }, + { + "epoch": 0.3634745645562621, + "grad_norm": 1.119354012774654, + "learning_rate": 1.4713180727384462e-05, + "loss": 0.4888, + "step": 21035 + }, + { + "epoch": 0.363491844069671, + "grad_norm": 1.306643760147342, + "learning_rate": 1.4712687129650985e-05, + "loss": 0.5829, + "step": 21036 + }, + { + "epoch": 0.3635091235830799, + "grad_norm": 1.1371707441254075, + "learning_rate": 1.4712193517157012e-05, + "loss": 0.4365, + "step": 21037 + }, + { + "epoch": 0.3635264030964888, + "grad_norm": 1.0545407173963932, + "learning_rate": 1.4711699889904092e-05, + "loss": 0.4444, + "step": 21038 + }, + { + "epoch": 0.3635436826098977, + "grad_norm": 0.7574099481992381, + "learning_rate": 1.471120624789377e-05, + "loss": 0.5223, + "step": 21039 + }, + { + "epoch": 0.36356096212330663, + "grad_norm": 0.4308631248411991, + "learning_rate": 1.471071259112759e-05, + "loss": 0.6224, + "step": 21040 + }, + { + "epoch": 0.3635782416367155, + "grad_norm": 1.0460908040610528, + "learning_rate": 1.4710218919607104e-05, + "loss": 0.407, + "step": 21041 + }, + { + "epoch": 0.3635955211501244, + "grad_norm": 1.1213574174313858, + "learning_rate": 1.470972523333385e-05, + "loss": 0.4865, + "step": 21042 + }, + { + "epoch": 0.3636128006635333, + "grad_norm": 0.8761776083613332, + "learning_rate": 1.4709231532309383e-05, + "loss": 0.6226, + "step": 21043 + }, + { + "epoch": 0.3636300801769422, + "grad_norm": 0.5330751877202372, + "learning_rate": 1.4708737816535243e-05, + "loss": 0.2789, + "step": 21044 + }, + { + "epoch": 0.3636473596903511, + "grad_norm": 1.1308211082265782, + "learning_rate": 1.4708244086012977e-05, + "loss": 0.5252, + "step": 21045 + }, + { + "epoch": 0.36366463920376, + "grad_norm": 1.0347932582976436, + "learning_rate": 1.4707750340744133e-05, + "loss": 0.5753, + "step": 21046 + }, + { + "epoch": 0.36368191871716893, + "grad_norm": 0.37563420813444615, + "learning_rate": 1.470725658073026e-05, + "loss": 0.5958, + "step": 21047 + }, + { + "epoch": 0.36369919823057784, + "grad_norm": 0.7965434581649017, + "learning_rate": 1.4706762805972899e-05, + "loss": 0.4597, + "step": 21048 + }, + { + "epoch": 0.36371647774398674, + "grad_norm": 0.7042279675922041, + "learning_rate": 1.47062690164736e-05, + "loss": 0.3895, + "step": 21049 + }, + { + "epoch": 0.36373375725739565, + "grad_norm": 0.7409348630005482, + "learning_rate": 1.4705775212233907e-05, + "loss": 0.4163, + "step": 21050 + }, + { + "epoch": 0.36375103677080456, + "grad_norm": 0.8166381968331488, + "learning_rate": 1.470528139325537e-05, + "loss": 0.4378, + "step": 21051 + }, + { + "epoch": 0.3637683162842134, + "grad_norm": 1.8034955942084494, + "learning_rate": 1.4704787559539538e-05, + "loss": 0.5385, + "step": 21052 + }, + { + "epoch": 0.3637855957976223, + "grad_norm": 1.2251724600566714, + "learning_rate": 1.470429371108795e-05, + "loss": 0.3719, + "step": 21053 + }, + { + "epoch": 0.3638028753110312, + "grad_norm": 1.4672035352179933, + "learning_rate": 1.4703799847902156e-05, + "loss": 0.5971, + "step": 21054 + }, + { + "epoch": 0.36382015482444013, + "grad_norm": 1.6031958103334027, + "learning_rate": 1.4703305969983704e-05, + "loss": 0.4934, + "step": 21055 + }, + { + "epoch": 0.36383743433784904, + "grad_norm": 1.4884263172055483, + "learning_rate": 1.470281207733414e-05, + "loss": 0.7251, + "step": 21056 + }, + { + "epoch": 0.36385471385125795, + "grad_norm": 1.6876896163365187, + "learning_rate": 1.470231816995501e-05, + "loss": 0.382, + "step": 21057 + }, + { + "epoch": 0.36387199336466686, + "grad_norm": 0.8678567793487219, + "learning_rate": 1.4701824247847864e-05, + "loss": 0.435, + "step": 21058 + }, + { + "epoch": 0.36388927287807576, + "grad_norm": 0.898891239743455, + "learning_rate": 1.4701330311014248e-05, + "loss": 0.6477, + "step": 21059 + }, + { + "epoch": 0.3639065523914847, + "grad_norm": 1.1257993300457445, + "learning_rate": 1.4700836359455706e-05, + "loss": 0.6362, + "step": 21060 + }, + { + "epoch": 0.3639238319048936, + "grad_norm": 1.2381772196866976, + "learning_rate": 1.4700342393173787e-05, + "loss": 0.4648, + "step": 21061 + }, + { + "epoch": 0.3639411114183025, + "grad_norm": 0.8920833078615016, + "learning_rate": 1.4699848412170041e-05, + "loss": 0.4603, + "step": 21062 + }, + { + "epoch": 0.36395839093171134, + "grad_norm": 0.8891855456055651, + "learning_rate": 1.4699354416446013e-05, + "loss": 0.2826, + "step": 21063 + }, + { + "epoch": 0.36397567044512025, + "grad_norm": 1.0258884449708308, + "learning_rate": 1.4698860406003247e-05, + "loss": 0.7286, + "step": 21064 + }, + { + "epoch": 0.36399294995852916, + "grad_norm": 0.9124445967276447, + "learning_rate": 1.4698366380843294e-05, + "loss": 0.5237, + "step": 21065 + }, + { + "epoch": 0.36401022947193806, + "grad_norm": 0.6987663960929195, + "learning_rate": 1.46978723409677e-05, + "loss": 0.3222, + "step": 21066 + }, + { + "epoch": 0.36402750898534697, + "grad_norm": 1.045463177033787, + "learning_rate": 1.4697378286378015e-05, + "loss": 0.3803, + "step": 21067 + }, + { + "epoch": 0.3640447884987559, + "grad_norm": 1.061041285431911, + "learning_rate": 1.469688421707578e-05, + "loss": 0.6561, + "step": 21068 + }, + { + "epoch": 0.3640620680121648, + "grad_norm": 1.1636795469002916, + "learning_rate": 1.469639013306255e-05, + "loss": 0.5589, + "step": 21069 + }, + { + "epoch": 0.3640793475255737, + "grad_norm": 0.7296368330854105, + "learning_rate": 1.469589603433987e-05, + "loss": 0.6853, + "step": 21070 + }, + { + "epoch": 0.3640966270389826, + "grad_norm": 0.7356749826300726, + "learning_rate": 1.4695401920909283e-05, + "loss": 0.4505, + "step": 21071 + }, + { + "epoch": 0.3641139065523915, + "grad_norm": 0.798110314064378, + "learning_rate": 1.4694907792772343e-05, + "loss": 0.4027, + "step": 21072 + }, + { + "epoch": 0.36413118606580036, + "grad_norm": 1.010685707332624, + "learning_rate": 1.4694413649930596e-05, + "loss": 0.4526, + "step": 21073 + }, + { + "epoch": 0.36414846557920927, + "grad_norm": 0.6376717245506842, + "learning_rate": 1.4693919492385587e-05, + "loss": 0.2685, + "step": 21074 + }, + { + "epoch": 0.3641657450926182, + "grad_norm": 0.867427493159675, + "learning_rate": 1.4693425320138866e-05, + "loss": 0.4646, + "step": 21075 + }, + { + "epoch": 0.3641830246060271, + "grad_norm": 0.9244193554778259, + "learning_rate": 1.4692931133191981e-05, + "loss": 0.4739, + "step": 21076 + }, + { + "epoch": 0.364200304119436, + "grad_norm": 1.2303562820065586, + "learning_rate": 1.4692436931546476e-05, + "loss": 0.4414, + "step": 21077 + }, + { + "epoch": 0.3642175836328449, + "grad_norm": 1.0370649105665437, + "learning_rate": 1.4691942715203904e-05, + "loss": 0.5147, + "step": 21078 + }, + { + "epoch": 0.3642348631462538, + "grad_norm": 0.507581561557065, + "learning_rate": 1.469144848416581e-05, + "loss": 0.6068, + "step": 21079 + }, + { + "epoch": 0.3642521426596627, + "grad_norm": 1.4170778457207307, + "learning_rate": 1.4690954238433743e-05, + "loss": 0.3499, + "step": 21080 + }, + { + "epoch": 0.3642694221730716, + "grad_norm": 1.0185246834104706, + "learning_rate": 1.4690459978009253e-05, + "loss": 0.4862, + "step": 21081 + }, + { + "epoch": 0.36428670168648053, + "grad_norm": 1.142399750444393, + "learning_rate": 1.4689965702893885e-05, + "loss": 0.324, + "step": 21082 + }, + { + "epoch": 0.36430398119988944, + "grad_norm": 0.9160875718070359, + "learning_rate": 1.4689471413089188e-05, + "loss": 0.5284, + "step": 21083 + }, + { + "epoch": 0.3643212607132983, + "grad_norm": 1.2036591780629675, + "learning_rate": 1.468897710859671e-05, + "loss": 0.46, + "step": 21084 + }, + { + "epoch": 0.3643385402267072, + "grad_norm": 1.1776938362794427, + "learning_rate": 1.4688482789417996e-05, + "loss": 0.5095, + "step": 21085 + }, + { + "epoch": 0.3643558197401161, + "grad_norm": 0.9553942150612922, + "learning_rate": 1.46879884555546e-05, + "loss": 0.5375, + "step": 21086 + }, + { + "epoch": 0.364373099253525, + "grad_norm": 0.8343315101261655, + "learning_rate": 1.4687494107008069e-05, + "loss": 0.43, + "step": 21087 + }, + { + "epoch": 0.3643903787669339, + "grad_norm": 0.8046712043566615, + "learning_rate": 1.4686999743779949e-05, + "loss": 0.3497, + "step": 21088 + }, + { + "epoch": 0.3644076582803428, + "grad_norm": 0.8583880989047153, + "learning_rate": 1.468650536587179e-05, + "loss": 0.425, + "step": 21089 + }, + { + "epoch": 0.36442493779375174, + "grad_norm": 0.8810873953110828, + "learning_rate": 1.4686010973285138e-05, + "loss": 0.4131, + "step": 21090 + }, + { + "epoch": 0.36444221730716064, + "grad_norm": 1.3274938965037437, + "learning_rate": 1.4685516566021546e-05, + "loss": 0.4647, + "step": 21091 + }, + { + "epoch": 0.36445949682056955, + "grad_norm": 1.1434014668574422, + "learning_rate": 1.4685022144082558e-05, + "loss": 0.4086, + "step": 21092 + }, + { + "epoch": 0.36447677633397846, + "grad_norm": 0.9009400957909214, + "learning_rate": 1.4684527707469731e-05, + "loss": 0.4598, + "step": 21093 + }, + { + "epoch": 0.3644940558473873, + "grad_norm": 1.098537719406676, + "learning_rate": 1.46840332561846e-05, + "loss": 0.4627, + "step": 21094 + }, + { + "epoch": 0.3645113353607962, + "grad_norm": 0.6547514721491371, + "learning_rate": 1.4683538790228724e-05, + "loss": 0.52, + "step": 21095 + }, + { + "epoch": 0.3645286148742051, + "grad_norm": 0.8792492651442629, + "learning_rate": 1.468304430960365e-05, + "loss": 0.6695, + "step": 21096 + }, + { + "epoch": 0.36454589438761403, + "grad_norm": 0.6305674882694262, + "learning_rate": 1.468254981431092e-05, + "loss": 0.551, + "step": 21097 + }, + { + "epoch": 0.36456317390102294, + "grad_norm": 0.824373859038545, + "learning_rate": 1.4682055304352092e-05, + "loss": 0.4389, + "step": 21098 + }, + { + "epoch": 0.36458045341443185, + "grad_norm": 0.9764732486238576, + "learning_rate": 1.468156077972871e-05, + "loss": 0.2853, + "step": 21099 + }, + { + "epoch": 0.36459773292784076, + "grad_norm": 0.7127811645356792, + "learning_rate": 1.4681066240442323e-05, + "loss": 0.5228, + "step": 21100 + }, + { + "epoch": 0.36461501244124966, + "grad_norm": 1.4418305628306716, + "learning_rate": 1.4680571686494483e-05, + "loss": 0.3406, + "step": 21101 + }, + { + "epoch": 0.36463229195465857, + "grad_norm": 0.951584084510179, + "learning_rate": 1.4680077117886734e-05, + "loss": 0.4369, + "step": 21102 + }, + { + "epoch": 0.3646495714680675, + "grad_norm": 0.7300724838776049, + "learning_rate": 1.4679582534620632e-05, + "loss": 0.61, + "step": 21103 + }, + { + "epoch": 0.3646668509814764, + "grad_norm": 0.9955889709267141, + "learning_rate": 1.4679087936697718e-05, + "loss": 0.4869, + "step": 21104 + }, + { + "epoch": 0.36468413049488524, + "grad_norm": 0.8800339725563366, + "learning_rate": 1.4678593324119545e-05, + "loss": 0.4658, + "step": 21105 + }, + { + "epoch": 0.36470141000829415, + "grad_norm": 0.9967363340423927, + "learning_rate": 1.4678098696887664e-05, + "loss": 0.6767, + "step": 21106 + }, + { + "epoch": 0.36471868952170305, + "grad_norm": 0.6749302531338087, + "learning_rate": 1.4677604055003622e-05, + "loss": 0.3064, + "step": 21107 + }, + { + "epoch": 0.36473596903511196, + "grad_norm": 0.8704581532982281, + "learning_rate": 1.4677109398468967e-05, + "loss": 0.3498, + "step": 21108 + }, + { + "epoch": 0.36475324854852087, + "grad_norm": 1.3938861344096367, + "learning_rate": 1.4676614727285253e-05, + "loss": 0.5391, + "step": 21109 + }, + { + "epoch": 0.3647705280619298, + "grad_norm": 0.8531829947505318, + "learning_rate": 1.4676120041454024e-05, + "loss": 0.4582, + "step": 21110 + }, + { + "epoch": 0.3647878075753387, + "grad_norm": 0.6603753536090595, + "learning_rate": 1.4675625340976832e-05, + "loss": 0.3004, + "step": 21111 + }, + { + "epoch": 0.3648050870887476, + "grad_norm": 0.9090861818515354, + "learning_rate": 1.4675130625855228e-05, + "loss": 0.532, + "step": 21112 + }, + { + "epoch": 0.3648223666021565, + "grad_norm": 1.0813691290570897, + "learning_rate": 1.4674635896090759e-05, + "loss": 0.4283, + "step": 21113 + }, + { + "epoch": 0.3648396461155654, + "grad_norm": 1.1166015326066987, + "learning_rate": 1.4674141151684972e-05, + "loss": 0.6273, + "step": 21114 + }, + { + "epoch": 0.3648569256289743, + "grad_norm": 0.995711631674487, + "learning_rate": 1.4673646392639423e-05, + "loss": 0.5831, + "step": 21115 + }, + { + "epoch": 0.36487420514238317, + "grad_norm": 1.1934085569364186, + "learning_rate": 1.4673151618955656e-05, + "loss": 0.4593, + "step": 21116 + }, + { + "epoch": 0.3648914846557921, + "grad_norm": 1.0463265339003411, + "learning_rate": 1.4672656830635223e-05, + "loss": 0.3937, + "step": 21117 + }, + { + "epoch": 0.364908764169201, + "grad_norm": 0.939246339632497, + "learning_rate": 1.4672162027679676e-05, + "loss": 0.5654, + "step": 21118 + }, + { + "epoch": 0.3649260436826099, + "grad_norm": 0.8676146372667045, + "learning_rate": 1.4671667210090561e-05, + "loss": 0.4003, + "step": 21119 + }, + { + "epoch": 0.3649433231960188, + "grad_norm": 0.9019839952609322, + "learning_rate": 1.4671172377869429e-05, + "loss": 0.5421, + "step": 21120 + }, + { + "epoch": 0.3649606027094277, + "grad_norm": 1.2117859288753827, + "learning_rate": 1.467067753101783e-05, + "loss": 0.4339, + "step": 21121 + }, + { + "epoch": 0.3649778822228366, + "grad_norm": 1.357467351266177, + "learning_rate": 1.4670182669537316e-05, + "loss": 0.3957, + "step": 21122 + }, + { + "epoch": 0.3649951617362455, + "grad_norm": 1.748469083058735, + "learning_rate": 1.4669687793429432e-05, + "loss": 0.3513, + "step": 21123 + }, + { + "epoch": 0.36501244124965443, + "grad_norm": 0.9846610292445177, + "learning_rate": 1.4669192902695734e-05, + "loss": 0.5241, + "step": 21124 + }, + { + "epoch": 0.36502972076306334, + "grad_norm": 1.0285216384778642, + "learning_rate": 1.4668697997337765e-05, + "loss": 0.3725, + "step": 21125 + }, + { + "epoch": 0.3650470002764722, + "grad_norm": 0.9165083240838787, + "learning_rate": 1.4668203077357084e-05, + "loss": 0.4885, + "step": 21126 + }, + { + "epoch": 0.3650642797898811, + "grad_norm": 0.5857138334856088, + "learning_rate": 1.4667708142755235e-05, + "loss": 0.3328, + "step": 21127 + }, + { + "epoch": 0.36508155930329, + "grad_norm": 1.1411795837031236, + "learning_rate": 1.4667213193533765e-05, + "loss": 0.5351, + "step": 21128 + }, + { + "epoch": 0.3650988388166989, + "grad_norm": 0.9749525198824031, + "learning_rate": 1.4666718229694233e-05, + "loss": 0.3574, + "step": 21129 + }, + { + "epoch": 0.3651161183301078, + "grad_norm": 0.6099026051142644, + "learning_rate": 1.4666223251238183e-05, + "loss": 0.4311, + "step": 21130 + }, + { + "epoch": 0.3651333978435167, + "grad_norm": 0.9560933841036393, + "learning_rate": 1.4665728258167166e-05, + "loss": 0.5568, + "step": 21131 + }, + { + "epoch": 0.36515067735692563, + "grad_norm": 1.8959385356216973, + "learning_rate": 1.4665233250482735e-05, + "loss": 0.6324, + "step": 21132 + }, + { + "epoch": 0.36516795687033454, + "grad_norm": 1.3734334236261394, + "learning_rate": 1.4664738228186442e-05, + "loss": 0.5157, + "step": 21133 + }, + { + "epoch": 0.36518523638374345, + "grad_norm": 1.1932479455352698, + "learning_rate": 1.466424319127983e-05, + "loss": 0.5756, + "step": 21134 + }, + { + "epoch": 0.36520251589715236, + "grad_norm": 0.4388632599841258, + "learning_rate": 1.4663748139764456e-05, + "loss": 0.5437, + "step": 21135 + }, + { + "epoch": 0.36521979541056127, + "grad_norm": 0.9484158955234929, + "learning_rate": 1.466325307364187e-05, + "loss": 0.6382, + "step": 21136 + }, + { + "epoch": 0.3652370749239701, + "grad_norm": 0.8498996554307329, + "learning_rate": 1.4662757992913617e-05, + "loss": 0.4017, + "step": 21137 + }, + { + "epoch": 0.365254354437379, + "grad_norm": 0.9273367935997123, + "learning_rate": 1.4662262897581252e-05, + "loss": 0.4518, + "step": 21138 + }, + { + "epoch": 0.36527163395078793, + "grad_norm": 1.2210442303430542, + "learning_rate": 1.4661767787646328e-05, + "loss": 0.3299, + "step": 21139 + }, + { + "epoch": 0.36528891346419684, + "grad_norm": 1.323481965333917, + "learning_rate": 1.4661272663110388e-05, + "loss": 0.3403, + "step": 21140 + }, + { + "epoch": 0.36530619297760575, + "grad_norm": 0.829308600877964, + "learning_rate": 1.4660777523974994e-05, + "loss": 0.3629, + "step": 21141 + }, + { + "epoch": 0.36532347249101466, + "grad_norm": 1.3696700104842314, + "learning_rate": 1.4660282370241688e-05, + "loss": 0.4952, + "step": 21142 + }, + { + "epoch": 0.36534075200442356, + "grad_norm": 1.111984225308663, + "learning_rate": 1.4659787201912022e-05, + "loss": 0.7219, + "step": 21143 + }, + { + "epoch": 0.36535803151783247, + "grad_norm": 1.1345168700935695, + "learning_rate": 1.4659292018987552e-05, + "loss": 0.4927, + "step": 21144 + }, + { + "epoch": 0.3653753110312414, + "grad_norm": 0.9821179862371783, + "learning_rate": 1.4658796821469823e-05, + "loss": 0.3242, + "step": 21145 + }, + { + "epoch": 0.3653925905446503, + "grad_norm": 0.9210447198962952, + "learning_rate": 1.4658301609360387e-05, + "loss": 0.3507, + "step": 21146 + }, + { + "epoch": 0.36540987005805914, + "grad_norm": 1.4535565888330255, + "learning_rate": 1.4657806382660797e-05, + "loss": 0.4641, + "step": 21147 + }, + { + "epoch": 0.36542714957146805, + "grad_norm": 1.1331404001143228, + "learning_rate": 1.4657311141372605e-05, + "loss": 0.4146, + "step": 21148 + }, + { + "epoch": 0.36544442908487695, + "grad_norm": 0.8235422281127449, + "learning_rate": 1.465681588549736e-05, + "loss": 0.4833, + "step": 21149 + }, + { + "epoch": 0.36546170859828586, + "grad_norm": 1.8395923773100138, + "learning_rate": 1.4656320615036612e-05, + "loss": 0.5749, + "step": 21150 + }, + { + "epoch": 0.36547898811169477, + "grad_norm": 0.7476897151785823, + "learning_rate": 1.4655825329991915e-05, + "loss": 0.5015, + "step": 21151 + }, + { + "epoch": 0.3654962676251037, + "grad_norm": 0.7274093723326613, + "learning_rate": 1.465533003036482e-05, + "loss": 0.3179, + "step": 21152 + }, + { + "epoch": 0.3655135471385126, + "grad_norm": 0.899572098933919, + "learning_rate": 1.4654834716156879e-05, + "loss": 0.5163, + "step": 21153 + }, + { + "epoch": 0.3655308266519215, + "grad_norm": 1.5034589084267358, + "learning_rate": 1.4654339387369639e-05, + "loss": 0.504, + "step": 21154 + }, + { + "epoch": 0.3655481061653304, + "grad_norm": 0.9990580538883375, + "learning_rate": 1.4653844044004656e-05, + "loss": 0.688, + "step": 21155 + }, + { + "epoch": 0.3655653856787393, + "grad_norm": 0.7697050747024788, + "learning_rate": 1.465334868606348e-05, + "loss": 0.2601, + "step": 21156 + }, + { + "epoch": 0.3655826651921482, + "grad_norm": 0.9128003964310862, + "learning_rate": 1.4652853313547661e-05, + "loss": 0.5088, + "step": 21157 + }, + { + "epoch": 0.36559994470555707, + "grad_norm": 1.0057935418343356, + "learning_rate": 1.4652357926458752e-05, + "loss": 0.3976, + "step": 21158 + }, + { + "epoch": 0.365617224218966, + "grad_norm": 1.0819777008914528, + "learning_rate": 1.4651862524798306e-05, + "loss": 0.481, + "step": 21159 + }, + { + "epoch": 0.3656345037323749, + "grad_norm": 1.0937445740602445, + "learning_rate": 1.4651367108567868e-05, + "loss": 0.3697, + "step": 21160 + }, + { + "epoch": 0.3656517832457838, + "grad_norm": 1.6257084767865158, + "learning_rate": 1.4650871677769001e-05, + "loss": 0.5543, + "step": 21161 + }, + { + "epoch": 0.3656690627591927, + "grad_norm": 1.1570060549138437, + "learning_rate": 1.4650376232403248e-05, + "loss": 0.475, + "step": 21162 + }, + { + "epoch": 0.3656863422726016, + "grad_norm": 1.0635497329894792, + "learning_rate": 1.4649880772472163e-05, + "loss": 0.4145, + "step": 21163 + }, + { + "epoch": 0.3657036217860105, + "grad_norm": 0.9366321892440764, + "learning_rate": 1.4649385297977297e-05, + "loss": 0.4647, + "step": 21164 + }, + { + "epoch": 0.3657209012994194, + "grad_norm": 1.397898439137732, + "learning_rate": 1.4648889808920203e-05, + "loss": 0.6786, + "step": 21165 + }, + { + "epoch": 0.36573818081282833, + "grad_norm": 1.1774114192926597, + "learning_rate": 1.4648394305302435e-05, + "loss": 0.491, + "step": 21166 + }, + { + "epoch": 0.36575546032623724, + "grad_norm": 0.8661903302425513, + "learning_rate": 1.4647898787125543e-05, + "loss": 0.4939, + "step": 21167 + }, + { + "epoch": 0.3657727398396461, + "grad_norm": 1.6531076387363983, + "learning_rate": 1.4647403254391077e-05, + "loss": 0.4788, + "step": 21168 + }, + { + "epoch": 0.365790019353055, + "grad_norm": 1.8747825360637647, + "learning_rate": 1.464690770710059e-05, + "loss": 0.6356, + "step": 21169 + }, + { + "epoch": 0.3658072988664639, + "grad_norm": 1.1764090438813637, + "learning_rate": 1.4646412145255637e-05, + "loss": 0.3665, + "step": 21170 + }, + { + "epoch": 0.3658245783798728, + "grad_norm": 0.8535060136229768, + "learning_rate": 1.4645916568857765e-05, + "loss": 0.3329, + "step": 21171 + }, + { + "epoch": 0.3658418578932817, + "grad_norm": 0.8672948649523748, + "learning_rate": 1.4645420977908533e-05, + "loss": 0.3467, + "step": 21172 + }, + { + "epoch": 0.3658591374066906, + "grad_norm": 0.9769632081133062, + "learning_rate": 1.4644925372409488e-05, + "loss": 0.469, + "step": 21173 + }, + { + "epoch": 0.36587641692009953, + "grad_norm": 1.022595764796952, + "learning_rate": 1.4644429752362181e-05, + "loss": 0.412, + "step": 21174 + }, + { + "epoch": 0.36589369643350844, + "grad_norm": 1.2687066005341954, + "learning_rate": 1.4643934117768175e-05, + "loss": 0.381, + "step": 21175 + }, + { + "epoch": 0.36591097594691735, + "grad_norm": 1.1882404481811908, + "learning_rate": 1.4643438468629008e-05, + "loss": 0.5069, + "step": 21176 + }, + { + "epoch": 0.36592825546032626, + "grad_norm": 1.3890731903467608, + "learning_rate": 1.4642942804946238e-05, + "loss": 0.6121, + "step": 21177 + }, + { + "epoch": 0.36594553497373516, + "grad_norm": 0.9348650076127314, + "learning_rate": 1.4642447126721422e-05, + "loss": 0.5499, + "step": 21178 + }, + { + "epoch": 0.365962814487144, + "grad_norm": 1.200953968062343, + "learning_rate": 1.4641951433956108e-05, + "loss": 0.4769, + "step": 21179 + }, + { + "epoch": 0.3659800940005529, + "grad_norm": 0.7672072891881554, + "learning_rate": 1.4641455726651846e-05, + "loss": 0.4826, + "step": 21180 + }, + { + "epoch": 0.36599737351396183, + "grad_norm": 1.0291593977833775, + "learning_rate": 1.4640960004810195e-05, + "loss": 0.5441, + "step": 21181 + }, + { + "epoch": 0.36601465302737074, + "grad_norm": 1.1401548183016945, + "learning_rate": 1.4640464268432703e-05, + "loss": 0.3118, + "step": 21182 + }, + { + "epoch": 0.36603193254077965, + "grad_norm": 1.3772562198396274, + "learning_rate": 1.4639968517520923e-05, + "loss": 0.394, + "step": 21183 + }, + { + "epoch": 0.36604921205418856, + "grad_norm": 0.9715618786106577, + "learning_rate": 1.4639472752076413e-05, + "loss": 0.5082, + "step": 21184 + }, + { + "epoch": 0.36606649156759746, + "grad_norm": 1.0473959255406466, + "learning_rate": 1.463897697210072e-05, + "loss": 0.5901, + "step": 21185 + }, + { + "epoch": 0.36608377108100637, + "grad_norm": 1.0797613204006422, + "learning_rate": 1.4638481177595401e-05, + "loss": 0.4549, + "step": 21186 + }, + { + "epoch": 0.3661010505944153, + "grad_norm": 1.0589690908288207, + "learning_rate": 1.4637985368562003e-05, + "loss": 0.5697, + "step": 21187 + }, + { + "epoch": 0.3661183301078242, + "grad_norm": 0.9810325240041171, + "learning_rate": 1.4637489545002081e-05, + "loss": 0.7681, + "step": 21188 + }, + { + "epoch": 0.3661356096212331, + "grad_norm": 0.9135670804448679, + "learning_rate": 1.4636993706917192e-05, + "loss": 0.4393, + "step": 21189 + }, + { + "epoch": 0.36615288913464195, + "grad_norm": 1.437714468867015, + "learning_rate": 1.4636497854308887e-05, + "loss": 0.2834, + "step": 21190 + }, + { + "epoch": 0.36617016864805085, + "grad_norm": 0.6932457758227745, + "learning_rate": 1.4636001987178717e-05, + "loss": 0.4783, + "step": 21191 + }, + { + "epoch": 0.36618744816145976, + "grad_norm": 0.9095669591672998, + "learning_rate": 1.4635506105528234e-05, + "loss": 0.4368, + "step": 21192 + }, + { + "epoch": 0.36620472767486867, + "grad_norm": 1.273774256311707, + "learning_rate": 1.4635010209358998e-05, + "loss": 0.4501, + "step": 21193 + }, + { + "epoch": 0.3662220071882776, + "grad_norm": 1.0355221256503178, + "learning_rate": 1.4634514298672554e-05, + "loss": 0.4679, + "step": 21194 + }, + { + "epoch": 0.3662392867016865, + "grad_norm": 1.049912934895478, + "learning_rate": 1.4634018373470462e-05, + "loss": 0.6367, + "step": 21195 + }, + { + "epoch": 0.3662565662150954, + "grad_norm": 1.4790315906124327, + "learning_rate": 1.4633522433754274e-05, + "loss": 0.4763, + "step": 21196 + }, + { + "epoch": 0.3662738457285043, + "grad_norm": 1.5458542704884712, + "learning_rate": 1.4633026479525536e-05, + "loss": 0.6659, + "step": 21197 + }, + { + "epoch": 0.3662911252419132, + "grad_norm": 1.0794351788209662, + "learning_rate": 1.4632530510785812e-05, + "loss": 0.4202, + "step": 21198 + }, + { + "epoch": 0.3663084047553221, + "grad_norm": 1.2909432656305546, + "learning_rate": 1.4632034527536647e-05, + "loss": 0.5716, + "step": 21199 + }, + { + "epoch": 0.36632568426873097, + "grad_norm": 1.0205953593795982, + "learning_rate": 1.4631538529779599e-05, + "loss": 0.5519, + "step": 21200 + }, + { + "epoch": 0.3663429637821399, + "grad_norm": 1.4594352637236587, + "learning_rate": 1.463104251751622e-05, + "loss": 0.5649, + "step": 21201 + }, + { + "epoch": 0.3663602432955488, + "grad_norm": 1.1412024914358567, + "learning_rate": 1.4630546490748066e-05, + "loss": 0.5952, + "step": 21202 + }, + { + "epoch": 0.3663775228089577, + "grad_norm": 0.6895781516695336, + "learning_rate": 1.4630050449476686e-05, + "loss": 0.4257, + "step": 21203 + }, + { + "epoch": 0.3663948023223666, + "grad_norm": 0.8270259325250946, + "learning_rate": 1.4629554393703635e-05, + "loss": 0.6203, + "step": 21204 + }, + { + "epoch": 0.3664120818357755, + "grad_norm": 0.9370214298801812, + "learning_rate": 1.4629058323430471e-05, + "loss": 0.5771, + "step": 21205 + }, + { + "epoch": 0.3664293613491844, + "grad_norm": 1.004462955380342, + "learning_rate": 1.4628562238658743e-05, + "loss": 0.5093, + "step": 21206 + }, + { + "epoch": 0.3664466408625933, + "grad_norm": 0.8795884816772253, + "learning_rate": 1.4628066139390007e-05, + "loss": 0.5956, + "step": 21207 + }, + { + "epoch": 0.36646392037600223, + "grad_norm": 1.7477770207043963, + "learning_rate": 1.4627570025625815e-05, + "loss": 0.3846, + "step": 21208 + }, + { + "epoch": 0.36648119988941114, + "grad_norm": 1.5878264977196248, + "learning_rate": 1.4627073897367723e-05, + "loss": 0.3385, + "step": 21209 + }, + { + "epoch": 0.36649847940282004, + "grad_norm": 0.8474381003871395, + "learning_rate": 1.4626577754617284e-05, + "loss": 0.4787, + "step": 21210 + }, + { + "epoch": 0.3665157589162289, + "grad_norm": 0.9059455310077017, + "learning_rate": 1.462608159737605e-05, + "loss": 0.5283, + "step": 21211 + }, + { + "epoch": 0.3665330384296378, + "grad_norm": 2.0300775761442296, + "learning_rate": 1.4625585425645579e-05, + "loss": 0.6651, + "step": 21212 + }, + { + "epoch": 0.3665503179430467, + "grad_norm": 0.7750895950964158, + "learning_rate": 1.4625089239427422e-05, + "loss": 0.4293, + "step": 21213 + }, + { + "epoch": 0.3665675974564556, + "grad_norm": 0.6879865453871303, + "learning_rate": 1.4624593038723134e-05, + "loss": 0.3307, + "step": 21214 + }, + { + "epoch": 0.3665848769698645, + "grad_norm": 0.8937150945505109, + "learning_rate": 1.4624096823534268e-05, + "loss": 0.4136, + "step": 21215 + }, + { + "epoch": 0.36660215648327343, + "grad_norm": 0.7018828750692946, + "learning_rate": 1.4623600593862384e-05, + "loss": 0.3275, + "step": 21216 + }, + { + "epoch": 0.36661943599668234, + "grad_norm": 0.8526269135493667, + "learning_rate": 1.4623104349709028e-05, + "loss": 0.6222, + "step": 21217 + }, + { + "epoch": 0.36663671551009125, + "grad_norm": 0.9767401316265563, + "learning_rate": 1.4622608091075759e-05, + "loss": 0.583, + "step": 21218 + }, + { + "epoch": 0.36665399502350016, + "grad_norm": 0.8999825523348414, + "learning_rate": 1.4622111817964129e-05, + "loss": 0.2769, + "step": 21219 + }, + { + "epoch": 0.36667127453690906, + "grad_norm": 1.4476434639998115, + "learning_rate": 1.4621615530375692e-05, + "loss": 0.3705, + "step": 21220 + }, + { + "epoch": 0.3666885540503179, + "grad_norm": 1.1896785015965767, + "learning_rate": 1.4621119228312007e-05, + "loss": 0.5067, + "step": 21221 + }, + { + "epoch": 0.3667058335637268, + "grad_norm": 0.8379674642781565, + "learning_rate": 1.4620622911774622e-05, + "loss": 0.7632, + "step": 21222 + }, + { + "epoch": 0.36672311307713573, + "grad_norm": 0.8164050888409515, + "learning_rate": 1.4620126580765096e-05, + "loss": 0.5829, + "step": 21223 + }, + { + "epoch": 0.36674039259054464, + "grad_norm": 0.8423201481849696, + "learning_rate": 1.4619630235284985e-05, + "loss": 0.4519, + "step": 21224 + }, + { + "epoch": 0.36675767210395355, + "grad_norm": 1.1005681377531615, + "learning_rate": 1.461913387533584e-05, + "loss": 0.4665, + "step": 21225 + }, + { + "epoch": 0.36677495161736245, + "grad_norm": 1.2837476385435875, + "learning_rate": 1.4618637500919212e-05, + "loss": 0.3876, + "step": 21226 + }, + { + "epoch": 0.36679223113077136, + "grad_norm": 0.9759260017106748, + "learning_rate": 1.4618141112036667e-05, + "loss": 0.5028, + "step": 21227 + }, + { + "epoch": 0.36680951064418027, + "grad_norm": 1.1930464927507682, + "learning_rate": 1.461764470868975e-05, + "loss": 0.3767, + "step": 21228 + }, + { + "epoch": 0.3668267901575892, + "grad_norm": 0.5637658338224827, + "learning_rate": 1.461714829088002e-05, + "loss": 0.566, + "step": 21229 + }, + { + "epoch": 0.3668440696709981, + "grad_norm": 0.9986938521926554, + "learning_rate": 1.4616651858609033e-05, + "loss": 0.4723, + "step": 21230 + }, + { + "epoch": 0.366861349184407, + "grad_norm": 1.3608138626215005, + "learning_rate": 1.4616155411878335e-05, + "loss": 0.4344, + "step": 21231 + }, + { + "epoch": 0.36687862869781585, + "grad_norm": 0.9746649919985946, + "learning_rate": 1.4615658950689492e-05, + "loss": 0.4462, + "step": 21232 + }, + { + "epoch": 0.36689590821122475, + "grad_norm": 0.6174830573064974, + "learning_rate": 1.4615162475044054e-05, + "loss": 0.6952, + "step": 21233 + }, + { + "epoch": 0.36691318772463366, + "grad_norm": 0.8590573422398814, + "learning_rate": 1.4614665984943574e-05, + "loss": 0.4498, + "step": 21234 + }, + { + "epoch": 0.36693046723804257, + "grad_norm": 0.4616616285947855, + "learning_rate": 1.461416948038961e-05, + "loss": 0.6287, + "step": 21235 + }, + { + "epoch": 0.3669477467514515, + "grad_norm": 0.9199134746007429, + "learning_rate": 1.461367296138372e-05, + "loss": 0.5663, + "step": 21236 + }, + { + "epoch": 0.3669650262648604, + "grad_norm": 1.0841881053375726, + "learning_rate": 1.4613176427927454e-05, + "loss": 0.4528, + "step": 21237 + }, + { + "epoch": 0.3669823057782693, + "grad_norm": 0.8946714458698763, + "learning_rate": 1.4612679880022369e-05, + "loss": 0.4594, + "step": 21238 + }, + { + "epoch": 0.3669995852916782, + "grad_norm": 1.8860278502446732, + "learning_rate": 1.4612183317670019e-05, + "loss": 0.5701, + "step": 21239 + }, + { + "epoch": 0.3670168648050871, + "grad_norm": 1.1555864765227801, + "learning_rate": 1.461168674087196e-05, + "loss": 0.469, + "step": 21240 + }, + { + "epoch": 0.367034144318496, + "grad_norm": 1.002001962018866, + "learning_rate": 1.4611190149629748e-05, + "loss": 0.5296, + "step": 21241 + }, + { + "epoch": 0.36705142383190487, + "grad_norm": 1.4231277967362899, + "learning_rate": 1.4610693543944938e-05, + "loss": 0.5489, + "step": 21242 + }, + { + "epoch": 0.3670687033453138, + "grad_norm": 1.0681340650618787, + "learning_rate": 1.4610196923819085e-05, + "loss": 0.5418, + "step": 21243 + }, + { + "epoch": 0.3670859828587227, + "grad_norm": 0.6435698676520962, + "learning_rate": 1.4609700289253745e-05, + "loss": 0.4689, + "step": 21244 + }, + { + "epoch": 0.3671032623721316, + "grad_norm": 0.9263946014824476, + "learning_rate": 1.4609203640250476e-05, + "loss": 0.3918, + "step": 21245 + }, + { + "epoch": 0.3671205418855405, + "grad_norm": 0.7876459729650809, + "learning_rate": 1.4608706976810827e-05, + "loss": 0.395, + "step": 21246 + }, + { + "epoch": 0.3671378213989494, + "grad_norm": 1.0561237257840144, + "learning_rate": 1.4608210298936361e-05, + "loss": 0.5651, + "step": 21247 + }, + { + "epoch": 0.3671551009123583, + "grad_norm": 1.0279506932058087, + "learning_rate": 1.4607713606628627e-05, + "loss": 0.3668, + "step": 21248 + }, + { + "epoch": 0.3671723804257672, + "grad_norm": 1.2724450096069757, + "learning_rate": 1.4607216899889186e-05, + "loss": 0.5949, + "step": 21249 + }, + { + "epoch": 0.3671896599391761, + "grad_norm": 0.9781387463335368, + "learning_rate": 1.4606720178719591e-05, + "loss": 0.4354, + "step": 21250 + }, + { + "epoch": 0.36720693945258503, + "grad_norm": 0.5129924953816744, + "learning_rate": 1.4606223443121397e-05, + "loss": 0.7971, + "step": 21251 + }, + { + "epoch": 0.36722421896599394, + "grad_norm": 0.9581355644165948, + "learning_rate": 1.4605726693096164e-05, + "loss": 0.5238, + "step": 21252 + }, + { + "epoch": 0.3672414984794028, + "grad_norm": 1.218472925191904, + "learning_rate": 1.4605229928645442e-05, + "loss": 0.4539, + "step": 21253 + }, + { + "epoch": 0.3672587779928117, + "grad_norm": 1.2119421148773069, + "learning_rate": 1.4604733149770791e-05, + "loss": 0.4807, + "step": 21254 + }, + { + "epoch": 0.3672760575062206, + "grad_norm": 0.9860768081472708, + "learning_rate": 1.4604236356473767e-05, + "loss": 0.6443, + "step": 21255 + }, + { + "epoch": 0.3672933370196295, + "grad_norm": 0.8003188456086908, + "learning_rate": 1.4603739548755921e-05, + "loss": 0.3675, + "step": 21256 + }, + { + "epoch": 0.3673106165330384, + "grad_norm": 0.8449263548873986, + "learning_rate": 1.4603242726618816e-05, + "loss": 0.5057, + "step": 21257 + }, + { + "epoch": 0.36732789604644733, + "grad_norm": 1.4663112614413079, + "learning_rate": 1.4602745890064008e-05, + "loss": 0.4103, + "step": 21258 + }, + { + "epoch": 0.36734517555985624, + "grad_norm": 1.1382515369272967, + "learning_rate": 1.4602249039093047e-05, + "loss": 0.4032, + "step": 21259 + }, + { + "epoch": 0.36736245507326515, + "grad_norm": 1.1400291777508524, + "learning_rate": 1.4601752173707489e-05, + "loss": 0.558, + "step": 21260 + }, + { + "epoch": 0.36737973458667406, + "grad_norm": 0.4881711424578792, + "learning_rate": 1.4601255293908897e-05, + "loss": 0.8317, + "step": 21261 + }, + { + "epoch": 0.36739701410008296, + "grad_norm": 0.48916237925586564, + "learning_rate": 1.4600758399698825e-05, + "loss": 0.3833, + "step": 21262 + }, + { + "epoch": 0.36741429361349187, + "grad_norm": 0.9411362966780721, + "learning_rate": 1.4600261491078821e-05, + "loss": 0.5304, + "step": 21263 + }, + { + "epoch": 0.3674315731269007, + "grad_norm": 1.0391687779733787, + "learning_rate": 1.4599764568050456e-05, + "loss": 0.361, + "step": 21264 + }, + { + "epoch": 0.36744885264030963, + "grad_norm": 1.0073147604985837, + "learning_rate": 1.4599267630615278e-05, + "loss": 0.4047, + "step": 21265 + }, + { + "epoch": 0.36746613215371854, + "grad_norm": 0.9578637831746868, + "learning_rate": 1.459877067877484e-05, + "loss": 0.547, + "step": 21266 + }, + { + "epoch": 0.36748341166712745, + "grad_norm": 0.8408685460416657, + "learning_rate": 1.4598273712530707e-05, + "loss": 0.4687, + "step": 21267 + }, + { + "epoch": 0.36750069118053635, + "grad_norm": 0.9316159507587728, + "learning_rate": 1.4597776731884428e-05, + "loss": 0.5042, + "step": 21268 + }, + { + "epoch": 0.36751797069394526, + "grad_norm": 0.8732891119519075, + "learning_rate": 1.4597279736837567e-05, + "loss": 0.6655, + "step": 21269 + }, + { + "epoch": 0.36753525020735417, + "grad_norm": 0.8306023631572268, + "learning_rate": 1.4596782727391674e-05, + "loss": 0.3547, + "step": 21270 + }, + { + "epoch": 0.3675525297207631, + "grad_norm": 1.0620437467874302, + "learning_rate": 1.4596285703548305e-05, + "loss": 0.52, + "step": 21271 + }, + { + "epoch": 0.367569809234172, + "grad_norm": 0.5709479020158599, + "learning_rate": 1.4595788665309026e-05, + "loss": 0.5326, + "step": 21272 + }, + { + "epoch": 0.3675870887475809, + "grad_norm": 0.6188923736736496, + "learning_rate": 1.4595291612675385e-05, + "loss": 0.3189, + "step": 21273 + }, + { + "epoch": 0.36760436826098974, + "grad_norm": 1.4313439407848683, + "learning_rate": 1.4594794545648938e-05, + "loss": 0.4181, + "step": 21274 + }, + { + "epoch": 0.36762164777439865, + "grad_norm": 0.7398282616290561, + "learning_rate": 1.4594297464231249e-05, + "loss": 0.2862, + "step": 21275 + }, + { + "epoch": 0.36763892728780756, + "grad_norm": 1.0349301313676442, + "learning_rate": 1.4593800368423871e-05, + "loss": 0.3375, + "step": 21276 + }, + { + "epoch": 0.36765620680121647, + "grad_norm": 0.6629553550653509, + "learning_rate": 1.459330325822836e-05, + "loss": 0.472, + "step": 21277 + }, + { + "epoch": 0.3676734863146254, + "grad_norm": 1.07161654803572, + "learning_rate": 1.4592806133646276e-05, + "loss": 0.5385, + "step": 21278 + }, + { + "epoch": 0.3676907658280343, + "grad_norm": 1.8247792811984282, + "learning_rate": 1.4592308994679173e-05, + "loss": 0.4993, + "step": 21279 + }, + { + "epoch": 0.3677080453414432, + "grad_norm": 0.9994102258156371, + "learning_rate": 1.4591811841328607e-05, + "loss": 0.3224, + "step": 21280 + }, + { + "epoch": 0.3677253248548521, + "grad_norm": 0.8338867297007156, + "learning_rate": 1.4591314673596142e-05, + "loss": 0.4666, + "step": 21281 + }, + { + "epoch": 0.367742604368261, + "grad_norm": 1.8798346176741814, + "learning_rate": 1.4590817491483326e-05, + "loss": 0.5662, + "step": 21282 + }, + { + "epoch": 0.3677598838816699, + "grad_norm": 1.0302960278045639, + "learning_rate": 1.4590320294991722e-05, + "loss": 0.6414, + "step": 21283 + }, + { + "epoch": 0.3677771633950788, + "grad_norm": 0.4618511066457711, + "learning_rate": 1.458982308412289e-05, + "loss": 0.8886, + "step": 21284 + }, + { + "epoch": 0.3677944429084877, + "grad_norm": 0.9525386213890956, + "learning_rate": 1.4589325858878379e-05, + "loss": 0.4698, + "step": 21285 + }, + { + "epoch": 0.3678117224218966, + "grad_norm": 0.4680229599993873, + "learning_rate": 1.458882861925975e-05, + "loss": 0.4221, + "step": 21286 + }, + { + "epoch": 0.3678290019353055, + "grad_norm": 0.7821892667994246, + "learning_rate": 1.4588331365268564e-05, + "loss": 0.4421, + "step": 21287 + }, + { + "epoch": 0.3678462814487144, + "grad_norm": 0.8409651474902268, + "learning_rate": 1.4587834096906376e-05, + "loss": 0.4191, + "step": 21288 + }, + { + "epoch": 0.3678635609621233, + "grad_norm": 1.3493373804948106, + "learning_rate": 1.458733681417474e-05, + "loss": 0.6351, + "step": 21289 + }, + { + "epoch": 0.3678808404755322, + "grad_norm": 1.2671428333866794, + "learning_rate": 1.4586839517075217e-05, + "loss": 0.549, + "step": 21290 + }, + { + "epoch": 0.3678981199889411, + "grad_norm": 0.8372101132706039, + "learning_rate": 1.4586342205609363e-05, + "loss": 0.5197, + "step": 21291 + }, + { + "epoch": 0.36791539950235, + "grad_norm": 1.4556741462673282, + "learning_rate": 1.4585844879778738e-05, + "loss": 0.5838, + "step": 21292 + }, + { + "epoch": 0.36793267901575893, + "grad_norm": 0.5397457065162703, + "learning_rate": 1.45853475395849e-05, + "loss": 0.3562, + "step": 21293 + }, + { + "epoch": 0.36794995852916784, + "grad_norm": 1.606431514305747, + "learning_rate": 1.4584850185029402e-05, + "loss": 0.5206, + "step": 21294 + }, + { + "epoch": 0.3679672380425767, + "grad_norm": 1.0126880930988575, + "learning_rate": 1.4584352816113808e-05, + "loss": 0.4375, + "step": 21295 + }, + { + "epoch": 0.3679845175559856, + "grad_norm": 1.011449265143705, + "learning_rate": 1.458385543283967e-05, + "loss": 0.5262, + "step": 21296 + }, + { + "epoch": 0.3680017970693945, + "grad_norm": 1.277063748108508, + "learning_rate": 1.4583358035208547e-05, + "loss": 0.6229, + "step": 21297 + }, + { + "epoch": 0.3680190765828034, + "grad_norm": 0.9925090415402279, + "learning_rate": 1.4582860623222002e-05, + "loss": 0.6323, + "step": 21298 + }, + { + "epoch": 0.3680363560962123, + "grad_norm": 1.0053234192600526, + "learning_rate": 1.458236319688159e-05, + "loss": 0.4653, + "step": 21299 + }, + { + "epoch": 0.36805363560962123, + "grad_norm": 1.4770848542726727, + "learning_rate": 1.4581865756188865e-05, + "loss": 0.5453, + "step": 21300 + }, + { + "epoch": 0.36807091512303014, + "grad_norm": 1.171988592294597, + "learning_rate": 1.4581368301145391e-05, + "loss": 0.4626, + "step": 21301 + }, + { + "epoch": 0.36808819463643905, + "grad_norm": 1.726732267159741, + "learning_rate": 1.4580870831752723e-05, + "loss": 0.5387, + "step": 21302 + }, + { + "epoch": 0.36810547414984796, + "grad_norm": 0.7879501594603446, + "learning_rate": 1.4580373348012415e-05, + "loss": 0.465, + "step": 21303 + }, + { + "epoch": 0.36812275366325686, + "grad_norm": 0.8457575662051907, + "learning_rate": 1.4579875849926033e-05, + "loss": 0.4398, + "step": 21304 + }, + { + "epoch": 0.36814003317666577, + "grad_norm": 1.2136967931281015, + "learning_rate": 1.4579378337495134e-05, + "loss": 0.3326, + "step": 21305 + }, + { + "epoch": 0.3681573126900746, + "grad_norm": 0.7489600216740812, + "learning_rate": 1.457888081072127e-05, + "loss": 0.4186, + "step": 21306 + }, + { + "epoch": 0.36817459220348353, + "grad_norm": 1.1521143382489778, + "learning_rate": 1.4578383269606004e-05, + "loss": 0.5803, + "step": 21307 + }, + { + "epoch": 0.36819187171689244, + "grad_norm": 1.8095004967969481, + "learning_rate": 1.4577885714150895e-05, + "loss": 0.5034, + "step": 21308 + }, + { + "epoch": 0.36820915123030135, + "grad_norm": 1.0079748323603739, + "learning_rate": 1.45773881443575e-05, + "loss": 0.6503, + "step": 21309 + }, + { + "epoch": 0.36822643074371025, + "grad_norm": 0.5953744419888222, + "learning_rate": 1.4576890560227376e-05, + "loss": 0.4078, + "step": 21310 + }, + { + "epoch": 0.36824371025711916, + "grad_norm": 0.6521608673334356, + "learning_rate": 1.4576392961762084e-05, + "loss": 0.4831, + "step": 21311 + }, + { + "epoch": 0.36826098977052807, + "grad_norm": 0.8297028581242062, + "learning_rate": 1.4575895348963183e-05, + "loss": 0.4329, + "step": 21312 + }, + { + "epoch": 0.368278269283937, + "grad_norm": 1.4833394734688017, + "learning_rate": 1.4575397721832228e-05, + "loss": 0.5067, + "step": 21313 + }, + { + "epoch": 0.3682955487973459, + "grad_norm": 1.0739160003215784, + "learning_rate": 1.4574900080370777e-05, + "loss": 0.8326, + "step": 21314 + }, + { + "epoch": 0.3683128283107548, + "grad_norm": 0.8429679349250678, + "learning_rate": 1.4574402424580396e-05, + "loss": 0.3601, + "step": 21315 + }, + { + "epoch": 0.36833010782416364, + "grad_norm": 0.8497408618910153, + "learning_rate": 1.4573904754462638e-05, + "loss": 0.392, + "step": 21316 + }, + { + "epoch": 0.36834738733757255, + "grad_norm": 0.9635143959123812, + "learning_rate": 1.4573407070019061e-05, + "loss": 0.4989, + "step": 21317 + }, + { + "epoch": 0.36836466685098146, + "grad_norm": 1.50285069016726, + "learning_rate": 1.4572909371251224e-05, + "loss": 0.5226, + "step": 21318 + }, + { + "epoch": 0.36838194636439037, + "grad_norm": 0.6250196082954326, + "learning_rate": 1.4572411658160695e-05, + "loss": 0.2736, + "step": 21319 + }, + { + "epoch": 0.3683992258777993, + "grad_norm": 0.8772021326305045, + "learning_rate": 1.4571913930749017e-05, + "loss": 0.7987, + "step": 21320 + }, + { + "epoch": 0.3684165053912082, + "grad_norm": 1.361014011850031, + "learning_rate": 1.4571416189017762e-05, + "loss": 0.6093, + "step": 21321 + }, + { + "epoch": 0.3684337849046171, + "grad_norm": 1.0866816326785413, + "learning_rate": 1.4570918432968481e-05, + "loss": 0.3333, + "step": 21322 + }, + { + "epoch": 0.368451064418026, + "grad_norm": 0.5990378346215315, + "learning_rate": 1.4570420662602738e-05, + "loss": 0.6892, + "step": 21323 + }, + { + "epoch": 0.3684683439314349, + "grad_norm": 0.856758497423282, + "learning_rate": 1.4569922877922088e-05, + "loss": 0.3464, + "step": 21324 + }, + { + "epoch": 0.3684856234448438, + "grad_norm": 0.9071278209805145, + "learning_rate": 1.4569425078928095e-05, + "loss": 0.3907, + "step": 21325 + }, + { + "epoch": 0.3685029029582527, + "grad_norm": 1.1911960473642278, + "learning_rate": 1.4568927265622313e-05, + "loss": 0.4441, + "step": 21326 + }, + { + "epoch": 0.3685201824716616, + "grad_norm": 1.157484394590305, + "learning_rate": 1.4568429438006304e-05, + "loss": 0.3449, + "step": 21327 + }, + { + "epoch": 0.3685374619850705, + "grad_norm": 0.8835703569756165, + "learning_rate": 1.4567931596081628e-05, + "loss": 0.3244, + "step": 21328 + }, + { + "epoch": 0.3685547414984794, + "grad_norm": 1.9432954936992228, + "learning_rate": 1.4567433739849841e-05, + "loss": 0.4838, + "step": 21329 + }, + { + "epoch": 0.3685720210118883, + "grad_norm": 0.773830756127993, + "learning_rate": 1.456693586931251e-05, + "loss": 0.5235, + "step": 21330 + }, + { + "epoch": 0.3685893005252972, + "grad_norm": 0.6774531350653872, + "learning_rate": 1.4566437984471183e-05, + "loss": 0.4458, + "step": 21331 + }, + { + "epoch": 0.3686065800387061, + "grad_norm": 1.415427434208715, + "learning_rate": 1.4565940085327426e-05, + "loss": 0.6101, + "step": 21332 + }, + { + "epoch": 0.368623859552115, + "grad_norm": 0.8371669249129762, + "learning_rate": 1.45654421718828e-05, + "loss": 0.5584, + "step": 21333 + }, + { + "epoch": 0.3686411390655239, + "grad_norm": 1.0970237477199394, + "learning_rate": 1.4564944244138858e-05, + "loss": 0.4495, + "step": 21334 + }, + { + "epoch": 0.36865841857893283, + "grad_norm": 0.9058808579502792, + "learning_rate": 1.4564446302097169e-05, + "loss": 0.3744, + "step": 21335 + }, + { + "epoch": 0.36867569809234174, + "grad_norm": 1.241282776048598, + "learning_rate": 1.4563948345759284e-05, + "loss": 0.384, + "step": 21336 + }, + { + "epoch": 0.36869297760575065, + "grad_norm": 0.5096319645269616, + "learning_rate": 1.4563450375126764e-05, + "loss": 0.5645, + "step": 21337 + }, + { + "epoch": 0.3687102571191595, + "grad_norm": 0.8861967622660296, + "learning_rate": 1.4562952390201175e-05, + "loss": 0.5518, + "step": 21338 + }, + { + "epoch": 0.3687275366325684, + "grad_norm": 1.508105360098071, + "learning_rate": 1.4562454390984072e-05, + "loss": 0.5895, + "step": 21339 + }, + { + "epoch": 0.3687448161459773, + "grad_norm": 1.2850784840140195, + "learning_rate": 1.4561956377477013e-05, + "loss": 0.3464, + "step": 21340 + }, + { + "epoch": 0.3687620956593862, + "grad_norm": 1.050982000408968, + "learning_rate": 1.4561458349681562e-05, + "loss": 0.2586, + "step": 21341 + }, + { + "epoch": 0.36877937517279513, + "grad_norm": 1.3265492497208944, + "learning_rate": 1.4560960307599275e-05, + "loss": 0.5495, + "step": 21342 + }, + { + "epoch": 0.36879665468620404, + "grad_norm": 0.7506475033277912, + "learning_rate": 1.4560462251231714e-05, + "loss": 0.4972, + "step": 21343 + }, + { + "epoch": 0.36881393419961295, + "grad_norm": 1.4327248369443513, + "learning_rate": 1.4559964180580437e-05, + "loss": 0.3427, + "step": 21344 + }, + { + "epoch": 0.36883121371302185, + "grad_norm": 1.3411635856474053, + "learning_rate": 1.455946609564701e-05, + "loss": 0.575, + "step": 21345 + }, + { + "epoch": 0.36884849322643076, + "grad_norm": 0.9027402201250649, + "learning_rate": 1.4558967996432985e-05, + "loss": 0.3959, + "step": 21346 + }, + { + "epoch": 0.36886577273983967, + "grad_norm": 1.0769444350116426, + "learning_rate": 1.4558469882939926e-05, + "loss": 0.4286, + "step": 21347 + }, + { + "epoch": 0.3688830522532485, + "grad_norm": 0.9192224818790528, + "learning_rate": 1.4557971755169393e-05, + "loss": 0.3082, + "step": 21348 + }, + { + "epoch": 0.36890033176665743, + "grad_norm": 0.8829496247681328, + "learning_rate": 1.4557473613122946e-05, + "loss": 0.3168, + "step": 21349 + }, + { + "epoch": 0.36891761128006634, + "grad_norm": 0.5175927632374296, + "learning_rate": 1.4556975456802149e-05, + "loss": 0.6133, + "step": 21350 + }, + { + "epoch": 0.36893489079347525, + "grad_norm": 0.8793361503218925, + "learning_rate": 1.4556477286208554e-05, + "loss": 0.3938, + "step": 21351 + }, + { + "epoch": 0.36895217030688415, + "grad_norm": 1.1035978876446728, + "learning_rate": 1.455597910134373e-05, + "loss": 0.5417, + "step": 21352 + }, + { + "epoch": 0.36896944982029306, + "grad_norm": 0.878203502778756, + "learning_rate": 1.455548090220923e-05, + "loss": 0.3946, + "step": 21353 + }, + { + "epoch": 0.36898672933370197, + "grad_norm": 1.0416034597999124, + "learning_rate": 1.4554982688806616e-05, + "loss": 0.5661, + "step": 21354 + }, + { + "epoch": 0.3690040088471109, + "grad_norm": 0.9851064211411985, + "learning_rate": 1.4554484461137453e-05, + "loss": 0.6085, + "step": 21355 + }, + { + "epoch": 0.3690212883605198, + "grad_norm": 1.1657702548236795, + "learning_rate": 1.4553986219203297e-05, + "loss": 0.7308, + "step": 21356 + }, + { + "epoch": 0.3690385678739287, + "grad_norm": 0.9453485040597965, + "learning_rate": 1.4553487963005712e-05, + "loss": 0.6022, + "step": 21357 + }, + { + "epoch": 0.3690558473873376, + "grad_norm": 1.8302836538387475, + "learning_rate": 1.4552989692546255e-05, + "loss": 0.4262, + "step": 21358 + }, + { + "epoch": 0.36907312690074645, + "grad_norm": 1.3584829137260324, + "learning_rate": 1.455249140782649e-05, + "loss": 0.574, + "step": 21359 + }, + { + "epoch": 0.36909040641415536, + "grad_norm": 0.7420597659020813, + "learning_rate": 1.4551993108847972e-05, + "loss": 0.4813, + "step": 21360 + }, + { + "epoch": 0.36910768592756427, + "grad_norm": 0.9679222605905969, + "learning_rate": 1.4551494795612274e-05, + "loss": 0.5316, + "step": 21361 + }, + { + "epoch": 0.3691249654409732, + "grad_norm": 1.0394933343657347, + "learning_rate": 1.4550996468120941e-05, + "loss": 0.5474, + "step": 21362 + }, + { + "epoch": 0.3691422449543821, + "grad_norm": 1.711648125516876, + "learning_rate": 1.4550498126375542e-05, + "loss": 0.5583, + "step": 21363 + }, + { + "epoch": 0.369159524467791, + "grad_norm": 0.8276904329467455, + "learning_rate": 1.4549999770377638e-05, + "loss": 0.4013, + "step": 21364 + }, + { + "epoch": 0.3691768039811999, + "grad_norm": 1.6709194011323762, + "learning_rate": 1.4549501400128789e-05, + "loss": 0.5505, + "step": 21365 + }, + { + "epoch": 0.3691940834946088, + "grad_norm": 1.3884535050214482, + "learning_rate": 1.4549003015630555e-05, + "loss": 0.3111, + "step": 21366 + }, + { + "epoch": 0.3692113630080177, + "grad_norm": 1.840303869408224, + "learning_rate": 1.45485046168845e-05, + "loss": 0.4103, + "step": 21367 + }, + { + "epoch": 0.3692286425214266, + "grad_norm": 1.1368634176337093, + "learning_rate": 1.4548006203892182e-05, + "loss": 0.6217, + "step": 21368 + }, + { + "epoch": 0.36924592203483547, + "grad_norm": 1.1230751322132686, + "learning_rate": 1.4547507776655162e-05, + "loss": 0.5146, + "step": 21369 + }, + { + "epoch": 0.3692632015482444, + "grad_norm": 0.6528041314570239, + "learning_rate": 1.4547009335175002e-05, + "loss": 0.4354, + "step": 21370 + }, + { + "epoch": 0.3692804810616533, + "grad_norm": 0.8951945165944537, + "learning_rate": 1.4546510879453263e-05, + "loss": 0.5904, + "step": 21371 + }, + { + "epoch": 0.3692977605750622, + "grad_norm": 0.9437335331441409, + "learning_rate": 1.454601240949151e-05, + "loss": 0.5131, + "step": 21372 + }, + { + "epoch": 0.3693150400884711, + "grad_norm": 1.177296980012314, + "learning_rate": 1.4545513925291298e-05, + "loss": 0.3527, + "step": 21373 + }, + { + "epoch": 0.36933231960188, + "grad_norm": 0.7601033120004896, + "learning_rate": 1.454501542685419e-05, + "loss": 0.4059, + "step": 21374 + }, + { + "epoch": 0.3693495991152889, + "grad_norm": 0.8478772965377277, + "learning_rate": 1.454451691418175e-05, + "loss": 0.4747, + "step": 21375 + }, + { + "epoch": 0.3693668786286978, + "grad_norm": 0.6054976462275029, + "learning_rate": 1.4544018387275536e-05, + "loss": 0.3709, + "step": 21376 + }, + { + "epoch": 0.36938415814210673, + "grad_norm": 1.5429826928373862, + "learning_rate": 1.4543519846137111e-05, + "loss": 0.4137, + "step": 21377 + }, + { + "epoch": 0.36940143765551564, + "grad_norm": 1.247986551382444, + "learning_rate": 1.4543021290768037e-05, + "loss": 0.4471, + "step": 21378 + }, + { + "epoch": 0.36941871716892455, + "grad_norm": 1.1843544014462881, + "learning_rate": 1.4542522721169876e-05, + "loss": 0.6041, + "step": 21379 + }, + { + "epoch": 0.3694359966823334, + "grad_norm": 0.4599509367445138, + "learning_rate": 1.4542024137344186e-05, + "loss": 0.5048, + "step": 21380 + }, + { + "epoch": 0.3694532761957423, + "grad_norm": 0.7800679069461136, + "learning_rate": 1.4541525539292536e-05, + "loss": 0.5111, + "step": 21381 + }, + { + "epoch": 0.3694705557091512, + "grad_norm": 0.6536606152239636, + "learning_rate": 1.4541026927016481e-05, + "loss": 0.433, + "step": 21382 + }, + { + "epoch": 0.3694878352225601, + "grad_norm": 0.9509391464120829, + "learning_rate": 1.4540528300517582e-05, + "loss": 0.34, + "step": 21383 + }, + { + "epoch": 0.36950511473596903, + "grad_norm": 1.5433825543965367, + "learning_rate": 1.4540029659797406e-05, + "loss": 0.538, + "step": 21384 + }, + { + "epoch": 0.36952239424937794, + "grad_norm": 0.386384185209618, + "learning_rate": 1.4539531004857512e-05, + "loss": 0.4891, + "step": 21385 + }, + { + "epoch": 0.36953967376278685, + "grad_norm": 1.0753841429584616, + "learning_rate": 1.4539032335699459e-05, + "loss": 0.4799, + "step": 21386 + }, + { + "epoch": 0.36955695327619575, + "grad_norm": 1.3130682346124243, + "learning_rate": 1.4538533652324813e-05, + "loss": 0.487, + "step": 21387 + }, + { + "epoch": 0.36957423278960466, + "grad_norm": 1.0003910941705265, + "learning_rate": 1.4538034954735136e-05, + "loss": 0.6736, + "step": 21388 + }, + { + "epoch": 0.36959151230301357, + "grad_norm": 0.8435148748813037, + "learning_rate": 1.453753624293199e-05, + "loss": 0.5434, + "step": 21389 + }, + { + "epoch": 0.3696087918164225, + "grad_norm": 0.8516924657413734, + "learning_rate": 1.4537037516916933e-05, + "loss": 0.3503, + "step": 21390 + }, + { + "epoch": 0.36962607132983133, + "grad_norm": 1.1214012348018614, + "learning_rate": 1.4536538776691531e-05, + "loss": 0.761, + "step": 21391 + }, + { + "epoch": 0.36964335084324024, + "grad_norm": 1.0409806355958258, + "learning_rate": 1.4536040022257343e-05, + "loss": 0.4419, + "step": 21392 + }, + { + "epoch": 0.36966063035664914, + "grad_norm": 0.8778090851763474, + "learning_rate": 1.4535541253615934e-05, + "loss": 0.3322, + "step": 21393 + }, + { + "epoch": 0.36967790987005805, + "grad_norm": 0.9591864496611081, + "learning_rate": 1.4535042470768866e-05, + "loss": 0.399, + "step": 21394 + }, + { + "epoch": 0.36969518938346696, + "grad_norm": 0.6801174191457896, + "learning_rate": 1.4534543673717698e-05, + "loss": 0.2637, + "step": 21395 + }, + { + "epoch": 0.36971246889687587, + "grad_norm": 0.9798163927620668, + "learning_rate": 1.4534044862463999e-05, + "loss": 0.4567, + "step": 21396 + }, + { + "epoch": 0.3697297484102848, + "grad_norm": 0.7721545367393561, + "learning_rate": 1.453354603700932e-05, + "loss": 0.3206, + "step": 21397 + }, + { + "epoch": 0.3697470279236937, + "grad_norm": 1.107595454840411, + "learning_rate": 1.4533047197355236e-05, + "loss": 0.4515, + "step": 21398 + }, + { + "epoch": 0.3697643074371026, + "grad_norm": 0.9949367843846808, + "learning_rate": 1.4532548343503303e-05, + "loss": 0.6186, + "step": 21399 + }, + { + "epoch": 0.3697815869505115, + "grad_norm": 0.6084616570764922, + "learning_rate": 1.4532049475455082e-05, + "loss": 0.6815, + "step": 21400 + }, + { + "epoch": 0.36979886646392035, + "grad_norm": 1.1340295832114546, + "learning_rate": 1.4531550593212138e-05, + "loss": 0.7024, + "step": 21401 + }, + { + "epoch": 0.36981614597732926, + "grad_norm": 0.6121884830433765, + "learning_rate": 1.4531051696776038e-05, + "loss": 0.8416, + "step": 21402 + }, + { + "epoch": 0.36983342549073817, + "grad_norm": 0.5410138770022245, + "learning_rate": 1.4530552786148332e-05, + "loss": 0.7194, + "step": 21403 + }, + { + "epoch": 0.3698507050041471, + "grad_norm": 0.776019180962369, + "learning_rate": 1.4530053861330596e-05, + "loss": 0.445, + "step": 21404 + }, + { + "epoch": 0.369867984517556, + "grad_norm": 1.2280886517768879, + "learning_rate": 1.4529554922324384e-05, + "loss": 0.4819, + "step": 21405 + }, + { + "epoch": 0.3698852640309649, + "grad_norm": 0.46718431003342786, + "learning_rate": 1.4529055969131262e-05, + "loss": 0.626, + "step": 21406 + }, + { + "epoch": 0.3699025435443738, + "grad_norm": 0.9971951397994415, + "learning_rate": 1.4528557001752795e-05, + "loss": 0.4681, + "step": 21407 + }, + { + "epoch": 0.3699198230577827, + "grad_norm": 1.2444409680098536, + "learning_rate": 1.452805802019054e-05, + "loss": 0.4661, + "step": 21408 + }, + { + "epoch": 0.3699371025711916, + "grad_norm": 0.430994281311276, + "learning_rate": 1.4527559024446065e-05, + "loss": 0.7056, + "step": 21409 + }, + { + "epoch": 0.3699543820846005, + "grad_norm": 1.0727536044493091, + "learning_rate": 1.4527060014520932e-05, + "loss": 0.5155, + "step": 21410 + }, + { + "epoch": 0.3699716615980094, + "grad_norm": 0.8238715220241442, + "learning_rate": 1.4526560990416702e-05, + "loss": 0.377, + "step": 21411 + }, + { + "epoch": 0.3699889411114183, + "grad_norm": 0.9062269073457843, + "learning_rate": 1.452606195213494e-05, + "loss": 0.5177, + "step": 21412 + }, + { + "epoch": 0.3700062206248272, + "grad_norm": 0.946303218147159, + "learning_rate": 1.4525562899677205e-05, + "loss": 0.3414, + "step": 21413 + }, + { + "epoch": 0.3700235001382361, + "grad_norm": 1.018067409570208, + "learning_rate": 1.4525063833045065e-05, + "loss": 0.5752, + "step": 21414 + }, + { + "epoch": 0.370040779651645, + "grad_norm": 1.2091757544699069, + "learning_rate": 1.452456475224008e-05, + "loss": 0.536, + "step": 21415 + }, + { + "epoch": 0.3700580591650539, + "grad_norm": 1.7799820458965743, + "learning_rate": 1.4524065657263818e-05, + "loss": 0.5023, + "step": 21416 + }, + { + "epoch": 0.3700753386784628, + "grad_norm": 1.005802152300969, + "learning_rate": 1.4523566548117834e-05, + "loss": 0.6331, + "step": 21417 + }, + { + "epoch": 0.3700926181918717, + "grad_norm": 1.8998558239714696, + "learning_rate": 1.4523067424803699e-05, + "loss": 0.5514, + "step": 21418 + }, + { + "epoch": 0.37010989770528063, + "grad_norm": 1.300939403402336, + "learning_rate": 1.452256828732297e-05, + "loss": 0.5389, + "step": 21419 + }, + { + "epoch": 0.37012717721868954, + "grad_norm": 1.024658938030307, + "learning_rate": 1.4522069135677215e-05, + "loss": 0.3861, + "step": 21420 + }, + { + "epoch": 0.37014445673209845, + "grad_norm": 0.44093032812157856, + "learning_rate": 1.4521569969867997e-05, + "loss": 0.5461, + "step": 21421 + }, + { + "epoch": 0.3701617362455073, + "grad_norm": 0.7277796521582061, + "learning_rate": 1.452107078989688e-05, + "loss": 0.5139, + "step": 21422 + }, + { + "epoch": 0.3701790157589162, + "grad_norm": 0.7544302129649406, + "learning_rate": 1.452057159576542e-05, + "loss": 0.4537, + "step": 21423 + }, + { + "epoch": 0.3701962952723251, + "grad_norm": 0.9451413206877111, + "learning_rate": 1.452007238747519e-05, + "loss": 0.4188, + "step": 21424 + }, + { + "epoch": 0.370213574785734, + "grad_norm": 0.6650249458300725, + "learning_rate": 1.451957316502775e-05, + "loss": 0.3799, + "step": 21425 + }, + { + "epoch": 0.37023085429914293, + "grad_norm": 0.7408348700605858, + "learning_rate": 1.451907392842466e-05, + "loss": 0.3352, + "step": 21426 + }, + { + "epoch": 0.37024813381255184, + "grad_norm": 0.9820800671718614, + "learning_rate": 1.4518574677667489e-05, + "loss": 0.5351, + "step": 21427 + }, + { + "epoch": 0.37026541332596075, + "grad_norm": 1.1613636998512311, + "learning_rate": 1.4518075412757802e-05, + "loss": 0.4949, + "step": 21428 + }, + { + "epoch": 0.37028269283936965, + "grad_norm": 0.6962603344717243, + "learning_rate": 1.4517576133697153e-05, + "loss": 0.3804, + "step": 21429 + }, + { + "epoch": 0.37029997235277856, + "grad_norm": 0.7422887629020232, + "learning_rate": 1.4517076840487119e-05, + "loss": 0.5191, + "step": 21430 + }, + { + "epoch": 0.37031725186618747, + "grad_norm": 0.838568820753028, + "learning_rate": 1.4516577533129252e-05, + "loss": 0.4174, + "step": 21431 + }, + { + "epoch": 0.3703345313795964, + "grad_norm": 2.0385656017014857, + "learning_rate": 1.4516078211625122e-05, + "loss": 0.4045, + "step": 21432 + }, + { + "epoch": 0.37035181089300523, + "grad_norm": 0.8251488158510935, + "learning_rate": 1.4515578875976295e-05, + "loss": 0.403, + "step": 21433 + }, + { + "epoch": 0.37036909040641414, + "grad_norm": 0.7141220502981533, + "learning_rate": 1.4515079526184328e-05, + "loss": 0.3422, + "step": 21434 + }, + { + "epoch": 0.37038636991982304, + "grad_norm": 1.4075021074425407, + "learning_rate": 1.4514580162250791e-05, + "loss": 0.6425, + "step": 21435 + }, + { + "epoch": 0.37040364943323195, + "grad_norm": 1.3596008889662887, + "learning_rate": 1.4514080784177245e-05, + "loss": 0.4918, + "step": 21436 + }, + { + "epoch": 0.37042092894664086, + "grad_norm": 1.1112685681784449, + "learning_rate": 1.4513581391965254e-05, + "loss": 0.4763, + "step": 21437 + }, + { + "epoch": 0.37043820846004977, + "grad_norm": 1.0480013706552842, + "learning_rate": 1.4513081985616384e-05, + "loss": 0.4179, + "step": 21438 + }, + { + "epoch": 0.3704554879734587, + "grad_norm": 0.7432768115616124, + "learning_rate": 1.4512582565132198e-05, + "loss": 0.4808, + "step": 21439 + }, + { + "epoch": 0.3704727674868676, + "grad_norm": 0.7664919248575831, + "learning_rate": 1.451208313051426e-05, + "loss": 0.3733, + "step": 21440 + }, + { + "epoch": 0.3704900470002765, + "grad_norm": 1.064663381354745, + "learning_rate": 1.4511583681764136e-05, + "loss": 0.4735, + "step": 21441 + }, + { + "epoch": 0.3705073265136854, + "grad_norm": 0.8953371396476731, + "learning_rate": 1.4511084218883388e-05, + "loss": 0.585, + "step": 21442 + }, + { + "epoch": 0.37052460602709425, + "grad_norm": 1.1565193154414364, + "learning_rate": 1.4510584741873584e-05, + "loss": 0.4132, + "step": 21443 + }, + { + "epoch": 0.37054188554050316, + "grad_norm": 0.8667627692275266, + "learning_rate": 1.4510085250736283e-05, + "loss": 0.5636, + "step": 21444 + }, + { + "epoch": 0.37055916505391207, + "grad_norm": 0.8441850250061612, + "learning_rate": 1.4509585745473055e-05, + "loss": 0.5327, + "step": 21445 + }, + { + "epoch": 0.370576444567321, + "grad_norm": 0.7241268090943288, + "learning_rate": 1.4509086226085457e-05, + "loss": 0.6171, + "step": 21446 + }, + { + "epoch": 0.3705937240807299, + "grad_norm": 0.635790428079934, + "learning_rate": 1.4508586692575062e-05, + "loss": 0.4695, + "step": 21447 + }, + { + "epoch": 0.3706110035941388, + "grad_norm": 0.6996494940784678, + "learning_rate": 1.4508087144943428e-05, + "loss": 0.4542, + "step": 21448 + }, + { + "epoch": 0.3706282831075477, + "grad_norm": 0.8975335219199129, + "learning_rate": 1.4507587583192124e-05, + "loss": 0.7185, + "step": 21449 + }, + { + "epoch": 0.3706455626209566, + "grad_norm": 0.6478211136858792, + "learning_rate": 1.4507088007322714e-05, + "loss": 0.6802, + "step": 21450 + }, + { + "epoch": 0.3706628421343655, + "grad_norm": 0.8526092359341837, + "learning_rate": 1.450658841733676e-05, + "loss": 0.4834, + "step": 21451 + }, + { + "epoch": 0.3706801216477744, + "grad_norm": 0.9125068995068383, + "learning_rate": 1.450608881323583e-05, + "loss": 0.535, + "step": 21452 + }, + { + "epoch": 0.3706974011611833, + "grad_norm": 0.6550687334860069, + "learning_rate": 1.4505589195021488e-05, + "loss": 0.3915, + "step": 21453 + }, + { + "epoch": 0.3707146806745922, + "grad_norm": 1.094185814868263, + "learning_rate": 1.4505089562695296e-05, + "loss": 0.4689, + "step": 21454 + }, + { + "epoch": 0.3707319601880011, + "grad_norm": 1.028428707420682, + "learning_rate": 1.4504589916258822e-05, + "loss": 0.3636, + "step": 21455 + }, + { + "epoch": 0.37074923970141, + "grad_norm": 0.6840984420791348, + "learning_rate": 1.4504090255713633e-05, + "loss": 0.493, + "step": 21456 + }, + { + "epoch": 0.3707665192148189, + "grad_norm": 1.8605145192930188, + "learning_rate": 1.4503590581061287e-05, + "loss": 0.4549, + "step": 21457 + }, + { + "epoch": 0.3707837987282278, + "grad_norm": 1.0230263535779178, + "learning_rate": 1.4503090892303356e-05, + "loss": 0.4502, + "step": 21458 + }, + { + "epoch": 0.3708010782416367, + "grad_norm": 1.2308930867974155, + "learning_rate": 1.45025911894414e-05, + "loss": 0.5619, + "step": 21459 + }, + { + "epoch": 0.3708183577550456, + "grad_norm": 1.4502115817554986, + "learning_rate": 1.4502091472476984e-05, + "loss": 0.6403, + "step": 21460 + }, + { + "epoch": 0.37083563726845453, + "grad_norm": 1.0143861044711515, + "learning_rate": 1.450159174141168e-05, + "loss": 0.3748, + "step": 21461 + }, + { + "epoch": 0.37085291678186344, + "grad_norm": 1.0241044434428808, + "learning_rate": 1.4501091996247048e-05, + "loss": 0.5524, + "step": 21462 + }, + { + "epoch": 0.37087019629527235, + "grad_norm": 0.9132065079444983, + "learning_rate": 1.4500592236984651e-05, + "loss": 0.4324, + "step": 21463 + }, + { + "epoch": 0.37088747580868126, + "grad_norm": 0.42264744383693, + "learning_rate": 1.450009246362606e-05, + "loss": 0.5719, + "step": 21464 + }, + { + "epoch": 0.3709047553220901, + "grad_norm": 1.0073447546314471, + "learning_rate": 1.4499592676172838e-05, + "loss": 0.3297, + "step": 21465 + }, + { + "epoch": 0.370922034835499, + "grad_norm": 0.8394792552749657, + "learning_rate": 1.4499092874626546e-05, + "loss": 0.3626, + "step": 21466 + }, + { + "epoch": 0.3709393143489079, + "grad_norm": 1.223336190383745, + "learning_rate": 1.4498593058988755e-05, + "loss": 0.5354, + "step": 21467 + }, + { + "epoch": 0.37095659386231683, + "grad_norm": 1.1136787992167285, + "learning_rate": 1.449809322926103e-05, + "loss": 0.3969, + "step": 21468 + }, + { + "epoch": 0.37097387337572574, + "grad_norm": 0.9989183171658107, + "learning_rate": 1.449759338544493e-05, + "loss": 0.3914, + "step": 21469 + }, + { + "epoch": 0.37099115288913465, + "grad_norm": 1.0509184998298733, + "learning_rate": 1.4497093527542033e-05, + "loss": 0.3821, + "step": 21470 + }, + { + "epoch": 0.37100843240254355, + "grad_norm": 0.8943812186553578, + "learning_rate": 1.4496593655553892e-05, + "loss": 0.6444, + "step": 21471 + }, + { + "epoch": 0.37102571191595246, + "grad_norm": 1.2388745378132535, + "learning_rate": 1.4496093769482078e-05, + "loss": 0.3578, + "step": 21472 + }, + { + "epoch": 0.37104299142936137, + "grad_norm": 1.1056002489530106, + "learning_rate": 1.449559386932816e-05, + "loss": 0.6455, + "step": 21473 + }, + { + "epoch": 0.3710602709427703, + "grad_norm": 0.43382113032931763, + "learning_rate": 1.4495093955093698e-05, + "loss": 0.597, + "step": 21474 + }, + { + "epoch": 0.37107755045617913, + "grad_norm": 0.8794739307545344, + "learning_rate": 1.4494594026780262e-05, + "loss": 0.444, + "step": 21475 + }, + { + "epoch": 0.37109482996958804, + "grad_norm": 0.9234026170355447, + "learning_rate": 1.4494094084389416e-05, + "loss": 0.5337, + "step": 21476 + }, + { + "epoch": 0.37111210948299694, + "grad_norm": 1.6712827272962596, + "learning_rate": 1.4493594127922724e-05, + "loss": 0.4615, + "step": 21477 + }, + { + "epoch": 0.37112938899640585, + "grad_norm": 1.1157906477788926, + "learning_rate": 1.4493094157381753e-05, + "loss": 0.4644, + "step": 21478 + }, + { + "epoch": 0.37114666850981476, + "grad_norm": 0.9217743399723645, + "learning_rate": 1.4492594172768071e-05, + "loss": 0.5849, + "step": 21479 + }, + { + "epoch": 0.37116394802322367, + "grad_norm": 0.46329606937366585, + "learning_rate": 1.4492094174083243e-05, + "loss": 0.4086, + "step": 21480 + }, + { + "epoch": 0.3711812275366326, + "grad_norm": 1.1842962507376433, + "learning_rate": 1.4491594161328833e-05, + "loss": 0.5015, + "step": 21481 + }, + { + "epoch": 0.3711985070500415, + "grad_norm": 0.8005372917855438, + "learning_rate": 1.4491094134506412e-05, + "loss": 0.5784, + "step": 21482 + }, + { + "epoch": 0.3712157865634504, + "grad_norm": 0.9323323377466808, + "learning_rate": 1.449059409361754e-05, + "loss": 0.3677, + "step": 21483 + }, + { + "epoch": 0.3712330660768593, + "grad_norm": 1.0672836332134188, + "learning_rate": 1.4490094038663789e-05, + "loss": 0.591, + "step": 21484 + }, + { + "epoch": 0.3712503455902682, + "grad_norm": 1.1187199940091443, + "learning_rate": 1.4489593969646718e-05, + "loss": 0.346, + "step": 21485 + }, + { + "epoch": 0.37126762510367706, + "grad_norm": 1.0261988038027947, + "learning_rate": 1.4489093886567899e-05, + "loss": 0.524, + "step": 21486 + }, + { + "epoch": 0.37128490461708596, + "grad_norm": 0.9541323619240193, + "learning_rate": 1.4488593789428898e-05, + "loss": 0.6113, + "step": 21487 + }, + { + "epoch": 0.3713021841304949, + "grad_norm": 0.810718609842425, + "learning_rate": 1.448809367823128e-05, + "loss": 0.5651, + "step": 21488 + }, + { + "epoch": 0.3713194636439038, + "grad_norm": 1.0842390903864265, + "learning_rate": 1.448759355297661e-05, + "loss": 0.4499, + "step": 21489 + }, + { + "epoch": 0.3713367431573127, + "grad_norm": 0.786335289078164, + "learning_rate": 1.4487093413666458e-05, + "loss": 0.4628, + "step": 21490 + }, + { + "epoch": 0.3713540226707216, + "grad_norm": 0.8175705188352115, + "learning_rate": 1.4486593260302387e-05, + "loss": 0.5266, + "step": 21491 + }, + { + "epoch": 0.3713713021841305, + "grad_norm": 1.466600992493676, + "learning_rate": 1.4486093092885964e-05, + "loss": 0.4884, + "step": 21492 + }, + { + "epoch": 0.3713885816975394, + "grad_norm": 0.5760567229743171, + "learning_rate": 1.4485592911418758e-05, + "loss": 0.2842, + "step": 21493 + }, + { + "epoch": 0.3714058612109483, + "grad_norm": 1.0012013244885174, + "learning_rate": 1.4485092715902336e-05, + "loss": 0.4339, + "step": 21494 + }, + { + "epoch": 0.3714231407243572, + "grad_norm": 1.1214130124626154, + "learning_rate": 1.4484592506338259e-05, + "loss": 0.4311, + "step": 21495 + }, + { + "epoch": 0.3714404202377661, + "grad_norm": 0.9128014259725934, + "learning_rate": 1.4484092282728099e-05, + "loss": 0.3866, + "step": 21496 + }, + { + "epoch": 0.371457699751175, + "grad_norm": 0.7719708711113108, + "learning_rate": 1.4483592045073423e-05, + "loss": 0.3908, + "step": 21497 + }, + { + "epoch": 0.3714749792645839, + "grad_norm": 0.6861113550280148, + "learning_rate": 1.4483091793375793e-05, + "loss": 0.3922, + "step": 21498 + }, + { + "epoch": 0.3714922587779928, + "grad_norm": 1.048587803612433, + "learning_rate": 1.448259152763678e-05, + "loss": 0.6626, + "step": 21499 + }, + { + "epoch": 0.3715095382914017, + "grad_norm": 0.41348843075797337, + "learning_rate": 1.448209124785795e-05, + "loss": 0.588, + "step": 21500 + }, + { + "epoch": 0.3715268178048106, + "grad_norm": 0.7811123396962064, + "learning_rate": 1.4481590954040869e-05, + "loss": 0.5849, + "step": 21501 + }, + { + "epoch": 0.3715440973182195, + "grad_norm": 0.9223283759822875, + "learning_rate": 1.4481090646187106e-05, + "loss": 0.4686, + "step": 21502 + }, + { + "epoch": 0.37156137683162843, + "grad_norm": 1.0758659212181296, + "learning_rate": 1.4480590324298225e-05, + "loss": 0.4385, + "step": 21503 + }, + { + "epoch": 0.37157865634503734, + "grad_norm": 1.3404851559326612, + "learning_rate": 1.4480089988375794e-05, + "loss": 0.3983, + "step": 21504 + }, + { + "epoch": 0.37159593585844625, + "grad_norm": 0.7854466197315823, + "learning_rate": 1.4479589638421385e-05, + "loss": 0.5891, + "step": 21505 + }, + { + "epoch": 0.37161321537185515, + "grad_norm": 0.9635683798488306, + "learning_rate": 1.4479089274436556e-05, + "loss": 0.4156, + "step": 21506 + }, + { + "epoch": 0.371630494885264, + "grad_norm": 0.8274635484941417, + "learning_rate": 1.447858889642288e-05, + "loss": 0.3122, + "step": 21507 + }, + { + "epoch": 0.3716477743986729, + "grad_norm": 0.8603825169399901, + "learning_rate": 1.4478088504381925e-05, + "loss": 0.5729, + "step": 21508 + }, + { + "epoch": 0.3716650539120818, + "grad_norm": 0.6930409090448034, + "learning_rate": 1.4477588098315254e-05, + "loss": 0.4123, + "step": 21509 + }, + { + "epoch": 0.37168233342549073, + "grad_norm": 0.8878517337541384, + "learning_rate": 1.4477087678224438e-05, + "loss": 0.5259, + "step": 21510 + }, + { + "epoch": 0.37169961293889964, + "grad_norm": 0.40704058892735306, + "learning_rate": 1.4476587244111042e-05, + "loss": 0.7937, + "step": 21511 + }, + { + "epoch": 0.37171689245230854, + "grad_norm": 0.8288600852580669, + "learning_rate": 1.4476086795976636e-05, + "loss": 0.357, + "step": 21512 + }, + { + "epoch": 0.37173417196571745, + "grad_norm": 1.1240213520980002, + "learning_rate": 1.4475586333822787e-05, + "loss": 0.329, + "step": 21513 + }, + { + "epoch": 0.37175145147912636, + "grad_norm": 0.8096417148453245, + "learning_rate": 1.447508585765106e-05, + "loss": 0.4625, + "step": 21514 + }, + { + "epoch": 0.37176873099253527, + "grad_norm": 0.719091832455765, + "learning_rate": 1.4474585367463024e-05, + "loss": 0.302, + "step": 21515 + }, + { + "epoch": 0.3717860105059442, + "grad_norm": 0.8913219854912507, + "learning_rate": 1.4474084863260247e-05, + "loss": 0.5705, + "step": 21516 + }, + { + "epoch": 0.371803290019353, + "grad_norm": 1.2026628253580316, + "learning_rate": 1.4473584345044293e-05, + "loss": 0.5592, + "step": 21517 + }, + { + "epoch": 0.37182056953276194, + "grad_norm": 1.1813144279458418, + "learning_rate": 1.447308381281674e-05, + "loss": 0.5173, + "step": 21518 + }, + { + "epoch": 0.37183784904617084, + "grad_norm": 1.0245726637115438, + "learning_rate": 1.4472583266579144e-05, + "loss": 0.3927, + "step": 21519 + }, + { + "epoch": 0.37185512855957975, + "grad_norm": 1.3915296201152452, + "learning_rate": 1.4472082706333074e-05, + "loss": 0.5784, + "step": 21520 + }, + { + "epoch": 0.37187240807298866, + "grad_norm": 1.3036228469738254, + "learning_rate": 1.4471582132080107e-05, + "loss": 0.4794, + "step": 21521 + }, + { + "epoch": 0.37188968758639757, + "grad_norm": 0.997097080065603, + "learning_rate": 1.4471081543821804e-05, + "loss": 0.6086, + "step": 21522 + }, + { + "epoch": 0.3719069670998065, + "grad_norm": 1.1384797933726087, + "learning_rate": 1.447058094155973e-05, + "loss": 0.3916, + "step": 21523 + }, + { + "epoch": 0.3719242466132154, + "grad_norm": 1.1249480478110356, + "learning_rate": 1.4470080325295459e-05, + "loss": 0.5767, + "step": 21524 + }, + { + "epoch": 0.3719415261266243, + "grad_norm": 0.8957128599225358, + "learning_rate": 1.446957969503056e-05, + "loss": 0.4851, + "step": 21525 + }, + { + "epoch": 0.3719588056400332, + "grad_norm": 0.7413232647848966, + "learning_rate": 1.4469079050766592e-05, + "loss": 0.4935, + "step": 21526 + }, + { + "epoch": 0.3719760851534421, + "grad_norm": 0.9992153525318871, + "learning_rate": 1.4468578392505133e-05, + "loss": 0.6086, + "step": 21527 + }, + { + "epoch": 0.37199336466685096, + "grad_norm": 1.6511828662645684, + "learning_rate": 1.4468077720247746e-05, + "loss": 0.7087, + "step": 21528 + }, + { + "epoch": 0.37201064418025986, + "grad_norm": 0.4218899263393243, + "learning_rate": 1.4467577033995998e-05, + "loss": 0.6011, + "step": 21529 + }, + { + "epoch": 0.37202792369366877, + "grad_norm": 0.8246026381962681, + "learning_rate": 1.4467076333751463e-05, + "loss": 0.4283, + "step": 21530 + }, + { + "epoch": 0.3720452032070777, + "grad_norm": 0.7960961171580448, + "learning_rate": 1.4466575619515701e-05, + "loss": 0.3794, + "step": 21531 + }, + { + "epoch": 0.3720624827204866, + "grad_norm": 0.9082994781933396, + "learning_rate": 1.4466074891290284e-05, + "loss": 0.581, + "step": 21532 + }, + { + "epoch": 0.3720797622338955, + "grad_norm": 1.1920246185875591, + "learning_rate": 1.4465574149076784e-05, + "loss": 0.4871, + "step": 21533 + }, + { + "epoch": 0.3720970417473044, + "grad_norm": 1.225284034577377, + "learning_rate": 1.4465073392876767e-05, + "loss": 0.3649, + "step": 21534 + }, + { + "epoch": 0.3721143212607133, + "grad_norm": 1.191685286878894, + "learning_rate": 1.44645726226918e-05, + "loss": 0.4796, + "step": 21535 + }, + { + "epoch": 0.3721316007741222, + "grad_norm": 1.241878739991295, + "learning_rate": 1.4464071838523454e-05, + "loss": 0.6166, + "step": 21536 + }, + { + "epoch": 0.3721488802875311, + "grad_norm": 1.5240015289191604, + "learning_rate": 1.4463571040373292e-05, + "loss": 0.4253, + "step": 21537 + }, + { + "epoch": 0.37216615980094003, + "grad_norm": 0.8595636680390989, + "learning_rate": 1.4463070228242889e-05, + "loss": 0.2385, + "step": 21538 + }, + { + "epoch": 0.3721834393143489, + "grad_norm": 0.9889017352502929, + "learning_rate": 1.4462569402133811e-05, + "loss": 0.4509, + "step": 21539 + }, + { + "epoch": 0.3722007188277578, + "grad_norm": 1.2663018450480392, + "learning_rate": 1.4462068562047624e-05, + "loss": 0.461, + "step": 21540 + }, + { + "epoch": 0.3722179983411667, + "grad_norm": 0.6534754086004748, + "learning_rate": 1.4461567707985901e-05, + "loss": 0.5147, + "step": 21541 + }, + { + "epoch": 0.3722352778545756, + "grad_norm": 0.9942399269403215, + "learning_rate": 1.4461066839950208e-05, + "loss": 0.5774, + "step": 21542 + }, + { + "epoch": 0.3722525573679845, + "grad_norm": 0.7713866042883932, + "learning_rate": 1.4460565957942116e-05, + "loss": 0.4511, + "step": 21543 + }, + { + "epoch": 0.3722698368813934, + "grad_norm": 0.9407427816416413, + "learning_rate": 1.4460065061963191e-05, + "loss": 0.3364, + "step": 21544 + }, + { + "epoch": 0.37228711639480233, + "grad_norm": 0.5272965127624855, + "learning_rate": 1.4459564152015003e-05, + "loss": 0.5406, + "step": 21545 + }, + { + "epoch": 0.37230439590821124, + "grad_norm": 0.9034578050038445, + "learning_rate": 1.4459063228099124e-05, + "loss": 0.5268, + "step": 21546 + }, + { + "epoch": 0.37232167542162015, + "grad_norm": 1.5166271519894965, + "learning_rate": 1.445856229021712e-05, + "loss": 0.7469, + "step": 21547 + }, + { + "epoch": 0.37233895493502905, + "grad_norm": 1.0199172397060348, + "learning_rate": 1.4458061338370556e-05, + "loss": 0.7207, + "step": 21548 + }, + { + "epoch": 0.3723562344484379, + "grad_norm": 0.939678640511227, + "learning_rate": 1.4457560372561009e-05, + "loss": 0.3955, + "step": 21549 + }, + { + "epoch": 0.3723735139618468, + "grad_norm": 0.6896804039469177, + "learning_rate": 1.445705939279004e-05, + "loss": 0.4835, + "step": 21550 + }, + { + "epoch": 0.3723907934752557, + "grad_norm": 0.8968023748505202, + "learning_rate": 1.4456558399059226e-05, + "loss": 0.4935, + "step": 21551 + }, + { + "epoch": 0.37240807298866463, + "grad_norm": 0.6411943299085059, + "learning_rate": 1.4456057391370131e-05, + "loss": 0.4386, + "step": 21552 + }, + { + "epoch": 0.37242535250207354, + "grad_norm": 0.9206120779898166, + "learning_rate": 1.4455556369724324e-05, + "loss": 0.4315, + "step": 21553 + }, + { + "epoch": 0.37244263201548244, + "grad_norm": 1.3110416793176523, + "learning_rate": 1.445505533412338e-05, + "loss": 0.4726, + "step": 21554 + }, + { + "epoch": 0.37245991152889135, + "grad_norm": 0.7160124044735654, + "learning_rate": 1.445455428456886e-05, + "loss": 0.4304, + "step": 21555 + }, + { + "epoch": 0.37247719104230026, + "grad_norm": 1.5666084428105278, + "learning_rate": 1.445405322106234e-05, + "loss": 0.4674, + "step": 21556 + }, + { + "epoch": 0.37249447055570917, + "grad_norm": 0.7229425633176033, + "learning_rate": 1.4453552143605385e-05, + "loss": 0.6621, + "step": 21557 + }, + { + "epoch": 0.3725117500691181, + "grad_norm": 0.6363795138840155, + "learning_rate": 1.4453051052199567e-05, + "loss": 0.4122, + "step": 21558 + }, + { + "epoch": 0.372529029582527, + "grad_norm": 1.280805367803494, + "learning_rate": 1.4452549946846458e-05, + "loss": 0.5388, + "step": 21559 + }, + { + "epoch": 0.37254630909593583, + "grad_norm": 1.1678033803540142, + "learning_rate": 1.4452048827547618e-05, + "loss": 0.5204, + "step": 21560 + }, + { + "epoch": 0.37256358860934474, + "grad_norm": 1.0576770248419747, + "learning_rate": 1.4451547694304627e-05, + "loss": 0.4158, + "step": 21561 + }, + { + "epoch": 0.37258086812275365, + "grad_norm": 0.8357820668852594, + "learning_rate": 1.445104654711905e-05, + "loss": 0.367, + "step": 21562 + }, + { + "epoch": 0.37259814763616256, + "grad_norm": 0.6353641531414082, + "learning_rate": 1.4450545385992456e-05, + "loss": 0.3617, + "step": 21563 + }, + { + "epoch": 0.37261542714957147, + "grad_norm": 0.801970563244832, + "learning_rate": 1.4450044210926412e-05, + "loss": 0.4742, + "step": 21564 + }, + { + "epoch": 0.3726327066629804, + "grad_norm": 0.8420408042232921, + "learning_rate": 1.4449543021922496e-05, + "loss": 0.6227, + "step": 21565 + }, + { + "epoch": 0.3726499861763893, + "grad_norm": 1.2612919042733661, + "learning_rate": 1.4449041818982274e-05, + "loss": 0.3933, + "step": 21566 + }, + { + "epoch": 0.3726672656897982, + "grad_norm": 0.7506952154651441, + "learning_rate": 1.4448540602107313e-05, + "loss": 0.5156, + "step": 21567 + }, + { + "epoch": 0.3726845452032071, + "grad_norm": 0.8655340643442195, + "learning_rate": 1.4448039371299186e-05, + "loss": 0.5054, + "step": 21568 + }, + { + "epoch": 0.372701824716616, + "grad_norm": 0.605152398922143, + "learning_rate": 1.4447538126559459e-05, + "loss": 0.3265, + "step": 21569 + }, + { + "epoch": 0.37271910423002486, + "grad_norm": 1.0020318806929098, + "learning_rate": 1.4447036867889705e-05, + "loss": 0.4692, + "step": 21570 + }, + { + "epoch": 0.37273638374343376, + "grad_norm": 0.7490587749934031, + "learning_rate": 1.4446535595291495e-05, + "loss": 0.3891, + "step": 21571 + }, + { + "epoch": 0.37275366325684267, + "grad_norm": 0.8248230166623849, + "learning_rate": 1.4446034308766396e-05, + "loss": 0.3565, + "step": 21572 + }, + { + "epoch": 0.3727709427702516, + "grad_norm": 1.6432209521053103, + "learning_rate": 1.4445533008315981e-05, + "loss": 0.5086, + "step": 21573 + }, + { + "epoch": 0.3727882222836605, + "grad_norm": 0.8356275764989245, + "learning_rate": 1.444503169394182e-05, + "loss": 0.5744, + "step": 21574 + }, + { + "epoch": 0.3728055017970694, + "grad_norm": 1.28914488997669, + "learning_rate": 1.444453036564548e-05, + "loss": 0.4459, + "step": 21575 + }, + { + "epoch": 0.3728227813104783, + "grad_norm": 1.2700811038876318, + "learning_rate": 1.4444029023428533e-05, + "loss": 0.53, + "step": 21576 + }, + { + "epoch": 0.3728400608238872, + "grad_norm": 1.017822170672999, + "learning_rate": 1.4443527667292551e-05, + "loss": 0.4243, + "step": 21577 + }, + { + "epoch": 0.3728573403372961, + "grad_norm": 1.2712744305195778, + "learning_rate": 1.4443026297239103e-05, + "loss": 0.4576, + "step": 21578 + }, + { + "epoch": 0.372874619850705, + "grad_norm": 0.7014202885872544, + "learning_rate": 1.4442524913269758e-05, + "loss": 0.3649, + "step": 21579 + }, + { + "epoch": 0.37289189936411393, + "grad_norm": 0.476926629939004, + "learning_rate": 1.4442023515386088e-05, + "loss": 0.6033, + "step": 21580 + }, + { + "epoch": 0.3729091788775228, + "grad_norm": 0.7081770289319738, + "learning_rate": 1.4441522103589659e-05, + "loss": 0.4069, + "step": 21581 + }, + { + "epoch": 0.3729264583909317, + "grad_norm": 0.7803683012370055, + "learning_rate": 1.4441020677882049e-05, + "loss": 0.5607, + "step": 21582 + }, + { + "epoch": 0.3729437379043406, + "grad_norm": 1.1377463367364973, + "learning_rate": 1.4440519238264825e-05, + "loss": 0.402, + "step": 21583 + }, + { + "epoch": 0.3729610174177495, + "grad_norm": 0.9262865211941503, + "learning_rate": 1.4440017784739553e-05, + "loss": 0.3495, + "step": 21584 + }, + { + "epoch": 0.3729782969311584, + "grad_norm": 0.9356038522860399, + "learning_rate": 1.4439516317307812e-05, + "loss": 0.4783, + "step": 21585 + }, + { + "epoch": 0.3729955764445673, + "grad_norm": 1.532162542199996, + "learning_rate": 1.4439014835971168e-05, + "loss": 0.6119, + "step": 21586 + }, + { + "epoch": 0.37301285595797623, + "grad_norm": 0.8393211974278996, + "learning_rate": 1.443851334073119e-05, + "loss": 0.3459, + "step": 21587 + }, + { + "epoch": 0.37303013547138514, + "grad_norm": 1.2044817745059737, + "learning_rate": 1.4438011831589456e-05, + "loss": 0.4314, + "step": 21588 + }, + { + "epoch": 0.37304741498479405, + "grad_norm": 1.3798770400225915, + "learning_rate": 1.4437510308547524e-05, + "loss": 0.5736, + "step": 21589 + }, + { + "epoch": 0.37306469449820295, + "grad_norm": 1.029459401061381, + "learning_rate": 1.4437008771606977e-05, + "loss": 0.458, + "step": 21590 + }, + { + "epoch": 0.3730819740116118, + "grad_norm": 0.9266713382499606, + "learning_rate": 1.4436507220769382e-05, + "loss": 0.5723, + "step": 21591 + }, + { + "epoch": 0.3730992535250207, + "grad_norm": 0.7125174198719124, + "learning_rate": 1.4436005656036307e-05, + "loss": 0.5094, + "step": 21592 + }, + { + "epoch": 0.3731165330384296, + "grad_norm": 0.9238079628715827, + "learning_rate": 1.4435504077409328e-05, + "loss": 0.6007, + "step": 21593 + }, + { + "epoch": 0.37313381255183853, + "grad_norm": 1.0318037171125005, + "learning_rate": 1.4435002484890011e-05, + "loss": 0.4881, + "step": 21594 + }, + { + "epoch": 0.37315109206524744, + "grad_norm": 0.7592032520602738, + "learning_rate": 1.4434500878479928e-05, + "loss": 0.4649, + "step": 21595 + }, + { + "epoch": 0.37316837157865634, + "grad_norm": 0.6473969343140038, + "learning_rate": 1.4433999258180653e-05, + "loss": 0.289, + "step": 21596 + }, + { + "epoch": 0.37318565109206525, + "grad_norm": 0.8662753716812085, + "learning_rate": 1.4433497623993758e-05, + "loss": 0.4862, + "step": 21597 + }, + { + "epoch": 0.37320293060547416, + "grad_norm": 0.8633333512658091, + "learning_rate": 1.4432995975920808e-05, + "loss": 0.5054, + "step": 21598 + }, + { + "epoch": 0.37322021011888307, + "grad_norm": 0.8179516072288225, + "learning_rate": 1.4432494313963378e-05, + "loss": 0.6228, + "step": 21599 + }, + { + "epoch": 0.373237489632292, + "grad_norm": 0.5552798304075939, + "learning_rate": 1.443199263812304e-05, + "loss": 0.7083, + "step": 21600 + }, + { + "epoch": 0.3732547691457009, + "grad_norm": 1.6589294484932051, + "learning_rate": 1.4431490948401363e-05, + "loss": 0.4764, + "step": 21601 + }, + { + "epoch": 0.37327204865910973, + "grad_norm": 0.755457354984677, + "learning_rate": 1.443098924479992e-05, + "loss": 0.5694, + "step": 21602 + }, + { + "epoch": 0.37328932817251864, + "grad_norm": 1.031344233704929, + "learning_rate": 1.4430487527320283e-05, + "loss": 0.4855, + "step": 21603 + }, + { + "epoch": 0.37330660768592755, + "grad_norm": 1.0159770368782874, + "learning_rate": 1.442998579596402e-05, + "loss": 0.3444, + "step": 21604 + }, + { + "epoch": 0.37332388719933646, + "grad_norm": 0.8844422911565556, + "learning_rate": 1.4429484050732707e-05, + "loss": 0.3283, + "step": 21605 + }, + { + "epoch": 0.37334116671274536, + "grad_norm": 0.6894841292737107, + "learning_rate": 1.4428982291627914e-05, + "loss": 0.5333, + "step": 21606 + }, + { + "epoch": 0.3733584462261543, + "grad_norm": 0.8928924910489451, + "learning_rate": 1.4428480518651209e-05, + "loss": 0.4531, + "step": 21607 + }, + { + "epoch": 0.3733757257395632, + "grad_norm": 0.7698959066479459, + "learning_rate": 1.4427978731804169e-05, + "loss": 0.3707, + "step": 21608 + }, + { + "epoch": 0.3733930052529721, + "grad_norm": 0.44682167546225093, + "learning_rate": 1.442747693108836e-05, + "loss": 0.5123, + "step": 21609 + }, + { + "epoch": 0.373410284766381, + "grad_norm": 1.1334747702582733, + "learning_rate": 1.442697511650536e-05, + "loss": 0.4213, + "step": 21610 + }, + { + "epoch": 0.3734275642797899, + "grad_norm": 0.8358465077002284, + "learning_rate": 1.4426473288056736e-05, + "loss": 0.4298, + "step": 21611 + }, + { + "epoch": 0.3734448437931988, + "grad_norm": 1.0945665096788941, + "learning_rate": 1.4425971445744061e-05, + "loss": 0.493, + "step": 21612 + }, + { + "epoch": 0.37346212330660766, + "grad_norm": 1.1551322594621605, + "learning_rate": 1.4425469589568907e-05, + "loss": 0.5883, + "step": 21613 + }, + { + "epoch": 0.37347940282001657, + "grad_norm": 0.9791441974730478, + "learning_rate": 1.4424967719532847e-05, + "loss": 0.3653, + "step": 21614 + }, + { + "epoch": 0.3734966823334255, + "grad_norm": 1.4343268040288872, + "learning_rate": 1.4424465835637452e-05, + "loss": 0.4098, + "step": 21615 + }, + { + "epoch": 0.3735139618468344, + "grad_norm": 0.7555602974244534, + "learning_rate": 1.4423963937884293e-05, + "loss": 0.3524, + "step": 21616 + }, + { + "epoch": 0.3735312413602433, + "grad_norm": 0.6038692111113704, + "learning_rate": 1.4423462026274943e-05, + "loss": 0.3748, + "step": 21617 + }, + { + "epoch": 0.3735485208736522, + "grad_norm": 1.018271849554975, + "learning_rate": 1.4422960100810974e-05, + "loss": 0.5222, + "step": 21618 + }, + { + "epoch": 0.3735658003870611, + "grad_norm": 0.37976884732683924, + "learning_rate": 1.442245816149396e-05, + "loss": 0.6718, + "step": 21619 + }, + { + "epoch": 0.37358307990047, + "grad_norm": 1.6031933990286829, + "learning_rate": 1.4421956208325468e-05, + "loss": 0.3151, + "step": 21620 + }, + { + "epoch": 0.3736003594138789, + "grad_norm": 1.0467589736666867, + "learning_rate": 1.4421454241307071e-05, + "loss": 0.5198, + "step": 21621 + }, + { + "epoch": 0.37361763892728783, + "grad_norm": 1.540623031291519, + "learning_rate": 1.4420952260440346e-05, + "loss": 0.5095, + "step": 21622 + }, + { + "epoch": 0.3736349184406967, + "grad_norm": 0.8409638615805974, + "learning_rate": 1.4420450265726865e-05, + "loss": 0.5315, + "step": 21623 + }, + { + "epoch": 0.3736521979541056, + "grad_norm": 1.2379108820284068, + "learning_rate": 1.4419948257168192e-05, + "loss": 0.5966, + "step": 21624 + }, + { + "epoch": 0.3736694774675145, + "grad_norm": 0.9497842021641657, + "learning_rate": 1.4419446234765909e-05, + "loss": 0.5104, + "step": 21625 + }, + { + "epoch": 0.3736867569809234, + "grad_norm": 1.693199002318995, + "learning_rate": 1.4418944198521586e-05, + "loss": 0.4532, + "step": 21626 + }, + { + "epoch": 0.3737040364943323, + "grad_norm": 0.6702270073394138, + "learning_rate": 1.4418442148436789e-05, + "loss": 0.471, + "step": 21627 + }, + { + "epoch": 0.3737213160077412, + "grad_norm": 1.4960265079298263, + "learning_rate": 1.44179400845131e-05, + "loss": 0.5636, + "step": 21628 + }, + { + "epoch": 0.37373859552115013, + "grad_norm": 1.1375384277272063, + "learning_rate": 1.4417438006752085e-05, + "loss": 0.4204, + "step": 21629 + }, + { + "epoch": 0.37375587503455904, + "grad_norm": 0.9052498251299314, + "learning_rate": 1.441693591515532e-05, + "loss": 0.5436, + "step": 21630 + }, + { + "epoch": 0.37377315454796795, + "grad_norm": 1.2997269711304162, + "learning_rate": 1.4416433809724373e-05, + "loss": 0.4223, + "step": 21631 + }, + { + "epoch": 0.37379043406137685, + "grad_norm": 1.5196298668785762, + "learning_rate": 1.441593169046082e-05, + "loss": 0.5421, + "step": 21632 + }, + { + "epoch": 0.37380771357478576, + "grad_norm": 0.9089254395496694, + "learning_rate": 1.4415429557366236e-05, + "loss": 0.4354, + "step": 21633 + }, + { + "epoch": 0.3738249930881946, + "grad_norm": 0.7329166481051909, + "learning_rate": 1.441492741044219e-05, + "loss": 0.3504, + "step": 21634 + }, + { + "epoch": 0.3738422726016035, + "grad_norm": 0.6557394856580997, + "learning_rate": 1.4414425249690253e-05, + "loss": 0.4056, + "step": 21635 + }, + { + "epoch": 0.37385955211501243, + "grad_norm": 0.8149991428513598, + "learning_rate": 1.4413923075112004e-05, + "loss": 0.3947, + "step": 21636 + }, + { + "epoch": 0.37387683162842134, + "grad_norm": 0.8339360770207751, + "learning_rate": 1.4413420886709014e-05, + "loss": 0.4092, + "step": 21637 + }, + { + "epoch": 0.37389411114183024, + "grad_norm": 1.0185276989676384, + "learning_rate": 1.441291868448285e-05, + "loss": 0.4396, + "step": 21638 + }, + { + "epoch": 0.37391139065523915, + "grad_norm": 1.4088397946678117, + "learning_rate": 1.4412416468435093e-05, + "loss": 0.6526, + "step": 21639 + }, + { + "epoch": 0.37392867016864806, + "grad_norm": 1.0473236731056752, + "learning_rate": 1.441191423856731e-05, + "loss": 0.6114, + "step": 21640 + }, + { + "epoch": 0.37394594968205697, + "grad_norm": 1.1527591448812506, + "learning_rate": 1.4411411994881077e-05, + "loss": 0.8296, + "step": 21641 + }, + { + "epoch": 0.3739632291954659, + "grad_norm": 0.8883214754656277, + "learning_rate": 1.4410909737377967e-05, + "loss": 0.5407, + "step": 21642 + }, + { + "epoch": 0.3739805087088748, + "grad_norm": 0.9923761674895835, + "learning_rate": 1.441040746605955e-05, + "loss": 0.4123, + "step": 21643 + }, + { + "epoch": 0.37399778822228363, + "grad_norm": 0.6299606600708507, + "learning_rate": 1.4409905180927404e-05, + "loss": 0.8069, + "step": 21644 + }, + { + "epoch": 0.37401506773569254, + "grad_norm": 0.8815532147704551, + "learning_rate": 1.4409402881983099e-05, + "loss": 0.4418, + "step": 21645 + }, + { + "epoch": 0.37403234724910145, + "grad_norm": 0.8011063855604611, + "learning_rate": 1.4408900569228211e-05, + "loss": 0.4258, + "step": 21646 + }, + { + "epoch": 0.37404962676251036, + "grad_norm": 0.9927734836779512, + "learning_rate": 1.4408398242664308e-05, + "loss": 0.4839, + "step": 21647 + }, + { + "epoch": 0.37406690627591926, + "grad_norm": 0.9760735235191775, + "learning_rate": 1.4407895902292968e-05, + "loss": 0.3982, + "step": 21648 + }, + { + "epoch": 0.37408418578932817, + "grad_norm": 1.2472369265716896, + "learning_rate": 1.4407393548115765e-05, + "loss": 0.6609, + "step": 21649 + }, + { + "epoch": 0.3741014653027371, + "grad_norm": 0.7064845645277754, + "learning_rate": 1.4406891180134268e-05, + "loss": 0.383, + "step": 21650 + }, + { + "epoch": 0.374118744816146, + "grad_norm": 1.2346294679357255, + "learning_rate": 1.4406388798350054e-05, + "loss": 0.5247, + "step": 21651 + }, + { + "epoch": 0.3741360243295549, + "grad_norm": 0.45861634795036377, + "learning_rate": 1.4405886402764695e-05, + "loss": 0.6123, + "step": 21652 + }, + { + "epoch": 0.3741533038429638, + "grad_norm": 0.8654913782408559, + "learning_rate": 1.4405383993379764e-05, + "loss": 0.4188, + "step": 21653 + }, + { + "epoch": 0.3741705833563727, + "grad_norm": 1.641540393207396, + "learning_rate": 1.4404881570196838e-05, + "loss": 0.3694, + "step": 21654 + }, + { + "epoch": 0.37418786286978156, + "grad_norm": 0.8688298097200471, + "learning_rate": 1.4404379133217486e-05, + "loss": 0.456, + "step": 21655 + }, + { + "epoch": 0.37420514238319047, + "grad_norm": 1.1508723292234937, + "learning_rate": 1.4403876682443286e-05, + "loss": 0.447, + "step": 21656 + }, + { + "epoch": 0.3742224218965994, + "grad_norm": 0.662773560596446, + "learning_rate": 1.4403374217875807e-05, + "loss": 0.5528, + "step": 21657 + }, + { + "epoch": 0.3742397014100083, + "grad_norm": 0.8824851732835697, + "learning_rate": 1.4402871739516626e-05, + "loss": 0.428, + "step": 21658 + }, + { + "epoch": 0.3742569809234172, + "grad_norm": 0.963047357297546, + "learning_rate": 1.4402369247367319e-05, + "loss": 0.4007, + "step": 21659 + }, + { + "epoch": 0.3742742604368261, + "grad_norm": 1.1945129887591117, + "learning_rate": 1.4401866741429456e-05, + "loss": 0.4945, + "step": 21660 + }, + { + "epoch": 0.374291539950235, + "grad_norm": 1.4588341657513135, + "learning_rate": 1.4401364221704608e-05, + "loss": 0.5644, + "step": 21661 + }, + { + "epoch": 0.3743088194636439, + "grad_norm": 1.120532410517339, + "learning_rate": 1.4400861688194357e-05, + "loss": 0.4783, + "step": 21662 + }, + { + "epoch": 0.3743260989770528, + "grad_norm": 0.9672109430689909, + "learning_rate": 1.4400359140900271e-05, + "loss": 0.4801, + "step": 21663 + }, + { + "epoch": 0.37434337849046173, + "grad_norm": 0.7103775029677782, + "learning_rate": 1.4399856579823923e-05, + "loss": 0.5029, + "step": 21664 + }, + { + "epoch": 0.3743606580038706, + "grad_norm": 0.8194270608054163, + "learning_rate": 1.4399354004966895e-05, + "loss": 0.4408, + "step": 21665 + }, + { + "epoch": 0.3743779375172795, + "grad_norm": 0.9184976029627671, + "learning_rate": 1.4398851416330752e-05, + "loss": 0.3565, + "step": 21666 + }, + { + "epoch": 0.3743952170306884, + "grad_norm": 0.9528494573747925, + "learning_rate": 1.4398348813917071e-05, + "loss": 0.504, + "step": 21667 + }, + { + "epoch": 0.3744124965440973, + "grad_norm": 0.7723665890053217, + "learning_rate": 1.4397846197727431e-05, + "loss": 0.3733, + "step": 21668 + }, + { + "epoch": 0.3744297760575062, + "grad_norm": 0.7677962077845589, + "learning_rate": 1.4397343567763402e-05, + "loss": 0.5004, + "step": 21669 + }, + { + "epoch": 0.3744470555709151, + "grad_norm": 0.9097122115125449, + "learning_rate": 1.4396840924026558e-05, + "loss": 0.4478, + "step": 21670 + }, + { + "epoch": 0.37446433508432403, + "grad_norm": 0.48007149533501087, + "learning_rate": 1.4396338266518475e-05, + "loss": 0.8278, + "step": 21671 + }, + { + "epoch": 0.37448161459773294, + "grad_norm": 0.9106181028387403, + "learning_rate": 1.4395835595240724e-05, + "loss": 0.4105, + "step": 21672 + }, + { + "epoch": 0.37449889411114184, + "grad_norm": 0.4601149356217118, + "learning_rate": 1.4395332910194883e-05, + "loss": 0.4956, + "step": 21673 + }, + { + "epoch": 0.37451617362455075, + "grad_norm": 0.7894939935239856, + "learning_rate": 1.4394830211382527e-05, + "loss": 0.4601, + "step": 21674 + }, + { + "epoch": 0.37453345313795966, + "grad_norm": 1.061935641287815, + "learning_rate": 1.4394327498805227e-05, + "loss": 0.7542, + "step": 21675 + }, + { + "epoch": 0.3745507326513685, + "grad_norm": 0.5425704956948759, + "learning_rate": 1.439382477246456e-05, + "loss": 0.871, + "step": 21676 + }, + { + "epoch": 0.3745680121647774, + "grad_norm": 0.9333745778091272, + "learning_rate": 1.43933220323621e-05, + "loss": 0.3143, + "step": 21677 + }, + { + "epoch": 0.3745852916781863, + "grad_norm": 0.8690296686655113, + "learning_rate": 1.439281927849942e-05, + "loss": 0.2943, + "step": 21678 + }, + { + "epoch": 0.37460257119159523, + "grad_norm": 1.118660332922925, + "learning_rate": 1.4392316510878096e-05, + "loss": 0.5632, + "step": 21679 + }, + { + "epoch": 0.37461985070500414, + "grad_norm": 0.8179833774165486, + "learning_rate": 1.4391813729499708e-05, + "loss": 0.5253, + "step": 21680 + }, + { + "epoch": 0.37463713021841305, + "grad_norm": 0.9720442999746521, + "learning_rate": 1.4391310934365821e-05, + "loss": 0.7052, + "step": 21681 + }, + { + "epoch": 0.37465440973182196, + "grad_norm": 1.0068434445820678, + "learning_rate": 1.4390808125478015e-05, + "loss": 0.3423, + "step": 21682 + }, + { + "epoch": 0.37467168924523087, + "grad_norm": 0.8028601305486923, + "learning_rate": 1.4390305302837866e-05, + "loss": 0.6371, + "step": 21683 + }, + { + "epoch": 0.3746889687586398, + "grad_norm": 0.4022312808998879, + "learning_rate": 1.4389802466446942e-05, + "loss": 0.6305, + "step": 21684 + }, + { + "epoch": 0.3747062482720487, + "grad_norm": 1.3506020707296582, + "learning_rate": 1.4389299616306829e-05, + "loss": 0.5136, + "step": 21685 + }, + { + "epoch": 0.3747235277854576, + "grad_norm": 0.9294547559313763, + "learning_rate": 1.4388796752419094e-05, + "loss": 0.4226, + "step": 21686 + }, + { + "epoch": 0.37474080729886644, + "grad_norm": 1.3984686214705548, + "learning_rate": 1.4388293874785313e-05, + "loss": 0.4172, + "step": 21687 + }, + { + "epoch": 0.37475808681227535, + "grad_norm": 1.2311188586306725, + "learning_rate": 1.4387790983407062e-05, + "loss": 0.3146, + "step": 21688 + }, + { + "epoch": 0.37477536632568426, + "grad_norm": 0.8209793676855326, + "learning_rate": 1.4387288078285916e-05, + "loss": 0.4224, + "step": 21689 + }, + { + "epoch": 0.37479264583909316, + "grad_norm": 0.6409021154918197, + "learning_rate": 1.4386785159423454e-05, + "loss": 0.3906, + "step": 21690 + }, + { + "epoch": 0.37480992535250207, + "grad_norm": 1.1181303569568026, + "learning_rate": 1.4386282226821245e-05, + "loss": 0.3476, + "step": 21691 + }, + { + "epoch": 0.374827204865911, + "grad_norm": 0.9838559484645905, + "learning_rate": 1.4385779280480865e-05, + "loss": 0.496, + "step": 21692 + }, + { + "epoch": 0.3748444843793199, + "grad_norm": 0.8379437311913631, + "learning_rate": 1.4385276320403894e-05, + "loss": 0.5962, + "step": 21693 + }, + { + "epoch": 0.3748617638927288, + "grad_norm": 0.787507942806923, + "learning_rate": 1.4384773346591902e-05, + "loss": 0.4128, + "step": 21694 + }, + { + "epoch": 0.3748790434061377, + "grad_norm": 0.9679282955789008, + "learning_rate": 1.4384270359046467e-05, + "loss": 0.4776, + "step": 21695 + }, + { + "epoch": 0.3748963229195466, + "grad_norm": 1.2189295078431608, + "learning_rate": 1.4383767357769166e-05, + "loss": 0.6753, + "step": 21696 + }, + { + "epoch": 0.37491360243295546, + "grad_norm": 1.0723788895707231, + "learning_rate": 1.4383264342761569e-05, + "loss": 0.4729, + "step": 21697 + }, + { + "epoch": 0.37493088194636437, + "grad_norm": 0.8574039299828831, + "learning_rate": 1.4382761314025255e-05, + "loss": 0.3904, + "step": 21698 + }, + { + "epoch": 0.3749481614597733, + "grad_norm": 0.6771314303260337, + "learning_rate": 1.4382258271561805e-05, + "loss": 0.3604, + "step": 21699 + }, + { + "epoch": 0.3749654409731822, + "grad_norm": 0.5420275483931607, + "learning_rate": 1.4381755215372786e-05, + "loss": 0.4932, + "step": 21700 + }, + { + "epoch": 0.3749827204865911, + "grad_norm": 0.7412128736377929, + "learning_rate": 1.4381252145459775e-05, + "loss": 0.4415, + "step": 21701 + }, + { + "epoch": 0.375, + "grad_norm": 1.1833814403187537, + "learning_rate": 1.4380749061824352e-05, + "loss": 0.501, + "step": 21702 + }, + { + "epoch": 0.3750172795134089, + "grad_norm": 1.0610302159861005, + "learning_rate": 1.4380245964468088e-05, + "loss": 0.5583, + "step": 21703 + }, + { + "epoch": 0.3750345590268178, + "grad_norm": 0.8860593909711093, + "learning_rate": 1.437974285339256e-05, + "loss": 0.6, + "step": 21704 + }, + { + "epoch": 0.3750518385402267, + "grad_norm": 1.0091953024659892, + "learning_rate": 1.4379239728599345e-05, + "loss": 0.4006, + "step": 21705 + }, + { + "epoch": 0.37506911805363563, + "grad_norm": 0.9913822645393573, + "learning_rate": 1.4378736590090021e-05, + "loss": 0.3674, + "step": 21706 + }, + { + "epoch": 0.37508639756704454, + "grad_norm": 0.44105701542775044, + "learning_rate": 1.4378233437866158e-05, + "loss": 0.8181, + "step": 21707 + }, + { + "epoch": 0.3751036770804534, + "grad_norm": 1.0719363561485902, + "learning_rate": 1.4377730271929338e-05, + "loss": 0.3557, + "step": 21708 + }, + { + "epoch": 0.3751209565938623, + "grad_norm": 1.0423162629433091, + "learning_rate": 1.4377227092281132e-05, + "loss": 0.6099, + "step": 21709 + }, + { + "epoch": 0.3751382361072712, + "grad_norm": 0.782089177712411, + "learning_rate": 1.4376723898923118e-05, + "loss": 0.3207, + "step": 21710 + }, + { + "epoch": 0.3751555156206801, + "grad_norm": 1.1034607428598968, + "learning_rate": 1.4376220691856875e-05, + "loss": 0.3692, + "step": 21711 + }, + { + "epoch": 0.375172795134089, + "grad_norm": 0.9920068864767274, + "learning_rate": 1.4375717471083972e-05, + "loss": 0.5354, + "step": 21712 + }, + { + "epoch": 0.37519007464749793, + "grad_norm": 0.8848016285813632, + "learning_rate": 1.4375214236605992e-05, + "loss": 0.5432, + "step": 21713 + }, + { + "epoch": 0.37520735416090684, + "grad_norm": 0.9726609832652878, + "learning_rate": 1.437471098842451e-05, + "loss": 0.4445, + "step": 21714 + }, + { + "epoch": 0.37522463367431574, + "grad_norm": 1.1536735724544624, + "learning_rate": 1.4374207726541095e-05, + "loss": 0.4588, + "step": 21715 + }, + { + "epoch": 0.37524191318772465, + "grad_norm": 0.7935571734346625, + "learning_rate": 1.4373704450957333e-05, + "loss": 0.476, + "step": 21716 + }, + { + "epoch": 0.37525919270113356, + "grad_norm": 1.037511527603825, + "learning_rate": 1.4373201161674797e-05, + "loss": 0.3806, + "step": 21717 + }, + { + "epoch": 0.3752764722145424, + "grad_norm": 0.5087481941726777, + "learning_rate": 1.4372697858695062e-05, + "loss": 0.9707, + "step": 21718 + }, + { + "epoch": 0.3752937517279513, + "grad_norm": 1.4603441151707066, + "learning_rate": 1.4372194542019705e-05, + "loss": 0.4931, + "step": 21719 + }, + { + "epoch": 0.3753110312413602, + "grad_norm": 1.2512854736467807, + "learning_rate": 1.43716912116503e-05, + "loss": 0.4875, + "step": 21720 + }, + { + "epoch": 0.37532831075476913, + "grad_norm": 0.9757366552296294, + "learning_rate": 1.437118786758843e-05, + "loss": 0.4663, + "step": 21721 + }, + { + "epoch": 0.37534559026817804, + "grad_norm": 0.9460457625231921, + "learning_rate": 1.4370684509835666e-05, + "loss": 0.3651, + "step": 21722 + }, + { + "epoch": 0.37536286978158695, + "grad_norm": 1.0345400715825035, + "learning_rate": 1.4370181138393584e-05, + "loss": 0.373, + "step": 21723 + }, + { + "epoch": 0.37538014929499586, + "grad_norm": 1.0460977620196905, + "learning_rate": 1.4369677753263762e-05, + "loss": 0.4166, + "step": 21724 + }, + { + "epoch": 0.37539742880840477, + "grad_norm": 0.6432449287933362, + "learning_rate": 1.436917435444778e-05, + "loss": 0.3993, + "step": 21725 + }, + { + "epoch": 0.3754147083218137, + "grad_norm": 0.8155697001155073, + "learning_rate": 1.4368670941947212e-05, + "loss": 0.367, + "step": 21726 + }, + { + "epoch": 0.3754319878352226, + "grad_norm": 0.8217915198413498, + "learning_rate": 1.436816751576363e-05, + "loss": 0.5235, + "step": 21727 + }, + { + "epoch": 0.3754492673486315, + "grad_norm": 1.1317320211936064, + "learning_rate": 1.4367664075898619e-05, + "loss": 0.5972, + "step": 21728 + }, + { + "epoch": 0.37546654686204034, + "grad_norm": 0.5730352127430788, + "learning_rate": 1.4367160622353752e-05, + "loss": 0.5809, + "step": 21729 + }, + { + "epoch": 0.37548382637544925, + "grad_norm": 1.4850300856515468, + "learning_rate": 1.4366657155130606e-05, + "loss": 0.3949, + "step": 21730 + }, + { + "epoch": 0.37550110588885816, + "grad_norm": 1.170414097578094, + "learning_rate": 1.436615367423076e-05, + "loss": 0.6542, + "step": 21731 + }, + { + "epoch": 0.37551838540226706, + "grad_norm": 0.7779170658734669, + "learning_rate": 1.4365650179655785e-05, + "loss": 0.3887, + "step": 21732 + }, + { + "epoch": 0.37553566491567597, + "grad_norm": 0.7171179625625447, + "learning_rate": 1.4365146671407266e-05, + "loss": 0.4669, + "step": 21733 + }, + { + "epoch": 0.3755529444290849, + "grad_norm": 1.521340618262223, + "learning_rate": 1.4364643149486774e-05, + "loss": 0.5381, + "step": 21734 + }, + { + "epoch": 0.3755702239424938, + "grad_norm": 1.160667980958768, + "learning_rate": 1.4364139613895884e-05, + "loss": 0.5171, + "step": 21735 + }, + { + "epoch": 0.3755875034559027, + "grad_norm": 0.6330695435453007, + "learning_rate": 1.4363636064636182e-05, + "loss": 0.5997, + "step": 21736 + }, + { + "epoch": 0.3756047829693116, + "grad_norm": 0.9647069855253014, + "learning_rate": 1.436313250170924e-05, + "loss": 0.437, + "step": 21737 + }, + { + "epoch": 0.3756220624827205, + "grad_norm": 0.9179485544229327, + "learning_rate": 1.4362628925116632e-05, + "loss": 0.5274, + "step": 21738 + }, + { + "epoch": 0.3756393419961294, + "grad_norm": 0.9698352704018961, + "learning_rate": 1.4362125334859942e-05, + "loss": 0.5807, + "step": 21739 + }, + { + "epoch": 0.37565662150953827, + "grad_norm": 0.6738515094717036, + "learning_rate": 1.4361621730940742e-05, + "loss": 0.3225, + "step": 21740 + }, + { + "epoch": 0.3756739010229472, + "grad_norm": 0.768646954553812, + "learning_rate": 1.4361118113360613e-05, + "loss": 0.3665, + "step": 21741 + }, + { + "epoch": 0.3756911805363561, + "grad_norm": 0.8408279180480314, + "learning_rate": 1.436061448212113e-05, + "loss": 0.3624, + "step": 21742 + }, + { + "epoch": 0.375708460049765, + "grad_norm": 0.990571517316442, + "learning_rate": 1.4360110837223873e-05, + "loss": 0.399, + "step": 21743 + }, + { + "epoch": 0.3757257395631739, + "grad_norm": 1.5956453811282882, + "learning_rate": 1.4359607178670414e-05, + "loss": 0.4807, + "step": 21744 + }, + { + "epoch": 0.3757430190765828, + "grad_norm": 1.1484917952610603, + "learning_rate": 1.4359103506462335e-05, + "loss": 0.6246, + "step": 21745 + }, + { + "epoch": 0.3757602985899917, + "grad_norm": 0.8075797518441231, + "learning_rate": 1.4358599820601214e-05, + "loss": 0.4811, + "step": 21746 + }, + { + "epoch": 0.3757775781034006, + "grad_norm": 1.540884080450957, + "learning_rate": 1.4358096121088623e-05, + "loss": 0.5353, + "step": 21747 + }, + { + "epoch": 0.37579485761680953, + "grad_norm": 0.7783408215632386, + "learning_rate": 1.4357592407926149e-05, + "loss": 0.4263, + "step": 21748 + }, + { + "epoch": 0.37581213713021844, + "grad_norm": 0.7390879801278751, + "learning_rate": 1.4357088681115362e-05, + "loss": 0.373, + "step": 21749 + }, + { + "epoch": 0.3758294166436273, + "grad_norm": 0.9742414089542702, + "learning_rate": 1.435658494065784e-05, + "loss": 0.4626, + "step": 21750 + }, + { + "epoch": 0.3758466961570362, + "grad_norm": 1.492627957501958, + "learning_rate": 1.4356081186555164e-05, + "loss": 0.5683, + "step": 21751 + }, + { + "epoch": 0.3758639756704451, + "grad_norm": 2.28671373653847, + "learning_rate": 1.4355577418808913e-05, + "loss": 0.5078, + "step": 21752 + }, + { + "epoch": 0.375881255183854, + "grad_norm": 1.0462515077433223, + "learning_rate": 1.4355073637420662e-05, + "loss": 0.5428, + "step": 21753 + }, + { + "epoch": 0.3758985346972629, + "grad_norm": 0.8824281548048519, + "learning_rate": 1.435456984239199e-05, + "loss": 0.4409, + "step": 21754 + }, + { + "epoch": 0.37591581421067183, + "grad_norm": 0.9650221942598556, + "learning_rate": 1.4354066033724472e-05, + "loss": 0.5516, + "step": 21755 + }, + { + "epoch": 0.37593309372408074, + "grad_norm": 0.9711262990658178, + "learning_rate": 1.435356221141969e-05, + "loss": 0.4774, + "step": 21756 + }, + { + "epoch": 0.37595037323748964, + "grad_norm": 1.1415581115503182, + "learning_rate": 1.435305837547922e-05, + "loss": 0.4336, + "step": 21757 + }, + { + "epoch": 0.37596765275089855, + "grad_norm": 0.9421027027171802, + "learning_rate": 1.4352554525904637e-05, + "loss": 0.3066, + "step": 21758 + }, + { + "epoch": 0.37598493226430746, + "grad_norm": 0.5040545121850513, + "learning_rate": 1.4352050662697525e-05, + "loss": 0.6229, + "step": 21759 + }, + { + "epoch": 0.37600221177771637, + "grad_norm": 0.7173010780625946, + "learning_rate": 1.4351546785859461e-05, + "loss": 0.4738, + "step": 21760 + }, + { + "epoch": 0.3760194912911252, + "grad_norm": 0.3864684481728502, + "learning_rate": 1.435104289539202e-05, + "loss": 0.5795, + "step": 21761 + }, + { + "epoch": 0.3760367708045341, + "grad_norm": 1.0664084908515248, + "learning_rate": 1.4350538991296785e-05, + "loss": 0.3864, + "step": 21762 + }, + { + "epoch": 0.37605405031794303, + "grad_norm": 1.090550915969561, + "learning_rate": 1.435003507357533e-05, + "loss": 0.6199, + "step": 21763 + }, + { + "epoch": 0.37607132983135194, + "grad_norm": 0.8815494436565515, + "learning_rate": 1.4349531142229231e-05, + "loss": 0.5045, + "step": 21764 + }, + { + "epoch": 0.37608860934476085, + "grad_norm": 0.6482183939582067, + "learning_rate": 1.4349027197260072e-05, + "loss": 0.3539, + "step": 21765 + }, + { + "epoch": 0.37610588885816976, + "grad_norm": 0.9031554865678701, + "learning_rate": 1.4348523238669432e-05, + "loss": 0.722, + "step": 21766 + }, + { + "epoch": 0.37612316837157866, + "grad_norm": 1.1544311896521413, + "learning_rate": 1.4348019266458883e-05, + "loss": 0.4224, + "step": 21767 + }, + { + "epoch": 0.37614044788498757, + "grad_norm": 0.9891807217673434, + "learning_rate": 1.4347515280630009e-05, + "loss": 0.3179, + "step": 21768 + }, + { + "epoch": 0.3761577273983965, + "grad_norm": 1.2537408049034142, + "learning_rate": 1.4347011281184386e-05, + "loss": 0.505, + "step": 21769 + }, + { + "epoch": 0.3761750069118054, + "grad_norm": 0.9153032104269369, + "learning_rate": 1.4346507268123593e-05, + "loss": 0.3494, + "step": 21770 + }, + { + "epoch": 0.37619228642521424, + "grad_norm": 1.1178078670669078, + "learning_rate": 1.434600324144921e-05, + "loss": 0.5983, + "step": 21771 + }, + { + "epoch": 0.37620956593862315, + "grad_norm": 1.002232357554197, + "learning_rate": 1.4345499201162814e-05, + "loss": 0.5711, + "step": 21772 + }, + { + "epoch": 0.37622684545203205, + "grad_norm": 1.08714539308099, + "learning_rate": 1.4344995147265985e-05, + "loss": 0.6098, + "step": 21773 + }, + { + "epoch": 0.37624412496544096, + "grad_norm": 0.8213872233346137, + "learning_rate": 1.43444910797603e-05, + "loss": 0.358, + "step": 21774 + }, + { + "epoch": 0.37626140447884987, + "grad_norm": 1.4695191082573082, + "learning_rate": 1.4343986998647339e-05, + "loss": 0.5471, + "step": 21775 + }, + { + "epoch": 0.3762786839922588, + "grad_norm": 1.1732250775202497, + "learning_rate": 1.434348290392868e-05, + "loss": 0.5514, + "step": 21776 + }, + { + "epoch": 0.3762959635056677, + "grad_norm": 1.2428750804975073, + "learning_rate": 1.4342978795605904e-05, + "loss": 0.4082, + "step": 21777 + }, + { + "epoch": 0.3763132430190766, + "grad_norm": 0.9033575992360615, + "learning_rate": 1.4342474673680586e-05, + "loss": 0.5365, + "step": 21778 + }, + { + "epoch": 0.3763305225324855, + "grad_norm": 0.9516998786821481, + "learning_rate": 1.4341970538154311e-05, + "loss": 0.4603, + "step": 21779 + }, + { + "epoch": 0.3763478020458944, + "grad_norm": 0.8552047010468199, + "learning_rate": 1.4341466389028652e-05, + "loss": 0.4545, + "step": 21780 + }, + { + "epoch": 0.3763650815593033, + "grad_norm": 1.4805486499298628, + "learning_rate": 1.4340962226305189e-05, + "loss": 0.4958, + "step": 21781 + }, + { + "epoch": 0.37638236107271217, + "grad_norm": 0.7739250637451113, + "learning_rate": 1.4340458049985504e-05, + "loss": 0.4665, + "step": 21782 + }, + { + "epoch": 0.3763996405861211, + "grad_norm": 0.9087544787672193, + "learning_rate": 1.4339953860071178e-05, + "loss": 0.5693, + "step": 21783 + }, + { + "epoch": 0.37641692009953, + "grad_norm": 1.1457113954433447, + "learning_rate": 1.433944965656378e-05, + "loss": 0.2601, + "step": 21784 + }, + { + "epoch": 0.3764341996129389, + "grad_norm": 0.8086451208475423, + "learning_rate": 1.4338945439464902e-05, + "loss": 0.4501, + "step": 21785 + }, + { + "epoch": 0.3764514791263478, + "grad_norm": 0.7693445163770364, + "learning_rate": 1.4338441208776112e-05, + "loss": 0.478, + "step": 21786 + }, + { + "epoch": 0.3764687586397567, + "grad_norm": 1.075289123110272, + "learning_rate": 1.4337936964498998e-05, + "loss": 0.5148, + "step": 21787 + }, + { + "epoch": 0.3764860381531656, + "grad_norm": 0.6410573413217202, + "learning_rate": 1.4337432706635132e-05, + "loss": 0.5406, + "step": 21788 + }, + { + "epoch": 0.3765033176665745, + "grad_norm": 1.0829862093665723, + "learning_rate": 1.4336928435186102e-05, + "loss": 0.2948, + "step": 21789 + }, + { + "epoch": 0.37652059717998343, + "grad_norm": 0.6862525518034679, + "learning_rate": 1.433642415015348e-05, + "loss": 0.5787, + "step": 21790 + }, + { + "epoch": 0.37653787669339234, + "grad_norm": 1.1737465889118233, + "learning_rate": 1.4335919851538847e-05, + "loss": 0.4125, + "step": 21791 + }, + { + "epoch": 0.3765551562068012, + "grad_norm": 0.917203064657358, + "learning_rate": 1.4335415539343785e-05, + "loss": 0.6624, + "step": 21792 + }, + { + "epoch": 0.3765724357202101, + "grad_norm": 1.1016470862674035, + "learning_rate": 1.4334911213569872e-05, + "loss": 0.4493, + "step": 21793 + }, + { + "epoch": 0.376589715233619, + "grad_norm": 0.4685602898051424, + "learning_rate": 1.4334406874218687e-05, + "loss": 0.6326, + "step": 21794 + }, + { + "epoch": 0.3766069947470279, + "grad_norm": 0.8335916357276713, + "learning_rate": 1.433390252129181e-05, + "loss": 0.4178, + "step": 21795 + }, + { + "epoch": 0.3766242742604368, + "grad_norm": 1.0478997851147724, + "learning_rate": 1.4333398154790821e-05, + "loss": 0.4957, + "step": 21796 + }, + { + "epoch": 0.3766415537738457, + "grad_norm": 0.717867142923632, + "learning_rate": 1.43328937747173e-05, + "loss": 0.3997, + "step": 21797 + }, + { + "epoch": 0.37665883328725464, + "grad_norm": 0.7815730182915194, + "learning_rate": 1.4332389381072825e-05, + "loss": 0.354, + "step": 21798 + }, + { + "epoch": 0.37667611280066354, + "grad_norm": 1.154670495084109, + "learning_rate": 1.433188497385898e-05, + "loss": 0.3715, + "step": 21799 + }, + { + "epoch": 0.37669339231407245, + "grad_norm": 1.2171595941667064, + "learning_rate": 1.433138055307734e-05, + "loss": 0.4581, + "step": 21800 + }, + { + "epoch": 0.37671067182748136, + "grad_norm": 1.350539505194686, + "learning_rate": 1.4330876118729487e-05, + "loss": 0.5633, + "step": 21801 + }, + { + "epoch": 0.37672795134089027, + "grad_norm": 1.4089693214095036, + "learning_rate": 1.4330371670817002e-05, + "loss": 0.3876, + "step": 21802 + }, + { + "epoch": 0.3767452308542991, + "grad_norm": 0.8077454334871227, + "learning_rate": 1.4329867209341467e-05, + "loss": 0.4365, + "step": 21803 + }, + { + "epoch": 0.376762510367708, + "grad_norm": 1.3887924509829062, + "learning_rate": 1.4329362734304453e-05, + "loss": 0.3715, + "step": 21804 + }, + { + "epoch": 0.37677978988111693, + "grad_norm": 1.817232171486665, + "learning_rate": 1.4328858245707549e-05, + "loss": 0.6145, + "step": 21805 + }, + { + "epoch": 0.37679706939452584, + "grad_norm": 1.0219019086679968, + "learning_rate": 1.432835374355233e-05, + "loss": 0.61, + "step": 21806 + }, + { + "epoch": 0.37681434890793475, + "grad_norm": 1.201084490317737, + "learning_rate": 1.4327849227840378e-05, + "loss": 0.5616, + "step": 21807 + }, + { + "epoch": 0.37683162842134366, + "grad_norm": 0.9253950458903669, + "learning_rate": 1.4327344698573275e-05, + "loss": 0.5725, + "step": 21808 + }, + { + "epoch": 0.37684890793475256, + "grad_norm": 0.7830805620014405, + "learning_rate": 1.4326840155752599e-05, + "loss": 0.5063, + "step": 21809 + }, + { + "epoch": 0.37686618744816147, + "grad_norm": 1.122123924664824, + "learning_rate": 1.432633559937993e-05, + "loss": 0.365, + "step": 21810 + }, + { + "epoch": 0.3768834669615704, + "grad_norm": 0.5486040056319071, + "learning_rate": 1.4325831029456852e-05, + "loss": 0.5724, + "step": 21811 + }, + { + "epoch": 0.3769007464749793, + "grad_norm": 0.9632609686385113, + "learning_rate": 1.432532644598494e-05, + "loss": 0.5653, + "step": 21812 + }, + { + "epoch": 0.3769180259883882, + "grad_norm": 1.0552321563792304, + "learning_rate": 1.4324821848965776e-05, + "loss": 0.579, + "step": 21813 + }, + { + "epoch": 0.37693530550179705, + "grad_norm": 0.42331931897470143, + "learning_rate": 1.4324317238400944e-05, + "loss": 0.6503, + "step": 21814 + }, + { + "epoch": 0.37695258501520595, + "grad_norm": 0.48677274849441765, + "learning_rate": 1.4323812614292022e-05, + "loss": 0.7048, + "step": 21815 + }, + { + "epoch": 0.37696986452861486, + "grad_norm": 0.6211328482012936, + "learning_rate": 1.4323307976640589e-05, + "loss": 0.2946, + "step": 21816 + }, + { + "epoch": 0.37698714404202377, + "grad_norm": 1.7543368072517025, + "learning_rate": 1.4322803325448225e-05, + "loss": 0.6533, + "step": 21817 + }, + { + "epoch": 0.3770044235554327, + "grad_norm": 1.3724579591052228, + "learning_rate": 1.4322298660716514e-05, + "loss": 0.3294, + "step": 21818 + }, + { + "epoch": 0.3770217030688416, + "grad_norm": 1.0560425305123502, + "learning_rate": 1.4321793982447034e-05, + "loss": 0.7082, + "step": 21819 + }, + { + "epoch": 0.3770389825822505, + "grad_norm": 1.1274765365349628, + "learning_rate": 1.432128929064137e-05, + "loss": 0.6194, + "step": 21820 + }, + { + "epoch": 0.3770562620956594, + "grad_norm": 1.064442737768813, + "learning_rate": 1.4320784585301094e-05, + "loss": 0.4514, + "step": 21821 + }, + { + "epoch": 0.3770735416090683, + "grad_norm": 2.3694297347656765, + "learning_rate": 1.4320279866427798e-05, + "loss": 0.5102, + "step": 21822 + }, + { + "epoch": 0.3770908211224772, + "grad_norm": 1.1277239368161418, + "learning_rate": 1.4319775134023055e-05, + "loss": 0.5872, + "step": 21823 + }, + { + "epoch": 0.37710810063588607, + "grad_norm": 1.2722505615673174, + "learning_rate": 1.4319270388088448e-05, + "loss": 0.5619, + "step": 21824 + }, + { + "epoch": 0.377125380149295, + "grad_norm": 1.0767209166414755, + "learning_rate": 1.4318765628625558e-05, + "loss": 0.4242, + "step": 21825 + }, + { + "epoch": 0.3771426596627039, + "grad_norm": 0.667997828985841, + "learning_rate": 1.4318260855635966e-05, + "loss": 0.4564, + "step": 21826 + }, + { + "epoch": 0.3771599391761128, + "grad_norm": 0.8598629347575931, + "learning_rate": 1.4317756069121251e-05, + "loss": 0.5124, + "step": 21827 + }, + { + "epoch": 0.3771772186895217, + "grad_norm": 0.8632362469207499, + "learning_rate": 1.4317251269082996e-05, + "loss": 0.5191, + "step": 21828 + }, + { + "epoch": 0.3771944982029306, + "grad_norm": 1.039442334813752, + "learning_rate": 1.4316746455522785e-05, + "loss": 0.5206, + "step": 21829 + }, + { + "epoch": 0.3772117777163395, + "grad_norm": 0.9280097103039163, + "learning_rate": 1.4316241628442193e-05, + "loss": 0.3774, + "step": 21830 + }, + { + "epoch": 0.3772290572297484, + "grad_norm": 0.8733063515270593, + "learning_rate": 1.4315736787842806e-05, + "loss": 0.3983, + "step": 21831 + }, + { + "epoch": 0.37724633674315733, + "grad_norm": 1.0820984104044127, + "learning_rate": 1.4315231933726203e-05, + "loss": 0.5543, + "step": 21832 + }, + { + "epoch": 0.37726361625656624, + "grad_norm": 0.8857113499220389, + "learning_rate": 1.4314727066093965e-05, + "loss": 0.3757, + "step": 21833 + }, + { + "epoch": 0.37728089576997514, + "grad_norm": 0.8361865106855871, + "learning_rate": 1.4314222184947677e-05, + "loss": 0.3685, + "step": 21834 + }, + { + "epoch": 0.377298175283384, + "grad_norm": 0.9765782656364563, + "learning_rate": 1.4313717290288912e-05, + "loss": 0.442, + "step": 21835 + }, + { + "epoch": 0.3773154547967929, + "grad_norm": 0.5676787116094392, + "learning_rate": 1.4313212382119261e-05, + "loss": 0.394, + "step": 21836 + }, + { + "epoch": 0.3773327343102018, + "grad_norm": 0.855833575387064, + "learning_rate": 1.43127074604403e-05, + "loss": 0.3073, + "step": 21837 + }, + { + "epoch": 0.3773500138236107, + "grad_norm": 0.7352791520429325, + "learning_rate": 1.431220252525361e-05, + "loss": 0.2967, + "step": 21838 + }, + { + "epoch": 0.3773672933370196, + "grad_norm": 0.7283653204230255, + "learning_rate": 1.4311697576560774e-05, + "loss": 0.4262, + "step": 21839 + }, + { + "epoch": 0.37738457285042853, + "grad_norm": 0.5812748940420732, + "learning_rate": 1.4311192614363374e-05, + "loss": 0.4567, + "step": 21840 + }, + { + "epoch": 0.37740185236383744, + "grad_norm": 0.8970052689969866, + "learning_rate": 1.431068763866299e-05, + "loss": 0.3976, + "step": 21841 + }, + { + "epoch": 0.37741913187724635, + "grad_norm": 1.374745511209265, + "learning_rate": 1.4310182649461206e-05, + "loss": 0.4682, + "step": 21842 + }, + { + "epoch": 0.37743641139065526, + "grad_norm": 0.9874188098657202, + "learning_rate": 1.4309677646759603e-05, + "loss": 0.3666, + "step": 21843 + }, + { + "epoch": 0.37745369090406417, + "grad_norm": 0.808378220721257, + "learning_rate": 1.4309172630559763e-05, + "loss": 0.4632, + "step": 21844 + }, + { + "epoch": 0.377470970417473, + "grad_norm": 1.3172568123117865, + "learning_rate": 1.4308667600863266e-05, + "loss": 0.4688, + "step": 21845 + }, + { + "epoch": 0.3774882499308819, + "grad_norm": 0.4347749407517822, + "learning_rate": 1.4308162557671694e-05, + "loss": 0.5216, + "step": 21846 + }, + { + "epoch": 0.37750552944429083, + "grad_norm": 1.0133283694689597, + "learning_rate": 1.430765750098663e-05, + "loss": 0.3478, + "step": 21847 + }, + { + "epoch": 0.37752280895769974, + "grad_norm": 0.9455705591917131, + "learning_rate": 1.4307152430809654e-05, + "loss": 0.5033, + "step": 21848 + }, + { + "epoch": 0.37754008847110865, + "grad_norm": 1.3169935277083051, + "learning_rate": 1.430664734714235e-05, + "loss": 0.4453, + "step": 21849 + }, + { + "epoch": 0.37755736798451756, + "grad_norm": 0.7459223537464688, + "learning_rate": 1.4306142249986299e-05, + "loss": 0.3798, + "step": 21850 + }, + { + "epoch": 0.37757464749792646, + "grad_norm": 1.028727249458635, + "learning_rate": 1.4305637139343085e-05, + "loss": 0.56, + "step": 21851 + }, + { + "epoch": 0.37759192701133537, + "grad_norm": 1.235251709345989, + "learning_rate": 1.4305132015214284e-05, + "loss": 0.3786, + "step": 21852 + }, + { + "epoch": 0.3776092065247443, + "grad_norm": 0.8319537026132846, + "learning_rate": 1.4304626877601487e-05, + "loss": 0.4096, + "step": 21853 + }, + { + "epoch": 0.3776264860381532, + "grad_norm": 1.4321919425815648, + "learning_rate": 1.4304121726506268e-05, + "loss": 0.425, + "step": 21854 + }, + { + "epoch": 0.3776437655515621, + "grad_norm": 0.9179655830168831, + "learning_rate": 1.4303616561930216e-05, + "loss": 0.495, + "step": 21855 + }, + { + "epoch": 0.37766104506497095, + "grad_norm": 0.8574922537170525, + "learning_rate": 1.4303111383874909e-05, + "loss": 0.4885, + "step": 21856 + }, + { + "epoch": 0.37767832457837985, + "grad_norm": 1.181298516375256, + "learning_rate": 1.4302606192341928e-05, + "loss": 0.4442, + "step": 21857 + }, + { + "epoch": 0.37769560409178876, + "grad_norm": 1.0584879196145753, + "learning_rate": 1.4302100987332859e-05, + "loss": 0.5556, + "step": 21858 + }, + { + "epoch": 0.37771288360519767, + "grad_norm": 1.6123425419955737, + "learning_rate": 1.430159576884928e-05, + "loss": 0.4909, + "step": 21859 + }, + { + "epoch": 0.3777301631186066, + "grad_norm": 0.9208295426832313, + "learning_rate": 1.4301090536892781e-05, + "loss": 0.788, + "step": 21860 + }, + { + "epoch": 0.3777474426320155, + "grad_norm": 0.6434958866579937, + "learning_rate": 1.4300585291464937e-05, + "loss": 0.3979, + "step": 21861 + }, + { + "epoch": 0.3777647221454244, + "grad_norm": 0.6963766581041081, + "learning_rate": 1.4300080032567332e-05, + "loss": 0.3323, + "step": 21862 + }, + { + "epoch": 0.3777820016588333, + "grad_norm": 0.5232158394463925, + "learning_rate": 1.4299574760201552e-05, + "loss": 0.8224, + "step": 21863 + }, + { + "epoch": 0.3777992811722422, + "grad_norm": 1.04693332627964, + "learning_rate": 1.4299069474369174e-05, + "loss": 0.503, + "step": 21864 + }, + { + "epoch": 0.3778165606856511, + "grad_norm": 0.49471576889119456, + "learning_rate": 1.4298564175071787e-05, + "loss": 0.736, + "step": 21865 + }, + { + "epoch": 0.37783384019905997, + "grad_norm": 1.5153097473230144, + "learning_rate": 1.429805886231097e-05, + "loss": 0.5811, + "step": 21866 + }, + { + "epoch": 0.3778511197124689, + "grad_norm": 1.551991282546142, + "learning_rate": 1.4297553536088304e-05, + "loss": 0.4287, + "step": 21867 + }, + { + "epoch": 0.3778683992258778, + "grad_norm": 0.7110613599243284, + "learning_rate": 1.4297048196405373e-05, + "loss": 0.3436, + "step": 21868 + }, + { + "epoch": 0.3778856787392867, + "grad_norm": 1.5264310487094794, + "learning_rate": 1.4296542843263763e-05, + "loss": 0.2785, + "step": 21869 + }, + { + "epoch": 0.3779029582526956, + "grad_norm": 0.8683417541245815, + "learning_rate": 1.4296037476665053e-05, + "loss": 0.5434, + "step": 21870 + }, + { + "epoch": 0.3779202377661045, + "grad_norm": 0.5907506006690741, + "learning_rate": 1.4295532096610826e-05, + "loss": 0.3195, + "step": 21871 + }, + { + "epoch": 0.3779375172795134, + "grad_norm": 1.5710788931113162, + "learning_rate": 1.429502670310267e-05, + "loss": 0.6101, + "step": 21872 + }, + { + "epoch": 0.3779547967929223, + "grad_norm": 0.4478437994460352, + "learning_rate": 1.4294521296142159e-05, + "loss": 0.5922, + "step": 21873 + }, + { + "epoch": 0.37797207630633123, + "grad_norm": 1.50039684000733, + "learning_rate": 1.4294015875730884e-05, + "loss": 0.3878, + "step": 21874 + }, + { + "epoch": 0.37798935581974014, + "grad_norm": 1.1952682240221744, + "learning_rate": 1.4293510441870426e-05, + "loss": 0.5256, + "step": 21875 + }, + { + "epoch": 0.37800663533314904, + "grad_norm": 0.7343132337391045, + "learning_rate": 1.4293004994562366e-05, + "loss": 0.3769, + "step": 21876 + }, + { + "epoch": 0.3780239148465579, + "grad_norm": 1.0757007788669883, + "learning_rate": 1.4292499533808286e-05, + "loss": 0.3934, + "step": 21877 + }, + { + "epoch": 0.3780411943599668, + "grad_norm": 1.00403139153682, + "learning_rate": 1.429199405960977e-05, + "loss": 0.5898, + "step": 21878 + }, + { + "epoch": 0.3780584738733757, + "grad_norm": 1.2598643854153846, + "learning_rate": 1.4291488571968409e-05, + "loss": 0.5397, + "step": 21879 + }, + { + "epoch": 0.3780757533867846, + "grad_norm": 0.811561639174457, + "learning_rate": 1.4290983070885776e-05, + "loss": 0.4668, + "step": 21880 + }, + { + "epoch": 0.3780930329001935, + "grad_norm": 0.7030844512640887, + "learning_rate": 1.4290477556363453e-05, + "loss": 0.3365, + "step": 21881 + }, + { + "epoch": 0.37811031241360243, + "grad_norm": 0.9033815987235004, + "learning_rate": 1.4289972028403036e-05, + "loss": 0.4342, + "step": 21882 + }, + { + "epoch": 0.37812759192701134, + "grad_norm": 1.1675889710318308, + "learning_rate": 1.4289466487006095e-05, + "loss": 0.3762, + "step": 21883 + }, + { + "epoch": 0.37814487144042025, + "grad_norm": 1.184364290267543, + "learning_rate": 1.4288960932174223e-05, + "loss": 0.4288, + "step": 21884 + }, + { + "epoch": 0.37816215095382916, + "grad_norm": 1.284831746704216, + "learning_rate": 1.4288455363908997e-05, + "loss": 0.3836, + "step": 21885 + }, + { + "epoch": 0.37817943046723806, + "grad_norm": 1.2972569294080567, + "learning_rate": 1.4287949782212008e-05, + "loss": 0.6027, + "step": 21886 + }, + { + "epoch": 0.378196709980647, + "grad_norm": 0.7561107028562379, + "learning_rate": 1.4287444187084826e-05, + "loss": 0.5182, + "step": 21887 + }, + { + "epoch": 0.3782139894940558, + "grad_norm": 1.0045649209714544, + "learning_rate": 1.4286938578529049e-05, + "loss": 0.6788, + "step": 21888 + }, + { + "epoch": 0.37823126900746473, + "grad_norm": 0.9180607500950917, + "learning_rate": 1.4286432956546252e-05, + "loss": 0.5001, + "step": 21889 + }, + { + "epoch": 0.37824854852087364, + "grad_norm": 0.8051502456821062, + "learning_rate": 1.4285927321138023e-05, + "loss": 0.5054, + "step": 21890 + }, + { + "epoch": 0.37826582803428255, + "grad_norm": 1.1501910914342832, + "learning_rate": 1.4285421672305942e-05, + "loss": 0.666, + "step": 21891 + }, + { + "epoch": 0.37828310754769146, + "grad_norm": 0.8334261766714105, + "learning_rate": 1.4284916010051598e-05, + "loss": 0.4804, + "step": 21892 + }, + { + "epoch": 0.37830038706110036, + "grad_norm": 1.492811600344578, + "learning_rate": 1.4284410334376567e-05, + "loss": 0.5714, + "step": 21893 + }, + { + "epoch": 0.37831766657450927, + "grad_norm": 0.9106718877932312, + "learning_rate": 1.4283904645282438e-05, + "loss": 0.4481, + "step": 21894 + }, + { + "epoch": 0.3783349460879182, + "grad_norm": 1.27892232084839, + "learning_rate": 1.4283398942770795e-05, + "loss": 0.4654, + "step": 21895 + }, + { + "epoch": 0.3783522256013271, + "grad_norm": 0.8418257036616857, + "learning_rate": 1.4282893226843222e-05, + "loss": 0.4151, + "step": 21896 + }, + { + "epoch": 0.378369505114736, + "grad_norm": 0.9284894664363229, + "learning_rate": 1.4282387497501302e-05, + "loss": 0.586, + "step": 21897 + }, + { + "epoch": 0.37838678462814485, + "grad_norm": 1.0642833091651176, + "learning_rate": 1.4281881754746616e-05, + "loss": 0.7386, + "step": 21898 + }, + { + "epoch": 0.37840406414155375, + "grad_norm": 1.287866452558938, + "learning_rate": 1.4281375998580753e-05, + "loss": 0.4861, + "step": 21899 + }, + { + "epoch": 0.37842134365496266, + "grad_norm": 0.6908496023011114, + "learning_rate": 1.4280870229005297e-05, + "loss": 0.401, + "step": 21900 + }, + { + "epoch": 0.37843862316837157, + "grad_norm": 0.9689340389764158, + "learning_rate": 1.4280364446021827e-05, + "loss": 0.5292, + "step": 21901 + }, + { + "epoch": 0.3784559026817805, + "grad_norm": 0.5809195223720355, + "learning_rate": 1.427985864963193e-05, + "loss": 0.9604, + "step": 21902 + }, + { + "epoch": 0.3784731821951894, + "grad_norm": 1.108935383912291, + "learning_rate": 1.4279352839837194e-05, + "loss": 0.563, + "step": 21903 + }, + { + "epoch": 0.3784904617085983, + "grad_norm": 1.383475475845417, + "learning_rate": 1.4278847016639195e-05, + "loss": 0.6144, + "step": 21904 + }, + { + "epoch": 0.3785077412220072, + "grad_norm": 1.2265865321327236, + "learning_rate": 1.4278341180039523e-05, + "loss": 0.5444, + "step": 21905 + }, + { + "epoch": 0.3785250207354161, + "grad_norm": 1.5431351607654544, + "learning_rate": 1.4277835330039767e-05, + "loss": 0.4926, + "step": 21906 + }, + { + "epoch": 0.378542300248825, + "grad_norm": 0.9742815041682436, + "learning_rate": 1.42773294666415e-05, + "loss": 0.4268, + "step": 21907 + }, + { + "epoch": 0.3785595797622339, + "grad_norm": 0.8735609383883379, + "learning_rate": 1.4276823589846314e-05, + "loss": 0.4581, + "step": 21908 + }, + { + "epoch": 0.3785768592756428, + "grad_norm": 1.256587169750737, + "learning_rate": 1.427631769965579e-05, + "loss": 0.4467, + "step": 21909 + }, + { + "epoch": 0.3785941387890517, + "grad_norm": 0.553923997105082, + "learning_rate": 1.4275811796071515e-05, + "loss": 0.4293, + "step": 21910 + }, + { + "epoch": 0.3786114183024606, + "grad_norm": 0.9858225927510846, + "learning_rate": 1.4275305879095071e-05, + "loss": 0.3891, + "step": 21911 + }, + { + "epoch": 0.3786286978158695, + "grad_norm": 1.0452235125780884, + "learning_rate": 1.4274799948728046e-05, + "loss": 0.5251, + "step": 21912 + }, + { + "epoch": 0.3786459773292784, + "grad_norm": 1.2156750057284493, + "learning_rate": 1.4274294004972023e-05, + "loss": 0.4993, + "step": 21913 + }, + { + "epoch": 0.3786632568426873, + "grad_norm": 1.2558550264807549, + "learning_rate": 1.4273788047828585e-05, + "loss": 0.492, + "step": 21914 + }, + { + "epoch": 0.3786805363560962, + "grad_norm": 0.902733660046486, + "learning_rate": 1.4273282077299317e-05, + "loss": 0.6256, + "step": 21915 + }, + { + "epoch": 0.3786978158695051, + "grad_norm": 0.7925990096234476, + "learning_rate": 1.4272776093385807e-05, + "loss": 0.6899, + "step": 21916 + }, + { + "epoch": 0.37871509538291404, + "grad_norm": 0.7661649202583147, + "learning_rate": 1.4272270096089637e-05, + "loss": 0.3836, + "step": 21917 + }, + { + "epoch": 0.37873237489632294, + "grad_norm": 0.6153733439920672, + "learning_rate": 1.4271764085412391e-05, + "loss": 0.4303, + "step": 21918 + }, + { + "epoch": 0.3787496544097318, + "grad_norm": 0.9731300639849633, + "learning_rate": 1.4271258061355657e-05, + "loss": 0.4444, + "step": 21919 + }, + { + "epoch": 0.3787669339231407, + "grad_norm": 2.69723337547164, + "learning_rate": 1.427075202392102e-05, + "loss": 0.4477, + "step": 21920 + }, + { + "epoch": 0.3787842134365496, + "grad_norm": 0.9157057117808187, + "learning_rate": 1.427024597311006e-05, + "loss": 0.3671, + "step": 21921 + }, + { + "epoch": 0.3788014929499585, + "grad_norm": 0.5458846982471783, + "learning_rate": 1.4269739908924366e-05, + "loss": 0.6718, + "step": 21922 + }, + { + "epoch": 0.3788187724633674, + "grad_norm": 0.7972805436156059, + "learning_rate": 1.426923383136552e-05, + "loss": 0.5012, + "step": 21923 + }, + { + "epoch": 0.37883605197677633, + "grad_norm": 1.2905393376162795, + "learning_rate": 1.4268727740435111e-05, + "loss": 0.3703, + "step": 21924 + }, + { + "epoch": 0.37885333149018524, + "grad_norm": 1.0560706011443879, + "learning_rate": 1.4268221636134722e-05, + "loss": 0.5238, + "step": 21925 + }, + { + "epoch": 0.37887061100359415, + "grad_norm": 1.0703539385815084, + "learning_rate": 1.426771551846594e-05, + "loss": 0.4971, + "step": 21926 + }, + { + "epoch": 0.37888789051700306, + "grad_norm": 0.8832697634597292, + "learning_rate": 1.4267209387430347e-05, + "loss": 0.3815, + "step": 21927 + }, + { + "epoch": 0.37890517003041196, + "grad_norm": 1.7693859627343782, + "learning_rate": 1.4266703243029531e-05, + "loss": 0.4077, + "step": 21928 + }, + { + "epoch": 0.37892244954382087, + "grad_norm": 1.3514455602542763, + "learning_rate": 1.4266197085265077e-05, + "loss": 0.5876, + "step": 21929 + }, + { + "epoch": 0.3789397290572297, + "grad_norm": 1.2560684936699236, + "learning_rate": 1.4265690914138567e-05, + "loss": 0.4413, + "step": 21930 + }, + { + "epoch": 0.37895700857063863, + "grad_norm": 0.8515894797510684, + "learning_rate": 1.4265184729651589e-05, + "loss": 0.4211, + "step": 21931 + }, + { + "epoch": 0.37897428808404754, + "grad_norm": 0.753984030156064, + "learning_rate": 1.4264678531805732e-05, + "loss": 0.3823, + "step": 21932 + }, + { + "epoch": 0.37899156759745645, + "grad_norm": 0.918698820243708, + "learning_rate": 1.4264172320602572e-05, + "loss": 0.379, + "step": 21933 + }, + { + "epoch": 0.37900884711086535, + "grad_norm": 0.6025488764451847, + "learning_rate": 1.4263666096043705e-05, + "loss": 0.4708, + "step": 21934 + }, + { + "epoch": 0.37902612662427426, + "grad_norm": 0.4707352305249185, + "learning_rate": 1.426315985813071e-05, + "loss": 0.7061, + "step": 21935 + }, + { + "epoch": 0.37904340613768317, + "grad_norm": 0.45573835690493847, + "learning_rate": 1.4262653606865172e-05, + "loss": 0.7133, + "step": 21936 + }, + { + "epoch": 0.3790606856510921, + "grad_norm": 1.3136329351545342, + "learning_rate": 1.4262147342248684e-05, + "loss": 0.3482, + "step": 21937 + }, + { + "epoch": 0.379077965164501, + "grad_norm": 1.1427861078414274, + "learning_rate": 1.4261641064282822e-05, + "loss": 0.5659, + "step": 21938 + }, + { + "epoch": 0.3790952446779099, + "grad_norm": 0.9935203843812839, + "learning_rate": 1.426113477296918e-05, + "loss": 0.3908, + "step": 21939 + }, + { + "epoch": 0.37911252419131874, + "grad_norm": 0.9465855543737809, + "learning_rate": 1.4260628468309338e-05, + "loss": 0.38, + "step": 21940 + }, + { + "epoch": 0.37912980370472765, + "grad_norm": 0.9557972276585666, + "learning_rate": 1.4260122150304883e-05, + "loss": 0.4492, + "step": 21941 + }, + { + "epoch": 0.37914708321813656, + "grad_norm": 1.012710399468024, + "learning_rate": 1.4259615818957402e-05, + "loss": 0.3643, + "step": 21942 + }, + { + "epoch": 0.37916436273154547, + "grad_norm": 0.48568064957589935, + "learning_rate": 1.4259109474268482e-05, + "loss": 0.227, + "step": 21943 + }, + { + "epoch": 0.3791816422449544, + "grad_norm": 1.6629635751530156, + "learning_rate": 1.4258603116239706e-05, + "loss": 0.5982, + "step": 21944 + }, + { + "epoch": 0.3791989217583633, + "grad_norm": 1.086587016543878, + "learning_rate": 1.4258096744872663e-05, + "loss": 0.5987, + "step": 21945 + }, + { + "epoch": 0.3792162012717722, + "grad_norm": 0.5217397475828297, + "learning_rate": 1.4257590360168938e-05, + "loss": 0.671, + "step": 21946 + }, + { + "epoch": 0.3792334807851811, + "grad_norm": 0.4851726115635237, + "learning_rate": 1.4257083962130111e-05, + "loss": 0.5976, + "step": 21947 + }, + { + "epoch": 0.37925076029859, + "grad_norm": 0.8503522374244304, + "learning_rate": 1.4256577550757782e-05, + "loss": 0.5731, + "step": 21948 + }, + { + "epoch": 0.3792680398119989, + "grad_norm": 0.872375309653808, + "learning_rate": 1.4256071126053523e-05, + "loss": 0.4122, + "step": 21949 + }, + { + "epoch": 0.3792853193254078, + "grad_norm": 0.7489921152187147, + "learning_rate": 1.4255564688018926e-05, + "loss": 0.4987, + "step": 21950 + }, + { + "epoch": 0.3793025988388167, + "grad_norm": 1.2483797227408018, + "learning_rate": 1.425505823665558e-05, + "loss": 0.5096, + "step": 21951 + }, + { + "epoch": 0.3793198783522256, + "grad_norm": 1.210778037514623, + "learning_rate": 1.4254551771965064e-05, + "loss": 0.4513, + "step": 21952 + }, + { + "epoch": 0.3793371578656345, + "grad_norm": 0.9617706009773256, + "learning_rate": 1.425404529394897e-05, + "loss": 0.471, + "step": 21953 + }, + { + "epoch": 0.3793544373790434, + "grad_norm": 0.9784105064443612, + "learning_rate": 1.4253538802608884e-05, + "loss": 0.4528, + "step": 21954 + }, + { + "epoch": 0.3793717168924523, + "grad_norm": 1.0421055485182678, + "learning_rate": 1.4253032297946392e-05, + "loss": 0.4121, + "step": 21955 + }, + { + "epoch": 0.3793889964058612, + "grad_norm": 0.7039159203317897, + "learning_rate": 1.4252525779963076e-05, + "loss": 0.5402, + "step": 21956 + }, + { + "epoch": 0.3794062759192701, + "grad_norm": 2.5373081834568256, + "learning_rate": 1.4252019248660529e-05, + "loss": 0.5179, + "step": 21957 + }, + { + "epoch": 0.379423555432679, + "grad_norm": 0.66844903520097, + "learning_rate": 1.4251512704040335e-05, + "loss": 0.7703, + "step": 21958 + }, + { + "epoch": 0.37944083494608793, + "grad_norm": 1.012801589282703, + "learning_rate": 1.425100614610408e-05, + "loss": 0.5346, + "step": 21959 + }, + { + "epoch": 0.37945811445949684, + "grad_norm": 0.6931848498288068, + "learning_rate": 1.4250499574853349e-05, + "loss": 0.4159, + "step": 21960 + }, + { + "epoch": 0.37947539397290575, + "grad_norm": 1.5856038855760985, + "learning_rate": 1.424999299028973e-05, + "loss": 0.3612, + "step": 21961 + }, + { + "epoch": 0.3794926734863146, + "grad_norm": 0.5389871318764784, + "learning_rate": 1.4249486392414811e-05, + "loss": 0.5272, + "step": 21962 + }, + { + "epoch": 0.3795099529997235, + "grad_norm": 1.6356369378954567, + "learning_rate": 1.424897978123018e-05, + "loss": 0.4828, + "step": 21963 + }, + { + "epoch": 0.3795272325131324, + "grad_norm": 0.9940471119714693, + "learning_rate": 1.4248473156737416e-05, + "loss": 0.5704, + "step": 21964 + }, + { + "epoch": 0.3795445120265413, + "grad_norm": 0.8322957454957571, + "learning_rate": 1.4247966518938115e-05, + "loss": 0.4767, + "step": 21965 + }, + { + "epoch": 0.37956179153995023, + "grad_norm": 1.28098098165172, + "learning_rate": 1.4247459867833859e-05, + "loss": 0.3313, + "step": 21966 + }, + { + "epoch": 0.37957907105335914, + "grad_norm": 0.9872280209841494, + "learning_rate": 1.4246953203426235e-05, + "loss": 0.5346, + "step": 21967 + }, + { + "epoch": 0.37959635056676805, + "grad_norm": 1.3329265187601582, + "learning_rate": 1.4246446525716835e-05, + "loss": 0.4598, + "step": 21968 + }, + { + "epoch": 0.37961363008017696, + "grad_norm": 1.4478382409671644, + "learning_rate": 1.424593983470724e-05, + "loss": 0.4391, + "step": 21969 + }, + { + "epoch": 0.37963090959358586, + "grad_norm": 1.0067634921149258, + "learning_rate": 1.4245433130399036e-05, + "loss": 0.3871, + "step": 21970 + }, + { + "epoch": 0.37964818910699477, + "grad_norm": 1.0652549671019609, + "learning_rate": 1.4244926412793813e-05, + "loss": 0.3932, + "step": 21971 + }, + { + "epoch": 0.3796654686204036, + "grad_norm": 1.1938737181828998, + "learning_rate": 1.4244419681893162e-05, + "loss": 0.499, + "step": 21972 + }, + { + "epoch": 0.37968274813381253, + "grad_norm": 0.5017635033306183, + "learning_rate": 1.4243912937698662e-05, + "loss": 0.8568, + "step": 21973 + }, + { + "epoch": 0.37970002764722144, + "grad_norm": 1.3501353217470329, + "learning_rate": 1.4243406180211905e-05, + "loss": 0.5935, + "step": 21974 + }, + { + "epoch": 0.37971730716063035, + "grad_norm": 0.8504037979948896, + "learning_rate": 1.4242899409434479e-05, + "loss": 0.6924, + "step": 21975 + }, + { + "epoch": 0.37973458667403925, + "grad_norm": 1.1568754441320968, + "learning_rate": 1.4242392625367965e-05, + "loss": 0.4615, + "step": 21976 + }, + { + "epoch": 0.37975186618744816, + "grad_norm": 1.4721232441677312, + "learning_rate": 1.424188582801396e-05, + "loss": 0.4108, + "step": 21977 + }, + { + "epoch": 0.37976914570085707, + "grad_norm": 0.9276428206948821, + "learning_rate": 1.4241379017374045e-05, + "loss": 0.4293, + "step": 21978 + }, + { + "epoch": 0.379786425214266, + "grad_norm": 1.0667895176096953, + "learning_rate": 1.4240872193449809e-05, + "loss": 0.386, + "step": 21979 + }, + { + "epoch": 0.3798037047276749, + "grad_norm": 0.4271517045273116, + "learning_rate": 1.4240365356242838e-05, + "loss": 0.6251, + "step": 21980 + }, + { + "epoch": 0.3798209842410838, + "grad_norm": 0.45189546335829, + "learning_rate": 1.4239858505754719e-05, + "loss": 0.6638, + "step": 21981 + }, + { + "epoch": 0.3798382637544927, + "grad_norm": 1.2048630075849007, + "learning_rate": 1.4239351641987042e-05, + "loss": 0.6092, + "step": 21982 + }, + { + "epoch": 0.37985554326790155, + "grad_norm": 0.701783257160201, + "learning_rate": 1.4238844764941396e-05, + "loss": 0.3484, + "step": 21983 + }, + { + "epoch": 0.37987282278131046, + "grad_norm": 0.7022592101696608, + "learning_rate": 1.4238337874619362e-05, + "loss": 0.3855, + "step": 21984 + }, + { + "epoch": 0.37989010229471937, + "grad_norm": 0.691703732573638, + "learning_rate": 1.4237830971022533e-05, + "loss": 0.4521, + "step": 21985 + }, + { + "epoch": 0.3799073818081283, + "grad_norm": 0.9082250748226105, + "learning_rate": 1.4237324054152498e-05, + "loss": 0.4221, + "step": 21986 + }, + { + "epoch": 0.3799246613215372, + "grad_norm": 1.7681556557738392, + "learning_rate": 1.4236817124010838e-05, + "loss": 0.4188, + "step": 21987 + }, + { + "epoch": 0.3799419408349461, + "grad_norm": 1.2541727448406463, + "learning_rate": 1.4236310180599146e-05, + "loss": 0.5448, + "step": 21988 + }, + { + "epoch": 0.379959220348355, + "grad_norm": 1.6627163134264713, + "learning_rate": 1.4235803223919011e-05, + "loss": 0.5174, + "step": 21989 + }, + { + "epoch": 0.3799764998617639, + "grad_norm": 0.5118523140627115, + "learning_rate": 1.4235296253972016e-05, + "loss": 0.8346, + "step": 21990 + }, + { + "epoch": 0.3799937793751728, + "grad_norm": 0.8079295005531536, + "learning_rate": 1.4234789270759753e-05, + "loss": 0.3671, + "step": 21991 + }, + { + "epoch": 0.3800110588885817, + "grad_norm": 0.6211124466476148, + "learning_rate": 1.4234282274283806e-05, + "loss": 0.3554, + "step": 21992 + }, + { + "epoch": 0.3800283384019906, + "grad_norm": 1.270660987495377, + "learning_rate": 1.4233775264545766e-05, + "loss": 0.5649, + "step": 21993 + }, + { + "epoch": 0.3800456179153995, + "grad_norm": 1.399074325464403, + "learning_rate": 1.4233268241547222e-05, + "loss": 0.5068, + "step": 21994 + }, + { + "epoch": 0.3800628974288084, + "grad_norm": 1.1906889610859124, + "learning_rate": 1.4232761205289758e-05, + "loss": 0.6377, + "step": 21995 + }, + { + "epoch": 0.3800801769422173, + "grad_norm": 0.9677111231480258, + "learning_rate": 1.4232254155774963e-05, + "loss": 0.3479, + "step": 21996 + }, + { + "epoch": 0.3800974564556262, + "grad_norm": 0.43806743497713996, + "learning_rate": 1.423174709300443e-05, + "loss": 0.6688, + "step": 21997 + }, + { + "epoch": 0.3801147359690351, + "grad_norm": 1.4821866839452063, + "learning_rate": 1.4231240016979742e-05, + "loss": 0.4481, + "step": 21998 + }, + { + "epoch": 0.380132015482444, + "grad_norm": 0.5843474417304655, + "learning_rate": 1.4230732927702488e-05, + "loss": 0.4172, + "step": 21999 + }, + { + "epoch": 0.3801492949958529, + "grad_norm": 0.9504734477668964, + "learning_rate": 1.4230225825174257e-05, + "loss": 0.4949, + "step": 22000 + }, + { + "epoch": 0.38016657450926183, + "grad_norm": 0.8193584673990875, + "learning_rate": 1.4229718709396635e-05, + "loss": 0.2325, + "step": 22001 + }, + { + "epoch": 0.38018385402267074, + "grad_norm": 0.3960331612402664, + "learning_rate": 1.4229211580371218e-05, + "loss": 0.5551, + "step": 22002 + }, + { + "epoch": 0.38020113353607965, + "grad_norm": 0.5573182847995989, + "learning_rate": 1.4228704438099585e-05, + "loss": 0.3819, + "step": 22003 + }, + { + "epoch": 0.3802184130494885, + "grad_norm": 1.0486256450272233, + "learning_rate": 1.422819728258333e-05, + "loss": 0.3877, + "step": 22004 + }, + { + "epoch": 0.3802356925628974, + "grad_norm": 1.17562152106868, + "learning_rate": 1.4227690113824037e-05, + "loss": 0.4239, + "step": 22005 + }, + { + "epoch": 0.3802529720763063, + "grad_norm": 0.7253207256439189, + "learning_rate": 1.4227182931823302e-05, + "loss": 0.2455, + "step": 22006 + }, + { + "epoch": 0.3802702515897152, + "grad_norm": 1.6414705441733848, + "learning_rate": 1.4226675736582704e-05, + "loss": 0.4178, + "step": 22007 + }, + { + "epoch": 0.38028753110312413, + "grad_norm": 1.218760235014517, + "learning_rate": 1.4226168528103838e-05, + "loss": 0.6376, + "step": 22008 + }, + { + "epoch": 0.38030481061653304, + "grad_norm": 0.9633134369329641, + "learning_rate": 1.4225661306388294e-05, + "loss": 0.5058, + "step": 22009 + }, + { + "epoch": 0.38032209012994195, + "grad_norm": 0.9000496378939147, + "learning_rate": 1.4225154071437654e-05, + "loss": 0.4144, + "step": 22010 + }, + { + "epoch": 0.38033936964335086, + "grad_norm": 1.327554484713374, + "learning_rate": 1.4224646823253512e-05, + "loss": 0.5089, + "step": 22011 + }, + { + "epoch": 0.38035664915675976, + "grad_norm": 1.173729161253432, + "learning_rate": 1.4224139561837455e-05, + "loss": 0.556, + "step": 22012 + }, + { + "epoch": 0.38037392867016867, + "grad_norm": 0.7422373882970127, + "learning_rate": 1.422363228719107e-05, + "loss": 0.3464, + "step": 22013 + }, + { + "epoch": 0.3803912081835775, + "grad_norm": 0.9277802030359176, + "learning_rate": 1.4223124999315951e-05, + "loss": 0.3665, + "step": 22014 + }, + { + "epoch": 0.38040848769698643, + "grad_norm": 1.1743414016993299, + "learning_rate": 1.4222617698213681e-05, + "loss": 0.3934, + "step": 22015 + }, + { + "epoch": 0.38042576721039534, + "grad_norm": 0.8897476463199155, + "learning_rate": 1.4222110383885851e-05, + "loss": 0.5055, + "step": 22016 + }, + { + "epoch": 0.38044304672380425, + "grad_norm": 0.5335836525769467, + "learning_rate": 1.4221603056334052e-05, + "loss": 0.8205, + "step": 22017 + }, + { + "epoch": 0.38046032623721315, + "grad_norm": 0.8592119997031599, + "learning_rate": 1.4221095715559871e-05, + "loss": 0.3785, + "step": 22018 + }, + { + "epoch": 0.38047760575062206, + "grad_norm": 1.15682797160111, + "learning_rate": 1.4220588361564898e-05, + "loss": 0.4383, + "step": 22019 + }, + { + "epoch": 0.38049488526403097, + "grad_norm": 1.1064639310362336, + "learning_rate": 1.4220080994350722e-05, + "loss": 0.4214, + "step": 22020 + }, + { + "epoch": 0.3805121647774399, + "grad_norm": 1.078374460470268, + "learning_rate": 1.421957361391893e-05, + "loss": 0.4802, + "step": 22021 + }, + { + "epoch": 0.3805294442908488, + "grad_norm": 1.4200948551702834, + "learning_rate": 1.4219066220271112e-05, + "loss": 0.5265, + "step": 22022 + }, + { + "epoch": 0.3805467238042577, + "grad_norm": 1.2239555732450957, + "learning_rate": 1.421855881340886e-05, + "loss": 0.4061, + "step": 22023 + }, + { + "epoch": 0.3805640033176666, + "grad_norm": 0.9059371845296689, + "learning_rate": 1.421805139333376e-05, + "loss": 0.523, + "step": 22024 + }, + { + "epoch": 0.38058128283107545, + "grad_norm": 0.9115005418527798, + "learning_rate": 1.4217543960047402e-05, + "loss": 0.4205, + "step": 22025 + }, + { + "epoch": 0.38059856234448436, + "grad_norm": 0.45763813534423364, + "learning_rate": 1.4217036513551378e-05, + "loss": 0.6634, + "step": 22026 + }, + { + "epoch": 0.38061584185789327, + "grad_norm": 1.9070712396700882, + "learning_rate": 1.4216529053847271e-05, + "loss": 0.5338, + "step": 22027 + }, + { + "epoch": 0.3806331213713022, + "grad_norm": 1.0550664082429224, + "learning_rate": 1.4216021580936681e-05, + "loss": 0.5434, + "step": 22028 + }, + { + "epoch": 0.3806504008847111, + "grad_norm": 1.4515176745769456, + "learning_rate": 1.4215514094821186e-05, + "loss": 0.4896, + "step": 22029 + }, + { + "epoch": 0.38066768039812, + "grad_norm": 0.8192091324988772, + "learning_rate": 1.4215006595502383e-05, + "loss": 0.6374, + "step": 22030 + }, + { + "epoch": 0.3806849599115289, + "grad_norm": 1.4073411253847063, + "learning_rate": 1.421449908298186e-05, + "loss": 0.3137, + "step": 22031 + }, + { + "epoch": 0.3807022394249378, + "grad_norm": 0.9237513567757396, + "learning_rate": 1.4213991557261202e-05, + "loss": 0.4353, + "step": 22032 + }, + { + "epoch": 0.3807195189383467, + "grad_norm": 0.9325270107787711, + "learning_rate": 1.4213484018342004e-05, + "loss": 0.3885, + "step": 22033 + }, + { + "epoch": 0.3807367984517556, + "grad_norm": 1.2344281899902458, + "learning_rate": 1.4212976466225854e-05, + "loss": 0.432, + "step": 22034 + }, + { + "epoch": 0.38075407796516453, + "grad_norm": 0.4430921615126011, + "learning_rate": 1.421246890091434e-05, + "loss": 0.5612, + "step": 22035 + }, + { + "epoch": 0.3807713574785734, + "grad_norm": 1.0155895214569923, + "learning_rate": 1.4211961322409053e-05, + "loss": 0.4139, + "step": 22036 + }, + { + "epoch": 0.3807886369919823, + "grad_norm": 0.8905477150682822, + "learning_rate": 1.4211453730711585e-05, + "loss": 0.349, + "step": 22037 + }, + { + "epoch": 0.3808059165053912, + "grad_norm": 0.7348508678963688, + "learning_rate": 1.4210946125823524e-05, + "loss": 0.5357, + "step": 22038 + }, + { + "epoch": 0.3808231960188001, + "grad_norm": 1.0726729411555234, + "learning_rate": 1.4210438507746457e-05, + "loss": 0.5268, + "step": 22039 + }, + { + "epoch": 0.380840475532209, + "grad_norm": 0.8269741762480202, + "learning_rate": 1.420993087648198e-05, + "loss": 0.3575, + "step": 22040 + }, + { + "epoch": 0.3808577550456179, + "grad_norm": 1.2180873896820479, + "learning_rate": 1.4209423232031678e-05, + "loss": 0.554, + "step": 22041 + }, + { + "epoch": 0.3808750345590268, + "grad_norm": 0.566502109781232, + "learning_rate": 1.4208915574397145e-05, + "loss": 0.3457, + "step": 22042 + }, + { + "epoch": 0.38089231407243573, + "grad_norm": 1.1465695261263256, + "learning_rate": 1.4208407903579966e-05, + "loss": 0.4678, + "step": 22043 + }, + { + "epoch": 0.38090959358584464, + "grad_norm": 0.7730712465451344, + "learning_rate": 1.4207900219581731e-05, + "loss": 0.5945, + "step": 22044 + }, + { + "epoch": 0.38092687309925355, + "grad_norm": 1.2682449330084813, + "learning_rate": 1.4207392522404035e-05, + "loss": 0.3587, + "step": 22045 + }, + { + "epoch": 0.3809441526126624, + "grad_norm": 0.409894960718116, + "learning_rate": 1.420688481204847e-05, + "loss": 0.2452, + "step": 22046 + }, + { + "epoch": 0.3809614321260713, + "grad_norm": 1.4987498431426165, + "learning_rate": 1.4206377088516616e-05, + "loss": 0.542, + "step": 22047 + }, + { + "epoch": 0.3809787116394802, + "grad_norm": 1.1022123721043953, + "learning_rate": 1.420586935181007e-05, + "loss": 0.5644, + "step": 22048 + }, + { + "epoch": 0.3809959911528891, + "grad_norm": 0.6297455582221705, + "learning_rate": 1.4205361601930425e-05, + "loss": 0.4328, + "step": 22049 + }, + { + "epoch": 0.38101327066629803, + "grad_norm": 1.126734817190323, + "learning_rate": 1.4204853838879264e-05, + "loss": 0.6252, + "step": 22050 + }, + { + "epoch": 0.38103055017970694, + "grad_norm": 0.6626190621869993, + "learning_rate": 1.4204346062658188e-05, + "loss": 0.3205, + "step": 22051 + }, + { + "epoch": 0.38104782969311585, + "grad_norm": 0.9033584377547723, + "learning_rate": 1.4203838273268775e-05, + "loss": 0.5351, + "step": 22052 + }, + { + "epoch": 0.38106510920652475, + "grad_norm": 1.3453864328959604, + "learning_rate": 1.4203330470712621e-05, + "loss": 0.6345, + "step": 22053 + }, + { + "epoch": 0.38108238871993366, + "grad_norm": 0.7921942739163018, + "learning_rate": 1.4202822654991319e-05, + "loss": 0.5716, + "step": 22054 + }, + { + "epoch": 0.38109966823334257, + "grad_norm": 6.300829827764174, + "learning_rate": 1.4202314826106456e-05, + "loss": 0.4396, + "step": 22055 + }, + { + "epoch": 0.3811169477467515, + "grad_norm": 1.349785104960788, + "learning_rate": 1.420180698405962e-05, + "loss": 0.674, + "step": 22056 + }, + { + "epoch": 0.38113422726016033, + "grad_norm": 0.7599921984247917, + "learning_rate": 1.420129912885241e-05, + "loss": 0.4583, + "step": 22057 + }, + { + "epoch": 0.38115150677356924, + "grad_norm": 0.8919314369941518, + "learning_rate": 1.4200791260486411e-05, + "loss": 0.6421, + "step": 22058 + }, + { + "epoch": 0.38116878628697815, + "grad_norm": 0.9686965927334953, + "learning_rate": 1.4200283378963212e-05, + "loss": 0.4597, + "step": 22059 + }, + { + "epoch": 0.38118606580038705, + "grad_norm": 1.0480396689786544, + "learning_rate": 1.4199775484284409e-05, + "loss": 0.3507, + "step": 22060 + }, + { + "epoch": 0.38120334531379596, + "grad_norm": 1.2497378078818837, + "learning_rate": 1.419926757645159e-05, + "loss": 0.7756, + "step": 22061 + }, + { + "epoch": 0.38122062482720487, + "grad_norm": 0.5268312525386826, + "learning_rate": 1.4198759655466346e-05, + "loss": 0.5707, + "step": 22062 + }, + { + "epoch": 0.3812379043406138, + "grad_norm": 0.914292313990575, + "learning_rate": 1.4198251721330265e-05, + "loss": 0.3601, + "step": 22063 + }, + { + "epoch": 0.3812551838540227, + "grad_norm": 1.150652123997643, + "learning_rate": 1.419774377404494e-05, + "loss": 0.6492, + "step": 22064 + }, + { + "epoch": 0.3812724633674316, + "grad_norm": 1.0287986553488293, + "learning_rate": 1.4197235813611967e-05, + "loss": 0.3881, + "step": 22065 + }, + { + "epoch": 0.3812897428808405, + "grad_norm": 0.9092748015483416, + "learning_rate": 1.419672784003293e-05, + "loss": 0.3306, + "step": 22066 + }, + { + "epoch": 0.38130702239424935, + "grad_norm": 0.8862890230693703, + "learning_rate": 1.419621985330942e-05, + "loss": 0.5734, + "step": 22067 + }, + { + "epoch": 0.38132430190765826, + "grad_norm": 0.7366746705388116, + "learning_rate": 1.4195711853443035e-05, + "loss": 0.3942, + "step": 22068 + }, + { + "epoch": 0.38134158142106717, + "grad_norm": 0.9175745498312038, + "learning_rate": 1.4195203840435361e-05, + "loss": 0.4055, + "step": 22069 + }, + { + "epoch": 0.3813588609344761, + "grad_norm": 0.9207175795315514, + "learning_rate": 1.4194695814287986e-05, + "loss": 0.434, + "step": 22070 + }, + { + "epoch": 0.381376140447885, + "grad_norm": 1.6004304863949763, + "learning_rate": 1.419418777500251e-05, + "loss": 0.579, + "step": 22071 + }, + { + "epoch": 0.3813934199612939, + "grad_norm": 0.7552134601069419, + "learning_rate": 1.4193679722580517e-05, + "loss": 0.4808, + "step": 22072 + }, + { + "epoch": 0.3814106994747028, + "grad_norm": 1.097104054343843, + "learning_rate": 1.4193171657023598e-05, + "loss": 0.3422, + "step": 22073 + }, + { + "epoch": 0.3814279789881117, + "grad_norm": 0.8866228059033234, + "learning_rate": 1.419266357833335e-05, + "loss": 0.3632, + "step": 22074 + }, + { + "epoch": 0.3814452585015206, + "grad_norm": 0.9809861296000357, + "learning_rate": 1.4192155486511359e-05, + "loss": 0.5193, + "step": 22075 + }, + { + "epoch": 0.3814625380149295, + "grad_norm": 2.267716757814494, + "learning_rate": 1.4191647381559217e-05, + "loss": 0.5258, + "step": 22076 + }, + { + "epoch": 0.3814798175283384, + "grad_norm": 1.5743849398272634, + "learning_rate": 1.419113926347852e-05, + "loss": 0.3763, + "step": 22077 + }, + { + "epoch": 0.3814970970417473, + "grad_norm": 1.159145829058281, + "learning_rate": 1.4190631132270855e-05, + "loss": 0.406, + "step": 22078 + }, + { + "epoch": 0.3815143765551562, + "grad_norm": 1.2279277417106806, + "learning_rate": 1.4190122987937813e-05, + "loss": 0.596, + "step": 22079 + }, + { + "epoch": 0.3815316560685651, + "grad_norm": 0.522526405894555, + "learning_rate": 1.418961483048099e-05, + "loss": 0.7623, + "step": 22080 + }, + { + "epoch": 0.381548935581974, + "grad_norm": 0.967233881782153, + "learning_rate": 1.4189106659901977e-05, + "loss": 0.6529, + "step": 22081 + }, + { + "epoch": 0.3815662150953829, + "grad_norm": 0.971328240164795, + "learning_rate": 1.418859847620236e-05, + "loss": 0.7522, + "step": 22082 + }, + { + "epoch": 0.3815834946087918, + "grad_norm": 1.1406311656678756, + "learning_rate": 1.4188090279383734e-05, + "loss": 0.5367, + "step": 22083 + }, + { + "epoch": 0.3816007741222007, + "grad_norm": 1.1544910653834897, + "learning_rate": 1.4187582069447691e-05, + "loss": 0.3619, + "step": 22084 + }, + { + "epoch": 0.38161805363560963, + "grad_norm": 0.7630313699642794, + "learning_rate": 1.4187073846395824e-05, + "loss": 0.5388, + "step": 22085 + }, + { + "epoch": 0.38163533314901854, + "grad_norm": 0.8683275352584074, + "learning_rate": 1.4186565610229725e-05, + "loss": 0.4802, + "step": 22086 + }, + { + "epoch": 0.38165261266242745, + "grad_norm": 0.7689804818734316, + "learning_rate": 1.4186057360950982e-05, + "loss": 0.4825, + "step": 22087 + }, + { + "epoch": 0.38166989217583636, + "grad_norm": 1.2930891823969806, + "learning_rate": 1.4185549098561188e-05, + "loss": 0.331, + "step": 22088 + }, + { + "epoch": 0.3816871716892452, + "grad_norm": 1.0215191477292531, + "learning_rate": 1.418504082306194e-05, + "loss": 0.6674, + "step": 22089 + }, + { + "epoch": 0.3817044512026541, + "grad_norm": 1.130454872032005, + "learning_rate": 1.4184532534454825e-05, + "loss": 0.3745, + "step": 22090 + }, + { + "epoch": 0.381721730716063, + "grad_norm": 0.7288586028173487, + "learning_rate": 1.4184024232741434e-05, + "loss": 0.4884, + "step": 22091 + }, + { + "epoch": 0.38173901022947193, + "grad_norm": 1.3763824905595532, + "learning_rate": 1.4183515917923366e-05, + "loss": 0.593, + "step": 22092 + }, + { + "epoch": 0.38175628974288084, + "grad_norm": 0.6584936281322454, + "learning_rate": 1.4183007590002202e-05, + "loss": 0.2816, + "step": 22093 + }, + { + "epoch": 0.38177356925628975, + "grad_norm": 0.7010726572542502, + "learning_rate": 1.4182499248979542e-05, + "loss": 0.4749, + "step": 22094 + }, + { + "epoch": 0.38179084876969865, + "grad_norm": 0.3823381920632606, + "learning_rate": 1.4181990894856978e-05, + "loss": 0.5813, + "step": 22095 + }, + { + "epoch": 0.38180812828310756, + "grad_norm": 0.8278575845199154, + "learning_rate": 1.4181482527636098e-05, + "loss": 0.3507, + "step": 22096 + }, + { + "epoch": 0.38182540779651647, + "grad_norm": 1.1881333292679641, + "learning_rate": 1.4180974147318502e-05, + "loss": 0.5056, + "step": 22097 + }, + { + "epoch": 0.3818426873099254, + "grad_norm": 0.7518501544998847, + "learning_rate": 1.4180465753905774e-05, + "loss": 0.5595, + "step": 22098 + }, + { + "epoch": 0.38185996682333423, + "grad_norm": 0.5799792676181336, + "learning_rate": 1.4179957347399508e-05, + "loss": 0.5227, + "step": 22099 + }, + { + "epoch": 0.38187724633674314, + "grad_norm": 0.8827919198490037, + "learning_rate": 1.4179448927801301e-05, + "loss": 0.4946, + "step": 22100 + }, + { + "epoch": 0.38189452585015204, + "grad_norm": 1.3478753533775563, + "learning_rate": 1.417894049511274e-05, + "loss": 0.635, + "step": 22101 + }, + { + "epoch": 0.38191180536356095, + "grad_norm": 1.437734613842301, + "learning_rate": 1.4178432049335423e-05, + "loss": 0.4455, + "step": 22102 + }, + { + "epoch": 0.38192908487696986, + "grad_norm": 0.886607428475342, + "learning_rate": 1.4177923590470936e-05, + "loss": 0.5077, + "step": 22103 + }, + { + "epoch": 0.38194636439037877, + "grad_norm": 1.0708357570972953, + "learning_rate": 1.4177415118520874e-05, + "loss": 0.4608, + "step": 22104 + }, + { + "epoch": 0.3819636439037877, + "grad_norm": 1.6012390593637795, + "learning_rate": 1.4176906633486833e-05, + "loss": 0.5097, + "step": 22105 + }, + { + "epoch": 0.3819809234171966, + "grad_norm": 0.793626834682011, + "learning_rate": 1.4176398135370403e-05, + "loss": 0.5123, + "step": 22106 + }, + { + "epoch": 0.3819982029306055, + "grad_norm": 1.0868766355285637, + "learning_rate": 1.4175889624173176e-05, + "loss": 0.368, + "step": 22107 + }, + { + "epoch": 0.3820154824440144, + "grad_norm": 1.099623484169508, + "learning_rate": 1.4175381099896746e-05, + "loss": 0.6379, + "step": 22108 + }, + { + "epoch": 0.3820327619574233, + "grad_norm": 0.8014262575197696, + "learning_rate": 1.4174872562542702e-05, + "loss": 0.5231, + "step": 22109 + }, + { + "epoch": 0.38205004147083216, + "grad_norm": 1.0565325910757577, + "learning_rate": 1.4174364012112643e-05, + "loss": 0.6129, + "step": 22110 + }, + { + "epoch": 0.38206732098424107, + "grad_norm": 0.9708734141040052, + "learning_rate": 1.4173855448608158e-05, + "loss": 0.4607, + "step": 22111 + }, + { + "epoch": 0.38208460049765, + "grad_norm": 1.5358789400031267, + "learning_rate": 1.4173346872030842e-05, + "loss": 0.5055, + "step": 22112 + }, + { + "epoch": 0.3821018800110589, + "grad_norm": 1.1766769190240842, + "learning_rate": 1.4172838282382284e-05, + "loss": 0.472, + "step": 22113 + }, + { + "epoch": 0.3821191595244678, + "grad_norm": 1.3286903072282386, + "learning_rate": 1.4172329679664081e-05, + "loss": 0.5115, + "step": 22114 + }, + { + "epoch": 0.3821364390378767, + "grad_norm": 0.9272600403000886, + "learning_rate": 1.4171821063877824e-05, + "loss": 0.4988, + "step": 22115 + }, + { + "epoch": 0.3821537185512856, + "grad_norm": 0.8331804595831769, + "learning_rate": 1.4171312435025104e-05, + "loss": 0.4626, + "step": 22116 + }, + { + "epoch": 0.3821709980646945, + "grad_norm": 0.8074683679432417, + "learning_rate": 1.417080379310752e-05, + "loss": 0.512, + "step": 22117 + }, + { + "epoch": 0.3821882775781034, + "grad_norm": 0.8076033824921623, + "learning_rate": 1.4170295138126659e-05, + "loss": 0.4727, + "step": 22118 + }, + { + "epoch": 0.3822055570915123, + "grad_norm": 2.4389245555181023, + "learning_rate": 1.4169786470084117e-05, + "loss": 0.5585, + "step": 22119 + }, + { + "epoch": 0.3822228366049212, + "grad_norm": 0.928295071532362, + "learning_rate": 1.4169277788981489e-05, + "loss": 0.4669, + "step": 22120 + }, + { + "epoch": 0.3822401161183301, + "grad_norm": 0.8715515566366302, + "learning_rate": 1.4168769094820362e-05, + "loss": 0.4684, + "step": 22121 + }, + { + "epoch": 0.382257395631739, + "grad_norm": 0.415363046703092, + "learning_rate": 1.4168260387602338e-05, + "loss": 0.7685, + "step": 22122 + }, + { + "epoch": 0.3822746751451479, + "grad_norm": 0.8763856886315177, + "learning_rate": 1.4167751667329003e-05, + "loss": 0.5248, + "step": 22123 + }, + { + "epoch": 0.3822919546585568, + "grad_norm": 1.1320231955151585, + "learning_rate": 1.4167242934001954e-05, + "loss": 0.4472, + "step": 22124 + }, + { + "epoch": 0.3823092341719657, + "grad_norm": 0.9662822864706211, + "learning_rate": 1.4166734187622784e-05, + "loss": 0.4545, + "step": 22125 + }, + { + "epoch": 0.3823265136853746, + "grad_norm": 0.8627203531328809, + "learning_rate": 1.4166225428193085e-05, + "loss": 0.5143, + "step": 22126 + }, + { + "epoch": 0.38234379319878353, + "grad_norm": 1.4351681110293244, + "learning_rate": 1.416571665571445e-05, + "loss": 0.4239, + "step": 22127 + }, + { + "epoch": 0.38236107271219244, + "grad_norm": 1.4486322489203503, + "learning_rate": 1.4165207870188475e-05, + "loss": 0.524, + "step": 22128 + }, + { + "epoch": 0.38237835222560135, + "grad_norm": 0.8779590749498236, + "learning_rate": 1.4164699071616755e-05, + "loss": 0.5372, + "step": 22129 + }, + { + "epoch": 0.38239563173901026, + "grad_norm": 0.8468031859380498, + "learning_rate": 1.4164190260000876e-05, + "loss": 0.4808, + "step": 22130 + }, + { + "epoch": 0.3824129112524191, + "grad_norm": 1.7098849955203743, + "learning_rate": 1.416368143534244e-05, + "loss": 0.4956, + "step": 22131 + }, + { + "epoch": 0.382430190765828, + "grad_norm": 1.1223798936041482, + "learning_rate": 1.4163172597643038e-05, + "loss": 0.6791, + "step": 22132 + }, + { + "epoch": 0.3824474702792369, + "grad_norm": 1.0204004632956958, + "learning_rate": 1.4162663746904262e-05, + "loss": 0.5202, + "step": 22133 + }, + { + "epoch": 0.38246474979264583, + "grad_norm": 1.0763671520533165, + "learning_rate": 1.4162154883127705e-05, + "loss": 0.3719, + "step": 22134 + }, + { + "epoch": 0.38248202930605474, + "grad_norm": 0.8041536547223949, + "learning_rate": 1.4161646006314964e-05, + "loss": 0.5343, + "step": 22135 + }, + { + "epoch": 0.38249930881946365, + "grad_norm": 0.6705611630833876, + "learning_rate": 1.4161137116467632e-05, + "loss": 0.4107, + "step": 22136 + }, + { + "epoch": 0.38251658833287255, + "grad_norm": 1.0105689429563929, + "learning_rate": 1.4160628213587302e-05, + "loss": 0.6333, + "step": 22137 + }, + { + "epoch": 0.38253386784628146, + "grad_norm": 1.0648166901817702, + "learning_rate": 1.4160119297675567e-05, + "loss": 0.4377, + "step": 22138 + }, + { + "epoch": 0.38255114735969037, + "grad_norm": 1.1464725677349554, + "learning_rate": 1.4159610368734023e-05, + "loss": 0.3935, + "step": 22139 + }, + { + "epoch": 0.3825684268730993, + "grad_norm": 0.9350823250497791, + "learning_rate": 1.4159101426764263e-05, + "loss": 0.6208, + "step": 22140 + }, + { + "epoch": 0.38258570638650813, + "grad_norm": 1.5420036621040436, + "learning_rate": 1.4158592471767882e-05, + "loss": 0.569, + "step": 22141 + }, + { + "epoch": 0.38260298589991704, + "grad_norm": 0.6685979094717652, + "learning_rate": 1.4158083503746472e-05, + "loss": 0.5178, + "step": 22142 + }, + { + "epoch": 0.38262026541332594, + "grad_norm": 0.6926178068130185, + "learning_rate": 1.4157574522701632e-05, + "loss": 0.3882, + "step": 22143 + }, + { + "epoch": 0.38263754492673485, + "grad_norm": 1.1320780733447293, + "learning_rate": 1.4157065528634948e-05, + "loss": 0.5052, + "step": 22144 + }, + { + "epoch": 0.38265482444014376, + "grad_norm": 1.0505152133510458, + "learning_rate": 1.4156556521548022e-05, + "loss": 0.5092, + "step": 22145 + }, + { + "epoch": 0.38267210395355267, + "grad_norm": 1.2361290647910244, + "learning_rate": 1.4156047501442444e-05, + "loss": 0.4784, + "step": 22146 + }, + { + "epoch": 0.3826893834669616, + "grad_norm": 0.9804437124486451, + "learning_rate": 1.415553846831981e-05, + "loss": 0.4468, + "step": 22147 + }, + { + "epoch": 0.3827066629803705, + "grad_norm": 1.443097162655672, + "learning_rate": 1.4155029422181712e-05, + "loss": 0.394, + "step": 22148 + }, + { + "epoch": 0.3827239424937794, + "grad_norm": 0.8962090214442439, + "learning_rate": 1.4154520363029747e-05, + "loss": 0.4921, + "step": 22149 + }, + { + "epoch": 0.3827412220071883, + "grad_norm": 0.7480599191137959, + "learning_rate": 1.4154011290865509e-05, + "loss": 0.4477, + "step": 22150 + }, + { + "epoch": 0.3827585015205972, + "grad_norm": 1.5474629183726456, + "learning_rate": 1.4153502205690592e-05, + "loss": 0.4183, + "step": 22151 + }, + { + "epoch": 0.38277578103400606, + "grad_norm": 1.3345179306449129, + "learning_rate": 1.415299310750659e-05, + "loss": 0.5296, + "step": 22152 + }, + { + "epoch": 0.38279306054741497, + "grad_norm": 0.7593162120720515, + "learning_rate": 1.4152483996315098e-05, + "loss": 0.4712, + "step": 22153 + }, + { + "epoch": 0.3828103400608239, + "grad_norm": 0.7157794003494974, + "learning_rate": 1.4151974872117711e-05, + "loss": 0.269, + "step": 22154 + }, + { + "epoch": 0.3828276195742328, + "grad_norm": 1.0232523815835701, + "learning_rate": 1.4151465734916023e-05, + "loss": 0.5088, + "step": 22155 + }, + { + "epoch": 0.3828448990876417, + "grad_norm": 0.8495390831199199, + "learning_rate": 1.4150956584711628e-05, + "loss": 0.489, + "step": 22156 + }, + { + "epoch": 0.3828621786010506, + "grad_norm": 0.8937345548722168, + "learning_rate": 1.4150447421506122e-05, + "loss": 0.5054, + "step": 22157 + }, + { + "epoch": 0.3828794581144595, + "grad_norm": 0.8058711378719686, + "learning_rate": 1.41499382453011e-05, + "loss": 0.4904, + "step": 22158 + }, + { + "epoch": 0.3828967376278684, + "grad_norm": 0.6530130297354604, + "learning_rate": 1.4149429056098153e-05, + "loss": 0.2544, + "step": 22159 + }, + { + "epoch": 0.3829140171412773, + "grad_norm": 0.9241295511730467, + "learning_rate": 1.4148919853898884e-05, + "loss": 0.4668, + "step": 22160 + }, + { + "epoch": 0.3829312966546862, + "grad_norm": 0.8673730180596604, + "learning_rate": 1.414841063870488e-05, + "loss": 0.3663, + "step": 22161 + }, + { + "epoch": 0.38294857616809513, + "grad_norm": 0.5135704048999405, + "learning_rate": 1.4147901410517735e-05, + "loss": 0.6233, + "step": 22162 + }, + { + "epoch": 0.382965855681504, + "grad_norm": 1.2435319397145386, + "learning_rate": 1.4147392169339051e-05, + "loss": 0.5905, + "step": 22163 + }, + { + "epoch": 0.3829831351949129, + "grad_norm": 0.8355403236579918, + "learning_rate": 1.4146882915170421e-05, + "loss": 0.2929, + "step": 22164 + }, + { + "epoch": 0.3830004147083218, + "grad_norm": 0.6015024424867197, + "learning_rate": 1.4146373648013439e-05, + "loss": 0.4961, + "step": 22165 + }, + { + "epoch": 0.3830176942217307, + "grad_norm": 0.7765791365369862, + "learning_rate": 1.4145864367869697e-05, + "loss": 0.3889, + "step": 22166 + }, + { + "epoch": 0.3830349737351396, + "grad_norm": 1.010720219547029, + "learning_rate": 1.4145355074740792e-05, + "loss": 0.3814, + "step": 22167 + }, + { + "epoch": 0.3830522532485485, + "grad_norm": 0.9118104550579944, + "learning_rate": 1.4144845768628322e-05, + "loss": 0.6363, + "step": 22168 + }, + { + "epoch": 0.38306953276195743, + "grad_norm": 0.9904836384399811, + "learning_rate": 1.414433644953388e-05, + "loss": 0.3992, + "step": 22169 + }, + { + "epoch": 0.38308681227536634, + "grad_norm": 2.453144559559014, + "learning_rate": 1.4143827117459059e-05, + "loss": 0.4545, + "step": 22170 + }, + { + "epoch": 0.38310409178877525, + "grad_norm": 1.4432720244538215, + "learning_rate": 1.414331777240546e-05, + "loss": 0.5501, + "step": 22171 + }, + { + "epoch": 0.38312137130218415, + "grad_norm": 0.7745509502604939, + "learning_rate": 1.4142808414374674e-05, + "loss": 0.4887, + "step": 22172 + }, + { + "epoch": 0.383138650815593, + "grad_norm": 1.2645028998184202, + "learning_rate": 1.4142299043368298e-05, + "loss": 0.4306, + "step": 22173 + }, + { + "epoch": 0.3831559303290019, + "grad_norm": 1.2624772009272922, + "learning_rate": 1.4141789659387923e-05, + "loss": 0.3911, + "step": 22174 + }, + { + "epoch": 0.3831732098424108, + "grad_norm": 0.8297241813414808, + "learning_rate": 1.414128026243515e-05, + "loss": 0.2989, + "step": 22175 + }, + { + "epoch": 0.38319048935581973, + "grad_norm": 0.8176093440294046, + "learning_rate": 1.4140770852511571e-05, + "loss": 0.4785, + "step": 22176 + }, + { + "epoch": 0.38320776886922864, + "grad_norm": 1.3433448691793326, + "learning_rate": 1.4140261429618786e-05, + "loss": 0.479, + "step": 22177 + }, + { + "epoch": 0.38322504838263755, + "grad_norm": 1.5334276367156652, + "learning_rate": 1.4139751993758385e-05, + "loss": 0.6528, + "step": 22178 + }, + { + "epoch": 0.38324232789604645, + "grad_norm": 1.4628396355359097, + "learning_rate": 1.4139242544931965e-05, + "loss": 0.4096, + "step": 22179 + }, + { + "epoch": 0.38325960740945536, + "grad_norm": 1.107749843471472, + "learning_rate": 1.4138733083141128e-05, + "loss": 0.4325, + "step": 22180 + }, + { + "epoch": 0.38327688692286427, + "grad_norm": 0.8478499816645144, + "learning_rate": 1.4138223608387463e-05, + "loss": 0.5847, + "step": 22181 + }, + { + "epoch": 0.3832941664362732, + "grad_norm": 1.0353997235742727, + "learning_rate": 1.413771412067256e-05, + "loss": 0.4558, + "step": 22182 + }, + { + "epoch": 0.3833114459496821, + "grad_norm": 1.1738890718331803, + "learning_rate": 1.413720461999803e-05, + "loss": 0.6052, + "step": 22183 + }, + { + "epoch": 0.38332872546309094, + "grad_norm": 1.7094204760230969, + "learning_rate": 1.4136695106365459e-05, + "loss": 0.3993, + "step": 22184 + }, + { + "epoch": 0.38334600497649984, + "grad_norm": 1.0585280052986201, + "learning_rate": 1.4136185579776445e-05, + "loss": 0.5758, + "step": 22185 + }, + { + "epoch": 0.38336328448990875, + "grad_norm": 1.227417717398011, + "learning_rate": 1.4135676040232583e-05, + "loss": 0.4977, + "step": 22186 + }, + { + "epoch": 0.38338056400331766, + "grad_norm": 1.2436108632976792, + "learning_rate": 1.4135166487735466e-05, + "loss": 0.4833, + "step": 22187 + }, + { + "epoch": 0.38339784351672657, + "grad_norm": 0.5267329240446066, + "learning_rate": 1.4134656922286695e-05, + "loss": 0.2406, + "step": 22188 + }, + { + "epoch": 0.3834151230301355, + "grad_norm": 0.8728009132722594, + "learning_rate": 1.4134147343887868e-05, + "loss": 0.5228, + "step": 22189 + }, + { + "epoch": 0.3834324025435444, + "grad_norm": 0.8772895252180469, + "learning_rate": 1.4133637752540572e-05, + "loss": 0.4595, + "step": 22190 + }, + { + "epoch": 0.3834496820569533, + "grad_norm": 0.7305513594998251, + "learning_rate": 1.413312814824641e-05, + "loss": 0.3287, + "step": 22191 + }, + { + "epoch": 0.3834669615703622, + "grad_norm": 0.9485084835274801, + "learning_rate": 1.4132618531006979e-05, + "loss": 0.3274, + "step": 22192 + }, + { + "epoch": 0.3834842410837711, + "grad_norm": 1.0243437722286848, + "learning_rate": 1.4132108900823871e-05, + "loss": 0.5796, + "step": 22193 + }, + { + "epoch": 0.38350152059717996, + "grad_norm": 0.7866126925644853, + "learning_rate": 1.4131599257698682e-05, + "loss": 0.3525, + "step": 22194 + }, + { + "epoch": 0.38351880011058886, + "grad_norm": 0.8800902778412937, + "learning_rate": 1.4131089601633014e-05, + "loss": 0.44, + "step": 22195 + }, + { + "epoch": 0.38353607962399777, + "grad_norm": 0.9220710611351652, + "learning_rate": 1.4130579932628456e-05, + "loss": 0.363, + "step": 22196 + }, + { + "epoch": 0.3835533591374067, + "grad_norm": 1.358802944913982, + "learning_rate": 1.4130070250686612e-05, + "loss": 0.5558, + "step": 22197 + }, + { + "epoch": 0.3835706386508156, + "grad_norm": 0.844218016883379, + "learning_rate": 1.412956055580907e-05, + "loss": 0.5778, + "step": 22198 + }, + { + "epoch": 0.3835879181642245, + "grad_norm": 2.181595590685259, + "learning_rate": 1.4129050847997431e-05, + "loss": 0.5772, + "step": 22199 + }, + { + "epoch": 0.3836051976776334, + "grad_norm": 0.7812653022240984, + "learning_rate": 1.4128541127253293e-05, + "loss": 0.5374, + "step": 22200 + }, + { + "epoch": 0.3836224771910423, + "grad_norm": 0.7345463753225515, + "learning_rate": 1.4128031393578249e-05, + "loss": 0.2766, + "step": 22201 + }, + { + "epoch": 0.3836397567044512, + "grad_norm": 1.0551155926102675, + "learning_rate": 1.4127521646973896e-05, + "loss": 0.3845, + "step": 22202 + }, + { + "epoch": 0.3836570362178601, + "grad_norm": 1.0830344874211986, + "learning_rate": 1.4127011887441832e-05, + "loss": 0.52, + "step": 22203 + }, + { + "epoch": 0.38367431573126903, + "grad_norm": 1.0786755608499274, + "learning_rate": 1.4126502114983655e-05, + "loss": 0.4709, + "step": 22204 + }, + { + "epoch": 0.3836915952446779, + "grad_norm": 1.2863745332557242, + "learning_rate": 1.412599232960096e-05, + "loss": 0.8704, + "step": 22205 + }, + { + "epoch": 0.3837088747580868, + "grad_norm": 1.1312705918469192, + "learning_rate": 1.4125482531295343e-05, + "loss": 0.531, + "step": 22206 + }, + { + "epoch": 0.3837261542714957, + "grad_norm": 0.43830690648436144, + "learning_rate": 1.4124972720068397e-05, + "loss": 0.4511, + "step": 22207 + }, + { + "epoch": 0.3837434337849046, + "grad_norm": 0.413449904547738, + "learning_rate": 1.4124462895921727e-05, + "loss": 0.7849, + "step": 22208 + }, + { + "epoch": 0.3837607132983135, + "grad_norm": 1.4329448537695242, + "learning_rate": 1.4123953058856925e-05, + "loss": 0.5538, + "step": 22209 + }, + { + "epoch": 0.3837779928117224, + "grad_norm": 0.8135592622801823, + "learning_rate": 1.412344320887559e-05, + "loss": 0.2653, + "step": 22210 + }, + { + "epoch": 0.38379527232513133, + "grad_norm": 0.5594508267339151, + "learning_rate": 1.4122933345979315e-05, + "loss": 0.7655, + "step": 22211 + }, + { + "epoch": 0.38381255183854024, + "grad_norm": 0.9246679098885175, + "learning_rate": 1.41224234701697e-05, + "loss": 0.3249, + "step": 22212 + }, + { + "epoch": 0.38382983135194915, + "grad_norm": 0.956347571053753, + "learning_rate": 1.4121913581448343e-05, + "loss": 0.469, + "step": 22213 + }, + { + "epoch": 0.38384711086535805, + "grad_norm": 0.8832926088169252, + "learning_rate": 1.4121403679816836e-05, + "loss": 0.445, + "step": 22214 + }, + { + "epoch": 0.3838643903787669, + "grad_norm": 1.324154948439937, + "learning_rate": 1.4120893765276785e-05, + "loss": 0.3344, + "step": 22215 + }, + { + "epoch": 0.3838816698921758, + "grad_norm": 0.8190842151754247, + "learning_rate": 1.4120383837829776e-05, + "loss": 0.4061, + "step": 22216 + }, + { + "epoch": 0.3838989494055847, + "grad_norm": 1.7195348522529315, + "learning_rate": 1.4119873897477416e-05, + "loss": 0.7164, + "step": 22217 + }, + { + "epoch": 0.38391622891899363, + "grad_norm": 0.8844369996757587, + "learning_rate": 1.4119363944221295e-05, + "loss": 0.3033, + "step": 22218 + }, + { + "epoch": 0.38393350843240254, + "grad_norm": 1.0323614364298017, + "learning_rate": 1.4118853978063013e-05, + "loss": 0.609, + "step": 22219 + }, + { + "epoch": 0.38395078794581144, + "grad_norm": 0.5480989288782451, + "learning_rate": 1.4118343999004169e-05, + "loss": 0.5898, + "step": 22220 + }, + { + "epoch": 0.38396806745922035, + "grad_norm": 1.0291746776270683, + "learning_rate": 1.4117834007046359e-05, + "loss": 0.3495, + "step": 22221 + }, + { + "epoch": 0.38398534697262926, + "grad_norm": 0.9537292001247494, + "learning_rate": 1.4117324002191178e-05, + "loss": 0.4189, + "step": 22222 + }, + { + "epoch": 0.38400262648603817, + "grad_norm": 0.9522392637981298, + "learning_rate": 1.4116813984440225e-05, + "loss": 0.5852, + "step": 22223 + }, + { + "epoch": 0.3840199059994471, + "grad_norm": 0.9653646517875778, + "learning_rate": 1.4116303953795098e-05, + "loss": 0.3574, + "step": 22224 + }, + { + "epoch": 0.384037185512856, + "grad_norm": 0.7368546604332559, + "learning_rate": 1.4115793910257396e-05, + "loss": 0.4173, + "step": 22225 + }, + { + "epoch": 0.38405446502626484, + "grad_norm": 0.9640971727065668, + "learning_rate": 1.4115283853828717e-05, + "loss": 0.5402, + "step": 22226 + }, + { + "epoch": 0.38407174453967374, + "grad_norm": 1.064260684474866, + "learning_rate": 1.4114773784510649e-05, + "loss": 0.5765, + "step": 22227 + }, + { + "epoch": 0.38408902405308265, + "grad_norm": 0.9615623026078595, + "learning_rate": 1.4114263702304802e-05, + "loss": 0.3353, + "step": 22228 + }, + { + "epoch": 0.38410630356649156, + "grad_norm": 1.1060887452106842, + "learning_rate": 1.4113753607212768e-05, + "loss": 0.7647, + "step": 22229 + }, + { + "epoch": 0.38412358307990047, + "grad_norm": 1.4119653930176739, + "learning_rate": 1.4113243499236144e-05, + "loss": 0.5692, + "step": 22230 + }, + { + "epoch": 0.3841408625933094, + "grad_norm": 0.9066661975583962, + "learning_rate": 1.411273337837653e-05, + "loss": 0.3502, + "step": 22231 + }, + { + "epoch": 0.3841581421067183, + "grad_norm": 1.7733833898699591, + "learning_rate": 1.411222324463552e-05, + "loss": 0.4503, + "step": 22232 + }, + { + "epoch": 0.3841754216201272, + "grad_norm": 1.305643378975677, + "learning_rate": 1.4111713098014717e-05, + "loss": 0.5054, + "step": 22233 + }, + { + "epoch": 0.3841927011335361, + "grad_norm": 1.1609487667560194, + "learning_rate": 1.4111202938515711e-05, + "loss": 0.5608, + "step": 22234 + }, + { + "epoch": 0.384209980646945, + "grad_norm": 0.5671156124022096, + "learning_rate": 1.411069276614011e-05, + "loss": 0.6031, + "step": 22235 + }, + { + "epoch": 0.3842272601603539, + "grad_norm": 1.2102979637520423, + "learning_rate": 1.4110182580889507e-05, + "loss": 0.5367, + "step": 22236 + }, + { + "epoch": 0.38424453967376276, + "grad_norm": 1.0745833168154715, + "learning_rate": 1.41096723827655e-05, + "loss": 0.3551, + "step": 22237 + }, + { + "epoch": 0.38426181918717167, + "grad_norm": 0.9015007360087208, + "learning_rate": 1.4109162171769684e-05, + "loss": 0.4717, + "step": 22238 + }, + { + "epoch": 0.3842790987005806, + "grad_norm": 1.062009960104532, + "learning_rate": 1.410865194790366e-05, + "loss": 0.4983, + "step": 22239 + }, + { + "epoch": 0.3842963782139895, + "grad_norm": 0.4502011174455118, + "learning_rate": 1.4108141711169027e-05, + "loss": 0.7162, + "step": 22240 + }, + { + "epoch": 0.3843136577273984, + "grad_norm": 1.6572108644949282, + "learning_rate": 1.4107631461567384e-05, + "loss": 0.5606, + "step": 22241 + }, + { + "epoch": 0.3843309372408073, + "grad_norm": 0.9562295522003006, + "learning_rate": 1.4107121199100322e-05, + "loss": 0.468, + "step": 22242 + }, + { + "epoch": 0.3843482167542162, + "grad_norm": 1.6476170401733468, + "learning_rate": 1.4106610923769449e-05, + "loss": 0.4326, + "step": 22243 + }, + { + "epoch": 0.3843654962676251, + "grad_norm": 0.8876947328591601, + "learning_rate": 1.4106100635576356e-05, + "loss": 0.4264, + "step": 22244 + }, + { + "epoch": 0.384382775781034, + "grad_norm": 1.2900452224225338, + "learning_rate": 1.4105590334522644e-05, + "loss": 0.6455, + "step": 22245 + }, + { + "epoch": 0.38440005529444293, + "grad_norm": 0.6791438430206151, + "learning_rate": 1.4105080020609914e-05, + "loss": 0.2877, + "step": 22246 + }, + { + "epoch": 0.3844173348078518, + "grad_norm": 1.345260353062574, + "learning_rate": 1.4104569693839758e-05, + "loss": 0.6363, + "step": 22247 + }, + { + "epoch": 0.3844346143212607, + "grad_norm": 0.8478832157583535, + "learning_rate": 1.4104059354213777e-05, + "loss": 0.628, + "step": 22248 + }, + { + "epoch": 0.3844518938346696, + "grad_norm": 1.0394384093477074, + "learning_rate": 1.4103549001733574e-05, + "loss": 0.5679, + "step": 22249 + }, + { + "epoch": 0.3844691733480785, + "grad_norm": 1.6247393668541124, + "learning_rate": 1.4103038636400742e-05, + "loss": 0.5347, + "step": 22250 + }, + { + "epoch": 0.3844864528614874, + "grad_norm": 1.0104918102661087, + "learning_rate": 1.4102528258216878e-05, + "loss": 0.4857, + "step": 22251 + }, + { + "epoch": 0.3845037323748963, + "grad_norm": 1.0027385115526666, + "learning_rate": 1.4102017867183588e-05, + "loss": 0.4817, + "step": 22252 + }, + { + "epoch": 0.38452101188830523, + "grad_norm": 0.7015538635412263, + "learning_rate": 1.4101507463302465e-05, + "loss": 0.481, + "step": 22253 + }, + { + "epoch": 0.38453829140171414, + "grad_norm": 1.1360526362470973, + "learning_rate": 1.4100997046575107e-05, + "loss": 0.4995, + "step": 22254 + }, + { + "epoch": 0.38455557091512305, + "grad_norm": 1.1864066699008575, + "learning_rate": 1.4100486617003117e-05, + "loss": 0.5962, + "step": 22255 + }, + { + "epoch": 0.38457285042853195, + "grad_norm": 0.790933307153234, + "learning_rate": 1.4099976174588093e-05, + "loss": 0.5829, + "step": 22256 + }, + { + "epoch": 0.38459012994194086, + "grad_norm": 1.1159932682169937, + "learning_rate": 1.4099465719331628e-05, + "loss": 0.3575, + "step": 22257 + }, + { + "epoch": 0.3846074094553497, + "grad_norm": 1.061992532032958, + "learning_rate": 1.4098955251235327e-05, + "loss": 0.3852, + "step": 22258 + }, + { + "epoch": 0.3846246889687586, + "grad_norm": 1.4494781111194877, + "learning_rate": 1.4098444770300786e-05, + "loss": 0.4107, + "step": 22259 + }, + { + "epoch": 0.38464196848216753, + "grad_norm": 0.8789592981391815, + "learning_rate": 1.4097934276529604e-05, + "loss": 0.4027, + "step": 22260 + }, + { + "epoch": 0.38465924799557644, + "grad_norm": 0.8960285988310391, + "learning_rate": 1.4097423769923382e-05, + "loss": 0.6678, + "step": 22261 + }, + { + "epoch": 0.38467652750898534, + "grad_norm": 1.580151284571518, + "learning_rate": 1.4096913250483715e-05, + "loss": 0.526, + "step": 22262 + }, + { + "epoch": 0.38469380702239425, + "grad_norm": 0.8818924520434832, + "learning_rate": 1.4096402718212206e-05, + "loss": 0.5539, + "step": 22263 + }, + { + "epoch": 0.38471108653580316, + "grad_norm": 1.3810437522329269, + "learning_rate": 1.4095892173110453e-05, + "loss": 0.4855, + "step": 22264 + }, + { + "epoch": 0.38472836604921207, + "grad_norm": 0.9764725262608777, + "learning_rate": 1.409538161518005e-05, + "loss": 0.4454, + "step": 22265 + }, + { + "epoch": 0.384745645562621, + "grad_norm": 1.123258739229185, + "learning_rate": 1.4094871044422606e-05, + "loss": 0.4237, + "step": 22266 + }, + { + "epoch": 0.3847629250760299, + "grad_norm": 1.0853979603096324, + "learning_rate": 1.4094360460839714e-05, + "loss": 0.3971, + "step": 22267 + }, + { + "epoch": 0.38478020458943873, + "grad_norm": 1.2430705603962906, + "learning_rate": 1.4093849864432972e-05, + "loss": 0.3566, + "step": 22268 + }, + { + "epoch": 0.38479748410284764, + "grad_norm": 0.971323701795191, + "learning_rate": 1.409333925520398e-05, + "loss": 0.588, + "step": 22269 + }, + { + "epoch": 0.38481476361625655, + "grad_norm": 0.9082446815528334, + "learning_rate": 1.4092828633154343e-05, + "loss": 0.5207, + "step": 22270 + }, + { + "epoch": 0.38483204312966546, + "grad_norm": 1.1800473694114944, + "learning_rate": 1.409231799828565e-05, + "loss": 0.6044, + "step": 22271 + }, + { + "epoch": 0.38484932264307437, + "grad_norm": 0.9387022175420967, + "learning_rate": 1.4091807350599509e-05, + "loss": 0.4978, + "step": 22272 + }, + { + "epoch": 0.3848666021564833, + "grad_norm": 0.8259930087049246, + "learning_rate": 1.4091296690097518e-05, + "loss": 0.451, + "step": 22273 + }, + { + "epoch": 0.3848838816698922, + "grad_norm": 0.69531174302801, + "learning_rate": 1.409078601678127e-05, + "loss": 0.5372, + "step": 22274 + }, + { + "epoch": 0.3849011611833011, + "grad_norm": 0.8088335721957557, + "learning_rate": 1.4090275330652375e-05, + "loss": 0.5478, + "step": 22275 + }, + { + "epoch": 0.38491844069671, + "grad_norm": 0.6845444732492271, + "learning_rate": 1.4089764631712424e-05, + "loss": 0.369, + "step": 22276 + }, + { + "epoch": 0.3849357202101189, + "grad_norm": 0.8393191355940492, + "learning_rate": 1.408925391996302e-05, + "loss": 0.4486, + "step": 22277 + }, + { + "epoch": 0.3849529997235278, + "grad_norm": 0.7755926258360639, + "learning_rate": 1.4088743195405763e-05, + "loss": 0.3744, + "step": 22278 + }, + { + "epoch": 0.38497027923693666, + "grad_norm": 0.9315235949278442, + "learning_rate": 1.4088232458042247e-05, + "loss": 0.539, + "step": 22279 + }, + { + "epoch": 0.38498755875034557, + "grad_norm": 1.004910275504345, + "learning_rate": 1.408772170787408e-05, + "loss": 0.5991, + "step": 22280 + }, + { + "epoch": 0.3850048382637545, + "grad_norm": 0.7582934768676419, + "learning_rate": 1.4087210944902858e-05, + "loss": 0.5422, + "step": 22281 + }, + { + "epoch": 0.3850221177771634, + "grad_norm": 1.389886571531732, + "learning_rate": 1.4086700169130179e-05, + "loss": 0.3643, + "step": 22282 + }, + { + "epoch": 0.3850393972905723, + "grad_norm": 0.9344774464393423, + "learning_rate": 1.4086189380557645e-05, + "loss": 0.7355, + "step": 22283 + }, + { + "epoch": 0.3850566768039812, + "grad_norm": 0.8550744994446565, + "learning_rate": 1.4085678579186856e-05, + "loss": 0.37, + "step": 22284 + }, + { + "epoch": 0.3850739563173901, + "grad_norm": 0.8579423965742214, + "learning_rate": 1.4085167765019409e-05, + "loss": 0.4695, + "step": 22285 + }, + { + "epoch": 0.385091235830799, + "grad_norm": 0.41468939706520463, + "learning_rate": 1.4084656938056909e-05, + "loss": 0.4855, + "step": 22286 + }, + { + "epoch": 0.3851085153442079, + "grad_norm": 0.7588822603942991, + "learning_rate": 1.4084146098300952e-05, + "loss": 0.3782, + "step": 22287 + }, + { + "epoch": 0.38512579485761683, + "grad_norm": 0.8493039103022503, + "learning_rate": 1.408363524575314e-05, + "loss": 0.3257, + "step": 22288 + }, + { + "epoch": 0.3851430743710257, + "grad_norm": 1.378953735649914, + "learning_rate": 1.4083124380415071e-05, + "loss": 0.5173, + "step": 22289 + }, + { + "epoch": 0.3851603538844346, + "grad_norm": 1.806689162810325, + "learning_rate": 1.408261350228835e-05, + "loss": 0.4821, + "step": 22290 + }, + { + "epoch": 0.3851776333978435, + "grad_norm": 1.4261084209148738, + "learning_rate": 1.4082102611374567e-05, + "loss": 0.4601, + "step": 22291 + }, + { + "epoch": 0.3851949129112524, + "grad_norm": 0.955763609465562, + "learning_rate": 1.408159170767533e-05, + "loss": 0.524, + "step": 22292 + }, + { + "epoch": 0.3852121924246613, + "grad_norm": 0.9996307583041691, + "learning_rate": 1.4081080791192241e-05, + "loss": 0.3773, + "step": 22293 + }, + { + "epoch": 0.3852294719380702, + "grad_norm": 1.1349730788889048, + "learning_rate": 1.4080569861926892e-05, + "loss": 0.5258, + "step": 22294 + }, + { + "epoch": 0.38524675145147913, + "grad_norm": 0.9475359195385292, + "learning_rate": 1.408005891988089e-05, + "loss": 0.5134, + "step": 22295 + }, + { + "epoch": 0.38526403096488804, + "grad_norm": 0.5881844879549514, + "learning_rate": 1.4079547965055835e-05, + "loss": 0.2886, + "step": 22296 + }, + { + "epoch": 0.38528131047829695, + "grad_norm": 0.9493577315415448, + "learning_rate": 1.4079036997453324e-05, + "loss": 0.5488, + "step": 22297 + }, + { + "epoch": 0.38529858999170585, + "grad_norm": 0.7375976807420841, + "learning_rate": 1.407852601707496e-05, + "loss": 0.433, + "step": 22298 + }, + { + "epoch": 0.38531586950511476, + "grad_norm": 0.9908541395533437, + "learning_rate": 1.4078015023922342e-05, + "loss": 0.4982, + "step": 22299 + }, + { + "epoch": 0.3853331490185236, + "grad_norm": 0.6857453220607451, + "learning_rate": 1.4077504017997073e-05, + "loss": 0.6554, + "step": 22300 + }, + { + "epoch": 0.3853504285319325, + "grad_norm": 0.7255982903351682, + "learning_rate": 1.4076992999300749e-05, + "loss": 0.426, + "step": 22301 + }, + { + "epoch": 0.38536770804534143, + "grad_norm": 0.8837935980392176, + "learning_rate": 1.4076481967834971e-05, + "loss": 0.3491, + "step": 22302 + }, + { + "epoch": 0.38538498755875034, + "grad_norm": 1.0138705587634025, + "learning_rate": 1.4075970923601348e-05, + "loss": 0.5034, + "step": 22303 + }, + { + "epoch": 0.38540226707215924, + "grad_norm": 0.9827738343421919, + "learning_rate": 1.407545986660147e-05, + "loss": 0.4732, + "step": 22304 + }, + { + "epoch": 0.38541954658556815, + "grad_norm": 0.7577452543761832, + "learning_rate": 1.4074948796836942e-05, + "loss": 0.4494, + "step": 22305 + }, + { + "epoch": 0.38543682609897706, + "grad_norm": 0.5585204113502441, + "learning_rate": 1.4074437714309366e-05, + "loss": 0.7747, + "step": 22306 + }, + { + "epoch": 0.38545410561238597, + "grad_norm": 1.0224972375726649, + "learning_rate": 1.4073926619020342e-05, + "loss": 0.5752, + "step": 22307 + }, + { + "epoch": 0.3854713851257949, + "grad_norm": 1.9649803671735668, + "learning_rate": 1.407341551097147e-05, + "loss": 0.5404, + "step": 22308 + }, + { + "epoch": 0.3854886646392038, + "grad_norm": 1.015045195089302, + "learning_rate": 1.4072904390164351e-05, + "loss": 0.5221, + "step": 22309 + }, + { + "epoch": 0.3855059441526127, + "grad_norm": 1.2406038864685838, + "learning_rate": 1.4072393256600585e-05, + "loss": 0.4768, + "step": 22310 + }, + { + "epoch": 0.38552322366602154, + "grad_norm": 0.9975253691489127, + "learning_rate": 1.4071882110281771e-05, + "loss": 0.4225, + "step": 22311 + }, + { + "epoch": 0.38554050317943045, + "grad_norm": 1.5208574055140545, + "learning_rate": 1.4071370951209518e-05, + "loss": 0.4556, + "step": 22312 + }, + { + "epoch": 0.38555778269283936, + "grad_norm": 1.5790326958969223, + "learning_rate": 1.4070859779385419e-05, + "loss": 0.3159, + "step": 22313 + }, + { + "epoch": 0.38557506220624826, + "grad_norm": 1.5288595791588173, + "learning_rate": 1.4070348594811077e-05, + "loss": 0.571, + "step": 22314 + }, + { + "epoch": 0.3855923417196572, + "grad_norm": 1.034531914833582, + "learning_rate": 1.4069837397488095e-05, + "loss": 0.3985, + "step": 22315 + }, + { + "epoch": 0.3856096212330661, + "grad_norm": 1.3206461424082714, + "learning_rate": 1.4069326187418075e-05, + "loss": 0.4265, + "step": 22316 + }, + { + "epoch": 0.385626900746475, + "grad_norm": 0.7316109815016448, + "learning_rate": 1.4068814964602613e-05, + "loss": 0.4986, + "step": 22317 + }, + { + "epoch": 0.3856441802598839, + "grad_norm": 1.0354731091875535, + "learning_rate": 1.4068303729043318e-05, + "loss": 0.5409, + "step": 22318 + }, + { + "epoch": 0.3856614597732928, + "grad_norm": 0.8341126441978202, + "learning_rate": 1.406779248074178e-05, + "loss": 0.54, + "step": 22319 + }, + { + "epoch": 0.3856787392867017, + "grad_norm": 1.1161936266419572, + "learning_rate": 1.406728121969961e-05, + "loss": 0.3666, + "step": 22320 + }, + { + "epoch": 0.38569601880011056, + "grad_norm": 1.4457196704445612, + "learning_rate": 1.4066769945918407e-05, + "loss": 0.489, + "step": 22321 + }, + { + "epoch": 0.38571329831351947, + "grad_norm": 0.67717357401094, + "learning_rate": 1.406625865939977e-05, + "loss": 0.5094, + "step": 22322 + }, + { + "epoch": 0.3857305778269284, + "grad_norm": 0.454536534936579, + "learning_rate": 1.4065747360145304e-05, + "loss": 0.766, + "step": 22323 + }, + { + "epoch": 0.3857478573403373, + "grad_norm": 0.9275363448880161, + "learning_rate": 1.4065236048156605e-05, + "loss": 0.3959, + "step": 22324 + }, + { + "epoch": 0.3857651368537462, + "grad_norm": 0.7886841554107655, + "learning_rate": 1.4064724723435279e-05, + "loss": 0.4131, + "step": 22325 + }, + { + "epoch": 0.3857824163671551, + "grad_norm": 0.8943056621470612, + "learning_rate": 1.4064213385982925e-05, + "loss": 0.6237, + "step": 22326 + }, + { + "epoch": 0.385799695880564, + "grad_norm": 0.7827320748165201, + "learning_rate": 1.406370203580115e-05, + "loss": 0.3937, + "step": 22327 + }, + { + "epoch": 0.3858169753939729, + "grad_norm": 1.557295848879218, + "learning_rate": 1.4063190672891547e-05, + "loss": 0.5913, + "step": 22328 + }, + { + "epoch": 0.3858342549073818, + "grad_norm": 0.7517209914005751, + "learning_rate": 1.4062679297255726e-05, + "loss": 0.412, + "step": 22329 + }, + { + "epoch": 0.38585153442079073, + "grad_norm": 0.7008576783256765, + "learning_rate": 1.4062167908895284e-05, + "loss": 0.4788, + "step": 22330 + }, + { + "epoch": 0.38586881393419964, + "grad_norm": 0.7180412854287571, + "learning_rate": 1.4061656507811821e-05, + "loss": 0.3163, + "step": 22331 + }, + { + "epoch": 0.3858860934476085, + "grad_norm": 0.4934418685180517, + "learning_rate": 1.4061145094006943e-05, + "loss": 0.2556, + "step": 22332 + }, + { + "epoch": 0.3859033729610174, + "grad_norm": 0.6200032084067795, + "learning_rate": 1.4060633667482248e-05, + "loss": 0.3848, + "step": 22333 + }, + { + "epoch": 0.3859206524744263, + "grad_norm": 0.6723651445005365, + "learning_rate": 1.406012222823934e-05, + "loss": 0.3878, + "step": 22334 + }, + { + "epoch": 0.3859379319878352, + "grad_norm": 1.1439743158234708, + "learning_rate": 1.4059610776279822e-05, + "loss": 0.4855, + "step": 22335 + }, + { + "epoch": 0.3859552115012441, + "grad_norm": 0.7753930606304451, + "learning_rate": 1.4059099311605296e-05, + "loss": 0.4607, + "step": 22336 + }, + { + "epoch": 0.38597249101465303, + "grad_norm": 0.6178178092252238, + "learning_rate": 1.4058587834217356e-05, + "loss": 0.4159, + "step": 22337 + }, + { + "epoch": 0.38598977052806194, + "grad_norm": 1.1019638577692767, + "learning_rate": 1.4058076344117617e-05, + "loss": 0.6175, + "step": 22338 + }, + { + "epoch": 0.38600705004147084, + "grad_norm": 0.8281711922997707, + "learning_rate": 1.4057564841307674e-05, + "loss": 0.4244, + "step": 22339 + }, + { + "epoch": 0.38602432955487975, + "grad_norm": 1.052246081537782, + "learning_rate": 1.4057053325789127e-05, + "loss": 0.3937, + "step": 22340 + }, + { + "epoch": 0.38604160906828866, + "grad_norm": 0.8386981922249653, + "learning_rate": 1.4056541797563581e-05, + "loss": 0.3296, + "step": 22341 + }, + { + "epoch": 0.3860588885816975, + "grad_norm": 1.0429158848723556, + "learning_rate": 1.4056030256632637e-05, + "loss": 0.6289, + "step": 22342 + }, + { + "epoch": 0.3860761680951064, + "grad_norm": 0.8248547981237757, + "learning_rate": 1.4055518702997902e-05, + "loss": 0.3575, + "step": 22343 + }, + { + "epoch": 0.3860934476085153, + "grad_norm": 0.9344373634723455, + "learning_rate": 1.4055007136660973e-05, + "loss": 0.3977, + "step": 22344 + }, + { + "epoch": 0.38611072712192424, + "grad_norm": 1.2661998667577676, + "learning_rate": 1.4054495557623448e-05, + "loss": 0.329, + "step": 22345 + }, + { + "epoch": 0.38612800663533314, + "grad_norm": 2.893124253767737, + "learning_rate": 1.405398396588694e-05, + "loss": 0.583, + "step": 22346 + }, + { + "epoch": 0.38614528614874205, + "grad_norm": 0.9980786566041088, + "learning_rate": 1.4053472361453047e-05, + "loss": 0.4515, + "step": 22347 + }, + { + "epoch": 0.38616256566215096, + "grad_norm": 0.5066181979753623, + "learning_rate": 1.4052960744323365e-05, + "loss": 0.5883, + "step": 22348 + }, + { + "epoch": 0.38617984517555987, + "grad_norm": 1.6008247809793994, + "learning_rate": 1.4052449114499508e-05, + "loss": 0.6772, + "step": 22349 + }, + { + "epoch": 0.3861971246889688, + "grad_norm": 0.8739021075293287, + "learning_rate": 1.405193747198307e-05, + "loss": 0.5375, + "step": 22350 + }, + { + "epoch": 0.3862144042023777, + "grad_norm": 1.4313154065945866, + "learning_rate": 1.4051425816775654e-05, + "loss": 0.4662, + "step": 22351 + }, + { + "epoch": 0.3862316837157866, + "grad_norm": 0.925580534539164, + "learning_rate": 1.405091414887887e-05, + "loss": 0.351, + "step": 22352 + }, + { + "epoch": 0.38624896322919544, + "grad_norm": 0.4874554268717828, + "learning_rate": 1.4050402468294309e-05, + "loss": 0.6376, + "step": 22353 + }, + { + "epoch": 0.38626624274260435, + "grad_norm": 0.4823714495943805, + "learning_rate": 1.404989077502358e-05, + "loss": 0.8171, + "step": 22354 + }, + { + "epoch": 0.38628352225601326, + "grad_norm": 1.2325297083753322, + "learning_rate": 1.404937906906829e-05, + "loss": 0.4995, + "step": 22355 + }, + { + "epoch": 0.38630080176942216, + "grad_norm": 1.0168745485127986, + "learning_rate": 1.4048867350430032e-05, + "loss": 0.4492, + "step": 22356 + }, + { + "epoch": 0.38631808128283107, + "grad_norm": 1.9891823775684585, + "learning_rate": 1.4048355619110416e-05, + "loss": 0.5893, + "step": 22357 + }, + { + "epoch": 0.38633536079624, + "grad_norm": 1.2285906900240626, + "learning_rate": 1.4047843875111043e-05, + "loss": 0.4136, + "step": 22358 + }, + { + "epoch": 0.3863526403096489, + "grad_norm": 0.7320536497126836, + "learning_rate": 1.4047332118433517e-05, + "loss": 0.2541, + "step": 22359 + }, + { + "epoch": 0.3863699198230578, + "grad_norm": 0.3855685568953598, + "learning_rate": 1.4046820349079436e-05, + "loss": 0.6056, + "step": 22360 + }, + { + "epoch": 0.3863871993364667, + "grad_norm": 0.8546389315131971, + "learning_rate": 1.4046308567050408e-05, + "loss": 0.4613, + "step": 22361 + }, + { + "epoch": 0.3864044788498756, + "grad_norm": 0.9877422149163081, + "learning_rate": 1.404579677234803e-05, + "loss": 0.3698, + "step": 22362 + }, + { + "epoch": 0.3864217583632845, + "grad_norm": 1.1751761329994654, + "learning_rate": 1.4045284964973914e-05, + "loss": 0.469, + "step": 22363 + }, + { + "epoch": 0.38643903787669337, + "grad_norm": 1.6766918204791954, + "learning_rate": 1.4044773144929657e-05, + "loss": 0.5574, + "step": 22364 + }, + { + "epoch": 0.3864563173901023, + "grad_norm": 1.0987325935352357, + "learning_rate": 1.404426131221686e-05, + "loss": 0.3183, + "step": 22365 + }, + { + "epoch": 0.3864735969035112, + "grad_norm": 1.1671598152924392, + "learning_rate": 1.4043749466837133e-05, + "loss": 0.6449, + "step": 22366 + }, + { + "epoch": 0.3864908764169201, + "grad_norm": 1.2063564383789918, + "learning_rate": 1.4043237608792075e-05, + "loss": 0.4108, + "step": 22367 + }, + { + "epoch": 0.386508155930329, + "grad_norm": 0.7402934165617836, + "learning_rate": 1.4042725738083287e-05, + "loss": 0.4276, + "step": 22368 + }, + { + "epoch": 0.3865254354437379, + "grad_norm": 0.8787821243097393, + "learning_rate": 1.4042213854712376e-05, + "loss": 0.4711, + "step": 22369 + }, + { + "epoch": 0.3865427149571468, + "grad_norm": 0.8714699127709921, + "learning_rate": 1.4041701958680949e-05, + "loss": 0.3713, + "step": 22370 + }, + { + "epoch": 0.3865599944705557, + "grad_norm": 0.9251709398675231, + "learning_rate": 1.4041190049990597e-05, + "loss": 0.5826, + "step": 22371 + }, + { + "epoch": 0.38657727398396463, + "grad_norm": 0.7556274961759193, + "learning_rate": 1.4040678128642933e-05, + "loss": 0.4732, + "step": 22372 + }, + { + "epoch": 0.38659455349737354, + "grad_norm": 1.2690224688522667, + "learning_rate": 1.4040166194639559e-05, + "loss": 0.4974, + "step": 22373 + }, + { + "epoch": 0.3866118330107824, + "grad_norm": 0.6434428250860073, + "learning_rate": 1.4039654247982075e-05, + "loss": 0.372, + "step": 22374 + }, + { + "epoch": 0.3866291125241913, + "grad_norm": 1.2987183799763673, + "learning_rate": 1.403914228867209e-05, + "loss": 0.7358, + "step": 22375 + }, + { + "epoch": 0.3866463920376002, + "grad_norm": 0.6701756195893371, + "learning_rate": 1.4038630316711204e-05, + "loss": 0.3067, + "step": 22376 + }, + { + "epoch": 0.3866636715510091, + "grad_norm": 1.840245837328248, + "learning_rate": 1.4038118332101018e-05, + "loss": 0.4195, + "step": 22377 + }, + { + "epoch": 0.386680951064418, + "grad_norm": 0.9448636318213628, + "learning_rate": 1.4037606334843141e-05, + "loss": 0.569, + "step": 22378 + }, + { + "epoch": 0.38669823057782693, + "grad_norm": 0.7410995126059701, + "learning_rate": 1.4037094324939174e-05, + "loss": 0.5388, + "step": 22379 + }, + { + "epoch": 0.38671551009123584, + "grad_norm": 0.49616412300204155, + "learning_rate": 1.403658230239072e-05, + "loss": 0.6844, + "step": 22380 + }, + { + "epoch": 0.38673278960464474, + "grad_norm": 1.43034474663614, + "learning_rate": 1.4036070267199385e-05, + "loss": 0.6831, + "step": 22381 + }, + { + "epoch": 0.38675006911805365, + "grad_norm": 0.6347446049062586, + "learning_rate": 1.403555821936677e-05, + "loss": 0.4202, + "step": 22382 + }, + { + "epoch": 0.38676734863146256, + "grad_norm": 0.9899424753318924, + "learning_rate": 1.4035046158894478e-05, + "loss": 0.3869, + "step": 22383 + }, + { + "epoch": 0.38678462814487147, + "grad_norm": 0.6410209569437878, + "learning_rate": 1.403453408578412e-05, + "loss": 0.6838, + "step": 22384 + }, + { + "epoch": 0.3868019076582803, + "grad_norm": 0.9498733163390062, + "learning_rate": 1.4034022000037288e-05, + "loss": 0.3766, + "step": 22385 + }, + { + "epoch": 0.3868191871716892, + "grad_norm": 0.9677951153577956, + "learning_rate": 1.4033509901655598e-05, + "loss": 0.3868, + "step": 22386 + }, + { + "epoch": 0.38683646668509813, + "grad_norm": 0.7610041715444005, + "learning_rate": 1.4032997790640646e-05, + "loss": 0.5089, + "step": 22387 + }, + { + "epoch": 0.38685374619850704, + "grad_norm": 0.6522905634940156, + "learning_rate": 1.4032485666994035e-05, + "loss": 0.3948, + "step": 22388 + }, + { + "epoch": 0.38687102571191595, + "grad_norm": 1.0876288108044714, + "learning_rate": 1.4031973530717378e-05, + "loss": 0.4863, + "step": 22389 + }, + { + "epoch": 0.38688830522532486, + "grad_norm": 0.6178599686939487, + "learning_rate": 1.4031461381812274e-05, + "loss": 0.6605, + "step": 22390 + }, + { + "epoch": 0.38690558473873377, + "grad_norm": 0.8561990474364306, + "learning_rate": 1.4030949220280324e-05, + "loss": 0.3678, + "step": 22391 + }, + { + "epoch": 0.3869228642521427, + "grad_norm": 0.9274064026764602, + "learning_rate": 1.4030437046123136e-05, + "loss": 0.3397, + "step": 22392 + }, + { + "epoch": 0.3869401437655516, + "grad_norm": 1.2337227638444297, + "learning_rate": 1.4029924859342313e-05, + "loss": 0.5865, + "step": 22393 + }, + { + "epoch": 0.3869574232789605, + "grad_norm": 0.712750805939033, + "learning_rate": 1.4029412659939454e-05, + "loss": 0.5467, + "step": 22394 + }, + { + "epoch": 0.38697470279236934, + "grad_norm": 0.9120402911047197, + "learning_rate": 1.4028900447916175e-05, + "loss": 0.3985, + "step": 22395 + }, + { + "epoch": 0.38699198230577825, + "grad_norm": 1.1499618902793236, + "learning_rate": 1.402838822327407e-05, + "loss": 0.5493, + "step": 22396 + }, + { + "epoch": 0.38700926181918716, + "grad_norm": 0.97412110567668, + "learning_rate": 1.4027875986014747e-05, + "loss": 0.5794, + "step": 22397 + }, + { + "epoch": 0.38702654133259606, + "grad_norm": 1.0665240357351835, + "learning_rate": 1.4027363736139812e-05, + "loss": 0.6128, + "step": 22398 + }, + { + "epoch": 0.38704382084600497, + "grad_norm": 0.9341150026104128, + "learning_rate": 1.4026851473650867e-05, + "loss": 0.3683, + "step": 22399 + }, + { + "epoch": 0.3870611003594139, + "grad_norm": 1.0907182365826569, + "learning_rate": 1.4026339198549515e-05, + "loss": 0.4691, + "step": 22400 + }, + { + "epoch": 0.3870783798728228, + "grad_norm": 0.9960252012309301, + "learning_rate": 1.4025826910837367e-05, + "loss": 0.6129, + "step": 22401 + }, + { + "epoch": 0.3870956593862317, + "grad_norm": 1.0439819901293412, + "learning_rate": 1.4025314610516018e-05, + "loss": 0.6656, + "step": 22402 + }, + { + "epoch": 0.3871129388996406, + "grad_norm": 1.1132994285111495, + "learning_rate": 1.4024802297587081e-05, + "loss": 0.3257, + "step": 22403 + }, + { + "epoch": 0.3871302184130495, + "grad_norm": 1.612608552546451, + "learning_rate": 1.4024289972052158e-05, + "loss": 0.4626, + "step": 22404 + }, + { + "epoch": 0.3871474979264584, + "grad_norm": 1.6594477812213075, + "learning_rate": 1.4023777633912849e-05, + "loss": 0.5949, + "step": 22405 + }, + { + "epoch": 0.38716477743986727, + "grad_norm": 1.676377232776491, + "learning_rate": 1.4023265283170769e-05, + "loss": 0.5205, + "step": 22406 + }, + { + "epoch": 0.3871820569532762, + "grad_norm": 1.0168966401186057, + "learning_rate": 1.4022752919827512e-05, + "loss": 0.5949, + "step": 22407 + }, + { + "epoch": 0.3871993364666851, + "grad_norm": 0.7819583997164398, + "learning_rate": 1.4022240543884686e-05, + "loss": 0.3292, + "step": 22408 + }, + { + "epoch": 0.387216615980094, + "grad_norm": 1.0939953987957007, + "learning_rate": 1.40217281553439e-05, + "loss": 0.3412, + "step": 22409 + }, + { + "epoch": 0.3872338954935029, + "grad_norm": 1.5703797195864964, + "learning_rate": 1.4021215754206754e-05, + "loss": 0.4784, + "step": 22410 + }, + { + "epoch": 0.3872511750069118, + "grad_norm": 0.4590507218322827, + "learning_rate": 1.4020703340474857e-05, + "loss": 0.5286, + "step": 22411 + }, + { + "epoch": 0.3872684545203207, + "grad_norm": 0.8007791554996684, + "learning_rate": 1.402019091414981e-05, + "loss": 0.4753, + "step": 22412 + }, + { + "epoch": 0.3872857340337296, + "grad_norm": 0.751628925042847, + "learning_rate": 1.4019678475233221e-05, + "loss": 0.5732, + "step": 22413 + }, + { + "epoch": 0.38730301354713853, + "grad_norm": 1.1026767736541725, + "learning_rate": 1.4019166023726689e-05, + "loss": 0.4985, + "step": 22414 + }, + { + "epoch": 0.38732029306054744, + "grad_norm": 0.6896323502671241, + "learning_rate": 1.4018653559631827e-05, + "loss": 0.3799, + "step": 22415 + }, + { + "epoch": 0.3873375725739563, + "grad_norm": 0.8056499452977198, + "learning_rate": 1.4018141082950236e-05, + "loss": 0.4962, + "step": 22416 + }, + { + "epoch": 0.3873548520873652, + "grad_norm": 0.8524818182076531, + "learning_rate": 1.4017628593683522e-05, + "loss": 0.334, + "step": 22417 + }, + { + "epoch": 0.3873721316007741, + "grad_norm": 0.4114058324879134, + "learning_rate": 1.401711609183329e-05, + "loss": 0.5419, + "step": 22418 + }, + { + "epoch": 0.387389411114183, + "grad_norm": 1.0764308280844321, + "learning_rate": 1.4016603577401146e-05, + "loss": 0.4867, + "step": 22419 + }, + { + "epoch": 0.3874066906275919, + "grad_norm": 0.43253298679300034, + "learning_rate": 1.4016091050388692e-05, + "loss": 0.7342, + "step": 22420 + }, + { + "epoch": 0.38742397014100083, + "grad_norm": 0.9407991832042848, + "learning_rate": 1.4015578510797539e-05, + "loss": 0.4114, + "step": 22421 + }, + { + "epoch": 0.38744124965440974, + "grad_norm": 0.8850014006673911, + "learning_rate": 1.4015065958629284e-05, + "loss": 0.4161, + "step": 22422 + }, + { + "epoch": 0.38745852916781864, + "grad_norm": 1.597304426772824, + "learning_rate": 1.4014553393885541e-05, + "loss": 0.4879, + "step": 22423 + }, + { + "epoch": 0.38747580868122755, + "grad_norm": 0.5340371477107144, + "learning_rate": 1.4014040816567911e-05, + "loss": 0.759, + "step": 22424 + }, + { + "epoch": 0.38749308819463646, + "grad_norm": 0.5049049904795286, + "learning_rate": 1.4013528226678e-05, + "loss": 0.592, + "step": 22425 + }, + { + "epoch": 0.38751036770804537, + "grad_norm": 1.240311966803194, + "learning_rate": 1.4013015624217414e-05, + "loss": 0.3108, + "step": 22426 + }, + { + "epoch": 0.3875276472214542, + "grad_norm": 1.0429222646553307, + "learning_rate": 1.4012503009187756e-05, + "loss": 0.4141, + "step": 22427 + }, + { + "epoch": 0.3875449267348631, + "grad_norm": 0.9428915723475993, + "learning_rate": 1.4011990381590635e-05, + "loss": 0.3662, + "step": 22428 + }, + { + "epoch": 0.38756220624827203, + "grad_norm": 0.852713064423841, + "learning_rate": 1.4011477741427654e-05, + "loss": 0.3884, + "step": 22429 + }, + { + "epoch": 0.38757948576168094, + "grad_norm": 1.019852641920849, + "learning_rate": 1.4010965088700424e-05, + "loss": 0.4381, + "step": 22430 + }, + { + "epoch": 0.38759676527508985, + "grad_norm": 1.1379056987843557, + "learning_rate": 1.4010452423410543e-05, + "loss": 0.5271, + "step": 22431 + }, + { + "epoch": 0.38761404478849876, + "grad_norm": 1.0981242811786998, + "learning_rate": 1.4009939745559623e-05, + "loss": 0.3685, + "step": 22432 + }, + { + "epoch": 0.38763132430190766, + "grad_norm": 0.47726529121667155, + "learning_rate": 1.4009427055149265e-05, + "loss": 0.7542, + "step": 22433 + }, + { + "epoch": 0.3876486038153166, + "grad_norm": 1.6454340602692816, + "learning_rate": 1.4008914352181076e-05, + "loss": 0.3794, + "step": 22434 + }, + { + "epoch": 0.3876658833287255, + "grad_norm": 0.7793858256075235, + "learning_rate": 1.4008401636656664e-05, + "loss": 0.4405, + "step": 22435 + }, + { + "epoch": 0.3876831628421344, + "grad_norm": 1.425077364729274, + "learning_rate": 1.4007888908577632e-05, + "loss": 0.5614, + "step": 22436 + }, + { + "epoch": 0.3877004423555433, + "grad_norm": 0.887607095933027, + "learning_rate": 1.4007376167945588e-05, + "loss": 0.7104, + "step": 22437 + }, + { + "epoch": 0.38771772186895215, + "grad_norm": 1.0100674486717345, + "learning_rate": 1.4006863414762138e-05, + "loss": 0.445, + "step": 22438 + }, + { + "epoch": 0.38773500138236106, + "grad_norm": 1.0791289927268548, + "learning_rate": 1.4006350649028886e-05, + "loss": 0.4658, + "step": 22439 + }, + { + "epoch": 0.38775228089576996, + "grad_norm": 0.9051493879656947, + "learning_rate": 1.4005837870747439e-05, + "loss": 0.3243, + "step": 22440 + }, + { + "epoch": 0.38776956040917887, + "grad_norm": 0.6413120484656281, + "learning_rate": 1.4005325079919405e-05, + "loss": 0.8779, + "step": 22441 + }, + { + "epoch": 0.3877868399225878, + "grad_norm": 0.9593472526833775, + "learning_rate": 1.4004812276546389e-05, + "loss": 0.3938, + "step": 22442 + }, + { + "epoch": 0.3878041194359967, + "grad_norm": 0.8962151125310631, + "learning_rate": 1.4004299460629996e-05, + "loss": 0.4492, + "step": 22443 + }, + { + "epoch": 0.3878213989494056, + "grad_norm": 1.048977451749667, + "learning_rate": 1.4003786632171832e-05, + "loss": 0.6786, + "step": 22444 + }, + { + "epoch": 0.3878386784628145, + "grad_norm": 0.7466155180227334, + "learning_rate": 1.4003273791173504e-05, + "loss": 0.4612, + "step": 22445 + }, + { + "epoch": 0.3878559579762234, + "grad_norm": 0.8822682737170475, + "learning_rate": 1.4002760937636617e-05, + "loss": 0.4369, + "step": 22446 + }, + { + "epoch": 0.3878732374896323, + "grad_norm": 1.649893799955023, + "learning_rate": 1.400224807156278e-05, + "loss": 0.5336, + "step": 22447 + }, + { + "epoch": 0.38789051700304117, + "grad_norm": 1.3567543962286592, + "learning_rate": 1.4001735192953598e-05, + "loss": 0.3453, + "step": 22448 + }, + { + "epoch": 0.3879077965164501, + "grad_norm": 1.154210771169869, + "learning_rate": 1.4001222301810678e-05, + "loss": 0.3221, + "step": 22449 + }, + { + "epoch": 0.387925076029859, + "grad_norm": 0.8171116119944083, + "learning_rate": 1.4000709398135624e-05, + "loss": 0.3557, + "step": 22450 + }, + { + "epoch": 0.3879423555432679, + "grad_norm": 1.1501315264043885, + "learning_rate": 1.4000196481930044e-05, + "loss": 0.5065, + "step": 22451 + }, + { + "epoch": 0.3879596350566768, + "grad_norm": 0.6307252305627477, + "learning_rate": 1.3999683553195547e-05, + "loss": 0.3585, + "step": 22452 + }, + { + "epoch": 0.3879769145700857, + "grad_norm": 1.125368523653812, + "learning_rate": 1.3999170611933736e-05, + "loss": 0.3977, + "step": 22453 + }, + { + "epoch": 0.3879941940834946, + "grad_norm": 0.9606777828192181, + "learning_rate": 1.3998657658146217e-05, + "loss": 0.6498, + "step": 22454 + }, + { + "epoch": 0.3880114735969035, + "grad_norm": 0.5457747855388458, + "learning_rate": 1.3998144691834601e-05, + "loss": 0.9243, + "step": 22455 + }, + { + "epoch": 0.38802875311031243, + "grad_norm": 1.4201054908581658, + "learning_rate": 1.399763171300049e-05, + "loss": 0.6963, + "step": 22456 + }, + { + "epoch": 0.38804603262372134, + "grad_norm": 0.8252284430808702, + "learning_rate": 1.3997118721645492e-05, + "loss": 0.3494, + "step": 22457 + }, + { + "epoch": 0.38806331213713025, + "grad_norm": 1.4437783134014044, + "learning_rate": 1.3996605717771216e-05, + "loss": 0.6002, + "step": 22458 + }, + { + "epoch": 0.3880805916505391, + "grad_norm": 1.080435542061839, + "learning_rate": 1.3996092701379269e-05, + "loss": 0.3874, + "step": 22459 + }, + { + "epoch": 0.388097871163948, + "grad_norm": 0.9320785215362363, + "learning_rate": 1.3995579672471254e-05, + "loss": 0.5166, + "step": 22460 + }, + { + "epoch": 0.3881151506773569, + "grad_norm": 0.6328020407986147, + "learning_rate": 1.399506663104878e-05, + "loss": 0.2533, + "step": 22461 + }, + { + "epoch": 0.3881324301907658, + "grad_norm": 0.997329250303162, + "learning_rate": 1.3994553577113452e-05, + "loss": 0.3414, + "step": 22462 + }, + { + "epoch": 0.38814970970417473, + "grad_norm": 0.5869583856715563, + "learning_rate": 1.399404051066688e-05, + "loss": 0.7329, + "step": 22463 + }, + { + "epoch": 0.38816698921758364, + "grad_norm": 1.291162783164393, + "learning_rate": 1.399352743171067e-05, + "loss": 0.6555, + "step": 22464 + }, + { + "epoch": 0.38818426873099254, + "grad_norm": 1.144392385528313, + "learning_rate": 1.3993014340246428e-05, + "loss": 0.6537, + "step": 22465 + }, + { + "epoch": 0.38820154824440145, + "grad_norm": 0.8263781054405066, + "learning_rate": 1.3992501236275761e-05, + "loss": 0.4434, + "step": 22466 + }, + { + "epoch": 0.38821882775781036, + "grad_norm": 0.9870183356652994, + "learning_rate": 1.3991988119800279e-05, + "loss": 0.5495, + "step": 22467 + }, + { + "epoch": 0.38823610727121927, + "grad_norm": 0.8872843667070054, + "learning_rate": 1.3991474990821583e-05, + "loss": 0.2042, + "step": 22468 + }, + { + "epoch": 0.3882533867846281, + "grad_norm": 0.9042324042707528, + "learning_rate": 1.3990961849341287e-05, + "loss": 0.421, + "step": 22469 + }, + { + "epoch": 0.388270666298037, + "grad_norm": 1.1409716326954382, + "learning_rate": 1.3990448695360997e-05, + "loss": 0.4203, + "step": 22470 + }, + { + "epoch": 0.38828794581144593, + "grad_norm": 1.1358218329152308, + "learning_rate": 1.3989935528882313e-05, + "loss": 0.4657, + "step": 22471 + }, + { + "epoch": 0.38830522532485484, + "grad_norm": 1.4095383513098318, + "learning_rate": 1.3989422349906851e-05, + "loss": 0.4807, + "step": 22472 + }, + { + "epoch": 0.38832250483826375, + "grad_norm": 0.9305509080051085, + "learning_rate": 1.3988909158436217e-05, + "loss": 0.4773, + "step": 22473 + }, + { + "epoch": 0.38833978435167266, + "grad_norm": 1.1920492738083117, + "learning_rate": 1.3988395954472012e-05, + "loss": 0.3628, + "step": 22474 + }, + { + "epoch": 0.38835706386508156, + "grad_norm": 0.772712645021495, + "learning_rate": 1.3987882738015852e-05, + "loss": 0.3253, + "step": 22475 + }, + { + "epoch": 0.38837434337849047, + "grad_norm": 1.1370556722265792, + "learning_rate": 1.3987369509069337e-05, + "loss": 0.5048, + "step": 22476 + }, + { + "epoch": 0.3883916228918994, + "grad_norm": 0.9343522621238451, + "learning_rate": 1.398685626763408e-05, + "loss": 0.5334, + "step": 22477 + }, + { + "epoch": 0.3884089024053083, + "grad_norm": 0.720744606097656, + "learning_rate": 1.3986343013711682e-05, + "loss": 0.2336, + "step": 22478 + }, + { + "epoch": 0.3884261819187172, + "grad_norm": 1.2424585955492002, + "learning_rate": 1.398582974730376e-05, + "loss": 0.2919, + "step": 22479 + }, + { + "epoch": 0.38844346143212605, + "grad_norm": 1.0198416487219961, + "learning_rate": 1.3985316468411912e-05, + "loss": 0.7357, + "step": 22480 + }, + { + "epoch": 0.38846074094553495, + "grad_norm": 1.0349280449505878, + "learning_rate": 1.3984803177037752e-05, + "loss": 0.5712, + "step": 22481 + }, + { + "epoch": 0.38847802045894386, + "grad_norm": 1.2890861100999194, + "learning_rate": 1.3984289873182886e-05, + "loss": 0.5364, + "step": 22482 + }, + { + "epoch": 0.38849529997235277, + "grad_norm": 1.4864020640130169, + "learning_rate": 1.3983776556848922e-05, + "loss": 0.4172, + "step": 22483 + }, + { + "epoch": 0.3885125794857617, + "grad_norm": 0.8925353366234186, + "learning_rate": 1.3983263228037464e-05, + "loss": 0.4213, + "step": 22484 + }, + { + "epoch": 0.3885298589991706, + "grad_norm": 0.4757607994069124, + "learning_rate": 1.3982749886750124e-05, + "loss": 0.8013, + "step": 22485 + }, + { + "epoch": 0.3885471385125795, + "grad_norm": 0.7572032129846655, + "learning_rate": 1.3982236532988511e-05, + "loss": 0.4019, + "step": 22486 + }, + { + "epoch": 0.3885644180259884, + "grad_norm": 0.7537798630320026, + "learning_rate": 1.398172316675423e-05, + "loss": 0.7352, + "step": 22487 + }, + { + "epoch": 0.3885816975393973, + "grad_norm": 0.8290495305880394, + "learning_rate": 1.3981209788048887e-05, + "loss": 0.4721, + "step": 22488 + }, + { + "epoch": 0.3885989770528062, + "grad_norm": 0.8440605603139404, + "learning_rate": 1.3980696396874095e-05, + "loss": 0.3126, + "step": 22489 + }, + { + "epoch": 0.38861625656621507, + "grad_norm": 0.9803899830291688, + "learning_rate": 1.3980182993231457e-05, + "loss": 0.3534, + "step": 22490 + }, + { + "epoch": 0.388633536079624, + "grad_norm": 0.8821709754443732, + "learning_rate": 1.3979669577122583e-05, + "loss": 0.4559, + "step": 22491 + }, + { + "epoch": 0.3886508155930329, + "grad_norm": 0.7176606912970878, + "learning_rate": 1.3979156148549082e-05, + "loss": 0.4989, + "step": 22492 + }, + { + "epoch": 0.3886680951064418, + "grad_norm": 0.951803679624625, + "learning_rate": 1.3978642707512567e-05, + "loss": 0.491, + "step": 22493 + }, + { + "epoch": 0.3886853746198507, + "grad_norm": 1.0763748498647572, + "learning_rate": 1.3978129254014634e-05, + "loss": 0.4511, + "step": 22494 + }, + { + "epoch": 0.3887026541332596, + "grad_norm": 1.0120223942067772, + "learning_rate": 1.39776157880569e-05, + "loss": 0.5713, + "step": 22495 + }, + { + "epoch": 0.3887199336466685, + "grad_norm": 0.9415367059014021, + "learning_rate": 1.3977102309640974e-05, + "loss": 0.5091, + "step": 22496 + }, + { + "epoch": 0.3887372131600774, + "grad_norm": 0.9985654576726873, + "learning_rate": 1.3976588818768455e-05, + "loss": 0.438, + "step": 22497 + }, + { + "epoch": 0.38875449267348633, + "grad_norm": 1.1598095032961735, + "learning_rate": 1.3976075315440963e-05, + "loss": 0.6066, + "step": 22498 + }, + { + "epoch": 0.38877177218689524, + "grad_norm": 1.2118701373056204, + "learning_rate": 1.3975561799660098e-05, + "loss": 0.4769, + "step": 22499 + }, + { + "epoch": 0.38878905170030414, + "grad_norm": 0.7333606967135651, + "learning_rate": 1.397504827142747e-05, + "loss": 0.4407, + "step": 22500 + }, + { + "epoch": 0.388806331213713, + "grad_norm": 0.7892563315513571, + "learning_rate": 1.3974534730744691e-05, + "loss": 0.5351, + "step": 22501 + }, + { + "epoch": 0.3888236107271219, + "grad_norm": 0.7841346044077314, + "learning_rate": 1.3974021177613367e-05, + "loss": 0.456, + "step": 22502 + }, + { + "epoch": 0.3888408902405308, + "grad_norm": 1.056666487073834, + "learning_rate": 1.3973507612035106e-05, + "loss": 0.4869, + "step": 22503 + }, + { + "epoch": 0.3888581697539397, + "grad_norm": 1.957246744114549, + "learning_rate": 1.397299403401152e-05, + "loss": 0.5341, + "step": 22504 + }, + { + "epoch": 0.3888754492673486, + "grad_norm": 1.1673401442240647, + "learning_rate": 1.3972480443544213e-05, + "loss": 0.4345, + "step": 22505 + }, + { + "epoch": 0.38889272878075754, + "grad_norm": 1.794386472718521, + "learning_rate": 1.3971966840634794e-05, + "loss": 0.4989, + "step": 22506 + }, + { + "epoch": 0.38891000829416644, + "grad_norm": 1.3726799660079398, + "learning_rate": 1.3971453225284877e-05, + "loss": 0.2859, + "step": 22507 + }, + { + "epoch": 0.38892728780757535, + "grad_norm": 1.5440854882983577, + "learning_rate": 1.397093959749606e-05, + "loss": 0.4512, + "step": 22508 + }, + { + "epoch": 0.38894456732098426, + "grad_norm": 0.8041489374519889, + "learning_rate": 1.3970425957269961e-05, + "loss": 0.408, + "step": 22509 + }, + { + "epoch": 0.38896184683439317, + "grad_norm": 0.6076839090414466, + "learning_rate": 1.396991230460819e-05, + "loss": 0.3527, + "step": 22510 + }, + { + "epoch": 0.3889791263478021, + "grad_norm": 1.705701631612421, + "learning_rate": 1.3969398639512348e-05, + "loss": 0.5897, + "step": 22511 + }, + { + "epoch": 0.3889964058612109, + "grad_norm": 1.0558897173726165, + "learning_rate": 1.3968884961984048e-05, + "loss": 0.4727, + "step": 22512 + }, + { + "epoch": 0.38901368537461983, + "grad_norm": 1.144372949825857, + "learning_rate": 1.3968371272024901e-05, + "loss": 0.5005, + "step": 22513 + }, + { + "epoch": 0.38903096488802874, + "grad_norm": 0.6958144586133774, + "learning_rate": 1.3967857569636514e-05, + "loss": 0.379, + "step": 22514 + }, + { + "epoch": 0.38904824440143765, + "grad_norm": 1.0417676979868766, + "learning_rate": 1.3967343854820494e-05, + "loss": 0.4929, + "step": 22515 + }, + { + "epoch": 0.38906552391484656, + "grad_norm": 1.2627804265910452, + "learning_rate": 1.3966830127578453e-05, + "loss": 0.3259, + "step": 22516 + }, + { + "epoch": 0.38908280342825546, + "grad_norm": 0.9558418435001836, + "learning_rate": 1.3966316387911993e-05, + "loss": 0.6737, + "step": 22517 + }, + { + "epoch": 0.38910008294166437, + "grad_norm": 4.570129134268487, + "learning_rate": 1.3965802635822734e-05, + "loss": 0.3216, + "step": 22518 + }, + { + "epoch": 0.3891173624550733, + "grad_norm": 1.167379453115112, + "learning_rate": 1.396528887131228e-05, + "loss": 0.5288, + "step": 22519 + }, + { + "epoch": 0.3891346419684822, + "grad_norm": 1.8222860709915143, + "learning_rate": 1.3964775094382237e-05, + "loss": 0.6773, + "step": 22520 + }, + { + "epoch": 0.3891519214818911, + "grad_norm": 0.8531534818224069, + "learning_rate": 1.396426130503422e-05, + "loss": 0.5447, + "step": 22521 + }, + { + "epoch": 0.38916920099529995, + "grad_norm": 1.02415862187521, + "learning_rate": 1.3963747503269832e-05, + "loss": 0.4601, + "step": 22522 + }, + { + "epoch": 0.38918648050870885, + "grad_norm": 1.4059721337377373, + "learning_rate": 1.3963233689090685e-05, + "loss": 0.4749, + "step": 22523 + }, + { + "epoch": 0.38920376002211776, + "grad_norm": 1.1602719538338369, + "learning_rate": 1.3962719862498395e-05, + "loss": 0.716, + "step": 22524 + }, + { + "epoch": 0.38922103953552667, + "grad_norm": 0.8001830007108403, + "learning_rate": 1.396220602349456e-05, + "loss": 0.4384, + "step": 22525 + }, + { + "epoch": 0.3892383190489356, + "grad_norm": 1.303135804224629, + "learning_rate": 1.3961692172080796e-05, + "loss": 0.5667, + "step": 22526 + }, + { + "epoch": 0.3892555985623445, + "grad_norm": 1.1743946181754348, + "learning_rate": 1.3961178308258711e-05, + "loss": 0.4192, + "step": 22527 + }, + { + "epoch": 0.3892728780757534, + "grad_norm": 1.0705262434863227, + "learning_rate": 1.3960664432029914e-05, + "loss": 0.3875, + "step": 22528 + }, + { + "epoch": 0.3892901575891623, + "grad_norm": 1.830871149246883, + "learning_rate": 1.3960150543396015e-05, + "loss": 0.4259, + "step": 22529 + }, + { + "epoch": 0.3893074371025712, + "grad_norm": 1.5544222266850805, + "learning_rate": 1.3959636642358624e-05, + "loss": 0.5547, + "step": 22530 + }, + { + "epoch": 0.3893247166159801, + "grad_norm": 1.1287983642232982, + "learning_rate": 1.395912272891935e-05, + "loss": 0.6506, + "step": 22531 + }, + { + "epoch": 0.389341996129389, + "grad_norm": 1.1523516415931914, + "learning_rate": 1.3958608803079804e-05, + "loss": 0.5036, + "step": 22532 + }, + { + "epoch": 0.3893592756427979, + "grad_norm": 1.1600890010453708, + "learning_rate": 1.3958094864841594e-05, + "loss": 0.5649, + "step": 22533 + }, + { + "epoch": 0.3893765551562068, + "grad_norm": 0.9050654290451922, + "learning_rate": 1.3957580914206328e-05, + "loss": 0.4739, + "step": 22534 + }, + { + "epoch": 0.3893938346696157, + "grad_norm": 1.113873855003696, + "learning_rate": 1.395706695117562e-05, + "loss": 0.4249, + "step": 22535 + }, + { + "epoch": 0.3894111141830246, + "grad_norm": 0.7984991746081418, + "learning_rate": 1.3956552975751079e-05, + "loss": 0.3649, + "step": 22536 + }, + { + "epoch": 0.3894283936964335, + "grad_norm": 1.247723123216342, + "learning_rate": 1.3956038987934309e-05, + "loss": 0.4289, + "step": 22537 + }, + { + "epoch": 0.3894456732098424, + "grad_norm": 0.5045497020792713, + "learning_rate": 1.3955524987726927e-05, + "loss": 0.652, + "step": 22538 + }, + { + "epoch": 0.3894629527232513, + "grad_norm": 0.6662337032428425, + "learning_rate": 1.3955010975130541e-05, + "loss": 0.931, + "step": 22539 + }, + { + "epoch": 0.38948023223666023, + "grad_norm": 1.5834929954686319, + "learning_rate": 1.3954496950146756e-05, + "loss": 0.5516, + "step": 22540 + }, + { + "epoch": 0.38949751175006914, + "grad_norm": 0.8329588473292392, + "learning_rate": 1.3953982912777188e-05, + "loss": 0.5066, + "step": 22541 + }, + { + "epoch": 0.38951479126347804, + "grad_norm": 1.1795724203658116, + "learning_rate": 1.3953468863023447e-05, + "loss": 0.384, + "step": 22542 + }, + { + "epoch": 0.3895320707768869, + "grad_norm": 0.8755010956676929, + "learning_rate": 1.3952954800887137e-05, + "loss": 0.4495, + "step": 22543 + }, + { + "epoch": 0.3895493502902958, + "grad_norm": 0.8113868108980004, + "learning_rate": 1.3952440726369877e-05, + "loss": 0.5127, + "step": 22544 + }, + { + "epoch": 0.3895666298037047, + "grad_norm": 1.1284097160232611, + "learning_rate": 1.395192663947327e-05, + "loss": 0.4535, + "step": 22545 + }, + { + "epoch": 0.3895839093171136, + "grad_norm": 2.421723051091971, + "learning_rate": 1.395141254019893e-05, + "loss": 0.716, + "step": 22546 + }, + { + "epoch": 0.3896011888305225, + "grad_norm": 0.8777395344419565, + "learning_rate": 1.3950898428548464e-05, + "loss": 0.4146, + "step": 22547 + }, + { + "epoch": 0.38961846834393143, + "grad_norm": 1.2379266786771566, + "learning_rate": 1.3950384304523483e-05, + "loss": 0.6474, + "step": 22548 + }, + { + "epoch": 0.38963574785734034, + "grad_norm": 0.7772483102203798, + "learning_rate": 1.3949870168125599e-05, + "loss": 0.378, + "step": 22549 + }, + { + "epoch": 0.38965302737074925, + "grad_norm": 0.770593483423263, + "learning_rate": 1.3949356019356421e-05, + "loss": 0.351, + "step": 22550 + }, + { + "epoch": 0.38967030688415816, + "grad_norm": 0.9701253653288268, + "learning_rate": 1.3948841858217561e-05, + "loss": 0.4249, + "step": 22551 + }, + { + "epoch": 0.38968758639756707, + "grad_norm": 0.4334624711115683, + "learning_rate": 1.3948327684710627e-05, + "loss": 0.7251, + "step": 22552 + }, + { + "epoch": 0.389704865910976, + "grad_norm": 1.1775052331062812, + "learning_rate": 1.3947813498837233e-05, + "loss": 0.6781, + "step": 22553 + }, + { + "epoch": 0.3897221454243848, + "grad_norm": 0.7818868254425321, + "learning_rate": 1.3947299300598984e-05, + "loss": 0.4262, + "step": 22554 + }, + { + "epoch": 0.38973942493779373, + "grad_norm": 1.242528444204823, + "learning_rate": 1.3946785089997497e-05, + "loss": 0.6513, + "step": 22555 + }, + { + "epoch": 0.38975670445120264, + "grad_norm": 1.13849917682578, + "learning_rate": 1.3946270867034377e-05, + "loss": 0.609, + "step": 22556 + }, + { + "epoch": 0.38977398396461155, + "grad_norm": 1.1569520246135383, + "learning_rate": 1.3945756631711235e-05, + "loss": 0.5154, + "step": 22557 + }, + { + "epoch": 0.38979126347802046, + "grad_norm": 1.4507001343686485, + "learning_rate": 1.3945242384029686e-05, + "loss": 0.4545, + "step": 22558 + }, + { + "epoch": 0.38980854299142936, + "grad_norm": 0.464626143383775, + "learning_rate": 1.3944728123991338e-05, + "loss": 0.6459, + "step": 22559 + }, + { + "epoch": 0.38982582250483827, + "grad_norm": 1.17401936770826, + "learning_rate": 1.3944213851597801e-05, + "loss": 0.4424, + "step": 22560 + }, + { + "epoch": 0.3898431020182472, + "grad_norm": 0.9011593198073172, + "learning_rate": 1.3943699566850687e-05, + "loss": 0.4077, + "step": 22561 + }, + { + "epoch": 0.3898603815316561, + "grad_norm": 0.9125927097894387, + "learning_rate": 1.3943185269751607e-05, + "loss": 0.4378, + "step": 22562 + }, + { + "epoch": 0.389877661045065, + "grad_norm": 0.9879254905622106, + "learning_rate": 1.3942670960302166e-05, + "loss": 0.4267, + "step": 22563 + }, + { + "epoch": 0.38989494055847385, + "grad_norm": 0.8020383485396407, + "learning_rate": 1.3942156638503986e-05, + "loss": 0.5117, + "step": 22564 + }, + { + "epoch": 0.38991222007188275, + "grad_norm": 0.705575086542521, + "learning_rate": 1.3941642304358669e-05, + "loss": 0.5098, + "step": 22565 + }, + { + "epoch": 0.38992949958529166, + "grad_norm": 0.9948092936916694, + "learning_rate": 1.394112795786783e-05, + "loss": 0.5443, + "step": 22566 + }, + { + "epoch": 0.38994677909870057, + "grad_norm": 0.9315697732641189, + "learning_rate": 1.394061359903308e-05, + "loss": 0.3006, + "step": 22567 + }, + { + "epoch": 0.3899640586121095, + "grad_norm": 0.4992970407587126, + "learning_rate": 1.3940099227856023e-05, + "loss": 0.925, + "step": 22568 + }, + { + "epoch": 0.3899813381255184, + "grad_norm": 1.009109190869685, + "learning_rate": 1.393958484433828e-05, + "loss": 0.5521, + "step": 22569 + }, + { + "epoch": 0.3899986176389273, + "grad_norm": 0.9841802294237499, + "learning_rate": 1.3939070448481458e-05, + "loss": 0.5176, + "step": 22570 + }, + { + "epoch": 0.3900158971523362, + "grad_norm": 0.7492908190525012, + "learning_rate": 1.3938556040287166e-05, + "loss": 0.4902, + "step": 22571 + }, + { + "epoch": 0.3900331766657451, + "grad_norm": 0.8503429915875083, + "learning_rate": 1.393804161975702e-05, + "loss": 0.4753, + "step": 22572 + }, + { + "epoch": 0.390050456179154, + "grad_norm": 0.8593649845957128, + "learning_rate": 1.3937527186892628e-05, + "loss": 0.4311, + "step": 22573 + }, + { + "epoch": 0.3900677356925629, + "grad_norm": 1.0646231661837227, + "learning_rate": 1.3937012741695596e-05, + "loss": 0.5479, + "step": 22574 + }, + { + "epoch": 0.3900850152059718, + "grad_norm": 0.865135904611213, + "learning_rate": 1.3936498284167549e-05, + "loss": 0.2649, + "step": 22575 + }, + { + "epoch": 0.3901022947193807, + "grad_norm": 1.425194060507984, + "learning_rate": 1.3935983814310088e-05, + "loss": 0.5283, + "step": 22576 + }, + { + "epoch": 0.3901195742327896, + "grad_norm": 1.3741446194026716, + "learning_rate": 1.3935469332124823e-05, + "loss": 0.5406, + "step": 22577 + }, + { + "epoch": 0.3901368537461985, + "grad_norm": 1.6656651098453348, + "learning_rate": 1.3934954837613372e-05, + "loss": 0.5202, + "step": 22578 + }, + { + "epoch": 0.3901541332596074, + "grad_norm": 0.5818750980183531, + "learning_rate": 1.3934440330777343e-05, + "loss": 0.4823, + "step": 22579 + }, + { + "epoch": 0.3901714127730163, + "grad_norm": 1.1317178327950161, + "learning_rate": 1.3933925811618348e-05, + "loss": 0.5188, + "step": 22580 + }, + { + "epoch": 0.3901886922864252, + "grad_norm": 1.1093824694836059, + "learning_rate": 1.3933411280137997e-05, + "loss": 0.4401, + "step": 22581 + }, + { + "epoch": 0.39020597179983413, + "grad_norm": 0.7212434978250924, + "learning_rate": 1.3932896736337904e-05, + "loss": 0.612, + "step": 22582 + }, + { + "epoch": 0.39022325131324304, + "grad_norm": 1.1619918395574242, + "learning_rate": 1.393238218021968e-05, + "loss": 0.5036, + "step": 22583 + }, + { + "epoch": 0.39024053082665194, + "grad_norm": 0.4428020594393042, + "learning_rate": 1.3931867611784933e-05, + "loss": 0.5388, + "step": 22584 + }, + { + "epoch": 0.39025781034006085, + "grad_norm": 0.7138228521362936, + "learning_rate": 1.3931353031035282e-05, + "loss": 0.3908, + "step": 22585 + }, + { + "epoch": 0.3902750898534697, + "grad_norm": 0.8717852378361232, + "learning_rate": 1.3930838437972334e-05, + "loss": 0.2527, + "step": 22586 + }, + { + "epoch": 0.3902923693668786, + "grad_norm": 0.9316815458924632, + "learning_rate": 1.39303238325977e-05, + "loss": 0.4429, + "step": 22587 + }, + { + "epoch": 0.3903096488802875, + "grad_norm": 1.5831363780626404, + "learning_rate": 1.392980921491299e-05, + "loss": 0.5371, + "step": 22588 + }, + { + "epoch": 0.3903269283936964, + "grad_norm": 1.3162388682203636, + "learning_rate": 1.3929294584919824e-05, + "loss": 0.5525, + "step": 22589 + }, + { + "epoch": 0.39034420790710533, + "grad_norm": 0.7329520694268682, + "learning_rate": 1.3928779942619808e-05, + "loss": 0.4189, + "step": 22590 + }, + { + "epoch": 0.39036148742051424, + "grad_norm": 1.594553595303054, + "learning_rate": 1.392826528801455e-05, + "loss": 0.6557, + "step": 22591 + }, + { + "epoch": 0.39037876693392315, + "grad_norm": 0.8727876104634337, + "learning_rate": 1.3927750621105671e-05, + "loss": 0.6161, + "step": 22592 + }, + { + "epoch": 0.39039604644733206, + "grad_norm": 1.9512178128426283, + "learning_rate": 1.392723594189478e-05, + "loss": 0.4353, + "step": 22593 + }, + { + "epoch": 0.39041332596074096, + "grad_norm": 1.2925282842722827, + "learning_rate": 1.3926721250383482e-05, + "loss": 0.4794, + "step": 22594 + }, + { + "epoch": 0.39043060547414987, + "grad_norm": 0.9133153270517766, + "learning_rate": 1.39262065465734e-05, + "loss": 0.3675, + "step": 22595 + }, + { + "epoch": 0.3904478849875587, + "grad_norm": 1.264905614145994, + "learning_rate": 1.392569183046614e-05, + "loss": 0.5068, + "step": 22596 + }, + { + "epoch": 0.39046516450096763, + "grad_norm": 0.7253865662209109, + "learning_rate": 1.3925177102063312e-05, + "loss": 0.4958, + "step": 22597 + }, + { + "epoch": 0.39048244401437654, + "grad_norm": 1.154148996162944, + "learning_rate": 1.3924662361366532e-05, + "loss": 0.3306, + "step": 22598 + }, + { + "epoch": 0.39049972352778545, + "grad_norm": 1.0533959032773077, + "learning_rate": 1.3924147608377413e-05, + "loss": 0.4222, + "step": 22599 + }, + { + "epoch": 0.39051700304119435, + "grad_norm": 0.8755589809713967, + "learning_rate": 1.3923632843097564e-05, + "loss": 0.5494, + "step": 22600 + }, + { + "epoch": 0.39053428255460326, + "grad_norm": 0.809579916901246, + "learning_rate": 1.3923118065528596e-05, + "loss": 0.3745, + "step": 22601 + }, + { + "epoch": 0.39055156206801217, + "grad_norm": 0.9969601766089048, + "learning_rate": 1.3922603275672129e-05, + "loss": 0.3904, + "step": 22602 + }, + { + "epoch": 0.3905688415814211, + "grad_norm": 0.8750502660284085, + "learning_rate": 1.3922088473529767e-05, + "loss": 0.5454, + "step": 22603 + }, + { + "epoch": 0.39058612109483, + "grad_norm": 0.6193111502007861, + "learning_rate": 1.3921573659103126e-05, + "loss": 0.5104, + "step": 22604 + }, + { + "epoch": 0.3906034006082389, + "grad_norm": 0.9090767442682831, + "learning_rate": 1.392105883239382e-05, + "loss": 0.4716, + "step": 22605 + }, + { + "epoch": 0.3906206801216478, + "grad_norm": 1.0254053960160037, + "learning_rate": 1.3920543993403459e-05, + "loss": 0.4231, + "step": 22606 + }, + { + "epoch": 0.39063795963505665, + "grad_norm": 1.335085684674433, + "learning_rate": 1.3920029142133658e-05, + "loss": 0.518, + "step": 22607 + }, + { + "epoch": 0.39065523914846556, + "grad_norm": 1.0395958528977527, + "learning_rate": 1.3919514278586025e-05, + "loss": 0.3818, + "step": 22608 + }, + { + "epoch": 0.39067251866187447, + "grad_norm": 0.9245243389858947, + "learning_rate": 1.3918999402762177e-05, + "loss": 0.4798, + "step": 22609 + }, + { + "epoch": 0.3906897981752834, + "grad_norm": 1.072391029960249, + "learning_rate": 1.3918484514663726e-05, + "loss": 0.4533, + "step": 22610 + }, + { + "epoch": 0.3907070776886923, + "grad_norm": 1.0475034905555984, + "learning_rate": 1.3917969614292281e-05, + "loss": 0.4079, + "step": 22611 + }, + { + "epoch": 0.3907243572021012, + "grad_norm": 1.2178778310814236, + "learning_rate": 1.391745470164946e-05, + "loss": 0.4031, + "step": 22612 + }, + { + "epoch": 0.3907416367155101, + "grad_norm": 1.2049728797663999, + "learning_rate": 1.391693977673687e-05, + "loss": 0.4887, + "step": 22613 + }, + { + "epoch": 0.390758916228919, + "grad_norm": 0.8923913848928237, + "learning_rate": 1.3916424839556128e-05, + "loss": 0.6262, + "step": 22614 + }, + { + "epoch": 0.3907761957423279, + "grad_norm": 1.6669429236568685, + "learning_rate": 1.3915909890108848e-05, + "loss": 0.501, + "step": 22615 + }, + { + "epoch": 0.3907934752557368, + "grad_norm": 0.8830195939103932, + "learning_rate": 1.391539492839664e-05, + "loss": 0.45, + "step": 22616 + }, + { + "epoch": 0.3908107547691457, + "grad_norm": 0.7607505013834538, + "learning_rate": 1.3914879954421116e-05, + "loss": 0.2355, + "step": 22617 + }, + { + "epoch": 0.3908280342825546, + "grad_norm": 0.8104543102276687, + "learning_rate": 1.3914364968183894e-05, + "loss": 0.351, + "step": 22618 + }, + { + "epoch": 0.3908453137959635, + "grad_norm": 1.346034625850968, + "learning_rate": 1.3913849969686579e-05, + "loss": 0.5183, + "step": 22619 + }, + { + "epoch": 0.3908625933093724, + "grad_norm": 0.9684234507314096, + "learning_rate": 1.391333495893079e-05, + "loss": 0.4123, + "step": 22620 + }, + { + "epoch": 0.3908798728227813, + "grad_norm": 1.2263786947473632, + "learning_rate": 1.3912819935918139e-05, + "loss": 0.5597, + "step": 22621 + }, + { + "epoch": 0.3908971523361902, + "grad_norm": 1.1976541341322395, + "learning_rate": 1.3912304900650238e-05, + "loss": 0.359, + "step": 22622 + }, + { + "epoch": 0.3909144318495991, + "grad_norm": 0.9059713967554506, + "learning_rate": 1.39117898531287e-05, + "loss": 0.3742, + "step": 22623 + }, + { + "epoch": 0.390931711363008, + "grad_norm": 1.1103612069706619, + "learning_rate": 1.391127479335514e-05, + "loss": 0.6228, + "step": 22624 + }, + { + "epoch": 0.39094899087641694, + "grad_norm": 1.0948139832349313, + "learning_rate": 1.3910759721331171e-05, + "loss": 0.5399, + "step": 22625 + }, + { + "epoch": 0.39096627038982584, + "grad_norm": 1.0077683180500778, + "learning_rate": 1.3910244637058404e-05, + "loss": 0.4887, + "step": 22626 + }, + { + "epoch": 0.39098354990323475, + "grad_norm": 1.1068911542344118, + "learning_rate": 1.3909729540538455e-05, + "loss": 0.5252, + "step": 22627 + }, + { + "epoch": 0.3910008294166436, + "grad_norm": 1.066754043458718, + "learning_rate": 1.3909214431772933e-05, + "loss": 0.4633, + "step": 22628 + }, + { + "epoch": 0.3910181089300525, + "grad_norm": 0.6126832370727948, + "learning_rate": 1.3908699310763458e-05, + "loss": 0.5387, + "step": 22629 + }, + { + "epoch": 0.3910353884434614, + "grad_norm": 0.9335893140032769, + "learning_rate": 1.3908184177511637e-05, + "loss": 0.3195, + "step": 22630 + }, + { + "epoch": 0.3910526679568703, + "grad_norm": 1.0490540277164337, + "learning_rate": 1.3907669032019086e-05, + "loss": 0.3197, + "step": 22631 + }, + { + "epoch": 0.39106994747027923, + "grad_norm": 0.8912410005998936, + "learning_rate": 1.3907153874287421e-05, + "loss": 0.3347, + "step": 22632 + }, + { + "epoch": 0.39108722698368814, + "grad_norm": 0.9926993940009039, + "learning_rate": 1.3906638704318252e-05, + "loss": 0.3286, + "step": 22633 + }, + { + "epoch": 0.39110450649709705, + "grad_norm": 0.9429444322579973, + "learning_rate": 1.390612352211319e-05, + "loss": 0.4408, + "step": 22634 + }, + { + "epoch": 0.39112178601050596, + "grad_norm": 0.791253984441295, + "learning_rate": 1.3905608327673858e-05, + "loss": 0.4585, + "step": 22635 + }, + { + "epoch": 0.39113906552391486, + "grad_norm": 1.0516521092089666, + "learning_rate": 1.3905093121001863e-05, + "loss": 0.3427, + "step": 22636 + }, + { + "epoch": 0.39115634503732377, + "grad_norm": 0.9813187282573312, + "learning_rate": 1.3904577902098818e-05, + "loss": 0.7195, + "step": 22637 + }, + { + "epoch": 0.3911736245507326, + "grad_norm": 0.6856779911441724, + "learning_rate": 1.3904062670966338e-05, + "loss": 0.3476, + "step": 22638 + }, + { + "epoch": 0.39119090406414153, + "grad_norm": 1.1737667265601284, + "learning_rate": 1.3903547427606039e-05, + "loss": 0.4167, + "step": 22639 + }, + { + "epoch": 0.39120818357755044, + "grad_norm": 0.7946910340493225, + "learning_rate": 1.3903032172019528e-05, + "loss": 0.3693, + "step": 22640 + }, + { + "epoch": 0.39122546309095935, + "grad_norm": 0.8456003589865091, + "learning_rate": 1.390251690420843e-05, + "loss": 0.6047, + "step": 22641 + }, + { + "epoch": 0.39124274260436825, + "grad_norm": 0.8064307936928169, + "learning_rate": 1.3902001624174348e-05, + "loss": 0.2711, + "step": 22642 + }, + { + "epoch": 0.39126002211777716, + "grad_norm": 1.5415703164722134, + "learning_rate": 1.39014863319189e-05, + "loss": 0.5578, + "step": 22643 + }, + { + "epoch": 0.39127730163118607, + "grad_norm": 1.1407873421645565, + "learning_rate": 1.3900971027443703e-05, + "loss": 0.4089, + "step": 22644 + }, + { + "epoch": 0.391294581144595, + "grad_norm": 1.1123233180194863, + "learning_rate": 1.390045571075037e-05, + "loss": 0.6263, + "step": 22645 + }, + { + "epoch": 0.3913118606580039, + "grad_norm": 0.8748468012188052, + "learning_rate": 1.389994038184051e-05, + "loss": 0.5291, + "step": 22646 + }, + { + "epoch": 0.3913291401714128, + "grad_norm": 1.2240773800172047, + "learning_rate": 1.389942504071574e-05, + "loss": 0.4517, + "step": 22647 + }, + { + "epoch": 0.3913464196848217, + "grad_norm": 0.9734628226605356, + "learning_rate": 1.3898909687377675e-05, + "loss": 0.4295, + "step": 22648 + }, + { + "epoch": 0.39136369919823055, + "grad_norm": 0.40633686576267186, + "learning_rate": 1.389839432182793e-05, + "loss": 0.7291, + "step": 22649 + }, + { + "epoch": 0.39138097871163946, + "grad_norm": 1.2467933678233123, + "learning_rate": 1.3897878944068118e-05, + "loss": 0.3978, + "step": 22650 + }, + { + "epoch": 0.39139825822504837, + "grad_norm": 1.8058821728450791, + "learning_rate": 1.3897363554099849e-05, + "loss": 0.449, + "step": 22651 + }, + { + "epoch": 0.3914155377384573, + "grad_norm": 3.3056527605000308, + "learning_rate": 1.3896848151924744e-05, + "loss": 0.6424, + "step": 22652 + }, + { + "epoch": 0.3914328172518662, + "grad_norm": 1.1623016403502646, + "learning_rate": 1.3896332737544415e-05, + "loss": 0.4398, + "step": 22653 + }, + { + "epoch": 0.3914500967652751, + "grad_norm": 0.7862931464112762, + "learning_rate": 1.3895817310960474e-05, + "loss": 0.4272, + "step": 22654 + }, + { + "epoch": 0.391467376278684, + "grad_norm": 1.0871326177389833, + "learning_rate": 1.3895301872174539e-05, + "loss": 0.5266, + "step": 22655 + }, + { + "epoch": 0.3914846557920929, + "grad_norm": 1.1547729443082981, + "learning_rate": 1.3894786421188222e-05, + "loss": 0.591, + "step": 22656 + }, + { + "epoch": 0.3915019353055018, + "grad_norm": 1.1780665446579228, + "learning_rate": 1.3894270958003135e-05, + "loss": 0.3936, + "step": 22657 + }, + { + "epoch": 0.3915192148189107, + "grad_norm": 0.7671535064641207, + "learning_rate": 1.38937554826209e-05, + "loss": 0.4823, + "step": 22658 + }, + { + "epoch": 0.39153649433231963, + "grad_norm": 1.514836388513071, + "learning_rate": 1.3893239995043126e-05, + "loss": 0.4997, + "step": 22659 + }, + { + "epoch": 0.3915537738457285, + "grad_norm": 1.3385090297912463, + "learning_rate": 1.3892724495271425e-05, + "loss": 0.4546, + "step": 22660 + }, + { + "epoch": 0.3915710533591374, + "grad_norm": 0.9786488381283804, + "learning_rate": 1.3892208983307419e-05, + "loss": 0.6093, + "step": 22661 + }, + { + "epoch": 0.3915883328725463, + "grad_norm": 1.1188167059905836, + "learning_rate": 1.3891693459152717e-05, + "loss": 0.5477, + "step": 22662 + }, + { + "epoch": 0.3916056123859552, + "grad_norm": 0.9278300007270881, + "learning_rate": 1.3891177922808934e-05, + "loss": 0.4623, + "step": 22663 + }, + { + "epoch": 0.3916228918993641, + "grad_norm": 0.9892807557589619, + "learning_rate": 1.3890662374277688e-05, + "loss": 0.5098, + "step": 22664 + }, + { + "epoch": 0.391640171412773, + "grad_norm": 1.1176563827943138, + "learning_rate": 1.3890146813560592e-05, + "loss": 0.4479, + "step": 22665 + }, + { + "epoch": 0.3916574509261819, + "grad_norm": 1.1204563239921868, + "learning_rate": 1.3889631240659258e-05, + "loss": 0.4207, + "step": 22666 + }, + { + "epoch": 0.39167473043959083, + "grad_norm": 0.5013814598839208, + "learning_rate": 1.3889115655575305e-05, + "loss": 0.6393, + "step": 22667 + }, + { + "epoch": 0.39169200995299974, + "grad_norm": 1.0460394549526186, + "learning_rate": 1.3888600058310347e-05, + "loss": 0.7563, + "step": 22668 + }, + { + "epoch": 0.39170928946640865, + "grad_norm": 0.7605069919346535, + "learning_rate": 1.3888084448865998e-05, + "loss": 0.4795, + "step": 22669 + }, + { + "epoch": 0.3917265689798175, + "grad_norm": 1.6025786072134296, + "learning_rate": 1.3887568827243874e-05, + "loss": 0.4654, + "step": 22670 + }, + { + "epoch": 0.3917438484932264, + "grad_norm": 1.2999900588277677, + "learning_rate": 1.3887053193445585e-05, + "loss": 0.4153, + "step": 22671 + }, + { + "epoch": 0.3917611280066353, + "grad_norm": 0.5170140094369485, + "learning_rate": 1.3886537547472754e-05, + "loss": 0.6419, + "step": 22672 + }, + { + "epoch": 0.3917784075200442, + "grad_norm": 1.082813668454233, + "learning_rate": 1.3886021889326991e-05, + "loss": 0.458, + "step": 22673 + }, + { + "epoch": 0.39179568703345313, + "grad_norm": 0.8157102469013683, + "learning_rate": 1.3885506219009907e-05, + "loss": 0.3334, + "step": 22674 + }, + { + "epoch": 0.39181296654686204, + "grad_norm": 0.8077584027908034, + "learning_rate": 1.3884990536523128e-05, + "loss": 0.4567, + "step": 22675 + }, + { + "epoch": 0.39183024606027095, + "grad_norm": 1.7073924508423846, + "learning_rate": 1.3884474841868265e-05, + "loss": 0.6921, + "step": 22676 + }, + { + "epoch": 0.39184752557367986, + "grad_norm": 0.9322232343720851, + "learning_rate": 1.3883959135046926e-05, + "loss": 0.2787, + "step": 22677 + }, + { + "epoch": 0.39186480508708876, + "grad_norm": 0.7371631022074585, + "learning_rate": 1.3883443416060736e-05, + "loss": 0.3939, + "step": 22678 + }, + { + "epoch": 0.39188208460049767, + "grad_norm": 1.518263376134504, + "learning_rate": 1.3882927684911306e-05, + "loss": 0.6872, + "step": 22679 + }, + { + "epoch": 0.3918993641139066, + "grad_norm": 0.8967427305681064, + "learning_rate": 1.3882411941600248e-05, + "loss": 0.5841, + "step": 22680 + }, + { + "epoch": 0.39191664362731543, + "grad_norm": 1.9528372193555446, + "learning_rate": 1.3881896186129184e-05, + "loss": 0.4416, + "step": 22681 + }, + { + "epoch": 0.39193392314072434, + "grad_norm": 0.5470217224021168, + "learning_rate": 1.3881380418499725e-05, + "loss": 0.215, + "step": 22682 + }, + { + "epoch": 0.39195120265413325, + "grad_norm": 0.876328293678758, + "learning_rate": 1.3880864638713487e-05, + "loss": 0.4513, + "step": 22683 + }, + { + "epoch": 0.39196848216754215, + "grad_norm": 0.6747853001310635, + "learning_rate": 1.3880348846772088e-05, + "loss": 0.3285, + "step": 22684 + }, + { + "epoch": 0.39198576168095106, + "grad_norm": 2.3076199865171962, + "learning_rate": 1.387983304267714e-05, + "loss": 0.6636, + "step": 22685 + }, + { + "epoch": 0.39200304119435997, + "grad_norm": 1.1711772351572594, + "learning_rate": 1.3879317226430258e-05, + "loss": 0.5528, + "step": 22686 + }, + { + "epoch": 0.3920203207077689, + "grad_norm": 1.697378483765121, + "learning_rate": 1.3878801398033062e-05, + "loss": 0.4214, + "step": 22687 + }, + { + "epoch": 0.3920376002211778, + "grad_norm": 1.1315896015650067, + "learning_rate": 1.3878285557487165e-05, + "loss": 0.3514, + "step": 22688 + }, + { + "epoch": 0.3920548797345867, + "grad_norm": 0.4593474896372095, + "learning_rate": 1.3877769704794184e-05, + "loss": 0.6381, + "step": 22689 + }, + { + "epoch": 0.3920721592479956, + "grad_norm": 1.3554095340479109, + "learning_rate": 1.3877253839955733e-05, + "loss": 0.4808, + "step": 22690 + }, + { + "epoch": 0.39208943876140445, + "grad_norm": 1.1242066278705707, + "learning_rate": 1.3876737962973426e-05, + "loss": 0.5554, + "step": 22691 + }, + { + "epoch": 0.39210671827481336, + "grad_norm": 1.3903764992198422, + "learning_rate": 1.3876222073848882e-05, + "loss": 0.5125, + "step": 22692 + }, + { + "epoch": 0.39212399778822227, + "grad_norm": 0.9620032576950811, + "learning_rate": 1.3875706172583718e-05, + "loss": 0.3308, + "step": 22693 + }, + { + "epoch": 0.3921412773016312, + "grad_norm": 1.0376601099417126, + "learning_rate": 1.3875190259179546e-05, + "loss": 0.5055, + "step": 22694 + }, + { + "epoch": 0.3921585568150401, + "grad_norm": 0.8079812860259163, + "learning_rate": 1.3874674333637984e-05, + "loss": 0.503, + "step": 22695 + }, + { + "epoch": 0.392175836328449, + "grad_norm": 0.7616465580529432, + "learning_rate": 1.3874158395960646e-05, + "loss": 0.5795, + "step": 22696 + }, + { + "epoch": 0.3921931158418579, + "grad_norm": 1.096257888981359, + "learning_rate": 1.3873642446149149e-05, + "loss": 0.2885, + "step": 22697 + }, + { + "epoch": 0.3922103953552668, + "grad_norm": 1.1252051017333728, + "learning_rate": 1.3873126484205113e-05, + "loss": 0.6202, + "step": 22698 + }, + { + "epoch": 0.3922276748686757, + "grad_norm": 1.050577777891239, + "learning_rate": 1.3872610510130154e-05, + "loss": 0.5176, + "step": 22699 + }, + { + "epoch": 0.3922449543820846, + "grad_norm": 0.9907605562818649, + "learning_rate": 1.3872094523925876e-05, + "loss": 0.385, + "step": 22700 + }, + { + "epoch": 0.39226223389549353, + "grad_norm": 1.0719976454911129, + "learning_rate": 1.3871578525593908e-05, + "loss": 0.4268, + "step": 22701 + }, + { + "epoch": 0.3922795134089024, + "grad_norm": 1.092274866269787, + "learning_rate": 1.3871062515135863e-05, + "loss": 0.3261, + "step": 22702 + }, + { + "epoch": 0.3922967929223113, + "grad_norm": 0.6286268306101814, + "learning_rate": 1.3870546492553351e-05, + "loss": 0.5646, + "step": 22703 + }, + { + "epoch": 0.3923140724357202, + "grad_norm": 1.342065837059233, + "learning_rate": 1.3870030457847996e-05, + "loss": 0.4518, + "step": 22704 + }, + { + "epoch": 0.3923313519491291, + "grad_norm": 1.1711051851864127, + "learning_rate": 1.3869514411021414e-05, + "loss": 0.5507, + "step": 22705 + }, + { + "epoch": 0.392348631462538, + "grad_norm": 1.2945731596741146, + "learning_rate": 1.3868998352075213e-05, + "loss": 0.4377, + "step": 22706 + }, + { + "epoch": 0.3923659109759469, + "grad_norm": 1.1312899929899531, + "learning_rate": 1.386848228101102e-05, + "loss": 0.4927, + "step": 22707 + }, + { + "epoch": 0.3923831904893558, + "grad_norm": 1.113366122956419, + "learning_rate": 1.3867966197830444e-05, + "loss": 0.4524, + "step": 22708 + }, + { + "epoch": 0.39240047000276473, + "grad_norm": 1.3526502024775093, + "learning_rate": 1.3867450102535106e-05, + "loss": 0.5547, + "step": 22709 + }, + { + "epoch": 0.39241774951617364, + "grad_norm": 1.461578238665871, + "learning_rate": 1.3866933995126622e-05, + "loss": 0.4207, + "step": 22710 + }, + { + "epoch": 0.39243502902958255, + "grad_norm": 1.214875444606632, + "learning_rate": 1.3866417875606604e-05, + "loss": 0.6776, + "step": 22711 + }, + { + "epoch": 0.39245230854299146, + "grad_norm": 0.43615049380183746, + "learning_rate": 1.3865901743976671e-05, + "loss": 0.46, + "step": 22712 + }, + { + "epoch": 0.3924695880564003, + "grad_norm": 0.5087409230788376, + "learning_rate": 1.3865385600238441e-05, + "loss": 0.7234, + "step": 22713 + }, + { + "epoch": 0.3924868675698092, + "grad_norm": 0.44441355758447687, + "learning_rate": 1.3864869444393527e-05, + "loss": 0.6594, + "step": 22714 + }, + { + "epoch": 0.3925041470832181, + "grad_norm": 1.0860490807918461, + "learning_rate": 1.3864353276443551e-05, + "loss": 0.5321, + "step": 22715 + }, + { + "epoch": 0.39252142659662703, + "grad_norm": 0.8710825106387088, + "learning_rate": 1.3863837096390127e-05, + "loss": 0.5694, + "step": 22716 + }, + { + "epoch": 0.39253870611003594, + "grad_norm": 0.9732274106721919, + "learning_rate": 1.3863320904234869e-05, + "loss": 0.311, + "step": 22717 + }, + { + "epoch": 0.39255598562344485, + "grad_norm": 0.47628137349206523, + "learning_rate": 1.38628046999794e-05, + "loss": 0.4597, + "step": 22718 + }, + { + "epoch": 0.39257326513685376, + "grad_norm": 0.7220317902056671, + "learning_rate": 1.3862288483625333e-05, + "loss": 0.3108, + "step": 22719 + }, + { + "epoch": 0.39259054465026266, + "grad_norm": 0.7635416102739138, + "learning_rate": 1.3861772255174281e-05, + "loss": 0.36, + "step": 22720 + }, + { + "epoch": 0.39260782416367157, + "grad_norm": 1.7969067237036564, + "learning_rate": 1.3861256014627868e-05, + "loss": 0.4527, + "step": 22721 + }, + { + "epoch": 0.3926251036770805, + "grad_norm": 1.3167769699019989, + "learning_rate": 1.3860739761987706e-05, + "loss": 0.4683, + "step": 22722 + }, + { + "epoch": 0.39264238319048933, + "grad_norm": 1.2879498187637095, + "learning_rate": 1.3860223497255414e-05, + "loss": 0.615, + "step": 22723 + }, + { + "epoch": 0.39265966270389824, + "grad_norm": 1.002387340753333, + "learning_rate": 1.385970722043261e-05, + "loss": 0.4412, + "step": 22724 + }, + { + "epoch": 0.39267694221730715, + "grad_norm": 1.0984139101285264, + "learning_rate": 1.385919093152091e-05, + "loss": 0.6314, + "step": 22725 + }, + { + "epoch": 0.39269422173071605, + "grad_norm": 0.9531441274477677, + "learning_rate": 1.3858674630521926e-05, + "loss": 0.5773, + "step": 22726 + }, + { + "epoch": 0.39271150124412496, + "grad_norm": 1.1722579848552703, + "learning_rate": 1.3858158317437285e-05, + "loss": 0.4703, + "step": 22727 + }, + { + "epoch": 0.39272878075753387, + "grad_norm": 1.242180529904985, + "learning_rate": 1.3857641992268597e-05, + "loss": 0.3451, + "step": 22728 + }, + { + "epoch": 0.3927460602709428, + "grad_norm": 1.6375964234788585, + "learning_rate": 1.385712565501748e-05, + "loss": 0.4306, + "step": 22729 + }, + { + "epoch": 0.3927633397843517, + "grad_norm": 1.4401646744414724, + "learning_rate": 1.3856609305685558e-05, + "loss": 0.601, + "step": 22730 + }, + { + "epoch": 0.3927806192977606, + "grad_norm": 0.8160820255674949, + "learning_rate": 1.3856092944274434e-05, + "loss": 0.3766, + "step": 22731 + }, + { + "epoch": 0.3927978988111695, + "grad_norm": 0.9079575230481131, + "learning_rate": 1.3855576570785738e-05, + "loss": 0.6134, + "step": 22732 + }, + { + "epoch": 0.3928151783245784, + "grad_norm": 0.5453566466816882, + "learning_rate": 1.3855060185221085e-05, + "loss": 0.6056, + "step": 22733 + }, + { + "epoch": 0.39283245783798726, + "grad_norm": 0.6332276666638809, + "learning_rate": 1.3854543787582087e-05, + "loss": 0.4599, + "step": 22734 + }, + { + "epoch": 0.39284973735139617, + "grad_norm": 1.6400956631859425, + "learning_rate": 1.3854027377870367e-05, + "loss": 0.3958, + "step": 22735 + }, + { + "epoch": 0.3928670168648051, + "grad_norm": 1.4231925263398792, + "learning_rate": 1.385351095608754e-05, + "loss": 0.5123, + "step": 22736 + }, + { + "epoch": 0.392884296378214, + "grad_norm": 0.902230464342622, + "learning_rate": 1.3852994522235224e-05, + "loss": 0.4339, + "step": 22737 + }, + { + "epoch": 0.3929015758916229, + "grad_norm": 1.0497583700613689, + "learning_rate": 1.3852478076315037e-05, + "loss": 0.6238, + "step": 22738 + }, + { + "epoch": 0.3929188554050318, + "grad_norm": 1.4937513894750531, + "learning_rate": 1.3851961618328594e-05, + "loss": 0.3401, + "step": 22739 + }, + { + "epoch": 0.3929361349184407, + "grad_norm": 1.4806668799581137, + "learning_rate": 1.3851445148277516e-05, + "loss": 0.3787, + "step": 22740 + }, + { + "epoch": 0.3929534144318496, + "grad_norm": 0.8682964497710078, + "learning_rate": 1.3850928666163418e-05, + "loss": 0.4785, + "step": 22741 + }, + { + "epoch": 0.3929706939452585, + "grad_norm": 1.1070380115057257, + "learning_rate": 1.3850412171987922e-05, + "loss": 0.5477, + "step": 22742 + }, + { + "epoch": 0.39298797345866743, + "grad_norm": 1.7288368658509259, + "learning_rate": 1.3849895665752637e-05, + "loss": 0.5625, + "step": 22743 + }, + { + "epoch": 0.3930052529720763, + "grad_norm": 1.0901760790395318, + "learning_rate": 1.384937914745919e-05, + "loss": 0.4331, + "step": 22744 + }, + { + "epoch": 0.3930225324854852, + "grad_norm": 1.6169795277471322, + "learning_rate": 1.3848862617109196e-05, + "loss": 0.5098, + "step": 22745 + }, + { + "epoch": 0.3930398119988941, + "grad_norm": 1.3657244715460553, + "learning_rate": 1.3848346074704266e-05, + "loss": 0.5283, + "step": 22746 + }, + { + "epoch": 0.393057091512303, + "grad_norm": 1.9887004837401046, + "learning_rate": 1.3847829520246028e-05, + "loss": 0.5751, + "step": 22747 + }, + { + "epoch": 0.3930743710257119, + "grad_norm": 1.098379403944354, + "learning_rate": 1.3847312953736096e-05, + "loss": 0.408, + "step": 22748 + }, + { + "epoch": 0.3930916505391208, + "grad_norm": 0.9399998019958966, + "learning_rate": 1.3846796375176083e-05, + "loss": 0.3771, + "step": 22749 + }, + { + "epoch": 0.3931089300525297, + "grad_norm": 1.387758585986431, + "learning_rate": 1.3846279784567617e-05, + "loss": 0.7535, + "step": 22750 + }, + { + "epoch": 0.39312620956593863, + "grad_norm": 0.4835905377901172, + "learning_rate": 1.3845763181912308e-05, + "loss": 0.7507, + "step": 22751 + }, + { + "epoch": 0.39314348907934754, + "grad_norm": 1.0523968073139796, + "learning_rate": 1.3845246567211778e-05, + "loss": 0.4106, + "step": 22752 + }, + { + "epoch": 0.39316076859275645, + "grad_norm": 0.9851235217809567, + "learning_rate": 1.3844729940467643e-05, + "loss": 0.4333, + "step": 22753 + }, + { + "epoch": 0.39317804810616536, + "grad_norm": 1.2216613339812774, + "learning_rate": 1.3844213301681519e-05, + "loss": 0.5475, + "step": 22754 + }, + { + "epoch": 0.3931953276195742, + "grad_norm": 1.1377290407275074, + "learning_rate": 1.384369665085503e-05, + "loss": 0.7248, + "step": 22755 + }, + { + "epoch": 0.3932126071329831, + "grad_norm": 0.5652403781066236, + "learning_rate": 1.384317998798979e-05, + "loss": 0.4044, + "step": 22756 + }, + { + "epoch": 0.393229886646392, + "grad_norm": 1.2570507762320249, + "learning_rate": 1.3842663313087414e-05, + "loss": 0.3867, + "step": 22757 + }, + { + "epoch": 0.39324716615980093, + "grad_norm": 0.9777547246097121, + "learning_rate": 1.3842146626149531e-05, + "loss": 0.4402, + "step": 22758 + }, + { + "epoch": 0.39326444567320984, + "grad_norm": 1.216820633785623, + "learning_rate": 1.3841629927177749e-05, + "loss": 0.5342, + "step": 22759 + }, + { + "epoch": 0.39328172518661875, + "grad_norm": 1.2412952594504845, + "learning_rate": 1.384111321617369e-05, + "loss": 0.3599, + "step": 22760 + }, + { + "epoch": 0.39329900470002765, + "grad_norm": 1.269596253348751, + "learning_rate": 1.3840596493138975e-05, + "loss": 0.4554, + "step": 22761 + }, + { + "epoch": 0.39331628421343656, + "grad_norm": 1.4080399985208305, + "learning_rate": 1.384007975807522e-05, + "loss": 0.3753, + "step": 22762 + }, + { + "epoch": 0.39333356372684547, + "grad_norm": 1.3829079805644575, + "learning_rate": 1.3839563010984041e-05, + "loss": 0.5713, + "step": 22763 + }, + { + "epoch": 0.3933508432402544, + "grad_norm": 0.885236402128147, + "learning_rate": 1.383904625186706e-05, + "loss": 0.7212, + "step": 22764 + }, + { + "epoch": 0.39336812275366323, + "grad_norm": 0.9639607710796664, + "learning_rate": 1.3838529480725894e-05, + "loss": 0.609, + "step": 22765 + }, + { + "epoch": 0.39338540226707214, + "grad_norm": 1.1742009684192058, + "learning_rate": 1.3838012697562162e-05, + "loss": 0.3367, + "step": 22766 + }, + { + "epoch": 0.39340268178048104, + "grad_norm": 1.5268425149744136, + "learning_rate": 1.3837495902377483e-05, + "loss": 0.7358, + "step": 22767 + }, + { + "epoch": 0.39341996129388995, + "grad_norm": 0.8842936194274585, + "learning_rate": 1.3836979095173475e-05, + "loss": 0.3241, + "step": 22768 + }, + { + "epoch": 0.39343724080729886, + "grad_norm": 0.7919265469882578, + "learning_rate": 1.3836462275951753e-05, + "loss": 0.5336, + "step": 22769 + }, + { + "epoch": 0.39345452032070777, + "grad_norm": 1.2458856549898303, + "learning_rate": 1.3835945444713945e-05, + "loss": 0.4619, + "step": 22770 + }, + { + "epoch": 0.3934717998341167, + "grad_norm": 0.7087078282643465, + "learning_rate": 1.3835428601461663e-05, + "loss": 0.3542, + "step": 22771 + }, + { + "epoch": 0.3934890793475256, + "grad_norm": 0.9797354708879752, + "learning_rate": 1.3834911746196528e-05, + "loss": 0.4354, + "step": 22772 + }, + { + "epoch": 0.3935063588609345, + "grad_norm": 1.08361482560691, + "learning_rate": 1.3834394878920156e-05, + "loss": 0.7131, + "step": 22773 + }, + { + "epoch": 0.3935236383743434, + "grad_norm": 0.8388062701644253, + "learning_rate": 1.383387799963417e-05, + "loss": 0.3735, + "step": 22774 + }, + { + "epoch": 0.3935409178877523, + "grad_norm": 1.3938509760016602, + "learning_rate": 1.3833361108340185e-05, + "loss": 0.5517, + "step": 22775 + }, + { + "epoch": 0.39355819740116116, + "grad_norm": 1.5006374904156483, + "learning_rate": 1.3832844205039822e-05, + "loss": 0.3769, + "step": 22776 + }, + { + "epoch": 0.39357547691457007, + "grad_norm": 0.9368339922047446, + "learning_rate": 1.3832327289734698e-05, + "loss": 0.4055, + "step": 22777 + }, + { + "epoch": 0.393592756427979, + "grad_norm": 1.5475920621659347, + "learning_rate": 1.3831810362426434e-05, + "loss": 0.2857, + "step": 22778 + }, + { + "epoch": 0.3936100359413879, + "grad_norm": 1.487004342347105, + "learning_rate": 1.3831293423116652e-05, + "loss": 0.3184, + "step": 22779 + }, + { + "epoch": 0.3936273154547968, + "grad_norm": 1.4723302918531, + "learning_rate": 1.3830776471806963e-05, + "loss": 0.5595, + "step": 22780 + }, + { + "epoch": 0.3936445949682057, + "grad_norm": 1.1138388976721256, + "learning_rate": 1.3830259508498996e-05, + "loss": 0.5373, + "step": 22781 + }, + { + "epoch": 0.3936618744816146, + "grad_norm": 0.9381586467818416, + "learning_rate": 1.3829742533194366e-05, + "loss": 0.566, + "step": 22782 + }, + { + "epoch": 0.3936791539950235, + "grad_norm": 5.380962487578818, + "learning_rate": 1.3829225545894684e-05, + "loss": 0.4035, + "step": 22783 + }, + { + "epoch": 0.3936964335084324, + "grad_norm": 0.809885010833184, + "learning_rate": 1.3828708546601583e-05, + "loss": 0.3772, + "step": 22784 + }, + { + "epoch": 0.3937137130218413, + "grad_norm": 0.946108009212544, + "learning_rate": 1.3828191535316675e-05, + "loss": 0.5727, + "step": 22785 + }, + { + "epoch": 0.39373099253525023, + "grad_norm": 1.4291566896136934, + "learning_rate": 1.3827674512041576e-05, + "loss": 0.4428, + "step": 22786 + }, + { + "epoch": 0.3937482720486591, + "grad_norm": 0.7798567323189204, + "learning_rate": 1.3827157476777915e-05, + "loss": 0.5364, + "step": 22787 + }, + { + "epoch": 0.393765551562068, + "grad_norm": 1.172561080215126, + "learning_rate": 1.3826640429527302e-05, + "loss": 0.4444, + "step": 22788 + }, + { + "epoch": 0.3937828310754769, + "grad_norm": 0.8657150022845037, + "learning_rate": 1.3826123370291362e-05, + "loss": 0.5201, + "step": 22789 + }, + { + "epoch": 0.3938001105888858, + "grad_norm": 1.0544193439923872, + "learning_rate": 1.3825606299071713e-05, + "loss": 0.335, + "step": 22790 + }, + { + "epoch": 0.3938173901022947, + "grad_norm": 0.5997215278711325, + "learning_rate": 1.3825089215869973e-05, + "loss": 0.7401, + "step": 22791 + }, + { + "epoch": 0.3938346696157036, + "grad_norm": 0.517601410966759, + "learning_rate": 1.3824572120687765e-05, + "loss": 0.7817, + "step": 22792 + }, + { + "epoch": 0.39385194912911253, + "grad_norm": 1.1981143803672534, + "learning_rate": 1.3824055013526705e-05, + "loss": 0.4976, + "step": 22793 + }, + { + "epoch": 0.39386922864252144, + "grad_norm": 1.2492164958671708, + "learning_rate": 1.3823537894388413e-05, + "loss": 0.4501, + "step": 22794 + }, + { + "epoch": 0.39388650815593035, + "grad_norm": 1.064049174345373, + "learning_rate": 1.3823020763274512e-05, + "loss": 0.6865, + "step": 22795 + }, + { + "epoch": 0.39390378766933926, + "grad_norm": 1.1653722829744626, + "learning_rate": 1.3822503620186618e-05, + "loss": 0.6348, + "step": 22796 + }, + { + "epoch": 0.3939210671827481, + "grad_norm": 0.9131763319867727, + "learning_rate": 1.3821986465126351e-05, + "loss": 0.4687, + "step": 22797 + }, + { + "epoch": 0.393938346696157, + "grad_norm": 0.9320684809200506, + "learning_rate": 1.3821469298095336e-05, + "loss": 0.6357, + "step": 22798 + }, + { + "epoch": 0.3939556262095659, + "grad_norm": 0.8863914850848121, + "learning_rate": 1.3820952119095186e-05, + "loss": 0.4032, + "step": 22799 + }, + { + "epoch": 0.39397290572297483, + "grad_norm": 1.1155092453016253, + "learning_rate": 1.3820434928127521e-05, + "loss": 0.4578, + "step": 22800 + }, + { + "epoch": 0.39399018523638374, + "grad_norm": 1.0626647689953208, + "learning_rate": 1.3819917725193967e-05, + "loss": 0.3776, + "step": 22801 + }, + { + "epoch": 0.39400746474979265, + "grad_norm": 0.47964428674732285, + "learning_rate": 1.3819400510296144e-05, + "loss": 0.828, + "step": 22802 + }, + { + "epoch": 0.39402474426320155, + "grad_norm": 0.9789813503473652, + "learning_rate": 1.3818883283435662e-05, + "loss": 0.4911, + "step": 22803 + }, + { + "epoch": 0.39404202377661046, + "grad_norm": 2.17181721012966, + "learning_rate": 1.381836604461415e-05, + "loss": 0.549, + "step": 22804 + }, + { + "epoch": 0.39405930329001937, + "grad_norm": 0.8524016870728783, + "learning_rate": 1.3817848793833227e-05, + "loss": 0.3949, + "step": 22805 + }, + { + "epoch": 0.3940765828034283, + "grad_norm": 1.0537754654462506, + "learning_rate": 1.3817331531094507e-05, + "loss": 0.4835, + "step": 22806 + }, + { + "epoch": 0.3940938623168372, + "grad_norm": 0.6122550943191146, + "learning_rate": 1.3816814256399615e-05, + "loss": 0.3424, + "step": 22807 + }, + { + "epoch": 0.39411114183024604, + "grad_norm": 0.7800315392352303, + "learning_rate": 1.3816296969750175e-05, + "loss": 0.283, + "step": 22808 + }, + { + "epoch": 0.39412842134365494, + "grad_norm": 0.9189707788653305, + "learning_rate": 1.3815779671147797e-05, + "loss": 0.3846, + "step": 22809 + }, + { + "epoch": 0.39414570085706385, + "grad_norm": 0.8122859475315084, + "learning_rate": 1.3815262360594114e-05, + "loss": 0.3347, + "step": 22810 + }, + { + "epoch": 0.39416298037047276, + "grad_norm": 0.9326420420403841, + "learning_rate": 1.3814745038090736e-05, + "loss": 0.3986, + "step": 22811 + }, + { + "epoch": 0.39418025988388167, + "grad_norm": 2.12264204005764, + "learning_rate": 1.3814227703639285e-05, + "loss": 0.4037, + "step": 22812 + }, + { + "epoch": 0.3941975393972906, + "grad_norm": 0.44811128323147525, + "learning_rate": 1.3813710357241386e-05, + "loss": 0.724, + "step": 22813 + }, + { + "epoch": 0.3942148189106995, + "grad_norm": 0.7602966327231915, + "learning_rate": 1.3813192998898653e-05, + "loss": 0.4485, + "step": 22814 + }, + { + "epoch": 0.3942320984241084, + "grad_norm": 0.5910162804725967, + "learning_rate": 1.381267562861271e-05, + "loss": 0.3841, + "step": 22815 + }, + { + "epoch": 0.3942493779375173, + "grad_norm": 1.4802580551898932, + "learning_rate": 1.381215824638518e-05, + "loss": 0.5779, + "step": 22816 + }, + { + "epoch": 0.3942666574509262, + "grad_norm": 1.0185261632516704, + "learning_rate": 1.3811640852217678e-05, + "loss": 0.3435, + "step": 22817 + }, + { + "epoch": 0.39428393696433506, + "grad_norm": 1.620376130255332, + "learning_rate": 1.381112344611183e-05, + "loss": 0.6109, + "step": 22818 + }, + { + "epoch": 0.39430121647774397, + "grad_norm": 1.090043166189406, + "learning_rate": 1.3810606028069252e-05, + "loss": 0.2955, + "step": 22819 + }, + { + "epoch": 0.3943184959911529, + "grad_norm": 0.677888029299188, + "learning_rate": 1.3810088598091567e-05, + "loss": 0.3581, + "step": 22820 + }, + { + "epoch": 0.3943357755045618, + "grad_norm": 0.7063208188985094, + "learning_rate": 1.3809571156180392e-05, + "loss": 0.4042, + "step": 22821 + }, + { + "epoch": 0.3943530550179707, + "grad_norm": 1.8254610309468429, + "learning_rate": 1.3809053702337354e-05, + "loss": 0.5714, + "step": 22822 + }, + { + "epoch": 0.3943703345313796, + "grad_norm": 1.1669380845855821, + "learning_rate": 1.380853623656407e-05, + "loss": 0.4265, + "step": 22823 + }, + { + "epoch": 0.3943876140447885, + "grad_norm": 1.2304278875346026, + "learning_rate": 1.380801875886216e-05, + "loss": 0.6415, + "step": 22824 + }, + { + "epoch": 0.3944048935581974, + "grad_norm": 0.5453276283411506, + "learning_rate": 1.3807501269233248e-05, + "loss": 0.282, + "step": 22825 + }, + { + "epoch": 0.3944221730716063, + "grad_norm": 0.7234788605714391, + "learning_rate": 1.380698376767895e-05, + "loss": 0.422, + "step": 22826 + }, + { + "epoch": 0.3944394525850152, + "grad_norm": 0.9055261173606104, + "learning_rate": 1.380646625420089e-05, + "loss": 0.5223, + "step": 22827 + }, + { + "epoch": 0.39445673209842413, + "grad_norm": 1.1623093198546925, + "learning_rate": 1.380594872880069e-05, + "loss": 0.4125, + "step": 22828 + }, + { + "epoch": 0.394474011611833, + "grad_norm": 1.402108128890378, + "learning_rate": 1.3805431191479966e-05, + "loss": 0.382, + "step": 22829 + }, + { + "epoch": 0.3944912911252419, + "grad_norm": 1.0864886248626577, + "learning_rate": 1.3804913642240345e-05, + "loss": 0.5309, + "step": 22830 + }, + { + "epoch": 0.3945085706386508, + "grad_norm": 0.8389478765311271, + "learning_rate": 1.3804396081083447e-05, + "loss": 0.3568, + "step": 22831 + }, + { + "epoch": 0.3945258501520597, + "grad_norm": 1.2089072384914108, + "learning_rate": 1.3803878508010887e-05, + "loss": 0.3341, + "step": 22832 + }, + { + "epoch": 0.3945431296654686, + "grad_norm": 1.6357210797964061, + "learning_rate": 1.3803360923024294e-05, + "loss": 0.3578, + "step": 22833 + }, + { + "epoch": 0.3945604091788775, + "grad_norm": 0.5331931011339468, + "learning_rate": 1.3802843326125283e-05, + "loss": 0.7237, + "step": 22834 + }, + { + "epoch": 0.39457768869228643, + "grad_norm": 1.128435595462419, + "learning_rate": 1.3802325717315479e-05, + "loss": 0.4665, + "step": 22835 + }, + { + "epoch": 0.39459496820569534, + "grad_norm": 1.5420107245793544, + "learning_rate": 1.3801808096596502e-05, + "loss": 0.4485, + "step": 22836 + }, + { + "epoch": 0.39461224771910425, + "grad_norm": 0.9820927166421749, + "learning_rate": 1.3801290463969971e-05, + "loss": 0.6072, + "step": 22837 + }, + { + "epoch": 0.39462952723251316, + "grad_norm": 1.4490441669543754, + "learning_rate": 1.3800772819437514e-05, + "loss": 0.4705, + "step": 22838 + }, + { + "epoch": 0.394646806745922, + "grad_norm": 0.9084965693365326, + "learning_rate": 1.3800255163000744e-05, + "loss": 0.513, + "step": 22839 + }, + { + "epoch": 0.3946640862593309, + "grad_norm": 0.992940314523657, + "learning_rate": 1.3799737494661287e-05, + "loss": 0.3558, + "step": 22840 + }, + { + "epoch": 0.3946813657727398, + "grad_norm": 1.0089046034967941, + "learning_rate": 1.3799219814420761e-05, + "loss": 0.6627, + "step": 22841 + }, + { + "epoch": 0.39469864528614873, + "grad_norm": 0.9353519696227637, + "learning_rate": 1.3798702122280794e-05, + "loss": 0.3462, + "step": 22842 + }, + { + "epoch": 0.39471592479955764, + "grad_norm": 0.576159003292096, + "learning_rate": 1.3798184418243001e-05, + "loss": 0.4086, + "step": 22843 + }, + { + "epoch": 0.39473320431296655, + "grad_norm": 1.0840175712091318, + "learning_rate": 1.3797666702309007e-05, + "loss": 0.6178, + "step": 22844 + }, + { + "epoch": 0.39475048382637545, + "grad_norm": 1.0295965301144312, + "learning_rate": 1.3797148974480432e-05, + "loss": 0.3415, + "step": 22845 + }, + { + "epoch": 0.39476776333978436, + "grad_norm": 1.0056198572015889, + "learning_rate": 1.3796631234758897e-05, + "loss": 0.4897, + "step": 22846 + }, + { + "epoch": 0.39478504285319327, + "grad_norm": 0.7266484025693964, + "learning_rate": 1.3796113483146025e-05, + "loss": 0.6169, + "step": 22847 + }, + { + "epoch": 0.3948023223666022, + "grad_norm": 1.253509647840362, + "learning_rate": 1.3795595719643437e-05, + "loss": 0.4326, + "step": 22848 + }, + { + "epoch": 0.3948196018800111, + "grad_norm": 2.081278112095259, + "learning_rate": 1.3795077944252753e-05, + "loss": 0.3306, + "step": 22849 + }, + { + "epoch": 0.39483688139341994, + "grad_norm": 1.1858056849274163, + "learning_rate": 1.3794560156975599e-05, + "loss": 0.4256, + "step": 22850 + }, + { + "epoch": 0.39485416090682884, + "grad_norm": 0.4380704767135453, + "learning_rate": 1.3794042357813594e-05, + "loss": 0.4941, + "step": 22851 + }, + { + "epoch": 0.39487144042023775, + "grad_norm": 0.547948489629339, + "learning_rate": 1.3793524546768358e-05, + "loss": 0.6755, + "step": 22852 + }, + { + "epoch": 0.39488871993364666, + "grad_norm": 1.012187903852645, + "learning_rate": 1.3793006723841515e-05, + "loss": 0.5706, + "step": 22853 + }, + { + "epoch": 0.39490599944705557, + "grad_norm": 0.9800295822538329, + "learning_rate": 1.3792488889034688e-05, + "loss": 0.4631, + "step": 22854 + }, + { + "epoch": 0.3949232789604645, + "grad_norm": 0.9133068300016576, + "learning_rate": 1.37919710423495e-05, + "loss": 0.5272, + "step": 22855 + }, + { + "epoch": 0.3949405584738734, + "grad_norm": 1.0731843513609538, + "learning_rate": 1.3791453183787567e-05, + "loss": 0.4267, + "step": 22856 + }, + { + "epoch": 0.3949578379872823, + "grad_norm": 1.0925862742979155, + "learning_rate": 1.3790935313350512e-05, + "loss": 0.4673, + "step": 22857 + }, + { + "epoch": 0.3949751175006912, + "grad_norm": 1.4327139230493466, + "learning_rate": 1.3790417431039963e-05, + "loss": 0.4975, + "step": 22858 + }, + { + "epoch": 0.3949923970141001, + "grad_norm": 1.0895890746793537, + "learning_rate": 1.3789899536857541e-05, + "loss": 0.5485, + "step": 22859 + }, + { + "epoch": 0.395009676527509, + "grad_norm": 0.8685360435479401, + "learning_rate": 1.3789381630804864e-05, + "loss": 0.2584, + "step": 22860 + }, + { + "epoch": 0.39502695604091786, + "grad_norm": 0.9082771180297333, + "learning_rate": 1.3788863712883554e-05, + "loss": 0.4721, + "step": 22861 + }, + { + "epoch": 0.3950442355543268, + "grad_norm": 0.7536629749782424, + "learning_rate": 1.3788345783095234e-05, + "loss": 0.5107, + "step": 22862 + }, + { + "epoch": 0.3950615150677357, + "grad_norm": 1.2310931293033054, + "learning_rate": 1.378782784144153e-05, + "loss": 0.5672, + "step": 22863 + }, + { + "epoch": 0.3950787945811446, + "grad_norm": 1.5509131045738385, + "learning_rate": 1.3787309887924061e-05, + "loss": 0.6031, + "step": 22864 + }, + { + "epoch": 0.3950960740945535, + "grad_norm": 1.2305185437213897, + "learning_rate": 1.3786791922544449e-05, + "loss": 0.4317, + "step": 22865 + }, + { + "epoch": 0.3951133536079624, + "grad_norm": 0.5663438320071895, + "learning_rate": 1.3786273945304314e-05, + "loss": 0.7264, + "step": 22866 + }, + { + "epoch": 0.3951306331213713, + "grad_norm": 0.6720694394326869, + "learning_rate": 1.3785755956205285e-05, + "loss": 0.5573, + "step": 22867 + }, + { + "epoch": 0.3951479126347802, + "grad_norm": 0.6678402012982669, + "learning_rate": 1.378523795524898e-05, + "loss": 0.2693, + "step": 22868 + }, + { + "epoch": 0.3951651921481891, + "grad_norm": 0.898003137238868, + "learning_rate": 1.378471994243702e-05, + "loss": 0.6596, + "step": 22869 + }, + { + "epoch": 0.39518247166159803, + "grad_norm": 1.9253848246471208, + "learning_rate": 1.3784201917771032e-05, + "loss": 0.439, + "step": 22870 + }, + { + "epoch": 0.3951997511750069, + "grad_norm": 0.8568394899905403, + "learning_rate": 1.3783683881252636e-05, + "loss": 0.3243, + "step": 22871 + }, + { + "epoch": 0.3952170306884158, + "grad_norm": 1.2516538847941288, + "learning_rate": 1.3783165832883451e-05, + "loss": 0.6516, + "step": 22872 + }, + { + "epoch": 0.3952343102018247, + "grad_norm": 1.2021396333585055, + "learning_rate": 1.3782647772665106e-05, + "loss": 0.6257, + "step": 22873 + }, + { + "epoch": 0.3952515897152336, + "grad_norm": 0.7580473232909131, + "learning_rate": 1.3782129700599222e-05, + "loss": 0.4001, + "step": 22874 + }, + { + "epoch": 0.3952688692286425, + "grad_norm": 0.4693606698036721, + "learning_rate": 1.3781611616687418e-05, + "loss": 0.5697, + "step": 22875 + }, + { + "epoch": 0.3952861487420514, + "grad_norm": 1.5608912095762877, + "learning_rate": 1.378109352093132e-05, + "loss": 0.5191, + "step": 22876 + }, + { + "epoch": 0.39530342825546033, + "grad_norm": 1.1417362558545459, + "learning_rate": 1.3780575413332549e-05, + "loss": 0.5406, + "step": 22877 + }, + { + "epoch": 0.39532070776886924, + "grad_norm": 1.4878007520658147, + "learning_rate": 1.378005729389273e-05, + "loss": 0.5483, + "step": 22878 + }, + { + "epoch": 0.39533798728227815, + "grad_norm": 1.2684979018648928, + "learning_rate": 1.3779539162613484e-05, + "loss": 0.4697, + "step": 22879 + }, + { + "epoch": 0.39535526679568705, + "grad_norm": 0.9908035976213889, + "learning_rate": 1.3779021019496432e-05, + "loss": 0.4995, + "step": 22880 + }, + { + "epoch": 0.39537254630909596, + "grad_norm": 1.5287682673010745, + "learning_rate": 1.37785028645432e-05, + "loss": 0.4373, + "step": 22881 + }, + { + "epoch": 0.3953898258225048, + "grad_norm": 1.0132574821082805, + "learning_rate": 1.3777984697755412e-05, + "loss": 0.4151, + "step": 22882 + }, + { + "epoch": 0.3954071053359137, + "grad_norm": 1.1823710644136487, + "learning_rate": 1.3777466519134686e-05, + "loss": 0.487, + "step": 22883 + }, + { + "epoch": 0.39542438484932263, + "grad_norm": 1.1571228415544532, + "learning_rate": 1.3776948328682646e-05, + "loss": 0.5492, + "step": 22884 + }, + { + "epoch": 0.39544166436273154, + "grad_norm": 0.7958123027576389, + "learning_rate": 1.3776430126400924e-05, + "loss": 0.6973, + "step": 22885 + }, + { + "epoch": 0.39545894387614045, + "grad_norm": 1.1982342518082978, + "learning_rate": 1.3775911912291128e-05, + "loss": 0.543, + "step": 22886 + }, + { + "epoch": 0.39547622338954935, + "grad_norm": 0.9475853148791553, + "learning_rate": 1.3775393686354895e-05, + "loss": 0.6212, + "step": 22887 + }, + { + "epoch": 0.39549350290295826, + "grad_norm": 1.2120071383198967, + "learning_rate": 1.3774875448593839e-05, + "loss": 0.3226, + "step": 22888 + }, + { + "epoch": 0.39551078241636717, + "grad_norm": 1.0930903152157407, + "learning_rate": 1.3774357199009587e-05, + "loss": 0.712, + "step": 22889 + }, + { + "epoch": 0.3955280619297761, + "grad_norm": 1.1730721408285694, + "learning_rate": 1.377383893760376e-05, + "loss": 0.4919, + "step": 22890 + }, + { + "epoch": 0.395545341443185, + "grad_norm": 1.0281233591104872, + "learning_rate": 1.3773320664377985e-05, + "loss": 0.3137, + "step": 22891 + }, + { + "epoch": 0.39556262095659384, + "grad_norm": 0.9446141419894408, + "learning_rate": 1.377280237933388e-05, + "loss": 0.5719, + "step": 22892 + }, + { + "epoch": 0.39557990047000274, + "grad_norm": 0.8352725688737672, + "learning_rate": 1.3772284082473074e-05, + "loss": 0.2769, + "step": 22893 + }, + { + "epoch": 0.39559717998341165, + "grad_norm": 0.6329427005513589, + "learning_rate": 1.3771765773797186e-05, + "loss": 0.3854, + "step": 22894 + }, + { + "epoch": 0.39561445949682056, + "grad_norm": 1.3782264732505325, + "learning_rate": 1.3771247453307842e-05, + "loss": 0.4201, + "step": 22895 + }, + { + "epoch": 0.39563173901022947, + "grad_norm": 0.6811351506663894, + "learning_rate": 1.3770729121006662e-05, + "loss": 0.4913, + "step": 22896 + }, + { + "epoch": 0.3956490185236384, + "grad_norm": 1.5647792241118836, + "learning_rate": 1.3770210776895275e-05, + "loss": 0.6832, + "step": 22897 + }, + { + "epoch": 0.3956662980370473, + "grad_norm": 0.68052264959311, + "learning_rate": 1.37696924209753e-05, + "loss": 0.4838, + "step": 22898 + }, + { + "epoch": 0.3956835775504562, + "grad_norm": 0.9477826447156912, + "learning_rate": 1.3769174053248363e-05, + "loss": 0.6701, + "step": 22899 + }, + { + "epoch": 0.3957008570638651, + "grad_norm": 0.7813149441766643, + "learning_rate": 1.3768655673716085e-05, + "loss": 0.3912, + "step": 22900 + }, + { + "epoch": 0.395718136577274, + "grad_norm": 0.9119474278156698, + "learning_rate": 1.3768137282380089e-05, + "loss": 0.3107, + "step": 22901 + }, + { + "epoch": 0.3957354160906829, + "grad_norm": 0.8474756463040484, + "learning_rate": 1.3767618879242005e-05, + "loss": 0.3518, + "step": 22902 + }, + { + "epoch": 0.39575269560409176, + "grad_norm": 0.7607197739227585, + "learning_rate": 1.3767100464303449e-05, + "loss": 0.3232, + "step": 22903 + }, + { + "epoch": 0.39576997511750067, + "grad_norm": 1.1667809764973303, + "learning_rate": 1.3766582037566047e-05, + "loss": 0.5941, + "step": 22904 + }, + { + "epoch": 0.3957872546309096, + "grad_norm": 1.0862858951253198, + "learning_rate": 1.376606359903143e-05, + "loss": 0.5976, + "step": 22905 + }, + { + "epoch": 0.3958045341443185, + "grad_norm": 1.0520248684477782, + "learning_rate": 1.376554514870121e-05, + "loss": 0.3612, + "step": 22906 + }, + { + "epoch": 0.3958218136577274, + "grad_norm": 0.8483752923079354, + "learning_rate": 1.3765026686577018e-05, + "loss": 0.6028, + "step": 22907 + }, + { + "epoch": 0.3958390931711363, + "grad_norm": 1.2992223884965124, + "learning_rate": 1.376450821266048e-05, + "loss": 0.6943, + "step": 22908 + }, + { + "epoch": 0.3958563726845452, + "grad_norm": 0.9790510315725434, + "learning_rate": 1.3763989726953209e-05, + "loss": 0.2583, + "step": 22909 + }, + { + "epoch": 0.3958736521979541, + "grad_norm": 1.0707663146491788, + "learning_rate": 1.376347122945684e-05, + "loss": 0.3637, + "step": 22910 + }, + { + "epoch": 0.395890931711363, + "grad_norm": 0.8658248591251417, + "learning_rate": 1.3762952720172993e-05, + "loss": 0.3611, + "step": 22911 + }, + { + "epoch": 0.39590821122477193, + "grad_norm": 1.0412132481624297, + "learning_rate": 1.3762434199103289e-05, + "loss": 0.4463, + "step": 22912 + }, + { + "epoch": 0.3959254907381808, + "grad_norm": 1.0185567464057292, + "learning_rate": 1.3761915666249359e-05, + "loss": 0.4578, + "step": 22913 + }, + { + "epoch": 0.3959427702515897, + "grad_norm": 0.7999475935002038, + "learning_rate": 1.3761397121612823e-05, + "loss": 0.3807, + "step": 22914 + }, + { + "epoch": 0.3959600497649986, + "grad_norm": 1.1281895561414892, + "learning_rate": 1.3760878565195303e-05, + "loss": 0.5104, + "step": 22915 + }, + { + "epoch": 0.3959773292784075, + "grad_norm": 0.4763817397200477, + "learning_rate": 1.3760359996998432e-05, + "loss": 0.6417, + "step": 22916 + }, + { + "epoch": 0.3959946087918164, + "grad_norm": 0.47407339352714395, + "learning_rate": 1.3759841417023821e-05, + "loss": 0.6233, + "step": 22917 + }, + { + "epoch": 0.3960118883052253, + "grad_norm": 0.6068923312818736, + "learning_rate": 1.3759322825273102e-05, + "loss": 0.3217, + "step": 22918 + }, + { + "epoch": 0.39602916781863423, + "grad_norm": 0.8496244816259927, + "learning_rate": 1.3758804221747898e-05, + "loss": 0.5517, + "step": 22919 + }, + { + "epoch": 0.39604644733204314, + "grad_norm": 1.1306691832503342, + "learning_rate": 1.3758285606449838e-05, + "loss": 0.6181, + "step": 22920 + }, + { + "epoch": 0.39606372684545205, + "grad_norm": 0.6476146898917354, + "learning_rate": 1.3757766979380535e-05, + "loss": 0.635, + "step": 22921 + }, + { + "epoch": 0.39608100635886095, + "grad_norm": 0.6373344646519202, + "learning_rate": 1.3757248340541625e-05, + "loss": 0.2482, + "step": 22922 + }, + { + "epoch": 0.39609828587226986, + "grad_norm": 0.8190030202800183, + "learning_rate": 1.3756729689934727e-05, + "loss": 0.5193, + "step": 22923 + }, + { + "epoch": 0.3961155653856787, + "grad_norm": 0.8445563910556828, + "learning_rate": 1.3756211027561463e-05, + "loss": 0.6249, + "step": 22924 + }, + { + "epoch": 0.3961328448990876, + "grad_norm": 0.40642593700711627, + "learning_rate": 1.3755692353423465e-05, + "loss": 0.5332, + "step": 22925 + }, + { + "epoch": 0.39615012441249653, + "grad_norm": 1.0786161203850666, + "learning_rate": 1.3755173667522352e-05, + "loss": 0.383, + "step": 22926 + }, + { + "epoch": 0.39616740392590544, + "grad_norm": 1.4472049097916024, + "learning_rate": 1.3754654969859748e-05, + "loss": 0.6556, + "step": 22927 + }, + { + "epoch": 0.39618468343931434, + "grad_norm": 1.753504514208226, + "learning_rate": 1.375413626043728e-05, + "loss": 0.4368, + "step": 22928 + }, + { + "epoch": 0.39620196295272325, + "grad_norm": 1.1029827539245514, + "learning_rate": 1.3753617539256571e-05, + "loss": 0.5947, + "step": 22929 + }, + { + "epoch": 0.39621924246613216, + "grad_norm": 0.7978822819133453, + "learning_rate": 1.375309880631925e-05, + "loss": 0.4627, + "step": 22930 + }, + { + "epoch": 0.39623652197954107, + "grad_norm": 1.262426647115818, + "learning_rate": 1.3752580061626937e-05, + "loss": 0.5149, + "step": 22931 + }, + { + "epoch": 0.39625380149295, + "grad_norm": 0.9364792891045415, + "learning_rate": 1.3752061305181255e-05, + "loss": 0.5668, + "step": 22932 + }, + { + "epoch": 0.3962710810063589, + "grad_norm": 0.9112958377367298, + "learning_rate": 1.3751542536983834e-05, + "loss": 0.5398, + "step": 22933 + }, + { + "epoch": 0.3962883605197678, + "grad_norm": 0.9518072300856218, + "learning_rate": 1.3751023757036297e-05, + "loss": 0.5329, + "step": 22934 + }, + { + "epoch": 0.39630564003317664, + "grad_norm": 0.804027330686188, + "learning_rate": 1.3750504965340266e-05, + "loss": 0.51, + "step": 22935 + }, + { + "epoch": 0.39632291954658555, + "grad_norm": 1.1781975046247997, + "learning_rate": 1.3749986161897374e-05, + "loss": 0.4963, + "step": 22936 + }, + { + "epoch": 0.39634019905999446, + "grad_norm": 1.0054117479204234, + "learning_rate": 1.3749467346709233e-05, + "loss": 0.3925, + "step": 22937 + }, + { + "epoch": 0.39635747857340337, + "grad_norm": 1.123183434970916, + "learning_rate": 1.3748948519777481e-05, + "loss": 0.4564, + "step": 22938 + }, + { + "epoch": 0.3963747580868123, + "grad_norm": 1.04120459839645, + "learning_rate": 1.3748429681103736e-05, + "loss": 0.4581, + "step": 22939 + }, + { + "epoch": 0.3963920376002212, + "grad_norm": 1.7434912719208522, + "learning_rate": 1.3747910830689624e-05, + "loss": 0.4448, + "step": 22940 + }, + { + "epoch": 0.3964093171136301, + "grad_norm": 1.0585770012211106, + "learning_rate": 1.374739196853677e-05, + "loss": 0.5335, + "step": 22941 + }, + { + "epoch": 0.396426596627039, + "grad_norm": 1.2581563580652442, + "learning_rate": 1.3746873094646799e-05, + "loss": 0.6317, + "step": 22942 + }, + { + "epoch": 0.3964438761404479, + "grad_norm": 1.197262014870825, + "learning_rate": 1.3746354209021338e-05, + "loss": 0.4657, + "step": 22943 + }, + { + "epoch": 0.3964611556538568, + "grad_norm": 0.4373815152035554, + "learning_rate": 1.3745835311662011e-05, + "loss": 0.6774, + "step": 22944 + }, + { + "epoch": 0.39647843516726566, + "grad_norm": 1.1329033328638942, + "learning_rate": 1.374531640257044e-05, + "loss": 0.5379, + "step": 22945 + }, + { + "epoch": 0.39649571468067457, + "grad_norm": 0.8717981180014038, + "learning_rate": 1.3744797481748258e-05, + "loss": 0.2609, + "step": 22946 + }, + { + "epoch": 0.3965129941940835, + "grad_norm": 0.7651327534797886, + "learning_rate": 1.3744278549197082e-05, + "loss": 0.3818, + "step": 22947 + }, + { + "epoch": 0.3965302737074924, + "grad_norm": 0.3959425109777651, + "learning_rate": 1.3743759604918545e-05, + "loss": 0.4667, + "step": 22948 + }, + { + "epoch": 0.3965475532209013, + "grad_norm": 1.4222979652575665, + "learning_rate": 1.3743240648914267e-05, + "loss": 0.6386, + "step": 22949 + }, + { + "epoch": 0.3965648327343102, + "grad_norm": 1.2780552765067021, + "learning_rate": 1.3742721681185874e-05, + "loss": 0.4537, + "step": 22950 + }, + { + "epoch": 0.3965821122477191, + "grad_norm": 0.5997343593931797, + "learning_rate": 1.3742202701734993e-05, + "loss": 0.5686, + "step": 22951 + }, + { + "epoch": 0.396599391761128, + "grad_norm": 1.0266323893307079, + "learning_rate": 1.3741683710563245e-05, + "loss": 0.4306, + "step": 22952 + }, + { + "epoch": 0.3966166712745369, + "grad_norm": 1.0525472939664071, + "learning_rate": 1.3741164707672263e-05, + "loss": 0.597, + "step": 22953 + }, + { + "epoch": 0.39663395078794583, + "grad_norm": 1.4284081397572392, + "learning_rate": 1.3740645693063667e-05, + "loss": 0.3763, + "step": 22954 + }, + { + "epoch": 0.39665123030135474, + "grad_norm": 0.8456762630434448, + "learning_rate": 1.3740126666739086e-05, + "loss": 0.4125, + "step": 22955 + }, + { + "epoch": 0.3966685098147636, + "grad_norm": 0.7485315907379578, + "learning_rate": 1.3739607628700143e-05, + "loss": 0.5773, + "step": 22956 + }, + { + "epoch": 0.3966857893281725, + "grad_norm": 1.5906241006114756, + "learning_rate": 1.3739088578948467e-05, + "loss": 0.6457, + "step": 22957 + }, + { + "epoch": 0.3967030688415814, + "grad_norm": 1.2027896667587024, + "learning_rate": 1.373856951748568e-05, + "loss": 0.4386, + "step": 22958 + }, + { + "epoch": 0.3967203483549903, + "grad_norm": 1.0995480285185029, + "learning_rate": 1.3738050444313409e-05, + "loss": 0.6129, + "step": 22959 + }, + { + "epoch": 0.3967376278683992, + "grad_norm": 1.0613887835845586, + "learning_rate": 1.3737531359433281e-05, + "loss": 0.364, + "step": 22960 + }, + { + "epoch": 0.39675490738180813, + "grad_norm": 0.9612656545171975, + "learning_rate": 1.3737012262846916e-05, + "loss": 0.3817, + "step": 22961 + }, + { + "epoch": 0.39677218689521704, + "grad_norm": 1.83950508655102, + "learning_rate": 1.373649315455595e-05, + "loss": 0.4581, + "step": 22962 + }, + { + "epoch": 0.39678946640862595, + "grad_norm": 0.7750595657995734, + "learning_rate": 1.3735974034562001e-05, + "loss": 0.5753, + "step": 22963 + }, + { + "epoch": 0.39680674592203485, + "grad_norm": 1.47646293122425, + "learning_rate": 1.3735454902866696e-05, + "loss": 0.5053, + "step": 22964 + }, + { + "epoch": 0.39682402543544376, + "grad_norm": 1.1099488621805964, + "learning_rate": 1.3734935759471664e-05, + "loss": 0.4296, + "step": 22965 + }, + { + "epoch": 0.3968413049488526, + "grad_norm": 1.1050958399835054, + "learning_rate": 1.373441660437853e-05, + "loss": 0.4027, + "step": 22966 + }, + { + "epoch": 0.3968585844622615, + "grad_norm": 1.2116804659797047, + "learning_rate": 1.373389743758892e-05, + "loss": 0.3756, + "step": 22967 + }, + { + "epoch": 0.39687586397567043, + "grad_norm": 0.7545092692262201, + "learning_rate": 1.373337825910446e-05, + "loss": 0.4651, + "step": 22968 + }, + { + "epoch": 0.39689314348907934, + "grad_norm": 0.6806169640771613, + "learning_rate": 1.3732859068926772e-05, + "loss": 0.4373, + "step": 22969 + }, + { + "epoch": 0.39691042300248824, + "grad_norm": 0.8472528043847789, + "learning_rate": 1.3732339867057488e-05, + "loss": 0.5458, + "step": 22970 + }, + { + "epoch": 0.39692770251589715, + "grad_norm": 0.8335983335575148, + "learning_rate": 1.3731820653498234e-05, + "loss": 0.4887, + "step": 22971 + }, + { + "epoch": 0.39694498202930606, + "grad_norm": 1.224698927147317, + "learning_rate": 1.3731301428250627e-05, + "loss": 0.4508, + "step": 22972 + }, + { + "epoch": 0.39696226154271497, + "grad_norm": 0.46183530808507123, + "learning_rate": 1.3730782191316308e-05, + "loss": 0.6479, + "step": 22973 + }, + { + "epoch": 0.3969795410561239, + "grad_norm": 0.8889575638770999, + "learning_rate": 1.3730262942696893e-05, + "loss": 0.7769, + "step": 22974 + }, + { + "epoch": 0.3969968205695328, + "grad_norm": 1.04982002814495, + "learning_rate": 1.372974368239401e-05, + "loss": 0.5513, + "step": 22975 + }, + { + "epoch": 0.3970141000829417, + "grad_norm": 0.950211685703203, + "learning_rate": 1.3729224410409287e-05, + "loss": 0.2841, + "step": 22976 + }, + { + "epoch": 0.39703137959635054, + "grad_norm": 0.9227885109708679, + "learning_rate": 1.3728705126744352e-05, + "loss": 0.3564, + "step": 22977 + }, + { + "epoch": 0.39704865910975945, + "grad_norm": 0.9318014152731753, + "learning_rate": 1.3728185831400825e-05, + "loss": 0.4676, + "step": 22978 + }, + { + "epoch": 0.39706593862316836, + "grad_norm": 0.9571939214981368, + "learning_rate": 1.3727666524380338e-05, + "loss": 0.2645, + "step": 22979 + }, + { + "epoch": 0.39708321813657727, + "grad_norm": 1.6922833848975882, + "learning_rate": 1.3727147205684519e-05, + "loss": 0.613, + "step": 22980 + }, + { + "epoch": 0.3971004976499862, + "grad_norm": 0.7528356327258728, + "learning_rate": 1.3726627875314988e-05, + "loss": 0.4778, + "step": 22981 + }, + { + "epoch": 0.3971177771633951, + "grad_norm": 1.080268841546119, + "learning_rate": 1.3726108533273377e-05, + "loss": 0.2443, + "step": 22982 + }, + { + "epoch": 0.397135056676804, + "grad_norm": 1.055913585394239, + "learning_rate": 1.372558917956131e-05, + "loss": 0.4764, + "step": 22983 + }, + { + "epoch": 0.3971523361902129, + "grad_norm": 0.8791240160716344, + "learning_rate": 1.3725069814180414e-05, + "loss": 0.4679, + "step": 22984 + }, + { + "epoch": 0.3971696157036218, + "grad_norm": 0.47440676337197585, + "learning_rate": 1.3724550437132319e-05, + "loss": 0.6321, + "step": 22985 + }, + { + "epoch": 0.3971868952170307, + "grad_norm": 0.9045860368599, + "learning_rate": 1.3724031048418647e-05, + "loss": 0.5685, + "step": 22986 + }, + { + "epoch": 0.39720417473043956, + "grad_norm": 0.9117278181271644, + "learning_rate": 1.3723511648041025e-05, + "loss": 0.3175, + "step": 22987 + }, + { + "epoch": 0.39722145424384847, + "grad_norm": 0.7895089579249128, + "learning_rate": 1.3722992236001088e-05, + "loss": 0.3659, + "step": 22988 + }, + { + "epoch": 0.3972387337572574, + "grad_norm": 0.8321392353904575, + "learning_rate": 1.3722472812300449e-05, + "loss": 0.385, + "step": 22989 + }, + { + "epoch": 0.3972560132706663, + "grad_norm": 0.8460230183839672, + "learning_rate": 1.3721953376940746e-05, + "loss": 0.5251, + "step": 22990 + }, + { + "epoch": 0.3972732927840752, + "grad_norm": 0.7460589663602525, + "learning_rate": 1.3721433929923602e-05, + "loss": 0.3667, + "step": 22991 + }, + { + "epoch": 0.3972905722974841, + "grad_norm": 0.9154485039690682, + "learning_rate": 1.3720914471250644e-05, + "loss": 0.4924, + "step": 22992 + }, + { + "epoch": 0.397307851810893, + "grad_norm": 0.9298108081755907, + "learning_rate": 1.37203950009235e-05, + "loss": 0.4257, + "step": 22993 + }, + { + "epoch": 0.3973251313243019, + "grad_norm": 0.8849327297109303, + "learning_rate": 1.3719875518943792e-05, + "loss": 0.4814, + "step": 22994 + }, + { + "epoch": 0.3973424108377108, + "grad_norm": 0.9983111515777146, + "learning_rate": 1.3719356025313154e-05, + "loss": 0.4755, + "step": 22995 + }, + { + "epoch": 0.39735969035111973, + "grad_norm": 0.8497918309072594, + "learning_rate": 1.371883652003321e-05, + "loss": 0.4646, + "step": 22996 + }, + { + "epoch": 0.39737696986452864, + "grad_norm": 0.9957838103332219, + "learning_rate": 1.3718317003105587e-05, + "loss": 0.689, + "step": 22997 + }, + { + "epoch": 0.3973942493779375, + "grad_norm": 1.1044886109409289, + "learning_rate": 1.3717797474531916e-05, + "loss": 0.2539, + "step": 22998 + }, + { + "epoch": 0.3974115288913464, + "grad_norm": 1.2267308213780175, + "learning_rate": 1.3717277934313818e-05, + "loss": 0.5088, + "step": 22999 + }, + { + "epoch": 0.3974288084047553, + "grad_norm": 0.8877446652448554, + "learning_rate": 1.3716758382452923e-05, + "loss": 0.4994, + "step": 23000 + }, + { + "epoch": 0.3974460879181642, + "grad_norm": 0.615787126984516, + "learning_rate": 1.3716238818950856e-05, + "loss": 0.5435, + "step": 23001 + }, + { + "epoch": 0.3974633674315731, + "grad_norm": 1.0157446477863916, + "learning_rate": 1.371571924380925e-05, + "loss": 0.4367, + "step": 23002 + }, + { + "epoch": 0.39748064694498203, + "grad_norm": 0.5811318186320852, + "learning_rate": 1.371519965702973e-05, + "loss": 0.2377, + "step": 23003 + }, + { + "epoch": 0.39749792645839094, + "grad_norm": 0.6978265278440882, + "learning_rate": 1.3714680058613916e-05, + "loss": 0.6098, + "step": 23004 + }, + { + "epoch": 0.39751520597179985, + "grad_norm": 0.7575922867703941, + "learning_rate": 1.3714160448563447e-05, + "loss": 0.581, + "step": 23005 + }, + { + "epoch": 0.39753248548520875, + "grad_norm": 1.0936992713581255, + "learning_rate": 1.3713640826879946e-05, + "loss": 0.5239, + "step": 23006 + }, + { + "epoch": 0.39754976499861766, + "grad_norm": 0.8277725870192638, + "learning_rate": 1.3713121193565035e-05, + "loss": 0.478, + "step": 23007 + }, + { + "epoch": 0.39756704451202657, + "grad_norm": 1.6470101655619822, + "learning_rate": 1.3712601548620352e-05, + "loss": 0.4842, + "step": 23008 + }, + { + "epoch": 0.3975843240254354, + "grad_norm": 0.91091097616399, + "learning_rate": 1.3712081892047516e-05, + "loss": 0.5883, + "step": 23009 + }, + { + "epoch": 0.39760160353884433, + "grad_norm": 0.7749151118236721, + "learning_rate": 1.3711562223848155e-05, + "loss": 0.5186, + "step": 23010 + }, + { + "epoch": 0.39761888305225324, + "grad_norm": 0.8206699655521654, + "learning_rate": 1.3711042544023903e-05, + "loss": 0.3836, + "step": 23011 + }, + { + "epoch": 0.39763616256566214, + "grad_norm": 0.8572042140980819, + "learning_rate": 1.371052285257638e-05, + "loss": 0.5674, + "step": 23012 + }, + { + "epoch": 0.39765344207907105, + "grad_norm": 0.8426581551553062, + "learning_rate": 1.371000314950722e-05, + "loss": 0.5243, + "step": 23013 + }, + { + "epoch": 0.39767072159247996, + "grad_norm": 0.869104322966348, + "learning_rate": 1.3709483434818048e-05, + "loss": 0.4094, + "step": 23014 + }, + { + "epoch": 0.39768800110588887, + "grad_norm": 0.44867164599624965, + "learning_rate": 1.370896370851049e-05, + "loss": 0.7836, + "step": 23015 + }, + { + "epoch": 0.3977052806192978, + "grad_norm": 1.2173076675518375, + "learning_rate": 1.3708443970586179e-05, + "loss": 0.6062, + "step": 23016 + }, + { + "epoch": 0.3977225601327067, + "grad_norm": 0.9723880439619923, + "learning_rate": 1.3707924221046738e-05, + "loss": 0.4912, + "step": 23017 + }, + { + "epoch": 0.3977398396461156, + "grad_norm": 0.9796993496568659, + "learning_rate": 1.3707404459893794e-05, + "loss": 0.4201, + "step": 23018 + }, + { + "epoch": 0.39775711915952444, + "grad_norm": 0.910227238500087, + "learning_rate": 1.3706884687128984e-05, + "loss": 0.5051, + "step": 23019 + }, + { + "epoch": 0.39777439867293335, + "grad_norm": 0.8775662980372395, + "learning_rate": 1.3706364902753925e-05, + "loss": 0.4079, + "step": 23020 + }, + { + "epoch": 0.39779167818634226, + "grad_norm": 1.274536407194513, + "learning_rate": 1.3705845106770248e-05, + "loss": 0.3544, + "step": 23021 + }, + { + "epoch": 0.39780895769975116, + "grad_norm": 0.9706735904290178, + "learning_rate": 1.3705325299179587e-05, + "loss": 0.5961, + "step": 23022 + }, + { + "epoch": 0.39782623721316007, + "grad_norm": 0.8668345203868278, + "learning_rate": 1.3704805479983563e-05, + "loss": 0.4028, + "step": 23023 + }, + { + "epoch": 0.397843516726569, + "grad_norm": 1.3595081019976218, + "learning_rate": 1.3704285649183805e-05, + "loss": 0.4825, + "step": 23024 + }, + { + "epoch": 0.3978607962399779, + "grad_norm": 0.6075956418841187, + "learning_rate": 1.3703765806781945e-05, + "loss": 0.4279, + "step": 23025 + }, + { + "epoch": 0.3978780757533868, + "grad_norm": 1.2232686305030824, + "learning_rate": 1.370324595277961e-05, + "loss": 0.3902, + "step": 23026 + }, + { + "epoch": 0.3978953552667957, + "grad_norm": 0.8744689272183409, + "learning_rate": 1.3702726087178425e-05, + "loss": 0.3954, + "step": 23027 + }, + { + "epoch": 0.3979126347802046, + "grad_norm": 0.7727428044644098, + "learning_rate": 1.370220620998002e-05, + "loss": 0.4655, + "step": 23028 + }, + { + "epoch": 0.3979299142936135, + "grad_norm": 1.308803549286243, + "learning_rate": 1.3701686321186025e-05, + "loss": 0.7415, + "step": 23029 + }, + { + "epoch": 0.39794719380702237, + "grad_norm": 1.1211258455264668, + "learning_rate": 1.370116642079807e-05, + "loss": 0.5059, + "step": 23030 + }, + { + "epoch": 0.3979644733204313, + "grad_norm": 1.2825882941616742, + "learning_rate": 1.3700646508817777e-05, + "loss": 0.3968, + "step": 23031 + }, + { + "epoch": 0.3979817528338402, + "grad_norm": 0.6462512933487765, + "learning_rate": 1.3700126585246775e-05, + "loss": 0.5062, + "step": 23032 + }, + { + "epoch": 0.3979990323472491, + "grad_norm": 0.6886020373736604, + "learning_rate": 1.3699606650086701e-05, + "loss": 0.4151, + "step": 23033 + }, + { + "epoch": 0.398016311860658, + "grad_norm": 1.323627157489332, + "learning_rate": 1.3699086703339177e-05, + "loss": 0.6354, + "step": 23034 + }, + { + "epoch": 0.3980335913740669, + "grad_norm": 1.2810774221712165, + "learning_rate": 1.369856674500583e-05, + "loss": 0.5813, + "step": 23035 + }, + { + "epoch": 0.3980508708874758, + "grad_norm": 0.7900819579145232, + "learning_rate": 1.3698046775088292e-05, + "loss": 0.4272, + "step": 23036 + }, + { + "epoch": 0.3980681504008847, + "grad_norm": 0.8991361209573097, + "learning_rate": 1.3697526793588192e-05, + "loss": 0.4584, + "step": 23037 + }, + { + "epoch": 0.39808542991429363, + "grad_norm": 1.1275832829379397, + "learning_rate": 1.3697006800507153e-05, + "loss": 0.4045, + "step": 23038 + }, + { + "epoch": 0.39810270942770254, + "grad_norm": 0.8867888706798632, + "learning_rate": 1.3696486795846812e-05, + "loss": 0.5781, + "step": 23039 + }, + { + "epoch": 0.3981199889411114, + "grad_norm": 0.44695560942352536, + "learning_rate": 1.3695966779608792e-05, + "loss": 0.6458, + "step": 23040 + }, + { + "epoch": 0.3981372684545203, + "grad_norm": 1.2708225958917705, + "learning_rate": 1.3695446751794723e-05, + "loss": 0.4011, + "step": 23041 + }, + { + "epoch": 0.3981545479679292, + "grad_norm": 0.7214365527310569, + "learning_rate": 1.3694926712406233e-05, + "loss": 0.3365, + "step": 23042 + }, + { + "epoch": 0.3981718274813381, + "grad_norm": 0.5560868621537954, + "learning_rate": 1.3694406661444952e-05, + "loss": 0.6972, + "step": 23043 + }, + { + "epoch": 0.398189106994747, + "grad_norm": 1.0916286519693221, + "learning_rate": 1.3693886598912509e-05, + "loss": 0.4569, + "step": 23044 + }, + { + "epoch": 0.39820638650815593, + "grad_norm": 1.0049328724525668, + "learning_rate": 1.3693366524810534e-05, + "loss": 0.3417, + "step": 23045 + }, + { + "epoch": 0.39822366602156484, + "grad_norm": 0.8250376668608568, + "learning_rate": 1.3692846439140652e-05, + "loss": 0.4319, + "step": 23046 + }, + { + "epoch": 0.39824094553497374, + "grad_norm": 0.8190468669092353, + "learning_rate": 1.3692326341904494e-05, + "loss": 0.4941, + "step": 23047 + }, + { + "epoch": 0.39825822504838265, + "grad_norm": 1.0925266585279787, + "learning_rate": 1.369180623310369e-05, + "loss": 0.5202, + "step": 23048 + }, + { + "epoch": 0.39827550456179156, + "grad_norm": 0.9756024995658793, + "learning_rate": 1.369128611273987e-05, + "loss": 0.4172, + "step": 23049 + }, + { + "epoch": 0.39829278407520047, + "grad_norm": 0.8478384395399158, + "learning_rate": 1.369076598081466e-05, + "loss": 0.4261, + "step": 23050 + }, + { + "epoch": 0.3983100635886093, + "grad_norm": 0.9635769025212142, + "learning_rate": 1.3690245837329691e-05, + "loss": 0.4566, + "step": 23051 + }, + { + "epoch": 0.3983273431020182, + "grad_norm": 1.0821204614705, + "learning_rate": 1.368972568228659e-05, + "loss": 0.4449, + "step": 23052 + }, + { + "epoch": 0.39834462261542714, + "grad_norm": 1.2044773279250185, + "learning_rate": 1.368920551568699e-05, + "loss": 0.4644, + "step": 23053 + }, + { + "epoch": 0.39836190212883604, + "grad_norm": 0.7893108044877665, + "learning_rate": 1.3688685337532516e-05, + "loss": 0.5047, + "step": 23054 + }, + { + "epoch": 0.39837918164224495, + "grad_norm": 1.309043897586438, + "learning_rate": 1.3688165147824801e-05, + "loss": 0.4633, + "step": 23055 + }, + { + "epoch": 0.39839646115565386, + "grad_norm": 0.8905128017290148, + "learning_rate": 1.3687644946565471e-05, + "loss": 0.5742, + "step": 23056 + }, + { + "epoch": 0.39841374066906277, + "grad_norm": 1.0410005452210274, + "learning_rate": 1.3687124733756159e-05, + "loss": 0.3648, + "step": 23057 + }, + { + "epoch": 0.3984310201824717, + "grad_norm": 0.7478919178663969, + "learning_rate": 1.3686604509398489e-05, + "loss": 0.2068, + "step": 23058 + }, + { + "epoch": 0.3984482996958806, + "grad_norm": 0.8876640818346782, + "learning_rate": 1.3686084273494093e-05, + "loss": 0.2311, + "step": 23059 + }, + { + "epoch": 0.3984655792092895, + "grad_norm": 1.4501702576485191, + "learning_rate": 1.3685564026044606e-05, + "loss": 0.5996, + "step": 23060 + }, + { + "epoch": 0.3984828587226984, + "grad_norm": 0.6699391628635111, + "learning_rate": 1.3685043767051649e-05, + "loss": 0.3488, + "step": 23061 + }, + { + "epoch": 0.39850013823610725, + "grad_norm": 0.8326405052974121, + "learning_rate": 1.3684523496516855e-05, + "loss": 0.5406, + "step": 23062 + }, + { + "epoch": 0.39851741774951616, + "grad_norm": 0.9499937027692421, + "learning_rate": 1.3684003214441854e-05, + "loss": 0.4115, + "step": 23063 + }, + { + "epoch": 0.39853469726292506, + "grad_norm": 0.6946528950782416, + "learning_rate": 1.3683482920828273e-05, + "loss": 0.2788, + "step": 23064 + }, + { + "epoch": 0.39855197677633397, + "grad_norm": 1.299494344836258, + "learning_rate": 1.3682962615677746e-05, + "loss": 0.5068, + "step": 23065 + }, + { + "epoch": 0.3985692562897429, + "grad_norm": 1.7515916874080226, + "learning_rate": 1.36824422989919e-05, + "loss": 0.5499, + "step": 23066 + }, + { + "epoch": 0.3985865358031518, + "grad_norm": 0.9747157807824147, + "learning_rate": 1.3681921970772363e-05, + "loss": 0.3276, + "step": 23067 + }, + { + "epoch": 0.3986038153165607, + "grad_norm": 0.8896059060693108, + "learning_rate": 1.3681401631020767e-05, + "loss": 0.4038, + "step": 23068 + }, + { + "epoch": 0.3986210948299696, + "grad_norm": 0.8465598004797049, + "learning_rate": 1.3680881279738742e-05, + "loss": 0.4999, + "step": 23069 + }, + { + "epoch": 0.3986383743433785, + "grad_norm": 0.9604546558997774, + "learning_rate": 1.3680360916927917e-05, + "loss": 0.4613, + "step": 23070 + }, + { + "epoch": 0.3986556538567874, + "grad_norm": 1.3234878662915521, + "learning_rate": 1.3679840542589924e-05, + "loss": 0.5017, + "step": 23071 + }, + { + "epoch": 0.39867293337019627, + "grad_norm": 1.563869117911404, + "learning_rate": 1.3679320156726387e-05, + "loss": 0.4789, + "step": 23072 + }, + { + "epoch": 0.3986902128836052, + "grad_norm": 0.42658578922067486, + "learning_rate": 1.3678799759338941e-05, + "loss": 0.6609, + "step": 23073 + }, + { + "epoch": 0.3987074923970141, + "grad_norm": 0.6624711069370223, + "learning_rate": 1.3678279350429216e-05, + "loss": 0.6756, + "step": 23074 + }, + { + "epoch": 0.398724771910423, + "grad_norm": 0.9559158448038106, + "learning_rate": 1.3677758929998838e-05, + "loss": 0.6692, + "step": 23075 + }, + { + "epoch": 0.3987420514238319, + "grad_norm": 0.7699371986624929, + "learning_rate": 1.3677238498049441e-05, + "loss": 0.333, + "step": 23076 + }, + { + "epoch": 0.3987593309372408, + "grad_norm": 0.5715081830962275, + "learning_rate": 1.3676718054582656e-05, + "loss": 0.4556, + "step": 23077 + }, + { + "epoch": 0.3987766104506497, + "grad_norm": 1.0954645885630354, + "learning_rate": 1.3676197599600105e-05, + "loss": 0.5761, + "step": 23078 + }, + { + "epoch": 0.3987938899640586, + "grad_norm": 1.1370548211860168, + "learning_rate": 1.3675677133103429e-05, + "loss": 0.6473, + "step": 23079 + }, + { + "epoch": 0.39881116947746753, + "grad_norm": 1.4521624658076533, + "learning_rate": 1.3675156655094254e-05, + "loss": 0.4645, + "step": 23080 + }, + { + "epoch": 0.39882844899087644, + "grad_norm": 1.2918843384247212, + "learning_rate": 1.3674636165574202e-05, + "loss": 0.3974, + "step": 23081 + }, + { + "epoch": 0.39884572850428535, + "grad_norm": 0.4662747723015253, + "learning_rate": 1.3674115664544916e-05, + "loss": 0.2344, + "step": 23082 + }, + { + "epoch": 0.3988630080176942, + "grad_norm": 1.1282093721164568, + "learning_rate": 1.367359515200802e-05, + "loss": 0.5682, + "step": 23083 + }, + { + "epoch": 0.3988802875311031, + "grad_norm": 1.1147144782597755, + "learning_rate": 1.3673074627965142e-05, + "loss": 0.4404, + "step": 23084 + }, + { + "epoch": 0.398897567044512, + "grad_norm": 0.9182297261125962, + "learning_rate": 1.3672554092417916e-05, + "loss": 0.4699, + "step": 23085 + }, + { + "epoch": 0.3989148465579209, + "grad_norm": 1.4340648326747882, + "learning_rate": 1.3672033545367973e-05, + "loss": 0.3964, + "step": 23086 + }, + { + "epoch": 0.39893212607132983, + "grad_norm": 0.9341472373071483, + "learning_rate": 1.367151298681694e-05, + "loss": 0.3626, + "step": 23087 + }, + { + "epoch": 0.39894940558473874, + "grad_norm": 0.8518886655921871, + "learning_rate": 1.3670992416766451e-05, + "loss": 0.3976, + "step": 23088 + }, + { + "epoch": 0.39896668509814764, + "grad_norm": 1.2088300309877356, + "learning_rate": 1.3670471835218133e-05, + "loss": 0.5031, + "step": 23089 + }, + { + "epoch": 0.39898396461155655, + "grad_norm": 1.2733546240987805, + "learning_rate": 1.3669951242173618e-05, + "loss": 0.2661, + "step": 23090 + }, + { + "epoch": 0.39900124412496546, + "grad_norm": 1.3696525694957584, + "learning_rate": 1.3669430637634541e-05, + "loss": 0.533, + "step": 23091 + }, + { + "epoch": 0.39901852363837437, + "grad_norm": 1.0106491673944338, + "learning_rate": 1.3668910021602525e-05, + "loss": 0.2746, + "step": 23092 + }, + { + "epoch": 0.3990358031517832, + "grad_norm": 0.8221971185420885, + "learning_rate": 1.3668389394079205e-05, + "loss": 0.4024, + "step": 23093 + }, + { + "epoch": 0.3990530826651921, + "grad_norm": 0.7852709774848924, + "learning_rate": 1.3667868755066209e-05, + "loss": 0.5217, + "step": 23094 + }, + { + "epoch": 0.39907036217860103, + "grad_norm": 1.2905837278100447, + "learning_rate": 1.366734810456517e-05, + "loss": 0.3971, + "step": 23095 + }, + { + "epoch": 0.39908764169200994, + "grad_norm": 1.0659381719796035, + "learning_rate": 1.3666827442577718e-05, + "loss": 0.4343, + "step": 23096 + }, + { + "epoch": 0.39910492120541885, + "grad_norm": 1.3312673034542035, + "learning_rate": 1.3666306769105484e-05, + "loss": 0.6298, + "step": 23097 + }, + { + "epoch": 0.39912220071882776, + "grad_norm": 1.1876990251126067, + "learning_rate": 1.3665786084150098e-05, + "loss": 0.5321, + "step": 23098 + }, + { + "epoch": 0.39913948023223667, + "grad_norm": 0.8912710309435751, + "learning_rate": 1.3665265387713194e-05, + "loss": 0.5336, + "step": 23099 + }, + { + "epoch": 0.3991567597456456, + "grad_norm": 1.0991018647660629, + "learning_rate": 1.3664744679796397e-05, + "loss": 0.4385, + "step": 23100 + }, + { + "epoch": 0.3991740392590545, + "grad_norm": 1.2197246871929972, + "learning_rate": 1.3664223960401345e-05, + "loss": 0.4446, + "step": 23101 + }, + { + "epoch": 0.3991913187724634, + "grad_norm": 0.8953936967554759, + "learning_rate": 1.3663703229529663e-05, + "loss": 0.3272, + "step": 23102 + }, + { + "epoch": 0.3992085982858723, + "grad_norm": 0.8320872260597018, + "learning_rate": 1.3663182487182984e-05, + "loss": 0.4354, + "step": 23103 + }, + { + "epoch": 0.39922587779928115, + "grad_norm": 1.2251319063283932, + "learning_rate": 1.3662661733362939e-05, + "loss": 0.4271, + "step": 23104 + }, + { + "epoch": 0.39924315731269006, + "grad_norm": 1.1096800051986797, + "learning_rate": 1.3662140968071159e-05, + "loss": 0.4454, + "step": 23105 + }, + { + "epoch": 0.39926043682609896, + "grad_norm": 0.5467635368294549, + "learning_rate": 1.3661620191309275e-05, + "loss": 0.4261, + "step": 23106 + }, + { + "epoch": 0.39927771633950787, + "grad_norm": 0.7737068965042185, + "learning_rate": 1.3661099403078916e-05, + "loss": 0.2416, + "step": 23107 + }, + { + "epoch": 0.3992949958529168, + "grad_norm": 1.1680947562937343, + "learning_rate": 1.3660578603381718e-05, + "loss": 0.4232, + "step": 23108 + }, + { + "epoch": 0.3993122753663257, + "grad_norm": 0.6810001650754757, + "learning_rate": 1.366005779221931e-05, + "loss": 0.3894, + "step": 23109 + }, + { + "epoch": 0.3993295548797346, + "grad_norm": 1.144565520088592, + "learning_rate": 1.3659536969593321e-05, + "loss": 0.6165, + "step": 23110 + }, + { + "epoch": 0.3993468343931435, + "grad_norm": 1.137958937053036, + "learning_rate": 1.3659016135505388e-05, + "loss": 0.4792, + "step": 23111 + }, + { + "epoch": 0.3993641139065524, + "grad_norm": 1.1846442799414967, + "learning_rate": 1.3658495289957134e-05, + "loss": 0.3568, + "step": 23112 + }, + { + "epoch": 0.3993813934199613, + "grad_norm": 1.0365724123171036, + "learning_rate": 1.3657974432950196e-05, + "loss": 0.6848, + "step": 23113 + }, + { + "epoch": 0.39939867293337017, + "grad_norm": 0.9541297062251896, + "learning_rate": 1.3657453564486204e-05, + "loss": 0.3941, + "step": 23114 + }, + { + "epoch": 0.3994159524467791, + "grad_norm": 0.7620074169979675, + "learning_rate": 1.365693268456679e-05, + "loss": 0.2274, + "step": 23115 + }, + { + "epoch": 0.399433231960188, + "grad_norm": 1.32838133444419, + "learning_rate": 1.3656411793193587e-05, + "loss": 0.5743, + "step": 23116 + }, + { + "epoch": 0.3994505114735969, + "grad_norm": 0.6071955594612595, + "learning_rate": 1.3655890890368222e-05, + "loss": 0.3852, + "step": 23117 + }, + { + "epoch": 0.3994677909870058, + "grad_norm": 1.0046345181993004, + "learning_rate": 1.3655369976092327e-05, + "loss": 0.4744, + "step": 23118 + }, + { + "epoch": 0.3994850705004147, + "grad_norm": 1.1185056119610575, + "learning_rate": 1.3654849050367538e-05, + "loss": 0.3203, + "step": 23119 + }, + { + "epoch": 0.3995023500138236, + "grad_norm": 1.1584400614729056, + "learning_rate": 1.3654328113195485e-05, + "loss": 0.4058, + "step": 23120 + }, + { + "epoch": 0.3995196295272325, + "grad_norm": 0.7718352095158302, + "learning_rate": 1.3653807164577794e-05, + "loss": 0.3232, + "step": 23121 + }, + { + "epoch": 0.39953690904064143, + "grad_norm": 0.9896480461278937, + "learning_rate": 1.365328620451611e-05, + "loss": 0.359, + "step": 23122 + }, + { + "epoch": 0.39955418855405034, + "grad_norm": 1.028248053193211, + "learning_rate": 1.3652765233012049e-05, + "loss": 0.4207, + "step": 23123 + }, + { + "epoch": 0.39957146806745925, + "grad_norm": 0.7814365299904533, + "learning_rate": 1.365224425006725e-05, + "loss": 0.5588, + "step": 23124 + }, + { + "epoch": 0.3995887475808681, + "grad_norm": 1.2429134246014295, + "learning_rate": 1.3651723255683347e-05, + "loss": 0.4674, + "step": 23125 + }, + { + "epoch": 0.399606027094277, + "grad_norm": 1.2131391396569888, + "learning_rate": 1.3651202249861968e-05, + "loss": 0.5438, + "step": 23126 + }, + { + "epoch": 0.3996233066076859, + "grad_norm": 0.6690845927159205, + "learning_rate": 1.3650681232604744e-05, + "loss": 0.3006, + "step": 23127 + }, + { + "epoch": 0.3996405861210948, + "grad_norm": 0.937181168678115, + "learning_rate": 1.3650160203913312e-05, + "loss": 0.3805, + "step": 23128 + }, + { + "epoch": 0.39965786563450373, + "grad_norm": 0.9300768138703999, + "learning_rate": 1.3649639163789301e-05, + "loss": 0.5524, + "step": 23129 + }, + { + "epoch": 0.39967514514791264, + "grad_norm": 1.0608570604303353, + "learning_rate": 1.364911811223434e-05, + "loss": 0.4044, + "step": 23130 + }, + { + "epoch": 0.39969242466132154, + "grad_norm": 0.861337107162331, + "learning_rate": 1.3648597049250065e-05, + "loss": 0.5162, + "step": 23131 + }, + { + "epoch": 0.39970970417473045, + "grad_norm": 1.7175658591768956, + "learning_rate": 1.3648075974838109e-05, + "loss": 0.3865, + "step": 23132 + }, + { + "epoch": 0.39972698368813936, + "grad_norm": 1.1269590431106709, + "learning_rate": 1.3647554889000098e-05, + "loss": 0.5393, + "step": 23133 + }, + { + "epoch": 0.39974426320154827, + "grad_norm": 1.1955974165455971, + "learning_rate": 1.3647033791737671e-05, + "loss": 0.3118, + "step": 23134 + }, + { + "epoch": 0.3997615427149572, + "grad_norm": 0.8259588806892904, + "learning_rate": 1.3646512683052456e-05, + "loss": 0.3625, + "step": 23135 + }, + { + "epoch": 0.399778822228366, + "grad_norm": 1.2242873231275868, + "learning_rate": 1.3645991562946086e-05, + "loss": 0.3915, + "step": 23136 + }, + { + "epoch": 0.39979610174177493, + "grad_norm": 0.7996868604466316, + "learning_rate": 1.3645470431420194e-05, + "loss": 0.4173, + "step": 23137 + }, + { + "epoch": 0.39981338125518384, + "grad_norm": 1.3592409328168686, + "learning_rate": 1.3644949288476407e-05, + "loss": 0.5267, + "step": 23138 + }, + { + "epoch": 0.39983066076859275, + "grad_norm": 0.9002204318843772, + "learning_rate": 1.3644428134116367e-05, + "loss": 0.7548, + "step": 23139 + }, + { + "epoch": 0.39984794028200166, + "grad_norm": 0.9912301133855377, + "learning_rate": 1.36439069683417e-05, + "loss": 0.3788, + "step": 23140 + }, + { + "epoch": 0.39986521979541056, + "grad_norm": 1.1691010267108624, + "learning_rate": 1.3643385791154038e-05, + "loss": 0.3729, + "step": 23141 + }, + { + "epoch": 0.3998824993088195, + "grad_norm": 1.4891196634855872, + "learning_rate": 1.364286460255502e-05, + "loss": 0.4697, + "step": 23142 + }, + { + "epoch": 0.3998997788222284, + "grad_norm": 0.9700187328504114, + "learning_rate": 1.364234340254627e-05, + "loss": 0.4846, + "step": 23143 + }, + { + "epoch": 0.3999170583356373, + "grad_norm": 1.2761684745307436, + "learning_rate": 1.3641822191129419e-05, + "loss": 0.6514, + "step": 23144 + }, + { + "epoch": 0.3999343378490462, + "grad_norm": 2.1466830272511612, + "learning_rate": 1.3641300968306111e-05, + "loss": 0.5804, + "step": 23145 + }, + { + "epoch": 0.39995161736245505, + "grad_norm": 0.8856088556391721, + "learning_rate": 1.3640779734077967e-05, + "loss": 0.3502, + "step": 23146 + }, + { + "epoch": 0.39996889687586396, + "grad_norm": 1.6053208949509838, + "learning_rate": 1.3640258488446625e-05, + "loss": 0.3782, + "step": 23147 + }, + { + "epoch": 0.39998617638927286, + "grad_norm": 0.8955953995218793, + "learning_rate": 1.3639737231413718e-05, + "loss": 0.4376, + "step": 23148 + }, + { + "epoch": 0.40000345590268177, + "grad_norm": 2.0531958713617935, + "learning_rate": 1.363921596298088e-05, + "loss": 0.6055, + "step": 23149 + }, + { + "epoch": 0.4000207354160907, + "grad_norm": 0.75139409869021, + "learning_rate": 1.3638694683149733e-05, + "loss": 0.4255, + "step": 23150 + }, + { + "epoch": 0.4000380149294996, + "grad_norm": 1.0217569908409652, + "learning_rate": 1.3638173391921923e-05, + "loss": 0.5438, + "step": 23151 + }, + { + "epoch": 0.4000552944429085, + "grad_norm": 0.895127986613088, + "learning_rate": 1.3637652089299079e-05, + "loss": 0.4916, + "step": 23152 + }, + { + "epoch": 0.4000725739563174, + "grad_norm": 1.1027043147806546, + "learning_rate": 1.3637130775282832e-05, + "loss": 0.5675, + "step": 23153 + }, + { + "epoch": 0.4000898534697263, + "grad_norm": 0.6906080206729737, + "learning_rate": 1.3636609449874811e-05, + "loss": 0.3705, + "step": 23154 + }, + { + "epoch": 0.4001071329831352, + "grad_norm": 1.3104395153843003, + "learning_rate": 1.3636088113076656e-05, + "loss": 0.5153, + "step": 23155 + }, + { + "epoch": 0.4001244124965441, + "grad_norm": 3.16534993662283, + "learning_rate": 1.3635566764889997e-05, + "loss": 0.4568, + "step": 23156 + }, + { + "epoch": 0.400141692009953, + "grad_norm": 8.084327307865335, + "learning_rate": 1.3635045405316465e-05, + "loss": 0.5446, + "step": 23157 + }, + { + "epoch": 0.4001589715233619, + "grad_norm": 0.7818014639273408, + "learning_rate": 1.3634524034357693e-05, + "loss": 0.3001, + "step": 23158 + }, + { + "epoch": 0.4001762510367708, + "grad_norm": 0.785220482677319, + "learning_rate": 1.363400265201532e-05, + "loss": 0.3416, + "step": 23159 + }, + { + "epoch": 0.4001935305501797, + "grad_norm": 0.703480022817665, + "learning_rate": 1.363348125829097e-05, + "loss": 0.5152, + "step": 23160 + }, + { + "epoch": 0.4002108100635886, + "grad_norm": 0.8167545624153837, + "learning_rate": 1.363295985318628e-05, + "loss": 0.2371, + "step": 23161 + }, + { + "epoch": 0.4002280895769975, + "grad_norm": 1.159016836121196, + "learning_rate": 1.3632438436702888e-05, + "loss": 0.6304, + "step": 23162 + }, + { + "epoch": 0.4002453690904064, + "grad_norm": 1.2420589511012583, + "learning_rate": 1.3631917008842423e-05, + "loss": 0.5942, + "step": 23163 + }, + { + "epoch": 0.40026264860381533, + "grad_norm": 1.041918212173756, + "learning_rate": 1.3631395569606515e-05, + "loss": 0.6231, + "step": 23164 + }, + { + "epoch": 0.40027992811722424, + "grad_norm": 0.9551768217400837, + "learning_rate": 1.3630874118996802e-05, + "loss": 0.5472, + "step": 23165 + }, + { + "epoch": 0.40029720763063315, + "grad_norm": 0.9404581296433268, + "learning_rate": 1.3630352657014915e-05, + "loss": 0.4731, + "step": 23166 + }, + { + "epoch": 0.400314487144042, + "grad_norm": 0.7820787048857945, + "learning_rate": 1.3629831183662485e-05, + "loss": 0.3314, + "step": 23167 + }, + { + "epoch": 0.4003317666574509, + "grad_norm": 0.5276967623657863, + "learning_rate": 1.362930969894115e-05, + "loss": 0.6816, + "step": 23168 + }, + { + "epoch": 0.4003490461708598, + "grad_norm": 0.6718497706328219, + "learning_rate": 1.3628788202852544e-05, + "loss": 0.4123, + "step": 23169 + }, + { + "epoch": 0.4003663256842687, + "grad_norm": 0.9235790126508835, + "learning_rate": 1.3628266695398291e-05, + "loss": 0.6859, + "step": 23170 + }, + { + "epoch": 0.40038360519767763, + "grad_norm": 0.6276899751679689, + "learning_rate": 1.3627745176580037e-05, + "loss": 0.3516, + "step": 23171 + }, + { + "epoch": 0.40040088471108654, + "grad_norm": 0.935423172189571, + "learning_rate": 1.3627223646399406e-05, + "loss": 0.513, + "step": 23172 + }, + { + "epoch": 0.40041816422449544, + "grad_norm": 1.372407167614025, + "learning_rate": 1.3626702104858035e-05, + "loss": 0.6839, + "step": 23173 + }, + { + "epoch": 0.40043544373790435, + "grad_norm": 0.9272113448043852, + "learning_rate": 1.362618055195756e-05, + "loss": 0.5586, + "step": 23174 + }, + { + "epoch": 0.40045272325131326, + "grad_norm": 1.6492075722033999, + "learning_rate": 1.3625658987699608e-05, + "loss": 0.4645, + "step": 23175 + }, + { + "epoch": 0.40047000276472217, + "grad_norm": 7.676382785770378, + "learning_rate": 1.362513741208582e-05, + "loss": 0.5773, + "step": 23176 + }, + { + "epoch": 0.4004872822781311, + "grad_norm": 1.0322843970344429, + "learning_rate": 1.3624615825117826e-05, + "loss": 0.3193, + "step": 23177 + }, + { + "epoch": 0.4005045617915399, + "grad_norm": 1.575307980185879, + "learning_rate": 1.3624094226797255e-05, + "loss": 0.3559, + "step": 23178 + }, + { + "epoch": 0.40052184130494883, + "grad_norm": 1.9341384278520939, + "learning_rate": 1.3623572617125752e-05, + "loss": 0.3882, + "step": 23179 + }, + { + "epoch": 0.40053912081835774, + "grad_norm": 1.5202017141556192, + "learning_rate": 1.362305099610494e-05, + "loss": 0.4535, + "step": 23180 + }, + { + "epoch": 0.40055640033176665, + "grad_norm": 1.3520861605906827, + "learning_rate": 1.3622529363736456e-05, + "loss": 0.4746, + "step": 23181 + }, + { + "epoch": 0.40057367984517556, + "grad_norm": 1.2192568501666878, + "learning_rate": 1.362200772002194e-05, + "loss": 0.6914, + "step": 23182 + }, + { + "epoch": 0.40059095935858446, + "grad_norm": 0.6551055491796711, + "learning_rate": 1.3621486064963019e-05, + "loss": 0.4314, + "step": 23183 + }, + { + "epoch": 0.40060823887199337, + "grad_norm": 0.8194155915354127, + "learning_rate": 1.3620964398561325e-05, + "loss": 0.3156, + "step": 23184 + }, + { + "epoch": 0.4006255183854023, + "grad_norm": 0.9339312936868476, + "learning_rate": 1.3620442720818498e-05, + "loss": 0.3992, + "step": 23185 + }, + { + "epoch": 0.4006427978988112, + "grad_norm": 1.0585159595845497, + "learning_rate": 1.361992103173617e-05, + "loss": 0.4457, + "step": 23186 + }, + { + "epoch": 0.4006600774122201, + "grad_norm": 1.3681893586072051, + "learning_rate": 1.361939933131597e-05, + "loss": 0.3685, + "step": 23187 + }, + { + "epoch": 0.40067735692562895, + "grad_norm": 0.4394316716122816, + "learning_rate": 1.3618877619559539e-05, + "loss": 0.8402, + "step": 23188 + }, + { + "epoch": 0.40069463643903785, + "grad_norm": 1.0611518566736697, + "learning_rate": 1.361835589646851e-05, + "loss": 0.6223, + "step": 23189 + }, + { + "epoch": 0.40071191595244676, + "grad_norm": 1.293145119492624, + "learning_rate": 1.3617834162044513e-05, + "loss": 0.3394, + "step": 23190 + }, + { + "epoch": 0.40072919546585567, + "grad_norm": 0.8667015110263455, + "learning_rate": 1.3617312416289184e-05, + "loss": 0.4395, + "step": 23191 + }, + { + "epoch": 0.4007464749792646, + "grad_norm": 0.7177711129638294, + "learning_rate": 1.361679065920416e-05, + "loss": 0.5429, + "step": 23192 + }, + { + "epoch": 0.4007637544926735, + "grad_norm": 1.3953438193915204, + "learning_rate": 1.3616268890791069e-05, + "loss": 0.4677, + "step": 23193 + }, + { + "epoch": 0.4007810340060824, + "grad_norm": 0.8962406316040606, + "learning_rate": 1.3615747111051553e-05, + "loss": 0.292, + "step": 23194 + }, + { + "epoch": 0.4007983135194913, + "grad_norm": 1.22579757687426, + "learning_rate": 1.361522531998724e-05, + "loss": 0.5179, + "step": 23195 + }, + { + "epoch": 0.4008155930329002, + "grad_norm": 1.01021466332081, + "learning_rate": 1.3614703517599767e-05, + "loss": 0.4141, + "step": 23196 + }, + { + "epoch": 0.4008328725463091, + "grad_norm": 0.9478548722082354, + "learning_rate": 1.361418170389077e-05, + "loss": 0.5397, + "step": 23197 + }, + { + "epoch": 0.400850152059718, + "grad_norm": 0.9397607802306759, + "learning_rate": 1.3613659878861878e-05, + "loss": 0.5745, + "step": 23198 + }, + { + "epoch": 0.4008674315731269, + "grad_norm": 0.7978190019975535, + "learning_rate": 1.361313804251473e-05, + "loss": 0.4582, + "step": 23199 + }, + { + "epoch": 0.4008847110865358, + "grad_norm": 0.9251210885888301, + "learning_rate": 1.3612616194850959e-05, + "loss": 0.5984, + "step": 23200 + }, + { + "epoch": 0.4009019905999447, + "grad_norm": 0.5594511695441405, + "learning_rate": 1.3612094335872197e-05, + "loss": 0.3549, + "step": 23201 + }, + { + "epoch": 0.4009192701133536, + "grad_norm": 0.9549525813502531, + "learning_rate": 1.3611572465580084e-05, + "loss": 0.4682, + "step": 23202 + }, + { + "epoch": 0.4009365496267625, + "grad_norm": 1.2993363440918486, + "learning_rate": 1.3611050583976253e-05, + "loss": 0.6408, + "step": 23203 + }, + { + "epoch": 0.4009538291401714, + "grad_norm": 0.5847485880409881, + "learning_rate": 1.3610528691062336e-05, + "loss": 0.3375, + "step": 23204 + }, + { + "epoch": 0.4009711086535803, + "grad_norm": 1.319095254734914, + "learning_rate": 1.3610006786839969e-05, + "loss": 0.4402, + "step": 23205 + }, + { + "epoch": 0.40098838816698923, + "grad_norm": 1.1777892446221883, + "learning_rate": 1.3609484871310785e-05, + "loss": 0.3917, + "step": 23206 + }, + { + "epoch": 0.40100566768039814, + "grad_norm": 0.9036508956451981, + "learning_rate": 1.3608962944476418e-05, + "loss": 0.6192, + "step": 23207 + }, + { + "epoch": 0.40102294719380704, + "grad_norm": 1.5425950926917356, + "learning_rate": 1.3608441006338509e-05, + "loss": 0.5702, + "step": 23208 + }, + { + "epoch": 0.40104022670721595, + "grad_norm": 0.5713461943256839, + "learning_rate": 1.3607919056898689e-05, + "loss": 0.419, + "step": 23209 + }, + { + "epoch": 0.4010575062206248, + "grad_norm": 0.6706799930312396, + "learning_rate": 1.3607397096158588e-05, + "loss": 0.4011, + "step": 23210 + }, + { + "epoch": 0.4010747857340337, + "grad_norm": 0.7530779867461198, + "learning_rate": 1.3606875124119848e-05, + "loss": 0.5067, + "step": 23211 + }, + { + "epoch": 0.4010920652474426, + "grad_norm": 1.2798887162363513, + "learning_rate": 1.3606353140784098e-05, + "loss": 0.5816, + "step": 23212 + }, + { + "epoch": 0.4011093447608515, + "grad_norm": 0.8441776461605321, + "learning_rate": 1.360583114615298e-05, + "loss": 0.4946, + "step": 23213 + }, + { + "epoch": 0.40112662427426043, + "grad_norm": 1.2764956125181568, + "learning_rate": 1.3605309140228125e-05, + "loss": 0.4801, + "step": 23214 + }, + { + "epoch": 0.40114390378766934, + "grad_norm": 0.7822886731504994, + "learning_rate": 1.3604787123011163e-05, + "loss": 0.6434, + "step": 23215 + }, + { + "epoch": 0.40116118330107825, + "grad_norm": 1.0854632020611055, + "learning_rate": 1.3604265094503737e-05, + "loss": 0.3913, + "step": 23216 + }, + { + "epoch": 0.40117846281448716, + "grad_norm": 1.1746962670351715, + "learning_rate": 1.3603743054707479e-05, + "loss": 0.3477, + "step": 23217 + }, + { + "epoch": 0.40119574232789607, + "grad_norm": 1.1600612385122928, + "learning_rate": 1.3603221003624022e-05, + "loss": 0.312, + "step": 23218 + }, + { + "epoch": 0.401213021841305, + "grad_norm": 0.9451068122568685, + "learning_rate": 1.3602698941255004e-05, + "loss": 0.3578, + "step": 23219 + }, + { + "epoch": 0.4012303013547138, + "grad_norm": 0.8857936859162865, + "learning_rate": 1.360217686760206e-05, + "loss": 0.5251, + "step": 23220 + }, + { + "epoch": 0.40124758086812273, + "grad_norm": 0.6699874848647847, + "learning_rate": 1.3601654782666822e-05, + "loss": 0.3627, + "step": 23221 + }, + { + "epoch": 0.40126486038153164, + "grad_norm": 1.1074477328835888, + "learning_rate": 1.360113268645093e-05, + "loss": 0.4812, + "step": 23222 + }, + { + "epoch": 0.40128213989494055, + "grad_norm": 1.102943285300033, + "learning_rate": 1.3600610578956017e-05, + "loss": 0.5103, + "step": 23223 + }, + { + "epoch": 0.40129941940834946, + "grad_norm": 1.2671472630845186, + "learning_rate": 1.3600088460183716e-05, + "loss": 0.4731, + "step": 23224 + }, + { + "epoch": 0.40131669892175836, + "grad_norm": 0.5330283896722097, + "learning_rate": 1.3599566330135667e-05, + "loss": 0.7885, + "step": 23225 + }, + { + "epoch": 0.40133397843516727, + "grad_norm": 0.7492271447235015, + "learning_rate": 1.3599044188813502e-05, + "loss": 0.4206, + "step": 23226 + }, + { + "epoch": 0.4013512579485762, + "grad_norm": 0.8930997151213251, + "learning_rate": 1.3598522036218853e-05, + "loss": 0.4911, + "step": 23227 + }, + { + "epoch": 0.4013685374619851, + "grad_norm": 0.9808429929225913, + "learning_rate": 1.3597999872353364e-05, + "loss": 0.5806, + "step": 23228 + }, + { + "epoch": 0.401385816975394, + "grad_norm": 1.0789758283981694, + "learning_rate": 1.3597477697218664e-05, + "loss": 0.39, + "step": 23229 + }, + { + "epoch": 0.4014030964888029, + "grad_norm": 1.0114300586252747, + "learning_rate": 1.359695551081639e-05, + "loss": 0.4663, + "step": 23230 + }, + { + "epoch": 0.40142037600221175, + "grad_norm": 0.8203773994092507, + "learning_rate": 1.359643331314818e-05, + "loss": 0.503, + "step": 23231 + }, + { + "epoch": 0.40143765551562066, + "grad_norm": 1.084395383022207, + "learning_rate": 1.3595911104215666e-05, + "loss": 0.3425, + "step": 23232 + }, + { + "epoch": 0.40145493502902957, + "grad_norm": 1.966916177040466, + "learning_rate": 1.3595388884020483e-05, + "loss": 0.4996, + "step": 23233 + }, + { + "epoch": 0.4014722145424385, + "grad_norm": 0.8952753433055076, + "learning_rate": 1.3594866652564274e-05, + "loss": 0.5574, + "step": 23234 + }, + { + "epoch": 0.4014894940558474, + "grad_norm": 0.8075539451755741, + "learning_rate": 1.3594344409848667e-05, + "loss": 0.4669, + "step": 23235 + }, + { + "epoch": 0.4015067735692563, + "grad_norm": 1.1026769090525275, + "learning_rate": 1.35938221558753e-05, + "loss": 0.4263, + "step": 23236 + }, + { + "epoch": 0.4015240530826652, + "grad_norm": 0.8236792009832751, + "learning_rate": 1.359329989064581e-05, + "loss": 0.3798, + "step": 23237 + }, + { + "epoch": 0.4015413325960741, + "grad_norm": 1.2166842219245722, + "learning_rate": 1.359277761416183e-05, + "loss": 0.5158, + "step": 23238 + }, + { + "epoch": 0.401558612109483, + "grad_norm": 1.3059388287318863, + "learning_rate": 1.3592255326424997e-05, + "loss": 0.4683, + "step": 23239 + }, + { + "epoch": 0.4015758916228919, + "grad_norm": 1.322110221833323, + "learning_rate": 1.3591733027436948e-05, + "loss": 0.4444, + "step": 23240 + }, + { + "epoch": 0.4015931711363008, + "grad_norm": 1.0333441807401884, + "learning_rate": 1.3591210717199319e-05, + "loss": 0.3521, + "step": 23241 + }, + { + "epoch": 0.4016104506497097, + "grad_norm": 0.687580898265106, + "learning_rate": 1.3590688395713744e-05, + "loss": 0.7472, + "step": 23242 + }, + { + "epoch": 0.4016277301631186, + "grad_norm": 0.9019618966365469, + "learning_rate": 1.3590166062981863e-05, + "loss": 0.4125, + "step": 23243 + }, + { + "epoch": 0.4016450096765275, + "grad_norm": 0.8125838789007941, + "learning_rate": 1.3589643719005303e-05, + "loss": 0.5215, + "step": 23244 + }, + { + "epoch": 0.4016622891899364, + "grad_norm": 1.2801475323874907, + "learning_rate": 1.3589121363785713e-05, + "loss": 0.5422, + "step": 23245 + }, + { + "epoch": 0.4016795687033453, + "grad_norm": 1.208968392169248, + "learning_rate": 1.358859899732472e-05, + "loss": 0.4985, + "step": 23246 + }, + { + "epoch": 0.4016968482167542, + "grad_norm": 1.0631794511549941, + "learning_rate": 1.358807661962396e-05, + "loss": 0.5155, + "step": 23247 + }, + { + "epoch": 0.40171412773016313, + "grad_norm": 0.9016558083404208, + "learning_rate": 1.3587554230685072e-05, + "loss": 0.3502, + "step": 23248 + }, + { + "epoch": 0.40173140724357204, + "grad_norm": 0.5576721071558934, + "learning_rate": 1.3587031830509695e-05, + "loss": 0.8599, + "step": 23249 + }, + { + "epoch": 0.40174868675698094, + "grad_norm": 1.5444763822830256, + "learning_rate": 1.3586509419099457e-05, + "loss": 0.4217, + "step": 23250 + }, + { + "epoch": 0.40176596627038985, + "grad_norm": 0.7703811858902265, + "learning_rate": 1.3585986996456e-05, + "loss": 0.6682, + "step": 23251 + }, + { + "epoch": 0.4017832457837987, + "grad_norm": 1.4393427485548436, + "learning_rate": 1.358546456258096e-05, + "loss": 0.4335, + "step": 23252 + }, + { + "epoch": 0.4018005252972076, + "grad_norm": 1.2883187713031299, + "learning_rate": 1.358494211747597e-05, + "loss": 0.7071, + "step": 23253 + }, + { + "epoch": 0.4018178048106165, + "grad_norm": 0.9367211964420605, + "learning_rate": 1.3584419661142672e-05, + "loss": 0.5604, + "step": 23254 + }, + { + "epoch": 0.4018350843240254, + "grad_norm": 1.1331466943077544, + "learning_rate": 1.3583897193582698e-05, + "loss": 0.5092, + "step": 23255 + }, + { + "epoch": 0.40185236383743433, + "grad_norm": 1.1769584474244992, + "learning_rate": 1.3583374714797687e-05, + "loss": 0.4875, + "step": 23256 + }, + { + "epoch": 0.40186964335084324, + "grad_norm": 1.4768669912217214, + "learning_rate": 1.3582852224789273e-05, + "loss": 0.6602, + "step": 23257 + }, + { + "epoch": 0.40188692286425215, + "grad_norm": 1.1734296160607718, + "learning_rate": 1.3582329723559094e-05, + "loss": 0.4726, + "step": 23258 + }, + { + "epoch": 0.40190420237766106, + "grad_norm": 0.6241128741359812, + "learning_rate": 1.3581807211108786e-05, + "loss": 0.8465, + "step": 23259 + }, + { + "epoch": 0.40192148189106996, + "grad_norm": 0.900077470988725, + "learning_rate": 1.3581284687439986e-05, + "loss": 0.582, + "step": 23260 + }, + { + "epoch": 0.4019387614044789, + "grad_norm": 0.9199710665646046, + "learning_rate": 1.3580762152554328e-05, + "loss": 0.4161, + "step": 23261 + }, + { + "epoch": 0.4019560409178877, + "grad_norm": 1.7165334586681908, + "learning_rate": 1.3580239606453453e-05, + "loss": 0.4297, + "step": 23262 + }, + { + "epoch": 0.40197332043129663, + "grad_norm": 0.9078864342702346, + "learning_rate": 1.3579717049138995e-05, + "loss": 0.5774, + "step": 23263 + }, + { + "epoch": 0.40199059994470554, + "grad_norm": 1.2405358888298124, + "learning_rate": 1.357919448061259e-05, + "loss": 0.4053, + "step": 23264 + }, + { + "epoch": 0.40200787945811445, + "grad_norm": 0.8570358280453237, + "learning_rate": 1.3578671900875877e-05, + "loss": 0.3524, + "step": 23265 + }, + { + "epoch": 0.40202515897152336, + "grad_norm": 0.8947486983743009, + "learning_rate": 1.3578149309930492e-05, + "loss": 0.5057, + "step": 23266 + }, + { + "epoch": 0.40204243848493226, + "grad_norm": 0.4729996894251313, + "learning_rate": 1.357762670777807e-05, + "loss": 0.5859, + "step": 23267 + }, + { + "epoch": 0.40205971799834117, + "grad_norm": 0.8217403446955949, + "learning_rate": 1.3577104094420252e-05, + "loss": 0.6975, + "step": 23268 + }, + { + "epoch": 0.4020769975117501, + "grad_norm": 0.8243381275759074, + "learning_rate": 1.3576581469858669e-05, + "loss": 0.3409, + "step": 23269 + }, + { + "epoch": 0.402094277025159, + "grad_norm": 0.8680369195576677, + "learning_rate": 1.3576058834094963e-05, + "loss": 0.6188, + "step": 23270 + }, + { + "epoch": 0.4021115565385679, + "grad_norm": 0.7257107711996575, + "learning_rate": 1.3575536187130767e-05, + "loss": 0.5594, + "step": 23271 + }, + { + "epoch": 0.4021288360519768, + "grad_norm": 1.0775834016977877, + "learning_rate": 1.3575013528967722e-05, + "loss": 0.6093, + "step": 23272 + }, + { + "epoch": 0.40214611556538565, + "grad_norm": 1.3090657007948427, + "learning_rate": 1.3574490859607461e-05, + "loss": 0.5742, + "step": 23273 + }, + { + "epoch": 0.40216339507879456, + "grad_norm": 0.5860466228200166, + "learning_rate": 1.3573968179051625e-05, + "loss": 0.3751, + "step": 23274 + }, + { + "epoch": 0.40218067459220347, + "grad_norm": 1.5915826695182416, + "learning_rate": 1.3573445487301849e-05, + "loss": 0.593, + "step": 23275 + }, + { + "epoch": 0.4021979541056124, + "grad_norm": 0.746955275673356, + "learning_rate": 1.357292278435977e-05, + "loss": 0.4091, + "step": 23276 + }, + { + "epoch": 0.4022152336190213, + "grad_norm": 0.786701521683616, + "learning_rate": 1.3572400070227025e-05, + "loss": 0.4049, + "step": 23277 + }, + { + "epoch": 0.4022325131324302, + "grad_norm": 0.4085037545902152, + "learning_rate": 1.3571877344905248e-05, + "loss": 0.5396, + "step": 23278 + }, + { + "epoch": 0.4022497926458391, + "grad_norm": 0.45059082846266596, + "learning_rate": 1.3571354608396083e-05, + "loss": 0.8729, + "step": 23279 + }, + { + "epoch": 0.402267072159248, + "grad_norm": 1.1735685158024078, + "learning_rate": 1.3570831860701166e-05, + "loss": 0.4195, + "step": 23280 + }, + { + "epoch": 0.4022843516726569, + "grad_norm": 1.27716877391846, + "learning_rate": 1.3570309101822128e-05, + "loss": 0.4517, + "step": 23281 + }, + { + "epoch": 0.4023016311860658, + "grad_norm": 0.7940261963730946, + "learning_rate": 1.3569786331760613e-05, + "loss": 0.302, + "step": 23282 + }, + { + "epoch": 0.40231891069947473, + "grad_norm": 1.7313576656499527, + "learning_rate": 1.3569263550518257e-05, + "loss": 0.4633, + "step": 23283 + }, + { + "epoch": 0.4023361902128836, + "grad_norm": 1.9805259695674364, + "learning_rate": 1.3568740758096692e-05, + "loss": 0.4271, + "step": 23284 + }, + { + "epoch": 0.4023534697262925, + "grad_norm": 0.7209447407183242, + "learning_rate": 1.3568217954497564e-05, + "loss": 0.6105, + "step": 23285 + }, + { + "epoch": 0.4023707492397014, + "grad_norm": 1.2308467163644383, + "learning_rate": 1.3567695139722505e-05, + "loss": 0.3914, + "step": 23286 + }, + { + "epoch": 0.4023880287531103, + "grad_norm": 0.6560527260006072, + "learning_rate": 1.3567172313773153e-05, + "loss": 0.6145, + "step": 23287 + }, + { + "epoch": 0.4024053082665192, + "grad_norm": 1.1105263022173475, + "learning_rate": 1.3566649476651147e-05, + "loss": 0.5119, + "step": 23288 + }, + { + "epoch": 0.4024225877799281, + "grad_norm": 0.8673220804726308, + "learning_rate": 1.3566126628358126e-05, + "loss": 0.6187, + "step": 23289 + }, + { + "epoch": 0.40243986729333703, + "grad_norm": 1.4521312285014618, + "learning_rate": 1.356560376889572e-05, + "loss": 0.4363, + "step": 23290 + }, + { + "epoch": 0.40245714680674594, + "grad_norm": 1.6404089532202335, + "learning_rate": 1.3565080898265574e-05, + "loss": 0.3835, + "step": 23291 + }, + { + "epoch": 0.40247442632015484, + "grad_norm": 0.7400357817601345, + "learning_rate": 1.3564558016469326e-05, + "loss": 0.5427, + "step": 23292 + }, + { + "epoch": 0.40249170583356375, + "grad_norm": 0.7973644502463068, + "learning_rate": 1.3564035123508608e-05, + "loss": 0.4045, + "step": 23293 + }, + { + "epoch": 0.4025089853469726, + "grad_norm": 0.4673912563407784, + "learning_rate": 1.3563512219385065e-05, + "loss": 0.7797, + "step": 23294 + }, + { + "epoch": 0.4025262648603815, + "grad_norm": 0.9194778080367626, + "learning_rate": 1.3562989304100329e-05, + "loss": 0.3982, + "step": 23295 + }, + { + "epoch": 0.4025435443737904, + "grad_norm": 0.9267528042465358, + "learning_rate": 1.3562466377656038e-05, + "loss": 0.3425, + "step": 23296 + }, + { + "epoch": 0.4025608238871993, + "grad_norm": 0.970328623895038, + "learning_rate": 1.3561943440053835e-05, + "loss": 0.3008, + "step": 23297 + }, + { + "epoch": 0.40257810340060823, + "grad_norm": 1.8671765965979097, + "learning_rate": 1.3561420491295352e-05, + "loss": 0.5804, + "step": 23298 + }, + { + "epoch": 0.40259538291401714, + "grad_norm": 0.8129282428591843, + "learning_rate": 1.356089753138223e-05, + "loss": 0.3866, + "step": 23299 + }, + { + "epoch": 0.40261266242742605, + "grad_norm": 1.0520942386440324, + "learning_rate": 1.3560374560316107e-05, + "loss": 0.5397, + "step": 23300 + }, + { + "epoch": 0.40262994194083496, + "grad_norm": 0.775012100415728, + "learning_rate": 1.3559851578098617e-05, + "loss": 0.5694, + "step": 23301 + }, + { + "epoch": 0.40264722145424386, + "grad_norm": 0.9958522085546008, + "learning_rate": 1.3559328584731406e-05, + "loss": 0.4889, + "step": 23302 + }, + { + "epoch": 0.40266450096765277, + "grad_norm": 1.394219216743961, + "learning_rate": 1.3558805580216105e-05, + "loss": 0.4502, + "step": 23303 + }, + { + "epoch": 0.4026817804810617, + "grad_norm": 1.071697900048474, + "learning_rate": 1.3558282564554355e-05, + "loss": 0.532, + "step": 23304 + }, + { + "epoch": 0.40269905999447053, + "grad_norm": 1.064413725170005, + "learning_rate": 1.3557759537747793e-05, + "loss": 0.4535, + "step": 23305 + }, + { + "epoch": 0.40271633950787944, + "grad_norm": 1.1522444775092868, + "learning_rate": 1.3557236499798058e-05, + "loss": 0.5093, + "step": 23306 + }, + { + "epoch": 0.40273361902128835, + "grad_norm": 0.6002643720905356, + "learning_rate": 1.355671345070679e-05, + "loss": 0.7385, + "step": 23307 + }, + { + "epoch": 0.40275089853469725, + "grad_norm": 0.673497304475792, + "learning_rate": 1.3556190390475623e-05, + "loss": 0.25, + "step": 23308 + }, + { + "epoch": 0.40276817804810616, + "grad_norm": 0.8140134970408626, + "learning_rate": 1.3555667319106198e-05, + "loss": 0.9041, + "step": 23309 + }, + { + "epoch": 0.40278545756151507, + "grad_norm": 2.187635707991326, + "learning_rate": 1.355514423660015e-05, + "loss": 0.6295, + "step": 23310 + }, + { + "epoch": 0.402802737074924, + "grad_norm": 1.0137611280275007, + "learning_rate": 1.3554621142959124e-05, + "loss": 0.5061, + "step": 23311 + }, + { + "epoch": 0.4028200165883329, + "grad_norm": 0.5365775456312853, + "learning_rate": 1.3554098038184753e-05, + "loss": 0.7373, + "step": 23312 + }, + { + "epoch": 0.4028372961017418, + "grad_norm": 1.2774488992706183, + "learning_rate": 1.3553574922278676e-05, + "loss": 0.4456, + "step": 23313 + }, + { + "epoch": 0.4028545756151507, + "grad_norm": 0.4668936647883774, + "learning_rate": 1.3553051795242534e-05, + "loss": 0.8617, + "step": 23314 + }, + { + "epoch": 0.40287185512855955, + "grad_norm": 1.0840052528208508, + "learning_rate": 1.3552528657077963e-05, + "loss": 0.4737, + "step": 23315 + }, + { + "epoch": 0.40288913464196846, + "grad_norm": 1.5938340480833406, + "learning_rate": 1.3552005507786601e-05, + "loss": 0.3857, + "step": 23316 + }, + { + "epoch": 0.40290641415537737, + "grad_norm": 0.772034721257656, + "learning_rate": 1.355148234737009e-05, + "loss": 0.4594, + "step": 23317 + }, + { + "epoch": 0.4029236936687863, + "grad_norm": 1.950241327210457, + "learning_rate": 1.3550959175830065e-05, + "loss": 0.493, + "step": 23318 + }, + { + "epoch": 0.4029409731821952, + "grad_norm": 0.8791708678340032, + "learning_rate": 1.3550435993168164e-05, + "loss": 0.5763, + "step": 23319 + }, + { + "epoch": 0.4029582526956041, + "grad_norm": 1.5462105434152944, + "learning_rate": 1.3549912799386031e-05, + "loss": 0.5212, + "step": 23320 + }, + { + "epoch": 0.402975532209013, + "grad_norm": 0.8390904130758808, + "learning_rate": 1.3549389594485301e-05, + "loss": 0.4412, + "step": 23321 + }, + { + "epoch": 0.4029928117224219, + "grad_norm": 0.8734862509654636, + "learning_rate": 1.3548866378467611e-05, + "loss": 0.485, + "step": 23322 + }, + { + "epoch": 0.4030100912358308, + "grad_norm": 1.3056401101392796, + "learning_rate": 1.3548343151334604e-05, + "loss": 0.3049, + "step": 23323 + }, + { + "epoch": 0.4030273707492397, + "grad_norm": 0.9932170780451539, + "learning_rate": 1.3547819913087913e-05, + "loss": 0.5335, + "step": 23324 + }, + { + "epoch": 0.40304465026264863, + "grad_norm": 0.8835449039078453, + "learning_rate": 1.3547296663729185e-05, + "loss": 0.5995, + "step": 23325 + }, + { + "epoch": 0.4030619297760575, + "grad_norm": 1.0726944589473577, + "learning_rate": 1.3546773403260053e-05, + "loss": 0.563, + "step": 23326 + }, + { + "epoch": 0.4030792092894664, + "grad_norm": 0.9671109708516972, + "learning_rate": 1.3546250131682157e-05, + "loss": 0.5415, + "step": 23327 + }, + { + "epoch": 0.4030964888028753, + "grad_norm": 0.695053619987279, + "learning_rate": 1.3545726848997136e-05, + "loss": 0.6307, + "step": 23328 + }, + { + "epoch": 0.4031137683162842, + "grad_norm": 1.0159982271372678, + "learning_rate": 1.354520355520663e-05, + "loss": 0.6801, + "step": 23329 + }, + { + "epoch": 0.4031310478296931, + "grad_norm": 0.8244986226566353, + "learning_rate": 1.3544680250312272e-05, + "loss": 0.485, + "step": 23330 + }, + { + "epoch": 0.403148327343102, + "grad_norm": 0.9034996256835488, + "learning_rate": 1.3544156934315711e-05, + "loss": 0.5584, + "step": 23331 + }, + { + "epoch": 0.4031656068565109, + "grad_norm": 0.9766205216304532, + "learning_rate": 1.3543633607218581e-05, + "loss": 0.4153, + "step": 23332 + }, + { + "epoch": 0.40318288636991984, + "grad_norm": 0.8925290892917206, + "learning_rate": 1.3543110269022519e-05, + "loss": 0.4161, + "step": 23333 + }, + { + "epoch": 0.40320016588332874, + "grad_norm": 0.7963508866449436, + "learning_rate": 1.3542586919729169e-05, + "loss": 0.3968, + "step": 23334 + }, + { + "epoch": 0.40321744539673765, + "grad_norm": 0.8461865311483056, + "learning_rate": 1.3542063559340166e-05, + "loss": 0.3368, + "step": 23335 + }, + { + "epoch": 0.4032347249101465, + "grad_norm": 0.8250453594868198, + "learning_rate": 1.3541540187857147e-05, + "loss": 0.3965, + "step": 23336 + }, + { + "epoch": 0.4032520044235554, + "grad_norm": 1.2698101552583707, + "learning_rate": 1.3541016805281762e-05, + "loss": 0.6841, + "step": 23337 + }, + { + "epoch": 0.4032692839369643, + "grad_norm": 0.9603883315807948, + "learning_rate": 1.3540493411615638e-05, + "loss": 0.5989, + "step": 23338 + }, + { + "epoch": 0.4032865634503732, + "grad_norm": 1.8443156874243083, + "learning_rate": 1.3539970006860423e-05, + "loss": 0.3664, + "step": 23339 + }, + { + "epoch": 0.40330384296378213, + "grad_norm": 0.7065037887170718, + "learning_rate": 1.3539446591017753e-05, + "loss": 0.3768, + "step": 23340 + }, + { + "epoch": 0.40332112247719104, + "grad_norm": 0.39899785348215533, + "learning_rate": 1.3538923164089264e-05, + "loss": 0.698, + "step": 23341 + }, + { + "epoch": 0.40333840199059995, + "grad_norm": 1.5610766243076535, + "learning_rate": 1.3538399726076601e-05, + "loss": 0.4804, + "step": 23342 + }, + { + "epoch": 0.40335568150400886, + "grad_norm": 0.7736003829097688, + "learning_rate": 1.35378762769814e-05, + "loss": 0.3693, + "step": 23343 + }, + { + "epoch": 0.40337296101741776, + "grad_norm": 0.9740148708366199, + "learning_rate": 1.35373528168053e-05, + "loss": 0.5055, + "step": 23344 + }, + { + "epoch": 0.40339024053082667, + "grad_norm": 1.121577593961754, + "learning_rate": 1.3536829345549947e-05, + "loss": 0.3857, + "step": 23345 + }, + { + "epoch": 0.4034075200442356, + "grad_norm": 1.202759444470987, + "learning_rate": 1.3536305863216974e-05, + "loss": 0.5204, + "step": 23346 + }, + { + "epoch": 0.40342479955764443, + "grad_norm": 1.403639118591309, + "learning_rate": 1.353578236980802e-05, + "loss": 0.4972, + "step": 23347 + }, + { + "epoch": 0.40344207907105334, + "grad_norm": 1.119364857351276, + "learning_rate": 1.353525886532473e-05, + "loss": 0.5991, + "step": 23348 + }, + { + "epoch": 0.40345935858446225, + "grad_norm": 1.1718471798067924, + "learning_rate": 1.3534735349768739e-05, + "loss": 0.4845, + "step": 23349 + }, + { + "epoch": 0.40347663809787115, + "grad_norm": 0.7398388612650668, + "learning_rate": 1.3534211823141687e-05, + "loss": 0.6168, + "step": 23350 + }, + { + "epoch": 0.40349391761128006, + "grad_norm": 1.4013668452670505, + "learning_rate": 1.3533688285445217e-05, + "loss": 0.5131, + "step": 23351 + }, + { + "epoch": 0.40351119712468897, + "grad_norm": 0.5495606559470609, + "learning_rate": 1.3533164736680966e-05, + "loss": 0.81, + "step": 23352 + }, + { + "epoch": 0.4035284766380979, + "grad_norm": 1.2045708435810034, + "learning_rate": 1.3532641176850571e-05, + "loss": 0.5966, + "step": 23353 + }, + { + "epoch": 0.4035457561515068, + "grad_norm": 1.4438854073531657, + "learning_rate": 1.3532117605955682e-05, + "loss": 0.5774, + "step": 23354 + }, + { + "epoch": 0.4035630356649157, + "grad_norm": 1.178105370023776, + "learning_rate": 1.3531594023997929e-05, + "loss": 0.3755, + "step": 23355 + }, + { + "epoch": 0.4035803151783246, + "grad_norm": 1.2075132641214346, + "learning_rate": 1.3531070430978952e-05, + "loss": 0.4148, + "step": 23356 + }, + { + "epoch": 0.4035975946917335, + "grad_norm": 1.0939070335850463, + "learning_rate": 1.35305468269004e-05, + "loss": 0.3726, + "step": 23357 + }, + { + "epoch": 0.40361487420514236, + "grad_norm": 1.3007191444593686, + "learning_rate": 1.3530023211763902e-05, + "loss": 0.5248, + "step": 23358 + }, + { + "epoch": 0.40363215371855127, + "grad_norm": 0.7801574175856264, + "learning_rate": 1.3529499585571107e-05, + "loss": 0.3258, + "step": 23359 + }, + { + "epoch": 0.4036494332319602, + "grad_norm": 1.6386963069458653, + "learning_rate": 1.3528975948323648e-05, + "loss": 0.7026, + "step": 23360 + }, + { + "epoch": 0.4036667127453691, + "grad_norm": 1.047994988339362, + "learning_rate": 1.352845230002317e-05, + "loss": 0.5315, + "step": 23361 + }, + { + "epoch": 0.403683992258778, + "grad_norm": 0.8222419306183406, + "learning_rate": 1.3527928640671311e-05, + "loss": 0.6577, + "step": 23362 + }, + { + "epoch": 0.4037012717721869, + "grad_norm": 1.3572565838646737, + "learning_rate": 1.352740497026971e-05, + "loss": 0.3737, + "step": 23363 + }, + { + "epoch": 0.4037185512855958, + "grad_norm": 1.690034461477676, + "learning_rate": 1.3526881288820009e-05, + "loss": 0.388, + "step": 23364 + }, + { + "epoch": 0.4037358307990047, + "grad_norm": 0.7199273912001631, + "learning_rate": 1.3526357596323848e-05, + "loss": 0.4831, + "step": 23365 + }, + { + "epoch": 0.4037531103124136, + "grad_norm": 1.2024770454376477, + "learning_rate": 1.352583389278287e-05, + "loss": 0.4458, + "step": 23366 + }, + { + "epoch": 0.40377038982582253, + "grad_norm": 0.9869893709120553, + "learning_rate": 1.3525310178198707e-05, + "loss": 0.6765, + "step": 23367 + }, + { + "epoch": 0.4037876693392314, + "grad_norm": 1.0946850573931133, + "learning_rate": 1.3524786452573007e-05, + "loss": 0.4017, + "step": 23368 + }, + { + "epoch": 0.4038049488526403, + "grad_norm": 0.8877525146277363, + "learning_rate": 1.3524262715907412e-05, + "loss": 0.2745, + "step": 23369 + }, + { + "epoch": 0.4038222283660492, + "grad_norm": 1.1979979538281758, + "learning_rate": 1.3523738968203553e-05, + "loss": 0.6013, + "step": 23370 + }, + { + "epoch": 0.4038395078794581, + "grad_norm": 0.8697775405510791, + "learning_rate": 1.3523215209463078e-05, + "loss": 0.3684, + "step": 23371 + }, + { + "epoch": 0.403856787392867, + "grad_norm": 1.1487820708319363, + "learning_rate": 1.3522691439687626e-05, + "loss": 0.4676, + "step": 23372 + }, + { + "epoch": 0.4038740669062759, + "grad_norm": 1.305643647056347, + "learning_rate": 1.3522167658878832e-05, + "loss": 0.3908, + "step": 23373 + }, + { + "epoch": 0.4038913464196848, + "grad_norm": 0.7776552974967503, + "learning_rate": 1.3521643867038345e-05, + "loss": 0.3646, + "step": 23374 + }, + { + "epoch": 0.40390862593309373, + "grad_norm": 0.7900660575739582, + "learning_rate": 1.35211200641678e-05, + "loss": 0.3627, + "step": 23375 + }, + { + "epoch": 0.40392590544650264, + "grad_norm": 1.4337021991359422, + "learning_rate": 1.352059625026884e-05, + "loss": 0.5142, + "step": 23376 + }, + { + "epoch": 0.40394318495991155, + "grad_norm": 1.0487326943153166, + "learning_rate": 1.3520072425343107e-05, + "loss": 0.5361, + "step": 23377 + }, + { + "epoch": 0.40396046447332046, + "grad_norm": 1.619414899990846, + "learning_rate": 1.351954858939224e-05, + "loss": 0.4774, + "step": 23378 + }, + { + "epoch": 0.4039777439867293, + "grad_norm": 1.0735659147728827, + "learning_rate": 1.3519024742417876e-05, + "loss": 0.5146, + "step": 23379 + }, + { + "epoch": 0.4039950235001382, + "grad_norm": 0.744315572618668, + "learning_rate": 1.3518500884421664e-05, + "loss": 0.5151, + "step": 23380 + }, + { + "epoch": 0.4040123030135471, + "grad_norm": 0.7722033539037658, + "learning_rate": 1.3517977015405233e-05, + "loss": 0.3358, + "step": 23381 + }, + { + "epoch": 0.40402958252695603, + "grad_norm": 1.2844924685183337, + "learning_rate": 1.3517453135370236e-05, + "loss": 0.7171, + "step": 23382 + }, + { + "epoch": 0.40404686204036494, + "grad_norm": 1.6318871248282902, + "learning_rate": 1.3516929244318307e-05, + "loss": 0.4622, + "step": 23383 + }, + { + "epoch": 0.40406414155377385, + "grad_norm": 0.5845555377850264, + "learning_rate": 1.3516405342251084e-05, + "loss": 0.3221, + "step": 23384 + }, + { + "epoch": 0.40408142106718276, + "grad_norm": 0.986843525737235, + "learning_rate": 1.3515881429170219e-05, + "loss": 0.6057, + "step": 23385 + }, + { + "epoch": 0.40409870058059166, + "grad_norm": 0.8493858229289634, + "learning_rate": 1.3515357505077342e-05, + "loss": 0.5377, + "step": 23386 + }, + { + "epoch": 0.40411598009400057, + "grad_norm": 0.7923768756715945, + "learning_rate": 1.35148335699741e-05, + "loss": 0.4288, + "step": 23387 + }, + { + "epoch": 0.4041332596074095, + "grad_norm": 0.43417969062404255, + "learning_rate": 1.3514309623862132e-05, + "loss": 0.6068, + "step": 23388 + }, + { + "epoch": 0.40415053912081833, + "grad_norm": 0.8583492019103564, + "learning_rate": 1.351378566674308e-05, + "loss": 0.5197, + "step": 23389 + }, + { + "epoch": 0.40416781863422724, + "grad_norm": 1.3343697419596532, + "learning_rate": 1.3513261698618584e-05, + "loss": 0.3874, + "step": 23390 + }, + { + "epoch": 0.40418509814763615, + "grad_norm": 1.3249938682250408, + "learning_rate": 1.3512737719490283e-05, + "loss": 0.3983, + "step": 23391 + }, + { + "epoch": 0.40420237766104505, + "grad_norm": 0.9386481925400495, + "learning_rate": 1.3512213729359821e-05, + "loss": 0.3974, + "step": 23392 + }, + { + "epoch": 0.40421965717445396, + "grad_norm": 1.1881498232349257, + "learning_rate": 1.3511689728228838e-05, + "loss": 0.5077, + "step": 23393 + }, + { + "epoch": 0.40423693668786287, + "grad_norm": 1.1104487830821859, + "learning_rate": 1.351116571609898e-05, + "loss": 0.6823, + "step": 23394 + }, + { + "epoch": 0.4042542162012718, + "grad_norm": 1.4591881317026374, + "learning_rate": 1.351064169297188e-05, + "loss": 0.6039, + "step": 23395 + }, + { + "epoch": 0.4042714957146807, + "grad_norm": 1.1399922943019263, + "learning_rate": 1.3510117658849185e-05, + "loss": 0.5108, + "step": 23396 + }, + { + "epoch": 0.4042887752280896, + "grad_norm": 0.6562241694880259, + "learning_rate": 1.3509593613732534e-05, + "loss": 0.6002, + "step": 23397 + }, + { + "epoch": 0.4043060547414985, + "grad_norm": 0.5948676218083128, + "learning_rate": 1.350906955762357e-05, + "loss": 0.6745, + "step": 23398 + }, + { + "epoch": 0.4043233342549074, + "grad_norm": 0.9274070731796696, + "learning_rate": 1.3508545490523931e-05, + "loss": 0.5413, + "step": 23399 + }, + { + "epoch": 0.40434061376831626, + "grad_norm": 0.4848099341903767, + "learning_rate": 1.3508021412435268e-05, + "loss": 0.5971, + "step": 23400 + }, + { + "epoch": 0.40435789328172517, + "grad_norm": 0.8897977773363327, + "learning_rate": 1.350749732335921e-05, + "loss": 0.5709, + "step": 23401 + }, + { + "epoch": 0.4043751727951341, + "grad_norm": 0.5930196453566987, + "learning_rate": 1.3506973223297403e-05, + "loss": 0.5391, + "step": 23402 + }, + { + "epoch": 0.404392452308543, + "grad_norm": 1.6889094371743567, + "learning_rate": 1.3506449112251492e-05, + "loss": 0.5301, + "step": 23403 + }, + { + "epoch": 0.4044097318219519, + "grad_norm": 0.9526853191513838, + "learning_rate": 1.3505924990223114e-05, + "loss": 0.4807, + "step": 23404 + }, + { + "epoch": 0.4044270113353608, + "grad_norm": 1.2468367730457741, + "learning_rate": 1.3505400857213913e-05, + "loss": 0.4268, + "step": 23405 + }, + { + "epoch": 0.4044442908487697, + "grad_norm": 0.9518744681502695, + "learning_rate": 1.3504876713225529e-05, + "loss": 0.6679, + "step": 23406 + }, + { + "epoch": 0.4044615703621786, + "grad_norm": 1.069773480614663, + "learning_rate": 1.3504352558259606e-05, + "loss": 0.642, + "step": 23407 + }, + { + "epoch": 0.4044788498755875, + "grad_norm": 0.6248788709560011, + "learning_rate": 1.3503828392317785e-05, + "loss": 0.2786, + "step": 23408 + }, + { + "epoch": 0.40449612938899643, + "grad_norm": 0.8819364091123271, + "learning_rate": 1.3503304215401708e-05, + "loss": 0.4968, + "step": 23409 + }, + { + "epoch": 0.40451340890240534, + "grad_norm": 1.233556363352434, + "learning_rate": 1.3502780027513013e-05, + "loss": 0.4796, + "step": 23410 + }, + { + "epoch": 0.4045306884158142, + "grad_norm": 1.8613969080439314, + "learning_rate": 1.3502255828653348e-05, + "loss": 0.4116, + "step": 23411 + }, + { + "epoch": 0.4045479679292231, + "grad_norm": 1.2831333371309934, + "learning_rate": 1.350173161882435e-05, + "loss": 0.4434, + "step": 23412 + }, + { + "epoch": 0.404565247442632, + "grad_norm": 0.8541726589838707, + "learning_rate": 1.3501207398027661e-05, + "loss": 0.5286, + "step": 23413 + }, + { + "epoch": 0.4045825269560409, + "grad_norm": 1.293037833306329, + "learning_rate": 1.3500683166264925e-05, + "loss": 0.4278, + "step": 23414 + }, + { + "epoch": 0.4045998064694498, + "grad_norm": 1.673199545300493, + "learning_rate": 1.3500158923537785e-05, + "loss": 0.4951, + "step": 23415 + }, + { + "epoch": 0.4046170859828587, + "grad_norm": 0.9159076849451487, + "learning_rate": 1.3499634669847878e-05, + "loss": 0.4208, + "step": 23416 + }, + { + "epoch": 0.40463436549626763, + "grad_norm": 1.4874167014803201, + "learning_rate": 1.3499110405196853e-05, + "loss": 0.445, + "step": 23417 + }, + { + "epoch": 0.40465164500967654, + "grad_norm": 1.6377120127113054, + "learning_rate": 1.3498586129586349e-05, + "loss": 0.5823, + "step": 23418 + }, + { + "epoch": 0.40466892452308545, + "grad_norm": 1.2607643559223616, + "learning_rate": 1.3498061843018001e-05, + "loss": 0.6763, + "step": 23419 + }, + { + "epoch": 0.40468620403649436, + "grad_norm": 1.9836969116649052, + "learning_rate": 1.3497537545493464e-05, + "loss": 0.5111, + "step": 23420 + }, + { + "epoch": 0.4047034835499032, + "grad_norm": 0.9630763566495308, + "learning_rate": 1.3497013237014371e-05, + "loss": 0.4005, + "step": 23421 + }, + { + "epoch": 0.4047207630633121, + "grad_norm": 0.7958443231219705, + "learning_rate": 1.3496488917582366e-05, + "loss": 0.5727, + "step": 23422 + }, + { + "epoch": 0.404738042576721, + "grad_norm": 1.2310956304892462, + "learning_rate": 1.3495964587199095e-05, + "loss": 0.4948, + "step": 23423 + }, + { + "epoch": 0.40475532209012993, + "grad_norm": 1.8038389561521229, + "learning_rate": 1.3495440245866194e-05, + "loss": 0.3937, + "step": 23424 + }, + { + "epoch": 0.40477260160353884, + "grad_norm": 0.952296568691277, + "learning_rate": 1.349491589358531e-05, + "loss": 0.3517, + "step": 23425 + }, + { + "epoch": 0.40478988111694775, + "grad_norm": 0.9442100688244238, + "learning_rate": 1.3494391530358084e-05, + "loss": 0.4336, + "step": 23426 + }, + { + "epoch": 0.40480716063035665, + "grad_norm": 1.5011454697359794, + "learning_rate": 1.3493867156186156e-05, + "loss": 0.5513, + "step": 23427 + }, + { + "epoch": 0.40482444014376556, + "grad_norm": 0.9052269042742439, + "learning_rate": 1.3493342771071171e-05, + "loss": 0.3854, + "step": 23428 + }, + { + "epoch": 0.40484171965717447, + "grad_norm": 0.8692840527157196, + "learning_rate": 1.3492818375014774e-05, + "loss": 0.4106, + "step": 23429 + }, + { + "epoch": 0.4048589991705834, + "grad_norm": 0.9219132183370127, + "learning_rate": 1.3492293968018604e-05, + "loss": 0.5087, + "step": 23430 + }, + { + "epoch": 0.4048762786839923, + "grad_norm": 0.9888291503793427, + "learning_rate": 1.3491769550084301e-05, + "loss": 0.4005, + "step": 23431 + }, + { + "epoch": 0.40489355819740114, + "grad_norm": 1.484778229248748, + "learning_rate": 1.3491245121213513e-05, + "loss": 0.4752, + "step": 23432 + }, + { + "epoch": 0.40491083771081005, + "grad_norm": 1.5531523829777836, + "learning_rate": 1.3490720681407878e-05, + "loss": 0.3984, + "step": 23433 + }, + { + "epoch": 0.40492811722421895, + "grad_norm": 0.8544427249292856, + "learning_rate": 1.3490196230669041e-05, + "loss": 0.6449, + "step": 23434 + }, + { + "epoch": 0.40494539673762786, + "grad_norm": 0.7993550800202749, + "learning_rate": 1.3489671768998647e-05, + "loss": 0.4638, + "step": 23435 + }, + { + "epoch": 0.40496267625103677, + "grad_norm": 0.9146683986152779, + "learning_rate": 1.348914729639833e-05, + "loss": 0.6233, + "step": 23436 + }, + { + "epoch": 0.4049799557644457, + "grad_norm": 0.754932851715796, + "learning_rate": 1.3488622812869742e-05, + "loss": 0.8138, + "step": 23437 + }, + { + "epoch": 0.4049972352778546, + "grad_norm": 0.8417916484980762, + "learning_rate": 1.3488098318414525e-05, + "loss": 0.4787, + "step": 23438 + }, + { + "epoch": 0.4050145147912635, + "grad_norm": 1.0898748698468215, + "learning_rate": 1.3487573813034314e-05, + "loss": 0.4087, + "step": 23439 + }, + { + "epoch": 0.4050317943046724, + "grad_norm": 1.2138764129023791, + "learning_rate": 1.3487049296730758e-05, + "loss": 0.283, + "step": 23440 + }, + { + "epoch": 0.4050490738180813, + "grad_norm": 0.6812231010446126, + "learning_rate": 1.34865247695055e-05, + "loss": 0.2729, + "step": 23441 + }, + { + "epoch": 0.40506635333149016, + "grad_norm": 0.8713933614790518, + "learning_rate": 1.3486000231360181e-05, + "loss": 0.4142, + "step": 23442 + }, + { + "epoch": 0.40508363284489907, + "grad_norm": 1.0492691371010334, + "learning_rate": 1.3485475682296445e-05, + "loss": 0.4285, + "step": 23443 + }, + { + "epoch": 0.405100912358308, + "grad_norm": 0.7493138952809402, + "learning_rate": 1.3484951122315933e-05, + "loss": 0.2268, + "step": 23444 + }, + { + "epoch": 0.4051181918717169, + "grad_norm": 1.0393284361212247, + "learning_rate": 1.348442655142029e-05, + "loss": 0.4408, + "step": 23445 + }, + { + "epoch": 0.4051354713851258, + "grad_norm": 1.590638367852534, + "learning_rate": 1.3483901969611157e-05, + "loss": 0.5571, + "step": 23446 + }, + { + "epoch": 0.4051527508985347, + "grad_norm": 1.4049886790641297, + "learning_rate": 1.3483377376890179e-05, + "loss": 0.4533, + "step": 23447 + }, + { + "epoch": 0.4051700304119436, + "grad_norm": 0.8528331198145733, + "learning_rate": 1.3482852773258998e-05, + "loss": 0.3444, + "step": 23448 + }, + { + "epoch": 0.4051873099253525, + "grad_norm": 1.100167595202711, + "learning_rate": 1.348232815871926e-05, + "loss": 0.354, + "step": 23449 + }, + { + "epoch": 0.4052045894387614, + "grad_norm": 1.222365407861242, + "learning_rate": 1.3481803533272604e-05, + "loss": 0.3499, + "step": 23450 + }, + { + "epoch": 0.4052218689521703, + "grad_norm": 1.0808884001279664, + "learning_rate": 1.3481278896920676e-05, + "loss": 0.4746, + "step": 23451 + }, + { + "epoch": 0.40523914846557924, + "grad_norm": 0.7637257147735695, + "learning_rate": 1.3480754249665115e-05, + "loss": 0.6112, + "step": 23452 + }, + { + "epoch": 0.4052564279789881, + "grad_norm": 0.7613261704005704, + "learning_rate": 1.3480229591507567e-05, + "loss": 0.5012, + "step": 23453 + }, + { + "epoch": 0.405273707492397, + "grad_norm": 1.443932253059745, + "learning_rate": 1.3479704922449676e-05, + "loss": 0.4286, + "step": 23454 + }, + { + "epoch": 0.4052909870058059, + "grad_norm": 1.0402140007166023, + "learning_rate": 1.3479180242493088e-05, + "loss": 0.5305, + "step": 23455 + }, + { + "epoch": 0.4053082665192148, + "grad_norm": 0.5405378341703985, + "learning_rate": 1.3478655551639438e-05, + "loss": 0.4374, + "step": 23456 + }, + { + "epoch": 0.4053255460326237, + "grad_norm": 0.8738399522890888, + "learning_rate": 1.3478130849890378e-05, + "loss": 0.4738, + "step": 23457 + }, + { + "epoch": 0.4053428255460326, + "grad_norm": 0.7622905705004392, + "learning_rate": 1.3477606137247548e-05, + "loss": 0.4463, + "step": 23458 + }, + { + "epoch": 0.40536010505944153, + "grad_norm": 0.8080341073399016, + "learning_rate": 1.3477081413712587e-05, + "loss": 0.4364, + "step": 23459 + }, + { + "epoch": 0.40537738457285044, + "grad_norm": 0.5642035752299678, + "learning_rate": 1.3476556679287147e-05, + "loss": 0.3139, + "step": 23460 + }, + { + "epoch": 0.40539466408625935, + "grad_norm": 1.5916325573665269, + "learning_rate": 1.3476031933972866e-05, + "loss": 0.835, + "step": 23461 + }, + { + "epoch": 0.40541194359966826, + "grad_norm": 0.9919183913328968, + "learning_rate": 1.3475507177771391e-05, + "loss": 0.5445, + "step": 23462 + }, + { + "epoch": 0.4054292231130771, + "grad_norm": 1.071304291757055, + "learning_rate": 1.3474982410684363e-05, + "loss": 0.3778, + "step": 23463 + }, + { + "epoch": 0.405446502626486, + "grad_norm": 1.4794587318438728, + "learning_rate": 1.3474457632713422e-05, + "loss": 0.4178, + "step": 23464 + }, + { + "epoch": 0.4054637821398949, + "grad_norm": 0.89376455932606, + "learning_rate": 1.3473932843860218e-05, + "loss": 0.4884, + "step": 23465 + }, + { + "epoch": 0.40548106165330383, + "grad_norm": 0.7847880031099881, + "learning_rate": 1.3473408044126392e-05, + "loss": 0.4332, + "step": 23466 + }, + { + "epoch": 0.40549834116671274, + "grad_norm": 0.9692871471640596, + "learning_rate": 1.3472883233513588e-05, + "loss": 0.4023, + "step": 23467 + }, + { + "epoch": 0.40551562068012165, + "grad_norm": 1.1761653462697987, + "learning_rate": 1.3472358412023451e-05, + "loss": 0.553, + "step": 23468 + }, + { + "epoch": 0.40553290019353055, + "grad_norm": 1.4014058477319296, + "learning_rate": 1.3471833579657623e-05, + "loss": 0.5143, + "step": 23469 + }, + { + "epoch": 0.40555017970693946, + "grad_norm": 1.202714192381173, + "learning_rate": 1.347130873641775e-05, + "loss": 0.5109, + "step": 23470 + }, + { + "epoch": 0.40556745922034837, + "grad_norm": 0.817546632028487, + "learning_rate": 1.347078388230547e-05, + "loss": 0.6322, + "step": 23471 + }, + { + "epoch": 0.4055847387337573, + "grad_norm": 0.7730209930510623, + "learning_rate": 1.3470259017322435e-05, + "loss": 0.384, + "step": 23472 + }, + { + "epoch": 0.4056020182471662, + "grad_norm": 0.7330447209207265, + "learning_rate": 1.3469734141470283e-05, + "loss": 0.4396, + "step": 23473 + }, + { + "epoch": 0.40561929776057504, + "grad_norm": 1.4550946470459136, + "learning_rate": 1.3469209254750662e-05, + "loss": 0.4783, + "step": 23474 + }, + { + "epoch": 0.40563657727398394, + "grad_norm": 1.0778114875694647, + "learning_rate": 1.3468684357165211e-05, + "loss": 0.5776, + "step": 23475 + }, + { + "epoch": 0.40565385678739285, + "grad_norm": 1.5633900064249344, + "learning_rate": 1.3468159448715579e-05, + "loss": 0.6206, + "step": 23476 + }, + { + "epoch": 0.40567113630080176, + "grad_norm": 0.7327811405874026, + "learning_rate": 1.3467634529403406e-05, + "loss": 0.331, + "step": 23477 + }, + { + "epoch": 0.40568841581421067, + "grad_norm": 0.9691814412952519, + "learning_rate": 1.3467109599230341e-05, + "loss": 0.2604, + "step": 23478 + }, + { + "epoch": 0.4057056953276196, + "grad_norm": 1.0033440347232165, + "learning_rate": 1.3466584658198021e-05, + "loss": 0.6277, + "step": 23479 + }, + { + "epoch": 0.4057229748410285, + "grad_norm": 0.8999935174884189, + "learning_rate": 1.3466059706308099e-05, + "loss": 0.4708, + "step": 23480 + }, + { + "epoch": 0.4057402543544374, + "grad_norm": 0.7586858698256086, + "learning_rate": 1.3465534743562213e-05, + "loss": 0.5542, + "step": 23481 + }, + { + "epoch": 0.4057575338678463, + "grad_norm": 1.2608117698785202, + "learning_rate": 1.346500976996201e-05, + "loss": 0.4091, + "step": 23482 + }, + { + "epoch": 0.4057748133812552, + "grad_norm": 1.315193075518827, + "learning_rate": 1.3464484785509132e-05, + "loss": 0.6268, + "step": 23483 + }, + { + "epoch": 0.4057920928946641, + "grad_norm": 0.8826546318624289, + "learning_rate": 1.3463959790205224e-05, + "loss": 0.5617, + "step": 23484 + }, + { + "epoch": 0.40580937240807297, + "grad_norm": 1.0818952551921903, + "learning_rate": 1.346343478405193e-05, + "loss": 0.5056, + "step": 23485 + }, + { + "epoch": 0.4058266519214819, + "grad_norm": 0.7867670801348745, + "learning_rate": 1.3462909767050896e-05, + "loss": 0.4889, + "step": 23486 + }, + { + "epoch": 0.4058439314348908, + "grad_norm": 0.9891865255778983, + "learning_rate": 1.3462384739203764e-05, + "loss": 0.4148, + "step": 23487 + }, + { + "epoch": 0.4058612109482997, + "grad_norm": 0.7202869581434254, + "learning_rate": 1.346185970051218e-05, + "loss": 0.2738, + "step": 23488 + }, + { + "epoch": 0.4058784904617086, + "grad_norm": 1.5185849756098393, + "learning_rate": 1.346133465097779e-05, + "loss": 0.3542, + "step": 23489 + }, + { + "epoch": 0.4058957699751175, + "grad_norm": 1.0155573516819107, + "learning_rate": 1.3460809590602238e-05, + "loss": 0.4235, + "step": 23490 + }, + { + "epoch": 0.4059130494885264, + "grad_norm": 0.7248070092883255, + "learning_rate": 1.3460284519387163e-05, + "loss": 0.5687, + "step": 23491 + }, + { + "epoch": 0.4059303290019353, + "grad_norm": 0.9387653345701028, + "learning_rate": 1.345975943733422e-05, + "loss": 0.3829, + "step": 23492 + }, + { + "epoch": 0.4059476085153442, + "grad_norm": 1.0691208244042767, + "learning_rate": 1.3459234344445043e-05, + "loss": 0.6211, + "step": 23493 + }, + { + "epoch": 0.40596488802875313, + "grad_norm": 1.0418858966252986, + "learning_rate": 1.3458709240721282e-05, + "loss": 0.3451, + "step": 23494 + }, + { + "epoch": 0.405982167542162, + "grad_norm": 0.6569876870337208, + "learning_rate": 1.3458184126164582e-05, + "loss": 0.2884, + "step": 23495 + }, + { + "epoch": 0.4059994470555709, + "grad_norm": 0.9258551811107175, + "learning_rate": 1.3457659000776585e-05, + "loss": 0.3669, + "step": 23496 + }, + { + "epoch": 0.4060167265689798, + "grad_norm": 1.4511211306120184, + "learning_rate": 1.3457133864558935e-05, + "loss": 0.5292, + "step": 23497 + }, + { + "epoch": 0.4060340060823887, + "grad_norm": 1.1194733222322597, + "learning_rate": 1.3456608717513284e-05, + "loss": 0.5843, + "step": 23498 + }, + { + "epoch": 0.4060512855957976, + "grad_norm": 0.8677196802805041, + "learning_rate": 1.3456083559641268e-05, + "loss": 0.5282, + "step": 23499 + }, + { + "epoch": 0.4060685651092065, + "grad_norm": 0.6791381508662832, + "learning_rate": 1.3455558390944539e-05, + "loss": 0.3975, + "step": 23500 + }, + { + "epoch": 0.40608584462261543, + "grad_norm": 1.034418941777635, + "learning_rate": 1.3455033211424736e-05, + "loss": 0.4966, + "step": 23501 + }, + { + "epoch": 0.40610312413602434, + "grad_norm": 0.9006321885051176, + "learning_rate": 1.3454508021083505e-05, + "loss": 0.3338, + "step": 23502 + }, + { + "epoch": 0.40612040364943325, + "grad_norm": 0.6322016065228131, + "learning_rate": 1.3453982819922498e-05, + "loss": 0.2724, + "step": 23503 + }, + { + "epoch": 0.40613768316284216, + "grad_norm": 0.9967146765171329, + "learning_rate": 1.3453457607943347e-05, + "loss": 0.4309, + "step": 23504 + }, + { + "epoch": 0.40615496267625106, + "grad_norm": 0.6408322933400821, + "learning_rate": 1.345293238514771e-05, + "loss": 0.3465, + "step": 23505 + }, + { + "epoch": 0.4061722421896599, + "grad_norm": 0.8827638610801496, + "learning_rate": 1.3452407151537226e-05, + "loss": 0.4739, + "step": 23506 + }, + { + "epoch": 0.4061895217030688, + "grad_norm": 1.0167098039945948, + "learning_rate": 1.3451881907113537e-05, + "loss": 0.4558, + "step": 23507 + }, + { + "epoch": 0.40620680121647773, + "grad_norm": 0.40694105854456, + "learning_rate": 1.3451356651878294e-05, + "loss": 0.5315, + "step": 23508 + }, + { + "epoch": 0.40622408072988664, + "grad_norm": 1.015633162458917, + "learning_rate": 1.3450831385833138e-05, + "loss": 0.4839, + "step": 23509 + }, + { + "epoch": 0.40624136024329555, + "grad_norm": 0.8324620813966548, + "learning_rate": 1.3450306108979718e-05, + "loss": 0.4705, + "step": 23510 + }, + { + "epoch": 0.40625863975670445, + "grad_norm": 0.7898971530222121, + "learning_rate": 1.3449780821319676e-05, + "loss": 0.528, + "step": 23511 + }, + { + "epoch": 0.40627591927011336, + "grad_norm": 0.9749267259057914, + "learning_rate": 1.3449255522854658e-05, + "loss": 0.4605, + "step": 23512 + }, + { + "epoch": 0.40629319878352227, + "grad_norm": 0.9368027378773207, + "learning_rate": 1.344873021358631e-05, + "loss": 0.442, + "step": 23513 + }, + { + "epoch": 0.4063104782969312, + "grad_norm": 0.8052165867756618, + "learning_rate": 1.3448204893516276e-05, + "loss": 0.3121, + "step": 23514 + }, + { + "epoch": 0.4063277578103401, + "grad_norm": 0.729625111451112, + "learning_rate": 1.3447679562646205e-05, + "loss": 0.4845, + "step": 23515 + }, + { + "epoch": 0.40634503732374894, + "grad_norm": 1.7322129095437262, + "learning_rate": 1.3447154220977736e-05, + "loss": 0.5245, + "step": 23516 + }, + { + "epoch": 0.40636231683715784, + "grad_norm": 1.2120253961502856, + "learning_rate": 1.344662886851252e-05, + "loss": 0.6019, + "step": 23517 + }, + { + "epoch": 0.40637959635056675, + "grad_norm": 0.6453869389125917, + "learning_rate": 1.34461035052522e-05, + "loss": 0.5127, + "step": 23518 + }, + { + "epoch": 0.40639687586397566, + "grad_norm": 1.1569764864770435, + "learning_rate": 1.3445578131198421e-05, + "loss": 0.4696, + "step": 23519 + }, + { + "epoch": 0.40641415537738457, + "grad_norm": 1.5957181171637538, + "learning_rate": 1.3445052746352832e-05, + "loss": 0.3304, + "step": 23520 + }, + { + "epoch": 0.4064314348907935, + "grad_norm": 1.4536097068530007, + "learning_rate": 1.3444527350717074e-05, + "loss": 0.5607, + "step": 23521 + }, + { + "epoch": 0.4064487144042024, + "grad_norm": 0.8035700473091194, + "learning_rate": 1.3444001944292792e-05, + "loss": 0.5084, + "step": 23522 + }, + { + "epoch": 0.4064659939176113, + "grad_norm": 1.2937789218686506, + "learning_rate": 1.344347652708164e-05, + "loss": 0.6368, + "step": 23523 + }, + { + "epoch": 0.4064832734310202, + "grad_norm": 1.2705465906810656, + "learning_rate": 1.3442951099085253e-05, + "loss": 0.4155, + "step": 23524 + }, + { + "epoch": 0.4065005529444291, + "grad_norm": 2.005342430517118, + "learning_rate": 1.3442425660305285e-05, + "loss": 0.558, + "step": 23525 + }, + { + "epoch": 0.406517832457838, + "grad_norm": 0.8464938298882667, + "learning_rate": 1.3441900210743377e-05, + "loss": 0.489, + "step": 23526 + }, + { + "epoch": 0.40653511197124687, + "grad_norm": 1.064171351892648, + "learning_rate": 1.3441374750401174e-05, + "loss": 0.3812, + "step": 23527 + }, + { + "epoch": 0.4065523914846558, + "grad_norm": 0.8528140678872055, + "learning_rate": 1.3440849279280326e-05, + "loss": 0.3931, + "step": 23528 + }, + { + "epoch": 0.4065696709980647, + "grad_norm": 0.48808803943196855, + "learning_rate": 1.3440323797382475e-05, + "loss": 0.5586, + "step": 23529 + }, + { + "epoch": 0.4065869505114736, + "grad_norm": 0.9540535212155132, + "learning_rate": 1.343979830470927e-05, + "loss": 0.3343, + "step": 23530 + }, + { + "epoch": 0.4066042300248825, + "grad_norm": 1.8536761096147945, + "learning_rate": 1.3439272801262353e-05, + "loss": 0.3767, + "step": 23531 + }, + { + "epoch": 0.4066215095382914, + "grad_norm": 0.8369451612122076, + "learning_rate": 1.3438747287043375e-05, + "loss": 0.4381, + "step": 23532 + }, + { + "epoch": 0.4066387890517003, + "grad_norm": 0.645745149428596, + "learning_rate": 1.3438221762053977e-05, + "loss": 0.521, + "step": 23533 + }, + { + "epoch": 0.4066560685651092, + "grad_norm": 0.742484182249749, + "learning_rate": 1.3437696226295808e-05, + "loss": 0.413, + "step": 23534 + }, + { + "epoch": 0.4066733480785181, + "grad_norm": 1.2736009618647726, + "learning_rate": 1.3437170679770515e-05, + "loss": 0.4105, + "step": 23535 + }, + { + "epoch": 0.40669062759192703, + "grad_norm": 1.06388902546997, + "learning_rate": 1.3436645122479739e-05, + "loss": 0.519, + "step": 23536 + }, + { + "epoch": 0.4067079071053359, + "grad_norm": 0.7382607376315974, + "learning_rate": 1.3436119554425133e-05, + "loss": 0.3692, + "step": 23537 + }, + { + "epoch": 0.4067251866187448, + "grad_norm": 0.7203748599433151, + "learning_rate": 1.3435593975608337e-05, + "loss": 0.2769, + "step": 23538 + }, + { + "epoch": 0.4067424661321537, + "grad_norm": 0.6868704846231335, + "learning_rate": 1.3435068386030999e-05, + "loss": 0.3069, + "step": 23539 + }, + { + "epoch": 0.4067597456455626, + "grad_norm": 0.7358825062016475, + "learning_rate": 1.3434542785694766e-05, + "loss": 0.5725, + "step": 23540 + }, + { + "epoch": 0.4067770251589715, + "grad_norm": 1.1312867585635011, + "learning_rate": 1.3434017174601285e-05, + "loss": 0.4349, + "step": 23541 + }, + { + "epoch": 0.4067943046723804, + "grad_norm": 0.620475912910835, + "learning_rate": 1.3433491552752198e-05, + "loss": 0.2758, + "step": 23542 + }, + { + "epoch": 0.40681158418578933, + "grad_norm": 0.7829518336151479, + "learning_rate": 1.3432965920149159e-05, + "loss": 0.6409, + "step": 23543 + }, + { + "epoch": 0.40682886369919824, + "grad_norm": 0.6775859320315702, + "learning_rate": 1.343244027679381e-05, + "loss": 0.4305, + "step": 23544 + }, + { + "epoch": 0.40684614321260715, + "grad_norm": 1.1571107480887213, + "learning_rate": 1.3431914622687793e-05, + "loss": 0.3396, + "step": 23545 + }, + { + "epoch": 0.40686342272601606, + "grad_norm": 0.9569460539230114, + "learning_rate": 1.3431388957832763e-05, + "loss": 0.5094, + "step": 23546 + }, + { + "epoch": 0.40688070223942496, + "grad_norm": 1.0272799424539822, + "learning_rate": 1.3430863282230359e-05, + "loss": 0.7967, + "step": 23547 + }, + { + "epoch": 0.4068979817528338, + "grad_norm": 0.9669970298833563, + "learning_rate": 1.3430337595882232e-05, + "loss": 0.4309, + "step": 23548 + }, + { + "epoch": 0.4069152612662427, + "grad_norm": 0.9194932889096347, + "learning_rate": 1.3429811898790026e-05, + "loss": 0.5508, + "step": 23549 + }, + { + "epoch": 0.40693254077965163, + "grad_norm": 0.9672591147041106, + "learning_rate": 1.3429286190955391e-05, + "loss": 0.3627, + "step": 23550 + }, + { + "epoch": 0.40694982029306054, + "grad_norm": 0.6863692589685115, + "learning_rate": 1.3428760472379966e-05, + "loss": 0.4102, + "step": 23551 + }, + { + "epoch": 0.40696709980646945, + "grad_norm": 1.4875690207432517, + "learning_rate": 1.3428234743065405e-05, + "loss": 0.4254, + "step": 23552 + }, + { + "epoch": 0.40698437931987835, + "grad_norm": 0.878426355743375, + "learning_rate": 1.3427709003013353e-05, + "loss": 0.316, + "step": 23553 + }, + { + "epoch": 0.40700165883328726, + "grad_norm": 0.9504183450070562, + "learning_rate": 1.3427183252225457e-05, + "loss": 0.537, + "step": 23554 + }, + { + "epoch": 0.40701893834669617, + "grad_norm": 1.0194158727123594, + "learning_rate": 1.3426657490703363e-05, + "loss": 0.5135, + "step": 23555 + }, + { + "epoch": 0.4070362178601051, + "grad_norm": 1.3500286387727676, + "learning_rate": 1.3426131718448714e-05, + "loss": 0.3361, + "step": 23556 + }, + { + "epoch": 0.407053497373514, + "grad_norm": 0.9828560385534915, + "learning_rate": 1.342560593546316e-05, + "loss": 0.7041, + "step": 23557 + }, + { + "epoch": 0.4070707768869229, + "grad_norm": 0.47737324304862067, + "learning_rate": 1.342508014174835e-05, + "loss": 0.7529, + "step": 23558 + }, + { + "epoch": 0.40708805640033174, + "grad_norm": 1.2061400724568239, + "learning_rate": 1.3424554337305925e-05, + "loss": 0.4066, + "step": 23559 + }, + { + "epoch": 0.40710533591374065, + "grad_norm": 0.678862605151501, + "learning_rate": 1.3424028522137542e-05, + "loss": 0.3352, + "step": 23560 + }, + { + "epoch": 0.40712261542714956, + "grad_norm": 1.027517763902575, + "learning_rate": 1.3423502696244838e-05, + "loss": 0.46, + "step": 23561 + }, + { + "epoch": 0.40713989494055847, + "grad_norm": 1.8373337334213733, + "learning_rate": 1.3422976859629462e-05, + "loss": 0.588, + "step": 23562 + }, + { + "epoch": 0.4071571744539674, + "grad_norm": 1.3657878956406933, + "learning_rate": 1.3422451012293065e-05, + "loss": 0.6883, + "step": 23563 + }, + { + "epoch": 0.4071744539673763, + "grad_norm": 0.9665956945910016, + "learning_rate": 1.342192515423729e-05, + "loss": 0.5375, + "step": 23564 + }, + { + "epoch": 0.4071917334807852, + "grad_norm": 1.1941642970514381, + "learning_rate": 1.3421399285463787e-05, + "loss": 0.5327, + "step": 23565 + }, + { + "epoch": 0.4072090129941941, + "grad_norm": 1.4808738703440165, + "learning_rate": 1.34208734059742e-05, + "loss": 0.4219, + "step": 23566 + }, + { + "epoch": 0.407226292507603, + "grad_norm": 1.2821697957908975, + "learning_rate": 1.3420347515770175e-05, + "loss": 0.555, + "step": 23567 + }, + { + "epoch": 0.4072435720210119, + "grad_norm": 0.7184230366751341, + "learning_rate": 1.3419821614853365e-05, + "loss": 0.561, + "step": 23568 + }, + { + "epoch": 0.40726085153442076, + "grad_norm": 1.5035549915934474, + "learning_rate": 1.3419295703225415e-05, + "loss": 0.4194, + "step": 23569 + }, + { + "epoch": 0.4072781310478297, + "grad_norm": 0.7859137440224051, + "learning_rate": 1.3418769780887968e-05, + "loss": 0.2524, + "step": 23570 + }, + { + "epoch": 0.4072954105612386, + "grad_norm": 1.0876488591232352, + "learning_rate": 1.3418243847842676e-05, + "loss": 0.6403, + "step": 23571 + }, + { + "epoch": 0.4073126900746475, + "grad_norm": 1.250383306016336, + "learning_rate": 1.3417717904091184e-05, + "loss": 0.5264, + "step": 23572 + }, + { + "epoch": 0.4073299695880564, + "grad_norm": 0.8271210226931636, + "learning_rate": 1.3417191949635137e-05, + "loss": 0.4066, + "step": 23573 + }, + { + "epoch": 0.4073472491014653, + "grad_norm": 0.8153609705118139, + "learning_rate": 1.3416665984476187e-05, + "loss": 0.5865, + "step": 23574 + }, + { + "epoch": 0.4073645286148742, + "grad_norm": 0.44683931025779544, + "learning_rate": 1.3416140008615983e-05, + "loss": 0.7265, + "step": 23575 + }, + { + "epoch": 0.4073818081282831, + "grad_norm": 2.3980637998568346, + "learning_rate": 1.3415614022056164e-05, + "loss": 0.4631, + "step": 23576 + }, + { + "epoch": 0.407399087641692, + "grad_norm": 0.5468300423614062, + "learning_rate": 1.3415088024798386e-05, + "loss": 0.5459, + "step": 23577 + }, + { + "epoch": 0.40741636715510093, + "grad_norm": 0.5051958787162071, + "learning_rate": 1.3414562016844292e-05, + "loss": 0.4909, + "step": 23578 + }, + { + "epoch": 0.40743364666850984, + "grad_norm": 0.8060664015480462, + "learning_rate": 1.3414035998195528e-05, + "loss": 0.3833, + "step": 23579 + }, + { + "epoch": 0.4074509261819187, + "grad_norm": 1.2366220505197503, + "learning_rate": 1.3413509968853743e-05, + "loss": 0.5432, + "step": 23580 + }, + { + "epoch": 0.4074682056953276, + "grad_norm": 1.2286229351618263, + "learning_rate": 1.341298392882059e-05, + "loss": 0.3255, + "step": 23581 + }, + { + "epoch": 0.4074854852087365, + "grad_norm": 1.1274853791069654, + "learning_rate": 1.3412457878097706e-05, + "loss": 0.419, + "step": 23582 + }, + { + "epoch": 0.4075027647221454, + "grad_norm": 0.7507719058014435, + "learning_rate": 1.341193181668675e-05, + "loss": 0.3299, + "step": 23583 + }, + { + "epoch": 0.4075200442355543, + "grad_norm": 1.0866631111584908, + "learning_rate": 1.3411405744589363e-05, + "loss": 0.6537, + "step": 23584 + }, + { + "epoch": 0.40753732374896323, + "grad_norm": 0.7682135333849458, + "learning_rate": 1.3410879661807194e-05, + "loss": 0.4482, + "step": 23585 + }, + { + "epoch": 0.40755460326237214, + "grad_norm": 1.2234532060027126, + "learning_rate": 1.341035356834189e-05, + "loss": 0.5147, + "step": 23586 + }, + { + "epoch": 0.40757188277578105, + "grad_norm": 1.1465181753251958, + "learning_rate": 1.34098274641951e-05, + "loss": 0.3463, + "step": 23587 + }, + { + "epoch": 0.40758916228918995, + "grad_norm": 0.8377994415343232, + "learning_rate": 1.3409301349368468e-05, + "loss": 0.5235, + "step": 23588 + }, + { + "epoch": 0.40760644180259886, + "grad_norm": 1.0301129732122392, + "learning_rate": 1.3408775223863648e-05, + "loss": 0.4304, + "step": 23589 + }, + { + "epoch": 0.4076237213160077, + "grad_norm": 0.7863066063558998, + "learning_rate": 1.3408249087682286e-05, + "loss": 0.4166, + "step": 23590 + }, + { + "epoch": 0.4076410008294166, + "grad_norm": 0.974063436692749, + "learning_rate": 1.3407722940826028e-05, + "loss": 0.4991, + "step": 23591 + }, + { + "epoch": 0.40765828034282553, + "grad_norm": 0.9477198505340837, + "learning_rate": 1.3407196783296522e-05, + "loss": 0.6459, + "step": 23592 + }, + { + "epoch": 0.40767555985623444, + "grad_norm": 0.8299048315153087, + "learning_rate": 1.3406670615095415e-05, + "loss": 0.5689, + "step": 23593 + }, + { + "epoch": 0.40769283936964335, + "grad_norm": 1.3153203094093788, + "learning_rate": 1.3406144436224357e-05, + "loss": 0.567, + "step": 23594 + }, + { + "epoch": 0.40771011888305225, + "grad_norm": 0.7392682652284089, + "learning_rate": 1.3405618246685002e-05, + "loss": 0.341, + "step": 23595 + }, + { + "epoch": 0.40772739839646116, + "grad_norm": 0.7994120141036513, + "learning_rate": 1.3405092046478984e-05, + "loss": 0.478, + "step": 23596 + }, + { + "epoch": 0.40774467790987007, + "grad_norm": 1.0341999336579737, + "learning_rate": 1.3404565835607963e-05, + "loss": 0.5189, + "step": 23597 + }, + { + "epoch": 0.407761957423279, + "grad_norm": 0.9962041164772036, + "learning_rate": 1.3404039614073585e-05, + "loss": 0.3674, + "step": 23598 + }, + { + "epoch": 0.4077792369366879, + "grad_norm": 0.9774534727013748, + "learning_rate": 1.3403513381877491e-05, + "loss": 0.3142, + "step": 23599 + }, + { + "epoch": 0.4077965164500968, + "grad_norm": 1.0180663161824053, + "learning_rate": 1.3402987139021337e-05, + "loss": 0.4564, + "step": 23600 + }, + { + "epoch": 0.40781379596350564, + "grad_norm": 0.9002284100858023, + "learning_rate": 1.3402460885506769e-05, + "loss": 0.4997, + "step": 23601 + }, + { + "epoch": 0.40783107547691455, + "grad_norm": 1.047176510149029, + "learning_rate": 1.3401934621335432e-05, + "loss": 0.3283, + "step": 23602 + }, + { + "epoch": 0.40784835499032346, + "grad_norm": 0.5341830125091479, + "learning_rate": 1.3401408346508982e-05, + "loss": 0.7831, + "step": 23603 + }, + { + "epoch": 0.40786563450373237, + "grad_norm": 0.8122064650670816, + "learning_rate": 1.340088206102906e-05, + "loss": 0.4023, + "step": 23604 + }, + { + "epoch": 0.4078829140171413, + "grad_norm": 1.8348618903771514, + "learning_rate": 1.3400355764897318e-05, + "loss": 0.5037, + "step": 23605 + }, + { + "epoch": 0.4079001935305502, + "grad_norm": 1.0755024079211213, + "learning_rate": 1.3399829458115404e-05, + "loss": 0.5181, + "step": 23606 + }, + { + "epoch": 0.4079174730439591, + "grad_norm": 1.1688354027802559, + "learning_rate": 1.3399303140684965e-05, + "loss": 0.5215, + "step": 23607 + }, + { + "epoch": 0.407934752557368, + "grad_norm": 0.9682951118368217, + "learning_rate": 1.3398776812607648e-05, + "loss": 0.4835, + "step": 23608 + }, + { + "epoch": 0.4079520320707769, + "grad_norm": 1.0180763204960879, + "learning_rate": 1.3398250473885103e-05, + "loss": 0.4995, + "step": 23609 + }, + { + "epoch": 0.4079693115841858, + "grad_norm": 0.5088027625752962, + "learning_rate": 1.3397724124518984e-05, + "loss": 0.8728, + "step": 23610 + }, + { + "epoch": 0.40798659109759466, + "grad_norm": 1.4046308794949278, + "learning_rate": 1.339719776451093e-05, + "loss": 0.3533, + "step": 23611 + }, + { + "epoch": 0.40800387061100357, + "grad_norm": 1.5671229885322975, + "learning_rate": 1.33966713938626e-05, + "loss": 0.4507, + "step": 23612 + }, + { + "epoch": 0.4080211501244125, + "grad_norm": 0.7990934367253543, + "learning_rate": 1.3396145012575632e-05, + "loss": 0.403, + "step": 23613 + }, + { + "epoch": 0.4080384296378214, + "grad_norm": 1.5175761356028408, + "learning_rate": 1.339561862065168e-05, + "loss": 0.4758, + "step": 23614 + }, + { + "epoch": 0.4080557091512303, + "grad_norm": 0.780101261918082, + "learning_rate": 1.3395092218092396e-05, + "loss": 0.4631, + "step": 23615 + }, + { + "epoch": 0.4080729886646392, + "grad_norm": 0.7955515229523376, + "learning_rate": 1.3394565804899422e-05, + "loss": 0.4531, + "step": 23616 + }, + { + "epoch": 0.4080902681780481, + "grad_norm": 0.8244562880336767, + "learning_rate": 1.3394039381074413e-05, + "loss": 0.3953, + "step": 23617 + }, + { + "epoch": 0.408107547691457, + "grad_norm": 0.719722680661578, + "learning_rate": 1.3393512946619013e-05, + "loss": 0.4146, + "step": 23618 + }, + { + "epoch": 0.4081248272048659, + "grad_norm": 0.9501018899173332, + "learning_rate": 1.339298650153487e-05, + "loss": 0.5649, + "step": 23619 + }, + { + "epoch": 0.40814210671827483, + "grad_norm": 0.38266538424038915, + "learning_rate": 1.3392460045823639e-05, + "loss": 0.5529, + "step": 23620 + }, + { + "epoch": 0.40815938623168374, + "grad_norm": 1.3857022756437856, + "learning_rate": 1.3391933579486964e-05, + "loss": 0.3788, + "step": 23621 + }, + { + "epoch": 0.4081766657450926, + "grad_norm": 0.9077809797130709, + "learning_rate": 1.3391407102526495e-05, + "loss": 0.3735, + "step": 23622 + }, + { + "epoch": 0.4081939452585015, + "grad_norm": 1.6678829371426096, + "learning_rate": 1.3390880614943881e-05, + "loss": 0.5302, + "step": 23623 + }, + { + "epoch": 0.4082112247719104, + "grad_norm": 0.5568818164214642, + "learning_rate": 1.3390354116740773e-05, + "loss": 0.6945, + "step": 23624 + }, + { + "epoch": 0.4082285042853193, + "grad_norm": 1.7771733075081448, + "learning_rate": 1.3389827607918815e-05, + "loss": 0.5231, + "step": 23625 + }, + { + "epoch": 0.4082457837987282, + "grad_norm": 0.9060470574255831, + "learning_rate": 1.3389301088479664e-05, + "loss": 0.499, + "step": 23626 + }, + { + "epoch": 0.40826306331213713, + "grad_norm": 1.2946386375671692, + "learning_rate": 1.3388774558424961e-05, + "loss": 0.4616, + "step": 23627 + }, + { + "epoch": 0.40828034282554604, + "grad_norm": 1.4422871885408381, + "learning_rate": 1.338824801775636e-05, + "loss": 0.5863, + "step": 23628 + }, + { + "epoch": 0.40829762233895495, + "grad_norm": 0.7463713304664227, + "learning_rate": 1.3387721466475508e-05, + "loss": 0.2295, + "step": 23629 + }, + { + "epoch": 0.40831490185236385, + "grad_norm": 1.204818986935275, + "learning_rate": 1.3387194904584055e-05, + "loss": 0.5568, + "step": 23630 + }, + { + "epoch": 0.40833218136577276, + "grad_norm": 1.3375850442729038, + "learning_rate": 1.338666833208365e-05, + "loss": 0.462, + "step": 23631 + }, + { + "epoch": 0.40834946087918167, + "grad_norm": 1.6705610661271035, + "learning_rate": 1.338614174897594e-05, + "loss": 0.4987, + "step": 23632 + }, + { + "epoch": 0.4083667403925905, + "grad_norm": 1.2424342902119425, + "learning_rate": 1.3385615155262581e-05, + "loss": 0.4303, + "step": 23633 + }, + { + "epoch": 0.40838401990599943, + "grad_norm": 1.5783743720363386, + "learning_rate": 1.3385088550945215e-05, + "loss": 0.4115, + "step": 23634 + }, + { + "epoch": 0.40840129941940834, + "grad_norm": 0.9687910158693, + "learning_rate": 1.3384561936025495e-05, + "loss": 0.5698, + "step": 23635 + }, + { + "epoch": 0.40841857893281724, + "grad_norm": 0.8285333211629706, + "learning_rate": 1.3384035310505072e-05, + "loss": 0.4287, + "step": 23636 + }, + { + "epoch": 0.40843585844622615, + "grad_norm": 0.9939573604317855, + "learning_rate": 1.3383508674385593e-05, + "loss": 0.5841, + "step": 23637 + }, + { + "epoch": 0.40845313795963506, + "grad_norm": 1.3467478821279724, + "learning_rate": 1.3382982027668705e-05, + "loss": 0.572, + "step": 23638 + }, + { + "epoch": 0.40847041747304397, + "grad_norm": 0.6929552164281617, + "learning_rate": 1.338245537035606e-05, + "loss": 0.4331, + "step": 23639 + }, + { + "epoch": 0.4084876969864529, + "grad_norm": 1.7705979579280888, + "learning_rate": 1.3381928702449312e-05, + "loss": 0.5742, + "step": 23640 + }, + { + "epoch": 0.4085049764998618, + "grad_norm": 0.7530001506520156, + "learning_rate": 1.3381402023950103e-05, + "loss": 0.4532, + "step": 23641 + }, + { + "epoch": 0.4085222560132707, + "grad_norm": 0.9748260763791471, + "learning_rate": 1.3380875334860085e-05, + "loss": 0.3544, + "step": 23642 + }, + { + "epoch": 0.40853953552667954, + "grad_norm": 0.6756596083068006, + "learning_rate": 1.3380348635180911e-05, + "loss": 0.2496, + "step": 23643 + }, + { + "epoch": 0.40855681504008845, + "grad_norm": 1.1461450994898352, + "learning_rate": 1.3379821924914228e-05, + "loss": 0.3056, + "step": 23644 + }, + { + "epoch": 0.40857409455349736, + "grad_norm": 0.4878872952823934, + "learning_rate": 1.3379295204061683e-05, + "loss": 0.6097, + "step": 23645 + }, + { + "epoch": 0.40859137406690627, + "grad_norm": 0.8659110698981568, + "learning_rate": 1.337876847262493e-05, + "loss": 0.5375, + "step": 23646 + }, + { + "epoch": 0.4086086535803152, + "grad_norm": 0.9242493002951159, + "learning_rate": 1.3378241730605621e-05, + "loss": 0.458, + "step": 23647 + }, + { + "epoch": 0.4086259330937241, + "grad_norm": 0.8387841012648823, + "learning_rate": 1.3377714978005399e-05, + "loss": 0.2613, + "step": 23648 + }, + { + "epoch": 0.408643212607133, + "grad_norm": 0.7899948534993562, + "learning_rate": 1.3377188214825916e-05, + "loss": 0.3123, + "step": 23649 + }, + { + "epoch": 0.4086604921205419, + "grad_norm": 0.987029607632143, + "learning_rate": 1.3376661441068826e-05, + "loss": 0.3434, + "step": 23650 + }, + { + "epoch": 0.4086777716339508, + "grad_norm": 0.7020826045652889, + "learning_rate": 1.3376134656735771e-05, + "loss": 0.5623, + "step": 23651 + }, + { + "epoch": 0.4086950511473597, + "grad_norm": 0.7670174807800192, + "learning_rate": 1.3375607861828407e-05, + "loss": 0.5321, + "step": 23652 + }, + { + "epoch": 0.4087123306607686, + "grad_norm": 0.7657499008381677, + "learning_rate": 1.3375081056348387e-05, + "loss": 0.413, + "step": 23653 + }, + { + "epoch": 0.40872961017417747, + "grad_norm": 0.8232029496305795, + "learning_rate": 1.3374554240297353e-05, + "loss": 0.3062, + "step": 23654 + }, + { + "epoch": 0.4087468896875864, + "grad_norm": 1.1992224359730852, + "learning_rate": 1.3374027413676958e-05, + "loss": 0.3638, + "step": 23655 + }, + { + "epoch": 0.4087641692009953, + "grad_norm": 0.7867729394576405, + "learning_rate": 1.3373500576488855e-05, + "loss": 0.3041, + "step": 23656 + }, + { + "epoch": 0.4087814487144042, + "grad_norm": 0.9794530349430552, + "learning_rate": 1.3372973728734691e-05, + "loss": 0.4437, + "step": 23657 + }, + { + "epoch": 0.4087987282278131, + "grad_norm": 1.9371570055927245, + "learning_rate": 1.3372446870416119e-05, + "loss": 0.5033, + "step": 23658 + }, + { + "epoch": 0.408816007741222, + "grad_norm": 1.1294854317174232, + "learning_rate": 1.3371920001534781e-05, + "loss": 0.4758, + "step": 23659 + }, + { + "epoch": 0.4088332872546309, + "grad_norm": 1.6545210048035734, + "learning_rate": 1.3371393122092338e-05, + "loss": 0.5118, + "step": 23660 + }, + { + "epoch": 0.4088505667680398, + "grad_norm": 0.8925216573509959, + "learning_rate": 1.3370866232090436e-05, + "loss": 0.9321, + "step": 23661 + }, + { + "epoch": 0.40886784628144873, + "grad_norm": 0.9172725463708937, + "learning_rate": 1.3370339331530721e-05, + "loss": 0.5351, + "step": 23662 + }, + { + "epoch": 0.40888512579485764, + "grad_norm": 0.7845589150236252, + "learning_rate": 1.3369812420414852e-05, + "loss": 0.4012, + "step": 23663 + }, + { + "epoch": 0.4089024053082665, + "grad_norm": 0.9340397609014786, + "learning_rate": 1.3369285498744471e-05, + "loss": 0.2965, + "step": 23664 + }, + { + "epoch": 0.4089196848216754, + "grad_norm": 1.0115204817833183, + "learning_rate": 1.3368758566521232e-05, + "loss": 0.4266, + "step": 23665 + }, + { + "epoch": 0.4089369643350843, + "grad_norm": 0.4599239744383344, + "learning_rate": 1.3368231623746785e-05, + "loss": 0.6495, + "step": 23666 + }, + { + "epoch": 0.4089542438484932, + "grad_norm": 1.6150290713539053, + "learning_rate": 1.3367704670422785e-05, + "loss": 0.5928, + "step": 23667 + }, + { + "epoch": 0.4089715233619021, + "grad_norm": 0.7979738284410905, + "learning_rate": 1.3367177706550873e-05, + "loss": 0.3993, + "step": 23668 + }, + { + "epoch": 0.40898880287531103, + "grad_norm": 0.9211379002889132, + "learning_rate": 1.3366650732132706e-05, + "loss": 0.5814, + "step": 23669 + }, + { + "epoch": 0.40900608238871994, + "grad_norm": 0.8690414481728973, + "learning_rate": 1.3366123747169935e-05, + "loss": 0.2802, + "step": 23670 + }, + { + "epoch": 0.40902336190212885, + "grad_norm": 1.2319854632418967, + "learning_rate": 1.3365596751664205e-05, + "loss": 0.454, + "step": 23671 + }, + { + "epoch": 0.40904064141553775, + "grad_norm": 1.0334031249861553, + "learning_rate": 1.3365069745617171e-05, + "loss": 0.4504, + "step": 23672 + }, + { + "epoch": 0.40905792092894666, + "grad_norm": 1.7129254779014207, + "learning_rate": 1.3364542729030486e-05, + "loss": 0.3071, + "step": 23673 + }, + { + "epoch": 0.40907520044235557, + "grad_norm": 1.2771371135049068, + "learning_rate": 1.3364015701905792e-05, + "loss": 0.534, + "step": 23674 + }, + { + "epoch": 0.4090924799557644, + "grad_norm": 1.1741972159750305, + "learning_rate": 1.336348866424475e-05, + "loss": 0.6073, + "step": 23675 + }, + { + "epoch": 0.40910975946917333, + "grad_norm": 0.9928139572922514, + "learning_rate": 1.3362961616049006e-05, + "loss": 0.656, + "step": 23676 + }, + { + "epoch": 0.40912703898258224, + "grad_norm": 0.9431613766558741, + "learning_rate": 1.3362434557320207e-05, + "loss": 0.2639, + "step": 23677 + }, + { + "epoch": 0.40914431849599114, + "grad_norm": 0.48303492024297795, + "learning_rate": 1.336190748806001e-05, + "loss": 0.783, + "step": 23678 + }, + { + "epoch": 0.40916159800940005, + "grad_norm": 1.5773424620914211, + "learning_rate": 1.3361380408270061e-05, + "loss": 0.55, + "step": 23679 + }, + { + "epoch": 0.40917887752280896, + "grad_norm": 0.8869918193608635, + "learning_rate": 1.3360853317952015e-05, + "loss": 0.4339, + "step": 23680 + }, + { + "epoch": 0.40919615703621787, + "grad_norm": 0.9094315977962465, + "learning_rate": 1.3360326217107522e-05, + "loss": 0.3834, + "step": 23681 + }, + { + "epoch": 0.4092134365496268, + "grad_norm": 1.0354921772781354, + "learning_rate": 1.3359799105738229e-05, + "loss": 0.3333, + "step": 23682 + }, + { + "epoch": 0.4092307160630357, + "grad_norm": 1.721423663111334, + "learning_rate": 1.3359271983845792e-05, + "loss": 0.4429, + "step": 23683 + }, + { + "epoch": 0.4092479955764446, + "grad_norm": 1.0840575640531667, + "learning_rate": 1.335874485143186e-05, + "loss": 0.5769, + "step": 23684 + }, + { + "epoch": 0.4092652750898535, + "grad_norm": 0.8414772835252811, + "learning_rate": 1.3358217708498083e-05, + "loss": 0.5982, + "step": 23685 + }, + { + "epoch": 0.40928255460326235, + "grad_norm": 1.300869064111156, + "learning_rate": 1.3357690555046113e-05, + "loss": 0.4762, + "step": 23686 + }, + { + "epoch": 0.40929983411667126, + "grad_norm": 1.247228689951699, + "learning_rate": 1.33571633910776e-05, + "loss": 0.5069, + "step": 23687 + }, + { + "epoch": 0.40931711363008016, + "grad_norm": 0.4165416570665781, + "learning_rate": 1.3356636216594199e-05, + "loss": 0.532, + "step": 23688 + }, + { + "epoch": 0.4093343931434891, + "grad_norm": 1.202349121429639, + "learning_rate": 1.3356109031597556e-05, + "loss": 0.5908, + "step": 23689 + }, + { + "epoch": 0.409351672656898, + "grad_norm": 0.7879777115153378, + "learning_rate": 1.3355581836089327e-05, + "loss": 0.2994, + "step": 23690 + }, + { + "epoch": 0.4093689521703069, + "grad_norm": 1.2606091249801645, + "learning_rate": 1.3355054630071158e-05, + "loss": 0.2077, + "step": 23691 + }, + { + "epoch": 0.4093862316837158, + "grad_norm": 1.140207356589808, + "learning_rate": 1.3354527413544701e-05, + "loss": 0.5718, + "step": 23692 + }, + { + "epoch": 0.4094035111971247, + "grad_norm": 0.9335784708432112, + "learning_rate": 1.3354000186511615e-05, + "loss": 0.3467, + "step": 23693 + }, + { + "epoch": 0.4094207907105336, + "grad_norm": 0.9661117023908381, + "learning_rate": 1.335347294897354e-05, + "loss": 0.3848, + "step": 23694 + }, + { + "epoch": 0.4094380702239425, + "grad_norm": 1.5183881779951287, + "learning_rate": 1.3352945700932136e-05, + "loss": 0.2916, + "step": 23695 + }, + { + "epoch": 0.40945534973735137, + "grad_norm": 0.9808886986196268, + "learning_rate": 1.335241844238905e-05, + "loss": 0.5252, + "step": 23696 + }, + { + "epoch": 0.4094726292507603, + "grad_norm": 1.0506443122673585, + "learning_rate": 1.3351891173345935e-05, + "loss": 0.4372, + "step": 23697 + }, + { + "epoch": 0.4094899087641692, + "grad_norm": 0.6007790944314167, + "learning_rate": 1.3351363893804442e-05, + "loss": 0.5609, + "step": 23698 + }, + { + "epoch": 0.4095071882775781, + "grad_norm": 0.9038558099118957, + "learning_rate": 1.3350836603766224e-05, + "loss": 0.6144, + "step": 23699 + }, + { + "epoch": 0.409524467790987, + "grad_norm": 0.47234458755592923, + "learning_rate": 1.335030930323293e-05, + "loss": 0.6881, + "step": 23700 + }, + { + "epoch": 0.4095417473043959, + "grad_norm": 1.4304450751798221, + "learning_rate": 1.3349781992206212e-05, + "loss": 0.6489, + "step": 23701 + }, + { + "epoch": 0.4095590268178048, + "grad_norm": 0.7840151001809795, + "learning_rate": 1.3349254670687722e-05, + "loss": 0.3838, + "step": 23702 + }, + { + "epoch": 0.4095763063312137, + "grad_norm": 1.1025351076535748, + "learning_rate": 1.3348727338679113e-05, + "loss": 0.4119, + "step": 23703 + }, + { + "epoch": 0.40959358584462263, + "grad_norm": 1.0167803984101254, + "learning_rate": 1.3348199996182037e-05, + "loss": 0.5083, + "step": 23704 + }, + { + "epoch": 0.40961086535803154, + "grad_norm": 1.115225828332724, + "learning_rate": 1.3347672643198139e-05, + "loss": 0.6547, + "step": 23705 + }, + { + "epoch": 0.40962814487144045, + "grad_norm": 0.6976277076120949, + "learning_rate": 1.334714527972908e-05, + "loss": 0.4522, + "step": 23706 + }, + { + "epoch": 0.4096454243848493, + "grad_norm": 0.7110416806281306, + "learning_rate": 1.3346617905776507e-05, + "loss": 0.3596, + "step": 23707 + }, + { + "epoch": 0.4096627038982582, + "grad_norm": 0.7262934454127843, + "learning_rate": 1.334609052134207e-05, + "loss": 0.5657, + "step": 23708 + }, + { + "epoch": 0.4096799834116671, + "grad_norm": 0.9573366605242312, + "learning_rate": 1.3345563126427428e-05, + "loss": 0.5328, + "step": 23709 + }, + { + "epoch": 0.409697262925076, + "grad_norm": 0.8450751747980909, + "learning_rate": 1.3345035721034227e-05, + "loss": 0.5939, + "step": 23710 + }, + { + "epoch": 0.40971454243848493, + "grad_norm": 0.8446530126984464, + "learning_rate": 1.3344508305164117e-05, + "loss": 0.4671, + "step": 23711 + }, + { + "epoch": 0.40973182195189384, + "grad_norm": 0.4871617211068232, + "learning_rate": 1.3343980878818755e-05, + "loss": 0.5033, + "step": 23712 + }, + { + "epoch": 0.40974910146530275, + "grad_norm": 0.8735143261662522, + "learning_rate": 1.334345344199979e-05, + "loss": 0.3647, + "step": 23713 + }, + { + "epoch": 0.40976638097871165, + "grad_norm": 0.5182637207241961, + "learning_rate": 1.3342925994708873e-05, + "loss": 0.8408, + "step": 23714 + }, + { + "epoch": 0.40978366049212056, + "grad_norm": 1.2627973952151956, + "learning_rate": 1.3342398536947659e-05, + "loss": 0.5815, + "step": 23715 + }, + { + "epoch": 0.40980094000552947, + "grad_norm": 1.0632931464543303, + "learning_rate": 1.33418710687178e-05, + "loss": 0.432, + "step": 23716 + }, + { + "epoch": 0.4098182195189383, + "grad_norm": 0.9949540619358838, + "learning_rate": 1.3341343590020943e-05, + "loss": 0.332, + "step": 23717 + }, + { + "epoch": 0.40983549903234723, + "grad_norm": 0.8534929927655591, + "learning_rate": 1.3340816100858747e-05, + "loss": 0.4663, + "step": 23718 + }, + { + "epoch": 0.40985277854575614, + "grad_norm": 1.2770873701403322, + "learning_rate": 1.3340288601232862e-05, + "loss": 0.4418, + "step": 23719 + }, + { + "epoch": 0.40987005805916504, + "grad_norm": 0.6997545538599363, + "learning_rate": 1.3339761091144939e-05, + "loss": 0.4687, + "step": 23720 + }, + { + "epoch": 0.40988733757257395, + "grad_norm": 0.7470553561480421, + "learning_rate": 1.3339233570596631e-05, + "loss": 0.5533, + "step": 23721 + }, + { + "epoch": 0.40990461708598286, + "grad_norm": 1.4976422267621812, + "learning_rate": 1.3338706039589586e-05, + "loss": 0.4955, + "step": 23722 + }, + { + "epoch": 0.40992189659939177, + "grad_norm": 0.9392404350313526, + "learning_rate": 1.3338178498125462e-05, + "loss": 0.3267, + "step": 23723 + }, + { + "epoch": 0.4099391761128007, + "grad_norm": 0.6953004447789103, + "learning_rate": 1.3337650946205912e-05, + "loss": 0.8129, + "step": 23724 + }, + { + "epoch": 0.4099564556262096, + "grad_norm": 1.0871759446491187, + "learning_rate": 1.333712338383258e-05, + "loss": 0.5031, + "step": 23725 + }, + { + "epoch": 0.4099737351396185, + "grad_norm": 1.0769681894349488, + "learning_rate": 1.3336595811007127e-05, + "loss": 0.4554, + "step": 23726 + }, + { + "epoch": 0.4099910146530274, + "grad_norm": 0.9607796426327705, + "learning_rate": 1.3336068227731205e-05, + "loss": 0.556, + "step": 23727 + }, + { + "epoch": 0.41000829416643625, + "grad_norm": 0.8262363846826356, + "learning_rate": 1.3335540634006459e-05, + "loss": 0.6048, + "step": 23728 + }, + { + "epoch": 0.41002557367984516, + "grad_norm": 0.873877438911064, + "learning_rate": 1.333501302983455e-05, + "loss": 0.4547, + "step": 23729 + }, + { + "epoch": 0.41004285319325406, + "grad_norm": 1.519350204674905, + "learning_rate": 1.3334485415217126e-05, + "loss": 0.5358, + "step": 23730 + }, + { + "epoch": 0.41006013270666297, + "grad_norm": 1.3252328233042912, + "learning_rate": 1.3333957790155839e-05, + "loss": 0.4258, + "step": 23731 + }, + { + "epoch": 0.4100774122200719, + "grad_norm": 0.7443627684912193, + "learning_rate": 1.3333430154652343e-05, + "loss": 0.4021, + "step": 23732 + }, + { + "epoch": 0.4100946917334808, + "grad_norm": 0.772327798511215, + "learning_rate": 1.3332902508708289e-05, + "loss": 0.3188, + "step": 23733 + }, + { + "epoch": 0.4101119712468897, + "grad_norm": 1.370444146852961, + "learning_rate": 1.3332374852325335e-05, + "loss": 0.4962, + "step": 23734 + }, + { + "epoch": 0.4101292507602986, + "grad_norm": 1.9515670107889174, + "learning_rate": 1.3331847185505127e-05, + "loss": 0.3013, + "step": 23735 + }, + { + "epoch": 0.4101465302737075, + "grad_norm": 1.1127111686558016, + "learning_rate": 1.333131950824932e-05, + "loss": 0.4586, + "step": 23736 + }, + { + "epoch": 0.4101638097871164, + "grad_norm": 1.0520497739631036, + "learning_rate": 1.3330791820559568e-05, + "loss": 0.4614, + "step": 23737 + }, + { + "epoch": 0.41018108930052527, + "grad_norm": 1.1228521434572731, + "learning_rate": 1.3330264122437521e-05, + "loss": 0.3853, + "step": 23738 + }, + { + "epoch": 0.4101983688139342, + "grad_norm": 0.9731934193537454, + "learning_rate": 1.3329736413884837e-05, + "loss": 0.4825, + "step": 23739 + }, + { + "epoch": 0.4102156483273431, + "grad_norm": 1.2210667484393103, + "learning_rate": 1.3329208694903165e-05, + "loss": 0.3536, + "step": 23740 + }, + { + "epoch": 0.410232927840752, + "grad_norm": 1.498162424826307, + "learning_rate": 1.3328680965494156e-05, + "loss": 0.3727, + "step": 23741 + }, + { + "epoch": 0.4102502073541609, + "grad_norm": 1.1806719054996562, + "learning_rate": 1.3328153225659466e-05, + "loss": 0.6308, + "step": 23742 + }, + { + "epoch": 0.4102674868675698, + "grad_norm": 0.8644339789455318, + "learning_rate": 1.332762547540075e-05, + "loss": 0.4174, + "step": 23743 + }, + { + "epoch": 0.4102847663809787, + "grad_norm": 1.347253687753418, + "learning_rate": 1.3327097714719655e-05, + "loss": 0.5709, + "step": 23744 + }, + { + "epoch": 0.4103020458943876, + "grad_norm": 1.073384962243895, + "learning_rate": 1.3326569943617838e-05, + "loss": 0.4705, + "step": 23745 + }, + { + "epoch": 0.41031932540779653, + "grad_norm": 1.4222136683718385, + "learning_rate": 1.332604216209695e-05, + "loss": 0.4204, + "step": 23746 + }, + { + "epoch": 0.41033660492120544, + "grad_norm": 0.7304611110381822, + "learning_rate": 1.3325514370158648e-05, + "loss": 0.6429, + "step": 23747 + }, + { + "epoch": 0.41035388443461435, + "grad_norm": 0.9206198115338406, + "learning_rate": 1.332498656780458e-05, + "loss": 0.3811, + "step": 23748 + }, + { + "epoch": 0.4103711639480232, + "grad_norm": 0.8372064487081263, + "learning_rate": 1.33244587550364e-05, + "loss": 0.3866, + "step": 23749 + }, + { + "epoch": 0.4103884434614321, + "grad_norm": 1.1129242627842837, + "learning_rate": 1.3323930931855769e-05, + "loss": 0.455, + "step": 23750 + }, + { + "epoch": 0.410405722974841, + "grad_norm": 0.8131125737796475, + "learning_rate": 1.3323403098264324e-05, + "loss": 0.3605, + "step": 23751 + }, + { + "epoch": 0.4104230024882499, + "grad_norm": 2.161287579003788, + "learning_rate": 1.3322875254263737e-05, + "loss": 0.6064, + "step": 23752 + }, + { + "epoch": 0.41044028200165883, + "grad_norm": 0.769272045962682, + "learning_rate": 1.3322347399855646e-05, + "loss": 0.8719, + "step": 23753 + }, + { + "epoch": 0.41045756151506774, + "grad_norm": 0.9658138078252009, + "learning_rate": 1.3321819535041714e-05, + "loss": 0.4381, + "step": 23754 + }, + { + "epoch": 0.41047484102847664, + "grad_norm": 1.0209223430715582, + "learning_rate": 1.3321291659823588e-05, + "loss": 0.3891, + "step": 23755 + }, + { + "epoch": 0.41049212054188555, + "grad_norm": 1.1342520272228593, + "learning_rate": 1.3320763774202927e-05, + "loss": 0.4893, + "step": 23756 + }, + { + "epoch": 0.41050940005529446, + "grad_norm": 1.2321620859627658, + "learning_rate": 1.3320235878181378e-05, + "loss": 0.3884, + "step": 23757 + }, + { + "epoch": 0.41052667956870337, + "grad_norm": 0.9378057923801935, + "learning_rate": 1.33197079717606e-05, + "loss": 0.3723, + "step": 23758 + }, + { + "epoch": 0.4105439590821123, + "grad_norm": 1.8880518048540478, + "learning_rate": 1.3319180054942246e-05, + "loss": 0.4698, + "step": 23759 + }, + { + "epoch": 0.4105612385955211, + "grad_norm": 1.4197125133085893, + "learning_rate": 1.3318652127727966e-05, + "loss": 0.4769, + "step": 23760 + }, + { + "epoch": 0.41057851810893004, + "grad_norm": 0.6605314665486667, + "learning_rate": 1.3318124190119418e-05, + "loss": 0.4674, + "step": 23761 + }, + { + "epoch": 0.41059579762233894, + "grad_norm": 0.8076720045749888, + "learning_rate": 1.3317596242118247e-05, + "loss": 0.5702, + "step": 23762 + }, + { + "epoch": 0.41061307713574785, + "grad_norm": 0.9027731727102722, + "learning_rate": 1.3317068283726116e-05, + "loss": 0.5885, + "step": 23763 + }, + { + "epoch": 0.41063035664915676, + "grad_norm": 0.8731003002209345, + "learning_rate": 1.3316540314944678e-05, + "loss": 0.4979, + "step": 23764 + }, + { + "epoch": 0.41064763616256567, + "grad_norm": 1.3355853673951086, + "learning_rate": 1.3316012335775577e-05, + "loss": 0.7379, + "step": 23765 + }, + { + "epoch": 0.4106649156759746, + "grad_norm": 1.9375389576253992, + "learning_rate": 1.3315484346220478e-05, + "loss": 0.4382, + "step": 23766 + }, + { + "epoch": 0.4106821951893835, + "grad_norm": 0.8519078951622995, + "learning_rate": 1.331495634628103e-05, + "loss": 0.5394, + "step": 23767 + }, + { + "epoch": 0.4106994747027924, + "grad_norm": 0.9577253519221676, + "learning_rate": 1.3314428335958884e-05, + "loss": 0.5087, + "step": 23768 + }, + { + "epoch": 0.4107167542162013, + "grad_norm": 0.6049417237877273, + "learning_rate": 1.3313900315255698e-05, + "loss": 0.4447, + "step": 23769 + }, + { + "epoch": 0.41073403372961015, + "grad_norm": 0.3915652852387875, + "learning_rate": 1.3313372284173127e-05, + "loss": 0.7607, + "step": 23770 + }, + { + "epoch": 0.41075131324301906, + "grad_norm": 1.1028401484349881, + "learning_rate": 1.331284424271282e-05, + "loss": 0.2916, + "step": 23771 + }, + { + "epoch": 0.41076859275642796, + "grad_norm": 0.8238116835938364, + "learning_rate": 1.3312316190876434e-05, + "loss": 0.5043, + "step": 23772 + }, + { + "epoch": 0.41078587226983687, + "grad_norm": 0.7154058536112418, + "learning_rate": 1.331178812866562e-05, + "loss": 0.2679, + "step": 23773 + }, + { + "epoch": 0.4108031517832458, + "grad_norm": 1.4507031067272331, + "learning_rate": 1.3311260056082034e-05, + "loss": 0.4337, + "step": 23774 + }, + { + "epoch": 0.4108204312966547, + "grad_norm": 0.9958439097752538, + "learning_rate": 1.3310731973127331e-05, + "loss": 0.4674, + "step": 23775 + }, + { + "epoch": 0.4108377108100636, + "grad_norm": 0.8634163825253753, + "learning_rate": 1.3310203879803163e-05, + "loss": 0.4891, + "step": 23776 + }, + { + "epoch": 0.4108549903234725, + "grad_norm": 1.1087376885573186, + "learning_rate": 1.3309675776111184e-05, + "loss": 0.4402, + "step": 23777 + }, + { + "epoch": 0.4108722698368814, + "grad_norm": 1.336084775507938, + "learning_rate": 1.330914766205305e-05, + "loss": 0.3813, + "step": 23778 + }, + { + "epoch": 0.4108895493502903, + "grad_norm": 1.0298600701694631, + "learning_rate": 1.3308619537630416e-05, + "loss": 0.4413, + "step": 23779 + }, + { + "epoch": 0.4109068288636992, + "grad_norm": 0.9980861847241685, + "learning_rate": 1.3308091402844932e-05, + "loss": 0.4112, + "step": 23780 + }, + { + "epoch": 0.4109241083771081, + "grad_norm": 0.7845231296246246, + "learning_rate": 1.3307563257698258e-05, + "loss": 0.4035, + "step": 23781 + }, + { + "epoch": 0.410941387890517, + "grad_norm": 2.1542548958255767, + "learning_rate": 1.3307035102192039e-05, + "loss": 0.3289, + "step": 23782 + }, + { + "epoch": 0.4109586674039259, + "grad_norm": 0.9711907257793502, + "learning_rate": 1.3306506936327937e-05, + "loss": 0.4955, + "step": 23783 + }, + { + "epoch": 0.4109759469173348, + "grad_norm": 0.8936957972867317, + "learning_rate": 1.3305978760107605e-05, + "loss": 0.5287, + "step": 23784 + }, + { + "epoch": 0.4109932264307437, + "grad_norm": 1.0829636932836182, + "learning_rate": 1.3305450573532691e-05, + "loss": 0.4123, + "step": 23785 + }, + { + "epoch": 0.4110105059441526, + "grad_norm": 0.8776124768151228, + "learning_rate": 1.330492237660486e-05, + "loss": 0.3612, + "step": 23786 + }, + { + "epoch": 0.4110277854575615, + "grad_norm": 1.2879704203306697, + "learning_rate": 1.3304394169325763e-05, + "loss": 0.4777, + "step": 23787 + }, + { + "epoch": 0.41104506497097043, + "grad_norm": 1.0902073656872457, + "learning_rate": 1.3303865951697045e-05, + "loss": 0.5767, + "step": 23788 + }, + { + "epoch": 0.41106234448437934, + "grad_norm": 0.6851952285820614, + "learning_rate": 1.3303337723720376e-05, + "loss": 0.3286, + "step": 23789 + }, + { + "epoch": 0.41107962399778825, + "grad_norm": 1.3315270778853374, + "learning_rate": 1.3302809485397396e-05, + "loss": 0.5867, + "step": 23790 + }, + { + "epoch": 0.4110969035111971, + "grad_norm": 0.7768959162671004, + "learning_rate": 1.330228123672977e-05, + "loss": 0.4032, + "step": 23791 + }, + { + "epoch": 0.411114183024606, + "grad_norm": 0.9026528905763894, + "learning_rate": 1.3301752977719147e-05, + "loss": 0.5265, + "step": 23792 + }, + { + "epoch": 0.4111314625380149, + "grad_norm": 1.0251379268540421, + "learning_rate": 1.3301224708367182e-05, + "loss": 0.5196, + "step": 23793 + }, + { + "epoch": 0.4111487420514238, + "grad_norm": 0.9784164860853657, + "learning_rate": 1.3300696428675528e-05, + "loss": 0.5113, + "step": 23794 + }, + { + "epoch": 0.41116602156483273, + "grad_norm": 1.0022366903854678, + "learning_rate": 1.3300168138645845e-05, + "loss": 0.4273, + "step": 23795 + }, + { + "epoch": 0.41118330107824164, + "grad_norm": 0.4506256741213981, + "learning_rate": 1.3299639838279786e-05, + "loss": 0.6684, + "step": 23796 + }, + { + "epoch": 0.41120058059165054, + "grad_norm": 1.0699249019945727, + "learning_rate": 1.3299111527578999e-05, + "loss": 0.4441, + "step": 23797 + }, + { + "epoch": 0.41121786010505945, + "grad_norm": 1.2643262939496642, + "learning_rate": 1.3298583206545147e-05, + "loss": 0.4344, + "step": 23798 + }, + { + "epoch": 0.41123513961846836, + "grad_norm": 0.9092863043993127, + "learning_rate": 1.3298054875179882e-05, + "loss": 0.5936, + "step": 23799 + }, + { + "epoch": 0.41125241913187727, + "grad_norm": 0.6945856807621325, + "learning_rate": 1.3297526533484857e-05, + "loss": 0.6969, + "step": 23800 + }, + { + "epoch": 0.4112696986452862, + "grad_norm": 0.6108095894967059, + "learning_rate": 1.3296998181461733e-05, + "loss": 0.3269, + "step": 23801 + }, + { + "epoch": 0.411286978158695, + "grad_norm": 0.8414456351624108, + "learning_rate": 1.3296469819112155e-05, + "loss": 0.4036, + "step": 23802 + }, + { + "epoch": 0.41130425767210393, + "grad_norm": 1.0202619516670264, + "learning_rate": 1.3295941446437786e-05, + "loss": 0.2865, + "step": 23803 + }, + { + "epoch": 0.41132153718551284, + "grad_norm": 0.8105020141971222, + "learning_rate": 1.3295413063440274e-05, + "loss": 0.3712, + "step": 23804 + }, + { + "epoch": 0.41133881669892175, + "grad_norm": 0.5845092263960764, + "learning_rate": 1.329488467012128e-05, + "loss": 0.217, + "step": 23805 + }, + { + "epoch": 0.41135609621233066, + "grad_norm": 0.9680683901507626, + "learning_rate": 1.3294356266482458e-05, + "loss": 0.3059, + "step": 23806 + }, + { + "epoch": 0.41137337572573957, + "grad_norm": 0.7354321967994664, + "learning_rate": 1.329382785252546e-05, + "loss": 0.504, + "step": 23807 + }, + { + "epoch": 0.4113906552391485, + "grad_norm": 1.1522383150276376, + "learning_rate": 1.3293299428251944e-05, + "loss": 0.5607, + "step": 23808 + }, + { + "epoch": 0.4114079347525574, + "grad_norm": 1.354283679833405, + "learning_rate": 1.3292770993663563e-05, + "loss": 0.6464, + "step": 23809 + }, + { + "epoch": 0.4114252142659663, + "grad_norm": 0.7507599617813618, + "learning_rate": 1.3292242548761975e-05, + "loss": 0.3799, + "step": 23810 + }, + { + "epoch": 0.4114424937793752, + "grad_norm": 0.6728627437494065, + "learning_rate": 1.3291714093548833e-05, + "loss": 0.3966, + "step": 23811 + }, + { + "epoch": 0.41145977329278405, + "grad_norm": 0.9977345480182529, + "learning_rate": 1.329118562802579e-05, + "loss": 0.571, + "step": 23812 + }, + { + "epoch": 0.41147705280619296, + "grad_norm": 1.1323133399957912, + "learning_rate": 1.3290657152194506e-05, + "loss": 0.8388, + "step": 23813 + }, + { + "epoch": 0.41149433231960186, + "grad_norm": 0.9971607439761396, + "learning_rate": 1.329012866605663e-05, + "loss": 0.5101, + "step": 23814 + }, + { + "epoch": 0.41151161183301077, + "grad_norm": 1.6071305793426576, + "learning_rate": 1.3289600169613825e-05, + "loss": 0.6853, + "step": 23815 + }, + { + "epoch": 0.4115288913464197, + "grad_norm": 1.082226279646806, + "learning_rate": 1.328907166286774e-05, + "loss": 0.4796, + "step": 23816 + }, + { + "epoch": 0.4115461708598286, + "grad_norm": 1.2480156444299486, + "learning_rate": 1.3288543145820031e-05, + "loss": 0.5671, + "step": 23817 + }, + { + "epoch": 0.4115634503732375, + "grad_norm": 1.1519966917876239, + "learning_rate": 1.328801461847236e-05, + "loss": 0.2942, + "step": 23818 + }, + { + "epoch": 0.4115807298866464, + "grad_norm": 1.0352373002330457, + "learning_rate": 1.3287486080826375e-05, + "loss": 0.5131, + "step": 23819 + }, + { + "epoch": 0.4115980094000553, + "grad_norm": 1.0993878000124926, + "learning_rate": 1.328695753288373e-05, + "loss": 0.5011, + "step": 23820 + }, + { + "epoch": 0.4116152889134642, + "grad_norm": 0.7770221280930916, + "learning_rate": 1.3286428974646088e-05, + "loss": 0.4218, + "step": 23821 + }, + { + "epoch": 0.4116325684268731, + "grad_norm": 0.8211982713009836, + "learning_rate": 1.3285900406115103e-05, + "loss": 0.2393, + "step": 23822 + }, + { + "epoch": 0.411649847940282, + "grad_norm": 0.9106568586591993, + "learning_rate": 1.3285371827292423e-05, + "loss": 0.4979, + "step": 23823 + }, + { + "epoch": 0.4116671274536909, + "grad_norm": 1.0202019158872484, + "learning_rate": 1.3284843238179711e-05, + "loss": 0.4238, + "step": 23824 + }, + { + "epoch": 0.4116844069670998, + "grad_norm": 1.1118726321597594, + "learning_rate": 1.3284314638778622e-05, + "loss": 0.4642, + "step": 23825 + }, + { + "epoch": 0.4117016864805087, + "grad_norm": 0.8616255934240474, + "learning_rate": 1.3283786029090808e-05, + "loss": 0.452, + "step": 23826 + }, + { + "epoch": 0.4117189659939176, + "grad_norm": 1.4872606274318303, + "learning_rate": 1.328325740911793e-05, + "loss": 0.3846, + "step": 23827 + }, + { + "epoch": 0.4117362455073265, + "grad_norm": 0.6482881123753391, + "learning_rate": 1.3282728778861634e-05, + "loss": 0.3674, + "step": 23828 + }, + { + "epoch": 0.4117535250207354, + "grad_norm": 0.7620921013462902, + "learning_rate": 1.3282200138323585e-05, + "loss": 0.5188, + "step": 23829 + }, + { + "epoch": 0.41177080453414433, + "grad_norm": 0.9347071464999638, + "learning_rate": 1.3281671487505436e-05, + "loss": 0.3185, + "step": 23830 + }, + { + "epoch": 0.41178808404755324, + "grad_norm": 0.9011934115807629, + "learning_rate": 1.3281142826408842e-05, + "loss": 0.2776, + "step": 23831 + }, + { + "epoch": 0.41180536356096215, + "grad_norm": 1.059324859287758, + "learning_rate": 1.3280614155035462e-05, + "loss": 0.5129, + "step": 23832 + }, + { + "epoch": 0.41182264307437105, + "grad_norm": 1.07064260553886, + "learning_rate": 1.3280085473386947e-05, + "loss": 0.3597, + "step": 23833 + }, + { + "epoch": 0.4118399225877799, + "grad_norm": 0.63812861235278, + "learning_rate": 1.3279556781464952e-05, + "loss": 0.335, + "step": 23834 + }, + { + "epoch": 0.4118572021011888, + "grad_norm": 0.6483864819027456, + "learning_rate": 1.3279028079271141e-05, + "loss": 0.3889, + "step": 23835 + }, + { + "epoch": 0.4118744816145977, + "grad_norm": 1.0520608665707476, + "learning_rate": 1.3278499366807163e-05, + "loss": 0.5141, + "step": 23836 + }, + { + "epoch": 0.41189176112800663, + "grad_norm": 1.0493189422237554, + "learning_rate": 1.3277970644074673e-05, + "loss": 0.4347, + "step": 23837 + }, + { + "epoch": 0.41190904064141554, + "grad_norm": 1.326268445324196, + "learning_rate": 1.3277441911075334e-05, + "loss": 0.5401, + "step": 23838 + }, + { + "epoch": 0.41192632015482444, + "grad_norm": 1.1066497774670574, + "learning_rate": 1.3276913167810797e-05, + "loss": 0.5282, + "step": 23839 + }, + { + "epoch": 0.41194359966823335, + "grad_norm": 1.5668213400741287, + "learning_rate": 1.3276384414282717e-05, + "loss": 0.5618, + "step": 23840 + }, + { + "epoch": 0.41196087918164226, + "grad_norm": 0.6592340358451335, + "learning_rate": 1.3275855650492755e-05, + "loss": 0.4678, + "step": 23841 + }, + { + "epoch": 0.41197815869505117, + "grad_norm": 1.3072733248272235, + "learning_rate": 1.327532687644256e-05, + "loss": 0.4661, + "step": 23842 + }, + { + "epoch": 0.4119954382084601, + "grad_norm": 0.981691084676565, + "learning_rate": 1.3274798092133796e-05, + "loss": 0.5249, + "step": 23843 + }, + { + "epoch": 0.4120127177218689, + "grad_norm": 1.0366045253936291, + "learning_rate": 1.3274269297568117e-05, + "loss": 0.408, + "step": 23844 + }, + { + "epoch": 0.41202999723527783, + "grad_norm": 0.8459544887506645, + "learning_rate": 1.327374049274717e-05, + "loss": 0.4527, + "step": 23845 + }, + { + "epoch": 0.41204727674868674, + "grad_norm": 0.9727047548527118, + "learning_rate": 1.3273211677672626e-05, + "loss": 0.3933, + "step": 23846 + }, + { + "epoch": 0.41206455626209565, + "grad_norm": 0.8269167595022949, + "learning_rate": 1.3272682852346132e-05, + "loss": 0.3762, + "step": 23847 + }, + { + "epoch": 0.41208183577550456, + "grad_norm": 1.0065480334627672, + "learning_rate": 1.3272154016769345e-05, + "loss": 0.402, + "step": 23848 + }, + { + "epoch": 0.41209911528891346, + "grad_norm": 0.610435436589677, + "learning_rate": 1.3271625170943927e-05, + "loss": 0.3458, + "step": 23849 + }, + { + "epoch": 0.41211639480232237, + "grad_norm": 1.171212379235743, + "learning_rate": 1.3271096314871528e-05, + "loss": 0.4525, + "step": 23850 + }, + { + "epoch": 0.4121336743157313, + "grad_norm": 2.635335250319318, + "learning_rate": 1.3270567448553805e-05, + "loss": 0.6494, + "step": 23851 + }, + { + "epoch": 0.4121509538291402, + "grad_norm": 1.099145777091365, + "learning_rate": 1.3270038571992419e-05, + "loss": 0.4432, + "step": 23852 + }, + { + "epoch": 0.4121682333425491, + "grad_norm": 0.49064999190251046, + "learning_rate": 1.3269509685189026e-05, + "loss": 0.628, + "step": 23853 + }, + { + "epoch": 0.412185512855958, + "grad_norm": 0.9266392300101622, + "learning_rate": 1.3268980788145273e-05, + "loss": 0.5399, + "step": 23854 + }, + { + "epoch": 0.41220279236936685, + "grad_norm": 1.739296755024587, + "learning_rate": 1.326845188086283e-05, + "loss": 0.4964, + "step": 23855 + }, + { + "epoch": 0.41222007188277576, + "grad_norm": 1.3160387695417968, + "learning_rate": 1.3267922963343345e-05, + "loss": 0.6339, + "step": 23856 + }, + { + "epoch": 0.41223735139618467, + "grad_norm": 0.8672797082661695, + "learning_rate": 1.3267394035588475e-05, + "loss": 0.5903, + "step": 23857 + }, + { + "epoch": 0.4122546309095936, + "grad_norm": 1.0430585614082257, + "learning_rate": 1.3266865097599882e-05, + "loss": 0.4579, + "step": 23858 + }, + { + "epoch": 0.4122719104230025, + "grad_norm": 1.1048454213882446, + "learning_rate": 1.3266336149379218e-05, + "loss": 0.5776, + "step": 23859 + }, + { + "epoch": 0.4122891899364114, + "grad_norm": 1.3136086319849054, + "learning_rate": 1.3265807190928136e-05, + "loss": 0.6434, + "step": 23860 + }, + { + "epoch": 0.4123064694498203, + "grad_norm": 0.46928228936826566, + "learning_rate": 1.3265278222248306e-05, + "loss": 0.6912, + "step": 23861 + }, + { + "epoch": 0.4123237489632292, + "grad_norm": 0.6415548145046773, + "learning_rate": 1.326474924334137e-05, + "loss": 0.3333, + "step": 23862 + }, + { + "epoch": 0.4123410284766381, + "grad_norm": 0.8378531732994046, + "learning_rate": 1.3264220254208998e-05, + "loss": 0.4935, + "step": 23863 + }, + { + "epoch": 0.412358307990047, + "grad_norm": 0.9333543960798524, + "learning_rate": 1.3263691254852836e-05, + "loss": 0.3913, + "step": 23864 + }, + { + "epoch": 0.4123755875034559, + "grad_norm": 1.1504068036842325, + "learning_rate": 1.3263162245274544e-05, + "loss": 0.4009, + "step": 23865 + }, + { + "epoch": 0.4123928670168648, + "grad_norm": 0.8281005487261631, + "learning_rate": 1.326263322547578e-05, + "loss": 0.2671, + "step": 23866 + }, + { + "epoch": 0.4124101465302737, + "grad_norm": 1.475808040277718, + "learning_rate": 1.3262104195458201e-05, + "loss": 0.5635, + "step": 23867 + }, + { + "epoch": 0.4124274260436826, + "grad_norm": 0.953052557276907, + "learning_rate": 1.3261575155223463e-05, + "loss": 0.2769, + "step": 23868 + }, + { + "epoch": 0.4124447055570915, + "grad_norm": 0.5289442878184543, + "learning_rate": 1.3261046104773225e-05, + "loss": 0.2305, + "step": 23869 + }, + { + "epoch": 0.4124619850705004, + "grad_norm": 1.107085341106648, + "learning_rate": 1.3260517044109145e-05, + "loss": 0.5916, + "step": 23870 + }, + { + "epoch": 0.4124792645839093, + "grad_norm": 0.6799252259588552, + "learning_rate": 1.3259987973232873e-05, + "loss": 0.3847, + "step": 23871 + }, + { + "epoch": 0.41249654409731823, + "grad_norm": 0.9227684726155453, + "learning_rate": 1.3259458892146075e-05, + "loss": 0.5738, + "step": 23872 + }, + { + "epoch": 0.41251382361072714, + "grad_norm": 0.9469819467381382, + "learning_rate": 1.3258929800850404e-05, + "loss": 0.3558, + "step": 23873 + }, + { + "epoch": 0.41253110312413604, + "grad_norm": 1.1273219927772322, + "learning_rate": 1.3258400699347515e-05, + "loss": 0.3822, + "step": 23874 + }, + { + "epoch": 0.41254838263754495, + "grad_norm": 1.1844559447871887, + "learning_rate": 1.3257871587639071e-05, + "loss": 0.5703, + "step": 23875 + }, + { + "epoch": 0.4125656621509538, + "grad_norm": 1.3187580025668257, + "learning_rate": 1.3257342465726722e-05, + "loss": 0.5993, + "step": 23876 + }, + { + "epoch": 0.4125829416643627, + "grad_norm": 0.4738159977146922, + "learning_rate": 1.3256813333612128e-05, + "loss": 0.5425, + "step": 23877 + }, + { + "epoch": 0.4126002211777716, + "grad_norm": 0.8500638186385895, + "learning_rate": 1.3256284191296948e-05, + "loss": 0.3433, + "step": 23878 + }, + { + "epoch": 0.4126175006911805, + "grad_norm": 1.2967274750686986, + "learning_rate": 1.325575503878284e-05, + "loss": 0.643, + "step": 23879 + }, + { + "epoch": 0.41263478020458944, + "grad_norm": 0.8528242822617139, + "learning_rate": 1.3255225876071457e-05, + "loss": 0.4098, + "step": 23880 + }, + { + "epoch": 0.41265205971799834, + "grad_norm": 0.9085580577906871, + "learning_rate": 1.3254696703164465e-05, + "loss": 0.4676, + "step": 23881 + }, + { + "epoch": 0.41266933923140725, + "grad_norm": 1.0150793463273615, + "learning_rate": 1.325416752006351e-05, + "loss": 0.4001, + "step": 23882 + }, + { + "epoch": 0.41268661874481616, + "grad_norm": 0.9019604735171517, + "learning_rate": 1.3253638326770254e-05, + "loss": 0.4477, + "step": 23883 + }, + { + "epoch": 0.41270389825822507, + "grad_norm": 0.6552389486431947, + "learning_rate": 1.3253109123286364e-05, + "loss": 0.3268, + "step": 23884 + }, + { + "epoch": 0.412721177771634, + "grad_norm": 0.9571130584399239, + "learning_rate": 1.3252579909613481e-05, + "loss": 0.486, + "step": 23885 + }, + { + "epoch": 0.4127384572850428, + "grad_norm": 0.7070608906730207, + "learning_rate": 1.3252050685753273e-05, + "loss": 0.548, + "step": 23886 + }, + { + "epoch": 0.41275573679845173, + "grad_norm": 0.8298857072193747, + "learning_rate": 1.3251521451707396e-05, + "loss": 0.4101, + "step": 23887 + }, + { + "epoch": 0.41277301631186064, + "grad_norm": 0.8793704941181999, + "learning_rate": 1.3250992207477504e-05, + "loss": 0.498, + "step": 23888 + }, + { + "epoch": 0.41279029582526955, + "grad_norm": 0.7837419216743018, + "learning_rate": 1.3250462953065259e-05, + "loss": 0.3706, + "step": 23889 + }, + { + "epoch": 0.41280757533867846, + "grad_norm": 1.4057625039043788, + "learning_rate": 1.3249933688472318e-05, + "loss": 0.7452, + "step": 23890 + }, + { + "epoch": 0.41282485485208736, + "grad_norm": 1.2032835796312178, + "learning_rate": 1.3249404413700336e-05, + "loss": 0.3294, + "step": 23891 + }, + { + "epoch": 0.41284213436549627, + "grad_norm": 1.114577458544611, + "learning_rate": 1.3248875128750973e-05, + "loss": 0.391, + "step": 23892 + }, + { + "epoch": 0.4128594138789052, + "grad_norm": 0.5040933331164864, + "learning_rate": 1.3248345833625886e-05, + "loss": 0.5285, + "step": 23893 + }, + { + "epoch": 0.4128766933923141, + "grad_norm": 0.6967547553001008, + "learning_rate": 1.3247816528326736e-05, + "loss": 0.3365, + "step": 23894 + }, + { + "epoch": 0.412893972905723, + "grad_norm": 1.0100606736887463, + "learning_rate": 1.3247287212855175e-05, + "loss": 0.4635, + "step": 23895 + }, + { + "epoch": 0.4129112524191319, + "grad_norm": 0.6188008656591248, + "learning_rate": 1.3246757887212862e-05, + "loss": 0.2925, + "step": 23896 + }, + { + "epoch": 0.41292853193254075, + "grad_norm": 0.7960241302943676, + "learning_rate": 1.3246228551401459e-05, + "loss": 0.6996, + "step": 23897 + }, + { + "epoch": 0.41294581144594966, + "grad_norm": 0.9700746078194878, + "learning_rate": 1.324569920542262e-05, + "loss": 0.3553, + "step": 23898 + }, + { + "epoch": 0.41296309095935857, + "grad_norm": 0.9463955248308537, + "learning_rate": 1.3245169849278006e-05, + "loss": 0.4348, + "step": 23899 + }, + { + "epoch": 0.4129803704727675, + "grad_norm": 0.3748935987707257, + "learning_rate": 1.324464048296927e-05, + "loss": 0.5098, + "step": 23900 + }, + { + "epoch": 0.4129976499861764, + "grad_norm": 1.171762150008705, + "learning_rate": 1.3244111106498078e-05, + "loss": 0.4716, + "step": 23901 + }, + { + "epoch": 0.4130149294995853, + "grad_norm": 1.3344351393646559, + "learning_rate": 1.3243581719866083e-05, + "loss": 0.4599, + "step": 23902 + }, + { + "epoch": 0.4130322090129942, + "grad_norm": 1.0652436341244997, + "learning_rate": 1.3243052323074942e-05, + "loss": 0.3794, + "step": 23903 + }, + { + "epoch": 0.4130494885264031, + "grad_norm": 0.9989501533235267, + "learning_rate": 1.3242522916126315e-05, + "loss": 0.4691, + "step": 23904 + }, + { + "epoch": 0.413066768039812, + "grad_norm": 1.493220898113491, + "learning_rate": 1.3241993499021857e-05, + "loss": 0.347, + "step": 23905 + }, + { + "epoch": 0.4130840475532209, + "grad_norm": 1.1488366016995004, + "learning_rate": 1.3241464071763232e-05, + "loss": 0.4677, + "step": 23906 + }, + { + "epoch": 0.41310132706662983, + "grad_norm": 2.7859169370162604, + "learning_rate": 1.3240934634352097e-05, + "loss": 0.6196, + "step": 23907 + }, + { + "epoch": 0.4131186065800387, + "grad_norm": 1.1563089877010766, + "learning_rate": 1.3240405186790104e-05, + "loss": 0.5219, + "step": 23908 + }, + { + "epoch": 0.4131358860934476, + "grad_norm": 0.9858732177178114, + "learning_rate": 1.3239875729078918e-05, + "loss": 0.3898, + "step": 23909 + }, + { + "epoch": 0.4131531656068565, + "grad_norm": 1.7480645672741557, + "learning_rate": 1.3239346261220197e-05, + "loss": 0.3691, + "step": 23910 + }, + { + "epoch": 0.4131704451202654, + "grad_norm": 0.6594867469802117, + "learning_rate": 1.323881678321559e-05, + "loss": 0.8538, + "step": 23911 + }, + { + "epoch": 0.4131877246336743, + "grad_norm": 0.9071610421251023, + "learning_rate": 1.3238287295066773e-05, + "loss": 0.4923, + "step": 23912 + }, + { + "epoch": 0.4132050041470832, + "grad_norm": 1.3630398095201566, + "learning_rate": 1.3237757796775389e-05, + "loss": 0.4877, + "step": 23913 + }, + { + "epoch": 0.41322228366049213, + "grad_norm": 0.9651815680095406, + "learning_rate": 1.3237228288343101e-05, + "loss": 0.5148, + "step": 23914 + }, + { + "epoch": 0.41323956317390104, + "grad_norm": 1.0541419038546322, + "learning_rate": 1.3236698769771568e-05, + "loss": 0.4643, + "step": 23915 + }, + { + "epoch": 0.41325684268730994, + "grad_norm": 0.405632315998189, + "learning_rate": 1.3236169241062448e-05, + "loss": 0.8608, + "step": 23916 + }, + { + "epoch": 0.41327412220071885, + "grad_norm": 0.8279806241038637, + "learning_rate": 1.32356397022174e-05, + "loss": 0.5234, + "step": 23917 + }, + { + "epoch": 0.4132914017141277, + "grad_norm": 0.9285497216200524, + "learning_rate": 1.3235110153238084e-05, + "loss": 0.4113, + "step": 23918 + }, + { + "epoch": 0.4133086812275366, + "grad_norm": 1.7428695567504298, + "learning_rate": 1.3234580594126159e-05, + "loss": 0.5625, + "step": 23919 + }, + { + "epoch": 0.4133259607409455, + "grad_norm": 0.9089142125259755, + "learning_rate": 1.3234051024883277e-05, + "loss": 0.4756, + "step": 23920 + }, + { + "epoch": 0.4133432402543544, + "grad_norm": 0.8505940678827594, + "learning_rate": 1.3233521445511104e-05, + "loss": 0.4755, + "step": 23921 + }, + { + "epoch": 0.41336051976776333, + "grad_norm": 0.7805664075301112, + "learning_rate": 1.3232991856011297e-05, + "loss": 0.3454, + "step": 23922 + }, + { + "epoch": 0.41337779928117224, + "grad_norm": 0.9863595911919774, + "learning_rate": 1.3232462256385513e-05, + "loss": 0.3833, + "step": 23923 + }, + { + "epoch": 0.41339507879458115, + "grad_norm": 1.1078943559927965, + "learning_rate": 1.323193264663541e-05, + "loss": 0.5936, + "step": 23924 + }, + { + "epoch": 0.41341235830799006, + "grad_norm": 1.267599258911642, + "learning_rate": 1.323140302676265e-05, + "loss": 0.2952, + "step": 23925 + }, + { + "epoch": 0.41342963782139897, + "grad_norm": 1.4234112050492265, + "learning_rate": 1.3230873396768891e-05, + "loss": 0.3926, + "step": 23926 + }, + { + "epoch": 0.4134469173348079, + "grad_norm": 0.9170877656165399, + "learning_rate": 1.323034375665579e-05, + "loss": 0.509, + "step": 23927 + }, + { + "epoch": 0.4134641968482168, + "grad_norm": 0.8320160330104974, + "learning_rate": 1.3229814106425006e-05, + "loss": 0.7226, + "step": 23928 + }, + { + "epoch": 0.41348147636162563, + "grad_norm": 1.1670161798018595, + "learning_rate": 1.32292844460782e-05, + "loss": 0.499, + "step": 23929 + }, + { + "epoch": 0.41349875587503454, + "grad_norm": 1.0415918310783738, + "learning_rate": 1.3228754775617028e-05, + "loss": 0.5724, + "step": 23930 + }, + { + "epoch": 0.41351603538844345, + "grad_norm": 1.3727320997257602, + "learning_rate": 1.322822509504315e-05, + "loss": 0.5743, + "step": 23931 + }, + { + "epoch": 0.41353331490185236, + "grad_norm": 1.0916789331390588, + "learning_rate": 1.3227695404358227e-05, + "loss": 0.3932, + "step": 23932 + }, + { + "epoch": 0.41355059441526126, + "grad_norm": 1.0803603769521881, + "learning_rate": 1.3227165703563918e-05, + "loss": 0.2945, + "step": 23933 + }, + { + "epoch": 0.41356787392867017, + "grad_norm": 0.49883884194736, + "learning_rate": 1.3226635992661877e-05, + "loss": 0.593, + "step": 23934 + }, + { + "epoch": 0.4135851534420791, + "grad_norm": 1.5527792375672849, + "learning_rate": 1.3226106271653773e-05, + "loss": 0.5931, + "step": 23935 + }, + { + "epoch": 0.413602432955488, + "grad_norm": 0.8880729956396444, + "learning_rate": 1.3225576540541258e-05, + "loss": 0.2831, + "step": 23936 + }, + { + "epoch": 0.4136197124688969, + "grad_norm": 1.0292568307447794, + "learning_rate": 1.3225046799325987e-05, + "loss": 0.3194, + "step": 23937 + }, + { + "epoch": 0.4136369919823058, + "grad_norm": 1.0670804040308328, + "learning_rate": 1.322451704800963e-05, + "loss": 0.476, + "step": 23938 + }, + { + "epoch": 0.41365427149571465, + "grad_norm": 1.1739409909409777, + "learning_rate": 1.3223987286593835e-05, + "loss": 0.4616, + "step": 23939 + }, + { + "epoch": 0.41367155100912356, + "grad_norm": 1.1958161675787264, + "learning_rate": 1.322345751508027e-05, + "loss": 0.5693, + "step": 23940 + }, + { + "epoch": 0.41368883052253247, + "grad_norm": 1.0800941113078872, + "learning_rate": 1.322292773347059e-05, + "loss": 0.305, + "step": 23941 + }, + { + "epoch": 0.4137061100359414, + "grad_norm": 1.1669517573157706, + "learning_rate": 1.3222397941766456e-05, + "loss": 0.4704, + "step": 23942 + }, + { + "epoch": 0.4137233895493503, + "grad_norm": 1.1255130026589368, + "learning_rate": 1.3221868139969525e-05, + "loss": 0.5721, + "step": 23943 + }, + { + "epoch": 0.4137406690627592, + "grad_norm": 0.9450478007119801, + "learning_rate": 1.3221338328081462e-05, + "loss": 0.5158, + "step": 23944 + }, + { + "epoch": 0.4137579485761681, + "grad_norm": 0.43477181212776816, + "learning_rate": 1.3220808506103921e-05, + "loss": 0.4506, + "step": 23945 + }, + { + "epoch": 0.413775228089577, + "grad_norm": 0.9688956254650297, + "learning_rate": 1.3220278674038563e-05, + "loss": 0.4445, + "step": 23946 + }, + { + "epoch": 0.4137925076029859, + "grad_norm": 0.9808169563562416, + "learning_rate": 1.3219748831887047e-05, + "loss": 0.6588, + "step": 23947 + }, + { + "epoch": 0.4138097871163948, + "grad_norm": 1.0720764011905628, + "learning_rate": 1.3219218979651032e-05, + "loss": 0.489, + "step": 23948 + }, + { + "epoch": 0.41382706662980373, + "grad_norm": 0.8900930916896483, + "learning_rate": 1.3218689117332177e-05, + "loss": 0.4539, + "step": 23949 + }, + { + "epoch": 0.4138443461432126, + "grad_norm": 1.8751866298590993, + "learning_rate": 1.3218159244932148e-05, + "loss": 0.4816, + "step": 23950 + }, + { + "epoch": 0.4138616256566215, + "grad_norm": 0.9532058422054399, + "learning_rate": 1.3217629362452596e-05, + "loss": 0.4995, + "step": 23951 + }, + { + "epoch": 0.4138789051700304, + "grad_norm": 0.7549964071126118, + "learning_rate": 1.3217099469895185e-05, + "loss": 0.5156, + "step": 23952 + }, + { + "epoch": 0.4138961846834393, + "grad_norm": 0.8364911621593158, + "learning_rate": 1.3216569567261575e-05, + "loss": 0.3768, + "step": 23953 + }, + { + "epoch": 0.4139134641968482, + "grad_norm": 1.1893403599455448, + "learning_rate": 1.3216039654553423e-05, + "loss": 0.3184, + "step": 23954 + }, + { + "epoch": 0.4139307437102571, + "grad_norm": 1.0842918960959684, + "learning_rate": 1.321550973177239e-05, + "loss": 0.4846, + "step": 23955 + }, + { + "epoch": 0.41394802322366603, + "grad_norm": 2.0970818251271437, + "learning_rate": 1.3214979798920142e-05, + "loss": 0.487, + "step": 23956 + }, + { + "epoch": 0.41396530273707494, + "grad_norm": 0.8623127836030627, + "learning_rate": 1.3214449855998326e-05, + "loss": 0.527, + "step": 23957 + }, + { + "epoch": 0.41398258225048384, + "grad_norm": 0.7101428873613778, + "learning_rate": 1.3213919903008612e-05, + "loss": 0.3183, + "step": 23958 + }, + { + "epoch": 0.41399986176389275, + "grad_norm": 0.834739554844623, + "learning_rate": 1.3213389939952656e-05, + "loss": 0.5771, + "step": 23959 + }, + { + "epoch": 0.4140171412773016, + "grad_norm": 1.2609516510729697, + "learning_rate": 1.3212859966832117e-05, + "loss": 0.3567, + "step": 23960 + }, + { + "epoch": 0.4140344207907105, + "grad_norm": 1.192874936468029, + "learning_rate": 1.3212329983648657e-05, + "loss": 0.4401, + "step": 23961 + }, + { + "epoch": 0.4140517003041194, + "grad_norm": 0.955830994447248, + "learning_rate": 1.321179999040394e-05, + "loss": 0.4503, + "step": 23962 + }, + { + "epoch": 0.4140689798175283, + "grad_norm": 0.9972250914871712, + "learning_rate": 1.3211269987099615e-05, + "loss": 0.4866, + "step": 23963 + }, + { + "epoch": 0.41408625933093723, + "grad_norm": 1.175916199581925, + "learning_rate": 1.321073997373735e-05, + "loss": 0.4422, + "step": 23964 + }, + { + "epoch": 0.41410353884434614, + "grad_norm": 1.3031608173286406, + "learning_rate": 1.3210209950318804e-05, + "loss": 0.4693, + "step": 23965 + }, + { + "epoch": 0.41412081835775505, + "grad_norm": 0.8803462163714305, + "learning_rate": 1.3209679916845639e-05, + "loss": 0.4346, + "step": 23966 + }, + { + "epoch": 0.41413809787116396, + "grad_norm": 1.0422585846547343, + "learning_rate": 1.3209149873319511e-05, + "loss": 0.5394, + "step": 23967 + }, + { + "epoch": 0.41415537738457286, + "grad_norm": 0.8077741323915576, + "learning_rate": 1.3208619819742079e-05, + "loss": 0.3816, + "step": 23968 + }, + { + "epoch": 0.4141726568979818, + "grad_norm": 1.4396469782286663, + "learning_rate": 1.3208089756115007e-05, + "loss": 0.6366, + "step": 23969 + }, + { + "epoch": 0.4141899364113907, + "grad_norm": 1.34201007829626, + "learning_rate": 1.3207559682439956e-05, + "loss": 0.5832, + "step": 23970 + }, + { + "epoch": 0.41420721592479953, + "grad_norm": 1.1897708049912332, + "learning_rate": 1.3207029598718583e-05, + "loss": 0.4803, + "step": 23971 + }, + { + "epoch": 0.41422449543820844, + "grad_norm": 0.7414872811397917, + "learning_rate": 1.320649950495255e-05, + "loss": 0.4403, + "step": 23972 + }, + { + "epoch": 0.41424177495161735, + "grad_norm": 0.8923753962749339, + "learning_rate": 1.3205969401143517e-05, + "loss": 0.5755, + "step": 23973 + }, + { + "epoch": 0.41425905446502626, + "grad_norm": 0.7202542285429632, + "learning_rate": 1.3205439287293143e-05, + "loss": 0.6147, + "step": 23974 + }, + { + "epoch": 0.41427633397843516, + "grad_norm": 0.7882132065287756, + "learning_rate": 1.320490916340309e-05, + "loss": 0.5325, + "step": 23975 + }, + { + "epoch": 0.41429361349184407, + "grad_norm": 1.184859376391504, + "learning_rate": 1.3204379029475021e-05, + "loss": 0.5104, + "step": 23976 + }, + { + "epoch": 0.414310893005253, + "grad_norm": 0.6322211320114101, + "learning_rate": 1.3203848885510588e-05, + "loss": 0.4243, + "step": 23977 + }, + { + "epoch": 0.4143281725186619, + "grad_norm": 1.134238969245199, + "learning_rate": 1.320331873151146e-05, + "loss": 0.5943, + "step": 23978 + }, + { + "epoch": 0.4143454520320708, + "grad_norm": 0.9607384046653475, + "learning_rate": 1.3202788567479297e-05, + "loss": 0.2748, + "step": 23979 + }, + { + "epoch": 0.4143627315454797, + "grad_norm": 1.0005803586970734, + "learning_rate": 1.3202258393415753e-05, + "loss": 0.5502, + "step": 23980 + }, + { + "epoch": 0.4143800110588886, + "grad_norm": 0.8812422230751414, + "learning_rate": 1.3201728209322491e-05, + "loss": 0.424, + "step": 23981 + }, + { + "epoch": 0.41439729057229746, + "grad_norm": 0.9646147259662529, + "learning_rate": 1.3201198015201175e-05, + "loss": 0.4143, + "step": 23982 + }, + { + "epoch": 0.41441457008570637, + "grad_norm": 0.79741277354653, + "learning_rate": 1.3200667811053463e-05, + "loss": 0.4963, + "step": 23983 + }, + { + "epoch": 0.4144318495991153, + "grad_norm": 0.7601923949420436, + "learning_rate": 1.3200137596881018e-05, + "loss": 0.649, + "step": 23984 + }, + { + "epoch": 0.4144491291125242, + "grad_norm": 0.831905998569073, + "learning_rate": 1.3199607372685497e-05, + "loss": 0.5428, + "step": 23985 + }, + { + "epoch": 0.4144664086259331, + "grad_norm": 0.41871780719918933, + "learning_rate": 1.3199077138468561e-05, + "loss": 0.6571, + "step": 23986 + }, + { + "epoch": 0.414483688139342, + "grad_norm": 0.713416295609782, + "learning_rate": 1.3198546894231878e-05, + "loss": 0.3979, + "step": 23987 + }, + { + "epoch": 0.4145009676527509, + "grad_norm": 1.3221101370761563, + "learning_rate": 1.3198016639977099e-05, + "loss": 0.5362, + "step": 23988 + }, + { + "epoch": 0.4145182471661598, + "grad_norm": 0.9328239739161521, + "learning_rate": 1.3197486375705887e-05, + "loss": 0.4712, + "step": 23989 + }, + { + "epoch": 0.4145355266795687, + "grad_norm": 1.2366118591103703, + "learning_rate": 1.3196956101419909e-05, + "loss": 0.4361, + "step": 23990 + }, + { + "epoch": 0.41455280619297763, + "grad_norm": 0.5572640126969676, + "learning_rate": 1.3196425817120817e-05, + "loss": 0.5342, + "step": 23991 + }, + { + "epoch": 0.4145700857063865, + "grad_norm": 0.9170106376486444, + "learning_rate": 1.3195895522810278e-05, + "loss": 0.4887, + "step": 23992 + }, + { + "epoch": 0.4145873652197954, + "grad_norm": 0.8795121736364984, + "learning_rate": 1.3195365218489955e-05, + "loss": 0.3164, + "step": 23993 + }, + { + "epoch": 0.4146046447332043, + "grad_norm": 0.8406086502295443, + "learning_rate": 1.31948349041615e-05, + "loss": 0.3885, + "step": 23994 + }, + { + "epoch": 0.4146219242466132, + "grad_norm": 0.8944204707397212, + "learning_rate": 1.3194304579826583e-05, + "loss": 0.4297, + "step": 23995 + }, + { + "epoch": 0.4146392037600221, + "grad_norm": 0.9986614150555407, + "learning_rate": 1.319377424548686e-05, + "loss": 0.482, + "step": 23996 + }, + { + "epoch": 0.414656483273431, + "grad_norm": 1.2897856027737502, + "learning_rate": 1.3193243901143992e-05, + "loss": 0.5818, + "step": 23997 + }, + { + "epoch": 0.41467376278683993, + "grad_norm": 0.7020024574728366, + "learning_rate": 1.3192713546799645e-05, + "loss": 0.3991, + "step": 23998 + }, + { + "epoch": 0.41469104230024884, + "grad_norm": 0.7337356234325747, + "learning_rate": 1.3192183182455477e-05, + "loss": 0.394, + "step": 23999 + }, + { + "epoch": 0.41470832181365774, + "grad_norm": 1.479679001116238, + "learning_rate": 1.3191652808113142e-05, + "loss": 0.5044, + "step": 24000 + }, + { + "epoch": 0.41472560132706665, + "grad_norm": 1.049805583549821, + "learning_rate": 1.3191122423774312e-05, + "loss": 0.4266, + "step": 24001 + }, + { + "epoch": 0.41474288084047556, + "grad_norm": 0.5885070454880881, + "learning_rate": 1.3190592029440644e-05, + "loss": 0.3796, + "step": 24002 + }, + { + "epoch": 0.4147601603538844, + "grad_norm": 0.8928411948039647, + "learning_rate": 1.3190061625113799e-05, + "loss": 0.3938, + "step": 24003 + }, + { + "epoch": 0.4147774398672933, + "grad_norm": 0.7736325713344175, + "learning_rate": 1.318953121079544e-05, + "loss": 0.3963, + "step": 24004 + }, + { + "epoch": 0.4147947193807022, + "grad_norm": 0.6933889600567383, + "learning_rate": 1.3189000786487226e-05, + "loss": 0.4842, + "step": 24005 + }, + { + "epoch": 0.41481199889411113, + "grad_norm": 0.7184368698374104, + "learning_rate": 1.3188470352190816e-05, + "loss": 0.2387, + "step": 24006 + }, + { + "epoch": 0.41482927840752004, + "grad_norm": 0.6678004793540769, + "learning_rate": 1.318793990790788e-05, + "loss": 0.365, + "step": 24007 + }, + { + "epoch": 0.41484655792092895, + "grad_norm": 1.0627783489167706, + "learning_rate": 1.318740945364007e-05, + "loss": 0.6625, + "step": 24008 + }, + { + "epoch": 0.41486383743433786, + "grad_norm": 1.4552817164463256, + "learning_rate": 1.3186878989389054e-05, + "loss": 0.5103, + "step": 24009 + }, + { + "epoch": 0.41488111694774676, + "grad_norm": 12.960242098491351, + "learning_rate": 1.3186348515156488e-05, + "loss": 0.6201, + "step": 24010 + }, + { + "epoch": 0.41489839646115567, + "grad_norm": 1.0807907818963127, + "learning_rate": 1.3185818030944036e-05, + "loss": 0.4608, + "step": 24011 + }, + { + "epoch": 0.4149156759745646, + "grad_norm": 0.864239880083431, + "learning_rate": 1.3185287536753362e-05, + "loss": 0.5084, + "step": 24012 + }, + { + "epoch": 0.41493295548797343, + "grad_norm": 1.0150407935103591, + "learning_rate": 1.3184757032586127e-05, + "loss": 0.3044, + "step": 24013 + }, + { + "epoch": 0.41495023500138234, + "grad_norm": 0.46400709743521346, + "learning_rate": 1.3184226518443986e-05, + "loss": 0.5277, + "step": 24014 + }, + { + "epoch": 0.41496751451479125, + "grad_norm": 0.8403399007122173, + "learning_rate": 1.3183695994328609e-05, + "loss": 0.2264, + "step": 24015 + }, + { + "epoch": 0.41498479402820015, + "grad_norm": 0.6101676358875581, + "learning_rate": 1.3183165460241654e-05, + "loss": 0.3096, + "step": 24016 + }, + { + "epoch": 0.41500207354160906, + "grad_norm": 0.9452355486864096, + "learning_rate": 1.3182634916184784e-05, + "loss": 0.4848, + "step": 24017 + }, + { + "epoch": 0.41501935305501797, + "grad_norm": 0.6559388331660568, + "learning_rate": 1.3182104362159658e-05, + "loss": 0.4604, + "step": 24018 + }, + { + "epoch": 0.4150366325684269, + "grad_norm": 0.8394432379596395, + "learning_rate": 1.3181573798167939e-05, + "loss": 0.2714, + "step": 24019 + }, + { + "epoch": 0.4150539120818358, + "grad_norm": 0.877371004829441, + "learning_rate": 1.3181043224211289e-05, + "loss": 0.4583, + "step": 24020 + }, + { + "epoch": 0.4150711915952447, + "grad_norm": 1.8678240442775282, + "learning_rate": 1.318051264029137e-05, + "loss": 0.426, + "step": 24021 + }, + { + "epoch": 0.4150884711086536, + "grad_norm": 1.010815000841918, + "learning_rate": 1.3179982046409845e-05, + "loss": 0.4005, + "step": 24022 + }, + { + "epoch": 0.4151057506220625, + "grad_norm": 1.1473362434886776, + "learning_rate": 1.3179451442568371e-05, + "loss": 0.3817, + "step": 24023 + }, + { + "epoch": 0.41512303013547136, + "grad_norm": 0.6817739136014062, + "learning_rate": 1.3178920828768618e-05, + "loss": 0.3305, + "step": 24024 + }, + { + "epoch": 0.41514030964888027, + "grad_norm": 1.1928607113462315, + "learning_rate": 1.3178390205012241e-05, + "loss": 0.4681, + "step": 24025 + }, + { + "epoch": 0.4151575891622892, + "grad_norm": 0.7235237038219009, + "learning_rate": 1.3177859571300905e-05, + "loss": 0.3199, + "step": 24026 + }, + { + "epoch": 0.4151748686756981, + "grad_norm": 0.9693364714755496, + "learning_rate": 1.317732892763627e-05, + "loss": 0.3972, + "step": 24027 + }, + { + "epoch": 0.415192148189107, + "grad_norm": 1.3427808505903707, + "learning_rate": 1.317679827402e-05, + "loss": 0.4802, + "step": 24028 + }, + { + "epoch": 0.4152094277025159, + "grad_norm": 0.8572732061461694, + "learning_rate": 1.317626761045376e-05, + "loss": 0.4486, + "step": 24029 + }, + { + "epoch": 0.4152267072159248, + "grad_norm": 0.9968467076184276, + "learning_rate": 1.3175736936939203e-05, + "loss": 0.3286, + "step": 24030 + }, + { + "epoch": 0.4152439867293337, + "grad_norm": 0.923279645018098, + "learning_rate": 1.3175206253478e-05, + "loss": 0.3589, + "step": 24031 + }, + { + "epoch": 0.4152612662427426, + "grad_norm": 0.8628962059057904, + "learning_rate": 1.3174675560071806e-05, + "loss": 0.4328, + "step": 24032 + }, + { + "epoch": 0.41527854575615153, + "grad_norm": 1.1128626758212428, + "learning_rate": 1.3174144856722291e-05, + "loss": 0.3204, + "step": 24033 + }, + { + "epoch": 0.41529582526956044, + "grad_norm": 1.3266497978156582, + "learning_rate": 1.317361414343111e-05, + "loss": 0.617, + "step": 24034 + }, + { + "epoch": 0.4153131047829693, + "grad_norm": 0.8418765343224673, + "learning_rate": 1.317308342019993e-05, + "loss": 0.36, + "step": 24035 + }, + { + "epoch": 0.4153303842963782, + "grad_norm": 0.9375455029711814, + "learning_rate": 1.3172552687030411e-05, + "loss": 0.4874, + "step": 24036 + }, + { + "epoch": 0.4153476638097871, + "grad_norm": 0.737540984232811, + "learning_rate": 1.3172021943924214e-05, + "loss": 0.2921, + "step": 24037 + }, + { + "epoch": 0.415364943323196, + "grad_norm": 1.1178432848681783, + "learning_rate": 1.3171491190883008e-05, + "loss": 0.4309, + "step": 24038 + }, + { + "epoch": 0.4153822228366049, + "grad_norm": 0.8791685747709561, + "learning_rate": 1.317096042790845e-05, + "loss": 0.4731, + "step": 24039 + }, + { + "epoch": 0.4153995023500138, + "grad_norm": 0.7793217135886461, + "learning_rate": 1.3170429655002199e-05, + "loss": 0.3219, + "step": 24040 + }, + { + "epoch": 0.41541678186342273, + "grad_norm": 0.7750408198704745, + "learning_rate": 1.3169898872165923e-05, + "loss": 0.5134, + "step": 24041 + }, + { + "epoch": 0.41543406137683164, + "grad_norm": 2.0943490417733712, + "learning_rate": 1.3169368079401284e-05, + "loss": 0.4851, + "step": 24042 + }, + { + "epoch": 0.41545134089024055, + "grad_norm": 0.843239384462097, + "learning_rate": 1.316883727670994e-05, + "loss": 0.3797, + "step": 24043 + }, + { + "epoch": 0.41546862040364946, + "grad_norm": 1.6081669202891213, + "learning_rate": 1.316830646409356e-05, + "loss": 0.5074, + "step": 24044 + }, + { + "epoch": 0.4154858999170583, + "grad_norm": 1.1382354142918796, + "learning_rate": 1.3167775641553804e-05, + "loss": 0.5032, + "step": 24045 + }, + { + "epoch": 0.4155031794304672, + "grad_norm": 1.5039635969695826, + "learning_rate": 1.3167244809092333e-05, + "loss": 0.4844, + "step": 24046 + }, + { + "epoch": 0.4155204589438761, + "grad_norm": 0.7597303331045581, + "learning_rate": 1.316671396671081e-05, + "loss": 0.3975, + "step": 24047 + }, + { + "epoch": 0.41553773845728503, + "grad_norm": 1.187980502698327, + "learning_rate": 1.3166183114410898e-05, + "loss": 0.4502, + "step": 24048 + }, + { + "epoch": 0.41555501797069394, + "grad_norm": 1.7565245078144234, + "learning_rate": 1.316565225219426e-05, + "loss": 0.5714, + "step": 24049 + }, + { + "epoch": 0.41557229748410285, + "grad_norm": 0.8462192937741674, + "learning_rate": 1.316512138006256e-05, + "loss": 0.4782, + "step": 24050 + }, + { + "epoch": 0.41558957699751176, + "grad_norm": 0.7861249454554171, + "learning_rate": 1.3164590498017455e-05, + "loss": 0.5134, + "step": 24051 + }, + { + "epoch": 0.41560685651092066, + "grad_norm": 1.4743370707555377, + "learning_rate": 1.3164059606060615e-05, + "loss": 0.6003, + "step": 24052 + }, + { + "epoch": 0.41562413602432957, + "grad_norm": 1.61773613801666, + "learning_rate": 1.3163528704193702e-05, + "loss": 0.4185, + "step": 24053 + }, + { + "epoch": 0.4156414155377385, + "grad_norm": 0.7675176978771545, + "learning_rate": 1.3162997792418373e-05, + "loss": 0.4023, + "step": 24054 + }, + { + "epoch": 0.4156586950511474, + "grad_norm": 0.49846778706854855, + "learning_rate": 1.3162466870736297e-05, + "loss": 0.6346, + "step": 24055 + }, + { + "epoch": 0.41567597456455624, + "grad_norm": 1.2261413755022867, + "learning_rate": 1.3161935939149133e-05, + "loss": 0.4335, + "step": 24056 + }, + { + "epoch": 0.41569325407796515, + "grad_norm": 0.5456369011400787, + "learning_rate": 1.3161404997658545e-05, + "loss": 0.2892, + "step": 24057 + }, + { + "epoch": 0.41571053359137405, + "grad_norm": 1.2231827767754502, + "learning_rate": 1.3160874046266198e-05, + "loss": 0.4797, + "step": 24058 + }, + { + "epoch": 0.41572781310478296, + "grad_norm": 0.9569517453527612, + "learning_rate": 1.3160343084973755e-05, + "loss": 0.385, + "step": 24059 + }, + { + "epoch": 0.41574509261819187, + "grad_norm": 1.7642740483160595, + "learning_rate": 1.3159812113782873e-05, + "loss": 0.4533, + "step": 24060 + }, + { + "epoch": 0.4157623721316008, + "grad_norm": 0.9262260315973816, + "learning_rate": 1.3159281132695224e-05, + "loss": 0.5215, + "step": 24061 + }, + { + "epoch": 0.4157796516450097, + "grad_norm": 1.494059183217403, + "learning_rate": 1.3158750141712463e-05, + "loss": 0.719, + "step": 24062 + }, + { + "epoch": 0.4157969311584186, + "grad_norm": 2.7508999393720437, + "learning_rate": 1.3158219140836255e-05, + "loss": 0.8535, + "step": 24063 + }, + { + "epoch": 0.4158142106718275, + "grad_norm": 0.8777359766450219, + "learning_rate": 1.3157688130068267e-05, + "loss": 0.4689, + "step": 24064 + }, + { + "epoch": 0.4158314901852364, + "grad_norm": 1.4002452936845315, + "learning_rate": 1.3157157109410159e-05, + "loss": 0.5927, + "step": 24065 + }, + { + "epoch": 0.41584876969864526, + "grad_norm": 1.1995066343460161, + "learning_rate": 1.3156626078863595e-05, + "loss": 0.6169, + "step": 24066 + }, + { + "epoch": 0.41586604921205417, + "grad_norm": 0.7547077457207486, + "learning_rate": 1.315609503843024e-05, + "loss": 0.376, + "step": 24067 + }, + { + "epoch": 0.4158833287254631, + "grad_norm": 0.7413734379668471, + "learning_rate": 1.3155563988111752e-05, + "loss": 0.5675, + "step": 24068 + }, + { + "epoch": 0.415900608238872, + "grad_norm": 1.5531842372159874, + "learning_rate": 1.31550329279098e-05, + "loss": 0.4893, + "step": 24069 + }, + { + "epoch": 0.4159178877522809, + "grad_norm": 0.4666835833612994, + "learning_rate": 1.3154501857826048e-05, + "loss": 0.4963, + "step": 24070 + }, + { + "epoch": 0.4159351672656898, + "grad_norm": 1.4483899520025063, + "learning_rate": 1.315397077786215e-05, + "loss": 0.6248, + "step": 24071 + }, + { + "epoch": 0.4159524467790987, + "grad_norm": 1.2829719814604486, + "learning_rate": 1.3153439688019781e-05, + "loss": 0.4891, + "step": 24072 + }, + { + "epoch": 0.4159697262925076, + "grad_norm": 0.9745124231934293, + "learning_rate": 1.3152908588300597e-05, + "loss": 0.4637, + "step": 24073 + }, + { + "epoch": 0.4159870058059165, + "grad_norm": 0.9809300323925834, + "learning_rate": 1.3152377478706264e-05, + "loss": 0.5466, + "step": 24074 + }, + { + "epoch": 0.41600428531932543, + "grad_norm": 0.4885678592012907, + "learning_rate": 1.3151846359238444e-05, + "loss": 0.7648, + "step": 24075 + }, + { + "epoch": 0.41602156483273434, + "grad_norm": 1.4206326337344128, + "learning_rate": 1.3151315229898804e-05, + "loss": 0.4926, + "step": 24076 + }, + { + "epoch": 0.4160388443461432, + "grad_norm": 0.9223567284705061, + "learning_rate": 1.3150784090689005e-05, + "loss": 0.5272, + "step": 24077 + }, + { + "epoch": 0.4160561238595521, + "grad_norm": 1.0269890406692728, + "learning_rate": 1.315025294161071e-05, + "loss": 0.5167, + "step": 24078 + }, + { + "epoch": 0.416073403372961, + "grad_norm": 0.9863033307767687, + "learning_rate": 1.3149721782665585e-05, + "loss": 0.2803, + "step": 24079 + }, + { + "epoch": 0.4160906828863699, + "grad_norm": 0.40288430210533327, + "learning_rate": 1.3149190613855289e-05, + "loss": 0.4411, + "step": 24080 + }, + { + "epoch": 0.4161079623997788, + "grad_norm": 0.8570854133075262, + "learning_rate": 1.314865943518149e-05, + "loss": 0.3041, + "step": 24081 + }, + { + "epoch": 0.4161252419131877, + "grad_norm": 0.7671995912533646, + "learning_rate": 1.314812824664585e-05, + "loss": 0.5276, + "step": 24082 + }, + { + "epoch": 0.41614252142659663, + "grad_norm": 0.8826815687236949, + "learning_rate": 1.3147597048250034e-05, + "loss": 0.4281, + "step": 24083 + }, + { + "epoch": 0.41615980094000554, + "grad_norm": 1.3442986664365235, + "learning_rate": 1.3147065839995705e-05, + "loss": 0.4629, + "step": 24084 + }, + { + "epoch": 0.41617708045341445, + "grad_norm": 1.0381825111982226, + "learning_rate": 1.3146534621884526e-05, + "loss": 0.3464, + "step": 24085 + }, + { + "epoch": 0.41619435996682336, + "grad_norm": 0.8546072220253891, + "learning_rate": 1.3146003393918162e-05, + "loss": 0.5996, + "step": 24086 + }, + { + "epoch": 0.4162116394802322, + "grad_norm": 0.9136536859682545, + "learning_rate": 1.3145472156098274e-05, + "loss": 0.4599, + "step": 24087 + }, + { + "epoch": 0.4162289189936411, + "grad_norm": 1.351380854624701, + "learning_rate": 1.3144940908426532e-05, + "loss": 0.3865, + "step": 24088 + }, + { + "epoch": 0.41624619850705, + "grad_norm": 1.6846675638715947, + "learning_rate": 1.314440965090459e-05, + "loss": 0.5067, + "step": 24089 + }, + { + "epoch": 0.41626347802045893, + "grad_norm": 1.092654486403738, + "learning_rate": 1.3143878383534126e-05, + "loss": 0.3649, + "step": 24090 + }, + { + "epoch": 0.41628075753386784, + "grad_norm": 0.44819797683790813, + "learning_rate": 1.314334710631679e-05, + "loss": 0.4107, + "step": 24091 + }, + { + "epoch": 0.41629803704727675, + "grad_norm": 0.9877833889792696, + "learning_rate": 1.3142815819254256e-05, + "loss": 0.3245, + "step": 24092 + }, + { + "epoch": 0.41631531656068566, + "grad_norm": 0.7721648903045178, + "learning_rate": 1.314228452234818e-05, + "loss": 0.4016, + "step": 24093 + }, + { + "epoch": 0.41633259607409456, + "grad_norm": 0.7084859535142265, + "learning_rate": 1.314175321560023e-05, + "loss": 0.5063, + "step": 24094 + }, + { + "epoch": 0.41634987558750347, + "grad_norm": 1.6837338999760036, + "learning_rate": 1.3141221899012074e-05, + "loss": 0.5477, + "step": 24095 + }, + { + "epoch": 0.4163671551009124, + "grad_norm": 1.4575680676098897, + "learning_rate": 1.314069057258537e-05, + "loss": 0.3458, + "step": 24096 + }, + { + "epoch": 0.4163844346143213, + "grad_norm": 1.653168374674468, + "learning_rate": 1.3140159236321784e-05, + "loss": 0.7638, + "step": 24097 + }, + { + "epoch": 0.41640171412773014, + "grad_norm": 1.9246389819658025, + "learning_rate": 1.3139627890222982e-05, + "loss": 0.5633, + "step": 24098 + }, + { + "epoch": 0.41641899364113905, + "grad_norm": 1.2900923667777144, + "learning_rate": 1.3139096534290628e-05, + "loss": 0.4418, + "step": 24099 + }, + { + "epoch": 0.41643627315454795, + "grad_norm": 1.1504986404365642, + "learning_rate": 1.3138565168526382e-05, + "loss": 0.474, + "step": 24100 + }, + { + "epoch": 0.41645355266795686, + "grad_norm": 0.9265608064101618, + "learning_rate": 1.3138033792931915e-05, + "loss": 0.3232, + "step": 24101 + }, + { + "epoch": 0.41647083218136577, + "grad_norm": 0.6754447811657329, + "learning_rate": 1.3137502407508884e-05, + "loss": 0.5273, + "step": 24102 + }, + { + "epoch": 0.4164881116947747, + "grad_norm": 1.5610659634517727, + "learning_rate": 1.313697101225896e-05, + "loss": 0.4434, + "step": 24103 + }, + { + "epoch": 0.4165053912081836, + "grad_norm": 1.1071819199718245, + "learning_rate": 1.3136439607183801e-05, + "loss": 0.3092, + "step": 24104 + }, + { + "epoch": 0.4165226707215925, + "grad_norm": 0.5339406267165124, + "learning_rate": 1.3135908192285079e-05, + "loss": 0.5828, + "step": 24105 + }, + { + "epoch": 0.4165399502350014, + "grad_norm": 1.2679352778251727, + "learning_rate": 1.3135376767564449e-05, + "loss": 0.3546, + "step": 24106 + }, + { + "epoch": 0.4165572297484103, + "grad_norm": 0.7548795333121129, + "learning_rate": 1.3134845333023585e-05, + "loss": 0.4381, + "step": 24107 + }, + { + "epoch": 0.4165745092618192, + "grad_norm": 1.341732488692399, + "learning_rate": 1.3134313888664145e-05, + "loss": 0.5513, + "step": 24108 + }, + { + "epoch": 0.41659178877522807, + "grad_norm": 1.3418683027067988, + "learning_rate": 1.3133782434487797e-05, + "loss": 0.3862, + "step": 24109 + }, + { + "epoch": 0.416609068288637, + "grad_norm": 0.8530854663332755, + "learning_rate": 1.3133250970496205e-05, + "loss": 0.5085, + "step": 24110 + }, + { + "epoch": 0.4166263478020459, + "grad_norm": 0.8966237277810013, + "learning_rate": 1.3132719496691029e-05, + "loss": 0.4432, + "step": 24111 + }, + { + "epoch": 0.4166436273154548, + "grad_norm": 0.6670761535003663, + "learning_rate": 1.3132188013073942e-05, + "loss": 0.4962, + "step": 24112 + }, + { + "epoch": 0.4166609068288637, + "grad_norm": 0.9860541550018499, + "learning_rate": 1.3131656519646603e-05, + "loss": 0.6555, + "step": 24113 + }, + { + "epoch": 0.4166781863422726, + "grad_norm": 0.901154702134489, + "learning_rate": 1.3131125016410673e-05, + "loss": 0.3863, + "step": 24114 + }, + { + "epoch": 0.4166954658556815, + "grad_norm": 0.822832238277302, + "learning_rate": 1.3130593503367828e-05, + "loss": 0.4546, + "step": 24115 + }, + { + "epoch": 0.4167127453690904, + "grad_norm": 1.1282681199258187, + "learning_rate": 1.3130061980519724e-05, + "loss": 0.5831, + "step": 24116 + }, + { + "epoch": 0.41673002488249933, + "grad_norm": 1.065344684164102, + "learning_rate": 1.3129530447868026e-05, + "loss": 0.4477, + "step": 24117 + }, + { + "epoch": 0.41674730439590824, + "grad_norm": 1.3358562419959958, + "learning_rate": 1.3128998905414402e-05, + "loss": 0.4696, + "step": 24118 + }, + { + "epoch": 0.4167645839093171, + "grad_norm": 0.8648745095675535, + "learning_rate": 1.3128467353160517e-05, + "loss": 0.5628, + "step": 24119 + }, + { + "epoch": 0.416781863422726, + "grad_norm": 1.1513253018889535, + "learning_rate": 1.3127935791108035e-05, + "loss": 0.591, + "step": 24120 + }, + { + "epoch": 0.4167991429361349, + "grad_norm": 0.6462643020247177, + "learning_rate": 1.3127404219258617e-05, + "loss": 0.3358, + "step": 24121 + }, + { + "epoch": 0.4168164224495438, + "grad_norm": 0.9980239382584821, + "learning_rate": 1.3126872637613937e-05, + "loss": 0.4632, + "step": 24122 + }, + { + "epoch": 0.4168337019629527, + "grad_norm": 0.6513933838767446, + "learning_rate": 1.3126341046175648e-05, + "loss": 0.3483, + "step": 24123 + }, + { + "epoch": 0.4168509814763616, + "grad_norm": 1.084671334627352, + "learning_rate": 1.3125809444945423e-05, + "loss": 0.7356, + "step": 24124 + }, + { + "epoch": 0.41686826098977053, + "grad_norm": 1.1024229435536286, + "learning_rate": 1.3125277833924928e-05, + "loss": 0.3897, + "step": 24125 + }, + { + "epoch": 0.41688554050317944, + "grad_norm": 0.7143015028149816, + "learning_rate": 1.3124746213115821e-05, + "loss": 0.3971, + "step": 24126 + }, + { + "epoch": 0.41690282001658835, + "grad_norm": 1.0293213088657907, + "learning_rate": 1.3124214582519774e-05, + "loss": 0.4577, + "step": 24127 + }, + { + "epoch": 0.41692009952999726, + "grad_norm": 1.1790905616472203, + "learning_rate": 1.3123682942138449e-05, + "loss": 0.4714, + "step": 24128 + }, + { + "epoch": 0.41693737904340616, + "grad_norm": 0.8473875938718766, + "learning_rate": 1.312315129197351e-05, + "loss": 0.2624, + "step": 24129 + }, + { + "epoch": 0.416954658556815, + "grad_norm": 0.42200483061640587, + "learning_rate": 1.3122619632026628e-05, + "loss": 0.5272, + "step": 24130 + }, + { + "epoch": 0.4169719380702239, + "grad_norm": 1.2741979764200877, + "learning_rate": 1.3122087962299461e-05, + "loss": 0.4417, + "step": 24131 + }, + { + "epoch": 0.41698921758363283, + "grad_norm": 0.7344462550302554, + "learning_rate": 1.3121556282793679e-05, + "loss": 0.3588, + "step": 24132 + }, + { + "epoch": 0.41700649709704174, + "grad_norm": 0.8911124833406042, + "learning_rate": 1.3121024593510946e-05, + "loss": 0.4781, + "step": 24133 + }, + { + "epoch": 0.41702377661045065, + "grad_norm": 0.7398671085535798, + "learning_rate": 1.3120492894452924e-05, + "loss": 0.2707, + "step": 24134 + }, + { + "epoch": 0.41704105612385955, + "grad_norm": 1.0387766194624015, + "learning_rate": 1.3119961185621282e-05, + "loss": 0.3889, + "step": 24135 + }, + { + "epoch": 0.41705833563726846, + "grad_norm": 1.6872949424414563, + "learning_rate": 1.3119429467017683e-05, + "loss": 0.4448, + "step": 24136 + }, + { + "epoch": 0.41707561515067737, + "grad_norm": 0.8732737710468806, + "learning_rate": 1.3118897738643797e-05, + "loss": 0.5786, + "step": 24137 + }, + { + "epoch": 0.4170928946640863, + "grad_norm": 1.3241187661118372, + "learning_rate": 1.3118366000501284e-05, + "loss": 0.5115, + "step": 24138 + }, + { + "epoch": 0.4171101741774952, + "grad_norm": 0.82282113677652, + "learning_rate": 1.3117834252591813e-05, + "loss": 0.5333, + "step": 24139 + }, + { + "epoch": 0.41712745369090404, + "grad_norm": 1.3676012436993272, + "learning_rate": 1.3117302494917048e-05, + "loss": 0.4586, + "step": 24140 + }, + { + "epoch": 0.41714473320431295, + "grad_norm": 0.9160646153397548, + "learning_rate": 1.3116770727478656e-05, + "loss": 0.309, + "step": 24141 + }, + { + "epoch": 0.41716201271772185, + "grad_norm": 0.7875592066636549, + "learning_rate": 1.31162389502783e-05, + "loss": 0.3583, + "step": 24142 + }, + { + "epoch": 0.41717929223113076, + "grad_norm": 1.0700882730338166, + "learning_rate": 1.3115707163317644e-05, + "loss": 0.4059, + "step": 24143 + }, + { + "epoch": 0.41719657174453967, + "grad_norm": 1.1228728071079366, + "learning_rate": 1.3115175366598361e-05, + "loss": 0.44, + "step": 24144 + }, + { + "epoch": 0.4172138512579486, + "grad_norm": 1.1283543400930691, + "learning_rate": 1.311464356012211e-05, + "loss": 0.3536, + "step": 24145 + }, + { + "epoch": 0.4172311307713575, + "grad_norm": 0.8126278375710765, + "learning_rate": 1.3114111743890555e-05, + "loss": 0.5472, + "step": 24146 + }, + { + "epoch": 0.4172484102847664, + "grad_norm": 0.9571032443277191, + "learning_rate": 1.311357991790537e-05, + "loss": 0.4816, + "step": 24147 + }, + { + "epoch": 0.4172656897981753, + "grad_norm": 0.569944300700435, + "learning_rate": 1.3113048082168216e-05, + "loss": 0.341, + "step": 24148 + }, + { + "epoch": 0.4172829693115842, + "grad_norm": 1.1351919751284865, + "learning_rate": 1.3112516236680754e-05, + "loss": 0.4237, + "step": 24149 + }, + { + "epoch": 0.4173002488249931, + "grad_norm": 1.436236688212412, + "learning_rate": 1.3111984381444661e-05, + "loss": 0.5333, + "step": 24150 + }, + { + "epoch": 0.41731752833840197, + "grad_norm": 1.079419837223846, + "learning_rate": 1.3111452516461594e-05, + "loss": 0.3118, + "step": 24151 + }, + { + "epoch": 0.4173348078518109, + "grad_norm": 0.5068384993062579, + "learning_rate": 1.3110920641733223e-05, + "loss": 0.4748, + "step": 24152 + }, + { + "epoch": 0.4173520873652198, + "grad_norm": 0.8784503309507773, + "learning_rate": 1.311038875726121e-05, + "loss": 0.6038, + "step": 24153 + }, + { + "epoch": 0.4173693668786287, + "grad_norm": 0.9243493702306126, + "learning_rate": 1.310985686304722e-05, + "loss": 0.3835, + "step": 24154 + }, + { + "epoch": 0.4173866463920376, + "grad_norm": 0.5495367181886501, + "learning_rate": 1.3109324959092927e-05, + "loss": 0.6204, + "step": 24155 + }, + { + "epoch": 0.4174039259054465, + "grad_norm": 1.0810239880607109, + "learning_rate": 1.310879304539999e-05, + "loss": 0.3936, + "step": 24156 + }, + { + "epoch": 0.4174212054188554, + "grad_norm": 1.1937300539486042, + "learning_rate": 1.3108261121970074e-05, + "loss": 0.4189, + "step": 24157 + }, + { + "epoch": 0.4174384849322643, + "grad_norm": 0.9371036233387333, + "learning_rate": 1.3107729188804853e-05, + "loss": 0.6065, + "step": 24158 + }, + { + "epoch": 0.4174557644456732, + "grad_norm": 1.1878908216115691, + "learning_rate": 1.3107197245905984e-05, + "loss": 0.624, + "step": 24159 + }, + { + "epoch": 0.41747304395908214, + "grad_norm": 0.8760952637304442, + "learning_rate": 1.3106665293275143e-05, + "loss": 0.3082, + "step": 24160 + }, + { + "epoch": 0.417490323472491, + "grad_norm": 1.155553356785297, + "learning_rate": 1.3106133330913982e-05, + "loss": 0.455, + "step": 24161 + }, + { + "epoch": 0.4175076029858999, + "grad_norm": 0.9949347359712429, + "learning_rate": 1.3105601358824183e-05, + "loss": 0.483, + "step": 24162 + }, + { + "epoch": 0.4175248824993088, + "grad_norm": 1.150702523351254, + "learning_rate": 1.31050693770074e-05, + "loss": 0.3893, + "step": 24163 + }, + { + "epoch": 0.4175421620127177, + "grad_norm": 1.1283940092120255, + "learning_rate": 1.3104537385465307e-05, + "loss": 0.473, + "step": 24164 + }, + { + "epoch": 0.4175594415261266, + "grad_norm": 0.8807082397195984, + "learning_rate": 1.3104005384199563e-05, + "loss": 0.4397, + "step": 24165 + }, + { + "epoch": 0.4175767210395355, + "grad_norm": 0.7087770878054861, + "learning_rate": 1.310347337321184e-05, + "loss": 0.4298, + "step": 24166 + }, + { + "epoch": 0.41759400055294443, + "grad_norm": 1.062704691683334, + "learning_rate": 1.3102941352503801e-05, + "loss": 0.4264, + "step": 24167 + }, + { + "epoch": 0.41761128006635334, + "grad_norm": 1.026599153414148, + "learning_rate": 1.3102409322077117e-05, + "loss": 0.3759, + "step": 24168 + }, + { + "epoch": 0.41762855957976225, + "grad_norm": 0.8810530462638504, + "learning_rate": 1.3101877281933449e-05, + "loss": 0.5046, + "step": 24169 + }, + { + "epoch": 0.41764583909317116, + "grad_norm": 0.9058715060450699, + "learning_rate": 1.3101345232074465e-05, + "loss": 0.7552, + "step": 24170 + }, + { + "epoch": 0.41766311860658006, + "grad_norm": 0.7919701063037257, + "learning_rate": 1.3100813172501835e-05, + "loss": 0.402, + "step": 24171 + }, + { + "epoch": 0.4176803981199889, + "grad_norm": 1.0247742532432433, + "learning_rate": 1.3100281103217221e-05, + "loss": 0.6252, + "step": 24172 + }, + { + "epoch": 0.4176976776333978, + "grad_norm": 1.3082978979798185, + "learning_rate": 1.309974902422229e-05, + "loss": 0.6105, + "step": 24173 + }, + { + "epoch": 0.41771495714680673, + "grad_norm": 0.6744739842809182, + "learning_rate": 1.309921693551871e-05, + "loss": 0.3596, + "step": 24174 + }, + { + "epoch": 0.41773223666021564, + "grad_norm": 1.3492030277592124, + "learning_rate": 1.3098684837108149e-05, + "loss": 0.4325, + "step": 24175 + }, + { + "epoch": 0.41774951617362455, + "grad_norm": 1.2406904592912662, + "learning_rate": 1.3098152728992268e-05, + "loss": 0.4029, + "step": 24176 + }, + { + "epoch": 0.41776679568703345, + "grad_norm": 0.7374073984702985, + "learning_rate": 1.3097620611172738e-05, + "loss": 0.5377, + "step": 24177 + }, + { + "epoch": 0.41778407520044236, + "grad_norm": 1.2013497721804036, + "learning_rate": 1.3097088483651228e-05, + "loss": 0.5138, + "step": 24178 + }, + { + "epoch": 0.41780135471385127, + "grad_norm": 0.955745919517439, + "learning_rate": 1.3096556346429398e-05, + "loss": 0.483, + "step": 24179 + }, + { + "epoch": 0.4178186342272602, + "grad_norm": 1.4506604279968396, + "learning_rate": 1.3096024199508921e-05, + "loss": 0.5076, + "step": 24180 + }, + { + "epoch": 0.4178359137406691, + "grad_norm": 1.089671709063047, + "learning_rate": 1.3095492042891458e-05, + "loss": 0.3883, + "step": 24181 + }, + { + "epoch": 0.417853193254078, + "grad_norm": 0.6298686096021422, + "learning_rate": 1.3094959876578683e-05, + "loss": 0.7503, + "step": 24182 + }, + { + "epoch": 0.41787047276748684, + "grad_norm": 1.3173270270118582, + "learning_rate": 1.3094427700572253e-05, + "loss": 0.4169, + "step": 24183 + }, + { + "epoch": 0.41788775228089575, + "grad_norm": 0.7615911412811253, + "learning_rate": 1.3093895514873844e-05, + "loss": 0.3359, + "step": 24184 + }, + { + "epoch": 0.41790503179430466, + "grad_norm": 1.3183968806257746, + "learning_rate": 1.309336331948512e-05, + "loss": 0.3374, + "step": 24185 + }, + { + "epoch": 0.41792231130771357, + "grad_norm": 1.1059807535694426, + "learning_rate": 1.3092831114407743e-05, + "loss": 0.3657, + "step": 24186 + }, + { + "epoch": 0.4179395908211225, + "grad_norm": 0.9337111445999942, + "learning_rate": 1.3092298899643385e-05, + "loss": 0.3713, + "step": 24187 + }, + { + "epoch": 0.4179568703345314, + "grad_norm": 0.8261374052960465, + "learning_rate": 1.3091766675193714e-05, + "loss": 0.618, + "step": 24188 + }, + { + "epoch": 0.4179741498479403, + "grad_norm": 0.8446552361150106, + "learning_rate": 1.3091234441060392e-05, + "loss": 0.2769, + "step": 24189 + }, + { + "epoch": 0.4179914293613492, + "grad_norm": 1.108204449275137, + "learning_rate": 1.3090702197245092e-05, + "loss": 0.6142, + "step": 24190 + }, + { + "epoch": 0.4180087088747581, + "grad_norm": 0.7480886496367036, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.3382, + "step": 24191 + }, + { + "epoch": 0.418025988388167, + "grad_norm": 1.025421371748793, + "learning_rate": 1.3089637680575212e-05, + "loss": 0.6325, + "step": 24192 + }, + { + "epoch": 0.41804326790157587, + "grad_norm": 1.241617468229297, + "learning_rate": 1.3089105407723971e-05, + "loss": 0.5473, + "step": 24193 + }, + { + "epoch": 0.4180605474149848, + "grad_norm": 0.730476359245921, + "learning_rate": 1.3088573125197414e-05, + "loss": 0.3615, + "step": 24194 + }, + { + "epoch": 0.4180778269283937, + "grad_norm": 0.9564713409163004, + "learning_rate": 1.3088040832997213e-05, + "loss": 0.5203, + "step": 24195 + }, + { + "epoch": 0.4180951064418026, + "grad_norm": 1.1725340141480243, + "learning_rate": 1.3087508531125033e-05, + "loss": 0.534, + "step": 24196 + }, + { + "epoch": 0.4181123859552115, + "grad_norm": 0.7808712570410348, + "learning_rate": 1.3086976219582544e-05, + "loss": 0.3004, + "step": 24197 + }, + { + "epoch": 0.4181296654686204, + "grad_norm": 0.807534348389961, + "learning_rate": 1.3086443898371406e-05, + "loss": 0.3456, + "step": 24198 + }, + { + "epoch": 0.4181469449820293, + "grad_norm": 0.5949080322163577, + "learning_rate": 1.3085911567493292e-05, + "loss": 0.7307, + "step": 24199 + }, + { + "epoch": 0.4181642244954382, + "grad_norm": 1.0007300870799465, + "learning_rate": 1.308537922694987e-05, + "loss": 0.3729, + "step": 24200 + }, + { + "epoch": 0.4181815040088471, + "grad_norm": 0.7912118614104119, + "learning_rate": 1.3084846876742807e-05, + "loss": 0.6279, + "step": 24201 + }, + { + "epoch": 0.41819878352225603, + "grad_norm": 0.9842057138627046, + "learning_rate": 1.3084314516873766e-05, + "loss": 0.4001, + "step": 24202 + }, + { + "epoch": 0.41821606303566494, + "grad_norm": 1.0772616157682309, + "learning_rate": 1.3083782147344421e-05, + "loss": 0.4617, + "step": 24203 + }, + { + "epoch": 0.4182333425490738, + "grad_norm": 1.0545935184513224, + "learning_rate": 1.3083249768156434e-05, + "loss": 0.4303, + "step": 24204 + }, + { + "epoch": 0.4182506220624827, + "grad_norm": 1.3688434388479902, + "learning_rate": 1.3082717379311477e-05, + "loss": 0.4579, + "step": 24205 + }, + { + "epoch": 0.4182679015758916, + "grad_norm": 0.5614625539241636, + "learning_rate": 1.308218498081121e-05, + "loss": 0.6586, + "step": 24206 + }, + { + "epoch": 0.4182851810893005, + "grad_norm": 1.063730540385414, + "learning_rate": 1.3081652572657308e-05, + "loss": 0.4607, + "step": 24207 + }, + { + "epoch": 0.4183024606027094, + "grad_norm": 0.4207297628317, + "learning_rate": 1.3081120154851438e-05, + "loss": 0.6331, + "step": 24208 + }, + { + "epoch": 0.41831974011611833, + "grad_norm": 1.1321603960526019, + "learning_rate": 1.3080587727395262e-05, + "loss": 0.5169, + "step": 24209 + }, + { + "epoch": 0.41833701962952724, + "grad_norm": 1.1916124443206608, + "learning_rate": 1.3080055290290452e-05, + "loss": 0.3594, + "step": 24210 + }, + { + "epoch": 0.41835429914293615, + "grad_norm": 1.267326287626012, + "learning_rate": 1.3079522843538676e-05, + "loss": 0.5269, + "step": 24211 + }, + { + "epoch": 0.41837157865634506, + "grad_norm": 0.6268601241479776, + "learning_rate": 1.30789903871416e-05, + "loss": 0.7998, + "step": 24212 + }, + { + "epoch": 0.41838885816975396, + "grad_norm": 1.142891131642411, + "learning_rate": 1.3078457921100894e-05, + "loss": 0.5269, + "step": 24213 + }, + { + "epoch": 0.4184061376831628, + "grad_norm": 0.7473032626021294, + "learning_rate": 1.307792544541822e-05, + "loss": 0.4274, + "step": 24214 + }, + { + "epoch": 0.4184234171965717, + "grad_norm": 0.8221202896646875, + "learning_rate": 1.3077392960095252e-05, + "loss": 0.4964, + "step": 24215 + }, + { + "epoch": 0.41844069670998063, + "grad_norm": 0.8419850501800764, + "learning_rate": 1.3076860465133656e-05, + "loss": 0.4996, + "step": 24216 + }, + { + "epoch": 0.41845797622338954, + "grad_norm": 0.9687029639817674, + "learning_rate": 1.3076327960535097e-05, + "loss": 0.7627, + "step": 24217 + }, + { + "epoch": 0.41847525573679845, + "grad_norm": 0.8735302050736617, + "learning_rate": 1.3075795446301246e-05, + "loss": 0.4091, + "step": 24218 + }, + { + "epoch": 0.41849253525020735, + "grad_norm": 0.9182894291497047, + "learning_rate": 1.307526292243377e-05, + "loss": 0.4984, + "step": 24219 + }, + { + "epoch": 0.41850981476361626, + "grad_norm": 1.3331459799324479, + "learning_rate": 1.3074730388934338e-05, + "loss": 0.4616, + "step": 24220 + }, + { + "epoch": 0.41852709427702517, + "grad_norm": 1.5258494498751045, + "learning_rate": 1.3074197845804613e-05, + "loss": 0.4725, + "step": 24221 + }, + { + "epoch": 0.4185443737904341, + "grad_norm": 0.7623229448137802, + "learning_rate": 1.307366529304627e-05, + "loss": 0.3684, + "step": 24222 + }, + { + "epoch": 0.418561653303843, + "grad_norm": 1.0060176483817365, + "learning_rate": 1.3073132730660974e-05, + "loss": 0.3897, + "step": 24223 + }, + { + "epoch": 0.4185789328172519, + "grad_norm": 1.0123053598289435, + "learning_rate": 1.307260015865039e-05, + "loss": 0.4413, + "step": 24224 + }, + { + "epoch": 0.41859621233066074, + "grad_norm": 1.1976206123266941, + "learning_rate": 1.3072067577016192e-05, + "loss": 0.3987, + "step": 24225 + }, + { + "epoch": 0.41861349184406965, + "grad_norm": 1.2252842453060546, + "learning_rate": 1.3071534985760042e-05, + "loss": 0.4911, + "step": 24226 + }, + { + "epoch": 0.41863077135747856, + "grad_norm": 1.4235159631690602, + "learning_rate": 1.3071002384883614e-05, + "loss": 0.5124, + "step": 24227 + }, + { + "epoch": 0.41864805087088747, + "grad_norm": 1.1724618089880157, + "learning_rate": 1.3070469774388572e-05, + "loss": 0.4142, + "step": 24228 + }, + { + "epoch": 0.4186653303842964, + "grad_norm": 0.9398099566636267, + "learning_rate": 1.3069937154276582e-05, + "loss": 0.615, + "step": 24229 + }, + { + "epoch": 0.4186826098977053, + "grad_norm": 0.7865829162090788, + "learning_rate": 1.3069404524549318e-05, + "loss": 0.4014, + "step": 24230 + }, + { + "epoch": 0.4186998894111142, + "grad_norm": 0.9165382886316337, + "learning_rate": 1.3068871885208448e-05, + "loss": 0.3505, + "step": 24231 + }, + { + "epoch": 0.4187171689245231, + "grad_norm": 0.6367608623389001, + "learning_rate": 1.3068339236255634e-05, + "loss": 0.4976, + "step": 24232 + }, + { + "epoch": 0.418734448437932, + "grad_norm": 0.6809858735611447, + "learning_rate": 1.306780657769255e-05, + "loss": 0.3783, + "step": 24233 + }, + { + "epoch": 0.4187517279513409, + "grad_norm": 1.0853771737827942, + "learning_rate": 1.3067273909520865e-05, + "loss": 0.5026, + "step": 24234 + }, + { + "epoch": 0.41876900746474977, + "grad_norm": 1.656065802938019, + "learning_rate": 1.3066741231742243e-05, + "loss": 0.3956, + "step": 24235 + }, + { + "epoch": 0.4187862869781587, + "grad_norm": 0.5076996991053203, + "learning_rate": 1.3066208544358357e-05, + "loss": 0.61, + "step": 24236 + }, + { + "epoch": 0.4188035664915676, + "grad_norm": 0.9067384929676356, + "learning_rate": 1.3065675847370869e-05, + "loss": 0.5547, + "step": 24237 + }, + { + "epoch": 0.4188208460049765, + "grad_norm": 1.1948446875042404, + "learning_rate": 1.306514314078145e-05, + "loss": 0.4857, + "step": 24238 + }, + { + "epoch": 0.4188381255183854, + "grad_norm": 0.7897761224668, + "learning_rate": 1.3064610424591774e-05, + "loss": 0.4769, + "step": 24239 + }, + { + "epoch": 0.4188554050317943, + "grad_norm": 1.1484686508076638, + "learning_rate": 1.3064077698803504e-05, + "loss": 0.3609, + "step": 24240 + }, + { + "epoch": 0.4188726845452032, + "grad_norm": 1.1512763639099757, + "learning_rate": 1.3063544963418308e-05, + "loss": 0.3191, + "step": 24241 + }, + { + "epoch": 0.4188899640586121, + "grad_norm": 0.8983182047874553, + "learning_rate": 1.3063012218437859e-05, + "loss": 0.5157, + "step": 24242 + }, + { + "epoch": 0.418907243572021, + "grad_norm": 0.461439845768874, + "learning_rate": 1.3062479463863822e-05, + "loss": 0.8023, + "step": 24243 + }, + { + "epoch": 0.41892452308542993, + "grad_norm": 0.6857651992992563, + "learning_rate": 1.3061946699697865e-05, + "loss": 0.2926, + "step": 24244 + }, + { + "epoch": 0.41894180259883884, + "grad_norm": 1.4553494365682138, + "learning_rate": 1.3061413925941661e-05, + "loss": 0.2972, + "step": 24245 + }, + { + "epoch": 0.4189590821122477, + "grad_norm": 1.0517527236697253, + "learning_rate": 1.3060881142596872e-05, + "loss": 0.4253, + "step": 24246 + }, + { + "epoch": 0.4189763616256566, + "grad_norm": 1.3482348203389345, + "learning_rate": 1.3060348349665176e-05, + "loss": 0.4265, + "step": 24247 + }, + { + "epoch": 0.4189936411390655, + "grad_norm": 1.0144002356083983, + "learning_rate": 1.3059815547148231e-05, + "loss": 0.469, + "step": 24248 + }, + { + "epoch": 0.4190109206524744, + "grad_norm": 0.44511759390808453, + "learning_rate": 1.3059282735047714e-05, + "loss": 0.5027, + "step": 24249 + }, + { + "epoch": 0.4190282001658833, + "grad_norm": 1.0300509936980766, + "learning_rate": 1.3058749913365292e-05, + "loss": 0.3489, + "step": 24250 + }, + { + "epoch": 0.41904547967929223, + "grad_norm": 0.9088202967645159, + "learning_rate": 1.305821708210263e-05, + "loss": 0.5903, + "step": 24251 + }, + { + "epoch": 0.41906275919270114, + "grad_norm": 1.7427450685429553, + "learning_rate": 1.30576842412614e-05, + "loss": 0.4598, + "step": 24252 + }, + { + "epoch": 0.41908003870611005, + "grad_norm": 1.0545224419970913, + "learning_rate": 1.3057151390843272e-05, + "loss": 0.3074, + "step": 24253 + }, + { + "epoch": 0.41909731821951896, + "grad_norm": 1.7440498034003042, + "learning_rate": 1.3056618530849914e-05, + "loss": 0.4449, + "step": 24254 + }, + { + "epoch": 0.41911459773292786, + "grad_norm": 1.2050201827793683, + "learning_rate": 1.3056085661282993e-05, + "loss": 0.4632, + "step": 24255 + }, + { + "epoch": 0.41913187724633677, + "grad_norm": 1.5195094909525144, + "learning_rate": 1.3055552782144184e-05, + "loss": 0.4648, + "step": 24256 + }, + { + "epoch": 0.4191491567597456, + "grad_norm": 0.9278541375264133, + "learning_rate": 1.3055019893435145e-05, + "loss": 0.3794, + "step": 24257 + }, + { + "epoch": 0.41916643627315453, + "grad_norm": 0.49572111891411486, + "learning_rate": 1.3054486995157553e-05, + "loss": 0.5254, + "step": 24258 + }, + { + "epoch": 0.41918371578656344, + "grad_norm": 0.7954419463928919, + "learning_rate": 1.305395408731308e-05, + "loss": 0.278, + "step": 24259 + }, + { + "epoch": 0.41920099529997235, + "grad_norm": 0.9271006910883444, + "learning_rate": 1.3053421169903386e-05, + "loss": 0.5338, + "step": 24260 + }, + { + "epoch": 0.41921827481338125, + "grad_norm": 1.0174422466464383, + "learning_rate": 1.3052888242930144e-05, + "loss": 0.2806, + "step": 24261 + }, + { + "epoch": 0.41923555432679016, + "grad_norm": 0.7354408748497032, + "learning_rate": 1.3052355306395028e-05, + "loss": 0.3593, + "step": 24262 + }, + { + "epoch": 0.41925283384019907, + "grad_norm": 0.8522995671336479, + "learning_rate": 1.3051822360299702e-05, + "loss": 0.3055, + "step": 24263 + }, + { + "epoch": 0.419270113353608, + "grad_norm": 0.9212877539378973, + "learning_rate": 1.3051289404645833e-05, + "loss": 0.4339, + "step": 24264 + }, + { + "epoch": 0.4192873928670169, + "grad_norm": 0.8590112878778685, + "learning_rate": 1.30507564394351e-05, + "loss": 0.3777, + "step": 24265 + }, + { + "epoch": 0.4193046723804258, + "grad_norm": 0.6832767855697819, + "learning_rate": 1.305022346466916e-05, + "loss": 0.4326, + "step": 24266 + }, + { + "epoch": 0.41932195189383464, + "grad_norm": 0.7798914472574558, + "learning_rate": 1.3049690480349693e-05, + "loss": 0.3928, + "step": 24267 + }, + { + "epoch": 0.41933923140724355, + "grad_norm": 0.7550675721363929, + "learning_rate": 1.3049157486478362e-05, + "loss": 0.3584, + "step": 24268 + }, + { + "epoch": 0.41935651092065246, + "grad_norm": 1.374967290373557, + "learning_rate": 1.3048624483056835e-05, + "loss": 0.3067, + "step": 24269 + }, + { + "epoch": 0.41937379043406137, + "grad_norm": 1.5398832917383065, + "learning_rate": 1.3048091470086787e-05, + "loss": 0.5022, + "step": 24270 + }, + { + "epoch": 0.4193910699474703, + "grad_norm": 0.46470842997629286, + "learning_rate": 1.3047558447569885e-05, + "loss": 0.6207, + "step": 24271 + }, + { + "epoch": 0.4194083494608792, + "grad_norm": 1.1477529027443505, + "learning_rate": 1.3047025415507796e-05, + "loss": 0.5951, + "step": 24272 + }, + { + "epoch": 0.4194256289742881, + "grad_norm": 0.9218800078986751, + "learning_rate": 1.3046492373902196e-05, + "loss": 0.4834, + "step": 24273 + }, + { + "epoch": 0.419442908487697, + "grad_norm": 0.9850279947044439, + "learning_rate": 1.3045959322754748e-05, + "loss": 0.4593, + "step": 24274 + }, + { + "epoch": 0.4194601880011059, + "grad_norm": 1.5512992836395882, + "learning_rate": 1.3045426262067125e-05, + "loss": 0.3316, + "step": 24275 + }, + { + "epoch": 0.4194774675145148, + "grad_norm": 1.0738072760575843, + "learning_rate": 1.3044893191840994e-05, + "loss": 0.5807, + "step": 24276 + }, + { + "epoch": 0.4194947470279237, + "grad_norm": 1.0930353950614455, + "learning_rate": 1.304436011207803e-05, + "loss": 0.4071, + "step": 24277 + }, + { + "epoch": 0.41951202654133257, + "grad_norm": 1.0123472981824135, + "learning_rate": 1.3043827022779893e-05, + "loss": 0.4533, + "step": 24278 + }, + { + "epoch": 0.4195293060547415, + "grad_norm": 1.4005689927683727, + "learning_rate": 1.3043293923948262e-05, + "loss": 0.451, + "step": 24279 + }, + { + "epoch": 0.4195465855681504, + "grad_norm": 1.0730131748529514, + "learning_rate": 1.3042760815584803e-05, + "loss": 0.5012, + "step": 24280 + }, + { + "epoch": 0.4195638650815593, + "grad_norm": 0.6993198431267651, + "learning_rate": 1.3042227697691183e-05, + "loss": 0.4646, + "step": 24281 + }, + { + "epoch": 0.4195811445949682, + "grad_norm": 1.1420500168949106, + "learning_rate": 1.3041694570269078e-05, + "loss": 0.5487, + "step": 24282 + }, + { + "epoch": 0.4195984241083771, + "grad_norm": 0.7051574410869863, + "learning_rate": 1.3041161433320153e-05, + "loss": 0.5113, + "step": 24283 + }, + { + "epoch": 0.419615703621786, + "grad_norm": 1.1885958360825617, + "learning_rate": 1.3040628286846078e-05, + "loss": 0.411, + "step": 24284 + }, + { + "epoch": 0.4196329831351949, + "grad_norm": 1.0335742102053274, + "learning_rate": 1.3040095130848529e-05, + "loss": 0.5218, + "step": 24285 + }, + { + "epoch": 0.41965026264860383, + "grad_norm": 0.7915174625535504, + "learning_rate": 1.3039561965329167e-05, + "loss": 0.3682, + "step": 24286 + }, + { + "epoch": 0.41966754216201274, + "grad_norm": 1.8484433970119116, + "learning_rate": 1.3039028790289665e-05, + "loss": 0.6671, + "step": 24287 + }, + { + "epoch": 0.4196848216754216, + "grad_norm": 1.363371855602728, + "learning_rate": 1.3038495605731697e-05, + "loss": 0.5184, + "step": 24288 + }, + { + "epoch": 0.4197021011888305, + "grad_norm": 0.6254748302280273, + "learning_rate": 1.3037962411656925e-05, + "loss": 0.4187, + "step": 24289 + }, + { + "epoch": 0.4197193807022394, + "grad_norm": 0.9022109455834992, + "learning_rate": 1.3037429208067029e-05, + "loss": 0.2964, + "step": 24290 + }, + { + "epoch": 0.4197366602156483, + "grad_norm": 2.149317574441933, + "learning_rate": 1.3036895994963672e-05, + "loss": 0.2819, + "step": 24291 + }, + { + "epoch": 0.4197539397290572, + "grad_norm": 1.5512018254797328, + "learning_rate": 1.3036362772348525e-05, + "loss": 0.5319, + "step": 24292 + }, + { + "epoch": 0.41977121924246613, + "grad_norm": 0.9737183901168649, + "learning_rate": 1.303582954022326e-05, + "loss": 0.5902, + "step": 24293 + }, + { + "epoch": 0.41978849875587504, + "grad_norm": 0.9814588982396102, + "learning_rate": 1.3035296298589549e-05, + "loss": 0.5221, + "step": 24294 + }, + { + "epoch": 0.41980577826928395, + "grad_norm": 0.7700207827247759, + "learning_rate": 1.3034763047449052e-05, + "loss": 0.4799, + "step": 24295 + }, + { + "epoch": 0.41982305778269285, + "grad_norm": 0.5975622894277383, + "learning_rate": 1.3034229786803456e-05, + "loss": 0.2522, + "step": 24296 + }, + { + "epoch": 0.41984033729610176, + "grad_norm": 1.3427917126743076, + "learning_rate": 1.3033696516654417e-05, + "loss": 0.3952, + "step": 24297 + }, + { + "epoch": 0.41985761680951067, + "grad_norm": 0.9618629444112626, + "learning_rate": 1.3033163237003608e-05, + "loss": 0.4136, + "step": 24298 + }, + { + "epoch": 0.4198748963229195, + "grad_norm": 0.7011140991170506, + "learning_rate": 1.3032629947852705e-05, + "loss": 0.2511, + "step": 24299 + }, + { + "epoch": 0.41989217583632843, + "grad_norm": 1.1570492316457537, + "learning_rate": 1.303209664920337e-05, + "loss": 0.4482, + "step": 24300 + }, + { + "epoch": 0.41990945534973734, + "grad_norm": 1.081814746960274, + "learning_rate": 1.303156334105728e-05, + "loss": 0.4677, + "step": 24301 + }, + { + "epoch": 0.41992673486314624, + "grad_norm": 0.8758028170505835, + "learning_rate": 1.3031030023416102e-05, + "loss": 0.4861, + "step": 24302 + }, + { + "epoch": 0.41994401437655515, + "grad_norm": 1.0105627162548854, + "learning_rate": 1.3030496696281512e-05, + "loss": 0.4457, + "step": 24303 + }, + { + "epoch": 0.41996129388996406, + "grad_norm": 0.7898678817997181, + "learning_rate": 1.3029963359655171e-05, + "loss": 0.452, + "step": 24304 + }, + { + "epoch": 0.41997857340337297, + "grad_norm": 0.7715255721597489, + "learning_rate": 1.3029430013538757e-05, + "loss": 0.315, + "step": 24305 + }, + { + "epoch": 0.4199958529167819, + "grad_norm": 1.2856898998061859, + "learning_rate": 1.3028896657933938e-05, + "loss": 0.5298, + "step": 24306 + }, + { + "epoch": 0.4200131324301908, + "grad_norm": 1.298359347750371, + "learning_rate": 1.3028363292842386e-05, + "loss": 0.5558, + "step": 24307 + }, + { + "epoch": 0.4200304119435997, + "grad_norm": 0.5208476956919257, + "learning_rate": 1.3027829918265767e-05, + "loss": 0.6028, + "step": 24308 + }, + { + "epoch": 0.42004769145700854, + "grad_norm": 0.5366515338905955, + "learning_rate": 1.3027296534205755e-05, + "loss": 0.9008, + "step": 24309 + }, + { + "epoch": 0.42006497097041745, + "grad_norm": 0.7516982338482937, + "learning_rate": 1.302676314066402e-05, + "loss": 0.3485, + "step": 24310 + }, + { + "epoch": 0.42008225048382636, + "grad_norm": 0.8761637516261848, + "learning_rate": 1.3026229737642235e-05, + "loss": 0.4792, + "step": 24311 + }, + { + "epoch": 0.42009952999723527, + "grad_norm": 0.7779872127092607, + "learning_rate": 1.3025696325142066e-05, + "loss": 0.4806, + "step": 24312 + }, + { + "epoch": 0.4201168095106442, + "grad_norm": 1.1027683281535074, + "learning_rate": 1.3025162903165187e-05, + "loss": 0.5554, + "step": 24313 + }, + { + "epoch": 0.4201340890240531, + "grad_norm": 0.906240813602021, + "learning_rate": 1.302462947171327e-05, + "loss": 0.5539, + "step": 24314 + }, + { + "epoch": 0.420151368537462, + "grad_norm": 1.2612969859618837, + "learning_rate": 1.3024096030787981e-05, + "loss": 0.533, + "step": 24315 + }, + { + "epoch": 0.4201686480508709, + "grad_norm": 1.0112168322807458, + "learning_rate": 1.3023562580390997e-05, + "loss": 0.252, + "step": 24316 + }, + { + "epoch": 0.4201859275642798, + "grad_norm": 1.0642512697115, + "learning_rate": 1.3023029120523982e-05, + "loss": 0.2576, + "step": 24317 + }, + { + "epoch": 0.4202032070776887, + "grad_norm": 0.7991404972701693, + "learning_rate": 1.3022495651188609e-05, + "loss": 0.4839, + "step": 24318 + }, + { + "epoch": 0.4202204865910976, + "grad_norm": 1.1288176223724817, + "learning_rate": 1.302196217238655e-05, + "loss": 0.3954, + "step": 24319 + }, + { + "epoch": 0.42023776610450647, + "grad_norm": 0.4875744218222898, + "learning_rate": 1.3021428684119479e-05, + "loss": 0.8012, + "step": 24320 + }, + { + "epoch": 0.4202550456179154, + "grad_norm": 1.1475090597956537, + "learning_rate": 1.3020895186389062e-05, + "loss": 0.5629, + "step": 24321 + }, + { + "epoch": 0.4202723251313243, + "grad_norm": 0.5411004215970552, + "learning_rate": 1.3020361679196972e-05, + "loss": 0.5619, + "step": 24322 + }, + { + "epoch": 0.4202896046447332, + "grad_norm": 0.47760314479160065, + "learning_rate": 1.3019828162544879e-05, + "loss": 0.5311, + "step": 24323 + }, + { + "epoch": 0.4203068841581421, + "grad_norm": 2.0076819171657494, + "learning_rate": 1.3019294636434454e-05, + "loss": 0.6326, + "step": 24324 + }, + { + "epoch": 0.420324163671551, + "grad_norm": 0.9853914625245356, + "learning_rate": 1.3018761100867372e-05, + "loss": 0.3192, + "step": 24325 + }, + { + "epoch": 0.4203414431849599, + "grad_norm": 1.2906892316491108, + "learning_rate": 1.3018227555845298e-05, + "loss": 0.4389, + "step": 24326 + }, + { + "epoch": 0.4203587226983688, + "grad_norm": 0.833606020661745, + "learning_rate": 1.3017694001369907e-05, + "loss": 0.4618, + "step": 24327 + }, + { + "epoch": 0.42037600221177773, + "grad_norm": 0.9120725179732299, + "learning_rate": 1.3017160437442868e-05, + "loss": 0.4132, + "step": 24328 + }, + { + "epoch": 0.42039328172518664, + "grad_norm": 1.6794620467800412, + "learning_rate": 1.3016626864065853e-05, + "loss": 0.4451, + "step": 24329 + }, + { + "epoch": 0.42041056123859555, + "grad_norm": 0.8894341333274833, + "learning_rate": 1.3016093281240534e-05, + "loss": 0.3832, + "step": 24330 + }, + { + "epoch": 0.4204278407520044, + "grad_norm": 0.8474497550049211, + "learning_rate": 1.3015559688968584e-05, + "loss": 0.4145, + "step": 24331 + }, + { + "epoch": 0.4204451202654133, + "grad_norm": 0.7547332163747471, + "learning_rate": 1.3015026087251667e-05, + "loss": 0.4816, + "step": 24332 + }, + { + "epoch": 0.4204623997788222, + "grad_norm": 0.7468670689592758, + "learning_rate": 1.3014492476091463e-05, + "loss": 0.3264, + "step": 24333 + }, + { + "epoch": 0.4204796792922311, + "grad_norm": 1.2496791584430462, + "learning_rate": 1.3013958855489639e-05, + "loss": 0.5921, + "step": 24334 + }, + { + "epoch": 0.42049695880564003, + "grad_norm": 1.0028558486097403, + "learning_rate": 1.3013425225447865e-05, + "loss": 0.5333, + "step": 24335 + }, + { + "epoch": 0.42051423831904894, + "grad_norm": 0.43302800501179317, + "learning_rate": 1.3012891585967815e-05, + "loss": 0.68, + "step": 24336 + }, + { + "epoch": 0.42053151783245785, + "grad_norm": 0.5734901885980148, + "learning_rate": 1.3012357937051164e-05, + "loss": 0.2851, + "step": 24337 + }, + { + "epoch": 0.42054879734586675, + "grad_norm": 0.8871502235335456, + "learning_rate": 1.3011824278699571e-05, + "loss": 0.3782, + "step": 24338 + }, + { + "epoch": 0.42056607685927566, + "grad_norm": 0.7240643838322325, + "learning_rate": 1.3011290610914722e-05, + "loss": 0.2766, + "step": 24339 + }, + { + "epoch": 0.42058335637268457, + "grad_norm": 0.7589787209913084, + "learning_rate": 1.3010756933698279e-05, + "loss": 0.7868, + "step": 24340 + }, + { + "epoch": 0.4206006358860934, + "grad_norm": 1.0570841396891704, + "learning_rate": 1.3010223247051917e-05, + "loss": 0.4884, + "step": 24341 + }, + { + "epoch": 0.42061791539950233, + "grad_norm": 0.6625007129993037, + "learning_rate": 1.3009689550977307e-05, + "loss": 0.3978, + "step": 24342 + }, + { + "epoch": 0.42063519491291124, + "grad_norm": 0.922640803758768, + "learning_rate": 1.300915584547612e-05, + "loss": 0.5051, + "step": 24343 + }, + { + "epoch": 0.42065247442632014, + "grad_norm": 0.9117576096662862, + "learning_rate": 1.3008622130550028e-05, + "loss": 0.3468, + "step": 24344 + }, + { + "epoch": 0.42066975393972905, + "grad_norm": 1.0074557804056645, + "learning_rate": 1.3008088406200705e-05, + "loss": 0.4761, + "step": 24345 + }, + { + "epoch": 0.42068703345313796, + "grad_norm": 0.8922479750569937, + "learning_rate": 1.3007554672429819e-05, + "loss": 0.3782, + "step": 24346 + }, + { + "epoch": 0.42070431296654687, + "grad_norm": 1.01717311429888, + "learning_rate": 1.3007020929239042e-05, + "loss": 0.4609, + "step": 24347 + }, + { + "epoch": 0.4207215924799558, + "grad_norm": 1.2104724089747405, + "learning_rate": 1.300648717663005e-05, + "loss": 0.4566, + "step": 24348 + }, + { + "epoch": 0.4207388719933647, + "grad_norm": 0.9017223549088176, + "learning_rate": 1.3005953414604508e-05, + "loss": 0.5102, + "step": 24349 + }, + { + "epoch": 0.4207561515067736, + "grad_norm": 0.6859096480224434, + "learning_rate": 1.3005419643164095e-05, + "loss": 0.3827, + "step": 24350 + }, + { + "epoch": 0.4207734310201825, + "grad_norm": 0.5561609340244857, + "learning_rate": 1.3004885862310476e-05, + "loss": 0.2052, + "step": 24351 + }, + { + "epoch": 0.42079071053359135, + "grad_norm": 0.8602808634373451, + "learning_rate": 1.3004352072045328e-05, + "loss": 0.2598, + "step": 24352 + }, + { + "epoch": 0.42080799004700026, + "grad_norm": 0.828916553903079, + "learning_rate": 1.300381827237032e-05, + "loss": 0.641, + "step": 24353 + }, + { + "epoch": 0.42082526956040917, + "grad_norm": 1.0840706104735454, + "learning_rate": 1.3003284463287126e-05, + "loss": 0.3174, + "step": 24354 + }, + { + "epoch": 0.4208425490738181, + "grad_norm": 0.7761591947551204, + "learning_rate": 1.3002750644797416e-05, + "loss": 0.7136, + "step": 24355 + }, + { + "epoch": 0.420859828587227, + "grad_norm": 0.8236459353755103, + "learning_rate": 1.3002216816902863e-05, + "loss": 0.4752, + "step": 24356 + }, + { + "epoch": 0.4208771081006359, + "grad_norm": 1.5195753793935902, + "learning_rate": 1.3001682979605142e-05, + "loss": 0.5519, + "step": 24357 + }, + { + "epoch": 0.4208943876140448, + "grad_norm": 0.9272023843487924, + "learning_rate": 1.3001149132905918e-05, + "loss": 0.5731, + "step": 24358 + }, + { + "epoch": 0.4209116671274537, + "grad_norm": 0.7984226896448101, + "learning_rate": 1.3000615276806867e-05, + "loss": 0.415, + "step": 24359 + }, + { + "epoch": 0.4209289466408626, + "grad_norm": 1.1598859513498072, + "learning_rate": 1.3000081411309662e-05, + "loss": 0.6841, + "step": 24360 + }, + { + "epoch": 0.4209462261542715, + "grad_norm": 1.2392046736176912, + "learning_rate": 1.2999547536415972e-05, + "loss": 0.6066, + "step": 24361 + }, + { + "epoch": 0.42096350566768037, + "grad_norm": 1.1440820432083125, + "learning_rate": 1.2999013652127474e-05, + "loss": 0.4808, + "step": 24362 + }, + { + "epoch": 0.4209807851810893, + "grad_norm": 0.9698563672761715, + "learning_rate": 1.2998479758445837e-05, + "loss": 0.4109, + "step": 24363 + }, + { + "epoch": 0.4209980646944982, + "grad_norm": 0.6621082208506543, + "learning_rate": 1.2997945855372728e-05, + "loss": 0.4953, + "step": 24364 + }, + { + "epoch": 0.4210153442079071, + "grad_norm": 1.0987336896617597, + "learning_rate": 1.2997411942909832e-05, + "loss": 0.3994, + "step": 24365 + }, + { + "epoch": 0.421032623721316, + "grad_norm": 1.1142156697584744, + "learning_rate": 1.2996878021058812e-05, + "loss": 0.4329, + "step": 24366 + }, + { + "epoch": 0.4210499032347249, + "grad_norm": 1.2568611764319486, + "learning_rate": 1.299634408982134e-05, + "loss": 0.5678, + "step": 24367 + }, + { + "epoch": 0.4210671827481338, + "grad_norm": 1.0824105477787473, + "learning_rate": 1.2995810149199093e-05, + "loss": 0.3342, + "step": 24368 + }, + { + "epoch": 0.4210844622615427, + "grad_norm": 0.8764116566523008, + "learning_rate": 1.299527619919374e-05, + "loss": 0.3556, + "step": 24369 + }, + { + "epoch": 0.42110174177495163, + "grad_norm": 1.4047556631847824, + "learning_rate": 1.2994742239806955e-05, + "loss": 0.4402, + "step": 24370 + }, + { + "epoch": 0.42111902128836054, + "grad_norm": 0.8382224435224146, + "learning_rate": 1.299420827104041e-05, + "loss": 0.5496, + "step": 24371 + }, + { + "epoch": 0.42113630080176945, + "grad_norm": 0.9666698620945138, + "learning_rate": 1.2993674292895775e-05, + "loss": 0.666, + "step": 24372 + }, + { + "epoch": 0.4211535803151783, + "grad_norm": 1.0855988787957223, + "learning_rate": 1.2993140305374726e-05, + "loss": 0.4239, + "step": 24373 + }, + { + "epoch": 0.4211708598285872, + "grad_norm": 1.005892720135762, + "learning_rate": 1.2992606308478935e-05, + "loss": 0.5383, + "step": 24374 + }, + { + "epoch": 0.4211881393419961, + "grad_norm": 1.1001969459303236, + "learning_rate": 1.299207230221007e-05, + "loss": 0.4259, + "step": 24375 + }, + { + "epoch": 0.421205418855405, + "grad_norm": 1.2773057450759866, + "learning_rate": 1.299153828656981e-05, + "loss": 0.332, + "step": 24376 + }, + { + "epoch": 0.42122269836881393, + "grad_norm": 0.7725714093929104, + "learning_rate": 1.2991004261559826e-05, + "loss": 0.369, + "step": 24377 + }, + { + "epoch": 0.42123997788222284, + "grad_norm": 0.8796177974431747, + "learning_rate": 1.2990470227181789e-05, + "loss": 0.5565, + "step": 24378 + }, + { + "epoch": 0.42125725739563175, + "grad_norm": 1.258969773935511, + "learning_rate": 1.2989936183437371e-05, + "loss": 0.4821, + "step": 24379 + }, + { + "epoch": 0.42127453690904065, + "grad_norm": 1.4425253819603863, + "learning_rate": 1.2989402130328243e-05, + "loss": 0.51, + "step": 24380 + }, + { + "epoch": 0.42129181642244956, + "grad_norm": 0.9490744451760614, + "learning_rate": 1.2988868067856082e-05, + "loss": 0.3106, + "step": 24381 + }, + { + "epoch": 0.42130909593585847, + "grad_norm": 0.7519706150922257, + "learning_rate": 1.2988333996022562e-05, + "loss": 0.438, + "step": 24382 + }, + { + "epoch": 0.4213263754492674, + "grad_norm": 0.7414217206622905, + "learning_rate": 1.2987799914829351e-05, + "loss": 0.4406, + "step": 24383 + }, + { + "epoch": 0.42134365496267623, + "grad_norm": 1.0854906491173997, + "learning_rate": 1.2987265824278121e-05, + "loss": 0.3899, + "step": 24384 + }, + { + "epoch": 0.42136093447608514, + "grad_norm": 0.6857833235326812, + "learning_rate": 1.2986731724370549e-05, + "loss": 0.547, + "step": 24385 + }, + { + "epoch": 0.42137821398949404, + "grad_norm": 1.0080855099877701, + "learning_rate": 1.2986197615108309e-05, + "loss": 0.5234, + "step": 24386 + }, + { + "epoch": 0.42139549350290295, + "grad_norm": 0.9891685402197781, + "learning_rate": 1.2985663496493067e-05, + "loss": 0.4489, + "step": 24387 + }, + { + "epoch": 0.42141277301631186, + "grad_norm": 0.9096440028095619, + "learning_rate": 1.2985129368526505e-05, + "loss": 0.5784, + "step": 24388 + }, + { + "epoch": 0.42143005252972077, + "grad_norm": 0.9126305349316859, + "learning_rate": 1.2984595231210286e-05, + "loss": 0.2696, + "step": 24389 + }, + { + "epoch": 0.4214473320431297, + "grad_norm": 0.645129292528507, + "learning_rate": 1.2984061084546092e-05, + "loss": 0.3652, + "step": 24390 + }, + { + "epoch": 0.4214646115565386, + "grad_norm": 0.5895654431142912, + "learning_rate": 1.298352692853559e-05, + "loss": 0.6709, + "step": 24391 + }, + { + "epoch": 0.4214818910699475, + "grad_norm": 1.2014140581130777, + "learning_rate": 1.2982992763180454e-05, + "loss": 0.5301, + "step": 24392 + }, + { + "epoch": 0.4214991705833564, + "grad_norm": 0.7538429007437945, + "learning_rate": 1.2982458588482359e-05, + "loss": 0.4979, + "step": 24393 + }, + { + "epoch": 0.42151645009676525, + "grad_norm": 1.0400332009652182, + "learning_rate": 1.2981924404442976e-05, + "loss": 0.5376, + "step": 24394 + }, + { + "epoch": 0.42153372961017416, + "grad_norm": 0.8310150508081975, + "learning_rate": 1.2981390211063978e-05, + "loss": 0.5484, + "step": 24395 + }, + { + "epoch": 0.42155100912358306, + "grad_norm": 1.1107820175955798, + "learning_rate": 1.2980856008347043e-05, + "loss": 0.546, + "step": 24396 + }, + { + "epoch": 0.421568288636992, + "grad_norm": 1.1070184878028262, + "learning_rate": 1.2980321796293838e-05, + "loss": 0.3048, + "step": 24397 + }, + { + "epoch": 0.4215855681504009, + "grad_norm": 0.7428482115712762, + "learning_rate": 1.297978757490604e-05, + "loss": 0.4163, + "step": 24398 + }, + { + "epoch": 0.4216028476638098, + "grad_norm": 0.5384888477683889, + "learning_rate": 1.297925334418532e-05, + "loss": 0.7159, + "step": 24399 + }, + { + "epoch": 0.4216201271772187, + "grad_norm": 0.6649229864439268, + "learning_rate": 1.2978719104133353e-05, + "loss": 0.3425, + "step": 24400 + }, + { + "epoch": 0.4216374066906276, + "grad_norm": 1.15991705219241, + "learning_rate": 1.297818485475181e-05, + "loss": 0.6267, + "step": 24401 + }, + { + "epoch": 0.4216546862040365, + "grad_norm": 0.9116024002348737, + "learning_rate": 1.2977650596042366e-05, + "loss": 0.6368, + "step": 24402 + }, + { + "epoch": 0.4216719657174454, + "grad_norm": 1.6218745656419598, + "learning_rate": 1.2977116328006696e-05, + "loss": 0.6106, + "step": 24403 + }, + { + "epoch": 0.4216892452308543, + "grad_norm": 0.832121520128378, + "learning_rate": 1.297658205064647e-05, + "loss": 0.5093, + "step": 24404 + }, + { + "epoch": 0.4217065247442632, + "grad_norm": 1.2802114583553275, + "learning_rate": 1.297604776396336e-05, + "loss": 0.496, + "step": 24405 + }, + { + "epoch": 0.4217238042576721, + "grad_norm": 0.982242472942106, + "learning_rate": 1.2975513467959047e-05, + "loss": 0.5013, + "step": 24406 + }, + { + "epoch": 0.421741083771081, + "grad_norm": 2.3601609167695545, + "learning_rate": 1.2974979162635196e-05, + "loss": 0.6886, + "step": 24407 + }, + { + "epoch": 0.4217583632844899, + "grad_norm": 1.0874148759836828, + "learning_rate": 1.2974444847993485e-05, + "loss": 0.4306, + "step": 24408 + }, + { + "epoch": 0.4217756427978988, + "grad_norm": 2.059169237938283, + "learning_rate": 1.2973910524035588e-05, + "loss": 0.704, + "step": 24409 + }, + { + "epoch": 0.4217929223113077, + "grad_norm": 1.0830127281022353, + "learning_rate": 1.2973376190763178e-05, + "loss": 0.6386, + "step": 24410 + }, + { + "epoch": 0.4218102018247166, + "grad_norm": 1.290176627989424, + "learning_rate": 1.2972841848177927e-05, + "loss": 0.4742, + "step": 24411 + }, + { + "epoch": 0.42182748133812553, + "grad_norm": 1.085023073713799, + "learning_rate": 1.2972307496281507e-05, + "loss": 0.482, + "step": 24412 + }, + { + "epoch": 0.42184476085153444, + "grad_norm": 1.6804115226495995, + "learning_rate": 1.2971773135075597e-05, + "loss": 0.3807, + "step": 24413 + }, + { + "epoch": 0.42186204036494335, + "grad_norm": 1.0144278817620607, + "learning_rate": 1.2971238764561867e-05, + "loss": 0.5184, + "step": 24414 + }, + { + "epoch": 0.4218793198783522, + "grad_norm": 0.9327660597835344, + "learning_rate": 1.297070438474199e-05, + "loss": 0.3468, + "step": 24415 + }, + { + "epoch": 0.4218965993917611, + "grad_norm": 0.8314218737196187, + "learning_rate": 1.297016999561764e-05, + "loss": 0.4055, + "step": 24416 + }, + { + "epoch": 0.42191387890517, + "grad_norm": 1.3196769583560097, + "learning_rate": 1.2969635597190496e-05, + "loss": 0.5128, + "step": 24417 + }, + { + "epoch": 0.4219311584185789, + "grad_norm": 0.552220063066859, + "learning_rate": 1.2969101189462223e-05, + "loss": 0.5939, + "step": 24418 + }, + { + "epoch": 0.42194843793198783, + "grad_norm": 1.7355430292618568, + "learning_rate": 1.2968566772434505e-05, + "loss": 0.5188, + "step": 24419 + }, + { + "epoch": 0.42196571744539674, + "grad_norm": 0.7645949686823861, + "learning_rate": 1.2968032346109007e-05, + "loss": 0.4711, + "step": 24420 + }, + { + "epoch": 0.42198299695880565, + "grad_norm": 0.40565743589612513, + "learning_rate": 1.2967497910487405e-05, + "loss": 0.5677, + "step": 24421 + }, + { + "epoch": 0.42200027647221455, + "grad_norm": 0.9650812629552619, + "learning_rate": 1.2966963465571377e-05, + "loss": 0.4191, + "step": 24422 + }, + { + "epoch": 0.42201755598562346, + "grad_norm": 0.9008112212272651, + "learning_rate": 1.2966429011362594e-05, + "loss": 0.3947, + "step": 24423 + }, + { + "epoch": 0.42203483549903237, + "grad_norm": 1.1563436261802147, + "learning_rate": 1.2965894547862726e-05, + "loss": 0.4632, + "step": 24424 + }, + { + "epoch": 0.4220521150124413, + "grad_norm": 1.1163866440506236, + "learning_rate": 1.2965360075073452e-05, + "loss": 0.3531, + "step": 24425 + }, + { + "epoch": 0.42206939452585013, + "grad_norm": 0.9762463493025088, + "learning_rate": 1.2964825592996447e-05, + "loss": 0.4379, + "step": 24426 + }, + { + "epoch": 0.42208667403925904, + "grad_norm": 0.7997597839502814, + "learning_rate": 1.296429110163338e-05, + "loss": 0.3854, + "step": 24427 + }, + { + "epoch": 0.42210395355266794, + "grad_norm": 1.4948488511271372, + "learning_rate": 1.296375660098593e-05, + "loss": 0.46, + "step": 24428 + }, + { + "epoch": 0.42212123306607685, + "grad_norm": 1.076561635908448, + "learning_rate": 1.2963222091055769e-05, + "loss": 0.3634, + "step": 24429 + }, + { + "epoch": 0.42213851257948576, + "grad_norm": 0.9539878906064444, + "learning_rate": 1.2962687571844575e-05, + "loss": 0.4726, + "step": 24430 + }, + { + "epoch": 0.42215579209289467, + "grad_norm": 1.2775296173242747, + "learning_rate": 1.2962153043354013e-05, + "loss": 0.4698, + "step": 24431 + }, + { + "epoch": 0.4221730716063036, + "grad_norm": 0.6582088340851736, + "learning_rate": 1.2961618505585762e-05, + "loss": 0.2384, + "step": 24432 + }, + { + "epoch": 0.4221903511197125, + "grad_norm": 0.8511376010610305, + "learning_rate": 1.2961083958541501e-05, + "loss": 0.4354, + "step": 24433 + }, + { + "epoch": 0.4222076306331214, + "grad_norm": 0.45185130820333474, + "learning_rate": 1.2960549402222897e-05, + "loss": 0.8444, + "step": 24434 + }, + { + "epoch": 0.4222249101465303, + "grad_norm": 1.0941742849813654, + "learning_rate": 1.2960014836631626e-05, + "loss": 0.4624, + "step": 24435 + }, + { + "epoch": 0.42224218965993915, + "grad_norm": 0.8650970822751752, + "learning_rate": 1.2959480261769368e-05, + "loss": 0.4636, + "step": 24436 + }, + { + "epoch": 0.42225946917334806, + "grad_norm": 1.0142997173509922, + "learning_rate": 1.2958945677637791e-05, + "loss": 0.355, + "step": 24437 + }, + { + "epoch": 0.42227674868675696, + "grad_norm": 1.336140912325757, + "learning_rate": 1.295841108423857e-05, + "loss": 0.5326, + "step": 24438 + }, + { + "epoch": 0.42229402820016587, + "grad_norm": 0.5577229491933352, + "learning_rate": 1.2957876481573382e-05, + "loss": 0.5769, + "step": 24439 + }, + { + "epoch": 0.4223113077135748, + "grad_norm": 0.7865952541019201, + "learning_rate": 1.2957341869643903e-05, + "loss": 0.6672, + "step": 24440 + }, + { + "epoch": 0.4223285872269837, + "grad_norm": 2.2936559170389907, + "learning_rate": 1.2956807248451799e-05, + "loss": 0.5763, + "step": 24441 + }, + { + "epoch": 0.4223458667403926, + "grad_norm": 1.3289787429577977, + "learning_rate": 1.2956272617998754e-05, + "loss": 0.3672, + "step": 24442 + }, + { + "epoch": 0.4223631462538015, + "grad_norm": 0.7330183114581035, + "learning_rate": 1.2955737978286435e-05, + "loss": 0.3998, + "step": 24443 + }, + { + "epoch": 0.4223804257672104, + "grad_norm": 1.0392821326054011, + "learning_rate": 1.2955203329316519e-05, + "loss": 0.3618, + "step": 24444 + }, + { + "epoch": 0.4223977052806193, + "grad_norm": 1.3019400847717872, + "learning_rate": 1.2954668671090686e-05, + "loss": 0.5022, + "step": 24445 + }, + { + "epoch": 0.4224149847940282, + "grad_norm": 1.046455631868529, + "learning_rate": 1.2954134003610603e-05, + "loss": 0.367, + "step": 24446 + }, + { + "epoch": 0.4224322643074371, + "grad_norm": 0.6241086330509014, + "learning_rate": 1.295359932687795e-05, + "loss": 0.4429, + "step": 24447 + }, + { + "epoch": 0.422449543820846, + "grad_norm": 0.864412313850693, + "learning_rate": 1.29530646408944e-05, + "loss": 0.4444, + "step": 24448 + }, + { + "epoch": 0.4224668233342549, + "grad_norm": 0.7822255522961967, + "learning_rate": 1.2952529945661626e-05, + "loss": 0.3504, + "step": 24449 + }, + { + "epoch": 0.4224841028476638, + "grad_norm": 1.026785567695086, + "learning_rate": 1.2951995241181304e-05, + "loss": 0.49, + "step": 24450 + }, + { + "epoch": 0.4225013823610727, + "grad_norm": 0.7940252976632265, + "learning_rate": 1.2951460527455106e-05, + "loss": 0.3886, + "step": 24451 + }, + { + "epoch": 0.4225186618744816, + "grad_norm": 1.0543422529218, + "learning_rate": 1.295092580448471e-05, + "loss": 0.6577, + "step": 24452 + }, + { + "epoch": 0.4225359413878905, + "grad_norm": 0.9470619168285591, + "learning_rate": 1.2950391072271793e-05, + "loss": 0.4265, + "step": 24453 + }, + { + "epoch": 0.42255322090129943, + "grad_norm": 1.1140645122869755, + "learning_rate": 1.2949856330818027e-05, + "loss": 0.489, + "step": 24454 + }, + { + "epoch": 0.42257050041470834, + "grad_norm": 2.1321639372345373, + "learning_rate": 1.2949321580125082e-05, + "loss": 0.4511, + "step": 24455 + }, + { + "epoch": 0.42258777992811725, + "grad_norm": 0.981978408613253, + "learning_rate": 1.2948786820194643e-05, + "loss": 0.7655, + "step": 24456 + }, + { + "epoch": 0.42260505944152615, + "grad_norm": 1.3732087260146295, + "learning_rate": 1.2948252051028378e-05, + "loss": 0.5048, + "step": 24457 + }, + { + "epoch": 0.422622338954935, + "grad_norm": 0.47626346062631764, + "learning_rate": 1.2947717272627962e-05, + "loss": 0.2936, + "step": 24458 + }, + { + "epoch": 0.4226396184683439, + "grad_norm": 1.088828179617537, + "learning_rate": 1.2947182484995073e-05, + "loss": 0.3425, + "step": 24459 + }, + { + "epoch": 0.4226568979817528, + "grad_norm": 0.9709397268077703, + "learning_rate": 1.2946647688131387e-05, + "loss": 0.4869, + "step": 24460 + }, + { + "epoch": 0.42267417749516173, + "grad_norm": 1.1805303578581048, + "learning_rate": 1.294611288203857e-05, + "loss": 0.3828, + "step": 24461 + }, + { + "epoch": 0.42269145700857064, + "grad_norm": 3.0827656833224957, + "learning_rate": 1.294557806671831e-05, + "loss": 0.5494, + "step": 24462 + }, + { + "epoch": 0.42270873652197954, + "grad_norm": 0.7834272335504542, + "learning_rate": 1.2945043242172273e-05, + "loss": 0.5142, + "step": 24463 + }, + { + "epoch": 0.42272601603538845, + "grad_norm": 1.2138570456263889, + "learning_rate": 1.2944508408402134e-05, + "loss": 0.5794, + "step": 24464 + }, + { + "epoch": 0.42274329554879736, + "grad_norm": 1.5234188144269973, + "learning_rate": 1.2943973565409574e-05, + "loss": 0.4763, + "step": 24465 + }, + { + "epoch": 0.42276057506220627, + "grad_norm": 0.8745096095178958, + "learning_rate": 1.2943438713196266e-05, + "loss": 0.5088, + "step": 24466 + }, + { + "epoch": 0.4227778545756152, + "grad_norm": 0.7700582118577133, + "learning_rate": 1.294290385176388e-05, + "loss": 0.4606, + "step": 24467 + }, + { + "epoch": 0.422795134089024, + "grad_norm": 0.4620743325973945, + "learning_rate": 1.29423689811141e-05, + "loss": 0.8168, + "step": 24468 + }, + { + "epoch": 0.42281241360243293, + "grad_norm": 0.9966860241260798, + "learning_rate": 1.2941834101248594e-05, + "loss": 0.4615, + "step": 24469 + }, + { + "epoch": 0.42282969311584184, + "grad_norm": 1.0390562644768992, + "learning_rate": 1.2941299212169041e-05, + "loss": 0.3736, + "step": 24470 + }, + { + "epoch": 0.42284697262925075, + "grad_norm": 1.0874291018871918, + "learning_rate": 1.2940764313877116e-05, + "loss": 0.5056, + "step": 24471 + }, + { + "epoch": 0.42286425214265966, + "grad_norm": 0.8961274969968844, + "learning_rate": 1.2940229406374493e-05, + "loss": 0.4635, + "step": 24472 + }, + { + "epoch": 0.42288153165606857, + "grad_norm": 0.6902560916358147, + "learning_rate": 1.2939694489662848e-05, + "loss": 0.4442, + "step": 24473 + }, + { + "epoch": 0.4228988111694775, + "grad_norm": 0.7251379953533724, + "learning_rate": 1.2939159563743856e-05, + "loss": 0.4203, + "step": 24474 + }, + { + "epoch": 0.4229160906828864, + "grad_norm": 1.0849250332232523, + "learning_rate": 1.2938624628619194e-05, + "loss": 0.5267, + "step": 24475 + }, + { + "epoch": 0.4229333701962953, + "grad_norm": 1.1099178272625558, + "learning_rate": 1.2938089684290536e-05, + "loss": 0.5049, + "step": 24476 + }, + { + "epoch": 0.4229506497097042, + "grad_norm": 0.9807625224369427, + "learning_rate": 1.2937554730759558e-05, + "loss": 0.6641, + "step": 24477 + }, + { + "epoch": 0.4229679292231131, + "grad_norm": 1.3883204989414752, + "learning_rate": 1.2937019768027931e-05, + "loss": 0.4812, + "step": 24478 + }, + { + "epoch": 0.42298520873652196, + "grad_norm": 1.223013431316607, + "learning_rate": 1.293648479609734e-05, + "loss": 0.4897, + "step": 24479 + }, + { + "epoch": 0.42300248824993086, + "grad_norm": 0.8981574912269648, + "learning_rate": 1.2935949814969455e-05, + "loss": 0.4642, + "step": 24480 + }, + { + "epoch": 0.42301976776333977, + "grad_norm": 1.5184706734877362, + "learning_rate": 1.2935414824645953e-05, + "loss": 0.4105, + "step": 24481 + }, + { + "epoch": 0.4230370472767487, + "grad_norm": 1.0079285525739474, + "learning_rate": 1.2934879825128505e-05, + "loss": 0.3885, + "step": 24482 + }, + { + "epoch": 0.4230543267901576, + "grad_norm": 1.207163902281142, + "learning_rate": 1.2934344816418794e-05, + "loss": 0.4362, + "step": 24483 + }, + { + "epoch": 0.4230716063035665, + "grad_norm": 0.7408933472046156, + "learning_rate": 1.2933809798518489e-05, + "loss": 0.4335, + "step": 24484 + }, + { + "epoch": 0.4230888858169754, + "grad_norm": 1.5645907439834335, + "learning_rate": 1.2933274771429272e-05, + "loss": 0.2166, + "step": 24485 + }, + { + "epoch": 0.4231061653303843, + "grad_norm": 0.6934612695735795, + "learning_rate": 1.2932739735152813e-05, + "loss": 0.5422, + "step": 24486 + }, + { + "epoch": 0.4231234448437932, + "grad_norm": 0.9246848321621028, + "learning_rate": 1.2932204689690791e-05, + "loss": 0.4892, + "step": 24487 + }, + { + "epoch": 0.4231407243572021, + "grad_norm": 1.1305280223328789, + "learning_rate": 1.2931669635044879e-05, + "loss": 0.3799, + "step": 24488 + }, + { + "epoch": 0.423158003870611, + "grad_norm": 1.1934028173728926, + "learning_rate": 1.2931134571216756e-05, + "loss": 0.4671, + "step": 24489 + }, + { + "epoch": 0.4231752833840199, + "grad_norm": 1.4945540014591707, + "learning_rate": 1.2930599498208098e-05, + "loss": 0.5199, + "step": 24490 + }, + { + "epoch": 0.4231925628974288, + "grad_norm": 0.9243658699744252, + "learning_rate": 1.2930064416020583e-05, + "loss": 0.4065, + "step": 24491 + }, + { + "epoch": 0.4232098424108377, + "grad_norm": 1.0751597901466323, + "learning_rate": 1.2929529324655877e-05, + "loss": 0.6288, + "step": 24492 + }, + { + "epoch": 0.4232271219242466, + "grad_norm": 0.7663638437156332, + "learning_rate": 1.2928994224115664e-05, + "loss": 0.4761, + "step": 24493 + }, + { + "epoch": 0.4232444014376555, + "grad_norm": 1.1433964679497566, + "learning_rate": 1.2928459114401623e-05, + "loss": 0.7324, + "step": 24494 + }, + { + "epoch": 0.4232616809510644, + "grad_norm": 0.9475579192511563, + "learning_rate": 1.292792399551542e-05, + "loss": 0.4485, + "step": 24495 + }, + { + "epoch": 0.42327896046447333, + "grad_norm": 1.2540220775523272, + "learning_rate": 1.2927388867458738e-05, + "loss": 0.5291, + "step": 24496 + }, + { + "epoch": 0.42329623997788224, + "grad_norm": 1.0880405399736133, + "learning_rate": 1.2926853730233253e-05, + "loss": 0.5194, + "step": 24497 + }, + { + "epoch": 0.42331351949129115, + "grad_norm": 0.9056603558857822, + "learning_rate": 1.292631858384064e-05, + "loss": 0.5153, + "step": 24498 + }, + { + "epoch": 0.42333079900470005, + "grad_norm": 0.6422371744553429, + "learning_rate": 1.2925783428282571e-05, + "loss": 0.3773, + "step": 24499 + }, + { + "epoch": 0.4233480785181089, + "grad_norm": 1.1910387267170666, + "learning_rate": 1.2925248263560733e-05, + "loss": 0.4832, + "step": 24500 + }, + { + "epoch": 0.4233653580315178, + "grad_norm": 1.2307866040207145, + "learning_rate": 1.292471308967679e-05, + "loss": 0.7145, + "step": 24501 + }, + { + "epoch": 0.4233826375449267, + "grad_norm": 0.7380098940540851, + "learning_rate": 1.2924177906632424e-05, + "loss": 0.4113, + "step": 24502 + }, + { + "epoch": 0.42339991705833563, + "grad_norm": 0.9458606441632076, + "learning_rate": 1.2923642714429312e-05, + "loss": 0.5293, + "step": 24503 + }, + { + "epoch": 0.42341719657174454, + "grad_norm": 0.843208602046772, + "learning_rate": 1.2923107513069126e-05, + "loss": 0.3247, + "step": 24504 + }, + { + "epoch": 0.42343447608515344, + "grad_norm": 0.7356299617132145, + "learning_rate": 1.2922572302553549e-05, + "loss": 0.3045, + "step": 24505 + }, + { + "epoch": 0.42345175559856235, + "grad_norm": 0.9008196141889404, + "learning_rate": 1.2922037082884251e-05, + "loss": 0.3953, + "step": 24506 + }, + { + "epoch": 0.42346903511197126, + "grad_norm": 1.2682266558619564, + "learning_rate": 1.292150185406291e-05, + "loss": 0.508, + "step": 24507 + }, + { + "epoch": 0.42348631462538017, + "grad_norm": 2.0061606373490335, + "learning_rate": 1.2920966616091204e-05, + "loss": 0.4945, + "step": 24508 + }, + { + "epoch": 0.4235035941387891, + "grad_norm": 1.116115241409468, + "learning_rate": 1.292043136897081e-05, + "loss": 0.5003, + "step": 24509 + }, + { + "epoch": 0.4235208736521979, + "grad_norm": 0.9633187682016635, + "learning_rate": 1.29198961127034e-05, + "loss": 0.5715, + "step": 24510 + }, + { + "epoch": 0.42353815316560683, + "grad_norm": 0.8467319768252447, + "learning_rate": 1.2919360847290656e-05, + "loss": 0.3419, + "step": 24511 + }, + { + "epoch": 0.42355543267901574, + "grad_norm": 1.7941229423752705, + "learning_rate": 1.291882557273425e-05, + "loss": 0.5657, + "step": 24512 + }, + { + "epoch": 0.42357271219242465, + "grad_norm": 1.2509784767114966, + "learning_rate": 1.2918290289035864e-05, + "loss": 0.4836, + "step": 24513 + }, + { + "epoch": 0.42358999170583356, + "grad_norm": 1.2049297055957218, + "learning_rate": 1.291775499619717e-05, + "loss": 0.4587, + "step": 24514 + }, + { + "epoch": 0.42360727121924246, + "grad_norm": 0.8985168931978885, + "learning_rate": 1.2917219694219843e-05, + "loss": 0.3279, + "step": 24515 + }, + { + "epoch": 0.4236245507326514, + "grad_norm": 1.62957383684111, + "learning_rate": 1.2916684383105564e-05, + "loss": 0.4624, + "step": 24516 + }, + { + "epoch": 0.4236418302460603, + "grad_norm": 1.6504254016867066, + "learning_rate": 1.2916149062856007e-05, + "loss": 0.335, + "step": 24517 + }, + { + "epoch": 0.4236591097594692, + "grad_norm": 0.7999669203992334, + "learning_rate": 1.2915613733472849e-05, + "loss": 0.4976, + "step": 24518 + }, + { + "epoch": 0.4236763892728781, + "grad_norm": 0.813369076075432, + "learning_rate": 1.2915078394957768e-05, + "loss": 0.3356, + "step": 24519 + }, + { + "epoch": 0.423693668786287, + "grad_norm": 0.6469952425472143, + "learning_rate": 1.291454304731244e-05, + "loss": 0.3235, + "step": 24520 + }, + { + "epoch": 0.42371094829969586, + "grad_norm": 0.8751491888853571, + "learning_rate": 1.2914007690538541e-05, + "loss": 0.4148, + "step": 24521 + }, + { + "epoch": 0.42372822781310476, + "grad_norm": 1.0275927245678567, + "learning_rate": 1.2913472324637753e-05, + "loss": 0.5967, + "step": 24522 + }, + { + "epoch": 0.42374550732651367, + "grad_norm": 0.5832566355625661, + "learning_rate": 1.2912936949611746e-05, + "loss": 0.4333, + "step": 24523 + }, + { + "epoch": 0.4237627868399226, + "grad_norm": 0.7116169414717838, + "learning_rate": 1.2912401565462194e-05, + "loss": 0.4556, + "step": 24524 + }, + { + "epoch": 0.4237800663533315, + "grad_norm": 1.0768681712701886, + "learning_rate": 1.2911866172190783e-05, + "loss": 0.648, + "step": 24525 + }, + { + "epoch": 0.4237973458667404, + "grad_norm": 0.7824750755367393, + "learning_rate": 1.2911330769799187e-05, + "loss": 0.4783, + "step": 24526 + }, + { + "epoch": 0.4238146253801493, + "grad_norm": 1.1381344070998396, + "learning_rate": 1.2910795358289078e-05, + "loss": 0.3921, + "step": 24527 + }, + { + "epoch": 0.4238319048935582, + "grad_norm": 0.8696242188174181, + "learning_rate": 1.2910259937662139e-05, + "loss": 0.3178, + "step": 24528 + }, + { + "epoch": 0.4238491844069671, + "grad_norm": 0.8493356708623795, + "learning_rate": 1.2909724507920046e-05, + "loss": 0.4544, + "step": 24529 + }, + { + "epoch": 0.423866463920376, + "grad_norm": 0.9674543391397536, + "learning_rate": 1.2909189069064471e-05, + "loss": 0.5041, + "step": 24530 + }, + { + "epoch": 0.42388374343378493, + "grad_norm": 1.1554772132609947, + "learning_rate": 1.2908653621097099e-05, + "loss": 0.4558, + "step": 24531 + }, + { + "epoch": 0.4239010229471938, + "grad_norm": 0.8653344226100269, + "learning_rate": 1.29081181640196e-05, + "loss": 0.412, + "step": 24532 + }, + { + "epoch": 0.4239183024606027, + "grad_norm": 0.9233666609251087, + "learning_rate": 1.2907582697833656e-05, + "loss": 0.6473, + "step": 24533 + }, + { + "epoch": 0.4239355819740116, + "grad_norm": 0.871616170806902, + "learning_rate": 1.290704722254094e-05, + "loss": 0.5325, + "step": 24534 + }, + { + "epoch": 0.4239528614874205, + "grad_norm": 1.505099064323272, + "learning_rate": 1.290651173814313e-05, + "loss": 0.4211, + "step": 24535 + }, + { + "epoch": 0.4239701410008294, + "grad_norm": 0.7440823491583982, + "learning_rate": 1.2905976244641907e-05, + "loss": 0.5753, + "step": 24536 + }, + { + "epoch": 0.4239874205142383, + "grad_norm": 0.8021562158527079, + "learning_rate": 1.2905440742038946e-05, + "loss": 0.4299, + "step": 24537 + }, + { + "epoch": 0.42400470002764723, + "grad_norm": 1.0204450294553205, + "learning_rate": 1.2904905230335922e-05, + "loss": 0.4647, + "step": 24538 + }, + { + "epoch": 0.42402197954105614, + "grad_norm": 0.8105440962860163, + "learning_rate": 1.2904369709534514e-05, + "loss": 0.2586, + "step": 24539 + }, + { + "epoch": 0.42403925905446505, + "grad_norm": 1.1563242654700145, + "learning_rate": 1.2903834179636401e-05, + "loss": 0.5753, + "step": 24540 + }, + { + "epoch": 0.42405653856787395, + "grad_norm": 0.6171304444593138, + "learning_rate": 1.2903298640643256e-05, + "loss": 0.4195, + "step": 24541 + }, + { + "epoch": 0.4240738180812828, + "grad_norm": 0.704647221432847, + "learning_rate": 1.2902763092556762e-05, + "loss": 0.5097, + "step": 24542 + }, + { + "epoch": 0.4240910975946917, + "grad_norm": 1.09562896220608, + "learning_rate": 1.2902227535378596e-05, + "loss": 0.3784, + "step": 24543 + }, + { + "epoch": 0.4241083771081006, + "grad_norm": 0.5065880689626188, + "learning_rate": 1.2901691969110426e-05, + "loss": 0.6765, + "step": 24544 + }, + { + "epoch": 0.42412565662150953, + "grad_norm": 0.7598538158610797, + "learning_rate": 1.2901156393753939e-05, + "loss": 0.3957, + "step": 24545 + }, + { + "epoch": 0.42414293613491844, + "grad_norm": 1.0433115869145646, + "learning_rate": 1.290062080931081e-05, + "loss": 0.3617, + "step": 24546 + }, + { + "epoch": 0.42416021564832734, + "grad_norm": 1.1666769084127824, + "learning_rate": 1.2900085215782717e-05, + "loss": 0.5966, + "step": 24547 + }, + { + "epoch": 0.42417749516173625, + "grad_norm": 0.7352108946887138, + "learning_rate": 1.2899549613171335e-05, + "loss": 0.3079, + "step": 24548 + }, + { + "epoch": 0.42419477467514516, + "grad_norm": 0.8060649644316349, + "learning_rate": 1.2899014001478345e-05, + "loss": 0.5417, + "step": 24549 + }, + { + "epoch": 0.42421205418855407, + "grad_norm": 0.5098861829776231, + "learning_rate": 1.289847838070542e-05, + "loss": 0.735, + "step": 24550 + }, + { + "epoch": 0.424229333701963, + "grad_norm": 1.2712912513341053, + "learning_rate": 1.2897942750854242e-05, + "loss": 0.3938, + "step": 24551 + }, + { + "epoch": 0.4242466132153719, + "grad_norm": 1.065817562511815, + "learning_rate": 1.289740711192649e-05, + "loss": 0.679, + "step": 24552 + }, + { + "epoch": 0.42426389272878073, + "grad_norm": 1.6736792366940987, + "learning_rate": 1.2896871463923834e-05, + "loss": 0.3319, + "step": 24553 + }, + { + "epoch": 0.42428117224218964, + "grad_norm": 1.158156755244648, + "learning_rate": 1.2896335806847962e-05, + "loss": 0.4263, + "step": 24554 + }, + { + "epoch": 0.42429845175559855, + "grad_norm": 1.4414680415544892, + "learning_rate": 1.289580014070054e-05, + "loss": 0.5042, + "step": 24555 + }, + { + "epoch": 0.42431573126900746, + "grad_norm": 0.9018214098407983, + "learning_rate": 1.2895264465483254e-05, + "loss": 0.6088, + "step": 24556 + }, + { + "epoch": 0.42433301078241636, + "grad_norm": 1.0282788996001002, + "learning_rate": 1.2894728781197781e-05, + "loss": 0.4057, + "step": 24557 + }, + { + "epoch": 0.42435029029582527, + "grad_norm": 1.0758810334508955, + "learning_rate": 1.2894193087845794e-05, + "loss": 0.3532, + "step": 24558 + }, + { + "epoch": 0.4243675698092342, + "grad_norm": 1.1003821892365704, + "learning_rate": 1.2893657385428979e-05, + "loss": 0.5644, + "step": 24559 + }, + { + "epoch": 0.4243848493226431, + "grad_norm": 1.2736569888343383, + "learning_rate": 1.2893121673949008e-05, + "loss": 0.7091, + "step": 24560 + }, + { + "epoch": 0.424402128836052, + "grad_norm": 0.8877949536597858, + "learning_rate": 1.2892585953407558e-05, + "loss": 0.4127, + "step": 24561 + }, + { + "epoch": 0.4244194083494609, + "grad_norm": 0.7342735542051277, + "learning_rate": 1.289205022380631e-05, + "loss": 0.6254, + "step": 24562 + }, + { + "epoch": 0.42443668786286975, + "grad_norm": 1.2133851280767496, + "learning_rate": 1.2891514485146943e-05, + "loss": 0.3055, + "step": 24563 + }, + { + "epoch": 0.42445396737627866, + "grad_norm": 0.8218250164970963, + "learning_rate": 1.289097873743113e-05, + "loss": 0.4842, + "step": 24564 + }, + { + "epoch": 0.42447124688968757, + "grad_norm": 2.077321366466132, + "learning_rate": 1.2890442980660554e-05, + "loss": 0.574, + "step": 24565 + }, + { + "epoch": 0.4244885264030965, + "grad_norm": 1.0810432381943487, + "learning_rate": 1.2889907214836889e-05, + "loss": 0.5665, + "step": 24566 + }, + { + "epoch": 0.4245058059165054, + "grad_norm": 0.9566255003674508, + "learning_rate": 1.2889371439961816e-05, + "loss": 0.4584, + "step": 24567 + }, + { + "epoch": 0.4245230854299143, + "grad_norm": 0.9535164857820899, + "learning_rate": 1.2888835656037011e-05, + "loss": 0.4879, + "step": 24568 + }, + { + "epoch": 0.4245403649433232, + "grad_norm": 0.9646806979736391, + "learning_rate": 1.2888299863064156e-05, + "loss": 0.4631, + "step": 24569 + }, + { + "epoch": 0.4245576444567321, + "grad_norm": 0.8660192978511373, + "learning_rate": 1.2887764061044921e-05, + "loss": 0.4543, + "step": 24570 + }, + { + "epoch": 0.424574923970141, + "grad_norm": 0.9855947449200902, + "learning_rate": 1.2887228249980994e-05, + "loss": 0.6064, + "step": 24571 + }, + { + "epoch": 0.4245922034835499, + "grad_norm": 0.9854539720320179, + "learning_rate": 1.2886692429874048e-05, + "loss": 0.4669, + "step": 24572 + }, + { + "epoch": 0.42460948299695883, + "grad_norm": 0.8646765518860385, + "learning_rate": 1.288615660072576e-05, + "loss": 0.5084, + "step": 24573 + }, + { + "epoch": 0.4246267625103677, + "grad_norm": 0.881744302329686, + "learning_rate": 1.2885620762537813e-05, + "loss": 0.3126, + "step": 24574 + }, + { + "epoch": 0.4246440420237766, + "grad_norm": 0.7381099206309106, + "learning_rate": 1.2885084915311881e-05, + "loss": 0.4901, + "step": 24575 + }, + { + "epoch": 0.4246613215371855, + "grad_norm": 0.840561601809789, + "learning_rate": 1.2884549059049642e-05, + "loss": 0.4695, + "step": 24576 + }, + { + "epoch": 0.4246786010505944, + "grad_norm": 1.211924909742473, + "learning_rate": 1.288401319375278e-05, + "loss": 0.3831, + "step": 24577 + }, + { + "epoch": 0.4246958805640033, + "grad_norm": 1.4116737175319158, + "learning_rate": 1.2883477319422965e-05, + "loss": 0.394, + "step": 24578 + }, + { + "epoch": 0.4247131600774122, + "grad_norm": 0.9165952913278058, + "learning_rate": 1.2882941436061884e-05, + "loss": 0.3284, + "step": 24579 + }, + { + "epoch": 0.42473043959082113, + "grad_norm": 0.8786342799001622, + "learning_rate": 1.288240554367121e-05, + "loss": 0.3057, + "step": 24580 + }, + { + "epoch": 0.42474771910423004, + "grad_norm": 0.7972585156013698, + "learning_rate": 1.288186964225262e-05, + "loss": 0.4539, + "step": 24581 + }, + { + "epoch": 0.42476499861763894, + "grad_norm": 0.7150795066807955, + "learning_rate": 1.28813337318078e-05, + "loss": 0.291, + "step": 24582 + }, + { + "epoch": 0.42478227813104785, + "grad_norm": 1.175596192523121, + "learning_rate": 1.288079781233842e-05, + "loss": 0.6035, + "step": 24583 + }, + { + "epoch": 0.4247995576444567, + "grad_norm": 1.6078508108604115, + "learning_rate": 1.2880261883846166e-05, + "loss": 0.5381, + "step": 24584 + }, + { + "epoch": 0.4248168371578656, + "grad_norm": 1.2089287543927134, + "learning_rate": 1.287972594633271e-05, + "loss": 0.4564, + "step": 24585 + }, + { + "epoch": 0.4248341166712745, + "grad_norm": 0.9260882603987172, + "learning_rate": 1.2879189999799736e-05, + "loss": 0.285, + "step": 24586 + }, + { + "epoch": 0.4248513961846834, + "grad_norm": 1.2948931927126728, + "learning_rate": 1.2878654044248917e-05, + "loss": 0.4237, + "step": 24587 + }, + { + "epoch": 0.42486867569809234, + "grad_norm": 0.689799944450149, + "learning_rate": 1.2878118079681937e-05, + "loss": 0.358, + "step": 24588 + }, + { + "epoch": 0.42488595521150124, + "grad_norm": 0.7234893890358342, + "learning_rate": 1.287758210610047e-05, + "loss": 0.3792, + "step": 24589 + }, + { + "epoch": 0.42490323472491015, + "grad_norm": 1.155725077953086, + "learning_rate": 1.2877046123506199e-05, + "loss": 0.4278, + "step": 24590 + }, + { + "epoch": 0.42492051423831906, + "grad_norm": 0.9047070864000791, + "learning_rate": 1.28765101319008e-05, + "loss": 0.3094, + "step": 24591 + }, + { + "epoch": 0.42493779375172797, + "grad_norm": 2.2947408447409137, + "learning_rate": 1.2875974131285954e-05, + "loss": 0.673, + "step": 24592 + }, + { + "epoch": 0.4249550732651369, + "grad_norm": 5.434445283038508, + "learning_rate": 1.2875438121663338e-05, + "loss": 0.521, + "step": 24593 + }, + { + "epoch": 0.4249723527785458, + "grad_norm": 1.3664450860258857, + "learning_rate": 1.2874902103034633e-05, + "loss": 0.393, + "step": 24594 + }, + { + "epoch": 0.42498963229195463, + "grad_norm": 1.051254507272083, + "learning_rate": 1.2874366075401513e-05, + "loss": 0.5297, + "step": 24595 + }, + { + "epoch": 0.42500691180536354, + "grad_norm": 1.1954790420145907, + "learning_rate": 1.287383003876566e-05, + "loss": 0.529, + "step": 24596 + }, + { + "epoch": 0.42502419131877245, + "grad_norm": 1.7519629131933319, + "learning_rate": 1.2873293993128755e-05, + "loss": 0.4687, + "step": 24597 + }, + { + "epoch": 0.42504147083218136, + "grad_norm": 0.6623698452340181, + "learning_rate": 1.2872757938492472e-05, + "loss": 0.8363, + "step": 24598 + }, + { + "epoch": 0.42505875034559026, + "grad_norm": 1.039624348680046, + "learning_rate": 1.2872221874858496e-05, + "loss": 0.5359, + "step": 24599 + }, + { + "epoch": 0.42507602985899917, + "grad_norm": 0.4767474800524795, + "learning_rate": 1.2871685802228502e-05, + "loss": 0.6985, + "step": 24600 + }, + { + "epoch": 0.4250933093724081, + "grad_norm": 1.2491460216884558, + "learning_rate": 1.2871149720604168e-05, + "loss": 0.3866, + "step": 24601 + }, + { + "epoch": 0.425110588885817, + "grad_norm": 1.3313597467732312, + "learning_rate": 1.2870613629987176e-05, + "loss": 0.4798, + "step": 24602 + }, + { + "epoch": 0.4251278683992259, + "grad_norm": 0.7029956540193437, + "learning_rate": 1.2870077530379205e-05, + "loss": 0.2888, + "step": 24603 + }, + { + "epoch": 0.4251451479126348, + "grad_norm": 1.2722262562493647, + "learning_rate": 1.2869541421781933e-05, + "loss": 0.5238, + "step": 24604 + }, + { + "epoch": 0.4251624274260437, + "grad_norm": 0.8739746196235972, + "learning_rate": 1.286900530419704e-05, + "loss": 0.5955, + "step": 24605 + }, + { + "epoch": 0.42517970693945256, + "grad_norm": 1.1317266065246165, + "learning_rate": 1.2868469177626201e-05, + "loss": 0.3381, + "step": 24606 + }, + { + "epoch": 0.42519698645286147, + "grad_norm": 1.2076625651505428, + "learning_rate": 1.28679330420711e-05, + "loss": 0.5616, + "step": 24607 + }, + { + "epoch": 0.4252142659662704, + "grad_norm": 1.2929232171356169, + "learning_rate": 1.2867396897533415e-05, + "loss": 0.3996, + "step": 24608 + }, + { + "epoch": 0.4252315454796793, + "grad_norm": 0.6702953950359898, + "learning_rate": 1.2866860744014826e-05, + "loss": 0.3423, + "step": 24609 + }, + { + "epoch": 0.4252488249930882, + "grad_norm": 0.6768273343065992, + "learning_rate": 1.2866324581517009e-05, + "loss": 0.5068, + "step": 24610 + }, + { + "epoch": 0.4252661045064971, + "grad_norm": 0.9020820739124282, + "learning_rate": 1.2865788410041647e-05, + "loss": 0.3735, + "step": 24611 + }, + { + "epoch": 0.425283384019906, + "grad_norm": 0.813676303832513, + "learning_rate": 1.2865252229590419e-05, + "loss": 0.4757, + "step": 24612 + }, + { + "epoch": 0.4253006635333149, + "grad_norm": 1.3718629547496977, + "learning_rate": 1.2864716040164999e-05, + "loss": 0.4586, + "step": 24613 + }, + { + "epoch": 0.4253179430467238, + "grad_norm": 0.9706497844569285, + "learning_rate": 1.2864179841767074e-05, + "loss": 0.4849, + "step": 24614 + }, + { + "epoch": 0.42533522256013273, + "grad_norm": 1.6171131527559237, + "learning_rate": 1.2863643634398322e-05, + "loss": 0.5647, + "step": 24615 + }, + { + "epoch": 0.4253525020735416, + "grad_norm": 0.7951281631350277, + "learning_rate": 1.2863107418060419e-05, + "loss": 0.4181, + "step": 24616 + }, + { + "epoch": 0.4253697815869505, + "grad_norm": 0.6278413587302725, + "learning_rate": 1.2862571192755046e-05, + "loss": 0.3771, + "step": 24617 + }, + { + "epoch": 0.4253870611003594, + "grad_norm": 1.4815305675605936, + "learning_rate": 1.286203495848388e-05, + "loss": 0.3653, + "step": 24618 + }, + { + "epoch": 0.4254043406137683, + "grad_norm": 1.3768589123341695, + "learning_rate": 1.2861498715248603e-05, + "loss": 0.4408, + "step": 24619 + }, + { + "epoch": 0.4254216201271772, + "grad_norm": 1.2808259031209535, + "learning_rate": 1.2860962463050898e-05, + "loss": 0.5185, + "step": 24620 + }, + { + "epoch": 0.4254388996405861, + "grad_norm": 1.0632883700939224, + "learning_rate": 1.2860426201892437e-05, + "loss": 0.4234, + "step": 24621 + }, + { + "epoch": 0.42545617915399503, + "grad_norm": 0.536151845832553, + "learning_rate": 1.285988993177491e-05, + "loss": 0.325, + "step": 24622 + }, + { + "epoch": 0.42547345866740394, + "grad_norm": 1.137256826053615, + "learning_rate": 1.2859353652699986e-05, + "loss": 0.426, + "step": 24623 + }, + { + "epoch": 0.42549073818081284, + "grad_norm": 1.3856160676047193, + "learning_rate": 1.2858817364669347e-05, + "loss": 0.5123, + "step": 24624 + }, + { + "epoch": 0.42550801769422175, + "grad_norm": 1.034657000237877, + "learning_rate": 1.2858281067684682e-05, + "loss": 0.5271, + "step": 24625 + }, + { + "epoch": 0.42552529720763066, + "grad_norm": 1.401368124374575, + "learning_rate": 1.2857744761747659e-05, + "loss": 0.5778, + "step": 24626 + }, + { + "epoch": 0.4255425767210395, + "grad_norm": 0.8356088276581208, + "learning_rate": 1.285720844685996e-05, + "loss": 0.4355, + "step": 24627 + }, + { + "epoch": 0.4255598562344484, + "grad_norm": 1.0302536613484228, + "learning_rate": 1.285667212302327e-05, + "loss": 0.5689, + "step": 24628 + }, + { + "epoch": 0.4255771357478573, + "grad_norm": 0.49555609099736286, + "learning_rate": 1.2856135790239266e-05, + "loss": 0.7695, + "step": 24629 + }, + { + "epoch": 0.42559441526126623, + "grad_norm": 1.0985253205782393, + "learning_rate": 1.2855599448509624e-05, + "loss": 0.6794, + "step": 24630 + }, + { + "epoch": 0.42561169477467514, + "grad_norm": 0.8128705551488027, + "learning_rate": 1.2855063097836031e-05, + "loss": 0.5058, + "step": 24631 + }, + { + "epoch": 0.42562897428808405, + "grad_norm": 0.711586158884156, + "learning_rate": 1.2854526738220164e-05, + "loss": 0.2551, + "step": 24632 + }, + { + "epoch": 0.42564625380149296, + "grad_norm": 1.0368339710469707, + "learning_rate": 1.2853990369663698e-05, + "loss": 0.3057, + "step": 24633 + }, + { + "epoch": 0.42566353331490187, + "grad_norm": 1.2697417918659033, + "learning_rate": 1.285345399216832e-05, + "loss": 0.429, + "step": 24634 + }, + { + "epoch": 0.4256808128283108, + "grad_norm": 0.7664212433744227, + "learning_rate": 1.2852917605735709e-05, + "loss": 0.4802, + "step": 24635 + }, + { + "epoch": 0.4256980923417197, + "grad_norm": 0.7357696606012356, + "learning_rate": 1.2852381210367541e-05, + "loss": 0.4212, + "step": 24636 + }, + { + "epoch": 0.42571537185512853, + "grad_norm": 1.125760010168924, + "learning_rate": 1.2851844806065499e-05, + "loss": 0.4885, + "step": 24637 + }, + { + "epoch": 0.42573265136853744, + "grad_norm": 0.8450873299507793, + "learning_rate": 1.285130839283126e-05, + "loss": 0.5424, + "step": 24638 + }, + { + "epoch": 0.42574993088194635, + "grad_norm": 0.8573501296217084, + "learning_rate": 1.2850771970666511e-05, + "loss": 0.3469, + "step": 24639 + }, + { + "epoch": 0.42576721039535526, + "grad_norm": 1.078678264345519, + "learning_rate": 1.2850235539572927e-05, + "loss": 0.6697, + "step": 24640 + }, + { + "epoch": 0.42578448990876416, + "grad_norm": 1.302357382776098, + "learning_rate": 1.2849699099552184e-05, + "loss": 0.4486, + "step": 24641 + }, + { + "epoch": 0.42580176942217307, + "grad_norm": 1.0215478202771153, + "learning_rate": 1.2849162650605971e-05, + "loss": 0.4993, + "step": 24642 + }, + { + "epoch": 0.425819048935582, + "grad_norm": 0.986063879641848, + "learning_rate": 1.2848626192735963e-05, + "loss": 0.4466, + "step": 24643 + }, + { + "epoch": 0.4258363284489909, + "grad_norm": 0.9607668684801945, + "learning_rate": 1.2848089725943838e-05, + "loss": 0.4763, + "step": 24644 + }, + { + "epoch": 0.4258536079623998, + "grad_norm": 0.8289266824784078, + "learning_rate": 1.2847553250231283e-05, + "loss": 0.3594, + "step": 24645 + }, + { + "epoch": 0.4258708874758087, + "grad_norm": 1.0658321034057268, + "learning_rate": 1.2847016765599979e-05, + "loss": 0.4442, + "step": 24646 + }, + { + "epoch": 0.4258881669892176, + "grad_norm": 0.8483532171694466, + "learning_rate": 1.2846480272051594e-05, + "loss": 0.3918, + "step": 24647 + }, + { + "epoch": 0.42590544650262646, + "grad_norm": 0.8116353203793704, + "learning_rate": 1.2845943769587821e-05, + "loss": 0.4053, + "step": 24648 + }, + { + "epoch": 0.42592272601603537, + "grad_norm": 1.5571869160420317, + "learning_rate": 1.2845407258210337e-05, + "loss": 0.5527, + "step": 24649 + }, + { + "epoch": 0.4259400055294443, + "grad_norm": 1.254969208387276, + "learning_rate": 1.2844870737920817e-05, + "loss": 0.4723, + "step": 24650 + }, + { + "epoch": 0.4259572850428532, + "grad_norm": 1.0442297304462995, + "learning_rate": 1.2844334208720947e-05, + "loss": 0.4399, + "step": 24651 + }, + { + "epoch": 0.4259745645562621, + "grad_norm": 1.1431580618087962, + "learning_rate": 1.2843797670612407e-05, + "loss": 0.5276, + "step": 24652 + }, + { + "epoch": 0.425991844069671, + "grad_norm": 1.3067755134083185, + "learning_rate": 1.2843261123596875e-05, + "loss": 0.5184, + "step": 24653 + }, + { + "epoch": 0.4260091235830799, + "grad_norm": 1.0566693467236086, + "learning_rate": 1.2842724567676035e-05, + "loss": 0.3919, + "step": 24654 + }, + { + "epoch": 0.4260264030964888, + "grad_norm": 0.8220848537310771, + "learning_rate": 1.2842188002851566e-05, + "loss": 0.5324, + "step": 24655 + }, + { + "epoch": 0.4260436826098977, + "grad_norm": 1.18463701967675, + "learning_rate": 1.2841651429125147e-05, + "loss": 0.683, + "step": 24656 + }, + { + "epoch": 0.42606096212330663, + "grad_norm": 1.5529597184430213, + "learning_rate": 1.284111484649846e-05, + "loss": 0.5981, + "step": 24657 + }, + { + "epoch": 0.4260782416367155, + "grad_norm": 0.562510259982334, + "learning_rate": 1.2840578254973184e-05, + "loss": 0.7505, + "step": 24658 + }, + { + "epoch": 0.4260955211501244, + "grad_norm": 0.9618187906267817, + "learning_rate": 1.2840041654551003e-05, + "loss": 0.314, + "step": 24659 + }, + { + "epoch": 0.4261128006635333, + "grad_norm": 1.1982706470139013, + "learning_rate": 1.2839505045233596e-05, + "loss": 0.5743, + "step": 24660 + }, + { + "epoch": 0.4261300801769422, + "grad_norm": 0.7723316650220837, + "learning_rate": 1.283896842702264e-05, + "loss": 0.4185, + "step": 24661 + }, + { + "epoch": 0.4261473596903511, + "grad_norm": 1.6248194064941892, + "learning_rate": 1.2838431799919823e-05, + "loss": 0.3217, + "step": 24662 + }, + { + "epoch": 0.42616463920376, + "grad_norm": 0.7804379040476135, + "learning_rate": 1.283789516392682e-05, + "loss": 0.4486, + "step": 24663 + }, + { + "epoch": 0.42618191871716893, + "grad_norm": 1.0578728633155026, + "learning_rate": 1.2837358519045314e-05, + "loss": 0.4787, + "step": 24664 + }, + { + "epoch": 0.42619919823057784, + "grad_norm": 0.781781135527725, + "learning_rate": 1.2836821865276985e-05, + "loss": 0.7021, + "step": 24665 + }, + { + "epoch": 0.42621647774398674, + "grad_norm": 0.832060820781015, + "learning_rate": 1.2836285202623517e-05, + "loss": 0.3473, + "step": 24666 + }, + { + "epoch": 0.42623375725739565, + "grad_norm": 1.3514140000789112, + "learning_rate": 1.2835748531086588e-05, + "loss": 0.4035, + "step": 24667 + }, + { + "epoch": 0.42625103677080456, + "grad_norm": 0.8708852563143867, + "learning_rate": 1.2835211850667876e-05, + "loss": 0.3973, + "step": 24668 + }, + { + "epoch": 0.4262683162842134, + "grad_norm": 0.9485332008708546, + "learning_rate": 1.2834675161369066e-05, + "loss": 0.483, + "step": 24669 + }, + { + "epoch": 0.4262855957976223, + "grad_norm": 0.5096396146434945, + "learning_rate": 1.2834138463191836e-05, + "loss": 0.6495, + "step": 24670 + }, + { + "epoch": 0.4263028753110312, + "grad_norm": 0.8516547564118903, + "learning_rate": 1.2833601756137872e-05, + "loss": 0.4488, + "step": 24671 + }, + { + "epoch": 0.42632015482444013, + "grad_norm": 0.6745537873800758, + "learning_rate": 1.2833065040208852e-05, + "loss": 0.37, + "step": 24672 + }, + { + "epoch": 0.42633743433784904, + "grad_norm": 0.589904181865752, + "learning_rate": 1.2832528315406456e-05, + "loss": 0.3977, + "step": 24673 + }, + { + "epoch": 0.42635471385125795, + "grad_norm": 0.8430615076130019, + "learning_rate": 1.2831991581732366e-05, + "loss": 0.5088, + "step": 24674 + }, + { + "epoch": 0.42637199336466686, + "grad_norm": 0.8102562028226004, + "learning_rate": 1.2831454839188265e-05, + "loss": 0.4162, + "step": 24675 + }, + { + "epoch": 0.42638927287807576, + "grad_norm": 0.8890352706918204, + "learning_rate": 1.2830918087775827e-05, + "loss": 0.4793, + "step": 24676 + }, + { + "epoch": 0.4264065523914847, + "grad_norm": 1.158932917016934, + "learning_rate": 1.2830381327496746e-05, + "loss": 0.4368, + "step": 24677 + }, + { + "epoch": 0.4264238319048936, + "grad_norm": 0.5831006834952401, + "learning_rate": 1.2829844558352691e-05, + "loss": 0.7162, + "step": 24678 + }, + { + "epoch": 0.4264411114183025, + "grad_norm": 0.7526131926673697, + "learning_rate": 1.2829307780345349e-05, + "loss": 0.7028, + "step": 24679 + }, + { + "epoch": 0.42645839093171134, + "grad_norm": 0.8554564384306731, + "learning_rate": 1.2828770993476398e-05, + "loss": 0.3534, + "step": 24680 + }, + { + "epoch": 0.42647567044512025, + "grad_norm": 1.1270512932146373, + "learning_rate": 1.2828234197747522e-05, + "loss": 0.4643, + "step": 24681 + }, + { + "epoch": 0.42649294995852916, + "grad_norm": 1.1089316057698986, + "learning_rate": 1.2827697393160401e-05, + "loss": 0.365, + "step": 24682 + }, + { + "epoch": 0.42651022947193806, + "grad_norm": 1.0769099551343115, + "learning_rate": 1.282716057971672e-05, + "loss": 0.6404, + "step": 24683 + }, + { + "epoch": 0.42652750898534697, + "grad_norm": 0.974733512673503, + "learning_rate": 1.2826623757418154e-05, + "loss": 0.3498, + "step": 24684 + }, + { + "epoch": 0.4265447884987559, + "grad_norm": 1.0847465935306486, + "learning_rate": 1.282608692626639e-05, + "loss": 0.3819, + "step": 24685 + }, + { + "epoch": 0.4265620680121648, + "grad_norm": 0.9563945414251853, + "learning_rate": 1.2825550086263105e-05, + "loss": 0.5694, + "step": 24686 + }, + { + "epoch": 0.4265793475255737, + "grad_norm": 0.5093636770104711, + "learning_rate": 1.2825013237409985e-05, + "loss": 0.2702, + "step": 24687 + }, + { + "epoch": 0.4265966270389826, + "grad_norm": 1.27782712087545, + "learning_rate": 1.2824476379708707e-05, + "loss": 0.7208, + "step": 24688 + }, + { + "epoch": 0.4266139065523915, + "grad_norm": 0.891174652268144, + "learning_rate": 1.2823939513160955e-05, + "loss": 0.5565, + "step": 24689 + }, + { + "epoch": 0.42663118606580036, + "grad_norm": 1.0399853305967817, + "learning_rate": 1.2823402637768408e-05, + "loss": 0.6478, + "step": 24690 + }, + { + "epoch": 0.42664846557920927, + "grad_norm": 0.9084249053130694, + "learning_rate": 1.282286575353275e-05, + "loss": 0.4019, + "step": 24691 + }, + { + "epoch": 0.4266657450926182, + "grad_norm": 0.9434261054293915, + "learning_rate": 1.2822328860455664e-05, + "loss": 0.3731, + "step": 24692 + }, + { + "epoch": 0.4266830246060271, + "grad_norm": 0.9351329334596132, + "learning_rate": 1.2821791958538828e-05, + "loss": 0.3183, + "step": 24693 + }, + { + "epoch": 0.426700304119436, + "grad_norm": 0.8226916363671484, + "learning_rate": 1.2821255047783923e-05, + "loss": 0.2345, + "step": 24694 + }, + { + "epoch": 0.4267175836328449, + "grad_norm": 1.4438586986949915, + "learning_rate": 1.2820718128192638e-05, + "loss": 0.6323, + "step": 24695 + }, + { + "epoch": 0.4267348631462538, + "grad_norm": 0.90388843394025, + "learning_rate": 1.2820181199766643e-05, + "loss": 0.2784, + "step": 24696 + }, + { + "epoch": 0.4267521426596627, + "grad_norm": 0.7561396756458311, + "learning_rate": 1.2819644262507632e-05, + "loss": 0.5416, + "step": 24697 + }, + { + "epoch": 0.4267694221730716, + "grad_norm": 1.095981619239769, + "learning_rate": 1.2819107316417277e-05, + "loss": 0.4918, + "step": 24698 + }, + { + "epoch": 0.42678670168648053, + "grad_norm": 0.6499562633748741, + "learning_rate": 1.2818570361497268e-05, + "loss": 0.3561, + "step": 24699 + }, + { + "epoch": 0.42680398119988944, + "grad_norm": 1.125020010121336, + "learning_rate": 1.281803339774928e-05, + "loss": 0.5236, + "step": 24700 + }, + { + "epoch": 0.4268212607132983, + "grad_norm": 1.50462751427765, + "learning_rate": 1.2817496425174994e-05, + "loss": 0.4685, + "step": 24701 + }, + { + "epoch": 0.4268385402267072, + "grad_norm": 0.8935198005903708, + "learning_rate": 1.2816959443776098e-05, + "loss": 0.4965, + "step": 24702 + }, + { + "epoch": 0.4268558197401161, + "grad_norm": 0.6175698478029247, + "learning_rate": 1.2816422453554271e-05, + "loss": 0.406, + "step": 24703 + }, + { + "epoch": 0.426873099253525, + "grad_norm": 0.9871176219478721, + "learning_rate": 1.2815885454511192e-05, + "loss": 0.4296, + "step": 24704 + }, + { + "epoch": 0.4268903787669339, + "grad_norm": 1.2651367683968473, + "learning_rate": 1.281534844664855e-05, + "loss": 0.4615, + "step": 24705 + }, + { + "epoch": 0.4269076582803428, + "grad_norm": 0.4559705645412446, + "learning_rate": 1.2814811429968022e-05, + "loss": 0.6445, + "step": 24706 + }, + { + "epoch": 0.42692493779375174, + "grad_norm": 0.8097930142182704, + "learning_rate": 1.281427440447129e-05, + "loss": 0.5249, + "step": 24707 + }, + { + "epoch": 0.42694221730716064, + "grad_norm": 0.8757879054677952, + "learning_rate": 1.2813737370160037e-05, + "loss": 0.3574, + "step": 24708 + }, + { + "epoch": 0.42695949682056955, + "grad_norm": 1.080234671375476, + "learning_rate": 1.2813200327035944e-05, + "loss": 0.482, + "step": 24709 + }, + { + "epoch": 0.42697677633397846, + "grad_norm": 1.6641779223997986, + "learning_rate": 1.2812663275100694e-05, + "loss": 0.5345, + "step": 24710 + }, + { + "epoch": 0.4269940558473873, + "grad_norm": 1.0380983511098418, + "learning_rate": 1.2812126214355968e-05, + "loss": 0.5439, + "step": 24711 + }, + { + "epoch": 0.4270113353607962, + "grad_norm": 0.755401854655869, + "learning_rate": 1.2811589144803451e-05, + "loss": 0.4729, + "step": 24712 + }, + { + "epoch": 0.4270286148742051, + "grad_norm": 1.7569909361119742, + "learning_rate": 1.281105206644482e-05, + "loss": 0.495, + "step": 24713 + }, + { + "epoch": 0.42704589438761403, + "grad_norm": 0.8471925965034569, + "learning_rate": 1.2810514979281764e-05, + "loss": 0.6135, + "step": 24714 + }, + { + "epoch": 0.42706317390102294, + "grad_norm": 0.7461506760366454, + "learning_rate": 1.2809977883315959e-05, + "loss": 0.4335, + "step": 24715 + }, + { + "epoch": 0.42708045341443185, + "grad_norm": 1.2488131236317281, + "learning_rate": 1.2809440778549089e-05, + "loss": 0.4904, + "step": 24716 + }, + { + "epoch": 0.42709773292784076, + "grad_norm": 0.7705790335805098, + "learning_rate": 1.280890366498284e-05, + "loss": 0.4136, + "step": 24717 + }, + { + "epoch": 0.42711501244124966, + "grad_norm": 0.829664362583488, + "learning_rate": 1.2808366542618889e-05, + "loss": 0.4765, + "step": 24718 + }, + { + "epoch": 0.42713229195465857, + "grad_norm": 1.2771563442254064, + "learning_rate": 1.2807829411458923e-05, + "loss": 0.5025, + "step": 24719 + }, + { + "epoch": 0.4271495714680675, + "grad_norm": 0.7892215062161984, + "learning_rate": 1.2807292271504619e-05, + "loss": 0.3013, + "step": 24720 + }, + { + "epoch": 0.4271668509814764, + "grad_norm": 0.810487747027883, + "learning_rate": 1.2806755122757662e-05, + "loss": 0.5234, + "step": 24721 + }, + { + "epoch": 0.42718413049488524, + "grad_norm": 1.0430699437560398, + "learning_rate": 1.2806217965219737e-05, + "loss": 0.5134, + "step": 24722 + }, + { + "epoch": 0.42720141000829415, + "grad_norm": 0.9410838461173492, + "learning_rate": 1.2805680798892523e-05, + "loss": 0.3751, + "step": 24723 + }, + { + "epoch": 0.42721868952170305, + "grad_norm": 0.8397291372311517, + "learning_rate": 1.2805143623777703e-05, + "loss": 0.4562, + "step": 24724 + }, + { + "epoch": 0.42723596903511196, + "grad_norm": 2.930067929220683, + "learning_rate": 1.2804606439876962e-05, + "loss": 0.4791, + "step": 24725 + }, + { + "epoch": 0.42725324854852087, + "grad_norm": 1.2619890929061757, + "learning_rate": 1.280406924719198e-05, + "loss": 0.4208, + "step": 24726 + }, + { + "epoch": 0.4272705280619298, + "grad_norm": 0.8391102168363094, + "learning_rate": 1.2803532045724436e-05, + "loss": 0.636, + "step": 24727 + }, + { + "epoch": 0.4272878075753387, + "grad_norm": 0.6891690484095587, + "learning_rate": 1.280299483547602e-05, + "loss": 0.4625, + "step": 24728 + }, + { + "epoch": 0.4273050870887476, + "grad_norm": 1.061090425474601, + "learning_rate": 1.2802457616448414e-05, + "loss": 0.5391, + "step": 24729 + }, + { + "epoch": 0.4273223666021565, + "grad_norm": 1.1889307826822924, + "learning_rate": 1.2801920388643292e-05, + "loss": 0.4481, + "step": 24730 + }, + { + "epoch": 0.4273396461155654, + "grad_norm": 1.068781636597808, + "learning_rate": 1.2801383152062346e-05, + "loss": 0.4064, + "step": 24731 + }, + { + "epoch": 0.4273569256289743, + "grad_norm": 1.3263526446135852, + "learning_rate": 1.2800845906707253e-05, + "loss": 0.5729, + "step": 24732 + }, + { + "epoch": 0.42737420514238317, + "grad_norm": 0.8790331202328648, + "learning_rate": 1.2800308652579699e-05, + "loss": 0.3603, + "step": 24733 + }, + { + "epoch": 0.4273914846557921, + "grad_norm": 0.519781913756851, + "learning_rate": 1.2799771389681366e-05, + "loss": 0.6503, + "step": 24734 + }, + { + "epoch": 0.427408764169201, + "grad_norm": 1.0430026303945, + "learning_rate": 1.2799234118013937e-05, + "loss": 0.5187, + "step": 24735 + }, + { + "epoch": 0.4274260436826099, + "grad_norm": 0.7492729252472045, + "learning_rate": 1.279869683757909e-05, + "loss": 0.7516, + "step": 24736 + }, + { + "epoch": 0.4274433231960188, + "grad_norm": 0.7376331537775224, + "learning_rate": 1.2798159548378515e-05, + "loss": 0.5163, + "step": 24737 + }, + { + "epoch": 0.4274606027094277, + "grad_norm": 1.0831191645306788, + "learning_rate": 1.2797622250413891e-05, + "loss": 0.5672, + "step": 24738 + }, + { + "epoch": 0.4274778822228366, + "grad_norm": 1.001684936582446, + "learning_rate": 1.2797084943686902e-05, + "loss": 0.724, + "step": 24739 + }, + { + "epoch": 0.4274951617362455, + "grad_norm": 0.7615120300326971, + "learning_rate": 1.2796547628199231e-05, + "loss": 0.4292, + "step": 24740 + }, + { + "epoch": 0.42751244124965443, + "grad_norm": 1.154775255581764, + "learning_rate": 1.2796010303952557e-05, + "loss": 0.412, + "step": 24741 + }, + { + "epoch": 0.42752972076306334, + "grad_norm": 1.0109809598822443, + "learning_rate": 1.2795472970948568e-05, + "loss": 0.5132, + "step": 24742 + }, + { + "epoch": 0.4275470002764722, + "grad_norm": 0.8040330114517268, + "learning_rate": 1.2794935629188947e-05, + "loss": 0.6472, + "step": 24743 + }, + { + "epoch": 0.4275642797898811, + "grad_norm": 1.0547560917546135, + "learning_rate": 1.2794398278675373e-05, + "loss": 0.4047, + "step": 24744 + }, + { + "epoch": 0.42758155930329, + "grad_norm": 1.5889895987047524, + "learning_rate": 1.2793860919409533e-05, + "loss": 0.5233, + "step": 24745 + }, + { + "epoch": 0.4275988388166989, + "grad_norm": 0.6481518578052424, + "learning_rate": 1.2793323551393107e-05, + "loss": 0.4311, + "step": 24746 + }, + { + "epoch": 0.4276161183301078, + "grad_norm": 0.8602934470223023, + "learning_rate": 1.2792786174627777e-05, + "loss": 0.3885, + "step": 24747 + }, + { + "epoch": 0.4276333978435167, + "grad_norm": 1.3265086723007078, + "learning_rate": 1.2792248789115234e-05, + "loss": 0.5133, + "step": 24748 + }, + { + "epoch": 0.42765067735692563, + "grad_norm": 0.7472635841358835, + "learning_rate": 1.2791711394857154e-05, + "loss": 0.4372, + "step": 24749 + }, + { + "epoch": 0.42766795687033454, + "grad_norm": 0.856019568687661, + "learning_rate": 1.2791173991855218e-05, + "loss": 0.4806, + "step": 24750 + }, + { + "epoch": 0.42768523638374345, + "grad_norm": 1.7399254659771493, + "learning_rate": 1.2790636580111118e-05, + "loss": 0.6205, + "step": 24751 + }, + { + "epoch": 0.42770251589715236, + "grad_norm": 1.1396094246419801, + "learning_rate": 1.279009915962653e-05, + "loss": 0.3339, + "step": 24752 + }, + { + "epoch": 0.42771979541056127, + "grad_norm": 0.7815889456268561, + "learning_rate": 1.2789561730403138e-05, + "loss": 0.3948, + "step": 24753 + }, + { + "epoch": 0.4277370749239701, + "grad_norm": 1.1665540497257323, + "learning_rate": 1.2789024292442626e-05, + "loss": 0.4279, + "step": 24754 + }, + { + "epoch": 0.427754354437379, + "grad_norm": 0.5355400933861814, + "learning_rate": 1.2788486845746682e-05, + "loss": 0.4607, + "step": 24755 + }, + { + "epoch": 0.42777163395078793, + "grad_norm": 1.344809976258046, + "learning_rate": 1.278794939031698e-05, + "loss": 0.3105, + "step": 24756 + }, + { + "epoch": 0.42778891346419684, + "grad_norm": 0.5242978508599412, + "learning_rate": 1.278741192615521e-05, + "loss": 0.6276, + "step": 24757 + }, + { + "epoch": 0.42780619297760575, + "grad_norm": 0.8807100337320085, + "learning_rate": 1.2786874453263058e-05, + "loss": 0.432, + "step": 24758 + }, + { + "epoch": 0.42782347249101466, + "grad_norm": 1.2004306847301778, + "learning_rate": 1.2786336971642202e-05, + "loss": 0.3706, + "step": 24759 + }, + { + "epoch": 0.42784075200442356, + "grad_norm": 1.3224404899766533, + "learning_rate": 1.2785799481294325e-05, + "loss": 0.4444, + "step": 24760 + }, + { + "epoch": 0.42785803151783247, + "grad_norm": 0.8314838049645648, + "learning_rate": 1.2785261982221113e-05, + "loss": 0.4024, + "step": 24761 + }, + { + "epoch": 0.4278753110312414, + "grad_norm": 1.1743637613461524, + "learning_rate": 1.2784724474424249e-05, + "loss": 0.3983, + "step": 24762 + }, + { + "epoch": 0.4278925905446503, + "grad_norm": 0.6107390076780452, + "learning_rate": 1.2784186957905415e-05, + "loss": 0.3719, + "step": 24763 + }, + { + "epoch": 0.42790987005805914, + "grad_norm": 1.3568759313711982, + "learning_rate": 1.2783649432666296e-05, + "loss": 0.6414, + "step": 24764 + }, + { + "epoch": 0.42792714957146805, + "grad_norm": 1.1811371251794323, + "learning_rate": 1.2783111898708577e-05, + "loss": 0.4866, + "step": 24765 + }, + { + "epoch": 0.42794442908487695, + "grad_norm": 0.5953062939826655, + "learning_rate": 1.278257435603394e-05, + "loss": 0.4297, + "step": 24766 + }, + { + "epoch": 0.42796170859828586, + "grad_norm": 0.5613210060662169, + "learning_rate": 1.2782036804644067e-05, + "loss": 0.4854, + "step": 24767 + }, + { + "epoch": 0.42797898811169477, + "grad_norm": 1.2548599249827146, + "learning_rate": 1.2781499244540645e-05, + "loss": 0.6073, + "step": 24768 + }, + { + "epoch": 0.4279962676251037, + "grad_norm": 0.7114938776785095, + "learning_rate": 1.2780961675725358e-05, + "loss": 0.218, + "step": 24769 + }, + { + "epoch": 0.4280135471385126, + "grad_norm": 1.1636221791195769, + "learning_rate": 1.2780424098199883e-05, + "loss": 0.3032, + "step": 24770 + }, + { + "epoch": 0.4280308266519215, + "grad_norm": 0.7995467421383972, + "learning_rate": 1.2779886511965913e-05, + "loss": 0.473, + "step": 24771 + }, + { + "epoch": 0.4280481061653304, + "grad_norm": 0.8785628491115023, + "learning_rate": 1.2779348917025126e-05, + "loss": 0.4973, + "step": 24772 + }, + { + "epoch": 0.4280653856787393, + "grad_norm": 1.087736857407511, + "learning_rate": 1.2778811313379202e-05, + "loss": 0.598, + "step": 24773 + }, + { + "epoch": 0.4280826651921482, + "grad_norm": 0.9938518159403126, + "learning_rate": 1.2778273701029835e-05, + "loss": 0.4248, + "step": 24774 + }, + { + "epoch": 0.42809994470555707, + "grad_norm": 0.9216887565393993, + "learning_rate": 1.2777736079978702e-05, + "loss": 0.4905, + "step": 24775 + }, + { + "epoch": 0.428117224218966, + "grad_norm": 0.9978299315953947, + "learning_rate": 1.2777198450227488e-05, + "loss": 0.3931, + "step": 24776 + }, + { + "epoch": 0.4281345037323749, + "grad_norm": 1.1741760962164811, + "learning_rate": 1.2776660811777879e-05, + "loss": 0.3631, + "step": 24777 + }, + { + "epoch": 0.4281517832457838, + "grad_norm": 0.8352857327653614, + "learning_rate": 1.2776123164631557e-05, + "loss": 0.4175, + "step": 24778 + }, + { + "epoch": 0.4281690627591927, + "grad_norm": 1.3812069358030707, + "learning_rate": 1.2775585508790202e-05, + "loss": 0.4168, + "step": 24779 + }, + { + "epoch": 0.4281863422726016, + "grad_norm": 0.9681040224523921, + "learning_rate": 1.277504784425551e-05, + "loss": 0.4116, + "step": 24780 + }, + { + "epoch": 0.4282036217860105, + "grad_norm": 0.8311867923332857, + "learning_rate": 1.277451017102915e-05, + "loss": 0.2947, + "step": 24781 + }, + { + "epoch": 0.4282209012994194, + "grad_norm": 0.7233933150375328, + "learning_rate": 1.2773972489112818e-05, + "loss": 0.5294, + "step": 24782 + }, + { + "epoch": 0.42823818081282833, + "grad_norm": 0.8361522502175645, + "learning_rate": 1.2773434798508193e-05, + "loss": 0.3633, + "step": 24783 + }, + { + "epoch": 0.42825546032623724, + "grad_norm": 1.6602131886676434, + "learning_rate": 1.2772897099216956e-05, + "loss": 0.5747, + "step": 24784 + }, + { + "epoch": 0.4282727398396461, + "grad_norm": 1.000445136637867, + "learning_rate": 1.2772359391240796e-05, + "loss": 0.5008, + "step": 24785 + }, + { + "epoch": 0.428290019353055, + "grad_norm": 0.5632236869826154, + "learning_rate": 1.2771821674581397e-05, + "loss": 0.5191, + "step": 24786 + }, + { + "epoch": 0.4283072988664639, + "grad_norm": 0.7407667847076185, + "learning_rate": 1.277128394924044e-05, + "loss": 0.3224, + "step": 24787 + }, + { + "epoch": 0.4283245783798728, + "grad_norm": 0.9399899779535769, + "learning_rate": 1.2770746215219614e-05, + "loss": 0.4497, + "step": 24788 + }, + { + "epoch": 0.4283418578932817, + "grad_norm": 0.8516260390018929, + "learning_rate": 1.27702084725206e-05, + "loss": 0.4036, + "step": 24789 + }, + { + "epoch": 0.4283591374066906, + "grad_norm": 0.8748830692559408, + "learning_rate": 1.2769670721145081e-05, + "loss": 0.3547, + "step": 24790 + }, + { + "epoch": 0.42837641692009953, + "grad_norm": 1.4750735978814793, + "learning_rate": 1.2769132961094743e-05, + "loss": 0.4024, + "step": 24791 + }, + { + "epoch": 0.42839369643350844, + "grad_norm": 1.2868549812334684, + "learning_rate": 1.2768595192371272e-05, + "loss": 0.4304, + "step": 24792 + }, + { + "epoch": 0.42841097594691735, + "grad_norm": 0.8991288633167367, + "learning_rate": 1.2768057414976345e-05, + "loss": 0.3745, + "step": 24793 + }, + { + "epoch": 0.42842825546032626, + "grad_norm": 0.5386600396280788, + "learning_rate": 1.2767519628911658e-05, + "loss": 0.6057, + "step": 24794 + }, + { + "epoch": 0.42844553497373516, + "grad_norm": 1.2024480830980315, + "learning_rate": 1.2766981834178886e-05, + "loss": 0.4973, + "step": 24795 + }, + { + "epoch": 0.428462814487144, + "grad_norm": 0.9428065977689626, + "learning_rate": 1.2766444030779716e-05, + "loss": 0.4219, + "step": 24796 + }, + { + "epoch": 0.4284800940005529, + "grad_norm": 1.0354883311054952, + "learning_rate": 1.2765906218715834e-05, + "loss": 0.3991, + "step": 24797 + }, + { + "epoch": 0.42849737351396183, + "grad_norm": 0.8980725877525592, + "learning_rate": 1.2765368397988924e-05, + "loss": 0.5655, + "step": 24798 + }, + { + "epoch": 0.42851465302737074, + "grad_norm": 0.7947921749827692, + "learning_rate": 1.2764830568600668e-05, + "loss": 0.3276, + "step": 24799 + }, + { + "epoch": 0.42853193254077965, + "grad_norm": 0.8928236070941721, + "learning_rate": 1.2764292730552756e-05, + "loss": 0.3755, + "step": 24800 + }, + { + "epoch": 0.42854921205418856, + "grad_norm": 1.4432599765319458, + "learning_rate": 1.2763754883846868e-05, + "loss": 0.2763, + "step": 24801 + }, + { + "epoch": 0.42856649156759746, + "grad_norm": 0.8383351895250452, + "learning_rate": 1.276321702848469e-05, + "loss": 0.4982, + "step": 24802 + }, + { + "epoch": 0.42858377108100637, + "grad_norm": 1.0412571790112233, + "learning_rate": 1.2762679164467904e-05, + "loss": 0.5153, + "step": 24803 + }, + { + "epoch": 0.4286010505944153, + "grad_norm": 0.6688428518306597, + "learning_rate": 1.2762141291798198e-05, + "loss": 0.496, + "step": 24804 + }, + { + "epoch": 0.4286183301078242, + "grad_norm": 0.5755543304341604, + "learning_rate": 1.2761603410477257e-05, + "loss": 0.3839, + "step": 24805 + }, + { + "epoch": 0.4286356096212331, + "grad_norm": 1.9786549843380847, + "learning_rate": 1.2761065520506764e-05, + "loss": 0.4049, + "step": 24806 + }, + { + "epoch": 0.42865288913464195, + "grad_norm": 0.7250985885665343, + "learning_rate": 1.2760527621888401e-05, + "loss": 0.5277, + "step": 24807 + }, + { + "epoch": 0.42867016864805085, + "grad_norm": 0.6913111692540194, + "learning_rate": 1.2759989714623857e-05, + "loss": 0.4573, + "step": 24808 + }, + { + "epoch": 0.42868744816145976, + "grad_norm": 0.7441021810037413, + "learning_rate": 1.2759451798714816e-05, + "loss": 0.4342, + "step": 24809 + }, + { + "epoch": 0.42870472767486867, + "grad_norm": 1.3179598355012105, + "learning_rate": 1.2758913874162963e-05, + "loss": 0.4908, + "step": 24810 + }, + { + "epoch": 0.4287220071882776, + "grad_norm": 1.0696652960057673, + "learning_rate": 1.2758375940969983e-05, + "loss": 0.6483, + "step": 24811 + }, + { + "epoch": 0.4287392867016865, + "grad_norm": 1.0903386374011697, + "learning_rate": 1.2757837999137559e-05, + "loss": 0.5555, + "step": 24812 + }, + { + "epoch": 0.4287565662150954, + "grad_norm": 1.3255063602956587, + "learning_rate": 1.2757300048667373e-05, + "loss": 0.6604, + "step": 24813 + }, + { + "epoch": 0.4287738457285043, + "grad_norm": 1.188606627392216, + "learning_rate": 1.2756762089561119e-05, + "loss": 0.43, + "step": 24814 + }, + { + "epoch": 0.4287911252419132, + "grad_norm": 1.194896301303215, + "learning_rate": 1.2756224121820476e-05, + "loss": 0.4287, + "step": 24815 + }, + { + "epoch": 0.4288084047553221, + "grad_norm": 0.8466734735840264, + "learning_rate": 1.2755686145447126e-05, + "loss": 0.571, + "step": 24816 + }, + { + "epoch": 0.42882568426873097, + "grad_norm": 0.8490945859145371, + "learning_rate": 1.275514816044276e-05, + "loss": 0.2828, + "step": 24817 + }, + { + "epoch": 0.4288429637821399, + "grad_norm": 1.1093019416983527, + "learning_rate": 1.275461016680906e-05, + "loss": 0.3532, + "step": 24818 + }, + { + "epoch": 0.4288602432955488, + "grad_norm": 0.9171937252516849, + "learning_rate": 1.275407216454771e-05, + "loss": 0.4485, + "step": 24819 + }, + { + "epoch": 0.4288775228089577, + "grad_norm": 0.9453152243988086, + "learning_rate": 1.27535341536604e-05, + "loss": 0.3652, + "step": 24820 + }, + { + "epoch": 0.4288948023223666, + "grad_norm": 0.7893386152693954, + "learning_rate": 1.2752996134148811e-05, + "loss": 0.3273, + "step": 24821 + }, + { + "epoch": 0.4289120818357755, + "grad_norm": 1.2906084818400354, + "learning_rate": 1.2752458106014629e-05, + "loss": 0.9734, + "step": 24822 + }, + { + "epoch": 0.4289293613491844, + "grad_norm": 0.7400144358566169, + "learning_rate": 1.2751920069259538e-05, + "loss": 0.263, + "step": 24823 + }, + { + "epoch": 0.4289466408625933, + "grad_norm": 1.1495492837153902, + "learning_rate": 1.2751382023885223e-05, + "loss": 0.4306, + "step": 24824 + }, + { + "epoch": 0.42896392037600223, + "grad_norm": 1.2489672485556969, + "learning_rate": 1.2750843969893371e-05, + "loss": 0.6231, + "step": 24825 + }, + { + "epoch": 0.42898119988941114, + "grad_norm": 0.9373649890922454, + "learning_rate": 1.275030590728567e-05, + "loss": 0.3912, + "step": 24826 + }, + { + "epoch": 0.42899847940282004, + "grad_norm": 0.7544385096308223, + "learning_rate": 1.27497678360638e-05, + "loss": 0.4436, + "step": 24827 + }, + { + "epoch": 0.4290157589162289, + "grad_norm": 0.880938015613803, + "learning_rate": 1.2749229756229444e-05, + "loss": 0.4861, + "step": 24828 + }, + { + "epoch": 0.4290330384296378, + "grad_norm": 1.1336930451147162, + "learning_rate": 1.2748691667784297e-05, + "loss": 0.459, + "step": 24829 + }, + { + "epoch": 0.4290503179430467, + "grad_norm": 0.9031967909495034, + "learning_rate": 1.2748153570730037e-05, + "loss": 0.4459, + "step": 24830 + }, + { + "epoch": 0.4290675974564556, + "grad_norm": 1.5925667423958012, + "learning_rate": 1.2747615465068353e-05, + "loss": 0.5604, + "step": 24831 + }, + { + "epoch": 0.4290848769698645, + "grad_norm": 0.8099170394967207, + "learning_rate": 1.2747077350800927e-05, + "loss": 0.3185, + "step": 24832 + }, + { + "epoch": 0.42910215648327343, + "grad_norm": 1.1264053243485268, + "learning_rate": 1.2746539227929442e-05, + "loss": 0.3344, + "step": 24833 + }, + { + "epoch": 0.42911943599668234, + "grad_norm": 0.6941161468444911, + "learning_rate": 1.2746001096455593e-05, + "loss": 0.3771, + "step": 24834 + }, + { + "epoch": 0.42913671551009125, + "grad_norm": 0.6521735509547907, + "learning_rate": 1.2745462956381058e-05, + "loss": 0.3392, + "step": 24835 + }, + { + "epoch": 0.42915399502350016, + "grad_norm": 0.418168215581677, + "learning_rate": 1.2744924807707521e-05, + "loss": 0.5304, + "step": 24836 + }, + { + "epoch": 0.42917127453690906, + "grad_norm": 0.7046025517411693, + "learning_rate": 1.2744386650436676e-05, + "loss": 0.4042, + "step": 24837 + }, + { + "epoch": 0.4291885540503179, + "grad_norm": 1.0981247395097837, + "learning_rate": 1.2743848484570203e-05, + "loss": 0.4797, + "step": 24838 + }, + { + "epoch": 0.4292058335637268, + "grad_norm": 1.012005035330947, + "learning_rate": 1.2743310310109785e-05, + "loss": 0.4461, + "step": 24839 + }, + { + "epoch": 0.42922311307713573, + "grad_norm": 0.7985922068144622, + "learning_rate": 1.274277212705711e-05, + "loss": 0.4571, + "step": 24840 + }, + { + "epoch": 0.42924039259054464, + "grad_norm": 1.0756102710134996, + "learning_rate": 1.2742233935413868e-05, + "loss": 0.3815, + "step": 24841 + }, + { + "epoch": 0.42925767210395355, + "grad_norm": 0.47427682780736763, + "learning_rate": 1.274169573518174e-05, + "loss": 0.8049, + "step": 24842 + }, + { + "epoch": 0.42927495161736245, + "grad_norm": 0.8650517840288395, + "learning_rate": 1.2741157526362413e-05, + "loss": 0.4337, + "step": 24843 + }, + { + "epoch": 0.42929223113077136, + "grad_norm": 0.7217814668318489, + "learning_rate": 1.274061930895757e-05, + "loss": 0.6459, + "step": 24844 + }, + { + "epoch": 0.42930951064418027, + "grad_norm": 0.7821397508501449, + "learning_rate": 1.27400810829689e-05, + "loss": 0.515, + "step": 24845 + }, + { + "epoch": 0.4293267901575892, + "grad_norm": 0.8543625146508192, + "learning_rate": 1.273954284839809e-05, + "loss": 0.5106, + "step": 24846 + }, + { + "epoch": 0.4293440696709981, + "grad_norm": 0.8121249344757682, + "learning_rate": 1.273900460524682e-05, + "loss": 0.4173, + "step": 24847 + }, + { + "epoch": 0.429361349184407, + "grad_norm": 1.1160333016194695, + "learning_rate": 1.273846635351678e-05, + "loss": 0.3622, + "step": 24848 + }, + { + "epoch": 0.42937862869781585, + "grad_norm": 0.961448723286697, + "learning_rate": 1.2737928093209657e-05, + "loss": 0.5401, + "step": 24849 + }, + { + "epoch": 0.42939590821122475, + "grad_norm": 0.8319370855514977, + "learning_rate": 1.2737389824327136e-05, + "loss": 0.6456, + "step": 24850 + }, + { + "epoch": 0.42941318772463366, + "grad_norm": 0.8008299294508175, + "learning_rate": 1.2736851546870899e-05, + "loss": 0.4681, + "step": 24851 + }, + { + "epoch": 0.42943046723804257, + "grad_norm": 1.0571125824614878, + "learning_rate": 1.2736313260842638e-05, + "loss": 0.384, + "step": 24852 + }, + { + "epoch": 0.4294477467514515, + "grad_norm": 0.8439725144449559, + "learning_rate": 1.2735774966244034e-05, + "loss": 0.2519, + "step": 24853 + }, + { + "epoch": 0.4294650262648604, + "grad_norm": 1.1153218076797777, + "learning_rate": 1.2735236663076776e-05, + "loss": 0.5659, + "step": 24854 + }, + { + "epoch": 0.4294823057782693, + "grad_norm": 0.8826839166390336, + "learning_rate": 1.2734698351342548e-05, + "loss": 0.4678, + "step": 24855 + }, + { + "epoch": 0.4294995852916782, + "grad_norm": 1.6222489612233069, + "learning_rate": 1.2734160031043034e-05, + "loss": 0.5605, + "step": 24856 + }, + { + "epoch": 0.4295168648050871, + "grad_norm": 0.9496450762738389, + "learning_rate": 1.2733621702179927e-05, + "loss": 0.6637, + "step": 24857 + }, + { + "epoch": 0.429534144318496, + "grad_norm": 1.1160647186980663, + "learning_rate": 1.273308336475491e-05, + "loss": 0.3859, + "step": 24858 + }, + { + "epoch": 0.42955142383190487, + "grad_norm": 1.3929957964401354, + "learning_rate": 1.2732545018769664e-05, + "loss": 0.4145, + "step": 24859 + }, + { + "epoch": 0.4295687033453138, + "grad_norm": 0.6374097359669371, + "learning_rate": 1.2732006664225882e-05, + "loss": 0.3929, + "step": 24860 + }, + { + "epoch": 0.4295859828587227, + "grad_norm": 1.1056899888571385, + "learning_rate": 1.273146830112525e-05, + "loss": 0.479, + "step": 24861 + }, + { + "epoch": 0.4296032623721316, + "grad_norm": 1.1830368116833614, + "learning_rate": 1.273092992946945e-05, + "loss": 0.4047, + "step": 24862 + }, + { + "epoch": 0.4296205418855405, + "grad_norm": 1.0259079466438483, + "learning_rate": 1.2730391549260168e-05, + "loss": 0.4099, + "step": 24863 + }, + { + "epoch": 0.4296378213989494, + "grad_norm": 1.2943203323521926, + "learning_rate": 1.272985316049909e-05, + "loss": 0.5247, + "step": 24864 + }, + { + "epoch": 0.4296551009123583, + "grad_norm": 1.1677736891972443, + "learning_rate": 1.272931476318791e-05, + "loss": 0.5289, + "step": 24865 + }, + { + "epoch": 0.4296723804257672, + "grad_norm": 1.031029183477592, + "learning_rate": 1.2728776357328305e-05, + "loss": 0.7183, + "step": 24866 + }, + { + "epoch": 0.4296896599391761, + "grad_norm": 1.1193373751897964, + "learning_rate": 1.2728237942921968e-05, + "loss": 0.3335, + "step": 24867 + }, + { + "epoch": 0.42970693945258503, + "grad_norm": 1.1137328205033663, + "learning_rate": 1.272769951997058e-05, + "loss": 0.3935, + "step": 24868 + }, + { + "epoch": 0.42972421896599394, + "grad_norm": 0.9325732321914065, + "learning_rate": 1.272716108847583e-05, + "loss": 0.3088, + "step": 24869 + }, + { + "epoch": 0.4297414984794028, + "grad_norm": 0.8320106083962383, + "learning_rate": 1.2726622648439406e-05, + "loss": 0.4047, + "step": 24870 + }, + { + "epoch": 0.4297587779928117, + "grad_norm": 1.055885477336167, + "learning_rate": 1.2726084199862988e-05, + "loss": 0.5323, + "step": 24871 + }, + { + "epoch": 0.4297760575062206, + "grad_norm": 0.7838768447534644, + "learning_rate": 1.2725545742748274e-05, + "loss": 0.6262, + "step": 24872 + }, + { + "epoch": 0.4297933370196295, + "grad_norm": 1.4546593313141207, + "learning_rate": 1.2725007277096939e-05, + "loss": 0.5236, + "step": 24873 + }, + { + "epoch": 0.4298106165330384, + "grad_norm": 0.6715926348053611, + "learning_rate": 1.2724468802910675e-05, + "loss": 0.4824, + "step": 24874 + }, + { + "epoch": 0.42982789604644733, + "grad_norm": 1.4903553142933086, + "learning_rate": 1.2723930320191169e-05, + "loss": 0.4781, + "step": 24875 + }, + { + "epoch": 0.42984517555985624, + "grad_norm": 0.8423514493236779, + "learning_rate": 1.2723391828940105e-05, + "loss": 0.5025, + "step": 24876 + }, + { + "epoch": 0.42986245507326515, + "grad_norm": 1.0616017157900461, + "learning_rate": 1.2722853329159168e-05, + "loss": 0.427, + "step": 24877 + }, + { + "epoch": 0.42987973458667406, + "grad_norm": 1.101813827016025, + "learning_rate": 1.2722314820850053e-05, + "loss": 0.5533, + "step": 24878 + }, + { + "epoch": 0.42989701410008296, + "grad_norm": 1.0109247586710095, + "learning_rate": 1.2721776304014436e-05, + "loss": 0.527, + "step": 24879 + }, + { + "epoch": 0.42991429361349187, + "grad_norm": 1.1079294864541887, + "learning_rate": 1.2721237778654013e-05, + "loss": 0.3893, + "step": 24880 + }, + { + "epoch": 0.4299315731269007, + "grad_norm": 0.6645427457341164, + "learning_rate": 1.2720699244770464e-05, + "loss": 0.2499, + "step": 24881 + }, + { + "epoch": 0.42994885264030963, + "grad_norm": 1.2721283481511896, + "learning_rate": 1.2720160702365476e-05, + "loss": 0.4083, + "step": 24882 + }, + { + "epoch": 0.42996613215371854, + "grad_norm": 1.476533020545439, + "learning_rate": 1.2719622151440744e-05, + "loss": 0.5311, + "step": 24883 + }, + { + "epoch": 0.42998341166712745, + "grad_norm": 0.9243130304808337, + "learning_rate": 1.2719083591997946e-05, + "loss": 0.4684, + "step": 24884 + }, + { + "epoch": 0.43000069118053635, + "grad_norm": 0.7438774189632139, + "learning_rate": 1.2718545024038771e-05, + "loss": 0.419, + "step": 24885 + }, + { + "epoch": 0.43001797069394526, + "grad_norm": 1.0692589836743362, + "learning_rate": 1.2718006447564905e-05, + "loss": 0.3684, + "step": 24886 + }, + { + "epoch": 0.43003525020735417, + "grad_norm": 0.6457823500621199, + "learning_rate": 1.271746786257804e-05, + "loss": 0.3774, + "step": 24887 + }, + { + "epoch": 0.4300525297207631, + "grad_norm": 1.0474468258520977, + "learning_rate": 1.2716929269079854e-05, + "loss": 0.2758, + "step": 24888 + }, + { + "epoch": 0.430069809234172, + "grad_norm": 1.4263620651155413, + "learning_rate": 1.2716390667072043e-05, + "loss": 0.7611, + "step": 24889 + }, + { + "epoch": 0.4300870887475809, + "grad_norm": 0.7304576800547948, + "learning_rate": 1.271585205655629e-05, + "loss": 0.3866, + "step": 24890 + }, + { + "epoch": 0.43010436826098974, + "grad_norm": 0.8714834718221854, + "learning_rate": 1.271531343753428e-05, + "loss": 0.4978, + "step": 24891 + }, + { + "epoch": 0.43012164777439865, + "grad_norm": 0.46752025971547534, + "learning_rate": 1.2714774810007702e-05, + "loss": 0.6374, + "step": 24892 + }, + { + "epoch": 0.43013892728780756, + "grad_norm": 0.4592866708244631, + "learning_rate": 1.2714236173978244e-05, + "loss": 0.6087, + "step": 24893 + }, + { + "epoch": 0.43015620680121647, + "grad_norm": 1.0025917355156564, + "learning_rate": 1.2713697529447593e-05, + "loss": 0.3782, + "step": 24894 + }, + { + "epoch": 0.4301734863146254, + "grad_norm": 0.8282175639107943, + "learning_rate": 1.2713158876417436e-05, + "loss": 0.3681, + "step": 24895 + }, + { + "epoch": 0.4301907658280343, + "grad_norm": 0.4815976513358348, + "learning_rate": 1.2712620214889456e-05, + "loss": 0.6761, + "step": 24896 + }, + { + "epoch": 0.4302080453414432, + "grad_norm": 0.7000983189828459, + "learning_rate": 1.2712081544865344e-05, + "loss": 0.4665, + "step": 24897 + }, + { + "epoch": 0.4302253248548521, + "grad_norm": 0.8049599563174136, + "learning_rate": 1.2711542866346786e-05, + "loss": 0.4062, + "step": 24898 + }, + { + "epoch": 0.430242604368261, + "grad_norm": 0.7534970365123898, + "learning_rate": 1.271100417933547e-05, + "loss": 0.5579, + "step": 24899 + }, + { + "epoch": 0.4302598838816699, + "grad_norm": 1.317656817104215, + "learning_rate": 1.2710465483833084e-05, + "loss": 0.5125, + "step": 24900 + }, + { + "epoch": 0.4302771633950788, + "grad_norm": 1.0698963288505954, + "learning_rate": 1.2709926779841313e-05, + "loss": 0.4302, + "step": 24901 + }, + { + "epoch": 0.4302944429084877, + "grad_norm": 0.8043488689052687, + "learning_rate": 1.2709388067361845e-05, + "loss": 0.4143, + "step": 24902 + }, + { + "epoch": 0.4303117224218966, + "grad_norm": 0.9599494269556982, + "learning_rate": 1.2708849346396372e-05, + "loss": 0.4316, + "step": 24903 + }, + { + "epoch": 0.4303290019353055, + "grad_norm": 0.8661880360447259, + "learning_rate": 1.2708310616946573e-05, + "loss": 0.4643, + "step": 24904 + }, + { + "epoch": 0.4303462814487144, + "grad_norm": 0.8265241621140877, + "learning_rate": 1.270777187901414e-05, + "loss": 0.2812, + "step": 24905 + }, + { + "epoch": 0.4303635609621233, + "grad_norm": 1.0377176221238789, + "learning_rate": 1.2707233132600761e-05, + "loss": 0.4557, + "step": 24906 + }, + { + "epoch": 0.4303808404755322, + "grad_norm": 1.2049420670727355, + "learning_rate": 1.270669437770812e-05, + "loss": 0.5447, + "step": 24907 + }, + { + "epoch": 0.4303981199889411, + "grad_norm": 0.8249924758883839, + "learning_rate": 1.2706155614337905e-05, + "loss": 0.3322, + "step": 24908 + }, + { + "epoch": 0.43041539950235, + "grad_norm": 1.092363910623272, + "learning_rate": 1.2705616842491808e-05, + "loss": 0.4368, + "step": 24909 + }, + { + "epoch": 0.43043267901575893, + "grad_norm": 1.7470127667873503, + "learning_rate": 1.2705078062171515e-05, + "loss": 0.335, + "step": 24910 + }, + { + "epoch": 0.43044995852916784, + "grad_norm": 0.62962354373653, + "learning_rate": 1.2704539273378706e-05, + "loss": 0.4737, + "step": 24911 + }, + { + "epoch": 0.4304672380425767, + "grad_norm": 0.4144100145287366, + "learning_rate": 1.2704000476115079e-05, + "loss": 0.5896, + "step": 24912 + }, + { + "epoch": 0.4304845175559856, + "grad_norm": 1.2224998838314658, + "learning_rate": 1.2703461670382318e-05, + "loss": 0.3519, + "step": 24913 + }, + { + "epoch": 0.4305017970693945, + "grad_norm": 0.6391347032979904, + "learning_rate": 1.2702922856182108e-05, + "loss": 0.5256, + "step": 24914 + }, + { + "epoch": 0.4305190765828034, + "grad_norm": 0.9004664364590969, + "learning_rate": 1.2702384033516137e-05, + "loss": 0.3014, + "step": 24915 + }, + { + "epoch": 0.4305363560962123, + "grad_norm": 0.5222527043439397, + "learning_rate": 1.2701845202386094e-05, + "loss": 0.5612, + "step": 24916 + }, + { + "epoch": 0.43055363560962123, + "grad_norm": 1.5772064506561367, + "learning_rate": 1.2701306362793669e-05, + "loss": 0.3583, + "step": 24917 + }, + { + "epoch": 0.43057091512303014, + "grad_norm": 0.7069853570002779, + "learning_rate": 1.2700767514740543e-05, + "loss": 0.2897, + "step": 24918 + }, + { + "epoch": 0.43058819463643905, + "grad_norm": 1.1924291770824056, + "learning_rate": 1.2700228658228411e-05, + "loss": 0.2864, + "step": 24919 + }, + { + "epoch": 0.43060547414984796, + "grad_norm": 1.0201421292047277, + "learning_rate": 1.2699689793258955e-05, + "loss": 0.4261, + "step": 24920 + }, + { + "epoch": 0.43062275366325686, + "grad_norm": 1.3874885327157824, + "learning_rate": 1.2699150919833869e-05, + "loss": 0.4212, + "step": 24921 + }, + { + "epoch": 0.43064003317666577, + "grad_norm": 1.3532279029811973, + "learning_rate": 1.2698612037954833e-05, + "loss": 0.5335, + "step": 24922 + }, + { + "epoch": 0.4306573126900746, + "grad_norm": 0.8066767687846992, + "learning_rate": 1.2698073147623544e-05, + "loss": 0.37, + "step": 24923 + }, + { + "epoch": 0.43067459220348353, + "grad_norm": 1.0212084625332007, + "learning_rate": 1.2697534248841683e-05, + "loss": 0.4533, + "step": 24924 + }, + { + "epoch": 0.43069187171689244, + "grad_norm": 1.1224089996745272, + "learning_rate": 1.2696995341610937e-05, + "loss": 0.4968, + "step": 24925 + }, + { + "epoch": 0.43070915123030135, + "grad_norm": 0.9186998972184686, + "learning_rate": 1.2696456425933002e-05, + "loss": 0.4669, + "step": 24926 + }, + { + "epoch": 0.43072643074371025, + "grad_norm": 1.0164151353354376, + "learning_rate": 1.2695917501809558e-05, + "loss": 0.3784, + "step": 24927 + }, + { + "epoch": 0.43074371025711916, + "grad_norm": 0.7724584934010964, + "learning_rate": 1.2695378569242292e-05, + "loss": 0.4499, + "step": 24928 + }, + { + "epoch": 0.43076098977052807, + "grad_norm": 0.8596456793373554, + "learning_rate": 1.2694839628232898e-05, + "loss": 0.5068, + "step": 24929 + }, + { + "epoch": 0.430778269283937, + "grad_norm": 0.42907784933987225, + "learning_rate": 1.2694300678783062e-05, + "loss": 0.7395, + "step": 24930 + }, + { + "epoch": 0.4307955487973459, + "grad_norm": 1.042740211478709, + "learning_rate": 1.2693761720894471e-05, + "loss": 0.424, + "step": 24931 + }, + { + "epoch": 0.4308128283107548, + "grad_norm": 0.9763036806721919, + "learning_rate": 1.2693222754568814e-05, + "loss": 0.6978, + "step": 24932 + }, + { + "epoch": 0.43083010782416364, + "grad_norm": 0.7287523216986009, + "learning_rate": 1.2692683779807779e-05, + "loss": 0.4542, + "step": 24933 + }, + { + "epoch": 0.43084738733757255, + "grad_norm": 1.315665266102686, + "learning_rate": 1.2692144796613052e-05, + "loss": 0.5331, + "step": 24934 + }, + { + "epoch": 0.43086466685098146, + "grad_norm": 0.7866981865259622, + "learning_rate": 1.2691605804986326e-05, + "loss": 0.3893, + "step": 24935 + }, + { + "epoch": 0.43088194636439037, + "grad_norm": 0.8850703193512734, + "learning_rate": 1.2691066804929282e-05, + "loss": 0.4193, + "step": 24936 + }, + { + "epoch": 0.4308992258777993, + "grad_norm": 1.1682754161146518, + "learning_rate": 1.2690527796443617e-05, + "loss": 0.4689, + "step": 24937 + }, + { + "epoch": 0.4309165053912082, + "grad_norm": 0.8512155350282741, + "learning_rate": 1.2689988779531012e-05, + "loss": 0.3317, + "step": 24938 + }, + { + "epoch": 0.4309337849046171, + "grad_norm": 1.327546654561105, + "learning_rate": 1.2689449754193156e-05, + "loss": 0.5816, + "step": 24939 + }, + { + "epoch": 0.430951064418026, + "grad_norm": 1.6244484934237498, + "learning_rate": 1.2688910720431743e-05, + "loss": 0.3387, + "step": 24940 + }, + { + "epoch": 0.4309683439314349, + "grad_norm": 1.0461635573340018, + "learning_rate": 1.2688371678248452e-05, + "loss": 0.4527, + "step": 24941 + }, + { + "epoch": 0.4309856234448438, + "grad_norm": 0.933719668262263, + "learning_rate": 1.2687832627644978e-05, + "loss": 0.5843, + "step": 24942 + }, + { + "epoch": 0.4310029029582527, + "grad_norm": 1.224179370120026, + "learning_rate": 1.2687293568623011e-05, + "loss": 0.4929, + "step": 24943 + }, + { + "epoch": 0.4310201824716616, + "grad_norm": 0.8354393294471447, + "learning_rate": 1.2686754501184238e-05, + "loss": 0.4491, + "step": 24944 + }, + { + "epoch": 0.4310374619850705, + "grad_norm": 0.7309543479091539, + "learning_rate": 1.2686215425330342e-05, + "loss": 0.2881, + "step": 24945 + }, + { + "epoch": 0.4310547414984794, + "grad_norm": 1.319179329221828, + "learning_rate": 1.2685676341063014e-05, + "loss": 0.4189, + "step": 24946 + }, + { + "epoch": 0.4310720210118883, + "grad_norm": 1.179283798017825, + "learning_rate": 1.2685137248383948e-05, + "loss": 0.5181, + "step": 24947 + }, + { + "epoch": 0.4310893005252972, + "grad_norm": 0.833181152894408, + "learning_rate": 1.2684598147294821e-05, + "loss": 0.4651, + "step": 24948 + }, + { + "epoch": 0.4311065800387061, + "grad_norm": 1.0793920443650542, + "learning_rate": 1.2684059037797333e-05, + "loss": 0.4743, + "step": 24949 + }, + { + "epoch": 0.431123859552115, + "grad_norm": 0.8494350713622364, + "learning_rate": 1.268351991989317e-05, + "loss": 0.48, + "step": 24950 + }, + { + "epoch": 0.4311411390655239, + "grad_norm": 0.8037967417562033, + "learning_rate": 1.2682980793584014e-05, + "loss": 0.3636, + "step": 24951 + }, + { + "epoch": 0.43115841857893283, + "grad_norm": 1.2805172450132503, + "learning_rate": 1.2682441658871562e-05, + "loss": 0.6374, + "step": 24952 + }, + { + "epoch": 0.43117569809234174, + "grad_norm": 1.0164530109302272, + "learning_rate": 1.2681902515757498e-05, + "loss": 0.5363, + "step": 24953 + }, + { + "epoch": 0.43119297760575065, + "grad_norm": 1.542434226189376, + "learning_rate": 1.2681363364243511e-05, + "loss": 0.4267, + "step": 24954 + }, + { + "epoch": 0.4312102571191595, + "grad_norm": 0.674628368876007, + "learning_rate": 1.2680824204331292e-05, + "loss": 0.5288, + "step": 24955 + }, + { + "epoch": 0.4312275366325684, + "grad_norm": 1.117138683210612, + "learning_rate": 1.2680285036022526e-05, + "loss": 0.3244, + "step": 24956 + }, + { + "epoch": 0.4312448161459773, + "grad_norm": 0.9269966976577341, + "learning_rate": 1.2679745859318904e-05, + "loss": 0.3095, + "step": 24957 + }, + { + "epoch": 0.4312620956593862, + "grad_norm": 1.7966733118389604, + "learning_rate": 1.2679206674222116e-05, + "loss": 0.6397, + "step": 24958 + }, + { + "epoch": 0.43127937517279513, + "grad_norm": 1.2814820545304588, + "learning_rate": 1.2678667480733843e-05, + "loss": 0.6081, + "step": 24959 + }, + { + "epoch": 0.43129665468620404, + "grad_norm": 1.0082246537438366, + "learning_rate": 1.2678128278855785e-05, + "loss": 0.6209, + "step": 24960 + }, + { + "epoch": 0.43131393419961295, + "grad_norm": 1.0192369048072203, + "learning_rate": 1.2677589068589627e-05, + "loss": 0.4666, + "step": 24961 + }, + { + "epoch": 0.43133121371302185, + "grad_norm": 0.89310075235049, + "learning_rate": 1.267704984993705e-05, + "loss": 0.3242, + "step": 24962 + }, + { + "epoch": 0.43134849322643076, + "grad_norm": 0.8699331025148943, + "learning_rate": 1.2676510622899756e-05, + "loss": 0.3634, + "step": 24963 + }, + { + "epoch": 0.43136577273983967, + "grad_norm": 0.6554010142773662, + "learning_rate": 1.2675971387479424e-05, + "loss": 0.3149, + "step": 24964 + }, + { + "epoch": 0.4313830522532485, + "grad_norm": 1.2736760303707724, + "learning_rate": 1.267543214367775e-05, + "loss": 0.3304, + "step": 24965 + }, + { + "epoch": 0.43140033176665743, + "grad_norm": 0.8875466751615619, + "learning_rate": 1.2674892891496415e-05, + "loss": 0.4427, + "step": 24966 + }, + { + "epoch": 0.43141761128006634, + "grad_norm": 0.7747108291684414, + "learning_rate": 1.2674353630937116e-05, + "loss": 0.5005, + "step": 24967 + }, + { + "epoch": 0.43143489079347525, + "grad_norm": 1.092485438426968, + "learning_rate": 1.2673814362001533e-05, + "loss": 0.4068, + "step": 24968 + }, + { + "epoch": 0.43145217030688415, + "grad_norm": 0.7494809375467392, + "learning_rate": 1.2673275084691366e-05, + "loss": 0.3112, + "step": 24969 + }, + { + "epoch": 0.43146944982029306, + "grad_norm": 0.9463871466683129, + "learning_rate": 1.2672735799008296e-05, + "loss": 0.3267, + "step": 24970 + }, + { + "epoch": 0.43148672933370197, + "grad_norm": 1.1063832254007542, + "learning_rate": 1.267219650495401e-05, + "loss": 0.5984, + "step": 24971 + }, + { + "epoch": 0.4315040088471109, + "grad_norm": 0.9262902564411541, + "learning_rate": 1.2671657202530208e-05, + "loss": 0.3801, + "step": 24972 + }, + { + "epoch": 0.4315212883605198, + "grad_norm": 0.7569119376948572, + "learning_rate": 1.267111789173857e-05, + "loss": 0.4142, + "step": 24973 + }, + { + "epoch": 0.4315385678739287, + "grad_norm": 0.9619826144105625, + "learning_rate": 1.2670578572580784e-05, + "loss": 0.5205, + "step": 24974 + }, + { + "epoch": 0.4315558473873376, + "grad_norm": 1.019867675551641, + "learning_rate": 1.267003924505855e-05, + "loss": 0.3725, + "step": 24975 + }, + { + "epoch": 0.43157312690074645, + "grad_norm": 0.8680845097926035, + "learning_rate": 1.2669499909173546e-05, + "loss": 0.4809, + "step": 24976 + }, + { + "epoch": 0.43159040641415536, + "grad_norm": 0.7743697775022401, + "learning_rate": 1.2668960564927466e-05, + "loss": 0.4423, + "step": 24977 + }, + { + "epoch": 0.43160768592756427, + "grad_norm": 0.8722841302249488, + "learning_rate": 1.2668421212322e-05, + "loss": 0.306, + "step": 24978 + }, + { + "epoch": 0.4316249654409732, + "grad_norm": 1.1655015902363983, + "learning_rate": 1.2667881851358832e-05, + "loss": 0.3832, + "step": 24979 + }, + { + "epoch": 0.4316422449543821, + "grad_norm": 0.6037529604348629, + "learning_rate": 1.266734248203966e-05, + "loss": 0.3214, + "step": 24980 + }, + { + "epoch": 0.431659524467791, + "grad_norm": 0.8238240354648164, + "learning_rate": 1.2666803104366165e-05, + "loss": 0.6118, + "step": 24981 + }, + { + "epoch": 0.4316768039811999, + "grad_norm": 0.8870433878935202, + "learning_rate": 1.2666263718340041e-05, + "loss": 0.651, + "step": 24982 + }, + { + "epoch": 0.4316940834946088, + "grad_norm": 1.0213137313888316, + "learning_rate": 1.2665724323962977e-05, + "loss": 0.3712, + "step": 24983 + }, + { + "epoch": 0.4317113630080177, + "grad_norm": 0.705030299221624, + "learning_rate": 1.2665184921236664e-05, + "loss": 0.4589, + "step": 24984 + }, + { + "epoch": 0.4317286425214266, + "grad_norm": 0.9905043686720294, + "learning_rate": 1.2664645510162788e-05, + "loss": 0.3326, + "step": 24985 + }, + { + "epoch": 0.43174592203483547, + "grad_norm": 0.7243157916891153, + "learning_rate": 1.266410609074304e-05, + "loss": 0.5613, + "step": 24986 + }, + { + "epoch": 0.4317632015482444, + "grad_norm": 0.7909584083574309, + "learning_rate": 1.2663566662979107e-05, + "loss": 0.4497, + "step": 24987 + }, + { + "epoch": 0.4317804810616533, + "grad_norm": 1.0291007145855389, + "learning_rate": 1.2663027226872682e-05, + "loss": 0.5313, + "step": 24988 + }, + { + "epoch": 0.4317977605750622, + "grad_norm": 0.4623759466504316, + "learning_rate": 1.2662487782425452e-05, + "loss": 0.5505, + "step": 24989 + }, + { + "epoch": 0.4318150400884711, + "grad_norm": 0.9631306755927203, + "learning_rate": 1.266194832963911e-05, + "loss": 0.4853, + "step": 24990 + }, + { + "epoch": 0.43183231960188, + "grad_norm": 0.8309868159143902, + "learning_rate": 1.2661408868515342e-05, + "loss": 0.3906, + "step": 24991 + }, + { + "epoch": 0.4318495991152889, + "grad_norm": 0.8535403994608711, + "learning_rate": 1.2660869399055842e-05, + "loss": 0.5475, + "step": 24992 + }, + { + "epoch": 0.4318668786286978, + "grad_norm": 0.8092330570638292, + "learning_rate": 1.2660329921262292e-05, + "loss": 0.317, + "step": 24993 + }, + { + "epoch": 0.43188415814210673, + "grad_norm": 0.8221636609394758, + "learning_rate": 1.265979043513639e-05, + "loss": 0.3153, + "step": 24994 + }, + { + "epoch": 0.43190143765551564, + "grad_norm": 1.7281296437016322, + "learning_rate": 1.2659250940679821e-05, + "loss": 0.4273, + "step": 24995 + }, + { + "epoch": 0.43191871716892455, + "grad_norm": 0.9371008310778257, + "learning_rate": 1.2658711437894278e-05, + "loss": 0.5423, + "step": 24996 + }, + { + "epoch": 0.4319359966823334, + "grad_norm": 0.96601999272891, + "learning_rate": 1.2658171926781449e-05, + "loss": 0.4016, + "step": 24997 + }, + { + "epoch": 0.4319532761957423, + "grad_norm": 0.9095945437201354, + "learning_rate": 1.2657632407343021e-05, + "loss": 0.4947, + "step": 24998 + }, + { + "epoch": 0.4319705557091512, + "grad_norm": 1.4177479351885436, + "learning_rate": 1.2657092879580686e-05, + "loss": 0.3422, + "step": 24999 + }, + { + "epoch": 0.4319878352225601, + "grad_norm": 1.2143126832060203, + "learning_rate": 1.2656553343496136e-05, + "loss": 0.6166, + "step": 25000 + }, + { + "epoch": 0.43200511473596903, + "grad_norm": 1.0618159193690417, + "learning_rate": 1.2656013799091058e-05, + "loss": 0.646, + "step": 25001 + }, + { + "epoch": 0.43202239424937794, + "grad_norm": 0.9057490438415492, + "learning_rate": 1.265547424636714e-05, + "loss": 0.4275, + "step": 25002 + }, + { + "epoch": 0.43203967376278685, + "grad_norm": 1.145763340630986, + "learning_rate": 1.2654934685326082e-05, + "loss": 0.501, + "step": 25003 + }, + { + "epoch": 0.43205695327619575, + "grad_norm": 1.144641977142885, + "learning_rate": 1.2654395115969563e-05, + "loss": 0.7148, + "step": 25004 + }, + { + "epoch": 0.43207423278960466, + "grad_norm": 0.9198182014540176, + "learning_rate": 1.2653855538299273e-05, + "loss": 0.3799, + "step": 25005 + }, + { + "epoch": 0.43209151230301357, + "grad_norm": 0.9082389753182877, + "learning_rate": 1.2653315952316914e-05, + "loss": 0.4267, + "step": 25006 + }, + { + "epoch": 0.4321087918164225, + "grad_norm": 1.3652204594987638, + "learning_rate": 1.2652776358024164e-05, + "loss": 0.2903, + "step": 25007 + }, + { + "epoch": 0.43212607132983133, + "grad_norm": 0.570533015147594, + "learning_rate": 1.2652236755422713e-05, + "loss": 0.4523, + "step": 25008 + }, + { + "epoch": 0.43214335084324024, + "grad_norm": 0.8683781923580818, + "learning_rate": 1.265169714451426e-05, + "loss": 0.4178, + "step": 25009 + }, + { + "epoch": 0.43216063035664914, + "grad_norm": 1.3570635221660792, + "learning_rate": 1.2651157525300488e-05, + "loss": 0.5621, + "step": 25010 + }, + { + "epoch": 0.43217790987005805, + "grad_norm": 0.994189771711447, + "learning_rate": 1.2650617897783086e-05, + "loss": 0.5393, + "step": 25011 + }, + { + "epoch": 0.43219518938346696, + "grad_norm": 0.7610807257645568, + "learning_rate": 1.2650078261963752e-05, + "loss": 0.4549, + "step": 25012 + }, + { + "epoch": 0.43221246889687587, + "grad_norm": 0.9046458538439037, + "learning_rate": 1.264953861784417e-05, + "loss": 0.539, + "step": 25013 + }, + { + "epoch": 0.4322297484102848, + "grad_norm": 1.1052740404727952, + "learning_rate": 1.2648998965426032e-05, + "loss": 0.4473, + "step": 25014 + }, + { + "epoch": 0.4322470279236937, + "grad_norm": 1.2162796911998226, + "learning_rate": 1.2648459304711026e-05, + "loss": 0.3682, + "step": 25015 + }, + { + "epoch": 0.4322643074371026, + "grad_norm": 0.9166467796151189, + "learning_rate": 1.2647919635700848e-05, + "loss": 0.5073, + "step": 25016 + }, + { + "epoch": 0.4322815869505115, + "grad_norm": 0.6980123245262052, + "learning_rate": 1.2647379958397181e-05, + "loss": 0.2571, + "step": 25017 + }, + { + "epoch": 0.43229886646392035, + "grad_norm": 1.2158701352171954, + "learning_rate": 1.2646840272801721e-05, + "loss": 0.5298, + "step": 25018 + }, + { + "epoch": 0.43231614597732926, + "grad_norm": 0.6546857521421972, + "learning_rate": 1.2646300578916153e-05, + "loss": 0.3687, + "step": 25019 + }, + { + "epoch": 0.43233342549073817, + "grad_norm": 0.9254408936926529, + "learning_rate": 1.2645760876742173e-05, + "loss": 0.5002, + "step": 25020 + }, + { + "epoch": 0.4323507050041471, + "grad_norm": 1.019630923433768, + "learning_rate": 1.264522116628147e-05, + "loss": 0.566, + "step": 25021 + }, + { + "epoch": 0.432367984517556, + "grad_norm": 1.0994227826809426, + "learning_rate": 1.264468144753573e-05, + "loss": 0.4902, + "step": 25022 + }, + { + "epoch": 0.4323852640309649, + "grad_norm": 0.6011428857977557, + "learning_rate": 1.2644141720506649e-05, + "loss": 0.7939, + "step": 25023 + }, + { + "epoch": 0.4324025435443738, + "grad_norm": 0.9534574834521529, + "learning_rate": 1.2643601985195915e-05, + "loss": 0.4584, + "step": 25024 + }, + { + "epoch": 0.4324198230577827, + "grad_norm": 0.7110009619913708, + "learning_rate": 1.2643062241605217e-05, + "loss": 0.2724, + "step": 25025 + }, + { + "epoch": 0.4324371025711916, + "grad_norm": 0.9064372354497662, + "learning_rate": 1.2642522489736249e-05, + "loss": 0.42, + "step": 25026 + }, + { + "epoch": 0.4324543820846005, + "grad_norm": 1.2020883824196953, + "learning_rate": 1.26419827295907e-05, + "loss": 0.3935, + "step": 25027 + }, + { + "epoch": 0.4324716615980094, + "grad_norm": 0.6254662551718877, + "learning_rate": 1.264144296117026e-05, + "loss": 0.6873, + "step": 25028 + }, + { + "epoch": 0.4324889411114183, + "grad_norm": 0.964373395360244, + "learning_rate": 1.2640903184476618e-05, + "loss": 0.4932, + "step": 25029 + }, + { + "epoch": 0.4325062206248272, + "grad_norm": 1.061644159529788, + "learning_rate": 1.2640363399511469e-05, + "loss": 0.3708, + "step": 25030 + }, + { + "epoch": 0.4325235001382361, + "grad_norm": 0.7472768081393317, + "learning_rate": 1.26398236062765e-05, + "loss": 0.3311, + "step": 25031 + }, + { + "epoch": 0.432540779651645, + "grad_norm": 0.8078422979923514, + "learning_rate": 1.2639283804773404e-05, + "loss": 0.4058, + "step": 25032 + }, + { + "epoch": 0.4325580591650539, + "grad_norm": 1.2205670496699979, + "learning_rate": 1.2638743995003868e-05, + "loss": 0.3953, + "step": 25033 + }, + { + "epoch": 0.4325753386784628, + "grad_norm": 0.8746077283008001, + "learning_rate": 1.2638204176969587e-05, + "loss": 0.4866, + "step": 25034 + }, + { + "epoch": 0.4325926181918717, + "grad_norm": 0.819108123621673, + "learning_rate": 1.263766435067225e-05, + "loss": 0.4631, + "step": 25035 + }, + { + "epoch": 0.43260989770528063, + "grad_norm": 0.9364444213955633, + "learning_rate": 1.2637124516113548e-05, + "loss": 0.3459, + "step": 25036 + }, + { + "epoch": 0.43262717721868954, + "grad_norm": 1.798346695644624, + "learning_rate": 1.2636584673295172e-05, + "loss": 0.4029, + "step": 25037 + }, + { + "epoch": 0.43264445673209845, + "grad_norm": 1.047382674173817, + "learning_rate": 1.2636044822218811e-05, + "loss": 0.5372, + "step": 25038 + }, + { + "epoch": 0.4326617362455073, + "grad_norm": 0.9899655445746603, + "learning_rate": 1.2635504962886155e-05, + "loss": 0.383, + "step": 25039 + }, + { + "epoch": 0.4326790157589162, + "grad_norm": 0.742253062190408, + "learning_rate": 1.2634965095298904e-05, + "loss": 0.3021, + "step": 25040 + }, + { + "epoch": 0.4326962952723251, + "grad_norm": 0.798646727350248, + "learning_rate": 1.2634425219458738e-05, + "loss": 0.5217, + "step": 25041 + }, + { + "epoch": 0.432713574785734, + "grad_norm": 0.8457684466088705, + "learning_rate": 1.2633885335367349e-05, + "loss": 0.3623, + "step": 25042 + }, + { + "epoch": 0.43273085429914293, + "grad_norm": 0.9479154245048308, + "learning_rate": 1.2633345443026434e-05, + "loss": 0.4017, + "step": 25043 + }, + { + "epoch": 0.43274813381255184, + "grad_norm": 0.8923437083786478, + "learning_rate": 1.2632805542437683e-05, + "loss": 0.4125, + "step": 25044 + }, + { + "epoch": 0.43276541332596075, + "grad_norm": 0.7847937638203085, + "learning_rate": 1.2632265633602781e-05, + "loss": 0.5574, + "step": 25045 + }, + { + "epoch": 0.43278269283936965, + "grad_norm": 1.3041864529568854, + "learning_rate": 1.2631725716523423e-05, + "loss": 0.4713, + "step": 25046 + }, + { + "epoch": 0.43279997235277856, + "grad_norm": 0.7396239775404896, + "learning_rate": 1.2631185791201308e-05, + "loss": 0.3309, + "step": 25047 + }, + { + "epoch": 0.43281725186618747, + "grad_norm": 0.8065433327918549, + "learning_rate": 1.263064585763811e-05, + "loss": 0.4257, + "step": 25048 + }, + { + "epoch": 0.4328345313795964, + "grad_norm": 1.1534750840526082, + "learning_rate": 1.2630105915835533e-05, + "loss": 0.4581, + "step": 25049 + }, + { + "epoch": 0.43285181089300523, + "grad_norm": 0.788949557903616, + "learning_rate": 1.2629565965795265e-05, + "loss": 0.5176, + "step": 25050 + }, + { + "epoch": 0.43286909040641414, + "grad_norm": 1.3182336171242053, + "learning_rate": 1.2629026007518995e-05, + "loss": 0.4619, + "step": 25051 + }, + { + "epoch": 0.43288636991982304, + "grad_norm": 0.8611997400596492, + "learning_rate": 1.2628486041008417e-05, + "loss": 0.5221, + "step": 25052 + }, + { + "epoch": 0.43290364943323195, + "grad_norm": 0.98828257758414, + "learning_rate": 1.262794606626522e-05, + "loss": 0.4713, + "step": 25053 + }, + { + "epoch": 0.43292092894664086, + "grad_norm": 1.3900611624941068, + "learning_rate": 1.2627406083291094e-05, + "loss": 0.3148, + "step": 25054 + }, + { + "epoch": 0.43293820846004977, + "grad_norm": 0.9305778148839348, + "learning_rate": 1.2626866092087734e-05, + "loss": 0.4867, + "step": 25055 + }, + { + "epoch": 0.4329554879734587, + "grad_norm": 1.6584091015870124, + "learning_rate": 1.2626326092656833e-05, + "loss": 0.4522, + "step": 25056 + }, + { + "epoch": 0.4329727674868676, + "grad_norm": 1.27930354647477, + "learning_rate": 1.2625786085000073e-05, + "loss": 0.4184, + "step": 25057 + }, + { + "epoch": 0.4329900470002765, + "grad_norm": 1.4476075193331799, + "learning_rate": 1.2625246069119159e-05, + "loss": 0.6305, + "step": 25058 + }, + { + "epoch": 0.4330073265136854, + "grad_norm": 0.9360453033457525, + "learning_rate": 1.2624706045015769e-05, + "loss": 0.4368, + "step": 25059 + }, + { + "epoch": 0.43302460602709425, + "grad_norm": 0.7682700964714437, + "learning_rate": 1.2624166012691603e-05, + "loss": 0.3638, + "step": 25060 + }, + { + "epoch": 0.43304188554050316, + "grad_norm": 2.435934290834329, + "learning_rate": 1.262362597214835e-05, + "loss": 0.6291, + "step": 25061 + }, + { + "epoch": 0.43305916505391207, + "grad_norm": 0.8436154777660275, + "learning_rate": 1.2623085923387697e-05, + "loss": 0.4832, + "step": 25062 + }, + { + "epoch": 0.433076444567321, + "grad_norm": 1.255107023258677, + "learning_rate": 1.2622545866411345e-05, + "loss": 0.4135, + "step": 25063 + }, + { + "epoch": 0.4330937240807299, + "grad_norm": 0.6850448988631008, + "learning_rate": 1.2622005801220976e-05, + "loss": 0.4663, + "step": 25064 + }, + { + "epoch": 0.4331110035941388, + "grad_norm": 1.0702791920520296, + "learning_rate": 1.2621465727818285e-05, + "loss": 0.2826, + "step": 25065 + }, + { + "epoch": 0.4331282831075477, + "grad_norm": 0.9095871807292017, + "learning_rate": 1.2620925646204967e-05, + "loss": 0.5828, + "step": 25066 + }, + { + "epoch": 0.4331455626209566, + "grad_norm": 0.8995992336024234, + "learning_rate": 1.262038555638271e-05, + "loss": 0.6025, + "step": 25067 + }, + { + "epoch": 0.4331628421343655, + "grad_norm": 0.611081625169547, + "learning_rate": 1.2619845458353206e-05, + "loss": 0.4107, + "step": 25068 + }, + { + "epoch": 0.4331801216477744, + "grad_norm": 0.621741392976874, + "learning_rate": 1.2619305352118147e-05, + "loss": 0.5537, + "step": 25069 + }, + { + "epoch": 0.4331974011611833, + "grad_norm": 1.2577903256258502, + "learning_rate": 1.2618765237679225e-05, + "loss": 0.643, + "step": 25070 + }, + { + "epoch": 0.4332146806745922, + "grad_norm": 1.1048671562420194, + "learning_rate": 1.261822511503813e-05, + "loss": 0.4627, + "step": 25071 + }, + { + "epoch": 0.4332319601880011, + "grad_norm": 1.3276167313192868, + "learning_rate": 1.2617684984196556e-05, + "loss": 0.5425, + "step": 25072 + }, + { + "epoch": 0.43324923970141, + "grad_norm": 1.4213731831883714, + "learning_rate": 1.2617144845156195e-05, + "loss": 0.3821, + "step": 25073 + }, + { + "epoch": 0.4332665192148189, + "grad_norm": 0.44172808554000353, + "learning_rate": 1.2616604697918733e-05, + "loss": 0.5152, + "step": 25074 + }, + { + "epoch": 0.4332837987282278, + "grad_norm": 1.0705229709922557, + "learning_rate": 1.261606454248587e-05, + "loss": 0.4505, + "step": 25075 + }, + { + "epoch": 0.4333010782416367, + "grad_norm": 1.4061673746313292, + "learning_rate": 1.2615524378859294e-05, + "loss": 0.3022, + "step": 25076 + }, + { + "epoch": 0.4333183577550456, + "grad_norm": 0.8987024672778084, + "learning_rate": 1.2614984207040695e-05, + "loss": 0.3764, + "step": 25077 + }, + { + "epoch": 0.43333563726845453, + "grad_norm": 0.8919737083615294, + "learning_rate": 1.2614444027031768e-05, + "loss": 0.5826, + "step": 25078 + }, + { + "epoch": 0.43335291678186344, + "grad_norm": 1.0935341570023158, + "learning_rate": 1.2613903838834203e-05, + "loss": 0.5152, + "step": 25079 + }, + { + "epoch": 0.43337019629527235, + "grad_norm": 1.829219878219651, + "learning_rate": 1.2613363642449693e-05, + "loss": 0.4932, + "step": 25080 + }, + { + "epoch": 0.43338747580868126, + "grad_norm": 1.4050028585834367, + "learning_rate": 1.261282343787993e-05, + "loss": 0.4198, + "step": 25081 + }, + { + "epoch": 0.4334047553220901, + "grad_norm": 1.47719938935383, + "learning_rate": 1.2612283225126602e-05, + "loss": 0.4849, + "step": 25082 + }, + { + "epoch": 0.433422034835499, + "grad_norm": 1.2001996186559725, + "learning_rate": 1.261174300419141e-05, + "loss": 0.4556, + "step": 25083 + }, + { + "epoch": 0.4334393143489079, + "grad_norm": 0.6867398233322379, + "learning_rate": 1.2611202775076037e-05, + "loss": 0.5185, + "step": 25084 + }, + { + "epoch": 0.43345659386231683, + "grad_norm": 1.2041659587833153, + "learning_rate": 1.2610662537782178e-05, + "loss": 0.5042, + "step": 25085 + }, + { + "epoch": 0.43347387337572574, + "grad_norm": 0.7521698875682675, + "learning_rate": 1.2610122292311529e-05, + "loss": 0.3685, + "step": 25086 + }, + { + "epoch": 0.43349115288913465, + "grad_norm": 0.9687482275313016, + "learning_rate": 1.2609582038665774e-05, + "loss": 0.2671, + "step": 25087 + }, + { + "epoch": 0.43350843240254355, + "grad_norm": 1.4933919856562796, + "learning_rate": 1.2609041776846613e-05, + "loss": 0.4637, + "step": 25088 + }, + { + "epoch": 0.43352571191595246, + "grad_norm": 1.4452095438193115, + "learning_rate": 1.2608501506855734e-05, + "loss": 0.4765, + "step": 25089 + }, + { + "epoch": 0.43354299142936137, + "grad_norm": 0.9431104705767734, + "learning_rate": 1.2607961228694832e-05, + "loss": 0.4322, + "step": 25090 + }, + { + "epoch": 0.4335602709427703, + "grad_norm": 0.8750111935434428, + "learning_rate": 1.2607420942365591e-05, + "loss": 0.5303, + "step": 25091 + }, + { + "epoch": 0.43357755045617913, + "grad_norm": 1.0661307966621671, + "learning_rate": 1.2606880647869714e-05, + "loss": 0.418, + "step": 25092 + }, + { + "epoch": 0.43359482996958804, + "grad_norm": 1.3410522310258208, + "learning_rate": 1.260634034520889e-05, + "loss": 0.4661, + "step": 25093 + }, + { + "epoch": 0.43361210948299694, + "grad_norm": 0.7480996505065219, + "learning_rate": 1.2605800034384808e-05, + "loss": 0.393, + "step": 25094 + }, + { + "epoch": 0.43362938899640585, + "grad_norm": 0.9468626063176138, + "learning_rate": 1.260525971539916e-05, + "loss": 0.4409, + "step": 25095 + }, + { + "epoch": 0.43364666850981476, + "grad_norm": 0.529967537578703, + "learning_rate": 1.2604719388253645e-05, + "loss": 0.5341, + "step": 25096 + }, + { + "epoch": 0.43366394802322367, + "grad_norm": 0.9564879496919121, + "learning_rate": 1.2604179052949947e-05, + "loss": 0.563, + "step": 25097 + }, + { + "epoch": 0.4336812275366326, + "grad_norm": 1.1193474205846878, + "learning_rate": 1.2603638709489765e-05, + "loss": 0.5352, + "step": 25098 + }, + { + "epoch": 0.4336985070500415, + "grad_norm": 1.1184055810209466, + "learning_rate": 1.260309835787479e-05, + "loss": 0.4116, + "step": 25099 + }, + { + "epoch": 0.4337157865634504, + "grad_norm": 0.7264634243780432, + "learning_rate": 1.260255799810671e-05, + "loss": 0.9368, + "step": 25100 + }, + { + "epoch": 0.4337330660768593, + "grad_norm": 1.1272323147325167, + "learning_rate": 1.2602017630187224e-05, + "loss": 0.5519, + "step": 25101 + }, + { + "epoch": 0.4337503455902682, + "grad_norm": 0.8569623570409063, + "learning_rate": 1.2601477254118017e-05, + "loss": 0.4607, + "step": 25102 + }, + { + "epoch": 0.43376762510367706, + "grad_norm": 0.9571739822316412, + "learning_rate": 1.2600936869900785e-05, + "loss": 0.4117, + "step": 25103 + }, + { + "epoch": 0.43378490461708596, + "grad_norm": 1.1821577311796136, + "learning_rate": 1.2600396477537226e-05, + "loss": 0.3884, + "step": 25104 + }, + { + "epoch": 0.4338021841304949, + "grad_norm": 0.5471904417774421, + "learning_rate": 1.2599856077029023e-05, + "loss": 0.8923, + "step": 25105 + }, + { + "epoch": 0.4338194636439038, + "grad_norm": 1.130476536757089, + "learning_rate": 1.2599315668377877e-05, + "loss": 0.6168, + "step": 25106 + }, + { + "epoch": 0.4338367431573127, + "grad_norm": 0.4182919030793066, + "learning_rate": 1.2598775251585474e-05, + "loss": 0.5599, + "step": 25107 + }, + { + "epoch": 0.4338540226707216, + "grad_norm": 1.012960770700182, + "learning_rate": 1.2598234826653508e-05, + "loss": 0.5569, + "step": 25108 + }, + { + "epoch": 0.4338713021841305, + "grad_norm": 0.8148067651361768, + "learning_rate": 1.259769439358368e-05, + "loss": 0.3643, + "step": 25109 + }, + { + "epoch": 0.4338885816975394, + "grad_norm": 1.1415822195146716, + "learning_rate": 1.259715395237767e-05, + "loss": 0.5372, + "step": 25110 + }, + { + "epoch": 0.4339058612109483, + "grad_norm": 0.8396002798870403, + "learning_rate": 1.2596613503037176e-05, + "loss": 0.3973, + "step": 25111 + }, + { + "epoch": 0.4339231407243572, + "grad_norm": 0.5340235530844298, + "learning_rate": 1.2596073045563892e-05, + "loss": 0.8277, + "step": 25112 + }, + { + "epoch": 0.4339404202377661, + "grad_norm": 0.6253517882001837, + "learning_rate": 1.2595532579959513e-05, + "loss": 0.3217, + "step": 25113 + }, + { + "epoch": 0.433957699751175, + "grad_norm": 0.7771702253463258, + "learning_rate": 1.2594992106225723e-05, + "loss": 0.3554, + "step": 25114 + }, + { + "epoch": 0.4339749792645839, + "grad_norm": 0.8534571470903187, + "learning_rate": 1.2594451624364225e-05, + "loss": 0.5267, + "step": 25115 + }, + { + "epoch": 0.4339922587779928, + "grad_norm": 0.9061347003078414, + "learning_rate": 1.2593911134376708e-05, + "loss": 0.3829, + "step": 25116 + }, + { + "epoch": 0.4340095382914017, + "grad_norm": 0.8491920556603172, + "learning_rate": 1.2593370636264858e-05, + "loss": 0.4747, + "step": 25117 + }, + { + "epoch": 0.4340268178048106, + "grad_norm": 0.45037485333199717, + "learning_rate": 1.259283013003038e-05, + "loss": 0.7735, + "step": 25118 + }, + { + "epoch": 0.4340440973182195, + "grad_norm": 1.2282239079278152, + "learning_rate": 1.2592289615674962e-05, + "loss": 0.4966, + "step": 25119 + }, + { + "epoch": 0.43406137683162843, + "grad_norm": 0.6869528761103779, + "learning_rate": 1.2591749093200295e-05, + "loss": 0.5214, + "step": 25120 + }, + { + "epoch": 0.43407865634503734, + "grad_norm": 0.7223212778519731, + "learning_rate": 1.259120856260807e-05, + "loss": 0.3767, + "step": 25121 + }, + { + "epoch": 0.43409593585844625, + "grad_norm": 0.8517561951834528, + "learning_rate": 1.2590668023899982e-05, + "loss": 0.4568, + "step": 25122 + }, + { + "epoch": 0.43411321537185515, + "grad_norm": 0.9025599424479627, + "learning_rate": 1.2590127477077727e-05, + "loss": 0.4519, + "step": 25123 + }, + { + "epoch": 0.434130494885264, + "grad_norm": 1.0892718116228857, + "learning_rate": 1.2589586922142996e-05, + "loss": 0.6513, + "step": 25124 + }, + { + "epoch": 0.4341477743986729, + "grad_norm": 0.8883212007816216, + "learning_rate": 1.258904635909748e-05, + "loss": 0.526, + "step": 25125 + }, + { + "epoch": 0.4341650539120818, + "grad_norm": 1.132155474287842, + "learning_rate": 1.2588505787942877e-05, + "loss": 0.3317, + "step": 25126 + }, + { + "epoch": 0.43418233342549073, + "grad_norm": 0.9636001522301334, + "learning_rate": 1.2587965208680876e-05, + "loss": 0.3674, + "step": 25127 + }, + { + "epoch": 0.43419961293889964, + "grad_norm": 0.5187708280014465, + "learning_rate": 1.2587424621313171e-05, + "loss": 0.6805, + "step": 25128 + }, + { + "epoch": 0.43421689245230854, + "grad_norm": 0.9782148119874076, + "learning_rate": 1.2586884025841455e-05, + "loss": 0.5807, + "step": 25129 + }, + { + "epoch": 0.43423417196571745, + "grad_norm": 0.7392820766111687, + "learning_rate": 1.2586343422267426e-05, + "loss": 0.3333, + "step": 25130 + }, + { + "epoch": 0.43425145147912636, + "grad_norm": 0.6664603021983849, + "learning_rate": 1.2585802810592766e-05, + "loss": 0.2964, + "step": 25131 + }, + { + "epoch": 0.43426873099253527, + "grad_norm": 1.399791281686372, + "learning_rate": 1.2585262190819182e-05, + "loss": 0.579, + "step": 25132 + }, + { + "epoch": 0.4342860105059442, + "grad_norm": 0.7887982259486346, + "learning_rate": 1.2584721562948357e-05, + "loss": 0.3816, + "step": 25133 + }, + { + "epoch": 0.434303290019353, + "grad_norm": 0.9188741265831973, + "learning_rate": 1.2584180926981984e-05, + "loss": 0.3694, + "step": 25134 + }, + { + "epoch": 0.43432056953276194, + "grad_norm": 0.751777196090232, + "learning_rate": 1.2583640282921763e-05, + "loss": 0.4161, + "step": 25135 + }, + { + "epoch": 0.43433784904617084, + "grad_norm": 0.642683099602453, + "learning_rate": 1.2583099630769387e-05, + "loss": 0.3441, + "step": 25136 + }, + { + "epoch": 0.43435512855957975, + "grad_norm": 0.6144129806853073, + "learning_rate": 1.2582558970526542e-05, + "loss": 0.445, + "step": 25137 + }, + { + "epoch": 0.43437240807298866, + "grad_norm": 0.7282530399605627, + "learning_rate": 1.258201830219493e-05, + "loss": 0.5745, + "step": 25138 + }, + { + "epoch": 0.43438968758639757, + "grad_norm": 1.2492565432763634, + "learning_rate": 1.2581477625776237e-05, + "loss": 0.5369, + "step": 25139 + }, + { + "epoch": 0.4344069670998065, + "grad_norm": 1.0019640016081561, + "learning_rate": 1.2580936941272161e-05, + "loss": 0.458, + "step": 25140 + }, + { + "epoch": 0.4344242466132154, + "grad_norm": 1.0393454050123483, + "learning_rate": 1.2580396248684397e-05, + "loss": 0.6112, + "step": 25141 + }, + { + "epoch": 0.4344415261266243, + "grad_norm": 1.0201383393958343, + "learning_rate": 1.2579855548014633e-05, + "loss": 0.5027, + "step": 25142 + }, + { + "epoch": 0.4344588056400332, + "grad_norm": 0.8933759958274704, + "learning_rate": 1.2579314839264564e-05, + "loss": 0.4162, + "step": 25143 + }, + { + "epoch": 0.4344760851534421, + "grad_norm": 0.8953128852176345, + "learning_rate": 1.2578774122435888e-05, + "loss": 0.4067, + "step": 25144 + }, + { + "epoch": 0.43449336466685096, + "grad_norm": 0.6256790980954001, + "learning_rate": 1.2578233397530295e-05, + "loss": 0.4091, + "step": 25145 + }, + { + "epoch": 0.43451064418025986, + "grad_norm": 1.0019938108668975, + "learning_rate": 1.2577692664549477e-05, + "loss": 0.4419, + "step": 25146 + }, + { + "epoch": 0.43452792369366877, + "grad_norm": 1.0613079946058892, + "learning_rate": 1.2577151923495133e-05, + "loss": 0.369, + "step": 25147 + }, + { + "epoch": 0.4345452032070777, + "grad_norm": 0.503345656462427, + "learning_rate": 1.2576611174368948e-05, + "loss": 0.2267, + "step": 25148 + }, + { + "epoch": 0.4345624827204866, + "grad_norm": 0.6852550381181569, + "learning_rate": 1.2576070417172628e-05, + "loss": 0.414, + "step": 25149 + }, + { + "epoch": 0.4345797622338955, + "grad_norm": 0.7786842619102016, + "learning_rate": 1.2575529651907857e-05, + "loss": 0.3157, + "step": 25150 + }, + { + "epoch": 0.4345970417473044, + "grad_norm": 0.46784313552721696, + "learning_rate": 1.2574988878576328e-05, + "loss": 0.5984, + "step": 25151 + }, + { + "epoch": 0.4346143212607133, + "grad_norm": 0.9037094489663173, + "learning_rate": 1.2574448097179743e-05, + "loss": 0.4354, + "step": 25152 + }, + { + "epoch": 0.4346316007741222, + "grad_norm": 1.710610425548112, + "learning_rate": 1.2573907307719788e-05, + "loss": 0.5694, + "step": 25153 + }, + { + "epoch": 0.4346488802875311, + "grad_norm": 0.6050133244850634, + "learning_rate": 1.2573366510198159e-05, + "loss": 0.1473, + "step": 25154 + }, + { + "epoch": 0.43466615980094003, + "grad_norm": 0.6130377115289828, + "learning_rate": 1.2572825704616554e-05, + "loss": 0.3824, + "step": 25155 + }, + { + "epoch": 0.4346834393143489, + "grad_norm": 0.4181460528056391, + "learning_rate": 1.257228489097666e-05, + "loss": 0.5424, + "step": 25156 + }, + { + "epoch": 0.4347007188277578, + "grad_norm": 0.7915454793231508, + "learning_rate": 1.2571744069280174e-05, + "loss": 0.3415, + "step": 25157 + }, + { + "epoch": 0.4347179983411667, + "grad_norm": 1.309678533903354, + "learning_rate": 1.2571203239528794e-05, + "loss": 0.3947, + "step": 25158 + }, + { + "epoch": 0.4347352778545756, + "grad_norm": 1.3610443493540048, + "learning_rate": 1.2570662401724207e-05, + "loss": 0.4132, + "step": 25159 + }, + { + "epoch": 0.4347525573679845, + "grad_norm": 1.1911025630591205, + "learning_rate": 1.2570121555868108e-05, + "loss": 0.5037, + "step": 25160 + }, + { + "epoch": 0.4347698368813934, + "grad_norm": 0.732073200887143, + "learning_rate": 1.2569580701962198e-05, + "loss": 0.3463, + "step": 25161 + }, + { + "epoch": 0.43478711639480233, + "grad_norm": 1.4687348043924384, + "learning_rate": 1.2569039840008164e-05, + "loss": 0.7933, + "step": 25162 + }, + { + "epoch": 0.43480439590821124, + "grad_norm": 1.1350559250914383, + "learning_rate": 1.25684989700077e-05, + "loss": 0.6691, + "step": 25163 + }, + { + "epoch": 0.43482167542162015, + "grad_norm": 0.9031281839001121, + "learning_rate": 1.2567958091962505e-05, + "loss": 0.4512, + "step": 25164 + }, + { + "epoch": 0.43483895493502905, + "grad_norm": 1.5098708480848873, + "learning_rate": 1.2567417205874266e-05, + "loss": 0.4447, + "step": 25165 + }, + { + "epoch": 0.4348562344484379, + "grad_norm": 1.9277702828977206, + "learning_rate": 1.2566876311744687e-05, + "loss": 0.3252, + "step": 25166 + }, + { + "epoch": 0.4348735139618468, + "grad_norm": 1.1018713120473909, + "learning_rate": 1.2566335409575452e-05, + "loss": 0.5507, + "step": 25167 + }, + { + "epoch": 0.4348907934752557, + "grad_norm": 1.0416288876371813, + "learning_rate": 1.256579449936826e-05, + "loss": 0.4862, + "step": 25168 + }, + { + "epoch": 0.43490807298866463, + "grad_norm": 0.894822100183564, + "learning_rate": 1.2565253581124805e-05, + "loss": 0.6011, + "step": 25169 + }, + { + "epoch": 0.43492535250207354, + "grad_norm": 1.0168802642183412, + "learning_rate": 1.2564712654846782e-05, + "loss": 0.4084, + "step": 25170 + }, + { + "epoch": 0.43494263201548244, + "grad_norm": 0.7509523271772389, + "learning_rate": 1.2564171720535883e-05, + "loss": 0.4784, + "step": 25171 + }, + { + "epoch": 0.43495991152889135, + "grad_norm": 0.8509755940157753, + "learning_rate": 1.2563630778193805e-05, + "loss": 0.466, + "step": 25172 + }, + { + "epoch": 0.43497719104230026, + "grad_norm": 0.4260160530593397, + "learning_rate": 1.2563089827822238e-05, + "loss": 0.7222, + "step": 25173 + }, + { + "epoch": 0.43499447055570917, + "grad_norm": 1.6179732783036669, + "learning_rate": 1.256254886942288e-05, + "loss": 0.4829, + "step": 25174 + }, + { + "epoch": 0.4350117500691181, + "grad_norm": 1.0138016082337857, + "learning_rate": 1.2562007902997424e-05, + "loss": 0.4264, + "step": 25175 + }, + { + "epoch": 0.435029029582527, + "grad_norm": 0.7250284061068833, + "learning_rate": 1.2561466928547565e-05, + "loss": 0.5083, + "step": 25176 + }, + { + "epoch": 0.43504630909593583, + "grad_norm": 0.8904692921521439, + "learning_rate": 1.2560925946074996e-05, + "loss": 0.4051, + "step": 25177 + }, + { + "epoch": 0.43506358860934474, + "grad_norm": 1.1336968679527824, + "learning_rate": 1.2560384955581413e-05, + "loss": 0.5931, + "step": 25178 + }, + { + "epoch": 0.43508086812275365, + "grad_norm": 0.8731794787604656, + "learning_rate": 1.255984395706851e-05, + "loss": 0.3811, + "step": 25179 + }, + { + "epoch": 0.43509814763616256, + "grad_norm": 1.0115701317110835, + "learning_rate": 1.2559302950537979e-05, + "loss": 0.585, + "step": 25180 + }, + { + "epoch": 0.43511542714957147, + "grad_norm": 1.0284762995391163, + "learning_rate": 1.2558761935991521e-05, + "loss": 0.4363, + "step": 25181 + }, + { + "epoch": 0.4351327066629804, + "grad_norm": 0.8624284974242044, + "learning_rate": 1.2558220913430822e-05, + "loss": 0.5412, + "step": 25182 + }, + { + "epoch": 0.4351499861763893, + "grad_norm": 1.3333234161723153, + "learning_rate": 1.2557679882857583e-05, + "loss": 0.3875, + "step": 25183 + }, + { + "epoch": 0.4351672656897982, + "grad_norm": 0.9650868636352062, + "learning_rate": 1.2557138844273495e-05, + "loss": 0.441, + "step": 25184 + }, + { + "epoch": 0.4351845452032071, + "grad_norm": 1.1594363568860138, + "learning_rate": 1.2556597797680255e-05, + "loss": 0.2651, + "step": 25185 + }, + { + "epoch": 0.435201824716616, + "grad_norm": 1.1013950399560202, + "learning_rate": 1.2556056743079554e-05, + "loss": 0.2368, + "step": 25186 + }, + { + "epoch": 0.43521910423002486, + "grad_norm": 0.9434300269393522, + "learning_rate": 1.2555515680473091e-05, + "loss": 0.4301, + "step": 25187 + }, + { + "epoch": 0.43523638374343376, + "grad_norm": 0.8358738129987552, + "learning_rate": 1.2554974609862558e-05, + "loss": 0.3655, + "step": 25188 + }, + { + "epoch": 0.43525366325684267, + "grad_norm": 1.1730765162347712, + "learning_rate": 1.255443353124965e-05, + "loss": 0.3717, + "step": 25189 + }, + { + "epoch": 0.4352709427702516, + "grad_norm": 1.0243669914379028, + "learning_rate": 1.2553892444636064e-05, + "loss": 0.353, + "step": 25190 + }, + { + "epoch": 0.4352882222836605, + "grad_norm": 1.107531154648247, + "learning_rate": 1.2553351350023492e-05, + "loss": 0.6691, + "step": 25191 + }, + { + "epoch": 0.4353055017970694, + "grad_norm": 1.2196241184300267, + "learning_rate": 1.255281024741363e-05, + "loss": 0.4672, + "step": 25192 + }, + { + "epoch": 0.4353227813104783, + "grad_norm": 0.9521215958596148, + "learning_rate": 1.2552269136808173e-05, + "loss": 0.5255, + "step": 25193 + }, + { + "epoch": 0.4353400608238872, + "grad_norm": 1.147602796655047, + "learning_rate": 1.2551728018208812e-05, + "loss": 0.359, + "step": 25194 + }, + { + "epoch": 0.4353573403372961, + "grad_norm": 1.1276123591133616, + "learning_rate": 1.2551186891617246e-05, + "loss": 0.6512, + "step": 25195 + }, + { + "epoch": 0.435374619850705, + "grad_norm": 1.5007926093037167, + "learning_rate": 1.255064575703517e-05, + "loss": 0.5115, + "step": 25196 + }, + { + "epoch": 0.43539189936411393, + "grad_norm": 0.8489771627524185, + "learning_rate": 1.2550104614464276e-05, + "loss": 0.4874, + "step": 25197 + }, + { + "epoch": 0.4354091788775228, + "grad_norm": 1.3481113567688738, + "learning_rate": 1.254956346390626e-05, + "loss": 0.2662, + "step": 25198 + }, + { + "epoch": 0.4354264583909317, + "grad_norm": 0.8895503459243388, + "learning_rate": 1.2549022305362821e-05, + "loss": 0.3919, + "step": 25199 + }, + { + "epoch": 0.4354437379043406, + "grad_norm": 0.7301681027088149, + "learning_rate": 1.2548481138835646e-05, + "loss": 0.2765, + "step": 25200 + }, + { + "epoch": 0.4354610174177495, + "grad_norm": 1.1061932458808192, + "learning_rate": 1.2547939964326438e-05, + "loss": 0.4084, + "step": 25201 + }, + { + "epoch": 0.4354782969311584, + "grad_norm": 1.158671883020356, + "learning_rate": 1.2547398781836888e-05, + "loss": 0.4058, + "step": 25202 + }, + { + "epoch": 0.4354955764445673, + "grad_norm": 0.9506429675516993, + "learning_rate": 1.254685759136869e-05, + "loss": 0.6219, + "step": 25203 + }, + { + "epoch": 0.43551285595797623, + "grad_norm": 0.9769196129279871, + "learning_rate": 1.254631639292354e-05, + "loss": 0.3, + "step": 25204 + }, + { + "epoch": 0.43553013547138514, + "grad_norm": 1.0659564241895394, + "learning_rate": 1.2545775186503136e-05, + "loss": 0.6054, + "step": 25205 + }, + { + "epoch": 0.43554741498479405, + "grad_norm": 0.9953273246094694, + "learning_rate": 1.254523397210917e-05, + "loss": 0.3412, + "step": 25206 + }, + { + "epoch": 0.43556469449820295, + "grad_norm": 1.1635774570763766, + "learning_rate": 1.2544692749743334e-05, + "loss": 0.3245, + "step": 25207 + }, + { + "epoch": 0.4355819740116118, + "grad_norm": 0.8101215623088491, + "learning_rate": 1.254415151940733e-05, + "loss": 0.5876, + "step": 25208 + }, + { + "epoch": 0.4355992535250207, + "grad_norm": 0.9582387975260237, + "learning_rate": 1.2543610281102851e-05, + "loss": 0.5164, + "step": 25209 + }, + { + "epoch": 0.4356165330384296, + "grad_norm": 0.9710091960913433, + "learning_rate": 1.254306903483159e-05, + "loss": 0.5609, + "step": 25210 + }, + { + "epoch": 0.43563381255183853, + "grad_norm": 1.3809563319512388, + "learning_rate": 1.2542527780595242e-05, + "loss": 0.5659, + "step": 25211 + }, + { + "epoch": 0.43565109206524744, + "grad_norm": 0.8081848270210752, + "learning_rate": 1.2541986518395508e-05, + "loss": 0.328, + "step": 25212 + }, + { + "epoch": 0.43566837157865634, + "grad_norm": 0.8062775112594187, + "learning_rate": 1.2541445248234079e-05, + "loss": 0.3519, + "step": 25213 + }, + { + "epoch": 0.43568565109206525, + "grad_norm": 0.6770555738652387, + "learning_rate": 1.2540903970112646e-05, + "loss": 0.6392, + "step": 25214 + }, + { + "epoch": 0.43570293060547416, + "grad_norm": 0.7124603295241156, + "learning_rate": 1.2540362684032909e-05, + "loss": 0.3049, + "step": 25215 + }, + { + "epoch": 0.43572021011888307, + "grad_norm": 0.6536583088603217, + "learning_rate": 1.2539821389996567e-05, + "loss": 0.4382, + "step": 25216 + }, + { + "epoch": 0.435737489632292, + "grad_norm": 0.9096009974569751, + "learning_rate": 1.2539280088005308e-05, + "loss": 0.4929, + "step": 25217 + }, + { + "epoch": 0.4357547691457009, + "grad_norm": 0.524443947203601, + "learning_rate": 1.2538738778060831e-05, + "loss": 0.1758, + "step": 25218 + }, + { + "epoch": 0.43577204865910973, + "grad_norm": 0.9539687668239347, + "learning_rate": 1.2538197460164834e-05, + "loss": 0.3526, + "step": 25219 + }, + { + "epoch": 0.43578932817251864, + "grad_norm": 0.4747572762687909, + "learning_rate": 1.2537656134319005e-05, + "loss": 0.8081, + "step": 25220 + }, + { + "epoch": 0.43580660768592755, + "grad_norm": 0.812364234156166, + "learning_rate": 1.2537114800525047e-05, + "loss": 0.5538, + "step": 25221 + }, + { + "epoch": 0.43582388719933646, + "grad_norm": 0.9331667964885847, + "learning_rate": 1.2536573458784653e-05, + "loss": 0.524, + "step": 25222 + }, + { + "epoch": 0.43584116671274536, + "grad_norm": 0.9622680123358061, + "learning_rate": 1.2536032109099518e-05, + "loss": 0.4275, + "step": 25223 + }, + { + "epoch": 0.4358584462261543, + "grad_norm": 0.9019446030061058, + "learning_rate": 1.2535490751471337e-05, + "loss": 0.4669, + "step": 25224 + }, + { + "epoch": 0.4358757257395632, + "grad_norm": 1.1920937648578869, + "learning_rate": 1.2534949385901807e-05, + "loss": 0.4765, + "step": 25225 + }, + { + "epoch": 0.4358930052529721, + "grad_norm": 0.9649303959368682, + "learning_rate": 1.253440801239262e-05, + "loss": 0.3964, + "step": 25226 + }, + { + "epoch": 0.435910284766381, + "grad_norm": 0.7377912287773799, + "learning_rate": 1.253386663094548e-05, + "loss": 0.5391, + "step": 25227 + }, + { + "epoch": 0.4359275642797899, + "grad_norm": 1.403623953102967, + "learning_rate": 1.2533325241562071e-05, + "loss": 0.5672, + "step": 25228 + }, + { + "epoch": 0.4359448437931988, + "grad_norm": 0.8953197547141, + "learning_rate": 1.2532783844244098e-05, + "loss": 0.5102, + "step": 25229 + }, + { + "epoch": 0.43596212330660766, + "grad_norm": 0.7964994570858269, + "learning_rate": 1.2532242438993255e-05, + "loss": 0.3865, + "step": 25230 + }, + { + "epoch": 0.43597940282001657, + "grad_norm": 1.0454425290361518, + "learning_rate": 1.2531701025811232e-05, + "loss": 0.4123, + "step": 25231 + }, + { + "epoch": 0.4359966823334255, + "grad_norm": 0.9592984438647738, + "learning_rate": 1.2531159604699732e-05, + "loss": 0.4473, + "step": 25232 + }, + { + "epoch": 0.4360139618468344, + "grad_norm": 0.891097055916475, + "learning_rate": 1.2530618175660451e-05, + "loss": 0.4807, + "step": 25233 + }, + { + "epoch": 0.4360312413602433, + "grad_norm": 0.8244990803874962, + "learning_rate": 1.2530076738695077e-05, + "loss": 0.4083, + "step": 25234 + }, + { + "epoch": 0.4360485208736522, + "grad_norm": 0.48483947514542036, + "learning_rate": 1.252953529380531e-05, + "loss": 0.5372, + "step": 25235 + }, + { + "epoch": 0.4360658003870611, + "grad_norm": 1.6392723968687322, + "learning_rate": 1.252899384099285e-05, + "loss": 0.3528, + "step": 25236 + }, + { + "epoch": 0.43608307990047, + "grad_norm": 0.9231130511540129, + "learning_rate": 1.2528452380259384e-05, + "loss": 0.3303, + "step": 25237 + }, + { + "epoch": 0.4361003594138789, + "grad_norm": 0.8598104134652405, + "learning_rate": 1.2527910911606616e-05, + "loss": 0.3816, + "step": 25238 + }, + { + "epoch": 0.43611763892728783, + "grad_norm": 1.0439225399586638, + "learning_rate": 1.2527369435036238e-05, + "loss": 0.5021, + "step": 25239 + }, + { + "epoch": 0.4361349184406967, + "grad_norm": 0.6024716333957928, + "learning_rate": 1.2526827950549947e-05, + "loss": 0.4639, + "step": 25240 + }, + { + "epoch": 0.4361521979541056, + "grad_norm": 0.9385927928829364, + "learning_rate": 1.2526286458149439e-05, + "loss": 0.5454, + "step": 25241 + }, + { + "epoch": 0.4361694774675145, + "grad_norm": 0.8199843547331928, + "learning_rate": 1.252574495783641e-05, + "loss": 0.3342, + "step": 25242 + }, + { + "epoch": 0.4361867569809234, + "grad_norm": 0.7512020531781775, + "learning_rate": 1.2525203449612557e-05, + "loss": 0.5903, + "step": 25243 + }, + { + "epoch": 0.4362040364943323, + "grad_norm": 0.6913281645165161, + "learning_rate": 1.2524661933479574e-05, + "loss": 0.5243, + "step": 25244 + }, + { + "epoch": 0.4362213160077412, + "grad_norm": 1.1466849132460062, + "learning_rate": 1.2524120409439158e-05, + "loss": 0.392, + "step": 25245 + }, + { + "epoch": 0.43623859552115013, + "grad_norm": 1.2229892046062405, + "learning_rate": 1.2523578877493004e-05, + "loss": 0.522, + "step": 25246 + }, + { + "epoch": 0.43625587503455904, + "grad_norm": 0.6733705260499875, + "learning_rate": 1.252303733764281e-05, + "loss": 0.3678, + "step": 25247 + }, + { + "epoch": 0.43627315454796795, + "grad_norm": 1.4117581428839086, + "learning_rate": 1.252249578989027e-05, + "loss": 0.4847, + "step": 25248 + }, + { + "epoch": 0.43629043406137685, + "grad_norm": 1.0106064851560372, + "learning_rate": 1.2521954234237084e-05, + "loss": 0.3611, + "step": 25249 + }, + { + "epoch": 0.43630771357478576, + "grad_norm": 1.163013279783797, + "learning_rate": 1.2521412670684944e-05, + "loss": 0.455, + "step": 25250 + }, + { + "epoch": 0.4363249930881946, + "grad_norm": 1.22346295151921, + "learning_rate": 1.2520871099235547e-05, + "loss": 0.4903, + "step": 25251 + }, + { + "epoch": 0.4363422726016035, + "grad_norm": 1.2742618910756276, + "learning_rate": 1.2520329519890594e-05, + "loss": 0.5069, + "step": 25252 + }, + { + "epoch": 0.43635955211501243, + "grad_norm": 0.6663378330956522, + "learning_rate": 1.2519787932651774e-05, + "loss": 0.5576, + "step": 25253 + }, + { + "epoch": 0.43637683162842134, + "grad_norm": 0.8064458631144337, + "learning_rate": 1.2519246337520787e-05, + "loss": 0.3971, + "step": 25254 + }, + { + "epoch": 0.43639411114183024, + "grad_norm": 0.8321643752386805, + "learning_rate": 1.251870473449933e-05, + "loss": 0.3729, + "step": 25255 + }, + { + "epoch": 0.43641139065523915, + "grad_norm": 1.0218070787373477, + "learning_rate": 1.2518163123589097e-05, + "loss": 0.5819, + "step": 25256 + }, + { + "epoch": 0.43642867016864806, + "grad_norm": 0.7963038762314756, + "learning_rate": 1.2517621504791788e-05, + "loss": 0.2333, + "step": 25257 + }, + { + "epoch": 0.43644594968205697, + "grad_norm": 1.4508979801737796, + "learning_rate": 1.2517079878109095e-05, + "loss": 0.5268, + "step": 25258 + }, + { + "epoch": 0.4364632291954659, + "grad_norm": 0.7331287949773395, + "learning_rate": 1.2516538243542718e-05, + "loss": 0.3838, + "step": 25259 + }, + { + "epoch": 0.4364805087088748, + "grad_norm": 0.4765607702098126, + "learning_rate": 1.2515996601094348e-05, + "loss": 0.6652, + "step": 25260 + }, + { + "epoch": 0.43649778822228363, + "grad_norm": 0.6065811188885843, + "learning_rate": 1.251545495076569e-05, + "loss": 0.7008, + "step": 25261 + }, + { + "epoch": 0.43651506773569254, + "grad_norm": 0.8617504286199277, + "learning_rate": 1.2514913292558438e-05, + "loss": 0.3864, + "step": 25262 + }, + { + "epoch": 0.43653234724910145, + "grad_norm": 1.1704358899994607, + "learning_rate": 1.251437162647428e-05, + "loss": 0.3765, + "step": 25263 + }, + { + "epoch": 0.43654962676251036, + "grad_norm": 0.8937575644295996, + "learning_rate": 1.2513829952514924e-05, + "loss": 0.496, + "step": 25264 + }, + { + "epoch": 0.43656690627591926, + "grad_norm": 1.2034159421507542, + "learning_rate": 1.2513288270682058e-05, + "loss": 0.5739, + "step": 25265 + }, + { + "epoch": 0.43658418578932817, + "grad_norm": 0.6305995262278759, + "learning_rate": 1.2512746580977383e-05, + "loss": 0.3747, + "step": 25266 + }, + { + "epoch": 0.4366014653027371, + "grad_norm": 0.9560486785758618, + "learning_rate": 1.2512204883402597e-05, + "loss": 0.6078, + "step": 25267 + }, + { + "epoch": 0.436618744816146, + "grad_norm": 1.1000084185307546, + "learning_rate": 1.2511663177959392e-05, + "loss": 0.4237, + "step": 25268 + }, + { + "epoch": 0.4366360243295549, + "grad_norm": 0.7649895252943019, + "learning_rate": 1.2511121464649468e-05, + "loss": 0.5386, + "step": 25269 + }, + { + "epoch": 0.4366533038429638, + "grad_norm": 0.6799871278933267, + "learning_rate": 1.251057974347452e-05, + "loss": 0.4896, + "step": 25270 + }, + { + "epoch": 0.4366705833563727, + "grad_norm": 0.6365195308995978, + "learning_rate": 1.2510038014436246e-05, + "loss": 0.3706, + "step": 25271 + }, + { + "epoch": 0.43668786286978156, + "grad_norm": 0.8739834435786422, + "learning_rate": 1.2509496277536342e-05, + "loss": 0.3918, + "step": 25272 + }, + { + "epoch": 0.43670514238319047, + "grad_norm": 0.9324455030265755, + "learning_rate": 1.2508954532776505e-05, + "loss": 0.343, + "step": 25273 + }, + { + "epoch": 0.4367224218965994, + "grad_norm": 0.8678858405732219, + "learning_rate": 1.2508412780158434e-05, + "loss": 0.5681, + "step": 25274 + }, + { + "epoch": 0.4367397014100083, + "grad_norm": 1.010842447901733, + "learning_rate": 1.2507871019683821e-05, + "loss": 0.4574, + "step": 25275 + }, + { + "epoch": 0.4367569809234172, + "grad_norm": 0.7372398723343923, + "learning_rate": 1.2507329251354368e-05, + "loss": 0.3602, + "step": 25276 + }, + { + "epoch": 0.4367742604368261, + "grad_norm": 1.1324883989859893, + "learning_rate": 1.2506787475171765e-05, + "loss": 0.4543, + "step": 25277 + }, + { + "epoch": 0.436791539950235, + "grad_norm": 1.4000935344116237, + "learning_rate": 1.2506245691137714e-05, + "loss": 0.4604, + "step": 25278 + }, + { + "epoch": 0.4368088194636439, + "grad_norm": 1.6847466050814461, + "learning_rate": 1.2505703899253914e-05, + "loss": 0.4597, + "step": 25279 + }, + { + "epoch": 0.4368260989770528, + "grad_norm": 1.5590499991579725, + "learning_rate": 1.2505162099522054e-05, + "loss": 0.7033, + "step": 25280 + }, + { + "epoch": 0.43684337849046173, + "grad_norm": 0.8304043288228614, + "learning_rate": 1.2504620291943841e-05, + "loss": 0.3641, + "step": 25281 + }, + { + "epoch": 0.4368606580038706, + "grad_norm": 0.9591085779804926, + "learning_rate": 1.2504078476520967e-05, + "loss": 0.3997, + "step": 25282 + }, + { + "epoch": 0.4368779375172795, + "grad_norm": 0.824238819568293, + "learning_rate": 1.2503536653255124e-05, + "loss": 0.3512, + "step": 25283 + }, + { + "epoch": 0.4368952170306884, + "grad_norm": 0.401726950058077, + "learning_rate": 1.250299482214802e-05, + "loss": 0.4534, + "step": 25284 + }, + { + "epoch": 0.4369124965440973, + "grad_norm": 0.8099733217268394, + "learning_rate": 1.250245298320134e-05, + "loss": 0.47, + "step": 25285 + }, + { + "epoch": 0.4369297760575062, + "grad_norm": 1.0258206556210907, + "learning_rate": 1.250191113641679e-05, + "loss": 0.4348, + "step": 25286 + }, + { + "epoch": 0.4369470555709151, + "grad_norm": 0.9056736332258776, + "learning_rate": 1.2501369281796065e-05, + "loss": 0.3874, + "step": 25287 + }, + { + "epoch": 0.43696433508432403, + "grad_norm": 0.8858565670318072, + "learning_rate": 1.250082741934086e-05, + "loss": 0.3719, + "step": 25288 + }, + { + "epoch": 0.43698161459773294, + "grad_norm": 1.3283175983140907, + "learning_rate": 1.2500285549052876e-05, + "loss": 0.5665, + "step": 25289 + }, + { + "epoch": 0.43699889411114184, + "grad_norm": 1.0076693413541304, + "learning_rate": 1.2499743670933804e-05, + "loss": 0.3048, + "step": 25290 + }, + { + "epoch": 0.43701617362455075, + "grad_norm": 1.2952948460517943, + "learning_rate": 1.2499201784985344e-05, + "loss": 0.4362, + "step": 25291 + }, + { + "epoch": 0.43703345313795966, + "grad_norm": 1.0399912848934363, + "learning_rate": 1.2498659891209197e-05, + "loss": 0.5008, + "step": 25292 + }, + { + "epoch": 0.4370507326513685, + "grad_norm": 0.8226568951787342, + "learning_rate": 1.2498117989607057e-05, + "loss": 0.5651, + "step": 25293 + }, + { + "epoch": 0.4370680121647774, + "grad_norm": 0.9745311844589307, + "learning_rate": 1.2497576080180623e-05, + "loss": 0.5124, + "step": 25294 + }, + { + "epoch": 0.4370852916781863, + "grad_norm": 0.6592880374127992, + "learning_rate": 1.2497034162931589e-05, + "loss": 0.5183, + "step": 25295 + }, + { + "epoch": 0.43710257119159523, + "grad_norm": 0.41856467801001646, + "learning_rate": 1.2496492237861656e-05, + "loss": 0.7888, + "step": 25296 + }, + { + "epoch": 0.43711985070500414, + "grad_norm": 1.2247753851004082, + "learning_rate": 1.2495950304972517e-05, + "loss": 0.2749, + "step": 25297 + }, + { + "epoch": 0.43713713021841305, + "grad_norm": 1.3943611334472616, + "learning_rate": 1.2495408364265873e-05, + "loss": 0.3651, + "step": 25298 + }, + { + "epoch": 0.43715440973182196, + "grad_norm": 0.7302218916632858, + "learning_rate": 1.249486641574342e-05, + "loss": 0.5473, + "step": 25299 + }, + { + "epoch": 0.43717168924523087, + "grad_norm": 0.8875907636679288, + "learning_rate": 1.2494324459406854e-05, + "loss": 0.5145, + "step": 25300 + }, + { + "epoch": 0.4371889687586398, + "grad_norm": 0.9487600768961088, + "learning_rate": 1.2493782495257879e-05, + "loss": 0.2679, + "step": 25301 + }, + { + "epoch": 0.4372062482720487, + "grad_norm": 0.7967101802362426, + "learning_rate": 1.2493240523298185e-05, + "loss": 0.4354, + "step": 25302 + }, + { + "epoch": 0.4372235277854576, + "grad_norm": 0.9506217556375429, + "learning_rate": 1.2492698543529471e-05, + "loss": 0.5052, + "step": 25303 + }, + { + "epoch": 0.43724080729886644, + "grad_norm": 0.9245403898880397, + "learning_rate": 1.249215655595344e-05, + "loss": 0.4945, + "step": 25304 + }, + { + "epoch": 0.43725808681227535, + "grad_norm": 0.9677990751060261, + "learning_rate": 1.249161456057178e-05, + "loss": 0.3138, + "step": 25305 + }, + { + "epoch": 0.43727536632568426, + "grad_norm": 0.6729813397647654, + "learning_rate": 1.2491072557386197e-05, + "loss": 0.3942, + "step": 25306 + }, + { + "epoch": 0.43729264583909316, + "grad_norm": 0.7093630898631847, + "learning_rate": 1.2490530546398385e-05, + "loss": 0.5106, + "step": 25307 + }, + { + "epoch": 0.43730992535250207, + "grad_norm": 0.6884500503850263, + "learning_rate": 1.248998852761004e-05, + "loss": 0.4811, + "step": 25308 + }, + { + "epoch": 0.437327204865911, + "grad_norm": 0.8325306584958413, + "learning_rate": 1.2489446501022863e-05, + "loss": 0.51, + "step": 25309 + }, + { + "epoch": 0.4373444843793199, + "grad_norm": 1.3251920624030697, + "learning_rate": 1.2488904466638551e-05, + "loss": 0.4594, + "step": 25310 + }, + { + "epoch": 0.4373617638927288, + "grad_norm": 1.5009280088328683, + "learning_rate": 1.24883624244588e-05, + "loss": 0.5104, + "step": 25311 + }, + { + "epoch": 0.4373790434061377, + "grad_norm": 0.8384727833621413, + "learning_rate": 1.2487820374485311e-05, + "loss": 0.3668, + "step": 25312 + }, + { + "epoch": 0.4373963229195466, + "grad_norm": 1.0738324609610403, + "learning_rate": 1.2487278316719778e-05, + "loss": 0.4016, + "step": 25313 + }, + { + "epoch": 0.43741360243295546, + "grad_norm": 0.5199813410589499, + "learning_rate": 1.24867362511639e-05, + "loss": 0.5632, + "step": 25314 + }, + { + "epoch": 0.43743088194636437, + "grad_norm": 0.6521817287953611, + "learning_rate": 1.2486194177819378e-05, + "loss": 0.5885, + "step": 25315 + }, + { + "epoch": 0.4374481614597733, + "grad_norm": 1.1384924498279962, + "learning_rate": 1.2485652096687903e-05, + "loss": 0.3937, + "step": 25316 + }, + { + "epoch": 0.4374654409731822, + "grad_norm": 1.1174055895177462, + "learning_rate": 1.2485110007771177e-05, + "loss": 0.5347, + "step": 25317 + }, + { + "epoch": 0.4374827204865911, + "grad_norm": 0.7494463040698479, + "learning_rate": 1.24845679110709e-05, + "loss": 0.4469, + "step": 25318 + }, + { + "epoch": 0.4375, + "grad_norm": 1.14178204837176, + "learning_rate": 1.2484025806588767e-05, + "loss": 0.4242, + "step": 25319 + }, + { + "epoch": 0.4375172795134089, + "grad_norm": 0.6360401754160171, + "learning_rate": 1.2483483694326473e-05, + "loss": 0.3505, + "step": 25320 + }, + { + "epoch": 0.4375345590268178, + "grad_norm": 0.7704638794566256, + "learning_rate": 1.2482941574285723e-05, + "loss": 0.3748, + "step": 25321 + }, + { + "epoch": 0.4375518385402267, + "grad_norm": 1.1856107637237694, + "learning_rate": 1.2482399446468212e-05, + "loss": 0.5159, + "step": 25322 + }, + { + "epoch": 0.43756911805363563, + "grad_norm": 0.9133466430435666, + "learning_rate": 1.2481857310875634e-05, + "loss": 0.332, + "step": 25323 + }, + { + "epoch": 0.43758639756704454, + "grad_norm": 0.8867109655518883, + "learning_rate": 1.2481315167509691e-05, + "loss": 0.4675, + "step": 25324 + }, + { + "epoch": 0.4376036770804534, + "grad_norm": 1.5140503128701934, + "learning_rate": 1.2480773016372082e-05, + "loss": 0.5166, + "step": 25325 + }, + { + "epoch": 0.4376209565938623, + "grad_norm": 1.3120624391604978, + "learning_rate": 1.2480230857464502e-05, + "loss": 0.4556, + "step": 25326 + }, + { + "epoch": 0.4376382361072712, + "grad_norm": 0.76591130390193, + "learning_rate": 1.2479688690788652e-05, + "loss": 0.3652, + "step": 25327 + }, + { + "epoch": 0.4376555156206801, + "grad_norm": 0.9023759786657833, + "learning_rate": 1.2479146516346226e-05, + "loss": 0.5342, + "step": 25328 + }, + { + "epoch": 0.437672795134089, + "grad_norm": 0.7306977285862785, + "learning_rate": 1.2478604334138926e-05, + "loss": 0.5885, + "step": 25329 + }, + { + "epoch": 0.43769007464749793, + "grad_norm": 0.4150198305583809, + "learning_rate": 1.247806214416845e-05, + "loss": 0.6483, + "step": 25330 + }, + { + "epoch": 0.43770735416090684, + "grad_norm": 0.8957933192959896, + "learning_rate": 1.2477519946436491e-05, + "loss": 0.3815, + "step": 25331 + }, + { + "epoch": 0.43772463367431574, + "grad_norm": 0.9132455479923157, + "learning_rate": 1.2476977740944755e-05, + "loss": 0.3078, + "step": 25332 + }, + { + "epoch": 0.43774191318772465, + "grad_norm": 1.1632083978633694, + "learning_rate": 1.2476435527694937e-05, + "loss": 0.4342, + "step": 25333 + }, + { + "epoch": 0.43775919270113356, + "grad_norm": 0.7756855838083149, + "learning_rate": 1.2475893306688734e-05, + "loss": 0.3412, + "step": 25334 + }, + { + "epoch": 0.4377764722145424, + "grad_norm": 1.064170757498646, + "learning_rate": 1.2475351077927844e-05, + "loss": 0.4782, + "step": 25335 + }, + { + "epoch": 0.4377937517279513, + "grad_norm": 0.9053754738837683, + "learning_rate": 1.247480884141397e-05, + "loss": 0.4654, + "step": 25336 + }, + { + "epoch": 0.4378110312413602, + "grad_norm": 0.857289777860998, + "learning_rate": 1.24742665971488e-05, + "loss": 0.4453, + "step": 25337 + }, + { + "epoch": 0.43782831075476913, + "grad_norm": 1.2410286373564303, + "learning_rate": 1.2473724345134043e-05, + "loss": 0.4102, + "step": 25338 + }, + { + "epoch": 0.43784559026817804, + "grad_norm": 1.193355691650218, + "learning_rate": 1.2473182085371394e-05, + "loss": 0.5328, + "step": 25339 + }, + { + "epoch": 0.43786286978158695, + "grad_norm": 1.0982315744682112, + "learning_rate": 1.2472639817862549e-05, + "loss": 0.3771, + "step": 25340 + }, + { + "epoch": 0.43788014929499586, + "grad_norm": 1.2733984689769162, + "learning_rate": 1.2472097542609207e-05, + "loss": 0.416, + "step": 25341 + }, + { + "epoch": 0.43789742880840477, + "grad_norm": 1.0925143080925648, + "learning_rate": 1.247155525961307e-05, + "loss": 0.4549, + "step": 25342 + }, + { + "epoch": 0.4379147083218137, + "grad_norm": 1.3647331386729844, + "learning_rate": 1.2471012968875832e-05, + "loss": 0.5518, + "step": 25343 + }, + { + "epoch": 0.4379319878352226, + "grad_norm": 0.857998769255812, + "learning_rate": 1.2470470670399198e-05, + "loss": 0.3599, + "step": 25344 + }, + { + "epoch": 0.4379492673486315, + "grad_norm": 1.21167554960617, + "learning_rate": 1.2469928364184857e-05, + "loss": 0.4003, + "step": 25345 + }, + { + "epoch": 0.43796654686204034, + "grad_norm": 1.0984570675312877, + "learning_rate": 1.2469386050234518e-05, + "loss": 0.4964, + "step": 25346 + }, + { + "epoch": 0.43798382637544925, + "grad_norm": 0.8052125370285629, + "learning_rate": 1.2468843728549871e-05, + "loss": 0.3839, + "step": 25347 + }, + { + "epoch": 0.43800110588885816, + "grad_norm": 0.7446051627966749, + "learning_rate": 1.2468301399132616e-05, + "loss": 0.5608, + "step": 25348 + }, + { + "epoch": 0.43801838540226706, + "grad_norm": 1.3168812612337244, + "learning_rate": 1.2467759061984454e-05, + "loss": 0.4042, + "step": 25349 + }, + { + "epoch": 0.43803566491567597, + "grad_norm": 0.816118886622958, + "learning_rate": 1.2467216717107087e-05, + "loss": 0.2675, + "step": 25350 + }, + { + "epoch": 0.4380529444290849, + "grad_norm": 1.4394621156494378, + "learning_rate": 1.2466674364502205e-05, + "loss": 0.4308, + "step": 25351 + }, + { + "epoch": 0.4380702239424938, + "grad_norm": 0.5956254257895992, + "learning_rate": 1.2466132004171514e-05, + "loss": 0.2289, + "step": 25352 + }, + { + "epoch": 0.4380875034559027, + "grad_norm": 1.3899443999177397, + "learning_rate": 1.246558963611671e-05, + "loss": 0.6512, + "step": 25353 + }, + { + "epoch": 0.4381047829693116, + "grad_norm": 1.0091688583181146, + "learning_rate": 1.2465047260339488e-05, + "loss": 0.5887, + "step": 25354 + }, + { + "epoch": 0.4381220624827205, + "grad_norm": 1.6874089932176881, + "learning_rate": 1.2464504876841556e-05, + "loss": 0.4262, + "step": 25355 + }, + { + "epoch": 0.4381393419961294, + "grad_norm": 1.4305551166703518, + "learning_rate": 1.2463962485624608e-05, + "loss": 0.5323, + "step": 25356 + }, + { + "epoch": 0.43815662150953827, + "grad_norm": 1.215744012326037, + "learning_rate": 1.2463420086690339e-05, + "loss": 0.4852, + "step": 25357 + }, + { + "epoch": 0.4381739010229472, + "grad_norm": 1.282477479406933, + "learning_rate": 1.2462877680040454e-05, + "loss": 0.4098, + "step": 25358 + }, + { + "epoch": 0.4381911805363561, + "grad_norm": 1.1864971844668384, + "learning_rate": 1.2462335265676646e-05, + "loss": 0.4088, + "step": 25359 + }, + { + "epoch": 0.438208460049765, + "grad_norm": 0.8299152461436786, + "learning_rate": 1.2461792843600615e-05, + "loss": 0.5018, + "step": 25360 + }, + { + "epoch": 0.4382257395631739, + "grad_norm": 1.0078160410739163, + "learning_rate": 1.2461250413814066e-05, + "loss": 0.209, + "step": 25361 + }, + { + "epoch": 0.4382430190765828, + "grad_norm": 1.4661055145984787, + "learning_rate": 1.2460707976318693e-05, + "loss": 0.4027, + "step": 25362 + }, + { + "epoch": 0.4382602985899917, + "grad_norm": 0.671462278898273, + "learning_rate": 1.2460165531116193e-05, + "loss": 0.5364, + "step": 25363 + }, + { + "epoch": 0.4382775781034006, + "grad_norm": 0.9719022075861677, + "learning_rate": 1.245962307820827e-05, + "loss": 0.6544, + "step": 25364 + }, + { + "epoch": 0.43829485761680953, + "grad_norm": 0.7094456958001253, + "learning_rate": 1.2459080617596619e-05, + "loss": 0.3759, + "step": 25365 + }, + { + "epoch": 0.43831213713021844, + "grad_norm": 1.5945481406168174, + "learning_rate": 1.2458538149282942e-05, + "loss": 0.3666, + "step": 25366 + }, + { + "epoch": 0.4383294166436273, + "grad_norm": 1.1620996548536005, + "learning_rate": 1.2457995673268939e-05, + "loss": 0.4406, + "step": 25367 + }, + { + "epoch": 0.4383466961570362, + "grad_norm": 0.7089771703779341, + "learning_rate": 1.24574531895563e-05, + "loss": 0.413, + "step": 25368 + }, + { + "epoch": 0.4383639756704451, + "grad_norm": 1.489455495028278, + "learning_rate": 1.2456910698146735e-05, + "loss": 0.6005, + "step": 25369 + }, + { + "epoch": 0.438381255183854, + "grad_norm": 1.0278169549934495, + "learning_rate": 1.2456368199041942e-05, + "loss": 0.469, + "step": 25370 + }, + { + "epoch": 0.4383985346972629, + "grad_norm": 0.7531395237436543, + "learning_rate": 1.245582569224361e-05, + "loss": 0.2175, + "step": 25371 + }, + { + "epoch": 0.43841581421067183, + "grad_norm": 0.5399145373192901, + "learning_rate": 1.245528317775345e-05, + "loss": 0.5575, + "step": 25372 + }, + { + "epoch": 0.43843309372408074, + "grad_norm": 0.8643787867148494, + "learning_rate": 1.2454740655573156e-05, + "loss": 0.6702, + "step": 25373 + }, + { + "epoch": 0.43845037323748964, + "grad_norm": 1.1953342255878499, + "learning_rate": 1.2454198125704426e-05, + "loss": 0.5318, + "step": 25374 + }, + { + "epoch": 0.43846765275089855, + "grad_norm": 0.9616406182157324, + "learning_rate": 1.2453655588148963e-05, + "loss": 0.3997, + "step": 25375 + }, + { + "epoch": 0.43848493226430746, + "grad_norm": 1.2824018673739954, + "learning_rate": 1.2453113042908465e-05, + "loss": 0.383, + "step": 25376 + }, + { + "epoch": 0.43850221177771637, + "grad_norm": 0.959920576122637, + "learning_rate": 1.2452570489984628e-05, + "loss": 0.5065, + "step": 25377 + }, + { + "epoch": 0.4385194912911252, + "grad_norm": 0.8389332050453452, + "learning_rate": 1.2452027929379155e-05, + "loss": 0.4106, + "step": 25378 + }, + { + "epoch": 0.4385367708045341, + "grad_norm": 0.4720878916466212, + "learning_rate": 1.2451485361093747e-05, + "loss": 0.8841, + "step": 25379 + }, + { + "epoch": 0.43855405031794303, + "grad_norm": 1.1109983511423611, + "learning_rate": 1.2450942785130095e-05, + "loss": 0.5517, + "step": 25380 + }, + { + "epoch": 0.43857132983135194, + "grad_norm": 0.920281614341108, + "learning_rate": 1.2450400201489907e-05, + "loss": 0.5459, + "step": 25381 + }, + { + "epoch": 0.43858860934476085, + "grad_norm": 0.8630100779213271, + "learning_rate": 1.244985761017488e-05, + "loss": 0.4012, + "step": 25382 + }, + { + "epoch": 0.43860588885816976, + "grad_norm": 0.9813935694215596, + "learning_rate": 1.2449315011186709e-05, + "loss": 0.5347, + "step": 25383 + }, + { + "epoch": 0.43862316837157866, + "grad_norm": 0.5597910584181883, + "learning_rate": 1.2448772404527101e-05, + "loss": 1.048, + "step": 25384 + }, + { + "epoch": 0.43864044788498757, + "grad_norm": 0.9253678645086714, + "learning_rate": 1.2448229790197753e-05, + "loss": 0.5076, + "step": 25385 + }, + { + "epoch": 0.4386577273983965, + "grad_norm": 0.9394235001967046, + "learning_rate": 1.2447687168200357e-05, + "loss": 0.5433, + "step": 25386 + }, + { + "epoch": 0.4386750069118054, + "grad_norm": 1.5272998724406057, + "learning_rate": 1.2447144538536626e-05, + "loss": 0.5872, + "step": 25387 + }, + { + "epoch": 0.43869228642521424, + "grad_norm": 0.5889353780057702, + "learning_rate": 1.2446601901208246e-05, + "loss": 0.6542, + "step": 25388 + }, + { + "epoch": 0.43870956593862315, + "grad_norm": 0.868576303767224, + "learning_rate": 1.2446059256216927e-05, + "loss": 0.4694, + "step": 25389 + }, + { + "epoch": 0.43872684545203205, + "grad_norm": 1.0939879583564311, + "learning_rate": 1.2445516603564365e-05, + "loss": 0.6368, + "step": 25390 + }, + { + "epoch": 0.43874412496544096, + "grad_norm": 1.2414236523923923, + "learning_rate": 1.2444973943252256e-05, + "loss": 0.5141, + "step": 25391 + }, + { + "epoch": 0.43876140447884987, + "grad_norm": 0.8340715354614702, + "learning_rate": 1.2444431275282305e-05, + "loss": 0.4612, + "step": 25392 + }, + { + "epoch": 0.4387786839922588, + "grad_norm": 0.7871110310158105, + "learning_rate": 1.244388859965621e-05, + "loss": 0.3446, + "step": 25393 + }, + { + "epoch": 0.4387959635056677, + "grad_norm": 1.1140272561275089, + "learning_rate": 1.2443345916375667e-05, + "loss": 0.458, + "step": 25394 + }, + { + "epoch": 0.4388132430190766, + "grad_norm": 1.040808998643594, + "learning_rate": 1.2442803225442381e-05, + "loss": 0.5155, + "step": 25395 + }, + { + "epoch": 0.4388305225324855, + "grad_norm": 1.0477947033574544, + "learning_rate": 1.2442260526858049e-05, + "loss": 0.4749, + "step": 25396 + }, + { + "epoch": 0.4388478020458944, + "grad_norm": 0.9503673942543915, + "learning_rate": 1.2441717820624372e-05, + "loss": 0.5554, + "step": 25397 + }, + { + "epoch": 0.4388650815593033, + "grad_norm": 0.8467203418826141, + "learning_rate": 1.244117510674305e-05, + "loss": 0.5065, + "step": 25398 + }, + { + "epoch": 0.43888236107271217, + "grad_norm": 0.9315205295039681, + "learning_rate": 1.2440632385215783e-05, + "loss": 0.4374, + "step": 25399 + }, + { + "epoch": 0.4388996405861211, + "grad_norm": 1.3340980884617506, + "learning_rate": 1.2440089656044268e-05, + "loss": 0.4053, + "step": 25400 + }, + { + "epoch": 0.43891692009953, + "grad_norm": 0.9056882696156674, + "learning_rate": 1.2439546919230205e-05, + "loss": 0.5086, + "step": 25401 + }, + { + "epoch": 0.4389341996129389, + "grad_norm": 1.2495849998450868, + "learning_rate": 1.24390041747753e-05, + "loss": 0.4899, + "step": 25402 + }, + { + "epoch": 0.4389514791263478, + "grad_norm": 1.0245142143707435, + "learning_rate": 1.2438461422681243e-05, + "loss": 0.537, + "step": 25403 + }, + { + "epoch": 0.4389687586397567, + "grad_norm": 1.0710051046997804, + "learning_rate": 1.2437918662949744e-05, + "loss": 0.4293, + "step": 25404 + }, + { + "epoch": 0.4389860381531656, + "grad_norm": 1.3325741060867007, + "learning_rate": 1.2437375895582498e-05, + "loss": 0.4603, + "step": 25405 + }, + { + "epoch": 0.4390033176665745, + "grad_norm": 0.7044257703193852, + "learning_rate": 1.2436833120581203e-05, + "loss": 0.338, + "step": 25406 + }, + { + "epoch": 0.43902059717998343, + "grad_norm": 0.9814549047474191, + "learning_rate": 1.2436290337947563e-05, + "loss": 0.408, + "step": 25407 + }, + { + "epoch": 0.43903787669339234, + "grad_norm": 1.1819937369197635, + "learning_rate": 1.2435747547683276e-05, + "loss": 0.8396, + "step": 25408 + }, + { + "epoch": 0.4390551562068012, + "grad_norm": 1.8627813809820692, + "learning_rate": 1.2435204749790045e-05, + "loss": 0.5593, + "step": 25409 + }, + { + "epoch": 0.4390724357202101, + "grad_norm": 0.8336542516345458, + "learning_rate": 1.2434661944269566e-05, + "loss": 0.4358, + "step": 25410 + }, + { + "epoch": 0.439089715233619, + "grad_norm": 0.8400532939460924, + "learning_rate": 1.2434119131123538e-05, + "loss": 0.559, + "step": 25411 + }, + { + "epoch": 0.4391069947470279, + "grad_norm": 1.1225634324540323, + "learning_rate": 1.2433576310353668e-05, + "loss": 0.4031, + "step": 25412 + }, + { + "epoch": 0.4391242742604368, + "grad_norm": 1.1498616064770126, + "learning_rate": 1.2433033481961652e-05, + "loss": 0.6311, + "step": 25413 + }, + { + "epoch": 0.4391415537738457, + "grad_norm": 1.2015881641889166, + "learning_rate": 1.2432490645949187e-05, + "loss": 0.6202, + "step": 25414 + }, + { + "epoch": 0.43915883328725464, + "grad_norm": 0.8477198436643479, + "learning_rate": 1.2431947802317979e-05, + "loss": 0.2934, + "step": 25415 + }, + { + "epoch": 0.43917611280066354, + "grad_norm": 0.7140791274124315, + "learning_rate": 1.2431404951069723e-05, + "loss": 0.3652, + "step": 25416 + }, + { + "epoch": 0.43919339231407245, + "grad_norm": 1.0719261627799004, + "learning_rate": 1.2430862092206124e-05, + "loss": 0.6289, + "step": 25417 + }, + { + "epoch": 0.43921067182748136, + "grad_norm": 0.7699678182083883, + "learning_rate": 1.2430319225728882e-05, + "loss": 0.3976, + "step": 25418 + }, + { + "epoch": 0.43922795134089027, + "grad_norm": 1.0245716691711328, + "learning_rate": 1.2429776351639692e-05, + "loss": 0.4203, + "step": 25419 + }, + { + "epoch": 0.4392452308542991, + "grad_norm": 0.4531888096722315, + "learning_rate": 1.2429233469940259e-05, + "loss": 0.6922, + "step": 25420 + }, + { + "epoch": 0.439262510367708, + "grad_norm": 0.9271070742699726, + "learning_rate": 1.2428690580632284e-05, + "loss": 0.4485, + "step": 25421 + }, + { + "epoch": 0.43927978988111693, + "grad_norm": 0.3937418327416645, + "learning_rate": 1.2428147683717464e-05, + "loss": 0.4637, + "step": 25422 + }, + { + "epoch": 0.43929706939452584, + "grad_norm": 0.8422669919821908, + "learning_rate": 1.24276047791975e-05, + "loss": 0.3609, + "step": 25423 + }, + { + "epoch": 0.43931434890793475, + "grad_norm": 0.8505035156386982, + "learning_rate": 1.2427061867074092e-05, + "loss": 0.3469, + "step": 25424 + }, + { + "epoch": 0.43933162842134366, + "grad_norm": 1.0109409863028898, + "learning_rate": 1.2426518947348946e-05, + "loss": 0.6083, + "step": 25425 + }, + { + "epoch": 0.43934890793475256, + "grad_norm": 0.9442309023443483, + "learning_rate": 1.2425976020023756e-05, + "loss": 0.2616, + "step": 25426 + }, + { + "epoch": 0.43936618744816147, + "grad_norm": 0.6264015909660307, + "learning_rate": 1.2425433085100224e-05, + "loss": 0.5089, + "step": 25427 + }, + { + "epoch": 0.4393834669615704, + "grad_norm": 1.5760291247218328, + "learning_rate": 1.2424890142580053e-05, + "loss": 0.6162, + "step": 25428 + }, + { + "epoch": 0.4394007464749793, + "grad_norm": 0.8765203365632416, + "learning_rate": 1.2424347192464941e-05, + "loss": 0.4575, + "step": 25429 + }, + { + "epoch": 0.4394180259883882, + "grad_norm": 0.7176211859769043, + "learning_rate": 1.242380423475659e-05, + "loss": 0.3559, + "step": 25430 + }, + { + "epoch": 0.43943530550179705, + "grad_norm": 0.911700917687425, + "learning_rate": 1.2423261269456697e-05, + "loss": 0.4046, + "step": 25431 + }, + { + "epoch": 0.43945258501520595, + "grad_norm": 0.8748989298254346, + "learning_rate": 1.2422718296566968e-05, + "loss": 0.4269, + "step": 25432 + }, + { + "epoch": 0.43946986452861486, + "grad_norm": 0.9114056835887414, + "learning_rate": 1.2422175316089102e-05, + "loss": 0.3436, + "step": 25433 + }, + { + "epoch": 0.43948714404202377, + "grad_norm": 0.7591110108526491, + "learning_rate": 1.2421632328024797e-05, + "loss": 0.3822, + "step": 25434 + }, + { + "epoch": 0.4395044235554327, + "grad_norm": 0.8922717733396085, + "learning_rate": 1.2421089332375758e-05, + "loss": 0.3519, + "step": 25435 + }, + { + "epoch": 0.4395217030688416, + "grad_norm": 0.975526961727515, + "learning_rate": 1.2420546329143681e-05, + "loss": 0.5758, + "step": 25436 + }, + { + "epoch": 0.4395389825822505, + "grad_norm": 0.7848463518701609, + "learning_rate": 1.2420003318330267e-05, + "loss": 0.4241, + "step": 25437 + }, + { + "epoch": 0.4395562620956594, + "grad_norm": 0.5504708423573454, + "learning_rate": 1.2419460299937222e-05, + "loss": 0.4265, + "step": 25438 + }, + { + "epoch": 0.4395735416090683, + "grad_norm": 0.8941637897351082, + "learning_rate": 1.2418917273966246e-05, + "loss": 0.3893, + "step": 25439 + }, + { + "epoch": 0.4395908211224772, + "grad_norm": 1.640266185551329, + "learning_rate": 1.2418374240419032e-05, + "loss": 0.3617, + "step": 25440 + }, + { + "epoch": 0.43960810063588607, + "grad_norm": 1.506338105026616, + "learning_rate": 1.2417831199297288e-05, + "loss": 0.3511, + "step": 25441 + }, + { + "epoch": 0.439625380149295, + "grad_norm": 1.1416034471421577, + "learning_rate": 1.2417288150602713e-05, + "loss": 0.473, + "step": 25442 + }, + { + "epoch": 0.4396426596627039, + "grad_norm": 0.48066394227664727, + "learning_rate": 1.2416745094337006e-05, + "loss": 0.7156, + "step": 25443 + }, + { + "epoch": 0.4396599391761128, + "grad_norm": 0.8235945035283041, + "learning_rate": 1.2416202030501872e-05, + "loss": 0.4349, + "step": 25444 + }, + { + "epoch": 0.4396772186895217, + "grad_norm": 0.9366792975684606, + "learning_rate": 1.241565895909901e-05, + "loss": 0.4451, + "step": 25445 + }, + { + "epoch": 0.4396944982029306, + "grad_norm": 0.9750671865605991, + "learning_rate": 1.2415115880130118e-05, + "loss": 0.531, + "step": 25446 + }, + { + "epoch": 0.4397117777163395, + "grad_norm": 0.8597317488190526, + "learning_rate": 1.2414572793596901e-05, + "loss": 0.3521, + "step": 25447 + }, + { + "epoch": 0.4397290572297484, + "grad_norm": 1.1890329508159019, + "learning_rate": 1.241402969950106e-05, + "loss": 0.4174, + "step": 25448 + }, + { + "epoch": 0.43974633674315733, + "grad_norm": 1.264336052178814, + "learning_rate": 1.2413486597844293e-05, + "loss": 0.6205, + "step": 25449 + }, + { + "epoch": 0.43976361625656624, + "grad_norm": 0.9831649681595473, + "learning_rate": 1.2412943488628303e-05, + "loss": 0.4674, + "step": 25450 + }, + { + "epoch": 0.43978089576997514, + "grad_norm": 0.809043089911853, + "learning_rate": 1.2412400371854789e-05, + "loss": 0.4537, + "step": 25451 + }, + { + "epoch": 0.439798175283384, + "grad_norm": 1.295961260726344, + "learning_rate": 1.2411857247525456e-05, + "loss": 0.4984, + "step": 25452 + }, + { + "epoch": 0.4398154547967929, + "grad_norm": 1.014021650226492, + "learning_rate": 1.2411314115642004e-05, + "loss": 0.3588, + "step": 25453 + }, + { + "epoch": 0.4398327343102018, + "grad_norm": 0.6160480019463576, + "learning_rate": 1.2410770976206128e-05, + "loss": 0.6912, + "step": 25454 + }, + { + "epoch": 0.4398500138236107, + "grad_norm": 0.8804244282899268, + "learning_rate": 1.2410227829219537e-05, + "loss": 0.5685, + "step": 25455 + }, + { + "epoch": 0.4398672933370196, + "grad_norm": 1.0551298358955992, + "learning_rate": 1.240968467468393e-05, + "loss": 0.4607, + "step": 25456 + }, + { + "epoch": 0.43988457285042853, + "grad_norm": 1.0954795897548657, + "learning_rate": 1.2409141512601006e-05, + "loss": 0.3541, + "step": 25457 + }, + { + "epoch": 0.43990185236383744, + "grad_norm": 1.6349803875643956, + "learning_rate": 1.240859834297247e-05, + "loss": 0.4696, + "step": 25458 + }, + { + "epoch": 0.43991913187724635, + "grad_norm": 0.7096014888911645, + "learning_rate": 1.2408055165800019e-05, + "loss": 0.3544, + "step": 25459 + }, + { + "epoch": 0.43993641139065526, + "grad_norm": 0.8657403577619287, + "learning_rate": 1.2407511981085357e-05, + "loss": 0.3187, + "step": 25460 + }, + { + "epoch": 0.43995369090406417, + "grad_norm": 0.629023805432688, + "learning_rate": 1.2406968788830182e-05, + "loss": 0.3888, + "step": 25461 + }, + { + "epoch": 0.439970970417473, + "grad_norm": 0.8543231402243975, + "learning_rate": 1.2406425589036202e-05, + "loss": 0.4509, + "step": 25462 + }, + { + "epoch": 0.4399882499308819, + "grad_norm": 0.6340790884992852, + "learning_rate": 1.240588238170511e-05, + "loss": 0.3011, + "step": 25463 + }, + { + "epoch": 0.44000552944429083, + "grad_norm": 0.5143084991817632, + "learning_rate": 1.2405339166838614e-05, + "loss": 0.2156, + "step": 25464 + }, + { + "epoch": 0.44002280895769974, + "grad_norm": 1.407655765708411, + "learning_rate": 1.2404795944438412e-05, + "loss": 0.4139, + "step": 25465 + }, + { + "epoch": 0.44004008847110865, + "grad_norm": 1.0917011338878644, + "learning_rate": 1.2404252714506206e-05, + "loss": 0.6157, + "step": 25466 + }, + { + "epoch": 0.44005736798451756, + "grad_norm": 0.9348622988534413, + "learning_rate": 1.24037094770437e-05, + "loss": 0.4706, + "step": 25467 + }, + { + "epoch": 0.44007464749792646, + "grad_norm": 1.3859371098953028, + "learning_rate": 1.2403166232052591e-05, + "loss": 0.4217, + "step": 25468 + }, + { + "epoch": 0.44009192701133537, + "grad_norm": 1.1228417028362745, + "learning_rate": 1.2402622979534585e-05, + "loss": 0.5385, + "step": 25469 + }, + { + "epoch": 0.4401092065247443, + "grad_norm": 0.5845108899463142, + "learning_rate": 1.240207971949138e-05, + "loss": 0.3106, + "step": 25470 + }, + { + "epoch": 0.4401264860381532, + "grad_norm": 1.3669270591413185, + "learning_rate": 1.2401536451924676e-05, + "loss": 0.5194, + "step": 25471 + }, + { + "epoch": 0.4401437655515621, + "grad_norm": 1.232189434656998, + "learning_rate": 1.2400993176836181e-05, + "loss": 0.2839, + "step": 25472 + }, + { + "epoch": 0.44016104506497095, + "grad_norm": 0.8383549775294293, + "learning_rate": 1.2400449894227593e-05, + "loss": 0.3631, + "step": 25473 + }, + { + "epoch": 0.44017832457837985, + "grad_norm": 0.7187732838410615, + "learning_rate": 1.239990660410061e-05, + "loss": 0.4266, + "step": 25474 + }, + { + "epoch": 0.44019560409178876, + "grad_norm": 0.8188678026325902, + "learning_rate": 1.239936330645694e-05, + "loss": 0.4942, + "step": 25475 + }, + { + "epoch": 0.44021288360519767, + "grad_norm": 0.9846658322401977, + "learning_rate": 1.2398820001298282e-05, + "loss": 0.5525, + "step": 25476 + }, + { + "epoch": 0.4402301631186066, + "grad_norm": 0.8901889795152058, + "learning_rate": 1.2398276688626336e-05, + "loss": 0.4754, + "step": 25477 + }, + { + "epoch": 0.4402474426320155, + "grad_norm": 0.8124163309419244, + "learning_rate": 1.2397733368442807e-05, + "loss": 0.486, + "step": 25478 + }, + { + "epoch": 0.4402647221454244, + "grad_norm": 1.3038300871228297, + "learning_rate": 1.2397190040749391e-05, + "loss": 0.501, + "step": 25479 + }, + { + "epoch": 0.4402820016588333, + "grad_norm": 0.7575237531098568, + "learning_rate": 1.2396646705547796e-05, + "loss": 0.5481, + "step": 25480 + }, + { + "epoch": 0.4402992811722422, + "grad_norm": 0.6265457229437585, + "learning_rate": 1.2396103362839724e-05, + "loss": 0.303, + "step": 25481 + }, + { + "epoch": 0.4403165606856511, + "grad_norm": 1.300072763107298, + "learning_rate": 1.2395560012626872e-05, + "loss": 0.5927, + "step": 25482 + }, + { + "epoch": 0.44033384019905997, + "grad_norm": 1.150060377691458, + "learning_rate": 1.2395016654910943e-05, + "loss": 0.5493, + "step": 25483 + }, + { + "epoch": 0.4403511197124689, + "grad_norm": 1.1261425530039466, + "learning_rate": 1.2394473289693641e-05, + "loss": 0.535, + "step": 25484 + }, + { + "epoch": 0.4403683992258778, + "grad_norm": 1.0637899893851048, + "learning_rate": 1.2393929916976665e-05, + "loss": 0.4628, + "step": 25485 + }, + { + "epoch": 0.4403856787392867, + "grad_norm": 0.7294814518408681, + "learning_rate": 1.239338653676172e-05, + "loss": 0.2787, + "step": 25486 + }, + { + "epoch": 0.4404029582526956, + "grad_norm": 0.48715017098440716, + "learning_rate": 1.2392843149050506e-05, + "loss": 0.9047, + "step": 25487 + }, + { + "epoch": 0.4404202377661045, + "grad_norm": 1.01340321162352, + "learning_rate": 1.2392299753844728e-05, + "loss": 0.6543, + "step": 25488 + }, + { + "epoch": 0.4404375172795134, + "grad_norm": 1.4890281422635374, + "learning_rate": 1.239175635114608e-05, + "loss": 0.4055, + "step": 25489 + }, + { + "epoch": 0.4404547967929223, + "grad_norm": 1.3670762804681873, + "learning_rate": 1.2391212940956274e-05, + "loss": 0.4968, + "step": 25490 + }, + { + "epoch": 0.44047207630633123, + "grad_norm": 0.47065580196784823, + "learning_rate": 1.2390669523277005e-05, + "loss": 0.6246, + "step": 25491 + }, + { + "epoch": 0.44048935581974014, + "grad_norm": 1.198770213147945, + "learning_rate": 1.2390126098109981e-05, + "loss": 0.2807, + "step": 25492 + }, + { + "epoch": 0.44050663533314904, + "grad_norm": 0.8432091558605458, + "learning_rate": 1.2389582665456896e-05, + "loss": 0.3043, + "step": 25493 + }, + { + "epoch": 0.4405239148465579, + "grad_norm": 0.6342761807181586, + "learning_rate": 1.2389039225319457e-05, + "loss": 0.2931, + "step": 25494 + }, + { + "epoch": 0.4405411943599668, + "grad_norm": 0.8055080069284586, + "learning_rate": 1.2388495777699368e-05, + "loss": 0.3999, + "step": 25495 + }, + { + "epoch": 0.4405584738733757, + "grad_norm": 1.190590301652895, + "learning_rate": 1.2387952322598327e-05, + "loss": 0.4238, + "step": 25496 + }, + { + "epoch": 0.4405757533867846, + "grad_norm": 1.5304477810076422, + "learning_rate": 1.238740886001804e-05, + "loss": 0.7366, + "step": 25497 + }, + { + "epoch": 0.4405930329001935, + "grad_norm": 1.3417645356616121, + "learning_rate": 1.2386865389960204e-05, + "loss": 0.4192, + "step": 25498 + }, + { + "epoch": 0.44061031241360243, + "grad_norm": 0.8913103414920635, + "learning_rate": 1.2386321912426524e-05, + "loss": 0.5638, + "step": 25499 + }, + { + "epoch": 0.44062759192701134, + "grad_norm": 1.9612357243882363, + "learning_rate": 1.2385778427418704e-05, + "loss": 0.7557, + "step": 25500 + }, + { + "epoch": 0.44064487144042025, + "grad_norm": 1.1308041038016985, + "learning_rate": 1.2385234934938446e-05, + "loss": 0.451, + "step": 25501 + }, + { + "epoch": 0.44066215095382916, + "grad_norm": 0.8982027271428132, + "learning_rate": 1.238469143498745e-05, + "loss": 0.4313, + "step": 25502 + }, + { + "epoch": 0.44067943046723806, + "grad_norm": 1.0729816714757416, + "learning_rate": 1.2384147927567416e-05, + "loss": 0.4442, + "step": 25503 + }, + { + "epoch": 0.440696709980647, + "grad_norm": 0.8802126854103277, + "learning_rate": 1.2383604412680053e-05, + "loss": 0.5245, + "step": 25504 + }, + { + "epoch": 0.4407139894940558, + "grad_norm": 1.4146474625624499, + "learning_rate": 1.2383060890327058e-05, + "loss": 0.5219, + "step": 25505 + }, + { + "epoch": 0.44073126900746473, + "grad_norm": 0.7602145383320847, + "learning_rate": 1.2382517360510135e-05, + "loss": 0.56, + "step": 25506 + }, + { + "epoch": 0.44074854852087364, + "grad_norm": 0.8714225958121901, + "learning_rate": 1.238197382323099e-05, + "loss": 0.5809, + "step": 25507 + }, + { + "epoch": 0.44076582803428255, + "grad_norm": 1.245404184324311, + "learning_rate": 1.238143027849132e-05, + "loss": 0.5189, + "step": 25508 + }, + { + "epoch": 0.44078310754769146, + "grad_norm": 0.49953542240094095, + "learning_rate": 1.2380886726292826e-05, + "loss": 0.6456, + "step": 25509 + }, + { + "epoch": 0.44080038706110036, + "grad_norm": 0.703850922164583, + "learning_rate": 1.2380343166637219e-05, + "loss": 0.4394, + "step": 25510 + }, + { + "epoch": 0.44081766657450927, + "grad_norm": 1.0695513977034745, + "learning_rate": 1.2379799599526195e-05, + "loss": 0.3694, + "step": 25511 + }, + { + "epoch": 0.4408349460879182, + "grad_norm": 1.2441703601731855, + "learning_rate": 1.2379256024961456e-05, + "loss": 0.5772, + "step": 25512 + }, + { + "epoch": 0.4408522256013271, + "grad_norm": 0.8408446098469793, + "learning_rate": 1.237871244294471e-05, + "loss": 0.5406, + "step": 25513 + }, + { + "epoch": 0.440869505114736, + "grad_norm": 0.4615198218356161, + "learning_rate": 1.237816885347765e-05, + "loss": 0.8096, + "step": 25514 + }, + { + "epoch": 0.44088678462814485, + "grad_norm": 0.7840334663297792, + "learning_rate": 1.2377625256561987e-05, + "loss": 0.2941, + "step": 25515 + }, + { + "epoch": 0.44090406414155375, + "grad_norm": 1.307579427193087, + "learning_rate": 1.2377081652199423e-05, + "loss": 0.4163, + "step": 25516 + }, + { + "epoch": 0.44092134365496266, + "grad_norm": 0.9273236910708266, + "learning_rate": 1.2376538040391657e-05, + "loss": 0.4719, + "step": 25517 + }, + { + "epoch": 0.44093862316837157, + "grad_norm": 1.2364874645773505, + "learning_rate": 1.2375994421140392e-05, + "loss": 0.2828, + "step": 25518 + }, + { + "epoch": 0.4409559026817805, + "grad_norm": 1.2853162369187652, + "learning_rate": 1.2375450794447335e-05, + "loss": 0.4499, + "step": 25519 + }, + { + "epoch": 0.4409731821951894, + "grad_norm": 0.4787391176533978, + "learning_rate": 1.2374907160314185e-05, + "loss": 0.7244, + "step": 25520 + }, + { + "epoch": 0.4409904617085983, + "grad_norm": 0.7828490244840451, + "learning_rate": 1.2374363518742642e-05, + "loss": 0.3308, + "step": 25521 + }, + { + "epoch": 0.4410077412220072, + "grad_norm": 1.017686428922237, + "learning_rate": 1.2373819869734417e-05, + "loss": 0.4863, + "step": 25522 + }, + { + "epoch": 0.4410250207354161, + "grad_norm": 0.9521697441902109, + "learning_rate": 1.2373276213291204e-05, + "loss": 0.4143, + "step": 25523 + }, + { + "epoch": 0.441042300248825, + "grad_norm": 0.5380487719389369, + "learning_rate": 1.237273254941471e-05, + "loss": 0.6203, + "step": 25524 + }, + { + "epoch": 0.4410595797622339, + "grad_norm": 1.3291225983707502, + "learning_rate": 1.2372188878106638e-05, + "loss": 0.4372, + "step": 25525 + }, + { + "epoch": 0.4410768592756428, + "grad_norm": 0.5991729177023956, + "learning_rate": 1.2371645199368688e-05, + "loss": 0.2469, + "step": 25526 + }, + { + "epoch": 0.4410941387890517, + "grad_norm": 0.8613354664094319, + "learning_rate": 1.2371101513202569e-05, + "loss": 0.2777, + "step": 25527 + }, + { + "epoch": 0.4411114183024606, + "grad_norm": 1.0753111823690402, + "learning_rate": 1.2370557819609977e-05, + "loss": 0.4338, + "step": 25528 + }, + { + "epoch": 0.4411286978158695, + "grad_norm": 1.0257744690110921, + "learning_rate": 1.2370014118592617e-05, + "loss": 0.6426, + "step": 25529 + }, + { + "epoch": 0.4411459773292784, + "grad_norm": 0.9099258729527098, + "learning_rate": 1.2369470410152195e-05, + "loss": 0.4937, + "step": 25530 + }, + { + "epoch": 0.4411632568426873, + "grad_norm": 1.1140884930123236, + "learning_rate": 1.236892669429041e-05, + "loss": 0.327, + "step": 25531 + }, + { + "epoch": 0.4411805363560962, + "grad_norm": 1.0902718353303698, + "learning_rate": 1.236838297100897e-05, + "loss": 0.4724, + "step": 25532 + }, + { + "epoch": 0.4411978158695051, + "grad_norm": 1.3601233461489086, + "learning_rate": 1.2367839240309572e-05, + "loss": 0.3606, + "step": 25533 + }, + { + "epoch": 0.44121509538291404, + "grad_norm": 0.9202606285125179, + "learning_rate": 1.2367295502193919e-05, + "loss": 0.341, + "step": 25534 + }, + { + "epoch": 0.44123237489632294, + "grad_norm": 1.5006417052587633, + "learning_rate": 1.236675175666372e-05, + "loss": 0.4993, + "step": 25535 + }, + { + "epoch": 0.4412496544097318, + "grad_norm": 0.9035525654457618, + "learning_rate": 1.2366208003720674e-05, + "loss": 0.5326, + "step": 25536 + }, + { + "epoch": 0.4412669339231407, + "grad_norm": 0.8724483202538621, + "learning_rate": 1.2365664243366487e-05, + "loss": 0.6204, + "step": 25537 + }, + { + "epoch": 0.4412842134365496, + "grad_norm": 1.1940062185685802, + "learning_rate": 1.2365120475602855e-05, + "loss": 0.5268, + "step": 25538 + }, + { + "epoch": 0.4413014929499585, + "grad_norm": 0.9164510340141369, + "learning_rate": 1.2364576700431488e-05, + "loss": 0.5093, + "step": 25539 + }, + { + "epoch": 0.4413187724633674, + "grad_norm": 1.033782265979796, + "learning_rate": 1.2364032917854087e-05, + "loss": 0.6099, + "step": 25540 + }, + { + "epoch": 0.44133605197677633, + "grad_norm": 0.6489372779146988, + "learning_rate": 1.2363489127872355e-05, + "loss": 0.2683, + "step": 25541 + }, + { + "epoch": 0.44135333149018524, + "grad_norm": 0.9152574605185549, + "learning_rate": 1.2362945330487998e-05, + "loss": 0.3926, + "step": 25542 + }, + { + "epoch": 0.44137061100359415, + "grad_norm": 1.0309658789522091, + "learning_rate": 1.2362401525702715e-05, + "loss": 0.3842, + "step": 25543 + }, + { + "epoch": 0.44138789051700306, + "grad_norm": 0.8842710793232489, + "learning_rate": 1.2361857713518208e-05, + "loss": 0.4431, + "step": 25544 + }, + { + "epoch": 0.44140517003041196, + "grad_norm": 1.8886850685372474, + "learning_rate": 1.2361313893936186e-05, + "loss": 0.4987, + "step": 25545 + }, + { + "epoch": 0.44142244954382087, + "grad_norm": 1.309682890727207, + "learning_rate": 1.2360770066958349e-05, + "loss": 0.376, + "step": 25546 + }, + { + "epoch": 0.4414397290572297, + "grad_norm": 0.706368569993565, + "learning_rate": 1.2360226232586401e-05, + "loss": 0.3562, + "step": 25547 + }, + { + "epoch": 0.44145700857063863, + "grad_norm": 1.179337138740266, + "learning_rate": 1.2359682390822046e-05, + "loss": 0.4544, + "step": 25548 + }, + { + "epoch": 0.44147428808404754, + "grad_norm": 0.6009522017518415, + "learning_rate": 1.2359138541666984e-05, + "loss": 0.3008, + "step": 25549 + }, + { + "epoch": 0.44149156759745645, + "grad_norm": 1.017304006988248, + "learning_rate": 1.2358594685122922e-05, + "loss": 0.4285, + "step": 25550 + }, + { + "epoch": 0.44150884711086535, + "grad_norm": 0.8470410196252312, + "learning_rate": 1.2358050821191562e-05, + "loss": 0.6497, + "step": 25551 + }, + { + "epoch": 0.44152612662427426, + "grad_norm": 1.1019419273181723, + "learning_rate": 1.2357506949874608e-05, + "loss": 0.3514, + "step": 25552 + }, + { + "epoch": 0.44154340613768317, + "grad_norm": 0.9221065069860854, + "learning_rate": 1.2356963071173764e-05, + "loss": 0.4818, + "step": 25553 + }, + { + "epoch": 0.4415606856510921, + "grad_norm": 1.1072119087071473, + "learning_rate": 1.235641918509073e-05, + "loss": 0.526, + "step": 25554 + }, + { + "epoch": 0.441577965164501, + "grad_norm": 1.3971309835367862, + "learning_rate": 1.2355875291627214e-05, + "loss": 0.2806, + "step": 25555 + }, + { + "epoch": 0.4415952446779099, + "grad_norm": 0.567328956448304, + "learning_rate": 1.2355331390784916e-05, + "loss": 0.2746, + "step": 25556 + }, + { + "epoch": 0.44161252419131874, + "grad_norm": 0.46891212328104975, + "learning_rate": 1.2354787482565544e-05, + "loss": 0.579, + "step": 25557 + }, + { + "epoch": 0.44162980370472765, + "grad_norm": 0.7351981749728443, + "learning_rate": 1.2354243566970796e-05, + "loss": 0.5581, + "step": 25558 + }, + { + "epoch": 0.44164708321813656, + "grad_norm": 1.281882379040283, + "learning_rate": 1.235369964400238e-05, + "loss": 0.2494, + "step": 25559 + }, + { + "epoch": 0.44166436273154547, + "grad_norm": 1.0042159254045693, + "learning_rate": 1.2353155713661998e-05, + "loss": 0.5607, + "step": 25560 + }, + { + "epoch": 0.4416816422449544, + "grad_norm": 0.9341254062333821, + "learning_rate": 1.235261177595135e-05, + "loss": 0.4155, + "step": 25561 + }, + { + "epoch": 0.4416989217583633, + "grad_norm": 1.755173050769272, + "learning_rate": 1.2352067830872148e-05, + "loss": 0.5675, + "step": 25562 + }, + { + "epoch": 0.4417162012717722, + "grad_norm": 1.3091199427497973, + "learning_rate": 1.2351523878426085e-05, + "loss": 0.4516, + "step": 25563 + }, + { + "epoch": 0.4417334807851811, + "grad_norm": 0.8922288597491806, + "learning_rate": 1.2350979918614877e-05, + "loss": 0.4697, + "step": 25564 + }, + { + "epoch": 0.44175076029859, + "grad_norm": 1.052040490175565, + "learning_rate": 1.2350435951440219e-05, + "loss": 0.4396, + "step": 25565 + }, + { + "epoch": 0.4417680398119989, + "grad_norm": 0.7500730086512111, + "learning_rate": 1.2349891976903812e-05, + "loss": 0.3805, + "step": 25566 + }, + { + "epoch": 0.4417853193254078, + "grad_norm": 0.7569964244707663, + "learning_rate": 1.234934799500737e-05, + "loss": 0.494, + "step": 25567 + }, + { + "epoch": 0.4418025988388167, + "grad_norm": 0.7834574890349529, + "learning_rate": 1.234880400575259e-05, + "loss": 0.4891, + "step": 25568 + }, + { + "epoch": 0.4418198783522256, + "grad_norm": 0.9974763332747936, + "learning_rate": 1.2348260009141177e-05, + "loss": 0.4913, + "step": 25569 + }, + { + "epoch": 0.4418371578656345, + "grad_norm": 0.8534887412127764, + "learning_rate": 1.2347716005174837e-05, + "loss": 0.3483, + "step": 25570 + }, + { + "epoch": 0.4418544373790434, + "grad_norm": 0.8738028044852809, + "learning_rate": 1.2347171993855272e-05, + "loss": 0.4881, + "step": 25571 + }, + { + "epoch": 0.4418717168924523, + "grad_norm": 0.9716449853153747, + "learning_rate": 1.2346627975184184e-05, + "loss": 0.3885, + "step": 25572 + }, + { + "epoch": 0.4418889964058612, + "grad_norm": 0.9740551523829164, + "learning_rate": 1.234608394916328e-05, + "loss": 0.4129, + "step": 25573 + }, + { + "epoch": 0.4419062759192701, + "grad_norm": 1.165988784929895, + "learning_rate": 1.2345539915794263e-05, + "loss": 0.3948, + "step": 25574 + }, + { + "epoch": 0.441923555432679, + "grad_norm": 0.9645288885557277, + "learning_rate": 1.2344995875078834e-05, + "loss": 0.4016, + "step": 25575 + }, + { + "epoch": 0.44194083494608793, + "grad_norm": 1.3177090684262684, + "learning_rate": 1.2344451827018702e-05, + "loss": 0.3443, + "step": 25576 + }, + { + "epoch": 0.44195811445949684, + "grad_norm": 0.636829368511649, + "learning_rate": 1.234390777161557e-05, + "loss": 0.4455, + "step": 25577 + }, + { + "epoch": 0.44197539397290575, + "grad_norm": 0.9549641616427729, + "learning_rate": 1.2343363708871136e-05, + "loss": 0.4784, + "step": 25578 + }, + { + "epoch": 0.4419926734863146, + "grad_norm": 0.9008552299605443, + "learning_rate": 1.2342819638787114e-05, + "loss": 0.4282, + "step": 25579 + }, + { + "epoch": 0.4420099529997235, + "grad_norm": 1.1024506186455092, + "learning_rate": 1.2342275561365203e-05, + "loss": 0.3916, + "step": 25580 + }, + { + "epoch": 0.4420272325131324, + "grad_norm": 1.036384434761044, + "learning_rate": 1.2341731476607102e-05, + "loss": 0.5259, + "step": 25581 + }, + { + "epoch": 0.4420445120265413, + "grad_norm": 0.9544816149947825, + "learning_rate": 1.2341187384514524e-05, + "loss": 0.4072, + "step": 25582 + }, + { + "epoch": 0.44206179153995023, + "grad_norm": 1.3584302079065615, + "learning_rate": 1.2340643285089167e-05, + "loss": 0.3, + "step": 25583 + }, + { + "epoch": 0.44207907105335914, + "grad_norm": 1.1250050871709407, + "learning_rate": 1.2340099178332739e-05, + "loss": 0.2984, + "step": 25584 + }, + { + "epoch": 0.44209635056676805, + "grad_norm": 1.1547999758081156, + "learning_rate": 1.2339555064246942e-05, + "loss": 0.4862, + "step": 25585 + }, + { + "epoch": 0.44211363008017696, + "grad_norm": 1.6385165238283703, + "learning_rate": 1.233901094283348e-05, + "loss": 0.3056, + "step": 25586 + }, + { + "epoch": 0.44213090959358586, + "grad_norm": 1.1540651444563028, + "learning_rate": 1.233846681409406e-05, + "loss": 0.4585, + "step": 25587 + }, + { + "epoch": 0.44214818910699477, + "grad_norm": 0.4731644996784931, + "learning_rate": 1.2337922678030383e-05, + "loss": 0.7131, + "step": 25588 + }, + { + "epoch": 0.4421654686204036, + "grad_norm": 1.0370085045319413, + "learning_rate": 1.2337378534644153e-05, + "loss": 0.4376, + "step": 25589 + }, + { + "epoch": 0.44218274813381253, + "grad_norm": 1.1514284397540868, + "learning_rate": 1.2336834383937077e-05, + "loss": 0.3946, + "step": 25590 + }, + { + "epoch": 0.44220002764722144, + "grad_norm": 1.3215230698426013, + "learning_rate": 1.2336290225910858e-05, + "loss": 0.5622, + "step": 25591 + }, + { + "epoch": 0.44221730716063035, + "grad_norm": 0.7844945797795414, + "learning_rate": 1.23357460605672e-05, + "loss": 0.4705, + "step": 25592 + }, + { + "epoch": 0.44223458667403925, + "grad_norm": 0.7588773209926497, + "learning_rate": 1.233520188790781e-05, + "loss": 0.3491, + "step": 25593 + }, + { + "epoch": 0.44225186618744816, + "grad_norm": 1.2393461153009662, + "learning_rate": 1.233465770793439e-05, + "loss": 0.4128, + "step": 25594 + }, + { + "epoch": 0.44226914570085707, + "grad_norm": 0.9936712955634445, + "learning_rate": 1.2334113520648644e-05, + "loss": 0.5586, + "step": 25595 + }, + { + "epoch": 0.442286425214266, + "grad_norm": 0.85268149107444, + "learning_rate": 1.2333569326052275e-05, + "loss": 0.4938, + "step": 25596 + }, + { + "epoch": 0.4423037047276749, + "grad_norm": 0.6997069718403944, + "learning_rate": 1.2333025124146992e-05, + "loss": 0.5683, + "step": 25597 + }, + { + "epoch": 0.4423209842410838, + "grad_norm": 0.7981286371504505, + "learning_rate": 1.2332480914934494e-05, + "loss": 0.4162, + "step": 25598 + }, + { + "epoch": 0.4423382637544927, + "grad_norm": 1.2264928015513643, + "learning_rate": 1.2331936698416492e-05, + "loss": 0.443, + "step": 25599 + }, + { + "epoch": 0.44235554326790155, + "grad_norm": 1.0313365229626643, + "learning_rate": 1.2331392474594686e-05, + "loss": 0.4278, + "step": 25600 + }, + { + "epoch": 0.44237282278131046, + "grad_norm": 0.8262626153057012, + "learning_rate": 1.233084824347078e-05, + "loss": 0.3809, + "step": 25601 + }, + { + "epoch": 0.44239010229471937, + "grad_norm": 0.5463722247380463, + "learning_rate": 1.233030400504648e-05, + "loss": 0.7685, + "step": 25602 + }, + { + "epoch": 0.4424073818081283, + "grad_norm": 0.7022222793011822, + "learning_rate": 1.2329759759323495e-05, + "loss": 0.4517, + "step": 25603 + }, + { + "epoch": 0.4424246613215372, + "grad_norm": 1.0102331284904653, + "learning_rate": 1.2329215506303522e-05, + "loss": 0.398, + "step": 25604 + }, + { + "epoch": 0.4424419408349461, + "grad_norm": 1.026139584247791, + "learning_rate": 1.2328671245988271e-05, + "loss": 0.5043, + "step": 25605 + }, + { + "epoch": 0.442459220348355, + "grad_norm": 0.8274113898487394, + "learning_rate": 1.232812697837944e-05, + "loss": 0.5896, + "step": 25606 + }, + { + "epoch": 0.4424764998617639, + "grad_norm": 1.1176748062002875, + "learning_rate": 1.2327582703478741e-05, + "loss": 0.2658, + "step": 25607 + }, + { + "epoch": 0.4424937793751728, + "grad_norm": 0.8049329218267818, + "learning_rate": 1.2327038421287879e-05, + "loss": 0.5448, + "step": 25608 + }, + { + "epoch": 0.4425110588885817, + "grad_norm": 0.8672692378825297, + "learning_rate": 1.232649413180855e-05, + "loss": 0.6302, + "step": 25609 + }, + { + "epoch": 0.4425283384019906, + "grad_norm": 0.8992202478742805, + "learning_rate": 1.232594983504247e-05, + "loss": 0.5199, + "step": 25610 + }, + { + "epoch": 0.4425456179153995, + "grad_norm": 1.0506805813058357, + "learning_rate": 1.2325405530991337e-05, + "loss": 0.4144, + "step": 25611 + }, + { + "epoch": 0.4425628974288084, + "grad_norm": 1.3361763121373784, + "learning_rate": 1.2324861219656854e-05, + "loss": 0.3704, + "step": 25612 + }, + { + "epoch": 0.4425801769422173, + "grad_norm": 1.2625803698752396, + "learning_rate": 1.2324316901040734e-05, + "loss": 0.599, + "step": 25613 + }, + { + "epoch": 0.4425974564556262, + "grad_norm": 0.6913117933728727, + "learning_rate": 1.2323772575144677e-05, + "loss": 0.5638, + "step": 25614 + }, + { + "epoch": 0.4426147359690351, + "grad_norm": 1.087385034598096, + "learning_rate": 1.2323228241970382e-05, + "loss": 0.5684, + "step": 25615 + }, + { + "epoch": 0.442632015482444, + "grad_norm": 1.3615157338005455, + "learning_rate": 1.2322683901519563e-05, + "loss": 0.64, + "step": 25616 + }, + { + "epoch": 0.4426492949958529, + "grad_norm": 0.7924321413977669, + "learning_rate": 1.2322139553793922e-05, + "loss": 0.4029, + "step": 25617 + }, + { + "epoch": 0.44266657450926183, + "grad_norm": 1.2203352840314272, + "learning_rate": 1.2321595198795162e-05, + "loss": 0.371, + "step": 25618 + }, + { + "epoch": 0.44268385402267074, + "grad_norm": 0.9380876979657753, + "learning_rate": 1.232105083652499e-05, + "loss": 0.4541, + "step": 25619 + }, + { + "epoch": 0.44270113353607965, + "grad_norm": 0.89463577809411, + "learning_rate": 1.232050646698511e-05, + "loss": 0.4568, + "step": 25620 + }, + { + "epoch": 0.4427184130494885, + "grad_norm": 0.8804554789140469, + "learning_rate": 1.2319962090177227e-05, + "loss": 0.5765, + "step": 25621 + }, + { + "epoch": 0.4427356925628974, + "grad_norm": 0.7349639769643771, + "learning_rate": 1.2319417706103047e-05, + "loss": 0.2769, + "step": 25622 + }, + { + "epoch": 0.4427529720763063, + "grad_norm": 0.6908458708999703, + "learning_rate": 1.2318873314764275e-05, + "loss": 0.4203, + "step": 25623 + }, + { + "epoch": 0.4427702515897152, + "grad_norm": 1.1139000812370012, + "learning_rate": 1.2318328916162616e-05, + "loss": 0.4185, + "step": 25624 + }, + { + "epoch": 0.44278753110312413, + "grad_norm": 0.7066382943358342, + "learning_rate": 1.2317784510299775e-05, + "loss": 0.4146, + "step": 25625 + }, + { + "epoch": 0.44280481061653304, + "grad_norm": 0.868920495578571, + "learning_rate": 1.2317240097177455e-05, + "loss": 0.4773, + "step": 25626 + }, + { + "epoch": 0.44282209012994195, + "grad_norm": 1.1162172376426975, + "learning_rate": 1.2316695676797363e-05, + "loss": 0.4676, + "step": 25627 + }, + { + "epoch": 0.44283936964335086, + "grad_norm": 1.031316380147425, + "learning_rate": 1.2316151249161203e-05, + "loss": 0.3167, + "step": 25628 + }, + { + "epoch": 0.44285664915675976, + "grad_norm": 0.8126453023349591, + "learning_rate": 1.2315606814270683e-05, + "loss": 0.3712, + "step": 25629 + }, + { + "epoch": 0.44287392867016867, + "grad_norm": 0.8002985958746663, + "learning_rate": 1.2315062372127506e-05, + "loss": 0.3914, + "step": 25630 + }, + { + "epoch": 0.4428912081835775, + "grad_norm": 0.8692228347952837, + "learning_rate": 1.231451792273338e-05, + "loss": 0.3992, + "step": 25631 + }, + { + "epoch": 0.44290848769698643, + "grad_norm": 1.246958609723823, + "learning_rate": 1.2313973466090003e-05, + "loss": 0.4533, + "step": 25632 + }, + { + "epoch": 0.44292576721039534, + "grad_norm": 0.9402527990080595, + "learning_rate": 1.2313429002199088e-05, + "loss": 0.5527, + "step": 25633 + }, + { + "epoch": 0.44294304672380425, + "grad_norm": 0.8473924581597374, + "learning_rate": 1.2312884531062341e-05, + "loss": 0.449, + "step": 25634 + }, + { + "epoch": 0.44296032623721315, + "grad_norm": 1.0163695248125142, + "learning_rate": 1.231234005268146e-05, + "loss": 0.3782, + "step": 25635 + }, + { + "epoch": 0.44297760575062206, + "grad_norm": 1.583658224157362, + "learning_rate": 1.2311795567058156e-05, + "loss": 0.5563, + "step": 25636 + }, + { + "epoch": 0.44299488526403097, + "grad_norm": 0.8729208380152151, + "learning_rate": 1.2311251074194131e-05, + "loss": 0.5125, + "step": 25637 + }, + { + "epoch": 0.4430121647774399, + "grad_norm": 0.7782965412237319, + "learning_rate": 1.2310706574091092e-05, + "loss": 0.3488, + "step": 25638 + }, + { + "epoch": 0.4430294442908488, + "grad_norm": 0.9803092382021265, + "learning_rate": 1.2310162066750744e-05, + "loss": 0.3517, + "step": 25639 + }, + { + "epoch": 0.4430467238042577, + "grad_norm": 0.9105596412651319, + "learning_rate": 1.2309617552174795e-05, + "loss": 0.3691, + "step": 25640 + }, + { + "epoch": 0.4430640033176666, + "grad_norm": 0.7680376705192284, + "learning_rate": 1.2309073030364944e-05, + "loss": 0.5108, + "step": 25641 + }, + { + "epoch": 0.44308128283107545, + "grad_norm": 1.2318301091694974, + "learning_rate": 1.2308528501322905e-05, + "loss": 0.5384, + "step": 25642 + }, + { + "epoch": 0.44309856234448436, + "grad_norm": 1.6735528716075976, + "learning_rate": 1.2307983965050378e-05, + "loss": 0.5483, + "step": 25643 + }, + { + "epoch": 0.44311584185789327, + "grad_norm": 0.8964258710083237, + "learning_rate": 1.230743942154907e-05, + "loss": 0.5465, + "step": 25644 + }, + { + "epoch": 0.4431331213713022, + "grad_norm": 0.6850942176530543, + "learning_rate": 1.230689487082069e-05, + "loss": 0.3261, + "step": 25645 + }, + { + "epoch": 0.4431504008847111, + "grad_norm": 1.0175390789895253, + "learning_rate": 1.2306350312866933e-05, + "loss": 0.4921, + "step": 25646 + }, + { + "epoch": 0.44316768039812, + "grad_norm": 0.8453682139834321, + "learning_rate": 1.2305805747689517e-05, + "loss": 0.4732, + "step": 25647 + }, + { + "epoch": 0.4431849599115289, + "grad_norm": 0.7066498334272217, + "learning_rate": 1.230526117529014e-05, + "loss": 0.4422, + "step": 25648 + }, + { + "epoch": 0.4432022394249378, + "grad_norm": 0.9176807197978051, + "learning_rate": 1.2304716595670507e-05, + "loss": 0.5468, + "step": 25649 + }, + { + "epoch": 0.4432195189383467, + "grad_norm": 0.8148524631924771, + "learning_rate": 1.230417200883233e-05, + "loss": 0.4071, + "step": 25650 + }, + { + "epoch": 0.4432367984517556, + "grad_norm": 0.8615247700793287, + "learning_rate": 1.230362741477731e-05, + "loss": 0.6017, + "step": 25651 + }, + { + "epoch": 0.44325407796516453, + "grad_norm": 1.009602340804851, + "learning_rate": 1.2303082813507154e-05, + "loss": 0.4664, + "step": 25652 + }, + { + "epoch": 0.4432713574785734, + "grad_norm": 1.1271318349695054, + "learning_rate": 1.230253820502357e-05, + "loss": 0.3754, + "step": 25653 + }, + { + "epoch": 0.4432886369919823, + "grad_norm": 1.7473230129782285, + "learning_rate": 1.2301993589328258e-05, + "loss": 0.4889, + "step": 25654 + }, + { + "epoch": 0.4433059165053912, + "grad_norm": 0.8098444405297546, + "learning_rate": 1.2301448966422929e-05, + "loss": 0.6424, + "step": 25655 + }, + { + "epoch": 0.4433231960188001, + "grad_norm": 1.0611106789824898, + "learning_rate": 1.2300904336309287e-05, + "loss": 0.4393, + "step": 25656 + }, + { + "epoch": 0.443340475532209, + "grad_norm": 0.5064842486706865, + "learning_rate": 1.2300359698989036e-05, + "loss": 0.8039, + "step": 25657 + }, + { + "epoch": 0.4433577550456179, + "grad_norm": 0.7135473428480589, + "learning_rate": 1.2299815054463885e-05, + "loss": 0.5836, + "step": 25658 + }, + { + "epoch": 0.4433750345590268, + "grad_norm": 0.7180453612211846, + "learning_rate": 1.2299270402735537e-05, + "loss": 0.2797, + "step": 25659 + }, + { + "epoch": 0.44339231407243573, + "grad_norm": 0.8968234934658357, + "learning_rate": 1.2298725743805702e-05, + "loss": 0.5036, + "step": 25660 + }, + { + "epoch": 0.44340959358584464, + "grad_norm": 0.826265679469438, + "learning_rate": 1.2298181077676079e-05, + "loss": 0.3913, + "step": 25661 + }, + { + "epoch": 0.44342687309925355, + "grad_norm": 0.9402522862614131, + "learning_rate": 1.2297636404348383e-05, + "loss": 0.4238, + "step": 25662 + }, + { + "epoch": 0.4434441526126624, + "grad_norm": 0.8735186500035105, + "learning_rate": 1.2297091723824312e-05, + "loss": 0.4554, + "step": 25663 + }, + { + "epoch": 0.4434614321260713, + "grad_norm": 0.8062439901625422, + "learning_rate": 1.2296547036105576e-05, + "loss": 0.1822, + "step": 25664 + }, + { + "epoch": 0.4434787116394802, + "grad_norm": 0.8798725577546123, + "learning_rate": 1.2296002341193883e-05, + "loss": 0.5261, + "step": 25665 + }, + { + "epoch": 0.4434959911528891, + "grad_norm": 1.2228531893943515, + "learning_rate": 1.2295457639090931e-05, + "loss": 0.4135, + "step": 25666 + }, + { + "epoch": 0.44351327066629803, + "grad_norm": 1.1278632908061832, + "learning_rate": 1.2294912929798435e-05, + "loss": 0.4111, + "step": 25667 + }, + { + "epoch": 0.44353055017970694, + "grad_norm": 0.9703491213598149, + "learning_rate": 1.2294368213318098e-05, + "loss": 0.4054, + "step": 25668 + }, + { + "epoch": 0.44354782969311585, + "grad_norm": 0.9656062378767661, + "learning_rate": 1.2293823489651619e-05, + "loss": 0.368, + "step": 25669 + }, + { + "epoch": 0.44356510920652475, + "grad_norm": 0.7907698700555964, + "learning_rate": 1.2293278758800718e-05, + "loss": 0.2853, + "step": 25670 + }, + { + "epoch": 0.44358238871993366, + "grad_norm": 1.0900177085488896, + "learning_rate": 1.229273402076709e-05, + "loss": 0.5538, + "step": 25671 + }, + { + "epoch": 0.44359966823334257, + "grad_norm": 1.125394691817443, + "learning_rate": 1.2292189275552444e-05, + "loss": 0.3647, + "step": 25672 + }, + { + "epoch": 0.4436169477467515, + "grad_norm": 1.0176740338521189, + "learning_rate": 1.229164452315849e-05, + "loss": 0.4253, + "step": 25673 + }, + { + "epoch": 0.44363422726016033, + "grad_norm": 1.2449812114671555, + "learning_rate": 1.229109976358693e-05, + "loss": 0.4299, + "step": 25674 + }, + { + "epoch": 0.44365150677356924, + "grad_norm": 1.255132734233982, + "learning_rate": 1.229055499683947e-05, + "loss": 0.3744, + "step": 25675 + }, + { + "epoch": 0.44366878628697815, + "grad_norm": 0.7219011670423923, + "learning_rate": 1.2290010222917821e-05, + "loss": 0.4419, + "step": 25676 + }, + { + "epoch": 0.44368606580038705, + "grad_norm": 0.93803581072723, + "learning_rate": 1.2289465441823682e-05, + "loss": 0.5121, + "step": 25677 + }, + { + "epoch": 0.44370334531379596, + "grad_norm": 1.3150310170277475, + "learning_rate": 1.2288920653558765e-05, + "loss": 0.5716, + "step": 25678 + }, + { + "epoch": 0.44372062482720487, + "grad_norm": 0.9459596921387425, + "learning_rate": 1.2288375858124775e-05, + "loss": 0.4317, + "step": 25679 + }, + { + "epoch": 0.4437379043406138, + "grad_norm": 0.8078614733536307, + "learning_rate": 1.2287831055523419e-05, + "loss": 0.3148, + "step": 25680 + }, + { + "epoch": 0.4437551838540227, + "grad_norm": 0.8526134081143679, + "learning_rate": 1.2287286245756398e-05, + "loss": 0.5238, + "step": 25681 + }, + { + "epoch": 0.4437724633674316, + "grad_norm": 1.294695478693007, + "learning_rate": 1.2286741428825426e-05, + "loss": 0.4152, + "step": 25682 + }, + { + "epoch": 0.4437897428808405, + "grad_norm": 1.826066645612444, + "learning_rate": 1.2286196604732205e-05, + "loss": 0.3485, + "step": 25683 + }, + { + "epoch": 0.44380702239424935, + "grad_norm": 1.4972737569417334, + "learning_rate": 1.2285651773478442e-05, + "loss": 0.4893, + "step": 25684 + }, + { + "epoch": 0.44382430190765826, + "grad_norm": 0.7996808921931436, + "learning_rate": 1.2285106935065846e-05, + "loss": 0.4093, + "step": 25685 + }, + { + "epoch": 0.44384158142106717, + "grad_norm": 1.0224446978103778, + "learning_rate": 1.228456208949612e-05, + "loss": 0.4049, + "step": 25686 + }, + { + "epoch": 0.4438588609344761, + "grad_norm": 0.8607789827848752, + "learning_rate": 1.2284017236770974e-05, + "loss": 0.5304, + "step": 25687 + }, + { + "epoch": 0.443876140447885, + "grad_norm": 1.1763331580795433, + "learning_rate": 1.228347237689211e-05, + "loss": 0.5597, + "step": 25688 + }, + { + "epoch": 0.4438934199612939, + "grad_norm": 0.7553168847843649, + "learning_rate": 1.2282927509861236e-05, + "loss": 0.3655, + "step": 25689 + }, + { + "epoch": 0.4439106994747028, + "grad_norm": 0.8432254236789078, + "learning_rate": 1.2282382635680062e-05, + "loss": 0.4731, + "step": 25690 + }, + { + "epoch": 0.4439279789881117, + "grad_norm": 0.7532671720244702, + "learning_rate": 1.2281837754350292e-05, + "loss": 0.7273, + "step": 25691 + }, + { + "epoch": 0.4439452585015206, + "grad_norm": 0.9738866077717058, + "learning_rate": 1.2281292865873628e-05, + "loss": 0.3966, + "step": 25692 + }, + { + "epoch": 0.4439625380149295, + "grad_norm": 1.1094768592134219, + "learning_rate": 1.2280747970251787e-05, + "loss": 0.4221, + "step": 25693 + }, + { + "epoch": 0.4439798175283384, + "grad_norm": 0.8453751364206152, + "learning_rate": 1.2280203067486469e-05, + "loss": 0.3284, + "step": 25694 + }, + { + "epoch": 0.4439970970417473, + "grad_norm": 1.1349854411987443, + "learning_rate": 1.2279658157579379e-05, + "loss": 0.5342, + "step": 25695 + }, + { + "epoch": 0.4440143765551562, + "grad_norm": 0.6878714716149046, + "learning_rate": 1.2279113240532232e-05, + "loss": 0.4147, + "step": 25696 + }, + { + "epoch": 0.4440316560685651, + "grad_norm": 0.838615911728291, + "learning_rate": 1.2278568316346726e-05, + "loss": 0.5189, + "step": 25697 + }, + { + "epoch": 0.444048935581974, + "grad_norm": 0.8638656723433145, + "learning_rate": 1.2278023385024569e-05, + "loss": 0.255, + "step": 25698 + }, + { + "epoch": 0.4440662150953829, + "grad_norm": 1.2599412536843728, + "learning_rate": 1.2277478446567473e-05, + "loss": 0.5823, + "step": 25699 + }, + { + "epoch": 0.4440834946087918, + "grad_norm": 0.9416893556845876, + "learning_rate": 1.227693350097714e-05, + "loss": 0.4788, + "step": 25700 + }, + { + "epoch": 0.4441007741222007, + "grad_norm": 0.8438272861619325, + "learning_rate": 1.2276388548255276e-05, + "loss": 0.5173, + "step": 25701 + }, + { + "epoch": 0.44411805363560963, + "grad_norm": 0.7429252623576508, + "learning_rate": 1.2275843588403591e-05, + "loss": 0.4222, + "step": 25702 + }, + { + "epoch": 0.44413533314901854, + "grad_norm": 1.1157267905499304, + "learning_rate": 1.2275298621423793e-05, + "loss": 0.4251, + "step": 25703 + }, + { + "epoch": 0.44415261266242745, + "grad_norm": 0.9110184996582671, + "learning_rate": 1.2274753647317584e-05, + "loss": 0.4003, + "step": 25704 + }, + { + "epoch": 0.44416989217583636, + "grad_norm": 1.3211057008200788, + "learning_rate": 1.2274208666086676e-05, + "loss": 0.5677, + "step": 25705 + }, + { + "epoch": 0.4441871716892452, + "grad_norm": 1.0453755214731684, + "learning_rate": 1.2273663677732772e-05, + "loss": 0.4835, + "step": 25706 + }, + { + "epoch": 0.4442044512026541, + "grad_norm": 0.83600012284576, + "learning_rate": 1.2273118682257584e-05, + "loss": 0.3568, + "step": 25707 + }, + { + "epoch": 0.444221730716063, + "grad_norm": 1.1027145818408908, + "learning_rate": 1.2272573679662811e-05, + "loss": 0.4149, + "step": 25708 + }, + { + "epoch": 0.44423901022947193, + "grad_norm": 0.8558573572102999, + "learning_rate": 1.2272028669950166e-05, + "loss": 0.235, + "step": 25709 + }, + { + "epoch": 0.44425628974288084, + "grad_norm": 0.5318367341057109, + "learning_rate": 1.2271483653121357e-05, + "loss": 0.3044, + "step": 25710 + }, + { + "epoch": 0.44427356925628975, + "grad_norm": 0.7101849587214972, + "learning_rate": 1.2270938629178086e-05, + "loss": 0.4798, + "step": 25711 + }, + { + "epoch": 0.44429084876969865, + "grad_norm": 1.0614317966097866, + "learning_rate": 1.2270393598122061e-05, + "loss": 0.4698, + "step": 25712 + }, + { + "epoch": 0.44430812828310756, + "grad_norm": 1.5245418947705023, + "learning_rate": 1.2269848559954993e-05, + "loss": 0.4551, + "step": 25713 + }, + { + "epoch": 0.44432540779651647, + "grad_norm": 0.8686366450673504, + "learning_rate": 1.2269303514678587e-05, + "loss": 0.4664, + "step": 25714 + }, + { + "epoch": 0.4443426873099254, + "grad_norm": 0.8837414656805107, + "learning_rate": 1.226875846229455e-05, + "loss": 0.5465, + "step": 25715 + }, + { + "epoch": 0.44435996682333423, + "grad_norm": 0.7764415073298031, + "learning_rate": 1.2268213402804588e-05, + "loss": 0.3573, + "step": 25716 + }, + { + "epoch": 0.44437724633674314, + "grad_norm": 0.7724279643406503, + "learning_rate": 1.2267668336210411e-05, + "loss": 0.4282, + "step": 25717 + }, + { + "epoch": 0.44439452585015204, + "grad_norm": 0.7649588112925212, + "learning_rate": 1.2267123262513724e-05, + "loss": 0.515, + "step": 25718 + }, + { + "epoch": 0.44441180536356095, + "grad_norm": 1.448651409205663, + "learning_rate": 1.2266578181716235e-05, + "loss": 0.4743, + "step": 25719 + }, + { + "epoch": 0.44442908487696986, + "grad_norm": 0.6263781265401178, + "learning_rate": 1.2266033093819649e-05, + "loss": 0.8672, + "step": 25720 + }, + { + "epoch": 0.44444636439037877, + "grad_norm": 0.864239229933871, + "learning_rate": 1.2265487998825675e-05, + "loss": 0.464, + "step": 25721 + }, + { + "epoch": 0.4444636439037877, + "grad_norm": 0.6832532107323122, + "learning_rate": 1.2264942896736022e-05, + "loss": 0.5822, + "step": 25722 + }, + { + "epoch": 0.4444809234171966, + "grad_norm": 0.7109301522694226, + "learning_rate": 1.2264397787552395e-05, + "loss": 0.3072, + "step": 25723 + }, + { + "epoch": 0.4444982029306055, + "grad_norm": 1.0418540706724233, + "learning_rate": 1.2263852671276502e-05, + "loss": 0.3496, + "step": 25724 + }, + { + "epoch": 0.4445154824440144, + "grad_norm": 0.6537806237804162, + "learning_rate": 1.2263307547910053e-05, + "loss": 0.6816, + "step": 25725 + }, + { + "epoch": 0.4445327619574233, + "grad_norm": 1.1398573028941519, + "learning_rate": 1.226276241745475e-05, + "loss": 0.3585, + "step": 25726 + }, + { + "epoch": 0.44455004147083216, + "grad_norm": 1.2095843789003629, + "learning_rate": 1.2262217279912303e-05, + "loss": 0.5389, + "step": 25727 + }, + { + "epoch": 0.44456732098424107, + "grad_norm": 1.1811254958088853, + "learning_rate": 1.2261672135284421e-05, + "loss": 0.4623, + "step": 25728 + }, + { + "epoch": 0.44458460049765, + "grad_norm": 1.012837139994978, + "learning_rate": 1.2261126983572808e-05, + "loss": 0.6161, + "step": 25729 + }, + { + "epoch": 0.4446018800110589, + "grad_norm": 0.7953459000842338, + "learning_rate": 1.2260581824779175e-05, + "loss": 0.2953, + "step": 25730 + }, + { + "epoch": 0.4446191595244678, + "grad_norm": 0.7942360044879052, + "learning_rate": 1.2260036658905228e-05, + "loss": 0.3983, + "step": 25731 + }, + { + "epoch": 0.4446364390378767, + "grad_norm": 1.158861687797297, + "learning_rate": 1.2259491485952672e-05, + "loss": 0.3394, + "step": 25732 + }, + { + "epoch": 0.4446537185512856, + "grad_norm": 0.7800296940429143, + "learning_rate": 1.2258946305923221e-05, + "loss": 0.4119, + "step": 25733 + }, + { + "epoch": 0.4446709980646945, + "grad_norm": 0.8871034969246577, + "learning_rate": 1.2258401118818575e-05, + "loss": 0.4857, + "step": 25734 + }, + { + "epoch": 0.4446882775781034, + "grad_norm": 1.627570988097223, + "learning_rate": 1.2257855924640446e-05, + "loss": 0.4425, + "step": 25735 + }, + { + "epoch": 0.4447055570915123, + "grad_norm": 1.1458545799577593, + "learning_rate": 1.2257310723390541e-05, + "loss": 0.5829, + "step": 25736 + }, + { + "epoch": 0.4447228366049212, + "grad_norm": 0.8862548605022107, + "learning_rate": 1.225676551507057e-05, + "loss": 0.5031, + "step": 25737 + }, + { + "epoch": 0.4447401161183301, + "grad_norm": 1.3351840332542846, + "learning_rate": 1.2256220299682233e-05, + "loss": 0.5745, + "step": 25738 + }, + { + "epoch": 0.444757395631739, + "grad_norm": 1.0596266779125536, + "learning_rate": 1.2255675077227247e-05, + "loss": 0.5176, + "step": 25739 + }, + { + "epoch": 0.4447746751451479, + "grad_norm": 1.307715473444485, + "learning_rate": 1.2255129847707312e-05, + "loss": 0.3323, + "step": 25740 + }, + { + "epoch": 0.4447919546585568, + "grad_norm": 0.9353078494587751, + "learning_rate": 1.2254584611124138e-05, + "loss": 0.4244, + "step": 25741 + }, + { + "epoch": 0.4448092341719657, + "grad_norm": 1.4709234090026089, + "learning_rate": 1.2254039367479439e-05, + "loss": 0.3971, + "step": 25742 + }, + { + "epoch": 0.4448265136853746, + "grad_norm": 1.1308700468538406, + "learning_rate": 1.2253494116774914e-05, + "loss": 0.5113, + "step": 25743 + }, + { + "epoch": 0.44484379319878353, + "grad_norm": 0.8769285706122107, + "learning_rate": 1.225294885901227e-05, + "loss": 0.5516, + "step": 25744 + }, + { + "epoch": 0.44486107271219244, + "grad_norm": 0.917612050904255, + "learning_rate": 1.2252403594193226e-05, + "loss": 0.4089, + "step": 25745 + }, + { + "epoch": 0.44487835222560135, + "grad_norm": 1.53499872257868, + "learning_rate": 1.225185832231948e-05, + "loss": 0.54, + "step": 25746 + }, + { + "epoch": 0.44489563173901026, + "grad_norm": 1.163466642168431, + "learning_rate": 1.225131304339274e-05, + "loss": 0.5507, + "step": 25747 + }, + { + "epoch": 0.4449129112524191, + "grad_norm": 0.8278666081578991, + "learning_rate": 1.2250767757414721e-05, + "loss": 0.4308, + "step": 25748 + }, + { + "epoch": 0.444930190765828, + "grad_norm": 0.7173985785712008, + "learning_rate": 1.2250222464387123e-05, + "loss": 0.3659, + "step": 25749 + }, + { + "epoch": 0.4449474702792369, + "grad_norm": 0.8933582006148235, + "learning_rate": 1.224967716431166e-05, + "loss": 0.5887, + "step": 25750 + }, + { + "epoch": 0.44496474979264583, + "grad_norm": 1.4673654987854619, + "learning_rate": 1.2249131857190035e-05, + "loss": 0.3085, + "step": 25751 + }, + { + "epoch": 0.44498202930605474, + "grad_norm": 0.9959032219806144, + "learning_rate": 1.2248586543023957e-05, + "loss": 0.4201, + "step": 25752 + }, + { + "epoch": 0.44499930881946365, + "grad_norm": 0.8335530436660141, + "learning_rate": 1.224804122181514e-05, + "loss": 0.2552, + "step": 25753 + }, + { + "epoch": 0.44501658833287255, + "grad_norm": 1.0643300412453243, + "learning_rate": 1.2247495893565282e-05, + "loss": 0.4634, + "step": 25754 + }, + { + "epoch": 0.44503386784628146, + "grad_norm": 0.7779540991845965, + "learning_rate": 1.2246950558276098e-05, + "loss": 0.3148, + "step": 25755 + }, + { + "epoch": 0.44505114735969037, + "grad_norm": 0.8835838755583504, + "learning_rate": 1.2246405215949294e-05, + "loss": 0.4467, + "step": 25756 + }, + { + "epoch": 0.4450684268730993, + "grad_norm": 1.0109152426595462, + "learning_rate": 1.2245859866586578e-05, + "loss": 0.4369, + "step": 25757 + }, + { + "epoch": 0.44508570638650813, + "grad_norm": 1.0144186611748405, + "learning_rate": 1.2245314510189658e-05, + "loss": 0.5278, + "step": 25758 + }, + { + "epoch": 0.44510298589991704, + "grad_norm": 0.9728048818451792, + "learning_rate": 1.2244769146760244e-05, + "loss": 0.4826, + "step": 25759 + }, + { + "epoch": 0.44512026541332594, + "grad_norm": 1.5967161442271556, + "learning_rate": 1.2244223776300043e-05, + "loss": 0.4608, + "step": 25760 + }, + { + "epoch": 0.44513754492673485, + "grad_norm": 1.5485057250852134, + "learning_rate": 1.224367839881076e-05, + "loss": 0.3128, + "step": 25761 + }, + { + "epoch": 0.44515482444014376, + "grad_norm": 1.2898058788822606, + "learning_rate": 1.2243133014294106e-05, + "loss": 0.4753, + "step": 25762 + }, + { + "epoch": 0.44517210395355267, + "grad_norm": 1.503957246293414, + "learning_rate": 1.2242587622751789e-05, + "loss": 0.4086, + "step": 25763 + }, + { + "epoch": 0.4451893834669616, + "grad_norm": 0.7321477775519319, + "learning_rate": 1.2242042224185517e-05, + "loss": 0.3935, + "step": 25764 + }, + { + "epoch": 0.4452066629803705, + "grad_norm": 0.9331557769232609, + "learning_rate": 1.2241496818596998e-05, + "loss": 0.4963, + "step": 25765 + }, + { + "epoch": 0.4452239424937794, + "grad_norm": 1.1735436854549555, + "learning_rate": 1.2240951405987943e-05, + "loss": 0.4295, + "step": 25766 + }, + { + "epoch": 0.4452412220071883, + "grad_norm": 0.8705033407399108, + "learning_rate": 1.2240405986360056e-05, + "loss": 0.5567, + "step": 25767 + }, + { + "epoch": 0.4452585015205972, + "grad_norm": 1.1053230929056679, + "learning_rate": 1.2239860559715048e-05, + "loss": 0.3507, + "step": 25768 + }, + { + "epoch": 0.44527578103400606, + "grad_norm": 0.8263874673553687, + "learning_rate": 1.2239315126054624e-05, + "loss": 0.3967, + "step": 25769 + }, + { + "epoch": 0.44529306054741497, + "grad_norm": 1.0196222694738095, + "learning_rate": 1.2238769685380498e-05, + "loss": 0.4098, + "step": 25770 + }, + { + "epoch": 0.4453103400608239, + "grad_norm": 1.2584494773724033, + "learning_rate": 1.2238224237694375e-05, + "loss": 0.4926, + "step": 25771 + }, + { + "epoch": 0.4453276195742328, + "grad_norm": 1.141176941397822, + "learning_rate": 1.223767878299796e-05, + "loss": 0.5653, + "step": 25772 + }, + { + "epoch": 0.4453448990876417, + "grad_norm": 0.5569559647329534, + "learning_rate": 1.223713332129297e-05, + "loss": 0.7719, + "step": 25773 + }, + { + "epoch": 0.4453621786010506, + "grad_norm": 1.0523425235387203, + "learning_rate": 1.2236587852581108e-05, + "loss": 0.352, + "step": 25774 + }, + { + "epoch": 0.4453794581144595, + "grad_norm": 1.339842351408147, + "learning_rate": 1.2236042376864078e-05, + "loss": 0.4234, + "step": 25775 + }, + { + "epoch": 0.4453967376278684, + "grad_norm": 0.7142584789083992, + "learning_rate": 1.2235496894143597e-05, + "loss": 0.2847, + "step": 25776 + }, + { + "epoch": 0.4454140171412773, + "grad_norm": 0.7600290706888382, + "learning_rate": 1.223495140442137e-05, + "loss": 0.298, + "step": 25777 + }, + { + "epoch": 0.4454312966546862, + "grad_norm": 0.8028813023703919, + "learning_rate": 1.2234405907699105e-05, + "loss": 0.3733, + "step": 25778 + }, + { + "epoch": 0.44544857616809513, + "grad_norm": 1.3807050519598183, + "learning_rate": 1.223386040397851e-05, + "loss": 0.5357, + "step": 25779 + }, + { + "epoch": 0.445465855681504, + "grad_norm": 0.716434080163494, + "learning_rate": 1.2233314893261297e-05, + "loss": 0.4745, + "step": 25780 + }, + { + "epoch": 0.4454831351949129, + "grad_norm": 0.7218927706768785, + "learning_rate": 1.2232769375549167e-05, + "loss": 0.3901, + "step": 25781 + }, + { + "epoch": 0.4455004147083218, + "grad_norm": 1.4139779712882983, + "learning_rate": 1.2232223850843839e-05, + "loss": 0.4338, + "step": 25782 + }, + { + "epoch": 0.4455176942217307, + "grad_norm": 0.7388063460158508, + "learning_rate": 1.2231678319147014e-05, + "loss": 0.4853, + "step": 25783 + }, + { + "epoch": 0.4455349737351396, + "grad_norm": 1.2247571482418171, + "learning_rate": 1.22311327804604e-05, + "loss": 0.3736, + "step": 25784 + }, + { + "epoch": 0.4455522532485485, + "grad_norm": 1.0648479460593856, + "learning_rate": 1.223058723478571e-05, + "loss": 0.618, + "step": 25785 + }, + { + "epoch": 0.44556953276195743, + "grad_norm": 0.9487888394429229, + "learning_rate": 1.2230041682124655e-05, + "loss": 0.5998, + "step": 25786 + }, + { + "epoch": 0.44558681227536634, + "grad_norm": 0.8769725246878148, + "learning_rate": 1.2229496122478936e-05, + "loss": 0.5231, + "step": 25787 + }, + { + "epoch": 0.44560409178877525, + "grad_norm": 1.2806373493052712, + "learning_rate": 1.2228950555850268e-05, + "loss": 0.6057, + "step": 25788 + }, + { + "epoch": 0.44562137130218415, + "grad_norm": 1.0977673554908862, + "learning_rate": 1.2228404982240357e-05, + "loss": 0.5343, + "step": 25789 + }, + { + "epoch": 0.445638650815593, + "grad_norm": 0.995513555817117, + "learning_rate": 1.2227859401650912e-05, + "loss": 0.6027, + "step": 25790 + }, + { + "epoch": 0.4456559303290019, + "grad_norm": 1.2355580521288283, + "learning_rate": 1.2227313814083643e-05, + "loss": 0.4331, + "step": 25791 + }, + { + "epoch": 0.4456732098424108, + "grad_norm": 0.4881213707100573, + "learning_rate": 1.2226768219540254e-05, + "loss": 0.6847, + "step": 25792 + }, + { + "epoch": 0.44569048935581973, + "grad_norm": 0.8264995667282189, + "learning_rate": 1.2226222618022464e-05, + "loss": 0.4495, + "step": 25793 + }, + { + "epoch": 0.44570776886922864, + "grad_norm": 0.8283614442413282, + "learning_rate": 1.2225677009531971e-05, + "loss": 0.3238, + "step": 25794 + }, + { + "epoch": 0.44572504838263755, + "grad_norm": 0.46895371626479443, + "learning_rate": 1.2225131394070489e-05, + "loss": 0.6085, + "step": 25795 + }, + { + "epoch": 0.44574232789604645, + "grad_norm": 0.8964599561777099, + "learning_rate": 1.2224585771639728e-05, + "loss": 0.3691, + "step": 25796 + }, + { + "epoch": 0.44575960740945536, + "grad_norm": 1.4654215299607882, + "learning_rate": 1.2224040142241394e-05, + "loss": 0.4851, + "step": 25797 + }, + { + "epoch": 0.44577688692286427, + "grad_norm": 0.8334191869588886, + "learning_rate": 1.2223494505877196e-05, + "loss": 0.6437, + "step": 25798 + }, + { + "epoch": 0.4457941664362732, + "grad_norm": 0.9657810434243164, + "learning_rate": 1.2222948862548847e-05, + "loss": 0.442, + "step": 25799 + }, + { + "epoch": 0.4458114459496821, + "grad_norm": 1.4627914941970817, + "learning_rate": 1.2222403212258054e-05, + "loss": 0.5178, + "step": 25800 + }, + { + "epoch": 0.44582872546309094, + "grad_norm": 0.8310800174166433, + "learning_rate": 1.222185755500652e-05, + "loss": 0.4322, + "step": 25801 + }, + { + "epoch": 0.44584600497649984, + "grad_norm": 0.8022299342462605, + "learning_rate": 1.2221311890795964e-05, + "loss": 0.2808, + "step": 25802 + }, + { + "epoch": 0.44586328448990875, + "grad_norm": 0.507485861036069, + "learning_rate": 1.2220766219628088e-05, + "loss": 0.6303, + "step": 25803 + }, + { + "epoch": 0.44588056400331766, + "grad_norm": 0.8890093994911702, + "learning_rate": 1.2220220541504604e-05, + "loss": 0.4478, + "step": 25804 + }, + { + "epoch": 0.44589784351672657, + "grad_norm": 0.8229986392480705, + "learning_rate": 1.2219674856427222e-05, + "loss": 0.5091, + "step": 25805 + }, + { + "epoch": 0.4459151230301355, + "grad_norm": 0.4539089538800127, + "learning_rate": 1.2219129164397647e-05, + "loss": 0.6049, + "step": 25806 + }, + { + "epoch": 0.4459324025435444, + "grad_norm": 1.1303433066164195, + "learning_rate": 1.2218583465417591e-05, + "loss": 0.4565, + "step": 25807 + }, + { + "epoch": 0.4459496820569533, + "grad_norm": 1.7993437499500669, + "learning_rate": 1.2218037759488765e-05, + "loss": 0.4142, + "step": 25808 + }, + { + "epoch": 0.4459669615703622, + "grad_norm": 0.8439601320671491, + "learning_rate": 1.2217492046612876e-05, + "loss": 0.8783, + "step": 25809 + }, + { + "epoch": 0.4459842410837711, + "grad_norm": 0.9837010779163717, + "learning_rate": 1.2216946326791634e-05, + "loss": 0.2588, + "step": 25810 + }, + { + "epoch": 0.44600152059717996, + "grad_norm": 1.1042531124457622, + "learning_rate": 1.2216400600026748e-05, + "loss": 0.4671, + "step": 25811 + }, + { + "epoch": 0.44601880011058886, + "grad_norm": 1.4415579158247926, + "learning_rate": 1.2215854866319923e-05, + "loss": 0.6171, + "step": 25812 + }, + { + "epoch": 0.44603607962399777, + "grad_norm": 0.9276603002237639, + "learning_rate": 1.2215309125672873e-05, + "loss": 0.5055, + "step": 25813 + }, + { + "epoch": 0.4460533591374067, + "grad_norm": 0.8605033350299729, + "learning_rate": 1.2214763378087309e-05, + "loss": 0.4885, + "step": 25814 + }, + { + "epoch": 0.4460706386508156, + "grad_norm": 0.7288776077260539, + "learning_rate": 1.2214217623564936e-05, + "loss": 0.2741, + "step": 25815 + }, + { + "epoch": 0.4460879181642245, + "grad_norm": 1.12274922877133, + "learning_rate": 1.2213671862107466e-05, + "loss": 0.4709, + "step": 25816 + }, + { + "epoch": 0.4461051976776334, + "grad_norm": 1.217888110908953, + "learning_rate": 1.2213126093716607e-05, + "loss": 0.7187, + "step": 25817 + }, + { + "epoch": 0.4461224771910423, + "grad_norm": 0.6467438555410773, + "learning_rate": 1.221258031839407e-05, + "loss": 0.6389, + "step": 25818 + }, + { + "epoch": 0.4461397567044512, + "grad_norm": 0.7252561631717788, + "learning_rate": 1.2212034536141561e-05, + "loss": 0.5168, + "step": 25819 + }, + { + "epoch": 0.4461570362178601, + "grad_norm": 0.7332952415812773, + "learning_rate": 1.2211488746960796e-05, + "loss": 0.2359, + "step": 25820 + }, + { + "epoch": 0.44617431573126903, + "grad_norm": 0.7279672124817173, + "learning_rate": 1.2210942950853478e-05, + "loss": 0.4123, + "step": 25821 + }, + { + "epoch": 0.4461915952446779, + "grad_norm": 1.0754919124203968, + "learning_rate": 1.2210397147821318e-05, + "loss": 0.4212, + "step": 25822 + }, + { + "epoch": 0.4462088747580868, + "grad_norm": 0.8627869734373104, + "learning_rate": 1.2209851337866026e-05, + "loss": 0.2992, + "step": 25823 + }, + { + "epoch": 0.4462261542714957, + "grad_norm": 0.610992942274328, + "learning_rate": 1.2209305520989309e-05, + "loss": 0.3702, + "step": 25824 + }, + { + "epoch": 0.4462434337849046, + "grad_norm": 0.8176427505050682, + "learning_rate": 1.2208759697192885e-05, + "loss": 0.3543, + "step": 25825 + }, + { + "epoch": 0.4462607132983135, + "grad_norm": 0.8303942006463969, + "learning_rate": 1.2208213866478454e-05, + "loss": 0.3836, + "step": 25826 + }, + { + "epoch": 0.4462779928117224, + "grad_norm": 1.0048665203259388, + "learning_rate": 1.2207668028847727e-05, + "loss": 0.7151, + "step": 25827 + }, + { + "epoch": 0.44629527232513133, + "grad_norm": 0.4193652684156675, + "learning_rate": 1.2207122184302422e-05, + "loss": 0.82, + "step": 25828 + }, + { + "epoch": 0.44631255183854024, + "grad_norm": 0.9237370540048344, + "learning_rate": 1.220657633284424e-05, + "loss": 0.5703, + "step": 25829 + }, + { + "epoch": 0.44632983135194915, + "grad_norm": 0.7900142795951947, + "learning_rate": 1.2206030474474892e-05, + "loss": 0.3602, + "step": 25830 + }, + { + "epoch": 0.44634711086535805, + "grad_norm": 1.294615981470495, + "learning_rate": 1.2205484609196092e-05, + "loss": 0.3145, + "step": 25831 + }, + { + "epoch": 0.4463643903787669, + "grad_norm": 1.0945807546850688, + "learning_rate": 1.2204938737009542e-05, + "loss": 0.6754, + "step": 25832 + }, + { + "epoch": 0.4463816698921758, + "grad_norm": 0.8734065453003735, + "learning_rate": 1.220439285791696e-05, + "loss": 0.4636, + "step": 25833 + }, + { + "epoch": 0.4463989494055847, + "grad_norm": 0.9170110339071851, + "learning_rate": 1.220384697192005e-05, + "loss": 0.417, + "step": 25834 + }, + { + "epoch": 0.44641622891899363, + "grad_norm": 0.5374725022693148, + "learning_rate": 1.2203301079020524e-05, + "loss": 0.7078, + "step": 25835 + }, + { + "epoch": 0.44643350843240254, + "grad_norm": 0.9596732510265705, + "learning_rate": 1.2202755179220094e-05, + "loss": 0.4347, + "step": 25836 + }, + { + "epoch": 0.44645078794581144, + "grad_norm": 0.8714190677130464, + "learning_rate": 1.2202209272520465e-05, + "loss": 0.2794, + "step": 25837 + }, + { + "epoch": 0.44646806745922035, + "grad_norm": 0.8966501483387983, + "learning_rate": 1.2201663358923348e-05, + "loss": 0.6785, + "step": 25838 + }, + { + "epoch": 0.44648534697262926, + "grad_norm": 0.8343008381907716, + "learning_rate": 1.2201117438430456e-05, + "loss": 0.4078, + "step": 25839 + }, + { + "epoch": 0.44650262648603817, + "grad_norm": 0.8960750295161457, + "learning_rate": 1.22005715110435e-05, + "loss": 0.4647, + "step": 25840 + }, + { + "epoch": 0.4465199059994471, + "grad_norm": 1.0323318120976779, + "learning_rate": 1.220002557676418e-05, + "loss": 0.4939, + "step": 25841 + }, + { + "epoch": 0.446537185512856, + "grad_norm": 0.623102896821036, + "learning_rate": 1.2199479635594218e-05, + "loss": 0.5625, + "step": 25842 + }, + { + "epoch": 0.44655446502626484, + "grad_norm": 1.0382952705070667, + "learning_rate": 1.2198933687535316e-05, + "loss": 0.4657, + "step": 25843 + }, + { + "epoch": 0.44657174453967374, + "grad_norm": 0.8098272749494101, + "learning_rate": 1.2198387732589187e-05, + "loss": 0.5333, + "step": 25844 + }, + { + "epoch": 0.44658902405308265, + "grad_norm": 1.5114536478912817, + "learning_rate": 1.2197841770757541e-05, + "loss": 0.2666, + "step": 25845 + }, + { + "epoch": 0.44660630356649156, + "grad_norm": 1.144284147675233, + "learning_rate": 1.2197295802042088e-05, + "loss": 0.4867, + "step": 25846 + }, + { + "epoch": 0.44662358307990047, + "grad_norm": 0.8721399804642666, + "learning_rate": 1.2196749826444536e-05, + "loss": 0.3469, + "step": 25847 + }, + { + "epoch": 0.4466408625933094, + "grad_norm": 1.3893182117976837, + "learning_rate": 1.2196203843966598e-05, + "loss": 0.555, + "step": 25848 + }, + { + "epoch": 0.4466581421067183, + "grad_norm": 1.0210670746756214, + "learning_rate": 1.2195657854609984e-05, + "loss": 0.3395, + "step": 25849 + }, + { + "epoch": 0.4466754216201272, + "grad_norm": 0.515904896078278, + "learning_rate": 1.2195111858376399e-05, + "loss": 0.4422, + "step": 25850 + }, + { + "epoch": 0.4466927011335361, + "grad_norm": 0.7857678305615634, + "learning_rate": 1.2194565855267562e-05, + "loss": 0.5825, + "step": 25851 + }, + { + "epoch": 0.446709980646945, + "grad_norm": 0.5237921519101024, + "learning_rate": 1.2194019845285172e-05, + "loss": 0.7778, + "step": 25852 + }, + { + "epoch": 0.4467272601603539, + "grad_norm": 1.0206828824886238, + "learning_rate": 1.219347382843095e-05, + "loss": 0.5283, + "step": 25853 + }, + { + "epoch": 0.44674453967376276, + "grad_norm": 0.9053451663365698, + "learning_rate": 1.21929278047066e-05, + "loss": 0.513, + "step": 25854 + }, + { + "epoch": 0.44676181918717167, + "grad_norm": 1.0581236132152436, + "learning_rate": 1.219238177411383e-05, + "loss": 0.5609, + "step": 25855 + }, + { + "epoch": 0.4467790987005806, + "grad_norm": 0.9620609647834298, + "learning_rate": 1.2191835736654357e-05, + "loss": 0.277, + "step": 25856 + }, + { + "epoch": 0.4467963782139895, + "grad_norm": 0.7756601629482603, + "learning_rate": 1.2191289692329889e-05, + "loss": 0.2931, + "step": 25857 + }, + { + "epoch": 0.4468136577273984, + "grad_norm": 0.8954998784641394, + "learning_rate": 1.219074364114213e-05, + "loss": 0.3492, + "step": 25858 + }, + { + "epoch": 0.4468309372408073, + "grad_norm": 0.7467667403385421, + "learning_rate": 1.21901975830928e-05, + "loss": 0.4874, + "step": 25859 + }, + { + "epoch": 0.4468482167542162, + "grad_norm": 1.179984937994718, + "learning_rate": 1.2189651518183605e-05, + "loss": 0.73, + "step": 25860 + }, + { + "epoch": 0.4468654962676251, + "grad_norm": 0.6945655578923117, + "learning_rate": 1.2189105446416254e-05, + "loss": 0.5384, + "step": 25861 + }, + { + "epoch": 0.446882775781034, + "grad_norm": 0.9267824800458984, + "learning_rate": 1.2188559367792458e-05, + "loss": 0.4471, + "step": 25862 + }, + { + "epoch": 0.44690005529444293, + "grad_norm": 0.8729024156271514, + "learning_rate": 1.2188013282313928e-05, + "loss": 0.4183, + "step": 25863 + }, + { + "epoch": 0.4469173348078518, + "grad_norm": 0.9380055345693455, + "learning_rate": 1.2187467189982371e-05, + "loss": 0.4021, + "step": 25864 + }, + { + "epoch": 0.4469346143212607, + "grad_norm": 0.8936809976500281, + "learning_rate": 1.2186921090799504e-05, + "loss": 0.3427, + "step": 25865 + }, + { + "epoch": 0.4469518938346696, + "grad_norm": 1.1828586193464854, + "learning_rate": 1.2186374984767035e-05, + "loss": 0.5306, + "step": 25866 + }, + { + "epoch": 0.4469691733480785, + "grad_norm": 0.7894401683798121, + "learning_rate": 1.2185828871886669e-05, + "loss": 0.3966, + "step": 25867 + }, + { + "epoch": 0.4469864528614874, + "grad_norm": 0.9581560876621584, + "learning_rate": 1.2185282752160125e-05, + "loss": 0.3701, + "step": 25868 + }, + { + "epoch": 0.4470037323748963, + "grad_norm": 0.8139686732028217, + "learning_rate": 1.218473662558911e-05, + "loss": 0.3457, + "step": 25869 + }, + { + "epoch": 0.44702101188830523, + "grad_norm": 0.6352961912362932, + "learning_rate": 1.2184190492175328e-05, + "loss": 0.5823, + "step": 25870 + }, + { + "epoch": 0.44703829140171414, + "grad_norm": 0.9601258949552601, + "learning_rate": 1.2183644351920502e-05, + "loss": 0.5429, + "step": 25871 + }, + { + "epoch": 0.44705557091512305, + "grad_norm": 0.4830812186349211, + "learning_rate": 1.218309820482633e-05, + "loss": 0.9211, + "step": 25872 + }, + { + "epoch": 0.44707285042853195, + "grad_norm": 0.8109642874900043, + "learning_rate": 1.2182552050894533e-05, + "loss": 0.4671, + "step": 25873 + }, + { + "epoch": 0.44709012994194086, + "grad_norm": 0.9897262695474262, + "learning_rate": 1.2182005890126817e-05, + "loss": 0.4235, + "step": 25874 + }, + { + "epoch": 0.4471074094553497, + "grad_norm": 0.8948646255879115, + "learning_rate": 1.218145972252489e-05, + "loss": 0.6114, + "step": 25875 + }, + { + "epoch": 0.4471246889687586, + "grad_norm": 0.9830100902938432, + "learning_rate": 1.2180913548090468e-05, + "loss": 0.2797, + "step": 25876 + }, + { + "epoch": 0.44714196848216753, + "grad_norm": 0.7599720816692407, + "learning_rate": 1.2180367366825259e-05, + "loss": 0.4408, + "step": 25877 + }, + { + "epoch": 0.44715924799557644, + "grad_norm": 1.1288530598761974, + "learning_rate": 1.2179821178730968e-05, + "loss": 0.3096, + "step": 25878 + }, + { + "epoch": 0.44717652750898534, + "grad_norm": 1.0289101129261058, + "learning_rate": 1.2179274983809318e-05, + "loss": 0.5641, + "step": 25879 + }, + { + "epoch": 0.44719380702239425, + "grad_norm": 1.9517421935385764, + "learning_rate": 1.217872878206201e-05, + "loss": 0.4159, + "step": 25880 + }, + { + "epoch": 0.44721108653580316, + "grad_norm": 0.681036149287019, + "learning_rate": 1.217818257349076e-05, + "loss": 0.4121, + "step": 25881 + }, + { + "epoch": 0.44722836604921207, + "grad_norm": 0.943098533384753, + "learning_rate": 1.2177636358097278e-05, + "loss": 0.3582, + "step": 25882 + }, + { + "epoch": 0.447245645562621, + "grad_norm": 1.3047260821009445, + "learning_rate": 1.217709013588327e-05, + "loss": 0.4281, + "step": 25883 + }, + { + "epoch": 0.4472629250760299, + "grad_norm": 0.9790034256576221, + "learning_rate": 1.2176543906850448e-05, + "loss": 0.4097, + "step": 25884 + }, + { + "epoch": 0.44728020458943873, + "grad_norm": 0.9311430758380507, + "learning_rate": 1.217599767100053e-05, + "loss": 0.4502, + "step": 25885 + }, + { + "epoch": 0.44729748410284764, + "grad_norm": 1.0899167023069551, + "learning_rate": 1.217545142833522e-05, + "loss": 0.4866, + "step": 25886 + }, + { + "epoch": 0.44731476361625655, + "grad_norm": 1.0323692573529066, + "learning_rate": 1.2174905178856231e-05, + "loss": 0.3752, + "step": 25887 + }, + { + "epoch": 0.44733204312966546, + "grad_norm": 1.4223627910907857, + "learning_rate": 1.2174358922565274e-05, + "loss": 0.3916, + "step": 25888 + }, + { + "epoch": 0.44734932264307437, + "grad_norm": 0.7382978588726797, + "learning_rate": 1.2173812659464059e-05, + "loss": 0.2698, + "step": 25889 + }, + { + "epoch": 0.4473666021564833, + "grad_norm": 0.9757567709584304, + "learning_rate": 1.2173266389554297e-05, + "loss": 0.5016, + "step": 25890 + }, + { + "epoch": 0.4473838816698922, + "grad_norm": 1.1243899526027714, + "learning_rate": 1.2172720112837702e-05, + "loss": 0.4179, + "step": 25891 + }, + { + "epoch": 0.4474011611833011, + "grad_norm": 1.4261346153430958, + "learning_rate": 1.217217382931598e-05, + "loss": 0.4074, + "step": 25892 + }, + { + "epoch": 0.44741844069671, + "grad_norm": 0.5571425509534857, + "learning_rate": 1.2171627538990847e-05, + "loss": 0.7342, + "step": 25893 + }, + { + "epoch": 0.4474357202101189, + "grad_norm": 0.7529427122382638, + "learning_rate": 1.217108124186401e-05, + "loss": 0.4125, + "step": 25894 + }, + { + "epoch": 0.4474529997235278, + "grad_norm": 1.2692026987996214, + "learning_rate": 1.217053493793718e-05, + "loss": 0.5732, + "step": 25895 + }, + { + "epoch": 0.44747027923693666, + "grad_norm": 0.815165387035039, + "learning_rate": 1.216998862721207e-05, + "loss": 0.5269, + "step": 25896 + }, + { + "epoch": 0.44748755875034557, + "grad_norm": 0.9880959219400611, + "learning_rate": 1.2169442309690393e-05, + "loss": 0.6225, + "step": 25897 + }, + { + "epoch": 0.4475048382637545, + "grad_norm": 1.08328183136758, + "learning_rate": 1.2168895985373856e-05, + "loss": 0.4479, + "step": 25898 + }, + { + "epoch": 0.4475221177771634, + "grad_norm": 1.0609833515779672, + "learning_rate": 1.2168349654264173e-05, + "loss": 0.4942, + "step": 25899 + }, + { + "epoch": 0.4475393972905723, + "grad_norm": 1.4949398234308942, + "learning_rate": 1.2167803316363054e-05, + "loss": 0.5567, + "step": 25900 + }, + { + "epoch": 0.4475566768039812, + "grad_norm": 0.8505868743098551, + "learning_rate": 1.2167256971672208e-05, + "loss": 0.4508, + "step": 25901 + }, + { + "epoch": 0.4475739563173901, + "grad_norm": 1.0060509876148822, + "learning_rate": 1.2166710620193354e-05, + "loss": 0.5037, + "step": 25902 + }, + { + "epoch": 0.447591235830799, + "grad_norm": 0.9627623332759717, + "learning_rate": 1.2166164261928193e-05, + "loss": 0.4709, + "step": 25903 + }, + { + "epoch": 0.4476085153442079, + "grad_norm": 0.5164098173778003, + "learning_rate": 1.2165617896878442e-05, + "loss": 0.2219, + "step": 25904 + }, + { + "epoch": 0.44762579485761683, + "grad_norm": 0.8182339800623702, + "learning_rate": 1.216507152504581e-05, + "loss": 0.3791, + "step": 25905 + }, + { + "epoch": 0.4476430743710257, + "grad_norm": 1.2889317452706988, + "learning_rate": 1.2164525146432012e-05, + "loss": 0.4555, + "step": 25906 + }, + { + "epoch": 0.4476603538844346, + "grad_norm": 1.2878348766624144, + "learning_rate": 1.2163978761038753e-05, + "loss": 0.35, + "step": 25907 + }, + { + "epoch": 0.4476776333978435, + "grad_norm": 1.038796501185488, + "learning_rate": 1.2163432368867751e-05, + "loss": 0.5345, + "step": 25908 + }, + { + "epoch": 0.4476949129112524, + "grad_norm": 1.0416736942990112, + "learning_rate": 1.2162885969920715e-05, + "loss": 0.6391, + "step": 25909 + }, + { + "epoch": 0.4477121924246613, + "grad_norm": 1.2101314452600722, + "learning_rate": 1.2162339564199353e-05, + "loss": 0.4732, + "step": 25910 + }, + { + "epoch": 0.4477294719380702, + "grad_norm": 1.3748241575321165, + "learning_rate": 1.2161793151705382e-05, + "loss": 0.4118, + "step": 25911 + }, + { + "epoch": 0.44774675145147913, + "grad_norm": 1.1472680285737897, + "learning_rate": 1.2161246732440508e-05, + "loss": 0.3612, + "step": 25912 + }, + { + "epoch": 0.44776403096488804, + "grad_norm": 0.5087943716571329, + "learning_rate": 1.2160700306406446e-05, + "loss": 0.4806, + "step": 25913 + }, + { + "epoch": 0.44778131047829695, + "grad_norm": 0.7248429833307362, + "learning_rate": 1.2160153873604907e-05, + "loss": 0.3767, + "step": 25914 + }, + { + "epoch": 0.44779858999170585, + "grad_norm": 0.9118035413045716, + "learning_rate": 1.21596074340376e-05, + "loss": 0.5558, + "step": 25915 + }, + { + "epoch": 0.44781586950511476, + "grad_norm": 0.8580885914595894, + "learning_rate": 1.2159060987706242e-05, + "loss": 0.495, + "step": 25916 + }, + { + "epoch": 0.4478331490185236, + "grad_norm": 1.3824140863393781, + "learning_rate": 1.2158514534612538e-05, + "loss": 0.4219, + "step": 25917 + }, + { + "epoch": 0.4478504285319325, + "grad_norm": 0.9890304566628916, + "learning_rate": 1.2157968074758202e-05, + "loss": 0.3831, + "step": 25918 + }, + { + "epoch": 0.44786770804534143, + "grad_norm": 0.9721746636382766, + "learning_rate": 1.2157421608144949e-05, + "loss": 0.4695, + "step": 25919 + }, + { + "epoch": 0.44788498755875034, + "grad_norm": 0.8265610652146311, + "learning_rate": 1.2156875134774486e-05, + "loss": 0.3874, + "step": 25920 + }, + { + "epoch": 0.44790226707215924, + "grad_norm": 0.8314887003078124, + "learning_rate": 1.2156328654648523e-05, + "loss": 0.4591, + "step": 25921 + }, + { + "epoch": 0.44791954658556815, + "grad_norm": 1.087442352746517, + "learning_rate": 1.2155782167768779e-05, + "loss": 0.3354, + "step": 25922 + }, + { + "epoch": 0.44793682609897706, + "grad_norm": 1.0095222155221693, + "learning_rate": 1.2155235674136962e-05, + "loss": 0.4327, + "step": 25923 + }, + { + "epoch": 0.44795410561238597, + "grad_norm": 0.3918690600100197, + "learning_rate": 1.215468917375478e-05, + "loss": 0.5399, + "step": 25924 + }, + { + "epoch": 0.4479713851257949, + "grad_norm": 1.0747527049403982, + "learning_rate": 1.215414266662395e-05, + "loss": 0.5288, + "step": 25925 + }, + { + "epoch": 0.4479886646392038, + "grad_norm": 0.42381943891210516, + "learning_rate": 1.215359615274618e-05, + "loss": 0.4302, + "step": 25926 + }, + { + "epoch": 0.4480059441526127, + "grad_norm": 1.1113363259155993, + "learning_rate": 1.2153049632123181e-05, + "loss": 0.5131, + "step": 25927 + }, + { + "epoch": 0.44802322366602154, + "grad_norm": 0.8081868875867364, + "learning_rate": 1.2152503104756672e-05, + "loss": 0.4238, + "step": 25928 + }, + { + "epoch": 0.44804050317943045, + "grad_norm": 0.7204776164898841, + "learning_rate": 1.2151956570648358e-05, + "loss": 0.5499, + "step": 25929 + }, + { + "epoch": 0.44805778269283936, + "grad_norm": 1.2173404521956386, + "learning_rate": 1.215141002979995e-05, + "loss": 0.4437, + "step": 25930 + }, + { + "epoch": 0.44807506220624826, + "grad_norm": 0.8605913473215971, + "learning_rate": 1.2150863482213165e-05, + "loss": 0.3138, + "step": 25931 + }, + { + "epoch": 0.4480923417196572, + "grad_norm": 0.9984053883738714, + "learning_rate": 1.2150316927889711e-05, + "loss": 0.5961, + "step": 25932 + }, + { + "epoch": 0.4481096212330661, + "grad_norm": 0.8258546020204057, + "learning_rate": 1.2149770366831302e-05, + "loss": 0.3346, + "step": 25933 + }, + { + "epoch": 0.448126900746475, + "grad_norm": 1.0722968891216387, + "learning_rate": 1.2149223799039649e-05, + "loss": 0.467, + "step": 25934 + }, + { + "epoch": 0.4481441802598839, + "grad_norm": 1.0720818584426441, + "learning_rate": 1.214867722451646e-05, + "loss": 0.5926, + "step": 25935 + }, + { + "epoch": 0.4481614597732928, + "grad_norm": 0.8945436767311209, + "learning_rate": 1.2148130643263454e-05, + "loss": 0.3774, + "step": 25936 + }, + { + "epoch": 0.4481787392867017, + "grad_norm": 0.9246821559313729, + "learning_rate": 1.214758405528234e-05, + "loss": 0.3943, + "step": 25937 + }, + { + "epoch": 0.44819601880011056, + "grad_norm": 1.1091635370732922, + "learning_rate": 1.2147037460574826e-05, + "loss": 0.5182, + "step": 25938 + }, + { + "epoch": 0.44821329831351947, + "grad_norm": 1.0903253083402946, + "learning_rate": 1.214649085914263e-05, + "loss": 0.3395, + "step": 25939 + }, + { + "epoch": 0.4482305778269284, + "grad_norm": 0.7540289671616841, + "learning_rate": 1.2145944250987462e-05, + "loss": 0.3956, + "step": 25940 + }, + { + "epoch": 0.4482478573403373, + "grad_norm": 1.1446427744595096, + "learning_rate": 1.2145397636111031e-05, + "loss": 0.4956, + "step": 25941 + }, + { + "epoch": 0.4482651368537462, + "grad_norm": 0.7616682251645476, + "learning_rate": 1.2144851014515055e-05, + "loss": 0.3184, + "step": 25942 + }, + { + "epoch": 0.4482824163671551, + "grad_norm": 1.2425474356299318, + "learning_rate": 1.2144304386201244e-05, + "loss": 0.5427, + "step": 25943 + }, + { + "epoch": 0.448299695880564, + "grad_norm": 3.004922842448046, + "learning_rate": 1.2143757751171304e-05, + "loss": 0.5421, + "step": 25944 + }, + { + "epoch": 0.4483169753939729, + "grad_norm": 1.13741439896733, + "learning_rate": 1.2143211109426954e-05, + "loss": 0.518, + "step": 25945 + }, + { + "epoch": 0.4483342549073818, + "grad_norm": 1.924798096184863, + "learning_rate": 1.2142664460969904e-05, + "loss": 0.4688, + "step": 25946 + }, + { + "epoch": 0.44835153442079073, + "grad_norm": 0.6466324847744609, + "learning_rate": 1.2142117805801865e-05, + "loss": 0.6296, + "step": 25947 + }, + { + "epoch": 0.44836881393419964, + "grad_norm": 0.8911241893398585, + "learning_rate": 1.2141571143924552e-05, + "loss": 0.6063, + "step": 25948 + }, + { + "epoch": 0.4483860934476085, + "grad_norm": 0.8797122263346497, + "learning_rate": 1.2141024475339674e-05, + "loss": 0.4255, + "step": 25949 + }, + { + "epoch": 0.4484033729610174, + "grad_norm": 0.8409926394365561, + "learning_rate": 1.2140477800048942e-05, + "loss": 0.4062, + "step": 25950 + }, + { + "epoch": 0.4484206524744263, + "grad_norm": 0.9524240106812882, + "learning_rate": 1.2139931118054075e-05, + "loss": 0.4555, + "step": 25951 + }, + { + "epoch": 0.4484379319878352, + "grad_norm": 1.0452073948539367, + "learning_rate": 1.213938442935678e-05, + "loss": 0.4515, + "step": 25952 + }, + { + "epoch": 0.4484552115012441, + "grad_norm": 0.7640396239594004, + "learning_rate": 1.2138837733958768e-05, + "loss": 0.7753, + "step": 25953 + }, + { + "epoch": 0.44847249101465303, + "grad_norm": 1.134015459863512, + "learning_rate": 1.213829103186176e-05, + "loss": 0.3707, + "step": 25954 + }, + { + "epoch": 0.44848977052806194, + "grad_norm": 1.159582276909095, + "learning_rate": 1.2137744323067456e-05, + "loss": 0.3931, + "step": 25955 + }, + { + "epoch": 0.44850705004147084, + "grad_norm": 0.7925470704882025, + "learning_rate": 1.2137197607577575e-05, + "loss": 0.471, + "step": 25956 + }, + { + "epoch": 0.44852432955487975, + "grad_norm": 1.1869685276469342, + "learning_rate": 1.2136650885393833e-05, + "loss": 0.4445, + "step": 25957 + }, + { + "epoch": 0.44854160906828866, + "grad_norm": 1.1753151870741654, + "learning_rate": 1.2136104156517931e-05, + "loss": 0.3684, + "step": 25958 + }, + { + "epoch": 0.4485588885816975, + "grad_norm": 0.9240738223461116, + "learning_rate": 1.2135557420951594e-05, + "loss": 0.3479, + "step": 25959 + }, + { + "epoch": 0.4485761680951064, + "grad_norm": 1.4846301583074297, + "learning_rate": 1.2135010678696529e-05, + "loss": 0.4657, + "step": 25960 + }, + { + "epoch": 0.4485934476085153, + "grad_norm": 1.0584600828624344, + "learning_rate": 1.2134463929754443e-05, + "loss": 0.5076, + "step": 25961 + }, + { + "epoch": 0.44861072712192424, + "grad_norm": 1.1271105120535816, + "learning_rate": 1.2133917174127059e-05, + "loss": 0.5665, + "step": 25962 + }, + { + "epoch": 0.44862800663533314, + "grad_norm": 1.0638099808819892, + "learning_rate": 1.2133370411816083e-05, + "loss": 0.3546, + "step": 25963 + }, + { + "epoch": 0.44864528614874205, + "grad_norm": 1.0457859632589042, + "learning_rate": 1.2132823642823229e-05, + "loss": 0.6527, + "step": 25964 + }, + { + "epoch": 0.44866256566215096, + "grad_norm": 0.8211639419689649, + "learning_rate": 1.2132276867150209e-05, + "loss": 0.313, + "step": 25965 + }, + { + "epoch": 0.44867984517555987, + "grad_norm": 0.9762135209074948, + "learning_rate": 1.2131730084798737e-05, + "loss": 0.3392, + "step": 25966 + }, + { + "epoch": 0.4486971246889688, + "grad_norm": 0.986501076740018, + "learning_rate": 1.2131183295770519e-05, + "loss": 0.3029, + "step": 25967 + }, + { + "epoch": 0.4487144042023777, + "grad_norm": 0.9419239076044752, + "learning_rate": 1.2130636500067277e-05, + "loss": 0.3481, + "step": 25968 + }, + { + "epoch": 0.4487316837157866, + "grad_norm": 0.4090754945776587, + "learning_rate": 1.2130089697690721e-05, + "loss": 0.5251, + "step": 25969 + }, + { + "epoch": 0.44874896322919544, + "grad_norm": 1.375859865662666, + "learning_rate": 1.2129542888642558e-05, + "loss": 0.3918, + "step": 25970 + }, + { + "epoch": 0.44876624274260435, + "grad_norm": 0.9240330607996632, + "learning_rate": 1.2128996072924508e-05, + "loss": 0.4252, + "step": 25971 + }, + { + "epoch": 0.44878352225601326, + "grad_norm": 0.8577642825193879, + "learning_rate": 1.2128449250538281e-05, + "loss": 0.469, + "step": 25972 + }, + { + "epoch": 0.44880080176942216, + "grad_norm": 0.8241679061474401, + "learning_rate": 1.2127902421485587e-05, + "loss": 0.4277, + "step": 25973 + }, + { + "epoch": 0.44881808128283107, + "grad_norm": 0.5887309623628437, + "learning_rate": 1.2127355585768144e-05, + "loss": 0.3472, + "step": 25974 + }, + { + "epoch": 0.44883536079624, + "grad_norm": 0.8682873022627052, + "learning_rate": 1.2126808743387657e-05, + "loss": 0.4333, + "step": 25975 + }, + { + "epoch": 0.4488526403096489, + "grad_norm": 0.8535947726639078, + "learning_rate": 1.2126261894345847e-05, + "loss": 0.2986, + "step": 25976 + }, + { + "epoch": 0.4488699198230578, + "grad_norm": 1.132346314879169, + "learning_rate": 1.2125715038644424e-05, + "loss": 0.5556, + "step": 25977 + }, + { + "epoch": 0.4488871993364667, + "grad_norm": 1.0379058523193534, + "learning_rate": 1.2125168176285094e-05, + "loss": 0.4437, + "step": 25978 + }, + { + "epoch": 0.4489044788498756, + "grad_norm": 1.4219357331254834, + "learning_rate": 1.2124621307269582e-05, + "loss": 0.4686, + "step": 25979 + }, + { + "epoch": 0.4489217583632845, + "grad_norm": 1.403764131229171, + "learning_rate": 1.212407443159959e-05, + "loss": 0.3259, + "step": 25980 + }, + { + "epoch": 0.44893903787669337, + "grad_norm": 1.3072023036376692, + "learning_rate": 1.2123527549276837e-05, + "loss": 0.3473, + "step": 25981 + }, + { + "epoch": 0.4489563173901023, + "grad_norm": 1.5146517847110925, + "learning_rate": 1.2122980660303036e-05, + "loss": 0.4754, + "step": 25982 + }, + { + "epoch": 0.4489735969035112, + "grad_norm": 1.2286869312275681, + "learning_rate": 1.2122433764679898e-05, + "loss": 0.4168, + "step": 25983 + }, + { + "epoch": 0.4489908764169201, + "grad_norm": 1.271604548828188, + "learning_rate": 1.2121886862409136e-05, + "loss": 0.7347, + "step": 25984 + }, + { + "epoch": 0.449008155930329, + "grad_norm": 1.2081503046926114, + "learning_rate": 1.2121339953492463e-05, + "loss": 0.4098, + "step": 25985 + }, + { + "epoch": 0.4490254354437379, + "grad_norm": 0.824765525687901, + "learning_rate": 1.2120793037931592e-05, + "loss": 0.4269, + "step": 25986 + }, + { + "epoch": 0.4490427149571468, + "grad_norm": 0.8648298169457102, + "learning_rate": 1.2120246115728235e-05, + "loss": 0.5771, + "step": 25987 + }, + { + "epoch": 0.4490599944705557, + "grad_norm": 0.6833231003162344, + "learning_rate": 1.2119699186884105e-05, + "loss": 0.2339, + "step": 25988 + }, + { + "epoch": 0.44907727398396463, + "grad_norm": 1.3088382532195868, + "learning_rate": 1.2119152251400922e-05, + "loss": 0.487, + "step": 25989 + }, + { + "epoch": 0.44909455349737354, + "grad_norm": 1.0125873738269224, + "learning_rate": 1.2118605309280384e-05, + "loss": 0.2793, + "step": 25990 + }, + { + "epoch": 0.4491118330107824, + "grad_norm": 0.9147200203921528, + "learning_rate": 1.2118058360524221e-05, + "loss": 0.2811, + "step": 25991 + }, + { + "epoch": 0.4491291125241913, + "grad_norm": 0.7072280944393076, + "learning_rate": 1.2117511405134135e-05, + "loss": 0.4237, + "step": 25992 + }, + { + "epoch": 0.4491463920376002, + "grad_norm": 1.2073688027805902, + "learning_rate": 1.211696444311184e-05, + "loss": 0.5353, + "step": 25993 + }, + { + "epoch": 0.4491636715510091, + "grad_norm": 0.8628132088210017, + "learning_rate": 1.2116417474459054e-05, + "loss": 0.5365, + "step": 25994 + }, + { + "epoch": 0.449180951064418, + "grad_norm": 0.7350642133915146, + "learning_rate": 1.2115870499177488e-05, + "loss": 0.4661, + "step": 25995 + }, + { + "epoch": 0.44919823057782693, + "grad_norm": 0.9334917468540312, + "learning_rate": 1.2115323517268857e-05, + "loss": 0.3666, + "step": 25996 + }, + { + "epoch": 0.44921551009123584, + "grad_norm": 0.8998204441359824, + "learning_rate": 1.2114776528734871e-05, + "loss": 0.5218, + "step": 25997 + }, + { + "epoch": 0.44923278960464474, + "grad_norm": 0.619039191836876, + "learning_rate": 1.211422953357724e-05, + "loss": 0.4568, + "step": 25998 + }, + { + "epoch": 0.44925006911805365, + "grad_norm": 1.0374715449321967, + "learning_rate": 1.2113682531797686e-05, + "loss": 0.4557, + "step": 25999 + }, + { + "epoch": 0.44926734863146256, + "grad_norm": 1.1828764506015408, + "learning_rate": 1.2113135523397914e-05, + "loss": 0.5811, + "step": 26000 + }, + { + "epoch": 0.44928462814487147, + "grad_norm": 0.8099555262149853, + "learning_rate": 1.2112588508379641e-05, + "loss": 0.288, + "step": 26001 + }, + { + "epoch": 0.4493019076582803, + "grad_norm": 0.9391366607619626, + "learning_rate": 1.2112041486744584e-05, + "loss": 0.7155, + "step": 26002 + }, + { + "epoch": 0.4493191871716892, + "grad_norm": 1.1110428687025073, + "learning_rate": 1.2111494458494451e-05, + "loss": 0.6144, + "step": 26003 + }, + { + "epoch": 0.44933646668509813, + "grad_norm": 0.767060720728318, + "learning_rate": 1.2110947423630955e-05, + "loss": 0.3476, + "step": 26004 + }, + { + "epoch": 0.44935374619850704, + "grad_norm": 1.3861737186061498, + "learning_rate": 1.2110400382155816e-05, + "loss": 0.4688, + "step": 26005 + }, + { + "epoch": 0.44937102571191595, + "grad_norm": 1.4390862147670709, + "learning_rate": 1.2109853334070738e-05, + "loss": 0.5371, + "step": 26006 + }, + { + "epoch": 0.44938830522532486, + "grad_norm": 0.8418990613814835, + "learning_rate": 1.210930627937744e-05, + "loss": 0.4375, + "step": 26007 + }, + { + "epoch": 0.44940558473873377, + "grad_norm": 1.0885503475280662, + "learning_rate": 1.2108759218077634e-05, + "loss": 0.549, + "step": 26008 + }, + { + "epoch": 0.4494228642521427, + "grad_norm": 0.49963823052695355, + "learning_rate": 1.2108212150173036e-05, + "loss": 0.5504, + "step": 26009 + }, + { + "epoch": 0.4494401437655516, + "grad_norm": 0.8848843250322462, + "learning_rate": 1.2107665075665353e-05, + "loss": 0.4596, + "step": 26010 + }, + { + "epoch": 0.4494574232789605, + "grad_norm": 1.0278197682751375, + "learning_rate": 1.2107117994556308e-05, + "loss": 0.3548, + "step": 26011 + }, + { + "epoch": 0.44947470279236934, + "grad_norm": 1.29089244780133, + "learning_rate": 1.2106570906847608e-05, + "loss": 0.5558, + "step": 26012 + }, + { + "epoch": 0.44949198230577825, + "grad_norm": 1.6218396127181571, + "learning_rate": 1.2106023812540966e-05, + "loss": 0.4811, + "step": 26013 + }, + { + "epoch": 0.44950926181918716, + "grad_norm": 0.7500279039770982, + "learning_rate": 1.2105476711638097e-05, + "loss": 0.3195, + "step": 26014 + }, + { + "epoch": 0.44952654133259606, + "grad_norm": 1.0265312299600087, + "learning_rate": 1.2104929604140718e-05, + "loss": 0.5269, + "step": 26015 + }, + { + "epoch": 0.44954382084600497, + "grad_norm": 0.8715472992871358, + "learning_rate": 1.2104382490050537e-05, + "loss": 0.3447, + "step": 26016 + }, + { + "epoch": 0.4495611003594139, + "grad_norm": 0.9185298318785866, + "learning_rate": 1.2103835369369272e-05, + "loss": 0.5842, + "step": 26017 + }, + { + "epoch": 0.4495783798728228, + "grad_norm": 1.3027728255530884, + "learning_rate": 1.2103288242098632e-05, + "loss": 0.4214, + "step": 26018 + }, + { + "epoch": 0.4495956593862317, + "grad_norm": 1.2393557902548016, + "learning_rate": 1.2102741108240335e-05, + "loss": 0.4676, + "step": 26019 + }, + { + "epoch": 0.4496129388996406, + "grad_norm": 0.6731634232530795, + "learning_rate": 1.2102193967796095e-05, + "loss": 0.3244, + "step": 26020 + }, + { + "epoch": 0.4496302184130495, + "grad_norm": 0.4527342512094912, + "learning_rate": 1.2101646820767618e-05, + "loss": 0.6692, + "step": 26021 + }, + { + "epoch": 0.4496474979264584, + "grad_norm": 0.8039650923148126, + "learning_rate": 1.2101099667156628e-05, + "loss": 0.3896, + "step": 26022 + }, + { + "epoch": 0.44966477743986727, + "grad_norm": 0.8172224051846525, + "learning_rate": 1.2100552506964832e-05, + "loss": 0.3774, + "step": 26023 + }, + { + "epoch": 0.4496820569532762, + "grad_norm": 0.8350218135986046, + "learning_rate": 1.2100005340193947e-05, + "loss": 0.3605, + "step": 26024 + }, + { + "epoch": 0.4496993364666851, + "grad_norm": 0.7057809249062822, + "learning_rate": 1.2099458166845687e-05, + "loss": 0.4408, + "step": 26025 + }, + { + "epoch": 0.449716615980094, + "grad_norm": 0.7421679178962713, + "learning_rate": 1.2098910986921766e-05, + "loss": 0.542, + "step": 26026 + }, + { + "epoch": 0.4497338954935029, + "grad_norm": 1.205146992574336, + "learning_rate": 1.2098363800423893e-05, + "loss": 0.4654, + "step": 26027 + }, + { + "epoch": 0.4497511750069118, + "grad_norm": 0.8810965073021568, + "learning_rate": 1.2097816607353786e-05, + "loss": 0.246, + "step": 26028 + }, + { + "epoch": 0.4497684545203207, + "grad_norm": 0.7099690192409802, + "learning_rate": 1.2097269407713157e-05, + "loss": 0.4524, + "step": 26029 + }, + { + "epoch": 0.4497857340337296, + "grad_norm": 0.5093260510451674, + "learning_rate": 1.2096722201503721e-05, + "loss": 0.8035, + "step": 26030 + }, + { + "epoch": 0.44980301354713853, + "grad_norm": 0.786581765053993, + "learning_rate": 1.2096174988727192e-05, + "loss": 0.5526, + "step": 26031 + }, + { + "epoch": 0.44982029306054744, + "grad_norm": 0.6876234788876066, + "learning_rate": 1.2095627769385285e-05, + "loss": 0.3194, + "step": 26032 + }, + { + "epoch": 0.4498375725739563, + "grad_norm": 0.9197871366761075, + "learning_rate": 1.209508054347971e-05, + "loss": 0.3069, + "step": 26033 + }, + { + "epoch": 0.4498548520873652, + "grad_norm": 0.4718343101593042, + "learning_rate": 1.2094533311012185e-05, + "loss": 0.6596, + "step": 26034 + }, + { + "epoch": 0.4498721316007741, + "grad_norm": 1.170696283298319, + "learning_rate": 1.2093986071984422e-05, + "loss": 0.3799, + "step": 26035 + }, + { + "epoch": 0.449889411114183, + "grad_norm": 0.8147012939649227, + "learning_rate": 1.2093438826398138e-05, + "loss": 0.2968, + "step": 26036 + }, + { + "epoch": 0.4499066906275919, + "grad_norm": 0.73016026364525, + "learning_rate": 1.2092891574255044e-05, + "loss": 0.2423, + "step": 26037 + }, + { + "epoch": 0.44992397014100083, + "grad_norm": 0.8945089932791023, + "learning_rate": 1.209234431555685e-05, + "loss": 0.2778, + "step": 26038 + }, + { + "epoch": 0.44994124965440974, + "grad_norm": 0.8898304407335172, + "learning_rate": 1.2091797050305278e-05, + "loss": 0.4423, + "step": 26039 + }, + { + "epoch": 0.44995852916781864, + "grad_norm": 0.7193237612405806, + "learning_rate": 1.2091249778502038e-05, + "loss": 0.455, + "step": 26040 + }, + { + "epoch": 0.44997580868122755, + "grad_norm": 1.3892305243532719, + "learning_rate": 1.2090702500148845e-05, + "loss": 0.4187, + "step": 26041 + }, + { + "epoch": 0.44999308819463646, + "grad_norm": 0.8868184168771276, + "learning_rate": 1.2090155215247413e-05, + "loss": 0.4434, + "step": 26042 + }, + { + "epoch": 0.45001036770804537, + "grad_norm": 1.525127325484595, + "learning_rate": 1.2089607923799456e-05, + "loss": 0.4993, + "step": 26043 + }, + { + "epoch": 0.4500276472214542, + "grad_norm": 0.8433602886971707, + "learning_rate": 1.2089060625806686e-05, + "loss": 0.6534, + "step": 26044 + }, + { + "epoch": 0.4500449267348631, + "grad_norm": 0.7879583037795703, + "learning_rate": 1.2088513321270823e-05, + "loss": 0.5403, + "step": 26045 + }, + { + "epoch": 0.45006220624827203, + "grad_norm": 0.3970085358998033, + "learning_rate": 1.208796601019358e-05, + "loss": 0.5263, + "step": 26046 + }, + { + "epoch": 0.45007948576168094, + "grad_norm": 1.8025870450580779, + "learning_rate": 1.2087418692576662e-05, + "loss": 0.4708, + "step": 26047 + }, + { + "epoch": 0.45009676527508985, + "grad_norm": 0.8495624157718741, + "learning_rate": 1.2086871368421793e-05, + "loss": 0.5986, + "step": 26048 + }, + { + "epoch": 0.45011404478849876, + "grad_norm": 1.2129966458923571, + "learning_rate": 1.2086324037730685e-05, + "loss": 0.341, + "step": 26049 + }, + { + "epoch": 0.45013132430190766, + "grad_norm": 2.0236082680340273, + "learning_rate": 1.208577670050505e-05, + "loss": 0.4442, + "step": 26050 + }, + { + "epoch": 0.4501486038153166, + "grad_norm": 0.9450043234572498, + "learning_rate": 1.2085229356746605e-05, + "loss": 0.4202, + "step": 26051 + }, + { + "epoch": 0.4501658833287255, + "grad_norm": 1.1978812825281344, + "learning_rate": 1.2084682006457063e-05, + "loss": 0.4564, + "step": 26052 + }, + { + "epoch": 0.4501831628421344, + "grad_norm": 0.862058146208477, + "learning_rate": 1.2084134649638136e-05, + "loss": 0.4302, + "step": 26053 + }, + { + "epoch": 0.4502004423555433, + "grad_norm": 0.9186871751278354, + "learning_rate": 1.2083587286291542e-05, + "loss": 0.3476, + "step": 26054 + }, + { + "epoch": 0.45021772186895215, + "grad_norm": 0.7948414112388864, + "learning_rate": 1.2083039916418997e-05, + "loss": 0.4628, + "step": 26055 + }, + { + "epoch": 0.45023500138236106, + "grad_norm": 1.4596836320912592, + "learning_rate": 1.208249254002221e-05, + "loss": 0.4521, + "step": 26056 + }, + { + "epoch": 0.45025228089576996, + "grad_norm": 2.1117165157471502, + "learning_rate": 1.20819451571029e-05, + "loss": 0.7371, + "step": 26057 + }, + { + "epoch": 0.45026956040917887, + "grad_norm": 0.9035671917431823, + "learning_rate": 1.208139776766278e-05, + "loss": 0.566, + "step": 26058 + }, + { + "epoch": 0.4502868399225878, + "grad_norm": 0.6665478893827401, + "learning_rate": 1.2080850371703561e-05, + "loss": 0.4061, + "step": 26059 + }, + { + "epoch": 0.4503041194359967, + "grad_norm": 1.1627686757629314, + "learning_rate": 1.2080302969226963e-05, + "loss": 0.4264, + "step": 26060 + }, + { + "epoch": 0.4503213989494056, + "grad_norm": 0.8172733880695907, + "learning_rate": 1.2079755560234696e-05, + "loss": 0.5338, + "step": 26061 + }, + { + "epoch": 0.4503386784628145, + "grad_norm": 1.0885236743665783, + "learning_rate": 1.2079208144728477e-05, + "loss": 0.394, + "step": 26062 + }, + { + "epoch": 0.4503559579762234, + "grad_norm": 0.6328457395406937, + "learning_rate": 1.2078660722710023e-05, + "loss": 0.306, + "step": 26063 + }, + { + "epoch": 0.4503732374896323, + "grad_norm": 0.877734365914519, + "learning_rate": 1.2078113294181042e-05, + "loss": 0.4372, + "step": 26064 + }, + { + "epoch": 0.45039051700304117, + "grad_norm": 0.9092523824693168, + "learning_rate": 1.2077565859143254e-05, + "loss": 0.369, + "step": 26065 + }, + { + "epoch": 0.4504077965164501, + "grad_norm": 1.0860739119188099, + "learning_rate": 1.2077018417598372e-05, + "loss": 0.5994, + "step": 26066 + }, + { + "epoch": 0.450425076029859, + "grad_norm": 0.9772284373286781, + "learning_rate": 1.2076470969548111e-05, + "loss": 0.4482, + "step": 26067 + }, + { + "epoch": 0.4504423555432679, + "grad_norm": 1.1545342435062844, + "learning_rate": 1.2075923514994184e-05, + "loss": 0.5129, + "step": 26068 + }, + { + "epoch": 0.4504596350566768, + "grad_norm": 1.0586217467568588, + "learning_rate": 1.2075376053938308e-05, + "loss": 0.4707, + "step": 26069 + }, + { + "epoch": 0.4504769145700857, + "grad_norm": 1.0377925689847858, + "learning_rate": 1.2074828586382192e-05, + "loss": 0.529, + "step": 26070 + }, + { + "epoch": 0.4504941940834946, + "grad_norm": 1.1719983059623746, + "learning_rate": 1.207428111232756e-05, + "loss": 0.3924, + "step": 26071 + }, + { + "epoch": 0.4505114735969035, + "grad_norm": 0.8448630677537968, + "learning_rate": 1.2073733631776122e-05, + "loss": 0.4274, + "step": 26072 + }, + { + "epoch": 0.45052875311031243, + "grad_norm": 0.9004870382866844, + "learning_rate": 1.2073186144729587e-05, + "loss": 0.5492, + "step": 26073 + }, + { + "epoch": 0.45054603262372134, + "grad_norm": 0.9993361676875253, + "learning_rate": 1.2072638651189681e-05, + "loss": 0.4396, + "step": 26074 + }, + { + "epoch": 0.45056331213713025, + "grad_norm": 1.0128601896048353, + "learning_rate": 1.2072091151158115e-05, + "loss": 0.4841, + "step": 26075 + }, + { + "epoch": 0.4505805916505391, + "grad_norm": 0.7587658628696553, + "learning_rate": 1.2071543644636593e-05, + "loss": 0.4963, + "step": 26076 + }, + { + "epoch": 0.450597871163948, + "grad_norm": 0.6745537754329456, + "learning_rate": 1.2070996131626849e-05, + "loss": 0.6378, + "step": 26077 + }, + { + "epoch": 0.4506151506773569, + "grad_norm": 1.048274124094977, + "learning_rate": 1.2070448612130582e-05, + "loss": 0.3751, + "step": 26078 + }, + { + "epoch": 0.4506324301907658, + "grad_norm": 0.9937537571361227, + "learning_rate": 1.2069901086149516e-05, + "loss": 0.5064, + "step": 26079 + }, + { + "epoch": 0.45064970970417473, + "grad_norm": 0.8028076119486819, + "learning_rate": 1.2069353553685361e-05, + "loss": 0.2737, + "step": 26080 + }, + { + "epoch": 0.45066698921758364, + "grad_norm": 0.7139361527162578, + "learning_rate": 1.206880601473983e-05, + "loss": 0.3678, + "step": 26081 + }, + { + "epoch": 0.45068426873099254, + "grad_norm": 0.8065088709243323, + "learning_rate": 1.2068258469314645e-05, + "loss": 0.382, + "step": 26082 + }, + { + "epoch": 0.45070154824440145, + "grad_norm": 1.2398101480396893, + "learning_rate": 1.2067710917411518e-05, + "loss": 0.3492, + "step": 26083 + }, + { + "epoch": 0.45071882775781036, + "grad_norm": 1.0062404414101433, + "learning_rate": 1.206716335903216e-05, + "loss": 0.403, + "step": 26084 + }, + { + "epoch": 0.45073610727121927, + "grad_norm": 0.5126040844875109, + "learning_rate": 1.2066615794178291e-05, + "loss": 0.3101, + "step": 26085 + }, + { + "epoch": 0.4507533867846281, + "grad_norm": 0.8989091522214763, + "learning_rate": 1.2066068222851626e-05, + "loss": 0.4732, + "step": 26086 + }, + { + "epoch": 0.450770666298037, + "grad_norm": 0.9593501035043022, + "learning_rate": 1.2065520645053877e-05, + "loss": 0.4524, + "step": 26087 + }, + { + "epoch": 0.45078794581144593, + "grad_norm": 1.0139033856927901, + "learning_rate": 1.206497306078676e-05, + "loss": 0.4088, + "step": 26088 + }, + { + "epoch": 0.45080522532485484, + "grad_norm": 1.1927128864874696, + "learning_rate": 1.206442547005199e-05, + "loss": 0.422, + "step": 26089 + }, + { + "epoch": 0.45082250483826375, + "grad_norm": 0.8750542854210476, + "learning_rate": 1.2063877872851283e-05, + "loss": 0.356, + "step": 26090 + }, + { + "epoch": 0.45083978435167266, + "grad_norm": 0.9204887111439002, + "learning_rate": 1.2063330269186353e-05, + "loss": 0.4123, + "step": 26091 + }, + { + "epoch": 0.45085706386508156, + "grad_norm": 0.9686210290159116, + "learning_rate": 1.2062782659058918e-05, + "loss": 0.5044, + "step": 26092 + }, + { + "epoch": 0.45087434337849047, + "grad_norm": 0.647881816267235, + "learning_rate": 1.2062235042470686e-05, + "loss": 0.3985, + "step": 26093 + }, + { + "epoch": 0.4508916228918994, + "grad_norm": 0.38690709745315766, + "learning_rate": 1.2061687419423383e-05, + "loss": 0.461, + "step": 26094 + }, + { + "epoch": 0.4509089024053083, + "grad_norm": 0.8262927089640308, + "learning_rate": 1.2061139789918717e-05, + "loss": 0.438, + "step": 26095 + }, + { + "epoch": 0.4509261819187172, + "grad_norm": 0.8041108474972222, + "learning_rate": 1.2060592153958402e-05, + "loss": 0.5678, + "step": 26096 + }, + { + "epoch": 0.45094346143212605, + "grad_norm": 1.6062486703183596, + "learning_rate": 1.2060044511544156e-05, + "loss": 0.465, + "step": 26097 + }, + { + "epoch": 0.45096074094553495, + "grad_norm": 1.1239114115993107, + "learning_rate": 1.2059496862677696e-05, + "loss": 0.5327, + "step": 26098 + }, + { + "epoch": 0.45097802045894386, + "grad_norm": 0.5032497361861566, + "learning_rate": 1.2058949207360735e-05, + "loss": 0.604, + "step": 26099 + }, + { + "epoch": 0.45099529997235277, + "grad_norm": 0.7960329685477755, + "learning_rate": 1.205840154559499e-05, + "loss": 0.3209, + "step": 26100 + }, + { + "epoch": 0.4510125794857617, + "grad_norm": 0.9902339677042622, + "learning_rate": 1.205785387738217e-05, + "loss": 0.3089, + "step": 26101 + }, + { + "epoch": 0.4510298589991706, + "grad_norm": 0.7432205650346244, + "learning_rate": 1.2057306202723999e-05, + "loss": 0.5265, + "step": 26102 + }, + { + "epoch": 0.4510471385125795, + "grad_norm": 0.5914880330562302, + "learning_rate": 1.2056758521622189e-05, + "loss": 0.212, + "step": 26103 + }, + { + "epoch": 0.4510644180259884, + "grad_norm": 1.1895831643883574, + "learning_rate": 1.2056210834078453e-05, + "loss": 0.5706, + "step": 26104 + }, + { + "epoch": 0.4510816975393973, + "grad_norm": 1.748315695293527, + "learning_rate": 1.2055663140094509e-05, + "loss": 0.3682, + "step": 26105 + }, + { + "epoch": 0.4510989770528062, + "grad_norm": 1.2997454073055468, + "learning_rate": 1.2055115439672072e-05, + "loss": 0.5507, + "step": 26106 + }, + { + "epoch": 0.45111625656621507, + "grad_norm": 0.7706512156993888, + "learning_rate": 1.2054567732812858e-05, + "loss": 0.4481, + "step": 26107 + }, + { + "epoch": 0.451133536079624, + "grad_norm": 0.9584919513836154, + "learning_rate": 1.2054020019518582e-05, + "loss": 0.5281, + "step": 26108 + }, + { + "epoch": 0.4511508155930329, + "grad_norm": 1.0968742446588913, + "learning_rate": 1.2053472299790958e-05, + "loss": 0.4415, + "step": 26109 + }, + { + "epoch": 0.4511680951064418, + "grad_norm": 1.323459624972759, + "learning_rate": 1.2052924573631701e-05, + "loss": 0.6262, + "step": 26110 + }, + { + "epoch": 0.4511853746198507, + "grad_norm": 0.8137017712429291, + "learning_rate": 1.2052376841042533e-05, + "loss": 0.4137, + "step": 26111 + }, + { + "epoch": 0.4512026541332596, + "grad_norm": 1.0207187576631886, + "learning_rate": 1.205182910202516e-05, + "loss": 0.6164, + "step": 26112 + }, + { + "epoch": 0.4512199336466685, + "grad_norm": 1.7029346456593097, + "learning_rate": 1.2051281356581303e-05, + "loss": 0.5858, + "step": 26113 + }, + { + "epoch": 0.4512372131600774, + "grad_norm": 1.4709766889556382, + "learning_rate": 1.205073360471268e-05, + "loss": 0.4764, + "step": 26114 + }, + { + "epoch": 0.45125449267348633, + "grad_norm": 1.1977567398513713, + "learning_rate": 1.2050185846421002e-05, + "loss": 0.4453, + "step": 26115 + }, + { + "epoch": 0.45127177218689524, + "grad_norm": 0.84958968112891, + "learning_rate": 1.2049638081707984e-05, + "loss": 0.3533, + "step": 26116 + }, + { + "epoch": 0.45128905170030414, + "grad_norm": 0.8967036802314502, + "learning_rate": 1.2049090310575347e-05, + "loss": 0.5172, + "step": 26117 + }, + { + "epoch": 0.451306331213713, + "grad_norm": 0.8023862511956411, + "learning_rate": 1.2048542533024803e-05, + "loss": 0.3144, + "step": 26118 + }, + { + "epoch": 0.4513236107271219, + "grad_norm": 1.1564647573152893, + "learning_rate": 1.2047994749058067e-05, + "loss": 0.5121, + "step": 26119 + }, + { + "epoch": 0.4513408902405308, + "grad_norm": 1.1820303407072605, + "learning_rate": 1.2047446958676857e-05, + "loss": 0.5362, + "step": 26120 + }, + { + "epoch": 0.4513581697539397, + "grad_norm": 0.8875880496135795, + "learning_rate": 1.2046899161882884e-05, + "loss": 0.5733, + "step": 26121 + }, + { + "epoch": 0.4513754492673486, + "grad_norm": 1.327240669639588, + "learning_rate": 1.204635135867787e-05, + "loss": 0.7974, + "step": 26122 + }, + { + "epoch": 0.45139272878075754, + "grad_norm": 0.6035815869462335, + "learning_rate": 1.2045803549063528e-05, + "loss": 0.6125, + "step": 26123 + }, + { + "epoch": 0.45141000829416644, + "grad_norm": 0.801675036658061, + "learning_rate": 1.2045255733041572e-05, + "loss": 0.5244, + "step": 26124 + }, + { + "epoch": 0.45142728780757535, + "grad_norm": 1.0918302886446514, + "learning_rate": 1.2044707910613723e-05, + "loss": 0.438, + "step": 26125 + }, + { + "epoch": 0.45144456732098426, + "grad_norm": 0.8683915354970158, + "learning_rate": 1.2044160081781691e-05, + "loss": 0.4569, + "step": 26126 + }, + { + "epoch": 0.45146184683439317, + "grad_norm": 0.8515755144430411, + "learning_rate": 1.2043612246547197e-05, + "loss": 0.4193, + "step": 26127 + }, + { + "epoch": 0.4514791263478021, + "grad_norm": 1.1411165175377778, + "learning_rate": 1.204306440491195e-05, + "loss": 0.4664, + "step": 26128 + }, + { + "epoch": 0.4514964058612109, + "grad_norm": 0.8254430398306755, + "learning_rate": 1.2042516556877675e-05, + "loss": 0.3393, + "step": 26129 + }, + { + "epoch": 0.45151368537461983, + "grad_norm": 0.8560231487207222, + "learning_rate": 1.2041968702446078e-05, + "loss": 0.3791, + "step": 26130 + }, + { + "epoch": 0.45153096488802874, + "grad_norm": 0.8564150709796672, + "learning_rate": 1.2041420841618883e-05, + "loss": 0.3797, + "step": 26131 + }, + { + "epoch": 0.45154824440143765, + "grad_norm": 0.7340463129093413, + "learning_rate": 1.2040872974397802e-05, + "loss": 0.5164, + "step": 26132 + }, + { + "epoch": 0.45156552391484656, + "grad_norm": 1.1682239982390854, + "learning_rate": 1.204032510078455e-05, + "loss": 0.4868, + "step": 26133 + }, + { + "epoch": 0.45158280342825546, + "grad_norm": 0.7204114321495406, + "learning_rate": 1.2039777220780847e-05, + "loss": 0.3463, + "step": 26134 + }, + { + "epoch": 0.45160008294166437, + "grad_norm": 1.0660522518190183, + "learning_rate": 1.2039229334388405e-05, + "loss": 0.4266, + "step": 26135 + }, + { + "epoch": 0.4516173624550733, + "grad_norm": 0.8873619294939125, + "learning_rate": 1.2038681441608942e-05, + "loss": 0.3452, + "step": 26136 + }, + { + "epoch": 0.4516346419684822, + "grad_norm": 1.1017873931865798, + "learning_rate": 1.2038133542444173e-05, + "loss": 0.4837, + "step": 26137 + }, + { + "epoch": 0.4516519214818911, + "grad_norm": 0.4247951695513658, + "learning_rate": 1.2037585636895817e-05, + "loss": 0.652, + "step": 26138 + }, + { + "epoch": 0.45166920099529995, + "grad_norm": 1.1549658725112812, + "learning_rate": 1.2037037724965587e-05, + "loss": 0.4605, + "step": 26139 + }, + { + "epoch": 0.45168648050870885, + "grad_norm": 4.207895571682488, + "learning_rate": 1.20364898066552e-05, + "loss": 0.4009, + "step": 26140 + }, + { + "epoch": 0.45170376002211776, + "grad_norm": 0.8908040876554065, + "learning_rate": 1.2035941881966373e-05, + "loss": 0.4105, + "step": 26141 + }, + { + "epoch": 0.45172103953552667, + "grad_norm": 0.8494123052019608, + "learning_rate": 1.203539395090082e-05, + "loss": 0.3079, + "step": 26142 + }, + { + "epoch": 0.4517383190489356, + "grad_norm": 0.8739392817811756, + "learning_rate": 1.203484601346026e-05, + "loss": 0.5443, + "step": 26143 + }, + { + "epoch": 0.4517555985623445, + "grad_norm": 0.962236288817078, + "learning_rate": 1.2034298069646404e-05, + "loss": 0.5934, + "step": 26144 + }, + { + "epoch": 0.4517728780757534, + "grad_norm": 1.1718577364279623, + "learning_rate": 1.2033750119460976e-05, + "loss": 0.317, + "step": 26145 + }, + { + "epoch": 0.4517901575891623, + "grad_norm": 1.1494697148447026, + "learning_rate": 1.2033202162905689e-05, + "loss": 0.3868, + "step": 26146 + }, + { + "epoch": 0.4518074371025712, + "grad_norm": 0.6720520843988207, + "learning_rate": 1.2032654199982255e-05, + "loss": 0.4977, + "step": 26147 + }, + { + "epoch": 0.4518247166159801, + "grad_norm": 1.107240700814252, + "learning_rate": 1.2032106230692394e-05, + "loss": 0.4254, + "step": 26148 + }, + { + "epoch": 0.451841996129389, + "grad_norm": 0.9290233381655635, + "learning_rate": 1.2031558255037823e-05, + "loss": 0.5164, + "step": 26149 + }, + { + "epoch": 0.4518592756427979, + "grad_norm": 1.113361912335574, + "learning_rate": 1.2031010273020257e-05, + "loss": 0.706, + "step": 26150 + }, + { + "epoch": 0.4518765551562068, + "grad_norm": 1.0348910701990082, + "learning_rate": 1.2030462284641411e-05, + "loss": 0.5084, + "step": 26151 + }, + { + "epoch": 0.4518938346696157, + "grad_norm": 1.0559989021188578, + "learning_rate": 1.2029914289903004e-05, + "loss": 0.4805, + "step": 26152 + }, + { + "epoch": 0.4519111141830246, + "grad_norm": 0.5817535541979308, + "learning_rate": 1.202936628880675e-05, + "loss": 0.4688, + "step": 26153 + }, + { + "epoch": 0.4519283936964335, + "grad_norm": 0.5653921216513098, + "learning_rate": 1.2028818281354369e-05, + "loss": 0.687, + "step": 26154 + }, + { + "epoch": 0.4519456732098424, + "grad_norm": 0.7750797824900099, + "learning_rate": 1.2028270267547573e-05, + "loss": 0.462, + "step": 26155 + }, + { + "epoch": 0.4519629527232513, + "grad_norm": 0.7328184956756084, + "learning_rate": 1.2027722247388078e-05, + "loss": 0.2951, + "step": 26156 + }, + { + "epoch": 0.45198023223666023, + "grad_norm": 1.0505877485521882, + "learning_rate": 1.2027174220877606e-05, + "loss": 0.3937, + "step": 26157 + }, + { + "epoch": 0.45199751175006914, + "grad_norm": 1.10289997774611, + "learning_rate": 1.202662618801787e-05, + "loss": 0.3703, + "step": 26158 + }, + { + "epoch": 0.45201479126347804, + "grad_norm": 1.0357867901059323, + "learning_rate": 1.202607814881059e-05, + "loss": 0.3809, + "step": 26159 + }, + { + "epoch": 0.4520320707768869, + "grad_norm": 1.3263624248774397, + "learning_rate": 1.2025530103257474e-05, + "loss": 0.3529, + "step": 26160 + }, + { + "epoch": 0.4520493502902958, + "grad_norm": 1.4028130768000895, + "learning_rate": 1.2024982051360246e-05, + "loss": 0.4506, + "step": 26161 + }, + { + "epoch": 0.4520666298037047, + "grad_norm": 0.9405010041112567, + "learning_rate": 1.202443399312062e-05, + "loss": 0.5901, + "step": 26162 + }, + { + "epoch": 0.4520839093171136, + "grad_norm": 0.7280661080391092, + "learning_rate": 1.2023885928540313e-05, + "loss": 0.4208, + "step": 26163 + }, + { + "epoch": 0.4521011888305225, + "grad_norm": 0.8955558032963911, + "learning_rate": 1.202333785762104e-05, + "loss": 0.3648, + "step": 26164 + }, + { + "epoch": 0.45211846834393143, + "grad_norm": 1.314038620312061, + "learning_rate": 1.2022789780364522e-05, + "loss": 0.3565, + "step": 26165 + }, + { + "epoch": 0.45213574785734034, + "grad_norm": 1.311174433372682, + "learning_rate": 1.2022241696772472e-05, + "loss": 0.4075, + "step": 26166 + }, + { + "epoch": 0.45215302737074925, + "grad_norm": 0.9220884243925848, + "learning_rate": 1.2021693606846606e-05, + "loss": 0.4656, + "step": 26167 + }, + { + "epoch": 0.45217030688415816, + "grad_norm": 0.7541906734680723, + "learning_rate": 1.202114551058864e-05, + "loss": 0.6102, + "step": 26168 + }, + { + "epoch": 0.45218758639756707, + "grad_norm": 0.817686911601379, + "learning_rate": 1.2020597408000297e-05, + "loss": 0.5678, + "step": 26169 + }, + { + "epoch": 0.452204865910976, + "grad_norm": 1.2980973242773972, + "learning_rate": 1.2020049299083287e-05, + "loss": 0.6132, + "step": 26170 + }, + { + "epoch": 0.4522221454243848, + "grad_norm": 1.0662629063774909, + "learning_rate": 1.2019501183839331e-05, + "loss": 0.4572, + "step": 26171 + }, + { + "epoch": 0.45223942493779373, + "grad_norm": 2.066783569477278, + "learning_rate": 1.2018953062270141e-05, + "loss": 0.5656, + "step": 26172 + }, + { + "epoch": 0.45225670445120264, + "grad_norm": 0.9032713953534593, + "learning_rate": 1.2018404934377439e-05, + "loss": 0.6944, + "step": 26173 + }, + { + "epoch": 0.45227398396461155, + "grad_norm": 0.940100865044243, + "learning_rate": 1.2017856800162937e-05, + "loss": 0.4822, + "step": 26174 + }, + { + "epoch": 0.45229126347802046, + "grad_norm": 0.6377601107127582, + "learning_rate": 1.2017308659628357e-05, + "loss": 0.3062, + "step": 26175 + }, + { + "epoch": 0.45230854299142936, + "grad_norm": 0.8627258451800156, + "learning_rate": 1.201676051277541e-05, + "loss": 0.2914, + "step": 26176 + }, + { + "epoch": 0.45232582250483827, + "grad_norm": 1.226348537618637, + "learning_rate": 1.2016212359605816e-05, + "loss": 0.3895, + "step": 26177 + }, + { + "epoch": 0.4523431020182472, + "grad_norm": 0.7709339695866804, + "learning_rate": 1.2015664200121296e-05, + "loss": 0.3211, + "step": 26178 + }, + { + "epoch": 0.4523603815316561, + "grad_norm": 0.760875952309028, + "learning_rate": 1.2015116034323557e-05, + "loss": 0.3692, + "step": 26179 + }, + { + "epoch": 0.452377661045065, + "grad_norm": 0.7556508210898589, + "learning_rate": 1.2014567862214325e-05, + "loss": 0.3958, + "step": 26180 + }, + { + "epoch": 0.45239494055847385, + "grad_norm": 1.1937690667076997, + "learning_rate": 1.2014019683795313e-05, + "loss": 0.6685, + "step": 26181 + }, + { + "epoch": 0.45241222007188275, + "grad_norm": 0.9447931603787468, + "learning_rate": 1.2013471499068236e-05, + "loss": 0.519, + "step": 26182 + }, + { + "epoch": 0.45242949958529166, + "grad_norm": 0.9699917881568477, + "learning_rate": 1.2012923308034817e-05, + "loss": 0.4999, + "step": 26183 + }, + { + "epoch": 0.45244677909870057, + "grad_norm": 0.8700955677403236, + "learning_rate": 1.2012375110696763e-05, + "loss": 0.3259, + "step": 26184 + }, + { + "epoch": 0.4524640586121095, + "grad_norm": 1.0962430062158668, + "learning_rate": 1.2011826907055803e-05, + "loss": 0.5235, + "step": 26185 + }, + { + "epoch": 0.4524813381255184, + "grad_norm": 0.9354591197551338, + "learning_rate": 1.2011278697113646e-05, + "loss": 0.659, + "step": 26186 + }, + { + "epoch": 0.4524986176389273, + "grad_norm": 1.022298491129549, + "learning_rate": 1.2010730480872012e-05, + "loss": 0.5464, + "step": 26187 + }, + { + "epoch": 0.4525158971523362, + "grad_norm": 0.7068307683842606, + "learning_rate": 1.2010182258332616e-05, + "loss": 0.291, + "step": 26188 + }, + { + "epoch": 0.4525331766657451, + "grad_norm": 0.8424457727751088, + "learning_rate": 1.200963402949718e-05, + "loss": 0.4483, + "step": 26189 + }, + { + "epoch": 0.452550456179154, + "grad_norm": 0.7235109007581111, + "learning_rate": 1.2009085794367414e-05, + "loss": 0.3843, + "step": 26190 + }, + { + "epoch": 0.4525677356925629, + "grad_norm": 1.0488291852130953, + "learning_rate": 1.200853755294504e-05, + "loss": 0.5112, + "step": 26191 + }, + { + "epoch": 0.4525850152059718, + "grad_norm": 1.0448498999077525, + "learning_rate": 1.2007989305231776e-05, + "loss": 0.4623, + "step": 26192 + }, + { + "epoch": 0.4526022947193807, + "grad_norm": 0.9052440312383824, + "learning_rate": 1.2007441051229332e-05, + "loss": 0.4922, + "step": 26193 + }, + { + "epoch": 0.4526195742327896, + "grad_norm": 0.8922250996877118, + "learning_rate": 1.200689279093943e-05, + "loss": 0.378, + "step": 26194 + }, + { + "epoch": 0.4526368537461985, + "grad_norm": 1.3785492053041735, + "learning_rate": 1.200634452436379e-05, + "loss": 0.3903, + "step": 26195 + }, + { + "epoch": 0.4526541332596074, + "grad_norm": 0.785904081264475, + "learning_rate": 1.2005796251504125e-05, + "loss": 0.4254, + "step": 26196 + }, + { + "epoch": 0.4526714127730163, + "grad_norm": 0.9080192193936101, + "learning_rate": 1.2005247972362155e-05, + "loss": 0.5701, + "step": 26197 + }, + { + "epoch": 0.4526886922864252, + "grad_norm": 1.3174158439846617, + "learning_rate": 1.2004699686939598e-05, + "loss": 0.5584, + "step": 26198 + }, + { + "epoch": 0.45270597179983413, + "grad_norm": 0.8939842067761753, + "learning_rate": 1.2004151395238164e-05, + "loss": 0.278, + "step": 26199 + }, + { + "epoch": 0.45272325131324304, + "grad_norm": 0.9252661334239979, + "learning_rate": 1.200360309725958e-05, + "loss": 0.4874, + "step": 26200 + }, + { + "epoch": 0.45274053082665194, + "grad_norm": 0.8522462150280787, + "learning_rate": 1.2003054793005557e-05, + "loss": 0.38, + "step": 26201 + }, + { + "epoch": 0.45275781034006085, + "grad_norm": 0.7355324941234922, + "learning_rate": 1.2002506482477816e-05, + "loss": 0.3276, + "step": 26202 + }, + { + "epoch": 0.4527750898534697, + "grad_norm": 1.2799813879987145, + "learning_rate": 1.2001958165678072e-05, + "loss": 0.4414, + "step": 26203 + }, + { + "epoch": 0.4527923693668786, + "grad_norm": 0.6823278237309189, + "learning_rate": 1.200140984260804e-05, + "loss": 0.6027, + "step": 26204 + }, + { + "epoch": 0.4528096488802875, + "grad_norm": 0.9070126508711395, + "learning_rate": 1.2000861513269442e-05, + "loss": 0.2714, + "step": 26205 + }, + { + "epoch": 0.4528269283936964, + "grad_norm": 0.8238389765034161, + "learning_rate": 1.2000313177663993e-05, + "loss": 0.5268, + "step": 26206 + }, + { + "epoch": 0.45284420790710533, + "grad_norm": 0.9976383254982407, + "learning_rate": 1.1999764835793413e-05, + "loss": 0.4639, + "step": 26207 + }, + { + "epoch": 0.45286148742051424, + "grad_norm": 0.920669531163261, + "learning_rate": 1.1999216487659415e-05, + "loss": 0.3703, + "step": 26208 + }, + { + "epoch": 0.45287876693392315, + "grad_norm": 1.2175759558289294, + "learning_rate": 1.199866813326372e-05, + "loss": 0.3913, + "step": 26209 + }, + { + "epoch": 0.45289604644733206, + "grad_norm": 0.7294556373245398, + "learning_rate": 1.1998119772608045e-05, + "loss": 0.3741, + "step": 26210 + }, + { + "epoch": 0.45291332596074096, + "grad_norm": 1.1114366321476556, + "learning_rate": 1.1997571405694108e-05, + "loss": 0.3997, + "step": 26211 + }, + { + "epoch": 0.45293060547414987, + "grad_norm": 0.9053246295352148, + "learning_rate": 1.1997023032523624e-05, + "loss": 0.5599, + "step": 26212 + }, + { + "epoch": 0.4529478849875587, + "grad_norm": 0.8386076054690843, + "learning_rate": 1.1996474653098313e-05, + "loss": 0.472, + "step": 26213 + }, + { + "epoch": 0.45296516450096763, + "grad_norm": 0.9196189765585243, + "learning_rate": 1.1995926267419891e-05, + "loss": 0.4287, + "step": 26214 + }, + { + "epoch": 0.45298244401437654, + "grad_norm": 1.263848894508544, + "learning_rate": 1.1995377875490077e-05, + "loss": 0.3756, + "step": 26215 + }, + { + "epoch": 0.45299972352778545, + "grad_norm": 1.009306820857991, + "learning_rate": 1.1994829477310586e-05, + "loss": 0.3318, + "step": 26216 + }, + { + "epoch": 0.45301700304119435, + "grad_norm": 1.1795735949834865, + "learning_rate": 1.1994281072883139e-05, + "loss": 0.6052, + "step": 26217 + }, + { + "epoch": 0.45303428255460326, + "grad_norm": 1.1663456576623925, + "learning_rate": 1.1993732662209451e-05, + "loss": 0.4927, + "step": 26218 + }, + { + "epoch": 0.45305156206801217, + "grad_norm": 1.3728446771231886, + "learning_rate": 1.1993184245291243e-05, + "loss": 0.5123, + "step": 26219 + }, + { + "epoch": 0.4530688415814211, + "grad_norm": 1.0203037980882752, + "learning_rate": 1.199263582213023e-05, + "loss": 0.3355, + "step": 26220 + }, + { + "epoch": 0.45308612109483, + "grad_norm": 0.5953607907575064, + "learning_rate": 1.199208739272813e-05, + "loss": 0.3062, + "step": 26221 + }, + { + "epoch": 0.4531034006082389, + "grad_norm": 1.1378426512855333, + "learning_rate": 1.199153895708666e-05, + "loss": 0.505, + "step": 26222 + }, + { + "epoch": 0.4531206801216478, + "grad_norm": 1.1775440162707016, + "learning_rate": 1.199099051520754e-05, + "loss": 0.4144, + "step": 26223 + }, + { + "epoch": 0.45313795963505665, + "grad_norm": 1.79002063676963, + "learning_rate": 1.1990442067092485e-05, + "loss": 0.4164, + "step": 26224 + }, + { + "epoch": 0.45315523914846556, + "grad_norm": 0.8722432537769969, + "learning_rate": 1.1989893612743213e-05, + "loss": 0.4449, + "step": 26225 + }, + { + "epoch": 0.45317251866187447, + "grad_norm": 1.0578224345645253, + "learning_rate": 1.1989345152161445e-05, + "loss": 0.4503, + "step": 26226 + }, + { + "epoch": 0.4531897981752834, + "grad_norm": 1.0285059211793257, + "learning_rate": 1.1988796685348896e-05, + "loss": 0.5247, + "step": 26227 + }, + { + "epoch": 0.4532070776886923, + "grad_norm": 1.0643213648423335, + "learning_rate": 1.1988248212307283e-05, + "loss": 0.6371, + "step": 26228 + }, + { + "epoch": 0.4532243572021012, + "grad_norm": 0.5652009435155355, + "learning_rate": 1.1987699733038328e-05, + "loss": 0.3055, + "step": 26229 + }, + { + "epoch": 0.4532416367155101, + "grad_norm": 0.9834615269720529, + "learning_rate": 1.1987151247543746e-05, + "loss": 0.5433, + "step": 26230 + }, + { + "epoch": 0.453258916228919, + "grad_norm": 0.6333093363620443, + "learning_rate": 1.1986602755825255e-05, + "loss": 0.4514, + "step": 26231 + }, + { + "epoch": 0.4532761957423279, + "grad_norm": 1.008544091438976, + "learning_rate": 1.1986054257884574e-05, + "loss": 0.5011, + "step": 26232 + }, + { + "epoch": 0.4532934752557368, + "grad_norm": 1.3220390589667717, + "learning_rate": 1.198550575372342e-05, + "loss": 0.5852, + "step": 26233 + }, + { + "epoch": 0.4533107547691457, + "grad_norm": 1.0174155304575936, + "learning_rate": 1.198495724334351e-05, + "loss": 0.4322, + "step": 26234 + }, + { + "epoch": 0.4533280342825546, + "grad_norm": 1.1008533470581852, + "learning_rate": 1.1984408726746564e-05, + "loss": 0.482, + "step": 26235 + }, + { + "epoch": 0.4533453137959635, + "grad_norm": 0.9716748326042821, + "learning_rate": 1.1983860203934296e-05, + "loss": 0.4375, + "step": 26236 + }, + { + "epoch": 0.4533625933093724, + "grad_norm": 1.4003297884194184, + "learning_rate": 1.1983311674908432e-05, + "loss": 0.4228, + "step": 26237 + }, + { + "epoch": 0.4533798728227813, + "grad_norm": 0.8979107218300543, + "learning_rate": 1.1982763139670682e-05, + "loss": 0.4271, + "step": 26238 + }, + { + "epoch": 0.4533971523361902, + "grad_norm": 0.8587881290559909, + "learning_rate": 1.1982214598222765e-05, + "loss": 0.4139, + "step": 26239 + }, + { + "epoch": 0.4534144318495991, + "grad_norm": 0.8775565668421281, + "learning_rate": 1.1981666050566404e-05, + "loss": 0.5063, + "step": 26240 + }, + { + "epoch": 0.453431711363008, + "grad_norm": 1.0375923423717421, + "learning_rate": 1.1981117496703316e-05, + "loss": 0.4625, + "step": 26241 + }, + { + "epoch": 0.45344899087641694, + "grad_norm": 0.9851895898094777, + "learning_rate": 1.1980568936635217e-05, + "loss": 0.5347, + "step": 26242 + }, + { + "epoch": 0.45346627038982584, + "grad_norm": 1.9579700717602198, + "learning_rate": 1.1980020370363825e-05, + "loss": 0.4712, + "step": 26243 + }, + { + "epoch": 0.45348354990323475, + "grad_norm": 0.7288453127997927, + "learning_rate": 1.1979471797890859e-05, + "loss": 0.2553, + "step": 26244 + }, + { + "epoch": 0.4535008294166436, + "grad_norm": 0.6485730053863786, + "learning_rate": 1.1978923219218035e-05, + "loss": 0.3308, + "step": 26245 + }, + { + "epoch": 0.4535181089300525, + "grad_norm": 0.7018770908603528, + "learning_rate": 1.1978374634347075e-05, + "loss": 0.3535, + "step": 26246 + }, + { + "epoch": 0.4535353884434614, + "grad_norm": 0.8617169536593481, + "learning_rate": 1.1977826043279694e-05, + "loss": 0.2913, + "step": 26247 + }, + { + "epoch": 0.4535526679568703, + "grad_norm": 1.2835275068441876, + "learning_rate": 1.197727744601761e-05, + "loss": 0.35, + "step": 26248 + }, + { + "epoch": 0.45356994747027923, + "grad_norm": 0.7298393347795502, + "learning_rate": 1.1976728842562546e-05, + "loss": 0.3477, + "step": 26249 + }, + { + "epoch": 0.45358722698368814, + "grad_norm": 1.0599005188661565, + "learning_rate": 1.1976180232916216e-05, + "loss": 0.4775, + "step": 26250 + }, + { + "epoch": 0.45360450649709705, + "grad_norm": 1.1542593295130603, + "learning_rate": 1.1975631617080339e-05, + "loss": 0.3979, + "step": 26251 + }, + { + "epoch": 0.45362178601050596, + "grad_norm": 0.8919683110190807, + "learning_rate": 1.1975082995056634e-05, + "loss": 0.4784, + "step": 26252 + }, + { + "epoch": 0.45363906552391486, + "grad_norm": 1.0271844993417694, + "learning_rate": 1.1974534366846817e-05, + "loss": 0.3955, + "step": 26253 + }, + { + "epoch": 0.45365634503732377, + "grad_norm": 1.1961137969907853, + "learning_rate": 1.1973985732452612e-05, + "loss": 0.6144, + "step": 26254 + }, + { + "epoch": 0.4536736245507326, + "grad_norm": 0.8288156660181287, + "learning_rate": 1.1973437091875733e-05, + "loss": 0.4743, + "step": 26255 + }, + { + "epoch": 0.45369090406414153, + "grad_norm": 1.0657959535996513, + "learning_rate": 1.1972888445117895e-05, + "loss": 0.6411, + "step": 26256 + }, + { + "epoch": 0.45370818357755044, + "grad_norm": 0.5571223027757107, + "learning_rate": 1.1972339792180825e-05, + "loss": 0.719, + "step": 26257 + }, + { + "epoch": 0.45372546309095935, + "grad_norm": 1.1093518303535372, + "learning_rate": 1.1971791133066235e-05, + "loss": 0.413, + "step": 26258 + }, + { + "epoch": 0.45374274260436825, + "grad_norm": 1.3463957094705497, + "learning_rate": 1.1971242467775844e-05, + "loss": 0.5151, + "step": 26259 + }, + { + "epoch": 0.45376002211777716, + "grad_norm": 1.0869128535595862, + "learning_rate": 1.1970693796311372e-05, + "loss": 0.4766, + "step": 26260 + }, + { + "epoch": 0.45377730163118607, + "grad_norm": 0.9045344139467492, + "learning_rate": 1.1970145118674541e-05, + "loss": 0.3933, + "step": 26261 + }, + { + "epoch": 0.453794581144595, + "grad_norm": 0.7007091168957117, + "learning_rate": 1.1969596434867063e-05, + "loss": 0.3203, + "step": 26262 + }, + { + "epoch": 0.4538118606580039, + "grad_norm": 0.7587366953626888, + "learning_rate": 1.196904774489066e-05, + "loss": 0.3386, + "step": 26263 + }, + { + "epoch": 0.4538291401714128, + "grad_norm": 1.5271898021387262, + "learning_rate": 1.1968499048747049e-05, + "loss": 0.4726, + "step": 26264 + }, + { + "epoch": 0.4538464196848217, + "grad_norm": 1.0198844766539148, + "learning_rate": 1.1967950346437949e-05, + "loss": 0.5499, + "step": 26265 + }, + { + "epoch": 0.45386369919823055, + "grad_norm": 0.9886291058539127, + "learning_rate": 1.196740163796508e-05, + "loss": 0.3885, + "step": 26266 + }, + { + "epoch": 0.45388097871163946, + "grad_norm": 0.6105190445233253, + "learning_rate": 1.196685292333016e-05, + "loss": 0.7057, + "step": 26267 + }, + { + "epoch": 0.45389825822504837, + "grad_norm": 0.40416067957478174, + "learning_rate": 1.1966304202534904e-05, + "loss": 0.5236, + "step": 26268 + }, + { + "epoch": 0.4539155377384573, + "grad_norm": 0.9630545648749367, + "learning_rate": 1.1965755475581038e-05, + "loss": 0.4439, + "step": 26269 + }, + { + "epoch": 0.4539328172518662, + "grad_norm": 1.0678243011931428, + "learning_rate": 1.1965206742470274e-05, + "loss": 0.6838, + "step": 26270 + }, + { + "epoch": 0.4539500967652751, + "grad_norm": 0.958188360715498, + "learning_rate": 1.1964658003204332e-05, + "loss": 0.3838, + "step": 26271 + }, + { + "epoch": 0.453967376278684, + "grad_norm": 1.2891374847465957, + "learning_rate": 1.1964109257784936e-05, + "loss": 0.4147, + "step": 26272 + }, + { + "epoch": 0.4539846557920929, + "grad_norm": 1.3443571462906831, + "learning_rate": 1.1963560506213799e-05, + "loss": 0.4287, + "step": 26273 + }, + { + "epoch": 0.4540019353055018, + "grad_norm": 1.2289302866632683, + "learning_rate": 1.1963011748492638e-05, + "loss": 0.2212, + "step": 26274 + }, + { + "epoch": 0.4540192148189107, + "grad_norm": 0.9503748613336624, + "learning_rate": 1.196246298462318e-05, + "loss": 0.4215, + "step": 26275 + }, + { + "epoch": 0.45403649433231963, + "grad_norm": 1.4344632119943574, + "learning_rate": 1.1961914214607134e-05, + "loss": 0.3207, + "step": 26276 + }, + { + "epoch": 0.4540537738457285, + "grad_norm": 0.7236307093077894, + "learning_rate": 1.1961365438446227e-05, + "loss": 0.3978, + "step": 26277 + }, + { + "epoch": 0.4540710533591374, + "grad_norm": 1.1455610517759278, + "learning_rate": 1.1960816656142173e-05, + "loss": 0.6021, + "step": 26278 + }, + { + "epoch": 0.4540883328725463, + "grad_norm": 1.6491752962664428, + "learning_rate": 1.1960267867696691e-05, + "loss": 0.48, + "step": 26279 + }, + { + "epoch": 0.4541056123859552, + "grad_norm": 0.9462261002475805, + "learning_rate": 1.1959719073111502e-05, + "loss": 0.3952, + "step": 26280 + }, + { + "epoch": 0.4541228918993641, + "grad_norm": 1.0087319130459236, + "learning_rate": 1.1959170272388325e-05, + "loss": 0.318, + "step": 26281 + }, + { + "epoch": 0.454140171412773, + "grad_norm": 1.0825198065240298, + "learning_rate": 1.1958621465528878e-05, + "loss": 0.4892, + "step": 26282 + }, + { + "epoch": 0.4541574509261819, + "grad_norm": 1.2328826282902945, + "learning_rate": 1.195807265253488e-05, + "loss": 0.5522, + "step": 26283 + }, + { + "epoch": 0.45417473043959083, + "grad_norm": 1.0514510294626451, + "learning_rate": 1.195752383340805e-05, + "loss": 0.3476, + "step": 26284 + }, + { + "epoch": 0.45419200995299974, + "grad_norm": 0.8037619604152704, + "learning_rate": 1.1956975008150103e-05, + "loss": 0.4057, + "step": 26285 + }, + { + "epoch": 0.45420928946640865, + "grad_norm": 0.6832312856465215, + "learning_rate": 1.1956426176762763e-05, + "loss": 0.3148, + "step": 26286 + }, + { + "epoch": 0.4542265689798175, + "grad_norm": 1.224624279453023, + "learning_rate": 1.195587733924775e-05, + "loss": 0.3993, + "step": 26287 + }, + { + "epoch": 0.4542438484932264, + "grad_norm": 1.1045250011502086, + "learning_rate": 1.1955328495606776e-05, + "loss": 0.4747, + "step": 26288 + }, + { + "epoch": 0.4542611280066353, + "grad_norm": 1.4067398159780187, + "learning_rate": 1.1954779645841567e-05, + "loss": 0.6462, + "step": 26289 + }, + { + "epoch": 0.4542784075200442, + "grad_norm": 1.0590695912859345, + "learning_rate": 1.1954230789953842e-05, + "loss": 0.4978, + "step": 26290 + }, + { + "epoch": 0.45429568703345313, + "grad_norm": 0.7838131048843372, + "learning_rate": 1.1953681927945314e-05, + "loss": 0.442, + "step": 26291 + }, + { + "epoch": 0.45431296654686204, + "grad_norm": 1.0583068363635941, + "learning_rate": 1.1953133059817708e-05, + "loss": 0.4224, + "step": 26292 + }, + { + "epoch": 0.45433024606027095, + "grad_norm": 0.8682036510384136, + "learning_rate": 1.195258418557274e-05, + "loss": 0.3372, + "step": 26293 + }, + { + "epoch": 0.45434752557367986, + "grad_norm": 1.293279655392666, + "learning_rate": 1.1952035305212132e-05, + "loss": 0.3325, + "step": 26294 + }, + { + "epoch": 0.45436480508708876, + "grad_norm": 1.0542936197383013, + "learning_rate": 1.19514864187376e-05, + "loss": 0.4499, + "step": 26295 + }, + { + "epoch": 0.45438208460049767, + "grad_norm": 1.008494450205537, + "learning_rate": 1.1950937526150858e-05, + "loss": 0.4309, + "step": 26296 + }, + { + "epoch": 0.4543993641139066, + "grad_norm": 1.0116790260163333, + "learning_rate": 1.1950388627453639e-05, + "loss": 0.4939, + "step": 26297 + }, + { + "epoch": 0.45441664362731543, + "grad_norm": 0.6000799097042803, + "learning_rate": 1.1949839722647653e-05, + "loss": 0.3918, + "step": 26298 + }, + { + "epoch": 0.45443392314072434, + "grad_norm": 0.8597477228032947, + "learning_rate": 1.194929081173462e-05, + "loss": 0.3405, + "step": 26299 + }, + { + "epoch": 0.45445120265413325, + "grad_norm": 1.113541138147264, + "learning_rate": 1.194874189471626e-05, + "loss": 0.3892, + "step": 26300 + }, + { + "epoch": 0.45446848216754215, + "grad_norm": 1.2080277871097111, + "learning_rate": 1.1948192971594292e-05, + "loss": 0.495, + "step": 26301 + }, + { + "epoch": 0.45448576168095106, + "grad_norm": 0.7990062464917093, + "learning_rate": 1.1947644042370434e-05, + "loss": 0.3327, + "step": 26302 + }, + { + "epoch": 0.45450304119435997, + "grad_norm": 1.493767607556073, + "learning_rate": 1.1947095107046409e-05, + "loss": 0.3814, + "step": 26303 + }, + { + "epoch": 0.4545203207077689, + "grad_norm": 1.0735006420442492, + "learning_rate": 1.1946546165623938e-05, + "loss": 0.4478, + "step": 26304 + }, + { + "epoch": 0.4545376002211778, + "grad_norm": 1.4917455383177318, + "learning_rate": 1.194599721810473e-05, + "loss": 0.4559, + "step": 26305 + }, + { + "epoch": 0.4545548797345867, + "grad_norm": 1.1304462564420137, + "learning_rate": 1.1945448264490516e-05, + "loss": 0.461, + "step": 26306 + }, + { + "epoch": 0.4545721592479956, + "grad_norm": 1.3097298713044365, + "learning_rate": 1.1944899304783007e-05, + "loss": 0.5164, + "step": 26307 + }, + { + "epoch": 0.45458943876140445, + "grad_norm": 1.0552845181255284, + "learning_rate": 1.1944350338983924e-05, + "loss": 0.5918, + "step": 26308 + }, + { + "epoch": 0.45460671827481336, + "grad_norm": 0.561633108240609, + "learning_rate": 1.194380136709499e-05, + "loss": 0.6827, + "step": 26309 + }, + { + "epoch": 0.45462399778822227, + "grad_norm": 1.1498222555155733, + "learning_rate": 1.1943252389117924e-05, + "loss": 0.4633, + "step": 26310 + }, + { + "epoch": 0.4546412773016312, + "grad_norm": 0.9388342352287835, + "learning_rate": 1.1942703405054442e-05, + "loss": 0.6602, + "step": 26311 + }, + { + "epoch": 0.4546585568150401, + "grad_norm": 1.3315056976975534, + "learning_rate": 1.1942154414906263e-05, + "loss": 0.6001, + "step": 26312 + }, + { + "epoch": 0.454675836328449, + "grad_norm": 0.9183471686749806, + "learning_rate": 1.1941605418675112e-05, + "loss": 0.3816, + "step": 26313 + }, + { + "epoch": 0.4546931158418579, + "grad_norm": 1.1758902186846332, + "learning_rate": 1.1941056416362706e-05, + "loss": 0.6073, + "step": 26314 + }, + { + "epoch": 0.4547103953552668, + "grad_norm": 0.8716528545335395, + "learning_rate": 1.1940507407970762e-05, + "loss": 0.5121, + "step": 26315 + }, + { + "epoch": 0.4547276748686757, + "grad_norm": 1.9538558895661318, + "learning_rate": 1.1939958393501002e-05, + "loss": 0.4568, + "step": 26316 + }, + { + "epoch": 0.4547449543820846, + "grad_norm": 0.7276335137407377, + "learning_rate": 1.1939409372955145e-05, + "loss": 0.5223, + "step": 26317 + }, + { + "epoch": 0.45476223389549353, + "grad_norm": 1.14823622724867, + "learning_rate": 1.193886034633491e-05, + "loss": 0.5428, + "step": 26318 + }, + { + "epoch": 0.4547795134089024, + "grad_norm": 0.8088822010607095, + "learning_rate": 1.1938311313642017e-05, + "loss": 0.5138, + "step": 26319 + }, + { + "epoch": 0.4547967929223113, + "grad_norm": 1.127474349365623, + "learning_rate": 1.1937762274878186e-05, + "loss": 0.4674, + "step": 26320 + }, + { + "epoch": 0.4548140724357202, + "grad_norm": 0.7565994406219951, + "learning_rate": 1.1937213230045135e-05, + "loss": 0.5569, + "step": 26321 + }, + { + "epoch": 0.4548313519491291, + "grad_norm": 1.6626139503542303, + "learning_rate": 1.1936664179144585e-05, + "loss": 0.443, + "step": 26322 + }, + { + "epoch": 0.454848631462538, + "grad_norm": 0.8577330715186658, + "learning_rate": 1.1936115122178258e-05, + "loss": 0.4675, + "step": 26323 + }, + { + "epoch": 0.4548659109759469, + "grad_norm": 0.9112187314817537, + "learning_rate": 1.1935566059147872e-05, + "loss": 0.4745, + "step": 26324 + }, + { + "epoch": 0.4548831904893558, + "grad_norm": 0.7611412026589982, + "learning_rate": 1.1935016990055142e-05, + "loss": 0.4859, + "step": 26325 + }, + { + "epoch": 0.45490047000276473, + "grad_norm": 1.4048339987039213, + "learning_rate": 1.1934467914901795e-05, + "loss": 0.3377, + "step": 26326 + }, + { + "epoch": 0.45491774951617364, + "grad_norm": 0.8903365137377228, + "learning_rate": 1.1933918833689546e-05, + "loss": 0.4272, + "step": 26327 + }, + { + "epoch": 0.45493502902958255, + "grad_norm": 0.6728557752333774, + "learning_rate": 1.1933369746420116e-05, + "loss": 0.3436, + "step": 26328 + }, + { + "epoch": 0.45495230854299146, + "grad_norm": 0.6960360565350632, + "learning_rate": 1.1932820653095225e-05, + "loss": 0.4809, + "step": 26329 + }, + { + "epoch": 0.4549695880564003, + "grad_norm": 0.6881320748785945, + "learning_rate": 1.1932271553716595e-05, + "loss": 0.3712, + "step": 26330 + }, + { + "epoch": 0.4549868675698092, + "grad_norm": 1.0982479543872437, + "learning_rate": 1.1931722448285938e-05, + "loss": 0.5745, + "step": 26331 + }, + { + "epoch": 0.4550041470832181, + "grad_norm": 1.1631433891594498, + "learning_rate": 1.1931173336804984e-05, + "loss": 0.5241, + "step": 26332 + }, + { + "epoch": 0.45502142659662703, + "grad_norm": 0.9358476180068864, + "learning_rate": 1.1930624219275449e-05, + "loss": 0.4208, + "step": 26333 + }, + { + "epoch": 0.45503870611003594, + "grad_norm": 0.939942862656354, + "learning_rate": 1.1930075095699049e-05, + "loss": 0.4783, + "step": 26334 + }, + { + "epoch": 0.45505598562344485, + "grad_norm": 0.6139902570921515, + "learning_rate": 1.1929525966077514e-05, + "loss": 0.2922, + "step": 26335 + }, + { + "epoch": 0.45507326513685376, + "grad_norm": 0.8246728219150969, + "learning_rate": 1.1928976830412548e-05, + "loss": 0.4387, + "step": 26336 + }, + { + "epoch": 0.45509054465026266, + "grad_norm": 0.9112278257669785, + "learning_rate": 1.1928427688705886e-05, + "loss": 0.4772, + "step": 26337 + }, + { + "epoch": 0.45510782416367157, + "grad_norm": 1.7331207657542906, + "learning_rate": 1.192787854095924e-05, + "loss": 0.4608, + "step": 26338 + }, + { + "epoch": 0.4551251036770805, + "grad_norm": 1.2697927056460618, + "learning_rate": 1.192732938717433e-05, + "loss": 0.525, + "step": 26339 + }, + { + "epoch": 0.45514238319048933, + "grad_norm": 0.9041748246146469, + "learning_rate": 1.1926780227352882e-05, + "loss": 0.4789, + "step": 26340 + }, + { + "epoch": 0.45515966270389824, + "grad_norm": 1.8553595780437389, + "learning_rate": 1.1926231061496609e-05, + "loss": 0.3599, + "step": 26341 + }, + { + "epoch": 0.45517694221730715, + "grad_norm": 0.9624319701685814, + "learning_rate": 1.1925681889607232e-05, + "loss": 0.3715, + "step": 26342 + }, + { + "epoch": 0.45519422173071605, + "grad_norm": 0.7258495639667614, + "learning_rate": 1.1925132711686475e-05, + "loss": 0.327, + "step": 26343 + }, + { + "epoch": 0.45521150124412496, + "grad_norm": 0.9239521073627175, + "learning_rate": 1.1924583527736057e-05, + "loss": 0.6203, + "step": 26344 + }, + { + "epoch": 0.45522878075753387, + "grad_norm": 0.47006800110089847, + "learning_rate": 1.1924034337757699e-05, + "loss": 0.5714, + "step": 26345 + }, + { + "epoch": 0.4552460602709428, + "grad_norm": 0.8657083916884124, + "learning_rate": 1.1923485141753118e-05, + "loss": 0.3338, + "step": 26346 + }, + { + "epoch": 0.4552633397843517, + "grad_norm": 1.342101822540664, + "learning_rate": 1.1922935939724033e-05, + "loss": 0.4388, + "step": 26347 + }, + { + "epoch": 0.4552806192977606, + "grad_norm": 1.1394376631885093, + "learning_rate": 1.1922386731672167e-05, + "loss": 0.3691, + "step": 26348 + }, + { + "epoch": 0.4552978988111695, + "grad_norm": 0.9353075459465163, + "learning_rate": 1.1921837517599241e-05, + "loss": 0.4183, + "step": 26349 + }, + { + "epoch": 0.4553151783245784, + "grad_norm": 0.8964408760862969, + "learning_rate": 1.1921288297506975e-05, + "loss": 0.5178, + "step": 26350 + }, + { + "epoch": 0.45533245783798726, + "grad_norm": 1.3291318256562024, + "learning_rate": 1.1920739071397086e-05, + "loss": 0.502, + "step": 26351 + }, + { + "epoch": 0.45534973735139617, + "grad_norm": 2.7579293368813147, + "learning_rate": 1.1920189839271294e-05, + "loss": 0.4701, + "step": 26352 + }, + { + "epoch": 0.4553670168648051, + "grad_norm": 0.9560882899271733, + "learning_rate": 1.1919640601131325e-05, + "loss": 0.3776, + "step": 26353 + }, + { + "epoch": 0.455384296378214, + "grad_norm": 0.819903367979505, + "learning_rate": 1.1919091356978894e-05, + "loss": 0.2884, + "step": 26354 + }, + { + "epoch": 0.4554015758916229, + "grad_norm": 0.853687909024502, + "learning_rate": 1.1918542106815726e-05, + "loss": 0.4054, + "step": 26355 + }, + { + "epoch": 0.4554188554050318, + "grad_norm": 0.5558498273916431, + "learning_rate": 1.1917992850643537e-05, + "loss": 0.7371, + "step": 26356 + }, + { + "epoch": 0.4554361349184407, + "grad_norm": 0.8709639771746416, + "learning_rate": 1.1917443588464046e-05, + "loss": 0.5702, + "step": 26357 + }, + { + "epoch": 0.4554534144318496, + "grad_norm": 1.0971848063822618, + "learning_rate": 1.1916894320278978e-05, + "loss": 0.349, + "step": 26358 + }, + { + "epoch": 0.4554706939452585, + "grad_norm": 0.9349923743907179, + "learning_rate": 1.191634504609005e-05, + "loss": 0.5078, + "step": 26359 + }, + { + "epoch": 0.45548797345866743, + "grad_norm": 1.377035161135017, + "learning_rate": 1.1915795765898985e-05, + "loss": 0.4288, + "step": 26360 + }, + { + "epoch": 0.4555052529720763, + "grad_norm": 0.5120785279126503, + "learning_rate": 1.1915246479707502e-05, + "loss": 0.5885, + "step": 26361 + }, + { + "epoch": 0.4555225324854852, + "grad_norm": 0.876152533858483, + "learning_rate": 1.191469718751732e-05, + "loss": 0.4001, + "step": 26362 + }, + { + "epoch": 0.4555398119988941, + "grad_norm": 0.995703208329934, + "learning_rate": 1.1914147889330163e-05, + "loss": 0.4766, + "step": 26363 + }, + { + "epoch": 0.455557091512303, + "grad_norm": 0.9373326801109357, + "learning_rate": 1.191359858514775e-05, + "loss": 0.5309, + "step": 26364 + }, + { + "epoch": 0.4555743710257119, + "grad_norm": 1.0750910002354448, + "learning_rate": 1.1913049274971799e-05, + "loss": 0.3932, + "step": 26365 + }, + { + "epoch": 0.4555916505391208, + "grad_norm": 1.0366037672613149, + "learning_rate": 1.1912499958804034e-05, + "loss": 0.5173, + "step": 26366 + }, + { + "epoch": 0.4556089300525297, + "grad_norm": 0.9648458847388117, + "learning_rate": 1.1911950636646172e-05, + "loss": 0.5232, + "step": 26367 + }, + { + "epoch": 0.45562620956593863, + "grad_norm": 0.9394468428516676, + "learning_rate": 1.1911401308499935e-05, + "loss": 0.394, + "step": 26368 + }, + { + "epoch": 0.45564348907934754, + "grad_norm": 0.7804447501941267, + "learning_rate": 1.1910851974367047e-05, + "loss": 0.5824, + "step": 26369 + }, + { + "epoch": 0.45566076859275645, + "grad_norm": 0.8573246224953038, + "learning_rate": 1.191030263424922e-05, + "loss": 0.4353, + "step": 26370 + }, + { + "epoch": 0.45567804810616536, + "grad_norm": 0.3722375176990033, + "learning_rate": 1.1909753288148183e-05, + "loss": 0.415, + "step": 26371 + }, + { + "epoch": 0.4556953276195742, + "grad_norm": 1.0711105688802387, + "learning_rate": 1.1909203936065654e-05, + "loss": 0.4737, + "step": 26372 + }, + { + "epoch": 0.4557126071329831, + "grad_norm": 1.1737671530604026, + "learning_rate": 1.1908654578003353e-05, + "loss": 0.5743, + "step": 26373 + }, + { + "epoch": 0.455729886646392, + "grad_norm": 1.0743863358162504, + "learning_rate": 1.1908105213963e-05, + "loss": 0.3939, + "step": 26374 + }, + { + "epoch": 0.45574716615980093, + "grad_norm": 0.9540294961264989, + "learning_rate": 1.1907555843946316e-05, + "loss": 0.397, + "step": 26375 + }, + { + "epoch": 0.45576444567320984, + "grad_norm": 1.208902468299047, + "learning_rate": 1.1907006467955025e-05, + "loss": 0.5842, + "step": 26376 + }, + { + "epoch": 0.45578172518661875, + "grad_norm": 0.4117518605104887, + "learning_rate": 1.1906457085990843e-05, + "loss": 0.4908, + "step": 26377 + }, + { + "epoch": 0.45579900470002765, + "grad_norm": 1.0708484024953395, + "learning_rate": 1.1905907698055493e-05, + "loss": 0.3912, + "step": 26378 + }, + { + "epoch": 0.45581628421343656, + "grad_norm": 1.1155655595779703, + "learning_rate": 1.1905358304150691e-05, + "loss": 0.5294, + "step": 26379 + }, + { + "epoch": 0.45583356372684547, + "grad_norm": 1.105390853441421, + "learning_rate": 1.1904808904278167e-05, + "loss": 0.4219, + "step": 26380 + }, + { + "epoch": 0.4558508432402544, + "grad_norm": 0.6106444150764021, + "learning_rate": 1.1904259498439636e-05, + "loss": 0.4172, + "step": 26381 + }, + { + "epoch": 0.45586812275366323, + "grad_norm": 1.1567102550440662, + "learning_rate": 1.1903710086636817e-05, + "loss": 0.373, + "step": 26382 + }, + { + "epoch": 0.45588540226707214, + "grad_norm": 0.7208834686935642, + "learning_rate": 1.1903160668871436e-05, + "loss": 0.678, + "step": 26383 + }, + { + "epoch": 0.45590268178048104, + "grad_norm": 1.160569711465698, + "learning_rate": 1.190261124514521e-05, + "loss": 0.4193, + "step": 26384 + }, + { + "epoch": 0.45591996129388995, + "grad_norm": 1.5701864578484401, + "learning_rate": 1.1902061815459863e-05, + "loss": 0.3757, + "step": 26385 + }, + { + "epoch": 0.45593724080729886, + "grad_norm": 0.7385403000941799, + "learning_rate": 1.1901512379817113e-05, + "loss": 0.7022, + "step": 26386 + }, + { + "epoch": 0.45595452032070777, + "grad_norm": 0.8291914749058554, + "learning_rate": 1.1900962938218681e-05, + "loss": 0.4093, + "step": 26387 + }, + { + "epoch": 0.4559717998341167, + "grad_norm": 1.449636294764612, + "learning_rate": 1.1900413490666287e-05, + "loss": 0.5038, + "step": 26388 + }, + { + "epoch": 0.4559890793475256, + "grad_norm": 0.5402847857750832, + "learning_rate": 1.1899864037161657e-05, + "loss": 0.78, + "step": 26389 + }, + { + "epoch": 0.4560063588609345, + "grad_norm": 1.2009375920865353, + "learning_rate": 1.1899314577706507e-05, + "loss": 0.3419, + "step": 26390 + }, + { + "epoch": 0.4560236383743434, + "grad_norm": 1.5297446411682143, + "learning_rate": 1.1898765112302557e-05, + "loss": 0.5009, + "step": 26391 + }, + { + "epoch": 0.4560409178877523, + "grad_norm": 0.9325904260649608, + "learning_rate": 1.1898215640951532e-05, + "loss": 0.3854, + "step": 26392 + }, + { + "epoch": 0.45605819740116116, + "grad_norm": 1.4968492689524653, + "learning_rate": 1.1897666163655153e-05, + "loss": 0.5314, + "step": 26393 + }, + { + "epoch": 0.45607547691457007, + "grad_norm": 0.8217285354776719, + "learning_rate": 1.1897116680415138e-05, + "loss": 0.3791, + "step": 26394 + }, + { + "epoch": 0.456092756427979, + "grad_norm": 1.252090359992464, + "learning_rate": 1.1896567191233207e-05, + "loss": 0.5756, + "step": 26395 + }, + { + "epoch": 0.4561100359413879, + "grad_norm": 1.1269526570241195, + "learning_rate": 1.1896017696111089e-05, + "loss": 0.4031, + "step": 26396 + }, + { + "epoch": 0.4561273154547968, + "grad_norm": 0.6867667192585376, + "learning_rate": 1.1895468195050494e-05, + "loss": 0.2798, + "step": 26397 + }, + { + "epoch": 0.4561445949682057, + "grad_norm": 0.9085020428612973, + "learning_rate": 1.1894918688053152e-05, + "loss": 0.4663, + "step": 26398 + }, + { + "epoch": 0.4561618744816146, + "grad_norm": 0.815685010829846, + "learning_rate": 1.189436917512078e-05, + "loss": 0.4379, + "step": 26399 + }, + { + "epoch": 0.4561791539950235, + "grad_norm": 1.0469349961627101, + "learning_rate": 1.18938196562551e-05, + "loss": 0.3686, + "step": 26400 + }, + { + "epoch": 0.4561964335084324, + "grad_norm": 0.9044745446858538, + "learning_rate": 1.1893270131457831e-05, + "loss": 0.5034, + "step": 26401 + }, + { + "epoch": 0.4562137130218413, + "grad_norm": 0.7316684812168678, + "learning_rate": 1.1892720600730696e-05, + "loss": 0.3774, + "step": 26402 + }, + { + "epoch": 0.45623099253525023, + "grad_norm": 1.240022218516864, + "learning_rate": 1.1892171064075417e-05, + "loss": 0.5562, + "step": 26403 + }, + { + "epoch": 0.4562482720486591, + "grad_norm": 1.612458499140188, + "learning_rate": 1.1891621521493717e-05, + "loss": 0.5659, + "step": 26404 + }, + { + "epoch": 0.456265551562068, + "grad_norm": 1.0141303937435633, + "learning_rate": 1.1891071972987312e-05, + "loss": 0.4798, + "step": 26405 + }, + { + "epoch": 0.4562828310754769, + "grad_norm": 0.5998936669927322, + "learning_rate": 1.1890522418557927e-05, + "loss": 0.3496, + "step": 26406 + }, + { + "epoch": 0.4563001105888858, + "grad_norm": 1.1449172652412498, + "learning_rate": 1.1889972858207282e-05, + "loss": 0.4673, + "step": 26407 + }, + { + "epoch": 0.4563173901022947, + "grad_norm": 0.8284411267701043, + "learning_rate": 1.1889423291937096e-05, + "loss": 0.2687, + "step": 26408 + }, + { + "epoch": 0.4563346696157036, + "grad_norm": 1.463189946027467, + "learning_rate": 1.1888873719749096e-05, + "loss": 0.3495, + "step": 26409 + }, + { + "epoch": 0.45635194912911253, + "grad_norm": 1.101837584451612, + "learning_rate": 1.1888324141645e-05, + "loss": 0.4205, + "step": 26410 + }, + { + "epoch": 0.45636922864252144, + "grad_norm": 1.1966342339167861, + "learning_rate": 1.1887774557626526e-05, + "loss": 0.4574, + "step": 26411 + }, + { + "epoch": 0.45638650815593035, + "grad_norm": 0.94025794483191, + "learning_rate": 1.18872249676954e-05, + "loss": 0.4315, + "step": 26412 + }, + { + "epoch": 0.45640378766933926, + "grad_norm": 1.2202735233865087, + "learning_rate": 1.1886675371853343e-05, + "loss": 0.4489, + "step": 26413 + }, + { + "epoch": 0.4564210671827481, + "grad_norm": 0.9951162874093382, + "learning_rate": 1.1886125770102072e-05, + "loss": 0.3396, + "step": 26414 + }, + { + "epoch": 0.456438346696157, + "grad_norm": 0.7486862877956438, + "learning_rate": 1.1885576162443317e-05, + "loss": 0.3231, + "step": 26415 + }, + { + "epoch": 0.4564556262095659, + "grad_norm": 0.731883403977366, + "learning_rate": 1.1885026548878793e-05, + "loss": 0.2838, + "step": 26416 + }, + { + "epoch": 0.45647290572297483, + "grad_norm": 0.9001844553313245, + "learning_rate": 1.188447692941022e-05, + "loss": 0.3128, + "step": 26417 + }, + { + "epoch": 0.45649018523638374, + "grad_norm": 0.40417882462542715, + "learning_rate": 1.1883927304039324e-05, + "loss": 0.6843, + "step": 26418 + }, + { + "epoch": 0.45650746474979265, + "grad_norm": 0.9016068561673416, + "learning_rate": 1.1883377672767822e-05, + "loss": 0.4716, + "step": 26419 + }, + { + "epoch": 0.45652474426320155, + "grad_norm": 1.0148945886522385, + "learning_rate": 1.1882828035597443e-05, + "loss": 0.3697, + "step": 26420 + }, + { + "epoch": 0.45654202377661046, + "grad_norm": 0.952346318821683, + "learning_rate": 1.1882278392529897e-05, + "loss": 0.3418, + "step": 26421 + }, + { + "epoch": 0.45655930329001937, + "grad_norm": 0.7523340447448388, + "learning_rate": 1.1881728743566916e-05, + "loss": 0.4803, + "step": 26422 + }, + { + "epoch": 0.4565765828034283, + "grad_norm": 1.018682861737932, + "learning_rate": 1.1881179088710218e-05, + "loss": 0.5271, + "step": 26423 + }, + { + "epoch": 0.4565938623168372, + "grad_norm": 0.9932486334148947, + "learning_rate": 1.1880629427961523e-05, + "loss": 0.7817, + "step": 26424 + }, + { + "epoch": 0.45661114183024604, + "grad_norm": 1.0081490383520497, + "learning_rate": 1.1880079761322552e-05, + "loss": 0.3973, + "step": 26425 + }, + { + "epoch": 0.45662842134365494, + "grad_norm": 0.8423496010032739, + "learning_rate": 1.187953008879503e-05, + "loss": 0.5131, + "step": 26426 + }, + { + "epoch": 0.45664570085706385, + "grad_norm": 0.799288194813616, + "learning_rate": 1.1878980410380681e-05, + "loss": 0.3083, + "step": 26427 + }, + { + "epoch": 0.45666298037047276, + "grad_norm": 0.9729421890660749, + "learning_rate": 1.1878430726081216e-05, + "loss": 0.2451, + "step": 26428 + }, + { + "epoch": 0.45668025988388167, + "grad_norm": 0.7959497295466994, + "learning_rate": 1.1877881035898366e-05, + "loss": 0.3827, + "step": 26429 + }, + { + "epoch": 0.4566975393972906, + "grad_norm": 0.9164619206086646, + "learning_rate": 1.187733133983385e-05, + "loss": 0.5517, + "step": 26430 + }, + { + "epoch": 0.4567148189106995, + "grad_norm": 1.0859976406723093, + "learning_rate": 1.187678163788939e-05, + "loss": 0.4511, + "step": 26431 + }, + { + "epoch": 0.4567320984241084, + "grad_norm": 0.9321475699086933, + "learning_rate": 1.1876231930066706e-05, + "loss": 0.4085, + "step": 26432 + }, + { + "epoch": 0.4567493779375173, + "grad_norm": 0.9548256538398315, + "learning_rate": 1.187568221636752e-05, + "loss": 0.3288, + "step": 26433 + }, + { + "epoch": 0.4567666574509262, + "grad_norm": 0.8077634266991153, + "learning_rate": 1.1875132496793555e-05, + "loss": 0.3513, + "step": 26434 + }, + { + "epoch": 0.45678393696433506, + "grad_norm": 0.4800147439847256, + "learning_rate": 1.1874582771346534e-05, + "loss": 0.7282, + "step": 26435 + }, + { + "epoch": 0.45680121647774397, + "grad_norm": 0.7317665413231028, + "learning_rate": 1.1874033040028179e-05, + "loss": 0.4668, + "step": 26436 + }, + { + "epoch": 0.4568184959911529, + "grad_norm": 0.9992857010776778, + "learning_rate": 1.1873483302840207e-05, + "loss": 0.4512, + "step": 26437 + }, + { + "epoch": 0.4568357755045618, + "grad_norm": 1.088474604573642, + "learning_rate": 1.1872933559784347e-05, + "loss": 0.4945, + "step": 26438 + }, + { + "epoch": 0.4568530550179707, + "grad_norm": 0.9064877893526399, + "learning_rate": 1.1872383810862311e-05, + "loss": 0.4563, + "step": 26439 + }, + { + "epoch": 0.4568703345313796, + "grad_norm": 1.0251968907123021, + "learning_rate": 1.1871834056075831e-05, + "loss": 0.4674, + "step": 26440 + }, + { + "epoch": 0.4568876140447885, + "grad_norm": 1.1385077147775524, + "learning_rate": 1.1871284295426624e-05, + "loss": 0.573, + "step": 26441 + }, + { + "epoch": 0.4569048935581974, + "grad_norm": 2.3405298506657126, + "learning_rate": 1.187073452891641e-05, + "loss": 0.6146, + "step": 26442 + }, + { + "epoch": 0.4569221730716063, + "grad_norm": 0.6459455113370568, + "learning_rate": 1.1870184756546914e-05, + "loss": 0.6235, + "step": 26443 + }, + { + "epoch": 0.4569394525850152, + "grad_norm": 1.3469722728771978, + "learning_rate": 1.186963497831986e-05, + "loss": 0.294, + "step": 26444 + }, + { + "epoch": 0.45695673209842413, + "grad_norm": 1.2879337721012913, + "learning_rate": 1.1869085194236962e-05, + "loss": 0.5645, + "step": 26445 + }, + { + "epoch": 0.456974011611833, + "grad_norm": 0.8500261272158979, + "learning_rate": 1.186853540429995e-05, + "loss": 0.3081, + "step": 26446 + }, + { + "epoch": 0.4569912911252419, + "grad_norm": 0.7932872117630945, + "learning_rate": 1.1867985608510544e-05, + "loss": 0.7369, + "step": 26447 + }, + { + "epoch": 0.4570085706386508, + "grad_norm": 0.9943486252652217, + "learning_rate": 1.1867435806870465e-05, + "loss": 0.4434, + "step": 26448 + }, + { + "epoch": 0.4570258501520597, + "grad_norm": 1.5548038783421125, + "learning_rate": 1.1866885999381434e-05, + "loss": 0.4838, + "step": 26449 + }, + { + "epoch": 0.4570431296654686, + "grad_norm": 1.5656116034664838, + "learning_rate": 1.1866336186045175e-05, + "loss": 0.4987, + "step": 26450 + }, + { + "epoch": 0.4570604091788775, + "grad_norm": 0.5527189930898151, + "learning_rate": 1.1865786366863408e-05, + "loss": 0.6726, + "step": 26451 + }, + { + "epoch": 0.45707768869228643, + "grad_norm": 0.5267097949363411, + "learning_rate": 1.1865236541837857e-05, + "loss": 0.4763, + "step": 26452 + }, + { + "epoch": 0.45709496820569534, + "grad_norm": 1.5288099683587968, + "learning_rate": 1.1864686710970245e-05, + "loss": 0.3966, + "step": 26453 + }, + { + "epoch": 0.45711224771910425, + "grad_norm": 1.486481225665944, + "learning_rate": 1.1864136874262289e-05, + "loss": 0.4422, + "step": 26454 + }, + { + "epoch": 0.45712952723251316, + "grad_norm": 0.8893932476274906, + "learning_rate": 1.1863587031715718e-05, + "loss": 0.4451, + "step": 26455 + }, + { + "epoch": 0.457146806745922, + "grad_norm": 0.761820731087491, + "learning_rate": 1.186303718333225e-05, + "loss": 0.3393, + "step": 26456 + }, + { + "epoch": 0.4571640862593309, + "grad_norm": 0.819010751325659, + "learning_rate": 1.1862487329113606e-05, + "loss": 0.4348, + "step": 26457 + }, + { + "epoch": 0.4571813657727398, + "grad_norm": 1.2945801389939902, + "learning_rate": 1.1861937469061513e-05, + "loss": 0.4366, + "step": 26458 + }, + { + "epoch": 0.45719864528614873, + "grad_norm": 0.8262636748310924, + "learning_rate": 1.1861387603177687e-05, + "loss": 0.3759, + "step": 26459 + }, + { + "epoch": 0.45721592479955764, + "grad_norm": 0.4371813777101822, + "learning_rate": 1.1860837731463856e-05, + "loss": 0.5448, + "step": 26460 + }, + { + "epoch": 0.45723320431296655, + "grad_norm": 0.7451849543594435, + "learning_rate": 1.1860287853921741e-05, + "loss": 0.4418, + "step": 26461 + }, + { + "epoch": 0.45725048382637545, + "grad_norm": 0.7546249699204713, + "learning_rate": 1.185973797055306e-05, + "loss": 0.5162, + "step": 26462 + }, + { + "epoch": 0.45726776333978436, + "grad_norm": 1.100742171038288, + "learning_rate": 1.1859188081359541e-05, + "loss": 0.5163, + "step": 26463 + }, + { + "epoch": 0.45728504285319327, + "grad_norm": 0.8865159570817079, + "learning_rate": 1.18586381863429e-05, + "loss": 0.428, + "step": 26464 + }, + { + "epoch": 0.4573023223666022, + "grad_norm": 0.8968681327297733, + "learning_rate": 1.1858088285504865e-05, + "loss": 0.3922, + "step": 26465 + }, + { + "epoch": 0.4573196018800111, + "grad_norm": 0.6574570213180836, + "learning_rate": 1.1857538378847158e-05, + "loss": 0.3062, + "step": 26466 + }, + { + "epoch": 0.45733688139341994, + "grad_norm": 1.1956127527299298, + "learning_rate": 1.1856988466371498e-05, + "loss": 0.6315, + "step": 26467 + }, + { + "epoch": 0.45735416090682884, + "grad_norm": 1.1214617113362664, + "learning_rate": 1.185643854807961e-05, + "loss": 0.3806, + "step": 26468 + }, + { + "epoch": 0.45737144042023775, + "grad_norm": 1.3094968153572553, + "learning_rate": 1.1855888623973216e-05, + "loss": 0.3598, + "step": 26469 + }, + { + "epoch": 0.45738871993364666, + "grad_norm": 1.1498804551088793, + "learning_rate": 1.1855338694054037e-05, + "loss": 0.3618, + "step": 26470 + }, + { + "epoch": 0.45740599944705557, + "grad_norm": 0.8774239712682531, + "learning_rate": 1.1854788758323794e-05, + "loss": 0.4069, + "step": 26471 + }, + { + "epoch": 0.4574232789604645, + "grad_norm": 0.8296911857420242, + "learning_rate": 1.1854238816784215e-05, + "loss": 0.6871, + "step": 26472 + }, + { + "epoch": 0.4574405584738734, + "grad_norm": 1.089546637019029, + "learning_rate": 1.1853688869437016e-05, + "loss": 0.4861, + "step": 26473 + }, + { + "epoch": 0.4574578379872823, + "grad_norm": 1.0826288164186777, + "learning_rate": 1.1853138916283922e-05, + "loss": 0.4261, + "step": 26474 + }, + { + "epoch": 0.4574751175006912, + "grad_norm": 0.8539988775424121, + "learning_rate": 1.1852588957326659e-05, + "loss": 0.3798, + "step": 26475 + }, + { + "epoch": 0.4574923970141001, + "grad_norm": 1.2490848129078753, + "learning_rate": 1.1852038992566945e-05, + "loss": 0.3294, + "step": 26476 + }, + { + "epoch": 0.457509676527509, + "grad_norm": 1.1001346479469998, + "learning_rate": 1.1851489022006504e-05, + "loss": 0.3169, + "step": 26477 + }, + { + "epoch": 0.45752695604091786, + "grad_norm": 0.9216047891437388, + "learning_rate": 1.1850939045647062e-05, + "loss": 0.5206, + "step": 26478 + }, + { + "epoch": 0.4575442355543268, + "grad_norm": 1.0313530887241558, + "learning_rate": 1.1850389063490335e-05, + "loss": 0.3728, + "step": 26479 + }, + { + "epoch": 0.4575615150677357, + "grad_norm": 0.9926943732968956, + "learning_rate": 1.184983907553805e-05, + "loss": 0.563, + "step": 26480 + }, + { + "epoch": 0.4575787945811446, + "grad_norm": 1.2215252333817057, + "learning_rate": 1.1849289081791927e-05, + "loss": 0.4178, + "step": 26481 + }, + { + "epoch": 0.4575960740945535, + "grad_norm": 0.8568647149368057, + "learning_rate": 1.1848739082253691e-05, + "loss": 0.3723, + "step": 26482 + }, + { + "epoch": 0.4576133536079624, + "grad_norm": 1.0175927663442574, + "learning_rate": 1.1848189076925063e-05, + "loss": 0.2946, + "step": 26483 + }, + { + "epoch": 0.4576306331213713, + "grad_norm": 0.7766467242371213, + "learning_rate": 1.1847639065807768e-05, + "loss": 0.3685, + "step": 26484 + }, + { + "epoch": 0.4576479126347802, + "grad_norm": 1.1637499156681148, + "learning_rate": 1.1847089048903524e-05, + "loss": 0.5674, + "step": 26485 + }, + { + "epoch": 0.4576651921481891, + "grad_norm": 1.3541081814857026, + "learning_rate": 1.184653902621406e-05, + "loss": 0.5763, + "step": 26486 + }, + { + "epoch": 0.45768247166159803, + "grad_norm": 0.9812306136939124, + "learning_rate": 1.1845988997741093e-05, + "loss": 0.5997, + "step": 26487 + }, + { + "epoch": 0.4576997511750069, + "grad_norm": 1.1355085955651987, + "learning_rate": 1.184543896348635e-05, + "loss": 0.3024, + "step": 26488 + }, + { + "epoch": 0.4577170306884158, + "grad_norm": 0.9499052883113768, + "learning_rate": 1.1844888923451551e-05, + "loss": 0.4011, + "step": 26489 + }, + { + "epoch": 0.4577343102018247, + "grad_norm": 0.7896021015671574, + "learning_rate": 1.1844338877638421e-05, + "loss": 0.3636, + "step": 26490 + }, + { + "epoch": 0.4577515897152336, + "grad_norm": 1.1300580204781008, + "learning_rate": 1.184378882604868e-05, + "loss": 0.3137, + "step": 26491 + }, + { + "epoch": 0.4577688692286425, + "grad_norm": 1.321103061400188, + "learning_rate": 1.1843238768684054e-05, + "loss": 0.406, + "step": 26492 + }, + { + "epoch": 0.4577861487420514, + "grad_norm": 0.8577311813285894, + "learning_rate": 1.1842688705546264e-05, + "loss": 0.4294, + "step": 26493 + }, + { + "epoch": 0.45780342825546033, + "grad_norm": 0.7292686845984311, + "learning_rate": 1.1842138636637028e-05, + "loss": 0.4469, + "step": 26494 + }, + { + "epoch": 0.45782070776886924, + "grad_norm": 0.8190103267004638, + "learning_rate": 1.1841588561958079e-05, + "loss": 0.4771, + "step": 26495 + }, + { + "epoch": 0.45783798728227815, + "grad_norm": 1.269454722391005, + "learning_rate": 1.1841038481511134e-05, + "loss": 0.5243, + "step": 26496 + }, + { + "epoch": 0.45785526679568705, + "grad_norm": 0.8233081652813087, + "learning_rate": 1.1840488395297913e-05, + "loss": 0.4747, + "step": 26497 + }, + { + "epoch": 0.45787254630909596, + "grad_norm": 0.8071201839335216, + "learning_rate": 1.1839938303320146e-05, + "loss": 0.3012, + "step": 26498 + }, + { + "epoch": 0.4578898258225048, + "grad_norm": 0.914704872273586, + "learning_rate": 1.1839388205579554e-05, + "loss": 0.4888, + "step": 26499 + }, + { + "epoch": 0.4579071053359137, + "grad_norm": 1.2897333847635986, + "learning_rate": 1.1838838102077856e-05, + "loss": 0.4971, + "step": 26500 + }, + { + "epoch": 0.45792438484932263, + "grad_norm": 0.8482909377413629, + "learning_rate": 1.1838287992816776e-05, + "loss": 0.4029, + "step": 26501 + }, + { + "epoch": 0.45794166436273154, + "grad_norm": 0.9071927766684859, + "learning_rate": 1.1837737877798039e-05, + "loss": 0.4029, + "step": 26502 + }, + { + "epoch": 0.45795894387614045, + "grad_norm": 0.9215956331374652, + "learning_rate": 1.1837187757023369e-05, + "loss": 0.3683, + "step": 26503 + }, + { + "epoch": 0.45797622338954935, + "grad_norm": 0.9758742584786357, + "learning_rate": 1.1836637630494487e-05, + "loss": 0.4512, + "step": 26504 + }, + { + "epoch": 0.45799350290295826, + "grad_norm": 1.0251113544596646, + "learning_rate": 1.1836087498213114e-05, + "loss": 0.4472, + "step": 26505 + }, + { + "epoch": 0.45801078241636717, + "grad_norm": 0.9640191737761458, + "learning_rate": 1.1835537360180977e-05, + "loss": 0.4636, + "step": 26506 + }, + { + "epoch": 0.4580280619297761, + "grad_norm": 0.4988818281393843, + "learning_rate": 1.1834987216399798e-05, + "loss": 0.58, + "step": 26507 + }, + { + "epoch": 0.458045341443185, + "grad_norm": 0.9650300852233077, + "learning_rate": 1.1834437066871299e-05, + "loss": 0.4296, + "step": 26508 + }, + { + "epoch": 0.45806262095659384, + "grad_norm": 0.5565301406041275, + "learning_rate": 1.1833886911597204e-05, + "loss": 0.4507, + "step": 26509 + }, + { + "epoch": 0.45807990047000274, + "grad_norm": 0.8931226295450523, + "learning_rate": 1.1833336750579238e-05, + "loss": 0.4695, + "step": 26510 + }, + { + "epoch": 0.45809717998341165, + "grad_norm": 0.4591467553298525, + "learning_rate": 1.1832786583819119e-05, + "loss": 0.4414, + "step": 26511 + }, + { + "epoch": 0.45811445949682056, + "grad_norm": 0.9112595846071018, + "learning_rate": 1.1832236411318574e-05, + "loss": 0.5014, + "step": 26512 + }, + { + "epoch": 0.45813173901022947, + "grad_norm": 0.9144215445254531, + "learning_rate": 1.1831686233079326e-05, + "loss": 0.4456, + "step": 26513 + }, + { + "epoch": 0.4581490185236384, + "grad_norm": 0.7514765516774152, + "learning_rate": 1.1831136049103094e-05, + "loss": 0.482, + "step": 26514 + }, + { + "epoch": 0.4581662980370473, + "grad_norm": 1.0048809020748533, + "learning_rate": 1.1830585859391607e-05, + "loss": 0.399, + "step": 26515 + }, + { + "epoch": 0.4581835775504562, + "grad_norm": 1.086699561140063, + "learning_rate": 1.1830035663946587e-05, + "loss": 0.4481, + "step": 26516 + }, + { + "epoch": 0.4582008570638651, + "grad_norm": 1.1246531672895899, + "learning_rate": 1.1829485462769754e-05, + "loss": 0.3961, + "step": 26517 + }, + { + "epoch": 0.458218136577274, + "grad_norm": 0.964681083679847, + "learning_rate": 1.1828935255862834e-05, + "loss": 0.5601, + "step": 26518 + }, + { + "epoch": 0.4582354160906829, + "grad_norm": 0.7984606628226307, + "learning_rate": 1.1828385043227553e-05, + "loss": 0.7373, + "step": 26519 + }, + { + "epoch": 0.45825269560409176, + "grad_norm": 1.288871039859103, + "learning_rate": 1.182783482486563e-05, + "loss": 0.3236, + "step": 26520 + }, + { + "epoch": 0.45826997511750067, + "grad_norm": 0.5247388578269733, + "learning_rate": 1.1827284600778788e-05, + "loss": 0.6315, + "step": 26521 + }, + { + "epoch": 0.4582872546309096, + "grad_norm": 0.4801509021344712, + "learning_rate": 1.1826734370968751e-05, + "loss": 0.8202, + "step": 26522 + }, + { + "epoch": 0.4583045341443185, + "grad_norm": 2.675550365108863, + "learning_rate": 1.1826184135437244e-05, + "loss": 0.6993, + "step": 26523 + }, + { + "epoch": 0.4583218136577274, + "grad_norm": 1.4538145712989679, + "learning_rate": 1.182563389418599e-05, + "loss": 0.4437, + "step": 26524 + }, + { + "epoch": 0.4583390931711363, + "grad_norm": 0.876950835428249, + "learning_rate": 1.1825083647216711e-05, + "loss": 0.5182, + "step": 26525 + }, + { + "epoch": 0.4583563726845452, + "grad_norm": 0.8822044199869575, + "learning_rate": 1.182453339453113e-05, + "loss": 0.455, + "step": 26526 + }, + { + "epoch": 0.4583736521979541, + "grad_norm": 0.8834341029911738, + "learning_rate": 1.1823983136130977e-05, + "loss": 0.5472, + "step": 26527 + }, + { + "epoch": 0.458390931711363, + "grad_norm": 0.6495129769185791, + "learning_rate": 1.1823432872017963e-05, + "loss": 0.4867, + "step": 26528 + }, + { + "epoch": 0.45840821122477193, + "grad_norm": 0.9294069368432721, + "learning_rate": 1.1822882602193824e-05, + "loss": 0.5494, + "step": 26529 + }, + { + "epoch": 0.4584254907381808, + "grad_norm": 1.0149695280632347, + "learning_rate": 1.182233232666028e-05, + "loss": 0.5566, + "step": 26530 + }, + { + "epoch": 0.4584427702515897, + "grad_norm": 0.9739105348859053, + "learning_rate": 1.1821782045419048e-05, + "loss": 0.6099, + "step": 26531 + }, + { + "epoch": 0.4584600497649986, + "grad_norm": 0.7396209533747634, + "learning_rate": 1.1821231758471856e-05, + "loss": 0.4598, + "step": 26532 + }, + { + "epoch": 0.4584773292784075, + "grad_norm": 0.4368709440277171, + "learning_rate": 1.1820681465820431e-05, + "loss": 0.6985, + "step": 26533 + }, + { + "epoch": 0.4584946087918164, + "grad_norm": 0.9363718985882662, + "learning_rate": 1.1820131167466488e-05, + "loss": 0.3957, + "step": 26534 + }, + { + "epoch": 0.4585118883052253, + "grad_norm": 1.0587398039977443, + "learning_rate": 1.1819580863411762e-05, + "loss": 0.3408, + "step": 26535 + }, + { + "epoch": 0.45852916781863423, + "grad_norm": 1.13454648279113, + "learning_rate": 1.1819030553657968e-05, + "loss": 0.4347, + "step": 26536 + }, + { + "epoch": 0.45854644733204314, + "grad_norm": 0.9363588249533059, + "learning_rate": 1.1818480238206831e-05, + "loss": 0.4488, + "step": 26537 + }, + { + "epoch": 0.45856372684545205, + "grad_norm": 0.8447188008765023, + "learning_rate": 1.1817929917060077e-05, + "loss": 0.3777, + "step": 26538 + }, + { + "epoch": 0.45858100635886095, + "grad_norm": 2.1851587066982643, + "learning_rate": 1.181737959021943e-05, + "loss": 0.4499, + "step": 26539 + }, + { + "epoch": 0.45859828587226986, + "grad_norm": 1.05183193962346, + "learning_rate": 1.1816829257686609e-05, + "loss": 0.3447, + "step": 26540 + }, + { + "epoch": 0.4586155653856787, + "grad_norm": 0.6959908375387097, + "learning_rate": 1.1816278919463342e-05, + "loss": 0.3728, + "step": 26541 + }, + { + "epoch": 0.4586328448990876, + "grad_norm": 1.1294525164070337, + "learning_rate": 1.1815728575551348e-05, + "loss": 0.5606, + "step": 26542 + }, + { + "epoch": 0.45865012441249653, + "grad_norm": 1.8196071452356022, + "learning_rate": 1.1815178225952359e-05, + "loss": 0.3915, + "step": 26543 + }, + { + "epoch": 0.45866740392590544, + "grad_norm": 1.0547591005416022, + "learning_rate": 1.1814627870668091e-05, + "loss": 0.4035, + "step": 26544 + }, + { + "epoch": 0.45868468343931434, + "grad_norm": 1.1090020451579117, + "learning_rate": 1.181407750970027e-05, + "loss": 0.5677, + "step": 26545 + }, + { + "epoch": 0.45870196295272325, + "grad_norm": 0.9577236908575564, + "learning_rate": 1.1813527143050622e-05, + "loss": 0.3808, + "step": 26546 + }, + { + "epoch": 0.45871924246613216, + "grad_norm": 1.159942783351104, + "learning_rate": 1.1812976770720872e-05, + "loss": 0.3757, + "step": 26547 + }, + { + "epoch": 0.45873652197954107, + "grad_norm": 1.1052681113430525, + "learning_rate": 1.1812426392712735e-05, + "loss": 0.4337, + "step": 26548 + }, + { + "epoch": 0.45875380149295, + "grad_norm": 0.8286143062641909, + "learning_rate": 1.1811876009027945e-05, + "loss": 0.3367, + "step": 26549 + }, + { + "epoch": 0.4587710810063589, + "grad_norm": 1.1539159861518087, + "learning_rate": 1.1811325619668222e-05, + "loss": 0.6041, + "step": 26550 + }, + { + "epoch": 0.4587883605197678, + "grad_norm": 1.1644830085528624, + "learning_rate": 1.1810775224635286e-05, + "loss": 0.388, + "step": 26551 + }, + { + "epoch": 0.45880564003317664, + "grad_norm": 0.9003761178924529, + "learning_rate": 1.1810224823930869e-05, + "loss": 0.2724, + "step": 26552 + }, + { + "epoch": 0.45882291954658555, + "grad_norm": 0.5672747146380293, + "learning_rate": 1.1809674417556687e-05, + "loss": 0.6137, + "step": 26553 + }, + { + "epoch": 0.45884019905999446, + "grad_norm": 0.9039600650602324, + "learning_rate": 1.1809124005514467e-05, + "loss": 0.4821, + "step": 26554 + }, + { + "epoch": 0.45885747857340337, + "grad_norm": 1.002220308490761, + "learning_rate": 1.1808573587805934e-05, + "loss": 0.4346, + "step": 26555 + }, + { + "epoch": 0.4588747580868123, + "grad_norm": 1.583254140689163, + "learning_rate": 1.1808023164432811e-05, + "loss": 0.5148, + "step": 26556 + }, + { + "epoch": 0.4588920376002212, + "grad_norm": 1.4149449919928794, + "learning_rate": 1.180747273539682e-05, + "loss": 0.3655, + "step": 26557 + }, + { + "epoch": 0.4589093171136301, + "grad_norm": 1.1926340609266708, + "learning_rate": 1.180692230069969e-05, + "loss": 0.5894, + "step": 26558 + }, + { + "epoch": 0.458926596627039, + "grad_norm": 0.9691535071920573, + "learning_rate": 1.1806371860343142e-05, + "loss": 0.453, + "step": 26559 + }, + { + "epoch": 0.4589438761404479, + "grad_norm": 0.8929170559150444, + "learning_rate": 1.1805821414328897e-05, + "loss": 0.5778, + "step": 26560 + }, + { + "epoch": 0.4589611556538568, + "grad_norm": 0.8191631425578777, + "learning_rate": 1.1805270962658687e-05, + "loss": 0.4535, + "step": 26561 + }, + { + "epoch": 0.45897843516726566, + "grad_norm": 0.9177130809998767, + "learning_rate": 1.1804720505334229e-05, + "loss": 0.3776, + "step": 26562 + }, + { + "epoch": 0.45899571468067457, + "grad_norm": 1.3145487225521089, + "learning_rate": 1.1804170042357249e-05, + "loss": 0.3106, + "step": 26563 + }, + { + "epoch": 0.4590129941940835, + "grad_norm": 1.7402472950550016, + "learning_rate": 1.1803619573729473e-05, + "loss": 0.4925, + "step": 26564 + }, + { + "epoch": 0.4590302737074924, + "grad_norm": 1.1509702693368031, + "learning_rate": 1.1803069099452618e-05, + "loss": 0.3674, + "step": 26565 + }, + { + "epoch": 0.4590475532209013, + "grad_norm": 1.432161058427637, + "learning_rate": 1.1802518619528417e-05, + "loss": 0.4296, + "step": 26566 + }, + { + "epoch": 0.4590648327343102, + "grad_norm": 1.107936937785418, + "learning_rate": 1.1801968133958592e-05, + "loss": 0.4485, + "step": 26567 + }, + { + "epoch": 0.4590821122477191, + "grad_norm": 0.6741159238592889, + "learning_rate": 1.1801417642744863e-05, + "loss": 0.3106, + "step": 26568 + }, + { + "epoch": 0.459099391761128, + "grad_norm": 0.9509221830876703, + "learning_rate": 1.1800867145888959e-05, + "loss": 0.3524, + "step": 26569 + }, + { + "epoch": 0.4591166712745369, + "grad_norm": 0.7189024166126555, + "learning_rate": 1.1800316643392601e-05, + "loss": 0.4418, + "step": 26570 + }, + { + "epoch": 0.45913395078794583, + "grad_norm": 1.1066662999047399, + "learning_rate": 1.1799766135257518e-05, + "loss": 0.4023, + "step": 26571 + }, + { + "epoch": 0.45915123030135474, + "grad_norm": 0.7662799741448693, + "learning_rate": 1.1799215621485427e-05, + "loss": 0.3944, + "step": 26572 + }, + { + "epoch": 0.4591685098147636, + "grad_norm": 1.3067160815897465, + "learning_rate": 1.1798665102078057e-05, + "loss": 0.4648, + "step": 26573 + }, + { + "epoch": 0.4591857893281725, + "grad_norm": 1.1949533749360026, + "learning_rate": 1.179811457703713e-05, + "loss": 0.4984, + "step": 26574 + }, + { + "epoch": 0.4592030688415814, + "grad_norm": 0.8596603754717741, + "learning_rate": 1.1797564046364373e-05, + "loss": 0.4115, + "step": 26575 + }, + { + "epoch": 0.4592203483549903, + "grad_norm": 0.9794372371283692, + "learning_rate": 1.179701351006151e-05, + "loss": 0.5121, + "step": 26576 + }, + { + "epoch": 0.4592376278683992, + "grad_norm": 1.8056249927285553, + "learning_rate": 1.179646296813026e-05, + "loss": 0.4639, + "step": 26577 + }, + { + "epoch": 0.45925490738180813, + "grad_norm": 0.8990534900567889, + "learning_rate": 1.1795912420572356e-05, + "loss": 0.3757, + "step": 26578 + }, + { + "epoch": 0.45927218689521704, + "grad_norm": 0.6014159474180526, + "learning_rate": 1.1795361867389516e-05, + "loss": 0.7969, + "step": 26579 + }, + { + "epoch": 0.45928946640862595, + "grad_norm": 1.0393391059631487, + "learning_rate": 1.1794811308583465e-05, + "loss": 0.358, + "step": 26580 + }, + { + "epoch": 0.45930674592203485, + "grad_norm": 0.7335745515351677, + "learning_rate": 1.179426074415593e-05, + "loss": 0.2779, + "step": 26581 + }, + { + "epoch": 0.45932402543544376, + "grad_norm": 1.1824607417128088, + "learning_rate": 1.1793710174108633e-05, + "loss": 0.4799, + "step": 26582 + }, + { + "epoch": 0.4593413049488526, + "grad_norm": 1.5371018687775615, + "learning_rate": 1.17931595984433e-05, + "loss": 0.5843, + "step": 26583 + }, + { + "epoch": 0.4593585844622615, + "grad_norm": 1.192289484452392, + "learning_rate": 1.1792609017161656e-05, + "loss": 0.4846, + "step": 26584 + }, + { + "epoch": 0.45937586397567043, + "grad_norm": 0.8428073600711241, + "learning_rate": 1.179205843026542e-05, + "loss": 0.385, + "step": 26585 + }, + { + "epoch": 0.45939314348907934, + "grad_norm": 1.110013710019818, + "learning_rate": 1.1791507837756327e-05, + "loss": 0.3391, + "step": 26586 + }, + { + "epoch": 0.45941042300248824, + "grad_norm": 1.0526011796827124, + "learning_rate": 1.1790957239636092e-05, + "loss": 0.2511, + "step": 26587 + }, + { + "epoch": 0.45942770251589715, + "grad_norm": 1.1172942426264831, + "learning_rate": 1.1790406635906442e-05, + "loss": 0.6152, + "step": 26588 + }, + { + "epoch": 0.45944498202930606, + "grad_norm": 0.6871622552305162, + "learning_rate": 1.1789856026569103e-05, + "loss": 0.5581, + "step": 26589 + }, + { + "epoch": 0.45946226154271497, + "grad_norm": 0.7375158466610415, + "learning_rate": 1.1789305411625799e-05, + "loss": 0.3972, + "step": 26590 + }, + { + "epoch": 0.4594795410561239, + "grad_norm": 0.6726147151638806, + "learning_rate": 1.1788754791078255e-05, + "loss": 0.1907, + "step": 26591 + }, + { + "epoch": 0.4594968205695328, + "grad_norm": 0.7303135947384461, + "learning_rate": 1.1788204164928197e-05, + "loss": 0.5957, + "step": 26592 + }, + { + "epoch": 0.4595141000829417, + "grad_norm": 0.6212082582393379, + "learning_rate": 1.1787653533177346e-05, + "loss": 0.3997, + "step": 26593 + }, + { + "epoch": 0.45953137959635054, + "grad_norm": 0.808526682388119, + "learning_rate": 1.1787102895827427e-05, + "loss": 0.3166, + "step": 26594 + }, + { + "epoch": 0.45954865910975945, + "grad_norm": 1.0578666919084834, + "learning_rate": 1.1786552252880165e-05, + "loss": 0.3005, + "step": 26595 + }, + { + "epoch": 0.45956593862316836, + "grad_norm": 1.3389687078928563, + "learning_rate": 1.1786001604337289e-05, + "loss": 0.4637, + "step": 26596 + }, + { + "epoch": 0.45958321813657727, + "grad_norm": 1.0876299641439084, + "learning_rate": 1.1785450950200516e-05, + "loss": 0.3587, + "step": 26597 + }, + { + "epoch": 0.4596004976499862, + "grad_norm": 0.524022021889014, + "learning_rate": 1.1784900290471578e-05, + "loss": 0.6361, + "step": 26598 + }, + { + "epoch": 0.4596177771633951, + "grad_norm": 1.1577238661558493, + "learning_rate": 1.1784349625152198e-05, + "loss": 0.3859, + "step": 26599 + }, + { + "epoch": 0.459635056676804, + "grad_norm": 0.9279640110379248, + "learning_rate": 1.1783798954244095e-05, + "loss": 0.4145, + "step": 26600 + }, + { + "epoch": 0.4596523361902129, + "grad_norm": 0.7527326224883515, + "learning_rate": 1.1783248277749002e-05, + "loss": 0.5078, + "step": 26601 + }, + { + "epoch": 0.4596696157036218, + "grad_norm": 1.5546922544664408, + "learning_rate": 1.1782697595668639e-05, + "loss": 0.3638, + "step": 26602 + }, + { + "epoch": 0.4596868952170307, + "grad_norm": 0.652373566557708, + "learning_rate": 1.178214690800473e-05, + "loss": 0.3903, + "step": 26603 + }, + { + "epoch": 0.45970417473043956, + "grad_norm": 1.144502192473557, + "learning_rate": 1.1781596214759003e-05, + "loss": 0.4236, + "step": 26604 + }, + { + "epoch": 0.45972145424384847, + "grad_norm": 0.8929682847578313, + "learning_rate": 1.178104551593318e-05, + "loss": 0.5145, + "step": 26605 + }, + { + "epoch": 0.4597387337572574, + "grad_norm": 1.0174511581650292, + "learning_rate": 1.1780494811528989e-05, + "loss": 0.5009, + "step": 26606 + }, + { + "epoch": 0.4597560132706663, + "grad_norm": 1.0076305255593978, + "learning_rate": 1.1779944101548153e-05, + "loss": 0.4756, + "step": 26607 + }, + { + "epoch": 0.4597732927840752, + "grad_norm": 0.5894577461191294, + "learning_rate": 1.1779393385992393e-05, + "loss": 0.3095, + "step": 26608 + }, + { + "epoch": 0.4597905722974841, + "grad_norm": 1.4279744468680247, + "learning_rate": 1.1778842664863441e-05, + "loss": 0.3537, + "step": 26609 + }, + { + "epoch": 0.459807851810893, + "grad_norm": 1.0349632692122832, + "learning_rate": 1.1778291938163017e-05, + "loss": 0.5881, + "step": 26610 + }, + { + "epoch": 0.4598251313243019, + "grad_norm": 0.5304924189013694, + "learning_rate": 1.1777741205892848e-05, + "loss": 0.6319, + "step": 26611 + }, + { + "epoch": 0.4598424108377108, + "grad_norm": 1.4214511566482044, + "learning_rate": 1.177719046805466e-05, + "loss": 0.4227, + "step": 26612 + }, + { + "epoch": 0.45985969035111973, + "grad_norm": 0.9396751498868867, + "learning_rate": 1.1776639724650178e-05, + "loss": 0.3782, + "step": 26613 + }, + { + "epoch": 0.45987696986452864, + "grad_norm": 1.0009909116555733, + "learning_rate": 1.1776088975681122e-05, + "loss": 0.4616, + "step": 26614 + }, + { + "epoch": 0.4598942493779375, + "grad_norm": 1.2625393431165015, + "learning_rate": 1.1775538221149222e-05, + "loss": 0.5362, + "step": 26615 + }, + { + "epoch": 0.4599115288913464, + "grad_norm": 1.0255530523981393, + "learning_rate": 1.1774987461056199e-05, + "loss": 0.4412, + "step": 26616 + }, + { + "epoch": 0.4599288084047553, + "grad_norm": 1.1449000586489408, + "learning_rate": 1.177443669540378e-05, + "loss": 0.4375, + "step": 26617 + }, + { + "epoch": 0.4599460879181642, + "grad_norm": 0.8103003362792166, + "learning_rate": 1.1773885924193693e-05, + "loss": 0.3181, + "step": 26618 + }, + { + "epoch": 0.4599633674315731, + "grad_norm": 0.9425941114971746, + "learning_rate": 1.177333514742766e-05, + "loss": 0.5126, + "step": 26619 + }, + { + "epoch": 0.45998064694498203, + "grad_norm": 0.8548241512355951, + "learning_rate": 1.1772784365107404e-05, + "loss": 0.4859, + "step": 26620 + }, + { + "epoch": 0.45999792645839094, + "grad_norm": 0.651765888326397, + "learning_rate": 1.1772233577234655e-05, + "loss": 0.468, + "step": 26621 + }, + { + "epoch": 0.46001520597179985, + "grad_norm": 1.2011446332279003, + "learning_rate": 1.1771682783811138e-05, + "loss": 0.6719, + "step": 26622 + }, + { + "epoch": 0.46003248548520875, + "grad_norm": 1.259284988667457, + "learning_rate": 1.1771131984838573e-05, + "loss": 0.459, + "step": 26623 + }, + { + "epoch": 0.46004976499861766, + "grad_norm": 0.6444396208556866, + "learning_rate": 1.177058118031869e-05, + "loss": 0.3079, + "step": 26624 + }, + { + "epoch": 0.46006704451202657, + "grad_norm": 0.7436527480749129, + "learning_rate": 1.1770030370253207e-05, + "loss": 0.318, + "step": 26625 + }, + { + "epoch": 0.4600843240254354, + "grad_norm": 0.9819617859113364, + "learning_rate": 1.176947955464386e-05, + "loss": 0.4111, + "step": 26626 + }, + { + "epoch": 0.46010160353884433, + "grad_norm": 0.5487646825232683, + "learning_rate": 1.1768928733492367e-05, + "loss": 0.674, + "step": 26627 + }, + { + "epoch": 0.46011888305225324, + "grad_norm": 1.0143086341807594, + "learning_rate": 1.1768377906800452e-05, + "loss": 0.5851, + "step": 26628 + }, + { + "epoch": 0.46013616256566214, + "grad_norm": 0.6048427055926012, + "learning_rate": 1.1767827074569846e-05, + "loss": 0.6708, + "step": 26629 + }, + { + "epoch": 0.46015344207907105, + "grad_norm": 1.135989321086112, + "learning_rate": 1.1767276236802271e-05, + "loss": 0.3063, + "step": 26630 + }, + { + "epoch": 0.46017072159247996, + "grad_norm": 0.9516746038234422, + "learning_rate": 1.1766725393499453e-05, + "loss": 0.377, + "step": 26631 + }, + { + "epoch": 0.46018800110588887, + "grad_norm": 1.0583595169018654, + "learning_rate": 1.1766174544663113e-05, + "loss": 0.3637, + "step": 26632 + }, + { + "epoch": 0.4602052806192978, + "grad_norm": 0.7640278665154918, + "learning_rate": 1.1765623690294987e-05, + "loss": 0.4323, + "step": 26633 + }, + { + "epoch": 0.4602225601327067, + "grad_norm": 1.1811246832330806, + "learning_rate": 1.1765072830396788e-05, + "loss": 0.6962, + "step": 26634 + }, + { + "epoch": 0.4602398396461156, + "grad_norm": 0.6149614020471288, + "learning_rate": 1.1764521964970249e-05, + "loss": 0.2269, + "step": 26635 + }, + { + "epoch": 0.46025711915952444, + "grad_norm": 0.4507615095979632, + "learning_rate": 1.1763971094017093e-05, + "loss": 0.5118, + "step": 26636 + }, + { + "epoch": 0.46027439867293335, + "grad_norm": 0.6904314037956593, + "learning_rate": 1.1763420217539041e-05, + "loss": 0.4382, + "step": 26637 + }, + { + "epoch": 0.46029167818634226, + "grad_norm": 0.8512368423962302, + "learning_rate": 1.1762869335537827e-05, + "loss": 0.3948, + "step": 26638 + }, + { + "epoch": 0.46030895769975116, + "grad_norm": 0.6318302586461397, + "learning_rate": 1.1762318448015174e-05, + "loss": 0.7952, + "step": 26639 + }, + { + "epoch": 0.46032623721316007, + "grad_norm": 0.9008722039049376, + "learning_rate": 1.1761767554972803e-05, + "loss": 0.4552, + "step": 26640 + }, + { + "epoch": 0.460343516726569, + "grad_norm": 0.9711574508141239, + "learning_rate": 1.1761216656412442e-05, + "loss": 0.4543, + "step": 26641 + }, + { + "epoch": 0.4603607962399779, + "grad_norm": 1.0248125512363877, + "learning_rate": 1.1760665752335817e-05, + "loss": 0.4734, + "step": 26642 + }, + { + "epoch": 0.4603780757533868, + "grad_norm": 1.0509507834490004, + "learning_rate": 1.1760114842744654e-05, + "loss": 0.3769, + "step": 26643 + }, + { + "epoch": 0.4603953552667957, + "grad_norm": 0.5984122194421676, + "learning_rate": 1.1759563927640679e-05, + "loss": 0.3086, + "step": 26644 + }, + { + "epoch": 0.4604126347802046, + "grad_norm": 0.7985370450566395, + "learning_rate": 1.1759013007025612e-05, + "loss": 0.5073, + "step": 26645 + }, + { + "epoch": 0.4604299142936135, + "grad_norm": 1.0474306461114224, + "learning_rate": 1.1758462080901184e-05, + "loss": 0.2987, + "step": 26646 + }, + { + "epoch": 0.46044719380702237, + "grad_norm": 0.9073449088029969, + "learning_rate": 1.1757911149269121e-05, + "loss": 0.4224, + "step": 26647 + }, + { + "epoch": 0.4604644733204313, + "grad_norm": 0.613683822763753, + "learning_rate": 1.1757360212131144e-05, + "loss": 0.3907, + "step": 26648 + }, + { + "epoch": 0.4604817528338402, + "grad_norm": 1.0226716663406414, + "learning_rate": 1.1756809269488982e-05, + "loss": 0.4959, + "step": 26649 + }, + { + "epoch": 0.4604990323472491, + "grad_norm": 0.9685790844452289, + "learning_rate": 1.1756258321344362e-05, + "loss": 0.5244, + "step": 26650 + }, + { + "epoch": 0.460516311860658, + "grad_norm": 0.9460717912056364, + "learning_rate": 1.1755707367699002e-05, + "loss": 0.3225, + "step": 26651 + }, + { + "epoch": 0.4605335913740669, + "grad_norm": 0.8743236810523928, + "learning_rate": 1.175515640855464e-05, + "loss": 0.2546, + "step": 26652 + }, + { + "epoch": 0.4605508708874758, + "grad_norm": 1.0956935889831358, + "learning_rate": 1.1754605443912993e-05, + "loss": 0.258, + "step": 26653 + }, + { + "epoch": 0.4605681504008847, + "grad_norm": 0.8843219971704857, + "learning_rate": 1.1754054473775787e-05, + "loss": 0.5144, + "step": 26654 + }, + { + "epoch": 0.46058542991429363, + "grad_norm": 0.799376494408475, + "learning_rate": 1.1753503498144749e-05, + "loss": 0.5484, + "step": 26655 + }, + { + "epoch": 0.46060270942770254, + "grad_norm": 0.9330940147520763, + "learning_rate": 1.1752952517021606e-05, + "loss": 0.5329, + "step": 26656 + }, + { + "epoch": 0.4606199889411114, + "grad_norm": 0.9840432239591239, + "learning_rate": 1.175240153040808e-05, + "loss": 0.3207, + "step": 26657 + }, + { + "epoch": 0.4606372684545203, + "grad_norm": 1.093918380881193, + "learning_rate": 1.1751850538305901e-05, + "loss": 0.5527, + "step": 26658 + }, + { + "epoch": 0.4606545479679292, + "grad_norm": 1.0261829474355466, + "learning_rate": 1.1751299540716793e-05, + "loss": 0.5186, + "step": 26659 + }, + { + "epoch": 0.4606718274813381, + "grad_norm": 0.7755852318892366, + "learning_rate": 1.175074853764248e-05, + "loss": 0.3857, + "step": 26660 + }, + { + "epoch": 0.460689106994747, + "grad_norm": 0.8854214476163494, + "learning_rate": 1.1750197529084692e-05, + "loss": 0.3576, + "step": 26661 + }, + { + "epoch": 0.46070638650815593, + "grad_norm": 0.5797652243527137, + "learning_rate": 1.1749646515045152e-05, + "loss": 0.4401, + "step": 26662 + }, + { + "epoch": 0.46072366602156484, + "grad_norm": 0.9670932369146408, + "learning_rate": 1.1749095495525584e-05, + "loss": 0.5968, + "step": 26663 + }, + { + "epoch": 0.46074094553497374, + "grad_norm": 0.9053291299144272, + "learning_rate": 1.174854447052772e-05, + "loss": 0.3004, + "step": 26664 + }, + { + "epoch": 0.46075822504838265, + "grad_norm": 1.2330455617367821, + "learning_rate": 1.1747993440053278e-05, + "loss": 0.4968, + "step": 26665 + }, + { + "epoch": 0.46077550456179156, + "grad_norm": 0.8250356857953034, + "learning_rate": 1.1747442404103988e-05, + "loss": 0.4668, + "step": 26666 + }, + { + "epoch": 0.46079278407520047, + "grad_norm": 0.8106351213193028, + "learning_rate": 1.1746891362681578e-05, + "loss": 0.5122, + "step": 26667 + }, + { + "epoch": 0.4608100635886093, + "grad_norm": 1.1997021564136479, + "learning_rate": 1.1746340315787768e-05, + "loss": 0.4458, + "step": 26668 + }, + { + "epoch": 0.4608273431020182, + "grad_norm": 1.0127711018675651, + "learning_rate": 1.174578926342429e-05, + "loss": 0.2523, + "step": 26669 + }, + { + "epoch": 0.46084462261542714, + "grad_norm": 1.574411323595658, + "learning_rate": 1.1745238205592868e-05, + "loss": 0.5118, + "step": 26670 + }, + { + "epoch": 0.46086190212883604, + "grad_norm": 0.9571103125676917, + "learning_rate": 1.1744687142295224e-05, + "loss": 0.3786, + "step": 26671 + }, + { + "epoch": 0.46087918164224495, + "grad_norm": 1.1045324086603232, + "learning_rate": 1.174413607353309e-05, + "loss": 0.4548, + "step": 26672 + }, + { + "epoch": 0.46089646115565386, + "grad_norm": 0.9871936375121168, + "learning_rate": 1.1743584999308189e-05, + "loss": 0.5668, + "step": 26673 + }, + { + "epoch": 0.46091374066906277, + "grad_norm": 1.0922297508551817, + "learning_rate": 1.1743033919622249e-05, + "loss": 0.3947, + "step": 26674 + }, + { + "epoch": 0.4609310201824717, + "grad_norm": 0.9869142018594791, + "learning_rate": 1.1742482834476992e-05, + "loss": 0.5212, + "step": 26675 + }, + { + "epoch": 0.4609482996958806, + "grad_norm": 0.7643027189041499, + "learning_rate": 1.1741931743874146e-05, + "loss": 0.4806, + "step": 26676 + }, + { + "epoch": 0.4609655792092895, + "grad_norm": 0.741587778171193, + "learning_rate": 1.1741380647815437e-05, + "loss": 0.4033, + "step": 26677 + }, + { + "epoch": 0.4609828587226984, + "grad_norm": 0.836776218312752, + "learning_rate": 1.1740829546302592e-05, + "loss": 0.4793, + "step": 26678 + }, + { + "epoch": 0.46100013823610725, + "grad_norm": 1.0551177805130731, + "learning_rate": 1.1740278439337338e-05, + "loss": 0.3671, + "step": 26679 + }, + { + "epoch": 0.46101741774951616, + "grad_norm": 1.021182483411233, + "learning_rate": 1.1739727326921398e-05, + "loss": 0.4798, + "step": 26680 + }, + { + "epoch": 0.46103469726292506, + "grad_norm": 1.6210938547544014, + "learning_rate": 1.1739176209056502e-05, + "loss": 0.476, + "step": 26681 + }, + { + "epoch": 0.46105197677633397, + "grad_norm": 1.3511893208114794, + "learning_rate": 1.1738625085744373e-05, + "loss": 0.6403, + "step": 26682 + }, + { + "epoch": 0.4610692562897429, + "grad_norm": 1.1164474125069779, + "learning_rate": 1.1738073956986736e-05, + "loss": 0.4183, + "step": 26683 + }, + { + "epoch": 0.4610865358031518, + "grad_norm": 0.9665136014416795, + "learning_rate": 1.173752282278532e-05, + "loss": 0.4152, + "step": 26684 + }, + { + "epoch": 0.4611038153165607, + "grad_norm": 0.9133853724361766, + "learning_rate": 1.173697168314185e-05, + "loss": 0.3609, + "step": 26685 + }, + { + "epoch": 0.4611210948299696, + "grad_norm": 2.009614185907403, + "learning_rate": 1.1736420538058057e-05, + "loss": 0.5947, + "step": 26686 + }, + { + "epoch": 0.4611383743433785, + "grad_norm": 0.8888287809323197, + "learning_rate": 1.1735869387535662e-05, + "loss": 0.6412, + "step": 26687 + }, + { + "epoch": 0.4611556538567874, + "grad_norm": 1.0624179333421213, + "learning_rate": 1.1735318231576387e-05, + "loss": 0.4653, + "step": 26688 + }, + { + "epoch": 0.46117293337019627, + "grad_norm": 1.2965389680788773, + "learning_rate": 1.1734767070181966e-05, + "loss": 0.4386, + "step": 26689 + }, + { + "epoch": 0.4611902128836052, + "grad_norm": 0.9256293997631017, + "learning_rate": 1.1734215903354124e-05, + "loss": 0.3324, + "step": 26690 + }, + { + "epoch": 0.4612074923970141, + "grad_norm": 1.0871981359780194, + "learning_rate": 1.1733664731094585e-05, + "loss": 0.4745, + "step": 26691 + }, + { + "epoch": 0.461224771910423, + "grad_norm": 0.7690012755970972, + "learning_rate": 1.1733113553405075e-05, + "loss": 0.4052, + "step": 26692 + }, + { + "epoch": 0.4612420514238319, + "grad_norm": 0.7017521226776421, + "learning_rate": 1.1732562370287324e-05, + "loss": 0.4206, + "step": 26693 + }, + { + "epoch": 0.4612593309372408, + "grad_norm": 0.8965782894563535, + "learning_rate": 1.1732011181743056e-05, + "loss": 0.573, + "step": 26694 + }, + { + "epoch": 0.4612766104506497, + "grad_norm": 0.953591770095577, + "learning_rate": 1.1731459987773996e-05, + "loss": 0.469, + "step": 26695 + }, + { + "epoch": 0.4612938899640586, + "grad_norm": 1.0268504157677611, + "learning_rate": 1.1730908788381874e-05, + "loss": 0.38, + "step": 26696 + }, + { + "epoch": 0.46131116947746753, + "grad_norm": 0.9742702607482379, + "learning_rate": 1.173035758356841e-05, + "loss": 0.3715, + "step": 26697 + }, + { + "epoch": 0.46132844899087644, + "grad_norm": 0.4747878279586349, + "learning_rate": 1.1729806373335337e-05, + "loss": 0.589, + "step": 26698 + }, + { + "epoch": 0.46134572850428535, + "grad_norm": 0.99009267178962, + "learning_rate": 1.172925515768438e-05, + "loss": 0.4348, + "step": 26699 + }, + { + "epoch": 0.4613630080176942, + "grad_norm": 1.0403665140125282, + "learning_rate": 1.1728703936617262e-05, + "loss": 0.5165, + "step": 26700 + }, + { + "epoch": 0.4613802875311031, + "grad_norm": 0.8799304176370422, + "learning_rate": 1.1728152710135714e-05, + "loss": 0.4038, + "step": 26701 + }, + { + "epoch": 0.461397567044512, + "grad_norm": 0.8398099329592231, + "learning_rate": 1.172760147824146e-05, + "loss": 0.559, + "step": 26702 + }, + { + "epoch": 0.4614148465579209, + "grad_norm": 1.718729441033111, + "learning_rate": 1.1727050240936225e-05, + "loss": 0.3237, + "step": 26703 + }, + { + "epoch": 0.46143212607132983, + "grad_norm": 1.0329938331234243, + "learning_rate": 1.1726498998221738e-05, + "loss": 0.4186, + "step": 26704 + }, + { + "epoch": 0.46144940558473874, + "grad_norm": 0.4986314982836437, + "learning_rate": 1.1725947750099726e-05, + "loss": 0.7568, + "step": 26705 + }, + { + "epoch": 0.46146668509814764, + "grad_norm": 1.2244975333642079, + "learning_rate": 1.1725396496571917e-05, + "loss": 0.4544, + "step": 26706 + }, + { + "epoch": 0.46148396461155655, + "grad_norm": 0.7725528970695168, + "learning_rate": 1.1724845237640033e-05, + "loss": 0.4018, + "step": 26707 + }, + { + "epoch": 0.46150124412496546, + "grad_norm": 0.6725289359656398, + "learning_rate": 1.1724293973305798e-05, + "loss": 0.5786, + "step": 26708 + }, + { + "epoch": 0.46151852363837437, + "grad_norm": 1.020129362849467, + "learning_rate": 1.1723742703570949e-05, + "loss": 0.4489, + "step": 26709 + }, + { + "epoch": 0.4615358031517832, + "grad_norm": 0.8733381723728648, + "learning_rate": 1.1723191428437207e-05, + "loss": 0.4067, + "step": 26710 + }, + { + "epoch": 0.4615530826651921, + "grad_norm": 0.8726140942459459, + "learning_rate": 1.1722640147906292e-05, + "loss": 0.4381, + "step": 26711 + }, + { + "epoch": 0.46157036217860103, + "grad_norm": 1.1156463391787184, + "learning_rate": 1.1722088861979943e-05, + "loss": 0.4698, + "step": 26712 + }, + { + "epoch": 0.46158764169200994, + "grad_norm": 0.634874404558367, + "learning_rate": 1.172153757065988e-05, + "loss": 0.9268, + "step": 26713 + }, + { + "epoch": 0.46160492120541885, + "grad_norm": 1.2133675813860731, + "learning_rate": 1.172098627394783e-05, + "loss": 0.508, + "step": 26714 + }, + { + "epoch": 0.46162220071882776, + "grad_norm": 1.404130838508771, + "learning_rate": 1.172043497184552e-05, + "loss": 0.3842, + "step": 26715 + }, + { + "epoch": 0.46163948023223667, + "grad_norm": 0.8762009470413237, + "learning_rate": 1.171988366435468e-05, + "loss": 0.2684, + "step": 26716 + }, + { + "epoch": 0.4616567597456456, + "grad_norm": 0.7144713040283271, + "learning_rate": 1.1719332351477029e-05, + "loss": 0.4082, + "step": 26717 + }, + { + "epoch": 0.4616740392590545, + "grad_norm": 0.9038903771922796, + "learning_rate": 1.17187810332143e-05, + "loss": 0.5886, + "step": 26718 + }, + { + "epoch": 0.4616913187724634, + "grad_norm": 1.3616540277909361, + "learning_rate": 1.171822970956822e-05, + "loss": 0.4467, + "step": 26719 + }, + { + "epoch": 0.4617085982858723, + "grad_norm": 1.1219973593651318, + "learning_rate": 1.171767838054051e-05, + "loss": 0.49, + "step": 26720 + }, + { + "epoch": 0.46172587779928115, + "grad_norm": 1.1935787003597917, + "learning_rate": 1.1717127046132903e-05, + "loss": 0.5676, + "step": 26721 + }, + { + "epoch": 0.46174315731269006, + "grad_norm": 1.2588625929150559, + "learning_rate": 1.1716575706347125e-05, + "loss": 0.4891, + "step": 26722 + }, + { + "epoch": 0.46176043682609896, + "grad_norm": 0.8876218272671029, + "learning_rate": 1.1716024361184896e-05, + "loss": 0.3549, + "step": 26723 + }, + { + "epoch": 0.46177771633950787, + "grad_norm": 1.1536142704675976, + "learning_rate": 1.1715473010647952e-05, + "loss": 0.4659, + "step": 26724 + }, + { + "epoch": 0.4617949958529168, + "grad_norm": 0.8818847468829911, + "learning_rate": 1.171492165473802e-05, + "loss": 0.332, + "step": 26725 + }, + { + "epoch": 0.4618122753663257, + "grad_norm": 1.217669589206414, + "learning_rate": 1.171437029345682e-05, + "loss": 0.5543, + "step": 26726 + }, + { + "epoch": 0.4618295548797346, + "grad_norm": 0.5341776758754384, + "learning_rate": 1.1713818926806082e-05, + "loss": 0.5896, + "step": 26727 + }, + { + "epoch": 0.4618468343931435, + "grad_norm": 0.9840269728311596, + "learning_rate": 1.171326755478753e-05, + "loss": 0.3984, + "step": 26728 + }, + { + "epoch": 0.4618641139065524, + "grad_norm": 0.6441474292572328, + "learning_rate": 1.1712716177402897e-05, + "loss": 0.3039, + "step": 26729 + }, + { + "epoch": 0.4618813934199613, + "grad_norm": 1.1679113948106943, + "learning_rate": 1.1712164794653906e-05, + "loss": 0.3936, + "step": 26730 + }, + { + "epoch": 0.46189867293337017, + "grad_norm": 0.8801244051810577, + "learning_rate": 1.1711613406542284e-05, + "loss": 0.3249, + "step": 26731 + }, + { + "epoch": 0.4619159524467791, + "grad_norm": 1.593170752304057, + "learning_rate": 1.171106201306976e-05, + "loss": 0.6987, + "step": 26732 + }, + { + "epoch": 0.461933231960188, + "grad_norm": 1.3447074757871713, + "learning_rate": 1.1710510614238059e-05, + "loss": 0.6101, + "step": 26733 + }, + { + "epoch": 0.4619505114735969, + "grad_norm": 1.4053481557971748, + "learning_rate": 1.1709959210048907e-05, + "loss": 0.4445, + "step": 26734 + }, + { + "epoch": 0.4619677909870058, + "grad_norm": 0.9339734414329492, + "learning_rate": 1.1709407800504036e-05, + "loss": 0.4153, + "step": 26735 + }, + { + "epoch": 0.4619850705004147, + "grad_norm": 1.5044556913534204, + "learning_rate": 1.1708856385605171e-05, + "loss": 0.4961, + "step": 26736 + }, + { + "epoch": 0.4620023500138236, + "grad_norm": 0.6581162195455031, + "learning_rate": 1.1708304965354033e-05, + "loss": 0.611, + "step": 26737 + }, + { + "epoch": 0.4620196295272325, + "grad_norm": 0.828356948210445, + "learning_rate": 1.1707753539752358e-05, + "loss": 0.454, + "step": 26738 + }, + { + "epoch": 0.46203690904064143, + "grad_norm": 1.3271079872333789, + "learning_rate": 1.1707202108801869e-05, + "loss": 0.3171, + "step": 26739 + }, + { + "epoch": 0.46205418855405034, + "grad_norm": 1.123210601054374, + "learning_rate": 1.170665067250429e-05, + "loss": 0.3629, + "step": 26740 + }, + { + "epoch": 0.46207146806745925, + "grad_norm": 0.879847046532128, + "learning_rate": 1.1706099230861354e-05, + "loss": 0.3816, + "step": 26741 + }, + { + "epoch": 0.4620887475808681, + "grad_norm": 1.3593897199960063, + "learning_rate": 1.1705547783874784e-05, + "loss": 0.5575, + "step": 26742 + }, + { + "epoch": 0.462106027094277, + "grad_norm": 0.6601838111847361, + "learning_rate": 1.170499633154631e-05, + "loss": 0.377, + "step": 26743 + }, + { + "epoch": 0.4621233066076859, + "grad_norm": 0.7681317669161236, + "learning_rate": 1.1704444873877655e-05, + "loss": 0.5415, + "step": 26744 + }, + { + "epoch": 0.4621405861210948, + "grad_norm": 1.6326572216313666, + "learning_rate": 1.1703893410870555e-05, + "loss": 0.3231, + "step": 26745 + }, + { + "epoch": 0.46215786563450373, + "grad_norm": 0.6456597805873402, + "learning_rate": 1.1703341942526728e-05, + "loss": 0.51, + "step": 26746 + }, + { + "epoch": 0.46217514514791264, + "grad_norm": 1.2992254395171439, + "learning_rate": 1.1702790468847905e-05, + "loss": 0.5053, + "step": 26747 + }, + { + "epoch": 0.46219242466132154, + "grad_norm": 1.0947068817894208, + "learning_rate": 1.1702238989835811e-05, + "loss": 0.5636, + "step": 26748 + }, + { + "epoch": 0.46220970417473045, + "grad_norm": 0.5116817973257887, + "learning_rate": 1.1701687505492176e-05, + "loss": 0.7029, + "step": 26749 + }, + { + "epoch": 0.46222698368813936, + "grad_norm": 0.7291447492042983, + "learning_rate": 1.1701136015818729e-05, + "loss": 0.3757, + "step": 26750 + }, + { + "epoch": 0.46224426320154827, + "grad_norm": 1.1324458239225519, + "learning_rate": 1.170058452081719e-05, + "loss": 0.5852, + "step": 26751 + }, + { + "epoch": 0.4622615427149572, + "grad_norm": 0.9458081568442299, + "learning_rate": 1.1700033020489295e-05, + "loss": 0.4315, + "step": 26752 + }, + { + "epoch": 0.462278822228366, + "grad_norm": 0.45656815766286485, + "learning_rate": 1.1699481514836766e-05, + "loss": 0.6427, + "step": 26753 + }, + { + "epoch": 0.46229610174177493, + "grad_norm": 1.0673884525356108, + "learning_rate": 1.1698930003861331e-05, + "loss": 0.2621, + "step": 26754 + }, + { + "epoch": 0.46231338125518384, + "grad_norm": 1.421325342544766, + "learning_rate": 1.1698378487564719e-05, + "loss": 0.4127, + "step": 26755 + }, + { + "epoch": 0.46233066076859275, + "grad_norm": 0.9810146740398935, + "learning_rate": 1.1697826965948658e-05, + "loss": 0.3835, + "step": 26756 + }, + { + "epoch": 0.46234794028200166, + "grad_norm": 0.6847771263625109, + "learning_rate": 1.1697275439014873e-05, + "loss": 0.3457, + "step": 26757 + }, + { + "epoch": 0.46236521979541056, + "grad_norm": 0.8935136877098249, + "learning_rate": 1.1696723906765091e-05, + "loss": 0.4573, + "step": 26758 + }, + { + "epoch": 0.4623824993088195, + "grad_norm": 0.9422262345105831, + "learning_rate": 1.1696172369201042e-05, + "loss": 0.6451, + "step": 26759 + }, + { + "epoch": 0.4623997788222284, + "grad_norm": 0.9916909605406959, + "learning_rate": 1.169562082632445e-05, + "loss": 0.5038, + "step": 26760 + }, + { + "epoch": 0.4624170583356373, + "grad_norm": 0.8736472994391169, + "learning_rate": 1.1695069278137047e-05, + "loss": 0.2795, + "step": 26761 + }, + { + "epoch": 0.4624343378490462, + "grad_norm": 0.8963586447563338, + "learning_rate": 1.1694517724640557e-05, + "loss": 0.6861, + "step": 26762 + }, + { + "epoch": 0.46245161736245505, + "grad_norm": 1.0505727683677235, + "learning_rate": 1.1693966165836707e-05, + "loss": 0.6596, + "step": 26763 + }, + { + "epoch": 0.46246889687586396, + "grad_norm": 0.8625352963124433, + "learning_rate": 1.169341460172723e-05, + "loss": 0.2878, + "step": 26764 + }, + { + "epoch": 0.46248617638927286, + "grad_norm": 0.8945227484151694, + "learning_rate": 1.1692863032313851e-05, + "loss": 0.2836, + "step": 26765 + }, + { + "epoch": 0.46250345590268177, + "grad_norm": 1.1587868016704492, + "learning_rate": 1.1692311457598291e-05, + "loss": 0.4475, + "step": 26766 + }, + { + "epoch": 0.4625207354160907, + "grad_norm": 0.5893526639916523, + "learning_rate": 1.1691759877582287e-05, + "loss": 0.7779, + "step": 26767 + }, + { + "epoch": 0.4625380149294996, + "grad_norm": 0.7160276145954618, + "learning_rate": 1.1691208292267559e-05, + "loss": 0.5149, + "step": 26768 + }, + { + "epoch": 0.4625552944429085, + "grad_norm": 0.9952247885410217, + "learning_rate": 1.1690656701655843e-05, + "loss": 0.334, + "step": 26769 + }, + { + "epoch": 0.4625725739563174, + "grad_norm": 1.0391524794541673, + "learning_rate": 1.1690105105748859e-05, + "loss": 0.4794, + "step": 26770 + }, + { + "epoch": 0.4625898534697263, + "grad_norm": 0.8214170070070896, + "learning_rate": 1.1689553504548335e-05, + "loss": 0.4505, + "step": 26771 + }, + { + "epoch": 0.4626071329831352, + "grad_norm": 1.0282494841156244, + "learning_rate": 1.1689001898056005e-05, + "loss": 0.3789, + "step": 26772 + }, + { + "epoch": 0.4626244124965441, + "grad_norm": 1.3581723443400155, + "learning_rate": 1.1688450286273589e-05, + "loss": 0.448, + "step": 26773 + }, + { + "epoch": 0.462641692009953, + "grad_norm": 0.8253513880754176, + "learning_rate": 1.168789866920282e-05, + "loss": 0.6838, + "step": 26774 + }, + { + "epoch": 0.4626589715233619, + "grad_norm": 1.5726293672751657, + "learning_rate": 1.1687347046845426e-05, + "loss": 0.456, + "step": 26775 + }, + { + "epoch": 0.4626762510367708, + "grad_norm": 0.8720881849136165, + "learning_rate": 1.1686795419203132e-05, + "loss": 0.5871, + "step": 26776 + }, + { + "epoch": 0.4626935305501797, + "grad_norm": 0.6224769515825116, + "learning_rate": 1.1686243786277665e-05, + "loss": 0.2677, + "step": 26777 + }, + { + "epoch": 0.4627108100635886, + "grad_norm": 0.9664239187685391, + "learning_rate": 1.1685692148070755e-05, + "loss": 0.403, + "step": 26778 + }, + { + "epoch": 0.4627280895769975, + "grad_norm": 1.1475459818110096, + "learning_rate": 1.1685140504584132e-05, + "loss": 0.4957, + "step": 26779 + }, + { + "epoch": 0.4627453690904064, + "grad_norm": 0.4628955526691009, + "learning_rate": 1.1684588855819517e-05, + "loss": 0.55, + "step": 26780 + }, + { + "epoch": 0.46276264860381533, + "grad_norm": 1.228852823925785, + "learning_rate": 1.1684037201778642e-05, + "loss": 0.5117, + "step": 26781 + }, + { + "epoch": 0.46277992811722424, + "grad_norm": 1.4636379599527893, + "learning_rate": 1.1683485542463235e-05, + "loss": 0.4152, + "step": 26782 + }, + { + "epoch": 0.46279720763063315, + "grad_norm": 0.8329414168147032, + "learning_rate": 1.1682933877875023e-05, + "loss": 0.3251, + "step": 26783 + }, + { + "epoch": 0.462814487144042, + "grad_norm": 1.0150933441078382, + "learning_rate": 1.1682382208015734e-05, + "loss": 0.4551, + "step": 26784 + }, + { + "epoch": 0.4628317666574509, + "grad_norm": 1.0480213422957445, + "learning_rate": 1.1681830532887097e-05, + "loss": 0.5525, + "step": 26785 + }, + { + "epoch": 0.4628490461708598, + "grad_norm": 0.7484515415932924, + "learning_rate": 1.1681278852490839e-05, + "loss": 0.4806, + "step": 26786 + }, + { + "epoch": 0.4628663256842687, + "grad_norm": 0.7891884185242207, + "learning_rate": 1.1680727166828687e-05, + "loss": 0.3736, + "step": 26787 + }, + { + "epoch": 0.46288360519767763, + "grad_norm": 0.9923438550073295, + "learning_rate": 1.1680175475902372e-05, + "loss": 0.6433, + "step": 26788 + }, + { + "epoch": 0.46290088471108654, + "grad_norm": 0.976577929836176, + "learning_rate": 1.1679623779713618e-05, + "loss": 0.4716, + "step": 26789 + }, + { + "epoch": 0.46291816422449544, + "grad_norm": 1.0645203759161204, + "learning_rate": 1.1679072078264155e-05, + "loss": 0.5539, + "step": 26790 + }, + { + "epoch": 0.46293544373790435, + "grad_norm": 0.4659091207182539, + "learning_rate": 1.167852037155571e-05, + "loss": 0.6617, + "step": 26791 + }, + { + "epoch": 0.46295272325131326, + "grad_norm": 0.9608126805639988, + "learning_rate": 1.1677968659590012e-05, + "loss": 0.2995, + "step": 26792 + }, + { + "epoch": 0.46297000276472217, + "grad_norm": 0.5244424861868037, + "learning_rate": 1.1677416942368789e-05, + "loss": 0.5484, + "step": 26793 + }, + { + "epoch": 0.4629872822781311, + "grad_norm": 1.0099462772331065, + "learning_rate": 1.1676865219893767e-05, + "loss": 0.4982, + "step": 26794 + }, + { + "epoch": 0.4630045617915399, + "grad_norm": 0.4230922620517411, + "learning_rate": 1.1676313492166677e-05, + "loss": 0.7033, + "step": 26795 + }, + { + "epoch": 0.46302184130494883, + "grad_norm": 1.113096229155611, + "learning_rate": 1.1675761759189248e-05, + "loss": 0.2489, + "step": 26796 + }, + { + "epoch": 0.46303912081835774, + "grad_norm": 0.9956357849165143, + "learning_rate": 1.1675210020963203e-05, + "loss": 0.3998, + "step": 26797 + }, + { + "epoch": 0.46305640033176665, + "grad_norm": 1.2064424455940508, + "learning_rate": 1.1674658277490276e-05, + "loss": 0.3956, + "step": 26798 + }, + { + "epoch": 0.46307367984517556, + "grad_norm": 1.062171580279652, + "learning_rate": 1.167410652877219e-05, + "loss": 0.351, + "step": 26799 + }, + { + "epoch": 0.46309095935858446, + "grad_norm": 0.8939519327018458, + "learning_rate": 1.1673554774810671e-05, + "loss": 0.3215, + "step": 26800 + }, + { + "epoch": 0.46310823887199337, + "grad_norm": 0.7789742253038637, + "learning_rate": 1.1673003015607458e-05, + "loss": 0.485, + "step": 26801 + }, + { + "epoch": 0.4631255183854023, + "grad_norm": 0.9372925840631519, + "learning_rate": 1.167245125116427e-05, + "loss": 0.4654, + "step": 26802 + }, + { + "epoch": 0.4631427978988112, + "grad_norm": 0.8036881641520401, + "learning_rate": 1.1671899481482833e-05, + "loss": 0.3001, + "step": 26803 + }, + { + "epoch": 0.4631600774122201, + "grad_norm": 2.0649025192396113, + "learning_rate": 1.1671347706564886e-05, + "loss": 0.5441, + "step": 26804 + }, + { + "epoch": 0.46317735692562895, + "grad_norm": 1.33178459938534, + "learning_rate": 1.1670795926412148e-05, + "loss": 0.6281, + "step": 26805 + }, + { + "epoch": 0.46319463643903785, + "grad_norm": 0.9176406433303044, + "learning_rate": 1.167024414102635e-05, + "loss": 0.3209, + "step": 26806 + }, + { + "epoch": 0.46321191595244676, + "grad_norm": 1.2387306003927563, + "learning_rate": 1.1669692350409223e-05, + "loss": 0.3099, + "step": 26807 + }, + { + "epoch": 0.46322919546585567, + "grad_norm": 0.6312446644615745, + "learning_rate": 1.166914055456249e-05, + "loss": 0.5504, + "step": 26808 + }, + { + "epoch": 0.4632464749792646, + "grad_norm": 1.054342900326212, + "learning_rate": 1.1668588753487883e-05, + "loss": 0.4876, + "step": 26809 + }, + { + "epoch": 0.4632637544926735, + "grad_norm": 0.6945107353466952, + "learning_rate": 1.1668036947187132e-05, + "loss": 0.7292, + "step": 26810 + }, + { + "epoch": 0.4632810340060824, + "grad_norm": 0.8602994643753223, + "learning_rate": 1.1667485135661956e-05, + "loss": 0.385, + "step": 26811 + }, + { + "epoch": 0.4632983135194913, + "grad_norm": 0.8979667712251392, + "learning_rate": 1.1666933318914095e-05, + "loss": 0.4994, + "step": 26812 + }, + { + "epoch": 0.4633155930329002, + "grad_norm": 1.0064909162135067, + "learning_rate": 1.1666381496945272e-05, + "loss": 0.3773, + "step": 26813 + }, + { + "epoch": 0.4633328725463091, + "grad_norm": 1.1507535101577178, + "learning_rate": 1.1665829669757214e-05, + "loss": 0.4844, + "step": 26814 + }, + { + "epoch": 0.463350152059718, + "grad_norm": 0.4900320704591903, + "learning_rate": 1.1665277837351649e-05, + "loss": 0.5506, + "step": 26815 + }, + { + "epoch": 0.4633674315731269, + "grad_norm": 1.7761844594290461, + "learning_rate": 1.1664725999730311e-05, + "loss": 0.3749, + "step": 26816 + }, + { + "epoch": 0.4633847110865358, + "grad_norm": 1.0028175163797148, + "learning_rate": 1.1664174156894923e-05, + "loss": 0.4127, + "step": 26817 + }, + { + "epoch": 0.4634019905999447, + "grad_norm": 1.2302491400811109, + "learning_rate": 1.1663622308847213e-05, + "loss": 0.3562, + "step": 26818 + }, + { + "epoch": 0.4634192701133536, + "grad_norm": 0.7765816654379153, + "learning_rate": 1.1663070455588917e-05, + "loss": 0.3874, + "step": 26819 + }, + { + "epoch": 0.4634365496267625, + "grad_norm": 0.9576659154449116, + "learning_rate": 1.1662518597121753e-05, + "loss": 0.6602, + "step": 26820 + }, + { + "epoch": 0.4634538291401714, + "grad_norm": 0.850384840356886, + "learning_rate": 1.1661966733447453e-05, + "loss": 0.4238, + "step": 26821 + }, + { + "epoch": 0.4634711086535803, + "grad_norm": 1.1637863901935825, + "learning_rate": 1.1661414864567752e-05, + "loss": 0.4107, + "step": 26822 + }, + { + "epoch": 0.46348838816698923, + "grad_norm": 0.9243922955968306, + "learning_rate": 1.166086299048437e-05, + "loss": 0.3979, + "step": 26823 + }, + { + "epoch": 0.46350566768039814, + "grad_norm": 0.6194014519115851, + "learning_rate": 1.166031111119904e-05, + "loss": 0.6049, + "step": 26824 + }, + { + "epoch": 0.46352294719380704, + "grad_norm": 1.0209690091250283, + "learning_rate": 1.165975922671349e-05, + "loss": 0.4557, + "step": 26825 + }, + { + "epoch": 0.46354022670721595, + "grad_norm": 0.8294244278133144, + "learning_rate": 1.1659207337029445e-05, + "loss": 0.3912, + "step": 26826 + }, + { + "epoch": 0.4635575062206248, + "grad_norm": 0.44162117492996167, + "learning_rate": 1.1658655442148641e-05, + "loss": 0.6869, + "step": 26827 + }, + { + "epoch": 0.4635747857340337, + "grad_norm": 0.8795807250518186, + "learning_rate": 1.1658103542072801e-05, + "loss": 0.472, + "step": 26828 + }, + { + "epoch": 0.4635920652474426, + "grad_norm": 1.1844432940187872, + "learning_rate": 1.1657551636803652e-05, + "loss": 0.4347, + "step": 26829 + }, + { + "epoch": 0.4636093447608515, + "grad_norm": 1.1464828190888943, + "learning_rate": 1.1656999726342928e-05, + "loss": 0.361, + "step": 26830 + }, + { + "epoch": 0.46362662427426043, + "grad_norm": 0.7328452621649578, + "learning_rate": 1.1656447810692353e-05, + "loss": 0.3088, + "step": 26831 + }, + { + "epoch": 0.46364390378766934, + "grad_norm": 0.8084152844706093, + "learning_rate": 1.1655895889853659e-05, + "loss": 0.4747, + "step": 26832 + }, + { + "epoch": 0.46366118330107825, + "grad_norm": 0.7820937325459862, + "learning_rate": 1.1655343963828573e-05, + "loss": 0.2899, + "step": 26833 + }, + { + "epoch": 0.46367846281448716, + "grad_norm": 0.929273819325117, + "learning_rate": 1.1654792032618824e-05, + "loss": 0.3912, + "step": 26834 + }, + { + "epoch": 0.46369574232789607, + "grad_norm": 1.6327061040243576, + "learning_rate": 1.165424009622614e-05, + "loss": 0.4615, + "step": 26835 + }, + { + "epoch": 0.463713021841305, + "grad_norm": 0.7996998753640661, + "learning_rate": 1.165368815465225e-05, + "loss": 0.3036, + "step": 26836 + }, + { + "epoch": 0.4637303013547138, + "grad_norm": 1.3269710125576721, + "learning_rate": 1.1653136207898886e-05, + "loss": 0.5322, + "step": 26837 + }, + { + "epoch": 0.46374758086812273, + "grad_norm": 0.5388378492711435, + "learning_rate": 1.1652584255967767e-05, + "loss": 0.6245, + "step": 26838 + }, + { + "epoch": 0.46376486038153164, + "grad_norm": 0.7095450886850425, + "learning_rate": 1.1652032298860636e-05, + "loss": 0.3333, + "step": 26839 + }, + { + "epoch": 0.46378213989494055, + "grad_norm": 0.8367694687665319, + "learning_rate": 1.1651480336579211e-05, + "loss": 0.4109, + "step": 26840 + }, + { + "epoch": 0.46379941940834946, + "grad_norm": 1.2016125987151167, + "learning_rate": 1.1650928369125226e-05, + "loss": 0.3987, + "step": 26841 + }, + { + "epoch": 0.46381669892175836, + "grad_norm": 0.7889770338660963, + "learning_rate": 1.1650376396500404e-05, + "loss": 0.3558, + "step": 26842 + }, + { + "epoch": 0.46383397843516727, + "grad_norm": 1.4751576319873114, + "learning_rate": 1.1649824418706479e-05, + "loss": 0.3491, + "step": 26843 + }, + { + "epoch": 0.4638512579485762, + "grad_norm": 0.7061407112558251, + "learning_rate": 1.164927243574518e-05, + "loss": 0.3869, + "step": 26844 + }, + { + "epoch": 0.4638685374619851, + "grad_norm": 1.217120895339851, + "learning_rate": 1.1648720447618234e-05, + "loss": 0.3862, + "step": 26845 + }, + { + "epoch": 0.463885816975394, + "grad_norm": 1.2029394652587226, + "learning_rate": 1.1648168454327369e-05, + "loss": 0.4628, + "step": 26846 + }, + { + "epoch": 0.4639030964888029, + "grad_norm": 1.0924549115470095, + "learning_rate": 1.1647616455874315e-05, + "loss": 0.7808, + "step": 26847 + }, + { + "epoch": 0.46392037600221175, + "grad_norm": 1.2215782906422392, + "learning_rate": 1.1647064452260803e-05, + "loss": 0.4242, + "step": 26848 + }, + { + "epoch": 0.46393765551562066, + "grad_norm": 0.8406421482344073, + "learning_rate": 1.164651244348856e-05, + "loss": 0.4728, + "step": 26849 + }, + { + "epoch": 0.46395493502902957, + "grad_norm": 1.204523020291957, + "learning_rate": 1.1645960429559316e-05, + "loss": 0.4244, + "step": 26850 + }, + { + "epoch": 0.4639722145424385, + "grad_norm": 1.0397655403280572, + "learning_rate": 1.1645408410474794e-05, + "loss": 0.5146, + "step": 26851 + }, + { + "epoch": 0.4639894940558474, + "grad_norm": 1.1406919058663259, + "learning_rate": 1.1644856386236731e-05, + "loss": 0.3591, + "step": 26852 + }, + { + "epoch": 0.4640067735692563, + "grad_norm": 2.3789897786276675, + "learning_rate": 1.1644304356846853e-05, + "loss": 0.4642, + "step": 26853 + }, + { + "epoch": 0.4640240530826652, + "grad_norm": 0.7444315730698299, + "learning_rate": 1.164375232230689e-05, + "loss": 0.4712, + "step": 26854 + }, + { + "epoch": 0.4640413325960741, + "grad_norm": 0.9286550238580357, + "learning_rate": 1.1643200282618566e-05, + "loss": 0.5018, + "step": 26855 + }, + { + "epoch": 0.464058612109483, + "grad_norm": 1.0452411201640657, + "learning_rate": 1.1642648237783616e-05, + "loss": 0.4, + "step": 26856 + }, + { + "epoch": 0.4640758916228919, + "grad_norm": 0.8657439649524099, + "learning_rate": 1.1642096187803768e-05, + "loss": 0.5319, + "step": 26857 + }, + { + "epoch": 0.4640931711363008, + "grad_norm": 0.8461566792640159, + "learning_rate": 1.1641544132680746e-05, + "loss": 0.441, + "step": 26858 + }, + { + "epoch": 0.4641104506497097, + "grad_norm": 0.8278867842773346, + "learning_rate": 1.1640992072416288e-05, + "loss": 0.2988, + "step": 26859 + }, + { + "epoch": 0.4641277301631186, + "grad_norm": 1.06041478567157, + "learning_rate": 1.1640440007012117e-05, + "loss": 0.4008, + "step": 26860 + }, + { + "epoch": 0.4641450096765275, + "grad_norm": 1.1573405943444772, + "learning_rate": 1.1639887936469963e-05, + "loss": 0.5417, + "step": 26861 + }, + { + "epoch": 0.4641622891899364, + "grad_norm": 1.402654759342715, + "learning_rate": 1.1639335860791554e-05, + "loss": 0.5253, + "step": 26862 + }, + { + "epoch": 0.4641795687033453, + "grad_norm": 1.0940176840397777, + "learning_rate": 1.163878377997862e-05, + "loss": 0.4192, + "step": 26863 + }, + { + "epoch": 0.4641968482167542, + "grad_norm": 1.3617745244583994, + "learning_rate": 1.1638231694032891e-05, + "loss": 0.6107, + "step": 26864 + }, + { + "epoch": 0.46421412773016313, + "grad_norm": 1.6372981157083437, + "learning_rate": 1.1637679602956098e-05, + "loss": 0.4157, + "step": 26865 + }, + { + "epoch": 0.46423140724357204, + "grad_norm": 1.0563599268841286, + "learning_rate": 1.1637127506749964e-05, + "loss": 0.4466, + "step": 26866 + }, + { + "epoch": 0.46424868675698094, + "grad_norm": 0.9414862915146452, + "learning_rate": 1.1636575405416226e-05, + "loss": 0.3352, + "step": 26867 + }, + { + "epoch": 0.46426596627038985, + "grad_norm": 0.7027139229579424, + "learning_rate": 1.1636023298956607e-05, + "loss": 0.3114, + "step": 26868 + }, + { + "epoch": 0.4642832457837987, + "grad_norm": 0.6767836470951081, + "learning_rate": 1.163547118737284e-05, + "loss": 0.2673, + "step": 26869 + }, + { + "epoch": 0.4643005252972076, + "grad_norm": 1.161084770769678, + "learning_rate": 1.1634919070666654e-05, + "loss": 0.3083, + "step": 26870 + }, + { + "epoch": 0.4643178048106165, + "grad_norm": 0.9959664301458547, + "learning_rate": 1.1634366948839775e-05, + "loss": 0.4226, + "step": 26871 + }, + { + "epoch": 0.4643350843240254, + "grad_norm": 1.0795539748232788, + "learning_rate": 1.1633814821893936e-05, + "loss": 0.3472, + "step": 26872 + }, + { + "epoch": 0.46435236383743433, + "grad_norm": 0.9829096599209943, + "learning_rate": 1.1633262689830866e-05, + "loss": 0.5646, + "step": 26873 + }, + { + "epoch": 0.46436964335084324, + "grad_norm": 0.8108635954581762, + "learning_rate": 1.1632710552652291e-05, + "loss": 0.4039, + "step": 26874 + }, + { + "epoch": 0.46438692286425215, + "grad_norm": 0.910494725640619, + "learning_rate": 1.1632158410359941e-05, + "loss": 0.4585, + "step": 26875 + }, + { + "epoch": 0.46440420237766106, + "grad_norm": 0.9885763345834022, + "learning_rate": 1.1631606262955551e-05, + "loss": 0.5517, + "step": 26876 + }, + { + "epoch": 0.46442148189106996, + "grad_norm": 1.372666719221957, + "learning_rate": 1.1631054110440845e-05, + "loss": 0.3663, + "step": 26877 + }, + { + "epoch": 0.4644387614044789, + "grad_norm": 1.282941680670728, + "learning_rate": 1.163050195281755e-05, + "loss": 0.4466, + "step": 26878 + }, + { + "epoch": 0.4644560409178877, + "grad_norm": 0.9647788159696421, + "learning_rate": 1.1629949790087403e-05, + "loss": 0.3437, + "step": 26879 + }, + { + "epoch": 0.46447332043129663, + "grad_norm": 1.0707877915356332, + "learning_rate": 1.1629397622252127e-05, + "loss": 0.4656, + "step": 26880 + }, + { + "epoch": 0.46449059994470554, + "grad_norm": 1.1531071350202402, + "learning_rate": 1.1628845449313454e-05, + "loss": 0.4259, + "step": 26881 + }, + { + "epoch": 0.46450787945811445, + "grad_norm": 0.900846182042778, + "learning_rate": 1.1628293271273117e-05, + "loss": 0.6236, + "step": 26882 + }, + { + "epoch": 0.46452515897152336, + "grad_norm": 1.2655325087190221, + "learning_rate": 1.1627741088132836e-05, + "loss": 0.5701, + "step": 26883 + }, + { + "epoch": 0.46454243848493226, + "grad_norm": 0.9090298875866185, + "learning_rate": 1.162718889989435e-05, + "loss": 0.4867, + "step": 26884 + }, + { + "epoch": 0.46455971799834117, + "grad_norm": 0.6489559346902015, + "learning_rate": 1.1626636706559384e-05, + "loss": 0.4535, + "step": 26885 + }, + { + "epoch": 0.4645769975117501, + "grad_norm": 0.8081850013826828, + "learning_rate": 1.1626084508129668e-05, + "loss": 0.2944, + "step": 26886 + }, + { + "epoch": 0.464594277025159, + "grad_norm": 0.8790722028416413, + "learning_rate": 1.162553230460693e-05, + "loss": 0.4326, + "step": 26887 + }, + { + "epoch": 0.4646115565385679, + "grad_norm": 1.0021058461041885, + "learning_rate": 1.1624980095992904e-05, + "loss": 0.4189, + "step": 26888 + }, + { + "epoch": 0.4646288360519768, + "grad_norm": 1.0713903916898, + "learning_rate": 1.1624427882289314e-05, + "loss": 0.5916, + "step": 26889 + }, + { + "epoch": 0.46464611556538565, + "grad_norm": 0.8723329592066361, + "learning_rate": 1.1623875663497895e-05, + "loss": 0.343, + "step": 26890 + }, + { + "epoch": 0.46466339507879456, + "grad_norm": 0.7636265183502359, + "learning_rate": 1.1623323439620373e-05, + "loss": 0.5298, + "step": 26891 + }, + { + "epoch": 0.46468067459220347, + "grad_norm": 0.7796884402421784, + "learning_rate": 1.1622771210658479e-05, + "loss": 0.3078, + "step": 26892 + }, + { + "epoch": 0.4646979541056124, + "grad_norm": 1.1665898533684227, + "learning_rate": 1.1622218976613944e-05, + "loss": 0.5475, + "step": 26893 + }, + { + "epoch": 0.4647152336190213, + "grad_norm": 1.0641059005030382, + "learning_rate": 1.1621666737488493e-05, + "loss": 0.518, + "step": 26894 + }, + { + "epoch": 0.4647325131324302, + "grad_norm": 0.7430393503284108, + "learning_rate": 1.1621114493283858e-05, + "loss": 0.5141, + "step": 26895 + }, + { + "epoch": 0.4647497926458391, + "grad_norm": 0.5802322225430451, + "learning_rate": 1.1620562244001772e-05, + "loss": 0.572, + "step": 26896 + }, + { + "epoch": 0.464767072159248, + "grad_norm": 0.9649999706754329, + "learning_rate": 1.1620009989643961e-05, + "loss": 0.4345, + "step": 26897 + }, + { + "epoch": 0.4647843516726569, + "grad_norm": 0.8621331000450717, + "learning_rate": 1.1619457730212154e-05, + "loss": 0.2904, + "step": 26898 + }, + { + "epoch": 0.4648016311860658, + "grad_norm": 1.4405143230096864, + "learning_rate": 1.1618905465708083e-05, + "loss": 0.4165, + "step": 26899 + }, + { + "epoch": 0.46481891069947473, + "grad_norm": 1.4397083388475485, + "learning_rate": 1.1618353196133478e-05, + "loss": 0.4718, + "step": 26900 + }, + { + "epoch": 0.4648361902128836, + "grad_norm": 1.0582980448527899, + "learning_rate": 1.1617800921490068e-05, + "loss": 0.7168, + "step": 26901 + }, + { + "epoch": 0.4648534697262925, + "grad_norm": 0.39456466535642104, + "learning_rate": 1.1617248641779582e-05, + "loss": 0.4868, + "step": 26902 + }, + { + "epoch": 0.4648707492397014, + "grad_norm": 0.8410170396695411, + "learning_rate": 1.161669635700375e-05, + "loss": 0.328, + "step": 26903 + }, + { + "epoch": 0.4648880287531103, + "grad_norm": 1.4093353580018442, + "learning_rate": 1.1616144067164301e-05, + "loss": 0.3569, + "step": 26904 + }, + { + "epoch": 0.4649053082665192, + "grad_norm": 1.1461513993609478, + "learning_rate": 1.1615591772262969e-05, + "loss": 0.3554, + "step": 26905 + }, + { + "epoch": 0.4649225877799281, + "grad_norm": 0.9504332481340168, + "learning_rate": 1.1615039472301477e-05, + "loss": 0.4788, + "step": 26906 + }, + { + "epoch": 0.46493986729333703, + "grad_norm": 1.013788784963635, + "learning_rate": 1.1614487167281561e-05, + "loss": 0.4973, + "step": 26907 + }, + { + "epoch": 0.46495714680674594, + "grad_norm": 1.375452029329509, + "learning_rate": 1.161393485720495e-05, + "loss": 0.6264, + "step": 26908 + }, + { + "epoch": 0.46497442632015484, + "grad_norm": 1.3020177027345399, + "learning_rate": 1.1613382542073368e-05, + "loss": 0.3801, + "step": 26909 + }, + { + "epoch": 0.46499170583356375, + "grad_norm": 1.488332277876307, + "learning_rate": 1.1612830221888553e-05, + "loss": 0.6322, + "step": 26910 + }, + { + "epoch": 0.4650089853469726, + "grad_norm": 0.7508249718970749, + "learning_rate": 1.1612277896652228e-05, + "loss": 0.5782, + "step": 26911 + }, + { + "epoch": 0.4650262648603815, + "grad_norm": 0.9200664393297814, + "learning_rate": 1.161172556636613e-05, + "loss": 0.3256, + "step": 26912 + }, + { + "epoch": 0.4650435443737904, + "grad_norm": 0.9146666302343633, + "learning_rate": 1.1611173231031985e-05, + "loss": 0.4303, + "step": 26913 + }, + { + "epoch": 0.4650608238871993, + "grad_norm": 0.604908255040807, + "learning_rate": 1.161062089065152e-05, + "loss": 0.2772, + "step": 26914 + }, + { + "epoch": 0.46507810340060823, + "grad_norm": 1.908329469348905, + "learning_rate": 1.161006854522647e-05, + "loss": 0.3317, + "step": 26915 + }, + { + "epoch": 0.46509538291401714, + "grad_norm": 1.2464305841318035, + "learning_rate": 1.1609516194758562e-05, + "loss": 0.3289, + "step": 26916 + }, + { + "epoch": 0.46511266242742605, + "grad_norm": 0.977488838928972, + "learning_rate": 1.1608963839249528e-05, + "loss": 0.3678, + "step": 26917 + }, + { + "epoch": 0.46512994194083496, + "grad_norm": 0.8944780685439656, + "learning_rate": 1.1608411478701094e-05, + "loss": 0.5514, + "step": 26918 + }, + { + "epoch": 0.46514722145424386, + "grad_norm": 0.9539749115905207, + "learning_rate": 1.1607859113114995e-05, + "loss": 0.4741, + "step": 26919 + }, + { + "epoch": 0.46516450096765277, + "grad_norm": 0.9829547633864353, + "learning_rate": 1.1607306742492959e-05, + "loss": 0.6493, + "step": 26920 + }, + { + "epoch": 0.4651817804810617, + "grad_norm": 0.809513031340412, + "learning_rate": 1.1606754366836715e-05, + "loss": 0.3457, + "step": 26921 + }, + { + "epoch": 0.46519905999447053, + "grad_norm": 0.7706867390506649, + "learning_rate": 1.1606201986147997e-05, + "loss": 0.8328, + "step": 26922 + }, + { + "epoch": 0.46521633950787944, + "grad_norm": 1.2971470152038052, + "learning_rate": 1.160564960042853e-05, + "loss": 0.4322, + "step": 26923 + }, + { + "epoch": 0.46523361902128835, + "grad_norm": 0.596607844984062, + "learning_rate": 1.1605097209680047e-05, + "loss": 0.3321, + "step": 26924 + }, + { + "epoch": 0.46525089853469725, + "grad_norm": 0.6175576175929327, + "learning_rate": 1.160454481390428e-05, + "loss": 0.3075, + "step": 26925 + }, + { + "epoch": 0.46526817804810616, + "grad_norm": 1.505689572153735, + "learning_rate": 1.1603992413102953e-05, + "loss": 0.5818, + "step": 26926 + }, + { + "epoch": 0.46528545756151507, + "grad_norm": 0.8157084541282743, + "learning_rate": 1.1603440007277803e-05, + "loss": 0.5049, + "step": 26927 + }, + { + "epoch": 0.465302737074924, + "grad_norm": 0.9873746623402639, + "learning_rate": 1.1602887596430555e-05, + "loss": 0.5234, + "step": 26928 + }, + { + "epoch": 0.4653200165883329, + "grad_norm": 0.9983430304998739, + "learning_rate": 1.1602335180562938e-05, + "loss": 0.5643, + "step": 26929 + }, + { + "epoch": 0.4653372961017418, + "grad_norm": 1.450914735300895, + "learning_rate": 1.160178275967669e-05, + "loss": 0.3644, + "step": 26930 + }, + { + "epoch": 0.4653545756151507, + "grad_norm": 1.2160827658533893, + "learning_rate": 1.1601230333773537e-05, + "loss": 0.5359, + "step": 26931 + }, + { + "epoch": 0.46537185512855955, + "grad_norm": 1.165738500488146, + "learning_rate": 1.1600677902855208e-05, + "loss": 0.3175, + "step": 26932 + }, + { + "epoch": 0.46538913464196846, + "grad_norm": 0.9947188408274327, + "learning_rate": 1.1600125466923435e-05, + "loss": 0.6415, + "step": 26933 + }, + { + "epoch": 0.46540641415537737, + "grad_norm": 0.9956345635947238, + "learning_rate": 1.1599573025979947e-05, + "loss": 0.6066, + "step": 26934 + }, + { + "epoch": 0.4654236936687863, + "grad_norm": 0.7278963419944255, + "learning_rate": 1.1599020580026471e-05, + "loss": 0.4677, + "step": 26935 + }, + { + "epoch": 0.4654409731821952, + "grad_norm": 1.1366962070631494, + "learning_rate": 1.1598468129064746e-05, + "loss": 0.3723, + "step": 26936 + }, + { + "epoch": 0.4654582526956041, + "grad_norm": 1.436356346120973, + "learning_rate": 1.1597915673096494e-05, + "loss": 0.4527, + "step": 26937 + }, + { + "epoch": 0.465475532209013, + "grad_norm": 0.8328216621138729, + "learning_rate": 1.159736321212345e-05, + "loss": 0.443, + "step": 26938 + }, + { + "epoch": 0.4654928117224219, + "grad_norm": 1.0687757445744495, + "learning_rate": 1.1596810746147342e-05, + "loss": 0.4458, + "step": 26939 + }, + { + "epoch": 0.4655100912358308, + "grad_norm": 0.7420866121590435, + "learning_rate": 1.1596258275169903e-05, + "loss": 0.3041, + "step": 26940 + }, + { + "epoch": 0.4655273707492397, + "grad_norm": 1.3865297519585322, + "learning_rate": 1.159570579919286e-05, + "loss": 0.4231, + "step": 26941 + }, + { + "epoch": 0.46554465026264863, + "grad_norm": 0.9510743229026699, + "learning_rate": 1.1595153318217949e-05, + "loss": 0.5793, + "step": 26942 + }, + { + "epoch": 0.4655619297760575, + "grad_norm": 0.8539026334334702, + "learning_rate": 1.1594600832246892e-05, + "loss": 0.3752, + "step": 26943 + }, + { + "epoch": 0.4655792092894664, + "grad_norm": 0.4651683068866058, + "learning_rate": 1.1594048341281427e-05, + "loss": 0.6937, + "step": 26944 + }, + { + "epoch": 0.4655964888028753, + "grad_norm": 0.8610271622100559, + "learning_rate": 1.1593495845323282e-05, + "loss": 0.3447, + "step": 26945 + }, + { + "epoch": 0.4656137683162842, + "grad_norm": 0.9558685258488674, + "learning_rate": 1.1592943344374184e-05, + "loss": 0.3723, + "step": 26946 + }, + { + "epoch": 0.4656310478296931, + "grad_norm": 1.3275457074241035, + "learning_rate": 1.1592390838435868e-05, + "loss": 0.5449, + "step": 26947 + }, + { + "epoch": 0.465648327343102, + "grad_norm": 1.2284382122808015, + "learning_rate": 1.1591838327510062e-05, + "loss": 0.4658, + "step": 26948 + }, + { + "epoch": 0.4656656068565109, + "grad_norm": 1.1220789983580568, + "learning_rate": 1.1591285811598495e-05, + "loss": 0.3691, + "step": 26949 + }, + { + "epoch": 0.46568288636991984, + "grad_norm": 1.7364273673374775, + "learning_rate": 1.1590733290702904e-05, + "loss": 0.5546, + "step": 26950 + }, + { + "epoch": 0.46570016588332874, + "grad_norm": 1.3121819177107683, + "learning_rate": 1.1590180764825015e-05, + "loss": 0.4661, + "step": 26951 + }, + { + "epoch": 0.46571744539673765, + "grad_norm": 0.7795678822775861, + "learning_rate": 1.158962823396656e-05, + "loss": 0.4479, + "step": 26952 + }, + { + "epoch": 0.4657347249101465, + "grad_norm": 1.5379587736630773, + "learning_rate": 1.1589075698129267e-05, + "loss": 0.5168, + "step": 26953 + }, + { + "epoch": 0.4657520044235554, + "grad_norm": 0.6324384084863782, + "learning_rate": 1.1588523157314868e-05, + "loss": 0.3227, + "step": 26954 + }, + { + "epoch": 0.4657692839369643, + "grad_norm": 0.8306051798576161, + "learning_rate": 1.1587970611525092e-05, + "loss": 0.4426, + "step": 26955 + }, + { + "epoch": 0.4657865634503732, + "grad_norm": 0.8213869399048206, + "learning_rate": 1.1587418060761672e-05, + "loss": 0.5807, + "step": 26956 + }, + { + "epoch": 0.46580384296378213, + "grad_norm": 0.7295968760757635, + "learning_rate": 1.158686550502634e-05, + "loss": 0.6304, + "step": 26957 + }, + { + "epoch": 0.46582112247719104, + "grad_norm": 1.0779640160207007, + "learning_rate": 1.1586312944320823e-05, + "loss": 0.3999, + "step": 26958 + }, + { + "epoch": 0.46583840199059995, + "grad_norm": 1.0039237822801175, + "learning_rate": 1.1585760378646856e-05, + "loss": 0.2767, + "step": 26959 + }, + { + "epoch": 0.46585568150400886, + "grad_norm": 0.8830287609258018, + "learning_rate": 1.1585207808006165e-05, + "loss": 0.352, + "step": 26960 + }, + { + "epoch": 0.46587296101741776, + "grad_norm": 1.4802440516708737, + "learning_rate": 1.1584655232400478e-05, + "loss": 0.271, + "step": 26961 + }, + { + "epoch": 0.46589024053082667, + "grad_norm": 1.4886754746210142, + "learning_rate": 1.1584102651831537e-05, + "loss": 0.4875, + "step": 26962 + }, + { + "epoch": 0.4659075200442356, + "grad_norm": 1.4432888817824594, + "learning_rate": 1.1583550066301065e-05, + "loss": 0.5424, + "step": 26963 + }, + { + "epoch": 0.46592479955764443, + "grad_norm": 1.248680820323453, + "learning_rate": 1.1582997475810793e-05, + "loss": 0.4973, + "step": 26964 + }, + { + "epoch": 0.46594207907105334, + "grad_norm": 0.6032891486515513, + "learning_rate": 1.1582444880362453e-05, + "loss": 0.5887, + "step": 26965 + }, + { + "epoch": 0.46595935858446225, + "grad_norm": 1.0798278911077988, + "learning_rate": 1.1581892279957774e-05, + "loss": 0.5854, + "step": 26966 + }, + { + "epoch": 0.46597663809787115, + "grad_norm": 1.2224735252977552, + "learning_rate": 1.1581339674598492e-05, + "loss": 0.4468, + "step": 26967 + }, + { + "epoch": 0.46599391761128006, + "grad_norm": 1.1498430999517735, + "learning_rate": 1.158078706428633e-05, + "loss": 0.5488, + "step": 26968 + }, + { + "epoch": 0.46601119712468897, + "grad_norm": 1.3531130600044132, + "learning_rate": 1.1580234449023022e-05, + "loss": 0.3403, + "step": 26969 + }, + { + "epoch": 0.4660284766380979, + "grad_norm": 0.9189475273234666, + "learning_rate": 1.1579681828810302e-05, + "loss": 0.3734, + "step": 26970 + }, + { + "epoch": 0.4660457561515068, + "grad_norm": 0.6034871524317341, + "learning_rate": 1.1579129203649899e-05, + "loss": 0.4757, + "step": 26971 + }, + { + "epoch": 0.4660630356649157, + "grad_norm": 0.4471224474519245, + "learning_rate": 1.1578576573543541e-05, + "loss": 0.6162, + "step": 26972 + }, + { + "epoch": 0.4660803151783246, + "grad_norm": 0.6511493970893892, + "learning_rate": 1.1578023938492964e-05, + "loss": 0.2936, + "step": 26973 + }, + { + "epoch": 0.4660975946917335, + "grad_norm": 0.6593820587613511, + "learning_rate": 1.1577471298499896e-05, + "loss": 0.3672, + "step": 26974 + }, + { + "epoch": 0.46611487420514236, + "grad_norm": 1.0840239052297884, + "learning_rate": 1.1576918653566065e-05, + "loss": 0.3896, + "step": 26975 + }, + { + "epoch": 0.46613215371855127, + "grad_norm": 1.100502459736239, + "learning_rate": 1.1576366003693206e-05, + "loss": 0.656, + "step": 26976 + }, + { + "epoch": 0.4661494332319602, + "grad_norm": 0.6904707299868614, + "learning_rate": 1.1575813348883051e-05, + "loss": 0.3242, + "step": 26977 + }, + { + "epoch": 0.4661667127453691, + "grad_norm": 1.1959809961985828, + "learning_rate": 1.1575260689137323e-05, + "loss": 0.442, + "step": 26978 + }, + { + "epoch": 0.466183992258778, + "grad_norm": 1.5518032201905236, + "learning_rate": 1.1574708024457766e-05, + "loss": 0.5851, + "step": 26979 + }, + { + "epoch": 0.4662012717721869, + "grad_norm": 1.139497994550156, + "learning_rate": 1.1574155354846102e-05, + "loss": 0.3693, + "step": 26980 + }, + { + "epoch": 0.4662185512855958, + "grad_norm": 0.8220153450885088, + "learning_rate": 1.1573602680304062e-05, + "loss": 0.6025, + "step": 26981 + }, + { + "epoch": 0.4662358307990047, + "grad_norm": 1.5853210143047651, + "learning_rate": 1.157305000083338e-05, + "loss": 0.3686, + "step": 26982 + }, + { + "epoch": 0.4662531103124136, + "grad_norm": 0.8653793880339846, + "learning_rate": 1.1572497316435785e-05, + "loss": 0.6156, + "step": 26983 + }, + { + "epoch": 0.46627038982582253, + "grad_norm": 1.3325296460758653, + "learning_rate": 1.157194462711301e-05, + "loss": 0.5293, + "step": 26984 + }, + { + "epoch": 0.4662876693392314, + "grad_norm": 0.8696611352029957, + "learning_rate": 1.1571391932866783e-05, + "loss": 0.295, + "step": 26985 + }, + { + "epoch": 0.4663049488526403, + "grad_norm": 0.9085061000854591, + "learning_rate": 1.1570839233698836e-05, + "loss": 0.4615, + "step": 26986 + }, + { + "epoch": 0.4663222283660492, + "grad_norm": 1.5755657686030757, + "learning_rate": 1.1570286529610906e-05, + "loss": 0.4852, + "step": 26987 + }, + { + "epoch": 0.4663395078794581, + "grad_norm": 0.836577096345374, + "learning_rate": 1.1569733820604715e-05, + "loss": 0.4326, + "step": 26988 + }, + { + "epoch": 0.466356787392867, + "grad_norm": 0.5576485364281879, + "learning_rate": 1.1569181106682e-05, + "loss": 0.8429, + "step": 26989 + }, + { + "epoch": 0.4663740669062759, + "grad_norm": 0.9337116472183714, + "learning_rate": 1.1568628387844489e-05, + "loss": 0.4801, + "step": 26990 + }, + { + "epoch": 0.4663913464196848, + "grad_norm": 1.7608552667010549, + "learning_rate": 1.1568075664093919e-05, + "loss": 0.6695, + "step": 26991 + }, + { + "epoch": 0.46640862593309373, + "grad_norm": 1.8534690204693565, + "learning_rate": 1.1567522935432012e-05, + "loss": 0.5061, + "step": 26992 + }, + { + "epoch": 0.46642590544650264, + "grad_norm": 0.7609061642343856, + "learning_rate": 1.1566970201860505e-05, + "loss": 0.3218, + "step": 26993 + }, + { + "epoch": 0.46644318495991155, + "grad_norm": 1.2004776966793644, + "learning_rate": 1.1566417463381132e-05, + "loss": 0.4106, + "step": 26994 + }, + { + "epoch": 0.46646046447332046, + "grad_norm": 1.1187390812455091, + "learning_rate": 1.1565864719995617e-05, + "loss": 0.4683, + "step": 26995 + }, + { + "epoch": 0.4664777439867293, + "grad_norm": 0.9185096451828658, + "learning_rate": 1.1565311971705697e-05, + "loss": 0.533, + "step": 26996 + }, + { + "epoch": 0.4664950235001382, + "grad_norm": 1.159878195705123, + "learning_rate": 1.15647592185131e-05, + "loss": 0.5003, + "step": 26997 + }, + { + "epoch": 0.4665123030135471, + "grad_norm": 1.2442273622893487, + "learning_rate": 1.1564206460419556e-05, + "loss": 0.5211, + "step": 26998 + }, + { + "epoch": 0.46652958252695603, + "grad_norm": 0.696033048758318, + "learning_rate": 1.15636536974268e-05, + "loss": 0.5881, + "step": 26999 + }, + { + "epoch": 0.46654686204036494, + "grad_norm": 0.844363837981624, + "learning_rate": 1.1563100929536561e-05, + "loss": 0.4251, + "step": 27000 + }, + { + "epoch": 0.46656414155377385, + "grad_norm": 0.8262876689350104, + "learning_rate": 1.1562548156750573e-05, + "loss": 0.4059, + "step": 27001 + }, + { + "epoch": 0.46658142106718276, + "grad_norm": 0.5231129663769185, + "learning_rate": 1.1561995379070563e-05, + "loss": 0.5187, + "step": 27002 + }, + { + "epoch": 0.46659870058059166, + "grad_norm": 0.9607799067839775, + "learning_rate": 1.1561442596498266e-05, + "loss": 0.4493, + "step": 27003 + }, + { + "epoch": 0.46661598009400057, + "grad_norm": 1.5361726771631061, + "learning_rate": 1.1560889809035413e-05, + "loss": 0.368, + "step": 27004 + }, + { + "epoch": 0.4666332596074095, + "grad_norm": 1.519106033425197, + "learning_rate": 1.1560337016683737e-05, + "loss": 0.3671, + "step": 27005 + }, + { + "epoch": 0.46665053912081833, + "grad_norm": 0.8209596474288344, + "learning_rate": 1.1559784219444961e-05, + "loss": 0.4742, + "step": 27006 + }, + { + "epoch": 0.46666781863422724, + "grad_norm": 0.41368825369290946, + "learning_rate": 1.1559231417320827e-05, + "loss": 0.6024, + "step": 27007 + }, + { + "epoch": 0.46668509814763615, + "grad_norm": 0.9243010465227667, + "learning_rate": 1.155867861031306e-05, + "loss": 0.4412, + "step": 27008 + }, + { + "epoch": 0.46670237766104505, + "grad_norm": 0.8281518307987018, + "learning_rate": 1.155812579842339e-05, + "loss": 0.4074, + "step": 27009 + }, + { + "epoch": 0.46671965717445396, + "grad_norm": 0.9876677179547491, + "learning_rate": 1.1557572981653555e-05, + "loss": 0.5447, + "step": 27010 + }, + { + "epoch": 0.46673693668786287, + "grad_norm": 2.6373034598249596, + "learning_rate": 1.1557020160005284e-05, + "loss": 0.4271, + "step": 27011 + }, + { + "epoch": 0.4667542162012718, + "grad_norm": 0.751108556297736, + "learning_rate": 1.1556467333480303e-05, + "loss": 0.3655, + "step": 27012 + }, + { + "epoch": 0.4667714957146807, + "grad_norm": 0.980207739925325, + "learning_rate": 1.1555914502080352e-05, + "loss": 0.449, + "step": 27013 + }, + { + "epoch": 0.4667887752280896, + "grad_norm": 0.9796669582057157, + "learning_rate": 1.155536166580716e-05, + "loss": 0.3547, + "step": 27014 + }, + { + "epoch": 0.4668060547414985, + "grad_norm": 0.7635171876055133, + "learning_rate": 1.1554808824662453e-05, + "loss": 0.658, + "step": 27015 + }, + { + "epoch": 0.4668233342549074, + "grad_norm": 0.8680243539249383, + "learning_rate": 1.1554255978647968e-05, + "loss": 0.4239, + "step": 27016 + }, + { + "epoch": 0.46684061376831626, + "grad_norm": 0.6180435033909277, + "learning_rate": 1.1553703127765436e-05, + "loss": 0.6785, + "step": 27017 + }, + { + "epoch": 0.46685789328172517, + "grad_norm": 0.8750438630181157, + "learning_rate": 1.1553150272016587e-05, + "loss": 0.3918, + "step": 27018 + }, + { + "epoch": 0.4668751727951341, + "grad_norm": 1.8429460361681433, + "learning_rate": 1.1552597411403152e-05, + "loss": 0.2899, + "step": 27019 + }, + { + "epoch": 0.466892452308543, + "grad_norm": 1.0914072785507185, + "learning_rate": 1.1552044545926866e-05, + "loss": 0.4244, + "step": 27020 + }, + { + "epoch": 0.4669097318219519, + "grad_norm": 0.8690600789660137, + "learning_rate": 1.1551491675589457e-05, + "loss": 0.4286, + "step": 27021 + }, + { + "epoch": 0.4669270113353608, + "grad_norm": 0.8734216729420178, + "learning_rate": 1.1550938800392659e-05, + "loss": 0.3074, + "step": 27022 + }, + { + "epoch": 0.4669442908487697, + "grad_norm": 1.1103117327424847, + "learning_rate": 1.1550385920338203e-05, + "loss": 0.369, + "step": 27023 + }, + { + "epoch": 0.4669615703621786, + "grad_norm": 0.8889714701314868, + "learning_rate": 1.154983303542782e-05, + "loss": 0.3723, + "step": 27024 + }, + { + "epoch": 0.4669788498755875, + "grad_norm": 1.212964067886694, + "learning_rate": 1.1549280145663245e-05, + "loss": 0.3178, + "step": 27025 + }, + { + "epoch": 0.46699612938899643, + "grad_norm": 0.9863502275174781, + "learning_rate": 1.15487272510462e-05, + "loss": 0.4548, + "step": 27026 + }, + { + "epoch": 0.46701340890240534, + "grad_norm": 0.6992474426365537, + "learning_rate": 1.1548174351578429e-05, + "loss": 0.3584, + "step": 27027 + }, + { + "epoch": 0.4670306884158142, + "grad_norm": 1.0379301828666403, + "learning_rate": 1.1547621447261658e-05, + "loss": 0.3417, + "step": 27028 + }, + { + "epoch": 0.4670479679292231, + "grad_norm": 1.1658380112783995, + "learning_rate": 1.1547068538097616e-05, + "loss": 0.4001, + "step": 27029 + }, + { + "epoch": 0.467065247442632, + "grad_norm": 0.7201986898581142, + "learning_rate": 1.1546515624088041e-05, + "loss": 0.5293, + "step": 27030 + }, + { + "epoch": 0.4670825269560409, + "grad_norm": 0.8462305697574142, + "learning_rate": 1.1545962705234662e-05, + "loss": 0.262, + "step": 27031 + }, + { + "epoch": 0.4670998064694498, + "grad_norm": 0.884675230213046, + "learning_rate": 1.1545409781539207e-05, + "loss": 0.3718, + "step": 27032 + }, + { + "epoch": 0.4671170859828587, + "grad_norm": 1.0945003610473394, + "learning_rate": 1.1544856853003413e-05, + "loss": 0.3934, + "step": 27033 + }, + { + "epoch": 0.46713436549626763, + "grad_norm": 0.7876856150198851, + "learning_rate": 1.154430391962901e-05, + "loss": 0.3505, + "step": 27034 + }, + { + "epoch": 0.46715164500967654, + "grad_norm": 0.9156092780821355, + "learning_rate": 1.1543750981417732e-05, + "loss": 0.405, + "step": 27035 + }, + { + "epoch": 0.46716892452308545, + "grad_norm": 0.934313645237875, + "learning_rate": 1.1543198038371307e-05, + "loss": 0.4506, + "step": 27036 + }, + { + "epoch": 0.46718620403649436, + "grad_norm": 0.6102646680275352, + "learning_rate": 1.1542645090491467e-05, + "loss": 0.2886, + "step": 27037 + }, + { + "epoch": 0.4672034835499032, + "grad_norm": 1.193954238621492, + "learning_rate": 1.1542092137779943e-05, + "loss": 0.551, + "step": 27038 + }, + { + "epoch": 0.4672207630633121, + "grad_norm": 0.5388198447805181, + "learning_rate": 1.1541539180238475e-05, + "loss": 0.7454, + "step": 27039 + }, + { + "epoch": 0.467238042576721, + "grad_norm": 1.0032063463729275, + "learning_rate": 1.1540986217868785e-05, + "loss": 0.5697, + "step": 27040 + }, + { + "epoch": 0.46725532209012993, + "grad_norm": 1.94431706028885, + "learning_rate": 1.1540433250672608e-05, + "loss": 0.53, + "step": 27041 + }, + { + "epoch": 0.46727260160353884, + "grad_norm": 1.0522970538252465, + "learning_rate": 1.1539880278651681e-05, + "loss": 0.3803, + "step": 27042 + }, + { + "epoch": 0.46728988111694775, + "grad_norm": 1.1667277873050022, + "learning_rate": 1.153932730180773e-05, + "loss": 0.4018, + "step": 27043 + }, + { + "epoch": 0.46730716063035665, + "grad_norm": 0.9298738593340806, + "learning_rate": 1.153877432014249e-05, + "loss": 0.49, + "step": 27044 + }, + { + "epoch": 0.46732444014376556, + "grad_norm": 0.747356763441753, + "learning_rate": 1.1538221333657692e-05, + "loss": 0.3596, + "step": 27045 + }, + { + "epoch": 0.46734171965717447, + "grad_norm": 0.8011803730737819, + "learning_rate": 1.1537668342355066e-05, + "loss": 0.3419, + "step": 27046 + }, + { + "epoch": 0.4673589991705834, + "grad_norm": 1.070920978081762, + "learning_rate": 1.1537115346236348e-05, + "loss": 0.3263, + "step": 27047 + }, + { + "epoch": 0.4673762786839923, + "grad_norm": 0.8804027939455196, + "learning_rate": 1.1536562345303266e-05, + "loss": 0.3264, + "step": 27048 + }, + { + "epoch": 0.46739355819740114, + "grad_norm": 1.044564391065046, + "learning_rate": 1.1536009339557554e-05, + "loss": 0.4557, + "step": 27049 + }, + { + "epoch": 0.46741083771081005, + "grad_norm": 1.5687216907741843, + "learning_rate": 1.1535456329000946e-05, + "loss": 0.4768, + "step": 27050 + }, + { + "epoch": 0.46742811722421895, + "grad_norm": 1.750738716381595, + "learning_rate": 1.153490331363517e-05, + "loss": 0.4141, + "step": 27051 + }, + { + "epoch": 0.46744539673762786, + "grad_norm": 1.0558267109714976, + "learning_rate": 1.153435029346196e-05, + "loss": 0.6787, + "step": 27052 + }, + { + "epoch": 0.46746267625103677, + "grad_norm": 1.182076691259401, + "learning_rate": 1.1533797268483049e-05, + "loss": 0.4787, + "step": 27053 + }, + { + "epoch": 0.4674799557644457, + "grad_norm": 0.6433913716828958, + "learning_rate": 1.153324423870017e-05, + "loss": 0.4261, + "step": 27054 + }, + { + "epoch": 0.4674972352778546, + "grad_norm": 0.8982008391509049, + "learning_rate": 1.1532691204115051e-05, + "loss": 0.5118, + "step": 27055 + }, + { + "epoch": 0.4675145147912635, + "grad_norm": 1.2363178707247864, + "learning_rate": 1.153213816472943e-05, + "loss": 0.491, + "step": 27056 + }, + { + "epoch": 0.4675317943046724, + "grad_norm": 0.9697438815515771, + "learning_rate": 1.1531585120545035e-05, + "loss": 0.2842, + "step": 27057 + }, + { + "epoch": 0.4675490738180813, + "grad_norm": 1.2376054774686207, + "learning_rate": 1.1531032071563596e-05, + "loss": 0.3749, + "step": 27058 + }, + { + "epoch": 0.46756635333149016, + "grad_norm": 1.2294001325257613, + "learning_rate": 1.1530479017786852e-05, + "loss": 0.4683, + "step": 27059 + }, + { + "epoch": 0.46758363284489907, + "grad_norm": 1.699730221043932, + "learning_rate": 1.1529925959216528e-05, + "loss": 0.4412, + "step": 27060 + }, + { + "epoch": 0.467600912358308, + "grad_norm": 1.2515003243929, + "learning_rate": 1.1529372895854358e-05, + "loss": 0.4534, + "step": 27061 + }, + { + "epoch": 0.4676181918717169, + "grad_norm": 1.1403901810001729, + "learning_rate": 1.152881982770208e-05, + "loss": 0.7453, + "step": 27062 + }, + { + "epoch": 0.4676354713851258, + "grad_norm": 1.4215167590045552, + "learning_rate": 1.1528266754761421e-05, + "loss": 0.5567, + "step": 27063 + }, + { + "epoch": 0.4676527508985347, + "grad_norm": 1.3211405067539768, + "learning_rate": 1.1527713677034114e-05, + "loss": 0.4229, + "step": 27064 + }, + { + "epoch": 0.4676700304119436, + "grad_norm": 0.5094293640091005, + "learning_rate": 1.1527160594521891e-05, + "loss": 0.8173, + "step": 27065 + }, + { + "epoch": 0.4676873099253525, + "grad_norm": 0.7697863805827472, + "learning_rate": 1.1526607507226488e-05, + "loss": 0.384, + "step": 27066 + }, + { + "epoch": 0.4677045894387614, + "grad_norm": 1.1890225244511246, + "learning_rate": 1.1526054415149632e-05, + "loss": 0.512, + "step": 27067 + }, + { + "epoch": 0.4677218689521703, + "grad_norm": 0.9153809952285784, + "learning_rate": 1.1525501318293058e-05, + "loss": 0.4457, + "step": 27068 + }, + { + "epoch": 0.46773914846557924, + "grad_norm": 0.8830973580679835, + "learning_rate": 1.1524948216658495e-05, + "loss": 0.4715, + "step": 27069 + }, + { + "epoch": 0.4677564279789881, + "grad_norm": 0.5586083646564206, + "learning_rate": 1.1524395110247683e-05, + "loss": 0.9401, + "step": 27070 + }, + { + "epoch": 0.467773707492397, + "grad_norm": 0.9635057143391259, + "learning_rate": 1.1523841999062348e-05, + "loss": 0.3613, + "step": 27071 + }, + { + "epoch": 0.4677909870058059, + "grad_norm": 0.8397015098658774, + "learning_rate": 1.1523288883104222e-05, + "loss": 0.5187, + "step": 27072 + }, + { + "epoch": 0.4678082665192148, + "grad_norm": 0.7890269835710908, + "learning_rate": 1.1522735762375041e-05, + "loss": 0.6252, + "step": 27073 + }, + { + "epoch": 0.4678255460326237, + "grad_norm": 0.8934110762701035, + "learning_rate": 1.1522182636876538e-05, + "loss": 0.4743, + "step": 27074 + }, + { + "epoch": 0.4678428255460326, + "grad_norm": 0.7810651643001175, + "learning_rate": 1.1521629506610439e-05, + "loss": 0.6747, + "step": 27075 + }, + { + "epoch": 0.46786010505944153, + "grad_norm": 0.9540997834355283, + "learning_rate": 1.1521076371578485e-05, + "loss": 0.4914, + "step": 27076 + }, + { + "epoch": 0.46787738457285044, + "grad_norm": 0.8489547694591201, + "learning_rate": 1.1520523231782403e-05, + "loss": 0.4008, + "step": 27077 + }, + { + "epoch": 0.46789466408625935, + "grad_norm": 1.0135740272776783, + "learning_rate": 1.1519970087223927e-05, + "loss": 0.4703, + "step": 27078 + }, + { + "epoch": 0.46791194359966826, + "grad_norm": 1.1872958545956245, + "learning_rate": 1.1519416937904789e-05, + "loss": 0.2852, + "step": 27079 + }, + { + "epoch": 0.4679292231130771, + "grad_norm": 0.9174546331588594, + "learning_rate": 1.151886378382672e-05, + "loss": 0.6883, + "step": 27080 + }, + { + "epoch": 0.467946502626486, + "grad_norm": 0.6853946623138797, + "learning_rate": 1.1518310624991453e-05, + "loss": 0.3633, + "step": 27081 + }, + { + "epoch": 0.4679637821398949, + "grad_norm": 1.4414002767120018, + "learning_rate": 1.1517757461400724e-05, + "loss": 0.4084, + "step": 27082 + }, + { + "epoch": 0.46798106165330383, + "grad_norm": 1.5970022600794374, + "learning_rate": 1.1517204293056264e-05, + "loss": 0.3718, + "step": 27083 + }, + { + "epoch": 0.46799834116671274, + "grad_norm": 0.7711740226631729, + "learning_rate": 1.1516651119959803e-05, + "loss": 0.4709, + "step": 27084 + }, + { + "epoch": 0.46801562068012165, + "grad_norm": 1.002165479540276, + "learning_rate": 1.1516097942113076e-05, + "loss": 0.4474, + "step": 27085 + }, + { + "epoch": 0.46803290019353055, + "grad_norm": 1.3700072911078913, + "learning_rate": 1.1515544759517815e-05, + "loss": 0.3472, + "step": 27086 + }, + { + "epoch": 0.46805017970693946, + "grad_norm": 1.3593321986695457, + "learning_rate": 1.1514991572175756e-05, + "loss": 0.4343, + "step": 27087 + }, + { + "epoch": 0.46806745922034837, + "grad_norm": 0.9001719712815787, + "learning_rate": 1.1514438380088623e-05, + "loss": 0.484, + "step": 27088 + }, + { + "epoch": 0.4680847387337573, + "grad_norm": 1.598403626043378, + "learning_rate": 1.1513885183258155e-05, + "loss": 0.6918, + "step": 27089 + }, + { + "epoch": 0.4681020182471662, + "grad_norm": 1.1142903956713566, + "learning_rate": 1.1513331981686087e-05, + "loss": 0.459, + "step": 27090 + }, + { + "epoch": 0.46811929776057504, + "grad_norm": 0.6370017523840903, + "learning_rate": 1.1512778775374145e-05, + "loss": 0.3271, + "step": 27091 + }, + { + "epoch": 0.46813657727398394, + "grad_norm": 0.740489499554877, + "learning_rate": 1.1512225564324064e-05, + "loss": 0.2999, + "step": 27092 + }, + { + "epoch": 0.46815385678739285, + "grad_norm": 1.368615061131967, + "learning_rate": 1.1511672348537582e-05, + "loss": 0.4178, + "step": 27093 + }, + { + "epoch": 0.46817113630080176, + "grad_norm": 1.0561713357134141, + "learning_rate": 1.1511119128016424e-05, + "loss": 0.3753, + "step": 27094 + }, + { + "epoch": 0.46818841581421067, + "grad_norm": 1.36954675080853, + "learning_rate": 1.1510565902762325e-05, + "loss": 0.3927, + "step": 27095 + }, + { + "epoch": 0.4682056953276196, + "grad_norm": 1.2329507030791824, + "learning_rate": 1.1510012672777022e-05, + "loss": 0.4618, + "step": 27096 + }, + { + "epoch": 0.4682229748410285, + "grad_norm": 1.0319004401915226, + "learning_rate": 1.1509459438062245e-05, + "loss": 0.3732, + "step": 27097 + }, + { + "epoch": 0.4682402543544374, + "grad_norm": 0.7104064732906231, + "learning_rate": 1.1508906198619723e-05, + "loss": 0.3586, + "step": 27098 + }, + { + "epoch": 0.4682575338678463, + "grad_norm": 0.9101858572875322, + "learning_rate": 1.1508352954451194e-05, + "loss": 0.4854, + "step": 27099 + }, + { + "epoch": 0.4682748133812552, + "grad_norm": 1.0453380751465011, + "learning_rate": 1.150779970555839e-05, + "loss": 0.4748, + "step": 27100 + }, + { + "epoch": 0.4682920928946641, + "grad_norm": 1.3713966271688633, + "learning_rate": 1.150724645194304e-05, + "loss": 0.4482, + "step": 27101 + }, + { + "epoch": 0.46830937240807297, + "grad_norm": 0.7944523738263966, + "learning_rate": 1.150669319360688e-05, + "loss": 0.451, + "step": 27102 + }, + { + "epoch": 0.4683266519214819, + "grad_norm": 1.095962503049382, + "learning_rate": 1.1506139930551646e-05, + "loss": 0.4374, + "step": 27103 + }, + { + "epoch": 0.4683439314348908, + "grad_norm": 1.2062863601427594, + "learning_rate": 1.150558666277906e-05, + "loss": 0.485, + "step": 27104 + }, + { + "epoch": 0.4683612109482997, + "grad_norm": 1.3930577926626158, + "learning_rate": 1.1505033390290867e-05, + "loss": 0.5623, + "step": 27105 + }, + { + "epoch": 0.4683784904617086, + "grad_norm": 1.1580411473461032, + "learning_rate": 1.1504480113088797e-05, + "loss": 0.632, + "step": 27106 + }, + { + "epoch": 0.4683957699751175, + "grad_norm": 1.5687624698631968, + "learning_rate": 1.1503926831174579e-05, + "loss": 0.4824, + "step": 27107 + }, + { + "epoch": 0.4684130494885264, + "grad_norm": 1.2274440881360582, + "learning_rate": 1.1503373544549947e-05, + "loss": 0.5246, + "step": 27108 + }, + { + "epoch": 0.4684303290019353, + "grad_norm": 0.8244120562626926, + "learning_rate": 1.1502820253216636e-05, + "loss": 0.3455, + "step": 27109 + }, + { + "epoch": 0.4684476085153442, + "grad_norm": 1.283539089034802, + "learning_rate": 1.1502266957176375e-05, + "loss": 0.5646, + "step": 27110 + }, + { + "epoch": 0.46846488802875313, + "grad_norm": 1.4146868833049853, + "learning_rate": 1.1501713656430904e-05, + "loss": 0.3588, + "step": 27111 + }, + { + "epoch": 0.468482167542162, + "grad_norm": 1.2865753795935244, + "learning_rate": 1.1501160350981949e-05, + "loss": 0.4396, + "step": 27112 + }, + { + "epoch": 0.4684994470555709, + "grad_norm": 0.9976458773785405, + "learning_rate": 1.1500607040831246e-05, + "loss": 0.4457, + "step": 27113 + }, + { + "epoch": 0.4685167265689798, + "grad_norm": 0.7781691231682154, + "learning_rate": 1.1500053725980526e-05, + "loss": 0.442, + "step": 27114 + }, + { + "epoch": 0.4685340060823887, + "grad_norm": 0.9885174737270583, + "learning_rate": 1.1499500406431525e-05, + "loss": 0.4552, + "step": 27115 + }, + { + "epoch": 0.4685512855957976, + "grad_norm": 0.8415349230874728, + "learning_rate": 1.1498947082185977e-05, + "loss": 0.5056, + "step": 27116 + }, + { + "epoch": 0.4685685651092065, + "grad_norm": 1.0052414560438212, + "learning_rate": 1.1498393753245613e-05, + "loss": 0.372, + "step": 27117 + }, + { + "epoch": 0.46858584462261543, + "grad_norm": 1.0086349585301566, + "learning_rate": 1.1497840419612163e-05, + "loss": 0.5089, + "step": 27118 + }, + { + "epoch": 0.46860312413602434, + "grad_norm": 0.9259976616747235, + "learning_rate": 1.1497287081287365e-05, + "loss": 0.5069, + "step": 27119 + }, + { + "epoch": 0.46862040364943325, + "grad_norm": 0.8025986175368934, + "learning_rate": 1.149673373827295e-05, + "loss": 0.3242, + "step": 27120 + }, + { + "epoch": 0.46863768316284216, + "grad_norm": 0.7403961433253613, + "learning_rate": 1.1496180390570648e-05, + "loss": 0.3849, + "step": 27121 + }, + { + "epoch": 0.46865496267625106, + "grad_norm": 1.040701681048229, + "learning_rate": 1.1495627038182198e-05, + "loss": 0.5619, + "step": 27122 + }, + { + "epoch": 0.4686722421896599, + "grad_norm": 0.5102729610039923, + "learning_rate": 1.1495073681109332e-05, + "loss": 0.5857, + "step": 27123 + }, + { + "epoch": 0.4686895217030688, + "grad_norm": 1.0599034906811722, + "learning_rate": 1.1494520319353777e-05, + "loss": 0.7952, + "step": 27124 + }, + { + "epoch": 0.46870680121647773, + "grad_norm": 0.8868738680968185, + "learning_rate": 1.1493966952917276e-05, + "loss": 0.4668, + "step": 27125 + }, + { + "epoch": 0.46872408072988664, + "grad_norm": 0.6801933278665742, + "learning_rate": 1.1493413581801554e-05, + "loss": 0.3715, + "step": 27126 + }, + { + "epoch": 0.46874136024329555, + "grad_norm": 0.8358619516647892, + "learning_rate": 1.1492860206008347e-05, + "loss": 0.2647, + "step": 27127 + }, + { + "epoch": 0.46875863975670445, + "grad_norm": 0.6433330701285453, + "learning_rate": 1.149230682553939e-05, + "loss": 0.3011, + "step": 27128 + }, + { + "epoch": 0.46877591927011336, + "grad_norm": 0.8756401562377563, + "learning_rate": 1.1491753440396411e-05, + "loss": 0.4378, + "step": 27129 + }, + { + "epoch": 0.46879319878352227, + "grad_norm": 1.1194373543009004, + "learning_rate": 1.1491200050581151e-05, + "loss": 0.3905, + "step": 27130 + }, + { + "epoch": 0.4688104782969312, + "grad_norm": 1.1960872276972847, + "learning_rate": 1.1490646656095338e-05, + "loss": 0.4093, + "step": 27131 + }, + { + "epoch": 0.4688277578103401, + "grad_norm": 0.9597025226031094, + "learning_rate": 1.1490093256940705e-05, + "loss": 0.5833, + "step": 27132 + }, + { + "epoch": 0.46884503732374894, + "grad_norm": 1.0550843017097677, + "learning_rate": 1.1489539853118986e-05, + "loss": 0.3568, + "step": 27133 + }, + { + "epoch": 0.46886231683715784, + "grad_norm": 1.1333668452182268, + "learning_rate": 1.148898644463192e-05, + "loss": 0.4469, + "step": 27134 + }, + { + "epoch": 0.46887959635056675, + "grad_norm": 1.524702881036207, + "learning_rate": 1.148843303148123e-05, + "loss": 0.5278, + "step": 27135 + }, + { + "epoch": 0.46889687586397566, + "grad_norm": 0.9508469958774134, + "learning_rate": 1.1487879613668658e-05, + "loss": 0.3487, + "step": 27136 + }, + { + "epoch": 0.46891415537738457, + "grad_norm": 0.8075089719573754, + "learning_rate": 1.1487326191195931e-05, + "loss": 0.45, + "step": 27137 + }, + { + "epoch": 0.4689314348907935, + "grad_norm": 0.9602860636866837, + "learning_rate": 1.148677276406479e-05, + "loss": 0.3042, + "step": 27138 + }, + { + "epoch": 0.4689487144042024, + "grad_norm": 1.2739154303928018, + "learning_rate": 1.148621933227696e-05, + "loss": 0.5331, + "step": 27139 + }, + { + "epoch": 0.4689659939176113, + "grad_norm": 0.9279159463860216, + "learning_rate": 1.148566589583418e-05, + "loss": 0.316, + "step": 27140 + }, + { + "epoch": 0.4689832734310202, + "grad_norm": 1.3564432713987795, + "learning_rate": 1.1485112454738177e-05, + "loss": 0.5192, + "step": 27141 + }, + { + "epoch": 0.4690005529444291, + "grad_norm": 1.1258502542164208, + "learning_rate": 1.1484559008990692e-05, + "loss": 0.4519, + "step": 27142 + }, + { + "epoch": 0.469017832457838, + "grad_norm": 0.7688490694904464, + "learning_rate": 1.1484005558593456e-05, + "loss": 0.3796, + "step": 27143 + }, + { + "epoch": 0.46903511197124687, + "grad_norm": 1.1433576725570624, + "learning_rate": 1.14834521035482e-05, + "loss": 0.409, + "step": 27144 + }, + { + "epoch": 0.4690523914846558, + "grad_norm": 1.3586028553111869, + "learning_rate": 1.1482898643856661e-05, + "loss": 0.3754, + "step": 27145 + }, + { + "epoch": 0.4690696709980647, + "grad_norm": 1.2978250590101332, + "learning_rate": 1.1482345179520571e-05, + "loss": 0.3683, + "step": 27146 + }, + { + "epoch": 0.4690869505114736, + "grad_norm": 0.8399027703616042, + "learning_rate": 1.1481791710541661e-05, + "loss": 0.494, + "step": 27147 + }, + { + "epoch": 0.4691042300248825, + "grad_norm": 0.7286343385204356, + "learning_rate": 1.1481238236921673e-05, + "loss": 0.6256, + "step": 27148 + }, + { + "epoch": 0.4691215095382914, + "grad_norm": 0.7878762042662047, + "learning_rate": 1.1480684758662326e-05, + "loss": 0.5414, + "step": 27149 + }, + { + "epoch": 0.4691387890517003, + "grad_norm": 0.8128413891603654, + "learning_rate": 1.1480131275765368e-05, + "loss": 0.3821, + "step": 27150 + }, + { + "epoch": 0.4691560685651092, + "grad_norm": 1.004016295113316, + "learning_rate": 1.1479577788232523e-05, + "loss": 0.3937, + "step": 27151 + }, + { + "epoch": 0.4691733480785181, + "grad_norm": 0.8522412062585145, + "learning_rate": 1.1479024296065527e-05, + "loss": 0.3986, + "step": 27152 + }, + { + "epoch": 0.46919062759192703, + "grad_norm": 1.5571526513848182, + "learning_rate": 1.1478470799266117e-05, + "loss": 0.613, + "step": 27153 + }, + { + "epoch": 0.4692079071053359, + "grad_norm": 1.0731498969644595, + "learning_rate": 1.1477917297836023e-05, + "loss": 0.6281, + "step": 27154 + }, + { + "epoch": 0.4692251866187448, + "grad_norm": 1.1331533722930227, + "learning_rate": 1.1477363791776978e-05, + "loss": 0.4546, + "step": 27155 + }, + { + "epoch": 0.4692424661321537, + "grad_norm": 0.48677055015265736, + "learning_rate": 1.147681028109072e-05, + "loss": 0.5767, + "step": 27156 + }, + { + "epoch": 0.4692597456455626, + "grad_norm": 0.8224257531748147, + "learning_rate": 1.1476256765778981e-05, + "loss": 0.2405, + "step": 27157 + }, + { + "epoch": 0.4692770251589715, + "grad_norm": 0.8663951342644423, + "learning_rate": 1.1475703245843492e-05, + "loss": 0.4279, + "step": 27158 + }, + { + "epoch": 0.4692943046723804, + "grad_norm": 0.7059517616259462, + "learning_rate": 1.147514972128599e-05, + "loss": 0.3207, + "step": 27159 + }, + { + "epoch": 0.46931158418578933, + "grad_norm": 1.6362141040897293, + "learning_rate": 1.1474596192108205e-05, + "loss": 0.5352, + "step": 27160 + }, + { + "epoch": 0.46932886369919824, + "grad_norm": 1.0522926635109968, + "learning_rate": 1.1474042658311871e-05, + "loss": 0.4802, + "step": 27161 + }, + { + "epoch": 0.46934614321260715, + "grad_norm": 0.9875396220150707, + "learning_rate": 1.1473489119898726e-05, + "loss": 0.3494, + "step": 27162 + }, + { + "epoch": 0.46936342272601606, + "grad_norm": 0.884564096871951, + "learning_rate": 1.14729355768705e-05, + "loss": 0.5035, + "step": 27163 + }, + { + "epoch": 0.46938070223942496, + "grad_norm": 1.0588911309583244, + "learning_rate": 1.1472382029228926e-05, + "loss": 0.5261, + "step": 27164 + }, + { + "epoch": 0.4693979817528338, + "grad_norm": 1.8904777820616914, + "learning_rate": 1.1471828476975744e-05, + "loss": 0.6306, + "step": 27165 + }, + { + "epoch": 0.4694152612662427, + "grad_norm": 0.8749577431584422, + "learning_rate": 1.147127492011268e-05, + "loss": 0.2804, + "step": 27166 + }, + { + "epoch": 0.46943254077965163, + "grad_norm": 0.9892253325203767, + "learning_rate": 1.1470721358641472e-05, + "loss": 0.4098, + "step": 27167 + }, + { + "epoch": 0.46944982029306054, + "grad_norm": 1.0406807136795102, + "learning_rate": 1.1470167792563854e-05, + "loss": 0.4938, + "step": 27168 + }, + { + "epoch": 0.46946709980646945, + "grad_norm": 1.087234350530454, + "learning_rate": 1.1469614221881557e-05, + "loss": 0.5633, + "step": 27169 + }, + { + "epoch": 0.46948437931987835, + "grad_norm": 0.7967215181462861, + "learning_rate": 1.146906064659632e-05, + "loss": 0.4132, + "step": 27170 + }, + { + "epoch": 0.46950165883328726, + "grad_norm": 0.9019428419073886, + "learning_rate": 1.1468507066709868e-05, + "loss": 0.3371, + "step": 27171 + }, + { + "epoch": 0.46951893834669617, + "grad_norm": 0.996330552231497, + "learning_rate": 1.1467953482223943e-05, + "loss": 0.2958, + "step": 27172 + }, + { + "epoch": 0.4695362178601051, + "grad_norm": 0.9902605807321648, + "learning_rate": 1.1467399893140276e-05, + "loss": 0.3981, + "step": 27173 + }, + { + "epoch": 0.469553497373514, + "grad_norm": 0.8652442177989959, + "learning_rate": 1.1466846299460603e-05, + "loss": 0.3571, + "step": 27174 + }, + { + "epoch": 0.4695707768869229, + "grad_norm": 0.658599816355164, + "learning_rate": 1.1466292701186653e-05, + "loss": 0.548, + "step": 27175 + }, + { + "epoch": 0.46958805640033174, + "grad_norm": 0.9090952810882077, + "learning_rate": 1.1465739098320164e-05, + "loss": 0.5531, + "step": 27176 + }, + { + "epoch": 0.46960533591374065, + "grad_norm": 0.8610281508321651, + "learning_rate": 1.146518549086287e-05, + "loss": 0.4482, + "step": 27177 + }, + { + "epoch": 0.46962261542714956, + "grad_norm": 0.8917965166045964, + "learning_rate": 1.1464631878816502e-05, + "loss": 0.4622, + "step": 27178 + }, + { + "epoch": 0.46963989494055847, + "grad_norm": 1.3149101273267325, + "learning_rate": 1.1464078262182797e-05, + "loss": 0.5384, + "step": 27179 + }, + { + "epoch": 0.4696571744539674, + "grad_norm": 0.7975333042458194, + "learning_rate": 1.1463524640963489e-05, + "loss": 0.3414, + "step": 27180 + }, + { + "epoch": 0.4696744539673763, + "grad_norm": 1.1366367978284844, + "learning_rate": 1.1462971015160305e-05, + "loss": 0.4962, + "step": 27181 + }, + { + "epoch": 0.4696917334807852, + "grad_norm": 1.3088171239978743, + "learning_rate": 1.146241738477499e-05, + "loss": 0.4117, + "step": 27182 + }, + { + "epoch": 0.4697090129941941, + "grad_norm": 0.6518295680947174, + "learning_rate": 1.146186374980927e-05, + "loss": 0.5814, + "step": 27183 + }, + { + "epoch": 0.469726292507603, + "grad_norm": 0.7724882282387857, + "learning_rate": 1.1461310110264881e-05, + "loss": 0.322, + "step": 27184 + }, + { + "epoch": 0.4697435720210119, + "grad_norm": 0.9377794060664466, + "learning_rate": 1.146075646614356e-05, + "loss": 0.4889, + "step": 27185 + }, + { + "epoch": 0.46976085153442076, + "grad_norm": 0.9683686962332753, + "learning_rate": 1.1460202817447036e-05, + "loss": 0.4199, + "step": 27186 + }, + { + "epoch": 0.4697781310478297, + "grad_norm": 0.436162384436071, + "learning_rate": 1.1459649164177047e-05, + "loss": 0.5943, + "step": 27187 + }, + { + "epoch": 0.4697954105612386, + "grad_norm": 1.111110598136668, + "learning_rate": 1.1459095506335326e-05, + "loss": 0.4809, + "step": 27188 + }, + { + "epoch": 0.4698126900746475, + "grad_norm": 0.8122848636702359, + "learning_rate": 1.1458541843923607e-05, + "loss": 0.3884, + "step": 27189 + }, + { + "epoch": 0.4698299695880564, + "grad_norm": 1.233636124170127, + "learning_rate": 1.1457988176943626e-05, + "loss": 0.303, + "step": 27190 + }, + { + "epoch": 0.4698472491014653, + "grad_norm": 1.3764084167799464, + "learning_rate": 1.1457434505397114e-05, + "loss": 0.6045, + "step": 27191 + }, + { + "epoch": 0.4698645286148742, + "grad_norm": 0.671450130286628, + "learning_rate": 1.1456880829285803e-05, + "loss": 0.3744, + "step": 27192 + }, + { + "epoch": 0.4698818081282831, + "grad_norm": 0.7184383735329773, + "learning_rate": 1.1456327148611436e-05, + "loss": 0.3102, + "step": 27193 + }, + { + "epoch": 0.469899087641692, + "grad_norm": 0.8640356533320599, + "learning_rate": 1.145577346337574e-05, + "loss": 0.4879, + "step": 27194 + }, + { + "epoch": 0.46991636715510093, + "grad_norm": 0.9684804868377943, + "learning_rate": 1.1455219773580448e-05, + "loss": 0.5744, + "step": 27195 + }, + { + "epoch": 0.46993364666850984, + "grad_norm": 0.9896775288167938, + "learning_rate": 1.1454666079227299e-05, + "loss": 0.2246, + "step": 27196 + }, + { + "epoch": 0.4699509261819187, + "grad_norm": 0.7579461667845073, + "learning_rate": 1.1454112380318026e-05, + "loss": 0.532, + "step": 27197 + }, + { + "epoch": 0.4699682056953276, + "grad_norm": 0.4785655103652952, + "learning_rate": 1.145355867685436e-05, + "loss": 0.7561, + "step": 27198 + }, + { + "epoch": 0.4699854852087365, + "grad_norm": 1.7521193093506955, + "learning_rate": 1.1453004968838041e-05, + "loss": 0.349, + "step": 27199 + }, + { + "epoch": 0.4700027647221454, + "grad_norm": 1.04414418161191, + "learning_rate": 1.1452451256270801e-05, + "loss": 0.2327, + "step": 27200 + }, + { + "epoch": 0.4700200442355543, + "grad_norm": 0.8807910145583556, + "learning_rate": 1.145189753915437e-05, + "loss": 0.5382, + "step": 27201 + }, + { + "epoch": 0.47003732374896323, + "grad_norm": 1.00181056252424, + "learning_rate": 1.1451343817490485e-05, + "loss": 0.703, + "step": 27202 + }, + { + "epoch": 0.47005460326237214, + "grad_norm": 0.8709404444365489, + "learning_rate": 1.1450790091280885e-05, + "loss": 0.5328, + "step": 27203 + }, + { + "epoch": 0.47007188277578105, + "grad_norm": 0.7673651484025449, + "learning_rate": 1.1450236360527296e-05, + "loss": 0.3771, + "step": 27204 + }, + { + "epoch": 0.47008916228918995, + "grad_norm": 1.0768927840830602, + "learning_rate": 1.1449682625231457e-05, + "loss": 0.3259, + "step": 27205 + }, + { + "epoch": 0.47010644180259886, + "grad_norm": 1.3164166057076836, + "learning_rate": 1.1449128885395105e-05, + "loss": 0.3731, + "step": 27206 + }, + { + "epoch": 0.4701237213160077, + "grad_norm": 0.721416910198087, + "learning_rate": 1.1448575141019967e-05, + "loss": 0.5521, + "step": 27207 + }, + { + "epoch": 0.4701410008294166, + "grad_norm": 1.2788939787817077, + "learning_rate": 1.1448021392107784e-05, + "loss": 0.4642, + "step": 27208 + }, + { + "epoch": 0.47015828034282553, + "grad_norm": 1.8744438869313051, + "learning_rate": 1.1447467638660287e-05, + "loss": 0.4365, + "step": 27209 + }, + { + "epoch": 0.47017555985623444, + "grad_norm": 1.1730105163618851, + "learning_rate": 1.1446913880679213e-05, + "loss": 0.475, + "step": 27210 + }, + { + "epoch": 0.47019283936964335, + "grad_norm": 1.2105850549000468, + "learning_rate": 1.1446360118166295e-05, + "loss": 0.4758, + "step": 27211 + }, + { + "epoch": 0.47021011888305225, + "grad_norm": 1.1247449191001069, + "learning_rate": 1.1445806351123266e-05, + "loss": 0.6299, + "step": 27212 + }, + { + "epoch": 0.47022739839646116, + "grad_norm": 1.039911123006244, + "learning_rate": 1.1445252579551863e-05, + "loss": 0.4876, + "step": 27213 + }, + { + "epoch": 0.47024467790987007, + "grad_norm": 0.7506495013100923, + "learning_rate": 1.1444698803453817e-05, + "loss": 0.3421, + "step": 27214 + }, + { + "epoch": 0.470261957423279, + "grad_norm": 0.8420427785146612, + "learning_rate": 1.1444145022830864e-05, + "loss": 0.4119, + "step": 27215 + }, + { + "epoch": 0.4702792369366879, + "grad_norm": 0.8916348502328391, + "learning_rate": 1.1443591237684738e-05, + "loss": 0.2479, + "step": 27216 + }, + { + "epoch": 0.4702965164500968, + "grad_norm": 0.8734400673859791, + "learning_rate": 1.144303744801718e-05, + "loss": 0.4513, + "step": 27217 + }, + { + "epoch": 0.47031379596350564, + "grad_norm": 0.9964212518077947, + "learning_rate": 1.1442483653829915e-05, + "loss": 0.3133, + "step": 27218 + }, + { + "epoch": 0.47033107547691455, + "grad_norm": 0.659484695154948, + "learning_rate": 1.144192985512468e-05, + "loss": 0.3255, + "step": 27219 + }, + { + "epoch": 0.47034835499032346, + "grad_norm": 1.2244332214130116, + "learning_rate": 1.1441376051903217e-05, + "loss": 0.615, + "step": 27220 + }, + { + "epoch": 0.47036563450373237, + "grad_norm": 0.5276184031720841, + "learning_rate": 1.1440822244167251e-05, + "loss": 0.5029, + "step": 27221 + }, + { + "epoch": 0.4703829140171413, + "grad_norm": 0.8501341278487113, + "learning_rate": 1.1440268431918519e-05, + "loss": 0.4322, + "step": 27222 + }, + { + "epoch": 0.4704001935305502, + "grad_norm": 0.6282913409078696, + "learning_rate": 1.1439714615158762e-05, + "loss": 0.4535, + "step": 27223 + }, + { + "epoch": 0.4704174730439591, + "grad_norm": 1.214712553805654, + "learning_rate": 1.1439160793889703e-05, + "loss": 0.5138, + "step": 27224 + }, + { + "epoch": 0.470434752557368, + "grad_norm": 0.9654950151697221, + "learning_rate": 1.1438606968113086e-05, + "loss": 0.3068, + "step": 27225 + }, + { + "epoch": 0.4704520320707769, + "grad_norm": 1.1372548300023206, + "learning_rate": 1.1438053137830642e-05, + "loss": 0.5421, + "step": 27226 + }, + { + "epoch": 0.4704693115841858, + "grad_norm": 1.2969271611693007, + "learning_rate": 1.1437499303044107e-05, + "loss": 0.4598, + "step": 27227 + }, + { + "epoch": 0.47048659109759466, + "grad_norm": 0.830064861151279, + "learning_rate": 1.1436945463755213e-05, + "loss": 0.296, + "step": 27228 + }, + { + "epoch": 0.47050387061100357, + "grad_norm": 0.4395963978174699, + "learning_rate": 1.14363916199657e-05, + "loss": 0.7415, + "step": 27229 + }, + { + "epoch": 0.4705211501244125, + "grad_norm": 1.7145535009876154, + "learning_rate": 1.1435837771677299e-05, + "loss": 0.506, + "step": 27230 + }, + { + "epoch": 0.4705384296378214, + "grad_norm": 0.5418289931853677, + "learning_rate": 1.1435283918891742e-05, + "loss": 0.5885, + "step": 27231 + }, + { + "epoch": 0.4705557091512303, + "grad_norm": 1.1909620696007002, + "learning_rate": 1.1434730061610769e-05, + "loss": 0.6178, + "step": 27232 + }, + { + "epoch": 0.4705729886646392, + "grad_norm": 0.6524727367399207, + "learning_rate": 1.1434176199836112e-05, + "loss": 0.3668, + "step": 27233 + }, + { + "epoch": 0.4705902681780481, + "grad_norm": 1.0174639555494165, + "learning_rate": 1.1433622333569505e-05, + "loss": 0.5065, + "step": 27234 + }, + { + "epoch": 0.470607547691457, + "grad_norm": 1.3405533362320887, + "learning_rate": 1.1433068462812682e-05, + "loss": 0.3605, + "step": 27235 + }, + { + "epoch": 0.4706248272048659, + "grad_norm": 1.1085624283525575, + "learning_rate": 1.1432514587567383e-05, + "loss": 0.3899, + "step": 27236 + }, + { + "epoch": 0.47064210671827483, + "grad_norm": 0.9102654571265321, + "learning_rate": 1.143196070783534e-05, + "loss": 0.3587, + "step": 27237 + }, + { + "epoch": 0.47065938623168374, + "grad_norm": 0.9082807811718825, + "learning_rate": 1.1431406823618283e-05, + "loss": 0.3842, + "step": 27238 + }, + { + "epoch": 0.4706766657450926, + "grad_norm": 0.946313816554833, + "learning_rate": 1.1430852934917955e-05, + "loss": 0.3803, + "step": 27239 + }, + { + "epoch": 0.4706939452585015, + "grad_norm": 0.4275502105704407, + "learning_rate": 1.1430299041736087e-05, + "loss": 0.6602, + "step": 27240 + }, + { + "epoch": 0.4707112247719104, + "grad_norm": 1.06058194722339, + "learning_rate": 1.1429745144074414e-05, + "loss": 0.5725, + "step": 27241 + }, + { + "epoch": 0.4707285042853193, + "grad_norm": 1.0031273088962438, + "learning_rate": 1.1429191241934667e-05, + "loss": 0.5393, + "step": 27242 + }, + { + "epoch": 0.4707457837987282, + "grad_norm": 0.6692022928390062, + "learning_rate": 1.142863733531859e-05, + "loss": 0.4455, + "step": 27243 + }, + { + "epoch": 0.47076306331213713, + "grad_norm": 0.9916730523071703, + "learning_rate": 1.1428083424227905e-05, + "loss": 0.3507, + "step": 27244 + }, + { + "epoch": 0.47078034282554604, + "grad_norm": 0.9430841784383145, + "learning_rate": 1.1427529508664359e-05, + "loss": 0.4696, + "step": 27245 + }, + { + "epoch": 0.47079762233895495, + "grad_norm": 0.9407337948222404, + "learning_rate": 1.1426975588629681e-05, + "loss": 0.51, + "step": 27246 + }, + { + "epoch": 0.47081490185236385, + "grad_norm": 0.4905785364550386, + "learning_rate": 1.1426421664125607e-05, + "loss": 0.6004, + "step": 27247 + }, + { + "epoch": 0.47083218136577276, + "grad_norm": 0.7698800063674762, + "learning_rate": 1.1425867735153873e-05, + "loss": 0.4196, + "step": 27248 + }, + { + "epoch": 0.47084946087918167, + "grad_norm": 1.060444471861189, + "learning_rate": 1.1425313801716211e-05, + "loss": 0.3584, + "step": 27249 + }, + { + "epoch": 0.4708667403925905, + "grad_norm": 0.8770525476372883, + "learning_rate": 1.1424759863814356e-05, + "loss": 0.3658, + "step": 27250 + }, + { + "epoch": 0.47088401990599943, + "grad_norm": 0.5335855819640652, + "learning_rate": 1.1424205921450052e-05, + "loss": 0.6935, + "step": 27251 + }, + { + "epoch": 0.47090129941940834, + "grad_norm": 0.9836595061104579, + "learning_rate": 1.1423651974625021e-05, + "loss": 0.3955, + "step": 27252 + }, + { + "epoch": 0.47091857893281724, + "grad_norm": 1.4370236126849236, + "learning_rate": 1.1423098023341006e-05, + "loss": 0.4733, + "step": 27253 + }, + { + "epoch": 0.47093585844622615, + "grad_norm": 0.9126618983758731, + "learning_rate": 1.142254406759974e-05, + "loss": 0.5074, + "step": 27254 + }, + { + "epoch": 0.47095313795963506, + "grad_norm": 1.0483412526455922, + "learning_rate": 1.1421990107402954e-05, + "loss": 0.546, + "step": 27255 + }, + { + "epoch": 0.47097041747304397, + "grad_norm": 0.7238332764538713, + "learning_rate": 1.1421436142752391e-05, + "loss": 0.5061, + "step": 27256 + }, + { + "epoch": 0.4709876969864529, + "grad_norm": 0.662887951916921, + "learning_rate": 1.142088217364978e-05, + "loss": 0.3379, + "step": 27257 + }, + { + "epoch": 0.4710049764998618, + "grad_norm": 0.8660867398547596, + "learning_rate": 1.142032820009686e-05, + "loss": 0.4791, + "step": 27258 + }, + { + "epoch": 0.4710222560132707, + "grad_norm": 0.5105691918853557, + "learning_rate": 1.1419774222095362e-05, + "loss": 0.7503, + "step": 27259 + }, + { + "epoch": 0.47103953552667954, + "grad_norm": 1.4624394711673583, + "learning_rate": 1.1419220239647024e-05, + "loss": 0.3935, + "step": 27260 + }, + { + "epoch": 0.47105681504008845, + "grad_norm": 0.6558351297277552, + "learning_rate": 1.1418666252753583e-05, + "loss": 0.3957, + "step": 27261 + }, + { + "epoch": 0.47107409455349736, + "grad_norm": 0.8154044445487911, + "learning_rate": 1.141811226141677e-05, + "loss": 0.4194, + "step": 27262 + }, + { + "epoch": 0.47109137406690627, + "grad_norm": 0.889903023571397, + "learning_rate": 1.141755826563832e-05, + "loss": 0.5023, + "step": 27263 + }, + { + "epoch": 0.4711086535803152, + "grad_norm": 1.078013306074249, + "learning_rate": 1.1417004265419968e-05, + "loss": 0.3963, + "step": 27264 + }, + { + "epoch": 0.4711259330937241, + "grad_norm": 1.0896650580173837, + "learning_rate": 1.1416450260763455e-05, + "loss": 0.6396, + "step": 27265 + }, + { + "epoch": 0.471143212607133, + "grad_norm": 1.1917523859171588, + "learning_rate": 1.141589625167051e-05, + "loss": 0.573, + "step": 27266 + }, + { + "epoch": 0.4711604921205419, + "grad_norm": 0.9147155145178331, + "learning_rate": 1.141534223814287e-05, + "loss": 0.4489, + "step": 27267 + }, + { + "epoch": 0.4711777716339508, + "grad_norm": 1.0917420692362172, + "learning_rate": 1.1414788220182271e-05, + "loss": 0.4657, + "step": 27268 + }, + { + "epoch": 0.4711950511473597, + "grad_norm": 0.7485001177925077, + "learning_rate": 1.141423419779045e-05, + "loss": 0.4003, + "step": 27269 + }, + { + "epoch": 0.4712123306607686, + "grad_norm": 0.8618336172089733, + "learning_rate": 1.1413680170969137e-05, + "loss": 0.4964, + "step": 27270 + }, + { + "epoch": 0.47122961017417747, + "grad_norm": 0.8376524763115695, + "learning_rate": 1.1413126139720072e-05, + "loss": 0.3741, + "step": 27271 + }, + { + "epoch": 0.4712468896875864, + "grad_norm": 0.8773446244523764, + "learning_rate": 1.1412572104044987e-05, + "loss": 0.416, + "step": 27272 + }, + { + "epoch": 0.4712641692009953, + "grad_norm": 0.939899971648442, + "learning_rate": 1.1412018063945622e-05, + "loss": 0.4581, + "step": 27273 + }, + { + "epoch": 0.4712814487144042, + "grad_norm": 0.9352052849494703, + "learning_rate": 1.1411464019423708e-05, + "loss": 0.3675, + "step": 27274 + }, + { + "epoch": 0.4712987282278131, + "grad_norm": 0.6055827203997735, + "learning_rate": 1.1410909970480978e-05, + "loss": 0.358, + "step": 27275 + }, + { + "epoch": 0.471316007741222, + "grad_norm": 0.9867847437932399, + "learning_rate": 1.1410355917119174e-05, + "loss": 0.433, + "step": 27276 + }, + { + "epoch": 0.4713332872546309, + "grad_norm": 1.0093131568146814, + "learning_rate": 1.140980185934003e-05, + "loss": 0.5207, + "step": 27277 + }, + { + "epoch": 0.4713505667680398, + "grad_norm": 1.1316522705715812, + "learning_rate": 1.1409247797145273e-05, + "loss": 0.4735, + "step": 27278 + }, + { + "epoch": 0.47136784628144873, + "grad_norm": 0.5877019158647593, + "learning_rate": 1.140869373053665e-05, + "loss": 0.3735, + "step": 27279 + }, + { + "epoch": 0.47138512579485764, + "grad_norm": 0.8035676584728197, + "learning_rate": 1.1408139659515889e-05, + "loss": 0.3697, + "step": 27280 + }, + { + "epoch": 0.4714024053082665, + "grad_norm": 0.4313118614141977, + "learning_rate": 1.1407585584084726e-05, + "loss": 0.7414, + "step": 27281 + }, + { + "epoch": 0.4714196848216754, + "grad_norm": 1.040811061240214, + "learning_rate": 1.1407031504244903e-05, + "loss": 0.3493, + "step": 27282 + }, + { + "epoch": 0.4714369643350843, + "grad_norm": 1.1710227582136237, + "learning_rate": 1.140647741999815e-05, + "loss": 0.3897, + "step": 27283 + }, + { + "epoch": 0.4714542438484932, + "grad_norm": 0.9628556660743205, + "learning_rate": 1.1405923331346199e-05, + "loss": 0.4689, + "step": 27284 + }, + { + "epoch": 0.4714715233619021, + "grad_norm": 0.8510610645807554, + "learning_rate": 1.1405369238290791e-05, + "loss": 0.356, + "step": 27285 + }, + { + "epoch": 0.47148880287531103, + "grad_norm": 1.4376793133633448, + "learning_rate": 1.140481514083366e-05, + "loss": 0.4402, + "step": 27286 + }, + { + "epoch": 0.47150608238871994, + "grad_norm": 0.8123396207375015, + "learning_rate": 1.1404261038976538e-05, + "loss": 0.347, + "step": 27287 + }, + { + "epoch": 0.47152336190212885, + "grad_norm": 0.9341779166743952, + "learning_rate": 1.1403706932721169e-05, + "loss": 0.3252, + "step": 27288 + }, + { + "epoch": 0.47154064141553775, + "grad_norm": 0.7973558311700341, + "learning_rate": 1.1403152822069279e-05, + "loss": 0.2778, + "step": 27289 + }, + { + "epoch": 0.47155792092894666, + "grad_norm": 1.0757395215422079, + "learning_rate": 1.1402598707022607e-05, + "loss": 0.383, + "step": 27290 + }, + { + "epoch": 0.47157520044235557, + "grad_norm": 0.8198389562580779, + "learning_rate": 1.1402044587582894e-05, + "loss": 0.2928, + "step": 27291 + }, + { + "epoch": 0.4715924799557644, + "grad_norm": 0.7956613147299182, + "learning_rate": 1.1401490463751867e-05, + "loss": 0.5079, + "step": 27292 + }, + { + "epoch": 0.47160975946917333, + "grad_norm": 1.0095277993831941, + "learning_rate": 1.1400936335531268e-05, + "loss": 0.4848, + "step": 27293 + }, + { + "epoch": 0.47162703898258224, + "grad_norm": 0.7824011942683469, + "learning_rate": 1.140038220292283e-05, + "loss": 0.5182, + "step": 27294 + }, + { + "epoch": 0.47164431849599114, + "grad_norm": 0.7184066256267718, + "learning_rate": 1.1399828065928285e-05, + "loss": 0.449, + "step": 27295 + }, + { + "epoch": 0.47166159800940005, + "grad_norm": 0.4688254480762713, + "learning_rate": 1.1399273924549375e-05, + "loss": 0.6626, + "step": 27296 + }, + { + "epoch": 0.47167887752280896, + "grad_norm": 1.1828432903351103, + "learning_rate": 1.1398719778787833e-05, + "loss": 0.321, + "step": 27297 + }, + { + "epoch": 0.47169615703621787, + "grad_norm": 0.6504479790549911, + "learning_rate": 1.1398165628645393e-05, + "loss": 0.5141, + "step": 27298 + }, + { + "epoch": 0.4717134365496268, + "grad_norm": 1.3142330465221674, + "learning_rate": 1.1397611474123791e-05, + "loss": 0.406, + "step": 27299 + }, + { + "epoch": 0.4717307160630357, + "grad_norm": 0.8264028892750749, + "learning_rate": 1.1397057315224767e-05, + "loss": 0.7121, + "step": 27300 + }, + { + "epoch": 0.4717479955764446, + "grad_norm": 0.8086128911403443, + "learning_rate": 1.1396503151950048e-05, + "loss": 0.3856, + "step": 27301 + }, + { + "epoch": 0.4717652750898535, + "grad_norm": 0.8324799075421145, + "learning_rate": 1.1395948984301379e-05, + "loss": 0.4909, + "step": 27302 + }, + { + "epoch": 0.47178255460326235, + "grad_norm": 0.5438202567335759, + "learning_rate": 1.1395394812280496e-05, + "loss": 0.6132, + "step": 27303 + }, + { + "epoch": 0.47179983411667126, + "grad_norm": 1.4115496872532853, + "learning_rate": 1.1394840635889122e-05, + "loss": 0.4177, + "step": 27304 + }, + { + "epoch": 0.47181711363008016, + "grad_norm": 1.5643911461749496, + "learning_rate": 1.1394286455129007e-05, + "loss": 0.4441, + "step": 27305 + }, + { + "epoch": 0.4718343931434891, + "grad_norm": 0.5338368096937997, + "learning_rate": 1.1393732270001879e-05, + "loss": 0.2405, + "step": 27306 + }, + { + "epoch": 0.471851672656898, + "grad_norm": 0.916572238471315, + "learning_rate": 1.1393178080509473e-05, + "loss": 0.4156, + "step": 27307 + }, + { + "epoch": 0.4718689521703069, + "grad_norm": 0.9931506055838717, + "learning_rate": 1.139262388665353e-05, + "loss": 0.5325, + "step": 27308 + }, + { + "epoch": 0.4718862316837158, + "grad_norm": 0.8544921915677359, + "learning_rate": 1.1392069688435783e-05, + "loss": 0.5438, + "step": 27309 + }, + { + "epoch": 0.4719035111971247, + "grad_norm": 1.4053951506611104, + "learning_rate": 1.1391515485857965e-05, + "loss": 0.4252, + "step": 27310 + }, + { + "epoch": 0.4719207907105336, + "grad_norm": 0.9383021470535665, + "learning_rate": 1.1390961278921817e-05, + "loss": 0.5726, + "step": 27311 + }, + { + "epoch": 0.4719380702239425, + "grad_norm": 0.656516879997807, + "learning_rate": 1.1390407067629075e-05, + "loss": 0.2942, + "step": 27312 + }, + { + "epoch": 0.47195534973735137, + "grad_norm": 0.9045418018702327, + "learning_rate": 1.1389852851981469e-05, + "loss": 0.3089, + "step": 27313 + }, + { + "epoch": 0.4719726292507603, + "grad_norm": 0.6973388960632296, + "learning_rate": 1.138929863198074e-05, + "loss": 0.4339, + "step": 27314 + }, + { + "epoch": 0.4719899087641692, + "grad_norm": 1.8963152055396717, + "learning_rate": 1.1388744407628621e-05, + "loss": 0.3563, + "step": 27315 + }, + { + "epoch": 0.4720071882775781, + "grad_norm": 1.0091255578655185, + "learning_rate": 1.138819017892685e-05, + "loss": 0.3307, + "step": 27316 + }, + { + "epoch": 0.472024467790987, + "grad_norm": 0.9103700903202951, + "learning_rate": 1.1387635945877162e-05, + "loss": 0.519, + "step": 27317 + }, + { + "epoch": 0.4720417473043959, + "grad_norm": 1.5338296488244199, + "learning_rate": 1.138708170848129e-05, + "loss": 0.3099, + "step": 27318 + }, + { + "epoch": 0.4720590268178048, + "grad_norm": 0.9676326969171094, + "learning_rate": 1.1386527466740976e-05, + "loss": 0.3586, + "step": 27319 + }, + { + "epoch": 0.4720763063312137, + "grad_norm": 1.1263006639003192, + "learning_rate": 1.1385973220657953e-05, + "loss": 0.462, + "step": 27320 + }, + { + "epoch": 0.47209358584462263, + "grad_norm": 0.8932848885204354, + "learning_rate": 1.1385418970233952e-05, + "loss": 0.4245, + "step": 27321 + }, + { + "epoch": 0.47211086535803154, + "grad_norm": 1.8007206561842404, + "learning_rate": 1.1384864715470717e-05, + "loss": 0.3791, + "step": 27322 + }, + { + "epoch": 0.47212814487144045, + "grad_norm": 0.4945926748817959, + "learning_rate": 1.1384310456369983e-05, + "loss": 0.7244, + "step": 27323 + }, + { + "epoch": 0.4721454243848493, + "grad_norm": 0.8016129067011377, + "learning_rate": 1.1383756192933479e-05, + "loss": 0.2705, + "step": 27324 + }, + { + "epoch": 0.4721627038982582, + "grad_norm": 1.552974137741341, + "learning_rate": 1.1383201925162947e-05, + "loss": 0.5215, + "step": 27325 + }, + { + "epoch": 0.4721799834116671, + "grad_norm": 0.7475072889986671, + "learning_rate": 1.138264765306012e-05, + "loss": 0.3308, + "step": 27326 + }, + { + "epoch": 0.472197262925076, + "grad_norm": 0.96288067489391, + "learning_rate": 1.1382093376626737e-05, + "loss": 0.4667, + "step": 27327 + }, + { + "epoch": 0.47221454243848493, + "grad_norm": 0.928596276944621, + "learning_rate": 1.1381539095864534e-05, + "loss": 0.2886, + "step": 27328 + }, + { + "epoch": 0.47223182195189384, + "grad_norm": 0.791398701317344, + "learning_rate": 1.1380984810775244e-05, + "loss": 0.3742, + "step": 27329 + }, + { + "epoch": 0.47224910146530275, + "grad_norm": 0.7653823060115368, + "learning_rate": 1.1380430521360601e-05, + "loss": 0.3793, + "step": 27330 + }, + { + "epoch": 0.47226638097871165, + "grad_norm": 0.4812943438152793, + "learning_rate": 1.1379876227622351e-05, + "loss": 0.736, + "step": 27331 + }, + { + "epoch": 0.47228366049212056, + "grad_norm": 2.102223866494731, + "learning_rate": 1.1379321929562219e-05, + "loss": 0.4305, + "step": 27332 + }, + { + "epoch": 0.47230094000552947, + "grad_norm": 0.9848035298623218, + "learning_rate": 1.1378767627181951e-05, + "loss": 0.4947, + "step": 27333 + }, + { + "epoch": 0.4723182195189383, + "grad_norm": 1.1888305617913377, + "learning_rate": 1.1378213320483274e-05, + "loss": 0.4357, + "step": 27334 + }, + { + "epoch": 0.47233549903234723, + "grad_norm": 0.7549035069734915, + "learning_rate": 1.1377659009467927e-05, + "loss": 0.4922, + "step": 27335 + }, + { + "epoch": 0.47235277854575614, + "grad_norm": 0.6834486327762216, + "learning_rate": 1.137710469413765e-05, + "loss": 0.2798, + "step": 27336 + }, + { + "epoch": 0.47237005805916504, + "grad_norm": 0.7416203028429281, + "learning_rate": 1.1376550374494178e-05, + "loss": 0.4585, + "step": 27337 + }, + { + "epoch": 0.47238733757257395, + "grad_norm": 1.0614933048920374, + "learning_rate": 1.137599605053924e-05, + "loss": 0.4481, + "step": 27338 + }, + { + "epoch": 0.47240461708598286, + "grad_norm": 0.9854648465454408, + "learning_rate": 1.1375441722274583e-05, + "loss": 0.4267, + "step": 27339 + }, + { + "epoch": 0.47242189659939177, + "grad_norm": 0.547987766500014, + "learning_rate": 1.1374887389701937e-05, + "loss": 0.5926, + "step": 27340 + }, + { + "epoch": 0.4724391761128007, + "grad_norm": 1.1959205857353308, + "learning_rate": 1.1374333052823034e-05, + "loss": 0.3981, + "step": 27341 + }, + { + "epoch": 0.4724564556262096, + "grad_norm": 1.090640926588968, + "learning_rate": 1.1373778711639622e-05, + "loss": 0.2999, + "step": 27342 + }, + { + "epoch": 0.4724737351396185, + "grad_norm": 0.6902998614647124, + "learning_rate": 1.1373224366153428e-05, + "loss": 0.4172, + "step": 27343 + }, + { + "epoch": 0.4724910146530274, + "grad_norm": 1.8976379459079769, + "learning_rate": 1.1372670016366191e-05, + "loss": 0.6185, + "step": 27344 + }, + { + "epoch": 0.47250829416643625, + "grad_norm": 0.8601128633301517, + "learning_rate": 1.137211566227965e-05, + "loss": 0.4979, + "step": 27345 + }, + { + "epoch": 0.47252557367984516, + "grad_norm": 1.111816470519054, + "learning_rate": 1.1371561303895534e-05, + "loss": 0.435, + "step": 27346 + }, + { + "epoch": 0.47254285319325406, + "grad_norm": 1.402815975800395, + "learning_rate": 1.1371006941215586e-05, + "loss": 0.368, + "step": 27347 + }, + { + "epoch": 0.47256013270666297, + "grad_norm": 0.9553751890734968, + "learning_rate": 1.1370452574241542e-05, + "loss": 0.4973, + "step": 27348 + }, + { + "epoch": 0.4725774122200719, + "grad_norm": 0.9671893413417921, + "learning_rate": 1.1369898202975134e-05, + "loss": 0.3899, + "step": 27349 + }, + { + "epoch": 0.4725946917334808, + "grad_norm": 1.3203974302196866, + "learning_rate": 1.1369343827418098e-05, + "loss": 0.6969, + "step": 27350 + }, + { + "epoch": 0.4726119712468897, + "grad_norm": 0.9253385138235342, + "learning_rate": 1.1368789447572177e-05, + "loss": 0.5059, + "step": 27351 + }, + { + "epoch": 0.4726292507602986, + "grad_norm": 1.4296049055403566, + "learning_rate": 1.1368235063439103e-05, + "loss": 0.5669, + "step": 27352 + }, + { + "epoch": 0.4726465302737075, + "grad_norm": 1.3399357171950503, + "learning_rate": 1.1367680675020612e-05, + "loss": 0.3027, + "step": 27353 + }, + { + "epoch": 0.4726638097871164, + "grad_norm": 0.9574206943552604, + "learning_rate": 1.1367126282318444e-05, + "loss": 0.4806, + "step": 27354 + }, + { + "epoch": 0.47268108930052527, + "grad_norm": 0.7797289406930388, + "learning_rate": 1.1366571885334328e-05, + "loss": 0.4365, + "step": 27355 + }, + { + "epoch": 0.4726983688139342, + "grad_norm": 27.783997964333768, + "learning_rate": 1.1366017484070006e-05, + "loss": 0.6282, + "step": 27356 + }, + { + "epoch": 0.4727156483273431, + "grad_norm": 1.259017888640701, + "learning_rate": 1.1365463078527217e-05, + "loss": 0.5373, + "step": 27357 + }, + { + "epoch": 0.472732927840752, + "grad_norm": 0.8070196310715633, + "learning_rate": 1.1364908668707687e-05, + "loss": 0.3277, + "step": 27358 + }, + { + "epoch": 0.4727502073541609, + "grad_norm": 0.9718771818814028, + "learning_rate": 1.1364354254613166e-05, + "loss": 0.5067, + "step": 27359 + }, + { + "epoch": 0.4727674868675698, + "grad_norm": 0.7642040033996632, + "learning_rate": 1.1363799836245381e-05, + "loss": 0.2877, + "step": 27360 + }, + { + "epoch": 0.4727847663809787, + "grad_norm": 0.8183699108896816, + "learning_rate": 1.136324541360607e-05, + "loss": 0.3226, + "step": 27361 + }, + { + "epoch": 0.4728020458943876, + "grad_norm": 0.42187777416830313, + "learning_rate": 1.1362690986696974e-05, + "loss": 0.5051, + "step": 27362 + }, + { + "epoch": 0.47281932540779653, + "grad_norm": 0.8568661686106439, + "learning_rate": 1.1362136555519826e-05, + "loss": 0.4858, + "step": 27363 + }, + { + "epoch": 0.47283660492120544, + "grad_norm": 1.0004265695789318, + "learning_rate": 1.136158212007636e-05, + "loss": 0.5728, + "step": 27364 + }, + { + "epoch": 0.47285388443461435, + "grad_norm": 0.9875981426465829, + "learning_rate": 1.1361027680368318e-05, + "loss": 0.5318, + "step": 27365 + }, + { + "epoch": 0.4728711639480232, + "grad_norm": 0.9059343336385594, + "learning_rate": 1.1360473236397433e-05, + "loss": 0.2961, + "step": 27366 + }, + { + "epoch": 0.4728884434614321, + "grad_norm": 0.7834163034658702, + "learning_rate": 1.135991878816544e-05, + "loss": 0.4323, + "step": 27367 + }, + { + "epoch": 0.472905722974841, + "grad_norm": 0.662781724784849, + "learning_rate": 1.1359364335674081e-05, + "loss": 0.3438, + "step": 27368 + }, + { + "epoch": 0.4729230024882499, + "grad_norm": 0.7587969953442363, + "learning_rate": 1.135880987892509e-05, + "loss": 0.3393, + "step": 27369 + }, + { + "epoch": 0.47294028200165883, + "grad_norm": 1.1667096139520587, + "learning_rate": 1.1358255417920201e-05, + "loss": 0.5109, + "step": 27370 + }, + { + "epoch": 0.47295756151506774, + "grad_norm": 0.8400778790841559, + "learning_rate": 1.1357700952661156e-05, + "loss": 0.4555, + "step": 27371 + }, + { + "epoch": 0.47297484102847664, + "grad_norm": 1.1215992468409437, + "learning_rate": 1.1357146483149683e-05, + "loss": 0.5889, + "step": 27372 + }, + { + "epoch": 0.47299212054188555, + "grad_norm": 1.2320178904387586, + "learning_rate": 1.1356592009387526e-05, + "loss": 0.6349, + "step": 27373 + }, + { + "epoch": 0.47300940005529446, + "grad_norm": 1.0257911349795101, + "learning_rate": 1.1356037531376423e-05, + "loss": 0.4844, + "step": 27374 + }, + { + "epoch": 0.47302667956870337, + "grad_norm": 0.5034581629676337, + "learning_rate": 1.1355483049118106e-05, + "loss": 0.2449, + "step": 27375 + }, + { + "epoch": 0.4730439590821123, + "grad_norm": 1.2931223653791777, + "learning_rate": 1.1354928562614312e-05, + "loss": 0.3832, + "step": 27376 + }, + { + "epoch": 0.4730612385955211, + "grad_norm": 1.3020689361090922, + "learning_rate": 1.1354374071866781e-05, + "loss": 0.3473, + "step": 27377 + }, + { + "epoch": 0.47307851810893004, + "grad_norm": 0.8622763874806669, + "learning_rate": 1.1353819576877244e-05, + "loss": 0.41, + "step": 27378 + }, + { + "epoch": 0.47309579762233894, + "grad_norm": 0.8383895652679968, + "learning_rate": 1.1353265077647442e-05, + "loss": 0.4873, + "step": 27379 + }, + { + "epoch": 0.47311307713574785, + "grad_norm": 1.0438005652523563, + "learning_rate": 1.1352710574179113e-05, + "loss": 0.4518, + "step": 27380 + }, + { + "epoch": 0.47313035664915676, + "grad_norm": 0.8887671012082596, + "learning_rate": 1.135215606647399e-05, + "loss": 0.3598, + "step": 27381 + }, + { + "epoch": 0.47314763616256567, + "grad_norm": 0.91997640274256, + "learning_rate": 1.1351601554533811e-05, + "loss": 0.446, + "step": 27382 + }, + { + "epoch": 0.4731649156759746, + "grad_norm": 0.8055033328362181, + "learning_rate": 1.1351047038360315e-05, + "loss": 0.438, + "step": 27383 + }, + { + "epoch": 0.4731821951893835, + "grad_norm": 0.8730771948997902, + "learning_rate": 1.1350492517955234e-05, + "loss": 0.2755, + "step": 27384 + }, + { + "epoch": 0.4731994747027924, + "grad_norm": 1.0415006210196414, + "learning_rate": 1.1349937993320312e-05, + "loss": 0.4158, + "step": 27385 + }, + { + "epoch": 0.4732167542162013, + "grad_norm": 0.7925070703469488, + "learning_rate": 1.1349383464457281e-05, + "loss": 0.3876, + "step": 27386 + }, + { + "epoch": 0.47323403372961015, + "grad_norm": 1.6873804012776288, + "learning_rate": 1.1348828931367875e-05, + "loss": 0.3014, + "step": 27387 + }, + { + "epoch": 0.47325131324301906, + "grad_norm": 1.5699554703402525, + "learning_rate": 1.1348274394053837e-05, + "loss": 0.297, + "step": 27388 + }, + { + "epoch": 0.47326859275642796, + "grad_norm": 1.1142949080605944, + "learning_rate": 1.1347719852516901e-05, + "loss": 0.5323, + "step": 27389 + }, + { + "epoch": 0.47328587226983687, + "grad_norm": 1.005884897574511, + "learning_rate": 1.1347165306758802e-05, + "loss": 0.2588, + "step": 27390 + }, + { + "epoch": 0.4733031517832458, + "grad_norm": 1.213631288858207, + "learning_rate": 1.1346610756781282e-05, + "loss": 0.4088, + "step": 27391 + }, + { + "epoch": 0.4733204312966547, + "grad_norm": 1.3387334063920142, + "learning_rate": 1.1346056202586074e-05, + "loss": 0.3377, + "step": 27392 + }, + { + "epoch": 0.4733377108100636, + "grad_norm": 1.0061383427012442, + "learning_rate": 1.1345501644174912e-05, + "loss": 0.2534, + "step": 27393 + }, + { + "epoch": 0.4733549903234725, + "grad_norm": 0.9356510430328558, + "learning_rate": 1.134494708154954e-05, + "loss": 0.4382, + "step": 27394 + }, + { + "epoch": 0.4733722698368814, + "grad_norm": 0.8133127567457392, + "learning_rate": 1.134439251471169e-05, + "loss": 0.5336, + "step": 27395 + }, + { + "epoch": 0.4733895493502903, + "grad_norm": 1.2059170538365194, + "learning_rate": 1.1343837943663104e-05, + "loss": 0.3876, + "step": 27396 + }, + { + "epoch": 0.4734068288636992, + "grad_norm": 1.4954218668491823, + "learning_rate": 1.134328336840551e-05, + "loss": 0.4222, + "step": 27397 + }, + { + "epoch": 0.4734241083771081, + "grad_norm": 0.7103440271206023, + "learning_rate": 1.1342728788940653e-05, + "loss": 0.268, + "step": 27398 + }, + { + "epoch": 0.473441387890517, + "grad_norm": 1.0864351419401361, + "learning_rate": 1.134217420527027e-05, + "loss": 0.4523, + "step": 27399 + }, + { + "epoch": 0.4734586674039259, + "grad_norm": 1.191220909043844, + "learning_rate": 1.1341619617396092e-05, + "loss": 0.3287, + "step": 27400 + }, + { + "epoch": 0.4734759469173348, + "grad_norm": 0.9502706754345307, + "learning_rate": 1.134106502531986e-05, + "loss": 0.5093, + "step": 27401 + }, + { + "epoch": 0.4734932264307437, + "grad_norm": 0.8398622832510483, + "learning_rate": 1.134051042904331e-05, + "loss": 0.4587, + "step": 27402 + }, + { + "epoch": 0.4735105059441526, + "grad_norm": 1.1273610167606114, + "learning_rate": 1.1339955828568181e-05, + "loss": 0.4882, + "step": 27403 + }, + { + "epoch": 0.4735277854575615, + "grad_norm": 1.2891761045065802, + "learning_rate": 1.1339401223896207e-05, + "loss": 0.7062, + "step": 27404 + }, + { + "epoch": 0.47354506497097043, + "grad_norm": 1.544357269498905, + "learning_rate": 1.1338846615029127e-05, + "loss": 0.4557, + "step": 27405 + }, + { + "epoch": 0.47356234448437934, + "grad_norm": 0.9691853396414031, + "learning_rate": 1.133829200196868e-05, + "loss": 0.5071, + "step": 27406 + }, + { + "epoch": 0.47357962399778825, + "grad_norm": 0.7654894121307344, + "learning_rate": 1.13377373847166e-05, + "loss": 0.3614, + "step": 27407 + }, + { + "epoch": 0.4735969035111971, + "grad_norm": 0.48269068447855745, + "learning_rate": 1.133718276327462e-05, + "loss": 0.7638, + "step": 27408 + }, + { + "epoch": 0.473614183024606, + "grad_norm": 1.1529599333044294, + "learning_rate": 1.1336628137644488e-05, + "loss": 0.5376, + "step": 27409 + }, + { + "epoch": 0.4736314625380149, + "grad_norm": 0.9533876319002393, + "learning_rate": 1.1336073507827932e-05, + "loss": 0.332, + "step": 27410 + }, + { + "epoch": 0.4736487420514238, + "grad_norm": 1.0649947909988782, + "learning_rate": 1.1335518873826693e-05, + "loss": 0.5117, + "step": 27411 + }, + { + "epoch": 0.47366602156483273, + "grad_norm": 1.2125250591161687, + "learning_rate": 1.1334964235642507e-05, + "loss": 0.4142, + "step": 27412 + }, + { + "epoch": 0.47368330107824164, + "grad_norm": 1.302451834752741, + "learning_rate": 1.133440959327711e-05, + "loss": 0.7628, + "step": 27413 + }, + { + "epoch": 0.47370058059165054, + "grad_norm": 0.6918677812384022, + "learning_rate": 1.1333854946732243e-05, + "loss": 0.2658, + "step": 27414 + }, + { + "epoch": 0.47371786010505945, + "grad_norm": 1.6732187064291761, + "learning_rate": 1.1333300296009642e-05, + "loss": 0.443, + "step": 27415 + }, + { + "epoch": 0.47373513961846836, + "grad_norm": 0.8366011959110284, + "learning_rate": 1.1332745641111042e-05, + "loss": 0.4195, + "step": 27416 + }, + { + "epoch": 0.47375241913187727, + "grad_norm": 1.1320173837146155, + "learning_rate": 1.133219098203818e-05, + "loss": 0.4506, + "step": 27417 + }, + { + "epoch": 0.4737696986452862, + "grad_norm": 1.1985026208046912, + "learning_rate": 1.1331636318792796e-05, + "loss": 0.338, + "step": 27418 + }, + { + "epoch": 0.473786978158695, + "grad_norm": 0.5291545132602035, + "learning_rate": 1.1331081651376626e-05, + "loss": 0.635, + "step": 27419 + }, + { + "epoch": 0.47380425767210393, + "grad_norm": 0.7375959836435116, + "learning_rate": 1.1330526979791405e-05, + "loss": 0.3967, + "step": 27420 + }, + { + "epoch": 0.47382153718551284, + "grad_norm": 1.1696637325890478, + "learning_rate": 1.1329972304038875e-05, + "loss": 0.6365, + "step": 27421 + }, + { + "epoch": 0.47383881669892175, + "grad_norm": 1.3987499147802855, + "learning_rate": 1.1329417624120769e-05, + "loss": 0.4236, + "step": 27422 + }, + { + "epoch": 0.47385609621233066, + "grad_norm": 1.06547836841607, + "learning_rate": 1.1328862940038827e-05, + "loss": 0.4357, + "step": 27423 + }, + { + "epoch": 0.47387337572573957, + "grad_norm": 1.1567923612474458, + "learning_rate": 1.1328308251794784e-05, + "loss": 0.459, + "step": 27424 + }, + { + "epoch": 0.4738906552391485, + "grad_norm": 1.2217322302376366, + "learning_rate": 1.132775355939038e-05, + "loss": 0.2859, + "step": 27425 + }, + { + "epoch": 0.4739079347525574, + "grad_norm": 0.816063937178379, + "learning_rate": 1.1327198862827354e-05, + "loss": 0.3332, + "step": 27426 + }, + { + "epoch": 0.4739252142659663, + "grad_norm": 1.0901233455011416, + "learning_rate": 1.1326644162107433e-05, + "loss": 0.4426, + "step": 27427 + }, + { + "epoch": 0.4739424937793752, + "grad_norm": 0.6257363379196887, + "learning_rate": 1.1326089457232366e-05, + "loss": 0.7703, + "step": 27428 + }, + { + "epoch": 0.47395977329278405, + "grad_norm": 0.8423197693380351, + "learning_rate": 1.1325534748203886e-05, + "loss": 0.2678, + "step": 27429 + }, + { + "epoch": 0.47397705280619296, + "grad_norm": 0.8571434129614167, + "learning_rate": 1.132498003502373e-05, + "loss": 0.3382, + "step": 27430 + }, + { + "epoch": 0.47399433231960186, + "grad_norm": 0.9635412111843076, + "learning_rate": 1.1324425317693635e-05, + "loss": 0.3055, + "step": 27431 + }, + { + "epoch": 0.47401161183301077, + "grad_norm": 0.9972290410516698, + "learning_rate": 1.1323870596215341e-05, + "loss": 0.4976, + "step": 27432 + }, + { + "epoch": 0.4740288913464197, + "grad_norm": 0.799907987437853, + "learning_rate": 1.1323315870590582e-05, + "loss": 0.427, + "step": 27433 + }, + { + "epoch": 0.4740461708598286, + "grad_norm": 0.8688684265037919, + "learning_rate": 1.1322761140821097e-05, + "loss": 0.4572, + "step": 27434 + }, + { + "epoch": 0.4740634503732375, + "grad_norm": 1.1598547817007046, + "learning_rate": 1.1322206406908625e-05, + "loss": 0.5365, + "step": 27435 + }, + { + "epoch": 0.4740807298866464, + "grad_norm": 1.110411292744179, + "learning_rate": 1.1321651668854901e-05, + "loss": 0.4403, + "step": 27436 + }, + { + "epoch": 0.4740980094000553, + "grad_norm": 0.9499721930822707, + "learning_rate": 1.1321096926661666e-05, + "loss": 0.4553, + "step": 27437 + }, + { + "epoch": 0.4741152889134642, + "grad_norm": 2.014383657025673, + "learning_rate": 1.1320542180330649e-05, + "loss": 0.7111, + "step": 27438 + }, + { + "epoch": 0.4741325684268731, + "grad_norm": 1.1617080323261835, + "learning_rate": 1.13199874298636e-05, + "loss": 0.3874, + "step": 27439 + }, + { + "epoch": 0.474149847940282, + "grad_norm": 0.7945261620859095, + "learning_rate": 1.1319432675262248e-05, + "loss": 0.2669, + "step": 27440 + }, + { + "epoch": 0.4741671274536909, + "grad_norm": 1.3109473498382593, + "learning_rate": 1.1318877916528331e-05, + "loss": 0.4115, + "step": 27441 + }, + { + "epoch": 0.4741844069670998, + "grad_norm": 1.142523217575471, + "learning_rate": 1.1318323153663589e-05, + "loss": 0.399, + "step": 27442 + }, + { + "epoch": 0.4742016864805087, + "grad_norm": 1.0486598312382882, + "learning_rate": 1.1317768386669759e-05, + "loss": 0.358, + "step": 27443 + }, + { + "epoch": 0.4742189659939176, + "grad_norm": 0.6526555944683076, + "learning_rate": 1.131721361554858e-05, + "loss": 0.2521, + "step": 27444 + }, + { + "epoch": 0.4742362455073265, + "grad_norm": 0.9474909956459854, + "learning_rate": 1.1316658840301784e-05, + "loss": 0.4138, + "step": 27445 + }, + { + "epoch": 0.4742535250207354, + "grad_norm": 1.1349611200652556, + "learning_rate": 1.1316104060931114e-05, + "loss": 0.425, + "step": 27446 + }, + { + "epoch": 0.47427080453414433, + "grad_norm": 1.0707798579056118, + "learning_rate": 1.1315549277438308e-05, + "loss": 0.4536, + "step": 27447 + }, + { + "epoch": 0.47428808404755324, + "grad_norm": 1.0249874368536123, + "learning_rate": 1.1314994489825099e-05, + "loss": 0.6152, + "step": 27448 + }, + { + "epoch": 0.47430536356096215, + "grad_norm": 1.0865187786347366, + "learning_rate": 1.1314439698093231e-05, + "loss": 0.4718, + "step": 27449 + }, + { + "epoch": 0.47432264307437105, + "grad_norm": 0.8405844387193738, + "learning_rate": 1.1313884902244432e-05, + "loss": 0.5168, + "step": 27450 + }, + { + "epoch": 0.4743399225877799, + "grad_norm": 0.8051975857629913, + "learning_rate": 1.131333010228045e-05, + "loss": 0.4139, + "step": 27451 + }, + { + "epoch": 0.4743572021011888, + "grad_norm": 0.8393316978334285, + "learning_rate": 1.1312775298203019e-05, + "loss": 0.5344, + "step": 27452 + }, + { + "epoch": 0.4743744816145977, + "grad_norm": 0.8204849790653135, + "learning_rate": 1.1312220490013872e-05, + "loss": 0.4091, + "step": 27453 + }, + { + "epoch": 0.47439176112800663, + "grad_norm": 1.2334131124931575, + "learning_rate": 1.1311665677714754e-05, + "loss": 0.5844, + "step": 27454 + }, + { + "epoch": 0.47440904064141554, + "grad_norm": 1.1028129723152145, + "learning_rate": 1.1311110861307399e-05, + "loss": 0.39, + "step": 27455 + }, + { + "epoch": 0.47442632015482444, + "grad_norm": 2.486415433004618, + "learning_rate": 1.1310556040793543e-05, + "loss": 0.4422, + "step": 27456 + }, + { + "epoch": 0.47444359966823335, + "grad_norm": 0.8884442416367713, + "learning_rate": 1.131000121617493e-05, + "loss": 0.4719, + "step": 27457 + }, + { + "epoch": 0.47446087918164226, + "grad_norm": 0.7702024595826987, + "learning_rate": 1.1309446387453292e-05, + "loss": 0.2647, + "step": 27458 + }, + { + "epoch": 0.47447815869505117, + "grad_norm": 1.3086022859340478, + "learning_rate": 1.1308891554630365e-05, + "loss": 0.4539, + "step": 27459 + }, + { + "epoch": 0.4744954382084601, + "grad_norm": 0.8717969786078128, + "learning_rate": 1.1308336717707896e-05, + "loss": 0.4823, + "step": 27460 + }, + { + "epoch": 0.4745127177218689, + "grad_norm": 1.125463209748143, + "learning_rate": 1.1307781876687611e-05, + "loss": 0.402, + "step": 27461 + }, + { + "epoch": 0.47452999723527783, + "grad_norm": 0.9651345532811485, + "learning_rate": 1.1307227031571257e-05, + "loss": 0.2593, + "step": 27462 + }, + { + "epoch": 0.47454727674868674, + "grad_norm": 2.0319991246826636, + "learning_rate": 1.1306672182360569e-05, + "loss": 0.504, + "step": 27463 + }, + { + "epoch": 0.47456455626209565, + "grad_norm": 0.5942199311136507, + "learning_rate": 1.1306117329057284e-05, + "loss": 0.7849, + "step": 27464 + }, + { + "epoch": 0.47458183577550456, + "grad_norm": 1.4434523451117658, + "learning_rate": 1.130556247166314e-05, + "loss": 0.4313, + "step": 27465 + }, + { + "epoch": 0.47459911528891346, + "grad_norm": 1.2779986341836194, + "learning_rate": 1.1305007610179874e-05, + "loss": 0.4045, + "step": 27466 + }, + { + "epoch": 0.47461639480232237, + "grad_norm": 1.0966496812773732, + "learning_rate": 1.1304452744609227e-05, + "loss": 0.4239, + "step": 27467 + }, + { + "epoch": 0.4746336743157313, + "grad_norm": 0.9880072420628948, + "learning_rate": 1.1303897874952934e-05, + "loss": 0.4098, + "step": 27468 + }, + { + "epoch": 0.4746509538291402, + "grad_norm": 1.3357153747094777, + "learning_rate": 1.1303343001212734e-05, + "loss": 0.4266, + "step": 27469 + }, + { + "epoch": 0.4746682333425491, + "grad_norm": 1.080610483922758, + "learning_rate": 1.1302788123390363e-05, + "loss": 0.4052, + "step": 27470 + }, + { + "epoch": 0.474685512855958, + "grad_norm": 1.1645760103500473, + "learning_rate": 1.1302233241487564e-05, + "loss": 0.5024, + "step": 27471 + }, + { + "epoch": 0.47470279236936685, + "grad_norm": 0.8819786982419359, + "learning_rate": 1.1301678355506068e-05, + "loss": 0.4127, + "step": 27472 + }, + { + "epoch": 0.47472007188277576, + "grad_norm": 0.6599383788115821, + "learning_rate": 1.1301123465447617e-05, + "loss": 0.2323, + "step": 27473 + }, + { + "epoch": 0.47473735139618467, + "grad_norm": 0.8397356255403625, + "learning_rate": 1.130056857131395e-05, + "loss": 0.5297, + "step": 27474 + }, + { + "epoch": 0.4747546309095936, + "grad_norm": 1.0060084081925493, + "learning_rate": 1.1300013673106803e-05, + "loss": 0.3125, + "step": 27475 + }, + { + "epoch": 0.4747719104230025, + "grad_norm": 0.9573261679216918, + "learning_rate": 1.1299458770827911e-05, + "loss": 0.3348, + "step": 27476 + }, + { + "epoch": 0.4747891899364114, + "grad_norm": 0.8498770950749932, + "learning_rate": 1.129890386447902e-05, + "loss": 0.4697, + "step": 27477 + }, + { + "epoch": 0.4748064694498203, + "grad_norm": 1.4167378578056002, + "learning_rate": 1.1298348954061862e-05, + "loss": 0.4943, + "step": 27478 + }, + { + "epoch": 0.4748237489632292, + "grad_norm": 1.3394879496207355, + "learning_rate": 1.1297794039578176e-05, + "loss": 0.6614, + "step": 27479 + }, + { + "epoch": 0.4748410284766381, + "grad_norm": 0.6767700020925208, + "learning_rate": 1.1297239121029701e-05, + "loss": 0.3105, + "step": 27480 + }, + { + "epoch": 0.474858307990047, + "grad_norm": 0.8742003902906301, + "learning_rate": 1.1296684198418174e-05, + "loss": 0.5085, + "step": 27481 + }, + { + "epoch": 0.4748755875034559, + "grad_norm": 1.4477679835794315, + "learning_rate": 1.1296129271745333e-05, + "loss": 0.424, + "step": 27482 + }, + { + "epoch": 0.4748928670168648, + "grad_norm": 1.1052231135885866, + "learning_rate": 1.1295574341012918e-05, + "loss": 0.386, + "step": 27483 + }, + { + "epoch": 0.4749101465302737, + "grad_norm": 0.7073930613053847, + "learning_rate": 1.1295019406222668e-05, + "loss": 0.3505, + "step": 27484 + }, + { + "epoch": 0.4749274260436826, + "grad_norm": 1.2167628520419043, + "learning_rate": 1.1294464467376312e-05, + "loss": 0.404, + "step": 27485 + }, + { + "epoch": 0.4749447055570915, + "grad_norm": 1.195514070732386, + "learning_rate": 1.12939095244756e-05, + "loss": 0.4321, + "step": 27486 + }, + { + "epoch": 0.4749619850705004, + "grad_norm": 1.0904327510149856, + "learning_rate": 1.1293354577522264e-05, + "loss": 0.4667, + "step": 27487 + }, + { + "epoch": 0.4749792645839093, + "grad_norm": 0.8222414335830254, + "learning_rate": 1.1292799626518043e-05, + "loss": 0.4878, + "step": 27488 + }, + { + "epoch": 0.47499654409731823, + "grad_norm": 1.3930745293498183, + "learning_rate": 1.1292244671464676e-05, + "loss": 0.4303, + "step": 27489 + }, + { + "epoch": 0.47501382361072714, + "grad_norm": 0.732634072848543, + "learning_rate": 1.1291689712363898e-05, + "loss": 0.3125, + "step": 27490 + }, + { + "epoch": 0.47503110312413604, + "grad_norm": 0.867317392439989, + "learning_rate": 1.1291134749217451e-05, + "loss": 0.5476, + "step": 27491 + }, + { + "epoch": 0.47504838263754495, + "grad_norm": 0.7020356070838304, + "learning_rate": 1.1290579782027074e-05, + "loss": 0.49, + "step": 27492 + }, + { + "epoch": 0.4750656621509538, + "grad_norm": 0.776199692862944, + "learning_rate": 1.12900248107945e-05, + "loss": 0.413, + "step": 27493 + }, + { + "epoch": 0.4750829416643627, + "grad_norm": 0.9642106742290955, + "learning_rate": 1.128946983552147e-05, + "loss": 0.3678, + "step": 27494 + }, + { + "epoch": 0.4751002211777716, + "grad_norm": 1.1744413842445087, + "learning_rate": 1.1288914856209727e-05, + "loss": 0.5615, + "step": 27495 + }, + { + "epoch": 0.4751175006911805, + "grad_norm": 0.7593467193567505, + "learning_rate": 1.1288359872861e-05, + "loss": 0.4639, + "step": 27496 + }, + { + "epoch": 0.47513478020458944, + "grad_norm": 1.1505490430187388, + "learning_rate": 1.1287804885477033e-05, + "loss": 0.473, + "step": 27497 + }, + { + "epoch": 0.47515205971799834, + "grad_norm": 0.6493259402285612, + "learning_rate": 1.1287249894059564e-05, + "loss": 0.5451, + "step": 27498 + }, + { + "epoch": 0.47516933923140725, + "grad_norm": 0.7054537524857416, + "learning_rate": 1.128669489861033e-05, + "loss": 0.3725, + "step": 27499 + }, + { + "epoch": 0.47518661874481616, + "grad_norm": 0.966073912101836, + "learning_rate": 1.1286139899131073e-05, + "loss": 0.3514, + "step": 27500 + }, + { + "epoch": 0.47520389825822507, + "grad_norm": 1.1639448991347816, + "learning_rate": 1.1285584895623523e-05, + "loss": 0.4511, + "step": 27501 + }, + { + "epoch": 0.475221177771634, + "grad_norm": 0.8326290331154278, + "learning_rate": 1.1285029888089428e-05, + "loss": 0.3288, + "step": 27502 + }, + { + "epoch": 0.4752384572850428, + "grad_norm": 0.846943523539999, + "learning_rate": 1.1284474876530518e-05, + "loss": 0.3968, + "step": 27503 + }, + { + "epoch": 0.47525573679845173, + "grad_norm": 1.4194166264330483, + "learning_rate": 1.1283919860948537e-05, + "loss": 0.5853, + "step": 27504 + }, + { + "epoch": 0.47527301631186064, + "grad_norm": 0.9290541923270815, + "learning_rate": 1.1283364841345219e-05, + "loss": 0.3904, + "step": 27505 + }, + { + "epoch": 0.47529029582526955, + "grad_norm": 0.8116722254514455, + "learning_rate": 1.1282809817722308e-05, + "loss": 0.5732, + "step": 27506 + }, + { + "epoch": 0.47530757533867846, + "grad_norm": 1.108468039642142, + "learning_rate": 1.128225479008154e-05, + "loss": 0.5505, + "step": 27507 + }, + { + "epoch": 0.47532485485208736, + "grad_norm": 1.100022396730182, + "learning_rate": 1.1281699758424647e-05, + "loss": 0.5605, + "step": 27508 + }, + { + "epoch": 0.47534213436549627, + "grad_norm": 0.764127361441054, + "learning_rate": 1.128114472275338e-05, + "loss": 0.4325, + "step": 27509 + }, + { + "epoch": 0.4753594138789052, + "grad_norm": 1.2078200675378903, + "learning_rate": 1.1280589683069464e-05, + "loss": 0.4045, + "step": 27510 + }, + { + "epoch": 0.4753766933923141, + "grad_norm": 1.086211391301494, + "learning_rate": 1.1280034639374648e-05, + "loss": 0.5801, + "step": 27511 + }, + { + "epoch": 0.475393972905723, + "grad_norm": 1.1111085054710312, + "learning_rate": 1.1279479591670666e-05, + "loss": 0.6416, + "step": 27512 + }, + { + "epoch": 0.4754112524191319, + "grad_norm": 0.9985678832904978, + "learning_rate": 1.1278924539959254e-05, + "loss": 0.4073, + "step": 27513 + }, + { + "epoch": 0.47542853193254075, + "grad_norm": 2.0536464842066455, + "learning_rate": 1.1278369484242153e-05, + "loss": 0.3585, + "step": 27514 + }, + { + "epoch": 0.47544581144594966, + "grad_norm": 0.8521319144395091, + "learning_rate": 1.1277814424521104e-05, + "loss": 0.5046, + "step": 27515 + }, + { + "epoch": 0.47546309095935857, + "grad_norm": 0.731232325304885, + "learning_rate": 1.127725936079784e-05, + "loss": 0.3498, + "step": 27516 + }, + { + "epoch": 0.4754803704727675, + "grad_norm": 0.8542434618055176, + "learning_rate": 1.1276704293074107e-05, + "loss": 0.4608, + "step": 27517 + }, + { + "epoch": 0.4754976499861764, + "grad_norm": 0.9058741853574069, + "learning_rate": 1.1276149221351638e-05, + "loss": 0.5189, + "step": 27518 + }, + { + "epoch": 0.4755149294995853, + "grad_norm": 1.068914845002503, + "learning_rate": 1.1275594145632172e-05, + "loss": 0.5647, + "step": 27519 + }, + { + "epoch": 0.4755322090129942, + "grad_norm": 1.17509294177028, + "learning_rate": 1.1275039065917449e-05, + "loss": 0.462, + "step": 27520 + }, + { + "epoch": 0.4755494885264031, + "grad_norm": 0.9662278241319031, + "learning_rate": 1.1274483982209203e-05, + "loss": 0.4171, + "step": 27521 + }, + { + "epoch": 0.475566768039812, + "grad_norm": 0.8914242440483451, + "learning_rate": 1.127392889450918e-05, + "loss": 0.3741, + "step": 27522 + }, + { + "epoch": 0.4755840475532209, + "grad_norm": 0.9855716649101222, + "learning_rate": 1.1273373802819114e-05, + "loss": 0.4251, + "step": 27523 + }, + { + "epoch": 0.47560132706662983, + "grad_norm": 0.7125912419411724, + "learning_rate": 1.1272818707140744e-05, + "loss": 0.3558, + "step": 27524 + }, + { + "epoch": 0.4756186065800387, + "grad_norm": 1.1706546998018947, + "learning_rate": 1.1272263607475808e-05, + "loss": 0.3996, + "step": 27525 + }, + { + "epoch": 0.4756358860934476, + "grad_norm": 0.7806028137667299, + "learning_rate": 1.1271708503826049e-05, + "loss": 0.3379, + "step": 27526 + }, + { + "epoch": 0.4756531656068565, + "grad_norm": 0.6611988086889564, + "learning_rate": 1.1271153396193201e-05, + "loss": 0.5312, + "step": 27527 + }, + { + "epoch": 0.4756704451202654, + "grad_norm": 0.9336319118484627, + "learning_rate": 1.1270598284579002e-05, + "loss": 0.5085, + "step": 27528 + }, + { + "epoch": 0.4756877246336743, + "grad_norm": 1.2024457005796005, + "learning_rate": 1.1270043168985197e-05, + "loss": 0.3798, + "step": 27529 + }, + { + "epoch": 0.4757050041470832, + "grad_norm": 0.8560943743050328, + "learning_rate": 1.1269488049413515e-05, + "loss": 0.4183, + "step": 27530 + }, + { + "epoch": 0.47572228366049213, + "grad_norm": 0.9496985396435103, + "learning_rate": 1.1268932925865703e-05, + "loss": 0.3534, + "step": 27531 + }, + { + "epoch": 0.47573956317390104, + "grad_norm": 0.7670122606058355, + "learning_rate": 1.1268377798343497e-05, + "loss": 0.7304, + "step": 27532 + }, + { + "epoch": 0.47575684268730994, + "grad_norm": 0.9075935061592779, + "learning_rate": 1.1267822666848632e-05, + "loss": 0.2934, + "step": 27533 + }, + { + "epoch": 0.47577412220071885, + "grad_norm": 0.8110226772058173, + "learning_rate": 1.1267267531382853e-05, + "loss": 0.3495, + "step": 27534 + }, + { + "epoch": 0.4757914017141277, + "grad_norm": 0.7700159285489018, + "learning_rate": 1.1266712391947894e-05, + "loss": 0.5014, + "step": 27535 + }, + { + "epoch": 0.4758086812275366, + "grad_norm": 0.4714918358051858, + "learning_rate": 1.1266157248545496e-05, + "loss": 0.5916, + "step": 27536 + }, + { + "epoch": 0.4758259607409455, + "grad_norm": 0.9665199701428846, + "learning_rate": 1.1265602101177398e-05, + "loss": 0.3512, + "step": 27537 + }, + { + "epoch": 0.4758432402543544, + "grad_norm": 1.102099956262538, + "learning_rate": 1.126504694984534e-05, + "loss": 0.4998, + "step": 27538 + }, + { + "epoch": 0.47586051976776333, + "grad_norm": 0.782471381285943, + "learning_rate": 1.1264491794551056e-05, + "loss": 0.6331, + "step": 27539 + }, + { + "epoch": 0.47587779928117224, + "grad_norm": 1.2859594368996086, + "learning_rate": 1.1263936635296289e-05, + "loss": 0.4507, + "step": 27540 + }, + { + "epoch": 0.47589507879458115, + "grad_norm": 1.3744301552930607, + "learning_rate": 1.1263381472082772e-05, + "loss": 0.5397, + "step": 27541 + }, + { + "epoch": 0.47591235830799006, + "grad_norm": 0.8843868084168176, + "learning_rate": 1.1262826304912253e-05, + "loss": 0.585, + "step": 27542 + }, + { + "epoch": 0.47592963782139897, + "grad_norm": 1.065515216467574, + "learning_rate": 1.1262271133786467e-05, + "loss": 0.2387, + "step": 27543 + }, + { + "epoch": 0.4759469173348079, + "grad_norm": 0.5958483732472784, + "learning_rate": 1.1261715958707149e-05, + "loss": 0.3501, + "step": 27544 + }, + { + "epoch": 0.4759641968482168, + "grad_norm": 0.7278052754316043, + "learning_rate": 1.126116077967604e-05, + "loss": 0.2673, + "step": 27545 + }, + { + "epoch": 0.47598147636162563, + "grad_norm": 0.8771951319476969, + "learning_rate": 1.1260605596694883e-05, + "loss": 0.5085, + "step": 27546 + }, + { + "epoch": 0.47599875587503454, + "grad_norm": 0.9448751540443242, + "learning_rate": 1.1260050409765413e-05, + "loss": 0.5461, + "step": 27547 + }, + { + "epoch": 0.47601603538844345, + "grad_norm": 0.8473075703256944, + "learning_rate": 1.1259495218889364e-05, + "loss": 0.385, + "step": 27548 + }, + { + "epoch": 0.47603331490185236, + "grad_norm": 1.517281886401809, + "learning_rate": 1.1258940024068486e-05, + "loss": 0.4834, + "step": 27549 + }, + { + "epoch": 0.47605059441526126, + "grad_norm": 1.1132042051315136, + "learning_rate": 1.1258384825304511e-05, + "loss": 0.3409, + "step": 27550 + }, + { + "epoch": 0.47606787392867017, + "grad_norm": 0.8828016590404612, + "learning_rate": 1.125782962259918e-05, + "loss": 0.4504, + "step": 27551 + }, + { + "epoch": 0.4760851534420791, + "grad_norm": 1.1490180054908032, + "learning_rate": 1.125727441595423e-05, + "loss": 0.6095, + "step": 27552 + }, + { + "epoch": 0.476102432955488, + "grad_norm": 1.4027932467995035, + "learning_rate": 1.1256719205371399e-05, + "loss": 0.3692, + "step": 27553 + }, + { + "epoch": 0.4761197124688969, + "grad_norm": 1.0610102035642468, + "learning_rate": 1.125616399085243e-05, + "loss": 0.3559, + "step": 27554 + }, + { + "epoch": 0.4761369919823058, + "grad_norm": 0.7929032731865269, + "learning_rate": 1.1255608772399058e-05, + "loss": 0.5482, + "step": 27555 + }, + { + "epoch": 0.47615427149571465, + "grad_norm": 0.735685815690051, + "learning_rate": 1.1255053550013024e-05, + "loss": 0.2547, + "step": 27556 + }, + { + "epoch": 0.47617155100912356, + "grad_norm": 0.7975772308738285, + "learning_rate": 1.125449832369607e-05, + "loss": 0.3755, + "step": 27557 + }, + { + "epoch": 0.47618883052253247, + "grad_norm": 0.7017984391044505, + "learning_rate": 1.125394309344993e-05, + "loss": 0.275, + "step": 27558 + }, + { + "epoch": 0.4762061100359414, + "grad_norm": 0.9119820647388919, + "learning_rate": 1.1253387859276343e-05, + "loss": 0.3267, + "step": 27559 + }, + { + "epoch": 0.4762233895493503, + "grad_norm": 0.9636164644290139, + "learning_rate": 1.1252832621177054e-05, + "loss": 0.369, + "step": 27560 + }, + { + "epoch": 0.4762406690627592, + "grad_norm": 0.9233786137829398, + "learning_rate": 1.1252277379153798e-05, + "loss": 0.2953, + "step": 27561 + }, + { + "epoch": 0.4762579485761681, + "grad_norm": 1.2720230003510127, + "learning_rate": 1.1251722133208309e-05, + "loss": 0.6231, + "step": 27562 + }, + { + "epoch": 0.476275228089577, + "grad_norm": 1.4721730009766105, + "learning_rate": 1.1251166883342334e-05, + "loss": 0.4966, + "step": 27563 + }, + { + "epoch": 0.4762925076029859, + "grad_norm": 0.6955023858040779, + "learning_rate": 1.1250611629557613e-05, + "loss": 0.3419, + "step": 27564 + }, + { + "epoch": 0.4763097871163948, + "grad_norm": 1.0336855853657188, + "learning_rate": 1.1250056371855875e-05, + "loss": 0.4037, + "step": 27565 + }, + { + "epoch": 0.47632706662980373, + "grad_norm": 0.8272817785066244, + "learning_rate": 1.1249501110238869e-05, + "loss": 0.4459, + "step": 27566 + }, + { + "epoch": 0.4763443461432126, + "grad_norm": 1.01416109230546, + "learning_rate": 1.124894584470833e-05, + "loss": 0.4725, + "step": 27567 + }, + { + "epoch": 0.4763616256566215, + "grad_norm": 1.3558519173662689, + "learning_rate": 1.1248390575265997e-05, + "loss": 0.3606, + "step": 27568 + }, + { + "epoch": 0.4763789051700304, + "grad_norm": 1.3380962730296466, + "learning_rate": 1.124783530191361e-05, + "loss": 0.4302, + "step": 27569 + }, + { + "epoch": 0.4763961846834393, + "grad_norm": 0.779965322968228, + "learning_rate": 1.124728002465291e-05, + "loss": 0.2418, + "step": 27570 + }, + { + "epoch": 0.4764134641968482, + "grad_norm": 1.0430169371141214, + "learning_rate": 1.1246724743485634e-05, + "loss": 0.3848, + "step": 27571 + }, + { + "epoch": 0.4764307437102571, + "grad_norm": 1.288982355223967, + "learning_rate": 1.1246169458413519e-05, + "loss": 0.3518, + "step": 27572 + }, + { + "epoch": 0.47644802322366603, + "grad_norm": 1.1403014206013666, + "learning_rate": 1.1245614169438305e-05, + "loss": 0.4456, + "step": 27573 + }, + { + "epoch": 0.47646530273707494, + "grad_norm": 1.2430100825567798, + "learning_rate": 1.1245058876561736e-05, + "loss": 0.4484, + "step": 27574 + }, + { + "epoch": 0.47648258225048384, + "grad_norm": 0.9965402203495909, + "learning_rate": 1.1244503579785549e-05, + "loss": 0.3433, + "step": 27575 + }, + { + "epoch": 0.47649986176389275, + "grad_norm": 1.997765667580235, + "learning_rate": 1.1243948279111478e-05, + "loss": 0.3791, + "step": 27576 + }, + { + "epoch": 0.4765171412773016, + "grad_norm": 0.8718475036650749, + "learning_rate": 1.124339297454127e-05, + "loss": 0.5471, + "step": 27577 + }, + { + "epoch": 0.4765344207907105, + "grad_norm": 0.8634503738820762, + "learning_rate": 1.1242837666076658e-05, + "loss": 0.3387, + "step": 27578 + }, + { + "epoch": 0.4765517003041194, + "grad_norm": 0.9385773741561485, + "learning_rate": 1.1242282353719385e-05, + "loss": 0.3325, + "step": 27579 + }, + { + "epoch": 0.4765689798175283, + "grad_norm": 1.0031107853003185, + "learning_rate": 1.1241727037471191e-05, + "loss": 0.3645, + "step": 27580 + }, + { + "epoch": 0.47658625933093723, + "grad_norm": 1.0027043830654814, + "learning_rate": 1.1241171717333815e-05, + "loss": 0.4627, + "step": 27581 + }, + { + "epoch": 0.47660353884434614, + "grad_norm": 0.9088374708445532, + "learning_rate": 1.124061639330899e-05, + "loss": 0.3516, + "step": 27582 + }, + { + "epoch": 0.47662081835775505, + "grad_norm": 1.0372208048308362, + "learning_rate": 1.1240061065398463e-05, + "loss": 0.3724, + "step": 27583 + }, + { + "epoch": 0.47663809787116396, + "grad_norm": 1.3204974078006488, + "learning_rate": 1.123950573360397e-05, + "loss": 0.4424, + "step": 27584 + }, + { + "epoch": 0.47665537738457286, + "grad_norm": 0.8233498621554775, + "learning_rate": 1.123895039792725e-05, + "loss": 0.4318, + "step": 27585 + }, + { + "epoch": 0.4766726568979818, + "grad_norm": 1.0204176672081287, + "learning_rate": 1.1238395058370044e-05, + "loss": 0.3569, + "step": 27586 + }, + { + "epoch": 0.4766899364113907, + "grad_norm": 0.5851103308525165, + "learning_rate": 1.1237839714934091e-05, + "loss": 0.557, + "step": 27587 + }, + { + "epoch": 0.47670721592479953, + "grad_norm": 0.7828242347236954, + "learning_rate": 1.1237284367621126e-05, + "loss": 0.3495, + "step": 27588 + }, + { + "epoch": 0.47672449543820844, + "grad_norm": 1.0264712580497979, + "learning_rate": 1.1236729016432896e-05, + "loss": 0.3255, + "step": 27589 + }, + { + "epoch": 0.47674177495161735, + "grad_norm": 1.0193766522002303, + "learning_rate": 1.1236173661371139e-05, + "loss": 0.5161, + "step": 27590 + }, + { + "epoch": 0.47675905446502626, + "grad_norm": 1.1116834192421114, + "learning_rate": 1.123561830243759e-05, + "loss": 0.6759, + "step": 27591 + }, + { + "epoch": 0.47677633397843516, + "grad_norm": 1.15149456217974, + "learning_rate": 1.1235062939633989e-05, + "loss": 0.478, + "step": 27592 + }, + { + "epoch": 0.47679361349184407, + "grad_norm": 0.674821548453199, + "learning_rate": 1.123450757296208e-05, + "loss": 0.577, + "step": 27593 + }, + { + "epoch": 0.476810893005253, + "grad_norm": 1.3045359099798355, + "learning_rate": 1.1233952202423596e-05, + "loss": 0.6003, + "step": 27594 + }, + { + "epoch": 0.4768281725186619, + "grad_norm": 0.7812729480683749, + "learning_rate": 1.1233396828020282e-05, + "loss": 0.3778, + "step": 27595 + }, + { + "epoch": 0.4768454520320708, + "grad_norm": 1.4754047937146788, + "learning_rate": 1.1232841449753872e-05, + "loss": 0.4186, + "step": 27596 + }, + { + "epoch": 0.4768627315454797, + "grad_norm": 1.4458817292236237, + "learning_rate": 1.1232286067626115e-05, + "loss": 0.2793, + "step": 27597 + }, + { + "epoch": 0.4768800110588886, + "grad_norm": 0.8148142011316888, + "learning_rate": 1.1231730681638741e-05, + "loss": 0.2012, + "step": 27598 + }, + { + "epoch": 0.47689729057229746, + "grad_norm": 0.9824072134172369, + "learning_rate": 1.1231175291793492e-05, + "loss": 0.3757, + "step": 27599 + }, + { + "epoch": 0.47691457008570637, + "grad_norm": 1.3068776887700482, + "learning_rate": 1.123061989809211e-05, + "loss": 0.6178, + "step": 27600 + }, + { + "epoch": 0.4769318495991153, + "grad_norm": 1.0306959142974637, + "learning_rate": 1.1230064500536335e-05, + "loss": 0.3965, + "step": 27601 + }, + { + "epoch": 0.4769491291125242, + "grad_norm": 0.5745443808254849, + "learning_rate": 1.12295090991279e-05, + "loss": 0.3563, + "step": 27602 + }, + { + "epoch": 0.4769664086259331, + "grad_norm": 0.6126701982809084, + "learning_rate": 1.1228953693868552e-05, + "loss": 0.4841, + "step": 27603 + }, + { + "epoch": 0.476983688139342, + "grad_norm": 0.6787192570778892, + "learning_rate": 1.1228398284760027e-05, + "loss": 0.2914, + "step": 27604 + }, + { + "epoch": 0.4770009676527509, + "grad_norm": 1.412757497889449, + "learning_rate": 1.1227842871804064e-05, + "loss": 0.4634, + "step": 27605 + }, + { + "epoch": 0.4770182471661598, + "grad_norm": 1.0269051384222494, + "learning_rate": 1.1227287455002404e-05, + "loss": 0.36, + "step": 27606 + }, + { + "epoch": 0.4770355266795687, + "grad_norm": 1.0286236854966817, + "learning_rate": 1.122673203435679e-05, + "loss": 0.6341, + "step": 27607 + }, + { + "epoch": 0.47705280619297763, + "grad_norm": 0.7328726434484752, + "learning_rate": 1.1226176609868954e-05, + "loss": 0.4046, + "step": 27608 + }, + { + "epoch": 0.4770700857063865, + "grad_norm": 1.2977298089635632, + "learning_rate": 1.122562118154064e-05, + "loss": 0.6186, + "step": 27609 + }, + { + "epoch": 0.4770873652197954, + "grad_norm": 0.9414123960992953, + "learning_rate": 1.1225065749373589e-05, + "loss": 0.3797, + "step": 27610 + }, + { + "epoch": 0.4771046447332043, + "grad_norm": 0.853866964657297, + "learning_rate": 1.1224510313369537e-05, + "loss": 0.3845, + "step": 27611 + }, + { + "epoch": 0.4771219242466132, + "grad_norm": 0.9539095743066908, + "learning_rate": 1.1223954873530229e-05, + "loss": 0.4993, + "step": 27612 + }, + { + "epoch": 0.4771392037600221, + "grad_norm": 0.6838554883798363, + "learning_rate": 1.1223399429857398e-05, + "loss": 0.7521, + "step": 27613 + }, + { + "epoch": 0.477156483273431, + "grad_norm": 0.8689662784917634, + "learning_rate": 1.1222843982352788e-05, + "loss": 0.4269, + "step": 27614 + }, + { + "epoch": 0.47717376278683993, + "grad_norm": 1.031954409878971, + "learning_rate": 1.1222288531018138e-05, + "loss": 0.3174, + "step": 27615 + }, + { + "epoch": 0.47719104230024884, + "grad_norm": 0.9865083605028565, + "learning_rate": 1.1221733075855185e-05, + "loss": 0.4574, + "step": 27616 + }, + { + "epoch": 0.47720832181365774, + "grad_norm": 1.1387480655569695, + "learning_rate": 1.1221177616865675e-05, + "loss": 0.5295, + "step": 27617 + }, + { + "epoch": 0.47722560132706665, + "grad_norm": 1.0019896520088651, + "learning_rate": 1.1220622154051343e-05, + "loss": 0.2809, + "step": 27618 + }, + { + "epoch": 0.47724288084047556, + "grad_norm": 0.6850817788145537, + "learning_rate": 1.1220066687413927e-05, + "loss": 0.3751, + "step": 27619 + }, + { + "epoch": 0.4772601603538844, + "grad_norm": 1.217436442214371, + "learning_rate": 1.1219511216955172e-05, + "loss": 0.3816, + "step": 27620 + }, + { + "epoch": 0.4772774398672933, + "grad_norm": 0.7920965150297337, + "learning_rate": 1.1218955742676815e-05, + "loss": 0.4385, + "step": 27621 + }, + { + "epoch": 0.4772947193807022, + "grad_norm": 1.0385903044727371, + "learning_rate": 1.1218400264580596e-05, + "loss": 0.4761, + "step": 27622 + }, + { + "epoch": 0.47731199889411113, + "grad_norm": 0.6788955274488279, + "learning_rate": 1.1217844782668255e-05, + "loss": 0.7805, + "step": 27623 + }, + { + "epoch": 0.47732927840752004, + "grad_norm": 0.5686267652365508, + "learning_rate": 1.1217289296941532e-05, + "loss": 0.5702, + "step": 27624 + }, + { + "epoch": 0.47734655792092895, + "grad_norm": 0.9662909285307705, + "learning_rate": 1.1216733807402162e-05, + "loss": 0.4077, + "step": 27625 + }, + { + "epoch": 0.47736383743433786, + "grad_norm": 0.8128474705566328, + "learning_rate": 1.1216178314051896e-05, + "loss": 0.3027, + "step": 27626 + }, + { + "epoch": 0.47738111694774676, + "grad_norm": 1.1213383504298464, + "learning_rate": 1.1215622816892462e-05, + "loss": 0.2748, + "step": 27627 + }, + { + "epoch": 0.47739839646115567, + "grad_norm": 0.9240687235612494, + "learning_rate": 1.1215067315925605e-05, + "loss": 0.3464, + "step": 27628 + }, + { + "epoch": 0.4774156759745646, + "grad_norm": 0.9200902662290235, + "learning_rate": 1.1214511811153068e-05, + "loss": 0.4428, + "step": 27629 + }, + { + "epoch": 0.47743295548797343, + "grad_norm": 1.0503652399918575, + "learning_rate": 1.1213956302576587e-05, + "loss": 0.3873, + "step": 27630 + }, + { + "epoch": 0.47745023500138234, + "grad_norm": 0.9403102895116165, + "learning_rate": 1.1213400790197899e-05, + "loss": 0.617, + "step": 27631 + }, + { + "epoch": 0.47746751451479125, + "grad_norm": 1.4402964357168437, + "learning_rate": 1.1212845274018753e-05, + "loss": 0.3764, + "step": 27632 + }, + { + "epoch": 0.47748479402820015, + "grad_norm": 1.0187569917864363, + "learning_rate": 1.1212289754040879e-05, + "loss": 0.5742, + "step": 27633 + }, + { + "epoch": 0.47750207354160906, + "grad_norm": 0.7317718895830877, + "learning_rate": 1.1211734230266025e-05, + "loss": 0.5409, + "step": 27634 + }, + { + "epoch": 0.47751935305501797, + "grad_norm": 0.9033219306131322, + "learning_rate": 1.1211178702695926e-05, + "loss": 0.4138, + "step": 27635 + }, + { + "epoch": 0.4775366325684269, + "grad_norm": 0.9595416685260061, + "learning_rate": 1.1210623171332319e-05, + "loss": 0.4822, + "step": 27636 + }, + { + "epoch": 0.4775539120818358, + "grad_norm": 0.9632578340947154, + "learning_rate": 1.1210067636176954e-05, + "loss": 0.4601, + "step": 27637 + }, + { + "epoch": 0.4775711915952447, + "grad_norm": 1.0230682567848173, + "learning_rate": 1.1209512097231564e-05, + "loss": 0.522, + "step": 27638 + }, + { + "epoch": 0.4775884711086536, + "grad_norm": 1.2125485983990192, + "learning_rate": 1.1208956554497888e-05, + "loss": 0.4803, + "step": 27639 + }, + { + "epoch": 0.4776057506220625, + "grad_norm": 1.002919691280227, + "learning_rate": 1.120840100797767e-05, + "loss": 0.5272, + "step": 27640 + }, + { + "epoch": 0.47762303013547136, + "grad_norm": 0.5320186736438125, + "learning_rate": 1.1207845457672648e-05, + "loss": 0.5571, + "step": 27641 + }, + { + "epoch": 0.47764030964888027, + "grad_norm": 1.1300321897977208, + "learning_rate": 1.1207289903584562e-05, + "loss": 0.2869, + "step": 27642 + }, + { + "epoch": 0.4776575891622892, + "grad_norm": 1.006916181288318, + "learning_rate": 1.1206734345715153e-05, + "loss": 0.4971, + "step": 27643 + }, + { + "epoch": 0.4776748686756981, + "grad_norm": 1.1280902148600378, + "learning_rate": 1.1206178784066159e-05, + "loss": 0.6399, + "step": 27644 + }, + { + "epoch": 0.477692148189107, + "grad_norm": 0.8655085012485064, + "learning_rate": 1.120562321863932e-05, + "loss": 0.42, + "step": 27645 + }, + { + "epoch": 0.4777094277025159, + "grad_norm": 1.3926928691810183, + "learning_rate": 1.1205067649436382e-05, + "loss": 0.6469, + "step": 27646 + }, + { + "epoch": 0.4777267072159248, + "grad_norm": 1.7398603793546024, + "learning_rate": 1.1204512076459078e-05, + "loss": 0.3708, + "step": 27647 + }, + { + "epoch": 0.4777439867293337, + "grad_norm": 0.9410100460358517, + "learning_rate": 1.1203956499709148e-05, + "loss": 0.4365, + "step": 27648 + }, + { + "epoch": 0.4777612662427426, + "grad_norm": 2.029047412746165, + "learning_rate": 1.1203400919188338e-05, + "loss": 0.6165, + "step": 27649 + }, + { + "epoch": 0.47777854575615153, + "grad_norm": 1.0898863733916042, + "learning_rate": 1.1202845334898384e-05, + "loss": 0.4404, + "step": 27650 + }, + { + "epoch": 0.47779582526956044, + "grad_norm": 0.9752785396765358, + "learning_rate": 1.1202289746841026e-05, + "loss": 0.52, + "step": 27651 + }, + { + "epoch": 0.4778131047829693, + "grad_norm": 1.307786588344348, + "learning_rate": 1.1201734155018006e-05, + "loss": 0.656, + "step": 27652 + }, + { + "epoch": 0.4778303842963782, + "grad_norm": 0.6310697842882564, + "learning_rate": 1.1201178559431065e-05, + "loss": 0.3523, + "step": 27653 + }, + { + "epoch": 0.4778476638097871, + "grad_norm": 0.4570775105963636, + "learning_rate": 1.120062296008194e-05, + "loss": 0.517, + "step": 27654 + }, + { + "epoch": 0.477864943323196, + "grad_norm": 1.0295588529990358, + "learning_rate": 1.1200067356972372e-05, + "loss": 0.4769, + "step": 27655 + }, + { + "epoch": 0.4778822228366049, + "grad_norm": 0.7650032743118602, + "learning_rate": 1.1199511750104101e-05, + "loss": 0.3697, + "step": 27656 + }, + { + "epoch": 0.4778995023500138, + "grad_norm": 0.8079670690368216, + "learning_rate": 1.1198956139478871e-05, + "loss": 0.4049, + "step": 27657 + }, + { + "epoch": 0.47791678186342273, + "grad_norm": 0.8546021848899221, + "learning_rate": 1.1198400525098419e-05, + "loss": 0.4355, + "step": 27658 + }, + { + "epoch": 0.47793406137683164, + "grad_norm": 0.6555758445228176, + "learning_rate": 1.119784490696448e-05, + "loss": 0.2928, + "step": 27659 + }, + { + "epoch": 0.47795134089024055, + "grad_norm": 1.3118264971080416, + "learning_rate": 1.1197289285078807e-05, + "loss": 0.2939, + "step": 27660 + }, + { + "epoch": 0.47796862040364946, + "grad_norm": 0.83201969522436, + "learning_rate": 1.1196733659443131e-05, + "loss": 0.3987, + "step": 27661 + }, + { + "epoch": 0.4779858999170583, + "grad_norm": 1.006551549622516, + "learning_rate": 1.1196178030059191e-05, + "loss": 0.5966, + "step": 27662 + }, + { + "epoch": 0.4780031794304672, + "grad_norm": 1.123842457184241, + "learning_rate": 1.1195622396928736e-05, + "loss": 0.3424, + "step": 27663 + }, + { + "epoch": 0.4780204589438761, + "grad_norm": 0.9726482515073405, + "learning_rate": 1.1195066760053496e-05, + "loss": 0.4268, + "step": 27664 + }, + { + "epoch": 0.47803773845728503, + "grad_norm": 0.7447457079204006, + "learning_rate": 1.1194511119435218e-05, + "loss": 0.4689, + "step": 27665 + }, + { + "epoch": 0.47805501797069394, + "grad_norm": 1.0260781418147855, + "learning_rate": 1.119395547507564e-05, + "loss": 0.4784, + "step": 27666 + }, + { + "epoch": 0.47807229748410285, + "grad_norm": 1.1425907474677173, + "learning_rate": 1.1193399826976504e-05, + "loss": 0.3915, + "step": 27667 + }, + { + "epoch": 0.47808957699751176, + "grad_norm": 1.2921127069895946, + "learning_rate": 1.1192844175139545e-05, + "loss": 0.5043, + "step": 27668 + }, + { + "epoch": 0.47810685651092066, + "grad_norm": 1.187779336755055, + "learning_rate": 1.119228851956651e-05, + "loss": 0.5111, + "step": 27669 + }, + { + "epoch": 0.47812413602432957, + "grad_norm": 0.9397946464843377, + "learning_rate": 1.1191732860259139e-05, + "loss": 0.399, + "step": 27670 + }, + { + "epoch": 0.4781414155377385, + "grad_norm": 1.3852037160470436, + "learning_rate": 1.1191177197219165e-05, + "loss": 0.4843, + "step": 27671 + }, + { + "epoch": 0.4781586950511474, + "grad_norm": 0.8244323557021139, + "learning_rate": 1.1190621530448337e-05, + "loss": 0.3966, + "step": 27672 + }, + { + "epoch": 0.47817597456455624, + "grad_norm": 1.1673379626272409, + "learning_rate": 1.1190065859948391e-05, + "loss": 0.6812, + "step": 27673 + }, + { + "epoch": 0.47819325407796515, + "grad_norm": 1.262674210802511, + "learning_rate": 1.118951018572107e-05, + "loss": 0.525, + "step": 27674 + }, + { + "epoch": 0.47821053359137405, + "grad_norm": 0.8025342253549234, + "learning_rate": 1.1188954507768113e-05, + "loss": 0.4537, + "step": 27675 + }, + { + "epoch": 0.47822781310478296, + "grad_norm": 0.9401259408466706, + "learning_rate": 1.1188398826091256e-05, + "loss": 0.43, + "step": 27676 + }, + { + "epoch": 0.47824509261819187, + "grad_norm": 1.53184430654238, + "learning_rate": 1.1187843140692248e-05, + "loss": 0.472, + "step": 27677 + }, + { + "epoch": 0.4782623721316008, + "grad_norm": 0.7988500432857107, + "learning_rate": 1.1187287451572822e-05, + "loss": 0.6908, + "step": 27678 + }, + { + "epoch": 0.4782796516450097, + "grad_norm": 0.48926246086152414, + "learning_rate": 1.1186731758734722e-05, + "loss": 0.5045, + "step": 27679 + }, + { + "epoch": 0.4782969311584186, + "grad_norm": 0.6901158657343672, + "learning_rate": 1.1186176062179688e-05, + "loss": 0.4579, + "step": 27680 + }, + { + "epoch": 0.4783142106718275, + "grad_norm": 0.9699191984946143, + "learning_rate": 1.1185620361909462e-05, + "loss": 0.5041, + "step": 27681 + }, + { + "epoch": 0.4783314901852364, + "grad_norm": 0.5707929974919874, + "learning_rate": 1.118506465792578e-05, + "loss": 0.6501, + "step": 27682 + }, + { + "epoch": 0.47834876969864526, + "grad_norm": 1.0599723601983326, + "learning_rate": 1.1184508950230388e-05, + "loss": 0.2837, + "step": 27683 + }, + { + "epoch": 0.47836604921205417, + "grad_norm": 1.0648610098561857, + "learning_rate": 1.1183953238825025e-05, + "loss": 0.5781, + "step": 27684 + }, + { + "epoch": 0.4783833287254631, + "grad_norm": 1.1965644475767265, + "learning_rate": 1.1183397523711427e-05, + "loss": 0.4609, + "step": 27685 + }, + { + "epoch": 0.478400608238872, + "grad_norm": 0.8708178780145344, + "learning_rate": 1.118284180489134e-05, + "loss": 0.674, + "step": 27686 + }, + { + "epoch": 0.4784178877522809, + "grad_norm": 1.0432596811459107, + "learning_rate": 1.1182286082366503e-05, + "loss": 0.5522, + "step": 27687 + }, + { + "epoch": 0.4784351672656898, + "grad_norm": 0.8570510143067381, + "learning_rate": 1.1181730356138654e-05, + "loss": 0.4365, + "step": 27688 + }, + { + "epoch": 0.4784524467790987, + "grad_norm": 0.9732383075908819, + "learning_rate": 1.118117462620954e-05, + "loss": 0.3897, + "step": 27689 + }, + { + "epoch": 0.4784697262925076, + "grad_norm": 0.5404052577657844, + "learning_rate": 1.1180618892580892e-05, + "loss": 0.6197, + "step": 27690 + }, + { + "epoch": 0.4784870058059165, + "grad_norm": 0.7057039204381222, + "learning_rate": 1.1180063155254457e-05, + "loss": 0.2556, + "step": 27691 + }, + { + "epoch": 0.47850428531932543, + "grad_norm": 1.054102269726023, + "learning_rate": 1.1179507414231976e-05, + "loss": 0.5709, + "step": 27692 + }, + { + "epoch": 0.47852156483273434, + "grad_norm": 1.5002102354813776, + "learning_rate": 1.117895166951519e-05, + "loss": 0.5133, + "step": 27693 + }, + { + "epoch": 0.4785388443461432, + "grad_norm": 0.7492158279390888, + "learning_rate": 1.1178395921105835e-05, + "loss": 0.6065, + "step": 27694 + }, + { + "epoch": 0.4785561238595521, + "grad_norm": 1.3650039393429731, + "learning_rate": 1.1177840169005656e-05, + "loss": 0.3816, + "step": 27695 + }, + { + "epoch": 0.478573403372961, + "grad_norm": 0.8790832092070902, + "learning_rate": 1.1177284413216389e-05, + "loss": 0.5085, + "step": 27696 + }, + { + "epoch": 0.4785906828863699, + "grad_norm": 0.6807891942231374, + "learning_rate": 1.117672865373978e-05, + "loss": 0.365, + "step": 27697 + }, + { + "epoch": 0.4786079623997788, + "grad_norm": 0.40731668472855953, + "learning_rate": 1.1176172890577571e-05, + "loss": 0.4564, + "step": 27698 + }, + { + "epoch": 0.4786252419131877, + "grad_norm": 0.7962502600299802, + "learning_rate": 1.1175617123731493e-05, + "loss": 0.5805, + "step": 27699 + }, + { + "epoch": 0.47864252142659663, + "grad_norm": 0.7656660512625235, + "learning_rate": 1.1175061353203295e-05, + "loss": 0.3818, + "step": 27700 + }, + { + "epoch": 0.47865980094000554, + "grad_norm": 1.066769273016932, + "learning_rate": 1.1174505578994716e-05, + "loss": 0.4426, + "step": 27701 + }, + { + "epoch": 0.47867708045341445, + "grad_norm": 0.7405978552380449, + "learning_rate": 1.1173949801107496e-05, + "loss": 0.269, + "step": 27702 + }, + { + "epoch": 0.47869435996682336, + "grad_norm": 1.2792337710572188, + "learning_rate": 1.1173394019543375e-05, + "loss": 0.4023, + "step": 27703 + }, + { + "epoch": 0.4787116394802322, + "grad_norm": 1.2398643206392395, + "learning_rate": 1.11728382343041e-05, + "loss": 0.4986, + "step": 27704 + }, + { + "epoch": 0.4787289189936411, + "grad_norm": 1.1000237739235381, + "learning_rate": 1.11722824453914e-05, + "loss": 0.4599, + "step": 27705 + }, + { + "epoch": 0.47874619850705, + "grad_norm": 0.5954151249387806, + "learning_rate": 1.1171726652807027e-05, + "loss": 0.3954, + "step": 27706 + }, + { + "epoch": 0.47876347802045893, + "grad_norm": 0.8379644706539292, + "learning_rate": 1.1171170856552714e-05, + "loss": 0.5162, + "step": 27707 + }, + { + "epoch": 0.47878075753386784, + "grad_norm": 1.2245933565937401, + "learning_rate": 1.1170615056630206e-05, + "loss": 0.4951, + "step": 27708 + }, + { + "epoch": 0.47879803704727675, + "grad_norm": 1.1735654961703812, + "learning_rate": 1.1170059253041242e-05, + "loss": 0.3449, + "step": 27709 + }, + { + "epoch": 0.47881531656068566, + "grad_norm": 0.7139914609776348, + "learning_rate": 1.1169503445787564e-05, + "loss": 0.4949, + "step": 27710 + }, + { + "epoch": 0.47883259607409456, + "grad_norm": 1.6841133302962072, + "learning_rate": 1.116894763487091e-05, + "loss": 0.3861, + "step": 27711 + }, + { + "epoch": 0.47884987558750347, + "grad_norm": 1.1436667069437378, + "learning_rate": 1.1168391820293025e-05, + "loss": 0.3938, + "step": 27712 + }, + { + "epoch": 0.4788671551009124, + "grad_norm": 0.8012021759530935, + "learning_rate": 1.116783600205565e-05, + "loss": 0.398, + "step": 27713 + }, + { + "epoch": 0.4788844346143213, + "grad_norm": 0.9627434846042988, + "learning_rate": 1.1167280180160522e-05, + "loss": 0.2912, + "step": 27714 + }, + { + "epoch": 0.47890171412773014, + "grad_norm": 0.5731271439659881, + "learning_rate": 1.1166724354609386e-05, + "loss": 0.2142, + "step": 27715 + }, + { + "epoch": 0.47891899364113905, + "grad_norm": 1.1316875700996143, + "learning_rate": 1.1166168525403975e-05, + "loss": 0.5251, + "step": 27716 + }, + { + "epoch": 0.47893627315454795, + "grad_norm": 1.0614285934552679, + "learning_rate": 1.1165612692546042e-05, + "loss": 0.3929, + "step": 27717 + }, + { + "epoch": 0.47895355266795686, + "grad_norm": 0.7484518115448701, + "learning_rate": 1.1165056856037318e-05, + "loss": 0.2892, + "step": 27718 + }, + { + "epoch": 0.47897083218136577, + "grad_norm": 1.0549241531286433, + "learning_rate": 1.1164501015879548e-05, + "loss": 0.2827, + "step": 27719 + }, + { + "epoch": 0.4789881116947747, + "grad_norm": 1.0865798512377742, + "learning_rate": 1.1163945172074474e-05, + "loss": 0.5897, + "step": 27720 + }, + { + "epoch": 0.4790053912081836, + "grad_norm": 1.065502756851382, + "learning_rate": 1.1163389324623833e-05, + "loss": 0.4955, + "step": 27721 + }, + { + "epoch": 0.4790226707215925, + "grad_norm": 0.9647034174418007, + "learning_rate": 1.1162833473529366e-05, + "loss": 0.3435, + "step": 27722 + }, + { + "epoch": 0.4790399502350014, + "grad_norm": 1.1059679458357436, + "learning_rate": 1.116227761879282e-05, + "loss": 0.4125, + "step": 27723 + }, + { + "epoch": 0.4790572297484103, + "grad_norm": 0.9903140491508181, + "learning_rate": 1.1161721760415932e-05, + "loss": 0.4074, + "step": 27724 + }, + { + "epoch": 0.4790745092618192, + "grad_norm": 0.7840011165890448, + "learning_rate": 1.1161165898400444e-05, + "loss": 0.3989, + "step": 27725 + }, + { + "epoch": 0.47909178877522807, + "grad_norm": 0.7343345746140318, + "learning_rate": 1.1160610032748093e-05, + "loss": 0.3836, + "step": 27726 + }, + { + "epoch": 0.479109068288637, + "grad_norm": 1.2642340753627739, + "learning_rate": 1.1160054163460626e-05, + "loss": 0.5127, + "step": 27727 + }, + { + "epoch": 0.4791263478020459, + "grad_norm": 0.6268893368120798, + "learning_rate": 1.1159498290539779e-05, + "loss": 0.8386, + "step": 27728 + }, + { + "epoch": 0.4791436273154548, + "grad_norm": 0.8917448656405585, + "learning_rate": 1.1158942413987298e-05, + "loss": 0.3903, + "step": 27729 + }, + { + "epoch": 0.4791609068288637, + "grad_norm": 1.0418118519146105, + "learning_rate": 1.1158386533804918e-05, + "loss": 0.6467, + "step": 27730 + }, + { + "epoch": 0.4791781863422726, + "grad_norm": 1.0662847527478536, + "learning_rate": 1.1157830649994383e-05, + "loss": 0.3524, + "step": 27731 + }, + { + "epoch": 0.4791954658556815, + "grad_norm": 1.984052427678205, + "learning_rate": 1.1157274762557438e-05, + "loss": 0.4527, + "step": 27732 + }, + { + "epoch": 0.4792127453690904, + "grad_norm": 1.225302439276953, + "learning_rate": 1.1156718871495818e-05, + "loss": 0.401, + "step": 27733 + }, + { + "epoch": 0.47923002488249933, + "grad_norm": 0.5948099667545848, + "learning_rate": 1.1156162976811267e-05, + "loss": 0.2627, + "step": 27734 + }, + { + "epoch": 0.47924730439590824, + "grad_norm": 1.3190848939737936, + "learning_rate": 1.1155607078505529e-05, + "loss": 0.4554, + "step": 27735 + }, + { + "epoch": 0.4792645839093171, + "grad_norm": 0.7308168198666782, + "learning_rate": 1.1155051176580335e-05, + "loss": 0.3595, + "step": 27736 + }, + { + "epoch": 0.479281863422726, + "grad_norm": 0.9705348882901518, + "learning_rate": 1.1154495271037437e-05, + "loss": 0.4774, + "step": 27737 + }, + { + "epoch": 0.4792991429361349, + "grad_norm": 0.8324982016900174, + "learning_rate": 1.1153939361878574e-05, + "loss": 0.4554, + "step": 27738 + }, + { + "epoch": 0.4793164224495438, + "grad_norm": 1.2232287131857171, + "learning_rate": 1.1153383449105482e-05, + "loss": 0.3922, + "step": 27739 + }, + { + "epoch": 0.4793337019629527, + "grad_norm": 0.6725169892390506, + "learning_rate": 1.1152827532719905e-05, + "loss": 0.3322, + "step": 27740 + }, + { + "epoch": 0.4793509814763616, + "grad_norm": 0.8229263007290238, + "learning_rate": 1.1152271612723587e-05, + "loss": 0.5457, + "step": 27741 + }, + { + "epoch": 0.47936826098977053, + "grad_norm": 1.019594167746348, + "learning_rate": 1.1151715689118265e-05, + "loss": 0.3166, + "step": 27742 + }, + { + "epoch": 0.47938554050317944, + "grad_norm": 0.8880532368786898, + "learning_rate": 1.1151159761905682e-05, + "loss": 0.5254, + "step": 27743 + }, + { + "epoch": 0.47940282001658835, + "grad_norm": 1.1139991865043775, + "learning_rate": 1.115060383108758e-05, + "loss": 0.5626, + "step": 27744 + }, + { + "epoch": 0.47942009952999726, + "grad_norm": 0.948596491002581, + "learning_rate": 1.1150047896665702e-05, + "loss": 0.5482, + "step": 27745 + }, + { + "epoch": 0.47943737904340616, + "grad_norm": 1.267879714661122, + "learning_rate": 1.1149491958641782e-05, + "loss": 0.436, + "step": 27746 + }, + { + "epoch": 0.479454658556815, + "grad_norm": 0.9796154514366991, + "learning_rate": 1.1148936017017569e-05, + "loss": 0.3998, + "step": 27747 + }, + { + "epoch": 0.4794719380702239, + "grad_norm": 1.3088343820717008, + "learning_rate": 1.1148380071794797e-05, + "loss": 0.4881, + "step": 27748 + }, + { + "epoch": 0.47948921758363283, + "grad_norm": 0.851290312447268, + "learning_rate": 1.1147824122975216e-05, + "loss": 0.3672, + "step": 27749 + }, + { + "epoch": 0.47950649709704174, + "grad_norm": 1.2338933266850025, + "learning_rate": 1.1147268170560562e-05, + "loss": 0.5781, + "step": 27750 + }, + { + "epoch": 0.47952377661045065, + "grad_norm": 1.4976707856974711, + "learning_rate": 1.1146712214552574e-05, + "loss": 0.5711, + "step": 27751 + }, + { + "epoch": 0.47954105612385955, + "grad_norm": 0.7222109984071724, + "learning_rate": 1.1146156254952998e-05, + "loss": 0.4457, + "step": 27752 + }, + { + "epoch": 0.47955833563726846, + "grad_norm": 1.194459301183923, + "learning_rate": 1.1145600291763572e-05, + "loss": 0.5801, + "step": 27753 + }, + { + "epoch": 0.47957561515067737, + "grad_norm": 0.9720225730823305, + "learning_rate": 1.1145044324986038e-05, + "loss": 0.4122, + "step": 27754 + }, + { + "epoch": 0.4795928946640863, + "grad_norm": 0.9628223289688195, + "learning_rate": 1.1144488354622143e-05, + "loss": 0.332, + "step": 27755 + }, + { + "epoch": 0.4796101741774952, + "grad_norm": 0.7537293194826743, + "learning_rate": 1.1143932380673621e-05, + "loss": 0.4267, + "step": 27756 + }, + { + "epoch": 0.47962745369090404, + "grad_norm": 1.0009217020611825, + "learning_rate": 1.1143376403142214e-05, + "loss": 0.4376, + "step": 27757 + }, + { + "epoch": 0.47964473320431295, + "grad_norm": 0.9843229971971833, + "learning_rate": 1.1142820422029667e-05, + "loss": 0.4665, + "step": 27758 + }, + { + "epoch": 0.47966201271772185, + "grad_norm": 0.9615168636211916, + "learning_rate": 1.114226443733772e-05, + "loss": 0.4642, + "step": 27759 + }, + { + "epoch": 0.47967929223113076, + "grad_norm": 0.947007037687527, + "learning_rate": 1.1141708449068114e-05, + "loss": 0.4883, + "step": 27760 + }, + { + "epoch": 0.47969657174453967, + "grad_norm": 1.143428387772876, + "learning_rate": 1.1141152457222588e-05, + "loss": 0.3787, + "step": 27761 + }, + { + "epoch": 0.4797138512579486, + "grad_norm": 0.7996753047504618, + "learning_rate": 1.1140596461802887e-05, + "loss": 0.4119, + "step": 27762 + }, + { + "epoch": 0.4797311307713575, + "grad_norm": 1.0063779005620768, + "learning_rate": 1.1140040462810753e-05, + "loss": 0.4511, + "step": 27763 + }, + { + "epoch": 0.4797484102847664, + "grad_norm": 0.9486473671046252, + "learning_rate": 1.1139484460247925e-05, + "loss": 0.5487, + "step": 27764 + }, + { + "epoch": 0.4797656897981753, + "grad_norm": 0.8864345032616331, + "learning_rate": 1.1138928454116143e-05, + "loss": 0.5424, + "step": 27765 + }, + { + "epoch": 0.4797829693115842, + "grad_norm": 0.7083995856836507, + "learning_rate": 1.1138372444417154e-05, + "loss": 0.208, + "step": 27766 + }, + { + "epoch": 0.4798002488249931, + "grad_norm": 0.7105101476896524, + "learning_rate": 1.1137816431152693e-05, + "loss": 0.3888, + "step": 27767 + }, + { + "epoch": 0.47981752833840197, + "grad_norm": 0.8102982606419011, + "learning_rate": 1.1137260414324504e-05, + "loss": 0.2856, + "step": 27768 + }, + { + "epoch": 0.4798348078518109, + "grad_norm": 0.7840488529757907, + "learning_rate": 1.1136704393934333e-05, + "loss": 0.3907, + "step": 27769 + }, + { + "epoch": 0.4798520873652198, + "grad_norm": 0.940299185836781, + "learning_rate": 1.1136148369983913e-05, + "loss": 0.3978, + "step": 27770 + }, + { + "epoch": 0.4798693668786287, + "grad_norm": 0.8213352860478451, + "learning_rate": 1.113559234247499e-05, + "loss": 0.3505, + "step": 27771 + }, + { + "epoch": 0.4798866463920376, + "grad_norm": 0.7599111349130578, + "learning_rate": 1.1135036311409308e-05, + "loss": 0.4198, + "step": 27772 + }, + { + "epoch": 0.4799039259054465, + "grad_norm": 0.9096562318825896, + "learning_rate": 1.1134480276788607e-05, + "loss": 0.4293, + "step": 27773 + }, + { + "epoch": 0.4799212054188554, + "grad_norm": 1.5950311112451623, + "learning_rate": 1.1133924238614623e-05, + "loss": 0.3326, + "step": 27774 + }, + { + "epoch": 0.4799384849322643, + "grad_norm": 1.4444014854823948, + "learning_rate": 1.1133368196889105e-05, + "loss": 0.4123, + "step": 27775 + }, + { + "epoch": 0.4799557644456732, + "grad_norm": 0.817762460386064, + "learning_rate": 1.1132812151613792e-05, + "loss": 0.33, + "step": 27776 + }, + { + "epoch": 0.47997304395908214, + "grad_norm": 0.8968955054991519, + "learning_rate": 1.1132256102790426e-05, + "loss": 0.2231, + "step": 27777 + }, + { + "epoch": 0.479990323472491, + "grad_norm": 0.8342148383672241, + "learning_rate": 1.1131700050420746e-05, + "loss": 0.3895, + "step": 27778 + }, + { + "epoch": 0.4800076029858999, + "grad_norm": 1.030249417676904, + "learning_rate": 1.1131143994506495e-05, + "loss": 0.4551, + "step": 27779 + }, + { + "epoch": 0.4800248824993088, + "grad_norm": 0.850715072987357, + "learning_rate": 1.1130587935049417e-05, + "loss": 0.4659, + "step": 27780 + }, + { + "epoch": 0.4800421620127177, + "grad_norm": 1.157799655670667, + "learning_rate": 1.1130031872051253e-05, + "loss": 0.5815, + "step": 27781 + }, + { + "epoch": 0.4800594415261266, + "grad_norm": 0.9614986842001739, + "learning_rate": 1.112947580551374e-05, + "loss": 0.4301, + "step": 27782 + }, + { + "epoch": 0.4800767210395355, + "grad_norm": 1.1205348406040454, + "learning_rate": 1.1128919735438621e-05, + "loss": 0.2544, + "step": 27783 + }, + { + "epoch": 0.48009400055294443, + "grad_norm": 1.0635639696230867, + "learning_rate": 1.1128363661827644e-05, + "loss": 0.326, + "step": 27784 + }, + { + "epoch": 0.48011128006635334, + "grad_norm": 0.8211258593902234, + "learning_rate": 1.1127807584682543e-05, + "loss": 0.3334, + "step": 27785 + }, + { + "epoch": 0.48012855957976225, + "grad_norm": 0.8270039372283513, + "learning_rate": 1.1127251504005065e-05, + "loss": 0.3152, + "step": 27786 + }, + { + "epoch": 0.48014583909317116, + "grad_norm": 1.1005861380453235, + "learning_rate": 1.1126695419796953e-05, + "loss": 0.414, + "step": 27787 + }, + { + "epoch": 0.48016311860658006, + "grad_norm": 0.9663191292452685, + "learning_rate": 1.112613933205994e-05, + "loss": 0.4307, + "step": 27788 + }, + { + "epoch": 0.4801803981199889, + "grad_norm": 1.2287535291229021, + "learning_rate": 1.1125583240795776e-05, + "loss": 0.4582, + "step": 27789 + }, + { + "epoch": 0.4801976776333978, + "grad_norm": 1.043144458907337, + "learning_rate": 1.1125027146006198e-05, + "loss": 0.5884, + "step": 27790 + }, + { + "epoch": 0.48021495714680673, + "grad_norm": 0.9611806283451749, + "learning_rate": 1.1124471047692948e-05, + "loss": 0.5179, + "step": 27791 + }, + { + "epoch": 0.48023223666021564, + "grad_norm": 0.7183850310081201, + "learning_rate": 1.1123914945857772e-05, + "loss": 0.4724, + "step": 27792 + }, + { + "epoch": 0.48024951617362455, + "grad_norm": 0.7740814098360321, + "learning_rate": 1.1123358840502409e-05, + "loss": 0.5206, + "step": 27793 + }, + { + "epoch": 0.48026679568703345, + "grad_norm": 1.026174621757482, + "learning_rate": 1.11228027316286e-05, + "loss": 0.5116, + "step": 27794 + }, + { + "epoch": 0.48028407520044236, + "grad_norm": 0.8473253075684037, + "learning_rate": 1.1122246619238086e-05, + "loss": 0.3032, + "step": 27795 + }, + { + "epoch": 0.48030135471385127, + "grad_norm": 1.4374616937593676, + "learning_rate": 1.1121690503332613e-05, + "loss": 0.3626, + "step": 27796 + }, + { + "epoch": 0.4803186342272602, + "grad_norm": 0.8260942604868913, + "learning_rate": 1.1121134383913918e-05, + "loss": 0.3177, + "step": 27797 + }, + { + "epoch": 0.4803359137406691, + "grad_norm": 1.0612575380708242, + "learning_rate": 1.1120578260983748e-05, + "loss": 0.4338, + "step": 27798 + }, + { + "epoch": 0.480353193254078, + "grad_norm": 0.9697943560004528, + "learning_rate": 1.1120022134543837e-05, + "loss": 0.2821, + "step": 27799 + }, + { + "epoch": 0.48037047276748684, + "grad_norm": 0.8413227416222046, + "learning_rate": 1.1119466004595936e-05, + "loss": 0.3522, + "step": 27800 + }, + { + "epoch": 0.48038775228089575, + "grad_norm": 1.034828726713592, + "learning_rate": 1.1118909871141781e-05, + "loss": 0.5311, + "step": 27801 + }, + { + "epoch": 0.48040503179430466, + "grad_norm": 1.0483842105723415, + "learning_rate": 1.1118353734183112e-05, + "loss": 0.3422, + "step": 27802 + }, + { + "epoch": 0.48042231130771357, + "grad_norm": 1.5444851372993562, + "learning_rate": 1.1117797593721677e-05, + "loss": 0.5102, + "step": 27803 + }, + { + "epoch": 0.4804395908211225, + "grad_norm": 0.9026552658538408, + "learning_rate": 1.1117241449759216e-05, + "loss": 0.4014, + "step": 27804 + }, + { + "epoch": 0.4804568703345314, + "grad_norm": 1.1946557528724813, + "learning_rate": 1.1116685302297467e-05, + "loss": 0.4397, + "step": 27805 + }, + { + "epoch": 0.4804741498479403, + "grad_norm": 0.8094098844633487, + "learning_rate": 1.1116129151338177e-05, + "loss": 0.3668, + "step": 27806 + }, + { + "epoch": 0.4804914293613492, + "grad_norm": 1.8448957942733362, + "learning_rate": 1.1115572996883088e-05, + "loss": 0.4513, + "step": 27807 + }, + { + "epoch": 0.4805087088747581, + "grad_norm": 1.4468822181385406, + "learning_rate": 1.1115016838933935e-05, + "loss": 0.4457, + "step": 27808 + }, + { + "epoch": 0.480525988388167, + "grad_norm": 0.8806112193901078, + "learning_rate": 1.1114460677492466e-05, + "loss": 0.4234, + "step": 27809 + }, + { + "epoch": 0.48054326790157587, + "grad_norm": 1.2510368425601175, + "learning_rate": 1.1113904512560422e-05, + "loss": 0.513, + "step": 27810 + }, + { + "epoch": 0.4805605474149848, + "grad_norm": 1.1254372426579389, + "learning_rate": 1.1113348344139543e-05, + "loss": 0.5433, + "step": 27811 + }, + { + "epoch": 0.4805778269283937, + "grad_norm": 1.1531492047517555, + "learning_rate": 1.1112792172231574e-05, + "loss": 0.39, + "step": 27812 + }, + { + "epoch": 0.4805951064418026, + "grad_norm": 1.0339938264531565, + "learning_rate": 1.1112235996838255e-05, + "loss": 0.6036, + "step": 27813 + }, + { + "epoch": 0.4806123859552115, + "grad_norm": 0.9880094461749146, + "learning_rate": 1.1111679817961328e-05, + "loss": 0.379, + "step": 27814 + }, + { + "epoch": 0.4806296654686204, + "grad_norm": 1.0531919634321942, + "learning_rate": 1.1111123635602535e-05, + "loss": 0.4594, + "step": 27815 + }, + { + "epoch": 0.4806469449820293, + "grad_norm": 1.2044667892965468, + "learning_rate": 1.1110567449763619e-05, + "loss": 0.3324, + "step": 27816 + }, + { + "epoch": 0.4806642244954382, + "grad_norm": 1.0509512084669859, + "learning_rate": 1.1110011260446323e-05, + "loss": 0.332, + "step": 27817 + }, + { + "epoch": 0.4806815040088471, + "grad_norm": 0.7673785839840707, + "learning_rate": 1.1109455067652384e-05, + "loss": 0.3971, + "step": 27818 + }, + { + "epoch": 0.48069878352225603, + "grad_norm": 0.9719732691647263, + "learning_rate": 1.110889887138355e-05, + "loss": 0.3988, + "step": 27819 + }, + { + "epoch": 0.48071606303566494, + "grad_norm": 0.9921504565638284, + "learning_rate": 1.1108342671641558e-05, + "loss": 0.4433, + "step": 27820 + }, + { + "epoch": 0.4807333425490738, + "grad_norm": 0.9926287156622448, + "learning_rate": 1.1107786468428155e-05, + "loss": 0.3038, + "step": 27821 + }, + { + "epoch": 0.4807506220624827, + "grad_norm": 0.9741912612544564, + "learning_rate": 1.1107230261745076e-05, + "loss": 0.2799, + "step": 27822 + }, + { + "epoch": 0.4807679015758916, + "grad_norm": 0.6039622678118068, + "learning_rate": 1.1106674051594073e-05, + "loss": 0.3089, + "step": 27823 + }, + { + "epoch": 0.4807851810893005, + "grad_norm": 0.7709025535421611, + "learning_rate": 1.1106117837976883e-05, + "loss": 0.4012, + "step": 27824 + }, + { + "epoch": 0.4808024606027094, + "grad_norm": 1.8260858256073735, + "learning_rate": 1.1105561620895243e-05, + "loss": 0.565, + "step": 27825 + }, + { + "epoch": 0.48081974011611833, + "grad_norm": 1.0085462195595658, + "learning_rate": 1.1105005400350903e-05, + "loss": 0.4733, + "step": 27826 + }, + { + "epoch": 0.48083701962952724, + "grad_norm": 1.0941142960830368, + "learning_rate": 1.1104449176345603e-05, + "loss": 0.3818, + "step": 27827 + }, + { + "epoch": 0.48085429914293615, + "grad_norm": 1.1693850064926525, + "learning_rate": 1.1103892948881084e-05, + "loss": 0.3294, + "step": 27828 + }, + { + "epoch": 0.48087157865634506, + "grad_norm": 0.6443257488208235, + "learning_rate": 1.1103336717959088e-05, + "loss": 0.3529, + "step": 27829 + }, + { + "epoch": 0.48088885816975396, + "grad_norm": 1.1400161253573668, + "learning_rate": 1.1102780483581357e-05, + "loss": 0.488, + "step": 27830 + }, + { + "epoch": 0.4809061376831628, + "grad_norm": 1.3997089177818944, + "learning_rate": 1.1102224245749631e-05, + "loss": 0.5142, + "step": 27831 + }, + { + "epoch": 0.4809234171965717, + "grad_norm": 0.8910922604392403, + "learning_rate": 1.110166800446566e-05, + "loss": 0.4598, + "step": 27832 + }, + { + "epoch": 0.48094069670998063, + "grad_norm": 1.6449619242644056, + "learning_rate": 1.1101111759731179e-05, + "loss": 0.4598, + "step": 27833 + }, + { + "epoch": 0.48095797622338954, + "grad_norm": 1.3461827833986217, + "learning_rate": 1.110055551154793e-05, + "loss": 0.4231, + "step": 27834 + }, + { + "epoch": 0.48097525573679845, + "grad_norm": 0.78139604044982, + "learning_rate": 1.109999925991766e-05, + "loss": 0.423, + "step": 27835 + }, + { + "epoch": 0.48099253525020735, + "grad_norm": 0.4272982323496539, + "learning_rate": 1.109944300484211e-05, + "loss": 0.5145, + "step": 27836 + }, + { + "epoch": 0.48100981476361626, + "grad_norm": 0.7199003216875888, + "learning_rate": 1.1098886746323017e-05, + "loss": 0.4546, + "step": 27837 + }, + { + "epoch": 0.48102709427702517, + "grad_norm": 1.3048186638961456, + "learning_rate": 1.1098330484362132e-05, + "loss": 0.6274, + "step": 27838 + }, + { + "epoch": 0.4810443737904341, + "grad_norm": 0.8979721833718374, + "learning_rate": 1.1097774218961188e-05, + "loss": 0.4889, + "step": 27839 + }, + { + "epoch": 0.481061653303843, + "grad_norm": 1.117990725123732, + "learning_rate": 1.1097217950121935e-05, + "loss": 0.3248, + "step": 27840 + }, + { + "epoch": 0.4810789328172519, + "grad_norm": 1.124524187121184, + "learning_rate": 1.109666167784611e-05, + "loss": 0.5373, + "step": 27841 + }, + { + "epoch": 0.48109621233066074, + "grad_norm": 0.7817382094715308, + "learning_rate": 1.1096105402135457e-05, + "loss": 0.4585, + "step": 27842 + }, + { + "epoch": 0.48111349184406965, + "grad_norm": 1.0089106030253976, + "learning_rate": 1.109554912299172e-05, + "loss": 0.4645, + "step": 27843 + }, + { + "epoch": 0.48113077135747856, + "grad_norm": 1.5940188754310125, + "learning_rate": 1.109499284041664e-05, + "loss": 0.4254, + "step": 27844 + }, + { + "epoch": 0.48114805087088747, + "grad_norm": 1.0791564289197202, + "learning_rate": 1.1094436554411956e-05, + "loss": 0.5962, + "step": 27845 + }, + { + "epoch": 0.4811653303842964, + "grad_norm": 0.9492984546014233, + "learning_rate": 1.1093880264979417e-05, + "loss": 0.4282, + "step": 27846 + }, + { + "epoch": 0.4811826098977053, + "grad_norm": 1.4473612863108358, + "learning_rate": 1.1093323972120763e-05, + "loss": 0.5494, + "step": 27847 + }, + { + "epoch": 0.4811998894111142, + "grad_norm": 1.6757295819698317, + "learning_rate": 1.1092767675837732e-05, + "loss": 0.4662, + "step": 27848 + }, + { + "epoch": 0.4812171689245231, + "grad_norm": 1.0055128560323627, + "learning_rate": 1.1092211376132072e-05, + "loss": 0.3226, + "step": 27849 + }, + { + "epoch": 0.481234448437932, + "grad_norm": 0.6218295968515009, + "learning_rate": 1.1091655073005524e-05, + "loss": 0.4806, + "step": 27850 + }, + { + "epoch": 0.4812517279513409, + "grad_norm": 0.42072769263263776, + "learning_rate": 1.1091098766459823e-05, + "loss": 0.6655, + "step": 27851 + }, + { + "epoch": 0.48126900746474977, + "grad_norm": 0.6935864774804719, + "learning_rate": 1.1090542456496723e-05, + "loss": 0.2428, + "step": 27852 + }, + { + "epoch": 0.4812862869781587, + "grad_norm": 1.2817092337882812, + "learning_rate": 1.1089986143117962e-05, + "loss": 0.6312, + "step": 27853 + }, + { + "epoch": 0.4813035664915676, + "grad_norm": 1.3108173475995988, + "learning_rate": 1.1089429826325277e-05, + "loss": 0.4507, + "step": 27854 + }, + { + "epoch": 0.4813208460049765, + "grad_norm": 0.8628923749696412, + "learning_rate": 1.1088873506120417e-05, + "loss": 0.3304, + "step": 27855 + }, + { + "epoch": 0.4813381255183854, + "grad_norm": 0.7576944204654533, + "learning_rate": 1.1088317182505124e-05, + "loss": 0.3722, + "step": 27856 + }, + { + "epoch": 0.4813554050317943, + "grad_norm": 0.9869296082819202, + "learning_rate": 1.1087760855481135e-05, + "loss": 0.3677, + "step": 27857 + }, + { + "epoch": 0.4813726845452032, + "grad_norm": 0.8309886063365152, + "learning_rate": 1.1087204525050202e-05, + "loss": 0.7705, + "step": 27858 + }, + { + "epoch": 0.4813899640586121, + "grad_norm": 1.062131363604484, + "learning_rate": 1.1086648191214057e-05, + "loss": 0.3239, + "step": 27859 + }, + { + "epoch": 0.481407243572021, + "grad_norm": 0.9804820304076208, + "learning_rate": 1.1086091853974453e-05, + "loss": 0.4342, + "step": 27860 + }, + { + "epoch": 0.48142452308542993, + "grad_norm": 1.0654045903226856, + "learning_rate": 1.1085535513333122e-05, + "loss": 0.3903, + "step": 27861 + }, + { + "epoch": 0.48144180259883884, + "grad_norm": 1.0197753615427412, + "learning_rate": 1.108497916929181e-05, + "loss": 0.2966, + "step": 27862 + }, + { + "epoch": 0.4814590821122477, + "grad_norm": 1.598633324045345, + "learning_rate": 1.1084422821852266e-05, + "loss": 0.2911, + "step": 27863 + }, + { + "epoch": 0.4814763616256566, + "grad_norm": 1.3628513421522739, + "learning_rate": 1.1083866471016222e-05, + "loss": 0.4893, + "step": 27864 + }, + { + "epoch": 0.4814936411390655, + "grad_norm": 1.0224068749317192, + "learning_rate": 1.1083310116785428e-05, + "loss": 0.5449, + "step": 27865 + }, + { + "epoch": 0.4815109206524744, + "grad_norm": 1.312084827820693, + "learning_rate": 1.1082753759161627e-05, + "loss": 0.5314, + "step": 27866 + }, + { + "epoch": 0.4815282001658833, + "grad_norm": 1.0952164633299344, + "learning_rate": 1.1082197398146555e-05, + "loss": 0.5027, + "step": 27867 + }, + { + "epoch": 0.48154547967929223, + "grad_norm": 1.5729556916780667, + "learning_rate": 1.1081641033741959e-05, + "loss": 0.5845, + "step": 27868 + }, + { + "epoch": 0.48156275919270114, + "grad_norm": 1.4774239772184787, + "learning_rate": 1.1081084665949584e-05, + "loss": 0.5308, + "step": 27869 + }, + { + "epoch": 0.48158003870611005, + "grad_norm": 0.7109549598463369, + "learning_rate": 1.1080528294771169e-05, + "loss": 0.5141, + "step": 27870 + }, + { + "epoch": 0.48159731821951896, + "grad_norm": 0.857950628039423, + "learning_rate": 1.1079971920208454e-05, + "loss": 0.4058, + "step": 27871 + }, + { + "epoch": 0.48161459773292786, + "grad_norm": 0.6028303325689623, + "learning_rate": 1.1079415542263188e-05, + "loss": 0.6914, + "step": 27872 + }, + { + "epoch": 0.48163187724633677, + "grad_norm": 0.800103540769668, + "learning_rate": 1.107885916093711e-05, + "loss": 0.3633, + "step": 27873 + }, + { + "epoch": 0.4816491567597456, + "grad_norm": 0.4570349352626204, + "learning_rate": 1.1078302776231961e-05, + "loss": 0.544, + "step": 27874 + }, + { + "epoch": 0.48166643627315453, + "grad_norm": 0.9095766762541075, + "learning_rate": 1.1077746388149489e-05, + "loss": 0.5134, + "step": 27875 + }, + { + "epoch": 0.48168371578656344, + "grad_norm": 1.257293888109467, + "learning_rate": 1.107718999669143e-05, + "loss": 0.2931, + "step": 27876 + }, + { + "epoch": 0.48170099529997235, + "grad_norm": 0.785490528259272, + "learning_rate": 1.1076633601859532e-05, + "loss": 0.3572, + "step": 27877 + }, + { + "epoch": 0.48171827481338125, + "grad_norm": 0.5928618471955864, + "learning_rate": 1.1076077203655536e-05, + "loss": 0.5762, + "step": 27878 + }, + { + "epoch": 0.48173555432679016, + "grad_norm": 1.071044439003865, + "learning_rate": 1.1075520802081184e-05, + "loss": 0.4705, + "step": 27879 + }, + { + "epoch": 0.48175283384019907, + "grad_norm": 1.1324109779390623, + "learning_rate": 1.1074964397138219e-05, + "loss": 0.5776, + "step": 27880 + }, + { + "epoch": 0.481770113353608, + "grad_norm": 0.7620831333826503, + "learning_rate": 1.1074407988828386e-05, + "loss": 0.3171, + "step": 27881 + }, + { + "epoch": 0.4817873928670169, + "grad_norm": 1.36378832472911, + "learning_rate": 1.107385157715342e-05, + "loss": 0.2292, + "step": 27882 + }, + { + "epoch": 0.4818046723804258, + "grad_norm": 0.8702218697179622, + "learning_rate": 1.1073295162115075e-05, + "loss": 0.3944, + "step": 27883 + }, + { + "epoch": 0.48182195189383464, + "grad_norm": 0.8403723460603695, + "learning_rate": 1.1072738743715086e-05, + "loss": 0.5345, + "step": 27884 + }, + { + "epoch": 0.48183923140724355, + "grad_norm": 0.4583737298655311, + "learning_rate": 1.1072182321955197e-05, + "loss": 0.5999, + "step": 27885 + }, + { + "epoch": 0.48185651092065246, + "grad_norm": 0.9234477724266861, + "learning_rate": 1.107162589683715e-05, + "loss": 0.4097, + "step": 27886 + }, + { + "epoch": 0.48187379043406137, + "grad_norm": 1.2045846654721692, + "learning_rate": 1.1071069468362692e-05, + "loss": 0.3679, + "step": 27887 + }, + { + "epoch": 0.4818910699474703, + "grad_norm": 1.150513067109674, + "learning_rate": 1.1070513036533562e-05, + "loss": 0.4354, + "step": 27888 + }, + { + "epoch": 0.4819083494608792, + "grad_norm": 0.9165595909470124, + "learning_rate": 1.1069956601351504e-05, + "loss": 0.3977, + "step": 27889 + }, + { + "epoch": 0.4819256289742881, + "grad_norm": 0.8074974332777192, + "learning_rate": 1.1069400162818262e-05, + "loss": 0.5332, + "step": 27890 + }, + { + "epoch": 0.481942908487697, + "grad_norm": 0.7312860855150953, + "learning_rate": 1.1068843720935577e-05, + "loss": 0.2613, + "step": 27891 + }, + { + "epoch": 0.4819601880011059, + "grad_norm": 1.015123303012454, + "learning_rate": 1.1068287275705189e-05, + "loss": 0.4468, + "step": 27892 + }, + { + "epoch": 0.4819774675145148, + "grad_norm": 0.4577112769439001, + "learning_rate": 1.1067730827128846e-05, + "loss": 0.4768, + "step": 27893 + }, + { + "epoch": 0.4819947470279237, + "grad_norm": 0.9449641645376945, + "learning_rate": 1.106717437520829e-05, + "loss": 0.4566, + "step": 27894 + }, + { + "epoch": 0.48201202654133257, + "grad_norm": 0.8510955470097546, + "learning_rate": 1.1066617919945262e-05, + "loss": 0.4615, + "step": 27895 + }, + { + "epoch": 0.4820293060547415, + "grad_norm": 0.9276417311909186, + "learning_rate": 1.1066061461341506e-05, + "loss": 0.5436, + "step": 27896 + }, + { + "epoch": 0.4820465855681504, + "grad_norm": 0.9173509410817184, + "learning_rate": 1.1065504999398762e-05, + "loss": 0.4427, + "step": 27897 + }, + { + "epoch": 0.4820638650815593, + "grad_norm": 1.4184499543875517, + "learning_rate": 1.1064948534118778e-05, + "loss": 0.4695, + "step": 27898 + }, + { + "epoch": 0.4820811445949682, + "grad_norm": 0.7071186883893964, + "learning_rate": 1.1064392065503294e-05, + "loss": 0.3557, + "step": 27899 + }, + { + "epoch": 0.4820984241083771, + "grad_norm": 0.5555279042811145, + "learning_rate": 1.1063835593554053e-05, + "loss": 0.9706, + "step": 27900 + }, + { + "epoch": 0.482115703621786, + "grad_norm": 1.032625119049019, + "learning_rate": 1.1063279118272798e-05, + "loss": 0.3356, + "step": 27901 + }, + { + "epoch": 0.4821329831351949, + "grad_norm": 0.7970390065933968, + "learning_rate": 1.1062722639661272e-05, + "loss": 0.3634, + "step": 27902 + }, + { + "epoch": 0.48215026264860383, + "grad_norm": 1.0847568973590591, + "learning_rate": 1.1062166157721217e-05, + "loss": 0.4398, + "step": 27903 + }, + { + "epoch": 0.48216754216201274, + "grad_norm": 0.680537375816927, + "learning_rate": 1.1061609672454379e-05, + "loss": 0.5961, + "step": 27904 + }, + { + "epoch": 0.4821848216754216, + "grad_norm": 0.5342858624542671, + "learning_rate": 1.1061053183862495e-05, + "loss": 0.7679, + "step": 27905 + }, + { + "epoch": 0.4822021011888305, + "grad_norm": 1.0072053866363486, + "learning_rate": 1.1060496691947316e-05, + "loss": 0.4763, + "step": 27906 + }, + { + "epoch": 0.4822193807022394, + "grad_norm": 0.9184114860270285, + "learning_rate": 1.105994019671058e-05, + "loss": 0.443, + "step": 27907 + }, + { + "epoch": 0.4822366602156483, + "grad_norm": 1.0888299221205708, + "learning_rate": 1.1059383698154026e-05, + "loss": 0.5234, + "step": 27908 + }, + { + "epoch": 0.4822539397290572, + "grad_norm": 0.8513877266378843, + "learning_rate": 1.1058827196279406e-05, + "loss": 0.4974, + "step": 27909 + }, + { + "epoch": 0.48227121924246613, + "grad_norm": 1.4102576545813488, + "learning_rate": 1.1058270691088464e-05, + "loss": 0.4025, + "step": 27910 + }, + { + "epoch": 0.48228849875587504, + "grad_norm": 1.1713905515718714, + "learning_rate": 1.105771418258293e-05, + "loss": 0.4831, + "step": 27911 + }, + { + "epoch": 0.48230577826928395, + "grad_norm": 1.0256216278246166, + "learning_rate": 1.1057157670764558e-05, + "loss": 0.4785, + "step": 27912 + }, + { + "epoch": 0.48232305778269285, + "grad_norm": 1.4537360071417174, + "learning_rate": 1.1056601155635087e-05, + "loss": 0.4353, + "step": 27913 + }, + { + "epoch": 0.48234033729610176, + "grad_norm": 1.1685431863281777, + "learning_rate": 1.105604463719626e-05, + "loss": 0.4449, + "step": 27914 + }, + { + "epoch": 0.48235761680951067, + "grad_norm": 0.9081441745699997, + "learning_rate": 1.105548811544982e-05, + "loss": 0.4581, + "step": 27915 + }, + { + "epoch": 0.4823748963229195, + "grad_norm": 0.9905893134260744, + "learning_rate": 1.1054931590397514e-05, + "loss": 0.5635, + "step": 27916 + }, + { + "epoch": 0.48239217583632843, + "grad_norm": 0.7873176023940645, + "learning_rate": 1.1054375062041078e-05, + "loss": 0.4715, + "step": 27917 + }, + { + "epoch": 0.48240945534973734, + "grad_norm": 1.0363448934792878, + "learning_rate": 1.1053818530382263e-05, + "loss": 0.4188, + "step": 27918 + }, + { + "epoch": 0.48242673486314624, + "grad_norm": 0.8117737622568325, + "learning_rate": 1.1053261995422808e-05, + "loss": 0.3308, + "step": 27919 + }, + { + "epoch": 0.48244401437655515, + "grad_norm": 0.7803128036784857, + "learning_rate": 1.1052705457164456e-05, + "loss": 0.3869, + "step": 27920 + }, + { + "epoch": 0.48246129388996406, + "grad_norm": 1.1868606117498623, + "learning_rate": 1.1052148915608952e-05, + "loss": 0.4049, + "step": 27921 + }, + { + "epoch": 0.48247857340337297, + "grad_norm": 0.7080286789074498, + "learning_rate": 1.1051592370758033e-05, + "loss": 0.375, + "step": 27922 + }, + { + "epoch": 0.4824958529167819, + "grad_norm": 1.0148101069781612, + "learning_rate": 1.105103582261345e-05, + "loss": 0.334, + "step": 27923 + }, + { + "epoch": 0.4825131324301908, + "grad_norm": 1.6975941374698238, + "learning_rate": 1.1050479271176942e-05, + "loss": 0.5628, + "step": 27924 + }, + { + "epoch": 0.4825304119435997, + "grad_norm": 0.646457112336139, + "learning_rate": 1.1049922716450253e-05, + "loss": 0.8065, + "step": 27925 + }, + { + "epoch": 0.48254769145700854, + "grad_norm": 0.9558725380129113, + "learning_rate": 1.1049366158435126e-05, + "loss": 0.4626, + "step": 27926 + }, + { + "epoch": 0.48256497097041745, + "grad_norm": 0.9220621842858802, + "learning_rate": 1.1048809597133306e-05, + "loss": 0.2705, + "step": 27927 + }, + { + "epoch": 0.48258225048382636, + "grad_norm": 0.5860718652190722, + "learning_rate": 1.1048253032546532e-05, + "loss": 0.3552, + "step": 27928 + }, + { + "epoch": 0.48259952999723527, + "grad_norm": 0.9942387345438818, + "learning_rate": 1.1047696464676552e-05, + "loss": 0.4373, + "step": 27929 + }, + { + "epoch": 0.4826168095106442, + "grad_norm": 0.9919742678513336, + "learning_rate": 1.1047139893525106e-05, + "loss": 0.5872, + "step": 27930 + }, + { + "epoch": 0.4826340890240531, + "grad_norm": 1.1528306495438267, + "learning_rate": 1.104658331909394e-05, + "loss": 0.6976, + "step": 27931 + }, + { + "epoch": 0.482651368537462, + "grad_norm": 0.8814085423747299, + "learning_rate": 1.1046026741384796e-05, + "loss": 0.3657, + "step": 27932 + }, + { + "epoch": 0.4826686480508709, + "grad_norm": 0.7563314759033215, + "learning_rate": 1.1045470160399416e-05, + "loss": 0.3276, + "step": 27933 + }, + { + "epoch": 0.4826859275642798, + "grad_norm": 1.4052979604719105, + "learning_rate": 1.104491357613954e-05, + "loss": 0.366, + "step": 27934 + }, + { + "epoch": 0.4827032070776887, + "grad_norm": 0.5529701476997729, + "learning_rate": 1.1044356988606919e-05, + "loss": 0.5431, + "step": 27935 + }, + { + "epoch": 0.4827204865910976, + "grad_norm": 1.3816523061443986, + "learning_rate": 1.1043800397803292e-05, + "loss": 0.4408, + "step": 27936 + }, + { + "epoch": 0.48273776610450647, + "grad_norm": 0.9375116491990817, + "learning_rate": 1.1043243803730404e-05, + "loss": 0.5031, + "step": 27937 + }, + { + "epoch": 0.4827550456179154, + "grad_norm": 1.399870404151388, + "learning_rate": 1.1042687206389994e-05, + "loss": 0.5406, + "step": 27938 + }, + { + "epoch": 0.4827723251313243, + "grad_norm": 0.6051780280701831, + "learning_rate": 1.1042130605783813e-05, + "loss": 0.4673, + "step": 27939 + }, + { + "epoch": 0.4827896046447332, + "grad_norm": 0.9701313201541273, + "learning_rate": 1.1041574001913595e-05, + "loss": 0.7233, + "step": 27940 + }, + { + "epoch": 0.4828068841581421, + "grad_norm": 0.749318326427468, + "learning_rate": 1.1041017394781092e-05, + "loss": 0.4371, + "step": 27941 + }, + { + "epoch": 0.482824163671551, + "grad_norm": 1.1379093631772685, + "learning_rate": 1.1040460784388042e-05, + "loss": 0.3576, + "step": 27942 + }, + { + "epoch": 0.4828414431849599, + "grad_norm": 0.8593726691641166, + "learning_rate": 1.1039904170736189e-05, + "loss": 0.2948, + "step": 27943 + }, + { + "epoch": 0.4828587226983688, + "grad_norm": 0.7500807835493692, + "learning_rate": 1.1039347553827278e-05, + "loss": 0.3214, + "step": 27944 + }, + { + "epoch": 0.48287600221177773, + "grad_norm": 0.9979686726236182, + "learning_rate": 1.1038790933663048e-05, + "loss": 0.523, + "step": 27945 + }, + { + "epoch": 0.48289328172518664, + "grad_norm": 1.0891076002825681, + "learning_rate": 1.103823431024525e-05, + "loss": 0.4399, + "step": 27946 + }, + { + "epoch": 0.48291056123859555, + "grad_norm": 1.1923388993974688, + "learning_rate": 1.1037677683575622e-05, + "loss": 0.3454, + "step": 27947 + }, + { + "epoch": 0.4829278407520044, + "grad_norm": 1.3651088522640789, + "learning_rate": 1.1037121053655907e-05, + "loss": 0.5384, + "step": 27948 + }, + { + "epoch": 0.4829451202654133, + "grad_norm": 1.3564087057629144, + "learning_rate": 1.1036564420487852e-05, + "loss": 0.6446, + "step": 27949 + }, + { + "epoch": 0.4829623997788222, + "grad_norm": 0.8534820321477743, + "learning_rate": 1.10360077840732e-05, + "loss": 0.432, + "step": 27950 + }, + { + "epoch": 0.4829796792922311, + "grad_norm": 0.5922449162099916, + "learning_rate": 1.1035451144413692e-05, + "loss": 0.8505, + "step": 27951 + }, + { + "epoch": 0.48299695880564003, + "grad_norm": 0.9487968465911825, + "learning_rate": 1.1034894501511071e-05, + "loss": 0.3703, + "step": 27952 + }, + { + "epoch": 0.48301423831904894, + "grad_norm": 0.5927111626823304, + "learning_rate": 1.1034337855367084e-05, + "loss": 0.3981, + "step": 27953 + }, + { + "epoch": 0.48303151783245785, + "grad_norm": 1.3839035036226088, + "learning_rate": 1.1033781205983467e-05, + "loss": 0.5643, + "step": 27954 + }, + { + "epoch": 0.48304879734586675, + "grad_norm": 1.1712093837639026, + "learning_rate": 1.1033224553361975e-05, + "loss": 0.2857, + "step": 27955 + }, + { + "epoch": 0.48306607685927566, + "grad_norm": 0.9757613143004574, + "learning_rate": 1.103266789750434e-05, + "loss": 0.4177, + "step": 27956 + }, + { + "epoch": 0.48308335637268457, + "grad_norm": 0.7636538294511312, + "learning_rate": 1.1032111238412313e-05, + "loss": 0.3799, + "step": 27957 + }, + { + "epoch": 0.4831006358860934, + "grad_norm": 0.9254227468972275, + "learning_rate": 1.1031554576087635e-05, + "loss": 0.3929, + "step": 27958 + }, + { + "epoch": 0.48311791539950233, + "grad_norm": 1.3361934802455, + "learning_rate": 1.1030997910532051e-05, + "loss": 0.3656, + "step": 27959 + }, + { + "epoch": 0.48313519491291124, + "grad_norm": 1.6093992074227124, + "learning_rate": 1.10304412417473e-05, + "loss": 0.5036, + "step": 27960 + }, + { + "epoch": 0.48315247442632014, + "grad_norm": 1.9649774483489777, + "learning_rate": 1.102988456973513e-05, + "loss": 0.5054, + "step": 27961 + }, + { + "epoch": 0.48316975393972905, + "grad_norm": 1.0202567421305162, + "learning_rate": 1.1029327894497286e-05, + "loss": 0.4875, + "step": 27962 + }, + { + "epoch": 0.48318703345313796, + "grad_norm": 0.9116344067861571, + "learning_rate": 1.1028771216035508e-05, + "loss": 0.4352, + "step": 27963 + }, + { + "epoch": 0.48320431296654687, + "grad_norm": 0.7632903445844876, + "learning_rate": 1.1028214534351538e-05, + "loss": 0.3165, + "step": 27964 + }, + { + "epoch": 0.4832215924799558, + "grad_norm": 0.630233234790419, + "learning_rate": 1.1027657849447122e-05, + "loss": 0.2663, + "step": 27965 + }, + { + "epoch": 0.4832388719933647, + "grad_norm": 0.5354784381003793, + "learning_rate": 1.1027101161324005e-05, + "loss": 0.7448, + "step": 27966 + }, + { + "epoch": 0.4832561515067736, + "grad_norm": 0.814414963842159, + "learning_rate": 1.102654446998393e-05, + "loss": 0.4459, + "step": 27967 + }, + { + "epoch": 0.4832734310201825, + "grad_norm": 0.8414033179439826, + "learning_rate": 1.1025987775428634e-05, + "loss": 0.3962, + "step": 27968 + }, + { + "epoch": 0.48329071053359135, + "grad_norm": 1.0389902373566646, + "learning_rate": 1.1025431077659875e-05, + "loss": 0.4654, + "step": 27969 + }, + { + "epoch": 0.48330799004700026, + "grad_norm": 0.9769563878749147, + "learning_rate": 1.1024874376679383e-05, + "loss": 0.3186, + "step": 27970 + }, + { + "epoch": 0.48332526956040917, + "grad_norm": 0.9502040595706175, + "learning_rate": 1.1024317672488905e-05, + "loss": 0.3543, + "step": 27971 + }, + { + "epoch": 0.4833425490738181, + "grad_norm": 1.025784877749636, + "learning_rate": 1.102376096509019e-05, + "loss": 0.3572, + "step": 27972 + }, + { + "epoch": 0.483359828587227, + "grad_norm": 0.8170868274595989, + "learning_rate": 1.1023204254484978e-05, + "loss": 0.3653, + "step": 27973 + }, + { + "epoch": 0.4833771081006359, + "grad_norm": 0.9456855462962378, + "learning_rate": 1.1022647540675008e-05, + "loss": 0.5811, + "step": 27974 + }, + { + "epoch": 0.4833943876140448, + "grad_norm": 0.82306079740027, + "learning_rate": 1.1022090823662033e-05, + "loss": 0.5319, + "step": 27975 + }, + { + "epoch": 0.4834116671274537, + "grad_norm": 1.2773324735517355, + "learning_rate": 1.1021534103447789e-05, + "loss": 0.4526, + "step": 27976 + }, + { + "epoch": 0.4834289466408626, + "grad_norm": 1.0697707865170785, + "learning_rate": 1.102097738003402e-05, + "loss": 0.5548, + "step": 27977 + }, + { + "epoch": 0.4834462261542715, + "grad_norm": 0.7957946642824846, + "learning_rate": 1.102042065342248e-05, + "loss": 0.4304, + "step": 27978 + }, + { + "epoch": 0.48346350566768037, + "grad_norm": 1.1920990901890685, + "learning_rate": 1.10198639236149e-05, + "loss": 0.5942, + "step": 27979 + }, + { + "epoch": 0.4834807851810893, + "grad_norm": 0.931934467152933, + "learning_rate": 1.1019307190613027e-05, + "loss": 0.5261, + "step": 27980 + }, + { + "epoch": 0.4834980646944982, + "grad_norm": 0.9680563541344145, + "learning_rate": 1.1018750454418609e-05, + "loss": 0.4293, + "step": 27981 + }, + { + "epoch": 0.4835153442079071, + "grad_norm": 0.9560732073922112, + "learning_rate": 1.1018193715033387e-05, + "loss": 0.4128, + "step": 27982 + }, + { + "epoch": 0.483532623721316, + "grad_norm": 0.855213512417128, + "learning_rate": 1.1017636972459105e-05, + "loss": 0.59, + "step": 27983 + }, + { + "epoch": 0.4835499032347249, + "grad_norm": 0.9175934116792929, + "learning_rate": 1.1017080226697508e-05, + "loss": 0.4398, + "step": 27984 + }, + { + "epoch": 0.4835671827481338, + "grad_norm": 1.1068907769903618, + "learning_rate": 1.1016523477750336e-05, + "loss": 0.4228, + "step": 27985 + }, + { + "epoch": 0.4835844622615427, + "grad_norm": 0.6960166328598534, + "learning_rate": 1.1015966725619336e-05, + "loss": 0.2974, + "step": 27986 + }, + { + "epoch": 0.48360174177495163, + "grad_norm": 1.0586257365663077, + "learning_rate": 1.1015409970306252e-05, + "loss": 0.3765, + "step": 27987 + }, + { + "epoch": 0.48361902128836054, + "grad_norm": 0.7026798046607499, + "learning_rate": 1.1014853211812823e-05, + "loss": 0.5146, + "step": 27988 + }, + { + "epoch": 0.48363630080176945, + "grad_norm": 0.9532638000299948, + "learning_rate": 1.10142964501408e-05, + "loss": 0.343, + "step": 27989 + }, + { + "epoch": 0.4836535803151783, + "grad_norm": 0.7553258160975014, + "learning_rate": 1.1013739685291924e-05, + "loss": 0.5348, + "step": 27990 + }, + { + "epoch": 0.4836708598285872, + "grad_norm": 0.7290358043484598, + "learning_rate": 1.1013182917267936e-05, + "loss": 0.339, + "step": 27991 + }, + { + "epoch": 0.4836881393419961, + "grad_norm": 1.0822379135647275, + "learning_rate": 1.1012626146070584e-05, + "loss": 0.3667, + "step": 27992 + }, + { + "epoch": 0.483705418855405, + "grad_norm": 0.8094540774246856, + "learning_rate": 1.1012069371701611e-05, + "loss": 0.4336, + "step": 27993 + }, + { + "epoch": 0.48372269836881393, + "grad_norm": 0.9209275482874688, + "learning_rate": 1.1011512594162756e-05, + "loss": 0.4659, + "step": 27994 + }, + { + "epoch": 0.48373997788222284, + "grad_norm": 0.9434310744810824, + "learning_rate": 1.101095581345577e-05, + "loss": 0.3981, + "step": 27995 + }, + { + "epoch": 0.48375725739563175, + "grad_norm": 0.9154158668154491, + "learning_rate": 1.1010399029582393e-05, + "loss": 0.3161, + "step": 27996 + }, + { + "epoch": 0.48377453690904065, + "grad_norm": 0.8615452671924475, + "learning_rate": 1.1009842242544367e-05, + "loss": 0.3837, + "step": 27997 + }, + { + "epoch": 0.48379181642244956, + "grad_norm": 0.761621193956676, + "learning_rate": 1.1009285452343441e-05, + "loss": 0.3796, + "step": 27998 + }, + { + "epoch": 0.48380909593585847, + "grad_norm": 0.9038060038748451, + "learning_rate": 1.1008728658981355e-05, + "loss": 0.427, + "step": 27999 + }, + { + "epoch": 0.4838263754492674, + "grad_norm": 1.1276656818956179, + "learning_rate": 1.1008171862459853e-05, + "loss": 0.3441, + "step": 28000 + }, + { + "epoch": 0.48384365496267623, + "grad_norm": 1.0356865256682057, + "learning_rate": 1.1007615062780683e-05, + "loss": 0.3276, + "step": 28001 + }, + { + "epoch": 0.48386093447608514, + "grad_norm": 0.8104713064197168, + "learning_rate": 1.1007058259945584e-05, + "loss": 0.1873, + "step": 28002 + }, + { + "epoch": 0.48387821398949404, + "grad_norm": 0.8981953906587844, + "learning_rate": 1.1006501453956305e-05, + "loss": 0.3468, + "step": 28003 + }, + { + "epoch": 0.48389549350290295, + "grad_norm": 1.3348311624111215, + "learning_rate": 1.1005944644814583e-05, + "loss": 0.4271, + "step": 28004 + }, + { + "epoch": 0.48391277301631186, + "grad_norm": 0.8662338851502644, + "learning_rate": 1.1005387832522167e-05, + "loss": 0.5353, + "step": 28005 + }, + { + "epoch": 0.48393005252972077, + "grad_norm": 1.2493242061951866, + "learning_rate": 1.1004831017080802e-05, + "loss": 0.4828, + "step": 28006 + }, + { + "epoch": 0.4839473320431297, + "grad_norm": 1.2264514393448582, + "learning_rate": 1.1004274198492228e-05, + "loss": 0.4078, + "step": 28007 + }, + { + "epoch": 0.4839646115565386, + "grad_norm": 0.8456727405361477, + "learning_rate": 1.1003717376758189e-05, + "loss": 0.4077, + "step": 28008 + }, + { + "epoch": 0.4839818910699475, + "grad_norm": 0.9243383033722906, + "learning_rate": 1.1003160551880435e-05, + "loss": 0.3841, + "step": 28009 + }, + { + "epoch": 0.4839991705833564, + "grad_norm": 1.2163931672542083, + "learning_rate": 1.1002603723860703e-05, + "loss": 0.5478, + "step": 28010 + }, + { + "epoch": 0.48401645009676525, + "grad_norm": 1.0618250661378978, + "learning_rate": 1.100204689270074e-05, + "loss": 0.5807, + "step": 28011 + }, + { + "epoch": 0.48403372961017416, + "grad_norm": 0.9545627530376907, + "learning_rate": 1.100149005840229e-05, + "loss": 0.3661, + "step": 28012 + }, + { + "epoch": 0.48405100912358306, + "grad_norm": 1.0120159759341185, + "learning_rate": 1.1000933220967103e-05, + "loss": 0.5324, + "step": 28013 + }, + { + "epoch": 0.484068288636992, + "grad_norm": 1.0098465875092257, + "learning_rate": 1.100037638039691e-05, + "loss": 0.4025, + "step": 28014 + }, + { + "epoch": 0.4840855681504009, + "grad_norm": 1.374233722462415, + "learning_rate": 1.0999819536693463e-05, + "loss": 0.3373, + "step": 28015 + }, + { + "epoch": 0.4841028476638098, + "grad_norm": 1.1641022756213855, + "learning_rate": 1.0999262689858507e-05, + "loss": 0.4651, + "step": 28016 + }, + { + "epoch": 0.4841201271772187, + "grad_norm": 1.4789042985248968, + "learning_rate": 1.0998705839893785e-05, + "loss": 0.4177, + "step": 28017 + }, + { + "epoch": 0.4841374066906276, + "grad_norm": 1.4016959551343542, + "learning_rate": 1.0998148986801038e-05, + "loss": 0.5893, + "step": 28018 + }, + { + "epoch": 0.4841546862040365, + "grad_norm": 1.0498895806875153, + "learning_rate": 1.0997592130582015e-05, + "loss": 0.3388, + "step": 28019 + }, + { + "epoch": 0.4841719657174454, + "grad_norm": 0.8632517534565685, + "learning_rate": 1.0997035271238455e-05, + "loss": 0.3566, + "step": 28020 + }, + { + "epoch": 0.4841892452308543, + "grad_norm": 1.0097906464929336, + "learning_rate": 1.0996478408772107e-05, + "loss": 0.3011, + "step": 28021 + }, + { + "epoch": 0.4842065247442632, + "grad_norm": 1.001758154246297, + "learning_rate": 1.099592154318471e-05, + "loss": 0.3898, + "step": 28022 + }, + { + "epoch": 0.4842238042576721, + "grad_norm": 1.2820634042381216, + "learning_rate": 1.0995364674478016e-05, + "loss": 0.3653, + "step": 28023 + }, + { + "epoch": 0.484241083771081, + "grad_norm": 1.1022860204195892, + "learning_rate": 1.0994807802653762e-05, + "loss": 0.4409, + "step": 28024 + }, + { + "epoch": 0.4842583632844899, + "grad_norm": 1.0006392473601664, + "learning_rate": 1.099425092771369e-05, + "loss": 0.4634, + "step": 28025 + }, + { + "epoch": 0.4842756427978988, + "grad_norm": 0.9182029484044516, + "learning_rate": 1.0993694049659554e-05, + "loss": 0.4396, + "step": 28026 + }, + { + "epoch": 0.4842929223113077, + "grad_norm": 0.7497802529457243, + "learning_rate": 1.0993137168493091e-05, + "loss": 0.5084, + "step": 28027 + }, + { + "epoch": 0.4843102018247166, + "grad_norm": 0.9839807930425819, + "learning_rate": 1.0992580284216046e-05, + "loss": 0.3814, + "step": 28028 + }, + { + "epoch": 0.48432748133812553, + "grad_norm": 1.3075440331279269, + "learning_rate": 1.0992023396830166e-05, + "loss": 0.5586, + "step": 28029 + }, + { + "epoch": 0.48434476085153444, + "grad_norm": 0.6038374078266231, + "learning_rate": 1.0991466506337192e-05, + "loss": 0.3032, + "step": 28030 + }, + { + "epoch": 0.48436204036494335, + "grad_norm": 1.0413564538900437, + "learning_rate": 1.0990909612738867e-05, + "loss": 0.5363, + "step": 28031 + }, + { + "epoch": 0.4843793198783522, + "grad_norm": 1.027153278845879, + "learning_rate": 1.099035271603694e-05, + "loss": 0.3193, + "step": 28032 + }, + { + "epoch": 0.4843965993917611, + "grad_norm": 0.9448699449271001, + "learning_rate": 1.0989795816233155e-05, + "loss": 0.4329, + "step": 28033 + }, + { + "epoch": 0.48441387890517, + "grad_norm": 1.1974256236993734, + "learning_rate": 1.0989238913329254e-05, + "loss": 0.5001, + "step": 28034 + }, + { + "epoch": 0.4844311584185789, + "grad_norm": 1.5774994872191768, + "learning_rate": 1.098868200732698e-05, + "loss": 0.4966, + "step": 28035 + }, + { + "epoch": 0.48444843793198783, + "grad_norm": 0.6301735342750322, + "learning_rate": 1.0988125098228079e-05, + "loss": 0.2781, + "step": 28036 + }, + { + "epoch": 0.48446571744539674, + "grad_norm": 0.7050524622195692, + "learning_rate": 1.0987568186034294e-05, + "loss": 0.317, + "step": 28037 + }, + { + "epoch": 0.48448299695880565, + "grad_norm": 0.9175075503147515, + "learning_rate": 1.0987011270747375e-05, + "loss": 0.2803, + "step": 28038 + }, + { + "epoch": 0.48450027647221455, + "grad_norm": 0.7472374803140076, + "learning_rate": 1.0986454352369058e-05, + "loss": 0.4457, + "step": 28039 + }, + { + "epoch": 0.48451755598562346, + "grad_norm": 0.8269501083389507, + "learning_rate": 1.098589743090109e-05, + "loss": 0.3932, + "step": 28040 + }, + { + "epoch": 0.48453483549903237, + "grad_norm": 1.2954071909134026, + "learning_rate": 1.0985340506345217e-05, + "loss": 0.3293, + "step": 28041 + }, + { + "epoch": 0.4845521150124413, + "grad_norm": 0.9057844203079165, + "learning_rate": 1.0984783578703185e-05, + "loss": 0.3161, + "step": 28042 + }, + { + "epoch": 0.48456939452585013, + "grad_norm": 0.8814908611475737, + "learning_rate": 1.0984226647976732e-05, + "loss": 0.2403, + "step": 28043 + }, + { + "epoch": 0.48458667403925904, + "grad_norm": 1.4597082237319854, + "learning_rate": 1.0983669714167611e-05, + "loss": 0.5282, + "step": 28044 + }, + { + "epoch": 0.48460395355266794, + "grad_norm": 0.5986768891241568, + "learning_rate": 1.0983112777277557e-05, + "loss": 0.2529, + "step": 28045 + }, + { + "epoch": 0.48462123306607685, + "grad_norm": 1.8024945941483683, + "learning_rate": 1.0982555837308322e-05, + "loss": 0.5619, + "step": 28046 + }, + { + "epoch": 0.48463851257948576, + "grad_norm": 0.6232757573072583, + "learning_rate": 1.098199889426165e-05, + "loss": 0.4842, + "step": 28047 + }, + { + "epoch": 0.48465579209289467, + "grad_norm": 0.9310758324239874, + "learning_rate": 1.0981441948139277e-05, + "loss": 0.3109, + "step": 28048 + }, + { + "epoch": 0.4846730716063036, + "grad_norm": 0.6284821274877587, + "learning_rate": 1.0980884998942956e-05, + "loss": 1.098, + "step": 28049 + }, + { + "epoch": 0.4846903511197125, + "grad_norm": 1.1931449391524325, + "learning_rate": 1.098032804667443e-05, + "loss": 0.341, + "step": 28050 + }, + { + "epoch": 0.4847076306331214, + "grad_norm": 1.2215655982419606, + "learning_rate": 1.0979771091335437e-05, + "loss": 0.4548, + "step": 28051 + }, + { + "epoch": 0.4847249101465303, + "grad_norm": 0.5282973148129453, + "learning_rate": 1.0979214132927731e-05, + "loss": 0.4799, + "step": 28052 + }, + { + "epoch": 0.48474218965993915, + "grad_norm": 0.8897585694404463, + "learning_rate": 1.0978657171453053e-05, + "loss": 0.5176, + "step": 28053 + }, + { + "epoch": 0.48475946917334806, + "grad_norm": 1.0936696810882884, + "learning_rate": 1.0978100206913145e-05, + "loss": 0.3066, + "step": 28054 + }, + { + "epoch": 0.48477674868675696, + "grad_norm": 0.7594743895725078, + "learning_rate": 1.097754323930975e-05, + "loss": 0.4124, + "step": 28055 + }, + { + "epoch": 0.48479402820016587, + "grad_norm": 1.463319538136732, + "learning_rate": 1.097698626864462e-05, + "loss": 0.3089, + "step": 28056 + }, + { + "epoch": 0.4848113077135748, + "grad_norm": 0.6660859516860744, + "learning_rate": 1.0976429294919491e-05, + "loss": 0.2972, + "step": 28057 + }, + { + "epoch": 0.4848285872269837, + "grad_norm": 1.2483481125194216, + "learning_rate": 1.0975872318136113e-05, + "loss": 0.5027, + "step": 28058 + }, + { + "epoch": 0.4848458667403926, + "grad_norm": 1.2482736844424835, + "learning_rate": 1.0975315338296229e-05, + "loss": 0.2925, + "step": 28059 + }, + { + "epoch": 0.4848631462538015, + "grad_norm": 0.7612368713649669, + "learning_rate": 1.0974758355401581e-05, + "loss": 0.4134, + "step": 28060 + }, + { + "epoch": 0.4848804257672104, + "grad_norm": 0.738731085771111, + "learning_rate": 1.0974201369453918e-05, + "loss": 0.3025, + "step": 28061 + }, + { + "epoch": 0.4848977052806193, + "grad_norm": 0.5298997012598985, + "learning_rate": 1.0973644380454982e-05, + "loss": 0.82, + "step": 28062 + }, + { + "epoch": 0.4849149847940282, + "grad_norm": 0.7175222511671019, + "learning_rate": 1.0973087388406515e-05, + "loss": 0.3177, + "step": 28063 + }, + { + "epoch": 0.4849322643074371, + "grad_norm": 1.6500676614154413, + "learning_rate": 1.0972530393310268e-05, + "loss": 0.3697, + "step": 28064 + }, + { + "epoch": 0.484949543820846, + "grad_norm": 1.0944026953155568, + "learning_rate": 1.097197339516798e-05, + "loss": 0.4424, + "step": 28065 + }, + { + "epoch": 0.4849668233342549, + "grad_norm": 0.8173455780434357, + "learning_rate": 1.09714163939814e-05, + "loss": 0.3324, + "step": 28066 + }, + { + "epoch": 0.4849841028476638, + "grad_norm": 1.127952361910788, + "learning_rate": 1.0970859389752268e-05, + "loss": 0.4857, + "step": 28067 + }, + { + "epoch": 0.4850013823610727, + "grad_norm": 0.5410670456788944, + "learning_rate": 1.097030238248233e-05, + "loss": 0.6951, + "step": 28068 + }, + { + "epoch": 0.4850186618744816, + "grad_norm": 1.153017079490804, + "learning_rate": 1.096974537217333e-05, + "loss": 0.4551, + "step": 28069 + }, + { + "epoch": 0.4850359413878905, + "grad_norm": 0.8640521347958898, + "learning_rate": 1.0969188358827016e-05, + "loss": 0.526, + "step": 28070 + }, + { + "epoch": 0.48505322090129943, + "grad_norm": 1.5108576617004863, + "learning_rate": 1.0968631342445131e-05, + "loss": 0.5622, + "step": 28071 + }, + { + "epoch": 0.48507050041470834, + "grad_norm": 0.9065358284027605, + "learning_rate": 1.0968074323029418e-05, + "loss": 0.4697, + "step": 28072 + }, + { + "epoch": 0.48508777992811725, + "grad_norm": 0.5588344529231355, + "learning_rate": 1.0967517300581623e-05, + "loss": 0.6888, + "step": 28073 + }, + { + "epoch": 0.48510505944152615, + "grad_norm": 0.7614407673025309, + "learning_rate": 1.0966960275103492e-05, + "loss": 0.5517, + "step": 28074 + }, + { + "epoch": 0.485122338954935, + "grad_norm": 1.0128787233482752, + "learning_rate": 1.0966403246596763e-05, + "loss": 0.414, + "step": 28075 + }, + { + "epoch": 0.4851396184683439, + "grad_norm": 0.7313591871175054, + "learning_rate": 1.0965846215063192e-05, + "loss": 0.3927, + "step": 28076 + }, + { + "epoch": 0.4851568979817528, + "grad_norm": 1.0563679286673948, + "learning_rate": 1.096528918050451e-05, + "loss": 0.4714, + "step": 28077 + }, + { + "epoch": 0.48517417749516173, + "grad_norm": 1.6339409421907851, + "learning_rate": 1.0964732142922474e-05, + "loss": 0.3364, + "step": 28078 + }, + { + "epoch": 0.48519145700857064, + "grad_norm": 0.8188939499610275, + "learning_rate": 1.0964175102318823e-05, + "loss": 0.4311, + "step": 28079 + }, + { + "epoch": 0.48520873652197954, + "grad_norm": 1.4741806605047842, + "learning_rate": 1.09636180586953e-05, + "loss": 0.5558, + "step": 28080 + }, + { + "epoch": 0.48522601603538845, + "grad_norm": 1.0811627979980312, + "learning_rate": 1.0963061012053655e-05, + "loss": 0.4784, + "step": 28081 + }, + { + "epoch": 0.48524329554879736, + "grad_norm": 1.0335240668474661, + "learning_rate": 1.0962503962395628e-05, + "loss": 0.5038, + "step": 28082 + }, + { + "epoch": 0.48526057506220627, + "grad_norm": 1.2132466732389597, + "learning_rate": 1.0961946909722965e-05, + "loss": 0.4725, + "step": 28083 + }, + { + "epoch": 0.4852778545756152, + "grad_norm": 0.6833303906122582, + "learning_rate": 1.0961389854037411e-05, + "loss": 0.3013, + "step": 28084 + }, + { + "epoch": 0.485295134089024, + "grad_norm": 0.7484392772974799, + "learning_rate": 1.0960832795340715e-05, + "loss": 0.3679, + "step": 28085 + }, + { + "epoch": 0.48531241360243293, + "grad_norm": 0.42865022553178794, + "learning_rate": 1.0960275733634613e-05, + "loss": 0.5949, + "step": 28086 + }, + { + "epoch": 0.48532969311584184, + "grad_norm": 0.44581198683166096, + "learning_rate": 1.0959718668920858e-05, + "loss": 0.5971, + "step": 28087 + }, + { + "epoch": 0.48534697262925075, + "grad_norm": 0.7396195336524918, + "learning_rate": 1.0959161601201187e-05, + "loss": 0.5579, + "step": 28088 + }, + { + "epoch": 0.48536425214265966, + "grad_norm": 1.7371446729300932, + "learning_rate": 1.0958604530477351e-05, + "loss": 0.4968, + "step": 28089 + }, + { + "epoch": 0.48538153165606857, + "grad_norm": 0.8026247077756656, + "learning_rate": 1.0958047456751093e-05, + "loss": 0.4294, + "step": 28090 + }, + { + "epoch": 0.4853988111694775, + "grad_norm": 0.9335349203716294, + "learning_rate": 1.0957490380024156e-05, + "loss": 0.2635, + "step": 28091 + }, + { + "epoch": 0.4854160906828864, + "grad_norm": 0.9903122698952214, + "learning_rate": 1.095693330029829e-05, + "loss": 0.4722, + "step": 28092 + }, + { + "epoch": 0.4854333701962953, + "grad_norm": 0.7394092236543005, + "learning_rate": 1.0956376217575234e-05, + "loss": 0.5513, + "step": 28093 + }, + { + "epoch": 0.4854506497097042, + "grad_norm": 1.416947186129628, + "learning_rate": 1.0955819131856736e-05, + "loss": 0.5092, + "step": 28094 + }, + { + "epoch": 0.4854679292231131, + "grad_norm": 1.0084804308776172, + "learning_rate": 1.0955262043144537e-05, + "loss": 0.5115, + "step": 28095 + }, + { + "epoch": 0.48548520873652196, + "grad_norm": 1.240788101547891, + "learning_rate": 1.095470495144039e-05, + "loss": 0.421, + "step": 28096 + }, + { + "epoch": 0.48550248824993086, + "grad_norm": 1.151966659838268, + "learning_rate": 1.095414785674603e-05, + "loss": 0.4512, + "step": 28097 + }, + { + "epoch": 0.48551976776333977, + "grad_norm": 0.8747475201411081, + "learning_rate": 1.0953590759063206e-05, + "loss": 0.3785, + "step": 28098 + }, + { + "epoch": 0.4855370472767487, + "grad_norm": 1.6184745995573564, + "learning_rate": 1.0953033658393665e-05, + "loss": 0.3984, + "step": 28099 + }, + { + "epoch": 0.4855543267901576, + "grad_norm": 1.2087937829721642, + "learning_rate": 1.0952476554739148e-05, + "loss": 0.3369, + "step": 28100 + }, + { + "epoch": 0.4855716063035665, + "grad_norm": 0.996979958287152, + "learning_rate": 1.0951919448101405e-05, + "loss": 0.3924, + "step": 28101 + }, + { + "epoch": 0.4855888858169754, + "grad_norm": 1.0845087935319142, + "learning_rate": 1.0951362338482178e-05, + "loss": 0.4371, + "step": 28102 + }, + { + "epoch": 0.4856061653303843, + "grad_norm": 0.6400219906830273, + "learning_rate": 1.095080522588321e-05, + "loss": 0.4596, + "step": 28103 + }, + { + "epoch": 0.4856234448437932, + "grad_norm": 1.1112214464177819, + "learning_rate": 1.0950248110306248e-05, + "loss": 0.3965, + "step": 28104 + }, + { + "epoch": 0.4856407243572021, + "grad_norm": 0.7497594006369755, + "learning_rate": 1.0949690991753036e-05, + "loss": 0.4373, + "step": 28105 + }, + { + "epoch": 0.485658003870611, + "grad_norm": 1.3035048523448496, + "learning_rate": 1.0949133870225323e-05, + "loss": 0.4102, + "step": 28106 + }, + { + "epoch": 0.4856752833840199, + "grad_norm": 0.7291999556530994, + "learning_rate": 1.0948576745724849e-05, + "loss": 0.4997, + "step": 28107 + }, + { + "epoch": 0.4856925628974288, + "grad_norm": 0.787074352799759, + "learning_rate": 1.0948019618253358e-05, + "loss": 0.3926, + "step": 28108 + }, + { + "epoch": 0.4857098424108377, + "grad_norm": 0.4057342469703395, + "learning_rate": 1.09474624878126e-05, + "loss": 0.789, + "step": 28109 + }, + { + "epoch": 0.4857271219242466, + "grad_norm": 1.1166240846789348, + "learning_rate": 1.0946905354404318e-05, + "loss": 0.4897, + "step": 28110 + }, + { + "epoch": 0.4857444014376555, + "grad_norm": 0.9544174514490192, + "learning_rate": 1.0946348218030253e-05, + "loss": 0.5471, + "step": 28111 + }, + { + "epoch": 0.4857616809510644, + "grad_norm": 1.058900256912138, + "learning_rate": 1.094579107869216e-05, + "loss": 0.4955, + "step": 28112 + }, + { + "epoch": 0.48577896046447333, + "grad_norm": 0.8358649743613483, + "learning_rate": 1.094523393639177e-05, + "loss": 0.459, + "step": 28113 + }, + { + "epoch": 0.48579623997788224, + "grad_norm": 1.201069573347141, + "learning_rate": 1.0944676791130841e-05, + "loss": 0.6563, + "step": 28114 + }, + { + "epoch": 0.48581351949129115, + "grad_norm": 1.2461343605600999, + "learning_rate": 1.0944119642911108e-05, + "loss": 0.3947, + "step": 28115 + }, + { + "epoch": 0.48583079900470005, + "grad_norm": 0.8339058056132396, + "learning_rate": 1.0943562491734326e-05, + "loss": 0.4777, + "step": 28116 + }, + { + "epoch": 0.4858480785181089, + "grad_norm": 0.7992011704670812, + "learning_rate": 1.094300533760223e-05, + "loss": 0.4351, + "step": 28117 + }, + { + "epoch": 0.4858653580315178, + "grad_norm": 0.9022900198205402, + "learning_rate": 1.0942448180516572e-05, + "loss": 0.6448, + "step": 28118 + }, + { + "epoch": 0.4858826375449267, + "grad_norm": 0.528859594113551, + "learning_rate": 1.0941891020479095e-05, + "loss": 0.7234, + "step": 28119 + }, + { + "epoch": 0.48589991705833563, + "grad_norm": 1.0928342182666437, + "learning_rate": 1.094133385749154e-05, + "loss": 0.4092, + "step": 28120 + }, + { + "epoch": 0.48591719657174454, + "grad_norm": 0.9022830314179368, + "learning_rate": 1.0940776691555658e-05, + "loss": 0.6773, + "step": 28121 + }, + { + "epoch": 0.48593447608515344, + "grad_norm": 1.1872898668757412, + "learning_rate": 1.0940219522673192e-05, + "loss": 0.465, + "step": 28122 + }, + { + "epoch": 0.48595175559856235, + "grad_norm": 0.6957442220716188, + "learning_rate": 1.0939662350845887e-05, + "loss": 0.2422, + "step": 28123 + }, + { + "epoch": 0.48596903511197126, + "grad_norm": 0.8423249109971499, + "learning_rate": 1.0939105176075489e-05, + "loss": 0.5361, + "step": 28124 + }, + { + "epoch": 0.48598631462538017, + "grad_norm": 0.6939038092201919, + "learning_rate": 1.0938547998363742e-05, + "loss": 0.3363, + "step": 28125 + }, + { + "epoch": 0.4860035941387891, + "grad_norm": 1.2245840848340288, + "learning_rate": 1.0937990817712392e-05, + "loss": 0.4317, + "step": 28126 + }, + { + "epoch": 0.4860208736521979, + "grad_norm": 1.1907375160653226, + "learning_rate": 1.0937433634123183e-05, + "loss": 0.3228, + "step": 28127 + }, + { + "epoch": 0.48603815316560683, + "grad_norm": 1.205707242894107, + "learning_rate": 1.0936876447597857e-05, + "loss": 0.4198, + "step": 28128 + }, + { + "epoch": 0.48605543267901574, + "grad_norm": 0.9935985320315698, + "learning_rate": 1.0936319258138165e-05, + "loss": 0.3208, + "step": 28129 + }, + { + "epoch": 0.48607271219242465, + "grad_norm": 0.7005595032171005, + "learning_rate": 1.0935762065745853e-05, + "loss": 0.5369, + "step": 28130 + }, + { + "epoch": 0.48608999170583356, + "grad_norm": 0.900110544289923, + "learning_rate": 1.093520487042266e-05, + "loss": 0.493, + "step": 28131 + }, + { + "epoch": 0.48610727121924246, + "grad_norm": 0.9760544919566602, + "learning_rate": 1.0934647672170336e-05, + "loss": 0.3612, + "step": 28132 + }, + { + "epoch": 0.4861245507326514, + "grad_norm": 0.9848066965978758, + "learning_rate": 1.093409047099062e-05, + "loss": 0.3223, + "step": 28133 + }, + { + "epoch": 0.4861418302460603, + "grad_norm": 0.7462013300378595, + "learning_rate": 1.0933533266885269e-05, + "loss": 0.5989, + "step": 28134 + }, + { + "epoch": 0.4861591097594692, + "grad_norm": 1.1730926286808383, + "learning_rate": 1.0932976059856015e-05, + "loss": 0.4615, + "step": 28135 + }, + { + "epoch": 0.4861763892728781, + "grad_norm": 0.7529393270624282, + "learning_rate": 1.093241884990461e-05, + "loss": 0.4099, + "step": 28136 + }, + { + "epoch": 0.486193668786287, + "grad_norm": 0.816290707182247, + "learning_rate": 1.0931861637032801e-05, + "loss": 0.3309, + "step": 28137 + }, + { + "epoch": 0.48621094829969586, + "grad_norm": 0.4654752780132879, + "learning_rate": 1.093130442124233e-05, + "loss": 0.8929, + "step": 28138 + }, + { + "epoch": 0.48622822781310476, + "grad_norm": 1.4905217322267466, + "learning_rate": 1.0930747202534941e-05, + "loss": 0.3221, + "step": 28139 + }, + { + "epoch": 0.48624550732651367, + "grad_norm": 0.5310505233473263, + "learning_rate": 1.093018998091238e-05, + "loss": 0.5402, + "step": 28140 + }, + { + "epoch": 0.4862627868399226, + "grad_norm": 1.17762109778098, + "learning_rate": 1.0929632756376395e-05, + "loss": 0.5273, + "step": 28141 + }, + { + "epoch": 0.4862800663533315, + "grad_norm": 0.6930284296362924, + "learning_rate": 1.0929075528928731e-05, + "loss": 0.2983, + "step": 28142 + }, + { + "epoch": 0.4862973458667404, + "grad_norm": 1.4665002109768381, + "learning_rate": 1.092851829857113e-05, + "loss": 0.5309, + "step": 28143 + }, + { + "epoch": 0.4863146253801493, + "grad_norm": 1.3078421748409965, + "learning_rate": 1.0927961065305338e-05, + "loss": 0.5608, + "step": 28144 + }, + { + "epoch": 0.4863319048935582, + "grad_norm": 0.9495625803120944, + "learning_rate": 1.0927403829133104e-05, + "loss": 0.4319, + "step": 28145 + }, + { + "epoch": 0.4863491844069671, + "grad_norm": 0.9039437055960092, + "learning_rate": 1.0926846590056168e-05, + "loss": 0.3825, + "step": 28146 + }, + { + "epoch": 0.486366463920376, + "grad_norm": 0.7784580202535486, + "learning_rate": 1.0926289348076284e-05, + "loss": 0.2762, + "step": 28147 + }, + { + "epoch": 0.48638374343378493, + "grad_norm": 0.9125934771490462, + "learning_rate": 1.0925732103195186e-05, + "loss": 0.4979, + "step": 28148 + }, + { + "epoch": 0.4864010229471938, + "grad_norm": 0.7911615514894262, + "learning_rate": 1.0925174855414623e-05, + "loss": 0.3373, + "step": 28149 + }, + { + "epoch": 0.4864183024606027, + "grad_norm": 0.7936593035487631, + "learning_rate": 1.0924617604736347e-05, + "loss": 0.3464, + "step": 28150 + }, + { + "epoch": 0.4864355819740116, + "grad_norm": 1.028728061227843, + "learning_rate": 1.0924060351162096e-05, + "loss": 0.305, + "step": 28151 + }, + { + "epoch": 0.4864528614874205, + "grad_norm": 0.7408146224226906, + "learning_rate": 1.0923503094693618e-05, + "loss": 0.9147, + "step": 28152 + }, + { + "epoch": 0.4864701410008294, + "grad_norm": 1.1121884360097343, + "learning_rate": 1.0922945835332659e-05, + "loss": 0.6337, + "step": 28153 + }, + { + "epoch": 0.4864874205142383, + "grad_norm": 0.8656603490675039, + "learning_rate": 1.0922388573080962e-05, + "loss": 0.3989, + "step": 28154 + }, + { + "epoch": 0.48650470002764723, + "grad_norm": 1.1139143178724777, + "learning_rate": 1.0921831307940274e-05, + "loss": 0.5258, + "step": 28155 + }, + { + "epoch": 0.48652197954105614, + "grad_norm": 1.4514064510205402, + "learning_rate": 1.092127403991234e-05, + "loss": 0.452, + "step": 28156 + }, + { + "epoch": 0.48653925905446505, + "grad_norm": 0.9285294737669358, + "learning_rate": 1.0920716768998906e-05, + "loss": 0.3078, + "step": 28157 + }, + { + "epoch": 0.48655653856787395, + "grad_norm": 1.453926965611016, + "learning_rate": 1.092015949520172e-05, + "loss": 0.3427, + "step": 28158 + }, + { + "epoch": 0.4865738180812828, + "grad_norm": 0.7778395818241687, + "learning_rate": 1.0919602218522522e-05, + "loss": 0.315, + "step": 28159 + }, + { + "epoch": 0.4865910975946917, + "grad_norm": 1.1245853628373936, + "learning_rate": 1.091904493896306e-05, + "loss": 0.3429, + "step": 28160 + }, + { + "epoch": 0.4866083771081006, + "grad_norm": 1.3715972863697468, + "learning_rate": 1.0918487656525078e-05, + "loss": 0.5483, + "step": 28161 + }, + { + "epoch": 0.48662565662150953, + "grad_norm": 0.9309716475575264, + "learning_rate": 1.0917930371210325e-05, + "loss": 0.4204, + "step": 28162 + }, + { + "epoch": 0.48664293613491844, + "grad_norm": 1.3962024168682636, + "learning_rate": 1.0917373083020545e-05, + "loss": 0.324, + "step": 28163 + }, + { + "epoch": 0.48666021564832734, + "grad_norm": 0.7143022942397441, + "learning_rate": 1.091681579195748e-05, + "loss": 0.3636, + "step": 28164 + }, + { + "epoch": 0.48667749516173625, + "grad_norm": 1.3435501918332042, + "learning_rate": 1.091625849802288e-05, + "loss": 0.5644, + "step": 28165 + }, + { + "epoch": 0.48669477467514516, + "grad_norm": 1.4617955264949172, + "learning_rate": 1.0915701201218488e-05, + "loss": 0.292, + "step": 28166 + }, + { + "epoch": 0.48671205418855407, + "grad_norm": 0.8414783503699627, + "learning_rate": 1.091514390154605e-05, + "loss": 0.509, + "step": 28167 + }, + { + "epoch": 0.486729333701963, + "grad_norm": 0.8096466978913776, + "learning_rate": 1.0914586599007313e-05, + "loss": 0.457, + "step": 28168 + }, + { + "epoch": 0.4867466132153719, + "grad_norm": 1.0158575288502143, + "learning_rate": 1.091402929360402e-05, + "loss": 0.4788, + "step": 28169 + }, + { + "epoch": 0.48676389272878073, + "grad_norm": 0.8337464183972935, + "learning_rate": 1.091347198533792e-05, + "loss": 0.252, + "step": 28170 + }, + { + "epoch": 0.48678117224218964, + "grad_norm": 0.8116734561774498, + "learning_rate": 1.0912914674210753e-05, + "loss": 0.4039, + "step": 28171 + }, + { + "epoch": 0.48679845175559855, + "grad_norm": 0.9233744264941578, + "learning_rate": 1.0912357360224271e-05, + "loss": 0.5573, + "step": 28172 + }, + { + "epoch": 0.48681573126900746, + "grad_norm": 1.3640550225658388, + "learning_rate": 1.0911800043380216e-05, + "loss": 0.5687, + "step": 28173 + }, + { + "epoch": 0.48683301078241636, + "grad_norm": 0.8161733595331434, + "learning_rate": 1.0911242723680333e-05, + "loss": 0.3838, + "step": 28174 + }, + { + "epoch": 0.48685029029582527, + "grad_norm": 1.1231304391461234, + "learning_rate": 1.0910685401126367e-05, + "loss": 0.3895, + "step": 28175 + }, + { + "epoch": 0.4868675698092342, + "grad_norm": 1.1894601425995224, + "learning_rate": 1.0910128075720067e-05, + "loss": 0.5246, + "step": 28176 + }, + { + "epoch": 0.4868848493226431, + "grad_norm": 1.2616628679202777, + "learning_rate": 1.0909570747463177e-05, + "loss": 0.3891, + "step": 28177 + }, + { + "epoch": 0.486902128836052, + "grad_norm": 0.7921642461458032, + "learning_rate": 1.0909013416357443e-05, + "loss": 0.5266, + "step": 28178 + }, + { + "epoch": 0.4869194083494609, + "grad_norm": 0.9318991356190248, + "learning_rate": 1.0908456082404609e-05, + "loss": 0.2805, + "step": 28179 + }, + { + "epoch": 0.48693668786286975, + "grad_norm": 1.1883030627838262, + "learning_rate": 1.090789874560642e-05, + "loss": 0.4778, + "step": 28180 + }, + { + "epoch": 0.48695396737627866, + "grad_norm": 1.3444336845718357, + "learning_rate": 1.0907341405964625e-05, + "loss": 0.6347, + "step": 28181 + }, + { + "epoch": 0.48697124688968757, + "grad_norm": 0.9745090966921557, + "learning_rate": 1.0906784063480967e-05, + "loss": 0.4909, + "step": 28182 + }, + { + "epoch": 0.4869885264030965, + "grad_norm": 1.396490001821435, + "learning_rate": 1.090622671815719e-05, + "loss": 0.5848, + "step": 28183 + }, + { + "epoch": 0.4870058059165054, + "grad_norm": 0.7913374327840744, + "learning_rate": 1.0905669369995045e-05, + "loss": 0.3801, + "step": 28184 + }, + { + "epoch": 0.4870230854299143, + "grad_norm": 0.5629919416888486, + "learning_rate": 1.0905112018996275e-05, + "loss": 0.7684, + "step": 28185 + }, + { + "epoch": 0.4870403649433232, + "grad_norm": 0.6479005112699348, + "learning_rate": 1.0904554665162623e-05, + "loss": 0.3473, + "step": 28186 + }, + { + "epoch": 0.4870576444567321, + "grad_norm": 0.8762182630214099, + "learning_rate": 1.090399730849584e-05, + "loss": 0.4904, + "step": 28187 + }, + { + "epoch": 0.487074923970141, + "grad_norm": 0.8926094305520899, + "learning_rate": 1.0903439948997667e-05, + "loss": 0.5253, + "step": 28188 + }, + { + "epoch": 0.4870922034835499, + "grad_norm": 0.9495459388297504, + "learning_rate": 1.0902882586669852e-05, + "loss": 0.4531, + "step": 28189 + }, + { + "epoch": 0.48710948299695883, + "grad_norm": 0.9100231294805359, + "learning_rate": 1.0902325221514141e-05, + "loss": 0.384, + "step": 28190 + }, + { + "epoch": 0.4871267625103677, + "grad_norm": 1.270873526494813, + "learning_rate": 1.0901767853532276e-05, + "loss": 0.6759, + "step": 28191 + }, + { + "epoch": 0.4871440420237766, + "grad_norm": 0.6537726062003126, + "learning_rate": 1.0901210482726008e-05, + "loss": 0.4548, + "step": 28192 + }, + { + "epoch": 0.4871613215371855, + "grad_norm": 1.145380355834073, + "learning_rate": 1.090065310909708e-05, + "loss": 0.4968, + "step": 28193 + }, + { + "epoch": 0.4871786010505944, + "grad_norm": 1.068525180640396, + "learning_rate": 1.0900095732647238e-05, + "loss": 0.4898, + "step": 28194 + }, + { + "epoch": 0.4871958805640033, + "grad_norm": 1.0215963392136291, + "learning_rate": 1.0899538353378226e-05, + "loss": 0.5189, + "step": 28195 + }, + { + "epoch": 0.4872131600774122, + "grad_norm": 1.0739970359003148, + "learning_rate": 1.0898980971291793e-05, + "loss": 0.3602, + "step": 28196 + }, + { + "epoch": 0.48723043959082113, + "grad_norm": 1.0773534385711916, + "learning_rate": 1.0898423586389683e-05, + "loss": 0.3552, + "step": 28197 + }, + { + "epoch": 0.48724771910423004, + "grad_norm": 0.9548411115404449, + "learning_rate": 1.0897866198673643e-05, + "loss": 0.3763, + "step": 28198 + }, + { + "epoch": 0.48726499861763894, + "grad_norm": 1.0164521229237042, + "learning_rate": 1.0897308808145417e-05, + "loss": 0.5652, + "step": 28199 + }, + { + "epoch": 0.48728227813104785, + "grad_norm": 0.8625104120752202, + "learning_rate": 1.089675141480675e-05, + "loss": 0.4926, + "step": 28200 + }, + { + "epoch": 0.4872995576444567, + "grad_norm": 0.9213629292863068, + "learning_rate": 1.0896194018659392e-05, + "loss": 0.4701, + "step": 28201 + }, + { + "epoch": 0.4873168371578656, + "grad_norm": 0.9561296322220515, + "learning_rate": 1.0895636619705086e-05, + "loss": 0.4342, + "step": 28202 + }, + { + "epoch": 0.4873341166712745, + "grad_norm": 0.9283976635191306, + "learning_rate": 1.0895079217945575e-05, + "loss": 0.4279, + "step": 28203 + }, + { + "epoch": 0.4873513961846834, + "grad_norm": 0.7988630016547443, + "learning_rate": 1.089452181338261e-05, + "loss": 0.3386, + "step": 28204 + }, + { + "epoch": 0.48736867569809234, + "grad_norm": 1.2122540322848312, + "learning_rate": 1.0893964406017936e-05, + "loss": 0.348, + "step": 28205 + }, + { + "epoch": 0.48738595521150124, + "grad_norm": 0.7930757703295672, + "learning_rate": 1.0893406995853295e-05, + "loss": 0.4902, + "step": 28206 + }, + { + "epoch": 0.48740323472491015, + "grad_norm": 0.8863541804012269, + "learning_rate": 1.0892849582890436e-05, + "loss": 0.3548, + "step": 28207 + }, + { + "epoch": 0.48742051423831906, + "grad_norm": 0.8239774010515575, + "learning_rate": 1.0892292167131107e-05, + "loss": 0.5251, + "step": 28208 + }, + { + "epoch": 0.48743779375172797, + "grad_norm": 1.148542370790334, + "learning_rate": 1.089173474857705e-05, + "loss": 0.363, + "step": 28209 + }, + { + "epoch": 0.4874550732651369, + "grad_norm": 1.0534263478692474, + "learning_rate": 1.0891177327230012e-05, + "loss": 0.3736, + "step": 28210 + }, + { + "epoch": 0.4874723527785458, + "grad_norm": 1.3219086103366846, + "learning_rate": 1.0890619903091736e-05, + "loss": 0.4852, + "step": 28211 + }, + { + "epoch": 0.48748963229195463, + "grad_norm": 1.835622556762567, + "learning_rate": 1.0890062476163975e-05, + "loss": 0.4382, + "step": 28212 + }, + { + "epoch": 0.48750691180536354, + "grad_norm": 1.1049523491787123, + "learning_rate": 1.0889505046448468e-05, + "loss": 0.3877, + "step": 28213 + }, + { + "epoch": 0.48752419131877245, + "grad_norm": 0.778177682794994, + "learning_rate": 1.0888947613946963e-05, + "loss": 0.5071, + "step": 28214 + }, + { + "epoch": 0.48754147083218136, + "grad_norm": 0.8684867908191596, + "learning_rate": 1.0888390178661205e-05, + "loss": 0.3474, + "step": 28215 + }, + { + "epoch": 0.48755875034559026, + "grad_norm": 2.057730631849154, + "learning_rate": 1.0887832740592946e-05, + "loss": 0.4536, + "step": 28216 + }, + { + "epoch": 0.48757602985899917, + "grad_norm": 0.7651415985426014, + "learning_rate": 1.0887275299743926e-05, + "loss": 0.3376, + "step": 28217 + }, + { + "epoch": 0.4875933093724081, + "grad_norm": 0.8743639783126684, + "learning_rate": 1.088671785611589e-05, + "loss": 0.5492, + "step": 28218 + }, + { + "epoch": 0.487610588885817, + "grad_norm": 1.3682229195325557, + "learning_rate": 1.088616040971059e-05, + "loss": 0.2056, + "step": 28219 + }, + { + "epoch": 0.4876278683992259, + "grad_norm": 1.0019253220486244, + "learning_rate": 1.0885602960529767e-05, + "loss": 0.5229, + "step": 28220 + }, + { + "epoch": 0.4876451479126348, + "grad_norm": 0.9820344869500137, + "learning_rate": 1.0885045508575168e-05, + "loss": 0.4251, + "step": 28221 + }, + { + "epoch": 0.4876624274260437, + "grad_norm": 0.778417875799942, + "learning_rate": 1.0884488053848538e-05, + "loss": 0.5318, + "step": 28222 + }, + { + "epoch": 0.48767970693945256, + "grad_norm": 1.1532508931783263, + "learning_rate": 1.0883930596351623e-05, + "loss": 0.4333, + "step": 28223 + }, + { + "epoch": 0.48769698645286147, + "grad_norm": 1.1998892359849374, + "learning_rate": 1.0883373136086173e-05, + "loss": 0.4215, + "step": 28224 + }, + { + "epoch": 0.4877142659662704, + "grad_norm": 0.8562771324549666, + "learning_rate": 1.088281567305393e-05, + "loss": 0.2345, + "step": 28225 + }, + { + "epoch": 0.4877315454796793, + "grad_norm": 0.9017415842330369, + "learning_rate": 1.0882258207256639e-05, + "loss": 0.426, + "step": 28226 + }, + { + "epoch": 0.4877488249930882, + "grad_norm": 0.5600211734627744, + "learning_rate": 1.0881700738696052e-05, + "loss": 0.7124, + "step": 28227 + }, + { + "epoch": 0.4877661045064971, + "grad_norm": 1.5449574919692648, + "learning_rate": 1.0881143267373913e-05, + "loss": 0.4437, + "step": 28228 + }, + { + "epoch": 0.487783384019906, + "grad_norm": 1.4635570997500562, + "learning_rate": 1.0880585793291962e-05, + "loss": 0.6321, + "step": 28229 + }, + { + "epoch": 0.4878006635333149, + "grad_norm": 1.302467786088161, + "learning_rate": 1.0880028316451952e-05, + "loss": 0.4788, + "step": 28230 + }, + { + "epoch": 0.4878179430467238, + "grad_norm": 1.7009735704145243, + "learning_rate": 1.0879470836855626e-05, + "loss": 0.3753, + "step": 28231 + }, + { + "epoch": 0.48783522256013273, + "grad_norm": 1.198125818583473, + "learning_rate": 1.0878913354504728e-05, + "loss": 0.3931, + "step": 28232 + }, + { + "epoch": 0.4878525020735416, + "grad_norm": 0.6301738025984689, + "learning_rate": 1.087835586940101e-05, + "loss": 0.2638, + "step": 28233 + }, + { + "epoch": 0.4878697815869505, + "grad_norm": 1.3606304383339245, + "learning_rate": 1.0877798381546212e-05, + "loss": 0.2842, + "step": 28234 + }, + { + "epoch": 0.4878870611003594, + "grad_norm": 1.2020471581931107, + "learning_rate": 1.0877240890942082e-05, + "loss": 0.6843, + "step": 28235 + }, + { + "epoch": 0.4879043406137683, + "grad_norm": 0.912185373966615, + "learning_rate": 1.0876683397590368e-05, + "loss": 0.5089, + "step": 28236 + }, + { + "epoch": 0.4879216201271772, + "grad_norm": 0.8793496529167619, + "learning_rate": 1.0876125901492818e-05, + "loss": 0.4229, + "step": 28237 + }, + { + "epoch": 0.4879388996405861, + "grad_norm": 0.9700244226614536, + "learning_rate": 1.0875568402651171e-05, + "loss": 0.4649, + "step": 28238 + }, + { + "epoch": 0.48795617915399503, + "grad_norm": 1.1046846846634482, + "learning_rate": 1.0875010901067177e-05, + "loss": 0.277, + "step": 28239 + }, + { + "epoch": 0.48797345866740394, + "grad_norm": 0.9642947583362184, + "learning_rate": 1.0874453396742586e-05, + "loss": 0.468, + "step": 28240 + }, + { + "epoch": 0.48799073818081284, + "grad_norm": 1.6094178012535707, + "learning_rate": 1.0873895889679141e-05, + "loss": 0.4874, + "step": 28241 + }, + { + "epoch": 0.48800801769422175, + "grad_norm": 2.053571127072349, + "learning_rate": 1.0873338379878583e-05, + "loss": 0.5125, + "step": 28242 + }, + { + "epoch": 0.48802529720763066, + "grad_norm": 0.8641856672033686, + "learning_rate": 1.0872780867342665e-05, + "loss": 0.5454, + "step": 28243 + }, + { + "epoch": 0.4880425767210395, + "grad_norm": 1.1348006149309904, + "learning_rate": 1.0872223352073133e-05, + "loss": 0.4237, + "step": 28244 + }, + { + "epoch": 0.4880598562344484, + "grad_norm": 0.8359732788160068, + "learning_rate": 1.087166583407173e-05, + "loss": 0.3882, + "step": 28245 + }, + { + "epoch": 0.4880771357478573, + "grad_norm": 0.838381130408896, + "learning_rate": 1.08711083133402e-05, + "loss": 0.3141, + "step": 28246 + }, + { + "epoch": 0.48809441526126623, + "grad_norm": 0.9051556427219741, + "learning_rate": 1.0870550789880296e-05, + "loss": 0.4008, + "step": 28247 + }, + { + "epoch": 0.48811169477467514, + "grad_norm": 1.057739246370684, + "learning_rate": 1.086999326369376e-05, + "loss": 0.5141, + "step": 28248 + }, + { + "epoch": 0.48812897428808405, + "grad_norm": 0.7986461932656648, + "learning_rate": 1.0869435734782337e-05, + "loss": 0.4454, + "step": 28249 + }, + { + "epoch": 0.48814625380149296, + "grad_norm": 0.9045525541123224, + "learning_rate": 1.0868878203147781e-05, + "loss": 0.4399, + "step": 28250 + }, + { + "epoch": 0.48816353331490187, + "grad_norm": 1.09917915000547, + "learning_rate": 1.086832066879183e-05, + "loss": 0.5295, + "step": 28251 + }, + { + "epoch": 0.4881808128283108, + "grad_norm": 0.669976429802136, + "learning_rate": 1.0867763131716229e-05, + "loss": 0.3723, + "step": 28252 + }, + { + "epoch": 0.4881980923417197, + "grad_norm": 1.7941274193394143, + "learning_rate": 1.0867205591922731e-05, + "loss": 0.4126, + "step": 28253 + }, + { + "epoch": 0.48821537185512853, + "grad_norm": 0.8723720738139892, + "learning_rate": 1.0866648049413078e-05, + "loss": 0.6265, + "step": 28254 + }, + { + "epoch": 0.48823265136853744, + "grad_norm": 0.9129192335872065, + "learning_rate": 1.0866090504189018e-05, + "loss": 0.3838, + "step": 28255 + }, + { + "epoch": 0.48824993088194635, + "grad_norm": 0.6114339643962369, + "learning_rate": 1.0865532956252296e-05, + "loss": 0.3067, + "step": 28256 + }, + { + "epoch": 0.48826721039535526, + "grad_norm": 0.8135541612306056, + "learning_rate": 1.0864975405604661e-05, + "loss": 0.4439, + "step": 28257 + }, + { + "epoch": 0.48828448990876416, + "grad_norm": 0.9304896205345129, + "learning_rate": 1.0864417852247853e-05, + "loss": 0.4815, + "step": 28258 + }, + { + "epoch": 0.48830176942217307, + "grad_norm": 1.4984373673259022, + "learning_rate": 1.0863860296183625e-05, + "loss": 0.221, + "step": 28259 + }, + { + "epoch": 0.488319048935582, + "grad_norm": 0.9305999766487962, + "learning_rate": 1.0863302737413723e-05, + "loss": 0.3688, + "step": 28260 + }, + { + "epoch": 0.4883363284489909, + "grad_norm": 1.053178313198856, + "learning_rate": 1.086274517593989e-05, + "loss": 0.4522, + "step": 28261 + }, + { + "epoch": 0.4883536079623998, + "grad_norm": 1.1756921937773366, + "learning_rate": 1.0862187611763875e-05, + "loss": 0.27, + "step": 28262 + }, + { + "epoch": 0.4883708874758087, + "grad_norm": 1.323634213239049, + "learning_rate": 1.0861630044887418e-05, + "loss": 0.4727, + "step": 28263 + }, + { + "epoch": 0.4883881669892176, + "grad_norm": 0.7813828274629601, + "learning_rate": 1.0861072475312276e-05, + "loss": 0.4826, + "step": 28264 + }, + { + "epoch": 0.48840544650262646, + "grad_norm": 0.9826236632948113, + "learning_rate": 1.0860514903040187e-05, + "loss": 0.6076, + "step": 28265 + }, + { + "epoch": 0.48842272601603537, + "grad_norm": 0.7414839697325856, + "learning_rate": 1.0859957328072896e-05, + "loss": 0.4277, + "step": 28266 + }, + { + "epoch": 0.4884400055294443, + "grad_norm": 1.2033221013267186, + "learning_rate": 1.0859399750412158e-05, + "loss": 0.3848, + "step": 28267 + }, + { + "epoch": 0.4884572850428532, + "grad_norm": 0.7247070783858238, + "learning_rate": 1.0858842170059716e-05, + "loss": 0.4093, + "step": 28268 + }, + { + "epoch": 0.4884745645562621, + "grad_norm": 0.49387449146440004, + "learning_rate": 1.0858284587017309e-05, + "loss": 0.9454, + "step": 28269 + }, + { + "epoch": 0.488491844069671, + "grad_norm": 0.7714640324903367, + "learning_rate": 1.0857727001286695e-05, + "loss": 0.4443, + "step": 28270 + }, + { + "epoch": 0.4885091235830799, + "grad_norm": 1.2590553400796893, + "learning_rate": 1.0857169412869615e-05, + "loss": 0.3085, + "step": 28271 + }, + { + "epoch": 0.4885264030964888, + "grad_norm": 1.293102695651116, + "learning_rate": 1.0856611821767813e-05, + "loss": 0.5158, + "step": 28272 + }, + { + "epoch": 0.4885436826098977, + "grad_norm": 0.8310868076016964, + "learning_rate": 1.0856054227983038e-05, + "loss": 0.2683, + "step": 28273 + }, + { + "epoch": 0.48856096212330663, + "grad_norm": 0.7193951714981341, + "learning_rate": 1.0855496631517037e-05, + "loss": 0.5414, + "step": 28274 + }, + { + "epoch": 0.4885782416367155, + "grad_norm": 0.7985716921168121, + "learning_rate": 1.0854939032371551e-05, + "loss": 0.299, + "step": 28275 + }, + { + "epoch": 0.4885955211501244, + "grad_norm": 0.9427360469016578, + "learning_rate": 1.0854381430548338e-05, + "loss": 0.3582, + "step": 28276 + }, + { + "epoch": 0.4886128006635333, + "grad_norm": 0.6101496078463733, + "learning_rate": 1.0853823826049133e-05, + "loss": 0.5098, + "step": 28277 + }, + { + "epoch": 0.4886300801769422, + "grad_norm": 0.9126735386941998, + "learning_rate": 1.0853266218875687e-05, + "loss": 0.5477, + "step": 28278 + }, + { + "epoch": 0.4886473596903511, + "grad_norm": 0.8241246153098923, + "learning_rate": 1.0852708609029747e-05, + "loss": 0.4826, + "step": 28279 + }, + { + "epoch": 0.48866463920376, + "grad_norm": 0.852650270145532, + "learning_rate": 1.085215099651306e-05, + "loss": 0.3463, + "step": 28280 + }, + { + "epoch": 0.48868191871716893, + "grad_norm": 0.9750786068921679, + "learning_rate": 1.085159338132737e-05, + "loss": 0.3957, + "step": 28281 + }, + { + "epoch": 0.48869919823057784, + "grad_norm": 1.2471816081808882, + "learning_rate": 1.0851035763474427e-05, + "loss": 0.6159, + "step": 28282 + }, + { + "epoch": 0.48871647774398674, + "grad_norm": 1.1666223295211962, + "learning_rate": 1.0850478142955972e-05, + "loss": 0.2149, + "step": 28283 + }, + { + "epoch": 0.48873375725739565, + "grad_norm": 0.8691569118810027, + "learning_rate": 1.0849920519773756e-05, + "loss": 0.3537, + "step": 28284 + }, + { + "epoch": 0.48875103677080456, + "grad_norm": 1.2131312825799452, + "learning_rate": 1.0849362893929527e-05, + "loss": 0.2644, + "step": 28285 + }, + { + "epoch": 0.4887683162842134, + "grad_norm": 1.20935830412066, + "learning_rate": 1.0848805265425023e-05, + "loss": 0.435, + "step": 28286 + }, + { + "epoch": 0.4887855957976223, + "grad_norm": 0.7736820219237562, + "learning_rate": 1.0848247634262002e-05, + "loss": 0.3439, + "step": 28287 + }, + { + "epoch": 0.4888028753110312, + "grad_norm": 1.2602924079778193, + "learning_rate": 1.0847690000442204e-05, + "loss": 0.3069, + "step": 28288 + }, + { + "epoch": 0.48882015482444013, + "grad_norm": 1.109151104826579, + "learning_rate": 1.0847132363967371e-05, + "loss": 0.4967, + "step": 28289 + }, + { + "epoch": 0.48883743433784904, + "grad_norm": 0.5875262312144305, + "learning_rate": 1.0846574724839262e-05, + "loss": 0.5984, + "step": 28290 + }, + { + "epoch": 0.48885471385125795, + "grad_norm": 0.8459899717309884, + "learning_rate": 1.0846017083059616e-05, + "loss": 0.29, + "step": 28291 + }, + { + "epoch": 0.48887199336466686, + "grad_norm": 0.9755829963278012, + "learning_rate": 1.0845459438630177e-05, + "loss": 0.3784, + "step": 28292 + }, + { + "epoch": 0.48888927287807576, + "grad_norm": 1.3998521495000888, + "learning_rate": 1.0844901791552697e-05, + "loss": 0.4831, + "step": 28293 + }, + { + "epoch": 0.4889065523914847, + "grad_norm": 0.8900162777115695, + "learning_rate": 1.084434414182892e-05, + "loss": 0.2819, + "step": 28294 + }, + { + "epoch": 0.4889238319048936, + "grad_norm": 0.8905765240625104, + "learning_rate": 1.084378648946059e-05, + "loss": 0.641, + "step": 28295 + }, + { + "epoch": 0.4889411114183025, + "grad_norm": 1.4047762168269058, + "learning_rate": 1.084322883444946e-05, + "loss": 0.4314, + "step": 28296 + }, + { + "epoch": 0.48895839093171134, + "grad_norm": 0.712130793525768, + "learning_rate": 1.0842671176797273e-05, + "loss": 0.2469, + "step": 28297 + }, + { + "epoch": 0.48897567044512025, + "grad_norm": 0.7699949972357675, + "learning_rate": 1.0842113516505775e-05, + "loss": 0.2281, + "step": 28298 + }, + { + "epoch": 0.48899294995852916, + "grad_norm": 0.7644031750650926, + "learning_rate": 1.0841555853576714e-05, + "loss": 0.2668, + "step": 28299 + }, + { + "epoch": 0.48901022947193806, + "grad_norm": 1.0501064931424147, + "learning_rate": 1.0840998188011835e-05, + "loss": 0.4863, + "step": 28300 + }, + { + "epoch": 0.48902750898534697, + "grad_norm": 1.0678701198105507, + "learning_rate": 1.0840440519812886e-05, + "loss": 0.591, + "step": 28301 + }, + { + "epoch": 0.4890447884987559, + "grad_norm": 0.8915513715611937, + "learning_rate": 1.0839882848981616e-05, + "loss": 0.3211, + "step": 28302 + }, + { + "epoch": 0.4890620680121648, + "grad_norm": 0.9730733498069161, + "learning_rate": 1.0839325175519763e-05, + "loss": 0.3619, + "step": 28303 + }, + { + "epoch": 0.4890793475255737, + "grad_norm": 1.3271632722828555, + "learning_rate": 1.0838767499429086e-05, + "loss": 0.5046, + "step": 28304 + }, + { + "epoch": 0.4890966270389826, + "grad_norm": 1.1565903301742329, + "learning_rate": 1.0838209820711322e-05, + "loss": 0.281, + "step": 28305 + }, + { + "epoch": 0.4891139065523915, + "grad_norm": 0.9761616377435832, + "learning_rate": 1.0837652139368223e-05, + "loss": 0.4755, + "step": 28306 + }, + { + "epoch": 0.48913118606580036, + "grad_norm": 1.1867748587695102, + "learning_rate": 1.0837094455401533e-05, + "loss": 0.3519, + "step": 28307 + }, + { + "epoch": 0.48914846557920927, + "grad_norm": 1.1669094084225617, + "learning_rate": 1.0836536768812999e-05, + "loss": 0.3722, + "step": 28308 + }, + { + "epoch": 0.4891657450926182, + "grad_norm": 0.8042921989470918, + "learning_rate": 1.0835979079604368e-05, + "loss": 0.433, + "step": 28309 + }, + { + "epoch": 0.4891830246060271, + "grad_norm": 1.035891671397998, + "learning_rate": 1.083542138777739e-05, + "loss": 0.5147, + "step": 28310 + }, + { + "epoch": 0.489200304119436, + "grad_norm": 0.6068899473210975, + "learning_rate": 1.0834863693333805e-05, + "loss": 0.3144, + "step": 28311 + }, + { + "epoch": 0.4892175836328449, + "grad_norm": 1.5941791055059502, + "learning_rate": 1.0834305996275368e-05, + "loss": 0.7213, + "step": 28312 + }, + { + "epoch": 0.4892348631462538, + "grad_norm": 0.8923898199993833, + "learning_rate": 1.0833748296603818e-05, + "loss": 0.5136, + "step": 28313 + }, + { + "epoch": 0.4892521426596627, + "grad_norm": 1.8330421758915896, + "learning_rate": 1.0833190594320905e-05, + "loss": 0.4591, + "step": 28314 + }, + { + "epoch": 0.4892694221730716, + "grad_norm": 0.8944336029401955, + "learning_rate": 1.0832632889428374e-05, + "loss": 0.5231, + "step": 28315 + }, + { + "epoch": 0.48928670168648053, + "grad_norm": 0.8551174281273308, + "learning_rate": 1.0832075181927977e-05, + "loss": 0.3292, + "step": 28316 + }, + { + "epoch": 0.48930398119988944, + "grad_norm": 1.702404754417771, + "learning_rate": 1.0831517471821456e-05, + "loss": 0.4257, + "step": 28317 + }, + { + "epoch": 0.4893212607132983, + "grad_norm": 0.6906800001935458, + "learning_rate": 1.0830959759110558e-05, + "loss": 0.4162, + "step": 28318 + }, + { + "epoch": 0.4893385402267072, + "grad_norm": 0.7984015345234983, + "learning_rate": 1.0830402043797032e-05, + "loss": 0.353, + "step": 28319 + }, + { + "epoch": 0.4893558197401161, + "grad_norm": 0.7438677758857073, + "learning_rate": 1.0829844325882624e-05, + "loss": 0.625, + "step": 28320 + }, + { + "epoch": 0.489373099253525, + "grad_norm": 0.7201617195883856, + "learning_rate": 1.0829286605369079e-05, + "loss": 0.2406, + "step": 28321 + }, + { + "epoch": 0.4893903787669339, + "grad_norm": 1.1453868438422545, + "learning_rate": 1.0828728882258147e-05, + "loss": 0.3387, + "step": 28322 + }, + { + "epoch": 0.4894076582803428, + "grad_norm": 0.5976741001176284, + "learning_rate": 1.0828171156551572e-05, + "loss": 0.7942, + "step": 28323 + }, + { + "epoch": 0.48942493779375174, + "grad_norm": 1.3254272634736366, + "learning_rate": 1.0827613428251103e-05, + "loss": 0.4341, + "step": 28324 + }, + { + "epoch": 0.48944221730716064, + "grad_norm": 0.7041089233302522, + "learning_rate": 1.0827055697358487e-05, + "loss": 0.3512, + "step": 28325 + }, + { + "epoch": 0.48945949682056955, + "grad_norm": 1.7232394728174707, + "learning_rate": 1.0826497963875466e-05, + "loss": 0.5914, + "step": 28326 + }, + { + "epoch": 0.48947677633397846, + "grad_norm": 0.8267770995051836, + "learning_rate": 1.0825940227803792e-05, + "loss": 0.3477, + "step": 28327 + }, + { + "epoch": 0.4894940558473873, + "grad_norm": 0.6838433413428091, + "learning_rate": 1.0825382489145214e-05, + "loss": 0.4195, + "step": 28328 + }, + { + "epoch": 0.4895113353607962, + "grad_norm": 0.972022520404128, + "learning_rate": 1.082482474790147e-05, + "loss": 0.5043, + "step": 28329 + }, + { + "epoch": 0.4895286148742051, + "grad_norm": 1.2325090504094744, + "learning_rate": 1.0824267004074314e-05, + "loss": 0.4563, + "step": 28330 + }, + { + "epoch": 0.48954589438761403, + "grad_norm": 0.9462743896966385, + "learning_rate": 1.0823709257665494e-05, + "loss": 0.3747, + "step": 28331 + }, + { + "epoch": 0.48956317390102294, + "grad_norm": 1.235233760139888, + "learning_rate": 1.082315150867675e-05, + "loss": 0.5645, + "step": 28332 + }, + { + "epoch": 0.48958045341443185, + "grad_norm": 0.7714110146099193, + "learning_rate": 1.0822593757109835e-05, + "loss": 0.3598, + "step": 28333 + }, + { + "epoch": 0.48959773292784076, + "grad_norm": 0.7424645482670861, + "learning_rate": 1.0822036002966497e-05, + "loss": 0.537, + "step": 28334 + }, + { + "epoch": 0.48961501244124966, + "grad_norm": 0.5746282343021937, + "learning_rate": 1.0821478246248474e-05, + "loss": 0.4647, + "step": 28335 + }, + { + "epoch": 0.48963229195465857, + "grad_norm": 0.9206502061223107, + "learning_rate": 1.082092048695752e-05, + "loss": 0.448, + "step": 28336 + }, + { + "epoch": 0.4896495714680675, + "grad_norm": 0.5156556154371096, + "learning_rate": 1.0820362725095382e-05, + "loss": 0.5733, + "step": 28337 + }, + { + "epoch": 0.4896668509814764, + "grad_norm": 1.0354869290642568, + "learning_rate": 1.0819804960663805e-05, + "loss": 0.397, + "step": 28338 + }, + { + "epoch": 0.48968413049488524, + "grad_norm": 0.7196521555400075, + "learning_rate": 1.0819247193664537e-05, + "loss": 0.3474, + "step": 28339 + }, + { + "epoch": 0.48970141000829415, + "grad_norm": 1.016956895544896, + "learning_rate": 1.0818689424099324e-05, + "loss": 0.5085, + "step": 28340 + }, + { + "epoch": 0.48971868952170305, + "grad_norm": 1.0795866262620433, + "learning_rate": 1.0818131651969912e-05, + "loss": 0.381, + "step": 28341 + }, + { + "epoch": 0.48973596903511196, + "grad_norm": 0.9389846205624961, + "learning_rate": 1.0817573877278052e-05, + "loss": 0.4458, + "step": 28342 + }, + { + "epoch": 0.48975324854852087, + "grad_norm": 0.9705783502975119, + "learning_rate": 1.0817016100025488e-05, + "loss": 0.4201, + "step": 28343 + }, + { + "epoch": 0.4897705280619298, + "grad_norm": 1.4824521448720709, + "learning_rate": 1.0816458320213967e-05, + "loss": 0.4327, + "step": 28344 + }, + { + "epoch": 0.4897878075753387, + "grad_norm": 1.1290818212366234, + "learning_rate": 1.0815900537845238e-05, + "loss": 0.55, + "step": 28345 + }, + { + "epoch": 0.4898050870887476, + "grad_norm": 1.455297781785829, + "learning_rate": 1.0815342752921044e-05, + "loss": 0.5844, + "step": 28346 + }, + { + "epoch": 0.4898223666021565, + "grad_norm": 0.9870327649267049, + "learning_rate": 1.0814784965443137e-05, + "loss": 0.3412, + "step": 28347 + }, + { + "epoch": 0.4898396461155654, + "grad_norm": 1.1304245644061504, + "learning_rate": 1.081422717541326e-05, + "loss": 0.3874, + "step": 28348 + }, + { + "epoch": 0.4898569256289743, + "grad_norm": 1.0405413120097935, + "learning_rate": 1.081366938283316e-05, + "loss": 0.5611, + "step": 28349 + }, + { + "epoch": 0.48987420514238317, + "grad_norm": 0.7825408177432656, + "learning_rate": 1.0813111587704588e-05, + "loss": 0.3676, + "step": 28350 + }, + { + "epoch": 0.4898914846557921, + "grad_norm": 0.7777840716698408, + "learning_rate": 1.081255379002929e-05, + "loss": 0.4179, + "step": 28351 + }, + { + "epoch": 0.489908764169201, + "grad_norm": 0.845320100339375, + "learning_rate": 1.0811995989809008e-05, + "loss": 0.4263, + "step": 28352 + }, + { + "epoch": 0.4899260436826099, + "grad_norm": 1.1654591569105395, + "learning_rate": 1.0811438187045498e-05, + "loss": 0.4557, + "step": 28353 + }, + { + "epoch": 0.4899433231960188, + "grad_norm": 1.0860849131427104, + "learning_rate": 1.0810880381740499e-05, + "loss": 0.3576, + "step": 28354 + }, + { + "epoch": 0.4899606027094277, + "grad_norm": 0.8464082184943021, + "learning_rate": 1.081032257389576e-05, + "loss": 0.4375, + "step": 28355 + }, + { + "epoch": 0.4899778822228366, + "grad_norm": 1.1777260390444004, + "learning_rate": 1.0809764763513028e-05, + "loss": 0.4946, + "step": 28356 + }, + { + "epoch": 0.4899951617362455, + "grad_norm": 0.9297752536577737, + "learning_rate": 1.0809206950594056e-05, + "loss": 0.5419, + "step": 28357 + }, + { + "epoch": 0.49001244124965443, + "grad_norm": 0.9729541114661777, + "learning_rate": 1.080864913514058e-05, + "loss": 0.4388, + "step": 28358 + }, + { + "epoch": 0.49002972076306334, + "grad_norm": 0.49743739072949056, + "learning_rate": 1.080809131715436e-05, + "loss": 0.5372, + "step": 28359 + }, + { + "epoch": 0.4900470002764722, + "grad_norm": 1.081185206987264, + "learning_rate": 1.0807533496637133e-05, + "loss": 0.519, + "step": 28360 + }, + { + "epoch": 0.4900642797898811, + "grad_norm": 0.9719404093320365, + "learning_rate": 1.0806975673590649e-05, + "loss": 0.3816, + "step": 28361 + }, + { + "epoch": 0.49008155930329, + "grad_norm": 1.0316064983388333, + "learning_rate": 1.0806417848016658e-05, + "loss": 0.3333, + "step": 28362 + }, + { + "epoch": 0.4900988388166989, + "grad_norm": 0.9048450589038842, + "learning_rate": 1.0805860019916902e-05, + "loss": 0.2706, + "step": 28363 + }, + { + "epoch": 0.4901161183301078, + "grad_norm": 0.6548880698222828, + "learning_rate": 1.0805302189293136e-05, + "loss": 0.4073, + "step": 28364 + }, + { + "epoch": 0.4901333978435167, + "grad_norm": 1.1594891478490224, + "learning_rate": 1.08047443561471e-05, + "loss": 0.4173, + "step": 28365 + }, + { + "epoch": 0.49015067735692563, + "grad_norm": 1.081855620780087, + "learning_rate": 1.0804186520480542e-05, + "loss": 0.4701, + "step": 28366 + }, + { + "epoch": 0.49016795687033454, + "grad_norm": 0.99893127357246, + "learning_rate": 1.0803628682295213e-05, + "loss": 0.4136, + "step": 28367 + }, + { + "epoch": 0.49018523638374345, + "grad_norm": 1.4571367143247655, + "learning_rate": 1.0803070841592857e-05, + "loss": 0.4383, + "step": 28368 + }, + { + "epoch": 0.49020251589715236, + "grad_norm": 0.8272052991934017, + "learning_rate": 1.080251299837522e-05, + "loss": 0.404, + "step": 28369 + }, + { + "epoch": 0.49021979541056127, + "grad_norm": 2.425422457813135, + "learning_rate": 1.0801955152644053e-05, + "loss": 0.6625, + "step": 28370 + }, + { + "epoch": 0.4902370749239701, + "grad_norm": 0.9561325235345793, + "learning_rate": 1.0801397304401101e-05, + "loss": 0.4379, + "step": 28371 + }, + { + "epoch": 0.490254354437379, + "grad_norm": 0.9437715094053574, + "learning_rate": 1.0800839453648114e-05, + "loss": 0.4872, + "step": 28372 + }, + { + "epoch": 0.49027163395078793, + "grad_norm": 1.2615332040096476, + "learning_rate": 1.0800281600386835e-05, + "loss": 0.4937, + "step": 28373 + }, + { + "epoch": 0.49028891346419684, + "grad_norm": 1.2313863269196827, + "learning_rate": 1.0799723744619016e-05, + "loss": 0.4819, + "step": 28374 + }, + { + "epoch": 0.49030619297760575, + "grad_norm": 0.834759249522442, + "learning_rate": 1.0799165886346397e-05, + "loss": 0.4691, + "step": 28375 + }, + { + "epoch": 0.49032347249101466, + "grad_norm": 0.7479609385500408, + "learning_rate": 1.0798608025570734e-05, + "loss": 0.4283, + "step": 28376 + }, + { + "epoch": 0.49034075200442356, + "grad_norm": 0.9443999444534922, + "learning_rate": 1.0798050162293767e-05, + "loss": 0.4613, + "step": 28377 + }, + { + "epoch": 0.49035803151783247, + "grad_norm": 1.3543642218028535, + "learning_rate": 1.0797492296517245e-05, + "loss": 0.4023, + "step": 28378 + }, + { + "epoch": 0.4903753110312414, + "grad_norm": 0.7211473880257857, + "learning_rate": 1.079693442824292e-05, + "loss": 0.4773, + "step": 28379 + }, + { + "epoch": 0.4903925905446503, + "grad_norm": 1.3043625302290096, + "learning_rate": 1.0796376557472533e-05, + "loss": 0.4888, + "step": 28380 + }, + { + "epoch": 0.49040987005805914, + "grad_norm": 1.3319625195930676, + "learning_rate": 1.0795818684207834e-05, + "loss": 0.4119, + "step": 28381 + }, + { + "epoch": 0.49042714957146805, + "grad_norm": 0.6100059091396351, + "learning_rate": 1.0795260808450573e-05, + "loss": 0.8651, + "step": 28382 + }, + { + "epoch": 0.49044442908487695, + "grad_norm": 1.3771150567653943, + "learning_rate": 1.0794702930202495e-05, + "loss": 0.3284, + "step": 28383 + }, + { + "epoch": 0.49046170859828586, + "grad_norm": 0.817664806818819, + "learning_rate": 1.0794145049465345e-05, + "loss": 0.4061, + "step": 28384 + }, + { + "epoch": 0.49047898811169477, + "grad_norm": 1.4037642887938677, + "learning_rate": 1.0793587166240874e-05, + "loss": 0.5391, + "step": 28385 + }, + { + "epoch": 0.4904962676251037, + "grad_norm": 0.5884378923551696, + "learning_rate": 1.0793029280530825e-05, + "loss": 0.6016, + "step": 28386 + }, + { + "epoch": 0.4905135471385126, + "grad_norm": 1.6141865826150492, + "learning_rate": 1.079247139233695e-05, + "loss": 0.4403, + "step": 28387 + }, + { + "epoch": 0.4905308266519215, + "grad_norm": 1.1367727521160405, + "learning_rate": 1.0791913501660995e-05, + "loss": 0.5291, + "step": 28388 + }, + { + "epoch": 0.4905481061653304, + "grad_norm": 0.8406098608980626, + "learning_rate": 1.0791355608504703e-05, + "loss": 0.3409, + "step": 28389 + }, + { + "epoch": 0.4905653856787393, + "grad_norm": 1.0037661024499949, + "learning_rate": 1.0790797712869829e-05, + "loss": 0.2865, + "step": 28390 + }, + { + "epoch": 0.4905826651921482, + "grad_norm": 1.5658917305499447, + "learning_rate": 1.0790239814758116e-05, + "loss": 0.2999, + "step": 28391 + }, + { + "epoch": 0.49059994470555707, + "grad_norm": 0.5970871132664701, + "learning_rate": 1.078968191417131e-05, + "loss": 0.3083, + "step": 28392 + }, + { + "epoch": 0.490617224218966, + "grad_norm": 1.187902968509033, + "learning_rate": 1.0789124011111163e-05, + "loss": 0.4424, + "step": 28393 + }, + { + "epoch": 0.4906345037323749, + "grad_norm": 1.0195461408178448, + "learning_rate": 1.0788566105579419e-05, + "loss": 0.5912, + "step": 28394 + }, + { + "epoch": 0.4906517832457838, + "grad_norm": 1.0278085547577325, + "learning_rate": 1.0788008197577824e-05, + "loss": 0.6057, + "step": 28395 + }, + { + "epoch": 0.4906690627591927, + "grad_norm": 1.0482766586031587, + "learning_rate": 1.0787450287108129e-05, + "loss": 0.5843, + "step": 28396 + }, + { + "epoch": 0.4906863422726016, + "grad_norm": 0.8019872683735475, + "learning_rate": 1.078689237417208e-05, + "loss": 0.3804, + "step": 28397 + }, + { + "epoch": 0.4907036217860105, + "grad_norm": 0.8070261051716505, + "learning_rate": 1.0786334458771422e-05, + "loss": 0.2896, + "step": 28398 + }, + { + "epoch": 0.4907209012994194, + "grad_norm": 1.2155760839458145, + "learning_rate": 1.0785776540907907e-05, + "loss": 0.403, + "step": 28399 + }, + { + "epoch": 0.49073818081282833, + "grad_norm": 1.1079481863180922, + "learning_rate": 1.078521862058328e-05, + "loss": 0.4193, + "step": 28400 + }, + { + "epoch": 0.49075546032623724, + "grad_norm": 0.9480416099677691, + "learning_rate": 1.0784660697799285e-05, + "loss": 0.5287, + "step": 28401 + }, + { + "epoch": 0.4907727398396461, + "grad_norm": 0.4552405550178349, + "learning_rate": 1.0784102772557678e-05, + "loss": 0.7348, + "step": 28402 + }, + { + "epoch": 0.490790019353055, + "grad_norm": 0.8144633004254073, + "learning_rate": 1.07835448448602e-05, + "loss": 0.5105, + "step": 28403 + }, + { + "epoch": 0.4908072988664639, + "grad_norm": 0.9595716254992799, + "learning_rate": 1.07829869147086e-05, + "loss": 0.4771, + "step": 28404 + }, + { + "epoch": 0.4908245783798728, + "grad_norm": 0.9031280664824731, + "learning_rate": 1.0782428982104626e-05, + "loss": 0.3911, + "step": 28405 + }, + { + "epoch": 0.4908418578932817, + "grad_norm": 0.690491265300089, + "learning_rate": 1.0781871047050024e-05, + "loss": 0.391, + "step": 28406 + }, + { + "epoch": 0.4908591374066906, + "grad_norm": 0.7738831185375113, + "learning_rate": 1.078131310954654e-05, + "loss": 0.2487, + "step": 28407 + }, + { + "epoch": 0.49087641692009953, + "grad_norm": 0.8374178766767717, + "learning_rate": 1.0780755169595929e-05, + "loss": 0.5062, + "step": 28408 + }, + { + "epoch": 0.49089369643350844, + "grad_norm": 1.0618623913774303, + "learning_rate": 1.078019722719993e-05, + "loss": 0.3154, + "step": 28409 + }, + { + "epoch": 0.49091097594691735, + "grad_norm": 0.9333192207456956, + "learning_rate": 1.0779639282360294e-05, + "loss": 0.4516, + "step": 28410 + }, + { + "epoch": 0.49092825546032626, + "grad_norm": 1.6706834540773357, + "learning_rate": 1.0779081335078771e-05, + "loss": 0.514, + "step": 28411 + }, + { + "epoch": 0.49094553497373516, + "grad_norm": 0.8747527828544968, + "learning_rate": 1.0778523385357102e-05, + "loss": 0.3584, + "step": 28412 + }, + { + "epoch": 0.490962814487144, + "grad_norm": 0.743635611019375, + "learning_rate": 1.0777965433197042e-05, + "loss": 0.3441, + "step": 28413 + }, + { + "epoch": 0.4909800940005529, + "grad_norm": 0.8590052314950536, + "learning_rate": 1.0777407478600334e-05, + "loss": 0.5232, + "step": 28414 + }, + { + "epoch": 0.49099737351396183, + "grad_norm": 1.9227431151358982, + "learning_rate": 1.0776849521568729e-05, + "loss": 0.327, + "step": 28415 + }, + { + "epoch": 0.49101465302737074, + "grad_norm": 1.5038496232021779, + "learning_rate": 1.0776291562103972e-05, + "loss": 0.5569, + "step": 28416 + }, + { + "epoch": 0.49103193254077965, + "grad_norm": 0.7929501494592599, + "learning_rate": 1.0775733600207807e-05, + "loss": 0.341, + "step": 28417 + }, + { + "epoch": 0.49104921205418856, + "grad_norm": 1.3411947183525972, + "learning_rate": 1.0775175635881987e-05, + "loss": 0.4177, + "step": 28418 + }, + { + "epoch": 0.49106649156759746, + "grad_norm": 0.6859626853538289, + "learning_rate": 1.077461766912826e-05, + "loss": 0.3482, + "step": 28419 + }, + { + "epoch": 0.49108377108100637, + "grad_norm": 1.0545151348441095, + "learning_rate": 1.077405969994837e-05, + "loss": 0.2905, + "step": 28420 + }, + { + "epoch": 0.4911010505944153, + "grad_norm": 0.49828177627508186, + "learning_rate": 1.0773501728344065e-05, + "loss": 0.6692, + "step": 28421 + }, + { + "epoch": 0.4911183301078242, + "grad_norm": 1.7409121940358543, + "learning_rate": 1.0772943754317096e-05, + "loss": 0.4984, + "step": 28422 + }, + { + "epoch": 0.4911356096212331, + "grad_norm": 0.7692183550451348, + "learning_rate": 1.0772385777869208e-05, + "loss": 0.3588, + "step": 28423 + }, + { + "epoch": 0.49115288913464195, + "grad_norm": 1.4092307434040907, + "learning_rate": 1.0771827799002145e-05, + "loss": 0.4432, + "step": 28424 + }, + { + "epoch": 0.49117016864805085, + "grad_norm": 1.2859899811168423, + "learning_rate": 1.0771269817717667e-05, + "loss": 0.5135, + "step": 28425 + }, + { + "epoch": 0.49118744816145976, + "grad_norm": 1.2298109252053167, + "learning_rate": 1.0770711834017507e-05, + "loss": 0.3283, + "step": 28426 + }, + { + "epoch": 0.49120472767486867, + "grad_norm": 0.8431402420670382, + "learning_rate": 1.077015384790342e-05, + "loss": 0.375, + "step": 28427 + }, + { + "epoch": 0.4912220071882776, + "grad_norm": 0.852789904425069, + "learning_rate": 1.0769595859377154e-05, + "loss": 0.43, + "step": 28428 + }, + { + "epoch": 0.4912392867016865, + "grad_norm": 1.0908548437684433, + "learning_rate": 1.0769037868440452e-05, + "loss": 0.4419, + "step": 28429 + }, + { + "epoch": 0.4912565662150954, + "grad_norm": 1.1059242921589145, + "learning_rate": 1.0768479875095069e-05, + "loss": 0.3631, + "step": 28430 + }, + { + "epoch": 0.4912738457285043, + "grad_norm": 0.8176751826564594, + "learning_rate": 1.0767921879342746e-05, + "loss": 0.4073, + "step": 28431 + }, + { + "epoch": 0.4912911252419132, + "grad_norm": 1.6993869220710935, + "learning_rate": 1.0767363881185233e-05, + "loss": 0.3693, + "step": 28432 + }, + { + "epoch": 0.4913084047553221, + "grad_norm": 0.7570041652245575, + "learning_rate": 1.076680588062428e-05, + "loss": 0.4701, + "step": 28433 + }, + { + "epoch": 0.49132568426873097, + "grad_norm": 1.4350131954293295, + "learning_rate": 1.0766247877661634e-05, + "loss": 0.4857, + "step": 28434 + }, + { + "epoch": 0.4913429637821399, + "grad_norm": 0.9281010797099414, + "learning_rate": 1.0765689872299038e-05, + "loss": 0.6808, + "step": 28435 + }, + { + "epoch": 0.4913602432955488, + "grad_norm": 1.174654356267103, + "learning_rate": 1.0765131864538245e-05, + "loss": 0.489, + "step": 28436 + }, + { + "epoch": 0.4913775228089577, + "grad_norm": 1.4343011177994578, + "learning_rate": 1.0764573854381002e-05, + "loss": 0.6687, + "step": 28437 + }, + { + "epoch": 0.4913948023223666, + "grad_norm": 0.9919415848563535, + "learning_rate": 1.0764015841829051e-05, + "loss": 0.4332, + "step": 28438 + }, + { + "epoch": 0.4914120818357755, + "grad_norm": 0.9234145121593802, + "learning_rate": 1.076345782688415e-05, + "loss": 0.4846, + "step": 28439 + }, + { + "epoch": 0.4914293613491844, + "grad_norm": 0.7945674284538856, + "learning_rate": 1.0762899809548038e-05, + "loss": 0.4964, + "step": 28440 + }, + { + "epoch": 0.4914466408625933, + "grad_norm": 1.216821440459797, + "learning_rate": 1.0762341789822464e-05, + "loss": 0.6595, + "step": 28441 + }, + { + "epoch": 0.49146392037600223, + "grad_norm": 0.7763609161766246, + "learning_rate": 1.0761783767709182e-05, + "loss": 0.3259, + "step": 28442 + }, + { + "epoch": 0.49148119988941114, + "grad_norm": 0.8040726072143928, + "learning_rate": 1.0761225743209933e-05, + "loss": 0.3561, + "step": 28443 + }, + { + "epoch": 0.49149847940282004, + "grad_norm": 1.013847392917105, + "learning_rate": 1.0760667716326465e-05, + "loss": 0.4811, + "step": 28444 + }, + { + "epoch": 0.4915157589162289, + "grad_norm": 0.9349509558910367, + "learning_rate": 1.0760109687060531e-05, + "loss": 0.2766, + "step": 28445 + }, + { + "epoch": 0.4915330384296378, + "grad_norm": 1.0645814102849864, + "learning_rate": 1.0759551655413876e-05, + "loss": 0.4466, + "step": 28446 + }, + { + "epoch": 0.4915503179430467, + "grad_norm": 1.0927989114567398, + "learning_rate": 1.0758993621388245e-05, + "loss": 0.3598, + "step": 28447 + }, + { + "epoch": 0.4915675974564556, + "grad_norm": 0.8198124343509857, + "learning_rate": 1.0758435584985391e-05, + "loss": 0.3692, + "step": 28448 + }, + { + "epoch": 0.4915848769698645, + "grad_norm": 0.47561598006808226, + "learning_rate": 1.0757877546207056e-05, + "loss": 0.5114, + "step": 28449 + }, + { + "epoch": 0.49160215648327343, + "grad_norm": 0.9310538430717679, + "learning_rate": 1.0757319505054992e-05, + "loss": 0.4207, + "step": 28450 + }, + { + "epoch": 0.49161943599668234, + "grad_norm": 0.8298533391311875, + "learning_rate": 1.075676146153095e-05, + "loss": 0.4422, + "step": 28451 + }, + { + "epoch": 0.49163671551009125, + "grad_norm": 0.9492880381387139, + "learning_rate": 1.0756203415636669e-05, + "loss": 0.2734, + "step": 28452 + }, + { + "epoch": 0.49165399502350016, + "grad_norm": 0.892816705390882, + "learning_rate": 1.07556453673739e-05, + "loss": 0.367, + "step": 28453 + }, + { + "epoch": 0.49167127453690906, + "grad_norm": 0.8334154708506443, + "learning_rate": 1.0755087316744397e-05, + "loss": 0.4482, + "step": 28454 + }, + { + "epoch": 0.4916885540503179, + "grad_norm": 0.8106227095027637, + "learning_rate": 1.0754529263749898e-05, + "loss": 0.5402, + "step": 28455 + }, + { + "epoch": 0.4917058335637268, + "grad_norm": 0.47618391103453994, + "learning_rate": 1.0753971208392161e-05, + "loss": 0.6109, + "step": 28456 + }, + { + "epoch": 0.49172311307713573, + "grad_norm": 0.7255376814703495, + "learning_rate": 1.075341315067293e-05, + "loss": 0.3688, + "step": 28457 + }, + { + "epoch": 0.49174039259054464, + "grad_norm": 1.289308903790926, + "learning_rate": 1.0752855090593945e-05, + "loss": 0.4508, + "step": 28458 + }, + { + "epoch": 0.49175767210395355, + "grad_norm": 1.0224918916030974, + "learning_rate": 1.0752297028156966e-05, + "loss": 0.6476, + "step": 28459 + }, + { + "epoch": 0.49177495161736245, + "grad_norm": 0.6761485563339541, + "learning_rate": 1.0751738963363734e-05, + "loss": 0.3551, + "step": 28460 + }, + { + "epoch": 0.49179223113077136, + "grad_norm": 0.7355671277763814, + "learning_rate": 1.0751180896215997e-05, + "loss": 0.429, + "step": 28461 + }, + { + "epoch": 0.49180951064418027, + "grad_norm": 0.7588396968215788, + "learning_rate": 1.0750622826715504e-05, + "loss": 0.2278, + "step": 28462 + }, + { + "epoch": 0.4918267901575892, + "grad_norm": 1.1968207169482221, + "learning_rate": 1.0750064754864006e-05, + "loss": 0.6478, + "step": 28463 + }, + { + "epoch": 0.4918440696709981, + "grad_norm": 1.2288268686906945, + "learning_rate": 1.0749506680663244e-05, + "loss": 0.633, + "step": 28464 + }, + { + "epoch": 0.491861349184407, + "grad_norm": 1.6016216461396298, + "learning_rate": 1.0748948604114975e-05, + "loss": 0.435, + "step": 28465 + }, + { + "epoch": 0.49187862869781585, + "grad_norm": 0.7144276111439958, + "learning_rate": 1.0748390525220939e-05, + "loss": 0.4835, + "step": 28466 + }, + { + "epoch": 0.49189590821122475, + "grad_norm": 0.636067486550441, + "learning_rate": 1.074783244398289e-05, + "loss": 0.7963, + "step": 28467 + }, + { + "epoch": 0.49191318772463366, + "grad_norm": 0.9033735645994213, + "learning_rate": 1.0747274360402572e-05, + "loss": 0.4335, + "step": 28468 + }, + { + "epoch": 0.49193046723804257, + "grad_norm": 1.0176523734802359, + "learning_rate": 1.074671627448173e-05, + "loss": 0.4895, + "step": 28469 + }, + { + "epoch": 0.4919477467514515, + "grad_norm": 1.050066215790388, + "learning_rate": 1.074615818622212e-05, + "loss": 0.3755, + "step": 28470 + }, + { + "epoch": 0.4919650262648604, + "grad_norm": 0.6684936529302424, + "learning_rate": 1.0745600095625486e-05, + "loss": 0.3398, + "step": 28471 + }, + { + "epoch": 0.4919823057782693, + "grad_norm": 0.8764483237587072, + "learning_rate": 1.0745042002693573e-05, + "loss": 0.6452, + "step": 28472 + }, + { + "epoch": 0.4919995852916782, + "grad_norm": 0.7011565158305522, + "learning_rate": 1.0744483907428133e-05, + "loss": 0.3569, + "step": 28473 + }, + { + "epoch": 0.4920168648050871, + "grad_norm": 0.8088247810691093, + "learning_rate": 1.0743925809830914e-05, + "loss": 0.4372, + "step": 28474 + }, + { + "epoch": 0.492034144318496, + "grad_norm": 1.0308659258832475, + "learning_rate": 1.074336770990366e-05, + "loss": 0.4314, + "step": 28475 + }, + { + "epoch": 0.49205142383190487, + "grad_norm": 1.1390395810973601, + "learning_rate": 1.0742809607648126e-05, + "loss": 0.6867, + "step": 28476 + }, + { + "epoch": 0.4920687033453138, + "grad_norm": 1.1280555291810452, + "learning_rate": 1.0742251503066054e-05, + "loss": 0.4824, + "step": 28477 + }, + { + "epoch": 0.4920859828587227, + "grad_norm": 0.8936132555580504, + "learning_rate": 1.0741693396159193e-05, + "loss": 0.4642, + "step": 28478 + }, + { + "epoch": 0.4921032623721316, + "grad_norm": 0.680498444962186, + "learning_rate": 1.0741135286929294e-05, + "loss": 0.3781, + "step": 28479 + }, + { + "epoch": 0.4921205418855405, + "grad_norm": 1.2990977856735146, + "learning_rate": 1.0740577175378101e-05, + "loss": 0.4231, + "step": 28480 + }, + { + "epoch": 0.4921378213989494, + "grad_norm": 1.2486319240304882, + "learning_rate": 1.0740019061507363e-05, + "loss": 0.5868, + "step": 28481 + }, + { + "epoch": 0.4921551009123583, + "grad_norm": 1.1604605971802568, + "learning_rate": 1.0739460945318832e-05, + "loss": 0.4119, + "step": 28482 + }, + { + "epoch": 0.4921723804257672, + "grad_norm": 1.0717098476538547, + "learning_rate": 1.073890282681425e-05, + "loss": 0.3734, + "step": 28483 + }, + { + "epoch": 0.4921896599391761, + "grad_norm": 1.1620927201918212, + "learning_rate": 1.0738344705995369e-05, + "loss": 0.2008, + "step": 28484 + }, + { + "epoch": 0.49220693945258503, + "grad_norm": 1.1505251725784473, + "learning_rate": 1.0737786582863938e-05, + "loss": 0.5736, + "step": 28485 + }, + { + "epoch": 0.49222421896599394, + "grad_norm": 1.114704822764311, + "learning_rate": 1.0737228457421703e-05, + "loss": 0.3498, + "step": 28486 + }, + { + "epoch": 0.4922414984794028, + "grad_norm": 0.7016803040540383, + "learning_rate": 1.0736670329670413e-05, + "loss": 0.4187, + "step": 28487 + }, + { + "epoch": 0.4922587779928117, + "grad_norm": 0.7997986203148186, + "learning_rate": 1.0736112199611815e-05, + "loss": 0.3949, + "step": 28488 + }, + { + "epoch": 0.4922760575062206, + "grad_norm": 0.8964367782602453, + "learning_rate": 1.0735554067247655e-05, + "loss": 0.3429, + "step": 28489 + }, + { + "epoch": 0.4922933370196295, + "grad_norm": 1.4885908767872098, + "learning_rate": 1.0734995932579686e-05, + "loss": 0.5399, + "step": 28490 + }, + { + "epoch": 0.4923106165330384, + "grad_norm": 0.8494987473956033, + "learning_rate": 1.0734437795609653e-05, + "loss": 0.3683, + "step": 28491 + }, + { + "epoch": 0.49232789604644733, + "grad_norm": 0.7351852406011046, + "learning_rate": 1.0733879656339305e-05, + "loss": 0.3829, + "step": 28492 + }, + { + "epoch": 0.49234517555985624, + "grad_norm": 0.8327474838091744, + "learning_rate": 1.0733321514770391e-05, + "loss": 0.261, + "step": 28493 + }, + { + "epoch": 0.49236245507326515, + "grad_norm": 0.7827092755705954, + "learning_rate": 1.0732763370904659e-05, + "loss": 0.3365, + "step": 28494 + }, + { + "epoch": 0.49237973458667406, + "grad_norm": 2.1306675782368956, + "learning_rate": 1.0732205224743854e-05, + "loss": 0.4235, + "step": 28495 + }, + { + "epoch": 0.49239701410008296, + "grad_norm": 2.3967712383405364, + "learning_rate": 1.073164707628973e-05, + "loss": 0.5315, + "step": 28496 + }, + { + "epoch": 0.49241429361349187, + "grad_norm": 1.0031838440086323, + "learning_rate": 1.0731088925544033e-05, + "loss": 0.2376, + "step": 28497 + }, + { + "epoch": 0.4924315731269007, + "grad_norm": 0.9224053339842853, + "learning_rate": 1.0730530772508505e-05, + "loss": 0.4068, + "step": 28498 + }, + { + "epoch": 0.49244885264030963, + "grad_norm": 0.5481162625671822, + "learning_rate": 1.0729972617184901e-05, + "loss": 0.5829, + "step": 28499 + }, + { + "epoch": 0.49246613215371854, + "grad_norm": 0.7871565276039881, + "learning_rate": 1.0729414459574967e-05, + "loss": 0.4335, + "step": 28500 + }, + { + "epoch": 0.49248341166712745, + "grad_norm": 1.1172604045835244, + "learning_rate": 1.072885629968045e-05, + "loss": 0.4636, + "step": 28501 + }, + { + "epoch": 0.49250069118053635, + "grad_norm": 0.7837734544751807, + "learning_rate": 1.0728298137503103e-05, + "loss": 0.3617, + "step": 28502 + }, + { + "epoch": 0.49251797069394526, + "grad_norm": 0.7910518788016573, + "learning_rate": 1.0727739973044669e-05, + "loss": 0.3289, + "step": 28503 + }, + { + "epoch": 0.49253525020735417, + "grad_norm": 0.9876868482360297, + "learning_rate": 1.0727181806306896e-05, + "loss": 0.2024, + "step": 28504 + }, + { + "epoch": 0.4925525297207631, + "grad_norm": 1.2938802103280778, + "learning_rate": 1.0726623637291538e-05, + "loss": 0.6705, + "step": 28505 + }, + { + "epoch": 0.492569809234172, + "grad_norm": 1.1321190209867997, + "learning_rate": 1.072606546600034e-05, + "loss": 0.3753, + "step": 28506 + }, + { + "epoch": 0.4925870887475809, + "grad_norm": 0.9982630025188727, + "learning_rate": 1.0725507292435049e-05, + "loss": 0.3493, + "step": 28507 + }, + { + "epoch": 0.49260436826098974, + "grad_norm": 0.579570265369748, + "learning_rate": 1.0724949116597411e-05, + "loss": 0.2654, + "step": 28508 + }, + { + "epoch": 0.49262164777439865, + "grad_norm": 0.9055434079904162, + "learning_rate": 1.072439093848918e-05, + "loss": 0.4391, + "step": 28509 + }, + { + "epoch": 0.49263892728780756, + "grad_norm": 0.6162982104335829, + "learning_rate": 1.07238327581121e-05, + "loss": 0.8217, + "step": 28510 + }, + { + "epoch": 0.49265620680121647, + "grad_norm": 1.4699493452314836, + "learning_rate": 1.0723274575467923e-05, + "loss": 0.4504, + "step": 28511 + }, + { + "epoch": 0.4926734863146254, + "grad_norm": 1.1456664794578946, + "learning_rate": 1.0722716390558391e-05, + "loss": 0.4943, + "step": 28512 + }, + { + "epoch": 0.4926907658280343, + "grad_norm": 0.9416463316460335, + "learning_rate": 1.072215820338526e-05, + "loss": 0.5131, + "step": 28513 + }, + { + "epoch": 0.4927080453414432, + "grad_norm": 0.8896180712675583, + "learning_rate": 1.0721600013950274e-05, + "loss": 0.5217, + "step": 28514 + }, + { + "epoch": 0.4927253248548521, + "grad_norm": 1.6031671318712446, + "learning_rate": 1.072104182225518e-05, + "loss": 0.4004, + "step": 28515 + }, + { + "epoch": 0.492742604368261, + "grad_norm": 1.2921404515077914, + "learning_rate": 1.0720483628301732e-05, + "loss": 0.3005, + "step": 28516 + }, + { + "epoch": 0.4927598838816699, + "grad_norm": 0.9160925651413667, + "learning_rate": 1.0719925432091671e-05, + "loss": 0.3723, + "step": 28517 + }, + { + "epoch": 0.4927771633950788, + "grad_norm": 0.775357834730567, + "learning_rate": 1.0719367233626751e-05, + "loss": 0.4073, + "step": 28518 + }, + { + "epoch": 0.4927944429084877, + "grad_norm": 0.5253445770641174, + "learning_rate": 1.0718809032908718e-05, + "loss": 0.5941, + "step": 28519 + }, + { + "epoch": 0.4928117224218966, + "grad_norm": 0.8498128932289133, + "learning_rate": 1.0718250829939321e-05, + "loss": 0.4651, + "step": 28520 + }, + { + "epoch": 0.4928290019353055, + "grad_norm": 0.6530199463369354, + "learning_rate": 1.0717692624720304e-05, + "loss": 0.2861, + "step": 28521 + }, + { + "epoch": 0.4928462814487144, + "grad_norm": 1.613249978061479, + "learning_rate": 1.0717134417253423e-05, + "loss": 0.6653, + "step": 28522 + }, + { + "epoch": 0.4928635609621233, + "grad_norm": 1.0786861849474279, + "learning_rate": 1.071657620754042e-05, + "loss": 0.6477, + "step": 28523 + }, + { + "epoch": 0.4928808404755322, + "grad_norm": 1.4517388846993262, + "learning_rate": 1.0716017995583048e-05, + "loss": 0.4728, + "step": 28524 + }, + { + "epoch": 0.4928981199889411, + "grad_norm": 1.3783242948758758, + "learning_rate": 1.0715459781383053e-05, + "loss": 0.4144, + "step": 28525 + }, + { + "epoch": 0.49291539950235, + "grad_norm": 0.909394123832373, + "learning_rate": 1.0714901564942184e-05, + "loss": 0.491, + "step": 28526 + }, + { + "epoch": 0.49293267901575893, + "grad_norm": 1.181497275938674, + "learning_rate": 1.0714343346262186e-05, + "loss": 0.3653, + "step": 28527 + }, + { + "epoch": 0.49294995852916784, + "grad_norm": 1.1997413091100635, + "learning_rate": 1.0713785125344813e-05, + "loss": 0.4432, + "step": 28528 + }, + { + "epoch": 0.4929672380425767, + "grad_norm": 1.104140826910828, + "learning_rate": 1.071322690219181e-05, + "loss": 0.3609, + "step": 28529 + }, + { + "epoch": 0.4929845175559856, + "grad_norm": 1.151103483317344, + "learning_rate": 1.0712668676804926e-05, + "loss": 0.6201, + "step": 28530 + }, + { + "epoch": 0.4930017970693945, + "grad_norm": 1.0180622399777268, + "learning_rate": 1.0712110449185911e-05, + "loss": 0.4349, + "step": 28531 + }, + { + "epoch": 0.4930190765828034, + "grad_norm": 1.260943546510921, + "learning_rate": 1.071155221933651e-05, + "loss": 0.4093, + "step": 28532 + }, + { + "epoch": 0.4930363560962123, + "grad_norm": 1.1533058354531665, + "learning_rate": 1.0710993987258473e-05, + "loss": 0.5155, + "step": 28533 + }, + { + "epoch": 0.49305363560962123, + "grad_norm": 0.7553596849110941, + "learning_rate": 1.0710435752953549e-05, + "loss": 0.3215, + "step": 28534 + }, + { + "epoch": 0.49307091512303014, + "grad_norm": 1.1033361684600473, + "learning_rate": 1.0709877516423488e-05, + "loss": 0.4221, + "step": 28535 + }, + { + "epoch": 0.49308819463643905, + "grad_norm": 0.6352215928947618, + "learning_rate": 1.0709319277670036e-05, + "loss": 1.0577, + "step": 28536 + }, + { + "epoch": 0.49310547414984796, + "grad_norm": 1.51972036252861, + "learning_rate": 1.0708761036694941e-05, + "loss": 0.3862, + "step": 28537 + }, + { + "epoch": 0.49312275366325686, + "grad_norm": 1.0783552835358636, + "learning_rate": 1.0708202793499956e-05, + "loss": 0.4441, + "step": 28538 + }, + { + "epoch": 0.49314003317666577, + "grad_norm": 0.737225878278943, + "learning_rate": 1.0707644548086822e-05, + "loss": 0.392, + "step": 28539 + }, + { + "epoch": 0.4931573126900746, + "grad_norm": 0.7991029159621904, + "learning_rate": 1.0707086300457294e-05, + "loss": 0.3854, + "step": 28540 + }, + { + "epoch": 0.49317459220348353, + "grad_norm": 0.9657687505238401, + "learning_rate": 1.0706528050613117e-05, + "loss": 0.4539, + "step": 28541 + }, + { + "epoch": 0.49319187171689244, + "grad_norm": 0.8025708590534976, + "learning_rate": 1.070596979855604e-05, + "loss": 0.315, + "step": 28542 + }, + { + "epoch": 0.49320915123030135, + "grad_norm": 0.7049899815520329, + "learning_rate": 1.0705411544287811e-05, + "loss": 0.4453, + "step": 28543 + }, + { + "epoch": 0.49322643074371025, + "grad_norm": 0.6381493316413357, + "learning_rate": 1.0704853287810179e-05, + "loss": 0.2815, + "step": 28544 + }, + { + "epoch": 0.49324371025711916, + "grad_norm": 0.8508877982162361, + "learning_rate": 1.0704295029124896e-05, + "loss": 0.397, + "step": 28545 + }, + { + "epoch": 0.49326098977052807, + "grad_norm": 0.787843440731372, + "learning_rate": 1.0703736768233708e-05, + "loss": 0.3891, + "step": 28546 + }, + { + "epoch": 0.493278269283937, + "grad_norm": 1.1896418613311002, + "learning_rate": 1.0703178505138358e-05, + "loss": 0.6744, + "step": 28547 + }, + { + "epoch": 0.4932955487973459, + "grad_norm": 0.983059950633524, + "learning_rate": 1.0702620239840604e-05, + "loss": 0.4736, + "step": 28548 + }, + { + "epoch": 0.4933128283107548, + "grad_norm": 0.6947293121381057, + "learning_rate": 1.0702061972342189e-05, + "loss": 0.3063, + "step": 28549 + }, + { + "epoch": 0.49333010782416364, + "grad_norm": 0.37942283883173983, + "learning_rate": 1.0701503702644862e-05, + "loss": 0.5689, + "step": 28550 + }, + { + "epoch": 0.49334738733757255, + "grad_norm": 1.1040726190391899, + "learning_rate": 1.0700945430750373e-05, + "loss": 0.4395, + "step": 28551 + }, + { + "epoch": 0.49336466685098146, + "grad_norm": 0.8786997966573676, + "learning_rate": 1.0700387156660468e-05, + "loss": 0.696, + "step": 28552 + }, + { + "epoch": 0.49338194636439037, + "grad_norm": 0.9680868204254263, + "learning_rate": 1.06998288803769e-05, + "loss": 0.4864, + "step": 28553 + }, + { + "epoch": 0.4933992258777993, + "grad_norm": 0.7975863322029882, + "learning_rate": 1.0699270601901413e-05, + "loss": 0.4207, + "step": 28554 + }, + { + "epoch": 0.4934165053912082, + "grad_norm": 0.7241123914387418, + "learning_rate": 1.0698712321235754e-05, + "loss": 0.2856, + "step": 28555 + }, + { + "epoch": 0.4934337849046171, + "grad_norm": 1.0558683056281737, + "learning_rate": 1.0698154038381678e-05, + "loss": 0.4113, + "step": 28556 + }, + { + "epoch": 0.493451064418026, + "grad_norm": 0.8128149981811763, + "learning_rate": 1.0697595753340932e-05, + "loss": 0.3164, + "step": 28557 + }, + { + "epoch": 0.4934683439314349, + "grad_norm": 1.7558545844434028, + "learning_rate": 1.0697037466115259e-05, + "loss": 0.5136, + "step": 28558 + }, + { + "epoch": 0.4934856234448438, + "grad_norm": 1.138849234508428, + "learning_rate": 1.0696479176706419e-05, + "loss": 0.4147, + "step": 28559 + }, + { + "epoch": 0.4935029029582527, + "grad_norm": 0.6059059537492162, + "learning_rate": 1.0695920885116147e-05, + "loss": 0.804, + "step": 28560 + }, + { + "epoch": 0.4935201824716616, + "grad_norm": 0.905533380321215, + "learning_rate": 1.0695362591346198e-05, + "loss": 0.6555, + "step": 28561 + }, + { + "epoch": 0.4935374619850705, + "grad_norm": 0.8522911570836905, + "learning_rate": 1.0694804295398323e-05, + "loss": 0.4835, + "step": 28562 + }, + { + "epoch": 0.4935547414984794, + "grad_norm": 0.80248065572367, + "learning_rate": 1.069424599727427e-05, + "loss": 0.2449, + "step": 28563 + }, + { + "epoch": 0.4935720210118883, + "grad_norm": 1.0676321702027038, + "learning_rate": 1.0693687696975779e-05, + "loss": 0.3464, + "step": 28564 + }, + { + "epoch": 0.4935893005252972, + "grad_norm": 0.5193338646564283, + "learning_rate": 1.0693129394504611e-05, + "loss": 0.4437, + "step": 28565 + }, + { + "epoch": 0.4936065800387061, + "grad_norm": 0.5825137533014653, + "learning_rate": 1.0692571089862507e-05, + "loss": 0.2692, + "step": 28566 + }, + { + "epoch": 0.493623859552115, + "grad_norm": 0.8904331996395216, + "learning_rate": 1.0692012783051217e-05, + "loss": 0.3486, + "step": 28567 + }, + { + "epoch": 0.4936411390655239, + "grad_norm": 0.7070651462140344, + "learning_rate": 1.069145447407249e-05, + "loss": 0.3082, + "step": 28568 + }, + { + "epoch": 0.49365841857893283, + "grad_norm": 0.6139931130910257, + "learning_rate": 1.0690896162928079e-05, + "loss": 0.8263, + "step": 28569 + }, + { + "epoch": 0.49367569809234174, + "grad_norm": 0.9774410539006196, + "learning_rate": 1.0690337849619728e-05, + "loss": 0.483, + "step": 28570 + }, + { + "epoch": 0.49369297760575065, + "grad_norm": 0.8691574106096102, + "learning_rate": 1.0689779534149183e-05, + "loss": 0.4889, + "step": 28571 + }, + { + "epoch": 0.4937102571191595, + "grad_norm": 1.040798846564958, + "learning_rate": 1.0689221216518198e-05, + "loss": 0.4821, + "step": 28572 + }, + { + "epoch": 0.4937275366325684, + "grad_norm": 1.4859041340956514, + "learning_rate": 1.0688662896728519e-05, + "loss": 0.5335, + "step": 28573 + }, + { + "epoch": 0.4937448161459773, + "grad_norm": 1.0350599270185281, + "learning_rate": 1.0688104574781897e-05, + "loss": 0.4185, + "step": 28574 + }, + { + "epoch": 0.4937620956593862, + "grad_norm": 0.4456423415461856, + "learning_rate": 1.0687546250680076e-05, + "loss": 0.5363, + "step": 28575 + }, + { + "epoch": 0.49377937517279513, + "grad_norm": 0.910538641571071, + "learning_rate": 1.068698792442481e-05, + "loss": 0.3641, + "step": 28576 + }, + { + "epoch": 0.49379665468620404, + "grad_norm": 1.5500876413336133, + "learning_rate": 1.0686429596017846e-05, + "loss": 0.5185, + "step": 28577 + }, + { + "epoch": 0.49381393419961295, + "grad_norm": 1.1210855515049296, + "learning_rate": 1.0685871265460932e-05, + "loss": 0.4841, + "step": 28578 + }, + { + "epoch": 0.49383121371302185, + "grad_norm": 0.6099842716859233, + "learning_rate": 1.0685312932755816e-05, + "loss": 0.4397, + "step": 28579 + }, + { + "epoch": 0.49384849322643076, + "grad_norm": 1.2765902992083835, + "learning_rate": 1.0684754597904253e-05, + "loss": 0.3396, + "step": 28580 + }, + { + "epoch": 0.49386577273983967, + "grad_norm": 1.5154846915921072, + "learning_rate": 1.068419626090798e-05, + "loss": 0.3538, + "step": 28581 + }, + { + "epoch": 0.4938830522532485, + "grad_norm": 1.0014020325384902, + "learning_rate": 1.0683637921768755e-05, + "loss": 0.2483, + "step": 28582 + }, + { + "epoch": 0.49390033176665743, + "grad_norm": 1.0980624122410367, + "learning_rate": 1.0683079580488325e-05, + "loss": 0.4046, + "step": 28583 + }, + { + "epoch": 0.49391761128006634, + "grad_norm": 0.856666431716411, + "learning_rate": 1.0682521237068436e-05, + "loss": 0.4086, + "step": 28584 + }, + { + "epoch": 0.49393489079347525, + "grad_norm": 0.9209583767109334, + "learning_rate": 1.0681962891510837e-05, + "loss": 0.5524, + "step": 28585 + }, + { + "epoch": 0.49395217030688415, + "grad_norm": 1.067025621081535, + "learning_rate": 1.0681404543817282e-05, + "loss": 0.5608, + "step": 28586 + }, + { + "epoch": 0.49396944982029306, + "grad_norm": 1.0422319966246771, + "learning_rate": 1.0680846193989514e-05, + "loss": 0.6397, + "step": 28587 + }, + { + "epoch": 0.49398672933370197, + "grad_norm": 0.8260120993846493, + "learning_rate": 1.0680287842029284e-05, + "loss": 0.4342, + "step": 28588 + }, + { + "epoch": 0.4940040088471109, + "grad_norm": 0.8635153435462557, + "learning_rate": 1.0679729487938343e-05, + "loss": 0.3858, + "step": 28589 + }, + { + "epoch": 0.4940212883605198, + "grad_norm": 1.122834617262275, + "learning_rate": 1.0679171131718438e-05, + "loss": 0.4339, + "step": 28590 + }, + { + "epoch": 0.4940385678739287, + "grad_norm": 0.8169955203139103, + "learning_rate": 1.0678612773371316e-05, + "loss": 0.545, + "step": 28591 + }, + { + "epoch": 0.4940558473873376, + "grad_norm": 0.6113293824176035, + "learning_rate": 1.0678054412898724e-05, + "loss": 0.3598, + "step": 28592 + }, + { + "epoch": 0.49407312690074645, + "grad_norm": 0.8449618814046383, + "learning_rate": 1.0677496050302418e-05, + "loss": 0.2579, + "step": 28593 + }, + { + "epoch": 0.49409040641415536, + "grad_norm": 1.3463276313257735, + "learning_rate": 1.0676937685584141e-05, + "loss": 0.3461, + "step": 28594 + }, + { + "epoch": 0.49410768592756427, + "grad_norm": 1.1386782259688402, + "learning_rate": 1.0676379318745644e-05, + "loss": 0.41, + "step": 28595 + }, + { + "epoch": 0.4941249654409732, + "grad_norm": 1.1110068626752982, + "learning_rate": 1.0675820949788676e-05, + "loss": 0.6278, + "step": 28596 + }, + { + "epoch": 0.4941422449543821, + "grad_norm": 1.2123432261858906, + "learning_rate": 1.0675262578714987e-05, + "loss": 0.3772, + "step": 28597 + }, + { + "epoch": 0.494159524467791, + "grad_norm": 0.9848959480726454, + "learning_rate": 1.0674704205526321e-05, + "loss": 0.4192, + "step": 28598 + }, + { + "epoch": 0.4941768039811999, + "grad_norm": 1.044026054207821, + "learning_rate": 1.0674145830224432e-05, + "loss": 0.4965, + "step": 28599 + }, + { + "epoch": 0.4941940834946088, + "grad_norm": 0.724757430493793, + "learning_rate": 1.067358745281107e-05, + "loss": 0.6327, + "step": 28600 + }, + { + "epoch": 0.4942113630080177, + "grad_norm": 0.7888542071502805, + "learning_rate": 1.0673029073287978e-05, + "loss": 0.4201, + "step": 28601 + }, + { + "epoch": 0.4942286425214266, + "grad_norm": 1.1545562373911298, + "learning_rate": 1.0672470691656906e-05, + "loss": 0.4686, + "step": 28602 + }, + { + "epoch": 0.49424592203483547, + "grad_norm": 1.4161493396952967, + "learning_rate": 1.0671912307919607e-05, + "loss": 0.6397, + "step": 28603 + }, + { + "epoch": 0.4942632015482444, + "grad_norm": 1.0661769958598353, + "learning_rate": 1.0671353922077826e-05, + "loss": 0.4179, + "step": 28604 + }, + { + "epoch": 0.4942804810616533, + "grad_norm": 0.5933919355696969, + "learning_rate": 1.0670795534133314e-05, + "loss": 0.6877, + "step": 28605 + }, + { + "epoch": 0.4942977605750622, + "grad_norm": 1.0806952406820491, + "learning_rate": 1.067023714408782e-05, + "loss": 0.3482, + "step": 28606 + }, + { + "epoch": 0.4943150400884711, + "grad_norm": 0.9079710571659283, + "learning_rate": 1.0669678751943091e-05, + "loss": 0.5198, + "step": 28607 + }, + { + "epoch": 0.49433231960188, + "grad_norm": 0.9276351158911326, + "learning_rate": 1.066912035770088e-05, + "loss": 0.4694, + "step": 28608 + }, + { + "epoch": 0.4943495991152889, + "grad_norm": 0.732459130549926, + "learning_rate": 1.0668561961362932e-05, + "loss": 0.2098, + "step": 28609 + }, + { + "epoch": 0.4943668786286978, + "grad_norm": 0.8944412969962855, + "learning_rate": 1.0668003562930997e-05, + "loss": 0.3131, + "step": 28610 + }, + { + "epoch": 0.49438415814210673, + "grad_norm": 1.1288990113401691, + "learning_rate": 1.0667445162406825e-05, + "loss": 0.4074, + "step": 28611 + }, + { + "epoch": 0.49440143765551564, + "grad_norm": 0.7940496635810596, + "learning_rate": 1.0666886759792158e-05, + "loss": 0.4398, + "step": 28612 + }, + { + "epoch": 0.49441871716892455, + "grad_norm": 1.296878315512072, + "learning_rate": 1.0666328355088758e-05, + "loss": 0.4402, + "step": 28613 + }, + { + "epoch": 0.4944359966823334, + "grad_norm": 0.8369274778214562, + "learning_rate": 1.0665769948298366e-05, + "loss": 0.3055, + "step": 28614 + }, + { + "epoch": 0.4944532761957423, + "grad_norm": 1.2330400833371382, + "learning_rate": 1.0665211539422728e-05, + "loss": 0.6249, + "step": 28615 + }, + { + "epoch": 0.4944705557091512, + "grad_norm": 0.93838378744434, + "learning_rate": 1.06646531284636e-05, + "loss": 0.5117, + "step": 28616 + }, + { + "epoch": 0.4944878352225601, + "grad_norm": 1.0410974349154656, + "learning_rate": 1.0664094715422728e-05, + "loss": 0.2865, + "step": 28617 + }, + { + "epoch": 0.49450511473596903, + "grad_norm": 1.2417975395141425, + "learning_rate": 1.066353630030186e-05, + "loss": 0.5489, + "step": 28618 + }, + { + "epoch": 0.49452239424937794, + "grad_norm": 0.7833191331291808, + "learning_rate": 1.0662977883102746e-05, + "loss": 0.302, + "step": 28619 + }, + { + "epoch": 0.49453967376278685, + "grad_norm": 0.8739702041942832, + "learning_rate": 1.0662419463827136e-05, + "loss": 0.4432, + "step": 28620 + }, + { + "epoch": 0.49455695327619575, + "grad_norm": 1.6571219252586509, + "learning_rate": 1.0661861042476778e-05, + "loss": 0.3994, + "step": 28621 + }, + { + "epoch": 0.49457423278960466, + "grad_norm": 0.7055037211614819, + "learning_rate": 1.066130261905342e-05, + "loss": 0.6375, + "step": 28622 + }, + { + "epoch": 0.49459151230301357, + "grad_norm": 0.7751396949022004, + "learning_rate": 1.0660744193558812e-05, + "loss": 0.2992, + "step": 28623 + }, + { + "epoch": 0.4946087918164225, + "grad_norm": 0.8800249832924192, + "learning_rate": 1.0660185765994702e-05, + "loss": 0.4087, + "step": 28624 + }, + { + "epoch": 0.49462607132983133, + "grad_norm": 1.5405169421174194, + "learning_rate": 1.065962733636284e-05, + "loss": 0.4943, + "step": 28625 + }, + { + "epoch": 0.49464335084324024, + "grad_norm": 0.8238756384896032, + "learning_rate": 1.0659068904664978e-05, + "loss": 0.4049, + "step": 28626 + }, + { + "epoch": 0.49466063035664914, + "grad_norm": 0.9972171704955753, + "learning_rate": 1.0658510470902858e-05, + "loss": 0.3245, + "step": 28627 + }, + { + "epoch": 0.49467790987005805, + "grad_norm": 1.1555772971923566, + "learning_rate": 1.0657952035078235e-05, + "loss": 0.5891, + "step": 28628 + }, + { + "epoch": 0.49469518938346696, + "grad_norm": 0.8020497688083398, + "learning_rate": 1.0657393597192858e-05, + "loss": 0.3289, + "step": 28629 + }, + { + "epoch": 0.49471246889687587, + "grad_norm": 0.6292124374576228, + "learning_rate": 1.065683515724847e-05, + "loss": 0.2653, + "step": 28630 + }, + { + "epoch": 0.4947297484102848, + "grad_norm": 0.7732081060072101, + "learning_rate": 1.0656276715246829e-05, + "loss": 0.4196, + "step": 28631 + }, + { + "epoch": 0.4947470279236937, + "grad_norm": 1.7701588248314466, + "learning_rate": 1.0655718271189676e-05, + "loss": 0.4151, + "step": 28632 + }, + { + "epoch": 0.4947643074371026, + "grad_norm": 0.7358877498629327, + "learning_rate": 1.0655159825078767e-05, + "loss": 0.7281, + "step": 28633 + }, + { + "epoch": 0.4947815869505115, + "grad_norm": 0.6026552833971303, + "learning_rate": 1.0654601376915845e-05, + "loss": 0.5893, + "step": 28634 + }, + { + "epoch": 0.49479886646392035, + "grad_norm": 1.4062900454623655, + "learning_rate": 1.065404292670266e-05, + "loss": 0.6427, + "step": 28635 + }, + { + "epoch": 0.49481614597732926, + "grad_norm": 1.004764805339812, + "learning_rate": 1.0653484474440967e-05, + "loss": 0.3669, + "step": 28636 + }, + { + "epoch": 0.49483342549073817, + "grad_norm": 1.0112974278412814, + "learning_rate": 1.065292602013251e-05, + "loss": 0.5283, + "step": 28637 + }, + { + "epoch": 0.4948507050041471, + "grad_norm": 1.2511988377120766, + "learning_rate": 1.0652367563779037e-05, + "loss": 0.561, + "step": 28638 + }, + { + "epoch": 0.494867984517556, + "grad_norm": 0.7772442770449316, + "learning_rate": 1.06518091053823e-05, + "loss": 0.4316, + "step": 28639 + }, + { + "epoch": 0.4948852640309649, + "grad_norm": 1.0611154223736807, + "learning_rate": 1.065125064494405e-05, + "loss": 0.4099, + "step": 28640 + }, + { + "epoch": 0.4949025435443738, + "grad_norm": 1.2151131015341357, + "learning_rate": 1.0650692182466031e-05, + "loss": 0.4507, + "step": 28641 + }, + { + "epoch": 0.4949198230577827, + "grad_norm": 0.9695123036896114, + "learning_rate": 1.0650133717949995e-05, + "loss": 0.5233, + "step": 28642 + }, + { + "epoch": 0.4949371025711916, + "grad_norm": 1.2868109823706597, + "learning_rate": 1.0649575251397692e-05, + "loss": 0.4998, + "step": 28643 + }, + { + "epoch": 0.4949543820846005, + "grad_norm": 0.8269749466169865, + "learning_rate": 1.0649016782810868e-05, + "loss": 0.4119, + "step": 28644 + }, + { + "epoch": 0.4949716615980094, + "grad_norm": 1.0101729539211395, + "learning_rate": 1.0648458312191274e-05, + "loss": 0.4627, + "step": 28645 + }, + { + "epoch": 0.4949889411114183, + "grad_norm": 0.7639963995330121, + "learning_rate": 1.064789983954066e-05, + "loss": 0.3654, + "step": 28646 + }, + { + "epoch": 0.4950062206248272, + "grad_norm": 1.4470120430458253, + "learning_rate": 1.0647341364860775e-05, + "loss": 0.3361, + "step": 28647 + }, + { + "epoch": 0.4950235001382361, + "grad_norm": 1.1290174675756408, + "learning_rate": 1.0646782888153366e-05, + "loss": 0.493, + "step": 28648 + }, + { + "epoch": 0.495040779651645, + "grad_norm": 0.7709224684792795, + "learning_rate": 1.0646224409420187e-05, + "loss": 0.4537, + "step": 28649 + }, + { + "epoch": 0.4950580591650539, + "grad_norm": 1.0295950158754787, + "learning_rate": 1.064566592866298e-05, + "loss": 0.5433, + "step": 28650 + }, + { + "epoch": 0.4950753386784628, + "grad_norm": 0.932896776690795, + "learning_rate": 1.0645107445883502e-05, + "loss": 0.3568, + "step": 28651 + }, + { + "epoch": 0.4950926181918717, + "grad_norm": 0.8703937559596449, + "learning_rate": 1.0644548961083497e-05, + "loss": 0.4127, + "step": 28652 + }, + { + "epoch": 0.49510989770528063, + "grad_norm": 1.1484850609928965, + "learning_rate": 1.0643990474264716e-05, + "loss": 0.3005, + "step": 28653 + }, + { + "epoch": 0.49512717721868954, + "grad_norm": 1.1556969440058895, + "learning_rate": 1.0643431985428907e-05, + "loss": 0.365, + "step": 28654 + }, + { + "epoch": 0.49514445673209845, + "grad_norm": 1.1687852057048391, + "learning_rate": 1.0642873494577819e-05, + "loss": 0.3862, + "step": 28655 + }, + { + "epoch": 0.4951617362455073, + "grad_norm": 1.011410655497132, + "learning_rate": 1.0642315001713206e-05, + "loss": 0.3741, + "step": 28656 + }, + { + "epoch": 0.4951790157589162, + "grad_norm": 1.7904524705108031, + "learning_rate": 1.0641756506836811e-05, + "loss": 0.3688, + "step": 28657 + }, + { + "epoch": 0.4951962952723251, + "grad_norm": 1.0692658120921152, + "learning_rate": 1.0641198009950384e-05, + "loss": 0.2794, + "step": 28658 + }, + { + "epoch": 0.495213574785734, + "grad_norm": 1.0135921166073454, + "learning_rate": 1.064063951105568e-05, + "loss": 0.389, + "step": 28659 + }, + { + "epoch": 0.49523085429914293, + "grad_norm": 0.9585926544261404, + "learning_rate": 1.0640081010154444e-05, + "loss": 0.3691, + "step": 28660 + }, + { + "epoch": 0.49524813381255184, + "grad_norm": 1.055481977537712, + "learning_rate": 1.063952250724842e-05, + "loss": 0.5788, + "step": 28661 + }, + { + "epoch": 0.49526541332596075, + "grad_norm": 0.8649165773837019, + "learning_rate": 1.0638964002339372e-05, + "loss": 0.2488, + "step": 28662 + }, + { + "epoch": 0.49528269283936965, + "grad_norm": 1.006516973333382, + "learning_rate": 1.0638405495429036e-05, + "loss": 0.4281, + "step": 28663 + }, + { + "epoch": 0.49529997235277856, + "grad_norm": 0.9882401908613736, + "learning_rate": 1.0637846986519163e-05, + "loss": 0.4389, + "step": 28664 + }, + { + "epoch": 0.49531725186618747, + "grad_norm": 0.8779482797440099, + "learning_rate": 1.0637288475611507e-05, + "loss": 0.3943, + "step": 28665 + }, + { + "epoch": 0.4953345313795964, + "grad_norm": 0.7922499056614332, + "learning_rate": 1.0636729962707817e-05, + "loss": 0.4736, + "step": 28666 + }, + { + "epoch": 0.49535181089300523, + "grad_norm": 0.7898556421280079, + "learning_rate": 1.0636171447809835e-05, + "loss": 0.4862, + "step": 28667 + }, + { + "epoch": 0.49536909040641414, + "grad_norm": 1.1113005795349153, + "learning_rate": 1.0635612930919319e-05, + "loss": 0.5203, + "step": 28668 + }, + { + "epoch": 0.49538636991982304, + "grad_norm": 0.825872039440102, + "learning_rate": 1.0635054412038017e-05, + "loss": 0.4511, + "step": 28669 + }, + { + "epoch": 0.49540364943323195, + "grad_norm": 0.8961687639835944, + "learning_rate": 1.0634495891167671e-05, + "loss": 0.5936, + "step": 28670 + }, + { + "epoch": 0.49542092894664086, + "grad_norm": 0.525629274615509, + "learning_rate": 1.0633937368310039e-05, + "loss": 0.3195, + "step": 28671 + }, + { + "epoch": 0.49543820846004977, + "grad_norm": 0.6549802052757363, + "learning_rate": 1.0633378843466868e-05, + "loss": 0.5738, + "step": 28672 + }, + { + "epoch": 0.4954554879734587, + "grad_norm": 0.9679007229251121, + "learning_rate": 1.0632820316639907e-05, + "loss": 0.3803, + "step": 28673 + }, + { + "epoch": 0.4954727674868676, + "grad_norm": 1.0811059895256019, + "learning_rate": 1.0632261787830902e-05, + "loss": 0.3631, + "step": 28674 + }, + { + "epoch": 0.4954900470002765, + "grad_norm": 1.0465981999621834, + "learning_rate": 1.0631703257041606e-05, + "loss": 0.4169, + "step": 28675 + }, + { + "epoch": 0.4955073265136854, + "grad_norm": 1.1176971105065154, + "learning_rate": 1.0631144724273766e-05, + "loss": 0.3889, + "step": 28676 + }, + { + "epoch": 0.49552460602709425, + "grad_norm": 0.8332317756450078, + "learning_rate": 1.0630586189529136e-05, + "loss": 0.4029, + "step": 28677 + }, + { + "epoch": 0.49554188554050316, + "grad_norm": 1.0113706985386162, + "learning_rate": 1.0630027652809458e-05, + "loss": 0.4113, + "step": 28678 + }, + { + "epoch": 0.49555916505391207, + "grad_norm": 1.016842365479419, + "learning_rate": 1.0629469114116489e-05, + "loss": 0.5504, + "step": 28679 + }, + { + "epoch": 0.495576444567321, + "grad_norm": 1.0068757584627217, + "learning_rate": 1.0628910573451971e-05, + "loss": 0.5003, + "step": 28680 + }, + { + "epoch": 0.4955937240807299, + "grad_norm": 1.1674985359238543, + "learning_rate": 1.0628352030817659e-05, + "loss": 0.3667, + "step": 28681 + }, + { + "epoch": 0.4956110035941388, + "grad_norm": 1.1242192631076924, + "learning_rate": 1.0627793486215303e-05, + "loss": 0.4863, + "step": 28682 + }, + { + "epoch": 0.4956282831075477, + "grad_norm": 1.4542476070069241, + "learning_rate": 1.062723493964665e-05, + "loss": 0.3964, + "step": 28683 + }, + { + "epoch": 0.4956455626209566, + "grad_norm": 1.245480870158069, + "learning_rate": 1.0626676391113449e-05, + "loss": 0.4436, + "step": 28684 + }, + { + "epoch": 0.4956628421343655, + "grad_norm": 1.2147837405823088, + "learning_rate": 1.062611784061745e-05, + "loss": 0.3327, + "step": 28685 + }, + { + "epoch": 0.4956801216477744, + "grad_norm": 1.6681628114702607, + "learning_rate": 1.06255592881604e-05, + "loss": 0.4185, + "step": 28686 + }, + { + "epoch": 0.4956974011611833, + "grad_norm": 1.0755024868097889, + "learning_rate": 1.0625000733744051e-05, + "loss": 0.525, + "step": 28687 + }, + { + "epoch": 0.4957146806745922, + "grad_norm": 1.2406403166726514, + "learning_rate": 1.0624442177370154e-05, + "loss": 0.3568, + "step": 28688 + }, + { + "epoch": 0.4957319601880011, + "grad_norm": 0.7605878975536138, + "learning_rate": 1.0623883619040455e-05, + "loss": 0.6617, + "step": 28689 + }, + { + "epoch": 0.49574923970141, + "grad_norm": 1.171921577414007, + "learning_rate": 1.0623325058756706e-05, + "loss": 0.6439, + "step": 28690 + }, + { + "epoch": 0.4957665192148189, + "grad_norm": 1.2512904594665717, + "learning_rate": 1.0622766496520656e-05, + "loss": 0.5932, + "step": 28691 + }, + { + "epoch": 0.4957837987282278, + "grad_norm": 1.0893055294279248, + "learning_rate": 1.0622207932334055e-05, + "loss": 0.4723, + "step": 28692 + }, + { + "epoch": 0.4958010782416367, + "grad_norm": 1.132859206452261, + "learning_rate": 1.062164936619865e-05, + "loss": 0.58, + "step": 28693 + }, + { + "epoch": 0.4958183577550456, + "grad_norm": 1.6585864409410827, + "learning_rate": 1.0621090798116191e-05, + "loss": 0.5574, + "step": 28694 + }, + { + "epoch": 0.49583563726845453, + "grad_norm": 1.2670438797173014, + "learning_rate": 1.062053222808843e-05, + "loss": 0.4713, + "step": 28695 + }, + { + "epoch": 0.49585291678186344, + "grad_norm": 0.9967473060831693, + "learning_rate": 1.0619973656117113e-05, + "loss": 0.4195, + "step": 28696 + }, + { + "epoch": 0.49587019629527235, + "grad_norm": 0.9730679448437882, + "learning_rate": 1.0619415082203995e-05, + "loss": 0.3643, + "step": 28697 + }, + { + "epoch": 0.49588747580868126, + "grad_norm": 0.8788000880584196, + "learning_rate": 1.0618856506350816e-05, + "loss": 0.561, + "step": 28698 + }, + { + "epoch": 0.4959047553220901, + "grad_norm": 1.0616321250424432, + "learning_rate": 1.0618297928559336e-05, + "loss": 0.4462, + "step": 28699 + }, + { + "epoch": 0.495922034835499, + "grad_norm": 0.8054151894507015, + "learning_rate": 1.06177393488313e-05, + "loss": 0.4332, + "step": 28700 + }, + { + "epoch": 0.4959393143489079, + "grad_norm": 1.16428863191257, + "learning_rate": 1.0617180767168453e-05, + "loss": 0.3863, + "step": 28701 + }, + { + "epoch": 0.49595659386231683, + "grad_norm": 0.8934671444422478, + "learning_rate": 1.0616622183572554e-05, + "loss": 0.4932, + "step": 28702 + }, + { + "epoch": 0.49597387337572574, + "grad_norm": 1.409652564791948, + "learning_rate": 1.0616063598045348e-05, + "loss": 0.5205, + "step": 28703 + }, + { + "epoch": 0.49599115288913465, + "grad_norm": 0.8577187055421093, + "learning_rate": 1.061550501058858e-05, + "loss": 0.3832, + "step": 28704 + }, + { + "epoch": 0.49600843240254355, + "grad_norm": 1.4229489302044638, + "learning_rate": 1.0614946421204006e-05, + "loss": 0.4977, + "step": 28705 + }, + { + "epoch": 0.49602571191595246, + "grad_norm": 1.4195836116720446, + "learning_rate": 1.0614387829893373e-05, + "loss": 0.4284, + "step": 28706 + }, + { + "epoch": 0.49604299142936137, + "grad_norm": 0.9256669559363534, + "learning_rate": 1.0613829236658428e-05, + "loss": 0.5826, + "step": 28707 + }, + { + "epoch": 0.4960602709427703, + "grad_norm": 1.2810331526853154, + "learning_rate": 1.0613270641500926e-05, + "loss": 0.354, + "step": 28708 + }, + { + "epoch": 0.49607755045617913, + "grad_norm": 0.9991692295862836, + "learning_rate": 1.0612712044422613e-05, + "loss": 0.5978, + "step": 28709 + }, + { + "epoch": 0.49609482996958804, + "grad_norm": 0.9232500731024743, + "learning_rate": 1.0612153445425238e-05, + "loss": 0.5013, + "step": 28710 + }, + { + "epoch": 0.49611210948299694, + "grad_norm": 0.8297123911530867, + "learning_rate": 1.0611594844510553e-05, + "loss": 0.5655, + "step": 28711 + }, + { + "epoch": 0.49612938899640585, + "grad_norm": 1.1017486852512837, + "learning_rate": 1.0611036241680308e-05, + "loss": 0.6919, + "step": 28712 + }, + { + "epoch": 0.49614666850981476, + "grad_norm": 0.9565452272719857, + "learning_rate": 1.061047763693625e-05, + "loss": 0.6011, + "step": 28713 + }, + { + "epoch": 0.49616394802322367, + "grad_norm": 1.0484090187255966, + "learning_rate": 1.0609919030280129e-05, + "loss": 0.3895, + "step": 28714 + }, + { + "epoch": 0.4961812275366326, + "grad_norm": 1.0163123871571487, + "learning_rate": 1.0609360421713695e-05, + "loss": 0.3811, + "step": 28715 + }, + { + "epoch": 0.4961985070500415, + "grad_norm": 0.8754881744744414, + "learning_rate": 1.0608801811238699e-05, + "loss": 0.3954, + "step": 28716 + }, + { + "epoch": 0.4962157865634504, + "grad_norm": 1.0612738642288135, + "learning_rate": 1.0608243198856889e-05, + "loss": 0.4605, + "step": 28717 + }, + { + "epoch": 0.4962330660768593, + "grad_norm": 0.6946615928350789, + "learning_rate": 1.0607684584570013e-05, + "loss": 0.4139, + "step": 28718 + }, + { + "epoch": 0.4962503455902682, + "grad_norm": 0.7625470052203821, + "learning_rate": 1.0607125968379826e-05, + "loss": 0.4748, + "step": 28719 + }, + { + "epoch": 0.49626762510367706, + "grad_norm": 0.8645939994757926, + "learning_rate": 1.0606567350288072e-05, + "loss": 0.5346, + "step": 28720 + }, + { + "epoch": 0.49628490461708596, + "grad_norm": 1.11428317372177, + "learning_rate": 1.0606008730296501e-05, + "loss": 0.4537, + "step": 28721 + }, + { + "epoch": 0.4963021841304949, + "grad_norm": 0.7778589564433546, + "learning_rate": 1.060545010840687e-05, + "loss": 0.5458, + "step": 28722 + }, + { + "epoch": 0.4963194636439038, + "grad_norm": 1.0372949345812035, + "learning_rate": 1.060489148462092e-05, + "loss": 0.4344, + "step": 28723 + }, + { + "epoch": 0.4963367431573127, + "grad_norm": 0.8390783093732522, + "learning_rate": 1.0604332858940406e-05, + "loss": 0.4373, + "step": 28724 + }, + { + "epoch": 0.4963540226707216, + "grad_norm": 0.6448265382740377, + "learning_rate": 1.0603774231367074e-05, + "loss": 0.2751, + "step": 28725 + }, + { + "epoch": 0.4963713021841305, + "grad_norm": 0.561516324515557, + "learning_rate": 1.0603215601902676e-05, + "loss": 0.3947, + "step": 28726 + }, + { + "epoch": 0.4963885816975394, + "grad_norm": 1.1516516471309202, + "learning_rate": 1.060265697054896e-05, + "loss": 0.401, + "step": 28727 + }, + { + "epoch": 0.4964058612109483, + "grad_norm": 0.586461966140464, + "learning_rate": 1.0602098337307675e-05, + "loss": 0.2376, + "step": 28728 + }, + { + "epoch": 0.4964231407243572, + "grad_norm": 0.8591439026963247, + "learning_rate": 1.0601539702180576e-05, + "loss": 0.4141, + "step": 28729 + }, + { + "epoch": 0.4964404202377661, + "grad_norm": 1.1336203499440212, + "learning_rate": 1.0600981065169407e-05, + "loss": 0.4223, + "step": 28730 + }, + { + "epoch": 0.496457699751175, + "grad_norm": 0.8709678692427477, + "learning_rate": 1.0600422426275918e-05, + "loss": 0.4995, + "step": 28731 + }, + { + "epoch": 0.4964749792645839, + "grad_norm": 1.2489051605825785, + "learning_rate": 1.0599863785501862e-05, + "loss": 0.6293, + "step": 28732 + }, + { + "epoch": 0.4964922587779928, + "grad_norm": 0.8611237334915252, + "learning_rate": 1.0599305142848985e-05, + "loss": 0.626, + "step": 28733 + }, + { + "epoch": 0.4965095382914017, + "grad_norm": 0.4899965724197024, + "learning_rate": 1.0598746498319046e-05, + "loss": 0.6289, + "step": 28734 + }, + { + "epoch": 0.4965268178048106, + "grad_norm": 1.0325254960181336, + "learning_rate": 1.0598187851913779e-05, + "loss": 0.5065, + "step": 28735 + }, + { + "epoch": 0.4965440973182195, + "grad_norm": 0.970887962999722, + "learning_rate": 1.0597629203634948e-05, + "loss": 0.5895, + "step": 28736 + }, + { + "epoch": 0.49656137683162843, + "grad_norm": 1.1364710757254577, + "learning_rate": 1.0597070553484294e-05, + "loss": 0.3422, + "step": 28737 + }, + { + "epoch": 0.49657865634503734, + "grad_norm": 1.1179296808107688, + "learning_rate": 1.0596511901463568e-05, + "loss": 0.5121, + "step": 28738 + }, + { + "epoch": 0.49659593585844625, + "grad_norm": 0.49165895236509843, + "learning_rate": 1.0595953247574525e-05, + "loss": 0.5988, + "step": 28739 + }, + { + "epoch": 0.49661321537185515, + "grad_norm": 0.8033320604676742, + "learning_rate": 1.0595394591818909e-05, + "loss": 0.3562, + "step": 28740 + }, + { + "epoch": 0.496630494885264, + "grad_norm": 0.48619621192236995, + "learning_rate": 1.0594835934198472e-05, + "loss": 0.605, + "step": 28741 + }, + { + "epoch": 0.4966477743986729, + "grad_norm": 0.7183999983131208, + "learning_rate": 1.0594277274714963e-05, + "loss": 0.626, + "step": 28742 + }, + { + "epoch": 0.4966650539120818, + "grad_norm": 1.0959313103987591, + "learning_rate": 1.0593718613370134e-05, + "loss": 0.5318, + "step": 28743 + }, + { + "epoch": 0.49668233342549073, + "grad_norm": 1.1941617643333096, + "learning_rate": 1.0593159950165734e-05, + "loss": 0.4975, + "step": 28744 + }, + { + "epoch": 0.49669961293889964, + "grad_norm": 0.900120861099617, + "learning_rate": 1.059260128510351e-05, + "loss": 0.5194, + "step": 28745 + }, + { + "epoch": 0.49671689245230854, + "grad_norm": 1.144602604609349, + "learning_rate": 1.0592042618185217e-05, + "loss": 0.3267, + "step": 28746 + }, + { + "epoch": 0.49673417196571745, + "grad_norm": 1.23181056262199, + "learning_rate": 1.0591483949412595e-05, + "loss": 0.5891, + "step": 28747 + }, + { + "epoch": 0.49675145147912636, + "grad_norm": 0.7554145520868955, + "learning_rate": 1.0590925278787405e-05, + "loss": 0.3322, + "step": 28748 + }, + { + "epoch": 0.49676873099253527, + "grad_norm": 1.629042276723655, + "learning_rate": 1.0590366606311393e-05, + "loss": 0.6186, + "step": 28749 + }, + { + "epoch": 0.4967860105059442, + "grad_norm": 1.0037527957319392, + "learning_rate": 1.0589807931986304e-05, + "loss": 0.4026, + "step": 28750 + }, + { + "epoch": 0.496803290019353, + "grad_norm": 1.2241815022651052, + "learning_rate": 1.0589249255813893e-05, + "loss": 0.3358, + "step": 28751 + }, + { + "epoch": 0.49682056953276194, + "grad_norm": 0.968390679509681, + "learning_rate": 1.058869057779591e-05, + "loss": 0.2965, + "step": 28752 + }, + { + "epoch": 0.49683784904617084, + "grad_norm": 0.8397518424749173, + "learning_rate": 1.0588131897934102e-05, + "loss": 0.3322, + "step": 28753 + }, + { + "epoch": 0.49685512855957975, + "grad_norm": 1.0723243252606476, + "learning_rate": 1.0587573216230223e-05, + "loss": 0.3122, + "step": 28754 + }, + { + "epoch": 0.49687240807298866, + "grad_norm": 0.9934213282068368, + "learning_rate": 1.0587014532686017e-05, + "loss": 0.6481, + "step": 28755 + }, + { + "epoch": 0.49688968758639757, + "grad_norm": 0.5392088367964526, + "learning_rate": 1.0586455847303238e-05, + "loss": 0.3416, + "step": 28756 + }, + { + "epoch": 0.4969069670998065, + "grad_norm": 0.9802155343245337, + "learning_rate": 1.0585897160083636e-05, + "loss": 0.2803, + "step": 28757 + }, + { + "epoch": 0.4969242466132154, + "grad_norm": 0.8696706786554179, + "learning_rate": 1.0585338471028956e-05, + "loss": 0.3449, + "step": 28758 + }, + { + "epoch": 0.4969415261266243, + "grad_norm": 1.2490770188117504, + "learning_rate": 1.0584779780140955e-05, + "loss": 0.4615, + "step": 28759 + }, + { + "epoch": 0.4969588056400332, + "grad_norm": 0.8823137944462353, + "learning_rate": 1.058422108742138e-05, + "loss": 0.4251, + "step": 28760 + }, + { + "epoch": 0.4969760851534421, + "grad_norm": 1.244773071331154, + "learning_rate": 1.0583662392871976e-05, + "loss": 0.5706, + "step": 28761 + }, + { + "epoch": 0.49699336466685096, + "grad_norm": 0.9486793976854722, + "learning_rate": 1.0583103696494498e-05, + "loss": 0.4349, + "step": 28762 + }, + { + "epoch": 0.49701064418025986, + "grad_norm": 0.8246897222615951, + "learning_rate": 1.0582544998290698e-05, + "loss": 0.397, + "step": 28763 + }, + { + "epoch": 0.49702792369366877, + "grad_norm": 0.9399647481073948, + "learning_rate": 1.0581986298262322e-05, + "loss": 0.4175, + "step": 28764 + }, + { + "epoch": 0.4970452032070777, + "grad_norm": 0.5427914539279356, + "learning_rate": 1.058142759641112e-05, + "loss": 0.6595, + "step": 28765 + }, + { + "epoch": 0.4970624827204866, + "grad_norm": 0.7612302718583971, + "learning_rate": 1.0580868892738843e-05, + "loss": 0.3267, + "step": 28766 + }, + { + "epoch": 0.4970797622338955, + "grad_norm": 0.8483918976754624, + "learning_rate": 1.0580310187247238e-05, + "loss": 0.255, + "step": 28767 + }, + { + "epoch": 0.4970970417473044, + "grad_norm": 0.7749057172992602, + "learning_rate": 1.0579751479938059e-05, + "loss": 0.4093, + "step": 28768 + }, + { + "epoch": 0.4971143212607133, + "grad_norm": 0.7481235889705682, + "learning_rate": 1.0579192770813053e-05, + "loss": 0.4431, + "step": 28769 + }, + { + "epoch": 0.4971316007741222, + "grad_norm": 0.6173084761288895, + "learning_rate": 1.0578634059873973e-05, + "loss": 0.8155, + "step": 28770 + }, + { + "epoch": 0.4971488802875311, + "grad_norm": 0.8219604264381419, + "learning_rate": 1.0578075347122567e-05, + "loss": 0.2065, + "step": 28771 + }, + { + "epoch": 0.49716615980094003, + "grad_norm": 1.1806572911125057, + "learning_rate": 1.0577516632560585e-05, + "loss": 0.2698, + "step": 28772 + }, + { + "epoch": 0.4971834393143489, + "grad_norm": 0.4793176019224558, + "learning_rate": 1.0576957916189777e-05, + "loss": 0.6399, + "step": 28773 + }, + { + "epoch": 0.4972007188277578, + "grad_norm": 0.7628067534966777, + "learning_rate": 1.0576399198011891e-05, + "loss": 0.4684, + "step": 28774 + }, + { + "epoch": 0.4972179983411667, + "grad_norm": 0.8371663482943993, + "learning_rate": 1.0575840478028682e-05, + "loss": 0.4215, + "step": 28775 + }, + { + "epoch": 0.4972352778545756, + "grad_norm": 1.0742226160744173, + "learning_rate": 1.0575281756241894e-05, + "loss": 0.6529, + "step": 28776 + }, + { + "epoch": 0.4972525573679845, + "grad_norm": 1.5196616679879835, + "learning_rate": 1.0574723032653285e-05, + "loss": 0.4576, + "step": 28777 + }, + { + "epoch": 0.4972698368813934, + "grad_norm": 0.8400139361607594, + "learning_rate": 1.0574164307264594e-05, + "loss": 0.5363, + "step": 28778 + }, + { + "epoch": 0.49728711639480233, + "grad_norm": 0.5146296352519559, + "learning_rate": 1.057360558007758e-05, + "loss": 0.6528, + "step": 28779 + }, + { + "epoch": 0.49730439590821124, + "grad_norm": 1.2426631570808009, + "learning_rate": 1.0573046851093989e-05, + "loss": 0.2926, + "step": 28780 + }, + { + "epoch": 0.49732167542162015, + "grad_norm": 0.6529619601125618, + "learning_rate": 1.057248812031557e-05, + "loss": 0.3373, + "step": 28781 + }, + { + "epoch": 0.49733895493502905, + "grad_norm": 0.8979748209305005, + "learning_rate": 1.0571929387744076e-05, + "loss": 0.4823, + "step": 28782 + }, + { + "epoch": 0.4973562344484379, + "grad_norm": 0.7885535258650775, + "learning_rate": 1.0571370653381254e-05, + "loss": 0.4141, + "step": 28783 + }, + { + "epoch": 0.4973735139618468, + "grad_norm": 1.0731239553612162, + "learning_rate": 1.0570811917228859e-05, + "loss": 0.4045, + "step": 28784 + }, + { + "epoch": 0.4973907934752557, + "grad_norm": 1.2402312727530425, + "learning_rate": 1.0570253179288636e-05, + "loss": 0.4773, + "step": 28785 + }, + { + "epoch": 0.49740807298866463, + "grad_norm": 0.9016042412830862, + "learning_rate": 1.0569694439562339e-05, + "loss": 0.5465, + "step": 28786 + }, + { + "epoch": 0.49742535250207354, + "grad_norm": 0.48449374889197583, + "learning_rate": 1.0569135698051711e-05, + "loss": 0.6396, + "step": 28787 + }, + { + "epoch": 0.49744263201548244, + "grad_norm": 1.1083949470233374, + "learning_rate": 1.056857695475851e-05, + "loss": 0.3142, + "step": 28788 + }, + { + "epoch": 0.49745991152889135, + "grad_norm": 1.1346462592808189, + "learning_rate": 1.0568018209684484e-05, + "loss": 0.3816, + "step": 28789 + }, + { + "epoch": 0.49747719104230026, + "grad_norm": 1.1855235602662992, + "learning_rate": 1.0567459462831377e-05, + "loss": 0.5567, + "step": 28790 + }, + { + "epoch": 0.49749447055570917, + "grad_norm": 1.5617365760709028, + "learning_rate": 1.0566900714200948e-05, + "loss": 0.486, + "step": 28791 + }, + { + "epoch": 0.4975117500691181, + "grad_norm": 1.2984494785062592, + "learning_rate": 1.0566341963794941e-05, + "loss": 0.4324, + "step": 28792 + }, + { + "epoch": 0.497529029582527, + "grad_norm": 1.2544418069868077, + "learning_rate": 1.0565783211615107e-05, + "loss": 0.4309, + "step": 28793 + }, + { + "epoch": 0.49754630909593583, + "grad_norm": 0.7908581681307533, + "learning_rate": 1.05652244576632e-05, + "loss": 0.2624, + "step": 28794 + }, + { + "epoch": 0.49756358860934474, + "grad_norm": 0.7822247118680141, + "learning_rate": 1.0564665701940964e-05, + "loss": 0.4339, + "step": 28795 + }, + { + "epoch": 0.49758086812275365, + "grad_norm": 0.8233464741918108, + "learning_rate": 1.0564106944450155e-05, + "loss": 0.5145, + "step": 28796 + }, + { + "epoch": 0.49759814763616256, + "grad_norm": 1.5359712707047912, + "learning_rate": 1.0563548185192518e-05, + "loss": 0.4138, + "step": 28797 + }, + { + "epoch": 0.49761542714957147, + "grad_norm": 0.978316287595288, + "learning_rate": 1.0562989424169806e-05, + "loss": 0.4401, + "step": 28798 + }, + { + "epoch": 0.4976327066629804, + "grad_norm": 0.9255302155013748, + "learning_rate": 1.0562430661383769e-05, + "loss": 0.5206, + "step": 28799 + }, + { + "epoch": 0.4976499861763893, + "grad_norm": 1.2505318651807973, + "learning_rate": 1.0561871896836156e-05, + "loss": 0.4421, + "step": 28800 + }, + { + "epoch": 0.4976672656897982, + "grad_norm": 1.3210756293876724, + "learning_rate": 1.0561313130528716e-05, + "loss": 0.3957, + "step": 28801 + }, + { + "epoch": 0.4976845452032071, + "grad_norm": 0.8459074320195126, + "learning_rate": 1.05607543624632e-05, + "loss": 0.4118, + "step": 28802 + }, + { + "epoch": 0.497701824716616, + "grad_norm": 0.8114402714582329, + "learning_rate": 1.0560195592641363e-05, + "loss": 0.3898, + "step": 28803 + }, + { + "epoch": 0.49771910423002486, + "grad_norm": 1.065265303848881, + "learning_rate": 1.055963682106495e-05, + "loss": 0.3079, + "step": 28804 + }, + { + "epoch": 0.49773638374343376, + "grad_norm": 0.7389546600221684, + "learning_rate": 1.0559078047735709e-05, + "loss": 0.5985, + "step": 28805 + }, + { + "epoch": 0.49775366325684267, + "grad_norm": 0.44544447889415345, + "learning_rate": 1.0558519272655397e-05, + "loss": 0.5092, + "step": 28806 + }, + { + "epoch": 0.4977709427702516, + "grad_norm": 1.1909474328426912, + "learning_rate": 1.0557960495825757e-05, + "loss": 0.5635, + "step": 28807 + }, + { + "epoch": 0.4977882222836605, + "grad_norm": 1.2001086860947605, + "learning_rate": 1.055740171724854e-05, + "loss": 0.3721, + "step": 28808 + }, + { + "epoch": 0.4978055017970694, + "grad_norm": 1.034207194105473, + "learning_rate": 1.0556842936925505e-05, + "loss": 0.5162, + "step": 28809 + }, + { + "epoch": 0.4978227813104783, + "grad_norm": 1.96795060504899, + "learning_rate": 1.0556284154858389e-05, + "loss": 0.3975, + "step": 28810 + }, + { + "epoch": 0.4978400608238872, + "grad_norm": 0.9578935039885897, + "learning_rate": 1.0555725371048952e-05, + "loss": 0.4639, + "step": 28811 + }, + { + "epoch": 0.4978573403372961, + "grad_norm": 1.035809990254388, + "learning_rate": 1.0555166585498942e-05, + "loss": 0.2916, + "step": 28812 + }, + { + "epoch": 0.497874619850705, + "grad_norm": 1.4206481765250953, + "learning_rate": 1.0554607798210105e-05, + "loss": 0.6777, + "step": 28813 + }, + { + "epoch": 0.49789189936411393, + "grad_norm": 1.1121863042201217, + "learning_rate": 1.0554049009184197e-05, + "loss": 0.4986, + "step": 28814 + }, + { + "epoch": 0.4979091788775228, + "grad_norm": 1.0550628771568733, + "learning_rate": 1.0553490218422964e-05, + "loss": 0.5842, + "step": 28815 + }, + { + "epoch": 0.4979264583909317, + "grad_norm": 0.8531912509239062, + "learning_rate": 1.055293142592816e-05, + "loss": 0.313, + "step": 28816 + }, + { + "epoch": 0.4979437379043406, + "grad_norm": 1.2655146576369534, + "learning_rate": 1.055237263170153e-05, + "loss": 0.4161, + "step": 28817 + }, + { + "epoch": 0.4979610174177495, + "grad_norm": 1.2516352504376027, + "learning_rate": 1.0551813835744826e-05, + "loss": 0.3213, + "step": 28818 + }, + { + "epoch": 0.4979782969311584, + "grad_norm": 0.9413460450332352, + "learning_rate": 1.0551255038059803e-05, + "loss": 0.3433, + "step": 28819 + }, + { + "epoch": 0.4979955764445673, + "grad_norm": 1.9171530031949202, + "learning_rate": 1.0550696238648204e-05, + "loss": 0.4191, + "step": 28820 + }, + { + "epoch": 0.49801285595797623, + "grad_norm": 0.9896923681367618, + "learning_rate": 1.0550137437511784e-05, + "loss": 0.4625, + "step": 28821 + }, + { + "epoch": 0.49803013547138514, + "grad_norm": 0.9867311238132772, + "learning_rate": 1.0549578634652292e-05, + "loss": 0.3876, + "step": 28822 + }, + { + "epoch": 0.49804741498479405, + "grad_norm": 0.8043078721479342, + "learning_rate": 1.0549019830071477e-05, + "loss": 0.4815, + "step": 28823 + }, + { + "epoch": 0.49806469449820295, + "grad_norm": 1.2216149904637177, + "learning_rate": 1.0548461023771091e-05, + "loss": 0.3233, + "step": 28824 + }, + { + "epoch": 0.4980819740116118, + "grad_norm": 1.1058917701705544, + "learning_rate": 1.0547902215752881e-05, + "loss": 0.319, + "step": 28825 + }, + { + "epoch": 0.4980992535250207, + "grad_norm": 1.4319381979324604, + "learning_rate": 1.0547343406018602e-05, + "loss": 0.5422, + "step": 28826 + }, + { + "epoch": 0.4981165330384296, + "grad_norm": 0.9505968037684701, + "learning_rate": 1.0546784594570002e-05, + "loss": 0.9712, + "step": 28827 + }, + { + "epoch": 0.49813381255183853, + "grad_norm": 1.3992379057850743, + "learning_rate": 1.0546225781408832e-05, + "loss": 0.4887, + "step": 28828 + }, + { + "epoch": 0.49815109206524744, + "grad_norm": 1.6191481657534141, + "learning_rate": 1.0545666966536841e-05, + "loss": 0.5362, + "step": 28829 + }, + { + "epoch": 0.49816837157865634, + "grad_norm": 1.12736563481517, + "learning_rate": 1.0545108149955776e-05, + "loss": 0.3275, + "step": 28830 + }, + { + "epoch": 0.49818565109206525, + "grad_norm": 1.1078426839306514, + "learning_rate": 1.0544549331667393e-05, + "loss": 0.3749, + "step": 28831 + }, + { + "epoch": 0.49820293060547416, + "grad_norm": 0.7638945244044466, + "learning_rate": 1.054399051167344e-05, + "loss": 0.3378, + "step": 28832 + }, + { + "epoch": 0.49822021011888307, + "grad_norm": 0.7805014191468712, + "learning_rate": 1.0543431689975667e-05, + "loss": 0.3611, + "step": 28833 + }, + { + "epoch": 0.498237489632292, + "grad_norm": 1.0501454693849757, + "learning_rate": 1.0542872866575826e-05, + "loss": 0.456, + "step": 28834 + }, + { + "epoch": 0.4982547691457009, + "grad_norm": 1.3113212416753088, + "learning_rate": 1.0542314041475665e-05, + "loss": 0.411, + "step": 28835 + }, + { + "epoch": 0.49827204865910973, + "grad_norm": 0.6895474010454187, + "learning_rate": 1.0541755214676934e-05, + "loss": 0.8183, + "step": 28836 + }, + { + "epoch": 0.49828932817251864, + "grad_norm": 1.3139581815072718, + "learning_rate": 1.054119638618139e-05, + "loss": 0.6441, + "step": 28837 + }, + { + "epoch": 0.49830660768592755, + "grad_norm": 0.9863627617003852, + "learning_rate": 1.0540637555990771e-05, + "loss": 0.3297, + "step": 28838 + }, + { + "epoch": 0.49832388719933646, + "grad_norm": 1.0305902927226267, + "learning_rate": 1.054007872410684e-05, + "loss": 0.519, + "step": 28839 + }, + { + "epoch": 0.49834116671274536, + "grad_norm": 1.317147568851286, + "learning_rate": 1.0539519890531338e-05, + "loss": 0.345, + "step": 28840 + }, + { + "epoch": 0.4983584462261543, + "grad_norm": 0.9212110808917394, + "learning_rate": 1.053896105526602e-05, + "loss": 0.3873, + "step": 28841 + }, + { + "epoch": 0.4983757257395632, + "grad_norm": 1.207141345320895, + "learning_rate": 1.0538402218312631e-05, + "loss": 0.316, + "step": 28842 + }, + { + "epoch": 0.4983930052529721, + "grad_norm": 1.015080422932567, + "learning_rate": 1.0537843379672929e-05, + "loss": 0.3958, + "step": 28843 + }, + { + "epoch": 0.498410284766381, + "grad_norm": 1.0759580567895883, + "learning_rate": 1.053728453934866e-05, + "loss": 0.5986, + "step": 28844 + }, + { + "epoch": 0.4984275642797899, + "grad_norm": 1.1418795250515852, + "learning_rate": 1.0536725697341575e-05, + "loss": 0.4397, + "step": 28845 + }, + { + "epoch": 0.4984448437931988, + "grad_norm": 0.918715231726494, + "learning_rate": 1.0536166853653424e-05, + "loss": 0.4357, + "step": 28846 + }, + { + "epoch": 0.49846212330660766, + "grad_norm": 1.208946164160352, + "learning_rate": 1.0535608008285957e-05, + "loss": 0.4834, + "step": 28847 + }, + { + "epoch": 0.49847940282001657, + "grad_norm": 0.7911577877479811, + "learning_rate": 1.0535049161240928e-05, + "loss": 0.4007, + "step": 28848 + }, + { + "epoch": 0.4984966823334255, + "grad_norm": 0.6526506861964324, + "learning_rate": 1.0534490312520083e-05, + "loss": 0.2976, + "step": 28849 + }, + { + "epoch": 0.4985139618468344, + "grad_norm": 1.2415443180817018, + "learning_rate": 1.0533931462125171e-05, + "loss": 0.3288, + "step": 28850 + }, + { + "epoch": 0.4985312413602433, + "grad_norm": 0.9426502818229872, + "learning_rate": 1.0533372610057948e-05, + "loss": 0.4252, + "step": 28851 + }, + { + "epoch": 0.4985485208736522, + "grad_norm": 1.1605203592415378, + "learning_rate": 1.0532813756320161e-05, + "loss": 0.4299, + "step": 28852 + }, + { + "epoch": 0.4985658003870611, + "grad_norm": 0.8710278594148264, + "learning_rate": 1.0532254900913558e-05, + "loss": 0.3804, + "step": 28853 + }, + { + "epoch": 0.49858307990047, + "grad_norm": 1.386044302011403, + "learning_rate": 1.0531696043839896e-05, + "loss": 0.4793, + "step": 28854 + }, + { + "epoch": 0.4986003594138789, + "grad_norm": 1.1718750844154178, + "learning_rate": 1.0531137185100922e-05, + "loss": 0.4319, + "step": 28855 + }, + { + "epoch": 0.49861763892728783, + "grad_norm": 1.2509807250517775, + "learning_rate": 1.0530578324698381e-05, + "loss": 0.374, + "step": 28856 + }, + { + "epoch": 0.4986349184406967, + "grad_norm": 1.2555732612640036, + "learning_rate": 1.0530019462634033e-05, + "loss": 0.3534, + "step": 28857 + }, + { + "epoch": 0.4986521979541056, + "grad_norm": 1.064513826215027, + "learning_rate": 1.0529460598909623e-05, + "loss": 0.2877, + "step": 28858 + }, + { + "epoch": 0.4986694774675145, + "grad_norm": 0.8156592087008047, + "learning_rate": 1.0528901733526901e-05, + "loss": 0.4368, + "step": 28859 + }, + { + "epoch": 0.4986867569809234, + "grad_norm": 1.1056339742603716, + "learning_rate": 1.0528342866487619e-05, + "loss": 0.4153, + "step": 28860 + }, + { + "epoch": 0.4987040364943323, + "grad_norm": 1.0035077263306185, + "learning_rate": 1.0527783997793529e-05, + "loss": 0.3565, + "step": 28861 + }, + { + "epoch": 0.4987213160077412, + "grad_norm": 1.088954217099905, + "learning_rate": 1.0527225127446376e-05, + "loss": 0.4701, + "step": 28862 + }, + { + "epoch": 0.49873859552115013, + "grad_norm": 0.774802633317989, + "learning_rate": 1.0526666255447916e-05, + "loss": 0.4791, + "step": 28863 + }, + { + "epoch": 0.49875587503455904, + "grad_norm": 1.497672598867219, + "learning_rate": 1.0526107381799898e-05, + "loss": 0.5013, + "step": 28864 + }, + { + "epoch": 0.49877315454796795, + "grad_norm": 0.7135228209375081, + "learning_rate": 1.0525548506504067e-05, + "loss": 0.5058, + "step": 28865 + }, + { + "epoch": 0.49879043406137685, + "grad_norm": 1.3654418809661846, + "learning_rate": 1.0524989629562181e-05, + "loss": 0.3356, + "step": 28866 + }, + { + "epoch": 0.49880771357478576, + "grad_norm": 1.1993430367740572, + "learning_rate": 1.052443075097599e-05, + "loss": 0.7141, + "step": 28867 + }, + { + "epoch": 0.4988249930881946, + "grad_norm": 1.2555846001921882, + "learning_rate": 1.052387187074724e-05, + "loss": 0.5705, + "step": 28868 + }, + { + "epoch": 0.4988422726016035, + "grad_norm": 0.6294605403501924, + "learning_rate": 1.0523312988877685e-05, + "loss": 0.5553, + "step": 28869 + }, + { + "epoch": 0.49885955211501243, + "grad_norm": 0.8968355895257778, + "learning_rate": 1.052275410536907e-05, + "loss": 0.3365, + "step": 28870 + }, + { + "epoch": 0.49887683162842134, + "grad_norm": 1.1976559985305204, + "learning_rate": 1.0522195220223152e-05, + "loss": 0.4182, + "step": 28871 + }, + { + "epoch": 0.49889411114183024, + "grad_norm": 0.9048631246677525, + "learning_rate": 1.052163633344168e-05, + "loss": 0.3967, + "step": 28872 + }, + { + "epoch": 0.49891139065523915, + "grad_norm": 1.5427622266425371, + "learning_rate": 1.05210774450264e-05, + "loss": 0.3553, + "step": 28873 + }, + { + "epoch": 0.49892867016864806, + "grad_norm": 0.7691535835525799, + "learning_rate": 1.0520518554979068e-05, + "loss": 0.2461, + "step": 28874 + }, + { + "epoch": 0.49894594968205697, + "grad_norm": 0.8280663416963101, + "learning_rate": 1.0519959663301434e-05, + "loss": 0.2871, + "step": 28875 + }, + { + "epoch": 0.4989632291954659, + "grad_norm": 0.7517233810146349, + "learning_rate": 1.0519400769995243e-05, + "loss": 0.4032, + "step": 28876 + }, + { + "epoch": 0.4989805087088748, + "grad_norm": 1.2634408598729634, + "learning_rate": 1.051884187506225e-05, + "loss": 0.4631, + "step": 28877 + }, + { + "epoch": 0.49899778822228363, + "grad_norm": 1.1245762523898628, + "learning_rate": 1.0518282978504209e-05, + "loss": 0.547, + "step": 28878 + }, + { + "epoch": 0.49901506773569254, + "grad_norm": 0.7327583927317265, + "learning_rate": 1.0517724080322864e-05, + "loss": 0.3311, + "step": 28879 + }, + { + "epoch": 0.49903234724910145, + "grad_norm": 1.1287450907132246, + "learning_rate": 1.0517165180519967e-05, + "loss": 0.5952, + "step": 28880 + }, + { + "epoch": 0.49904962676251036, + "grad_norm": 0.8261702060577212, + "learning_rate": 1.0516606279097273e-05, + "loss": 0.3051, + "step": 28881 + }, + { + "epoch": 0.49906690627591926, + "grad_norm": 1.0187216785459399, + "learning_rate": 1.0516047376056522e-05, + "loss": 0.4854, + "step": 28882 + }, + { + "epoch": 0.49908418578932817, + "grad_norm": 1.4573951649890182, + "learning_rate": 1.0515488471399475e-05, + "loss": 0.4575, + "step": 28883 + }, + { + "epoch": 0.4991014653027371, + "grad_norm": 1.5584786704133464, + "learning_rate": 1.0514929565127881e-05, + "loss": 0.5555, + "step": 28884 + }, + { + "epoch": 0.499118744816146, + "grad_norm": 1.4291664436173555, + "learning_rate": 1.0514370657243484e-05, + "loss": 0.4069, + "step": 28885 + }, + { + "epoch": 0.4991360243295549, + "grad_norm": 0.9416010688493847, + "learning_rate": 1.0513811747748042e-05, + "loss": 0.3251, + "step": 28886 + }, + { + "epoch": 0.4991533038429638, + "grad_norm": 0.7854876016495034, + "learning_rate": 1.0513252836643302e-05, + "loss": 0.3125, + "step": 28887 + }, + { + "epoch": 0.4991705833563727, + "grad_norm": 0.9440678231502128, + "learning_rate": 1.0512693923931017e-05, + "loss": 0.5227, + "step": 28888 + }, + { + "epoch": 0.49918786286978156, + "grad_norm": 0.8474301489708959, + "learning_rate": 1.0512135009612934e-05, + "loss": 0.3565, + "step": 28889 + }, + { + "epoch": 0.49920514238319047, + "grad_norm": 0.957150921239976, + "learning_rate": 1.0511576093690803e-05, + "loss": 0.4677, + "step": 28890 + }, + { + "epoch": 0.4992224218965994, + "grad_norm": 0.6520885366415473, + "learning_rate": 1.051101717616638e-05, + "loss": 0.4951, + "step": 28891 + }, + { + "epoch": 0.4992397014100083, + "grad_norm": 0.779016005095316, + "learning_rate": 1.0510458257041412e-05, + "loss": 0.276, + "step": 28892 + }, + { + "epoch": 0.4992569809234172, + "grad_norm": 0.4945900387854594, + "learning_rate": 1.0509899336317647e-05, + "loss": 0.4926, + "step": 28893 + }, + { + "epoch": 0.4992742604368261, + "grad_norm": 1.107337526235996, + "learning_rate": 1.050934041399684e-05, + "loss": 0.4177, + "step": 28894 + }, + { + "epoch": 0.499291539950235, + "grad_norm": 0.8440364457363886, + "learning_rate": 1.0508781490080742e-05, + "loss": 0.3253, + "step": 28895 + }, + { + "epoch": 0.4993088194636439, + "grad_norm": 1.147787114567669, + "learning_rate": 1.05082225645711e-05, + "loss": 0.4487, + "step": 28896 + }, + { + "epoch": 0.4993260989770528, + "grad_norm": 0.7704588502788284, + "learning_rate": 1.0507663637469667e-05, + "loss": 0.4101, + "step": 28897 + }, + { + "epoch": 0.49934337849046173, + "grad_norm": 1.060633364041343, + "learning_rate": 1.0507104708778192e-05, + "loss": 0.5352, + "step": 28898 + }, + { + "epoch": 0.4993606580038706, + "grad_norm": 1.0590351063135968, + "learning_rate": 1.0506545778498429e-05, + "loss": 0.391, + "step": 28899 + }, + { + "epoch": 0.4993779375172795, + "grad_norm": 1.1605935611832754, + "learning_rate": 1.0505986846632124e-05, + "loss": 0.4249, + "step": 28900 + }, + { + "epoch": 0.4993952170306884, + "grad_norm": 1.1137584387009027, + "learning_rate": 1.050542791318103e-05, + "loss": 0.4578, + "step": 28901 + }, + { + "epoch": 0.4994124965440973, + "grad_norm": 1.5975118061021776, + "learning_rate": 1.0504868978146896e-05, + "loss": 0.553, + "step": 28902 + }, + { + "epoch": 0.4994297760575062, + "grad_norm": 0.6124376541991216, + "learning_rate": 1.0504310041531477e-05, + "loss": 0.6606, + "step": 28903 + }, + { + "epoch": 0.4994470555709151, + "grad_norm": 0.6451145582535045, + "learning_rate": 1.0503751103336519e-05, + "loss": 0.3027, + "step": 28904 + }, + { + "epoch": 0.49946433508432403, + "grad_norm": 1.6976481813182844, + "learning_rate": 1.0503192163563774e-05, + "loss": 0.4029, + "step": 28905 + }, + { + "epoch": 0.49948161459773294, + "grad_norm": 1.2414781839764177, + "learning_rate": 1.0502633222214993e-05, + "loss": 0.4588, + "step": 28906 + }, + { + "epoch": 0.49949889411114184, + "grad_norm": 0.5882862853257367, + "learning_rate": 1.0502074279291928e-05, + "loss": 0.3382, + "step": 28907 + }, + { + "epoch": 0.49951617362455075, + "grad_norm": 1.3911427374875933, + "learning_rate": 1.0501515334796324e-05, + "loss": 0.4668, + "step": 28908 + }, + { + "epoch": 0.49953345313795966, + "grad_norm": 0.792920616857241, + "learning_rate": 1.0500956388729941e-05, + "loss": 0.4621, + "step": 28909 + }, + { + "epoch": 0.4995507326513685, + "grad_norm": 0.924658676922525, + "learning_rate": 1.050039744109452e-05, + "loss": 0.4924, + "step": 28910 + }, + { + "epoch": 0.4995680121647774, + "grad_norm": 0.7939775857008574, + "learning_rate": 1.049983849189182e-05, + "loss": 0.4471, + "step": 28911 + }, + { + "epoch": 0.4995852916781863, + "grad_norm": 1.1138177228795814, + "learning_rate": 1.0499279541123589e-05, + "loss": 0.5375, + "step": 28912 + }, + { + "epoch": 0.49960257119159523, + "grad_norm": 0.8093907085110664, + "learning_rate": 1.0498720588791571e-05, + "loss": 0.3524, + "step": 28913 + }, + { + "epoch": 0.49961985070500414, + "grad_norm": 0.730184348172486, + "learning_rate": 1.0498161634897526e-05, + "loss": 0.3298, + "step": 28914 + }, + { + "epoch": 0.49963713021841305, + "grad_norm": 0.8670783429195971, + "learning_rate": 1.0497602679443199e-05, + "loss": 0.4883, + "step": 28915 + }, + { + "epoch": 0.49965440973182196, + "grad_norm": 0.7264499564008866, + "learning_rate": 1.0497043722430342e-05, + "loss": 0.4675, + "step": 28916 + }, + { + "epoch": 0.49967168924523087, + "grad_norm": 1.5315073533479338, + "learning_rate": 1.0496484763860708e-05, + "loss": 0.4413, + "step": 28917 + }, + { + "epoch": 0.4996889687586398, + "grad_norm": 1.0109221551370846, + "learning_rate": 1.0495925803736047e-05, + "loss": 0.6106, + "step": 28918 + }, + { + "epoch": 0.4997062482720487, + "grad_norm": 1.6621546231666806, + "learning_rate": 1.0495366842058107e-05, + "loss": 0.4176, + "step": 28919 + }, + { + "epoch": 0.4997235277854576, + "grad_norm": 1.2567379308501951, + "learning_rate": 1.0494807878828644e-05, + "loss": 0.426, + "step": 28920 + }, + { + "epoch": 0.49974080729886644, + "grad_norm": 1.152679549555035, + "learning_rate": 1.0494248914049401e-05, + "loss": 0.5573, + "step": 28921 + }, + { + "epoch": 0.49975808681227535, + "grad_norm": 1.0895785118915406, + "learning_rate": 1.0493689947722133e-05, + "loss": 0.3241, + "step": 28922 + }, + { + "epoch": 0.49977536632568426, + "grad_norm": 0.917162427786744, + "learning_rate": 1.0493130979848591e-05, + "loss": 0.4568, + "step": 28923 + }, + { + "epoch": 0.49979264583909316, + "grad_norm": 0.7876180104706649, + "learning_rate": 1.0492572010430528e-05, + "loss": 0.3678, + "step": 28924 + }, + { + "epoch": 0.49980992535250207, + "grad_norm": 0.7568544192683947, + "learning_rate": 1.0492013039469688e-05, + "loss": 0.2657, + "step": 28925 + }, + { + "epoch": 0.499827204865911, + "grad_norm": 0.6145541017192276, + "learning_rate": 1.0491454066967828e-05, + "loss": 1.0448, + "step": 28926 + }, + { + "epoch": 0.4998444843793199, + "grad_norm": 1.08062450540706, + "learning_rate": 1.0490895092926697e-05, + "loss": 0.487, + "step": 28927 + }, + { + "epoch": 0.4998617638927288, + "grad_norm": 1.1720045598438984, + "learning_rate": 1.0490336117348041e-05, + "loss": 0.5243, + "step": 28928 + }, + { + "epoch": 0.4998790434061377, + "grad_norm": 0.8918554097741589, + "learning_rate": 1.0489777140233619e-05, + "loss": 0.3588, + "step": 28929 + }, + { + "epoch": 0.4998963229195466, + "grad_norm": 0.9004264270031554, + "learning_rate": 1.0489218161585179e-05, + "loss": 0.4641, + "step": 28930 + }, + { + "epoch": 0.49991360243295546, + "grad_norm": 1.2158449018291475, + "learning_rate": 1.0488659181404469e-05, + "loss": 0.4334, + "step": 28931 + }, + { + "epoch": 0.49993088194636437, + "grad_norm": 0.6912531981579236, + "learning_rate": 1.048810019969324e-05, + "loss": 0.4106, + "step": 28932 + }, + { + "epoch": 0.4999481614597733, + "grad_norm": 1.0007239756967823, + "learning_rate": 1.0487541216453245e-05, + "loss": 0.4753, + "step": 28933 + }, + { + "epoch": 0.4999654409731822, + "grad_norm": 1.2787161917075025, + "learning_rate": 1.0486982231686234e-05, + "loss": 0.475, + "step": 28934 + }, + { + "epoch": 0.4999827204865911, + "grad_norm": 0.7066289966461496, + "learning_rate": 1.0486423245393959e-05, + "loss": 0.386, + "step": 28935 + }, + { + "epoch": 0.5, + "grad_norm": 0.9959115135295553, + "learning_rate": 1.0485864257578166e-05, + "loss": 0.3561, + "step": 28936 + }, + { + "epoch": 0.5000172795134089, + "grad_norm": 1.3448889857269863, + "learning_rate": 1.0485305268240611e-05, + "loss": 0.3279, + "step": 28937 + }, + { + "epoch": 0.5000345590268178, + "grad_norm": 1.0404997882536364, + "learning_rate": 1.0484746277383045e-05, + "loss": 0.3377, + "step": 28938 + }, + { + "epoch": 0.5000518385402267, + "grad_norm": 1.7400278465878571, + "learning_rate": 1.0484187285007213e-05, + "loss": 0.6467, + "step": 28939 + }, + { + "epoch": 0.5000691180536356, + "grad_norm": 0.8806831911544324, + "learning_rate": 1.0483628291114874e-05, + "loss": 0.2987, + "step": 28940 + }, + { + "epoch": 0.5000863975670445, + "grad_norm": 1.2920362642551904, + "learning_rate": 1.0483069295707774e-05, + "loss": 0.4613, + "step": 28941 + }, + { + "epoch": 0.5001036770804534, + "grad_norm": 1.053949030536366, + "learning_rate": 1.0482510298787661e-05, + "loss": 0.3183, + "step": 28942 + }, + { + "epoch": 0.5001209565938624, + "grad_norm": 0.8091860056262442, + "learning_rate": 1.048195130035629e-05, + "loss": 0.2977, + "step": 28943 + }, + { + "epoch": 0.5001382361072713, + "grad_norm": 0.776664496473596, + "learning_rate": 1.0481392300415411e-05, + "loss": 0.4371, + "step": 28944 + }, + { + "epoch": 0.5001555156206802, + "grad_norm": 0.7677085800304468, + "learning_rate": 1.0480833298966775e-05, + "loss": 0.4814, + "step": 28945 + }, + { + "epoch": 0.5001727951340891, + "grad_norm": 1.0349679455199128, + "learning_rate": 1.0480274296012132e-05, + "loss": 0.4721, + "step": 28946 + }, + { + "epoch": 0.5001900746474979, + "grad_norm": 0.9229021097567748, + "learning_rate": 1.0479715291553235e-05, + "loss": 0.4536, + "step": 28947 + }, + { + "epoch": 0.5002073541609068, + "grad_norm": 1.887512027411924, + "learning_rate": 1.0479156285591831e-05, + "loss": 0.6029, + "step": 28948 + }, + { + "epoch": 0.5002246336743157, + "grad_norm": 0.6232026135775696, + "learning_rate": 1.0478597278129674e-05, + "loss": 0.7046, + "step": 28949 + }, + { + "epoch": 0.5002419131877246, + "grad_norm": 1.016472541585972, + "learning_rate": 1.0478038269168516e-05, + "loss": 0.2641, + "step": 28950 + }, + { + "epoch": 0.5002591927011335, + "grad_norm": 1.0757039269626716, + "learning_rate": 1.0477479258710103e-05, + "loss": 0.4477, + "step": 28951 + }, + { + "epoch": 0.5002764722145424, + "grad_norm": 0.9249319138456001, + "learning_rate": 1.0476920246756192e-05, + "loss": 0.4061, + "step": 28952 + }, + { + "epoch": 0.5002937517279513, + "grad_norm": 1.6380526992084063, + "learning_rate": 1.0476361233308527e-05, + "loss": 0.439, + "step": 28953 + }, + { + "epoch": 0.5003110312413602, + "grad_norm": 0.8175962525851337, + "learning_rate": 1.0475802218368864e-05, + "loss": 0.5027, + "step": 28954 + }, + { + "epoch": 0.5003283107547691, + "grad_norm": 0.7226400184422033, + "learning_rate": 1.0475243201938952e-05, + "loss": 0.3722, + "step": 28955 + }, + { + "epoch": 0.500345590268178, + "grad_norm": 2.197986556505281, + "learning_rate": 1.047468418402054e-05, + "loss": 0.5789, + "step": 28956 + }, + { + "epoch": 0.500362869781587, + "grad_norm": 0.9023757344831762, + "learning_rate": 1.0474125164615385e-05, + "loss": 0.3381, + "step": 28957 + }, + { + "epoch": 0.5003801492949959, + "grad_norm": 0.9935628385017014, + "learning_rate": 1.0473566143725233e-05, + "loss": 0.4681, + "step": 28958 + }, + { + "epoch": 0.5003974288084048, + "grad_norm": 0.8869607767832111, + "learning_rate": 1.0473007121351832e-05, + "loss": 0.4091, + "step": 28959 + }, + { + "epoch": 0.5004147083218137, + "grad_norm": 0.9989008243664264, + "learning_rate": 1.0472448097496941e-05, + "loss": 0.4607, + "step": 28960 + }, + { + "epoch": 0.5004319878352226, + "grad_norm": 0.8811269193838794, + "learning_rate": 1.0471889072162308e-05, + "loss": 0.2866, + "step": 28961 + }, + { + "epoch": 0.5004492673486315, + "grad_norm": 0.9700478692603345, + "learning_rate": 1.047133004534968e-05, + "loss": 0.4316, + "step": 28962 + }, + { + "epoch": 0.5004665468620404, + "grad_norm": 1.405444200209284, + "learning_rate": 1.047077101706081e-05, + "loss": 0.3572, + "step": 28963 + }, + { + "epoch": 0.5004838263754493, + "grad_norm": 0.6892047558381164, + "learning_rate": 1.0470211987297451e-05, + "loss": 0.6287, + "step": 28964 + }, + { + "epoch": 0.5005011058888582, + "grad_norm": 1.1252408469565378, + "learning_rate": 1.0469652956061349e-05, + "loss": 0.4372, + "step": 28965 + }, + { + "epoch": 0.5005183854022671, + "grad_norm": 0.6409213007849355, + "learning_rate": 1.0469093923354262e-05, + "loss": 0.2473, + "step": 28966 + }, + { + "epoch": 0.500535664915676, + "grad_norm": 0.8479750392298255, + "learning_rate": 1.0468534889177937e-05, + "loss": 0.3929, + "step": 28967 + }, + { + "epoch": 0.5005529444290848, + "grad_norm": 0.8853519098610994, + "learning_rate": 1.0467975853534122e-05, + "loss": 0.3929, + "step": 28968 + }, + { + "epoch": 0.5005702239424937, + "grad_norm": 0.6667053362084967, + "learning_rate": 1.0467416816424574e-05, + "loss": 0.5891, + "step": 28969 + }, + { + "epoch": 0.5005875034559026, + "grad_norm": 1.1700367772367615, + "learning_rate": 1.046685777785104e-05, + "loss": 0.4416, + "step": 28970 + }, + { + "epoch": 0.5006047829693115, + "grad_norm": 0.8088093134277249, + "learning_rate": 1.0466298737815272e-05, + "loss": 0.3142, + "step": 28971 + }, + { + "epoch": 0.5006220624827205, + "grad_norm": 1.3378762191149725, + "learning_rate": 1.0465739696319022e-05, + "loss": 0.417, + "step": 28972 + }, + { + "epoch": 0.5006393419961294, + "grad_norm": 0.8110602389027949, + "learning_rate": 1.0465180653364035e-05, + "loss": 0.3979, + "step": 28973 + }, + { + "epoch": 0.5006566215095383, + "grad_norm": 1.0189559611281462, + "learning_rate": 1.0464621608952073e-05, + "loss": 0.5178, + "step": 28974 + }, + { + "epoch": 0.5006739010229472, + "grad_norm": 1.252185567870005, + "learning_rate": 1.0464062563084879e-05, + "loss": 0.3934, + "step": 28975 + }, + { + "epoch": 0.5006911805363561, + "grad_norm": 1.251609985532766, + "learning_rate": 1.0463503515764202e-05, + "loss": 0.5708, + "step": 28976 + }, + { + "epoch": 0.500708460049765, + "grad_norm": 0.9869829213964147, + "learning_rate": 1.04629444669918e-05, + "loss": 0.563, + "step": 28977 + }, + { + "epoch": 0.5007257395631739, + "grad_norm": 0.8465970116531154, + "learning_rate": 1.0462385416769423e-05, + "loss": 0.3198, + "step": 28978 + }, + { + "epoch": 0.5007430190765828, + "grad_norm": 0.9899455838260981, + "learning_rate": 1.0461826365098815e-05, + "loss": 0.3687, + "step": 28979 + }, + { + "epoch": 0.5007602985899917, + "grad_norm": 0.8595106032707507, + "learning_rate": 1.0461267311981736e-05, + "loss": 0.339, + "step": 28980 + }, + { + "epoch": 0.5007775781034006, + "grad_norm": 0.9418849051759347, + "learning_rate": 1.0460708257419931e-05, + "loss": 0.4303, + "step": 28981 + }, + { + "epoch": 0.5007948576168095, + "grad_norm": 0.9218663318921129, + "learning_rate": 1.0460149201415152e-05, + "loss": 0.5433, + "step": 28982 + }, + { + "epoch": 0.5008121371302184, + "grad_norm": 1.0025855815408613, + "learning_rate": 1.0459590143969152e-05, + "loss": 0.3199, + "step": 28983 + }, + { + "epoch": 0.5008294166436273, + "grad_norm": 0.9770978269522552, + "learning_rate": 1.045903108508368e-05, + "loss": 0.3624, + "step": 28984 + }, + { + "epoch": 0.5008466961570363, + "grad_norm": 1.0790048624037034, + "learning_rate": 1.0458472024760486e-05, + "loss": 0.5224, + "step": 28985 + }, + { + "epoch": 0.5008639756704452, + "grad_norm": 0.8088682687634291, + "learning_rate": 1.0457912963001327e-05, + "loss": 0.4187, + "step": 28986 + }, + { + "epoch": 0.5008812551838541, + "grad_norm": 0.8681169668882404, + "learning_rate": 1.0457353899807947e-05, + "loss": 0.3785, + "step": 28987 + }, + { + "epoch": 0.500898534697263, + "grad_norm": 0.9732681701233084, + "learning_rate": 1.0456794835182097e-05, + "loss": 0.5619, + "step": 28988 + }, + { + "epoch": 0.5009158142106718, + "grad_norm": 0.5491064203799482, + "learning_rate": 1.0456235769125536e-05, + "loss": 0.5587, + "step": 28989 + }, + { + "epoch": 0.5009330937240807, + "grad_norm": 0.7416638823888191, + "learning_rate": 1.0455676701640008e-05, + "loss": 0.3413, + "step": 28990 + }, + { + "epoch": 0.5009503732374896, + "grad_norm": 0.5649217309696284, + "learning_rate": 1.0455117632727268e-05, + "loss": 0.6822, + "step": 28991 + }, + { + "epoch": 0.5009676527508985, + "grad_norm": 1.0025485997265358, + "learning_rate": 1.0454558562389062e-05, + "loss": 0.4435, + "step": 28992 + }, + { + "epoch": 0.5009849322643074, + "grad_norm": 0.5656835441729496, + "learning_rate": 1.0453999490627142e-05, + "loss": 0.6228, + "step": 28993 + }, + { + "epoch": 0.5010022117777163, + "grad_norm": 0.9714621021832435, + "learning_rate": 1.0453440417443266e-05, + "loss": 0.4407, + "step": 28994 + }, + { + "epoch": 0.5010194912911252, + "grad_norm": 0.5112063006495394, + "learning_rate": 1.0452881342839178e-05, + "loss": 0.905, + "step": 28995 + }, + { + "epoch": 0.5010367708045341, + "grad_norm": 1.0296755792950698, + "learning_rate": 1.045232226681663e-05, + "loss": 0.4635, + "step": 28996 + }, + { + "epoch": 0.501054050317943, + "grad_norm": 0.5175318619585821, + "learning_rate": 1.0451763189377378e-05, + "loss": 0.5103, + "step": 28997 + }, + { + "epoch": 0.5010713298313519, + "grad_norm": 0.7670659492235452, + "learning_rate": 1.0451204110523165e-05, + "loss": 0.5788, + "step": 28998 + }, + { + "epoch": 0.5010886093447608, + "grad_norm": 0.4964173396134483, + "learning_rate": 1.0450645030255748e-05, + "loss": 0.5659, + "step": 28999 + }, + { + "epoch": 0.5011058888581698, + "grad_norm": 0.9111554023601873, + "learning_rate": 1.0450085948576877e-05, + "loss": 0.4273, + "step": 29000 + }, + { + "epoch": 0.5011231683715787, + "grad_norm": 0.7552719217851017, + "learning_rate": 1.0449526865488304e-05, + "loss": 0.5883, + "step": 29001 + }, + { + "epoch": 0.5011404478849876, + "grad_norm": 0.8682482974229003, + "learning_rate": 1.0448967780991776e-05, + "loss": 0.381, + "step": 29002 + }, + { + "epoch": 0.5011577273983965, + "grad_norm": 1.0702412233830925, + "learning_rate": 1.044840869508905e-05, + "loss": 0.3766, + "step": 29003 + }, + { + "epoch": 0.5011750069118054, + "grad_norm": 1.0653142530242903, + "learning_rate": 1.0447849607781872e-05, + "loss": 0.5217, + "step": 29004 + }, + { + "epoch": 0.5011922864252143, + "grad_norm": 1.0923474008640421, + "learning_rate": 1.0447290519071993e-05, + "loss": 0.4865, + "step": 29005 + }, + { + "epoch": 0.5012095659386232, + "grad_norm": 0.9223202197187145, + "learning_rate": 1.044673142896117e-05, + "loss": 0.4407, + "step": 29006 + }, + { + "epoch": 0.5012268454520321, + "grad_norm": 1.2038373816016565, + "learning_rate": 1.0446172337451148e-05, + "loss": 0.2762, + "step": 29007 + }, + { + "epoch": 0.501244124965441, + "grad_norm": 0.7543793037886923, + "learning_rate": 1.044561324454368e-05, + "loss": 0.7087, + "step": 29008 + }, + { + "epoch": 0.5012614044788499, + "grad_norm": 0.7612289278442597, + "learning_rate": 1.0445054150240516e-05, + "loss": 0.3747, + "step": 29009 + }, + { + "epoch": 0.5012786839922588, + "grad_norm": 1.24655819374571, + "learning_rate": 1.0444495054543414e-05, + "loss": 0.4755, + "step": 29010 + }, + { + "epoch": 0.5012959635056676, + "grad_norm": 0.8040995084531578, + "learning_rate": 1.0443935957454113e-05, + "loss": 0.2787, + "step": 29011 + }, + { + "epoch": 0.5013132430190765, + "grad_norm": 1.911665417683743, + "learning_rate": 1.0443376858974377e-05, + "loss": 0.3788, + "step": 29012 + }, + { + "epoch": 0.5013305225324854, + "grad_norm": 0.8207904363576486, + "learning_rate": 1.0442817759105948e-05, + "loss": 0.4118, + "step": 29013 + }, + { + "epoch": 0.5013478020458944, + "grad_norm": 1.0487009437921124, + "learning_rate": 1.044225865785058e-05, + "loss": 0.5412, + "step": 29014 + }, + { + "epoch": 0.5013650815593033, + "grad_norm": 0.9851046778174335, + "learning_rate": 1.0441699555210025e-05, + "loss": 0.4098, + "step": 29015 + }, + { + "epoch": 0.5013823610727122, + "grad_norm": 1.5261309767200237, + "learning_rate": 1.0441140451186031e-05, + "loss": 0.4041, + "step": 29016 + }, + { + "epoch": 0.5013996405861211, + "grad_norm": 1.4430498311687767, + "learning_rate": 1.0440581345780355e-05, + "loss": 0.5233, + "step": 29017 + }, + { + "epoch": 0.50141692009953, + "grad_norm": 1.4797042698598772, + "learning_rate": 1.0440022238994742e-05, + "loss": 0.5592, + "step": 29018 + }, + { + "epoch": 0.5014341996129389, + "grad_norm": 1.5060715174610575, + "learning_rate": 1.0439463130830947e-05, + "loss": 0.3634, + "step": 29019 + }, + { + "epoch": 0.5014514791263478, + "grad_norm": 1.4547901770664942, + "learning_rate": 1.043890402129072e-05, + "loss": 0.498, + "step": 29020 + }, + { + "epoch": 0.5014687586397567, + "grad_norm": 1.0073942363760915, + "learning_rate": 1.0438344910375813e-05, + "loss": 0.3236, + "step": 29021 + }, + { + "epoch": 0.5014860381531656, + "grad_norm": 0.6249482602731898, + "learning_rate": 1.0437785798087976e-05, + "loss": 0.7116, + "step": 29022 + }, + { + "epoch": 0.5015033176665745, + "grad_norm": 1.1993454701490738, + "learning_rate": 1.0437226684428962e-05, + "loss": 0.4461, + "step": 29023 + }, + { + "epoch": 0.5015205971799834, + "grad_norm": 1.2426569534267318, + "learning_rate": 1.043666756940052e-05, + "loss": 0.4657, + "step": 29024 + }, + { + "epoch": 0.5015378766933923, + "grad_norm": 1.6305764929132562, + "learning_rate": 1.04361084530044e-05, + "loss": 0.4657, + "step": 29025 + }, + { + "epoch": 0.5015551562068012, + "grad_norm": 0.8625373907412727, + "learning_rate": 1.0435549335242357e-05, + "loss": 0.6133, + "step": 29026 + }, + { + "epoch": 0.5015724357202102, + "grad_norm": 0.732713361694434, + "learning_rate": 1.0434990216116139e-05, + "loss": 0.2707, + "step": 29027 + }, + { + "epoch": 0.5015897152336191, + "grad_norm": 0.9855114072447523, + "learning_rate": 1.04344310956275e-05, + "loss": 0.3653, + "step": 29028 + }, + { + "epoch": 0.501606994747028, + "grad_norm": 1.2574116421200419, + "learning_rate": 1.043387197377819e-05, + "loss": 0.376, + "step": 29029 + }, + { + "epoch": 0.5016242742604369, + "grad_norm": 1.0204331214073754, + "learning_rate": 1.0433312850569959e-05, + "loss": 0.4795, + "step": 29030 + }, + { + "epoch": 0.5016415537738458, + "grad_norm": 1.1737976911531611, + "learning_rate": 1.0432753726004556e-05, + "loss": 0.565, + "step": 29031 + }, + { + "epoch": 0.5016588332872546, + "grad_norm": 1.095123254668659, + "learning_rate": 1.043219460008374e-05, + "loss": 0.3252, + "step": 29032 + }, + { + "epoch": 0.5016761128006635, + "grad_norm": 1.2840410615701165, + "learning_rate": 1.0431635472809259e-05, + "loss": 0.4702, + "step": 29033 + }, + { + "epoch": 0.5016933923140724, + "grad_norm": 0.961355515378234, + "learning_rate": 1.043107634418286e-05, + "loss": 0.4627, + "step": 29034 + }, + { + "epoch": 0.5017106718274813, + "grad_norm": 0.6564562595181603, + "learning_rate": 1.0430517214206297e-05, + "loss": 0.2988, + "step": 29035 + }, + { + "epoch": 0.5017279513408902, + "grad_norm": 0.6369227569027817, + "learning_rate": 1.0429958082881322e-05, + "loss": 0.8591, + "step": 29036 + }, + { + "epoch": 0.5017452308542991, + "grad_norm": 1.0901000305128945, + "learning_rate": 1.0429398950209687e-05, + "loss": 0.5302, + "step": 29037 + }, + { + "epoch": 0.501762510367708, + "grad_norm": 1.1402253005206464, + "learning_rate": 1.042883981619314e-05, + "loss": 0.4995, + "step": 29038 + }, + { + "epoch": 0.5017797898811169, + "grad_norm": 0.622685195017963, + "learning_rate": 1.0428280680833431e-05, + "loss": 0.5385, + "step": 29039 + }, + { + "epoch": 0.5017970693945258, + "grad_norm": 0.7724120056410722, + "learning_rate": 1.0427721544132321e-05, + "loss": 0.4685, + "step": 29040 + }, + { + "epoch": 0.5018143489079347, + "grad_norm": 0.7732650140376331, + "learning_rate": 1.042716240609155e-05, + "loss": 0.3519, + "step": 29041 + }, + { + "epoch": 0.5018316284213437, + "grad_norm": 0.41854085894569376, + "learning_rate": 1.0426603266712877e-05, + "loss": 0.4472, + "step": 29042 + }, + { + "epoch": 0.5018489079347526, + "grad_norm": 1.224973207977445, + "learning_rate": 1.042604412599805e-05, + "loss": 0.3881, + "step": 29043 + }, + { + "epoch": 0.5018661874481615, + "grad_norm": 0.710883342816056, + "learning_rate": 1.042548498394882e-05, + "loss": 0.6547, + "step": 29044 + }, + { + "epoch": 0.5018834669615704, + "grad_norm": 1.0368313884724305, + "learning_rate": 1.0424925840566936e-05, + "loss": 0.5157, + "step": 29045 + }, + { + "epoch": 0.5019007464749793, + "grad_norm": 1.7422957772655727, + "learning_rate": 1.0424366695854153e-05, + "loss": 0.4618, + "step": 29046 + }, + { + "epoch": 0.5019180259883882, + "grad_norm": 0.948895260219587, + "learning_rate": 1.0423807549812221e-05, + "loss": 0.596, + "step": 29047 + }, + { + "epoch": 0.5019353055017971, + "grad_norm": 1.3786065352587578, + "learning_rate": 1.0423248402442892e-05, + "loss": 0.4122, + "step": 29048 + }, + { + "epoch": 0.501952585015206, + "grad_norm": 1.6008192890526107, + "learning_rate": 1.0422689253747917e-05, + "loss": 0.5727, + "step": 29049 + }, + { + "epoch": 0.5019698645286149, + "grad_norm": 0.9234339991157807, + "learning_rate": 1.0422130103729049e-05, + "loss": 0.4241, + "step": 29050 + }, + { + "epoch": 0.5019871440420238, + "grad_norm": 1.4153702843279945, + "learning_rate": 1.0421570952388033e-05, + "loss": 0.4042, + "step": 29051 + }, + { + "epoch": 0.5020044235554327, + "grad_norm": 0.6175393395308995, + "learning_rate": 1.042101179972663e-05, + "loss": 0.7786, + "step": 29052 + }, + { + "epoch": 0.5020217030688415, + "grad_norm": 1.0991516299316013, + "learning_rate": 1.0420452645746584e-05, + "loss": 0.3826, + "step": 29053 + }, + { + "epoch": 0.5020389825822504, + "grad_norm": 0.7328930805606805, + "learning_rate": 1.0419893490449648e-05, + "loss": 0.3215, + "step": 29054 + }, + { + "epoch": 0.5020562620956593, + "grad_norm": 0.9388734541349952, + "learning_rate": 1.0419334333837574e-05, + "loss": 0.4172, + "step": 29055 + }, + { + "epoch": 0.5020735416090683, + "grad_norm": 1.225539829300521, + "learning_rate": 1.0418775175912112e-05, + "loss": 0.4489, + "step": 29056 + }, + { + "epoch": 0.5020908211224772, + "grad_norm": 1.3510676773859664, + "learning_rate": 1.0418216016675016e-05, + "loss": 0.5388, + "step": 29057 + }, + { + "epoch": 0.5021081006358861, + "grad_norm": 1.2195305876645994, + "learning_rate": 1.0417656856128033e-05, + "loss": 0.4169, + "step": 29058 + }, + { + "epoch": 0.502125380149295, + "grad_norm": 1.0896893053962833, + "learning_rate": 1.0417097694272917e-05, + "loss": 0.4323, + "step": 29059 + }, + { + "epoch": 0.5021426596627039, + "grad_norm": 0.6527456002524923, + "learning_rate": 1.0416538531111422e-05, + "loss": 0.3291, + "step": 29060 + }, + { + "epoch": 0.5021599391761128, + "grad_norm": 1.1426115757339221, + "learning_rate": 1.0415979366645297e-05, + "loss": 0.3898, + "step": 29061 + }, + { + "epoch": 0.5021772186895217, + "grad_norm": 1.250042628947692, + "learning_rate": 1.041542020087629e-05, + "loss": 0.416, + "step": 29062 + }, + { + "epoch": 0.5021944982029306, + "grad_norm": 0.49481479539055917, + "learning_rate": 1.0414861033806158e-05, + "loss": 0.5976, + "step": 29063 + }, + { + "epoch": 0.5022117777163395, + "grad_norm": 1.1156138206545918, + "learning_rate": 1.0414301865436651e-05, + "loss": 0.4317, + "step": 29064 + }, + { + "epoch": 0.5022290572297484, + "grad_norm": 1.058382725003955, + "learning_rate": 1.0413742695769515e-05, + "loss": 0.2563, + "step": 29065 + }, + { + "epoch": 0.5022463367431573, + "grad_norm": 1.206790858555734, + "learning_rate": 1.0413183524806507e-05, + "loss": 0.5657, + "step": 29066 + }, + { + "epoch": 0.5022636162565662, + "grad_norm": 1.0082304437669596, + "learning_rate": 1.0412624352549378e-05, + "loss": 0.4001, + "step": 29067 + }, + { + "epoch": 0.5022808957699751, + "grad_norm": 1.7246610559990199, + "learning_rate": 1.0412065178999877e-05, + "loss": 0.5159, + "step": 29068 + }, + { + "epoch": 0.502298175283384, + "grad_norm": 0.8317777960484409, + "learning_rate": 1.0411506004159757e-05, + "loss": 0.5353, + "step": 29069 + }, + { + "epoch": 0.502315454796793, + "grad_norm": 0.8280101406406118, + "learning_rate": 1.0410946828030767e-05, + "loss": 0.5666, + "step": 29070 + }, + { + "epoch": 0.5023327343102019, + "grad_norm": 1.4136906377509753, + "learning_rate": 1.0410387650614662e-05, + "loss": 0.4476, + "step": 29071 + }, + { + "epoch": 0.5023500138236108, + "grad_norm": 0.7659786507949691, + "learning_rate": 1.0409828471913193e-05, + "loss": 0.4217, + "step": 29072 + }, + { + "epoch": 0.5023672933370197, + "grad_norm": 1.1498841279895222, + "learning_rate": 1.0409269291928109e-05, + "loss": 0.4601, + "step": 29073 + }, + { + "epoch": 0.5023845728504285, + "grad_norm": 1.2420979975711068, + "learning_rate": 1.0408710110661161e-05, + "loss": 0.4188, + "step": 29074 + }, + { + "epoch": 0.5024018523638374, + "grad_norm": 1.131999380512749, + "learning_rate": 1.0408150928114105e-05, + "loss": 0.5347, + "step": 29075 + }, + { + "epoch": 0.5024191318772463, + "grad_norm": 1.187511874341861, + "learning_rate": 1.0407591744288685e-05, + "loss": 0.498, + "step": 29076 + }, + { + "epoch": 0.5024364113906552, + "grad_norm": 0.67583519269961, + "learning_rate": 1.040703255918666e-05, + "loss": 0.8839, + "step": 29077 + }, + { + "epoch": 0.5024536909040641, + "grad_norm": 1.1247996797722675, + "learning_rate": 1.0406473372809779e-05, + "loss": 0.449, + "step": 29078 + }, + { + "epoch": 0.502470970417473, + "grad_norm": 1.7040684468096983, + "learning_rate": 1.040591418515979e-05, + "loss": 0.3539, + "step": 29079 + }, + { + "epoch": 0.5024882499308819, + "grad_norm": 0.6675065583084114, + "learning_rate": 1.0405354996238446e-05, + "loss": 0.3367, + "step": 29080 + }, + { + "epoch": 0.5025055294442908, + "grad_norm": 0.7080730763850772, + "learning_rate": 1.04047958060475e-05, + "loss": 0.343, + "step": 29081 + }, + { + "epoch": 0.5025228089576997, + "grad_norm": 1.033872644829049, + "learning_rate": 1.0404236614588703e-05, + "loss": 0.3639, + "step": 29082 + }, + { + "epoch": 0.5025400884711086, + "grad_norm": 0.7874718779670004, + "learning_rate": 1.0403677421863808e-05, + "loss": 0.5063, + "step": 29083 + }, + { + "epoch": 0.5025573679845176, + "grad_norm": 0.5182045360428861, + "learning_rate": 1.0403118227874566e-05, + "loss": 0.6606, + "step": 29084 + }, + { + "epoch": 0.5025746474979265, + "grad_norm": 0.907645302835091, + "learning_rate": 1.0402559032622724e-05, + "loss": 0.2676, + "step": 29085 + }, + { + "epoch": 0.5025919270113354, + "grad_norm": 0.9034444130376623, + "learning_rate": 1.0401999836110038e-05, + "loss": 0.5078, + "step": 29086 + }, + { + "epoch": 0.5026092065247443, + "grad_norm": 1.3615598937928508, + "learning_rate": 1.0401440638338255e-05, + "loss": 0.4357, + "step": 29087 + }, + { + "epoch": 0.5026264860381532, + "grad_norm": 1.1639487517473257, + "learning_rate": 1.0400881439309132e-05, + "loss": 0.5158, + "step": 29088 + }, + { + "epoch": 0.5026437655515621, + "grad_norm": 0.7972716016381105, + "learning_rate": 1.0400322239024417e-05, + "loss": 0.356, + "step": 29089 + }, + { + "epoch": 0.502661045064971, + "grad_norm": 1.2905760259308934, + "learning_rate": 1.0399763037485864e-05, + "loss": 0.5039, + "step": 29090 + }, + { + "epoch": 0.5026783245783799, + "grad_norm": 0.5672503812155805, + "learning_rate": 1.039920383469522e-05, + "loss": 0.9354, + "step": 29091 + }, + { + "epoch": 0.5026956040917888, + "grad_norm": 0.7552176143501537, + "learning_rate": 1.039864463065424e-05, + "loss": 0.3415, + "step": 29092 + }, + { + "epoch": 0.5027128836051977, + "grad_norm": 0.8023512783700528, + "learning_rate": 1.0398085425364676e-05, + "loss": 0.3533, + "step": 29093 + }, + { + "epoch": 0.5027301631186066, + "grad_norm": 1.7269023890548003, + "learning_rate": 1.039752621882828e-05, + "loss": 0.5129, + "step": 29094 + }, + { + "epoch": 0.5027474426320154, + "grad_norm": 1.0401553383511253, + "learning_rate": 1.0396967011046799e-05, + "loss": 0.3801, + "step": 29095 + }, + { + "epoch": 0.5027647221454243, + "grad_norm": 0.8981134180862178, + "learning_rate": 1.0396407802021986e-05, + "loss": 0.4235, + "step": 29096 + }, + { + "epoch": 0.5027820016588332, + "grad_norm": 0.9003442586746336, + "learning_rate": 1.0395848591755595e-05, + "loss": 0.2995, + "step": 29097 + }, + { + "epoch": 0.5027992811722422, + "grad_norm": 1.6138679821596762, + "learning_rate": 1.0395289380249375e-05, + "loss": 0.6344, + "step": 29098 + }, + { + "epoch": 0.5028165606856511, + "grad_norm": 0.8784838996808209, + "learning_rate": 1.0394730167505081e-05, + "loss": 1.015, + "step": 29099 + }, + { + "epoch": 0.50283384019906, + "grad_norm": 1.5221134967236485, + "learning_rate": 1.0394170953524461e-05, + "loss": 0.3619, + "step": 29100 + }, + { + "epoch": 0.5028511197124689, + "grad_norm": 1.256578811052321, + "learning_rate": 1.0393611738309269e-05, + "loss": 0.4245, + "step": 29101 + }, + { + "epoch": 0.5028683992258778, + "grad_norm": 1.1703185627202546, + "learning_rate": 1.0393052521861252e-05, + "loss": 0.4795, + "step": 29102 + }, + { + "epoch": 0.5028856787392867, + "grad_norm": 1.5633514136372757, + "learning_rate": 1.0392493304182166e-05, + "loss": 0.4104, + "step": 29103 + }, + { + "epoch": 0.5029029582526956, + "grad_norm": 0.8381501000273033, + "learning_rate": 1.0391934085273762e-05, + "loss": 0.4757, + "step": 29104 + }, + { + "epoch": 0.5029202377661045, + "grad_norm": 1.2395957646974474, + "learning_rate": 1.0391374865137791e-05, + "loss": 0.3308, + "step": 29105 + }, + { + "epoch": 0.5029375172795134, + "grad_norm": 1.2409120574217025, + "learning_rate": 1.0390815643776004e-05, + "loss": 0.5684, + "step": 29106 + }, + { + "epoch": 0.5029547967929223, + "grad_norm": 0.9870455785948292, + "learning_rate": 1.0390256421190153e-05, + "loss": 0.6909, + "step": 29107 + }, + { + "epoch": 0.5029720763063312, + "grad_norm": 0.512295772158197, + "learning_rate": 1.0389697197381985e-05, + "loss": 0.6604, + "step": 29108 + }, + { + "epoch": 0.5029893558197401, + "grad_norm": 1.3944515708984515, + "learning_rate": 1.038913797235326e-05, + "loss": 0.4831, + "step": 29109 + }, + { + "epoch": 0.503006635333149, + "grad_norm": 1.035409397371412, + "learning_rate": 1.0388578746105725e-05, + "loss": 0.2393, + "step": 29110 + }, + { + "epoch": 0.503023914846558, + "grad_norm": 1.53784050229189, + "learning_rate": 1.038801951864113e-05, + "loss": 0.3635, + "step": 29111 + }, + { + "epoch": 0.5030411943599669, + "grad_norm": 1.2844839793743679, + "learning_rate": 1.0387460289961229e-05, + "loss": 0.435, + "step": 29112 + }, + { + "epoch": 0.5030584738733758, + "grad_norm": 1.292996045866671, + "learning_rate": 1.0386901060067775e-05, + "loss": 0.36, + "step": 29113 + }, + { + "epoch": 0.5030757533867847, + "grad_norm": 0.9491044167487575, + "learning_rate": 1.0386341828962514e-05, + "loss": 0.232, + "step": 29114 + }, + { + "epoch": 0.5030930329001936, + "grad_norm": 0.998742027625606, + "learning_rate": 1.0385782596647205e-05, + "loss": 0.4094, + "step": 29115 + }, + { + "epoch": 0.5031103124136024, + "grad_norm": 0.9319905771808519, + "learning_rate": 1.0385223363123593e-05, + "loss": 0.7178, + "step": 29116 + }, + { + "epoch": 0.5031275919270113, + "grad_norm": 1.5098047130106818, + "learning_rate": 1.0384664128393434e-05, + "loss": 0.5278, + "step": 29117 + }, + { + "epoch": 0.5031448714404202, + "grad_norm": 1.057245672638338, + "learning_rate": 1.0384104892458476e-05, + "loss": 0.354, + "step": 29118 + }, + { + "epoch": 0.5031621509538291, + "grad_norm": 1.5368553037314947, + "learning_rate": 1.038354565532047e-05, + "loss": 0.3863, + "step": 29119 + }, + { + "epoch": 0.503179430467238, + "grad_norm": 1.285193748067521, + "learning_rate": 1.0382986416981173e-05, + "loss": 0.5408, + "step": 29120 + }, + { + "epoch": 0.5031967099806469, + "grad_norm": 0.9304280696620673, + "learning_rate": 1.0382427177442333e-05, + "loss": 0.7146, + "step": 29121 + }, + { + "epoch": 0.5032139894940558, + "grad_norm": 0.6876869153932709, + "learning_rate": 1.03818679367057e-05, + "loss": 0.2257, + "step": 29122 + }, + { + "epoch": 0.5032312690074647, + "grad_norm": 0.7183911273318874, + "learning_rate": 1.038130869477303e-05, + "loss": 0.1926, + "step": 29123 + }, + { + "epoch": 0.5032485485208736, + "grad_norm": 1.4679809638624177, + "learning_rate": 1.0380749451646075e-05, + "loss": 0.8311, + "step": 29124 + }, + { + "epoch": 0.5032658280342825, + "grad_norm": 1.6037770068694162, + "learning_rate": 1.038019020732658e-05, + "loss": 0.4415, + "step": 29125 + }, + { + "epoch": 0.5032831075476915, + "grad_norm": 0.8603272994925167, + "learning_rate": 1.0379630961816303e-05, + "loss": 0.3445, + "step": 29126 + }, + { + "epoch": 0.5033003870611004, + "grad_norm": 1.0874715556373584, + "learning_rate": 1.037907171511699e-05, + "loss": 0.6444, + "step": 29127 + }, + { + "epoch": 0.5033176665745093, + "grad_norm": 1.3495457116993697, + "learning_rate": 1.0378512467230396e-05, + "loss": 0.4825, + "step": 29128 + }, + { + "epoch": 0.5033349460879182, + "grad_norm": 0.7048835399349865, + "learning_rate": 1.0377953218158271e-05, + "loss": 0.3632, + "step": 29129 + }, + { + "epoch": 0.5033522256013271, + "grad_norm": 0.8384464564431701, + "learning_rate": 1.0377393967902371e-05, + "loss": 0.4364, + "step": 29130 + }, + { + "epoch": 0.503369505114736, + "grad_norm": 1.400413156416088, + "learning_rate": 1.0376834716464441e-05, + "loss": 0.3906, + "step": 29131 + }, + { + "epoch": 0.5033867846281449, + "grad_norm": 0.7660007394359477, + "learning_rate": 1.0376275463846239e-05, + "loss": 0.6492, + "step": 29132 + }, + { + "epoch": 0.5034040641415538, + "grad_norm": 1.1682081392776686, + "learning_rate": 1.0375716210049514e-05, + "loss": 0.4515, + "step": 29133 + }, + { + "epoch": 0.5034213436549627, + "grad_norm": 1.591820612267219, + "learning_rate": 1.0375156955076013e-05, + "loss": 0.4481, + "step": 29134 + }, + { + "epoch": 0.5034386231683716, + "grad_norm": 1.1079828808269474, + "learning_rate": 1.0374597698927496e-05, + "loss": 0.4181, + "step": 29135 + }, + { + "epoch": 0.5034559026817805, + "grad_norm": 1.9679228476366295, + "learning_rate": 1.0374038441605709e-05, + "loss": 0.4512, + "step": 29136 + }, + { + "epoch": 0.5034731821951893, + "grad_norm": 1.5938843972825507, + "learning_rate": 1.0373479183112407e-05, + "loss": 0.3622, + "step": 29137 + }, + { + "epoch": 0.5034904617085982, + "grad_norm": 1.0725474073983048, + "learning_rate": 1.0372919923449338e-05, + "loss": 0.3183, + "step": 29138 + }, + { + "epoch": 0.5035077412220071, + "grad_norm": 0.9565732407280689, + "learning_rate": 1.0372360662618255e-05, + "loss": 0.4257, + "step": 29139 + }, + { + "epoch": 0.503525020735416, + "grad_norm": 2.408889952688262, + "learning_rate": 1.0371801400620911e-05, + "loss": 0.3477, + "step": 29140 + }, + { + "epoch": 0.503542300248825, + "grad_norm": 1.1512783731215355, + "learning_rate": 1.0371242137459056e-05, + "loss": 0.6523, + "step": 29141 + }, + { + "epoch": 0.5035595797622339, + "grad_norm": 1.3179511900098315, + "learning_rate": 1.0370682873134443e-05, + "loss": 0.5578, + "step": 29142 + }, + { + "epoch": 0.5035768592756428, + "grad_norm": 1.0971063933769807, + "learning_rate": 1.0370123607648823e-05, + "loss": 0.4717, + "step": 29143 + }, + { + "epoch": 0.5035941387890517, + "grad_norm": 0.8789280163414753, + "learning_rate": 1.0369564341003948e-05, + "loss": 0.5871, + "step": 29144 + }, + { + "epoch": 0.5036114183024606, + "grad_norm": 0.9950168234653922, + "learning_rate": 1.0369005073201569e-05, + "loss": 0.473, + "step": 29145 + }, + { + "epoch": 0.5036286978158695, + "grad_norm": 1.0683476255984308, + "learning_rate": 1.0368445804243442e-05, + "loss": 0.4007, + "step": 29146 + }, + { + "epoch": 0.5036459773292784, + "grad_norm": 0.6767425832841827, + "learning_rate": 1.036788653413131e-05, + "loss": 0.2304, + "step": 29147 + }, + { + "epoch": 0.5036632568426873, + "grad_norm": 0.851681629967103, + "learning_rate": 1.0367327262866928e-05, + "loss": 0.567, + "step": 29148 + }, + { + "epoch": 0.5036805363560962, + "grad_norm": 1.3229450821814919, + "learning_rate": 1.0366767990452053e-05, + "loss": 0.4343, + "step": 29149 + }, + { + "epoch": 0.5036978158695051, + "grad_norm": 1.2263996027623736, + "learning_rate": 1.0366208716888433e-05, + "loss": 0.6487, + "step": 29150 + }, + { + "epoch": 0.503715095382914, + "grad_norm": 1.165696000335429, + "learning_rate": 1.0365649442177814e-05, + "loss": 0.4748, + "step": 29151 + }, + { + "epoch": 0.5037323748963229, + "grad_norm": 1.1508822923909818, + "learning_rate": 1.0365090166321959e-05, + "loss": 0.4645, + "step": 29152 + }, + { + "epoch": 0.5037496544097319, + "grad_norm": 0.7644845295858644, + "learning_rate": 1.0364530889322613e-05, + "loss": 0.4679, + "step": 29153 + }, + { + "epoch": 0.5037669339231408, + "grad_norm": 0.6722145120841903, + "learning_rate": 1.0363971611181524e-05, + "loss": 0.4027, + "step": 29154 + }, + { + "epoch": 0.5037842134365497, + "grad_norm": 0.4973970775274273, + "learning_rate": 1.0363412331900452e-05, + "loss": 0.6807, + "step": 29155 + }, + { + "epoch": 0.5038014929499586, + "grad_norm": 0.7636864511600068, + "learning_rate": 1.0362853051481145e-05, + "loss": 0.3841, + "step": 29156 + }, + { + "epoch": 0.5038187724633675, + "grad_norm": 1.0487355919210029, + "learning_rate": 1.0362293769925356e-05, + "loss": 0.4617, + "step": 29157 + }, + { + "epoch": 0.5038360519767764, + "grad_norm": 0.6895835711644309, + "learning_rate": 1.0361734487234833e-05, + "loss": 0.2468, + "step": 29158 + }, + { + "epoch": 0.5038533314901852, + "grad_norm": 0.7143060007763294, + "learning_rate": 1.0361175203411328e-05, + "loss": 0.462, + "step": 29159 + }, + { + "epoch": 0.5038706110035941, + "grad_norm": 0.7408796785250157, + "learning_rate": 1.03606159184566e-05, + "loss": 0.3737, + "step": 29160 + }, + { + "epoch": 0.503887890517003, + "grad_norm": 1.17124126485441, + "learning_rate": 1.0360056632372393e-05, + "loss": 0.6951, + "step": 29161 + }, + { + "epoch": 0.5039051700304119, + "grad_norm": 1.0737340140394462, + "learning_rate": 1.035949734516046e-05, + "loss": 0.3889, + "step": 29162 + }, + { + "epoch": 0.5039224495438208, + "grad_norm": 0.921677283612641, + "learning_rate": 1.0358938056822555e-05, + "loss": 0.509, + "step": 29163 + }, + { + "epoch": 0.5039397290572297, + "grad_norm": 0.9509559952479404, + "learning_rate": 1.035837876736043e-05, + "loss": 0.6351, + "step": 29164 + }, + { + "epoch": 0.5039570085706386, + "grad_norm": 0.9113496700851637, + "learning_rate": 1.0357819476775835e-05, + "loss": 0.4477, + "step": 29165 + }, + { + "epoch": 0.5039742880840475, + "grad_norm": 1.3152000534831922, + "learning_rate": 1.0357260185070521e-05, + "loss": 0.4589, + "step": 29166 + }, + { + "epoch": 0.5039915675974564, + "grad_norm": 0.9153670660877659, + "learning_rate": 1.0356700892246245e-05, + "loss": 0.4044, + "step": 29167 + }, + { + "epoch": 0.5040088471108654, + "grad_norm": 0.7753863765017761, + "learning_rate": 1.0356141598304749e-05, + "loss": 0.4157, + "step": 29168 + }, + { + "epoch": 0.5040261266242743, + "grad_norm": 0.7951681791179335, + "learning_rate": 1.0355582303247794e-05, + "loss": 0.566, + "step": 29169 + }, + { + "epoch": 0.5040434061376832, + "grad_norm": 0.9642742911894157, + "learning_rate": 1.0355023007077128e-05, + "loss": 0.4976, + "step": 29170 + }, + { + "epoch": 0.5040606856510921, + "grad_norm": 1.278782278208629, + "learning_rate": 1.03544637097945e-05, + "loss": 0.4155, + "step": 29171 + }, + { + "epoch": 0.504077965164501, + "grad_norm": 1.017028016840601, + "learning_rate": 1.0353904411401667e-05, + "loss": 0.4046, + "step": 29172 + }, + { + "epoch": 0.5040952446779099, + "grad_norm": 0.7763418953045786, + "learning_rate": 1.0353345111900379e-05, + "loss": 0.2401, + "step": 29173 + }, + { + "epoch": 0.5041125241913188, + "grad_norm": 1.1606441484597332, + "learning_rate": 1.0352785811292384e-05, + "loss": 0.2806, + "step": 29174 + }, + { + "epoch": 0.5041298037047277, + "grad_norm": 0.7564740218308845, + "learning_rate": 1.035222650957944e-05, + "loss": 0.449, + "step": 29175 + }, + { + "epoch": 0.5041470832181366, + "grad_norm": 0.9383074480878921, + "learning_rate": 1.0351667206763296e-05, + "loss": 0.2434, + "step": 29176 + }, + { + "epoch": 0.5041643627315455, + "grad_norm": 1.1155505611604355, + "learning_rate": 1.0351107902845702e-05, + "loss": 0.3211, + "step": 29177 + }, + { + "epoch": 0.5041816422449544, + "grad_norm": 0.8905864702335046, + "learning_rate": 1.0350548597828414e-05, + "loss": 0.6511, + "step": 29178 + }, + { + "epoch": 0.5041989217583633, + "grad_norm": 0.8351475972267438, + "learning_rate": 1.0349989291713179e-05, + "loss": 0.349, + "step": 29179 + }, + { + "epoch": 0.5042162012717721, + "grad_norm": 0.6360677377633034, + "learning_rate": 1.0349429984501752e-05, + "loss": 0.3577, + "step": 29180 + }, + { + "epoch": 0.504233480785181, + "grad_norm": 1.2963896009268898, + "learning_rate": 1.0348870676195883e-05, + "loss": 0.3827, + "step": 29181 + }, + { + "epoch": 0.50425076029859, + "grad_norm": 0.8792387854782575, + "learning_rate": 1.0348311366797324e-05, + "loss": 0.4094, + "step": 29182 + }, + { + "epoch": 0.5042680398119989, + "grad_norm": 0.8884183936128076, + "learning_rate": 1.0347752056307828e-05, + "loss": 0.4677, + "step": 29183 + }, + { + "epoch": 0.5042853193254078, + "grad_norm": 1.181972503682643, + "learning_rate": 1.0347192744729146e-05, + "loss": 0.4764, + "step": 29184 + }, + { + "epoch": 0.5043025988388167, + "grad_norm": 1.1285496614510597, + "learning_rate": 1.0346633432063026e-05, + "loss": 0.3728, + "step": 29185 + }, + { + "epoch": 0.5043198783522256, + "grad_norm": 0.8419689157486369, + "learning_rate": 1.034607411831123e-05, + "loss": 0.5758, + "step": 29186 + }, + { + "epoch": 0.5043371578656345, + "grad_norm": 0.5416471454713819, + "learning_rate": 1.0345514803475506e-05, + "loss": 0.4942, + "step": 29187 + }, + { + "epoch": 0.5043544373790434, + "grad_norm": 0.9951162315344723, + "learning_rate": 1.0344955487557596e-05, + "loss": 0.3871, + "step": 29188 + }, + { + "epoch": 0.5043717168924523, + "grad_norm": 1.3633217861252829, + "learning_rate": 1.0344396170559262e-05, + "loss": 0.3516, + "step": 29189 + }, + { + "epoch": 0.5043889964058612, + "grad_norm": 1.320552639573006, + "learning_rate": 1.0343836852482253e-05, + "loss": 0.206, + "step": 29190 + }, + { + "epoch": 0.5044062759192701, + "grad_norm": 0.9119227078384028, + "learning_rate": 1.034327753332832e-05, + "loss": 0.4518, + "step": 29191 + }, + { + "epoch": 0.504423555432679, + "grad_norm": 1.333857287762694, + "learning_rate": 1.0342718213099216e-05, + "loss": 0.5499, + "step": 29192 + }, + { + "epoch": 0.5044408349460879, + "grad_norm": 1.477102512427524, + "learning_rate": 1.0342158891796695e-05, + "loss": 0.5102, + "step": 29193 + }, + { + "epoch": 0.5044581144594968, + "grad_norm": 0.8358775777458628, + "learning_rate": 1.0341599569422503e-05, + "loss": 0.3883, + "step": 29194 + }, + { + "epoch": 0.5044753939729058, + "grad_norm": 1.0965979017960894, + "learning_rate": 1.0341040245978396e-05, + "loss": 0.4368, + "step": 29195 + }, + { + "epoch": 0.5044926734863147, + "grad_norm": 0.7641325542246213, + "learning_rate": 1.0340480921466126e-05, + "loss": 0.5113, + "step": 29196 + }, + { + "epoch": 0.5045099529997236, + "grad_norm": 0.7857105353733651, + "learning_rate": 1.0339921595887443e-05, + "loss": 0.3565, + "step": 29197 + }, + { + "epoch": 0.5045272325131325, + "grad_norm": 1.0776192247949692, + "learning_rate": 1.0339362269244102e-05, + "loss": 0.5152, + "step": 29198 + }, + { + "epoch": 0.5045445120265414, + "grad_norm": 1.0070536369867824, + "learning_rate": 1.0338802941537849e-05, + "loss": 0.291, + "step": 29199 + }, + { + "epoch": 0.5045617915399503, + "grad_norm": 0.9884436434531436, + "learning_rate": 1.033824361277044e-05, + "loss": 0.3775, + "step": 29200 + }, + { + "epoch": 0.5045790710533591, + "grad_norm": 0.9821228712896366, + "learning_rate": 1.033768428294363e-05, + "loss": 0.5072, + "step": 29201 + }, + { + "epoch": 0.504596350566768, + "grad_norm": 1.2547298617644436, + "learning_rate": 1.0337124952059161e-05, + "loss": 0.564, + "step": 29202 + }, + { + "epoch": 0.5046136300801769, + "grad_norm": 0.6192524110386519, + "learning_rate": 1.0336565620118795e-05, + "loss": 0.6309, + "step": 29203 + }, + { + "epoch": 0.5046309095935858, + "grad_norm": 0.8647009961409509, + "learning_rate": 1.0336006287124281e-05, + "loss": 0.4631, + "step": 29204 + }, + { + "epoch": 0.5046481891069947, + "grad_norm": 0.8624953561979605, + "learning_rate": 1.0335446953077366e-05, + "loss": 0.4608, + "step": 29205 + }, + { + "epoch": 0.5046654686204036, + "grad_norm": 0.8336813525771626, + "learning_rate": 1.033488761797981e-05, + "loss": 0.5535, + "step": 29206 + }, + { + "epoch": 0.5046827481338125, + "grad_norm": 0.8648470047870471, + "learning_rate": 1.033432828183336e-05, + "loss": 0.6434, + "step": 29207 + }, + { + "epoch": 0.5047000276472214, + "grad_norm": 0.6692629438932821, + "learning_rate": 1.0333768944639766e-05, + "loss": 0.5385, + "step": 29208 + }, + { + "epoch": 0.5047173071606303, + "grad_norm": 0.9075356345862674, + "learning_rate": 1.0333209606400783e-05, + "loss": 0.3218, + "step": 29209 + }, + { + "epoch": 0.5047345866740393, + "grad_norm": 0.7217592545299087, + "learning_rate": 1.0332650267118162e-05, + "loss": 0.3726, + "step": 29210 + }, + { + "epoch": 0.5047518661874482, + "grad_norm": 1.235952762456152, + "learning_rate": 1.0332090926793654e-05, + "loss": 0.4836, + "step": 29211 + }, + { + "epoch": 0.5047691457008571, + "grad_norm": 0.5731024431490246, + "learning_rate": 1.0331531585429016e-05, + "loss": 0.5929, + "step": 29212 + }, + { + "epoch": 0.504786425214266, + "grad_norm": 1.1016900926530804, + "learning_rate": 1.0330972243025992e-05, + "loss": 0.5715, + "step": 29213 + }, + { + "epoch": 0.5048037047276749, + "grad_norm": 1.2643002936045333, + "learning_rate": 1.0330412899586337e-05, + "loss": 0.4118, + "step": 29214 + }, + { + "epoch": 0.5048209842410838, + "grad_norm": 1.2347019187028276, + "learning_rate": 1.0329853555111808e-05, + "loss": 0.3537, + "step": 29215 + }, + { + "epoch": 0.5048382637544927, + "grad_norm": 0.6766308575038245, + "learning_rate": 1.032929420960415e-05, + "loss": 0.3331, + "step": 29216 + }, + { + "epoch": 0.5048555432679016, + "grad_norm": 1.6940279907959461, + "learning_rate": 1.0328734863065117e-05, + "loss": 0.3854, + "step": 29217 + }, + { + "epoch": 0.5048728227813105, + "grad_norm": 1.5420088839090615, + "learning_rate": 1.0328175515496466e-05, + "loss": 0.4491, + "step": 29218 + }, + { + "epoch": 0.5048901022947194, + "grad_norm": 1.1997922375115737, + "learning_rate": 1.0327616166899939e-05, + "loss": 0.5904, + "step": 29219 + }, + { + "epoch": 0.5049073818081283, + "grad_norm": 1.1420292272820942, + "learning_rate": 1.0327056817277294e-05, + "loss": 0.3697, + "step": 29220 + }, + { + "epoch": 0.5049246613215372, + "grad_norm": 0.9074596548585436, + "learning_rate": 1.0326497466630283e-05, + "loss": 0.3179, + "step": 29221 + }, + { + "epoch": 0.504941940834946, + "grad_norm": 1.082484490752899, + "learning_rate": 1.0325938114960656e-05, + "loss": 0.5478, + "step": 29222 + }, + { + "epoch": 0.5049592203483549, + "grad_norm": 0.9577943081466452, + "learning_rate": 1.0325378762270168e-05, + "loss": 0.4042, + "step": 29223 + }, + { + "epoch": 0.5049764998617639, + "grad_norm": 1.521921848511583, + "learning_rate": 1.0324819408560569e-05, + "loss": 0.4064, + "step": 29224 + }, + { + "epoch": 0.5049937793751728, + "grad_norm": 1.1264167011589523, + "learning_rate": 1.0324260053833609e-05, + "loss": 0.3972, + "step": 29225 + }, + { + "epoch": 0.5050110588885817, + "grad_norm": 0.8815126975900289, + "learning_rate": 1.0323700698091044e-05, + "loss": 0.4482, + "step": 29226 + }, + { + "epoch": 0.5050283384019906, + "grad_norm": 0.46899280585749714, + "learning_rate": 1.0323141341334623e-05, + "loss": 0.6224, + "step": 29227 + }, + { + "epoch": 0.5050456179153995, + "grad_norm": 0.8194292500773372, + "learning_rate": 1.03225819835661e-05, + "loss": 0.3609, + "step": 29228 + }, + { + "epoch": 0.5050628974288084, + "grad_norm": 1.9003864968987176, + "learning_rate": 1.0322022624787225e-05, + "loss": 0.6715, + "step": 29229 + }, + { + "epoch": 0.5050801769422173, + "grad_norm": 1.1270414163631104, + "learning_rate": 1.032146326499975e-05, + "loss": 0.4176, + "step": 29230 + }, + { + "epoch": 0.5050974564556262, + "grad_norm": 1.3453671805569307, + "learning_rate": 1.0320903904205426e-05, + "loss": 0.533, + "step": 29231 + }, + { + "epoch": 0.5051147359690351, + "grad_norm": 0.9410971217451967, + "learning_rate": 1.0320344542406008e-05, + "loss": 0.3085, + "step": 29232 + }, + { + "epoch": 0.505132015482444, + "grad_norm": 1.3030160588324924, + "learning_rate": 1.0319785179603248e-05, + "loss": 0.4384, + "step": 29233 + }, + { + "epoch": 0.5051492949958529, + "grad_norm": 1.1797074319075038, + "learning_rate": 1.0319225815798893e-05, + "loss": 0.5258, + "step": 29234 + }, + { + "epoch": 0.5051665745092618, + "grad_norm": 0.8220366841092341, + "learning_rate": 1.03186664509947e-05, + "loss": 0.3584, + "step": 29235 + }, + { + "epoch": 0.5051838540226707, + "grad_norm": 1.1806619578539712, + "learning_rate": 1.0318107085192422e-05, + "loss": 0.6687, + "step": 29236 + }, + { + "epoch": 0.5052011335360796, + "grad_norm": 1.177419814975576, + "learning_rate": 1.0317547718393806e-05, + "loss": 0.5918, + "step": 29237 + }, + { + "epoch": 0.5052184130494886, + "grad_norm": 0.6404217407041749, + "learning_rate": 1.0316988350600608e-05, + "loss": 0.3009, + "step": 29238 + }, + { + "epoch": 0.5052356925628975, + "grad_norm": 1.581568982402324, + "learning_rate": 1.0316428981814576e-05, + "loss": 0.4926, + "step": 29239 + }, + { + "epoch": 0.5052529720763064, + "grad_norm": 1.052013251148741, + "learning_rate": 1.0315869612037469e-05, + "loss": 0.4179, + "step": 29240 + }, + { + "epoch": 0.5052702515897153, + "grad_norm": 0.9627862983319019, + "learning_rate": 1.031531024127103e-05, + "loss": 0.5975, + "step": 29241 + }, + { + "epoch": 0.5052875311031242, + "grad_norm": 0.9530887615938848, + "learning_rate": 1.0314750869517017e-05, + "loss": 0.4826, + "step": 29242 + }, + { + "epoch": 0.505304810616533, + "grad_norm": 1.2983448540909392, + "learning_rate": 1.0314191496777179e-05, + "loss": 0.5532, + "step": 29243 + }, + { + "epoch": 0.5053220901299419, + "grad_norm": 0.7913949030995826, + "learning_rate": 1.031363212305327e-05, + "loss": 0.556, + "step": 29244 + }, + { + "epoch": 0.5053393696433508, + "grad_norm": 1.0427660249037218, + "learning_rate": 1.0313072748347042e-05, + "loss": 0.4729, + "step": 29245 + }, + { + "epoch": 0.5053566491567597, + "grad_norm": 0.8764275340518127, + "learning_rate": 1.0312513372660245e-05, + "loss": 0.3978, + "step": 29246 + }, + { + "epoch": 0.5053739286701686, + "grad_norm": 0.8824950771813547, + "learning_rate": 1.0311953995994635e-05, + "loss": 0.5119, + "step": 29247 + }, + { + "epoch": 0.5053912081835775, + "grad_norm": 0.9487996353270791, + "learning_rate": 1.0311394618351957e-05, + "loss": 0.4567, + "step": 29248 + }, + { + "epoch": 0.5054084876969864, + "grad_norm": 0.8335887936551265, + "learning_rate": 1.0310835239733974e-05, + "loss": 0.5239, + "step": 29249 + }, + { + "epoch": 0.5054257672103953, + "grad_norm": 1.195432989406089, + "learning_rate": 1.0310275860142427e-05, + "loss": 0.6116, + "step": 29250 + }, + { + "epoch": 0.5054430467238042, + "grad_norm": 1.084678540619915, + "learning_rate": 1.030971647957907e-05, + "loss": 0.5275, + "step": 29251 + }, + { + "epoch": 0.5054603262372132, + "grad_norm": 1.0584219309239589, + "learning_rate": 1.0309157098045663e-05, + "loss": 0.3491, + "step": 29252 + }, + { + "epoch": 0.5054776057506221, + "grad_norm": 0.6744254791374773, + "learning_rate": 1.030859771554395e-05, + "loss": 0.2919, + "step": 29253 + }, + { + "epoch": 0.505494885264031, + "grad_norm": 1.0801494579668314, + "learning_rate": 1.0308038332075685e-05, + "loss": 0.3382, + "step": 29254 + }, + { + "epoch": 0.5055121647774399, + "grad_norm": 1.2703552045375657, + "learning_rate": 1.030747894764262e-05, + "loss": 0.4307, + "step": 29255 + }, + { + "epoch": 0.5055294442908488, + "grad_norm": 0.8755370904453056, + "learning_rate": 1.030691956224651e-05, + "loss": 0.4028, + "step": 29256 + }, + { + "epoch": 0.5055467238042577, + "grad_norm": 0.7802879307803924, + "learning_rate": 1.0306360175889101e-05, + "loss": 0.3427, + "step": 29257 + }, + { + "epoch": 0.5055640033176666, + "grad_norm": 0.512527558343826, + "learning_rate": 1.0305800788572152e-05, + "loss": 0.6636, + "step": 29258 + }, + { + "epoch": 0.5055812828310755, + "grad_norm": 0.7674083342297724, + "learning_rate": 1.030524140029741e-05, + "loss": 0.3951, + "step": 29259 + }, + { + "epoch": 0.5055985623444844, + "grad_norm": 1.0093987728658504, + "learning_rate": 1.0304682011066628e-05, + "loss": 0.5231, + "step": 29260 + }, + { + "epoch": 0.5056158418578933, + "grad_norm": 0.811787857298496, + "learning_rate": 1.0304122620881563e-05, + "loss": 0.3537, + "step": 29261 + }, + { + "epoch": 0.5056331213713022, + "grad_norm": 0.9164100200553296, + "learning_rate": 1.0303563229743958e-05, + "loss": 0.5519, + "step": 29262 + }, + { + "epoch": 0.5056504008847111, + "grad_norm": 0.7576126493261592, + "learning_rate": 1.0303003837655572e-05, + "loss": 0.3926, + "step": 29263 + }, + { + "epoch": 0.5056676803981199, + "grad_norm": 0.4184659263984491, + "learning_rate": 1.0302444444618157e-05, + "loss": 0.6338, + "step": 29264 + }, + { + "epoch": 0.5056849599115288, + "grad_norm": 0.9753913920093485, + "learning_rate": 1.0301885050633457e-05, + "loss": 0.4708, + "step": 29265 + }, + { + "epoch": 0.5057022394249377, + "grad_norm": 1.0008869537391414, + "learning_rate": 1.0301325655703233e-05, + "loss": 0.4921, + "step": 29266 + }, + { + "epoch": 0.5057195189383467, + "grad_norm": 0.8687396267927695, + "learning_rate": 1.0300766259829239e-05, + "loss": 0.5287, + "step": 29267 + }, + { + "epoch": 0.5057367984517556, + "grad_norm": 0.9761408505942932, + "learning_rate": 1.0300206863013213e-05, + "loss": 0.469, + "step": 29268 + }, + { + "epoch": 0.5057540779651645, + "grad_norm": 0.798213937532438, + "learning_rate": 1.0299647465256926e-05, + "loss": 0.2777, + "step": 29269 + }, + { + "epoch": 0.5057713574785734, + "grad_norm": 0.47810039604102766, + "learning_rate": 1.0299088066562116e-05, + "loss": 0.5491, + "step": 29270 + }, + { + "epoch": 0.5057886369919823, + "grad_norm": 0.9956555858987028, + "learning_rate": 1.0298528666930539e-05, + "loss": 0.3555, + "step": 29271 + }, + { + "epoch": 0.5058059165053912, + "grad_norm": 1.0273831459762737, + "learning_rate": 1.0297969266363948e-05, + "loss": 0.3952, + "step": 29272 + }, + { + "epoch": 0.5058231960188001, + "grad_norm": 0.7180091564715306, + "learning_rate": 1.0297409864864094e-05, + "loss": 0.3676, + "step": 29273 + }, + { + "epoch": 0.505840475532209, + "grad_norm": 0.8704847972491719, + "learning_rate": 1.0296850462432728e-05, + "loss": 0.483, + "step": 29274 + }, + { + "epoch": 0.5058577550456179, + "grad_norm": 0.5522212403987692, + "learning_rate": 1.0296291059071605e-05, + "loss": 0.8416, + "step": 29275 + }, + { + "epoch": 0.5058750345590268, + "grad_norm": 1.130636912657882, + "learning_rate": 1.0295731654782479e-05, + "loss": 0.3275, + "step": 29276 + }, + { + "epoch": 0.5058923140724357, + "grad_norm": 0.8834898232138304, + "learning_rate": 1.0295172249567095e-05, + "loss": 0.3533, + "step": 29277 + }, + { + "epoch": 0.5059095935858446, + "grad_norm": 0.519183996076013, + "learning_rate": 1.0294612843427212e-05, + "loss": 0.6902, + "step": 29278 + }, + { + "epoch": 0.5059268730992535, + "grad_norm": 0.8279114672448509, + "learning_rate": 1.0294053436364578e-05, + "loss": 0.3461, + "step": 29279 + }, + { + "epoch": 0.5059441526126625, + "grad_norm": 1.634140939044489, + "learning_rate": 1.0293494028380946e-05, + "loss": 0.2978, + "step": 29280 + }, + { + "epoch": 0.5059614321260714, + "grad_norm": 1.9969284776687177, + "learning_rate": 1.029293461947807e-05, + "loss": 0.3659, + "step": 29281 + }, + { + "epoch": 0.5059787116394803, + "grad_norm": 1.035004382039526, + "learning_rate": 1.0292375209657697e-05, + "loss": 0.3353, + "step": 29282 + }, + { + "epoch": 0.5059959911528892, + "grad_norm": 1.0515712104275223, + "learning_rate": 1.0291815798921586e-05, + "loss": 0.5541, + "step": 29283 + }, + { + "epoch": 0.5060132706662981, + "grad_norm": 0.6732118483170686, + "learning_rate": 1.0291256387271484e-05, + "loss": 0.4048, + "step": 29284 + }, + { + "epoch": 0.506030550179707, + "grad_norm": 0.4611252158390896, + "learning_rate": 1.0290696974709144e-05, + "loss": 0.7005, + "step": 29285 + }, + { + "epoch": 0.5060478296931158, + "grad_norm": 0.9288933916622438, + "learning_rate": 1.0290137561236322e-05, + "loss": 0.5524, + "step": 29286 + }, + { + "epoch": 0.5060651092065247, + "grad_norm": 0.49088710938173813, + "learning_rate": 1.0289578146854766e-05, + "loss": 0.7495, + "step": 29287 + }, + { + "epoch": 0.5060823887199336, + "grad_norm": 1.1402448548787687, + "learning_rate": 1.0289018731566227e-05, + "loss": 0.3587, + "step": 29288 + }, + { + "epoch": 0.5060996682333425, + "grad_norm": 0.8720547704133437, + "learning_rate": 1.0288459315372461e-05, + "loss": 0.3983, + "step": 29289 + }, + { + "epoch": 0.5061169477467514, + "grad_norm": 0.5110481521837217, + "learning_rate": 1.0287899898275223e-05, + "loss": 0.7427, + "step": 29290 + }, + { + "epoch": 0.5061342272601603, + "grad_norm": 0.8213651751142417, + "learning_rate": 1.0287340480276253e-05, + "loss": 0.3368, + "step": 29291 + }, + { + "epoch": 0.5061515067735692, + "grad_norm": 1.5761577269923064, + "learning_rate": 1.0286781061377316e-05, + "loss": 0.4736, + "step": 29292 + }, + { + "epoch": 0.5061687862869781, + "grad_norm": 0.8928391973258335, + "learning_rate": 1.0286221641580157e-05, + "loss": 0.3678, + "step": 29293 + }, + { + "epoch": 0.506186065800387, + "grad_norm": 0.9206935255624045, + "learning_rate": 1.0285662220886528e-05, + "loss": 0.3862, + "step": 29294 + }, + { + "epoch": 0.506203345313796, + "grad_norm": 0.9964093109331603, + "learning_rate": 1.0285102799298185e-05, + "loss": 0.3573, + "step": 29295 + }, + { + "epoch": 0.5062206248272049, + "grad_norm": 0.6477959378116666, + "learning_rate": 1.028454337681688e-05, + "loss": 0.291, + "step": 29296 + }, + { + "epoch": 0.5062379043406138, + "grad_norm": 1.2646310803062388, + "learning_rate": 1.028398395344436e-05, + "loss": 0.4345, + "step": 29297 + }, + { + "epoch": 0.5062551838540227, + "grad_norm": 0.944217049132821, + "learning_rate": 1.0283424529182385e-05, + "loss": 0.5029, + "step": 29298 + }, + { + "epoch": 0.5062724633674316, + "grad_norm": 1.361382415804954, + "learning_rate": 1.02828651040327e-05, + "loss": 0.3797, + "step": 29299 + }, + { + "epoch": 0.5062897428808405, + "grad_norm": 0.726650380341692, + "learning_rate": 1.0282305677997062e-05, + "loss": 0.4348, + "step": 29300 + }, + { + "epoch": 0.5063070223942494, + "grad_norm": 0.8139600093823862, + "learning_rate": 1.0281746251077221e-05, + "loss": 0.3939, + "step": 29301 + }, + { + "epoch": 0.5063243019076583, + "grad_norm": 0.44845571387390415, + "learning_rate": 1.0281186823274924e-05, + "loss": 0.5273, + "step": 29302 + }, + { + "epoch": 0.5063415814210672, + "grad_norm": 0.7265687038301097, + "learning_rate": 1.0280627394591933e-05, + "loss": 0.4689, + "step": 29303 + }, + { + "epoch": 0.5063588609344761, + "grad_norm": 1.2757755193563665, + "learning_rate": 1.0280067965029997e-05, + "loss": 0.4975, + "step": 29304 + }, + { + "epoch": 0.506376140447885, + "grad_norm": 1.1415246465257491, + "learning_rate": 1.0279508534590862e-05, + "loss": 0.4417, + "step": 29305 + }, + { + "epoch": 0.506393419961294, + "grad_norm": 0.7800813693932933, + "learning_rate": 1.027894910327629e-05, + "loss": 0.509, + "step": 29306 + }, + { + "epoch": 0.5064106994747027, + "grad_norm": 0.8014462826900602, + "learning_rate": 1.0278389671088027e-05, + "loss": 0.2713, + "step": 29307 + }, + { + "epoch": 0.5064279789881116, + "grad_norm": 1.078441826286471, + "learning_rate": 1.0277830238027824e-05, + "loss": 0.3415, + "step": 29308 + }, + { + "epoch": 0.5064452585015206, + "grad_norm": 1.0978515015947141, + "learning_rate": 1.0277270804097439e-05, + "loss": 0.5397, + "step": 29309 + }, + { + "epoch": 0.5064625380149295, + "grad_norm": 0.7467752402089637, + "learning_rate": 1.027671136929862e-05, + "loss": 0.4825, + "step": 29310 + }, + { + "epoch": 0.5064798175283384, + "grad_norm": 1.4342205762308295, + "learning_rate": 1.0276151933633119e-05, + "loss": 0.3239, + "step": 29311 + }, + { + "epoch": 0.5064970970417473, + "grad_norm": 1.3184867882840159, + "learning_rate": 1.0275592497102688e-05, + "loss": 0.4148, + "step": 29312 + }, + { + "epoch": 0.5065143765551562, + "grad_norm": 0.9831226039667481, + "learning_rate": 1.0275033059709083e-05, + "loss": 0.4349, + "step": 29313 + }, + { + "epoch": 0.5065316560685651, + "grad_norm": 0.9038530666802501, + "learning_rate": 1.027447362145405e-05, + "loss": 0.3761, + "step": 29314 + }, + { + "epoch": 0.506548935581974, + "grad_norm": 1.0720738099556688, + "learning_rate": 1.0273914182339348e-05, + "loss": 0.3794, + "step": 29315 + }, + { + "epoch": 0.5065662150953829, + "grad_norm": 1.024953004602435, + "learning_rate": 1.0273354742366726e-05, + "loss": 0.4281, + "step": 29316 + }, + { + "epoch": 0.5065834946087918, + "grad_norm": 1.0299162771095018, + "learning_rate": 1.0272795301537933e-05, + "loss": 0.3929, + "step": 29317 + }, + { + "epoch": 0.5066007741222007, + "grad_norm": 1.3262937846826945, + "learning_rate": 1.0272235859854726e-05, + "loss": 0.3146, + "step": 29318 + }, + { + "epoch": 0.5066180536356096, + "grad_norm": 1.3069251999576441, + "learning_rate": 1.0271676417318859e-05, + "loss": 0.2326, + "step": 29319 + }, + { + "epoch": 0.5066353331490185, + "grad_norm": 1.0866721740364849, + "learning_rate": 1.0271116973932075e-05, + "loss": 0.4184, + "step": 29320 + }, + { + "epoch": 0.5066526126624274, + "grad_norm": 1.17036022812901, + "learning_rate": 1.0270557529696137e-05, + "loss": 0.4078, + "step": 29321 + }, + { + "epoch": 0.5066698921758364, + "grad_norm": 1.1522845845189045, + "learning_rate": 1.0269998084612789e-05, + "loss": 0.3171, + "step": 29322 + }, + { + "epoch": 0.5066871716892453, + "grad_norm": 0.7664948439817236, + "learning_rate": 1.0269438638683789e-05, + "loss": 0.5203, + "step": 29323 + }, + { + "epoch": 0.5067044512026542, + "grad_norm": 1.550654529358824, + "learning_rate": 1.0268879191910886e-05, + "loss": 0.413, + "step": 29324 + }, + { + "epoch": 0.5067217307160631, + "grad_norm": 1.5603443090893008, + "learning_rate": 1.026831974429583e-05, + "loss": 0.3953, + "step": 29325 + }, + { + "epoch": 0.506739010229472, + "grad_norm": 0.8779409375426064, + "learning_rate": 1.026776029584038e-05, + "loss": 0.5015, + "step": 29326 + }, + { + "epoch": 0.5067562897428809, + "grad_norm": 0.6181031205927636, + "learning_rate": 1.0267200846546282e-05, + "loss": 0.3227, + "step": 29327 + }, + { + "epoch": 0.5067735692562897, + "grad_norm": 0.9856273731285703, + "learning_rate": 1.0266641396415288e-05, + "loss": 0.3531, + "step": 29328 + }, + { + "epoch": 0.5067908487696986, + "grad_norm": 1.4368069915458421, + "learning_rate": 1.0266081945449158e-05, + "loss": 0.2415, + "step": 29329 + }, + { + "epoch": 0.5068081282831075, + "grad_norm": 0.8627848941158671, + "learning_rate": 1.026552249364964e-05, + "loss": 0.6166, + "step": 29330 + }, + { + "epoch": 0.5068254077965164, + "grad_norm": 1.5457493470099462, + "learning_rate": 1.0264963041018483e-05, + "loss": 0.3598, + "step": 29331 + }, + { + "epoch": 0.5068426873099253, + "grad_norm": 1.1898139455290575, + "learning_rate": 1.026440358755744e-05, + "loss": 0.3605, + "step": 29332 + }, + { + "epoch": 0.5068599668233342, + "grad_norm": 1.4825466300042915, + "learning_rate": 1.0263844133268268e-05, + "loss": 0.4618, + "step": 29333 + }, + { + "epoch": 0.5068772463367431, + "grad_norm": 1.203733122445767, + "learning_rate": 1.0263284678152714e-05, + "loss": 0.4694, + "step": 29334 + }, + { + "epoch": 0.506894525850152, + "grad_norm": 1.0331545441260463, + "learning_rate": 1.0262725222212534e-05, + "loss": 0.4727, + "step": 29335 + }, + { + "epoch": 0.506911805363561, + "grad_norm": 1.0267119357890153, + "learning_rate": 1.0262165765449477e-05, + "loss": 0.4632, + "step": 29336 + }, + { + "epoch": 0.5069290848769699, + "grad_norm": 0.6660961598047512, + "learning_rate": 1.0261606307865295e-05, + "loss": 0.283, + "step": 29337 + }, + { + "epoch": 0.5069463643903788, + "grad_norm": 0.9728988496744473, + "learning_rate": 1.0261046849461744e-05, + "loss": 0.4586, + "step": 29338 + }, + { + "epoch": 0.5069636439037877, + "grad_norm": 1.5423842275252864, + "learning_rate": 1.0260487390240576e-05, + "loss": 0.4696, + "step": 29339 + }, + { + "epoch": 0.5069809234171966, + "grad_norm": 1.0073523803146587, + "learning_rate": 1.025992793020354e-05, + "loss": 0.5088, + "step": 29340 + }, + { + "epoch": 0.5069982029306055, + "grad_norm": 0.9330555019380748, + "learning_rate": 1.025936846935239e-05, + "loss": 0.441, + "step": 29341 + }, + { + "epoch": 0.5070154824440144, + "grad_norm": 0.7922456968920804, + "learning_rate": 1.0258809007688881e-05, + "loss": 0.4311, + "step": 29342 + }, + { + "epoch": 0.5070327619574233, + "grad_norm": 0.6937949481977183, + "learning_rate": 1.0258249545214761e-05, + "loss": 0.4374, + "step": 29343 + }, + { + "epoch": 0.5070500414708322, + "grad_norm": 0.546077357413136, + "learning_rate": 1.0257690081931784e-05, + "loss": 0.6704, + "step": 29344 + }, + { + "epoch": 0.5070673209842411, + "grad_norm": 0.41996431147402713, + "learning_rate": 1.02571306178417e-05, + "loss": 0.43, + "step": 29345 + }, + { + "epoch": 0.50708460049765, + "grad_norm": 1.2759733381017668, + "learning_rate": 1.0256571152946264e-05, + "loss": 0.4124, + "step": 29346 + }, + { + "epoch": 0.5071018800110589, + "grad_norm": 0.7887033832691634, + "learning_rate": 1.0256011687247229e-05, + "loss": 0.4533, + "step": 29347 + }, + { + "epoch": 0.5071191595244678, + "grad_norm": 1.3990202918732229, + "learning_rate": 1.0255452220746343e-05, + "loss": 0.3159, + "step": 29348 + }, + { + "epoch": 0.5071364390378766, + "grad_norm": 0.7039380815640388, + "learning_rate": 1.0254892753445365e-05, + "loss": 0.3309, + "step": 29349 + }, + { + "epoch": 0.5071537185512855, + "grad_norm": 1.1645324994499882, + "learning_rate": 1.0254333285346043e-05, + "loss": 0.5177, + "step": 29350 + }, + { + "epoch": 0.5071709980646945, + "grad_norm": 1.0786648904588463, + "learning_rate": 1.0253773816450126e-05, + "loss": 0.4264, + "step": 29351 + }, + { + "epoch": 0.5071882775781034, + "grad_norm": 0.8324786672126071, + "learning_rate": 1.0253214346759375e-05, + "loss": 0.4538, + "step": 29352 + }, + { + "epoch": 0.5072055570915123, + "grad_norm": 0.9736857758464873, + "learning_rate": 1.0252654876275537e-05, + "loss": 0.3344, + "step": 29353 + }, + { + "epoch": 0.5072228366049212, + "grad_norm": 0.8513395144057854, + "learning_rate": 1.0252095405000361e-05, + "loss": 0.3003, + "step": 29354 + }, + { + "epoch": 0.5072401161183301, + "grad_norm": 1.0561772192381558, + "learning_rate": 1.0251535932935606e-05, + "loss": 0.4323, + "step": 29355 + }, + { + "epoch": 0.507257395631739, + "grad_norm": 0.8084604461809738, + "learning_rate": 1.0250976460083021e-05, + "loss": 0.301, + "step": 29356 + }, + { + "epoch": 0.5072746751451479, + "grad_norm": 0.8646362906816555, + "learning_rate": 1.0250416986444357e-05, + "loss": 0.3281, + "step": 29357 + }, + { + "epoch": 0.5072919546585568, + "grad_norm": 0.8704379696942034, + "learning_rate": 1.0249857512021368e-05, + "loss": 0.5281, + "step": 29358 + }, + { + "epoch": 0.5073092341719657, + "grad_norm": 1.0838466756350582, + "learning_rate": 1.0249298036815809e-05, + "loss": 0.5604, + "step": 29359 + }, + { + "epoch": 0.5073265136853746, + "grad_norm": 1.6851974174824904, + "learning_rate": 1.0248738560829425e-05, + "loss": 0.525, + "step": 29360 + }, + { + "epoch": 0.5073437931987835, + "grad_norm": 1.397598170366707, + "learning_rate": 1.0248179084063978e-05, + "loss": 0.5315, + "step": 29361 + }, + { + "epoch": 0.5073610727121924, + "grad_norm": 1.17163798643181, + "learning_rate": 1.0247619606521213e-05, + "loss": 0.3178, + "step": 29362 + }, + { + "epoch": 0.5073783522256013, + "grad_norm": 1.275654829665167, + "learning_rate": 1.0247060128202886e-05, + "loss": 0.4616, + "step": 29363 + }, + { + "epoch": 0.5073956317390103, + "grad_norm": 1.2498853276866566, + "learning_rate": 1.0246500649110747e-05, + "loss": 0.3679, + "step": 29364 + }, + { + "epoch": 0.5074129112524192, + "grad_norm": 0.7465088153160996, + "learning_rate": 1.0245941169246548e-05, + "loss": 0.4099, + "step": 29365 + }, + { + "epoch": 0.5074301907658281, + "grad_norm": 0.988563666891379, + "learning_rate": 1.0245381688612045e-05, + "loss": 0.6195, + "step": 29366 + }, + { + "epoch": 0.507447470279237, + "grad_norm": 1.1495288495366602, + "learning_rate": 1.0244822207208985e-05, + "loss": 0.4367, + "step": 29367 + }, + { + "epoch": 0.5074647497926459, + "grad_norm": 0.9298069822751305, + "learning_rate": 1.0244262725039122e-05, + "loss": 0.3482, + "step": 29368 + }, + { + "epoch": 0.5074820293060548, + "grad_norm": 0.9894806361033849, + "learning_rate": 1.0243703242104213e-05, + "loss": 0.3603, + "step": 29369 + }, + { + "epoch": 0.5074993088194636, + "grad_norm": 0.5729119710197639, + "learning_rate": 1.0243143758406007e-05, + "loss": 0.3645, + "step": 29370 + }, + { + "epoch": 0.5075165883328725, + "grad_norm": 1.0810412717821696, + "learning_rate": 1.0242584273946254e-05, + "loss": 0.3067, + "step": 29371 + }, + { + "epoch": 0.5075338678462814, + "grad_norm": 0.5533785790275544, + "learning_rate": 1.0242024788726713e-05, + "loss": 0.7446, + "step": 29372 + }, + { + "epoch": 0.5075511473596903, + "grad_norm": 1.1870929654175333, + "learning_rate": 1.024146530274913e-05, + "loss": 0.3904, + "step": 29373 + }, + { + "epoch": 0.5075684268730992, + "grad_norm": 0.808144334024686, + "learning_rate": 1.0240905816015257e-05, + "loss": 0.2615, + "step": 29374 + }, + { + "epoch": 0.5075857063865081, + "grad_norm": 1.1528151451710766, + "learning_rate": 1.024034632852685e-05, + "loss": 0.5363, + "step": 29375 + }, + { + "epoch": 0.507602985899917, + "grad_norm": 1.0527712551606305, + "learning_rate": 1.0239786840285659e-05, + "loss": 0.4276, + "step": 29376 + }, + { + "epoch": 0.5076202654133259, + "grad_norm": 0.9660774700806827, + "learning_rate": 1.0239227351293438e-05, + "loss": 0.463, + "step": 29377 + }, + { + "epoch": 0.5076375449267349, + "grad_norm": 1.3062531334977932, + "learning_rate": 1.0238667861551938e-05, + "loss": 0.4328, + "step": 29378 + }, + { + "epoch": 0.5076548244401438, + "grad_norm": 0.9554920383837828, + "learning_rate": 1.0238108371062914e-05, + "loss": 0.5697, + "step": 29379 + }, + { + "epoch": 0.5076721039535527, + "grad_norm": 0.5536834241781663, + "learning_rate": 1.0237548879828114e-05, + "loss": 0.7011, + "step": 29380 + }, + { + "epoch": 0.5076893834669616, + "grad_norm": 0.8071642543185876, + "learning_rate": 1.0236989387849297e-05, + "loss": 0.5632, + "step": 29381 + }, + { + "epoch": 0.5077066629803705, + "grad_norm": 0.7230672802659937, + "learning_rate": 1.0236429895128208e-05, + "loss": 0.2956, + "step": 29382 + }, + { + "epoch": 0.5077239424937794, + "grad_norm": 0.9882427435962797, + "learning_rate": 1.0235870401666602e-05, + "loss": 0.3361, + "step": 29383 + }, + { + "epoch": 0.5077412220071883, + "grad_norm": 1.2291929538755155, + "learning_rate": 1.0235310907466237e-05, + "loss": 0.5709, + "step": 29384 + }, + { + "epoch": 0.5077585015205972, + "grad_norm": 0.9819657905347621, + "learning_rate": 1.0234751412528854e-05, + "loss": 0.3318, + "step": 29385 + }, + { + "epoch": 0.5077757810340061, + "grad_norm": 0.6014760894789185, + "learning_rate": 1.0234191916856216e-05, + "loss": 0.5496, + "step": 29386 + }, + { + "epoch": 0.507793060547415, + "grad_norm": 1.3563817037359454, + "learning_rate": 1.0233632420450068e-05, + "loss": 0.3107, + "step": 29387 + }, + { + "epoch": 0.5078103400608239, + "grad_norm": 0.840226429967866, + "learning_rate": 1.0233072923312166e-05, + "loss": 0.4478, + "step": 29388 + }, + { + "epoch": 0.5078276195742328, + "grad_norm": 0.8674396332343262, + "learning_rate": 1.0232513425444264e-05, + "loss": 0.4184, + "step": 29389 + }, + { + "epoch": 0.5078448990876417, + "grad_norm": 0.9046717542453145, + "learning_rate": 1.0231953926848112e-05, + "loss": 0.4215, + "step": 29390 + }, + { + "epoch": 0.5078621786010505, + "grad_norm": 1.1885811038705747, + "learning_rate": 1.023139442752546e-05, + "loss": 0.4064, + "step": 29391 + }, + { + "epoch": 0.5078794581144594, + "grad_norm": 0.8004529825414045, + "learning_rate": 1.0230834927478067e-05, + "loss": 0.4181, + "step": 29392 + }, + { + "epoch": 0.5078967376278684, + "grad_norm": 0.8716152928257251, + "learning_rate": 1.0230275426707681e-05, + "loss": 0.445, + "step": 29393 + }, + { + "epoch": 0.5079140171412773, + "grad_norm": 0.5633244770940903, + "learning_rate": 1.022971592521605e-05, + "loss": 0.6056, + "step": 29394 + }, + { + "epoch": 0.5079312966546862, + "grad_norm": 1.0215406745017435, + "learning_rate": 1.0229156423004935e-05, + "loss": 0.476, + "step": 29395 + }, + { + "epoch": 0.5079485761680951, + "grad_norm": 1.28677754039191, + "learning_rate": 1.0228596920076086e-05, + "loss": 0.5301, + "step": 29396 + }, + { + "epoch": 0.507965855681504, + "grad_norm": 1.0938630396573763, + "learning_rate": 1.0228037416431252e-05, + "loss": 0.7091, + "step": 29397 + }, + { + "epoch": 0.5079831351949129, + "grad_norm": 0.932485057789518, + "learning_rate": 1.0227477912072188e-05, + "loss": 0.5709, + "step": 29398 + }, + { + "epoch": 0.5080004147083218, + "grad_norm": 0.7103911116992183, + "learning_rate": 1.0226918407000645e-05, + "loss": 0.505, + "step": 29399 + }, + { + "epoch": 0.5080176942217307, + "grad_norm": 1.1175347441253807, + "learning_rate": 1.0226358901218376e-05, + "loss": 0.536, + "step": 29400 + }, + { + "epoch": 0.5080349737351396, + "grad_norm": 0.8470055594069765, + "learning_rate": 1.0225799394727137e-05, + "loss": 0.317, + "step": 29401 + }, + { + "epoch": 0.5080522532485485, + "grad_norm": 0.7994960897292156, + "learning_rate": 1.0225239887528673e-05, + "loss": 0.3803, + "step": 29402 + }, + { + "epoch": 0.5080695327619574, + "grad_norm": 1.1341937341466413, + "learning_rate": 1.0224680379624743e-05, + "loss": 0.451, + "step": 29403 + }, + { + "epoch": 0.5080868122753663, + "grad_norm": 1.0126824196654975, + "learning_rate": 1.0224120871017097e-05, + "loss": 0.3876, + "step": 29404 + }, + { + "epoch": 0.5081040917887752, + "grad_norm": 0.9588707323598018, + "learning_rate": 1.0223561361707487e-05, + "loss": 0.5066, + "step": 29405 + }, + { + "epoch": 0.5081213713021842, + "grad_norm": 0.8831097717742361, + "learning_rate": 1.0223001851697664e-05, + "loss": 0.3282, + "step": 29406 + }, + { + "epoch": 0.5081386508155931, + "grad_norm": 0.861094684158708, + "learning_rate": 1.0222442340989385e-05, + "loss": 0.4268, + "step": 29407 + }, + { + "epoch": 0.508155930329002, + "grad_norm": 1.3014293621335293, + "learning_rate": 1.0221882829584397e-05, + "loss": 0.393, + "step": 29408 + }, + { + "epoch": 0.5081732098424109, + "grad_norm": 1.386629713054595, + "learning_rate": 1.0221323317484457e-05, + "loss": 0.4495, + "step": 29409 + }, + { + "epoch": 0.5081904893558198, + "grad_norm": 0.7614742915836274, + "learning_rate": 1.0220763804691316e-05, + "loss": 0.2653, + "step": 29410 + }, + { + "epoch": 0.5082077688692287, + "grad_norm": 0.8528637538292814, + "learning_rate": 1.0220204291206724e-05, + "loss": 0.5442, + "step": 29411 + }, + { + "epoch": 0.5082250483826375, + "grad_norm": 0.9322335883891499, + "learning_rate": 1.0219644777032435e-05, + "loss": 0.3896, + "step": 29412 + }, + { + "epoch": 0.5082423278960464, + "grad_norm": 0.4795249229042051, + "learning_rate": 1.0219085262170205e-05, + "loss": 0.833, + "step": 29413 + }, + { + "epoch": 0.5082596074094553, + "grad_norm": 1.0625646402046471, + "learning_rate": 1.0218525746621783e-05, + "loss": 0.5724, + "step": 29414 + }, + { + "epoch": 0.5082768869228642, + "grad_norm": 1.0409495478102424, + "learning_rate": 1.0217966230388921e-05, + "loss": 0.4042, + "step": 29415 + }, + { + "epoch": 0.5082941664362731, + "grad_norm": 0.6697101583878118, + "learning_rate": 1.0217406713473373e-05, + "loss": 0.617, + "step": 29416 + }, + { + "epoch": 0.508311445949682, + "grad_norm": 1.0682456640610454, + "learning_rate": 1.0216847195876886e-05, + "loss": 0.4688, + "step": 29417 + }, + { + "epoch": 0.5083287254630909, + "grad_norm": 0.8878495226936001, + "learning_rate": 1.0216287677601221e-05, + "loss": 0.4344, + "step": 29418 + }, + { + "epoch": 0.5083460049764998, + "grad_norm": 0.9388068013114427, + "learning_rate": 1.0215728158648129e-05, + "loss": 0.5595, + "step": 29419 + }, + { + "epoch": 0.5083632844899088, + "grad_norm": 1.5988737513285436, + "learning_rate": 1.0215168639019354e-05, + "loss": 0.4881, + "step": 29420 + }, + { + "epoch": 0.5083805640033177, + "grad_norm": 1.5108619369735279, + "learning_rate": 1.0214609118716658e-05, + "loss": 0.3946, + "step": 29421 + }, + { + "epoch": 0.5083978435167266, + "grad_norm": 1.56229451193871, + "learning_rate": 1.0214049597741791e-05, + "loss": 0.4598, + "step": 29422 + }, + { + "epoch": 0.5084151230301355, + "grad_norm": 1.2969862856106118, + "learning_rate": 1.0213490076096502e-05, + "loss": 0.5298, + "step": 29423 + }, + { + "epoch": 0.5084324025435444, + "grad_norm": 0.9516088552357466, + "learning_rate": 1.0212930553782547e-05, + "loss": 0.4111, + "step": 29424 + }, + { + "epoch": 0.5084496820569533, + "grad_norm": 0.9878377633215432, + "learning_rate": 1.0212371030801677e-05, + "loss": 0.6264, + "step": 29425 + }, + { + "epoch": 0.5084669615703622, + "grad_norm": 0.9109479810922497, + "learning_rate": 1.0211811507155644e-05, + "loss": 0.4809, + "step": 29426 + }, + { + "epoch": 0.5084842410837711, + "grad_norm": 0.7284805952271387, + "learning_rate": 1.0211251982846204e-05, + "loss": 0.309, + "step": 29427 + }, + { + "epoch": 0.50850152059718, + "grad_norm": 1.4286908307691766, + "learning_rate": 1.0210692457875101e-05, + "loss": 0.4574, + "step": 29428 + }, + { + "epoch": 0.5085188001105889, + "grad_norm": 0.6590386648166756, + "learning_rate": 1.0210132932244099e-05, + "loss": 0.3808, + "step": 29429 + }, + { + "epoch": 0.5085360796239978, + "grad_norm": 1.3046291213063002, + "learning_rate": 1.0209573405954944e-05, + "loss": 0.3441, + "step": 29430 + }, + { + "epoch": 0.5085533591374067, + "grad_norm": 0.8502479803459242, + "learning_rate": 1.0209013879009389e-05, + "loss": 0.5241, + "step": 29431 + }, + { + "epoch": 0.5085706386508156, + "grad_norm": 0.7816502103311949, + "learning_rate": 1.0208454351409182e-05, + "loss": 0.3082, + "step": 29432 + }, + { + "epoch": 0.5085879181642246, + "grad_norm": 1.1765733565601646, + "learning_rate": 1.0207894823156086e-05, + "loss": 0.5486, + "step": 29433 + }, + { + "epoch": 0.5086051976776333, + "grad_norm": 1.2816205187629215, + "learning_rate": 1.0207335294251845e-05, + "loss": 0.4558, + "step": 29434 + }, + { + "epoch": 0.5086224771910423, + "grad_norm": 0.992795383264893, + "learning_rate": 1.0206775764698216e-05, + "loss": 0.3386, + "step": 29435 + }, + { + "epoch": 0.5086397567044512, + "grad_norm": 0.8252828111824106, + "learning_rate": 1.020621623449695e-05, + "loss": 0.5579, + "step": 29436 + }, + { + "epoch": 0.5086570362178601, + "grad_norm": 0.8861176965009739, + "learning_rate": 1.0205656703649793e-05, + "loss": 0.3767, + "step": 29437 + }, + { + "epoch": 0.508674315731269, + "grad_norm": 0.8588234195877816, + "learning_rate": 1.020509717215851e-05, + "loss": 0.3978, + "step": 29438 + }, + { + "epoch": 0.5086915952446779, + "grad_norm": 0.5235582638555332, + "learning_rate": 1.0204537640024842e-05, + "loss": 0.6511, + "step": 29439 + }, + { + "epoch": 0.5087088747580868, + "grad_norm": 2.03957067298794, + "learning_rate": 1.020397810725055e-05, + "loss": 0.3273, + "step": 29440 + }, + { + "epoch": 0.5087261542714957, + "grad_norm": 0.838209158350392, + "learning_rate": 1.0203418573837382e-05, + "loss": 0.3962, + "step": 29441 + }, + { + "epoch": 0.5087434337849046, + "grad_norm": 0.7646330978694929, + "learning_rate": 1.0202859039787092e-05, + "loss": 0.4073, + "step": 29442 + }, + { + "epoch": 0.5087607132983135, + "grad_norm": 0.6325528027877189, + "learning_rate": 1.0202299505101431e-05, + "loss": 0.391, + "step": 29443 + }, + { + "epoch": 0.5087779928117224, + "grad_norm": 1.063212673217994, + "learning_rate": 1.0201739969782154e-05, + "loss": 0.5702, + "step": 29444 + }, + { + "epoch": 0.5087952723251313, + "grad_norm": 0.9750459704151939, + "learning_rate": 1.0201180433831012e-05, + "loss": 0.4103, + "step": 29445 + }, + { + "epoch": 0.5088125518385402, + "grad_norm": 1.0462924894471246, + "learning_rate": 1.0200620897249758e-05, + "loss": 0.3791, + "step": 29446 + }, + { + "epoch": 0.5088298313519491, + "grad_norm": 1.764912872258038, + "learning_rate": 1.0200061360040144e-05, + "loss": 0.5246, + "step": 29447 + }, + { + "epoch": 0.508847110865358, + "grad_norm": 0.4670026694621662, + "learning_rate": 1.0199501822203922e-05, + "loss": 0.5252, + "step": 29448 + }, + { + "epoch": 0.508864390378767, + "grad_norm": 1.3805636700016326, + "learning_rate": 1.0198942283742844e-05, + "loss": 0.53, + "step": 29449 + }, + { + "epoch": 0.5088816698921759, + "grad_norm": 1.1294826060613181, + "learning_rate": 1.0198382744658666e-05, + "loss": 0.5722, + "step": 29450 + }, + { + "epoch": 0.5088989494055848, + "grad_norm": 1.0013428179427004, + "learning_rate": 1.0197823204953137e-05, + "loss": 0.5228, + "step": 29451 + }, + { + "epoch": 0.5089162289189937, + "grad_norm": 1.037660759667066, + "learning_rate": 1.0197263664628007e-05, + "loss": 0.4828, + "step": 29452 + }, + { + "epoch": 0.5089335084324026, + "grad_norm": 0.7730507102086098, + "learning_rate": 1.0196704123685038e-05, + "loss": 0.4648, + "step": 29453 + }, + { + "epoch": 0.5089507879458115, + "grad_norm": 0.440086025838891, + "learning_rate": 1.0196144582125976e-05, + "loss": 0.8621, + "step": 29454 + }, + { + "epoch": 0.5089680674592203, + "grad_norm": 1.2781258422898079, + "learning_rate": 1.019558503995257e-05, + "loss": 0.6403, + "step": 29455 + }, + { + "epoch": 0.5089853469726292, + "grad_norm": 1.2286828868677129, + "learning_rate": 1.0195025497166581e-05, + "loss": 0.386, + "step": 29456 + }, + { + "epoch": 0.5090026264860381, + "grad_norm": 1.6823614746379123, + "learning_rate": 1.0194465953769757e-05, + "loss": 0.2904, + "step": 29457 + }, + { + "epoch": 0.509019905999447, + "grad_norm": 1.0367518671514602, + "learning_rate": 1.0193906409763849e-05, + "loss": 0.3643, + "step": 29458 + }, + { + "epoch": 0.5090371855128559, + "grad_norm": 0.993705477014239, + "learning_rate": 1.0193346865150613e-05, + "loss": 0.5586, + "step": 29459 + }, + { + "epoch": 0.5090544650262648, + "grad_norm": 1.2151576978430085, + "learning_rate": 1.0192787319931796e-05, + "loss": 0.4534, + "step": 29460 + }, + { + "epoch": 0.5090717445396737, + "grad_norm": 0.846151746198822, + "learning_rate": 1.019222777410916e-05, + "loss": 0.5577, + "step": 29461 + }, + { + "epoch": 0.5090890240530827, + "grad_norm": 1.1121494157712997, + "learning_rate": 1.019166822768445e-05, + "loss": 0.4298, + "step": 29462 + }, + { + "epoch": 0.5091063035664916, + "grad_norm": 0.8140373794690616, + "learning_rate": 1.0191108680659418e-05, + "loss": 0.4764, + "step": 29463 + }, + { + "epoch": 0.5091235830799005, + "grad_norm": 1.2032134598971573, + "learning_rate": 1.019054913303582e-05, + "loss": 0.4297, + "step": 29464 + }, + { + "epoch": 0.5091408625933094, + "grad_norm": 0.5644570692811779, + "learning_rate": 1.0189989584815412e-05, + "loss": 0.3566, + "step": 29465 + }, + { + "epoch": 0.5091581421067183, + "grad_norm": 0.9008570271135892, + "learning_rate": 1.0189430035999939e-05, + "loss": 0.3387, + "step": 29466 + }, + { + "epoch": 0.5091754216201272, + "grad_norm": 0.5241227443942176, + "learning_rate": 1.0188870486591156e-05, + "loss": 0.76, + "step": 29467 + }, + { + "epoch": 0.5091927011335361, + "grad_norm": 1.0713747845338566, + "learning_rate": 1.0188310936590816e-05, + "loss": 0.5283, + "step": 29468 + }, + { + "epoch": 0.509209980646945, + "grad_norm": 1.0475803461342745, + "learning_rate": 1.0187751386000673e-05, + "loss": 0.4931, + "step": 29469 + }, + { + "epoch": 0.5092272601603539, + "grad_norm": 0.9520553747047461, + "learning_rate": 1.0187191834822478e-05, + "loss": 0.5468, + "step": 29470 + }, + { + "epoch": 0.5092445396737628, + "grad_norm": 1.1169478666253425, + "learning_rate": 1.0186632283057987e-05, + "loss": 0.3907, + "step": 29471 + }, + { + "epoch": 0.5092618191871717, + "grad_norm": 1.1113054231685338, + "learning_rate": 1.0186072730708944e-05, + "loss": 0.3184, + "step": 29472 + }, + { + "epoch": 0.5092790987005806, + "grad_norm": 0.9519053468654927, + "learning_rate": 1.0185513177777111e-05, + "loss": 0.4555, + "step": 29473 + }, + { + "epoch": 0.5092963782139895, + "grad_norm": 0.954500885675706, + "learning_rate": 1.0184953624264236e-05, + "loss": 0.3803, + "step": 29474 + }, + { + "epoch": 0.5093136577273984, + "grad_norm": 0.8967820145372318, + "learning_rate": 1.0184394070172072e-05, + "loss": 0.4198, + "step": 29475 + }, + { + "epoch": 0.5093309372408072, + "grad_norm": 0.9930951450355614, + "learning_rate": 1.0183834515502372e-05, + "loss": 0.3958, + "step": 29476 + }, + { + "epoch": 0.5093482167542162, + "grad_norm": 0.7621335463087706, + "learning_rate": 1.0183274960256887e-05, + "loss": 0.3162, + "step": 29477 + }, + { + "epoch": 0.5093654962676251, + "grad_norm": 1.0538006225095802, + "learning_rate": 1.0182715404437371e-05, + "loss": 0.2859, + "step": 29478 + }, + { + "epoch": 0.509382775781034, + "grad_norm": 1.0077777991741537, + "learning_rate": 1.0182155848045576e-05, + "loss": 0.3583, + "step": 29479 + }, + { + "epoch": 0.5094000552944429, + "grad_norm": 1.6892229636410412, + "learning_rate": 1.0181596291083255e-05, + "loss": 0.5799, + "step": 29480 + }, + { + "epoch": 0.5094173348078518, + "grad_norm": 0.9761165191021755, + "learning_rate": 1.0181036733552164e-05, + "loss": 0.5167, + "step": 29481 + }, + { + "epoch": 0.5094346143212607, + "grad_norm": 0.8238103555522375, + "learning_rate": 1.018047717545405e-05, + "loss": 0.2682, + "step": 29482 + }, + { + "epoch": 0.5094518938346696, + "grad_norm": 0.8334003188234561, + "learning_rate": 1.0179917616790665e-05, + "loss": 0.3398, + "step": 29483 + }, + { + "epoch": 0.5094691733480785, + "grad_norm": 1.4085834872285599, + "learning_rate": 1.0179358057563766e-05, + "loss": 0.495, + "step": 29484 + }, + { + "epoch": 0.5094864528614874, + "grad_norm": 0.8424060530675771, + "learning_rate": 1.0178798497775106e-05, + "loss": 0.6133, + "step": 29485 + }, + { + "epoch": 0.5095037323748963, + "grad_norm": 1.087888903966011, + "learning_rate": 1.0178238937426437e-05, + "loss": 0.4421, + "step": 29486 + }, + { + "epoch": 0.5095210118883052, + "grad_norm": 1.4055341811766615, + "learning_rate": 1.0177679376519506e-05, + "loss": 0.416, + "step": 29487 + }, + { + "epoch": 0.5095382914017141, + "grad_norm": 1.1519958179460912, + "learning_rate": 1.0177119815056071e-05, + "loss": 0.3556, + "step": 29488 + }, + { + "epoch": 0.509555570915123, + "grad_norm": 0.7114944155365327, + "learning_rate": 1.0176560253037883e-05, + "loss": 0.339, + "step": 29489 + }, + { + "epoch": 0.509572850428532, + "grad_norm": 1.4900000601800663, + "learning_rate": 1.0176000690466697e-05, + "loss": 0.3903, + "step": 29490 + }, + { + "epoch": 0.5095901299419409, + "grad_norm": 0.8021027259314235, + "learning_rate": 1.0175441127344261e-05, + "loss": 0.405, + "step": 29491 + }, + { + "epoch": 0.5096074094553498, + "grad_norm": 0.9240321534676279, + "learning_rate": 1.017488156367233e-05, + "loss": 0.4468, + "step": 29492 + }, + { + "epoch": 0.5096246889687587, + "grad_norm": 0.6316278922943709, + "learning_rate": 1.017432199945266e-05, + "loss": 0.4784, + "step": 29493 + }, + { + "epoch": 0.5096419684821676, + "grad_norm": 0.4168522113782502, + "learning_rate": 1.0173762434686995e-05, + "loss": 0.5917, + "step": 29494 + }, + { + "epoch": 0.5096592479955765, + "grad_norm": 1.0264471640689563, + "learning_rate": 1.0173202869377095e-05, + "loss": 0.3258, + "step": 29495 + }, + { + "epoch": 0.5096765275089854, + "grad_norm": 0.7352306984041663, + "learning_rate": 1.0172643303524715e-05, + "loss": 0.5804, + "step": 29496 + }, + { + "epoch": 0.5096938070223942, + "grad_norm": 0.6291111641527953, + "learning_rate": 1.0172083737131597e-05, + "loss": 0.7867, + "step": 29497 + }, + { + "epoch": 0.5097110865358031, + "grad_norm": 0.7789750697950334, + "learning_rate": 1.0171524170199502e-05, + "loss": 0.2778, + "step": 29498 + }, + { + "epoch": 0.509728366049212, + "grad_norm": 1.0093225015753655, + "learning_rate": 1.017096460273018e-05, + "loss": 0.4935, + "step": 29499 + }, + { + "epoch": 0.5097456455626209, + "grad_norm": 1.6113195792931707, + "learning_rate": 1.0170405034725383e-05, + "loss": 0.4906, + "step": 29500 + }, + { + "epoch": 0.5097629250760298, + "grad_norm": 1.066518616748197, + "learning_rate": 1.0169845466186865e-05, + "loss": 0.5668, + "step": 29501 + }, + { + "epoch": 0.5097802045894387, + "grad_norm": 0.6675078278884264, + "learning_rate": 1.016928589711638e-05, + "loss": 0.4096, + "step": 29502 + }, + { + "epoch": 0.5097974841028476, + "grad_norm": 1.0351628380956859, + "learning_rate": 1.0168726327515676e-05, + "loss": 0.3438, + "step": 29503 + }, + { + "epoch": 0.5098147636162565, + "grad_norm": 1.254380091891175, + "learning_rate": 1.0168166757386511e-05, + "loss": 0.3418, + "step": 29504 + }, + { + "epoch": 0.5098320431296655, + "grad_norm": 0.6385768109287506, + "learning_rate": 1.0167607186730632e-05, + "loss": 0.2295, + "step": 29505 + }, + { + "epoch": 0.5098493226430744, + "grad_norm": 0.7752430097628606, + "learning_rate": 1.0167047615549796e-05, + "loss": 0.6588, + "step": 29506 + }, + { + "epoch": 0.5098666021564833, + "grad_norm": 0.7438465483950422, + "learning_rate": 1.0166488043845756e-05, + "loss": 0.3926, + "step": 29507 + }, + { + "epoch": 0.5098838816698922, + "grad_norm": 1.1475734983467434, + "learning_rate": 1.016592847162026e-05, + "loss": 0.5113, + "step": 29508 + }, + { + "epoch": 0.5099011611833011, + "grad_norm": 1.1267790126312285, + "learning_rate": 1.0165368898875065e-05, + "loss": 0.4252, + "step": 29509 + }, + { + "epoch": 0.50991844069671, + "grad_norm": 1.2046407604763834, + "learning_rate": 1.0164809325611922e-05, + "loss": 0.3948, + "step": 29510 + }, + { + "epoch": 0.5099357202101189, + "grad_norm": 0.8036300457692505, + "learning_rate": 1.0164249751832582e-05, + "loss": 0.5587, + "step": 29511 + }, + { + "epoch": 0.5099529997235278, + "grad_norm": 1.0116657081179596, + "learning_rate": 1.01636901775388e-05, + "loss": 0.3908, + "step": 29512 + }, + { + "epoch": 0.5099702792369367, + "grad_norm": 0.9631167536086734, + "learning_rate": 1.016313060273233e-05, + "loss": 0.4495, + "step": 29513 + }, + { + "epoch": 0.5099875587503456, + "grad_norm": 0.6921435755952307, + "learning_rate": 1.016257102741492e-05, + "loss": 0.3149, + "step": 29514 + }, + { + "epoch": 0.5100048382637545, + "grad_norm": 1.0593508896108856, + "learning_rate": 1.0162011451588325e-05, + "loss": 0.3069, + "step": 29515 + }, + { + "epoch": 0.5100221177771634, + "grad_norm": 0.9118798284606574, + "learning_rate": 1.01614518752543e-05, + "loss": 0.4066, + "step": 29516 + }, + { + "epoch": 0.5100393972905723, + "grad_norm": 0.9171926812207564, + "learning_rate": 1.0160892298414592e-05, + "loss": 0.5065, + "step": 29517 + }, + { + "epoch": 0.5100566768039811, + "grad_norm": 1.0551885127756047, + "learning_rate": 1.0160332721070963e-05, + "loss": 0.3817, + "step": 29518 + }, + { + "epoch": 0.51007395631739, + "grad_norm": 1.0351064747772523, + "learning_rate": 1.0159773143225155e-05, + "loss": 0.2646, + "step": 29519 + }, + { + "epoch": 0.510091235830799, + "grad_norm": 1.5174555331826622, + "learning_rate": 1.0159213564878925e-05, + "loss": 0.4108, + "step": 29520 + }, + { + "epoch": 0.5101085153442079, + "grad_norm": 1.0470199936984586, + "learning_rate": 1.0158653986034028e-05, + "loss": 0.425, + "step": 29521 + }, + { + "epoch": 0.5101257948576168, + "grad_norm": 1.5100494715812631, + "learning_rate": 1.0158094406692216e-05, + "loss": 0.5319, + "step": 29522 + }, + { + "epoch": 0.5101430743710257, + "grad_norm": 0.9403016101330229, + "learning_rate": 1.0157534826855237e-05, + "loss": 0.401, + "step": 29523 + }, + { + "epoch": 0.5101603538844346, + "grad_norm": 1.2299957911922812, + "learning_rate": 1.015697524652485e-05, + "loss": 0.567, + "step": 29524 + }, + { + "epoch": 0.5101776333978435, + "grad_norm": 0.8093217434757126, + "learning_rate": 1.0156415665702801e-05, + "loss": 0.4185, + "step": 29525 + }, + { + "epoch": 0.5101949129112524, + "grad_norm": 0.9193293611082906, + "learning_rate": 1.0155856084390846e-05, + "loss": 0.4707, + "step": 29526 + }, + { + "epoch": 0.5102121924246613, + "grad_norm": 0.8808685888404062, + "learning_rate": 1.0155296502590746e-05, + "loss": 0.4262, + "step": 29527 + }, + { + "epoch": 0.5102294719380702, + "grad_norm": 1.2546350127739816, + "learning_rate": 1.0154736920304238e-05, + "loss": 0.3896, + "step": 29528 + }, + { + "epoch": 0.5102467514514791, + "grad_norm": 0.6522624074443643, + "learning_rate": 1.0154177337533084e-05, + "loss": 0.3763, + "step": 29529 + }, + { + "epoch": 0.510264030964888, + "grad_norm": 0.8150814811388895, + "learning_rate": 1.0153617754279035e-05, + "loss": 0.3364, + "step": 29530 + }, + { + "epoch": 0.510281310478297, + "grad_norm": 1.0960754432061073, + "learning_rate": 1.0153058170543844e-05, + "loss": 0.3333, + "step": 29531 + }, + { + "epoch": 0.5102985899917059, + "grad_norm": 1.1593154982821023, + "learning_rate": 1.015249858632926e-05, + "loss": 0.3571, + "step": 29532 + }, + { + "epoch": 0.5103158695051148, + "grad_norm": 1.0466011902245456, + "learning_rate": 1.0151939001637044e-05, + "loss": 0.2793, + "step": 29533 + }, + { + "epoch": 0.5103331490185237, + "grad_norm": 0.6974882161716632, + "learning_rate": 1.0151379416468941e-05, + "loss": 0.3592, + "step": 29534 + }, + { + "epoch": 0.5103504285319326, + "grad_norm": 1.3741811982416607, + "learning_rate": 1.0150819830826703e-05, + "loss": 0.6759, + "step": 29535 + }, + { + "epoch": 0.5103677080453415, + "grad_norm": 0.9391711558494369, + "learning_rate": 1.015026024471209e-05, + "loss": 0.3435, + "step": 29536 + }, + { + "epoch": 0.5103849875587504, + "grad_norm": 0.7793972663968466, + "learning_rate": 1.0149700658126852e-05, + "loss": 0.3447, + "step": 29537 + }, + { + "epoch": 0.5104022670721593, + "grad_norm": 1.0257602542410102, + "learning_rate": 1.0149141071072737e-05, + "loss": 0.338, + "step": 29538 + }, + { + "epoch": 0.5104195465855681, + "grad_norm": 0.8185549001552169, + "learning_rate": 1.01485814835515e-05, + "loss": 0.4454, + "step": 29539 + }, + { + "epoch": 0.510436826098977, + "grad_norm": 0.9156863227646609, + "learning_rate": 1.0148021895564895e-05, + "loss": 0.5332, + "step": 29540 + }, + { + "epoch": 0.5104541056123859, + "grad_norm": 0.8082896845418178, + "learning_rate": 1.0147462307114675e-05, + "loss": 0.289, + "step": 29541 + }, + { + "epoch": 0.5104713851257948, + "grad_norm": 1.4717305268687786, + "learning_rate": 1.0146902718202593e-05, + "loss": 0.574, + "step": 29542 + }, + { + "epoch": 0.5104886646392037, + "grad_norm": 0.8865392045134753, + "learning_rate": 1.0146343128830398e-05, + "loss": 0.5019, + "step": 29543 + }, + { + "epoch": 0.5105059441526126, + "grad_norm": 0.8860528308887871, + "learning_rate": 1.0145783538999847e-05, + "loss": 0.3059, + "step": 29544 + }, + { + "epoch": 0.5105232236660215, + "grad_norm": 1.3021295315790498, + "learning_rate": 1.014522394871269e-05, + "loss": 0.3852, + "step": 29545 + }, + { + "epoch": 0.5105405031794304, + "grad_norm": 0.9828915006763961, + "learning_rate": 1.014466435797068e-05, + "loss": 0.4688, + "step": 29546 + }, + { + "epoch": 0.5105577826928394, + "grad_norm": 0.8444404079832899, + "learning_rate": 1.0144104766775574e-05, + "loss": 0.3183, + "step": 29547 + }, + { + "epoch": 0.5105750622062483, + "grad_norm": 1.0879184393455048, + "learning_rate": 1.0143545175129117e-05, + "loss": 0.5663, + "step": 29548 + }, + { + "epoch": 0.5105923417196572, + "grad_norm": 0.935203943941202, + "learning_rate": 1.0142985583033067e-05, + "loss": 0.4517, + "step": 29549 + }, + { + "epoch": 0.5106096212330661, + "grad_norm": 0.8837531012025803, + "learning_rate": 1.0142425990489176e-05, + "loss": 0.2907, + "step": 29550 + }, + { + "epoch": 0.510626900746475, + "grad_norm": 1.1205710686703219, + "learning_rate": 1.0141866397499197e-05, + "loss": 0.3855, + "step": 29551 + }, + { + "epoch": 0.5106441802598839, + "grad_norm": 1.1228405632209177, + "learning_rate": 1.0141306804064877e-05, + "loss": 0.4127, + "step": 29552 + }, + { + "epoch": 0.5106614597732928, + "grad_norm": 1.8597959516615725, + "learning_rate": 1.0140747210187976e-05, + "loss": 0.5037, + "step": 29553 + }, + { + "epoch": 0.5106787392867017, + "grad_norm": 1.0290124831537, + "learning_rate": 1.0140187615870245e-05, + "loss": 0.2493, + "step": 29554 + }, + { + "epoch": 0.5106960188001106, + "grad_norm": 1.146552834212547, + "learning_rate": 1.0139628021113433e-05, + "loss": 0.4985, + "step": 29555 + }, + { + "epoch": 0.5107132983135195, + "grad_norm": 1.093088443999025, + "learning_rate": 1.0139068425919298e-05, + "loss": 0.3992, + "step": 29556 + }, + { + "epoch": 0.5107305778269284, + "grad_norm": 0.9129802609171411, + "learning_rate": 1.0138508830289591e-05, + "loss": 0.4735, + "step": 29557 + }, + { + "epoch": 0.5107478573403373, + "grad_norm": 1.1976595906831717, + "learning_rate": 1.0137949234226061e-05, + "loss": 0.5915, + "step": 29558 + }, + { + "epoch": 0.5107651368537462, + "grad_norm": 1.2333188872922007, + "learning_rate": 1.0137389637730466e-05, + "loss": 0.5015, + "step": 29559 + }, + { + "epoch": 0.510782416367155, + "grad_norm": 0.50307243713444, + "learning_rate": 1.0136830040804553e-05, + "loss": 0.6829, + "step": 29560 + }, + { + "epoch": 0.510799695880564, + "grad_norm": 0.7185762380237578, + "learning_rate": 1.013627044345008e-05, + "loss": 0.5214, + "step": 29561 + }, + { + "epoch": 0.5108169753939729, + "grad_norm": 1.2889766681144823, + "learning_rate": 1.0135710845668798e-05, + "loss": 0.5059, + "step": 29562 + }, + { + "epoch": 0.5108342549073818, + "grad_norm": 0.9559688679935308, + "learning_rate": 1.0135151247462458e-05, + "loss": 0.4317, + "step": 29563 + }, + { + "epoch": 0.5108515344207907, + "grad_norm": 1.0644178181255155, + "learning_rate": 1.0134591648832814e-05, + "loss": 0.3725, + "step": 29564 + }, + { + "epoch": 0.5108688139341996, + "grad_norm": 1.6518573659324882, + "learning_rate": 1.0134032049781621e-05, + "loss": 0.5564, + "step": 29565 + }, + { + "epoch": 0.5108860934476085, + "grad_norm": 1.4761504607668425, + "learning_rate": 1.0133472450310627e-05, + "loss": 0.3686, + "step": 29566 + }, + { + "epoch": 0.5109033729610174, + "grad_norm": 0.8464395415835501, + "learning_rate": 1.0132912850421588e-05, + "loss": 0.4526, + "step": 29567 + }, + { + "epoch": 0.5109206524744263, + "grad_norm": 1.5244539201629057, + "learning_rate": 1.0132353250116259e-05, + "loss": 0.4172, + "step": 29568 + }, + { + "epoch": 0.5109379319878352, + "grad_norm": 1.051902715053314, + "learning_rate": 1.0131793649396384e-05, + "loss": 0.5111, + "step": 29569 + }, + { + "epoch": 0.5109552115012441, + "grad_norm": 0.9200038878568237, + "learning_rate": 1.0131234048263726e-05, + "loss": 0.3207, + "step": 29570 + }, + { + "epoch": 0.510972491014653, + "grad_norm": 0.8055302396763336, + "learning_rate": 1.013067444672003e-05, + "loss": 0.4381, + "step": 29571 + }, + { + "epoch": 0.5109897705280619, + "grad_norm": 0.743430207417488, + "learning_rate": 1.0130114844767051e-05, + "loss": 0.5337, + "step": 29572 + }, + { + "epoch": 0.5110070500414708, + "grad_norm": 1.5524588751733202, + "learning_rate": 1.0129555242406546e-05, + "loss": 0.3283, + "step": 29573 + }, + { + "epoch": 0.5110243295548798, + "grad_norm": 1.3734303422942618, + "learning_rate": 1.0128995639640263e-05, + "loss": 0.4622, + "step": 29574 + }, + { + "epoch": 0.5110416090682887, + "grad_norm": 0.8671745928118206, + "learning_rate": 1.012843603646995e-05, + "loss": 0.3304, + "step": 29575 + }, + { + "epoch": 0.5110588885816976, + "grad_norm": 1.3792479721704878, + "learning_rate": 1.0127876432897371e-05, + "loss": 0.4251, + "step": 29576 + }, + { + "epoch": 0.5110761680951065, + "grad_norm": 1.1248875619138585, + "learning_rate": 1.0127316828924275e-05, + "loss": 0.5169, + "step": 29577 + }, + { + "epoch": 0.5110934476085154, + "grad_norm": 0.8849059442324068, + "learning_rate": 1.012675722455241e-05, + "loss": 0.4744, + "step": 29578 + }, + { + "epoch": 0.5111107271219243, + "grad_norm": 1.151906454393111, + "learning_rate": 1.0126197619783535e-05, + "loss": 0.3981, + "step": 29579 + }, + { + "epoch": 0.5111280066353332, + "grad_norm": 1.538864778946782, + "learning_rate": 1.0125638014619395e-05, + "loss": 0.4642, + "step": 29580 + }, + { + "epoch": 0.5111452861487421, + "grad_norm": 1.0605271892028625, + "learning_rate": 1.012507840906175e-05, + "loss": 0.5568, + "step": 29581 + }, + { + "epoch": 0.5111625656621509, + "grad_norm": 0.9342136780413158, + "learning_rate": 1.012451880311235e-05, + "loss": 0.3559, + "step": 29582 + }, + { + "epoch": 0.5111798451755598, + "grad_norm": 0.7914479714304686, + "learning_rate": 1.0123959196772943e-05, + "loss": 0.3703, + "step": 29583 + }, + { + "epoch": 0.5111971246889687, + "grad_norm": 1.304193085383872, + "learning_rate": 1.0123399590045292e-05, + "loss": 0.4934, + "step": 29584 + }, + { + "epoch": 0.5112144042023776, + "grad_norm": 1.0473664251228876, + "learning_rate": 1.0122839982931143e-05, + "loss": 0.5464, + "step": 29585 + }, + { + "epoch": 0.5112316837157865, + "grad_norm": 0.8842566367055746, + "learning_rate": 1.0122280375432248e-05, + "loss": 0.4973, + "step": 29586 + }, + { + "epoch": 0.5112489632291954, + "grad_norm": 0.8214269735120086, + "learning_rate": 1.0121720767550362e-05, + "loss": 0.354, + "step": 29587 + }, + { + "epoch": 0.5112662427426043, + "grad_norm": 1.146779836330497, + "learning_rate": 1.0121161159287238e-05, + "loss": 0.5462, + "step": 29588 + }, + { + "epoch": 0.5112835222560133, + "grad_norm": 0.7759541329301582, + "learning_rate": 1.0120601550644629e-05, + "loss": 0.3647, + "step": 29589 + }, + { + "epoch": 0.5113008017694222, + "grad_norm": 0.8027446480951476, + "learning_rate": 1.0120041941624286e-05, + "loss": 0.3993, + "step": 29590 + }, + { + "epoch": 0.5113180812828311, + "grad_norm": 0.9387919717828472, + "learning_rate": 1.0119482332227963e-05, + "loss": 0.3303, + "step": 29591 + }, + { + "epoch": 0.51133536079624, + "grad_norm": 1.0663331684215063, + "learning_rate": 1.0118922722457409e-05, + "loss": 0.6577, + "step": 29592 + }, + { + "epoch": 0.5113526403096489, + "grad_norm": 0.8510499803196707, + "learning_rate": 1.0118363112314385e-05, + "loss": 0.3474, + "step": 29593 + }, + { + "epoch": 0.5113699198230578, + "grad_norm": 0.7990130261471579, + "learning_rate": 1.0117803501800636e-05, + "loss": 0.4847, + "step": 29594 + }, + { + "epoch": 0.5113871993364667, + "grad_norm": 1.236462661152364, + "learning_rate": 1.0117243890917915e-05, + "loss": 0.657, + "step": 29595 + }, + { + "epoch": 0.5114044788498756, + "grad_norm": 0.8763977112763381, + "learning_rate": 1.011668427966798e-05, + "loss": 0.3672, + "step": 29596 + }, + { + "epoch": 0.5114217583632845, + "grad_norm": 0.9162922624643328, + "learning_rate": 1.0116124668052582e-05, + "loss": 0.5162, + "step": 29597 + }, + { + "epoch": 0.5114390378766934, + "grad_norm": 1.0011549297366302, + "learning_rate": 1.011556505607347e-05, + "loss": 0.367, + "step": 29598 + }, + { + "epoch": 0.5114563173901023, + "grad_norm": 0.8310298581551815, + "learning_rate": 1.0115005443732405e-05, + "loss": 0.3375, + "step": 29599 + }, + { + "epoch": 0.5114735969035112, + "grad_norm": 1.0857725405422696, + "learning_rate": 1.0114445831031128e-05, + "loss": 0.4035, + "step": 29600 + }, + { + "epoch": 0.5114908764169201, + "grad_norm": 1.042540924984615, + "learning_rate": 1.01138862179714e-05, + "loss": 0.3316, + "step": 29601 + }, + { + "epoch": 0.5115081559303291, + "grad_norm": 1.1928710219951992, + "learning_rate": 1.0113326604554973e-05, + "loss": 0.3135, + "step": 29602 + }, + { + "epoch": 0.5115254354437379, + "grad_norm": 1.8126888884582684, + "learning_rate": 1.0112766990783596e-05, + "loss": 0.4551, + "step": 29603 + }, + { + "epoch": 0.5115427149571468, + "grad_norm": 0.9794421285175234, + "learning_rate": 1.0112207376659026e-05, + "loss": 0.6654, + "step": 29604 + }, + { + "epoch": 0.5115599944705557, + "grad_norm": 1.1907374304814968, + "learning_rate": 1.0111647762183014e-05, + "loss": 0.576, + "step": 29605 + }, + { + "epoch": 0.5115772739839646, + "grad_norm": 0.9556158276774465, + "learning_rate": 1.0111088147357311e-05, + "loss": 0.2381, + "step": 29606 + }, + { + "epoch": 0.5115945534973735, + "grad_norm": 0.8574486796884611, + "learning_rate": 1.0110528532183675e-05, + "loss": 0.3691, + "step": 29607 + }, + { + "epoch": 0.5116118330107824, + "grad_norm": 0.7564793738040149, + "learning_rate": 1.0109968916663852e-05, + "loss": 0.3337, + "step": 29608 + }, + { + "epoch": 0.5116291125241913, + "grad_norm": 0.7985820683349139, + "learning_rate": 1.01094093007996e-05, + "loss": 0.3908, + "step": 29609 + }, + { + "epoch": 0.5116463920376002, + "grad_norm": 2.1112334424331163, + "learning_rate": 1.010884968459267e-05, + "loss": 0.6089, + "step": 29610 + }, + { + "epoch": 0.5116636715510091, + "grad_norm": 1.2785960814294879, + "learning_rate": 1.0108290068044813e-05, + "loss": 0.548, + "step": 29611 + }, + { + "epoch": 0.511680951064418, + "grad_norm": 0.9918265332395501, + "learning_rate": 1.0107730451157783e-05, + "loss": 0.5172, + "step": 29612 + }, + { + "epoch": 0.5116982305778269, + "grad_norm": 1.2059200561463006, + "learning_rate": 1.0107170833933334e-05, + "loss": 0.6653, + "step": 29613 + }, + { + "epoch": 0.5117155100912358, + "grad_norm": 0.8105405707980937, + "learning_rate": 1.0106611216373219e-05, + "loss": 0.6808, + "step": 29614 + }, + { + "epoch": 0.5117327896046447, + "grad_norm": 0.8923607691211802, + "learning_rate": 1.0106051598479185e-05, + "loss": 0.519, + "step": 29615 + }, + { + "epoch": 0.5117500691180537, + "grad_norm": 0.9134278256683601, + "learning_rate": 1.0105491980252992e-05, + "loss": 0.372, + "step": 29616 + }, + { + "epoch": 0.5117673486314626, + "grad_norm": 0.8586383989566703, + "learning_rate": 1.0104932361696392e-05, + "loss": 0.3252, + "step": 29617 + }, + { + "epoch": 0.5117846281448715, + "grad_norm": 1.0448956079963065, + "learning_rate": 1.0104372742811133e-05, + "loss": 0.4058, + "step": 29618 + }, + { + "epoch": 0.5118019076582804, + "grad_norm": 0.7354343320423299, + "learning_rate": 1.0103813123598973e-05, + "loss": 0.4052, + "step": 29619 + }, + { + "epoch": 0.5118191871716893, + "grad_norm": 0.9449900601140317, + "learning_rate": 1.0103253504061662e-05, + "loss": 0.4206, + "step": 29620 + }, + { + "epoch": 0.5118364666850982, + "grad_norm": 0.5676584900445281, + "learning_rate": 1.0102693884200951e-05, + "loss": 0.2839, + "step": 29621 + }, + { + "epoch": 0.5118537461985071, + "grad_norm": 0.9659664481189629, + "learning_rate": 1.01021342640186e-05, + "loss": 0.4941, + "step": 29622 + }, + { + "epoch": 0.511871025711916, + "grad_norm": 1.0111381990467811, + "learning_rate": 1.010157464351635e-05, + "loss": 0.4121, + "step": 29623 + }, + { + "epoch": 0.5118883052253248, + "grad_norm": 0.9910622960428868, + "learning_rate": 1.0101015022695963e-05, + "loss": 0.3056, + "step": 29624 + }, + { + "epoch": 0.5119055847387337, + "grad_norm": 0.8213723980055337, + "learning_rate": 1.0100455401559192e-05, + "loss": 0.3907, + "step": 29625 + }, + { + "epoch": 0.5119228642521426, + "grad_norm": 0.9368947975985096, + "learning_rate": 1.0099895780107782e-05, + "loss": 0.692, + "step": 29626 + }, + { + "epoch": 0.5119401437655515, + "grad_norm": 1.2884815070914055, + "learning_rate": 1.0099336158343496e-05, + "loss": 0.5076, + "step": 29627 + }, + { + "epoch": 0.5119574232789604, + "grad_norm": 0.8336692455556333, + "learning_rate": 1.009877653626808e-05, + "loss": 0.3571, + "step": 29628 + }, + { + "epoch": 0.5119747027923693, + "grad_norm": 0.9113974811746703, + "learning_rate": 1.0098216913883286e-05, + "loss": 0.4335, + "step": 29629 + }, + { + "epoch": 0.5119919823057782, + "grad_norm": 0.9283575505105224, + "learning_rate": 1.0097657291190875e-05, + "loss": 0.4625, + "step": 29630 + }, + { + "epoch": 0.5120092618191872, + "grad_norm": 1.078221274330752, + "learning_rate": 1.0097097668192588e-05, + "loss": 0.3394, + "step": 29631 + }, + { + "epoch": 0.5120265413325961, + "grad_norm": 0.6804543268337792, + "learning_rate": 1.0096538044890184e-05, + "loss": 0.364, + "step": 29632 + }, + { + "epoch": 0.512043820846005, + "grad_norm": 0.7805054238810185, + "learning_rate": 1.0095978421285419e-05, + "loss": 0.5547, + "step": 29633 + }, + { + "epoch": 0.5120611003594139, + "grad_norm": 1.1427323710721748, + "learning_rate": 1.0095418797380041e-05, + "loss": 0.4689, + "step": 29634 + }, + { + "epoch": 0.5120783798728228, + "grad_norm": 0.94813335822426, + "learning_rate": 1.0094859173175801e-05, + "loss": 0.5045, + "step": 29635 + }, + { + "epoch": 0.5120956593862317, + "grad_norm": 1.0518384082721193, + "learning_rate": 1.0094299548674458e-05, + "loss": 0.4712, + "step": 29636 + }, + { + "epoch": 0.5121129388996406, + "grad_norm": 1.0333647622640398, + "learning_rate": 1.0093739923877763e-05, + "loss": 0.4557, + "step": 29637 + }, + { + "epoch": 0.5121302184130495, + "grad_norm": 0.8023629445066321, + "learning_rate": 1.0093180298787464e-05, + "loss": 0.3603, + "step": 29638 + }, + { + "epoch": 0.5121474979264584, + "grad_norm": 1.5153734829983219, + "learning_rate": 1.0092620673405318e-05, + "loss": 0.3445, + "step": 29639 + }, + { + "epoch": 0.5121647774398673, + "grad_norm": 1.1007864075953453, + "learning_rate": 1.009206104773308e-05, + "loss": 0.4943, + "step": 29640 + }, + { + "epoch": 0.5121820569532762, + "grad_norm": 0.9096488228386985, + "learning_rate": 1.0091501421772496e-05, + "loss": 0.4491, + "step": 29641 + }, + { + "epoch": 0.5121993364666851, + "grad_norm": 0.5304399178285236, + "learning_rate": 1.0090941795525327e-05, + "loss": 0.743, + "step": 29642 + }, + { + "epoch": 0.512216615980094, + "grad_norm": 0.726466850670231, + "learning_rate": 1.0090382168993315e-05, + "loss": 0.4573, + "step": 29643 + }, + { + "epoch": 0.512233895493503, + "grad_norm": 0.8581408132852789, + "learning_rate": 1.0089822542178222e-05, + "loss": 0.3351, + "step": 29644 + }, + { + "epoch": 0.5122511750069118, + "grad_norm": 1.5066535814308963, + "learning_rate": 1.0089262915081802e-05, + "loss": 0.4999, + "step": 29645 + }, + { + "epoch": 0.5122684545203207, + "grad_norm": 0.9650533263819244, + "learning_rate": 1.0088703287705796e-05, + "loss": 0.4777, + "step": 29646 + }, + { + "epoch": 0.5122857340337296, + "grad_norm": 0.6795492024977451, + "learning_rate": 1.008814366005197e-05, + "loss": 0.3494, + "step": 29647 + }, + { + "epoch": 0.5123030135471385, + "grad_norm": 0.7250075952708215, + "learning_rate": 1.0087584032122074e-05, + "loss": 0.3027, + "step": 29648 + }, + { + "epoch": 0.5123202930605474, + "grad_norm": 0.4425626932695818, + "learning_rate": 1.0087024403917852e-05, + "loss": 0.7221, + "step": 29649 + }, + { + "epoch": 0.5123375725739563, + "grad_norm": 0.9516455400245443, + "learning_rate": 1.0086464775441064e-05, + "loss": 0.4607, + "step": 29650 + }, + { + "epoch": 0.5123548520873652, + "grad_norm": 1.0366948616090688, + "learning_rate": 1.0085905146693467e-05, + "loss": 0.5288, + "step": 29651 + }, + { + "epoch": 0.5123721316007741, + "grad_norm": 0.7845341213536853, + "learning_rate": 1.0085345517676803e-05, + "loss": 0.5014, + "step": 29652 + }, + { + "epoch": 0.512389411114183, + "grad_norm": 0.9941045429480374, + "learning_rate": 1.0084785888392832e-05, + "loss": 0.2989, + "step": 29653 + }, + { + "epoch": 0.5124066906275919, + "grad_norm": 1.0735918483775946, + "learning_rate": 1.0084226258843306e-05, + "loss": 0.4058, + "step": 29654 + }, + { + "epoch": 0.5124239701410008, + "grad_norm": 1.3219477361109997, + "learning_rate": 1.0083666629029975e-05, + "loss": 0.49, + "step": 29655 + }, + { + "epoch": 0.5124412496544097, + "grad_norm": 1.2071769819896399, + "learning_rate": 1.0083106998954595e-05, + "loss": 0.3477, + "step": 29656 + }, + { + "epoch": 0.5124585291678186, + "grad_norm": 0.8336447178503712, + "learning_rate": 1.0082547368618917e-05, + "loss": 0.3627, + "step": 29657 + }, + { + "epoch": 0.5124758086812276, + "grad_norm": 1.0090630326507155, + "learning_rate": 1.0081987738024695e-05, + "loss": 0.6122, + "step": 29658 + }, + { + "epoch": 0.5124930881946365, + "grad_norm": 1.5675403412235516, + "learning_rate": 1.0081428107173681e-05, + "loss": 0.2452, + "step": 29659 + }, + { + "epoch": 0.5125103677080454, + "grad_norm": 0.8347603203237062, + "learning_rate": 1.0080868476067629e-05, + "loss": 0.3799, + "step": 29660 + }, + { + "epoch": 0.5125276472214543, + "grad_norm": 1.1676798369477663, + "learning_rate": 1.0080308844708292e-05, + "loss": 0.3963, + "step": 29661 + }, + { + "epoch": 0.5125449267348632, + "grad_norm": 0.8156722086337238, + "learning_rate": 1.0079749213097418e-05, + "loss": 0.2749, + "step": 29662 + }, + { + "epoch": 0.5125622062482721, + "grad_norm": 1.1522477098607096, + "learning_rate": 1.0079189581236763e-05, + "loss": 0.4176, + "step": 29663 + }, + { + "epoch": 0.512579485761681, + "grad_norm": 1.1592557045945868, + "learning_rate": 1.0078629949128083e-05, + "loss": 0.6494, + "step": 29664 + }, + { + "epoch": 0.5125967652750899, + "grad_norm": 1.2656568234429817, + "learning_rate": 1.0078070316773128e-05, + "loss": 0.4156, + "step": 29665 + }, + { + "epoch": 0.5126140447884987, + "grad_norm": 0.562778482601333, + "learning_rate": 1.0077510684173648e-05, + "loss": 0.7471, + "step": 29666 + }, + { + "epoch": 0.5126313243019076, + "grad_norm": 1.2832063471687079, + "learning_rate": 1.00769510513314e-05, + "loss": 0.3841, + "step": 29667 + }, + { + "epoch": 0.5126486038153165, + "grad_norm": 1.054881578295018, + "learning_rate": 1.0076391418248136e-05, + "loss": 0.4529, + "step": 29668 + }, + { + "epoch": 0.5126658833287254, + "grad_norm": 0.7077938445078974, + "learning_rate": 1.0075831784925609e-05, + "loss": 0.5624, + "step": 29669 + }, + { + "epoch": 0.5126831628421343, + "grad_norm": 1.4957776428486471, + "learning_rate": 1.0075272151365569e-05, + "loss": 0.3785, + "step": 29670 + }, + { + "epoch": 0.5127004423555432, + "grad_norm": 0.833743328223774, + "learning_rate": 1.0074712517569775e-05, + "loss": 0.3699, + "step": 29671 + }, + { + "epoch": 0.5127177218689521, + "grad_norm": 1.107501791074195, + "learning_rate": 1.0074152883539974e-05, + "loss": 0.4425, + "step": 29672 + }, + { + "epoch": 0.512735001382361, + "grad_norm": 0.7715854828492367, + "learning_rate": 1.0073593249277919e-05, + "loss": 0.4923, + "step": 29673 + }, + { + "epoch": 0.51275228089577, + "grad_norm": 0.7097648553942466, + "learning_rate": 1.0073033614785367e-05, + "loss": 0.3451, + "step": 29674 + }, + { + "epoch": 0.5127695604091789, + "grad_norm": 0.9864986353206632, + "learning_rate": 1.0072473980064064e-05, + "loss": 0.4195, + "step": 29675 + }, + { + "epoch": 0.5127868399225878, + "grad_norm": 0.5125331521250697, + "learning_rate": 1.0071914345115767e-05, + "loss": 0.756, + "step": 29676 + }, + { + "epoch": 0.5128041194359967, + "grad_norm": 0.5512009684676967, + "learning_rate": 1.0071354709942234e-05, + "loss": 0.2365, + "step": 29677 + }, + { + "epoch": 0.5128213989494056, + "grad_norm": 0.8022385565126388, + "learning_rate": 1.0070795074545208e-05, + "loss": 0.4265, + "step": 29678 + }, + { + "epoch": 0.5128386784628145, + "grad_norm": 1.3700190408609363, + "learning_rate": 1.0070235438926447e-05, + "loss": 0.4774, + "step": 29679 + }, + { + "epoch": 0.5128559579762234, + "grad_norm": 0.8407902226501471, + "learning_rate": 1.0069675803087703e-05, + "loss": 0.3784, + "step": 29680 + }, + { + "epoch": 0.5128732374896323, + "grad_norm": 1.3596327274744353, + "learning_rate": 1.0069116167030732e-05, + "loss": 0.463, + "step": 29681 + }, + { + "epoch": 0.5128905170030412, + "grad_norm": 1.1211562035897473, + "learning_rate": 1.0068556530757282e-05, + "loss": 0.4124, + "step": 29682 + }, + { + "epoch": 0.5129077965164501, + "grad_norm": 0.8316156502941959, + "learning_rate": 1.0067996894269108e-05, + "loss": 0.4289, + "step": 29683 + }, + { + "epoch": 0.512925076029859, + "grad_norm": 1.1984348504756284, + "learning_rate": 1.0067437257567963e-05, + "loss": 0.3539, + "step": 29684 + }, + { + "epoch": 0.512942355543268, + "grad_norm": 1.5673186914072197, + "learning_rate": 1.00668776206556e-05, + "loss": 0.2882, + "step": 29685 + }, + { + "epoch": 0.5129596350566769, + "grad_norm": 1.1081753565549757, + "learning_rate": 1.0066317983533767e-05, + "loss": 0.6256, + "step": 29686 + }, + { + "epoch": 0.5129769145700857, + "grad_norm": 1.0305742233990935, + "learning_rate": 1.0065758346204224e-05, + "loss": 0.4732, + "step": 29687 + }, + { + "epoch": 0.5129941940834946, + "grad_norm": 1.5351205290686565, + "learning_rate": 1.0065198708668722e-05, + "loss": 0.3976, + "step": 29688 + }, + { + "epoch": 0.5130114735969035, + "grad_norm": 0.8824024462219194, + "learning_rate": 1.006463907092901e-05, + "loss": 0.386, + "step": 29689 + }, + { + "epoch": 0.5130287531103124, + "grad_norm": 1.0303423023114857, + "learning_rate": 1.0064079432986846e-05, + "loss": 0.461, + "step": 29690 + }, + { + "epoch": 0.5130460326237213, + "grad_norm": 0.8363361194997767, + "learning_rate": 1.0063519794843981e-05, + "loss": 0.4488, + "step": 29691 + }, + { + "epoch": 0.5130633121371302, + "grad_norm": 1.132225415561696, + "learning_rate": 1.0062960156502167e-05, + "loss": 0.4724, + "step": 29692 + }, + { + "epoch": 0.5130805916505391, + "grad_norm": 1.1020108404588507, + "learning_rate": 1.0062400517963155e-05, + "loss": 0.435, + "step": 29693 + }, + { + "epoch": 0.513097871163948, + "grad_norm": 1.1819840085498496, + "learning_rate": 1.0061840879228702e-05, + "loss": 0.4122, + "step": 29694 + }, + { + "epoch": 0.5131151506773569, + "grad_norm": 1.514156987035225, + "learning_rate": 1.0061281240300556e-05, + "loss": 0.5742, + "step": 29695 + }, + { + "epoch": 0.5131324301907658, + "grad_norm": 1.2015726061057668, + "learning_rate": 1.0060721601180471e-05, + "loss": 0.3735, + "step": 29696 + }, + { + "epoch": 0.5131497097041747, + "grad_norm": 1.16760342272562, + "learning_rate": 1.0060161961870207e-05, + "loss": 0.4193, + "step": 29697 + }, + { + "epoch": 0.5131669892175836, + "grad_norm": 0.7743486256721741, + "learning_rate": 1.0059602322371507e-05, + "loss": 0.2157, + "step": 29698 + }, + { + "epoch": 0.5131842687309925, + "grad_norm": 1.4128977878230047, + "learning_rate": 1.0059042682686129e-05, + "loss": 0.6098, + "step": 29699 + }, + { + "epoch": 0.5132015482444015, + "grad_norm": 0.631057118051442, + "learning_rate": 1.0058483042815825e-05, + "loss": 0.6717, + "step": 29700 + }, + { + "epoch": 0.5132188277578104, + "grad_norm": 1.4978678596357087, + "learning_rate": 1.0057923402762346e-05, + "loss": 0.4049, + "step": 29701 + }, + { + "epoch": 0.5132361072712193, + "grad_norm": 1.041755542096185, + "learning_rate": 1.0057363762527452e-05, + "loss": 0.4778, + "step": 29702 + }, + { + "epoch": 0.5132533867846282, + "grad_norm": 0.8910523644146876, + "learning_rate": 1.0056804122112884e-05, + "loss": 0.3638, + "step": 29703 + }, + { + "epoch": 0.5132706662980371, + "grad_norm": 1.508863367293851, + "learning_rate": 1.0056244481520405e-05, + "loss": 0.4544, + "step": 29704 + }, + { + "epoch": 0.513287945811446, + "grad_norm": 1.0259380654342451, + "learning_rate": 1.0055684840751763e-05, + "loss": 0.3958, + "step": 29705 + }, + { + "epoch": 0.5133052253248549, + "grad_norm": 0.6385306789743175, + "learning_rate": 1.005512519980871e-05, + "loss": 0.3573, + "step": 29706 + }, + { + "epoch": 0.5133225048382638, + "grad_norm": 0.9771231743130194, + "learning_rate": 1.0054565558693004e-05, + "loss": 0.398, + "step": 29707 + }, + { + "epoch": 0.5133397843516727, + "grad_norm": 1.4369831860205482, + "learning_rate": 1.005400591740639e-05, + "loss": 0.4466, + "step": 29708 + }, + { + "epoch": 0.5133570638650815, + "grad_norm": 0.7086445320798623, + "learning_rate": 1.0053446275950629e-05, + "loss": 0.7944, + "step": 29709 + }, + { + "epoch": 0.5133743433784904, + "grad_norm": 1.321322223931017, + "learning_rate": 1.005288663432747e-05, + "loss": 0.4712, + "step": 29710 + }, + { + "epoch": 0.5133916228918993, + "grad_norm": 1.2041466053232595, + "learning_rate": 1.0052326992538665e-05, + "loss": 0.3676, + "step": 29711 + }, + { + "epoch": 0.5134089024053082, + "grad_norm": 1.1050535170154423, + "learning_rate": 1.0051767350585968e-05, + "loss": 0.5058, + "step": 29712 + }, + { + "epoch": 0.5134261819187171, + "grad_norm": 0.9693977112723264, + "learning_rate": 1.0051207708471134e-05, + "loss": 0.4461, + "step": 29713 + }, + { + "epoch": 0.513443461432126, + "grad_norm": 0.9739578535378812, + "learning_rate": 1.0050648066195912e-05, + "loss": 0.5581, + "step": 29714 + }, + { + "epoch": 0.513460740945535, + "grad_norm": 0.5272114587522173, + "learning_rate": 1.0050088423762055e-05, + "loss": 0.3545, + "step": 29715 + }, + { + "epoch": 0.5134780204589439, + "grad_norm": 1.1376507590427585, + "learning_rate": 1.0049528781171319e-05, + "loss": 0.6058, + "step": 29716 + }, + { + "epoch": 0.5134952999723528, + "grad_norm": 0.9157867494439074, + "learning_rate": 1.0048969138425453e-05, + "loss": 0.4449, + "step": 29717 + }, + { + "epoch": 0.5135125794857617, + "grad_norm": 0.8406511966155774, + "learning_rate": 1.0048409495526212e-05, + "loss": 0.4067, + "step": 29718 + }, + { + "epoch": 0.5135298589991706, + "grad_norm": 1.162193404648982, + "learning_rate": 1.004784985247535e-05, + "loss": 0.2937, + "step": 29719 + }, + { + "epoch": 0.5135471385125795, + "grad_norm": 1.184359335695902, + "learning_rate": 1.004729020927462e-05, + "loss": 0.6046, + "step": 29720 + }, + { + "epoch": 0.5135644180259884, + "grad_norm": 0.715738591118225, + "learning_rate": 1.004673056592577e-05, + "loss": 0.3646, + "step": 29721 + }, + { + "epoch": 0.5135816975393973, + "grad_norm": 0.7079570279892077, + "learning_rate": 1.004617092243056e-05, + "loss": 0.2886, + "step": 29722 + }, + { + "epoch": 0.5135989770528062, + "grad_norm": 0.9185933008780836, + "learning_rate": 1.0045611278790736e-05, + "loss": 0.235, + "step": 29723 + }, + { + "epoch": 0.5136162565662151, + "grad_norm": 0.6219149752250784, + "learning_rate": 1.0045051635008058e-05, + "loss": 0.5113, + "step": 29724 + }, + { + "epoch": 0.513633536079624, + "grad_norm": 0.9603066168617302, + "learning_rate": 1.0044491991084273e-05, + "loss": 0.3583, + "step": 29725 + }, + { + "epoch": 0.5136508155930329, + "grad_norm": 1.0639260234901815, + "learning_rate": 1.0043932347021133e-05, + "loss": 0.3859, + "step": 29726 + }, + { + "epoch": 0.5136680951064418, + "grad_norm": 0.6021508651747884, + "learning_rate": 1.0043372702820395e-05, + "loss": 0.7539, + "step": 29727 + }, + { + "epoch": 0.5136853746198508, + "grad_norm": 1.3108020465690011, + "learning_rate": 1.0042813058483814e-05, + "loss": 0.2285, + "step": 29728 + }, + { + "epoch": 0.5137026541332597, + "grad_norm": 0.8468942861044307, + "learning_rate": 1.0042253414013135e-05, + "loss": 0.3885, + "step": 29729 + }, + { + "epoch": 0.5137199336466685, + "grad_norm": 0.857002918122189, + "learning_rate": 1.0041693769410114e-05, + "loss": 0.3973, + "step": 29730 + }, + { + "epoch": 0.5137372131600774, + "grad_norm": 1.1523935655377602, + "learning_rate": 1.004113412467651e-05, + "loss": 0.4426, + "step": 29731 + }, + { + "epoch": 0.5137544926734863, + "grad_norm": 0.908736988970899, + "learning_rate": 1.0040574479814067e-05, + "loss": 0.2847, + "step": 29732 + }, + { + "epoch": 0.5137717721868952, + "grad_norm": 1.101715360381836, + "learning_rate": 1.0040014834824545e-05, + "loss": 0.4456, + "step": 29733 + }, + { + "epoch": 0.5137890517003041, + "grad_norm": 0.48222871012614466, + "learning_rate": 1.0039455189709693e-05, + "loss": 0.6572, + "step": 29734 + }, + { + "epoch": 0.513806331213713, + "grad_norm": 1.287984717902317, + "learning_rate": 1.0038895544471258e-05, + "loss": 0.3716, + "step": 29735 + }, + { + "epoch": 0.5138236107271219, + "grad_norm": 0.6719376807884797, + "learning_rate": 1.0038335899111007e-05, + "loss": 0.3651, + "step": 29736 + }, + { + "epoch": 0.5138408902405308, + "grad_norm": 1.120100881532863, + "learning_rate": 1.0037776253630682e-05, + "loss": 0.5481, + "step": 29737 + }, + { + "epoch": 0.5138581697539397, + "grad_norm": 0.9453781250415123, + "learning_rate": 1.0037216608032037e-05, + "loss": 0.3898, + "step": 29738 + }, + { + "epoch": 0.5138754492673486, + "grad_norm": 0.9942260010002322, + "learning_rate": 1.0036656962316828e-05, + "loss": 0.4701, + "step": 29739 + }, + { + "epoch": 0.5138927287807575, + "grad_norm": 1.029210623434706, + "learning_rate": 1.0036097316486809e-05, + "loss": 0.3454, + "step": 29740 + }, + { + "epoch": 0.5139100082941664, + "grad_norm": 0.769796779593184, + "learning_rate": 1.0035537670543726e-05, + "loss": 0.4496, + "step": 29741 + }, + { + "epoch": 0.5139272878075754, + "grad_norm": 0.5761999624308844, + "learning_rate": 1.003497802448934e-05, + "loss": 0.6163, + "step": 29742 + }, + { + "epoch": 0.5139445673209843, + "grad_norm": 1.141062180465342, + "learning_rate": 1.0034418378325398e-05, + "loss": 0.4274, + "step": 29743 + }, + { + "epoch": 0.5139618468343932, + "grad_norm": 1.139588904254204, + "learning_rate": 1.0033858732053658e-05, + "loss": 0.4655, + "step": 29744 + }, + { + "epoch": 0.5139791263478021, + "grad_norm": 1.0080900968067505, + "learning_rate": 1.003329908567587e-05, + "loss": 0.3369, + "step": 29745 + }, + { + "epoch": 0.513996405861211, + "grad_norm": 0.7849929430475372, + "learning_rate": 1.0032739439193781e-05, + "loss": 0.4562, + "step": 29746 + }, + { + "epoch": 0.5140136853746199, + "grad_norm": 1.2028654993751609, + "learning_rate": 1.0032179792609155e-05, + "loss": 0.4716, + "step": 29747 + }, + { + "epoch": 0.5140309648880288, + "grad_norm": 1.278817008059415, + "learning_rate": 1.003162014592374e-05, + "loss": 0.3584, + "step": 29748 + }, + { + "epoch": 0.5140482444014377, + "grad_norm": 0.9338631680706068, + "learning_rate": 1.0031060499139282e-05, + "loss": 0.477, + "step": 29749 + }, + { + "epoch": 0.5140655239148466, + "grad_norm": 0.41575431440313043, + "learning_rate": 1.0030500852257545e-05, + "loss": 0.5843, + "step": 29750 + }, + { + "epoch": 0.5140828034282554, + "grad_norm": 1.3696787745265044, + "learning_rate": 1.0029941205280279e-05, + "loss": 0.3786, + "step": 29751 + }, + { + "epoch": 0.5141000829416643, + "grad_norm": 1.3520352804143392, + "learning_rate": 1.0029381558209231e-05, + "loss": 0.5166, + "step": 29752 + }, + { + "epoch": 0.5141173624550732, + "grad_norm": 0.9519489236365111, + "learning_rate": 1.0028821911046158e-05, + "loss": 0.4633, + "step": 29753 + }, + { + "epoch": 0.5141346419684821, + "grad_norm": 0.6866259701195361, + "learning_rate": 1.0028262263792815e-05, + "loss": 0.3455, + "step": 29754 + }, + { + "epoch": 0.514151921481891, + "grad_norm": 1.0010870770331954, + "learning_rate": 1.002770261645095e-05, + "loss": 0.5859, + "step": 29755 + }, + { + "epoch": 0.5141692009953, + "grad_norm": 0.9420820029582968, + "learning_rate": 1.0027142969022318e-05, + "loss": 0.5413, + "step": 29756 + }, + { + "epoch": 0.5141864805087089, + "grad_norm": 1.1002963916527737, + "learning_rate": 1.0026583321508675e-05, + "loss": 0.3837, + "step": 29757 + }, + { + "epoch": 0.5142037600221178, + "grad_norm": 1.2435844878070228, + "learning_rate": 1.002602367391177e-05, + "loss": 0.5673, + "step": 29758 + }, + { + "epoch": 0.5142210395355267, + "grad_norm": 1.446092872705386, + "learning_rate": 1.0025464026233356e-05, + "loss": 0.4947, + "step": 29759 + }, + { + "epoch": 0.5142383190489356, + "grad_norm": 1.0596040425857611, + "learning_rate": 1.0024904378475187e-05, + "loss": 0.3094, + "step": 29760 + }, + { + "epoch": 0.5142555985623445, + "grad_norm": 0.9500505254433779, + "learning_rate": 1.0024344730639013e-05, + "loss": 0.4064, + "step": 29761 + }, + { + "epoch": 0.5142728780757534, + "grad_norm": 0.7229666122433818, + "learning_rate": 1.0023785082726594e-05, + "loss": 0.5037, + "step": 29762 + }, + { + "epoch": 0.5142901575891623, + "grad_norm": 1.1239155681989164, + "learning_rate": 1.0023225434739676e-05, + "loss": 0.3202, + "step": 29763 + }, + { + "epoch": 0.5143074371025712, + "grad_norm": 0.6730266106123098, + "learning_rate": 1.0022665786680014e-05, + "loss": 0.7333, + "step": 29764 + }, + { + "epoch": 0.5143247166159801, + "grad_norm": 1.4043567063117763, + "learning_rate": 1.0022106138549364e-05, + "loss": 0.5128, + "step": 29765 + }, + { + "epoch": 0.514341996129389, + "grad_norm": 1.122015170885713, + "learning_rate": 1.0021546490349472e-05, + "loss": 0.4287, + "step": 29766 + }, + { + "epoch": 0.5143592756427979, + "grad_norm": 1.1185462752064081, + "learning_rate": 1.0020986842082097e-05, + "loss": 0.5256, + "step": 29767 + }, + { + "epoch": 0.5143765551562068, + "grad_norm": 1.2327794429505055, + "learning_rate": 1.0020427193748987e-05, + "loss": 0.5708, + "step": 29768 + }, + { + "epoch": 0.5143938346696157, + "grad_norm": 1.0665155404816102, + "learning_rate": 1.0019867545351897e-05, + "loss": 0.4088, + "step": 29769 + }, + { + "epoch": 0.5144111141830247, + "grad_norm": 0.5305968559845402, + "learning_rate": 1.0019307896892584e-05, + "loss": 0.5182, + "step": 29770 + }, + { + "epoch": 0.5144283936964336, + "grad_norm": 1.1404981910019294, + "learning_rate": 1.0018748248372799e-05, + "loss": 0.5767, + "step": 29771 + }, + { + "epoch": 0.5144456732098424, + "grad_norm": 1.187764222127959, + "learning_rate": 1.0018188599794288e-05, + "loss": 0.4772, + "step": 29772 + }, + { + "epoch": 0.5144629527232513, + "grad_norm": 0.5895881304849596, + "learning_rate": 1.0017628951158811e-05, + "loss": 0.6446, + "step": 29773 + }, + { + "epoch": 0.5144802322366602, + "grad_norm": 1.0097716896172455, + "learning_rate": 1.0017069302468121e-05, + "loss": 0.4098, + "step": 29774 + }, + { + "epoch": 0.5144975117500691, + "grad_norm": 1.6087012221380488, + "learning_rate": 1.0016509653723965e-05, + "loss": 0.415, + "step": 29775 + }, + { + "epoch": 0.514514791263478, + "grad_norm": 1.0341713121275413, + "learning_rate": 1.00159500049281e-05, + "loss": 0.5325, + "step": 29776 + }, + { + "epoch": 0.5145320707768869, + "grad_norm": 1.4028287906543278, + "learning_rate": 1.001539035608228e-05, + "loss": 0.3322, + "step": 29777 + }, + { + "epoch": 0.5145493502902958, + "grad_norm": 1.001903002998804, + "learning_rate": 1.0014830707188255e-05, + "loss": 0.6282, + "step": 29778 + }, + { + "epoch": 0.5145666298037047, + "grad_norm": 1.07361339178832, + "learning_rate": 1.001427105824778e-05, + "loss": 0.283, + "step": 29779 + }, + { + "epoch": 0.5145839093171136, + "grad_norm": 1.1374213618818112, + "learning_rate": 1.0013711409262605e-05, + "loss": 0.3294, + "step": 29780 + }, + { + "epoch": 0.5146011888305225, + "grad_norm": 0.9130258110942037, + "learning_rate": 1.0013151760234485e-05, + "loss": 0.3768, + "step": 29781 + }, + { + "epoch": 0.5146184683439314, + "grad_norm": 0.9761172821310462, + "learning_rate": 1.0012592111165176e-05, + "loss": 0.3117, + "step": 29782 + }, + { + "epoch": 0.5146357478573403, + "grad_norm": 1.0430989119741474, + "learning_rate": 1.0012032462056425e-05, + "loss": 0.4171, + "step": 29783 + }, + { + "epoch": 0.5146530273707492, + "grad_norm": 0.8318021164329833, + "learning_rate": 1.0011472812909989e-05, + "loss": 0.3014, + "step": 29784 + }, + { + "epoch": 0.5146703068841582, + "grad_norm": 0.9363491437928466, + "learning_rate": 1.0010913163727618e-05, + "loss": 0.3412, + "step": 29785 + }, + { + "epoch": 0.5146875863975671, + "grad_norm": 0.874113440409889, + "learning_rate": 1.0010353514511065e-05, + "loss": 0.3493, + "step": 29786 + }, + { + "epoch": 0.514704865910976, + "grad_norm": 0.7110743331876519, + "learning_rate": 1.0009793865262085e-05, + "loss": 0.3753, + "step": 29787 + }, + { + "epoch": 0.5147221454243849, + "grad_norm": 1.0151645138617829, + "learning_rate": 1.0009234215982431e-05, + "loss": 0.2677, + "step": 29788 + }, + { + "epoch": 0.5147394249377938, + "grad_norm": 0.8677703631867532, + "learning_rate": 1.0008674566673853e-05, + "loss": 0.8953, + "step": 29789 + }, + { + "epoch": 0.5147567044512027, + "grad_norm": 0.8681524100062973, + "learning_rate": 1.0008114917338108e-05, + "loss": 0.5162, + "step": 29790 + }, + { + "epoch": 0.5147739839646116, + "grad_norm": 0.919135197531618, + "learning_rate": 1.0007555267976944e-05, + "loss": 0.3446, + "step": 29791 + }, + { + "epoch": 0.5147912634780205, + "grad_norm": 0.8036038429143293, + "learning_rate": 1.0006995618592118e-05, + "loss": 0.3882, + "step": 29792 + }, + { + "epoch": 0.5148085429914293, + "grad_norm": 1.1066728504497563, + "learning_rate": 1.000643596918538e-05, + "loss": 0.5681, + "step": 29793 + }, + { + "epoch": 0.5148258225048382, + "grad_norm": 1.824263051966162, + "learning_rate": 1.0005876319758485e-05, + "loss": 0.4478, + "step": 29794 + }, + { + "epoch": 0.5148431020182471, + "grad_norm": 1.2174952123179263, + "learning_rate": 1.0005316670313183e-05, + "loss": 0.3207, + "step": 29795 + }, + { + "epoch": 0.514860381531656, + "grad_norm": 1.2771439952718802, + "learning_rate": 1.0004757020851232e-05, + "loss": 0.3717, + "step": 29796 + }, + { + "epoch": 0.5148776610450649, + "grad_norm": 0.7227867615898673, + "learning_rate": 1.000419737137438e-05, + "loss": 0.6945, + "step": 29797 + }, + { + "epoch": 0.5148949405584738, + "grad_norm": 0.7829783268254127, + "learning_rate": 1.0003637721884381e-05, + "loss": 0.372, + "step": 29798 + }, + { + "epoch": 0.5149122200718828, + "grad_norm": 1.0008190319957355, + "learning_rate": 1.0003078072382988e-05, + "loss": 0.3425, + "step": 29799 + }, + { + "epoch": 0.5149294995852917, + "grad_norm": 1.4013973673498747, + "learning_rate": 1.0002518422871957e-05, + "loss": 0.2849, + "step": 29800 + }, + { + "epoch": 0.5149467790987006, + "grad_norm": 1.1056580631486619, + "learning_rate": 1.0001958773353033e-05, + "loss": 0.4981, + "step": 29801 + }, + { + "epoch": 0.5149640586121095, + "grad_norm": 1.331934010010052, + "learning_rate": 1.0001399123827978e-05, + "loss": 0.5238, + "step": 29802 + }, + { + "epoch": 0.5149813381255184, + "grad_norm": 1.0466330577743217, + "learning_rate": 1.0000839474298541e-05, + "loss": 0.4106, + "step": 29803 + }, + { + "epoch": 0.5149986176389273, + "grad_norm": 1.0816262948834738, + "learning_rate": 1.0000279824766473e-05, + "loss": 0.3727, + "step": 29804 + }, + { + "epoch": 0.5150158971523362, + "grad_norm": 1.538420580104328, + "learning_rate": 9.99972017523353e-06, + "loss": 0.6267, + "step": 29805 + }, + { + "epoch": 0.5150331766657451, + "grad_norm": 1.234367069587825, + "learning_rate": 9.999160525701462e-06, + "loss": 0.4577, + "step": 29806 + }, + { + "epoch": 0.515050456179154, + "grad_norm": 1.6972629747778414, + "learning_rate": 9.998600876172024e-06, + "loss": 0.5719, + "step": 29807 + }, + { + "epoch": 0.5150677356925629, + "grad_norm": 0.8151902963595697, + "learning_rate": 9.998041226646967e-06, + "loss": 0.3018, + "step": 29808 + }, + { + "epoch": 0.5150850152059718, + "grad_norm": 0.7945434031463426, + "learning_rate": 9.997481577128047e-06, + "loss": 0.2707, + "step": 29809 + }, + { + "epoch": 0.5151022947193807, + "grad_norm": 0.9926579253199489, + "learning_rate": 9.996921927617016e-06, + "loss": 0.4656, + "step": 29810 + }, + { + "epoch": 0.5151195742327896, + "grad_norm": 1.0242608606283234, + "learning_rate": 9.996362278115622e-06, + "loss": 0.5691, + "step": 29811 + }, + { + "epoch": 0.5151368537461986, + "grad_norm": 1.198187094887751, + "learning_rate": 9.995802628625624e-06, + "loss": 0.4121, + "step": 29812 + }, + { + "epoch": 0.5151541332596075, + "grad_norm": 0.7875627885633704, + "learning_rate": 9.995242979148771e-06, + "loss": 0.3054, + "step": 29813 + }, + { + "epoch": 0.5151714127730163, + "grad_norm": 1.3712653639605055, + "learning_rate": 9.99468332968682e-06, + "loss": 0.4954, + "step": 29814 + }, + { + "epoch": 0.5151886922864252, + "grad_norm": 1.4037181303494923, + "learning_rate": 9.994123680241517e-06, + "loss": 0.422, + "step": 29815 + }, + { + "epoch": 0.5152059717998341, + "grad_norm": 1.3132237143471992, + "learning_rate": 9.993564030814623e-06, + "loss": 0.4969, + "step": 29816 + }, + { + "epoch": 0.515223251313243, + "grad_norm": 0.8713188759625803, + "learning_rate": 9.993004381407884e-06, + "loss": 0.423, + "step": 29817 + }, + { + "epoch": 0.5152405308266519, + "grad_norm": 0.9654652682866788, + "learning_rate": 9.992444732023059e-06, + "loss": 0.4091, + "step": 29818 + }, + { + "epoch": 0.5152578103400608, + "grad_norm": 1.111431966076266, + "learning_rate": 9.991885082661893e-06, + "loss": 0.376, + "step": 29819 + }, + { + "epoch": 0.5152750898534697, + "grad_norm": 1.2354687929684411, + "learning_rate": 9.99132543332615e-06, + "loss": 0.4071, + "step": 29820 + }, + { + "epoch": 0.5152923693668786, + "grad_norm": 0.9710090677988055, + "learning_rate": 9.990765784017574e-06, + "loss": 0.6419, + "step": 29821 + }, + { + "epoch": 0.5153096488802875, + "grad_norm": 0.7747888794558208, + "learning_rate": 9.990206134737918e-06, + "loss": 0.3614, + "step": 29822 + }, + { + "epoch": 0.5153269283936964, + "grad_norm": 1.3382700775955632, + "learning_rate": 9.989646485488939e-06, + "loss": 0.3765, + "step": 29823 + }, + { + "epoch": 0.5153442079071053, + "grad_norm": 1.379932143435831, + "learning_rate": 9.989086836272385e-06, + "loss": 0.4655, + "step": 29824 + }, + { + "epoch": 0.5153614874205142, + "grad_norm": 0.6175970922470508, + "learning_rate": 9.988527187090017e-06, + "loss": 0.2237, + "step": 29825 + }, + { + "epoch": 0.5153787669339231, + "grad_norm": 1.5226685502047679, + "learning_rate": 9.987967537943579e-06, + "loss": 0.4466, + "step": 29826 + }, + { + "epoch": 0.5153960464473321, + "grad_norm": 0.7740645717718612, + "learning_rate": 9.987407888834827e-06, + "loss": 0.4852, + "step": 29827 + }, + { + "epoch": 0.515413325960741, + "grad_norm": 0.6948523161660229, + "learning_rate": 9.986848239765515e-06, + "loss": 0.8071, + "step": 29828 + }, + { + "epoch": 0.5154306054741499, + "grad_norm": 0.6062915341515352, + "learning_rate": 9.986288590737398e-06, + "loss": 0.3165, + "step": 29829 + }, + { + "epoch": 0.5154478849875588, + "grad_norm": 0.835415240848349, + "learning_rate": 9.985728941752222e-06, + "loss": 0.2851, + "step": 29830 + }, + { + "epoch": 0.5154651645009677, + "grad_norm": 0.9082665604513978, + "learning_rate": 9.985169292811749e-06, + "loss": 0.4982, + "step": 29831 + }, + { + "epoch": 0.5154824440143766, + "grad_norm": 1.3373513049825154, + "learning_rate": 9.984609643917725e-06, + "loss": 0.3912, + "step": 29832 + }, + { + "epoch": 0.5154997235277855, + "grad_norm": 0.982049022759867, + "learning_rate": 9.984049995071903e-06, + "loss": 0.4402, + "step": 29833 + }, + { + "epoch": 0.5155170030411944, + "grad_norm": 0.7199528931281706, + "learning_rate": 9.983490346276042e-06, + "loss": 0.3638, + "step": 29834 + }, + { + "epoch": 0.5155342825546032, + "grad_norm": 1.1217483441506002, + "learning_rate": 9.982930697531882e-06, + "loss": 0.4368, + "step": 29835 + }, + { + "epoch": 0.5155515620680121, + "grad_norm": 0.8971561750285965, + "learning_rate": 9.982371048841192e-06, + "loss": 0.3761, + "step": 29836 + }, + { + "epoch": 0.515568841581421, + "grad_norm": 0.8130768911853922, + "learning_rate": 9.981811400205714e-06, + "loss": 0.4063, + "step": 29837 + }, + { + "epoch": 0.5155861210948299, + "grad_norm": 0.9785956350214142, + "learning_rate": 9.981251751627206e-06, + "loss": 0.3359, + "step": 29838 + }, + { + "epoch": 0.5156034006082388, + "grad_norm": 0.887936249578653, + "learning_rate": 9.980692103107415e-06, + "loss": 0.2802, + "step": 29839 + }, + { + "epoch": 0.5156206801216477, + "grad_norm": 1.0798174493954107, + "learning_rate": 9.980132454648104e-06, + "loss": 0.3725, + "step": 29840 + }, + { + "epoch": 0.5156379596350567, + "grad_norm": 1.249196349233212, + "learning_rate": 9.979572806251016e-06, + "loss": 0.4898, + "step": 29841 + }, + { + "epoch": 0.5156552391484656, + "grad_norm": 1.0892727095793433, + "learning_rate": 9.979013157917907e-06, + "loss": 0.328, + "step": 29842 + }, + { + "epoch": 0.5156725186618745, + "grad_norm": 0.9294587941759929, + "learning_rate": 9.978453509650532e-06, + "loss": 0.5986, + "step": 29843 + }, + { + "epoch": 0.5156897981752834, + "grad_norm": 1.1710386455721256, + "learning_rate": 9.97789386145064e-06, + "loss": 0.331, + "step": 29844 + }, + { + "epoch": 0.5157070776886923, + "grad_norm": 0.8033080950934521, + "learning_rate": 9.977334213319991e-06, + "loss": 0.3177, + "step": 29845 + }, + { + "epoch": 0.5157243572021012, + "grad_norm": 0.7697722817529159, + "learning_rate": 9.976774565260327e-06, + "loss": 0.4741, + "step": 29846 + }, + { + "epoch": 0.5157416367155101, + "grad_norm": 0.8729677295789603, + "learning_rate": 9.97621491727341e-06, + "loss": 0.442, + "step": 29847 + }, + { + "epoch": 0.515758916228919, + "grad_norm": 0.9916081463643879, + "learning_rate": 9.975655269360987e-06, + "loss": 0.3206, + "step": 29848 + }, + { + "epoch": 0.5157761957423279, + "grad_norm": 1.0600901810292918, + "learning_rate": 9.975095621524816e-06, + "loss": 0.4979, + "step": 29849 + }, + { + "epoch": 0.5157934752557368, + "grad_norm": 0.8746171563963934, + "learning_rate": 9.974535973766645e-06, + "loss": 0.2962, + "step": 29850 + }, + { + "epoch": 0.5158107547691457, + "grad_norm": 0.8267741351219176, + "learning_rate": 9.973976326088234e-06, + "loss": 0.529, + "step": 29851 + }, + { + "epoch": 0.5158280342825546, + "grad_norm": 1.2232326701333611, + "learning_rate": 9.973416678491328e-06, + "loss": 0.4857, + "step": 29852 + }, + { + "epoch": 0.5158453137959635, + "grad_norm": 0.921246119916008, + "learning_rate": 9.972857030977684e-06, + "loss": 0.4179, + "step": 29853 + }, + { + "epoch": 0.5158625933093725, + "grad_norm": 1.1546339389471336, + "learning_rate": 9.972297383549056e-06, + "loss": 0.3274, + "step": 29854 + }, + { + "epoch": 0.5158798728227814, + "grad_norm": 0.9594271245391592, + "learning_rate": 9.971737736207186e-06, + "loss": 0.4979, + "step": 29855 + }, + { + "epoch": 0.5158971523361903, + "grad_norm": 0.9257045537475789, + "learning_rate": 9.971178088953845e-06, + "loss": 0.5254, + "step": 29856 + }, + { + "epoch": 0.5159144318495991, + "grad_norm": 1.0550542864699974, + "learning_rate": 9.97061844179077e-06, + "loss": 0.5436, + "step": 29857 + }, + { + "epoch": 0.515931711363008, + "grad_norm": 1.5610369614008093, + "learning_rate": 9.970058794719725e-06, + "loss": 0.5525, + "step": 29858 + }, + { + "epoch": 0.5159489908764169, + "grad_norm": 0.8448249414871317, + "learning_rate": 9.969499147742455e-06, + "loss": 0.4213, + "step": 29859 + }, + { + "epoch": 0.5159662703898258, + "grad_norm": 1.5194774038984085, + "learning_rate": 9.96893950086072e-06, + "loss": 0.6553, + "step": 29860 + }, + { + "epoch": 0.5159835499032347, + "grad_norm": 1.1733359199853848, + "learning_rate": 9.968379854076262e-06, + "loss": 0.4915, + "step": 29861 + }, + { + "epoch": 0.5160008294166436, + "grad_norm": 0.9556004856648442, + "learning_rate": 9.967820207390848e-06, + "loss": 0.4061, + "step": 29862 + }, + { + "epoch": 0.5160181089300525, + "grad_norm": 0.9992219860101104, + "learning_rate": 9.96726056080622e-06, + "loss": 0.3817, + "step": 29863 + }, + { + "epoch": 0.5160353884434614, + "grad_norm": 0.7256737945043211, + "learning_rate": 9.966700914324134e-06, + "loss": 0.7845, + "step": 29864 + }, + { + "epoch": 0.5160526679568703, + "grad_norm": 1.1920725167560973, + "learning_rate": 9.966141267946347e-06, + "loss": 0.4086, + "step": 29865 + }, + { + "epoch": 0.5160699474702792, + "grad_norm": 1.1749201993842335, + "learning_rate": 9.965581621674603e-06, + "loss": 0.4439, + "step": 29866 + }, + { + "epoch": 0.5160872269836881, + "grad_norm": 0.9187376618336296, + "learning_rate": 9.965021975510662e-06, + "loss": 0.2793, + "step": 29867 + }, + { + "epoch": 0.516104506497097, + "grad_norm": 0.6581268951054137, + "learning_rate": 9.964462329456274e-06, + "loss": 0.4138, + "step": 29868 + }, + { + "epoch": 0.516121786010506, + "grad_norm": 0.9287800720369062, + "learning_rate": 9.963902683513195e-06, + "loss": 0.4848, + "step": 29869 + }, + { + "epoch": 0.5161390655239149, + "grad_norm": 1.2792822078668873, + "learning_rate": 9.963343037683172e-06, + "loss": 0.3904, + "step": 29870 + }, + { + "epoch": 0.5161563450373238, + "grad_norm": 1.0012976743613404, + "learning_rate": 9.962783391967967e-06, + "loss": 0.3356, + "step": 29871 + }, + { + "epoch": 0.5161736245507327, + "grad_norm": 0.7965723963044188, + "learning_rate": 9.962223746369325e-06, + "loss": 0.4428, + "step": 29872 + }, + { + "epoch": 0.5161909040641416, + "grad_norm": 0.5638052865884942, + "learning_rate": 9.961664100888996e-06, + "loss": 0.8424, + "step": 29873 + }, + { + "epoch": 0.5162081835775505, + "grad_norm": 0.9030912043272448, + "learning_rate": 9.961104455528745e-06, + "loss": 0.4747, + "step": 29874 + }, + { + "epoch": 0.5162254630909594, + "grad_norm": 1.2966693346020368, + "learning_rate": 9.96054481029031e-06, + "loss": 0.3469, + "step": 29875 + }, + { + "epoch": 0.5162427426043683, + "grad_norm": 0.9855015788830459, + "learning_rate": 9.95998516517546e-06, + "loss": 0.2996, + "step": 29876 + }, + { + "epoch": 0.5162600221177772, + "grad_norm": 0.9776204552087219, + "learning_rate": 9.959425520185935e-06, + "loss": 0.3642, + "step": 29877 + }, + { + "epoch": 0.516277301631186, + "grad_norm": 0.8719360065644132, + "learning_rate": 9.958865875323493e-06, + "loss": 0.2352, + "step": 29878 + }, + { + "epoch": 0.5162945811445949, + "grad_norm": 1.1503592195757715, + "learning_rate": 9.958306230589886e-06, + "loss": 0.6133, + "step": 29879 + }, + { + "epoch": 0.5163118606580038, + "grad_norm": 0.9139749264024767, + "learning_rate": 9.95774658598687e-06, + "loss": 0.281, + "step": 29880 + }, + { + "epoch": 0.5163291401714127, + "grad_norm": 0.9761897613913213, + "learning_rate": 9.957186941516188e-06, + "loss": 0.4404, + "step": 29881 + }, + { + "epoch": 0.5163464196848216, + "grad_norm": 0.8368726842088752, + "learning_rate": 9.956627297179606e-06, + "loss": 0.3041, + "step": 29882 + }, + { + "epoch": 0.5163636991982306, + "grad_norm": 0.8577254635537435, + "learning_rate": 9.95606765297887e-06, + "loss": 0.6477, + "step": 29883 + }, + { + "epoch": 0.5163809787116395, + "grad_norm": 0.986634683832366, + "learning_rate": 9.955508008915732e-06, + "loss": 0.5203, + "step": 29884 + }, + { + "epoch": 0.5163982582250484, + "grad_norm": 1.087159808328082, + "learning_rate": 9.954948364991948e-06, + "loss": 0.4763, + "step": 29885 + }, + { + "epoch": 0.5164155377384573, + "grad_norm": 0.8070975465796408, + "learning_rate": 9.954388721209265e-06, + "loss": 0.3912, + "step": 29886 + }, + { + "epoch": 0.5164328172518662, + "grad_norm": 1.1114989056808415, + "learning_rate": 9.953829077569443e-06, + "loss": 0.4399, + "step": 29887 + }, + { + "epoch": 0.5164500967652751, + "grad_norm": 1.3240712565490944, + "learning_rate": 9.953269434074231e-06, + "loss": 0.37, + "step": 29888 + }, + { + "epoch": 0.516467376278684, + "grad_norm": 0.829693625682497, + "learning_rate": 9.952709790725384e-06, + "loss": 0.3128, + "step": 29889 + }, + { + "epoch": 0.5164846557920929, + "grad_norm": 0.8182210358560241, + "learning_rate": 9.95215014752465e-06, + "loss": 0.4591, + "step": 29890 + }, + { + "epoch": 0.5165019353055018, + "grad_norm": 0.8011199897785951, + "learning_rate": 9.95159050447379e-06, + "loss": 0.3362, + "step": 29891 + }, + { + "epoch": 0.5165192148189107, + "grad_norm": 1.1034243453133585, + "learning_rate": 9.95103086157455e-06, + "loss": 0.46, + "step": 29892 + }, + { + "epoch": 0.5165364943323196, + "grad_norm": 0.8114994498841233, + "learning_rate": 9.950471218828685e-06, + "loss": 0.4751, + "step": 29893 + }, + { + "epoch": 0.5165537738457285, + "grad_norm": 0.42673835143148814, + "learning_rate": 9.949911576237952e-06, + "loss": 0.6493, + "step": 29894 + }, + { + "epoch": 0.5165710533591374, + "grad_norm": 1.0008343218135956, + "learning_rate": 9.949351933804091e-06, + "loss": 0.3047, + "step": 29895 + }, + { + "epoch": 0.5165883328725464, + "grad_norm": 0.9897619040641595, + "learning_rate": 9.948792291528871e-06, + "loss": 0.4485, + "step": 29896 + }, + { + "epoch": 0.5166056123859553, + "grad_norm": 0.8388407544507624, + "learning_rate": 9.948232649414033e-06, + "loss": 0.2777, + "step": 29897 + }, + { + "epoch": 0.5166228918993642, + "grad_norm": 1.5551183220608993, + "learning_rate": 9.947673007461338e-06, + "loss": 0.3455, + "step": 29898 + }, + { + "epoch": 0.516640171412773, + "grad_norm": 0.9333829648932084, + "learning_rate": 9.947113365672532e-06, + "loss": 0.4338, + "step": 29899 + }, + { + "epoch": 0.5166574509261819, + "grad_norm": 0.9338411025065162, + "learning_rate": 9.946553724049374e-06, + "loss": 0.3651, + "step": 29900 + }, + { + "epoch": 0.5166747304395908, + "grad_norm": 0.6861785019786508, + "learning_rate": 9.945994082593608e-06, + "loss": 0.5068, + "step": 29901 + }, + { + "epoch": 0.5166920099529997, + "grad_norm": 0.8804479267238556, + "learning_rate": 9.945434441307e-06, + "loss": 0.3782, + "step": 29902 + }, + { + "epoch": 0.5167092894664086, + "grad_norm": 0.9256330581695968, + "learning_rate": 9.944874800191293e-06, + "loss": 0.4307, + "step": 29903 + }, + { + "epoch": 0.5167265689798175, + "grad_norm": 1.0444196694362082, + "learning_rate": 9.94431515924824e-06, + "loss": 0.3477, + "step": 29904 + }, + { + "epoch": 0.5167438484932264, + "grad_norm": 1.058047796385485, + "learning_rate": 9.9437555184796e-06, + "loss": 0.6234, + "step": 29905 + }, + { + "epoch": 0.5167611280066353, + "grad_norm": 0.9756755912809932, + "learning_rate": 9.943195877887118e-06, + "loss": 0.4186, + "step": 29906 + }, + { + "epoch": 0.5167784075200442, + "grad_norm": 1.0634572674010214, + "learning_rate": 9.942636237472553e-06, + "loss": 0.4426, + "step": 29907 + }, + { + "epoch": 0.5167956870334531, + "grad_norm": 1.1624263257725413, + "learning_rate": 9.942076597237654e-06, + "loss": 0.3994, + "step": 29908 + }, + { + "epoch": 0.516812966546862, + "grad_norm": 0.8413225624041712, + "learning_rate": 9.941516957184178e-06, + "loss": 0.5399, + "step": 29909 + }, + { + "epoch": 0.516830246060271, + "grad_norm": 1.2701303405512923, + "learning_rate": 9.940957317313873e-06, + "loss": 0.4609, + "step": 29910 + }, + { + "epoch": 0.5168475255736799, + "grad_norm": 0.5080266826615648, + "learning_rate": 9.940397677628496e-06, + "loss": 0.5102, + "step": 29911 + }, + { + "epoch": 0.5168648050870888, + "grad_norm": 0.9508293833379389, + "learning_rate": 9.939838038129796e-06, + "loss": 0.4039, + "step": 29912 + }, + { + "epoch": 0.5168820846004977, + "grad_norm": 1.2245341937835792, + "learning_rate": 9.93927839881953e-06, + "loss": 0.4343, + "step": 29913 + }, + { + "epoch": 0.5168993641139066, + "grad_norm": 0.7718728094732078, + "learning_rate": 9.93871875969945e-06, + "loss": 0.2491, + "step": 29914 + }, + { + "epoch": 0.5169166436273155, + "grad_norm": 0.8882521530257917, + "learning_rate": 9.938159120771301e-06, + "loss": 0.3911, + "step": 29915 + }, + { + "epoch": 0.5169339231407244, + "grad_norm": 0.8647740414097878, + "learning_rate": 9.93759948203685e-06, + "loss": 0.5357, + "step": 29916 + }, + { + "epoch": 0.5169512026541333, + "grad_norm": 1.0318045334255865, + "learning_rate": 9.937039843497837e-06, + "loss": 0.3286, + "step": 29917 + }, + { + "epoch": 0.5169684821675422, + "grad_norm": 1.0023083076507444, + "learning_rate": 9.936480205156022e-06, + "loss": 0.4184, + "step": 29918 + }, + { + "epoch": 0.5169857616809511, + "grad_norm": 1.4195695510586748, + "learning_rate": 9.935920567013154e-06, + "loss": 0.4192, + "step": 29919 + }, + { + "epoch": 0.5170030411943599, + "grad_norm": 1.1011662995316907, + "learning_rate": 9.935360929070992e-06, + "loss": 0.4564, + "step": 29920 + }, + { + "epoch": 0.5170203207077688, + "grad_norm": 1.0799730416739663, + "learning_rate": 9.934801291331278e-06, + "loss": 0.4783, + "step": 29921 + }, + { + "epoch": 0.5170376002211777, + "grad_norm": 1.2459604839571965, + "learning_rate": 9.934241653795778e-06, + "loss": 0.4575, + "step": 29922 + }, + { + "epoch": 0.5170548797345866, + "grad_norm": 1.0436636374276504, + "learning_rate": 9.933682016466234e-06, + "loss": 0.4576, + "step": 29923 + }, + { + "epoch": 0.5170721592479955, + "grad_norm": 1.078721359594331, + "learning_rate": 9.933122379344405e-06, + "loss": 0.4667, + "step": 29924 + }, + { + "epoch": 0.5170894387614045, + "grad_norm": 0.8960286263421867, + "learning_rate": 9.932562742432042e-06, + "loss": 0.4166, + "step": 29925 + }, + { + "epoch": 0.5171067182748134, + "grad_norm": 1.37789123472104, + "learning_rate": 9.932003105730895e-06, + "loss": 0.7025, + "step": 29926 + }, + { + "epoch": 0.5171239977882223, + "grad_norm": 1.0211817838417674, + "learning_rate": 9.931443469242723e-06, + "loss": 0.3635, + "step": 29927 + }, + { + "epoch": 0.5171412773016312, + "grad_norm": 1.4489267088260913, + "learning_rate": 9.93088383296927e-06, + "loss": 0.5134, + "step": 29928 + }, + { + "epoch": 0.5171585568150401, + "grad_norm": 1.0511270913351136, + "learning_rate": 9.930324196912298e-06, + "loss": 0.3601, + "step": 29929 + }, + { + "epoch": 0.517175836328449, + "grad_norm": 1.7661709283169629, + "learning_rate": 9.929764561073555e-06, + "loss": 0.4464, + "step": 29930 + }, + { + "epoch": 0.5171931158418579, + "grad_norm": 1.157366482568049, + "learning_rate": 9.929204925454795e-06, + "loss": 0.4875, + "step": 29931 + }, + { + "epoch": 0.5172103953552668, + "grad_norm": 1.4006073507805896, + "learning_rate": 9.928645290057771e-06, + "loss": 0.6482, + "step": 29932 + }, + { + "epoch": 0.5172276748686757, + "grad_norm": 0.6509972547123494, + "learning_rate": 9.928085654884234e-06, + "loss": 0.7022, + "step": 29933 + }, + { + "epoch": 0.5172449543820846, + "grad_norm": 0.7771741912152117, + "learning_rate": 9.927526019935942e-06, + "loss": 0.2657, + "step": 29934 + }, + { + "epoch": 0.5172622338954935, + "grad_norm": 0.9444699272863422, + "learning_rate": 9.926966385214637e-06, + "loss": 0.6136, + "step": 29935 + }, + { + "epoch": 0.5172795134089024, + "grad_norm": 0.9109024304989799, + "learning_rate": 9.926406750722086e-06, + "loss": 0.4458, + "step": 29936 + }, + { + "epoch": 0.5172967929223113, + "grad_norm": 0.9931433484269327, + "learning_rate": 9.92584711646003e-06, + "loss": 0.5219, + "step": 29937 + }, + { + "epoch": 0.5173140724357203, + "grad_norm": 1.0540716745297158, + "learning_rate": 9.925287482430228e-06, + "loss": 0.4247, + "step": 29938 + }, + { + "epoch": 0.5173313519491292, + "grad_norm": 0.7689662886030931, + "learning_rate": 9.92472784863443e-06, + "loss": 0.4222, + "step": 29939 + }, + { + "epoch": 0.5173486314625381, + "grad_norm": 0.6686394341497269, + "learning_rate": 9.924168215074394e-06, + "loss": 0.6039, + "step": 29940 + }, + { + "epoch": 0.5173659109759469, + "grad_norm": 1.1482322629226136, + "learning_rate": 9.923608581751862e-06, + "loss": 0.3995, + "step": 29941 + }, + { + "epoch": 0.5173831904893558, + "grad_norm": 0.8023342742076098, + "learning_rate": 9.923048948668603e-06, + "loss": 0.3885, + "step": 29942 + }, + { + "epoch": 0.5174004700027647, + "grad_norm": 0.6475668747988662, + "learning_rate": 9.922489315826353e-06, + "loss": 0.3825, + "step": 29943 + }, + { + "epoch": 0.5174177495161736, + "grad_norm": 0.694464121156045, + "learning_rate": 9.921929683226875e-06, + "loss": 0.8408, + "step": 29944 + }, + { + "epoch": 0.5174350290295825, + "grad_norm": 1.0153023375848822, + "learning_rate": 9.92137005087192e-06, + "loss": 0.4396, + "step": 29945 + }, + { + "epoch": 0.5174523085429914, + "grad_norm": 0.7890168982645676, + "learning_rate": 9.920810418763239e-06, + "loss": 0.4105, + "step": 29946 + }, + { + "epoch": 0.5174695880564003, + "grad_norm": 1.35940121573372, + "learning_rate": 9.920250786902587e-06, + "loss": 0.2339, + "step": 29947 + }, + { + "epoch": 0.5174868675698092, + "grad_norm": 1.5489282162047593, + "learning_rate": 9.919691155291712e-06, + "loss": 0.3218, + "step": 29948 + }, + { + "epoch": 0.5175041470832181, + "grad_norm": 1.1278233288666577, + "learning_rate": 9.919131523932373e-06, + "loss": 0.4545, + "step": 29949 + }, + { + "epoch": 0.517521426596627, + "grad_norm": 1.2343963250555192, + "learning_rate": 9.918571892826319e-06, + "loss": 0.3647, + "step": 29950 + }, + { + "epoch": 0.5175387061100359, + "grad_norm": 0.6225412209480938, + "learning_rate": 9.918012261975309e-06, + "loss": 0.9078, + "step": 29951 + }, + { + "epoch": 0.5175559856234448, + "grad_norm": 1.436068311499158, + "learning_rate": 9.917452631381084e-06, + "loss": 0.6263, + "step": 29952 + }, + { + "epoch": 0.5175732651368538, + "grad_norm": 0.9485697601809623, + "learning_rate": 9.916893001045407e-06, + "loss": 0.4382, + "step": 29953 + }, + { + "epoch": 0.5175905446502627, + "grad_norm": 0.5893213954926745, + "learning_rate": 9.91633337097003e-06, + "loss": 0.6527, + "step": 29954 + }, + { + "epoch": 0.5176078241636716, + "grad_norm": 0.9455189133399311, + "learning_rate": 9.915773741156697e-06, + "loss": 0.4913, + "step": 29955 + }, + { + "epoch": 0.5176251036770805, + "grad_norm": 1.1263463639097429, + "learning_rate": 9.915214111607171e-06, + "loss": 0.2647, + "step": 29956 + }, + { + "epoch": 0.5176423831904894, + "grad_norm": 0.7499213840000597, + "learning_rate": 9.9146544823232e-06, + "loss": 0.5697, + "step": 29957 + }, + { + "epoch": 0.5176596627038983, + "grad_norm": 0.6964425067806121, + "learning_rate": 9.914094853306538e-06, + "loss": 0.3264, + "step": 29958 + }, + { + "epoch": 0.5176769422173072, + "grad_norm": 0.9731728370479827, + "learning_rate": 9.913535224558936e-06, + "loss": 0.487, + "step": 29959 + }, + { + "epoch": 0.5176942217307161, + "grad_norm": 0.8775463023087444, + "learning_rate": 9.912975596082152e-06, + "loss": 0.2781, + "step": 29960 + }, + { + "epoch": 0.517711501244125, + "grad_norm": 0.9667857550374425, + "learning_rate": 9.912415967877928e-06, + "loss": 0.5379, + "step": 29961 + }, + { + "epoch": 0.5177287807575338, + "grad_norm": 2.040226328234756, + "learning_rate": 9.911856339948032e-06, + "loss": 0.4525, + "step": 29962 + }, + { + "epoch": 0.5177460602709427, + "grad_norm": 0.8669281975417823, + "learning_rate": 9.911296712294204e-06, + "loss": 0.5889, + "step": 29963 + }, + { + "epoch": 0.5177633397843516, + "grad_norm": 0.6762693638990671, + "learning_rate": 9.910737084918202e-06, + "loss": 0.5644, + "step": 29964 + }, + { + "epoch": 0.5177806192977605, + "grad_norm": 1.0931015183353823, + "learning_rate": 9.910177457821781e-06, + "loss": 0.5833, + "step": 29965 + }, + { + "epoch": 0.5177978988111694, + "grad_norm": 0.9788264954686526, + "learning_rate": 9.909617831006686e-06, + "loss": 0.3105, + "step": 29966 + }, + { + "epoch": 0.5178151783245784, + "grad_norm": 0.9323600977937673, + "learning_rate": 9.90905820447468e-06, + "loss": 0.4184, + "step": 29967 + }, + { + "epoch": 0.5178324578379873, + "grad_norm": 2.0175303436384247, + "learning_rate": 9.908498578227505e-06, + "loss": 0.5672, + "step": 29968 + }, + { + "epoch": 0.5178497373513962, + "grad_norm": 0.862971954415369, + "learning_rate": 9.907938952266924e-06, + "loss": 0.3213, + "step": 29969 + }, + { + "epoch": 0.5178670168648051, + "grad_norm": 1.4744266997122188, + "learning_rate": 9.907379326594683e-06, + "loss": 0.5883, + "step": 29970 + }, + { + "epoch": 0.517884296378214, + "grad_norm": 1.0507889066342164, + "learning_rate": 9.90681970121254e-06, + "loss": 0.4209, + "step": 29971 + }, + { + "epoch": 0.5179015758916229, + "grad_norm": 0.9184583282975491, + "learning_rate": 9.90626007612224e-06, + "loss": 0.3666, + "step": 29972 + }, + { + "epoch": 0.5179188554050318, + "grad_norm": 0.545781426068767, + "learning_rate": 9.905700451325543e-06, + "loss": 0.3051, + "step": 29973 + }, + { + "epoch": 0.5179361349184407, + "grad_norm": 1.096481395959894, + "learning_rate": 9.905140826824199e-06, + "loss": 0.4688, + "step": 29974 + }, + { + "epoch": 0.5179534144318496, + "grad_norm": 1.7735977033032133, + "learning_rate": 9.904581202619962e-06, + "loss": 0.4832, + "step": 29975 + }, + { + "epoch": 0.5179706939452585, + "grad_norm": 1.3877316562933093, + "learning_rate": 9.904021578714586e-06, + "loss": 0.4421, + "step": 29976 + }, + { + "epoch": 0.5179879734586674, + "grad_norm": 0.9515456821103739, + "learning_rate": 9.903461955109817e-06, + "loss": 0.4137, + "step": 29977 + }, + { + "epoch": 0.5180052529720763, + "grad_norm": 0.827350756198452, + "learning_rate": 9.902902331807415e-06, + "loss": 0.3963, + "step": 29978 + }, + { + "epoch": 0.5180225324854852, + "grad_norm": 1.4985414969451474, + "learning_rate": 9.90234270880913e-06, + "loss": 0.2555, + "step": 29979 + }, + { + "epoch": 0.5180398119988942, + "grad_norm": 1.2570526094148156, + "learning_rate": 9.901783086116717e-06, + "loss": 0.4256, + "step": 29980 + }, + { + "epoch": 0.5180570915123031, + "grad_norm": 0.6528567165117982, + "learning_rate": 9.901223463731921e-06, + "loss": 0.3535, + "step": 29981 + }, + { + "epoch": 0.518074371025712, + "grad_norm": 0.3828041579350185, + "learning_rate": 9.900663841656508e-06, + "loss": 0.5576, + "step": 29982 + }, + { + "epoch": 0.5180916505391209, + "grad_norm": 0.8884737126350607, + "learning_rate": 9.900104219892218e-06, + "loss": 0.4559, + "step": 29983 + }, + { + "epoch": 0.5181089300525297, + "grad_norm": 1.214852001526968, + "learning_rate": 9.899544598440811e-06, + "loss": 0.4423, + "step": 29984 + }, + { + "epoch": 0.5181262095659386, + "grad_norm": 1.0104362930279656, + "learning_rate": 9.89898497730404e-06, + "loss": 0.4334, + "step": 29985 + }, + { + "epoch": 0.5181434890793475, + "grad_norm": 1.499440536437599, + "learning_rate": 9.898425356483653e-06, + "loss": 0.4103, + "step": 29986 + }, + { + "epoch": 0.5181607685927564, + "grad_norm": 0.6987162952620702, + "learning_rate": 9.897865735981407e-06, + "loss": 0.5716, + "step": 29987 + }, + { + "epoch": 0.5181780481061653, + "grad_norm": 0.9394358820424734, + "learning_rate": 9.89730611579905e-06, + "loss": 0.3585, + "step": 29988 + }, + { + "epoch": 0.5181953276195742, + "grad_norm": 0.9009746899665351, + "learning_rate": 9.896746495938342e-06, + "loss": 0.4039, + "step": 29989 + }, + { + "epoch": 0.5182126071329831, + "grad_norm": 0.966088725255647, + "learning_rate": 9.896186876401028e-06, + "loss": 0.5724, + "step": 29990 + }, + { + "epoch": 0.518229886646392, + "grad_norm": 0.781903995274297, + "learning_rate": 9.89562725718887e-06, + "loss": 0.4292, + "step": 29991 + }, + { + "epoch": 0.5182471661598009, + "grad_norm": 1.1230674967588883, + "learning_rate": 9.89506763830361e-06, + "loss": 0.5316, + "step": 29992 + }, + { + "epoch": 0.5182644456732098, + "grad_norm": 1.026606282694349, + "learning_rate": 9.89450801974701e-06, + "loss": 0.5725, + "step": 29993 + }, + { + "epoch": 0.5182817251866187, + "grad_norm": 0.5989293893218087, + "learning_rate": 9.893948401520815e-06, + "loss": 0.2821, + "step": 29994 + }, + { + "epoch": 0.5182990047000277, + "grad_norm": 1.1263290822861103, + "learning_rate": 9.893388783626785e-06, + "loss": 0.4143, + "step": 29995 + }, + { + "epoch": 0.5183162842134366, + "grad_norm": 0.8156701426680156, + "learning_rate": 9.892829166066671e-06, + "loss": 0.3772, + "step": 29996 + }, + { + "epoch": 0.5183335637268455, + "grad_norm": 0.8544653297189825, + "learning_rate": 9.89226954884222e-06, + "loss": 0.4416, + "step": 29997 + }, + { + "epoch": 0.5183508432402544, + "grad_norm": 1.0426444052744794, + "learning_rate": 9.891709931955192e-06, + "loss": 0.305, + "step": 29998 + }, + { + "epoch": 0.5183681227536633, + "grad_norm": 0.9772313624195712, + "learning_rate": 9.891150315407334e-06, + "loss": 0.5298, + "step": 29999 + }, + { + "epoch": 0.5183854022670722, + "grad_norm": 1.2079021277881719, + "learning_rate": 9.890590699200405e-06, + "loss": 0.5123, + "step": 30000 + }, + { + "epoch": 0.5184026817804811, + "grad_norm": 0.6503941360113702, + "learning_rate": 9.890031083336148e-06, + "loss": 0.3466, + "step": 30001 + }, + { + "epoch": 0.51841996129389, + "grad_norm": 0.9098634955942222, + "learning_rate": 9.889471467816329e-06, + "loss": 0.3083, + "step": 30002 + }, + { + "epoch": 0.5184372408072989, + "grad_norm": 0.8327276513859279, + "learning_rate": 9.888911852642688e-06, + "loss": 0.4283, + "step": 30003 + }, + { + "epoch": 0.5184545203207078, + "grad_norm": 1.4390428415019956, + "learning_rate": 9.888352237816987e-06, + "loss": 0.5648, + "step": 30004 + }, + { + "epoch": 0.5184717998341166, + "grad_norm": 0.8783012449838775, + "learning_rate": 9.887792623340974e-06, + "loss": 0.2938, + "step": 30005 + }, + { + "epoch": 0.5184890793475255, + "grad_norm": 1.0251215422236364, + "learning_rate": 9.887233009216406e-06, + "loss": 0.5118, + "step": 30006 + }, + { + "epoch": 0.5185063588609344, + "grad_norm": 0.9603303396508973, + "learning_rate": 9.886673395445032e-06, + "loss": 0.3056, + "step": 30007 + }, + { + "epoch": 0.5185236383743433, + "grad_norm": 1.2373624377124, + "learning_rate": 9.886113782028602e-06, + "loss": 0.5877, + "step": 30008 + }, + { + "epoch": 0.5185409178877523, + "grad_norm": 0.5789328753684739, + "learning_rate": 9.885554168968873e-06, + "loss": 0.6143, + "step": 30009 + }, + { + "epoch": 0.5185581974011612, + "grad_norm": 0.8905428609025217, + "learning_rate": 9.884994556267597e-06, + "loss": 0.4524, + "step": 30010 + }, + { + "epoch": 0.5185754769145701, + "grad_norm": 1.1352371423978365, + "learning_rate": 9.884434943926531e-06, + "loss": 0.3794, + "step": 30011 + }, + { + "epoch": 0.518592756427979, + "grad_norm": 1.7528796755200509, + "learning_rate": 9.88387533194742e-06, + "loss": 0.5684, + "step": 30012 + }, + { + "epoch": 0.5186100359413879, + "grad_norm": 0.9181076059032006, + "learning_rate": 9.883315720332021e-06, + "loss": 0.4348, + "step": 30013 + }, + { + "epoch": 0.5186273154547968, + "grad_norm": 1.4504217390417065, + "learning_rate": 9.882756109082085e-06, + "loss": 0.7779, + "step": 30014 + }, + { + "epoch": 0.5186445949682057, + "grad_norm": 0.8464732074045005, + "learning_rate": 9.882196498199366e-06, + "loss": 0.3721, + "step": 30015 + }, + { + "epoch": 0.5186618744816146, + "grad_norm": 0.8650032352386607, + "learning_rate": 9.88163688768562e-06, + "loss": 0.3572, + "step": 30016 + }, + { + "epoch": 0.5186791539950235, + "grad_norm": 0.848407581790271, + "learning_rate": 9.881077277542593e-06, + "loss": 0.4142, + "step": 30017 + }, + { + "epoch": 0.5186964335084324, + "grad_norm": 0.8879207253870891, + "learning_rate": 9.880517667772042e-06, + "loss": 0.3777, + "step": 30018 + }, + { + "epoch": 0.5187137130218413, + "grad_norm": 0.6974433533613933, + "learning_rate": 9.879958058375715e-06, + "loss": 0.276, + "step": 30019 + }, + { + "epoch": 0.5187309925352502, + "grad_norm": 1.2346332208025257, + "learning_rate": 9.879398449355376e-06, + "loss": 0.4908, + "step": 30020 + }, + { + "epoch": 0.5187482720486591, + "grad_norm": 0.9929580615013278, + "learning_rate": 9.878838840712762e-06, + "loss": 0.7947, + "step": 30021 + }, + { + "epoch": 0.518765551562068, + "grad_norm": 1.3282454534672619, + "learning_rate": 9.87827923244964e-06, + "loss": 0.4579, + "step": 30022 + }, + { + "epoch": 0.518782831075477, + "grad_norm": 1.1608128709798244, + "learning_rate": 9.877719624567754e-06, + "loss": 0.3515, + "step": 30023 + }, + { + "epoch": 0.5188001105888859, + "grad_norm": 1.7789475616683388, + "learning_rate": 9.87716001706886e-06, + "loss": 0.4893, + "step": 30024 + }, + { + "epoch": 0.5188173901022948, + "grad_norm": 0.805229508046721, + "learning_rate": 9.876600409954708e-06, + "loss": 0.3587, + "step": 30025 + }, + { + "epoch": 0.5188346696157036, + "grad_norm": 1.0969139331262427, + "learning_rate": 9.876040803227059e-06, + "loss": 0.3719, + "step": 30026 + }, + { + "epoch": 0.5188519491291125, + "grad_norm": 1.3518997836929685, + "learning_rate": 9.875481196887655e-06, + "loss": 0.556, + "step": 30027 + }, + { + "epoch": 0.5188692286425214, + "grad_norm": 1.5057542626843838, + "learning_rate": 9.874921590938253e-06, + "loss": 0.4869, + "step": 30028 + }, + { + "epoch": 0.5188865081559303, + "grad_norm": 1.7813340678886618, + "learning_rate": 9.874361985380609e-06, + "loss": 0.4202, + "step": 30029 + }, + { + "epoch": 0.5189037876693392, + "grad_norm": 0.9728982308529713, + "learning_rate": 9.873802380216469e-06, + "loss": 0.6289, + "step": 30030 + }, + { + "epoch": 0.5189210671827481, + "grad_norm": 0.9982263852341622, + "learning_rate": 9.873242775447593e-06, + "loss": 0.3719, + "step": 30031 + }, + { + "epoch": 0.518938346696157, + "grad_norm": 0.5596851800842719, + "learning_rate": 9.872683171075727e-06, + "loss": 0.4894, + "step": 30032 + }, + { + "epoch": 0.5189556262095659, + "grad_norm": 1.228135108911464, + "learning_rate": 9.87212356710263e-06, + "loss": 0.3052, + "step": 30033 + }, + { + "epoch": 0.5189729057229748, + "grad_norm": 1.1636479800617543, + "learning_rate": 9.871563963530048e-06, + "loss": 0.4357, + "step": 30034 + }, + { + "epoch": 0.5189901852363837, + "grad_norm": 0.552390272936583, + "learning_rate": 9.871004360359742e-06, + "loss": 0.7561, + "step": 30035 + }, + { + "epoch": 0.5190074647497926, + "grad_norm": 0.9034898357373926, + "learning_rate": 9.870444757593456e-06, + "loss": 0.32, + "step": 30036 + }, + { + "epoch": 0.5190247442632016, + "grad_norm": 1.0552227124235587, + "learning_rate": 9.86988515523295e-06, + "loss": 0.3764, + "step": 30037 + }, + { + "epoch": 0.5190420237766105, + "grad_norm": 1.0635952609665649, + "learning_rate": 9.869325553279975e-06, + "loss": 0.3403, + "step": 30038 + }, + { + "epoch": 0.5190593032900194, + "grad_norm": 1.0804981187816183, + "learning_rate": 9.868765951736278e-06, + "loss": 0.5303, + "step": 30039 + }, + { + "epoch": 0.5190765828034283, + "grad_norm": 0.7935226440103612, + "learning_rate": 9.868206350603621e-06, + "loss": 0.4689, + "step": 30040 + }, + { + "epoch": 0.5190938623168372, + "grad_norm": 0.8879952774987059, + "learning_rate": 9.867646749883743e-06, + "loss": 0.3834, + "step": 30041 + }, + { + "epoch": 0.5191111418302461, + "grad_norm": 0.8999119800391397, + "learning_rate": 9.867087149578414e-06, + "loss": 0.4404, + "step": 30042 + }, + { + "epoch": 0.519128421343655, + "grad_norm": 1.2675318436870977, + "learning_rate": 9.866527549689375e-06, + "loss": 0.6552, + "step": 30043 + }, + { + "epoch": 0.5191457008570639, + "grad_norm": 0.5748003538003467, + "learning_rate": 9.865967950218382e-06, + "loss": 0.6244, + "step": 30044 + }, + { + "epoch": 0.5191629803704728, + "grad_norm": 0.959304812991644, + "learning_rate": 9.865408351167186e-06, + "loss": 0.4618, + "step": 30045 + }, + { + "epoch": 0.5191802598838817, + "grad_norm": 0.8730543315752035, + "learning_rate": 9.864848752537546e-06, + "loss": 0.3187, + "step": 30046 + }, + { + "epoch": 0.5191975393972905, + "grad_norm": 1.0866898376705114, + "learning_rate": 9.864289154331205e-06, + "loss": 0.2941, + "step": 30047 + }, + { + "epoch": 0.5192148189106994, + "grad_norm": 0.5653354384186594, + "learning_rate": 9.863729556549923e-06, + "loss": 0.6165, + "step": 30048 + }, + { + "epoch": 0.5192320984241083, + "grad_norm": 1.0747602336051993, + "learning_rate": 9.86316995919545e-06, + "loss": 0.3508, + "step": 30049 + }, + { + "epoch": 0.5192493779375172, + "grad_norm": 0.8848306846445162, + "learning_rate": 9.862610362269537e-06, + "loss": 0.4484, + "step": 30050 + }, + { + "epoch": 0.5192666574509262, + "grad_norm": 0.6508665046471772, + "learning_rate": 9.862050765773942e-06, + "loss": 0.2551, + "step": 30051 + }, + { + "epoch": 0.5192839369643351, + "grad_norm": 1.2559790499590542, + "learning_rate": 9.861491169710412e-06, + "loss": 0.4447, + "step": 30052 + }, + { + "epoch": 0.519301216477744, + "grad_norm": 2.4721576470328563, + "learning_rate": 9.860931574080704e-06, + "loss": 0.3092, + "step": 30053 + }, + { + "epoch": 0.5193184959911529, + "grad_norm": 1.5904915059668807, + "learning_rate": 9.860371978886567e-06, + "loss": 0.3285, + "step": 30054 + }, + { + "epoch": 0.5193357755045618, + "grad_norm": 0.9116922326947755, + "learning_rate": 9.859812384129758e-06, + "loss": 0.3494, + "step": 30055 + }, + { + "epoch": 0.5193530550179707, + "grad_norm": 1.1408619222450143, + "learning_rate": 9.859252789812024e-06, + "loss": 0.4227, + "step": 30056 + }, + { + "epoch": 0.5193703345313796, + "grad_norm": 0.7760327273519522, + "learning_rate": 9.858693195935126e-06, + "loss": 0.4943, + "step": 30057 + }, + { + "epoch": 0.5193876140447885, + "grad_norm": 1.5381155325214484, + "learning_rate": 9.858133602500808e-06, + "loss": 0.4691, + "step": 30058 + }, + { + "epoch": 0.5194048935581974, + "grad_norm": 0.9210192166504477, + "learning_rate": 9.857574009510826e-06, + "loss": 0.2979, + "step": 30059 + }, + { + "epoch": 0.5194221730716063, + "grad_norm": 1.045671969725895, + "learning_rate": 9.857014416966938e-06, + "loss": 0.3782, + "step": 30060 + }, + { + "epoch": 0.5194394525850152, + "grad_norm": 1.1779619567885182, + "learning_rate": 9.856454824870886e-06, + "loss": 0.3917, + "step": 30061 + }, + { + "epoch": 0.5194567320984241, + "grad_norm": 1.1099544648041002, + "learning_rate": 9.855895233224431e-06, + "loss": 0.4804, + "step": 30062 + }, + { + "epoch": 0.519474011611833, + "grad_norm": 0.7554036640653752, + "learning_rate": 9.85533564202932e-06, + "loss": 0.3499, + "step": 30063 + }, + { + "epoch": 0.519491291125242, + "grad_norm": 1.0565038303137917, + "learning_rate": 9.854776051287312e-06, + "loss": 0.2875, + "step": 30064 + }, + { + "epoch": 0.5195085706386509, + "grad_norm": 1.1659832356305844, + "learning_rate": 9.854216461000153e-06, + "loss": 0.4362, + "step": 30065 + }, + { + "epoch": 0.5195258501520598, + "grad_norm": 1.3024779435658616, + "learning_rate": 9.853656871169605e-06, + "loss": 0.4669, + "step": 30066 + }, + { + "epoch": 0.5195431296654687, + "grad_norm": 1.01285865882837, + "learning_rate": 9.85309728179741e-06, + "loss": 0.4808, + "step": 30067 + }, + { + "epoch": 0.5195604091788775, + "grad_norm": 0.9209774485234399, + "learning_rate": 9.852537692885327e-06, + "loss": 0.3278, + "step": 30068 + }, + { + "epoch": 0.5195776886922864, + "grad_norm": 1.161496616490142, + "learning_rate": 9.851978104435108e-06, + "loss": 0.3903, + "step": 30069 + }, + { + "epoch": 0.5195949682056953, + "grad_norm": 0.730331741809346, + "learning_rate": 9.851418516448501e-06, + "loss": 0.3547, + "step": 30070 + }, + { + "epoch": 0.5196122477191042, + "grad_norm": 0.512222187867353, + "learning_rate": 9.85085892892727e-06, + "loss": 0.5297, + "step": 30071 + }, + { + "epoch": 0.5196295272325131, + "grad_norm": 1.1845669713082418, + "learning_rate": 9.850299341873153e-06, + "loss": 0.5787, + "step": 30072 + }, + { + "epoch": 0.519646806745922, + "grad_norm": 1.4123859330758646, + "learning_rate": 9.849739755287912e-06, + "loss": 0.3654, + "step": 30073 + }, + { + "epoch": 0.5196640862593309, + "grad_norm": 1.106769602917161, + "learning_rate": 9.849180169173297e-06, + "loss": 0.2154, + "step": 30074 + }, + { + "epoch": 0.5196813657727398, + "grad_norm": 1.0838072107736556, + "learning_rate": 9.848620583531062e-06, + "loss": 0.3718, + "step": 30075 + }, + { + "epoch": 0.5196986452861487, + "grad_norm": 1.3888598881932879, + "learning_rate": 9.848060998362958e-06, + "loss": 0.694, + "step": 30076 + }, + { + "epoch": 0.5197159247995576, + "grad_norm": 1.2594443383139549, + "learning_rate": 9.847501413670742e-06, + "loss": 0.3257, + "step": 30077 + }, + { + "epoch": 0.5197332043129665, + "grad_norm": 1.3074782933003113, + "learning_rate": 9.84694182945616e-06, + "loss": 0.426, + "step": 30078 + }, + { + "epoch": 0.5197504838263755, + "grad_norm": 1.0452918356601124, + "learning_rate": 9.846382245720968e-06, + "loss": 0.4779, + "step": 30079 + }, + { + "epoch": 0.5197677633397844, + "grad_norm": 0.7934777674993678, + "learning_rate": 9.845822662466921e-06, + "loss": 0.399, + "step": 30080 + }, + { + "epoch": 0.5197850428531933, + "grad_norm": 0.8820263827916193, + "learning_rate": 9.845263079695766e-06, + "loss": 0.409, + "step": 30081 + }, + { + "epoch": 0.5198023223666022, + "grad_norm": 1.3243435620173856, + "learning_rate": 9.84470349740926e-06, + "loss": 0.4765, + "step": 30082 + }, + { + "epoch": 0.5198196018800111, + "grad_norm": 0.7332746616350316, + "learning_rate": 9.844143915609152e-06, + "loss": 0.4008, + "step": 30083 + }, + { + "epoch": 0.51983688139342, + "grad_norm": 0.6567543085086173, + "learning_rate": 9.8435843342972e-06, + "loss": 0.3311, + "step": 30084 + }, + { + "epoch": 0.5198541609068289, + "grad_norm": 0.8234903148026012, + "learning_rate": 9.843024753475152e-06, + "loss": 0.2171, + "step": 30085 + }, + { + "epoch": 0.5198714404202378, + "grad_norm": 1.0913616176450962, + "learning_rate": 9.842465173144766e-06, + "loss": 0.3868, + "step": 30086 + }, + { + "epoch": 0.5198887199336467, + "grad_norm": 1.0699387499034667, + "learning_rate": 9.841905593307787e-06, + "loss": 0.4871, + "step": 30087 + }, + { + "epoch": 0.5199059994470556, + "grad_norm": 0.8643369295584965, + "learning_rate": 9.841346013965975e-06, + "loss": 0.482, + "step": 30088 + }, + { + "epoch": 0.5199232789604644, + "grad_norm": 0.9502720725726713, + "learning_rate": 9.840786435121076e-06, + "loss": 0.4933, + "step": 30089 + }, + { + "epoch": 0.5199405584738733, + "grad_norm": 0.9006790344241281, + "learning_rate": 9.840226856774847e-06, + "loss": 0.4616, + "step": 30090 + }, + { + "epoch": 0.5199578379872822, + "grad_norm": 1.9318169963771539, + "learning_rate": 9.839667278929042e-06, + "loss": 0.562, + "step": 30091 + }, + { + "epoch": 0.5199751175006911, + "grad_norm": 1.1728242654797052, + "learning_rate": 9.83910770158541e-06, + "loss": 0.4634, + "step": 30092 + }, + { + "epoch": 0.5199923970141, + "grad_norm": 0.9724547761435173, + "learning_rate": 9.838548124745704e-06, + "loss": 0.3857, + "step": 30093 + }, + { + "epoch": 0.520009676527509, + "grad_norm": 0.8186333990399061, + "learning_rate": 9.837988548411675e-06, + "loss": 0.286, + "step": 30094 + }, + { + "epoch": 0.5200269560409179, + "grad_norm": 0.9849006946369278, + "learning_rate": 9.837428972585082e-06, + "loss": 0.3106, + "step": 30095 + }, + { + "epoch": 0.5200442355543268, + "grad_norm": 0.9929416357173938, + "learning_rate": 9.836869397267672e-06, + "loss": 0.6112, + "step": 30096 + }, + { + "epoch": 0.5200615150677357, + "grad_norm": 1.0408431301007184, + "learning_rate": 9.836309822461203e-06, + "loss": 0.458, + "step": 30097 + }, + { + "epoch": 0.5200787945811446, + "grad_norm": 1.0044815893648986, + "learning_rate": 9.835750248167421e-06, + "loss": 0.4308, + "step": 30098 + }, + { + "epoch": 0.5200960740945535, + "grad_norm": 0.8828795723497279, + "learning_rate": 9.835190674388081e-06, + "loss": 0.4925, + "step": 30099 + }, + { + "epoch": 0.5201133536079624, + "grad_norm": 1.281371241214964, + "learning_rate": 9.834631101124941e-06, + "loss": 0.4309, + "step": 30100 + }, + { + "epoch": 0.5201306331213713, + "grad_norm": 1.1870915143939027, + "learning_rate": 9.834071528379742e-06, + "loss": 0.4816, + "step": 30101 + }, + { + "epoch": 0.5201479126347802, + "grad_norm": 1.4010711467283834, + "learning_rate": 9.833511956154249e-06, + "loss": 0.448, + "step": 30102 + }, + { + "epoch": 0.5201651921481891, + "grad_norm": 0.9033376138946093, + "learning_rate": 9.832952384450205e-06, + "loss": 0.4066, + "step": 30103 + }, + { + "epoch": 0.520182471661598, + "grad_norm": 0.8859544155340484, + "learning_rate": 9.832392813269371e-06, + "loss": 0.4888, + "step": 30104 + }, + { + "epoch": 0.5201997511750069, + "grad_norm": 0.752678023247872, + "learning_rate": 9.83183324261349e-06, + "loss": 0.6675, + "step": 30105 + }, + { + "epoch": 0.5202170306884158, + "grad_norm": 1.388345844800189, + "learning_rate": 9.831273672484327e-06, + "loss": 0.4647, + "step": 30106 + }, + { + "epoch": 0.5202343102018248, + "grad_norm": 1.2753696020425556, + "learning_rate": 9.830714102883623e-06, + "loss": 0.5031, + "step": 30107 + }, + { + "epoch": 0.5202515897152337, + "grad_norm": 0.918480242576431, + "learning_rate": 9.830154533813137e-06, + "loss": 0.4939, + "step": 30108 + }, + { + "epoch": 0.5202688692286426, + "grad_norm": 1.398968242188165, + "learning_rate": 9.829594965274618e-06, + "loss": 0.5533, + "step": 30109 + }, + { + "epoch": 0.5202861487420514, + "grad_norm": 0.5999539085054769, + "learning_rate": 9.829035397269822e-06, + "loss": 0.6773, + "step": 30110 + }, + { + "epoch": 0.5203034282554603, + "grad_norm": 1.8388918558847662, + "learning_rate": 9.828475829800503e-06, + "loss": 0.5021, + "step": 30111 + }, + { + "epoch": 0.5203207077688692, + "grad_norm": 0.9288548665620944, + "learning_rate": 9.827916262868405e-06, + "loss": 0.5555, + "step": 30112 + }, + { + "epoch": 0.5203379872822781, + "grad_norm": 0.9158512498890089, + "learning_rate": 9.82735669647529e-06, + "loss": 0.3263, + "step": 30113 + }, + { + "epoch": 0.520355266795687, + "grad_norm": 0.9213501594023522, + "learning_rate": 9.826797130622905e-06, + "loss": 0.3757, + "step": 30114 + }, + { + "epoch": 0.5203725463090959, + "grad_norm": 1.504360566650026, + "learning_rate": 9.826237565313006e-06, + "loss": 0.3998, + "step": 30115 + }, + { + "epoch": 0.5203898258225048, + "grad_norm": 1.340634027949882, + "learning_rate": 9.825678000547342e-06, + "loss": 0.3763, + "step": 30116 + }, + { + "epoch": 0.5204071053359137, + "grad_norm": 0.9387366207674551, + "learning_rate": 9.825118436327674e-06, + "loss": 0.5708, + "step": 30117 + }, + { + "epoch": 0.5204243848493226, + "grad_norm": 0.9205929352718797, + "learning_rate": 9.82455887265574e-06, + "loss": 0.4112, + "step": 30118 + }, + { + "epoch": 0.5204416643627315, + "grad_norm": 0.8771063912504348, + "learning_rate": 9.823999309533307e-06, + "loss": 0.3795, + "step": 30119 + }, + { + "epoch": 0.5204589438761404, + "grad_norm": 0.6424433804409512, + "learning_rate": 9.823439746962122e-06, + "loss": 0.4241, + "step": 30120 + }, + { + "epoch": 0.5204762233895494, + "grad_norm": 1.1137567847281427, + "learning_rate": 9.822880184943932e-06, + "loss": 0.4247, + "step": 30121 + }, + { + "epoch": 0.5204935029029583, + "grad_norm": 0.651832790481075, + "learning_rate": 9.822320623480497e-06, + "loss": 0.3189, + "step": 30122 + }, + { + "epoch": 0.5205107824163672, + "grad_norm": 1.4621306162519299, + "learning_rate": 9.821761062573568e-06, + "loss": 0.4149, + "step": 30123 + }, + { + "epoch": 0.5205280619297761, + "grad_norm": 0.7927963334833286, + "learning_rate": 9.821201502224896e-06, + "loss": 0.2154, + "step": 30124 + }, + { + "epoch": 0.520545341443185, + "grad_norm": 1.5380180175423228, + "learning_rate": 9.820641942436232e-06, + "loss": 0.3095, + "step": 30125 + }, + { + "epoch": 0.5205626209565939, + "grad_norm": 1.274824939010592, + "learning_rate": 9.820082383209339e-06, + "loss": 0.431, + "step": 30126 + }, + { + "epoch": 0.5205799004700028, + "grad_norm": 0.78243663914189, + "learning_rate": 9.819522824545954e-06, + "loss": 0.3315, + "step": 30127 + }, + { + "epoch": 0.5205971799834117, + "grad_norm": 0.9023865053482651, + "learning_rate": 9.818963266447841e-06, + "loss": 0.4167, + "step": 30128 + }, + { + "epoch": 0.5206144594968206, + "grad_norm": 0.5130949371598299, + "learning_rate": 9.818403708916747e-06, + "loss": 0.495, + "step": 30129 + }, + { + "epoch": 0.5206317390102295, + "grad_norm": 1.411774176605971, + "learning_rate": 9.817844151954426e-06, + "loss": 0.4214, + "step": 30130 + }, + { + "epoch": 0.5206490185236384, + "grad_norm": 1.089027631286902, + "learning_rate": 9.817284595562636e-06, + "loss": 0.3671, + "step": 30131 + }, + { + "epoch": 0.5206662980370472, + "grad_norm": 1.2060077407541343, + "learning_rate": 9.816725039743118e-06, + "loss": 0.4755, + "step": 30132 + }, + { + "epoch": 0.5206835775504561, + "grad_norm": 0.9734566285632212, + "learning_rate": 9.816165484497633e-06, + "loss": 0.2508, + "step": 30133 + }, + { + "epoch": 0.520700857063865, + "grad_norm": 0.9207147965005212, + "learning_rate": 9.815605929827931e-06, + "loss": 0.5155, + "step": 30134 + }, + { + "epoch": 0.520718136577274, + "grad_norm": 1.0380931409245906, + "learning_rate": 9.815046375735768e-06, + "loss": 0.3401, + "step": 30135 + }, + { + "epoch": 0.5207354160906829, + "grad_norm": 0.8033886703228729, + "learning_rate": 9.81448682222289e-06, + "loss": 0.2812, + "step": 30136 + }, + { + "epoch": 0.5207526956040918, + "grad_norm": 0.6467984038889293, + "learning_rate": 9.813927269291059e-06, + "loss": 0.4513, + "step": 30137 + }, + { + "epoch": 0.5207699751175007, + "grad_norm": 0.6739622360518825, + "learning_rate": 9.813367716942017e-06, + "loss": 0.4493, + "step": 30138 + }, + { + "epoch": 0.5207872546309096, + "grad_norm": 1.37886583504356, + "learning_rate": 9.812808165177524e-06, + "loss": 0.2645, + "step": 30139 + }, + { + "epoch": 0.5208045341443185, + "grad_norm": 0.656493861601918, + "learning_rate": 9.812248613999332e-06, + "loss": 0.5276, + "step": 30140 + }, + { + "epoch": 0.5208218136577274, + "grad_norm": 0.97205070655657, + "learning_rate": 9.811689063409186e-06, + "loss": 0.5411, + "step": 30141 + }, + { + "epoch": 0.5208390931711363, + "grad_norm": 1.5431788417733439, + "learning_rate": 9.811129513408847e-06, + "loss": 0.3974, + "step": 30142 + }, + { + "epoch": 0.5208563726845452, + "grad_norm": 1.4270929253448288, + "learning_rate": 9.810569964000065e-06, + "loss": 0.3944, + "step": 30143 + }, + { + "epoch": 0.5208736521979541, + "grad_norm": 0.7329100645918986, + "learning_rate": 9.810010415184592e-06, + "loss": 0.3122, + "step": 30144 + }, + { + "epoch": 0.520890931711363, + "grad_norm": 0.8523506321736714, + "learning_rate": 9.80945086696418e-06, + "loss": 0.5625, + "step": 30145 + }, + { + "epoch": 0.5209082112247719, + "grad_norm": 1.0522364606112984, + "learning_rate": 9.808891319340585e-06, + "loss": 0.4524, + "step": 30146 + }, + { + "epoch": 0.5209254907381808, + "grad_norm": 1.0830238297366341, + "learning_rate": 9.808331772315553e-06, + "loss": 0.4434, + "step": 30147 + }, + { + "epoch": 0.5209427702515897, + "grad_norm": 1.2407188069702342, + "learning_rate": 9.807772225890844e-06, + "loss": 0.4069, + "step": 30148 + }, + { + "epoch": 0.5209600497649987, + "grad_norm": 0.9459205379517517, + "learning_rate": 9.807212680068204e-06, + "loss": 0.4143, + "step": 30149 + }, + { + "epoch": 0.5209773292784076, + "grad_norm": 1.223896007899836, + "learning_rate": 9.80665313484939e-06, + "loss": 0.3935, + "step": 30150 + }, + { + "epoch": 0.5209946087918165, + "grad_norm": 0.9082296343381782, + "learning_rate": 9.806093590236156e-06, + "loss": 0.2941, + "step": 30151 + }, + { + "epoch": 0.5210118883052254, + "grad_norm": 0.5107818750846234, + "learning_rate": 9.805534046230248e-06, + "loss": 0.6558, + "step": 30152 + }, + { + "epoch": 0.5210291678186342, + "grad_norm": 0.8351537848381392, + "learning_rate": 9.804974502833424e-06, + "loss": 0.572, + "step": 30153 + }, + { + "epoch": 0.5210464473320431, + "grad_norm": 0.7807187874357578, + "learning_rate": 9.804414960047431e-06, + "loss": 0.3511, + "step": 30154 + }, + { + "epoch": 0.521063726845452, + "grad_norm": 1.2322784534148277, + "learning_rate": 9.803855417874028e-06, + "loss": 0.4133, + "step": 30155 + }, + { + "epoch": 0.5210810063588609, + "grad_norm": 0.9484935956999974, + "learning_rate": 9.803295876314964e-06, + "loss": 0.5319, + "step": 30156 + }, + { + "epoch": 0.5210982858722698, + "grad_norm": 0.6981779019990056, + "learning_rate": 9.802736335371994e-06, + "loss": 0.4246, + "step": 30157 + }, + { + "epoch": 0.5211155653856787, + "grad_norm": 0.9459359551920651, + "learning_rate": 9.802176795046866e-06, + "loss": 0.4262, + "step": 30158 + }, + { + "epoch": 0.5211328448990876, + "grad_norm": 0.7354076900274059, + "learning_rate": 9.801617255341337e-06, + "loss": 0.4832, + "step": 30159 + }, + { + "epoch": 0.5211501244124965, + "grad_norm": 1.9087461058438233, + "learning_rate": 9.801057716257161e-06, + "loss": 0.3956, + "step": 30160 + }, + { + "epoch": 0.5211674039259054, + "grad_norm": 0.8118686167797832, + "learning_rate": 9.800498177796083e-06, + "loss": 0.2769, + "step": 30161 + }, + { + "epoch": 0.5211846834393143, + "grad_norm": 0.7061321293449382, + "learning_rate": 9.799938639959861e-06, + "loss": 0.4294, + "step": 30162 + }, + { + "epoch": 0.5212019629527233, + "grad_norm": 0.8411071390155749, + "learning_rate": 9.799379102750245e-06, + "loss": 0.3761, + "step": 30163 + }, + { + "epoch": 0.5212192424661322, + "grad_norm": 1.0338117440964891, + "learning_rate": 9.79881956616899e-06, + "loss": 0.4308, + "step": 30164 + }, + { + "epoch": 0.5212365219795411, + "grad_norm": 0.5148471940510858, + "learning_rate": 9.798260030217846e-06, + "loss": 0.3004, + "step": 30165 + }, + { + "epoch": 0.52125380149295, + "grad_norm": 0.9514452260652839, + "learning_rate": 9.797700494898572e-06, + "loss": 0.3309, + "step": 30166 + }, + { + "epoch": 0.5212710810063589, + "grad_norm": 0.7603955119362621, + "learning_rate": 9.79714096021291e-06, + "loss": 0.3083, + "step": 30167 + }, + { + "epoch": 0.5212883605197678, + "grad_norm": 0.7993442624543148, + "learning_rate": 9.796581426162622e-06, + "loss": 0.4072, + "step": 30168 + }, + { + "epoch": 0.5213056400331767, + "grad_norm": 0.5669668818273808, + "learning_rate": 9.796021892749451e-06, + "loss": 0.56, + "step": 30169 + }, + { + "epoch": 0.5213229195465856, + "grad_norm": 1.0696805496251718, + "learning_rate": 9.79546235997516e-06, + "loss": 0.374, + "step": 30170 + }, + { + "epoch": 0.5213401990599945, + "grad_norm": 0.9390563091511337, + "learning_rate": 9.794902827841497e-06, + "loss": 0.5794, + "step": 30171 + }, + { + "epoch": 0.5213574785734034, + "grad_norm": 1.0205095340586627, + "learning_rate": 9.794343296350209e-06, + "loss": 0.3785, + "step": 30172 + }, + { + "epoch": 0.5213747580868123, + "grad_norm": 1.2273105225185155, + "learning_rate": 9.793783765503058e-06, + "loss": 0.3746, + "step": 30173 + }, + { + "epoch": 0.5213920376002211, + "grad_norm": 2.0879319498321047, + "learning_rate": 9.793224235301787e-06, + "loss": 0.3461, + "step": 30174 + }, + { + "epoch": 0.52140931711363, + "grad_norm": 0.7758774567183018, + "learning_rate": 9.792664705748157e-06, + "loss": 0.3473, + "step": 30175 + }, + { + "epoch": 0.5214265966270389, + "grad_norm": 0.715239179084827, + "learning_rate": 9.792105176843916e-06, + "loss": 0.4319, + "step": 30176 + }, + { + "epoch": 0.5214438761404478, + "grad_norm": 1.127000946081766, + "learning_rate": 9.79154564859082e-06, + "loss": 0.5667, + "step": 30177 + }, + { + "epoch": 0.5214611556538568, + "grad_norm": 0.7251456666212313, + "learning_rate": 9.790986120990615e-06, + "loss": 0.3386, + "step": 30178 + }, + { + "epoch": 0.5214784351672657, + "grad_norm": 0.7023892147256726, + "learning_rate": 9.79042659404506e-06, + "loss": 0.3033, + "step": 30179 + }, + { + "epoch": 0.5214957146806746, + "grad_norm": 1.2629474689969205, + "learning_rate": 9.7898670677559e-06, + "loss": 0.3758, + "step": 30180 + }, + { + "epoch": 0.5215129941940835, + "grad_norm": 1.0613123592058855, + "learning_rate": 9.7893075421249e-06, + "loss": 0.4265, + "step": 30181 + }, + { + "epoch": 0.5215302737074924, + "grad_norm": 1.0542245987488366, + "learning_rate": 9.788748017153803e-06, + "loss": 0.4087, + "step": 30182 + }, + { + "epoch": 0.5215475532209013, + "grad_norm": 1.127593956823459, + "learning_rate": 9.788188492844358e-06, + "loss": 0.3679, + "step": 30183 + }, + { + "epoch": 0.5215648327343102, + "grad_norm": 0.8549064261719818, + "learning_rate": 9.787628969198326e-06, + "loss": 0.3586, + "step": 30184 + }, + { + "epoch": 0.5215821122477191, + "grad_norm": 0.884200872896567, + "learning_rate": 9.787069446217455e-06, + "loss": 0.6721, + "step": 30185 + }, + { + "epoch": 0.521599391761128, + "grad_norm": 0.7073191033256744, + "learning_rate": 9.786509923903503e-06, + "loss": 0.5116, + "step": 30186 + }, + { + "epoch": 0.5216166712745369, + "grad_norm": 1.1007726416857568, + "learning_rate": 9.785950402258212e-06, + "loss": 0.3016, + "step": 30187 + }, + { + "epoch": 0.5216339507879458, + "grad_norm": 0.8011193137213902, + "learning_rate": 9.785390881283345e-06, + "loss": 0.3761, + "step": 30188 + }, + { + "epoch": 0.5216512303013547, + "grad_norm": 0.8314778773299468, + "learning_rate": 9.784831360980647e-06, + "loss": 0.331, + "step": 30189 + }, + { + "epoch": 0.5216685098147636, + "grad_norm": 0.9799060515361921, + "learning_rate": 9.784271841351875e-06, + "loss": 0.4051, + "step": 30190 + }, + { + "epoch": 0.5216857893281726, + "grad_norm": 0.5552726559296917, + "learning_rate": 9.783712322398784e-06, + "loss": 0.7315, + "step": 30191 + }, + { + "epoch": 0.5217030688415815, + "grad_norm": 0.8153697491429167, + "learning_rate": 9.783152804123115e-06, + "loss": 0.3359, + "step": 30192 + }, + { + "epoch": 0.5217203483549904, + "grad_norm": 1.1919680399183097, + "learning_rate": 9.782593286526632e-06, + "loss": 0.499, + "step": 30193 + }, + { + "epoch": 0.5217376278683993, + "grad_norm": 1.3333581626625024, + "learning_rate": 9.78203376961108e-06, + "loss": 0.3631, + "step": 30194 + }, + { + "epoch": 0.5217549073818081, + "grad_norm": 1.0707748894382556, + "learning_rate": 9.781474253378219e-06, + "loss": 0.473, + "step": 30195 + }, + { + "epoch": 0.521772186895217, + "grad_norm": 1.276732593553177, + "learning_rate": 9.780914737829794e-06, + "loss": 0.5341, + "step": 30196 + }, + { + "epoch": 0.5217894664086259, + "grad_norm": 1.509597446391207, + "learning_rate": 9.780355222967567e-06, + "loss": 0.5569, + "step": 30197 + }, + { + "epoch": 0.5218067459220348, + "grad_norm": 1.5641326881367978, + "learning_rate": 9.779795708793278e-06, + "loss": 0.511, + "step": 30198 + }, + { + "epoch": 0.5218240254354437, + "grad_norm": 0.903265074803211, + "learning_rate": 9.779236195308687e-06, + "loss": 0.2923, + "step": 30199 + }, + { + "epoch": 0.5218413049488526, + "grad_norm": 0.8966007698404966, + "learning_rate": 9.778676682515543e-06, + "loss": 0.4371, + "step": 30200 + }, + { + "epoch": 0.5218585844622615, + "grad_norm": 1.1746523692820572, + "learning_rate": 9.778117170415606e-06, + "loss": 0.2993, + "step": 30201 + }, + { + "epoch": 0.5218758639756704, + "grad_norm": 0.9951765547806499, + "learning_rate": 9.77755765901062e-06, + "loss": 0.2752, + "step": 30202 + }, + { + "epoch": 0.5218931434890793, + "grad_norm": 0.9032664208365243, + "learning_rate": 9.776998148302338e-06, + "loss": 0.509, + "step": 30203 + }, + { + "epoch": 0.5219104230024882, + "grad_norm": 5.550182687820728, + "learning_rate": 9.776438638292517e-06, + "loss": 0.9385, + "step": 30204 + }, + { + "epoch": 0.5219277025158972, + "grad_norm": 1.1359540708652733, + "learning_rate": 9.775879128982906e-06, + "loss": 0.5208, + "step": 30205 + }, + { + "epoch": 0.5219449820293061, + "grad_norm": 1.2435366143419386, + "learning_rate": 9.775319620375262e-06, + "loss": 0.4819, + "step": 30206 + }, + { + "epoch": 0.521962261542715, + "grad_norm": 0.8231800228878191, + "learning_rate": 9.774760112471329e-06, + "loss": 0.4259, + "step": 30207 + }, + { + "epoch": 0.5219795410561239, + "grad_norm": 1.1427107346993284, + "learning_rate": 9.774200605272868e-06, + "loss": 0.4724, + "step": 30208 + }, + { + "epoch": 0.5219968205695328, + "grad_norm": 1.254772172817154, + "learning_rate": 9.773641098781624e-06, + "loss": 0.6297, + "step": 30209 + }, + { + "epoch": 0.5220141000829417, + "grad_norm": 1.001394395625397, + "learning_rate": 9.773081592999356e-06, + "loss": 0.4215, + "step": 30210 + }, + { + "epoch": 0.5220313795963506, + "grad_norm": 0.9363145535542811, + "learning_rate": 9.772522087927814e-06, + "loss": 0.502, + "step": 30211 + }, + { + "epoch": 0.5220486591097595, + "grad_norm": 1.3187681018988466, + "learning_rate": 9.771962583568751e-06, + "loss": 0.6483, + "step": 30212 + }, + { + "epoch": 0.5220659386231684, + "grad_norm": 1.09868222414997, + "learning_rate": 9.771403079923919e-06, + "loss": 0.4849, + "step": 30213 + }, + { + "epoch": 0.5220832181365773, + "grad_norm": 0.8761067583665118, + "learning_rate": 9.770843576995067e-06, + "loss": 0.4276, + "step": 30214 + }, + { + "epoch": 0.5221004976499862, + "grad_norm": 1.1085594821672482, + "learning_rate": 9.770284074783951e-06, + "loss": 0.3565, + "step": 30215 + }, + { + "epoch": 0.522117777163395, + "grad_norm": 1.3533212714886411, + "learning_rate": 9.769724573292322e-06, + "loss": 0.2726, + "step": 30216 + }, + { + "epoch": 0.5221350566768039, + "grad_norm": 0.589651646777901, + "learning_rate": 9.769165072521938e-06, + "loss": 0.2606, + "step": 30217 + }, + { + "epoch": 0.5221523361902128, + "grad_norm": 1.2387294287691382, + "learning_rate": 9.768605572474541e-06, + "loss": 0.4256, + "step": 30218 + }, + { + "epoch": 0.5221696157036217, + "grad_norm": 0.6051893660125127, + "learning_rate": 9.768046073151891e-06, + "loss": 0.731, + "step": 30219 + }, + { + "epoch": 0.5221868952170307, + "grad_norm": 1.039911540239996, + "learning_rate": 9.767486574555737e-06, + "loss": 0.4733, + "step": 30220 + }, + { + "epoch": 0.5222041747304396, + "grad_norm": 1.275372360475729, + "learning_rate": 9.766927076687837e-06, + "loss": 0.37, + "step": 30221 + }, + { + "epoch": 0.5222214542438485, + "grad_norm": 1.4162122393284873, + "learning_rate": 9.766367579549936e-06, + "loss": 0.5334, + "step": 30222 + }, + { + "epoch": 0.5222387337572574, + "grad_norm": 1.0207968251729431, + "learning_rate": 9.76580808314379e-06, + "loss": 0.3046, + "step": 30223 + }, + { + "epoch": 0.5222560132706663, + "grad_norm": 1.203980400915767, + "learning_rate": 9.76524858747115e-06, + "loss": 0.3871, + "step": 30224 + }, + { + "epoch": 0.5222732927840752, + "grad_norm": 1.0788253737796192, + "learning_rate": 9.764689092533767e-06, + "loss": 0.4272, + "step": 30225 + }, + { + "epoch": 0.5222905722974841, + "grad_norm": 1.2984600502604684, + "learning_rate": 9.764129598333401e-06, + "loss": 0.4395, + "step": 30226 + }, + { + "epoch": 0.522307851810893, + "grad_norm": 1.0731409375249346, + "learning_rate": 9.763570104871795e-06, + "loss": 0.372, + "step": 30227 + }, + { + "epoch": 0.5223251313243019, + "grad_norm": 1.00556619613539, + "learning_rate": 9.763010612150708e-06, + "loss": 0.3224, + "step": 30228 + }, + { + "epoch": 0.5223424108377108, + "grad_norm": 0.8587860639693065, + "learning_rate": 9.762451120171886e-06, + "loss": 0.6337, + "step": 30229 + }, + { + "epoch": 0.5223596903511197, + "grad_norm": 1.3114470992586227, + "learning_rate": 9.761891628937087e-06, + "loss": 0.4675, + "step": 30230 + }, + { + "epoch": 0.5223769698645286, + "grad_norm": 1.1050347085996932, + "learning_rate": 9.761332138448062e-06, + "loss": 0.4707, + "step": 30231 + }, + { + "epoch": 0.5223942493779375, + "grad_norm": 1.2580702603043599, + "learning_rate": 9.760772648706566e-06, + "loss": 0.4551, + "step": 30232 + }, + { + "epoch": 0.5224115288913465, + "grad_norm": 1.8300981865373553, + "learning_rate": 9.760213159714346e-06, + "loss": 0.5563, + "step": 30233 + }, + { + "epoch": 0.5224288084047554, + "grad_norm": 0.9163416880152344, + "learning_rate": 9.759653671473154e-06, + "loss": 0.5486, + "step": 30234 + }, + { + "epoch": 0.5224460879181643, + "grad_norm": 1.0610528245841977, + "learning_rate": 9.759094183984748e-06, + "loss": 0.3464, + "step": 30235 + }, + { + "epoch": 0.5224633674315732, + "grad_norm": 0.711660109783663, + "learning_rate": 9.758534697250874e-06, + "loss": 0.3696, + "step": 30236 + }, + { + "epoch": 0.522480646944982, + "grad_norm": 1.0649709960023217, + "learning_rate": 9.757975211273292e-06, + "loss": 0.4429, + "step": 30237 + }, + { + "epoch": 0.5224979264583909, + "grad_norm": 1.08486259344462, + "learning_rate": 9.757415726053747e-06, + "loss": 0.4636, + "step": 30238 + }, + { + "epoch": 0.5225152059717998, + "grad_norm": 1.1947407133756018, + "learning_rate": 9.756856241593996e-06, + "loss": 0.5273, + "step": 30239 + }, + { + "epoch": 0.5225324854852087, + "grad_norm": 0.8108273780370329, + "learning_rate": 9.756296757895787e-06, + "loss": 0.3874, + "step": 30240 + }, + { + "epoch": 0.5225497649986176, + "grad_norm": 0.9325819508940918, + "learning_rate": 9.755737274960881e-06, + "loss": 0.2903, + "step": 30241 + }, + { + "epoch": 0.5225670445120265, + "grad_norm": 0.844916504383594, + "learning_rate": 9.755177792791017e-06, + "loss": 0.3877, + "step": 30242 + }, + { + "epoch": 0.5225843240254354, + "grad_norm": 1.0112565675415115, + "learning_rate": 9.75461831138796e-06, + "loss": 0.4115, + "step": 30243 + }, + { + "epoch": 0.5226016035388443, + "grad_norm": 1.1029638364569325, + "learning_rate": 9.754058830753456e-06, + "loss": 0.367, + "step": 30244 + }, + { + "epoch": 0.5226188830522532, + "grad_norm": 0.8529967108897667, + "learning_rate": 9.753499350889256e-06, + "loss": 0.4912, + "step": 30245 + }, + { + "epoch": 0.5226361625656621, + "grad_norm": 0.678556634777173, + "learning_rate": 9.752939871797119e-06, + "loss": 0.5533, + "step": 30246 + }, + { + "epoch": 0.522653442079071, + "grad_norm": 1.2427400862158324, + "learning_rate": 9.75238039347879e-06, + "loss": 0.2863, + "step": 30247 + }, + { + "epoch": 0.52267072159248, + "grad_norm": 0.5963362324170578, + "learning_rate": 9.751820915936025e-06, + "loss": 0.6836, + "step": 30248 + }, + { + "epoch": 0.5226880011058889, + "grad_norm": 0.6700616372843056, + "learning_rate": 9.751261439170573e-06, + "loss": 0.4072, + "step": 30249 + }, + { + "epoch": 0.5227052806192978, + "grad_norm": 0.8920482063824167, + "learning_rate": 9.750701963184194e-06, + "loss": 0.3874, + "step": 30250 + }, + { + "epoch": 0.5227225601327067, + "grad_norm": 1.1639744302346946, + "learning_rate": 9.750142487978632e-06, + "loss": 0.3767, + "step": 30251 + }, + { + "epoch": 0.5227398396461156, + "grad_norm": 0.7281632634359394, + "learning_rate": 9.749583013555647e-06, + "loss": 0.2411, + "step": 30252 + }, + { + "epoch": 0.5227571191595245, + "grad_norm": 0.49826073804845605, + "learning_rate": 9.749023539916984e-06, + "loss": 0.5811, + "step": 30253 + }, + { + "epoch": 0.5227743986729334, + "grad_norm": 0.6619872202036781, + "learning_rate": 9.748464067064397e-06, + "loss": 0.2384, + "step": 30254 + }, + { + "epoch": 0.5227916781863423, + "grad_norm": 0.8242104120051444, + "learning_rate": 9.747904594999642e-06, + "loss": 0.3776, + "step": 30255 + }, + { + "epoch": 0.5228089576997512, + "grad_norm": 0.9349282053470571, + "learning_rate": 9.747345123724467e-06, + "loss": 0.5506, + "step": 30256 + }, + { + "epoch": 0.5228262372131601, + "grad_norm": 0.9699747320438675, + "learning_rate": 9.746785653240628e-06, + "loss": 0.4125, + "step": 30257 + }, + { + "epoch": 0.522843516726569, + "grad_norm": 1.2202095001624942, + "learning_rate": 9.746226183549874e-06, + "loss": 0.4481, + "step": 30258 + }, + { + "epoch": 0.5228607962399778, + "grad_norm": 1.2628666010967908, + "learning_rate": 9.74566671465396e-06, + "loss": 0.2604, + "step": 30259 + }, + { + "epoch": 0.5228780757533867, + "grad_norm": 0.9074698629266771, + "learning_rate": 9.745107246554637e-06, + "loss": 0.4799, + "step": 30260 + }, + { + "epoch": 0.5228953552667956, + "grad_norm": 0.7461630388742577, + "learning_rate": 9.744547779253659e-06, + "loss": 0.5708, + "step": 30261 + }, + { + "epoch": 0.5229126347802046, + "grad_norm": 0.709901829449689, + "learning_rate": 9.743988312752771e-06, + "loss": 0.3212, + "step": 30262 + }, + { + "epoch": 0.5229299142936135, + "grad_norm": 0.97163342921881, + "learning_rate": 9.743428847053739e-06, + "loss": 0.4606, + "step": 30263 + }, + { + "epoch": 0.5229471938070224, + "grad_norm": 0.9696567450816727, + "learning_rate": 9.742869382158304e-06, + "loss": 0.384, + "step": 30264 + }, + { + "epoch": 0.5229644733204313, + "grad_norm": 1.1427240111655168, + "learning_rate": 9.74230991806822e-06, + "loss": 0.3575, + "step": 30265 + }, + { + "epoch": 0.5229817528338402, + "grad_norm": 0.5908007039836081, + "learning_rate": 9.741750454785244e-06, + "loss": 0.6738, + "step": 30266 + }, + { + "epoch": 0.5229990323472491, + "grad_norm": 0.8916680358833492, + "learning_rate": 9.741190992311122e-06, + "loss": 0.3672, + "step": 30267 + }, + { + "epoch": 0.523016311860658, + "grad_norm": 0.661124244212625, + "learning_rate": 9.740631530647611e-06, + "loss": 0.499, + "step": 30268 + }, + { + "epoch": 0.5230335913740669, + "grad_norm": 1.2832896032505108, + "learning_rate": 9.740072069796462e-06, + "loss": 0.5769, + "step": 30269 + }, + { + "epoch": 0.5230508708874758, + "grad_norm": 0.8861710398469949, + "learning_rate": 9.739512609759427e-06, + "loss": 0.3101, + "step": 30270 + }, + { + "epoch": 0.5230681504008847, + "grad_norm": 1.1508564296142076, + "learning_rate": 9.738953150538255e-06, + "loss": 0.3365, + "step": 30271 + }, + { + "epoch": 0.5230854299142936, + "grad_norm": 1.1164754083167772, + "learning_rate": 9.738393692134708e-06, + "loss": 0.4345, + "step": 30272 + }, + { + "epoch": 0.5231027094277025, + "grad_norm": 1.3064289083980123, + "learning_rate": 9.737834234550526e-06, + "loss": 0.3386, + "step": 30273 + }, + { + "epoch": 0.5231199889411114, + "grad_norm": 0.5694567397629487, + "learning_rate": 9.73727477778747e-06, + "loss": 0.5549, + "step": 30274 + }, + { + "epoch": 0.5231372684545204, + "grad_norm": 1.2767446505598645, + "learning_rate": 9.736715321847289e-06, + "loss": 0.7021, + "step": 30275 + }, + { + "epoch": 0.5231545479679293, + "grad_norm": 1.0280033155234476, + "learning_rate": 9.736155866731734e-06, + "loss": 0.4704, + "step": 30276 + }, + { + "epoch": 0.5231718274813382, + "grad_norm": 0.7714954083517958, + "learning_rate": 9.735596412442564e-06, + "loss": 0.7117, + "step": 30277 + }, + { + "epoch": 0.5231891069947471, + "grad_norm": 0.8947275749607561, + "learning_rate": 9.73503695898152e-06, + "loss": 0.3483, + "step": 30278 + }, + { + "epoch": 0.523206386508156, + "grad_norm": 0.9001825887550338, + "learning_rate": 9.734477506350363e-06, + "loss": 0.4071, + "step": 30279 + }, + { + "epoch": 0.5232236660215648, + "grad_norm": 1.014454166586532, + "learning_rate": 9.733918054550842e-06, + "loss": 0.5171, + "step": 30280 + }, + { + "epoch": 0.5232409455349737, + "grad_norm": 1.139064700686401, + "learning_rate": 9.733358603584713e-06, + "loss": 0.4002, + "step": 30281 + }, + { + "epoch": 0.5232582250483826, + "grad_norm": 0.8518655888654336, + "learning_rate": 9.732799153453718e-06, + "loss": 0.3462, + "step": 30282 + }, + { + "epoch": 0.5232755045617915, + "grad_norm": 1.179580315946515, + "learning_rate": 9.732239704159624e-06, + "loss": 0.3462, + "step": 30283 + }, + { + "epoch": 0.5232927840752004, + "grad_norm": 2.263528874555401, + "learning_rate": 9.731680255704174e-06, + "loss": 0.5703, + "step": 30284 + }, + { + "epoch": 0.5233100635886093, + "grad_norm": 0.6309573071236082, + "learning_rate": 9.731120808089117e-06, + "loss": 0.5859, + "step": 30285 + }, + { + "epoch": 0.5233273431020182, + "grad_norm": 1.08217110773309, + "learning_rate": 9.730561361316216e-06, + "loss": 0.3667, + "step": 30286 + }, + { + "epoch": 0.5233446226154271, + "grad_norm": 0.9155953698416255, + "learning_rate": 9.730001915387215e-06, + "loss": 0.4688, + "step": 30287 + }, + { + "epoch": 0.523361902128836, + "grad_norm": 1.3759485720717857, + "learning_rate": 9.729442470303867e-06, + "loss": 0.4247, + "step": 30288 + }, + { + "epoch": 0.523379181642245, + "grad_norm": 0.8690969472902833, + "learning_rate": 9.728883026067926e-06, + "loss": 0.3726, + "step": 30289 + }, + { + "epoch": 0.5233964611556539, + "grad_norm": 1.0352508656980117, + "learning_rate": 9.728323582681145e-06, + "loss": 0.4548, + "step": 30290 + }, + { + "epoch": 0.5234137406690628, + "grad_norm": 1.0695738031716742, + "learning_rate": 9.727764140145274e-06, + "loss": 0.6406, + "step": 30291 + }, + { + "epoch": 0.5234310201824717, + "grad_norm": 0.5064580036018183, + "learning_rate": 9.727204698462069e-06, + "loss": 0.6524, + "step": 30292 + }, + { + "epoch": 0.5234482996958806, + "grad_norm": 0.816368013573375, + "learning_rate": 9.726645257633278e-06, + "loss": 0.4905, + "step": 30293 + }, + { + "epoch": 0.5234655792092895, + "grad_norm": 1.158242228734521, + "learning_rate": 9.726085817660654e-06, + "loss": 0.5994, + "step": 30294 + }, + { + "epoch": 0.5234828587226984, + "grad_norm": 1.035630062805006, + "learning_rate": 9.725526378545953e-06, + "loss": 0.6758, + "step": 30295 + }, + { + "epoch": 0.5235001382361073, + "grad_norm": 0.8598692256116052, + "learning_rate": 9.72496694029092e-06, + "loss": 0.4006, + "step": 30296 + }, + { + "epoch": 0.5235174177495162, + "grad_norm": 0.7384217446310776, + "learning_rate": 9.724407502897317e-06, + "loss": 0.3618, + "step": 30297 + }, + { + "epoch": 0.5235346972629251, + "grad_norm": 1.0098266752646865, + "learning_rate": 9.723848066366884e-06, + "loss": 0.3728, + "step": 30298 + }, + { + "epoch": 0.523551976776334, + "grad_norm": 1.1347937355631188, + "learning_rate": 9.723288630701384e-06, + "loss": 0.3767, + "step": 30299 + }, + { + "epoch": 0.5235692562897429, + "grad_norm": 0.9235020955073022, + "learning_rate": 9.722729195902563e-06, + "loss": 0.4113, + "step": 30300 + }, + { + "epoch": 0.5235865358031517, + "grad_norm": 0.8734578876011962, + "learning_rate": 9.72216976197218e-06, + "loss": 0.5103, + "step": 30301 + }, + { + "epoch": 0.5236038153165606, + "grad_norm": 0.7935922641205944, + "learning_rate": 9.721610328911973e-06, + "loss": 0.4625, + "step": 30302 + }, + { + "epoch": 0.5236210948299695, + "grad_norm": 1.3818846580857624, + "learning_rate": 9.721050896723712e-06, + "loss": 0.4523, + "step": 30303 + }, + { + "epoch": 0.5236383743433785, + "grad_norm": 1.2009018323582243, + "learning_rate": 9.720491465409139e-06, + "loss": 0.3623, + "step": 30304 + }, + { + "epoch": 0.5236556538567874, + "grad_norm": 0.7425723071409949, + "learning_rate": 9.719932034970005e-06, + "loss": 0.4583, + "step": 30305 + }, + { + "epoch": 0.5236729333701963, + "grad_norm": 0.49944433662965154, + "learning_rate": 9.71937260540807e-06, + "loss": 0.8761, + "step": 30306 + }, + { + "epoch": 0.5236902128836052, + "grad_norm": 0.7112073128312086, + "learning_rate": 9.718813176725078e-06, + "loss": 0.2826, + "step": 30307 + }, + { + "epoch": 0.5237074923970141, + "grad_norm": 1.170043046390386, + "learning_rate": 9.718253748922786e-06, + "loss": 0.5312, + "step": 30308 + }, + { + "epoch": 0.523724771910423, + "grad_norm": 1.161237002995, + "learning_rate": 9.717694322002941e-06, + "loss": 0.3064, + "step": 30309 + }, + { + "epoch": 0.5237420514238319, + "grad_norm": 1.353325782652092, + "learning_rate": 9.717134895967303e-06, + "loss": 0.7248, + "step": 30310 + }, + { + "epoch": 0.5237593309372408, + "grad_norm": 0.7541177884091949, + "learning_rate": 9.716575470817617e-06, + "loss": 0.49, + "step": 30311 + }, + { + "epoch": 0.5237766104506497, + "grad_norm": 1.0504180019320815, + "learning_rate": 9.716016046555642e-06, + "loss": 0.5631, + "step": 30312 + }, + { + "epoch": 0.5237938899640586, + "grad_norm": 0.6266308582249773, + "learning_rate": 9.715456623183122e-06, + "loss": 0.4744, + "step": 30313 + }, + { + "epoch": 0.5238111694774675, + "grad_norm": 0.392742843840718, + "learning_rate": 9.714897200701817e-06, + "loss": 0.5141, + "step": 30314 + }, + { + "epoch": 0.5238284489908764, + "grad_norm": 1.3559860808885922, + "learning_rate": 9.714337779113477e-06, + "loss": 0.5609, + "step": 30315 + }, + { + "epoch": 0.5238457285042853, + "grad_norm": 1.4090910852123073, + "learning_rate": 9.713778358419847e-06, + "loss": 0.4409, + "step": 30316 + }, + { + "epoch": 0.5238630080176943, + "grad_norm": 1.0239150956110348, + "learning_rate": 9.71321893862269e-06, + "loss": 0.5404, + "step": 30317 + }, + { + "epoch": 0.5238802875311032, + "grad_norm": 1.0426262137252915, + "learning_rate": 9.71265951972375e-06, + "loss": 0.3931, + "step": 30318 + }, + { + "epoch": 0.5238975670445121, + "grad_norm": 0.8653525282011186, + "learning_rate": 9.712100101724782e-06, + "loss": 0.4441, + "step": 30319 + }, + { + "epoch": 0.523914846557921, + "grad_norm": 0.984959099882865, + "learning_rate": 9.71154068462754e-06, + "loss": 0.4306, + "step": 30320 + }, + { + "epoch": 0.5239321260713299, + "grad_norm": 1.1699886831913457, + "learning_rate": 9.710981268433776e-06, + "loss": 0.3723, + "step": 30321 + }, + { + "epoch": 0.5239494055847387, + "grad_norm": 1.5093020482933257, + "learning_rate": 9.710421853145234e-06, + "loss": 0.3937, + "step": 30322 + }, + { + "epoch": 0.5239666850981476, + "grad_norm": 0.7494021448255204, + "learning_rate": 9.709862438763681e-06, + "loss": 0.4043, + "step": 30323 + }, + { + "epoch": 0.5239839646115565, + "grad_norm": 0.8080603319929929, + "learning_rate": 9.709303025290856e-06, + "loss": 0.4386, + "step": 30324 + }, + { + "epoch": 0.5240012441249654, + "grad_norm": 0.7392502231380956, + "learning_rate": 9.708743612728518e-06, + "loss": 0.4643, + "step": 30325 + }, + { + "epoch": 0.5240185236383743, + "grad_norm": 0.7844926949016254, + "learning_rate": 9.70818420107842e-06, + "loss": 0.4238, + "step": 30326 + }, + { + "epoch": 0.5240358031517832, + "grad_norm": 0.6812414663611704, + "learning_rate": 9.707624790342305e-06, + "loss": 0.2272, + "step": 30327 + }, + { + "epoch": 0.5240530826651921, + "grad_norm": 1.296776227047678, + "learning_rate": 9.707065380521935e-06, + "loss": 0.5555, + "step": 30328 + }, + { + "epoch": 0.524070362178601, + "grad_norm": 1.2300165595739345, + "learning_rate": 9.706505971619056e-06, + "loss": 0.3835, + "step": 30329 + }, + { + "epoch": 0.5240876416920099, + "grad_norm": 1.198204461673222, + "learning_rate": 9.705946563635424e-06, + "loss": 0.3769, + "step": 30330 + }, + { + "epoch": 0.5241049212054188, + "grad_norm": 0.8925641231564438, + "learning_rate": 9.70538715657279e-06, + "loss": 0.6055, + "step": 30331 + }, + { + "epoch": 0.5241222007188278, + "grad_norm": 0.8387792165292832, + "learning_rate": 9.704827750432908e-06, + "loss": 0.3887, + "step": 30332 + }, + { + "epoch": 0.5241394802322367, + "grad_norm": 0.8497773361465042, + "learning_rate": 9.704268345217524e-06, + "loss": 0.3462, + "step": 30333 + }, + { + "epoch": 0.5241567597456456, + "grad_norm": 1.1835031030597267, + "learning_rate": 9.703708940928396e-06, + "loss": 0.4976, + "step": 30334 + }, + { + "epoch": 0.5241740392590545, + "grad_norm": 1.0479839490081762, + "learning_rate": 9.703149537567277e-06, + "loss": 0.5771, + "step": 30335 + }, + { + "epoch": 0.5241913187724634, + "grad_norm": 0.9505986470064023, + "learning_rate": 9.70259013513591e-06, + "loss": 0.4458, + "step": 30336 + }, + { + "epoch": 0.5242085982858723, + "grad_norm": 1.162109278971126, + "learning_rate": 9.702030733636059e-06, + "loss": 0.3245, + "step": 30337 + }, + { + "epoch": 0.5242258777992812, + "grad_norm": 1.0613489406513308, + "learning_rate": 9.701471333069465e-06, + "loss": 0.5103, + "step": 30338 + }, + { + "epoch": 0.5242431573126901, + "grad_norm": 1.3166217284240522, + "learning_rate": 9.70091193343789e-06, + "loss": 0.3734, + "step": 30339 + }, + { + "epoch": 0.524260436826099, + "grad_norm": 0.7398573824373291, + "learning_rate": 9.700352534743077e-06, + "loss": 0.2542, + "step": 30340 + }, + { + "epoch": 0.5242777163395079, + "grad_norm": 0.9442389229978858, + "learning_rate": 9.699793136986789e-06, + "loss": 0.3309, + "step": 30341 + }, + { + "epoch": 0.5242949958529168, + "grad_norm": 0.6113041122066996, + "learning_rate": 9.699233740170763e-06, + "loss": 0.6161, + "step": 30342 + }, + { + "epoch": 0.5243122753663256, + "grad_norm": 1.2039651178355584, + "learning_rate": 9.698674344296768e-06, + "loss": 0.4416, + "step": 30343 + }, + { + "epoch": 0.5243295548797345, + "grad_norm": 0.840769263356731, + "learning_rate": 9.698114949366544e-06, + "loss": 0.4787, + "step": 30344 + }, + { + "epoch": 0.5243468343931434, + "grad_norm": 1.5090089857188027, + "learning_rate": 9.697555555381848e-06, + "loss": 0.289, + "step": 30345 + }, + { + "epoch": 0.5243641139065524, + "grad_norm": 0.9883820169054646, + "learning_rate": 9.696996162344433e-06, + "loss": 0.6901, + "step": 30346 + }, + { + "epoch": 0.5243813934199613, + "grad_norm": 0.9795844000703857, + "learning_rate": 9.696436770256044e-06, + "loss": 0.6091, + "step": 30347 + }, + { + "epoch": 0.5243986729333702, + "grad_norm": 0.8224806927535496, + "learning_rate": 9.695877379118442e-06, + "loss": 0.2688, + "step": 30348 + }, + { + "epoch": 0.5244159524467791, + "grad_norm": 0.7370541603925136, + "learning_rate": 9.695317988933373e-06, + "loss": 0.3073, + "step": 30349 + }, + { + "epoch": 0.524433231960188, + "grad_norm": 1.034518935090644, + "learning_rate": 9.694758599702592e-06, + "loss": 0.3463, + "step": 30350 + }, + { + "epoch": 0.5244505114735969, + "grad_norm": 0.8633055453222997, + "learning_rate": 9.69419921142785e-06, + "loss": 0.45, + "step": 30351 + }, + { + "epoch": 0.5244677909870058, + "grad_norm": 0.6913582277595689, + "learning_rate": 9.693639824110902e-06, + "loss": 0.3203, + "step": 30352 + }, + { + "epoch": 0.5244850705004147, + "grad_norm": 1.0707646662621981, + "learning_rate": 9.693080437753494e-06, + "loss": 0.4411, + "step": 30353 + }, + { + "epoch": 0.5245023500138236, + "grad_norm": 1.2127552734713738, + "learning_rate": 9.692521052357381e-06, + "loss": 0.5117, + "step": 30354 + }, + { + "epoch": 0.5245196295272325, + "grad_norm": 0.8857905433651977, + "learning_rate": 9.691961667924317e-06, + "loss": 0.3784, + "step": 30355 + }, + { + "epoch": 0.5245369090406414, + "grad_norm": 0.8076538988475276, + "learning_rate": 9.691402284456053e-06, + "loss": 0.4436, + "step": 30356 + }, + { + "epoch": 0.5245541885540503, + "grad_norm": 1.0580763014599388, + "learning_rate": 9.690842901954342e-06, + "loss": 0.3597, + "step": 30357 + }, + { + "epoch": 0.5245714680674592, + "grad_norm": 0.7289873638874319, + "learning_rate": 9.690283520420932e-06, + "loss": 0.6577, + "step": 30358 + }, + { + "epoch": 0.5245887475808682, + "grad_norm": 0.8746531434573164, + "learning_rate": 9.689724139857578e-06, + "loss": 0.3794, + "step": 30359 + }, + { + "epoch": 0.5246060270942771, + "grad_norm": 1.1276345654603823, + "learning_rate": 9.68916476026603e-06, + "loss": 0.3148, + "step": 30360 + }, + { + "epoch": 0.524623306607686, + "grad_norm": 0.995486208468521, + "learning_rate": 9.688605381648044e-06, + "loss": 0.3717, + "step": 30361 + }, + { + "epoch": 0.5246405861210949, + "grad_norm": 1.8077105503794517, + "learning_rate": 9.688046004005367e-06, + "loss": 0.4621, + "step": 30362 + }, + { + "epoch": 0.5246578656345038, + "grad_norm": 1.0848674252512704, + "learning_rate": 9.687486627339757e-06, + "loss": 0.571, + "step": 30363 + }, + { + "epoch": 0.5246751451479126, + "grad_norm": 1.2089162348151934, + "learning_rate": 9.68692725165296e-06, + "loss": 0.3715, + "step": 30364 + }, + { + "epoch": 0.5246924246613215, + "grad_norm": 1.0572082880595184, + "learning_rate": 9.686367876946731e-06, + "loss": 0.4345, + "step": 30365 + }, + { + "epoch": 0.5247097041747304, + "grad_norm": 0.9794849190953441, + "learning_rate": 9.685808503222826e-06, + "loss": 0.5912, + "step": 30366 + }, + { + "epoch": 0.5247269836881393, + "grad_norm": 1.4489843120026717, + "learning_rate": 9.685249130482987e-06, + "loss": 0.2576, + "step": 30367 + }, + { + "epoch": 0.5247442632015482, + "grad_norm": 1.0493333512433067, + "learning_rate": 9.684689758728975e-06, + "loss": 0.495, + "step": 30368 + }, + { + "epoch": 0.5247615427149571, + "grad_norm": 1.0701771751320917, + "learning_rate": 9.684130387962536e-06, + "loss": 0.4884, + "step": 30369 + }, + { + "epoch": 0.524778822228366, + "grad_norm": 0.835298206305517, + "learning_rate": 9.683571018185425e-06, + "loss": 0.3506, + "step": 30370 + }, + { + "epoch": 0.5247961017417749, + "grad_norm": 1.1492229703721901, + "learning_rate": 9.683011649399393e-06, + "loss": 0.4085, + "step": 30371 + }, + { + "epoch": 0.5248133812551838, + "grad_norm": 0.9571331681347646, + "learning_rate": 9.682452281606198e-06, + "loss": 0.3841, + "step": 30372 + }, + { + "epoch": 0.5248306607685927, + "grad_norm": 0.6680959617776914, + "learning_rate": 9.68189291480758e-06, + "loss": 0.3879, + "step": 30373 + }, + { + "epoch": 0.5248479402820017, + "grad_norm": 0.7771899055583805, + "learning_rate": 9.681333549005301e-06, + "loss": 0.4142, + "step": 30374 + }, + { + "epoch": 0.5248652197954106, + "grad_norm": 0.4324867536448348, + "learning_rate": 9.680774184201107e-06, + "loss": 0.5925, + "step": 30375 + }, + { + "epoch": 0.5248824993088195, + "grad_norm": 1.199897609470081, + "learning_rate": 9.680214820396755e-06, + "loss": 0.6276, + "step": 30376 + }, + { + "epoch": 0.5248997788222284, + "grad_norm": 0.9295529139544819, + "learning_rate": 9.679655457593997e-06, + "loss": 0.3592, + "step": 30377 + }, + { + "epoch": 0.5249170583356373, + "grad_norm": 0.9824982756913229, + "learning_rate": 9.679096095794577e-06, + "loss": 0.5879, + "step": 30378 + }, + { + "epoch": 0.5249343378490462, + "grad_norm": 0.8920478514614746, + "learning_rate": 9.678536735000256e-06, + "loss": 0.4166, + "step": 30379 + }, + { + "epoch": 0.5249516173624551, + "grad_norm": 0.8396651937035199, + "learning_rate": 9.67797737521278e-06, + "loss": 0.3756, + "step": 30380 + }, + { + "epoch": 0.524968896875864, + "grad_norm": 1.0200642541600893, + "learning_rate": 9.677418016433905e-06, + "loss": 0.6678, + "step": 30381 + }, + { + "epoch": 0.5249861763892729, + "grad_norm": 1.113069522932001, + "learning_rate": 9.676858658665378e-06, + "loss": 0.4027, + "step": 30382 + }, + { + "epoch": 0.5250034559026818, + "grad_norm": 0.9350196005719978, + "learning_rate": 9.67629930190896e-06, + "loss": 0.5545, + "step": 30383 + }, + { + "epoch": 0.5250207354160907, + "grad_norm": 1.0214988321677065, + "learning_rate": 9.675739946166391e-06, + "loss": 0.3909, + "step": 30384 + }, + { + "epoch": 0.5250380149294995, + "grad_norm": 0.7709425895700279, + "learning_rate": 9.675180591439434e-06, + "loss": 0.393, + "step": 30385 + }, + { + "epoch": 0.5250552944429084, + "grad_norm": 1.15935673179039, + "learning_rate": 9.674621237729832e-06, + "loss": 0.7702, + "step": 30386 + }, + { + "epoch": 0.5250725739563173, + "grad_norm": 1.0332394145509571, + "learning_rate": 9.674061885039347e-06, + "loss": 0.3663, + "step": 30387 + }, + { + "epoch": 0.5250898534697263, + "grad_norm": 1.4019840221049003, + "learning_rate": 9.67350253336972e-06, + "loss": 0.3116, + "step": 30388 + }, + { + "epoch": 0.5251071329831352, + "grad_norm": 0.9243641556203289, + "learning_rate": 9.672943182722707e-06, + "loss": 0.535, + "step": 30389 + }, + { + "epoch": 0.5251244124965441, + "grad_norm": 0.8026683261803789, + "learning_rate": 9.672383833100066e-06, + "loss": 0.3662, + "step": 30390 + }, + { + "epoch": 0.525141692009953, + "grad_norm": 0.6864996495962159, + "learning_rate": 9.671824484503539e-06, + "loss": 0.2374, + "step": 30391 + }, + { + "epoch": 0.5251589715233619, + "grad_norm": 0.8304198495753327, + "learning_rate": 9.671265136934886e-06, + "loss": 0.3886, + "step": 30392 + }, + { + "epoch": 0.5251762510367708, + "grad_norm": 0.8098994350497997, + "learning_rate": 9.670705790395853e-06, + "loss": 0.3513, + "step": 30393 + }, + { + "epoch": 0.5251935305501797, + "grad_norm": 0.9040164140967792, + "learning_rate": 9.670146444888196e-06, + "loss": 0.2578, + "step": 30394 + }, + { + "epoch": 0.5252108100635886, + "grad_norm": 1.104846847404403, + "learning_rate": 9.669587100413663e-06, + "loss": 0.28, + "step": 30395 + }, + { + "epoch": 0.5252280895769975, + "grad_norm": 0.945875925734831, + "learning_rate": 9.66902775697401e-06, + "loss": 0.6254, + "step": 30396 + }, + { + "epoch": 0.5252453690904064, + "grad_norm": 1.0298721114245557, + "learning_rate": 9.66846841457099e-06, + "loss": 0.4584, + "step": 30397 + }, + { + "epoch": 0.5252626486038153, + "grad_norm": 1.1409998976445654, + "learning_rate": 9.667909073206348e-06, + "loss": 0.5455, + "step": 30398 + }, + { + "epoch": 0.5252799281172242, + "grad_norm": 1.3422762755499413, + "learning_rate": 9.667349732881843e-06, + "loss": 0.5486, + "step": 30399 + }, + { + "epoch": 0.5252972076306331, + "grad_norm": 1.1786197152033946, + "learning_rate": 9.66679039359922e-06, + "loss": 0.5478, + "step": 30400 + }, + { + "epoch": 0.525314487144042, + "grad_norm": 0.895488107000586, + "learning_rate": 9.66623105536024e-06, + "loss": 0.6004, + "step": 30401 + }, + { + "epoch": 0.525331766657451, + "grad_norm": 1.305072563653133, + "learning_rate": 9.665671718166642e-06, + "loss": 0.8923, + "step": 30402 + }, + { + "epoch": 0.5253490461708599, + "grad_norm": 1.324233900227232, + "learning_rate": 9.665112382020194e-06, + "loss": 0.361, + "step": 30403 + }, + { + "epoch": 0.5253663256842688, + "grad_norm": 1.2524657769714167, + "learning_rate": 9.664553046922634e-06, + "loss": 0.4227, + "step": 30404 + }, + { + "epoch": 0.5253836051976777, + "grad_norm": 1.3192258784672743, + "learning_rate": 9.663993712875722e-06, + "loss": 0.3327, + "step": 30405 + }, + { + "epoch": 0.5254008847110866, + "grad_norm": 1.1296551692496974, + "learning_rate": 9.663434379881205e-06, + "loss": 0.4106, + "step": 30406 + }, + { + "epoch": 0.5254181642244954, + "grad_norm": 0.9535686185028235, + "learning_rate": 9.662875047940842e-06, + "loss": 0.4518, + "step": 30407 + }, + { + "epoch": 0.5254354437379043, + "grad_norm": 1.3881842835178622, + "learning_rate": 9.662315717056377e-06, + "loss": 0.3283, + "step": 30408 + }, + { + "epoch": 0.5254527232513132, + "grad_norm": 1.2605531801217402, + "learning_rate": 9.661756387229561e-06, + "loss": 0.3129, + "step": 30409 + }, + { + "epoch": 0.5254700027647221, + "grad_norm": 1.295494356246886, + "learning_rate": 9.661197058462155e-06, + "loss": 0.3957, + "step": 30410 + }, + { + "epoch": 0.525487282278131, + "grad_norm": 1.1318444386988196, + "learning_rate": 9.660637730755901e-06, + "loss": 0.6065, + "step": 30411 + }, + { + "epoch": 0.5255045617915399, + "grad_norm": 1.8712455516265578, + "learning_rate": 9.660078404112562e-06, + "loss": 0.4964, + "step": 30412 + }, + { + "epoch": 0.5255218413049488, + "grad_norm": 0.9847089816253803, + "learning_rate": 9.659519078533878e-06, + "loss": 0.278, + "step": 30413 + }, + { + "epoch": 0.5255391208183577, + "grad_norm": 0.8830829815484528, + "learning_rate": 9.658959754021607e-06, + "loss": 0.3554, + "step": 30414 + }, + { + "epoch": 0.5255564003317666, + "grad_norm": 0.8438604071509215, + "learning_rate": 9.658400430577498e-06, + "loss": 0.3843, + "step": 30415 + }, + { + "epoch": 0.5255736798451756, + "grad_norm": 0.8934566826062731, + "learning_rate": 9.65784110820331e-06, + "loss": 0.4442, + "step": 30416 + }, + { + "epoch": 0.5255909593585845, + "grad_norm": 1.1272105744662833, + "learning_rate": 9.657281786900784e-06, + "loss": 0.6145, + "step": 30417 + }, + { + "epoch": 0.5256082388719934, + "grad_norm": 0.9758741445146792, + "learning_rate": 9.656722466671685e-06, + "loss": 0.4553, + "step": 30418 + }, + { + "epoch": 0.5256255183854023, + "grad_norm": 1.1023741325145526, + "learning_rate": 9.656163147517752e-06, + "loss": 0.4572, + "step": 30419 + }, + { + "epoch": 0.5256427978988112, + "grad_norm": 1.0105872671932754, + "learning_rate": 9.65560382944074e-06, + "loss": 0.4724, + "step": 30420 + }, + { + "epoch": 0.5256600774122201, + "grad_norm": 0.8947800163866585, + "learning_rate": 9.655044512442409e-06, + "loss": 0.3518, + "step": 30421 + }, + { + "epoch": 0.525677356925629, + "grad_norm": 1.8343218160913861, + "learning_rate": 9.6544851965245e-06, + "loss": 0.4758, + "step": 30422 + }, + { + "epoch": 0.5256946364390379, + "grad_norm": 1.0760240013468825, + "learning_rate": 9.653925881688773e-06, + "loss": 0.6335, + "step": 30423 + }, + { + "epoch": 0.5257119159524468, + "grad_norm": 1.5215301718188996, + "learning_rate": 9.653366567936972e-06, + "loss": 0.4688, + "step": 30424 + }, + { + "epoch": 0.5257291954658557, + "grad_norm": 0.8844366419395148, + "learning_rate": 9.652807255270857e-06, + "loss": 0.4815, + "step": 30425 + }, + { + "epoch": 0.5257464749792646, + "grad_norm": 1.5427227935924561, + "learning_rate": 9.652247943692174e-06, + "loss": 0.2315, + "step": 30426 + }, + { + "epoch": 0.5257637544926735, + "grad_norm": 0.8933097799834453, + "learning_rate": 9.65168863320268e-06, + "loss": 0.409, + "step": 30427 + }, + { + "epoch": 0.5257810340060823, + "grad_norm": 0.7490524139975846, + "learning_rate": 9.651129323804122e-06, + "loss": 0.4049, + "step": 30428 + }, + { + "epoch": 0.5257983135194912, + "grad_norm": 1.5974078778674468, + "learning_rate": 9.650570015498252e-06, + "loss": 0.2892, + "step": 30429 + }, + { + "epoch": 0.5258155930329002, + "grad_norm": 1.6784931963007985, + "learning_rate": 9.650010708286824e-06, + "loss": 0.4504, + "step": 30430 + }, + { + "epoch": 0.5258328725463091, + "grad_norm": 1.1543076413297895, + "learning_rate": 9.649451402171588e-06, + "loss": 0.5154, + "step": 30431 + }, + { + "epoch": 0.525850152059718, + "grad_norm": 1.0504507499763047, + "learning_rate": 9.648892097154301e-06, + "loss": 0.5758, + "step": 30432 + }, + { + "epoch": 0.5258674315731269, + "grad_norm": 0.8231304827550903, + "learning_rate": 9.648332793236707e-06, + "loss": 0.3703, + "step": 30433 + }, + { + "epoch": 0.5258847110865358, + "grad_norm": 1.0101252934510747, + "learning_rate": 9.647773490420562e-06, + "loss": 0.4373, + "step": 30434 + }, + { + "epoch": 0.5259019905999447, + "grad_norm": 1.984965844790331, + "learning_rate": 9.647214188707617e-06, + "loss": 0.5102, + "step": 30435 + }, + { + "epoch": 0.5259192701133536, + "grad_norm": 0.8910203996402787, + "learning_rate": 9.646654888099625e-06, + "loss": 0.2705, + "step": 30436 + }, + { + "epoch": 0.5259365496267625, + "grad_norm": 1.1880658712726282, + "learning_rate": 9.646095588598335e-06, + "loss": 0.4814, + "step": 30437 + }, + { + "epoch": 0.5259538291401714, + "grad_norm": 0.9672302567645795, + "learning_rate": 9.645536290205503e-06, + "loss": 0.3739, + "step": 30438 + }, + { + "epoch": 0.5259711086535803, + "grad_norm": 0.9859493642400036, + "learning_rate": 9.644976992922878e-06, + "loss": 0.3533, + "step": 30439 + }, + { + "epoch": 0.5259883881669892, + "grad_norm": 1.066917666258152, + "learning_rate": 9.64441769675221e-06, + "loss": 0.4771, + "step": 30440 + }, + { + "epoch": 0.5260056676803981, + "grad_norm": 0.8971054278012576, + "learning_rate": 9.643858401695256e-06, + "loss": 0.4181, + "step": 30441 + }, + { + "epoch": 0.526022947193807, + "grad_norm": 0.9081233713986826, + "learning_rate": 9.643299107753758e-06, + "loss": 0.5314, + "step": 30442 + }, + { + "epoch": 0.526040226707216, + "grad_norm": 1.34943179110014, + "learning_rate": 9.642739814929482e-06, + "loss": 0.375, + "step": 30443 + }, + { + "epoch": 0.5260575062206249, + "grad_norm": 0.9524740985781802, + "learning_rate": 9.642180523224167e-06, + "loss": 0.4771, + "step": 30444 + }, + { + "epoch": 0.5260747857340338, + "grad_norm": 0.878287136152797, + "learning_rate": 9.641621232639573e-06, + "loss": 0.4446, + "step": 30445 + }, + { + "epoch": 0.5260920652474427, + "grad_norm": 0.4843480197973303, + "learning_rate": 9.641061943177445e-06, + "loss": 0.4337, + "step": 30446 + }, + { + "epoch": 0.5261093447608516, + "grad_norm": 0.796155819282689, + "learning_rate": 9.640502654839543e-06, + "loss": 0.4457, + "step": 30447 + }, + { + "epoch": 0.5261266242742605, + "grad_norm": 0.836316417453802, + "learning_rate": 9.639943367627607e-06, + "loss": 0.2617, + "step": 30448 + }, + { + "epoch": 0.5261439037876693, + "grad_norm": 0.9804413762886218, + "learning_rate": 9.639384081543402e-06, + "loss": 0.4386, + "step": 30449 + }, + { + "epoch": 0.5261611833010782, + "grad_norm": 1.1556663483999552, + "learning_rate": 9.638824796588673e-06, + "loss": 0.4453, + "step": 30450 + }, + { + "epoch": 0.5261784628144871, + "grad_norm": 1.2922448841334098, + "learning_rate": 9.638265512765169e-06, + "loss": 0.4101, + "step": 30451 + }, + { + "epoch": 0.526195742327896, + "grad_norm": 0.9821291170445391, + "learning_rate": 9.637706230074649e-06, + "loss": 0.4909, + "step": 30452 + }, + { + "epoch": 0.5262130218413049, + "grad_norm": 0.8480472750705877, + "learning_rate": 9.637146948518857e-06, + "loss": 0.3524, + "step": 30453 + }, + { + "epoch": 0.5262303013547138, + "grad_norm": 0.6388527312138278, + "learning_rate": 9.63658766809955e-06, + "loss": 0.3549, + "step": 30454 + }, + { + "epoch": 0.5262475808681227, + "grad_norm": 0.5062241283982776, + "learning_rate": 9.636028388818476e-06, + "loss": 0.44, + "step": 30455 + }, + { + "epoch": 0.5262648603815316, + "grad_norm": 1.071110861064421, + "learning_rate": 9.635469110677392e-06, + "loss": 0.41, + "step": 30456 + }, + { + "epoch": 0.5262821398949405, + "grad_norm": 1.0206918846515867, + "learning_rate": 9.634909833678043e-06, + "loss": 0.5569, + "step": 30457 + }, + { + "epoch": 0.5262994194083495, + "grad_norm": 0.8139759895261157, + "learning_rate": 9.634350557822187e-06, + "loss": 0.5999, + "step": 30458 + }, + { + "epoch": 0.5263166989217584, + "grad_norm": 1.1404689212587114, + "learning_rate": 9.633791283111574e-06, + "loss": 0.2953, + "step": 30459 + }, + { + "epoch": 0.5263339784351673, + "grad_norm": 1.1415797603926698, + "learning_rate": 9.63323200954795e-06, + "loss": 0.3597, + "step": 30460 + }, + { + "epoch": 0.5263512579485762, + "grad_norm": 0.8407797886922654, + "learning_rate": 9.632672737133077e-06, + "loss": 0.3322, + "step": 30461 + }, + { + "epoch": 0.5263685374619851, + "grad_norm": 0.774386686779893, + "learning_rate": 9.632113465868693e-06, + "loss": 0.3118, + "step": 30462 + }, + { + "epoch": 0.526385816975394, + "grad_norm": 0.9005719188836324, + "learning_rate": 9.631554195756565e-06, + "loss": 0.4341, + "step": 30463 + }, + { + "epoch": 0.5264030964888029, + "grad_norm": 0.7096506366615579, + "learning_rate": 9.630994926798433e-06, + "loss": 0.2936, + "step": 30464 + }, + { + "epoch": 0.5264203760022118, + "grad_norm": 1.066072514585355, + "learning_rate": 9.630435658996055e-06, + "loss": 0.4835, + "step": 30465 + }, + { + "epoch": 0.5264376555156207, + "grad_norm": 1.151875882456511, + "learning_rate": 9.629876392351177e-06, + "loss": 0.3198, + "step": 30466 + }, + { + "epoch": 0.5264549350290296, + "grad_norm": 0.6814564969812522, + "learning_rate": 9.62931712686556e-06, + "loss": 0.3274, + "step": 30467 + }, + { + "epoch": 0.5264722145424385, + "grad_norm": 0.6359492714896964, + "learning_rate": 9.628757862540942e-06, + "loss": 0.7653, + "step": 30468 + }, + { + "epoch": 0.5264894940558474, + "grad_norm": 1.8683457772128051, + "learning_rate": 9.628198599379092e-06, + "loss": 0.3735, + "step": 30469 + }, + { + "epoch": 0.5265067735692562, + "grad_norm": 1.0490340794035413, + "learning_rate": 9.627639337381748e-06, + "loss": 0.2987, + "step": 30470 + }, + { + "epoch": 0.5265240530826651, + "grad_norm": 0.8863400665557569, + "learning_rate": 9.627080076550665e-06, + "loss": 0.3661, + "step": 30471 + }, + { + "epoch": 0.526541332596074, + "grad_norm": 1.2006237312761825, + "learning_rate": 9.626520816887598e-06, + "loss": 0.3045, + "step": 30472 + }, + { + "epoch": 0.526558612109483, + "grad_norm": 0.6253332862574247, + "learning_rate": 9.625961558394294e-06, + "loss": 0.3139, + "step": 30473 + }, + { + "epoch": 0.5265758916228919, + "grad_norm": 1.5520775914707372, + "learning_rate": 9.625402301072508e-06, + "loss": 0.3981, + "step": 30474 + }, + { + "epoch": 0.5265931711363008, + "grad_norm": 0.8231702277228906, + "learning_rate": 9.624843044923987e-06, + "loss": 0.4426, + "step": 30475 + }, + { + "epoch": 0.5266104506497097, + "grad_norm": 1.0156442683491649, + "learning_rate": 9.62428378995049e-06, + "loss": 0.3934, + "step": 30476 + }, + { + "epoch": 0.5266277301631186, + "grad_norm": 1.0848027119175108, + "learning_rate": 9.623724536153761e-06, + "loss": 0.3215, + "step": 30477 + }, + { + "epoch": 0.5266450096765275, + "grad_norm": 1.1804375506314124, + "learning_rate": 9.623165283535562e-06, + "loss": 0.42, + "step": 30478 + }, + { + "epoch": 0.5266622891899364, + "grad_norm": 1.2928223986638512, + "learning_rate": 9.62260603209763e-06, + "loss": 0.5286, + "step": 30479 + }, + { + "epoch": 0.5266795687033453, + "grad_norm": 0.7711644841944596, + "learning_rate": 9.62204678184173e-06, + "loss": 0.3758, + "step": 30480 + }, + { + "epoch": 0.5266968482167542, + "grad_norm": 0.6773532930111504, + "learning_rate": 9.62148753276961e-06, + "loss": 0.211, + "step": 30481 + }, + { + "epoch": 0.5267141277301631, + "grad_norm": 0.8099406524938895, + "learning_rate": 9.620928284883013e-06, + "loss": 0.4646, + "step": 30482 + }, + { + "epoch": 0.526731407243572, + "grad_norm": 1.250723642894309, + "learning_rate": 9.620369038183704e-06, + "loss": 0.4824, + "step": 30483 + }, + { + "epoch": 0.5267486867569809, + "grad_norm": 0.7334822299980082, + "learning_rate": 9.619809792673422e-06, + "loss": 0.1823, + "step": 30484 + }, + { + "epoch": 0.5267659662703899, + "grad_norm": 1.3091368159462031, + "learning_rate": 9.619250548353929e-06, + "loss": 0.4701, + "step": 30485 + }, + { + "epoch": 0.5267832457837988, + "grad_norm": 0.9429696437657169, + "learning_rate": 9.61869130522697e-06, + "loss": 0.3245, + "step": 30486 + }, + { + "epoch": 0.5268005252972077, + "grad_norm": 0.8605853185259432, + "learning_rate": 9.618132063294301e-06, + "loss": 0.5095, + "step": 30487 + }, + { + "epoch": 0.5268178048106166, + "grad_norm": 1.16051505834805, + "learning_rate": 9.617572822557665e-06, + "loss": 0.5106, + "step": 30488 + }, + { + "epoch": 0.5268350843240255, + "grad_norm": 0.6421125173610264, + "learning_rate": 9.617013583018829e-06, + "loss": 0.5565, + "step": 30489 + }, + { + "epoch": 0.5268523638374344, + "grad_norm": 0.8552815278478052, + "learning_rate": 9.616454344679532e-06, + "loss": 0.5347, + "step": 30490 + }, + { + "epoch": 0.5268696433508432, + "grad_norm": 1.1074044389034996, + "learning_rate": 9.615895107541527e-06, + "loss": 0.284, + "step": 30491 + }, + { + "epoch": 0.5268869228642521, + "grad_norm": 0.8457611510438263, + "learning_rate": 9.615335871606572e-06, + "loss": 0.4079, + "step": 30492 + }, + { + "epoch": 0.526904202377661, + "grad_norm": 1.2206052969418024, + "learning_rate": 9.61477663687641e-06, + "loss": 0.4221, + "step": 30493 + }, + { + "epoch": 0.5269214818910699, + "grad_norm": 0.7360134705319534, + "learning_rate": 9.614217403352798e-06, + "loss": 0.5449, + "step": 30494 + }, + { + "epoch": 0.5269387614044788, + "grad_norm": 0.4645283615560776, + "learning_rate": 9.613658171037486e-06, + "loss": 0.8189, + "step": 30495 + }, + { + "epoch": 0.5269560409178877, + "grad_norm": 1.1086618178044958, + "learning_rate": 9.613098939932228e-06, + "loss": 0.2834, + "step": 30496 + }, + { + "epoch": 0.5269733204312966, + "grad_norm": 1.1446563144398094, + "learning_rate": 9.612539710038771e-06, + "loss": 0.4939, + "step": 30497 + }, + { + "epoch": 0.5269905999447055, + "grad_norm": 1.3547484608521958, + "learning_rate": 9.611980481358874e-06, + "loss": 0.4669, + "step": 30498 + }, + { + "epoch": 0.5270078794581144, + "grad_norm": 1.0403357827120967, + "learning_rate": 9.611421253894277e-06, + "loss": 0.4509, + "step": 30499 + }, + { + "epoch": 0.5270251589715234, + "grad_norm": 0.968526310274607, + "learning_rate": 9.610862027646743e-06, + "loss": 0.1979, + "step": 30500 + }, + { + "epoch": 0.5270424384849323, + "grad_norm": 0.8653183174103418, + "learning_rate": 9.61030280261802e-06, + "loss": 0.3227, + "step": 30501 + }, + { + "epoch": 0.5270597179983412, + "grad_norm": 0.9516616495320868, + "learning_rate": 9.60974357880985e-06, + "loss": 0.5451, + "step": 30502 + }, + { + "epoch": 0.5270769975117501, + "grad_norm": 0.5083235764497652, + "learning_rate": 9.609184356224001e-06, + "loss": 0.7004, + "step": 30503 + }, + { + "epoch": 0.527094277025159, + "grad_norm": 1.133460396787185, + "learning_rate": 9.608625134862212e-06, + "loss": 0.4, + "step": 30504 + }, + { + "epoch": 0.5271115565385679, + "grad_norm": 0.8135426823681975, + "learning_rate": 9.608065914726241e-06, + "loss": 0.3954, + "step": 30505 + }, + { + "epoch": 0.5271288360519768, + "grad_norm": 1.2630219713166533, + "learning_rate": 9.607506695817836e-06, + "loss": 0.2665, + "step": 30506 + }, + { + "epoch": 0.5271461155653857, + "grad_norm": 0.8304649364979446, + "learning_rate": 9.606947478138752e-06, + "loss": 0.4258, + "step": 30507 + }, + { + "epoch": 0.5271633950787946, + "grad_norm": 0.8391343349040137, + "learning_rate": 9.606388261690733e-06, + "loss": 0.393, + "step": 30508 + }, + { + "epoch": 0.5271806745922035, + "grad_norm": 0.7849485268761702, + "learning_rate": 9.605829046475542e-06, + "loss": 0.5112, + "step": 30509 + }, + { + "epoch": 0.5271979541056124, + "grad_norm": 0.9213687325287988, + "learning_rate": 9.605269832494922e-06, + "loss": 0.5201, + "step": 30510 + }, + { + "epoch": 0.5272152336190213, + "grad_norm": 1.272221769049699, + "learning_rate": 9.604710619750626e-06, + "loss": 0.5436, + "step": 30511 + }, + { + "epoch": 0.5272325131324301, + "grad_norm": 1.2952532356935331, + "learning_rate": 9.60415140824441e-06, + "loss": 0.3302, + "step": 30512 + }, + { + "epoch": 0.527249792645839, + "grad_norm": 1.2239335324705851, + "learning_rate": 9.603592197978017e-06, + "loss": 0.3897, + "step": 30513 + }, + { + "epoch": 0.527267072159248, + "grad_norm": 0.7849578460419534, + "learning_rate": 9.603032988953206e-06, + "loss": 0.2852, + "step": 30514 + }, + { + "epoch": 0.5272843516726569, + "grad_norm": 0.5589056346256224, + "learning_rate": 9.602473781171724e-06, + "loss": 0.7584, + "step": 30515 + }, + { + "epoch": 0.5273016311860658, + "grad_norm": 0.8709653224559077, + "learning_rate": 9.601914574635326e-06, + "loss": 0.3147, + "step": 30516 + }, + { + "epoch": 0.5273189106994747, + "grad_norm": 0.9006687947770847, + "learning_rate": 9.60135536934576e-06, + "loss": 0.3263, + "step": 30517 + }, + { + "epoch": 0.5273361902128836, + "grad_norm": 0.9604121292079824, + "learning_rate": 9.600796165304784e-06, + "loss": 0.3708, + "step": 30518 + }, + { + "epoch": 0.5273534697262925, + "grad_norm": 0.7257892114048652, + "learning_rate": 9.60023696251414e-06, + "loss": 0.6757, + "step": 30519 + }, + { + "epoch": 0.5273707492397014, + "grad_norm": 1.0548824755642159, + "learning_rate": 9.599677760975585e-06, + "loss": 0.4502, + "step": 30520 + }, + { + "epoch": 0.5273880287531103, + "grad_norm": 1.314642339258173, + "learning_rate": 9.599118560690873e-06, + "loss": 0.482, + "step": 30521 + }, + { + "epoch": 0.5274053082665192, + "grad_norm": 0.8228073538016262, + "learning_rate": 9.598559361661746e-06, + "loss": 0.4752, + "step": 30522 + }, + { + "epoch": 0.5274225877799281, + "grad_norm": 1.4671564663648562, + "learning_rate": 9.598000163889967e-06, + "loss": 0.4142, + "step": 30523 + }, + { + "epoch": 0.527439867293337, + "grad_norm": 1.1777723940027693, + "learning_rate": 9.59744096737728e-06, + "loss": 0.4045, + "step": 30524 + }, + { + "epoch": 0.5274571468067459, + "grad_norm": 0.896865475648121, + "learning_rate": 9.596881772125439e-06, + "loss": 0.4369, + "step": 30525 + }, + { + "epoch": 0.5274744263201548, + "grad_norm": 1.610814545106112, + "learning_rate": 9.596322578136192e-06, + "loss": 0.3802, + "step": 30526 + }, + { + "epoch": 0.5274917058335638, + "grad_norm": 1.2039714414270504, + "learning_rate": 9.595763385411298e-06, + "loss": 0.401, + "step": 30527 + }, + { + "epoch": 0.5275089853469727, + "grad_norm": 1.0117508933201764, + "learning_rate": 9.595204193952498e-06, + "loss": 0.4762, + "step": 30528 + }, + { + "epoch": 0.5275262648603816, + "grad_norm": 1.0080322419960654, + "learning_rate": 9.594645003761557e-06, + "loss": 0.3348, + "step": 30529 + }, + { + "epoch": 0.5275435443737905, + "grad_norm": 1.114489971814817, + "learning_rate": 9.594085814840212e-06, + "loss": 0.3943, + "step": 30530 + }, + { + "epoch": 0.5275608238871994, + "grad_norm": 0.8420102773083642, + "learning_rate": 9.593526627190225e-06, + "loss": 0.495, + "step": 30531 + }, + { + "epoch": 0.5275781034006083, + "grad_norm": 1.5343541264392802, + "learning_rate": 9.592967440813345e-06, + "loss": 0.6142, + "step": 30532 + }, + { + "epoch": 0.5275953829140171, + "grad_norm": 0.6631039623169639, + "learning_rate": 9.592408255711317e-06, + "loss": 0.4369, + "step": 30533 + }, + { + "epoch": 0.527612662427426, + "grad_norm": 0.4881096710902486, + "learning_rate": 9.5918490718859e-06, + "loss": 0.8736, + "step": 30534 + }, + { + "epoch": 0.5276299419408349, + "grad_norm": 1.1762027054200106, + "learning_rate": 9.59128988933884e-06, + "loss": 0.5688, + "step": 30535 + }, + { + "epoch": 0.5276472214542438, + "grad_norm": 1.2832127185688331, + "learning_rate": 9.590730708071893e-06, + "loss": 0.5145, + "step": 30536 + }, + { + "epoch": 0.5276645009676527, + "grad_norm": 0.8357076018211922, + "learning_rate": 9.590171528086808e-06, + "loss": 0.4135, + "step": 30537 + }, + { + "epoch": 0.5276817804810616, + "grad_norm": 1.012034789312535, + "learning_rate": 9.589612349385341e-06, + "loss": 0.5371, + "step": 30538 + }, + { + "epoch": 0.5276990599944705, + "grad_norm": 0.7316144175714101, + "learning_rate": 9.589053171969234e-06, + "loss": 0.4969, + "step": 30539 + }, + { + "epoch": 0.5277163395078794, + "grad_norm": 0.9948403834666123, + "learning_rate": 9.588493995840247e-06, + "loss": 0.4162, + "step": 30540 + }, + { + "epoch": 0.5277336190212883, + "grad_norm": 0.7420631677582911, + "learning_rate": 9.587934821000128e-06, + "loss": 0.2022, + "step": 30541 + }, + { + "epoch": 0.5277508985346973, + "grad_norm": 1.2922863254551085, + "learning_rate": 9.587375647450623e-06, + "loss": 0.2004, + "step": 30542 + }, + { + "epoch": 0.5277681780481062, + "grad_norm": 1.32185812426209, + "learning_rate": 9.586816475193496e-06, + "loss": 0.4173, + "step": 30543 + }, + { + "epoch": 0.5277854575615151, + "grad_norm": 0.7809597171505982, + "learning_rate": 9.586257304230488e-06, + "loss": 0.2719, + "step": 30544 + }, + { + "epoch": 0.527802737074924, + "grad_norm": 1.0277388044420959, + "learning_rate": 9.585698134563354e-06, + "loss": 0.3758, + "step": 30545 + }, + { + "epoch": 0.5278200165883329, + "grad_norm": 1.0385392941477913, + "learning_rate": 9.585138966193844e-06, + "loss": 0.417, + "step": 30546 + }, + { + "epoch": 0.5278372961017418, + "grad_norm": 0.9741299701324718, + "learning_rate": 9.584579799123712e-06, + "loss": 0.4409, + "step": 30547 + }, + { + "epoch": 0.5278545756151507, + "grad_norm": 1.06170371122425, + "learning_rate": 9.584020633354703e-06, + "loss": 0.5965, + "step": 30548 + }, + { + "epoch": 0.5278718551285596, + "grad_norm": 0.8111242677623157, + "learning_rate": 9.58346146888858e-06, + "loss": 0.207, + "step": 30549 + }, + { + "epoch": 0.5278891346419685, + "grad_norm": 0.9927103944344533, + "learning_rate": 9.582902305727082e-06, + "loss": 0.4761, + "step": 30550 + }, + { + "epoch": 0.5279064141553774, + "grad_norm": 0.501983149187209, + "learning_rate": 9.582343143871969e-06, + "loss": 0.3672, + "step": 30551 + }, + { + "epoch": 0.5279236936687863, + "grad_norm": 1.2674349507942086, + "learning_rate": 9.58178398332499e-06, + "loss": 0.5018, + "step": 30552 + }, + { + "epoch": 0.5279409731821952, + "grad_norm": 0.9706127933722744, + "learning_rate": 9.581224824087892e-06, + "loss": 0.3241, + "step": 30553 + }, + { + "epoch": 0.5279582526956041, + "grad_norm": 1.1484076306060367, + "learning_rate": 9.580665666162431e-06, + "loss": 0.4356, + "step": 30554 + }, + { + "epoch": 0.5279755322090129, + "grad_norm": 0.9859540383075795, + "learning_rate": 9.580106509550353e-06, + "loss": 0.5084, + "step": 30555 + }, + { + "epoch": 0.5279928117224219, + "grad_norm": 1.007453840874405, + "learning_rate": 9.57954735425342e-06, + "loss": 0.3175, + "step": 30556 + }, + { + "epoch": 0.5280100912358308, + "grad_norm": 0.8171702541273599, + "learning_rate": 9.578988200273371e-06, + "loss": 0.3256, + "step": 30557 + }, + { + "epoch": 0.5280273707492397, + "grad_norm": 1.0516715090250852, + "learning_rate": 9.578429047611968e-06, + "loss": 0.3564, + "step": 30558 + }, + { + "epoch": 0.5280446502626486, + "grad_norm": 1.6987035806164466, + "learning_rate": 9.577869896270954e-06, + "loss": 0.389, + "step": 30559 + }, + { + "epoch": 0.5280619297760575, + "grad_norm": 0.9700123623269196, + "learning_rate": 9.577310746252084e-06, + "loss": 0.3801, + "step": 30560 + }, + { + "epoch": 0.5280792092894664, + "grad_norm": 0.871519899385068, + "learning_rate": 9.576751597557108e-06, + "loss": 0.3561, + "step": 30561 + }, + { + "epoch": 0.5280964888028753, + "grad_norm": 0.4902846684544554, + "learning_rate": 9.57619245018778e-06, + "loss": 0.624, + "step": 30562 + }, + { + "epoch": 0.5281137683162842, + "grad_norm": 0.4742722408113143, + "learning_rate": 9.575633304145852e-06, + "loss": 0.5153, + "step": 30563 + }, + { + "epoch": 0.5281310478296931, + "grad_norm": 1.0773242607089708, + "learning_rate": 9.575074159433068e-06, + "loss": 0.526, + "step": 30564 + }, + { + "epoch": 0.528148327343102, + "grad_norm": 0.8284923974015679, + "learning_rate": 9.574515016051187e-06, + "loss": 0.4364, + "step": 30565 + }, + { + "epoch": 0.5281656068565109, + "grad_norm": 1.2963865652195607, + "learning_rate": 9.573955874001953e-06, + "loss": 0.4462, + "step": 30566 + }, + { + "epoch": 0.5281828863699198, + "grad_norm": 0.7759658850225148, + "learning_rate": 9.573396733287129e-06, + "loss": 0.3921, + "step": 30567 + }, + { + "epoch": 0.5282001658833287, + "grad_norm": 0.9432301624848326, + "learning_rate": 9.572837593908449e-06, + "loss": 0.3761, + "step": 30568 + }, + { + "epoch": 0.5282174453967377, + "grad_norm": 0.7859623248646822, + "learning_rate": 9.572278455867684e-06, + "loss": 0.5532, + "step": 30569 + }, + { + "epoch": 0.5282347249101466, + "grad_norm": 1.106703234262747, + "learning_rate": 9.571719319166569e-06, + "loss": 0.45, + "step": 30570 + }, + { + "epoch": 0.5282520044235555, + "grad_norm": 0.7033876748571086, + "learning_rate": 9.571160183806864e-06, + "loss": 0.8197, + "step": 30571 + }, + { + "epoch": 0.5282692839369644, + "grad_norm": 0.8979161434402724, + "learning_rate": 9.57060104979032e-06, + "loss": 0.5122, + "step": 30572 + }, + { + "epoch": 0.5282865634503733, + "grad_norm": 1.2579801727556752, + "learning_rate": 9.570041917118681e-06, + "loss": 0.4839, + "step": 30573 + }, + { + "epoch": 0.5283038429637822, + "grad_norm": 0.809246188459672, + "learning_rate": 9.569482785793706e-06, + "loss": 0.3134, + "step": 30574 + }, + { + "epoch": 0.5283211224771911, + "grad_norm": 0.9376777624469653, + "learning_rate": 9.568923655817144e-06, + "loss": 0.3227, + "step": 30575 + }, + { + "epoch": 0.5283384019905999, + "grad_norm": 0.9251972768091623, + "learning_rate": 9.568364527190746e-06, + "loss": 0.3309, + "step": 30576 + }, + { + "epoch": 0.5283556815040088, + "grad_norm": 1.0619874274126946, + "learning_rate": 9.56780539991626e-06, + "loss": 0.3147, + "step": 30577 + }, + { + "epoch": 0.5283729610174177, + "grad_norm": 1.0604571133683849, + "learning_rate": 9.567246273995446e-06, + "loss": 0.4067, + "step": 30578 + }, + { + "epoch": 0.5283902405308266, + "grad_norm": 0.8537427747520179, + "learning_rate": 9.566687149430045e-06, + "loss": 0.4218, + "step": 30579 + }, + { + "epoch": 0.5284075200442355, + "grad_norm": 1.0723121083988219, + "learning_rate": 9.566128026221815e-06, + "loss": 0.32, + "step": 30580 + }, + { + "epoch": 0.5284247995576444, + "grad_norm": 1.6321047840237162, + "learning_rate": 9.565568904372502e-06, + "loss": 0.3659, + "step": 30581 + }, + { + "epoch": 0.5284420790710533, + "grad_norm": 1.0810829349068922, + "learning_rate": 9.565009783883863e-06, + "loss": 0.3955, + "step": 30582 + }, + { + "epoch": 0.5284593585844622, + "grad_norm": 1.4897160201899262, + "learning_rate": 9.564450664757648e-06, + "loss": 0.4912, + "step": 30583 + }, + { + "epoch": 0.5284766380978712, + "grad_norm": 0.9975429642784626, + "learning_rate": 9.563891546995604e-06, + "loss": 0.4686, + "step": 30584 + }, + { + "epoch": 0.5284939176112801, + "grad_norm": 1.5911837669585218, + "learning_rate": 9.563332430599486e-06, + "loss": 0.4086, + "step": 30585 + }, + { + "epoch": 0.528511197124689, + "grad_norm": 0.8391324562562613, + "learning_rate": 9.56277331557104e-06, + "loss": 0.4379, + "step": 30586 + }, + { + "epoch": 0.5285284766380979, + "grad_norm": 1.1309459597072928, + "learning_rate": 9.562214201912029e-06, + "loss": 0.4745, + "step": 30587 + }, + { + "epoch": 0.5285457561515068, + "grad_norm": 1.1090613651664818, + "learning_rate": 9.561655089624187e-06, + "loss": 0.4755, + "step": 30588 + }, + { + "epoch": 0.5285630356649157, + "grad_norm": 1.680870523158574, + "learning_rate": 9.561095978709283e-06, + "loss": 0.4185, + "step": 30589 + }, + { + "epoch": 0.5285803151783246, + "grad_norm": 0.8681010452850995, + "learning_rate": 9.560536869169055e-06, + "loss": 0.4148, + "step": 30590 + }, + { + "epoch": 0.5285975946917335, + "grad_norm": 1.178479612774067, + "learning_rate": 9.559977761005261e-06, + "loss": 0.4187, + "step": 30591 + }, + { + "epoch": 0.5286148742051424, + "grad_norm": 0.8791250885590651, + "learning_rate": 9.559418654219647e-06, + "loss": 0.4564, + "step": 30592 + }, + { + "epoch": 0.5286321537185513, + "grad_norm": 1.1507095237849398, + "learning_rate": 9.558859548813972e-06, + "loss": 0.431, + "step": 30593 + }, + { + "epoch": 0.5286494332319602, + "grad_norm": 0.912058964955923, + "learning_rate": 9.55830044478998e-06, + "loss": 0.3691, + "step": 30594 + }, + { + "epoch": 0.5286667127453691, + "grad_norm": 0.9532428782176473, + "learning_rate": 9.557741342149424e-06, + "loss": 0.4219, + "step": 30595 + }, + { + "epoch": 0.528683992258778, + "grad_norm": 0.7711325887726544, + "learning_rate": 9.557182240894056e-06, + "loss": 0.2855, + "step": 30596 + }, + { + "epoch": 0.5287012717721868, + "grad_norm": 1.1356622203851077, + "learning_rate": 9.556623141025626e-06, + "loss": 0.5865, + "step": 30597 + }, + { + "epoch": 0.5287185512855958, + "grad_norm": 1.3256040104041875, + "learning_rate": 9.556064042545889e-06, + "loss": 0.6715, + "step": 30598 + }, + { + "epoch": 0.5287358307990047, + "grad_norm": 1.176117086199783, + "learning_rate": 9.555504945456591e-06, + "loss": 0.5213, + "step": 30599 + }, + { + "epoch": 0.5287531103124136, + "grad_norm": 1.359402243448346, + "learning_rate": 9.554945849759486e-06, + "loss": 0.4347, + "step": 30600 + }, + { + "epoch": 0.5287703898258225, + "grad_norm": 0.9083317751472916, + "learning_rate": 9.554386755456322e-06, + "loss": 0.3731, + "step": 30601 + }, + { + "epoch": 0.5287876693392314, + "grad_norm": 0.8892018143154127, + "learning_rate": 9.553827662548855e-06, + "loss": 0.4085, + "step": 30602 + }, + { + "epoch": 0.5288049488526403, + "grad_norm": 1.1326387126283073, + "learning_rate": 9.553268571038837e-06, + "loss": 0.5455, + "step": 30603 + }, + { + "epoch": 0.5288222283660492, + "grad_norm": 0.9445689480378648, + "learning_rate": 9.55270948092801e-06, + "loss": 0.4449, + "step": 30604 + }, + { + "epoch": 0.5288395078794581, + "grad_norm": 0.7548949707814584, + "learning_rate": 9.552150392218133e-06, + "loss": 0.4582, + "step": 30605 + }, + { + "epoch": 0.528856787392867, + "grad_norm": 1.1598159008102822, + "learning_rate": 9.551591304910953e-06, + "loss": 0.6929, + "step": 30606 + }, + { + "epoch": 0.5288740669062759, + "grad_norm": 0.9360760510387518, + "learning_rate": 9.551032219008229e-06, + "loss": 0.4126, + "step": 30607 + }, + { + "epoch": 0.5288913464196848, + "grad_norm": 1.035209070664199, + "learning_rate": 9.550473134511697e-06, + "loss": 0.521, + "step": 30608 + }, + { + "epoch": 0.5289086259330937, + "grad_norm": 0.8314959496039549, + "learning_rate": 9.549914051423126e-06, + "loss": 0.3709, + "step": 30609 + }, + { + "epoch": 0.5289259054465026, + "grad_norm": 0.9109738829824544, + "learning_rate": 9.549354969744253e-06, + "loss": 0.3884, + "step": 30610 + }, + { + "epoch": 0.5289431849599115, + "grad_norm": 0.692579247666986, + "learning_rate": 9.548795889476837e-06, + "loss": 0.2977, + "step": 30611 + }, + { + "epoch": 0.5289604644733205, + "grad_norm": 1.0715212160956566, + "learning_rate": 9.548236810622624e-06, + "loss": 0.6086, + "step": 30612 + }, + { + "epoch": 0.5289777439867294, + "grad_norm": 0.8175313637106024, + "learning_rate": 9.547677733183373e-06, + "loss": 0.2898, + "step": 30613 + }, + { + "epoch": 0.5289950235001383, + "grad_norm": 0.9782299728439979, + "learning_rate": 9.547118657160827e-06, + "loss": 0.4324, + "step": 30614 + }, + { + "epoch": 0.5290123030135472, + "grad_norm": 1.0823082691314332, + "learning_rate": 9.546559582556737e-06, + "loss": 0.5134, + "step": 30615 + }, + { + "epoch": 0.5290295825269561, + "grad_norm": 0.9852012935340392, + "learning_rate": 9.54600050937286e-06, + "loss": 0.3618, + "step": 30616 + }, + { + "epoch": 0.529046862040365, + "grad_norm": 1.2564673185880506, + "learning_rate": 9.54544143761094e-06, + "loss": 0.466, + "step": 30617 + }, + { + "epoch": 0.5290641415537738, + "grad_norm": 0.8603132444418976, + "learning_rate": 9.544882367272739e-06, + "loss": 0.3394, + "step": 30618 + }, + { + "epoch": 0.5290814210671827, + "grad_norm": 0.9589064544199252, + "learning_rate": 9.544323298359994e-06, + "loss": 0.438, + "step": 30619 + }, + { + "epoch": 0.5290987005805916, + "grad_norm": 0.9426501672943459, + "learning_rate": 9.543764230874468e-06, + "loss": 0.2331, + "step": 30620 + }, + { + "epoch": 0.5291159800940005, + "grad_norm": 1.3560018336761075, + "learning_rate": 9.543205164817903e-06, + "loss": 0.5298, + "step": 30621 + }, + { + "epoch": 0.5291332596074094, + "grad_norm": 0.8594693334926643, + "learning_rate": 9.542646100192056e-06, + "loss": 0.5416, + "step": 30622 + }, + { + "epoch": 0.5291505391208183, + "grad_norm": 1.2087042637049057, + "learning_rate": 9.542087036998674e-06, + "loss": 0.2764, + "step": 30623 + }, + { + "epoch": 0.5291678186342272, + "grad_norm": 1.0403690016605935, + "learning_rate": 9.541527975239516e-06, + "loss": 0.4225, + "step": 30624 + }, + { + "epoch": 0.5291850981476361, + "grad_norm": 0.9402298574107164, + "learning_rate": 9.540968914916324e-06, + "loss": 0.4191, + "step": 30625 + }, + { + "epoch": 0.529202377661045, + "grad_norm": 0.9504342141447606, + "learning_rate": 9.540409856030852e-06, + "loss": 0.3314, + "step": 30626 + }, + { + "epoch": 0.529219657174454, + "grad_norm": 0.8902762400122997, + "learning_rate": 9.539850798584853e-06, + "loss": 0.4923, + "step": 30627 + }, + { + "epoch": 0.5292369366878629, + "grad_norm": 1.0704271271267523, + "learning_rate": 9.53929174258007e-06, + "loss": 0.666, + "step": 30628 + }, + { + "epoch": 0.5292542162012718, + "grad_norm": 1.1702098950212905, + "learning_rate": 9.538732688018268e-06, + "loss": 0.6655, + "step": 30629 + }, + { + "epoch": 0.5292714957146807, + "grad_norm": 0.6636520622046874, + "learning_rate": 9.538173634901185e-06, + "loss": 0.3022, + "step": 30630 + }, + { + "epoch": 0.5292887752280896, + "grad_norm": 1.018391797195398, + "learning_rate": 9.537614583230582e-06, + "loss": 0.4302, + "step": 30631 + }, + { + "epoch": 0.5293060547414985, + "grad_norm": 1.1169308529108184, + "learning_rate": 9.537055533008199e-06, + "loss": 0.5247, + "step": 30632 + }, + { + "epoch": 0.5293233342549074, + "grad_norm": 0.6723910307740137, + "learning_rate": 9.5364964842358e-06, + "loss": 0.3172, + "step": 30633 + }, + { + "epoch": 0.5293406137683163, + "grad_norm": 1.3078309748028005, + "learning_rate": 9.535937436915126e-06, + "loss": 0.6926, + "step": 30634 + }, + { + "epoch": 0.5293578932817252, + "grad_norm": 0.9535390888764829, + "learning_rate": 9.53537839104793e-06, + "loss": 0.4661, + "step": 30635 + }, + { + "epoch": 0.5293751727951341, + "grad_norm": 1.0000342700767706, + "learning_rate": 9.534819346635966e-06, + "loss": 0.3516, + "step": 30636 + }, + { + "epoch": 0.529392452308543, + "grad_norm": 0.7973646016082747, + "learning_rate": 9.534260303680981e-06, + "loss": 0.5202, + "step": 30637 + }, + { + "epoch": 0.529409731821952, + "grad_norm": 1.7200300356094753, + "learning_rate": 9.533701262184733e-06, + "loss": 0.4115, + "step": 30638 + }, + { + "epoch": 0.5294270113353607, + "grad_norm": 1.0797435613340967, + "learning_rate": 9.533142222148963e-06, + "loss": 0.3889, + "step": 30639 + }, + { + "epoch": 0.5294442908487696, + "grad_norm": 1.2838863276754253, + "learning_rate": 9.53258318357543e-06, + "loss": 0.3911, + "step": 30640 + }, + { + "epoch": 0.5294615703621786, + "grad_norm": 1.5557513608720106, + "learning_rate": 9.53202414646588e-06, + "loss": 0.422, + "step": 30641 + }, + { + "epoch": 0.5294788498755875, + "grad_norm": 2.089468598202907, + "learning_rate": 9.531465110822067e-06, + "loss": 0.4849, + "step": 30642 + }, + { + "epoch": 0.5294961293889964, + "grad_norm": 1.0052333999871232, + "learning_rate": 9.530906076645738e-06, + "loss": 0.3443, + "step": 30643 + }, + { + "epoch": 0.5295134089024053, + "grad_norm": 0.6561881643214932, + "learning_rate": 9.530347043938653e-06, + "loss": 0.2786, + "step": 30644 + }, + { + "epoch": 0.5295306884158142, + "grad_norm": 0.8226654185346858, + "learning_rate": 9.529788012702554e-06, + "loss": 0.4575, + "step": 30645 + }, + { + "epoch": 0.5295479679292231, + "grad_norm": 1.2067871804100063, + "learning_rate": 9.529228982939192e-06, + "loss": 0.5273, + "step": 30646 + }, + { + "epoch": 0.529565247442632, + "grad_norm": 0.8234516854326506, + "learning_rate": 9.528669954650326e-06, + "loss": 0.4248, + "step": 30647 + }, + { + "epoch": 0.5295825269560409, + "grad_norm": 0.840261141702653, + "learning_rate": 9.528110927837693e-06, + "loss": 0.3643, + "step": 30648 + }, + { + "epoch": 0.5295998064694498, + "grad_norm": 1.3317500901183537, + "learning_rate": 9.52755190250306e-06, + "loss": 0.4848, + "step": 30649 + }, + { + "epoch": 0.5296170859828587, + "grad_norm": 0.9656780372002962, + "learning_rate": 9.526992878648168e-06, + "loss": 0.3127, + "step": 30650 + }, + { + "epoch": 0.5296343654962676, + "grad_norm": 1.325547723612564, + "learning_rate": 9.52643385627477e-06, + "loss": 0.3997, + "step": 30651 + }, + { + "epoch": 0.5296516450096765, + "grad_norm": 0.8160219789046717, + "learning_rate": 9.525874835384615e-06, + "loss": 0.3926, + "step": 30652 + }, + { + "epoch": 0.5296689245230854, + "grad_norm": 0.901414674188196, + "learning_rate": 9.525315815979462e-06, + "loss": 0.3966, + "step": 30653 + }, + { + "epoch": 0.5296862040364944, + "grad_norm": 0.8905336920565181, + "learning_rate": 9.52475679806105e-06, + "loss": 0.356, + "step": 30654 + }, + { + "epoch": 0.5297034835499033, + "grad_norm": 1.224135662152643, + "learning_rate": 9.52419778163114e-06, + "loss": 0.416, + "step": 30655 + }, + { + "epoch": 0.5297207630633122, + "grad_norm": 0.8525340937727299, + "learning_rate": 9.523638766691476e-06, + "loss": 0.5461, + "step": 30656 + }, + { + "epoch": 0.5297380425767211, + "grad_norm": 0.707201613632091, + "learning_rate": 9.523079753243811e-06, + "loss": 0.3543, + "step": 30657 + }, + { + "epoch": 0.52975532209013, + "grad_norm": 1.0620293045503035, + "learning_rate": 9.5225207412899e-06, + "loss": 0.488, + "step": 30658 + }, + { + "epoch": 0.5297726016035389, + "grad_norm": 1.1046968466384026, + "learning_rate": 9.521961730831487e-06, + "loss": 0.3084, + "step": 30659 + }, + { + "epoch": 0.5297898811169477, + "grad_norm": 1.0535446431259905, + "learning_rate": 9.521402721870328e-06, + "loss": 0.3592, + "step": 30660 + }, + { + "epoch": 0.5298071606303566, + "grad_norm": 1.1492701885535872, + "learning_rate": 9.52084371440817e-06, + "loss": 0.5945, + "step": 30661 + }, + { + "epoch": 0.5298244401437655, + "grad_norm": 0.8003733455362004, + "learning_rate": 9.520284708446767e-06, + "loss": 0.3099, + "step": 30662 + }, + { + "epoch": 0.5298417196571744, + "grad_norm": 1.348994649115105, + "learning_rate": 9.519725703987868e-06, + "loss": 0.399, + "step": 30663 + }, + { + "epoch": 0.5298589991705833, + "grad_norm": 1.1311368438312823, + "learning_rate": 9.51916670103323e-06, + "loss": 0.4196, + "step": 30664 + }, + { + "epoch": 0.5298762786839922, + "grad_norm": 0.595441363162693, + "learning_rate": 9.518607699584594e-06, + "loss": 0.4689, + "step": 30665 + }, + { + "epoch": 0.5298935581974011, + "grad_norm": 0.8428023845722074, + "learning_rate": 9.518048699643713e-06, + "loss": 0.4641, + "step": 30666 + }, + { + "epoch": 0.52991083771081, + "grad_norm": 0.8318243348481261, + "learning_rate": 9.517489701212346e-06, + "loss": 0.2874, + "step": 30667 + }, + { + "epoch": 0.529928117224219, + "grad_norm": 0.8391731711567872, + "learning_rate": 9.51693070429223e-06, + "loss": 0.2696, + "step": 30668 + }, + { + "epoch": 0.5299453967376279, + "grad_norm": 0.9421759013248848, + "learning_rate": 9.516371708885131e-06, + "loss": 0.3734, + "step": 30669 + }, + { + "epoch": 0.5299626762510368, + "grad_norm": 0.9064913366960796, + "learning_rate": 9.515812714992788e-06, + "loss": 0.369, + "step": 30670 + }, + { + "epoch": 0.5299799557644457, + "grad_norm": 1.1885446541446423, + "learning_rate": 9.515253722616958e-06, + "loss": 0.4224, + "step": 30671 + }, + { + "epoch": 0.5299972352778546, + "grad_norm": 0.9881715484409188, + "learning_rate": 9.514694731759389e-06, + "loss": 0.4666, + "step": 30672 + }, + { + "epoch": 0.5300145147912635, + "grad_norm": 1.3740288068659408, + "learning_rate": 9.514135742421837e-06, + "loss": 0.5131, + "step": 30673 + }, + { + "epoch": 0.5300317943046724, + "grad_norm": 0.7276209317559252, + "learning_rate": 9.513576754606044e-06, + "loss": 0.5306, + "step": 30674 + }, + { + "epoch": 0.5300490738180813, + "grad_norm": 1.0431831681694248, + "learning_rate": 9.513017768313768e-06, + "loss": 0.3444, + "step": 30675 + }, + { + "epoch": 0.5300663533314902, + "grad_norm": 0.7283420586448153, + "learning_rate": 9.512458783546758e-06, + "loss": 0.471, + "step": 30676 + }, + { + "epoch": 0.5300836328448991, + "grad_norm": 2.0931662850610464, + "learning_rate": 9.511899800306761e-06, + "loss": 0.4085, + "step": 30677 + }, + { + "epoch": 0.530100912358308, + "grad_norm": 1.6285144500899429, + "learning_rate": 9.511340818595537e-06, + "loss": 0.4497, + "step": 30678 + }, + { + "epoch": 0.5301181918717169, + "grad_norm": 0.9504311986290387, + "learning_rate": 9.510781838414824e-06, + "loss": 0.5226, + "step": 30679 + }, + { + "epoch": 0.5301354713851258, + "grad_norm": 0.8585802792727623, + "learning_rate": 9.510222859766383e-06, + "loss": 0.4484, + "step": 30680 + }, + { + "epoch": 0.5301527508985348, + "grad_norm": 1.3132580223027128, + "learning_rate": 9.509663882651959e-06, + "loss": 0.374, + "step": 30681 + }, + { + "epoch": 0.5301700304119435, + "grad_norm": 0.9049978325765216, + "learning_rate": 9.509104907073307e-06, + "loss": 0.38, + "step": 30682 + }, + { + "epoch": 0.5301873099253525, + "grad_norm": 1.3875281401430144, + "learning_rate": 9.508545933032172e-06, + "loss": 0.5108, + "step": 30683 + }, + { + "epoch": 0.5302045894387614, + "grad_norm": 0.90800031268077, + "learning_rate": 9.507986960530315e-06, + "loss": 0.2808, + "step": 30684 + }, + { + "epoch": 0.5302218689521703, + "grad_norm": 0.9158929742530935, + "learning_rate": 9.507427989569478e-06, + "loss": 0.5238, + "step": 30685 + }, + { + "epoch": 0.5302391484655792, + "grad_norm": 0.7050748120433478, + "learning_rate": 9.50686902015141e-06, + "loss": 0.5228, + "step": 30686 + }, + { + "epoch": 0.5302564279789881, + "grad_norm": 0.9593603708333279, + "learning_rate": 9.506310052277872e-06, + "loss": 0.3991, + "step": 30687 + }, + { + "epoch": 0.530273707492397, + "grad_norm": 1.1507927281365384, + "learning_rate": 9.5057510859506e-06, + "loss": 0.3479, + "step": 30688 + }, + { + "epoch": 0.5302909870058059, + "grad_norm": 1.0308453455030224, + "learning_rate": 9.505192121171361e-06, + "loss": 0.3606, + "step": 30689 + }, + { + "epoch": 0.5303082665192148, + "grad_norm": 0.8343155996581828, + "learning_rate": 9.504633157941894e-06, + "loss": 0.2842, + "step": 30690 + }, + { + "epoch": 0.5303255460326237, + "grad_norm": 1.0996152093037632, + "learning_rate": 9.504074196263954e-06, + "loss": 0.6479, + "step": 30691 + }, + { + "epoch": 0.5303428255460326, + "grad_norm": 0.9932042152623911, + "learning_rate": 9.503515236139292e-06, + "loss": 0.3557, + "step": 30692 + }, + { + "epoch": 0.5303601050594415, + "grad_norm": 0.9532267028366304, + "learning_rate": 9.502956277569661e-06, + "loss": 0.4102, + "step": 30693 + }, + { + "epoch": 0.5303773845728504, + "grad_norm": 0.46050388889693455, + "learning_rate": 9.502397320556802e-06, + "loss": 0.4759, + "step": 30694 + }, + { + "epoch": 0.5303946640862593, + "grad_norm": 0.46689517068172176, + "learning_rate": 9.501838365102478e-06, + "loss": 0.738, + "step": 30695 + }, + { + "epoch": 0.5304119435996683, + "grad_norm": 0.7129961024995632, + "learning_rate": 9.501279411208434e-06, + "loss": 0.3534, + "step": 30696 + }, + { + "epoch": 0.5304292231130772, + "grad_norm": 0.8384592331922008, + "learning_rate": 9.500720458876416e-06, + "loss": 0.5165, + "step": 30697 + }, + { + "epoch": 0.5304465026264861, + "grad_norm": 1.4663086369135183, + "learning_rate": 9.500161508108185e-06, + "loss": 0.5545, + "step": 30698 + }, + { + "epoch": 0.530463782139895, + "grad_norm": 1.7340442433326093, + "learning_rate": 9.499602558905481e-06, + "loss": 0.3317, + "step": 30699 + }, + { + "epoch": 0.5304810616533039, + "grad_norm": 1.256356367822875, + "learning_rate": 9.499043611270064e-06, + "loss": 0.4491, + "step": 30700 + }, + { + "epoch": 0.5304983411667128, + "grad_norm": 1.027746727357793, + "learning_rate": 9.498484665203676e-06, + "loss": 0.5058, + "step": 30701 + }, + { + "epoch": 0.5305156206801217, + "grad_norm": 1.4223050400182373, + "learning_rate": 9.497925720708076e-06, + "loss": 0.5146, + "step": 30702 + }, + { + "epoch": 0.5305329001935305, + "grad_norm": 1.7118163769779162, + "learning_rate": 9.497366777785009e-06, + "loss": 0.5653, + "step": 30703 + }, + { + "epoch": 0.5305501797069394, + "grad_norm": 0.9464941107509525, + "learning_rate": 9.49680783643623e-06, + "loss": 0.5173, + "step": 30704 + }, + { + "epoch": 0.5305674592203483, + "grad_norm": 1.1149345898641057, + "learning_rate": 9.496248896663483e-06, + "loss": 0.4795, + "step": 30705 + }, + { + "epoch": 0.5305847387337572, + "grad_norm": 1.1846238814309238, + "learning_rate": 9.495689958468527e-06, + "loss": 0.4518, + "step": 30706 + }, + { + "epoch": 0.5306020182471661, + "grad_norm": 1.313934986681182, + "learning_rate": 9.49513102185311e-06, + "loss": 0.431, + "step": 30707 + }, + { + "epoch": 0.530619297760575, + "grad_norm": 1.216736935171543, + "learning_rate": 9.494572086818972e-06, + "loss": 0.3576, + "step": 30708 + }, + { + "epoch": 0.530636577273984, + "grad_norm": 1.0054218614035029, + "learning_rate": 9.494013153367881e-06, + "loss": 0.4317, + "step": 30709 + }, + { + "epoch": 0.5306538567873929, + "grad_norm": 0.8992329960114154, + "learning_rate": 9.493454221501574e-06, + "loss": 0.4446, + "step": 30710 + }, + { + "epoch": 0.5306711363008018, + "grad_norm": 1.030556754929545, + "learning_rate": 9.492895291221811e-06, + "loss": 0.4131, + "step": 30711 + }, + { + "epoch": 0.5306884158142107, + "grad_norm": 1.4201691900837408, + "learning_rate": 9.492336362530334e-06, + "loss": 0.4654, + "step": 30712 + }, + { + "epoch": 0.5307056953276196, + "grad_norm": 0.7487237855947226, + "learning_rate": 9.491777435428904e-06, + "loss": 0.3895, + "step": 30713 + }, + { + "epoch": 0.5307229748410285, + "grad_norm": 1.193915662460588, + "learning_rate": 9.491218509919261e-06, + "loss": 0.6187, + "step": 30714 + }, + { + "epoch": 0.5307402543544374, + "grad_norm": 1.196121416704497, + "learning_rate": 9.490659586003163e-06, + "loss": 0.4487, + "step": 30715 + }, + { + "epoch": 0.5307575338678463, + "grad_norm": 1.0961659254803546, + "learning_rate": 9.490100663682357e-06, + "loss": 0.5098, + "step": 30716 + }, + { + "epoch": 0.5307748133812552, + "grad_norm": 1.183905576394466, + "learning_rate": 9.489541742958592e-06, + "loss": 0.4764, + "step": 30717 + }, + { + "epoch": 0.5307920928946641, + "grad_norm": 0.8464854676479561, + "learning_rate": 9.488982823833625e-06, + "loss": 0.6994, + "step": 30718 + }, + { + "epoch": 0.530809372408073, + "grad_norm": 1.038950047471922, + "learning_rate": 9.4884239063092e-06, + "loss": 0.383, + "step": 30719 + }, + { + "epoch": 0.5308266519214819, + "grad_norm": 0.8038361744967285, + "learning_rate": 9.487864990387071e-06, + "loss": 0.4676, + "step": 30720 + }, + { + "epoch": 0.5308439314348908, + "grad_norm": 1.6758471866605507, + "learning_rate": 9.487306076068986e-06, + "loss": 0.4099, + "step": 30721 + }, + { + "epoch": 0.5308612109482997, + "grad_norm": 0.7432215837900549, + "learning_rate": 9.4867471633567e-06, + "loss": 0.2175, + "step": 30722 + }, + { + "epoch": 0.5308784904617087, + "grad_norm": 1.2468748573269108, + "learning_rate": 9.48618825225196e-06, + "loss": 0.3757, + "step": 30723 + }, + { + "epoch": 0.5308957699751174, + "grad_norm": 0.9253962115371086, + "learning_rate": 9.485629342756518e-06, + "loss": 0.4627, + "step": 30724 + }, + { + "epoch": 0.5309130494885264, + "grad_norm": 1.0302653417597245, + "learning_rate": 9.485070434872122e-06, + "loss": 0.4461, + "step": 30725 + }, + { + "epoch": 0.5309303290019353, + "grad_norm": 0.6553969071126597, + "learning_rate": 9.484511528600527e-06, + "loss": 0.7848, + "step": 30726 + }, + { + "epoch": 0.5309476085153442, + "grad_norm": 0.8552454692786132, + "learning_rate": 9.483952623943483e-06, + "loss": 0.5685, + "step": 30727 + }, + { + "epoch": 0.5309648880287531, + "grad_norm": 1.1454015221007658, + "learning_rate": 9.483393720902732e-06, + "loss": 0.347, + "step": 30728 + }, + { + "epoch": 0.530982167542162, + "grad_norm": 0.9357082524403331, + "learning_rate": 9.482834819480038e-06, + "loss": 0.3848, + "step": 30729 + }, + { + "epoch": 0.5309994470555709, + "grad_norm": 0.9330018396736801, + "learning_rate": 9.482275919677138e-06, + "loss": 0.3509, + "step": 30730 + }, + { + "epoch": 0.5310167265689798, + "grad_norm": 1.222478224488949, + "learning_rate": 9.481717021495795e-06, + "loss": 0.4376, + "step": 30731 + }, + { + "epoch": 0.5310340060823887, + "grad_norm": 0.9190663317078266, + "learning_rate": 9.481158124937748e-06, + "loss": 0.5354, + "step": 30732 + }, + { + "epoch": 0.5310512855957976, + "grad_norm": 1.1197647558482957, + "learning_rate": 9.48059923000476e-06, + "loss": 0.4879, + "step": 30733 + }, + { + "epoch": 0.5310685651092065, + "grad_norm": 1.230372783482376, + "learning_rate": 9.48004033669857e-06, + "loss": 0.3884, + "step": 30734 + }, + { + "epoch": 0.5310858446226154, + "grad_norm": 0.6160370172170123, + "learning_rate": 9.479481445020934e-06, + "loss": 0.245, + "step": 30735 + }, + { + "epoch": 0.5311031241360243, + "grad_norm": 1.102252429747214, + "learning_rate": 9.4789225549736e-06, + "loss": 0.4272, + "step": 30736 + }, + { + "epoch": 0.5311204036494332, + "grad_norm": 0.5356346768875224, + "learning_rate": 9.478363666558323e-06, + "loss": 0.6646, + "step": 30737 + }, + { + "epoch": 0.5311376831628422, + "grad_norm": 0.5947767477971584, + "learning_rate": 9.477804779776853e-06, + "loss": 0.3369, + "step": 30738 + }, + { + "epoch": 0.5311549626762511, + "grad_norm": 1.3776112406600511, + "learning_rate": 9.477245894630933e-06, + "loss": 0.4276, + "step": 30739 + }, + { + "epoch": 0.53117224218966, + "grad_norm": 0.7150747794379909, + "learning_rate": 9.476687011122322e-06, + "loss": 0.5446, + "step": 30740 + }, + { + "epoch": 0.5311895217030689, + "grad_norm": 1.2428135376604603, + "learning_rate": 9.476128129252763e-06, + "loss": 0.5113, + "step": 30741 + }, + { + "epoch": 0.5312068012164778, + "grad_norm": 0.8959536972900889, + "learning_rate": 9.475569249024012e-06, + "loss": 0.2701, + "step": 30742 + }, + { + "epoch": 0.5312240807298867, + "grad_norm": 1.5908587146040463, + "learning_rate": 9.475010370437819e-06, + "loss": 0.5181, + "step": 30743 + }, + { + "epoch": 0.5312413602432956, + "grad_norm": 1.0344165311319216, + "learning_rate": 9.474451493495935e-06, + "loss": 0.6616, + "step": 30744 + }, + { + "epoch": 0.5312586397567044, + "grad_norm": 1.1960391103486994, + "learning_rate": 9.473892618200107e-06, + "loss": 0.4022, + "step": 30745 + }, + { + "epoch": 0.5312759192701133, + "grad_norm": 0.8456578119926735, + "learning_rate": 9.473333744552086e-06, + "loss": 0.296, + "step": 30746 + }, + { + "epoch": 0.5312931987835222, + "grad_norm": 1.3770378838953805, + "learning_rate": 9.472774872553629e-06, + "loss": 0.3044, + "step": 30747 + }, + { + "epoch": 0.5313104782969311, + "grad_norm": 0.9110497429690086, + "learning_rate": 9.472216002206473e-06, + "loss": 0.4428, + "step": 30748 + }, + { + "epoch": 0.53132775781034, + "grad_norm": 1.3977894015303458, + "learning_rate": 9.471657133512386e-06, + "loss": 0.682, + "step": 30749 + }, + { + "epoch": 0.5313450373237489, + "grad_norm": 1.0712011193465785, + "learning_rate": 9.4710982664731e-06, + "loss": 0.6502, + "step": 30750 + }, + { + "epoch": 0.5313623168371578, + "grad_norm": 0.8995193216177972, + "learning_rate": 9.47053940109038e-06, + "loss": 0.3003, + "step": 30751 + }, + { + "epoch": 0.5313795963505668, + "grad_norm": 0.4760675683390465, + "learning_rate": 9.469980537365968e-06, + "loss": 0.648, + "step": 30752 + }, + { + "epoch": 0.5313968758639757, + "grad_norm": 1.1474140685543535, + "learning_rate": 9.46942167530162e-06, + "loss": 0.6617, + "step": 30753 + }, + { + "epoch": 0.5314141553773846, + "grad_norm": 0.4488726113080603, + "learning_rate": 9.468862814899083e-06, + "loss": 0.5205, + "step": 30754 + }, + { + "epoch": 0.5314314348907935, + "grad_norm": 0.8957308030672154, + "learning_rate": 9.468303956160107e-06, + "loss": 0.5021, + "step": 30755 + }, + { + "epoch": 0.5314487144042024, + "grad_norm": 1.0106032818438107, + "learning_rate": 9.467745099086442e-06, + "loss": 0.4206, + "step": 30756 + }, + { + "epoch": 0.5314659939176113, + "grad_norm": 0.7703691340258625, + "learning_rate": 9.46718624367984e-06, + "loss": 0.4891, + "step": 30757 + }, + { + "epoch": 0.5314832734310202, + "grad_norm": 0.5061542751973237, + "learning_rate": 9.466627389942057e-06, + "loss": 0.8176, + "step": 30758 + }, + { + "epoch": 0.5315005529444291, + "grad_norm": 0.8763278379731742, + "learning_rate": 9.466068537874832e-06, + "loss": 0.291, + "step": 30759 + }, + { + "epoch": 0.531517832457838, + "grad_norm": 0.9154949143680056, + "learning_rate": 9.465509687479922e-06, + "loss": 0.4216, + "step": 30760 + }, + { + "epoch": 0.5315351119712469, + "grad_norm": 1.363082764561387, + "learning_rate": 9.464950838759075e-06, + "loss": 0.508, + "step": 30761 + }, + { + "epoch": 0.5315523914846558, + "grad_norm": 1.3939633857510203, + "learning_rate": 9.464391991714044e-06, + "loss": 0.6428, + "step": 30762 + }, + { + "epoch": 0.5315696709980647, + "grad_norm": 1.0715122860339599, + "learning_rate": 9.463833146346578e-06, + "loss": 0.5843, + "step": 30763 + }, + { + "epoch": 0.5315869505114736, + "grad_norm": 0.958059495933983, + "learning_rate": 9.46327430265843e-06, + "loss": 0.3901, + "step": 30764 + }, + { + "epoch": 0.5316042300248826, + "grad_norm": 0.873898079560909, + "learning_rate": 9.462715460651343e-06, + "loss": 0.3251, + "step": 30765 + }, + { + "epoch": 0.5316215095382913, + "grad_norm": 1.3650903460710835, + "learning_rate": 9.462156620327075e-06, + "loss": 0.3825, + "step": 30766 + }, + { + "epoch": 0.5316387890517003, + "grad_norm": 0.8640349132063725, + "learning_rate": 9.461597781687369e-06, + "loss": 0.4126, + "step": 30767 + }, + { + "epoch": 0.5316560685651092, + "grad_norm": 0.7876901101882524, + "learning_rate": 9.461038944733985e-06, + "loss": 0.351, + "step": 30768 + }, + { + "epoch": 0.5316733480785181, + "grad_norm": 0.46333306154850695, + "learning_rate": 9.460480109468668e-06, + "loss": 0.639, + "step": 30769 + }, + { + "epoch": 0.531690627591927, + "grad_norm": 0.9381711713701792, + "learning_rate": 9.459921275893164e-06, + "loss": 0.434, + "step": 30770 + }, + { + "epoch": 0.5317079071053359, + "grad_norm": 1.348473276585432, + "learning_rate": 9.459362444009232e-06, + "loss": 0.3887, + "step": 30771 + }, + { + "epoch": 0.5317251866187448, + "grad_norm": 1.10749247832502, + "learning_rate": 9.458803613818614e-06, + "loss": 0.439, + "step": 30772 + }, + { + "epoch": 0.5317424661321537, + "grad_norm": 1.0166063654627597, + "learning_rate": 9.458244785323067e-06, + "loss": 0.4703, + "step": 30773 + }, + { + "epoch": 0.5317597456455626, + "grad_norm": 0.7418526917268282, + "learning_rate": 9.457685958524336e-06, + "loss": 0.5581, + "step": 30774 + }, + { + "epoch": 0.5317770251589715, + "grad_norm": 1.0649336623654744, + "learning_rate": 9.457127133424176e-06, + "loss": 0.4195, + "step": 30775 + }, + { + "epoch": 0.5317943046723804, + "grad_norm": 0.7194620698500938, + "learning_rate": 9.456568310024333e-06, + "loss": 0.2973, + "step": 30776 + }, + { + "epoch": 0.5318115841857893, + "grad_norm": 0.9561450060226894, + "learning_rate": 9.456009488326561e-06, + "loss": 0.5922, + "step": 30777 + }, + { + "epoch": 0.5318288636991982, + "grad_norm": 0.8145652657561753, + "learning_rate": 9.455450668332612e-06, + "loss": 0.4577, + "step": 30778 + }, + { + "epoch": 0.5318461432126071, + "grad_norm": 1.2802388785281904, + "learning_rate": 9.454891850044228e-06, + "loss": 0.425, + "step": 30779 + }, + { + "epoch": 0.531863422726016, + "grad_norm": 0.9852943667753664, + "learning_rate": 9.454333033463165e-06, + "loss": 0.2887, + "step": 30780 + }, + { + "epoch": 0.531880702239425, + "grad_norm": 1.2206940849225922, + "learning_rate": 9.453774218591172e-06, + "loss": 0.3655, + "step": 30781 + }, + { + "epoch": 0.5318979817528339, + "grad_norm": 0.9951516392644243, + "learning_rate": 9.45321540543e-06, + "loss": 0.3633, + "step": 30782 + }, + { + "epoch": 0.5319152612662428, + "grad_norm": 1.0750020550383654, + "learning_rate": 9.452656593981398e-06, + "loss": 0.4328, + "step": 30783 + }, + { + "epoch": 0.5319325407796517, + "grad_norm": 0.642400212510177, + "learning_rate": 9.452097784247122e-06, + "loss": 0.4368, + "step": 30784 + }, + { + "epoch": 0.5319498202930606, + "grad_norm": 0.9542056033797356, + "learning_rate": 9.451538976228912e-06, + "loss": 0.5518, + "step": 30785 + }, + { + "epoch": 0.5319670998064695, + "grad_norm": 1.1791829459600294, + "learning_rate": 9.450980169928526e-06, + "loss": 0.3808, + "step": 30786 + }, + { + "epoch": 0.5319843793198783, + "grad_norm": 0.8909099084423755, + "learning_rate": 9.45042136534771e-06, + "loss": 0.5516, + "step": 30787 + }, + { + "epoch": 0.5320016588332872, + "grad_norm": 1.125649716782642, + "learning_rate": 9.449862562488217e-06, + "loss": 0.5237, + "step": 30788 + }, + { + "epoch": 0.5320189383466961, + "grad_norm": 1.2523598453963214, + "learning_rate": 9.4493037613518e-06, + "loss": 0.5751, + "step": 30789 + }, + { + "epoch": 0.532036217860105, + "grad_norm": 0.9208269334866072, + "learning_rate": 9.4487449619402e-06, + "loss": 0.4366, + "step": 30790 + }, + { + "epoch": 0.5320534973735139, + "grad_norm": 0.8156092896836011, + "learning_rate": 9.448186164255176e-06, + "loss": 0.4298, + "step": 30791 + }, + { + "epoch": 0.5320707768869228, + "grad_norm": 0.8513447825465335, + "learning_rate": 9.447627368298472e-06, + "loss": 0.3895, + "step": 30792 + }, + { + "epoch": 0.5320880564003317, + "grad_norm": 0.8809709544846205, + "learning_rate": 9.447068574071846e-06, + "loss": 0.3879, + "step": 30793 + }, + { + "epoch": 0.5321053359137407, + "grad_norm": 0.9724415899723443, + "learning_rate": 9.446509781577038e-06, + "loss": 0.4438, + "step": 30794 + }, + { + "epoch": 0.5321226154271496, + "grad_norm": 0.4170755284420599, + "learning_rate": 9.445950990815806e-06, + "loss": 0.6275, + "step": 30795 + }, + { + "epoch": 0.5321398949405585, + "grad_norm": 1.2521935642588802, + "learning_rate": 9.445392201789897e-06, + "loss": 0.3694, + "step": 30796 + }, + { + "epoch": 0.5321571744539674, + "grad_norm": 0.9693611173772791, + "learning_rate": 9.444833414501062e-06, + "loss": 0.3361, + "step": 30797 + }, + { + "epoch": 0.5321744539673763, + "grad_norm": 1.675169063137495, + "learning_rate": 9.444274628951048e-06, + "loss": 0.3197, + "step": 30798 + }, + { + "epoch": 0.5321917334807852, + "grad_norm": 0.778664006318266, + "learning_rate": 9.443715845141613e-06, + "loss": 0.4056, + "step": 30799 + }, + { + "epoch": 0.5322090129941941, + "grad_norm": 1.0405743843038944, + "learning_rate": 9.4431570630745e-06, + "loss": 0.3344, + "step": 30800 + }, + { + "epoch": 0.532226292507603, + "grad_norm": 1.0118923852248256, + "learning_rate": 9.442598282751461e-06, + "loss": 0.2825, + "step": 30801 + }, + { + "epoch": 0.5322435720210119, + "grad_norm": 0.9037208651288107, + "learning_rate": 9.442039504174248e-06, + "loss": 0.4672, + "step": 30802 + }, + { + "epoch": 0.5322608515344208, + "grad_norm": 0.6754152665341449, + "learning_rate": 9.441480727344606e-06, + "loss": 0.3257, + "step": 30803 + }, + { + "epoch": 0.5322781310478297, + "grad_norm": 1.1353680482833755, + "learning_rate": 9.440921952264294e-06, + "loss": 0.3232, + "step": 30804 + }, + { + "epoch": 0.5322954105612386, + "grad_norm": 1.3968018488334513, + "learning_rate": 9.440363178935054e-06, + "loss": 0.4973, + "step": 30805 + }, + { + "epoch": 0.5323126900746475, + "grad_norm": 0.8286654328837841, + "learning_rate": 9.439804407358639e-06, + "loss": 0.3167, + "step": 30806 + }, + { + "epoch": 0.5323299695880565, + "grad_norm": 1.4092720943261385, + "learning_rate": 9.439245637536799e-06, + "loss": 0.4803, + "step": 30807 + }, + { + "epoch": 0.5323472491014652, + "grad_norm": 0.87906313211101, + "learning_rate": 9.438686869471287e-06, + "loss": 0.3487, + "step": 30808 + }, + { + "epoch": 0.5323645286148742, + "grad_norm": 1.0683761585731983, + "learning_rate": 9.43812810316385e-06, + "loss": 0.5918, + "step": 30809 + }, + { + "epoch": 0.5323818081282831, + "grad_norm": 0.8257308873638383, + "learning_rate": 9.437569338616235e-06, + "loss": 0.2635, + "step": 30810 + }, + { + "epoch": 0.532399087641692, + "grad_norm": 0.7030782040493028, + "learning_rate": 9.437010575830198e-06, + "loss": 0.6254, + "step": 30811 + }, + { + "epoch": 0.5324163671551009, + "grad_norm": 0.8172617844902523, + "learning_rate": 9.436451814807483e-06, + "loss": 0.3221, + "step": 30812 + }, + { + "epoch": 0.5324336466685098, + "grad_norm": 0.7275812465402997, + "learning_rate": 9.43589305554985e-06, + "loss": 0.3551, + "step": 30813 + }, + { + "epoch": 0.5324509261819187, + "grad_norm": 0.8094473099634257, + "learning_rate": 9.435334298059038e-06, + "loss": 0.3335, + "step": 30814 + }, + { + "epoch": 0.5324682056953276, + "grad_norm": 1.1250155766107173, + "learning_rate": 9.434775542336803e-06, + "loss": 0.3453, + "step": 30815 + }, + { + "epoch": 0.5324854852087365, + "grad_norm": 1.0077443953949063, + "learning_rate": 9.434216788384893e-06, + "loss": 0.3511, + "step": 30816 + }, + { + "epoch": 0.5325027647221454, + "grad_norm": 1.1047645916410538, + "learning_rate": 9.433658036205062e-06, + "loss": 0.2501, + "step": 30817 + }, + { + "epoch": 0.5325200442355543, + "grad_norm": 1.441531696112203, + "learning_rate": 9.433099285799054e-06, + "loss": 0.5074, + "step": 30818 + }, + { + "epoch": 0.5325373237489632, + "grad_norm": 0.7899373900413501, + "learning_rate": 9.432540537168626e-06, + "loss": 0.343, + "step": 30819 + }, + { + "epoch": 0.5325546032623721, + "grad_norm": 0.5866990211434698, + "learning_rate": 9.431981790315523e-06, + "loss": 0.7399, + "step": 30820 + }, + { + "epoch": 0.532571882775781, + "grad_norm": 1.0657586156874883, + "learning_rate": 9.431423045241494e-06, + "loss": 0.6491, + "step": 30821 + }, + { + "epoch": 0.53258916228919, + "grad_norm": 0.9987352259399611, + "learning_rate": 9.430864301948292e-06, + "loss": 0.403, + "step": 30822 + }, + { + "epoch": 0.5326064418025989, + "grad_norm": 0.7936452775411542, + "learning_rate": 9.430305560437664e-06, + "loss": 0.408, + "step": 30823 + }, + { + "epoch": 0.5326237213160078, + "grad_norm": 1.060678542500434, + "learning_rate": 9.42974682071137e-06, + "loss": 0.463, + "step": 30824 + }, + { + "epoch": 0.5326410008294167, + "grad_norm": 1.301767759425192, + "learning_rate": 9.429188082771143e-06, + "loss": 0.2991, + "step": 30825 + }, + { + "epoch": 0.5326582803428256, + "grad_norm": 1.0489556489636618, + "learning_rate": 9.428629346618748e-06, + "loss": 0.3207, + "step": 30826 + }, + { + "epoch": 0.5326755598562345, + "grad_norm": 0.8597249844132514, + "learning_rate": 9.428070612255926e-06, + "loss": 0.5682, + "step": 30827 + }, + { + "epoch": 0.5326928393696434, + "grad_norm": 0.9919767591803095, + "learning_rate": 9.427511879684435e-06, + "loss": 0.4031, + "step": 30828 + }, + { + "epoch": 0.5327101188830523, + "grad_norm": 0.7842773997859036, + "learning_rate": 9.426953148906013e-06, + "loss": 0.5677, + "step": 30829 + }, + { + "epoch": 0.5327273983964611, + "grad_norm": 0.9337049580098594, + "learning_rate": 9.426394419922425e-06, + "loss": 0.5379, + "step": 30830 + }, + { + "epoch": 0.53274467790987, + "grad_norm": 0.7919314808427742, + "learning_rate": 9.42583569273541e-06, + "loss": 0.3465, + "step": 30831 + }, + { + "epoch": 0.5327619574232789, + "grad_norm": 0.9471351412505961, + "learning_rate": 9.425276967346719e-06, + "loss": 0.5629, + "step": 30832 + }, + { + "epoch": 0.5327792369366878, + "grad_norm": 1.2635070674871027, + "learning_rate": 9.42471824375811e-06, + "loss": 0.4342, + "step": 30833 + }, + { + "epoch": 0.5327965164500967, + "grad_norm": 0.6455018224007826, + "learning_rate": 9.424159521971321e-06, + "loss": 0.3121, + "step": 30834 + }, + { + "epoch": 0.5328137959635056, + "grad_norm": 0.8353182716604359, + "learning_rate": 9.42360080198811e-06, + "loss": 0.3193, + "step": 30835 + }, + { + "epoch": 0.5328310754769146, + "grad_norm": 0.9259405475380968, + "learning_rate": 9.423042083810225e-06, + "loss": 0.4828, + "step": 30836 + }, + { + "epoch": 0.5328483549903235, + "grad_norm": 1.678969264565825, + "learning_rate": 9.422483367439418e-06, + "loss": 0.4883, + "step": 30837 + }, + { + "epoch": 0.5328656345037324, + "grad_norm": 0.8796574771293562, + "learning_rate": 9.421924652877435e-06, + "loss": 0.4611, + "step": 30838 + }, + { + "epoch": 0.5328829140171413, + "grad_norm": 0.8326350600774494, + "learning_rate": 9.42136594012603e-06, + "loss": 0.4229, + "step": 30839 + }, + { + "epoch": 0.5329001935305502, + "grad_norm": 0.5465051362957132, + "learning_rate": 9.42080722918695e-06, + "loss": 0.647, + "step": 30840 + }, + { + "epoch": 0.5329174730439591, + "grad_norm": 0.7845542186676322, + "learning_rate": 9.420248520061944e-06, + "loss": 0.3622, + "step": 30841 + }, + { + "epoch": 0.532934752557368, + "grad_norm": 1.5608187136183695, + "learning_rate": 9.419689812752765e-06, + "loss": 0.5011, + "step": 30842 + }, + { + "epoch": 0.5329520320707769, + "grad_norm": 1.3508517208715671, + "learning_rate": 9.41913110726116e-06, + "loss": 0.4362, + "step": 30843 + }, + { + "epoch": 0.5329693115841858, + "grad_norm": 0.8524537144070391, + "learning_rate": 9.418572403588885e-06, + "loss": 0.4008, + "step": 30844 + }, + { + "epoch": 0.5329865910975947, + "grad_norm": 1.2322456151322798, + "learning_rate": 9.418013701737682e-06, + "loss": 0.3951, + "step": 30845 + }, + { + "epoch": 0.5330038706110036, + "grad_norm": 0.5680063285213099, + "learning_rate": 9.417455001709305e-06, + "loss": 0.2655, + "step": 30846 + }, + { + "epoch": 0.5330211501244125, + "grad_norm": 0.9758879668306342, + "learning_rate": 9.416896303505502e-06, + "loss": 0.4605, + "step": 30847 + }, + { + "epoch": 0.5330384296378214, + "grad_norm": 1.2654242499500388, + "learning_rate": 9.416337607128027e-06, + "loss": 0.4888, + "step": 30848 + }, + { + "epoch": 0.5330557091512304, + "grad_norm": 1.0605154776668562, + "learning_rate": 9.415778912578622e-06, + "loss": 0.4548, + "step": 30849 + }, + { + "epoch": 0.5330729886646393, + "grad_norm": 1.3861260250937724, + "learning_rate": 9.415220219859047e-06, + "loss": 0.4193, + "step": 30850 + }, + { + "epoch": 0.533090268178048, + "grad_norm": 1.3814699822784555, + "learning_rate": 9.414661528971045e-06, + "loss": 0.314, + "step": 30851 + }, + { + "epoch": 0.533107547691457, + "grad_norm": 1.064999372537385, + "learning_rate": 9.414102839916365e-06, + "loss": 0.4235, + "step": 30852 + }, + { + "epoch": 0.5331248272048659, + "grad_norm": 1.0100670725207608, + "learning_rate": 9.413544152696765e-06, + "loss": 0.5716, + "step": 30853 + }, + { + "epoch": 0.5331421067182748, + "grad_norm": 0.6939876166097325, + "learning_rate": 9.412985467313986e-06, + "loss": 0.5388, + "step": 30854 + }, + { + "epoch": 0.5331593862316837, + "grad_norm": 1.3416616745712482, + "learning_rate": 9.41242678376978e-06, + "loss": 0.4569, + "step": 30855 + }, + { + "epoch": 0.5331766657450926, + "grad_norm": 0.7902370060631391, + "learning_rate": 9.411868102065898e-06, + "loss": 0.4058, + "step": 30856 + }, + { + "epoch": 0.5331939452585015, + "grad_norm": 1.3182195354387432, + "learning_rate": 9.411309422204091e-06, + "loss": 0.5035, + "step": 30857 + }, + { + "epoch": 0.5332112247719104, + "grad_norm": 0.9028907846998099, + "learning_rate": 9.410750744186106e-06, + "loss": 0.479, + "step": 30858 + }, + { + "epoch": 0.5332285042853193, + "grad_norm": 0.9654155133727177, + "learning_rate": 9.4101920680137e-06, + "loss": 0.3902, + "step": 30859 + }, + { + "epoch": 0.5332457837987282, + "grad_norm": 0.6329502869765854, + "learning_rate": 9.40963339368861e-06, + "loss": 0.6617, + "step": 30860 + }, + { + "epoch": 0.5332630633121371, + "grad_norm": 0.9835208628324861, + "learning_rate": 9.409074721212598e-06, + "loss": 0.2701, + "step": 30861 + }, + { + "epoch": 0.533280342825546, + "grad_norm": 1.1677561998602293, + "learning_rate": 9.408516050587407e-06, + "loss": 0.384, + "step": 30862 + }, + { + "epoch": 0.533297622338955, + "grad_norm": 0.8676240613026508, + "learning_rate": 9.407957381814788e-06, + "loss": 0.2946, + "step": 30863 + }, + { + "epoch": 0.5333149018523639, + "grad_norm": 1.0465335601444605, + "learning_rate": 9.407398714896494e-06, + "loss": 0.4057, + "step": 30864 + }, + { + "epoch": 0.5333321813657728, + "grad_norm": 1.0898012531562444, + "learning_rate": 9.40684004983427e-06, + "loss": 0.5994, + "step": 30865 + }, + { + "epoch": 0.5333494608791817, + "grad_norm": 0.889459458616471, + "learning_rate": 9.406281386629869e-06, + "loss": 0.5613, + "step": 30866 + }, + { + "epoch": 0.5333667403925906, + "grad_norm": 0.9792124921050454, + "learning_rate": 9.405722725285036e-06, + "loss": 0.2854, + "step": 30867 + }, + { + "epoch": 0.5333840199059995, + "grad_norm": 0.6500631585616781, + "learning_rate": 9.405164065801532e-06, + "loss": 0.5612, + "step": 30868 + }, + { + "epoch": 0.5334012994194084, + "grad_norm": 0.8381954068763104, + "learning_rate": 9.404605408181091e-06, + "loss": 0.3894, + "step": 30869 + }, + { + "epoch": 0.5334185789328173, + "grad_norm": 0.6056904945019731, + "learning_rate": 9.404046752425478e-06, + "loss": 0.2041, + "step": 30870 + }, + { + "epoch": 0.5334358584462262, + "grad_norm": 1.0807920412388492, + "learning_rate": 9.403488098536435e-06, + "loss": 0.3096, + "step": 30871 + }, + { + "epoch": 0.533453137959635, + "grad_norm": 0.9585782803222869, + "learning_rate": 9.402929446515709e-06, + "loss": 0.4723, + "step": 30872 + }, + { + "epoch": 0.5334704174730439, + "grad_norm": 0.49952393575922216, + "learning_rate": 9.402370796365057e-06, + "loss": 0.6573, + "step": 30873 + }, + { + "epoch": 0.5334876969864528, + "grad_norm": 0.8124704789158377, + "learning_rate": 9.401812148086223e-06, + "loss": 0.3292, + "step": 30874 + }, + { + "epoch": 0.5335049764998617, + "grad_norm": 1.0561850827448866, + "learning_rate": 9.401253501680961e-06, + "loss": 0.4942, + "step": 30875 + }, + { + "epoch": 0.5335222560132706, + "grad_norm": 0.6879646579702535, + "learning_rate": 9.400694857151013e-06, + "loss": 0.7142, + "step": 30876 + }, + { + "epoch": 0.5335395355266795, + "grad_norm": 1.493740476847148, + "learning_rate": 9.40013621449814e-06, + "loss": 0.5114, + "step": 30877 + }, + { + "epoch": 0.5335568150400885, + "grad_norm": 1.2582479439174512, + "learning_rate": 9.399577573724081e-06, + "loss": 0.6844, + "step": 30878 + }, + { + "epoch": 0.5335740945534974, + "grad_norm": 1.0039076198610626, + "learning_rate": 9.399018934830598e-06, + "loss": 0.3505, + "step": 30879 + }, + { + "epoch": 0.5335913740669063, + "grad_norm": 0.9033118616140775, + "learning_rate": 9.398460297819426e-06, + "loss": 0.4375, + "step": 30880 + }, + { + "epoch": 0.5336086535803152, + "grad_norm": 0.765877788134009, + "learning_rate": 9.397901662692326e-06, + "loss": 0.3124, + "step": 30881 + }, + { + "epoch": 0.5336259330937241, + "grad_norm": 1.0299569503942014, + "learning_rate": 9.397343029451044e-06, + "loss": 0.4919, + "step": 30882 + }, + { + "epoch": 0.533643212607133, + "grad_norm": 0.8462506657083725, + "learning_rate": 9.396784398097326e-06, + "loss": 0.4205, + "step": 30883 + }, + { + "epoch": 0.5336604921205419, + "grad_norm": 1.596381319059638, + "learning_rate": 9.396225768632931e-06, + "loss": 0.4127, + "step": 30884 + }, + { + "epoch": 0.5336777716339508, + "grad_norm": 0.7528511569996239, + "learning_rate": 9.395667141059597e-06, + "loss": 0.2946, + "step": 30885 + }, + { + "epoch": 0.5336950511473597, + "grad_norm": 1.5695616890978965, + "learning_rate": 9.395108515379082e-06, + "loss": 0.3581, + "step": 30886 + }, + { + "epoch": 0.5337123306607686, + "grad_norm": 1.0932760890078612, + "learning_rate": 9.394549891593132e-06, + "loss": 0.4468, + "step": 30887 + }, + { + "epoch": 0.5337296101741775, + "grad_norm": 1.6178103999845623, + "learning_rate": 9.3939912697035e-06, + "loss": 0.3682, + "step": 30888 + }, + { + "epoch": 0.5337468896875864, + "grad_norm": 1.1219462923718426, + "learning_rate": 9.39343264971193e-06, + "loss": 0.5509, + "step": 30889 + }, + { + "epoch": 0.5337641692009953, + "grad_norm": 1.10142596453051, + "learning_rate": 9.392874031620179e-06, + "loss": 0.4043, + "step": 30890 + }, + { + "epoch": 0.5337814487144042, + "grad_norm": 0.9476494918044559, + "learning_rate": 9.39231541542999e-06, + "loss": 0.4692, + "step": 30891 + }, + { + "epoch": 0.5337987282278132, + "grad_norm": 1.6736451524385885, + "learning_rate": 9.391756801143113e-06, + "loss": 0.3633, + "step": 30892 + }, + { + "epoch": 0.533816007741222, + "grad_norm": 1.104488284992728, + "learning_rate": 9.391198188761306e-06, + "loss": 0.392, + "step": 30893 + }, + { + "epoch": 0.5338332872546309, + "grad_norm": 1.125256061358932, + "learning_rate": 9.390639578286308e-06, + "loss": 0.4195, + "step": 30894 + }, + { + "epoch": 0.5338505667680398, + "grad_norm": 1.128091599592537, + "learning_rate": 9.390080969719876e-06, + "loss": 0.5395, + "step": 30895 + }, + { + "epoch": 0.5338678462814487, + "grad_norm": 1.3927149594945005, + "learning_rate": 9.389522363063753e-06, + "loss": 0.3788, + "step": 30896 + }, + { + "epoch": 0.5338851257948576, + "grad_norm": 0.9312421016822434, + "learning_rate": 9.388963758319695e-06, + "loss": 0.3814, + "step": 30897 + }, + { + "epoch": 0.5339024053082665, + "grad_norm": 1.0311004348181267, + "learning_rate": 9.388405155489447e-06, + "loss": 0.412, + "step": 30898 + }, + { + "epoch": 0.5339196848216754, + "grad_norm": 1.4652673123891382, + "learning_rate": 9.387846554574764e-06, + "loss": 0.3226, + "step": 30899 + }, + { + "epoch": 0.5339369643350843, + "grad_norm": 0.8976073760161554, + "learning_rate": 9.387287955577389e-06, + "loss": 0.3551, + "step": 30900 + }, + { + "epoch": 0.5339542438484932, + "grad_norm": 0.9441750852902459, + "learning_rate": 9.386729358499077e-06, + "loss": 0.4177, + "step": 30901 + }, + { + "epoch": 0.5339715233619021, + "grad_norm": 1.3974101875313552, + "learning_rate": 9.386170763341574e-06, + "loss": 0.2005, + "step": 30902 + }, + { + "epoch": 0.533988802875311, + "grad_norm": 1.1961209881718737, + "learning_rate": 9.38561217010663e-06, + "loss": 0.4883, + "step": 30903 + }, + { + "epoch": 0.5340060823887199, + "grad_norm": 1.0449379139143495, + "learning_rate": 9.385053578795998e-06, + "loss": 0.3895, + "step": 30904 + }, + { + "epoch": 0.5340233619021288, + "grad_norm": 0.9830494547331552, + "learning_rate": 9.384494989411422e-06, + "loss": 0.5254, + "step": 30905 + }, + { + "epoch": 0.5340406414155378, + "grad_norm": 1.703839693271531, + "learning_rate": 9.383936401954656e-06, + "loss": 0.6006, + "step": 30906 + }, + { + "epoch": 0.5340579209289467, + "grad_norm": 1.1448568661580665, + "learning_rate": 9.383377816427447e-06, + "loss": 0.5443, + "step": 30907 + }, + { + "epoch": 0.5340752004423556, + "grad_norm": 1.253101111637949, + "learning_rate": 9.382819232831549e-06, + "loss": 0.3504, + "step": 30908 + }, + { + "epoch": 0.5340924799557645, + "grad_norm": 1.097690801598019, + "learning_rate": 9.382260651168701e-06, + "loss": 0.3979, + "step": 30909 + }, + { + "epoch": 0.5341097594691734, + "grad_norm": 0.6923464885226656, + "learning_rate": 9.381702071440667e-06, + "loss": 0.5523, + "step": 30910 + }, + { + "epoch": 0.5341270389825823, + "grad_norm": 0.8215881853745228, + "learning_rate": 9.381143493649184e-06, + "loss": 0.5641, + "step": 30911 + }, + { + "epoch": 0.5341443184959912, + "grad_norm": 0.9260980010813834, + "learning_rate": 9.38058491779601e-06, + "loss": 0.4223, + "step": 30912 + }, + { + "epoch": 0.5341615980094001, + "grad_norm": 1.2031573461275158, + "learning_rate": 9.380026343882892e-06, + "loss": 0.4536, + "step": 30913 + }, + { + "epoch": 0.5341788775228089, + "grad_norm": 1.0499641040052992, + "learning_rate": 9.379467771911574e-06, + "loss": 0.3958, + "step": 30914 + }, + { + "epoch": 0.5341961570362178, + "grad_norm": 0.8784659200649074, + "learning_rate": 9.378909201883814e-06, + "loss": 0.4958, + "step": 30915 + }, + { + "epoch": 0.5342134365496267, + "grad_norm": 1.5735586792704552, + "learning_rate": 9.378350633801354e-06, + "loss": 0.4978, + "step": 30916 + }, + { + "epoch": 0.5342307160630356, + "grad_norm": 0.8366418640359026, + "learning_rate": 9.377792067665949e-06, + "loss": 0.7611, + "step": 30917 + }, + { + "epoch": 0.5342479955764445, + "grad_norm": 1.2890435592119562, + "learning_rate": 9.377233503479344e-06, + "loss": 0.6453, + "step": 30918 + }, + { + "epoch": 0.5342652750898534, + "grad_norm": 0.9874250096463625, + "learning_rate": 9.376674941243297e-06, + "loss": 0.3658, + "step": 30919 + }, + { + "epoch": 0.5342825546032623, + "grad_norm": 1.2679941756793396, + "learning_rate": 9.376116380959546e-06, + "loss": 0.3787, + "step": 30920 + }, + { + "epoch": 0.5342998341166713, + "grad_norm": 1.291782951782973, + "learning_rate": 9.37555782262985e-06, + "loss": 0.4554, + "step": 30921 + }, + { + "epoch": 0.5343171136300802, + "grad_norm": 0.9514259264582398, + "learning_rate": 9.374999266255954e-06, + "loss": 0.2797, + "step": 30922 + }, + { + "epoch": 0.5343343931434891, + "grad_norm": 1.3946367675353533, + "learning_rate": 9.374440711839603e-06, + "loss": 0.2706, + "step": 30923 + }, + { + "epoch": 0.534351672656898, + "grad_norm": 1.38236063884746, + "learning_rate": 9.373882159382557e-06, + "loss": 0.4866, + "step": 30924 + }, + { + "epoch": 0.5343689521703069, + "grad_norm": 0.8260287200647857, + "learning_rate": 9.373323608886556e-06, + "loss": 0.3564, + "step": 30925 + }, + { + "epoch": 0.5343862316837158, + "grad_norm": 1.2996027370742531, + "learning_rate": 9.372765060353354e-06, + "loss": 0.4284, + "step": 30926 + }, + { + "epoch": 0.5344035111971247, + "grad_norm": 0.7446395873547009, + "learning_rate": 9.372206513784698e-06, + "loss": 0.4879, + "step": 30927 + }, + { + "epoch": 0.5344207907105336, + "grad_norm": 1.18958019632436, + "learning_rate": 9.371647969182343e-06, + "loss": 0.4974, + "step": 30928 + }, + { + "epoch": 0.5344380702239425, + "grad_norm": 1.2368457542612799, + "learning_rate": 9.371089426548027e-06, + "loss": 0.4698, + "step": 30929 + }, + { + "epoch": 0.5344553497373514, + "grad_norm": 0.7373504776367606, + "learning_rate": 9.370530885883516e-06, + "loss": 0.244, + "step": 30930 + }, + { + "epoch": 0.5344726292507603, + "grad_norm": 0.7443947530908699, + "learning_rate": 9.369972347190543e-06, + "loss": 0.4857, + "step": 30931 + }, + { + "epoch": 0.5344899087641692, + "grad_norm": 0.8971677161506141, + "learning_rate": 9.369413810470868e-06, + "loss": 0.5032, + "step": 30932 + }, + { + "epoch": 0.5345071882775781, + "grad_norm": 0.8847444098301832, + "learning_rate": 9.368855275726239e-06, + "loss": 0.4355, + "step": 30933 + }, + { + "epoch": 0.5345244677909871, + "grad_norm": 0.9128077854451356, + "learning_rate": 9.368296742958397e-06, + "loss": 0.5114, + "step": 30934 + }, + { + "epoch": 0.5345417473043959, + "grad_norm": 0.5769824991123059, + "learning_rate": 9.367738212169103e-06, + "loss": 0.6879, + "step": 30935 + }, + { + "epoch": 0.5345590268178048, + "grad_norm": 0.9759646386208393, + "learning_rate": 9.367179683360097e-06, + "loss": 0.3491, + "step": 30936 + }, + { + "epoch": 0.5345763063312137, + "grad_norm": 0.7237002435319208, + "learning_rate": 9.366621156533134e-06, + "loss": 0.3043, + "step": 30937 + }, + { + "epoch": 0.5345935858446226, + "grad_norm": 1.6483977096511795, + "learning_rate": 9.366062631689961e-06, + "loss": 0.3086, + "step": 30938 + }, + { + "epoch": 0.5346108653580315, + "grad_norm": 0.8450645853281618, + "learning_rate": 9.36550410883233e-06, + "loss": 0.3838, + "step": 30939 + }, + { + "epoch": 0.5346281448714404, + "grad_norm": 1.0679886321997927, + "learning_rate": 9.364945587961987e-06, + "loss": 0.4188, + "step": 30940 + }, + { + "epoch": 0.5346454243848493, + "grad_norm": 1.3005412263864065, + "learning_rate": 9.364387069080683e-06, + "loss": 0.4458, + "step": 30941 + }, + { + "epoch": 0.5346627038982582, + "grad_norm": 1.3975760491280391, + "learning_rate": 9.363828552190165e-06, + "loss": 0.4747, + "step": 30942 + }, + { + "epoch": 0.5346799834116671, + "grad_norm": 0.6547106499383537, + "learning_rate": 9.363270037292188e-06, + "loss": 0.3619, + "step": 30943 + }, + { + "epoch": 0.534697262925076, + "grad_norm": 0.9965164865944703, + "learning_rate": 9.362711524388497e-06, + "loss": 0.327, + "step": 30944 + }, + { + "epoch": 0.5347145424384849, + "grad_norm": 1.6775344056581836, + "learning_rate": 9.362153013480839e-06, + "loss": 0.3837, + "step": 30945 + }, + { + "epoch": 0.5347318219518938, + "grad_norm": 1.3739885160771406, + "learning_rate": 9.36159450457097e-06, + "loss": 0.3894, + "step": 30946 + }, + { + "epoch": 0.5347491014653027, + "grad_norm": 1.246202001821239, + "learning_rate": 9.361035997660631e-06, + "loss": 0.5879, + "step": 30947 + }, + { + "epoch": 0.5347663809787117, + "grad_norm": 1.0797443819591768, + "learning_rate": 9.360477492751581e-06, + "loss": 0.3226, + "step": 30948 + }, + { + "epoch": 0.5347836604921206, + "grad_norm": 0.9404000449272153, + "learning_rate": 9.359918989845558e-06, + "loss": 0.3624, + "step": 30949 + }, + { + "epoch": 0.5348009400055295, + "grad_norm": 1.6096299614354475, + "learning_rate": 9.359360488944324e-06, + "loss": 0.4461, + "step": 30950 + }, + { + "epoch": 0.5348182195189384, + "grad_norm": 1.093734594370865, + "learning_rate": 9.358801990049616e-06, + "loss": 0.4729, + "step": 30951 + }, + { + "epoch": 0.5348354990323473, + "grad_norm": 1.0694961595231565, + "learning_rate": 9.35824349316319e-06, + "loss": 0.2904, + "step": 30952 + }, + { + "epoch": 0.5348527785457562, + "grad_norm": 0.8364977280260909, + "learning_rate": 9.3576849982868e-06, + "loss": 0.5198, + "step": 30953 + }, + { + "epoch": 0.5348700580591651, + "grad_norm": 0.5929775875717334, + "learning_rate": 9.357126505422183e-06, + "loss": 0.2505, + "step": 30954 + }, + { + "epoch": 0.534887337572574, + "grad_norm": 1.2993901992710304, + "learning_rate": 9.356568014571096e-06, + "loss": 0.3218, + "step": 30955 + }, + { + "epoch": 0.5349046170859829, + "grad_norm": 1.2338713573703557, + "learning_rate": 9.356009525735286e-06, + "loss": 0.465, + "step": 30956 + }, + { + "epoch": 0.5349218965993917, + "grad_norm": 0.7280671350420468, + "learning_rate": 9.355451038916507e-06, + "loss": 0.7684, + "step": 30957 + }, + { + "epoch": 0.5349391761128006, + "grad_norm": 0.928380283129053, + "learning_rate": 9.3548925541165e-06, + "loss": 0.4568, + "step": 30958 + }, + { + "epoch": 0.5349564556262095, + "grad_norm": 1.0893687896033462, + "learning_rate": 9.354334071337023e-06, + "loss": 0.5711, + "step": 30959 + }, + { + "epoch": 0.5349737351396184, + "grad_norm": 1.220033640532708, + "learning_rate": 9.353775590579816e-06, + "loss": 0.6712, + "step": 30960 + }, + { + "epoch": 0.5349910146530273, + "grad_norm": 0.8141172837370214, + "learning_rate": 9.353217111846635e-06, + "loss": 0.3831, + "step": 30961 + }, + { + "epoch": 0.5350082941664362, + "grad_norm": 0.946822708912921, + "learning_rate": 9.352658635139225e-06, + "loss": 0.5874, + "step": 30962 + }, + { + "epoch": 0.5350255736798452, + "grad_norm": 1.2086743565210418, + "learning_rate": 9.352100160459341e-06, + "loss": 0.4685, + "step": 30963 + }, + { + "epoch": 0.5350428531932541, + "grad_norm": 1.4590848228735585, + "learning_rate": 9.351541687808731e-06, + "loss": 0.4818, + "step": 30964 + }, + { + "epoch": 0.535060132706663, + "grad_norm": 1.4420663799663644, + "learning_rate": 9.350983217189135e-06, + "loss": 0.4536, + "step": 30965 + }, + { + "epoch": 0.5350774122200719, + "grad_norm": 1.6993168100680338, + "learning_rate": 9.350424748602313e-06, + "loss": 0.5214, + "step": 30966 + }, + { + "epoch": 0.5350946917334808, + "grad_norm": 1.2922369284590252, + "learning_rate": 9.349866282050007e-06, + "loss": 0.6343, + "step": 30967 + }, + { + "epoch": 0.5351119712468897, + "grad_norm": 0.7069889600682522, + "learning_rate": 9.349307817533974e-06, + "loss": 0.2239, + "step": 30968 + }, + { + "epoch": 0.5351292507602986, + "grad_norm": 0.8965731221012376, + "learning_rate": 9.34874935505595e-06, + "loss": 0.3587, + "step": 30969 + }, + { + "epoch": 0.5351465302737075, + "grad_norm": 0.5429563107338929, + "learning_rate": 9.348190894617702e-06, + "loss": 0.5528, + "step": 30970 + }, + { + "epoch": 0.5351638097871164, + "grad_norm": 1.3606863934228504, + "learning_rate": 9.347632436220964e-06, + "loss": 0.5456, + "step": 30971 + }, + { + "epoch": 0.5351810893005253, + "grad_norm": 1.34251403502933, + "learning_rate": 9.347073979867492e-06, + "loss": 0.5254, + "step": 30972 + }, + { + "epoch": 0.5351983688139342, + "grad_norm": 1.2154774651331264, + "learning_rate": 9.346515525559033e-06, + "loss": 0.5765, + "step": 30973 + }, + { + "epoch": 0.5352156483273431, + "grad_norm": 1.3611319689231105, + "learning_rate": 9.345957073297341e-06, + "loss": 0.5362, + "step": 30974 + }, + { + "epoch": 0.535232927840752, + "grad_norm": 1.0245773772072175, + "learning_rate": 9.345398623084158e-06, + "loss": 0.2101, + "step": 30975 + }, + { + "epoch": 0.535250207354161, + "grad_norm": 1.0779826188890316, + "learning_rate": 9.344840174921237e-06, + "loss": 0.2679, + "step": 30976 + }, + { + "epoch": 0.5352674868675699, + "grad_norm": 1.2248643135990165, + "learning_rate": 9.344281728810325e-06, + "loss": 0.2916, + "step": 30977 + }, + { + "epoch": 0.5352847663809787, + "grad_norm": 0.8646168943498881, + "learning_rate": 9.343723284753173e-06, + "loss": 0.3798, + "step": 30978 + }, + { + "epoch": 0.5353020458943876, + "grad_norm": 0.8334314895352822, + "learning_rate": 9.343164842751532e-06, + "loss": 0.5369, + "step": 30979 + }, + { + "epoch": 0.5353193254077965, + "grad_norm": 0.8944456367395983, + "learning_rate": 9.342606402807146e-06, + "loss": 0.2516, + "step": 30980 + }, + { + "epoch": 0.5353366049212054, + "grad_norm": 0.8417185369226726, + "learning_rate": 9.342047964921768e-06, + "loss": 0.3599, + "step": 30981 + }, + { + "epoch": 0.5353538844346143, + "grad_norm": 1.0532346398352264, + "learning_rate": 9.341489529097144e-06, + "loss": 0.2791, + "step": 30982 + }, + { + "epoch": 0.5353711639480232, + "grad_norm": 1.0779157702427515, + "learning_rate": 9.340931095335026e-06, + "loss": 0.4224, + "step": 30983 + }, + { + "epoch": 0.5353884434614321, + "grad_norm": 0.47622099550874825, + "learning_rate": 9.340372663637164e-06, + "loss": 0.5396, + "step": 30984 + }, + { + "epoch": 0.535405722974841, + "grad_norm": 1.0074905021945058, + "learning_rate": 9.339814234005301e-06, + "loss": 0.4545, + "step": 30985 + }, + { + "epoch": 0.5354230024882499, + "grad_norm": 1.2163405237575027, + "learning_rate": 9.339255806441191e-06, + "loss": 0.3312, + "step": 30986 + }, + { + "epoch": 0.5354402820016588, + "grad_norm": 1.2965458365516271, + "learning_rate": 9.338697380946582e-06, + "loss": 0.3738, + "step": 30987 + }, + { + "epoch": 0.5354575615150677, + "grad_norm": 0.8812319657591017, + "learning_rate": 9.338138957523227e-06, + "loss": 0.5497, + "step": 30988 + }, + { + "epoch": 0.5354748410284766, + "grad_norm": 0.826896366779384, + "learning_rate": 9.337580536172864e-06, + "loss": 0.4242, + "step": 30989 + }, + { + "epoch": 0.5354921205418856, + "grad_norm": 0.8627501014026498, + "learning_rate": 9.337022116897256e-06, + "loss": 0.4198, + "step": 30990 + }, + { + "epoch": 0.5355094000552945, + "grad_norm": 1.050799217138796, + "learning_rate": 9.336463699698142e-06, + "loss": 0.5612, + "step": 30991 + }, + { + "epoch": 0.5355266795687034, + "grad_norm": 1.3000743111560886, + "learning_rate": 9.335905284577273e-06, + "loss": 0.4664, + "step": 30992 + }, + { + "epoch": 0.5355439590821123, + "grad_norm": 0.8178176054247338, + "learning_rate": 9.335346871536399e-06, + "loss": 0.4502, + "step": 30993 + }, + { + "epoch": 0.5355612385955212, + "grad_norm": 0.836081723263767, + "learning_rate": 9.334788460577275e-06, + "loss": 0.5481, + "step": 30994 + }, + { + "epoch": 0.5355785181089301, + "grad_norm": 1.0118838386780873, + "learning_rate": 9.334230051701639e-06, + "loss": 0.4353, + "step": 30995 + }, + { + "epoch": 0.535595797622339, + "grad_norm": 1.09120845983211, + "learning_rate": 9.333671644911244e-06, + "loss": 0.4361, + "step": 30996 + }, + { + "epoch": 0.5356130771357479, + "grad_norm": 0.9928755271693529, + "learning_rate": 9.333113240207843e-06, + "loss": 0.7528, + "step": 30997 + }, + { + "epoch": 0.5356303566491568, + "grad_norm": 0.9352572317435696, + "learning_rate": 9.33255483759318e-06, + "loss": 0.4758, + "step": 30998 + }, + { + "epoch": 0.5356476361625656, + "grad_norm": 0.8057255729039527, + "learning_rate": 9.331996437069008e-06, + "loss": 0.3437, + "step": 30999 + }, + { + "epoch": 0.5356649156759745, + "grad_norm": 0.5845603197092252, + "learning_rate": 9.331438038637072e-06, + "loss": 0.5018, + "step": 31000 + }, + { + "epoch": 0.5356821951893834, + "grad_norm": 1.1677051006665422, + "learning_rate": 9.330879642299124e-06, + "loss": 0.2969, + "step": 31001 + }, + { + "epoch": 0.5356994747027923, + "grad_norm": 0.8900846035696954, + "learning_rate": 9.330321248056909e-06, + "loss": 0.3208, + "step": 31002 + }, + { + "epoch": 0.5357167542162012, + "grad_norm": 0.9529716404651031, + "learning_rate": 9.329762855912183e-06, + "loss": 0.3905, + "step": 31003 + }, + { + "epoch": 0.5357340337296101, + "grad_norm": 1.104474884454525, + "learning_rate": 9.329204465866686e-06, + "loss": 0.3227, + "step": 31004 + }, + { + "epoch": 0.5357513132430191, + "grad_norm": 0.8823359161424658, + "learning_rate": 9.328646077922178e-06, + "loss": 0.2929, + "step": 31005 + }, + { + "epoch": 0.535768592756428, + "grad_norm": 1.18199268272048, + "learning_rate": 9.328087692080398e-06, + "loss": 0.4063, + "step": 31006 + }, + { + "epoch": 0.5357858722698369, + "grad_norm": 1.3468376390072367, + "learning_rate": 9.327529308343096e-06, + "loss": 0.362, + "step": 31007 + }, + { + "epoch": 0.5358031517832458, + "grad_norm": 1.2529015962995573, + "learning_rate": 9.326970926712029e-06, + "loss": 0.3894, + "step": 31008 + }, + { + "epoch": 0.5358204312966547, + "grad_norm": 0.8567169094224593, + "learning_rate": 9.326412547188932e-06, + "loss": 0.474, + "step": 31009 + }, + { + "epoch": 0.5358377108100636, + "grad_norm": 0.8547902229689169, + "learning_rate": 9.325854169775571e-06, + "loss": 0.5036, + "step": 31010 + }, + { + "epoch": 0.5358549903234725, + "grad_norm": 0.8372920682929452, + "learning_rate": 9.32529579447368e-06, + "loss": 0.4065, + "step": 31011 + }, + { + "epoch": 0.5358722698368814, + "grad_norm": 0.4895125923469251, + "learning_rate": 9.324737421285016e-06, + "loss": 0.6788, + "step": 31012 + }, + { + "epoch": 0.5358895493502903, + "grad_norm": 1.0257490673243341, + "learning_rate": 9.324179050211324e-06, + "loss": 0.4165, + "step": 31013 + }, + { + "epoch": 0.5359068288636992, + "grad_norm": 0.7213631196789074, + "learning_rate": 9.323620681254359e-06, + "loss": 0.5238, + "step": 31014 + }, + { + "epoch": 0.5359241083771081, + "grad_norm": 0.7981659570297424, + "learning_rate": 9.323062314415864e-06, + "loss": 0.4223, + "step": 31015 + }, + { + "epoch": 0.535941387890517, + "grad_norm": 1.5857706915512102, + "learning_rate": 9.322503949697585e-06, + "loss": 0.5437, + "step": 31016 + }, + { + "epoch": 0.535958667403926, + "grad_norm": 1.1191136664215617, + "learning_rate": 9.321945587101278e-06, + "loss": 0.3598, + "step": 31017 + }, + { + "epoch": 0.5359759469173349, + "grad_norm": 0.9722100375256978, + "learning_rate": 9.321387226628688e-06, + "loss": 0.3414, + "step": 31018 + }, + { + "epoch": 0.5359932264307438, + "grad_norm": 1.0067008061084126, + "learning_rate": 9.320828868281569e-06, + "loss": 0.6613, + "step": 31019 + }, + { + "epoch": 0.5360105059441526, + "grad_norm": 1.347114151724543, + "learning_rate": 9.32027051206166e-06, + "loss": 0.4705, + "step": 31020 + }, + { + "epoch": 0.5360277854575615, + "grad_norm": 1.1102718187443474, + "learning_rate": 9.319712157970717e-06, + "loss": 0.6269, + "step": 31021 + }, + { + "epoch": 0.5360450649709704, + "grad_norm": 1.2477582905365996, + "learning_rate": 9.319153806010486e-06, + "loss": 0.2698, + "step": 31022 + }, + { + "epoch": 0.5360623444843793, + "grad_norm": 0.6320039761947248, + "learning_rate": 9.31859545618272e-06, + "loss": 0.3088, + "step": 31023 + }, + { + "epoch": 0.5360796239977882, + "grad_norm": 1.3460005787875418, + "learning_rate": 9.318037108489161e-06, + "loss": 0.4641, + "step": 31024 + }, + { + "epoch": 0.5360969035111971, + "grad_norm": 0.7158296734013163, + "learning_rate": 9.31747876293157e-06, + "loss": 0.7826, + "step": 31025 + }, + { + "epoch": 0.536114183024606, + "grad_norm": 1.812989469649587, + "learning_rate": 9.31692041951168e-06, + "loss": 0.2502, + "step": 31026 + }, + { + "epoch": 0.5361314625380149, + "grad_norm": 1.0594147041573705, + "learning_rate": 9.316362078231249e-06, + "loss": 0.4814, + "step": 31027 + }, + { + "epoch": 0.5361487420514238, + "grad_norm": 0.8136620084440616, + "learning_rate": 9.315803739092025e-06, + "loss": 0.321, + "step": 31028 + }, + { + "epoch": 0.5361660215648327, + "grad_norm": 0.919528155444291, + "learning_rate": 9.31524540209575e-06, + "loss": 0.404, + "step": 31029 + }, + { + "epoch": 0.5361833010782416, + "grad_norm": 1.0623239926178671, + "learning_rate": 9.314687067244186e-06, + "loss": 0.4349, + "step": 31030 + }, + { + "epoch": 0.5362005805916505, + "grad_norm": 0.5188113470398161, + "learning_rate": 9.31412873453907e-06, + "loss": 0.7045, + "step": 31031 + }, + { + "epoch": 0.5362178601050595, + "grad_norm": 1.061752787004497, + "learning_rate": 9.313570403982155e-06, + "loss": 0.3553, + "step": 31032 + }, + { + "epoch": 0.5362351396184684, + "grad_norm": 1.0981064771244287, + "learning_rate": 9.31301207557519e-06, + "loss": 0.4733, + "step": 31033 + }, + { + "epoch": 0.5362524191318773, + "grad_norm": 0.6660392008165723, + "learning_rate": 9.312453749319927e-06, + "loss": 0.8193, + "step": 31034 + }, + { + "epoch": 0.5362696986452862, + "grad_norm": 1.1315434079530835, + "learning_rate": 9.311895425218105e-06, + "loss": 0.2713, + "step": 31035 + }, + { + "epoch": 0.5362869781586951, + "grad_norm": 1.1841271362970365, + "learning_rate": 9.311337103271483e-06, + "loss": 0.3511, + "step": 31036 + }, + { + "epoch": 0.536304257672104, + "grad_norm": 0.5927483872480911, + "learning_rate": 9.310778783481807e-06, + "loss": 0.2835, + "step": 31037 + }, + { + "epoch": 0.5363215371855129, + "grad_norm": 1.2592041860190535, + "learning_rate": 9.310220465850819e-06, + "loss": 0.3553, + "step": 31038 + }, + { + "epoch": 0.5363388166989218, + "grad_norm": 1.141948995433174, + "learning_rate": 9.309662150380279e-06, + "loss": 0.4688, + "step": 31039 + }, + { + "epoch": 0.5363560962123307, + "grad_norm": 0.7951972587276628, + "learning_rate": 9.309103837071925e-06, + "loss": 0.3325, + "step": 31040 + }, + { + "epoch": 0.5363733757257395, + "grad_norm": 0.9669157201116978, + "learning_rate": 9.308545525927511e-06, + "loss": 0.331, + "step": 31041 + }, + { + "epoch": 0.5363906552391484, + "grad_norm": 1.0119375523832979, + "learning_rate": 9.307987216948784e-06, + "loss": 0.2956, + "step": 31042 + }, + { + "epoch": 0.5364079347525573, + "grad_norm": 1.3552277385710374, + "learning_rate": 9.307428910137496e-06, + "loss": 0.3027, + "step": 31043 + }, + { + "epoch": 0.5364252142659662, + "grad_norm": 2.1029788289681175, + "learning_rate": 9.30687060549539e-06, + "loss": 0.4634, + "step": 31044 + }, + { + "epoch": 0.5364424937793751, + "grad_norm": 0.7920925277509056, + "learning_rate": 9.306312303024223e-06, + "loss": 0.459, + "step": 31045 + }, + { + "epoch": 0.536459773292784, + "grad_norm": 1.147726700595373, + "learning_rate": 9.305754002725738e-06, + "loss": 0.5429, + "step": 31046 + }, + { + "epoch": 0.536477052806193, + "grad_norm": 0.9603961244662704, + "learning_rate": 9.305195704601679e-06, + "loss": 0.2732, + "step": 31047 + }, + { + "epoch": 0.5364943323196019, + "grad_norm": 0.9697152312111326, + "learning_rate": 9.304637408653805e-06, + "loss": 0.39, + "step": 31048 + }, + { + "epoch": 0.5365116118330108, + "grad_norm": 0.7510738969278022, + "learning_rate": 9.304079114883855e-06, + "loss": 0.4821, + "step": 31049 + }, + { + "epoch": 0.5365288913464197, + "grad_norm": 1.9817305674157608, + "learning_rate": 9.303520823293586e-06, + "loss": 0.5208, + "step": 31050 + }, + { + "epoch": 0.5365461708598286, + "grad_norm": 0.8799026204867082, + "learning_rate": 9.302962533884741e-06, + "loss": 0.4367, + "step": 31051 + }, + { + "epoch": 0.5365634503732375, + "grad_norm": 1.202105734079909, + "learning_rate": 9.302404246659071e-06, + "loss": 0.4919, + "step": 31052 + }, + { + "epoch": 0.5365807298866464, + "grad_norm": 1.4514082782332134, + "learning_rate": 9.301845961618322e-06, + "loss": 0.6242, + "step": 31053 + }, + { + "epoch": 0.5365980094000553, + "grad_norm": 1.512815732881069, + "learning_rate": 9.30128767876425e-06, + "loss": 0.4279, + "step": 31054 + }, + { + "epoch": 0.5366152889134642, + "grad_norm": 1.1374067418271783, + "learning_rate": 9.300729398098589e-06, + "loss": 0.773, + "step": 31055 + }, + { + "epoch": 0.5366325684268731, + "grad_norm": 1.0217456759823935, + "learning_rate": 9.300171119623104e-06, + "loss": 0.3332, + "step": 31056 + }, + { + "epoch": 0.536649847940282, + "grad_norm": 1.0298964621055948, + "learning_rate": 9.299612843339536e-06, + "loss": 0.539, + "step": 31057 + }, + { + "epoch": 0.5366671274536909, + "grad_norm": 1.2600177208397303, + "learning_rate": 9.299054569249629e-06, + "loss": 0.4714, + "step": 31058 + }, + { + "epoch": 0.5366844069670998, + "grad_norm": 1.7759427454495962, + "learning_rate": 9.298496297355143e-06, + "loss": 0.8414, + "step": 31059 + }, + { + "epoch": 0.5367016864805088, + "grad_norm": 1.1735829741737844, + "learning_rate": 9.297938027657813e-06, + "loss": 0.2704, + "step": 31060 + }, + { + "epoch": 0.5367189659939177, + "grad_norm": 0.6368651405239818, + "learning_rate": 9.2973797601594e-06, + "loss": 0.6012, + "step": 31061 + }, + { + "epoch": 0.5367362455073265, + "grad_norm": 1.098850636465602, + "learning_rate": 9.296821494861642e-06, + "loss": 0.3554, + "step": 31062 + }, + { + "epoch": 0.5367535250207354, + "grad_norm": 1.3075657322292995, + "learning_rate": 9.296263231766297e-06, + "loss": 0.5467, + "step": 31063 + }, + { + "epoch": 0.5367708045341443, + "grad_norm": 0.9621550421285221, + "learning_rate": 9.295704970875104e-06, + "loss": 0.3018, + "step": 31064 + }, + { + "epoch": 0.5367880840475532, + "grad_norm": 0.813154517192798, + "learning_rate": 9.295146712189823e-06, + "loss": 0.3798, + "step": 31065 + }, + { + "epoch": 0.5368053635609621, + "grad_norm": 0.7067840742438694, + "learning_rate": 9.29458845571219e-06, + "loss": 0.4487, + "step": 31066 + }, + { + "epoch": 0.536822643074371, + "grad_norm": 0.4440194927876267, + "learning_rate": 9.294030201443964e-06, + "loss": 0.8142, + "step": 31067 + }, + { + "epoch": 0.5368399225877799, + "grad_norm": 0.914273843706471, + "learning_rate": 9.29347194938689e-06, + "loss": 0.2411, + "step": 31068 + }, + { + "epoch": 0.5368572021011888, + "grad_norm": 0.7561305585668233, + "learning_rate": 9.292913699542708e-06, + "loss": 0.3884, + "step": 31069 + }, + { + "epoch": 0.5368744816145977, + "grad_norm": 1.109583246801146, + "learning_rate": 9.292355451913182e-06, + "loss": 0.3904, + "step": 31070 + }, + { + "epoch": 0.5368917611280066, + "grad_norm": 1.006323949993597, + "learning_rate": 9.291797206500048e-06, + "loss": 0.637, + "step": 31071 + }, + { + "epoch": 0.5369090406414155, + "grad_norm": 0.9779609797410677, + "learning_rate": 9.29123896330506e-06, + "loss": 0.2581, + "step": 31072 + }, + { + "epoch": 0.5369263201548244, + "grad_norm": 1.9366989720142587, + "learning_rate": 9.290680722329966e-06, + "loss": 0.3848, + "step": 31073 + }, + { + "epoch": 0.5369435996682334, + "grad_norm": 0.9170435755536742, + "learning_rate": 9.290122483576517e-06, + "loss": 0.4879, + "step": 31074 + }, + { + "epoch": 0.5369608791816423, + "grad_norm": 1.2427464053686852, + "learning_rate": 9.289564247046449e-06, + "loss": 0.4779, + "step": 31075 + }, + { + "epoch": 0.5369781586950512, + "grad_norm": 1.4117975585513405, + "learning_rate": 9.289006012741529e-06, + "loss": 0.3936, + "step": 31076 + }, + { + "epoch": 0.5369954382084601, + "grad_norm": 1.1800912151885006, + "learning_rate": 9.288447780663494e-06, + "loss": 0.4372, + "step": 31077 + }, + { + "epoch": 0.537012717721869, + "grad_norm": 0.7999969878869027, + "learning_rate": 9.287889550814092e-06, + "loss": 0.3574, + "step": 31078 + }, + { + "epoch": 0.5370299972352779, + "grad_norm": 1.2101764350575068, + "learning_rate": 9.287331323195079e-06, + "loss": 0.3433, + "step": 31079 + }, + { + "epoch": 0.5370472767486868, + "grad_norm": 1.0464935111832576, + "learning_rate": 9.286773097808192e-06, + "loss": 0.5479, + "step": 31080 + }, + { + "epoch": 0.5370645562620957, + "grad_norm": 1.2620926969188702, + "learning_rate": 9.28621487465519e-06, + "loss": 0.2798, + "step": 31081 + }, + { + "epoch": 0.5370818357755046, + "grad_norm": 0.5871476485388747, + "learning_rate": 9.285656653737816e-06, + "loss": 0.6769, + "step": 31082 + }, + { + "epoch": 0.5370991152889134, + "grad_norm": 1.1674720626368327, + "learning_rate": 9.28509843505782e-06, + "loss": 0.54, + "step": 31083 + }, + { + "epoch": 0.5371163948023223, + "grad_norm": 0.5725447514020179, + "learning_rate": 9.284540218616949e-06, + "loss": 0.5333, + "step": 31084 + }, + { + "epoch": 0.5371336743157312, + "grad_norm": 1.1486406286392778, + "learning_rate": 9.283982004416955e-06, + "loss": 0.5892, + "step": 31085 + }, + { + "epoch": 0.5371509538291401, + "grad_norm": 1.0839513008638915, + "learning_rate": 9.283423792459581e-06, + "loss": 0.3166, + "step": 31086 + }, + { + "epoch": 0.537168233342549, + "grad_norm": 1.4331608250043777, + "learning_rate": 9.28286558274658e-06, + "loss": 0.5081, + "step": 31087 + }, + { + "epoch": 0.537185512855958, + "grad_norm": 1.6358940308349055, + "learning_rate": 9.2823073752797e-06, + "loss": 0.5335, + "step": 31088 + }, + { + "epoch": 0.5372027923693669, + "grad_norm": 0.8804978028798003, + "learning_rate": 9.281749170060682e-06, + "loss": 0.4656, + "step": 31089 + }, + { + "epoch": 0.5372200718827758, + "grad_norm": 1.0542677280307533, + "learning_rate": 9.281190967091287e-06, + "loss": 0.3462, + "step": 31090 + }, + { + "epoch": 0.5372373513961847, + "grad_norm": 0.9864916466105391, + "learning_rate": 9.28063276637325e-06, + "loss": 0.243, + "step": 31091 + }, + { + "epoch": 0.5372546309095936, + "grad_norm": 1.0520618303265494, + "learning_rate": 9.28007456790833e-06, + "loss": 0.2126, + "step": 31092 + }, + { + "epoch": 0.5372719104230025, + "grad_norm": 0.8360841746030062, + "learning_rate": 9.27951637169827e-06, + "loss": 0.28, + "step": 31093 + }, + { + "epoch": 0.5372891899364114, + "grad_norm": 0.6886452996091729, + "learning_rate": 9.278958177744823e-06, + "loss": 0.2721, + "step": 31094 + }, + { + "epoch": 0.5373064694498203, + "grad_norm": 0.9846947912260496, + "learning_rate": 9.278399986049727e-06, + "loss": 0.4347, + "step": 31095 + }, + { + "epoch": 0.5373237489632292, + "grad_norm": 1.0268193445907903, + "learning_rate": 9.277841796614743e-06, + "loss": 0.2865, + "step": 31096 + }, + { + "epoch": 0.5373410284766381, + "grad_norm": 2.045900213977766, + "learning_rate": 9.27728360944161e-06, + "loss": 0.5029, + "step": 31097 + }, + { + "epoch": 0.537358307990047, + "grad_norm": 1.0298665348770752, + "learning_rate": 9.27672542453208e-06, + "loss": 0.4418, + "step": 31098 + }, + { + "epoch": 0.5373755875034559, + "grad_norm": 0.8584829366539936, + "learning_rate": 9.276167241887904e-06, + "loss": 0.2085, + "step": 31099 + }, + { + "epoch": 0.5373928670168648, + "grad_norm": 1.2876229729431796, + "learning_rate": 9.275609061510824e-06, + "loss": 0.6115, + "step": 31100 + }, + { + "epoch": 0.5374101465302737, + "grad_norm": 1.0166525304215892, + "learning_rate": 9.275050883402592e-06, + "loss": 0.4323, + "step": 31101 + }, + { + "epoch": 0.5374274260436827, + "grad_norm": 1.0023458887809078, + "learning_rate": 9.274492707564955e-06, + "loss": 0.4886, + "step": 31102 + }, + { + "epoch": 0.5374447055570916, + "grad_norm": 1.0107749636423693, + "learning_rate": 9.273934533999664e-06, + "loss": 0.3824, + "step": 31103 + }, + { + "epoch": 0.5374619850705005, + "grad_norm": 1.1132674346253058, + "learning_rate": 9.273376362708462e-06, + "loss": 0.3776, + "step": 31104 + }, + { + "epoch": 0.5374792645839093, + "grad_norm": 1.2932835792487636, + "learning_rate": 9.272818193693106e-06, + "loss": 0.3677, + "step": 31105 + }, + { + "epoch": 0.5374965440973182, + "grad_norm": 1.365490564796923, + "learning_rate": 9.272260026955333e-06, + "loss": 0.4253, + "step": 31106 + }, + { + "epoch": 0.5375138236107271, + "grad_norm": 1.5314695420394628, + "learning_rate": 9.2717018624969e-06, + "loss": 0.3212, + "step": 31107 + }, + { + "epoch": 0.537531103124136, + "grad_norm": 0.7429099757297973, + "learning_rate": 9.271143700319556e-06, + "loss": 0.3332, + "step": 31108 + }, + { + "epoch": 0.5375483826375449, + "grad_norm": 1.0281533277460047, + "learning_rate": 9.270585540425036e-06, + "loss": 0.2926, + "step": 31109 + }, + { + "epoch": 0.5375656621509538, + "grad_norm": 0.9189382603408841, + "learning_rate": 9.270027382815104e-06, + "loss": 0.4726, + "step": 31110 + }, + { + "epoch": 0.5375829416643627, + "grad_norm": 1.3162024434750408, + "learning_rate": 9.2694692274915e-06, + "loss": 0.4112, + "step": 31111 + }, + { + "epoch": 0.5376002211777716, + "grad_norm": 1.441529000349514, + "learning_rate": 9.268911074455974e-06, + "loss": 0.3665, + "step": 31112 + }, + { + "epoch": 0.5376175006911805, + "grad_norm": 1.0024353146455722, + "learning_rate": 9.26835292371027e-06, + "loss": 0.3855, + "step": 31113 + }, + { + "epoch": 0.5376347802045894, + "grad_norm": 0.4741308619845765, + "learning_rate": 9.267794775256148e-06, + "loss": 0.6981, + "step": 31114 + }, + { + "epoch": 0.5376520597179983, + "grad_norm": 0.5445472087457625, + "learning_rate": 9.267236629095341e-06, + "loss": 0.7703, + "step": 31115 + }, + { + "epoch": 0.5376693392314073, + "grad_norm": 1.0722745067088173, + "learning_rate": 9.26667848522961e-06, + "loss": 0.4994, + "step": 31116 + }, + { + "epoch": 0.5376866187448162, + "grad_norm": 0.9476168704867062, + "learning_rate": 9.266120343660695e-06, + "loss": 0.3475, + "step": 31117 + }, + { + "epoch": 0.5377038982582251, + "grad_norm": 1.152801522925171, + "learning_rate": 9.265562204390349e-06, + "loss": 0.4293, + "step": 31118 + }, + { + "epoch": 0.537721177771634, + "grad_norm": 1.457552146612902, + "learning_rate": 9.265004067420319e-06, + "loss": 0.5264, + "step": 31119 + }, + { + "epoch": 0.5377384572850429, + "grad_norm": 1.3669282145822712, + "learning_rate": 9.264445932752348e-06, + "loss": 0.4916, + "step": 31120 + }, + { + "epoch": 0.5377557367984518, + "grad_norm": 0.6026464034675025, + "learning_rate": 9.26388780038819e-06, + "loss": 0.4091, + "step": 31121 + }, + { + "epoch": 0.5377730163118607, + "grad_norm": 1.4197955202761936, + "learning_rate": 9.26332967032959e-06, + "loss": 0.326, + "step": 31122 + }, + { + "epoch": 0.5377902958252696, + "grad_norm": 0.9375162365694665, + "learning_rate": 9.262771542578299e-06, + "loss": 0.3309, + "step": 31123 + }, + { + "epoch": 0.5378075753386785, + "grad_norm": 0.9554152732187379, + "learning_rate": 9.262213417136062e-06, + "loss": 0.4035, + "step": 31124 + }, + { + "epoch": 0.5378248548520874, + "grad_norm": 1.0614000629544573, + "learning_rate": 9.261655294004633e-06, + "loss": 0.5258, + "step": 31125 + }, + { + "epoch": 0.5378421343654962, + "grad_norm": 0.8949738489130361, + "learning_rate": 9.261097173185751e-06, + "loss": 0.3254, + "step": 31126 + }, + { + "epoch": 0.5378594138789051, + "grad_norm": 1.3875037150420233, + "learning_rate": 9.260539054681173e-06, + "loss": 0.3876, + "step": 31127 + }, + { + "epoch": 0.537876693392314, + "grad_norm": 0.8008587208078449, + "learning_rate": 9.259980938492642e-06, + "loss": 0.6928, + "step": 31128 + }, + { + "epoch": 0.5378939729057229, + "grad_norm": 1.6154359268326741, + "learning_rate": 9.259422824621902e-06, + "loss": 0.5349, + "step": 31129 + }, + { + "epoch": 0.5379112524191318, + "grad_norm": 0.8503875975009239, + "learning_rate": 9.258864713070713e-06, + "loss": 0.5563, + "step": 31130 + }, + { + "epoch": 0.5379285319325408, + "grad_norm": 1.25639737860902, + "learning_rate": 9.25830660384081e-06, + "loss": 0.382, + "step": 31131 + }, + { + "epoch": 0.5379458114459497, + "grad_norm": 1.116975152984691, + "learning_rate": 9.25774849693395e-06, + "loss": 0.5036, + "step": 31132 + }, + { + "epoch": 0.5379630909593586, + "grad_norm": 0.9675917366540734, + "learning_rate": 9.257190392351876e-06, + "loss": 0.3651, + "step": 31133 + }, + { + "epoch": 0.5379803704727675, + "grad_norm": 1.3450257600337623, + "learning_rate": 9.256632290096343e-06, + "loss": 0.7284, + "step": 31134 + }, + { + "epoch": 0.5379976499861764, + "grad_norm": 1.0410917365108443, + "learning_rate": 9.256074190169086e-06, + "loss": 0.3121, + "step": 31135 + }, + { + "epoch": 0.5380149294995853, + "grad_norm": 1.2221741723618824, + "learning_rate": 9.25551609257187e-06, + "loss": 0.4583, + "step": 31136 + }, + { + "epoch": 0.5380322090129942, + "grad_norm": 1.2285426154071608, + "learning_rate": 9.254957997306429e-06, + "loss": 0.3843, + "step": 31137 + }, + { + "epoch": 0.5380494885264031, + "grad_norm": 1.1128369303880101, + "learning_rate": 9.254399904374518e-06, + "loss": 0.4894, + "step": 31138 + }, + { + "epoch": 0.538066768039812, + "grad_norm": 1.0362568886368366, + "learning_rate": 9.253841813777885e-06, + "loss": 0.4202, + "step": 31139 + }, + { + "epoch": 0.5380840475532209, + "grad_norm": 0.5888010230692395, + "learning_rate": 9.253283725518272e-06, + "loss": 0.3226, + "step": 31140 + }, + { + "epoch": 0.5381013270666298, + "grad_norm": 1.1904679415016397, + "learning_rate": 9.252725639597433e-06, + "loss": 0.5993, + "step": 31141 + }, + { + "epoch": 0.5381186065800387, + "grad_norm": 0.9123449665001747, + "learning_rate": 9.252167556017113e-06, + "loss": 0.4419, + "step": 31142 + }, + { + "epoch": 0.5381358860934476, + "grad_norm": 0.9395658641255611, + "learning_rate": 9.251609474779063e-06, + "loss": 0.5589, + "step": 31143 + }, + { + "epoch": 0.5381531656068566, + "grad_norm": 1.0716381318391162, + "learning_rate": 9.251051395885025e-06, + "loss": 0.5513, + "step": 31144 + }, + { + "epoch": 0.5381704451202655, + "grad_norm": 0.8930840364627298, + "learning_rate": 9.250493319336757e-06, + "loss": 0.4198, + "step": 31145 + }, + { + "epoch": 0.5381877246336744, + "grad_norm": 0.8087797453039507, + "learning_rate": 9.249935245135997e-06, + "loss": 0.509, + "step": 31146 + }, + { + "epoch": 0.5382050041470832, + "grad_norm": 1.4672681902948774, + "learning_rate": 9.249377173284498e-06, + "loss": 0.4474, + "step": 31147 + }, + { + "epoch": 0.5382222836604921, + "grad_norm": 0.5765554955618543, + "learning_rate": 9.248819103784005e-06, + "loss": 0.5239, + "step": 31148 + }, + { + "epoch": 0.538239563173901, + "grad_norm": 0.8099272115004577, + "learning_rate": 9.24826103663627e-06, + "loss": 0.3294, + "step": 31149 + }, + { + "epoch": 0.5382568426873099, + "grad_norm": 1.1707931060720371, + "learning_rate": 9.24770297184304e-06, + "loss": 0.3229, + "step": 31150 + }, + { + "epoch": 0.5382741222007188, + "grad_norm": 0.9618730913689344, + "learning_rate": 9.247144909406057e-06, + "loss": 0.4768, + "step": 31151 + }, + { + "epoch": 0.5382914017141277, + "grad_norm": 0.5780412622796627, + "learning_rate": 9.246586849327077e-06, + "loss": 0.587, + "step": 31152 + }, + { + "epoch": 0.5383086812275366, + "grad_norm": 1.0083204658254432, + "learning_rate": 9.24602879160784e-06, + "loss": 0.3727, + "step": 31153 + }, + { + "epoch": 0.5383259607409455, + "grad_norm": 1.1424150784123306, + "learning_rate": 9.245470736250104e-06, + "loss": 0.2492, + "step": 31154 + }, + { + "epoch": 0.5383432402543544, + "grad_norm": 0.8550219008815436, + "learning_rate": 9.244912683255605e-06, + "loss": 0.4881, + "step": 31155 + }, + { + "epoch": 0.5383605197677633, + "grad_norm": 1.136036620160498, + "learning_rate": 9.244354632626103e-06, + "loss": 0.5387, + "step": 31156 + }, + { + "epoch": 0.5383777992811722, + "grad_norm": 0.9360155978445988, + "learning_rate": 9.243796584363333e-06, + "loss": 0.3512, + "step": 31157 + }, + { + "epoch": 0.5383950787945812, + "grad_norm": 1.3392128390403832, + "learning_rate": 9.243238538469055e-06, + "loss": 0.4542, + "step": 31158 + }, + { + "epoch": 0.5384123583079901, + "grad_norm": 0.6517793329307445, + "learning_rate": 9.242680494945011e-06, + "loss": 0.4274, + "step": 31159 + }, + { + "epoch": 0.538429637821399, + "grad_norm": 0.959811699835478, + "learning_rate": 9.242122453792947e-06, + "loss": 0.5722, + "step": 31160 + }, + { + "epoch": 0.5384469173348079, + "grad_norm": 1.0912966534742785, + "learning_rate": 9.241564415014614e-06, + "loss": 0.2981, + "step": 31161 + }, + { + "epoch": 0.5384641968482168, + "grad_norm": 1.2166513599844433, + "learning_rate": 9.241006378611757e-06, + "loss": 0.4533, + "step": 31162 + }, + { + "epoch": 0.5384814763616257, + "grad_norm": 0.7202715964299673, + "learning_rate": 9.240448344586129e-06, + "loss": 0.2545, + "step": 31163 + }, + { + "epoch": 0.5384987558750346, + "grad_norm": 1.2163692424268535, + "learning_rate": 9.23989031293947e-06, + "loss": 0.3602, + "step": 31164 + }, + { + "epoch": 0.5385160353884435, + "grad_norm": 1.084886873530138, + "learning_rate": 9.239332283673537e-06, + "loss": 0.3891, + "step": 31165 + }, + { + "epoch": 0.5385333149018524, + "grad_norm": 0.7814684171009622, + "learning_rate": 9.23877425679007e-06, + "loss": 0.6466, + "step": 31166 + }, + { + "epoch": 0.5385505944152613, + "grad_norm": 1.4728453853796595, + "learning_rate": 9.238216232290821e-06, + "loss": 0.3967, + "step": 31167 + }, + { + "epoch": 0.5385678739286701, + "grad_norm": 0.938300417851234, + "learning_rate": 9.237658210177536e-06, + "loss": 0.3074, + "step": 31168 + }, + { + "epoch": 0.538585153442079, + "grad_norm": 0.9875845069357176, + "learning_rate": 9.237100190451965e-06, + "loss": 0.3455, + "step": 31169 + }, + { + "epoch": 0.5386024329554879, + "grad_norm": 1.195310601039664, + "learning_rate": 9.236542173115855e-06, + "loss": 0.3649, + "step": 31170 + }, + { + "epoch": 0.5386197124688968, + "grad_norm": 1.0135578869829651, + "learning_rate": 9.23598415817095e-06, + "loss": 0.3349, + "step": 31171 + }, + { + "epoch": 0.5386369919823057, + "grad_norm": 0.9039952496105329, + "learning_rate": 9.235426145619003e-06, + "loss": 0.5009, + "step": 31172 + }, + { + "epoch": 0.5386542714957147, + "grad_norm": 1.6765337920347674, + "learning_rate": 9.234868135461757e-06, + "loss": 0.2645, + "step": 31173 + }, + { + "epoch": 0.5386715510091236, + "grad_norm": 0.7335496359745993, + "learning_rate": 9.234310127700965e-06, + "loss": 0.2842, + "step": 31174 + }, + { + "epoch": 0.5386888305225325, + "grad_norm": 1.2042738895642282, + "learning_rate": 9.233752122338368e-06, + "loss": 0.3092, + "step": 31175 + }, + { + "epoch": 0.5387061100359414, + "grad_norm": 1.212732950126926, + "learning_rate": 9.233194119375723e-06, + "loss": 0.4052, + "step": 31176 + }, + { + "epoch": 0.5387233895493503, + "grad_norm": 1.2828558644278971, + "learning_rate": 9.232636118814767e-06, + "loss": 0.3592, + "step": 31177 + }, + { + "epoch": 0.5387406690627592, + "grad_norm": 0.8167839081048763, + "learning_rate": 9.232078120657256e-06, + "loss": 0.4981, + "step": 31178 + }, + { + "epoch": 0.5387579485761681, + "grad_norm": 0.6148620618232802, + "learning_rate": 9.231520124904933e-06, + "loss": 0.3815, + "step": 31179 + }, + { + "epoch": 0.538775228089577, + "grad_norm": 1.582855650045311, + "learning_rate": 9.230962131559551e-06, + "loss": 0.4768, + "step": 31180 + }, + { + "epoch": 0.5387925076029859, + "grad_norm": 0.8515090802500609, + "learning_rate": 9.230404140622851e-06, + "loss": 0.3128, + "step": 31181 + }, + { + "epoch": 0.5388097871163948, + "grad_norm": 0.9163299982566906, + "learning_rate": 9.229846152096584e-06, + "loss": 0.4922, + "step": 31182 + }, + { + "epoch": 0.5388270666298037, + "grad_norm": 1.1000656402557647, + "learning_rate": 9.229288165982498e-06, + "loss": 0.297, + "step": 31183 + }, + { + "epoch": 0.5388443461432126, + "grad_norm": 1.0147600676572555, + "learning_rate": 9.228730182282338e-06, + "loss": 0.4725, + "step": 31184 + }, + { + "epoch": 0.5388616256566215, + "grad_norm": 0.9079819551810394, + "learning_rate": 9.228172200997856e-06, + "loss": 0.6075, + "step": 31185 + }, + { + "epoch": 0.5388789051700305, + "grad_norm": 1.1122381140554196, + "learning_rate": 9.227614222130797e-06, + "loss": 0.2337, + "step": 31186 + }, + { + "epoch": 0.5388961846834394, + "grad_norm": 1.364130450081849, + "learning_rate": 9.227056245682909e-06, + "loss": 0.3039, + "step": 31187 + }, + { + "epoch": 0.5389134641968483, + "grad_norm": 0.716415818729209, + "learning_rate": 9.226498271655936e-06, + "loss": 0.5148, + "step": 31188 + }, + { + "epoch": 0.5389307437102571, + "grad_norm": 1.0705862254929244, + "learning_rate": 9.225940300051634e-06, + "loss": 0.517, + "step": 31189 + }, + { + "epoch": 0.538948023223666, + "grad_norm": 1.0279245365707588, + "learning_rate": 9.225382330871746e-06, + "loss": 0.3772, + "step": 31190 + }, + { + "epoch": 0.5389653027370749, + "grad_norm": 1.1002612024087173, + "learning_rate": 9.224824364118016e-06, + "loss": 0.2593, + "step": 31191 + }, + { + "epoch": 0.5389825822504838, + "grad_norm": 0.8189509352622011, + "learning_rate": 9.224266399792197e-06, + "loss": 0.2746, + "step": 31192 + }, + { + "epoch": 0.5389998617638927, + "grad_norm": 0.9163948795640916, + "learning_rate": 9.223708437896033e-06, + "loss": 0.3859, + "step": 31193 + }, + { + "epoch": 0.5390171412773016, + "grad_norm": 1.0506817321983397, + "learning_rate": 9.223150478431276e-06, + "loss": 0.5934, + "step": 31194 + }, + { + "epoch": 0.5390344207907105, + "grad_norm": 1.3942175593746389, + "learning_rate": 9.222592521399666e-06, + "loss": 0.3638, + "step": 31195 + }, + { + "epoch": 0.5390517003041194, + "grad_norm": 1.074331531285215, + "learning_rate": 9.222034566802961e-06, + "loss": 0.3733, + "step": 31196 + }, + { + "epoch": 0.5390689798175283, + "grad_norm": 1.2702840507205684, + "learning_rate": 9.221476614642898e-06, + "loss": 0.5434, + "step": 31197 + }, + { + "epoch": 0.5390862593309372, + "grad_norm": 1.2962386055424056, + "learning_rate": 9.220918664921232e-06, + "loss": 0.5212, + "step": 31198 + }, + { + "epoch": 0.5391035388443461, + "grad_norm": 1.1141562893443293, + "learning_rate": 9.220360717639706e-06, + "loss": 0.455, + "step": 31199 + }, + { + "epoch": 0.539120818357755, + "grad_norm": 1.1138019117416345, + "learning_rate": 9.219802772800074e-06, + "loss": 0.4695, + "step": 31200 + }, + { + "epoch": 0.539138097871164, + "grad_norm": 0.8360611885001107, + "learning_rate": 9.219244830404076e-06, + "loss": 0.315, + "step": 31201 + }, + { + "epoch": 0.5391553773845729, + "grad_norm": 1.0651736640303735, + "learning_rate": 9.21868689045346e-06, + "loss": 0.4562, + "step": 31202 + }, + { + "epoch": 0.5391726568979818, + "grad_norm": 1.0140322452026227, + "learning_rate": 9.218128952949981e-06, + "loss": 0.443, + "step": 31203 + }, + { + "epoch": 0.5391899364113907, + "grad_norm": 2.0095569538320692, + "learning_rate": 9.217571017895377e-06, + "loss": 0.4247, + "step": 31204 + }, + { + "epoch": 0.5392072159247996, + "grad_norm": 0.9143318170317595, + "learning_rate": 9.217013085291406e-06, + "loss": 0.4324, + "step": 31205 + }, + { + "epoch": 0.5392244954382085, + "grad_norm": 0.8498785260954836, + "learning_rate": 9.216455155139803e-06, + "loss": 0.5191, + "step": 31206 + }, + { + "epoch": 0.5392417749516174, + "grad_norm": 1.4257724324971566, + "learning_rate": 9.215897227442325e-06, + "loss": 0.4673, + "step": 31207 + }, + { + "epoch": 0.5392590544650263, + "grad_norm": 1.7238784056768863, + "learning_rate": 9.215339302200715e-06, + "loss": 0.3727, + "step": 31208 + }, + { + "epoch": 0.5392763339784352, + "grad_norm": 1.3082032187915182, + "learning_rate": 9.214781379416723e-06, + "loss": 0.3121, + "step": 31209 + }, + { + "epoch": 0.539293613491844, + "grad_norm": 1.2631133582366054, + "learning_rate": 9.214223459092095e-06, + "loss": 0.4681, + "step": 31210 + }, + { + "epoch": 0.5393108930052529, + "grad_norm": 0.757643024253004, + "learning_rate": 9.213665541228582e-06, + "loss": 0.4278, + "step": 31211 + }, + { + "epoch": 0.5393281725186618, + "grad_norm": 0.9393703982245982, + "learning_rate": 9.213107625827926e-06, + "loss": 0.5107, + "step": 31212 + }, + { + "epoch": 0.5393454520320707, + "grad_norm": 0.5688825408857822, + "learning_rate": 9.212549712891874e-06, + "loss": 0.9391, + "step": 31213 + }, + { + "epoch": 0.5393627315454796, + "grad_norm": 1.7235602084681265, + "learning_rate": 9.211991802422181e-06, + "loss": 0.5206, + "step": 31214 + }, + { + "epoch": 0.5393800110588886, + "grad_norm": 1.0939928027280768, + "learning_rate": 9.211433894420583e-06, + "loss": 0.3037, + "step": 31215 + }, + { + "epoch": 0.5393972905722975, + "grad_norm": 0.9178064944816744, + "learning_rate": 9.210875988888842e-06, + "loss": 0.4863, + "step": 31216 + }, + { + "epoch": 0.5394145700857064, + "grad_norm": 0.9616787733430179, + "learning_rate": 9.210318085828691e-06, + "loss": 0.326, + "step": 31217 + }, + { + "epoch": 0.5394318495991153, + "grad_norm": 1.6028704364737463, + "learning_rate": 9.209760185241886e-06, + "loss": 0.56, + "step": 31218 + }, + { + "epoch": 0.5394491291125242, + "grad_norm": 0.8800325225008778, + "learning_rate": 9.209202287130171e-06, + "loss": 0.4174, + "step": 31219 + }, + { + "epoch": 0.5394664086259331, + "grad_norm": 1.133053746787148, + "learning_rate": 9.208644391495298e-06, + "loss": 0.3685, + "step": 31220 + }, + { + "epoch": 0.539483688139342, + "grad_norm": 1.1341351892014637, + "learning_rate": 9.208086498339011e-06, + "loss": 0.4757, + "step": 31221 + }, + { + "epoch": 0.5395009676527509, + "grad_norm": 0.8877953648450901, + "learning_rate": 9.207528607663052e-06, + "loss": 0.3284, + "step": 31222 + }, + { + "epoch": 0.5395182471661598, + "grad_norm": 1.1036596134993761, + "learning_rate": 9.206970719469177e-06, + "loss": 0.6641, + "step": 31223 + }, + { + "epoch": 0.5395355266795687, + "grad_norm": 1.0593936599958695, + "learning_rate": 9.206412833759128e-06, + "loss": 0.3085, + "step": 31224 + }, + { + "epoch": 0.5395528061929776, + "grad_norm": 1.1286747571822244, + "learning_rate": 9.20585495053466e-06, + "loss": 0.5117, + "step": 31225 + }, + { + "epoch": 0.5395700857063865, + "grad_norm": 1.0098126145589041, + "learning_rate": 9.205297069797509e-06, + "loss": 0.4232, + "step": 31226 + }, + { + "epoch": 0.5395873652197954, + "grad_norm": 1.5543839091990608, + "learning_rate": 9.204739191549428e-06, + "loss": 0.446, + "step": 31227 + }, + { + "epoch": 0.5396046447332044, + "grad_norm": 0.919039678690184, + "learning_rate": 9.204181315792166e-06, + "loss": 0.3691, + "step": 31228 + }, + { + "epoch": 0.5396219242466133, + "grad_norm": 0.875527548798836, + "learning_rate": 9.203623442527468e-06, + "loss": 0.3306, + "step": 31229 + }, + { + "epoch": 0.5396392037600222, + "grad_norm": 0.9190868731961858, + "learning_rate": 9.203065571757081e-06, + "loss": 0.4741, + "step": 31230 + }, + { + "epoch": 0.539656483273431, + "grad_norm": 0.8630054223539625, + "learning_rate": 9.202507703482756e-06, + "loss": 0.5235, + "step": 31231 + }, + { + "epoch": 0.5396737627868399, + "grad_norm": 1.0978184108215145, + "learning_rate": 9.201949837706238e-06, + "loss": 0.4189, + "step": 31232 + }, + { + "epoch": 0.5396910423002488, + "grad_norm": 1.561991852230492, + "learning_rate": 9.20139197442927e-06, + "loss": 0.6384, + "step": 31233 + }, + { + "epoch": 0.5397083218136577, + "grad_norm": 1.3571438044301416, + "learning_rate": 9.200834113653607e-06, + "loss": 0.5492, + "step": 31234 + }, + { + "epoch": 0.5397256013270666, + "grad_norm": 1.0232414177959177, + "learning_rate": 9.200276255380986e-06, + "loss": 0.3374, + "step": 31235 + }, + { + "epoch": 0.5397428808404755, + "grad_norm": 1.264210165218277, + "learning_rate": 9.199718399613169e-06, + "loss": 0.4515, + "step": 31236 + }, + { + "epoch": 0.5397601603538844, + "grad_norm": 0.8617466366357333, + "learning_rate": 9.199160546351888e-06, + "loss": 0.4222, + "step": 31237 + }, + { + "epoch": 0.5397774398672933, + "grad_norm": 0.8474183422743948, + "learning_rate": 9.1986026955989e-06, + "loss": 0.4471, + "step": 31238 + }, + { + "epoch": 0.5397947193807022, + "grad_norm": 1.2447999996275152, + "learning_rate": 9.198044847355947e-06, + "loss": 0.391, + "step": 31239 + }, + { + "epoch": 0.5398119988941111, + "grad_norm": 1.03857296664659, + "learning_rate": 9.197487001624783e-06, + "loss": 0.5188, + "step": 31240 + }, + { + "epoch": 0.53982927840752, + "grad_norm": 1.2736016216822457, + "learning_rate": 9.196929158407144e-06, + "loss": 0.302, + "step": 31241 + }, + { + "epoch": 0.539846557920929, + "grad_norm": 0.9311346250045542, + "learning_rate": 9.19637131770479e-06, + "loss": 0.3459, + "step": 31242 + }, + { + "epoch": 0.5398638374343379, + "grad_norm": 0.45266834063142525, + "learning_rate": 9.195813479519461e-06, + "loss": 0.6375, + "step": 31243 + }, + { + "epoch": 0.5398811169477468, + "grad_norm": 1.1827161242200805, + "learning_rate": 9.195255643852903e-06, + "loss": 0.3639, + "step": 31244 + }, + { + "epoch": 0.5398983964611557, + "grad_norm": 1.1656246518095508, + "learning_rate": 9.194697810706869e-06, + "loss": 0.5239, + "step": 31245 + }, + { + "epoch": 0.5399156759745646, + "grad_norm": 1.504989981769394, + "learning_rate": 9.1941399800831e-06, + "loss": 0.4381, + "step": 31246 + }, + { + "epoch": 0.5399329554879735, + "grad_norm": 1.1412993332197103, + "learning_rate": 9.193582151983346e-06, + "loss": 0.3184, + "step": 31247 + }, + { + "epoch": 0.5399502350013824, + "grad_norm": 0.8919368161299203, + "learning_rate": 9.193024326409353e-06, + "loss": 0.7122, + "step": 31248 + }, + { + "epoch": 0.5399675145147913, + "grad_norm": 1.618860430546982, + "learning_rate": 9.19246650336287e-06, + "loss": 0.257, + "step": 31249 + }, + { + "epoch": 0.5399847940282002, + "grad_norm": 0.9113302717384093, + "learning_rate": 9.191908682845642e-06, + "loss": 0.4647, + "step": 31250 + }, + { + "epoch": 0.5400020735416091, + "grad_norm": 0.7972985616685822, + "learning_rate": 9.191350864859421e-06, + "loss": 0.3703, + "step": 31251 + }, + { + "epoch": 0.540019353055018, + "grad_norm": 0.9304593097887003, + "learning_rate": 9.190793049405949e-06, + "loss": 0.4432, + "step": 31252 + }, + { + "epoch": 0.5400366325684268, + "grad_norm": 0.8107037644323594, + "learning_rate": 9.190235236486974e-06, + "loss": 0.4507, + "step": 31253 + }, + { + "epoch": 0.5400539120818357, + "grad_norm": 1.4693143392445576, + "learning_rate": 9.189677426104247e-06, + "loss": 0.4805, + "step": 31254 + }, + { + "epoch": 0.5400711915952446, + "grad_norm": 0.7599339863340105, + "learning_rate": 9.189119618259504e-06, + "loss": 0.4243, + "step": 31255 + }, + { + "epoch": 0.5400884711086535, + "grad_norm": 0.7447940645610803, + "learning_rate": 9.188561812954507e-06, + "loss": 0.6398, + "step": 31256 + }, + { + "epoch": 0.5401057506220625, + "grad_norm": 0.7621007915957458, + "learning_rate": 9.188004010190992e-06, + "loss": 0.2827, + "step": 31257 + }, + { + "epoch": 0.5401230301354714, + "grad_norm": 0.9477838980805668, + "learning_rate": 9.187446209970715e-06, + "loss": 0.4837, + "step": 31258 + }, + { + "epoch": 0.5401403096488803, + "grad_norm": 1.1881667780944283, + "learning_rate": 9.186888412295412e-06, + "loss": 0.5907, + "step": 31259 + }, + { + "epoch": 0.5401575891622892, + "grad_norm": 1.0166272554776792, + "learning_rate": 9.186330617166843e-06, + "loss": 0.431, + "step": 31260 + }, + { + "epoch": 0.5401748686756981, + "grad_norm": 0.6595397344410132, + "learning_rate": 9.185772824586741e-06, + "loss": 0.3055, + "step": 31261 + }, + { + "epoch": 0.540192148189107, + "grad_norm": 1.1518988968600439, + "learning_rate": 9.185215034556866e-06, + "loss": 0.5561, + "step": 31262 + }, + { + "epoch": 0.5402094277025159, + "grad_norm": 1.3880143203575162, + "learning_rate": 9.184657247078958e-06, + "loss": 0.3878, + "step": 31263 + }, + { + "epoch": 0.5402267072159248, + "grad_norm": 0.9306412278616888, + "learning_rate": 9.184099462154764e-06, + "loss": 0.4368, + "step": 31264 + }, + { + "epoch": 0.5402439867293337, + "grad_norm": 1.1861561407443653, + "learning_rate": 9.183541679786036e-06, + "loss": 0.3976, + "step": 31265 + }, + { + "epoch": 0.5402612662427426, + "grad_norm": 1.5682877262805188, + "learning_rate": 9.182983899974514e-06, + "loss": 0.2926, + "step": 31266 + }, + { + "epoch": 0.5402785457561515, + "grad_norm": 0.8636081301842703, + "learning_rate": 9.182426122721952e-06, + "loss": 0.5825, + "step": 31267 + }, + { + "epoch": 0.5402958252695604, + "grad_norm": 1.1000677053659045, + "learning_rate": 9.181868348030088e-06, + "loss": 0.5872, + "step": 31268 + }, + { + "epoch": 0.5403131047829693, + "grad_norm": 0.925067338480787, + "learning_rate": 9.18131057590068e-06, + "loss": 0.485, + "step": 31269 + }, + { + "epoch": 0.5403303842963783, + "grad_norm": 0.8530736807763389, + "learning_rate": 9.180752806335464e-06, + "loss": 0.4797, + "step": 31270 + }, + { + "epoch": 0.5403476638097872, + "grad_norm": 1.0163324169248233, + "learning_rate": 9.1801950393362e-06, + "loss": 0.3635, + "step": 31271 + }, + { + "epoch": 0.5403649433231961, + "grad_norm": 0.924304468981308, + "learning_rate": 9.179637274904623e-06, + "loss": 0.4488, + "step": 31272 + }, + { + "epoch": 0.540382222836605, + "grad_norm": 1.2033328577631774, + "learning_rate": 9.179079513042483e-06, + "loss": 0.3907, + "step": 31273 + }, + { + "epoch": 0.5403995023500138, + "grad_norm": 1.5671573529927896, + "learning_rate": 9.178521753751532e-06, + "loss": 0.3681, + "step": 31274 + }, + { + "epoch": 0.5404167818634227, + "grad_norm": 0.8648912346822061, + "learning_rate": 9.177963997033508e-06, + "loss": 0.3434, + "step": 31275 + }, + { + "epoch": 0.5404340613768316, + "grad_norm": 0.9464060738766435, + "learning_rate": 9.177406242890168e-06, + "loss": 0.3261, + "step": 31276 + }, + { + "epoch": 0.5404513408902405, + "grad_norm": 0.8889119226973836, + "learning_rate": 9.176848491323251e-06, + "loss": 0.3249, + "step": 31277 + }, + { + "epoch": 0.5404686204036494, + "grad_norm": 1.0262620872009651, + "learning_rate": 9.17629074233451e-06, + "loss": 0.5489, + "step": 31278 + }, + { + "epoch": 0.5404858999170583, + "grad_norm": 0.8837339690897943, + "learning_rate": 9.175732995925686e-06, + "loss": 0.6076, + "step": 31279 + }, + { + "epoch": 0.5405031794304672, + "grad_norm": 1.2058027256879307, + "learning_rate": 9.175175252098533e-06, + "loss": 0.3354, + "step": 31280 + }, + { + "epoch": 0.5405204589438761, + "grad_norm": 0.9926681724189125, + "learning_rate": 9.174617510854788e-06, + "loss": 0.4002, + "step": 31281 + }, + { + "epoch": 0.540537738457285, + "grad_norm": 1.9025360376428315, + "learning_rate": 9.17405977219621e-06, + "loss": 0.4404, + "step": 31282 + }, + { + "epoch": 0.5405550179706939, + "grad_norm": 0.5201306814482562, + "learning_rate": 9.173502036124536e-06, + "loss": 0.5993, + "step": 31283 + }, + { + "epoch": 0.5405722974841028, + "grad_norm": 1.7520075078058415, + "learning_rate": 9.172944302641516e-06, + "loss": 0.4064, + "step": 31284 + }, + { + "epoch": 0.5405895769975118, + "grad_norm": 1.0445986891073473, + "learning_rate": 9.172386571748902e-06, + "loss": 0.4241, + "step": 31285 + }, + { + "epoch": 0.5406068565109207, + "grad_norm": 0.5810842744811092, + "learning_rate": 9.17182884344843e-06, + "loss": 0.7458, + "step": 31286 + }, + { + "epoch": 0.5406241360243296, + "grad_norm": 0.8764993124931101, + "learning_rate": 9.171271117741856e-06, + "loss": 0.5081, + "step": 31287 + }, + { + "epoch": 0.5406414155377385, + "grad_norm": 1.3930237482293342, + "learning_rate": 9.170713394630923e-06, + "loss": 0.6233, + "step": 31288 + }, + { + "epoch": 0.5406586950511474, + "grad_norm": 1.0276488533985129, + "learning_rate": 9.170155674117377e-06, + "loss": 0.48, + "step": 31289 + }, + { + "epoch": 0.5406759745645563, + "grad_norm": 1.4291790674942446, + "learning_rate": 9.169597956202968e-06, + "loss": 0.5502, + "step": 31290 + }, + { + "epoch": 0.5406932540779652, + "grad_norm": 0.589922032321365, + "learning_rate": 9.169040240889446e-06, + "loss": 0.5755, + "step": 31291 + }, + { + "epoch": 0.5407105335913741, + "grad_norm": 1.3914154303668163, + "learning_rate": 9.168482528178546e-06, + "loss": 0.3823, + "step": 31292 + }, + { + "epoch": 0.540727813104783, + "grad_norm": 1.7531028549690637, + "learning_rate": 9.167924818072027e-06, + "loss": 0.4523, + "step": 31293 + }, + { + "epoch": 0.5407450926181919, + "grad_norm": 1.229483037399792, + "learning_rate": 9.16736711057163e-06, + "loss": 0.2914, + "step": 31294 + }, + { + "epoch": 0.5407623721316007, + "grad_norm": 1.011555723493234, + "learning_rate": 9.166809405679096e-06, + "loss": 0.3589, + "step": 31295 + }, + { + "epoch": 0.5407796516450096, + "grad_norm": 1.251935316002596, + "learning_rate": 9.166251703396187e-06, + "loss": 0.2772, + "step": 31296 + }, + { + "epoch": 0.5407969311584185, + "grad_norm": 1.1111837399654743, + "learning_rate": 9.165694003724636e-06, + "loss": 0.4762, + "step": 31297 + }, + { + "epoch": 0.5408142106718274, + "grad_norm": 1.0418016647109203, + "learning_rate": 9.165136306666196e-06, + "loss": 0.4724, + "step": 31298 + }, + { + "epoch": 0.5408314901852364, + "grad_norm": 0.44965504325696026, + "learning_rate": 9.164578612222612e-06, + "loss": 0.6016, + "step": 31299 + }, + { + "epoch": 0.5408487696986453, + "grad_norm": 1.0915065244554265, + "learning_rate": 9.164020920395635e-06, + "loss": 0.443, + "step": 31300 + }, + { + "epoch": 0.5408660492120542, + "grad_norm": 1.0552965931721678, + "learning_rate": 9.163463231187003e-06, + "loss": 0.3022, + "step": 31301 + }, + { + "epoch": 0.5408833287254631, + "grad_norm": 0.6963691204660148, + "learning_rate": 9.16290554459847e-06, + "loss": 0.4654, + "step": 31302 + }, + { + "epoch": 0.540900608238872, + "grad_norm": 1.4180788662071493, + "learning_rate": 9.16234786063178e-06, + "loss": 0.6592, + "step": 31303 + }, + { + "epoch": 0.5409178877522809, + "grad_norm": 1.012309610878354, + "learning_rate": 9.161790179288679e-06, + "loss": 0.4072, + "step": 31304 + }, + { + "epoch": 0.5409351672656898, + "grad_norm": 1.0563211939727855, + "learning_rate": 9.161232500570919e-06, + "loss": 0.4367, + "step": 31305 + }, + { + "epoch": 0.5409524467790987, + "grad_norm": 1.2155082786013354, + "learning_rate": 9.160674824480238e-06, + "loss": 0.3338, + "step": 31306 + }, + { + "epoch": 0.5409697262925076, + "grad_norm": 0.9013382047898337, + "learning_rate": 9.160117151018389e-06, + "loss": 0.368, + "step": 31307 + }, + { + "epoch": 0.5409870058059165, + "grad_norm": 0.9094301551486037, + "learning_rate": 9.159559480187115e-06, + "loss": 0.3704, + "step": 31308 + }, + { + "epoch": 0.5410042853193254, + "grad_norm": 1.2287748988929725, + "learning_rate": 9.159001811988168e-06, + "loss": 0.2487, + "step": 31309 + }, + { + "epoch": 0.5410215648327343, + "grad_norm": 1.188767642536256, + "learning_rate": 9.158444146423288e-06, + "loss": 0.398, + "step": 31310 + }, + { + "epoch": 0.5410388443461432, + "grad_norm": 1.0931462057610082, + "learning_rate": 9.157886483494229e-06, + "loss": 0.5691, + "step": 31311 + }, + { + "epoch": 0.5410561238595522, + "grad_norm": 1.0933446994667533, + "learning_rate": 9.157328823202728e-06, + "loss": 0.4825, + "step": 31312 + }, + { + "epoch": 0.5410734033729611, + "grad_norm": 1.1265108167833517, + "learning_rate": 9.156771165550542e-06, + "loss": 0.4482, + "step": 31313 + }, + { + "epoch": 0.54109068288637, + "grad_norm": 0.9852575518892915, + "learning_rate": 9.156213510539415e-06, + "loss": 0.3524, + "step": 31314 + }, + { + "epoch": 0.5411079623997789, + "grad_norm": 0.8458176319352309, + "learning_rate": 9.155655858171083e-06, + "loss": 0.2707, + "step": 31315 + }, + { + "epoch": 0.5411252419131877, + "grad_norm": 0.8927344618041155, + "learning_rate": 9.155098208447308e-06, + "loss": 0.4682, + "step": 31316 + }, + { + "epoch": 0.5411425214265966, + "grad_norm": 1.2556956952545542, + "learning_rate": 9.154540561369825e-06, + "loss": 0.4115, + "step": 31317 + }, + { + "epoch": 0.5411598009400055, + "grad_norm": 1.2347070397170223, + "learning_rate": 9.153982916940389e-06, + "loss": 0.4701, + "step": 31318 + }, + { + "epoch": 0.5411770804534144, + "grad_norm": 1.1225413691606292, + "learning_rate": 9.15342527516074e-06, + "loss": 0.4502, + "step": 31319 + }, + { + "epoch": 0.5411943599668233, + "grad_norm": 0.7409108498687349, + "learning_rate": 9.15286763603263e-06, + "loss": 0.4557, + "step": 31320 + }, + { + "epoch": 0.5412116394802322, + "grad_norm": 1.0101886894066758, + "learning_rate": 9.1523099995578e-06, + "loss": 0.3905, + "step": 31321 + }, + { + "epoch": 0.5412289189936411, + "grad_norm": 1.1545857419961039, + "learning_rate": 9.151752365738001e-06, + "loss": 0.4078, + "step": 31322 + }, + { + "epoch": 0.54124619850705, + "grad_norm": 0.5396943615794374, + "learning_rate": 9.151194734574978e-06, + "loss": 0.5059, + "step": 31323 + }, + { + "epoch": 0.5412634780204589, + "grad_norm": 1.0654090725427077, + "learning_rate": 9.150637106070476e-06, + "loss": 0.3157, + "step": 31324 + }, + { + "epoch": 0.5412807575338678, + "grad_norm": 0.9565162070175071, + "learning_rate": 9.150079480226248e-06, + "loss": 0.3752, + "step": 31325 + }, + { + "epoch": 0.5412980370472767, + "grad_norm": 1.0545733899292353, + "learning_rate": 9.149521857044032e-06, + "loss": 0.6542, + "step": 31326 + }, + { + "epoch": 0.5413153165606857, + "grad_norm": 0.8221159123945234, + "learning_rate": 9.148964236525578e-06, + "loss": 0.2847, + "step": 31327 + }, + { + "epoch": 0.5413325960740946, + "grad_norm": 1.0408184128945286, + "learning_rate": 9.148406618672631e-06, + "loss": 0.4862, + "step": 31328 + }, + { + "epoch": 0.5413498755875035, + "grad_norm": 1.1164862185561895, + "learning_rate": 9.147849003486943e-06, + "loss": 0.4099, + "step": 31329 + }, + { + "epoch": 0.5413671551009124, + "grad_norm": 0.923295508330513, + "learning_rate": 9.147291390970252e-06, + "loss": 0.5728, + "step": 31330 + }, + { + "epoch": 0.5413844346143213, + "grad_norm": 1.299907699182502, + "learning_rate": 9.146733781124316e-06, + "loss": 0.2064, + "step": 31331 + }, + { + "epoch": 0.5414017141277302, + "grad_norm": 0.749188693623032, + "learning_rate": 9.146176173950869e-06, + "loss": 0.296, + "step": 31332 + }, + { + "epoch": 0.5414189936411391, + "grad_norm": 0.4078284792971256, + "learning_rate": 9.145618569451667e-06, + "loss": 0.4834, + "step": 31333 + }, + { + "epoch": 0.541436273154548, + "grad_norm": 0.930011462037192, + "learning_rate": 9.145060967628452e-06, + "loss": 0.5062, + "step": 31334 + }, + { + "epoch": 0.5414535526679569, + "grad_norm": 1.0309609308038448, + "learning_rate": 9.144503368482966e-06, + "loss": 0.3426, + "step": 31335 + }, + { + "epoch": 0.5414708321813658, + "grad_norm": 1.0379766870236922, + "learning_rate": 9.143945772016967e-06, + "loss": 0.5293, + "step": 31336 + }, + { + "epoch": 0.5414881116947746, + "grad_norm": 1.000140816310034, + "learning_rate": 9.143388178232192e-06, + "loss": 0.4885, + "step": 31337 + }, + { + "epoch": 0.5415053912081835, + "grad_norm": 0.6213248897431192, + "learning_rate": 9.14283058713039e-06, + "loss": 0.4741, + "step": 31338 + }, + { + "epoch": 0.5415226707215924, + "grad_norm": 1.4105812741093342, + "learning_rate": 9.142272998713306e-06, + "loss": 0.3553, + "step": 31339 + }, + { + "epoch": 0.5415399502350013, + "grad_norm": 1.3608047892906703, + "learning_rate": 9.141715412982693e-06, + "loss": 0.397, + "step": 31340 + }, + { + "epoch": 0.5415572297484103, + "grad_norm": 0.896920001083462, + "learning_rate": 9.141157829940289e-06, + "loss": 0.4755, + "step": 31341 + }, + { + "epoch": 0.5415745092618192, + "grad_norm": 1.0982074237221267, + "learning_rate": 9.140600249587845e-06, + "loss": 0.4646, + "step": 31342 + }, + { + "epoch": 0.5415917887752281, + "grad_norm": 0.871122694718656, + "learning_rate": 9.140042671927104e-06, + "loss": 0.2881, + "step": 31343 + }, + { + "epoch": 0.541609068288637, + "grad_norm": 0.9161973171980344, + "learning_rate": 9.139485096959818e-06, + "loss": 0.465, + "step": 31344 + }, + { + "epoch": 0.5416263478020459, + "grad_norm": 1.362249175757218, + "learning_rate": 9.13892752468773e-06, + "loss": 0.6513, + "step": 31345 + }, + { + "epoch": 0.5416436273154548, + "grad_norm": 0.8635022153208598, + "learning_rate": 9.138369955112585e-06, + "loss": 0.4217, + "step": 31346 + }, + { + "epoch": 0.5416609068288637, + "grad_norm": 0.6976881260678761, + "learning_rate": 9.137812388236132e-06, + "loss": 0.2347, + "step": 31347 + }, + { + "epoch": 0.5416781863422726, + "grad_norm": 0.8142669614740299, + "learning_rate": 9.137254824060112e-06, + "loss": 0.2831, + "step": 31348 + }, + { + "epoch": 0.5416954658556815, + "grad_norm": 1.3805182696954061, + "learning_rate": 9.136697262586279e-06, + "loss": 0.3374, + "step": 31349 + }, + { + "epoch": 0.5417127453690904, + "grad_norm": 1.0937479645309809, + "learning_rate": 9.136139703816375e-06, + "loss": 0.3164, + "step": 31350 + }, + { + "epoch": 0.5417300248824993, + "grad_norm": 0.8106897459901669, + "learning_rate": 9.13558214775215e-06, + "loss": 0.358, + "step": 31351 + }, + { + "epoch": 0.5417473043959082, + "grad_norm": 0.7968864335566993, + "learning_rate": 9.135024594395342e-06, + "loss": 0.4837, + "step": 31352 + }, + { + "epoch": 0.5417645839093171, + "grad_norm": 1.0063360295519326, + "learning_rate": 9.134467043747707e-06, + "loss": 0.4267, + "step": 31353 + }, + { + "epoch": 0.541781863422726, + "grad_norm": 0.9800737940459986, + "learning_rate": 9.133909495810983e-06, + "loss": 0.4295, + "step": 31354 + }, + { + "epoch": 0.541799142936135, + "grad_norm": 0.8658653002733903, + "learning_rate": 9.133351950586924e-06, + "loss": 0.6729, + "step": 31355 + }, + { + "epoch": 0.5418164224495439, + "grad_norm": 1.0619927493082104, + "learning_rate": 9.132794408077274e-06, + "loss": 0.2814, + "step": 31356 + }, + { + "epoch": 0.5418337019629528, + "grad_norm": 1.1371580980530278, + "learning_rate": 9.132236868283775e-06, + "loss": 0.4179, + "step": 31357 + }, + { + "epoch": 0.5418509814763616, + "grad_norm": 1.3257341215152887, + "learning_rate": 9.131679331208177e-06, + "loss": 0.3818, + "step": 31358 + }, + { + "epoch": 0.5418682609897705, + "grad_norm": 1.1679622782465688, + "learning_rate": 9.131121796852222e-06, + "loss": 0.4303, + "step": 31359 + }, + { + "epoch": 0.5418855405031794, + "grad_norm": 1.3044660593357833, + "learning_rate": 9.130564265217664e-06, + "loss": 0.5124, + "step": 31360 + }, + { + "epoch": 0.5419028200165883, + "grad_norm": 0.9760184616707641, + "learning_rate": 9.130006736306243e-06, + "loss": 0.5601, + "step": 31361 + }, + { + "epoch": 0.5419200995299972, + "grad_norm": 0.9780677659708092, + "learning_rate": 9.129449210119708e-06, + "loss": 0.3908, + "step": 31362 + }, + { + "epoch": 0.5419373790434061, + "grad_norm": 1.1572656167885156, + "learning_rate": 9.1288916866598e-06, + "loss": 0.5309, + "step": 31363 + }, + { + "epoch": 0.541954658556815, + "grad_norm": 0.5639519228653945, + "learning_rate": 9.128334165928274e-06, + "loss": 0.5188, + "step": 31364 + }, + { + "epoch": 0.5419719380702239, + "grad_norm": 1.019889422235185, + "learning_rate": 9.127776647926874e-06, + "loss": 0.4882, + "step": 31365 + }, + { + "epoch": 0.5419892175836328, + "grad_norm": 0.9892521764114879, + "learning_rate": 9.127219132657338e-06, + "loss": 0.4467, + "step": 31366 + }, + { + "epoch": 0.5420064970970417, + "grad_norm": 1.4418069903988548, + "learning_rate": 9.126661620121422e-06, + "loss": 0.2572, + "step": 31367 + }, + { + "epoch": 0.5420237766104506, + "grad_norm": 1.3114775866901998, + "learning_rate": 9.126104110320864e-06, + "loss": 0.4921, + "step": 31368 + }, + { + "epoch": 0.5420410561238596, + "grad_norm": 0.9677985597503754, + "learning_rate": 9.125546603257417e-06, + "loss": 0.3705, + "step": 31369 + }, + { + "epoch": 0.5420583356372685, + "grad_norm": 0.9921215767011126, + "learning_rate": 9.124989098932821e-06, + "loss": 0.4405, + "step": 31370 + }, + { + "epoch": 0.5420756151506774, + "grad_norm": 1.2872387429621241, + "learning_rate": 9.124431597348832e-06, + "loss": 0.3299, + "step": 31371 + }, + { + "epoch": 0.5420928946640863, + "grad_norm": 1.1679637589712213, + "learning_rate": 9.123874098507185e-06, + "loss": 0.4891, + "step": 31372 + }, + { + "epoch": 0.5421101741774952, + "grad_norm": 0.836895634806942, + "learning_rate": 9.123316602409634e-06, + "loss": 0.4455, + "step": 31373 + }, + { + "epoch": 0.5421274536909041, + "grad_norm": 1.1355924402081101, + "learning_rate": 9.122759109057918e-06, + "loss": 0.4636, + "step": 31374 + }, + { + "epoch": 0.542144733204313, + "grad_norm": 1.0123403486118194, + "learning_rate": 9.122201618453791e-06, + "loss": 0.3603, + "step": 31375 + }, + { + "epoch": 0.5421620127177219, + "grad_norm": 1.0339397202439915, + "learning_rate": 9.121644130598997e-06, + "loss": 0.5217, + "step": 31376 + }, + { + "epoch": 0.5421792922311308, + "grad_norm": 1.0664835674347026, + "learning_rate": 9.121086645495276e-06, + "loss": 0.4232, + "step": 31377 + }, + { + "epoch": 0.5421965717445397, + "grad_norm": 0.8262490520040946, + "learning_rate": 9.12052916314438e-06, + "loss": 0.315, + "step": 31378 + }, + { + "epoch": 0.5422138512579486, + "grad_norm": 1.252551761792705, + "learning_rate": 9.119971683548051e-06, + "loss": 0.511, + "step": 31379 + }, + { + "epoch": 0.5422311307713574, + "grad_norm": 0.9042700011191676, + "learning_rate": 9.119414206708043e-06, + "loss": 0.4441, + "step": 31380 + }, + { + "epoch": 0.5422484102847663, + "grad_norm": 0.8273979915312163, + "learning_rate": 9.11885673262609e-06, + "loss": 0.4622, + "step": 31381 + }, + { + "epoch": 0.5422656897981752, + "grad_norm": 0.780784264432231, + "learning_rate": 9.11829926130395e-06, + "loss": 0.3371, + "step": 31382 + }, + { + "epoch": 0.5422829693115842, + "grad_norm": 0.7416381116985338, + "learning_rate": 9.11774179274336e-06, + "loss": 0.2902, + "step": 31383 + }, + { + "epoch": 0.5423002488249931, + "grad_norm": 1.4811919078104085, + "learning_rate": 9.117184326946072e-06, + "loss": 0.5843, + "step": 31384 + }, + { + "epoch": 0.542317528338402, + "grad_norm": 0.8476120378298452, + "learning_rate": 9.116626863913827e-06, + "loss": 0.39, + "step": 31385 + }, + { + "epoch": 0.5423348078518109, + "grad_norm": 0.8970080204181138, + "learning_rate": 9.11606940364838e-06, + "loss": 0.3348, + "step": 31386 + }, + { + "epoch": 0.5423520873652198, + "grad_norm": 1.3063599050425645, + "learning_rate": 9.115511946151467e-06, + "loss": 0.5629, + "step": 31387 + }, + { + "epoch": 0.5423693668786287, + "grad_norm": 1.0396916649204848, + "learning_rate": 9.114954491424837e-06, + "loss": 0.4183, + "step": 31388 + }, + { + "epoch": 0.5423866463920376, + "grad_norm": 1.2522006830487646, + "learning_rate": 9.114397039470238e-06, + "loss": 0.3002, + "step": 31389 + }, + { + "epoch": 0.5424039259054465, + "grad_norm": 1.26923807942492, + "learning_rate": 9.113839590289411e-06, + "loss": 0.4957, + "step": 31390 + }, + { + "epoch": 0.5424212054188554, + "grad_norm": 0.6182053556870314, + "learning_rate": 9.113282143884112e-06, + "loss": 0.2497, + "step": 31391 + }, + { + "epoch": 0.5424384849322643, + "grad_norm": 1.0866612870421262, + "learning_rate": 9.112724700256077e-06, + "loss": 0.4235, + "step": 31392 + }, + { + "epoch": 0.5424557644456732, + "grad_norm": 1.1320750496704801, + "learning_rate": 9.112167259407056e-06, + "loss": 0.3406, + "step": 31393 + }, + { + "epoch": 0.5424730439590821, + "grad_norm": 0.9637558076310601, + "learning_rate": 9.111609821338795e-06, + "loss": 0.2155, + "step": 31394 + }, + { + "epoch": 0.542490323472491, + "grad_norm": 0.9358875515711182, + "learning_rate": 9.111052386053038e-06, + "loss": 0.4884, + "step": 31395 + }, + { + "epoch": 0.5425076029859, + "grad_norm": 1.6906566333852042, + "learning_rate": 9.110494953551539e-06, + "loss": 0.4096, + "step": 31396 + }, + { + "epoch": 0.5425248824993089, + "grad_norm": 1.028914527725302, + "learning_rate": 9.10993752383603e-06, + "loss": 0.5892, + "step": 31397 + }, + { + "epoch": 0.5425421620127178, + "grad_norm": 0.9961114475917856, + "learning_rate": 9.109380096908267e-06, + "loss": 0.3566, + "step": 31398 + }, + { + "epoch": 0.5425594415261267, + "grad_norm": 1.7782109546213285, + "learning_rate": 9.108822672769991e-06, + "loss": 0.4501, + "step": 31399 + }, + { + "epoch": 0.5425767210395356, + "grad_norm": 1.1305599509658812, + "learning_rate": 9.108265251422955e-06, + "loss": 0.3905, + "step": 31400 + }, + { + "epoch": 0.5425940005529444, + "grad_norm": 1.2630792617557103, + "learning_rate": 9.107707832868896e-06, + "loss": 0.4745, + "step": 31401 + }, + { + "epoch": 0.5426112800663533, + "grad_norm": 0.9842414858152985, + "learning_rate": 9.107150417109565e-06, + "loss": 0.4826, + "step": 31402 + }, + { + "epoch": 0.5426285595797622, + "grad_norm": 0.8523068572811935, + "learning_rate": 9.106593004146705e-06, + "loss": 0.4227, + "step": 31403 + }, + { + "epoch": 0.5426458390931711, + "grad_norm": 1.0588037801661219, + "learning_rate": 9.106035593982067e-06, + "loss": 0.3931, + "step": 31404 + }, + { + "epoch": 0.54266311860658, + "grad_norm": 1.5465994637147698, + "learning_rate": 9.10547818661739e-06, + "loss": 0.3023, + "step": 31405 + }, + { + "epoch": 0.5426803981199889, + "grad_norm": 0.8615463080852154, + "learning_rate": 9.104920782054427e-06, + "loss": 0.3082, + "step": 31406 + }, + { + "epoch": 0.5426976776333978, + "grad_norm": 1.3027292604333252, + "learning_rate": 9.10436338029492e-06, + "loss": 0.533, + "step": 31407 + }, + { + "epoch": 0.5427149571468067, + "grad_norm": 0.9581680689963228, + "learning_rate": 9.103805981340612e-06, + "loss": 0.2494, + "step": 31408 + }, + { + "epoch": 0.5427322366602156, + "grad_norm": 1.2441520722297372, + "learning_rate": 9.103248585193253e-06, + "loss": 0.4743, + "step": 31409 + }, + { + "epoch": 0.5427495161736245, + "grad_norm": 0.806312665532141, + "learning_rate": 9.102691191854586e-06, + "loss": 0.3407, + "step": 31410 + }, + { + "epoch": 0.5427667956870335, + "grad_norm": 1.3081752289561397, + "learning_rate": 9.102133801326362e-06, + "loss": 0.2783, + "step": 31411 + }, + { + "epoch": 0.5427840752004424, + "grad_norm": 1.432148974202163, + "learning_rate": 9.10157641361032e-06, + "loss": 0.3075, + "step": 31412 + }, + { + "epoch": 0.5428013547138513, + "grad_norm": 0.7169317972406409, + "learning_rate": 9.10101902870821e-06, + "loss": 0.3194, + "step": 31413 + }, + { + "epoch": 0.5428186342272602, + "grad_norm": 1.1260801834425977, + "learning_rate": 9.100461646621776e-06, + "loss": 0.4744, + "step": 31414 + }, + { + "epoch": 0.5428359137406691, + "grad_norm": 1.2127272659053745, + "learning_rate": 9.099904267352767e-06, + "loss": 0.438, + "step": 31415 + }, + { + "epoch": 0.542853193254078, + "grad_norm": 1.3738171261415102, + "learning_rate": 9.099346890902922e-06, + "loss": 0.3968, + "step": 31416 + }, + { + "epoch": 0.5428704727674869, + "grad_norm": 0.7971375685642385, + "learning_rate": 9.098789517273997e-06, + "loss": 0.3837, + "step": 31417 + }, + { + "epoch": 0.5428877522808958, + "grad_norm": 0.8987326538426758, + "learning_rate": 9.098232146467727e-06, + "loss": 0.3682, + "step": 31418 + }, + { + "epoch": 0.5429050317943047, + "grad_norm": 1.2819339377734156, + "learning_rate": 9.097674778485862e-06, + "loss": 0.4329, + "step": 31419 + }, + { + "epoch": 0.5429223113077136, + "grad_norm": 0.8054170719528907, + "learning_rate": 9.097117413330153e-06, + "loss": 0.4028, + "step": 31420 + }, + { + "epoch": 0.5429395908211225, + "grad_norm": 1.3412777036486578, + "learning_rate": 9.096560051002336e-06, + "loss": 0.3547, + "step": 31421 + }, + { + "epoch": 0.5429568703345313, + "grad_norm": 0.5812183785858642, + "learning_rate": 9.096002691504164e-06, + "loss": 0.2865, + "step": 31422 + }, + { + "epoch": 0.5429741498479402, + "grad_norm": 0.840881910259138, + "learning_rate": 9.095445334837378e-06, + "loss": 0.3484, + "step": 31423 + }, + { + "epoch": 0.5429914293613491, + "grad_norm": 0.8729495171745811, + "learning_rate": 9.094887981003728e-06, + "loss": 0.4389, + "step": 31424 + }, + { + "epoch": 0.543008708874758, + "grad_norm": 0.8389597628708511, + "learning_rate": 9.094330630004955e-06, + "loss": 0.4296, + "step": 31425 + }, + { + "epoch": 0.543025988388167, + "grad_norm": 0.8726845885307656, + "learning_rate": 9.093773281842812e-06, + "loss": 0.3769, + "step": 31426 + }, + { + "epoch": 0.5430432679015759, + "grad_norm": 0.8752956360340929, + "learning_rate": 9.093215936519038e-06, + "loss": 0.4521, + "step": 31427 + }, + { + "epoch": 0.5430605474149848, + "grad_norm": 1.6154618388262605, + "learning_rate": 9.09265859403538e-06, + "loss": 0.5057, + "step": 31428 + }, + { + "epoch": 0.5430778269283937, + "grad_norm": 1.1599634364476468, + "learning_rate": 9.092101254393584e-06, + "loss": 0.5983, + "step": 31429 + }, + { + "epoch": 0.5430951064418026, + "grad_norm": 1.0513303811183459, + "learning_rate": 9.091543917595395e-06, + "loss": 0.4589, + "step": 31430 + }, + { + "epoch": 0.5431123859552115, + "grad_norm": 0.8357235178310191, + "learning_rate": 9.090986583642563e-06, + "loss": 0.3905, + "step": 31431 + }, + { + "epoch": 0.5431296654686204, + "grad_norm": 1.1626949926883228, + "learning_rate": 9.090429252536825e-06, + "loss": 0.4364, + "step": 31432 + }, + { + "epoch": 0.5431469449820293, + "grad_norm": 1.4096988188788586, + "learning_rate": 9.089871924279936e-06, + "loss": 0.3715, + "step": 31433 + }, + { + "epoch": 0.5431642244954382, + "grad_norm": 0.6265292237068182, + "learning_rate": 9.089314598873633e-06, + "loss": 0.2287, + "step": 31434 + }, + { + "epoch": 0.5431815040088471, + "grad_norm": 1.4355901623000882, + "learning_rate": 9.08875727631967e-06, + "loss": 0.6979, + "step": 31435 + }, + { + "epoch": 0.543198783522256, + "grad_norm": 0.8250295711610415, + "learning_rate": 9.088199956619786e-06, + "loss": 0.2026, + "step": 31436 + }, + { + "epoch": 0.5432160630356649, + "grad_norm": 0.6500444354150171, + "learning_rate": 9.087642639775732e-06, + "loss": 0.3536, + "step": 31437 + }, + { + "epoch": 0.5432333425490738, + "grad_norm": 0.9827914015786438, + "learning_rate": 9.087085325789248e-06, + "loss": 0.4867, + "step": 31438 + }, + { + "epoch": 0.5432506220624828, + "grad_norm": 1.0745708701959327, + "learning_rate": 9.086528014662083e-06, + "loss": 0.4576, + "step": 31439 + }, + { + "epoch": 0.5432679015758917, + "grad_norm": 0.543839355547999, + "learning_rate": 9.085970706395983e-06, + "loss": 0.3363, + "step": 31440 + }, + { + "epoch": 0.5432851810893006, + "grad_norm": 1.0890753662506285, + "learning_rate": 9.08541340099269e-06, + "loss": 0.4882, + "step": 31441 + }, + { + "epoch": 0.5433024606027095, + "grad_norm": 0.5926626878333259, + "learning_rate": 9.084856098453953e-06, + "loss": 0.7307, + "step": 31442 + }, + { + "epoch": 0.5433197401161183, + "grad_norm": 0.8157434994501381, + "learning_rate": 9.084298798781514e-06, + "loss": 0.3879, + "step": 31443 + }, + { + "epoch": 0.5433370196295272, + "grad_norm": 0.9775239270787636, + "learning_rate": 9.083741501977123e-06, + "loss": 0.3484, + "step": 31444 + }, + { + "epoch": 0.5433542991429361, + "grad_norm": 1.1301976336842596, + "learning_rate": 9.083184208042521e-06, + "loss": 0.3273, + "step": 31445 + }, + { + "epoch": 0.543371578656345, + "grad_norm": 1.1709735917612645, + "learning_rate": 9.08262691697946e-06, + "loss": 0.3802, + "step": 31446 + }, + { + "epoch": 0.5433888581697539, + "grad_norm": 1.3846008875091944, + "learning_rate": 9.082069628789676e-06, + "loss": 0.7862, + "step": 31447 + }, + { + "epoch": 0.5434061376831628, + "grad_norm": 1.4114160913487013, + "learning_rate": 9.081512343474924e-06, + "loss": 0.5266, + "step": 31448 + }, + { + "epoch": 0.5434234171965717, + "grad_norm": 0.9427847268506445, + "learning_rate": 9.080955061036945e-06, + "loss": 0.404, + "step": 31449 + }, + { + "epoch": 0.5434406967099806, + "grad_norm": 1.1904463800001182, + "learning_rate": 9.08039778147748e-06, + "loss": 0.5267, + "step": 31450 + }, + { + "epoch": 0.5434579762233895, + "grad_norm": 0.9168079957709181, + "learning_rate": 9.079840504798286e-06, + "loss": 0.2909, + "step": 31451 + }, + { + "epoch": 0.5434752557367984, + "grad_norm": 1.465772625012061, + "learning_rate": 9.079283231001096e-06, + "loss": 0.4976, + "step": 31452 + }, + { + "epoch": 0.5434925352502074, + "grad_norm": 1.1405720779082118, + "learning_rate": 9.078725960087661e-06, + "loss": 0.3993, + "step": 31453 + }, + { + "epoch": 0.5435098147636163, + "grad_norm": 1.260741450153374, + "learning_rate": 9.078168692059728e-06, + "loss": 0.3876, + "step": 31454 + }, + { + "epoch": 0.5435270942770252, + "grad_norm": 1.5227607390171296, + "learning_rate": 9.07761142691904e-06, + "loss": 0.4999, + "step": 31455 + }, + { + "epoch": 0.5435443737904341, + "grad_norm": 0.8723663317227472, + "learning_rate": 9.077054164667343e-06, + "loss": 0.4354, + "step": 31456 + }, + { + "epoch": 0.543561653303843, + "grad_norm": 1.1133537866054735, + "learning_rate": 9.076496905306386e-06, + "loss": 0.4334, + "step": 31457 + }, + { + "epoch": 0.5435789328172519, + "grad_norm": 0.9858057793561807, + "learning_rate": 9.075939648837907e-06, + "loss": 0.3591, + "step": 31458 + }, + { + "epoch": 0.5435962123306608, + "grad_norm": 0.8999847130285362, + "learning_rate": 9.075382395263656e-06, + "loss": 0.5799, + "step": 31459 + }, + { + "epoch": 0.5436134918440697, + "grad_norm": 0.5010333544804022, + "learning_rate": 9.07482514458538e-06, + "loss": 0.7135, + "step": 31460 + }, + { + "epoch": 0.5436307713574786, + "grad_norm": 1.3972732936857086, + "learning_rate": 9.074267896804819e-06, + "loss": 0.4707, + "step": 31461 + }, + { + "epoch": 0.5436480508708875, + "grad_norm": 0.7465832989840516, + "learning_rate": 9.073710651923722e-06, + "loss": 0.3431, + "step": 31462 + }, + { + "epoch": 0.5436653303842964, + "grad_norm": 1.417982331733404, + "learning_rate": 9.07315340994383e-06, + "loss": 0.3977, + "step": 31463 + }, + { + "epoch": 0.5436826098977052, + "grad_norm": 0.9671161000871804, + "learning_rate": 9.072596170866899e-06, + "loss": 0.5271, + "step": 31464 + }, + { + "epoch": 0.5436998894111141, + "grad_norm": 1.0847460261988398, + "learning_rate": 9.072038934694662e-06, + "loss": 0.3482, + "step": 31465 + }, + { + "epoch": 0.543717168924523, + "grad_norm": 1.068352996147879, + "learning_rate": 9.071481701428874e-06, + "loss": 0.5359, + "step": 31466 + }, + { + "epoch": 0.543734448437932, + "grad_norm": 0.8847966094826669, + "learning_rate": 9.070924471071272e-06, + "loss": 0.3925, + "step": 31467 + }, + { + "epoch": 0.5437517279513409, + "grad_norm": 1.4805765209961366, + "learning_rate": 9.070367243623606e-06, + "loss": 0.3822, + "step": 31468 + }, + { + "epoch": 0.5437690074647498, + "grad_norm": 0.813230598867466, + "learning_rate": 9.069810019087621e-06, + "loss": 0.2478, + "step": 31469 + }, + { + "epoch": 0.5437862869781587, + "grad_norm": 1.2799109774715156, + "learning_rate": 9.069252797465062e-06, + "loss": 0.4348, + "step": 31470 + }, + { + "epoch": 0.5438035664915676, + "grad_norm": 1.0208869935952583, + "learning_rate": 9.068695578757675e-06, + "loss": 0.3485, + "step": 31471 + }, + { + "epoch": 0.5438208460049765, + "grad_norm": 1.2073719250150405, + "learning_rate": 9.068138362967202e-06, + "loss": 0.3024, + "step": 31472 + }, + { + "epoch": 0.5438381255183854, + "grad_norm": 0.7693086473076076, + "learning_rate": 9.067581150095391e-06, + "loss": 0.4697, + "step": 31473 + }, + { + "epoch": 0.5438554050317943, + "grad_norm": 0.4831871402748588, + "learning_rate": 9.067023940143987e-06, + "loss": 0.5923, + "step": 31474 + }, + { + "epoch": 0.5438726845452032, + "grad_norm": 1.243031031383243, + "learning_rate": 9.066466733114735e-06, + "loss": 0.349, + "step": 31475 + }, + { + "epoch": 0.5438899640586121, + "grad_norm": 0.9452467491631714, + "learning_rate": 9.065909529009378e-06, + "loss": 0.3661, + "step": 31476 + }, + { + "epoch": 0.543907243572021, + "grad_norm": 0.8839719324005014, + "learning_rate": 9.065352327829669e-06, + "loss": 0.5149, + "step": 31477 + }, + { + "epoch": 0.5439245230854299, + "grad_norm": 1.0637691189209983, + "learning_rate": 9.064795129577343e-06, + "loss": 0.4898, + "step": 31478 + }, + { + "epoch": 0.5439418025988388, + "grad_norm": 0.60808886713558, + "learning_rate": 9.064237934254149e-06, + "loss": 0.7288, + "step": 31479 + }, + { + "epoch": 0.5439590821122477, + "grad_norm": 1.1465203247628157, + "learning_rate": 9.06368074186184e-06, + "loss": 0.4288, + "step": 31480 + }, + { + "epoch": 0.5439763616256567, + "grad_norm": 1.043699072226954, + "learning_rate": 9.063123552402146e-06, + "loss": 0.4509, + "step": 31481 + }, + { + "epoch": 0.5439936411390656, + "grad_norm": 1.72110745468662, + "learning_rate": 9.062566365876824e-06, + "loss": 0.5021, + "step": 31482 + }, + { + "epoch": 0.5440109206524745, + "grad_norm": 1.2141580303221275, + "learning_rate": 9.062009182287613e-06, + "loss": 0.3618, + "step": 31483 + }, + { + "epoch": 0.5440282001658834, + "grad_norm": 0.758354901415732, + "learning_rate": 9.061452001636261e-06, + "loss": 0.3014, + "step": 31484 + }, + { + "epoch": 0.5440454796792922, + "grad_norm": 0.8344724400348156, + "learning_rate": 9.060894823924511e-06, + "loss": 0.413, + "step": 31485 + }, + { + "epoch": 0.5440627591927011, + "grad_norm": 0.7282884406173876, + "learning_rate": 9.060337649154116e-06, + "loss": 0.296, + "step": 31486 + }, + { + "epoch": 0.54408003870611, + "grad_norm": 1.2943545540671135, + "learning_rate": 9.05978047732681e-06, + "loss": 0.4179, + "step": 31487 + }, + { + "epoch": 0.5440973182195189, + "grad_norm": 0.8053648583216618, + "learning_rate": 9.059223308444345e-06, + "loss": 0.4099, + "step": 31488 + }, + { + "epoch": 0.5441145977329278, + "grad_norm": 1.303182257959577, + "learning_rate": 9.058666142508462e-06, + "loss": 0.461, + "step": 31489 + }, + { + "epoch": 0.5441318772463367, + "grad_norm": 1.181136772701848, + "learning_rate": 9.058108979520909e-06, + "loss": 0.5862, + "step": 31490 + }, + { + "epoch": 0.5441491567597456, + "grad_norm": 1.1593197372028723, + "learning_rate": 9.057551819483433e-06, + "loss": 0.8885, + "step": 31491 + }, + { + "epoch": 0.5441664362731545, + "grad_norm": 0.8504822302482016, + "learning_rate": 9.056994662397773e-06, + "loss": 0.4698, + "step": 31492 + }, + { + "epoch": 0.5441837157865634, + "grad_norm": 1.024045154113687, + "learning_rate": 9.056437508265679e-06, + "loss": 0.4591, + "step": 31493 + }, + { + "epoch": 0.5442009952999723, + "grad_norm": 0.7991069479401405, + "learning_rate": 9.055880357088892e-06, + "loss": 0.3772, + "step": 31494 + }, + { + "epoch": 0.5442182748133813, + "grad_norm": 0.8268365181401249, + "learning_rate": 9.055323208869162e-06, + "loss": 0.3086, + "step": 31495 + }, + { + "epoch": 0.5442355543267902, + "grad_norm": 1.5427146915822878, + "learning_rate": 9.05476606360823e-06, + "loss": 0.3626, + "step": 31496 + }, + { + "epoch": 0.5442528338401991, + "grad_norm": 1.6960756513954385, + "learning_rate": 9.054208921307846e-06, + "loss": 0.3984, + "step": 31497 + }, + { + "epoch": 0.544270113353608, + "grad_norm": 1.3398337746433635, + "learning_rate": 9.053651781969747e-06, + "loss": 0.6073, + "step": 31498 + }, + { + "epoch": 0.5442873928670169, + "grad_norm": 1.053680284652093, + "learning_rate": 9.053094645595685e-06, + "loss": 0.2474, + "step": 31499 + }, + { + "epoch": 0.5443046723804258, + "grad_norm": 1.255862115594933, + "learning_rate": 9.052537512187404e-06, + "loss": 0.448, + "step": 31500 + }, + { + "epoch": 0.5443219518938347, + "grad_norm": 1.294396871852219, + "learning_rate": 9.051980381746644e-06, + "loss": 0.395, + "step": 31501 + }, + { + "epoch": 0.5443392314072436, + "grad_norm": 0.882620356138213, + "learning_rate": 9.051423254275156e-06, + "loss": 0.3725, + "step": 31502 + }, + { + "epoch": 0.5443565109206525, + "grad_norm": 1.2273737426914415, + "learning_rate": 9.05086612977468e-06, + "loss": 0.3855, + "step": 31503 + }, + { + "epoch": 0.5443737904340614, + "grad_norm": 0.9631892994978327, + "learning_rate": 9.050309008246965e-06, + "loss": 0.494, + "step": 31504 + }, + { + "epoch": 0.5443910699474703, + "grad_norm": 1.0441522750991696, + "learning_rate": 9.049751889693753e-06, + "loss": 0.4566, + "step": 31505 + }, + { + "epoch": 0.5444083494608791, + "grad_norm": 0.9462911860443591, + "learning_rate": 9.049194774116794e-06, + "loss": 0.3229, + "step": 31506 + }, + { + "epoch": 0.544425628974288, + "grad_norm": 0.9776057323899853, + "learning_rate": 9.048637661517825e-06, + "loss": 0.2857, + "step": 31507 + }, + { + "epoch": 0.5444429084876969, + "grad_norm": 0.8132377191807766, + "learning_rate": 9.048080551898597e-06, + "loss": 0.3677, + "step": 31508 + }, + { + "epoch": 0.5444601880011058, + "grad_norm": 1.1222210530719983, + "learning_rate": 9.047523445260854e-06, + "loss": 0.4882, + "step": 31509 + }, + { + "epoch": 0.5444774675145148, + "grad_norm": 1.2862953799862749, + "learning_rate": 9.046966341606336e-06, + "loss": 0.2962, + "step": 31510 + }, + { + "epoch": 0.5444947470279237, + "grad_norm": 0.8034089191759936, + "learning_rate": 9.046409240936799e-06, + "loss": 0.424, + "step": 31511 + }, + { + "epoch": 0.5445120265413326, + "grad_norm": 0.9348317106390588, + "learning_rate": 9.045852143253974e-06, + "loss": 0.4284, + "step": 31512 + }, + { + "epoch": 0.5445293060547415, + "grad_norm": 1.0552897618944101, + "learning_rate": 9.045295048559617e-06, + "loss": 0.5817, + "step": 31513 + }, + { + "epoch": 0.5445465855681504, + "grad_norm": 1.511869032547573, + "learning_rate": 9.044737956855464e-06, + "loss": 0.3961, + "step": 31514 + }, + { + "epoch": 0.5445638650815593, + "grad_norm": 1.054203543234879, + "learning_rate": 9.044180868143267e-06, + "loss": 0.5188, + "step": 31515 + }, + { + "epoch": 0.5445811445949682, + "grad_norm": 1.026436491241688, + "learning_rate": 9.043623782424766e-06, + "loss": 0.2116, + "step": 31516 + }, + { + "epoch": 0.5445984241083771, + "grad_norm": 0.7798838134442732, + "learning_rate": 9.043066699701714e-06, + "loss": 0.4347, + "step": 31517 + }, + { + "epoch": 0.544615703621786, + "grad_norm": 0.44849635192463233, + "learning_rate": 9.042509619975843e-06, + "loss": 0.7494, + "step": 31518 + }, + { + "epoch": 0.5446329831351949, + "grad_norm": 0.4334536897302768, + "learning_rate": 9.041952543248909e-06, + "loss": 0.5487, + "step": 31519 + }, + { + "epoch": 0.5446502626486038, + "grad_norm": 0.8772493324765751, + "learning_rate": 9.041395469522654e-06, + "loss": 0.4935, + "step": 31520 + }, + { + "epoch": 0.5446675421620127, + "grad_norm": 1.2833839988367248, + "learning_rate": 9.040838398798816e-06, + "loss": 0.4715, + "step": 31521 + }, + { + "epoch": 0.5446848216754216, + "grad_norm": 0.8587725021160477, + "learning_rate": 9.040281331079148e-06, + "loss": 0.4267, + "step": 31522 + }, + { + "epoch": 0.5447021011888306, + "grad_norm": 1.137506467842838, + "learning_rate": 9.03972426636539e-06, + "loss": 0.3525, + "step": 31523 + }, + { + "epoch": 0.5447193807022395, + "grad_norm": 1.2457858641637485, + "learning_rate": 9.039167204659289e-06, + "loss": 0.3904, + "step": 31524 + }, + { + "epoch": 0.5447366602156484, + "grad_norm": 1.3882722254226616, + "learning_rate": 9.038610145962589e-06, + "loss": 0.3871, + "step": 31525 + }, + { + "epoch": 0.5447539397290573, + "grad_norm": 0.7024719674765729, + "learning_rate": 9.038053090277038e-06, + "loss": 0.3044, + "step": 31526 + }, + { + "epoch": 0.5447712192424662, + "grad_norm": 1.0317966027244105, + "learning_rate": 9.037496037604373e-06, + "loss": 0.2893, + "step": 31527 + }, + { + "epoch": 0.544788498755875, + "grad_norm": 1.3212518967069082, + "learning_rate": 9.036938987946348e-06, + "loss": 0.5884, + "step": 31528 + }, + { + "epoch": 0.5448057782692839, + "grad_norm": 0.7369838305851374, + "learning_rate": 9.0363819413047e-06, + "loss": 0.4631, + "step": 31529 + }, + { + "epoch": 0.5448230577826928, + "grad_norm": 1.129941291685283, + "learning_rate": 9.035824897681179e-06, + "loss": 0.376, + "step": 31530 + }, + { + "epoch": 0.5448403372961017, + "grad_norm": 0.7157047683220517, + "learning_rate": 9.035267857077531e-06, + "loss": 0.5096, + "step": 31531 + }, + { + "epoch": 0.5448576168095106, + "grad_norm": 0.7878746624815077, + "learning_rate": 9.034710819495492e-06, + "loss": 0.4527, + "step": 31532 + }, + { + "epoch": 0.5448748963229195, + "grad_norm": 1.388416656023276, + "learning_rate": 9.034153784936815e-06, + "loss": 0.5468, + "step": 31533 + }, + { + "epoch": 0.5448921758363284, + "grad_norm": 1.395314763814558, + "learning_rate": 9.033596753403238e-06, + "loss": 0.3515, + "step": 31534 + }, + { + "epoch": 0.5449094553497373, + "grad_norm": 1.4543687235069238, + "learning_rate": 9.033039724896513e-06, + "loss": 0.4572, + "step": 31535 + }, + { + "epoch": 0.5449267348631462, + "grad_norm": 1.4673557983356036, + "learning_rate": 9.032482699418377e-06, + "loss": 0.2805, + "step": 31536 + }, + { + "epoch": 0.5449440143765552, + "grad_norm": 0.9508634801287671, + "learning_rate": 9.031925676970586e-06, + "loss": 0.3284, + "step": 31537 + }, + { + "epoch": 0.5449612938899641, + "grad_norm": 0.8656971190183936, + "learning_rate": 9.03136865755487e-06, + "loss": 0.2725, + "step": 31538 + }, + { + "epoch": 0.544978573403373, + "grad_norm": 1.4009931949184378, + "learning_rate": 9.030811641172985e-06, + "loss": 0.4914, + "step": 31539 + }, + { + "epoch": 0.5449958529167819, + "grad_norm": 0.6706340296108506, + "learning_rate": 9.030254627826673e-06, + "loss": 0.2265, + "step": 31540 + }, + { + "epoch": 0.5450131324301908, + "grad_norm": 0.9737611398963707, + "learning_rate": 9.029697617517674e-06, + "loss": 0.3299, + "step": 31541 + }, + { + "epoch": 0.5450304119435997, + "grad_norm": 1.4413718624443757, + "learning_rate": 9.029140610247737e-06, + "loss": 0.3131, + "step": 31542 + }, + { + "epoch": 0.5450476914570086, + "grad_norm": 1.2605391465570195, + "learning_rate": 9.028583606018605e-06, + "loss": 0.3361, + "step": 31543 + }, + { + "epoch": 0.5450649709704175, + "grad_norm": 0.9209870430103398, + "learning_rate": 9.028026604832021e-06, + "loss": 0.4531, + "step": 31544 + }, + { + "epoch": 0.5450822504838264, + "grad_norm": 1.16724859494095, + "learning_rate": 9.027469606689733e-06, + "loss": 0.5335, + "step": 31545 + }, + { + "epoch": 0.5450995299972353, + "grad_norm": 1.8893207392310267, + "learning_rate": 9.026912611593488e-06, + "loss": 0.4967, + "step": 31546 + }, + { + "epoch": 0.5451168095106442, + "grad_norm": 0.6977146333239097, + "learning_rate": 9.026355619545021e-06, + "loss": 0.8358, + "step": 31547 + }, + { + "epoch": 0.5451340890240531, + "grad_norm": 1.1289885790092107, + "learning_rate": 9.025798630546085e-06, + "loss": 0.523, + "step": 31548 + }, + { + "epoch": 0.5451513685374619, + "grad_norm": 0.9816740541052854, + "learning_rate": 9.02524164459842e-06, + "loss": 0.4378, + "step": 31549 + }, + { + "epoch": 0.5451686480508708, + "grad_norm": 1.4675981196570296, + "learning_rate": 9.024684661703775e-06, + "loss": 0.3355, + "step": 31550 + }, + { + "epoch": 0.5451859275642797, + "grad_norm": 0.8520770420837769, + "learning_rate": 9.024127681863892e-06, + "loss": 0.4092, + "step": 31551 + }, + { + "epoch": 0.5452032070776887, + "grad_norm": 0.6423716556314447, + "learning_rate": 9.023570705080512e-06, + "loss": 0.3636, + "step": 31552 + }, + { + "epoch": 0.5452204865910976, + "grad_norm": 0.8910956658138403, + "learning_rate": 9.023013731355384e-06, + "loss": 0.4418, + "step": 31553 + }, + { + "epoch": 0.5452377661045065, + "grad_norm": 1.634800490046949, + "learning_rate": 9.02245676069025e-06, + "loss": 0.3218, + "step": 31554 + }, + { + "epoch": 0.5452550456179154, + "grad_norm": 1.0398360363922514, + "learning_rate": 9.02189979308686e-06, + "loss": 0.3111, + "step": 31555 + }, + { + "epoch": 0.5452723251313243, + "grad_norm": 1.5457807410933935, + "learning_rate": 9.021342828546949e-06, + "loss": 0.614, + "step": 31556 + }, + { + "epoch": 0.5452896046447332, + "grad_norm": 1.3854454234712443, + "learning_rate": 9.020785867072272e-06, + "loss": 0.4681, + "step": 31557 + }, + { + "epoch": 0.5453068841581421, + "grad_norm": 1.198739525437765, + "learning_rate": 9.020228908664562e-06, + "loss": 0.5465, + "step": 31558 + }, + { + "epoch": 0.545324163671551, + "grad_norm": 0.5857909687076284, + "learning_rate": 9.019671953325574e-06, + "loss": 0.5106, + "step": 31559 + }, + { + "epoch": 0.5453414431849599, + "grad_norm": 1.0557636527578098, + "learning_rate": 9.019115001057045e-06, + "loss": 0.579, + "step": 31560 + }, + { + "epoch": 0.5453587226983688, + "grad_norm": 1.0002025710922977, + "learning_rate": 9.018558051860726e-06, + "loss": 0.4038, + "step": 31561 + }, + { + "epoch": 0.5453760022117777, + "grad_norm": 0.7689815325122615, + "learning_rate": 9.018001105738357e-06, + "loss": 0.2837, + "step": 31562 + }, + { + "epoch": 0.5453932817251866, + "grad_norm": 1.0105441266144137, + "learning_rate": 9.01744416269168e-06, + "loss": 0.4714, + "step": 31563 + }, + { + "epoch": 0.5454105612385955, + "grad_norm": 1.1963369487156201, + "learning_rate": 9.016887222722444e-06, + "loss": 0.5407, + "step": 31564 + }, + { + "epoch": 0.5454278407520045, + "grad_norm": 0.6843361187967132, + "learning_rate": 9.016330285832392e-06, + "loss": 0.5694, + "step": 31565 + }, + { + "epoch": 0.5454451202654134, + "grad_norm": 0.9714866085099138, + "learning_rate": 9.015773352023271e-06, + "loss": 0.2916, + "step": 31566 + }, + { + "epoch": 0.5454623997788223, + "grad_norm": 0.6298643732919122, + "learning_rate": 9.015216421296819e-06, + "loss": 0.188, + "step": 31567 + }, + { + "epoch": 0.5454796792922312, + "grad_norm": 0.7523553808747186, + "learning_rate": 9.014659493654786e-06, + "loss": 0.3941, + "step": 31568 + }, + { + "epoch": 0.5454969588056401, + "grad_norm": 1.0822972792375731, + "learning_rate": 9.01410256909891e-06, + "loss": 0.3428, + "step": 31569 + }, + { + "epoch": 0.5455142383190489, + "grad_norm": 1.1401608390251148, + "learning_rate": 9.013545647630945e-06, + "loss": 0.4975, + "step": 31570 + }, + { + "epoch": 0.5455315178324578, + "grad_norm": 1.297195971637312, + "learning_rate": 9.012988729252632e-06, + "loss": 0.3102, + "step": 31571 + }, + { + "epoch": 0.5455487973458667, + "grad_norm": 0.8100403663667695, + "learning_rate": 9.012431813965708e-06, + "loss": 0.2965, + "step": 31572 + }, + { + "epoch": 0.5455660768592756, + "grad_norm": 1.1659790804347532, + "learning_rate": 9.011874901771924e-06, + "loss": 0.3938, + "step": 31573 + }, + { + "epoch": 0.5455833563726845, + "grad_norm": 0.7296972127326916, + "learning_rate": 9.011317992673022e-06, + "loss": 0.6159, + "step": 31574 + }, + { + "epoch": 0.5456006358860934, + "grad_norm": 1.0849080330979153, + "learning_rate": 9.010761086670753e-06, + "loss": 0.3641, + "step": 31575 + }, + { + "epoch": 0.5456179153995023, + "grad_norm": 1.3542588739063721, + "learning_rate": 9.010204183766845e-06, + "loss": 0.615, + "step": 31576 + }, + { + "epoch": 0.5456351949129112, + "grad_norm": 0.7483992259656912, + "learning_rate": 9.009647283963062e-06, + "loss": 0.8551, + "step": 31577 + }, + { + "epoch": 0.5456524744263201, + "grad_norm": 0.7857111343421922, + "learning_rate": 9.009090387261133e-06, + "loss": 0.3956, + "step": 31578 + }, + { + "epoch": 0.545669753939729, + "grad_norm": 0.6159962271953765, + "learning_rate": 9.008533493662811e-06, + "loss": 0.8772, + "step": 31579 + }, + { + "epoch": 0.545687033453138, + "grad_norm": 0.6884419094248403, + "learning_rate": 9.007976603169836e-06, + "loss": 0.3376, + "step": 31580 + }, + { + "epoch": 0.5457043129665469, + "grad_norm": 1.0083072158879072, + "learning_rate": 9.007419715783959e-06, + "loss": 0.4313, + "step": 31581 + }, + { + "epoch": 0.5457215924799558, + "grad_norm": 1.1145058044218659, + "learning_rate": 9.006862831506914e-06, + "loss": 0.4325, + "step": 31582 + }, + { + "epoch": 0.5457388719933647, + "grad_norm": 0.9073329384129033, + "learning_rate": 9.006305950340449e-06, + "loss": 0.3909, + "step": 31583 + }, + { + "epoch": 0.5457561515067736, + "grad_norm": 2.22406981632159, + "learning_rate": 9.005749072286312e-06, + "loss": 0.5302, + "step": 31584 + }, + { + "epoch": 0.5457734310201825, + "grad_norm": 1.6004480794093914, + "learning_rate": 9.005192197346242e-06, + "loss": 0.449, + "step": 31585 + }, + { + "epoch": 0.5457907105335914, + "grad_norm": 1.452300578403456, + "learning_rate": 9.00463532552199e-06, + "loss": 0.3805, + "step": 31586 + }, + { + "epoch": 0.5458079900470003, + "grad_norm": 1.4239307116730087, + "learning_rate": 9.004078456815291e-06, + "loss": 0.3511, + "step": 31587 + }, + { + "epoch": 0.5458252695604092, + "grad_norm": 0.664359207976991, + "learning_rate": 9.003521591227898e-06, + "loss": 0.2889, + "step": 31588 + }, + { + "epoch": 0.5458425490738181, + "grad_norm": 0.7645690285748756, + "learning_rate": 9.002964728761547e-06, + "loss": 0.3708, + "step": 31589 + }, + { + "epoch": 0.545859828587227, + "grad_norm": 0.9347609247402715, + "learning_rate": 9.002407869417988e-06, + "loss": 0.3899, + "step": 31590 + }, + { + "epoch": 0.5458771081006358, + "grad_norm": 1.367149293662134, + "learning_rate": 9.001851013198962e-06, + "loss": 0.4842, + "step": 31591 + }, + { + "epoch": 0.5458943876140447, + "grad_norm": 1.3317080879156171, + "learning_rate": 9.001294160106218e-06, + "loss": 0.2921, + "step": 31592 + }, + { + "epoch": 0.5459116671274536, + "grad_norm": 0.9005936217425002, + "learning_rate": 9.000737310141496e-06, + "loss": 0.4148, + "step": 31593 + }, + { + "epoch": 0.5459289466408626, + "grad_norm": 1.1200779055443364, + "learning_rate": 9.000180463306538e-06, + "loss": 0.478, + "step": 31594 + }, + { + "epoch": 0.5459462261542715, + "grad_norm": 0.8213492079531958, + "learning_rate": 8.999623619603096e-06, + "loss": 0.5398, + "step": 31595 + }, + { + "epoch": 0.5459635056676804, + "grad_norm": 0.5090344002816986, + "learning_rate": 8.9990667790329e-06, + "loss": 0.6741, + "step": 31596 + }, + { + "epoch": 0.5459807851810893, + "grad_norm": 1.0839559612474199, + "learning_rate": 8.998509941597711e-06, + "loss": 0.6014, + "step": 31597 + }, + { + "epoch": 0.5459980646944982, + "grad_norm": 1.4988750583989014, + "learning_rate": 8.997953107299261e-06, + "loss": 0.4272, + "step": 31598 + }, + { + "epoch": 0.5460153442079071, + "grad_norm": 0.9395839647656202, + "learning_rate": 8.997396276139299e-06, + "loss": 0.2557, + "step": 31599 + }, + { + "epoch": 0.546032623721316, + "grad_norm": 1.0139664794262466, + "learning_rate": 8.996839448119567e-06, + "loss": 0.4333, + "step": 31600 + }, + { + "epoch": 0.5460499032347249, + "grad_norm": 0.9962716441335262, + "learning_rate": 8.996282623241813e-06, + "loss": 0.4886, + "step": 31601 + }, + { + "epoch": 0.5460671827481338, + "grad_norm": 0.822320748511014, + "learning_rate": 8.995725801507777e-06, + "loss": 0.4102, + "step": 31602 + }, + { + "epoch": 0.5460844622615427, + "grad_norm": 1.107056417710468, + "learning_rate": 8.995168982919203e-06, + "loss": 0.4696, + "step": 31603 + }, + { + "epoch": 0.5461017417749516, + "grad_norm": 0.7304715659581505, + "learning_rate": 8.994612167477836e-06, + "loss": 0.4399, + "step": 31604 + }, + { + "epoch": 0.5461190212883605, + "grad_norm": 1.7422848786202485, + "learning_rate": 8.994055355185419e-06, + "loss": 0.54, + "step": 31605 + }, + { + "epoch": 0.5461363008017694, + "grad_norm": 0.8772638142700163, + "learning_rate": 8.9934985460437e-06, + "loss": 0.4156, + "step": 31606 + }, + { + "epoch": 0.5461535803151784, + "grad_norm": 1.0796481000400346, + "learning_rate": 8.992941740054418e-06, + "loss": 0.456, + "step": 31607 + }, + { + "epoch": 0.5461708598285873, + "grad_norm": 1.256667826860824, + "learning_rate": 8.99238493721932e-06, + "loss": 0.3592, + "step": 31608 + }, + { + "epoch": 0.5461881393419962, + "grad_norm": 1.1074011024008712, + "learning_rate": 8.991828137540147e-06, + "loss": 0.6499, + "step": 31609 + }, + { + "epoch": 0.5462054188554051, + "grad_norm": 1.7254047757372504, + "learning_rate": 8.991271341018648e-06, + "loss": 0.5644, + "step": 31610 + }, + { + "epoch": 0.546222698368814, + "grad_norm": 0.9759833659357108, + "learning_rate": 8.99071454765656e-06, + "loss": 0.3804, + "step": 31611 + }, + { + "epoch": 0.5462399778822228, + "grad_norm": 0.5117132703983235, + "learning_rate": 8.990157757455637e-06, + "loss": 0.6366, + "step": 31612 + }, + { + "epoch": 0.5462572573956317, + "grad_norm": 1.1502810565796973, + "learning_rate": 8.989600970417612e-06, + "loss": 0.4349, + "step": 31613 + }, + { + "epoch": 0.5462745369090406, + "grad_norm": 1.3495015713412932, + "learning_rate": 8.989044186544233e-06, + "loss": 0.3573, + "step": 31614 + }, + { + "epoch": 0.5462918164224495, + "grad_norm": 0.5578647138652465, + "learning_rate": 8.98848740583725e-06, + "loss": 0.5634, + "step": 31615 + }, + { + "epoch": 0.5463090959358584, + "grad_norm": 1.0846755269954755, + "learning_rate": 8.98793062829839e-06, + "loss": 0.5124, + "step": 31616 + }, + { + "epoch": 0.5463263754492673, + "grad_norm": 1.4587432408121022, + "learning_rate": 8.98737385392942e-06, + "loss": 0.6841, + "step": 31617 + }, + { + "epoch": 0.5463436549626762, + "grad_norm": 0.6127054675181779, + "learning_rate": 8.986817082732066e-06, + "loss": 0.3442, + "step": 31618 + }, + { + "epoch": 0.5463609344760851, + "grad_norm": 0.6353215950905019, + "learning_rate": 8.98626031470808e-06, + "loss": 0.3505, + "step": 31619 + }, + { + "epoch": 0.546378213989494, + "grad_norm": 0.5456668821696447, + "learning_rate": 8.985703549859201e-06, + "loss": 0.7248, + "step": 31620 + }, + { + "epoch": 0.546395493502903, + "grad_norm": 1.1520895100075712, + "learning_rate": 8.985146788187179e-06, + "loss": 0.3197, + "step": 31621 + }, + { + "epoch": 0.5464127730163119, + "grad_norm": 0.8658540252296697, + "learning_rate": 8.98459002969375e-06, + "loss": 0.4024, + "step": 31622 + }, + { + "epoch": 0.5464300525297208, + "grad_norm": 0.7696127355862825, + "learning_rate": 8.984033274380668e-06, + "loss": 0.3871, + "step": 31623 + }, + { + "epoch": 0.5464473320431297, + "grad_norm": 1.2972687893628458, + "learning_rate": 8.983476522249667e-06, + "loss": 0.3835, + "step": 31624 + }, + { + "epoch": 0.5464646115565386, + "grad_norm": 1.0490986734564964, + "learning_rate": 8.982919773302495e-06, + "loss": 0.4147, + "step": 31625 + }, + { + "epoch": 0.5464818910699475, + "grad_norm": 1.0410946603926992, + "learning_rate": 8.9823630275409e-06, + "loss": 0.4592, + "step": 31626 + }, + { + "epoch": 0.5464991705833564, + "grad_norm": 0.9957482722220588, + "learning_rate": 8.981806284966616e-06, + "loss": 0.4335, + "step": 31627 + }, + { + "epoch": 0.5465164500967653, + "grad_norm": 1.0913293723259827, + "learning_rate": 8.981249545581394e-06, + "loss": 0.5398, + "step": 31628 + }, + { + "epoch": 0.5465337296101742, + "grad_norm": 0.6509825326789355, + "learning_rate": 8.980692809386975e-06, + "loss": 0.417, + "step": 31629 + }, + { + "epoch": 0.5465510091235831, + "grad_norm": 0.9051154919797638, + "learning_rate": 8.980136076385103e-06, + "loss": 0.3258, + "step": 31630 + }, + { + "epoch": 0.546568288636992, + "grad_norm": 1.152316294799889, + "learning_rate": 8.979579346577522e-06, + "loss": 0.3504, + "step": 31631 + }, + { + "epoch": 0.5465855681504009, + "grad_norm": 0.8290497408387871, + "learning_rate": 8.979022619965981e-06, + "loss": 0.2591, + "step": 31632 + }, + { + "epoch": 0.5466028476638097, + "grad_norm": 1.2151383136768623, + "learning_rate": 8.978465896552216e-06, + "loss": 0.5599, + "step": 31633 + }, + { + "epoch": 0.5466201271772186, + "grad_norm": 1.493642147785548, + "learning_rate": 8.97790917633797e-06, + "loss": 0.4329, + "step": 31634 + }, + { + "epoch": 0.5466374066906275, + "grad_norm": 0.943268514080915, + "learning_rate": 8.977352459324997e-06, + "loss": 0.5069, + "step": 31635 + }, + { + "epoch": 0.5466546862040365, + "grad_norm": 0.867446091080418, + "learning_rate": 8.976795745515025e-06, + "loss": 0.4779, + "step": 31636 + }, + { + "epoch": 0.5466719657174454, + "grad_norm": 0.8951280230156967, + "learning_rate": 8.976239034909815e-06, + "loss": 0.4388, + "step": 31637 + }, + { + "epoch": 0.5466892452308543, + "grad_norm": 1.1024720058020536, + "learning_rate": 8.975682327511097e-06, + "loss": 0.4314, + "step": 31638 + }, + { + "epoch": 0.5467065247442632, + "grad_norm": 1.0905753542293135, + "learning_rate": 8.97512562332062e-06, + "loss": 0.3388, + "step": 31639 + }, + { + "epoch": 0.5467238042576721, + "grad_norm": 1.2863433969765616, + "learning_rate": 8.974568922340127e-06, + "loss": 0.372, + "step": 31640 + }, + { + "epoch": 0.546741083771081, + "grad_norm": 1.6271240264718818, + "learning_rate": 8.974012224571367e-06, + "loss": 0.5604, + "step": 31641 + }, + { + "epoch": 0.5467583632844899, + "grad_norm": 1.061444186779701, + "learning_rate": 8.973455530016072e-06, + "loss": 0.3324, + "step": 31642 + }, + { + "epoch": 0.5467756427978988, + "grad_norm": 1.1700349061675166, + "learning_rate": 8.972898838675998e-06, + "loss": 0.594, + "step": 31643 + }, + { + "epoch": 0.5467929223113077, + "grad_norm": 1.0447712063783365, + "learning_rate": 8.972342150552882e-06, + "loss": 0.3748, + "step": 31644 + }, + { + "epoch": 0.5468102018247166, + "grad_norm": 1.6087506731641676, + "learning_rate": 8.971785465648465e-06, + "loss": 0.4273, + "step": 31645 + }, + { + "epoch": 0.5468274813381255, + "grad_norm": 0.6754844976557419, + "learning_rate": 8.971228783964498e-06, + "loss": 0.3581, + "step": 31646 + }, + { + "epoch": 0.5468447608515344, + "grad_norm": 1.1766497844214212, + "learning_rate": 8.970672105502718e-06, + "loss": 0.4517, + "step": 31647 + }, + { + "epoch": 0.5468620403649433, + "grad_norm": 0.8970764883598382, + "learning_rate": 8.970115430264872e-06, + "loss": 0.3716, + "step": 31648 + }, + { + "epoch": 0.5468793198783523, + "grad_norm": 0.7683538467586675, + "learning_rate": 8.969558758252701e-06, + "loss": 0.3049, + "step": 31649 + }, + { + "epoch": 0.5468965993917612, + "grad_norm": 1.7370044129931836, + "learning_rate": 8.969002089467954e-06, + "loss": 0.3026, + "step": 31650 + }, + { + "epoch": 0.5469138789051701, + "grad_norm": 0.7714715563886944, + "learning_rate": 8.968445423912365e-06, + "loss": 0.3751, + "step": 31651 + }, + { + "epoch": 0.546931158418579, + "grad_norm": 0.9507436398256657, + "learning_rate": 8.96788876158769e-06, + "loss": 0.3508, + "step": 31652 + }, + { + "epoch": 0.5469484379319879, + "grad_norm": 1.30144935299939, + "learning_rate": 8.967332102495661e-06, + "loss": 0.4574, + "step": 31653 + }, + { + "epoch": 0.5469657174453968, + "grad_norm": 1.0104035203690858, + "learning_rate": 8.96677544663803e-06, + "loss": 0.3877, + "step": 31654 + }, + { + "epoch": 0.5469829969588056, + "grad_norm": 0.7489194180262824, + "learning_rate": 8.966218794016537e-06, + "loss": 0.4141, + "step": 31655 + }, + { + "epoch": 0.5470002764722145, + "grad_norm": 0.9298401079643127, + "learning_rate": 8.96566214463292e-06, + "loss": 0.4744, + "step": 31656 + }, + { + "epoch": 0.5470175559856234, + "grad_norm": 0.9737738956557143, + "learning_rate": 8.965105498488934e-06, + "loss": 0.6458, + "step": 31657 + }, + { + "epoch": 0.5470348354990323, + "grad_norm": 0.8857917189274535, + "learning_rate": 8.964548855586311e-06, + "loss": 0.4801, + "step": 31658 + }, + { + "epoch": 0.5470521150124412, + "grad_norm": 1.120618808662565, + "learning_rate": 8.963992215926803e-06, + "loss": 0.5836, + "step": 31659 + }, + { + "epoch": 0.5470693945258501, + "grad_norm": 0.8905138148812487, + "learning_rate": 8.963435579512148e-06, + "loss": 0.3939, + "step": 31660 + }, + { + "epoch": 0.547086674039259, + "grad_norm": 0.8975090363520256, + "learning_rate": 8.962878946344096e-06, + "loss": 0.3419, + "step": 31661 + }, + { + "epoch": 0.5471039535526679, + "grad_norm": 0.7971887788125477, + "learning_rate": 8.962322316424378e-06, + "loss": 0.3244, + "step": 31662 + }, + { + "epoch": 0.5471212330660769, + "grad_norm": 0.8849927814278008, + "learning_rate": 8.961765689754752e-06, + "loss": 0.4489, + "step": 31663 + }, + { + "epoch": 0.5471385125794858, + "grad_norm": 1.0376725544742214, + "learning_rate": 8.961209066336955e-06, + "loss": 0.5053, + "step": 31664 + }, + { + "epoch": 0.5471557920928947, + "grad_norm": 0.742213851593018, + "learning_rate": 8.960652446172725e-06, + "loss": 0.4635, + "step": 31665 + }, + { + "epoch": 0.5471730716063036, + "grad_norm": 0.9307034488926954, + "learning_rate": 8.960095829263816e-06, + "loss": 0.4678, + "step": 31666 + }, + { + "epoch": 0.5471903511197125, + "grad_norm": 1.2888942232314582, + "learning_rate": 8.959539215611962e-06, + "loss": 0.2057, + "step": 31667 + }, + { + "epoch": 0.5472076306331214, + "grad_norm": 1.235769327590495, + "learning_rate": 8.958982605218913e-06, + "loss": 0.3798, + "step": 31668 + }, + { + "epoch": 0.5472249101465303, + "grad_norm": 1.4330674074009158, + "learning_rate": 8.958425998086407e-06, + "loss": 0.3929, + "step": 31669 + }, + { + "epoch": 0.5472421896599392, + "grad_norm": 0.9393774998788288, + "learning_rate": 8.95786939421619e-06, + "loss": 0.3788, + "step": 31670 + }, + { + "epoch": 0.5472594691733481, + "grad_norm": 0.8097209987793805, + "learning_rate": 8.957312793610006e-06, + "loss": 0.3248, + "step": 31671 + }, + { + "epoch": 0.547276748686757, + "grad_norm": 1.4727602784635796, + "learning_rate": 8.9567561962696e-06, + "loss": 0.5655, + "step": 31672 + }, + { + "epoch": 0.5472940282001659, + "grad_norm": 1.282213233097224, + "learning_rate": 8.95619960219671e-06, + "loss": 0.4762, + "step": 31673 + }, + { + "epoch": 0.5473113077135748, + "grad_norm": 1.0588007663343582, + "learning_rate": 8.955643011393083e-06, + "loss": 0.4771, + "step": 31674 + }, + { + "epoch": 0.5473285872269837, + "grad_norm": 1.0264018415073075, + "learning_rate": 8.955086423860464e-06, + "loss": 0.3169, + "step": 31675 + }, + { + "epoch": 0.5473458667403925, + "grad_norm": 0.8016612082173883, + "learning_rate": 8.954529839600587e-06, + "loss": 0.5627, + "step": 31676 + }, + { + "epoch": 0.5473631462538014, + "grad_norm": 1.3095417619152008, + "learning_rate": 8.953973258615209e-06, + "loss": 0.5455, + "step": 31677 + }, + { + "epoch": 0.5473804257672104, + "grad_norm": 0.8329657703231702, + "learning_rate": 8.953416680906061e-06, + "loss": 0.3184, + "step": 31678 + }, + { + "epoch": 0.5473977052806193, + "grad_norm": 0.8292481708367756, + "learning_rate": 8.952860106474895e-06, + "loss": 0.3331, + "step": 31679 + }, + { + "epoch": 0.5474149847940282, + "grad_norm": 0.5618025158663188, + "learning_rate": 8.95230353532345e-06, + "loss": 0.6253, + "step": 31680 + }, + { + "epoch": 0.5474322643074371, + "grad_norm": 1.000591116317708, + "learning_rate": 8.95174696745347e-06, + "loss": 0.3526, + "step": 31681 + }, + { + "epoch": 0.547449543820846, + "grad_norm": 0.5941768478597957, + "learning_rate": 8.951190402866696e-06, + "loss": 0.5452, + "step": 31682 + }, + { + "epoch": 0.5474668233342549, + "grad_norm": 1.063469504401164, + "learning_rate": 8.950633841564878e-06, + "loss": 0.4526, + "step": 31683 + }, + { + "epoch": 0.5474841028476638, + "grad_norm": 0.8137237963552042, + "learning_rate": 8.95007728354975e-06, + "loss": 0.3869, + "step": 31684 + }, + { + "epoch": 0.5475013823610727, + "grad_norm": 0.9191364713019365, + "learning_rate": 8.949520728823061e-06, + "loss": 0.3378, + "step": 31685 + }, + { + "epoch": 0.5475186618744816, + "grad_norm": 0.8476128040631389, + "learning_rate": 8.948964177386556e-06, + "loss": 0.4422, + "step": 31686 + }, + { + "epoch": 0.5475359413878905, + "grad_norm": 1.412403572503559, + "learning_rate": 8.94840762924197e-06, + "loss": 0.5797, + "step": 31687 + }, + { + "epoch": 0.5475532209012994, + "grad_norm": 0.7157359663586818, + "learning_rate": 8.947851084391055e-06, + "loss": 0.5964, + "step": 31688 + }, + { + "epoch": 0.5475705004147083, + "grad_norm": 0.9796455718333634, + "learning_rate": 8.947294542835547e-06, + "loss": 0.443, + "step": 31689 + }, + { + "epoch": 0.5475877799281172, + "grad_norm": 1.4418373145953922, + "learning_rate": 8.946738004577196e-06, + "loss": 0.3776, + "step": 31690 + }, + { + "epoch": 0.5476050594415262, + "grad_norm": 0.9374606809786458, + "learning_rate": 8.946181469617738e-06, + "loss": 0.4472, + "step": 31691 + }, + { + "epoch": 0.5476223389549351, + "grad_norm": 0.8362019897102984, + "learning_rate": 8.945624937958924e-06, + "loss": 0.4057, + "step": 31692 + }, + { + "epoch": 0.547639618468344, + "grad_norm": 0.9938944814398951, + "learning_rate": 8.94506840960249e-06, + "loss": 0.431, + "step": 31693 + }, + { + "epoch": 0.5476568979817529, + "grad_norm": 0.8405231921091226, + "learning_rate": 8.944511884550182e-06, + "loss": 0.3623, + "step": 31694 + }, + { + "epoch": 0.5476741774951618, + "grad_norm": 1.4409595293847175, + "learning_rate": 8.943955362803747e-06, + "loss": 0.3222, + "step": 31695 + }, + { + "epoch": 0.5476914570085707, + "grad_norm": 1.5785703550038457, + "learning_rate": 8.943398844364916e-06, + "loss": 0.3246, + "step": 31696 + }, + { + "epoch": 0.5477087365219795, + "grad_norm": 0.8724032272906482, + "learning_rate": 8.942842329235449e-06, + "loss": 0.3381, + "step": 31697 + }, + { + "epoch": 0.5477260160353884, + "grad_norm": 0.8202807918882662, + "learning_rate": 8.942285817417074e-06, + "loss": 0.1979, + "step": 31698 + }, + { + "epoch": 0.5477432955487973, + "grad_norm": 0.9869478808774094, + "learning_rate": 8.941729308911542e-06, + "loss": 0.2867, + "step": 31699 + }, + { + "epoch": 0.5477605750622062, + "grad_norm": 1.0427972438032778, + "learning_rate": 8.941172803720592e-06, + "loss": 0.408, + "step": 31700 + }, + { + "epoch": 0.5477778545756151, + "grad_norm": 1.328936750527561, + "learning_rate": 8.940616301845975e-06, + "loss": 0.4015, + "step": 31701 + }, + { + "epoch": 0.547795134089024, + "grad_norm": 1.0838243337003035, + "learning_rate": 8.940059803289422e-06, + "loss": 0.3602, + "step": 31702 + }, + { + "epoch": 0.5478124136024329, + "grad_norm": 1.306623387148532, + "learning_rate": 8.939503308052687e-06, + "loss": 0.3968, + "step": 31703 + }, + { + "epoch": 0.5478296931158418, + "grad_norm": 0.5567912574247389, + "learning_rate": 8.938946816137504e-06, + "loss": 0.4807, + "step": 31704 + }, + { + "epoch": 0.5478469726292508, + "grad_norm": 0.8222739565968411, + "learning_rate": 8.938390327545625e-06, + "loss": 0.4304, + "step": 31705 + }, + { + "epoch": 0.5478642521426597, + "grad_norm": 1.2241253583562517, + "learning_rate": 8.937833842278786e-06, + "loss": 0.4663, + "step": 31706 + }, + { + "epoch": 0.5478815316560686, + "grad_norm": 0.7683741770496332, + "learning_rate": 8.937277360338733e-06, + "loss": 0.3601, + "step": 31707 + }, + { + "epoch": 0.5478988111694775, + "grad_norm": 1.0232350035763096, + "learning_rate": 8.936720881727205e-06, + "loss": 0.3811, + "step": 31708 + }, + { + "epoch": 0.5479160906828864, + "grad_norm": 1.5821776118359911, + "learning_rate": 8.93616440644595e-06, + "loss": 0.5484, + "step": 31709 + }, + { + "epoch": 0.5479333701962953, + "grad_norm": 1.0213888515631886, + "learning_rate": 8.935607934496708e-06, + "loss": 0.3832, + "step": 31710 + }, + { + "epoch": 0.5479506497097042, + "grad_norm": 0.9886965707379075, + "learning_rate": 8.935051465881222e-06, + "loss": 0.5825, + "step": 31711 + }, + { + "epoch": 0.5479679292231131, + "grad_norm": 1.4057807123597101, + "learning_rate": 8.934495000601241e-06, + "loss": 0.3567, + "step": 31712 + }, + { + "epoch": 0.547985208736522, + "grad_norm": 1.0077030617971796, + "learning_rate": 8.933938538658497e-06, + "loss": 0.3165, + "step": 31713 + }, + { + "epoch": 0.5480024882499309, + "grad_norm": 0.8708791285570774, + "learning_rate": 8.933382080054741e-06, + "loss": 0.4304, + "step": 31714 + }, + { + "epoch": 0.5480197677633398, + "grad_norm": 1.2864513222741918, + "learning_rate": 8.932825624791716e-06, + "loss": 0.2584, + "step": 31715 + }, + { + "epoch": 0.5480370472767487, + "grad_norm": 0.9385133903689705, + "learning_rate": 8.932269172871155e-06, + "loss": 0.594, + "step": 31716 + }, + { + "epoch": 0.5480543267901576, + "grad_norm": 0.46928637566274645, + "learning_rate": 8.931712724294816e-06, + "loss": 0.6328, + "step": 31717 + }, + { + "epoch": 0.5480716063035664, + "grad_norm": 0.9653052281446758, + "learning_rate": 8.931156279064428e-06, + "loss": 0.5038, + "step": 31718 + }, + { + "epoch": 0.5480888858169753, + "grad_norm": 1.010549406945147, + "learning_rate": 8.930599837181743e-06, + "loss": 0.3837, + "step": 31719 + }, + { + "epoch": 0.5481061653303843, + "grad_norm": 1.258647029379197, + "learning_rate": 8.930043398648498e-06, + "loss": 0.2132, + "step": 31720 + }, + { + "epoch": 0.5481234448437932, + "grad_norm": 1.1674116957895875, + "learning_rate": 8.929486963466443e-06, + "loss": 0.5051, + "step": 31721 + }, + { + "epoch": 0.5481407243572021, + "grad_norm": 1.1228101761287455, + "learning_rate": 8.92893053163731e-06, + "loss": 0.5986, + "step": 31722 + }, + { + "epoch": 0.548158003870611, + "grad_norm": 0.751725693255323, + "learning_rate": 8.928374103162853e-06, + "loss": 0.5712, + "step": 31723 + }, + { + "epoch": 0.5481752833840199, + "grad_norm": 1.121586912294568, + "learning_rate": 8.927817678044805e-06, + "loss": 0.4828, + "step": 31724 + }, + { + "epoch": 0.5481925628974288, + "grad_norm": 1.1336694409576678, + "learning_rate": 8.927261256284917e-06, + "loss": 0.3935, + "step": 31725 + }, + { + "epoch": 0.5482098424108377, + "grad_norm": 0.7769450999616431, + "learning_rate": 8.92670483788493e-06, + "loss": 0.4553, + "step": 31726 + }, + { + "epoch": 0.5482271219242466, + "grad_norm": 1.2104504674484486, + "learning_rate": 8.926148422846583e-06, + "loss": 0.3423, + "step": 31727 + }, + { + "epoch": 0.5482444014376555, + "grad_norm": 0.7440282651202507, + "learning_rate": 8.92559201117162e-06, + "loss": 0.7529, + "step": 31728 + }, + { + "epoch": 0.5482616809510644, + "grad_norm": 0.8578937308240585, + "learning_rate": 8.925035602861784e-06, + "loss": 0.4075, + "step": 31729 + }, + { + "epoch": 0.5482789604644733, + "grad_norm": 1.0344351832944705, + "learning_rate": 8.924479197918818e-06, + "loss": 0.3981, + "step": 31730 + }, + { + "epoch": 0.5482962399778822, + "grad_norm": 0.9327294184101024, + "learning_rate": 8.923922796344466e-06, + "loss": 0.2114, + "step": 31731 + }, + { + "epoch": 0.5483135194912911, + "grad_norm": 0.7163134467462445, + "learning_rate": 8.923366398140471e-06, + "loss": 0.5976, + "step": 31732 + }, + { + "epoch": 0.5483307990047, + "grad_norm": 1.1828530191826252, + "learning_rate": 8.922810003308571e-06, + "loss": 0.5184, + "step": 31733 + }, + { + "epoch": 0.548348078518109, + "grad_norm": 1.054675032308492, + "learning_rate": 8.922253611850515e-06, + "loss": 0.5341, + "step": 31734 + }, + { + "epoch": 0.5483653580315179, + "grad_norm": 1.2783953262799348, + "learning_rate": 8.92169722376804e-06, + "loss": 0.335, + "step": 31735 + }, + { + "epoch": 0.5483826375449268, + "grad_norm": 0.8808818965245115, + "learning_rate": 8.921140839062894e-06, + "loss": 0.3499, + "step": 31736 + }, + { + "epoch": 0.5483999170583357, + "grad_norm": 1.000061545108604, + "learning_rate": 8.920584457736817e-06, + "loss": 0.2375, + "step": 31737 + }, + { + "epoch": 0.5484171965717446, + "grad_norm": 0.8267409734776225, + "learning_rate": 8.920028079791548e-06, + "loss": 0.4901, + "step": 31738 + }, + { + "epoch": 0.5484344760851534, + "grad_norm": 0.9847129517485256, + "learning_rate": 8.919471705228836e-06, + "loss": 0.5938, + "step": 31739 + }, + { + "epoch": 0.5484517555985623, + "grad_norm": 1.0319960195193594, + "learning_rate": 8.918915334050417e-06, + "loss": 0.3498, + "step": 31740 + }, + { + "epoch": 0.5484690351119712, + "grad_norm": 1.364953400542991, + "learning_rate": 8.918358966258043e-06, + "loss": 0.5018, + "step": 31741 + }, + { + "epoch": 0.5484863146253801, + "grad_norm": 1.3083241316074363, + "learning_rate": 8.917802601853445e-06, + "loss": 0.2973, + "step": 31742 + }, + { + "epoch": 0.548503594138789, + "grad_norm": 1.1662846820432868, + "learning_rate": 8.917246240838378e-06, + "loss": 0.3774, + "step": 31743 + }, + { + "epoch": 0.5485208736521979, + "grad_norm": 0.5164638558106117, + "learning_rate": 8.916689883214573e-06, + "loss": 0.589, + "step": 31744 + }, + { + "epoch": 0.5485381531656068, + "grad_norm": 0.9929132259360016, + "learning_rate": 8.91613352898378e-06, + "loss": 0.4603, + "step": 31745 + }, + { + "epoch": 0.5485554326790157, + "grad_norm": 1.1359158208756075, + "learning_rate": 8.915577178147741e-06, + "loss": 0.3064, + "step": 31746 + }, + { + "epoch": 0.5485727121924246, + "grad_norm": 1.1969612115780182, + "learning_rate": 8.915020830708191e-06, + "loss": 0.5134, + "step": 31747 + }, + { + "epoch": 0.5485899917058336, + "grad_norm": 0.6891049128919514, + "learning_rate": 8.914464486666884e-06, + "loss": 0.2146, + "step": 31748 + }, + { + "epoch": 0.5486072712192425, + "grad_norm": 1.0683740864028188, + "learning_rate": 8.913908146025552e-06, + "loss": 0.4249, + "step": 31749 + }, + { + "epoch": 0.5486245507326514, + "grad_norm": 1.2772958849787646, + "learning_rate": 8.913351808785945e-06, + "loss": 0.5093, + "step": 31750 + }, + { + "epoch": 0.5486418302460603, + "grad_norm": 0.9768570851581951, + "learning_rate": 8.912795474949798e-06, + "loss": 0.314, + "step": 31751 + }, + { + "epoch": 0.5486591097594692, + "grad_norm": 0.8954417104575482, + "learning_rate": 8.912239144518867e-06, + "loss": 0.4495, + "step": 31752 + }, + { + "epoch": 0.5486763892728781, + "grad_norm": 1.0728128810384387, + "learning_rate": 8.91168281749488e-06, + "loss": 0.2983, + "step": 31753 + }, + { + "epoch": 0.548693668786287, + "grad_norm": 0.9018838728532501, + "learning_rate": 8.911126493879585e-06, + "loss": 0.4506, + "step": 31754 + }, + { + "epoch": 0.5487109482996959, + "grad_norm": 1.67989302824414, + "learning_rate": 8.910570173674724e-06, + "loss": 0.4539, + "step": 31755 + }, + { + "epoch": 0.5487282278131048, + "grad_norm": 1.3862826288116386, + "learning_rate": 8.910013856882043e-06, + "loss": 0.4449, + "step": 31756 + }, + { + "epoch": 0.5487455073265137, + "grad_norm": 0.7658220049407038, + "learning_rate": 8.909457543503282e-06, + "loss": 0.2802, + "step": 31757 + }, + { + "epoch": 0.5487627868399226, + "grad_norm": 1.062481163856312, + "learning_rate": 8.908901233540178e-06, + "loss": 0.506, + "step": 31758 + }, + { + "epoch": 0.5487800663533315, + "grad_norm": 0.7944288077573706, + "learning_rate": 8.908344926994483e-06, + "loss": 0.4197, + "step": 31759 + }, + { + "epoch": 0.5487973458667403, + "grad_norm": 1.3904225867916575, + "learning_rate": 8.907788623867931e-06, + "loss": 0.3797, + "step": 31760 + }, + { + "epoch": 0.5488146253801492, + "grad_norm": 1.1141598340110739, + "learning_rate": 8.907232324162273e-06, + "loss": 0.4155, + "step": 31761 + }, + { + "epoch": 0.5488319048935582, + "grad_norm": 1.1131689917188416, + "learning_rate": 8.906676027879239e-06, + "loss": 0.3271, + "step": 31762 + }, + { + "epoch": 0.5488491844069671, + "grad_norm": 0.8670109390958484, + "learning_rate": 8.906119735020587e-06, + "loss": 0.4544, + "step": 31763 + }, + { + "epoch": 0.548866463920376, + "grad_norm": 1.310383612491639, + "learning_rate": 8.905563445588045e-06, + "loss": 0.5293, + "step": 31764 + }, + { + "epoch": 0.5488837434337849, + "grad_norm": 0.5539720971909442, + "learning_rate": 8.905007159583363e-06, + "loss": 0.6894, + "step": 31765 + }, + { + "epoch": 0.5489010229471938, + "grad_norm": 1.404842008545707, + "learning_rate": 8.904450877008282e-06, + "loss": 0.5379, + "step": 31766 + }, + { + "epoch": 0.5489183024606027, + "grad_norm": 0.6314969002687695, + "learning_rate": 8.903894597864546e-06, + "loss": 0.5856, + "step": 31767 + }, + { + "epoch": 0.5489355819740116, + "grad_norm": 1.3495233191022586, + "learning_rate": 8.903338322153895e-06, + "loss": 0.5329, + "step": 31768 + }, + { + "epoch": 0.5489528614874205, + "grad_norm": 1.6718939419456655, + "learning_rate": 8.902782049878068e-06, + "loss": 0.451, + "step": 31769 + }, + { + "epoch": 0.5489701410008294, + "grad_norm": 0.9487275393888678, + "learning_rate": 8.902225781038814e-06, + "loss": 0.3871, + "step": 31770 + }, + { + "epoch": 0.5489874205142383, + "grad_norm": 0.9122707042585448, + "learning_rate": 8.901669515637871e-06, + "loss": 0.5143, + "step": 31771 + }, + { + "epoch": 0.5490047000276472, + "grad_norm": 0.9121102405243272, + "learning_rate": 8.901113253676987e-06, + "loss": 0.5868, + "step": 31772 + }, + { + "epoch": 0.5490219795410561, + "grad_norm": 0.6786716249850543, + "learning_rate": 8.900556995157894e-06, + "loss": 0.2155, + "step": 31773 + }, + { + "epoch": 0.549039259054465, + "grad_norm": 0.6258242721397136, + "learning_rate": 8.900000740082344e-06, + "loss": 0.3519, + "step": 31774 + }, + { + "epoch": 0.549056538567874, + "grad_norm": 0.9829620819411566, + "learning_rate": 8.899444488452071e-06, + "loss": 0.39, + "step": 31775 + }, + { + "epoch": 0.5490738180812829, + "grad_norm": 0.7548947542232209, + "learning_rate": 8.898888240268823e-06, + "loss": 0.38, + "step": 31776 + }, + { + "epoch": 0.5490910975946918, + "grad_norm": 1.4319497787890885, + "learning_rate": 8.898331995534346e-06, + "loss": 0.3567, + "step": 31777 + }, + { + "epoch": 0.5491083771081007, + "grad_norm": 1.0879958433482357, + "learning_rate": 8.89777575425037e-06, + "loss": 0.4825, + "step": 31778 + }, + { + "epoch": 0.5491256566215096, + "grad_norm": 0.5485887319430798, + "learning_rate": 8.897219516418648e-06, + "loss": 0.609, + "step": 31779 + }, + { + "epoch": 0.5491429361349185, + "grad_norm": 1.134181582359629, + "learning_rate": 8.896663282040915e-06, + "loss": 0.4699, + "step": 31780 + }, + { + "epoch": 0.5491602156483273, + "grad_norm": 0.9582094622639158, + "learning_rate": 8.896107051118921e-06, + "loss": 0.4008, + "step": 31781 + }, + { + "epoch": 0.5491774951617362, + "grad_norm": 0.631981541976944, + "learning_rate": 8.895550823654399e-06, + "loss": 0.7311, + "step": 31782 + }, + { + "epoch": 0.5491947746751451, + "grad_norm": 1.1491545850928748, + "learning_rate": 8.8949945996491e-06, + "loss": 0.3364, + "step": 31783 + }, + { + "epoch": 0.549212054188554, + "grad_norm": 0.6858146639604218, + "learning_rate": 8.894438379104759e-06, + "loss": 0.6716, + "step": 31784 + }, + { + "epoch": 0.5492293337019629, + "grad_norm": 1.4607644028179425, + "learning_rate": 8.89388216202312e-06, + "loss": 0.3605, + "step": 31785 + }, + { + "epoch": 0.5492466132153718, + "grad_norm": 1.490270407380294, + "learning_rate": 8.893325948405927e-06, + "loss": 0.5037, + "step": 31786 + }, + { + "epoch": 0.5492638927287807, + "grad_norm": 1.1993313631513884, + "learning_rate": 8.892769738254926e-06, + "loss": 0.3374, + "step": 31787 + }, + { + "epoch": 0.5492811722421896, + "grad_norm": 1.1116198708869043, + "learning_rate": 8.89221353157185e-06, + "loss": 0.4244, + "step": 31788 + }, + { + "epoch": 0.5492984517555985, + "grad_norm": 0.6570407030374938, + "learning_rate": 8.891657328358445e-06, + "loss": 0.44, + "step": 31789 + }, + { + "epoch": 0.5493157312690075, + "grad_norm": 1.493043032379307, + "learning_rate": 8.891101128616455e-06, + "loss": 0.4761, + "step": 31790 + }, + { + "epoch": 0.5493330107824164, + "grad_norm": 1.2033193560614, + "learning_rate": 8.890544932347618e-06, + "loss": 0.601, + "step": 31791 + }, + { + "epoch": 0.5493502902958253, + "grad_norm": 1.0844627102252535, + "learning_rate": 8.889988739553684e-06, + "loss": 0.3072, + "step": 31792 + }, + { + "epoch": 0.5493675698092342, + "grad_norm": 1.8325115079726777, + "learning_rate": 8.889432550236383e-06, + "loss": 0.4165, + "step": 31793 + }, + { + "epoch": 0.5493848493226431, + "grad_norm": 0.6389805242138785, + "learning_rate": 8.888876364397468e-06, + "loss": 0.6618, + "step": 31794 + }, + { + "epoch": 0.549402128836052, + "grad_norm": 0.8949762006816169, + "learning_rate": 8.888320182038674e-06, + "loss": 0.4683, + "step": 31795 + }, + { + "epoch": 0.5494194083494609, + "grad_norm": 0.8892804012753404, + "learning_rate": 8.887764003161747e-06, + "loss": 0.362, + "step": 31796 + }, + { + "epoch": 0.5494366878628698, + "grad_norm": 0.9814826421017625, + "learning_rate": 8.887207827768427e-06, + "loss": 0.4928, + "step": 31797 + }, + { + "epoch": 0.5494539673762787, + "grad_norm": 1.126840373821977, + "learning_rate": 8.88665165586046e-06, + "loss": 0.4024, + "step": 31798 + }, + { + "epoch": 0.5494712468896876, + "grad_norm": 1.4198725218673238, + "learning_rate": 8.886095487439583e-06, + "loss": 0.4068, + "step": 31799 + }, + { + "epoch": 0.5494885264030965, + "grad_norm": 1.9195149481172986, + "learning_rate": 8.885539322507537e-06, + "loss": 0.3162, + "step": 31800 + }, + { + "epoch": 0.5495058059165054, + "grad_norm": 0.8341637096008656, + "learning_rate": 8.88498316106607e-06, + "loss": 0.364, + "step": 31801 + }, + { + "epoch": 0.5495230854299143, + "grad_norm": 1.6184155946549157, + "learning_rate": 8.884427003116915e-06, + "loss": 0.4316, + "step": 31802 + }, + { + "epoch": 0.5495403649433231, + "grad_norm": 1.0257378773263397, + "learning_rate": 8.883870848661827e-06, + "loss": 0.2873, + "step": 31803 + }, + { + "epoch": 0.549557644456732, + "grad_norm": 1.1992363577494598, + "learning_rate": 8.883314697702535e-06, + "loss": 0.5046, + "step": 31804 + }, + { + "epoch": 0.549574923970141, + "grad_norm": 0.8852644629822912, + "learning_rate": 8.882758550240787e-06, + "loss": 0.3465, + "step": 31805 + }, + { + "epoch": 0.5495922034835499, + "grad_norm": 1.2030699432968288, + "learning_rate": 8.882202406278323e-06, + "loss": 0.4376, + "step": 31806 + }, + { + "epoch": 0.5496094829969588, + "grad_norm": 1.5057580026300097, + "learning_rate": 8.881646265816892e-06, + "loss": 0.3267, + "step": 31807 + }, + { + "epoch": 0.5496267625103677, + "grad_norm": 0.7626965782353999, + "learning_rate": 8.881090128858226e-06, + "loss": 0.2892, + "step": 31808 + }, + { + "epoch": 0.5496440420237766, + "grad_norm": 0.881956973670907, + "learning_rate": 8.880533995404067e-06, + "loss": 0.3593, + "step": 31809 + }, + { + "epoch": 0.5496613215371855, + "grad_norm": 1.1875054739864672, + "learning_rate": 8.879977865456165e-06, + "loss": 0.3197, + "step": 31810 + }, + { + "epoch": 0.5496786010505944, + "grad_norm": 0.9516012642159385, + "learning_rate": 8.879421739016256e-06, + "loss": 0.6717, + "step": 31811 + }, + { + "epoch": 0.5496958805640033, + "grad_norm": 0.7185095513789763, + "learning_rate": 8.878865616086085e-06, + "loss": 0.3426, + "step": 31812 + }, + { + "epoch": 0.5497131600774122, + "grad_norm": 0.871576690974404, + "learning_rate": 8.87830949666739e-06, + "loss": 0.3429, + "step": 31813 + }, + { + "epoch": 0.5497304395908211, + "grad_norm": 0.8191112469708259, + "learning_rate": 8.877753380761916e-06, + "loss": 0.3499, + "step": 31814 + }, + { + "epoch": 0.54974771910423, + "grad_norm": 1.5094571184157877, + "learning_rate": 8.877197268371402e-06, + "loss": 0.4546, + "step": 31815 + }, + { + "epoch": 0.549764998617639, + "grad_norm": 1.235849285035631, + "learning_rate": 8.876641159497593e-06, + "loss": 0.4388, + "step": 31816 + }, + { + "epoch": 0.5497822781310479, + "grad_norm": 1.4209044816869856, + "learning_rate": 8.876085054142228e-06, + "loss": 0.3803, + "step": 31817 + }, + { + "epoch": 0.5497995576444568, + "grad_norm": 0.8318462499259907, + "learning_rate": 8.875528952307054e-06, + "loss": 0.4742, + "step": 31818 + }, + { + "epoch": 0.5498168371578657, + "grad_norm": 0.9688519782412628, + "learning_rate": 8.874972853993807e-06, + "loss": 0.6792, + "step": 31819 + }, + { + "epoch": 0.5498341166712746, + "grad_norm": 1.059892269033023, + "learning_rate": 8.874416759204228e-06, + "loss": 0.4949, + "step": 31820 + }, + { + "epoch": 0.5498513961846835, + "grad_norm": 1.3790873417835112, + "learning_rate": 8.873860667940066e-06, + "loss": 0.5386, + "step": 31821 + }, + { + "epoch": 0.5498686756980924, + "grad_norm": 1.060478138123054, + "learning_rate": 8.873304580203049e-06, + "loss": 0.3525, + "step": 31822 + }, + { + "epoch": 0.5498859552115013, + "grad_norm": 1.2076635258332444, + "learning_rate": 8.872748495994937e-06, + "loss": 0.3496, + "step": 31823 + }, + { + "epoch": 0.5499032347249101, + "grad_norm": 0.7762974582940764, + "learning_rate": 8.872192415317457e-06, + "loss": 0.348, + "step": 31824 + }, + { + "epoch": 0.549920514238319, + "grad_norm": 1.017321251946552, + "learning_rate": 8.871636338172358e-06, + "loss": 0.4387, + "step": 31825 + }, + { + "epoch": 0.5499377937517279, + "grad_norm": 0.8467753199844225, + "learning_rate": 8.871080264561379e-06, + "loss": 0.3922, + "step": 31826 + }, + { + "epoch": 0.5499550732651368, + "grad_norm": 0.8343788092222153, + "learning_rate": 8.870524194486266e-06, + "loss": 0.2079, + "step": 31827 + }, + { + "epoch": 0.5499723527785457, + "grad_norm": 0.928711676101019, + "learning_rate": 8.869968127948749e-06, + "loss": 0.5529, + "step": 31828 + }, + { + "epoch": 0.5499896322919546, + "grad_norm": 1.399720937176026, + "learning_rate": 8.869412064950586e-06, + "loss": 0.3982, + "step": 31829 + }, + { + "epoch": 0.5500069118053635, + "grad_norm": 0.9983718098580762, + "learning_rate": 8.868856005493507e-06, + "loss": 0.3157, + "step": 31830 + }, + { + "epoch": 0.5500241913187724, + "grad_norm": 0.8378555983068326, + "learning_rate": 8.868299949579255e-06, + "loss": 0.556, + "step": 31831 + }, + { + "epoch": 0.5500414708321814, + "grad_norm": 1.076692230296517, + "learning_rate": 8.867743897209579e-06, + "loss": 0.4409, + "step": 31832 + }, + { + "epoch": 0.5500587503455903, + "grad_norm": 0.9638205117366567, + "learning_rate": 8.867187848386211e-06, + "loss": 0.3255, + "step": 31833 + }, + { + "epoch": 0.5500760298589992, + "grad_norm": 1.5345645588689931, + "learning_rate": 8.866631803110898e-06, + "loss": 0.471, + "step": 31834 + }, + { + "epoch": 0.5500933093724081, + "grad_norm": 1.4518769757616643, + "learning_rate": 8.866075761385378e-06, + "loss": 0.3315, + "step": 31835 + }, + { + "epoch": 0.550110588885817, + "grad_norm": 0.8850660535885707, + "learning_rate": 8.865519723211396e-06, + "loss": 0.336, + "step": 31836 + }, + { + "epoch": 0.5501278683992259, + "grad_norm": 0.9802866403276903, + "learning_rate": 8.864963688590692e-06, + "loss": 0.512, + "step": 31837 + }, + { + "epoch": 0.5501451479126348, + "grad_norm": 0.8959166670703221, + "learning_rate": 8.864407657525013e-06, + "loss": 0.4439, + "step": 31838 + }, + { + "epoch": 0.5501624274260437, + "grad_norm": 1.0073782352595193, + "learning_rate": 8.863851630016092e-06, + "loss": 0.3112, + "step": 31839 + }, + { + "epoch": 0.5501797069394526, + "grad_norm": 0.9099416772265733, + "learning_rate": 8.863295606065672e-06, + "loss": 0.4234, + "step": 31840 + }, + { + "epoch": 0.5501969864528615, + "grad_norm": 0.807088519628285, + "learning_rate": 8.862739585675501e-06, + "loss": 0.1975, + "step": 31841 + }, + { + "epoch": 0.5502142659662704, + "grad_norm": 0.6788374586831417, + "learning_rate": 8.862183568847308e-06, + "loss": 0.304, + "step": 31842 + }, + { + "epoch": 0.5502315454796793, + "grad_norm": 1.156592127335565, + "learning_rate": 8.861627555582853e-06, + "loss": 0.7023, + "step": 31843 + }, + { + "epoch": 0.5502488249930882, + "grad_norm": 1.671477832900053, + "learning_rate": 8.861071545883859e-06, + "loss": 0.2825, + "step": 31844 + }, + { + "epoch": 0.550266104506497, + "grad_norm": 0.8347184463678021, + "learning_rate": 8.86051553975208e-06, + "loss": 0.4444, + "step": 31845 + }, + { + "epoch": 0.550283384019906, + "grad_norm": 0.7791451808910808, + "learning_rate": 8.859959537189248e-06, + "loss": 0.2667, + "step": 31846 + }, + { + "epoch": 0.5503006635333149, + "grad_norm": 1.154516264469217, + "learning_rate": 8.859403538197115e-06, + "loss": 0.5545, + "step": 31847 + }, + { + "epoch": 0.5503179430467238, + "grad_norm": 0.7955420467102713, + "learning_rate": 8.85884754277741e-06, + "loss": 0.2466, + "step": 31848 + }, + { + "epoch": 0.5503352225601327, + "grad_norm": 1.1709892575084642, + "learning_rate": 8.85829155093189e-06, + "loss": 0.3932, + "step": 31849 + }, + { + "epoch": 0.5503525020735416, + "grad_norm": 0.784084945927301, + "learning_rate": 8.857735562662285e-06, + "loss": 0.4856, + "step": 31850 + }, + { + "epoch": 0.5503697815869505, + "grad_norm": 0.8790240485385765, + "learning_rate": 8.857179577970334e-06, + "loss": 0.2603, + "step": 31851 + }, + { + "epoch": 0.5503870611003594, + "grad_norm": 1.330101095636781, + "learning_rate": 8.856623596857789e-06, + "loss": 0.5348, + "step": 31852 + }, + { + "epoch": 0.5504043406137683, + "grad_norm": 0.7361171637230156, + "learning_rate": 8.856067619326382e-06, + "loss": 0.3687, + "step": 31853 + }, + { + "epoch": 0.5504216201271772, + "grad_norm": 1.3595512758476338, + "learning_rate": 8.85551164537786e-06, + "loss": 0.3707, + "step": 31854 + }, + { + "epoch": 0.5504388996405861, + "grad_norm": 1.2160323555740113, + "learning_rate": 8.854955675013962e-06, + "loss": 0.2458, + "step": 31855 + }, + { + "epoch": 0.550456179153995, + "grad_norm": 1.49442830536054, + "learning_rate": 8.85439970823643e-06, + "loss": 0.4662, + "step": 31856 + }, + { + "epoch": 0.5504734586674039, + "grad_norm": 1.1186326010587855, + "learning_rate": 8.853843745047003e-06, + "loss": 0.5229, + "step": 31857 + }, + { + "epoch": 0.5504907381808128, + "grad_norm": 1.1926387175569029, + "learning_rate": 8.85328778544743e-06, + "loss": 0.3979, + "step": 31858 + }, + { + "epoch": 0.5505080176942218, + "grad_norm": 0.9131095635467812, + "learning_rate": 8.852731829439443e-06, + "loss": 0.4482, + "step": 31859 + }, + { + "epoch": 0.5505252972076307, + "grad_norm": 1.1443793112023726, + "learning_rate": 8.852175877024786e-06, + "loss": 0.4756, + "step": 31860 + }, + { + "epoch": 0.5505425767210396, + "grad_norm": 1.0177629411045515, + "learning_rate": 8.851619928205208e-06, + "loss": 0.2526, + "step": 31861 + }, + { + "epoch": 0.5505598562344485, + "grad_norm": 0.7885852929662953, + "learning_rate": 8.851063982982434e-06, + "loss": 0.4022, + "step": 31862 + }, + { + "epoch": 0.5505771357478574, + "grad_norm": 1.0525274730084953, + "learning_rate": 8.850508041358223e-06, + "loss": 0.3201, + "step": 31863 + }, + { + "epoch": 0.5505944152612663, + "grad_norm": 0.5821997214720062, + "learning_rate": 8.849952103334301e-06, + "loss": 0.681, + "step": 31864 + }, + { + "epoch": 0.5506116947746752, + "grad_norm": 1.0615998728707927, + "learning_rate": 8.849396168912421e-06, + "loss": 0.3786, + "step": 31865 + }, + { + "epoch": 0.550628974288084, + "grad_norm": 1.0338219659901626, + "learning_rate": 8.84884023809432e-06, + "loss": 0.3756, + "step": 31866 + }, + { + "epoch": 0.5506462538014929, + "grad_norm": 1.0333909529866885, + "learning_rate": 8.848284310881738e-06, + "loss": 0.5691, + "step": 31867 + }, + { + "epoch": 0.5506635333149018, + "grad_norm": 1.0829568639158054, + "learning_rate": 8.847728387276413e-06, + "loss": 0.4412, + "step": 31868 + }, + { + "epoch": 0.5506808128283107, + "grad_norm": 0.9442272714641468, + "learning_rate": 8.847172467280097e-06, + "loss": 0.3348, + "step": 31869 + }, + { + "epoch": 0.5506980923417196, + "grad_norm": 1.130579252111336, + "learning_rate": 8.846616550894523e-06, + "loss": 0.5054, + "step": 31870 + }, + { + "epoch": 0.5507153718551285, + "grad_norm": 0.8915530374583498, + "learning_rate": 8.84606063812143e-06, + "loss": 0.4576, + "step": 31871 + }, + { + "epoch": 0.5507326513685374, + "grad_norm": 1.639674659265109, + "learning_rate": 8.845504728962566e-06, + "loss": 0.3701, + "step": 31872 + }, + { + "epoch": 0.5507499308819463, + "grad_norm": 1.2224357302245374, + "learning_rate": 8.844948823419666e-06, + "loss": 0.6936, + "step": 31873 + }, + { + "epoch": 0.5507672103953553, + "grad_norm": 1.809160008604978, + "learning_rate": 8.844392921494476e-06, + "loss": 0.3695, + "step": 31874 + }, + { + "epoch": 0.5507844899087642, + "grad_norm": 1.4438644914400314, + "learning_rate": 8.843837023188734e-06, + "loss": 0.4375, + "step": 31875 + }, + { + "epoch": 0.5508017694221731, + "grad_norm": 0.7261250151606691, + "learning_rate": 8.843281128504183e-06, + "loss": 0.3655, + "step": 31876 + }, + { + "epoch": 0.550819048935582, + "grad_norm": 0.901222144878052, + "learning_rate": 8.842725237442564e-06, + "loss": 0.3978, + "step": 31877 + }, + { + "epoch": 0.5508363284489909, + "grad_norm": 0.9232999330545676, + "learning_rate": 8.84216935000562e-06, + "loss": 0.3273, + "step": 31878 + }, + { + "epoch": 0.5508536079623998, + "grad_norm": 4.4112976388072695, + "learning_rate": 8.841613466195085e-06, + "loss": 0.6154, + "step": 31879 + }, + { + "epoch": 0.5508708874758087, + "grad_norm": 1.08893561417977, + "learning_rate": 8.841057586012707e-06, + "loss": 0.4483, + "step": 31880 + }, + { + "epoch": 0.5508881669892176, + "grad_norm": 1.0151056597348769, + "learning_rate": 8.840501709460227e-06, + "loss": 0.3945, + "step": 31881 + }, + { + "epoch": 0.5509054465026265, + "grad_norm": 1.2122917482261877, + "learning_rate": 8.839945836539376e-06, + "loss": 0.5707, + "step": 31882 + }, + { + "epoch": 0.5509227260160354, + "grad_norm": 0.8550423202935912, + "learning_rate": 8.839389967251912e-06, + "loss": 0.3801, + "step": 31883 + }, + { + "epoch": 0.5509400055294443, + "grad_norm": 0.7735935306303806, + "learning_rate": 8.838834101599561e-06, + "loss": 0.3533, + "step": 31884 + }, + { + "epoch": 0.5509572850428532, + "grad_norm": 1.6346879662934168, + "learning_rate": 8.83827823958407e-06, + "loss": 0.5019, + "step": 31885 + }, + { + "epoch": 0.5509745645562621, + "grad_norm": 0.7647886625548483, + "learning_rate": 8.837722381207182e-06, + "loss": 0.6379, + "step": 31886 + }, + { + "epoch": 0.5509918440696709, + "grad_norm": 0.9775932860610758, + "learning_rate": 8.837166526470636e-06, + "loss": 0.5306, + "step": 31887 + }, + { + "epoch": 0.5510091235830799, + "grad_norm": 0.9441165027613059, + "learning_rate": 8.836610675376167e-06, + "loss": 0.3012, + "step": 31888 + }, + { + "epoch": 0.5510264030964888, + "grad_norm": 1.1342829083031694, + "learning_rate": 8.836054827925531e-06, + "loss": 0.2679, + "step": 31889 + }, + { + "epoch": 0.5510436826098977, + "grad_norm": 0.5737141855759244, + "learning_rate": 8.835498984120456e-06, + "loss": 0.2795, + "step": 31890 + }, + { + "epoch": 0.5510609621233066, + "grad_norm": 1.085344406211329, + "learning_rate": 8.834943143962684e-06, + "loss": 0.4237, + "step": 31891 + }, + { + "epoch": 0.5510782416367155, + "grad_norm": 1.1659668564382348, + "learning_rate": 8.834387307453965e-06, + "loss": 0.3705, + "step": 31892 + }, + { + "epoch": 0.5510955211501244, + "grad_norm": 0.9840670514448953, + "learning_rate": 8.833831474596026e-06, + "loss": 0.4199, + "step": 31893 + }, + { + "epoch": 0.5511128006635333, + "grad_norm": 0.8106894026250654, + "learning_rate": 8.83327564539062e-06, + "loss": 0.504, + "step": 31894 + }, + { + "epoch": 0.5511300801769422, + "grad_norm": 0.7944193978978451, + "learning_rate": 8.832719819839481e-06, + "loss": 0.2356, + "step": 31895 + }, + { + "epoch": 0.5511473596903511, + "grad_norm": 1.0107867359512546, + "learning_rate": 8.832163997944352e-06, + "loss": 0.4382, + "step": 31896 + }, + { + "epoch": 0.55116463920376, + "grad_norm": 1.4741095225139962, + "learning_rate": 8.831608179706975e-06, + "loss": 0.4157, + "step": 31897 + }, + { + "epoch": 0.5511819187171689, + "grad_norm": 1.3746272661928116, + "learning_rate": 8.831052365129093e-06, + "loss": 0.3607, + "step": 31898 + }, + { + "epoch": 0.5511991982305778, + "grad_norm": 1.2027331231249792, + "learning_rate": 8.83049655421244e-06, + "loss": 0.3737, + "step": 31899 + }, + { + "epoch": 0.5512164777439867, + "grad_norm": 0.8530089112005641, + "learning_rate": 8.829940746958762e-06, + "loss": 0.2326, + "step": 31900 + }, + { + "epoch": 0.5512337572573957, + "grad_norm": 0.7083274912658227, + "learning_rate": 8.8293849433698e-06, + "loss": 0.6609, + "step": 31901 + }, + { + "epoch": 0.5512510367708046, + "grad_norm": 1.374096252070226, + "learning_rate": 8.828829143447288e-06, + "loss": 0.4932, + "step": 31902 + }, + { + "epoch": 0.5512683162842135, + "grad_norm": 0.8389048652901857, + "learning_rate": 8.82827334719298e-06, + "loss": 0.3438, + "step": 31903 + }, + { + "epoch": 0.5512855957976224, + "grad_norm": 1.5870137806383204, + "learning_rate": 8.827717554608602e-06, + "loss": 0.4652, + "step": 31904 + }, + { + "epoch": 0.5513028753110313, + "grad_norm": 0.9040813481365767, + "learning_rate": 8.827161765695905e-06, + "loss": 0.3995, + "step": 31905 + }, + { + "epoch": 0.5513201548244402, + "grad_norm": 1.301696347143152, + "learning_rate": 8.826605980456623e-06, + "loss": 0.4537, + "step": 31906 + }, + { + "epoch": 0.5513374343378491, + "grad_norm": 1.0783043304265754, + "learning_rate": 8.826050198892508e-06, + "loss": 0.3283, + "step": 31907 + }, + { + "epoch": 0.5513547138512579, + "grad_norm": 0.9905747202104344, + "learning_rate": 8.825494421005284e-06, + "loss": 0.4102, + "step": 31908 + }, + { + "epoch": 0.5513719933646668, + "grad_norm": 0.8649409139819821, + "learning_rate": 8.824938646796708e-06, + "loss": 0.4137, + "step": 31909 + }, + { + "epoch": 0.5513892728780757, + "grad_norm": 0.9599411874530643, + "learning_rate": 8.824382876268508e-06, + "loss": 0.3999, + "step": 31910 + }, + { + "epoch": 0.5514065523914846, + "grad_norm": 1.0223002943519872, + "learning_rate": 8.823827109422434e-06, + "loss": 0.4163, + "step": 31911 + }, + { + "epoch": 0.5514238319048935, + "grad_norm": 0.7571575629162062, + "learning_rate": 8.823271346260225e-06, + "loss": 0.3327, + "step": 31912 + }, + { + "epoch": 0.5514411114183024, + "grad_norm": 1.4633312361784705, + "learning_rate": 8.822715586783613e-06, + "loss": 0.49, + "step": 31913 + }, + { + "epoch": 0.5514583909317113, + "grad_norm": 1.5374625270645346, + "learning_rate": 8.822159830994349e-06, + "loss": 0.3232, + "step": 31914 + }, + { + "epoch": 0.5514756704451202, + "grad_norm": 1.6522642469144586, + "learning_rate": 8.821604078894166e-06, + "loss": 0.3681, + "step": 31915 + }, + { + "epoch": 0.5514929499585292, + "grad_norm": 1.5219409984564918, + "learning_rate": 8.821048330484814e-06, + "loss": 0.2502, + "step": 31916 + }, + { + "epoch": 0.5515102294719381, + "grad_norm": 0.9694018337814527, + "learning_rate": 8.820492585768023e-06, + "loss": 0.3702, + "step": 31917 + }, + { + "epoch": 0.551527508985347, + "grad_norm": 0.8530606268729205, + "learning_rate": 8.819936844745545e-06, + "loss": 0.4845, + "step": 31918 + }, + { + "epoch": 0.5515447884987559, + "grad_norm": 0.9530401782519666, + "learning_rate": 8.81938110741911e-06, + "loss": 0.4186, + "step": 31919 + }, + { + "epoch": 0.5515620680121648, + "grad_norm": 1.3591845600396766, + "learning_rate": 8.818825373790465e-06, + "loss": 0.4578, + "step": 31920 + }, + { + "epoch": 0.5515793475255737, + "grad_norm": 1.3905064236102078, + "learning_rate": 8.81826964386135e-06, + "loss": 0.5435, + "step": 31921 + }, + { + "epoch": 0.5515966270389826, + "grad_norm": 1.1702652465359524, + "learning_rate": 8.817713917633498e-06, + "loss": 0.311, + "step": 31922 + }, + { + "epoch": 0.5516139065523915, + "grad_norm": 1.134431953967947, + "learning_rate": 8.817158195108663e-06, + "loss": 0.3933, + "step": 31923 + }, + { + "epoch": 0.5516311860658004, + "grad_norm": 0.7181804129915365, + "learning_rate": 8.816602476288574e-06, + "loss": 0.783, + "step": 31924 + }, + { + "epoch": 0.5516484655792093, + "grad_norm": 1.0036313713299412, + "learning_rate": 8.81604676117498e-06, + "loss": 0.3161, + "step": 31925 + }, + { + "epoch": 0.5516657450926182, + "grad_norm": 0.9016979359958555, + "learning_rate": 8.815491049769614e-06, + "loss": 0.3348, + "step": 31926 + }, + { + "epoch": 0.5516830246060271, + "grad_norm": 1.400081783761311, + "learning_rate": 8.814935342074224e-06, + "loss": 0.4046, + "step": 31927 + }, + { + "epoch": 0.551700304119436, + "grad_norm": 1.4224482999452335, + "learning_rate": 8.814379638090538e-06, + "loss": 0.3999, + "step": 31928 + }, + { + "epoch": 0.551717583632845, + "grad_norm": 1.1162828412263843, + "learning_rate": 8.813823937820315e-06, + "loss": 0.3908, + "step": 31929 + }, + { + "epoch": 0.5517348631462538, + "grad_norm": 1.9010540556690154, + "learning_rate": 8.81326824126528e-06, + "loss": 0.4953, + "step": 31930 + }, + { + "epoch": 0.5517521426596627, + "grad_norm": 0.8209854455043236, + "learning_rate": 8.81271254842718e-06, + "loss": 0.2777, + "step": 31931 + }, + { + "epoch": 0.5517694221730716, + "grad_norm": 1.1455769182247153, + "learning_rate": 8.812156859307757e-06, + "loss": 0.4313, + "step": 31932 + }, + { + "epoch": 0.5517867016864805, + "grad_norm": 0.5300804327803742, + "learning_rate": 8.811601173908746e-06, + "loss": 0.6536, + "step": 31933 + }, + { + "epoch": 0.5518039811998894, + "grad_norm": 1.108403797206303, + "learning_rate": 8.811045492231892e-06, + "loss": 0.2826, + "step": 31934 + }, + { + "epoch": 0.5518212607132983, + "grad_norm": 1.244419127909295, + "learning_rate": 8.810489814278932e-06, + "loss": 0.3795, + "step": 31935 + }, + { + "epoch": 0.5518385402267072, + "grad_norm": 1.2129183859886636, + "learning_rate": 8.80993414005161e-06, + "loss": 0.4965, + "step": 31936 + }, + { + "epoch": 0.5518558197401161, + "grad_norm": 0.7964994915322731, + "learning_rate": 8.809378469551663e-06, + "loss": 0.2203, + "step": 31937 + }, + { + "epoch": 0.551873099253525, + "grad_norm": 0.6057923213461202, + "learning_rate": 8.808822802780837e-06, + "loss": 0.6564, + "step": 31938 + }, + { + "epoch": 0.5518903787669339, + "grad_norm": 0.933961523225535, + "learning_rate": 8.808267139740864e-06, + "loss": 0.3321, + "step": 31939 + }, + { + "epoch": 0.5519076582803428, + "grad_norm": 1.5010323352583315, + "learning_rate": 8.807711480433492e-06, + "loss": 0.3625, + "step": 31940 + }, + { + "epoch": 0.5519249377937517, + "grad_norm": 1.2322514022823947, + "learning_rate": 8.807155824860457e-06, + "loss": 0.4806, + "step": 31941 + }, + { + "epoch": 0.5519422173071606, + "grad_norm": 0.9580527427280593, + "learning_rate": 8.8066001730235e-06, + "loss": 0.3482, + "step": 31942 + }, + { + "epoch": 0.5519594968205696, + "grad_norm": 0.7008014187824896, + "learning_rate": 8.806044524924365e-06, + "loss": 0.3315, + "step": 31943 + }, + { + "epoch": 0.5519767763339785, + "grad_norm": 0.8914887282935642, + "learning_rate": 8.805488880564786e-06, + "loss": 0.3779, + "step": 31944 + }, + { + "epoch": 0.5519940558473874, + "grad_norm": 1.8956610748278362, + "learning_rate": 8.804933239946507e-06, + "loss": 0.3813, + "step": 31945 + }, + { + "epoch": 0.5520113353607963, + "grad_norm": 1.0625705433747166, + "learning_rate": 8.804377603071269e-06, + "loss": 0.419, + "step": 31946 + }, + { + "epoch": 0.5520286148742052, + "grad_norm": 0.8600164186870013, + "learning_rate": 8.803821969940812e-06, + "loss": 0.4199, + "step": 31947 + }, + { + "epoch": 0.5520458943876141, + "grad_norm": 1.0151329801335496, + "learning_rate": 8.80326634055687e-06, + "loss": 0.4787, + "step": 31948 + }, + { + "epoch": 0.552063173901023, + "grad_norm": 1.2436530912354002, + "learning_rate": 8.802710714921197e-06, + "loss": 0.5233, + "step": 31949 + }, + { + "epoch": 0.5520804534144319, + "grad_norm": 1.071140156960982, + "learning_rate": 8.802155093035518e-06, + "loss": 0.4696, + "step": 31950 + }, + { + "epoch": 0.5520977329278407, + "grad_norm": 0.7568983944985308, + "learning_rate": 8.801599474901585e-06, + "loss": 0.5025, + "step": 31951 + }, + { + "epoch": 0.5521150124412496, + "grad_norm": 0.9969996929187119, + "learning_rate": 8.801043860521134e-06, + "loss": 0.3549, + "step": 31952 + }, + { + "epoch": 0.5521322919546585, + "grad_norm": 0.6739000921995214, + "learning_rate": 8.8004882498959e-06, + "loss": 0.3018, + "step": 31953 + }, + { + "epoch": 0.5521495714680674, + "grad_norm": 1.1304647267395034, + "learning_rate": 8.799932643027631e-06, + "loss": 0.6191, + "step": 31954 + }, + { + "epoch": 0.5521668509814763, + "grad_norm": 0.9118416644384842, + "learning_rate": 8.799377039918062e-06, + "loss": 0.2847, + "step": 31955 + }, + { + "epoch": 0.5521841304948852, + "grad_norm": 1.3779065659647236, + "learning_rate": 8.79882144056894e-06, + "loss": 0.4064, + "step": 31956 + }, + { + "epoch": 0.5522014100082941, + "grad_norm": 1.1555882645447466, + "learning_rate": 8.798265844981995e-06, + "loss": 0.3906, + "step": 31957 + }, + { + "epoch": 0.552218689521703, + "grad_norm": 1.4378772413923098, + "learning_rate": 8.797710253158977e-06, + "loss": 0.4754, + "step": 31958 + }, + { + "epoch": 0.552235969035112, + "grad_norm": 0.7577478766465945, + "learning_rate": 8.797154665101619e-06, + "loss": 0.4728, + "step": 31959 + }, + { + "epoch": 0.5522532485485209, + "grad_norm": 1.1794565418040828, + "learning_rate": 8.796599080811665e-06, + "loss": 0.3883, + "step": 31960 + }, + { + "epoch": 0.5522705280619298, + "grad_norm": 0.9565601259731766, + "learning_rate": 8.796043500290853e-06, + "loss": 0.3111, + "step": 31961 + }, + { + "epoch": 0.5522878075753387, + "grad_norm": 0.7639418254471637, + "learning_rate": 8.795487923540926e-06, + "loss": 0.22, + "step": 31962 + }, + { + "epoch": 0.5523050870887476, + "grad_norm": 1.1224800078333286, + "learning_rate": 8.794932350563623e-06, + "loss": 0.5565, + "step": 31963 + }, + { + "epoch": 0.5523223666021565, + "grad_norm": 0.9978849291431157, + "learning_rate": 8.794376781360681e-06, + "loss": 0.3974, + "step": 31964 + }, + { + "epoch": 0.5523396461155654, + "grad_norm": 0.9629310145184925, + "learning_rate": 8.793821215933846e-06, + "loss": 0.4044, + "step": 31965 + }, + { + "epoch": 0.5523569256289743, + "grad_norm": 0.8566086308626809, + "learning_rate": 8.79326565428485e-06, + "loss": 0.4655, + "step": 31966 + }, + { + "epoch": 0.5523742051423832, + "grad_norm": 1.1963067925431046, + "learning_rate": 8.792710096415443e-06, + "loss": 0.4666, + "step": 31967 + }, + { + "epoch": 0.5523914846557921, + "grad_norm": 0.7797789589134126, + "learning_rate": 8.792154542327354e-06, + "loss": 0.4883, + "step": 31968 + }, + { + "epoch": 0.552408764169201, + "grad_norm": 1.9065734630886626, + "learning_rate": 8.791598992022335e-06, + "loss": 0.404, + "step": 31969 + }, + { + "epoch": 0.55242604368261, + "grad_norm": 1.012995945075335, + "learning_rate": 8.791043445502114e-06, + "loss": 0.2901, + "step": 31970 + }, + { + "epoch": 0.5524433231960189, + "grad_norm": 0.8565087866420531, + "learning_rate": 8.79048790276844e-06, + "loss": 0.3703, + "step": 31971 + }, + { + "epoch": 0.5524606027094277, + "grad_norm": 1.052997319665443, + "learning_rate": 8.789932363823048e-06, + "loss": 0.3044, + "step": 31972 + }, + { + "epoch": 0.5524778822228366, + "grad_norm": 1.181456400406037, + "learning_rate": 8.789376828667682e-06, + "loss": 0.4263, + "step": 31973 + }, + { + "epoch": 0.5524951617362455, + "grad_norm": 0.8201297683318512, + "learning_rate": 8.78882129730408e-06, + "loss": 0.2924, + "step": 31974 + }, + { + "epoch": 0.5525124412496544, + "grad_norm": 0.8712260155898807, + "learning_rate": 8.788265769733979e-06, + "loss": 0.4428, + "step": 31975 + }, + { + "epoch": 0.5525297207630633, + "grad_norm": 0.9485025549865678, + "learning_rate": 8.787710245959123e-06, + "loss": 0.4855, + "step": 31976 + }, + { + "epoch": 0.5525470002764722, + "grad_norm": 1.1735709312140008, + "learning_rate": 8.78715472598125e-06, + "loss": 0.3599, + "step": 31977 + }, + { + "epoch": 0.5525642797898811, + "grad_norm": 0.9954518121255811, + "learning_rate": 8.786599209802103e-06, + "loss": 0.3729, + "step": 31978 + }, + { + "epoch": 0.55258155930329, + "grad_norm": 1.1200823812244116, + "learning_rate": 8.786043697423416e-06, + "loss": 0.523, + "step": 31979 + }, + { + "epoch": 0.5525988388166989, + "grad_norm": 0.7834625588875244, + "learning_rate": 8.785488188846934e-06, + "loss": 0.431, + "step": 31980 + }, + { + "epoch": 0.5526161183301078, + "grad_norm": 0.5064068572564581, + "learning_rate": 8.784932684074395e-06, + "loss": 0.5305, + "step": 31981 + }, + { + "epoch": 0.5526333978435167, + "grad_norm": 1.5390593105552526, + "learning_rate": 8.78437718310754e-06, + "loss": 0.5124, + "step": 31982 + }, + { + "epoch": 0.5526506773569256, + "grad_norm": 1.2810039022215707, + "learning_rate": 8.78382168594811e-06, + "loss": 0.4564, + "step": 31983 + }, + { + "epoch": 0.5526679568703345, + "grad_norm": 1.0395828459083136, + "learning_rate": 8.78326619259784e-06, + "loss": 0.285, + "step": 31984 + }, + { + "epoch": 0.5526852363837435, + "grad_norm": 1.0458453581874625, + "learning_rate": 8.782710703058473e-06, + "loss": 0.4096, + "step": 31985 + }, + { + "epoch": 0.5527025158971524, + "grad_norm": 0.7121444595898125, + "learning_rate": 8.782155217331746e-06, + "loss": 0.3857, + "step": 31986 + }, + { + "epoch": 0.5527197954105613, + "grad_norm": 0.9945994826216411, + "learning_rate": 8.781599735419409e-06, + "loss": 0.5344, + "step": 31987 + }, + { + "epoch": 0.5527370749239702, + "grad_norm": 1.0086055789706652, + "learning_rate": 8.781044257323186e-06, + "loss": 0.3396, + "step": 31988 + }, + { + "epoch": 0.5527543544373791, + "grad_norm": 1.1333739664787552, + "learning_rate": 8.78048878304483e-06, + "loss": 0.3548, + "step": 31989 + }, + { + "epoch": 0.552771633950788, + "grad_norm": 0.7644288930273951, + "learning_rate": 8.779933312586073e-06, + "loss": 0.3347, + "step": 31990 + }, + { + "epoch": 0.5527889134641969, + "grad_norm": 0.7777193445728493, + "learning_rate": 8.77937784594866e-06, + "loss": 0.4669, + "step": 31991 + }, + { + "epoch": 0.5528061929776058, + "grad_norm": 0.7287131861980897, + "learning_rate": 8.778822383134325e-06, + "loss": 0.4484, + "step": 31992 + }, + { + "epoch": 0.5528234724910146, + "grad_norm": 1.4261370362514099, + "learning_rate": 8.778266924144817e-06, + "loss": 0.3909, + "step": 31993 + }, + { + "epoch": 0.5528407520044235, + "grad_norm": 0.6765462925743102, + "learning_rate": 8.777711468981867e-06, + "loss": 0.5301, + "step": 31994 + }, + { + "epoch": 0.5528580315178324, + "grad_norm": 0.9218310687113658, + "learning_rate": 8.777156017647214e-06, + "loss": 0.5549, + "step": 31995 + }, + { + "epoch": 0.5528753110312413, + "grad_norm": 1.6539728669640266, + "learning_rate": 8.776600570142607e-06, + "loss": 0.3212, + "step": 31996 + }, + { + "epoch": 0.5528925905446502, + "grad_norm": 1.1285917840367305, + "learning_rate": 8.776045126469775e-06, + "loss": 0.5768, + "step": 31997 + }, + { + "epoch": 0.5529098700580591, + "grad_norm": 0.9756129103349942, + "learning_rate": 8.775489686630468e-06, + "loss": 0.4797, + "step": 31998 + }, + { + "epoch": 0.552927149571468, + "grad_norm": 0.8757626521415599, + "learning_rate": 8.774934250626415e-06, + "loss": 0.433, + "step": 31999 + }, + { + "epoch": 0.552944429084877, + "grad_norm": 0.9961888129946324, + "learning_rate": 8.774378818459363e-06, + "loss": 0.3333, + "step": 32000 + }, + { + "epoch": 0.5529617085982859, + "grad_norm": 1.3081889821515984, + "learning_rate": 8.773823390131048e-06, + "loss": 0.3689, + "step": 32001 + }, + { + "epoch": 0.5529789881116948, + "grad_norm": 1.4267806995236603, + "learning_rate": 8.773267965643214e-06, + "loss": 0.412, + "step": 32002 + }, + { + "epoch": 0.5529962676251037, + "grad_norm": 1.404387001829893, + "learning_rate": 8.772712544997596e-06, + "loss": 0.4639, + "step": 32003 + }, + { + "epoch": 0.5530135471385126, + "grad_norm": 1.0651584356372452, + "learning_rate": 8.772157128195939e-06, + "loss": 0.3984, + "step": 32004 + }, + { + "epoch": 0.5530308266519215, + "grad_norm": 0.9780484531259053, + "learning_rate": 8.771601715239978e-06, + "loss": 0.4178, + "step": 32005 + }, + { + "epoch": 0.5530481061653304, + "grad_norm": 1.0385250909436883, + "learning_rate": 8.771046306131451e-06, + "loss": 0.3639, + "step": 32006 + }, + { + "epoch": 0.5530653856787393, + "grad_norm": 1.055046996561279, + "learning_rate": 8.770490900872106e-06, + "loss": 0.522, + "step": 32007 + }, + { + "epoch": 0.5530826651921482, + "grad_norm": 1.0928703983024253, + "learning_rate": 8.769935499463668e-06, + "loss": 0.2996, + "step": 32008 + }, + { + "epoch": 0.5530999447055571, + "grad_norm": 0.49777708954616146, + "learning_rate": 8.769380101907892e-06, + "loss": 0.5141, + "step": 32009 + }, + { + "epoch": 0.553117224218966, + "grad_norm": 1.0462242284547654, + "learning_rate": 8.76882470820651e-06, + "loss": 0.6448, + "step": 32010 + }, + { + "epoch": 0.5531345037323749, + "grad_norm": 1.6947428930011197, + "learning_rate": 8.768269318361262e-06, + "loss": 0.5182, + "step": 32011 + }, + { + "epoch": 0.5531517832457838, + "grad_norm": 1.165610639816425, + "learning_rate": 8.767713932373886e-06, + "loss": 0.4398, + "step": 32012 + }, + { + "epoch": 0.5531690627591928, + "grad_norm": 1.152972652046459, + "learning_rate": 8.76715855024613e-06, + "loss": 0.3655, + "step": 32013 + }, + { + "epoch": 0.5531863422726016, + "grad_norm": 0.7046833674598572, + "learning_rate": 8.766603171979723e-06, + "loss": 0.5654, + "step": 32014 + }, + { + "epoch": 0.5532036217860105, + "grad_norm": 1.3291816905824716, + "learning_rate": 8.766047797576407e-06, + "loss": 0.3577, + "step": 32015 + }, + { + "epoch": 0.5532209012994194, + "grad_norm": 1.1407685725052354, + "learning_rate": 8.765492427037924e-06, + "loss": 0.6859, + "step": 32016 + }, + { + "epoch": 0.5532381808128283, + "grad_norm": 1.4521602025641855, + "learning_rate": 8.764937060366013e-06, + "loss": 0.6357, + "step": 32017 + }, + { + "epoch": 0.5532554603262372, + "grad_norm": 1.4007606021375791, + "learning_rate": 8.764381697562415e-06, + "loss": 0.6256, + "step": 32018 + }, + { + "epoch": 0.5532727398396461, + "grad_norm": 1.1367305616281864, + "learning_rate": 8.763826338628865e-06, + "loss": 0.4761, + "step": 32019 + }, + { + "epoch": 0.553290019353055, + "grad_norm": 1.0754989022861017, + "learning_rate": 8.763270983567105e-06, + "loss": 0.501, + "step": 32020 + }, + { + "epoch": 0.5533072988664639, + "grad_norm": 1.2117735152557085, + "learning_rate": 8.762715632378874e-06, + "loss": 0.5035, + "step": 32021 + }, + { + "epoch": 0.5533245783798728, + "grad_norm": 1.3020309067295273, + "learning_rate": 8.762160285065912e-06, + "loss": 0.5144, + "step": 32022 + }, + { + "epoch": 0.5533418578932817, + "grad_norm": 1.8198626066184789, + "learning_rate": 8.761604941629958e-06, + "loss": 0.4935, + "step": 32023 + }, + { + "epoch": 0.5533591374066906, + "grad_norm": 1.7400573542148208, + "learning_rate": 8.761049602072752e-06, + "loss": 0.3808, + "step": 32024 + }, + { + "epoch": 0.5533764169200995, + "grad_norm": 0.8730011292381054, + "learning_rate": 8.760494266396035e-06, + "loss": 0.3012, + "step": 32025 + }, + { + "epoch": 0.5533936964335084, + "grad_norm": 1.2823546676721276, + "learning_rate": 8.75993893460154e-06, + "loss": 0.3253, + "step": 32026 + }, + { + "epoch": 0.5534109759469173, + "grad_norm": 1.347928256341756, + "learning_rate": 8.759383606691016e-06, + "loss": 0.3876, + "step": 32027 + }, + { + "epoch": 0.5534282554603263, + "grad_norm": 1.3950483546082002, + "learning_rate": 8.758828282666189e-06, + "loss": 0.4299, + "step": 32028 + }, + { + "epoch": 0.5534455349737352, + "grad_norm": 1.2275983862755047, + "learning_rate": 8.758272962528812e-06, + "loss": 0.5129, + "step": 32029 + }, + { + "epoch": 0.5534628144871441, + "grad_norm": 0.8049070408382922, + "learning_rate": 8.757717646280615e-06, + "loss": 0.2907, + "step": 32030 + }, + { + "epoch": 0.553480094000553, + "grad_norm": 0.648204110674426, + "learning_rate": 8.757162333923343e-06, + "loss": 0.3259, + "step": 32031 + }, + { + "epoch": 0.5534973735139619, + "grad_norm": 0.909471430441491, + "learning_rate": 8.756607025458732e-06, + "loss": 0.4556, + "step": 32032 + }, + { + "epoch": 0.5535146530273708, + "grad_norm": 0.7910708343418112, + "learning_rate": 8.756051720888525e-06, + "loss": 0.2897, + "step": 32033 + }, + { + "epoch": 0.5535319325407797, + "grad_norm": 0.9458033331964271, + "learning_rate": 8.755496420214455e-06, + "loss": 0.4244, + "step": 32034 + }, + { + "epoch": 0.5535492120541885, + "grad_norm": 0.9286073375795924, + "learning_rate": 8.754941123438266e-06, + "loss": 0.4475, + "step": 32035 + }, + { + "epoch": 0.5535664915675974, + "grad_norm": 0.7376656558862092, + "learning_rate": 8.754385830561698e-06, + "loss": 0.3769, + "step": 32036 + }, + { + "epoch": 0.5535837710810063, + "grad_norm": 1.516577584795316, + "learning_rate": 8.753830541586484e-06, + "loss": 0.2842, + "step": 32037 + }, + { + "epoch": 0.5536010505944152, + "grad_norm": 0.8221353762717286, + "learning_rate": 8.753275256514373e-06, + "loss": 0.31, + "step": 32038 + }, + { + "epoch": 0.5536183301078241, + "grad_norm": 0.7656955529835118, + "learning_rate": 8.752719975347094e-06, + "loss": 0.3589, + "step": 32039 + }, + { + "epoch": 0.553635609621233, + "grad_norm": 1.01758429783179, + "learning_rate": 8.752164698086393e-06, + "loss": 0.5359, + "step": 32040 + }, + { + "epoch": 0.553652889134642, + "grad_norm": 0.861803002774534, + "learning_rate": 8.751609424734005e-06, + "loss": 0.491, + "step": 32041 + }, + { + "epoch": 0.5536701686480509, + "grad_norm": 1.0296154811051903, + "learning_rate": 8.751054155291672e-06, + "loss": 0.3642, + "step": 32042 + }, + { + "epoch": 0.5536874481614598, + "grad_norm": 1.5974921335371133, + "learning_rate": 8.750498889761133e-06, + "loss": 0.6755, + "step": 32043 + }, + { + "epoch": 0.5537047276748687, + "grad_norm": 1.0672949420476165, + "learning_rate": 8.749943628144128e-06, + "loss": 0.284, + "step": 32044 + }, + { + "epoch": 0.5537220071882776, + "grad_norm": 1.163165845632089, + "learning_rate": 8.749388370442394e-06, + "loss": 0.5396, + "step": 32045 + }, + { + "epoch": 0.5537392867016865, + "grad_norm": 1.2439234383388886, + "learning_rate": 8.748833116657667e-06, + "loss": 0.3098, + "step": 32046 + }, + { + "epoch": 0.5537565662150954, + "grad_norm": 1.0836672560945657, + "learning_rate": 8.748277866791694e-06, + "loss": 0.4357, + "step": 32047 + }, + { + "epoch": 0.5537738457285043, + "grad_norm": 1.4046278462075303, + "learning_rate": 8.747722620846205e-06, + "loss": 0.3964, + "step": 32048 + }, + { + "epoch": 0.5537911252419132, + "grad_norm": 0.8694270697824459, + "learning_rate": 8.74716737882295e-06, + "loss": 0.4234, + "step": 32049 + }, + { + "epoch": 0.5538084047553221, + "grad_norm": 0.6696704333917931, + "learning_rate": 8.746612140723656e-06, + "loss": 0.293, + "step": 32050 + }, + { + "epoch": 0.553825684268731, + "grad_norm": 1.1449990839445419, + "learning_rate": 8.746056906550074e-06, + "loss": 0.3521, + "step": 32051 + }, + { + "epoch": 0.5538429637821399, + "grad_norm": 1.0437405410459617, + "learning_rate": 8.745501676303932e-06, + "loss": 0.5147, + "step": 32052 + }, + { + "epoch": 0.5538602432955488, + "grad_norm": 1.0500806353596595, + "learning_rate": 8.744946449986979e-06, + "loss": 0.5649, + "step": 32053 + }, + { + "epoch": 0.5538775228089577, + "grad_norm": 0.9563927108442202, + "learning_rate": 8.744391227600944e-06, + "loss": 0.5225, + "step": 32054 + }, + { + "epoch": 0.5538948023223667, + "grad_norm": 1.157008898708908, + "learning_rate": 8.743836009147574e-06, + "loss": 0.4181, + "step": 32055 + }, + { + "epoch": 0.5539120818357754, + "grad_norm": 1.0942729386765166, + "learning_rate": 8.743280794628605e-06, + "loss": 0.5079, + "step": 32056 + }, + { + "epoch": 0.5539293613491844, + "grad_norm": 0.8143943857045305, + "learning_rate": 8.742725584045773e-06, + "loss": 0.3688, + "step": 32057 + }, + { + "epoch": 0.5539466408625933, + "grad_norm": 1.0685805526709236, + "learning_rate": 8.742170377400826e-06, + "loss": 0.333, + "step": 32058 + }, + { + "epoch": 0.5539639203760022, + "grad_norm": 1.1896355860353045, + "learning_rate": 8.74161517469549e-06, + "loss": 0.4901, + "step": 32059 + }, + { + "epoch": 0.5539811998894111, + "grad_norm": 0.5973938271158933, + "learning_rate": 8.741059975931515e-06, + "loss": 0.5165, + "step": 32060 + }, + { + "epoch": 0.55399847940282, + "grad_norm": 1.1637413948513158, + "learning_rate": 8.740504781110634e-06, + "loss": 0.4226, + "step": 32061 + }, + { + "epoch": 0.5540157589162289, + "grad_norm": 0.5328985717700228, + "learning_rate": 8.73994959023459e-06, + "loss": 0.6269, + "step": 32062 + }, + { + "epoch": 0.5540330384296378, + "grad_norm": 1.106958118398008, + "learning_rate": 8.739394403305119e-06, + "loss": 0.568, + "step": 32063 + }, + { + "epoch": 0.5540503179430467, + "grad_norm": 1.360397240252047, + "learning_rate": 8.738839220323961e-06, + "loss": 0.4483, + "step": 32064 + }, + { + "epoch": 0.5540675974564556, + "grad_norm": 1.1560737153026641, + "learning_rate": 8.738284041292855e-06, + "loss": 0.3737, + "step": 32065 + }, + { + "epoch": 0.5540848769698645, + "grad_norm": 1.1010400553068214, + "learning_rate": 8.737728866213537e-06, + "loss": 0.4301, + "step": 32066 + }, + { + "epoch": 0.5541021564832734, + "grad_norm": 0.7664350965925858, + "learning_rate": 8.73717369508775e-06, + "loss": 0.448, + "step": 32067 + }, + { + "epoch": 0.5541194359966823, + "grad_norm": 0.9165089539854074, + "learning_rate": 8.73661852791723e-06, + "loss": 0.4706, + "step": 32068 + }, + { + "epoch": 0.5541367155100912, + "grad_norm": 0.9732457129659651, + "learning_rate": 8.736063364703716e-06, + "loss": 0.3891, + "step": 32069 + }, + { + "epoch": 0.5541539950235002, + "grad_norm": 1.876776347587841, + "learning_rate": 8.735508205448947e-06, + "loss": 0.5881, + "step": 32070 + }, + { + "epoch": 0.5541712745369091, + "grad_norm": 1.0455826624307591, + "learning_rate": 8.734953050154664e-06, + "loss": 0.3862, + "step": 32071 + }, + { + "epoch": 0.554188554050318, + "grad_norm": 1.8939555319405708, + "learning_rate": 8.734397898822602e-06, + "loss": 0.3935, + "step": 32072 + }, + { + "epoch": 0.5542058335637269, + "grad_norm": 0.7343346890232393, + "learning_rate": 8.733842751454506e-06, + "loss": 0.2962, + "step": 32073 + }, + { + "epoch": 0.5542231130771358, + "grad_norm": 1.067041818445778, + "learning_rate": 8.733287608052108e-06, + "loss": 0.36, + "step": 32074 + }, + { + "epoch": 0.5542403925905447, + "grad_norm": 1.4089486663279545, + "learning_rate": 8.73273246861715e-06, + "loss": 0.5981, + "step": 32075 + }, + { + "epoch": 0.5542576721039536, + "grad_norm": 0.9256647075565295, + "learning_rate": 8.732177333151371e-06, + "loss": 0.3411, + "step": 32076 + }, + { + "epoch": 0.5542749516173625, + "grad_norm": 1.0927035161465097, + "learning_rate": 8.731622201656506e-06, + "loss": 0.6872, + "step": 32077 + }, + { + "epoch": 0.5542922311307713, + "grad_norm": 1.2557260937910544, + "learning_rate": 8.731067074134302e-06, + "loss": 0.5931, + "step": 32078 + }, + { + "epoch": 0.5543095106441802, + "grad_norm": 0.8128604302393957, + "learning_rate": 8.730511950586488e-06, + "loss": 0.4018, + "step": 32079 + }, + { + "epoch": 0.5543267901575891, + "grad_norm": 1.020260986911421, + "learning_rate": 8.729956831014808e-06, + "loss": 0.5658, + "step": 32080 + }, + { + "epoch": 0.554344069670998, + "grad_norm": 1.1355005794380344, + "learning_rate": 8.729401715421e-06, + "loss": 0.5626, + "step": 32081 + }, + { + "epoch": 0.5543613491844069, + "grad_norm": 0.7896356743875447, + "learning_rate": 8.728846603806802e-06, + "loss": 0.5192, + "step": 32082 + }, + { + "epoch": 0.5543786286978158, + "grad_norm": 0.8681431143444662, + "learning_rate": 8.728291496173951e-06, + "loss": 0.3494, + "step": 32083 + }, + { + "epoch": 0.5543959082112248, + "grad_norm": 1.128328437536958, + "learning_rate": 8.727736392524195e-06, + "loss": 0.3667, + "step": 32084 + }, + { + "epoch": 0.5544131877246337, + "grad_norm": 0.8511127504542592, + "learning_rate": 8.727181292859258e-06, + "loss": 0.1618, + "step": 32085 + }, + { + "epoch": 0.5544304672380426, + "grad_norm": 1.005008839232825, + "learning_rate": 8.726626197180888e-06, + "loss": 0.481, + "step": 32086 + }, + { + "epoch": 0.5544477467514515, + "grad_norm": 0.588485928333442, + "learning_rate": 8.726071105490825e-06, + "loss": 0.5813, + "step": 32087 + }, + { + "epoch": 0.5544650262648604, + "grad_norm": 0.9528679482996057, + "learning_rate": 8.725516017790799e-06, + "loss": 0.4067, + "step": 32088 + }, + { + "epoch": 0.5544823057782693, + "grad_norm": 1.1608380285332702, + "learning_rate": 8.724960934082556e-06, + "loss": 0.3736, + "step": 32089 + }, + { + "epoch": 0.5544995852916782, + "grad_norm": 0.9942713112265298, + "learning_rate": 8.724405854367832e-06, + "loss": 0.4496, + "step": 32090 + }, + { + "epoch": 0.5545168648050871, + "grad_norm": 1.1876038732160423, + "learning_rate": 8.723850778648365e-06, + "loss": 0.5638, + "step": 32091 + }, + { + "epoch": 0.554534144318496, + "grad_norm": 1.3017723913562724, + "learning_rate": 8.723295706925895e-06, + "loss": 0.3753, + "step": 32092 + }, + { + "epoch": 0.5545514238319049, + "grad_norm": 1.0540639005448358, + "learning_rate": 8.722740639202161e-06, + "loss": 0.5553, + "step": 32093 + }, + { + "epoch": 0.5545687033453138, + "grad_norm": 1.558181656937329, + "learning_rate": 8.722185575478899e-06, + "loss": 0.3517, + "step": 32094 + }, + { + "epoch": 0.5545859828587227, + "grad_norm": 0.6830028283401448, + "learning_rate": 8.721630515757848e-06, + "loss": 0.7627, + "step": 32095 + }, + { + "epoch": 0.5546032623721316, + "grad_norm": 1.319958216099968, + "learning_rate": 8.72107546004075e-06, + "loss": 0.3734, + "step": 32096 + }, + { + "epoch": 0.5546205418855406, + "grad_norm": 0.8285167814749617, + "learning_rate": 8.720520408329338e-06, + "loss": 0.438, + "step": 32097 + }, + { + "epoch": 0.5546378213989495, + "grad_norm": 0.49019929160196984, + "learning_rate": 8.719965360625357e-06, + "loss": 0.4774, + "step": 32098 + }, + { + "epoch": 0.5546551009123583, + "grad_norm": 0.7714015080277273, + "learning_rate": 8.719410316930537e-06, + "loss": 0.3432, + "step": 32099 + }, + { + "epoch": 0.5546723804257672, + "grad_norm": 0.9227355081440258, + "learning_rate": 8.718855277246625e-06, + "loss": 0.5526, + "step": 32100 + }, + { + "epoch": 0.5546896599391761, + "grad_norm": 0.7123084969509356, + "learning_rate": 8.718300241575353e-06, + "loss": 0.4891, + "step": 32101 + }, + { + "epoch": 0.554706939452585, + "grad_norm": 1.1687374971756543, + "learning_rate": 8.717745209918465e-06, + "loss": 0.2807, + "step": 32102 + }, + { + "epoch": 0.5547242189659939, + "grad_norm": 0.8728768855284282, + "learning_rate": 8.717190182277693e-06, + "loss": 0.4777, + "step": 32103 + }, + { + "epoch": 0.5547414984794028, + "grad_norm": 1.4610104879666608, + "learning_rate": 8.716635158654783e-06, + "loss": 0.3847, + "step": 32104 + }, + { + "epoch": 0.5547587779928117, + "grad_norm": 1.3367129239547761, + "learning_rate": 8.716080139051464e-06, + "loss": 0.4175, + "step": 32105 + }, + { + "epoch": 0.5547760575062206, + "grad_norm": 0.774742793800846, + "learning_rate": 8.715525123469484e-06, + "loss": 0.3453, + "step": 32106 + }, + { + "epoch": 0.5547933370196295, + "grad_norm": 2.145645419306035, + "learning_rate": 8.714970111910579e-06, + "loss": 0.3046, + "step": 32107 + }, + { + "epoch": 0.5548106165330384, + "grad_norm": 1.070048646914469, + "learning_rate": 8.714415104376479e-06, + "loss": 0.211, + "step": 32108 + }, + { + "epoch": 0.5548278960464473, + "grad_norm": 2.229649525154892, + "learning_rate": 8.713860100868932e-06, + "loss": 0.5979, + "step": 32109 + }, + { + "epoch": 0.5548451755598562, + "grad_norm": 0.9963073324115571, + "learning_rate": 8.713305101389671e-06, + "loss": 0.2112, + "step": 32110 + }, + { + "epoch": 0.5548624550732651, + "grad_norm": 1.3246033057221467, + "learning_rate": 8.712750105940437e-06, + "loss": 0.4746, + "step": 32111 + }, + { + "epoch": 0.5548797345866741, + "grad_norm": 0.945865839065539, + "learning_rate": 8.712195114522967e-06, + "loss": 0.2581, + "step": 32112 + }, + { + "epoch": 0.554897014100083, + "grad_norm": 1.2002731427672564, + "learning_rate": 8.711640127139004e-06, + "loss": 0.5138, + "step": 32113 + }, + { + "epoch": 0.5549142936134919, + "grad_norm": 1.1882978255042638, + "learning_rate": 8.711085143790276e-06, + "loss": 0.4666, + "step": 32114 + }, + { + "epoch": 0.5549315731269008, + "grad_norm": 1.020605868523313, + "learning_rate": 8.710530164478531e-06, + "loss": 0.3393, + "step": 32115 + }, + { + "epoch": 0.5549488526403097, + "grad_norm": 1.9548509643315488, + "learning_rate": 8.709975189205501e-06, + "loss": 0.424, + "step": 32116 + }, + { + "epoch": 0.5549661321537186, + "grad_norm": 2.0362579195982002, + "learning_rate": 8.709420217972927e-06, + "loss": 0.3904, + "step": 32117 + }, + { + "epoch": 0.5549834116671275, + "grad_norm": 0.9155670985917015, + "learning_rate": 8.708865250782552e-06, + "loss": 0.4605, + "step": 32118 + }, + { + "epoch": 0.5550006911805364, + "grad_norm": 0.9804277457612729, + "learning_rate": 8.708310287636106e-06, + "loss": 0.4627, + "step": 32119 + }, + { + "epoch": 0.5550179706939452, + "grad_norm": 1.106426836199258, + "learning_rate": 8.707755328535329e-06, + "loss": 0.3081, + "step": 32120 + }, + { + "epoch": 0.5550352502073541, + "grad_norm": 1.0649569647956731, + "learning_rate": 8.707200373481958e-06, + "loss": 0.2554, + "step": 32121 + }, + { + "epoch": 0.555052529720763, + "grad_norm": 1.3418277430465906, + "learning_rate": 8.706645422477739e-06, + "loss": 0.3746, + "step": 32122 + }, + { + "epoch": 0.5550698092341719, + "grad_norm": 1.2046386954119357, + "learning_rate": 8.706090475524402e-06, + "loss": 0.3873, + "step": 32123 + }, + { + "epoch": 0.5550870887475808, + "grad_norm": 1.1484449630338043, + "learning_rate": 8.70553553262369e-06, + "loss": 0.3428, + "step": 32124 + }, + { + "epoch": 0.5551043682609897, + "grad_norm": 0.9624398884062075, + "learning_rate": 8.704980593777337e-06, + "loss": 0.418, + "step": 32125 + }, + { + "epoch": 0.5551216477743987, + "grad_norm": 0.9425054819680085, + "learning_rate": 8.704425658987084e-06, + "loss": 0.2555, + "step": 32126 + }, + { + "epoch": 0.5551389272878076, + "grad_norm": 0.9286458528145518, + "learning_rate": 8.70387072825467e-06, + "loss": 0.4383, + "step": 32127 + }, + { + "epoch": 0.5551562068012165, + "grad_norm": 0.9489722951220978, + "learning_rate": 8.703315801581828e-06, + "loss": 0.5981, + "step": 32128 + }, + { + "epoch": 0.5551734863146254, + "grad_norm": 1.1452022124024204, + "learning_rate": 8.702760878970304e-06, + "loss": 0.3853, + "step": 32129 + }, + { + "epoch": 0.5551907658280343, + "grad_norm": 0.9669014269875437, + "learning_rate": 8.702205960421826e-06, + "loss": 0.5194, + "step": 32130 + }, + { + "epoch": 0.5552080453414432, + "grad_norm": 1.6107107235114533, + "learning_rate": 8.701651045938142e-06, + "loss": 0.4219, + "step": 32131 + }, + { + "epoch": 0.5552253248548521, + "grad_norm": 0.875790934916566, + "learning_rate": 8.701096135520982e-06, + "loss": 0.2952, + "step": 32132 + }, + { + "epoch": 0.555242604368261, + "grad_norm": 0.9548245434895507, + "learning_rate": 8.70054122917209e-06, + "loss": 0.412, + "step": 32133 + }, + { + "epoch": 0.5552598838816699, + "grad_norm": 1.0472679886720244, + "learning_rate": 8.6999863268932e-06, + "loss": 0.4393, + "step": 32134 + }, + { + "epoch": 0.5552771633950788, + "grad_norm": 1.6033349940750166, + "learning_rate": 8.699431428686055e-06, + "loss": 0.4368, + "step": 32135 + }, + { + "epoch": 0.5552944429084877, + "grad_norm": 0.7480225592834135, + "learning_rate": 8.698876534552385e-06, + "loss": 0.3139, + "step": 32136 + }, + { + "epoch": 0.5553117224218966, + "grad_norm": 0.9990587387480409, + "learning_rate": 8.698321644493934e-06, + "loss": 0.3167, + "step": 32137 + }, + { + "epoch": 0.5553290019353055, + "grad_norm": 1.084037629554718, + "learning_rate": 8.697766758512443e-06, + "loss": 0.5465, + "step": 32138 + }, + { + "epoch": 0.5553462814487145, + "grad_norm": 0.9685075823563504, + "learning_rate": 8.69721187660964e-06, + "loss": 0.3559, + "step": 32139 + }, + { + "epoch": 0.5553635609621234, + "grad_norm": 1.0399272903902856, + "learning_rate": 8.69665699878727e-06, + "loss": 0.3652, + "step": 32140 + }, + { + "epoch": 0.5553808404755322, + "grad_norm": 1.0927179852859585, + "learning_rate": 8.69610212504707e-06, + "loss": 0.3725, + "step": 32141 + }, + { + "epoch": 0.5553981199889411, + "grad_norm": 0.921229142345063, + "learning_rate": 8.695547255390776e-06, + "loss": 0.4457, + "step": 32142 + }, + { + "epoch": 0.55541539950235, + "grad_norm": 0.8535870121455206, + "learning_rate": 8.694992389820126e-06, + "loss": 0.377, + "step": 32143 + }, + { + "epoch": 0.5554326790157589, + "grad_norm": 0.8433300624038281, + "learning_rate": 8.694437528336864e-06, + "loss": 0.4021, + "step": 32144 + }, + { + "epoch": 0.5554499585291678, + "grad_norm": 0.9935670588448645, + "learning_rate": 8.693882670942718e-06, + "loss": 0.3055, + "step": 32145 + }, + { + "epoch": 0.5554672380425767, + "grad_norm": 0.9462518824408869, + "learning_rate": 8.693327817639434e-06, + "loss": 0.4883, + "step": 32146 + }, + { + "epoch": 0.5554845175559856, + "grad_norm": 0.948616817166461, + "learning_rate": 8.692772968428743e-06, + "loss": 0.3507, + "step": 32147 + }, + { + "epoch": 0.5555017970693945, + "grad_norm": 0.8838566354230954, + "learning_rate": 8.692218123312392e-06, + "loss": 0.4003, + "step": 32148 + }, + { + "epoch": 0.5555190765828034, + "grad_norm": 0.720220858653416, + "learning_rate": 8.69166328229211e-06, + "loss": 0.2645, + "step": 32149 + }, + { + "epoch": 0.5555363560962123, + "grad_norm": 0.44827484466827233, + "learning_rate": 8.691108445369636e-06, + "loss": 0.5998, + "step": 32150 + }, + { + "epoch": 0.5555536356096212, + "grad_norm": 0.45137836459962555, + "learning_rate": 8.690553612546713e-06, + "loss": 0.644, + "step": 32151 + }, + { + "epoch": 0.5555709151230301, + "grad_norm": 1.1096446545244008, + "learning_rate": 8.689998783825073e-06, + "loss": 0.4396, + "step": 32152 + }, + { + "epoch": 0.555588194636439, + "grad_norm": 0.5715856593178537, + "learning_rate": 8.68944395920646e-06, + "loss": 0.3164, + "step": 32153 + }, + { + "epoch": 0.555605474149848, + "grad_norm": 1.1966689136785496, + "learning_rate": 8.688889138692603e-06, + "loss": 0.5489, + "step": 32154 + }, + { + "epoch": 0.5556227536632569, + "grad_norm": 0.715726193163504, + "learning_rate": 8.688334322285248e-06, + "loss": 0.385, + "step": 32155 + }, + { + "epoch": 0.5556400331766658, + "grad_norm": 1.0395021380972933, + "learning_rate": 8.687779509986128e-06, + "loss": 0.56, + "step": 32156 + }, + { + "epoch": 0.5556573126900747, + "grad_norm": 1.0112321212920246, + "learning_rate": 8.687224701796983e-06, + "loss": 0.3326, + "step": 32157 + }, + { + "epoch": 0.5556745922034836, + "grad_norm": 0.7974338789354016, + "learning_rate": 8.686669897719554e-06, + "loss": 0.3155, + "step": 32158 + }, + { + "epoch": 0.5556918717168925, + "grad_norm": 0.7314897043605368, + "learning_rate": 8.68611509775557e-06, + "loss": 0.2903, + "step": 32159 + }, + { + "epoch": 0.5557091512303014, + "grad_norm": 0.9912994583502374, + "learning_rate": 8.685560301906775e-06, + "loss": 0.3144, + "step": 32160 + }, + { + "epoch": 0.5557264307437103, + "grad_norm": 1.3124168894236667, + "learning_rate": 8.685005510174903e-06, + "loss": 0.4754, + "step": 32161 + }, + { + "epoch": 0.5557437102571191, + "grad_norm": 0.5913952433215777, + "learning_rate": 8.684450722561696e-06, + "loss": 0.5628, + "step": 32162 + }, + { + "epoch": 0.555760989770528, + "grad_norm": 1.3709346909672953, + "learning_rate": 8.683895939068887e-06, + "loss": 0.476, + "step": 32163 + }, + { + "epoch": 0.5557782692839369, + "grad_norm": 0.8799674149778329, + "learning_rate": 8.68334115969822e-06, + "loss": 0.4081, + "step": 32164 + }, + { + "epoch": 0.5557955487973458, + "grad_norm": 1.0308662545358602, + "learning_rate": 8.682786384451424e-06, + "loss": 0.428, + "step": 32165 + }, + { + "epoch": 0.5558128283107547, + "grad_norm": 1.2021885921530246, + "learning_rate": 8.682231613330244e-06, + "loss": 0.3512, + "step": 32166 + }, + { + "epoch": 0.5558301078241636, + "grad_norm": 0.7200417337949212, + "learning_rate": 8.681676846336411e-06, + "loss": 0.2899, + "step": 32167 + }, + { + "epoch": 0.5558473873375726, + "grad_norm": 0.8949480636697141, + "learning_rate": 8.681122083471674e-06, + "loss": 0.5017, + "step": 32168 + }, + { + "epoch": 0.5558646668509815, + "grad_norm": 1.094732232072762, + "learning_rate": 8.680567324737757e-06, + "loss": 0.4111, + "step": 32169 + }, + { + "epoch": 0.5558819463643904, + "grad_norm": 0.9047888093063338, + "learning_rate": 8.680012570136404e-06, + "loss": 0.4216, + "step": 32170 + }, + { + "epoch": 0.5558992258777993, + "grad_norm": 0.8176702084310572, + "learning_rate": 8.679457819669353e-06, + "loss": 0.3968, + "step": 32171 + }, + { + "epoch": 0.5559165053912082, + "grad_norm": 1.1378041714350164, + "learning_rate": 8.678903073338338e-06, + "loss": 0.5568, + "step": 32172 + }, + { + "epoch": 0.5559337849046171, + "grad_norm": 1.3877063708366226, + "learning_rate": 8.678348331145104e-06, + "loss": 0.6311, + "step": 32173 + }, + { + "epoch": 0.555951064418026, + "grad_norm": 1.1609793970629154, + "learning_rate": 8.677793593091378e-06, + "loss": 0.4176, + "step": 32174 + }, + { + "epoch": 0.5559683439314349, + "grad_norm": 1.3225453082337248, + "learning_rate": 8.677238859178906e-06, + "loss": 0.471, + "step": 32175 + }, + { + "epoch": 0.5559856234448438, + "grad_norm": 1.1380046395448238, + "learning_rate": 8.67668412940942e-06, + "loss": 0.4619, + "step": 32176 + }, + { + "epoch": 0.5560029029582527, + "grad_norm": 1.3590719128590902, + "learning_rate": 8.676129403784662e-06, + "loss": 0.6427, + "step": 32177 + }, + { + "epoch": 0.5560201824716616, + "grad_norm": 0.9852887603731014, + "learning_rate": 8.675574682306367e-06, + "loss": 0.3886, + "step": 32178 + }, + { + "epoch": 0.5560374619850705, + "grad_norm": 0.9244600913142529, + "learning_rate": 8.675019964976274e-06, + "loss": 0.3918, + "step": 32179 + }, + { + "epoch": 0.5560547414984794, + "grad_norm": 0.5490355771335539, + "learning_rate": 8.674465251796118e-06, + "loss": 0.6531, + "step": 32180 + }, + { + "epoch": 0.5560720210118884, + "grad_norm": 0.888620961448858, + "learning_rate": 8.673910542767636e-06, + "loss": 0.3401, + "step": 32181 + }, + { + "epoch": 0.5560893005252973, + "grad_norm": 1.0540732745653507, + "learning_rate": 8.67335583789257e-06, + "loss": 0.4674, + "step": 32182 + }, + { + "epoch": 0.556106580038706, + "grad_norm": 1.0305815854205806, + "learning_rate": 8.672801137172651e-06, + "loss": 0.4169, + "step": 32183 + }, + { + "epoch": 0.556123859552115, + "grad_norm": 2.3137558869178267, + "learning_rate": 8.672246440609625e-06, + "loss": 0.5727, + "step": 32184 + }, + { + "epoch": 0.5561411390655239, + "grad_norm": 1.5184201034894977, + "learning_rate": 8.671691748205218e-06, + "loss": 0.3976, + "step": 32185 + }, + { + "epoch": 0.5561584185789328, + "grad_norm": 0.7511174565297734, + "learning_rate": 8.671137059961176e-06, + "loss": 0.2129, + "step": 32186 + }, + { + "epoch": 0.5561756980923417, + "grad_norm": 1.0647224267225996, + "learning_rate": 8.670582375879231e-06, + "loss": 0.5431, + "step": 32187 + }, + { + "epoch": 0.5561929776057506, + "grad_norm": 0.8879716079159534, + "learning_rate": 8.67002769596113e-06, + "loss": 0.3927, + "step": 32188 + }, + { + "epoch": 0.5562102571191595, + "grad_norm": 1.0980637110659783, + "learning_rate": 8.669473020208599e-06, + "loss": 0.4489, + "step": 32189 + }, + { + "epoch": 0.5562275366325684, + "grad_norm": 0.6274855731621812, + "learning_rate": 8.668918348623377e-06, + "loss": 0.3588, + "step": 32190 + }, + { + "epoch": 0.5562448161459773, + "grad_norm": 0.9885501433481707, + "learning_rate": 8.668363681207209e-06, + "loss": 0.4964, + "step": 32191 + }, + { + "epoch": 0.5562620956593862, + "grad_norm": 0.6575882820714691, + "learning_rate": 8.667809017961821e-06, + "loss": 0.496, + "step": 32192 + }, + { + "epoch": 0.5562793751727951, + "grad_norm": 1.4479229086623717, + "learning_rate": 8.667254358888964e-06, + "loss": 0.3438, + "step": 32193 + }, + { + "epoch": 0.556296654686204, + "grad_norm": 0.6296701634113603, + "learning_rate": 8.666699703990362e-06, + "loss": 0.7096, + "step": 32194 + }, + { + "epoch": 0.556313934199613, + "grad_norm": 0.8732175393953807, + "learning_rate": 8.666145053267759e-06, + "loss": 0.4089, + "step": 32195 + }, + { + "epoch": 0.5563312137130219, + "grad_norm": 1.2841585423980146, + "learning_rate": 8.66559040672289e-06, + "loss": 0.3308, + "step": 32196 + }, + { + "epoch": 0.5563484932264308, + "grad_norm": 0.7131109185181306, + "learning_rate": 8.665035764357495e-06, + "loss": 0.6804, + "step": 32197 + }, + { + "epoch": 0.5563657727398397, + "grad_norm": 0.8586829147088018, + "learning_rate": 8.664481126173307e-06, + "loss": 0.7052, + "step": 32198 + }, + { + "epoch": 0.5563830522532486, + "grad_norm": 0.8941335234399976, + "learning_rate": 8.663926492172071e-06, + "loss": 0.4009, + "step": 32199 + }, + { + "epoch": 0.5564003317666575, + "grad_norm": 1.1472618109845343, + "learning_rate": 8.663371862355516e-06, + "loss": 0.4566, + "step": 32200 + }, + { + "epoch": 0.5564176112800664, + "grad_norm": 1.0038716005179984, + "learning_rate": 8.662817236725381e-06, + "loss": 0.4065, + "step": 32201 + }, + { + "epoch": 0.5564348907934753, + "grad_norm": 1.1821579097464139, + "learning_rate": 8.662262615283406e-06, + "loss": 0.6718, + "step": 32202 + }, + { + "epoch": 0.5564521703068842, + "grad_norm": 0.8788115385998956, + "learning_rate": 8.661707998031322e-06, + "loss": 0.2829, + "step": 32203 + }, + { + "epoch": 0.556469449820293, + "grad_norm": 1.325056594254437, + "learning_rate": 8.661153384970875e-06, + "loss": 0.6118, + "step": 32204 + }, + { + "epoch": 0.5564867293337019, + "grad_norm": 1.2638297536899947, + "learning_rate": 8.660598776103795e-06, + "loss": 0.4845, + "step": 32205 + }, + { + "epoch": 0.5565040088471108, + "grad_norm": 0.5169474125881675, + "learning_rate": 8.660044171431822e-06, + "loss": 0.7686, + "step": 32206 + }, + { + "epoch": 0.5565212883605197, + "grad_norm": 1.120734059713171, + "learning_rate": 8.65948957095669e-06, + "loss": 0.4178, + "step": 32207 + }, + { + "epoch": 0.5565385678739286, + "grad_norm": 0.5733907451087694, + "learning_rate": 8.658934974680144e-06, + "loss": 0.6638, + "step": 32208 + }, + { + "epoch": 0.5565558473873375, + "grad_norm": 1.918306702613855, + "learning_rate": 8.658380382603908e-06, + "loss": 0.5021, + "step": 32209 + }, + { + "epoch": 0.5565731269007465, + "grad_norm": 1.3863126644625334, + "learning_rate": 8.657825794729734e-06, + "loss": 0.3964, + "step": 32210 + }, + { + "epoch": 0.5565904064141554, + "grad_norm": 0.6907755262955039, + "learning_rate": 8.65727121105935e-06, + "loss": 0.6703, + "step": 32211 + }, + { + "epoch": 0.5566076859275643, + "grad_norm": 0.7544030391433614, + "learning_rate": 8.656716631594491e-06, + "loss": 0.2149, + "step": 32212 + }, + { + "epoch": 0.5566249654409732, + "grad_norm": 0.867606734256302, + "learning_rate": 8.656162056336901e-06, + "loss": 0.263, + "step": 32213 + }, + { + "epoch": 0.5566422449543821, + "grad_norm": 1.0930660743954645, + "learning_rate": 8.655607485288313e-06, + "loss": 0.4423, + "step": 32214 + }, + { + "epoch": 0.556659524467791, + "grad_norm": 1.1689411886041658, + "learning_rate": 8.655052918450464e-06, + "loss": 0.483, + "step": 32215 + }, + { + "epoch": 0.5566768039811999, + "grad_norm": 1.0814105749318232, + "learning_rate": 8.65449835582509e-06, + "loss": 0.4366, + "step": 32216 + }, + { + "epoch": 0.5566940834946088, + "grad_norm": 0.9493333588944189, + "learning_rate": 8.653943797413931e-06, + "loss": 0.4413, + "step": 32217 + }, + { + "epoch": 0.5567113630080177, + "grad_norm": 0.8119455340544504, + "learning_rate": 8.65338924321872e-06, + "loss": 0.4059, + "step": 32218 + }, + { + "epoch": 0.5567286425214266, + "grad_norm": 1.3051909826469632, + "learning_rate": 8.652834693241201e-06, + "loss": 0.36, + "step": 32219 + }, + { + "epoch": 0.5567459220348355, + "grad_norm": 0.8395391022946407, + "learning_rate": 8.652280147483104e-06, + "loss": 0.4315, + "step": 32220 + }, + { + "epoch": 0.5567632015482444, + "grad_norm": 0.6563480643688874, + "learning_rate": 8.651725605946165e-06, + "loss": 0.2344, + "step": 32221 + }, + { + "epoch": 0.5567804810616533, + "grad_norm": 1.1034792786583325, + "learning_rate": 8.651171068632127e-06, + "loss": 0.4321, + "step": 32222 + }, + { + "epoch": 0.5567977605750623, + "grad_norm": 0.9103223402547421, + "learning_rate": 8.650616535542722e-06, + "loss": 0.5482, + "step": 32223 + }, + { + "epoch": 0.5568150400884712, + "grad_norm": 0.7234469739285395, + "learning_rate": 8.650062006679691e-06, + "loss": 0.4812, + "step": 32224 + }, + { + "epoch": 0.5568323196018801, + "grad_norm": 1.0845634654428447, + "learning_rate": 8.649507482044766e-06, + "loss": 0.4862, + "step": 32225 + }, + { + "epoch": 0.5568495991152889, + "grad_norm": 0.9844740446042507, + "learning_rate": 8.648952961639688e-06, + "loss": 0.2713, + "step": 32226 + }, + { + "epoch": 0.5568668786286978, + "grad_norm": 1.1712715257271071, + "learning_rate": 8.64839844546619e-06, + "loss": 0.3313, + "step": 32227 + }, + { + "epoch": 0.5568841581421067, + "grad_norm": 0.9591127671046843, + "learning_rate": 8.647843933526014e-06, + "loss": 0.6445, + "step": 32228 + }, + { + "epoch": 0.5569014376555156, + "grad_norm": 1.2653201307125672, + "learning_rate": 8.647289425820889e-06, + "loss": 0.3691, + "step": 32229 + }, + { + "epoch": 0.5569187171689245, + "grad_norm": 0.582768514343312, + "learning_rate": 8.64673492235256e-06, + "loss": 0.4957, + "step": 32230 + }, + { + "epoch": 0.5569359966823334, + "grad_norm": 1.1064469116592066, + "learning_rate": 8.64618042312276e-06, + "loss": 0.4887, + "step": 32231 + }, + { + "epoch": 0.5569532761957423, + "grad_norm": 1.093404734185489, + "learning_rate": 8.645625928133224e-06, + "loss": 0.559, + "step": 32232 + }, + { + "epoch": 0.5569705557091512, + "grad_norm": 0.9495400009643297, + "learning_rate": 8.645071437385693e-06, + "loss": 0.4852, + "step": 32233 + }, + { + "epoch": 0.5569878352225601, + "grad_norm": 0.8334719546087823, + "learning_rate": 8.644516950881898e-06, + "loss": 0.4243, + "step": 32234 + }, + { + "epoch": 0.557005114735969, + "grad_norm": 0.46874958059853655, + "learning_rate": 8.64396246862358e-06, + "loss": 0.6163, + "step": 32235 + }, + { + "epoch": 0.5570223942493779, + "grad_norm": 1.6235360969529773, + "learning_rate": 8.643407990612474e-06, + "loss": 0.3926, + "step": 32236 + }, + { + "epoch": 0.5570396737627868, + "grad_norm": 0.9372721849998367, + "learning_rate": 8.642853516850319e-06, + "loss": 0.3756, + "step": 32237 + }, + { + "epoch": 0.5570569532761958, + "grad_norm": 0.5116068145295757, + "learning_rate": 8.642299047338846e-06, + "loss": 0.6836, + "step": 32238 + }, + { + "epoch": 0.5570742327896047, + "grad_norm": 0.6835543064038533, + "learning_rate": 8.641744582079802e-06, + "loss": 0.2842, + "step": 32239 + }, + { + "epoch": 0.5570915123030136, + "grad_norm": 1.2134438671688486, + "learning_rate": 8.641190121074912e-06, + "loss": 0.3397, + "step": 32240 + }, + { + "epoch": 0.5571087918164225, + "grad_norm": 1.0068003551203395, + "learning_rate": 8.640635664325922e-06, + "loss": 0.5143, + "step": 32241 + }, + { + "epoch": 0.5571260713298314, + "grad_norm": 0.587816660376517, + "learning_rate": 8.640081211834561e-06, + "loss": 0.7526, + "step": 32242 + }, + { + "epoch": 0.5571433508432403, + "grad_norm": 1.091507179521697, + "learning_rate": 8.639526763602569e-06, + "loss": 0.5052, + "step": 32243 + }, + { + "epoch": 0.5571606303566492, + "grad_norm": 0.6868424474638696, + "learning_rate": 8.638972319631687e-06, + "loss": 0.261, + "step": 32244 + }, + { + "epoch": 0.5571779098700581, + "grad_norm": 1.468580259485342, + "learning_rate": 8.638417879923641e-06, + "loss": 0.3315, + "step": 32245 + }, + { + "epoch": 0.557195189383467, + "grad_norm": 0.9944820847799287, + "learning_rate": 8.637863444480178e-06, + "loss": 0.5758, + "step": 32246 + }, + { + "epoch": 0.5572124688968758, + "grad_norm": 0.4493579855816279, + "learning_rate": 8.637309013303028e-06, + "loss": 0.6207, + "step": 32247 + }, + { + "epoch": 0.5572297484102847, + "grad_norm": 0.592518585173796, + "learning_rate": 8.636754586393932e-06, + "loss": 0.2974, + "step": 32248 + }, + { + "epoch": 0.5572470279236936, + "grad_norm": 1.1084627415325299, + "learning_rate": 8.636200163754619e-06, + "loss": 0.4397, + "step": 32249 + }, + { + "epoch": 0.5572643074371025, + "grad_norm": 1.1381796673670086, + "learning_rate": 8.635645745386838e-06, + "loss": 0.2535, + "step": 32250 + }, + { + "epoch": 0.5572815869505114, + "grad_norm": 1.0679840164794627, + "learning_rate": 8.635091331292314e-06, + "loss": 0.4366, + "step": 32251 + }, + { + "epoch": 0.5572988664639204, + "grad_norm": 0.552808508733219, + "learning_rate": 8.634536921472786e-06, + "loss": 0.6692, + "step": 32252 + }, + { + "epoch": 0.5573161459773293, + "grad_norm": 0.8464530914477889, + "learning_rate": 8.633982515929999e-06, + "loss": 0.2984, + "step": 32253 + }, + { + "epoch": 0.5573334254907382, + "grad_norm": 1.1938383755265083, + "learning_rate": 8.633428114665675e-06, + "loss": 0.3576, + "step": 32254 + }, + { + "epoch": 0.5573507050041471, + "grad_norm": 0.6204381137625717, + "learning_rate": 8.632873717681563e-06, + "loss": 0.8288, + "step": 32255 + }, + { + "epoch": 0.557367984517556, + "grad_norm": 1.4700176072114888, + "learning_rate": 8.63231932497939e-06, + "loss": 0.4758, + "step": 32256 + }, + { + "epoch": 0.5573852640309649, + "grad_norm": 0.6879932024445867, + "learning_rate": 8.6317649365609e-06, + "loss": 0.8035, + "step": 32257 + }, + { + "epoch": 0.5574025435443738, + "grad_norm": 0.973693739451543, + "learning_rate": 8.631210552427823e-06, + "loss": 0.3952, + "step": 32258 + }, + { + "epoch": 0.5574198230577827, + "grad_norm": 0.6567399655702271, + "learning_rate": 8.630656172581904e-06, + "loss": 0.3661, + "step": 32259 + }, + { + "epoch": 0.5574371025711916, + "grad_norm": 0.9550084137290763, + "learning_rate": 8.63010179702487e-06, + "loss": 0.3852, + "step": 32260 + }, + { + "epoch": 0.5574543820846005, + "grad_norm": 1.4698530771968197, + "learning_rate": 8.629547425758463e-06, + "loss": 0.5302, + "step": 32261 + }, + { + "epoch": 0.5574716615980094, + "grad_norm": 1.3444733476540793, + "learning_rate": 8.628993058784416e-06, + "loss": 0.4098, + "step": 32262 + }, + { + "epoch": 0.5574889411114183, + "grad_norm": 0.835500536135974, + "learning_rate": 8.628438696104467e-06, + "loss": 0.2719, + "step": 32263 + }, + { + "epoch": 0.5575062206248272, + "grad_norm": 0.9470395452928473, + "learning_rate": 8.627884337720356e-06, + "loss": 0.2975, + "step": 32264 + }, + { + "epoch": 0.5575235001382361, + "grad_norm": 1.5836109066217894, + "learning_rate": 8.62732998363381e-06, + "loss": 0.4156, + "step": 32265 + }, + { + "epoch": 0.5575407796516451, + "grad_norm": 1.2261706258492355, + "learning_rate": 8.626775633846574e-06, + "loss": 0.5299, + "step": 32266 + }, + { + "epoch": 0.557558059165054, + "grad_norm": 1.1075231625566775, + "learning_rate": 8.62622128836038e-06, + "loss": 0.4892, + "step": 32267 + }, + { + "epoch": 0.5575753386784628, + "grad_norm": 1.5765996619183416, + "learning_rate": 8.625666947176968e-06, + "loss": 0.4466, + "step": 32268 + }, + { + "epoch": 0.5575926181918717, + "grad_norm": 1.338413296720599, + "learning_rate": 8.625112610298065e-06, + "loss": 0.4863, + "step": 32269 + }, + { + "epoch": 0.5576098977052806, + "grad_norm": 1.196480763108759, + "learning_rate": 8.62455827772542e-06, + "loss": 0.6444, + "step": 32270 + }, + { + "epoch": 0.5576271772186895, + "grad_norm": 0.9400353147380048, + "learning_rate": 8.624003949460763e-06, + "loss": 0.3985, + "step": 32271 + }, + { + "epoch": 0.5576444567320984, + "grad_norm": 1.1416790553983955, + "learning_rate": 8.623449625505827e-06, + "loss": 0.5214, + "step": 32272 + }, + { + "epoch": 0.5576617362455073, + "grad_norm": 1.1362379941890421, + "learning_rate": 8.622895305862354e-06, + "loss": 0.3217, + "step": 32273 + }, + { + "epoch": 0.5576790157589162, + "grad_norm": 1.193623719892095, + "learning_rate": 8.622340990532075e-06, + "loss": 0.3843, + "step": 32274 + }, + { + "epoch": 0.5576962952723251, + "grad_norm": 1.0672817112158406, + "learning_rate": 8.62178667951673e-06, + "loss": 0.5216, + "step": 32275 + }, + { + "epoch": 0.557713574785734, + "grad_norm": 1.2013550936235935, + "learning_rate": 8.621232372818054e-06, + "loss": 0.4419, + "step": 32276 + }, + { + "epoch": 0.5577308542991429, + "grad_norm": 0.9757153206787733, + "learning_rate": 8.620678070437783e-06, + "loss": 0.4554, + "step": 32277 + }, + { + "epoch": 0.5577481338125518, + "grad_norm": 0.7977013919608159, + "learning_rate": 8.62012377237765e-06, + "loss": 0.2629, + "step": 32278 + }, + { + "epoch": 0.5577654133259607, + "grad_norm": 1.092906936345522, + "learning_rate": 8.6195694786394e-06, + "loss": 0.4864, + "step": 32279 + }, + { + "epoch": 0.5577826928393697, + "grad_norm": 0.9595211984222483, + "learning_rate": 8.61901518922476e-06, + "loss": 0.5144, + "step": 32280 + }, + { + "epoch": 0.5577999723527786, + "grad_norm": 1.1084430914403927, + "learning_rate": 8.618460904135471e-06, + "loss": 0.4529, + "step": 32281 + }, + { + "epoch": 0.5578172518661875, + "grad_norm": 1.3347653598955735, + "learning_rate": 8.617906623373266e-06, + "loss": 0.3759, + "step": 32282 + }, + { + "epoch": 0.5578345313795964, + "grad_norm": 1.0205422879123975, + "learning_rate": 8.617352346939881e-06, + "loss": 0.4941, + "step": 32283 + }, + { + "epoch": 0.5578518108930053, + "grad_norm": 0.6493616333348408, + "learning_rate": 8.616798074837058e-06, + "loss": 0.5777, + "step": 32284 + }, + { + "epoch": 0.5578690904064142, + "grad_norm": 1.7076798206977666, + "learning_rate": 8.616243807066524e-06, + "loss": 0.4292, + "step": 32285 + }, + { + "epoch": 0.5578863699198231, + "grad_norm": 1.0908630046550276, + "learning_rate": 8.61568954363002e-06, + "loss": 0.3333, + "step": 32286 + }, + { + "epoch": 0.557903649433232, + "grad_norm": 1.1836839620616133, + "learning_rate": 8.615135284529284e-06, + "loss": 0.5003, + "step": 32287 + }, + { + "epoch": 0.5579209289466409, + "grad_norm": 0.98423635795013, + "learning_rate": 8.614581029766051e-06, + "loss": 0.3573, + "step": 32288 + }, + { + "epoch": 0.5579382084600497, + "grad_norm": 1.4931915478434274, + "learning_rate": 8.614026779342049e-06, + "loss": 0.3741, + "step": 32289 + }, + { + "epoch": 0.5579554879734586, + "grad_norm": 1.3483485469268326, + "learning_rate": 8.613472533259026e-06, + "loss": 0.3525, + "step": 32290 + }, + { + "epoch": 0.5579727674868675, + "grad_norm": 0.8075707628124326, + "learning_rate": 8.61291829151871e-06, + "loss": 0.5602, + "step": 32291 + }, + { + "epoch": 0.5579900470002764, + "grad_norm": 1.1390663688186047, + "learning_rate": 8.612364054122841e-06, + "loss": 0.3989, + "step": 32292 + }, + { + "epoch": 0.5580073265136853, + "grad_norm": 1.1330128916609754, + "learning_rate": 8.611809821073155e-06, + "loss": 0.459, + "step": 32293 + }, + { + "epoch": 0.5580246060270942, + "grad_norm": 1.2526442089270775, + "learning_rate": 8.61125559237138e-06, + "loss": 0.3819, + "step": 32294 + }, + { + "epoch": 0.5580418855405032, + "grad_norm": 0.8276030960586779, + "learning_rate": 8.610701368019264e-06, + "loss": 0.3897, + "step": 32295 + }, + { + "epoch": 0.5580591650539121, + "grad_norm": 0.9861285828817844, + "learning_rate": 8.610147148018533e-06, + "loss": 0.5547, + "step": 32296 + }, + { + "epoch": 0.558076444567321, + "grad_norm": 0.6482228360376943, + "learning_rate": 8.609592932370928e-06, + "loss": 0.7858, + "step": 32297 + }, + { + "epoch": 0.5580937240807299, + "grad_norm": 0.8783706420500871, + "learning_rate": 8.609038721078183e-06, + "loss": 0.2327, + "step": 32298 + }, + { + "epoch": 0.5581110035941388, + "grad_norm": 1.037220128614189, + "learning_rate": 8.608484514142038e-06, + "loss": 0.4008, + "step": 32299 + }, + { + "epoch": 0.5581282831075477, + "grad_norm": 0.4859286394201096, + "learning_rate": 8.60793031156422e-06, + "loss": 0.5465, + "step": 32300 + }, + { + "epoch": 0.5581455626209566, + "grad_norm": 1.2150676572144443, + "learning_rate": 8.607376113346474e-06, + "loss": 0.3552, + "step": 32301 + }, + { + "epoch": 0.5581628421343655, + "grad_norm": 0.9494452940039183, + "learning_rate": 8.606821919490532e-06, + "loss": 0.5282, + "step": 32302 + }, + { + "epoch": 0.5581801216477744, + "grad_norm": 1.1827293692197056, + "learning_rate": 8.606267729998126e-06, + "loss": 0.3267, + "step": 32303 + }, + { + "epoch": 0.5581974011611833, + "grad_norm": 1.1041272106842024, + "learning_rate": 8.605713544871e-06, + "loss": 0.4936, + "step": 32304 + }, + { + "epoch": 0.5582146806745922, + "grad_norm": 0.6570352155935345, + "learning_rate": 8.60515936411088e-06, + "loss": 0.4389, + "step": 32305 + }, + { + "epoch": 0.5582319601880011, + "grad_norm": 1.3464863909118177, + "learning_rate": 8.60460518771951e-06, + "loss": 0.4559, + "step": 32306 + }, + { + "epoch": 0.55824923970141, + "grad_norm": 0.9249804180913667, + "learning_rate": 8.60405101569862e-06, + "loss": 0.3982, + "step": 32307 + }, + { + "epoch": 0.558266519214819, + "grad_norm": 1.0403648841971525, + "learning_rate": 8.603496848049953e-06, + "loss": 0.4245, + "step": 32308 + }, + { + "epoch": 0.5582837987282279, + "grad_norm": 0.8922937495576722, + "learning_rate": 8.602942684775235e-06, + "loss": 0.4619, + "step": 32309 + }, + { + "epoch": 0.5583010782416367, + "grad_norm": 1.109968085994898, + "learning_rate": 8.60238852587621e-06, + "loss": 0.2615, + "step": 32310 + }, + { + "epoch": 0.5583183577550456, + "grad_norm": 1.1735060203994294, + "learning_rate": 8.601834371354608e-06, + "loss": 0.4378, + "step": 32311 + }, + { + "epoch": 0.5583356372684545, + "grad_norm": 0.6960569398733848, + "learning_rate": 8.601280221212168e-06, + "loss": 0.3122, + "step": 32312 + }, + { + "epoch": 0.5583529167818634, + "grad_norm": 1.1311372902654666, + "learning_rate": 8.600726075450629e-06, + "loss": 0.3308, + "step": 32313 + }, + { + "epoch": 0.5583701962952723, + "grad_norm": 0.5737130566473525, + "learning_rate": 8.600171934071717e-06, + "loss": 0.2616, + "step": 32314 + }, + { + "epoch": 0.5583874758086812, + "grad_norm": 0.7048843895033875, + "learning_rate": 8.599617797077175e-06, + "loss": 0.3685, + "step": 32315 + }, + { + "epoch": 0.5584047553220901, + "grad_norm": 0.46464029944616847, + "learning_rate": 8.599063664468734e-06, + "loss": 0.7223, + "step": 32316 + }, + { + "epoch": 0.558422034835499, + "grad_norm": 0.9140200905378575, + "learning_rate": 8.598509536248135e-06, + "loss": 0.3655, + "step": 32317 + }, + { + "epoch": 0.5584393143489079, + "grad_norm": 1.3120794436965335, + "learning_rate": 8.597955412417108e-06, + "loss": 0.5998, + "step": 32318 + }, + { + "epoch": 0.5584565938623168, + "grad_norm": 1.2725961060408548, + "learning_rate": 8.597401292977395e-06, + "loss": 0.4483, + "step": 32319 + }, + { + "epoch": 0.5584738733757257, + "grad_norm": 1.1438888273471695, + "learning_rate": 8.596847177930723e-06, + "loss": 0.5155, + "step": 32320 + }, + { + "epoch": 0.5584911528891346, + "grad_norm": 1.0131118056648025, + "learning_rate": 8.596293067278836e-06, + "loss": 0.4389, + "step": 32321 + }, + { + "epoch": 0.5585084324025436, + "grad_norm": 0.985042176752183, + "learning_rate": 8.595738961023462e-06, + "loss": 0.4229, + "step": 32322 + }, + { + "epoch": 0.5585257119159525, + "grad_norm": 0.802400626807123, + "learning_rate": 8.595184859166343e-06, + "loss": 0.3888, + "step": 32323 + }, + { + "epoch": 0.5585429914293614, + "grad_norm": 1.2493058999500846, + "learning_rate": 8.594630761709216e-06, + "loss": 0.3987, + "step": 32324 + }, + { + "epoch": 0.5585602709427703, + "grad_norm": 1.0387125915831361, + "learning_rate": 8.594076668653804e-06, + "loss": 0.3561, + "step": 32325 + }, + { + "epoch": 0.5585775504561792, + "grad_norm": 1.2206679367454103, + "learning_rate": 8.593522580001857e-06, + "loss": 0.3401, + "step": 32326 + }, + { + "epoch": 0.5585948299695881, + "grad_norm": 0.667316371732004, + "learning_rate": 8.592968495755099e-06, + "loss": 0.8136, + "step": 32327 + }, + { + "epoch": 0.558612109482997, + "grad_norm": 1.218104728748239, + "learning_rate": 8.592414415915275e-06, + "loss": 0.4535, + "step": 32328 + }, + { + "epoch": 0.5586293889964059, + "grad_norm": 1.1794791118979657, + "learning_rate": 8.591860340484111e-06, + "loss": 0.3203, + "step": 32329 + }, + { + "epoch": 0.5586466685098148, + "grad_norm": 1.006646481852869, + "learning_rate": 8.591306269463352e-06, + "loss": 0.4953, + "step": 32330 + }, + { + "epoch": 0.5586639480232236, + "grad_norm": 1.0543116172423113, + "learning_rate": 8.590752202854727e-06, + "loss": 0.6004, + "step": 32331 + }, + { + "epoch": 0.5586812275366325, + "grad_norm": 0.947361296403853, + "learning_rate": 8.590198140659976e-06, + "loss": 0.5409, + "step": 32332 + }, + { + "epoch": 0.5586985070500414, + "grad_norm": 1.0477749122836335, + "learning_rate": 8.58964408288083e-06, + "loss": 0.428, + "step": 32333 + }, + { + "epoch": 0.5587157865634503, + "grad_norm": 0.9283873422655499, + "learning_rate": 8.589090029519025e-06, + "loss": 0.3465, + "step": 32334 + }, + { + "epoch": 0.5587330660768592, + "grad_norm": 1.3518102536105188, + "learning_rate": 8.588535980576297e-06, + "loss": 0.3668, + "step": 32335 + }, + { + "epoch": 0.5587503455902681, + "grad_norm": 1.2245948912780575, + "learning_rate": 8.587981936054382e-06, + "loss": 0.4877, + "step": 32336 + }, + { + "epoch": 0.5587676251036771, + "grad_norm": 0.8956327716554857, + "learning_rate": 8.587427895955015e-06, + "loss": 0.346, + "step": 32337 + }, + { + "epoch": 0.558784904617086, + "grad_norm": 0.8049728211635673, + "learning_rate": 8.586873860279928e-06, + "loss": 0.3506, + "step": 32338 + }, + { + "epoch": 0.5588021841304949, + "grad_norm": 0.8772093032444216, + "learning_rate": 8.586319829030866e-06, + "loss": 0.412, + "step": 32339 + }, + { + "epoch": 0.5588194636439038, + "grad_norm": 1.2691481443856023, + "learning_rate": 8.585765802209552e-06, + "loss": 0.294, + "step": 32340 + }, + { + "epoch": 0.5588367431573127, + "grad_norm": 1.4944844073719648, + "learning_rate": 8.58521177981773e-06, + "loss": 0.4368, + "step": 32341 + }, + { + "epoch": 0.5588540226707216, + "grad_norm": 0.7825841308833058, + "learning_rate": 8.58465776185713e-06, + "loss": 0.3919, + "step": 32342 + }, + { + "epoch": 0.5588713021841305, + "grad_norm": 1.114867333653543, + "learning_rate": 8.584103748329492e-06, + "loss": 0.4353, + "step": 32343 + }, + { + "epoch": 0.5588885816975394, + "grad_norm": 1.4290362926420375, + "learning_rate": 8.58354973923655e-06, + "loss": 0.4664, + "step": 32344 + }, + { + "epoch": 0.5589058612109483, + "grad_norm": 0.8510034433148227, + "learning_rate": 8.582995734580034e-06, + "loss": 0.4176, + "step": 32345 + }, + { + "epoch": 0.5589231407243572, + "grad_norm": 1.1508743913453559, + "learning_rate": 8.582441734361685e-06, + "loss": 0.4641, + "step": 32346 + }, + { + "epoch": 0.5589404202377661, + "grad_norm": 0.9344884287333577, + "learning_rate": 8.581887738583234e-06, + "loss": 0.3896, + "step": 32347 + }, + { + "epoch": 0.558957699751175, + "grad_norm": 0.7404201906326845, + "learning_rate": 8.581333747246424e-06, + "loss": 0.3066, + "step": 32348 + }, + { + "epoch": 0.558974979264584, + "grad_norm": 1.2848941900484754, + "learning_rate": 8.580779760352976e-06, + "loss": 0.4215, + "step": 32349 + }, + { + "epoch": 0.5589922587779929, + "grad_norm": 1.0797743812773761, + "learning_rate": 8.58022577790464e-06, + "loss": 0.5573, + "step": 32350 + }, + { + "epoch": 0.5590095382914018, + "grad_norm": 1.116408444540703, + "learning_rate": 8.579671799903142e-06, + "loss": 0.2822, + "step": 32351 + }, + { + "epoch": 0.5590268178048107, + "grad_norm": 1.564618164372704, + "learning_rate": 8.579117826350223e-06, + "loss": 0.4416, + "step": 32352 + }, + { + "epoch": 0.5590440973182195, + "grad_norm": 0.8637275774267782, + "learning_rate": 8.57856385724761e-06, + "loss": 0.2613, + "step": 32353 + }, + { + "epoch": 0.5590613768316284, + "grad_norm": 1.025217958705583, + "learning_rate": 8.578009892597048e-06, + "loss": 0.4625, + "step": 32354 + }, + { + "epoch": 0.5590786563450373, + "grad_norm": 1.1417966371854795, + "learning_rate": 8.577455932400266e-06, + "loss": 0.4978, + "step": 32355 + }, + { + "epoch": 0.5590959358584462, + "grad_norm": 1.221913103615444, + "learning_rate": 8.576901976658997e-06, + "loss": 0.3727, + "step": 32356 + }, + { + "epoch": 0.5591132153718551, + "grad_norm": 1.3839585565758659, + "learning_rate": 8.576348025374982e-06, + "loss": 0.5009, + "step": 32357 + }, + { + "epoch": 0.559130494885264, + "grad_norm": 0.9871716529587714, + "learning_rate": 8.57579407854995e-06, + "loss": 0.4537, + "step": 32358 + }, + { + "epoch": 0.5591477743986729, + "grad_norm": 1.1429347555639708, + "learning_rate": 8.575240136185645e-06, + "loss": 0.2485, + "step": 32359 + }, + { + "epoch": 0.5591650539120818, + "grad_norm": 0.9810873803008636, + "learning_rate": 8.574686198283792e-06, + "loss": 0.331, + "step": 32360 + }, + { + "epoch": 0.5591823334254907, + "grad_norm": 0.9958794636309414, + "learning_rate": 8.57413226484613e-06, + "loss": 0.4497, + "step": 32361 + }, + { + "epoch": 0.5591996129388996, + "grad_norm": 0.896211986080508, + "learning_rate": 8.573578335874395e-06, + "loss": 0.2762, + "step": 32362 + }, + { + "epoch": 0.5592168924523085, + "grad_norm": 2.6771415434439048, + "learning_rate": 8.57302441137032e-06, + "loss": 0.5701, + "step": 32363 + }, + { + "epoch": 0.5592341719657175, + "grad_norm": 1.2083727751551574, + "learning_rate": 8.572470491335646e-06, + "loss": 0.5097, + "step": 32364 + }, + { + "epoch": 0.5592514514791264, + "grad_norm": 0.9977033785633774, + "learning_rate": 8.571916575772096e-06, + "loss": 0.3951, + "step": 32365 + }, + { + "epoch": 0.5592687309925353, + "grad_norm": 1.0411666143548126, + "learning_rate": 8.571362664681416e-06, + "loss": 0.4336, + "step": 32366 + }, + { + "epoch": 0.5592860105059442, + "grad_norm": 1.0643950453061666, + "learning_rate": 8.570808758065334e-06, + "loss": 0.3568, + "step": 32367 + }, + { + "epoch": 0.5593032900193531, + "grad_norm": 0.9868652745837323, + "learning_rate": 8.570254855925593e-06, + "loss": 0.3542, + "step": 32368 + }, + { + "epoch": 0.559320569532762, + "grad_norm": 1.069397051469373, + "learning_rate": 8.569700958263913e-06, + "loss": 0.4246, + "step": 32369 + }, + { + "epoch": 0.5593378490461709, + "grad_norm": 1.0257657105500295, + "learning_rate": 8.569147065082047e-06, + "loss": 0.5464, + "step": 32370 + }, + { + "epoch": 0.5593551285595798, + "grad_norm": 1.2171054082905133, + "learning_rate": 8.568593176381716e-06, + "loss": 0.5239, + "step": 32371 + }, + { + "epoch": 0.5593724080729887, + "grad_norm": 1.0449128152520832, + "learning_rate": 8.568039292164664e-06, + "loss": 0.3483, + "step": 32372 + }, + { + "epoch": 0.5593896875863976, + "grad_norm": 1.3014987085080798, + "learning_rate": 8.567485412432618e-06, + "loss": 0.3215, + "step": 32373 + }, + { + "epoch": 0.5594069670998064, + "grad_norm": 1.0661135748271307, + "learning_rate": 8.56693153718732e-06, + "loss": 0.3857, + "step": 32374 + }, + { + "epoch": 0.5594242466132153, + "grad_norm": 1.4137556212206173, + "learning_rate": 8.5663776664305e-06, + "loss": 0.4125, + "step": 32375 + }, + { + "epoch": 0.5594415261266242, + "grad_norm": 1.2852181918045809, + "learning_rate": 8.565823800163893e-06, + "loss": 0.4335, + "step": 32376 + }, + { + "epoch": 0.5594588056400331, + "grad_norm": 1.1079918513227704, + "learning_rate": 8.565269938389236e-06, + "loss": 0.4339, + "step": 32377 + }, + { + "epoch": 0.559476085153442, + "grad_norm": 0.9170416035117086, + "learning_rate": 8.56471608110826e-06, + "loss": 0.3992, + "step": 32378 + }, + { + "epoch": 0.559493364666851, + "grad_norm": 1.1588782980077899, + "learning_rate": 8.564162228322708e-06, + "loss": 0.2866, + "step": 32379 + }, + { + "epoch": 0.5595106441802599, + "grad_norm": 0.8660809820053625, + "learning_rate": 8.563608380034302e-06, + "loss": 0.5181, + "step": 32380 + }, + { + "epoch": 0.5595279236936688, + "grad_norm": 0.9600965651058931, + "learning_rate": 8.563054536244789e-06, + "loss": 0.4088, + "step": 32381 + }, + { + "epoch": 0.5595452032070777, + "grad_norm": 1.5956001116633733, + "learning_rate": 8.562500696955895e-06, + "loss": 0.3199, + "step": 32382 + }, + { + "epoch": 0.5595624827204866, + "grad_norm": 0.9314775053484116, + "learning_rate": 8.561946862169361e-06, + "loss": 0.3437, + "step": 32383 + }, + { + "epoch": 0.5595797622338955, + "grad_norm": 1.391464449913409, + "learning_rate": 8.561393031886916e-06, + "loss": 0.4457, + "step": 32384 + }, + { + "epoch": 0.5595970417473044, + "grad_norm": 1.0670193876723606, + "learning_rate": 8.5608392061103e-06, + "loss": 0.412, + "step": 32385 + }, + { + "epoch": 0.5596143212607133, + "grad_norm": 1.0183260842749033, + "learning_rate": 8.560285384841245e-06, + "loss": 0.572, + "step": 32386 + }, + { + "epoch": 0.5596316007741222, + "grad_norm": 1.10885939652049, + "learning_rate": 8.559731568081483e-06, + "loss": 0.4908, + "step": 32387 + }, + { + "epoch": 0.5596488802875311, + "grad_norm": 0.6489640552145993, + "learning_rate": 8.559177755832755e-06, + "loss": 0.2837, + "step": 32388 + }, + { + "epoch": 0.55966615980094, + "grad_norm": 0.9907938865684943, + "learning_rate": 8.558623948096784e-06, + "loss": 0.349, + "step": 32389 + }, + { + "epoch": 0.5596834393143489, + "grad_norm": 1.0369137369595829, + "learning_rate": 8.558070144875321e-06, + "loss": 0.5571, + "step": 32390 + }, + { + "epoch": 0.5597007188277578, + "grad_norm": 1.1237047701385146, + "learning_rate": 8.557516346170086e-06, + "loss": 0.4364, + "step": 32391 + }, + { + "epoch": 0.5597179983411668, + "grad_norm": 1.4219516324812043, + "learning_rate": 8.556962551982824e-06, + "loss": 0.3319, + "step": 32392 + }, + { + "epoch": 0.5597352778545757, + "grad_norm": 0.7552553779097588, + "learning_rate": 8.55640876231526e-06, + "loss": 0.2504, + "step": 32393 + }, + { + "epoch": 0.5597525573679846, + "grad_norm": 0.6700495338245805, + "learning_rate": 8.55585497716914e-06, + "loss": 0.2773, + "step": 32394 + }, + { + "epoch": 0.5597698368813934, + "grad_norm": 0.4143102436906599, + "learning_rate": 8.555301196546188e-06, + "loss": 0.6072, + "step": 32395 + }, + { + "epoch": 0.5597871163948023, + "grad_norm": 0.9783560026416697, + "learning_rate": 8.554747420448142e-06, + "loss": 0.3938, + "step": 32396 + }, + { + "epoch": 0.5598043959082112, + "grad_norm": 1.1401820620107295, + "learning_rate": 8.554193648876738e-06, + "loss": 0.4378, + "step": 32397 + }, + { + "epoch": 0.5598216754216201, + "grad_norm": 0.9907204418789036, + "learning_rate": 8.553639881833706e-06, + "loss": 0.3584, + "step": 32398 + }, + { + "epoch": 0.559838954935029, + "grad_norm": 1.2720021898412728, + "learning_rate": 8.55308611932079e-06, + "loss": 0.5637, + "step": 32399 + }, + { + "epoch": 0.5598562344484379, + "grad_norm": 1.1286624083390302, + "learning_rate": 8.552532361339714e-06, + "loss": 0.501, + "step": 32400 + }, + { + "epoch": 0.5598735139618468, + "grad_norm": 1.2694612973646957, + "learning_rate": 8.55197860789222e-06, + "loss": 0.4314, + "step": 32401 + }, + { + "epoch": 0.5598907934752557, + "grad_norm": 1.0656674976551441, + "learning_rate": 8.551424858980033e-06, + "loss": 0.4216, + "step": 32402 + }, + { + "epoch": 0.5599080729886646, + "grad_norm": 0.8487571742385414, + "learning_rate": 8.550871114604896e-06, + "loss": 0.5589, + "step": 32403 + }, + { + "epoch": 0.5599253525020735, + "grad_norm": 1.3260057707206812, + "learning_rate": 8.550317374768542e-06, + "loss": 0.4369, + "step": 32404 + }, + { + "epoch": 0.5599426320154824, + "grad_norm": 0.8431733480720665, + "learning_rate": 8.549763639472707e-06, + "loss": 0.3592, + "step": 32405 + }, + { + "epoch": 0.5599599115288914, + "grad_norm": 0.7756893709782556, + "learning_rate": 8.54920990871912e-06, + "loss": 0.4854, + "step": 32406 + }, + { + "epoch": 0.5599771910423003, + "grad_norm": 1.0452050580394265, + "learning_rate": 8.548656182509516e-06, + "loss": 0.2852, + "step": 32407 + }, + { + "epoch": 0.5599944705557092, + "grad_norm": 1.2830187314056007, + "learning_rate": 8.548102460845635e-06, + "loss": 0.2938, + "step": 32408 + }, + { + "epoch": 0.5600117500691181, + "grad_norm": 1.218743940409582, + "learning_rate": 8.5475487437292e-06, + "loss": 0.5005, + "step": 32409 + }, + { + "epoch": 0.560029029582527, + "grad_norm": 1.4749914626567484, + "learning_rate": 8.546995031161962e-06, + "loss": 0.3993, + "step": 32410 + }, + { + "epoch": 0.5600463090959359, + "grad_norm": 1.3211608787265776, + "learning_rate": 8.54644132314564e-06, + "loss": 0.4019, + "step": 32411 + }, + { + "epoch": 0.5600635886093448, + "grad_norm": 1.0746905456504503, + "learning_rate": 8.545887619681976e-06, + "loss": 0.3336, + "step": 32412 + }, + { + "epoch": 0.5600808681227537, + "grad_norm": 0.9851012848660081, + "learning_rate": 8.545333920772701e-06, + "loss": 0.2318, + "step": 32413 + }, + { + "epoch": 0.5600981476361626, + "grad_norm": 1.3351058514992298, + "learning_rate": 8.544780226419556e-06, + "loss": 0.3254, + "step": 32414 + }, + { + "epoch": 0.5601154271495715, + "grad_norm": 1.2692236535355408, + "learning_rate": 8.544226536624262e-06, + "loss": 0.4063, + "step": 32415 + }, + { + "epoch": 0.5601327066629803, + "grad_norm": 1.2576433544100516, + "learning_rate": 8.543672851388566e-06, + "loss": 0.5219, + "step": 32416 + }, + { + "epoch": 0.5601499861763892, + "grad_norm": 0.7718052982304321, + "learning_rate": 8.543119170714198e-06, + "loss": 0.3406, + "step": 32417 + }, + { + "epoch": 0.5601672656897981, + "grad_norm": 0.8764513934940829, + "learning_rate": 8.54256549460289e-06, + "loss": 0.4024, + "step": 32418 + }, + { + "epoch": 0.560184545203207, + "grad_norm": 1.301315109079446, + "learning_rate": 8.54201182305638e-06, + "loss": 0.3843, + "step": 32419 + }, + { + "epoch": 0.560201824716616, + "grad_norm": 1.0481292089823073, + "learning_rate": 8.541458156076394e-06, + "loss": 0.6261, + "step": 32420 + }, + { + "epoch": 0.5602191042300249, + "grad_norm": 1.3344931872926526, + "learning_rate": 8.540904493664677e-06, + "loss": 0.5667, + "step": 32421 + }, + { + "epoch": 0.5602363837434338, + "grad_norm": 0.945292322426737, + "learning_rate": 8.540350835822954e-06, + "loss": 0.4187, + "step": 32422 + }, + { + "epoch": 0.5602536632568427, + "grad_norm": 0.9316399269036691, + "learning_rate": 8.539797182552967e-06, + "loss": 0.4682, + "step": 32423 + }, + { + "epoch": 0.5602709427702516, + "grad_norm": 1.231818555592284, + "learning_rate": 8.539243533856442e-06, + "loss": 0.2915, + "step": 32424 + }, + { + "epoch": 0.5602882222836605, + "grad_norm": 0.5933134371122571, + "learning_rate": 8.538689889735124e-06, + "loss": 0.6567, + "step": 32425 + }, + { + "epoch": 0.5603055017970694, + "grad_norm": 0.7906924121133657, + "learning_rate": 8.538136250190736e-06, + "loss": 0.5389, + "step": 32426 + }, + { + "epoch": 0.5603227813104783, + "grad_norm": 1.0446479625923268, + "learning_rate": 8.537582615225014e-06, + "loss": 0.2816, + "step": 32427 + }, + { + "epoch": 0.5603400608238872, + "grad_norm": 0.812569863099863, + "learning_rate": 8.5370289848397e-06, + "loss": 0.3136, + "step": 32428 + }, + { + "epoch": 0.5603573403372961, + "grad_norm": 0.9017718004448139, + "learning_rate": 8.536475359036516e-06, + "loss": 0.5495, + "step": 32429 + }, + { + "epoch": 0.560374619850705, + "grad_norm": 1.2230298775792932, + "learning_rate": 8.535921737817208e-06, + "loss": 0.3685, + "step": 32430 + }, + { + "epoch": 0.5603918993641139, + "grad_norm": 0.7740459867915848, + "learning_rate": 8.5353681211835e-06, + "loss": 0.3143, + "step": 32431 + }, + { + "epoch": 0.5604091788775228, + "grad_norm": 1.150534571197117, + "learning_rate": 8.534814509137134e-06, + "loss": 0.3701, + "step": 32432 + }, + { + "epoch": 0.5604264583909317, + "grad_norm": 0.8829269213156732, + "learning_rate": 8.534260901679836e-06, + "loss": 0.3393, + "step": 32433 + }, + { + "epoch": 0.5604437379043407, + "grad_norm": 0.9708710229428023, + "learning_rate": 8.53370729881335e-06, + "loss": 0.3148, + "step": 32434 + }, + { + "epoch": 0.5604610174177496, + "grad_norm": 1.461582056726497, + "learning_rate": 8.533153700539398e-06, + "loss": 0.3062, + "step": 32435 + }, + { + "epoch": 0.5604782969311585, + "grad_norm": 1.2414397852170302, + "learning_rate": 8.532600106859725e-06, + "loss": 0.4974, + "step": 32436 + }, + { + "epoch": 0.5604955764445673, + "grad_norm": 1.4053495862940215, + "learning_rate": 8.53204651777606e-06, + "loss": 0.4966, + "step": 32437 + }, + { + "epoch": 0.5605128559579762, + "grad_norm": 0.9546992342843014, + "learning_rate": 8.531492933290133e-06, + "loss": 0.4281, + "step": 32438 + }, + { + "epoch": 0.5605301354713851, + "grad_norm": 1.0081034612710456, + "learning_rate": 8.530939353403687e-06, + "loss": 0.3718, + "step": 32439 + }, + { + "epoch": 0.560547414984794, + "grad_norm": 0.982041641739766, + "learning_rate": 8.530385778118444e-06, + "loss": 0.3626, + "step": 32440 + }, + { + "epoch": 0.5605646944982029, + "grad_norm": 1.598551369248853, + "learning_rate": 8.529832207436151e-06, + "loss": 0.5857, + "step": 32441 + }, + { + "epoch": 0.5605819740116118, + "grad_norm": 0.8554083595026523, + "learning_rate": 8.52927864135853e-06, + "loss": 0.3462, + "step": 32442 + }, + { + "epoch": 0.5605992535250207, + "grad_norm": 1.2814388689162803, + "learning_rate": 8.528725079887321e-06, + "loss": 0.4098, + "step": 32443 + }, + { + "epoch": 0.5606165330384296, + "grad_norm": 0.893145018026135, + "learning_rate": 8.528171523024257e-06, + "loss": 0.3911, + "step": 32444 + }, + { + "epoch": 0.5606338125518385, + "grad_norm": 0.8829087049661177, + "learning_rate": 8.527617970771075e-06, + "loss": 0.3918, + "step": 32445 + }, + { + "epoch": 0.5606510920652474, + "grad_norm": 1.4810705835023836, + "learning_rate": 8.527064423129505e-06, + "loss": 0.3365, + "step": 32446 + }, + { + "epoch": 0.5606683715786563, + "grad_norm": 1.1764655575086291, + "learning_rate": 8.526510880101277e-06, + "loss": 0.2556, + "step": 32447 + }, + { + "epoch": 0.5606856510920653, + "grad_norm": 1.5554625794524832, + "learning_rate": 8.525957341688134e-06, + "loss": 0.3875, + "step": 32448 + }, + { + "epoch": 0.5607029306054742, + "grad_norm": 1.2058068343412063, + "learning_rate": 8.525403807891798e-06, + "loss": 0.2741, + "step": 32449 + }, + { + "epoch": 0.5607202101188831, + "grad_norm": 1.1836249635438478, + "learning_rate": 8.524850278714016e-06, + "loss": 0.5579, + "step": 32450 + }, + { + "epoch": 0.560737489632292, + "grad_norm": 1.065909999657665, + "learning_rate": 8.524296754156511e-06, + "loss": 0.411, + "step": 32451 + }, + { + "epoch": 0.5607547691457009, + "grad_norm": 1.3357048751268732, + "learning_rate": 8.523743234221022e-06, + "loss": 0.4247, + "step": 32452 + }, + { + "epoch": 0.5607720486591098, + "grad_norm": 1.3238662543185338, + "learning_rate": 8.52318971890928e-06, + "loss": 0.5617, + "step": 32453 + }, + { + "epoch": 0.5607893281725187, + "grad_norm": 0.9249833317240388, + "learning_rate": 8.522636208223023e-06, + "loss": 0.4628, + "step": 32454 + }, + { + "epoch": 0.5608066076859276, + "grad_norm": 1.1297250799913987, + "learning_rate": 8.522082702163977e-06, + "loss": 0.5237, + "step": 32455 + }, + { + "epoch": 0.5608238871993365, + "grad_norm": 0.8257555639095716, + "learning_rate": 8.521529200733887e-06, + "loss": 0.3075, + "step": 32456 + }, + { + "epoch": 0.5608411667127454, + "grad_norm": 1.1389917491331087, + "learning_rate": 8.520975703934475e-06, + "loss": 0.3111, + "step": 32457 + }, + { + "epoch": 0.5608584462261542, + "grad_norm": 0.8863689250733708, + "learning_rate": 8.520422211767479e-06, + "loss": 0.635, + "step": 32458 + }, + { + "epoch": 0.5608757257395631, + "grad_norm": 0.9838858263451716, + "learning_rate": 8.519868724234639e-06, + "loss": 0.3887, + "step": 32459 + }, + { + "epoch": 0.560893005252972, + "grad_norm": 0.9827117282039335, + "learning_rate": 8.519315241337675e-06, + "loss": 0.5001, + "step": 32460 + }, + { + "epoch": 0.5609102847663809, + "grad_norm": 1.0424222507997178, + "learning_rate": 8.518761763078334e-06, + "loss": 0.2089, + "step": 32461 + }, + { + "epoch": 0.5609275642797898, + "grad_norm": 1.0653003649972814, + "learning_rate": 8.518208289458339e-06, + "loss": 0.4951, + "step": 32462 + }, + { + "epoch": 0.5609448437931988, + "grad_norm": 0.9561161643931279, + "learning_rate": 8.51765482047943e-06, + "loss": 0.263, + "step": 32463 + }, + { + "epoch": 0.5609621233066077, + "grad_norm": 1.2979574589102585, + "learning_rate": 8.517101356143339e-06, + "loss": 0.4938, + "step": 32464 + }, + { + "epoch": 0.5609794028200166, + "grad_norm": 1.4612813439870143, + "learning_rate": 8.516547896451803e-06, + "loss": 0.489, + "step": 32465 + }, + { + "epoch": 0.5609966823334255, + "grad_norm": 1.1871743832192723, + "learning_rate": 8.515994441406546e-06, + "loss": 0.3566, + "step": 32466 + }, + { + "epoch": 0.5610139618468344, + "grad_norm": 1.5416039819670562, + "learning_rate": 8.51544099100931e-06, + "loss": 0.3616, + "step": 32467 + }, + { + "epoch": 0.5610312413602433, + "grad_norm": 1.121023399375288, + "learning_rate": 8.514887545261827e-06, + "loss": 0.6004, + "step": 32468 + }, + { + "epoch": 0.5610485208736522, + "grad_norm": 0.915932843148166, + "learning_rate": 8.514334104165825e-06, + "loss": 0.4624, + "step": 32469 + }, + { + "epoch": 0.5610658003870611, + "grad_norm": 0.48636535146658944, + "learning_rate": 8.513780667723047e-06, + "loss": 0.6009, + "step": 32470 + }, + { + "epoch": 0.56108307990047, + "grad_norm": 0.5848953092003871, + "learning_rate": 8.513227235935216e-06, + "loss": 0.2074, + "step": 32471 + }, + { + "epoch": 0.5611003594138789, + "grad_norm": 1.3661236551751867, + "learning_rate": 8.51267380880407e-06, + "loss": 0.431, + "step": 32472 + }, + { + "epoch": 0.5611176389272878, + "grad_norm": 0.6512931905869757, + "learning_rate": 8.512120386331343e-06, + "loss": 0.3529, + "step": 32473 + }, + { + "epoch": 0.5611349184406967, + "grad_norm": 1.3139121619316507, + "learning_rate": 8.511566968518773e-06, + "loss": 0.4537, + "step": 32474 + }, + { + "epoch": 0.5611521979541056, + "grad_norm": 0.8948767320550826, + "learning_rate": 8.511013555368081e-06, + "loss": 0.2722, + "step": 32475 + }, + { + "epoch": 0.5611694774675146, + "grad_norm": 1.3947946572169854, + "learning_rate": 8.510460146881015e-06, + "loss": 0.5186, + "step": 32476 + }, + { + "epoch": 0.5611867569809235, + "grad_norm": 1.8206946334129364, + "learning_rate": 8.509906743059297e-06, + "loss": 0.5033, + "step": 32477 + }, + { + "epoch": 0.5612040364943324, + "grad_norm": 1.0726431605566038, + "learning_rate": 8.509353343904665e-06, + "loss": 0.3484, + "step": 32478 + }, + { + "epoch": 0.5612213160077412, + "grad_norm": 1.3353653896623654, + "learning_rate": 8.508799949418854e-06, + "loss": 0.4763, + "step": 32479 + }, + { + "epoch": 0.5612385955211501, + "grad_norm": 0.8855127693702621, + "learning_rate": 8.50824655960359e-06, + "loss": 0.4168, + "step": 32480 + }, + { + "epoch": 0.561255875034559, + "grad_norm": 1.3022422278812296, + "learning_rate": 8.507693174460616e-06, + "loss": 0.2578, + "step": 32481 + }, + { + "epoch": 0.5612731545479679, + "grad_norm": 1.2039665973298874, + "learning_rate": 8.507139793991656e-06, + "loss": 0.5413, + "step": 32482 + }, + { + "epoch": 0.5612904340613768, + "grad_norm": 0.7893550196518148, + "learning_rate": 8.50658641819845e-06, + "loss": 0.3124, + "step": 32483 + }, + { + "epoch": 0.5613077135747857, + "grad_norm": 0.8788852335359595, + "learning_rate": 8.506033047082726e-06, + "loss": 0.4647, + "step": 32484 + }, + { + "epoch": 0.5613249930881946, + "grad_norm": 1.3104112311009954, + "learning_rate": 8.505479680646226e-06, + "loss": 0.3415, + "step": 32485 + }, + { + "epoch": 0.5613422726016035, + "grad_norm": 1.0676808275837075, + "learning_rate": 8.504926318890671e-06, + "loss": 0.2659, + "step": 32486 + }, + { + "epoch": 0.5613595521150124, + "grad_norm": 1.2611023987865286, + "learning_rate": 8.504372961817803e-06, + "loss": 0.433, + "step": 32487 + }, + { + "epoch": 0.5613768316284213, + "grad_norm": 1.0805157212150813, + "learning_rate": 8.503819609429357e-06, + "loss": 0.4535, + "step": 32488 + }, + { + "epoch": 0.5613941111418302, + "grad_norm": 1.143023690543521, + "learning_rate": 8.503266261727054e-06, + "loss": 0.4125, + "step": 32489 + }, + { + "epoch": 0.5614113906552392, + "grad_norm": 0.6102331520634965, + "learning_rate": 8.50271291871264e-06, + "loss": 0.6574, + "step": 32490 + }, + { + "epoch": 0.5614286701686481, + "grad_norm": 0.7299585078422454, + "learning_rate": 8.50215958038784e-06, + "loss": 0.4897, + "step": 32491 + }, + { + "epoch": 0.561445949682057, + "grad_norm": 1.615168537944417, + "learning_rate": 8.501606246754392e-06, + "loss": 0.4592, + "step": 32492 + }, + { + "epoch": 0.5614632291954659, + "grad_norm": 0.7642807543964404, + "learning_rate": 8.501052917814023e-06, + "loss": 0.3352, + "step": 32493 + }, + { + "epoch": 0.5614805087088748, + "grad_norm": 0.6082424155915839, + "learning_rate": 8.500499593568477e-06, + "loss": 0.6519, + "step": 32494 + }, + { + "epoch": 0.5614977882222837, + "grad_norm": 0.9119362457439313, + "learning_rate": 8.499946274019472e-06, + "loss": 0.3591, + "step": 32495 + }, + { + "epoch": 0.5615150677356926, + "grad_norm": 1.075478770653751, + "learning_rate": 8.499392959168758e-06, + "loss": 0.2623, + "step": 32496 + }, + { + "epoch": 0.5615323472491015, + "grad_norm": 1.0876914643166908, + "learning_rate": 8.498839649018053e-06, + "loss": 0.3193, + "step": 32497 + }, + { + "epoch": 0.5615496267625104, + "grad_norm": 1.2552093284412438, + "learning_rate": 8.498286343569099e-06, + "loss": 0.4458, + "step": 32498 + }, + { + "epoch": 0.5615669062759193, + "grad_norm": 0.8832602647763549, + "learning_rate": 8.497733042823628e-06, + "loss": 0.5884, + "step": 32499 + }, + { + "epoch": 0.5615841857893282, + "grad_norm": 0.9908271081204697, + "learning_rate": 8.497179746783369e-06, + "loss": 0.4616, + "step": 32500 + }, + { + "epoch": 0.561601465302737, + "grad_norm": 0.9473274553645538, + "learning_rate": 8.496626455450057e-06, + "loss": 0.3594, + "step": 32501 + }, + { + "epoch": 0.5616187448161459, + "grad_norm": 1.5394736888447091, + "learning_rate": 8.496073168825424e-06, + "loss": 0.4903, + "step": 32502 + }, + { + "epoch": 0.5616360243295548, + "grad_norm": 2.203956473189843, + "learning_rate": 8.495519886911206e-06, + "loss": 0.5623, + "step": 32503 + }, + { + "epoch": 0.5616533038429637, + "grad_norm": 1.4119420435387382, + "learning_rate": 8.494966609709133e-06, + "loss": 0.4684, + "step": 32504 + }, + { + "epoch": 0.5616705833563727, + "grad_norm": 1.1691690778213983, + "learning_rate": 8.494413337220942e-06, + "loss": 0.3246, + "step": 32505 + }, + { + "epoch": 0.5616878628697816, + "grad_norm": 1.001250375685793, + "learning_rate": 8.493860069448359e-06, + "loss": 0.5011, + "step": 32506 + }, + { + "epoch": 0.5617051423831905, + "grad_norm": 1.0658046955346436, + "learning_rate": 8.493306806393122e-06, + "loss": 0.3986, + "step": 32507 + }, + { + "epoch": 0.5617224218965994, + "grad_norm": 1.1424055745018318, + "learning_rate": 8.492753548056965e-06, + "loss": 0.4312, + "step": 32508 + }, + { + "epoch": 0.5617397014100083, + "grad_norm": 1.040728442211728, + "learning_rate": 8.492200294441612e-06, + "loss": 0.4875, + "step": 32509 + }, + { + "epoch": 0.5617569809234172, + "grad_norm": 1.3783949944725142, + "learning_rate": 8.491647045548811e-06, + "loss": 0.5595, + "step": 32510 + }, + { + "epoch": 0.5617742604368261, + "grad_norm": 1.2267070844629444, + "learning_rate": 8.491093801380279e-06, + "loss": 0.419, + "step": 32511 + }, + { + "epoch": 0.561791539950235, + "grad_norm": 0.7375335766433463, + "learning_rate": 8.49054056193776e-06, + "loss": 0.6035, + "step": 32512 + }, + { + "epoch": 0.5618088194636439, + "grad_norm": 1.6855481708774245, + "learning_rate": 8.48998732722298e-06, + "loss": 0.6366, + "step": 32513 + }, + { + "epoch": 0.5618260989770528, + "grad_norm": 1.3084052671221316, + "learning_rate": 8.489434097237678e-06, + "loss": 0.5073, + "step": 32514 + }, + { + "epoch": 0.5618433784904617, + "grad_norm": 0.7578380047921809, + "learning_rate": 8.488880871983576e-06, + "loss": 0.368, + "step": 32515 + }, + { + "epoch": 0.5618606580038706, + "grad_norm": 0.8940501051562336, + "learning_rate": 8.488327651462422e-06, + "loss": 0.4682, + "step": 32516 + }, + { + "epoch": 0.5618779375172795, + "grad_norm": 0.8719920567871391, + "learning_rate": 8.487774435675936e-06, + "loss": 0.33, + "step": 32517 + }, + { + "epoch": 0.5618952170306885, + "grad_norm": 1.5287880932235962, + "learning_rate": 8.487221224625856e-06, + "loss": 0.4029, + "step": 32518 + }, + { + "epoch": 0.5619124965440974, + "grad_norm": 1.1745806827034004, + "learning_rate": 8.48666801831392e-06, + "loss": 0.304, + "step": 32519 + }, + { + "epoch": 0.5619297760575063, + "grad_norm": 1.1197367294784946, + "learning_rate": 8.486114816741848e-06, + "loss": 0.4435, + "step": 32520 + }, + { + "epoch": 0.5619470555709152, + "grad_norm": 0.6900626029392603, + "learning_rate": 8.485561619911382e-06, + "loss": 0.8483, + "step": 32521 + }, + { + "epoch": 0.561964335084324, + "grad_norm": 1.079964211576804, + "learning_rate": 8.485008427824249e-06, + "loss": 0.369, + "step": 32522 + }, + { + "epoch": 0.5619816145977329, + "grad_norm": 1.0703071523255552, + "learning_rate": 8.484455240482186e-06, + "loss": 0.616, + "step": 32523 + }, + { + "epoch": 0.5619988941111418, + "grad_norm": 1.1078548388302638, + "learning_rate": 8.483902057886925e-06, + "loss": 0.4225, + "step": 32524 + }, + { + "epoch": 0.5620161736245507, + "grad_norm": 0.8398509666364041, + "learning_rate": 8.4833488800402e-06, + "loss": 0.4191, + "step": 32525 + }, + { + "epoch": 0.5620334531379596, + "grad_norm": 0.551621921443319, + "learning_rate": 8.482795706943738e-06, + "loss": 0.5336, + "step": 32526 + }, + { + "epoch": 0.5620507326513685, + "grad_norm": 0.8770977844652169, + "learning_rate": 8.482242538599279e-06, + "loss": 0.3257, + "step": 32527 + }, + { + "epoch": 0.5620680121647774, + "grad_norm": 0.4388566652890565, + "learning_rate": 8.481689375008549e-06, + "loss": 0.5833, + "step": 32528 + }, + { + "epoch": 0.5620852916781863, + "grad_norm": 1.3206316256253126, + "learning_rate": 8.481136216173284e-06, + "loss": 0.3891, + "step": 32529 + }, + { + "epoch": 0.5621025711915952, + "grad_norm": 1.0947400865620327, + "learning_rate": 8.480583062095218e-06, + "loss": 0.482, + "step": 32530 + }, + { + "epoch": 0.5621198507050041, + "grad_norm": 0.6334189746496356, + "learning_rate": 8.480029912776078e-06, + "loss": 0.5414, + "step": 32531 + }, + { + "epoch": 0.562137130218413, + "grad_norm": 0.6747170312478181, + "learning_rate": 8.4794767682176e-06, + "loss": 0.5933, + "step": 32532 + }, + { + "epoch": 0.562154409731822, + "grad_norm": 1.8218841701045518, + "learning_rate": 8.478923628421517e-06, + "loss": 0.3867, + "step": 32533 + }, + { + "epoch": 0.5621716892452309, + "grad_norm": 1.2071017632963739, + "learning_rate": 8.478370493389563e-06, + "loss": 0.4359, + "step": 32534 + }, + { + "epoch": 0.5621889687586398, + "grad_norm": 0.905573018511715, + "learning_rate": 8.477817363123463e-06, + "loss": 0.3439, + "step": 32535 + }, + { + "epoch": 0.5622062482720487, + "grad_norm": 1.7040604922909017, + "learning_rate": 8.47726423762496e-06, + "loss": 0.2855, + "step": 32536 + }, + { + "epoch": 0.5622235277854576, + "grad_norm": 0.8629350205679348, + "learning_rate": 8.476711116895778e-06, + "loss": 0.4534, + "step": 32537 + }, + { + "epoch": 0.5622408072988665, + "grad_norm": 0.8720510816535026, + "learning_rate": 8.476158000937654e-06, + "loss": 0.5789, + "step": 32538 + }, + { + "epoch": 0.5622580868122754, + "grad_norm": 1.0326529649879677, + "learning_rate": 8.475604889752322e-06, + "loss": 0.5156, + "step": 32539 + }, + { + "epoch": 0.5622753663256843, + "grad_norm": 0.7582819814971677, + "learning_rate": 8.475051783341506e-06, + "loss": 0.3265, + "step": 32540 + }, + { + "epoch": 0.5622926458390932, + "grad_norm": 1.265897779459755, + "learning_rate": 8.474498681706947e-06, + "loss": 0.4268, + "step": 32541 + }, + { + "epoch": 0.5623099253525021, + "grad_norm": 0.7481793434704299, + "learning_rate": 8.47394558485037e-06, + "loss": 0.3127, + "step": 32542 + }, + { + "epoch": 0.5623272048659109, + "grad_norm": 0.6874076342644638, + "learning_rate": 8.473392492773515e-06, + "loss": 0.2358, + "step": 32543 + }, + { + "epoch": 0.5623444843793198, + "grad_norm": 1.2477320513131984, + "learning_rate": 8.472839405478109e-06, + "loss": 0.5078, + "step": 32544 + }, + { + "epoch": 0.5623617638927287, + "grad_norm": 0.6393614102408679, + "learning_rate": 8.472286322965889e-06, + "loss": 0.5782, + "step": 32545 + }, + { + "epoch": 0.5623790434061376, + "grad_norm": 1.011704682452694, + "learning_rate": 8.471733245238582e-06, + "loss": 0.4624, + "step": 32546 + }, + { + "epoch": 0.5623963229195466, + "grad_norm": 0.9486731967157839, + "learning_rate": 8.471180172297923e-06, + "loss": 0.5731, + "step": 32547 + }, + { + "epoch": 0.5624136024329555, + "grad_norm": 1.0477858076957074, + "learning_rate": 8.47062710414564e-06, + "loss": 0.3733, + "step": 32548 + }, + { + "epoch": 0.5624308819463644, + "grad_norm": 1.5322901566612297, + "learning_rate": 8.470074040783475e-06, + "loss": 0.3446, + "step": 32549 + }, + { + "epoch": 0.5624481614597733, + "grad_norm": 0.9468636271759602, + "learning_rate": 8.469520982213155e-06, + "loss": 0.4169, + "step": 32550 + }, + { + "epoch": 0.5624654409731822, + "grad_norm": 1.2459021044964482, + "learning_rate": 8.468967928436408e-06, + "loss": 0.2764, + "step": 32551 + }, + { + "epoch": 0.5624827204865911, + "grad_norm": 1.0461932639909695, + "learning_rate": 8.468414879454971e-06, + "loss": 0.5507, + "step": 32552 + }, + { + "epoch": 0.5625, + "grad_norm": 0.5583813149610024, + "learning_rate": 8.467861835270572e-06, + "loss": 0.7493, + "step": 32553 + }, + { + "epoch": 0.5625172795134089, + "grad_norm": 1.106714118643975, + "learning_rate": 8.467308795884952e-06, + "loss": 0.6046, + "step": 32554 + }, + { + "epoch": 0.5625345590268178, + "grad_norm": 0.6941332319191966, + "learning_rate": 8.46675576129983e-06, + "loss": 0.2253, + "step": 32555 + }, + { + "epoch": 0.5625518385402267, + "grad_norm": 1.1882913284483174, + "learning_rate": 8.466202731516953e-06, + "loss": 0.5365, + "step": 32556 + }, + { + "epoch": 0.5625691180536356, + "grad_norm": 0.9655180003301322, + "learning_rate": 8.46564970653804e-06, + "loss": 0.3792, + "step": 32557 + }, + { + "epoch": 0.5625863975670445, + "grad_norm": 0.9526603174842763, + "learning_rate": 8.465096686364833e-06, + "loss": 0.291, + "step": 32558 + }, + { + "epoch": 0.5626036770804534, + "grad_norm": 0.9721356183991928, + "learning_rate": 8.464543670999056e-06, + "loss": 0.3649, + "step": 32559 + }, + { + "epoch": 0.5626209565938624, + "grad_norm": 0.986488062216145, + "learning_rate": 8.46399066044245e-06, + "loss": 0.2981, + "step": 32560 + }, + { + "epoch": 0.5626382361072713, + "grad_norm": 1.0908842655050621, + "learning_rate": 8.463437654696738e-06, + "loss": 0.5849, + "step": 32561 + }, + { + "epoch": 0.5626555156206802, + "grad_norm": 0.5827123761703462, + "learning_rate": 8.462884653763655e-06, + "loss": 0.7201, + "step": 32562 + }, + { + "epoch": 0.5626727951340891, + "grad_norm": 1.5533466901115043, + "learning_rate": 8.462331657644937e-06, + "loss": 0.3893, + "step": 32563 + }, + { + "epoch": 0.5626900746474979, + "grad_norm": 1.1200395968687882, + "learning_rate": 8.461778666342311e-06, + "loss": 0.3853, + "step": 32564 + }, + { + "epoch": 0.5627073541609068, + "grad_norm": 1.8113232165782287, + "learning_rate": 8.461225679857515e-06, + "loss": 0.3249, + "step": 32565 + }, + { + "epoch": 0.5627246336743157, + "grad_norm": 0.7210728423869704, + "learning_rate": 8.460672698192272e-06, + "loss": 0.54, + "step": 32566 + }, + { + "epoch": 0.5627419131877246, + "grad_norm": 0.97602444105908, + "learning_rate": 8.46011972134832e-06, + "loss": 0.6684, + "step": 32567 + }, + { + "epoch": 0.5627591927011335, + "grad_norm": 0.9458123550225108, + "learning_rate": 8.459566749327392e-06, + "loss": 0.3189, + "step": 32568 + }, + { + "epoch": 0.5627764722145424, + "grad_norm": 1.312044047795259, + "learning_rate": 8.459013782131218e-06, + "loss": 0.6281, + "step": 32569 + }, + { + "epoch": 0.5627937517279513, + "grad_norm": 1.1159314702989263, + "learning_rate": 8.458460819761532e-06, + "loss": 0.419, + "step": 32570 + }, + { + "epoch": 0.5628110312413602, + "grad_norm": 1.322915229892051, + "learning_rate": 8.457907862220058e-06, + "loss": 0.3318, + "step": 32571 + }, + { + "epoch": 0.5628283107547691, + "grad_norm": 0.9462599164461313, + "learning_rate": 8.457354909508538e-06, + "loss": 0.3828, + "step": 32572 + }, + { + "epoch": 0.562845590268178, + "grad_norm": 1.0541131440704448, + "learning_rate": 8.456801961628698e-06, + "loss": 0.4006, + "step": 32573 + }, + { + "epoch": 0.562862869781587, + "grad_norm": 0.8827772161487794, + "learning_rate": 8.456249018582274e-06, + "loss": 0.3682, + "step": 32574 + }, + { + "epoch": 0.5628801492949959, + "grad_norm": 1.07185825357753, + "learning_rate": 8.455696080370989e-06, + "loss": 0.5267, + "step": 32575 + }, + { + "epoch": 0.5628974288084048, + "grad_norm": 0.9180957906217497, + "learning_rate": 8.455143146996589e-06, + "loss": 0.3826, + "step": 32576 + }, + { + "epoch": 0.5629147083218137, + "grad_norm": 0.811943602636429, + "learning_rate": 8.454590218460795e-06, + "loss": 0.5099, + "step": 32577 + }, + { + "epoch": 0.5629319878352226, + "grad_norm": 1.1083681100370228, + "learning_rate": 8.45403729476534e-06, + "loss": 0.1991, + "step": 32578 + }, + { + "epoch": 0.5629492673486315, + "grad_norm": 0.8905503402130168, + "learning_rate": 8.453484375911959e-06, + "loss": 0.3829, + "step": 32579 + }, + { + "epoch": 0.5629665468620404, + "grad_norm": 1.2592970485204087, + "learning_rate": 8.452931461902385e-06, + "loss": 0.4222, + "step": 32580 + }, + { + "epoch": 0.5629838263754493, + "grad_norm": 0.9529384222676468, + "learning_rate": 8.452378552738347e-06, + "loss": 0.5005, + "step": 32581 + }, + { + "epoch": 0.5630011058888582, + "grad_norm": 1.065190399130439, + "learning_rate": 8.451825648421573e-06, + "loss": 0.6451, + "step": 32582 + }, + { + "epoch": 0.5630183854022671, + "grad_norm": 0.9851359470674559, + "learning_rate": 8.451272748953801e-06, + "loss": 0.3378, + "step": 32583 + }, + { + "epoch": 0.563035664915676, + "grad_norm": 0.8801342969908024, + "learning_rate": 8.450719854336759e-06, + "loss": 0.284, + "step": 32584 + }, + { + "epoch": 0.5630529444290848, + "grad_norm": 0.9271413747737327, + "learning_rate": 8.450166964572183e-06, + "loss": 0.2748, + "step": 32585 + }, + { + "epoch": 0.5630702239424937, + "grad_norm": 1.3385449015094883, + "learning_rate": 8.449614079661798e-06, + "loss": 0.4943, + "step": 32586 + }, + { + "epoch": 0.5630875034559026, + "grad_norm": 0.8180902877458994, + "learning_rate": 8.449061199607344e-06, + "loss": 0.484, + "step": 32587 + }, + { + "epoch": 0.5631047829693115, + "grad_norm": 1.3133879217878344, + "learning_rate": 8.448508324410543e-06, + "loss": 0.3669, + "step": 32588 + }, + { + "epoch": 0.5631220624827205, + "grad_norm": 1.5395829403400576, + "learning_rate": 8.447955454073136e-06, + "loss": 0.5585, + "step": 32589 + }, + { + "epoch": 0.5631393419961294, + "grad_norm": 1.048340935654809, + "learning_rate": 8.447402588596848e-06, + "loss": 0.4234, + "step": 32590 + }, + { + "epoch": 0.5631566215095383, + "grad_norm": 0.6662034518321397, + "learning_rate": 8.446849727983418e-06, + "loss": 0.2309, + "step": 32591 + }, + { + "epoch": 0.5631739010229472, + "grad_norm": 1.124056468829972, + "learning_rate": 8.446296872234568e-06, + "loss": 0.3317, + "step": 32592 + }, + { + "epoch": 0.5631911805363561, + "grad_norm": 1.1594212591091793, + "learning_rate": 8.445744021352035e-06, + "loss": 0.3941, + "step": 32593 + }, + { + "epoch": 0.563208460049765, + "grad_norm": 0.7866787136467441, + "learning_rate": 8.445191175337552e-06, + "loss": 0.3185, + "step": 32594 + }, + { + "epoch": 0.5632257395631739, + "grad_norm": 1.2797574115810146, + "learning_rate": 8.444638334192842e-06, + "loss": 0.3774, + "step": 32595 + }, + { + "epoch": 0.5632430190765828, + "grad_norm": 1.3297500949338201, + "learning_rate": 8.444085497919651e-06, + "loss": 0.584, + "step": 32596 + }, + { + "epoch": 0.5632602985899917, + "grad_norm": 1.0713721167522077, + "learning_rate": 8.443532666519698e-06, + "loss": 0.5688, + "step": 32597 + }, + { + "epoch": 0.5632775781034006, + "grad_norm": 1.2197737226169374, + "learning_rate": 8.442979839994721e-06, + "loss": 0.4393, + "step": 32598 + }, + { + "epoch": 0.5632948576168095, + "grad_norm": 0.8932784808507734, + "learning_rate": 8.442427018346447e-06, + "loss": 0.4222, + "step": 32599 + }, + { + "epoch": 0.5633121371302184, + "grad_norm": 1.205703379117936, + "learning_rate": 8.441874201576613e-06, + "loss": 0.4911, + "step": 32600 + }, + { + "epoch": 0.5633294166436273, + "grad_norm": 1.2912936182863055, + "learning_rate": 8.441321389686947e-06, + "loss": 0.4898, + "step": 32601 + }, + { + "epoch": 0.5633466961570363, + "grad_norm": 1.308210582222142, + "learning_rate": 8.440768582679177e-06, + "loss": 0.4889, + "step": 32602 + }, + { + "epoch": 0.5633639756704452, + "grad_norm": 1.0905100019800373, + "learning_rate": 8.440215780555042e-06, + "loss": 0.3177, + "step": 32603 + }, + { + "epoch": 0.5633812551838541, + "grad_norm": 0.9389287504248923, + "learning_rate": 8.439662983316267e-06, + "loss": 0.5222, + "step": 32604 + }, + { + "epoch": 0.563398534697263, + "grad_norm": 1.1573731968708907, + "learning_rate": 8.43911019096459e-06, + "loss": 0.5146, + "step": 32605 + }, + { + "epoch": 0.5634158142106718, + "grad_norm": 1.1078700555159664, + "learning_rate": 8.438557403501735e-06, + "loss": 0.4287, + "step": 32606 + }, + { + "epoch": 0.5634330937240807, + "grad_norm": 0.634595650696916, + "learning_rate": 8.438004620929439e-06, + "loss": 0.8518, + "step": 32607 + }, + { + "epoch": 0.5634503732374896, + "grad_norm": 0.9633089340627901, + "learning_rate": 8.437451843249428e-06, + "loss": 0.5017, + "step": 32608 + }, + { + "epoch": 0.5634676527508985, + "grad_norm": 1.2506792671965072, + "learning_rate": 8.43689907046344e-06, + "loss": 0.4558, + "step": 32609 + }, + { + "epoch": 0.5634849322643074, + "grad_norm": 0.8967659111307975, + "learning_rate": 8.436346302573201e-06, + "loss": 0.4542, + "step": 32610 + }, + { + "epoch": 0.5635022117777163, + "grad_norm": 1.1779302488642676, + "learning_rate": 8.435793539580447e-06, + "loss": 0.3224, + "step": 32611 + }, + { + "epoch": 0.5635194912911252, + "grad_norm": 1.1767357777085095, + "learning_rate": 8.435240781486906e-06, + "loss": 0.6123, + "step": 32612 + }, + { + "epoch": 0.5635367708045341, + "grad_norm": 0.9836748791629958, + "learning_rate": 8.434688028294307e-06, + "loss": 0.2869, + "step": 32613 + }, + { + "epoch": 0.563554050317943, + "grad_norm": 1.248729979820461, + "learning_rate": 8.434135280004388e-06, + "loss": 0.3297, + "step": 32614 + }, + { + "epoch": 0.5635713298313519, + "grad_norm": 0.9503555540901742, + "learning_rate": 8.43358253661887e-06, + "loss": 0.3437, + "step": 32615 + }, + { + "epoch": 0.5635886093447608, + "grad_norm": 1.1225749910902059, + "learning_rate": 8.433029798139496e-06, + "loss": 0.3348, + "step": 32616 + }, + { + "epoch": 0.5636058888581698, + "grad_norm": 0.888553276391638, + "learning_rate": 8.432477064567988e-06, + "loss": 0.488, + "step": 32617 + }, + { + "epoch": 0.5636231683715787, + "grad_norm": 0.5461835758688979, + "learning_rate": 8.431924335906086e-06, + "loss": 0.5635, + "step": 32618 + }, + { + "epoch": 0.5636404478849876, + "grad_norm": 1.0744350658019701, + "learning_rate": 8.43137161215551e-06, + "loss": 0.3543, + "step": 32619 + }, + { + "epoch": 0.5636577273983965, + "grad_norm": 0.8342282211693837, + "learning_rate": 8.430818893318004e-06, + "loss": 0.4447, + "step": 32620 + }, + { + "epoch": 0.5636750069118054, + "grad_norm": 0.7155605804005205, + "learning_rate": 8.430266179395286e-06, + "loss": 0.4814, + "step": 32621 + }, + { + "epoch": 0.5636922864252143, + "grad_norm": 0.4539228792367479, + "learning_rate": 8.429713470389099e-06, + "loss": 0.832, + "step": 32622 + }, + { + "epoch": 0.5637095659386232, + "grad_norm": 0.7715458424499276, + "learning_rate": 8.429160766301166e-06, + "loss": 0.4712, + "step": 32623 + }, + { + "epoch": 0.5637268454520321, + "grad_norm": 1.12479392405258, + "learning_rate": 8.428608067133219e-06, + "loss": 0.535, + "step": 32624 + }, + { + "epoch": 0.563744124965441, + "grad_norm": 1.211701235647617, + "learning_rate": 8.428055372886997e-06, + "loss": 0.5406, + "step": 32625 + }, + { + "epoch": 0.5637614044788499, + "grad_norm": 0.5798081531461924, + "learning_rate": 8.427502683564219e-06, + "loss": 0.6801, + "step": 32626 + }, + { + "epoch": 0.5637786839922588, + "grad_norm": 1.2849365052638675, + "learning_rate": 8.426949999166625e-06, + "loss": 0.5666, + "step": 32627 + }, + { + "epoch": 0.5637959635056676, + "grad_norm": 1.0267558169647368, + "learning_rate": 8.42639731969594e-06, + "loss": 0.4158, + "step": 32628 + }, + { + "epoch": 0.5638132430190765, + "grad_norm": 0.7505077648125916, + "learning_rate": 8.425844645153902e-06, + "loss": 0.3243, + "step": 32629 + }, + { + "epoch": 0.5638305225324854, + "grad_norm": 1.3029278407153986, + "learning_rate": 8.425291975542234e-06, + "loss": 0.3525, + "step": 32630 + }, + { + "epoch": 0.5638478020458944, + "grad_norm": 1.0409830391478831, + "learning_rate": 8.424739310862678e-06, + "loss": 0.3836, + "step": 32631 + }, + { + "epoch": 0.5638650815593033, + "grad_norm": 1.0860671934340618, + "learning_rate": 8.424186651116954e-06, + "loss": 0.4218, + "step": 32632 + }, + { + "epoch": 0.5638823610727122, + "grad_norm": 0.8658776401126937, + "learning_rate": 8.423633996306796e-06, + "loss": 0.4682, + "step": 32633 + }, + { + "epoch": 0.5638996405861211, + "grad_norm": 1.2437935869088113, + "learning_rate": 8.423081346433942e-06, + "loss": 0.3526, + "step": 32634 + }, + { + "epoch": 0.56391692009953, + "grad_norm": 1.5287100096383222, + "learning_rate": 8.422528701500108e-06, + "loss": 0.4806, + "step": 32635 + }, + { + "epoch": 0.5639341996129389, + "grad_norm": 1.02804893316963, + "learning_rate": 8.42197606150704e-06, + "loss": 0.4529, + "step": 32636 + }, + { + "epoch": 0.5639514791263478, + "grad_norm": 0.5128127571086647, + "learning_rate": 8.42142342645646e-06, + "loss": 0.4812, + "step": 32637 + }, + { + "epoch": 0.5639687586397567, + "grad_norm": 1.0488252214007912, + "learning_rate": 8.420870796350105e-06, + "loss": 0.3804, + "step": 32638 + }, + { + "epoch": 0.5639860381531656, + "grad_norm": 1.1529622978144982, + "learning_rate": 8.4203181711897e-06, + "loss": 0.3127, + "step": 32639 + }, + { + "epoch": 0.5640033176665745, + "grad_norm": 1.134663729263743, + "learning_rate": 8.41976555097698e-06, + "loss": 0.352, + "step": 32640 + }, + { + "epoch": 0.5640205971799834, + "grad_norm": 0.8725517412728633, + "learning_rate": 8.419212935713673e-06, + "loss": 0.4164, + "step": 32641 + }, + { + "epoch": 0.5640378766933923, + "grad_norm": 1.109234737028759, + "learning_rate": 8.418660325401513e-06, + "loss": 0.4825, + "step": 32642 + }, + { + "epoch": 0.5640551562068012, + "grad_norm": 1.2773970925173057, + "learning_rate": 8.418107720042228e-06, + "loss": 0.354, + "step": 32643 + }, + { + "epoch": 0.5640724357202102, + "grad_norm": 0.7738607883546632, + "learning_rate": 8.417555119637548e-06, + "loss": 0.3621, + "step": 32644 + }, + { + "epoch": 0.5640897152336191, + "grad_norm": 1.2100411010165624, + "learning_rate": 8.41700252418921e-06, + "loss": 0.5064, + "step": 32645 + }, + { + "epoch": 0.564106994747028, + "grad_norm": 0.812774218851394, + "learning_rate": 8.416449933698938e-06, + "loss": 0.3012, + "step": 32646 + }, + { + "epoch": 0.5641242742604369, + "grad_norm": 0.6481103941161345, + "learning_rate": 8.415897348168465e-06, + "loss": 0.4695, + "step": 32647 + }, + { + "epoch": 0.5641415537738458, + "grad_norm": 0.780974563454647, + "learning_rate": 8.41534476759952e-06, + "loss": 0.3251, + "step": 32648 + }, + { + "epoch": 0.5641588332872546, + "grad_norm": 0.8280635855884785, + "learning_rate": 8.414792191993839e-06, + "loss": 0.2989, + "step": 32649 + }, + { + "epoch": 0.5641761128006635, + "grad_norm": 0.6250910785935143, + "learning_rate": 8.414239621353146e-06, + "loss": 0.3138, + "step": 32650 + }, + { + "epoch": 0.5641933923140724, + "grad_norm": 0.8627276328089123, + "learning_rate": 8.413687055679179e-06, + "loss": 0.4269, + "step": 32651 + }, + { + "epoch": 0.5642106718274813, + "grad_norm": 1.1561043039952867, + "learning_rate": 8.413134494973664e-06, + "loss": 0.3791, + "step": 32652 + }, + { + "epoch": 0.5642279513408902, + "grad_norm": 0.9403277061726576, + "learning_rate": 8.41258193923833e-06, + "loss": 0.5107, + "step": 32653 + }, + { + "epoch": 0.5642452308542991, + "grad_norm": 0.955086854741402, + "learning_rate": 8.412029388474913e-06, + "loss": 0.371, + "step": 32654 + }, + { + "epoch": 0.564262510367708, + "grad_norm": 1.0150943175057299, + "learning_rate": 8.411476842685136e-06, + "loss": 0.5383, + "step": 32655 + }, + { + "epoch": 0.5642797898811169, + "grad_norm": 1.256849745836412, + "learning_rate": 8.410924301870738e-06, + "loss": 0.5393, + "step": 32656 + }, + { + "epoch": 0.5642970693945258, + "grad_norm": 0.6940739985845259, + "learning_rate": 8.410371766033444e-06, + "loss": 0.2304, + "step": 32657 + }, + { + "epoch": 0.5643143489079347, + "grad_norm": 0.8112144827391281, + "learning_rate": 8.409819235174987e-06, + "loss": 0.4864, + "step": 32658 + }, + { + "epoch": 0.5643316284213437, + "grad_norm": 0.7816130554228123, + "learning_rate": 8.409266709297097e-06, + "loss": 0.4896, + "step": 32659 + }, + { + "epoch": 0.5643489079347526, + "grad_norm": 1.0174096598888027, + "learning_rate": 8.408714188401506e-06, + "loss": 0.4528, + "step": 32660 + }, + { + "epoch": 0.5643661874481615, + "grad_norm": 0.7437781220775216, + "learning_rate": 8.40816167248994e-06, + "loss": 0.4981, + "step": 32661 + }, + { + "epoch": 0.5643834669615704, + "grad_norm": 0.848121852080419, + "learning_rate": 8.407609161564137e-06, + "loss": 0.5526, + "step": 32662 + }, + { + "epoch": 0.5644007464749793, + "grad_norm": 1.5362885959402106, + "learning_rate": 8.40705665562582e-06, + "loss": 0.3833, + "step": 32663 + }, + { + "epoch": 0.5644180259883882, + "grad_norm": 1.247971130294368, + "learning_rate": 8.406504154676723e-06, + "loss": 0.4235, + "step": 32664 + }, + { + "epoch": 0.5644353055017971, + "grad_norm": 1.1783946067323965, + "learning_rate": 8.405951658718578e-06, + "loss": 0.3122, + "step": 32665 + }, + { + "epoch": 0.564452585015206, + "grad_norm": 1.2155534617601926, + "learning_rate": 8.40539916775311e-06, + "loss": 0.4771, + "step": 32666 + }, + { + "epoch": 0.5644698645286149, + "grad_norm": 0.9642266639912317, + "learning_rate": 8.404846681782056e-06, + "loss": 0.4279, + "step": 32667 + }, + { + "epoch": 0.5644871440420238, + "grad_norm": 1.0249001345089888, + "learning_rate": 8.40429420080714e-06, + "loss": 0.3838, + "step": 32668 + }, + { + "epoch": 0.5645044235554327, + "grad_norm": 0.8582781043847417, + "learning_rate": 8.403741724830099e-06, + "loss": 0.2852, + "step": 32669 + }, + { + "epoch": 0.5645217030688415, + "grad_norm": 0.954702845820455, + "learning_rate": 8.403189253852658e-06, + "loss": 0.5277, + "step": 32670 + }, + { + "epoch": 0.5645389825822504, + "grad_norm": 1.0127497421597802, + "learning_rate": 8.402636787876554e-06, + "loss": 0.4921, + "step": 32671 + }, + { + "epoch": 0.5645562620956593, + "grad_norm": 1.3953714509203992, + "learning_rate": 8.402084326903508e-06, + "loss": 0.418, + "step": 32672 + }, + { + "epoch": 0.5645735416090683, + "grad_norm": 1.1126977077421372, + "learning_rate": 8.401531870935257e-06, + "loss": 0.5721, + "step": 32673 + }, + { + "epoch": 0.5645908211224772, + "grad_norm": 0.9168204106704037, + "learning_rate": 8.400979419973534e-06, + "loss": 0.3019, + "step": 32674 + }, + { + "epoch": 0.5646081006358861, + "grad_norm": 1.1760125011712828, + "learning_rate": 8.400426974020057e-06, + "loss": 0.5088, + "step": 32675 + }, + { + "epoch": 0.564625380149295, + "grad_norm": 0.978570341518797, + "learning_rate": 8.399874533076571e-06, + "loss": 0.301, + "step": 32676 + }, + { + "epoch": 0.5646426596627039, + "grad_norm": 1.0523623797694999, + "learning_rate": 8.399322097144795e-06, + "loss": 0.5097, + "step": 32677 + }, + { + "epoch": 0.5646599391761128, + "grad_norm": 1.2101025884744838, + "learning_rate": 8.398769666226467e-06, + "loss": 0.4858, + "step": 32678 + }, + { + "epoch": 0.5646772186895217, + "grad_norm": 0.9267006725357189, + "learning_rate": 8.39821724032331e-06, + "loss": 0.4279, + "step": 32679 + }, + { + "epoch": 0.5646944982029306, + "grad_norm": 0.9236699643352086, + "learning_rate": 8.397664819437064e-06, + "loss": 0.313, + "step": 32680 + }, + { + "epoch": 0.5647117777163395, + "grad_norm": 1.0864687289177084, + "learning_rate": 8.397112403569448e-06, + "loss": 0.5089, + "step": 32681 + }, + { + "epoch": 0.5647290572297484, + "grad_norm": 1.309719714622429, + "learning_rate": 8.396559992722202e-06, + "loss": 0.2833, + "step": 32682 + }, + { + "epoch": 0.5647463367431573, + "grad_norm": 1.0719480664924592, + "learning_rate": 8.39600758689705e-06, + "loss": 0.3496, + "step": 32683 + }, + { + "epoch": 0.5647636162565662, + "grad_norm": 0.971227769049798, + "learning_rate": 8.395455186095723e-06, + "loss": 0.4661, + "step": 32684 + }, + { + "epoch": 0.5647808957699751, + "grad_norm": 0.8755037708549248, + "learning_rate": 8.394902790319957e-06, + "loss": 0.4754, + "step": 32685 + }, + { + "epoch": 0.564798175283384, + "grad_norm": 0.9370431061310949, + "learning_rate": 8.394350399571473e-06, + "loss": 0.3179, + "step": 32686 + }, + { + "epoch": 0.564815454796793, + "grad_norm": 1.6975044037291507, + "learning_rate": 8.393798013852006e-06, + "loss": 0.4049, + "step": 32687 + }, + { + "epoch": 0.5648327343102019, + "grad_norm": 1.0634907663811681, + "learning_rate": 8.393245633163285e-06, + "loss": 0.4133, + "step": 32688 + }, + { + "epoch": 0.5648500138236108, + "grad_norm": 0.741632137187204, + "learning_rate": 8.392693257507043e-06, + "loss": 0.8094, + "step": 32689 + }, + { + "epoch": 0.5648672933370197, + "grad_norm": 1.6939630185618604, + "learning_rate": 8.392140886885005e-06, + "loss": 0.5184, + "step": 32690 + }, + { + "epoch": 0.5648845728504285, + "grad_norm": 1.168555924849836, + "learning_rate": 8.39158852129891e-06, + "loss": 0.4404, + "step": 32691 + }, + { + "epoch": 0.5649018523638374, + "grad_norm": 0.5807322766442023, + "learning_rate": 8.391036160750477e-06, + "loss": 0.7794, + "step": 32692 + }, + { + "epoch": 0.5649191318772463, + "grad_norm": 1.150955151042733, + "learning_rate": 8.390483805241442e-06, + "loss": 0.2546, + "step": 32693 + }, + { + "epoch": 0.5649364113906552, + "grad_norm": 0.8087713048750226, + "learning_rate": 8.389931454773537e-06, + "loss": 0.4891, + "step": 32694 + }, + { + "epoch": 0.5649536909040641, + "grad_norm": 1.2083284193449781, + "learning_rate": 8.389379109348481e-06, + "loss": 0.4187, + "step": 32695 + }, + { + "epoch": 0.564970970417473, + "grad_norm": 0.6331290606388242, + "learning_rate": 8.38882676896802e-06, + "loss": 0.6756, + "step": 32696 + }, + { + "epoch": 0.5649882499308819, + "grad_norm": 1.6863972405286674, + "learning_rate": 8.388274433633872e-06, + "loss": 0.3135, + "step": 32697 + }, + { + "epoch": 0.5650055294442908, + "grad_norm": 0.7651298744534701, + "learning_rate": 8.387722103347774e-06, + "loss": 0.4479, + "step": 32698 + }, + { + "epoch": 0.5650228089576997, + "grad_norm": 1.3307537634254665, + "learning_rate": 8.387169778111449e-06, + "loss": 0.3646, + "step": 32699 + }, + { + "epoch": 0.5650400884711086, + "grad_norm": 0.7477890069982048, + "learning_rate": 8.386617457926635e-06, + "loss": 0.4757, + "step": 32700 + }, + { + "epoch": 0.5650573679845176, + "grad_norm": 0.8742623946933975, + "learning_rate": 8.386065142795054e-06, + "loss": 0.5858, + "step": 32701 + }, + { + "epoch": 0.5650746474979265, + "grad_norm": 0.7340825810163372, + "learning_rate": 8.385512832718442e-06, + "loss": 0.8505, + "step": 32702 + }, + { + "epoch": 0.5650919270113354, + "grad_norm": 0.9834062150868631, + "learning_rate": 8.384960527698523e-06, + "loss": 0.3662, + "step": 32703 + }, + { + "epoch": 0.5651092065247443, + "grad_norm": 0.9981697316607729, + "learning_rate": 8.384408227737035e-06, + "loss": 0.4671, + "step": 32704 + }, + { + "epoch": 0.5651264860381532, + "grad_norm": 0.5590411106956801, + "learning_rate": 8.383855932835704e-06, + "loss": 0.4706, + "step": 32705 + }, + { + "epoch": 0.5651437655515621, + "grad_norm": 0.7575046628712689, + "learning_rate": 8.383303642996254e-06, + "loss": 0.4414, + "step": 32706 + }, + { + "epoch": 0.565161045064971, + "grad_norm": 0.4791087475101482, + "learning_rate": 8.382751358220423e-06, + "loss": 0.4576, + "step": 32707 + }, + { + "epoch": 0.5651783245783799, + "grad_norm": 1.065820913579349, + "learning_rate": 8.382199078509935e-06, + "loss": 0.512, + "step": 32708 + }, + { + "epoch": 0.5651956040917888, + "grad_norm": 0.5306259822052596, + "learning_rate": 8.381646803866526e-06, + "loss": 0.6751, + "step": 32709 + }, + { + "epoch": 0.5652128836051977, + "grad_norm": 1.222763775097534, + "learning_rate": 8.381094534291919e-06, + "loss": 0.3321, + "step": 32710 + }, + { + "epoch": 0.5652301631186066, + "grad_norm": 0.778921675247466, + "learning_rate": 8.38054226978785e-06, + "loss": 0.4662, + "step": 32711 + }, + { + "epoch": 0.5652474426320154, + "grad_norm": 1.2662126830608318, + "learning_rate": 8.379990010356042e-06, + "loss": 0.4438, + "step": 32712 + }, + { + "epoch": 0.5652647221454243, + "grad_norm": 0.9301070934365758, + "learning_rate": 8.379437755998231e-06, + "loss": 0.4362, + "step": 32713 + }, + { + "epoch": 0.5652820016588332, + "grad_norm": 0.7886936595980069, + "learning_rate": 8.378885506716147e-06, + "loss": 0.3879, + "step": 32714 + }, + { + "epoch": 0.5652992811722422, + "grad_norm": 1.1393860835068739, + "learning_rate": 8.37833326251151e-06, + "loss": 0.2806, + "step": 32715 + }, + { + "epoch": 0.5653165606856511, + "grad_norm": 1.4173324181290647, + "learning_rate": 8.377781023386062e-06, + "loss": 0.5718, + "step": 32716 + }, + { + "epoch": 0.56533384019906, + "grad_norm": 1.6027035418255788, + "learning_rate": 8.377228789341523e-06, + "loss": 0.4901, + "step": 32717 + }, + { + "epoch": 0.5653511197124689, + "grad_norm": 0.9681769538443845, + "learning_rate": 8.37667656037963e-06, + "loss": 0.4267, + "step": 32718 + }, + { + "epoch": 0.5653683992258778, + "grad_norm": 1.5460020252317035, + "learning_rate": 8.376124336502105e-06, + "loss": 0.356, + "step": 32719 + }, + { + "epoch": 0.5653856787392867, + "grad_norm": 0.7945681008744062, + "learning_rate": 8.37557211771069e-06, + "loss": 0.322, + "step": 32720 + }, + { + "epoch": 0.5654029582526956, + "grad_norm": 0.4506783079838279, + "learning_rate": 8.3750199040071e-06, + "loss": 0.7539, + "step": 32721 + }, + { + "epoch": 0.5654202377661045, + "grad_norm": 1.4802912210388193, + "learning_rate": 8.374467695393073e-06, + "loss": 0.7329, + "step": 32722 + }, + { + "epoch": 0.5654375172795134, + "grad_norm": 1.6958231751656643, + "learning_rate": 8.373915491870334e-06, + "loss": 0.4324, + "step": 32723 + }, + { + "epoch": 0.5654547967929223, + "grad_norm": 1.2503051504775546, + "learning_rate": 8.37336329344062e-06, + "loss": 0.4779, + "step": 32724 + }, + { + "epoch": 0.5654720763063312, + "grad_norm": 1.1809890341977711, + "learning_rate": 8.372811100105656e-06, + "loss": 0.4604, + "step": 32725 + }, + { + "epoch": 0.5654893558197401, + "grad_norm": 0.9150682993286288, + "learning_rate": 8.372258911867168e-06, + "loss": 0.449, + "step": 32726 + }, + { + "epoch": 0.565506635333149, + "grad_norm": 1.055912742128019, + "learning_rate": 8.37170672872689e-06, + "loss": 0.4783, + "step": 32727 + }, + { + "epoch": 0.565523914846558, + "grad_norm": 1.0254303500428907, + "learning_rate": 8.371154550686547e-06, + "loss": 0.3255, + "step": 32728 + }, + { + "epoch": 0.5655411943599669, + "grad_norm": 0.48478825561354444, + "learning_rate": 8.370602377747876e-06, + "loss": 0.5697, + "step": 32729 + }, + { + "epoch": 0.5655584738733758, + "grad_norm": 1.5381967940232348, + "learning_rate": 8.370050209912599e-06, + "loss": 0.446, + "step": 32730 + }, + { + "epoch": 0.5655757533867847, + "grad_norm": 0.8934602598534089, + "learning_rate": 8.369498047182454e-06, + "loss": 0.7653, + "step": 32731 + }, + { + "epoch": 0.5655930329001936, + "grad_norm": 1.0453659688435508, + "learning_rate": 8.36894588955916e-06, + "loss": 0.4104, + "step": 32732 + }, + { + "epoch": 0.5656103124136024, + "grad_norm": 1.2366364380020065, + "learning_rate": 8.368393737044452e-06, + "loss": 0.4331, + "step": 32733 + }, + { + "epoch": 0.5656275919270113, + "grad_norm": 1.6836671579141202, + "learning_rate": 8.367841589640058e-06, + "loss": 0.3139, + "step": 32734 + }, + { + "epoch": 0.5656448714404202, + "grad_norm": 0.9481375777376364, + "learning_rate": 8.367289447347712e-06, + "loss": 0.4555, + "step": 32735 + }, + { + "epoch": 0.5656621509538291, + "grad_norm": 1.080217869978345, + "learning_rate": 8.366737310169139e-06, + "loss": 0.4091, + "step": 32736 + }, + { + "epoch": 0.565679430467238, + "grad_norm": 1.402584696308849, + "learning_rate": 8.366185178106067e-06, + "loss": 0.3382, + "step": 32737 + }, + { + "epoch": 0.5656967099806469, + "grad_norm": 1.3568564273794688, + "learning_rate": 8.365633051160226e-06, + "loss": 0.3816, + "step": 32738 + }, + { + "epoch": 0.5657139894940558, + "grad_norm": 0.9242783155540454, + "learning_rate": 8.365080929333348e-06, + "loss": 0.3869, + "step": 32739 + }, + { + "epoch": 0.5657312690074647, + "grad_norm": 1.088902275819295, + "learning_rate": 8.364528812627164e-06, + "loss": 0.2999, + "step": 32740 + }, + { + "epoch": 0.5657485485208736, + "grad_norm": 0.9194269196759108, + "learning_rate": 8.363976701043394e-06, + "loss": 0.2748, + "step": 32741 + }, + { + "epoch": 0.5657658280342825, + "grad_norm": 0.8950420102559457, + "learning_rate": 8.363424594583777e-06, + "loss": 0.6668, + "step": 32742 + }, + { + "epoch": 0.5657831075476915, + "grad_norm": 1.149536216914574, + "learning_rate": 8.362872493250036e-06, + "loss": 0.4922, + "step": 32743 + }, + { + "epoch": 0.5658003870611004, + "grad_norm": 0.8424913165392912, + "learning_rate": 8.362320397043907e-06, + "loss": 0.489, + "step": 32744 + }, + { + "epoch": 0.5658176665745093, + "grad_norm": 0.8078460204412995, + "learning_rate": 8.361768305967114e-06, + "loss": 0.3532, + "step": 32745 + }, + { + "epoch": 0.5658349460879182, + "grad_norm": 0.5688408804720617, + "learning_rate": 8.361216220021385e-06, + "loss": 0.6022, + "step": 32746 + }, + { + "epoch": 0.5658522256013271, + "grad_norm": 1.4689780119600155, + "learning_rate": 8.360664139208451e-06, + "loss": 0.2671, + "step": 32747 + }, + { + "epoch": 0.565869505114736, + "grad_norm": 1.0781583250208504, + "learning_rate": 8.360112063530042e-06, + "loss": 0.5881, + "step": 32748 + }, + { + "epoch": 0.5658867846281449, + "grad_norm": 1.0142155751341608, + "learning_rate": 8.359559992987886e-06, + "loss": 0.4102, + "step": 32749 + }, + { + "epoch": 0.5659040641415538, + "grad_norm": 1.0995610007015466, + "learning_rate": 8.359007927583714e-06, + "loss": 0.3522, + "step": 32750 + }, + { + "epoch": 0.5659213436549627, + "grad_norm": 1.1473447245985569, + "learning_rate": 8.358455867319256e-06, + "loss": 0.401, + "step": 32751 + }, + { + "epoch": 0.5659386231683716, + "grad_norm": 1.2910725096482505, + "learning_rate": 8.357903812196235e-06, + "loss": 0.4109, + "step": 32752 + }, + { + "epoch": 0.5659559026817805, + "grad_norm": 0.8916394609205546, + "learning_rate": 8.357351762216386e-06, + "loss": 0.516, + "step": 32753 + }, + { + "epoch": 0.5659731821951893, + "grad_norm": 0.8239389227078757, + "learning_rate": 8.356799717381434e-06, + "loss": 0.4358, + "step": 32754 + }, + { + "epoch": 0.5659904617085982, + "grad_norm": 0.45110503958852405, + "learning_rate": 8.356247677693114e-06, + "loss": 0.5348, + "step": 32755 + }, + { + "epoch": 0.5660077412220071, + "grad_norm": 0.9626158223460476, + "learning_rate": 8.355695643153152e-06, + "loss": 0.3836, + "step": 32756 + }, + { + "epoch": 0.566025020735416, + "grad_norm": 1.5757863607506437, + "learning_rate": 8.355143613763272e-06, + "loss": 0.2884, + "step": 32757 + }, + { + "epoch": 0.566042300248825, + "grad_norm": 0.8679483127285089, + "learning_rate": 8.354591589525209e-06, + "loss": 0.3769, + "step": 32758 + }, + { + "epoch": 0.5660595797622339, + "grad_norm": 1.4756205297301832, + "learning_rate": 8.354039570440688e-06, + "loss": 0.403, + "step": 32759 + }, + { + "epoch": 0.5660768592756428, + "grad_norm": 1.2128169602854026, + "learning_rate": 8.353487556511444e-06, + "loss": 0.5249, + "step": 32760 + }, + { + "epoch": 0.5660941387890517, + "grad_norm": 1.3507708856336822, + "learning_rate": 8.352935547739198e-06, + "loss": 0.3482, + "step": 32761 + }, + { + "epoch": 0.5661114183024606, + "grad_norm": 0.9645764661820107, + "learning_rate": 8.352383544125686e-06, + "loss": 0.4037, + "step": 32762 + }, + { + "epoch": 0.5661286978158695, + "grad_norm": 1.1600791150546454, + "learning_rate": 8.351831545672631e-06, + "loss": 0.2416, + "step": 32763 + }, + { + "epoch": 0.5661459773292784, + "grad_norm": 0.9729216968018531, + "learning_rate": 8.351279552381767e-06, + "loss": 0.4532, + "step": 32764 + }, + { + "epoch": 0.5661632568426873, + "grad_norm": 1.3863503148610083, + "learning_rate": 8.350727564254821e-06, + "loss": 0.3314, + "step": 32765 + }, + { + "epoch": 0.5661805363560962, + "grad_norm": 0.7644003042321889, + "learning_rate": 8.350175581293524e-06, + "loss": 0.3609, + "step": 32766 + }, + { + "epoch": 0.5661978158695051, + "grad_norm": 0.8235123733798907, + "learning_rate": 8.349623603499601e-06, + "loss": 0.464, + "step": 32767 + }, + { + "epoch": 0.566215095382914, + "grad_norm": 1.0348565176029934, + "learning_rate": 8.34907163087478e-06, + "loss": 0.3382, + "step": 32768 + }, + { + "epoch": 0.5662323748963229, + "grad_norm": 0.9794116983849107, + "learning_rate": 8.348519663420792e-06, + "loss": 0.5043, + "step": 32769 + }, + { + "epoch": 0.5662496544097319, + "grad_norm": 1.0292654658238505, + "learning_rate": 8.347967701139366e-06, + "loss": 0.397, + "step": 32770 + }, + { + "epoch": 0.5662669339231408, + "grad_norm": 0.9969238441904437, + "learning_rate": 8.347415744032235e-06, + "loss": 0.6562, + "step": 32771 + }, + { + "epoch": 0.5662842134365497, + "grad_norm": 0.6323567349182517, + "learning_rate": 8.34686379210112e-06, + "loss": 0.3151, + "step": 32772 + }, + { + "epoch": 0.5663014929499586, + "grad_norm": 0.636763833444462, + "learning_rate": 8.346311845347752e-06, + "loss": 0.2583, + "step": 32773 + }, + { + "epoch": 0.5663187724633675, + "grad_norm": 0.9165268348604713, + "learning_rate": 8.345759903773862e-06, + "loss": 0.4387, + "step": 32774 + }, + { + "epoch": 0.5663360519767764, + "grad_norm": 1.2234759343503343, + "learning_rate": 8.345207967381177e-06, + "loss": 0.3883, + "step": 32775 + }, + { + "epoch": 0.5663533314901852, + "grad_norm": 0.9277418810354844, + "learning_rate": 8.344656036171432e-06, + "loss": 0.3195, + "step": 32776 + }, + { + "epoch": 0.5663706110035941, + "grad_norm": 2.1697500138344523, + "learning_rate": 8.344104110146345e-06, + "loss": 0.4839, + "step": 32777 + }, + { + "epoch": 0.566387890517003, + "grad_norm": 0.8533500967591471, + "learning_rate": 8.34355218930765e-06, + "loss": 0.633, + "step": 32778 + }, + { + "epoch": 0.5664051700304119, + "grad_norm": 1.308975656967746, + "learning_rate": 8.343000273657073e-06, + "loss": 0.5276, + "step": 32779 + }, + { + "epoch": 0.5664224495438208, + "grad_norm": 0.911529656616669, + "learning_rate": 8.342448363196352e-06, + "loss": 0.2552, + "step": 32780 + }, + { + "epoch": 0.5664397290572297, + "grad_norm": 1.0885593080783378, + "learning_rate": 8.341896457927204e-06, + "loss": 0.3074, + "step": 32781 + }, + { + "epoch": 0.5664570085706386, + "grad_norm": 1.024669200423225, + "learning_rate": 8.341344557851364e-06, + "loss": 0.4469, + "step": 32782 + }, + { + "epoch": 0.5664742880840475, + "grad_norm": 1.2033007245035758, + "learning_rate": 8.340792662970555e-06, + "loss": 0.4024, + "step": 32783 + }, + { + "epoch": 0.5664915675974564, + "grad_norm": 1.1755673248534477, + "learning_rate": 8.340240773286511e-06, + "loss": 0.3728, + "step": 32784 + }, + { + "epoch": 0.5665088471108654, + "grad_norm": 1.4441253900984463, + "learning_rate": 8.339688888800961e-06, + "loss": 0.4448, + "step": 32785 + }, + { + "epoch": 0.5665261266242743, + "grad_norm": 0.7780582088752059, + "learning_rate": 8.339137009515633e-06, + "loss": 0.6054, + "step": 32786 + }, + { + "epoch": 0.5665434061376832, + "grad_norm": 1.8170054725588327, + "learning_rate": 8.338585135432253e-06, + "loss": 0.5025, + "step": 32787 + }, + { + "epoch": 0.5665606856510921, + "grad_norm": 1.226721868262627, + "learning_rate": 8.338033266552548e-06, + "loss": 0.4167, + "step": 32788 + }, + { + "epoch": 0.566577965164501, + "grad_norm": 1.2224229553905472, + "learning_rate": 8.33748140287825e-06, + "loss": 0.6677, + "step": 32789 + }, + { + "epoch": 0.5665952446779099, + "grad_norm": 0.7625777145314125, + "learning_rate": 8.336929544411088e-06, + "loss": 0.25, + "step": 32790 + }, + { + "epoch": 0.5666125241913188, + "grad_norm": 0.889524223628563, + "learning_rate": 8.33637769115279e-06, + "loss": 0.3855, + "step": 32791 + }, + { + "epoch": 0.5666298037047277, + "grad_norm": 0.905061532881394, + "learning_rate": 8.33582584310508e-06, + "loss": 0.3452, + "step": 32792 + }, + { + "epoch": 0.5666470832181366, + "grad_norm": 1.2515713602301772, + "learning_rate": 8.335274000269694e-06, + "loss": 0.2944, + "step": 32793 + }, + { + "epoch": 0.5666643627315455, + "grad_norm": 1.2261597113940201, + "learning_rate": 8.334722162648353e-06, + "loss": 0.4405, + "step": 32794 + }, + { + "epoch": 0.5666816422449544, + "grad_norm": 1.1364360929178081, + "learning_rate": 8.33417033024279e-06, + "loss": 0.4877, + "step": 32795 + }, + { + "epoch": 0.5666989217583633, + "grad_norm": 0.9878367895934653, + "learning_rate": 8.33361850305473e-06, + "loss": 0.4792, + "step": 32796 + }, + { + "epoch": 0.5667162012717721, + "grad_norm": 0.8513847780133256, + "learning_rate": 8.333066681085908e-06, + "loss": 0.3641, + "step": 32797 + }, + { + "epoch": 0.566733480785181, + "grad_norm": 0.6372160862018845, + "learning_rate": 8.332514864338046e-06, + "loss": 0.3073, + "step": 32798 + }, + { + "epoch": 0.56675076029859, + "grad_norm": 0.8717143847023475, + "learning_rate": 8.331963052812873e-06, + "loss": 0.3726, + "step": 32799 + }, + { + "epoch": 0.5667680398119989, + "grad_norm": 1.0870359397191303, + "learning_rate": 8.33141124651212e-06, + "loss": 0.3648, + "step": 32800 + }, + { + "epoch": 0.5667853193254078, + "grad_norm": 1.1272767391943241, + "learning_rate": 8.330859445437513e-06, + "loss": 0.3983, + "step": 32801 + }, + { + "epoch": 0.5668025988388167, + "grad_norm": 0.918010473510037, + "learning_rate": 8.330307649590782e-06, + "loss": 0.4847, + "step": 32802 + }, + { + "epoch": 0.5668198783522256, + "grad_norm": 1.3545251629993893, + "learning_rate": 8.32975585897365e-06, + "loss": 0.4452, + "step": 32803 + }, + { + "epoch": 0.5668371578656345, + "grad_norm": 2.0650187739865173, + "learning_rate": 8.329204073587855e-06, + "loss": 0.2905, + "step": 32804 + }, + { + "epoch": 0.5668544373790434, + "grad_norm": 1.3597710621987333, + "learning_rate": 8.328652293435116e-06, + "loss": 0.5296, + "step": 32805 + }, + { + "epoch": 0.5668717168924523, + "grad_norm": 0.6014493019624576, + "learning_rate": 8.328100518517169e-06, + "loss": 0.6465, + "step": 32806 + }, + { + "epoch": 0.5668889964058612, + "grad_norm": 1.2212664918562917, + "learning_rate": 8.327548748835736e-06, + "loss": 0.412, + "step": 32807 + }, + { + "epoch": 0.5669062759192701, + "grad_norm": 0.5611968388154212, + "learning_rate": 8.326996984392546e-06, + "loss": 0.7551, + "step": 32808 + }, + { + "epoch": 0.566923555432679, + "grad_norm": 0.6480450285365883, + "learning_rate": 8.32644522518933e-06, + "loss": 0.1796, + "step": 32809 + }, + { + "epoch": 0.5669408349460879, + "grad_norm": 0.7489025512779268, + "learning_rate": 8.325893471227814e-06, + "loss": 0.5079, + "step": 32810 + }, + { + "epoch": 0.5669581144594968, + "grad_norm": 0.9812716511363764, + "learning_rate": 8.32534172250973e-06, + "loss": 0.5553, + "step": 32811 + }, + { + "epoch": 0.5669753939729058, + "grad_norm": 1.2835049502246043, + "learning_rate": 8.3247899790368e-06, + "loss": 0.4103, + "step": 32812 + }, + { + "epoch": 0.5669926734863147, + "grad_norm": 1.295957019335567, + "learning_rate": 8.324238240810755e-06, + "loss": 0.4938, + "step": 32813 + }, + { + "epoch": 0.5670099529997236, + "grad_norm": 1.4455565337094949, + "learning_rate": 8.323686507833321e-06, + "loss": 0.3918, + "step": 32814 + }, + { + "epoch": 0.5670272325131325, + "grad_norm": 0.6424105320941356, + "learning_rate": 8.323134780106236e-06, + "loss": 0.6905, + "step": 32815 + }, + { + "epoch": 0.5670445120265414, + "grad_norm": 1.1888062502796855, + "learning_rate": 8.322583057631211e-06, + "loss": 0.4444, + "step": 32816 + }, + { + "epoch": 0.5670617915399503, + "grad_norm": 1.3475023810656237, + "learning_rate": 8.322031340409991e-06, + "loss": 0.3722, + "step": 32817 + }, + { + "epoch": 0.5670790710533591, + "grad_norm": 0.7287574206794797, + "learning_rate": 8.321479628444293e-06, + "loss": 0.4213, + "step": 32818 + }, + { + "epoch": 0.567096350566768, + "grad_norm": 0.9046558336369779, + "learning_rate": 8.320927921735848e-06, + "loss": 0.5516, + "step": 32819 + }, + { + "epoch": 0.5671136300801769, + "grad_norm": 1.2427103323152549, + "learning_rate": 8.320376220286387e-06, + "loss": 0.517, + "step": 32820 + }, + { + "epoch": 0.5671309095935858, + "grad_norm": 0.8258282958669836, + "learning_rate": 8.31982452409763e-06, + "loss": 0.2831, + "step": 32821 + }, + { + "epoch": 0.5671481891069947, + "grad_norm": 0.6145777819527075, + "learning_rate": 8.319272833171314e-06, + "loss": 0.8266, + "step": 32822 + }, + { + "epoch": 0.5671654686204036, + "grad_norm": 0.8695745104065816, + "learning_rate": 8.318721147509163e-06, + "loss": 0.2564, + "step": 32823 + }, + { + "epoch": 0.5671827481338125, + "grad_norm": 0.5571691504506289, + "learning_rate": 8.318169467112905e-06, + "loss": 0.752, + "step": 32824 + }, + { + "epoch": 0.5672000276472214, + "grad_norm": 1.2464694377327692, + "learning_rate": 8.317617791984265e-06, + "loss": 0.4501, + "step": 32825 + }, + { + "epoch": 0.5672173071606303, + "grad_norm": 0.8995515751174381, + "learning_rate": 8.31706612212498e-06, + "loss": 0.3247, + "step": 32826 + }, + { + "epoch": 0.5672345866740393, + "grad_norm": 0.9057093299629005, + "learning_rate": 8.31651445753677e-06, + "loss": 0.6098, + "step": 32827 + }, + { + "epoch": 0.5672518661874482, + "grad_norm": 0.9926439754355965, + "learning_rate": 8.315962798221361e-06, + "loss": 0.4926, + "step": 32828 + }, + { + "epoch": 0.5672691457008571, + "grad_norm": 1.8815161475732023, + "learning_rate": 8.315411144180488e-06, + "loss": 0.3664, + "step": 32829 + }, + { + "epoch": 0.567286425214266, + "grad_norm": 0.8507211309041759, + "learning_rate": 8.314859495415873e-06, + "loss": 0.5715, + "step": 32830 + }, + { + "epoch": 0.5673037047276749, + "grad_norm": 0.9081298094233146, + "learning_rate": 8.314307851929248e-06, + "loss": 0.3744, + "step": 32831 + }, + { + "epoch": 0.5673209842410838, + "grad_norm": 1.1092135333326592, + "learning_rate": 8.313756213722336e-06, + "loss": 0.364, + "step": 32832 + }, + { + "epoch": 0.5673382637544927, + "grad_norm": 0.7407859408158634, + "learning_rate": 8.313204580796871e-06, + "loss": 0.6817, + "step": 32833 + }, + { + "epoch": 0.5673555432679016, + "grad_norm": 0.7876948277370395, + "learning_rate": 8.312652953154576e-06, + "loss": 0.4377, + "step": 32834 + }, + { + "epoch": 0.5673728227813105, + "grad_norm": 0.8922349255384866, + "learning_rate": 8.312101330797182e-06, + "loss": 0.3358, + "step": 32835 + }, + { + "epoch": 0.5673901022947194, + "grad_norm": 0.7862414527822317, + "learning_rate": 8.31154971372641e-06, + "loss": 0.2735, + "step": 32836 + }, + { + "epoch": 0.5674073818081283, + "grad_norm": 1.0669143035329054, + "learning_rate": 8.310998101943998e-06, + "loss": 0.4717, + "step": 32837 + }, + { + "epoch": 0.5674246613215372, + "grad_norm": 0.9999533479691909, + "learning_rate": 8.310446495451667e-06, + "loss": 0.411, + "step": 32838 + }, + { + "epoch": 0.567441940834946, + "grad_norm": 0.5423909788087747, + "learning_rate": 8.309894894251145e-06, + "loss": 0.5324, + "step": 32839 + }, + { + "epoch": 0.5674592203483549, + "grad_norm": 0.9416858977876543, + "learning_rate": 8.309343298344162e-06, + "loss": 0.4231, + "step": 32840 + }, + { + "epoch": 0.5674764998617639, + "grad_norm": 1.213184057801613, + "learning_rate": 8.308791707732443e-06, + "loss": 0.3337, + "step": 32841 + }, + { + "epoch": 0.5674937793751728, + "grad_norm": 1.2494712616813308, + "learning_rate": 8.308240122417717e-06, + "loss": 0.5149, + "step": 32842 + }, + { + "epoch": 0.5675110588885817, + "grad_norm": 0.9913491030013676, + "learning_rate": 8.307688542401709e-06, + "loss": 0.3814, + "step": 32843 + }, + { + "epoch": 0.5675283384019906, + "grad_norm": 1.2520421174266128, + "learning_rate": 8.307136967686154e-06, + "loss": 0.34, + "step": 32844 + }, + { + "epoch": 0.5675456179153995, + "grad_norm": 1.0689164441658427, + "learning_rate": 8.30658539827277e-06, + "loss": 0.4989, + "step": 32845 + }, + { + "epoch": 0.5675628974288084, + "grad_norm": 0.7037947740127808, + "learning_rate": 8.306033834163295e-06, + "loss": 0.2883, + "step": 32846 + }, + { + "epoch": 0.5675801769422173, + "grad_norm": 0.8034950403907349, + "learning_rate": 8.305482275359445e-06, + "loss": 0.3506, + "step": 32847 + }, + { + "epoch": 0.5675974564556262, + "grad_norm": 0.7412838617602359, + "learning_rate": 8.304930721862956e-06, + "loss": 0.4381, + "step": 32848 + }, + { + "epoch": 0.5676147359690351, + "grad_norm": 0.9505595141169859, + "learning_rate": 8.304379173675553e-06, + "loss": 0.4105, + "step": 32849 + }, + { + "epoch": 0.567632015482444, + "grad_norm": 1.205653429119547, + "learning_rate": 8.30382763079896e-06, + "loss": 0.2691, + "step": 32850 + }, + { + "epoch": 0.5676492949958529, + "grad_norm": 0.8066483342247022, + "learning_rate": 8.303276093234914e-06, + "loss": 0.6678, + "step": 32851 + }, + { + "epoch": 0.5676665745092618, + "grad_norm": 1.2922695898553382, + "learning_rate": 8.302724560985132e-06, + "loss": 0.4915, + "step": 32852 + }, + { + "epoch": 0.5676838540226707, + "grad_norm": 1.5593143804836418, + "learning_rate": 8.302173034051346e-06, + "loss": 0.4614, + "step": 32853 + }, + { + "epoch": 0.5677011335360796, + "grad_norm": 0.9433473315012454, + "learning_rate": 8.301621512435281e-06, + "loss": 0.4051, + "step": 32854 + }, + { + "epoch": 0.5677184130494886, + "grad_norm": 0.6393784008939455, + "learning_rate": 8.301069996138674e-06, + "loss": 0.2513, + "step": 32855 + }, + { + "epoch": 0.5677356925628975, + "grad_norm": 1.2731018687235933, + "learning_rate": 8.300518485163236e-06, + "loss": 0.449, + "step": 32856 + }, + { + "epoch": 0.5677529720763064, + "grad_norm": 1.5993801051845604, + "learning_rate": 8.299966979510708e-06, + "loss": 0.3348, + "step": 32857 + }, + { + "epoch": 0.5677702515897153, + "grad_norm": 1.1809693853571055, + "learning_rate": 8.299415479182813e-06, + "loss": 0.4566, + "step": 32858 + }, + { + "epoch": 0.5677875311031242, + "grad_norm": 0.8012286836575304, + "learning_rate": 8.298863984181274e-06, + "loss": 0.2618, + "step": 32859 + }, + { + "epoch": 0.567804810616533, + "grad_norm": 0.8402427974873243, + "learning_rate": 8.298312494507829e-06, + "loss": 0.2702, + "step": 32860 + }, + { + "epoch": 0.5678220901299419, + "grad_norm": 0.7528704150901079, + "learning_rate": 8.297761010164192e-06, + "loss": 0.2396, + "step": 32861 + }, + { + "epoch": 0.5678393696433508, + "grad_norm": 0.8865981035142037, + "learning_rate": 8.2972095311521e-06, + "loss": 0.3841, + "step": 32862 + }, + { + "epoch": 0.5678566491567597, + "grad_norm": 0.9194030945282492, + "learning_rate": 8.296658057473274e-06, + "loss": 0.4774, + "step": 32863 + }, + { + "epoch": 0.5678739286701686, + "grad_norm": 0.4981698633268597, + "learning_rate": 8.29610658912945e-06, + "loss": 0.5862, + "step": 32864 + }, + { + "epoch": 0.5678912081835775, + "grad_norm": 0.7760400929304335, + "learning_rate": 8.295555126122343e-06, + "loss": 0.2737, + "step": 32865 + }, + { + "epoch": 0.5679084876969864, + "grad_norm": 1.0126639237623247, + "learning_rate": 8.295003668453693e-06, + "loss": 0.3434, + "step": 32866 + }, + { + "epoch": 0.5679257672103953, + "grad_norm": 1.1665680980838635, + "learning_rate": 8.294452216125217e-06, + "loss": 0.5655, + "step": 32867 + }, + { + "epoch": 0.5679430467238042, + "grad_norm": 1.1082239793397048, + "learning_rate": 8.29390076913865e-06, + "loss": 0.53, + "step": 32868 + }, + { + "epoch": 0.5679603262372132, + "grad_norm": 1.1223763878395678, + "learning_rate": 8.293349327495714e-06, + "loss": 0.5548, + "step": 32869 + }, + { + "epoch": 0.5679776057506221, + "grad_norm": 0.7519493799917567, + "learning_rate": 8.292797891198134e-06, + "loss": 0.2809, + "step": 32870 + }, + { + "epoch": 0.567994885264031, + "grad_norm": 1.106878797372798, + "learning_rate": 8.292246460247647e-06, + "loss": 0.38, + "step": 32871 + }, + { + "epoch": 0.5680121647774399, + "grad_norm": 1.1447075453752429, + "learning_rate": 8.291695034645969e-06, + "loss": 0.2703, + "step": 32872 + }, + { + "epoch": 0.5680294442908488, + "grad_norm": 1.2098964407154396, + "learning_rate": 8.291143614394834e-06, + "loss": 0.523, + "step": 32873 + }, + { + "epoch": 0.5680467238042577, + "grad_norm": 0.6084035465765754, + "learning_rate": 8.290592199495964e-06, + "loss": 0.6367, + "step": 32874 + }, + { + "epoch": 0.5680640033176666, + "grad_norm": 1.297294784361085, + "learning_rate": 8.290040789951095e-06, + "loss": 0.3431, + "step": 32875 + }, + { + "epoch": 0.5680812828310755, + "grad_norm": 1.541262448327058, + "learning_rate": 8.289489385761942e-06, + "loss": 0.6854, + "step": 32876 + }, + { + "epoch": 0.5680985623444844, + "grad_norm": 0.8247473587438515, + "learning_rate": 8.288937986930243e-06, + "loss": 0.3342, + "step": 32877 + }, + { + "epoch": 0.5681158418578933, + "grad_norm": 0.956214738082543, + "learning_rate": 8.288386593457717e-06, + "loss": 0.4448, + "step": 32878 + }, + { + "epoch": 0.5681331213713022, + "grad_norm": 1.34860258818411, + "learning_rate": 8.287835205346097e-06, + "loss": 0.3384, + "step": 32879 + }, + { + "epoch": 0.5681504008847111, + "grad_norm": 1.2288186511287174, + "learning_rate": 8.287283822597107e-06, + "loss": 0.5097, + "step": 32880 + }, + { + "epoch": 0.5681676803981199, + "grad_norm": 1.3582094032805123, + "learning_rate": 8.286732445212473e-06, + "loss": 0.5054, + "step": 32881 + }, + { + "epoch": 0.5681849599115288, + "grad_norm": 0.8122816742719993, + "learning_rate": 8.286181073193925e-06, + "loss": 0.4132, + "step": 32882 + }, + { + "epoch": 0.5682022394249377, + "grad_norm": 0.9307882937194182, + "learning_rate": 8.285629706543183e-06, + "loss": 0.3962, + "step": 32883 + }, + { + "epoch": 0.5682195189383467, + "grad_norm": 0.5362961628602183, + "learning_rate": 8.285078345261982e-06, + "loss": 0.4565, + "step": 32884 + }, + { + "epoch": 0.5682367984517556, + "grad_norm": 1.0183500918124726, + "learning_rate": 8.284526989352046e-06, + "loss": 0.3448, + "step": 32885 + }, + { + "epoch": 0.5682540779651645, + "grad_norm": 1.7396919837469673, + "learning_rate": 8.283975638815105e-06, + "loss": 0.4275, + "step": 32886 + }, + { + "epoch": 0.5682713574785734, + "grad_norm": 1.1692166468348297, + "learning_rate": 8.283424293652879e-06, + "loss": 0.5929, + "step": 32887 + }, + { + "epoch": 0.5682886369919823, + "grad_norm": 1.5179815976828792, + "learning_rate": 8.2828729538671e-06, + "loss": 0.4325, + "step": 32888 + }, + { + "epoch": 0.5683059165053912, + "grad_norm": 0.8256947073162679, + "learning_rate": 8.282321619459493e-06, + "loss": 0.4927, + "step": 32889 + }, + { + "epoch": 0.5683231960188001, + "grad_norm": 0.9017428019452627, + "learning_rate": 8.281770290431784e-06, + "loss": 0.2902, + "step": 32890 + }, + { + "epoch": 0.568340475532209, + "grad_norm": 0.7537318634681106, + "learning_rate": 8.281218966785705e-06, + "loss": 0.202, + "step": 32891 + }, + { + "epoch": 0.5683577550456179, + "grad_norm": 1.061072798436875, + "learning_rate": 8.280667648522975e-06, + "loss": 0.484, + "step": 32892 + }, + { + "epoch": 0.5683750345590268, + "grad_norm": 0.993117070834738, + "learning_rate": 8.280116335645326e-06, + "loss": 0.3928, + "step": 32893 + }, + { + "epoch": 0.5683923140724357, + "grad_norm": 0.7974879312392801, + "learning_rate": 8.279565028154481e-06, + "loss": 0.3895, + "step": 32894 + }, + { + "epoch": 0.5684095935858446, + "grad_norm": 1.0910392964375963, + "learning_rate": 8.279013726052174e-06, + "loss": 0.3559, + "step": 32895 + }, + { + "epoch": 0.5684268730992535, + "grad_norm": 0.6994027883207503, + "learning_rate": 8.27846242934012e-06, + "loss": 0.4877, + "step": 32896 + }, + { + "epoch": 0.5684441526126625, + "grad_norm": 1.4789415100825407, + "learning_rate": 8.277911138020059e-06, + "loss": 0.45, + "step": 32897 + }, + { + "epoch": 0.5684614321260714, + "grad_norm": 1.2902772273743404, + "learning_rate": 8.277359852093707e-06, + "loss": 0.6085, + "step": 32898 + }, + { + "epoch": 0.5684787116394803, + "grad_norm": 0.7906163163819797, + "learning_rate": 8.276808571562798e-06, + "loss": 0.2625, + "step": 32899 + }, + { + "epoch": 0.5684959911528892, + "grad_norm": 1.3418978443879848, + "learning_rate": 8.276257296429056e-06, + "loss": 0.4422, + "step": 32900 + }, + { + "epoch": 0.5685132706662981, + "grad_norm": 1.1535235923181026, + "learning_rate": 8.275706026694203e-06, + "loss": 0.4343, + "step": 32901 + }, + { + "epoch": 0.568530550179707, + "grad_norm": 1.3576570962362484, + "learning_rate": 8.275154762359974e-06, + "loss": 0.3184, + "step": 32902 + }, + { + "epoch": 0.5685478296931158, + "grad_norm": 2.3599768547282376, + "learning_rate": 8.274603503428086e-06, + "loss": 0.7168, + "step": 32903 + }, + { + "epoch": 0.5685651092065247, + "grad_norm": 0.8348713454185241, + "learning_rate": 8.274052249900276e-06, + "loss": 0.3719, + "step": 32904 + }, + { + "epoch": 0.5685823887199336, + "grad_norm": 0.8566094546557502, + "learning_rate": 8.273501001778262e-06, + "loss": 0.2399, + "step": 32905 + }, + { + "epoch": 0.5685996682333425, + "grad_norm": 0.5687368594321012, + "learning_rate": 8.272949759063777e-06, + "loss": 0.337, + "step": 32906 + }, + { + "epoch": 0.5686169477467514, + "grad_norm": 1.235608189683864, + "learning_rate": 8.272398521758542e-06, + "loss": 0.2353, + "step": 32907 + }, + { + "epoch": 0.5686342272601603, + "grad_norm": 0.9364743943924813, + "learning_rate": 8.271847289864288e-06, + "loss": 0.3411, + "step": 32908 + }, + { + "epoch": 0.5686515067735692, + "grad_norm": 1.0340793538773723, + "learning_rate": 8.27129606338274e-06, + "loss": 0.4491, + "step": 32909 + }, + { + "epoch": 0.5686687862869781, + "grad_norm": 1.1819492495202417, + "learning_rate": 8.270744842315622e-06, + "loss": 0.5171, + "step": 32910 + }, + { + "epoch": 0.568686065800387, + "grad_norm": 0.9594391597188582, + "learning_rate": 8.270193626664666e-06, + "loss": 0.5842, + "step": 32911 + }, + { + "epoch": 0.568703345313796, + "grad_norm": 1.2001902573956258, + "learning_rate": 8.269642416431593e-06, + "loss": 0.384, + "step": 32912 + }, + { + "epoch": 0.5687206248272049, + "grad_norm": 0.8246427189430992, + "learning_rate": 8.269091211618131e-06, + "loss": 0.6801, + "step": 32913 + }, + { + "epoch": 0.5687379043406138, + "grad_norm": 1.628169318428115, + "learning_rate": 8.268540012226006e-06, + "loss": 0.5149, + "step": 32914 + }, + { + "epoch": 0.5687551838540227, + "grad_norm": 0.9637511935181681, + "learning_rate": 8.26798881825695e-06, + "loss": 0.2911, + "step": 32915 + }, + { + "epoch": 0.5687724633674316, + "grad_norm": 0.8526269967972462, + "learning_rate": 8.267437629712676e-06, + "loss": 0.3128, + "step": 32916 + }, + { + "epoch": 0.5687897428808405, + "grad_norm": 1.1914909412096237, + "learning_rate": 8.266886446594927e-06, + "loss": 0.4013, + "step": 32917 + }, + { + "epoch": 0.5688070223942494, + "grad_norm": 0.7439530601456283, + "learning_rate": 8.266335268905417e-06, + "loss": 0.4349, + "step": 32918 + }, + { + "epoch": 0.5688243019076583, + "grad_norm": 1.2121101707945923, + "learning_rate": 8.26578409664588e-06, + "loss": 0.528, + "step": 32919 + }, + { + "epoch": 0.5688415814210672, + "grad_norm": 1.0372513955059306, + "learning_rate": 8.265232929818039e-06, + "loss": 0.3739, + "step": 32920 + }, + { + "epoch": 0.5688588609344761, + "grad_norm": 0.9130365282580671, + "learning_rate": 8.264681768423616e-06, + "loss": 0.4258, + "step": 32921 + }, + { + "epoch": 0.568876140447885, + "grad_norm": 0.47971085990228435, + "learning_rate": 8.264130612464345e-06, + "loss": 0.404, + "step": 32922 + }, + { + "epoch": 0.568893419961294, + "grad_norm": 0.7441955779136071, + "learning_rate": 8.263579461941946e-06, + "loss": 0.3859, + "step": 32923 + }, + { + "epoch": 0.5689106994747027, + "grad_norm": 1.1732680987525832, + "learning_rate": 8.263028316858151e-06, + "loss": 0.3456, + "step": 32924 + }, + { + "epoch": 0.5689279789881116, + "grad_norm": 0.6922760658945813, + "learning_rate": 8.262477177214681e-06, + "loss": 0.4196, + "step": 32925 + }, + { + "epoch": 0.5689452585015206, + "grad_norm": 1.5337769383923165, + "learning_rate": 8.261926043013267e-06, + "loss": 0.5496, + "step": 32926 + }, + { + "epoch": 0.5689625380149295, + "grad_norm": 1.2087596242454979, + "learning_rate": 8.26137491425563e-06, + "loss": 0.4644, + "step": 32927 + }, + { + "epoch": 0.5689798175283384, + "grad_norm": 1.0811147871273334, + "learning_rate": 8.260823790943501e-06, + "loss": 0.3898, + "step": 32928 + }, + { + "epoch": 0.5689970970417473, + "grad_norm": 0.9985976081375023, + "learning_rate": 8.260272673078602e-06, + "loss": 0.3706, + "step": 32929 + }, + { + "epoch": 0.5690143765551562, + "grad_norm": 0.9143455386311149, + "learning_rate": 8.259721560662664e-06, + "loss": 0.3638, + "step": 32930 + }, + { + "epoch": 0.5690316560685651, + "grad_norm": 1.0139545901298905, + "learning_rate": 8.259170453697411e-06, + "loss": 0.3967, + "step": 32931 + }, + { + "epoch": 0.569048935581974, + "grad_norm": 0.8734627944097041, + "learning_rate": 8.258619352184564e-06, + "loss": 0.3391, + "step": 32932 + }, + { + "epoch": 0.5690662150953829, + "grad_norm": 1.2673712302151445, + "learning_rate": 8.258068256125857e-06, + "loss": 0.4156, + "step": 32933 + }, + { + "epoch": 0.5690834946087918, + "grad_norm": 0.9366307403001224, + "learning_rate": 8.257517165523011e-06, + "loss": 0.4387, + "step": 32934 + }, + { + "epoch": 0.5691007741222007, + "grad_norm": 0.7206980759561027, + "learning_rate": 8.256966080377758e-06, + "loss": 0.3065, + "step": 32935 + }, + { + "epoch": 0.5691180536356096, + "grad_norm": 0.9799538667782974, + "learning_rate": 8.256415000691811e-06, + "loss": 0.4217, + "step": 32936 + }, + { + "epoch": 0.5691353331490185, + "grad_norm": 0.7116280238563015, + "learning_rate": 8.255863926466913e-06, + "loss": 0.3603, + "step": 32937 + }, + { + "epoch": 0.5691526126624274, + "grad_norm": 1.16540133883028, + "learning_rate": 8.255312857704776e-06, + "loss": 0.3305, + "step": 32938 + }, + { + "epoch": 0.5691698921758364, + "grad_norm": 1.4160353503008811, + "learning_rate": 8.254761794407135e-06, + "loss": 0.2663, + "step": 32939 + }, + { + "epoch": 0.5691871716892453, + "grad_norm": 0.8944485955306023, + "learning_rate": 8.25421073657571e-06, + "loss": 0.3849, + "step": 32940 + }, + { + "epoch": 0.5692044512026542, + "grad_norm": 1.203098899524191, + "learning_rate": 8.253659684212234e-06, + "loss": 0.45, + "step": 32941 + }, + { + "epoch": 0.5692217307160631, + "grad_norm": 1.0362181927220042, + "learning_rate": 8.253108637318427e-06, + "loss": 0.6325, + "step": 32942 + }, + { + "epoch": 0.569239010229472, + "grad_norm": 0.9852527569547099, + "learning_rate": 8.252557595896013e-06, + "loss": 0.3959, + "step": 32943 + }, + { + "epoch": 0.5692562897428809, + "grad_norm": 1.3155212219762082, + "learning_rate": 8.252006559946725e-06, + "loss": 0.4707, + "step": 32944 + }, + { + "epoch": 0.5692735692562897, + "grad_norm": 1.3629677090496848, + "learning_rate": 8.251455529472283e-06, + "loss": 0.4884, + "step": 32945 + }, + { + "epoch": 0.5692908487696986, + "grad_norm": 1.4495353409877447, + "learning_rate": 8.25090450447442e-06, + "loss": 0.748, + "step": 32946 + }, + { + "epoch": 0.5693081282831075, + "grad_norm": 1.087969994398824, + "learning_rate": 8.250353484954851e-06, + "loss": 0.4518, + "step": 32947 + }, + { + "epoch": 0.5693254077965164, + "grad_norm": 0.9660521343269151, + "learning_rate": 8.249802470915311e-06, + "loss": 0.3714, + "step": 32948 + }, + { + "epoch": 0.5693426873099253, + "grad_norm": 1.5657826509574628, + "learning_rate": 8.24925146235752e-06, + "loss": 0.4221, + "step": 32949 + }, + { + "epoch": 0.5693599668233342, + "grad_norm": 1.071681049800412, + "learning_rate": 8.24870045928321e-06, + "loss": 0.3829, + "step": 32950 + }, + { + "epoch": 0.5693772463367431, + "grad_norm": 0.9094706257284942, + "learning_rate": 8.248149461694104e-06, + "loss": 0.2351, + "step": 32951 + }, + { + "epoch": 0.569394525850152, + "grad_norm": 0.7365362038411198, + "learning_rate": 8.247598469591923e-06, + "loss": 0.5045, + "step": 32952 + }, + { + "epoch": 0.569411805363561, + "grad_norm": 0.7277736435853585, + "learning_rate": 8.2470474829784e-06, + "loss": 0.3605, + "step": 32953 + }, + { + "epoch": 0.5694290848769699, + "grad_norm": 1.039487921786708, + "learning_rate": 8.246496501855253e-06, + "loss": 0.2992, + "step": 32954 + }, + { + "epoch": 0.5694463643903788, + "grad_norm": 0.8623885843955227, + "learning_rate": 8.245945526224218e-06, + "loss": 0.3658, + "step": 32955 + }, + { + "epoch": 0.5694636439037877, + "grad_norm": 0.5364310036067436, + "learning_rate": 8.245394556087009e-06, + "loss": 0.5595, + "step": 32956 + }, + { + "epoch": 0.5694809234171966, + "grad_norm": 1.1526071185946092, + "learning_rate": 8.244843591445364e-06, + "loss": 0.2943, + "step": 32957 + }, + { + "epoch": 0.5694982029306055, + "grad_norm": 1.0603717623194053, + "learning_rate": 8.244292632300996e-06, + "loss": 0.4959, + "step": 32958 + }, + { + "epoch": 0.5695154824440144, + "grad_norm": 1.2481231106624846, + "learning_rate": 8.243741678655641e-06, + "loss": 0.3432, + "step": 32959 + }, + { + "epoch": 0.5695327619574233, + "grad_norm": 0.9872612613298856, + "learning_rate": 8.243190730511018e-06, + "loss": 0.2273, + "step": 32960 + }, + { + "epoch": 0.5695500414708322, + "grad_norm": 0.562380498647636, + "learning_rate": 8.24263978786886e-06, + "loss": 0.2181, + "step": 32961 + }, + { + "epoch": 0.5695673209842411, + "grad_norm": 1.0267711139630797, + "learning_rate": 8.242088850730885e-06, + "loss": 0.4587, + "step": 32962 + }, + { + "epoch": 0.56958460049765, + "grad_norm": 1.3610670923458017, + "learning_rate": 8.241537919098819e-06, + "loss": 0.2771, + "step": 32963 + }, + { + "epoch": 0.5696018800110589, + "grad_norm": 1.1397260655396666, + "learning_rate": 8.240986992974391e-06, + "loss": 0.5354, + "step": 32964 + }, + { + "epoch": 0.5696191595244678, + "grad_norm": 0.6796039596765322, + "learning_rate": 8.240436072359324e-06, + "loss": 0.6554, + "step": 32965 + }, + { + "epoch": 0.5696364390378766, + "grad_norm": 1.1446532720319207, + "learning_rate": 8.23988515725535e-06, + "loss": 0.4635, + "step": 32966 + }, + { + "epoch": 0.5696537185512855, + "grad_norm": 0.8772807644594868, + "learning_rate": 8.239334247664184e-06, + "loss": 0.4873, + "step": 32967 + }, + { + "epoch": 0.5696709980646945, + "grad_norm": 0.8029875828523363, + "learning_rate": 8.238783343587561e-06, + "loss": 0.4272, + "step": 32968 + }, + { + "epoch": 0.5696882775781034, + "grad_norm": 0.9367796640226181, + "learning_rate": 8.238232445027198e-06, + "loss": 0.4504, + "step": 32969 + }, + { + "epoch": 0.5697055570915123, + "grad_norm": 0.9670504572269155, + "learning_rate": 8.237681551984828e-06, + "loss": 0.318, + "step": 32970 + }, + { + "epoch": 0.5697228366049212, + "grad_norm": 0.7008541976995042, + "learning_rate": 8.237130664462171e-06, + "loss": 0.3357, + "step": 32971 + }, + { + "epoch": 0.5697401161183301, + "grad_norm": 0.5584712101245474, + "learning_rate": 8.23657978246096e-06, + "loss": 0.6791, + "step": 32972 + }, + { + "epoch": 0.569757395631739, + "grad_norm": 0.9608042225554755, + "learning_rate": 8.236028905982914e-06, + "loss": 0.4403, + "step": 32973 + }, + { + "epoch": 0.5697746751451479, + "grad_norm": 0.9576065799375035, + "learning_rate": 8.235478035029755e-06, + "loss": 0.4679, + "step": 32974 + }, + { + "epoch": 0.5697919546585568, + "grad_norm": 0.4781176724422413, + "learning_rate": 8.234927169603217e-06, + "loss": 0.5827, + "step": 32975 + }, + { + "epoch": 0.5698092341719657, + "grad_norm": 1.1562684034306987, + "learning_rate": 8.234376309705015e-06, + "loss": 0.6021, + "step": 32976 + }, + { + "epoch": 0.5698265136853746, + "grad_norm": 1.2022521519648446, + "learning_rate": 8.233825455336888e-06, + "loss": 0.5116, + "step": 32977 + }, + { + "epoch": 0.5698437931987835, + "grad_norm": 1.1361985805397525, + "learning_rate": 8.233274606500549e-06, + "loss": 0.3533, + "step": 32978 + }, + { + "epoch": 0.5698610727121924, + "grad_norm": 0.9718877888408446, + "learning_rate": 8.232723763197732e-06, + "loss": 0.3783, + "step": 32979 + }, + { + "epoch": 0.5698783522256013, + "grad_norm": 1.181634005332681, + "learning_rate": 8.232172925430153e-06, + "loss": 0.3536, + "step": 32980 + }, + { + "epoch": 0.5698956317390103, + "grad_norm": 2.112411216425546, + "learning_rate": 8.231622093199551e-06, + "loss": 0.4086, + "step": 32981 + }, + { + "epoch": 0.5699129112524192, + "grad_norm": 1.310590117236884, + "learning_rate": 8.231071266507638e-06, + "loss": 0.3356, + "step": 32982 + }, + { + "epoch": 0.5699301907658281, + "grad_norm": 0.870658077196771, + "learning_rate": 8.230520445356142e-06, + "loss": 0.4549, + "step": 32983 + }, + { + "epoch": 0.569947470279237, + "grad_norm": 1.289663666479424, + "learning_rate": 8.229969629746795e-06, + "loss": 0.3111, + "step": 32984 + }, + { + "epoch": 0.5699647497926459, + "grad_norm": 0.9381830232025037, + "learning_rate": 8.229418819681314e-06, + "loss": 0.6682, + "step": 32985 + }, + { + "epoch": 0.5699820293060548, + "grad_norm": 0.7402695808376833, + "learning_rate": 8.228868015161432e-06, + "loss": 0.22, + "step": 32986 + }, + { + "epoch": 0.5699993088194636, + "grad_norm": 1.3733315479019292, + "learning_rate": 8.228317216188867e-06, + "loss": 0.2992, + "step": 32987 + }, + { + "epoch": 0.5700165883328725, + "grad_norm": 1.4820709947969435, + "learning_rate": 8.227766422765347e-06, + "loss": 0.3587, + "step": 32988 + }, + { + "epoch": 0.5700338678462814, + "grad_norm": 1.0899552972276045, + "learning_rate": 8.227215634892596e-06, + "loss": 0.3366, + "step": 32989 + }, + { + "epoch": 0.5700511473596903, + "grad_norm": 0.7433884369297572, + "learning_rate": 8.226664852572343e-06, + "loss": 0.8036, + "step": 32990 + }, + { + "epoch": 0.5700684268730992, + "grad_norm": 1.0746909494473502, + "learning_rate": 8.226114075806307e-06, + "loss": 0.4498, + "step": 32991 + }, + { + "epoch": 0.5700857063865081, + "grad_norm": 0.9732752133887972, + "learning_rate": 8.225563304596223e-06, + "loss": 0.3981, + "step": 32992 + }, + { + "epoch": 0.570102985899917, + "grad_norm": 1.2611821560837289, + "learning_rate": 8.225012538943806e-06, + "loss": 0.3862, + "step": 32993 + }, + { + "epoch": 0.5701202654133259, + "grad_norm": 1.5883687450592514, + "learning_rate": 8.224461778850783e-06, + "loss": 0.4485, + "step": 32994 + }, + { + "epoch": 0.5701375449267349, + "grad_norm": 0.982697263201687, + "learning_rate": 8.223911024318885e-06, + "loss": 0.4633, + "step": 32995 + }, + { + "epoch": 0.5701548244401438, + "grad_norm": 1.2075841180387936, + "learning_rate": 8.223360275349825e-06, + "loss": 0.5355, + "step": 32996 + }, + { + "epoch": 0.5701721039535527, + "grad_norm": 1.3352545309261474, + "learning_rate": 8.222809531945342e-06, + "loss": 0.5612, + "step": 32997 + }, + { + "epoch": 0.5701893834669616, + "grad_norm": 1.2183307896210513, + "learning_rate": 8.222258794107152e-06, + "loss": 0.4083, + "step": 32998 + }, + { + "epoch": 0.5702066629803705, + "grad_norm": 0.8537630173821983, + "learning_rate": 8.221708061836984e-06, + "loss": 0.3518, + "step": 32999 + }, + { + "epoch": 0.5702239424937794, + "grad_norm": 1.172868927285641, + "learning_rate": 8.221157335136559e-06, + "loss": 0.3409, + "step": 33000 + }, + { + "epoch": 0.5702412220071883, + "grad_norm": 1.1189211226002118, + "learning_rate": 8.220606614007609e-06, + "loss": 0.2819, + "step": 33001 + }, + { + "epoch": 0.5702585015205972, + "grad_norm": 1.6037165192765268, + "learning_rate": 8.220055898451848e-06, + "loss": 0.4645, + "step": 33002 + }, + { + "epoch": 0.5702757810340061, + "grad_norm": 0.5955243051254999, + "learning_rate": 8.219505188471014e-06, + "loss": 0.2158, + "step": 33003 + }, + { + "epoch": 0.570293060547415, + "grad_norm": 0.7751007867532771, + "learning_rate": 8.218954484066822e-06, + "loss": 0.2318, + "step": 33004 + }, + { + "epoch": 0.5703103400608239, + "grad_norm": 2.0367133196927187, + "learning_rate": 8.218403785240998e-06, + "loss": 0.3417, + "step": 33005 + }, + { + "epoch": 0.5703276195742328, + "grad_norm": 0.9506148090263898, + "learning_rate": 8.217853091995274e-06, + "loss": 0.3168, + "step": 33006 + }, + { + "epoch": 0.5703448990876417, + "grad_norm": 1.2037193545256002, + "learning_rate": 8.217302404331364e-06, + "loss": 0.3462, + "step": 33007 + }, + { + "epoch": 0.5703621786010505, + "grad_norm": 1.0553085528786852, + "learning_rate": 8.216751722251001e-06, + "loss": 0.3956, + "step": 33008 + }, + { + "epoch": 0.5703794581144594, + "grad_norm": 1.4914717714039403, + "learning_rate": 8.216201045755905e-06, + "loss": 0.4126, + "step": 33009 + }, + { + "epoch": 0.5703967376278684, + "grad_norm": 1.1844456188486263, + "learning_rate": 8.215650374847805e-06, + "loss": 0.4007, + "step": 33010 + }, + { + "epoch": 0.5704140171412773, + "grad_norm": 1.3731918748817615, + "learning_rate": 8.215099709528422e-06, + "loss": 0.2868, + "step": 33011 + }, + { + "epoch": 0.5704312966546862, + "grad_norm": 0.843498043479937, + "learning_rate": 8.214549049799486e-06, + "loss": 0.3066, + "step": 33012 + }, + { + "epoch": 0.5704485761680951, + "grad_norm": 0.8409072491551597, + "learning_rate": 8.213998395662716e-06, + "loss": 0.5054, + "step": 33013 + }, + { + "epoch": 0.570465855681504, + "grad_norm": 0.9729381060767818, + "learning_rate": 8.213447747119836e-06, + "loss": 0.3188, + "step": 33014 + }, + { + "epoch": 0.5704831351949129, + "grad_norm": 1.103850606556252, + "learning_rate": 8.21289710417258e-06, + "loss": 0.4779, + "step": 33015 + }, + { + "epoch": 0.5705004147083218, + "grad_norm": 1.0762634128724446, + "learning_rate": 8.212346466822658e-06, + "loss": 0.4831, + "step": 33016 + }, + { + "epoch": 0.5705176942217307, + "grad_norm": 0.8955331746779231, + "learning_rate": 8.211795835071808e-06, + "loss": 0.5251, + "step": 33017 + }, + { + "epoch": 0.5705349737351396, + "grad_norm": 0.49378536276346296, + "learning_rate": 8.211245208921747e-06, + "loss": 0.6073, + "step": 33018 + }, + { + "epoch": 0.5705522532485485, + "grad_norm": 0.9803163315441752, + "learning_rate": 8.210694588374202e-06, + "loss": 0.4105, + "step": 33019 + }, + { + "epoch": 0.5705695327619574, + "grad_norm": 0.8738999360363033, + "learning_rate": 8.210143973430897e-06, + "loss": 0.5294, + "step": 33020 + }, + { + "epoch": 0.5705868122753663, + "grad_norm": 1.1543256493979892, + "learning_rate": 8.209593364093563e-06, + "loss": 0.566, + "step": 33021 + }, + { + "epoch": 0.5706040917887752, + "grad_norm": 0.4887701674717412, + "learning_rate": 8.20904276036391e-06, + "loss": 0.6093, + "step": 33022 + }, + { + "epoch": 0.5706213713021842, + "grad_norm": 1.2088466835490204, + "learning_rate": 8.208492162243678e-06, + "loss": 0.4738, + "step": 33023 + }, + { + "epoch": 0.5706386508155931, + "grad_norm": 1.0449235903650558, + "learning_rate": 8.207941569734581e-06, + "loss": 0.4164, + "step": 33024 + }, + { + "epoch": 0.570655930329002, + "grad_norm": 1.0007617573857015, + "learning_rate": 8.207390982838348e-06, + "loss": 0.4831, + "step": 33025 + }, + { + "epoch": 0.5706732098424109, + "grad_norm": 0.5014732858550753, + "learning_rate": 8.206840401556704e-06, + "loss": 0.9295, + "step": 33026 + }, + { + "epoch": 0.5706904893558198, + "grad_norm": 1.1871442576396023, + "learning_rate": 8.206289825891369e-06, + "loss": 0.3629, + "step": 33027 + }, + { + "epoch": 0.5707077688692287, + "grad_norm": 0.6608946883087012, + "learning_rate": 8.205739255844074e-06, + "loss": 0.6349, + "step": 33028 + }, + { + "epoch": 0.5707250483826375, + "grad_norm": 0.7974818300499154, + "learning_rate": 8.205188691416536e-06, + "loss": 0.3455, + "step": 33029 + }, + { + "epoch": 0.5707423278960464, + "grad_norm": 1.5817465326339253, + "learning_rate": 8.204638132610487e-06, + "loss": 0.5278, + "step": 33030 + }, + { + "epoch": 0.5707596074094553, + "grad_norm": 0.9568772830514783, + "learning_rate": 8.204087579427646e-06, + "loss": 0.3501, + "step": 33031 + }, + { + "epoch": 0.5707768869228642, + "grad_norm": 1.1066711789973414, + "learning_rate": 8.203537031869743e-06, + "loss": 0.3936, + "step": 33032 + }, + { + "epoch": 0.5707941664362731, + "grad_norm": 2.219066511767639, + "learning_rate": 8.202986489938496e-06, + "loss": 0.3007, + "step": 33033 + }, + { + "epoch": 0.570811445949682, + "grad_norm": 1.301174325740973, + "learning_rate": 8.202435953635629e-06, + "loss": 0.5651, + "step": 33034 + }, + { + "epoch": 0.5708287254630909, + "grad_norm": 0.9527389988973552, + "learning_rate": 8.201885422962874e-06, + "loss": 0.3376, + "step": 33035 + }, + { + "epoch": 0.5708460049764998, + "grad_norm": 1.114662594837318, + "learning_rate": 8.201334897921945e-06, + "loss": 0.291, + "step": 33036 + }, + { + "epoch": 0.5708632844899088, + "grad_norm": 0.7736413980195174, + "learning_rate": 8.200784378514576e-06, + "loss": 0.4683, + "step": 33037 + }, + { + "epoch": 0.5708805640033177, + "grad_norm": 0.8391342997181899, + "learning_rate": 8.200233864742487e-06, + "loss": 0.3778, + "step": 33038 + }, + { + "epoch": 0.5708978435167266, + "grad_norm": 0.46765812048727956, + "learning_rate": 8.1996833566074e-06, + "loss": 0.6545, + "step": 33039 + }, + { + "epoch": 0.5709151230301355, + "grad_norm": 1.5094616750538243, + "learning_rate": 8.199132854111041e-06, + "loss": 0.4565, + "step": 33040 + }, + { + "epoch": 0.5709324025435444, + "grad_norm": 1.2205866897907034, + "learning_rate": 8.19858235725514e-06, + "loss": 0.5098, + "step": 33041 + }, + { + "epoch": 0.5709496820569533, + "grad_norm": 1.2408551598314652, + "learning_rate": 8.19803186604141e-06, + "loss": 0.4144, + "step": 33042 + }, + { + "epoch": 0.5709669615703622, + "grad_norm": 0.7044206435881375, + "learning_rate": 8.197481380471586e-06, + "loss": 0.4169, + "step": 33043 + }, + { + "epoch": 0.5709842410837711, + "grad_norm": 1.1750676044998898, + "learning_rate": 8.196930900547386e-06, + "loss": 0.4553, + "step": 33044 + }, + { + "epoch": 0.57100152059718, + "grad_norm": 1.4513797251123106, + "learning_rate": 8.196380426270532e-06, + "loss": 0.468, + "step": 33045 + }, + { + "epoch": 0.5710188001105889, + "grad_norm": 0.9092904833356102, + "learning_rate": 8.195829957642756e-06, + "loss": 0.4027, + "step": 33046 + }, + { + "epoch": 0.5710360796239978, + "grad_norm": 1.1007254308887704, + "learning_rate": 8.195279494665774e-06, + "loss": 0.6182, + "step": 33047 + }, + { + "epoch": 0.5710533591374067, + "grad_norm": 0.9730030459093761, + "learning_rate": 8.194729037341318e-06, + "loss": 0.3358, + "step": 33048 + }, + { + "epoch": 0.5710706386508156, + "grad_norm": 0.8461675918812605, + "learning_rate": 8.194178585671102e-06, + "loss": 0.4088, + "step": 33049 + }, + { + "epoch": 0.5710879181642246, + "grad_norm": 1.4897052559934454, + "learning_rate": 8.19362813965686e-06, + "loss": 0.4989, + "step": 33050 + }, + { + "epoch": 0.5711051976776333, + "grad_norm": 1.4717276157391215, + "learning_rate": 8.19307769930031e-06, + "loss": 0.4642, + "step": 33051 + }, + { + "epoch": 0.5711224771910423, + "grad_norm": 0.9589738071647069, + "learning_rate": 8.192527264603181e-06, + "loss": 0.3479, + "step": 33052 + }, + { + "epoch": 0.5711397567044512, + "grad_norm": 1.0955019757677733, + "learning_rate": 8.19197683556719e-06, + "loss": 0.3351, + "step": 33053 + }, + { + "epoch": 0.5711570362178601, + "grad_norm": 1.1271257984019527, + "learning_rate": 8.191426412194068e-06, + "loss": 0.3672, + "step": 33054 + }, + { + "epoch": 0.571174315731269, + "grad_norm": 0.8070799839371773, + "learning_rate": 8.190875994485539e-06, + "loss": 0.326, + "step": 33055 + }, + { + "epoch": 0.5711915952446779, + "grad_norm": 0.8951784702466276, + "learning_rate": 8.190325582443314e-06, + "loss": 0.3042, + "step": 33056 + }, + { + "epoch": 0.5712088747580868, + "grad_norm": 1.3555691396624499, + "learning_rate": 8.189775176069136e-06, + "loss": 0.4344, + "step": 33057 + }, + { + "epoch": 0.5712261542714957, + "grad_norm": 1.2634724898606253, + "learning_rate": 8.189224775364716e-06, + "loss": 0.3682, + "step": 33058 + }, + { + "epoch": 0.5712434337849046, + "grad_norm": 1.5558934848672346, + "learning_rate": 8.188674380331782e-06, + "loss": 0.4171, + "step": 33059 + }, + { + "epoch": 0.5712607132983135, + "grad_norm": 1.0566807576856159, + "learning_rate": 8.188123990972056e-06, + "loss": 0.3988, + "step": 33060 + }, + { + "epoch": 0.5712779928117224, + "grad_norm": 1.2985321170486241, + "learning_rate": 8.187573607287267e-06, + "loss": 0.5092, + "step": 33061 + }, + { + "epoch": 0.5712952723251313, + "grad_norm": 1.2967914322502854, + "learning_rate": 8.18702322927913e-06, + "loss": 0.4259, + "step": 33062 + }, + { + "epoch": 0.5713125518385402, + "grad_norm": 0.6261416510644804, + "learning_rate": 8.18647285694938e-06, + "loss": 0.3448, + "step": 33063 + }, + { + "epoch": 0.5713298313519491, + "grad_norm": 1.088028155473673, + "learning_rate": 8.185922490299732e-06, + "loss": 0.3283, + "step": 33064 + }, + { + "epoch": 0.571347110865358, + "grad_norm": 1.0693634853567309, + "learning_rate": 8.18537212933191e-06, + "loss": 0.2665, + "step": 33065 + }, + { + "epoch": 0.571364390378767, + "grad_norm": 0.7949549918805414, + "learning_rate": 8.184821774047646e-06, + "loss": 0.5387, + "step": 33066 + }, + { + "epoch": 0.5713816698921759, + "grad_norm": 1.1685195225719065, + "learning_rate": 8.184271424448655e-06, + "loss": 0.4876, + "step": 33067 + }, + { + "epoch": 0.5713989494055848, + "grad_norm": 0.8585133994265935, + "learning_rate": 8.183721080536663e-06, + "loss": 0.4703, + "step": 33068 + }, + { + "epoch": 0.5714162289189937, + "grad_norm": 0.8775633297309234, + "learning_rate": 8.183170742313394e-06, + "loss": 0.3119, + "step": 33069 + }, + { + "epoch": 0.5714335084324026, + "grad_norm": 0.5343754696760166, + "learning_rate": 8.182620409780576e-06, + "loss": 0.8267, + "step": 33070 + }, + { + "epoch": 0.5714507879458115, + "grad_norm": 0.8381540155590537, + "learning_rate": 8.182070082939925e-06, + "loss": 0.3254, + "step": 33071 + }, + { + "epoch": 0.5714680674592203, + "grad_norm": 1.5965645552184604, + "learning_rate": 8.181519761793172e-06, + "loss": 0.5078, + "step": 33072 + }, + { + "epoch": 0.5714853469726292, + "grad_norm": 1.1182162996386587, + "learning_rate": 8.180969446342035e-06, + "loss": 0.4105, + "step": 33073 + }, + { + "epoch": 0.5715026264860381, + "grad_norm": 0.9749627012651179, + "learning_rate": 8.180419136588241e-06, + "loss": 0.5361, + "step": 33074 + }, + { + "epoch": 0.571519905999447, + "grad_norm": 0.9146146159307728, + "learning_rate": 8.179868832533515e-06, + "loss": 0.3094, + "step": 33075 + }, + { + "epoch": 0.5715371855128559, + "grad_norm": 0.8501991758560716, + "learning_rate": 8.179318534179572e-06, + "loss": 0.4573, + "step": 33076 + }, + { + "epoch": 0.5715544650262648, + "grad_norm": 0.7317632025920461, + "learning_rate": 8.178768241528148e-06, + "loss": 0.2808, + "step": 33077 + }, + { + "epoch": 0.5715717445396737, + "grad_norm": 0.9309335243731931, + "learning_rate": 8.178217954580957e-06, + "loss": 0.222, + "step": 33078 + }, + { + "epoch": 0.5715890240530827, + "grad_norm": 1.0486644980644488, + "learning_rate": 8.177667673339727e-06, + "loss": 0.2286, + "step": 33079 + }, + { + "epoch": 0.5716063035664916, + "grad_norm": 0.8465886886961759, + "learning_rate": 8.177117397806178e-06, + "loss": 0.5255, + "step": 33080 + }, + { + "epoch": 0.5716235830799005, + "grad_norm": 1.411749017379662, + "learning_rate": 8.176567127982039e-06, + "loss": 0.3667, + "step": 33081 + }, + { + "epoch": 0.5716408625933094, + "grad_norm": 1.1001515369594963, + "learning_rate": 8.176016863869024e-06, + "loss": 0.4394, + "step": 33082 + }, + { + "epoch": 0.5716581421067183, + "grad_norm": 0.9072884370115741, + "learning_rate": 8.175466605468871e-06, + "loss": 0.4723, + "step": 33083 + }, + { + "epoch": 0.5716754216201272, + "grad_norm": 1.0758199563922581, + "learning_rate": 8.17491635278329e-06, + "loss": 0.4526, + "step": 33084 + }, + { + "epoch": 0.5716927011335361, + "grad_norm": 1.108907705704405, + "learning_rate": 8.174366105814013e-06, + "loss": 0.4783, + "step": 33085 + }, + { + "epoch": 0.571709980646945, + "grad_norm": 1.344784743201024, + "learning_rate": 8.17381586456276e-06, + "loss": 0.3063, + "step": 33086 + }, + { + "epoch": 0.5717272601603539, + "grad_norm": 1.1016696715181673, + "learning_rate": 8.173265629031254e-06, + "loss": 0.2582, + "step": 33087 + }, + { + "epoch": 0.5717445396737628, + "grad_norm": 0.9587674296568028, + "learning_rate": 8.172715399221217e-06, + "loss": 0.4241, + "step": 33088 + }, + { + "epoch": 0.5717618191871717, + "grad_norm": 1.6269420231049314, + "learning_rate": 8.172165175134374e-06, + "loss": 0.2432, + "step": 33089 + }, + { + "epoch": 0.5717790987005806, + "grad_norm": 1.1197622881433906, + "learning_rate": 8.17161495677245e-06, + "loss": 0.3434, + "step": 33090 + }, + { + "epoch": 0.5717963782139895, + "grad_norm": 1.5172797817879131, + "learning_rate": 8.171064744137166e-06, + "loss": 0.5456, + "step": 33091 + }, + { + "epoch": 0.5718136577273984, + "grad_norm": 1.3385669170478154, + "learning_rate": 8.17051453723025e-06, + "loss": 0.3373, + "step": 33092 + }, + { + "epoch": 0.5718309372408072, + "grad_norm": 1.0986309704234805, + "learning_rate": 8.169964336053415e-06, + "loss": 0.365, + "step": 33093 + }, + { + "epoch": 0.5718482167542162, + "grad_norm": 0.7880642856491273, + "learning_rate": 8.169414140608396e-06, + "loss": 0.4209, + "step": 33094 + }, + { + "epoch": 0.5718654962676251, + "grad_norm": 1.2570341563854783, + "learning_rate": 8.168863950896911e-06, + "loss": 0.4956, + "step": 33095 + }, + { + "epoch": 0.571882775781034, + "grad_norm": 1.323185223073594, + "learning_rate": 8.16831376692068e-06, + "loss": 0.4119, + "step": 33096 + }, + { + "epoch": 0.5719000552944429, + "grad_norm": 0.9656265257270058, + "learning_rate": 8.167763588681433e-06, + "loss": 0.4638, + "step": 33097 + }, + { + "epoch": 0.5719173348078518, + "grad_norm": 1.515885895093406, + "learning_rate": 8.167213416180886e-06, + "loss": 0.4493, + "step": 33098 + }, + { + "epoch": 0.5719346143212607, + "grad_norm": 1.2424808168165824, + "learning_rate": 8.166663249420767e-06, + "loss": 0.6154, + "step": 33099 + }, + { + "epoch": 0.5719518938346696, + "grad_norm": 0.810577671475085, + "learning_rate": 8.166113088402798e-06, + "loss": 0.2123, + "step": 33100 + }, + { + "epoch": 0.5719691733480785, + "grad_norm": 0.7736326063401611, + "learning_rate": 8.165562933128706e-06, + "loss": 0.4222, + "step": 33101 + }, + { + "epoch": 0.5719864528614874, + "grad_norm": 1.433196999138426, + "learning_rate": 8.165012783600202e-06, + "loss": 0.4363, + "step": 33102 + }, + { + "epoch": 0.5720037323748963, + "grad_norm": 1.214449454072013, + "learning_rate": 8.164462639819026e-06, + "loss": 0.5299, + "step": 33103 + }, + { + "epoch": 0.5720210118883052, + "grad_norm": 1.0044949490733763, + "learning_rate": 8.163912501786886e-06, + "loss": 0.4418, + "step": 33104 + }, + { + "epoch": 0.5720382914017141, + "grad_norm": 1.7155147306217018, + "learning_rate": 8.163362369505516e-06, + "loss": 0.3988, + "step": 33105 + }, + { + "epoch": 0.572055570915123, + "grad_norm": 1.4504336319752011, + "learning_rate": 8.162812242976636e-06, + "loss": 0.2866, + "step": 33106 + }, + { + "epoch": 0.572072850428532, + "grad_norm": 1.3542467277276962, + "learning_rate": 8.162262122201964e-06, + "loss": 0.4552, + "step": 33107 + }, + { + "epoch": 0.5720901299419409, + "grad_norm": 1.1076622289345772, + "learning_rate": 8.161712007183229e-06, + "loss": 0.4047, + "step": 33108 + }, + { + "epoch": 0.5721074094553498, + "grad_norm": 1.143024096803202, + "learning_rate": 8.161161897922147e-06, + "loss": 0.278, + "step": 33109 + }, + { + "epoch": 0.5721246889687587, + "grad_norm": 1.079302003758935, + "learning_rate": 8.16061179442045e-06, + "loss": 0.3548, + "step": 33110 + }, + { + "epoch": 0.5721419684821676, + "grad_norm": 0.8268440547009274, + "learning_rate": 8.160061696679854e-06, + "loss": 0.4178, + "step": 33111 + }, + { + "epoch": 0.5721592479955765, + "grad_norm": 0.9038742016288007, + "learning_rate": 8.159511604702088e-06, + "loss": 0.398, + "step": 33112 + }, + { + "epoch": 0.5721765275089854, + "grad_norm": 1.0552683403112333, + "learning_rate": 8.15896151848887e-06, + "loss": 0.4003, + "step": 33113 + }, + { + "epoch": 0.5721938070223942, + "grad_norm": 1.2674115859191617, + "learning_rate": 8.158411438041925e-06, + "loss": 0.5431, + "step": 33114 + }, + { + "epoch": 0.5722110865358031, + "grad_norm": 1.0179648327118005, + "learning_rate": 8.15786136336297e-06, + "loss": 0.4589, + "step": 33115 + }, + { + "epoch": 0.572228366049212, + "grad_norm": 1.147769501230997, + "learning_rate": 8.15731129445374e-06, + "loss": 0.3945, + "step": 33116 + }, + { + "epoch": 0.5722456455626209, + "grad_norm": 0.8345809460911987, + "learning_rate": 8.156761231315951e-06, + "loss": 0.1764, + "step": 33117 + }, + { + "epoch": 0.5722629250760298, + "grad_norm": 1.287689125569117, + "learning_rate": 8.156211173951323e-06, + "loss": 0.5181, + "step": 33118 + }, + { + "epoch": 0.5722802045894387, + "grad_norm": 1.1205617967798251, + "learning_rate": 8.155661122361582e-06, + "loss": 0.423, + "step": 33119 + }, + { + "epoch": 0.5722974841028476, + "grad_norm": 0.6454756198667841, + "learning_rate": 8.15511107654845e-06, + "loss": 0.3267, + "step": 33120 + }, + { + "epoch": 0.5723147636162565, + "grad_norm": 1.1869799632663018, + "learning_rate": 8.154561036513654e-06, + "loss": 0.3644, + "step": 33121 + }, + { + "epoch": 0.5723320431296655, + "grad_norm": 1.3444639411572588, + "learning_rate": 8.154011002258905e-06, + "loss": 0.5485, + "step": 33122 + }, + { + "epoch": 0.5723493226430744, + "grad_norm": 1.2651182782117214, + "learning_rate": 8.153460973785942e-06, + "loss": 0.3766, + "step": 33123 + }, + { + "epoch": 0.5723666021564833, + "grad_norm": 1.649291932064492, + "learning_rate": 8.152910951096476e-06, + "loss": 0.3532, + "step": 33124 + }, + { + "epoch": 0.5723838816698922, + "grad_norm": 1.011242366484529, + "learning_rate": 8.152360934192235e-06, + "loss": 0.4124, + "step": 33125 + }, + { + "epoch": 0.5724011611833011, + "grad_norm": 1.0331140356630675, + "learning_rate": 8.151810923074942e-06, + "loss": 0.3504, + "step": 33126 + }, + { + "epoch": 0.57241844069671, + "grad_norm": 0.6185783873772654, + "learning_rate": 8.151260917746314e-06, + "loss": 0.1895, + "step": 33127 + }, + { + "epoch": 0.5724357202101189, + "grad_norm": 0.8551501265227015, + "learning_rate": 8.150710918208078e-06, + "loss": 0.4872, + "step": 33128 + }, + { + "epoch": 0.5724529997235278, + "grad_norm": 1.1103302030791238, + "learning_rate": 8.150160924461954e-06, + "loss": 0.3791, + "step": 33129 + }, + { + "epoch": 0.5724702792369367, + "grad_norm": 1.3277830676824771, + "learning_rate": 8.149610936509669e-06, + "loss": 0.3858, + "step": 33130 + }, + { + "epoch": 0.5724875587503456, + "grad_norm": 1.2415191179425484, + "learning_rate": 8.14906095435294e-06, + "loss": 0.2795, + "step": 33131 + }, + { + "epoch": 0.5725048382637545, + "grad_norm": 1.1187361174205088, + "learning_rate": 8.1485109779935e-06, + "loss": 0.2212, + "step": 33132 + }, + { + "epoch": 0.5725221177771634, + "grad_norm": 1.0063753874227148, + "learning_rate": 8.147961007433057e-06, + "loss": 0.5535, + "step": 33133 + }, + { + "epoch": 0.5725393972905723, + "grad_norm": 0.9677136219546603, + "learning_rate": 8.147411042673345e-06, + "loss": 0.3326, + "step": 33134 + }, + { + "epoch": 0.5725566768039811, + "grad_norm": 1.2477938651472555, + "learning_rate": 8.146861083716078e-06, + "loss": 0.4693, + "step": 33135 + }, + { + "epoch": 0.57257395631739, + "grad_norm": 1.2568176488644738, + "learning_rate": 8.146311130562987e-06, + "loss": 0.4022, + "step": 33136 + }, + { + "epoch": 0.572591235830799, + "grad_norm": 0.9976616813497854, + "learning_rate": 8.145761183215792e-06, + "loss": 0.6159, + "step": 33137 + }, + { + "epoch": 0.5726085153442079, + "grad_norm": 0.6643392020939098, + "learning_rate": 8.14521124167621e-06, + "loss": 0.2642, + "step": 33138 + }, + { + "epoch": 0.5726257948576168, + "grad_norm": 1.141448921866054, + "learning_rate": 8.14466130594597e-06, + "loss": 0.5126, + "step": 33139 + }, + { + "epoch": 0.5726430743710257, + "grad_norm": 1.1423327042169678, + "learning_rate": 8.144111376026788e-06, + "loss": 0.4193, + "step": 33140 + }, + { + "epoch": 0.5726603538844346, + "grad_norm": 1.3596859187875339, + "learning_rate": 8.143561451920395e-06, + "loss": 0.3308, + "step": 33141 + }, + { + "epoch": 0.5726776333978435, + "grad_norm": 0.9931202438680928, + "learning_rate": 8.143011533628502e-06, + "loss": 0.4145, + "step": 33142 + }, + { + "epoch": 0.5726949129112524, + "grad_norm": 0.8662527013633436, + "learning_rate": 8.142461621152845e-06, + "loss": 0.4853, + "step": 33143 + }, + { + "epoch": 0.5727121924246613, + "grad_norm": 1.5130062742055488, + "learning_rate": 8.141911714495135e-06, + "loss": 0.3549, + "step": 33144 + }, + { + "epoch": 0.5727294719380702, + "grad_norm": 1.6223883385540125, + "learning_rate": 8.141361813657101e-06, + "loss": 0.5518, + "step": 33145 + }, + { + "epoch": 0.5727467514514791, + "grad_norm": 0.9789888977588885, + "learning_rate": 8.14081191864046e-06, + "loss": 0.5483, + "step": 33146 + }, + { + "epoch": 0.572764030964888, + "grad_norm": 1.0718489893538685, + "learning_rate": 8.140262029446943e-06, + "loss": 0.2847, + "step": 33147 + }, + { + "epoch": 0.572781310478297, + "grad_norm": 0.6540566908445715, + "learning_rate": 8.139712146078264e-06, + "loss": 0.4085, + "step": 33148 + }, + { + "epoch": 0.5727985899917059, + "grad_norm": 1.842732630287224, + "learning_rate": 8.139162268536145e-06, + "loss": 0.2983, + "step": 33149 + }, + { + "epoch": 0.5728158695051148, + "grad_norm": 1.3315991064415438, + "learning_rate": 8.138612396822315e-06, + "loss": 0.5272, + "step": 33150 + }, + { + "epoch": 0.5728331490185237, + "grad_norm": 1.026354746597122, + "learning_rate": 8.138062530938488e-06, + "loss": 0.3602, + "step": 33151 + }, + { + "epoch": 0.5728504285319326, + "grad_norm": 0.9839519526713594, + "learning_rate": 8.137512670886397e-06, + "loss": 0.4303, + "step": 33152 + }, + { + "epoch": 0.5728677080453415, + "grad_norm": 1.2252044766368024, + "learning_rate": 8.136962816667752e-06, + "loss": 0.4771, + "step": 33153 + }, + { + "epoch": 0.5728849875587504, + "grad_norm": 0.6630887567666734, + "learning_rate": 8.136412968284286e-06, + "loss": 0.6273, + "step": 33154 + }, + { + "epoch": 0.5729022670721593, + "grad_norm": 0.90070474237452, + "learning_rate": 8.135863125737711e-06, + "loss": 0.4555, + "step": 33155 + }, + { + "epoch": 0.5729195465855681, + "grad_norm": 0.7397386991843709, + "learning_rate": 8.135313289029758e-06, + "loss": 0.3926, + "step": 33156 + }, + { + "epoch": 0.572936826098977, + "grad_norm": 1.0057989806553571, + "learning_rate": 8.134763458162146e-06, + "loss": 0.6159, + "step": 33157 + }, + { + "epoch": 0.5729541056123859, + "grad_norm": 1.1884608040045734, + "learning_rate": 8.134213633136594e-06, + "loss": 0.3856, + "step": 33158 + }, + { + "epoch": 0.5729713851257948, + "grad_norm": 1.1825332903994483, + "learning_rate": 8.133663813954829e-06, + "loss": 0.4226, + "step": 33159 + }, + { + "epoch": 0.5729886646392037, + "grad_norm": 1.0668782099376362, + "learning_rate": 8.133114000618568e-06, + "loss": 0.5435, + "step": 33160 + }, + { + "epoch": 0.5730059441526126, + "grad_norm": 1.030381252941808, + "learning_rate": 8.13256419312954e-06, + "loss": 0.5795, + "step": 33161 + }, + { + "epoch": 0.5730232236660215, + "grad_norm": 1.0855606185479325, + "learning_rate": 8.132014391489456e-06, + "loss": 0.3592, + "step": 33162 + }, + { + "epoch": 0.5730405031794304, + "grad_norm": 1.310755692707525, + "learning_rate": 8.131464595700052e-06, + "loss": 0.3087, + "step": 33163 + }, + { + "epoch": 0.5730577826928394, + "grad_norm": 1.6938946664752896, + "learning_rate": 8.13091480576304e-06, + "loss": 0.3879, + "step": 33164 + }, + { + "epoch": 0.5730750622062483, + "grad_norm": 1.0397208365829462, + "learning_rate": 8.130365021680144e-06, + "loss": 0.4331, + "step": 33165 + }, + { + "epoch": 0.5730923417196572, + "grad_norm": 1.5025966325822984, + "learning_rate": 8.129815243453086e-06, + "loss": 0.3745, + "step": 33166 + }, + { + "epoch": 0.5731096212330661, + "grad_norm": 1.351712130058012, + "learning_rate": 8.129265471083594e-06, + "loss": 0.3972, + "step": 33167 + }, + { + "epoch": 0.573126900746475, + "grad_norm": 1.4886641093912305, + "learning_rate": 8.128715704573383e-06, + "loss": 0.4295, + "step": 33168 + }, + { + "epoch": 0.5731441802598839, + "grad_norm": 1.1378360175378788, + "learning_rate": 8.128165943924172e-06, + "loss": 0.4413, + "step": 33169 + }, + { + "epoch": 0.5731614597732928, + "grad_norm": 0.8985076279786798, + "learning_rate": 8.127616189137692e-06, + "loss": 0.5599, + "step": 33170 + }, + { + "epoch": 0.5731787392867017, + "grad_norm": 1.0120342892448446, + "learning_rate": 8.127066440215656e-06, + "loss": 0.3419, + "step": 33171 + }, + { + "epoch": 0.5731960188001106, + "grad_norm": 1.2526455395270908, + "learning_rate": 8.126516697159796e-06, + "loss": 0.4482, + "step": 33172 + }, + { + "epoch": 0.5732132983135195, + "grad_norm": 1.4293226099089489, + "learning_rate": 8.125966959971825e-06, + "loss": 0.4971, + "step": 33173 + }, + { + "epoch": 0.5732305778269284, + "grad_norm": 1.246555925204837, + "learning_rate": 8.125417228653468e-06, + "loss": 0.5097, + "step": 33174 + }, + { + "epoch": 0.5732478573403373, + "grad_norm": 0.7499145170569491, + "learning_rate": 8.124867503206443e-06, + "loss": 0.2705, + "step": 33175 + }, + { + "epoch": 0.5732651368537462, + "grad_norm": 1.2919460099152367, + "learning_rate": 8.124317783632481e-06, + "loss": 0.3717, + "step": 33176 + }, + { + "epoch": 0.573282416367155, + "grad_norm": 0.7165504087845911, + "learning_rate": 8.123768069933296e-06, + "loss": 0.7508, + "step": 33177 + }, + { + "epoch": 0.573299695880564, + "grad_norm": 1.1402545859483268, + "learning_rate": 8.123218362110615e-06, + "loss": 0.2628, + "step": 33178 + }, + { + "epoch": 0.5733169753939729, + "grad_norm": 1.499295827374076, + "learning_rate": 8.122668660166154e-06, + "loss": 0.5653, + "step": 33179 + }, + { + "epoch": 0.5733342549073818, + "grad_norm": 1.406011935032597, + "learning_rate": 8.122118964101636e-06, + "loss": 0.3888, + "step": 33180 + }, + { + "epoch": 0.5733515344207907, + "grad_norm": 0.7132744458014916, + "learning_rate": 8.121569273918789e-06, + "loss": 0.2868, + "step": 33181 + }, + { + "epoch": 0.5733688139341996, + "grad_norm": 0.9326756781211076, + "learning_rate": 8.121019589619322e-06, + "loss": 0.3729, + "step": 33182 + }, + { + "epoch": 0.5733860934476085, + "grad_norm": 1.0593302406410074, + "learning_rate": 8.120469911204973e-06, + "loss": 0.4836, + "step": 33183 + }, + { + "epoch": 0.5734033729610174, + "grad_norm": 0.7242214905994082, + "learning_rate": 8.119920238677448e-06, + "loss": 0.3222, + "step": 33184 + }, + { + "epoch": 0.5734206524744263, + "grad_norm": 1.672098692860256, + "learning_rate": 8.11937057203848e-06, + "loss": 0.4723, + "step": 33185 + }, + { + "epoch": 0.5734379319878352, + "grad_norm": 1.1862929917449145, + "learning_rate": 8.118820911289784e-06, + "loss": 0.5764, + "step": 33186 + }, + { + "epoch": 0.5734552115012441, + "grad_norm": 1.237651253066413, + "learning_rate": 8.118271256433086e-06, + "loss": 0.3563, + "step": 33187 + }, + { + "epoch": 0.573472491014653, + "grad_norm": 1.0553626883965472, + "learning_rate": 8.117721607470106e-06, + "loss": 0.3619, + "step": 33188 + }, + { + "epoch": 0.5734897705280619, + "grad_norm": 1.1772562530266872, + "learning_rate": 8.117171964402562e-06, + "loss": 0.3464, + "step": 33189 + }, + { + "epoch": 0.5735070500414708, + "grad_norm": 0.9080450387526334, + "learning_rate": 8.11662232723218e-06, + "loss": 0.3403, + "step": 33190 + }, + { + "epoch": 0.5735243295548798, + "grad_norm": 1.674952733158552, + "learning_rate": 8.11607269596068e-06, + "loss": 0.6372, + "step": 33191 + }, + { + "epoch": 0.5735416090682887, + "grad_norm": 1.027697139068112, + "learning_rate": 8.115523070589783e-06, + "loss": 0.2993, + "step": 33192 + }, + { + "epoch": 0.5735588885816976, + "grad_norm": 1.0198164367105118, + "learning_rate": 8.11497345112121e-06, + "loss": 0.4533, + "step": 33193 + }, + { + "epoch": 0.5735761680951065, + "grad_norm": 1.2191101730537361, + "learning_rate": 8.114423837556686e-06, + "loss": 0.4148, + "step": 33194 + }, + { + "epoch": 0.5735934476085154, + "grad_norm": 0.9174227520806671, + "learning_rate": 8.113874229897926e-06, + "loss": 0.5145, + "step": 33195 + }, + { + "epoch": 0.5736107271219243, + "grad_norm": 1.220599553465522, + "learning_rate": 8.113324628146659e-06, + "loss": 0.5564, + "step": 33196 + }, + { + "epoch": 0.5736280066353332, + "grad_norm": 0.8486083826080496, + "learning_rate": 8.112775032304601e-06, + "loss": 0.4693, + "step": 33197 + }, + { + "epoch": 0.5736452861487421, + "grad_norm": 1.1928516498826989, + "learning_rate": 8.112225442373479e-06, + "loss": 0.2588, + "step": 33198 + }, + { + "epoch": 0.5736625656621509, + "grad_norm": 0.9397398753254558, + "learning_rate": 8.111675858355005e-06, + "loss": 0.8122, + "step": 33199 + }, + { + "epoch": 0.5736798451755598, + "grad_norm": 0.9848610390268356, + "learning_rate": 8.111126280250907e-06, + "loss": 0.374, + "step": 33200 + }, + { + "epoch": 0.5736971246889687, + "grad_norm": 0.9003710605782006, + "learning_rate": 8.110576708062909e-06, + "loss": 0.5815, + "step": 33201 + }, + { + "epoch": 0.5737144042023776, + "grad_norm": 1.3794982318842837, + "learning_rate": 8.11002714179272e-06, + "loss": 0.3847, + "step": 33202 + }, + { + "epoch": 0.5737316837157865, + "grad_norm": 0.5203102001251364, + "learning_rate": 8.109477581442078e-06, + "loss": 0.5072, + "step": 33203 + }, + { + "epoch": 0.5737489632291954, + "grad_norm": 0.8967508304620995, + "learning_rate": 8.10892802701269e-06, + "loss": 0.2295, + "step": 33204 + }, + { + "epoch": 0.5737662427426043, + "grad_norm": 0.9993060972823546, + "learning_rate": 8.108378478506286e-06, + "loss": 0.3742, + "step": 33205 + }, + { + "epoch": 0.5737835222560133, + "grad_norm": 0.6404906175675529, + "learning_rate": 8.107828935924582e-06, + "loss": 0.7104, + "step": 33206 + }, + { + "epoch": 0.5738008017694222, + "grad_norm": 1.4258922112166943, + "learning_rate": 8.107279399269307e-06, + "loss": 0.7169, + "step": 33207 + }, + { + "epoch": 0.5738180812828311, + "grad_norm": 0.9235986902108302, + "learning_rate": 8.106729868542169e-06, + "loss": 0.3268, + "step": 33208 + }, + { + "epoch": 0.57383536079624, + "grad_norm": 0.6886642864392394, + "learning_rate": 8.106180343744905e-06, + "loss": 0.2915, + "step": 33209 + }, + { + "epoch": 0.5738526403096489, + "grad_norm": 0.8107693814369109, + "learning_rate": 8.105630824879225e-06, + "loss": 0.4988, + "step": 33210 + }, + { + "epoch": 0.5738699198230578, + "grad_norm": 0.7761150341364996, + "learning_rate": 8.10508131194685e-06, + "loss": 0.2279, + "step": 33211 + }, + { + "epoch": 0.5738871993364667, + "grad_norm": 0.6623749813865701, + "learning_rate": 8.10453180494951e-06, + "loss": 0.4098, + "step": 33212 + }, + { + "epoch": 0.5739044788498756, + "grad_norm": 1.312871031306195, + "learning_rate": 8.103982303888916e-06, + "loss": 0.4781, + "step": 33213 + }, + { + "epoch": 0.5739217583632845, + "grad_norm": 1.0096474068534371, + "learning_rate": 8.103432808766794e-06, + "loss": 0.4133, + "step": 33214 + }, + { + "epoch": 0.5739390378766934, + "grad_norm": 1.3738738027638127, + "learning_rate": 8.102883319584864e-06, + "loss": 0.3222, + "step": 33215 + }, + { + "epoch": 0.5739563173901023, + "grad_norm": 0.7321959270686056, + "learning_rate": 8.10233383634485e-06, + "loss": 0.2045, + "step": 33216 + }, + { + "epoch": 0.5739735969035112, + "grad_norm": 0.9101814481710397, + "learning_rate": 8.101784359048468e-06, + "loss": 0.4666, + "step": 33217 + }, + { + "epoch": 0.5739908764169201, + "grad_norm": 1.0090588812852295, + "learning_rate": 8.101234887697447e-06, + "loss": 0.4313, + "step": 33218 + }, + { + "epoch": 0.5740081559303291, + "grad_norm": 1.1043546281820193, + "learning_rate": 8.100685422293498e-06, + "loss": 0.444, + "step": 33219 + }, + { + "epoch": 0.5740254354437379, + "grad_norm": 0.9372856957071363, + "learning_rate": 8.100135962838346e-06, + "loss": 0.4007, + "step": 33220 + }, + { + "epoch": 0.5740427149571468, + "grad_norm": 1.5229669255103273, + "learning_rate": 8.099586509333716e-06, + "loss": 0.4631, + "step": 33221 + }, + { + "epoch": 0.5740599944705557, + "grad_norm": 0.8284034516661285, + "learning_rate": 8.09903706178132e-06, + "loss": 0.7477, + "step": 33222 + }, + { + "epoch": 0.5740772739839646, + "grad_norm": 1.1154518443775718, + "learning_rate": 8.098487620182892e-06, + "loss": 0.3454, + "step": 33223 + }, + { + "epoch": 0.5740945534973735, + "grad_norm": 1.3418258137740495, + "learning_rate": 8.097938184540139e-06, + "loss": 0.4452, + "step": 33224 + }, + { + "epoch": 0.5741118330107824, + "grad_norm": 0.9386385239377921, + "learning_rate": 8.097388754854792e-06, + "loss": 0.4032, + "step": 33225 + }, + { + "epoch": 0.5741291125241913, + "grad_norm": 0.8018209355469504, + "learning_rate": 8.096839331128564e-06, + "loss": 0.3277, + "step": 33226 + }, + { + "epoch": 0.5741463920376002, + "grad_norm": 0.9990780465767731, + "learning_rate": 8.096289913363185e-06, + "loss": 0.4471, + "step": 33227 + }, + { + "epoch": 0.5741636715510091, + "grad_norm": 0.8396502140754245, + "learning_rate": 8.095740501560364e-06, + "loss": 0.3963, + "step": 33228 + }, + { + "epoch": 0.574180951064418, + "grad_norm": 1.0748370821635298, + "learning_rate": 8.095191095721835e-06, + "loss": 0.4585, + "step": 33229 + }, + { + "epoch": 0.5741982305778269, + "grad_norm": 1.1976787868147682, + "learning_rate": 8.09464169584931e-06, + "loss": 0.3079, + "step": 33230 + }, + { + "epoch": 0.5742155100912358, + "grad_norm": 1.333573821583128, + "learning_rate": 8.09409230194451e-06, + "loss": 0.3498, + "step": 33231 + }, + { + "epoch": 0.5742327896046447, + "grad_norm": 0.6697960290818258, + "learning_rate": 8.093542914009163e-06, + "loss": 0.3245, + "step": 33232 + }, + { + "epoch": 0.5742500691180537, + "grad_norm": 1.0399738909055973, + "learning_rate": 8.09299353204498e-06, + "loss": 0.3771, + "step": 33233 + }, + { + "epoch": 0.5742673486314626, + "grad_norm": 1.0855837198441238, + "learning_rate": 8.092444156053687e-06, + "loss": 0.3843, + "step": 33234 + }, + { + "epoch": 0.5742846281448715, + "grad_norm": 1.185498859187678, + "learning_rate": 8.091894786037002e-06, + "loss": 0.3336, + "step": 33235 + }, + { + "epoch": 0.5743019076582804, + "grad_norm": 0.9454543343633484, + "learning_rate": 8.09134542199665e-06, + "loss": 0.4143, + "step": 33236 + }, + { + "epoch": 0.5743191871716893, + "grad_norm": 0.6916869522761094, + "learning_rate": 8.090796063934348e-06, + "loss": 0.8091, + "step": 33237 + }, + { + "epoch": 0.5743364666850982, + "grad_norm": 1.4866235254172473, + "learning_rate": 8.09024671185182e-06, + "loss": 0.3355, + "step": 33238 + }, + { + "epoch": 0.5743537461985071, + "grad_norm": 1.2707558350052552, + "learning_rate": 8.089697365750783e-06, + "loss": 0.3817, + "step": 33239 + }, + { + "epoch": 0.574371025711916, + "grad_norm": 1.0514509883842746, + "learning_rate": 8.089148025632958e-06, + "loss": 0.5499, + "step": 33240 + }, + { + "epoch": 0.5743883052253248, + "grad_norm": 0.7602945204297924, + "learning_rate": 8.08859869150007e-06, + "loss": 0.5226, + "step": 33241 + }, + { + "epoch": 0.5744055847387337, + "grad_norm": 1.4662606040774069, + "learning_rate": 8.088049363353831e-06, + "loss": 0.5211, + "step": 33242 + }, + { + "epoch": 0.5744228642521426, + "grad_norm": 0.880418005756808, + "learning_rate": 8.087500041195971e-06, + "loss": 0.3887, + "step": 33243 + }, + { + "epoch": 0.5744401437655515, + "grad_norm": 0.8635028843159169, + "learning_rate": 8.086950725028203e-06, + "loss": 0.3258, + "step": 33244 + }, + { + "epoch": 0.5744574232789604, + "grad_norm": 0.7275363600015586, + "learning_rate": 8.086401414852254e-06, + "loss": 0.3489, + "step": 33245 + }, + { + "epoch": 0.5744747027923693, + "grad_norm": 0.48672736206044726, + "learning_rate": 8.085852110669837e-06, + "loss": 0.6964, + "step": 33246 + }, + { + "epoch": 0.5744919823057782, + "grad_norm": 0.8597354955318779, + "learning_rate": 8.085302812482683e-06, + "loss": 0.4247, + "step": 33247 + }, + { + "epoch": 0.5745092618191872, + "grad_norm": 0.803371853099107, + "learning_rate": 8.084753520292497e-06, + "loss": 0.4319, + "step": 33248 + }, + { + "epoch": 0.5745265413325961, + "grad_norm": 0.7168941275568027, + "learning_rate": 8.084204234101017e-06, + "loss": 0.3092, + "step": 33249 + }, + { + "epoch": 0.574543820846005, + "grad_norm": 1.01071053461285, + "learning_rate": 8.083654953909953e-06, + "loss": 0.3564, + "step": 33250 + }, + { + "epoch": 0.5745611003594139, + "grad_norm": 0.6813828085342247, + "learning_rate": 8.083105679721024e-06, + "loss": 0.8191, + "step": 33251 + }, + { + "epoch": 0.5745783798728228, + "grad_norm": 1.1130114209941635, + "learning_rate": 8.082556411535959e-06, + "loss": 0.3529, + "step": 33252 + }, + { + "epoch": 0.5745956593862317, + "grad_norm": 1.1931119062477646, + "learning_rate": 8.082007149356468e-06, + "loss": 0.4218, + "step": 33253 + }, + { + "epoch": 0.5746129388996406, + "grad_norm": 0.7325142401210665, + "learning_rate": 8.081457893184279e-06, + "loss": 0.2519, + "step": 33254 + }, + { + "epoch": 0.5746302184130495, + "grad_norm": 0.6621330280476944, + "learning_rate": 8.080908643021107e-06, + "loss": 0.4922, + "step": 33255 + }, + { + "epoch": 0.5746474979264584, + "grad_norm": 0.41816274071031345, + "learning_rate": 8.080359398868676e-06, + "loss": 0.6704, + "step": 33256 + }, + { + "epoch": 0.5746647774398673, + "grad_norm": 1.2318692325429856, + "learning_rate": 8.079810160728706e-06, + "loss": 0.382, + "step": 33257 + }, + { + "epoch": 0.5746820569532762, + "grad_norm": 1.05829547829082, + "learning_rate": 8.07926092860292e-06, + "loss": 0.2861, + "step": 33258 + }, + { + "epoch": 0.5746993364666851, + "grad_norm": 0.8053682893047689, + "learning_rate": 8.078711702493029e-06, + "loss": 0.289, + "step": 33259 + }, + { + "epoch": 0.574716615980094, + "grad_norm": 1.185301472756332, + "learning_rate": 8.078162482400762e-06, + "loss": 0.3459, + "step": 33260 + }, + { + "epoch": 0.574733895493503, + "grad_norm": 0.892012166776629, + "learning_rate": 8.077613268327838e-06, + "loss": 0.4234, + "step": 33261 + }, + { + "epoch": 0.5747511750069118, + "grad_norm": 1.0001159188624984, + "learning_rate": 8.077064060275968e-06, + "loss": 0.3587, + "step": 33262 + }, + { + "epoch": 0.5747684545203207, + "grad_norm": 1.204534437886159, + "learning_rate": 8.076514858246887e-06, + "loss": 0.3092, + "step": 33263 + }, + { + "epoch": 0.5747857340337296, + "grad_norm": 0.8930129553832306, + "learning_rate": 8.075965662242304e-06, + "loss": 0.3802, + "step": 33264 + }, + { + "epoch": 0.5748030135471385, + "grad_norm": 0.8629261796954031, + "learning_rate": 8.075416472263944e-06, + "loss": 0.3447, + "step": 33265 + }, + { + "epoch": 0.5748202930605474, + "grad_norm": 0.5725572292951173, + "learning_rate": 8.074867288313524e-06, + "loss": 0.5421, + "step": 33266 + }, + { + "epoch": 0.5748375725739563, + "grad_norm": 1.266343590068919, + "learning_rate": 8.07431811039277e-06, + "loss": 0.3398, + "step": 33267 + }, + { + "epoch": 0.5748548520873652, + "grad_norm": 1.1582403516159148, + "learning_rate": 8.073768938503393e-06, + "loss": 0.2976, + "step": 33268 + }, + { + "epoch": 0.5748721316007741, + "grad_norm": 0.7386250491117127, + "learning_rate": 8.073219772647123e-06, + "loss": 0.3695, + "step": 33269 + }, + { + "epoch": 0.574889411114183, + "grad_norm": 1.1548133426733491, + "learning_rate": 8.072670612825672e-06, + "loss": 0.6006, + "step": 33270 + }, + { + "epoch": 0.5749066906275919, + "grad_norm": 0.9431039724867938, + "learning_rate": 8.072121459040762e-06, + "loss": 0.4381, + "step": 33271 + }, + { + "epoch": 0.5749239701410008, + "grad_norm": 1.0896352923097172, + "learning_rate": 8.07157231129412e-06, + "loss": 0.2973, + "step": 33272 + }, + { + "epoch": 0.5749412496544097, + "grad_norm": 1.5729797270331272, + "learning_rate": 8.071023169587454e-06, + "loss": 0.4761, + "step": 33273 + }, + { + "epoch": 0.5749585291678186, + "grad_norm": 1.3410289495683447, + "learning_rate": 8.070474033922493e-06, + "loss": 0.446, + "step": 33274 + }, + { + "epoch": 0.5749758086812276, + "grad_norm": 1.4713041944416512, + "learning_rate": 8.069924904300953e-06, + "loss": 0.5567, + "step": 33275 + }, + { + "epoch": 0.5749930881946365, + "grad_norm": 0.7072872380296058, + "learning_rate": 8.069375780724555e-06, + "loss": 0.5366, + "step": 33276 + }, + { + "epoch": 0.5750103677080454, + "grad_norm": 0.7818179670432186, + "learning_rate": 8.068826663195016e-06, + "loss": 0.246, + "step": 33277 + }, + { + "epoch": 0.5750276472214543, + "grad_norm": 0.8485729369753485, + "learning_rate": 8.068277551714064e-06, + "loss": 0.7003, + "step": 33278 + }, + { + "epoch": 0.5750449267348632, + "grad_norm": 1.2405645939261243, + "learning_rate": 8.067728446283409e-06, + "loss": 0.4592, + "step": 33279 + }, + { + "epoch": 0.5750622062482721, + "grad_norm": 0.6836885311738365, + "learning_rate": 8.067179346904778e-06, + "loss": 0.6876, + "step": 33280 + }, + { + "epoch": 0.575079485761681, + "grad_norm": 1.3364790970278755, + "learning_rate": 8.066630253579891e-06, + "loss": 0.2614, + "step": 33281 + }, + { + "epoch": 0.5750967652750899, + "grad_norm": 0.9390725936690727, + "learning_rate": 8.066081166310457e-06, + "loss": 0.3048, + "step": 33282 + }, + { + "epoch": 0.5751140447884987, + "grad_norm": 0.707827104406684, + "learning_rate": 8.06553208509821e-06, + "loss": 0.2283, + "step": 33283 + }, + { + "epoch": 0.5751313243019076, + "grad_norm": 1.3645654252205566, + "learning_rate": 8.06498300994486e-06, + "loss": 0.5317, + "step": 33284 + }, + { + "epoch": 0.5751486038153165, + "grad_norm": 0.91332935433758, + "learning_rate": 8.064433940852133e-06, + "loss": 0.3883, + "step": 33285 + }, + { + "epoch": 0.5751658833287254, + "grad_norm": 0.8167297969134626, + "learning_rate": 8.063884877821743e-06, + "loss": 0.2665, + "step": 33286 + }, + { + "epoch": 0.5751831628421343, + "grad_norm": 0.46209899745619, + "learning_rate": 8.063335820855418e-06, + "loss": 0.4926, + "step": 33287 + }, + { + "epoch": 0.5752004423555432, + "grad_norm": 0.7489155991231711, + "learning_rate": 8.062786769954865e-06, + "loss": 0.317, + "step": 33288 + }, + { + "epoch": 0.5752177218689521, + "grad_norm": 1.2173392580916642, + "learning_rate": 8.062237725121818e-06, + "loss": 0.3622, + "step": 33289 + }, + { + "epoch": 0.575235001382361, + "grad_norm": 0.8576312854052072, + "learning_rate": 8.061688686357985e-06, + "loss": 0.4691, + "step": 33290 + }, + { + "epoch": 0.57525228089577, + "grad_norm": 1.349031883883007, + "learning_rate": 8.061139653665091e-06, + "loss": 0.3008, + "step": 33291 + }, + { + "epoch": 0.5752695604091789, + "grad_norm": 0.7002363946889636, + "learning_rate": 8.06059062704486e-06, + "loss": 0.325, + "step": 33292 + }, + { + "epoch": 0.5752868399225878, + "grad_norm": 0.9096759852905465, + "learning_rate": 8.060041606499002e-06, + "loss": 0.2967, + "step": 33293 + }, + { + "epoch": 0.5753041194359967, + "grad_norm": 1.2417342820420563, + "learning_rate": 8.059492592029241e-06, + "loss": 0.5902, + "step": 33294 + }, + { + "epoch": 0.5753213989494056, + "grad_norm": 2.166984350983956, + "learning_rate": 8.058943583637296e-06, + "loss": 0.4861, + "step": 33295 + }, + { + "epoch": 0.5753386784628145, + "grad_norm": 1.1700762567081748, + "learning_rate": 8.05839458132489e-06, + "loss": 0.4506, + "step": 33296 + }, + { + "epoch": 0.5753559579762234, + "grad_norm": 1.0153373068572564, + "learning_rate": 8.057845585093737e-06, + "loss": 0.4414, + "step": 33297 + }, + { + "epoch": 0.5753732374896323, + "grad_norm": 0.9560142083220645, + "learning_rate": 8.057296594945563e-06, + "loss": 0.3512, + "step": 33298 + }, + { + "epoch": 0.5753905170030412, + "grad_norm": 1.2376591266538015, + "learning_rate": 8.056747610882078e-06, + "loss": 0.285, + "step": 33299 + }, + { + "epoch": 0.5754077965164501, + "grad_norm": 0.8704466333528542, + "learning_rate": 8.056198632905013e-06, + "loss": 0.4586, + "step": 33300 + }, + { + "epoch": 0.575425076029859, + "grad_norm": 1.2068858001020568, + "learning_rate": 8.055649661016081e-06, + "loss": 0.6708, + "step": 33301 + }, + { + "epoch": 0.575442355543268, + "grad_norm": 0.6853426911743578, + "learning_rate": 8.055100695216996e-06, + "loss": 0.5309, + "step": 33302 + }, + { + "epoch": 0.5754596350566769, + "grad_norm": 1.1231805980084757, + "learning_rate": 8.054551735509491e-06, + "loss": 0.5401, + "step": 33303 + }, + { + "epoch": 0.5754769145700857, + "grad_norm": 1.1382159775131817, + "learning_rate": 8.054002781895272e-06, + "loss": 0.2805, + "step": 33304 + }, + { + "epoch": 0.5754941940834946, + "grad_norm": 1.5813019763257665, + "learning_rate": 8.053453834376067e-06, + "loss": 0.436, + "step": 33305 + }, + { + "epoch": 0.5755114735969035, + "grad_norm": 1.04308506447756, + "learning_rate": 8.05290489295359e-06, + "loss": 0.3269, + "step": 33306 + }, + { + "epoch": 0.5755287531103124, + "grad_norm": 1.3200122823833833, + "learning_rate": 8.052355957629568e-06, + "loss": 0.3422, + "step": 33307 + }, + { + "epoch": 0.5755460326237213, + "grad_norm": 1.4276564478715335, + "learning_rate": 8.051807028405711e-06, + "loss": 0.4544, + "step": 33308 + }, + { + "epoch": 0.5755633121371302, + "grad_norm": 1.0938882597524022, + "learning_rate": 8.051258105283743e-06, + "loss": 0.5333, + "step": 33309 + }, + { + "epoch": 0.5755805916505391, + "grad_norm": 0.98569852934408, + "learning_rate": 8.050709188265382e-06, + "loss": 0.3276, + "step": 33310 + }, + { + "epoch": 0.575597871163948, + "grad_norm": 1.0156213095922433, + "learning_rate": 8.05016027735235e-06, + "loss": 0.3038, + "step": 33311 + }, + { + "epoch": 0.5756151506773569, + "grad_norm": 1.089697120607976, + "learning_rate": 8.049611372546366e-06, + "loss": 0.4049, + "step": 33312 + }, + { + "epoch": 0.5756324301907658, + "grad_norm": 1.4447836435272532, + "learning_rate": 8.049062473849143e-06, + "loss": 0.4476, + "step": 33313 + }, + { + "epoch": 0.5756497097041747, + "grad_norm": 0.9179825257187926, + "learning_rate": 8.048513581262408e-06, + "loss": 0.4905, + "step": 33314 + }, + { + "epoch": 0.5756669892175836, + "grad_norm": 0.692099379737729, + "learning_rate": 8.047964694787872e-06, + "loss": 0.6137, + "step": 33315 + }, + { + "epoch": 0.5756842687309925, + "grad_norm": 1.422862293121462, + "learning_rate": 8.047415814427262e-06, + "loss": 0.3972, + "step": 33316 + }, + { + "epoch": 0.5757015482444015, + "grad_norm": 0.8415768043518013, + "learning_rate": 8.046866940182293e-06, + "loss": 0.4034, + "step": 33317 + }, + { + "epoch": 0.5757188277578104, + "grad_norm": 0.9631573376274439, + "learning_rate": 8.04631807205469e-06, + "loss": 0.3987, + "step": 33318 + }, + { + "epoch": 0.5757361072712193, + "grad_norm": 1.0099210146277302, + "learning_rate": 8.045769210046162e-06, + "loss": 0.4288, + "step": 33319 + }, + { + "epoch": 0.5757533867846282, + "grad_norm": 0.514555982275855, + "learning_rate": 8.045220354158435e-06, + "loss": 0.5846, + "step": 33320 + }, + { + "epoch": 0.5757706662980371, + "grad_norm": 1.0215035050306676, + "learning_rate": 8.044671504393224e-06, + "loss": 0.3321, + "step": 33321 + }, + { + "epoch": 0.575787945811446, + "grad_norm": 0.9167939201131596, + "learning_rate": 8.044122660752254e-06, + "loss": 0.4948, + "step": 33322 + }, + { + "epoch": 0.5758052253248549, + "grad_norm": 1.3484205846434354, + "learning_rate": 8.043573823237242e-06, + "loss": 0.6236, + "step": 33323 + }, + { + "epoch": 0.5758225048382638, + "grad_norm": 1.107121391179708, + "learning_rate": 8.043024991849902e-06, + "loss": 0.3453, + "step": 33324 + }, + { + "epoch": 0.5758397843516727, + "grad_norm": 0.9971766744939112, + "learning_rate": 8.042476166591957e-06, + "loss": 0.3142, + "step": 33325 + }, + { + "epoch": 0.5758570638650815, + "grad_norm": 0.9370299382699028, + "learning_rate": 8.041927347465125e-06, + "loss": 0.3668, + "step": 33326 + }, + { + "epoch": 0.5758743433784904, + "grad_norm": 0.5331056364035421, + "learning_rate": 8.041378534471127e-06, + "loss": 0.5158, + "step": 33327 + }, + { + "epoch": 0.5758916228918993, + "grad_norm": 1.0320709049435015, + "learning_rate": 8.040829727611678e-06, + "loss": 0.2938, + "step": 33328 + }, + { + "epoch": 0.5759089024053082, + "grad_norm": 0.9811487651890144, + "learning_rate": 8.040280926888502e-06, + "loss": 0.4386, + "step": 33329 + }, + { + "epoch": 0.5759261819187171, + "grad_norm": 0.7490867224393675, + "learning_rate": 8.03973213230331e-06, + "loss": 0.5003, + "step": 33330 + }, + { + "epoch": 0.575943461432126, + "grad_norm": 0.8277667381203982, + "learning_rate": 8.03918334385783e-06, + "loss": 0.2814, + "step": 33331 + }, + { + "epoch": 0.575960740945535, + "grad_norm": 0.6125772926492875, + "learning_rate": 8.03863456155378e-06, + "loss": 0.7299, + "step": 33332 + }, + { + "epoch": 0.5759780204589439, + "grad_norm": 1.0787112372174914, + "learning_rate": 8.03808578539287e-06, + "loss": 0.377, + "step": 33333 + }, + { + "epoch": 0.5759952999723528, + "grad_norm": 1.4478745908393125, + "learning_rate": 8.037537015376826e-06, + "loss": 0.3974, + "step": 33334 + }, + { + "epoch": 0.5760125794857617, + "grad_norm": 0.7689522022020381, + "learning_rate": 8.036988251507364e-06, + "loss": 0.2521, + "step": 33335 + }, + { + "epoch": 0.5760298589991706, + "grad_norm": 0.9980966954483415, + "learning_rate": 8.036439493786205e-06, + "loss": 0.2731, + "step": 33336 + }, + { + "epoch": 0.5760471385125795, + "grad_norm": 0.8934079520081113, + "learning_rate": 8.035890742215066e-06, + "loss": 0.3878, + "step": 33337 + }, + { + "epoch": 0.5760644180259884, + "grad_norm": 1.1453450232168727, + "learning_rate": 8.03534199679567e-06, + "loss": 0.3928, + "step": 33338 + }, + { + "epoch": 0.5760816975393973, + "grad_norm": 1.1821290420393786, + "learning_rate": 8.03479325752973e-06, + "loss": 0.3437, + "step": 33339 + }, + { + "epoch": 0.5760989770528062, + "grad_norm": 0.6848071856557146, + "learning_rate": 8.034244524418966e-06, + "loss": 0.3977, + "step": 33340 + }, + { + "epoch": 0.5761162565662151, + "grad_norm": 1.096731411550037, + "learning_rate": 8.033695797465096e-06, + "loss": 0.384, + "step": 33341 + }, + { + "epoch": 0.576133536079624, + "grad_norm": 0.6712184732793017, + "learning_rate": 8.033147076669842e-06, + "loss": 0.534, + "step": 33342 + }, + { + "epoch": 0.5761508155930329, + "grad_norm": 1.1048052756248634, + "learning_rate": 8.032598362034924e-06, + "loss": 0.3347, + "step": 33343 + }, + { + "epoch": 0.5761680951064418, + "grad_norm": 1.160088624086669, + "learning_rate": 8.032049653562055e-06, + "loss": 0.2701, + "step": 33344 + }, + { + "epoch": 0.5761853746198508, + "grad_norm": 0.9354014850184909, + "learning_rate": 8.031500951252954e-06, + "loss": 0.5124, + "step": 33345 + }, + { + "epoch": 0.5762026541332597, + "grad_norm": 1.1459580993609302, + "learning_rate": 8.030952255109343e-06, + "loss": 0.546, + "step": 33346 + }, + { + "epoch": 0.5762199336466685, + "grad_norm": 1.741049154562698, + "learning_rate": 8.030403565132942e-06, + "loss": 0.3498, + "step": 33347 + }, + { + "epoch": 0.5762372131600774, + "grad_norm": 1.5444403668933329, + "learning_rate": 8.029854881325462e-06, + "loss": 0.4891, + "step": 33348 + }, + { + "epoch": 0.5762544926734863, + "grad_norm": 1.1808860982285496, + "learning_rate": 8.02930620368863e-06, + "loss": 0.3455, + "step": 33349 + }, + { + "epoch": 0.5762717721868952, + "grad_norm": 0.956614413410136, + "learning_rate": 8.028757532224158e-06, + "loss": 0.3386, + "step": 33350 + }, + { + "epoch": 0.5762890517003041, + "grad_norm": 1.1128058318889966, + "learning_rate": 8.028208866933769e-06, + "loss": 0.4659, + "step": 33351 + }, + { + "epoch": 0.576306331213713, + "grad_norm": 1.167878730609615, + "learning_rate": 8.027660207819176e-06, + "loss": 0.4683, + "step": 33352 + }, + { + "epoch": 0.5763236107271219, + "grad_norm": 1.286526500340847, + "learning_rate": 8.027111554882107e-06, + "loss": 0.325, + "step": 33353 + }, + { + "epoch": 0.5763408902405308, + "grad_norm": 1.2780146485684403, + "learning_rate": 8.026562908124273e-06, + "loss": 0.4769, + "step": 33354 + }, + { + "epoch": 0.5763581697539397, + "grad_norm": 1.0955142528209438, + "learning_rate": 8.02601426754739e-06, + "loss": 0.3575, + "step": 33355 + }, + { + "epoch": 0.5763754492673486, + "grad_norm": 1.1027804620995896, + "learning_rate": 8.025465633153185e-06, + "loss": 0.5257, + "step": 33356 + }, + { + "epoch": 0.5763927287807575, + "grad_norm": 1.3682755544737757, + "learning_rate": 8.024917004943367e-06, + "loss": 0.4336, + "step": 33357 + }, + { + "epoch": 0.5764100082941664, + "grad_norm": 1.3530074489911519, + "learning_rate": 8.024368382919665e-06, + "loss": 0.2543, + "step": 33358 + }, + { + "epoch": 0.5764272878075754, + "grad_norm": 1.0493700618984787, + "learning_rate": 8.023819767083787e-06, + "loss": 0.571, + "step": 33359 + }, + { + "epoch": 0.5764445673209843, + "grad_norm": 0.9957603629581908, + "learning_rate": 8.023271157437457e-06, + "loss": 0.2824, + "step": 33360 + }, + { + "epoch": 0.5764618468343932, + "grad_norm": 1.102599046510957, + "learning_rate": 8.02272255398239e-06, + "loss": 0.4026, + "step": 33361 + }, + { + "epoch": 0.5764791263478021, + "grad_norm": 1.0474553962826767, + "learning_rate": 8.02217395672031e-06, + "loss": 0.428, + "step": 33362 + }, + { + "epoch": 0.576496405861211, + "grad_norm": 1.4647532235212326, + "learning_rate": 8.02162536565293e-06, + "loss": 0.4093, + "step": 33363 + }, + { + "epoch": 0.5765136853746199, + "grad_norm": 1.034901975876778, + "learning_rate": 8.021076780781968e-06, + "loss": 0.3789, + "step": 33364 + }, + { + "epoch": 0.5765309648880288, + "grad_norm": 0.8735428971481979, + "learning_rate": 8.020528202109146e-06, + "loss": 0.3511, + "step": 33365 + }, + { + "epoch": 0.5765482444014377, + "grad_norm": 1.0035697331494446, + "learning_rate": 8.019979629636177e-06, + "loss": 0.5335, + "step": 33366 + }, + { + "epoch": 0.5765655239148466, + "grad_norm": 1.3498563575972649, + "learning_rate": 8.019431063364788e-06, + "loss": 0.4538, + "step": 33367 + }, + { + "epoch": 0.5765828034282554, + "grad_norm": 1.2248806081556571, + "learning_rate": 8.018882503296685e-06, + "loss": 0.3527, + "step": 33368 + }, + { + "epoch": 0.5766000829416643, + "grad_norm": 1.2868481213194756, + "learning_rate": 8.018333949433597e-06, + "loss": 0.3408, + "step": 33369 + }, + { + "epoch": 0.5766173624550732, + "grad_norm": 0.7640625257869053, + "learning_rate": 8.017785401777234e-06, + "loss": 0.201, + "step": 33370 + }, + { + "epoch": 0.5766346419684821, + "grad_norm": 0.931608009979007, + "learning_rate": 8.01723686032932e-06, + "loss": 0.3859, + "step": 33371 + }, + { + "epoch": 0.576651921481891, + "grad_norm": 1.2075360285983396, + "learning_rate": 8.01668832509157e-06, + "loss": 0.3003, + "step": 33372 + }, + { + "epoch": 0.5766692009953, + "grad_norm": 0.7902627039683255, + "learning_rate": 8.016139796065705e-06, + "loss": 0.2799, + "step": 33373 + }, + { + "epoch": 0.5766864805087089, + "grad_norm": 0.8070547994191968, + "learning_rate": 8.01559127325344e-06, + "loss": 0.4528, + "step": 33374 + }, + { + "epoch": 0.5767037600221178, + "grad_norm": 1.0223497338942495, + "learning_rate": 8.015042756656494e-06, + "loss": 0.3846, + "step": 33375 + }, + { + "epoch": 0.5767210395355267, + "grad_norm": 1.363587199224978, + "learning_rate": 8.014494246276584e-06, + "loss": 0.4761, + "step": 33376 + }, + { + "epoch": 0.5767383190489356, + "grad_norm": 0.8722632006085689, + "learning_rate": 8.013945742115427e-06, + "loss": 0.3133, + "step": 33377 + }, + { + "epoch": 0.5767555985623445, + "grad_norm": 1.0693004273288254, + "learning_rate": 8.013397244174749e-06, + "loss": 0.6297, + "step": 33378 + }, + { + "epoch": 0.5767728780757534, + "grad_norm": 0.8219867217693633, + "learning_rate": 8.012848752456257e-06, + "loss": 0.4026, + "step": 33379 + }, + { + "epoch": 0.5767901575891623, + "grad_norm": 1.3598354463842595, + "learning_rate": 8.012300266961674e-06, + "loss": 0.4538, + "step": 33380 + }, + { + "epoch": 0.5768074371025712, + "grad_norm": 1.0408715591908544, + "learning_rate": 8.011751787692717e-06, + "loss": 0.3787, + "step": 33381 + }, + { + "epoch": 0.5768247166159801, + "grad_norm": 1.026360436024625, + "learning_rate": 8.011203314651107e-06, + "loss": 0.4125, + "step": 33382 + }, + { + "epoch": 0.576841996129389, + "grad_norm": 1.2534129364738247, + "learning_rate": 8.010654847838557e-06, + "loss": 0.5677, + "step": 33383 + }, + { + "epoch": 0.5768592756427979, + "grad_norm": 0.9093276144443357, + "learning_rate": 8.01010638725679e-06, + "loss": 0.5609, + "step": 33384 + }, + { + "epoch": 0.5768765551562068, + "grad_norm": 0.948155130071144, + "learning_rate": 8.009557932907522e-06, + "loss": 0.4218, + "step": 33385 + }, + { + "epoch": 0.5768938346696157, + "grad_norm": 0.7809555975698486, + "learning_rate": 8.009009484792465e-06, + "loss": 0.4726, + "step": 33386 + }, + { + "epoch": 0.5769111141830247, + "grad_norm": 1.1473753432353486, + "learning_rate": 8.008461042913345e-06, + "loss": 0.4741, + "step": 33387 + }, + { + "epoch": 0.5769283936964336, + "grad_norm": 1.3326985920158547, + "learning_rate": 8.007912607271875e-06, + "loss": 0.3789, + "step": 33388 + }, + { + "epoch": 0.5769456732098424, + "grad_norm": 0.8471133943341089, + "learning_rate": 8.007364177869774e-06, + "loss": 0.3381, + "step": 33389 + }, + { + "epoch": 0.5769629527232513, + "grad_norm": 1.2542217917285352, + "learning_rate": 8.006815754708759e-06, + "loss": 0.568, + "step": 33390 + }, + { + "epoch": 0.5769802322366602, + "grad_norm": 0.8408399137804607, + "learning_rate": 8.00626733779055e-06, + "loss": 0.2647, + "step": 33391 + }, + { + "epoch": 0.5769975117500691, + "grad_norm": 0.9250241775865398, + "learning_rate": 8.005718927116861e-06, + "loss": 0.377, + "step": 33392 + }, + { + "epoch": 0.577014791263478, + "grad_norm": 1.3097396233345298, + "learning_rate": 8.005170522689418e-06, + "loss": 0.4201, + "step": 33393 + }, + { + "epoch": 0.5770320707768869, + "grad_norm": 1.3202454723382446, + "learning_rate": 8.004622124509928e-06, + "loss": 0.4833, + "step": 33394 + }, + { + "epoch": 0.5770493502902958, + "grad_norm": 0.7211041552044231, + "learning_rate": 8.004073732580112e-06, + "loss": 0.3895, + "step": 33395 + }, + { + "epoch": 0.5770666298037047, + "grad_norm": 0.46540468922887485, + "learning_rate": 8.00352534690169e-06, + "loss": 0.655, + "step": 33396 + }, + { + "epoch": 0.5770839093171136, + "grad_norm": 0.8043719200454316, + "learning_rate": 8.002976967476378e-06, + "loss": 0.4253, + "step": 33397 + }, + { + "epoch": 0.5771011888305225, + "grad_norm": 1.1259853038002883, + "learning_rate": 8.002428594305897e-06, + "loss": 0.4424, + "step": 33398 + }, + { + "epoch": 0.5771184683439314, + "grad_norm": 1.338234948230881, + "learning_rate": 8.001880227391957e-06, + "loss": 0.431, + "step": 33399 + }, + { + "epoch": 0.5771357478573403, + "grad_norm": 0.8868369497342218, + "learning_rate": 8.001331866736283e-06, + "loss": 0.3781, + "step": 33400 + }, + { + "epoch": 0.5771530273707492, + "grad_norm": 0.5657318770673718, + "learning_rate": 8.000783512340587e-06, + "loss": 0.5864, + "step": 33401 + }, + { + "epoch": 0.5771703068841582, + "grad_norm": 1.1193254557470407, + "learning_rate": 8.00023516420659e-06, + "loss": 0.3469, + "step": 33402 + }, + { + "epoch": 0.5771875863975671, + "grad_norm": 1.1454074946349515, + "learning_rate": 7.999686822336007e-06, + "loss": 0.4564, + "step": 33403 + }, + { + "epoch": 0.577204865910976, + "grad_norm": 1.0349542207903357, + "learning_rate": 7.999138486730563e-06, + "loss": 0.3231, + "step": 33404 + }, + { + "epoch": 0.5772221454243849, + "grad_norm": 1.0841019816739341, + "learning_rate": 7.998590157391965e-06, + "loss": 0.5678, + "step": 33405 + }, + { + "epoch": 0.5772394249377938, + "grad_norm": 1.2534978240037906, + "learning_rate": 7.998041834321933e-06, + "loss": 0.2208, + "step": 33406 + }, + { + "epoch": 0.5772567044512027, + "grad_norm": 0.7824324836739466, + "learning_rate": 7.997493517522191e-06, + "loss": 0.7902, + "step": 33407 + }, + { + "epoch": 0.5772739839646116, + "grad_norm": 0.9322255608911308, + "learning_rate": 7.996945206994447e-06, + "loss": 0.2913, + "step": 33408 + }, + { + "epoch": 0.5772912634780205, + "grad_norm": 0.9907514542713258, + "learning_rate": 7.996396902740423e-06, + "loss": 0.4414, + "step": 33409 + }, + { + "epoch": 0.5773085429914293, + "grad_norm": 1.3285104192543862, + "learning_rate": 7.995848604761836e-06, + "loss": 0.689, + "step": 33410 + }, + { + "epoch": 0.5773258225048382, + "grad_norm": 1.4655565960711734, + "learning_rate": 7.995300313060405e-06, + "loss": 0.422, + "step": 33411 + }, + { + "epoch": 0.5773431020182471, + "grad_norm": 1.213590791545857, + "learning_rate": 7.994752027637844e-06, + "loss": 0.4228, + "step": 33412 + }, + { + "epoch": 0.577360381531656, + "grad_norm": 1.8080426803813319, + "learning_rate": 7.994203748495877e-06, + "loss": 0.4747, + "step": 33413 + }, + { + "epoch": 0.5773776610450649, + "grad_norm": 1.0548159287739782, + "learning_rate": 7.993655475636215e-06, + "loss": 0.399, + "step": 33414 + }, + { + "epoch": 0.5773949405584738, + "grad_norm": 0.7455379210795422, + "learning_rate": 7.993107209060571e-06, + "loss": 0.6288, + "step": 33415 + }, + { + "epoch": 0.5774122200718828, + "grad_norm": 0.9814309349183977, + "learning_rate": 7.992558948770673e-06, + "loss": 0.3668, + "step": 33416 + }, + { + "epoch": 0.5774294995852917, + "grad_norm": 1.1305810350080823, + "learning_rate": 7.99201069476823e-06, + "loss": 0.5895, + "step": 33417 + }, + { + "epoch": 0.5774467790987006, + "grad_norm": 0.5600735651704176, + "learning_rate": 7.991462447054964e-06, + "loss": 0.7497, + "step": 33418 + }, + { + "epoch": 0.5774640586121095, + "grad_norm": 0.9361908671990666, + "learning_rate": 7.990914205632588e-06, + "loss": 0.3121, + "step": 33419 + }, + { + "epoch": 0.5774813381255184, + "grad_norm": 0.4560829701488979, + "learning_rate": 7.990365970502824e-06, + "loss": 0.5318, + "step": 33420 + }, + { + "epoch": 0.5774986176389273, + "grad_norm": 1.0048600330539494, + "learning_rate": 7.989817741667384e-06, + "loss": 0.3315, + "step": 33421 + }, + { + "epoch": 0.5775158971523362, + "grad_norm": 0.9283910986207928, + "learning_rate": 7.98926951912799e-06, + "loss": 0.4055, + "step": 33422 + }, + { + "epoch": 0.5775331766657451, + "grad_norm": 1.1822161683645134, + "learning_rate": 7.988721302886354e-06, + "loss": 0.4696, + "step": 33423 + }, + { + "epoch": 0.577550456179154, + "grad_norm": 1.0543008239974603, + "learning_rate": 7.988173092944199e-06, + "loss": 0.305, + "step": 33424 + }, + { + "epoch": 0.5775677356925629, + "grad_norm": 1.4946370126838346, + "learning_rate": 7.987624889303239e-06, + "loss": 0.4947, + "step": 33425 + }, + { + "epoch": 0.5775850152059718, + "grad_norm": 0.8091307201354049, + "learning_rate": 7.987076691965187e-06, + "loss": 0.3706, + "step": 33426 + }, + { + "epoch": 0.5776022947193807, + "grad_norm": 0.6043775955167, + "learning_rate": 7.986528500931769e-06, + "loss": 0.8839, + "step": 33427 + }, + { + "epoch": 0.5776195742327896, + "grad_norm": 0.9489030412720116, + "learning_rate": 7.985980316204692e-06, + "loss": 0.4389, + "step": 33428 + }, + { + "epoch": 0.5776368537461986, + "grad_norm": 1.8152865785876031, + "learning_rate": 7.98543213778568e-06, + "loss": 0.349, + "step": 33429 + }, + { + "epoch": 0.5776541332596075, + "grad_norm": 1.4883715045509955, + "learning_rate": 7.984883965676444e-06, + "loss": 0.5791, + "step": 33430 + }, + { + "epoch": 0.5776714127730163, + "grad_norm": 1.1217175167671198, + "learning_rate": 7.98433579987871e-06, + "loss": 0.3248, + "step": 33431 + }, + { + "epoch": 0.5776886922864252, + "grad_norm": 0.9706032130679927, + "learning_rate": 7.983787640394183e-06, + "loss": 0.3837, + "step": 33432 + }, + { + "epoch": 0.5777059717998341, + "grad_norm": 1.3472686921075931, + "learning_rate": 7.983239487224593e-06, + "loss": 0.4848, + "step": 33433 + }, + { + "epoch": 0.577723251313243, + "grad_norm": 1.072579904253544, + "learning_rate": 7.982691340371646e-06, + "loss": 0.5038, + "step": 33434 + }, + { + "epoch": 0.5777405308266519, + "grad_norm": 0.5095886811840197, + "learning_rate": 7.982143199837065e-06, + "loss": 0.62, + "step": 33435 + }, + { + "epoch": 0.5777578103400608, + "grad_norm": 1.107072045637752, + "learning_rate": 7.981595065622564e-06, + "loss": 0.3639, + "step": 33436 + }, + { + "epoch": 0.5777750898534697, + "grad_norm": 1.4561813487210045, + "learning_rate": 7.98104693772986e-06, + "loss": 0.4917, + "step": 33437 + }, + { + "epoch": 0.5777923693668786, + "grad_norm": 1.0343549056264942, + "learning_rate": 7.980498816160674e-06, + "loss": 0.4206, + "step": 33438 + }, + { + "epoch": 0.5778096488802875, + "grad_norm": 1.1345040006901546, + "learning_rate": 7.979950700916716e-06, + "loss": 0.6588, + "step": 33439 + }, + { + "epoch": 0.5778269283936964, + "grad_norm": 0.8480865543466738, + "learning_rate": 7.979402591999706e-06, + "loss": 0.4341, + "step": 33440 + }, + { + "epoch": 0.5778442079071053, + "grad_norm": 0.7327367585602105, + "learning_rate": 7.97885448941136e-06, + "loss": 0.351, + "step": 33441 + }, + { + "epoch": 0.5778614874205142, + "grad_norm": 0.9991790522533804, + "learning_rate": 7.978306393153396e-06, + "loss": 0.4606, + "step": 33442 + }, + { + "epoch": 0.5778787669339231, + "grad_norm": 0.8595379642945112, + "learning_rate": 7.97775830322753e-06, + "loss": 0.3502, + "step": 33443 + }, + { + "epoch": 0.5778960464473321, + "grad_norm": 1.182331430992352, + "learning_rate": 7.977210219635482e-06, + "loss": 0.3053, + "step": 33444 + }, + { + "epoch": 0.577913325960741, + "grad_norm": 1.0907901534816966, + "learning_rate": 7.976662142378963e-06, + "loss": 0.6068, + "step": 33445 + }, + { + "epoch": 0.5779306054741499, + "grad_norm": 1.1159973695328385, + "learning_rate": 7.97611407145969e-06, + "loss": 0.3982, + "step": 33446 + }, + { + "epoch": 0.5779478849875588, + "grad_norm": 1.317710909457429, + "learning_rate": 7.975566006879385e-06, + "loss": 0.6062, + "step": 33447 + }, + { + "epoch": 0.5779651645009677, + "grad_norm": 0.5304611205173495, + "learning_rate": 7.975017948639757e-06, + "loss": 0.6776, + "step": 33448 + }, + { + "epoch": 0.5779824440143766, + "grad_norm": 1.2829950206539935, + "learning_rate": 7.974469896742529e-06, + "loss": 0.295, + "step": 33449 + }, + { + "epoch": 0.5779997235277855, + "grad_norm": 0.9596634647957115, + "learning_rate": 7.973921851189415e-06, + "loss": 0.3742, + "step": 33450 + }, + { + "epoch": 0.5780170030411944, + "grad_norm": 1.0997954094669489, + "learning_rate": 7.973373811982131e-06, + "loss": 0.4737, + "step": 33451 + }, + { + "epoch": 0.5780342825546032, + "grad_norm": 0.8400132944509544, + "learning_rate": 7.972825779122393e-06, + "loss": 0.3174, + "step": 33452 + }, + { + "epoch": 0.5780515620680121, + "grad_norm": 1.3212709385847998, + "learning_rate": 7.972277752611925e-06, + "loss": 0.4924, + "step": 33453 + }, + { + "epoch": 0.578068841581421, + "grad_norm": 1.1890810269876402, + "learning_rate": 7.97172973245243e-06, + "loss": 0.5178, + "step": 33454 + }, + { + "epoch": 0.5780861210948299, + "grad_norm": 1.4543134014113541, + "learning_rate": 7.971181718645635e-06, + "loss": 0.355, + "step": 33455 + }, + { + "epoch": 0.5781034006082388, + "grad_norm": 1.5746062000788228, + "learning_rate": 7.970633711193253e-06, + "loss": 0.2378, + "step": 33456 + }, + { + "epoch": 0.5781206801216477, + "grad_norm": 0.7615534501510394, + "learning_rate": 7.970085710096999e-06, + "loss": 0.3339, + "step": 33457 + }, + { + "epoch": 0.5781379596350567, + "grad_norm": 0.8482620119401089, + "learning_rate": 7.969537715358594e-06, + "loss": 0.3245, + "step": 33458 + }, + { + "epoch": 0.5781552391484656, + "grad_norm": 1.2077932776184246, + "learning_rate": 7.968989726979748e-06, + "loss": 0.5252, + "step": 33459 + }, + { + "epoch": 0.5781725186618745, + "grad_norm": 0.6822266365592385, + "learning_rate": 7.96844174496218e-06, + "loss": 0.8339, + "step": 33460 + }, + { + "epoch": 0.5781897981752834, + "grad_norm": 0.9332404601997474, + "learning_rate": 7.967893769307608e-06, + "loss": 0.3403, + "step": 33461 + }, + { + "epoch": 0.5782070776886923, + "grad_norm": 0.5563429644567186, + "learning_rate": 7.967345800017749e-06, + "loss": 0.5432, + "step": 33462 + }, + { + "epoch": 0.5782243572021012, + "grad_norm": 1.5603988377589606, + "learning_rate": 7.966797837094313e-06, + "loss": 0.3176, + "step": 33463 + }, + { + "epoch": 0.5782416367155101, + "grad_norm": 1.0433752316059424, + "learning_rate": 7.966249880539027e-06, + "loss": 0.5016, + "step": 33464 + }, + { + "epoch": 0.578258916228919, + "grad_norm": 0.7440851428197843, + "learning_rate": 7.965701930353595e-06, + "loss": 0.2089, + "step": 33465 + }, + { + "epoch": 0.5782761957423279, + "grad_norm": 1.7377165835746031, + "learning_rate": 7.965153986539744e-06, + "loss": 0.246, + "step": 33466 + }, + { + "epoch": 0.5782934752557368, + "grad_norm": 0.6558809490192227, + "learning_rate": 7.964606049099184e-06, + "loss": 0.3782, + "step": 33467 + }, + { + "epoch": 0.5783107547691457, + "grad_norm": 1.1937866970497857, + "learning_rate": 7.96405811803363e-06, + "loss": 0.6746, + "step": 33468 + }, + { + "epoch": 0.5783280342825546, + "grad_norm": 0.7538998618845755, + "learning_rate": 7.963510193344803e-06, + "loss": 0.2886, + "step": 33469 + }, + { + "epoch": 0.5783453137959635, + "grad_norm": 1.035679094049586, + "learning_rate": 7.962962275034414e-06, + "loss": 0.414, + "step": 33470 + }, + { + "epoch": 0.5783625933093725, + "grad_norm": 1.5229035059174143, + "learning_rate": 7.962414363104185e-06, + "loss": 0.3723, + "step": 33471 + }, + { + "epoch": 0.5783798728227814, + "grad_norm": 0.8054069096951046, + "learning_rate": 7.961866457555827e-06, + "loss": 0.2474, + "step": 33472 + }, + { + "epoch": 0.5783971523361903, + "grad_norm": 1.4136984836385826, + "learning_rate": 7.961318558391062e-06, + "loss": 0.2525, + "step": 33473 + }, + { + "epoch": 0.5784144318495991, + "grad_norm": 0.809076268660444, + "learning_rate": 7.960770665611598e-06, + "loss": 0.4949, + "step": 33474 + }, + { + "epoch": 0.578431711363008, + "grad_norm": 0.7835619858613054, + "learning_rate": 7.960222779219158e-06, + "loss": 0.4849, + "step": 33475 + }, + { + "epoch": 0.5784489908764169, + "grad_norm": 0.8908201828696459, + "learning_rate": 7.959674899215454e-06, + "loss": 0.2829, + "step": 33476 + }, + { + "epoch": 0.5784662703898258, + "grad_norm": 0.7903882927545878, + "learning_rate": 7.959127025602203e-06, + "loss": 0.3164, + "step": 33477 + }, + { + "epoch": 0.5784835499032347, + "grad_norm": 0.7995185701354284, + "learning_rate": 7.958579158381124e-06, + "loss": 0.3776, + "step": 33478 + }, + { + "epoch": 0.5785008294166436, + "grad_norm": 0.9369795471107876, + "learning_rate": 7.958031297553925e-06, + "loss": 0.4377, + "step": 33479 + }, + { + "epoch": 0.5785181089300525, + "grad_norm": 1.2009353724319018, + "learning_rate": 7.957483443122332e-06, + "loss": 0.4358, + "step": 33480 + }, + { + "epoch": 0.5785353884434614, + "grad_norm": 0.9324632702715637, + "learning_rate": 7.956935595088051e-06, + "loss": 0.4044, + "step": 33481 + }, + { + "epoch": 0.5785526679568703, + "grad_norm": 1.2292012160813979, + "learning_rate": 7.956387753452807e-06, + "loss": 0.3548, + "step": 33482 + }, + { + "epoch": 0.5785699474702792, + "grad_norm": 1.706764387123981, + "learning_rate": 7.955839918218309e-06, + "loss": 0.2569, + "step": 33483 + }, + { + "epoch": 0.5785872269836881, + "grad_norm": 1.026288290352685, + "learning_rate": 7.95529208938628e-06, + "loss": 0.522, + "step": 33484 + }, + { + "epoch": 0.578604506497097, + "grad_norm": 0.6776925350284858, + "learning_rate": 7.954744266958428e-06, + "loss": 0.2951, + "step": 33485 + }, + { + "epoch": 0.578621786010506, + "grad_norm": 0.7223005584211634, + "learning_rate": 7.954196450936473e-06, + "loss": 0.5818, + "step": 33486 + }, + { + "epoch": 0.5786390655239149, + "grad_norm": 1.065105333790531, + "learning_rate": 7.953648641322135e-06, + "loss": 0.3697, + "step": 33487 + }, + { + "epoch": 0.5786563450373238, + "grad_norm": 1.098442214745643, + "learning_rate": 7.953100838117118e-06, + "loss": 0.4789, + "step": 33488 + }, + { + "epoch": 0.5786736245507327, + "grad_norm": 1.669128084047122, + "learning_rate": 7.952553041323148e-06, + "loss": 0.3449, + "step": 33489 + }, + { + "epoch": 0.5786909040641416, + "grad_norm": 1.1243501769083073, + "learning_rate": 7.952005250941935e-06, + "loss": 0.299, + "step": 33490 + }, + { + "epoch": 0.5787081835775505, + "grad_norm": 1.1187179837297951, + "learning_rate": 7.9514574669752e-06, + "loss": 0.5119, + "step": 33491 + }, + { + "epoch": 0.5787254630909594, + "grad_norm": 1.075561007599257, + "learning_rate": 7.950909689424654e-06, + "loss": 0.4457, + "step": 33492 + }, + { + "epoch": 0.5787427426043683, + "grad_norm": 0.6611260602798185, + "learning_rate": 7.950361918292017e-06, + "loss": 0.6732, + "step": 33493 + }, + { + "epoch": 0.5787600221177772, + "grad_norm": 0.9622786358465538, + "learning_rate": 7.949814153579001e-06, + "loss": 0.4519, + "step": 33494 + }, + { + "epoch": 0.578777301631186, + "grad_norm": 0.8983614884737245, + "learning_rate": 7.949266395287322e-06, + "loss": 0.3355, + "step": 33495 + }, + { + "epoch": 0.5787945811445949, + "grad_norm": 1.1334230471230922, + "learning_rate": 7.948718643418697e-06, + "loss": 0.4417, + "step": 33496 + }, + { + "epoch": 0.5788118606580038, + "grad_norm": 0.9644401496853428, + "learning_rate": 7.948170897974841e-06, + "loss": 0.412, + "step": 33497 + }, + { + "epoch": 0.5788291401714127, + "grad_norm": 0.9858750518996934, + "learning_rate": 7.947623158957472e-06, + "loss": 0.3619, + "step": 33498 + }, + { + "epoch": 0.5788464196848216, + "grad_norm": 0.9995854496846514, + "learning_rate": 7.9470754263683e-06, + "loss": 0.2966, + "step": 33499 + }, + { + "epoch": 0.5788636991982306, + "grad_norm": 0.7313018269417713, + "learning_rate": 7.946527700209045e-06, + "loss": 0.4113, + "step": 33500 + }, + { + "epoch": 0.5788809787116395, + "grad_norm": 1.3677547048438687, + "learning_rate": 7.94597998048142e-06, + "loss": 0.3623, + "step": 33501 + }, + { + "epoch": 0.5788982582250484, + "grad_norm": 0.8183802301524061, + "learning_rate": 7.945432267187144e-06, + "loss": 0.2794, + "step": 33502 + }, + { + "epoch": 0.5789155377384573, + "grad_norm": 1.262591668536042, + "learning_rate": 7.944884560327928e-06, + "loss": 0.333, + "step": 33503 + }, + { + "epoch": 0.5789328172518662, + "grad_norm": 1.256632504922496, + "learning_rate": 7.944336859905495e-06, + "loss": 0.6169, + "step": 33504 + }, + { + "epoch": 0.5789500967652751, + "grad_norm": 0.8839601100249314, + "learning_rate": 7.943789165921549e-06, + "loss": 0.2317, + "step": 33505 + }, + { + "epoch": 0.578967376278684, + "grad_norm": 0.7816471615761554, + "learning_rate": 7.943241478377814e-06, + "loss": 0.4479, + "step": 33506 + }, + { + "epoch": 0.5789846557920929, + "grad_norm": 1.0744256691010983, + "learning_rate": 7.942693797276006e-06, + "loss": 0.5294, + "step": 33507 + }, + { + "epoch": 0.5790019353055018, + "grad_norm": 1.4145464013263704, + "learning_rate": 7.942146122617833e-06, + "loss": 0.2615, + "step": 33508 + }, + { + "epoch": 0.5790192148189107, + "grad_norm": 1.060504904104258, + "learning_rate": 7.941598454405016e-06, + "loss": 0.6348, + "step": 33509 + }, + { + "epoch": 0.5790364943323196, + "grad_norm": 2.312578980236636, + "learning_rate": 7.941050792639267e-06, + "loss": 0.3184, + "step": 33510 + }, + { + "epoch": 0.5790537738457285, + "grad_norm": 1.0890217030300147, + "learning_rate": 7.940503137322306e-06, + "loss": 0.5152, + "step": 33511 + }, + { + "epoch": 0.5790710533591374, + "grad_norm": 1.3792066612893528, + "learning_rate": 7.939955488455844e-06, + "loss": 0.4788, + "step": 33512 + }, + { + "epoch": 0.5790883328725464, + "grad_norm": 0.7738068233881924, + "learning_rate": 7.939407846041602e-06, + "loss": 0.5534, + "step": 33513 + }, + { + "epoch": 0.5791056123859553, + "grad_norm": 0.9632059474938086, + "learning_rate": 7.938860210081288e-06, + "loss": 0.3552, + "step": 33514 + }, + { + "epoch": 0.5791228918993642, + "grad_norm": 1.1741215547517014, + "learning_rate": 7.93831258057662e-06, + "loss": 0.5279, + "step": 33515 + }, + { + "epoch": 0.579140171412773, + "grad_norm": 0.8814459079908933, + "learning_rate": 7.937764957529314e-06, + "loss": 0.3579, + "step": 33516 + }, + { + "epoch": 0.5791574509261819, + "grad_norm": 0.7212686124266197, + "learning_rate": 7.937217340941086e-06, + "loss": 0.3591, + "step": 33517 + }, + { + "epoch": 0.5791747304395908, + "grad_norm": 0.7745282361670617, + "learning_rate": 7.93666973081365e-06, + "loss": 0.5679, + "step": 33518 + }, + { + "epoch": 0.5791920099529997, + "grad_norm": 1.3635096219279266, + "learning_rate": 7.93612212714872e-06, + "loss": 0.4921, + "step": 33519 + }, + { + "epoch": 0.5792092894664086, + "grad_norm": 1.3029524152122993, + "learning_rate": 7.935574529948013e-06, + "loss": 0.2354, + "step": 33520 + }, + { + "epoch": 0.5792265689798175, + "grad_norm": 1.6308664684556857, + "learning_rate": 7.935026939213243e-06, + "loss": 0.3013, + "step": 33521 + }, + { + "epoch": 0.5792438484932264, + "grad_norm": 0.9300429113651455, + "learning_rate": 7.934479354946127e-06, + "loss": 0.397, + "step": 33522 + }, + { + "epoch": 0.5792611280066353, + "grad_norm": 1.0788616711732517, + "learning_rate": 7.933931777148376e-06, + "loss": 0.5156, + "step": 33523 + }, + { + "epoch": 0.5792784075200442, + "grad_norm": 0.9161295043832589, + "learning_rate": 7.933384205821712e-06, + "loss": 0.3911, + "step": 33524 + }, + { + "epoch": 0.5792956870334531, + "grad_norm": 1.144431391252133, + "learning_rate": 7.93283664096784e-06, + "loss": 0.433, + "step": 33525 + }, + { + "epoch": 0.579312966546862, + "grad_norm": 0.8099099313041632, + "learning_rate": 7.932289082588486e-06, + "loss": 0.21, + "step": 33526 + }, + { + "epoch": 0.579330246060271, + "grad_norm": 0.4609685615122662, + "learning_rate": 7.931741530685355e-06, + "loss": 0.4651, + "step": 33527 + }, + { + "epoch": 0.5793475255736799, + "grad_norm": 0.7021066633985484, + "learning_rate": 7.931193985260173e-06, + "loss": 0.3652, + "step": 33528 + }, + { + "epoch": 0.5793648050870888, + "grad_norm": 1.606968526299809, + "learning_rate": 7.930646446314646e-06, + "loss": 0.2668, + "step": 33529 + }, + { + "epoch": 0.5793820846004977, + "grad_norm": 0.947391331271752, + "learning_rate": 7.930098913850487e-06, + "loss": 0.4888, + "step": 33530 + }, + { + "epoch": 0.5793993641139066, + "grad_norm": 1.2982687167334406, + "learning_rate": 7.929551387869421e-06, + "loss": 0.4196, + "step": 33531 + }, + { + "epoch": 0.5794166436273155, + "grad_norm": 0.832480376421441, + "learning_rate": 7.929003868373153e-06, + "loss": 0.3028, + "step": 33532 + }, + { + "epoch": 0.5794339231407244, + "grad_norm": 1.0139627940420919, + "learning_rate": 7.928456355363408e-06, + "loss": 0.4375, + "step": 33533 + }, + { + "epoch": 0.5794512026541333, + "grad_norm": 1.0447594945457137, + "learning_rate": 7.92790884884189e-06, + "loss": 0.4747, + "step": 33534 + }, + { + "epoch": 0.5794684821675422, + "grad_norm": 1.1341571390538718, + "learning_rate": 7.92736134881032e-06, + "loss": 0.352, + "step": 33535 + }, + { + "epoch": 0.5794857616809511, + "grad_norm": 1.3363085355616178, + "learning_rate": 7.926813855270413e-06, + "loss": 0.4146, + "step": 33536 + }, + { + "epoch": 0.5795030411943599, + "grad_norm": 1.3496792809879847, + "learning_rate": 7.926266368223882e-06, + "loss": 0.6662, + "step": 33537 + }, + { + "epoch": 0.5795203207077688, + "grad_norm": 0.7932428563868713, + "learning_rate": 7.925718887672445e-06, + "loss": 0.3798, + "step": 33538 + }, + { + "epoch": 0.5795376002211777, + "grad_norm": 1.5583572925551534, + "learning_rate": 7.92517141361781e-06, + "loss": 0.5141, + "step": 33539 + }, + { + "epoch": 0.5795548797345866, + "grad_norm": 1.010802940916315, + "learning_rate": 7.924623946061698e-06, + "loss": 0.3744, + "step": 33540 + }, + { + "epoch": 0.5795721592479955, + "grad_norm": 0.5444455372533158, + "learning_rate": 7.92407648500582e-06, + "loss": 0.6364, + "step": 33541 + }, + { + "epoch": 0.5795894387614045, + "grad_norm": 1.0823200188070539, + "learning_rate": 7.923529030451894e-06, + "loss": 0.3831, + "step": 33542 + }, + { + "epoch": 0.5796067182748134, + "grad_norm": 0.6176285267526312, + "learning_rate": 7.92298158240163e-06, + "loss": 0.3029, + "step": 33543 + }, + { + "epoch": 0.5796239977882223, + "grad_norm": 0.8083743123006341, + "learning_rate": 7.92243414085675e-06, + "loss": 0.4787, + "step": 33544 + }, + { + "epoch": 0.5796412773016312, + "grad_norm": 0.6901196757301284, + "learning_rate": 7.92188670581896e-06, + "loss": 0.423, + "step": 33545 + }, + { + "epoch": 0.5796585568150401, + "grad_norm": 1.4649980997845202, + "learning_rate": 7.92133927728998e-06, + "loss": 0.3432, + "step": 33546 + }, + { + "epoch": 0.579675836328449, + "grad_norm": 1.061198581807151, + "learning_rate": 7.920791855271523e-06, + "loss": 0.1633, + "step": 33547 + }, + { + "epoch": 0.5796931158418579, + "grad_norm": 1.0121832690924726, + "learning_rate": 7.920244439765308e-06, + "loss": 0.4758, + "step": 33548 + }, + { + "epoch": 0.5797103953552668, + "grad_norm": 0.8192847077733435, + "learning_rate": 7.91969703077304e-06, + "loss": 0.483, + "step": 33549 + }, + { + "epoch": 0.5797276748686757, + "grad_norm": 1.0345145912340212, + "learning_rate": 7.91914962829644e-06, + "loss": 0.5233, + "step": 33550 + }, + { + "epoch": 0.5797449543820846, + "grad_norm": 0.8880198617199294, + "learning_rate": 7.918602232337225e-06, + "loss": 0.2704, + "step": 33551 + }, + { + "epoch": 0.5797622338954935, + "grad_norm": 0.6069899493489047, + "learning_rate": 7.918054842897101e-06, + "loss": 0.5532, + "step": 33552 + }, + { + "epoch": 0.5797795134089024, + "grad_norm": 0.8847046572047863, + "learning_rate": 7.917507459977794e-06, + "loss": 0.2802, + "step": 33553 + }, + { + "epoch": 0.5797967929223113, + "grad_norm": 1.2263035954921095, + "learning_rate": 7.916960083581006e-06, + "loss": 0.446, + "step": 33554 + }, + { + "epoch": 0.5798140724357203, + "grad_norm": 0.8125886582161573, + "learning_rate": 7.916412713708461e-06, + "loss": 0.3321, + "step": 33555 + }, + { + "epoch": 0.5798313519491292, + "grad_norm": 0.7179421591043169, + "learning_rate": 7.915865350361866e-06, + "loss": 0.3028, + "step": 33556 + }, + { + "epoch": 0.5798486314625381, + "grad_norm": 1.1188135646446502, + "learning_rate": 7.915317993542942e-06, + "loss": 0.3547, + "step": 33557 + }, + { + "epoch": 0.5798659109759469, + "grad_norm": 1.5781398352065839, + "learning_rate": 7.914770643253397e-06, + "loss": 0.3441, + "step": 33558 + }, + { + "epoch": 0.5798831904893558, + "grad_norm": 1.3284794945909262, + "learning_rate": 7.914223299494954e-06, + "loss": 0.4845, + "step": 33559 + }, + { + "epoch": 0.5799004700027647, + "grad_norm": 0.9175196586182977, + "learning_rate": 7.91367596226932e-06, + "loss": 0.3815, + "step": 33560 + }, + { + "epoch": 0.5799177495161736, + "grad_norm": 0.676831769386076, + "learning_rate": 7.91312863157821e-06, + "loss": 0.2985, + "step": 33561 + }, + { + "epoch": 0.5799350290295825, + "grad_norm": 1.0679574530854015, + "learning_rate": 7.912581307423343e-06, + "loss": 0.3818, + "step": 33562 + }, + { + "epoch": 0.5799523085429914, + "grad_norm": 0.5121135004408685, + "learning_rate": 7.912033989806426e-06, + "loss": 0.576, + "step": 33563 + }, + { + "epoch": 0.5799695880564003, + "grad_norm": 1.3245315901434571, + "learning_rate": 7.91148667872918e-06, + "loss": 0.4245, + "step": 33564 + }, + { + "epoch": 0.5799868675698092, + "grad_norm": 1.1469815969402235, + "learning_rate": 7.910939374193314e-06, + "loss": 0.3708, + "step": 33565 + }, + { + "epoch": 0.5800041470832181, + "grad_norm": 1.0270023598046527, + "learning_rate": 7.910392076200545e-06, + "loss": 0.4468, + "step": 33566 + }, + { + "epoch": 0.580021426596627, + "grad_norm": 1.101213845964076, + "learning_rate": 7.909844784752587e-06, + "loss": 0.5911, + "step": 33567 + }, + { + "epoch": 0.5800387061100359, + "grad_norm": 0.9200686080397361, + "learning_rate": 7.909297499851158e-06, + "loss": 0.5702, + "step": 33568 + }, + { + "epoch": 0.5800559856234448, + "grad_norm": 1.8708215058192839, + "learning_rate": 7.908750221497967e-06, + "loss": 0.4195, + "step": 33569 + }, + { + "epoch": 0.5800732651368538, + "grad_norm": 1.1424767967862777, + "learning_rate": 7.908202949694725e-06, + "loss": 0.4503, + "step": 33570 + }, + { + "epoch": 0.5800905446502627, + "grad_norm": 1.0177756266897715, + "learning_rate": 7.907655684443153e-06, + "loss": 0.4535, + "step": 33571 + }, + { + "epoch": 0.5801078241636716, + "grad_norm": 0.8408427147974578, + "learning_rate": 7.907108425744961e-06, + "loss": 0.4266, + "step": 33572 + }, + { + "epoch": 0.5801251036770805, + "grad_norm": 1.1009584748210939, + "learning_rate": 7.906561173601867e-06, + "loss": 0.3428, + "step": 33573 + }, + { + "epoch": 0.5801423831904894, + "grad_norm": 1.5245333729340698, + "learning_rate": 7.90601392801558e-06, + "loss": 0.3811, + "step": 33574 + }, + { + "epoch": 0.5801596627038983, + "grad_norm": 1.7958291819333625, + "learning_rate": 7.905466688987817e-06, + "loss": 0.4763, + "step": 33575 + }, + { + "epoch": 0.5801769422173072, + "grad_norm": 0.8036756869306192, + "learning_rate": 7.90491945652029e-06, + "loss": 0.2958, + "step": 33576 + }, + { + "epoch": 0.5801942217307161, + "grad_norm": 0.7975662333101636, + "learning_rate": 7.904372230614717e-06, + "loss": 0.5636, + "step": 33577 + }, + { + "epoch": 0.580211501244125, + "grad_norm": 1.584114534207597, + "learning_rate": 7.903825011272808e-06, + "loss": 0.4449, + "step": 33578 + }, + { + "epoch": 0.5802287807575338, + "grad_norm": 0.8042687875209381, + "learning_rate": 7.903277798496282e-06, + "loss": 0.3376, + "step": 33579 + }, + { + "epoch": 0.5802460602709427, + "grad_norm": 0.9177446069614184, + "learning_rate": 7.902730592286847e-06, + "loss": 0.5079, + "step": 33580 + }, + { + "epoch": 0.5802633397843516, + "grad_norm": 1.3744273227887387, + "learning_rate": 7.902183392646218e-06, + "loss": 0.4136, + "step": 33581 + }, + { + "epoch": 0.5802806192977605, + "grad_norm": 0.9979691256949931, + "learning_rate": 7.901636199576113e-06, + "loss": 0.4472, + "step": 33582 + }, + { + "epoch": 0.5802978988111694, + "grad_norm": 0.9760961642794664, + "learning_rate": 7.901089013078237e-06, + "loss": 0.6723, + "step": 33583 + }, + { + "epoch": 0.5803151783245784, + "grad_norm": 0.9428660016660709, + "learning_rate": 7.900541833154316e-06, + "loss": 0.3705, + "step": 33584 + }, + { + "epoch": 0.5803324578379873, + "grad_norm": 0.9691182901696682, + "learning_rate": 7.899994659806053e-06, + "loss": 0.3307, + "step": 33585 + }, + { + "epoch": 0.5803497373513962, + "grad_norm": 0.5719496544264799, + "learning_rate": 7.89944749303517e-06, + "loss": 0.6806, + "step": 33586 + }, + { + "epoch": 0.5803670168648051, + "grad_norm": 1.4033131434449768, + "learning_rate": 7.898900332843372e-06, + "loss": 0.5642, + "step": 33587 + }, + { + "epoch": 0.580384296378214, + "grad_norm": 0.8618496121399677, + "learning_rate": 7.898353179232384e-06, + "loss": 0.3826, + "step": 33588 + }, + { + "epoch": 0.5804015758916229, + "grad_norm": 0.7698867998771616, + "learning_rate": 7.897806032203907e-06, + "loss": 0.3144, + "step": 33589 + }, + { + "epoch": 0.5804188554050318, + "grad_norm": 1.1427279952286897, + "learning_rate": 7.897258891759668e-06, + "loss": 0.3684, + "step": 33590 + }, + { + "epoch": 0.5804361349184407, + "grad_norm": 1.048587799460348, + "learning_rate": 7.896711757901371e-06, + "loss": 0.3908, + "step": 33591 + }, + { + "epoch": 0.5804534144318496, + "grad_norm": 2.1562344715577373, + "learning_rate": 7.896164630630731e-06, + "loss": 0.3239, + "step": 33592 + }, + { + "epoch": 0.5804706939452585, + "grad_norm": 0.7167680698121216, + "learning_rate": 7.895617509949468e-06, + "loss": 0.5704, + "step": 33593 + }, + { + "epoch": 0.5804879734586674, + "grad_norm": 0.9287820539244939, + "learning_rate": 7.895070395859286e-06, + "loss": 0.4237, + "step": 33594 + }, + { + "epoch": 0.5805052529720763, + "grad_norm": 0.45927427988410485, + "learning_rate": 7.894523288361906e-06, + "loss": 0.5489, + "step": 33595 + }, + { + "epoch": 0.5805225324854852, + "grad_norm": 1.1848501319400302, + "learning_rate": 7.893976187459035e-06, + "loss": 0.5353, + "step": 33596 + }, + { + "epoch": 0.5805398119988942, + "grad_norm": 0.8672633857730448, + "learning_rate": 7.893429093152397e-06, + "loss": 0.3729, + "step": 33597 + }, + { + "epoch": 0.5805570915123031, + "grad_norm": 1.0346652863658594, + "learning_rate": 7.892882005443692e-06, + "loss": 0.421, + "step": 33598 + }, + { + "epoch": 0.580574371025712, + "grad_norm": 0.9873873850049508, + "learning_rate": 7.892334924334648e-06, + "loss": 0.3058, + "step": 33599 + }, + { + "epoch": 0.5805916505391209, + "grad_norm": 1.269079012650507, + "learning_rate": 7.891787849826969e-06, + "loss": 0.4303, + "step": 33600 + }, + { + "epoch": 0.5806089300525297, + "grad_norm": 1.5491354886945796, + "learning_rate": 7.891240781922368e-06, + "loss": 0.4776, + "step": 33601 + }, + { + "epoch": 0.5806262095659386, + "grad_norm": 1.2453684508329557, + "learning_rate": 7.890693720622565e-06, + "loss": 0.3358, + "step": 33602 + }, + { + "epoch": 0.5806434890793475, + "grad_norm": 1.670199381784583, + "learning_rate": 7.890146665929263e-06, + "loss": 0.5385, + "step": 33603 + }, + { + "epoch": 0.5806607685927564, + "grad_norm": 1.084178391142517, + "learning_rate": 7.88959961784419e-06, + "loss": 0.4889, + "step": 33604 + }, + { + "epoch": 0.5806780481061653, + "grad_norm": 0.9439976361275588, + "learning_rate": 7.889052576369047e-06, + "loss": 0.4828, + "step": 33605 + }, + { + "epoch": 0.5806953276195742, + "grad_norm": 1.1773754828262026, + "learning_rate": 7.888505541505552e-06, + "loss": 0.6215, + "step": 33606 + }, + { + "epoch": 0.5807126071329831, + "grad_norm": 0.9709410622673894, + "learning_rate": 7.887958513255416e-06, + "loss": 0.4883, + "step": 33607 + }, + { + "epoch": 0.580729886646392, + "grad_norm": 1.0879839297678169, + "learning_rate": 7.88741149162036e-06, + "loss": 0.351, + "step": 33608 + }, + { + "epoch": 0.5807471661598009, + "grad_norm": 0.7378914994182075, + "learning_rate": 7.886864476602086e-06, + "loss": 0.7335, + "step": 33609 + }, + { + "epoch": 0.5807644456732098, + "grad_norm": 0.8875294392755716, + "learning_rate": 7.886317468202318e-06, + "loss": 0.4375, + "step": 33610 + }, + { + "epoch": 0.5807817251866187, + "grad_norm": 0.5275227094497178, + "learning_rate": 7.885770466422763e-06, + "loss": 0.4726, + "step": 33611 + }, + { + "epoch": 0.5807990047000277, + "grad_norm": 1.2231416130645547, + "learning_rate": 7.885223471265134e-06, + "loss": 0.3771, + "step": 33612 + }, + { + "epoch": 0.5808162842134366, + "grad_norm": 1.2419130464134664, + "learning_rate": 7.884676482731148e-06, + "loss": 0.4064, + "step": 33613 + }, + { + "epoch": 0.5808335637268455, + "grad_norm": 1.2465382650742616, + "learning_rate": 7.884129500822513e-06, + "loss": 0.3841, + "step": 33614 + }, + { + "epoch": 0.5808508432402544, + "grad_norm": 1.228915491381441, + "learning_rate": 7.883582525540948e-06, + "loss": 0.5846, + "step": 33615 + }, + { + "epoch": 0.5808681227536633, + "grad_norm": 0.9112229738093763, + "learning_rate": 7.88303555688816e-06, + "loss": 0.3702, + "step": 33616 + }, + { + "epoch": 0.5808854022670722, + "grad_norm": 1.2865538288687761, + "learning_rate": 7.882488594865869e-06, + "loss": 0.3722, + "step": 33617 + }, + { + "epoch": 0.5809026817804811, + "grad_norm": 1.126681965990796, + "learning_rate": 7.88194163947578e-06, + "loss": 0.4479, + "step": 33618 + }, + { + "epoch": 0.58091996129389, + "grad_norm": 1.073988633139544, + "learning_rate": 7.881394690719618e-06, + "loss": 0.4436, + "step": 33619 + }, + { + "epoch": 0.5809372408072989, + "grad_norm": 1.5114926517921194, + "learning_rate": 7.880847748599085e-06, + "loss": 0.326, + "step": 33620 + }, + { + "epoch": 0.5809545203207078, + "grad_norm": 1.6704656552477946, + "learning_rate": 7.880300813115896e-06, + "loss": 0.5039, + "step": 33621 + }, + { + "epoch": 0.5809717998341166, + "grad_norm": 0.8788656850672627, + "learning_rate": 7.87975388427177e-06, + "loss": 0.409, + "step": 33622 + }, + { + "epoch": 0.5809890793475255, + "grad_norm": 0.7137766353048624, + "learning_rate": 7.879206962068411e-06, + "loss": 0.3305, + "step": 33623 + }, + { + "epoch": 0.5810063588609344, + "grad_norm": 1.2576345111209974, + "learning_rate": 7.878660046507542e-06, + "loss": 0.3488, + "step": 33624 + }, + { + "epoch": 0.5810236383743433, + "grad_norm": 0.8163077740677795, + "learning_rate": 7.878113137590865e-06, + "loss": 0.4028, + "step": 33625 + }, + { + "epoch": 0.5810409178877523, + "grad_norm": 1.117885897734531, + "learning_rate": 7.877566235320104e-06, + "loss": 0.2921, + "step": 33626 + }, + { + "epoch": 0.5810581974011612, + "grad_norm": 0.8997672040608491, + "learning_rate": 7.877019339696964e-06, + "loss": 0.4283, + "step": 33627 + }, + { + "epoch": 0.5810754769145701, + "grad_norm": 1.037471025321566, + "learning_rate": 7.876472450723165e-06, + "loss": 0.3758, + "step": 33628 + }, + { + "epoch": 0.581092756427979, + "grad_norm": 1.2247795012480265, + "learning_rate": 7.87592556840041e-06, + "loss": 0.4964, + "step": 33629 + }, + { + "epoch": 0.5811100359413879, + "grad_norm": 1.296108218290822, + "learning_rate": 7.875378692730421e-06, + "loss": 0.3415, + "step": 33630 + }, + { + "epoch": 0.5811273154547968, + "grad_norm": 1.2399453985296949, + "learning_rate": 7.874831823714907e-06, + "loss": 0.6588, + "step": 33631 + }, + { + "epoch": 0.5811445949682057, + "grad_norm": 0.9214036747674444, + "learning_rate": 7.874284961355581e-06, + "loss": 0.3477, + "step": 33632 + }, + { + "epoch": 0.5811618744816146, + "grad_norm": 0.6559577465266332, + "learning_rate": 7.873738105654158e-06, + "loss": 0.2678, + "step": 33633 + }, + { + "epoch": 0.5811791539950235, + "grad_norm": 0.8444079429028422, + "learning_rate": 7.873191256612346e-06, + "loss": 0.2493, + "step": 33634 + }, + { + "epoch": 0.5811964335084324, + "grad_norm": 1.3762629178420576, + "learning_rate": 7.872644414231861e-06, + "loss": 0.3779, + "step": 33635 + }, + { + "epoch": 0.5812137130218413, + "grad_norm": 1.1474116806256873, + "learning_rate": 7.872097578514415e-06, + "loss": 0.3293, + "step": 33636 + }, + { + "epoch": 0.5812309925352502, + "grad_norm": 1.1763186610164338, + "learning_rate": 7.871550749461722e-06, + "loss": 0.4803, + "step": 33637 + }, + { + "epoch": 0.5812482720486591, + "grad_norm": 0.8909696133306744, + "learning_rate": 7.871003927075492e-06, + "loss": 0.4092, + "step": 33638 + }, + { + "epoch": 0.581265551562068, + "grad_norm": 1.1041687812531233, + "learning_rate": 7.870457111357444e-06, + "loss": 0.5387, + "step": 33639 + }, + { + "epoch": 0.581282831075477, + "grad_norm": 0.9272676852305313, + "learning_rate": 7.869910302309282e-06, + "loss": 0.4961, + "step": 33640 + }, + { + "epoch": 0.5813001105888859, + "grad_norm": 0.903254906917272, + "learning_rate": 7.869363499932725e-06, + "loss": 0.488, + "step": 33641 + }, + { + "epoch": 0.5813173901022948, + "grad_norm": 0.9651421235391666, + "learning_rate": 7.868816704229484e-06, + "loss": 0.3038, + "step": 33642 + }, + { + "epoch": 0.5813346696157036, + "grad_norm": 1.366043596875738, + "learning_rate": 7.868269915201267e-06, + "loss": 0.4611, + "step": 33643 + }, + { + "epoch": 0.5813519491291125, + "grad_norm": 1.1679864880762458, + "learning_rate": 7.867723132849797e-06, + "loss": 0.3635, + "step": 33644 + }, + { + "epoch": 0.5813692286425214, + "grad_norm": 0.8543909326703191, + "learning_rate": 7.867176357176774e-06, + "loss": 0.5636, + "step": 33645 + }, + { + "epoch": 0.5813865081559303, + "grad_norm": 0.7659038577994445, + "learning_rate": 7.86662958818392e-06, + "loss": 0.36, + "step": 33646 + }, + { + "epoch": 0.5814037876693392, + "grad_norm": 1.0260044238190609, + "learning_rate": 7.866082825872943e-06, + "loss": 0.2771, + "step": 33647 + }, + { + "epoch": 0.5814210671827481, + "grad_norm": 0.7938388740380927, + "learning_rate": 7.865536070245558e-06, + "loss": 0.3843, + "step": 33648 + }, + { + "epoch": 0.581438346696157, + "grad_norm": 0.9683779814529166, + "learning_rate": 7.864989321303473e-06, + "loss": 0.3344, + "step": 33649 + }, + { + "epoch": 0.5814556262095659, + "grad_norm": 1.669785486905326, + "learning_rate": 7.86444257904841e-06, + "loss": 0.4005, + "step": 33650 + }, + { + "epoch": 0.5814729057229748, + "grad_norm": 1.0023021813905126, + "learning_rate": 7.86389584348207e-06, + "loss": 0.3612, + "step": 33651 + }, + { + "epoch": 0.5814901852363837, + "grad_norm": 1.131012397435029, + "learning_rate": 7.863349114606172e-06, + "loss": 0.5353, + "step": 33652 + }, + { + "epoch": 0.5815074647497926, + "grad_norm": 0.7715953162855979, + "learning_rate": 7.862802392422429e-06, + "loss": 0.2443, + "step": 33653 + }, + { + "epoch": 0.5815247442632016, + "grad_norm": 1.0147130514097293, + "learning_rate": 7.862255676932548e-06, + "loss": 0.4113, + "step": 33654 + }, + { + "epoch": 0.5815420237766105, + "grad_norm": 1.0166968349586991, + "learning_rate": 7.861708968138247e-06, + "loss": 0.3653, + "step": 33655 + }, + { + "epoch": 0.5815593032900194, + "grad_norm": 1.005255788315827, + "learning_rate": 7.861162266041232e-06, + "loss": 0.5951, + "step": 33656 + }, + { + "epoch": 0.5815765828034283, + "grad_norm": 1.3736645281772497, + "learning_rate": 7.860615570643223e-06, + "loss": 0.3564, + "step": 33657 + }, + { + "epoch": 0.5815938623168372, + "grad_norm": 1.0440350190429766, + "learning_rate": 7.860068881945926e-06, + "loss": 0.3339, + "step": 33658 + }, + { + "epoch": 0.5816111418302461, + "grad_norm": 1.4616480515940706, + "learning_rate": 7.85952219995106e-06, + "loss": 0.4926, + "step": 33659 + }, + { + "epoch": 0.581628421343655, + "grad_norm": 1.0314770601018382, + "learning_rate": 7.85897552466033e-06, + "loss": 0.7928, + "step": 33660 + }, + { + "epoch": 0.5816457008570639, + "grad_norm": 1.0161339581849433, + "learning_rate": 7.858428856075453e-06, + "loss": 0.404, + "step": 33661 + }, + { + "epoch": 0.5816629803704728, + "grad_norm": 0.8204929851888875, + "learning_rate": 7.857882194198142e-06, + "loss": 0.4911, + "step": 33662 + }, + { + "epoch": 0.5816802598838817, + "grad_norm": 0.6080033246940556, + "learning_rate": 7.857335539030098e-06, + "loss": 0.2825, + "step": 33663 + }, + { + "epoch": 0.5816975393972905, + "grad_norm": 1.1146844600273538, + "learning_rate": 7.856788890573051e-06, + "loss": 0.4743, + "step": 33664 + }, + { + "epoch": 0.5817148189106994, + "grad_norm": 0.610666628871143, + "learning_rate": 7.856242248828699e-06, + "loss": 0.6789, + "step": 33665 + }, + { + "epoch": 0.5817320984241083, + "grad_norm": 1.3109382921553752, + "learning_rate": 7.855695613798762e-06, + "loss": 0.3411, + "step": 33666 + }, + { + "epoch": 0.5817493779375172, + "grad_norm": 1.0900839470284875, + "learning_rate": 7.855148985484946e-06, + "loss": 0.396, + "step": 33667 + }, + { + "epoch": 0.5817666574509262, + "grad_norm": 1.602167887963286, + "learning_rate": 7.85460236388897e-06, + "loss": 0.3779, + "step": 33668 + }, + { + "epoch": 0.5817839369643351, + "grad_norm": 1.057175505141687, + "learning_rate": 7.854055749012538e-06, + "loss": 0.4198, + "step": 33669 + }, + { + "epoch": 0.581801216477744, + "grad_norm": 0.8216314292027893, + "learning_rate": 7.853509140857373e-06, + "loss": 0.6745, + "step": 33670 + }, + { + "epoch": 0.5818184959911529, + "grad_norm": 0.8639190404811498, + "learning_rate": 7.852962539425176e-06, + "loss": 0.3317, + "step": 33671 + }, + { + "epoch": 0.5818357755045618, + "grad_norm": 1.1436424335399544, + "learning_rate": 7.852415944717665e-06, + "loss": 0.4311, + "step": 33672 + }, + { + "epoch": 0.5818530550179707, + "grad_norm": 1.4538721826870198, + "learning_rate": 7.85186935673655e-06, + "loss": 0.3297, + "step": 33673 + }, + { + "epoch": 0.5818703345313796, + "grad_norm": 0.912635089212856, + "learning_rate": 7.851322775483543e-06, + "loss": 0.4683, + "step": 33674 + }, + { + "epoch": 0.5818876140447885, + "grad_norm": 1.2750599345305975, + "learning_rate": 7.850776200960358e-06, + "loss": 0.4005, + "step": 33675 + }, + { + "epoch": 0.5819048935581974, + "grad_norm": 1.1437247325186568, + "learning_rate": 7.850229633168701e-06, + "loss": 0.504, + "step": 33676 + }, + { + "epoch": 0.5819221730716063, + "grad_norm": 1.3434889076008272, + "learning_rate": 7.849683072110292e-06, + "loss": 0.3329, + "step": 33677 + }, + { + "epoch": 0.5819394525850152, + "grad_norm": 0.8275963828104126, + "learning_rate": 7.849136517786835e-06, + "loss": 0.5299, + "step": 33678 + }, + { + "epoch": 0.5819567320984241, + "grad_norm": 1.2214096402253578, + "learning_rate": 7.848589970200053e-06, + "loss": 0.2182, + "step": 33679 + }, + { + "epoch": 0.581974011611833, + "grad_norm": 0.6021398332059101, + "learning_rate": 7.848043429351646e-06, + "loss": 0.7454, + "step": 33680 + }, + { + "epoch": 0.581991291125242, + "grad_norm": 1.4401251325946849, + "learning_rate": 7.847496895243331e-06, + "loss": 0.511, + "step": 33681 + }, + { + "epoch": 0.5820085706386509, + "grad_norm": 1.5435700809196702, + "learning_rate": 7.846950367876822e-06, + "loss": 0.3918, + "step": 33682 + }, + { + "epoch": 0.5820258501520598, + "grad_norm": 1.6287071371066921, + "learning_rate": 7.846403847253823e-06, + "loss": 0.5459, + "step": 33683 + }, + { + "epoch": 0.5820431296654687, + "grad_norm": 0.929304970524183, + "learning_rate": 7.845857333376055e-06, + "loss": 0.7118, + "step": 33684 + }, + { + "epoch": 0.5820604091788775, + "grad_norm": 1.1855086070656666, + "learning_rate": 7.845310826245222e-06, + "loss": 0.4306, + "step": 33685 + }, + { + "epoch": 0.5820776886922864, + "grad_norm": 0.8012022007238969, + "learning_rate": 7.844764325863043e-06, + "loss": 0.3697, + "step": 33686 + }, + { + "epoch": 0.5820949682056953, + "grad_norm": 1.3172238943791148, + "learning_rate": 7.844217832231223e-06, + "loss": 0.4236, + "step": 33687 + }, + { + "epoch": 0.5821122477191042, + "grad_norm": 1.6740138452357574, + "learning_rate": 7.84367134535148e-06, + "loss": 0.2993, + "step": 33688 + }, + { + "epoch": 0.5821295272325131, + "grad_norm": 1.0367390066128381, + "learning_rate": 7.843124865225516e-06, + "loss": 0.4696, + "step": 33689 + }, + { + "epoch": 0.582146806745922, + "grad_norm": 1.1772610993600907, + "learning_rate": 7.842578391855056e-06, + "loss": 0.3172, + "step": 33690 + }, + { + "epoch": 0.5821640862593309, + "grad_norm": 0.8357261891564811, + "learning_rate": 7.842031925241798e-06, + "loss": 0.4115, + "step": 33691 + }, + { + "epoch": 0.5821813657727398, + "grad_norm": 0.9926401786309341, + "learning_rate": 7.841485465387464e-06, + "loss": 0.4668, + "step": 33692 + }, + { + "epoch": 0.5821986452861487, + "grad_norm": 0.9431888752827029, + "learning_rate": 7.840939012293765e-06, + "loss": 0.2754, + "step": 33693 + }, + { + "epoch": 0.5822159247995576, + "grad_norm": 0.9774954139842112, + "learning_rate": 7.840392565962403e-06, + "loss": 0.4737, + "step": 33694 + }, + { + "epoch": 0.5822332043129665, + "grad_norm": 1.2767133858799695, + "learning_rate": 7.839846126395098e-06, + "loss": 0.3809, + "step": 33695 + }, + { + "epoch": 0.5822504838263755, + "grad_norm": 1.0536484496635579, + "learning_rate": 7.839299693593556e-06, + "loss": 0.4227, + "step": 33696 + }, + { + "epoch": 0.5822677633397844, + "grad_norm": 1.519154005887462, + "learning_rate": 7.838753267559495e-06, + "loss": 0.5325, + "step": 33697 + }, + { + "epoch": 0.5822850428531933, + "grad_norm": 1.2165305315408532, + "learning_rate": 7.83820684829462e-06, + "loss": 0.4788, + "step": 33698 + }, + { + "epoch": 0.5823023223666022, + "grad_norm": 1.019379154146131, + "learning_rate": 7.837660435800652e-06, + "loss": 0.3648, + "step": 33699 + }, + { + "epoch": 0.5823196018800111, + "grad_norm": 1.52931821411943, + "learning_rate": 7.837114030079288e-06, + "loss": 0.4328, + "step": 33700 + }, + { + "epoch": 0.58233688139342, + "grad_norm": 0.9260157371576996, + "learning_rate": 7.836567631132252e-06, + "loss": 0.3391, + "step": 33701 + }, + { + "epoch": 0.5823541609068289, + "grad_norm": 0.9601043976611184, + "learning_rate": 7.836021238961247e-06, + "loss": 0.4037, + "step": 33702 + }, + { + "epoch": 0.5823714404202378, + "grad_norm": 1.01015572829646, + "learning_rate": 7.835474853567991e-06, + "loss": 0.2674, + "step": 33703 + }, + { + "epoch": 0.5823887199336467, + "grad_norm": 0.8971800703339562, + "learning_rate": 7.834928474954196e-06, + "loss": 0.3959, + "step": 33704 + }, + { + "epoch": 0.5824059994470556, + "grad_norm": 0.7077951807301104, + "learning_rate": 7.834382103121564e-06, + "loss": 0.2456, + "step": 33705 + }, + { + "epoch": 0.5824232789604644, + "grad_norm": 1.0142323244475697, + "learning_rate": 7.833835738071812e-06, + "loss": 0.2386, + "step": 33706 + }, + { + "epoch": 0.5824405584738733, + "grad_norm": 0.8849629088025528, + "learning_rate": 7.83328937980665e-06, + "loss": 0.2928, + "step": 33707 + }, + { + "epoch": 0.5824578379872822, + "grad_norm": 1.1540756079678849, + "learning_rate": 7.832743028327795e-06, + "loss": 0.2135, + "step": 33708 + }, + { + "epoch": 0.5824751175006911, + "grad_norm": 1.113471168385008, + "learning_rate": 7.832196683636948e-06, + "loss": 0.4917, + "step": 33709 + }, + { + "epoch": 0.5824923970141, + "grad_norm": 1.411033881820422, + "learning_rate": 7.83165034573583e-06, + "loss": 0.3579, + "step": 33710 + }, + { + "epoch": 0.582509676527509, + "grad_norm": 1.7431359807301587, + "learning_rate": 7.831104014626146e-06, + "loss": 0.3336, + "step": 33711 + }, + { + "epoch": 0.5825269560409179, + "grad_norm": 1.2299040850528893, + "learning_rate": 7.830557690309609e-06, + "loss": 0.5909, + "step": 33712 + }, + { + "epoch": 0.5825442355543268, + "grad_norm": 0.8403537186999955, + "learning_rate": 7.830011372787933e-06, + "loss": 0.4857, + "step": 33713 + }, + { + "epoch": 0.5825615150677357, + "grad_norm": 1.3816755549070432, + "learning_rate": 7.829465062062823e-06, + "loss": 0.4834, + "step": 33714 + }, + { + "epoch": 0.5825787945811446, + "grad_norm": 1.743967187890659, + "learning_rate": 7.828918758135996e-06, + "loss": 0.5778, + "step": 33715 + }, + { + "epoch": 0.5825960740945535, + "grad_norm": 1.2871963838830662, + "learning_rate": 7.828372461009157e-06, + "loss": 0.3952, + "step": 33716 + }, + { + "epoch": 0.5826133536079624, + "grad_norm": 1.4621115548630323, + "learning_rate": 7.827826170684021e-06, + "loss": 0.5539, + "step": 33717 + }, + { + "epoch": 0.5826306331213713, + "grad_norm": 0.8578896604596937, + "learning_rate": 7.8272798871623e-06, + "loss": 0.3621, + "step": 33718 + }, + { + "epoch": 0.5826479126347802, + "grad_norm": 1.0386801375445618, + "learning_rate": 7.826733610445706e-06, + "loss": 0.3663, + "step": 33719 + }, + { + "epoch": 0.5826651921481891, + "grad_norm": 1.0988808152862959, + "learning_rate": 7.826187340535943e-06, + "loss": 0.2501, + "step": 33720 + }, + { + "epoch": 0.582682471661598, + "grad_norm": 1.399951065247891, + "learning_rate": 7.82564107743473e-06, + "loss": 0.5336, + "step": 33721 + }, + { + "epoch": 0.5826997511750069, + "grad_norm": 1.2758352016013794, + "learning_rate": 7.825094821143769e-06, + "loss": 0.4564, + "step": 33722 + }, + { + "epoch": 0.5827170306884158, + "grad_norm": 0.9604505411065773, + "learning_rate": 7.824548571664781e-06, + "loss": 0.438, + "step": 33723 + }, + { + "epoch": 0.5827343102018248, + "grad_norm": 0.9641388418737635, + "learning_rate": 7.824002328999474e-06, + "loss": 0.4955, + "step": 33724 + }, + { + "epoch": 0.5827515897152337, + "grad_norm": 1.0966045834321334, + "learning_rate": 7.823456093149553e-06, + "loss": 0.5226, + "step": 33725 + }, + { + "epoch": 0.5827688692286426, + "grad_norm": 1.4471346598369905, + "learning_rate": 7.822909864116735e-06, + "loss": 0.488, + "step": 33726 + }, + { + "epoch": 0.5827861487420514, + "grad_norm": 0.648168044645995, + "learning_rate": 7.822363641902727e-06, + "loss": 0.6293, + "step": 33727 + }, + { + "epoch": 0.5828034282554603, + "grad_norm": 1.1660675365236486, + "learning_rate": 7.821817426509244e-06, + "loss": 0.3824, + "step": 33728 + }, + { + "epoch": 0.5828207077688692, + "grad_norm": 1.2205829651461417, + "learning_rate": 7.821271217937989e-06, + "loss": 0.46, + "step": 33729 + }, + { + "epoch": 0.5828379872822781, + "grad_norm": 1.2233896142452776, + "learning_rate": 7.820725016190684e-06, + "loss": 0.369, + "step": 33730 + }, + { + "epoch": 0.582855266795687, + "grad_norm": 0.6340403172198043, + "learning_rate": 7.82017882126903e-06, + "loss": 0.3281, + "step": 33731 + }, + { + "epoch": 0.5828725463090959, + "grad_norm": 0.7695712752130829, + "learning_rate": 7.819632633174746e-06, + "loss": 0.3105, + "step": 33732 + }, + { + "epoch": 0.5828898258225048, + "grad_norm": 1.02247035490966, + "learning_rate": 7.819086451909534e-06, + "loss": 0.4259, + "step": 33733 + }, + { + "epoch": 0.5829071053359137, + "grad_norm": 1.0685034159497666, + "learning_rate": 7.818540277475113e-06, + "loss": 0.3355, + "step": 33734 + }, + { + "epoch": 0.5829243848493226, + "grad_norm": 1.3437965204509201, + "learning_rate": 7.817994109873188e-06, + "loss": 0.4395, + "step": 33735 + }, + { + "epoch": 0.5829416643627315, + "grad_norm": 1.6067213293425486, + "learning_rate": 7.817447949105468e-06, + "loss": 0.3765, + "step": 33736 + }, + { + "epoch": 0.5829589438761404, + "grad_norm": 1.0922003600655867, + "learning_rate": 7.816901795173671e-06, + "loss": 0.3632, + "step": 33737 + }, + { + "epoch": 0.5829762233895494, + "grad_norm": 1.2402513995026394, + "learning_rate": 7.816355648079501e-06, + "loss": 0.4263, + "step": 33738 + }, + { + "epoch": 0.5829935029029583, + "grad_norm": 1.1666519105616107, + "learning_rate": 7.815809507824673e-06, + "loss": 0.2621, + "step": 33739 + }, + { + "epoch": 0.5830107824163672, + "grad_norm": 1.1399595131386442, + "learning_rate": 7.815263374410894e-06, + "loss": 0.3578, + "step": 33740 + }, + { + "epoch": 0.5830280619297761, + "grad_norm": 0.5702458376076726, + "learning_rate": 7.814717247839878e-06, + "loss": 0.624, + "step": 33741 + }, + { + "epoch": 0.583045341443185, + "grad_norm": 1.0045855573331481, + "learning_rate": 7.81417112811333e-06, + "loss": 0.4051, + "step": 33742 + }, + { + "epoch": 0.5830626209565939, + "grad_norm": 0.9053328854888915, + "learning_rate": 7.813625015232969e-06, + "loss": 0.3153, + "step": 33743 + }, + { + "epoch": 0.5830799004700028, + "grad_norm": 0.5609925039198628, + "learning_rate": 7.8130789092005e-06, + "loss": 0.6621, + "step": 33744 + }, + { + "epoch": 0.5830971799834117, + "grad_norm": 0.7894816366770203, + "learning_rate": 7.81253281001763e-06, + "loss": 0.3931, + "step": 33745 + }, + { + "epoch": 0.5831144594968206, + "grad_norm": 0.561988897063238, + "learning_rate": 7.811986717686077e-06, + "loss": 0.448, + "step": 33746 + }, + { + "epoch": 0.5831317390102295, + "grad_norm": 0.46592090298599387, + "learning_rate": 7.811440632207545e-06, + "loss": 0.4395, + "step": 33747 + }, + { + "epoch": 0.5831490185236384, + "grad_norm": 0.9795082623340717, + "learning_rate": 7.810894553583751e-06, + "loss": 0.3238, + "step": 33748 + }, + { + "epoch": 0.5831662980370472, + "grad_norm": 1.0337075327657097, + "learning_rate": 7.810348481816396e-06, + "loss": 0.418, + "step": 33749 + }, + { + "epoch": 0.5831835775504561, + "grad_norm": 0.8765535738750871, + "learning_rate": 7.809802416907202e-06, + "loss": 0.3106, + "step": 33750 + }, + { + "epoch": 0.583200857063865, + "grad_norm": 0.7268694864924844, + "learning_rate": 7.80925635885787e-06, + "loss": 0.2987, + "step": 33751 + }, + { + "epoch": 0.583218136577274, + "grad_norm": 0.8782940879162557, + "learning_rate": 7.808710307670116e-06, + "loss": 0.309, + "step": 33752 + }, + { + "epoch": 0.5832354160906829, + "grad_norm": 1.1458920037015758, + "learning_rate": 7.808164263345644e-06, + "loss": 0.382, + "step": 33753 + }, + { + "epoch": 0.5832526956040918, + "grad_norm": 0.8124942194033765, + "learning_rate": 7.807618225886173e-06, + "loss": 0.5594, + "step": 33754 + }, + { + "epoch": 0.5832699751175007, + "grad_norm": 0.42799699932429963, + "learning_rate": 7.807072195293406e-06, + "loss": 0.7072, + "step": 33755 + }, + { + "epoch": 0.5832872546309096, + "grad_norm": 1.013883860166173, + "learning_rate": 7.806526171569053e-06, + "loss": 0.4005, + "step": 33756 + }, + { + "epoch": 0.5833045341443185, + "grad_norm": 0.8064192051187772, + "learning_rate": 7.80598015471483e-06, + "loss": 0.3068, + "step": 33757 + }, + { + "epoch": 0.5833218136577274, + "grad_norm": 0.9453584977322383, + "learning_rate": 7.805434144732441e-06, + "loss": 0.2558, + "step": 33758 + }, + { + "epoch": 0.5833390931711363, + "grad_norm": 1.542973810301246, + "learning_rate": 7.804888141623603e-06, + "loss": 0.5702, + "step": 33759 + }, + { + "epoch": 0.5833563726845452, + "grad_norm": 0.9127375614962545, + "learning_rate": 7.80434214539002e-06, + "loss": 0.3556, + "step": 33760 + }, + { + "epoch": 0.5833736521979541, + "grad_norm": 1.27327384839474, + "learning_rate": 7.803796156033403e-06, + "loss": 0.5179, + "step": 33761 + }, + { + "epoch": 0.583390931711363, + "grad_norm": 1.0477545408576907, + "learning_rate": 7.803250173555464e-06, + "loss": 0.3353, + "step": 33762 + }, + { + "epoch": 0.5834082112247719, + "grad_norm": 1.1244839319905102, + "learning_rate": 7.802704197957914e-06, + "loss": 0.3459, + "step": 33763 + }, + { + "epoch": 0.5834254907381808, + "grad_norm": 0.7924344968809909, + "learning_rate": 7.802158229242459e-06, + "loss": 0.3599, + "step": 33764 + }, + { + "epoch": 0.5834427702515897, + "grad_norm": 0.978601768470141, + "learning_rate": 7.801612267410815e-06, + "loss": 0.3967, + "step": 33765 + }, + { + "epoch": 0.5834600497649987, + "grad_norm": 0.5835383080321199, + "learning_rate": 7.801066312464687e-06, + "loss": 0.8386, + "step": 33766 + }, + { + "epoch": 0.5834773292784076, + "grad_norm": 1.0740601560125405, + "learning_rate": 7.800520364405784e-06, + "loss": 0.4323, + "step": 33767 + }, + { + "epoch": 0.5834946087918165, + "grad_norm": 0.9472608465130815, + "learning_rate": 7.799974423235823e-06, + "loss": 0.4938, + "step": 33768 + }, + { + "epoch": 0.5835118883052254, + "grad_norm": 1.1508991928654082, + "learning_rate": 7.799428488956503e-06, + "loss": 0.5212, + "step": 33769 + }, + { + "epoch": 0.5835291678186342, + "grad_norm": 1.1666831141632144, + "learning_rate": 7.798882561569546e-06, + "loss": 0.4318, + "step": 33770 + }, + { + "epoch": 0.5835464473320431, + "grad_norm": 0.7050367491897681, + "learning_rate": 7.798336641076652e-06, + "loss": 0.5467, + "step": 33771 + }, + { + "epoch": 0.583563726845452, + "grad_norm": 1.2786416756345895, + "learning_rate": 7.797790727479537e-06, + "loss": 0.455, + "step": 33772 + }, + { + "epoch": 0.5835810063588609, + "grad_norm": 0.9493914033575207, + "learning_rate": 7.797244820779907e-06, + "loss": 0.3825, + "step": 33773 + }, + { + "epoch": 0.5835982858722698, + "grad_norm": 1.1402369048351415, + "learning_rate": 7.796698920979478e-06, + "loss": 0.3885, + "step": 33774 + }, + { + "epoch": 0.5836155653856787, + "grad_norm": 1.0330269184920966, + "learning_rate": 7.796153028079953e-06, + "loss": 0.3464, + "step": 33775 + }, + { + "epoch": 0.5836328448990876, + "grad_norm": 0.7023353767237706, + "learning_rate": 7.795607142083042e-06, + "loss": 0.748, + "step": 33776 + }, + { + "epoch": 0.5836501244124965, + "grad_norm": 1.1494611798638033, + "learning_rate": 7.79506126299046e-06, + "loss": 0.2534, + "step": 33777 + }, + { + "epoch": 0.5836674039259054, + "grad_norm": 1.0151344543438074, + "learning_rate": 7.794515390803911e-06, + "loss": 0.4573, + "step": 33778 + }, + { + "epoch": 0.5836846834393143, + "grad_norm": 1.0842019721154479, + "learning_rate": 7.793969525525111e-06, + "loss": 0.6406, + "step": 33779 + }, + { + "epoch": 0.5837019629527233, + "grad_norm": 1.0061605865325072, + "learning_rate": 7.793423667155764e-06, + "loss": 0.3535, + "step": 33780 + }, + { + "epoch": 0.5837192424661322, + "grad_norm": 2.040333946296462, + "learning_rate": 7.792877815697582e-06, + "loss": 0.4234, + "step": 33781 + }, + { + "epoch": 0.5837365219795411, + "grad_norm": 0.9803529757204896, + "learning_rate": 7.792331971152271e-06, + "loss": 0.3471, + "step": 33782 + }, + { + "epoch": 0.58375380149295, + "grad_norm": 1.2562513964308037, + "learning_rate": 7.791786133521548e-06, + "loss": 0.2814, + "step": 33783 + }, + { + "epoch": 0.5837710810063589, + "grad_norm": 1.079137507317115, + "learning_rate": 7.791240302807116e-06, + "loss": 0.7996, + "step": 33784 + }, + { + "epoch": 0.5837883605197678, + "grad_norm": 0.9665127760128702, + "learning_rate": 7.790694479010693e-06, + "loss": 0.4319, + "step": 33785 + }, + { + "epoch": 0.5838056400331767, + "grad_norm": 1.1950146498667429, + "learning_rate": 7.79014866213398e-06, + "loss": 0.3246, + "step": 33786 + }, + { + "epoch": 0.5838229195465856, + "grad_norm": 1.2793737433441288, + "learning_rate": 7.789602852178686e-06, + "loss": 0.2598, + "step": 33787 + }, + { + "epoch": 0.5838401990599945, + "grad_norm": 0.8137467332901794, + "learning_rate": 7.789057049146529e-06, + "loss": 0.3309, + "step": 33788 + }, + { + "epoch": 0.5838574785734034, + "grad_norm": 0.8469960530186633, + "learning_rate": 7.788511253039206e-06, + "loss": 0.2807, + "step": 33789 + }, + { + "epoch": 0.5838747580868123, + "grad_norm": 1.6106849243815013, + "learning_rate": 7.787965463858442e-06, + "loss": 0.4101, + "step": 33790 + }, + { + "epoch": 0.5838920376002211, + "grad_norm": 1.0705586747123637, + "learning_rate": 7.787419681605932e-06, + "loss": 0.3315, + "step": 33791 + }, + { + "epoch": 0.58390931711363, + "grad_norm": 0.7391501625902028, + "learning_rate": 7.786873906283394e-06, + "loss": 0.884, + "step": 33792 + }, + { + "epoch": 0.5839265966270389, + "grad_norm": 1.2987654126018169, + "learning_rate": 7.786328137892534e-06, + "loss": 0.4115, + "step": 33793 + }, + { + "epoch": 0.5839438761404478, + "grad_norm": 0.9216400305753868, + "learning_rate": 7.785782376435067e-06, + "loss": 0.4638, + "step": 33794 + }, + { + "epoch": 0.5839611556538568, + "grad_norm": 1.1031524774755193, + "learning_rate": 7.785236621912691e-06, + "loss": 0.2919, + "step": 33795 + }, + { + "epoch": 0.5839784351672657, + "grad_norm": 0.6495198297156449, + "learning_rate": 7.784690874327128e-06, + "loss": 0.9221, + "step": 33796 + }, + { + "epoch": 0.5839957146806746, + "grad_norm": 0.9401383752736658, + "learning_rate": 7.784145133680082e-06, + "loss": 0.3395, + "step": 33797 + }, + { + "epoch": 0.5840129941940835, + "grad_norm": 1.3493671118821229, + "learning_rate": 7.783599399973256e-06, + "loss": 0.4568, + "step": 33798 + }, + { + "epoch": 0.5840302737074924, + "grad_norm": 0.8574418172460141, + "learning_rate": 7.783053673208371e-06, + "loss": 0.2908, + "step": 33799 + }, + { + "epoch": 0.5840475532209013, + "grad_norm": 1.1685700765627876, + "learning_rate": 7.782507953387125e-06, + "loss": 0.6433, + "step": 33800 + }, + { + "epoch": 0.5840648327343102, + "grad_norm": 1.2510275401043485, + "learning_rate": 7.781962240511236e-06, + "loss": 0.319, + "step": 33801 + }, + { + "epoch": 0.5840821122477191, + "grad_norm": 1.1609587569879483, + "learning_rate": 7.781416534582409e-06, + "loss": 0.4531, + "step": 33802 + }, + { + "epoch": 0.584099391761128, + "grad_norm": 1.5626845289650082, + "learning_rate": 7.780870835602354e-06, + "loss": 0.4341, + "step": 33803 + }, + { + "epoch": 0.5841166712745369, + "grad_norm": 0.8937988268155119, + "learning_rate": 7.78032514357278e-06, + "loss": 0.2871, + "step": 33804 + }, + { + "epoch": 0.5841339507879458, + "grad_norm": 1.1066471770576567, + "learning_rate": 7.7797794584954e-06, + "loss": 0.3714, + "step": 33805 + }, + { + "epoch": 0.5841512303013547, + "grad_norm": 1.1203004020057605, + "learning_rate": 7.779233780371915e-06, + "loss": 0.4764, + "step": 33806 + }, + { + "epoch": 0.5841685098147636, + "grad_norm": 1.261015412847768, + "learning_rate": 7.778688109204039e-06, + "loss": 0.4817, + "step": 33807 + }, + { + "epoch": 0.5841857893281726, + "grad_norm": 1.116126719927283, + "learning_rate": 7.778142444993484e-06, + "loss": 0.4106, + "step": 33808 + }, + { + "epoch": 0.5842030688415815, + "grad_norm": 0.846261054585888, + "learning_rate": 7.77759678774195e-06, + "loss": 0.4266, + "step": 33809 + }, + { + "epoch": 0.5842203483549904, + "grad_norm": 1.579303436130282, + "learning_rate": 7.777051137451157e-06, + "loss": 0.4201, + "step": 33810 + }, + { + "epoch": 0.5842376278683993, + "grad_norm": 3.0656196303284666, + "learning_rate": 7.776505494122805e-06, + "loss": 0.3571, + "step": 33811 + }, + { + "epoch": 0.5842549073818081, + "grad_norm": 1.6086750943727643, + "learning_rate": 7.77595985775861e-06, + "loss": 0.4273, + "step": 33812 + }, + { + "epoch": 0.584272186895217, + "grad_norm": 0.6471657605561442, + "learning_rate": 7.775414228360274e-06, + "loss": 0.9311, + "step": 33813 + }, + { + "epoch": 0.5842894664086259, + "grad_norm": 0.9769514673104936, + "learning_rate": 7.774868605929515e-06, + "loss": 0.4652, + "step": 33814 + }, + { + "epoch": 0.5843067459220348, + "grad_norm": 1.4657130386309623, + "learning_rate": 7.774322990468029e-06, + "loss": 0.3154, + "step": 33815 + }, + { + "epoch": 0.5843240254354437, + "grad_norm": 0.9176902735711038, + "learning_rate": 7.773777381977541e-06, + "loss": 0.4194, + "step": 33816 + }, + { + "epoch": 0.5843413049488526, + "grad_norm": 1.3691062078228713, + "learning_rate": 7.773231780459748e-06, + "loss": 0.2979, + "step": 33817 + }, + { + "epoch": 0.5843585844622615, + "grad_norm": 1.2871493318524354, + "learning_rate": 7.77268618591636e-06, + "loss": 0.3425, + "step": 33818 + }, + { + "epoch": 0.5843758639756704, + "grad_norm": 1.1439982600784733, + "learning_rate": 7.772140598349093e-06, + "loss": 0.5312, + "step": 33819 + }, + { + "epoch": 0.5843931434890793, + "grad_norm": 0.8067599852810057, + "learning_rate": 7.771595017759645e-06, + "loss": 0.2574, + "step": 33820 + }, + { + "epoch": 0.5844104230024882, + "grad_norm": 0.8322055325679617, + "learning_rate": 7.771049444149735e-06, + "loss": 0.4996, + "step": 33821 + }, + { + "epoch": 0.5844277025158972, + "grad_norm": 0.843314914277015, + "learning_rate": 7.770503877521066e-06, + "loss": 0.2602, + "step": 33822 + }, + { + "epoch": 0.5844449820293061, + "grad_norm": 1.2180249585480558, + "learning_rate": 7.769958317875348e-06, + "loss": 0.348, + "step": 33823 + }, + { + "epoch": 0.584462261542715, + "grad_norm": 0.9500039862391332, + "learning_rate": 7.769412765214288e-06, + "loss": 0.4596, + "step": 33824 + }, + { + "epoch": 0.5844795410561239, + "grad_norm": 1.3982934121520658, + "learning_rate": 7.768867219539602e-06, + "loss": 0.2737, + "step": 33825 + }, + { + "epoch": 0.5844968205695328, + "grad_norm": 0.8995386504746411, + "learning_rate": 7.768321680852992e-06, + "loss": 0.3303, + "step": 33826 + }, + { + "epoch": 0.5845141000829417, + "grad_norm": 1.086205172573327, + "learning_rate": 7.767776149156166e-06, + "loss": 0.5326, + "step": 33827 + }, + { + "epoch": 0.5845313795963506, + "grad_norm": 0.8510923664304947, + "learning_rate": 7.767230624450837e-06, + "loss": 0.2912, + "step": 33828 + }, + { + "epoch": 0.5845486591097595, + "grad_norm": 0.86212866185177, + "learning_rate": 7.766685106738706e-06, + "loss": 0.2734, + "step": 33829 + }, + { + "epoch": 0.5845659386231684, + "grad_norm": 0.9079655378367973, + "learning_rate": 7.766139596021493e-06, + "loss": 0.368, + "step": 33830 + }, + { + "epoch": 0.5845832181365773, + "grad_norm": 1.1861373591468203, + "learning_rate": 7.765594092300898e-06, + "loss": 0.4366, + "step": 33831 + }, + { + "epoch": 0.5846004976499862, + "grad_norm": 1.373986929286011, + "learning_rate": 7.765048595578632e-06, + "loss": 0.5778, + "step": 33832 + }, + { + "epoch": 0.584617777163395, + "grad_norm": 1.0706977752427338, + "learning_rate": 7.764503105856404e-06, + "loss": 0.4623, + "step": 33833 + }, + { + "epoch": 0.5846350566768039, + "grad_norm": 1.1058877406963188, + "learning_rate": 7.763957623135925e-06, + "loss": 0.2072, + "step": 33834 + }, + { + "epoch": 0.5846523361902128, + "grad_norm": 2.10544926768536, + "learning_rate": 7.763412147418894e-06, + "loss": 0.3549, + "step": 33835 + }, + { + "epoch": 0.5846696157036217, + "grad_norm": 1.059850296199816, + "learning_rate": 7.762866678707033e-06, + "loss": 0.6359, + "step": 33836 + }, + { + "epoch": 0.5846868952170307, + "grad_norm": 0.9551388187053395, + "learning_rate": 7.762321217002041e-06, + "loss": 0.3695, + "step": 33837 + }, + { + "epoch": 0.5847041747304396, + "grad_norm": 1.556824583151084, + "learning_rate": 7.761775762305626e-06, + "loss": 0.27, + "step": 33838 + }, + { + "epoch": 0.5847214542438485, + "grad_norm": 0.8176037631575648, + "learning_rate": 7.761230314619505e-06, + "loss": 0.2922, + "step": 33839 + }, + { + "epoch": 0.5847387337572574, + "grad_norm": 1.3239521698451193, + "learning_rate": 7.760684873945377e-06, + "loss": 0.3669, + "step": 33840 + }, + { + "epoch": 0.5847560132706663, + "grad_norm": 1.3175679142225372, + "learning_rate": 7.760139440284957e-06, + "loss": 0.4305, + "step": 33841 + }, + { + "epoch": 0.5847732927840752, + "grad_norm": 1.4160930288030802, + "learning_rate": 7.759594013639946e-06, + "loss": 0.4093, + "step": 33842 + }, + { + "epoch": 0.5847905722974841, + "grad_norm": 1.1694810190938516, + "learning_rate": 7.759048594012059e-06, + "loss": 0.2987, + "step": 33843 + }, + { + "epoch": 0.584807851810893, + "grad_norm": 1.1432284250972224, + "learning_rate": 7.758503181403002e-06, + "loss": 0.3788, + "step": 33844 + }, + { + "epoch": 0.5848251313243019, + "grad_norm": 1.057799745350881, + "learning_rate": 7.757957775814486e-06, + "loss": 0.4968, + "step": 33845 + }, + { + "epoch": 0.5848424108377108, + "grad_norm": 1.2599056578871555, + "learning_rate": 7.757412377248213e-06, + "loss": 0.4167, + "step": 33846 + }, + { + "epoch": 0.5848596903511197, + "grad_norm": 0.8607399925833105, + "learning_rate": 7.756866985705897e-06, + "loss": 0.3993, + "step": 33847 + }, + { + "epoch": 0.5848769698645286, + "grad_norm": 1.2542476787549044, + "learning_rate": 7.756321601189245e-06, + "loss": 0.5009, + "step": 33848 + }, + { + "epoch": 0.5848942493779375, + "grad_norm": 0.8209364342252974, + "learning_rate": 7.75577622369996e-06, + "loss": 0.4832, + "step": 33849 + }, + { + "epoch": 0.5849115288913465, + "grad_norm": 0.8691708658175444, + "learning_rate": 7.755230853239761e-06, + "loss": 0.4264, + "step": 33850 + }, + { + "epoch": 0.5849288084047554, + "grad_norm": 1.5754630469842463, + "learning_rate": 7.754685489810344e-06, + "loss": 0.4916, + "step": 33851 + }, + { + "epoch": 0.5849460879181643, + "grad_norm": 1.0029205559076217, + "learning_rate": 7.754140133413424e-06, + "loss": 0.4316, + "step": 33852 + }, + { + "epoch": 0.5849633674315732, + "grad_norm": 1.2729991703386894, + "learning_rate": 7.753594784050706e-06, + "loss": 0.2327, + "step": 33853 + }, + { + "epoch": 0.584980646944982, + "grad_norm": 0.986967360838784, + "learning_rate": 7.753049441723905e-06, + "loss": 0.2908, + "step": 33854 + }, + { + "epoch": 0.5849979264583909, + "grad_norm": 0.7458332932589928, + "learning_rate": 7.752504106434718e-06, + "loss": 0.7223, + "step": 33855 + }, + { + "epoch": 0.5850152059717998, + "grad_norm": 1.200320433245083, + "learning_rate": 7.751958778184866e-06, + "loss": 0.4742, + "step": 33856 + }, + { + "epoch": 0.5850324854852087, + "grad_norm": 1.5154588983337132, + "learning_rate": 7.751413456976044e-06, + "loss": 0.414, + "step": 33857 + }, + { + "epoch": 0.5850497649986176, + "grad_norm": 0.8742383881344077, + "learning_rate": 7.750868142809966e-06, + "loss": 0.6216, + "step": 33858 + }, + { + "epoch": 0.5850670445120265, + "grad_norm": 1.7124059025321658, + "learning_rate": 7.750322835688345e-06, + "loss": 0.3405, + "step": 33859 + }, + { + "epoch": 0.5850843240254354, + "grad_norm": 1.089395722503172, + "learning_rate": 7.749777535612879e-06, + "loss": 0.3415, + "step": 33860 + }, + { + "epoch": 0.5851016035388443, + "grad_norm": 1.3139827599312652, + "learning_rate": 7.749232242585282e-06, + "loss": 0.4326, + "step": 33861 + }, + { + "epoch": 0.5851188830522532, + "grad_norm": 1.0426822944712975, + "learning_rate": 7.748686956607261e-06, + "loss": 0.5133, + "step": 33862 + }, + { + "epoch": 0.5851361625656621, + "grad_norm": 1.007575190676453, + "learning_rate": 7.748141677680522e-06, + "loss": 0.3338, + "step": 33863 + }, + { + "epoch": 0.585153442079071, + "grad_norm": 1.0235663464160139, + "learning_rate": 7.747596405806776e-06, + "loss": 0.4131, + "step": 33864 + }, + { + "epoch": 0.58517072159248, + "grad_norm": 1.1614543726112536, + "learning_rate": 7.747051140987731e-06, + "loss": 0.4929, + "step": 33865 + }, + { + "epoch": 0.5851880011058889, + "grad_norm": 2.4230238292928026, + "learning_rate": 7.746505883225091e-06, + "loss": 0.3324, + "step": 33866 + }, + { + "epoch": 0.5852052806192978, + "grad_norm": 1.0892650755015185, + "learning_rate": 7.745960632520566e-06, + "loss": 0.4325, + "step": 33867 + }, + { + "epoch": 0.5852225601327067, + "grad_norm": 1.192204191561097, + "learning_rate": 7.745415388875865e-06, + "loss": 0.3342, + "step": 33868 + }, + { + "epoch": 0.5852398396461156, + "grad_norm": 0.8130470740199746, + "learning_rate": 7.744870152292691e-06, + "loss": 0.4628, + "step": 33869 + }, + { + "epoch": 0.5852571191595245, + "grad_norm": 1.071089505067403, + "learning_rate": 7.744324922772758e-06, + "loss": 0.3285, + "step": 33870 + }, + { + "epoch": 0.5852743986729334, + "grad_norm": 1.1182253339777997, + "learning_rate": 7.74377970031777e-06, + "loss": 0.558, + "step": 33871 + }, + { + "epoch": 0.5852916781863423, + "grad_norm": 1.4132475330228984, + "learning_rate": 7.743234484929436e-06, + "loss": 0.5297, + "step": 33872 + }, + { + "epoch": 0.5853089576997512, + "grad_norm": 1.3415412470200114, + "learning_rate": 7.742689276609459e-06, + "loss": 0.4326, + "step": 33873 + }, + { + "epoch": 0.5853262372131601, + "grad_norm": 2.2298755279733498, + "learning_rate": 7.742144075359558e-06, + "loss": 0.3641, + "step": 33874 + }, + { + "epoch": 0.585343516726569, + "grad_norm": 0.8577362358577245, + "learning_rate": 7.741598881181424e-06, + "loss": 0.4343, + "step": 33875 + }, + { + "epoch": 0.5853607962399778, + "grad_norm": 1.302945915917369, + "learning_rate": 7.741053694076784e-06, + "loss": 0.4041, + "step": 33876 + }, + { + "epoch": 0.5853780757533867, + "grad_norm": 1.3154819589471325, + "learning_rate": 7.74050851404733e-06, + "loss": 0.3576, + "step": 33877 + }, + { + "epoch": 0.5853953552667956, + "grad_norm": 0.8449418546630406, + "learning_rate": 7.739963341094775e-06, + "loss": 0.3493, + "step": 33878 + }, + { + "epoch": 0.5854126347802046, + "grad_norm": 0.831537609079811, + "learning_rate": 7.73941817522083e-06, + "loss": 0.4361, + "step": 33879 + }, + { + "epoch": 0.5854299142936135, + "grad_norm": 1.1666740346733566, + "learning_rate": 7.738873016427195e-06, + "loss": 0.4341, + "step": 33880 + }, + { + "epoch": 0.5854471938070224, + "grad_norm": 1.1656718840924294, + "learning_rate": 7.738327864715584e-06, + "loss": 0.3696, + "step": 33881 + }, + { + "epoch": 0.5854644733204313, + "grad_norm": 0.9179151786274125, + "learning_rate": 7.737782720087699e-06, + "loss": 0.3868, + "step": 33882 + }, + { + "epoch": 0.5854817528338402, + "grad_norm": 1.0635510106040515, + "learning_rate": 7.737237582545253e-06, + "loss": 0.4627, + "step": 33883 + }, + { + "epoch": 0.5854990323472491, + "grad_norm": 0.7766676668580044, + "learning_rate": 7.736692452089949e-06, + "loss": 0.3177, + "step": 33884 + }, + { + "epoch": 0.585516311860658, + "grad_norm": 1.096211282366096, + "learning_rate": 7.7361473287235e-06, + "loss": 0.4749, + "step": 33885 + }, + { + "epoch": 0.5855335913740669, + "grad_norm": 1.1608828614313311, + "learning_rate": 7.735602212447606e-06, + "loss": 0.2967, + "step": 33886 + }, + { + "epoch": 0.5855508708874758, + "grad_norm": 0.9142950135842877, + "learning_rate": 7.73505710326398e-06, + "loss": 0.3374, + "step": 33887 + }, + { + "epoch": 0.5855681504008847, + "grad_norm": 1.246923184266913, + "learning_rate": 7.73451200117433e-06, + "loss": 0.4553, + "step": 33888 + }, + { + "epoch": 0.5855854299142936, + "grad_norm": 0.8433988138649764, + "learning_rate": 7.733966906180353e-06, + "loss": 0.396, + "step": 33889 + }, + { + "epoch": 0.5856027094277025, + "grad_norm": 1.0190154670002105, + "learning_rate": 7.73342181828377e-06, + "loss": 0.5931, + "step": 33890 + }, + { + "epoch": 0.5856199889411114, + "grad_norm": 0.5668310771442432, + "learning_rate": 7.732876737486279e-06, + "loss": 0.8668, + "step": 33891 + }, + { + "epoch": 0.5856372684545204, + "grad_norm": 1.369883733461875, + "learning_rate": 7.732331663789592e-06, + "loss": 0.4175, + "step": 33892 + }, + { + "epoch": 0.5856545479679293, + "grad_norm": 1.0603387990120603, + "learning_rate": 7.731786597195412e-06, + "loss": 0.5068, + "step": 33893 + }, + { + "epoch": 0.5856718274813382, + "grad_norm": 1.4406200285978086, + "learning_rate": 7.731241537705455e-06, + "loss": 0.4213, + "step": 33894 + }, + { + "epoch": 0.5856891069947471, + "grad_norm": 1.0426830726803473, + "learning_rate": 7.730696485321413e-06, + "loss": 0.4253, + "step": 33895 + }, + { + "epoch": 0.585706386508156, + "grad_norm": 0.7925340429571042, + "learning_rate": 7.730151440045009e-06, + "loss": 0.3953, + "step": 33896 + }, + { + "epoch": 0.5857236660215648, + "grad_norm": 0.9234222480856803, + "learning_rate": 7.729606401877939e-06, + "loss": 0.2473, + "step": 33897 + }, + { + "epoch": 0.5857409455349737, + "grad_norm": 1.1844571989464063, + "learning_rate": 7.729061370821917e-06, + "loss": 0.4603, + "step": 33898 + }, + { + "epoch": 0.5857582250483826, + "grad_norm": 0.7142053805948128, + "learning_rate": 7.72851634687865e-06, + "loss": 0.3996, + "step": 33899 + }, + { + "epoch": 0.5857755045617915, + "grad_norm": 1.9129462969643796, + "learning_rate": 7.727971330049837e-06, + "loss": 0.4736, + "step": 33900 + }, + { + "epoch": 0.5857927840752004, + "grad_norm": 1.2781616868403607, + "learning_rate": 7.727426320337192e-06, + "loss": 0.3649, + "step": 33901 + }, + { + "epoch": 0.5858100635886093, + "grad_norm": 0.8128127146143848, + "learning_rate": 7.726881317742421e-06, + "loss": 0.4503, + "step": 33902 + }, + { + "epoch": 0.5858273431020182, + "grad_norm": 0.764327444556401, + "learning_rate": 7.72633632226723e-06, + "loss": 0.4832, + "step": 33903 + }, + { + "epoch": 0.5858446226154271, + "grad_norm": 1.105018426902467, + "learning_rate": 7.725791333913326e-06, + "loss": 0.4146, + "step": 33904 + }, + { + "epoch": 0.585861902128836, + "grad_norm": 1.0274953063313605, + "learning_rate": 7.72524635268242e-06, + "loss": 0.5504, + "step": 33905 + }, + { + "epoch": 0.585879181642245, + "grad_norm": 0.5471182197331296, + "learning_rate": 7.72470137857621e-06, + "loss": 0.7722, + "step": 33906 + }, + { + "epoch": 0.5858964611556539, + "grad_norm": 1.3848462694441481, + "learning_rate": 7.724156411596412e-06, + "loss": 0.629, + "step": 33907 + }, + { + "epoch": 0.5859137406690628, + "grad_norm": 0.8293239622079217, + "learning_rate": 7.723611451744726e-06, + "loss": 0.6165, + "step": 33908 + }, + { + "epoch": 0.5859310201824717, + "grad_norm": 1.0365474115088629, + "learning_rate": 7.723066499022865e-06, + "loss": 0.2579, + "step": 33909 + }, + { + "epoch": 0.5859482996958806, + "grad_norm": 0.9471961101763409, + "learning_rate": 7.722521553432534e-06, + "loss": 0.2879, + "step": 33910 + }, + { + "epoch": 0.5859655792092895, + "grad_norm": 1.0140460858804972, + "learning_rate": 7.721976614975433e-06, + "loss": 0.4473, + "step": 33911 + }, + { + "epoch": 0.5859828587226984, + "grad_norm": 1.0983960287003585, + "learning_rate": 7.721431683653279e-06, + "loss": 0.3728, + "step": 33912 + }, + { + "epoch": 0.5860001382361073, + "grad_norm": 1.2455898997430601, + "learning_rate": 7.720886759467771e-06, + "loss": 0.4195, + "step": 33913 + }, + { + "epoch": 0.5860174177495162, + "grad_norm": 1.5569568246657872, + "learning_rate": 7.720341842420623e-06, + "loss": 0.398, + "step": 33914 + }, + { + "epoch": 0.5860346972629251, + "grad_norm": 1.3620474075092563, + "learning_rate": 7.719796932513531e-06, + "loss": 0.2953, + "step": 33915 + }, + { + "epoch": 0.586051976776334, + "grad_norm": 0.9827793950507211, + "learning_rate": 7.719252029748215e-06, + "loss": 0.2535, + "step": 33916 + }, + { + "epoch": 0.5860692562897429, + "grad_norm": 0.8270066940959325, + "learning_rate": 7.718707134126372e-06, + "loss": 0.3062, + "step": 33917 + }, + { + "epoch": 0.5860865358031517, + "grad_norm": 1.1885960966653792, + "learning_rate": 7.718162245649712e-06, + "loss": 0.5891, + "step": 33918 + }, + { + "epoch": 0.5861038153165606, + "grad_norm": 1.3590085902235882, + "learning_rate": 7.717617364319943e-06, + "loss": 0.6084, + "step": 33919 + }, + { + "epoch": 0.5861210948299695, + "grad_norm": 0.9966420522836694, + "learning_rate": 7.717072490138767e-06, + "loss": 0.3562, + "step": 33920 + }, + { + "epoch": 0.5861383743433785, + "grad_norm": 1.3194407677494717, + "learning_rate": 7.716527623107894e-06, + "loss": 0.4686, + "step": 33921 + }, + { + "epoch": 0.5861556538567874, + "grad_norm": 1.1413716576996478, + "learning_rate": 7.71598276322903e-06, + "loss": 0.4243, + "step": 33922 + }, + { + "epoch": 0.5861729333701963, + "grad_norm": 1.6879638962289691, + "learning_rate": 7.715437910503882e-06, + "loss": 0.5171, + "step": 33923 + }, + { + "epoch": 0.5861902128836052, + "grad_norm": 1.0939657275844759, + "learning_rate": 7.714893064934154e-06, + "loss": 0.6286, + "step": 33924 + }, + { + "epoch": 0.5862074923970141, + "grad_norm": 1.100172255900978, + "learning_rate": 7.714348226521562e-06, + "loss": 0.5126, + "step": 33925 + }, + { + "epoch": 0.586224771910423, + "grad_norm": 1.1228433568352187, + "learning_rate": 7.713803395267797e-06, + "loss": 0.7546, + "step": 33926 + }, + { + "epoch": 0.5862420514238319, + "grad_norm": 1.1275347889249194, + "learning_rate": 7.713258571174577e-06, + "loss": 0.4524, + "step": 33927 + }, + { + "epoch": 0.5862593309372408, + "grad_norm": 0.8999075898941672, + "learning_rate": 7.712713754243602e-06, + "loss": 0.3187, + "step": 33928 + }, + { + "epoch": 0.5862766104506497, + "grad_norm": 1.0537401470832686, + "learning_rate": 7.712168944476584e-06, + "loss": 0.3143, + "step": 33929 + }, + { + "epoch": 0.5862938899640586, + "grad_norm": 1.1234530088939692, + "learning_rate": 7.71162414187523e-06, + "loss": 0.4157, + "step": 33930 + }, + { + "epoch": 0.5863111694774675, + "grad_norm": 0.9615113601646145, + "learning_rate": 7.711079346441238e-06, + "loss": 0.4961, + "step": 33931 + }, + { + "epoch": 0.5863284489908764, + "grad_norm": 1.2161385156964297, + "learning_rate": 7.710534558176321e-06, + "loss": 0.5678, + "step": 33932 + }, + { + "epoch": 0.5863457285042853, + "grad_norm": 1.2065316381015396, + "learning_rate": 7.709989777082184e-06, + "loss": 0.2962, + "step": 33933 + }, + { + "epoch": 0.5863630080176943, + "grad_norm": 1.1111024135846481, + "learning_rate": 7.709445003160533e-06, + "loss": 0.3726, + "step": 33934 + }, + { + "epoch": 0.5863802875311032, + "grad_norm": 1.770672360352897, + "learning_rate": 7.70890023641307e-06, + "loss": 0.5285, + "step": 33935 + }, + { + "epoch": 0.5863975670445121, + "grad_norm": 1.1551975534175822, + "learning_rate": 7.708355476841513e-06, + "loss": 0.4276, + "step": 33936 + }, + { + "epoch": 0.586414846557921, + "grad_norm": 1.1039214909936454, + "learning_rate": 7.707810724447556e-06, + "loss": 0.4478, + "step": 33937 + }, + { + "epoch": 0.5864321260713299, + "grad_norm": 1.337904775281645, + "learning_rate": 7.707265979232912e-06, + "loss": 0.4039, + "step": 33938 + }, + { + "epoch": 0.5864494055847387, + "grad_norm": 0.8272252543044208, + "learning_rate": 7.706721241199284e-06, + "loss": 0.3109, + "step": 33939 + }, + { + "epoch": 0.5864666850981476, + "grad_norm": 0.9947647080063468, + "learning_rate": 7.706176510348383e-06, + "loss": 0.4173, + "step": 33940 + }, + { + "epoch": 0.5864839646115565, + "grad_norm": 0.8907922331439221, + "learning_rate": 7.705631786681908e-06, + "loss": 0.3914, + "step": 33941 + }, + { + "epoch": 0.5865012441249654, + "grad_norm": 1.493479286320242, + "learning_rate": 7.705087070201568e-06, + "loss": 0.5315, + "step": 33942 + }, + { + "epoch": 0.5865185236383743, + "grad_norm": 1.0482169324082684, + "learning_rate": 7.70454236090907e-06, + "loss": 0.2624, + "step": 33943 + }, + { + "epoch": 0.5865358031517832, + "grad_norm": 1.2595381920955513, + "learning_rate": 7.703997658806119e-06, + "loss": 0.3142, + "step": 33944 + }, + { + "epoch": 0.5865530826651921, + "grad_norm": 1.0471601211786945, + "learning_rate": 7.703452963894426e-06, + "loss": 0.4673, + "step": 33945 + }, + { + "epoch": 0.586570362178601, + "grad_norm": 1.1887574314228107, + "learning_rate": 7.70290827617569e-06, + "loss": 0.3573, + "step": 33946 + }, + { + "epoch": 0.5865876416920099, + "grad_norm": 1.150079093511209, + "learning_rate": 7.70236359565162e-06, + "loss": 0.3871, + "step": 33947 + }, + { + "epoch": 0.5866049212054188, + "grad_norm": 0.7991355072975967, + "learning_rate": 7.701818922323921e-06, + "loss": 0.1666, + "step": 33948 + }, + { + "epoch": 0.5866222007188278, + "grad_norm": 0.7549693432115229, + "learning_rate": 7.701274256194301e-06, + "loss": 0.3002, + "step": 33949 + }, + { + "epoch": 0.5866394802322367, + "grad_norm": 0.9094815712676754, + "learning_rate": 7.700729597264466e-06, + "loss": 0.4227, + "step": 33950 + }, + { + "epoch": 0.5866567597456456, + "grad_norm": 0.8070773425242725, + "learning_rate": 7.700184945536119e-06, + "loss": 0.2526, + "step": 33951 + }, + { + "epoch": 0.5866740392590545, + "grad_norm": 1.315954482633997, + "learning_rate": 7.699640301010967e-06, + "loss": 0.3575, + "step": 33952 + }, + { + "epoch": 0.5866913187724634, + "grad_norm": 0.8531479556914482, + "learning_rate": 7.699095663690716e-06, + "loss": 0.9091, + "step": 33953 + }, + { + "epoch": 0.5867085982858723, + "grad_norm": 0.9485317469070129, + "learning_rate": 7.698551033577076e-06, + "loss": 0.3788, + "step": 33954 + }, + { + "epoch": 0.5867258777992812, + "grad_norm": 1.2218341553913026, + "learning_rate": 7.698006410671744e-06, + "loss": 0.4064, + "step": 33955 + }, + { + "epoch": 0.5867431573126901, + "grad_norm": 1.1454222186664182, + "learning_rate": 7.697461794976436e-06, + "loss": 0.581, + "step": 33956 + }, + { + "epoch": 0.586760436826099, + "grad_norm": 1.512287687061174, + "learning_rate": 7.696917186492847e-06, + "loss": 0.4194, + "step": 33957 + }, + { + "epoch": 0.5867777163395079, + "grad_norm": 0.8120601767240836, + "learning_rate": 7.696372585222691e-06, + "loss": 0.3271, + "step": 33958 + }, + { + "epoch": 0.5867949958529168, + "grad_norm": 1.3456480276416993, + "learning_rate": 7.69582799116767e-06, + "loss": 0.3445, + "step": 33959 + }, + { + "epoch": 0.5868122753663256, + "grad_norm": 1.0657823012406011, + "learning_rate": 7.695283404329496e-06, + "loss": 0.4443, + "step": 33960 + }, + { + "epoch": 0.5868295548797345, + "grad_norm": 0.833363775990785, + "learning_rate": 7.694738824709867e-06, + "loss": 0.375, + "step": 33961 + }, + { + "epoch": 0.5868468343931434, + "grad_norm": 0.7541040782411655, + "learning_rate": 7.694194252310487e-06, + "loss": 0.263, + "step": 33962 + }, + { + "epoch": 0.5868641139065524, + "grad_norm": 1.1337347608930912, + "learning_rate": 7.693649687133069e-06, + "loss": 0.5303, + "step": 33963 + }, + { + "epoch": 0.5868813934199613, + "grad_norm": 0.5495882833302844, + "learning_rate": 7.693105129179314e-06, + "loss": 0.8263, + "step": 33964 + }, + { + "epoch": 0.5868986729333702, + "grad_norm": 1.9050403712036423, + "learning_rate": 7.692560578450933e-06, + "loss": 0.3205, + "step": 33965 + }, + { + "epoch": 0.5869159524467791, + "grad_norm": 1.179288334948556, + "learning_rate": 7.692016034949624e-06, + "loss": 0.3974, + "step": 33966 + }, + { + "epoch": 0.586933231960188, + "grad_norm": 1.205377262648254, + "learning_rate": 7.691471498677097e-06, + "loss": 0.3094, + "step": 33967 + }, + { + "epoch": 0.5869505114735969, + "grad_norm": 0.9567091877336318, + "learning_rate": 7.690926969635055e-06, + "loss": 0.5108, + "step": 33968 + }, + { + "epoch": 0.5869677909870058, + "grad_norm": 1.1808515347522122, + "learning_rate": 7.690382447825207e-06, + "loss": 0.6771, + "step": 33969 + }, + { + "epoch": 0.5869850705004147, + "grad_norm": 0.9514625035842512, + "learning_rate": 7.689837933249256e-06, + "loss": 0.5553, + "step": 33970 + }, + { + "epoch": 0.5870023500138236, + "grad_norm": 0.6398932412745459, + "learning_rate": 7.689293425908912e-06, + "loss": 0.3207, + "step": 33971 + }, + { + "epoch": 0.5870196295272325, + "grad_norm": 0.742382145155565, + "learning_rate": 7.688748925805874e-06, + "loss": 0.4917, + "step": 33972 + }, + { + "epoch": 0.5870369090406414, + "grad_norm": 0.8501487973985119, + "learning_rate": 7.688204432941848e-06, + "loss": 0.2865, + "step": 33973 + }, + { + "epoch": 0.5870541885540503, + "grad_norm": 0.8806094032413072, + "learning_rate": 7.687659947318545e-06, + "loss": 0.5117, + "step": 33974 + }, + { + "epoch": 0.5870714680674592, + "grad_norm": 1.2585171666323771, + "learning_rate": 7.68711546893766e-06, + "loss": 0.4856, + "step": 33975 + }, + { + "epoch": 0.5870887475808682, + "grad_norm": 1.240993539554199, + "learning_rate": 7.686570997800914e-06, + "loss": 0.3004, + "step": 33976 + }, + { + "epoch": 0.5871060270942771, + "grad_norm": 0.7156005952163239, + "learning_rate": 7.686026533909996e-06, + "loss": 0.43, + "step": 33977 + }, + { + "epoch": 0.587123306607686, + "grad_norm": 1.021417564961866, + "learning_rate": 7.685482077266624e-06, + "loss": 0.5807, + "step": 33978 + }, + { + "epoch": 0.5871405861210949, + "grad_norm": 1.174654038527788, + "learning_rate": 7.684937627872494e-06, + "loss": 0.5548, + "step": 33979 + }, + { + "epoch": 0.5871578656345038, + "grad_norm": 1.8284860389813125, + "learning_rate": 7.684393185729319e-06, + "loss": 0.4433, + "step": 33980 + }, + { + "epoch": 0.5871751451479126, + "grad_norm": 1.1229154853414456, + "learning_rate": 7.6838487508388e-06, + "loss": 0.2897, + "step": 33981 + }, + { + "epoch": 0.5871924246613215, + "grad_norm": 0.8822540502563444, + "learning_rate": 7.68330432320264e-06, + "loss": 0.3995, + "step": 33982 + }, + { + "epoch": 0.5872097041747304, + "grad_norm": 1.014988222805417, + "learning_rate": 7.68275990282255e-06, + "loss": 0.3165, + "step": 33983 + }, + { + "epoch": 0.5872269836881393, + "grad_norm": 1.3343025683426726, + "learning_rate": 7.682215489700228e-06, + "loss": 0.4179, + "step": 33984 + }, + { + "epoch": 0.5872442632015482, + "grad_norm": 1.3573534214218745, + "learning_rate": 7.681671083837389e-06, + "loss": 0.357, + "step": 33985 + }, + { + "epoch": 0.5872615427149571, + "grad_norm": 0.7429934376630549, + "learning_rate": 7.681126685235727e-06, + "loss": 0.8796, + "step": 33986 + }, + { + "epoch": 0.587278822228366, + "grad_norm": 0.9184731431100964, + "learning_rate": 7.680582293896956e-06, + "loss": 0.4209, + "step": 33987 + }, + { + "epoch": 0.5872961017417749, + "grad_norm": 0.7986325617843032, + "learning_rate": 7.680037909822773e-06, + "loss": 0.2723, + "step": 33988 + }, + { + "epoch": 0.5873133812551838, + "grad_norm": 1.3300597732568145, + "learning_rate": 7.679493533014892e-06, + "loss": 0.5852, + "step": 33989 + }, + { + "epoch": 0.5873306607685927, + "grad_norm": 0.7830892457339803, + "learning_rate": 7.678949163475012e-06, + "loss": 0.3317, + "step": 33990 + }, + { + "epoch": 0.5873479402820017, + "grad_norm": 1.7141044160949537, + "learning_rate": 7.678404801204841e-06, + "loss": 0.3763, + "step": 33991 + }, + { + "epoch": 0.5873652197954106, + "grad_norm": 1.050286035228435, + "learning_rate": 7.677860446206083e-06, + "loss": 0.4359, + "step": 33992 + }, + { + "epoch": 0.5873824993088195, + "grad_norm": 1.0655354909760633, + "learning_rate": 7.67731609848044e-06, + "loss": 0.3001, + "step": 33993 + }, + { + "epoch": 0.5873997788222284, + "grad_norm": 1.0824181903943872, + "learning_rate": 7.676771758029623e-06, + "loss": 0.1568, + "step": 33994 + }, + { + "epoch": 0.5874170583356373, + "grad_norm": 0.8633230885387342, + "learning_rate": 7.676227424855328e-06, + "loss": 0.4547, + "step": 33995 + }, + { + "epoch": 0.5874343378490462, + "grad_norm": 1.159639672033758, + "learning_rate": 7.67568309895927e-06, + "loss": 0.6274, + "step": 33996 + }, + { + "epoch": 0.5874516173624551, + "grad_norm": 1.315204812116733, + "learning_rate": 7.675138780343146e-06, + "loss": 0.3925, + "step": 33997 + }, + { + "epoch": 0.587468896875864, + "grad_norm": 1.6329096590565038, + "learning_rate": 7.674594469008666e-06, + "loss": 0.5809, + "step": 33998 + }, + { + "epoch": 0.5874861763892729, + "grad_norm": 2.0615828917579306, + "learning_rate": 7.67405016495753e-06, + "loss": 0.4066, + "step": 33999 + }, + { + "epoch": 0.5875034559026818, + "grad_norm": 0.870767478796292, + "learning_rate": 7.673505868191451e-06, + "loss": 0.3585, + "step": 34000 + }, + { + "epoch": 0.5875207354160907, + "grad_norm": 0.542941027623026, + "learning_rate": 7.672961578712126e-06, + "loss": 0.7532, + "step": 34001 + }, + { + "epoch": 0.5875380149294995, + "grad_norm": 1.199874722648018, + "learning_rate": 7.67241729652126e-06, + "loss": 0.5327, + "step": 34002 + }, + { + "epoch": 0.5875552944429084, + "grad_norm": 1.2230441808572208, + "learning_rate": 7.671873021620563e-06, + "loss": 0.382, + "step": 34003 + }, + { + "epoch": 0.5875725739563173, + "grad_norm": 1.1135239259245746, + "learning_rate": 7.671328754011734e-06, + "loss": 0.4731, + "step": 34004 + }, + { + "epoch": 0.5875898534697263, + "grad_norm": 1.192947728017567, + "learning_rate": 7.670784493696483e-06, + "loss": 0.3752, + "step": 34005 + }, + { + "epoch": 0.5876071329831352, + "grad_norm": 1.9966898191007663, + "learning_rate": 7.67024024067651e-06, + "loss": 0.449, + "step": 34006 + }, + { + "epoch": 0.5876244124965441, + "grad_norm": 1.181023462885468, + "learning_rate": 7.669695994953523e-06, + "loss": 0.6366, + "step": 34007 + }, + { + "epoch": 0.587641692009953, + "grad_norm": 0.8496111310194514, + "learning_rate": 7.669151756529222e-06, + "loss": 0.2021, + "step": 34008 + }, + { + "epoch": 0.5876589715233619, + "grad_norm": 0.9072457269963786, + "learning_rate": 7.668607525405317e-06, + "loss": 0.2647, + "step": 34009 + }, + { + "epoch": 0.5876762510367708, + "grad_norm": 0.9264882415046337, + "learning_rate": 7.66806330158351e-06, + "loss": 0.5649, + "step": 34010 + }, + { + "epoch": 0.5876935305501797, + "grad_norm": 1.4046958583617635, + "learning_rate": 7.667519085065509e-06, + "loss": 0.4958, + "step": 34011 + }, + { + "epoch": 0.5877108100635886, + "grad_norm": 1.1224682663113674, + "learning_rate": 7.666974875853013e-06, + "loss": 0.3772, + "step": 34012 + }, + { + "epoch": 0.5877280895769975, + "grad_norm": 0.7441765598490689, + "learning_rate": 7.666430673947726e-06, + "loss": 0.5422, + "step": 34013 + }, + { + "epoch": 0.5877453690904064, + "grad_norm": 1.1046232194205319, + "learning_rate": 7.665886479351361e-06, + "loss": 0.3657, + "step": 34014 + }, + { + "epoch": 0.5877626486038153, + "grad_norm": 0.9630959149825542, + "learning_rate": 7.665342292065611e-06, + "loss": 0.3699, + "step": 34015 + }, + { + "epoch": 0.5877799281172242, + "grad_norm": 1.0115715723403043, + "learning_rate": 7.664798112092193e-06, + "loss": 0.3193, + "step": 34016 + }, + { + "epoch": 0.5877972076306331, + "grad_norm": 0.9989491929830948, + "learning_rate": 7.6642539394328e-06, + "loss": 0.2814, + "step": 34017 + }, + { + "epoch": 0.587814487144042, + "grad_norm": 1.1406386590702557, + "learning_rate": 7.663709774089144e-06, + "loss": 0.4717, + "step": 34018 + }, + { + "epoch": 0.587831766657451, + "grad_norm": 1.1182350717673561, + "learning_rate": 7.663165616062925e-06, + "loss": 0.5874, + "step": 34019 + }, + { + "epoch": 0.5878490461708599, + "grad_norm": 1.0699679028823057, + "learning_rate": 7.66262146535585e-06, + "loss": 0.4841, + "step": 34020 + }, + { + "epoch": 0.5878663256842688, + "grad_norm": 0.8523385014968046, + "learning_rate": 7.66207732196962e-06, + "loss": 0.3607, + "step": 34021 + }, + { + "epoch": 0.5878836051976777, + "grad_norm": 1.5200107641965919, + "learning_rate": 7.661533185905944e-06, + "loss": 0.6009, + "step": 34022 + }, + { + "epoch": 0.5879008847110866, + "grad_norm": 1.0356474526745418, + "learning_rate": 7.660989057166523e-06, + "loss": 0.3969, + "step": 34023 + }, + { + "epoch": 0.5879181642244954, + "grad_norm": 0.9718237198504186, + "learning_rate": 7.66044493575306e-06, + "loss": 0.3885, + "step": 34024 + }, + { + "epoch": 0.5879354437379043, + "grad_norm": 1.1653027133515403, + "learning_rate": 7.659900821667266e-06, + "loss": 0.5678, + "step": 34025 + }, + { + "epoch": 0.5879527232513132, + "grad_norm": 0.7294013439267412, + "learning_rate": 7.659356714910835e-06, + "loss": 0.3526, + "step": 34026 + }, + { + "epoch": 0.5879700027647221, + "grad_norm": 1.2240144857579887, + "learning_rate": 7.65881261548548e-06, + "loss": 0.3729, + "step": 34027 + }, + { + "epoch": 0.587987282278131, + "grad_norm": 1.3094767702087475, + "learning_rate": 7.658268523392898e-06, + "loss": 0.5605, + "step": 34028 + }, + { + "epoch": 0.5880045617915399, + "grad_norm": 0.9284468143765606, + "learning_rate": 7.6577244386348e-06, + "loss": 0.7718, + "step": 34029 + }, + { + "epoch": 0.5880218413049488, + "grad_norm": 1.074303143851958, + "learning_rate": 7.657180361212888e-06, + "loss": 0.5615, + "step": 34030 + }, + { + "epoch": 0.5880391208183577, + "grad_norm": 0.8528934604809768, + "learning_rate": 7.656636291128865e-06, + "loss": 0.3846, + "step": 34031 + }, + { + "epoch": 0.5880564003317666, + "grad_norm": 0.9748809482927668, + "learning_rate": 7.656092228384436e-06, + "loss": 0.288, + "step": 34032 + }, + { + "epoch": 0.5880736798451756, + "grad_norm": 0.6890548836352607, + "learning_rate": 7.6555481729813e-06, + "loss": 0.373, + "step": 34033 + }, + { + "epoch": 0.5880909593585845, + "grad_norm": 1.3305468524505801, + "learning_rate": 7.655004124921171e-06, + "loss": 0.2021, + "step": 34034 + }, + { + "epoch": 0.5881082388719934, + "grad_norm": 1.1750390043923689, + "learning_rate": 7.65446008420574e-06, + "loss": 0.3305, + "step": 34035 + }, + { + "epoch": 0.5881255183854023, + "grad_norm": 1.5260695304481402, + "learning_rate": 7.653916050836725e-06, + "loss": 0.3586, + "step": 34036 + }, + { + "epoch": 0.5881427978988112, + "grad_norm": 1.5560310893854428, + "learning_rate": 7.65337202481582e-06, + "loss": 0.4792, + "step": 34037 + }, + { + "epoch": 0.5881600774122201, + "grad_norm": 1.1182987740429278, + "learning_rate": 7.652828006144732e-06, + "loss": 0.4569, + "step": 34038 + }, + { + "epoch": 0.588177356925629, + "grad_norm": 0.9035645796272966, + "learning_rate": 7.652283994825165e-06, + "loss": 0.2681, + "step": 34039 + }, + { + "epoch": 0.5881946364390379, + "grad_norm": 1.1492801285660386, + "learning_rate": 7.651739990858827e-06, + "loss": 0.5939, + "step": 34040 + }, + { + "epoch": 0.5882119159524468, + "grad_norm": 1.171898528295288, + "learning_rate": 7.651195994247413e-06, + "loss": 0.541, + "step": 34041 + }, + { + "epoch": 0.5882291954658557, + "grad_norm": 0.9938375835203875, + "learning_rate": 7.650652004992634e-06, + "loss": 0.3464, + "step": 34042 + }, + { + "epoch": 0.5882464749792646, + "grad_norm": 1.0015313454626829, + "learning_rate": 7.65010802309619e-06, + "loss": 0.2929, + "step": 34043 + }, + { + "epoch": 0.5882637544926735, + "grad_norm": 1.086949948181555, + "learning_rate": 7.649564048559786e-06, + "loss": 0.3448, + "step": 34044 + }, + { + "epoch": 0.5882810340060823, + "grad_norm": 1.0856747161015379, + "learning_rate": 7.64902008138513e-06, + "loss": 0.3681, + "step": 34045 + }, + { + "epoch": 0.5882983135194912, + "grad_norm": 0.9086586440886152, + "learning_rate": 7.648476121573916e-06, + "loss": 0.3671, + "step": 34046 + }, + { + "epoch": 0.5883155930329002, + "grad_norm": 0.7147132080663864, + "learning_rate": 7.647932169127857e-06, + "loss": 0.4928, + "step": 34047 + }, + { + "epoch": 0.5883328725463091, + "grad_norm": 1.0775721426955127, + "learning_rate": 7.64738822404865e-06, + "loss": 0.5345, + "step": 34048 + }, + { + "epoch": 0.588350152059718, + "grad_norm": 0.945179581114312, + "learning_rate": 7.646844286338007e-06, + "loss": 0.4658, + "step": 34049 + }, + { + "epoch": 0.5883674315731269, + "grad_norm": 1.1659824275860637, + "learning_rate": 7.64630035599762e-06, + "loss": 0.4548, + "step": 34050 + }, + { + "epoch": 0.5883847110865358, + "grad_norm": 0.6003168837429296, + "learning_rate": 7.645756433029206e-06, + "loss": 0.477, + "step": 34051 + }, + { + "epoch": 0.5884019905999447, + "grad_norm": 1.0693106840698452, + "learning_rate": 7.645212517434459e-06, + "loss": 0.4793, + "step": 34052 + }, + { + "epoch": 0.5884192701133536, + "grad_norm": 0.9171746101577769, + "learning_rate": 7.644668609215085e-06, + "loss": 0.3809, + "step": 34053 + }, + { + "epoch": 0.5884365496267625, + "grad_norm": 1.1012119625679802, + "learning_rate": 7.64412470837279e-06, + "loss": 0.5263, + "step": 34054 + }, + { + "epoch": 0.5884538291401714, + "grad_norm": 1.0448917580215122, + "learning_rate": 7.643580814909273e-06, + "loss": 0.4069, + "step": 34055 + }, + { + "epoch": 0.5884711086535803, + "grad_norm": 1.2334058665479504, + "learning_rate": 7.643036928826241e-06, + "loss": 0.267, + "step": 34056 + }, + { + "epoch": 0.5884883881669892, + "grad_norm": 1.0098649752691977, + "learning_rate": 7.642493050125395e-06, + "loss": 0.3375, + "step": 34057 + }, + { + "epoch": 0.5885056676803981, + "grad_norm": 1.4681090370500212, + "learning_rate": 7.641949178808441e-06, + "loss": 0.595, + "step": 34058 + }, + { + "epoch": 0.588522947193807, + "grad_norm": 1.242455638791039, + "learning_rate": 7.64140531487708e-06, + "loss": 0.2423, + "step": 34059 + }, + { + "epoch": 0.588540226707216, + "grad_norm": 1.1922117839454889, + "learning_rate": 7.64086145833302e-06, + "loss": 0.4611, + "step": 34060 + }, + { + "epoch": 0.5885575062206249, + "grad_norm": 1.0162522972590826, + "learning_rate": 7.640317609177958e-06, + "loss": 0.418, + "step": 34061 + }, + { + "epoch": 0.5885747857340338, + "grad_norm": 1.3693681757267502, + "learning_rate": 7.639773767413602e-06, + "loss": 0.4275, + "step": 34062 + }, + { + "epoch": 0.5885920652474427, + "grad_norm": 1.4644533524241083, + "learning_rate": 7.639229933041654e-06, + "loss": 0.3943, + "step": 34063 + }, + { + "epoch": 0.5886093447608516, + "grad_norm": 0.9863858250238339, + "learning_rate": 7.638686106063816e-06, + "loss": 0.2876, + "step": 34064 + }, + { + "epoch": 0.5886266242742605, + "grad_norm": 0.902785494029507, + "learning_rate": 7.638142286481795e-06, + "loss": 0.2569, + "step": 34065 + }, + { + "epoch": 0.5886439037876693, + "grad_norm": 0.8433911227893928, + "learning_rate": 7.63759847429729e-06, + "loss": 0.2882, + "step": 34066 + }, + { + "epoch": 0.5886611833010782, + "grad_norm": 1.246646007120767, + "learning_rate": 7.637054669512007e-06, + "loss": 0.2449, + "step": 34067 + }, + { + "epoch": 0.5886784628144871, + "grad_norm": 1.2610567228702227, + "learning_rate": 7.636510872127646e-06, + "loss": 0.2217, + "step": 34068 + }, + { + "epoch": 0.588695742327896, + "grad_norm": 0.7694792632845887, + "learning_rate": 7.635967082145915e-06, + "loss": 0.3429, + "step": 34069 + }, + { + "epoch": 0.5887130218413049, + "grad_norm": 1.0659306471670849, + "learning_rate": 7.635423299568512e-06, + "loss": 0.2898, + "step": 34070 + }, + { + "epoch": 0.5887303013547138, + "grad_norm": 1.029856414401024, + "learning_rate": 7.634879524397149e-06, + "loss": 0.4146, + "step": 34071 + }, + { + "epoch": 0.5887475808681227, + "grad_norm": 1.3098024772877064, + "learning_rate": 7.634335756633517e-06, + "loss": 0.3519, + "step": 34072 + }, + { + "epoch": 0.5887648603815316, + "grad_norm": 1.16150785937096, + "learning_rate": 7.633791996279328e-06, + "loss": 0.5288, + "step": 34073 + }, + { + "epoch": 0.5887821398949405, + "grad_norm": 1.3715382513851144, + "learning_rate": 7.633248243336284e-06, + "loss": 0.5756, + "step": 34074 + }, + { + "epoch": 0.5887994194083495, + "grad_norm": 1.905972338882506, + "learning_rate": 7.632704497806083e-06, + "loss": 0.4809, + "step": 34075 + }, + { + "epoch": 0.5888166989217584, + "grad_norm": 1.432077196652494, + "learning_rate": 7.632160759690434e-06, + "loss": 0.2573, + "step": 34076 + }, + { + "epoch": 0.5888339784351673, + "grad_norm": 1.2183114824155208, + "learning_rate": 7.631617028991034e-06, + "loss": 0.336, + "step": 34077 + }, + { + "epoch": 0.5888512579485762, + "grad_norm": 1.3357445219586899, + "learning_rate": 7.631073305709591e-06, + "loss": 0.2838, + "step": 34078 + }, + { + "epoch": 0.5888685374619851, + "grad_norm": 1.109294721023664, + "learning_rate": 7.630529589847807e-06, + "loss": 0.5559, + "step": 34079 + }, + { + "epoch": 0.588885816975394, + "grad_norm": 1.24238790281881, + "learning_rate": 7.629985881407386e-06, + "loss": 0.3758, + "step": 34080 + }, + { + "epoch": 0.5889030964888029, + "grad_norm": 0.9586953873320704, + "learning_rate": 7.629442180390025e-06, + "loss": 0.2257, + "step": 34081 + }, + { + "epoch": 0.5889203760022118, + "grad_norm": 0.966906713864722, + "learning_rate": 7.628898486797435e-06, + "loss": 0.357, + "step": 34082 + }, + { + "epoch": 0.5889376555156207, + "grad_norm": 0.8073989036674287, + "learning_rate": 7.628354800631312e-06, + "loss": 0.4555, + "step": 34083 + }, + { + "epoch": 0.5889549350290296, + "grad_norm": 0.9541866120472697, + "learning_rate": 7.627811121893365e-06, + "loss": 0.3335, + "step": 34084 + }, + { + "epoch": 0.5889722145424385, + "grad_norm": 1.174111827242984, + "learning_rate": 7.627267450585295e-06, + "loss": 0.3913, + "step": 34085 + }, + { + "epoch": 0.5889894940558474, + "grad_norm": 0.9075346803117934, + "learning_rate": 7.6267237867088e-06, + "loss": 0.2616, + "step": 34086 + }, + { + "epoch": 0.5890067735692562, + "grad_norm": 1.0500303734285477, + "learning_rate": 7.626180130265589e-06, + "loss": 0.2562, + "step": 34087 + }, + { + "epoch": 0.5890240530826651, + "grad_norm": 1.2456019298611307, + "learning_rate": 7.625636481257359e-06, + "loss": 0.3181, + "step": 34088 + }, + { + "epoch": 0.589041332596074, + "grad_norm": 1.183103812670005, + "learning_rate": 7.625092839685819e-06, + "loss": 0.6027, + "step": 34089 + }, + { + "epoch": 0.589058612109483, + "grad_norm": 0.87816843604015, + "learning_rate": 7.624549205552667e-06, + "loss": 0.279, + "step": 34090 + }, + { + "epoch": 0.5890758916228919, + "grad_norm": 1.4596032089911655, + "learning_rate": 7.62400557885961e-06, + "loss": 0.445, + "step": 34091 + }, + { + "epoch": 0.5890931711363008, + "grad_norm": 1.3252640753309455, + "learning_rate": 7.623461959608345e-06, + "loss": 0.4815, + "step": 34092 + }, + { + "epoch": 0.5891104506497097, + "grad_norm": 0.7044699735309996, + "learning_rate": 7.62291834780058e-06, + "loss": 0.2845, + "step": 34093 + }, + { + "epoch": 0.5891277301631186, + "grad_norm": 1.8020503558755698, + "learning_rate": 7.622374743438017e-06, + "loss": 0.3897, + "step": 34094 + }, + { + "epoch": 0.5891450096765275, + "grad_norm": 1.1616454471290942, + "learning_rate": 7.621831146522353e-06, + "loss": 0.3159, + "step": 34095 + }, + { + "epoch": 0.5891622891899364, + "grad_norm": 0.7549252796388083, + "learning_rate": 7.621287557055297e-06, + "loss": 0.5923, + "step": 34096 + }, + { + "epoch": 0.5891795687033453, + "grad_norm": 1.2413781664374026, + "learning_rate": 7.620743975038547e-06, + "loss": 0.4344, + "step": 34097 + }, + { + "epoch": 0.5891968482167542, + "grad_norm": 1.172234590647571, + "learning_rate": 7.620200400473809e-06, + "loss": 0.3058, + "step": 34098 + }, + { + "epoch": 0.5892141277301631, + "grad_norm": 0.9159429705637033, + "learning_rate": 7.619656833362783e-06, + "loss": 0.4179, + "step": 34099 + }, + { + "epoch": 0.589231407243572, + "grad_norm": 1.1901031055009808, + "learning_rate": 7.619113273707176e-06, + "loss": 0.3915, + "step": 34100 + }, + { + "epoch": 0.5892486867569809, + "grad_norm": 1.302574857747588, + "learning_rate": 7.618569721508684e-06, + "loss": 0.5106, + "step": 34101 + }, + { + "epoch": 0.5892659662703899, + "grad_norm": 0.7621405796443474, + "learning_rate": 7.618026176769013e-06, + "loss": 0.3946, + "step": 34102 + }, + { + "epoch": 0.5892832457837988, + "grad_norm": 0.7246506762423632, + "learning_rate": 7.617482639489865e-06, + "loss": 0.2608, + "step": 34103 + }, + { + "epoch": 0.5893005252972077, + "grad_norm": 1.0489555558555301, + "learning_rate": 7.6169391096729424e-06, + "loss": 0.3949, + "step": 34104 + }, + { + "epoch": 0.5893178048106166, + "grad_norm": 0.9166211627626312, + "learning_rate": 7.616395587319951e-06, + "loss": 0.4288, + "step": 34105 + }, + { + "epoch": 0.5893350843240255, + "grad_norm": 0.9014025735873378, + "learning_rate": 7.615852072432586e-06, + "loss": 0.5789, + "step": 34106 + }, + { + "epoch": 0.5893523638374344, + "grad_norm": 1.040857352971354, + "learning_rate": 7.615308565012556e-06, + "loss": 0.2662, + "step": 34107 + }, + { + "epoch": 0.5893696433508432, + "grad_norm": 1.1405078587070858, + "learning_rate": 7.614765065061558e-06, + "loss": 0.5068, + "step": 34108 + }, + { + "epoch": 0.5893869228642521, + "grad_norm": 1.1521973601957445, + "learning_rate": 7.614221572581298e-06, + "loss": 0.5836, + "step": 34109 + }, + { + "epoch": 0.589404202377661, + "grad_norm": 1.0337074147102436, + "learning_rate": 7.613678087573476e-06, + "loss": 0.3662, + "step": 34110 + }, + { + "epoch": 0.5894214818910699, + "grad_norm": 0.8380860285746954, + "learning_rate": 7.6131346100398e-06, + "loss": 0.4503, + "step": 34111 + }, + { + "epoch": 0.5894387614044788, + "grad_norm": 0.4530231768486879, + "learning_rate": 7.612591139981964e-06, + "loss": 0.6741, + "step": 34112 + }, + { + "epoch": 0.5894560409178877, + "grad_norm": 1.0624445319227487, + "learning_rate": 7.612047677401676e-06, + "loss": 0.3109, + "step": 34113 + }, + { + "epoch": 0.5894733204312966, + "grad_norm": 0.8735257911569274, + "learning_rate": 7.611504222300634e-06, + "loss": 0.2916, + "step": 34114 + }, + { + "epoch": 0.5894905999447055, + "grad_norm": 2.3499141363899487, + "learning_rate": 7.610960774680546e-06, + "loss": 0.5961, + "step": 34115 + }, + { + "epoch": 0.5895078794581144, + "grad_norm": 0.7727163344572873, + "learning_rate": 7.610417334543109e-06, + "loss": 0.2603, + "step": 34116 + }, + { + "epoch": 0.5895251589715234, + "grad_norm": 1.3178440451466025, + "learning_rate": 7.609873901890024e-06, + "loss": 0.2427, + "step": 34117 + }, + { + "epoch": 0.5895424384849323, + "grad_norm": 1.2822316063726698, + "learning_rate": 7.609330476722997e-06, + "loss": 0.4453, + "step": 34118 + }, + { + "epoch": 0.5895597179983412, + "grad_norm": 0.9957361077926818, + "learning_rate": 7.608787059043728e-06, + "loss": 0.6508, + "step": 34119 + }, + { + "epoch": 0.5895769975117501, + "grad_norm": 1.011979671259096, + "learning_rate": 7.608243648853922e-06, + "loss": 0.3463, + "step": 34120 + }, + { + "epoch": 0.589594277025159, + "grad_norm": 1.0885556149500204, + "learning_rate": 7.607700246155277e-06, + "loss": 0.4802, + "step": 34121 + }, + { + "epoch": 0.5896115565385679, + "grad_norm": 1.2164948327329188, + "learning_rate": 7.6071568509494966e-06, + "loss": 0.4237, + "step": 34122 + }, + { + "epoch": 0.5896288360519768, + "grad_norm": 1.348312649702503, + "learning_rate": 7.606613463238282e-06, + "loss": 0.3525, + "step": 34123 + }, + { + "epoch": 0.5896461155653857, + "grad_norm": 1.2300452614365913, + "learning_rate": 7.606070083023336e-06, + "loss": 0.4882, + "step": 34124 + }, + { + "epoch": 0.5896633950787946, + "grad_norm": 0.5574941623758093, + "learning_rate": 7.605526710306364e-06, + "loss": 0.3768, + "step": 34125 + }, + { + "epoch": 0.5896806745922035, + "grad_norm": 0.8678148447286169, + "learning_rate": 7.6049833450890605e-06, + "loss": 0.4237, + "step": 34126 + }, + { + "epoch": 0.5896979541056124, + "grad_norm": 0.4616471969305876, + "learning_rate": 7.604439987373133e-06, + "loss": 0.565, + "step": 34127 + }, + { + "epoch": 0.5897152336190213, + "grad_norm": 2.1447664226604513, + "learning_rate": 7.6038966371602795e-06, + "loss": 0.5466, + "step": 34128 + }, + { + "epoch": 0.5897325131324301, + "grad_norm": 0.9449110638150331, + "learning_rate": 7.603353294452205e-06, + "loss": 0.3533, + "step": 34129 + }, + { + "epoch": 0.589749792645839, + "grad_norm": 0.7644245867129703, + "learning_rate": 7.602809959250609e-06, + "loss": 0.3986, + "step": 34130 + }, + { + "epoch": 0.589767072159248, + "grad_norm": 0.9147901113448911, + "learning_rate": 7.602266631557198e-06, + "loss": 0.4002, + "step": 34131 + }, + { + "epoch": 0.5897843516726569, + "grad_norm": 0.5712609428302512, + "learning_rate": 7.601723311373667e-06, + "loss": 0.6938, + "step": 34132 + }, + { + "epoch": 0.5898016311860658, + "grad_norm": 0.9457474789580719, + "learning_rate": 7.601179998701722e-06, + "loss": 0.2777, + "step": 34133 + }, + { + "epoch": 0.5898189106994747, + "grad_norm": 1.1397165221801677, + "learning_rate": 7.6006366935430605e-06, + "loss": 0.5694, + "step": 34134 + }, + { + "epoch": 0.5898361902128836, + "grad_norm": 1.1328492293787806, + "learning_rate": 7.6000933958993925e-06, + "loss": 0.3971, + "step": 34135 + }, + { + "epoch": 0.5898534697262925, + "grad_norm": 0.8812640729696454, + "learning_rate": 7.599550105772413e-06, + "loss": 0.4763, + "step": 34136 + }, + { + "epoch": 0.5898707492397014, + "grad_norm": 0.9886769120854518, + "learning_rate": 7.5990068231638216e-06, + "loss": 0.4179, + "step": 34137 + }, + { + "epoch": 0.5898880287531103, + "grad_norm": 1.0308306384609611, + "learning_rate": 7.598463548075326e-06, + "loss": 0.5299, + "step": 34138 + }, + { + "epoch": 0.5899053082665192, + "grad_norm": 1.6706433417397655, + "learning_rate": 7.597920280508624e-06, + "loss": 0.3127, + "step": 34139 + }, + { + "epoch": 0.5899225877799281, + "grad_norm": 1.3180110044710658, + "learning_rate": 7.59737702046542e-06, + "loss": 0.4121, + "step": 34140 + }, + { + "epoch": 0.589939867293337, + "grad_norm": 1.0687460693554571, + "learning_rate": 7.5968337679474115e-06, + "loss": 0.5596, + "step": 34141 + }, + { + "epoch": 0.5899571468067459, + "grad_norm": 1.245906542038901, + "learning_rate": 7.596290522956304e-06, + "loss": 0.3286, + "step": 34142 + }, + { + "epoch": 0.5899744263201548, + "grad_norm": 1.1692222209879926, + "learning_rate": 7.595747285493795e-06, + "loss": 0.4238, + "step": 34143 + }, + { + "epoch": 0.5899917058335638, + "grad_norm": 1.0261268720918004, + "learning_rate": 7.59520405556159e-06, + "loss": 0.3082, + "step": 34144 + }, + { + "epoch": 0.5900089853469727, + "grad_norm": 0.7962885945680693, + "learning_rate": 7.594660833161387e-06, + "loss": 0.4373, + "step": 34145 + }, + { + "epoch": 0.5900262648603816, + "grad_norm": 0.9705352791577561, + "learning_rate": 7.594117618294893e-06, + "loss": 0.6185, + "step": 34146 + }, + { + "epoch": 0.5900435443737905, + "grad_norm": 0.9261177132125138, + "learning_rate": 7.593574410963805e-06, + "loss": 0.4175, + "step": 34147 + }, + { + "epoch": 0.5900608238871994, + "grad_norm": 0.9568070220170508, + "learning_rate": 7.59303121116982e-06, + "loss": 0.3159, + "step": 34148 + }, + { + "epoch": 0.5900781034006083, + "grad_norm": 1.1138708785396851, + "learning_rate": 7.5924880189146474e-06, + "loss": 0.2721, + "step": 34149 + }, + { + "epoch": 0.5900953829140171, + "grad_norm": 0.9714676042362392, + "learning_rate": 7.5919448341999835e-06, + "loss": 0.385, + "step": 34150 + }, + { + "epoch": 0.590112662427426, + "grad_norm": 0.936752463750637, + "learning_rate": 7.591401657027535e-06, + "loss": 0.3824, + "step": 34151 + }, + { + "epoch": 0.5901299419408349, + "grad_norm": 1.2226371027535827, + "learning_rate": 7.590858487398995e-06, + "loss": 0.3668, + "step": 34152 + }, + { + "epoch": 0.5901472214542438, + "grad_norm": 1.589766308972798, + "learning_rate": 7.590315325316073e-06, + "loss": 0.4118, + "step": 34153 + }, + { + "epoch": 0.5901645009676527, + "grad_norm": 1.272742242936194, + "learning_rate": 7.589772170780463e-06, + "loss": 0.748, + "step": 34154 + }, + { + "epoch": 0.5901817804810616, + "grad_norm": 0.9357268428181283, + "learning_rate": 7.589229023793875e-06, + "loss": 0.6027, + "step": 34155 + }, + { + "epoch": 0.5901990599944705, + "grad_norm": 1.0289619704142443, + "learning_rate": 7.588685884358003e-06, + "loss": 0.3958, + "step": 34156 + }, + { + "epoch": 0.5902163395078794, + "grad_norm": 1.6158987350124752, + "learning_rate": 7.588142752474546e-06, + "loss": 0.5251, + "step": 34157 + }, + { + "epoch": 0.5902336190212883, + "grad_norm": 1.4048020983969713, + "learning_rate": 7.587599628145213e-06, + "loss": 0.5629, + "step": 34158 + }, + { + "epoch": 0.5902508985346973, + "grad_norm": 0.858619206505129, + "learning_rate": 7.5870565113716985e-06, + "loss": 0.3333, + "step": 34159 + }, + { + "epoch": 0.5902681780481062, + "grad_norm": 0.8611546254751339, + "learning_rate": 7.586513402155711e-06, + "loss": 0.4712, + "step": 34160 + }, + { + "epoch": 0.5902854575615151, + "grad_norm": 1.0027216493531625, + "learning_rate": 7.585970300498942e-06, + "loss": 0.3354, + "step": 34161 + }, + { + "epoch": 0.590302737074924, + "grad_norm": 0.993473311446259, + "learning_rate": 7.585427206403101e-06, + "loss": 0.2882, + "step": 34162 + }, + { + "epoch": 0.5903200165883329, + "grad_norm": 0.8566923899219676, + "learning_rate": 7.584884119869882e-06, + "loss": 0.3398, + "step": 34163 + }, + { + "epoch": 0.5903372961017418, + "grad_norm": 0.7702744418650693, + "learning_rate": 7.5843410409009925e-06, + "loss": 0.2736, + "step": 34164 + }, + { + "epoch": 0.5903545756151507, + "grad_norm": 0.4996265277000253, + "learning_rate": 7.583797969498128e-06, + "loss": 0.5349, + "step": 34165 + }, + { + "epoch": 0.5903718551285596, + "grad_norm": 1.247998034316479, + "learning_rate": 7.583254905662996e-06, + "loss": 0.3621, + "step": 34166 + }, + { + "epoch": 0.5903891346419685, + "grad_norm": 1.5531503729207636, + "learning_rate": 7.582711849397291e-06, + "loss": 0.5476, + "step": 34167 + }, + { + "epoch": 0.5904064141553774, + "grad_norm": 1.5130068223575814, + "learning_rate": 7.582168800702714e-06, + "loss": 0.501, + "step": 34168 + }, + { + "epoch": 0.5904236936687863, + "grad_norm": 1.180526703936052, + "learning_rate": 7.581625759580971e-06, + "loss": 0.4504, + "step": 34169 + }, + { + "epoch": 0.5904409731821952, + "grad_norm": 1.56928335764631, + "learning_rate": 7.581082726033757e-06, + "loss": 0.4369, + "step": 34170 + }, + { + "epoch": 0.5904582526956041, + "grad_norm": 1.0690103499765167, + "learning_rate": 7.580539700062781e-06, + "loss": 0.2782, + "step": 34171 + }, + { + "epoch": 0.5904755322090129, + "grad_norm": 0.6599743283837576, + "learning_rate": 7.579996681669733e-06, + "loss": 0.6312, + "step": 34172 + }, + { + "epoch": 0.5904928117224219, + "grad_norm": 0.833321002292142, + "learning_rate": 7.579453670856322e-06, + "loss": 0.4088, + "step": 34173 + }, + { + "epoch": 0.5905100912358308, + "grad_norm": 1.3115624150131187, + "learning_rate": 7.578910667624244e-06, + "loss": 0.4395, + "step": 34174 + }, + { + "epoch": 0.5905273707492397, + "grad_norm": 1.7191206783070876, + "learning_rate": 7.578367671975206e-06, + "loss": 0.4663, + "step": 34175 + }, + { + "epoch": 0.5905446502626486, + "grad_norm": 0.7022470175172577, + "learning_rate": 7.577824683910898e-06, + "loss": 0.3118, + "step": 34176 + }, + { + "epoch": 0.5905619297760575, + "grad_norm": 0.44912159172124616, + "learning_rate": 7.577281703433033e-06, + "loss": 0.4436, + "step": 34177 + }, + { + "epoch": 0.5905792092894664, + "grad_norm": 0.8864712128634573, + "learning_rate": 7.576738730543305e-06, + "loss": 0.2997, + "step": 34178 + }, + { + "epoch": 0.5905964888028753, + "grad_norm": 1.1414981050039, + "learning_rate": 7.576195765243413e-06, + "loss": 0.4326, + "step": 34179 + }, + { + "epoch": 0.5906137683162842, + "grad_norm": 1.3149289847410441, + "learning_rate": 7.575652807535064e-06, + "loss": 0.3946, + "step": 34180 + }, + { + "epoch": 0.5906310478296931, + "grad_norm": 1.0193198872541276, + "learning_rate": 7.57510985741995e-06, + "loss": 0.5084, + "step": 34181 + }, + { + "epoch": 0.590648327343102, + "grad_norm": 0.9164505202072255, + "learning_rate": 7.574566914899779e-06, + "loss": 0.4293, + "step": 34182 + }, + { + "epoch": 0.5906656068565109, + "grad_norm": 1.0636856791617866, + "learning_rate": 7.574023979976246e-06, + "loss": 0.5991, + "step": 34183 + }, + { + "epoch": 0.5906828863699198, + "grad_norm": 0.7714121540814037, + "learning_rate": 7.573481052651057e-06, + "loss": 0.3752, + "step": 34184 + }, + { + "epoch": 0.5907001658833287, + "grad_norm": 0.9951858175579993, + "learning_rate": 7.572938132925907e-06, + "loss": 0.2702, + "step": 34185 + }, + { + "epoch": 0.5907174453967377, + "grad_norm": 1.0504496289006955, + "learning_rate": 7.572395220802504e-06, + "loss": 0.3823, + "step": 34186 + }, + { + "epoch": 0.5907347249101466, + "grad_norm": 0.8324845719112708, + "learning_rate": 7.571852316282542e-06, + "loss": 0.1876, + "step": 34187 + }, + { + "epoch": 0.5907520044235555, + "grad_norm": 0.8978869366897827, + "learning_rate": 7.571309419367719e-06, + "loss": 0.4134, + "step": 34188 + }, + { + "epoch": 0.5907692839369644, + "grad_norm": 0.48360609254501696, + "learning_rate": 7.570766530059743e-06, + "loss": 0.7021, + "step": 34189 + }, + { + "epoch": 0.5907865634503733, + "grad_norm": 1.1961281243449695, + "learning_rate": 7.5702236483603085e-06, + "loss": 0.3968, + "step": 34190 + }, + { + "epoch": 0.5908038429637822, + "grad_norm": 1.0454179505966126, + "learning_rate": 7.569680774271123e-06, + "loss": 0.5404, + "step": 34191 + }, + { + "epoch": 0.5908211224771911, + "grad_norm": 0.824258350461457, + "learning_rate": 7.569137907793878e-06, + "loss": 0.5451, + "step": 34192 + }, + { + "epoch": 0.5908384019905999, + "grad_norm": 1.2291603779826654, + "learning_rate": 7.568595048930278e-06, + "loss": 0.3905, + "step": 34193 + }, + { + "epoch": 0.5908556815040088, + "grad_norm": 1.0023824935075472, + "learning_rate": 7.568052197682023e-06, + "loss": 0.4166, + "step": 34194 + }, + { + "epoch": 0.5908729610174177, + "grad_norm": 0.8701870592724062, + "learning_rate": 7.567509354050817e-06, + "loss": 0.2802, + "step": 34195 + }, + { + "epoch": 0.5908902405308266, + "grad_norm": 1.2435442094667655, + "learning_rate": 7.56696651803835e-06, + "loss": 0.2225, + "step": 34196 + }, + { + "epoch": 0.5909075200442355, + "grad_norm": 1.1043862374005509, + "learning_rate": 7.566423689646335e-06, + "loss": 0.3161, + "step": 34197 + }, + { + "epoch": 0.5909247995576444, + "grad_norm": 1.5174293888891466, + "learning_rate": 7.565880868876464e-06, + "loss": 0.3386, + "step": 34198 + }, + { + "epoch": 0.5909420790710533, + "grad_norm": 0.8569381474459087, + "learning_rate": 7.565338055730437e-06, + "loss": 0.8499, + "step": 34199 + }, + { + "epoch": 0.5909593585844622, + "grad_norm": 1.2569090009691932, + "learning_rate": 7.5647952502099595e-06, + "loss": 0.6633, + "step": 34200 + }, + { + "epoch": 0.5909766380978712, + "grad_norm": 1.7578202503923779, + "learning_rate": 7.564252452316726e-06, + "loss": 0.2864, + "step": 34201 + }, + { + "epoch": 0.5909939176112801, + "grad_norm": 1.4455885726226827, + "learning_rate": 7.56370966205244e-06, + "loss": 0.9331, + "step": 34202 + }, + { + "epoch": 0.591011197124689, + "grad_norm": 0.9410591489589272, + "learning_rate": 7.563166879418798e-06, + "loss": 0.3936, + "step": 34203 + }, + { + "epoch": 0.5910284766380979, + "grad_norm": 1.1689765917031096, + "learning_rate": 7.5626241044175055e-06, + "loss": 0.4462, + "step": 34204 + }, + { + "epoch": 0.5910457561515068, + "grad_norm": 0.7078283511736282, + "learning_rate": 7.5620813370502565e-06, + "loss": 0.2924, + "step": 34205 + }, + { + "epoch": 0.5910630356649157, + "grad_norm": 0.897552462587498, + "learning_rate": 7.561538577318759e-06, + "loss": 0.224, + "step": 34206 + }, + { + "epoch": 0.5910803151783246, + "grad_norm": 2.3188468787635776, + "learning_rate": 7.5609958252247065e-06, + "loss": 0.3464, + "step": 34207 + }, + { + "epoch": 0.5910975946917335, + "grad_norm": 1.3959491323612971, + "learning_rate": 7.560453080769796e-06, + "loss": 0.4445, + "step": 34208 + }, + { + "epoch": 0.5911148742051424, + "grad_norm": 1.1166935480875892, + "learning_rate": 7.559910343955736e-06, + "loss": 0.4585, + "step": 34209 + }, + { + "epoch": 0.5911321537185513, + "grad_norm": 0.7941245504007443, + "learning_rate": 7.55936761478422e-06, + "loss": 0.2873, + "step": 34210 + }, + { + "epoch": 0.5911494332319602, + "grad_norm": 0.8083301619926954, + "learning_rate": 7.558824893256954e-06, + "loss": 0.2956, + "step": 34211 + }, + { + "epoch": 0.5911667127453691, + "grad_norm": 1.346086585285119, + "learning_rate": 7.558282179375629e-06, + "loss": 0.5386, + "step": 34212 + }, + { + "epoch": 0.591183992258778, + "grad_norm": 1.0860064726186527, + "learning_rate": 7.5577394731419526e-06, + "loss": 0.5945, + "step": 34213 + }, + { + "epoch": 0.5912012717721868, + "grad_norm": 1.0456379551748451, + "learning_rate": 7.55719677455762e-06, + "loss": 0.5141, + "step": 34214 + }, + { + "epoch": 0.5912185512855958, + "grad_norm": 1.3764124448157289, + "learning_rate": 7.5566540836243365e-06, + "loss": 0.4977, + "step": 34215 + }, + { + "epoch": 0.5912358307990047, + "grad_norm": 1.6362723950072986, + "learning_rate": 7.556111400343792e-06, + "loss": 0.5463, + "step": 34216 + }, + { + "epoch": 0.5912531103124136, + "grad_norm": 1.2623314582929903, + "learning_rate": 7.555568724717699e-06, + "loss": 0.3972, + "step": 34217 + }, + { + "epoch": 0.5912703898258225, + "grad_norm": 0.9456934611085117, + "learning_rate": 7.555026056747747e-06, + "loss": 0.418, + "step": 34218 + }, + { + "epoch": 0.5912876693392314, + "grad_norm": 1.0546073378629597, + "learning_rate": 7.554483396435638e-06, + "loss": 0.3117, + "step": 34219 + }, + { + "epoch": 0.5913049488526403, + "grad_norm": 0.9597497320233475, + "learning_rate": 7.553940743783077e-06, + "loss": 0.4672, + "step": 34220 + }, + { + "epoch": 0.5913222283660492, + "grad_norm": 1.1241800451830954, + "learning_rate": 7.553398098791756e-06, + "loss": 0.305, + "step": 34221 + }, + { + "epoch": 0.5913395078794581, + "grad_norm": 0.7020763326997217, + "learning_rate": 7.55285546146338e-06, + "loss": 0.3035, + "step": 34222 + }, + { + "epoch": 0.591356787392867, + "grad_norm": 0.9483914912711452, + "learning_rate": 7.552312831799643e-06, + "loss": 0.3157, + "step": 34223 + }, + { + "epoch": 0.5913740669062759, + "grad_norm": 0.6604755271113552, + "learning_rate": 7.551770209802251e-06, + "loss": 0.3632, + "step": 34224 + }, + { + "epoch": 0.5913913464196848, + "grad_norm": 1.2488641933252371, + "learning_rate": 7.551227595472899e-06, + "loss": 0.3915, + "step": 34225 + }, + { + "epoch": 0.5914086259330937, + "grad_norm": 1.201262831076137, + "learning_rate": 7.550684988813293e-06, + "loss": 0.3954, + "step": 34226 + }, + { + "epoch": 0.5914259054465026, + "grad_norm": 1.1931053804100158, + "learning_rate": 7.550142389825124e-06, + "loss": 0.4763, + "step": 34227 + }, + { + "epoch": 0.5914431849599115, + "grad_norm": 1.1373666530511657, + "learning_rate": 7.549599798510095e-06, + "loss": 0.411, + "step": 34228 + }, + { + "epoch": 0.5914604644733205, + "grad_norm": 1.4632354703492696, + "learning_rate": 7.549057214869908e-06, + "loss": 0.3094, + "step": 34229 + }, + { + "epoch": 0.5914777439867294, + "grad_norm": 0.9246096820368769, + "learning_rate": 7.548514638906257e-06, + "loss": 0.3886, + "step": 34230 + }, + { + "epoch": 0.5914950235001383, + "grad_norm": 1.1003603632755887, + "learning_rate": 7.547972070620848e-06, + "loss": 0.3465, + "step": 34231 + }, + { + "epoch": 0.5915123030135472, + "grad_norm": 0.9902086241752298, + "learning_rate": 7.547429510015374e-06, + "loss": 0.3412, + "step": 34232 + }, + { + "epoch": 0.5915295825269561, + "grad_norm": 1.2116181909231283, + "learning_rate": 7.5468869570915386e-06, + "loss": 0.4649, + "step": 34233 + }, + { + "epoch": 0.591546862040365, + "grad_norm": 1.2890560317580444, + "learning_rate": 7.546344411851037e-06, + "loss": 0.4798, + "step": 34234 + }, + { + "epoch": 0.5915641415537738, + "grad_norm": 0.9367650121056996, + "learning_rate": 7.545801874295576e-06, + "loss": 0.5476, + "step": 34235 + }, + { + "epoch": 0.5915814210671827, + "grad_norm": 0.49232802048425034, + "learning_rate": 7.545259344426844e-06, + "loss": 0.592, + "step": 34236 + }, + { + "epoch": 0.5915987005805916, + "grad_norm": 0.942137255570982, + "learning_rate": 7.544716822246553e-06, + "loss": 0.4248, + "step": 34237 + }, + { + "epoch": 0.5916159800940005, + "grad_norm": 1.01823951497505, + "learning_rate": 7.5441743077563926e-06, + "loss": 0.5235, + "step": 34238 + }, + { + "epoch": 0.5916332596074094, + "grad_norm": 1.0123670006104006, + "learning_rate": 7.543631800958062e-06, + "loss": 0.3767, + "step": 34239 + }, + { + "epoch": 0.5916505391208183, + "grad_norm": 1.212359352532625, + "learning_rate": 7.543089301853268e-06, + "loss": 0.4833, + "step": 34240 + }, + { + "epoch": 0.5916678186342272, + "grad_norm": 0.8814043565245526, + "learning_rate": 7.542546810443702e-06, + "loss": 0.3404, + "step": 34241 + }, + { + "epoch": 0.5916850981476361, + "grad_norm": 1.4860555115492449, + "learning_rate": 7.542004326731067e-06, + "loss": 0.4476, + "step": 34242 + }, + { + "epoch": 0.591702377661045, + "grad_norm": 1.123212856988275, + "learning_rate": 7.54146185071706e-06, + "loss": 0.4865, + "step": 34243 + }, + { + "epoch": 0.591719657174454, + "grad_norm": 1.0957078559059332, + "learning_rate": 7.540919382403384e-06, + "loss": 0.3058, + "step": 34244 + }, + { + "epoch": 0.5917369366878629, + "grad_norm": 1.009354465573698, + "learning_rate": 7.540376921791731e-06, + "loss": 0.4462, + "step": 34245 + }, + { + "epoch": 0.5917542162012718, + "grad_norm": 1.0970422879836292, + "learning_rate": 7.53983446888381e-06, + "loss": 0.3288, + "step": 34246 + }, + { + "epoch": 0.5917714957146807, + "grad_norm": 0.96382052564956, + "learning_rate": 7.5392920236813105e-06, + "loss": 0.5088, + "step": 34247 + }, + { + "epoch": 0.5917887752280896, + "grad_norm": 1.4239099184221469, + "learning_rate": 7.538749586185936e-06, + "loss": 0.3463, + "step": 34248 + }, + { + "epoch": 0.5918060547414985, + "grad_norm": 1.216474127690954, + "learning_rate": 7.538207156399386e-06, + "loss": 0.4771, + "step": 34249 + }, + { + "epoch": 0.5918233342549074, + "grad_norm": 1.3608084622428285, + "learning_rate": 7.5376647343233566e-06, + "loss": 0.3242, + "step": 34250 + }, + { + "epoch": 0.5918406137683163, + "grad_norm": 0.954468586490483, + "learning_rate": 7.537122319959552e-06, + "loss": 0.3788, + "step": 34251 + }, + { + "epoch": 0.5918578932817252, + "grad_norm": 1.0168359605730282, + "learning_rate": 7.536579913309664e-06, + "loss": 0.3503, + "step": 34252 + }, + { + "epoch": 0.5918751727951341, + "grad_norm": 0.530448437473904, + "learning_rate": 7.536037514375396e-06, + "loss": 0.5112, + "step": 34253 + }, + { + "epoch": 0.591892452308543, + "grad_norm": 1.209517454798407, + "learning_rate": 7.535495123158444e-06, + "loss": 0.4853, + "step": 34254 + }, + { + "epoch": 0.591909731821952, + "grad_norm": 0.9817840451395532, + "learning_rate": 7.5349527396605125e-06, + "loss": 0.3407, + "step": 34255 + }, + { + "epoch": 0.5919270113353607, + "grad_norm": 1.0061683893289497, + "learning_rate": 7.534410363883291e-06, + "loss": 0.4751, + "step": 34256 + }, + { + "epoch": 0.5919442908487696, + "grad_norm": 0.6462561925906578, + "learning_rate": 7.533867995828489e-06, + "loss": 0.4156, + "step": 34257 + }, + { + "epoch": 0.5919615703621786, + "grad_norm": 0.8276750779687719, + "learning_rate": 7.533325635497796e-06, + "loss": 0.2819, + "step": 34258 + }, + { + "epoch": 0.5919788498755875, + "grad_norm": 0.8128228166275675, + "learning_rate": 7.532783282892917e-06, + "loss": 0.255, + "step": 34259 + }, + { + "epoch": 0.5919961293889964, + "grad_norm": 1.299533891222676, + "learning_rate": 7.5322409380155495e-06, + "loss": 0.3233, + "step": 34260 + }, + { + "epoch": 0.5920134089024053, + "grad_norm": 1.48218329869706, + "learning_rate": 7.531698600867387e-06, + "loss": 0.5115, + "step": 34261 + }, + { + "epoch": 0.5920306884158142, + "grad_norm": 1.0539427001740849, + "learning_rate": 7.531156271450135e-06, + "loss": 0.4283, + "step": 34262 + }, + { + "epoch": 0.5920479679292231, + "grad_norm": 1.4476397717917082, + "learning_rate": 7.530613949765487e-06, + "loss": 0.6831, + "step": 34263 + }, + { + "epoch": 0.592065247442632, + "grad_norm": 0.748787620829223, + "learning_rate": 7.530071635815144e-06, + "loss": 0.2571, + "step": 34264 + }, + { + "epoch": 0.5920825269560409, + "grad_norm": 2.5422091096360875, + "learning_rate": 7.529529329600804e-06, + "loss": 0.2506, + "step": 34265 + }, + { + "epoch": 0.5920998064694498, + "grad_norm": 1.5512282776465187, + "learning_rate": 7.528987031124169e-06, + "loss": 0.4144, + "step": 34266 + }, + { + "epoch": 0.5921170859828587, + "grad_norm": 0.9872100525292684, + "learning_rate": 7.528444740386931e-06, + "loss": 0.3293, + "step": 34267 + }, + { + "epoch": 0.5921343654962676, + "grad_norm": 1.3081870873548636, + "learning_rate": 7.527902457390794e-06, + "loss": 0.3426, + "step": 34268 + }, + { + "epoch": 0.5921516450096765, + "grad_norm": 0.6413339106918976, + "learning_rate": 7.527360182137454e-06, + "loss": 0.4317, + "step": 34269 + }, + { + "epoch": 0.5921689245230854, + "grad_norm": 1.1154488218528646, + "learning_rate": 7.526817914628608e-06, + "loss": 0.3594, + "step": 34270 + }, + { + "epoch": 0.5921862040364944, + "grad_norm": 0.8227939464413101, + "learning_rate": 7.526275654865961e-06, + "loss": 0.3005, + "step": 34271 + }, + { + "epoch": 0.5922034835499033, + "grad_norm": 1.239595973149951, + "learning_rate": 7.525733402851202e-06, + "loss": 0.393, + "step": 34272 + }, + { + "epoch": 0.5922207630633122, + "grad_norm": 0.8869307191883288, + "learning_rate": 7.525191158586036e-06, + "loss": 0.2828, + "step": 34273 + }, + { + "epoch": 0.5922380425767211, + "grad_norm": 1.5381470330340685, + "learning_rate": 7.524648922072157e-06, + "loss": 0.402, + "step": 34274 + }, + { + "epoch": 0.59225532209013, + "grad_norm": 0.8263756225379872, + "learning_rate": 7.52410669331127e-06, + "loss": 0.3902, + "step": 34275 + }, + { + "epoch": 0.5922726016035389, + "grad_norm": 1.0980042190886292, + "learning_rate": 7.5235644723050624e-06, + "loss": 0.4155, + "step": 34276 + }, + { + "epoch": 0.5922898811169477, + "grad_norm": 0.9633631029625135, + "learning_rate": 7.523022259055246e-06, + "loss": 0.4334, + "step": 34277 + }, + { + "epoch": 0.5923071606303566, + "grad_norm": 0.9074351305628665, + "learning_rate": 7.522480053563508e-06, + "loss": 0.4694, + "step": 34278 + }, + { + "epoch": 0.5923244401437655, + "grad_norm": 1.9928023346405792, + "learning_rate": 7.521937855831552e-06, + "loss": 0.3407, + "step": 34279 + }, + { + "epoch": 0.5923417196571744, + "grad_norm": 1.1421976656070423, + "learning_rate": 7.521395665861079e-06, + "loss": 0.4849, + "step": 34280 + }, + { + "epoch": 0.5923589991705833, + "grad_norm": 1.2307460593061337, + "learning_rate": 7.5208534836537775e-06, + "loss": 0.6123, + "step": 34281 + }, + { + "epoch": 0.5923762786839922, + "grad_norm": 1.0876456025831975, + "learning_rate": 7.5203113092113534e-06, + "loss": 0.2776, + "step": 34282 + }, + { + "epoch": 0.5923935581974011, + "grad_norm": 1.2869238349914829, + "learning_rate": 7.5197691425355e-06, + "loss": 0.5349, + "step": 34283 + }, + { + "epoch": 0.59241083771081, + "grad_norm": 0.9684176458295044, + "learning_rate": 7.519226983627921e-06, + "loss": 0.3911, + "step": 34284 + }, + { + "epoch": 0.592428117224219, + "grad_norm": 1.2707300065395366, + "learning_rate": 7.518684832490311e-06, + "loss": 0.5611, + "step": 34285 + }, + { + "epoch": 0.5924453967376279, + "grad_norm": 1.072623434856195, + "learning_rate": 7.51814268912437e-06, + "loss": 0.2056, + "step": 34286 + }, + { + "epoch": 0.5924626762510368, + "grad_norm": 1.1671759583371604, + "learning_rate": 7.517600553531792e-06, + "loss": 0.5296, + "step": 34287 + }, + { + "epoch": 0.5924799557644457, + "grad_norm": 1.188183151325716, + "learning_rate": 7.51705842571428e-06, + "loss": 0.4261, + "step": 34288 + }, + { + "epoch": 0.5924972352778546, + "grad_norm": 1.0562391973004834, + "learning_rate": 7.516516305673527e-06, + "loss": 0.707, + "step": 34289 + }, + { + "epoch": 0.5925145147912635, + "grad_norm": 1.1466195064469946, + "learning_rate": 7.515974193411237e-06, + "loss": 0.3897, + "step": 34290 + }, + { + "epoch": 0.5925317943046724, + "grad_norm": 1.0352427274325107, + "learning_rate": 7.515432088929106e-06, + "loss": 0.247, + "step": 34291 + }, + { + "epoch": 0.5925490738180813, + "grad_norm": 2.031001316568134, + "learning_rate": 7.514889992228825e-06, + "loss": 0.3343, + "step": 34292 + }, + { + "epoch": 0.5925663533314902, + "grad_norm": 1.0056652661743413, + "learning_rate": 7.5143479033121e-06, + "loss": 0.2052, + "step": 34293 + }, + { + "epoch": 0.5925836328448991, + "grad_norm": 1.3630892928590002, + "learning_rate": 7.513805822180626e-06, + "loss": 0.5522, + "step": 34294 + }, + { + "epoch": 0.592600912358308, + "grad_norm": 1.3552371576914943, + "learning_rate": 7.513263748836104e-06, + "loss": 0.4248, + "step": 34295 + }, + { + "epoch": 0.5926181918717169, + "grad_norm": 0.686441589657948, + "learning_rate": 7.512721683280222e-06, + "loss": 0.2859, + "step": 34296 + }, + { + "epoch": 0.5926354713851258, + "grad_norm": 0.9109226060505069, + "learning_rate": 7.512179625514692e-06, + "loss": 0.2129, + "step": 34297 + }, + { + "epoch": 0.5926527508985348, + "grad_norm": 0.589652520496147, + "learning_rate": 7.5116375755412e-06, + "loss": 0.2503, + "step": 34298 + }, + { + "epoch": 0.5926700304119435, + "grad_norm": 1.3473546930035578, + "learning_rate": 7.51109553336145e-06, + "loss": 0.5492, + "step": 34299 + }, + { + "epoch": 0.5926873099253525, + "grad_norm": 1.0306284777345318, + "learning_rate": 7.510553498977141e-06, + "loss": 0.3189, + "step": 34300 + }, + { + "epoch": 0.5927045894387614, + "grad_norm": 0.9776540350961347, + "learning_rate": 7.510011472389962e-06, + "loss": 0.2703, + "step": 34301 + }, + { + "epoch": 0.5927218689521703, + "grad_norm": 1.5427530916705126, + "learning_rate": 7.50946945360162e-06, + "loss": 0.5123, + "step": 34302 + }, + { + "epoch": 0.5927391484655792, + "grad_norm": 1.0179303404425524, + "learning_rate": 7.508927442613806e-06, + "loss": 0.3652, + "step": 34303 + }, + { + "epoch": 0.5927564279789881, + "grad_norm": 1.2749826432927376, + "learning_rate": 7.508385439428223e-06, + "loss": 0.2411, + "step": 34304 + }, + { + "epoch": 0.592773707492397, + "grad_norm": 0.74393730826581, + "learning_rate": 7.507843444046563e-06, + "loss": 0.5208, + "step": 34305 + }, + { + "epoch": 0.5927909870058059, + "grad_norm": 1.7744938132923973, + "learning_rate": 7.507301456470533e-06, + "loss": 0.4708, + "step": 34306 + }, + { + "epoch": 0.5928082665192148, + "grad_norm": 1.2113541538090804, + "learning_rate": 7.5067594767018174e-06, + "loss": 0.6835, + "step": 34307 + }, + { + "epoch": 0.5928255460326237, + "grad_norm": 1.0136528042750041, + "learning_rate": 7.506217504742124e-06, + "loss": 0.3023, + "step": 34308 + }, + { + "epoch": 0.5928428255460326, + "grad_norm": 0.820328735474178, + "learning_rate": 7.505675540593145e-06, + "loss": 0.3714, + "step": 34309 + }, + { + "epoch": 0.5928601050594415, + "grad_norm": 0.9497556577253182, + "learning_rate": 7.505133584256582e-06, + "loss": 0.438, + "step": 34310 + }, + { + "epoch": 0.5928773845728504, + "grad_norm": 0.943013429035032, + "learning_rate": 7.504591635734132e-06, + "loss": 0.3117, + "step": 34311 + }, + { + "epoch": 0.5928946640862593, + "grad_norm": 1.1573061723895752, + "learning_rate": 7.504049695027486e-06, + "loss": 0.2237, + "step": 34312 + }, + { + "epoch": 0.5929119435996683, + "grad_norm": 1.0031169559847344, + "learning_rate": 7.503507762138349e-06, + "loss": 0.4078, + "step": 34313 + }, + { + "epoch": 0.5929292231130772, + "grad_norm": 1.101240039079227, + "learning_rate": 7.502965837068414e-06, + "loss": 0.3434, + "step": 34314 + }, + { + "epoch": 0.5929465026264861, + "grad_norm": 1.4313044058627953, + "learning_rate": 7.502423919819382e-06, + "loss": 0.384, + "step": 34315 + }, + { + "epoch": 0.592963782139895, + "grad_norm": 0.8012466585078908, + "learning_rate": 7.501882010392943e-06, + "loss": 0.3381, + "step": 34316 + }, + { + "epoch": 0.5929810616533039, + "grad_norm": 1.5539338297332972, + "learning_rate": 7.501340108790806e-06, + "loss": 0.3318, + "step": 34317 + }, + { + "epoch": 0.5929983411667128, + "grad_norm": 0.8001108610243968, + "learning_rate": 7.5007982150146564e-06, + "loss": 0.4324, + "step": 34318 + }, + { + "epoch": 0.5930156206801217, + "grad_norm": 1.3467168252832658, + "learning_rate": 7.500256329066199e-06, + "loss": 0.3383, + "step": 34319 + }, + { + "epoch": 0.5930329001935305, + "grad_norm": 1.35506584391215, + "learning_rate": 7.499714450947126e-06, + "loss": 0.4641, + "step": 34320 + }, + { + "epoch": 0.5930501797069394, + "grad_norm": 1.0628004451584778, + "learning_rate": 7.4991725806591434e-06, + "loss": 0.4031, + "step": 34321 + }, + { + "epoch": 0.5930674592203483, + "grad_norm": 1.3875620827317066, + "learning_rate": 7.498630718203939e-06, + "loss": 0.5335, + "step": 34322 + }, + { + "epoch": 0.5930847387337572, + "grad_norm": 0.680829327687532, + "learning_rate": 7.498088863583212e-06, + "loss": 0.5377, + "step": 34323 + }, + { + "epoch": 0.5931020182471661, + "grad_norm": 0.5028012353301089, + "learning_rate": 7.497547016798663e-06, + "loss": 0.6183, + "step": 34324 + }, + { + "epoch": 0.593119297760575, + "grad_norm": 1.0030612708490558, + "learning_rate": 7.497005177851984e-06, + "loss": 0.4039, + "step": 34325 + }, + { + "epoch": 0.593136577273984, + "grad_norm": 1.7968173537902825, + "learning_rate": 7.496463346744879e-06, + "loss": 0.2926, + "step": 34326 + }, + { + "epoch": 0.5931538567873929, + "grad_norm": 1.26519909187035, + "learning_rate": 7.495921523479037e-06, + "loss": 0.5794, + "step": 34327 + }, + { + "epoch": 0.5931711363008018, + "grad_norm": 1.3915143719415652, + "learning_rate": 7.495379708056162e-06, + "loss": 0.4822, + "step": 34328 + }, + { + "epoch": 0.5931884158142107, + "grad_norm": 1.3831203867054787, + "learning_rate": 7.494837900477945e-06, + "loss": 0.4871, + "step": 34329 + }, + { + "epoch": 0.5932056953276196, + "grad_norm": 1.1061792820377365, + "learning_rate": 7.494296100746089e-06, + "loss": 0.535, + "step": 34330 + }, + { + "epoch": 0.5932229748410285, + "grad_norm": 1.241570721110304, + "learning_rate": 7.49375430886229e-06, + "loss": 0.1529, + "step": 34331 + }, + { + "epoch": 0.5932402543544374, + "grad_norm": 0.9406714156704156, + "learning_rate": 7.493212524828239e-06, + "loss": 0.3174, + "step": 34332 + }, + { + "epoch": 0.5932575338678463, + "grad_norm": 0.8049568346675801, + "learning_rate": 7.492670748645639e-06, + "loss": 0.4856, + "step": 34333 + }, + { + "epoch": 0.5932748133812552, + "grad_norm": 1.1170789052894128, + "learning_rate": 7.492128980316181e-06, + "loss": 0.3749, + "step": 34334 + }, + { + "epoch": 0.5932920928946641, + "grad_norm": 1.0817111898620841, + "learning_rate": 7.491587219841572e-06, + "loss": 0.5398, + "step": 34335 + }, + { + "epoch": 0.593309372408073, + "grad_norm": 1.3817989316438755, + "learning_rate": 7.4910454672234945e-06, + "loss": 0.3104, + "step": 34336 + }, + { + "epoch": 0.5933266519214819, + "grad_norm": 1.0418448950879498, + "learning_rate": 7.49050372246366e-06, + "loss": 0.3955, + "step": 34337 + }, + { + "epoch": 0.5933439314348908, + "grad_norm": 1.0770419045062878, + "learning_rate": 7.489961985563755e-06, + "loss": 0.3833, + "step": 34338 + }, + { + "epoch": 0.5933612109482997, + "grad_norm": 1.609575216676635, + "learning_rate": 7.489420256525481e-06, + "loss": 0.7288, + "step": 34339 + }, + { + "epoch": 0.5933784904617087, + "grad_norm": 1.2563288187234052, + "learning_rate": 7.488878535350532e-06, + "loss": 0.2455, + "step": 34340 + }, + { + "epoch": 0.5933957699751174, + "grad_norm": 0.705525343724942, + "learning_rate": 7.488336822040612e-06, + "loss": 0.3211, + "step": 34341 + }, + { + "epoch": 0.5934130494885264, + "grad_norm": 0.5335421174796559, + "learning_rate": 7.4877951165974074e-06, + "loss": 0.6876, + "step": 34342 + }, + { + "epoch": 0.5934303290019353, + "grad_norm": 0.9849599174787305, + "learning_rate": 7.487253419022618e-06, + "loss": 0.307, + "step": 34343 + }, + { + "epoch": 0.5934476085153442, + "grad_norm": 0.805087492484019, + "learning_rate": 7.486711729317945e-06, + "loss": 0.5066, + "step": 34344 + }, + { + "epoch": 0.5934648880287531, + "grad_norm": 1.4771456349003076, + "learning_rate": 7.486170047485079e-06, + "loss": 0.4087, + "step": 34345 + }, + { + "epoch": 0.593482167542162, + "grad_norm": 0.9540504538823065, + "learning_rate": 7.485628373525724e-06, + "loss": 0.4128, + "step": 34346 + }, + { + "epoch": 0.5934994470555709, + "grad_norm": 1.336546780818362, + "learning_rate": 7.485086707441567e-06, + "loss": 0.3014, + "step": 34347 + }, + { + "epoch": 0.5935167265689798, + "grad_norm": 1.4858718401000153, + "learning_rate": 7.484545049234312e-06, + "loss": 0.349, + "step": 34348 + }, + { + "epoch": 0.5935340060823887, + "grad_norm": 1.0896959584612413, + "learning_rate": 7.484003398905651e-06, + "loss": 0.3965, + "step": 34349 + }, + { + "epoch": 0.5935512855957976, + "grad_norm": 0.8493188793769789, + "learning_rate": 7.4834617564572845e-06, + "loss": 0.306, + "step": 34350 + }, + { + "epoch": 0.5935685651092065, + "grad_norm": 1.1788118903928206, + "learning_rate": 7.4829201218909054e-06, + "loss": 0.4737, + "step": 34351 + }, + { + "epoch": 0.5935858446226154, + "grad_norm": 1.3052519834010174, + "learning_rate": 7.482378495208216e-06, + "loss": 0.5006, + "step": 34352 + }, + { + "epoch": 0.5936031241360243, + "grad_norm": 1.621188608447405, + "learning_rate": 7.481836876410906e-06, + "loss": 0.3374, + "step": 34353 + }, + { + "epoch": 0.5936204036494332, + "grad_norm": 0.7453602750818141, + "learning_rate": 7.481295265500672e-06, + "loss": 0.4201, + "step": 34354 + }, + { + "epoch": 0.5936376831628422, + "grad_norm": 1.622412697759864, + "learning_rate": 7.480753662479218e-06, + "loss": 0.3569, + "step": 34355 + }, + { + "epoch": 0.5936549626762511, + "grad_norm": 1.203307746476201, + "learning_rate": 7.480212067348227e-06, + "loss": 0.4653, + "step": 34356 + }, + { + "epoch": 0.59367224218966, + "grad_norm": 0.8570274013971853, + "learning_rate": 7.4796704801094115e-06, + "loss": 0.5415, + "step": 34357 + }, + { + "epoch": 0.5936895217030689, + "grad_norm": 1.0937617082283664, + "learning_rate": 7.479128900764454e-06, + "loss": 0.4045, + "step": 34358 + }, + { + "epoch": 0.5937068012164778, + "grad_norm": 0.9641933732145699, + "learning_rate": 7.478587329315059e-06, + "loss": 0.3383, + "step": 34359 + }, + { + "epoch": 0.5937240807298867, + "grad_norm": 1.0612027530932324, + "learning_rate": 7.478045765762918e-06, + "loss": 0.3393, + "step": 34360 + }, + { + "epoch": 0.5937413602432956, + "grad_norm": 0.925631570303089, + "learning_rate": 7.477504210109733e-06, + "loss": 0.391, + "step": 34361 + }, + { + "epoch": 0.5937586397567044, + "grad_norm": 1.000408184849493, + "learning_rate": 7.476962662357195e-06, + "loss": 0.3338, + "step": 34362 + }, + { + "epoch": 0.5937759192701133, + "grad_norm": 1.1771505013474286, + "learning_rate": 7.476421122506999e-06, + "loss": 0.4758, + "step": 34363 + }, + { + "epoch": 0.5937931987835222, + "grad_norm": 1.1302036201973324, + "learning_rate": 7.4758795905608465e-06, + "loss": 0.4282, + "step": 34364 + }, + { + "epoch": 0.5938104782969311, + "grad_norm": 1.22487829125003, + "learning_rate": 7.475338066520428e-06, + "loss": 0.2774, + "step": 34365 + }, + { + "epoch": 0.59382775781034, + "grad_norm": 1.2091175853592997, + "learning_rate": 7.474796550387448e-06, + "loss": 0.3718, + "step": 34366 + }, + { + "epoch": 0.5938450373237489, + "grad_norm": 0.9911443391869112, + "learning_rate": 7.474255042163592e-06, + "loss": 0.4881, + "step": 34367 + }, + { + "epoch": 0.5938623168371578, + "grad_norm": 1.2606756446110208, + "learning_rate": 7.4737135418505625e-06, + "loss": 0.5604, + "step": 34368 + }, + { + "epoch": 0.5938795963505668, + "grad_norm": 0.645359219476582, + "learning_rate": 7.473172049450054e-06, + "loss": 0.213, + "step": 34369 + }, + { + "epoch": 0.5938968758639757, + "grad_norm": 1.3586560923163549, + "learning_rate": 7.472630564963763e-06, + "loss": 0.5662, + "step": 34370 + }, + { + "epoch": 0.5939141553773846, + "grad_norm": 0.879553919517217, + "learning_rate": 7.4720890883933835e-06, + "loss": 0.3674, + "step": 34371 + }, + { + "epoch": 0.5939314348907935, + "grad_norm": 1.0845164325697432, + "learning_rate": 7.471547619740619e-06, + "loss": 0.3012, + "step": 34372 + }, + { + "epoch": 0.5939487144042024, + "grad_norm": 0.9935293415920776, + "learning_rate": 7.471006159007156e-06, + "loss": 0.3892, + "step": 34373 + }, + { + "epoch": 0.5939659939176113, + "grad_norm": 1.7110069659070986, + "learning_rate": 7.470464706194691e-06, + "loss": 0.4859, + "step": 34374 + }, + { + "epoch": 0.5939832734310202, + "grad_norm": 1.1780392749459863, + "learning_rate": 7.469923261304929e-06, + "loss": 0.4172, + "step": 34375 + }, + { + "epoch": 0.5940005529444291, + "grad_norm": 1.2720622826969914, + "learning_rate": 7.469381824339552e-06, + "loss": 0.3353, + "step": 34376 + }, + { + "epoch": 0.594017832457838, + "grad_norm": 1.1315253080885836, + "learning_rate": 7.4688403953002694e-06, + "loss": 0.3438, + "step": 34377 + }, + { + "epoch": 0.5940351119712469, + "grad_norm": 1.3150209471386842, + "learning_rate": 7.468298974188768e-06, + "loss": 0.4258, + "step": 34378 + }, + { + "epoch": 0.5940523914846558, + "grad_norm": 1.1750315166603698, + "learning_rate": 7.467757561006748e-06, + "loss": 0.3583, + "step": 34379 + }, + { + "epoch": 0.5940696709980647, + "grad_norm": 1.324844923828368, + "learning_rate": 7.467216155755902e-06, + "loss": 0.2937, + "step": 34380 + }, + { + "epoch": 0.5940869505114736, + "grad_norm": 1.038649407720279, + "learning_rate": 7.466674758437931e-06, + "loss": 0.4427, + "step": 34381 + }, + { + "epoch": 0.5941042300248826, + "grad_norm": 1.407268197550817, + "learning_rate": 7.4661333690545225e-06, + "loss": 0.3708, + "step": 34382 + }, + { + "epoch": 0.5941215095382913, + "grad_norm": 0.8335292843440849, + "learning_rate": 7.465591987607381e-06, + "loss": 0.5471, + "step": 34383 + }, + { + "epoch": 0.5941387890517003, + "grad_norm": 1.392563141920976, + "learning_rate": 7.465050614098197e-06, + "loss": 0.3784, + "step": 34384 + }, + { + "epoch": 0.5941560685651092, + "grad_norm": 1.6027981036194128, + "learning_rate": 7.464509248528665e-06, + "loss": 0.4955, + "step": 34385 + }, + { + "epoch": 0.5941733480785181, + "grad_norm": 0.9696133689943013, + "learning_rate": 7.463967890900488e-06, + "loss": 0.3177, + "step": 34386 + }, + { + "epoch": 0.594190627591927, + "grad_norm": 1.7472658119013924, + "learning_rate": 7.4634265412153505e-06, + "loss": 0.5535, + "step": 34387 + }, + { + "epoch": 0.5942079071053359, + "grad_norm": 1.3349935038613738, + "learning_rate": 7.462885199474956e-06, + "loss": 0.4299, + "step": 34388 + }, + { + "epoch": 0.5942251866187448, + "grad_norm": 1.1921698862049932, + "learning_rate": 7.462343865680997e-06, + "loss": 0.2452, + "step": 34389 + }, + { + "epoch": 0.5942424661321537, + "grad_norm": 0.6742245229772981, + "learning_rate": 7.4618025398351705e-06, + "loss": 0.7663, + "step": 34390 + }, + { + "epoch": 0.5942597456455626, + "grad_norm": 0.7238166390434012, + "learning_rate": 7.461261221939169e-06, + "loss": 0.6924, + "step": 34391 + }, + { + "epoch": 0.5942770251589715, + "grad_norm": 1.06552877046415, + "learning_rate": 7.460719911994696e-06, + "loss": 0.4222, + "step": 34392 + }, + { + "epoch": 0.5942943046723804, + "grad_norm": 1.636228134235323, + "learning_rate": 7.460178610003439e-06, + "loss": 0.3567, + "step": 34393 + }, + { + "epoch": 0.5943115841857893, + "grad_norm": 0.957787715025396, + "learning_rate": 7.459637315967092e-06, + "loss": 0.3539, + "step": 34394 + }, + { + "epoch": 0.5943288636991982, + "grad_norm": 0.9028457873133632, + "learning_rate": 7.45909602988736e-06, + "loss": 0.3394, + "step": 34395 + }, + { + "epoch": 0.5943461432126071, + "grad_norm": 0.5441860549620858, + "learning_rate": 7.458554751765925e-06, + "loss": 0.7614, + "step": 34396 + }, + { + "epoch": 0.594363422726016, + "grad_norm": 0.9499009739443784, + "learning_rate": 7.458013481604496e-06, + "loss": 0.4714, + "step": 34397 + }, + { + "epoch": 0.594380702239425, + "grad_norm": 1.2778040729203304, + "learning_rate": 7.4574722194047575e-06, + "loss": 0.303, + "step": 34398 + }, + { + "epoch": 0.5943979817528339, + "grad_norm": 1.240755423293058, + "learning_rate": 7.4569309651684114e-06, + "loss": 0.1935, + "step": 34399 + }, + { + "epoch": 0.5944152612662428, + "grad_norm": 1.0047167230278373, + "learning_rate": 7.45638971889715e-06, + "loss": 0.3223, + "step": 34400 + }, + { + "epoch": 0.5944325407796517, + "grad_norm": 0.7573078764083827, + "learning_rate": 7.4558484805926735e-06, + "loss": 0.3824, + "step": 34401 + }, + { + "epoch": 0.5944498202930606, + "grad_norm": 0.9744513893550483, + "learning_rate": 7.455307250256665e-06, + "loss": 0.5005, + "step": 34402 + }, + { + "epoch": 0.5944670998064695, + "grad_norm": 1.2748174169207447, + "learning_rate": 7.454766027890834e-06, + "loss": 0.5996, + "step": 34403 + }, + { + "epoch": 0.5944843793198783, + "grad_norm": 1.3849818432943346, + "learning_rate": 7.4542248134968685e-06, + "loss": 0.4393, + "step": 34404 + }, + { + "epoch": 0.5945016588332872, + "grad_norm": 1.0148055760813361, + "learning_rate": 7.453683607076461e-06, + "loss": 0.3085, + "step": 34405 + }, + { + "epoch": 0.5945189383466961, + "grad_norm": 1.0509616991430393, + "learning_rate": 7.453142408631315e-06, + "loss": 0.4208, + "step": 34406 + }, + { + "epoch": 0.594536217860105, + "grad_norm": 0.8313305955469884, + "learning_rate": 7.452601218163116e-06, + "loss": 0.3273, + "step": 34407 + }, + { + "epoch": 0.5945534973735139, + "grad_norm": 1.5701985053375578, + "learning_rate": 7.452060035673565e-06, + "loss": 0.359, + "step": 34408 + }, + { + "epoch": 0.5945707768869228, + "grad_norm": 1.1571111189138614, + "learning_rate": 7.451518861164354e-06, + "loss": 0.3634, + "step": 34409 + }, + { + "epoch": 0.5945880564003317, + "grad_norm": 0.867319690142892, + "learning_rate": 7.450977694637182e-06, + "loss": 0.6469, + "step": 34410 + }, + { + "epoch": 0.5946053359137407, + "grad_norm": 1.080493771113176, + "learning_rate": 7.450436536093739e-06, + "loss": 0.4285, + "step": 34411 + }, + { + "epoch": 0.5946226154271496, + "grad_norm": 0.9479481256942794, + "learning_rate": 7.449895385535728e-06, + "loss": 0.3285, + "step": 34412 + }, + { + "epoch": 0.5946398949405585, + "grad_norm": 0.9968007665607233, + "learning_rate": 7.449354242964835e-06, + "loss": 0.4397, + "step": 34413 + }, + { + "epoch": 0.5946571744539674, + "grad_norm": 1.6062467297264944, + "learning_rate": 7.448813108382757e-06, + "loss": 0.4009, + "step": 34414 + }, + { + "epoch": 0.5946744539673763, + "grad_norm": 1.1041764385702022, + "learning_rate": 7.448271981791193e-06, + "loss": 0.3833, + "step": 34415 + }, + { + "epoch": 0.5946917334807852, + "grad_norm": 0.9213256759722255, + "learning_rate": 7.447730863191831e-06, + "loss": 0.559, + "step": 34416 + }, + { + "epoch": 0.5947090129941941, + "grad_norm": 1.3257153470221066, + "learning_rate": 7.447189752586374e-06, + "loss": 0.5383, + "step": 34417 + }, + { + "epoch": 0.594726292507603, + "grad_norm": 0.6204496438486484, + "learning_rate": 7.44664864997651e-06, + "loss": 0.5928, + "step": 34418 + }, + { + "epoch": 0.5947435720210119, + "grad_norm": 0.6004998095650409, + "learning_rate": 7.446107555363938e-06, + "loss": 0.6135, + "step": 34419 + }, + { + "epoch": 0.5947608515344208, + "grad_norm": 0.607061787855973, + "learning_rate": 7.4455664687503495e-06, + "loss": 0.3443, + "step": 34420 + }, + { + "epoch": 0.5947781310478297, + "grad_norm": 1.2999644404255317, + "learning_rate": 7.4450253901374445e-06, + "loss": 0.4682, + "step": 34421 + }, + { + "epoch": 0.5947954105612386, + "grad_norm": 0.5670122173363912, + "learning_rate": 7.444484319526908e-06, + "loss": 0.2772, + "step": 34422 + }, + { + "epoch": 0.5948126900746475, + "grad_norm": 0.8164654239570507, + "learning_rate": 7.443943256920447e-06, + "loss": 0.3101, + "step": 34423 + }, + { + "epoch": 0.5948299695880565, + "grad_norm": 1.0644791323964007, + "learning_rate": 7.44340220231975e-06, + "loss": 0.4253, + "step": 34424 + }, + { + "epoch": 0.5948472491014652, + "grad_norm": 2.2227644745146318, + "learning_rate": 7.442861155726506e-06, + "loss": 0.3685, + "step": 34425 + }, + { + "epoch": 0.5948645286148742, + "grad_norm": 1.2143399540359714, + "learning_rate": 7.442320117142422e-06, + "loss": 0.5155, + "step": 34426 + }, + { + "epoch": 0.5948818081282831, + "grad_norm": 0.7914485207301202, + "learning_rate": 7.441779086569181e-06, + "loss": 0.511, + "step": 34427 + }, + { + "epoch": 0.594899087641692, + "grad_norm": 1.4941877892565336, + "learning_rate": 7.441238064008485e-06, + "loss": 0.3546, + "step": 34428 + }, + { + "epoch": 0.5949163671551009, + "grad_norm": 0.6434523275840017, + "learning_rate": 7.440697049462022e-06, + "loss": 0.481, + "step": 34429 + }, + { + "epoch": 0.5949336466685098, + "grad_norm": 1.1717293540353326, + "learning_rate": 7.440156042931493e-06, + "loss": 0.6606, + "step": 34430 + }, + { + "epoch": 0.5949509261819187, + "grad_norm": 0.7873409945441765, + "learning_rate": 7.439615044418589e-06, + "loss": 0.3779, + "step": 34431 + }, + { + "epoch": 0.5949682056953276, + "grad_norm": 0.44667336728603824, + "learning_rate": 7.4390740539250085e-06, + "loss": 0.5531, + "step": 34432 + }, + { + "epoch": 0.5949854852087365, + "grad_norm": 1.389182168197055, + "learning_rate": 7.438533071452439e-06, + "loss": 0.5038, + "step": 34433 + }, + { + "epoch": 0.5950027647221454, + "grad_norm": 0.958618861034019, + "learning_rate": 7.437992097002579e-06, + "loss": 0.3465, + "step": 34434 + }, + { + "epoch": 0.5950200442355543, + "grad_norm": 1.1469837315322822, + "learning_rate": 7.437451130577126e-06, + "loss": 0.4218, + "step": 34435 + }, + { + "epoch": 0.5950373237489632, + "grad_norm": 1.0856808101054427, + "learning_rate": 7.436910172177763e-06, + "loss": 0.4309, + "step": 34436 + }, + { + "epoch": 0.5950546032623721, + "grad_norm": 0.9598765579806948, + "learning_rate": 7.436369221806201e-06, + "loss": 0.3784, + "step": 34437 + }, + { + "epoch": 0.595071882775781, + "grad_norm": 1.2094608169713905, + "learning_rate": 7.435828279464119e-06, + "loss": 0.3929, + "step": 34438 + }, + { + "epoch": 0.59508916228919, + "grad_norm": 1.1585850019530888, + "learning_rate": 7.435287345153221e-06, + "loss": 0.3933, + "step": 34439 + }, + { + "epoch": 0.5951064418025989, + "grad_norm": 1.0604622106094463, + "learning_rate": 7.434746418875196e-06, + "loss": 0.4316, + "step": 34440 + }, + { + "epoch": 0.5951237213160078, + "grad_norm": 1.1432078443216103, + "learning_rate": 7.434205500631743e-06, + "loss": 0.5259, + "step": 34441 + }, + { + "epoch": 0.5951410008294167, + "grad_norm": 0.9742296962905067, + "learning_rate": 7.4336645904245475e-06, + "loss": 0.4434, + "step": 34442 + }, + { + "epoch": 0.5951582803428256, + "grad_norm": 1.0505208509869899, + "learning_rate": 7.433123688255317e-06, + "loss": 0.539, + "step": 34443 + }, + { + "epoch": 0.5951755598562345, + "grad_norm": 0.9539589558517997, + "learning_rate": 7.432582794125736e-06, + "loss": 0.4019, + "step": 34444 + }, + { + "epoch": 0.5951928393696434, + "grad_norm": 0.539211291184041, + "learning_rate": 7.4320419080374974e-06, + "loss": 0.5538, + "step": 34445 + }, + { + "epoch": 0.5952101188830523, + "grad_norm": 0.8943801066697877, + "learning_rate": 7.431501029992304e-06, + "loss": 0.3225, + "step": 34446 + }, + { + "epoch": 0.5952273983964611, + "grad_norm": 1.3316944005093028, + "learning_rate": 7.4309601599918405e-06, + "loss": 0.3494, + "step": 34447 + }, + { + "epoch": 0.59524467790987, + "grad_norm": 0.5612866772663526, + "learning_rate": 7.430419298037805e-06, + "loss": 0.3486, + "step": 34448 + }, + { + "epoch": 0.5952619574232789, + "grad_norm": 1.2309603533851328, + "learning_rate": 7.4298784441318915e-06, + "loss": 0.3137, + "step": 34449 + }, + { + "epoch": 0.5952792369366878, + "grad_norm": 1.159757211517279, + "learning_rate": 7.429337598275797e-06, + "loss": 0.434, + "step": 34450 + }, + { + "epoch": 0.5952965164500967, + "grad_norm": 1.2052948300918782, + "learning_rate": 7.428796760471209e-06, + "loss": 0.4087, + "step": 34451 + }, + { + "epoch": 0.5953137959635056, + "grad_norm": 0.8781097107194131, + "learning_rate": 7.428255930719828e-06, + "loss": 0.8838, + "step": 34452 + }, + { + "epoch": 0.5953310754769146, + "grad_norm": 1.0535481249662366, + "learning_rate": 7.427715109023342e-06, + "loss": 0.3566, + "step": 34453 + }, + { + "epoch": 0.5953483549903235, + "grad_norm": 1.198157842491437, + "learning_rate": 7.42717429538345e-06, + "loss": 0.4858, + "step": 34454 + }, + { + "epoch": 0.5953656345037324, + "grad_norm": 1.0870157270953862, + "learning_rate": 7.4266334898018454e-06, + "loss": 0.7001, + "step": 34455 + }, + { + "epoch": 0.5953829140171413, + "grad_norm": 1.1132612254496463, + "learning_rate": 7.426092692280214e-06, + "loss": 0.3621, + "step": 34456 + }, + { + "epoch": 0.5954001935305502, + "grad_norm": 0.8790385576334963, + "learning_rate": 7.425551902820262e-06, + "loss": 0.372, + "step": 34457 + }, + { + "epoch": 0.5954174730439591, + "grad_norm": 1.0758181133360354, + "learning_rate": 7.425011121423673e-06, + "loss": 0.2736, + "step": 34458 + }, + { + "epoch": 0.595434752557368, + "grad_norm": 1.0568835419359661, + "learning_rate": 7.424470348092147e-06, + "loss": 0.3525, + "step": 34459 + }, + { + "epoch": 0.5954520320707769, + "grad_norm": 1.1752519866741147, + "learning_rate": 7.4239295828273734e-06, + "loss": 0.2535, + "step": 34460 + }, + { + "epoch": 0.5954693115841858, + "grad_norm": 0.7375840403136651, + "learning_rate": 7.423388825631053e-06, + "loss": 0.4939, + "step": 34461 + }, + { + "epoch": 0.5954865910975947, + "grad_norm": 0.9897184020765567, + "learning_rate": 7.422848076504868e-06, + "loss": 0.4985, + "step": 34462 + }, + { + "epoch": 0.5955038706110036, + "grad_norm": 1.0841491871580147, + "learning_rate": 7.422307335450525e-06, + "loss": 0.4794, + "step": 34463 + }, + { + "epoch": 0.5955211501244125, + "grad_norm": 1.2090489381689478, + "learning_rate": 7.4217666024697065e-06, + "loss": 0.3172, + "step": 34464 + }, + { + "epoch": 0.5955384296378214, + "grad_norm": 0.9167492742169627, + "learning_rate": 7.421225877564113e-06, + "loss": 0.2877, + "step": 34465 + }, + { + "epoch": 0.5955557091512304, + "grad_norm": 1.1823164578763858, + "learning_rate": 7.420685160735439e-06, + "loss": 0.3987, + "step": 34466 + }, + { + "epoch": 0.5955729886646393, + "grad_norm": 0.7969532513316284, + "learning_rate": 7.420144451985372e-06, + "loss": 0.48, + "step": 34467 + }, + { + "epoch": 0.595590268178048, + "grad_norm": 1.0539165107201012, + "learning_rate": 7.4196037513156085e-06, + "loss": 0.2778, + "step": 34468 + }, + { + "epoch": 0.595607547691457, + "grad_norm": 2.0663229931940545, + "learning_rate": 7.4190630587278405e-06, + "loss": 0.3812, + "step": 34469 + }, + { + "epoch": 0.5956248272048659, + "grad_norm": 1.5010563989635555, + "learning_rate": 7.418522374223765e-06, + "loss": 0.4329, + "step": 34470 + }, + { + "epoch": 0.5956421067182748, + "grad_norm": 1.0311573959136522, + "learning_rate": 7.417981697805073e-06, + "loss": 0.6235, + "step": 34471 + }, + { + "epoch": 0.5956593862316837, + "grad_norm": 1.4406844165933559, + "learning_rate": 7.417441029473463e-06, + "loss": 0.4316, + "step": 34472 + }, + { + "epoch": 0.5956766657450926, + "grad_norm": 1.260708946615669, + "learning_rate": 7.4169003692306175e-06, + "loss": 0.3286, + "step": 34473 + }, + { + "epoch": 0.5956939452585015, + "grad_norm": 0.9571956960632165, + "learning_rate": 7.416359717078239e-06, + "loss": 0.6751, + "step": 34474 + }, + { + "epoch": 0.5957112247719104, + "grad_norm": 1.311355328204127, + "learning_rate": 7.41581907301802e-06, + "loss": 0.2959, + "step": 34475 + }, + { + "epoch": 0.5957285042853193, + "grad_norm": 0.836206432879078, + "learning_rate": 7.415278437051647e-06, + "loss": 0.3202, + "step": 34476 + }, + { + "epoch": 0.5957457837987282, + "grad_norm": 0.7565215872554265, + "learning_rate": 7.414737809180825e-06, + "loss": 0.3734, + "step": 34477 + }, + { + "epoch": 0.5957630633121371, + "grad_norm": 1.2853836836998143, + "learning_rate": 7.414197189407235e-06, + "loss": 0.3828, + "step": 34478 + }, + { + "epoch": 0.595780342825546, + "grad_norm": 1.1899950326862707, + "learning_rate": 7.4136565777325795e-06, + "loss": 0.569, + "step": 34479 + }, + { + "epoch": 0.595797622338955, + "grad_norm": 0.6644167652433147, + "learning_rate": 7.413115974158544e-06, + "loss": 0.7403, + "step": 34480 + }, + { + "epoch": 0.5958149018523639, + "grad_norm": 1.3066724465321677, + "learning_rate": 7.412575378686833e-06, + "loss": 0.3145, + "step": 34481 + }, + { + "epoch": 0.5958321813657728, + "grad_norm": 0.8342772437440816, + "learning_rate": 7.412034791319124e-06, + "loss": 0.725, + "step": 34482 + }, + { + "epoch": 0.5958494608791817, + "grad_norm": 0.7799403984276064, + "learning_rate": 7.4114942120571255e-06, + "loss": 0.3346, + "step": 34483 + }, + { + "epoch": 0.5958667403925906, + "grad_norm": 0.8741312616065254, + "learning_rate": 7.41095364090252e-06, + "loss": 0.2609, + "step": 34484 + }, + { + "epoch": 0.5958840199059995, + "grad_norm": 1.652853641193096, + "learning_rate": 7.410413077857006e-06, + "loss": 0.3807, + "step": 34485 + }, + { + "epoch": 0.5959012994194084, + "grad_norm": 0.8798673190029276, + "learning_rate": 7.4098725229222775e-06, + "loss": 0.4146, + "step": 34486 + }, + { + "epoch": 0.5959185789328173, + "grad_norm": 0.9033680998906323, + "learning_rate": 7.4093319761000205e-06, + "loss": 0.4587, + "step": 34487 + }, + { + "epoch": 0.5959358584462262, + "grad_norm": 1.3923946153147877, + "learning_rate": 7.408791437391936e-06, + "loss": 0.4508, + "step": 34488 + }, + { + "epoch": 0.595953137959635, + "grad_norm": 1.183346588312973, + "learning_rate": 7.40825090679971e-06, + "loss": 0.2418, + "step": 34489 + }, + { + "epoch": 0.5959704174730439, + "grad_norm": 2.1806034808118095, + "learning_rate": 7.407710384325042e-06, + "loss": 0.3506, + "step": 34490 + }, + { + "epoch": 0.5959876969864528, + "grad_norm": 1.2844648253508173, + "learning_rate": 7.40716986996962e-06, + "loss": 0.5612, + "step": 34491 + }, + { + "epoch": 0.5960049764998617, + "grad_norm": 0.9997823792173045, + "learning_rate": 7.406629363735143e-06, + "loss": 0.3519, + "step": 34492 + }, + { + "epoch": 0.5960222560132706, + "grad_norm": 1.2873882898972449, + "learning_rate": 7.406088865623296e-06, + "loss": 0.3991, + "step": 34493 + }, + { + "epoch": 0.5960395355266795, + "grad_norm": 1.598681235695643, + "learning_rate": 7.405548375635778e-06, + "loss": 0.2834, + "step": 34494 + }, + { + "epoch": 0.5960568150400885, + "grad_norm": 1.0898342426152892, + "learning_rate": 7.405007893774276e-06, + "loss": 0.4826, + "step": 34495 + }, + { + "epoch": 0.5960740945534974, + "grad_norm": 1.0730257829229342, + "learning_rate": 7.404467420040491e-06, + "loss": 0.6242, + "step": 34496 + }, + { + "epoch": 0.5960913740669063, + "grad_norm": 1.0625200045652625, + "learning_rate": 7.4039269544361115e-06, + "loss": 0.5585, + "step": 34497 + }, + { + "epoch": 0.5961086535803152, + "grad_norm": 1.0492777040390284, + "learning_rate": 7.403386496962827e-06, + "loss": 0.5125, + "step": 34498 + }, + { + "epoch": 0.5961259330937241, + "grad_norm": 1.2273569485302604, + "learning_rate": 7.402846047622334e-06, + "loss": 0.5568, + "step": 34499 + }, + { + "epoch": 0.596143212607133, + "grad_norm": 1.176381119340381, + "learning_rate": 7.402305606416325e-06, + "loss": 0.5205, + "step": 34500 + }, + { + "epoch": 0.5961604921205419, + "grad_norm": 0.8567699727425261, + "learning_rate": 7.401765173346495e-06, + "loss": 0.4793, + "step": 34501 + }, + { + "epoch": 0.5961777716339508, + "grad_norm": 1.2510191160830255, + "learning_rate": 7.401224748414526e-06, + "loss": 0.231, + "step": 34502 + }, + { + "epoch": 0.5961950511473597, + "grad_norm": 1.454721124145695, + "learning_rate": 7.400684331622128e-06, + "loss": 0.4251, + "step": 34503 + }, + { + "epoch": 0.5962123306607686, + "grad_norm": 0.9881319219783646, + "learning_rate": 7.400143922970977e-06, + "loss": 0.4741, + "step": 34504 + }, + { + "epoch": 0.5962296101741775, + "grad_norm": 1.2322548574697385, + "learning_rate": 7.3996035224627774e-06, + "loss": 0.4784, + "step": 34505 + }, + { + "epoch": 0.5962468896875864, + "grad_norm": 1.2529121634304992, + "learning_rate": 7.399063130099218e-06, + "loss": 0.3388, + "step": 34506 + }, + { + "epoch": 0.5962641692009953, + "grad_norm": 1.2332501322742202, + "learning_rate": 7.398522745881986e-06, + "loss": 0.6112, + "step": 34507 + }, + { + "epoch": 0.5962814487144042, + "grad_norm": 1.4199593802626633, + "learning_rate": 7.3979823698127815e-06, + "loss": 0.4585, + "step": 34508 + }, + { + "epoch": 0.5962987282278132, + "grad_norm": 0.7718899869076834, + "learning_rate": 7.397442001893291e-06, + "loss": 0.3488, + "step": 34509 + }, + { + "epoch": 0.596316007741222, + "grad_norm": 1.4801543310780747, + "learning_rate": 7.396901642125214e-06, + "loss": 0.582, + "step": 34510 + }, + { + "epoch": 0.5963332872546309, + "grad_norm": 1.392851433793547, + "learning_rate": 7.396361290510234e-06, + "loss": 0.6533, + "step": 34511 + }, + { + "epoch": 0.5963505667680398, + "grad_norm": 1.0563805218951285, + "learning_rate": 7.395820947050054e-06, + "loss": 0.5129, + "step": 34512 + }, + { + "epoch": 0.5963678462814487, + "grad_norm": 0.8220511223084352, + "learning_rate": 7.395280611746357e-06, + "loss": 0.3212, + "step": 34513 + }, + { + "epoch": 0.5963851257948576, + "grad_norm": 1.7751739974821819, + "learning_rate": 7.394740284600841e-06, + "loss": 0.3279, + "step": 34514 + }, + { + "epoch": 0.5964024053082665, + "grad_norm": 1.2523505622783457, + "learning_rate": 7.394199965615194e-06, + "loss": 0.3213, + "step": 34515 + }, + { + "epoch": 0.5964196848216754, + "grad_norm": 1.405914682913398, + "learning_rate": 7.393659654791112e-06, + "loss": 0.3246, + "step": 34516 + }, + { + "epoch": 0.5964369643350843, + "grad_norm": 1.204347440093577, + "learning_rate": 7.393119352130289e-06, + "loss": 0.3529, + "step": 34517 + }, + { + "epoch": 0.5964542438484932, + "grad_norm": 1.360181559783219, + "learning_rate": 7.392579057634411e-06, + "loss": 0.5554, + "step": 34518 + }, + { + "epoch": 0.5964715233619021, + "grad_norm": 1.0655164106941535, + "learning_rate": 7.392038771305175e-06, + "loss": 0.3269, + "step": 34519 + }, + { + "epoch": 0.596488802875311, + "grad_norm": 1.7502373611337811, + "learning_rate": 7.391498493144268e-06, + "loss": 0.4944, + "step": 34520 + }, + { + "epoch": 0.5965060823887199, + "grad_norm": 1.2732827078263265, + "learning_rate": 7.3909582231533915e-06, + "loss": 0.5159, + "step": 34521 + }, + { + "epoch": 0.5965233619021288, + "grad_norm": 0.835948863205892, + "learning_rate": 7.390417961334226e-06, + "loss": 0.4293, + "step": 34522 + }, + { + "epoch": 0.5965406414155378, + "grad_norm": 1.0384463847681649, + "learning_rate": 7.389877707688475e-06, + "loss": 0.3073, + "step": 34523 + }, + { + "epoch": 0.5965579209289467, + "grad_norm": 1.4173749650914753, + "learning_rate": 7.3893374622178224e-06, + "loss": 0.2942, + "step": 34524 + }, + { + "epoch": 0.5965752004423556, + "grad_norm": 0.9927462232971891, + "learning_rate": 7.388797224923965e-06, + "loss": 0.4535, + "step": 34525 + }, + { + "epoch": 0.5965924799557645, + "grad_norm": 1.235780428784199, + "learning_rate": 7.388256995808591e-06, + "loss": 0.4867, + "step": 34526 + }, + { + "epoch": 0.5966097594691734, + "grad_norm": 0.5445109859080169, + "learning_rate": 7.387716774873399e-06, + "loss": 0.4785, + "step": 34527 + }, + { + "epoch": 0.5966270389825823, + "grad_norm": 0.7765209746339369, + "learning_rate": 7.387176562120074e-06, + "loss": 0.3327, + "step": 34528 + }, + { + "epoch": 0.5966443184959912, + "grad_norm": 1.171345634622437, + "learning_rate": 7.38663635755031e-06, + "loss": 0.2635, + "step": 34529 + }, + { + "epoch": 0.5966615980094001, + "grad_norm": 0.6917161837861429, + "learning_rate": 7.3860961611657985e-06, + "loss": 0.3738, + "step": 34530 + }, + { + "epoch": 0.5966788775228089, + "grad_norm": 0.8829849003047593, + "learning_rate": 7.385555972968232e-06, + "loss": 0.3885, + "step": 34531 + }, + { + "epoch": 0.5966961570362178, + "grad_norm": 0.8099796625490912, + "learning_rate": 7.385015792959309e-06, + "loss": 0.2893, + "step": 34532 + }, + { + "epoch": 0.5967134365496267, + "grad_norm": 0.8292310948498689, + "learning_rate": 7.38447562114071e-06, + "loss": 0.5374, + "step": 34533 + }, + { + "epoch": 0.5967307160630356, + "grad_norm": 0.9666762123231412, + "learning_rate": 7.383935457514133e-06, + "loss": 0.4991, + "step": 34534 + }, + { + "epoch": 0.5967479955764445, + "grad_norm": 1.254385114951258, + "learning_rate": 7.3833953020812665e-06, + "loss": 0.4446, + "step": 34535 + }, + { + "epoch": 0.5967652750898534, + "grad_norm": 0.9414031187444846, + "learning_rate": 7.382855154843809e-06, + "loss": 0.2883, + "step": 34536 + }, + { + "epoch": 0.5967825546032623, + "grad_norm": 1.0185484743154833, + "learning_rate": 7.382315015803448e-06, + "loss": 0.4268, + "step": 34537 + }, + { + "epoch": 0.5967998341166713, + "grad_norm": 1.2373768864044934, + "learning_rate": 7.381774884961872e-06, + "loss": 0.3434, + "step": 34538 + }, + { + "epoch": 0.5968171136300802, + "grad_norm": 0.6594110482661809, + "learning_rate": 7.381234762320779e-06, + "loss": 0.2796, + "step": 34539 + }, + { + "epoch": 0.5968343931434891, + "grad_norm": 1.110445893438082, + "learning_rate": 7.3806946478818546e-06, + "loss": 0.3906, + "step": 34540 + }, + { + "epoch": 0.596851672656898, + "grad_norm": 1.0164464194051275, + "learning_rate": 7.380154541646798e-06, + "loss": 0.417, + "step": 34541 + }, + { + "epoch": 0.5968689521703069, + "grad_norm": 0.7410610717840631, + "learning_rate": 7.379614443617291e-06, + "loss": 0.3314, + "step": 34542 + }, + { + "epoch": 0.5968862316837158, + "grad_norm": 1.5235092966597379, + "learning_rate": 7.379074353795036e-06, + "loss": 0.5048, + "step": 34543 + }, + { + "epoch": 0.5969035111971247, + "grad_norm": 1.108058114060259, + "learning_rate": 7.378534272181715e-06, + "loss": 0.4079, + "step": 34544 + }, + { + "epoch": 0.5969207907105336, + "grad_norm": 1.3399829224578979, + "learning_rate": 7.377994198779027e-06, + "loss": 0.4575, + "step": 34545 + }, + { + "epoch": 0.5969380702239425, + "grad_norm": 0.7060879413881336, + "learning_rate": 7.377454133588657e-06, + "loss": 0.5809, + "step": 34546 + }, + { + "epoch": 0.5969553497373514, + "grad_norm": 1.4201199872476824, + "learning_rate": 7.376914076612305e-06, + "loss": 0.4421, + "step": 34547 + }, + { + "epoch": 0.5969726292507603, + "grad_norm": 1.3496288807976222, + "learning_rate": 7.376374027851656e-06, + "loss": 0.3288, + "step": 34548 + }, + { + "epoch": 0.5969899087641692, + "grad_norm": 0.652029932546321, + "learning_rate": 7.375833987308401e-06, + "loss": 0.376, + "step": 34549 + }, + { + "epoch": 0.5970071882775781, + "grad_norm": 0.8713483108911028, + "learning_rate": 7.375293954984233e-06, + "loss": 0.2135, + "step": 34550 + }, + { + "epoch": 0.5970244677909871, + "grad_norm": 1.184373179002495, + "learning_rate": 7.374753930880844e-06, + "loss": 0.2824, + "step": 34551 + }, + { + "epoch": 0.5970417473043959, + "grad_norm": 0.7591792902388895, + "learning_rate": 7.374213914999928e-06, + "loss": 0.3927, + "step": 34552 + }, + { + "epoch": 0.5970590268178048, + "grad_norm": 1.4394095231288904, + "learning_rate": 7.373673907343171e-06, + "loss": 0.3301, + "step": 34553 + }, + { + "epoch": 0.5970763063312137, + "grad_norm": 0.8848507689754832, + "learning_rate": 7.373133907912268e-06, + "loss": 0.2104, + "step": 34554 + }, + { + "epoch": 0.5970935858446226, + "grad_norm": 1.424630970181258, + "learning_rate": 7.372593916708906e-06, + "loss": 0.4276, + "step": 34555 + }, + { + "epoch": 0.5971108653580315, + "grad_norm": 0.7265796934395289, + "learning_rate": 7.372053933734784e-06, + "loss": 0.4083, + "step": 34556 + }, + { + "epoch": 0.5971281448714404, + "grad_norm": 1.3333598085874194, + "learning_rate": 7.3715139589915856e-06, + "loss": 0.3664, + "step": 34557 + }, + { + "epoch": 0.5971454243848493, + "grad_norm": 1.2348903946322487, + "learning_rate": 7.370973992481009e-06, + "loss": 0.382, + "step": 34558 + }, + { + "epoch": 0.5971627038982582, + "grad_norm": 1.031261187510182, + "learning_rate": 7.37043403420474e-06, + "loss": 0.455, + "step": 34559 + }, + { + "epoch": 0.5971799834116671, + "grad_norm": 1.4185784814071223, + "learning_rate": 7.369894084164468e-06, + "loss": 0.4085, + "step": 34560 + }, + { + "epoch": 0.597197262925076, + "grad_norm": 0.9277550408995701, + "learning_rate": 7.369354142361894e-06, + "loss": 0.297, + "step": 34561 + }, + { + "epoch": 0.5972145424384849, + "grad_norm": 0.8679226802728245, + "learning_rate": 7.368814208798696e-06, + "loss": 0.312, + "step": 34562 + }, + { + "epoch": 0.5972318219518938, + "grad_norm": 0.9412955593203133, + "learning_rate": 7.368274283476577e-06, + "loss": 0.3943, + "step": 34563 + }, + { + "epoch": 0.5972491014653027, + "grad_norm": 0.8742687122935487, + "learning_rate": 7.367734366397219e-06, + "loss": 0.3577, + "step": 34564 + }, + { + "epoch": 0.5972663809787117, + "grad_norm": 0.5572041191102579, + "learning_rate": 7.36719445756232e-06, + "loss": 0.7542, + "step": 34565 + }, + { + "epoch": 0.5972836604921206, + "grad_norm": 0.7497925425288842, + "learning_rate": 7.366654556973566e-06, + "loss": 0.4463, + "step": 34566 + }, + { + "epoch": 0.5973009400055295, + "grad_norm": 0.8567541032645253, + "learning_rate": 7.3661146646326535e-06, + "loss": 0.2595, + "step": 34567 + }, + { + "epoch": 0.5973182195189384, + "grad_norm": 1.5379297943699723, + "learning_rate": 7.365574780541268e-06, + "loss": 0.1878, + "step": 34568 + }, + { + "epoch": 0.5973354990323473, + "grad_norm": 0.9865540749562961, + "learning_rate": 7.3650349047011e-06, + "loss": 0.4901, + "step": 34569 + }, + { + "epoch": 0.5973527785457562, + "grad_norm": 1.9120700243311053, + "learning_rate": 7.3644950371138455e-06, + "loss": 0.5334, + "step": 34570 + }, + { + "epoch": 0.5973700580591651, + "grad_norm": 1.459590577038192, + "learning_rate": 7.363955177781193e-06, + "loss": 0.2902, + "step": 34571 + }, + { + "epoch": 0.597387337572574, + "grad_norm": 1.0057574395595157, + "learning_rate": 7.363415326704834e-06, + "loss": 0.3091, + "step": 34572 + }, + { + "epoch": 0.5974046170859829, + "grad_norm": 1.0868340762106414, + "learning_rate": 7.362875483886456e-06, + "loss": 0.3445, + "step": 34573 + }, + { + "epoch": 0.5974218965993917, + "grad_norm": 0.7226717691372199, + "learning_rate": 7.362335649327754e-06, + "loss": 0.2888, + "step": 34574 + }, + { + "epoch": 0.5974391761128006, + "grad_norm": 0.8026009043635253, + "learning_rate": 7.3617958230304154e-06, + "loss": 0.2516, + "step": 34575 + }, + { + "epoch": 0.5974564556262095, + "grad_norm": 0.8648630359700582, + "learning_rate": 7.361256004996134e-06, + "loss": 0.3173, + "step": 34576 + }, + { + "epoch": 0.5974737351396184, + "grad_norm": 0.5511229061622157, + "learning_rate": 7.360716195226598e-06, + "loss": 0.244, + "step": 34577 + }, + { + "epoch": 0.5974910146530273, + "grad_norm": 1.3077596247871608, + "learning_rate": 7.360176393723504e-06, + "loss": 0.5501, + "step": 34578 + }, + { + "epoch": 0.5975082941664362, + "grad_norm": 1.009691014914882, + "learning_rate": 7.359636600488535e-06, + "loss": 0.5025, + "step": 34579 + }, + { + "epoch": 0.5975255736798452, + "grad_norm": 1.154601540311667, + "learning_rate": 7.359096815523383e-06, + "loss": 0.3739, + "step": 34580 + }, + { + "epoch": 0.5975428531932541, + "grad_norm": 1.046556024413929, + "learning_rate": 7.358557038829745e-06, + "loss": 0.2678, + "step": 34581 + }, + { + "epoch": 0.597560132706663, + "grad_norm": 1.510713813796235, + "learning_rate": 7.3580172704093014e-06, + "loss": 0.2893, + "step": 34582 + }, + { + "epoch": 0.5975774122200719, + "grad_norm": 1.1552781389413143, + "learning_rate": 7.357477510263755e-06, + "loss": 0.3371, + "step": 34583 + }, + { + "epoch": 0.5975946917334808, + "grad_norm": 1.1021369763036617, + "learning_rate": 7.356937758394784e-06, + "loss": 0.4948, + "step": 34584 + }, + { + "epoch": 0.5976119712468897, + "grad_norm": 1.0414040215094922, + "learning_rate": 7.356398014804088e-06, + "loss": 0.3519, + "step": 34585 + }, + { + "epoch": 0.5976292507602986, + "grad_norm": 0.9881136889119221, + "learning_rate": 7.355858279493352e-06, + "loss": 0.3937, + "step": 34586 + }, + { + "epoch": 0.5976465302737075, + "grad_norm": 1.3146048559991985, + "learning_rate": 7.355318552464273e-06, + "loss": 0.3757, + "step": 34587 + }, + { + "epoch": 0.5976638097871164, + "grad_norm": 1.4484883934311794, + "learning_rate": 7.3547788337185356e-06, + "loss": 0.4338, + "step": 34588 + }, + { + "epoch": 0.5976810893005253, + "grad_norm": 1.123402850488973, + "learning_rate": 7.35423912325783e-06, + "loss": 0.4381, + "step": 34589 + }, + { + "epoch": 0.5976983688139342, + "grad_norm": 0.8617853636211008, + "learning_rate": 7.3536994210838485e-06, + "loss": 0.3696, + "step": 34590 + }, + { + "epoch": 0.5977156483273431, + "grad_norm": 1.0977827484847698, + "learning_rate": 7.353159727198282e-06, + "loss": 0.2995, + "step": 34591 + }, + { + "epoch": 0.597732927840752, + "grad_norm": 1.2455509031747218, + "learning_rate": 7.352620041602822e-06, + "loss": 0.467, + "step": 34592 + }, + { + "epoch": 0.597750207354161, + "grad_norm": 0.8955950544902512, + "learning_rate": 7.352080364299157e-06, + "loss": 0.3775, + "step": 34593 + }, + { + "epoch": 0.5977674868675699, + "grad_norm": 1.1220932311539555, + "learning_rate": 7.351540695288977e-06, + "loss": 0.3478, + "step": 34594 + }, + { + "epoch": 0.5977847663809787, + "grad_norm": 1.0959401721532047, + "learning_rate": 7.3510010345739704e-06, + "loss": 0.4263, + "step": 34595 + }, + { + "epoch": 0.5978020458943876, + "grad_norm": 0.9877442852845062, + "learning_rate": 7.350461382155832e-06, + "loss": 0.4065, + "step": 34596 + }, + { + "epoch": 0.5978193254077965, + "grad_norm": 1.5475491113104272, + "learning_rate": 7.349921738036248e-06, + "loss": 0.4703, + "step": 34597 + }, + { + "epoch": 0.5978366049212054, + "grad_norm": 1.369673909790683, + "learning_rate": 7.349382102216916e-06, + "loss": 0.4053, + "step": 34598 + }, + { + "epoch": 0.5978538844346143, + "grad_norm": 1.9755779611245785, + "learning_rate": 7.348842474699518e-06, + "loss": 0.3271, + "step": 34599 + }, + { + "epoch": 0.5978711639480232, + "grad_norm": 0.7948669615043105, + "learning_rate": 7.348302855485744e-06, + "loss": 0.3567, + "step": 34600 + }, + { + "epoch": 0.5978884434614321, + "grad_norm": 2.2345996476075207, + "learning_rate": 7.347763244577291e-06, + "loss": 0.2987, + "step": 34601 + }, + { + "epoch": 0.597905722974841, + "grad_norm": 0.9781379108906109, + "learning_rate": 7.347223641975841e-06, + "loss": 0.3823, + "step": 34602 + }, + { + "epoch": 0.5979230024882499, + "grad_norm": 1.212234431758367, + "learning_rate": 7.346684047683091e-06, + "loss": 0.3743, + "step": 34603 + }, + { + "epoch": 0.5979402820016588, + "grad_norm": 1.1022711244133288, + "learning_rate": 7.346144461700726e-06, + "loss": 0.3633, + "step": 34604 + }, + { + "epoch": 0.5979575615150677, + "grad_norm": 1.1046255749237095, + "learning_rate": 7.345604884030441e-06, + "loss": 0.3562, + "step": 34605 + }, + { + "epoch": 0.5979748410284766, + "grad_norm": 1.1076795820068044, + "learning_rate": 7.34506531467392e-06, + "loss": 0.3681, + "step": 34606 + }, + { + "epoch": 0.5979921205418856, + "grad_norm": 0.8228558975374354, + "learning_rate": 7.344525753632861e-06, + "loss": 0.3906, + "step": 34607 + }, + { + "epoch": 0.5980094000552945, + "grad_norm": 0.824527326011584, + "learning_rate": 7.343986200908944e-06, + "loss": 0.3049, + "step": 34608 + }, + { + "epoch": 0.5980266795687034, + "grad_norm": 1.577596263191073, + "learning_rate": 7.343446656503867e-06, + "loss": 0.4172, + "step": 34609 + }, + { + "epoch": 0.5980439590821123, + "grad_norm": 0.9802064912370656, + "learning_rate": 7.342907120419317e-06, + "loss": 0.5878, + "step": 34610 + }, + { + "epoch": 0.5980612385955212, + "grad_norm": 0.9718706875860307, + "learning_rate": 7.342367592656981e-06, + "loss": 0.3282, + "step": 34611 + }, + { + "epoch": 0.5980785181089301, + "grad_norm": 0.6642022140774732, + "learning_rate": 7.341828073218558e-06, + "loss": 0.5826, + "step": 34612 + }, + { + "epoch": 0.598095797622339, + "grad_norm": 0.9896738086768319, + "learning_rate": 7.341288562105724e-06, + "loss": 0.416, + "step": 34613 + }, + { + "epoch": 0.5981130771357479, + "grad_norm": 0.6052640613281979, + "learning_rate": 7.34074905932018e-06, + "loss": 0.7751, + "step": 34614 + }, + { + "epoch": 0.5981303566491568, + "grad_norm": 1.258744274497976, + "learning_rate": 7.340209564863611e-06, + "loss": 0.6687, + "step": 34615 + }, + { + "epoch": 0.5981476361625656, + "grad_norm": 0.8100539250306721, + "learning_rate": 7.339670078737708e-06, + "loss": 0.3473, + "step": 34616 + }, + { + "epoch": 0.5981649156759745, + "grad_norm": 0.9224692872588001, + "learning_rate": 7.3391306009441595e-06, + "loss": 0.4019, + "step": 34617 + }, + { + "epoch": 0.5981821951893834, + "grad_norm": 1.2033980842056906, + "learning_rate": 7.33859113148466e-06, + "loss": 0.2993, + "step": 34618 + }, + { + "epoch": 0.5981994747027923, + "grad_norm": 0.8128628151975569, + "learning_rate": 7.338051670360894e-06, + "loss": 0.3551, + "step": 34619 + }, + { + "epoch": 0.5982167542162012, + "grad_norm": 1.4604743814404528, + "learning_rate": 7.3375122175745494e-06, + "loss": 0.2826, + "step": 34620 + }, + { + "epoch": 0.5982340337296101, + "grad_norm": 0.7445573013051351, + "learning_rate": 7.336972773127323e-06, + "loss": 0.3269, + "step": 34621 + }, + { + "epoch": 0.5982513132430191, + "grad_norm": 1.1048386664977565, + "learning_rate": 7.336433337020894e-06, + "loss": 0.3019, + "step": 34622 + }, + { + "epoch": 0.598268592756428, + "grad_norm": 1.1727333282299728, + "learning_rate": 7.335893909256966e-06, + "loss": 0.3659, + "step": 34623 + }, + { + "epoch": 0.5982858722698369, + "grad_norm": 1.1946331109016952, + "learning_rate": 7.3353544898372155e-06, + "loss": 0.3726, + "step": 34624 + }, + { + "epoch": 0.5983031517832458, + "grad_norm": 1.5263481369178504, + "learning_rate": 7.334815078763339e-06, + "loss": 0.6488, + "step": 34625 + }, + { + "epoch": 0.5983204312966547, + "grad_norm": 0.8549979796577095, + "learning_rate": 7.334275676037022e-06, + "loss": 0.4002, + "step": 34626 + }, + { + "epoch": 0.5983377108100636, + "grad_norm": 1.2282224598014246, + "learning_rate": 7.333736281659961e-06, + "loss": 0.4364, + "step": 34627 + }, + { + "epoch": 0.5983549903234725, + "grad_norm": 1.165689585448613, + "learning_rate": 7.333196895633836e-06, + "loss": 0.4332, + "step": 34628 + }, + { + "epoch": 0.5983722698368814, + "grad_norm": 1.2433157749334904, + "learning_rate": 7.332657517960343e-06, + "loss": 0.3863, + "step": 34629 + }, + { + "epoch": 0.5983895493502903, + "grad_norm": 0.8534627071897511, + "learning_rate": 7.33211814864117e-06, + "loss": 0.3541, + "step": 34630 + }, + { + "epoch": 0.5984068288636992, + "grad_norm": 0.7922381967223987, + "learning_rate": 7.331578787678003e-06, + "loss": 0.3879, + "step": 34631 + }, + { + "epoch": 0.5984241083771081, + "grad_norm": 0.8798069935096442, + "learning_rate": 7.331039435072539e-06, + "loss": 0.326, + "step": 34632 + }, + { + "epoch": 0.598441387890517, + "grad_norm": 0.7427512786241388, + "learning_rate": 7.330500090826458e-06, + "loss": 0.2734, + "step": 34633 + }, + { + "epoch": 0.598458667403926, + "grad_norm": 0.5509327990456504, + "learning_rate": 7.329960754941454e-06, + "loss": 0.672, + "step": 34634 + }, + { + "epoch": 0.5984759469173349, + "grad_norm": 0.7903446521027536, + "learning_rate": 7.329421427419216e-06, + "loss": 0.4118, + "step": 34635 + }, + { + "epoch": 0.5984932264307438, + "grad_norm": 0.8664539105620076, + "learning_rate": 7.3288821082614335e-06, + "loss": 0.3786, + "step": 34636 + }, + { + "epoch": 0.5985105059441526, + "grad_norm": 1.3925158041818322, + "learning_rate": 7.328342797469794e-06, + "loss": 0.4973, + "step": 34637 + }, + { + "epoch": 0.5985277854575615, + "grad_norm": 1.39171843574368, + "learning_rate": 7.327803495045992e-06, + "loss": 0.4322, + "step": 34638 + }, + { + "epoch": 0.5985450649709704, + "grad_norm": 0.8574036577296417, + "learning_rate": 7.327264200991709e-06, + "loss": 0.3606, + "step": 34639 + }, + { + "epoch": 0.5985623444843793, + "grad_norm": 0.8548432553445917, + "learning_rate": 7.326724915308638e-06, + "loss": 0.2789, + "step": 34640 + }, + { + "epoch": 0.5985796239977882, + "grad_norm": 1.0690138810277683, + "learning_rate": 7.326185637998471e-06, + "loss": 0.5043, + "step": 34641 + }, + { + "epoch": 0.5985969035111971, + "grad_norm": 1.207116587545779, + "learning_rate": 7.325646369062887e-06, + "loss": 0.5666, + "step": 34642 + }, + { + "epoch": 0.598614183024606, + "grad_norm": 1.2202906791341213, + "learning_rate": 7.3251071085035885e-06, + "loss": 0.405, + "step": 34643 + }, + { + "epoch": 0.5986314625380149, + "grad_norm": 1.353427401370757, + "learning_rate": 7.324567856322253e-06, + "loss": 0.5402, + "step": 34644 + }, + { + "epoch": 0.5986487420514238, + "grad_norm": 0.7331588970075407, + "learning_rate": 7.324028612520578e-06, + "loss": 0.3868, + "step": 34645 + }, + { + "epoch": 0.5986660215648327, + "grad_norm": 0.9785177606163546, + "learning_rate": 7.323489377100245e-06, + "loss": 0.4006, + "step": 34646 + }, + { + "epoch": 0.5986833010782416, + "grad_norm": 1.0884598425822505, + "learning_rate": 7.3229501500629505e-06, + "loss": 0.3579, + "step": 34647 + }, + { + "epoch": 0.5987005805916505, + "grad_norm": 0.5003053534036624, + "learning_rate": 7.322410931410378e-06, + "loss": 0.1672, + "step": 34648 + }, + { + "epoch": 0.5987178601050595, + "grad_norm": 1.3150271057982796, + "learning_rate": 7.321871721144218e-06, + "loss": 0.4805, + "step": 34649 + }, + { + "epoch": 0.5987351396184684, + "grad_norm": 0.5626199234020167, + "learning_rate": 7.32133251926616e-06, + "loss": 0.7476, + "step": 34650 + }, + { + "epoch": 0.5987524191318773, + "grad_norm": 1.2266454013449364, + "learning_rate": 7.3207933257778885e-06, + "loss": 0.3903, + "step": 34651 + }, + { + "epoch": 0.5987696986452862, + "grad_norm": 1.1938543477273782, + "learning_rate": 7.320254140681103e-06, + "loss": 0.3154, + "step": 34652 + }, + { + "epoch": 0.5987869781586951, + "grad_norm": 1.1308327748818796, + "learning_rate": 7.3197149639774775e-06, + "loss": 0.4492, + "step": 34653 + }, + { + "epoch": 0.598804257672104, + "grad_norm": 1.1792119346527161, + "learning_rate": 7.319175795668713e-06, + "loss": 0.3754, + "step": 34654 + }, + { + "epoch": 0.5988215371855129, + "grad_norm": 0.7340923290846019, + "learning_rate": 7.318636635756491e-06, + "loss": 0.4643, + "step": 34655 + }, + { + "epoch": 0.5988388166989218, + "grad_norm": 1.2799606394329572, + "learning_rate": 7.318097484242505e-06, + "loss": 0.7652, + "step": 34656 + }, + { + "epoch": 0.5988560962123307, + "grad_norm": 0.7276881017966086, + "learning_rate": 7.317558341128439e-06, + "loss": 0.2911, + "step": 34657 + }, + { + "epoch": 0.5988733757257395, + "grad_norm": 1.5504170972485558, + "learning_rate": 7.317019206415987e-06, + "loss": 0.3699, + "step": 34658 + }, + { + "epoch": 0.5988906552391484, + "grad_norm": 1.3824193340272966, + "learning_rate": 7.316480080106834e-06, + "loss": 0.3982, + "step": 34659 + }, + { + "epoch": 0.5989079347525573, + "grad_norm": 1.9248269996993965, + "learning_rate": 7.315940962202668e-06, + "loss": 0.4742, + "step": 34660 + }, + { + "epoch": 0.5989252142659662, + "grad_norm": 1.071191670312834, + "learning_rate": 7.315401852705183e-06, + "loss": 0.3422, + "step": 34661 + }, + { + "epoch": 0.5989424937793751, + "grad_norm": 1.4731554680170038, + "learning_rate": 7.3148627516160565e-06, + "loss": 0.3882, + "step": 34662 + }, + { + "epoch": 0.598959773292784, + "grad_norm": 0.9754147057995446, + "learning_rate": 7.31432365893699e-06, + "loss": 0.4484, + "step": 34663 + }, + { + "epoch": 0.598977052806193, + "grad_norm": 0.7671198135936139, + "learning_rate": 7.313784574669662e-06, + "loss": 0.3593, + "step": 34664 + }, + { + "epoch": 0.5989943323196019, + "grad_norm": 0.8161810935843011, + "learning_rate": 7.3132454988157675e-06, + "loss": 0.268, + "step": 34665 + }, + { + "epoch": 0.5990116118330108, + "grad_norm": 1.1776612629953032, + "learning_rate": 7.31270643137699e-06, + "loss": 0.4459, + "step": 34666 + }, + { + "epoch": 0.5990288913464197, + "grad_norm": 1.0219897294888156, + "learning_rate": 7.3121673723550236e-06, + "loss": 0.4145, + "step": 34667 + }, + { + "epoch": 0.5990461708598286, + "grad_norm": 1.1716541251452093, + "learning_rate": 7.31162832175155e-06, + "loss": 0.465, + "step": 34668 + }, + { + "epoch": 0.5990634503732375, + "grad_norm": 0.7668808493657062, + "learning_rate": 7.311089279568262e-06, + "loss": 0.3148, + "step": 34669 + }, + { + "epoch": 0.5990807298866464, + "grad_norm": 1.2474158021431025, + "learning_rate": 7.310550245806844e-06, + "loss": 0.4087, + "step": 34670 + }, + { + "epoch": 0.5990980094000553, + "grad_norm": 0.994251416858016, + "learning_rate": 7.310011220468991e-06, + "loss": 0.3917, + "step": 34671 + }, + { + "epoch": 0.5991152889134642, + "grad_norm": 1.4585315436684634, + "learning_rate": 7.309472203556389e-06, + "loss": 0.3786, + "step": 34672 + }, + { + "epoch": 0.5991325684268731, + "grad_norm": 1.938349416747829, + "learning_rate": 7.3089331950707195e-06, + "loss": 0.3519, + "step": 34673 + }, + { + "epoch": 0.599149847940282, + "grad_norm": 1.0613220509083114, + "learning_rate": 7.308394195013678e-06, + "loss": 0.6069, + "step": 34674 + }, + { + "epoch": 0.5991671274536909, + "grad_norm": 1.1349483681485761, + "learning_rate": 7.3078552033869485e-06, + "loss": 0.5622, + "step": 34675 + }, + { + "epoch": 0.5991844069670998, + "grad_norm": 1.2395754385832358, + "learning_rate": 7.307316220192224e-06, + "loss": 0.4074, + "step": 34676 + }, + { + "epoch": 0.5992016864805088, + "grad_norm": 1.2923523403017716, + "learning_rate": 7.306777245431187e-06, + "loss": 0.5193, + "step": 34677 + }, + { + "epoch": 0.5992189659939177, + "grad_norm": 1.3398437999829802, + "learning_rate": 7.306238279105532e-06, + "loss": 0.3325, + "step": 34678 + }, + { + "epoch": 0.5992362455073265, + "grad_norm": 1.2826946533365495, + "learning_rate": 7.305699321216939e-06, + "loss": 0.4012, + "step": 34679 + }, + { + "epoch": 0.5992535250207354, + "grad_norm": 1.3075406844315713, + "learning_rate": 7.305160371767104e-06, + "loss": 0.4425, + "step": 34680 + }, + { + "epoch": 0.5992708045341443, + "grad_norm": 1.9396379438511944, + "learning_rate": 7.304621430757712e-06, + "loss": 0.4575, + "step": 34681 + }, + { + "epoch": 0.5992880840475532, + "grad_norm": 1.252955249168527, + "learning_rate": 7.304082498190446e-06, + "loss": 0.3205, + "step": 34682 + }, + { + "epoch": 0.5993053635609621, + "grad_norm": 1.0727112710234032, + "learning_rate": 7.303543574067005e-06, + "loss": 0.4026, + "step": 34683 + }, + { + "epoch": 0.599322643074371, + "grad_norm": 0.8418306260455086, + "learning_rate": 7.303004658389063e-06, + "loss": 0.3076, + "step": 34684 + }, + { + "epoch": 0.5993399225877799, + "grad_norm": 0.9795396652730725, + "learning_rate": 7.3024657511583205e-06, + "loss": 0.2975, + "step": 34685 + }, + { + "epoch": 0.5993572021011888, + "grad_norm": 1.2257652404617916, + "learning_rate": 7.301926852376458e-06, + "loss": 0.5014, + "step": 34686 + }, + { + "epoch": 0.5993744816145977, + "grad_norm": 1.3552201776719484, + "learning_rate": 7.3013879620451674e-06, + "loss": 0.3461, + "step": 34687 + }, + { + "epoch": 0.5993917611280066, + "grad_norm": 1.0187653624148598, + "learning_rate": 7.3008490801661334e-06, + "loss": 0.3201, + "step": 34688 + }, + { + "epoch": 0.5994090406414155, + "grad_norm": 1.2052699001494753, + "learning_rate": 7.300310206741046e-06, + "loss": 0.4797, + "step": 34689 + }, + { + "epoch": 0.5994263201548244, + "grad_norm": 1.1733874372098188, + "learning_rate": 7.299771341771591e-06, + "loss": 0.5404, + "step": 34690 + }, + { + "epoch": 0.5994435996682334, + "grad_norm": 1.0967489012605807, + "learning_rate": 7.2992324852594575e-06, + "loss": 0.368, + "step": 34691 + }, + { + "epoch": 0.5994608791816423, + "grad_norm": 0.9470181358399984, + "learning_rate": 7.298693637206338e-06, + "loss": 0.386, + "step": 34692 + }, + { + "epoch": 0.5994781586950512, + "grad_norm": 0.8564830342751941, + "learning_rate": 7.298154797613909e-06, + "loss": 0.369, + "step": 34693 + }, + { + "epoch": 0.5994954382084601, + "grad_norm": 0.8809002845910601, + "learning_rate": 7.2976159664838675e-06, + "loss": 0.3575, + "step": 34694 + }, + { + "epoch": 0.599512717721869, + "grad_norm": 0.8588735856982188, + "learning_rate": 7.297077143817896e-06, + "loss": 0.3658, + "step": 34695 + }, + { + "epoch": 0.5995299972352779, + "grad_norm": 0.9540117978114011, + "learning_rate": 7.296538329617686e-06, + "loss": 0.2551, + "step": 34696 + }, + { + "epoch": 0.5995472767486868, + "grad_norm": 1.3459632809006237, + "learning_rate": 7.295999523884921e-06, + "loss": 0.4985, + "step": 34697 + }, + { + "epoch": 0.5995645562620957, + "grad_norm": 1.4791013458855828, + "learning_rate": 7.295460726621295e-06, + "loss": 0.3699, + "step": 34698 + }, + { + "epoch": 0.5995818357755046, + "grad_norm": 0.9083561195280296, + "learning_rate": 7.294921937828489e-06, + "loss": 0.4332, + "step": 34699 + }, + { + "epoch": 0.5995991152889134, + "grad_norm": 1.110933929693163, + "learning_rate": 7.294383157508193e-06, + "loss": 0.4177, + "step": 34700 + }, + { + "epoch": 0.5996163948023223, + "grad_norm": 1.1073123845866502, + "learning_rate": 7.293844385662094e-06, + "loss": 0.4684, + "step": 34701 + }, + { + "epoch": 0.5996336743157312, + "grad_norm": 0.9847123331208387, + "learning_rate": 7.293305622291882e-06, + "loss": 0.3047, + "step": 34702 + }, + { + "epoch": 0.5996509538291401, + "grad_norm": 1.2716199470610758, + "learning_rate": 7.292766867399246e-06, + "loss": 0.4592, + "step": 34703 + }, + { + "epoch": 0.599668233342549, + "grad_norm": 1.501214804794619, + "learning_rate": 7.292228120985862e-06, + "loss": 0.3999, + "step": 34704 + }, + { + "epoch": 0.599685512855958, + "grad_norm": 1.3533307161764205, + "learning_rate": 7.291689383053431e-06, + "loss": 0.3442, + "step": 34705 + }, + { + "epoch": 0.5997027923693669, + "grad_norm": 1.2006499495093315, + "learning_rate": 7.2911506536036315e-06, + "loss": 0.4564, + "step": 34706 + }, + { + "epoch": 0.5997200718827758, + "grad_norm": 1.2178840118246828, + "learning_rate": 7.290611932638156e-06, + "loss": 0.4397, + "step": 34707 + }, + { + "epoch": 0.5997373513961847, + "grad_norm": 0.8511682648769653, + "learning_rate": 7.290073220158689e-06, + "loss": 0.4002, + "step": 34708 + }, + { + "epoch": 0.5997546309095936, + "grad_norm": 0.5786420033853938, + "learning_rate": 7.2895345161669185e-06, + "loss": 0.9324, + "step": 34709 + }, + { + "epoch": 0.5997719104230025, + "grad_norm": 0.8841134834673611, + "learning_rate": 7.288995820664531e-06, + "loss": 0.3586, + "step": 34710 + }, + { + "epoch": 0.5997891899364114, + "grad_norm": 0.4888715483174219, + "learning_rate": 7.2884571336532166e-06, + "loss": 0.5755, + "step": 34711 + }, + { + "epoch": 0.5998064694498203, + "grad_norm": 1.278175967078581, + "learning_rate": 7.287918455134661e-06, + "loss": 0.5134, + "step": 34712 + }, + { + "epoch": 0.5998237489632292, + "grad_norm": 0.7843528221822125, + "learning_rate": 7.2873797851105485e-06, + "loss": 0.2224, + "step": 34713 + }, + { + "epoch": 0.5998410284766381, + "grad_norm": 0.8854546056504876, + "learning_rate": 7.286841123582571e-06, + "loss": 0.4455, + "step": 34714 + }, + { + "epoch": 0.599858307990047, + "grad_norm": 1.146296498620807, + "learning_rate": 7.2863024705524085e-06, + "loss": 0.4066, + "step": 34715 + }, + { + "epoch": 0.5998755875034559, + "grad_norm": 1.5372221854350243, + "learning_rate": 7.285763826021758e-06, + "loss": 0.4521, + "step": 34716 + }, + { + "epoch": 0.5998928670168648, + "grad_norm": 1.1785207860300775, + "learning_rate": 7.285225189992297e-06, + "loss": 0.4897, + "step": 34717 + }, + { + "epoch": 0.5999101465302737, + "grad_norm": 0.9182434590739535, + "learning_rate": 7.284686562465723e-06, + "loss": 0.4989, + "step": 34718 + }, + { + "epoch": 0.5999274260436827, + "grad_norm": 1.0892205468300227, + "learning_rate": 7.284147943443713e-06, + "loss": 0.3349, + "step": 34719 + }, + { + "epoch": 0.5999447055570916, + "grad_norm": 1.200417540298085, + "learning_rate": 7.283609332927959e-06, + "loss": 0.3429, + "step": 34720 + }, + { + "epoch": 0.5999619850705005, + "grad_norm": 1.0713252603488106, + "learning_rate": 7.2830707309201456e-06, + "loss": 0.229, + "step": 34721 + }, + { + "epoch": 0.5999792645839093, + "grad_norm": 0.7669237694355239, + "learning_rate": 7.282532137421962e-06, + "loss": 0.8836, + "step": 34722 + }, + { + "epoch": 0.5999965440973182, + "grad_norm": 0.748653411781204, + "learning_rate": 7.281993552435098e-06, + "loss": 0.2864, + "step": 34723 + }, + { + "epoch": 0.6000138236107271, + "grad_norm": 1.5035260932203647, + "learning_rate": 7.281454975961233e-06, + "loss": 0.5476, + "step": 34724 + }, + { + "epoch": 0.600031103124136, + "grad_norm": 1.4143756310460127, + "learning_rate": 7.280916408002058e-06, + "loss": 0.5133, + "step": 34725 + }, + { + "epoch": 0.6000483826375449, + "grad_norm": 1.1464830754751378, + "learning_rate": 7.280377848559258e-06, + "loss": 0.4829, + "step": 34726 + }, + { + "epoch": 0.6000656621509538, + "grad_norm": 1.169965696289603, + "learning_rate": 7.279839297634526e-06, + "loss": 0.4, + "step": 34727 + }, + { + "epoch": 0.6000829416643627, + "grad_norm": 1.194781051756553, + "learning_rate": 7.279300755229539e-06, + "loss": 0.4619, + "step": 34728 + }, + { + "epoch": 0.6001002211777716, + "grad_norm": 0.9575674913894934, + "learning_rate": 7.278762221345991e-06, + "loss": 0.248, + "step": 34729 + }, + { + "epoch": 0.6001175006911805, + "grad_norm": 0.9323588114199833, + "learning_rate": 7.2782236959855645e-06, + "loss": 0.2673, + "step": 34730 + }, + { + "epoch": 0.6001347802045894, + "grad_norm": 0.9205264658269333, + "learning_rate": 7.277685179149951e-06, + "loss": 0.2965, + "step": 34731 + }, + { + "epoch": 0.6001520597179983, + "grad_norm": 2.0737976892651244, + "learning_rate": 7.277146670840831e-06, + "loss": 0.5684, + "step": 34732 + }, + { + "epoch": 0.6001693392314073, + "grad_norm": 0.847649382448387, + "learning_rate": 7.2766081710599005e-06, + "loss": 0.3567, + "step": 34733 + }, + { + "epoch": 0.6001866187448162, + "grad_norm": 0.9361611437557783, + "learning_rate": 7.276069679808837e-06, + "loss": 0.2967, + "step": 34734 + }, + { + "epoch": 0.6002038982582251, + "grad_norm": 0.8834551017251063, + "learning_rate": 7.275531197089327e-06, + "loss": 0.2538, + "step": 34735 + }, + { + "epoch": 0.600221177771634, + "grad_norm": 1.8656119653598797, + "learning_rate": 7.2749927229030645e-06, + "loss": 0.3916, + "step": 34736 + }, + { + "epoch": 0.6002384572850429, + "grad_norm": 1.6823094490737192, + "learning_rate": 7.274454257251729e-06, + "loss": 0.315, + "step": 34737 + }, + { + "epoch": 0.6002557367984518, + "grad_norm": 0.9686043486650852, + "learning_rate": 7.273915800137015e-06, + "loss": 0.2873, + "step": 34738 + }, + { + "epoch": 0.6002730163118607, + "grad_norm": 1.0306128453015808, + "learning_rate": 7.273377351560598e-06, + "loss": 0.4718, + "step": 34739 + }, + { + "epoch": 0.6002902958252696, + "grad_norm": 1.2021340815348014, + "learning_rate": 7.272838911524174e-06, + "loss": 0.4743, + "step": 34740 + }, + { + "epoch": 0.6003075753386785, + "grad_norm": 1.0199067374699804, + "learning_rate": 7.2723004800294215e-06, + "loss": 0.3574, + "step": 34741 + }, + { + "epoch": 0.6003248548520874, + "grad_norm": 1.0658837941789943, + "learning_rate": 7.271762057078036e-06, + "loss": 0.4672, + "step": 34742 + }, + { + "epoch": 0.6003421343654962, + "grad_norm": 0.9973881819408658, + "learning_rate": 7.271223642671699e-06, + "loss": 0.4752, + "step": 34743 + }, + { + "epoch": 0.6003594138789051, + "grad_norm": 1.1662362182224726, + "learning_rate": 7.270685236812094e-06, + "loss": 0.3834, + "step": 34744 + }, + { + "epoch": 0.600376693392314, + "grad_norm": 0.9448885016078765, + "learning_rate": 7.270146839500911e-06, + "loss": 0.5109, + "step": 34745 + }, + { + "epoch": 0.6003939729057229, + "grad_norm": 1.1489806326122363, + "learning_rate": 7.269608450739836e-06, + "loss": 0.4283, + "step": 34746 + }, + { + "epoch": 0.6004112524191318, + "grad_norm": 1.4744231280534006, + "learning_rate": 7.269070070530557e-06, + "loss": 0.4206, + "step": 34747 + }, + { + "epoch": 0.6004285319325408, + "grad_norm": 1.1641220425797894, + "learning_rate": 7.268531698874754e-06, + "loss": 0.339, + "step": 34748 + }, + { + "epoch": 0.6004458114459497, + "grad_norm": 1.0449295458659533, + "learning_rate": 7.267993335774119e-06, + "loss": 0.3606, + "step": 34749 + }, + { + "epoch": 0.6004630909593586, + "grad_norm": 1.117803576876573, + "learning_rate": 7.267454981230335e-06, + "loss": 0.4695, + "step": 34750 + }, + { + "epoch": 0.6004803704727675, + "grad_norm": 1.250502842048241, + "learning_rate": 7.266916635245093e-06, + "loss": 0.3701, + "step": 34751 + }, + { + "epoch": 0.6004976499861764, + "grad_norm": 1.4948597136585988, + "learning_rate": 7.266378297820073e-06, + "loss": 0.4704, + "step": 34752 + }, + { + "epoch": 0.6005149294995853, + "grad_norm": 0.9245591024133747, + "learning_rate": 7.265839968956967e-06, + "loss": 0.4252, + "step": 34753 + }, + { + "epoch": 0.6005322090129942, + "grad_norm": 0.9466527261127842, + "learning_rate": 7.2653016486574566e-06, + "loss": 0.419, + "step": 34754 + }, + { + "epoch": 0.6005494885264031, + "grad_norm": 1.4939179874511073, + "learning_rate": 7.264763336923228e-06, + "loss": 0.6566, + "step": 34755 + }, + { + "epoch": 0.600566768039812, + "grad_norm": 1.0715584106273772, + "learning_rate": 7.26422503375597e-06, + "loss": 0.4391, + "step": 34756 + }, + { + "epoch": 0.6005840475532209, + "grad_norm": 1.0468059224118687, + "learning_rate": 7.263686739157365e-06, + "loss": 0.5604, + "step": 34757 + }, + { + "epoch": 0.6006013270666298, + "grad_norm": 1.1666532278065869, + "learning_rate": 7.263148453129105e-06, + "loss": 0.2289, + "step": 34758 + }, + { + "epoch": 0.6006186065800387, + "grad_norm": 1.1139432983069357, + "learning_rate": 7.262610175672868e-06, + "loss": 0.3672, + "step": 34759 + }, + { + "epoch": 0.6006358860934476, + "grad_norm": 1.3934217800382533, + "learning_rate": 7.262071906790346e-06, + "loss": 0.3518, + "step": 34760 + }, + { + "epoch": 0.6006531656068566, + "grad_norm": 1.1909049176328133, + "learning_rate": 7.2615336464832206e-06, + "loss": 0.3119, + "step": 34761 + }, + { + "epoch": 0.6006704451202655, + "grad_norm": 0.7515294113256955, + "learning_rate": 7.260995394753182e-06, + "loss": 0.2524, + "step": 34762 + }, + { + "epoch": 0.6006877246336744, + "grad_norm": 1.0379317304466313, + "learning_rate": 7.260457151601912e-06, + "loss": 0.3939, + "step": 34763 + }, + { + "epoch": 0.6007050041470832, + "grad_norm": 0.9538913958293505, + "learning_rate": 7.259918917031103e-06, + "loss": 0.3715, + "step": 34764 + }, + { + "epoch": 0.6007222836604921, + "grad_norm": 1.0705468453501958, + "learning_rate": 7.259380691042434e-06, + "loss": 0.3588, + "step": 34765 + }, + { + "epoch": 0.600739563173901, + "grad_norm": 1.2909989467607281, + "learning_rate": 7.258842473637591e-06, + "loss": 0.4932, + "step": 34766 + }, + { + "epoch": 0.6007568426873099, + "grad_norm": 1.2985804520063915, + "learning_rate": 7.258304264818264e-06, + "loss": 0.316, + "step": 34767 + }, + { + "epoch": 0.6007741222007188, + "grad_norm": 1.6466409966618891, + "learning_rate": 7.257766064586134e-06, + "loss": 0.4098, + "step": 34768 + }, + { + "epoch": 0.6007914017141277, + "grad_norm": 1.113518705005719, + "learning_rate": 7.257227872942892e-06, + "loss": 0.4678, + "step": 34769 + }, + { + "epoch": 0.6008086812275366, + "grad_norm": 1.7178928303800363, + "learning_rate": 7.256689689890217e-06, + "loss": 0.5147, + "step": 34770 + }, + { + "epoch": 0.6008259607409455, + "grad_norm": 1.1779811519088053, + "learning_rate": 7.256151515429801e-06, + "loss": 0.3739, + "step": 34771 + }, + { + "epoch": 0.6008432402543544, + "grad_norm": 0.8200539203055588, + "learning_rate": 7.255613349563324e-06, + "loss": 0.6392, + "step": 34772 + }, + { + "epoch": 0.6008605197677633, + "grad_norm": 0.9794316338940829, + "learning_rate": 7.25507519229248e-06, + "loss": 0.2848, + "step": 34773 + }, + { + "epoch": 0.6008777992811722, + "grad_norm": 1.245204048115339, + "learning_rate": 7.254537043618948e-06, + "loss": 0.652, + "step": 34774 + }, + { + "epoch": 0.6008950787945812, + "grad_norm": 1.562852745450242, + "learning_rate": 7.25399890354441e-06, + "loss": 0.3281, + "step": 34775 + }, + { + "epoch": 0.6009123583079901, + "grad_norm": 0.9726504475471618, + "learning_rate": 7.25346077207056e-06, + "loss": 0.8229, + "step": 34776 + }, + { + "epoch": 0.600929637821399, + "grad_norm": 1.4553990373953511, + "learning_rate": 7.252922649199076e-06, + "loss": 0.555, + "step": 34777 + }, + { + "epoch": 0.6009469173348079, + "grad_norm": 1.7024081837315967, + "learning_rate": 7.2523845349316535e-06, + "loss": 0.5049, + "step": 34778 + }, + { + "epoch": 0.6009641968482168, + "grad_norm": 1.0052017528100772, + "learning_rate": 7.251846429269965e-06, + "loss": 0.2988, + "step": 34779 + }, + { + "epoch": 0.6009814763616257, + "grad_norm": 1.1212736713296307, + "learning_rate": 7.251308332215705e-06, + "loss": 0.5054, + "step": 34780 + }, + { + "epoch": 0.6009987558750346, + "grad_norm": 0.562594425598688, + "learning_rate": 7.250770243770555e-06, + "loss": 0.6044, + "step": 34781 + }, + { + "epoch": 0.6010160353884435, + "grad_norm": 1.144789388312946, + "learning_rate": 7.250232163936204e-06, + "loss": 0.5061, + "step": 34782 + }, + { + "epoch": 0.6010333149018524, + "grad_norm": 1.3185107365085569, + "learning_rate": 7.249694092714331e-06, + "loss": 0.4344, + "step": 34783 + }, + { + "epoch": 0.6010505944152613, + "grad_norm": 1.2006532403092247, + "learning_rate": 7.2491560301066295e-06, + "loss": 0.3079, + "step": 34784 + }, + { + "epoch": 0.6010678739286701, + "grad_norm": 0.9219651923809902, + "learning_rate": 7.24861797611478e-06, + "loss": 0.2432, + "step": 34785 + }, + { + "epoch": 0.601085153442079, + "grad_norm": 0.9785344217329229, + "learning_rate": 7.2480799307404634e-06, + "loss": 0.2416, + "step": 34786 + }, + { + "epoch": 0.6011024329554879, + "grad_norm": 1.1458822159746003, + "learning_rate": 7.247541893985377e-06, + "loss": 0.4852, + "step": 34787 + }, + { + "epoch": 0.6011197124688968, + "grad_norm": 1.0026317243877927, + "learning_rate": 7.247003865851192e-06, + "loss": 0.3166, + "step": 34788 + }, + { + "epoch": 0.6011369919823057, + "grad_norm": 1.0528900230531155, + "learning_rate": 7.2464658463396034e-06, + "loss": 0.2772, + "step": 34789 + }, + { + "epoch": 0.6011542714957147, + "grad_norm": 0.9982078632546452, + "learning_rate": 7.24592783545229e-06, + "loss": 0.4042, + "step": 34790 + }, + { + "epoch": 0.6011715510091236, + "grad_norm": 1.1228582427245035, + "learning_rate": 7.245389833190942e-06, + "loss": 0.299, + "step": 34791 + }, + { + "epoch": 0.6011888305225325, + "grad_norm": 0.7370467576963687, + "learning_rate": 7.244851839557241e-06, + "loss": 0.7701, + "step": 34792 + }, + { + "epoch": 0.6012061100359414, + "grad_norm": 1.5739393605437089, + "learning_rate": 7.244313854552878e-06, + "loss": 0.5166, + "step": 34793 + }, + { + "epoch": 0.6012233895493503, + "grad_norm": 1.246385760575608, + "learning_rate": 7.2437758781795294e-06, + "loss": 0.3719, + "step": 34794 + }, + { + "epoch": 0.6012406690627592, + "grad_norm": 1.2558293629688617, + "learning_rate": 7.243237910438884e-06, + "loss": 0.6402, + "step": 34795 + }, + { + "epoch": 0.6012579485761681, + "grad_norm": 1.2165599482217653, + "learning_rate": 7.242699951332628e-06, + "loss": 0.4916, + "step": 34796 + }, + { + "epoch": 0.601275228089577, + "grad_norm": 0.5816102484766219, + "learning_rate": 7.242162000862444e-06, + "loss": 0.6303, + "step": 34797 + }, + { + "epoch": 0.6012925076029859, + "grad_norm": 1.2730399683861293, + "learning_rate": 7.241624059030021e-06, + "loss": 0.5584, + "step": 34798 + }, + { + "epoch": 0.6013097871163948, + "grad_norm": 1.1442455797351272, + "learning_rate": 7.241086125837039e-06, + "loss": 0.2697, + "step": 34799 + }, + { + "epoch": 0.6013270666298037, + "grad_norm": 1.7366024475104633, + "learning_rate": 7.240548201285186e-06, + "loss": 0.4985, + "step": 34800 + }, + { + "epoch": 0.6013443461432126, + "grad_norm": 1.422795557087501, + "learning_rate": 7.240010285376144e-06, + "loss": 0.4961, + "step": 34801 + }, + { + "epoch": 0.6013616256566215, + "grad_norm": 0.7876689659067321, + "learning_rate": 7.239472378111603e-06, + "loss": 0.154, + "step": 34802 + }, + { + "epoch": 0.6013789051700305, + "grad_norm": 1.0483261806515507, + "learning_rate": 7.238934479493238e-06, + "loss": 0.4249, + "step": 34803 + }, + { + "epoch": 0.6013961846834394, + "grad_norm": 0.8530327616130612, + "learning_rate": 7.238396589522747e-06, + "loss": 0.3431, + "step": 34804 + }, + { + "epoch": 0.6014134641968483, + "grad_norm": 0.9730087674202356, + "learning_rate": 7.237858708201806e-06, + "loss": 0.4436, + "step": 34805 + }, + { + "epoch": 0.6014307437102571, + "grad_norm": 1.0466880372692005, + "learning_rate": 7.237320835532098e-06, + "loss": 0.4309, + "step": 34806 + }, + { + "epoch": 0.601448023223666, + "grad_norm": 0.9911333189463796, + "learning_rate": 7.236782971515316e-06, + "loss": 0.3088, + "step": 34807 + }, + { + "epoch": 0.6014653027370749, + "grad_norm": 1.236176491044207, + "learning_rate": 7.236245116153135e-06, + "loss": 0.3609, + "step": 34808 + }, + { + "epoch": 0.6014825822504838, + "grad_norm": 1.0379830426068186, + "learning_rate": 7.235707269447248e-06, + "loss": 0.3678, + "step": 34809 + }, + { + "epoch": 0.6014998617638927, + "grad_norm": 1.2310146255050238, + "learning_rate": 7.2351694313993315e-06, + "loss": 0.5677, + "step": 34810 + }, + { + "epoch": 0.6015171412773016, + "grad_norm": 0.7704556297942149, + "learning_rate": 7.2346316020110786e-06, + "loss": 0.9292, + "step": 34811 + }, + { + "epoch": 0.6015344207907105, + "grad_norm": 1.5951743331704786, + "learning_rate": 7.234093781284166e-06, + "loss": 0.3287, + "step": 34812 + }, + { + "epoch": 0.6015517003041194, + "grad_norm": 1.1166576334481546, + "learning_rate": 7.233555969220287e-06, + "loss": 0.5178, + "step": 34813 + }, + { + "epoch": 0.6015689798175283, + "grad_norm": 1.2139841943405523, + "learning_rate": 7.233018165821118e-06, + "loss": 0.5788, + "step": 34814 + }, + { + "epoch": 0.6015862593309372, + "grad_norm": 1.2014575926543476, + "learning_rate": 7.232480371088346e-06, + "loss": 0.377, + "step": 34815 + }, + { + "epoch": 0.6016035388443461, + "grad_norm": 0.9635299412507267, + "learning_rate": 7.231942585023657e-06, + "loss": 0.4505, + "step": 34816 + }, + { + "epoch": 0.601620818357755, + "grad_norm": 1.6636474507375332, + "learning_rate": 7.231404807628732e-06, + "loss": 0.5623, + "step": 34817 + }, + { + "epoch": 0.601638097871164, + "grad_norm": 1.0315914168937106, + "learning_rate": 7.230867038905261e-06, + "loss": 0.2525, + "step": 34818 + }, + { + "epoch": 0.6016553773845729, + "grad_norm": 0.7592610134720986, + "learning_rate": 7.230329278854922e-06, + "loss": 0.3703, + "step": 34819 + }, + { + "epoch": 0.6016726568979818, + "grad_norm": 0.9529077410370907, + "learning_rate": 7.229791527479404e-06, + "loss": 0.3528, + "step": 34820 + }, + { + "epoch": 0.6016899364113907, + "grad_norm": 1.1075559576676874, + "learning_rate": 7.229253784780387e-06, + "loss": 0.2915, + "step": 34821 + }, + { + "epoch": 0.6017072159247996, + "grad_norm": 1.0857557593672347, + "learning_rate": 7.228716050759561e-06, + "loss": 0.3109, + "step": 34822 + }, + { + "epoch": 0.6017244954382085, + "grad_norm": 1.1061311573531372, + "learning_rate": 7.2281783254186026e-06, + "loss": 0.3131, + "step": 34823 + }, + { + "epoch": 0.6017417749516174, + "grad_norm": 0.888286112451359, + "learning_rate": 7.227640608759206e-06, + "loss": 0.3948, + "step": 34824 + }, + { + "epoch": 0.6017590544650263, + "grad_norm": 0.9799141448209949, + "learning_rate": 7.227102900783047e-06, + "loss": 0.4536, + "step": 34825 + }, + { + "epoch": 0.6017763339784352, + "grad_norm": 1.2812668475055953, + "learning_rate": 7.226565201491811e-06, + "loss": 0.3903, + "step": 34826 + }, + { + "epoch": 0.601793613491844, + "grad_norm": 1.2476832024108617, + "learning_rate": 7.2260275108871865e-06, + "loss": 0.2933, + "step": 34827 + }, + { + "epoch": 0.6018108930052529, + "grad_norm": 1.083873197393889, + "learning_rate": 7.225489828970851e-06, + "loss": 0.3911, + "step": 34828 + }, + { + "epoch": 0.6018281725186618, + "grad_norm": 1.2504088269699634, + "learning_rate": 7.224952155744496e-06, + "loss": 0.5858, + "step": 34829 + }, + { + "epoch": 0.6018454520320707, + "grad_norm": 0.8360502692507936, + "learning_rate": 7.2244144912097965e-06, + "loss": 0.3331, + "step": 34830 + }, + { + "epoch": 0.6018627315454796, + "grad_norm": 1.368389025119314, + "learning_rate": 7.223876835368446e-06, + "loss": 0.3059, + "step": 34831 + }, + { + "epoch": 0.6018800110588886, + "grad_norm": 1.2535826740512335, + "learning_rate": 7.223339188222123e-06, + "loss": 0.7734, + "step": 34832 + }, + { + "epoch": 0.6018972905722975, + "grad_norm": 0.9463770643811799, + "learning_rate": 7.222801549772515e-06, + "loss": 0.3484, + "step": 34833 + }, + { + "epoch": 0.6019145700857064, + "grad_norm": 1.7534539821116015, + "learning_rate": 7.222263920021299e-06, + "loss": 0.3348, + "step": 34834 + }, + { + "epoch": 0.6019318495991153, + "grad_norm": 1.4314866217027618, + "learning_rate": 7.221726298970167e-06, + "loss": 0.6079, + "step": 34835 + }, + { + "epoch": 0.6019491291125242, + "grad_norm": 1.1798653523752256, + "learning_rate": 7.221188686620799e-06, + "loss": 0.4356, + "step": 34836 + }, + { + "epoch": 0.6019664086259331, + "grad_norm": 1.1675404540959677, + "learning_rate": 7.220651082974878e-06, + "loss": 0.4446, + "step": 34837 + }, + { + "epoch": 0.601983688139342, + "grad_norm": 1.180008690584254, + "learning_rate": 7.220113488034093e-06, + "loss": 0.5713, + "step": 34838 + }, + { + "epoch": 0.6020009676527509, + "grad_norm": 1.0441859256280768, + "learning_rate": 7.2195759018001175e-06, + "loss": 0.4444, + "step": 34839 + }, + { + "epoch": 0.6020182471661598, + "grad_norm": 1.0466286904412787, + "learning_rate": 7.219038324274646e-06, + "loss": 0.5127, + "step": 34840 + }, + { + "epoch": 0.6020355266795687, + "grad_norm": 0.9350353209050082, + "learning_rate": 7.218500755459354e-06, + "loss": 0.4032, + "step": 34841 + }, + { + "epoch": 0.6020528061929776, + "grad_norm": 1.0122489601331084, + "learning_rate": 7.217963195355935e-06, + "loss": 0.3859, + "step": 34842 + }, + { + "epoch": 0.6020700857063865, + "grad_norm": 1.000147608742649, + "learning_rate": 7.217425643966059e-06, + "loss": 0.503, + "step": 34843 + }, + { + "epoch": 0.6020873652197954, + "grad_norm": 1.9041781865964875, + "learning_rate": 7.216888101291424e-06, + "loss": 0.481, + "step": 34844 + }, + { + "epoch": 0.6021046447332044, + "grad_norm": 0.7458030169233875, + "learning_rate": 7.216350567333703e-06, + "loss": 0.5307, + "step": 34845 + }, + { + "epoch": 0.6021219242466133, + "grad_norm": 1.019178939958293, + "learning_rate": 7.215813042094586e-06, + "loss": 0.3669, + "step": 34846 + }, + { + "epoch": 0.6021392037600222, + "grad_norm": 1.2690580484924816, + "learning_rate": 7.215275525575756e-06, + "loss": 0.3257, + "step": 34847 + }, + { + "epoch": 0.602156483273431, + "grad_norm": 1.1126982756152448, + "learning_rate": 7.214738017778891e-06, + "loss": 0.4295, + "step": 34848 + }, + { + "epoch": 0.6021737627868399, + "grad_norm": 0.8416463567306113, + "learning_rate": 7.214200518705678e-06, + "loss": 0.4862, + "step": 34849 + }, + { + "epoch": 0.6021910423002488, + "grad_norm": 0.9676330549369871, + "learning_rate": 7.213663028357801e-06, + "loss": 0.4276, + "step": 34850 + }, + { + "epoch": 0.6022083218136577, + "grad_norm": 1.1557988265539951, + "learning_rate": 7.213125546736945e-06, + "loss": 0.6534, + "step": 34851 + }, + { + "epoch": 0.6022256013270666, + "grad_norm": 1.4202564057501856, + "learning_rate": 7.212588073844789e-06, + "loss": 0.247, + "step": 34852 + }, + { + "epoch": 0.6022428808404755, + "grad_norm": 0.8826205041632825, + "learning_rate": 7.2120506096830235e-06, + "loss": 0.4186, + "step": 34853 + }, + { + "epoch": 0.6022601603538844, + "grad_norm": 1.3025673102311432, + "learning_rate": 7.2115131542533225e-06, + "loss": 0.363, + "step": 34854 + }, + { + "epoch": 0.6022774398672933, + "grad_norm": 0.6723910709007697, + "learning_rate": 7.210975707557376e-06, + "loss": 0.2644, + "step": 34855 + }, + { + "epoch": 0.6022947193807022, + "grad_norm": 0.7619393382843673, + "learning_rate": 7.210438269596866e-06, + "loss": 0.172, + "step": 34856 + }, + { + "epoch": 0.6023119988941111, + "grad_norm": 1.3561399493308968, + "learning_rate": 7.209900840373474e-06, + "loss": 0.4195, + "step": 34857 + }, + { + "epoch": 0.60232927840752, + "grad_norm": 1.0750504264810135, + "learning_rate": 7.209363419888889e-06, + "loss": 0.329, + "step": 34858 + }, + { + "epoch": 0.602346557920929, + "grad_norm": 0.7374810489047728, + "learning_rate": 7.208826008144784e-06, + "loss": 0.2739, + "step": 34859 + }, + { + "epoch": 0.6023638374343379, + "grad_norm": 1.2402029001189725, + "learning_rate": 7.2082886051428506e-06, + "loss": 0.2845, + "step": 34860 + }, + { + "epoch": 0.6023811169477468, + "grad_norm": 1.1858397875801174, + "learning_rate": 7.207751210884768e-06, + "loss": 0.3716, + "step": 34861 + }, + { + "epoch": 0.6023983964611557, + "grad_norm": 0.9851678509136584, + "learning_rate": 7.207213825372224e-06, + "loss": 0.4027, + "step": 34862 + }, + { + "epoch": 0.6024156759745646, + "grad_norm": 1.1891720967012833, + "learning_rate": 7.206676448606893e-06, + "loss": 0.4305, + "step": 34863 + }, + { + "epoch": 0.6024329554879735, + "grad_norm": 1.7837900375240054, + "learning_rate": 7.206139080590471e-06, + "loss": 0.3014, + "step": 34864 + }, + { + "epoch": 0.6024502350013824, + "grad_norm": 0.9616246726311738, + "learning_rate": 7.205601721324629e-06, + "loss": 0.3933, + "step": 34865 + }, + { + "epoch": 0.6024675145147913, + "grad_norm": 1.7709403564062098, + "learning_rate": 7.205064370811056e-06, + "loss": 0.3748, + "step": 34866 + }, + { + "epoch": 0.6024847940282002, + "grad_norm": 0.9081629679692362, + "learning_rate": 7.204527029051436e-06, + "loss": 0.3743, + "step": 34867 + }, + { + "epoch": 0.6025020735416091, + "grad_norm": 0.532639655915349, + "learning_rate": 7.203989696047446e-06, + "loss": 0.6684, + "step": 34868 + }, + { + "epoch": 0.602519353055018, + "grad_norm": 0.9690830177541806, + "learning_rate": 7.203452371800775e-06, + "loss": 0.4941, + "step": 34869 + }, + { + "epoch": 0.6025366325684268, + "grad_norm": 1.2235386310527103, + "learning_rate": 7.2029150563131e-06, + "loss": 0.4075, + "step": 34870 + }, + { + "epoch": 0.6025539120818357, + "grad_norm": 0.8230719029722788, + "learning_rate": 7.2023777495861116e-06, + "loss": 0.4326, + "step": 34871 + }, + { + "epoch": 0.6025711915952446, + "grad_norm": 0.8722541545450918, + "learning_rate": 7.201840451621486e-06, + "loss": 0.3584, + "step": 34872 + }, + { + "epoch": 0.6025884711086535, + "grad_norm": 0.5143690903872866, + "learning_rate": 7.201303162420914e-06, + "loss": 0.7219, + "step": 34873 + }, + { + "epoch": 0.6026057506220625, + "grad_norm": 1.4410981917669041, + "learning_rate": 7.2007658819860674e-06, + "loss": 0.3542, + "step": 34874 + }, + { + "epoch": 0.6026230301354714, + "grad_norm": 1.0940586506722971, + "learning_rate": 7.200228610318638e-06, + "loss": 0.2118, + "step": 34875 + }, + { + "epoch": 0.6026403096488803, + "grad_norm": 0.8454446351060605, + "learning_rate": 7.199691347420301e-06, + "loss": 0.3297, + "step": 34876 + }, + { + "epoch": 0.6026575891622892, + "grad_norm": 1.238160221168891, + "learning_rate": 7.199154093292748e-06, + "loss": 0.3128, + "step": 34877 + }, + { + "epoch": 0.6026748686756981, + "grad_norm": 1.675580354083689, + "learning_rate": 7.1986168479376585e-06, + "loss": 0.4401, + "step": 34878 + }, + { + "epoch": 0.602692148189107, + "grad_norm": 1.644365379826183, + "learning_rate": 7.19807961135671e-06, + "loss": 0.3515, + "step": 34879 + }, + { + "epoch": 0.6027094277025159, + "grad_norm": 0.987710730872172, + "learning_rate": 7.197542383551592e-06, + "loss": 0.4221, + "step": 34880 + }, + { + "epoch": 0.6027267072159248, + "grad_norm": 0.9596821592537147, + "learning_rate": 7.19700516452398e-06, + "loss": 0.4418, + "step": 34881 + }, + { + "epoch": 0.6027439867293337, + "grad_norm": 0.9912657162054633, + "learning_rate": 7.196467954275568e-06, + "loss": 0.2824, + "step": 34882 + }, + { + "epoch": 0.6027612662427426, + "grad_norm": 1.1725436450105182, + "learning_rate": 7.195930752808023e-06, + "loss": 0.3186, + "step": 34883 + }, + { + "epoch": 0.6027785457561515, + "grad_norm": 0.7820119446673566, + "learning_rate": 7.195393560123043e-06, + "loss": 0.3034, + "step": 34884 + }, + { + "epoch": 0.6027958252695604, + "grad_norm": 1.6388284508785724, + "learning_rate": 7.194856376222299e-06, + "loss": 0.366, + "step": 34885 + }, + { + "epoch": 0.6028131047829693, + "grad_norm": 1.0733611544885293, + "learning_rate": 7.19431920110748e-06, + "loss": 0.2496, + "step": 34886 + }, + { + "epoch": 0.6028303842963783, + "grad_norm": 1.7959419001323829, + "learning_rate": 7.193782034780268e-06, + "loss": 0.3984, + "step": 34887 + }, + { + "epoch": 0.6028476638097872, + "grad_norm": 1.1658824478097125, + "learning_rate": 7.1932448772423404e-06, + "loss": 0.327, + "step": 34888 + }, + { + "epoch": 0.6028649433231961, + "grad_norm": 0.9265562301630267, + "learning_rate": 7.192707728495385e-06, + "loss": 0.3529, + "step": 34889 + }, + { + "epoch": 0.602882222836605, + "grad_norm": 0.7664739338391595, + "learning_rate": 7.192170588541081e-06, + "loss": 0.4418, + "step": 34890 + }, + { + "epoch": 0.6028995023500138, + "grad_norm": 1.01360702200862, + "learning_rate": 7.1916334573811145e-06, + "loss": 0.3878, + "step": 34891 + }, + { + "epoch": 0.6029167818634227, + "grad_norm": 1.136522460825208, + "learning_rate": 7.191096335017162e-06, + "loss": 0.3532, + "step": 34892 + }, + { + "epoch": 0.6029340613768316, + "grad_norm": 1.6713571200063093, + "learning_rate": 7.190559221450914e-06, + "loss": 0.3292, + "step": 34893 + }, + { + "epoch": 0.6029513408902405, + "grad_norm": 1.1813980351346052, + "learning_rate": 7.1900221166840434e-06, + "loss": 0.3098, + "step": 34894 + }, + { + "epoch": 0.6029686204036494, + "grad_norm": 0.9408850912165425, + "learning_rate": 7.1894850207182406e-06, + "loss": 0.4878, + "step": 34895 + }, + { + "epoch": 0.6029858999170583, + "grad_norm": 1.539897948233151, + "learning_rate": 7.188947933555182e-06, + "loss": 0.3695, + "step": 34896 + }, + { + "epoch": 0.6030031794304672, + "grad_norm": 0.8074603863153029, + "learning_rate": 7.188410855196553e-06, + "loss": 0.5226, + "step": 34897 + }, + { + "epoch": 0.6030204589438761, + "grad_norm": 1.2389788309856045, + "learning_rate": 7.187873785644038e-06, + "loss": 0.4026, + "step": 34898 + }, + { + "epoch": 0.603037738457285, + "grad_norm": 0.9282572803894479, + "learning_rate": 7.18733672489931e-06, + "loss": 0.6764, + "step": 34899 + }, + { + "epoch": 0.6030550179706939, + "grad_norm": 1.6113158213926737, + "learning_rate": 7.18679967296406e-06, + "loss": 0.2949, + "step": 34900 + }, + { + "epoch": 0.6030722974841028, + "grad_norm": 1.3524737116359304, + "learning_rate": 7.186262629839966e-06, + "loss": 0.615, + "step": 34901 + }, + { + "epoch": 0.6030895769975118, + "grad_norm": 1.1130718864250593, + "learning_rate": 7.185725595528716e-06, + "loss": 0.2564, + "step": 34902 + }, + { + "epoch": 0.6031068565109207, + "grad_norm": 1.4593700903990863, + "learning_rate": 7.185188570031979e-06, + "loss": 0.4051, + "step": 34903 + }, + { + "epoch": 0.6031241360243296, + "grad_norm": 1.3250291259634615, + "learning_rate": 7.184651553351454e-06, + "loss": 0.3399, + "step": 34904 + }, + { + "epoch": 0.6031414155377385, + "grad_norm": 0.8518743428250324, + "learning_rate": 7.1841145454888076e-06, + "loss": 0.3588, + "step": 34905 + }, + { + "epoch": 0.6031586950511474, + "grad_norm": 0.8225188014754476, + "learning_rate": 7.183577546445732e-06, + "loss": 0.1997, + "step": 34906 + }, + { + "epoch": 0.6031759745645563, + "grad_norm": 1.1266635433656065, + "learning_rate": 7.183040556223904e-06, + "loss": 0.2386, + "step": 34907 + }, + { + "epoch": 0.6031932540779652, + "grad_norm": 0.8281015808995276, + "learning_rate": 7.182503574825009e-06, + "loss": 0.2881, + "step": 34908 + }, + { + "epoch": 0.6032105335913741, + "grad_norm": 0.9830653969968554, + "learning_rate": 7.1819666022507275e-06, + "loss": 0.3493, + "step": 34909 + }, + { + "epoch": 0.603227813104783, + "grad_norm": 0.8333877583495586, + "learning_rate": 7.1814296385027375e-06, + "loss": 0.5042, + "step": 34910 + }, + { + "epoch": 0.6032450926181919, + "grad_norm": 0.8159055401449006, + "learning_rate": 7.180892683582726e-06, + "loss": 0.2733, + "step": 34911 + }, + { + "epoch": 0.6032623721316007, + "grad_norm": 1.0284316835308827, + "learning_rate": 7.180355737492371e-06, + "loss": 0.2803, + "step": 34912 + }, + { + "epoch": 0.6032796516450096, + "grad_norm": 1.064493049000129, + "learning_rate": 7.17981880023336e-06, + "loss": 0.3468, + "step": 34913 + }, + { + "epoch": 0.6032969311584185, + "grad_norm": 0.9139012458863546, + "learning_rate": 7.179281871807366e-06, + "loss": 0.4428, + "step": 34914 + }, + { + "epoch": 0.6033142106718274, + "grad_norm": 0.9690264339669802, + "learning_rate": 7.178744952216079e-06, + "loss": 0.5822, + "step": 34915 + }, + { + "epoch": 0.6033314901852364, + "grad_norm": 1.5052384537442736, + "learning_rate": 7.178208041461175e-06, + "loss": 0.3734, + "step": 34916 + }, + { + "epoch": 0.6033487696986453, + "grad_norm": 0.8265021715261407, + "learning_rate": 7.17767113954434e-06, + "loss": 0.3303, + "step": 34917 + }, + { + "epoch": 0.6033660492120542, + "grad_norm": 0.7731069551082678, + "learning_rate": 7.177134246467254e-06, + "loss": 0.4258, + "step": 34918 + }, + { + "epoch": 0.6033833287254631, + "grad_norm": 0.8698555234414108, + "learning_rate": 7.176597362231595e-06, + "loss": 0.3703, + "step": 34919 + }, + { + "epoch": 0.603400608238872, + "grad_norm": 0.9897953005108279, + "learning_rate": 7.17606048683905e-06, + "loss": 0.472, + "step": 34920 + }, + { + "epoch": 0.6034178877522809, + "grad_norm": 1.8423744186005033, + "learning_rate": 7.175523620291296e-06, + "loss": 0.4907, + "step": 34921 + }, + { + "epoch": 0.6034351672656898, + "grad_norm": 1.2866950925189455, + "learning_rate": 7.17498676259002e-06, + "loss": 0.4682, + "step": 34922 + }, + { + "epoch": 0.6034524467790987, + "grad_norm": 0.9434937602753021, + "learning_rate": 7.174449913736895e-06, + "loss": 0.2783, + "step": 34923 + }, + { + "epoch": 0.6034697262925076, + "grad_norm": 1.0957909514162885, + "learning_rate": 7.173913073733614e-06, + "loss": 0.4113, + "step": 34924 + }, + { + "epoch": 0.6034870058059165, + "grad_norm": 0.8659066851470999, + "learning_rate": 7.173376242581846e-06, + "loss": 0.3319, + "step": 34925 + }, + { + "epoch": 0.6035042853193254, + "grad_norm": 1.2046424502532926, + "learning_rate": 7.172839420283283e-06, + "loss": 0.4053, + "step": 34926 + }, + { + "epoch": 0.6035215648327343, + "grad_norm": 0.9991909451028299, + "learning_rate": 7.172302606839599e-06, + "loss": 0.3226, + "step": 34927 + }, + { + "epoch": 0.6035388443461432, + "grad_norm": 1.1468333255142713, + "learning_rate": 7.171765802252482e-06, + "loss": 0.4623, + "step": 34928 + }, + { + "epoch": 0.6035561238595522, + "grad_norm": 1.2198522113520998, + "learning_rate": 7.171229006523606e-06, + "loss": 0.4064, + "step": 34929 + }, + { + "epoch": 0.6035734033729611, + "grad_norm": 0.7288270856439204, + "learning_rate": 7.170692219654656e-06, + "loss": 0.3299, + "step": 34930 + }, + { + "epoch": 0.60359068288637, + "grad_norm": 0.8131736114940569, + "learning_rate": 7.170155441647313e-06, + "loss": 0.2857, + "step": 34931 + }, + { + "epoch": 0.6036079623997789, + "grad_norm": 0.8656968592520928, + "learning_rate": 7.169618672503257e-06, + "loss": 0.5948, + "step": 34932 + }, + { + "epoch": 0.6036252419131877, + "grad_norm": 1.261844460952849, + "learning_rate": 7.169081912224175e-06, + "loss": 0.5897, + "step": 34933 + }, + { + "epoch": 0.6036425214265966, + "grad_norm": 0.9634011200965611, + "learning_rate": 7.168545160811739e-06, + "loss": 0.2674, + "step": 34934 + }, + { + "epoch": 0.6036598009400055, + "grad_norm": 0.8954084606199717, + "learning_rate": 7.168008418267637e-06, + "loss": 0.3919, + "step": 34935 + }, + { + "epoch": 0.6036770804534144, + "grad_norm": 1.2036697108072199, + "learning_rate": 7.167471684593545e-06, + "loss": 0.3148, + "step": 34936 + }, + { + "epoch": 0.6036943599668233, + "grad_norm": 1.137996039643708, + "learning_rate": 7.166934959791149e-06, + "loss": 0.507, + "step": 34937 + }, + { + "epoch": 0.6037116394802322, + "grad_norm": 0.5844502032941282, + "learning_rate": 7.166398243862127e-06, + "loss": 0.6591, + "step": 34938 + }, + { + "epoch": 0.6037289189936411, + "grad_norm": 1.0092704486331359, + "learning_rate": 7.165861536808165e-06, + "loss": 0.491, + "step": 34939 + }, + { + "epoch": 0.60374619850705, + "grad_norm": 1.1723156335516798, + "learning_rate": 7.165324838630938e-06, + "loss": 0.4196, + "step": 34940 + }, + { + "epoch": 0.6037634780204589, + "grad_norm": 1.1130750720602105, + "learning_rate": 7.164788149332127e-06, + "loss": 0.4344, + "step": 34941 + }, + { + "epoch": 0.6037807575338678, + "grad_norm": 0.7476879717166508, + "learning_rate": 7.164251468913419e-06, + "loss": 0.4345, + "step": 34942 + }, + { + "epoch": 0.6037980370472767, + "grad_norm": 1.5633677735856206, + "learning_rate": 7.163714797376484e-06, + "loss": 0.312, + "step": 34943 + }, + { + "epoch": 0.6038153165606857, + "grad_norm": 1.020867185461778, + "learning_rate": 7.163178134723017e-06, + "loss": 0.2426, + "step": 34944 + }, + { + "epoch": 0.6038325960740946, + "grad_norm": 1.0186408939727467, + "learning_rate": 7.162641480954686e-06, + "loss": 0.3296, + "step": 34945 + }, + { + "epoch": 0.6038498755875035, + "grad_norm": 0.6404795411984261, + "learning_rate": 7.162104836073182e-06, + "loss": 0.7, + "step": 34946 + }, + { + "epoch": 0.6038671551009124, + "grad_norm": 1.0943424311441592, + "learning_rate": 7.161568200080178e-06, + "loss": 0.4496, + "step": 34947 + }, + { + "epoch": 0.6038844346143213, + "grad_norm": 0.5891816858271687, + "learning_rate": 7.161031572977362e-06, + "loss": 0.6985, + "step": 34948 + }, + { + "epoch": 0.6039017141277302, + "grad_norm": 0.8470189002565139, + "learning_rate": 7.1604949547664085e-06, + "loss": 0.4295, + "step": 34949 + }, + { + "epoch": 0.6039189936411391, + "grad_norm": 1.4558100597679229, + "learning_rate": 7.1599583454489995e-06, + "loss": 0.3054, + "step": 34950 + }, + { + "epoch": 0.603936273154548, + "grad_norm": 0.8343828777725465, + "learning_rate": 7.159421745026818e-06, + "loss": 0.3103, + "step": 34951 + }, + { + "epoch": 0.6039535526679569, + "grad_norm": 1.1528187042815, + "learning_rate": 7.158885153501543e-06, + "loss": 0.446, + "step": 34952 + }, + { + "epoch": 0.6039708321813658, + "grad_norm": 1.3610098321165798, + "learning_rate": 7.1583485708748575e-06, + "loss": 0.3479, + "step": 34953 + }, + { + "epoch": 0.6039881116947746, + "grad_norm": 0.9915736266733962, + "learning_rate": 7.157811997148437e-06, + "loss": 0.5823, + "step": 34954 + }, + { + "epoch": 0.6040053912081835, + "grad_norm": 1.3805086439215828, + "learning_rate": 7.157275432323968e-06, + "loss": 0.3284, + "step": 34955 + }, + { + "epoch": 0.6040226707215924, + "grad_norm": 0.9815857603320923, + "learning_rate": 7.1567388764031254e-06, + "loss": 0.5452, + "step": 34956 + }, + { + "epoch": 0.6040399502350013, + "grad_norm": 2.2737120138357976, + "learning_rate": 7.156202329387596e-06, + "loss": 0.4842, + "step": 34957 + }, + { + "epoch": 0.6040572297484103, + "grad_norm": 1.5693502039978924, + "learning_rate": 7.155665791279054e-06, + "loss": 0.3196, + "step": 34958 + }, + { + "epoch": 0.6040745092618192, + "grad_norm": 0.7214983518553587, + "learning_rate": 7.1551292620791865e-06, + "loss": 0.6973, + "step": 34959 + }, + { + "epoch": 0.6040917887752281, + "grad_norm": 0.7177314241711085, + "learning_rate": 7.15459274178967e-06, + "loss": 0.3245, + "step": 34960 + }, + { + "epoch": 0.604109068288637, + "grad_norm": 1.499458604120091, + "learning_rate": 7.154056230412182e-06, + "loss": 0.3356, + "step": 34961 + }, + { + "epoch": 0.6041263478020459, + "grad_norm": 1.3488389995865182, + "learning_rate": 7.15351972794841e-06, + "loss": 0.3518, + "step": 34962 + }, + { + "epoch": 0.6041436273154548, + "grad_norm": 0.8270266946982691, + "learning_rate": 7.152983234400025e-06, + "loss": 0.2678, + "step": 34963 + }, + { + "epoch": 0.6041609068288637, + "grad_norm": 1.727240508661579, + "learning_rate": 7.152446749768719e-06, + "loss": 0.6889, + "step": 34964 + }, + { + "epoch": 0.6041781863422726, + "grad_norm": 1.6347162341480734, + "learning_rate": 7.1519102740561615e-06, + "loss": 0.5731, + "step": 34965 + }, + { + "epoch": 0.6041954658556815, + "grad_norm": 0.9574979656522431, + "learning_rate": 7.151373807264041e-06, + "loss": 0.4022, + "step": 34966 + }, + { + "epoch": 0.6042127453690904, + "grad_norm": 1.2771893642240675, + "learning_rate": 7.1508373493940305e-06, + "loss": 0.633, + "step": 34967 + }, + { + "epoch": 0.6042300248824993, + "grad_norm": 0.8502089974446657, + "learning_rate": 7.15030090044782e-06, + "loss": 0.2865, + "step": 34968 + }, + { + "epoch": 0.6042473043959082, + "grad_norm": 0.9587240304475975, + "learning_rate": 7.149764460427076e-06, + "loss": 0.2655, + "step": 34969 + }, + { + "epoch": 0.6042645839093171, + "grad_norm": 2.223193898791265, + "learning_rate": 7.149228029333493e-06, + "loss": 0.4939, + "step": 34970 + }, + { + "epoch": 0.604281863422726, + "grad_norm": 1.4453773004247095, + "learning_rate": 7.14869160716874e-06, + "loss": 0.4455, + "step": 34971 + }, + { + "epoch": 0.604299142936135, + "grad_norm": 1.0526201670126876, + "learning_rate": 7.148155193934503e-06, + "loss": 0.4812, + "step": 34972 + }, + { + "epoch": 0.6043164224495439, + "grad_norm": 1.1035183561033337, + "learning_rate": 7.147618789632464e-06, + "loss": 0.5305, + "step": 34973 + }, + { + "epoch": 0.6043337019629528, + "grad_norm": 1.4675001609687712, + "learning_rate": 7.147082394264294e-06, + "loss": 0.6277, + "step": 34974 + }, + { + "epoch": 0.6043509814763616, + "grad_norm": 1.0331469105643445, + "learning_rate": 7.146546007831683e-06, + "loss": 0.3006, + "step": 34975 + }, + { + "epoch": 0.6043682609897705, + "grad_norm": 1.4666113965251921, + "learning_rate": 7.146009630336303e-06, + "loss": 0.4957, + "step": 34976 + }, + { + "epoch": 0.6043855405031794, + "grad_norm": 1.2576364837533198, + "learning_rate": 7.14547326177984e-06, + "loss": 0.3255, + "step": 34977 + }, + { + "epoch": 0.6044028200165883, + "grad_norm": 1.8528464854090843, + "learning_rate": 7.1449369021639706e-06, + "loss": 0.4961, + "step": 34978 + }, + { + "epoch": 0.6044200995299972, + "grad_norm": 0.8600560237834379, + "learning_rate": 7.1444005514903784e-06, + "loss": 0.4173, + "step": 34979 + }, + { + "epoch": 0.6044373790434061, + "grad_norm": 0.6476101774351484, + "learning_rate": 7.14386420976074e-06, + "loss": 0.3157, + "step": 34980 + }, + { + "epoch": 0.604454658556815, + "grad_norm": 1.317209975236036, + "learning_rate": 7.143327876976733e-06, + "loss": 0.3489, + "step": 34981 + }, + { + "epoch": 0.6044719380702239, + "grad_norm": 1.8050758738951378, + "learning_rate": 7.142791553140045e-06, + "loss": 0.3653, + "step": 34982 + }, + { + "epoch": 0.6044892175836328, + "grad_norm": 1.4680313810023922, + "learning_rate": 7.1422552382523445e-06, + "loss": 0.5132, + "step": 34983 + }, + { + "epoch": 0.6045064970970417, + "grad_norm": 1.598968996639413, + "learning_rate": 7.141718932315324e-06, + "loss": 0.486, + "step": 34984 + }, + { + "epoch": 0.6045237766104506, + "grad_norm": 1.3063989891463637, + "learning_rate": 7.141182635330652e-06, + "loss": 0.3545, + "step": 34985 + }, + { + "epoch": 0.6045410561238596, + "grad_norm": 1.0233214720993302, + "learning_rate": 7.140646347300018e-06, + "loss": 0.2977, + "step": 34986 + }, + { + "epoch": 0.6045583356372685, + "grad_norm": 1.289107966725408, + "learning_rate": 7.140110068225092e-06, + "loss": 0.3638, + "step": 34987 + }, + { + "epoch": 0.6045756151506774, + "grad_norm": 1.404124025754125, + "learning_rate": 7.139573798107564e-06, + "loss": 0.287, + "step": 34988 + }, + { + "epoch": 0.6045928946640863, + "grad_norm": 0.956307439328901, + "learning_rate": 7.139037536949102e-06, + "loss": 0.1765, + "step": 34989 + }, + { + "epoch": 0.6046101741774952, + "grad_norm": 0.9174445987496633, + "learning_rate": 7.138501284751398e-06, + "loss": 0.492, + "step": 34990 + }, + { + "epoch": 0.6046274536909041, + "grad_norm": 1.0941982922323759, + "learning_rate": 7.137965041516124e-06, + "loss": 0.405, + "step": 34991 + }, + { + "epoch": 0.604644733204313, + "grad_norm": 0.967888581664531, + "learning_rate": 7.137428807244957e-06, + "loss": 0.4586, + "step": 34992 + }, + { + "epoch": 0.6046620127177219, + "grad_norm": 1.1642181626960222, + "learning_rate": 7.136892581939586e-06, + "loss": 0.5347, + "step": 34993 + }, + { + "epoch": 0.6046792922311308, + "grad_norm": 0.47897881392772196, + "learning_rate": 7.136356365601681e-06, + "loss": 0.5151, + "step": 34994 + }, + { + "epoch": 0.6046965717445397, + "grad_norm": 1.0514826984175436, + "learning_rate": 7.1358201582329264e-06, + "loss": 0.2914, + "step": 34995 + }, + { + "epoch": 0.6047138512579486, + "grad_norm": 0.916485762488134, + "learning_rate": 7.135283959835e-06, + "loss": 0.2946, + "step": 34996 + }, + { + "epoch": 0.6047311307713574, + "grad_norm": 1.2635582689010876, + "learning_rate": 7.134747770409584e-06, + "loss": 0.4508, + "step": 34997 + }, + { + "epoch": 0.6047484102847663, + "grad_norm": 1.4533393261770196, + "learning_rate": 7.134211589958353e-06, + "loss": 0.2843, + "step": 34998 + }, + { + "epoch": 0.6047656897981752, + "grad_norm": 0.7980512450884931, + "learning_rate": 7.133675418482995e-06, + "loss": 0.2653, + "step": 34999 + }, + { + "epoch": 0.6047829693115842, + "grad_norm": 1.4535172105684464, + "learning_rate": 7.13313925598518e-06, + "loss": 0.3845, + "step": 35000 + }, + { + "epoch": 0.6048002488249931, + "grad_norm": 1.102898787984848, + "learning_rate": 7.132603102466588e-06, + "loss": 0.381, + "step": 35001 + }, + { + "epoch": 0.604817528338402, + "grad_norm": 1.4831403175295057, + "learning_rate": 7.132066957928905e-06, + "loss": 0.3832, + "step": 35002 + }, + { + "epoch": 0.6048348078518109, + "grad_norm": 0.734223643609189, + "learning_rate": 7.1315308223738e-06, + "loss": 0.5075, + "step": 35003 + }, + { + "epoch": 0.6048520873652198, + "grad_norm": 1.0578900025367466, + "learning_rate": 7.130994695802966e-06, + "loss": 0.2905, + "step": 35004 + }, + { + "epoch": 0.6048693668786287, + "grad_norm": 1.0382195941721903, + "learning_rate": 7.13045857821807e-06, + "loss": 0.4231, + "step": 35005 + }, + { + "epoch": 0.6048866463920376, + "grad_norm": 1.4627529140138342, + "learning_rate": 7.129922469620798e-06, + "loss": 0.3767, + "step": 35006 + }, + { + "epoch": 0.6049039259054465, + "grad_norm": 0.9940020970355187, + "learning_rate": 7.129386370012825e-06, + "loss": 0.2623, + "step": 35007 + }, + { + "epoch": 0.6049212054188554, + "grad_norm": 0.931292874080375, + "learning_rate": 7.128850279395836e-06, + "loss": 0.4329, + "step": 35008 + }, + { + "epoch": 0.6049384849322643, + "grad_norm": 1.2128308263747392, + "learning_rate": 7.1283141977715e-06, + "loss": 0.4011, + "step": 35009 + }, + { + "epoch": 0.6049557644456732, + "grad_norm": 1.2098037253072143, + "learning_rate": 7.127778125141507e-06, + "loss": 0.4666, + "step": 35010 + }, + { + "epoch": 0.6049730439590821, + "grad_norm": 1.0362584288416166, + "learning_rate": 7.127242061507531e-06, + "loss": 0.2354, + "step": 35011 + }, + { + "epoch": 0.604990323472491, + "grad_norm": 1.0065617272252028, + "learning_rate": 7.126706006871248e-06, + "loss": 0.3562, + "step": 35012 + }, + { + "epoch": 0.6050076029859, + "grad_norm": 1.2592989863352353, + "learning_rate": 7.126169961234344e-06, + "loss": 0.5775, + "step": 35013 + }, + { + "epoch": 0.6050248824993089, + "grad_norm": 1.3868903292745034, + "learning_rate": 7.125633924598492e-06, + "loss": 0.4299, + "step": 35014 + }, + { + "epoch": 0.6050421620127178, + "grad_norm": 1.2920297704382424, + "learning_rate": 7.1250978969653725e-06, + "loss": 0.3351, + "step": 35015 + }, + { + "epoch": 0.6050594415261267, + "grad_norm": 1.3788643373342946, + "learning_rate": 7.124561878336665e-06, + "loss": 0.4283, + "step": 35016 + }, + { + "epoch": 0.6050767210395356, + "grad_norm": 0.8823626555205594, + "learning_rate": 7.124025868714048e-06, + "loss": 0.4433, + "step": 35017 + }, + { + "epoch": 0.6050940005529444, + "grad_norm": 1.542334035312394, + "learning_rate": 7.1234898680992e-06, + "loss": 0.4121, + "step": 35018 + }, + { + "epoch": 0.6051112800663533, + "grad_norm": 1.3306899897176059, + "learning_rate": 7.122953876493804e-06, + "loss": 0.5219, + "step": 35019 + }, + { + "epoch": 0.6051285595797622, + "grad_norm": 1.2885121209883745, + "learning_rate": 7.122417893899531e-06, + "loss": 0.6414, + "step": 35020 + }, + { + "epoch": 0.6051458390931711, + "grad_norm": 1.1565331976302515, + "learning_rate": 7.121881920318067e-06, + "loss": 0.3904, + "step": 35021 + }, + { + "epoch": 0.60516311860658, + "grad_norm": 1.2037514927611916, + "learning_rate": 7.1213459557510885e-06, + "loss": 0.4488, + "step": 35022 + }, + { + "epoch": 0.6051803981199889, + "grad_norm": 1.141923135720348, + "learning_rate": 7.120810000200267e-06, + "loss": 0.4382, + "step": 35023 + }, + { + "epoch": 0.6051976776333978, + "grad_norm": 1.5444156710550436, + "learning_rate": 7.1202740536672934e-06, + "loss": 0.4402, + "step": 35024 + }, + { + "epoch": 0.6052149571468067, + "grad_norm": 1.2542809262468344, + "learning_rate": 7.119738116153837e-06, + "loss": 0.3888, + "step": 35025 + }, + { + "epoch": 0.6052322366602156, + "grad_norm": 1.1860331935492252, + "learning_rate": 7.119202187661581e-06, + "loss": 0.4538, + "step": 35026 + }, + { + "epoch": 0.6052495161736245, + "grad_norm": 0.9007287498166704, + "learning_rate": 7.118666268192201e-06, + "loss": 0.3283, + "step": 35027 + }, + { + "epoch": 0.6052667956870335, + "grad_norm": 1.4025667528834391, + "learning_rate": 7.118130357747382e-06, + "loss": 0.3765, + "step": 35028 + }, + { + "epoch": 0.6052840752004424, + "grad_norm": 1.212952138747779, + "learning_rate": 7.1175944563287916e-06, + "loss": 0.3897, + "step": 35029 + }, + { + "epoch": 0.6053013547138513, + "grad_norm": 1.4325962824828726, + "learning_rate": 7.117058563938119e-06, + "loss": 0.3346, + "step": 35030 + }, + { + "epoch": 0.6053186342272602, + "grad_norm": 0.9386226323982284, + "learning_rate": 7.1165226805770375e-06, + "loss": 0.7392, + "step": 35031 + }, + { + "epoch": 0.6053359137406691, + "grad_norm": 0.7600667980595089, + "learning_rate": 7.115986806247223e-06, + "loss": 0.2818, + "step": 35032 + }, + { + "epoch": 0.605353193254078, + "grad_norm": 1.0737639356699809, + "learning_rate": 7.115450940950361e-06, + "loss": 0.3092, + "step": 35033 + }, + { + "epoch": 0.6053704727674869, + "grad_norm": 1.8198741827441047, + "learning_rate": 7.114915084688123e-06, + "loss": 0.6076, + "step": 35034 + }, + { + "epoch": 0.6053877522808958, + "grad_norm": 1.1539572008242185, + "learning_rate": 7.114379237462191e-06, + "loss": 0.456, + "step": 35035 + }, + { + "epoch": 0.6054050317943047, + "grad_norm": 1.1227532781371767, + "learning_rate": 7.1138433992742404e-06, + "loss": 0.4909, + "step": 35036 + }, + { + "epoch": 0.6054223113077136, + "grad_norm": 1.121577650867482, + "learning_rate": 7.113307570125956e-06, + "loss": 0.4101, + "step": 35037 + }, + { + "epoch": 0.6054395908211225, + "grad_norm": 1.1686679505787434, + "learning_rate": 7.1127717500190075e-06, + "loss": 0.2801, + "step": 35038 + }, + { + "epoch": 0.6054568703345313, + "grad_norm": 1.900819714633889, + "learning_rate": 7.112235938955081e-06, + "loss": 0.7112, + "step": 35039 + }, + { + "epoch": 0.6054741498479402, + "grad_norm": 1.2652605003859303, + "learning_rate": 7.111700136935848e-06, + "loss": 0.3574, + "step": 35040 + }, + { + "epoch": 0.6054914293613491, + "grad_norm": 1.375944834888056, + "learning_rate": 7.1111643439629915e-06, + "loss": 0.3723, + "step": 35041 + }, + { + "epoch": 0.605508708874758, + "grad_norm": 0.9194986387008452, + "learning_rate": 7.110628560038191e-06, + "loss": 0.6013, + "step": 35042 + }, + { + "epoch": 0.605525988388167, + "grad_norm": 1.2168474841824455, + "learning_rate": 7.110092785163114e-06, + "loss": 0.4944, + "step": 35043 + }, + { + "epoch": 0.6055432679015759, + "grad_norm": 1.2423817716333734, + "learning_rate": 7.109557019339452e-06, + "loss": 0.2877, + "step": 35044 + }, + { + "epoch": 0.6055605474149848, + "grad_norm": 2.073439212710611, + "learning_rate": 7.1090212625688736e-06, + "loss": 0.2849, + "step": 35045 + }, + { + "epoch": 0.6055778269283937, + "grad_norm": 0.9904553711683014, + "learning_rate": 7.108485514853061e-06, + "loss": 0.3779, + "step": 35046 + }, + { + "epoch": 0.6055951064418026, + "grad_norm": 1.273492214257855, + "learning_rate": 7.107949776193691e-06, + "loss": 0.2529, + "step": 35047 + }, + { + "epoch": 0.6056123859552115, + "grad_norm": 0.7548659721677314, + "learning_rate": 7.107414046592446e-06, + "loss": 0.2641, + "step": 35048 + }, + { + "epoch": 0.6056296654686204, + "grad_norm": 1.634547888370768, + "learning_rate": 7.106878326050993e-06, + "loss": 0.4321, + "step": 35049 + }, + { + "epoch": 0.6056469449820293, + "grad_norm": 1.0736396987189842, + "learning_rate": 7.1063426145710246e-06, + "loss": 0.3179, + "step": 35050 + }, + { + "epoch": 0.6056642244954382, + "grad_norm": 0.9131390485493847, + "learning_rate": 7.105806912154205e-06, + "loss": 0.3689, + "step": 35051 + }, + { + "epoch": 0.6056815040088471, + "grad_norm": 0.6944039546982966, + "learning_rate": 7.105271218802221e-06, + "loss": 0.3862, + "step": 35052 + }, + { + "epoch": 0.605698783522256, + "grad_norm": 1.1180927524957724, + "learning_rate": 7.104735534516751e-06, + "loss": 0.3645, + "step": 35053 + }, + { + "epoch": 0.6057160630356649, + "grad_norm": 0.9279136639114964, + "learning_rate": 7.1041998592994635e-06, + "loss": 0.4916, + "step": 35054 + }, + { + "epoch": 0.6057333425490738, + "grad_norm": 0.8277196311753653, + "learning_rate": 7.103664193152044e-06, + "loss": 0.2449, + "step": 35055 + }, + { + "epoch": 0.6057506220624828, + "grad_norm": 1.3716273003612038, + "learning_rate": 7.103128536076167e-06, + "loss": 0.3964, + "step": 35056 + }, + { + "epoch": 0.6057679015758917, + "grad_norm": 1.1786924893758057, + "learning_rate": 7.102592888073514e-06, + "loss": 0.4847, + "step": 35057 + }, + { + "epoch": 0.6057851810893006, + "grad_norm": 0.9081885754049117, + "learning_rate": 7.102057249145757e-06, + "loss": 0.1713, + "step": 35058 + }, + { + "epoch": 0.6058024606027095, + "grad_norm": 1.8386487812084362, + "learning_rate": 7.101521619294582e-06, + "loss": 0.4831, + "step": 35059 + }, + { + "epoch": 0.6058197401161183, + "grad_norm": 0.8082841793101903, + "learning_rate": 7.100985998521659e-06, + "loss": 0.6851, + "step": 35060 + }, + { + "epoch": 0.6058370196295272, + "grad_norm": 0.9955706319093166, + "learning_rate": 7.100450386828668e-06, + "loss": 0.3959, + "step": 35061 + }, + { + "epoch": 0.6058542991429361, + "grad_norm": 0.8903380721153767, + "learning_rate": 7.09991478421729e-06, + "loss": 0.7005, + "step": 35062 + }, + { + "epoch": 0.605871578656345, + "grad_norm": 1.2022056447344016, + "learning_rate": 7.099379190689192e-06, + "loss": 0.5551, + "step": 35063 + }, + { + "epoch": 0.6058888581697539, + "grad_norm": 1.2894563495725455, + "learning_rate": 7.098843606246065e-06, + "loss": 0.3927, + "step": 35064 + }, + { + "epoch": 0.6059061376831628, + "grad_norm": 1.4500915603108706, + "learning_rate": 7.098308030889577e-06, + "loss": 0.3989, + "step": 35065 + }, + { + "epoch": 0.6059234171965717, + "grad_norm": 1.3532782377968875, + "learning_rate": 7.097772464621411e-06, + "loss": 0.3351, + "step": 35066 + }, + { + "epoch": 0.6059406967099806, + "grad_norm": 1.0248185745955958, + "learning_rate": 7.097236907443239e-06, + "loss": 0.3043, + "step": 35067 + }, + { + "epoch": 0.6059579762233895, + "grad_norm": 1.4442289781685345, + "learning_rate": 7.096701359356745e-06, + "loss": 0.4047, + "step": 35068 + }, + { + "epoch": 0.6059752557367984, + "grad_norm": 1.020693277606465, + "learning_rate": 7.096165820363598e-06, + "loss": 0.2502, + "step": 35069 + }, + { + "epoch": 0.6059925352502074, + "grad_norm": 1.1140600639909577, + "learning_rate": 7.095630290465488e-06, + "loss": 0.4299, + "step": 35070 + }, + { + "epoch": 0.6060098147636163, + "grad_norm": 1.1030547569311577, + "learning_rate": 7.095094769664079e-06, + "loss": 0.2887, + "step": 35071 + }, + { + "epoch": 0.6060270942770252, + "grad_norm": 1.5580296334503, + "learning_rate": 7.094559257961056e-06, + "loss": 0.3507, + "step": 35072 + }, + { + "epoch": 0.6060443737904341, + "grad_norm": 0.9113472774778723, + "learning_rate": 7.094023755358097e-06, + "loss": 0.6174, + "step": 35073 + }, + { + "epoch": 0.606061653303843, + "grad_norm": 1.6385505814718364, + "learning_rate": 7.093488261856871e-06, + "loss": 0.359, + "step": 35074 + }, + { + "epoch": 0.6060789328172519, + "grad_norm": 0.9252120611926488, + "learning_rate": 7.092952777459064e-06, + "loss": 0.6415, + "step": 35075 + }, + { + "epoch": 0.6060962123306608, + "grad_norm": 0.9232468316949352, + "learning_rate": 7.0924173021663475e-06, + "loss": 0.3383, + "step": 35076 + }, + { + "epoch": 0.6061134918440697, + "grad_norm": 0.9227329105961823, + "learning_rate": 7.0918818359804035e-06, + "loss": 0.4941, + "step": 35077 + }, + { + "epoch": 0.6061307713574786, + "grad_norm": 1.3549765576495998, + "learning_rate": 7.091346378902904e-06, + "loss": 0.4277, + "step": 35078 + }, + { + "epoch": 0.6061480508708875, + "grad_norm": 1.1264898699240424, + "learning_rate": 7.090810930935532e-06, + "loss": 0.4513, + "step": 35079 + }, + { + "epoch": 0.6061653303842964, + "grad_norm": 1.2744130077491058, + "learning_rate": 7.0902754920799575e-06, + "loss": 0.488, + "step": 35080 + }, + { + "epoch": 0.6061826098977052, + "grad_norm": 0.9103748352531584, + "learning_rate": 7.089740062337864e-06, + "loss": 0.3748, + "step": 35081 + }, + { + "epoch": 0.6061998894111141, + "grad_norm": 0.9383318701839949, + "learning_rate": 7.089204641710922e-06, + "loss": 0.3816, + "step": 35082 + }, + { + "epoch": 0.606217168924523, + "grad_norm": 1.7492546290440014, + "learning_rate": 7.088669230200818e-06, + "loss": 0.5295, + "step": 35083 + }, + { + "epoch": 0.606234448437932, + "grad_norm": 1.1806060046232671, + "learning_rate": 7.088133827809223e-06, + "loss": 0.2715, + "step": 35084 + }, + { + "epoch": 0.6062517279513409, + "grad_norm": 1.1296982492781529, + "learning_rate": 7.087598434537809e-06, + "loss": 0.4989, + "step": 35085 + }, + { + "epoch": 0.6062690074647498, + "grad_norm": 0.728719185472185, + "learning_rate": 7.087063050388261e-06, + "loss": 0.2985, + "step": 35086 + }, + { + "epoch": 0.6062862869781587, + "grad_norm": 0.4845995487555337, + "learning_rate": 7.086527675362252e-06, + "loss": 0.5418, + "step": 35087 + }, + { + "epoch": 0.6063035664915676, + "grad_norm": 0.9585480667651941, + "learning_rate": 7.085992309461462e-06, + "loss": 0.6268, + "step": 35088 + }, + { + "epoch": 0.6063208460049765, + "grad_norm": 0.8940073309464387, + "learning_rate": 7.085456952687559e-06, + "loss": 0.296, + "step": 35089 + }, + { + "epoch": 0.6063381255183854, + "grad_norm": 1.35371608659662, + "learning_rate": 7.084921605042234e-06, + "loss": 0.506, + "step": 35090 + }, + { + "epoch": 0.6063554050317943, + "grad_norm": 0.9229358854132917, + "learning_rate": 7.084386266527152e-06, + "loss": 0.4027, + "step": 35091 + }, + { + "epoch": 0.6063726845452032, + "grad_norm": 1.8639511624171496, + "learning_rate": 7.083850937143996e-06, + "loss": 0.3492, + "step": 35092 + }, + { + "epoch": 0.6063899640586121, + "grad_norm": 1.4247868643652535, + "learning_rate": 7.083315616894442e-06, + "loss": 0.584, + "step": 35093 + }, + { + "epoch": 0.606407243572021, + "grad_norm": 0.9771200048279124, + "learning_rate": 7.082780305780162e-06, + "loss": 0.4719, + "step": 35094 + }, + { + "epoch": 0.6064245230854299, + "grad_norm": 1.2461202709867698, + "learning_rate": 7.082245003802836e-06, + "loss": 0.4332, + "step": 35095 + }, + { + "epoch": 0.6064418025988388, + "grad_norm": 0.7603451443830217, + "learning_rate": 7.08170971096414e-06, + "loss": 0.4076, + "step": 35096 + }, + { + "epoch": 0.6064590821122477, + "grad_norm": 1.101806227530689, + "learning_rate": 7.081174427265751e-06, + "loss": 0.332, + "step": 35097 + }, + { + "epoch": 0.6064763616256567, + "grad_norm": 0.8826278276663125, + "learning_rate": 7.080639152709345e-06, + "loss": 0.3539, + "step": 35098 + }, + { + "epoch": 0.6064936411390656, + "grad_norm": 0.9818886104111582, + "learning_rate": 7.080103887296603e-06, + "loss": 0.4013, + "step": 35099 + }, + { + "epoch": 0.6065109206524745, + "grad_norm": 0.950115554298858, + "learning_rate": 7.079568631029193e-06, + "loss": 0.4311, + "step": 35100 + }, + { + "epoch": 0.6065282001658834, + "grad_norm": 1.543930437683018, + "learning_rate": 7.0790333839087986e-06, + "loss": 0.4907, + "step": 35101 + }, + { + "epoch": 0.6065454796792922, + "grad_norm": 1.378770745300758, + "learning_rate": 7.078498145937092e-06, + "loss": 0.2958, + "step": 35102 + }, + { + "epoch": 0.6065627591927011, + "grad_norm": 0.9551850052869342, + "learning_rate": 7.077962917115751e-06, + "loss": 0.2658, + "step": 35103 + }, + { + "epoch": 0.60658003870611, + "grad_norm": 1.1620838847620674, + "learning_rate": 7.077427697446458e-06, + "loss": 0.5423, + "step": 35104 + }, + { + "epoch": 0.6065973182195189, + "grad_norm": 1.0176085118776779, + "learning_rate": 7.076892486930876e-06, + "loss": 0.5102, + "step": 35105 + }, + { + "epoch": 0.6066145977329278, + "grad_norm": 1.2855478768622464, + "learning_rate": 7.0763572855706915e-06, + "loss": 0.3443, + "step": 35106 + }, + { + "epoch": 0.6066318772463367, + "grad_norm": 0.8880203816901847, + "learning_rate": 7.075822093367577e-06, + "loss": 0.321, + "step": 35107 + }, + { + "epoch": 0.6066491567597456, + "grad_norm": 0.8579333747132141, + "learning_rate": 7.075286910323215e-06, + "loss": 0.3138, + "step": 35108 + }, + { + "epoch": 0.6066664362731545, + "grad_norm": 1.2811865355780674, + "learning_rate": 7.0747517364392694e-06, + "loss": 0.5563, + "step": 35109 + }, + { + "epoch": 0.6066837157865634, + "grad_norm": 1.1046414416747354, + "learning_rate": 7.074216571717429e-06, + "loss": 0.224, + "step": 35110 + }, + { + "epoch": 0.6067009952999723, + "grad_norm": 0.7979239147933519, + "learning_rate": 7.073681416159362e-06, + "loss": 0.6412, + "step": 35111 + }, + { + "epoch": 0.6067182748133813, + "grad_norm": 1.1288533422253846, + "learning_rate": 7.073146269766749e-06, + "loss": 0.5564, + "step": 35112 + }, + { + "epoch": 0.6067355543267902, + "grad_norm": 2.6999063829492163, + "learning_rate": 7.072611132541261e-06, + "loss": 0.5808, + "step": 35113 + }, + { + "epoch": 0.6067528338401991, + "grad_norm": 1.1374546417562497, + "learning_rate": 7.072076004484583e-06, + "loss": 0.2948, + "step": 35114 + }, + { + "epoch": 0.606770113353608, + "grad_norm": 1.762310599871268, + "learning_rate": 7.071540885598382e-06, + "loss": 0.3544, + "step": 35115 + }, + { + "epoch": 0.6067873928670169, + "grad_norm": 0.9209386417888831, + "learning_rate": 7.071005775884336e-06, + "loss": 0.398, + "step": 35116 + }, + { + "epoch": 0.6068046723804258, + "grad_norm": 1.1194318725611498, + "learning_rate": 7.070470675344126e-06, + "loss": 0.404, + "step": 35117 + }, + { + "epoch": 0.6068219518938347, + "grad_norm": 1.2728479701789797, + "learning_rate": 7.0699355839794215e-06, + "loss": 0.3637, + "step": 35118 + }, + { + "epoch": 0.6068392314072436, + "grad_norm": 0.7560370552542234, + "learning_rate": 7.069400501791905e-06, + "loss": 0.5132, + "step": 35119 + }, + { + "epoch": 0.6068565109206525, + "grad_norm": 0.7977545480123405, + "learning_rate": 7.068865428783245e-06, + "loss": 0.3222, + "step": 35120 + }, + { + "epoch": 0.6068737904340614, + "grad_norm": 1.0873869389684356, + "learning_rate": 7.068330364955123e-06, + "loss": 0.4034, + "step": 35121 + }, + { + "epoch": 0.6068910699474703, + "grad_norm": 0.9995408390356617, + "learning_rate": 7.0677953103092115e-06, + "loss": 0.361, + "step": 35122 + }, + { + "epoch": 0.6069083494608791, + "grad_norm": 0.9592769775393458, + "learning_rate": 7.06726026484719e-06, + "loss": 0.5699, + "step": 35123 + }, + { + "epoch": 0.606925628974288, + "grad_norm": 1.9696621458749612, + "learning_rate": 7.066725228570734e-06, + "loss": 0.4474, + "step": 35124 + }, + { + "epoch": 0.6069429084876969, + "grad_norm": 1.1342621682845764, + "learning_rate": 7.066190201481513e-06, + "loss": 0.4902, + "step": 35125 + }, + { + "epoch": 0.6069601880011058, + "grad_norm": 0.6909621306701288, + "learning_rate": 7.06565518358121e-06, + "loss": 0.2532, + "step": 35126 + }, + { + "epoch": 0.6069774675145148, + "grad_norm": 2.4583480524592995, + "learning_rate": 7.065120174871496e-06, + "loss": 0.5402, + "step": 35127 + }, + { + "epoch": 0.6069947470279237, + "grad_norm": 1.2270207521508358, + "learning_rate": 7.064585175354053e-06, + "loss": 0.4726, + "step": 35128 + }, + { + "epoch": 0.6070120265413326, + "grad_norm": 0.8476751391055956, + "learning_rate": 7.0640501850305456e-06, + "loss": 0.3115, + "step": 35129 + }, + { + "epoch": 0.6070293060547415, + "grad_norm": 1.0920772511941583, + "learning_rate": 7.063515203902662e-06, + "loss": 0.5181, + "step": 35130 + }, + { + "epoch": 0.6070465855681504, + "grad_norm": 0.8155067726298594, + "learning_rate": 7.062980231972068e-06, + "loss": 0.3756, + "step": 35131 + }, + { + "epoch": 0.6070638650815593, + "grad_norm": 0.9975213897755432, + "learning_rate": 7.062445269240446e-06, + "loss": 0.3556, + "step": 35132 + }, + { + "epoch": 0.6070811445949682, + "grad_norm": 1.0124142272226606, + "learning_rate": 7.061910315709466e-06, + "loss": 0.4596, + "step": 35133 + }, + { + "epoch": 0.6070984241083771, + "grad_norm": 0.8028812554693056, + "learning_rate": 7.0613753713808095e-06, + "loss": 0.7874, + "step": 35134 + }, + { + "epoch": 0.607115703621786, + "grad_norm": 1.0818956046343262, + "learning_rate": 7.060840436256148e-06, + "loss": 0.4806, + "step": 35135 + }, + { + "epoch": 0.6071329831351949, + "grad_norm": 0.9462617648410945, + "learning_rate": 7.060305510337155e-06, + "loss": 0.4399, + "step": 35136 + }, + { + "epoch": 0.6071502626486038, + "grad_norm": 1.0586013815333972, + "learning_rate": 7.05977059362551e-06, + "loss": 0.4419, + "step": 35137 + }, + { + "epoch": 0.6071675421620127, + "grad_norm": 1.414527128873284, + "learning_rate": 7.059235686122885e-06, + "loss": 0.4385, + "step": 35138 + }, + { + "epoch": 0.6071848216754216, + "grad_norm": 0.8929149517094421, + "learning_rate": 7.058700787830963e-06, + "loss": 0.4487, + "step": 35139 + }, + { + "epoch": 0.6072021011888306, + "grad_norm": 1.3750390627257967, + "learning_rate": 7.058165898751408e-06, + "loss": 0.4311, + "step": 35140 + }, + { + "epoch": 0.6072193807022395, + "grad_norm": 1.2055956651129605, + "learning_rate": 7.057631018885905e-06, + "loss": 0.5118, + "step": 35141 + }, + { + "epoch": 0.6072366602156484, + "grad_norm": 0.9430952708036536, + "learning_rate": 7.05709614823612e-06, + "loss": 0.3699, + "step": 35142 + }, + { + "epoch": 0.6072539397290573, + "grad_norm": 1.0061143419107967, + "learning_rate": 7.0565612868037384e-06, + "loss": 0.4753, + "step": 35143 + }, + { + "epoch": 0.6072712192424662, + "grad_norm": 1.2564431511510572, + "learning_rate": 7.056026434590426e-06, + "loss": 0.4827, + "step": 35144 + }, + { + "epoch": 0.607288498755875, + "grad_norm": 0.9881449521365198, + "learning_rate": 7.055491591597868e-06, + "loss": 0.2192, + "step": 35145 + }, + { + "epoch": 0.6073057782692839, + "grad_norm": 1.2613529710611893, + "learning_rate": 7.054956757827732e-06, + "loss": 0.4332, + "step": 35146 + }, + { + "epoch": 0.6073230577826928, + "grad_norm": 1.8451916064523142, + "learning_rate": 7.054421933281694e-06, + "loss": 0.526, + "step": 35147 + }, + { + "epoch": 0.6073403372961017, + "grad_norm": 0.9726959306652012, + "learning_rate": 7.053887117961433e-06, + "loss": 0.6152, + "step": 35148 + }, + { + "epoch": 0.6073576168095106, + "grad_norm": 0.8189218119445405, + "learning_rate": 7.053352311868617e-06, + "loss": 0.4993, + "step": 35149 + }, + { + "epoch": 0.6073748963229195, + "grad_norm": 1.0314896242563845, + "learning_rate": 7.05281751500493e-06, + "loss": 0.4737, + "step": 35150 + }, + { + "epoch": 0.6073921758363284, + "grad_norm": 0.9627012816603174, + "learning_rate": 7.05228272737204e-06, + "loss": 0.5793, + "step": 35151 + }, + { + "epoch": 0.6074094553497373, + "grad_norm": 0.6564049157738645, + "learning_rate": 7.051747948971624e-06, + "loss": 0.2719, + "step": 35152 + }, + { + "epoch": 0.6074267348631462, + "grad_norm": 1.2577871053327612, + "learning_rate": 7.051213179805357e-06, + "loss": 0.4049, + "step": 35153 + }, + { + "epoch": 0.6074440143765552, + "grad_norm": 1.1479647724707223, + "learning_rate": 7.050678419874919e-06, + "loss": 0.5605, + "step": 35154 + }, + { + "epoch": 0.6074612938899641, + "grad_norm": 1.2813513989937564, + "learning_rate": 7.050143669181978e-06, + "loss": 0.4718, + "step": 35155 + }, + { + "epoch": 0.607478573403373, + "grad_norm": 1.301598297487493, + "learning_rate": 7.049608927728209e-06, + "loss": 0.3014, + "step": 35156 + }, + { + "epoch": 0.6074958529167819, + "grad_norm": 0.8437389247632969, + "learning_rate": 7.0490741955152905e-06, + "loss": 0.3623, + "step": 35157 + }, + { + "epoch": 0.6075131324301908, + "grad_norm": 1.487227707500023, + "learning_rate": 7.048539472544895e-06, + "loss": 0.5125, + "step": 35158 + }, + { + "epoch": 0.6075304119435997, + "grad_norm": 1.1287306615926753, + "learning_rate": 7.048004758818702e-06, + "loss": 0.2695, + "step": 35159 + }, + { + "epoch": 0.6075476914570086, + "grad_norm": 1.2577405592826547, + "learning_rate": 7.047470054338378e-06, + "loss": 0.3937, + "step": 35160 + }, + { + "epoch": 0.6075649709704175, + "grad_norm": 0.7238904286841946, + "learning_rate": 7.046935359105604e-06, + "loss": 0.3781, + "step": 35161 + }, + { + "epoch": 0.6075822504838264, + "grad_norm": 0.8961782891729786, + "learning_rate": 7.046400673122052e-06, + "loss": 0.3415, + "step": 35162 + }, + { + "epoch": 0.6075995299972353, + "grad_norm": 1.2381765080550788, + "learning_rate": 7.0458659963893985e-06, + "loss": 0.378, + "step": 35163 + }, + { + "epoch": 0.6076168095106442, + "grad_norm": 0.6308572101868587, + "learning_rate": 7.045331328909315e-06, + "loss": 0.6153, + "step": 35164 + }, + { + "epoch": 0.6076340890240531, + "grad_norm": 1.2439342177284112, + "learning_rate": 7.044796670683483e-06, + "loss": 0.2863, + "step": 35165 + }, + { + "epoch": 0.6076513685374619, + "grad_norm": 1.4443843318953296, + "learning_rate": 7.04426202171357e-06, + "loss": 0.432, + "step": 35166 + }, + { + "epoch": 0.6076686480508708, + "grad_norm": 0.8516567973941062, + "learning_rate": 7.043727382001251e-06, + "loss": 0.582, + "step": 35167 + }, + { + "epoch": 0.6076859275642797, + "grad_norm": 0.8302975145694168, + "learning_rate": 7.043192751548207e-06, + "loss": 0.1592, + "step": 35168 + }, + { + "epoch": 0.6077032070776887, + "grad_norm": 1.0637956928260779, + "learning_rate": 7.042658130356102e-06, + "loss": 0.2061, + "step": 35169 + }, + { + "epoch": 0.6077204865910976, + "grad_norm": 1.386068609650403, + "learning_rate": 7.042123518426622e-06, + "loss": 0.5282, + "step": 35170 + }, + { + "epoch": 0.6077377661045065, + "grad_norm": 1.2772654863965611, + "learning_rate": 7.041588915761431e-06, + "loss": 0.3338, + "step": 35171 + }, + { + "epoch": 0.6077550456179154, + "grad_norm": 1.0044235315117591, + "learning_rate": 7.041054322362212e-06, + "loss": 0.3917, + "step": 35172 + }, + { + "epoch": 0.6077723251313243, + "grad_norm": 1.7312463528291009, + "learning_rate": 7.0405197382306335e-06, + "loss": 0.3484, + "step": 35173 + }, + { + "epoch": 0.6077896046447332, + "grad_norm": 1.2375757131123513, + "learning_rate": 7.039985163368375e-06, + "loss": 0.45, + "step": 35174 + }, + { + "epoch": 0.6078068841581421, + "grad_norm": 0.8701589724323185, + "learning_rate": 7.0394505977771065e-06, + "loss": 0.387, + "step": 35175 + }, + { + "epoch": 0.607824163671551, + "grad_norm": 0.8840044251193752, + "learning_rate": 7.038916041458502e-06, + "loss": 0.4242, + "step": 35176 + }, + { + "epoch": 0.6078414431849599, + "grad_norm": 1.437390805647188, + "learning_rate": 7.038381494414239e-06, + "loss": 0.255, + "step": 35177 + }, + { + "epoch": 0.6078587226983688, + "grad_norm": 1.220023726071245, + "learning_rate": 7.037846956645989e-06, + "loss": 0.2871, + "step": 35178 + }, + { + "epoch": 0.6078760022117777, + "grad_norm": 1.0019510582555522, + "learning_rate": 7.0373124281554315e-06, + "loss": 0.294, + "step": 35179 + }, + { + "epoch": 0.6078932817251866, + "grad_norm": 0.9750070236403345, + "learning_rate": 7.0367779089442325e-06, + "loss": 0.6915, + "step": 35180 + }, + { + "epoch": 0.6079105612385955, + "grad_norm": 0.9415338594955829, + "learning_rate": 7.036243399014071e-06, + "loss": 0.3511, + "step": 35181 + }, + { + "epoch": 0.6079278407520045, + "grad_norm": 0.9471996993321871, + "learning_rate": 7.03570889836662e-06, + "loss": 0.3087, + "step": 35182 + }, + { + "epoch": 0.6079451202654134, + "grad_norm": 1.6552880162686572, + "learning_rate": 7.035174407003556e-06, + "loss": 0.4301, + "step": 35183 + }, + { + "epoch": 0.6079623997788223, + "grad_norm": 1.6655512895601388, + "learning_rate": 7.034639924926547e-06, + "loss": 0.4027, + "step": 35184 + }, + { + "epoch": 0.6079796792922312, + "grad_norm": 0.9112155392727419, + "learning_rate": 7.034105452137278e-06, + "loss": 0.528, + "step": 35185 + }, + { + "epoch": 0.6079969588056401, + "grad_norm": 0.9772958199366076, + "learning_rate": 7.033570988637413e-06, + "loss": 0.5224, + "step": 35186 + }, + { + "epoch": 0.6080142383190489, + "grad_norm": 1.5330925003708336, + "learning_rate": 7.033036534428626e-06, + "loss": 0.2857, + "step": 35187 + }, + { + "epoch": 0.6080315178324578, + "grad_norm": 1.1462241403589535, + "learning_rate": 7.032502089512598e-06, + "loss": 0.9101, + "step": 35188 + }, + { + "epoch": 0.6080487973458667, + "grad_norm": 1.4294696782508272, + "learning_rate": 7.031967653890994e-06, + "loss": 0.6505, + "step": 35189 + }, + { + "epoch": 0.6080660768592756, + "grad_norm": 0.843638127588606, + "learning_rate": 7.031433227565499e-06, + "loss": 0.6377, + "step": 35190 + }, + { + "epoch": 0.6080833563726845, + "grad_norm": 1.2963869006807838, + "learning_rate": 7.030898810537777e-06, + "loss": 0.3628, + "step": 35191 + }, + { + "epoch": 0.6081006358860934, + "grad_norm": 0.9704025212536235, + "learning_rate": 7.030364402809507e-06, + "loss": 0.4611, + "step": 35192 + }, + { + "epoch": 0.6081179153995023, + "grad_norm": 1.201415278499342, + "learning_rate": 7.029830004382358e-06, + "loss": 0.4786, + "step": 35193 + }, + { + "epoch": 0.6081351949129112, + "grad_norm": 1.2580932236686104, + "learning_rate": 7.0292956152580135e-06, + "loss": 0.3047, + "step": 35194 + }, + { + "epoch": 0.6081524744263201, + "grad_norm": 0.7688237596500462, + "learning_rate": 7.028761235438134e-06, + "loss": 0.3165, + "step": 35195 + }, + { + "epoch": 0.608169753939729, + "grad_norm": 1.5610783089936568, + "learning_rate": 7.028226864924407e-06, + "loss": 0.3431, + "step": 35196 + }, + { + "epoch": 0.608187033453138, + "grad_norm": 1.2046847119862807, + "learning_rate": 7.027692503718495e-06, + "loss": 0.3797, + "step": 35197 + }, + { + "epoch": 0.6082043129665469, + "grad_norm": 0.8172397152872883, + "learning_rate": 7.027158151822076e-06, + "loss": 0.1769, + "step": 35198 + }, + { + "epoch": 0.6082215924799558, + "grad_norm": 1.2468873307402075, + "learning_rate": 7.026623809236828e-06, + "loss": 0.2799, + "step": 35199 + }, + { + "epoch": 0.6082388719933647, + "grad_norm": 1.0416812338670494, + "learning_rate": 7.026089475964415e-06, + "loss": 0.4539, + "step": 35200 + }, + { + "epoch": 0.6082561515067736, + "grad_norm": 0.9574541946704632, + "learning_rate": 7.025555152006518e-06, + "loss": 0.41, + "step": 35201 + }, + { + "epoch": 0.6082734310201825, + "grad_norm": 1.1597372504989394, + "learning_rate": 7.025020837364806e-06, + "loss": 0.4221, + "step": 35202 + }, + { + "epoch": 0.6082907105335914, + "grad_norm": 1.1811407245965226, + "learning_rate": 7.0244865320409575e-06, + "loss": 0.3203, + "step": 35203 + }, + { + "epoch": 0.6083079900470003, + "grad_norm": 1.1478761903604189, + "learning_rate": 7.02395223603664e-06, + "loss": 0.2929, + "step": 35204 + }, + { + "epoch": 0.6083252695604092, + "grad_norm": 1.3116904260365203, + "learning_rate": 7.023417949353535e-06, + "loss": 0.3202, + "step": 35205 + }, + { + "epoch": 0.6083425490738181, + "grad_norm": 1.239835614137402, + "learning_rate": 7.0228836719933095e-06, + "loss": 0.3372, + "step": 35206 + }, + { + "epoch": 0.608359828587227, + "grad_norm": 1.1903895534896562, + "learning_rate": 7.022349403957635e-06, + "loss": 0.4716, + "step": 35207 + }, + { + "epoch": 0.6083771081006358, + "grad_norm": 0.9378085541480067, + "learning_rate": 7.0218151452481955e-06, + "loss": 0.3228, + "step": 35208 + }, + { + "epoch": 0.6083943876140447, + "grad_norm": 0.9883213644232556, + "learning_rate": 7.021280895866649e-06, + "loss": 0.4209, + "step": 35209 + }, + { + "epoch": 0.6084116671274536, + "grad_norm": 0.6442968351090369, + "learning_rate": 7.020746655814684e-06, + "loss": 1.0244, + "step": 35210 + }, + { + "epoch": 0.6084289466408626, + "grad_norm": 1.0000562682443004, + "learning_rate": 7.020212425093962e-06, + "loss": 0.3544, + "step": 35211 + }, + { + "epoch": 0.6084462261542715, + "grad_norm": 1.188192619648483, + "learning_rate": 7.019678203706164e-06, + "loss": 0.4767, + "step": 35212 + }, + { + "epoch": 0.6084635056676804, + "grad_norm": 1.2659997824774551, + "learning_rate": 7.019143991652959e-06, + "loss": 0.5871, + "step": 35213 + }, + { + "epoch": 0.6084807851810893, + "grad_norm": 1.283426877104856, + "learning_rate": 7.0186097889360235e-06, + "loss": 0.4058, + "step": 35214 + }, + { + "epoch": 0.6084980646944982, + "grad_norm": 0.6144646260719908, + "learning_rate": 7.018075595557024e-06, + "loss": 0.5437, + "step": 35215 + }, + { + "epoch": 0.6085153442079071, + "grad_norm": 0.8945642307154231, + "learning_rate": 7.017541411517645e-06, + "loss": 0.4466, + "step": 35216 + }, + { + "epoch": 0.608532623721316, + "grad_norm": 1.3859463322230274, + "learning_rate": 7.0170072368195504e-06, + "loss": 0.4677, + "step": 35217 + }, + { + "epoch": 0.6085499032347249, + "grad_norm": 1.1529141818653244, + "learning_rate": 7.016473071464414e-06, + "loss": 0.4911, + "step": 35218 + }, + { + "epoch": 0.6085671827481338, + "grad_norm": 0.8819398023543684, + "learning_rate": 7.015938915453915e-06, + "loss": 0.2681, + "step": 35219 + }, + { + "epoch": 0.6085844622615427, + "grad_norm": 1.3132588491757353, + "learning_rate": 7.015404768789716e-06, + "loss": 0.3763, + "step": 35220 + }, + { + "epoch": 0.6086017417749516, + "grad_norm": 1.5514403199072149, + "learning_rate": 7.0148706314735e-06, + "loss": 0.4433, + "step": 35221 + }, + { + "epoch": 0.6086190212883605, + "grad_norm": 0.7106264648273993, + "learning_rate": 7.014336503506933e-06, + "loss": 0.4899, + "step": 35222 + }, + { + "epoch": 0.6086363008017694, + "grad_norm": 1.2966591875061215, + "learning_rate": 7.013802384891695e-06, + "loss": 0.5585, + "step": 35223 + }, + { + "epoch": 0.6086535803151784, + "grad_norm": 1.3081486725379738, + "learning_rate": 7.01326827562945e-06, + "loss": 0.5028, + "step": 35224 + }, + { + "epoch": 0.6086708598285873, + "grad_norm": 1.1049751469035565, + "learning_rate": 7.012734175721883e-06, + "loss": 0.4417, + "step": 35225 + }, + { + "epoch": 0.6086881393419962, + "grad_norm": 1.0813465157907656, + "learning_rate": 7.012200085170653e-06, + "loss": 0.4425, + "step": 35226 + }, + { + "epoch": 0.6087054188554051, + "grad_norm": 0.7398672624159646, + "learning_rate": 7.011666003977442e-06, + "loss": 0.5071, + "step": 35227 + }, + { + "epoch": 0.608722698368814, + "grad_norm": 1.0287875160768944, + "learning_rate": 7.011131932143922e-06, + "loss": 0.5032, + "step": 35228 + }, + { + "epoch": 0.6087399778822228, + "grad_norm": 1.487529190903268, + "learning_rate": 7.010597869671758e-06, + "loss": 0.3876, + "step": 35229 + }, + { + "epoch": 0.6087572573956317, + "grad_norm": 1.353123056872012, + "learning_rate": 7.010063816562636e-06, + "loss": 0.3148, + "step": 35230 + }, + { + "epoch": 0.6087745369090406, + "grad_norm": 1.030168802743523, + "learning_rate": 7.009529772818216e-06, + "loss": 0.3425, + "step": 35231 + }, + { + "epoch": 0.6087918164224495, + "grad_norm": 2.311712864326087, + "learning_rate": 7.008995738440178e-06, + "loss": 0.4933, + "step": 35232 + }, + { + "epoch": 0.6088090959358584, + "grad_norm": 1.026664499552654, + "learning_rate": 7.0084617134301905e-06, + "loss": 0.3502, + "step": 35233 + }, + { + "epoch": 0.6088263754492673, + "grad_norm": 1.3571608665591979, + "learning_rate": 7.007927697789934e-06, + "loss": 0.3132, + "step": 35234 + }, + { + "epoch": 0.6088436549626762, + "grad_norm": 1.1228268541245685, + "learning_rate": 7.007393691521067e-06, + "loss": 0.3525, + "step": 35235 + }, + { + "epoch": 0.6088609344760851, + "grad_norm": 1.0537167681645918, + "learning_rate": 7.0068596946252775e-06, + "loss": 0.3073, + "step": 35236 + }, + { + "epoch": 0.608878213989494, + "grad_norm": 1.6349930886635347, + "learning_rate": 7.006325707104229e-06, + "loss": 0.2976, + "step": 35237 + }, + { + "epoch": 0.608895493502903, + "grad_norm": 1.17292323310473, + "learning_rate": 7.005791728959595e-06, + "loss": 0.4743, + "step": 35238 + }, + { + "epoch": 0.6089127730163119, + "grad_norm": 1.357137060526065, + "learning_rate": 7.00525776019305e-06, + "loss": 0.358, + "step": 35239 + }, + { + "epoch": 0.6089300525297208, + "grad_norm": 1.025478654445003, + "learning_rate": 7.0047238008062635e-06, + "loss": 0.4024, + "step": 35240 + }, + { + "epoch": 0.6089473320431297, + "grad_norm": 0.6700927364885025, + "learning_rate": 7.004189850800911e-06, + "loss": 0.8744, + "step": 35241 + }, + { + "epoch": 0.6089646115565386, + "grad_norm": 1.3280600377245044, + "learning_rate": 7.003655910178662e-06, + "loss": 0.4468, + "step": 35242 + }, + { + "epoch": 0.6089818910699475, + "grad_norm": 0.8042051079468142, + "learning_rate": 7.003121978941192e-06, + "loss": 0.4248, + "step": 35243 + }, + { + "epoch": 0.6089991705833564, + "grad_norm": 1.2986416475466769, + "learning_rate": 7.002588057090169e-06, + "loss": 0.6007, + "step": 35244 + }, + { + "epoch": 0.6090164500967653, + "grad_norm": 0.8305215182810366, + "learning_rate": 7.002054144627273e-06, + "loss": 0.2605, + "step": 35245 + }, + { + "epoch": 0.6090337296101742, + "grad_norm": 1.1910077376311803, + "learning_rate": 7.001520241554167e-06, + "loss": 0.3846, + "step": 35246 + }, + { + "epoch": 0.6090510091235831, + "grad_norm": 0.9966790261345361, + "learning_rate": 7.00098634787253e-06, + "loss": 0.5111, + "step": 35247 + }, + { + "epoch": 0.609068288636992, + "grad_norm": 1.1910958723360094, + "learning_rate": 7.000452463584033e-06, + "loss": 0.3158, + "step": 35248 + }, + { + "epoch": 0.6090855681504009, + "grad_norm": 0.832685599581904, + "learning_rate": 6.999918588690341e-06, + "loss": 0.4308, + "step": 35249 + }, + { + "epoch": 0.6091028476638097, + "grad_norm": 1.6844223087999701, + "learning_rate": 6.999384723193138e-06, + "loss": 0.4553, + "step": 35250 + }, + { + "epoch": 0.6091201271772186, + "grad_norm": 1.0360131041726002, + "learning_rate": 6.998850867094086e-06, + "loss": 0.2944, + "step": 35251 + }, + { + "epoch": 0.6091374066906275, + "grad_norm": 1.2148058519605414, + "learning_rate": 6.998317020394862e-06, + "loss": 0.4158, + "step": 35252 + }, + { + "epoch": 0.6091546862040365, + "grad_norm": 0.6751368442356604, + "learning_rate": 6.9977831830971376e-06, + "loss": 0.2772, + "step": 35253 + }, + { + "epoch": 0.6091719657174454, + "grad_norm": 0.962063432829473, + "learning_rate": 6.997249355202587e-06, + "loss": 0.2955, + "step": 35254 + }, + { + "epoch": 0.6091892452308543, + "grad_norm": 0.5684191417609573, + "learning_rate": 6.996715536712875e-06, + "loss": 0.59, + "step": 35255 + }, + { + "epoch": 0.6092065247442632, + "grad_norm": 1.0731909709930414, + "learning_rate": 6.996181727629683e-06, + "loss": 0.4939, + "step": 35256 + }, + { + "epoch": 0.6092238042576721, + "grad_norm": 1.0871442997034029, + "learning_rate": 6.995647927954674e-06, + "loss": 0.4447, + "step": 35257 + }, + { + "epoch": 0.609241083771081, + "grad_norm": 1.72681918419635, + "learning_rate": 6.995114137689525e-06, + "loss": 0.4268, + "step": 35258 + }, + { + "epoch": 0.6092583632844899, + "grad_norm": 0.9980363596520194, + "learning_rate": 6.994580356835912e-06, + "loss": 0.4069, + "step": 35259 + }, + { + "epoch": 0.6092756427978988, + "grad_norm": 1.0576792689766852, + "learning_rate": 6.994046585395495e-06, + "loss": 0.2572, + "step": 35260 + }, + { + "epoch": 0.6092929223113077, + "grad_norm": 1.0025025202385054, + "learning_rate": 6.993512823369955e-06, + "loss": 0.4995, + "step": 35261 + }, + { + "epoch": 0.6093102018247166, + "grad_norm": 1.191850903777076, + "learning_rate": 6.9929790707609595e-06, + "loss": 0.4828, + "step": 35262 + }, + { + "epoch": 0.6093274813381255, + "grad_norm": 1.1762009305935643, + "learning_rate": 6.992445327570184e-06, + "loss": 0.2562, + "step": 35263 + }, + { + "epoch": 0.6093447608515344, + "grad_norm": 1.1670942955532895, + "learning_rate": 6.991911593799297e-06, + "loss": 0.4999, + "step": 35264 + }, + { + "epoch": 0.6093620403649433, + "grad_norm": 1.279710728071177, + "learning_rate": 6.991377869449976e-06, + "loss": 0.3586, + "step": 35265 + }, + { + "epoch": 0.6093793198783523, + "grad_norm": 0.8423862161954867, + "learning_rate": 6.990844154523881e-06, + "loss": 0.3485, + "step": 35266 + }, + { + "epoch": 0.6093965993917612, + "grad_norm": 0.9568571534228622, + "learning_rate": 6.990310449022697e-06, + "loss": 0.4676, + "step": 35267 + }, + { + "epoch": 0.6094138789051701, + "grad_norm": 2.0547275025438703, + "learning_rate": 6.989776752948089e-06, + "loss": 0.2225, + "step": 35268 + }, + { + "epoch": 0.609431158418579, + "grad_norm": 0.9711581213574102, + "learning_rate": 6.989243066301723e-06, + "loss": 0.3945, + "step": 35269 + }, + { + "epoch": 0.6094484379319879, + "grad_norm": 1.5687657860651076, + "learning_rate": 6.988709389085283e-06, + "loss": 0.4799, + "step": 35270 + }, + { + "epoch": 0.6094657174453968, + "grad_norm": 1.0595293968962027, + "learning_rate": 6.98817572130043e-06, + "loss": 0.341, + "step": 35271 + }, + { + "epoch": 0.6094829969588056, + "grad_norm": 1.3504225282487445, + "learning_rate": 6.987642062948841e-06, + "loss": 0.3876, + "step": 35272 + }, + { + "epoch": 0.6095002764722145, + "grad_norm": 1.317018252039442, + "learning_rate": 6.9871084140321845e-06, + "loss": 0.3325, + "step": 35273 + }, + { + "epoch": 0.6095175559856234, + "grad_norm": 1.07878031820772, + "learning_rate": 6.986574774552137e-06, + "loss": 0.3464, + "step": 35274 + }, + { + "epoch": 0.6095348354990323, + "grad_norm": 1.9549684749741643, + "learning_rate": 6.986041144510362e-06, + "loss": 0.3095, + "step": 35275 + }, + { + "epoch": 0.6095521150124412, + "grad_norm": 0.9580697203254478, + "learning_rate": 6.985507523908539e-06, + "loss": 0.4663, + "step": 35276 + }, + { + "epoch": 0.6095693945258501, + "grad_norm": 1.1881503737289882, + "learning_rate": 6.984973912748333e-06, + "loss": 0.3448, + "step": 35277 + }, + { + "epoch": 0.609586674039259, + "grad_norm": 1.348745677627263, + "learning_rate": 6.98444031103142e-06, + "loss": 0.5346, + "step": 35278 + }, + { + "epoch": 0.6096039535526679, + "grad_norm": 0.8229738149499224, + "learning_rate": 6.98390671875947e-06, + "loss": 0.3984, + "step": 35279 + }, + { + "epoch": 0.6096212330660769, + "grad_norm": 1.3581233761397848, + "learning_rate": 6.98337313593415e-06, + "loss": 0.2857, + "step": 35280 + }, + { + "epoch": 0.6096385125794858, + "grad_norm": 0.7433688511468195, + "learning_rate": 6.9828395625571355e-06, + "loss": 0.4101, + "step": 35281 + }, + { + "epoch": 0.6096557920928947, + "grad_norm": 0.7912476006826636, + "learning_rate": 6.982305998630096e-06, + "loss": 0.2794, + "step": 35282 + }, + { + "epoch": 0.6096730716063036, + "grad_norm": 1.1222027373287817, + "learning_rate": 6.981772444154705e-06, + "loss": 0.5931, + "step": 35283 + }, + { + "epoch": 0.6096903511197125, + "grad_norm": 1.2011355219988415, + "learning_rate": 6.98123889913263e-06, + "loss": 0.2356, + "step": 35284 + }, + { + "epoch": 0.6097076306331214, + "grad_norm": 1.1133548257466264, + "learning_rate": 6.980705363565549e-06, + "loss": 0.4987, + "step": 35285 + }, + { + "epoch": 0.6097249101465303, + "grad_norm": 1.0411827841269037, + "learning_rate": 6.980171837455123e-06, + "loss": 0.2947, + "step": 35286 + }, + { + "epoch": 0.6097421896599392, + "grad_norm": 1.277537555677701, + "learning_rate": 6.979638320803031e-06, + "loss": 0.3768, + "step": 35287 + }, + { + "epoch": 0.6097594691733481, + "grad_norm": 1.4287770649736196, + "learning_rate": 6.97910481361094e-06, + "loss": 0.5145, + "step": 35288 + }, + { + "epoch": 0.609776748686757, + "grad_norm": 1.3420397180404933, + "learning_rate": 6.978571315880523e-06, + "loss": 0.6507, + "step": 35289 + }, + { + "epoch": 0.6097940282001659, + "grad_norm": 1.0533318633033113, + "learning_rate": 6.978037827613453e-06, + "loss": 0.4687, + "step": 35290 + }, + { + "epoch": 0.6098113077135748, + "grad_norm": 1.2052168378048698, + "learning_rate": 6.977504348811394e-06, + "loss": 0.4654, + "step": 35291 + }, + { + "epoch": 0.6098285872269837, + "grad_norm": 1.43700601753643, + "learning_rate": 6.9769708794760226e-06, + "loss": 0.3366, + "step": 35292 + }, + { + "epoch": 0.6098458667403925, + "grad_norm": 1.0484179545338372, + "learning_rate": 6.976437419609007e-06, + "loss": 0.4658, + "step": 35293 + }, + { + "epoch": 0.6098631462538014, + "grad_norm": 1.214290586976228, + "learning_rate": 6.975903969212023e-06, + "loss": 0.5028, + "step": 35294 + }, + { + "epoch": 0.6098804257672104, + "grad_norm": 0.9124369560619373, + "learning_rate": 6.975370528286733e-06, + "loss": 0.362, + "step": 35295 + }, + { + "epoch": 0.6098977052806193, + "grad_norm": 1.0275621879033365, + "learning_rate": 6.974837096834814e-06, + "loss": 0.6305, + "step": 35296 + }, + { + "epoch": 0.6099149847940282, + "grad_norm": 0.9657269597493107, + "learning_rate": 6.974303674857934e-06, + "loss": 0.2485, + "step": 35297 + }, + { + "epoch": 0.6099322643074371, + "grad_norm": 0.9524947460996984, + "learning_rate": 6.973770262357767e-06, + "loss": 0.4167, + "step": 35298 + }, + { + "epoch": 0.609949543820846, + "grad_norm": 1.9796233877130738, + "learning_rate": 6.973236859335984e-06, + "loss": 0.3607, + "step": 35299 + }, + { + "epoch": 0.6099668233342549, + "grad_norm": 0.9045640632502969, + "learning_rate": 6.972703465794248e-06, + "loss": 0.4897, + "step": 35300 + }, + { + "epoch": 0.6099841028476638, + "grad_norm": 1.850820967907931, + "learning_rate": 6.972170081734237e-06, + "loss": 0.3864, + "step": 35301 + }, + { + "epoch": 0.6100013823610727, + "grad_norm": 1.299486392752385, + "learning_rate": 6.971636707157618e-06, + "loss": 0.3171, + "step": 35302 + }, + { + "epoch": 0.6100186618744816, + "grad_norm": 1.8537888127433442, + "learning_rate": 6.971103342066065e-06, + "loss": 0.5091, + "step": 35303 + }, + { + "epoch": 0.6100359413878905, + "grad_norm": 1.1950287804687088, + "learning_rate": 6.970569986461244e-06, + "loss": 0.5269, + "step": 35304 + }, + { + "epoch": 0.6100532209012994, + "grad_norm": 1.1637707012066658, + "learning_rate": 6.9700366403448315e-06, + "loss": 0.5498, + "step": 35305 + }, + { + "epoch": 0.6100705004147083, + "grad_norm": 1.182428924103276, + "learning_rate": 6.969503303718491e-06, + "loss": 0.6127, + "step": 35306 + }, + { + "epoch": 0.6100877799281172, + "grad_norm": 1.183176293166487, + "learning_rate": 6.968969976583899e-06, + "loss": 0.4457, + "step": 35307 + }, + { + "epoch": 0.6101050594415262, + "grad_norm": 0.8280535710822616, + "learning_rate": 6.968436658942721e-06, + "loss": 0.2262, + "step": 35308 + }, + { + "epoch": 0.6101223389549351, + "grad_norm": 0.8119207456941548, + "learning_rate": 6.967903350796632e-06, + "loss": 0.3403, + "step": 35309 + }, + { + "epoch": 0.610139618468344, + "grad_norm": 0.9407227644364784, + "learning_rate": 6.967370052147302e-06, + "loss": 0.4407, + "step": 35310 + }, + { + "epoch": 0.6101568979817529, + "grad_norm": 1.3527976249555436, + "learning_rate": 6.9668367629963954e-06, + "loss": 0.3673, + "step": 35311 + }, + { + "epoch": 0.6101741774951618, + "grad_norm": 1.173411354158477, + "learning_rate": 6.966303483345589e-06, + "loss": 0.2454, + "step": 35312 + }, + { + "epoch": 0.6101914570085707, + "grad_norm": 1.9403448406778805, + "learning_rate": 6.965770213196548e-06, + "loss": 0.4064, + "step": 35313 + }, + { + "epoch": 0.6102087365219795, + "grad_norm": 1.3934730851267052, + "learning_rate": 6.965236952550949e-06, + "loss": 0.4245, + "step": 35314 + }, + { + "epoch": 0.6102260160353884, + "grad_norm": 1.1663750119700698, + "learning_rate": 6.964703701410455e-06, + "loss": 0.5292, + "step": 35315 + }, + { + "epoch": 0.6102432955487973, + "grad_norm": 1.1418838348120308, + "learning_rate": 6.964170459776743e-06, + "loss": 0.6415, + "step": 35316 + }, + { + "epoch": 0.6102605750622062, + "grad_norm": 0.842609186630577, + "learning_rate": 6.963637227651475e-06, + "loss": 0.4048, + "step": 35317 + }, + { + "epoch": 0.6102778545756151, + "grad_norm": 1.3200255118140678, + "learning_rate": 6.9631040050363296e-06, + "loss": 0.371, + "step": 35318 + }, + { + "epoch": 0.610295134089024, + "grad_norm": 1.389051197943878, + "learning_rate": 6.962570791932972e-06, + "loss": 0.401, + "step": 35319 + }, + { + "epoch": 0.6103124136024329, + "grad_norm": 0.8631480973293371, + "learning_rate": 6.9620375883430754e-06, + "loss": 0.4069, + "step": 35320 + }, + { + "epoch": 0.6103296931158418, + "grad_norm": 1.2045079962174208, + "learning_rate": 6.961504394268308e-06, + "loss": 0.612, + "step": 35321 + }, + { + "epoch": 0.6103469726292508, + "grad_norm": 1.1560612182099759, + "learning_rate": 6.960971209710336e-06, + "loss": 0.2461, + "step": 35322 + }, + { + "epoch": 0.6103642521426597, + "grad_norm": 1.639532276909935, + "learning_rate": 6.9604380346708365e-06, + "loss": 0.4633, + "step": 35323 + }, + { + "epoch": 0.6103815316560686, + "grad_norm": 0.7536253836930291, + "learning_rate": 6.959904869151475e-06, + "loss": 0.4403, + "step": 35324 + }, + { + "epoch": 0.6103988111694775, + "grad_norm": 0.7650414971949585, + "learning_rate": 6.959371713153924e-06, + "loss": 0.2707, + "step": 35325 + }, + { + "epoch": 0.6104160906828864, + "grad_norm": 1.5142816252073232, + "learning_rate": 6.958838566679849e-06, + "loss": 0.4056, + "step": 35326 + }, + { + "epoch": 0.6104333701962953, + "grad_norm": 1.1992272128184895, + "learning_rate": 6.958305429730925e-06, + "loss": 0.6414, + "step": 35327 + }, + { + "epoch": 0.6104506497097042, + "grad_norm": 1.5882360569620673, + "learning_rate": 6.9577723023088174e-06, + "loss": 0.3883, + "step": 35328 + }, + { + "epoch": 0.6104679292231131, + "grad_norm": 1.6955484710524629, + "learning_rate": 6.9572391844152e-06, + "loss": 0.48, + "step": 35329 + }, + { + "epoch": 0.610485208736522, + "grad_norm": 1.3321033780549627, + "learning_rate": 6.956706076051742e-06, + "loss": 0.3537, + "step": 35330 + }, + { + "epoch": 0.6105024882499309, + "grad_norm": 1.338404713722097, + "learning_rate": 6.956172977220109e-06, + "loss": 0.2295, + "step": 35331 + }, + { + "epoch": 0.6105197677633398, + "grad_norm": 1.1821772308815863, + "learning_rate": 6.955639887921975e-06, + "loss": 0.3679, + "step": 35332 + }, + { + "epoch": 0.6105370472767487, + "grad_norm": 1.0981931050970124, + "learning_rate": 6.9551068081590065e-06, + "loss": 0.2405, + "step": 35333 + }, + { + "epoch": 0.6105543267901576, + "grad_norm": 0.8030855554149577, + "learning_rate": 6.954573737932879e-06, + "loss": 0.5913, + "step": 35334 + }, + { + "epoch": 0.6105716063035664, + "grad_norm": 1.4872133715132143, + "learning_rate": 6.954040677245254e-06, + "loss": 0.4478, + "step": 35335 + }, + { + "epoch": 0.6105888858169753, + "grad_norm": 1.5270604407553658, + "learning_rate": 6.953507626097806e-06, + "loss": 0.3894, + "step": 35336 + }, + { + "epoch": 0.6106061653303843, + "grad_norm": 1.0267736412077793, + "learning_rate": 6.952974584492203e-06, + "loss": 0.459, + "step": 35337 + }, + { + "epoch": 0.6106234448437932, + "grad_norm": 1.2633576066280756, + "learning_rate": 6.952441552430117e-06, + "loss": 0.482, + "step": 35338 + }, + { + "epoch": 0.6106407243572021, + "grad_norm": 1.1215376113360755, + "learning_rate": 6.9519085299132135e-06, + "loss": 0.3048, + "step": 35339 + }, + { + "epoch": 0.610658003870611, + "grad_norm": 1.1814347456316825, + "learning_rate": 6.951375516943168e-06, + "loss": 0.3492, + "step": 35340 + }, + { + "epoch": 0.6106752833840199, + "grad_norm": 0.6801831838822204, + "learning_rate": 6.950842513521644e-06, + "loss": 0.4183, + "step": 35341 + }, + { + "epoch": 0.6106925628974288, + "grad_norm": 1.173662354720408, + "learning_rate": 6.950309519650311e-06, + "loss": 0.3298, + "step": 35342 + }, + { + "epoch": 0.6107098424108377, + "grad_norm": 0.7632464855015727, + "learning_rate": 6.949776535330842e-06, + "loss": 0.2366, + "step": 35343 + }, + { + "epoch": 0.6107271219242466, + "grad_norm": 1.6296942231601022, + "learning_rate": 6.949243560564903e-06, + "loss": 0.4911, + "step": 35344 + }, + { + "epoch": 0.6107444014376555, + "grad_norm": 0.996466069808647, + "learning_rate": 6.948710595354169e-06, + "loss": 0.4496, + "step": 35345 + }, + { + "epoch": 0.6107616809510644, + "grad_norm": 0.8733228024963539, + "learning_rate": 6.948177639700302e-06, + "loss": 0.2699, + "step": 35346 + }, + { + "epoch": 0.6107789604644733, + "grad_norm": 1.2074310996017432, + "learning_rate": 6.947644693604976e-06, + "loss": 0.4561, + "step": 35347 + }, + { + "epoch": 0.6107962399778822, + "grad_norm": 1.8215465749884225, + "learning_rate": 6.947111757069857e-06, + "loss": 0.3941, + "step": 35348 + }, + { + "epoch": 0.6108135194912911, + "grad_norm": 1.5187606296176719, + "learning_rate": 6.946578830096618e-06, + "loss": 0.3988, + "step": 35349 + }, + { + "epoch": 0.6108307990047, + "grad_norm": 1.4963725790593958, + "learning_rate": 6.946045912686923e-06, + "loss": 0.4538, + "step": 35350 + }, + { + "epoch": 0.610848078518109, + "grad_norm": 1.1691010359756675, + "learning_rate": 6.9455130048424505e-06, + "loss": 0.3204, + "step": 35351 + }, + { + "epoch": 0.6108653580315179, + "grad_norm": 0.7961481289005973, + "learning_rate": 6.944980106564859e-06, + "loss": 0.3328, + "step": 35352 + }, + { + "epoch": 0.6108826375449268, + "grad_norm": 1.1470556695951124, + "learning_rate": 6.9444472178558205e-06, + "loss": 0.6093, + "step": 35353 + }, + { + "epoch": 0.6108999170583357, + "grad_norm": 1.2860342127602364, + "learning_rate": 6.943914338717011e-06, + "loss": 0.5709, + "step": 35354 + }, + { + "epoch": 0.6109171965717446, + "grad_norm": 1.3784684698152596, + "learning_rate": 6.943381469150089e-06, + "loss": 0.2822, + "step": 35355 + }, + { + "epoch": 0.6109344760851534, + "grad_norm": 1.0626045438161205, + "learning_rate": 6.94284860915673e-06, + "loss": 0.6013, + "step": 35356 + }, + { + "epoch": 0.6109517555985623, + "grad_norm": 0.4505001823764436, + "learning_rate": 6.9423157587386e-06, + "loss": 0.4751, + "step": 35357 + }, + { + "epoch": 0.6109690351119712, + "grad_norm": 0.928626099745975, + "learning_rate": 6.941782917897373e-06, + "loss": 0.3757, + "step": 35358 + }, + { + "epoch": 0.6109863146253801, + "grad_norm": 0.9491602214501308, + "learning_rate": 6.94125008663471e-06, + "loss": 0.5138, + "step": 35359 + }, + { + "epoch": 0.611003594138789, + "grad_norm": 0.9579131627018278, + "learning_rate": 6.940717264952289e-06, + "loss": 0.3135, + "step": 35360 + }, + { + "epoch": 0.6110208736521979, + "grad_norm": 1.0828994886444738, + "learning_rate": 6.940184452851773e-06, + "loss": 0.4093, + "step": 35361 + }, + { + "epoch": 0.6110381531656068, + "grad_norm": 1.185479831661158, + "learning_rate": 6.939651650334829e-06, + "loss": 0.2819, + "step": 35362 + }, + { + "epoch": 0.6110554326790157, + "grad_norm": 0.9169936904581537, + "learning_rate": 6.9391188574031295e-06, + "loss": 0.473, + "step": 35363 + }, + { + "epoch": 0.6110727121924246, + "grad_norm": 0.8235571522372636, + "learning_rate": 6.938586074058342e-06, + "loss": 0.3338, + "step": 35364 + }, + { + "epoch": 0.6110899917058336, + "grad_norm": 0.47153330381865693, + "learning_rate": 6.9380533003021385e-06, + "loss": 0.594, + "step": 35365 + }, + { + "epoch": 0.6111072712192425, + "grad_norm": 1.574042965830092, + "learning_rate": 6.937520536136182e-06, + "loss": 0.4924, + "step": 35366 + }, + { + "epoch": 0.6111245507326514, + "grad_norm": 1.036392714845275, + "learning_rate": 6.936987781562145e-06, + "loss": 0.6213, + "step": 35367 + }, + { + "epoch": 0.6111418302460603, + "grad_norm": 1.1142213002167174, + "learning_rate": 6.936455036581693e-06, + "loss": 0.506, + "step": 35368 + }, + { + "epoch": 0.6111591097594692, + "grad_norm": 0.9118989665592894, + "learning_rate": 6.935922301196498e-06, + "loss": 0.2886, + "step": 35369 + }, + { + "epoch": 0.6111763892728781, + "grad_norm": 1.764478220290535, + "learning_rate": 6.935389575408226e-06, + "loss": 0.5917, + "step": 35370 + }, + { + "epoch": 0.611193668786287, + "grad_norm": 0.8240065852780487, + "learning_rate": 6.934856859218552e-06, + "loss": 0.4483, + "step": 35371 + }, + { + "epoch": 0.6112109482996959, + "grad_norm": 0.8978224230650572, + "learning_rate": 6.934324152629135e-06, + "loss": 0.4553, + "step": 35372 + }, + { + "epoch": 0.6112282278131048, + "grad_norm": 1.5043327428544597, + "learning_rate": 6.933791455641647e-06, + "loss": 0.2932, + "step": 35373 + }, + { + "epoch": 0.6112455073265137, + "grad_norm": 0.7329642732282317, + "learning_rate": 6.933258768257761e-06, + "loss": 0.335, + "step": 35374 + }, + { + "epoch": 0.6112627868399226, + "grad_norm": 0.4798696283473697, + "learning_rate": 6.932726090479138e-06, + "loss": 0.4614, + "step": 35375 + }, + { + "epoch": 0.6112800663533315, + "grad_norm": 1.0933101564906462, + "learning_rate": 6.932193422307451e-06, + "loss": 0.2711, + "step": 35376 + }, + { + "epoch": 0.6112973458667403, + "grad_norm": 0.9119278464910627, + "learning_rate": 6.931660763744366e-06, + "loss": 0.3559, + "step": 35377 + }, + { + "epoch": 0.6113146253801492, + "grad_norm": 0.746417445573842, + "learning_rate": 6.9311281147915545e-06, + "loss": 0.4234, + "step": 35378 + }, + { + "epoch": 0.6113319048935582, + "grad_norm": 1.0136470640905237, + "learning_rate": 6.930595475450681e-06, + "loss": 0.2774, + "step": 35379 + }, + { + "epoch": 0.6113491844069671, + "grad_norm": 1.0225274097076482, + "learning_rate": 6.93006284572342e-06, + "loss": 0.3783, + "step": 35380 + }, + { + "epoch": 0.611366463920376, + "grad_norm": 1.1782480873736332, + "learning_rate": 6.9295302256114334e-06, + "loss": 0.4394, + "step": 35381 + }, + { + "epoch": 0.6113837434337849, + "grad_norm": 1.2480816587821533, + "learning_rate": 6.9289976151163895e-06, + "loss": 0.4243, + "step": 35382 + }, + { + "epoch": 0.6114010229471938, + "grad_norm": 1.8316824302240988, + "learning_rate": 6.92846501423996e-06, + "loss": 0.3473, + "step": 35383 + }, + { + "epoch": 0.6114183024606027, + "grad_norm": 1.129003011970606, + "learning_rate": 6.9279324229838096e-06, + "loss": 0.4049, + "step": 35384 + }, + { + "epoch": 0.6114355819740116, + "grad_norm": 0.7282599165494738, + "learning_rate": 6.927399841349613e-06, + "loss": 0.4136, + "step": 35385 + }, + { + "epoch": 0.6114528614874205, + "grad_norm": 1.6089837702525187, + "learning_rate": 6.9268672693390295e-06, + "loss": 0.4936, + "step": 35386 + }, + { + "epoch": 0.6114701410008294, + "grad_norm": 1.4543226816080874, + "learning_rate": 6.926334706953732e-06, + "loss": 0.5994, + "step": 35387 + }, + { + "epoch": 0.6114874205142383, + "grad_norm": 1.0919305534326436, + "learning_rate": 6.925802154195387e-06, + "loss": 0.2713, + "step": 35388 + }, + { + "epoch": 0.6115047000276472, + "grad_norm": 0.860913577625611, + "learning_rate": 6.925269611065666e-06, + "loss": 0.2996, + "step": 35389 + }, + { + "epoch": 0.6115219795410561, + "grad_norm": 1.5249463043560527, + "learning_rate": 6.92473707756623e-06, + "loss": 0.3392, + "step": 35390 + }, + { + "epoch": 0.611539259054465, + "grad_norm": 0.791992463482253, + "learning_rate": 6.924204553698757e-06, + "loss": 0.6904, + "step": 35391 + }, + { + "epoch": 0.611556538567874, + "grad_norm": 1.4211132771208537, + "learning_rate": 6.923672039464907e-06, + "loss": 0.3797, + "step": 35392 + }, + { + "epoch": 0.6115738180812829, + "grad_norm": 2.0238226823256715, + "learning_rate": 6.923139534866347e-06, + "loss": 0.4543, + "step": 35393 + }, + { + "epoch": 0.6115910975946918, + "grad_norm": 1.1252439722111442, + "learning_rate": 6.9226070399047526e-06, + "loss": 0.4216, + "step": 35394 + }, + { + "epoch": 0.6116083771081007, + "grad_norm": 1.0101810016700332, + "learning_rate": 6.922074554581783e-06, + "loss": 0.4164, + "step": 35395 + }, + { + "epoch": 0.6116256566215096, + "grad_norm": 0.9314770570222796, + "learning_rate": 6.92154207889911e-06, + "loss": 0.3937, + "step": 35396 + }, + { + "epoch": 0.6116429361349185, + "grad_norm": 1.5777669890355253, + "learning_rate": 6.921009612858402e-06, + "loss": 0.4676, + "step": 35397 + }, + { + "epoch": 0.6116602156483273, + "grad_norm": 0.7881574723995247, + "learning_rate": 6.920477156461325e-06, + "loss": 0.3515, + "step": 35398 + }, + { + "epoch": 0.6116774951617362, + "grad_norm": 1.018964082310466, + "learning_rate": 6.9199447097095475e-06, + "loss": 0.3042, + "step": 35399 + }, + { + "epoch": 0.6116947746751451, + "grad_norm": 1.6232017413868367, + "learning_rate": 6.919412272604741e-06, + "loss": 0.3605, + "step": 35400 + }, + { + "epoch": 0.611712054188554, + "grad_norm": 1.1241697623268865, + "learning_rate": 6.918879845148565e-06, + "loss": 0.3978, + "step": 35401 + }, + { + "epoch": 0.6117293337019629, + "grad_norm": 1.4127458173375373, + "learning_rate": 6.918347427342693e-06, + "loss": 0.4974, + "step": 35402 + }, + { + "epoch": 0.6117466132153718, + "grad_norm": 1.5203713354843933, + "learning_rate": 6.917815019188791e-06, + "loss": 0.2626, + "step": 35403 + }, + { + "epoch": 0.6117638927287807, + "grad_norm": 1.0645384602425358, + "learning_rate": 6.917282620688526e-06, + "loss": 0.4145, + "step": 35404 + }, + { + "epoch": 0.6117811722421896, + "grad_norm": 1.6306286191494221, + "learning_rate": 6.916750231843569e-06, + "loss": 0.3463, + "step": 35405 + }, + { + "epoch": 0.6117984517555985, + "grad_norm": 1.0954168390152128, + "learning_rate": 6.916217852655582e-06, + "loss": 0.3973, + "step": 35406 + }, + { + "epoch": 0.6118157312690075, + "grad_norm": 1.0052164575874767, + "learning_rate": 6.9156854831262355e-06, + "loss": 0.3769, + "step": 35407 + }, + { + "epoch": 0.6118330107824164, + "grad_norm": 1.0164010674260444, + "learning_rate": 6.9151531232571945e-06, + "loss": 0.4164, + "step": 35408 + }, + { + "epoch": 0.6118502902958253, + "grad_norm": 1.0859336943022324, + "learning_rate": 6.914620773050131e-06, + "loss": 0.4188, + "step": 35409 + }, + { + "epoch": 0.6118675698092342, + "grad_norm": 1.3090492815637365, + "learning_rate": 6.914088432506707e-06, + "loss": 0.3652, + "step": 35410 + }, + { + "epoch": 0.6118848493226431, + "grad_norm": 1.0610748845504467, + "learning_rate": 6.913556101628598e-06, + "loss": 0.5948, + "step": 35411 + }, + { + "epoch": 0.611902128836052, + "grad_norm": 1.2620953097662684, + "learning_rate": 6.913023780417463e-06, + "loss": 0.3175, + "step": 35412 + }, + { + "epoch": 0.6119194083494609, + "grad_norm": 1.03469287003114, + "learning_rate": 6.912491468874969e-06, + "loss": 0.4633, + "step": 35413 + }, + { + "epoch": 0.6119366878628698, + "grad_norm": 1.3094774894790449, + "learning_rate": 6.911959167002793e-06, + "loss": 0.261, + "step": 35414 + }, + { + "epoch": 0.6119539673762787, + "grad_norm": 1.210357964166836, + "learning_rate": 6.9114268748025894e-06, + "loss": 0.373, + "step": 35415 + }, + { + "epoch": 0.6119712468896876, + "grad_norm": 1.0784950949805179, + "learning_rate": 6.910894592276034e-06, + "loss": 0.5492, + "step": 35416 + }, + { + "epoch": 0.6119885264030965, + "grad_norm": 0.9873446796737145, + "learning_rate": 6.91036231942479e-06, + "loss": 0.5051, + "step": 35417 + }, + { + "epoch": 0.6120058059165054, + "grad_norm": 1.089337432121999, + "learning_rate": 6.909830056250527e-06, + "loss": 0.5647, + "step": 35418 + }, + { + "epoch": 0.6120230854299143, + "grad_norm": 1.019178460330326, + "learning_rate": 6.90929780275491e-06, + "loss": 0.4215, + "step": 35419 + }, + { + "epoch": 0.6120403649433231, + "grad_norm": 0.703455813984072, + "learning_rate": 6.908765558939611e-06, + "loss": 0.2944, + "step": 35420 + }, + { + "epoch": 0.612057644456732, + "grad_norm": 1.4330202221454296, + "learning_rate": 6.9082333248062885e-06, + "loss": 0.4623, + "step": 35421 + }, + { + "epoch": 0.612074923970141, + "grad_norm": 0.8464968961081754, + "learning_rate": 6.907701100356616e-06, + "loss": 0.3311, + "step": 35422 + }, + { + "epoch": 0.6120922034835499, + "grad_norm": 0.9547326399102096, + "learning_rate": 6.90716888559226e-06, + "loss": 0.334, + "step": 35423 + }, + { + "epoch": 0.6121094829969588, + "grad_norm": 0.5955747653570714, + "learning_rate": 6.906636680514884e-06, + "loss": 0.2646, + "step": 35424 + }, + { + "epoch": 0.6121267625103677, + "grad_norm": 1.0805033136885702, + "learning_rate": 6.90610448512616e-06, + "loss": 0.3282, + "step": 35425 + }, + { + "epoch": 0.6121440420237766, + "grad_norm": 1.4115864230365596, + "learning_rate": 6.90557229942775e-06, + "loss": 0.37, + "step": 35426 + }, + { + "epoch": 0.6121613215371855, + "grad_norm": 1.2237823082247696, + "learning_rate": 6.905040123421323e-06, + "loss": 0.2962, + "step": 35427 + }, + { + "epoch": 0.6121786010505944, + "grad_norm": 0.7557086436176497, + "learning_rate": 6.904507957108543e-06, + "loss": 0.4262, + "step": 35428 + }, + { + "epoch": 0.6121958805640033, + "grad_norm": 1.034166023625995, + "learning_rate": 6.9039758004910815e-06, + "loss": 0.3845, + "step": 35429 + }, + { + "epoch": 0.6122131600774122, + "grad_norm": 1.4548333990752111, + "learning_rate": 6.903443653570602e-06, + "loss": 0.2761, + "step": 35430 + }, + { + "epoch": 0.6122304395908211, + "grad_norm": 1.283363616538022, + "learning_rate": 6.902911516348776e-06, + "loss": 0.5569, + "step": 35431 + }, + { + "epoch": 0.61224771910423, + "grad_norm": 1.293884979847113, + "learning_rate": 6.902379388827263e-06, + "loss": 0.3844, + "step": 35432 + }, + { + "epoch": 0.612264998617639, + "grad_norm": 0.9717176853007677, + "learning_rate": 6.901847271007733e-06, + "loss": 0.2691, + "step": 35433 + }, + { + "epoch": 0.6122822781310479, + "grad_norm": 1.792437454918847, + "learning_rate": 6.901315162891857e-06, + "loss": 0.5916, + "step": 35434 + }, + { + "epoch": 0.6122995576444568, + "grad_norm": 1.4931422237757304, + "learning_rate": 6.900783064481293e-06, + "loss": 0.5526, + "step": 35435 + }, + { + "epoch": 0.6123168371578657, + "grad_norm": 1.0268686814011734, + "learning_rate": 6.900250975777714e-06, + "loss": 0.3754, + "step": 35436 + }, + { + "epoch": 0.6123341166712746, + "grad_norm": 1.112809349644566, + "learning_rate": 6.899718896782782e-06, + "loss": 0.4408, + "step": 35437 + }, + { + "epoch": 0.6123513961846835, + "grad_norm": 1.1039851819033142, + "learning_rate": 6.899186827498168e-06, + "loss": 0.406, + "step": 35438 + }, + { + "epoch": 0.6123686756980924, + "grad_norm": 0.8035252684074998, + "learning_rate": 6.898654767925535e-06, + "loss": 0.236, + "step": 35439 + }, + { + "epoch": 0.6123859552115013, + "grad_norm": 0.9441463375684129, + "learning_rate": 6.898122718066555e-06, + "loss": 0.5054, + "step": 35440 + }, + { + "epoch": 0.6124032347249101, + "grad_norm": 1.6683382894596819, + "learning_rate": 6.897590677922886e-06, + "loss": 0.5515, + "step": 35441 + }, + { + "epoch": 0.612420514238319, + "grad_norm": 1.2480364984073893, + "learning_rate": 6.897058647496201e-06, + "loss": 0.4039, + "step": 35442 + }, + { + "epoch": 0.6124377937517279, + "grad_norm": 0.8594096358046203, + "learning_rate": 6.896526626788165e-06, + "loss": 0.2608, + "step": 35443 + }, + { + "epoch": 0.6124550732651368, + "grad_norm": 1.1856577075343817, + "learning_rate": 6.8959946158004395e-06, + "loss": 0.3073, + "step": 35444 + }, + { + "epoch": 0.6124723527785457, + "grad_norm": 1.0207491292517032, + "learning_rate": 6.8954626145347e-06, + "loss": 0.3254, + "step": 35445 + }, + { + "epoch": 0.6124896322919546, + "grad_norm": 1.0166393954145323, + "learning_rate": 6.894930622992604e-06, + "loss": 0.3265, + "step": 35446 + }, + { + "epoch": 0.6125069118053635, + "grad_norm": 1.374722520463741, + "learning_rate": 6.894398641175821e-06, + "loss": 0.4069, + "step": 35447 + }, + { + "epoch": 0.6125241913187724, + "grad_norm": 1.228884221141243, + "learning_rate": 6.893866669086018e-06, + "loss": 0.3731, + "step": 35448 + }, + { + "epoch": 0.6125414708321814, + "grad_norm": 1.240969269782526, + "learning_rate": 6.893334706724861e-06, + "loss": 0.5434, + "step": 35449 + }, + { + "epoch": 0.6125587503455903, + "grad_norm": 0.5090634397852054, + "learning_rate": 6.892802754094015e-06, + "loss": 0.6483, + "step": 35450 + }, + { + "epoch": 0.6125760298589992, + "grad_norm": 1.5483682684368667, + "learning_rate": 6.89227081119515e-06, + "loss": 0.2987, + "step": 35451 + }, + { + "epoch": 0.6125933093724081, + "grad_norm": 1.142839679697535, + "learning_rate": 6.891738878029925e-06, + "loss": 0.4677, + "step": 35452 + }, + { + "epoch": 0.612610588885817, + "grad_norm": 0.4743542330269775, + "learning_rate": 6.891206954600013e-06, + "loss": 0.6311, + "step": 35453 + }, + { + "epoch": 0.6126278683992259, + "grad_norm": 1.1028226449952243, + "learning_rate": 6.890675040907079e-06, + "loss": 0.416, + "step": 35454 + }, + { + "epoch": 0.6126451479126348, + "grad_norm": 1.0534039625149918, + "learning_rate": 6.8901431369527826e-06, + "loss": 0.4602, + "step": 35455 + }, + { + "epoch": 0.6126624274260437, + "grad_norm": 1.4220579689980941, + "learning_rate": 6.8896112427387965e-06, + "loss": 0.2717, + "step": 35456 + }, + { + "epoch": 0.6126797069394526, + "grad_norm": 0.7046960348924949, + "learning_rate": 6.889079358266781e-06, + "loss": 0.3008, + "step": 35457 + }, + { + "epoch": 0.6126969864528615, + "grad_norm": 1.424755675879116, + "learning_rate": 6.8885474835384095e-06, + "loss": 0.3686, + "step": 35458 + }, + { + "epoch": 0.6127142659662704, + "grad_norm": 1.0393956400728286, + "learning_rate": 6.888015618555341e-06, + "loss": 0.4754, + "step": 35459 + }, + { + "epoch": 0.6127315454796793, + "grad_norm": 1.686077265741946, + "learning_rate": 6.887483763319247e-06, + "loss": 0.4271, + "step": 35460 + }, + { + "epoch": 0.6127488249930882, + "grad_norm": 0.8770328342018475, + "learning_rate": 6.8869519178317875e-06, + "loss": 0.4127, + "step": 35461 + }, + { + "epoch": 0.612766104506497, + "grad_norm": 1.3494401298625796, + "learning_rate": 6.886420082094633e-06, + "loss": 0.4895, + "step": 35462 + }, + { + "epoch": 0.612783384019906, + "grad_norm": 1.1972336391212879, + "learning_rate": 6.885888256109445e-06, + "loss": 0.3484, + "step": 35463 + }, + { + "epoch": 0.6128006635333149, + "grad_norm": 0.8211680046771341, + "learning_rate": 6.885356439877894e-06, + "loss": 0.5844, + "step": 35464 + }, + { + "epoch": 0.6128179430467238, + "grad_norm": 0.9352652918390104, + "learning_rate": 6.884824633401646e-06, + "loss": 0.258, + "step": 35465 + }, + { + "epoch": 0.6128352225601327, + "grad_norm": 1.3178315168518637, + "learning_rate": 6.884292836682358e-06, + "loss": 0.4413, + "step": 35466 + }, + { + "epoch": 0.6128525020735416, + "grad_norm": 1.2523359717080067, + "learning_rate": 6.883761049721706e-06, + "loss": 0.4288, + "step": 35467 + }, + { + "epoch": 0.6128697815869505, + "grad_norm": 1.1771873089242022, + "learning_rate": 6.883229272521348e-06, + "loss": 0.316, + "step": 35468 + }, + { + "epoch": 0.6128870611003594, + "grad_norm": 1.2984188876665679, + "learning_rate": 6.882697505082953e-06, + "loss": 0.1906, + "step": 35469 + }, + { + "epoch": 0.6129043406137683, + "grad_norm": 0.8285685943845394, + "learning_rate": 6.882165747408187e-06, + "loss": 0.4608, + "step": 35470 + }, + { + "epoch": 0.6129216201271772, + "grad_norm": 1.1080731706649094, + "learning_rate": 6.881633999498718e-06, + "loss": 0.5366, + "step": 35471 + }, + { + "epoch": 0.6129388996405861, + "grad_norm": 1.4490269762403993, + "learning_rate": 6.881102261356205e-06, + "loss": 0.4031, + "step": 35472 + }, + { + "epoch": 0.612956179153995, + "grad_norm": 1.4867310595563665, + "learning_rate": 6.880570532982318e-06, + "loss": 0.2893, + "step": 35473 + }, + { + "epoch": 0.6129734586674039, + "grad_norm": 0.9677965031899822, + "learning_rate": 6.8800388143787234e-06, + "loss": 0.3043, + "step": 35474 + }, + { + "epoch": 0.6129907381808128, + "grad_norm": 0.9478286603959198, + "learning_rate": 6.8795071055470806e-06, + "loss": 0.3381, + "step": 35475 + }, + { + "epoch": 0.6130080176942218, + "grad_norm": 1.0017236553708364, + "learning_rate": 6.87897540648906e-06, + "loss": 0.2359, + "step": 35476 + }, + { + "epoch": 0.6130252972076307, + "grad_norm": 1.954194071462672, + "learning_rate": 6.878443717206325e-06, + "loss": 0.5584, + "step": 35477 + }, + { + "epoch": 0.6130425767210396, + "grad_norm": 2.07060160891734, + "learning_rate": 6.877912037700541e-06, + "loss": 0.4239, + "step": 35478 + }, + { + "epoch": 0.6130598562344485, + "grad_norm": 1.0099605186188776, + "learning_rate": 6.877380367973373e-06, + "loss": 0.4055, + "step": 35479 + }, + { + "epoch": 0.6130771357478574, + "grad_norm": 0.7808278427785745, + "learning_rate": 6.876848708026491e-06, + "loss": 0.3562, + "step": 35480 + }, + { + "epoch": 0.6130944152612663, + "grad_norm": 1.0495085988575594, + "learning_rate": 6.876317057861552e-06, + "loss": 0.3777, + "step": 35481 + }, + { + "epoch": 0.6131116947746752, + "grad_norm": 0.7104540370430651, + "learning_rate": 6.875785417480229e-06, + "loss": 0.4597, + "step": 35482 + }, + { + "epoch": 0.613128974288084, + "grad_norm": 1.4149273539280836, + "learning_rate": 6.8752537868841795e-06, + "loss": 0.4942, + "step": 35483 + }, + { + "epoch": 0.6131462538014929, + "grad_norm": 1.363754962361964, + "learning_rate": 6.8747221660750765e-06, + "loss": 0.5391, + "step": 35484 + }, + { + "epoch": 0.6131635333149018, + "grad_norm": 1.7774456771138962, + "learning_rate": 6.874190555054581e-06, + "loss": 0.4935, + "step": 35485 + }, + { + "epoch": 0.6131808128283107, + "grad_norm": 1.4066710612841296, + "learning_rate": 6.873658953824356e-06, + "loss": 0.5226, + "step": 35486 + }, + { + "epoch": 0.6131980923417196, + "grad_norm": 0.8918022346071645, + "learning_rate": 6.873127362386069e-06, + "loss": 0.3201, + "step": 35487 + }, + { + "epoch": 0.6132153718551285, + "grad_norm": 1.1803203823959476, + "learning_rate": 6.872595780741385e-06, + "loss": 0.3621, + "step": 35488 + }, + { + "epoch": 0.6132326513685374, + "grad_norm": 0.7925053353422884, + "learning_rate": 6.8720642088919685e-06, + "loss": 0.3011, + "step": 35489 + }, + { + "epoch": 0.6132499308819463, + "grad_norm": 1.3561728565648625, + "learning_rate": 6.871532646839484e-06, + "loss": 0.3943, + "step": 35490 + }, + { + "epoch": 0.6132672103953553, + "grad_norm": 0.6493879477511331, + "learning_rate": 6.871001094585601e-06, + "loss": 0.1904, + "step": 35491 + }, + { + "epoch": 0.6132844899087642, + "grad_norm": 0.8415825124880786, + "learning_rate": 6.8704695521319756e-06, + "loss": 0.4402, + "step": 35492 + }, + { + "epoch": 0.6133017694221731, + "grad_norm": 1.1609506316938016, + "learning_rate": 6.86993801948028e-06, + "loss": 0.4569, + "step": 35493 + }, + { + "epoch": 0.613319048935582, + "grad_norm": 1.235264710788817, + "learning_rate": 6.869406496632174e-06, + "loss": 0.4574, + "step": 35494 + }, + { + "epoch": 0.6133363284489909, + "grad_norm": 1.2015724892635555, + "learning_rate": 6.868874983589329e-06, + "loss": 0.2682, + "step": 35495 + }, + { + "epoch": 0.6133536079623998, + "grad_norm": 1.1244415624339659, + "learning_rate": 6.8683434803534034e-06, + "loss": 0.4381, + "step": 35496 + }, + { + "epoch": 0.6133708874758087, + "grad_norm": 1.1767136051478122, + "learning_rate": 6.867811986926062e-06, + "loss": 0.3416, + "step": 35497 + }, + { + "epoch": 0.6133881669892176, + "grad_norm": 1.3028325760634263, + "learning_rate": 6.867280503308973e-06, + "loss": 0.3588, + "step": 35498 + }, + { + "epoch": 0.6134054465026265, + "grad_norm": 1.4923618054902805, + "learning_rate": 6.866749029503799e-06, + "loss": 0.3571, + "step": 35499 + }, + { + "epoch": 0.6134227260160354, + "grad_norm": 1.2168467609809306, + "learning_rate": 6.866217565512207e-06, + "loss": 0.3088, + "step": 35500 + }, + { + "epoch": 0.6134400055294443, + "grad_norm": 0.7069580870221893, + "learning_rate": 6.865686111335857e-06, + "loss": 0.4274, + "step": 35501 + }, + { + "epoch": 0.6134572850428532, + "grad_norm": 2.2706267926432884, + "learning_rate": 6.865154666976419e-06, + "loss": 0.3187, + "step": 35502 + }, + { + "epoch": 0.6134745645562621, + "grad_norm": 1.6584974274853714, + "learning_rate": 6.864623232435552e-06, + "loss": 0.5961, + "step": 35503 + }, + { + "epoch": 0.6134918440696709, + "grad_norm": 0.821379304902422, + "learning_rate": 6.864091807714925e-06, + "loss": 0.2973, + "step": 35504 + }, + { + "epoch": 0.6135091235830799, + "grad_norm": 0.5058664545824997, + "learning_rate": 6.8635603928162045e-06, + "loss": 0.5743, + "step": 35505 + }, + { + "epoch": 0.6135264030964888, + "grad_norm": 1.731522414098637, + "learning_rate": 6.863028987741045e-06, + "loss": 0.392, + "step": 35506 + }, + { + "epoch": 0.6135436826098977, + "grad_norm": 1.123980103850424, + "learning_rate": 6.8624975924911195e-06, + "loss": 0.2621, + "step": 35507 + }, + { + "epoch": 0.6135609621233066, + "grad_norm": 1.6205858717906807, + "learning_rate": 6.861966207068089e-06, + "loss": 0.3672, + "step": 35508 + }, + { + "epoch": 0.6135782416367155, + "grad_norm": 0.9181305025617931, + "learning_rate": 6.86143483147362e-06, + "loss": 0.1982, + "step": 35509 + }, + { + "epoch": 0.6135955211501244, + "grad_norm": 0.9390633461946138, + "learning_rate": 6.860903465709374e-06, + "loss": 0.5006, + "step": 35510 + }, + { + "epoch": 0.6136128006635333, + "grad_norm": 0.71090635052621, + "learning_rate": 6.8603721097770205e-06, + "loss": 0.1849, + "step": 35511 + }, + { + "epoch": 0.6136300801769422, + "grad_norm": 1.0951618955192937, + "learning_rate": 6.8598407636782164e-06, + "loss": 0.3668, + "step": 35512 + }, + { + "epoch": 0.6136473596903511, + "grad_norm": 0.9888831194036476, + "learning_rate": 6.859309427414632e-06, + "loss": 0.248, + "step": 35513 + }, + { + "epoch": 0.61366463920376, + "grad_norm": 0.6899081216197007, + "learning_rate": 6.858778100987926e-06, + "loss": 0.7115, + "step": 35514 + }, + { + "epoch": 0.6136819187171689, + "grad_norm": 1.053713129599789, + "learning_rate": 6.858246784399772e-06, + "loss": 0.274, + "step": 35515 + }, + { + "epoch": 0.6136991982305778, + "grad_norm": 1.584343377059997, + "learning_rate": 6.8577154776518254e-06, + "loss": 0.5671, + "step": 35516 + }, + { + "epoch": 0.6137164777439867, + "grad_norm": 1.2230981688247076, + "learning_rate": 6.857184180745748e-06, + "loss": 0.3892, + "step": 35517 + }, + { + "epoch": 0.6137337572573957, + "grad_norm": 1.1881185147147544, + "learning_rate": 6.8566528936832134e-06, + "loss": 0.2771, + "step": 35518 + }, + { + "epoch": 0.6137510367708046, + "grad_norm": 2.3964240060090836, + "learning_rate": 6.856121616465877e-06, + "loss": 0.3506, + "step": 35519 + }, + { + "epoch": 0.6137683162842135, + "grad_norm": 1.2702399194502232, + "learning_rate": 6.855590349095411e-06, + "loss": 0.3872, + "step": 35520 + }, + { + "epoch": 0.6137855957976224, + "grad_norm": 0.9599339426391931, + "learning_rate": 6.855059091573472e-06, + "loss": 0.4386, + "step": 35521 + }, + { + "epoch": 0.6138028753110313, + "grad_norm": 1.1207384473690913, + "learning_rate": 6.854527843901728e-06, + "loss": 0.3394, + "step": 35522 + }, + { + "epoch": 0.6138201548244402, + "grad_norm": 0.6613245750095, + "learning_rate": 6.853996606081841e-06, + "loss": 0.7413, + "step": 35523 + }, + { + "epoch": 0.6138374343378491, + "grad_norm": 0.8014185074150646, + "learning_rate": 6.853465378115477e-06, + "loss": 0.2668, + "step": 35524 + }, + { + "epoch": 0.6138547138512579, + "grad_norm": 1.0810797377991739, + "learning_rate": 6.8529341600042965e-06, + "loss": 0.544, + "step": 35525 + }, + { + "epoch": 0.6138719933646668, + "grad_norm": 1.0827591547117272, + "learning_rate": 6.852402951749969e-06, + "loss": 0.3967, + "step": 35526 + }, + { + "epoch": 0.6138892728780757, + "grad_norm": 1.3592451536220822, + "learning_rate": 6.851871753354154e-06, + "loss": 0.2813, + "step": 35527 + }, + { + "epoch": 0.6139065523914846, + "grad_norm": 0.9983261389281074, + "learning_rate": 6.851340564818512e-06, + "loss": 0.3066, + "step": 35528 + }, + { + "epoch": 0.6139238319048935, + "grad_norm": 1.5665866391846743, + "learning_rate": 6.850809386144714e-06, + "loss": 0.5113, + "step": 35529 + }, + { + "epoch": 0.6139411114183024, + "grad_norm": 1.1110654660182449, + "learning_rate": 6.8502782173344174e-06, + "loss": 0.3425, + "step": 35530 + }, + { + "epoch": 0.6139583909317113, + "grad_norm": 1.0194809076409928, + "learning_rate": 6.849747058389293e-06, + "loss": 0.5018, + "step": 35531 + }, + { + "epoch": 0.6139756704451202, + "grad_norm": 1.2722231184743176, + "learning_rate": 6.849215909310998e-06, + "loss": 0.2237, + "step": 35532 + }, + { + "epoch": 0.6139929499585292, + "grad_norm": 0.8039220230196477, + "learning_rate": 6.848684770101197e-06, + "loss": 0.2383, + "step": 35533 + }, + { + "epoch": 0.6140102294719381, + "grad_norm": 1.1755065944442669, + "learning_rate": 6.848153640761555e-06, + "loss": 0.5469, + "step": 35534 + }, + { + "epoch": 0.614027508985347, + "grad_norm": 1.5361705500941734, + "learning_rate": 6.847622521293739e-06, + "loss": 0.4249, + "step": 35535 + }, + { + "epoch": 0.6140447884987559, + "grad_norm": 1.2005514100184675, + "learning_rate": 6.847091411699407e-06, + "loss": 0.2593, + "step": 35536 + }, + { + "epoch": 0.6140620680121648, + "grad_norm": 1.2764921583264017, + "learning_rate": 6.846560311980222e-06, + "loss": 0.491, + "step": 35537 + }, + { + "epoch": 0.6140793475255737, + "grad_norm": 1.2685314730905601, + "learning_rate": 6.846029222137851e-06, + "loss": 0.3389, + "step": 35538 + }, + { + "epoch": 0.6140966270389826, + "grad_norm": 0.7275865117076656, + "learning_rate": 6.845498142173956e-06, + "loss": 0.3633, + "step": 35539 + }, + { + "epoch": 0.6141139065523915, + "grad_norm": 1.3621634755693794, + "learning_rate": 6.844967072090203e-06, + "loss": 0.4684, + "step": 35540 + }, + { + "epoch": 0.6141311860658004, + "grad_norm": 1.0856876161071065, + "learning_rate": 6.844436011888249e-06, + "loss": 0.5063, + "step": 35541 + }, + { + "epoch": 0.6141484655792093, + "grad_norm": 0.8154571367726297, + "learning_rate": 6.843904961569765e-06, + "loss": 0.4252, + "step": 35542 + }, + { + "epoch": 0.6141657450926182, + "grad_norm": 1.731015624077695, + "learning_rate": 6.843373921136407e-06, + "loss": 0.3422, + "step": 35543 + }, + { + "epoch": 0.6141830246060271, + "grad_norm": 1.1018412329590979, + "learning_rate": 6.8428428905898436e-06, + "loss": 0.4853, + "step": 35544 + }, + { + "epoch": 0.614200304119436, + "grad_norm": 0.9329023167395358, + "learning_rate": 6.842311869931735e-06, + "loss": 0.3185, + "step": 35545 + }, + { + "epoch": 0.614217583632845, + "grad_norm": 1.595775670107736, + "learning_rate": 6.841780859163749e-06, + "loss": 0.5051, + "step": 35546 + }, + { + "epoch": 0.6142348631462538, + "grad_norm": 1.611341425785177, + "learning_rate": 6.841249858287543e-06, + "loss": 0.3797, + "step": 35547 + }, + { + "epoch": 0.6142521426596627, + "grad_norm": 0.9782827814405957, + "learning_rate": 6.840718867304781e-06, + "loss": 0.5478, + "step": 35548 + }, + { + "epoch": 0.6142694221730716, + "grad_norm": 1.0798688815569981, + "learning_rate": 6.8401878862171315e-06, + "loss": 0.6511, + "step": 35549 + }, + { + "epoch": 0.6142867016864805, + "grad_norm": 0.8507549136098554, + "learning_rate": 6.8396569150262474e-06, + "loss": 0.357, + "step": 35550 + }, + { + "epoch": 0.6143039811998894, + "grad_norm": 0.7831632682920634, + "learning_rate": 6.839125953733805e-06, + "loss": 0.4603, + "step": 35551 + }, + { + "epoch": 0.6143212607132983, + "grad_norm": 1.0023026278043925, + "learning_rate": 6.8385950023414546e-06, + "loss": 0.4625, + "step": 35552 + }, + { + "epoch": 0.6143385402267072, + "grad_norm": 1.2223208826290772, + "learning_rate": 6.838064060850869e-06, + "loss": 0.4108, + "step": 35553 + }, + { + "epoch": 0.6143558197401161, + "grad_norm": 1.6575767716481289, + "learning_rate": 6.837533129263704e-06, + "loss": 0.3197, + "step": 35554 + }, + { + "epoch": 0.614373099253525, + "grad_norm": 1.001539306026563, + "learning_rate": 6.83700220758163e-06, + "loss": 0.5348, + "step": 35555 + }, + { + "epoch": 0.6143903787669339, + "grad_norm": 0.6087393463782073, + "learning_rate": 6.836471295806303e-06, + "loss": 0.6507, + "step": 35556 + }, + { + "epoch": 0.6144076582803428, + "grad_norm": 1.3725455052487623, + "learning_rate": 6.835940393939386e-06, + "loss": 0.3161, + "step": 35557 + }, + { + "epoch": 0.6144249377937517, + "grad_norm": 1.0459312650425445, + "learning_rate": 6.835409501982546e-06, + "loss": 0.2816, + "step": 35558 + }, + { + "epoch": 0.6144422173071606, + "grad_norm": 1.1819032504663165, + "learning_rate": 6.834878619937444e-06, + "loss": 0.4587, + "step": 35559 + }, + { + "epoch": 0.6144594968205696, + "grad_norm": 1.5977196182351951, + "learning_rate": 6.834347747805745e-06, + "loss": 0.3485, + "step": 35560 + }, + { + "epoch": 0.6144767763339785, + "grad_norm": 1.30787023433162, + "learning_rate": 6.833816885589105e-06, + "loss": 0.4802, + "step": 35561 + }, + { + "epoch": 0.6144940558473874, + "grad_norm": 1.0615332616463213, + "learning_rate": 6.833286033289195e-06, + "loss": 0.3434, + "step": 35562 + }, + { + "epoch": 0.6145113353607963, + "grad_norm": 1.1663163002215902, + "learning_rate": 6.83275519090767e-06, + "loss": 0.3785, + "step": 35563 + }, + { + "epoch": 0.6145286148742052, + "grad_norm": 1.0068321928009243, + "learning_rate": 6.832224358446199e-06, + "loss": 0.4003, + "step": 35564 + }, + { + "epoch": 0.6145458943876141, + "grad_norm": 1.6393698970279402, + "learning_rate": 6.8316935359064406e-06, + "loss": 0.4639, + "step": 35565 + }, + { + "epoch": 0.614563173901023, + "grad_norm": 0.8760576821638917, + "learning_rate": 6.831162723290061e-06, + "loss": 0.4253, + "step": 35566 + }, + { + "epoch": 0.6145804534144319, + "grad_norm": 0.9167826181528527, + "learning_rate": 6.83063192059872e-06, + "loss": 0.3995, + "step": 35567 + }, + { + "epoch": 0.6145977329278407, + "grad_norm": 1.1137659955366215, + "learning_rate": 6.830101127834079e-06, + "loss": 0.5175, + "step": 35568 + }, + { + "epoch": 0.6146150124412496, + "grad_norm": 1.321190513278316, + "learning_rate": 6.829570344997807e-06, + "loss": 0.4642, + "step": 35569 + }, + { + "epoch": 0.6146322919546585, + "grad_norm": 1.0409080014514662, + "learning_rate": 6.829039572091553e-06, + "loss": 0.4602, + "step": 35570 + }, + { + "epoch": 0.6146495714680674, + "grad_norm": 1.3433865090971424, + "learning_rate": 6.828508809116996e-06, + "loss": 0.4624, + "step": 35571 + }, + { + "epoch": 0.6146668509814763, + "grad_norm": 1.0940786774193036, + "learning_rate": 6.8279780560757854e-06, + "loss": 0.3806, + "step": 35572 + }, + { + "epoch": 0.6146841304948852, + "grad_norm": 0.7673760654144521, + "learning_rate": 6.827447312969591e-06, + "loss": 0.4445, + "step": 35573 + }, + { + "epoch": 0.6147014100082941, + "grad_norm": 0.8463635400370494, + "learning_rate": 6.826916579800072e-06, + "loss": 0.378, + "step": 35574 + }, + { + "epoch": 0.614718689521703, + "grad_norm": 1.5499060411711563, + "learning_rate": 6.8263858565688935e-06, + "loss": 0.4019, + "step": 35575 + }, + { + "epoch": 0.614735969035112, + "grad_norm": 1.069354650482604, + "learning_rate": 6.8258551432777105e-06, + "loss": 0.2554, + "step": 35576 + }, + { + "epoch": 0.6147532485485209, + "grad_norm": 0.6784635772212694, + "learning_rate": 6.8253244399281956e-06, + "loss": 0.3086, + "step": 35577 + }, + { + "epoch": 0.6147705280619298, + "grad_norm": 1.4717679106191506, + "learning_rate": 6.824793746522005e-06, + "loss": 0.3126, + "step": 35578 + }, + { + "epoch": 0.6147878075753387, + "grad_norm": 0.874676642266234, + "learning_rate": 6.824263063060799e-06, + "loss": 0.431, + "step": 35579 + }, + { + "epoch": 0.6148050870887476, + "grad_norm": 1.2916757370201752, + "learning_rate": 6.823732389546247e-06, + "loss": 0.4225, + "step": 35580 + }, + { + "epoch": 0.6148223666021565, + "grad_norm": 1.2981175628024735, + "learning_rate": 6.823201725980002e-06, + "loss": 0.3174, + "step": 35581 + }, + { + "epoch": 0.6148396461155654, + "grad_norm": 0.9782174507755226, + "learning_rate": 6.822671072363734e-06, + "loss": 0.3024, + "step": 35582 + }, + { + "epoch": 0.6148569256289743, + "grad_norm": 1.2825568008477082, + "learning_rate": 6.822140428699098e-06, + "loss": 0.6109, + "step": 35583 + }, + { + "epoch": 0.6148742051423832, + "grad_norm": 1.3640169744052688, + "learning_rate": 6.821609794987762e-06, + "loss": 0.3772, + "step": 35584 + }, + { + "epoch": 0.6148914846557921, + "grad_norm": 0.7101813058824025, + "learning_rate": 6.821079171231384e-06, + "loss": 0.5582, + "step": 35585 + }, + { + "epoch": 0.614908764169201, + "grad_norm": 1.5352113903952394, + "learning_rate": 6.820548557431631e-06, + "loss": 0.512, + "step": 35586 + }, + { + "epoch": 0.61492604368261, + "grad_norm": 0.9538048343746552, + "learning_rate": 6.82001795359016e-06, + "loss": 0.4207, + "step": 35587 + }, + { + "epoch": 0.6149433231960189, + "grad_norm": 1.5051592355431984, + "learning_rate": 6.819487359708632e-06, + "loss": 0.3229, + "step": 35588 + }, + { + "epoch": 0.6149606027094277, + "grad_norm": 1.4820867518402667, + "learning_rate": 6.818956775788716e-06, + "loss": 0.5952, + "step": 35589 + }, + { + "epoch": 0.6149778822228366, + "grad_norm": 1.242776693002547, + "learning_rate": 6.818426201832063e-06, + "loss": 0.2361, + "step": 35590 + }, + { + "epoch": 0.6149951617362455, + "grad_norm": 0.8091817820242448, + "learning_rate": 6.817895637840347e-06, + "loss": 0.3084, + "step": 35591 + }, + { + "epoch": 0.6150124412496544, + "grad_norm": 1.220599303352542, + "learning_rate": 6.817365083815219e-06, + "loss": 0.4098, + "step": 35592 + }, + { + "epoch": 0.6150297207630633, + "grad_norm": 1.3665225675244228, + "learning_rate": 6.816834539758348e-06, + "loss": 0.4943, + "step": 35593 + }, + { + "epoch": 0.6150470002764722, + "grad_norm": 0.9772634593643682, + "learning_rate": 6.816304005671392e-06, + "loss": 0.4976, + "step": 35594 + }, + { + "epoch": 0.6150642797898811, + "grad_norm": 0.828907036663809, + "learning_rate": 6.815773481556016e-06, + "loss": 0.4366, + "step": 35595 + }, + { + "epoch": 0.61508155930329, + "grad_norm": 1.357372426203436, + "learning_rate": 6.815242967413874e-06, + "loss": 0.5361, + "step": 35596 + }, + { + "epoch": 0.6150988388166989, + "grad_norm": 1.009197426864166, + "learning_rate": 6.81471246324664e-06, + "loss": 0.5115, + "step": 35597 + }, + { + "epoch": 0.6151161183301078, + "grad_norm": 1.3083254959284902, + "learning_rate": 6.814181969055966e-06, + "loss": 0.2678, + "step": 35598 + }, + { + "epoch": 0.6151333978435167, + "grad_norm": 0.9380373389313832, + "learning_rate": 6.813651484843515e-06, + "loss": 0.408, + "step": 35599 + }, + { + "epoch": 0.6151506773569256, + "grad_norm": 0.9851667783766415, + "learning_rate": 6.813121010610953e-06, + "loss": 0.4312, + "step": 35600 + }, + { + "epoch": 0.6151679568703345, + "grad_norm": 1.0205711113962663, + "learning_rate": 6.812590546359934e-06, + "loss": 0.4414, + "step": 35601 + }, + { + "epoch": 0.6151852363837435, + "grad_norm": 1.298590949360505, + "learning_rate": 6.812060092092125e-06, + "loss": 0.4197, + "step": 35602 + }, + { + "epoch": 0.6152025158971524, + "grad_norm": 0.8932239062590017, + "learning_rate": 6.811529647809185e-06, + "loss": 0.2795, + "step": 35603 + }, + { + "epoch": 0.6152197954105613, + "grad_norm": 1.3314233812717604, + "learning_rate": 6.810999213512778e-06, + "loss": 0.3958, + "step": 35604 + }, + { + "epoch": 0.6152370749239702, + "grad_norm": 0.7344385609933828, + "learning_rate": 6.810468789204563e-06, + "loss": 0.4601, + "step": 35605 + }, + { + "epoch": 0.6152543544373791, + "grad_norm": 1.6609639920521142, + "learning_rate": 6.809938374886204e-06, + "loss": 0.2489, + "step": 35606 + }, + { + "epoch": 0.615271633950788, + "grad_norm": 1.1222323952762348, + "learning_rate": 6.809407970559359e-06, + "loss": 0.2859, + "step": 35607 + }, + { + "epoch": 0.6152889134641969, + "grad_norm": 0.8848152865237965, + "learning_rate": 6.8088775762256895e-06, + "loss": 0.2617, + "step": 35608 + }, + { + "epoch": 0.6153061929776058, + "grad_norm": 1.0264031871838997, + "learning_rate": 6.808347191886864e-06, + "loss": 0.29, + "step": 35609 + }, + { + "epoch": 0.6153234724910146, + "grad_norm": 1.6522795794241814, + "learning_rate": 6.8078168175445284e-06, + "loss": 0.4312, + "step": 35610 + }, + { + "epoch": 0.6153407520044235, + "grad_norm": 0.7729989663480044, + "learning_rate": 6.8072864532003605e-06, + "loss": 0.2555, + "step": 35611 + }, + { + "epoch": 0.6153580315178324, + "grad_norm": 0.8761948793054751, + "learning_rate": 6.8067560988560085e-06, + "loss": 0.3705, + "step": 35612 + }, + { + "epoch": 0.6153753110312413, + "grad_norm": 1.1141779179067122, + "learning_rate": 6.806225754513143e-06, + "loss": 0.4945, + "step": 35613 + }, + { + "epoch": 0.6153925905446502, + "grad_norm": 1.1709095479229041, + "learning_rate": 6.80569542017342e-06, + "loss": 0.4716, + "step": 35614 + }, + { + "epoch": 0.6154098700580591, + "grad_norm": 1.2249047049090906, + "learning_rate": 6.805165095838503e-06, + "loss": 0.6062, + "step": 35615 + }, + { + "epoch": 0.615427149571468, + "grad_norm": 0.9092812923954586, + "learning_rate": 6.804634781510047e-06, + "loss": 0.4282, + "step": 35616 + }, + { + "epoch": 0.615444429084877, + "grad_norm": 1.6898364435721742, + "learning_rate": 6.804104477189723e-06, + "loss": 0.4607, + "step": 35617 + }, + { + "epoch": 0.6154617085982859, + "grad_norm": 1.0458142159270853, + "learning_rate": 6.803574182879186e-06, + "loss": 0.3972, + "step": 35618 + }, + { + "epoch": 0.6154789881116948, + "grad_norm": 1.7164612197232116, + "learning_rate": 6.803043898580095e-06, + "loss": 0.2178, + "step": 35619 + }, + { + "epoch": 0.6154962676251037, + "grad_norm": 0.8836452434917874, + "learning_rate": 6.802513624294117e-06, + "loss": 0.3014, + "step": 35620 + }, + { + "epoch": 0.6155135471385126, + "grad_norm": 1.1620295991457876, + "learning_rate": 6.8019833600229055e-06, + "loss": 0.3754, + "step": 35621 + }, + { + "epoch": 0.6155308266519215, + "grad_norm": 0.78979497115692, + "learning_rate": 6.801453105768128e-06, + "loss": 0.2809, + "step": 35622 + }, + { + "epoch": 0.6155481061653304, + "grad_norm": 1.3509602264264662, + "learning_rate": 6.80092286153144e-06, + "loss": 0.3081, + "step": 35623 + }, + { + "epoch": 0.6155653856787393, + "grad_norm": 0.6844233567668271, + "learning_rate": 6.800392627314506e-06, + "loss": 0.5029, + "step": 35624 + }, + { + "epoch": 0.6155826651921482, + "grad_norm": 0.8646930441527675, + "learning_rate": 6.799862403118984e-06, + "loss": 0.5057, + "step": 35625 + }, + { + "epoch": 0.6155999447055571, + "grad_norm": 1.543700001534932, + "learning_rate": 6.79933218894654e-06, + "loss": 0.2977, + "step": 35626 + }, + { + "epoch": 0.615617224218966, + "grad_norm": 0.901857306682588, + "learning_rate": 6.798801984798826e-06, + "loss": 0.3617, + "step": 35627 + }, + { + "epoch": 0.6156345037323749, + "grad_norm": 0.8480950360760773, + "learning_rate": 6.7982717906775105e-06, + "loss": 0.5668, + "step": 35628 + }, + { + "epoch": 0.6156517832457838, + "grad_norm": 1.012272251647189, + "learning_rate": 6.797741606584254e-06, + "loss": 0.3578, + "step": 35629 + }, + { + "epoch": 0.6156690627591928, + "grad_norm": 1.1783059604075117, + "learning_rate": 6.7972114325207074e-06, + "loss": 0.3675, + "step": 35630 + }, + { + "epoch": 0.6156863422726016, + "grad_norm": 1.2078724647775925, + "learning_rate": 6.796681268488543e-06, + "loss": 0.3892, + "step": 35631 + }, + { + "epoch": 0.6157036217860105, + "grad_norm": 0.6198739688239886, + "learning_rate": 6.796151114489413e-06, + "loss": 0.6151, + "step": 35632 + }, + { + "epoch": 0.6157209012994194, + "grad_norm": 0.8079225697273007, + "learning_rate": 6.795620970524983e-06, + "loss": 0.3225, + "step": 35633 + }, + { + "epoch": 0.6157381808128283, + "grad_norm": 0.8843349625232058, + "learning_rate": 6.795090836596911e-06, + "loss": 0.3864, + "step": 35634 + }, + { + "epoch": 0.6157554603262372, + "grad_norm": 1.093095027073968, + "learning_rate": 6.79456071270686e-06, + "loss": 0.4588, + "step": 35635 + }, + { + "epoch": 0.6157727398396461, + "grad_norm": 1.167334202461726, + "learning_rate": 6.794030598856484e-06, + "loss": 0.3604, + "step": 35636 + }, + { + "epoch": 0.615790019353055, + "grad_norm": 1.1327907289334251, + "learning_rate": 6.793500495047453e-06, + "loss": 0.3075, + "step": 35637 + }, + { + "epoch": 0.6158072988664639, + "grad_norm": 0.9627137543274226, + "learning_rate": 6.792970401281418e-06, + "loss": 0.3858, + "step": 35638 + }, + { + "epoch": 0.6158245783798728, + "grad_norm": 0.9036885504390538, + "learning_rate": 6.792440317560046e-06, + "loss": 0.3381, + "step": 35639 + }, + { + "epoch": 0.6158418578932817, + "grad_norm": 0.8894253264967521, + "learning_rate": 6.791910243884996e-06, + "loss": 0.4138, + "step": 35640 + }, + { + "epoch": 0.6158591374066906, + "grad_norm": 0.989800117315887, + "learning_rate": 6.791380180257924e-06, + "loss": 0.5144, + "step": 35641 + }, + { + "epoch": 0.6158764169200995, + "grad_norm": 0.9746997588460578, + "learning_rate": 6.790850126680495e-06, + "loss": 0.4227, + "step": 35642 + }, + { + "epoch": 0.6158936964335084, + "grad_norm": 0.8722905416934194, + "learning_rate": 6.790320083154365e-06, + "loss": 0.419, + "step": 35643 + }, + { + "epoch": 0.6159109759469173, + "grad_norm": 1.0245303397982355, + "learning_rate": 6.789790049681198e-06, + "loss": 0.4437, + "step": 35644 + }, + { + "epoch": 0.6159282554603263, + "grad_norm": 0.49711547778707077, + "learning_rate": 6.78926002626265e-06, + "loss": 0.7453, + "step": 35645 + }, + { + "epoch": 0.6159455349737352, + "grad_norm": 0.9352704210424116, + "learning_rate": 6.788730012900388e-06, + "loss": 0.2954, + "step": 35646 + }, + { + "epoch": 0.6159628144871441, + "grad_norm": 0.9646933290432947, + "learning_rate": 6.788200009596065e-06, + "loss": 0.3506, + "step": 35647 + }, + { + "epoch": 0.615980094000553, + "grad_norm": 0.9134205865635003, + "learning_rate": 6.787670016351344e-06, + "loss": 0.4248, + "step": 35648 + }, + { + "epoch": 0.6159973735139619, + "grad_norm": 1.6875241763035016, + "learning_rate": 6.7871400331678875e-06, + "loss": 0.6109, + "step": 35649 + }, + { + "epoch": 0.6160146530273708, + "grad_norm": 0.9255542053770964, + "learning_rate": 6.786610060047346e-06, + "loss": 0.364, + "step": 35650 + }, + { + "epoch": 0.6160319325407797, + "grad_norm": 0.8461657028976433, + "learning_rate": 6.786080096991392e-06, + "loss": 0.352, + "step": 35651 + }, + { + "epoch": 0.6160492120541885, + "grad_norm": 1.3562777728573192, + "learning_rate": 6.785550144001676e-06, + "loss": 0.4519, + "step": 35652 + }, + { + "epoch": 0.6160664915675974, + "grad_norm": 2.03552572197091, + "learning_rate": 6.7850202010798635e-06, + "loss": 0.4827, + "step": 35653 + }, + { + "epoch": 0.6160837710810063, + "grad_norm": 1.5932036229729567, + "learning_rate": 6.7844902682276095e-06, + "loss": 0.4419, + "step": 35654 + }, + { + "epoch": 0.6161010505944152, + "grad_norm": 1.141379832331392, + "learning_rate": 6.7839603454465806e-06, + "loss": 0.233, + "step": 35655 + }, + { + "epoch": 0.6161183301078241, + "grad_norm": 1.0261098646084246, + "learning_rate": 6.783430432738426e-06, + "loss": 0.4953, + "step": 35656 + }, + { + "epoch": 0.616135609621233, + "grad_norm": 0.5701093276163083, + "learning_rate": 6.782900530104818e-06, + "loss": 0.58, + "step": 35657 + }, + { + "epoch": 0.616152889134642, + "grad_norm": 0.7873815338446144, + "learning_rate": 6.782370637547407e-06, + "loss": 0.2655, + "step": 35658 + }, + { + "epoch": 0.6161701686480509, + "grad_norm": 0.9351626050756425, + "learning_rate": 6.781840755067855e-06, + "loss": 0.329, + "step": 35659 + }, + { + "epoch": 0.6161874481614598, + "grad_norm": 1.2707761477326276, + "learning_rate": 6.781310882667826e-06, + "loss": 0.3712, + "step": 35660 + }, + { + "epoch": 0.6162047276748687, + "grad_norm": 1.2401584755842239, + "learning_rate": 6.780781020348972e-06, + "loss": 0.4614, + "step": 35661 + }, + { + "epoch": 0.6162220071882776, + "grad_norm": 0.6913720231333844, + "learning_rate": 6.780251168112959e-06, + "loss": 0.2729, + "step": 35662 + }, + { + "epoch": 0.6162392867016865, + "grad_norm": 1.6701358258051415, + "learning_rate": 6.779721325961441e-06, + "loss": 0.3514, + "step": 35663 + }, + { + "epoch": 0.6162565662150954, + "grad_norm": 1.3990211246346125, + "learning_rate": 6.779191493896083e-06, + "loss": 0.5838, + "step": 35664 + }, + { + "epoch": 0.6162738457285043, + "grad_norm": 1.5221620277042827, + "learning_rate": 6.778661671918539e-06, + "loss": 0.3938, + "step": 35665 + }, + { + "epoch": 0.6162911252419132, + "grad_norm": 0.7220091779485489, + "learning_rate": 6.7781318600304766e-06, + "loss": 0.2414, + "step": 35666 + }, + { + "epoch": 0.6163084047553221, + "grad_norm": 1.1190624548824706, + "learning_rate": 6.777602058233545e-06, + "loss": 0.4979, + "step": 35667 + }, + { + "epoch": 0.616325684268731, + "grad_norm": 0.8872465777347185, + "learning_rate": 6.777072266529412e-06, + "loss": 0.3766, + "step": 35668 + }, + { + "epoch": 0.6163429637821399, + "grad_norm": 0.8640701977158496, + "learning_rate": 6.776542484919731e-06, + "loss": 0.3268, + "step": 35669 + }, + { + "epoch": 0.6163602432955488, + "grad_norm": 1.2551954451235519, + "learning_rate": 6.776012713406166e-06, + "loss": 0.5824, + "step": 35670 + }, + { + "epoch": 0.6163775228089577, + "grad_norm": 1.133448941057802, + "learning_rate": 6.7754829519903775e-06, + "loss": 0.4909, + "step": 35671 + }, + { + "epoch": 0.6163948023223667, + "grad_norm": 0.9525998211546802, + "learning_rate": 6.774953200674015e-06, + "loss": 0.3459, + "step": 35672 + }, + { + "epoch": 0.6164120818357754, + "grad_norm": 0.7810108649983977, + "learning_rate": 6.7744234594587475e-06, + "loss": 0.2787, + "step": 35673 + }, + { + "epoch": 0.6164293613491844, + "grad_norm": 1.7705512815080313, + "learning_rate": 6.773893728346228e-06, + "loss": 0.2626, + "step": 35674 + }, + { + "epoch": 0.6164466408625933, + "grad_norm": 0.7626502082433084, + "learning_rate": 6.7733640073381235e-06, + "loss": 0.2629, + "step": 35675 + }, + { + "epoch": 0.6164639203760022, + "grad_norm": 1.1664265345764067, + "learning_rate": 6.772834296436082e-06, + "loss": 0.4168, + "step": 35676 + }, + { + "epoch": 0.6164811998894111, + "grad_norm": 0.9489638942188553, + "learning_rate": 6.772304595641774e-06, + "loss": 0.5277, + "step": 35677 + }, + { + "epoch": 0.61649847940282, + "grad_norm": 0.9949718037349438, + "learning_rate": 6.7717749049568505e-06, + "loss": 0.5266, + "step": 35678 + }, + { + "epoch": 0.6165157589162289, + "grad_norm": 2.0025261716780562, + "learning_rate": 6.771245224382975e-06, + "loss": 0.4869, + "step": 35679 + }, + { + "epoch": 0.6165330384296378, + "grad_norm": 1.1454062383811112, + "learning_rate": 6.7707155539218076e-06, + "loss": 0.2897, + "step": 35680 + }, + { + "epoch": 0.6165503179430467, + "grad_norm": 1.0713341922115238, + "learning_rate": 6.7701858935749985e-06, + "loss": 0.5063, + "step": 35681 + }, + { + "epoch": 0.6165675974564556, + "grad_norm": 1.2010495138491128, + "learning_rate": 6.769656243344216e-06, + "loss": 0.178, + "step": 35682 + }, + { + "epoch": 0.6165848769698645, + "grad_norm": 1.0166431876134738, + "learning_rate": 6.769126603231113e-06, + "loss": 0.4965, + "step": 35683 + }, + { + "epoch": 0.6166021564832734, + "grad_norm": 1.2283856800998696, + "learning_rate": 6.768596973237354e-06, + "loss": 0.317, + "step": 35684 + }, + { + "epoch": 0.6166194359966823, + "grad_norm": 0.8640751226105916, + "learning_rate": 6.768067353364591e-06, + "loss": 0.3216, + "step": 35685 + }, + { + "epoch": 0.6166367155100912, + "grad_norm": 1.180245549839158, + "learning_rate": 6.767537743614492e-06, + "loss": 0.333, + "step": 35686 + }, + { + "epoch": 0.6166539950235002, + "grad_norm": 0.7855501476903056, + "learning_rate": 6.767008143988706e-06, + "loss": 0.4817, + "step": 35687 + }, + { + "epoch": 0.6166712745369091, + "grad_norm": 1.0948103289295035, + "learning_rate": 6.7664785544888975e-06, + "loss": 0.3742, + "step": 35688 + }, + { + "epoch": 0.616688554050318, + "grad_norm": 1.1671788572133992, + "learning_rate": 6.765948975116723e-06, + "loss": 0.3687, + "step": 35689 + }, + { + "epoch": 0.6167058335637269, + "grad_norm": 1.1167067162753177, + "learning_rate": 6.7654194058738445e-06, + "loss": 0.4424, + "step": 35690 + }, + { + "epoch": 0.6167231130771358, + "grad_norm": 1.4048023774392204, + "learning_rate": 6.76488984676192e-06, + "loss": 0.3641, + "step": 35691 + }, + { + "epoch": 0.6167403925905447, + "grad_norm": 0.9537234461888277, + "learning_rate": 6.764360297782602e-06, + "loss": 0.4238, + "step": 35692 + }, + { + "epoch": 0.6167576721039536, + "grad_norm": 1.2543104569043375, + "learning_rate": 6.763830758937555e-06, + "loss": 0.3161, + "step": 35693 + }, + { + "epoch": 0.6167749516173625, + "grad_norm": 0.9720556600565707, + "learning_rate": 6.763301230228434e-06, + "loss": 0.3433, + "step": 35694 + }, + { + "epoch": 0.6167922311307713, + "grad_norm": 0.6810752790629518, + "learning_rate": 6.762771711656905e-06, + "loss": 0.8998, + "step": 35695 + }, + { + "epoch": 0.6168095106441802, + "grad_norm": 1.2812390847555548, + "learning_rate": 6.762242203224614e-06, + "loss": 0.3547, + "step": 35696 + }, + { + "epoch": 0.6168267901575891, + "grad_norm": 1.2950524438471358, + "learning_rate": 6.7617127049332324e-06, + "loss": 0.2976, + "step": 35697 + }, + { + "epoch": 0.616844069670998, + "grad_norm": 1.656747299928565, + "learning_rate": 6.761183216784408e-06, + "loss": 0.3192, + "step": 35698 + }, + { + "epoch": 0.6168613491844069, + "grad_norm": 1.2382868660217525, + "learning_rate": 6.760653738779807e-06, + "loss": 0.5921, + "step": 35699 + }, + { + "epoch": 0.6168786286978158, + "grad_norm": 1.0742260919076885, + "learning_rate": 6.760124270921082e-06, + "loss": 0.3587, + "step": 35700 + }, + { + "epoch": 0.6168959082112248, + "grad_norm": 0.8965476550310372, + "learning_rate": 6.759594813209898e-06, + "loss": 0.3303, + "step": 35701 + }, + { + "epoch": 0.6169131877246337, + "grad_norm": 1.5942841696127228, + "learning_rate": 6.759065365647908e-06, + "loss": 0.3327, + "step": 35702 + }, + { + "epoch": 0.6169304672380426, + "grad_norm": 1.4348473974338105, + "learning_rate": 6.758535928236769e-06, + "loss": 0.454, + "step": 35703 + }, + { + "epoch": 0.6169477467514515, + "grad_norm": 1.1669209747825289, + "learning_rate": 6.758006500978145e-06, + "loss": 0.4077, + "step": 35704 + }, + { + "epoch": 0.6169650262648604, + "grad_norm": 0.7817341671851471, + "learning_rate": 6.757477083873687e-06, + "loss": 0.6821, + "step": 35705 + }, + { + "epoch": 0.6169823057782693, + "grad_norm": 1.0634743692128876, + "learning_rate": 6.756947676925063e-06, + "loss": 0.4833, + "step": 35706 + }, + { + "epoch": 0.6169995852916782, + "grad_norm": 1.7831428363992639, + "learning_rate": 6.756418280133921e-06, + "loss": 0.3408, + "step": 35707 + }, + { + "epoch": 0.6170168648050871, + "grad_norm": 0.971762691111226, + "learning_rate": 6.755888893501925e-06, + "loss": 0.2944, + "step": 35708 + }, + { + "epoch": 0.617034144318496, + "grad_norm": 1.1498051744206188, + "learning_rate": 6.755359517030729e-06, + "loss": 0.4887, + "step": 35709 + }, + { + "epoch": 0.6170514238319049, + "grad_norm": 1.266657657964806, + "learning_rate": 6.754830150721996e-06, + "loss": 0.386, + "step": 35710 + }, + { + "epoch": 0.6170687033453138, + "grad_norm": 1.2084270114914326, + "learning_rate": 6.754300794577384e-06, + "loss": 0.326, + "step": 35711 + }, + { + "epoch": 0.6170859828587227, + "grad_norm": 0.8546761052746159, + "learning_rate": 6.753771448598545e-06, + "loss": 0.1821, + "step": 35712 + }, + { + "epoch": 0.6171032623721316, + "grad_norm": 1.080037375195561, + "learning_rate": 6.753242112787142e-06, + "loss": 0.2479, + "step": 35713 + }, + { + "epoch": 0.6171205418855406, + "grad_norm": 0.6998910009823394, + "learning_rate": 6.752712787144829e-06, + "loss": 0.5721, + "step": 35714 + }, + { + "epoch": 0.6171378213989495, + "grad_norm": 0.9222228390937094, + "learning_rate": 6.75218347167327e-06, + "loss": 0.3792, + "step": 35715 + }, + { + "epoch": 0.6171551009123583, + "grad_norm": 0.8873627805838438, + "learning_rate": 6.751654166374114e-06, + "loss": 0.2203, + "step": 35716 + }, + { + "epoch": 0.6171723804257672, + "grad_norm": 1.1984805613877845, + "learning_rate": 6.751124871249031e-06, + "loss": 0.3988, + "step": 35717 + }, + { + "epoch": 0.6171896599391761, + "grad_norm": 1.0988127259278533, + "learning_rate": 6.750595586299667e-06, + "loss": 0.2906, + "step": 35718 + }, + { + "epoch": 0.617206939452585, + "grad_norm": 1.7581285388995642, + "learning_rate": 6.750066311527686e-06, + "loss": 0.3161, + "step": 35719 + }, + { + "epoch": 0.6172242189659939, + "grad_norm": 0.8231046055656589, + "learning_rate": 6.749537046934742e-06, + "loss": 0.3213, + "step": 35720 + }, + { + "epoch": 0.6172414984794028, + "grad_norm": 1.422260386194872, + "learning_rate": 6.7490077925224995e-06, + "loss": 0.3658, + "step": 35721 + }, + { + "epoch": 0.6172587779928117, + "grad_norm": 1.0282252493066453, + "learning_rate": 6.74847854829261e-06, + "loss": 0.3965, + "step": 35722 + }, + { + "epoch": 0.6172760575062206, + "grad_norm": 2.186873535837134, + "learning_rate": 6.747949314246729e-06, + "loss": 0.3803, + "step": 35723 + }, + { + "epoch": 0.6172933370196295, + "grad_norm": 0.9094066855518292, + "learning_rate": 6.747420090386523e-06, + "loss": 0.4419, + "step": 35724 + }, + { + "epoch": 0.6173106165330384, + "grad_norm": 1.1297592431471548, + "learning_rate": 6.74689087671364e-06, + "loss": 0.4058, + "step": 35725 + }, + { + "epoch": 0.6173278960464473, + "grad_norm": 1.1477130395199524, + "learning_rate": 6.746361673229746e-06, + "loss": 0.3027, + "step": 35726 + }, + { + "epoch": 0.6173451755598562, + "grad_norm": 1.1815241192169652, + "learning_rate": 6.745832479936492e-06, + "loss": 0.3733, + "step": 35727 + }, + { + "epoch": 0.6173624550732651, + "grad_norm": 0.8766141870672365, + "learning_rate": 6.745303296835539e-06, + "loss": 0.4197, + "step": 35728 + }, + { + "epoch": 0.6173797345866741, + "grad_norm": 0.9274170956390583, + "learning_rate": 6.744774123928542e-06, + "loss": 0.4434, + "step": 35729 + }, + { + "epoch": 0.617397014100083, + "grad_norm": 0.6703690840317702, + "learning_rate": 6.744244961217161e-06, + "loss": 0.4392, + "step": 35730 + }, + { + "epoch": 0.6174142936134919, + "grad_norm": 1.2187042772943024, + "learning_rate": 6.743715808703051e-06, + "loss": 0.3532, + "step": 35731 + }, + { + "epoch": 0.6174315731269008, + "grad_norm": 1.0997661166427268, + "learning_rate": 6.743186666387876e-06, + "loss": 0.2563, + "step": 35732 + }, + { + "epoch": 0.6174488526403097, + "grad_norm": 0.9101233476415871, + "learning_rate": 6.742657534273283e-06, + "loss": 0.2757, + "step": 35733 + }, + { + "epoch": 0.6174661321537186, + "grad_norm": 1.1972459210394075, + "learning_rate": 6.742128412360933e-06, + "loss": 0.5734, + "step": 35734 + }, + { + "epoch": 0.6174834116671275, + "grad_norm": 0.9673557901104657, + "learning_rate": 6.741599300652489e-06, + "loss": 0.4786, + "step": 35735 + }, + { + "epoch": 0.6175006911805364, + "grad_norm": 1.0441489695982271, + "learning_rate": 6.741070199149598e-06, + "loss": 0.6287, + "step": 35736 + }, + { + "epoch": 0.6175179706939452, + "grad_norm": 1.2882353295529383, + "learning_rate": 6.740541107853929e-06, + "loss": 0.281, + "step": 35737 + }, + { + "epoch": 0.6175352502073541, + "grad_norm": 0.830440532235965, + "learning_rate": 6.7400120267671265e-06, + "loss": 0.5988, + "step": 35738 + }, + { + "epoch": 0.617552529720763, + "grad_norm": 0.9876155782587922, + "learning_rate": 6.739482955890858e-06, + "loss": 0.43, + "step": 35739 + }, + { + "epoch": 0.6175698092341719, + "grad_norm": 0.9335989087865995, + "learning_rate": 6.738953895226774e-06, + "loss": 0.468, + "step": 35740 + }, + { + "epoch": 0.6175870887475808, + "grad_norm": 1.3303934264403543, + "learning_rate": 6.738424844776539e-06, + "loss": 0.3194, + "step": 35741 + }, + { + "epoch": 0.6176043682609897, + "grad_norm": 2.4465214161680238, + "learning_rate": 6.737895804541803e-06, + "loss": 0.5262, + "step": 35742 + }, + { + "epoch": 0.6176216477743987, + "grad_norm": 1.3450086859669095, + "learning_rate": 6.737366774524224e-06, + "loss": 0.5313, + "step": 35743 + }, + { + "epoch": 0.6176389272878076, + "grad_norm": 1.1054925381469431, + "learning_rate": 6.736837754725461e-06, + "loss": 0.3532, + "step": 35744 + }, + { + "epoch": 0.6176562068012165, + "grad_norm": 1.4877355021494176, + "learning_rate": 6.736308745147169e-06, + "loss": 0.2802, + "step": 35745 + }, + { + "epoch": 0.6176734863146254, + "grad_norm": 1.4637942077303499, + "learning_rate": 6.735779745791009e-06, + "loss": 0.585, + "step": 35746 + }, + { + "epoch": 0.6176907658280343, + "grad_norm": 1.7084991983357556, + "learning_rate": 6.735250756658631e-06, + "loss": 0.3294, + "step": 35747 + }, + { + "epoch": 0.6177080453414432, + "grad_norm": 0.8399358897522825, + "learning_rate": 6.734721777751698e-06, + "loss": 0.3071, + "step": 35748 + }, + { + "epoch": 0.6177253248548521, + "grad_norm": 0.9873517616917332, + "learning_rate": 6.7341928090718634e-06, + "loss": 0.3767, + "step": 35749 + }, + { + "epoch": 0.617742604368261, + "grad_norm": 0.6248279905924353, + "learning_rate": 6.733663850620787e-06, + "loss": 0.7521, + "step": 35750 + }, + { + "epoch": 0.6177598838816699, + "grad_norm": 0.9244705798893673, + "learning_rate": 6.73313490240012e-06, + "loss": 0.4023, + "step": 35751 + }, + { + "epoch": 0.6177771633950788, + "grad_norm": 1.2936779209902838, + "learning_rate": 6.732605964411528e-06, + "loss": 0.4271, + "step": 35752 + }, + { + "epoch": 0.6177944429084877, + "grad_norm": 1.3097463023141351, + "learning_rate": 6.732077036656661e-06, + "loss": 0.3973, + "step": 35753 + }, + { + "epoch": 0.6178117224218966, + "grad_norm": 1.2122571870457195, + "learning_rate": 6.731548119137175e-06, + "loss": 0.5392, + "step": 35754 + }, + { + "epoch": 0.6178290019353055, + "grad_norm": 1.6459251644109592, + "learning_rate": 6.7310192118547316e-06, + "loss": 0.3954, + "step": 35755 + }, + { + "epoch": 0.6178462814487145, + "grad_norm": 1.1846535852633437, + "learning_rate": 6.730490314810978e-06, + "loss": 0.3352, + "step": 35756 + }, + { + "epoch": 0.6178635609621234, + "grad_norm": 1.2824734105452888, + "learning_rate": 6.7299614280075845e-06, + "loss": 0.4088, + "step": 35757 + }, + { + "epoch": 0.6178808404755322, + "grad_norm": 0.85776805652322, + "learning_rate": 6.729432551446195e-06, + "loss": 0.3154, + "step": 35758 + }, + { + "epoch": 0.6178981199889411, + "grad_norm": 1.3174879732714972, + "learning_rate": 6.728903685128475e-06, + "loss": 0.347, + "step": 35759 + }, + { + "epoch": 0.61791539950235, + "grad_norm": 1.4025180134001383, + "learning_rate": 6.728374829056074e-06, + "loss": 0.4043, + "step": 35760 + }, + { + "epoch": 0.6179326790157589, + "grad_norm": 0.9620253772370274, + "learning_rate": 6.727845983230656e-06, + "loss": 0.3984, + "step": 35761 + }, + { + "epoch": 0.6179499585291678, + "grad_norm": 1.1363961773611122, + "learning_rate": 6.727317147653873e-06, + "loss": 0.324, + "step": 35762 + }, + { + "epoch": 0.6179672380425767, + "grad_norm": 2.107062599205882, + "learning_rate": 6.726788322327377e-06, + "loss": 0.3038, + "step": 35763 + }, + { + "epoch": 0.6179845175559856, + "grad_norm": 1.32113907112045, + "learning_rate": 6.72625950725283e-06, + "loss": 0.4477, + "step": 35764 + }, + { + "epoch": 0.6180017970693945, + "grad_norm": 0.8597330866704795, + "learning_rate": 6.725730702431887e-06, + "loss": 0.3063, + "step": 35765 + }, + { + "epoch": 0.6180190765828034, + "grad_norm": 1.422609278584098, + "learning_rate": 6.725201907866208e-06, + "loss": 0.2909, + "step": 35766 + }, + { + "epoch": 0.6180363560962123, + "grad_norm": 0.9133925284867584, + "learning_rate": 6.724673123557441e-06, + "loss": 0.5986, + "step": 35767 + }, + { + "epoch": 0.6180536356096212, + "grad_norm": 0.9512423635539717, + "learning_rate": 6.724144349507249e-06, + "loss": 0.768, + "step": 35768 + }, + { + "epoch": 0.6180709151230301, + "grad_norm": 1.2245285269279802, + "learning_rate": 6.723615585717283e-06, + "loss": 0.4816, + "step": 35769 + }, + { + "epoch": 0.618088194636439, + "grad_norm": 1.1712508592542097, + "learning_rate": 6.723086832189205e-06, + "loss": 0.4002, + "step": 35770 + }, + { + "epoch": 0.618105474149848, + "grad_norm": 0.8750391207440734, + "learning_rate": 6.722558088924666e-06, + "loss": 0.3047, + "step": 35771 + }, + { + "epoch": 0.6181227536632569, + "grad_norm": 0.6317988292485676, + "learning_rate": 6.722029355925328e-06, + "loss": 0.7026, + "step": 35772 + }, + { + "epoch": 0.6181400331766658, + "grad_norm": 1.3727453035046264, + "learning_rate": 6.7215006331928415e-06, + "loss": 0.379, + "step": 35773 + }, + { + "epoch": 0.6181573126900747, + "grad_norm": 0.8316787015086572, + "learning_rate": 6.720971920728861e-06, + "loss": 0.4673, + "step": 35774 + }, + { + "epoch": 0.6181745922034836, + "grad_norm": 1.0823575442682938, + "learning_rate": 6.720443218535051e-06, + "loss": 0.4539, + "step": 35775 + }, + { + "epoch": 0.6181918717168925, + "grad_norm": 1.4398269429775896, + "learning_rate": 6.719914526613056e-06, + "loss": 0.4262, + "step": 35776 + }, + { + "epoch": 0.6182091512303014, + "grad_norm": 0.8745528571599173, + "learning_rate": 6.719385844964543e-06, + "loss": 0.2509, + "step": 35777 + }, + { + "epoch": 0.6182264307437103, + "grad_norm": 0.8225213936773804, + "learning_rate": 6.718857173591159e-06, + "loss": 0.4363, + "step": 35778 + }, + { + "epoch": 0.6182437102571191, + "grad_norm": 0.9460344880331872, + "learning_rate": 6.718328512494566e-06, + "loss": 0.408, + "step": 35779 + }, + { + "epoch": 0.618260989770528, + "grad_norm": 0.6690849414414003, + "learning_rate": 6.717799861676415e-06, + "loss": 0.35, + "step": 35780 + }, + { + "epoch": 0.6182782692839369, + "grad_norm": 1.3245735454899692, + "learning_rate": 6.717271221138369e-06, + "loss": 0.3295, + "step": 35781 + }, + { + "epoch": 0.6182955487973458, + "grad_norm": 1.0128760464297135, + "learning_rate": 6.716742590882072e-06, + "loss": 0.3692, + "step": 35782 + }, + { + "epoch": 0.6183128283107547, + "grad_norm": 1.3353556271081335, + "learning_rate": 6.716213970909195e-06, + "loss": 0.3978, + "step": 35783 + }, + { + "epoch": 0.6183301078241636, + "grad_norm": 1.8519491199956513, + "learning_rate": 6.715685361221383e-06, + "loss": 0.3641, + "step": 35784 + }, + { + "epoch": 0.6183473873375726, + "grad_norm": 0.8108847408803389, + "learning_rate": 6.715156761820289e-06, + "loss": 0.3938, + "step": 35785 + }, + { + "epoch": 0.6183646668509815, + "grad_norm": 1.4850079550327835, + "learning_rate": 6.71462817270758e-06, + "loss": 0.2859, + "step": 35786 + }, + { + "epoch": 0.6183819463643904, + "grad_norm": 1.0204218083537204, + "learning_rate": 6.714099593884902e-06, + "loss": 0.3005, + "step": 35787 + }, + { + "epoch": 0.6183992258777993, + "grad_norm": 0.9918414023685788, + "learning_rate": 6.713571025353913e-06, + "loss": 0.3727, + "step": 35788 + }, + { + "epoch": 0.6184165053912082, + "grad_norm": 0.49513989987807616, + "learning_rate": 6.71304246711627e-06, + "loss": 0.5757, + "step": 35789 + }, + { + "epoch": 0.6184337849046171, + "grad_norm": 1.2032277483403895, + "learning_rate": 6.712513919173629e-06, + "loss": 0.3858, + "step": 35790 + }, + { + "epoch": 0.618451064418026, + "grad_norm": 0.955519682363592, + "learning_rate": 6.711985381527641e-06, + "loss": 0.3592, + "step": 35791 + }, + { + "epoch": 0.6184683439314349, + "grad_norm": 1.4110770947552496, + "learning_rate": 6.7114568541799695e-06, + "loss": 0.4083, + "step": 35792 + }, + { + "epoch": 0.6184856234448438, + "grad_norm": 1.2566712102288158, + "learning_rate": 6.710928337132263e-06, + "loss": 0.4002, + "step": 35793 + }, + { + "epoch": 0.6185029029582527, + "grad_norm": 0.7394331004847867, + "learning_rate": 6.710399830386178e-06, + "loss": 0.2501, + "step": 35794 + }, + { + "epoch": 0.6185201824716616, + "grad_norm": 1.6437689417256807, + "learning_rate": 6.7098713339433745e-06, + "loss": 0.5551, + "step": 35795 + }, + { + "epoch": 0.6185374619850705, + "grad_norm": 1.1174294339631186, + "learning_rate": 6.7093428478054965e-06, + "loss": 0.456, + "step": 35796 + }, + { + "epoch": 0.6185547414984794, + "grad_norm": 1.146632306881221, + "learning_rate": 6.708814371974214e-06, + "loss": 0.3569, + "step": 35797 + }, + { + "epoch": 0.6185720210118884, + "grad_norm": 1.429268264099231, + "learning_rate": 6.7082859064511705e-06, + "loss": 0.6148, + "step": 35798 + }, + { + "epoch": 0.6185893005252973, + "grad_norm": 0.8175460109517344, + "learning_rate": 6.7077574512380284e-06, + "loss": 0.4633, + "step": 35799 + }, + { + "epoch": 0.618606580038706, + "grad_norm": 1.5458107312466538, + "learning_rate": 6.7072290063364376e-06, + "loss": 0.5121, + "step": 35800 + }, + { + "epoch": 0.618623859552115, + "grad_norm": 1.2799585334892412, + "learning_rate": 6.706700571748059e-06, + "loss": 0.4077, + "step": 35801 + }, + { + "epoch": 0.6186411390655239, + "grad_norm": 1.0341175114449814, + "learning_rate": 6.706172147474539e-06, + "loss": 0.3002, + "step": 35802 + }, + { + "epoch": 0.6186584185789328, + "grad_norm": 0.7806992105252192, + "learning_rate": 6.705643733517545e-06, + "loss": 0.6034, + "step": 35803 + }, + { + "epoch": 0.6186756980923417, + "grad_norm": 0.8803223120204627, + "learning_rate": 6.705115329878722e-06, + "loss": 0.4015, + "step": 35804 + }, + { + "epoch": 0.6186929776057506, + "grad_norm": 0.9093410297333105, + "learning_rate": 6.704586936559727e-06, + "loss": 0.4658, + "step": 35805 + }, + { + "epoch": 0.6187102571191595, + "grad_norm": 1.0454067454560536, + "learning_rate": 6.7040585535622205e-06, + "loss": 0.4536, + "step": 35806 + }, + { + "epoch": 0.6187275366325684, + "grad_norm": 0.7940053459961042, + "learning_rate": 6.70353018088785e-06, + "loss": 0.4669, + "step": 35807 + }, + { + "epoch": 0.6187448161459773, + "grad_norm": 0.9828542602062447, + "learning_rate": 6.703001818538273e-06, + "loss": 0.3327, + "step": 35808 + }, + { + "epoch": 0.6187620956593862, + "grad_norm": 1.416071771899528, + "learning_rate": 6.702473466515143e-06, + "loss": 0.337, + "step": 35809 + }, + { + "epoch": 0.6187793751727951, + "grad_norm": 1.1616194856618645, + "learning_rate": 6.701945124820121e-06, + "loss": 0.3141, + "step": 35810 + }, + { + "epoch": 0.618796654686204, + "grad_norm": 0.8662054508664154, + "learning_rate": 6.7014167934548535e-06, + "loss": 0.246, + "step": 35811 + }, + { + "epoch": 0.618813934199613, + "grad_norm": 0.8960189685873288, + "learning_rate": 6.7008884724210035e-06, + "loss": 0.4068, + "step": 35812 + }, + { + "epoch": 0.6188312137130219, + "grad_norm": 0.49266447997529667, + "learning_rate": 6.700360161720219e-06, + "loss": 0.736, + "step": 35813 + }, + { + "epoch": 0.6188484932264308, + "grad_norm": 1.3618160021470274, + "learning_rate": 6.699831861354158e-06, + "loss": 0.4205, + "step": 35814 + }, + { + "epoch": 0.6188657727398397, + "grad_norm": 0.663713259131383, + "learning_rate": 6.699303571324476e-06, + "loss": 0.5189, + "step": 35815 + }, + { + "epoch": 0.6188830522532486, + "grad_norm": 1.07551783714543, + "learning_rate": 6.69877529163282e-06, + "loss": 0.3024, + "step": 35816 + }, + { + "epoch": 0.6189003317666575, + "grad_norm": 1.2942086408508306, + "learning_rate": 6.698247022280859e-06, + "loss": 0.4604, + "step": 35817 + }, + { + "epoch": 0.6189176112800664, + "grad_norm": 1.225148483401308, + "learning_rate": 6.697718763270233e-06, + "loss": 0.5301, + "step": 35818 + }, + { + "epoch": 0.6189348907934753, + "grad_norm": 1.7219552925129935, + "learning_rate": 6.697190514602605e-06, + "loss": 0.2868, + "step": 35819 + }, + { + "epoch": 0.6189521703068842, + "grad_norm": 0.6719672684798806, + "learning_rate": 6.6966622762796265e-06, + "loss": 0.3287, + "step": 35820 + }, + { + "epoch": 0.618969449820293, + "grad_norm": 1.5046246645255044, + "learning_rate": 6.696134048302956e-06, + "loss": 0.2843, + "step": 35821 + }, + { + "epoch": 0.6189867293337019, + "grad_norm": 0.7167027862855355, + "learning_rate": 6.695605830674239e-06, + "loss": 0.3121, + "step": 35822 + }, + { + "epoch": 0.6190040088471108, + "grad_norm": 0.8511346317108933, + "learning_rate": 6.695077623395141e-06, + "loss": 0.2669, + "step": 35823 + }, + { + "epoch": 0.6190212883605197, + "grad_norm": 1.0504077240629766, + "learning_rate": 6.694549426467309e-06, + "loss": 0.3431, + "step": 35824 + }, + { + "epoch": 0.6190385678739286, + "grad_norm": 1.5308535761990756, + "learning_rate": 6.694021239892399e-06, + "loss": 0.4256, + "step": 35825 + }, + { + "epoch": 0.6190558473873375, + "grad_norm": 1.2292922339958365, + "learning_rate": 6.693493063672068e-06, + "loss": 0.3613, + "step": 35826 + }, + { + "epoch": 0.6190731269007465, + "grad_norm": 0.7231627155358388, + "learning_rate": 6.692964897807965e-06, + "loss": 0.8734, + "step": 35827 + }, + { + "epoch": 0.6190904064141554, + "grad_norm": 0.9583252464169008, + "learning_rate": 6.692436742301749e-06, + "loss": 0.3241, + "step": 35828 + }, + { + "epoch": 0.6191076859275643, + "grad_norm": 1.4496926878897012, + "learning_rate": 6.6919085971550705e-06, + "loss": 0.2189, + "step": 35829 + }, + { + "epoch": 0.6191249654409732, + "grad_norm": 1.056722073779289, + "learning_rate": 6.691380462369588e-06, + "loss": 0.2265, + "step": 35830 + }, + { + "epoch": 0.6191422449543821, + "grad_norm": 0.816144180861248, + "learning_rate": 6.690852337946949e-06, + "loss": 0.2158, + "step": 35831 + }, + { + "epoch": 0.619159524467791, + "grad_norm": 1.1097628053532027, + "learning_rate": 6.690324223888819e-06, + "loss": 0.3764, + "step": 35832 + }, + { + "epoch": 0.6191768039811999, + "grad_norm": 1.0191133730722242, + "learning_rate": 6.68979612019684e-06, + "loss": 0.4487, + "step": 35833 + }, + { + "epoch": 0.6191940834946088, + "grad_norm": 1.2481480332811112, + "learning_rate": 6.689268026872673e-06, + "loss": 0.3384, + "step": 35834 + }, + { + "epoch": 0.6192113630080177, + "grad_norm": 1.3557003133438406, + "learning_rate": 6.6887399439179725e-06, + "loss": 0.3831, + "step": 35835 + }, + { + "epoch": 0.6192286425214266, + "grad_norm": 1.1852670390657516, + "learning_rate": 6.6882118713343825e-06, + "loss": 0.3384, + "step": 35836 + }, + { + "epoch": 0.6192459220348355, + "grad_norm": 1.0375909484620045, + "learning_rate": 6.687683809123573e-06, + "loss": 0.3966, + "step": 35837 + }, + { + "epoch": 0.6192632015482444, + "grad_norm": 1.0031014896630635, + "learning_rate": 6.6871557572871844e-06, + "loss": 0.4401, + "step": 35838 + }, + { + "epoch": 0.6192804810616533, + "grad_norm": 1.0262563766523034, + "learning_rate": 6.6866277158268764e-06, + "loss": 0.4667, + "step": 35839 + }, + { + "epoch": 0.6192977605750623, + "grad_norm": 1.3432735393186352, + "learning_rate": 6.686099684744303e-06, + "loss": 0.3503, + "step": 35840 + }, + { + "epoch": 0.6193150400884712, + "grad_norm": 0.759290104853094, + "learning_rate": 6.68557166404112e-06, + "loss": 0.2923, + "step": 35841 + }, + { + "epoch": 0.6193323196018801, + "grad_norm": 1.229843499324067, + "learning_rate": 6.685043653718972e-06, + "loss": 0.3366, + "step": 35842 + }, + { + "epoch": 0.6193495991152889, + "grad_norm": 1.0303150833973966, + "learning_rate": 6.684515653779525e-06, + "loss": 0.1829, + "step": 35843 + }, + { + "epoch": 0.6193668786286978, + "grad_norm": 1.2901541974706348, + "learning_rate": 6.683987664224423e-06, + "loss": 0.4919, + "step": 35844 + }, + { + "epoch": 0.6193841581421067, + "grad_norm": 1.436280942341633, + "learning_rate": 6.683459685055327e-06, + "loss": 0.3479, + "step": 35845 + }, + { + "epoch": 0.6194014376555156, + "grad_norm": 0.9573820537321333, + "learning_rate": 6.682931716273888e-06, + "loss": 0.2712, + "step": 35846 + }, + { + "epoch": 0.6194187171689245, + "grad_norm": 1.2953991390055901, + "learning_rate": 6.682403757881754e-06, + "loss": 0.3307, + "step": 35847 + }, + { + "epoch": 0.6194359966823334, + "grad_norm": 1.5282329174260672, + "learning_rate": 6.681875809880588e-06, + "loss": 0.2977, + "step": 35848 + }, + { + "epoch": 0.6194532761957423, + "grad_norm": 1.428786698970105, + "learning_rate": 6.681347872272036e-06, + "loss": 0.2809, + "step": 35849 + }, + { + "epoch": 0.6194705557091512, + "grad_norm": 1.099927964767942, + "learning_rate": 6.680819945057757e-06, + "loss": 0.2765, + "step": 35850 + }, + { + "epoch": 0.6194878352225601, + "grad_norm": 1.169427870955527, + "learning_rate": 6.680292028239399e-06, + "loss": 0.3626, + "step": 35851 + }, + { + "epoch": 0.619505114735969, + "grad_norm": 0.777467677992426, + "learning_rate": 6.679764121818624e-06, + "loss": 0.654, + "step": 35852 + }, + { + "epoch": 0.6195223942493779, + "grad_norm": 1.127983751020762, + "learning_rate": 6.679236225797077e-06, + "loss": 0.3259, + "step": 35853 + }, + { + "epoch": 0.6195396737627868, + "grad_norm": 0.6975235110013321, + "learning_rate": 6.678708340176414e-06, + "loss": 0.2756, + "step": 35854 + }, + { + "epoch": 0.6195569532761958, + "grad_norm": 0.6869664672919515, + "learning_rate": 6.6781804649582926e-06, + "loss": 0.3107, + "step": 35855 + }, + { + "epoch": 0.6195742327896047, + "grad_norm": 0.4910718205061244, + "learning_rate": 6.677652600144355e-06, + "loss": 0.7293, + "step": 35856 + }, + { + "epoch": 0.6195915123030136, + "grad_norm": 1.0713316815492417, + "learning_rate": 6.67712474573627e-06, + "loss": 0.3932, + "step": 35857 + }, + { + "epoch": 0.6196087918164225, + "grad_norm": 0.8543564428792126, + "learning_rate": 6.676596901735677e-06, + "loss": 0.3918, + "step": 35858 + }, + { + "epoch": 0.6196260713298314, + "grad_norm": 0.8169844458741282, + "learning_rate": 6.676069068144237e-06, + "loss": 0.3593, + "step": 35859 + }, + { + "epoch": 0.6196433508432403, + "grad_norm": 1.3647231068492238, + "learning_rate": 6.675541244963601e-06, + "loss": 0.5319, + "step": 35860 + }, + { + "epoch": 0.6196606303566492, + "grad_norm": 0.9257230795737864, + "learning_rate": 6.675013432195425e-06, + "loss": 0.5393, + "step": 35861 + }, + { + "epoch": 0.6196779098700581, + "grad_norm": 0.572306486809384, + "learning_rate": 6.674485629841354e-06, + "loss": 0.7543, + "step": 35862 + }, + { + "epoch": 0.619695189383467, + "grad_norm": 1.5414680487954762, + "learning_rate": 6.673957837903053e-06, + "loss": 0.3324, + "step": 35863 + }, + { + "epoch": 0.6197124688968758, + "grad_norm": 1.0465581785047586, + "learning_rate": 6.6734300563821644e-06, + "loss": 0.4077, + "step": 35864 + }, + { + "epoch": 0.6197297484102847, + "grad_norm": 0.9731083901354823, + "learning_rate": 6.672902285280347e-06, + "loss": 0.2333, + "step": 35865 + }, + { + "epoch": 0.6197470279236936, + "grad_norm": 1.396183032081917, + "learning_rate": 6.6723745245992565e-06, + "loss": 0.3738, + "step": 35866 + }, + { + "epoch": 0.6197643074371025, + "grad_norm": 0.9831311685885951, + "learning_rate": 6.671846774340537e-06, + "loss": 0.4754, + "step": 35867 + }, + { + "epoch": 0.6197815869505114, + "grad_norm": 1.1328594978136894, + "learning_rate": 6.6713190345058474e-06, + "loss": 0.3306, + "step": 35868 + }, + { + "epoch": 0.6197988664639204, + "grad_norm": 0.9656953645362165, + "learning_rate": 6.670791305096839e-06, + "loss": 0.2781, + "step": 35869 + }, + { + "epoch": 0.6198161459773293, + "grad_norm": 0.6844098638288743, + "learning_rate": 6.670263586115166e-06, + "loss": 0.4933, + "step": 35870 + }, + { + "epoch": 0.6198334254907382, + "grad_norm": 0.9140320896964984, + "learning_rate": 6.6697358775624785e-06, + "loss": 0.4104, + "step": 35871 + }, + { + "epoch": 0.6198507050041471, + "grad_norm": 1.1676683477182308, + "learning_rate": 6.6692081794404365e-06, + "loss": 0.3682, + "step": 35872 + }, + { + "epoch": 0.619867984517556, + "grad_norm": 1.20429645302861, + "learning_rate": 6.668680491750683e-06, + "loss": 0.5027, + "step": 35873 + }, + { + "epoch": 0.6198852640309649, + "grad_norm": 1.1363207471103982, + "learning_rate": 6.668152814494877e-06, + "loss": 0.3248, + "step": 35874 + }, + { + "epoch": 0.6199025435443738, + "grad_norm": 0.9290036107828279, + "learning_rate": 6.667625147674668e-06, + "loss": 0.3958, + "step": 35875 + }, + { + "epoch": 0.6199198230577827, + "grad_norm": 1.5132285677971233, + "learning_rate": 6.667097491291712e-06, + "loss": 0.2561, + "step": 35876 + }, + { + "epoch": 0.6199371025711916, + "grad_norm": 0.9021180226261489, + "learning_rate": 6.666569845347663e-06, + "loss": 0.5072, + "step": 35877 + }, + { + "epoch": 0.6199543820846005, + "grad_norm": 0.9013960151730033, + "learning_rate": 6.666042209844166e-06, + "loss": 0.509, + "step": 35878 + }, + { + "epoch": 0.6199716615980094, + "grad_norm": 1.0545872626383392, + "learning_rate": 6.66551458478288e-06, + "loss": 0.4268, + "step": 35879 + }, + { + "epoch": 0.6199889411114183, + "grad_norm": 0.8627924790296773, + "learning_rate": 6.664986970165452e-06, + "loss": 0.3582, + "step": 35880 + }, + { + "epoch": 0.6200062206248272, + "grad_norm": 1.548799910488144, + "learning_rate": 6.664459365993545e-06, + "loss": 0.42, + "step": 35881 + }, + { + "epoch": 0.6200235001382361, + "grad_norm": 1.1168487288904925, + "learning_rate": 6.663931772268797e-06, + "loss": 0.401, + "step": 35882 + }, + { + "epoch": 0.6200407796516451, + "grad_norm": 1.0285300511491327, + "learning_rate": 6.663404188992875e-06, + "loss": 0.3589, + "step": 35883 + }, + { + "epoch": 0.620058059165054, + "grad_norm": 1.5806853607023408, + "learning_rate": 6.66287661616742e-06, + "loss": 0.3412, + "step": 35884 + }, + { + "epoch": 0.6200753386784628, + "grad_norm": 1.0326454993816736, + "learning_rate": 6.662349053794093e-06, + "loss": 0.2332, + "step": 35885 + }, + { + "epoch": 0.6200926181918717, + "grad_norm": 1.0131535706695647, + "learning_rate": 6.661821501874542e-06, + "loss": 0.3862, + "step": 35886 + }, + { + "epoch": 0.6201098977052806, + "grad_norm": 1.6032825148049432, + "learning_rate": 6.661293960410417e-06, + "loss": 0.3699, + "step": 35887 + }, + { + "epoch": 0.6201271772186895, + "grad_norm": 0.6185419611348518, + "learning_rate": 6.660766429403375e-06, + "loss": 0.3824, + "step": 35888 + }, + { + "epoch": 0.6201444567320984, + "grad_norm": 0.6378325325893102, + "learning_rate": 6.660238908855063e-06, + "loss": 0.5084, + "step": 35889 + }, + { + "epoch": 0.6201617362455073, + "grad_norm": 0.9922289996871743, + "learning_rate": 6.65971139876714e-06, + "loss": 0.5557, + "step": 35890 + }, + { + "epoch": 0.6201790157589162, + "grad_norm": 1.5708121705117621, + "learning_rate": 6.659183899141253e-06, + "loss": 0.6775, + "step": 35891 + }, + { + "epoch": 0.6201962952723251, + "grad_norm": 0.8557131369165112, + "learning_rate": 6.658656409979058e-06, + "loss": 0.3878, + "step": 35892 + }, + { + "epoch": 0.620213574785734, + "grad_norm": 1.2320118150962303, + "learning_rate": 6.658128931282204e-06, + "loss": 0.6172, + "step": 35893 + }, + { + "epoch": 0.6202308542991429, + "grad_norm": 2.0286159806677264, + "learning_rate": 6.657601463052344e-06, + "loss": 0.3755, + "step": 35894 + }, + { + "epoch": 0.6202481338125518, + "grad_norm": 1.3902621852695232, + "learning_rate": 6.657074005291128e-06, + "loss": 0.4686, + "step": 35895 + }, + { + "epoch": 0.6202654133259607, + "grad_norm": 0.9562436660987279, + "learning_rate": 6.6565465580002146e-06, + "loss": 0.461, + "step": 35896 + }, + { + "epoch": 0.6202826928393697, + "grad_norm": 1.2396084698745071, + "learning_rate": 6.656019121181252e-06, + "loss": 0.4802, + "step": 35897 + }, + { + "epoch": 0.6202999723527786, + "grad_norm": 1.3103304608927726, + "learning_rate": 6.6554916948358875e-06, + "loss": 0.2933, + "step": 35898 + }, + { + "epoch": 0.6203172518661875, + "grad_norm": 1.2261010269205908, + "learning_rate": 6.6549642789657785e-06, + "loss": 0.5032, + "step": 35899 + }, + { + "epoch": 0.6203345313795964, + "grad_norm": 0.9575940046310484, + "learning_rate": 6.654436873572574e-06, + "loss": 0.4475, + "step": 35900 + }, + { + "epoch": 0.6203518108930053, + "grad_norm": 0.7919825123712548, + "learning_rate": 6.653909478657932e-06, + "loss": 0.1434, + "step": 35901 + }, + { + "epoch": 0.6203690904064142, + "grad_norm": 1.0761626135650615, + "learning_rate": 6.653382094223494e-06, + "loss": 0.5369, + "step": 35902 + }, + { + "epoch": 0.6203863699198231, + "grad_norm": 1.1794049443391736, + "learning_rate": 6.652854720270923e-06, + "loss": 0.5199, + "step": 35903 + }, + { + "epoch": 0.620403649433232, + "grad_norm": 0.9466717861879507, + "learning_rate": 6.652327356801862e-06, + "loss": 0.261, + "step": 35904 + }, + { + "epoch": 0.6204209289466409, + "grad_norm": 1.0501525607229858, + "learning_rate": 6.6518000038179675e-06, + "loss": 0.3958, + "step": 35905 + }, + { + "epoch": 0.6204382084600497, + "grad_norm": 1.0265074097861562, + "learning_rate": 6.651272661320887e-06, + "loss": 0.2773, + "step": 35906 + }, + { + "epoch": 0.6204554879734586, + "grad_norm": 0.5950615234877223, + "learning_rate": 6.65074532931228e-06, + "loss": 0.6726, + "step": 35907 + }, + { + "epoch": 0.6204727674868675, + "grad_norm": 1.8476662148544007, + "learning_rate": 6.650218007793792e-06, + "loss": 0.3445, + "step": 35908 + }, + { + "epoch": 0.6204900470002764, + "grad_norm": 1.9329497042057313, + "learning_rate": 6.649690696767072e-06, + "loss": 0.3674, + "step": 35909 + }, + { + "epoch": 0.6205073265136853, + "grad_norm": 1.0639461780371962, + "learning_rate": 6.64916339623378e-06, + "loss": 0.5365, + "step": 35910 + }, + { + "epoch": 0.6205246060270942, + "grad_norm": 0.9876381498065293, + "learning_rate": 6.648636106195558e-06, + "loss": 0.2909, + "step": 35911 + }, + { + "epoch": 0.6205418855405032, + "grad_norm": 0.9937340211216866, + "learning_rate": 6.648108826654069e-06, + "loss": 0.3427, + "step": 35912 + }, + { + "epoch": 0.6205591650539121, + "grad_norm": 0.6556573370188843, + "learning_rate": 6.647581557610952e-06, + "loss": 0.9045, + "step": 35913 + }, + { + "epoch": 0.620576444567321, + "grad_norm": 1.1343245223415126, + "learning_rate": 6.6470542990678674e-06, + "loss": 0.2072, + "step": 35914 + }, + { + "epoch": 0.6205937240807299, + "grad_norm": 1.1643348254778179, + "learning_rate": 6.646527051026461e-06, + "loss": 0.4215, + "step": 35915 + }, + { + "epoch": 0.6206110035941388, + "grad_norm": 1.4852773483475716, + "learning_rate": 6.645999813488389e-06, + "loss": 0.5448, + "step": 35916 + }, + { + "epoch": 0.6206282831075477, + "grad_norm": 1.0992520307899176, + "learning_rate": 6.645472586455302e-06, + "loss": 0.2742, + "step": 35917 + }, + { + "epoch": 0.6206455626209566, + "grad_norm": 0.906810455411708, + "learning_rate": 6.6449453699288456e-06, + "loss": 0.352, + "step": 35918 + }, + { + "epoch": 0.6206628421343655, + "grad_norm": 0.8563717540884029, + "learning_rate": 6.6444181639106775e-06, + "loss": 0.5173, + "step": 35919 + }, + { + "epoch": 0.6206801216477744, + "grad_norm": 1.1036502282163811, + "learning_rate": 6.643890968402445e-06, + "loss": 0.4427, + "step": 35920 + }, + { + "epoch": 0.6206974011611833, + "grad_norm": 1.2233710061163194, + "learning_rate": 6.643363783405805e-06, + "loss": 0.2591, + "step": 35921 + }, + { + "epoch": 0.6207146806745922, + "grad_norm": 0.4825845708766543, + "learning_rate": 6.642836608922399e-06, + "loss": 0.2694, + "step": 35922 + }, + { + "epoch": 0.6207319601880011, + "grad_norm": 0.47930336452348987, + "learning_rate": 6.642309444953889e-06, + "loss": 0.8051, + "step": 35923 + }, + { + "epoch": 0.62074923970141, + "grad_norm": 1.226412366858507, + "learning_rate": 6.6417822915019194e-06, + "loss": 0.4991, + "step": 35924 + }, + { + "epoch": 0.620766519214819, + "grad_norm": 0.7124675264941603, + "learning_rate": 6.641255148568142e-06, + "loss": 0.1789, + "step": 35925 + }, + { + "epoch": 0.6207837987282279, + "grad_norm": 0.6869471376808015, + "learning_rate": 6.640728016154207e-06, + "loss": 0.2645, + "step": 35926 + }, + { + "epoch": 0.6208010782416367, + "grad_norm": 1.1819695035782212, + "learning_rate": 6.640200894261773e-06, + "loss": 0.4001, + "step": 35927 + }, + { + "epoch": 0.6208183577550456, + "grad_norm": 1.215521443055368, + "learning_rate": 6.6396737828924815e-06, + "loss": 0.2783, + "step": 35928 + }, + { + "epoch": 0.6208356372684545, + "grad_norm": 1.090450454019698, + "learning_rate": 6.639146682047986e-06, + "loss": 0.5014, + "step": 35929 + }, + { + "epoch": 0.6208529167818634, + "grad_norm": 0.8223636510282266, + "learning_rate": 6.63861959172994e-06, + "loss": 0.863, + "step": 35930 + }, + { + "epoch": 0.6208701962952723, + "grad_norm": 1.2700806839753915, + "learning_rate": 6.638092511939992e-06, + "loss": 0.5198, + "step": 35931 + }, + { + "epoch": 0.6208874758086812, + "grad_norm": 0.896844411877372, + "learning_rate": 6.637565442679796e-06, + "loss": 0.3934, + "step": 35932 + }, + { + "epoch": 0.6209047553220901, + "grad_norm": 1.2159596704845175, + "learning_rate": 6.637038383950998e-06, + "loss": 0.4518, + "step": 35933 + }, + { + "epoch": 0.620922034835499, + "grad_norm": 0.824172577955353, + "learning_rate": 6.636511335755254e-06, + "loss": 0.3557, + "step": 35934 + }, + { + "epoch": 0.6209393143489079, + "grad_norm": 1.1781218575635113, + "learning_rate": 6.635984298094207e-06, + "loss": 0.4704, + "step": 35935 + }, + { + "epoch": 0.6209565938623168, + "grad_norm": 1.5514236489395528, + "learning_rate": 6.635457270969518e-06, + "loss": 0.4982, + "step": 35936 + }, + { + "epoch": 0.6209738733757257, + "grad_norm": 1.120553370735426, + "learning_rate": 6.634930254382829e-06, + "loss": 0.5122, + "step": 35937 + }, + { + "epoch": 0.6209911528891346, + "grad_norm": 0.7905691024324061, + "learning_rate": 6.634403248335798e-06, + "loss": 0.3615, + "step": 35938 + }, + { + "epoch": 0.6210084324025436, + "grad_norm": 1.180867460822661, + "learning_rate": 6.63387625283007e-06, + "loss": 0.3935, + "step": 35939 + }, + { + "epoch": 0.6210257119159525, + "grad_norm": 1.5159172044470057, + "learning_rate": 6.633349267867296e-06, + "loss": 0.6435, + "step": 35940 + }, + { + "epoch": 0.6210429914293614, + "grad_norm": 0.748610455362535, + "learning_rate": 6.632822293449131e-06, + "loss": 0.2188, + "step": 35941 + }, + { + "epoch": 0.6210602709427703, + "grad_norm": 0.7760036722018401, + "learning_rate": 6.632295329577218e-06, + "loss": 0.1987, + "step": 35942 + }, + { + "epoch": 0.6210775504561792, + "grad_norm": 1.1170175202247858, + "learning_rate": 6.631768376253216e-06, + "loss": 0.2717, + "step": 35943 + }, + { + "epoch": 0.6210948299695881, + "grad_norm": 1.2364753449475212, + "learning_rate": 6.631241433478769e-06, + "loss": 0.503, + "step": 35944 + }, + { + "epoch": 0.621112109482997, + "grad_norm": 0.7638701127920792, + "learning_rate": 6.630714501255531e-06, + "loss": 0.1751, + "step": 35945 + }, + { + "epoch": 0.6211293889964059, + "grad_norm": 0.6970045990815787, + "learning_rate": 6.63018757958515e-06, + "loss": 0.8386, + "step": 35946 + }, + { + "epoch": 0.6211466685098148, + "grad_norm": 1.2247363868332897, + "learning_rate": 6.629660668469281e-06, + "loss": 0.2401, + "step": 35947 + }, + { + "epoch": 0.6211639480232236, + "grad_norm": 1.0484330025707584, + "learning_rate": 6.629133767909569e-06, + "loss": 0.4541, + "step": 35948 + }, + { + "epoch": 0.6211812275366325, + "grad_norm": 1.2266718078190977, + "learning_rate": 6.628606877907664e-06, + "loss": 0.3132, + "step": 35949 + }, + { + "epoch": 0.6211985070500414, + "grad_norm": 0.7839964706578921, + "learning_rate": 6.628079998465221e-06, + "loss": 0.7068, + "step": 35950 + }, + { + "epoch": 0.6212157865634503, + "grad_norm": 1.1325657775188096, + "learning_rate": 6.627553129583886e-06, + "loss": 0.389, + "step": 35951 + }, + { + "epoch": 0.6212330660768592, + "grad_norm": 1.5660935137796197, + "learning_rate": 6.627026271265313e-06, + "loss": 0.5522, + "step": 35952 + }, + { + "epoch": 0.6212503455902681, + "grad_norm": 1.6724691398330096, + "learning_rate": 6.626499423511148e-06, + "loss": 0.2883, + "step": 35953 + }, + { + "epoch": 0.6212676251036771, + "grad_norm": 1.0570717267818073, + "learning_rate": 6.625972586323044e-06, + "loss": 0.5138, + "step": 35954 + }, + { + "epoch": 0.621284904617086, + "grad_norm": 1.2365976088484727, + "learning_rate": 6.625445759702649e-06, + "loss": 0.6176, + "step": 35955 + }, + { + "epoch": 0.6213021841304949, + "grad_norm": 1.212747850844853, + "learning_rate": 6.624918943651617e-06, + "loss": 0.467, + "step": 35956 + }, + { + "epoch": 0.6213194636439038, + "grad_norm": 1.0996793291846323, + "learning_rate": 6.624392138171592e-06, + "loss": 0.4755, + "step": 35957 + }, + { + "epoch": 0.6213367431573127, + "grad_norm": 0.71399882697158, + "learning_rate": 6.623865343264232e-06, + "loss": 0.4762, + "step": 35958 + }, + { + "epoch": 0.6213540226707216, + "grad_norm": 0.9447665766234191, + "learning_rate": 6.623338558931181e-06, + "loss": 0.7238, + "step": 35959 + }, + { + "epoch": 0.6213713021841305, + "grad_norm": 1.0065914588200495, + "learning_rate": 6.622811785174086e-06, + "loss": 0.3291, + "step": 35960 + }, + { + "epoch": 0.6213885816975394, + "grad_norm": 1.1598664370093303, + "learning_rate": 6.622285021994608e-06, + "loss": 0.2486, + "step": 35961 + }, + { + "epoch": 0.6214058612109483, + "grad_norm": 0.7042449488771214, + "learning_rate": 6.621758269394382e-06, + "loss": 0.6975, + "step": 35962 + }, + { + "epoch": 0.6214231407243572, + "grad_norm": 1.2648920588935944, + "learning_rate": 6.621231527375071e-06, + "loss": 0.4547, + "step": 35963 + }, + { + "epoch": 0.6214404202377661, + "grad_norm": 0.8781533666166037, + "learning_rate": 6.620704795938318e-06, + "loss": 0.4695, + "step": 35964 + }, + { + "epoch": 0.621457699751175, + "grad_norm": 0.5381401429071517, + "learning_rate": 6.6201780750857756e-06, + "loss": 0.7006, + "step": 35965 + }, + { + "epoch": 0.621474979264584, + "grad_norm": 1.6625637489647336, + "learning_rate": 6.619651364819091e-06, + "loss": 0.5545, + "step": 35966 + }, + { + "epoch": 0.6214922587779929, + "grad_norm": 0.8531472514668167, + "learning_rate": 6.619124665139918e-06, + "loss": 0.3061, + "step": 35967 + }, + { + "epoch": 0.6215095382914018, + "grad_norm": 1.0695392971333728, + "learning_rate": 6.618597976049902e-06, + "loss": 0.4128, + "step": 35968 + }, + { + "epoch": 0.6215268178048107, + "grad_norm": 1.2008534962577861, + "learning_rate": 6.618071297550692e-06, + "loss": 0.5016, + "step": 35969 + }, + { + "epoch": 0.6215440973182195, + "grad_norm": 0.9782101123964919, + "learning_rate": 6.617544629643941e-06, + "loss": 0.3626, + "step": 35970 + }, + { + "epoch": 0.6215613768316284, + "grad_norm": 1.7943177965803845, + "learning_rate": 6.6170179723312965e-06, + "loss": 0.5353, + "step": 35971 + }, + { + "epoch": 0.6215786563450373, + "grad_norm": 1.0421529015057298, + "learning_rate": 6.616491325614412e-06, + "loss": 0.4181, + "step": 35972 + }, + { + "epoch": 0.6215959358584462, + "grad_norm": 1.1221938023906217, + "learning_rate": 6.6159646894949315e-06, + "loss": 0.4201, + "step": 35973 + }, + { + "epoch": 0.6216132153718551, + "grad_norm": 0.9932351214788443, + "learning_rate": 6.615438063974506e-06, + "loss": 0.413, + "step": 35974 + }, + { + "epoch": 0.621630494885264, + "grad_norm": 1.230973852716894, + "learning_rate": 6.614911449054786e-06, + "loss": 0.3473, + "step": 35975 + }, + { + "epoch": 0.6216477743986729, + "grad_norm": 1.0358232386305226, + "learning_rate": 6.614384844737421e-06, + "loss": 0.45, + "step": 35976 + }, + { + "epoch": 0.6216650539120818, + "grad_norm": 0.7611101866942124, + "learning_rate": 6.613858251024059e-06, + "loss": 0.445, + "step": 35977 + }, + { + "epoch": 0.6216823334254907, + "grad_norm": 0.9966542803300191, + "learning_rate": 6.613331667916354e-06, + "loss": 0.4445, + "step": 35978 + }, + { + "epoch": 0.6216996129388996, + "grad_norm": 1.5483381392166737, + "learning_rate": 6.61280509541595e-06, + "loss": 0.2642, + "step": 35979 + }, + { + "epoch": 0.6217168924523085, + "grad_norm": 1.4557279164287051, + "learning_rate": 6.612278533524494e-06, + "loss": 0.307, + "step": 35980 + }, + { + "epoch": 0.6217341719657175, + "grad_norm": 1.334771463162043, + "learning_rate": 6.611751982243646e-06, + "loss": 0.3642, + "step": 35981 + }, + { + "epoch": 0.6217514514791264, + "grad_norm": 0.9923632306033776, + "learning_rate": 6.61122544157504e-06, + "loss": 0.3002, + "step": 35982 + }, + { + "epoch": 0.6217687309925353, + "grad_norm": 0.6371629904688217, + "learning_rate": 6.610698911520341e-06, + "loss": 0.9637, + "step": 35983 + }, + { + "epoch": 0.6217860105059442, + "grad_norm": 1.0782008530357594, + "learning_rate": 6.610172392081185e-06, + "loss": 0.256, + "step": 35984 + }, + { + "epoch": 0.6218032900193531, + "grad_norm": 0.7474473398400361, + "learning_rate": 6.60964588325923e-06, + "loss": 0.3496, + "step": 35985 + }, + { + "epoch": 0.621820569532762, + "grad_norm": 0.8126937163754493, + "learning_rate": 6.60911938505612e-06, + "loss": 0.5906, + "step": 35986 + }, + { + "epoch": 0.6218378490461709, + "grad_norm": 1.025919894040197, + "learning_rate": 6.608592897473509e-06, + "loss": 0.3677, + "step": 35987 + }, + { + "epoch": 0.6218551285595798, + "grad_norm": 1.1983649239179892, + "learning_rate": 6.608066420513038e-06, + "loss": 0.34, + "step": 35988 + }, + { + "epoch": 0.6218724080729887, + "grad_norm": 1.2805173317066363, + "learning_rate": 6.607539954176363e-06, + "loss": 0.386, + "step": 35989 + }, + { + "epoch": 0.6218896875863976, + "grad_norm": 1.097389283592514, + "learning_rate": 6.607013498465131e-06, + "loss": 0.3344, + "step": 35990 + }, + { + "epoch": 0.6219069670998064, + "grad_norm": 1.2507073510367654, + "learning_rate": 6.60648705338099e-06, + "loss": 0.4459, + "step": 35991 + }, + { + "epoch": 0.6219242466132153, + "grad_norm": 0.8023512464668391, + "learning_rate": 6.605960618925592e-06, + "loss": 0.2334, + "step": 35992 + }, + { + "epoch": 0.6219415261266242, + "grad_norm": 1.1600210469085706, + "learning_rate": 6.605434195100579e-06, + "loss": 0.4169, + "step": 35993 + }, + { + "epoch": 0.6219588056400331, + "grad_norm": 1.436208774522786, + "learning_rate": 6.604907781907608e-06, + "loss": 0.3384, + "step": 35994 + }, + { + "epoch": 0.621976085153442, + "grad_norm": 1.1864921788448497, + "learning_rate": 6.604381379348319e-06, + "loss": 0.4261, + "step": 35995 + }, + { + "epoch": 0.621993364666851, + "grad_norm": 0.9395172253132673, + "learning_rate": 6.603854987424369e-06, + "loss": 0.3458, + "step": 35996 + }, + { + "epoch": 0.6220106441802599, + "grad_norm": 0.9608242233296456, + "learning_rate": 6.603328606137402e-06, + "loss": 0.3054, + "step": 35997 + }, + { + "epoch": 0.6220279236936688, + "grad_norm": 1.033380561089382, + "learning_rate": 6.602802235489072e-06, + "loss": 0.3601, + "step": 35998 + }, + { + "epoch": 0.6220452032070777, + "grad_norm": 0.724614000598279, + "learning_rate": 6.602275875481021e-06, + "loss": 0.8682, + "step": 35999 + }, + { + "epoch": 0.6220624827204866, + "grad_norm": 0.8439567217344282, + "learning_rate": 6.601749526114898e-06, + "loss": 0.4546, + "step": 36000 + }, + { + "epoch": 0.6220797622338955, + "grad_norm": 1.3447453861913756, + "learning_rate": 6.601223187392358e-06, + "loss": 0.3329, + "step": 36001 + }, + { + "epoch": 0.6220970417473044, + "grad_norm": 1.3126773143257042, + "learning_rate": 6.6006968593150386e-06, + "loss": 0.3337, + "step": 36002 + }, + { + "epoch": 0.6221143212607133, + "grad_norm": 1.0740011300114263, + "learning_rate": 6.600170541884601e-06, + "loss": 0.2047, + "step": 36003 + }, + { + "epoch": 0.6221316007741222, + "grad_norm": 1.271626606551234, + "learning_rate": 6.599644235102685e-06, + "loss": 0.2519, + "step": 36004 + }, + { + "epoch": 0.6221488802875311, + "grad_norm": 1.0157441800396745, + "learning_rate": 6.5991179389709425e-06, + "loss": 0.4173, + "step": 36005 + }, + { + "epoch": 0.62216615980094, + "grad_norm": 2.0194710377434006, + "learning_rate": 6.598591653491019e-06, + "loss": 0.4094, + "step": 36006 + }, + { + "epoch": 0.6221834393143489, + "grad_norm": 1.1934983946717284, + "learning_rate": 6.5980653786645685e-06, + "loss": 0.4085, + "step": 36007 + }, + { + "epoch": 0.6222007188277578, + "grad_norm": 1.134015372103851, + "learning_rate": 6.597539114493233e-06, + "loss": 0.355, + "step": 36008 + }, + { + "epoch": 0.6222179983411668, + "grad_norm": 0.6314385973403764, + "learning_rate": 6.5970128609786654e-06, + "loss": 0.5245, + "step": 36009 + }, + { + "epoch": 0.6222352778545757, + "grad_norm": 0.7891513962178895, + "learning_rate": 6.596486618122512e-06, + "loss": 0.344, + "step": 36010 + }, + { + "epoch": 0.6222525573679846, + "grad_norm": 1.0877419606862209, + "learning_rate": 6.595960385926418e-06, + "loss": 0.3365, + "step": 36011 + }, + { + "epoch": 0.6222698368813934, + "grad_norm": 1.0030540985362553, + "learning_rate": 6.59543416439204e-06, + "loss": 0.3078, + "step": 36012 + }, + { + "epoch": 0.6222871163948023, + "grad_norm": 0.9414645285711006, + "learning_rate": 6.594907953521017e-06, + "loss": 0.377, + "step": 36013 + }, + { + "epoch": 0.6223043959082112, + "grad_norm": 0.6751216790656295, + "learning_rate": 6.594381753315003e-06, + "loss": 0.3618, + "step": 36014 + }, + { + "epoch": 0.6223216754216201, + "grad_norm": 1.317932044440312, + "learning_rate": 6.593855563775642e-06, + "loss": 0.3129, + "step": 36015 + }, + { + "epoch": 0.622338954935029, + "grad_norm": 1.4399325470990516, + "learning_rate": 6.593329384904586e-06, + "loss": 0.482, + "step": 36016 + }, + { + "epoch": 0.6223562344484379, + "grad_norm": 1.4135885303966458, + "learning_rate": 6.592803216703481e-06, + "loss": 0.4129, + "step": 36017 + }, + { + "epoch": 0.6223735139618468, + "grad_norm": 1.2223706884255778, + "learning_rate": 6.5922770591739775e-06, + "loss": 0.4764, + "step": 36018 + }, + { + "epoch": 0.6223907934752557, + "grad_norm": 0.751285716281821, + "learning_rate": 6.591750912317716e-06, + "loss": 0.7719, + "step": 36019 + }, + { + "epoch": 0.6224080729886646, + "grad_norm": 0.5280944144855779, + "learning_rate": 6.5912247761363534e-06, + "loss": 0.5802, + "step": 36020 + }, + { + "epoch": 0.6224253525020735, + "grad_norm": 1.023196884363493, + "learning_rate": 6.590698650631535e-06, + "loss": 0.5135, + "step": 36021 + }, + { + "epoch": 0.6224426320154824, + "grad_norm": 0.9505588397769933, + "learning_rate": 6.590172535804903e-06, + "loss": 0.2973, + "step": 36022 + }, + { + "epoch": 0.6224599115288914, + "grad_norm": 1.0412338987994176, + "learning_rate": 6.589646431658114e-06, + "loss": 0.446, + "step": 36023 + }, + { + "epoch": 0.6224771910423003, + "grad_norm": 1.1756700002998415, + "learning_rate": 6.589120338192809e-06, + "loss": 0.3622, + "step": 36024 + }, + { + "epoch": 0.6224944705557092, + "grad_norm": 1.4020002138396903, + "learning_rate": 6.58859425541064e-06, + "loss": 0.3424, + "step": 36025 + }, + { + "epoch": 0.6225117500691181, + "grad_norm": 1.1694330804038593, + "learning_rate": 6.58806818331325e-06, + "loss": 0.485, + "step": 36026 + }, + { + "epoch": 0.622529029582527, + "grad_norm": 1.7262326810667532, + "learning_rate": 6.587542121902295e-06, + "loss": 0.5772, + "step": 36027 + }, + { + "epoch": 0.6225463090959359, + "grad_norm": 1.2965398176656722, + "learning_rate": 6.587016071179413e-06, + "loss": 0.4305, + "step": 36028 + }, + { + "epoch": 0.6225635886093448, + "grad_norm": 1.9348744157534763, + "learning_rate": 6.5864900311462575e-06, + "loss": 0.4713, + "step": 36029 + }, + { + "epoch": 0.6225808681227537, + "grad_norm": 0.8962274427141674, + "learning_rate": 6.585964001804476e-06, + "loss": 0.4267, + "step": 36030 + }, + { + "epoch": 0.6225981476361626, + "grad_norm": 0.8294589790864424, + "learning_rate": 6.585437983155712e-06, + "loss": 0.3029, + "step": 36031 + }, + { + "epoch": 0.6226154271495715, + "grad_norm": 1.1081774227235635, + "learning_rate": 6.584911975201619e-06, + "loss": 0.4131, + "step": 36032 + }, + { + "epoch": 0.6226327066629803, + "grad_norm": 1.0318077704833717, + "learning_rate": 6.584385977943838e-06, + "loss": 0.4191, + "step": 36033 + }, + { + "epoch": 0.6226499861763892, + "grad_norm": 1.864279212770532, + "learning_rate": 6.583859991384021e-06, + "loss": 0.4086, + "step": 36034 + }, + { + "epoch": 0.6226672656897981, + "grad_norm": 1.0253249842564727, + "learning_rate": 6.583334015523812e-06, + "loss": 0.3633, + "step": 36035 + }, + { + "epoch": 0.622684545203207, + "grad_norm": 0.6035212729423987, + "learning_rate": 6.582808050364864e-06, + "loss": 0.7685, + "step": 36036 + }, + { + "epoch": 0.622701824716616, + "grad_norm": 1.34118536198852, + "learning_rate": 6.5822820959088185e-06, + "loss": 0.4372, + "step": 36037 + }, + { + "epoch": 0.6227191042300249, + "grad_norm": 0.9380027778090901, + "learning_rate": 6.581756152157328e-06, + "loss": 0.3147, + "step": 36038 + }, + { + "epoch": 0.6227363837434338, + "grad_norm": 1.3591280052977333, + "learning_rate": 6.581230219112034e-06, + "loss": 0.4101, + "step": 36039 + }, + { + "epoch": 0.6227536632568427, + "grad_norm": 2.1799643232532073, + "learning_rate": 6.580704296774589e-06, + "loss": 0.2066, + "step": 36040 + }, + { + "epoch": 0.6227709427702516, + "grad_norm": 1.183197222416022, + "learning_rate": 6.580178385146638e-06, + "loss": 0.4999, + "step": 36041 + }, + { + "epoch": 0.6227882222836605, + "grad_norm": 2.0751952894085464, + "learning_rate": 6.579652484229826e-06, + "loss": 0.2846, + "step": 36042 + }, + { + "epoch": 0.6228055017970694, + "grad_norm": 1.168902017938444, + "learning_rate": 6.579126594025804e-06, + "loss": 0.3008, + "step": 36043 + }, + { + "epoch": 0.6228227813104783, + "grad_norm": 1.32498677286577, + "learning_rate": 6.578600714536217e-06, + "loss": 0.281, + "step": 36044 + }, + { + "epoch": 0.6228400608238872, + "grad_norm": 1.4510503939025985, + "learning_rate": 6.5780748457627115e-06, + "loss": 0.4366, + "step": 36045 + }, + { + "epoch": 0.6228573403372961, + "grad_norm": 1.1209193915013325, + "learning_rate": 6.577548987706936e-06, + "loss": 0.4232, + "step": 36046 + }, + { + "epoch": 0.622874619850705, + "grad_norm": 0.6565218508995839, + "learning_rate": 6.57702314037054e-06, + "loss": 0.415, + "step": 36047 + }, + { + "epoch": 0.6228918993641139, + "grad_norm": 1.7467420594338534, + "learning_rate": 6.576497303755165e-06, + "loss": 0.353, + "step": 36048 + }, + { + "epoch": 0.6229091788775228, + "grad_norm": 1.9099527981813358, + "learning_rate": 6.575971477862462e-06, + "loss": 0.4379, + "step": 36049 + }, + { + "epoch": 0.6229264583909317, + "grad_norm": 1.1277037118183857, + "learning_rate": 6.575445662694074e-06, + "loss": 0.4477, + "step": 36050 + }, + { + "epoch": 0.6229437379043407, + "grad_norm": 1.103149014904585, + "learning_rate": 6.574919858251652e-06, + "loss": 0.3266, + "step": 36051 + }, + { + "epoch": 0.6229610174177496, + "grad_norm": 0.7816305448030798, + "learning_rate": 6.574394064536844e-06, + "loss": 0.2988, + "step": 36052 + }, + { + "epoch": 0.6229782969311585, + "grad_norm": 1.254981718424641, + "learning_rate": 6.57386828155129e-06, + "loss": 0.4843, + "step": 36053 + }, + { + "epoch": 0.6229955764445673, + "grad_norm": 0.9176438297576176, + "learning_rate": 6.573342509296643e-06, + "loss": 0.3582, + "step": 36054 + }, + { + "epoch": 0.6230128559579762, + "grad_norm": 1.4662394232691756, + "learning_rate": 6.572816747774546e-06, + "loss": 0.3811, + "step": 36055 + }, + { + "epoch": 0.6230301354713851, + "grad_norm": 1.40205838734722, + "learning_rate": 6.572290996986649e-06, + "loss": 0.4517, + "step": 36056 + }, + { + "epoch": 0.623047414984794, + "grad_norm": 0.9332683262989754, + "learning_rate": 6.571765256934595e-06, + "loss": 0.3524, + "step": 36057 + }, + { + "epoch": 0.6230646944982029, + "grad_norm": 0.7788932261512052, + "learning_rate": 6.571239527620037e-06, + "loss": 0.4984, + "step": 36058 + }, + { + "epoch": 0.6230819740116118, + "grad_norm": 1.3509123544018566, + "learning_rate": 6.570713809044613e-06, + "loss": 0.2669, + "step": 36059 + }, + { + "epoch": 0.6230992535250207, + "grad_norm": 0.7092872640440887, + "learning_rate": 6.570188101209976e-06, + "loss": 0.6774, + "step": 36060 + }, + { + "epoch": 0.6231165330384296, + "grad_norm": 1.6275907682853716, + "learning_rate": 6.5696624041177735e-06, + "loss": 0.5848, + "step": 36061 + }, + { + "epoch": 0.6231338125518385, + "grad_norm": 1.0395174561509959, + "learning_rate": 6.569136717769644e-06, + "loss": 0.3153, + "step": 36062 + }, + { + "epoch": 0.6231510920652474, + "grad_norm": 1.0997937467789067, + "learning_rate": 6.568611042167241e-06, + "loss": 0.4297, + "step": 36063 + }, + { + "epoch": 0.6231683715786563, + "grad_norm": 1.4674614616041128, + "learning_rate": 6.5680853773122075e-06, + "loss": 0.4158, + "step": 36064 + }, + { + "epoch": 0.6231856510920653, + "grad_norm": 0.9305153794297549, + "learning_rate": 6.567559723206194e-06, + "loss": 0.281, + "step": 36065 + }, + { + "epoch": 0.6232029306054742, + "grad_norm": 1.090296075981434, + "learning_rate": 6.567034079850842e-06, + "loss": 0.4713, + "step": 36066 + }, + { + "epoch": 0.6232202101188831, + "grad_norm": 1.047792757638116, + "learning_rate": 6.566508447247803e-06, + "loss": 0.471, + "step": 36067 + }, + { + "epoch": 0.623237489632292, + "grad_norm": 1.5375236400351036, + "learning_rate": 6.565982825398718e-06, + "loss": 0.3961, + "step": 36068 + }, + { + "epoch": 0.6232547691457009, + "grad_norm": 1.4074285472606947, + "learning_rate": 6.565457214305236e-06, + "loss": 0.515, + "step": 36069 + }, + { + "epoch": 0.6232720486591098, + "grad_norm": 0.9365205984601308, + "learning_rate": 6.564931613969003e-06, + "loss": 0.2835, + "step": 36070 + }, + { + "epoch": 0.6232893281725187, + "grad_norm": 1.2723985633209476, + "learning_rate": 6.5644060243916655e-06, + "loss": 0.3778, + "step": 36071 + }, + { + "epoch": 0.6233066076859276, + "grad_norm": 0.819926606121455, + "learning_rate": 6.563880445574873e-06, + "loss": 0.2265, + "step": 36072 + }, + { + "epoch": 0.6233238871993365, + "grad_norm": 0.7705337503991985, + "learning_rate": 6.563354877520262e-06, + "loss": 0.3192, + "step": 36073 + }, + { + "epoch": 0.6233411667127454, + "grad_norm": 1.0906523109355155, + "learning_rate": 6.562829320229489e-06, + "loss": 0.4282, + "step": 36074 + }, + { + "epoch": 0.6233584462261542, + "grad_norm": 1.0614596916356493, + "learning_rate": 6.5623037737041926e-06, + "loss": 0.471, + "step": 36075 + }, + { + "epoch": 0.6233757257395631, + "grad_norm": 1.1930950369349111, + "learning_rate": 6.561778237946024e-06, + "loss": 0.3881, + "step": 36076 + }, + { + "epoch": 0.623393005252972, + "grad_norm": 1.4604084123331742, + "learning_rate": 6.561252712956625e-06, + "loss": 0.2542, + "step": 36077 + }, + { + "epoch": 0.6234102847663809, + "grad_norm": 1.0114605478855778, + "learning_rate": 6.560727198737648e-06, + "loss": 0.4164, + "step": 36078 + }, + { + "epoch": 0.6234275642797898, + "grad_norm": 0.9391851939599032, + "learning_rate": 6.560201695290732e-06, + "loss": 0.3511, + "step": 36079 + }, + { + "epoch": 0.6234448437931988, + "grad_norm": 1.2027333237215767, + "learning_rate": 6.559676202617527e-06, + "loss": 0.3128, + "step": 36080 + }, + { + "epoch": 0.6234621233066077, + "grad_norm": 0.7930917102732709, + "learning_rate": 6.559150720719676e-06, + "loss": 0.599, + "step": 36081 + }, + { + "epoch": 0.6234794028200166, + "grad_norm": 0.8594057866441435, + "learning_rate": 6.558625249598829e-06, + "loss": 0.4295, + "step": 36082 + }, + { + "epoch": 0.6234966823334255, + "grad_norm": 1.0165554598947435, + "learning_rate": 6.558099789256629e-06, + "loss": 0.4993, + "step": 36083 + }, + { + "epoch": 0.6235139618468344, + "grad_norm": 1.5563468139595926, + "learning_rate": 6.5575743396947185e-06, + "loss": 0.4839, + "step": 36084 + }, + { + "epoch": 0.6235312413602433, + "grad_norm": 1.4503568151946087, + "learning_rate": 6.557048900914749e-06, + "loss": 0.4562, + "step": 36085 + }, + { + "epoch": 0.6235485208736522, + "grad_norm": 1.3364058777163104, + "learning_rate": 6.556523472918363e-06, + "loss": 0.3424, + "step": 36086 + }, + { + "epoch": 0.6235658003870611, + "grad_norm": 1.180559749372897, + "learning_rate": 6.55599805570721e-06, + "loss": 0.2708, + "step": 36087 + }, + { + "epoch": 0.62358307990047, + "grad_norm": 1.535010370625483, + "learning_rate": 6.5554726492829306e-06, + "loss": 0.3582, + "step": 36088 + }, + { + "epoch": 0.6236003594138789, + "grad_norm": 2.3147482174624616, + "learning_rate": 6.554947253647173e-06, + "loss": 0.3707, + "step": 36089 + }, + { + "epoch": 0.6236176389272878, + "grad_norm": 1.4767552020761976, + "learning_rate": 6.554421868801579e-06, + "loss": 0.432, + "step": 36090 + }, + { + "epoch": 0.6236349184406967, + "grad_norm": 1.5967119864150892, + "learning_rate": 6.5538964947478024e-06, + "loss": 0.3612, + "step": 36091 + }, + { + "epoch": 0.6236521979541056, + "grad_norm": 1.1845453165474715, + "learning_rate": 6.553371131487485e-06, + "loss": 0.4615, + "step": 36092 + }, + { + "epoch": 0.6236694774675146, + "grad_norm": 0.9638765930877974, + "learning_rate": 6.552845779022268e-06, + "loss": 0.4453, + "step": 36093 + }, + { + "epoch": 0.6236867569809235, + "grad_norm": 1.4935287085870903, + "learning_rate": 6.552320437353801e-06, + "loss": 0.2363, + "step": 36094 + }, + { + "epoch": 0.6237040364943324, + "grad_norm": 1.1311535014212724, + "learning_rate": 6.551795106483725e-06, + "loss": 0.3379, + "step": 36095 + }, + { + "epoch": 0.6237213160077412, + "grad_norm": 0.9197823970350665, + "learning_rate": 6.551269786413692e-06, + "loss": 0.3066, + "step": 36096 + }, + { + "epoch": 0.6237385955211501, + "grad_norm": 1.2810253879778966, + "learning_rate": 6.550744477145343e-06, + "loss": 0.4634, + "step": 36097 + }, + { + "epoch": 0.623755875034559, + "grad_norm": 1.461304662864444, + "learning_rate": 6.550219178680328e-06, + "loss": 0.2857, + "step": 36098 + }, + { + "epoch": 0.6237731545479679, + "grad_norm": 0.6284599108807983, + "learning_rate": 6.549693891020283e-06, + "loss": 0.339, + "step": 36099 + }, + { + "epoch": 0.6237904340613768, + "grad_norm": 1.265344238792801, + "learning_rate": 6.549168614166863e-06, + "loss": 0.5125, + "step": 36100 + }, + { + "epoch": 0.6238077135747857, + "grad_norm": 1.0112875016599665, + "learning_rate": 6.548643348121707e-06, + "loss": 0.3406, + "step": 36101 + }, + { + "epoch": 0.6238249930881946, + "grad_norm": 1.3071742244350404, + "learning_rate": 6.548118092886467e-06, + "loss": 0.242, + "step": 36102 + }, + { + "epoch": 0.6238422726016035, + "grad_norm": 1.2637565379924798, + "learning_rate": 6.547592848462779e-06, + "loss": 0.5442, + "step": 36103 + }, + { + "epoch": 0.6238595521150124, + "grad_norm": 0.8618369465755165, + "learning_rate": 6.547067614852291e-06, + "loss": 0.5674, + "step": 36104 + }, + { + "epoch": 0.6238768316284213, + "grad_norm": 1.1878753176301424, + "learning_rate": 6.546542392056654e-06, + "loss": 0.4364, + "step": 36105 + }, + { + "epoch": 0.6238941111418302, + "grad_norm": 0.5482875942694624, + "learning_rate": 6.546017180077505e-06, + "loss": 0.6741, + "step": 36106 + }, + { + "epoch": 0.6239113906552392, + "grad_norm": 0.8605243879528477, + "learning_rate": 6.545491978916497e-06, + "loss": 0.5294, + "step": 36107 + }, + { + "epoch": 0.6239286701686481, + "grad_norm": 1.682706909760519, + "learning_rate": 6.544966788575267e-06, + "loss": 0.3015, + "step": 36108 + }, + { + "epoch": 0.623945949682057, + "grad_norm": 2.1174881076096703, + "learning_rate": 6.5444416090554654e-06, + "loss": 0.4001, + "step": 36109 + }, + { + "epoch": 0.6239632291954659, + "grad_norm": 1.2439003307028496, + "learning_rate": 6.543916440358733e-06, + "loss": 0.4667, + "step": 36110 + }, + { + "epoch": 0.6239805087088748, + "grad_norm": 0.690948950154179, + "learning_rate": 6.543391282486719e-06, + "loss": 0.4254, + "step": 36111 + }, + { + "epoch": 0.6239977882222837, + "grad_norm": 1.1877847372238706, + "learning_rate": 6.542866135441064e-06, + "loss": 0.4395, + "step": 36112 + }, + { + "epoch": 0.6240150677356926, + "grad_norm": 1.1500519379600778, + "learning_rate": 6.5423409992234195e-06, + "loss": 0.3667, + "step": 36113 + }, + { + "epoch": 0.6240323472491015, + "grad_norm": 1.0371969422983898, + "learning_rate": 6.541815873835423e-06, + "loss": 0.5084, + "step": 36114 + }, + { + "epoch": 0.6240496267625104, + "grad_norm": 1.5056768405189969, + "learning_rate": 6.54129075927872e-06, + "loss": 0.3617, + "step": 36115 + }, + { + "epoch": 0.6240669062759193, + "grad_norm": 0.9601933875935224, + "learning_rate": 6.5407656555549605e-06, + "loss": 0.4376, + "step": 36116 + }, + { + "epoch": 0.6240841857893282, + "grad_norm": 1.1390849666380463, + "learning_rate": 6.540240562665783e-06, + "loss": 0.398, + "step": 36117 + }, + { + "epoch": 0.624101465302737, + "grad_norm": 1.2841625954943372, + "learning_rate": 6.539715480612838e-06, + "loss": 0.4577, + "step": 36118 + }, + { + "epoch": 0.6241187448161459, + "grad_norm": 1.1336779867810676, + "learning_rate": 6.539190409397764e-06, + "loss": 0.2779, + "step": 36119 + }, + { + "epoch": 0.6241360243295548, + "grad_norm": 0.799470662615011, + "learning_rate": 6.538665349022212e-06, + "loss": 0.4371, + "step": 36120 + }, + { + "epoch": 0.6241533038429637, + "grad_norm": 0.9746240427351742, + "learning_rate": 6.53814029948782e-06, + "loss": 0.3928, + "step": 36121 + }, + { + "epoch": 0.6241705833563727, + "grad_norm": 0.4833122798460762, + "learning_rate": 6.5376152607962395e-06, + "loss": 0.4301, + "step": 36122 + }, + { + "epoch": 0.6241878628697816, + "grad_norm": 1.4351793949003937, + "learning_rate": 6.5370902329491095e-06, + "loss": 0.5157, + "step": 36123 + }, + { + "epoch": 0.6242051423831905, + "grad_norm": 0.990525423401659, + "learning_rate": 6.536565215948074e-06, + "loss": 0.2853, + "step": 36124 + }, + { + "epoch": 0.6242224218965994, + "grad_norm": 1.5662809503377713, + "learning_rate": 6.536040209794781e-06, + "loss": 0.3064, + "step": 36125 + }, + { + "epoch": 0.6242397014100083, + "grad_norm": 0.8372022671023673, + "learning_rate": 6.535515214490871e-06, + "loss": 0.2881, + "step": 36126 + }, + { + "epoch": 0.6242569809234172, + "grad_norm": 0.5480050598688294, + "learning_rate": 6.534990230037997e-06, + "loss": 0.612, + "step": 36127 + }, + { + "epoch": 0.6242742604368261, + "grad_norm": 1.0822489597729799, + "learning_rate": 6.53446525643779e-06, + "loss": 0.6231, + "step": 36128 + }, + { + "epoch": 0.624291539950235, + "grad_norm": 1.2499939927815107, + "learning_rate": 6.533940293691904e-06, + "loss": 0.3498, + "step": 36129 + }, + { + "epoch": 0.6243088194636439, + "grad_norm": 1.7826804031915957, + "learning_rate": 6.533415341801979e-06, + "loss": 0.3319, + "step": 36130 + }, + { + "epoch": 0.6243260989770528, + "grad_norm": 1.5567709367669802, + "learning_rate": 6.532890400769663e-06, + "loss": 0.3701, + "step": 36131 + }, + { + "epoch": 0.6243433784904617, + "grad_norm": 0.8024700278711655, + "learning_rate": 6.532365470596595e-06, + "loss": 0.3877, + "step": 36132 + }, + { + "epoch": 0.6243606580038706, + "grad_norm": 0.7704427731665919, + "learning_rate": 6.531840551284425e-06, + "loss": 0.5696, + "step": 36133 + }, + { + "epoch": 0.6243779375172795, + "grad_norm": 0.9665435266965187, + "learning_rate": 6.5313156428347936e-06, + "loss": 0.3458, + "step": 36134 + }, + { + "epoch": 0.6243952170306885, + "grad_norm": 1.2642160749054805, + "learning_rate": 6.530790745249343e-06, + "loss": 0.3364, + "step": 36135 + }, + { + "epoch": 0.6244124965440974, + "grad_norm": 0.949706870648716, + "learning_rate": 6.5302658585297204e-06, + "loss": 0.4386, + "step": 36136 + }, + { + "epoch": 0.6244297760575063, + "grad_norm": 1.2607592556681748, + "learning_rate": 6.529740982677567e-06, + "loss": 0.4723, + "step": 36137 + }, + { + "epoch": 0.6244470555709152, + "grad_norm": 1.4454069623459582, + "learning_rate": 6.529216117694533e-06, + "loss": 0.3167, + "step": 36138 + }, + { + "epoch": 0.624464335084324, + "grad_norm": 1.048487722397562, + "learning_rate": 6.528691263582254e-06, + "loss": 0.3387, + "step": 36139 + }, + { + "epoch": 0.6244816145977329, + "grad_norm": 1.045722446766598, + "learning_rate": 6.52816642034238e-06, + "loss": 0.3511, + "step": 36140 + }, + { + "epoch": 0.6244988941111418, + "grad_norm": 1.0399693333641684, + "learning_rate": 6.5276415879765495e-06, + "loss": 0.4596, + "step": 36141 + }, + { + "epoch": 0.6245161736245507, + "grad_norm": 1.2170779821093802, + "learning_rate": 6.527116766486414e-06, + "loss": 0.5139, + "step": 36142 + }, + { + "epoch": 0.6245334531379596, + "grad_norm": 0.7377615732017444, + "learning_rate": 6.526591955873611e-06, + "loss": 0.2426, + "step": 36143 + }, + { + "epoch": 0.6245507326513685, + "grad_norm": 0.9350748044368364, + "learning_rate": 6.526067156139785e-06, + "loss": 0.2053, + "step": 36144 + }, + { + "epoch": 0.6245680121647774, + "grad_norm": 0.7976462602727621, + "learning_rate": 6.525542367286581e-06, + "loss": 0.2863, + "step": 36145 + }, + { + "epoch": 0.6245852916781863, + "grad_norm": 1.3976031640654734, + "learning_rate": 6.525017589315641e-06, + "loss": 0.4081, + "step": 36146 + }, + { + "epoch": 0.6246025711915952, + "grad_norm": 1.6897476915840952, + "learning_rate": 6.524492822228614e-06, + "loss": 0.5923, + "step": 36147 + }, + { + "epoch": 0.6246198507050041, + "grad_norm": 1.1539110821933185, + "learning_rate": 6.523968066027135e-06, + "loss": 0.5898, + "step": 36148 + }, + { + "epoch": 0.624637130218413, + "grad_norm": 1.5541772671232383, + "learning_rate": 6.523443320712856e-06, + "loss": 0.3715, + "step": 36149 + }, + { + "epoch": 0.624654409731822, + "grad_norm": 1.2389619805799337, + "learning_rate": 6.522918586287412e-06, + "loss": 0.4447, + "step": 36150 + }, + { + "epoch": 0.6246716892452309, + "grad_norm": 0.9947961123230289, + "learning_rate": 6.522393862752455e-06, + "loss": 0.4185, + "step": 36151 + }, + { + "epoch": 0.6246889687586398, + "grad_norm": 1.0641576696624313, + "learning_rate": 6.521869150109623e-06, + "loss": 0.4131, + "step": 36152 + }, + { + "epoch": 0.6247062482720487, + "grad_norm": 1.2824831720084213, + "learning_rate": 6.521344448360565e-06, + "loss": 0.3748, + "step": 36153 + }, + { + "epoch": 0.6247235277854576, + "grad_norm": 1.4707680918053123, + "learning_rate": 6.520819757506918e-06, + "loss": 0.4525, + "step": 36154 + }, + { + "epoch": 0.6247408072988665, + "grad_norm": 1.1237121793207272, + "learning_rate": 6.520295077550325e-06, + "loss": 0.2798, + "step": 36155 + }, + { + "epoch": 0.6247580868122754, + "grad_norm": 1.1319140413038769, + "learning_rate": 6.519770408492436e-06, + "loss": 0.4276, + "step": 36156 + }, + { + "epoch": 0.6247753663256843, + "grad_norm": 1.1324602235048635, + "learning_rate": 6.519245750334888e-06, + "loss": 0.3781, + "step": 36157 + }, + { + "epoch": 0.6247926458390932, + "grad_norm": 0.5710089743429411, + "learning_rate": 6.5187211030793305e-06, + "loss": 0.6099, + "step": 36158 + }, + { + "epoch": 0.6248099253525021, + "grad_norm": 0.9380763011198064, + "learning_rate": 6.5181964667274e-06, + "loss": 0.4616, + "step": 36159 + }, + { + "epoch": 0.6248272048659109, + "grad_norm": 1.1326241416265779, + "learning_rate": 6.517671841280744e-06, + "loss": 0.3511, + "step": 36160 + }, + { + "epoch": 0.6248444843793198, + "grad_norm": 0.7690084657790588, + "learning_rate": 6.517147226741002e-06, + "loss": 0.2319, + "step": 36161 + }, + { + "epoch": 0.6248617638927287, + "grad_norm": 1.3389088141202161, + "learning_rate": 6.516622623109824e-06, + "loss": 0.2284, + "step": 36162 + }, + { + "epoch": 0.6248790434061376, + "grad_norm": 1.3397654713098845, + "learning_rate": 6.516098030388842e-06, + "loss": 0.2611, + "step": 36163 + }, + { + "epoch": 0.6248963229195466, + "grad_norm": 1.3126483197202345, + "learning_rate": 6.515573448579713e-06, + "loss": 0.4937, + "step": 36164 + }, + { + "epoch": 0.6249136024329555, + "grad_norm": 0.7188063960416878, + "learning_rate": 6.51504887768407e-06, + "loss": 0.2457, + "step": 36165 + }, + { + "epoch": 0.6249308819463644, + "grad_norm": 1.293167261460075, + "learning_rate": 6.514524317703557e-06, + "loss": 0.3198, + "step": 36166 + }, + { + "epoch": 0.6249481614597733, + "grad_norm": 0.9538846315608334, + "learning_rate": 6.513999768639822e-06, + "loss": 0.3722, + "step": 36167 + }, + { + "epoch": 0.6249654409731822, + "grad_norm": 0.8260812785125153, + "learning_rate": 6.513475230494503e-06, + "loss": 0.3386, + "step": 36168 + }, + { + "epoch": 0.6249827204865911, + "grad_norm": 1.0076780917818995, + "learning_rate": 6.512950703269245e-06, + "loss": 0.4456, + "step": 36169 + }, + { + "epoch": 0.625, + "grad_norm": 1.1102598913109472, + "learning_rate": 6.512426186965688e-06, + "loss": 0.3803, + "step": 36170 + }, + { + "epoch": 0.6250172795134089, + "grad_norm": 1.1982217026668815, + "learning_rate": 6.511901681585479e-06, + "loss": 0.3354, + "step": 36171 + }, + { + "epoch": 0.6250345590268178, + "grad_norm": 0.8225160774224454, + "learning_rate": 6.511377187130258e-06, + "loss": 0.2714, + "step": 36172 + }, + { + "epoch": 0.6250518385402267, + "grad_norm": 0.8949468336168456, + "learning_rate": 6.510852703601672e-06, + "loss": 0.364, + "step": 36173 + }, + { + "epoch": 0.6250691180536356, + "grad_norm": 1.2363139909399479, + "learning_rate": 6.510328231001359e-06, + "loss": 0.3498, + "step": 36174 + }, + { + "epoch": 0.6250863975670445, + "grad_norm": 0.8912790666940577, + "learning_rate": 6.5098037693309615e-06, + "loss": 0.4049, + "step": 36175 + }, + { + "epoch": 0.6251036770804534, + "grad_norm": 0.47993088371868786, + "learning_rate": 6.509279318592125e-06, + "loss": 0.675, + "step": 36176 + }, + { + "epoch": 0.6251209565938624, + "grad_norm": 1.6259086724907132, + "learning_rate": 6.50875487878649e-06, + "loss": 0.5011, + "step": 36177 + }, + { + "epoch": 0.6251382361072713, + "grad_norm": 0.8256753506384349, + "learning_rate": 6.508230449915702e-06, + "loss": 0.3944, + "step": 36178 + }, + { + "epoch": 0.6251555156206802, + "grad_norm": 1.0923112078229158, + "learning_rate": 6.5077060319814e-06, + "loss": 0.3956, + "step": 36179 + }, + { + "epoch": 0.6251727951340891, + "grad_norm": 1.2916956531555608, + "learning_rate": 6.5071816249852285e-06, + "loss": 0.3059, + "step": 36180 + }, + { + "epoch": 0.6251900746474979, + "grad_norm": 1.075217663760686, + "learning_rate": 6.506657228928828e-06, + "loss": 0.4816, + "step": 36181 + }, + { + "epoch": 0.6252073541609068, + "grad_norm": 0.6458531239708664, + "learning_rate": 6.506132843813847e-06, + "loss": 0.6491, + "step": 36182 + }, + { + "epoch": 0.6252246336743157, + "grad_norm": 1.5244371821112184, + "learning_rate": 6.505608469641917e-06, + "loss": 0.3072, + "step": 36183 + }, + { + "epoch": 0.6252419131877246, + "grad_norm": 0.6580233599521272, + "learning_rate": 6.505084106414694e-06, + "loss": 0.2524, + "step": 36184 + }, + { + "epoch": 0.6252591927011335, + "grad_norm": 1.3439531008151255, + "learning_rate": 6.50455975413381e-06, + "loss": 0.352, + "step": 36185 + }, + { + "epoch": 0.6252764722145424, + "grad_norm": 1.981881156931417, + "learning_rate": 6.504035412800909e-06, + "loss": 0.3658, + "step": 36186 + }, + { + "epoch": 0.6252937517279513, + "grad_norm": 1.0272374519538885, + "learning_rate": 6.503511082417637e-06, + "loss": 0.4178, + "step": 36187 + }, + { + "epoch": 0.6253110312413602, + "grad_norm": 1.2523651999140033, + "learning_rate": 6.502986762985632e-06, + "loss": 0.3459, + "step": 36188 + }, + { + "epoch": 0.6253283107547691, + "grad_norm": 1.0006494613344072, + "learning_rate": 6.50246245450654e-06, + "loss": 0.2695, + "step": 36189 + }, + { + "epoch": 0.625345590268178, + "grad_norm": 1.4771736630383383, + "learning_rate": 6.5019381569819995e-06, + "loss": 0.429, + "step": 36190 + }, + { + "epoch": 0.625362869781587, + "grad_norm": 1.142759637254388, + "learning_rate": 6.501413870413655e-06, + "loss": 0.4252, + "step": 36191 + }, + { + "epoch": 0.6253801492949959, + "grad_norm": 1.0546793877646925, + "learning_rate": 6.500889594803148e-06, + "loss": 0.5761, + "step": 36192 + }, + { + "epoch": 0.6253974288084048, + "grad_norm": 1.2005610477349915, + "learning_rate": 6.5003653301521244e-06, + "loss": 0.5656, + "step": 36193 + }, + { + "epoch": 0.6254147083218137, + "grad_norm": 0.9400214065937306, + "learning_rate": 6.499841076462218e-06, + "loss": 0.2809, + "step": 36194 + }, + { + "epoch": 0.6254319878352226, + "grad_norm": 1.2288221351381778, + "learning_rate": 6.4993168337350765e-06, + "loss": 0.4951, + "step": 36195 + }, + { + "epoch": 0.6254492673486315, + "grad_norm": 1.0395024128792356, + "learning_rate": 6.498792601972342e-06, + "loss": 0.4884, + "step": 36196 + }, + { + "epoch": 0.6254665468620404, + "grad_norm": 0.9478344305803811, + "learning_rate": 6.498268381175653e-06, + "loss": 0.2089, + "step": 36197 + }, + { + "epoch": 0.6254838263754493, + "grad_norm": 1.2764534122927333, + "learning_rate": 6.497744171346658e-06, + "loss": 0.3364, + "step": 36198 + }, + { + "epoch": 0.6255011058888582, + "grad_norm": 1.5747882130506268, + "learning_rate": 6.49721997248699e-06, + "loss": 0.5265, + "step": 36199 + }, + { + "epoch": 0.6255183854022671, + "grad_norm": 1.2386802421891983, + "learning_rate": 6.496695784598297e-06, + "loss": 0.4625, + "step": 36200 + }, + { + "epoch": 0.625535664915676, + "grad_norm": 0.9118941018413698, + "learning_rate": 6.496171607682217e-06, + "loss": 0.4955, + "step": 36201 + }, + { + "epoch": 0.6255529444290848, + "grad_norm": 0.8378109296011544, + "learning_rate": 6.495647441740398e-06, + "loss": 0.2162, + "step": 36202 + }, + { + "epoch": 0.6255702239424937, + "grad_norm": 1.1400839806814367, + "learning_rate": 6.495123286774471e-06, + "loss": 0.5572, + "step": 36203 + }, + { + "epoch": 0.6255875034559026, + "grad_norm": 1.5025135839420038, + "learning_rate": 6.494599142786091e-06, + "loss": 0.503, + "step": 36204 + }, + { + "epoch": 0.6256047829693115, + "grad_norm": 1.3768939745850466, + "learning_rate": 6.49407500977689e-06, + "loss": 0.4263, + "step": 36205 + }, + { + "epoch": 0.6256220624827205, + "grad_norm": 0.9126671020153152, + "learning_rate": 6.493550887748512e-06, + "loss": 0.3873, + "step": 36206 + }, + { + "epoch": 0.6256393419961294, + "grad_norm": 0.7911496623891096, + "learning_rate": 6.493026776702602e-06, + "loss": 0.3028, + "step": 36207 + }, + { + "epoch": 0.6256566215095383, + "grad_norm": 0.9174068805642653, + "learning_rate": 6.492502676640795e-06, + "loss": 0.2221, + "step": 36208 + }, + { + "epoch": 0.6256739010229472, + "grad_norm": 0.8010316426438333, + "learning_rate": 6.491978587564738e-06, + "loss": 0.3801, + "step": 36209 + }, + { + "epoch": 0.6256911805363561, + "grad_norm": 0.9095484295062375, + "learning_rate": 6.491454509476067e-06, + "loss": 0.5699, + "step": 36210 + }, + { + "epoch": 0.625708460049765, + "grad_norm": 1.3870254269759503, + "learning_rate": 6.490930442376432e-06, + "loss": 0.298, + "step": 36211 + }, + { + "epoch": 0.6257257395631739, + "grad_norm": 1.017258545364703, + "learning_rate": 6.490406386267466e-06, + "loss": 0.339, + "step": 36212 + }, + { + "epoch": 0.6257430190765828, + "grad_norm": 1.5316550351986526, + "learning_rate": 6.489882341150818e-06, + "loss": 0.348, + "step": 36213 + }, + { + "epoch": 0.6257602985899917, + "grad_norm": 1.1690913787072401, + "learning_rate": 6.489358307028122e-06, + "loss": 0.2347, + "step": 36214 + }, + { + "epoch": 0.6257775781034006, + "grad_norm": 1.9223078065719361, + "learning_rate": 6.488834283901024e-06, + "loss": 0.312, + "step": 36215 + }, + { + "epoch": 0.6257948576168095, + "grad_norm": 1.131640520922862, + "learning_rate": 6.488310271771163e-06, + "loss": 0.342, + "step": 36216 + }, + { + "epoch": 0.6258121371302184, + "grad_norm": 1.0583232409085992, + "learning_rate": 6.487786270640182e-06, + "loss": 0.4521, + "step": 36217 + }, + { + "epoch": 0.6258294166436273, + "grad_norm": 1.1095017688650937, + "learning_rate": 6.487262280509723e-06, + "loss": 0.6117, + "step": 36218 + }, + { + "epoch": 0.6258466961570363, + "grad_norm": 1.2263016058766523, + "learning_rate": 6.4867383013814215e-06, + "loss": 0.4595, + "step": 36219 + }, + { + "epoch": 0.6258639756704452, + "grad_norm": 0.6171718130771403, + "learning_rate": 6.486214333256925e-06, + "loss": 0.5561, + "step": 36220 + }, + { + "epoch": 0.6258812551838541, + "grad_norm": 0.7884969102272938, + "learning_rate": 6.48569037613787e-06, + "loss": 0.3172, + "step": 36221 + }, + { + "epoch": 0.625898534697263, + "grad_norm": 1.242444140423333, + "learning_rate": 6.4851664300259046e-06, + "loss": 0.2428, + "step": 36222 + }, + { + "epoch": 0.6259158142106718, + "grad_norm": 0.9953615897217943, + "learning_rate": 6.484642494922657e-06, + "loss": 0.5233, + "step": 36223 + }, + { + "epoch": 0.6259330937240807, + "grad_norm": 1.8070953322701286, + "learning_rate": 6.4841185708297845e-06, + "loss": 0.3758, + "step": 36224 + }, + { + "epoch": 0.6259503732374896, + "grad_norm": 1.3194893421196923, + "learning_rate": 6.483594657748915e-06, + "loss": 0.2248, + "step": 36225 + }, + { + "epoch": 0.6259676527508985, + "grad_norm": 1.1245244913322316, + "learning_rate": 6.483070755681698e-06, + "loss": 0.318, + "step": 36226 + }, + { + "epoch": 0.6259849322643074, + "grad_norm": 0.9816750393608606, + "learning_rate": 6.482546864629771e-06, + "loss": 0.3822, + "step": 36227 + }, + { + "epoch": 0.6260022117777163, + "grad_norm": 1.334177846778519, + "learning_rate": 6.482022984594769e-06, + "loss": 0.2505, + "step": 36228 + }, + { + "epoch": 0.6260194912911252, + "grad_norm": 1.723954499868675, + "learning_rate": 6.481499115578342e-06, + "loss": 0.3475, + "step": 36229 + }, + { + "epoch": 0.6260367708045341, + "grad_norm": 1.0280854466490208, + "learning_rate": 6.480975257582125e-06, + "loss": 0.3368, + "step": 36230 + }, + { + "epoch": 0.626054050317943, + "grad_norm": 2.0712512977693227, + "learning_rate": 6.4804514106077645e-06, + "loss": 0.2943, + "step": 36231 + }, + { + "epoch": 0.6260713298313519, + "grad_norm": 2.4269755686142243, + "learning_rate": 6.479927574656893e-06, + "loss": 0.39, + "step": 36232 + }, + { + "epoch": 0.6260886093447608, + "grad_norm": 0.7408158326083407, + "learning_rate": 6.4794037497311615e-06, + "loss": 0.7382, + "step": 36233 + }, + { + "epoch": 0.6261058888581698, + "grad_norm": 0.8483414930030485, + "learning_rate": 6.478879935832201e-06, + "loss": 0.4321, + "step": 36234 + }, + { + "epoch": 0.6261231683715787, + "grad_norm": 0.952016924702544, + "learning_rate": 6.478356132961657e-06, + "loss": 0.4084, + "step": 36235 + }, + { + "epoch": 0.6261404478849876, + "grad_norm": 0.6606724122125146, + "learning_rate": 6.477832341121171e-06, + "loss": 0.5882, + "step": 36236 + }, + { + "epoch": 0.6261577273983965, + "grad_norm": 1.2482540250458736, + "learning_rate": 6.4773085603123785e-06, + "loss": 0.498, + "step": 36237 + }, + { + "epoch": 0.6261750069118054, + "grad_norm": 0.94333904695227, + "learning_rate": 6.476784790536927e-06, + "loss": 0.3041, + "step": 36238 + }, + { + "epoch": 0.6261922864252143, + "grad_norm": 0.843189428547451, + "learning_rate": 6.476261031796449e-06, + "loss": 0.433, + "step": 36239 + }, + { + "epoch": 0.6262095659386232, + "grad_norm": 1.1642419222109275, + "learning_rate": 6.475737284092593e-06, + "loss": 0.5097, + "step": 36240 + }, + { + "epoch": 0.6262268454520321, + "grad_norm": 1.681294356841036, + "learning_rate": 6.475213547426993e-06, + "loss": 0.2971, + "step": 36241 + }, + { + "epoch": 0.626244124965441, + "grad_norm": 1.252864321085202, + "learning_rate": 6.474689821801295e-06, + "loss": 0.4365, + "step": 36242 + }, + { + "epoch": 0.6262614044788499, + "grad_norm": 1.8226311677166196, + "learning_rate": 6.474166107217132e-06, + "loss": 0.4419, + "step": 36243 + }, + { + "epoch": 0.6262786839922588, + "grad_norm": 1.042816235329749, + "learning_rate": 6.473642403676153e-06, + "loss": 0.3445, + "step": 36244 + }, + { + "epoch": 0.6262959635056676, + "grad_norm": 2.1476085530476543, + "learning_rate": 6.473118711179991e-06, + "loss": 0.3434, + "step": 36245 + }, + { + "epoch": 0.6263132430190765, + "grad_norm": 0.9415994821434496, + "learning_rate": 6.472595029730291e-06, + "loss": 0.323, + "step": 36246 + }, + { + "epoch": 0.6263305225324854, + "grad_norm": 1.0907414869800223, + "learning_rate": 6.472071359328695e-06, + "loss": 0.3589, + "step": 36247 + }, + { + "epoch": 0.6263478020458944, + "grad_norm": 0.7708960941770786, + "learning_rate": 6.471547699976833e-06, + "loss": 0.3859, + "step": 36248 + }, + { + "epoch": 0.6263650815593033, + "grad_norm": 1.0004337133154007, + "learning_rate": 6.4710240516763555e-06, + "loss": 0.4288, + "step": 36249 + }, + { + "epoch": 0.6263823610727122, + "grad_norm": 1.1862415486325235, + "learning_rate": 6.470500414428895e-06, + "loss": 0.3809, + "step": 36250 + }, + { + "epoch": 0.6263996405861211, + "grad_norm": 0.8619477725894135, + "learning_rate": 6.469976788236099e-06, + "loss": 0.3419, + "step": 36251 + }, + { + "epoch": 0.62641692009953, + "grad_norm": 1.3600823596872966, + "learning_rate": 6.4694531730996025e-06, + "loss": 0.5377, + "step": 36252 + }, + { + "epoch": 0.6264341996129389, + "grad_norm": 1.1008924074509077, + "learning_rate": 6.46892956902105e-06, + "loss": 0.4702, + "step": 36253 + }, + { + "epoch": 0.6264514791263478, + "grad_norm": 1.2620575054348158, + "learning_rate": 6.468405976002075e-06, + "loss": 0.3911, + "step": 36254 + }, + { + "epoch": 0.6264687586397567, + "grad_norm": 0.801281667137026, + "learning_rate": 6.4678823940443225e-06, + "loss": 0.3182, + "step": 36255 + }, + { + "epoch": 0.6264860381531656, + "grad_norm": 1.1630862734746326, + "learning_rate": 6.467358823149428e-06, + "loss": 0.4237, + "step": 36256 + }, + { + "epoch": 0.6265033176665745, + "grad_norm": 1.0789997128370443, + "learning_rate": 6.466835263319038e-06, + "loss": 0.2847, + "step": 36257 + }, + { + "epoch": 0.6265205971799834, + "grad_norm": 1.0993647642940465, + "learning_rate": 6.466311714554789e-06, + "loss": 0.3121, + "step": 36258 + }, + { + "epoch": 0.6265378766933923, + "grad_norm": 1.1384892400450817, + "learning_rate": 6.4657881768583166e-06, + "loss": 0.6977, + "step": 36259 + }, + { + "epoch": 0.6265551562068012, + "grad_norm": 1.3152754668128026, + "learning_rate": 6.465264650231266e-06, + "loss": 0.2606, + "step": 36260 + }, + { + "epoch": 0.6265724357202102, + "grad_norm": 1.304479019118237, + "learning_rate": 6.464741134675273e-06, + "loss": 0.2633, + "step": 36261 + }, + { + "epoch": 0.6265897152336191, + "grad_norm": 1.4375191273017873, + "learning_rate": 6.464217630191985e-06, + "loss": 0.5141, + "step": 36262 + }, + { + "epoch": 0.626606994747028, + "grad_norm": 0.773314143724353, + "learning_rate": 6.463694136783027e-06, + "loss": 0.596, + "step": 36263 + }, + { + "epoch": 0.6266242742604369, + "grad_norm": 1.1433368363940282, + "learning_rate": 6.463170654450057e-06, + "loss": 0.318, + "step": 36264 + }, + { + "epoch": 0.6266415537738458, + "grad_norm": 0.9946313210445201, + "learning_rate": 6.462647183194699e-06, + "loss": 0.2339, + "step": 36265 + }, + { + "epoch": 0.6266588332872546, + "grad_norm": 1.0475172447569203, + "learning_rate": 6.462123723018603e-06, + "loss": 0.4229, + "step": 36266 + }, + { + "epoch": 0.6266761128006635, + "grad_norm": 0.9939195363746234, + "learning_rate": 6.461600273923405e-06, + "loss": 0.4407, + "step": 36267 + }, + { + "epoch": 0.6266933923140724, + "grad_norm": 1.1818238915691464, + "learning_rate": 6.46107683591074e-06, + "loss": 0.3396, + "step": 36268 + }, + { + "epoch": 0.6267106718274813, + "grad_norm": 1.3563696757247137, + "learning_rate": 6.460553408982253e-06, + "loss": 0.3272, + "step": 36269 + }, + { + "epoch": 0.6267279513408902, + "grad_norm": 1.3738990456220965, + "learning_rate": 6.460029993139579e-06, + "loss": 0.2065, + "step": 36270 + }, + { + "epoch": 0.6267452308542991, + "grad_norm": 0.8320438545704782, + "learning_rate": 6.4595065883843636e-06, + "loss": 0.5698, + "step": 36271 + }, + { + "epoch": 0.626762510367708, + "grad_norm": 1.4347892681488137, + "learning_rate": 6.45898319471824e-06, + "loss": 0.439, + "step": 36272 + }, + { + "epoch": 0.6267797898811169, + "grad_norm": 0.8093448456386052, + "learning_rate": 6.458459812142854e-06, + "loss": 0.3993, + "step": 36273 + }, + { + "epoch": 0.6267970693945258, + "grad_norm": 1.1654577580161654, + "learning_rate": 6.4579364406598375e-06, + "loss": 0.2499, + "step": 36274 + }, + { + "epoch": 0.6268143489079347, + "grad_norm": 0.9439370291733967, + "learning_rate": 6.457413080270836e-06, + "loss": 0.3137, + "step": 36275 + }, + { + "epoch": 0.6268316284213437, + "grad_norm": 1.045373547890449, + "learning_rate": 6.4568897309774826e-06, + "loss": 0.3944, + "step": 36276 + }, + { + "epoch": 0.6268489079347526, + "grad_norm": 0.6636219556418121, + "learning_rate": 6.4563663927814215e-06, + "loss": 0.6956, + "step": 36277 + }, + { + "epoch": 0.6268661874481615, + "grad_norm": 0.8185874637114078, + "learning_rate": 6.455843065684293e-06, + "loss": 0.4872, + "step": 36278 + }, + { + "epoch": 0.6268834669615704, + "grad_norm": 0.5751148148955023, + "learning_rate": 6.455319749687729e-06, + "loss": 0.1792, + "step": 36279 + }, + { + "epoch": 0.6269007464749793, + "grad_norm": 0.896853433173276, + "learning_rate": 6.454796444793375e-06, + "loss": 0.3928, + "step": 36280 + }, + { + "epoch": 0.6269180259883882, + "grad_norm": 0.8057242879265115, + "learning_rate": 6.454273151002866e-06, + "loss": 0.3197, + "step": 36281 + }, + { + "epoch": 0.6269353055017971, + "grad_norm": 0.9927482357482261, + "learning_rate": 6.453749868317848e-06, + "loss": 0.3996, + "step": 36282 + }, + { + "epoch": 0.626952585015206, + "grad_norm": 1.5113432550495691, + "learning_rate": 6.4532265967399475e-06, + "loss": 0.4159, + "step": 36283 + }, + { + "epoch": 0.6269698645286149, + "grad_norm": 0.7223027343525471, + "learning_rate": 6.452703336270818e-06, + "loss": 0.3185, + "step": 36284 + }, + { + "epoch": 0.6269871440420238, + "grad_norm": 1.3670402874619632, + "learning_rate": 6.452180086912086e-06, + "loss": 0.4359, + "step": 36285 + }, + { + "epoch": 0.6270044235554327, + "grad_norm": 1.0742283512956612, + "learning_rate": 6.451656848665398e-06, + "loss": 0.4527, + "step": 36286 + }, + { + "epoch": 0.6270217030688415, + "grad_norm": 0.9710301957886707, + "learning_rate": 6.451133621532388e-06, + "loss": 0.2711, + "step": 36287 + }, + { + "epoch": 0.6270389825822504, + "grad_norm": 1.4529766295093125, + "learning_rate": 6.450610405514703e-06, + "loss": 0.3726, + "step": 36288 + }, + { + "epoch": 0.6270562620956593, + "grad_norm": 1.4133128896932454, + "learning_rate": 6.450087200613972e-06, + "loss": 0.4866, + "step": 36289 + }, + { + "epoch": 0.6270735416090683, + "grad_norm": 1.3064536958739659, + "learning_rate": 6.4495640068318365e-06, + "loss": 0.4005, + "step": 36290 + }, + { + "epoch": 0.6270908211224772, + "grad_norm": 1.055696382934018, + "learning_rate": 6.449040824169939e-06, + "loss": 0.2788, + "step": 36291 + }, + { + "epoch": 0.6271081006358861, + "grad_norm": 1.3548818812214074, + "learning_rate": 6.448517652629913e-06, + "loss": 0.3776, + "step": 36292 + }, + { + "epoch": 0.627125380149295, + "grad_norm": 1.439370423222101, + "learning_rate": 6.447994492213403e-06, + "loss": 0.3461, + "step": 36293 + }, + { + "epoch": 0.6271426596627039, + "grad_norm": 1.2260992364867658, + "learning_rate": 6.44747134292204e-06, + "loss": 0.4637, + "step": 36294 + }, + { + "epoch": 0.6271599391761128, + "grad_norm": 1.0646255167128937, + "learning_rate": 6.44694820475747e-06, + "loss": 0.3154, + "step": 36295 + }, + { + "epoch": 0.6271772186895217, + "grad_norm": 1.0479336639037555, + "learning_rate": 6.446425077721326e-06, + "loss": 0.3979, + "step": 36296 + }, + { + "epoch": 0.6271944982029306, + "grad_norm": 1.0289462200883484, + "learning_rate": 6.44590196181525e-06, + "loss": 0.1801, + "step": 36297 + }, + { + "epoch": 0.6272117777163395, + "grad_norm": 1.7120134704098233, + "learning_rate": 6.44537885704088e-06, + "loss": 0.4165, + "step": 36298 + }, + { + "epoch": 0.6272290572297484, + "grad_norm": 1.5879883188130743, + "learning_rate": 6.444855763399852e-06, + "loss": 0.5811, + "step": 36299 + }, + { + "epoch": 0.6272463367431573, + "grad_norm": 1.153970749440768, + "learning_rate": 6.444332680893808e-06, + "loss": 0.4782, + "step": 36300 + }, + { + "epoch": 0.6272636162565662, + "grad_norm": 2.15042711299322, + "learning_rate": 6.443809609524381e-06, + "loss": 0.2946, + "step": 36301 + }, + { + "epoch": 0.6272808957699751, + "grad_norm": 2.1155209904453383, + "learning_rate": 6.4432865492932166e-06, + "loss": 0.3466, + "step": 36302 + }, + { + "epoch": 0.627298175283384, + "grad_norm": 1.660313805804368, + "learning_rate": 6.4427635002019425e-06, + "loss": 0.3869, + "step": 36303 + }, + { + "epoch": 0.627315454796793, + "grad_norm": 1.0756720416479673, + "learning_rate": 6.4422404622522095e-06, + "loss": 0.4772, + "step": 36304 + }, + { + "epoch": 0.6273327343102019, + "grad_norm": 1.1540994281511943, + "learning_rate": 6.441717435445647e-06, + "loss": 0.3606, + "step": 36305 + }, + { + "epoch": 0.6273500138236108, + "grad_norm": 0.8405407588707599, + "learning_rate": 6.441194419783897e-06, + "loss": 0.4153, + "step": 36306 + }, + { + "epoch": 0.6273672933370197, + "grad_norm": 1.7110432810575735, + "learning_rate": 6.440671415268596e-06, + "loss": 0.3238, + "step": 36307 + }, + { + "epoch": 0.6273845728504285, + "grad_norm": 1.3162031378646912, + "learning_rate": 6.440148421901384e-06, + "loss": 0.4762, + "step": 36308 + }, + { + "epoch": 0.6274018523638374, + "grad_norm": 1.3367613548774873, + "learning_rate": 6.439625439683898e-06, + "loss": 0.3973, + "step": 36309 + }, + { + "epoch": 0.6274191318772463, + "grad_norm": 1.9540815378146854, + "learning_rate": 6.439102468617772e-06, + "loss": 0.5082, + "step": 36310 + }, + { + "epoch": 0.6274364113906552, + "grad_norm": 1.3400517574551314, + "learning_rate": 6.438579508704651e-06, + "loss": 0.5755, + "step": 36311 + }, + { + "epoch": 0.6274536909040641, + "grad_norm": 1.4220627193523174, + "learning_rate": 6.438056559946168e-06, + "loss": 0.321, + "step": 36312 + }, + { + "epoch": 0.627470970417473, + "grad_norm": 1.0676185993367344, + "learning_rate": 6.437533622343966e-06, + "loss": 0.2661, + "step": 36313 + }, + { + "epoch": 0.6274882499308819, + "grad_norm": 0.8097301386019385, + "learning_rate": 6.437010695899675e-06, + "loss": 0.5501, + "step": 36314 + }, + { + "epoch": 0.6275055294442908, + "grad_norm": 1.3804467064397257, + "learning_rate": 6.436487780614939e-06, + "loss": 0.5071, + "step": 36315 + }, + { + "epoch": 0.6275228089576997, + "grad_norm": 0.7102442940483182, + "learning_rate": 6.435964876491392e-06, + "loss": 0.6509, + "step": 36316 + }, + { + "epoch": 0.6275400884711086, + "grad_norm": 1.3703336989868347, + "learning_rate": 6.435441983530677e-06, + "loss": 0.3856, + "step": 36317 + }, + { + "epoch": 0.6275573679845176, + "grad_norm": 0.8373129118844576, + "learning_rate": 6.434919101734425e-06, + "loss": 0.295, + "step": 36318 + }, + { + "epoch": 0.6275746474979265, + "grad_norm": 2.3804502861833545, + "learning_rate": 6.434396231104283e-06, + "loss": 0.2989, + "step": 36319 + }, + { + "epoch": 0.6275919270113354, + "grad_norm": 1.580802072743592, + "learning_rate": 6.433873371641881e-06, + "loss": 0.5733, + "step": 36320 + }, + { + "epoch": 0.6276092065247443, + "grad_norm": 1.3057756200623614, + "learning_rate": 6.433350523348856e-06, + "loss": 0.4026, + "step": 36321 + }, + { + "epoch": 0.6276264860381532, + "grad_norm": 0.7036179234613492, + "learning_rate": 6.432827686226851e-06, + "loss": 0.6809, + "step": 36322 + }, + { + "epoch": 0.6276437655515621, + "grad_norm": 1.436245473682387, + "learning_rate": 6.432304860277496e-06, + "loss": 0.3453, + "step": 36323 + }, + { + "epoch": 0.627661045064971, + "grad_norm": 1.7323315880824166, + "learning_rate": 6.43178204550244e-06, + "loss": 0.5208, + "step": 36324 + }, + { + "epoch": 0.6276783245783799, + "grad_norm": 1.2408777739679706, + "learning_rate": 6.4312592419033086e-06, + "loss": 0.485, + "step": 36325 + }, + { + "epoch": 0.6276956040917888, + "grad_norm": 1.2359405301731792, + "learning_rate": 6.430736449481747e-06, + "loss": 0.5013, + "step": 36326 + }, + { + "epoch": 0.6277128836051977, + "grad_norm": 0.8351543116183042, + "learning_rate": 6.4302136682393875e-06, + "loss": 0.3023, + "step": 36327 + }, + { + "epoch": 0.6277301631186066, + "grad_norm": 1.1839792126799356, + "learning_rate": 6.429690898177875e-06, + "loss": 0.5749, + "step": 36328 + }, + { + "epoch": 0.6277474426320154, + "grad_norm": 0.5255851405408509, + "learning_rate": 6.429168139298839e-06, + "loss": 0.6198, + "step": 36329 + }, + { + "epoch": 0.6277647221454243, + "grad_norm": 0.9010877811344513, + "learning_rate": 6.428645391603918e-06, + "loss": 0.503, + "step": 36330 + }, + { + "epoch": 0.6277820016588332, + "grad_norm": 0.8296631446318955, + "learning_rate": 6.428122655094754e-06, + "loss": 0.4104, + "step": 36331 + }, + { + "epoch": 0.6277992811722422, + "grad_norm": 0.8260118944864677, + "learning_rate": 6.427599929772979e-06, + "loss": 0.2986, + "step": 36332 + }, + { + "epoch": 0.6278165606856511, + "grad_norm": 1.395425444391935, + "learning_rate": 6.4270772156402365e-06, + "loss": 0.3965, + "step": 36333 + }, + { + "epoch": 0.62783384019906, + "grad_norm": 0.9749721958920555, + "learning_rate": 6.426554512698154e-06, + "loss": 0.5986, + "step": 36334 + }, + { + "epoch": 0.6278511197124689, + "grad_norm": 0.4752068291965882, + "learning_rate": 6.426031820948378e-06, + "loss": 0.5284, + "step": 36335 + }, + { + "epoch": 0.6278683992258778, + "grad_norm": 0.7113851478165388, + "learning_rate": 6.42550914039254e-06, + "loss": 0.4303, + "step": 36336 + }, + { + "epoch": 0.6278856787392867, + "grad_norm": 0.9363099948564513, + "learning_rate": 6.42498647103228e-06, + "loss": 0.454, + "step": 36337 + }, + { + "epoch": 0.6279029582526956, + "grad_norm": 1.0070790351632646, + "learning_rate": 6.4244638128692325e-06, + "loss": 0.3557, + "step": 36338 + }, + { + "epoch": 0.6279202377661045, + "grad_norm": 1.1420684165225097, + "learning_rate": 6.423941165905041e-06, + "loss": 0.2934, + "step": 36339 + }, + { + "epoch": 0.6279375172795134, + "grad_norm": 0.8268847004873949, + "learning_rate": 6.4234185301413345e-06, + "loss": 0.5168, + "step": 36340 + }, + { + "epoch": 0.6279547967929223, + "grad_norm": 1.8813875627776666, + "learning_rate": 6.4228959055797515e-06, + "loss": 0.3539, + "step": 36341 + }, + { + "epoch": 0.6279720763063312, + "grad_norm": 1.3950608883305113, + "learning_rate": 6.422373292221934e-06, + "loss": 0.3936, + "step": 36342 + }, + { + "epoch": 0.6279893558197401, + "grad_norm": 0.9625349351675043, + "learning_rate": 6.421850690069509e-06, + "loss": 0.4259, + "step": 36343 + }, + { + "epoch": 0.628006635333149, + "grad_norm": 0.8216374521655309, + "learning_rate": 6.421328099124127e-06, + "loss": 0.3126, + "step": 36344 + }, + { + "epoch": 0.628023914846558, + "grad_norm": 1.7821877929029997, + "learning_rate": 6.420805519387412e-06, + "loss": 0.5961, + "step": 36345 + }, + { + "epoch": 0.6280411943599669, + "grad_norm": 1.1955150277325675, + "learning_rate": 6.420282950861008e-06, + "loss": 0.4178, + "step": 36346 + }, + { + "epoch": 0.6280584738733758, + "grad_norm": 1.418120652785194, + "learning_rate": 6.419760393546548e-06, + "loss": 0.2295, + "step": 36347 + }, + { + "epoch": 0.6280757533867847, + "grad_norm": 1.1151305926940285, + "learning_rate": 6.419237847445675e-06, + "loss": 0.5216, + "step": 36348 + }, + { + "epoch": 0.6280930329001936, + "grad_norm": 1.7507919121284437, + "learning_rate": 6.41871531256002e-06, + "loss": 0.4352, + "step": 36349 + }, + { + "epoch": 0.6281103124136024, + "grad_norm": 1.0085187229928028, + "learning_rate": 6.418192788891218e-06, + "loss": 0.2584, + "step": 36350 + }, + { + "epoch": 0.6281275919270113, + "grad_norm": 1.2714521196206852, + "learning_rate": 6.417670276440909e-06, + "loss": 0.4263, + "step": 36351 + }, + { + "epoch": 0.6281448714404202, + "grad_norm": 0.9514489768722358, + "learning_rate": 6.417147775210728e-06, + "loss": 0.3015, + "step": 36352 + }, + { + "epoch": 0.6281621509538291, + "grad_norm": 1.0463044373971075, + "learning_rate": 6.416625285202317e-06, + "loss": 0.4106, + "step": 36353 + }, + { + "epoch": 0.628179430467238, + "grad_norm": 0.9570950760965408, + "learning_rate": 6.416102806417304e-06, + "loss": 0.4105, + "step": 36354 + }, + { + "epoch": 0.6281967099806469, + "grad_norm": 1.0811385146180177, + "learning_rate": 6.41558033885733e-06, + "loss": 0.4814, + "step": 36355 + }, + { + "epoch": 0.6282139894940558, + "grad_norm": 0.667499017825847, + "learning_rate": 6.41505788252403e-06, + "loss": 0.5984, + "step": 36356 + }, + { + "epoch": 0.6282312690074647, + "grad_norm": 1.197881229281545, + "learning_rate": 6.414535437419042e-06, + "loss": 0.4322, + "step": 36357 + }, + { + "epoch": 0.6282485485208736, + "grad_norm": 1.2180361548700038, + "learning_rate": 6.414013003544002e-06, + "loss": 0.4303, + "step": 36358 + }, + { + "epoch": 0.6282658280342825, + "grad_norm": 0.8817866329651771, + "learning_rate": 6.4134905809005474e-06, + "loss": 0.1986, + "step": 36359 + }, + { + "epoch": 0.6282831075476915, + "grad_norm": 0.8766135773091208, + "learning_rate": 6.412968169490312e-06, + "loss": 0.3101, + "step": 36360 + }, + { + "epoch": 0.6283003870611004, + "grad_norm": 1.0957710403978795, + "learning_rate": 6.41244576931493e-06, + "loss": 0.3758, + "step": 36361 + }, + { + "epoch": 0.6283176665745093, + "grad_norm": 1.0596649699006309, + "learning_rate": 6.411923380376046e-06, + "loss": 0.4326, + "step": 36362 + }, + { + "epoch": 0.6283349460879182, + "grad_norm": 0.9642823508649001, + "learning_rate": 6.411401002675284e-06, + "loss": 0.3785, + "step": 36363 + }, + { + "epoch": 0.6283522256013271, + "grad_norm": 1.0394092711880172, + "learning_rate": 6.410878636214292e-06, + "loss": 0.4136, + "step": 36364 + }, + { + "epoch": 0.628369505114736, + "grad_norm": 0.9835846370728084, + "learning_rate": 6.4103562809946965e-06, + "loss": 0.5479, + "step": 36365 + }, + { + "epoch": 0.6283867846281449, + "grad_norm": 1.0176782332041803, + "learning_rate": 6.4098339370181415e-06, + "loss": 0.193, + "step": 36366 + }, + { + "epoch": 0.6284040641415538, + "grad_norm": 1.1943791623142495, + "learning_rate": 6.409311604286257e-06, + "loss": 0.3343, + "step": 36367 + }, + { + "epoch": 0.6284213436549627, + "grad_norm": 0.9448465414291267, + "learning_rate": 6.408789282800684e-06, + "loss": 0.3669, + "step": 36368 + }, + { + "epoch": 0.6284386231683716, + "grad_norm": 1.0432427719727493, + "learning_rate": 6.408266972563051e-06, + "loss": 0.5173, + "step": 36369 + }, + { + "epoch": 0.6284559026817805, + "grad_norm": 1.0174972542422236, + "learning_rate": 6.407744673575005e-06, + "loss": 0.3661, + "step": 36370 + }, + { + "epoch": 0.6284731821951893, + "grad_norm": 1.0007607384375514, + "learning_rate": 6.407222385838174e-06, + "loss": 0.6011, + "step": 36371 + }, + { + "epoch": 0.6284904617085982, + "grad_norm": 1.105762494353837, + "learning_rate": 6.406700109354194e-06, + "loss": 0.3915, + "step": 36372 + }, + { + "epoch": 0.6285077412220071, + "grad_norm": 0.9833383419877838, + "learning_rate": 6.406177844124704e-06, + "loss": 0.3049, + "step": 36373 + }, + { + "epoch": 0.628525020735416, + "grad_norm": 1.007949858561849, + "learning_rate": 6.405655590151336e-06, + "loss": 0.3964, + "step": 36374 + }, + { + "epoch": 0.628542300248825, + "grad_norm": 0.9885959973118295, + "learning_rate": 6.405133347435729e-06, + "loss": 0.3036, + "step": 36375 + }, + { + "epoch": 0.6285595797622339, + "grad_norm": 1.323109450198113, + "learning_rate": 6.404611115979515e-06, + "loss": 0.5053, + "step": 36376 + }, + { + "epoch": 0.6285768592756428, + "grad_norm": 0.6861837098968223, + "learning_rate": 6.404088895784336e-06, + "loss": 0.1872, + "step": 36377 + }, + { + "epoch": 0.6285941387890517, + "grad_norm": 1.0068357555518472, + "learning_rate": 6.403566686851821e-06, + "loss": 0.4372, + "step": 36378 + }, + { + "epoch": 0.6286114183024606, + "grad_norm": 1.2253896790763747, + "learning_rate": 6.403044489183613e-06, + "loss": 0.2738, + "step": 36379 + }, + { + "epoch": 0.6286286978158695, + "grad_norm": 1.0938187457870496, + "learning_rate": 6.4025223027813395e-06, + "loss": 0.3216, + "step": 36380 + }, + { + "epoch": 0.6286459773292784, + "grad_norm": 1.5694021542200882, + "learning_rate": 6.402000127646639e-06, + "loss": 0.294, + "step": 36381 + }, + { + "epoch": 0.6286632568426873, + "grad_norm": 1.3962333353963272, + "learning_rate": 6.401477963781151e-06, + "loss": 0.3463, + "step": 36382 + }, + { + "epoch": 0.6286805363560962, + "grad_norm": 1.1172853851263642, + "learning_rate": 6.400955811186502e-06, + "loss": 0.4358, + "step": 36383 + }, + { + "epoch": 0.6286978158695051, + "grad_norm": 1.463419660261802, + "learning_rate": 6.4004336698643385e-06, + "loss": 0.3968, + "step": 36384 + }, + { + "epoch": 0.628715095382914, + "grad_norm": 1.2853569117623156, + "learning_rate": 6.399911539816286e-06, + "loss": 0.4732, + "step": 36385 + }, + { + "epoch": 0.6287323748963229, + "grad_norm": 1.6896008794384159, + "learning_rate": 6.399389421043987e-06, + "loss": 0.4912, + "step": 36386 + }, + { + "epoch": 0.6287496544097319, + "grad_norm": 0.8326875494104243, + "learning_rate": 6.398867313549071e-06, + "loss": 0.1442, + "step": 36387 + }, + { + "epoch": 0.6287669339231408, + "grad_norm": 0.9923030056327083, + "learning_rate": 6.398345217333181e-06, + "loss": 0.2517, + "step": 36388 + }, + { + "epoch": 0.6287842134365497, + "grad_norm": 0.8646498007648575, + "learning_rate": 6.39782313239794e-06, + "loss": 0.3791, + "step": 36389 + }, + { + "epoch": 0.6288014929499586, + "grad_norm": 0.7397251111475329, + "learning_rate": 6.397301058744998e-06, + "loss": 0.5769, + "step": 36390 + }, + { + "epoch": 0.6288187724633675, + "grad_norm": 0.8291717725006706, + "learning_rate": 6.396778996375981e-06, + "loss": 0.3608, + "step": 36391 + }, + { + "epoch": 0.6288360519767764, + "grad_norm": 1.6979822086922438, + "learning_rate": 6.396256945292524e-06, + "loss": 0.3769, + "step": 36392 + }, + { + "epoch": 0.6288533314901852, + "grad_norm": 1.2729402751332521, + "learning_rate": 6.3957349054962685e-06, + "loss": 0.3542, + "step": 36393 + }, + { + "epoch": 0.6288706110035941, + "grad_norm": 0.8970522796524388, + "learning_rate": 6.39521287698884e-06, + "loss": 0.3015, + "step": 36394 + }, + { + "epoch": 0.628887890517003, + "grad_norm": 1.258397629702359, + "learning_rate": 6.3946908597718816e-06, + "loss": 0.4021, + "step": 36395 + }, + { + "epoch": 0.6289051700304119, + "grad_norm": 0.8281697514269263, + "learning_rate": 6.394168853847023e-06, + "loss": 0.3525, + "step": 36396 + }, + { + "epoch": 0.6289224495438208, + "grad_norm": 1.1031015608120778, + "learning_rate": 6.393646859215903e-06, + "loss": 0.3261, + "step": 36397 + }, + { + "epoch": 0.6289397290572297, + "grad_norm": 0.7603765260644126, + "learning_rate": 6.393124875880155e-06, + "loss": 0.675, + "step": 36398 + }, + { + "epoch": 0.6289570085706386, + "grad_norm": 1.3562523123870933, + "learning_rate": 6.392602903841416e-06, + "loss": 0.44, + "step": 36399 + }, + { + "epoch": 0.6289742880840475, + "grad_norm": 0.6605053671941581, + "learning_rate": 6.3920809431013155e-06, + "loss": 0.383, + "step": 36400 + }, + { + "epoch": 0.6289915675974564, + "grad_norm": 1.1673134518898278, + "learning_rate": 6.391558993661494e-06, + "loss": 0.2504, + "step": 36401 + }, + { + "epoch": 0.6290088471108654, + "grad_norm": 1.013984425340842, + "learning_rate": 6.3910370555235855e-06, + "loss": 0.5107, + "step": 36402 + }, + { + "epoch": 0.6290261266242743, + "grad_norm": 1.184166471003964, + "learning_rate": 6.390515128689218e-06, + "loss": 0.5576, + "step": 36403 + }, + { + "epoch": 0.6290434061376832, + "grad_norm": 1.014509444720316, + "learning_rate": 6.389993213160037e-06, + "loss": 0.5159, + "step": 36404 + }, + { + "epoch": 0.6290606856510921, + "grad_norm": 1.0609475378620181, + "learning_rate": 6.389471308937667e-06, + "loss": 0.4636, + "step": 36405 + }, + { + "epoch": 0.629077965164501, + "grad_norm": 0.5523127836940565, + "learning_rate": 6.38894941602375e-06, + "loss": 0.75, + "step": 36406 + }, + { + "epoch": 0.6290952446779099, + "grad_norm": 1.4732932647485693, + "learning_rate": 6.3884275344199156e-06, + "loss": 0.4077, + "step": 36407 + }, + { + "epoch": 0.6291125241913188, + "grad_norm": 0.8615649567523394, + "learning_rate": 6.387905664127805e-06, + "loss": 0.3262, + "step": 36408 + }, + { + "epoch": 0.6291298037047277, + "grad_norm": 0.5714989345865458, + "learning_rate": 6.387383805149041e-06, + "loss": 0.6051, + "step": 36409 + }, + { + "epoch": 0.6291470832181366, + "grad_norm": 1.9249629843661131, + "learning_rate": 6.386861957485273e-06, + "loss": 0.4379, + "step": 36410 + }, + { + "epoch": 0.6291643627315455, + "grad_norm": 1.6193942904436796, + "learning_rate": 6.386340121138126e-06, + "loss": 0.7054, + "step": 36411 + }, + { + "epoch": 0.6291816422449544, + "grad_norm": 1.0238159623608898, + "learning_rate": 6.3858182961092344e-06, + "loss": 0.3099, + "step": 36412 + }, + { + "epoch": 0.6291989217583633, + "grad_norm": 1.1873023370416276, + "learning_rate": 6.385296482400237e-06, + "loss": 0.5748, + "step": 36413 + }, + { + "epoch": 0.6292162012717721, + "grad_norm": 1.9269591584162051, + "learning_rate": 6.384774680012763e-06, + "loss": 0.3931, + "step": 36414 + }, + { + "epoch": 0.629233480785181, + "grad_norm": 0.9559682494540973, + "learning_rate": 6.384252888948451e-06, + "loss": 0.3518, + "step": 36415 + }, + { + "epoch": 0.62925076029859, + "grad_norm": 1.0535392412608366, + "learning_rate": 6.383731109208933e-06, + "loss": 0.2685, + "step": 36416 + }, + { + "epoch": 0.6292680398119989, + "grad_norm": 1.289332724610437, + "learning_rate": 6.383209340795844e-06, + "loss": 0.3899, + "step": 36417 + }, + { + "epoch": 0.6292853193254078, + "grad_norm": 1.454507257085853, + "learning_rate": 6.382687583710817e-06, + "loss": 0.4305, + "step": 36418 + }, + { + "epoch": 0.6293025988388167, + "grad_norm": 1.433195620118338, + "learning_rate": 6.382165837955491e-06, + "loss": 0.3813, + "step": 36419 + }, + { + "epoch": 0.6293198783522256, + "grad_norm": 0.9549600797278558, + "learning_rate": 6.381644103531493e-06, + "loss": 0.3743, + "step": 36420 + }, + { + "epoch": 0.6293371578656345, + "grad_norm": 1.2275018946261105, + "learning_rate": 6.381122380440463e-06, + "loss": 0.3699, + "step": 36421 + }, + { + "epoch": 0.6293544373790434, + "grad_norm": 1.4875631744534519, + "learning_rate": 6.380600668684034e-06, + "loss": 0.4162, + "step": 36422 + }, + { + "epoch": 0.6293717168924523, + "grad_norm": 1.8486006899591252, + "learning_rate": 6.380078968263834e-06, + "loss": 0.6311, + "step": 36423 + }, + { + "epoch": 0.6293889964058612, + "grad_norm": 1.5412447941174328, + "learning_rate": 6.379557279181508e-06, + "loss": 0.5159, + "step": 36424 + }, + { + "epoch": 0.6294062759192701, + "grad_norm": 0.6656744974948078, + "learning_rate": 6.379035601438677e-06, + "loss": 0.6542, + "step": 36425 + }, + { + "epoch": 0.629423555432679, + "grad_norm": 1.3480538501030024, + "learning_rate": 6.378513935036985e-06, + "loss": 0.2925, + "step": 36426 + }, + { + "epoch": 0.6294408349460879, + "grad_norm": 1.114074319947642, + "learning_rate": 6.377992279978062e-06, + "loss": 0.4862, + "step": 36427 + }, + { + "epoch": 0.6294581144594968, + "grad_norm": 0.7874539495814218, + "learning_rate": 6.377470636263545e-06, + "loss": 0.3092, + "step": 36428 + }, + { + "epoch": 0.6294753939729058, + "grad_norm": 0.6941328038334247, + "learning_rate": 6.37694900389506e-06, + "loss": 0.3172, + "step": 36429 + }, + { + "epoch": 0.6294926734863147, + "grad_norm": 0.8204175871245775, + "learning_rate": 6.3764273828742516e-06, + "loss": 0.3264, + "step": 36430 + }, + { + "epoch": 0.6295099529997236, + "grad_norm": 0.8477445589084943, + "learning_rate": 6.375905773202744e-06, + "loss": 0.2775, + "step": 36431 + }, + { + "epoch": 0.6295272325131325, + "grad_norm": 0.8954237050305255, + "learning_rate": 6.3753841748821776e-06, + "loss": 0.3404, + "step": 36432 + }, + { + "epoch": 0.6295445120265414, + "grad_norm": 1.9273079849630899, + "learning_rate": 6.3748625879141845e-06, + "loss": 0.4075, + "step": 36433 + }, + { + "epoch": 0.6295617915399503, + "grad_norm": 1.5075204783854796, + "learning_rate": 6.374341012300394e-06, + "loss": 0.4147, + "step": 36434 + }, + { + "epoch": 0.6295790710533591, + "grad_norm": 1.3904808301086156, + "learning_rate": 6.373819448042444e-06, + "loss": 0.3683, + "step": 36435 + }, + { + "epoch": 0.629596350566768, + "grad_norm": 1.2236314372377552, + "learning_rate": 6.373297895141965e-06, + "loss": 0.648, + "step": 36436 + }, + { + "epoch": 0.6296136300801769, + "grad_norm": 0.606576711302855, + "learning_rate": 6.372776353600596e-06, + "loss": 0.6438, + "step": 36437 + }, + { + "epoch": 0.6296309095935858, + "grad_norm": 1.4633785334232186, + "learning_rate": 6.372254823419966e-06, + "loss": 0.5005, + "step": 36438 + }, + { + "epoch": 0.6296481891069947, + "grad_norm": 0.9988203186348569, + "learning_rate": 6.37173330460171e-06, + "loss": 0.4581, + "step": 36439 + }, + { + "epoch": 0.6296654686204036, + "grad_norm": 1.040151072182409, + "learning_rate": 6.37121179714746e-06, + "loss": 0.4948, + "step": 36440 + }, + { + "epoch": 0.6296827481338125, + "grad_norm": 0.8930208783397166, + "learning_rate": 6.370690301058852e-06, + "loss": 0.5632, + "step": 36441 + }, + { + "epoch": 0.6297000276472214, + "grad_norm": 1.3953245140519424, + "learning_rate": 6.370168816337519e-06, + "loss": 0.5484, + "step": 36442 + }, + { + "epoch": 0.6297173071606303, + "grad_norm": 1.1432552420738609, + "learning_rate": 6.369647342985088e-06, + "loss": 0.4866, + "step": 36443 + }, + { + "epoch": 0.6297345866740393, + "grad_norm": 2.0915648013712342, + "learning_rate": 6.369125881003202e-06, + "loss": 0.3054, + "step": 36444 + }, + { + "epoch": 0.6297518661874482, + "grad_norm": 2.460930707471005, + "learning_rate": 6.368604430393487e-06, + "loss": 0.4138, + "step": 36445 + }, + { + "epoch": 0.6297691457008571, + "grad_norm": 1.4937631306464463, + "learning_rate": 6.36808299115758e-06, + "loss": 0.6259, + "step": 36446 + }, + { + "epoch": 0.629786425214266, + "grad_norm": 1.1607668049455862, + "learning_rate": 6.367561563297112e-06, + "loss": 0.4405, + "step": 36447 + }, + { + "epoch": 0.6298037047276749, + "grad_norm": 1.2040211618035352, + "learning_rate": 6.367040146813721e-06, + "loss": 0.355, + "step": 36448 + }, + { + "epoch": 0.6298209842410838, + "grad_norm": 0.8162692597275398, + "learning_rate": 6.36651874170903e-06, + "loss": 0.5338, + "step": 36449 + }, + { + "epoch": 0.6298382637544927, + "grad_norm": 1.8638769358712486, + "learning_rate": 6.365997347984685e-06, + "loss": 0.3481, + "step": 36450 + }, + { + "epoch": 0.6298555432679016, + "grad_norm": 1.3028050885121176, + "learning_rate": 6.365475965642307e-06, + "loss": 0.6102, + "step": 36451 + }, + { + "epoch": 0.6298728227813105, + "grad_norm": 1.2396959469824465, + "learning_rate": 6.364954594683539e-06, + "loss": 0.5262, + "step": 36452 + }, + { + "epoch": 0.6298901022947194, + "grad_norm": 1.56042870485264, + "learning_rate": 6.364433235110009e-06, + "loss": 0.6083, + "step": 36453 + }, + { + "epoch": 0.6299073818081283, + "grad_norm": 1.0522800276822952, + "learning_rate": 6.3639118869233475e-06, + "loss": 0.3066, + "step": 36454 + }, + { + "epoch": 0.6299246613215372, + "grad_norm": 0.7878573930922602, + "learning_rate": 6.363390550125192e-06, + "loss": 0.2525, + "step": 36455 + }, + { + "epoch": 0.629941940834946, + "grad_norm": 1.1614503798111893, + "learning_rate": 6.362869224717172e-06, + "loss": 0.4274, + "step": 36456 + }, + { + "epoch": 0.6299592203483549, + "grad_norm": 1.413396337467271, + "learning_rate": 6.3623479107009236e-06, + "loss": 0.2753, + "step": 36457 + }, + { + "epoch": 0.6299764998617639, + "grad_norm": 0.9360514398219059, + "learning_rate": 6.361826608078077e-06, + "loss": 0.4476, + "step": 36458 + }, + { + "epoch": 0.6299937793751728, + "grad_norm": 1.615945444993342, + "learning_rate": 6.361305316850269e-06, + "loss": 0.2736, + "step": 36459 + }, + { + "epoch": 0.6300110588885817, + "grad_norm": 1.212529186153315, + "learning_rate": 6.360784037019126e-06, + "loss": 0.4992, + "step": 36460 + }, + { + "epoch": 0.6300283384019906, + "grad_norm": 0.7746355890271922, + "learning_rate": 6.360262768586285e-06, + "loss": 0.3328, + "step": 36461 + }, + { + "epoch": 0.6300456179153995, + "grad_norm": 1.4852281783970522, + "learning_rate": 6.3597415115533766e-06, + "loss": 0.4159, + "step": 36462 + }, + { + "epoch": 0.6300628974288084, + "grad_norm": 0.9640831571367073, + "learning_rate": 6.3592202659220345e-06, + "loss": 0.2612, + "step": 36463 + }, + { + "epoch": 0.6300801769422173, + "grad_norm": 1.4650510740997555, + "learning_rate": 6.3586990316938955e-06, + "loss": 0.4724, + "step": 36464 + }, + { + "epoch": 0.6300974564556262, + "grad_norm": 1.147356611668454, + "learning_rate": 6.3581778088705825e-06, + "loss": 0.4548, + "step": 36465 + }, + { + "epoch": 0.6301147359690351, + "grad_norm": 1.0326258059268345, + "learning_rate": 6.357656597453736e-06, + "loss": 0.2817, + "step": 36466 + }, + { + "epoch": 0.630132015482444, + "grad_norm": 1.2508616103533303, + "learning_rate": 6.357135397444985e-06, + "loss": 0.5139, + "step": 36467 + }, + { + "epoch": 0.6301492949958529, + "grad_norm": 0.9062950744829957, + "learning_rate": 6.356614208845965e-06, + "loss": 0.2979, + "step": 36468 + }, + { + "epoch": 0.6301665745092618, + "grad_norm": 1.0315163200988409, + "learning_rate": 6.3560930316583e-06, + "loss": 0.2556, + "step": 36469 + }, + { + "epoch": 0.6301838540226707, + "grad_norm": 1.2191050205746121, + "learning_rate": 6.355571865883636e-06, + "loss": 0.4823, + "step": 36470 + }, + { + "epoch": 0.6302011335360796, + "grad_norm": 1.3957838965072387, + "learning_rate": 6.3550507115235925e-06, + "loss": 0.3503, + "step": 36471 + }, + { + "epoch": 0.6302184130494886, + "grad_norm": 0.7328257245930407, + "learning_rate": 6.3545295685798106e-06, + "loss": 0.1609, + "step": 36472 + }, + { + "epoch": 0.6302356925628975, + "grad_norm": 1.4705131899496815, + "learning_rate": 6.35400843705392e-06, + "loss": 0.3331, + "step": 36473 + }, + { + "epoch": 0.6302529720763064, + "grad_norm": 0.933841844777221, + "learning_rate": 6.353487316947548e-06, + "loss": 0.3616, + "step": 36474 + }, + { + "epoch": 0.6302702515897153, + "grad_norm": 0.946049473001083, + "learning_rate": 6.352966208262333e-06, + "loss": 0.5008, + "step": 36475 + }, + { + "epoch": 0.6302875311031242, + "grad_norm": 1.8189701967812628, + "learning_rate": 6.352445110999903e-06, + "loss": 0.3899, + "step": 36476 + }, + { + "epoch": 0.630304810616533, + "grad_norm": 1.5356005952793992, + "learning_rate": 6.3519240251618954e-06, + "loss": 0.3376, + "step": 36477 + }, + { + "epoch": 0.6303220901299419, + "grad_norm": 1.110001693937641, + "learning_rate": 6.351402950749936e-06, + "loss": 0.5174, + "step": 36478 + }, + { + "epoch": 0.6303393696433508, + "grad_norm": 0.9320273698222226, + "learning_rate": 6.350881887765663e-06, + "loss": 0.3765, + "step": 36479 + }, + { + "epoch": 0.6303566491567597, + "grad_norm": 1.30981715306713, + "learning_rate": 6.350360836210703e-06, + "loss": 0.4523, + "step": 36480 + }, + { + "epoch": 0.6303739286701686, + "grad_norm": 0.8773392688565886, + "learning_rate": 6.349839796086691e-06, + "loss": 0.5286, + "step": 36481 + }, + { + "epoch": 0.6303912081835775, + "grad_norm": 1.0023841572208292, + "learning_rate": 6.349318767395256e-06, + "loss": 0.2969, + "step": 36482 + }, + { + "epoch": 0.6304084876969864, + "grad_norm": 3.6448380976223773, + "learning_rate": 6.348797750138034e-06, + "loss": 0.7052, + "step": 36483 + }, + { + "epoch": 0.6304257672103953, + "grad_norm": 1.2548866841453121, + "learning_rate": 6.348276744316658e-06, + "loss": 0.4535, + "step": 36484 + }, + { + "epoch": 0.6304430467238042, + "grad_norm": 1.495736290394778, + "learning_rate": 6.347755749932753e-06, + "loss": 0.3059, + "step": 36485 + }, + { + "epoch": 0.6304603262372132, + "grad_norm": 1.1536249105808714, + "learning_rate": 6.347234766987956e-06, + "loss": 0.3948, + "step": 36486 + }, + { + "epoch": 0.6304776057506221, + "grad_norm": 1.088038327096665, + "learning_rate": 6.346713795483895e-06, + "loss": 0.4968, + "step": 36487 + }, + { + "epoch": 0.630494885264031, + "grad_norm": 1.5266490534840877, + "learning_rate": 6.346192835422207e-06, + "loss": 0.5406, + "step": 36488 + }, + { + "epoch": 0.6305121647774399, + "grad_norm": 1.0529174052884498, + "learning_rate": 6.345671886804516e-06, + "loss": 0.3557, + "step": 36489 + }, + { + "epoch": 0.6305294442908488, + "grad_norm": 0.525296207992522, + "learning_rate": 6.345150949632464e-06, + "loss": 0.7357, + "step": 36490 + }, + { + "epoch": 0.6305467238042577, + "grad_norm": 1.3095664791254045, + "learning_rate": 6.344630023907673e-06, + "loss": 0.6493, + "step": 36491 + }, + { + "epoch": 0.6305640033176666, + "grad_norm": 1.0032068585547107, + "learning_rate": 6.3441091096317806e-06, + "loss": 0.4679, + "step": 36492 + }, + { + "epoch": 0.6305812828310755, + "grad_norm": 1.288902523897769, + "learning_rate": 6.343588206806415e-06, + "loss": 0.4275, + "step": 36493 + }, + { + "epoch": 0.6305985623444844, + "grad_norm": 1.3614397710674688, + "learning_rate": 6.343067315433212e-06, + "loss": 0.3273, + "step": 36494 + }, + { + "epoch": 0.6306158418578933, + "grad_norm": 0.8244418158137109, + "learning_rate": 6.342546435513799e-06, + "loss": 0.2258, + "step": 36495 + }, + { + "epoch": 0.6306331213713022, + "grad_norm": 2.1851737289379773, + "learning_rate": 6.342025567049805e-06, + "loss": 0.2475, + "step": 36496 + }, + { + "epoch": 0.6306504008847111, + "grad_norm": 1.4942434875847808, + "learning_rate": 6.34150471004287e-06, + "loss": 0.4771, + "step": 36497 + }, + { + "epoch": 0.6306676803981199, + "grad_norm": 1.0768726273264142, + "learning_rate": 6.3409838644946146e-06, + "loss": 0.4154, + "step": 36498 + }, + { + "epoch": 0.6306849599115288, + "grad_norm": 1.2215837672631504, + "learning_rate": 6.340463030406681e-06, + "loss": 0.3328, + "step": 36499 + }, + { + "epoch": 0.6307022394249377, + "grad_norm": 1.1534553627800328, + "learning_rate": 6.339942207780693e-06, + "loss": 0.2808, + "step": 36500 + }, + { + "epoch": 0.6307195189383467, + "grad_norm": 1.2189895948704101, + "learning_rate": 6.339421396618284e-06, + "loss": 0.3071, + "step": 36501 + }, + { + "epoch": 0.6307367984517556, + "grad_norm": 1.1470356811140885, + "learning_rate": 6.338900596921084e-06, + "loss": 0.3122, + "step": 36502 + }, + { + "epoch": 0.6307540779651645, + "grad_norm": 1.467062567183467, + "learning_rate": 6.338379808690729e-06, + "loss": 0.402, + "step": 36503 + }, + { + "epoch": 0.6307713574785734, + "grad_norm": 1.0399211901255494, + "learning_rate": 6.337859031928848e-06, + "loss": 0.4651, + "step": 36504 + }, + { + "epoch": 0.6307886369919823, + "grad_norm": 1.0213335873817453, + "learning_rate": 6.337338266637065e-06, + "loss": 0.314, + "step": 36505 + }, + { + "epoch": 0.6308059165053912, + "grad_norm": 1.6581614554004966, + "learning_rate": 6.336817512817022e-06, + "loss": 0.4571, + "step": 36506 + }, + { + "epoch": 0.6308231960188001, + "grad_norm": 1.227247933360162, + "learning_rate": 6.33629677047034e-06, + "loss": 0.4587, + "step": 36507 + }, + { + "epoch": 0.630840475532209, + "grad_norm": 1.0968530142160577, + "learning_rate": 6.33577603959866e-06, + "loss": 0.3106, + "step": 36508 + }, + { + "epoch": 0.6308577550456179, + "grad_norm": 1.713728660812748, + "learning_rate": 6.335255320203601e-06, + "loss": 0.3323, + "step": 36509 + }, + { + "epoch": 0.6308750345590268, + "grad_norm": 1.2485496231810835, + "learning_rate": 6.334734612286809e-06, + "loss": 0.2905, + "step": 36510 + }, + { + "epoch": 0.6308923140724357, + "grad_norm": 0.9845414893130625, + "learning_rate": 6.334213915849903e-06, + "loss": 0.4016, + "step": 36511 + }, + { + "epoch": 0.6309095935858446, + "grad_norm": 1.101011732878368, + "learning_rate": 6.333693230894518e-06, + "loss": 0.3824, + "step": 36512 + }, + { + "epoch": 0.6309268730992535, + "grad_norm": 1.4558939691052002, + "learning_rate": 6.333172557422282e-06, + "loss": 0.3415, + "step": 36513 + }, + { + "epoch": 0.6309441526126625, + "grad_norm": 1.133210244244022, + "learning_rate": 6.332651895434834e-06, + "loss": 0.4956, + "step": 36514 + }, + { + "epoch": 0.6309614321260714, + "grad_norm": 1.3287749719572806, + "learning_rate": 6.332131244933795e-06, + "loss": 0.4366, + "step": 36515 + }, + { + "epoch": 0.6309787116394803, + "grad_norm": 1.0842292652021486, + "learning_rate": 6.331610605920799e-06, + "loss": 0.5734, + "step": 36516 + }, + { + "epoch": 0.6309959911528892, + "grad_norm": 1.1529960871268157, + "learning_rate": 6.331089978397478e-06, + "loss": 0.351, + "step": 36517 + }, + { + "epoch": 0.6310132706662981, + "grad_norm": 1.2434187965167425, + "learning_rate": 6.330569362365462e-06, + "loss": 0.5296, + "step": 36518 + }, + { + "epoch": 0.631030550179707, + "grad_norm": 1.1314455397171372, + "learning_rate": 6.330048757826383e-06, + "loss": 0.2429, + "step": 36519 + }, + { + "epoch": 0.6310478296931158, + "grad_norm": 1.539975851686461, + "learning_rate": 6.329528164781868e-06, + "loss": 0.3711, + "step": 36520 + }, + { + "epoch": 0.6310651092065247, + "grad_norm": 1.0440900911267832, + "learning_rate": 6.329007583233553e-06, + "loss": 0.3833, + "step": 36521 + }, + { + "epoch": 0.6310823887199336, + "grad_norm": 0.7854833431120302, + "learning_rate": 6.3284870131830615e-06, + "loss": 0.8705, + "step": 36522 + }, + { + "epoch": 0.6310996682333425, + "grad_norm": 1.4164711448762553, + "learning_rate": 6.32796645463203e-06, + "loss": 0.3762, + "step": 36523 + }, + { + "epoch": 0.6311169477467514, + "grad_norm": 1.3064694959294132, + "learning_rate": 6.327445907582084e-06, + "loss": 0.3736, + "step": 36524 + }, + { + "epoch": 0.6311342272601603, + "grad_norm": 1.1848183166460033, + "learning_rate": 6.326925372034862e-06, + "loss": 0.2441, + "step": 36525 + }, + { + "epoch": 0.6311515067735692, + "grad_norm": 1.3252151041680578, + "learning_rate": 6.3264048479919844e-06, + "loss": 0.4955, + "step": 36526 + }, + { + "epoch": 0.6311687862869781, + "grad_norm": 0.948541878562174, + "learning_rate": 6.3258843354550866e-06, + "loss": 0.2359, + "step": 36527 + }, + { + "epoch": 0.631186065800387, + "grad_norm": 1.394563864959446, + "learning_rate": 6.325363834425802e-06, + "loss": 0.3765, + "step": 36528 + }, + { + "epoch": 0.631203345313796, + "grad_norm": 1.8192995045604587, + "learning_rate": 6.32484334490575e-06, + "loss": 0.2467, + "step": 36529 + }, + { + "epoch": 0.6312206248272049, + "grad_norm": 1.291190188183681, + "learning_rate": 6.324322866896575e-06, + "loss": 0.3249, + "step": 36530 + }, + { + "epoch": 0.6312379043406138, + "grad_norm": 2.2142937966006513, + "learning_rate": 6.323802400399894e-06, + "loss": 0.2762, + "step": 36531 + }, + { + "epoch": 0.6312551838540227, + "grad_norm": 1.1511178210165478, + "learning_rate": 6.323281945417348e-06, + "loss": 0.6235, + "step": 36532 + }, + { + "epoch": 0.6312724633674316, + "grad_norm": 1.275733824970154, + "learning_rate": 6.322761501950558e-06, + "loss": 0.4844, + "step": 36533 + }, + { + "epoch": 0.6312897428808405, + "grad_norm": 1.7006633640916773, + "learning_rate": 6.322241070001165e-06, + "loss": 0.5218, + "step": 36534 + }, + { + "epoch": 0.6313070223942494, + "grad_norm": 1.2497565500211245, + "learning_rate": 6.321720649570788e-06, + "loss": 0.3797, + "step": 36535 + }, + { + "epoch": 0.6313243019076583, + "grad_norm": 0.9201805113974628, + "learning_rate": 6.321200240661061e-06, + "loss": 0.4263, + "step": 36536 + }, + { + "epoch": 0.6313415814210672, + "grad_norm": 1.3300158393252597, + "learning_rate": 6.320679843273616e-06, + "loss": 0.2213, + "step": 36537 + }, + { + "epoch": 0.6313588609344761, + "grad_norm": 0.9793624483105958, + "learning_rate": 6.320159457410079e-06, + "loss": 0.3766, + "step": 36538 + }, + { + "epoch": 0.631376140447885, + "grad_norm": 1.8423461615784544, + "learning_rate": 6.319639083072087e-06, + "loss": 0.5589, + "step": 36539 + }, + { + "epoch": 0.631393419961294, + "grad_norm": 3.1196419595174105, + "learning_rate": 6.319118720261259e-06, + "loss": 0.397, + "step": 36540 + }, + { + "epoch": 0.6314106994747027, + "grad_norm": 1.0332548480147827, + "learning_rate": 6.318598368979235e-06, + "loss": 0.2756, + "step": 36541 + }, + { + "epoch": 0.6314279789881116, + "grad_norm": 0.9355610284072122, + "learning_rate": 6.318078029227639e-06, + "loss": 0.3365, + "step": 36542 + }, + { + "epoch": 0.6314452585015206, + "grad_norm": 1.4750128413142167, + "learning_rate": 6.317557701008104e-06, + "loss": 0.5286, + "step": 36543 + }, + { + "epoch": 0.6314625380149295, + "grad_norm": 1.228342849382358, + "learning_rate": 6.317037384322255e-06, + "loss": 0.2947, + "step": 36544 + }, + { + "epoch": 0.6314798175283384, + "grad_norm": 1.2478902396537401, + "learning_rate": 6.316517079171729e-06, + "loss": 0.4189, + "step": 36545 + }, + { + "epoch": 0.6314970970417473, + "grad_norm": 1.3426518403190946, + "learning_rate": 6.3159967855581505e-06, + "loss": 0.3765, + "step": 36546 + }, + { + "epoch": 0.6315143765551562, + "grad_norm": 1.1009078282258002, + "learning_rate": 6.3154765034831475e-06, + "loss": 0.4111, + "step": 36547 + }, + { + "epoch": 0.6315316560685651, + "grad_norm": 1.3631128952618339, + "learning_rate": 6.314956232948356e-06, + "loss": 0.4642, + "step": 36548 + }, + { + "epoch": 0.631548935581974, + "grad_norm": 1.3624763550338863, + "learning_rate": 6.314435973955396e-06, + "loss": 0.299, + "step": 36549 + }, + { + "epoch": 0.6315662150953829, + "grad_norm": 1.3164019158764741, + "learning_rate": 6.3139157265059085e-06, + "loss": 0.2765, + "step": 36550 + }, + { + "epoch": 0.6315834946087918, + "grad_norm": 1.3095345561854126, + "learning_rate": 6.313395490601513e-06, + "loss": 0.3209, + "step": 36551 + }, + { + "epoch": 0.6316007741222007, + "grad_norm": 1.2428910997989995, + "learning_rate": 6.3128752662438455e-06, + "loss": 0.23, + "step": 36552 + }, + { + "epoch": 0.6316180536356096, + "grad_norm": 0.6144231352577425, + "learning_rate": 6.312355053434529e-06, + "loss": 0.9367, + "step": 36553 + }, + { + "epoch": 0.6316353331490185, + "grad_norm": 1.0897990896117697, + "learning_rate": 6.311834852175203e-06, + "loss": 0.6244, + "step": 36554 + }, + { + "epoch": 0.6316526126624274, + "grad_norm": 0.8145874910283756, + "learning_rate": 6.311314662467487e-06, + "loss": 0.2014, + "step": 36555 + }, + { + "epoch": 0.6316698921758364, + "grad_norm": 0.8298788472561262, + "learning_rate": 6.310794484313012e-06, + "loss": 0.4061, + "step": 36556 + }, + { + "epoch": 0.6316871716892453, + "grad_norm": 1.876931829041042, + "learning_rate": 6.310274317713412e-06, + "loss": 0.4822, + "step": 36557 + }, + { + "epoch": 0.6317044512026542, + "grad_norm": 1.1637791890053188, + "learning_rate": 6.309754162670311e-06, + "loss": 0.3284, + "step": 36558 + }, + { + "epoch": 0.6317217307160631, + "grad_norm": 0.5463695139824017, + "learning_rate": 6.309234019185344e-06, + "loss": 0.3892, + "step": 36559 + }, + { + "epoch": 0.631739010229472, + "grad_norm": 1.6569447245411724, + "learning_rate": 6.308713887260133e-06, + "loss": 0.2335, + "step": 36560 + }, + { + "epoch": 0.6317562897428809, + "grad_norm": 1.105571875769031, + "learning_rate": 6.308193766896311e-06, + "loss": 0.5042, + "step": 36561 + }, + { + "epoch": 0.6317735692562897, + "grad_norm": 1.3971817884934092, + "learning_rate": 6.307673658095507e-06, + "loss": 0.5845, + "step": 36562 + }, + { + "epoch": 0.6317908487696986, + "grad_norm": 0.9771989997935551, + "learning_rate": 6.30715356085935e-06, + "loss": 0.3467, + "step": 36563 + }, + { + "epoch": 0.6318081282831075, + "grad_norm": 1.065240674737292, + "learning_rate": 6.306633475189467e-06, + "loss": 0.3957, + "step": 36564 + }, + { + "epoch": 0.6318254077965164, + "grad_norm": 1.3802900904830457, + "learning_rate": 6.3061134010874946e-06, + "loss": 0.27, + "step": 36565 + }, + { + "epoch": 0.6318426873099253, + "grad_norm": 0.8745944336623462, + "learning_rate": 6.305593338555052e-06, + "loss": 0.2663, + "step": 36566 + }, + { + "epoch": 0.6318599668233342, + "grad_norm": 0.9749432822995182, + "learning_rate": 6.30507328759377e-06, + "loss": 0.271, + "step": 36567 + }, + { + "epoch": 0.6318772463367431, + "grad_norm": 1.283441412227022, + "learning_rate": 6.304553248205283e-06, + "loss": 0.1928, + "step": 36568 + }, + { + "epoch": 0.631894525850152, + "grad_norm": 0.9660073337614795, + "learning_rate": 6.3040332203912106e-06, + "loss": 0.3884, + "step": 36569 + }, + { + "epoch": 0.631911805363561, + "grad_norm": 0.8947736014206124, + "learning_rate": 6.303513204153192e-06, + "loss": 0.3778, + "step": 36570 + }, + { + "epoch": 0.6319290848769699, + "grad_norm": 0.8575127990764593, + "learning_rate": 6.302993199492849e-06, + "loss": 0.3863, + "step": 36571 + }, + { + "epoch": 0.6319463643903788, + "grad_norm": 0.9447693266129171, + "learning_rate": 6.302473206411812e-06, + "loss": 0.2481, + "step": 36572 + }, + { + "epoch": 0.6319636439037877, + "grad_norm": 1.1202406393015116, + "learning_rate": 6.301953224911709e-06, + "loss": 0.4563, + "step": 36573 + }, + { + "epoch": 0.6319809234171966, + "grad_norm": 1.0475802072639324, + "learning_rate": 6.301433254994174e-06, + "loss": 0.3688, + "step": 36574 + }, + { + "epoch": 0.6319982029306055, + "grad_norm": 0.9595442435182983, + "learning_rate": 6.300913296660826e-06, + "loss": 0.5281, + "step": 36575 + }, + { + "epoch": 0.6320154824440144, + "grad_norm": 1.5974533715594628, + "learning_rate": 6.300393349913301e-06, + "loss": 0.3469, + "step": 36576 + }, + { + "epoch": 0.6320327619574233, + "grad_norm": 1.5893043727573748, + "learning_rate": 6.299873414753226e-06, + "loss": 0.6494, + "step": 36577 + }, + { + "epoch": 0.6320500414708322, + "grad_norm": 1.3063472996813927, + "learning_rate": 6.299353491182225e-06, + "loss": 0.4744, + "step": 36578 + }, + { + "epoch": 0.6320673209842411, + "grad_norm": 1.5268348659450888, + "learning_rate": 6.2988335792019366e-06, + "loss": 0.3236, + "step": 36579 + }, + { + "epoch": 0.63208460049765, + "grad_norm": 1.5542709106384067, + "learning_rate": 6.298313678813977e-06, + "loss": 0.6746, + "step": 36580 + }, + { + "epoch": 0.6321018800110589, + "grad_norm": 1.067945676826263, + "learning_rate": 6.297793790019983e-06, + "loss": 0.3408, + "step": 36581 + }, + { + "epoch": 0.6321191595244678, + "grad_norm": 1.1653095259676787, + "learning_rate": 6.297273912821578e-06, + "loss": 0.2873, + "step": 36582 + }, + { + "epoch": 0.6321364390378766, + "grad_norm": 1.23230959996144, + "learning_rate": 6.296754047220394e-06, + "loss": 0.402, + "step": 36583 + }, + { + "epoch": 0.6321537185512855, + "grad_norm": 1.187435576745361, + "learning_rate": 6.296234193218056e-06, + "loss": 0.253, + "step": 36584 + }, + { + "epoch": 0.6321709980646945, + "grad_norm": 0.9128136271632612, + "learning_rate": 6.295714350816198e-06, + "loss": 0.355, + "step": 36585 + }, + { + "epoch": 0.6321882775781034, + "grad_norm": 0.8550387421729374, + "learning_rate": 6.2951945200164425e-06, + "loss": 0.1899, + "step": 36586 + }, + { + "epoch": 0.6322055570915123, + "grad_norm": 1.2990633172941384, + "learning_rate": 6.294674700820416e-06, + "loss": 0.3772, + "step": 36587 + }, + { + "epoch": 0.6322228366049212, + "grad_norm": 0.7339922074604315, + "learning_rate": 6.2941548932297555e-06, + "loss": 0.4171, + "step": 36588 + }, + { + "epoch": 0.6322401161183301, + "grad_norm": 1.3944540944497534, + "learning_rate": 6.2936350972460765e-06, + "loss": 0.5652, + "step": 36589 + }, + { + "epoch": 0.632257395631739, + "grad_norm": 1.2999665653834862, + "learning_rate": 6.293115312871021e-06, + "loss": 0.4518, + "step": 36590 + }, + { + "epoch": 0.6322746751451479, + "grad_norm": 1.012245396857485, + "learning_rate": 6.292595540106205e-06, + "loss": 0.4431, + "step": 36591 + }, + { + "epoch": 0.6322919546585568, + "grad_norm": 1.3459670517915492, + "learning_rate": 6.292075778953264e-06, + "loss": 0.422, + "step": 36592 + }, + { + "epoch": 0.6323092341719657, + "grad_norm": 1.645127689376682, + "learning_rate": 6.291556029413822e-06, + "loss": 0.442, + "step": 36593 + }, + { + "epoch": 0.6323265136853746, + "grad_norm": 1.1196428919364196, + "learning_rate": 6.291036291489511e-06, + "loss": 0.4484, + "step": 36594 + }, + { + "epoch": 0.6323437931987835, + "grad_norm": 1.1656773451939717, + "learning_rate": 6.2905165651819545e-06, + "loss": 0.4536, + "step": 36595 + }, + { + "epoch": 0.6323610727121924, + "grad_norm": 0.6677789935696585, + "learning_rate": 6.289996850492782e-06, + "loss": 0.8184, + "step": 36596 + }, + { + "epoch": 0.6323783522256013, + "grad_norm": 0.90082845951411, + "learning_rate": 6.289477147423621e-06, + "loss": 0.4319, + "step": 36597 + }, + { + "epoch": 0.6323956317390103, + "grad_norm": 1.0519247507171035, + "learning_rate": 6.288957455976099e-06, + "loss": 0.1903, + "step": 36598 + }, + { + "epoch": 0.6324129112524192, + "grad_norm": 0.9088860554508271, + "learning_rate": 6.288437776151849e-06, + "loss": 0.4089, + "step": 36599 + }, + { + "epoch": 0.6324301907658281, + "grad_norm": 0.7376292107155107, + "learning_rate": 6.287918107952489e-06, + "loss": 0.302, + "step": 36600 + }, + { + "epoch": 0.632447470279237, + "grad_norm": 0.5415667667869504, + "learning_rate": 6.287398451379653e-06, + "loss": 0.7115, + "step": 36601 + }, + { + "epoch": 0.6324647497926459, + "grad_norm": 1.1036603380926544, + "learning_rate": 6.286878806434965e-06, + "loss": 0.4763, + "step": 36602 + }, + { + "epoch": 0.6324820293060548, + "grad_norm": 1.2893614648455052, + "learning_rate": 6.286359173120058e-06, + "loss": 0.4004, + "step": 36603 + }, + { + "epoch": 0.6324993088194636, + "grad_norm": 1.3820884284470636, + "learning_rate": 6.285839551436553e-06, + "loss": 0.4278, + "step": 36604 + }, + { + "epoch": 0.6325165883328725, + "grad_norm": 0.791115887316649, + "learning_rate": 6.285319941386085e-06, + "loss": 0.2611, + "step": 36605 + }, + { + "epoch": 0.6325338678462814, + "grad_norm": 1.0314541267381783, + "learning_rate": 6.284800342970274e-06, + "loss": 0.2959, + "step": 36606 + }, + { + "epoch": 0.6325511473596903, + "grad_norm": 0.8028048618335948, + "learning_rate": 6.284280756190753e-06, + "loss": 0.5096, + "step": 36607 + }, + { + "epoch": 0.6325684268730992, + "grad_norm": 1.079869365711037, + "learning_rate": 6.283761181049149e-06, + "loss": 0.3534, + "step": 36608 + }, + { + "epoch": 0.6325857063865081, + "grad_norm": 0.6083836910986439, + "learning_rate": 6.28324161754708e-06, + "loss": 0.3147, + "step": 36609 + }, + { + "epoch": 0.632602985899917, + "grad_norm": 0.7967555288126167, + "learning_rate": 6.282722065686186e-06, + "loss": 0.3399, + "step": 36610 + }, + { + "epoch": 0.6326202654133259, + "grad_norm": 0.9618968284892986, + "learning_rate": 6.282202525468088e-06, + "loss": 0.1665, + "step": 36611 + }, + { + "epoch": 0.6326375449267349, + "grad_norm": 1.504116529768081, + "learning_rate": 6.2816829968944135e-06, + "loss": 0.5303, + "step": 36612 + }, + { + "epoch": 0.6326548244401438, + "grad_norm": 1.4205766382030187, + "learning_rate": 6.28116347996679e-06, + "loss": 0.498, + "step": 36613 + }, + { + "epoch": 0.6326721039535527, + "grad_norm": 0.6585215101955499, + "learning_rate": 6.280643974686849e-06, + "loss": 0.256, + "step": 36614 + }, + { + "epoch": 0.6326893834669616, + "grad_norm": 1.2239252144397073, + "learning_rate": 6.280124481056209e-06, + "loss": 0.3185, + "step": 36615 + }, + { + "epoch": 0.6327066629803705, + "grad_norm": 1.1377752873782134, + "learning_rate": 6.279604999076505e-06, + "loss": 0.4867, + "step": 36616 + }, + { + "epoch": 0.6327239424937794, + "grad_norm": 1.6210459118381093, + "learning_rate": 6.2790855287493605e-06, + "loss": 0.4063, + "step": 36617 + }, + { + "epoch": 0.6327412220071883, + "grad_norm": 1.348019456515258, + "learning_rate": 6.278566070076401e-06, + "loss": 0.325, + "step": 36618 + }, + { + "epoch": 0.6327585015205972, + "grad_norm": 1.5028016959138353, + "learning_rate": 6.2780466230592585e-06, + "loss": 0.5015, + "step": 36619 + }, + { + "epoch": 0.6327757810340061, + "grad_norm": 0.6927717481104734, + "learning_rate": 6.277527187699553e-06, + "loss": 0.5557, + "step": 36620 + }, + { + "epoch": 0.632793060547415, + "grad_norm": 1.1969524025266933, + "learning_rate": 6.277007763998918e-06, + "loss": 0.314, + "step": 36621 + }, + { + "epoch": 0.6328103400608239, + "grad_norm": 0.9724655713909658, + "learning_rate": 6.276488351958975e-06, + "loss": 0.3921, + "step": 36622 + }, + { + "epoch": 0.6328276195742328, + "grad_norm": 0.8376545548368607, + "learning_rate": 6.2759689515813546e-06, + "loss": 0.3236, + "step": 36623 + }, + { + "epoch": 0.6328448990876417, + "grad_norm": 0.8203785540866655, + "learning_rate": 6.275449562867683e-06, + "loss": 0.3743, + "step": 36624 + }, + { + "epoch": 0.6328621786010505, + "grad_norm": 0.8621781781224119, + "learning_rate": 6.274930185819588e-06, + "loss": 0.5217, + "step": 36625 + }, + { + "epoch": 0.6328794581144594, + "grad_norm": 1.2608224860671764, + "learning_rate": 6.274410820438692e-06, + "loss": 0.2785, + "step": 36626 + }, + { + "epoch": 0.6328967376278684, + "grad_norm": 1.784810482730203, + "learning_rate": 6.273891466726626e-06, + "loss": 0.3843, + "step": 36627 + }, + { + "epoch": 0.6329140171412773, + "grad_norm": 1.5477747138098807, + "learning_rate": 6.273372124685018e-06, + "loss": 0.4863, + "step": 36628 + }, + { + "epoch": 0.6329312966546862, + "grad_norm": 1.8904467478079212, + "learning_rate": 6.272852794315485e-06, + "loss": 0.4398, + "step": 36629 + }, + { + "epoch": 0.6329485761680951, + "grad_norm": 1.5358565078780249, + "learning_rate": 6.272333475619665e-06, + "loss": 0.2702, + "step": 36630 + }, + { + "epoch": 0.632965855681504, + "grad_norm": 1.0869918041798188, + "learning_rate": 6.271814168599177e-06, + "loss": 0.4388, + "step": 36631 + }, + { + "epoch": 0.6329831351949129, + "grad_norm": 0.9756251816089208, + "learning_rate": 6.271294873255653e-06, + "loss": 0.3759, + "step": 36632 + }, + { + "epoch": 0.6330004147083218, + "grad_norm": 1.094395440618352, + "learning_rate": 6.2707755895907145e-06, + "loss": 0.4043, + "step": 36633 + }, + { + "epoch": 0.6330176942217307, + "grad_norm": 1.628936674446834, + "learning_rate": 6.270256317605994e-06, + "loss": 0.3787, + "step": 36634 + }, + { + "epoch": 0.6330349737351396, + "grad_norm": 0.996424643454177, + "learning_rate": 6.26973705730311e-06, + "loss": 0.3235, + "step": 36635 + }, + { + "epoch": 0.6330522532485485, + "grad_norm": 1.074649174751968, + "learning_rate": 6.269217808683696e-06, + "loss": 0.4825, + "step": 36636 + }, + { + "epoch": 0.6330695327619574, + "grad_norm": 1.305862031280763, + "learning_rate": 6.268698571749372e-06, + "loss": 0.3618, + "step": 36637 + }, + { + "epoch": 0.6330868122753663, + "grad_norm": 1.4841379995344406, + "learning_rate": 6.26817934650177e-06, + "loss": 0.3913, + "step": 36638 + }, + { + "epoch": 0.6331040917887752, + "grad_norm": 1.7907224200384149, + "learning_rate": 6.267660132942517e-06, + "loss": 0.3273, + "step": 36639 + }, + { + "epoch": 0.6331213713021842, + "grad_norm": 1.2591577743358955, + "learning_rate": 6.26714093107323e-06, + "loss": 0.4275, + "step": 36640 + }, + { + "epoch": 0.6331386508155931, + "grad_norm": 0.6910183566383278, + "learning_rate": 6.266621740895545e-06, + "loss": 0.3129, + "step": 36641 + }, + { + "epoch": 0.633155930329002, + "grad_norm": 1.0172376714839955, + "learning_rate": 6.266102562411083e-06, + "loss": 0.4403, + "step": 36642 + }, + { + "epoch": 0.6331732098424109, + "grad_norm": 1.045907802225736, + "learning_rate": 6.265583395621472e-06, + "loss": 0.4221, + "step": 36643 + }, + { + "epoch": 0.6331904893558198, + "grad_norm": 0.8509630500067735, + "learning_rate": 6.265064240528335e-06, + "loss": 0.3408, + "step": 36644 + }, + { + "epoch": 0.6332077688692287, + "grad_norm": 0.9590324405545899, + "learning_rate": 6.264545097133306e-06, + "loss": 0.4017, + "step": 36645 + }, + { + "epoch": 0.6332250483826375, + "grad_norm": 0.7715259332524693, + "learning_rate": 6.264025965438002e-06, + "loss": 0.3362, + "step": 36646 + }, + { + "epoch": 0.6332423278960464, + "grad_norm": 1.690549890685834, + "learning_rate": 6.263506845444054e-06, + "loss": 0.3203, + "step": 36647 + }, + { + "epoch": 0.6332596074094553, + "grad_norm": 0.9134121830637573, + "learning_rate": 6.2629877371530875e-06, + "loss": 0.3935, + "step": 36648 + }, + { + "epoch": 0.6332768869228642, + "grad_norm": 0.8521943669379539, + "learning_rate": 6.262468640566724e-06, + "loss": 0.3951, + "step": 36649 + }, + { + "epoch": 0.6332941664362731, + "grad_norm": 1.5523447656993556, + "learning_rate": 6.261949555686597e-06, + "loss": 0.3186, + "step": 36650 + }, + { + "epoch": 0.633311445949682, + "grad_norm": 1.125852326727371, + "learning_rate": 6.261430482514324e-06, + "loss": 0.3535, + "step": 36651 + }, + { + "epoch": 0.6333287254630909, + "grad_norm": 1.3101427935190717, + "learning_rate": 6.260911421051536e-06, + "loss": 0.3577, + "step": 36652 + }, + { + "epoch": 0.6333460049764998, + "grad_norm": 1.1402910774036223, + "learning_rate": 6.260392371299857e-06, + "loss": 0.4698, + "step": 36653 + }, + { + "epoch": 0.6333632844899088, + "grad_norm": 1.431693092112972, + "learning_rate": 6.259873333260917e-06, + "loss": 0.3037, + "step": 36654 + }, + { + "epoch": 0.6333805640033177, + "grad_norm": 0.8396186327890306, + "learning_rate": 6.259354306936334e-06, + "loss": 0.3641, + "step": 36655 + }, + { + "epoch": 0.6333978435167266, + "grad_norm": 1.509834375314941, + "learning_rate": 6.2588352923277406e-06, + "loss": 0.5024, + "step": 36656 + }, + { + "epoch": 0.6334151230301355, + "grad_norm": 0.9451317732967891, + "learning_rate": 6.2583162894367556e-06, + "loss": 0.3852, + "step": 36657 + }, + { + "epoch": 0.6334324025435444, + "grad_norm": 1.0563334986135555, + "learning_rate": 6.257797298265012e-06, + "loss": 0.2841, + "step": 36658 + }, + { + "epoch": 0.6334496820569533, + "grad_norm": 0.9324778115628645, + "learning_rate": 6.257278318814131e-06, + "loss": 0.4003, + "step": 36659 + }, + { + "epoch": 0.6334669615703622, + "grad_norm": 1.4309897416765676, + "learning_rate": 6.256759351085737e-06, + "loss": 0.4337, + "step": 36660 + }, + { + "epoch": 0.6334842410837711, + "grad_norm": 1.0205687279336693, + "learning_rate": 6.256240395081459e-06, + "loss": 0.2367, + "step": 36661 + }, + { + "epoch": 0.63350152059718, + "grad_norm": 1.3719541618188418, + "learning_rate": 6.255721450802918e-06, + "loss": 0.4203, + "step": 36662 + }, + { + "epoch": 0.6335188001105889, + "grad_norm": 1.243905468532494, + "learning_rate": 6.255202518251744e-06, + "loss": 0.4482, + "step": 36663 + }, + { + "epoch": 0.6335360796239978, + "grad_norm": 1.1864397856495839, + "learning_rate": 6.2546835974295585e-06, + "loss": 0.4073, + "step": 36664 + }, + { + "epoch": 0.6335533591374067, + "grad_norm": 1.3249413670917636, + "learning_rate": 6.254164688337993e-06, + "loss": 0.2951, + "step": 36665 + }, + { + "epoch": 0.6335706386508156, + "grad_norm": 1.3529523782675579, + "learning_rate": 6.2536457909786644e-06, + "loss": 0.5475, + "step": 36666 + }, + { + "epoch": 0.6335879181642246, + "grad_norm": 0.8465778059539137, + "learning_rate": 6.253126905353204e-06, + "loss": 0.7724, + "step": 36667 + }, + { + "epoch": 0.6336051976776333, + "grad_norm": 1.1662593010936018, + "learning_rate": 6.252608031463232e-06, + "loss": 0.594, + "step": 36668 + }, + { + "epoch": 0.6336224771910423, + "grad_norm": 1.2856583999646192, + "learning_rate": 6.252089169310378e-06, + "loss": 0.3909, + "step": 36669 + }, + { + "epoch": 0.6336397567044512, + "grad_norm": 1.211855842607535, + "learning_rate": 6.25157031889627e-06, + "loss": 0.3822, + "step": 36670 + }, + { + "epoch": 0.6336570362178601, + "grad_norm": 0.890616122052624, + "learning_rate": 6.251051480222522e-06, + "loss": 0.3181, + "step": 36671 + }, + { + "epoch": 0.633674315731269, + "grad_norm": 1.6262360631128905, + "learning_rate": 6.250532653290768e-06, + "loss": 0.3657, + "step": 36672 + }, + { + "epoch": 0.6336915952446779, + "grad_norm": 1.4479728864876658, + "learning_rate": 6.25001383810263e-06, + "loss": 0.5989, + "step": 36673 + }, + { + "epoch": 0.6337088747580868, + "grad_norm": 1.0743899138820285, + "learning_rate": 6.2494950346597375e-06, + "loss": 0.4694, + "step": 36674 + }, + { + "epoch": 0.6337261542714957, + "grad_norm": 1.3037811650381568, + "learning_rate": 6.2489762429637065e-06, + "loss": 0.5202, + "step": 36675 + }, + { + "epoch": 0.6337434337849046, + "grad_norm": 0.8936118600997439, + "learning_rate": 6.248457463016169e-06, + "loss": 0.4467, + "step": 36676 + }, + { + "epoch": 0.6337607132983135, + "grad_norm": 1.0159595851573422, + "learning_rate": 6.247938694818747e-06, + "loss": 0.4008, + "step": 36677 + }, + { + "epoch": 0.6337779928117224, + "grad_norm": 1.2566708285776216, + "learning_rate": 6.247419938373067e-06, + "loss": 0.4339, + "step": 36678 + }, + { + "epoch": 0.6337952723251313, + "grad_norm": 0.9074431125403482, + "learning_rate": 6.246901193680757e-06, + "loss": 0.2776, + "step": 36679 + }, + { + "epoch": 0.6338125518385402, + "grad_norm": 0.9837039441771995, + "learning_rate": 6.246382460743431e-06, + "loss": 0.3199, + "step": 36680 + }, + { + "epoch": 0.6338298313519491, + "grad_norm": 2.3293200167251835, + "learning_rate": 6.2458637395627234e-06, + "loss": 0.3689, + "step": 36681 + }, + { + "epoch": 0.633847110865358, + "grad_norm": 0.9994338636727647, + "learning_rate": 6.245345030140254e-06, + "loss": 0.2981, + "step": 36682 + }, + { + "epoch": 0.633864390378767, + "grad_norm": 1.1143130274668427, + "learning_rate": 6.244826332477651e-06, + "loss": 0.3624, + "step": 36683 + }, + { + "epoch": 0.6338816698921759, + "grad_norm": 0.9691546244117625, + "learning_rate": 6.244307646576536e-06, + "loss": 0.3558, + "step": 36684 + }, + { + "epoch": 0.6338989494055848, + "grad_norm": 1.1543326020843276, + "learning_rate": 6.243788972438538e-06, + "loss": 0.3951, + "step": 36685 + }, + { + "epoch": 0.6339162289189937, + "grad_norm": 0.9072994851872255, + "learning_rate": 6.243270310065276e-06, + "loss": 0.5143, + "step": 36686 + }, + { + "epoch": 0.6339335084324026, + "grad_norm": 0.6329339429865118, + "learning_rate": 6.242751659458377e-06, + "loss": 0.9697, + "step": 36687 + }, + { + "epoch": 0.6339507879458115, + "grad_norm": 0.8823204136895465, + "learning_rate": 6.242233020619465e-06, + "loss": 0.3191, + "step": 36688 + }, + { + "epoch": 0.6339680674592203, + "grad_norm": 0.821119729428495, + "learning_rate": 6.241714393550166e-06, + "loss": 0.4376, + "step": 36689 + }, + { + "epoch": 0.6339853469726292, + "grad_norm": 0.7341204507089404, + "learning_rate": 6.241195778252105e-06, + "loss": 0.2495, + "step": 36690 + }, + { + "epoch": 0.6340026264860381, + "grad_norm": 1.744202280161565, + "learning_rate": 6.2406771747269e-06, + "loss": 0.448, + "step": 36691 + }, + { + "epoch": 0.634019905999447, + "grad_norm": 0.7702618409328114, + "learning_rate": 6.2401585829761825e-06, + "loss": 0.3236, + "step": 36692 + }, + { + "epoch": 0.6340371855128559, + "grad_norm": 0.4605296689534408, + "learning_rate": 6.239640003001573e-06, + "loss": 0.5958, + "step": 36693 + }, + { + "epoch": 0.6340544650262648, + "grad_norm": 0.7800158428862829, + "learning_rate": 6.239121434804698e-06, + "loss": 0.2623, + "step": 36694 + }, + { + "epoch": 0.6340717445396737, + "grad_norm": 1.451963987147135, + "learning_rate": 6.238602878387179e-06, + "loss": 0.4951, + "step": 36695 + }, + { + "epoch": 0.6340890240530827, + "grad_norm": 0.9153029274028192, + "learning_rate": 6.2380843337506426e-06, + "loss": 0.343, + "step": 36696 + }, + { + "epoch": 0.6341063035664916, + "grad_norm": 1.7762986188345145, + "learning_rate": 6.2375658008967096e-06, + "loss": 0.3854, + "step": 36697 + }, + { + "epoch": 0.6341235830799005, + "grad_norm": 1.0124482552787362, + "learning_rate": 6.237047279827009e-06, + "loss": 0.4202, + "step": 36698 + }, + { + "epoch": 0.6341408625933094, + "grad_norm": 0.7060230837721004, + "learning_rate": 6.236528770543161e-06, + "loss": 0.7506, + "step": 36699 + }, + { + "epoch": 0.6341581421067183, + "grad_norm": 0.9728013067599597, + "learning_rate": 6.236010273046794e-06, + "loss": 0.412, + "step": 36700 + }, + { + "epoch": 0.6341754216201272, + "grad_norm": 1.538716513147945, + "learning_rate": 6.235491787339527e-06, + "loss": 0.3668, + "step": 36701 + }, + { + "epoch": 0.6341927011335361, + "grad_norm": 1.5221377567589234, + "learning_rate": 6.234973313422984e-06, + "loss": 0.4535, + "step": 36702 + }, + { + "epoch": 0.634209980646945, + "grad_norm": 1.1530205435451093, + "learning_rate": 6.2344548512987926e-06, + "loss": 0.6183, + "step": 36703 + }, + { + "epoch": 0.6342272601603539, + "grad_norm": 1.798824544963629, + "learning_rate": 6.2339364009685725e-06, + "loss": 0.4131, + "step": 36704 + }, + { + "epoch": 0.6342445396737628, + "grad_norm": 1.107588195725599, + "learning_rate": 6.2334179624339545e-06, + "loss": 0.4055, + "step": 36705 + }, + { + "epoch": 0.6342618191871717, + "grad_norm": 1.2435398805922384, + "learning_rate": 6.232899535696553e-06, + "loss": 0.273, + "step": 36706 + }, + { + "epoch": 0.6342790987005806, + "grad_norm": 0.959812929624935, + "learning_rate": 6.232381120757999e-06, + "loss": 0.3037, + "step": 36707 + }, + { + "epoch": 0.6342963782139895, + "grad_norm": 1.1010221175581834, + "learning_rate": 6.231862717619912e-06, + "loss": 0.3377, + "step": 36708 + }, + { + "epoch": 0.6343136577273984, + "grad_norm": 0.7939786064848418, + "learning_rate": 6.231344326283918e-06, + "loss": 0.4125, + "step": 36709 + }, + { + "epoch": 0.6343309372408072, + "grad_norm": 1.55557496939516, + "learning_rate": 6.230825946751643e-06, + "loss": 0.3728, + "step": 36710 + }, + { + "epoch": 0.6343482167542162, + "grad_norm": 1.8670440698108146, + "learning_rate": 6.230307579024704e-06, + "loss": 0.2571, + "step": 36711 + }, + { + "epoch": 0.6343654962676251, + "grad_norm": 1.070088019169979, + "learning_rate": 6.2297892231047285e-06, + "loss": 0.3213, + "step": 36712 + }, + { + "epoch": 0.634382775781034, + "grad_norm": 2.0246306289155274, + "learning_rate": 6.229270878993339e-06, + "loss": 0.6178, + "step": 36713 + }, + { + "epoch": 0.6344000552944429, + "grad_norm": 0.7820573264688762, + "learning_rate": 6.228752546692163e-06, + "loss": 0.5207, + "step": 36714 + }, + { + "epoch": 0.6344173348078518, + "grad_norm": 1.5715453782760906, + "learning_rate": 6.228234226202816e-06, + "loss": 0.325, + "step": 36715 + }, + { + "epoch": 0.6344346143212607, + "grad_norm": 1.0427464596484923, + "learning_rate": 6.22771591752693e-06, + "loss": 0.4114, + "step": 36716 + }, + { + "epoch": 0.6344518938346696, + "grad_norm": 1.3414186095581606, + "learning_rate": 6.227197620666121e-06, + "loss": 0.4818, + "step": 36717 + }, + { + "epoch": 0.6344691733480785, + "grad_norm": 1.326549030099783, + "learning_rate": 6.226679335622019e-06, + "loss": 0.3781, + "step": 36718 + }, + { + "epoch": 0.6344864528614874, + "grad_norm": 1.2082046483957303, + "learning_rate": 6.226161062396241e-06, + "loss": 0.3721, + "step": 36719 + }, + { + "epoch": 0.6345037323748963, + "grad_norm": 1.2697306094635439, + "learning_rate": 6.2256428009904166e-06, + "loss": 0.609, + "step": 36720 + }, + { + "epoch": 0.6345210118883052, + "grad_norm": 0.7986452490259047, + "learning_rate": 6.2251245514061656e-06, + "loss": 0.2422, + "step": 36721 + }, + { + "epoch": 0.6345382914017141, + "grad_norm": 0.9822811154724521, + "learning_rate": 6.224606313645107e-06, + "loss": 0.4017, + "step": 36722 + }, + { + "epoch": 0.634555570915123, + "grad_norm": 1.3203662045656215, + "learning_rate": 6.2240880877088725e-06, + "loss": 0.3803, + "step": 36723 + }, + { + "epoch": 0.634572850428532, + "grad_norm": 0.9237503775615262, + "learning_rate": 6.223569873599079e-06, + "loss": 0.2042, + "step": 36724 + }, + { + "epoch": 0.6345901299419409, + "grad_norm": 0.7555546276094127, + "learning_rate": 6.223051671317355e-06, + "loss": 0.7542, + "step": 36725 + }, + { + "epoch": 0.6346074094553498, + "grad_norm": 1.137694803158735, + "learning_rate": 6.222533480865316e-06, + "loss": 0.2178, + "step": 36726 + }, + { + "epoch": 0.6346246889687587, + "grad_norm": 1.1825522899398926, + "learning_rate": 6.222015302244593e-06, + "loss": 0.3433, + "step": 36727 + }, + { + "epoch": 0.6346419684821676, + "grad_norm": 1.4636596650276439, + "learning_rate": 6.221497135456801e-06, + "loss": 0.345, + "step": 36728 + }, + { + "epoch": 0.6346592479955765, + "grad_norm": 1.3260858869315815, + "learning_rate": 6.220978980503569e-06, + "loss": 0.3439, + "step": 36729 + }, + { + "epoch": 0.6346765275089854, + "grad_norm": 1.28541689842987, + "learning_rate": 6.220460837386522e-06, + "loss": 0.4661, + "step": 36730 + }, + { + "epoch": 0.6346938070223942, + "grad_norm": 1.275988532026422, + "learning_rate": 6.219942706107273e-06, + "loss": 0.2696, + "step": 36731 + }, + { + "epoch": 0.6347110865358031, + "grad_norm": 0.8641826304320575, + "learning_rate": 6.2194245866674545e-06, + "loss": 0.4544, + "step": 36732 + }, + { + "epoch": 0.634728366049212, + "grad_norm": 1.5665220512612372, + "learning_rate": 6.218906479068683e-06, + "loss": 0.3043, + "step": 36733 + }, + { + "epoch": 0.6347456455626209, + "grad_norm": 0.8015067928465213, + "learning_rate": 6.218388383312586e-06, + "loss": 0.4721, + "step": 36734 + }, + { + "epoch": 0.6347629250760298, + "grad_norm": 0.9886526711326546, + "learning_rate": 6.217870299400781e-06, + "loss": 0.4117, + "step": 36735 + }, + { + "epoch": 0.6347802045894387, + "grad_norm": 0.7142577938686627, + "learning_rate": 6.217352227334896e-06, + "loss": 0.5959, + "step": 36736 + }, + { + "epoch": 0.6347974841028476, + "grad_norm": 1.0393114187562003, + "learning_rate": 6.21683416711655e-06, + "loss": 0.4461, + "step": 36737 + }, + { + "epoch": 0.6348147636162565, + "grad_norm": 0.8225070421599737, + "learning_rate": 6.216316118747367e-06, + "loss": 0.3709, + "step": 36738 + }, + { + "epoch": 0.6348320431296655, + "grad_norm": 1.2886201905856562, + "learning_rate": 6.21579808222897e-06, + "loss": 0.5766, + "step": 36739 + }, + { + "epoch": 0.6348493226430744, + "grad_norm": 0.9857336429378846, + "learning_rate": 6.2152800575629826e-06, + "loss": 0.3224, + "step": 36740 + }, + { + "epoch": 0.6348666021564833, + "grad_norm": 1.6482287304775545, + "learning_rate": 6.2147620447510245e-06, + "loss": 0.5475, + "step": 36741 + }, + { + "epoch": 0.6348838816698922, + "grad_norm": 1.0081952994936947, + "learning_rate": 6.214244043794717e-06, + "loss": 0.3441, + "step": 36742 + }, + { + "epoch": 0.6349011611833011, + "grad_norm": 1.1699828426988383, + "learning_rate": 6.213726054695688e-06, + "loss": 0.5481, + "step": 36743 + }, + { + "epoch": 0.63491844069671, + "grad_norm": 1.126742219090014, + "learning_rate": 6.213208077455555e-06, + "loss": 0.462, + "step": 36744 + }, + { + "epoch": 0.6349357202101189, + "grad_norm": 1.0266179777084177, + "learning_rate": 6.2126901120759445e-06, + "loss": 0.3673, + "step": 36745 + }, + { + "epoch": 0.6349529997235278, + "grad_norm": 0.5489113948551606, + "learning_rate": 6.212172158558474e-06, + "loss": 0.508, + "step": 36746 + }, + { + "epoch": 0.6349702792369367, + "grad_norm": 0.9918499419020645, + "learning_rate": 6.211654216904768e-06, + "loss": 0.605, + "step": 36747 + }, + { + "epoch": 0.6349875587503456, + "grad_norm": 1.115589773081837, + "learning_rate": 6.211136287116448e-06, + "loss": 0.4683, + "step": 36748 + }, + { + "epoch": 0.6350048382637545, + "grad_norm": 1.246738414421853, + "learning_rate": 6.21061836919514e-06, + "loss": 0.3805, + "step": 36749 + }, + { + "epoch": 0.6350221177771634, + "grad_norm": 1.1360307200329767, + "learning_rate": 6.21010046314246e-06, + "loss": 0.5976, + "step": 36750 + }, + { + "epoch": 0.6350393972905723, + "grad_norm": 0.8106317874640936, + "learning_rate": 6.209582568960037e-06, + "loss": 0.4706, + "step": 36751 + }, + { + "epoch": 0.6350566768039811, + "grad_norm": 0.5902547098185914, + "learning_rate": 6.209064686649488e-06, + "loss": 0.8492, + "step": 36752 + }, + { + "epoch": 0.63507395631739, + "grad_norm": 1.0854380863832946, + "learning_rate": 6.208546816212436e-06, + "loss": 0.27, + "step": 36753 + }, + { + "epoch": 0.635091235830799, + "grad_norm": 1.1976475881870956, + "learning_rate": 6.208028957650507e-06, + "loss": 0.5454, + "step": 36754 + }, + { + "epoch": 0.6351085153442079, + "grad_norm": 1.0988279089376893, + "learning_rate": 6.2075111109653145e-06, + "loss": 0.4272, + "step": 36755 + }, + { + "epoch": 0.6351257948576168, + "grad_norm": 1.0003443220973514, + "learning_rate": 6.206993276158487e-06, + "loss": 0.399, + "step": 36756 + }, + { + "epoch": 0.6351430743710257, + "grad_norm": 1.049408208624745, + "learning_rate": 6.206475453231644e-06, + "loss": 0.4942, + "step": 36757 + }, + { + "epoch": 0.6351603538844346, + "grad_norm": 1.0068300116208055, + "learning_rate": 6.20595764218641e-06, + "loss": 0.3801, + "step": 36758 + }, + { + "epoch": 0.6351776333978435, + "grad_norm": 1.1816353739976377, + "learning_rate": 6.205439843024402e-06, + "loss": 0.3527, + "step": 36759 + }, + { + "epoch": 0.6351949129112524, + "grad_norm": 1.0653448945393154, + "learning_rate": 6.20492205574725e-06, + "loss": 0.4632, + "step": 36760 + }, + { + "epoch": 0.6352121924246613, + "grad_norm": 1.1403687883740101, + "learning_rate": 6.204404280356568e-06, + "loss": 0.3872, + "step": 36761 + }, + { + "epoch": 0.6352294719380702, + "grad_norm": 1.6221333649370937, + "learning_rate": 6.203886516853978e-06, + "loss": 0.322, + "step": 36762 + }, + { + "epoch": 0.6352467514514791, + "grad_norm": 1.1759164979244716, + "learning_rate": 6.203368765241107e-06, + "loss": 0.3552, + "step": 36763 + }, + { + "epoch": 0.635264030964888, + "grad_norm": 0.9972794863988574, + "learning_rate": 6.202851025519569e-06, + "loss": 0.3422, + "step": 36764 + }, + { + "epoch": 0.635281310478297, + "grad_norm": 1.0984897379813245, + "learning_rate": 6.202333297690997e-06, + "loss": 0.3313, + "step": 36765 + }, + { + "epoch": 0.6352985899917059, + "grad_norm": 1.6005681244949, + "learning_rate": 6.201815581757e-06, + "loss": 0.3106, + "step": 36766 + }, + { + "epoch": 0.6353158695051148, + "grad_norm": 1.102904011557302, + "learning_rate": 6.201297877719208e-06, + "loss": 0.8547, + "step": 36767 + }, + { + "epoch": 0.6353331490185237, + "grad_norm": 1.4459664147587774, + "learning_rate": 6.200780185579238e-06, + "loss": 0.5129, + "step": 36768 + }, + { + "epoch": 0.6353504285319326, + "grad_norm": 1.0938333112866614, + "learning_rate": 6.2002625053387145e-06, + "loss": 0.3533, + "step": 36769 + }, + { + "epoch": 0.6353677080453415, + "grad_norm": 1.6203333790178378, + "learning_rate": 6.199744836999256e-06, + "loss": 0.4253, + "step": 36770 + }, + { + "epoch": 0.6353849875587504, + "grad_norm": 0.9624238609377941, + "learning_rate": 6.199227180562491e-06, + "loss": 0.2763, + "step": 36771 + }, + { + "epoch": 0.6354022670721593, + "grad_norm": 0.8576225964346885, + "learning_rate": 6.198709536030031e-06, + "loss": 0.5305, + "step": 36772 + }, + { + "epoch": 0.6354195465855681, + "grad_norm": 0.5338560221915637, + "learning_rate": 6.1981919034034996e-06, + "loss": 0.9028, + "step": 36773 + }, + { + "epoch": 0.635436826098977, + "grad_norm": 0.9203813198376549, + "learning_rate": 6.197674282684525e-06, + "loss": 0.5264, + "step": 36774 + }, + { + "epoch": 0.6354541056123859, + "grad_norm": 1.051662881942083, + "learning_rate": 6.19715667387472e-06, + "loss": 0.3041, + "step": 36775 + }, + { + "epoch": 0.6354713851257948, + "grad_norm": 1.3810187927977935, + "learning_rate": 6.19663907697571e-06, + "loss": 0.3007, + "step": 36776 + }, + { + "epoch": 0.6354886646392037, + "grad_norm": 0.9928401861527127, + "learning_rate": 6.196121491989114e-06, + "loss": 0.4188, + "step": 36777 + }, + { + "epoch": 0.6355059441526126, + "grad_norm": 1.2740761962294038, + "learning_rate": 6.195603918916557e-06, + "loss": 0.5344, + "step": 36778 + }, + { + "epoch": 0.6355232236660215, + "grad_norm": 1.1978899766624955, + "learning_rate": 6.195086357759655e-06, + "loss": 0.365, + "step": 36779 + }, + { + "epoch": 0.6355405031794304, + "grad_norm": 0.9091185932358218, + "learning_rate": 6.194568808520036e-06, + "loss": 0.4585, + "step": 36780 + }, + { + "epoch": 0.6355577826928394, + "grad_norm": 1.5429721656010797, + "learning_rate": 6.1940512711993124e-06, + "loss": 0.4508, + "step": 36781 + }, + { + "epoch": 0.6355750622062483, + "grad_norm": 1.0364943184792055, + "learning_rate": 6.193533745799113e-06, + "loss": 0.2481, + "step": 36782 + }, + { + "epoch": 0.6355923417196572, + "grad_norm": 1.1560228663431666, + "learning_rate": 6.193016232321054e-06, + "loss": 0.2954, + "step": 36783 + }, + { + "epoch": 0.6356096212330661, + "grad_norm": 1.5711779177789562, + "learning_rate": 6.192498730766755e-06, + "loss": 0.3543, + "step": 36784 + }, + { + "epoch": 0.635626900746475, + "grad_norm": 1.1093750655774683, + "learning_rate": 6.191981241137843e-06, + "loss": 0.5141, + "step": 36785 + }, + { + "epoch": 0.6356441802598839, + "grad_norm": 1.1850300946989234, + "learning_rate": 6.191463763435932e-06, + "loss": 0.4473, + "step": 36786 + }, + { + "epoch": 0.6356614597732928, + "grad_norm": 0.740073195751315, + "learning_rate": 6.190946297662649e-06, + "loss": 0.457, + "step": 36787 + }, + { + "epoch": 0.6356787392867017, + "grad_norm": 0.8562981224279861, + "learning_rate": 6.190428843819608e-06, + "loss": 0.5824, + "step": 36788 + }, + { + "epoch": 0.6356960188001106, + "grad_norm": 0.7815236872378196, + "learning_rate": 6.189911401908436e-06, + "loss": 0.2846, + "step": 36789 + }, + { + "epoch": 0.6357132983135195, + "grad_norm": 0.8922743716065598, + "learning_rate": 6.189393971930748e-06, + "loss": 0.3592, + "step": 36790 + }, + { + "epoch": 0.6357305778269284, + "grad_norm": 1.082901982230135, + "learning_rate": 6.188876553888174e-06, + "loss": 0.6234, + "step": 36791 + }, + { + "epoch": 0.6357478573403373, + "grad_norm": 1.164203516365827, + "learning_rate": 6.188359147782325e-06, + "loss": 0.4947, + "step": 36792 + }, + { + "epoch": 0.6357651368537462, + "grad_norm": 1.2654838150953291, + "learning_rate": 6.187841753614823e-06, + "loss": 0.2922, + "step": 36793 + }, + { + "epoch": 0.635782416367155, + "grad_norm": 1.1191871252805745, + "learning_rate": 6.1873243713872936e-06, + "loss": 0.5227, + "step": 36794 + }, + { + "epoch": 0.635799695880564, + "grad_norm": 2.1146631902777524, + "learning_rate": 6.18680700110135e-06, + "loss": 0.3693, + "step": 36795 + }, + { + "epoch": 0.6358169753939729, + "grad_norm": 1.0396378575429457, + "learning_rate": 6.1862896427586195e-06, + "loss": 0.2922, + "step": 36796 + }, + { + "epoch": 0.6358342549073818, + "grad_norm": 1.2341000103880428, + "learning_rate": 6.185772296360717e-06, + "loss": 0.2997, + "step": 36797 + }, + { + "epoch": 0.6358515344207907, + "grad_norm": 1.0371009705124097, + "learning_rate": 6.185254961909268e-06, + "loss": 0.502, + "step": 36798 + }, + { + "epoch": 0.6358688139341996, + "grad_norm": 1.1722770962371905, + "learning_rate": 6.184737639405889e-06, + "loss": 0.4344, + "step": 36799 + }, + { + "epoch": 0.6358860934476085, + "grad_norm": 1.0310864490965468, + "learning_rate": 6.184220328852204e-06, + "loss": 0.3362, + "step": 36800 + }, + { + "epoch": 0.6359033729610174, + "grad_norm": 1.044184573207663, + "learning_rate": 6.183703030249828e-06, + "loss": 0.2844, + "step": 36801 + }, + { + "epoch": 0.6359206524744263, + "grad_norm": 1.0266657986572756, + "learning_rate": 6.183185743600386e-06, + "loss": 0.3717, + "step": 36802 + }, + { + "epoch": 0.6359379319878352, + "grad_norm": 2.000471585642853, + "learning_rate": 6.182668468905497e-06, + "loss": 0.517, + "step": 36803 + }, + { + "epoch": 0.6359552115012441, + "grad_norm": 1.079914306648587, + "learning_rate": 6.1821512061667775e-06, + "loss": 0.4026, + "step": 36804 + }, + { + "epoch": 0.635972491014653, + "grad_norm": 1.2858317335031324, + "learning_rate": 6.181633955385855e-06, + "loss": 0.303, + "step": 36805 + }, + { + "epoch": 0.6359897705280619, + "grad_norm": 1.199937168921062, + "learning_rate": 6.18111671656434e-06, + "loss": 0.2971, + "step": 36806 + }, + { + "epoch": 0.6360070500414708, + "grad_norm": 1.20472387562597, + "learning_rate": 6.180599489703861e-06, + "loss": 0.4446, + "step": 36807 + }, + { + "epoch": 0.6360243295548798, + "grad_norm": 0.8764415088791958, + "learning_rate": 6.180082274806032e-06, + "loss": 0.4005, + "step": 36808 + }, + { + "epoch": 0.6360416090682887, + "grad_norm": 1.5510442660663601, + "learning_rate": 6.17956507187248e-06, + "loss": 0.4248, + "step": 36809 + }, + { + "epoch": 0.6360588885816976, + "grad_norm": 1.1145562728228509, + "learning_rate": 6.179047880904814e-06, + "loss": 0.264, + "step": 36810 + }, + { + "epoch": 0.6360761680951065, + "grad_norm": 0.7569633552384696, + "learning_rate": 6.178530701904668e-06, + "loss": 0.2392, + "step": 36811 + }, + { + "epoch": 0.6360934476085154, + "grad_norm": 1.3637525170933065, + "learning_rate": 6.178013534873648e-06, + "loss": 0.3497, + "step": 36812 + }, + { + "epoch": 0.6361107271219243, + "grad_norm": 0.7945747796617826, + "learning_rate": 6.177496379813384e-06, + "loss": 0.3313, + "step": 36813 + }, + { + "epoch": 0.6361280066353332, + "grad_norm": 0.9749340344378191, + "learning_rate": 6.176979236725493e-06, + "loss": 0.3069, + "step": 36814 + }, + { + "epoch": 0.6361452861487421, + "grad_norm": 0.9378063558394204, + "learning_rate": 6.176462105611589e-06, + "loss": 0.3331, + "step": 36815 + }, + { + "epoch": 0.6361625656621509, + "grad_norm": 0.7843393380213938, + "learning_rate": 6.175944986473299e-06, + "loss": 0.3386, + "step": 36816 + }, + { + "epoch": 0.6361798451755598, + "grad_norm": 0.4622053261031936, + "learning_rate": 6.175427879312238e-06, + "loss": 0.6205, + "step": 36817 + }, + { + "epoch": 0.6361971246889687, + "grad_norm": 1.259615711418767, + "learning_rate": 6.174910784130029e-06, + "loss": 0.2358, + "step": 36818 + }, + { + "epoch": 0.6362144042023776, + "grad_norm": 0.8897146587350343, + "learning_rate": 6.17439370092829e-06, + "loss": 0.3918, + "step": 36819 + }, + { + "epoch": 0.6362316837157865, + "grad_norm": 1.165869816661317, + "learning_rate": 6.1738766297086414e-06, + "loss": 0.4246, + "step": 36820 + }, + { + "epoch": 0.6362489632291954, + "grad_norm": 1.3779320487565736, + "learning_rate": 6.1733595704727e-06, + "loss": 0.3106, + "step": 36821 + }, + { + "epoch": 0.6362662427426043, + "grad_norm": 0.8587003328213776, + "learning_rate": 6.172842523222089e-06, + "loss": 0.3036, + "step": 36822 + }, + { + "epoch": 0.6362835222560133, + "grad_norm": 0.8747259703217816, + "learning_rate": 6.172325487958426e-06, + "loss": 0.481, + "step": 36823 + }, + { + "epoch": 0.6363008017694222, + "grad_norm": 1.2319258774424457, + "learning_rate": 6.171808464683329e-06, + "loss": 0.3126, + "step": 36824 + }, + { + "epoch": 0.6363180812828311, + "grad_norm": 0.7056551576239731, + "learning_rate": 6.171291453398422e-06, + "loss": 0.2933, + "step": 36825 + }, + { + "epoch": 0.63633536079624, + "grad_norm": 2.2389931537941976, + "learning_rate": 6.170774454105317e-06, + "loss": 0.468, + "step": 36826 + }, + { + "epoch": 0.6363526403096489, + "grad_norm": 1.583814353319023, + "learning_rate": 6.170257466805641e-06, + "loss": 0.4771, + "step": 36827 + }, + { + "epoch": 0.6363699198230578, + "grad_norm": 1.881617523639866, + "learning_rate": 6.1697404915010064e-06, + "loss": 0.3966, + "step": 36828 + }, + { + "epoch": 0.6363871993364667, + "grad_norm": 1.141644347611856, + "learning_rate": 6.169223528193039e-06, + "loss": 0.2657, + "step": 36829 + }, + { + "epoch": 0.6364044788498756, + "grad_norm": 1.261136964188043, + "learning_rate": 6.1687065768833495e-06, + "loss": 0.3494, + "step": 36830 + }, + { + "epoch": 0.6364217583632845, + "grad_norm": 1.5042782892822086, + "learning_rate": 6.168189637573568e-06, + "loss": 0.2999, + "step": 36831 + }, + { + "epoch": 0.6364390378766934, + "grad_norm": 1.1052770037622828, + "learning_rate": 6.167672710265303e-06, + "loss": 0.6232, + "step": 36832 + }, + { + "epoch": 0.6364563173901023, + "grad_norm": 1.508743434807096, + "learning_rate": 6.1671557949601825e-06, + "loss": 0.2817, + "step": 36833 + }, + { + "epoch": 0.6364735969035112, + "grad_norm": 0.9608848096159551, + "learning_rate": 6.166638891659822e-06, + "loss": 0.2691, + "step": 36834 + }, + { + "epoch": 0.6364908764169201, + "grad_norm": 1.0132689002176034, + "learning_rate": 6.166122000365835e-06, + "loss": 0.4719, + "step": 36835 + }, + { + "epoch": 0.6365081559303291, + "grad_norm": 1.296344347310318, + "learning_rate": 6.165605121079847e-06, + "loss": 0.3215, + "step": 36836 + }, + { + "epoch": 0.6365254354437379, + "grad_norm": 1.2862068145618608, + "learning_rate": 6.165088253803475e-06, + "loss": 0.4225, + "step": 36837 + }, + { + "epoch": 0.6365427149571468, + "grad_norm": 1.3277591241158875, + "learning_rate": 6.1645713985383394e-06, + "loss": 0.541, + "step": 36838 + }, + { + "epoch": 0.6365599944705557, + "grad_norm": 1.5494349331339492, + "learning_rate": 6.164054555286055e-06, + "loss": 0.2789, + "step": 36839 + }, + { + "epoch": 0.6365772739839646, + "grad_norm": 1.630176428931699, + "learning_rate": 6.163537724048248e-06, + "loss": 0.3245, + "step": 36840 + }, + { + "epoch": 0.6365945534973735, + "grad_norm": 0.940809263219092, + "learning_rate": 6.1630209048265285e-06, + "loss": 0.4028, + "step": 36841 + }, + { + "epoch": 0.6366118330107824, + "grad_norm": 1.1574868350809515, + "learning_rate": 6.162504097622522e-06, + "loss": 0.3763, + "step": 36842 + }, + { + "epoch": 0.6366291125241913, + "grad_norm": 2.1392454164501253, + "learning_rate": 6.161987302437839e-06, + "loss": 0.452, + "step": 36843 + }, + { + "epoch": 0.6366463920376002, + "grad_norm": 1.3289819978588877, + "learning_rate": 6.161470519274109e-06, + "loss": 0.3911, + "step": 36844 + }, + { + "epoch": 0.6366636715510091, + "grad_norm": 0.7340662776366536, + "learning_rate": 6.1609537481329455e-06, + "loss": 0.9062, + "step": 36845 + }, + { + "epoch": 0.636680951064418, + "grad_norm": 0.6401402273270228, + "learning_rate": 6.160436989015962e-06, + "loss": 0.6043, + "step": 36846 + }, + { + "epoch": 0.6366982305778269, + "grad_norm": 1.3077320246003596, + "learning_rate": 6.159920241924784e-06, + "loss": 0.3664, + "step": 36847 + }, + { + "epoch": 0.6367155100912358, + "grad_norm": 1.479033254514449, + "learning_rate": 6.159403506861026e-06, + "loss": 0.3314, + "step": 36848 + }, + { + "epoch": 0.6367327896046447, + "grad_norm": 1.4997313478747434, + "learning_rate": 6.1588867838263124e-06, + "loss": 0.2465, + "step": 36849 + }, + { + "epoch": 0.6367500691180537, + "grad_norm": 1.2962827384491926, + "learning_rate": 6.1583700728222505e-06, + "loss": 0.4514, + "step": 36850 + }, + { + "epoch": 0.6367673486314626, + "grad_norm": 1.6116682600734415, + "learning_rate": 6.157853373850473e-06, + "loss": 0.304, + "step": 36851 + }, + { + "epoch": 0.6367846281448715, + "grad_norm": 1.188450119992409, + "learning_rate": 6.157336686912585e-06, + "loss": 0.2478, + "step": 36852 + }, + { + "epoch": 0.6368019076582804, + "grad_norm": 1.6350376151834647, + "learning_rate": 6.156820012010214e-06, + "loss": 0.5001, + "step": 36853 + }, + { + "epoch": 0.6368191871716893, + "grad_norm": 1.64325867193318, + "learning_rate": 6.1563033491449765e-06, + "loss": 0.3129, + "step": 36854 + }, + { + "epoch": 0.6368364666850982, + "grad_norm": 1.137805408936657, + "learning_rate": 6.155786698318485e-06, + "loss": 0.3906, + "step": 36855 + }, + { + "epoch": 0.6368537461985071, + "grad_norm": 1.2490766390545782, + "learning_rate": 6.155270059532362e-06, + "loss": 0.1831, + "step": 36856 + }, + { + "epoch": 0.636871025711916, + "grad_norm": 1.4399140342550258, + "learning_rate": 6.1547534327882255e-06, + "loss": 0.5049, + "step": 36857 + }, + { + "epoch": 0.6368883052253248, + "grad_norm": 0.934743635865075, + "learning_rate": 6.154236818087693e-06, + "loss": 0.8157, + "step": 36858 + }, + { + "epoch": 0.6369055847387337, + "grad_norm": 0.86251881827354, + "learning_rate": 6.1537202154323834e-06, + "loss": 0.5353, + "step": 36859 + }, + { + "epoch": 0.6369228642521426, + "grad_norm": 1.2226597871010252, + "learning_rate": 6.153203624823918e-06, + "loss": 0.2798, + "step": 36860 + }, + { + "epoch": 0.6369401437655515, + "grad_norm": 1.0012409592006792, + "learning_rate": 6.152687046263907e-06, + "loss": 0.4548, + "step": 36861 + }, + { + "epoch": 0.6369574232789604, + "grad_norm": 1.5341641890391615, + "learning_rate": 6.152170479753974e-06, + "loss": 0.392, + "step": 36862 + }, + { + "epoch": 0.6369747027923693, + "grad_norm": 1.3477172939448485, + "learning_rate": 6.151653925295734e-06, + "loss": 0.2777, + "step": 36863 + }, + { + "epoch": 0.6369919823057782, + "grad_norm": 0.8255925361249532, + "learning_rate": 6.151137382890808e-06, + "loss": 0.3603, + "step": 36864 + }, + { + "epoch": 0.6370092618191872, + "grad_norm": 0.5807320950381811, + "learning_rate": 6.150620852540814e-06, + "loss": 0.6472, + "step": 36865 + }, + { + "epoch": 0.6370265413325961, + "grad_norm": 1.4034804141247352, + "learning_rate": 6.1501043342473654e-06, + "loss": 0.333, + "step": 36866 + }, + { + "epoch": 0.637043820846005, + "grad_norm": 1.4242931945632227, + "learning_rate": 6.149587828012084e-06, + "loss": 0.4087, + "step": 36867 + }, + { + "epoch": 0.6370611003594139, + "grad_norm": 1.0584863713107637, + "learning_rate": 6.1490713338365825e-06, + "loss": 0.4228, + "step": 36868 + }, + { + "epoch": 0.6370783798728228, + "grad_norm": 1.359407448428958, + "learning_rate": 6.148554851722488e-06, + "loss": 0.3974, + "step": 36869 + }, + { + "epoch": 0.6370956593862317, + "grad_norm": 0.9396714403112426, + "learning_rate": 6.148038381671406e-06, + "loss": 0.3242, + "step": 36870 + }, + { + "epoch": 0.6371129388996406, + "grad_norm": 0.9379941729551966, + "learning_rate": 6.147521923684967e-06, + "loss": 0.3619, + "step": 36871 + }, + { + "epoch": 0.6371302184130495, + "grad_norm": 1.076023693830884, + "learning_rate": 6.1470054777647785e-06, + "loss": 0.2343, + "step": 36872 + }, + { + "epoch": 0.6371474979264584, + "grad_norm": 1.3049695075505274, + "learning_rate": 6.1464890439124625e-06, + "loss": 0.4309, + "step": 36873 + }, + { + "epoch": 0.6371647774398673, + "grad_norm": 1.656954369513859, + "learning_rate": 6.145972622129633e-06, + "loss": 0.5278, + "step": 36874 + }, + { + "epoch": 0.6371820569532762, + "grad_norm": 1.236524887352972, + "learning_rate": 6.145456212417915e-06, + "loss": 0.407, + "step": 36875 + }, + { + "epoch": 0.6371993364666851, + "grad_norm": 0.677936979097347, + "learning_rate": 6.14493981477892e-06, + "loss": 0.388, + "step": 36876 + }, + { + "epoch": 0.637216615980094, + "grad_norm": 1.1293960949630117, + "learning_rate": 6.144423429214263e-06, + "loss": 0.423, + "step": 36877 + }, + { + "epoch": 0.637233895493503, + "grad_norm": 0.5357996169423892, + "learning_rate": 6.143907055725567e-06, + "loss": 0.4174, + "step": 36878 + }, + { + "epoch": 0.6372511750069118, + "grad_norm": 0.763807945512201, + "learning_rate": 6.143390694314446e-06, + "loss": 0.3295, + "step": 36879 + }, + { + "epoch": 0.6372684545203207, + "grad_norm": 1.1262000589724928, + "learning_rate": 6.142874344982522e-06, + "loss": 0.3857, + "step": 36880 + }, + { + "epoch": 0.6372857340337296, + "grad_norm": 0.7134157158158905, + "learning_rate": 6.142358007731406e-06, + "loss": 0.2487, + "step": 36881 + }, + { + "epoch": 0.6373030135471385, + "grad_norm": 1.3973688438287142, + "learning_rate": 6.141841682562718e-06, + "loss": 0.3188, + "step": 36882 + }, + { + "epoch": 0.6373202930605474, + "grad_norm": 1.2947191382074237, + "learning_rate": 6.141325369478074e-06, + "loss": 0.3403, + "step": 36883 + }, + { + "epoch": 0.6373375725739563, + "grad_norm": 1.5400264080916428, + "learning_rate": 6.140809068479094e-06, + "loss": 0.3774, + "step": 36884 + }, + { + "epoch": 0.6373548520873652, + "grad_norm": 1.324064511067455, + "learning_rate": 6.140292779567395e-06, + "loss": 0.3524, + "step": 36885 + }, + { + "epoch": 0.6373721316007741, + "grad_norm": 1.2064344862395227, + "learning_rate": 6.1397765027445874e-06, + "loss": 0.2948, + "step": 36886 + }, + { + "epoch": 0.637389411114183, + "grad_norm": 1.0733864418565546, + "learning_rate": 6.139260238012297e-06, + "loss": 0.4281, + "step": 36887 + }, + { + "epoch": 0.6374066906275919, + "grad_norm": 1.4067626251875514, + "learning_rate": 6.138743985372135e-06, + "loss": 0.3646, + "step": 36888 + }, + { + "epoch": 0.6374239701410008, + "grad_norm": 0.9936585350945271, + "learning_rate": 6.138227744825723e-06, + "loss": 0.3983, + "step": 36889 + }, + { + "epoch": 0.6374412496544097, + "grad_norm": 1.8781435701990883, + "learning_rate": 6.137711516374669e-06, + "loss": 0.4381, + "step": 36890 + }, + { + "epoch": 0.6374585291678186, + "grad_norm": 1.0395365067919573, + "learning_rate": 6.137195300020604e-06, + "loss": 0.8905, + "step": 36891 + }, + { + "epoch": 0.6374758086812276, + "grad_norm": 1.1487747355041535, + "learning_rate": 6.136679095765131e-06, + "loss": 0.2623, + "step": 36892 + }, + { + "epoch": 0.6374930881946365, + "grad_norm": 1.1669606088423787, + "learning_rate": 6.136162903609874e-06, + "loss": 0.3813, + "step": 36893 + }, + { + "epoch": 0.6375103677080454, + "grad_norm": 0.7121426133903366, + "learning_rate": 6.13564672355645e-06, + "loss": 0.2382, + "step": 36894 + }, + { + "epoch": 0.6375276472214543, + "grad_norm": 1.2155607652978977, + "learning_rate": 6.135130555606475e-06, + "loss": 0.4509, + "step": 36895 + }, + { + "epoch": 0.6375449267348632, + "grad_norm": 1.2505963058872547, + "learning_rate": 6.134614399761564e-06, + "loss": 0.406, + "step": 36896 + }, + { + "epoch": 0.6375622062482721, + "grad_norm": 1.16139064992616, + "learning_rate": 6.134098256023332e-06, + "loss": 0.4699, + "step": 36897 + }, + { + "epoch": 0.637579485761681, + "grad_norm": 0.5974366918861264, + "learning_rate": 6.1335821243934e-06, + "loss": 0.6337, + "step": 36898 + }, + { + "epoch": 0.6375967652750899, + "grad_norm": 1.3519709346463313, + "learning_rate": 6.1330660048733805e-06, + "loss": 0.5456, + "step": 36899 + }, + { + "epoch": 0.6376140447884987, + "grad_norm": 0.8335598617465931, + "learning_rate": 6.132549897464897e-06, + "loss": 0.5812, + "step": 36900 + }, + { + "epoch": 0.6376313243019076, + "grad_norm": 0.9528745451640598, + "learning_rate": 6.132033802169557e-06, + "loss": 0.2453, + "step": 36901 + }, + { + "epoch": 0.6376486038153165, + "grad_norm": 2.0083263295982388, + "learning_rate": 6.131517718988982e-06, + "loss": 0.4247, + "step": 36902 + }, + { + "epoch": 0.6376658833287254, + "grad_norm": 1.35782473357273, + "learning_rate": 6.1310016479247855e-06, + "loss": 0.6407, + "step": 36903 + }, + { + "epoch": 0.6376831628421343, + "grad_norm": 0.9797957765645453, + "learning_rate": 6.13048558897859e-06, + "loss": 0.4807, + "step": 36904 + }, + { + "epoch": 0.6377004423555432, + "grad_norm": 1.2593151423093984, + "learning_rate": 6.129969542152003e-06, + "loss": 0.3888, + "step": 36905 + }, + { + "epoch": 0.6377177218689521, + "grad_norm": 1.0788224677684768, + "learning_rate": 6.129453507446652e-06, + "loss": 0.4131, + "step": 36906 + }, + { + "epoch": 0.637735001382361, + "grad_norm": 1.5119133279554435, + "learning_rate": 6.128937484864144e-06, + "loss": 0.3062, + "step": 36907 + }, + { + "epoch": 0.63775228089577, + "grad_norm": 1.13569410846403, + "learning_rate": 6.128421474406094e-06, + "loss": 0.4816, + "step": 36908 + }, + { + "epoch": 0.6377695604091789, + "grad_norm": 1.4142712877752392, + "learning_rate": 6.127905476074128e-06, + "loss": 0.4631, + "step": 36909 + }, + { + "epoch": 0.6377868399225878, + "grad_norm": 1.385615446203418, + "learning_rate": 6.127389489869851e-06, + "loss": 0.485, + "step": 36910 + }, + { + "epoch": 0.6378041194359967, + "grad_norm": 0.8470934285154461, + "learning_rate": 6.126873515794889e-06, + "loss": 0.3943, + "step": 36911 + }, + { + "epoch": 0.6378213989494056, + "grad_norm": 0.8596029623841898, + "learning_rate": 6.1263575538508505e-06, + "loss": 0.3143, + "step": 36912 + }, + { + "epoch": 0.6378386784628145, + "grad_norm": 1.224724875432405, + "learning_rate": 6.125841604039355e-06, + "loss": 0.3573, + "step": 36913 + }, + { + "epoch": 0.6378559579762234, + "grad_norm": 1.4399022847553575, + "learning_rate": 6.125325666362018e-06, + "loss": 0.3373, + "step": 36914 + }, + { + "epoch": 0.6378732374896323, + "grad_norm": 1.0086461691208288, + "learning_rate": 6.1248097408204585e-06, + "loss": 0.2902, + "step": 36915 + }, + { + "epoch": 0.6378905170030412, + "grad_norm": 0.9621115592593906, + "learning_rate": 6.124293827416288e-06, + "loss": 0.3636, + "step": 36916 + }, + { + "epoch": 0.6379077965164501, + "grad_norm": 1.1260383398279339, + "learning_rate": 6.1237779261511195e-06, + "loss": 0.3539, + "step": 36917 + }, + { + "epoch": 0.637925076029859, + "grad_norm": 1.3517594482134951, + "learning_rate": 6.123262037026578e-06, + "loss": 0.5124, + "step": 36918 + }, + { + "epoch": 0.637942355543268, + "grad_norm": 0.8030761274352789, + "learning_rate": 6.122746160044271e-06, + "loss": 0.2459, + "step": 36919 + }, + { + "epoch": 0.6379596350566769, + "grad_norm": 0.8830958228374508, + "learning_rate": 6.122230295205822e-06, + "loss": 0.3172, + "step": 36920 + }, + { + "epoch": 0.6379769145700857, + "grad_norm": 1.1615531996112156, + "learning_rate": 6.1217144425128385e-06, + "loss": 0.473, + "step": 36921 + }, + { + "epoch": 0.6379941940834946, + "grad_norm": 1.057496922686796, + "learning_rate": 6.121198601966941e-06, + "loss": 0.3994, + "step": 36922 + }, + { + "epoch": 0.6380114735969035, + "grad_norm": 1.1156127696349705, + "learning_rate": 6.120682773569744e-06, + "loss": 0.4592, + "step": 36923 + }, + { + "epoch": 0.6380287531103124, + "grad_norm": 1.909930321940422, + "learning_rate": 6.120166957322864e-06, + "loss": 0.335, + "step": 36924 + }, + { + "epoch": 0.6380460326237213, + "grad_norm": 0.7370095798416686, + "learning_rate": 6.119651153227915e-06, + "loss": 0.2784, + "step": 36925 + }, + { + "epoch": 0.6380633121371302, + "grad_norm": 2.377820766108136, + "learning_rate": 6.119135361286517e-06, + "loss": 0.4075, + "step": 36926 + }, + { + "epoch": 0.6380805916505391, + "grad_norm": 1.6723837328631637, + "learning_rate": 6.1186195815002805e-06, + "loss": 0.381, + "step": 36927 + }, + { + "epoch": 0.638097871163948, + "grad_norm": 1.127378885143329, + "learning_rate": 6.118103813870819e-06, + "loss": 0.4415, + "step": 36928 + }, + { + "epoch": 0.6381151506773569, + "grad_norm": 1.017224227037565, + "learning_rate": 6.117588058399756e-06, + "loss": 0.4634, + "step": 36929 + }, + { + "epoch": 0.6381324301907658, + "grad_norm": 1.1653593781411857, + "learning_rate": 6.117072315088697e-06, + "loss": 0.4851, + "step": 36930 + }, + { + "epoch": 0.6381497097041747, + "grad_norm": 1.2560920920524241, + "learning_rate": 6.116556583939268e-06, + "loss": 0.4572, + "step": 36931 + }, + { + "epoch": 0.6381669892175836, + "grad_norm": 1.153236219469568, + "learning_rate": 6.116040864953074e-06, + "loss": 0.4299, + "step": 36932 + }, + { + "epoch": 0.6381842687309925, + "grad_norm": 0.9499414447605152, + "learning_rate": 6.115525158131738e-06, + "loss": 0.495, + "step": 36933 + }, + { + "epoch": 0.6382015482444015, + "grad_norm": 1.2583426449550668, + "learning_rate": 6.115009463476871e-06, + "loss": 0.4361, + "step": 36934 + }, + { + "epoch": 0.6382188277578104, + "grad_norm": 1.128974465579573, + "learning_rate": 6.114493780990094e-06, + "loss": 0.4181, + "step": 36935 + }, + { + "epoch": 0.6382361072712193, + "grad_norm": 1.1893197476182513, + "learning_rate": 6.113978110673016e-06, + "loss": 0.3671, + "step": 36936 + }, + { + "epoch": 0.6382533867846282, + "grad_norm": 0.7331729619473725, + "learning_rate": 6.11346245252725e-06, + "loss": 0.3747, + "step": 36937 + }, + { + "epoch": 0.6382706662980371, + "grad_norm": 1.3245501317335124, + "learning_rate": 6.112946806554417e-06, + "loss": 0.4455, + "step": 36938 + }, + { + "epoch": 0.638287945811446, + "grad_norm": 1.188000773331713, + "learning_rate": 6.1124311727561295e-06, + "loss": 0.3496, + "step": 36939 + }, + { + "epoch": 0.6383052253248549, + "grad_norm": 0.9717984363099286, + "learning_rate": 6.111915551134007e-06, + "loss": 0.3982, + "step": 36940 + }, + { + "epoch": 0.6383225048382638, + "grad_norm": 1.0342469995022199, + "learning_rate": 6.111399941689655e-06, + "loss": 0.4884, + "step": 36941 + }, + { + "epoch": 0.6383397843516727, + "grad_norm": 1.3157143260116906, + "learning_rate": 6.1108843444246966e-06, + "loss": 0.4156, + "step": 36942 + }, + { + "epoch": 0.6383570638650815, + "grad_norm": 1.1752717368777403, + "learning_rate": 6.1103687593407426e-06, + "loss": 0.4208, + "step": 36943 + }, + { + "epoch": 0.6383743433784904, + "grad_norm": 1.2113891832305597, + "learning_rate": 6.109853186439411e-06, + "loss": 0.225, + "step": 36944 + }, + { + "epoch": 0.6383916228918993, + "grad_norm": 1.8311256591370741, + "learning_rate": 6.109337625722312e-06, + "loss": 0.2642, + "step": 36945 + }, + { + "epoch": 0.6384089024053082, + "grad_norm": 1.090284924287361, + "learning_rate": 6.108822077191068e-06, + "loss": 0.3959, + "step": 36946 + }, + { + "epoch": 0.6384261819187171, + "grad_norm": 0.6415873338334818, + "learning_rate": 6.108306540847286e-06, + "loss": 0.7209, + "step": 36947 + }, + { + "epoch": 0.638443461432126, + "grad_norm": 1.3971729603237597, + "learning_rate": 6.107791016692584e-06, + "loss": 0.4398, + "step": 36948 + }, + { + "epoch": 0.638460740945535, + "grad_norm": 0.9398666643691396, + "learning_rate": 6.107275504728578e-06, + "loss": 0.3121, + "step": 36949 + }, + { + "epoch": 0.6384780204589439, + "grad_norm": 1.075196528218393, + "learning_rate": 6.106760004956876e-06, + "loss": 0.4794, + "step": 36950 + }, + { + "epoch": 0.6384952999723528, + "grad_norm": 0.9896081279033984, + "learning_rate": 6.106244517379103e-06, + "loss": 0.2202, + "step": 36951 + }, + { + "epoch": 0.6385125794857617, + "grad_norm": 1.233042643317516, + "learning_rate": 6.105729041996865e-06, + "loss": 0.2358, + "step": 36952 + }, + { + "epoch": 0.6385298589991706, + "grad_norm": 2.0227699257113283, + "learning_rate": 6.1052135788117815e-06, + "loss": 0.3014, + "step": 36953 + }, + { + "epoch": 0.6385471385125795, + "grad_norm": 1.1260309971537947, + "learning_rate": 6.104698127825462e-06, + "loss": 0.4814, + "step": 36954 + }, + { + "epoch": 0.6385644180259884, + "grad_norm": 0.8759069507518599, + "learning_rate": 6.104182689039529e-06, + "loss": 0.1904, + "step": 36955 + }, + { + "epoch": 0.6385816975393973, + "grad_norm": 0.839994198896083, + "learning_rate": 6.103667262455586e-06, + "loss": 0.3597, + "step": 36956 + }, + { + "epoch": 0.6385989770528062, + "grad_norm": 0.77591398833666, + "learning_rate": 6.103151848075258e-06, + "loss": 0.4952, + "step": 36957 + }, + { + "epoch": 0.6386162565662151, + "grad_norm": 0.8679296221230531, + "learning_rate": 6.102636445900154e-06, + "loss": 0.1881, + "step": 36958 + }, + { + "epoch": 0.638633536079624, + "grad_norm": 1.0335822984439558, + "learning_rate": 6.1021210559318856e-06, + "loss": 0.2175, + "step": 36959 + }, + { + "epoch": 0.6386508155930329, + "grad_norm": 0.9433212701407744, + "learning_rate": 6.1016056781720745e-06, + "loss": 0.6193, + "step": 36960 + }, + { + "epoch": 0.6386680951064418, + "grad_norm": 0.9982730440350654, + "learning_rate": 6.1010903126223265e-06, + "loss": 0.4223, + "step": 36961 + }, + { + "epoch": 0.6386853746198508, + "grad_norm": 0.9981729627125623, + "learning_rate": 6.100574959284263e-06, + "loss": 0.3937, + "step": 36962 + }, + { + "epoch": 0.6387026541332597, + "grad_norm": 1.0460430114417585, + "learning_rate": 6.100059618159493e-06, + "loss": 0.4117, + "step": 36963 + }, + { + "epoch": 0.6387199336466685, + "grad_norm": 1.359866521112634, + "learning_rate": 6.099544289249634e-06, + "loss": 0.398, + "step": 36964 + }, + { + "epoch": 0.6387372131600774, + "grad_norm": 1.952481633647313, + "learning_rate": 6.0990289725562965e-06, + "loss": 0.3994, + "step": 36965 + }, + { + "epoch": 0.6387544926734863, + "grad_norm": 0.887047862406071, + "learning_rate": 6.098513668081101e-06, + "loss": 0.3139, + "step": 36966 + }, + { + "epoch": 0.6387717721868952, + "grad_norm": 0.6459434845635945, + "learning_rate": 6.097998375825656e-06, + "loss": 0.7415, + "step": 36967 + }, + { + "epoch": 0.6387890517003041, + "grad_norm": 3.7867191887241964, + "learning_rate": 6.097483095791574e-06, + "loss": 0.4159, + "step": 36968 + }, + { + "epoch": 0.638806331213713, + "grad_norm": 1.0541232287855034, + "learning_rate": 6.096967827980476e-06, + "loss": 0.3511, + "step": 36969 + }, + { + "epoch": 0.6388236107271219, + "grad_norm": 1.317963535531707, + "learning_rate": 6.096452572393964e-06, + "loss": 0.4433, + "step": 36970 + }, + { + "epoch": 0.6388408902405308, + "grad_norm": 0.9713769198976485, + "learning_rate": 6.095937329033665e-06, + "loss": 0.3193, + "step": 36971 + }, + { + "epoch": 0.6388581697539397, + "grad_norm": 0.9095922781381568, + "learning_rate": 6.095422097901185e-06, + "loss": 0.2932, + "step": 36972 + }, + { + "epoch": 0.6388754492673486, + "grad_norm": 1.1596332533025437, + "learning_rate": 6.0949068789981416e-06, + "loss": 0.4141, + "step": 36973 + }, + { + "epoch": 0.6388927287807575, + "grad_norm": 1.9744884813095342, + "learning_rate": 6.094391672326143e-06, + "loss": 0.3441, + "step": 36974 + }, + { + "epoch": 0.6389100082941664, + "grad_norm": 1.231074319787862, + "learning_rate": 6.093876477886811e-06, + "loss": 0.5706, + "step": 36975 + }, + { + "epoch": 0.6389272878075754, + "grad_norm": 0.7249485805532417, + "learning_rate": 6.09336129568175e-06, + "loss": 0.4843, + "step": 36976 + }, + { + "epoch": 0.6389445673209843, + "grad_norm": 1.7441600215631203, + "learning_rate": 6.092846125712582e-06, + "loss": 0.3963, + "step": 36977 + }, + { + "epoch": 0.6389618468343932, + "grad_norm": 1.7387551006501492, + "learning_rate": 6.092330967980917e-06, + "loss": 0.4449, + "step": 36978 + }, + { + "epoch": 0.6389791263478021, + "grad_norm": 1.7635290444618597, + "learning_rate": 6.091815822488365e-06, + "loss": 0.4824, + "step": 36979 + }, + { + "epoch": 0.638996405861211, + "grad_norm": 0.9291475386944901, + "learning_rate": 6.091300689236548e-06, + "loss": 0.2333, + "step": 36980 + }, + { + "epoch": 0.6390136853746199, + "grad_norm": 1.0159026033923737, + "learning_rate": 6.090785568227069e-06, + "loss": 0.3066, + "step": 36981 + }, + { + "epoch": 0.6390309648880288, + "grad_norm": 1.478251158182427, + "learning_rate": 6.0902704594615496e-06, + "loss": 0.3652, + "step": 36982 + }, + { + "epoch": 0.6390482444014377, + "grad_norm": 1.478928967029807, + "learning_rate": 6.0897553629415984e-06, + "loss": 0.514, + "step": 36983 + }, + { + "epoch": 0.6390655239148466, + "grad_norm": 1.3964308853326435, + "learning_rate": 6.089240278668832e-06, + "loss": 0.3739, + "step": 36984 + }, + { + "epoch": 0.6390828034282554, + "grad_norm": 2.0815673903693552, + "learning_rate": 6.088725206644861e-06, + "loss": 0.2697, + "step": 36985 + }, + { + "epoch": 0.6391000829416643, + "grad_norm": 0.8567122398310719, + "learning_rate": 6.088210146871303e-06, + "loss": 0.3042, + "step": 36986 + }, + { + "epoch": 0.6391173624550732, + "grad_norm": 0.9330807192890869, + "learning_rate": 6.087695099349764e-06, + "loss": 0.3408, + "step": 36987 + }, + { + "epoch": 0.6391346419684821, + "grad_norm": 0.954459112954914, + "learning_rate": 6.0871800640818645e-06, + "loss": 0.2984, + "step": 36988 + }, + { + "epoch": 0.639151921481891, + "grad_norm": 1.1004547113003424, + "learning_rate": 6.0866650410692155e-06, + "loss": 0.3787, + "step": 36989 + }, + { + "epoch": 0.6391692009953, + "grad_norm": 1.61461992504568, + "learning_rate": 6.086150030313422e-06, + "loss": 0.4529, + "step": 36990 + }, + { + "epoch": 0.6391864805087089, + "grad_norm": 1.7303631623500946, + "learning_rate": 6.085635031816113e-06, + "loss": 0.3951, + "step": 36991 + }, + { + "epoch": 0.6392037600221178, + "grad_norm": 1.2015201398217972, + "learning_rate": 6.085120045578886e-06, + "loss": 0.8883, + "step": 36992 + }, + { + "epoch": 0.6392210395355267, + "grad_norm": 1.0381589496406631, + "learning_rate": 6.084605071603363e-06, + "loss": 0.3631, + "step": 36993 + }, + { + "epoch": 0.6392383190489356, + "grad_norm": 1.0588985699374904, + "learning_rate": 6.084090109891153e-06, + "loss": 0.3364, + "step": 36994 + }, + { + "epoch": 0.6392555985623445, + "grad_norm": 1.6453834958834044, + "learning_rate": 6.083575160443874e-06, + "loss": 0.3157, + "step": 36995 + }, + { + "epoch": 0.6392728780757534, + "grad_norm": 1.0186038425195554, + "learning_rate": 6.08306022326313e-06, + "loss": 0.4126, + "step": 36996 + }, + { + "epoch": 0.6392901575891623, + "grad_norm": 1.2007809894857207, + "learning_rate": 6.082545298350544e-06, + "loss": 0.5719, + "step": 36997 + }, + { + "epoch": 0.6393074371025712, + "grad_norm": 0.8878401530531919, + "learning_rate": 6.082030385707722e-06, + "loss": 0.3331, + "step": 36998 + }, + { + "epoch": 0.6393247166159801, + "grad_norm": 1.1697429108031792, + "learning_rate": 6.081515485336278e-06, + "loss": 0.3967, + "step": 36999 + }, + { + "epoch": 0.639341996129389, + "grad_norm": 1.2123882222975328, + "learning_rate": 6.081000597237828e-06, + "loss": 0.4713, + "step": 37000 + }, + { + "epoch": 0.6393592756427979, + "grad_norm": 0.8611623737950329, + "learning_rate": 6.080485721413978e-06, + "loss": 0.3594, + "step": 37001 + }, + { + "epoch": 0.6393765551562068, + "grad_norm": 1.2109693157852663, + "learning_rate": 6.079970857866346e-06, + "loss": 0.4315, + "step": 37002 + }, + { + "epoch": 0.6393938346696157, + "grad_norm": 1.0934531134514491, + "learning_rate": 6.079456006596542e-06, + "loss": 0.2549, + "step": 37003 + }, + { + "epoch": 0.6394111141830247, + "grad_norm": 1.5440590187998773, + "learning_rate": 6.078941167606183e-06, + "loss": 0.2511, + "step": 37004 + }, + { + "epoch": 0.6394283936964336, + "grad_norm": 1.4344217127431722, + "learning_rate": 6.0784263408968744e-06, + "loss": 0.3204, + "step": 37005 + }, + { + "epoch": 0.6394456732098424, + "grad_norm": 1.001201700545124, + "learning_rate": 6.0779115264702375e-06, + "loss": 0.3608, + "step": 37006 + }, + { + "epoch": 0.6394629527232513, + "grad_norm": 1.518225308064191, + "learning_rate": 6.077396724327875e-06, + "loss": 0.5869, + "step": 37007 + }, + { + "epoch": 0.6394802322366602, + "grad_norm": 0.8267903332932667, + "learning_rate": 6.076881934471406e-06, + "loss": 0.7385, + "step": 37008 + }, + { + "epoch": 0.6394975117500691, + "grad_norm": 1.025612732957342, + "learning_rate": 6.076367156902443e-06, + "loss": 0.3491, + "step": 37009 + }, + { + "epoch": 0.639514791263478, + "grad_norm": 1.023405701044953, + "learning_rate": 6.07585239162259e-06, + "loss": 0.4815, + "step": 37010 + }, + { + "epoch": 0.6395320707768869, + "grad_norm": 1.0543319048463653, + "learning_rate": 6.075337638633473e-06, + "loss": 0.3845, + "step": 37011 + }, + { + "epoch": 0.6395493502902958, + "grad_norm": 1.3601773047055914, + "learning_rate": 6.074822897936692e-06, + "loss": 0.4065, + "step": 37012 + }, + { + "epoch": 0.6395666298037047, + "grad_norm": 0.8466546402239064, + "learning_rate": 6.0743081695338644e-06, + "loss": 0.3293, + "step": 37013 + }, + { + "epoch": 0.6395839093171136, + "grad_norm": 1.5754258924299485, + "learning_rate": 6.073793453426602e-06, + "loss": 0.4015, + "step": 37014 + }, + { + "epoch": 0.6396011888305225, + "grad_norm": 1.6086802906248947, + "learning_rate": 6.07327874961652e-06, + "loss": 0.351, + "step": 37015 + }, + { + "epoch": 0.6396184683439314, + "grad_norm": 0.6716539081049971, + "learning_rate": 6.072764058105223e-06, + "loss": 0.3509, + "step": 37016 + }, + { + "epoch": 0.6396357478573403, + "grad_norm": 2.033897989266194, + "learning_rate": 6.0722493788943315e-06, + "loss": 0.3893, + "step": 37017 + }, + { + "epoch": 0.6396530273707492, + "grad_norm": 1.6672537160360046, + "learning_rate": 6.07173471198545e-06, + "loss": 0.5973, + "step": 37018 + }, + { + "epoch": 0.6396703068841582, + "grad_norm": 1.1903415460277837, + "learning_rate": 6.071220057380196e-06, + "loss": 0.4196, + "step": 37019 + }, + { + "epoch": 0.6396875863975671, + "grad_norm": 0.9416712012085292, + "learning_rate": 6.07070541508018e-06, + "loss": 0.5238, + "step": 37020 + }, + { + "epoch": 0.639704865910976, + "grad_norm": 1.3171283085094072, + "learning_rate": 6.070190785087011e-06, + "loss": 0.3124, + "step": 37021 + }, + { + "epoch": 0.6397221454243849, + "grad_norm": 1.4253393537787136, + "learning_rate": 6.069676167402305e-06, + "loss": 0.3093, + "step": 37022 + }, + { + "epoch": 0.6397394249377938, + "grad_norm": 0.7582755534085945, + "learning_rate": 6.06916156202767e-06, + "loss": 0.2901, + "step": 37023 + }, + { + "epoch": 0.6397567044512027, + "grad_norm": 1.487167457968351, + "learning_rate": 6.068646968964722e-06, + "loss": 0.4469, + "step": 37024 + }, + { + "epoch": 0.6397739839646116, + "grad_norm": 1.263495255342699, + "learning_rate": 6.0681323882150665e-06, + "loss": 0.2835, + "step": 37025 + }, + { + "epoch": 0.6397912634780205, + "grad_norm": 0.8339810433905743, + "learning_rate": 6.0676178197803245e-06, + "loss": 0.5394, + "step": 37026 + }, + { + "epoch": 0.6398085429914293, + "grad_norm": 0.9484168880756015, + "learning_rate": 6.067103263662099e-06, + "loss": 0.5043, + "step": 37027 + }, + { + "epoch": 0.6398258225048382, + "grad_norm": 1.2821829925942407, + "learning_rate": 6.066588719862006e-06, + "loss": 0.3239, + "step": 37028 + }, + { + "epoch": 0.6398431020182471, + "grad_norm": 1.3276892064975192, + "learning_rate": 6.066074188381659e-06, + "loss": 0.4613, + "step": 37029 + }, + { + "epoch": 0.639860381531656, + "grad_norm": 1.2519137103418012, + "learning_rate": 6.06555966922266e-06, + "loss": 0.4886, + "step": 37030 + }, + { + "epoch": 0.6398776610450649, + "grad_norm": 0.7880905691885768, + "learning_rate": 6.065045162386632e-06, + "loss": 0.3887, + "step": 37031 + }, + { + "epoch": 0.6398949405584738, + "grad_norm": 0.7174520080496124, + "learning_rate": 6.0645306678751795e-06, + "loss": 0.3243, + "step": 37032 + }, + { + "epoch": 0.6399122200718828, + "grad_norm": 1.0487158905732024, + "learning_rate": 6.064016185689916e-06, + "loss": 0.3393, + "step": 37033 + }, + { + "epoch": 0.6399294995852917, + "grad_norm": 1.149936809304734, + "learning_rate": 6.063501715832453e-06, + "loss": 0.4485, + "step": 37034 + }, + { + "epoch": 0.6399467790987006, + "grad_norm": 0.8202303485948443, + "learning_rate": 6.0629872583044045e-06, + "loss": 0.3134, + "step": 37035 + }, + { + "epoch": 0.6399640586121095, + "grad_norm": 1.035680894209061, + "learning_rate": 6.062472813107374e-06, + "loss": 0.2385, + "step": 37036 + }, + { + "epoch": 0.6399813381255184, + "grad_norm": 1.0414629105684983, + "learning_rate": 6.061958380242982e-06, + "loss": 0.2321, + "step": 37037 + }, + { + "epoch": 0.6399986176389273, + "grad_norm": 1.226635264486099, + "learning_rate": 6.061443959712834e-06, + "loss": 0.4791, + "step": 37038 + }, + { + "epoch": 0.6400158971523362, + "grad_norm": 0.7869719339765976, + "learning_rate": 6.060929551518544e-06, + "loss": 0.3313, + "step": 37039 + }, + { + "epoch": 0.6400331766657451, + "grad_norm": 2.270537441778365, + "learning_rate": 6.060415155661724e-06, + "loss": 0.4035, + "step": 37040 + }, + { + "epoch": 0.640050456179154, + "grad_norm": 1.6476132832226082, + "learning_rate": 6.059900772143979e-06, + "loss": 0.4434, + "step": 37041 + }, + { + "epoch": 0.6400677356925629, + "grad_norm": 1.0493706063334998, + "learning_rate": 6.059386400966927e-06, + "loss": 0.3955, + "step": 37042 + }, + { + "epoch": 0.6400850152059718, + "grad_norm": 1.191700221138676, + "learning_rate": 6.058872042132173e-06, + "loss": 0.4424, + "step": 37043 + }, + { + "epoch": 0.6401022947193807, + "grad_norm": 1.626709372453227, + "learning_rate": 6.058357695641333e-06, + "loss": 0.2828, + "step": 37044 + }, + { + "epoch": 0.6401195742327896, + "grad_norm": 0.883563430313118, + "learning_rate": 6.0578433614960165e-06, + "loss": 0.3646, + "step": 37045 + }, + { + "epoch": 0.6401368537461986, + "grad_norm": 0.5622463680099089, + "learning_rate": 6.057329039697835e-06, + "loss": 0.5064, + "step": 37046 + }, + { + "epoch": 0.6401541332596075, + "grad_norm": 0.7491129046993001, + "learning_rate": 6.056814730248397e-06, + "loss": 0.4017, + "step": 37047 + }, + { + "epoch": 0.6401714127730163, + "grad_norm": 1.3789194738544333, + "learning_rate": 6.056300433149316e-06, + "loss": 0.5073, + "step": 37048 + }, + { + "epoch": 0.6401886922864252, + "grad_norm": 2.4454921734459685, + "learning_rate": 6.055786148402201e-06, + "loss": 0.5173, + "step": 37049 + }, + { + "epoch": 0.6402059717998341, + "grad_norm": 0.9243113344325194, + "learning_rate": 6.055271876008664e-06, + "loss": 0.343, + "step": 37050 + }, + { + "epoch": 0.640223251313243, + "grad_norm": 0.8276788154797438, + "learning_rate": 6.054757615970318e-06, + "loss": 0.2837, + "step": 37051 + }, + { + "epoch": 0.6402405308266519, + "grad_norm": 1.0445003378090627, + "learning_rate": 6.0542433682887656e-06, + "loss": 0.3414, + "step": 37052 + }, + { + "epoch": 0.6402578103400608, + "grad_norm": 1.0117738810819865, + "learning_rate": 6.053729132965626e-06, + "loss": 0.5659, + "step": 37053 + }, + { + "epoch": 0.6402750898534697, + "grad_norm": 1.2323571543140839, + "learning_rate": 6.053214910002505e-06, + "loss": 0.2814, + "step": 37054 + }, + { + "epoch": 0.6402923693668786, + "grad_norm": 0.8228814368125759, + "learning_rate": 6.05270069940102e-06, + "loss": 0.3046, + "step": 37055 + }, + { + "epoch": 0.6403096488802875, + "grad_norm": 0.784184948440422, + "learning_rate": 6.052186501162768e-06, + "loss": 0.359, + "step": 37056 + }, + { + "epoch": 0.6403269283936964, + "grad_norm": 0.8410919991153262, + "learning_rate": 6.051672315289374e-06, + "loss": 0.4617, + "step": 37057 + }, + { + "epoch": 0.6403442079071053, + "grad_norm": 0.9958906951130134, + "learning_rate": 6.05115814178244e-06, + "loss": 0.3886, + "step": 37058 + }, + { + "epoch": 0.6403614874205142, + "grad_norm": 0.7737804201329186, + "learning_rate": 6.05064398064358e-06, + "loss": 0.5747, + "step": 37059 + }, + { + "epoch": 0.6403787669339231, + "grad_norm": 0.9557400105759275, + "learning_rate": 6.050129831874405e-06, + "loss": 0.3207, + "step": 37060 + }, + { + "epoch": 0.6403960464473321, + "grad_norm": 1.270095376221818, + "learning_rate": 6.049615695476521e-06, + "loss": 0.4813, + "step": 37061 + }, + { + "epoch": 0.640413325960741, + "grad_norm": 2.184845908537096, + "learning_rate": 6.049101571451541e-06, + "loss": 0.3665, + "step": 37062 + }, + { + "epoch": 0.6404306054741499, + "grad_norm": 1.3905052862276739, + "learning_rate": 6.048587459801073e-06, + "loss": 0.5249, + "step": 37063 + }, + { + "epoch": 0.6404478849875588, + "grad_norm": 1.2120576116287989, + "learning_rate": 6.048073360526731e-06, + "loss": 0.3301, + "step": 37064 + }, + { + "epoch": 0.6404651645009677, + "grad_norm": 0.908542040445302, + "learning_rate": 6.047559273630123e-06, + "loss": 0.4271, + "step": 37065 + }, + { + "epoch": 0.6404824440143766, + "grad_norm": 1.3662598688526875, + "learning_rate": 6.047045199112865e-06, + "loss": 0.385, + "step": 37066 + }, + { + "epoch": 0.6404997235277855, + "grad_norm": 0.6589605318571988, + "learning_rate": 6.046531136976555e-06, + "loss": 0.2457, + "step": 37067 + }, + { + "epoch": 0.6405170030411944, + "grad_norm": 0.35826042014363335, + "learning_rate": 6.046017087222813e-06, + "loss": 0.4326, + "step": 37068 + }, + { + "epoch": 0.6405342825546032, + "grad_norm": 1.4610704028771002, + "learning_rate": 6.0455030498532454e-06, + "loss": 0.3776, + "step": 37069 + }, + { + "epoch": 0.6405515620680121, + "grad_norm": 0.7932394662530662, + "learning_rate": 6.044989024869463e-06, + "loss": 0.433, + "step": 37070 + }, + { + "epoch": 0.640568841581421, + "grad_norm": 1.2194510563580816, + "learning_rate": 6.044475012273079e-06, + "loss": 0.563, + "step": 37071 + }, + { + "epoch": 0.6405861210948299, + "grad_norm": 1.1492061817733328, + "learning_rate": 6.043961012065693e-06, + "loss": 0.4128, + "step": 37072 + }, + { + "epoch": 0.6406034006082388, + "grad_norm": 0.973182617520375, + "learning_rate": 6.043447024248928e-06, + "loss": 0.3941, + "step": 37073 + }, + { + "epoch": 0.6406206801216477, + "grad_norm": 0.9620129415039465, + "learning_rate": 6.042933048824382e-06, + "loss": 0.4545, + "step": 37074 + }, + { + "epoch": 0.6406379596350567, + "grad_norm": 2.239242616966758, + "learning_rate": 6.042419085793676e-06, + "loss": 0.3746, + "step": 37075 + }, + { + "epoch": 0.6406552391484656, + "grad_norm": 0.8674004382822512, + "learning_rate": 6.041905135158408e-06, + "loss": 0.2369, + "step": 37076 + }, + { + "epoch": 0.6406725186618745, + "grad_norm": 0.8803317556955877, + "learning_rate": 6.041391196920199e-06, + "loss": 0.4369, + "step": 37077 + }, + { + "epoch": 0.6406897981752834, + "grad_norm": 1.026539612103712, + "learning_rate": 6.0408772710806505e-06, + "loss": 0.282, + "step": 37078 + }, + { + "epoch": 0.6407070776886923, + "grad_norm": 1.1253094428126298, + "learning_rate": 6.040363357641378e-06, + "loss": 0.4853, + "step": 37079 + }, + { + "epoch": 0.6407243572021012, + "grad_norm": 1.1578905374677355, + "learning_rate": 6.039849456603985e-06, + "loss": 0.3543, + "step": 37080 + }, + { + "epoch": 0.6407416367155101, + "grad_norm": 1.3386003129957884, + "learning_rate": 6.03933556797009e-06, + "loss": 0.5153, + "step": 37081 + }, + { + "epoch": 0.640758916228919, + "grad_norm": 1.311728451926639, + "learning_rate": 6.038821691741293e-06, + "loss": 0.4379, + "step": 37082 + }, + { + "epoch": 0.6407761957423279, + "grad_norm": 0.9878534807007944, + "learning_rate": 6.038307827919208e-06, + "loss": 0.2425, + "step": 37083 + }, + { + "epoch": 0.6407934752557368, + "grad_norm": 1.300494128346084, + "learning_rate": 6.037793976505444e-06, + "loss": 0.5111, + "step": 37084 + }, + { + "epoch": 0.6408107547691457, + "grad_norm": 1.1228047939837593, + "learning_rate": 6.037280137501609e-06, + "loss": 0.2504, + "step": 37085 + }, + { + "epoch": 0.6408280342825546, + "grad_norm": 0.7926987945663801, + "learning_rate": 6.0367663109093165e-06, + "loss": 0.7066, + "step": 37086 + }, + { + "epoch": 0.6408453137959635, + "grad_norm": 2.1235933083352583, + "learning_rate": 6.036252496730171e-06, + "loss": 0.5679, + "step": 37087 + }, + { + "epoch": 0.6408625933093725, + "grad_norm": 1.40456894285011, + "learning_rate": 6.035738694965785e-06, + "loss": 0.3784, + "step": 37088 + }, + { + "epoch": 0.6408798728227814, + "grad_norm": 1.6093074161520535, + "learning_rate": 6.035224905617765e-06, + "loss": 0.6264, + "step": 37089 + }, + { + "epoch": 0.6408971523361903, + "grad_norm": 0.9418872161324585, + "learning_rate": 6.0347111286877236e-06, + "loss": 0.2338, + "step": 37090 + }, + { + "epoch": 0.6409144318495991, + "grad_norm": 0.9311687013765936, + "learning_rate": 6.03419736417727e-06, + "loss": 0.3859, + "step": 37091 + }, + { + "epoch": 0.640931711363008, + "grad_norm": 1.2869841663740018, + "learning_rate": 6.033683612088008e-06, + "loss": 0.37, + "step": 37092 + }, + { + "epoch": 0.6409489908764169, + "grad_norm": 0.7807449911059868, + "learning_rate": 6.033169872421554e-06, + "loss": 0.3021, + "step": 37093 + }, + { + "epoch": 0.6409662703898258, + "grad_norm": 0.47747512871769027, + "learning_rate": 6.0326561451795095e-06, + "loss": 0.6159, + "step": 37094 + }, + { + "epoch": 0.6409835499032347, + "grad_norm": 1.164458541290129, + "learning_rate": 6.032142430363491e-06, + "loss": 0.4167, + "step": 37095 + }, + { + "epoch": 0.6410008294166436, + "grad_norm": 1.629991495245807, + "learning_rate": 6.031628727975098e-06, + "loss": 0.4113, + "step": 37096 + }, + { + "epoch": 0.6410181089300525, + "grad_norm": 0.8757861032823818, + "learning_rate": 6.031115038015953e-06, + "loss": 0.7078, + "step": 37097 + }, + { + "epoch": 0.6410353884434614, + "grad_norm": 0.9048999598873378, + "learning_rate": 6.030601360487653e-06, + "loss": 0.3787, + "step": 37098 + }, + { + "epoch": 0.6410526679568703, + "grad_norm": 1.014543545924606, + "learning_rate": 6.030087695391813e-06, + "loss": 0.5188, + "step": 37099 + }, + { + "epoch": 0.6410699474702792, + "grad_norm": 1.1018065646728512, + "learning_rate": 6.029574042730038e-06, + "loss": 0.5133, + "step": 37100 + }, + { + "epoch": 0.6410872269836881, + "grad_norm": 1.4185130640611134, + "learning_rate": 6.029060402503942e-06, + "loss": 0.3382, + "step": 37101 + }, + { + "epoch": 0.641104506497097, + "grad_norm": 0.9417831247501772, + "learning_rate": 6.028546774715129e-06, + "loss": 0.407, + "step": 37102 + }, + { + "epoch": 0.641121786010506, + "grad_norm": 1.5659249741288326, + "learning_rate": 6.028033159365208e-06, + "loss": 0.4145, + "step": 37103 + }, + { + "epoch": 0.6411390655239149, + "grad_norm": 1.0460268512268427, + "learning_rate": 6.02751955645579e-06, + "loss": 0.3466, + "step": 37104 + }, + { + "epoch": 0.6411563450373238, + "grad_norm": 1.4048347093843667, + "learning_rate": 6.027005965988482e-06, + "loss": 0.4717, + "step": 37105 + }, + { + "epoch": 0.6411736245507327, + "grad_norm": 1.4405020050277892, + "learning_rate": 6.026492387964896e-06, + "loss": 0.4728, + "step": 37106 + }, + { + "epoch": 0.6411909040641416, + "grad_norm": 1.2477963416387912, + "learning_rate": 6.025978822386634e-06, + "loss": 0.437, + "step": 37107 + }, + { + "epoch": 0.6412081835775505, + "grad_norm": 1.5014838723714832, + "learning_rate": 6.02546526925531e-06, + "loss": 0.5264, + "step": 37108 + }, + { + "epoch": 0.6412254630909594, + "grad_norm": 1.0247268619532264, + "learning_rate": 6.02495172857253e-06, + "loss": 0.4303, + "step": 37109 + }, + { + "epoch": 0.6412427426043683, + "grad_norm": 1.0707911970238062, + "learning_rate": 6.024438200339906e-06, + "loss": 0.4889, + "step": 37110 + }, + { + "epoch": 0.6412600221177772, + "grad_norm": 0.9331453160769709, + "learning_rate": 6.0239246845590394e-06, + "loss": 0.3479, + "step": 37111 + }, + { + "epoch": 0.641277301631186, + "grad_norm": 1.3432780001734097, + "learning_rate": 6.023411181231549e-06, + "loss": 0.4858, + "step": 37112 + }, + { + "epoch": 0.6412945811445949, + "grad_norm": 1.3877980909684229, + "learning_rate": 6.022897690359033e-06, + "loss": 0.3201, + "step": 37113 + }, + { + "epoch": 0.6413118606580038, + "grad_norm": 1.1482002349463134, + "learning_rate": 6.022384211943102e-06, + "loss": 0.266, + "step": 37114 + }, + { + "epoch": 0.6413291401714127, + "grad_norm": 1.0948466506715944, + "learning_rate": 6.021870745985371e-06, + "loss": 0.4871, + "step": 37115 + }, + { + "epoch": 0.6413464196848216, + "grad_norm": 1.0357210633528633, + "learning_rate": 6.021357292487437e-06, + "loss": 0.3422, + "step": 37116 + }, + { + "epoch": 0.6413636991982306, + "grad_norm": 1.326159387032116, + "learning_rate": 6.02084385145092e-06, + "loss": 0.4239, + "step": 37117 + }, + { + "epoch": 0.6413809787116395, + "grad_norm": 1.771058669301791, + "learning_rate": 6.020330422877419e-06, + "loss": 0.4306, + "step": 37118 + }, + { + "epoch": 0.6413982582250484, + "grad_norm": 1.30665172097076, + "learning_rate": 6.0198170067685465e-06, + "loss": 0.4386, + "step": 37119 + }, + { + "epoch": 0.6414155377384573, + "grad_norm": 0.720653179632791, + "learning_rate": 6.019303603125908e-06, + "loss": 0.4555, + "step": 37120 + }, + { + "epoch": 0.6414328172518662, + "grad_norm": 1.3103949812608968, + "learning_rate": 6.018790211951118e-06, + "loss": 0.4348, + "step": 37121 + }, + { + "epoch": 0.6414500967652751, + "grad_norm": 1.2799030799494004, + "learning_rate": 6.018276833245776e-06, + "loss": 0.5002, + "step": 37122 + }, + { + "epoch": 0.641467376278684, + "grad_norm": 1.0179846123709129, + "learning_rate": 6.0177634670114925e-06, + "loss": 0.3063, + "step": 37123 + }, + { + "epoch": 0.6414846557920929, + "grad_norm": 0.8332937603877388, + "learning_rate": 6.017250113249877e-06, + "loss": 0.2515, + "step": 37124 + }, + { + "epoch": 0.6415019353055018, + "grad_norm": 1.2687555439818685, + "learning_rate": 6.016736771962537e-06, + "loss": 0.4104, + "step": 37125 + }, + { + "epoch": 0.6415192148189107, + "grad_norm": 1.3307945345981615, + "learning_rate": 6.016223443151083e-06, + "loss": 0.3273, + "step": 37126 + }, + { + "epoch": 0.6415364943323196, + "grad_norm": 1.1734231812068745, + "learning_rate": 6.015710126817117e-06, + "loss": 0.4111, + "step": 37127 + }, + { + "epoch": 0.6415537738457285, + "grad_norm": 0.9385586479465098, + "learning_rate": 6.015196822962251e-06, + "loss": 0.3434, + "step": 37128 + }, + { + "epoch": 0.6415710533591374, + "grad_norm": 1.0092049232513038, + "learning_rate": 6.014683531588089e-06, + "loss": 0.228, + "step": 37129 + }, + { + "epoch": 0.6415883328725464, + "grad_norm": 1.7673128597808494, + "learning_rate": 6.014170252696243e-06, + "loss": 0.3776, + "step": 37130 + }, + { + "epoch": 0.6416056123859553, + "grad_norm": 1.182662724708105, + "learning_rate": 6.0136569862883175e-06, + "loss": 0.4161, + "step": 37131 + }, + { + "epoch": 0.6416228918993642, + "grad_norm": 0.9369235532963479, + "learning_rate": 6.013143732365925e-06, + "loss": 0.4321, + "step": 37132 + }, + { + "epoch": 0.641640171412773, + "grad_norm": 1.0273589002264243, + "learning_rate": 6.0126304909306665e-06, + "loss": 0.197, + "step": 37133 + }, + { + "epoch": 0.6416574509261819, + "grad_norm": 1.3261510994218186, + "learning_rate": 6.012117261984152e-06, + "loss": 0.3333, + "step": 37134 + }, + { + "epoch": 0.6416747304395908, + "grad_norm": 0.5560230861303291, + "learning_rate": 6.011604045527992e-06, + "loss": 0.1386, + "step": 37135 + }, + { + "epoch": 0.6416920099529997, + "grad_norm": 0.9841979101860369, + "learning_rate": 6.011090841563784e-06, + "loss": 0.4779, + "step": 37136 + }, + { + "epoch": 0.6417092894664086, + "grad_norm": 1.105604331042638, + "learning_rate": 6.010577650093152e-06, + "loss": 0.3157, + "step": 37137 + }, + { + "epoch": 0.6417265689798175, + "grad_norm": 1.3856919803894736, + "learning_rate": 6.010064471117687e-06, + "loss": 0.465, + "step": 37138 + }, + { + "epoch": 0.6417438484932264, + "grad_norm": 1.8465217996290895, + "learning_rate": 6.009551304639007e-06, + "loss": 0.6039, + "step": 37139 + }, + { + "epoch": 0.6417611280066353, + "grad_norm": 1.161891512297485, + "learning_rate": 6.009038150658712e-06, + "loss": 0.4019, + "step": 37140 + }, + { + "epoch": 0.6417784075200442, + "grad_norm": 1.031622920046892, + "learning_rate": 6.008525009178419e-06, + "loss": 0.3616, + "step": 37141 + }, + { + "epoch": 0.6417956870334531, + "grad_norm": 1.0079312436495096, + "learning_rate": 6.008011880199727e-06, + "loss": 0.3508, + "step": 37142 + }, + { + "epoch": 0.641812966546862, + "grad_norm": 0.634967978075602, + "learning_rate": 6.007498763724241e-06, + "loss": 0.6217, + "step": 37143 + }, + { + "epoch": 0.641830246060271, + "grad_norm": 1.5725296209361053, + "learning_rate": 6.006985659753575e-06, + "loss": 0.2499, + "step": 37144 + }, + { + "epoch": 0.6418475255736799, + "grad_norm": 1.1155861808749108, + "learning_rate": 6.0064725682893325e-06, + "loss": 0.6868, + "step": 37145 + }, + { + "epoch": 0.6418648050870888, + "grad_norm": 1.0299992008812586, + "learning_rate": 6.005959489333124e-06, + "loss": 0.3704, + "step": 37146 + }, + { + "epoch": 0.6418820846004977, + "grad_norm": 0.6522613731057817, + "learning_rate": 6.005446422886551e-06, + "loss": 0.3243, + "step": 37147 + }, + { + "epoch": 0.6418993641139066, + "grad_norm": 1.4542535873634614, + "learning_rate": 6.004933368951225e-06, + "loss": 0.2117, + "step": 37148 + }, + { + "epoch": 0.6419166436273155, + "grad_norm": 0.7010424822194595, + "learning_rate": 6.004420327528749e-06, + "loss": 0.3766, + "step": 37149 + }, + { + "epoch": 0.6419339231407244, + "grad_norm": 0.7571541836903796, + "learning_rate": 6.003907298620735e-06, + "loss": 0.3949, + "step": 37150 + }, + { + "epoch": 0.6419512026541333, + "grad_norm": 0.9539242634126205, + "learning_rate": 6.003394282228783e-06, + "loss": 0.211, + "step": 37151 + }, + { + "epoch": 0.6419684821675422, + "grad_norm": 1.9837868070719482, + "learning_rate": 6.00288127835451e-06, + "loss": 0.4275, + "step": 37152 + }, + { + "epoch": 0.6419857616809511, + "grad_norm": 1.1857630899633826, + "learning_rate": 6.002368286999514e-06, + "loss": 0.6905, + "step": 37153 + }, + { + "epoch": 0.6420030411943599, + "grad_norm": 1.4753484671759047, + "learning_rate": 6.001855308165402e-06, + "loss": 0.3864, + "step": 37154 + }, + { + "epoch": 0.6420203207077688, + "grad_norm": 1.4748435713171761, + "learning_rate": 6.001342341853786e-06, + "loss": 0.4352, + "step": 37155 + }, + { + "epoch": 0.6420376002211777, + "grad_norm": 1.2816104175466327, + "learning_rate": 6.0008293880662664e-06, + "loss": 0.5048, + "step": 37156 + }, + { + "epoch": 0.6420548797345866, + "grad_norm": 0.8520465369075286, + "learning_rate": 6.000316446804457e-06, + "loss": 0.4808, + "step": 37157 + }, + { + "epoch": 0.6420721592479955, + "grad_norm": 0.5062972326202374, + "learning_rate": 5.999803518069957e-06, + "loss": 0.5387, + "step": 37158 + }, + { + "epoch": 0.6420894387614045, + "grad_norm": 1.1482199771711241, + "learning_rate": 5.999290601864378e-06, + "loss": 0.5905, + "step": 37159 + }, + { + "epoch": 0.6421067182748134, + "grad_norm": 1.0396783164393144, + "learning_rate": 5.998777698189324e-06, + "loss": 0.3199, + "step": 37160 + }, + { + "epoch": 0.6421239977882223, + "grad_norm": 1.1254724214407716, + "learning_rate": 5.998264807046405e-06, + "loss": 0.3085, + "step": 37161 + }, + { + "epoch": 0.6421412773016312, + "grad_norm": 1.6039751034189942, + "learning_rate": 5.99775192843722e-06, + "loss": 0.4424, + "step": 37162 + }, + { + "epoch": 0.6421585568150401, + "grad_norm": 1.3631303761789524, + "learning_rate": 5.997239062363384e-06, + "loss": 0.4141, + "step": 37163 + }, + { + "epoch": 0.642175836328449, + "grad_norm": 1.0678977183383802, + "learning_rate": 5.9967262088265e-06, + "loss": 0.3162, + "step": 37164 + }, + { + "epoch": 0.6421931158418579, + "grad_norm": 1.1702705526783208, + "learning_rate": 5.9962133678281706e-06, + "loss": 0.3825, + "step": 37165 + }, + { + "epoch": 0.6422103953552668, + "grad_norm": 1.6398524519813287, + "learning_rate": 5.99570053937001e-06, + "loss": 0.4577, + "step": 37166 + }, + { + "epoch": 0.6422276748686757, + "grad_norm": 0.8651446646213818, + "learning_rate": 5.995187723453615e-06, + "loss": 0.2961, + "step": 37167 + }, + { + "epoch": 0.6422449543820846, + "grad_norm": 1.0893450576758819, + "learning_rate": 5.994674920080598e-06, + "loss": 0.3849, + "step": 37168 + }, + { + "epoch": 0.6422622338954935, + "grad_norm": 0.7247547709989273, + "learning_rate": 5.994162129252561e-06, + "loss": 0.2862, + "step": 37169 + }, + { + "epoch": 0.6422795134089024, + "grad_norm": 1.143252589431791, + "learning_rate": 5.993649350971116e-06, + "loss": 0.539, + "step": 37170 + }, + { + "epoch": 0.6422967929223113, + "grad_norm": 1.6692155178811172, + "learning_rate": 5.993136585237864e-06, + "loss": 0.2832, + "step": 37171 + }, + { + "epoch": 0.6423140724357203, + "grad_norm": 1.1663839828426823, + "learning_rate": 5.992623832054416e-06, + "loss": 0.2926, + "step": 37172 + }, + { + "epoch": 0.6423313519491292, + "grad_norm": 0.9466282587579582, + "learning_rate": 5.9921110914223724e-06, + "loss": 0.3513, + "step": 37173 + }, + { + "epoch": 0.6423486314625381, + "grad_norm": 0.77463081329368, + "learning_rate": 5.991598363343339e-06, + "loss": 0.2553, + "step": 37174 + }, + { + "epoch": 0.6423659109759469, + "grad_norm": 1.1790252232439697, + "learning_rate": 5.9910856478189295e-06, + "loss": 0.3154, + "step": 37175 + }, + { + "epoch": 0.6423831904893558, + "grad_norm": 1.6616152755041649, + "learning_rate": 5.990572944850738e-06, + "loss": 0.5087, + "step": 37176 + }, + { + "epoch": 0.6424004700027647, + "grad_norm": 0.9362880151332332, + "learning_rate": 5.990060254440382e-06, + "loss": 0.4965, + "step": 37177 + }, + { + "epoch": 0.6424177495161736, + "grad_norm": 1.502140305050232, + "learning_rate": 5.9895475765894585e-06, + "loss": 0.3443, + "step": 37178 + }, + { + "epoch": 0.6424350290295825, + "grad_norm": 0.9713947689779452, + "learning_rate": 5.9890349112995785e-06, + "loss": 0.4008, + "step": 37179 + }, + { + "epoch": 0.6424523085429914, + "grad_norm": 2.4276163088934823, + "learning_rate": 5.988522258572345e-06, + "loss": 0.2732, + "step": 37180 + }, + { + "epoch": 0.6424695880564003, + "grad_norm": 2.007232312843584, + "learning_rate": 5.988009618409367e-06, + "loss": 0.316, + "step": 37181 + }, + { + "epoch": 0.6424868675698092, + "grad_norm": 1.0927620835761285, + "learning_rate": 5.987496990812243e-06, + "loss": 0.3068, + "step": 37182 + }, + { + "epoch": 0.6425041470832181, + "grad_norm": 1.1009516750617119, + "learning_rate": 5.98698437578259e-06, + "loss": 0.446, + "step": 37183 + }, + { + "epoch": 0.642521426596627, + "grad_norm": 1.011118843344932, + "learning_rate": 5.986471773322003e-06, + "loss": 0.4562, + "step": 37184 + }, + { + "epoch": 0.6425387061100359, + "grad_norm": 1.00410464542205, + "learning_rate": 5.985959183432091e-06, + "loss": 0.4326, + "step": 37185 + }, + { + "epoch": 0.6425559856234448, + "grad_norm": 1.4948349174660363, + "learning_rate": 5.985446606114463e-06, + "loss": 0.3937, + "step": 37186 + }, + { + "epoch": 0.6425732651368538, + "grad_norm": 0.997798040088563, + "learning_rate": 5.984934041370718e-06, + "loss": 0.5459, + "step": 37187 + }, + { + "epoch": 0.6425905446502627, + "grad_norm": 0.9836286315884059, + "learning_rate": 5.984421489202466e-06, + "loss": 0.277, + "step": 37188 + }, + { + "epoch": 0.6426078241636716, + "grad_norm": 1.5524498392068349, + "learning_rate": 5.98390894961131e-06, + "loss": 0.3734, + "step": 37189 + }, + { + "epoch": 0.6426251036770805, + "grad_norm": 1.3198278100454517, + "learning_rate": 5.9833964225988575e-06, + "loss": 0.4306, + "step": 37190 + }, + { + "epoch": 0.6426423831904894, + "grad_norm": 1.347564410278124, + "learning_rate": 5.98288390816671e-06, + "loss": 0.4518, + "step": 37191 + }, + { + "epoch": 0.6426596627038983, + "grad_norm": 1.581040165169679, + "learning_rate": 5.982371406316481e-06, + "loss": 0.4188, + "step": 37192 + }, + { + "epoch": 0.6426769422173072, + "grad_norm": 1.2845116861025212, + "learning_rate": 5.981858917049765e-06, + "loss": 0.4061, + "step": 37193 + }, + { + "epoch": 0.6426942217307161, + "grad_norm": 0.988889846964604, + "learning_rate": 5.981346440368175e-06, + "loss": 0.3242, + "step": 37194 + }, + { + "epoch": 0.642711501244125, + "grad_norm": 0.7041484268272679, + "learning_rate": 5.980833976273315e-06, + "loss": 0.323, + "step": 37195 + }, + { + "epoch": 0.6427287807575338, + "grad_norm": 0.7861378106531526, + "learning_rate": 5.980321524766783e-06, + "loss": 0.3455, + "step": 37196 + }, + { + "epoch": 0.6427460602709427, + "grad_norm": 1.2943778148425427, + "learning_rate": 5.979809085850195e-06, + "loss": 0.5749, + "step": 37197 + }, + { + "epoch": 0.6427633397843516, + "grad_norm": 1.3747503723320535, + "learning_rate": 5.979296659525146e-06, + "loss": 0.3716, + "step": 37198 + }, + { + "epoch": 0.6427806192977605, + "grad_norm": 1.1174988886126374, + "learning_rate": 5.978784245793248e-06, + "loss": 0.3911, + "step": 37199 + }, + { + "epoch": 0.6427978988111694, + "grad_norm": 1.516932213412793, + "learning_rate": 5.978271844656101e-06, + "loss": 0.4225, + "step": 37200 + }, + { + "epoch": 0.6428151783245784, + "grad_norm": 0.9187393340318263, + "learning_rate": 5.977759456115318e-06, + "loss": 0.4571, + "step": 37201 + }, + { + "epoch": 0.6428324578379873, + "grad_norm": 0.948775419274277, + "learning_rate": 5.977247080172489e-06, + "loss": 0.4221, + "step": 37202 + }, + { + "epoch": 0.6428497373513962, + "grad_norm": 0.9242237355065159, + "learning_rate": 5.976734716829236e-06, + "loss": 0.2611, + "step": 37203 + }, + { + "epoch": 0.6428670168648051, + "grad_norm": 0.512558691354907, + "learning_rate": 5.976222366087152e-06, + "loss": 0.3959, + "step": 37204 + }, + { + "epoch": 0.642884296378214, + "grad_norm": 0.5605544754416444, + "learning_rate": 5.975710027947845e-06, + "loss": 0.4872, + "step": 37205 + }, + { + "epoch": 0.6429015758916229, + "grad_norm": 1.093321365223423, + "learning_rate": 5.975197702412923e-06, + "loss": 0.4461, + "step": 37206 + }, + { + "epoch": 0.6429188554050318, + "grad_norm": 0.5787447633759267, + "learning_rate": 5.974685389483984e-06, + "loss": 0.7921, + "step": 37207 + }, + { + "epoch": 0.6429361349184407, + "grad_norm": 1.094043915205346, + "learning_rate": 5.974173089162638e-06, + "loss": 0.2926, + "step": 37208 + }, + { + "epoch": 0.6429534144318496, + "grad_norm": 1.8376971260985135, + "learning_rate": 5.973660801450486e-06, + "loss": 0.2949, + "step": 37209 + }, + { + "epoch": 0.6429706939452585, + "grad_norm": 1.3924017952677739, + "learning_rate": 5.973148526349137e-06, + "loss": 0.38, + "step": 37210 + }, + { + "epoch": 0.6429879734586674, + "grad_norm": 1.170824579567758, + "learning_rate": 5.97263626386019e-06, + "loss": 0.3972, + "step": 37211 + }, + { + "epoch": 0.6430052529720763, + "grad_norm": 0.9855737769722575, + "learning_rate": 5.972124013985257e-06, + "loss": 0.721, + "step": 37212 + }, + { + "epoch": 0.6430225324854852, + "grad_norm": 0.937421446429471, + "learning_rate": 5.971611776725933e-06, + "loss": 0.2693, + "step": 37213 + }, + { + "epoch": 0.6430398119988942, + "grad_norm": 2.069472877063329, + "learning_rate": 5.97109955208383e-06, + "loss": 0.4559, + "step": 37214 + }, + { + "epoch": 0.6430570915123031, + "grad_norm": 0.906801953046976, + "learning_rate": 5.97058734006055e-06, + "loss": 0.4686, + "step": 37215 + }, + { + "epoch": 0.643074371025712, + "grad_norm": 0.6589705223468932, + "learning_rate": 5.970075140657692e-06, + "loss": 0.2047, + "step": 37216 + }, + { + "epoch": 0.6430916505391209, + "grad_norm": 1.706908996088186, + "learning_rate": 5.96956295387687e-06, + "loss": 0.555, + "step": 37217 + }, + { + "epoch": 0.6431089300525297, + "grad_norm": 0.7807357864281574, + "learning_rate": 5.969050779719679e-06, + "loss": 0.3498, + "step": 37218 + }, + { + "epoch": 0.6431262095659386, + "grad_norm": 0.9183311504712587, + "learning_rate": 5.968538618187729e-06, + "loss": 0.4078, + "step": 37219 + }, + { + "epoch": 0.6431434890793475, + "grad_norm": 1.0329023628957896, + "learning_rate": 5.9680264692826215e-06, + "loss": 0.444, + "step": 37220 + }, + { + "epoch": 0.6431607685927564, + "grad_norm": 1.195040429470179, + "learning_rate": 5.967514333005966e-06, + "loss": 0.5426, + "step": 37221 + }, + { + "epoch": 0.6431780481061653, + "grad_norm": 1.8975075191076043, + "learning_rate": 5.967002209359355e-06, + "loss": 0.3822, + "step": 37222 + }, + { + "epoch": 0.6431953276195742, + "grad_norm": 0.9699266437949725, + "learning_rate": 5.966490098344406e-06, + "loss": 0.4186, + "step": 37223 + }, + { + "epoch": 0.6432126071329831, + "grad_norm": 0.4532700349705981, + "learning_rate": 5.965977999962712e-06, + "loss": 0.6603, + "step": 37224 + }, + { + "epoch": 0.643229886646392, + "grad_norm": 1.4495053610877497, + "learning_rate": 5.965465914215885e-06, + "loss": 0.2435, + "step": 37225 + }, + { + "epoch": 0.6432471661598009, + "grad_norm": 1.1382648576537064, + "learning_rate": 5.964953841105525e-06, + "loss": 0.2834, + "step": 37226 + }, + { + "epoch": 0.6432644456732098, + "grad_norm": 1.165627948810706, + "learning_rate": 5.964441780633235e-06, + "loss": 0.2146, + "step": 37227 + }, + { + "epoch": 0.6432817251866187, + "grad_norm": 0.7002805329434644, + "learning_rate": 5.963929732800619e-06, + "loss": 0.254, + "step": 37228 + }, + { + "epoch": 0.6432990047000277, + "grad_norm": 1.046431975836969, + "learning_rate": 5.9634176976092815e-06, + "loss": 0.4475, + "step": 37229 + }, + { + "epoch": 0.6433162842134366, + "grad_norm": 0.7121498138952006, + "learning_rate": 5.9629056750608285e-06, + "loss": 0.3235, + "step": 37230 + }, + { + "epoch": 0.6433335637268455, + "grad_norm": 1.170670468983953, + "learning_rate": 5.96239366515686e-06, + "loss": 0.3431, + "step": 37231 + }, + { + "epoch": 0.6433508432402544, + "grad_norm": 1.052190131312132, + "learning_rate": 5.961881667898986e-06, + "loss": 0.3612, + "step": 37232 + }, + { + "epoch": 0.6433681227536633, + "grad_norm": 1.2663082007198203, + "learning_rate": 5.961369683288799e-06, + "loss": 0.3125, + "step": 37233 + }, + { + "epoch": 0.6433854022670722, + "grad_norm": 1.1514919820588074, + "learning_rate": 5.960857711327912e-06, + "loss": 0.4, + "step": 37234 + }, + { + "epoch": 0.6434026817804811, + "grad_norm": 1.5475872546724196, + "learning_rate": 5.960345752017929e-06, + "loss": 0.5229, + "step": 37235 + }, + { + "epoch": 0.64341996129389, + "grad_norm": 1.5040099128332003, + "learning_rate": 5.959833805360443e-06, + "loss": 0.3263, + "step": 37236 + }, + { + "epoch": 0.6434372408072989, + "grad_norm": 0.9847057858775807, + "learning_rate": 5.959321871357071e-06, + "loss": 0.4479, + "step": 37237 + }, + { + "epoch": 0.6434545203207078, + "grad_norm": 1.0532399111278794, + "learning_rate": 5.958809950009406e-06, + "loss": 0.3209, + "step": 37238 + }, + { + "epoch": 0.6434717998341166, + "grad_norm": 1.1510870883698854, + "learning_rate": 5.958298041319057e-06, + "loss": 0.4413, + "step": 37239 + }, + { + "epoch": 0.6434890793475255, + "grad_norm": 0.8936781447702505, + "learning_rate": 5.957786145287624e-06, + "loss": 0.2218, + "step": 37240 + }, + { + "epoch": 0.6435063588609344, + "grad_norm": 1.4791498708901154, + "learning_rate": 5.957274261916716e-06, + "loss": 0.306, + "step": 37241 + }, + { + "epoch": 0.6435236383743433, + "grad_norm": 0.930687008098222, + "learning_rate": 5.956762391207926e-06, + "loss": 0.4373, + "step": 37242 + }, + { + "epoch": 0.6435409178877523, + "grad_norm": 0.7632248489908036, + "learning_rate": 5.956250533162869e-06, + "loss": 0.262, + "step": 37243 + }, + { + "epoch": 0.6435581974011612, + "grad_norm": 1.0363099407479868, + "learning_rate": 5.955738687783139e-06, + "loss": 0.2676, + "step": 37244 + }, + { + "epoch": 0.6435754769145701, + "grad_norm": 1.2941461525093265, + "learning_rate": 5.955226855070346e-06, + "loss": 0.3649, + "step": 37245 + }, + { + "epoch": 0.643592756427979, + "grad_norm": 1.0987619093244836, + "learning_rate": 5.954715035026091e-06, + "loss": 0.5871, + "step": 37246 + }, + { + "epoch": 0.6436100359413879, + "grad_norm": 0.6674537242902878, + "learning_rate": 5.954203227651971e-06, + "loss": 0.3332, + "step": 37247 + }, + { + "epoch": 0.6436273154547968, + "grad_norm": 1.293640884592873, + "learning_rate": 5.953691432949597e-06, + "loss": 0.4039, + "step": 37248 + }, + { + "epoch": 0.6436445949682057, + "grad_norm": 0.9168220347299433, + "learning_rate": 5.953179650920566e-06, + "loss": 0.6211, + "step": 37249 + }, + { + "epoch": 0.6436618744816146, + "grad_norm": 1.0474888250234065, + "learning_rate": 5.952667881566488e-06, + "loss": 0.4256, + "step": 37250 + }, + { + "epoch": 0.6436791539950235, + "grad_norm": 1.0200410433703138, + "learning_rate": 5.952156124888957e-06, + "loss": 0.3183, + "step": 37251 + }, + { + "epoch": 0.6436964335084324, + "grad_norm": 1.0236657128185345, + "learning_rate": 5.951644380889587e-06, + "loss": 0.465, + "step": 37252 + }, + { + "epoch": 0.6437137130218413, + "grad_norm": 1.3690451489106759, + "learning_rate": 5.951132649569968e-06, + "loss": 0.4851, + "step": 37253 + }, + { + "epoch": 0.6437309925352502, + "grad_norm": 1.4042168079715538, + "learning_rate": 5.950620930931714e-06, + "loss": 0.3221, + "step": 37254 + }, + { + "epoch": 0.6437482720486591, + "grad_norm": 1.3519087317957839, + "learning_rate": 5.950109224976419e-06, + "loss": 0.5109, + "step": 37255 + }, + { + "epoch": 0.643765551562068, + "grad_norm": 0.8754344689994944, + "learning_rate": 5.949597531705693e-06, + "loss": 0.5156, + "step": 37256 + }, + { + "epoch": 0.643782831075477, + "grad_norm": 0.9167553655976647, + "learning_rate": 5.949085851121137e-06, + "loss": 0.3806, + "step": 37257 + }, + { + "epoch": 0.6438001105888859, + "grad_norm": 1.2188749700934263, + "learning_rate": 5.948574183224347e-06, + "loss": 0.4657, + "step": 37258 + }, + { + "epoch": 0.6438173901022948, + "grad_norm": 1.397406738015286, + "learning_rate": 5.948062528016933e-06, + "loss": 0.4534, + "step": 37259 + }, + { + "epoch": 0.6438346696157036, + "grad_norm": 1.1402153414789118, + "learning_rate": 5.947550885500495e-06, + "loss": 0.2106, + "step": 37260 + }, + { + "epoch": 0.6438519491291125, + "grad_norm": 1.0654661530640326, + "learning_rate": 5.947039255676637e-06, + "loss": 0.4051, + "step": 37261 + }, + { + "epoch": 0.6438692286425214, + "grad_norm": 1.1471879478190161, + "learning_rate": 5.946527638546955e-06, + "loss": 0.353, + "step": 37262 + }, + { + "epoch": 0.6438865081559303, + "grad_norm": 1.0634901248505524, + "learning_rate": 5.946016034113063e-06, + "loss": 0.403, + "step": 37263 + }, + { + "epoch": 0.6439037876693392, + "grad_norm": 0.8267349789657925, + "learning_rate": 5.945504442376552e-06, + "loss": 0.25, + "step": 37264 + }, + { + "epoch": 0.6439210671827481, + "grad_norm": 1.3723838541537794, + "learning_rate": 5.944992863339032e-06, + "loss": 0.4416, + "step": 37265 + }, + { + "epoch": 0.643938346696157, + "grad_norm": 0.7338426164931441, + "learning_rate": 5.944481297002104e-06, + "loss": 0.7619, + "step": 37266 + }, + { + "epoch": 0.6439556262095659, + "grad_norm": 1.2039964271350776, + "learning_rate": 5.943969743367365e-06, + "loss": 0.3793, + "step": 37267 + }, + { + "epoch": 0.6439729057229748, + "grad_norm": 0.7535386698644476, + "learning_rate": 5.943458202436423e-06, + "loss": 0.2958, + "step": 37268 + }, + { + "epoch": 0.6439901852363837, + "grad_norm": 1.4957412458294803, + "learning_rate": 5.9429466742108765e-06, + "loss": 0.4473, + "step": 37269 + }, + { + "epoch": 0.6440074647497926, + "grad_norm": 0.9829651930103513, + "learning_rate": 5.94243515869233e-06, + "loss": 0.2185, + "step": 37270 + }, + { + "epoch": 0.6440247442632016, + "grad_norm": 1.0128902772486341, + "learning_rate": 5.9419236558823845e-06, + "loss": 0.4235, + "step": 37271 + }, + { + "epoch": 0.6440420237766105, + "grad_norm": 0.7898330402436785, + "learning_rate": 5.941412165782645e-06, + "loss": 0.3868, + "step": 37272 + }, + { + "epoch": 0.6440593032900194, + "grad_norm": 1.1846138622669313, + "learning_rate": 5.940900688394708e-06, + "loss": 0.4323, + "step": 37273 + }, + { + "epoch": 0.6440765828034283, + "grad_norm": 0.958179493399647, + "learning_rate": 5.940389223720181e-06, + "loss": 0.3017, + "step": 37274 + }, + { + "epoch": 0.6440938623168372, + "grad_norm": 1.0269830429733158, + "learning_rate": 5.939877771760661e-06, + "loss": 0.4277, + "step": 37275 + }, + { + "epoch": 0.6441111418302461, + "grad_norm": 1.0608745015236603, + "learning_rate": 5.9393663325177545e-06, + "loss": 0.3864, + "step": 37276 + }, + { + "epoch": 0.644128421343655, + "grad_norm": 1.2106614196107555, + "learning_rate": 5.938854905993063e-06, + "loss": 0.2547, + "step": 37277 + }, + { + "epoch": 0.6441457008570639, + "grad_norm": 1.7128511884506714, + "learning_rate": 5.938343492188183e-06, + "loss": 0.4489, + "step": 37278 + }, + { + "epoch": 0.6441629803704728, + "grad_norm": 0.7164167335637749, + "learning_rate": 5.93783209110472e-06, + "loss": 0.3896, + "step": 37279 + }, + { + "epoch": 0.6441802598838817, + "grad_norm": 1.1066169213595909, + "learning_rate": 5.937320702744276e-06, + "loss": 0.3072, + "step": 37280 + }, + { + "epoch": 0.6441975393972905, + "grad_norm": 1.1354488327982053, + "learning_rate": 5.936809327108456e-06, + "loss": 0.2935, + "step": 37281 + }, + { + "epoch": 0.6442148189106994, + "grad_norm": 1.490732999785801, + "learning_rate": 5.936297964198852e-06, + "loss": 0.5281, + "step": 37282 + }, + { + "epoch": 0.6442320984241083, + "grad_norm": 1.2364080273538867, + "learning_rate": 5.935786614017076e-06, + "loss": 0.2938, + "step": 37283 + }, + { + "epoch": 0.6442493779375172, + "grad_norm": 0.8953470863661022, + "learning_rate": 5.935275276564722e-06, + "loss": 0.3398, + "step": 37284 + }, + { + "epoch": 0.6442666574509262, + "grad_norm": 1.2481086992834058, + "learning_rate": 5.934763951843397e-06, + "loss": 0.4973, + "step": 37285 + }, + { + "epoch": 0.6442839369643351, + "grad_norm": 1.040078341469899, + "learning_rate": 5.9342526398546985e-06, + "loss": 0.4903, + "step": 37286 + }, + { + "epoch": 0.644301216477744, + "grad_norm": 1.0288116290095501, + "learning_rate": 5.933741340600234e-06, + "loss": 0.3104, + "step": 37287 + }, + { + "epoch": 0.6443184959911529, + "grad_norm": 1.0861842100827377, + "learning_rate": 5.933230054081598e-06, + "loss": 0.4079, + "step": 37288 + }, + { + "epoch": 0.6443357755045618, + "grad_norm": 1.0770197324720676, + "learning_rate": 5.9327187803003925e-06, + "loss": 0.4074, + "step": 37289 + }, + { + "epoch": 0.6443530550179707, + "grad_norm": 1.452355746514086, + "learning_rate": 5.932207519258222e-06, + "loss": 0.4763, + "step": 37290 + }, + { + "epoch": 0.6443703345313796, + "grad_norm": 0.8832037097426615, + "learning_rate": 5.931696270956686e-06, + "loss": 0.4121, + "step": 37291 + }, + { + "epoch": 0.6443876140447885, + "grad_norm": 3.7824822780816323, + "learning_rate": 5.931185035397391e-06, + "loss": 0.2922, + "step": 37292 + }, + { + "epoch": 0.6444048935581974, + "grad_norm": 1.1210347180721736, + "learning_rate": 5.930673812581929e-06, + "loss": 0.5649, + "step": 37293 + }, + { + "epoch": 0.6444221730716063, + "grad_norm": 0.6716590541670525, + "learning_rate": 5.930162602511906e-06, + "loss": 0.834, + "step": 37294 + }, + { + "epoch": 0.6444394525850152, + "grad_norm": 1.80252334273221, + "learning_rate": 5.929651405188923e-06, + "loss": 0.4925, + "step": 37295 + }, + { + "epoch": 0.6444567320984241, + "grad_norm": 1.0904143858384547, + "learning_rate": 5.929140220614583e-06, + "loss": 0.3475, + "step": 37296 + }, + { + "epoch": 0.644474011611833, + "grad_norm": 1.511249059837028, + "learning_rate": 5.928629048790487e-06, + "loss": 0.5417, + "step": 37297 + }, + { + "epoch": 0.644491291125242, + "grad_norm": 1.115137766782, + "learning_rate": 5.92811788971823e-06, + "loss": 0.5025, + "step": 37298 + }, + { + "epoch": 0.6445085706386509, + "grad_norm": 1.3345482095138792, + "learning_rate": 5.927606743399421e-06, + "loss": 0.3544, + "step": 37299 + }, + { + "epoch": 0.6445258501520598, + "grad_norm": 1.0668516305836964, + "learning_rate": 5.927095609835654e-06, + "loss": 0.3355, + "step": 37300 + }, + { + "epoch": 0.6445431296654687, + "grad_norm": 0.8047792993385346, + "learning_rate": 5.926584489028535e-06, + "loss": 0.3016, + "step": 37301 + }, + { + "epoch": 0.6445604091788775, + "grad_norm": 0.5509636359690857, + "learning_rate": 5.9260733809796625e-06, + "loss": 0.6015, + "step": 37302 + }, + { + "epoch": 0.6445776886922864, + "grad_norm": 1.1760297123161494, + "learning_rate": 5.925562285690637e-06, + "loss": 0.2806, + "step": 37303 + }, + { + "epoch": 0.6445949682056953, + "grad_norm": 1.1164322411009395, + "learning_rate": 5.925051203163059e-06, + "loss": 0.2428, + "step": 37304 + }, + { + "epoch": 0.6446122477191042, + "grad_norm": 0.9971916316058215, + "learning_rate": 5.924540133398533e-06, + "loss": 0.3323, + "step": 37305 + }, + { + "epoch": 0.6446295272325131, + "grad_norm": 0.7243248927657348, + "learning_rate": 5.924029076398654e-06, + "loss": 0.305, + "step": 37306 + }, + { + "epoch": 0.644646806745922, + "grad_norm": 0.4343053772930561, + "learning_rate": 5.9235180321650294e-06, + "loss": 0.7632, + "step": 37307 + }, + { + "epoch": 0.6446640862593309, + "grad_norm": 1.151508675050113, + "learning_rate": 5.923007000699254e-06, + "loss": 0.2763, + "step": 37308 + }, + { + "epoch": 0.6446813657727398, + "grad_norm": 1.1437889289197838, + "learning_rate": 5.92249598200293e-06, + "loss": 0.3657, + "step": 37309 + }, + { + "epoch": 0.6446986452861487, + "grad_norm": 1.3015736546387369, + "learning_rate": 5.92198497607766e-06, + "loss": 0.2226, + "step": 37310 + }, + { + "epoch": 0.6447159247995576, + "grad_norm": 1.184222284292938, + "learning_rate": 5.921473982925041e-06, + "loss": 0.4535, + "step": 37311 + }, + { + "epoch": 0.6447332043129665, + "grad_norm": 1.016460192062157, + "learning_rate": 5.92096300254668e-06, + "loss": 0.4747, + "step": 37312 + }, + { + "epoch": 0.6447504838263755, + "grad_norm": 1.0104229457751503, + "learning_rate": 5.920452034944167e-06, + "loss": 0.2561, + "step": 37313 + }, + { + "epoch": 0.6447677633397844, + "grad_norm": 1.638865331259032, + "learning_rate": 5.919941080119111e-06, + "loss": 0.5128, + "step": 37314 + }, + { + "epoch": 0.6447850428531933, + "grad_norm": 1.322320878595299, + "learning_rate": 5.9194301380731086e-06, + "loss": 0.3375, + "step": 37315 + }, + { + "epoch": 0.6448023223666022, + "grad_norm": 0.8388763442093615, + "learning_rate": 5.918919208807763e-06, + "loss": 0.2255, + "step": 37316 + }, + { + "epoch": 0.6448196018800111, + "grad_norm": 1.3981709451385107, + "learning_rate": 5.918408292324674e-06, + "loss": 0.3785, + "step": 37317 + }, + { + "epoch": 0.64483688139342, + "grad_norm": 0.9508613514519213, + "learning_rate": 5.9178973886254355e-06, + "loss": 0.4315, + "step": 37318 + }, + { + "epoch": 0.6448541609068289, + "grad_norm": 1.0423028339971068, + "learning_rate": 5.917386497711657e-06, + "loss": 0.391, + "step": 37319 + }, + { + "epoch": 0.6448714404202378, + "grad_norm": 1.4086887952111942, + "learning_rate": 5.916875619584929e-06, + "loss": 0.3134, + "step": 37320 + }, + { + "epoch": 0.6448887199336467, + "grad_norm": 1.4726630870643045, + "learning_rate": 5.916364754246864e-06, + "loss": 0.4456, + "step": 37321 + }, + { + "epoch": 0.6449059994470556, + "grad_norm": 1.4245176558870671, + "learning_rate": 5.9158539016990494e-06, + "loss": 0.3723, + "step": 37322 + }, + { + "epoch": 0.6449232789604644, + "grad_norm": 1.1136128340831979, + "learning_rate": 5.915343061943093e-06, + "loss": 0.3273, + "step": 37323 + }, + { + "epoch": 0.6449405584738733, + "grad_norm": 0.7554629113802303, + "learning_rate": 5.91483223498059e-06, + "loss": 0.4414, + "step": 37324 + }, + { + "epoch": 0.6449578379872822, + "grad_norm": 1.259280745520928, + "learning_rate": 5.914321420813146e-06, + "loss": 0.3454, + "step": 37325 + }, + { + "epoch": 0.6449751175006911, + "grad_norm": 0.789556637791777, + "learning_rate": 5.913810619442355e-06, + "loss": 0.9987, + "step": 37326 + }, + { + "epoch": 0.6449923970141, + "grad_norm": 1.1161036868440712, + "learning_rate": 5.913299830869824e-06, + "loss": 0.4749, + "step": 37327 + }, + { + "epoch": 0.645009676527509, + "grad_norm": 1.07015026993205, + "learning_rate": 5.912789055097146e-06, + "loss": 0.4112, + "step": 37328 + }, + { + "epoch": 0.6450269560409179, + "grad_norm": 1.3022911411403448, + "learning_rate": 5.912278292125922e-06, + "loss": 0.4888, + "step": 37329 + }, + { + "epoch": 0.6450442355543268, + "grad_norm": 1.0249795809554414, + "learning_rate": 5.911767541957756e-06, + "loss": 0.3904, + "step": 37330 + }, + { + "epoch": 0.6450615150677357, + "grad_norm": 1.5700256880722456, + "learning_rate": 5.911256804594241e-06, + "loss": 0.4151, + "step": 37331 + }, + { + "epoch": 0.6450787945811446, + "grad_norm": 1.2778000504999165, + "learning_rate": 5.910746080036984e-06, + "loss": 0.4094, + "step": 37332 + }, + { + "epoch": 0.6450960740945535, + "grad_norm": 0.9495396506466715, + "learning_rate": 5.910235368287579e-06, + "loss": 0.2837, + "step": 37333 + }, + { + "epoch": 0.6451133536079624, + "grad_norm": 1.113687214453528, + "learning_rate": 5.909724669347628e-06, + "loss": 0.2798, + "step": 37334 + }, + { + "epoch": 0.6451306331213713, + "grad_norm": 1.122766289082644, + "learning_rate": 5.909213983218729e-06, + "loss": 0.2965, + "step": 37335 + }, + { + "epoch": 0.6451479126347802, + "grad_norm": 0.7234711703827214, + "learning_rate": 5.908703309902485e-06, + "loss": 0.5236, + "step": 37336 + }, + { + "epoch": 0.6451651921481891, + "grad_norm": 1.4513461823320133, + "learning_rate": 5.90819264940049e-06, + "loss": 0.3148, + "step": 37337 + }, + { + "epoch": 0.645182471661598, + "grad_norm": 1.031400753701999, + "learning_rate": 5.9076820017143525e-06, + "loss": 0.3307, + "step": 37338 + }, + { + "epoch": 0.6451997511750069, + "grad_norm": 1.1810158707279395, + "learning_rate": 5.907171366845662e-06, + "loss": 0.3159, + "step": 37339 + }, + { + "epoch": 0.6452170306884158, + "grad_norm": 1.1686540237057501, + "learning_rate": 5.90666074479602e-06, + "loss": 0.4315, + "step": 37340 + }, + { + "epoch": 0.6452343102018248, + "grad_norm": 0.9062817937660078, + "learning_rate": 5.906150135567032e-06, + "loss": 0.3812, + "step": 37341 + }, + { + "epoch": 0.6452515897152337, + "grad_norm": 0.5112738915252932, + "learning_rate": 5.90563953916029e-06, + "loss": 0.4852, + "step": 37342 + }, + { + "epoch": 0.6452688692286426, + "grad_norm": 1.127988240151063, + "learning_rate": 5.9051289555773975e-06, + "loss": 0.3249, + "step": 37343 + }, + { + "epoch": 0.6452861487420514, + "grad_norm": 1.3925338236464655, + "learning_rate": 5.90461838481995e-06, + "loss": 0.4452, + "step": 37344 + }, + { + "epoch": 0.6453034282554603, + "grad_norm": 1.0736967945585805, + "learning_rate": 5.904107826889552e-06, + "loss": 0.2575, + "step": 37345 + }, + { + "epoch": 0.6453207077688692, + "grad_norm": 0.8913747459365402, + "learning_rate": 5.903597281787795e-06, + "loss": 0.5466, + "step": 37346 + }, + { + "epoch": 0.6453379872822781, + "grad_norm": 1.0666810122214156, + "learning_rate": 5.903086749516289e-06, + "loss": 0.4573, + "step": 37347 + }, + { + "epoch": 0.645355266795687, + "grad_norm": 0.9976586928494576, + "learning_rate": 5.902576230076624e-06, + "loss": 0.4124, + "step": 37348 + }, + { + "epoch": 0.6453725463090959, + "grad_norm": 0.812763878115003, + "learning_rate": 5.902065723470398e-06, + "loss": 0.26, + "step": 37349 + }, + { + "epoch": 0.6453898258225048, + "grad_norm": 1.1411332186099294, + "learning_rate": 5.9015552296992186e-06, + "loss": 0.295, + "step": 37350 + }, + { + "epoch": 0.6454071053359137, + "grad_norm": 1.2348166743879334, + "learning_rate": 5.901044748764675e-06, + "loss": 0.3617, + "step": 37351 + }, + { + "epoch": 0.6454243848493226, + "grad_norm": 0.905523744958272, + "learning_rate": 5.900534280668376e-06, + "loss": 0.3717, + "step": 37352 + }, + { + "epoch": 0.6454416643627315, + "grad_norm": 1.5633555860376054, + "learning_rate": 5.900023825411911e-06, + "loss": 0.3838, + "step": 37353 + }, + { + "epoch": 0.6454589438761404, + "grad_norm": 1.5843442098580223, + "learning_rate": 5.899513382996886e-06, + "loss": 0.3937, + "step": 37354 + }, + { + "epoch": 0.6454762233895494, + "grad_norm": 0.8050534365242467, + "learning_rate": 5.899002953424893e-06, + "loss": 0.2446, + "step": 37355 + }, + { + "epoch": 0.6454935029029583, + "grad_norm": 1.3157939682734685, + "learning_rate": 5.898492536697538e-06, + "loss": 0.3419, + "step": 37356 + }, + { + "epoch": 0.6455107824163672, + "grad_norm": 1.2559263924216657, + "learning_rate": 5.897982132816414e-06, + "loss": 0.3703, + "step": 37357 + }, + { + "epoch": 0.6455280619297761, + "grad_norm": 1.3347834536530474, + "learning_rate": 5.897471741783124e-06, + "loss": 0.4351, + "step": 37358 + }, + { + "epoch": 0.645545341443185, + "grad_norm": 1.458823253185868, + "learning_rate": 5.896961363599263e-06, + "loss": 0.3576, + "step": 37359 + }, + { + "epoch": 0.6455626209565939, + "grad_norm": 1.4953666167721982, + "learning_rate": 5.89645099826643e-06, + "loss": 0.5203, + "step": 37360 + }, + { + "epoch": 0.6455799004700028, + "grad_norm": 1.0178309670746177, + "learning_rate": 5.895940645786227e-06, + "loss": 0.3052, + "step": 37361 + }, + { + "epoch": 0.6455971799834117, + "grad_norm": 0.7103348935749846, + "learning_rate": 5.895430306160246e-06, + "loss": 0.9102, + "step": 37362 + }, + { + "epoch": 0.6456144594968206, + "grad_norm": 0.684687237908246, + "learning_rate": 5.8949199793900915e-06, + "loss": 0.7415, + "step": 37363 + }, + { + "epoch": 0.6456317390102295, + "grad_norm": 1.4037036771308922, + "learning_rate": 5.894409665477358e-06, + "loss": 0.2656, + "step": 37364 + }, + { + "epoch": 0.6456490185236384, + "grad_norm": 2.637425294305121, + "learning_rate": 5.893899364423646e-06, + "loss": 0.3171, + "step": 37365 + }, + { + "epoch": 0.6456662980370472, + "grad_norm": 0.8913317748466113, + "learning_rate": 5.893389076230552e-06, + "loss": 0.2815, + "step": 37366 + }, + { + "epoch": 0.6456835775504561, + "grad_norm": 1.6348825185640312, + "learning_rate": 5.8928788008996796e-06, + "loss": 0.3461, + "step": 37367 + }, + { + "epoch": 0.645700857063865, + "grad_norm": 1.3546126143364114, + "learning_rate": 5.89236853843262e-06, + "loss": 0.3504, + "step": 37368 + }, + { + "epoch": 0.645718136577274, + "grad_norm": 0.8161130929098963, + "learning_rate": 5.891858288830975e-06, + "loss": 0.3551, + "step": 37369 + }, + { + "epoch": 0.6457354160906829, + "grad_norm": 0.39798595594686537, + "learning_rate": 5.891348052096342e-06, + "loss": 0.33, + "step": 37370 + }, + { + "epoch": 0.6457526956040918, + "grad_norm": 1.494595346206884, + "learning_rate": 5.890837828230318e-06, + "loss": 0.4117, + "step": 37371 + }, + { + "epoch": 0.6457699751175007, + "grad_norm": 1.936034458468804, + "learning_rate": 5.890327617234506e-06, + "loss": 0.2601, + "step": 37372 + }, + { + "epoch": 0.6457872546309096, + "grad_norm": 1.49739845631944, + "learning_rate": 5.889817419110495e-06, + "loss": 0.3242, + "step": 37373 + }, + { + "epoch": 0.6458045341443185, + "grad_norm": 1.415203216987687, + "learning_rate": 5.889307233859891e-06, + "loss": 0.4859, + "step": 37374 + }, + { + "epoch": 0.6458218136577274, + "grad_norm": 0.9257893776329592, + "learning_rate": 5.888797061484288e-06, + "loss": 0.3135, + "step": 37375 + }, + { + "epoch": 0.6458390931711363, + "grad_norm": 0.6562701773632543, + "learning_rate": 5.888286901985287e-06, + "loss": 0.7858, + "step": 37376 + }, + { + "epoch": 0.6458563726845452, + "grad_norm": 0.7168981368677037, + "learning_rate": 5.88777675536448e-06, + "loss": 0.2997, + "step": 37377 + }, + { + "epoch": 0.6458736521979541, + "grad_norm": 1.4105674210603152, + "learning_rate": 5.887266621623475e-06, + "loss": 0.4304, + "step": 37378 + }, + { + "epoch": 0.645890931711363, + "grad_norm": 1.3086334411870786, + "learning_rate": 5.8867565007638605e-06, + "loss": 0.1896, + "step": 37379 + }, + { + "epoch": 0.6459082112247719, + "grad_norm": 1.0394128664699345, + "learning_rate": 5.886246392787235e-06, + "loss": 0.2958, + "step": 37380 + }, + { + "epoch": 0.6459254907381808, + "grad_norm": 1.0689834082709373, + "learning_rate": 5.885736297695202e-06, + "loss": 0.4102, + "step": 37381 + }, + { + "epoch": 0.6459427702515897, + "grad_norm": 0.9111380557060007, + "learning_rate": 5.885226215489353e-06, + "loss": 0.5893, + "step": 37382 + }, + { + "epoch": 0.6459600497649987, + "grad_norm": 0.5828268438466067, + "learning_rate": 5.88471614617129e-06, + "loss": 0.6795, + "step": 37383 + }, + { + "epoch": 0.6459773292784076, + "grad_norm": 1.0802421935919952, + "learning_rate": 5.8842060897426055e-06, + "loss": 0.5118, + "step": 37384 + }, + { + "epoch": 0.6459946087918165, + "grad_norm": 1.0957376670455372, + "learning_rate": 5.883696046204903e-06, + "loss": 0.4232, + "step": 37385 + }, + { + "epoch": 0.6460118883052254, + "grad_norm": 1.4390465765418685, + "learning_rate": 5.883186015559775e-06, + "loss": 0.4848, + "step": 37386 + }, + { + "epoch": 0.6460291678186342, + "grad_norm": 1.2321748883761356, + "learning_rate": 5.882675997808826e-06, + "loss": 0.3838, + "step": 37387 + }, + { + "epoch": 0.6460464473320431, + "grad_norm": 1.6287056712774326, + "learning_rate": 5.882165992953644e-06, + "loss": 0.2316, + "step": 37388 + }, + { + "epoch": 0.646063726845452, + "grad_norm": 1.2854048149521398, + "learning_rate": 5.881656000995834e-06, + "loss": 0.7216, + "step": 37389 + }, + { + "epoch": 0.6460810063588609, + "grad_norm": 1.0814178252584143, + "learning_rate": 5.88114602193699e-06, + "loss": 0.3926, + "step": 37390 + }, + { + "epoch": 0.6460982858722698, + "grad_norm": 1.3148782773386256, + "learning_rate": 5.880636055778707e-06, + "loss": 0.4071, + "step": 37391 + }, + { + "epoch": 0.6461155653856787, + "grad_norm": 0.7948627454649264, + "learning_rate": 5.8801261025225895e-06, + "loss": 0.2242, + "step": 37392 + }, + { + "epoch": 0.6461328448990876, + "grad_norm": 1.347262122190213, + "learning_rate": 5.879616162170225e-06, + "loss": 0.4125, + "step": 37393 + }, + { + "epoch": 0.6461501244124965, + "grad_norm": 1.020385954634158, + "learning_rate": 5.879106234723219e-06, + "loss": 0.586, + "step": 37394 + }, + { + "epoch": 0.6461674039259054, + "grad_norm": 1.481282838267784, + "learning_rate": 5.878596320183164e-06, + "loss": 0.4042, + "step": 37395 + }, + { + "epoch": 0.6461846834393143, + "grad_norm": 0.9463855694282084, + "learning_rate": 5.878086418551659e-06, + "loss": 0.3993, + "step": 37396 + }, + { + "epoch": 0.6462019629527233, + "grad_norm": 1.0814378143487757, + "learning_rate": 5.8775765298303e-06, + "loss": 0.7071, + "step": 37397 + }, + { + "epoch": 0.6462192424661322, + "grad_norm": 1.277125747581132, + "learning_rate": 5.877066654020689e-06, + "loss": 0.292, + "step": 37398 + }, + { + "epoch": 0.6462365219795411, + "grad_norm": 1.3671887375353753, + "learning_rate": 5.8765567911244125e-06, + "loss": 0.5085, + "step": 37399 + }, + { + "epoch": 0.64625380149295, + "grad_norm": 1.0097291163259774, + "learning_rate": 5.876046941143077e-06, + "loss": 0.3643, + "step": 37400 + }, + { + "epoch": 0.6462710810063589, + "grad_norm": 1.0078598753312746, + "learning_rate": 5.8755371040782775e-06, + "loss": 0.4981, + "step": 37401 + }, + { + "epoch": 0.6462883605197678, + "grad_norm": 1.2688903541895968, + "learning_rate": 5.875027279931604e-06, + "loss": 0.3305, + "step": 37402 + }, + { + "epoch": 0.6463056400331767, + "grad_norm": 1.278162169265761, + "learning_rate": 5.874517468704663e-06, + "loss": 0.4878, + "step": 37403 + }, + { + "epoch": 0.6463229195465856, + "grad_norm": 0.9434478182988524, + "learning_rate": 5.874007670399043e-06, + "loss": 0.5227, + "step": 37404 + }, + { + "epoch": 0.6463401990599945, + "grad_norm": 1.092974317482143, + "learning_rate": 5.873497885016348e-06, + "loss": 0.3174, + "step": 37405 + }, + { + "epoch": 0.6463574785734034, + "grad_norm": 1.1385123647923525, + "learning_rate": 5.872988112558168e-06, + "loss": 0.2503, + "step": 37406 + }, + { + "epoch": 0.6463747580868123, + "grad_norm": 1.180677862984656, + "learning_rate": 5.872478353026108e-06, + "loss": 0.4338, + "step": 37407 + }, + { + "epoch": 0.6463920376002211, + "grad_norm": 0.7928325276874146, + "learning_rate": 5.871968606421754e-06, + "loss": 0.4966, + "step": 37408 + }, + { + "epoch": 0.64640931711363, + "grad_norm": 1.303202595680198, + "learning_rate": 5.871458872746711e-06, + "loss": 0.356, + "step": 37409 + }, + { + "epoch": 0.6464265966270389, + "grad_norm": 2.067924559125307, + "learning_rate": 5.870949152002572e-06, + "loss": 0.4086, + "step": 37410 + }, + { + "epoch": 0.6464438761404478, + "grad_norm": 1.0907616434724463, + "learning_rate": 5.8704394441909315e-06, + "loss": 0.3758, + "step": 37411 + }, + { + "epoch": 0.6464611556538568, + "grad_norm": 1.1809221822070632, + "learning_rate": 5.869929749313394e-06, + "loss": 0.3352, + "step": 37412 + }, + { + "epoch": 0.6464784351672657, + "grad_norm": 1.6324171961122877, + "learning_rate": 5.869420067371545e-06, + "loss": 0.5634, + "step": 37413 + }, + { + "epoch": 0.6464957146806746, + "grad_norm": 1.1376378968847554, + "learning_rate": 5.868910398366989e-06, + "loss": 0.498, + "step": 37414 + }, + { + "epoch": 0.6465129941940835, + "grad_norm": 0.9871530915555868, + "learning_rate": 5.868400742301319e-06, + "loss": 0.3862, + "step": 37415 + }, + { + "epoch": 0.6465302737074924, + "grad_norm": 1.2707594807959541, + "learning_rate": 5.867891099176131e-06, + "loss": 0.4202, + "step": 37416 + }, + { + "epoch": 0.6465475532209013, + "grad_norm": 0.9241124321130996, + "learning_rate": 5.867381468993022e-06, + "loss": 0.4154, + "step": 37417 + }, + { + "epoch": 0.6465648327343102, + "grad_norm": 0.9519203367653362, + "learning_rate": 5.866871851753593e-06, + "loss": 0.5523, + "step": 37418 + }, + { + "epoch": 0.6465821122477191, + "grad_norm": 1.1750515571288134, + "learning_rate": 5.866362247459429e-06, + "loss": 0.4017, + "step": 37419 + }, + { + "epoch": 0.646599391761128, + "grad_norm": 1.3835002104098746, + "learning_rate": 5.865852656112136e-06, + "loss": 0.3205, + "step": 37420 + }, + { + "epoch": 0.6466166712745369, + "grad_norm": 0.8969865299857218, + "learning_rate": 5.865343077713309e-06, + "loss": 0.2974, + "step": 37421 + }, + { + "epoch": 0.6466339507879458, + "grad_norm": 1.0802729136337723, + "learning_rate": 5.8648335122645375e-06, + "loss": 0.4088, + "step": 37422 + }, + { + "epoch": 0.6466512303013547, + "grad_norm": 1.0159649166748703, + "learning_rate": 5.864323959767424e-06, + "loss": 0.4205, + "step": 37423 + }, + { + "epoch": 0.6466685098147636, + "grad_norm": 1.1687832563079257, + "learning_rate": 5.863814420223559e-06, + "loss": 0.3695, + "step": 37424 + }, + { + "epoch": 0.6466857893281726, + "grad_norm": 1.1268307252627805, + "learning_rate": 5.863304893634544e-06, + "loss": 0.2889, + "step": 37425 + }, + { + "epoch": 0.6467030688415815, + "grad_norm": 1.1063698681494345, + "learning_rate": 5.862795380001971e-06, + "loss": 0.4842, + "step": 37426 + }, + { + "epoch": 0.6467203483549904, + "grad_norm": 1.4888076780405268, + "learning_rate": 5.862285879327441e-06, + "loss": 0.3819, + "step": 37427 + }, + { + "epoch": 0.6467376278683993, + "grad_norm": 1.3092162532093534, + "learning_rate": 5.861776391612541e-06, + "loss": 0.828, + "step": 37428 + }, + { + "epoch": 0.6467549073818081, + "grad_norm": 1.0763392401024094, + "learning_rate": 5.861266916858875e-06, + "loss": 0.4567, + "step": 37429 + }, + { + "epoch": 0.646772186895217, + "grad_norm": 1.2736597520310093, + "learning_rate": 5.860757455068034e-06, + "loss": 0.3547, + "step": 37430 + }, + { + "epoch": 0.6467894664086259, + "grad_norm": 1.3206726586551745, + "learning_rate": 5.860248006241617e-06, + "loss": 0.2469, + "step": 37431 + }, + { + "epoch": 0.6468067459220348, + "grad_norm": 0.8399898741988239, + "learning_rate": 5.859738570381219e-06, + "loss": 0.4426, + "step": 37432 + }, + { + "epoch": 0.6468240254354437, + "grad_norm": 0.4727801352565225, + "learning_rate": 5.859229147488431e-06, + "loss": 0.8346, + "step": 37433 + }, + { + "epoch": 0.6468413049488526, + "grad_norm": 0.9510252262836183, + "learning_rate": 5.858719737564854e-06, + "loss": 0.4714, + "step": 37434 + }, + { + "epoch": 0.6468585844622615, + "grad_norm": 1.2518237132703627, + "learning_rate": 5.8582103406120795e-06, + "loss": 0.435, + "step": 37435 + }, + { + "epoch": 0.6468758639756704, + "grad_norm": 1.1235823210033777, + "learning_rate": 5.8577009566317065e-06, + "loss": 0.2008, + "step": 37436 + }, + { + "epoch": 0.6468931434890793, + "grad_norm": 1.1058089336702026, + "learning_rate": 5.857191585625328e-06, + "loss": 0.4361, + "step": 37437 + }, + { + "epoch": 0.6469104230024882, + "grad_norm": 2.4356287272553296, + "learning_rate": 5.856682227594544e-06, + "loss": 0.3049, + "step": 37438 + }, + { + "epoch": 0.6469277025158972, + "grad_norm": 1.0204384859615485, + "learning_rate": 5.85617288254094e-06, + "loss": 0.3151, + "step": 37439 + }, + { + "epoch": 0.6469449820293061, + "grad_norm": 1.7066167041903175, + "learning_rate": 5.855663550466122e-06, + "loss": 0.4353, + "step": 37440 + }, + { + "epoch": 0.646962261542715, + "grad_norm": 1.255684711852974, + "learning_rate": 5.855154231371682e-06, + "loss": 0.4841, + "step": 37441 + }, + { + "epoch": 0.6469795410561239, + "grad_norm": 1.6887200142991192, + "learning_rate": 5.85464492525921e-06, + "loss": 0.4438, + "step": 37442 + }, + { + "epoch": 0.6469968205695328, + "grad_norm": 1.209593703422125, + "learning_rate": 5.854135632130307e-06, + "loss": 0.3379, + "step": 37443 + }, + { + "epoch": 0.6470141000829417, + "grad_norm": 1.3006108957005398, + "learning_rate": 5.853626351986564e-06, + "loss": 0.497, + "step": 37444 + }, + { + "epoch": 0.6470313795963506, + "grad_norm": 1.3656925696046085, + "learning_rate": 5.85311708482958e-06, + "loss": 0.4648, + "step": 37445 + }, + { + "epoch": 0.6470486591097595, + "grad_norm": 1.1811101548260425, + "learning_rate": 5.852607830660948e-06, + "loss": 0.4763, + "step": 37446 + }, + { + "epoch": 0.6470659386231684, + "grad_norm": 1.3147064101191415, + "learning_rate": 5.852098589482267e-06, + "loss": 0.2564, + "step": 37447 + }, + { + "epoch": 0.6470832181365773, + "grad_norm": 1.1705798981858446, + "learning_rate": 5.851589361295124e-06, + "loss": 0.3253, + "step": 37448 + }, + { + "epoch": 0.6471004976499862, + "grad_norm": 0.925971137439629, + "learning_rate": 5.85108014610112e-06, + "loss": 0.4175, + "step": 37449 + }, + { + "epoch": 0.647117777163395, + "grad_norm": 1.2548742400622404, + "learning_rate": 5.8505709439018475e-06, + "loss": 0.5368, + "step": 37450 + }, + { + "epoch": 0.6471350566768039, + "grad_norm": 0.8189558668138842, + "learning_rate": 5.850061754698904e-06, + "loss": 0.3645, + "step": 37451 + }, + { + "epoch": 0.6471523361902128, + "grad_norm": 1.2886229791813468, + "learning_rate": 5.8495525784938825e-06, + "loss": 0.3211, + "step": 37452 + }, + { + "epoch": 0.6471696157036217, + "grad_norm": 1.2551037322201228, + "learning_rate": 5.849043415288376e-06, + "loss": 0.6292, + "step": 37453 + }, + { + "epoch": 0.6471868952170307, + "grad_norm": 0.9756693314834127, + "learning_rate": 5.848534265083981e-06, + "loss": 0.3495, + "step": 37454 + }, + { + "epoch": 0.6472041747304396, + "grad_norm": 1.4503489109625705, + "learning_rate": 5.848025127882291e-06, + "loss": 0.3964, + "step": 37455 + }, + { + "epoch": 0.6472214542438485, + "grad_norm": 1.5804539501603847, + "learning_rate": 5.847516003684904e-06, + "loss": 0.6728, + "step": 37456 + }, + { + "epoch": 0.6472387337572574, + "grad_norm": 1.4880680089202816, + "learning_rate": 5.847006892493411e-06, + "loss": 0.3004, + "step": 37457 + }, + { + "epoch": 0.6472560132706663, + "grad_norm": 1.2576111293336414, + "learning_rate": 5.846497794309411e-06, + "loss": 0.5799, + "step": 37458 + }, + { + "epoch": 0.6472732927840752, + "grad_norm": 0.8327159741574327, + "learning_rate": 5.845988709134493e-06, + "loss": 0.5054, + "step": 37459 + }, + { + "epoch": 0.6472905722974841, + "grad_norm": 1.5013193931715383, + "learning_rate": 5.845479636970255e-06, + "loss": 0.5151, + "step": 37460 + }, + { + "epoch": 0.647307851810893, + "grad_norm": 1.4774089208481236, + "learning_rate": 5.844970577818288e-06, + "loss": 0.3578, + "step": 37461 + }, + { + "epoch": 0.6473251313243019, + "grad_norm": 0.6069886101582097, + "learning_rate": 5.8444615316801946e-06, + "loss": 0.6653, + "step": 37462 + }, + { + "epoch": 0.6473424108377108, + "grad_norm": 0.9285825636178361, + "learning_rate": 5.84395249855756e-06, + "loss": 0.4521, + "step": 37463 + }, + { + "epoch": 0.6473596903511197, + "grad_norm": 0.8771999324042442, + "learning_rate": 5.84344347845198e-06, + "loss": 0.364, + "step": 37464 + }, + { + "epoch": 0.6473769698645286, + "grad_norm": 1.5367183912675708, + "learning_rate": 5.842934471365054e-06, + "loss": 0.469, + "step": 37465 + }, + { + "epoch": 0.6473942493779375, + "grad_norm": 1.2543304308776044, + "learning_rate": 5.84242547729837e-06, + "loss": 0.2201, + "step": 37466 + }, + { + "epoch": 0.6474115288913465, + "grad_norm": 1.3878649850252223, + "learning_rate": 5.84191649625353e-06, + "loss": 0.3073, + "step": 37467 + }, + { + "epoch": 0.6474288084047554, + "grad_norm": 1.0656717328268457, + "learning_rate": 5.841407528232119e-06, + "loss": 0.4829, + "step": 37468 + }, + { + "epoch": 0.6474460879181643, + "grad_norm": 1.191525294059763, + "learning_rate": 5.8408985732357395e-06, + "loss": 0.4062, + "step": 37469 + }, + { + "epoch": 0.6474633674315732, + "grad_norm": 1.077835895645486, + "learning_rate": 5.840389631265978e-06, + "loss": 0.2813, + "step": 37470 + }, + { + "epoch": 0.647480646944982, + "grad_norm": 1.9211677195958514, + "learning_rate": 5.839880702324434e-06, + "loss": 0.3256, + "step": 37471 + }, + { + "epoch": 0.6474979264583909, + "grad_norm": 1.2323486786688995, + "learning_rate": 5.839371786412703e-06, + "loss": 0.3737, + "step": 37472 + }, + { + "epoch": 0.6475152059717998, + "grad_norm": 1.1926409907500355, + "learning_rate": 5.8388628835323715e-06, + "loss": 0.5058, + "step": 37473 + }, + { + "epoch": 0.6475324854852087, + "grad_norm": 0.838705801204849, + "learning_rate": 5.83835399368504e-06, + "loss": 0.3213, + "step": 37474 + }, + { + "epoch": 0.6475497649986176, + "grad_norm": 1.3508016577334034, + "learning_rate": 5.837845116872297e-06, + "loss": 0.2497, + "step": 37475 + }, + { + "epoch": 0.6475670445120265, + "grad_norm": 0.9540341157631063, + "learning_rate": 5.8373362530957445e-06, + "loss": 0.5365, + "step": 37476 + }, + { + "epoch": 0.6475843240254354, + "grad_norm": 1.2070304529277864, + "learning_rate": 5.836827402356966e-06, + "loss": 0.3634, + "step": 37477 + }, + { + "epoch": 0.6476016035388443, + "grad_norm": 0.9281825649067437, + "learning_rate": 5.836318564657561e-06, + "loss": 0.2064, + "step": 37478 + }, + { + "epoch": 0.6476188830522532, + "grad_norm": 0.9407254488538789, + "learning_rate": 5.835809739999123e-06, + "loss": 0.4975, + "step": 37479 + }, + { + "epoch": 0.6476361625656621, + "grad_norm": 0.6955199956519625, + "learning_rate": 5.835300928383249e-06, + "loss": 0.2879, + "step": 37480 + }, + { + "epoch": 0.647653442079071, + "grad_norm": 0.991575477605137, + "learning_rate": 5.8347921298115255e-06, + "loss": 0.3326, + "step": 37481 + }, + { + "epoch": 0.64767072159248, + "grad_norm": 1.4242202942750997, + "learning_rate": 5.8342833442855525e-06, + "loss": 0.2527, + "step": 37482 + }, + { + "epoch": 0.6476880011058889, + "grad_norm": 1.4253334589162923, + "learning_rate": 5.8337745718069185e-06, + "loss": 0.4586, + "step": 37483 + }, + { + "epoch": 0.6477052806192978, + "grad_norm": 0.9394527825242214, + "learning_rate": 5.833265812377218e-06, + "loss": 0.4207, + "step": 37484 + }, + { + "epoch": 0.6477225601327067, + "grad_norm": 0.880006755865877, + "learning_rate": 5.8327570659980495e-06, + "loss": 0.2815, + "step": 37485 + }, + { + "epoch": 0.6477398396461156, + "grad_norm": 1.4207959274153374, + "learning_rate": 5.832248332670998e-06, + "loss": 0.3325, + "step": 37486 + }, + { + "epoch": 0.6477571191595245, + "grad_norm": 0.8434977897789737, + "learning_rate": 5.831739612397666e-06, + "loss": 0.3014, + "step": 37487 + }, + { + "epoch": 0.6477743986729334, + "grad_norm": 0.8640276185855957, + "learning_rate": 5.831230905179639e-06, + "loss": 0.357, + "step": 37488 + }, + { + "epoch": 0.6477916781863423, + "grad_norm": 0.7068371713635839, + "learning_rate": 5.830722211018517e-06, + "loss": 0.4034, + "step": 37489 + }, + { + "epoch": 0.6478089576997512, + "grad_norm": 1.5327744552285039, + "learning_rate": 5.830213529915882e-06, + "loss": 0.3778, + "step": 37490 + }, + { + "epoch": 0.6478262372131601, + "grad_norm": 0.8041724261204956, + "learning_rate": 5.8297048618733435e-06, + "loss": 0.2515, + "step": 37491 + }, + { + "epoch": 0.647843516726569, + "grad_norm": 1.140460350165591, + "learning_rate": 5.829196206892482e-06, + "loss": 0.516, + "step": 37492 + }, + { + "epoch": 0.6478607962399778, + "grad_norm": 0.8831466146471518, + "learning_rate": 5.8286875649748985e-06, + "loss": 0.3554, + "step": 37493 + }, + { + "epoch": 0.6478780757533867, + "grad_norm": 1.4256575695101903, + "learning_rate": 5.828178936122183e-06, + "loss": 0.2396, + "step": 37494 + }, + { + "epoch": 0.6478953552667956, + "grad_norm": 1.5102007285657415, + "learning_rate": 5.827670320335924e-06, + "loss": 0.4315, + "step": 37495 + }, + { + "epoch": 0.6479126347802046, + "grad_norm": 1.6918347526526951, + "learning_rate": 5.827161717617719e-06, + "loss": 0.4157, + "step": 37496 + }, + { + "epoch": 0.6479299142936135, + "grad_norm": 1.7070855718822084, + "learning_rate": 5.826653127969159e-06, + "loss": 0.2615, + "step": 37497 + }, + { + "epoch": 0.6479471938070224, + "grad_norm": 1.079402936061884, + "learning_rate": 5.826144551391845e-06, + "loss": 0.4019, + "step": 37498 + }, + { + "epoch": 0.6479644733204313, + "grad_norm": 1.017379354299374, + "learning_rate": 5.8256359878873585e-06, + "loss": 0.4172, + "step": 37499 + }, + { + "epoch": 0.6479817528338402, + "grad_norm": 0.988695361993043, + "learning_rate": 5.825127437457301e-06, + "loss": 0.364, + "step": 37500 + }, + { + "epoch": 0.6479990323472491, + "grad_norm": 0.8891453260841098, + "learning_rate": 5.824618900103258e-06, + "loss": 0.3842, + "step": 37501 + }, + { + "epoch": 0.648016311860658, + "grad_norm": 1.0372432052717062, + "learning_rate": 5.824110375826826e-06, + "loss": 0.3164, + "step": 37502 + }, + { + "epoch": 0.6480335913740669, + "grad_norm": 1.0258600115802132, + "learning_rate": 5.823601864629601e-06, + "loss": 0.3617, + "step": 37503 + }, + { + "epoch": 0.6480508708874758, + "grad_norm": 0.972921225825516, + "learning_rate": 5.823093366513168e-06, + "loss": 0.4077, + "step": 37504 + }, + { + "epoch": 0.6480681504008847, + "grad_norm": 1.3383149422069545, + "learning_rate": 5.82258488147913e-06, + "loss": 0.3743, + "step": 37505 + }, + { + "epoch": 0.6480854299142936, + "grad_norm": 0.8999550179252747, + "learning_rate": 5.822076409529067e-06, + "loss": 0.4019, + "step": 37506 + }, + { + "epoch": 0.6481027094277025, + "grad_norm": 1.0564768752021139, + "learning_rate": 5.821567950664581e-06, + "loss": 0.3159, + "step": 37507 + }, + { + "epoch": 0.6481199889411114, + "grad_norm": 1.614703946973835, + "learning_rate": 5.82105950488726e-06, + "loss": 0.4392, + "step": 37508 + }, + { + "epoch": 0.6481372684545204, + "grad_norm": 1.04006022567764, + "learning_rate": 5.820551072198702e-06, + "loss": 0.4358, + "step": 37509 + }, + { + "epoch": 0.6481545479679293, + "grad_norm": 1.2086399517389084, + "learning_rate": 5.8200426526004925e-06, + "loss": 0.4788, + "step": 37510 + }, + { + "epoch": 0.6481718274813382, + "grad_norm": 1.7086475788904798, + "learning_rate": 5.8195342460942314e-06, + "loss": 0.3646, + "step": 37511 + }, + { + "epoch": 0.6481891069947471, + "grad_norm": 1.0467552607201742, + "learning_rate": 5.819025852681501e-06, + "loss": 0.5164, + "step": 37512 + }, + { + "epoch": 0.648206386508156, + "grad_norm": 1.222135908901397, + "learning_rate": 5.818517472363901e-06, + "loss": 0.3144, + "step": 37513 + }, + { + "epoch": 0.6482236660215648, + "grad_norm": 0.9269348381173528, + "learning_rate": 5.818009105143025e-06, + "loss": 0.399, + "step": 37514 + }, + { + "epoch": 0.6482409455349737, + "grad_norm": 1.1084462536691915, + "learning_rate": 5.81750075102046e-06, + "loss": 0.435, + "step": 37515 + }, + { + "epoch": 0.6482582250483826, + "grad_norm": 1.1998079046401435, + "learning_rate": 5.816992409997803e-06, + "loss": 0.4656, + "step": 37516 + }, + { + "epoch": 0.6482755045617915, + "grad_norm": 1.0923947414466506, + "learning_rate": 5.81648408207664e-06, + "loss": 0.3532, + "step": 37517 + }, + { + "epoch": 0.6482927840752004, + "grad_norm": 0.9120030003923542, + "learning_rate": 5.815975767258566e-06, + "loss": 0.2757, + "step": 37518 + }, + { + "epoch": 0.6483100635886093, + "grad_norm": 1.0276260141707942, + "learning_rate": 5.815467465545175e-06, + "loss": 0.2778, + "step": 37519 + }, + { + "epoch": 0.6483273431020182, + "grad_norm": 0.9749984384610725, + "learning_rate": 5.814959176938062e-06, + "loss": 0.3716, + "step": 37520 + }, + { + "epoch": 0.6483446226154271, + "grad_norm": 0.8623170048782073, + "learning_rate": 5.814450901438811e-06, + "loss": 0.3996, + "step": 37521 + }, + { + "epoch": 0.648361902128836, + "grad_norm": 1.0585957647808681, + "learning_rate": 5.813942639049022e-06, + "loss": 0.4197, + "step": 37522 + }, + { + "epoch": 0.648379181642245, + "grad_norm": 1.2245360374314518, + "learning_rate": 5.813434389770278e-06, + "loss": 0.2486, + "step": 37523 + }, + { + "epoch": 0.6483964611556539, + "grad_norm": 1.4236937369827993, + "learning_rate": 5.812926153604174e-06, + "loss": 0.4416, + "step": 37524 + }, + { + "epoch": 0.6484137406690628, + "grad_norm": 0.8158954734393856, + "learning_rate": 5.81241793055231e-06, + "loss": 0.4339, + "step": 37525 + }, + { + "epoch": 0.6484310201824717, + "grad_norm": 1.045234005826037, + "learning_rate": 5.811909720616266e-06, + "loss": 0.2736, + "step": 37526 + }, + { + "epoch": 0.6484482996958806, + "grad_norm": 0.6798705395873851, + "learning_rate": 5.811401523797644e-06, + "loss": 0.3075, + "step": 37527 + }, + { + "epoch": 0.6484655792092895, + "grad_norm": 0.5387847465025875, + "learning_rate": 5.810893340098027e-06, + "loss": 0.2552, + "step": 37528 + }, + { + "epoch": 0.6484828587226984, + "grad_norm": 1.518077152769843, + "learning_rate": 5.810385169519014e-06, + "loss": 0.3997, + "step": 37529 + }, + { + "epoch": 0.6485001382361073, + "grad_norm": 0.7627649455755368, + "learning_rate": 5.809877012062184e-06, + "loss": 0.2746, + "step": 37530 + }, + { + "epoch": 0.6485174177495162, + "grad_norm": 0.802962310185449, + "learning_rate": 5.809368867729147e-06, + "loss": 0.2867, + "step": 37531 + }, + { + "epoch": 0.6485346972629251, + "grad_norm": 0.9674627892157682, + "learning_rate": 5.808860736521481e-06, + "loss": 0.2607, + "step": 37532 + }, + { + "epoch": 0.648551976776334, + "grad_norm": 1.3506919622967863, + "learning_rate": 5.808352618440785e-06, + "loss": 0.8313, + "step": 37533 + }, + { + "epoch": 0.6485692562897429, + "grad_norm": 1.3180292135505498, + "learning_rate": 5.807844513488647e-06, + "loss": 0.4017, + "step": 37534 + }, + { + "epoch": 0.6485865358031517, + "grad_norm": 0.8575496015154291, + "learning_rate": 5.807336421666656e-06, + "loss": 0.7145, + "step": 37535 + }, + { + "epoch": 0.6486038153165606, + "grad_norm": 1.479567619681503, + "learning_rate": 5.806828342976405e-06, + "loss": 0.3783, + "step": 37536 + }, + { + "epoch": 0.6486210948299695, + "grad_norm": 1.1873097718523653, + "learning_rate": 5.806320277419486e-06, + "loss": 0.2727, + "step": 37537 + }, + { + "epoch": 0.6486383743433785, + "grad_norm": 1.1140767132182459, + "learning_rate": 5.8058122249974955e-06, + "loss": 0.3584, + "step": 37538 + }, + { + "epoch": 0.6486556538567874, + "grad_norm": 1.0638416704242295, + "learning_rate": 5.8053041857120154e-06, + "loss": 0.4264, + "step": 37539 + }, + { + "epoch": 0.6486729333701963, + "grad_norm": 1.0916040701710765, + "learning_rate": 5.804796159564645e-06, + "loss": 0.5244, + "step": 37540 + }, + { + "epoch": 0.6486902128836052, + "grad_norm": 1.8418818016751115, + "learning_rate": 5.804288146556968e-06, + "loss": 0.4171, + "step": 37541 + }, + { + "epoch": 0.6487074923970141, + "grad_norm": 1.03823712767887, + "learning_rate": 5.80378014669058e-06, + "loss": 0.4507, + "step": 37542 + }, + { + "epoch": 0.648724771910423, + "grad_norm": 1.2459969818189836, + "learning_rate": 5.8032721599670705e-06, + "loss": 0.5168, + "step": 37543 + }, + { + "epoch": 0.6487420514238319, + "grad_norm": 1.3675069531304411, + "learning_rate": 5.802764186388037e-06, + "loss": 0.4471, + "step": 37544 + }, + { + "epoch": 0.6487593309372408, + "grad_norm": 1.2744445683955898, + "learning_rate": 5.802256225955063e-06, + "loss": 0.5565, + "step": 37545 + }, + { + "epoch": 0.6487766104506497, + "grad_norm": 0.963719701596223, + "learning_rate": 5.801748278669739e-06, + "loss": 0.3637, + "step": 37546 + }, + { + "epoch": 0.6487938899640586, + "grad_norm": 1.005051118868316, + "learning_rate": 5.801240344533658e-06, + "loss": 0.4085, + "step": 37547 + }, + { + "epoch": 0.6488111694774675, + "grad_norm": 1.3831233495741253, + "learning_rate": 5.800732423548412e-06, + "loss": 0.5756, + "step": 37548 + }, + { + "epoch": 0.6488284489908764, + "grad_norm": 0.8339141194351204, + "learning_rate": 5.800224515715594e-06, + "loss": 0.2836, + "step": 37549 + }, + { + "epoch": 0.6488457285042853, + "grad_norm": 1.6900826987082276, + "learning_rate": 5.799716621036788e-06, + "loss": 0.3906, + "step": 37550 + }, + { + "epoch": 0.6488630080176943, + "grad_norm": 2.496796301439819, + "learning_rate": 5.799208739513593e-06, + "loss": 0.5077, + "step": 37551 + }, + { + "epoch": 0.6488802875311032, + "grad_norm": 1.1444379558405748, + "learning_rate": 5.7987008711475925e-06, + "loss": 0.4632, + "step": 37552 + }, + { + "epoch": 0.6488975670445121, + "grad_norm": 1.0819501131579365, + "learning_rate": 5.798193015940379e-06, + "loss": 0.4751, + "step": 37553 + }, + { + "epoch": 0.648914846557921, + "grad_norm": 1.342156774307719, + "learning_rate": 5.797685173893549e-06, + "loss": 0.3774, + "step": 37554 + }, + { + "epoch": 0.6489321260713299, + "grad_norm": 1.0283491387341488, + "learning_rate": 5.797177345008684e-06, + "loss": 0.3794, + "step": 37555 + }, + { + "epoch": 0.6489494055847387, + "grad_norm": 0.6736770916633926, + "learning_rate": 5.7966695292873845e-06, + "loss": 0.5916, + "step": 37556 + }, + { + "epoch": 0.6489666850981476, + "grad_norm": 1.254414837379033, + "learning_rate": 5.796161726731229e-06, + "loss": 0.3446, + "step": 37557 + }, + { + "epoch": 0.6489839646115565, + "grad_norm": 1.09497754020023, + "learning_rate": 5.795653937341816e-06, + "loss": 0.4339, + "step": 37558 + }, + { + "epoch": 0.6490012441249654, + "grad_norm": 1.0867940777503844, + "learning_rate": 5.795146161120733e-06, + "loss": 0.2714, + "step": 37559 + }, + { + "epoch": 0.6490185236383743, + "grad_norm": 1.2824262170653775, + "learning_rate": 5.794638398069577e-06, + "loss": 0.3931, + "step": 37560 + }, + { + "epoch": 0.6490358031517832, + "grad_norm": 0.9123617023367895, + "learning_rate": 5.794130648189928e-06, + "loss": 0.5171, + "step": 37561 + }, + { + "epoch": 0.6490530826651921, + "grad_norm": 0.8207095940114909, + "learning_rate": 5.793622911483388e-06, + "loss": 0.5067, + "step": 37562 + }, + { + "epoch": 0.649070362178601, + "grad_norm": 1.4949049070736808, + "learning_rate": 5.7931151879515346e-06, + "loss": 0.4158, + "step": 37563 + }, + { + "epoch": 0.6490876416920099, + "grad_norm": 1.333712402140499, + "learning_rate": 5.792607477595964e-06, + "loss": 0.5934, + "step": 37564 + }, + { + "epoch": 0.6491049212054188, + "grad_norm": 1.9849618850659128, + "learning_rate": 5.792099780418272e-06, + "loss": 0.5576, + "step": 37565 + }, + { + "epoch": 0.6491222007188278, + "grad_norm": 1.2081522564907528, + "learning_rate": 5.791592096420038e-06, + "loss": 0.3192, + "step": 37566 + }, + { + "epoch": 0.6491394802322367, + "grad_norm": 1.0421249062267057, + "learning_rate": 5.791084425602862e-06, + "loss": 0.3327, + "step": 37567 + }, + { + "epoch": 0.6491567597456456, + "grad_norm": 0.7007939953769599, + "learning_rate": 5.790576767968325e-06, + "loss": 0.2388, + "step": 37568 + }, + { + "epoch": 0.6491740392590545, + "grad_norm": 0.7280405002053959, + "learning_rate": 5.790069123518025e-06, + "loss": 0.4369, + "step": 37569 + }, + { + "epoch": 0.6491913187724634, + "grad_norm": 0.8740279631976213, + "learning_rate": 5.789561492253542e-06, + "loss": 0.3102, + "step": 37570 + }, + { + "epoch": 0.6492085982858723, + "grad_norm": 1.3407584717701349, + "learning_rate": 5.789053874176479e-06, + "loss": 0.3816, + "step": 37571 + }, + { + "epoch": 0.6492258777992812, + "grad_norm": 0.9941787662117335, + "learning_rate": 5.788546269288415e-06, + "loss": 0.3383, + "step": 37572 + }, + { + "epoch": 0.6492431573126901, + "grad_norm": 0.674041449968878, + "learning_rate": 5.78803867759095e-06, + "loss": 0.3647, + "step": 37573 + }, + { + "epoch": 0.649260436826099, + "grad_norm": 0.9618606161173412, + "learning_rate": 5.7875310990856625e-06, + "loss": 0.4576, + "step": 37574 + }, + { + "epoch": 0.6492777163395079, + "grad_norm": 1.3142604487629934, + "learning_rate": 5.787023533774152e-06, + "loss": 0.3221, + "step": 37575 + }, + { + "epoch": 0.6492949958529168, + "grad_norm": 1.3846531284808663, + "learning_rate": 5.7865159816580004e-06, + "loss": 0.3186, + "step": 37576 + }, + { + "epoch": 0.6493122753663256, + "grad_norm": 0.8744159948079605, + "learning_rate": 5.7860084427388e-06, + "loss": 0.3735, + "step": 37577 + }, + { + "epoch": 0.6493295548797345, + "grad_norm": 0.8906229165461025, + "learning_rate": 5.785500917018147e-06, + "loss": 0.3942, + "step": 37578 + }, + { + "epoch": 0.6493468343931434, + "grad_norm": 0.7407606873883587, + "learning_rate": 5.78499340449762e-06, + "loss": 0.4744, + "step": 37579 + }, + { + "epoch": 0.6493641139065524, + "grad_norm": 0.9633767783462616, + "learning_rate": 5.784485905178818e-06, + "loss": 0.4001, + "step": 37580 + }, + { + "epoch": 0.6493813934199613, + "grad_norm": 0.9316167346896602, + "learning_rate": 5.783978419063323e-06, + "loss": 0.3051, + "step": 37581 + }, + { + "epoch": 0.6493986729333702, + "grad_norm": 0.9427357659315403, + "learning_rate": 5.7834709461527286e-06, + "loss": 0.33, + "step": 37582 + }, + { + "epoch": 0.6494159524467791, + "grad_norm": 0.7917606259397523, + "learning_rate": 5.782963486448623e-06, + "loss": 0.2979, + "step": 37583 + }, + { + "epoch": 0.649433231960188, + "grad_norm": 1.0362640693794085, + "learning_rate": 5.7824560399526e-06, + "loss": 0.4023, + "step": 37584 + }, + { + "epoch": 0.6494505114735969, + "grad_norm": 1.3441129184280047, + "learning_rate": 5.781948606666246e-06, + "loss": 0.3023, + "step": 37585 + }, + { + "epoch": 0.6494677909870058, + "grad_norm": 1.1732179936917646, + "learning_rate": 5.781441186591144e-06, + "loss": 0.2581, + "step": 37586 + }, + { + "epoch": 0.6494850705004147, + "grad_norm": 0.9841718822129922, + "learning_rate": 5.78093377972889e-06, + "loss": 0.3432, + "step": 37587 + }, + { + "epoch": 0.6495023500138236, + "grad_norm": 1.2833167314279272, + "learning_rate": 5.780426386081072e-06, + "loss": 0.3573, + "step": 37588 + }, + { + "epoch": 0.6495196295272325, + "grad_norm": 0.8948024474310181, + "learning_rate": 5.779919005649283e-06, + "loss": 0.3533, + "step": 37589 + }, + { + "epoch": 0.6495369090406414, + "grad_norm": 0.9926737950157629, + "learning_rate": 5.779411638435104e-06, + "loss": 0.5266, + "step": 37590 + }, + { + "epoch": 0.6495541885540503, + "grad_norm": 1.2669705021950508, + "learning_rate": 5.778904284440133e-06, + "loss": 0.4866, + "step": 37591 + }, + { + "epoch": 0.6495714680674592, + "grad_norm": 1.195083397631046, + "learning_rate": 5.77839694366595e-06, + "loss": 0.3468, + "step": 37592 + }, + { + "epoch": 0.6495887475808682, + "grad_norm": 0.9435570756139205, + "learning_rate": 5.777889616114149e-06, + "loss": 0.3304, + "step": 37593 + }, + { + "epoch": 0.6496060270942771, + "grad_norm": 1.4891719264650518, + "learning_rate": 5.777382301786319e-06, + "loss": 0.3349, + "step": 37594 + }, + { + "epoch": 0.649623306607686, + "grad_norm": 1.180664738283059, + "learning_rate": 5.7768750006840524e-06, + "loss": 0.2277, + "step": 37595 + }, + { + "epoch": 0.6496405861210949, + "grad_norm": 0.8097019676632293, + "learning_rate": 5.776367712808934e-06, + "loss": 0.2785, + "step": 37596 + }, + { + "epoch": 0.6496578656345038, + "grad_norm": 1.085631048960984, + "learning_rate": 5.77586043816255e-06, + "loss": 0.3007, + "step": 37597 + }, + { + "epoch": 0.6496751451479126, + "grad_norm": 1.2101947559865518, + "learning_rate": 5.7753531767464895e-06, + "loss": 0.4269, + "step": 37598 + }, + { + "epoch": 0.6496924246613215, + "grad_norm": 1.3427943119300187, + "learning_rate": 5.774845928562347e-06, + "loss": 0.501, + "step": 37599 + }, + { + "epoch": 0.6497097041747304, + "grad_norm": 1.2064783531091932, + "learning_rate": 5.77433869361171e-06, + "loss": 0.378, + "step": 37600 + }, + { + "epoch": 0.6497269836881393, + "grad_norm": 1.2962833242732168, + "learning_rate": 5.773831471896162e-06, + "loss": 0.4267, + "step": 37601 + }, + { + "epoch": 0.6497442632015482, + "grad_norm": 1.4828164563093933, + "learning_rate": 5.773324263417299e-06, + "loss": 0.3486, + "step": 37602 + }, + { + "epoch": 0.6497615427149571, + "grad_norm": 0.7993558663302015, + "learning_rate": 5.772817068176702e-06, + "loss": 0.4244, + "step": 37603 + }, + { + "epoch": 0.649778822228366, + "grad_norm": 1.5864570864281815, + "learning_rate": 5.772309886175962e-06, + "loss": 0.246, + "step": 37604 + }, + { + "epoch": 0.6497961017417749, + "grad_norm": 0.8851143331391844, + "learning_rate": 5.77180271741667e-06, + "loss": 0.3724, + "step": 37605 + }, + { + "epoch": 0.6498133812551838, + "grad_norm": 1.2520951100286628, + "learning_rate": 5.771295561900416e-06, + "loss": 0.3625, + "step": 37606 + }, + { + "epoch": 0.6498306607685927, + "grad_norm": 1.2822638084122808, + "learning_rate": 5.770788419628788e-06, + "loss": 0.5783, + "step": 37607 + }, + { + "epoch": 0.6498479402820017, + "grad_norm": 1.0204715097403845, + "learning_rate": 5.7702812906033654e-06, + "loss": 0.3863, + "step": 37608 + }, + { + "epoch": 0.6498652197954106, + "grad_norm": 1.3417532758223603, + "learning_rate": 5.769774174825749e-06, + "loss": 0.3858, + "step": 37609 + }, + { + "epoch": 0.6498824993088195, + "grad_norm": 0.7498433433645697, + "learning_rate": 5.769267072297513e-06, + "loss": 0.3644, + "step": 37610 + }, + { + "epoch": 0.6498997788222284, + "grad_norm": 1.1627071024075448, + "learning_rate": 5.768759983020263e-06, + "loss": 0.3623, + "step": 37611 + }, + { + "epoch": 0.6499170583356373, + "grad_norm": 1.9271358225992263, + "learning_rate": 5.7682529069955716e-06, + "loss": 0.4045, + "step": 37612 + }, + { + "epoch": 0.6499343378490462, + "grad_norm": 1.7763108134599568, + "learning_rate": 5.7677458442250386e-06, + "loss": 0.384, + "step": 37613 + }, + { + "epoch": 0.6499516173624551, + "grad_norm": 1.1317591669242497, + "learning_rate": 5.767238794710245e-06, + "loss": 0.4135, + "step": 37614 + }, + { + "epoch": 0.649968896875864, + "grad_norm": 0.7503717774263705, + "learning_rate": 5.7667317584527836e-06, + "loss": 0.6086, + "step": 37615 + }, + { + "epoch": 0.6499861763892729, + "grad_norm": 0.7479376488718137, + "learning_rate": 5.766224735454237e-06, + "loss": 0.456, + "step": 37616 + }, + { + "epoch": 0.6500034559026818, + "grad_norm": 0.8973411977582778, + "learning_rate": 5.7657177257161955e-06, + "loss": 0.2935, + "step": 37617 + }, + { + "epoch": 0.6500207354160907, + "grad_norm": 1.3862098682055906, + "learning_rate": 5.765210729240251e-06, + "loss": 0.3086, + "step": 37618 + }, + { + "epoch": 0.6500380149294995, + "grad_norm": 1.0200988573140213, + "learning_rate": 5.764703746027987e-06, + "loss": 0.296, + "step": 37619 + }, + { + "epoch": 0.6500552944429084, + "grad_norm": 1.2390047017457826, + "learning_rate": 5.764196776080995e-06, + "loss": 0.3195, + "step": 37620 + }, + { + "epoch": 0.6500725739563173, + "grad_norm": 1.1219178274195394, + "learning_rate": 5.7636898194008575e-06, + "loss": 0.5049, + "step": 37621 + }, + { + "epoch": 0.6500898534697263, + "grad_norm": 1.2599164178544995, + "learning_rate": 5.763182875989164e-06, + "loss": 0.5928, + "step": 37622 + }, + { + "epoch": 0.6501071329831352, + "grad_norm": 0.915699748298645, + "learning_rate": 5.762675945847505e-06, + "loss": 0.3331, + "step": 37623 + }, + { + "epoch": 0.6501244124965441, + "grad_norm": 0.8600761841489715, + "learning_rate": 5.762169028977469e-06, + "loss": 0.2254, + "step": 37624 + }, + { + "epoch": 0.650141692009953, + "grad_norm": 1.3016417532423528, + "learning_rate": 5.7616621253806405e-06, + "loss": 0.4469, + "step": 37625 + }, + { + "epoch": 0.6501589715233619, + "grad_norm": 0.9819074380814007, + "learning_rate": 5.761155235058611e-06, + "loss": 0.4298, + "step": 37626 + }, + { + "epoch": 0.6501762510367708, + "grad_norm": 0.7841768148161006, + "learning_rate": 5.760648358012961e-06, + "loss": 0.4004, + "step": 37627 + }, + { + "epoch": 0.6501935305501797, + "grad_norm": 1.5897036012032444, + "learning_rate": 5.760141494245283e-06, + "loss": 0.4126, + "step": 37628 + }, + { + "epoch": 0.6502108100635886, + "grad_norm": 1.284021802857965, + "learning_rate": 5.759634643757168e-06, + "loss": 0.527, + "step": 37629 + }, + { + "epoch": 0.6502280895769975, + "grad_norm": 0.9990540364443099, + "learning_rate": 5.759127806550195e-06, + "loss": 0.3719, + "step": 37630 + }, + { + "epoch": 0.6502453690904064, + "grad_norm": 1.6928559531164262, + "learning_rate": 5.75862098262596e-06, + "loss": 0.3974, + "step": 37631 + }, + { + "epoch": 0.6502626486038153, + "grad_norm": 1.6132904485217017, + "learning_rate": 5.758114171986044e-06, + "loss": 0.328, + "step": 37632 + }, + { + "epoch": 0.6502799281172242, + "grad_norm": 0.8044140997929556, + "learning_rate": 5.757607374632035e-06, + "loss": 0.4018, + "step": 37633 + }, + { + "epoch": 0.6502972076306331, + "grad_norm": 1.21584683057325, + "learning_rate": 5.757100590565523e-06, + "loss": 0.5384, + "step": 37634 + }, + { + "epoch": 0.650314487144042, + "grad_norm": 1.0008553736568016, + "learning_rate": 5.756593819788099e-06, + "loss": 0.316, + "step": 37635 + }, + { + "epoch": 0.650331766657451, + "grad_norm": 1.571634168083854, + "learning_rate": 5.75608706230134e-06, + "loss": 0.4412, + "step": 37636 + }, + { + "epoch": 0.6503490461708599, + "grad_norm": 0.7408924931647489, + "learning_rate": 5.7555803181068434e-06, + "loss": 0.2947, + "step": 37637 + }, + { + "epoch": 0.6503663256842688, + "grad_norm": 1.2798463328558978, + "learning_rate": 5.755073587206188e-06, + "loss": 0.5313, + "step": 37638 + }, + { + "epoch": 0.6503836051976777, + "grad_norm": 0.5596711787877621, + "learning_rate": 5.754566869600966e-06, + "loss": 0.4265, + "step": 37639 + }, + { + "epoch": 0.6504008847110866, + "grad_norm": 1.258899773446465, + "learning_rate": 5.754060165292765e-06, + "loss": 0.3719, + "step": 37640 + }, + { + "epoch": 0.6504181642244954, + "grad_norm": 0.9942487193113902, + "learning_rate": 5.753553474283168e-06, + "loss": 0.3255, + "step": 37641 + }, + { + "epoch": 0.6504354437379043, + "grad_norm": 0.6360957295018577, + "learning_rate": 5.753046796573767e-06, + "loss": 0.4099, + "step": 37642 + }, + { + "epoch": 0.6504527232513132, + "grad_norm": 0.8793445039876014, + "learning_rate": 5.752540132166142e-06, + "loss": 0.4972, + "step": 37643 + }, + { + "epoch": 0.6504700027647221, + "grad_norm": 1.3446327613098987, + "learning_rate": 5.752033481061885e-06, + "loss": 0.3249, + "step": 37644 + }, + { + "epoch": 0.650487282278131, + "grad_norm": 1.5317782734523835, + "learning_rate": 5.751526843262583e-06, + "loss": 0.4935, + "step": 37645 + }, + { + "epoch": 0.6505045617915399, + "grad_norm": 1.010974271367287, + "learning_rate": 5.751020218769824e-06, + "loss": 0.2386, + "step": 37646 + }, + { + "epoch": 0.6505218413049488, + "grad_norm": 1.5392564907695032, + "learning_rate": 5.750513607585193e-06, + "loss": 0.3483, + "step": 37647 + }, + { + "epoch": 0.6505391208183577, + "grad_norm": 0.7918109831940477, + "learning_rate": 5.750007009710272e-06, + "loss": 0.3889, + "step": 37648 + }, + { + "epoch": 0.6505564003317666, + "grad_norm": 1.1475813031795157, + "learning_rate": 5.749500425146657e-06, + "loss": 0.3536, + "step": 37649 + }, + { + "epoch": 0.6505736798451756, + "grad_norm": 1.6509262407610379, + "learning_rate": 5.748993853895921e-06, + "loss": 0.4469, + "step": 37650 + }, + { + "epoch": 0.6505909593585845, + "grad_norm": 0.9101330133168082, + "learning_rate": 5.748487295959668e-06, + "loss": 0.3296, + "step": 37651 + }, + { + "epoch": 0.6506082388719934, + "grad_norm": 1.6093659619664156, + "learning_rate": 5.747980751339471e-06, + "loss": 0.2879, + "step": 37652 + }, + { + "epoch": 0.6506255183854023, + "grad_norm": 1.1047077454141163, + "learning_rate": 5.747474220036926e-06, + "loss": 0.3152, + "step": 37653 + }, + { + "epoch": 0.6506427978988112, + "grad_norm": 1.2164131488570744, + "learning_rate": 5.746967702053612e-06, + "loss": 0.4468, + "step": 37654 + }, + { + "epoch": 0.6506600774122201, + "grad_norm": 1.1737182806663677, + "learning_rate": 5.74646119739112e-06, + "loss": 0.3747, + "step": 37655 + }, + { + "epoch": 0.650677356925629, + "grad_norm": 1.51328141940634, + "learning_rate": 5.745954706051029e-06, + "loss": 0.3323, + "step": 37656 + }, + { + "epoch": 0.6506946364390379, + "grad_norm": 0.916624193647565, + "learning_rate": 5.745448228034938e-06, + "loss": 0.3526, + "step": 37657 + }, + { + "epoch": 0.6507119159524468, + "grad_norm": 0.8728303813822719, + "learning_rate": 5.744941763344426e-06, + "loss": 0.405, + "step": 37658 + }, + { + "epoch": 0.6507291954658557, + "grad_norm": 1.4880485475148593, + "learning_rate": 5.744435311981076e-06, + "loss": 0.3717, + "step": 37659 + }, + { + "epoch": 0.6507464749792646, + "grad_norm": 1.0852338002756905, + "learning_rate": 5.7439288739464824e-06, + "loss": 0.3277, + "step": 37660 + }, + { + "epoch": 0.6507637544926735, + "grad_norm": 1.0511748769523284, + "learning_rate": 5.743422449242224e-06, + "loss": 0.3652, + "step": 37661 + }, + { + "epoch": 0.6507810340060823, + "grad_norm": 1.1367105613994868, + "learning_rate": 5.742916037869888e-06, + "loss": 0.3883, + "step": 37662 + }, + { + "epoch": 0.6507983135194912, + "grad_norm": 1.2911466443135444, + "learning_rate": 5.742409639831063e-06, + "loss": 0.2934, + "step": 37663 + }, + { + "epoch": 0.6508155930329002, + "grad_norm": 0.8806373225170466, + "learning_rate": 5.74190325512734e-06, + "loss": 0.5451, + "step": 37664 + }, + { + "epoch": 0.6508328725463091, + "grad_norm": 1.101133157903728, + "learning_rate": 5.7413968837602955e-06, + "loss": 0.3709, + "step": 37665 + }, + { + "epoch": 0.650850152059718, + "grad_norm": 1.2815726750055527, + "learning_rate": 5.740890525731522e-06, + "loss": 0.3973, + "step": 37666 + }, + { + "epoch": 0.6508674315731269, + "grad_norm": 1.5561477106565387, + "learning_rate": 5.7403841810426e-06, + "loss": 0.4655, + "step": 37667 + }, + { + "epoch": 0.6508847110865358, + "grad_norm": 1.0599753390226008, + "learning_rate": 5.739877849695118e-06, + "loss": 0.233, + "step": 37668 + }, + { + "epoch": 0.6509019905999447, + "grad_norm": 1.2719770085642756, + "learning_rate": 5.739371531690667e-06, + "loss": 0.4368, + "step": 37669 + }, + { + "epoch": 0.6509192701133536, + "grad_norm": 0.7758008393396943, + "learning_rate": 5.738865227030823e-06, + "loss": 0.3293, + "step": 37670 + }, + { + "epoch": 0.6509365496267625, + "grad_norm": 1.2010409210944177, + "learning_rate": 5.738358935717182e-06, + "loss": 0.4236, + "step": 37671 + }, + { + "epoch": 0.6509538291401714, + "grad_norm": 1.3062516457766626, + "learning_rate": 5.73785265775132e-06, + "loss": 0.5121, + "step": 37672 + }, + { + "epoch": 0.6509711086535803, + "grad_norm": 0.8256146960197956, + "learning_rate": 5.737346393134828e-06, + "loss": 0.5173, + "step": 37673 + }, + { + "epoch": 0.6509883881669892, + "grad_norm": 1.5776651297344664, + "learning_rate": 5.736840141869291e-06, + "loss": 0.4757, + "step": 37674 + }, + { + "epoch": 0.6510056676803981, + "grad_norm": 1.5078070382261775, + "learning_rate": 5.7363339039563e-06, + "loss": 0.3552, + "step": 37675 + }, + { + "epoch": 0.651022947193807, + "grad_norm": 1.2284814387914642, + "learning_rate": 5.735827679397427e-06, + "loss": 0.5503, + "step": 37676 + }, + { + "epoch": 0.651040226707216, + "grad_norm": 1.1573147238980062, + "learning_rate": 5.735321468194274e-06, + "loss": 0.3566, + "step": 37677 + }, + { + "epoch": 0.6510575062206249, + "grad_norm": 0.8622848895376115, + "learning_rate": 5.734815270348413e-06, + "loss": 0.2786, + "step": 37678 + }, + { + "epoch": 0.6510747857340338, + "grad_norm": 1.209703728914931, + "learning_rate": 5.7343090858614335e-06, + "loss": 0.412, + "step": 37679 + }, + { + "epoch": 0.6510920652474427, + "grad_norm": 0.8908572230674991, + "learning_rate": 5.7338029147349275e-06, + "loss": 0.3786, + "step": 37680 + }, + { + "epoch": 0.6511093447608516, + "grad_norm": 2.1002519314763517, + "learning_rate": 5.73329675697047e-06, + "loss": 0.4679, + "step": 37681 + }, + { + "epoch": 0.6511266242742605, + "grad_norm": 1.0985407130264253, + "learning_rate": 5.7327906125696566e-06, + "loss": 0.3276, + "step": 37682 + }, + { + "epoch": 0.6511439037876693, + "grad_norm": 1.0362016282237494, + "learning_rate": 5.732284481534063e-06, + "loss": 0.4945, + "step": 37683 + }, + { + "epoch": 0.6511611833010782, + "grad_norm": 1.1801355476748712, + "learning_rate": 5.731778363865278e-06, + "loss": 0.2828, + "step": 37684 + }, + { + "epoch": 0.6511784628144871, + "grad_norm": 1.578174134021795, + "learning_rate": 5.731272259564889e-06, + "loss": 0.3662, + "step": 37685 + }, + { + "epoch": 0.651195742327896, + "grad_norm": 1.2302137653565255, + "learning_rate": 5.7307661686344805e-06, + "loss": 0.3113, + "step": 37686 + }, + { + "epoch": 0.6512130218413049, + "grad_norm": 0.8402646877588689, + "learning_rate": 5.730260091075636e-06, + "loss": 0.5501, + "step": 37687 + }, + { + "epoch": 0.6512303013547138, + "grad_norm": 0.8568836833007237, + "learning_rate": 5.729754026889944e-06, + "loss": 0.4557, + "step": 37688 + }, + { + "epoch": 0.6512475808681227, + "grad_norm": 0.8490084669351718, + "learning_rate": 5.729247976078986e-06, + "loss": 0.3511, + "step": 37689 + }, + { + "epoch": 0.6512648603815316, + "grad_norm": 1.1115612244359367, + "learning_rate": 5.728741938644342e-06, + "loss": 0.3254, + "step": 37690 + }, + { + "epoch": 0.6512821398949405, + "grad_norm": 1.0886233979686242, + "learning_rate": 5.72823591458761e-06, + "loss": 0.5393, + "step": 37691 + }, + { + "epoch": 0.6512994194083495, + "grad_norm": 1.395960654504523, + "learning_rate": 5.727729903910364e-06, + "loss": 0.3655, + "step": 37692 + }, + { + "epoch": 0.6513166989217584, + "grad_norm": 1.1074448243675354, + "learning_rate": 5.727223906614196e-06, + "loss": 0.4285, + "step": 37693 + }, + { + "epoch": 0.6513339784351673, + "grad_norm": 1.8641621341864092, + "learning_rate": 5.7267179227006845e-06, + "loss": 0.4933, + "step": 37694 + }, + { + "epoch": 0.6513512579485762, + "grad_norm": 1.3026264714863038, + "learning_rate": 5.726211952171421e-06, + "loss": 0.4324, + "step": 37695 + }, + { + "epoch": 0.6513685374619851, + "grad_norm": 1.4397965169367062, + "learning_rate": 5.7257059950279766e-06, + "loss": 0.2946, + "step": 37696 + }, + { + "epoch": 0.651385816975394, + "grad_norm": 0.8245507537376127, + "learning_rate": 5.725200051271955e-06, + "loss": 0.385, + "step": 37697 + }, + { + "epoch": 0.6514030964888029, + "grad_norm": 1.1280266234696936, + "learning_rate": 5.724694120904929e-06, + "loss": 0.2693, + "step": 37698 + }, + { + "epoch": 0.6514203760022118, + "grad_norm": 0.8603980378050885, + "learning_rate": 5.724188203928489e-06, + "loss": 0.4059, + "step": 37699 + }, + { + "epoch": 0.6514376555156207, + "grad_norm": 1.0679947595793746, + "learning_rate": 5.723682300344215e-06, + "loss": 0.413, + "step": 37700 + }, + { + "epoch": 0.6514549350290296, + "grad_norm": 0.716253991574906, + "learning_rate": 5.723176410153691e-06, + "loss": 0.7374, + "step": 37701 + }, + { + "epoch": 0.6514722145424385, + "grad_norm": 1.3947614565935558, + "learning_rate": 5.722670533358503e-06, + "loss": 0.4424, + "step": 37702 + }, + { + "epoch": 0.6514894940558474, + "grad_norm": 1.0725940419813087, + "learning_rate": 5.722164669960236e-06, + "loss": 0.6451, + "step": 37703 + }, + { + "epoch": 0.6515067735692562, + "grad_norm": 1.2211279195553686, + "learning_rate": 5.721658819960478e-06, + "loss": 0.4095, + "step": 37704 + }, + { + "epoch": 0.6515240530826651, + "grad_norm": 0.6655421578162865, + "learning_rate": 5.721152983360806e-06, + "loss": 0.6961, + "step": 37705 + }, + { + "epoch": 0.651541332596074, + "grad_norm": 1.973702880781361, + "learning_rate": 5.720647160162812e-06, + "loss": 0.259, + "step": 37706 + }, + { + "epoch": 0.651558612109483, + "grad_norm": 1.5671876518883805, + "learning_rate": 5.720141350368072e-06, + "loss": 0.471, + "step": 37707 + }, + { + "epoch": 0.6515758916228919, + "grad_norm": 0.808779764683718, + "learning_rate": 5.719635553978176e-06, + "loss": 0.4061, + "step": 37708 + }, + { + "epoch": 0.6515931711363008, + "grad_norm": 1.1413952664305032, + "learning_rate": 5.719129770994709e-06, + "loss": 0.6436, + "step": 37709 + }, + { + "epoch": 0.6516104506497097, + "grad_norm": 1.3409475457153248, + "learning_rate": 5.718624001419248e-06, + "loss": 0.3836, + "step": 37710 + }, + { + "epoch": 0.6516277301631186, + "grad_norm": 0.7701758029114251, + "learning_rate": 5.718118245253388e-06, + "loss": 0.9061, + "step": 37711 + }, + { + "epoch": 0.6516450096765275, + "grad_norm": 1.072433345452009, + "learning_rate": 5.7176125024987024e-06, + "loss": 0.3235, + "step": 37712 + }, + { + "epoch": 0.6516622891899364, + "grad_norm": 1.2464105924873925, + "learning_rate": 5.7171067731567805e-06, + "loss": 0.3812, + "step": 37713 + }, + { + "epoch": 0.6516795687033453, + "grad_norm": 1.0516817120612465, + "learning_rate": 5.716601057229205e-06, + "loss": 0.2368, + "step": 37714 + }, + { + "epoch": 0.6516968482167542, + "grad_norm": 0.6661115595954673, + "learning_rate": 5.716095354717566e-06, + "loss": 0.7639, + "step": 37715 + }, + { + "epoch": 0.6517141277301631, + "grad_norm": 1.0149658149874927, + "learning_rate": 5.715589665623436e-06, + "loss": 0.6202, + "step": 37716 + }, + { + "epoch": 0.651731407243572, + "grad_norm": 1.1491808849562255, + "learning_rate": 5.715083989948409e-06, + "loss": 0.4277, + "step": 37717 + }, + { + "epoch": 0.6517486867569809, + "grad_norm": 0.7425891985942812, + "learning_rate": 5.714578327694061e-06, + "loss": 0.3544, + "step": 37718 + }, + { + "epoch": 0.6517659662703899, + "grad_norm": 1.1976111393643145, + "learning_rate": 5.714072678861979e-06, + "loss": 0.5389, + "step": 37719 + }, + { + "epoch": 0.6517832457837988, + "grad_norm": 1.708331821226734, + "learning_rate": 5.713567043453752e-06, + "loss": 0.5926, + "step": 37720 + }, + { + "epoch": 0.6518005252972077, + "grad_norm": 1.3449628431165221, + "learning_rate": 5.713061421470954e-06, + "loss": 0.427, + "step": 37721 + }, + { + "epoch": 0.6518178048106166, + "grad_norm": 0.6711492407776991, + "learning_rate": 5.712555812915177e-06, + "loss": 0.6676, + "step": 37722 + }, + { + "epoch": 0.6518350843240255, + "grad_norm": 1.3114235514216033, + "learning_rate": 5.712050217787999e-06, + "loss": 0.3655, + "step": 37723 + }, + { + "epoch": 0.6518523638374344, + "grad_norm": 1.2087912018080789, + "learning_rate": 5.7115446360910035e-06, + "loss": 0.5508, + "step": 37724 + }, + { + "epoch": 0.6518696433508432, + "grad_norm": 1.1283966748296193, + "learning_rate": 5.711039067825776e-06, + "loss": 0.4067, + "step": 37725 + }, + { + "epoch": 0.6518869228642521, + "grad_norm": 0.8151475718394765, + "learning_rate": 5.710533512993905e-06, + "loss": 0.2553, + "step": 37726 + }, + { + "epoch": 0.651904202377661, + "grad_norm": 2.3943071489062095, + "learning_rate": 5.710027971596965e-06, + "loss": 0.3603, + "step": 37727 + }, + { + "epoch": 0.6519214818910699, + "grad_norm": 1.685738869166196, + "learning_rate": 5.709522443636548e-06, + "loss": 0.9885, + "step": 37728 + }, + { + "epoch": 0.6519387614044788, + "grad_norm": 1.316884130345934, + "learning_rate": 5.709016929114233e-06, + "loss": 0.3537, + "step": 37729 + }, + { + "epoch": 0.6519560409178877, + "grad_norm": 1.1462635787593465, + "learning_rate": 5.7085114280315935e-06, + "loss": 0.2911, + "step": 37730 + }, + { + "epoch": 0.6519733204312966, + "grad_norm": 0.929783618869485, + "learning_rate": 5.708005940390231e-06, + "loss": 0.4326, + "step": 37731 + }, + { + "epoch": 0.6519905999447055, + "grad_norm": 0.8193513366783891, + "learning_rate": 5.7075004661917155e-06, + "loss": 0.3725, + "step": 37732 + }, + { + "epoch": 0.6520078794581144, + "grad_norm": 0.9818443242363586, + "learning_rate": 5.70699500543764e-06, + "loss": 0.4543, + "step": 37733 + }, + { + "epoch": 0.6520251589715234, + "grad_norm": 1.3069493275085102, + "learning_rate": 5.706489558129577e-06, + "loss": 0.2015, + "step": 37734 + }, + { + "epoch": 0.6520424384849323, + "grad_norm": 0.9760256563426978, + "learning_rate": 5.705984124269121e-06, + "loss": 0.2208, + "step": 37735 + }, + { + "epoch": 0.6520597179983412, + "grad_norm": 1.3068529614359252, + "learning_rate": 5.705478703857839e-06, + "loss": 0.2959, + "step": 37736 + }, + { + "epoch": 0.6520769975117501, + "grad_norm": 1.0616372173777908, + "learning_rate": 5.704973296897333e-06, + "loss": 0.4284, + "step": 37737 + }, + { + "epoch": 0.652094277025159, + "grad_norm": 1.7260149811591783, + "learning_rate": 5.704467903389172e-06, + "loss": 0.5466, + "step": 37738 + }, + { + "epoch": 0.6521115565385679, + "grad_norm": 0.6266246637948003, + "learning_rate": 5.7039625233349495e-06, + "loss": 0.2576, + "step": 37739 + }, + { + "epoch": 0.6521288360519768, + "grad_norm": 0.7000501726246418, + "learning_rate": 5.703457156736243e-06, + "loss": 0.3024, + "step": 37740 + }, + { + "epoch": 0.6521461155653857, + "grad_norm": 1.8341249849901047, + "learning_rate": 5.702951803594631e-06, + "loss": 0.3776, + "step": 37741 + }, + { + "epoch": 0.6521633950787946, + "grad_norm": 1.4286675342106554, + "learning_rate": 5.702446463911699e-06, + "loss": 0.5072, + "step": 37742 + }, + { + "epoch": 0.6521806745922035, + "grad_norm": 1.0092602577206995, + "learning_rate": 5.701941137689033e-06, + "loss": 0.5422, + "step": 37743 + }, + { + "epoch": 0.6521979541056124, + "grad_norm": 1.4068333516923544, + "learning_rate": 5.7014358249282165e-06, + "loss": 0.3252, + "step": 37744 + }, + { + "epoch": 0.6522152336190213, + "grad_norm": 0.8282834305327694, + "learning_rate": 5.700930525630828e-06, + "loss": 0.3677, + "step": 37745 + }, + { + "epoch": 0.6522325131324301, + "grad_norm": 1.793211176269289, + "learning_rate": 5.7004252397984525e-06, + "loss": 0.3895, + "step": 37746 + }, + { + "epoch": 0.652249792645839, + "grad_norm": 1.3192752412075293, + "learning_rate": 5.69991996743267e-06, + "loss": 0.5511, + "step": 37747 + }, + { + "epoch": 0.652267072159248, + "grad_norm": 0.7595786838301529, + "learning_rate": 5.699414708535065e-06, + "loss": 0.4656, + "step": 37748 + }, + { + "epoch": 0.6522843516726569, + "grad_norm": 1.4229919732207057, + "learning_rate": 5.698909463107221e-06, + "loss": 0.3834, + "step": 37749 + }, + { + "epoch": 0.6523016311860658, + "grad_norm": 1.1402161361095358, + "learning_rate": 5.69840423115072e-06, + "loss": 0.5338, + "step": 37750 + }, + { + "epoch": 0.6523189106994747, + "grad_norm": 1.0571353522252012, + "learning_rate": 5.697899012667147e-06, + "loss": 0.4498, + "step": 37751 + }, + { + "epoch": 0.6523361902128836, + "grad_norm": 0.6017009160401805, + "learning_rate": 5.697393807658076e-06, + "loss": 0.7974, + "step": 37752 + }, + { + "epoch": 0.6523534697262925, + "grad_norm": 0.7781279397660611, + "learning_rate": 5.696888616125095e-06, + "loss": 0.3984, + "step": 37753 + }, + { + "epoch": 0.6523707492397014, + "grad_norm": 1.1596060379456308, + "learning_rate": 5.696383438069786e-06, + "loss": 0.3268, + "step": 37754 + }, + { + "epoch": 0.6523880287531103, + "grad_norm": 0.9286099711722178, + "learning_rate": 5.695878273493733e-06, + "loss": 0.3325, + "step": 37755 + }, + { + "epoch": 0.6524053082665192, + "grad_norm": 1.0781778304877696, + "learning_rate": 5.695373122398516e-06, + "loss": 0.5105, + "step": 37756 + }, + { + "epoch": 0.6524225877799281, + "grad_norm": 1.084511813349107, + "learning_rate": 5.694867984785719e-06, + "loss": 0.3556, + "step": 37757 + }, + { + "epoch": 0.652439867293337, + "grad_norm": 0.7839885278312795, + "learning_rate": 5.694362860656919e-06, + "loss": 0.2256, + "step": 37758 + }, + { + "epoch": 0.6524571468067459, + "grad_norm": 0.8656357380295469, + "learning_rate": 5.6938577500137025e-06, + "loss": 0.3721, + "step": 37759 + }, + { + "epoch": 0.6524744263201548, + "grad_norm": 1.0190036500357105, + "learning_rate": 5.693352652857653e-06, + "loss": 0.5326, + "step": 37760 + }, + { + "epoch": 0.6524917058335638, + "grad_norm": 1.6749638031121763, + "learning_rate": 5.692847569190349e-06, + "loss": 0.2849, + "step": 37761 + }, + { + "epoch": 0.6525089853469727, + "grad_norm": 0.9321775153897032, + "learning_rate": 5.692342499013376e-06, + "loss": 0.3901, + "step": 37762 + }, + { + "epoch": 0.6525262648603816, + "grad_norm": 1.6191583018011, + "learning_rate": 5.6918374423283095e-06, + "loss": 0.349, + "step": 37763 + }, + { + "epoch": 0.6525435443737905, + "grad_norm": 1.7759769759297506, + "learning_rate": 5.691332399136736e-06, + "loss": 0.3117, + "step": 37764 + }, + { + "epoch": 0.6525608238871994, + "grad_norm": 1.0308957765187683, + "learning_rate": 5.690827369440237e-06, + "loss": 0.6452, + "step": 37765 + }, + { + "epoch": 0.6525781034006083, + "grad_norm": 0.987399421456841, + "learning_rate": 5.690322353240399e-06, + "loss": 0.2481, + "step": 37766 + }, + { + "epoch": 0.6525953829140171, + "grad_norm": 0.6587211957375129, + "learning_rate": 5.689817350538793e-06, + "loss": 0.5981, + "step": 37767 + }, + { + "epoch": 0.652612662427426, + "grad_norm": 1.13446838335072, + "learning_rate": 5.689312361337012e-06, + "loss": 0.2865, + "step": 37768 + }, + { + "epoch": 0.6526299419408349, + "grad_norm": 2.10595249518076, + "learning_rate": 5.688807385636628e-06, + "loss": 0.353, + "step": 37769 + }, + { + "epoch": 0.6526472214542438, + "grad_norm": 1.3328242752094444, + "learning_rate": 5.688302423439227e-06, + "loss": 0.2901, + "step": 37770 + }, + { + "epoch": 0.6526645009676527, + "grad_norm": 0.8128150503922311, + "learning_rate": 5.6877974747463935e-06, + "loss": 0.2309, + "step": 37771 + }, + { + "epoch": 0.6526817804810616, + "grad_norm": 1.4921681635573822, + "learning_rate": 5.687292539559703e-06, + "loss": 0.3486, + "step": 37772 + }, + { + "epoch": 0.6526990599944705, + "grad_norm": 1.2277289509097054, + "learning_rate": 5.686787617880745e-06, + "loss": 0.5232, + "step": 37773 + }, + { + "epoch": 0.6527163395078794, + "grad_norm": 1.2209251273719475, + "learning_rate": 5.686282709711089e-06, + "loss": 0.3267, + "step": 37774 + }, + { + "epoch": 0.6527336190212883, + "grad_norm": 1.229240265501932, + "learning_rate": 5.685777815052329e-06, + "loss": 0.4419, + "step": 37775 + }, + { + "epoch": 0.6527508985346973, + "grad_norm": 1.036663257269933, + "learning_rate": 5.685272933906034e-06, + "loss": 0.4176, + "step": 37776 + }, + { + "epoch": 0.6527681780481062, + "grad_norm": 1.0283800191827384, + "learning_rate": 5.684768066273798e-06, + "loss": 0.304, + "step": 37777 + }, + { + "epoch": 0.6527854575615151, + "grad_norm": 1.433612346447915, + "learning_rate": 5.684263212157194e-06, + "loss": 0.421, + "step": 37778 + }, + { + "epoch": 0.652802737074924, + "grad_norm": 0.8483674267492859, + "learning_rate": 5.68375837155781e-06, + "loss": 0.556, + "step": 37779 + }, + { + "epoch": 0.6528200165883329, + "grad_norm": 1.4218461247456584, + "learning_rate": 5.683253544477216e-06, + "loss": 0.3415, + "step": 37780 + }, + { + "epoch": 0.6528372961017418, + "grad_norm": 1.2686482428845534, + "learning_rate": 5.682748730917007e-06, + "loss": 0.43, + "step": 37781 + }, + { + "epoch": 0.6528545756151507, + "grad_norm": 0.7802277286108642, + "learning_rate": 5.682243930878751e-06, + "loss": 0.2542, + "step": 37782 + }, + { + "epoch": 0.6528718551285596, + "grad_norm": 1.2075771137145435, + "learning_rate": 5.681739144364036e-06, + "loss": 0.3343, + "step": 37783 + }, + { + "epoch": 0.6528891346419685, + "grad_norm": 1.2418942292452553, + "learning_rate": 5.681234371374446e-06, + "loss": 0.3229, + "step": 37784 + }, + { + "epoch": 0.6529064141553774, + "grad_norm": 0.7523032478795608, + "learning_rate": 5.680729611911555e-06, + "loss": 0.3835, + "step": 37785 + }, + { + "epoch": 0.6529236936687863, + "grad_norm": 1.0148594693671804, + "learning_rate": 5.68022486597695e-06, + "loss": 0.2257, + "step": 37786 + }, + { + "epoch": 0.6529409731821952, + "grad_norm": 0.8690606974717081, + "learning_rate": 5.6797201335722064e-06, + "loss": 0.2803, + "step": 37787 + }, + { + "epoch": 0.6529582526956041, + "grad_norm": 0.9159929093591561, + "learning_rate": 5.679215414698906e-06, + "loss": 0.4953, + "step": 37788 + }, + { + "epoch": 0.6529755322090129, + "grad_norm": 1.2954063905864743, + "learning_rate": 5.678710709358632e-06, + "loss": 0.4709, + "step": 37789 + }, + { + "epoch": 0.6529928117224219, + "grad_norm": 1.4415364310378547, + "learning_rate": 5.678206017552967e-06, + "loss": 0.4081, + "step": 37790 + }, + { + "epoch": 0.6530100912358308, + "grad_norm": 1.2174966818308257, + "learning_rate": 5.677701339283492e-06, + "loss": 0.3822, + "step": 37791 + }, + { + "epoch": 0.6530273707492397, + "grad_norm": 0.9490690024526419, + "learning_rate": 5.677196674551779e-06, + "loss": 0.4518, + "step": 37792 + }, + { + "epoch": 0.6530446502626486, + "grad_norm": 0.8949165789061257, + "learning_rate": 5.676692023359416e-06, + "loss": 0.1846, + "step": 37793 + }, + { + "epoch": 0.6530619297760575, + "grad_norm": 0.7222945844380321, + "learning_rate": 5.676187385707981e-06, + "loss": 0.6282, + "step": 37794 + }, + { + "epoch": 0.6530792092894664, + "grad_norm": 1.6273387287810712, + "learning_rate": 5.675682761599059e-06, + "loss": 0.3245, + "step": 37795 + }, + { + "epoch": 0.6530964888028753, + "grad_norm": 1.3558056521622155, + "learning_rate": 5.675178151034224e-06, + "loss": 0.2474, + "step": 37796 + }, + { + "epoch": 0.6531137683162842, + "grad_norm": 1.2806742092356942, + "learning_rate": 5.674673554015064e-06, + "loss": 0.4018, + "step": 37797 + }, + { + "epoch": 0.6531310478296931, + "grad_norm": 1.0287005783850915, + "learning_rate": 5.674168970543151e-06, + "loss": 0.3373, + "step": 37798 + }, + { + "epoch": 0.653148327343102, + "grad_norm": 1.6855978681545194, + "learning_rate": 5.673664400620069e-06, + "loss": 0.426, + "step": 37799 + }, + { + "epoch": 0.6531656068565109, + "grad_norm": 1.444958258855151, + "learning_rate": 5.6731598442474e-06, + "loss": 0.4403, + "step": 37800 + }, + { + "epoch": 0.6531828863699198, + "grad_norm": 1.0609133233375698, + "learning_rate": 5.672655301426727e-06, + "loss": 0.2941, + "step": 37801 + }, + { + "epoch": 0.6532001658833287, + "grad_norm": 1.3096435872989842, + "learning_rate": 5.672150772159626e-06, + "loss": 0.5393, + "step": 37802 + }, + { + "epoch": 0.6532174453967377, + "grad_norm": 0.7709149405334653, + "learning_rate": 5.671646256447674e-06, + "loss": 0.2489, + "step": 37803 + }, + { + "epoch": 0.6532347249101466, + "grad_norm": 1.1358449794976695, + "learning_rate": 5.671141754292454e-06, + "loss": 0.4061, + "step": 37804 + }, + { + "epoch": 0.6532520044235555, + "grad_norm": 1.178731770280504, + "learning_rate": 5.670637265695547e-06, + "loss": 0.4937, + "step": 37805 + }, + { + "epoch": 0.6532692839369644, + "grad_norm": 1.0596885516258803, + "learning_rate": 5.670132790658538e-06, + "loss": 0.3123, + "step": 37806 + }, + { + "epoch": 0.6532865634503733, + "grad_norm": 1.3648421112149876, + "learning_rate": 5.669628329182999e-06, + "loss": 0.3315, + "step": 37807 + }, + { + "epoch": 0.6533038429637822, + "grad_norm": 1.3563438529133922, + "learning_rate": 5.669123881270515e-06, + "loss": 0.3361, + "step": 37808 + }, + { + "epoch": 0.6533211224771911, + "grad_norm": 1.0239656393392527, + "learning_rate": 5.668619446922662e-06, + "loss": 0.403, + "step": 37809 + }, + { + "epoch": 0.6533384019905999, + "grad_norm": 0.7848044525231073, + "learning_rate": 5.668115026141021e-06, + "loss": 0.3875, + "step": 37810 + }, + { + "epoch": 0.6533556815040088, + "grad_norm": 0.870918350119725, + "learning_rate": 5.667610618927173e-06, + "loss": 0.438, + "step": 37811 + }, + { + "epoch": 0.6533729610174177, + "grad_norm": 0.856842310853443, + "learning_rate": 5.667106225282701e-06, + "loss": 0.3929, + "step": 37812 + }, + { + "epoch": 0.6533902405308266, + "grad_norm": 1.7557345223898826, + "learning_rate": 5.666601845209183e-06, + "loss": 0.2689, + "step": 37813 + }, + { + "epoch": 0.6534075200442355, + "grad_norm": 1.156158273857195, + "learning_rate": 5.6660974787081935e-06, + "loss": 0.4983, + "step": 37814 + }, + { + "epoch": 0.6534247995576444, + "grad_norm": 1.0894558926532971, + "learning_rate": 5.665593125781319e-06, + "loss": 0.339, + "step": 37815 + }, + { + "epoch": 0.6534420790710533, + "grad_norm": 1.3019459342107171, + "learning_rate": 5.665088786430129e-06, + "loss": 0.486, + "step": 37816 + }, + { + "epoch": 0.6534593585844622, + "grad_norm": 1.1365398552030566, + "learning_rate": 5.664584460656218e-06, + "loss": 0.4056, + "step": 37817 + }, + { + "epoch": 0.6534766380978712, + "grad_norm": 0.983348347499576, + "learning_rate": 5.664080148461155e-06, + "loss": 0.2988, + "step": 37818 + }, + { + "epoch": 0.6534939176112801, + "grad_norm": 1.2022067314153502, + "learning_rate": 5.663575849846526e-06, + "loss": 0.3379, + "step": 37819 + }, + { + "epoch": 0.653511197124689, + "grad_norm": 1.020007557640832, + "learning_rate": 5.663071564813902e-06, + "loss": 0.4678, + "step": 37820 + }, + { + "epoch": 0.6535284766380979, + "grad_norm": 1.215165588511553, + "learning_rate": 5.662567293364871e-06, + "loss": 0.2704, + "step": 37821 + }, + { + "epoch": 0.6535457561515068, + "grad_norm": 2.6104327413750665, + "learning_rate": 5.662063035501007e-06, + "loss": 0.535, + "step": 37822 + }, + { + "epoch": 0.6535630356649157, + "grad_norm": 1.390397849542873, + "learning_rate": 5.661558791223889e-06, + "loss": 0.3666, + "step": 37823 + }, + { + "epoch": 0.6535803151783246, + "grad_norm": 1.182289676192479, + "learning_rate": 5.661054560535104e-06, + "loss": 0.517, + "step": 37824 + }, + { + "epoch": 0.6535975946917335, + "grad_norm": 1.9948130750738775, + "learning_rate": 5.660550343436222e-06, + "loss": 0.4101, + "step": 37825 + }, + { + "epoch": 0.6536148742051424, + "grad_norm": 1.3355486119647002, + "learning_rate": 5.66004613992883e-06, + "loss": 0.4744, + "step": 37826 + }, + { + "epoch": 0.6536321537185513, + "grad_norm": 1.5907667250483388, + "learning_rate": 5.6595419500144975e-06, + "loss": 0.3298, + "step": 37827 + }, + { + "epoch": 0.6536494332319602, + "grad_norm": 1.4627846485307194, + "learning_rate": 5.659037773694811e-06, + "loss": 0.3897, + "step": 37828 + }, + { + "epoch": 0.6536667127453691, + "grad_norm": 1.233524024010697, + "learning_rate": 5.6585336109713484e-06, + "loss": 0.3832, + "step": 37829 + }, + { + "epoch": 0.653683992258778, + "grad_norm": 1.1878534201628548, + "learning_rate": 5.658029461845692e-06, + "loss": 0.3881, + "step": 37830 + }, + { + "epoch": 0.6537012717721868, + "grad_norm": 0.5647217964686779, + "learning_rate": 5.657525326319413e-06, + "loss": 0.6883, + "step": 37831 + }, + { + "epoch": 0.6537185512855958, + "grad_norm": 0.9690670162890545, + "learning_rate": 5.6570212043940995e-06, + "loss": 0.2061, + "step": 37832 + }, + { + "epoch": 0.6537358307990047, + "grad_norm": 1.3142812517976006, + "learning_rate": 5.656517096071321e-06, + "loss": 0.3375, + "step": 37833 + }, + { + "epoch": 0.6537531103124136, + "grad_norm": 1.047869609648755, + "learning_rate": 5.656013001352663e-06, + "loss": 0.2341, + "step": 37834 + }, + { + "epoch": 0.6537703898258225, + "grad_norm": 0.8931397842265385, + "learning_rate": 5.655508920239703e-06, + "loss": 0.3246, + "step": 37835 + }, + { + "epoch": 0.6537876693392314, + "grad_norm": 1.3064464313640693, + "learning_rate": 5.655004852734017e-06, + "loss": 0.4661, + "step": 37836 + }, + { + "epoch": 0.6538049488526403, + "grad_norm": 1.2950499190753313, + "learning_rate": 5.65450079883719e-06, + "loss": 0.5798, + "step": 37837 + }, + { + "epoch": 0.6538222283660492, + "grad_norm": 1.2565275399981084, + "learning_rate": 5.653996758550792e-06, + "loss": 0.5583, + "step": 37838 + }, + { + "epoch": 0.6538395078794581, + "grad_norm": 0.7754873805300188, + "learning_rate": 5.653492731876409e-06, + "loss": 0.3747, + "step": 37839 + }, + { + "epoch": 0.653856787392867, + "grad_norm": 0.4070958013140449, + "learning_rate": 5.6529887188156155e-06, + "loss": 0.5449, + "step": 37840 + }, + { + "epoch": 0.6538740669062759, + "grad_norm": 1.7997071319500801, + "learning_rate": 5.652484719369995e-06, + "loss": 0.3071, + "step": 37841 + }, + { + "epoch": 0.6538913464196848, + "grad_norm": 1.0352008103441708, + "learning_rate": 5.651980733541118e-06, + "loss": 0.1793, + "step": 37842 + }, + { + "epoch": 0.6539086259330937, + "grad_norm": 1.0513855009253135, + "learning_rate": 5.651476761330573e-06, + "loss": 0.3931, + "step": 37843 + }, + { + "epoch": 0.6539259054465026, + "grad_norm": 1.1297134183577855, + "learning_rate": 5.650972802739929e-06, + "loss": 0.2821, + "step": 37844 + }, + { + "epoch": 0.6539431849599115, + "grad_norm": 0.738747901876347, + "learning_rate": 5.65046885777077e-06, + "loss": 0.1817, + "step": 37845 + }, + { + "epoch": 0.6539604644733205, + "grad_norm": 1.556937231111968, + "learning_rate": 5.649964926424676e-06, + "loss": 0.5476, + "step": 37846 + }, + { + "epoch": 0.6539777439867294, + "grad_norm": 1.2864468360474774, + "learning_rate": 5.6494610087032185e-06, + "loss": 0.5382, + "step": 37847 + }, + { + "epoch": 0.6539950235001383, + "grad_norm": 0.7801096939910057, + "learning_rate": 5.648957104607982e-06, + "loss": 0.2841, + "step": 37848 + }, + { + "epoch": 0.6540123030135472, + "grad_norm": 0.9296753011500246, + "learning_rate": 5.6484532141405415e-06, + "loss": 0.2798, + "step": 37849 + }, + { + "epoch": 0.6540295825269561, + "grad_norm": 1.024063253038572, + "learning_rate": 5.647949337302474e-06, + "loss": 0.4272, + "step": 37850 + }, + { + "epoch": 0.654046862040365, + "grad_norm": 1.1935972454372754, + "learning_rate": 5.6474454740953614e-06, + "loss": 0.3999, + "step": 37851 + }, + { + "epoch": 0.6540641415537738, + "grad_norm": 2.173175872559476, + "learning_rate": 5.646941624520783e-06, + "loss": 0.2724, + "step": 37852 + }, + { + "epoch": 0.6540814210671827, + "grad_norm": 1.4380031078993585, + "learning_rate": 5.646437788580316e-06, + "loss": 0.3543, + "step": 37853 + }, + { + "epoch": 0.6540987005805916, + "grad_norm": 1.3646162583801855, + "learning_rate": 5.6459339662755315e-06, + "loss": 0.4345, + "step": 37854 + }, + { + "epoch": 0.6541159800940005, + "grad_norm": 1.1180358971074065, + "learning_rate": 5.645430157608017e-06, + "loss": 0.374, + "step": 37855 + }, + { + "epoch": 0.6541332596074094, + "grad_norm": 1.3273061593983524, + "learning_rate": 5.644926362579338e-06, + "loss": 0.2068, + "step": 37856 + }, + { + "epoch": 0.6541505391208183, + "grad_norm": 1.2461625851172164, + "learning_rate": 5.644422581191089e-06, + "loss": 0.2666, + "step": 37857 + }, + { + "epoch": 0.6541678186342272, + "grad_norm": 1.299254704008343, + "learning_rate": 5.643918813444835e-06, + "loss": 0.3777, + "step": 37858 + }, + { + "epoch": 0.6541850981476361, + "grad_norm": 0.6783977381834706, + "learning_rate": 5.643415059342163e-06, + "loss": 0.7234, + "step": 37859 + }, + { + "epoch": 0.654202377661045, + "grad_norm": 1.1063365840468673, + "learning_rate": 5.642911318884641e-06, + "loss": 0.3106, + "step": 37860 + }, + { + "epoch": 0.654219657174454, + "grad_norm": 0.7421138942304273, + "learning_rate": 5.642407592073858e-06, + "loss": 0.331, + "step": 37861 + }, + { + "epoch": 0.6542369366878629, + "grad_norm": 1.2554028141386522, + "learning_rate": 5.6419038789113744e-06, + "loss": 0.3627, + "step": 37862 + }, + { + "epoch": 0.6542542162012718, + "grad_norm": 1.7797732756572304, + "learning_rate": 5.6414001793987905e-06, + "loss": 0.3281, + "step": 37863 + }, + { + "epoch": 0.6542714957146807, + "grad_norm": 1.353651181400298, + "learning_rate": 5.64089649353767e-06, + "loss": 0.3016, + "step": 37864 + }, + { + "epoch": 0.6542887752280896, + "grad_norm": 1.1075203175382933, + "learning_rate": 5.64039282132959e-06, + "loss": 0.3733, + "step": 37865 + }, + { + "epoch": 0.6543060547414985, + "grad_norm": 0.8962926176179387, + "learning_rate": 5.639889162776134e-06, + "loss": 0.2862, + "step": 37866 + }, + { + "epoch": 0.6543233342549074, + "grad_norm": 1.4905288474458271, + "learning_rate": 5.639385517878873e-06, + "loss": 0.4187, + "step": 37867 + }, + { + "epoch": 0.6543406137683163, + "grad_norm": 0.8431683084325587, + "learning_rate": 5.638881886639389e-06, + "loss": 0.3032, + "step": 37868 + }, + { + "epoch": 0.6543578932817252, + "grad_norm": 1.1452109549842817, + "learning_rate": 5.638378269059257e-06, + "loss": 0.4689, + "step": 37869 + }, + { + "epoch": 0.6543751727951341, + "grad_norm": 1.2141879145914243, + "learning_rate": 5.637874665140061e-06, + "loss": 0.3374, + "step": 37870 + }, + { + "epoch": 0.654392452308543, + "grad_norm": 0.7636751354615288, + "learning_rate": 5.6373710748833684e-06, + "loss": 0.7754, + "step": 37871 + }, + { + "epoch": 0.654409731821952, + "grad_norm": 1.0679609946571926, + "learning_rate": 5.636867498290766e-06, + "loss": 0.4229, + "step": 37872 + }, + { + "epoch": 0.6544270113353607, + "grad_norm": 0.9873373676338686, + "learning_rate": 5.63636393536382e-06, + "loss": 0.3985, + "step": 37873 + }, + { + "epoch": 0.6544442908487696, + "grad_norm": 0.9136570530066732, + "learning_rate": 5.635860386104115e-06, + "loss": 0.2601, + "step": 37874 + }, + { + "epoch": 0.6544615703621786, + "grad_norm": 1.2596542007635279, + "learning_rate": 5.635356850513232e-06, + "loss": 0.4204, + "step": 37875 + }, + { + "epoch": 0.6544788498755875, + "grad_norm": 0.8852952544510233, + "learning_rate": 5.634853328592738e-06, + "loss": 0.5675, + "step": 37876 + }, + { + "epoch": 0.6544961293889964, + "grad_norm": 1.5784654938058582, + "learning_rate": 5.634349820344218e-06, + "loss": 0.4866, + "step": 37877 + }, + { + "epoch": 0.6545134089024053, + "grad_norm": 1.1084865444703649, + "learning_rate": 5.6338463257692435e-06, + "loss": 0.4733, + "step": 37878 + }, + { + "epoch": 0.6545306884158142, + "grad_norm": 1.6035107991836897, + "learning_rate": 5.633342844869394e-06, + "loss": 0.6518, + "step": 37879 + }, + { + "epoch": 0.6545479679292231, + "grad_norm": 0.7327878279907443, + "learning_rate": 5.632839377646247e-06, + "loss": 0.3345, + "step": 37880 + }, + { + "epoch": 0.654565247442632, + "grad_norm": 0.42132476623655546, + "learning_rate": 5.632335924101382e-06, + "loss": 0.501, + "step": 37881 + }, + { + "epoch": 0.6545825269560409, + "grad_norm": 1.8927504304579064, + "learning_rate": 5.631832484236369e-06, + "loss": 0.4182, + "step": 37882 + }, + { + "epoch": 0.6545998064694498, + "grad_norm": 1.736260120924134, + "learning_rate": 5.631329058052792e-06, + "loss": 0.4647, + "step": 37883 + }, + { + "epoch": 0.6546170859828587, + "grad_norm": 1.6507783772170532, + "learning_rate": 5.630825645552222e-06, + "loss": 0.6321, + "step": 37884 + }, + { + "epoch": 0.6546343654962676, + "grad_norm": 0.8994244267790708, + "learning_rate": 5.630322246736237e-06, + "loss": 0.3953, + "step": 37885 + }, + { + "epoch": 0.6546516450096765, + "grad_norm": 1.487630496241397, + "learning_rate": 5.62981886160642e-06, + "loss": 0.3443, + "step": 37886 + }, + { + "epoch": 0.6546689245230854, + "grad_norm": 1.2151926660915449, + "learning_rate": 5.629315490164338e-06, + "loss": 0.2673, + "step": 37887 + }, + { + "epoch": 0.6546862040364944, + "grad_norm": 0.9706749859023208, + "learning_rate": 5.628812132411576e-06, + "loss": 0.386, + "step": 37888 + }, + { + "epoch": 0.6547034835499033, + "grad_norm": 1.1888810756741683, + "learning_rate": 5.628308788349701e-06, + "loss": 0.2199, + "step": 37889 + }, + { + "epoch": 0.6547207630633122, + "grad_norm": 1.279727331326816, + "learning_rate": 5.627805457980298e-06, + "loss": 0.3017, + "step": 37890 + }, + { + "epoch": 0.6547380425767211, + "grad_norm": 0.9166706109230803, + "learning_rate": 5.627302141304938e-06, + "loss": 0.3733, + "step": 37891 + }, + { + "epoch": 0.65475532209013, + "grad_norm": 1.7171654638328047, + "learning_rate": 5.626798838325205e-06, + "loss": 0.4646, + "step": 37892 + }, + { + "epoch": 0.6547726016035389, + "grad_norm": 1.3102343681859507, + "learning_rate": 5.626295549042666e-06, + "loss": 0.418, + "step": 37893 + }, + { + "epoch": 0.6547898811169477, + "grad_norm": 1.0124895004912522, + "learning_rate": 5.625792273458906e-06, + "loss": 0.3395, + "step": 37894 + }, + { + "epoch": 0.6548071606303566, + "grad_norm": 1.5850312636066122, + "learning_rate": 5.625289011575497e-06, + "loss": 0.604, + "step": 37895 + }, + { + "epoch": 0.6548244401437655, + "grad_norm": 1.0339725340001613, + "learning_rate": 5.6247857633940084e-06, + "loss": 0.364, + "step": 37896 + }, + { + "epoch": 0.6548417196571744, + "grad_norm": 1.3621033333414168, + "learning_rate": 5.6242825289160295e-06, + "loss": 0.2514, + "step": 37897 + }, + { + "epoch": 0.6548589991705833, + "grad_norm": 0.788448800962421, + "learning_rate": 5.623779308143127e-06, + "loss": 0.4396, + "step": 37898 + }, + { + "epoch": 0.6548762786839922, + "grad_norm": 1.5459936730020005, + "learning_rate": 5.623276101076885e-06, + "loss": 0.24, + "step": 37899 + }, + { + "epoch": 0.6548935581974011, + "grad_norm": 1.1519163000584294, + "learning_rate": 5.62277290771887e-06, + "loss": 0.408, + "step": 37900 + }, + { + "epoch": 0.65491083771081, + "grad_norm": 0.690732310292432, + "learning_rate": 5.622269728070667e-06, + "loss": 0.5357, + "step": 37901 + }, + { + "epoch": 0.654928117224219, + "grad_norm": 1.3347406516315679, + "learning_rate": 5.621766562133841e-06, + "loss": 0.4563, + "step": 37902 + }, + { + "epoch": 0.6549453967376279, + "grad_norm": 1.7825920555540713, + "learning_rate": 5.6212634099099815e-06, + "loss": 0.4818, + "step": 37903 + }, + { + "epoch": 0.6549626762510368, + "grad_norm": 1.598793652651131, + "learning_rate": 5.6207602714006585e-06, + "loss": 0.2922, + "step": 37904 + }, + { + "epoch": 0.6549799557644457, + "grad_norm": 0.9389581097353217, + "learning_rate": 5.620257146607442e-06, + "loss": 0.3923, + "step": 37905 + }, + { + "epoch": 0.6549972352778546, + "grad_norm": 0.997388738436679, + "learning_rate": 5.619754035531918e-06, + "loss": 0.3543, + "step": 37906 + }, + { + "epoch": 0.6550145147912635, + "grad_norm": 1.451561085728237, + "learning_rate": 5.619250938175653e-06, + "loss": 0.264, + "step": 37907 + }, + { + "epoch": 0.6550317943046724, + "grad_norm": 1.2357468868308954, + "learning_rate": 5.618747854540228e-06, + "loss": 0.732, + "step": 37908 + }, + { + "epoch": 0.6550490738180813, + "grad_norm": 1.2302132100307992, + "learning_rate": 5.618244784627216e-06, + "loss": 0.4512, + "step": 37909 + }, + { + "epoch": 0.6550663533314902, + "grad_norm": 0.9775479175654491, + "learning_rate": 5.6177417284382e-06, + "loss": 0.2442, + "step": 37910 + }, + { + "epoch": 0.6550836328448991, + "grad_norm": 1.3279656687798442, + "learning_rate": 5.617238685974744e-06, + "loss": 0.3705, + "step": 37911 + }, + { + "epoch": 0.655100912358308, + "grad_norm": 1.8308046281315362, + "learning_rate": 5.616735657238435e-06, + "loss": 0.3614, + "step": 37912 + }, + { + "epoch": 0.6551181918717169, + "grad_norm": 1.1562099019745886, + "learning_rate": 5.616232642230839e-06, + "loss": 0.3722, + "step": 37913 + }, + { + "epoch": 0.6551354713851258, + "grad_norm": 1.0521073475606755, + "learning_rate": 5.615729640953535e-06, + "loss": 0.2519, + "step": 37914 + }, + { + "epoch": 0.6551527508985348, + "grad_norm": 0.9231675588386339, + "learning_rate": 5.615226653408102e-06, + "loss": 0.3245, + "step": 37915 + }, + { + "epoch": 0.6551700304119435, + "grad_norm": 0.8200477864577829, + "learning_rate": 5.61472367959611e-06, + "loss": 0.3088, + "step": 37916 + }, + { + "epoch": 0.6551873099253525, + "grad_norm": 1.4029648809214623, + "learning_rate": 5.614220719519139e-06, + "loss": 0.499, + "step": 37917 + }, + { + "epoch": 0.6552045894387614, + "grad_norm": 1.230017480441527, + "learning_rate": 5.61371777317876e-06, + "loss": 0.3715, + "step": 37918 + }, + { + "epoch": 0.6552218689521703, + "grad_norm": 0.8822706515326274, + "learning_rate": 5.613214840576549e-06, + "loss": 0.3538, + "step": 37919 + }, + { + "epoch": 0.6552391484655792, + "grad_norm": 0.7959517466550461, + "learning_rate": 5.612711921714083e-06, + "loss": 0.3879, + "step": 37920 + }, + { + "epoch": 0.6552564279789881, + "grad_norm": 0.8831206353729564, + "learning_rate": 5.61220901659294e-06, + "loss": 0.3901, + "step": 37921 + }, + { + "epoch": 0.655273707492397, + "grad_norm": 1.1189005889688566, + "learning_rate": 5.611706125214689e-06, + "loss": 0.2401, + "step": 37922 + }, + { + "epoch": 0.6552909870058059, + "grad_norm": 1.0019343812750992, + "learning_rate": 5.611203247580912e-06, + "loss": 0.4343, + "step": 37923 + }, + { + "epoch": 0.6553082665192148, + "grad_norm": 0.9581279533400836, + "learning_rate": 5.610700383693175e-06, + "loss": 0.2718, + "step": 37924 + }, + { + "epoch": 0.6553255460326237, + "grad_norm": 1.0348371889807604, + "learning_rate": 5.610197533553058e-06, + "loss": 0.4458, + "step": 37925 + }, + { + "epoch": 0.6553428255460326, + "grad_norm": 2.080672180267964, + "learning_rate": 5.60969469716214e-06, + "loss": 0.3695, + "step": 37926 + }, + { + "epoch": 0.6553601050594415, + "grad_norm": 0.9211202630822563, + "learning_rate": 5.609191874521989e-06, + "loss": 0.3751, + "step": 37927 + }, + { + "epoch": 0.6553773845728504, + "grad_norm": 0.6036610306374139, + "learning_rate": 5.608689065634185e-06, + "loss": 0.8289, + "step": 37928 + }, + { + "epoch": 0.6553946640862593, + "grad_norm": 1.207792259909448, + "learning_rate": 5.608186270500298e-06, + "loss": 0.4424, + "step": 37929 + }, + { + "epoch": 0.6554119435996683, + "grad_norm": 1.5375024262501151, + "learning_rate": 5.607683489121904e-06, + "loss": 0.3682, + "step": 37930 + }, + { + "epoch": 0.6554292231130772, + "grad_norm": 1.4293466016441114, + "learning_rate": 5.6071807215005805e-06, + "loss": 0.427, + "step": 37931 + }, + { + "epoch": 0.6554465026264861, + "grad_norm": 1.3254879499310515, + "learning_rate": 5.606677967637903e-06, + "loss": 0.4616, + "step": 37932 + }, + { + "epoch": 0.655463782139895, + "grad_norm": 0.8763288658342478, + "learning_rate": 5.606175227535442e-06, + "loss": 0.2255, + "step": 37933 + }, + { + "epoch": 0.6554810616533039, + "grad_norm": 0.7209358401448053, + "learning_rate": 5.6056725011947766e-06, + "loss": 0.2364, + "step": 37934 + }, + { + "epoch": 0.6554983411667128, + "grad_norm": 1.8928482166801446, + "learning_rate": 5.60516978861748e-06, + "loss": 0.4668, + "step": 37935 + }, + { + "epoch": 0.6555156206801217, + "grad_norm": 1.2510063235247475, + "learning_rate": 5.604667089805116e-06, + "loss": 0.3084, + "step": 37936 + }, + { + "epoch": 0.6555329001935305, + "grad_norm": 0.5836441338934928, + "learning_rate": 5.604164404759278e-06, + "loss": 0.7049, + "step": 37937 + }, + { + "epoch": 0.6555501797069394, + "grad_norm": 1.153765932099177, + "learning_rate": 5.603661733481527e-06, + "loss": 0.4898, + "step": 37938 + }, + { + "epoch": 0.6555674592203483, + "grad_norm": 1.3247060115236455, + "learning_rate": 5.603159075973446e-06, + "loss": 0.4623, + "step": 37939 + }, + { + "epoch": 0.6555847387337572, + "grad_norm": 1.0000534597136463, + "learning_rate": 5.602656432236601e-06, + "loss": 0.3948, + "step": 37940 + }, + { + "epoch": 0.6556020182471661, + "grad_norm": 1.4530926688286363, + "learning_rate": 5.602153802272574e-06, + "loss": 0.3535, + "step": 37941 + }, + { + "epoch": 0.655619297760575, + "grad_norm": 1.1657890037864078, + "learning_rate": 5.60165118608293e-06, + "loss": 0.5274, + "step": 37942 + }, + { + "epoch": 0.655636577273984, + "grad_norm": 1.0095930132988087, + "learning_rate": 5.60114858366925e-06, + "loss": 0.4436, + "step": 37943 + }, + { + "epoch": 0.6556538567873929, + "grad_norm": 1.192196462222498, + "learning_rate": 5.6006459950331074e-06, + "loss": 0.3814, + "step": 37944 + }, + { + "epoch": 0.6556711363008018, + "grad_norm": 1.0493132641202176, + "learning_rate": 5.60014342017608e-06, + "loss": 0.2732, + "step": 37945 + }, + { + "epoch": 0.6556884158142107, + "grad_norm": 1.0816691536142016, + "learning_rate": 5.599640859099736e-06, + "loss": 0.4469, + "step": 37946 + }, + { + "epoch": 0.6557056953276196, + "grad_norm": 0.8006195686957162, + "learning_rate": 5.5991383118056495e-06, + "loss": 0.2295, + "step": 37947 + }, + { + "epoch": 0.6557229748410285, + "grad_norm": 1.2365583842732082, + "learning_rate": 5.598635778295395e-06, + "loss": 0.4605, + "step": 37948 + }, + { + "epoch": 0.6557402543544374, + "grad_norm": 1.2761649558991799, + "learning_rate": 5.598133258570549e-06, + "loss": 0.2996, + "step": 37949 + }, + { + "epoch": 0.6557575338678463, + "grad_norm": 0.936937453784083, + "learning_rate": 5.5976307526326855e-06, + "loss": 0.4552, + "step": 37950 + }, + { + "epoch": 0.6557748133812552, + "grad_norm": 1.1232265339174847, + "learning_rate": 5.597128260483375e-06, + "loss": 0.3975, + "step": 37951 + }, + { + "epoch": 0.6557920928946641, + "grad_norm": 1.3650404225118073, + "learning_rate": 5.596625782124197e-06, + "loss": 0.3203, + "step": 37952 + }, + { + "epoch": 0.655809372408073, + "grad_norm": 1.375174719603419, + "learning_rate": 5.596123317556718e-06, + "loss": 0.3468, + "step": 37953 + }, + { + "epoch": 0.6558266519214819, + "grad_norm": 1.0129764868172195, + "learning_rate": 5.595620866782515e-06, + "loss": 0.4165, + "step": 37954 + }, + { + "epoch": 0.6558439314348908, + "grad_norm": 1.2808449467113343, + "learning_rate": 5.595118429803162e-06, + "loss": 0.5019, + "step": 37955 + }, + { + "epoch": 0.6558612109482997, + "grad_norm": 1.3521940293959407, + "learning_rate": 5.594616006620237e-06, + "loss": 0.3541, + "step": 37956 + }, + { + "epoch": 0.6558784904617087, + "grad_norm": 1.3364577067296346, + "learning_rate": 5.59411359723531e-06, + "loss": 0.4506, + "step": 37957 + }, + { + "epoch": 0.6558957699751174, + "grad_norm": 1.2925517546487053, + "learning_rate": 5.59361120164995e-06, + "loss": 0.4046, + "step": 37958 + }, + { + "epoch": 0.6559130494885264, + "grad_norm": 1.0192536017976168, + "learning_rate": 5.5931088198657335e-06, + "loss": 0.5514, + "step": 37959 + }, + { + "epoch": 0.6559303290019353, + "grad_norm": 0.831294329562049, + "learning_rate": 5.5926064518842374e-06, + "loss": 0.3782, + "step": 37960 + }, + { + "epoch": 0.6559476085153442, + "grad_norm": 1.5319898774660259, + "learning_rate": 5.592104097707034e-06, + "loss": 0.2785, + "step": 37961 + }, + { + "epoch": 0.6559648880287531, + "grad_norm": 1.8657688929077616, + "learning_rate": 5.591601757335694e-06, + "loss": 0.3581, + "step": 37962 + }, + { + "epoch": 0.655982167542162, + "grad_norm": 2.0079178354352223, + "learning_rate": 5.591099430771794e-06, + "loss": 0.5246, + "step": 37963 + }, + { + "epoch": 0.6559994470555709, + "grad_norm": 1.5779088904212377, + "learning_rate": 5.590597118016903e-06, + "loss": 0.6631, + "step": 37964 + }, + { + "epoch": 0.6560167265689798, + "grad_norm": 0.9886153699285714, + "learning_rate": 5.5900948190725975e-06, + "loss": 0.5172, + "step": 37965 + }, + { + "epoch": 0.6560340060823887, + "grad_norm": 1.6052759243517407, + "learning_rate": 5.589592533940454e-06, + "loss": 0.2535, + "step": 37966 + }, + { + "epoch": 0.6560512855957976, + "grad_norm": 1.1378166136613306, + "learning_rate": 5.5890902626220365e-06, + "loss": 0.7436, + "step": 37967 + }, + { + "epoch": 0.6560685651092065, + "grad_norm": 0.8976241712937123, + "learning_rate": 5.588588005118928e-06, + "loss": 0.4757, + "step": 37968 + }, + { + "epoch": 0.6560858446226154, + "grad_norm": 0.5587444323703998, + "learning_rate": 5.5880857614326935e-06, + "loss": 0.4548, + "step": 37969 + }, + { + "epoch": 0.6561031241360243, + "grad_norm": 0.8981489769947834, + "learning_rate": 5.587583531564909e-06, + "loss": 0.3389, + "step": 37970 + }, + { + "epoch": 0.6561204036494332, + "grad_norm": 1.5131053763309017, + "learning_rate": 5.587081315517149e-06, + "loss": 0.3553, + "step": 37971 + }, + { + "epoch": 0.6561376831628422, + "grad_norm": 0.9879279653458789, + "learning_rate": 5.5865791132909905e-06, + "loss": 0.3458, + "step": 37972 + }, + { + "epoch": 0.6561549626762511, + "grad_norm": 1.6515971436530335, + "learning_rate": 5.586076924887997e-06, + "loss": 0.2781, + "step": 37973 + }, + { + "epoch": 0.65617224218966, + "grad_norm": 1.3637123604933323, + "learning_rate": 5.585574750309749e-06, + "loss": 0.3436, + "step": 37974 + }, + { + "epoch": 0.6561895217030689, + "grad_norm": 1.0002632673410947, + "learning_rate": 5.585072589557813e-06, + "loss": 0.6167, + "step": 37975 + }, + { + "epoch": 0.6562068012164778, + "grad_norm": 1.1714013596076738, + "learning_rate": 5.584570442633765e-06, + "loss": 0.4848, + "step": 37976 + }, + { + "epoch": 0.6562240807298867, + "grad_norm": 1.0339774319097625, + "learning_rate": 5.584068309539182e-06, + "loss": 0.3458, + "step": 37977 + }, + { + "epoch": 0.6562413602432956, + "grad_norm": 1.298523199109711, + "learning_rate": 5.5835661902756285e-06, + "loss": 0.4113, + "step": 37978 + }, + { + "epoch": 0.6562586397567044, + "grad_norm": 1.8818202514169529, + "learning_rate": 5.583064084844686e-06, + "loss": 0.4613, + "step": 37979 + }, + { + "epoch": 0.6562759192701133, + "grad_norm": 0.9790385068531812, + "learning_rate": 5.582561993247918e-06, + "loss": 0.2997, + "step": 37980 + }, + { + "epoch": 0.6562931987835222, + "grad_norm": 1.2317570832043694, + "learning_rate": 5.582059915486906e-06, + "loss": 0.3754, + "step": 37981 + }, + { + "epoch": 0.6563104782969311, + "grad_norm": 1.3713721294911474, + "learning_rate": 5.581557851563213e-06, + "loss": 0.5006, + "step": 37982 + }, + { + "epoch": 0.65632775781034, + "grad_norm": 1.0390642524379843, + "learning_rate": 5.581055801478418e-06, + "loss": 0.4071, + "step": 37983 + }, + { + "epoch": 0.6563450373237489, + "grad_norm": 1.4183534350525646, + "learning_rate": 5.580553765234091e-06, + "loss": 0.4312, + "step": 37984 + }, + { + "epoch": 0.6563623168371578, + "grad_norm": 1.055414115075911, + "learning_rate": 5.58005174283181e-06, + "loss": 0.3281, + "step": 37985 + }, + { + "epoch": 0.6563795963505668, + "grad_norm": 0.8119794935380178, + "learning_rate": 5.57954973427314e-06, + "loss": 0.1874, + "step": 37986 + }, + { + "epoch": 0.6563968758639757, + "grad_norm": 1.2337434411501396, + "learning_rate": 5.5790477395596575e-06, + "loss": 0.217, + "step": 37987 + }, + { + "epoch": 0.6564141553773846, + "grad_norm": 1.1281244850676249, + "learning_rate": 5.578545758692931e-06, + "loss": 0.3064, + "step": 37988 + }, + { + "epoch": 0.6564314348907935, + "grad_norm": 1.2208631400826089, + "learning_rate": 5.578043791674535e-06, + "loss": 0.4332, + "step": 37989 + }, + { + "epoch": 0.6564487144042024, + "grad_norm": 1.0657454693642767, + "learning_rate": 5.577541838506046e-06, + "loss": 0.3247, + "step": 37990 + }, + { + "epoch": 0.6564659939176113, + "grad_norm": 1.369607732661547, + "learning_rate": 5.577039899189027e-06, + "loss": 0.379, + "step": 37991 + }, + { + "epoch": 0.6564832734310202, + "grad_norm": 1.042000381795732, + "learning_rate": 5.576537973725061e-06, + "loss": 0.1908, + "step": 37992 + }, + { + "epoch": 0.6565005529444291, + "grad_norm": 0.7984157052341816, + "learning_rate": 5.576036062115709e-06, + "loss": 0.2884, + "step": 37993 + }, + { + "epoch": 0.656517832457838, + "grad_norm": 1.2614899965610398, + "learning_rate": 5.57553416436255e-06, + "loss": 0.5622, + "step": 37994 + }, + { + "epoch": 0.6565351119712469, + "grad_norm": 1.0230219452144507, + "learning_rate": 5.5750322804671516e-06, + "loss": 0.4475, + "step": 37995 + }, + { + "epoch": 0.6565523914846558, + "grad_norm": 0.8418148070613972, + "learning_rate": 5.574530410431094e-06, + "loss": 0.2803, + "step": 37996 + }, + { + "epoch": 0.6565696709980647, + "grad_norm": 0.5375659048374772, + "learning_rate": 5.574028554255943e-06, + "loss": 0.6619, + "step": 37997 + }, + { + "epoch": 0.6565869505114736, + "grad_norm": 1.1534997844208295, + "learning_rate": 5.573526711943268e-06, + "loss": 0.3148, + "step": 37998 + }, + { + "epoch": 0.6566042300248826, + "grad_norm": 0.8657885524803287, + "learning_rate": 5.573024883494642e-06, + "loss": 0.4108, + "step": 37999 + }, + { + "epoch": 0.6566215095382913, + "grad_norm": 1.4259551315099013, + "learning_rate": 5.572523068911639e-06, + "loss": 0.4588, + "step": 38000 + }, + { + "epoch": 0.6566387890517003, + "grad_norm": 1.175370033208945, + "learning_rate": 5.5720212681958355e-06, + "loss": 0.3792, + "step": 38001 + }, + { + "epoch": 0.6566560685651092, + "grad_norm": 1.609573633745889, + "learning_rate": 5.571519481348793e-06, + "loss": 0.5063, + "step": 38002 + }, + { + "epoch": 0.6566733480785181, + "grad_norm": 1.336919499535204, + "learning_rate": 5.571017708372091e-06, + "loss": 0.5031, + "step": 38003 + }, + { + "epoch": 0.656690627591927, + "grad_norm": 1.0715415323116588, + "learning_rate": 5.570515949267297e-06, + "loss": 0.5778, + "step": 38004 + }, + { + "epoch": 0.6567079071053359, + "grad_norm": 1.0569761477945367, + "learning_rate": 5.570014204035981e-06, + "loss": 0.4682, + "step": 38005 + }, + { + "epoch": 0.6567251866187448, + "grad_norm": 1.2323713048676184, + "learning_rate": 5.569512472679718e-06, + "loss": 0.3117, + "step": 38006 + }, + { + "epoch": 0.6567424661321537, + "grad_norm": 1.3608477055651207, + "learning_rate": 5.5690107552000824e-06, + "loss": 0.3391, + "step": 38007 + }, + { + "epoch": 0.6567597456455626, + "grad_norm": 0.9687566838870791, + "learning_rate": 5.568509051598643e-06, + "loss": 0.2486, + "step": 38008 + }, + { + "epoch": 0.6567770251589715, + "grad_norm": 1.0747071635900705, + "learning_rate": 5.5680073618769635e-06, + "loss": 0.2543, + "step": 38009 + }, + { + "epoch": 0.6567943046723804, + "grad_norm": 1.4072593509227636, + "learning_rate": 5.567505686036624e-06, + "loss": 0.3806, + "step": 38010 + }, + { + "epoch": 0.6568115841857893, + "grad_norm": 1.0630271617394622, + "learning_rate": 5.567004024079192e-06, + "loss": 0.3853, + "step": 38011 + }, + { + "epoch": 0.6568288636991982, + "grad_norm": 2.1470071428757653, + "learning_rate": 5.566502376006246e-06, + "loss": 0.4182, + "step": 38012 + }, + { + "epoch": 0.6568461432126071, + "grad_norm": 0.9751756960432016, + "learning_rate": 5.566000741819347e-06, + "loss": 0.3406, + "step": 38013 + }, + { + "epoch": 0.656863422726016, + "grad_norm": 1.1691223634527148, + "learning_rate": 5.565499121520075e-06, + "loss": 0.3332, + "step": 38014 + }, + { + "epoch": 0.656880702239425, + "grad_norm": 1.8694232543689777, + "learning_rate": 5.564997515109991e-06, + "loss": 0.4894, + "step": 38015 + }, + { + "epoch": 0.6568979817528339, + "grad_norm": 1.332008794220042, + "learning_rate": 5.564495922590673e-06, + "loss": 0.3266, + "step": 38016 + }, + { + "epoch": 0.6569152612662428, + "grad_norm": 0.9096995586472113, + "learning_rate": 5.563994343963691e-06, + "loss": 0.4011, + "step": 38017 + }, + { + "epoch": 0.6569325407796517, + "grad_norm": 1.0400389085617006, + "learning_rate": 5.5634927792306195e-06, + "loss": 0.5658, + "step": 38018 + }, + { + "epoch": 0.6569498202930606, + "grad_norm": 0.8182647912249105, + "learning_rate": 5.5629912283930266e-06, + "loss": 0.4104, + "step": 38019 + }, + { + "epoch": 0.6569670998064695, + "grad_norm": 1.2928190921881701, + "learning_rate": 5.562489691452477e-06, + "loss": 0.4938, + "step": 38020 + }, + { + "epoch": 0.6569843793198783, + "grad_norm": 0.954852074816255, + "learning_rate": 5.561988168410552e-06, + "loss": 0.2169, + "step": 38021 + }, + { + "epoch": 0.6570016588332872, + "grad_norm": 0.9928267308880268, + "learning_rate": 5.5614866592688136e-06, + "loss": 0.2584, + "step": 38022 + }, + { + "epoch": 0.6570189383466961, + "grad_norm": 2.276488434416718, + "learning_rate": 5.5609851640288344e-06, + "loss": 0.6407, + "step": 38023 + }, + { + "epoch": 0.657036217860105, + "grad_norm": 1.9405340398424233, + "learning_rate": 5.560483682692188e-06, + "loss": 0.6601, + "step": 38024 + }, + { + "epoch": 0.6570534973735139, + "grad_norm": 1.4447005457326203, + "learning_rate": 5.559982215260449e-06, + "loss": 0.3919, + "step": 38025 + }, + { + "epoch": 0.6570707768869228, + "grad_norm": 1.2819023124550617, + "learning_rate": 5.559480761735179e-06, + "loss": 0.5167, + "step": 38026 + }, + { + "epoch": 0.6570880564003317, + "grad_norm": 2.0217664518000014, + "learning_rate": 5.558979322117957e-06, + "loss": 0.3796, + "step": 38027 + }, + { + "epoch": 0.6571053359137407, + "grad_norm": 1.394276565470606, + "learning_rate": 5.558477896410344e-06, + "loss": 0.422, + "step": 38028 + }, + { + "epoch": 0.6571226154271496, + "grad_norm": 1.544995458988023, + "learning_rate": 5.557976484613916e-06, + "loss": 0.3037, + "step": 38029 + }, + { + "epoch": 0.6571398949405585, + "grad_norm": 1.2943240990020821, + "learning_rate": 5.557475086730248e-06, + "loss": 0.4845, + "step": 38030 + }, + { + "epoch": 0.6571571744539674, + "grad_norm": 0.9477397819053066, + "learning_rate": 5.556973702760901e-06, + "loss": 0.3751, + "step": 38031 + }, + { + "epoch": 0.6571744539673763, + "grad_norm": 1.1291509263678443, + "learning_rate": 5.556472332707455e-06, + "loss": 0.2584, + "step": 38032 + }, + { + "epoch": 0.6571917334807852, + "grad_norm": 0.702110282390278, + "learning_rate": 5.55597097657147e-06, + "loss": 0.8156, + "step": 38033 + }, + { + "epoch": 0.6572090129941941, + "grad_norm": 1.069780602445543, + "learning_rate": 5.555469634354521e-06, + "loss": 0.549, + "step": 38034 + }, + { + "epoch": 0.657226292507603, + "grad_norm": 1.5321673730902945, + "learning_rate": 5.55496830605818e-06, + "loss": 0.3301, + "step": 38035 + }, + { + "epoch": 0.6572435720210119, + "grad_norm": 1.5207295845997229, + "learning_rate": 5.55446699168402e-06, + "loss": 0.4204, + "step": 38036 + }, + { + "epoch": 0.6572608515344208, + "grad_norm": 1.1195904890186013, + "learning_rate": 5.553965691233604e-06, + "loss": 0.3419, + "step": 38037 + }, + { + "epoch": 0.6572781310478297, + "grad_norm": 1.1690001610491587, + "learning_rate": 5.553464404708509e-06, + "loss": 0.2959, + "step": 38038 + }, + { + "epoch": 0.6572954105612386, + "grad_norm": 1.3399812660312882, + "learning_rate": 5.552963132110296e-06, + "loss": 0.4155, + "step": 38039 + }, + { + "epoch": 0.6573126900746475, + "grad_norm": 1.3285598618687053, + "learning_rate": 5.5524618734405425e-06, + "loss": 0.3591, + "step": 38040 + }, + { + "epoch": 0.6573299695880565, + "grad_norm": 0.9081013314754038, + "learning_rate": 5.551960628700818e-06, + "loss": 0.3685, + "step": 38041 + }, + { + "epoch": 0.6573472491014652, + "grad_norm": 1.2332158496148593, + "learning_rate": 5.551459397892689e-06, + "loss": 0.4311, + "step": 38042 + }, + { + "epoch": 0.6573645286148742, + "grad_norm": 0.6954049340298273, + "learning_rate": 5.5509581810177315e-06, + "loss": 0.4266, + "step": 38043 + }, + { + "epoch": 0.6573818081282831, + "grad_norm": 1.1152724873270596, + "learning_rate": 5.550456978077505e-06, + "loss": 0.334, + "step": 38044 + }, + { + "epoch": 0.657399087641692, + "grad_norm": 1.033263159799684, + "learning_rate": 5.549955789073587e-06, + "loss": 0.4359, + "step": 38045 + }, + { + "epoch": 0.6574163671551009, + "grad_norm": 1.2110489945474454, + "learning_rate": 5.549454614007545e-06, + "loss": 0.4064, + "step": 38046 + }, + { + "epoch": 0.6574336466685098, + "grad_norm": 1.2310882211473761, + "learning_rate": 5.548953452880954e-06, + "loss": 0.3628, + "step": 38047 + }, + { + "epoch": 0.6574509261819187, + "grad_norm": 1.21270899542901, + "learning_rate": 5.548452305695375e-06, + "loss": 0.2895, + "step": 38048 + }, + { + "epoch": 0.6574682056953276, + "grad_norm": 1.1314186000704316, + "learning_rate": 5.547951172452384e-06, + "loss": 0.3634, + "step": 38049 + }, + { + "epoch": 0.6574854852087365, + "grad_norm": 1.0974612377726076, + "learning_rate": 5.547450053153546e-06, + "loss": 0.3926, + "step": 38050 + }, + { + "epoch": 0.6575027647221454, + "grad_norm": 1.0688130811999614, + "learning_rate": 5.5469489478004326e-06, + "loss": 0.2564, + "step": 38051 + }, + { + "epoch": 0.6575200442355543, + "grad_norm": 0.6573118556401949, + "learning_rate": 5.5464478563946165e-06, + "loss": 0.6547, + "step": 38052 + }, + { + "epoch": 0.6575373237489632, + "grad_norm": 1.134955303502123, + "learning_rate": 5.545946778937661e-06, + "loss": 0.2367, + "step": 38053 + }, + { + "epoch": 0.6575546032623721, + "grad_norm": 1.5611828885944614, + "learning_rate": 5.545445715431143e-06, + "loss": 0.3755, + "step": 38054 + }, + { + "epoch": 0.657571882775781, + "grad_norm": 0.8526956860236806, + "learning_rate": 5.544944665876624e-06, + "loss": 0.3676, + "step": 38055 + }, + { + "epoch": 0.65758916228919, + "grad_norm": 1.5997140878277458, + "learning_rate": 5.544443630275675e-06, + "loss": 0.3986, + "step": 38056 + }, + { + "epoch": 0.6576064418025989, + "grad_norm": 2.784918806607725, + "learning_rate": 5.543942608629869e-06, + "loss": 0.3137, + "step": 38057 + }, + { + "epoch": 0.6576237213160078, + "grad_norm": 1.6367377040288738, + "learning_rate": 5.543441600940777e-06, + "loss": 0.3086, + "step": 38058 + }, + { + "epoch": 0.6576410008294167, + "grad_norm": 0.8108477969721459, + "learning_rate": 5.542940607209964e-06, + "loss": 0.2388, + "step": 38059 + }, + { + "epoch": 0.6576582803428256, + "grad_norm": 0.9586719639348267, + "learning_rate": 5.5424396274389955e-06, + "loss": 0.3761, + "step": 38060 + }, + { + "epoch": 0.6576755598562345, + "grad_norm": 1.8138457486330435, + "learning_rate": 5.5419386616294494e-06, + "loss": 0.5026, + "step": 38061 + }, + { + "epoch": 0.6576928393696434, + "grad_norm": 0.8729756041672284, + "learning_rate": 5.541437709782887e-06, + "loss": 0.3363, + "step": 38062 + }, + { + "epoch": 0.6577101188830523, + "grad_norm": 1.1338612689040155, + "learning_rate": 5.540936771900879e-06, + "loss": 0.4572, + "step": 38063 + }, + { + "epoch": 0.6577273983964611, + "grad_norm": 1.5517920097521378, + "learning_rate": 5.5404358479849965e-06, + "loss": 0.3946, + "step": 38064 + }, + { + "epoch": 0.65774467790987, + "grad_norm": 1.2353489670067956, + "learning_rate": 5.539934938036812e-06, + "loss": 0.392, + "step": 38065 + }, + { + "epoch": 0.6577619574232789, + "grad_norm": 1.3485113261384885, + "learning_rate": 5.539434042057885e-06, + "loss": 0.3576, + "step": 38066 + }, + { + "epoch": 0.6577792369366878, + "grad_norm": 1.0636159109233068, + "learning_rate": 5.538933160049795e-06, + "loss": 0.3963, + "step": 38067 + }, + { + "epoch": 0.6577965164500967, + "grad_norm": 1.5565151776212935, + "learning_rate": 5.538432292014102e-06, + "loss": 0.4818, + "step": 38068 + }, + { + "epoch": 0.6578137959635056, + "grad_norm": 0.9706086552434386, + "learning_rate": 5.537931437952377e-06, + "loss": 0.1935, + "step": 38069 + }, + { + "epoch": 0.6578310754769146, + "grad_norm": 1.4507238251808847, + "learning_rate": 5.537430597866193e-06, + "loss": 0.5186, + "step": 38070 + }, + { + "epoch": 0.6578483549903235, + "grad_norm": 1.0591501631596938, + "learning_rate": 5.536929771757114e-06, + "loss": 0.4133, + "step": 38071 + }, + { + "epoch": 0.6578656345037324, + "grad_norm": 0.8326534237886746, + "learning_rate": 5.536428959626712e-06, + "loss": 0.2028, + "step": 38072 + }, + { + "epoch": 0.6578829140171413, + "grad_norm": 1.8621129685499562, + "learning_rate": 5.535928161476551e-06, + "loss": 0.3518, + "step": 38073 + }, + { + "epoch": 0.6579001935305502, + "grad_norm": 1.0476358258844212, + "learning_rate": 5.5354273773082015e-06, + "loss": 0.3312, + "step": 38074 + }, + { + "epoch": 0.6579174730439591, + "grad_norm": 1.1847209371056326, + "learning_rate": 5.534926607123232e-06, + "loss": 0.449, + "step": 38075 + }, + { + "epoch": 0.657934752557368, + "grad_norm": 1.0276749428799128, + "learning_rate": 5.534425850923217e-06, + "loss": 0.4921, + "step": 38076 + }, + { + "epoch": 0.6579520320707769, + "grad_norm": 1.3580246304833739, + "learning_rate": 5.533925108709716e-06, + "loss": 0.4199, + "step": 38077 + }, + { + "epoch": 0.6579693115841858, + "grad_norm": 1.2396762950501843, + "learning_rate": 5.533424380484303e-06, + "loss": 0.4837, + "step": 38078 + }, + { + "epoch": 0.6579865910975947, + "grad_norm": 1.1300207867270884, + "learning_rate": 5.5329236662485416e-06, + "loss": 0.3012, + "step": 38079 + }, + { + "epoch": 0.6580038706110036, + "grad_norm": 1.4587024776374133, + "learning_rate": 5.532422966004003e-06, + "loss": 0.525, + "step": 38080 + }, + { + "epoch": 0.6580211501244125, + "grad_norm": 1.0727011797876225, + "learning_rate": 5.531922279752259e-06, + "loss": 0.3524, + "step": 38081 + }, + { + "epoch": 0.6580384296378214, + "grad_norm": 0.9411194211146124, + "learning_rate": 5.531421607494869e-06, + "loss": 0.2469, + "step": 38082 + }, + { + "epoch": 0.6580557091512304, + "grad_norm": 1.0797703267247825, + "learning_rate": 5.530920949233411e-06, + "loss": 0.4733, + "step": 38083 + }, + { + "epoch": 0.6580729886646393, + "grad_norm": 1.0765467556897683, + "learning_rate": 5.530420304969445e-06, + "loss": 0.4014, + "step": 38084 + }, + { + "epoch": 0.658090268178048, + "grad_norm": 1.3378418933858158, + "learning_rate": 5.529919674704541e-06, + "loss": 0.3515, + "step": 38085 + }, + { + "epoch": 0.658107547691457, + "grad_norm": 1.418017455221153, + "learning_rate": 5.5294190584402685e-06, + "loss": 0.3827, + "step": 38086 + }, + { + "epoch": 0.6581248272048659, + "grad_norm": 1.0491599424702736, + "learning_rate": 5.5289184561782e-06, + "loss": 0.4658, + "step": 38087 + }, + { + "epoch": 0.6581421067182748, + "grad_norm": 0.9569549253692711, + "learning_rate": 5.528417867919894e-06, + "loss": 0.2616, + "step": 38088 + }, + { + "epoch": 0.6581593862316837, + "grad_norm": 0.8766591463724147, + "learning_rate": 5.527917293666927e-06, + "loss": 0.1958, + "step": 38089 + }, + { + "epoch": 0.6581766657450926, + "grad_norm": 1.3878195867842644, + "learning_rate": 5.52741673342086e-06, + "loss": 0.4033, + "step": 38090 + }, + { + "epoch": 0.6581939452585015, + "grad_norm": 1.5160028614103689, + "learning_rate": 5.526916187183262e-06, + "loss": 0.3085, + "step": 38091 + }, + { + "epoch": 0.6582112247719104, + "grad_norm": 1.5356855575987045, + "learning_rate": 5.526415654955708e-06, + "loss": 0.2854, + "step": 38092 + }, + { + "epoch": 0.6582285042853193, + "grad_norm": 1.0720481689833465, + "learning_rate": 5.525915136739754e-06, + "loss": 0.3241, + "step": 38093 + }, + { + "epoch": 0.6582457837987282, + "grad_norm": 1.285813883713131, + "learning_rate": 5.525414632536981e-06, + "loss": 0.3928, + "step": 38094 + }, + { + "epoch": 0.6582630633121371, + "grad_norm": 2.8909741622962057, + "learning_rate": 5.524914142348943e-06, + "loss": 0.4972, + "step": 38095 + }, + { + "epoch": 0.658280342825546, + "grad_norm": 0.9356969928618728, + "learning_rate": 5.524413666177216e-06, + "loss": 0.3412, + "step": 38096 + }, + { + "epoch": 0.658297622338955, + "grad_norm": 0.886060131052495, + "learning_rate": 5.5239132040233635e-06, + "loss": 0.3091, + "step": 38097 + }, + { + "epoch": 0.6583149018523639, + "grad_norm": 1.1882947108081938, + "learning_rate": 5.5234127558889595e-06, + "loss": 0.5991, + "step": 38098 + }, + { + "epoch": 0.6583321813657728, + "grad_norm": 1.0369694221650412, + "learning_rate": 5.522912321775563e-06, + "loss": 0.4063, + "step": 38099 + }, + { + "epoch": 0.6583494608791817, + "grad_norm": 1.1636362330673893, + "learning_rate": 5.52241190168475e-06, + "loss": 0.3557, + "step": 38100 + }, + { + "epoch": 0.6583667403925906, + "grad_norm": 1.0824003891230387, + "learning_rate": 5.521911495618082e-06, + "loss": 0.8424, + "step": 38101 + }, + { + "epoch": 0.6583840199059995, + "grad_norm": 0.8957601674099345, + "learning_rate": 5.5214111035771245e-06, + "loss": 0.3569, + "step": 38102 + }, + { + "epoch": 0.6584012994194084, + "grad_norm": 1.360850210373571, + "learning_rate": 5.520910725563448e-06, + "loss": 0.3625, + "step": 38103 + }, + { + "epoch": 0.6584185789328173, + "grad_norm": 1.305937883817618, + "learning_rate": 5.520410361578619e-06, + "loss": 0.3081, + "step": 38104 + }, + { + "epoch": 0.6584358584462262, + "grad_norm": 1.5069305639574344, + "learning_rate": 5.5199100116242085e-06, + "loss": 0.4362, + "step": 38105 + }, + { + "epoch": 0.658453137959635, + "grad_norm": 0.8177220200907485, + "learning_rate": 5.519409675701777e-06, + "loss": 0.4501, + "step": 38106 + }, + { + "epoch": 0.6584704174730439, + "grad_norm": 1.2517907943062203, + "learning_rate": 5.518909353812899e-06, + "loss": 0.5154, + "step": 38107 + }, + { + "epoch": 0.6584876969864528, + "grad_norm": 1.0132903762733525, + "learning_rate": 5.5184090459591344e-06, + "loss": 0.4705, + "step": 38108 + }, + { + "epoch": 0.6585049764998617, + "grad_norm": 0.965985742081418, + "learning_rate": 5.517908752142051e-06, + "loss": 0.3845, + "step": 38109 + }, + { + "epoch": 0.6585222560132706, + "grad_norm": 1.671454935941037, + "learning_rate": 5.517408472363224e-06, + "loss": 0.3957, + "step": 38110 + }, + { + "epoch": 0.6585395355266795, + "grad_norm": 1.1160271031331006, + "learning_rate": 5.51690820662421e-06, + "loss": 0.2429, + "step": 38111 + }, + { + "epoch": 0.6585568150400885, + "grad_norm": 1.3887602025345074, + "learning_rate": 5.516407954926584e-06, + "loss": 0.329, + "step": 38112 + }, + { + "epoch": 0.6585740945534974, + "grad_norm": 1.5054550879965127, + "learning_rate": 5.515907717271904e-06, + "loss": 0.5473, + "step": 38113 + }, + { + "epoch": 0.6585913740669063, + "grad_norm": 0.9952275033337721, + "learning_rate": 5.515407493661743e-06, + "loss": 0.4804, + "step": 38114 + }, + { + "epoch": 0.6586086535803152, + "grad_norm": 0.8781399519915992, + "learning_rate": 5.5149072840976655e-06, + "loss": 0.3623, + "step": 38115 + }, + { + "epoch": 0.6586259330937241, + "grad_norm": 1.142602242000028, + "learning_rate": 5.514407088581244e-06, + "loss": 0.2959, + "step": 38116 + }, + { + "epoch": 0.658643212607133, + "grad_norm": 1.6121641500083297, + "learning_rate": 5.513906907114037e-06, + "loss": 0.6111, + "step": 38117 + }, + { + "epoch": 0.6586604921205419, + "grad_norm": 1.370726911686675, + "learning_rate": 5.513406739697618e-06, + "loss": 0.4244, + "step": 38118 + }, + { + "epoch": 0.6586777716339508, + "grad_norm": 1.152625728011088, + "learning_rate": 5.512906586333545e-06, + "loss": 0.5728, + "step": 38119 + }, + { + "epoch": 0.6586950511473597, + "grad_norm": 0.6827347955623804, + "learning_rate": 5.512406447023391e-06, + "loss": 0.2976, + "step": 38120 + }, + { + "epoch": 0.6587123306607686, + "grad_norm": 0.4940845368538148, + "learning_rate": 5.511906321768724e-06, + "loss": 0.6615, + "step": 38121 + }, + { + "epoch": 0.6587296101741775, + "grad_norm": 0.984497987379436, + "learning_rate": 5.511406210571104e-06, + "loss": 0.4605, + "step": 38122 + }, + { + "epoch": 0.6587468896875864, + "grad_norm": 1.1173383757197777, + "learning_rate": 5.510906113432105e-06, + "loss": 0.414, + "step": 38123 + }, + { + "epoch": 0.6587641692009953, + "grad_norm": 1.7642927743920471, + "learning_rate": 5.5104060303532854e-06, + "loss": 0.4524, + "step": 38124 + }, + { + "epoch": 0.6587814487144042, + "grad_norm": 1.2287467428074244, + "learning_rate": 5.509905961336215e-06, + "loss": 0.4661, + "step": 38125 + }, + { + "epoch": 0.6587987282278132, + "grad_norm": 0.5850759792793325, + "learning_rate": 5.50940590638246e-06, + "loss": 0.6326, + "step": 38126 + }, + { + "epoch": 0.658816007741222, + "grad_norm": 0.7742861653300606, + "learning_rate": 5.508905865493593e-06, + "loss": 0.3943, + "step": 38127 + }, + { + "epoch": 0.6588332872546309, + "grad_norm": 1.3206380802767719, + "learning_rate": 5.508405838671167e-06, + "loss": 0.6311, + "step": 38128 + }, + { + "epoch": 0.6588505667680398, + "grad_norm": 0.8357719249311298, + "learning_rate": 5.50790582591676e-06, + "loss": 0.2829, + "step": 38129 + }, + { + "epoch": 0.6588678462814487, + "grad_norm": 1.039114011039659, + "learning_rate": 5.507405827231931e-06, + "loss": 0.3295, + "step": 38130 + }, + { + "epoch": 0.6588851257948576, + "grad_norm": 1.430940295573806, + "learning_rate": 5.506905842618247e-06, + "loss": 0.3871, + "step": 38131 + }, + { + "epoch": 0.6589024053082665, + "grad_norm": 1.5758086049972406, + "learning_rate": 5.50640587207728e-06, + "loss": 0.4797, + "step": 38132 + }, + { + "epoch": 0.6589196848216754, + "grad_norm": 1.0652302753024765, + "learning_rate": 5.5059059156105876e-06, + "loss": 0.6247, + "step": 38133 + }, + { + "epoch": 0.6589369643350843, + "grad_norm": 0.6392284390700071, + "learning_rate": 5.505405973219742e-06, + "loss": 0.3152, + "step": 38134 + }, + { + "epoch": 0.6589542438484932, + "grad_norm": 1.71159840212465, + "learning_rate": 5.504906044906304e-06, + "loss": 0.4367, + "step": 38135 + }, + { + "epoch": 0.6589715233619021, + "grad_norm": 1.1476680212743677, + "learning_rate": 5.504406130671841e-06, + "loss": 0.3395, + "step": 38136 + }, + { + "epoch": 0.658988802875311, + "grad_norm": 1.492825662187796, + "learning_rate": 5.50390623051792e-06, + "loss": 0.2896, + "step": 38137 + }, + { + "epoch": 0.6590060823887199, + "grad_norm": 1.368498468280718, + "learning_rate": 5.503406344446111e-06, + "loss": 0.3944, + "step": 38138 + }, + { + "epoch": 0.6590233619021288, + "grad_norm": 1.3428685815469068, + "learning_rate": 5.50290647245797e-06, + "loss": 0.4663, + "step": 38139 + }, + { + "epoch": 0.6590406414155378, + "grad_norm": 1.0656955172015246, + "learning_rate": 5.502406614555071e-06, + "loss": 0.3254, + "step": 38140 + }, + { + "epoch": 0.6590579209289467, + "grad_norm": 1.0666899616799044, + "learning_rate": 5.501906770738978e-06, + "loss": 0.5117, + "step": 38141 + }, + { + "epoch": 0.6590752004423556, + "grad_norm": 1.2102854764817474, + "learning_rate": 5.50140694101125e-06, + "loss": 0.3591, + "step": 38142 + }, + { + "epoch": 0.6590924799557645, + "grad_norm": 0.8839057766895896, + "learning_rate": 5.500907125373458e-06, + "loss": 0.3518, + "step": 38143 + }, + { + "epoch": 0.6591097594691734, + "grad_norm": 0.7737422984858446, + "learning_rate": 5.500407323827166e-06, + "loss": 0.3487, + "step": 38144 + }, + { + "epoch": 0.6591270389825823, + "grad_norm": 1.2710791563085901, + "learning_rate": 5.4999075363739454e-06, + "loss": 0.3637, + "step": 38145 + }, + { + "epoch": 0.6591443184959912, + "grad_norm": 1.4280004040602574, + "learning_rate": 5.4994077630153505e-06, + "loss": 0.4695, + "step": 38146 + }, + { + "epoch": 0.6591615980094001, + "grad_norm": 1.3140068582240962, + "learning_rate": 5.498908003752958e-06, + "loss": 0.3718, + "step": 38147 + }, + { + "epoch": 0.6591788775228089, + "grad_norm": 1.1195363257627715, + "learning_rate": 5.4984082585883235e-06, + "loss": 0.2905, + "step": 38148 + }, + { + "epoch": 0.6591961570362178, + "grad_norm": 0.8813620811189854, + "learning_rate": 5.497908527523016e-06, + "loss": 0.3687, + "step": 38149 + }, + { + "epoch": 0.6592134365496267, + "grad_norm": 1.1548336294664365, + "learning_rate": 5.497408810558601e-06, + "loss": 0.4177, + "step": 38150 + }, + { + "epoch": 0.6592307160630356, + "grad_norm": 1.04816275985925, + "learning_rate": 5.496909107696648e-06, + "loss": 0.6699, + "step": 38151 + }, + { + "epoch": 0.6592479955764445, + "grad_norm": 1.1045779584814588, + "learning_rate": 5.496409418938719e-06, + "loss": 0.3286, + "step": 38152 + }, + { + "epoch": 0.6592652750898534, + "grad_norm": 1.1650056025884468, + "learning_rate": 5.495909744286372e-06, + "loss": 0.4048, + "step": 38153 + }, + { + "epoch": 0.6592825546032623, + "grad_norm": 0.9983865096921793, + "learning_rate": 5.4954100837411794e-06, + "loss": 0.4676, + "step": 38154 + }, + { + "epoch": 0.6592998341166713, + "grad_norm": 0.9492753817769679, + "learning_rate": 5.494910437304703e-06, + "loss": 0.4557, + "step": 38155 + }, + { + "epoch": 0.6593171136300802, + "grad_norm": 0.5779828910569149, + "learning_rate": 5.494410804978516e-06, + "loss": 0.4564, + "step": 38156 + }, + { + "epoch": 0.6593343931434891, + "grad_norm": 1.2613489861345797, + "learning_rate": 5.493911186764172e-06, + "loss": 0.3456, + "step": 38157 + }, + { + "epoch": 0.659351672656898, + "grad_norm": 0.9521203315364923, + "learning_rate": 5.493411582663244e-06, + "loss": 0.1709, + "step": 38158 + }, + { + "epoch": 0.6593689521703069, + "grad_norm": 0.7143088182178114, + "learning_rate": 5.49291199267729e-06, + "loss": 0.6983, + "step": 38159 + }, + { + "epoch": 0.6593862316837158, + "grad_norm": 0.6972767809743359, + "learning_rate": 5.4924124168078774e-06, + "loss": 0.5581, + "step": 38160 + }, + { + "epoch": 0.6594035111971247, + "grad_norm": 0.9501577197732364, + "learning_rate": 5.491912855056571e-06, + "loss": 0.3887, + "step": 38161 + }, + { + "epoch": 0.6594207907105336, + "grad_norm": 1.4066449743552603, + "learning_rate": 5.491413307424942e-06, + "loss": 0.6039, + "step": 38162 + }, + { + "epoch": 0.6594380702239425, + "grad_norm": 1.024269442705078, + "learning_rate": 5.490913773914548e-06, + "loss": 0.3509, + "step": 38163 + }, + { + "epoch": 0.6594553497373514, + "grad_norm": 0.9916212284213577, + "learning_rate": 5.490414254526951e-06, + "loss": 0.4025, + "step": 38164 + }, + { + "epoch": 0.6594726292507603, + "grad_norm": 1.402910328893406, + "learning_rate": 5.489914749263719e-06, + "loss": 0.4035, + "step": 38165 + }, + { + "epoch": 0.6594899087641692, + "grad_norm": 1.168056579973727, + "learning_rate": 5.489415258126418e-06, + "loss": 0.3337, + "step": 38166 + }, + { + "epoch": 0.6595071882775781, + "grad_norm": 0.9696781262424407, + "learning_rate": 5.488915781116615e-06, + "loss": 0.3612, + "step": 38167 + }, + { + "epoch": 0.6595244677909871, + "grad_norm": 1.3476311244055377, + "learning_rate": 5.488416318235866e-06, + "loss": 0.2616, + "step": 38168 + }, + { + "epoch": 0.6595417473043959, + "grad_norm": 1.1385796788399065, + "learning_rate": 5.4879168694857434e-06, + "loss": 0.2102, + "step": 38169 + }, + { + "epoch": 0.6595590268178048, + "grad_norm": 1.1738209687366887, + "learning_rate": 5.4874174348678036e-06, + "loss": 0.4942, + "step": 38170 + }, + { + "epoch": 0.6595763063312137, + "grad_norm": 1.1958649946521398, + "learning_rate": 5.486918014383617e-06, + "loss": 0.4169, + "step": 38171 + }, + { + "epoch": 0.6595935858446226, + "grad_norm": 1.1192373858795286, + "learning_rate": 5.486418608034749e-06, + "loss": 0.4462, + "step": 38172 + }, + { + "epoch": 0.6596108653580315, + "grad_norm": 1.6042047764795446, + "learning_rate": 5.485919215822757e-06, + "loss": 0.4444, + "step": 38173 + }, + { + "epoch": 0.6596281448714404, + "grad_norm": 0.8743461425241019, + "learning_rate": 5.485419837749214e-06, + "loss": 0.2382, + "step": 38174 + }, + { + "epoch": 0.6596454243848493, + "grad_norm": 1.2845621966743812, + "learning_rate": 5.484920473815675e-06, + "loss": 0.3437, + "step": 38175 + }, + { + "epoch": 0.6596627038982582, + "grad_norm": 1.1786010398891957, + "learning_rate": 5.484421124023711e-06, + "loss": 0.3335, + "step": 38176 + }, + { + "epoch": 0.6596799834116671, + "grad_norm": 0.880004447212252, + "learning_rate": 5.483921788374876e-06, + "loss": 0.3196, + "step": 38177 + }, + { + "epoch": 0.659697262925076, + "grad_norm": 1.7556572688193957, + "learning_rate": 5.483422466870749e-06, + "loss": 0.3872, + "step": 38178 + }, + { + "epoch": 0.6597145424384849, + "grad_norm": 1.4131699799634674, + "learning_rate": 5.482923159512883e-06, + "loss": 0.381, + "step": 38179 + }, + { + "epoch": 0.6597318219518938, + "grad_norm": 1.1641551162763513, + "learning_rate": 5.482423866302848e-06, + "loss": 0.3501, + "step": 38180 + }, + { + "epoch": 0.6597491014653027, + "grad_norm": 1.7086660546288182, + "learning_rate": 5.481924587242201e-06, + "loss": 0.3386, + "step": 38181 + }, + { + "epoch": 0.6597663809787117, + "grad_norm": 1.5373021171049432, + "learning_rate": 5.4814253223325145e-06, + "loss": 0.3538, + "step": 38182 + }, + { + "epoch": 0.6597836604921206, + "grad_norm": 1.4403289531902839, + "learning_rate": 5.480926071575341e-06, + "loss": 0.4083, + "step": 38183 + }, + { + "epoch": 0.6598009400055295, + "grad_norm": 1.3223740498384635, + "learning_rate": 5.480426834972252e-06, + "loss": 0.2992, + "step": 38184 + }, + { + "epoch": 0.6598182195189384, + "grad_norm": 0.9441155916888867, + "learning_rate": 5.479927612524814e-06, + "loss": 0.3406, + "step": 38185 + }, + { + "epoch": 0.6598354990323473, + "grad_norm": 0.6600043852232211, + "learning_rate": 5.479428404234581e-06, + "loss": 0.7061, + "step": 38186 + }, + { + "epoch": 0.6598527785457562, + "grad_norm": 1.3649441846681132, + "learning_rate": 5.478929210103128e-06, + "loss": 0.4737, + "step": 38187 + }, + { + "epoch": 0.6598700580591651, + "grad_norm": 1.1337523219484271, + "learning_rate": 5.478430030132006e-06, + "loss": 0.3778, + "step": 38188 + }, + { + "epoch": 0.659887337572574, + "grad_norm": 1.179718171249153, + "learning_rate": 5.4779308643227855e-06, + "loss": 0.5914, + "step": 38189 + }, + { + "epoch": 0.6599046170859829, + "grad_norm": 0.8449134467518672, + "learning_rate": 5.477431712677029e-06, + "loss": 0.3493, + "step": 38190 + }, + { + "epoch": 0.6599218965993917, + "grad_norm": 0.9148780041519926, + "learning_rate": 5.476932575196305e-06, + "loss": 0.2542, + "step": 38191 + }, + { + "epoch": 0.6599391761128006, + "grad_norm": 1.3088554543754318, + "learning_rate": 5.476433451882167e-06, + "loss": 0.4213, + "step": 38192 + }, + { + "epoch": 0.6599564556262095, + "grad_norm": 1.0643945510026216, + "learning_rate": 5.475934342736187e-06, + "loss": 0.227, + "step": 38193 + }, + { + "epoch": 0.6599737351396184, + "grad_norm": 1.1646810095619868, + "learning_rate": 5.475435247759921e-06, + "loss": 0.3059, + "step": 38194 + }, + { + "epoch": 0.6599910146530273, + "grad_norm": 1.1139710874675048, + "learning_rate": 5.4749361669549365e-06, + "loss": 0.6683, + "step": 38195 + }, + { + "epoch": 0.6600082941664362, + "grad_norm": 1.1509242555761732, + "learning_rate": 5.474437100322798e-06, + "loss": 0.4463, + "step": 38196 + }, + { + "epoch": 0.6600255736798452, + "grad_norm": 0.7339210938303596, + "learning_rate": 5.473938047865064e-06, + "loss": 0.6696, + "step": 38197 + }, + { + "epoch": 0.6600428531932541, + "grad_norm": 1.5217529613595073, + "learning_rate": 5.473439009583304e-06, + "loss": 0.4359, + "step": 38198 + }, + { + "epoch": 0.660060132706663, + "grad_norm": 0.9308192395947238, + "learning_rate": 5.472939985479071e-06, + "loss": 0.4468, + "step": 38199 + }, + { + "epoch": 0.6600774122200719, + "grad_norm": 0.9666773347557087, + "learning_rate": 5.472440975553935e-06, + "loss": 0.3335, + "step": 38200 + }, + { + "epoch": 0.6600946917334808, + "grad_norm": 0.7223184782334501, + "learning_rate": 5.4719419798094586e-06, + "loss": 0.3587, + "step": 38201 + }, + { + "epoch": 0.6601119712468897, + "grad_norm": 1.319358493801433, + "learning_rate": 5.4714429982472076e-06, + "loss": 0.5523, + "step": 38202 + }, + { + "epoch": 0.6601292507602986, + "grad_norm": 0.7583894082675467, + "learning_rate": 5.470944030868741e-06, + "loss": 0.462, + "step": 38203 + }, + { + "epoch": 0.6601465302737075, + "grad_norm": 1.010865576431791, + "learning_rate": 5.470445077675619e-06, + "loss": 0.235, + "step": 38204 + }, + { + "epoch": 0.6601638097871164, + "grad_norm": 1.5684573495938006, + "learning_rate": 5.469946138669406e-06, + "loss": 0.3971, + "step": 38205 + }, + { + "epoch": 0.6601810893005253, + "grad_norm": 0.9265915750098423, + "learning_rate": 5.469447213851668e-06, + "loss": 0.2363, + "step": 38206 + }, + { + "epoch": 0.6601983688139342, + "grad_norm": 1.7932147006514505, + "learning_rate": 5.468948303223968e-06, + "loss": 0.4002, + "step": 38207 + }, + { + "epoch": 0.6602156483273431, + "grad_norm": 1.0383183840878114, + "learning_rate": 5.468449406787862e-06, + "loss": 0.2441, + "step": 38208 + }, + { + "epoch": 0.660232927840752, + "grad_norm": 0.9910059995636483, + "learning_rate": 5.4679505245449205e-06, + "loss": 0.3765, + "step": 38209 + }, + { + "epoch": 0.660250207354161, + "grad_norm": 0.8914263790629355, + "learning_rate": 5.467451656496701e-06, + "loss": 0.3349, + "step": 38210 + }, + { + "epoch": 0.6602674868675699, + "grad_norm": 1.2476889484275047, + "learning_rate": 5.466952802644765e-06, + "loss": 0.5196, + "step": 38211 + }, + { + "epoch": 0.6602847663809787, + "grad_norm": 1.2620564300211468, + "learning_rate": 5.466453962990677e-06, + "loss": 0.4878, + "step": 38212 + }, + { + "epoch": 0.6603020458943876, + "grad_norm": 1.369173726758702, + "learning_rate": 5.465955137536005e-06, + "loss": 0.3932, + "step": 38213 + }, + { + "epoch": 0.6603193254077965, + "grad_norm": 0.7027461888593639, + "learning_rate": 5.465456326282306e-06, + "loss": 0.5756, + "step": 38214 + }, + { + "epoch": 0.6603366049212054, + "grad_norm": 1.0092383666263582, + "learning_rate": 5.464957529231137e-06, + "loss": 0.5171, + "step": 38215 + }, + { + "epoch": 0.6603538844346143, + "grad_norm": 1.7479239627646497, + "learning_rate": 5.464458746384072e-06, + "loss": 0.4321, + "step": 38216 + }, + { + "epoch": 0.6603711639480232, + "grad_norm": 1.1652442651474697, + "learning_rate": 5.463959977742657e-06, + "loss": 0.6609, + "step": 38217 + }, + { + "epoch": 0.6603884434614321, + "grad_norm": 0.8025733815475103, + "learning_rate": 5.463461223308472e-06, + "loss": 0.3407, + "step": 38218 + }, + { + "epoch": 0.660405722974841, + "grad_norm": 0.8711253604495216, + "learning_rate": 5.462962483083069e-06, + "loss": 0.3379, + "step": 38219 + }, + { + "epoch": 0.6604230024882499, + "grad_norm": 1.1599149462155736, + "learning_rate": 5.462463757068015e-06, + "loss": 0.4779, + "step": 38220 + }, + { + "epoch": 0.6604402820016588, + "grad_norm": 0.8401314879167107, + "learning_rate": 5.461965045264865e-06, + "loss": 0.5188, + "step": 38221 + }, + { + "epoch": 0.6604575615150677, + "grad_norm": 0.9946772153030562, + "learning_rate": 5.4614663476751904e-06, + "loss": 0.2732, + "step": 38222 + }, + { + "epoch": 0.6604748410284766, + "grad_norm": 1.4155396091974461, + "learning_rate": 5.46096766430054e-06, + "loss": 0.4428, + "step": 38223 + }, + { + "epoch": 0.6604921205418856, + "grad_norm": 0.9524552298109165, + "learning_rate": 5.460468995142491e-06, + "loss": 0.3936, + "step": 38224 + }, + { + "epoch": 0.6605094000552945, + "grad_norm": 1.0364245656583555, + "learning_rate": 5.459970340202598e-06, + "loss": 0.3087, + "step": 38225 + }, + { + "epoch": 0.6605266795687034, + "grad_norm": 1.0596023308926603, + "learning_rate": 5.459471699482421e-06, + "loss": 0.2997, + "step": 38226 + }, + { + "epoch": 0.6605439590821123, + "grad_norm": 1.550389671189907, + "learning_rate": 5.458973072983526e-06, + "loss": 0.2234, + "step": 38227 + }, + { + "epoch": 0.6605612385955212, + "grad_norm": 1.2740094459123834, + "learning_rate": 5.458474460707468e-06, + "loss": 0.7893, + "step": 38228 + }, + { + "epoch": 0.6605785181089301, + "grad_norm": 1.3826086737679808, + "learning_rate": 5.4579758626558136e-06, + "loss": 0.3992, + "step": 38229 + }, + { + "epoch": 0.660595797622339, + "grad_norm": 1.047364890011499, + "learning_rate": 5.457477278830124e-06, + "loss": 0.441, + "step": 38230 + }, + { + "epoch": 0.6606130771357479, + "grad_norm": 0.9112284671854576, + "learning_rate": 5.456978709231966e-06, + "loss": 0.347, + "step": 38231 + }, + { + "epoch": 0.6606303566491568, + "grad_norm": 1.3207113842552556, + "learning_rate": 5.456480153862891e-06, + "loss": 0.2924, + "step": 38232 + }, + { + "epoch": 0.6606476361625656, + "grad_norm": 1.0187342463932179, + "learning_rate": 5.455981612724468e-06, + "loss": 0.4438, + "step": 38233 + }, + { + "epoch": 0.6606649156759745, + "grad_norm": 1.180190105808051, + "learning_rate": 5.455483085818254e-06, + "loss": 0.4006, + "step": 38234 + }, + { + "epoch": 0.6606821951893834, + "grad_norm": 0.7057080013135089, + "learning_rate": 5.454984573145812e-06, + "loss": 0.3195, + "step": 38235 + }, + { + "epoch": 0.6606994747027923, + "grad_norm": 1.2454244114677173, + "learning_rate": 5.454486074708707e-06, + "loss": 0.3228, + "step": 38236 + }, + { + "epoch": 0.6607167542162012, + "grad_norm": 0.9802245999359709, + "learning_rate": 5.453987590508494e-06, + "loss": 0.515, + "step": 38237 + }, + { + "epoch": 0.6607340337296101, + "grad_norm": 1.1036929743599897, + "learning_rate": 5.45348912054674e-06, + "loss": 0.278, + "step": 38238 + }, + { + "epoch": 0.6607513132430191, + "grad_norm": 1.5536123730104328, + "learning_rate": 5.452990664825e-06, + "loss": 0.4872, + "step": 38239 + }, + { + "epoch": 0.660768592756428, + "grad_norm": 1.2701253750219235, + "learning_rate": 5.452492223344838e-06, + "loss": 0.3585, + "step": 38240 + }, + { + "epoch": 0.6607858722698369, + "grad_norm": 1.7860190516380219, + "learning_rate": 5.451993796107818e-06, + "loss": 0.5657, + "step": 38241 + }, + { + "epoch": 0.6608031517832458, + "grad_norm": 1.5134226980809677, + "learning_rate": 5.451495383115502e-06, + "loss": 0.456, + "step": 38242 + }, + { + "epoch": 0.6608204312966547, + "grad_norm": 1.1680530870150465, + "learning_rate": 5.450996984369445e-06, + "loss": 0.4361, + "step": 38243 + }, + { + "epoch": 0.6608377108100636, + "grad_norm": 1.011684806926902, + "learning_rate": 5.450498599871214e-06, + "loss": 0.375, + "step": 38244 + }, + { + "epoch": 0.6608549903234725, + "grad_norm": 1.0364587428236158, + "learning_rate": 5.450000229622363e-06, + "loss": 0.2904, + "step": 38245 + }, + { + "epoch": 0.6608722698368814, + "grad_norm": 0.625693015033654, + "learning_rate": 5.4495018736244585e-06, + "loss": 0.6568, + "step": 38246 + }, + { + "epoch": 0.6608895493502903, + "grad_norm": 1.3399427037603397, + "learning_rate": 5.449003531879063e-06, + "loss": 0.5681, + "step": 38247 + }, + { + "epoch": 0.6609068288636992, + "grad_norm": 1.3664650467223955, + "learning_rate": 5.4485052043877304e-06, + "loss": 0.3979, + "step": 38248 + }, + { + "epoch": 0.6609241083771081, + "grad_norm": 1.4027904687928168, + "learning_rate": 5.448006891152029e-06, + "loss": 0.1942, + "step": 38249 + }, + { + "epoch": 0.660941387890517, + "grad_norm": 0.9926273346680831, + "learning_rate": 5.447508592173513e-06, + "loss": 0.6553, + "step": 38250 + }, + { + "epoch": 0.660958667403926, + "grad_norm": 1.1677023278927394, + "learning_rate": 5.447010307453746e-06, + "loss": 0.3223, + "step": 38251 + }, + { + "epoch": 0.6609759469173349, + "grad_norm": 1.2337775278519143, + "learning_rate": 5.446512036994287e-06, + "loss": 0.3129, + "step": 38252 + }, + { + "epoch": 0.6609932264307438, + "grad_norm": 1.1332621509892613, + "learning_rate": 5.4460137807967036e-06, + "loss": 0.332, + "step": 38253 + }, + { + "epoch": 0.6610105059441526, + "grad_norm": 1.7315215524966567, + "learning_rate": 5.445515538862547e-06, + "loss": 0.4865, + "step": 38254 + }, + { + "epoch": 0.6610277854575615, + "grad_norm": 1.3028927485090287, + "learning_rate": 5.4450173111933854e-06, + "loss": 0.3989, + "step": 38255 + }, + { + "epoch": 0.6610450649709704, + "grad_norm": 1.069107007840938, + "learning_rate": 5.444519097790777e-06, + "loss": 0.2136, + "step": 38256 + }, + { + "epoch": 0.6610623444843793, + "grad_norm": 1.6234025864038952, + "learning_rate": 5.444020898656273e-06, + "loss": 0.311, + "step": 38257 + }, + { + "epoch": 0.6610796239977882, + "grad_norm": 1.4000228155494026, + "learning_rate": 5.443522713791449e-06, + "loss": 0.2843, + "step": 38258 + }, + { + "epoch": 0.6610969035111971, + "grad_norm": 1.6642870978727242, + "learning_rate": 5.443024543197853e-06, + "loss": 0.3452, + "step": 38259 + }, + { + "epoch": 0.661114183024606, + "grad_norm": 0.7728701182248159, + "learning_rate": 5.442526386877056e-06, + "loss": 0.2682, + "step": 38260 + }, + { + "epoch": 0.6611314625380149, + "grad_norm": 0.4928615283992927, + "learning_rate": 5.442028244830608e-06, + "loss": 0.539, + "step": 38261 + }, + { + "epoch": 0.6611487420514238, + "grad_norm": 1.0487449052679645, + "learning_rate": 5.4415301170600786e-06, + "loss": 0.441, + "step": 38262 + }, + { + "epoch": 0.6611660215648327, + "grad_norm": 1.6883656544836192, + "learning_rate": 5.441032003567015e-06, + "loss": 0.4727, + "step": 38263 + }, + { + "epoch": 0.6611833010782416, + "grad_norm": 1.2680812095946965, + "learning_rate": 5.440533904352993e-06, + "loss": 0.3457, + "step": 38264 + }, + { + "epoch": 0.6612005805916505, + "grad_norm": 0.8965710653799923, + "learning_rate": 5.440035819419567e-06, + "loss": 0.3239, + "step": 38265 + }, + { + "epoch": 0.6612178601050595, + "grad_norm": 0.940914370085919, + "learning_rate": 5.43953774876829e-06, + "loss": 0.2706, + "step": 38266 + }, + { + "epoch": 0.6612351396184684, + "grad_norm": 1.4023501091795663, + "learning_rate": 5.439039692400732e-06, + "loss": 0.3791, + "step": 38267 + }, + { + "epoch": 0.6612524191318773, + "grad_norm": 0.9553613597581522, + "learning_rate": 5.438541650318443e-06, + "loss": 0.535, + "step": 38268 + }, + { + "epoch": 0.6612696986452862, + "grad_norm": 0.91986119076337, + "learning_rate": 5.438043622522991e-06, + "loss": 0.3598, + "step": 38269 + }, + { + "epoch": 0.6612869781586951, + "grad_norm": 0.5059512124114308, + "learning_rate": 5.437545609015931e-06, + "loss": 0.6559, + "step": 38270 + }, + { + "epoch": 0.661304257672104, + "grad_norm": 1.2610661284866207, + "learning_rate": 5.4370476097988275e-06, + "loss": 0.3076, + "step": 38271 + }, + { + "epoch": 0.6613215371855129, + "grad_norm": 1.547702194931826, + "learning_rate": 5.436549624873235e-06, + "loss": 0.4383, + "step": 38272 + }, + { + "epoch": 0.6613388166989218, + "grad_norm": 0.7565551052817097, + "learning_rate": 5.436051654240721e-06, + "loss": 0.2526, + "step": 38273 + }, + { + "epoch": 0.6613560962123307, + "grad_norm": 1.3925827827890893, + "learning_rate": 5.435553697902836e-06, + "loss": 0.4114, + "step": 38274 + }, + { + "epoch": 0.6613733757257395, + "grad_norm": 0.9774369264567344, + "learning_rate": 5.435055755861142e-06, + "loss": 0.6013, + "step": 38275 + }, + { + "epoch": 0.6613906552391484, + "grad_norm": 0.9445831392845822, + "learning_rate": 5.434557828117205e-06, + "loss": 0.3391, + "step": 38276 + }, + { + "epoch": 0.6614079347525573, + "grad_norm": 0.9816006028212554, + "learning_rate": 5.434059914672576e-06, + "loss": 0.1985, + "step": 38277 + }, + { + "epoch": 0.6614252142659662, + "grad_norm": 1.365010972284133, + "learning_rate": 5.433562015528823e-06, + "loss": 0.4003, + "step": 38278 + }, + { + "epoch": 0.6614424937793751, + "grad_norm": 1.04592411253654, + "learning_rate": 5.433064130687496e-06, + "loss": 0.298, + "step": 38279 + }, + { + "epoch": 0.661459773292784, + "grad_norm": 1.0274328346654642, + "learning_rate": 5.43256626015016e-06, + "loss": 0.4627, + "step": 38280 + }, + { + "epoch": 0.661477052806193, + "grad_norm": 1.4067736704032991, + "learning_rate": 5.4320684039183725e-06, + "loss": 0.3538, + "step": 38281 + }, + { + "epoch": 0.6614943323196019, + "grad_norm": 1.5203013262766603, + "learning_rate": 5.431570561993698e-06, + "loss": 0.5209, + "step": 38282 + }, + { + "epoch": 0.6615116118330108, + "grad_norm": 1.2025262308132532, + "learning_rate": 5.431072734377688e-06, + "loss": 0.3833, + "step": 38283 + }, + { + "epoch": 0.6615288913464197, + "grad_norm": 0.8852387864962068, + "learning_rate": 5.43057492107191e-06, + "loss": 0.3444, + "step": 38284 + }, + { + "epoch": 0.6615461708598286, + "grad_norm": 0.8798632693476504, + "learning_rate": 5.430077122077915e-06, + "loss": 0.2757, + "step": 38285 + }, + { + "epoch": 0.6615634503732375, + "grad_norm": 1.2012759932174533, + "learning_rate": 5.429579337397264e-06, + "loss": 0.2404, + "step": 38286 + }, + { + "epoch": 0.6615807298866464, + "grad_norm": 1.1413057702960432, + "learning_rate": 5.429081567031522e-06, + "loss": 0.3582, + "step": 38287 + }, + { + "epoch": 0.6615980094000553, + "grad_norm": 1.1303686708878193, + "learning_rate": 5.428583810982242e-06, + "loss": 0.2156, + "step": 38288 + }, + { + "epoch": 0.6616152889134642, + "grad_norm": 1.1435968535681005, + "learning_rate": 5.428086069250986e-06, + "loss": 0.4129, + "step": 38289 + }, + { + "epoch": 0.6616325684268731, + "grad_norm": 1.7771359048652355, + "learning_rate": 5.427588341839311e-06, + "loss": 0.3967, + "step": 38290 + }, + { + "epoch": 0.661649847940282, + "grad_norm": 0.8420596410637247, + "learning_rate": 5.427090628748775e-06, + "loss": 0.3692, + "step": 38291 + }, + { + "epoch": 0.6616671274536909, + "grad_norm": 1.2151465862896822, + "learning_rate": 5.426592929980939e-06, + "loss": 0.3508, + "step": 38292 + }, + { + "epoch": 0.6616844069670998, + "grad_norm": 1.2285796935342608, + "learning_rate": 5.426095245537365e-06, + "loss": 0.4665, + "step": 38293 + }, + { + "epoch": 0.6617016864805088, + "grad_norm": 1.3384616057954908, + "learning_rate": 5.4255975754196044e-06, + "loss": 0.334, + "step": 38294 + }, + { + "epoch": 0.6617189659939177, + "grad_norm": 0.7982361447393794, + "learning_rate": 5.4250999196292245e-06, + "loss": 0.298, + "step": 38295 + }, + { + "epoch": 0.6617362455073265, + "grad_norm": 0.95590292881576, + "learning_rate": 5.424602278167779e-06, + "loss": 0.4334, + "step": 38296 + }, + { + "epoch": 0.6617535250207354, + "grad_norm": 1.1763686640213638, + "learning_rate": 5.424104651036819e-06, + "loss": 0.3975, + "step": 38297 + }, + { + "epoch": 0.6617708045341443, + "grad_norm": 1.281780798423567, + "learning_rate": 5.423607038237919e-06, + "loss": 0.2614, + "step": 38298 + }, + { + "epoch": 0.6617880840475532, + "grad_norm": 1.498682886045648, + "learning_rate": 5.423109439772624e-06, + "loss": 0.3743, + "step": 38299 + }, + { + "epoch": 0.6618053635609621, + "grad_norm": 0.8067300922124275, + "learning_rate": 5.422611855642505e-06, + "loss": 0.6966, + "step": 38300 + }, + { + "epoch": 0.661822643074371, + "grad_norm": 1.4823844992977824, + "learning_rate": 5.4221142858491075e-06, + "loss": 0.412, + "step": 38301 + }, + { + "epoch": 0.6618399225877799, + "grad_norm": 1.3564151962911013, + "learning_rate": 5.421616730394e-06, + "loss": 0.5942, + "step": 38302 + }, + { + "epoch": 0.6618572021011888, + "grad_norm": 1.4944896160583578, + "learning_rate": 5.4211191892787294e-06, + "loss": 0.4177, + "step": 38303 + }, + { + "epoch": 0.6618744816145977, + "grad_norm": 1.3157583707799403, + "learning_rate": 5.42062166250487e-06, + "loss": 0.4804, + "step": 38304 + }, + { + "epoch": 0.6618917611280066, + "grad_norm": 0.8816518293659666, + "learning_rate": 5.420124150073967e-06, + "loss": 0.3709, + "step": 38305 + }, + { + "epoch": 0.6619090406414155, + "grad_norm": 1.0645938508425008, + "learning_rate": 5.419626651987587e-06, + "loss": 0.5958, + "step": 38306 + }, + { + "epoch": 0.6619263201548244, + "grad_norm": 1.305585577359736, + "learning_rate": 5.419129168247285e-06, + "loss": 0.5019, + "step": 38307 + }, + { + "epoch": 0.6619435996682334, + "grad_norm": 1.0380585857818447, + "learning_rate": 5.4186316988546136e-06, + "loss": 0.3858, + "step": 38308 + }, + { + "epoch": 0.6619608791816423, + "grad_norm": 1.5404826215202998, + "learning_rate": 5.418134243811137e-06, + "loss": 0.6179, + "step": 38309 + }, + { + "epoch": 0.6619781586950512, + "grad_norm": 1.4433588933842876, + "learning_rate": 5.417636803118411e-06, + "loss": 0.4431, + "step": 38310 + }, + { + "epoch": 0.6619954382084601, + "grad_norm": 0.7462994563215612, + "learning_rate": 5.417139376778e-06, + "loss": 0.4559, + "step": 38311 + }, + { + "epoch": 0.662012717721869, + "grad_norm": 0.5275394612685681, + "learning_rate": 5.4166419647914525e-06, + "loss": 0.5261, + "step": 38312 + }, + { + "epoch": 0.6620299972352779, + "grad_norm": 1.2818118300692967, + "learning_rate": 5.416144567160334e-06, + "loss": 0.4248, + "step": 38313 + }, + { + "epoch": 0.6620472767486868, + "grad_norm": 1.8188598181401632, + "learning_rate": 5.4156471838861955e-06, + "loss": 0.4372, + "step": 38314 + }, + { + "epoch": 0.6620645562620957, + "grad_norm": 1.2686623309406664, + "learning_rate": 5.415149814970599e-06, + "loss": 0.7239, + "step": 38315 + }, + { + "epoch": 0.6620818357755046, + "grad_norm": 1.7541880624997852, + "learning_rate": 5.414652460415104e-06, + "loss": 0.4724, + "step": 38316 + }, + { + "epoch": 0.6620991152889134, + "grad_norm": 1.2831522983548735, + "learning_rate": 5.414155120221264e-06, + "loss": 0.3679, + "step": 38317 + }, + { + "epoch": 0.6621163948023223, + "grad_norm": 1.4034249301413126, + "learning_rate": 5.413657794390641e-06, + "loss": 0.3754, + "step": 38318 + }, + { + "epoch": 0.6621336743157312, + "grad_norm": 1.230849983504732, + "learning_rate": 5.413160482924788e-06, + "loss": 0.4129, + "step": 38319 + }, + { + "epoch": 0.6621509538291401, + "grad_norm": 1.2809265077022922, + "learning_rate": 5.412663185825265e-06, + "loss": 0.3462, + "step": 38320 + }, + { + "epoch": 0.662168233342549, + "grad_norm": 0.9749777356852348, + "learning_rate": 5.412165903093627e-06, + "loss": 0.4208, + "step": 38321 + }, + { + "epoch": 0.662185512855958, + "grad_norm": 1.5945175730284258, + "learning_rate": 5.41166863473144e-06, + "loss": 0.4694, + "step": 38322 + }, + { + "epoch": 0.6622027923693669, + "grad_norm": 1.0299756311253159, + "learning_rate": 5.411171380740251e-06, + "loss": 0.2925, + "step": 38323 + }, + { + "epoch": 0.6622200718827758, + "grad_norm": 0.9289902446694209, + "learning_rate": 5.410674141121625e-06, + "loss": 0.3804, + "step": 38324 + }, + { + "epoch": 0.6622373513961847, + "grad_norm": 1.1869364491642447, + "learning_rate": 5.410176915877114e-06, + "loss": 0.379, + "step": 38325 + }, + { + "epoch": 0.6622546309095936, + "grad_norm": 1.6053063397779672, + "learning_rate": 5.409679705008277e-06, + "loss": 0.3353, + "step": 38326 + }, + { + "epoch": 0.6622719104230025, + "grad_norm": 2.0088273965752164, + "learning_rate": 5.409182508516677e-06, + "loss": 0.5458, + "step": 38327 + }, + { + "epoch": 0.6622891899364114, + "grad_norm": 1.7530197968407626, + "learning_rate": 5.408685326403862e-06, + "loss": 0.2609, + "step": 38328 + }, + { + "epoch": 0.6623064694498203, + "grad_norm": 0.8600454322312443, + "learning_rate": 5.408188158671397e-06, + "loss": 0.2881, + "step": 38329 + }, + { + "epoch": 0.6623237489632292, + "grad_norm": 1.3896231399357553, + "learning_rate": 5.407691005320831e-06, + "loss": 0.3022, + "step": 38330 + }, + { + "epoch": 0.6623410284766381, + "grad_norm": 1.5192671258627317, + "learning_rate": 5.407193866353726e-06, + "loss": 0.394, + "step": 38331 + }, + { + "epoch": 0.662358307990047, + "grad_norm": 1.36067288489298, + "learning_rate": 5.40669674177164e-06, + "loss": 0.5965, + "step": 38332 + }, + { + "epoch": 0.6623755875034559, + "grad_norm": 1.0348924616464454, + "learning_rate": 5.406199631576132e-06, + "loss": 0.5437, + "step": 38333 + }, + { + "epoch": 0.6623928670168648, + "grad_norm": 1.067921415757312, + "learning_rate": 5.405702535768752e-06, + "loss": 0.3825, + "step": 38334 + }, + { + "epoch": 0.6624101465302737, + "grad_norm": 1.2046263043624539, + "learning_rate": 5.405205454351065e-06, + "loss": 0.4313, + "step": 38335 + }, + { + "epoch": 0.6624274260436827, + "grad_norm": 1.0527430397460247, + "learning_rate": 5.40470838732462e-06, + "loss": 0.3489, + "step": 38336 + }, + { + "epoch": 0.6624447055570916, + "grad_norm": 1.512736368721873, + "learning_rate": 5.404211334690976e-06, + "loss": 0.418, + "step": 38337 + }, + { + "epoch": 0.6624619850705005, + "grad_norm": 1.2293394030172589, + "learning_rate": 5.403714296451696e-06, + "loss": 0.4456, + "step": 38338 + }, + { + "epoch": 0.6624792645839093, + "grad_norm": 1.2968707551154206, + "learning_rate": 5.40321727260833e-06, + "loss": 0.3922, + "step": 38339 + }, + { + "epoch": 0.6624965440973182, + "grad_norm": 1.1929791760687045, + "learning_rate": 5.402720263162438e-06, + "loss": 0.2943, + "step": 38340 + }, + { + "epoch": 0.6625138236107271, + "grad_norm": 0.7595834600795416, + "learning_rate": 5.402223268115574e-06, + "loss": 0.7339, + "step": 38341 + }, + { + "epoch": 0.662531103124136, + "grad_norm": 1.2654749391097666, + "learning_rate": 5.401726287469299e-06, + "loss": 0.4206, + "step": 38342 + }, + { + "epoch": 0.6625483826375449, + "grad_norm": 1.6521103916876816, + "learning_rate": 5.401229321225159e-06, + "loss": 0.3471, + "step": 38343 + }, + { + "epoch": 0.6625656621509538, + "grad_norm": 0.9419648135816645, + "learning_rate": 5.4007323693847256e-06, + "loss": 0.8021, + "step": 38344 + }, + { + "epoch": 0.6625829416643627, + "grad_norm": 0.9521057976831406, + "learning_rate": 5.400235431949544e-06, + "loss": 0.3591, + "step": 38345 + }, + { + "epoch": 0.6626002211777716, + "grad_norm": 1.6343349335786723, + "learning_rate": 5.399738508921179e-06, + "loss": 0.3951, + "step": 38346 + }, + { + "epoch": 0.6626175006911805, + "grad_norm": 2.0734930097114557, + "learning_rate": 5.399241600301184e-06, + "loss": 0.4257, + "step": 38347 + }, + { + "epoch": 0.6626347802045894, + "grad_norm": 1.1963077080297302, + "learning_rate": 5.398744706091108e-06, + "loss": 0.2985, + "step": 38348 + }, + { + "epoch": 0.6626520597179983, + "grad_norm": 1.3589225800937417, + "learning_rate": 5.398247826292513e-06, + "loss": 0.2799, + "step": 38349 + }, + { + "epoch": 0.6626693392314073, + "grad_norm": 1.1739382490378667, + "learning_rate": 5.3977509609069565e-06, + "loss": 0.448, + "step": 38350 + }, + { + "epoch": 0.6626866187448162, + "grad_norm": 1.108931176907331, + "learning_rate": 5.397254109935999e-06, + "loss": 0.4032, + "step": 38351 + }, + { + "epoch": 0.6627038982582251, + "grad_norm": 1.1037877900313484, + "learning_rate": 5.396757273381186e-06, + "loss": 0.4732, + "step": 38352 + }, + { + "epoch": 0.662721177771634, + "grad_norm": 1.2668862066677593, + "learning_rate": 5.396260451244082e-06, + "loss": 0.2642, + "step": 38353 + }, + { + "epoch": 0.6627384572850429, + "grad_norm": 0.9536546667253507, + "learning_rate": 5.3957636435262374e-06, + "loss": 0.3082, + "step": 38354 + }, + { + "epoch": 0.6627557367984518, + "grad_norm": 1.6807295339744526, + "learning_rate": 5.39526685022921e-06, + "loss": 0.5141, + "step": 38355 + }, + { + "epoch": 0.6627730163118607, + "grad_norm": 0.7423246226908556, + "learning_rate": 5.394770071354558e-06, + "loss": 0.302, + "step": 38356 + }, + { + "epoch": 0.6627902958252696, + "grad_norm": 1.1409203885494796, + "learning_rate": 5.39427330690384e-06, + "loss": 0.5441, + "step": 38357 + }, + { + "epoch": 0.6628075753386785, + "grad_norm": 1.5202336969286574, + "learning_rate": 5.393776556878607e-06, + "loss": 0.2202, + "step": 38358 + }, + { + "epoch": 0.6628248548520874, + "grad_norm": 1.0652457890491311, + "learning_rate": 5.393279821280413e-06, + "loss": 0.3024, + "step": 38359 + }, + { + "epoch": 0.6628421343654962, + "grad_norm": 1.3885133786258652, + "learning_rate": 5.392783100110817e-06, + "loss": 0.3794, + "step": 38360 + }, + { + "epoch": 0.6628594138789051, + "grad_norm": 0.9137158114545866, + "learning_rate": 5.392286393371373e-06, + "loss": 0.5458, + "step": 38361 + }, + { + "epoch": 0.662876693392314, + "grad_norm": 1.08099643319741, + "learning_rate": 5.391789701063643e-06, + "loss": 0.3009, + "step": 38362 + }, + { + "epoch": 0.6628939729057229, + "grad_norm": 1.1306211783960571, + "learning_rate": 5.391293023189175e-06, + "loss": 0.4791, + "step": 38363 + }, + { + "epoch": 0.6629112524191318, + "grad_norm": 0.7758261253118641, + "learning_rate": 5.3907963597495294e-06, + "loss": 0.2555, + "step": 38364 + }, + { + "epoch": 0.6629285319325408, + "grad_norm": 1.0056196322884605, + "learning_rate": 5.390299710746256e-06, + "loss": 0.3895, + "step": 38365 + }, + { + "epoch": 0.6629458114459497, + "grad_norm": 0.8998667763197489, + "learning_rate": 5.389803076180916e-06, + "loss": 0.2976, + "step": 38366 + }, + { + "epoch": 0.6629630909593586, + "grad_norm": 0.9502115533614376, + "learning_rate": 5.389306456055062e-06, + "loss": 0.3564, + "step": 38367 + }, + { + "epoch": 0.6629803704727675, + "grad_norm": 1.3412282189728295, + "learning_rate": 5.388809850370256e-06, + "loss": 0.2795, + "step": 38368 + }, + { + "epoch": 0.6629976499861764, + "grad_norm": 1.3222119270901223, + "learning_rate": 5.3883132591280465e-06, + "loss": 0.4428, + "step": 38369 + }, + { + "epoch": 0.6630149294995853, + "grad_norm": 2.023590429642548, + "learning_rate": 5.387816682329985e-06, + "loss": 0.4888, + "step": 38370 + }, + { + "epoch": 0.6630322090129942, + "grad_norm": 1.589246530556898, + "learning_rate": 5.387320119977635e-06, + "loss": 0.7566, + "step": 38371 + }, + { + "epoch": 0.6630494885264031, + "grad_norm": 1.1826064925036763, + "learning_rate": 5.386823572072547e-06, + "loss": 0.4518, + "step": 38372 + }, + { + "epoch": 0.663066768039812, + "grad_norm": 1.4127732584057169, + "learning_rate": 5.386327038616284e-06, + "loss": 0.3096, + "step": 38373 + }, + { + "epoch": 0.6630840475532209, + "grad_norm": 1.3266511140895687, + "learning_rate": 5.3858305196103885e-06, + "loss": 0.6011, + "step": 38374 + }, + { + "epoch": 0.6631013270666298, + "grad_norm": 1.125306004143677, + "learning_rate": 5.3853340150564295e-06, + "loss": 0.3112, + "step": 38375 + }, + { + "epoch": 0.6631186065800387, + "grad_norm": 1.4769320499066914, + "learning_rate": 5.384837524955949e-06, + "loss": 0.4395, + "step": 38376 + }, + { + "epoch": 0.6631358860934476, + "grad_norm": 0.965740975460551, + "learning_rate": 5.384341049310508e-06, + "loss": 0.2219, + "step": 38377 + }, + { + "epoch": 0.6631531656068566, + "grad_norm": 1.0640137451742793, + "learning_rate": 5.383844588121667e-06, + "loss": 0.4722, + "step": 38378 + }, + { + "epoch": 0.6631704451202655, + "grad_norm": 1.4810438480753376, + "learning_rate": 5.383348141390972e-06, + "loss": 0.3807, + "step": 38379 + }, + { + "epoch": 0.6631877246336744, + "grad_norm": 1.2313409052279918, + "learning_rate": 5.382851709119984e-06, + "loss": 0.3287, + "step": 38380 + }, + { + "epoch": 0.6632050041470832, + "grad_norm": 1.5200732708515894, + "learning_rate": 5.382355291310251e-06, + "loss": 0.3516, + "step": 38381 + }, + { + "epoch": 0.6632222836604921, + "grad_norm": 1.3133043105356297, + "learning_rate": 5.381858887963338e-06, + "loss": 0.4065, + "step": 38382 + }, + { + "epoch": 0.663239563173901, + "grad_norm": 1.620737622273056, + "learning_rate": 5.381362499080784e-06, + "loss": 0.316, + "step": 38383 + }, + { + "epoch": 0.6632568426873099, + "grad_norm": 1.4147733324297718, + "learning_rate": 5.3808661246641636e-06, + "loss": 0.5886, + "step": 38384 + }, + { + "epoch": 0.6632741222007188, + "grad_norm": 1.1737188130400498, + "learning_rate": 5.380369764715016e-06, + "loss": 0.6173, + "step": 38385 + }, + { + "epoch": 0.6632914017141277, + "grad_norm": 1.7080945983426314, + "learning_rate": 5.379873419234907e-06, + "loss": 0.2005, + "step": 38386 + }, + { + "epoch": 0.6633086812275366, + "grad_norm": 1.6026518566035837, + "learning_rate": 5.379377088225379e-06, + "loss": 0.3771, + "step": 38387 + }, + { + "epoch": 0.6633259607409455, + "grad_norm": 1.455491678753232, + "learning_rate": 5.378880771688e-06, + "loss": 0.4713, + "step": 38388 + }, + { + "epoch": 0.6633432402543544, + "grad_norm": 1.1353233484466392, + "learning_rate": 5.378384469624312e-06, + "loss": 0.2177, + "step": 38389 + }, + { + "epoch": 0.6633605197677633, + "grad_norm": 1.3513981540782405, + "learning_rate": 5.3778881820358756e-06, + "loss": 0.6763, + "step": 38390 + }, + { + "epoch": 0.6633777992811722, + "grad_norm": 1.754758939713548, + "learning_rate": 5.377391908924248e-06, + "loss": 0.3427, + "step": 38391 + }, + { + "epoch": 0.6633950787945812, + "grad_norm": 1.1090243177823444, + "learning_rate": 5.376895650290976e-06, + "loss": 0.5498, + "step": 38392 + }, + { + "epoch": 0.6634123583079901, + "grad_norm": 0.9579566537031542, + "learning_rate": 5.376399406137622e-06, + "loss": 0.2974, + "step": 38393 + }, + { + "epoch": 0.663429637821399, + "grad_norm": 1.2631854720379347, + "learning_rate": 5.375903176465734e-06, + "loss": 0.3887, + "step": 38394 + }, + { + "epoch": 0.6634469173348079, + "grad_norm": 1.1864766732917378, + "learning_rate": 5.3754069612768675e-06, + "loss": 0.1544, + "step": 38395 + }, + { + "epoch": 0.6634641968482168, + "grad_norm": 1.380126112840061, + "learning_rate": 5.374910760572578e-06, + "loss": 0.3111, + "step": 38396 + }, + { + "epoch": 0.6634814763616257, + "grad_norm": 1.5525742342532494, + "learning_rate": 5.374414574354424e-06, + "loss": 0.3228, + "step": 38397 + }, + { + "epoch": 0.6634987558750346, + "grad_norm": 1.1475325352385186, + "learning_rate": 5.373918402623951e-06, + "loss": 0.3457, + "step": 38398 + }, + { + "epoch": 0.6635160353884435, + "grad_norm": 1.9130435357120803, + "learning_rate": 5.37342224538272e-06, + "loss": 0.3667, + "step": 38399 + }, + { + "epoch": 0.6635333149018524, + "grad_norm": 3.2329296244193038, + "learning_rate": 5.3729261026322795e-06, + "loss": 0.3147, + "step": 38400 + }, + { + "epoch": 0.6635505944152613, + "grad_norm": 1.0228063575942559, + "learning_rate": 5.372429974374186e-06, + "loss": 0.3548, + "step": 38401 + }, + { + "epoch": 0.6635678739286701, + "grad_norm": 1.5109021518494465, + "learning_rate": 5.3719338606099965e-06, + "loss": 0.3211, + "step": 38402 + }, + { + "epoch": 0.663585153442079, + "grad_norm": 1.5077436027861157, + "learning_rate": 5.371437761341259e-06, + "loss": 0.5506, + "step": 38403 + }, + { + "epoch": 0.6636024329554879, + "grad_norm": 1.0239242906191148, + "learning_rate": 5.3709416765695345e-06, + "loss": 0.3673, + "step": 38404 + }, + { + "epoch": 0.6636197124688968, + "grad_norm": 1.3712259848636983, + "learning_rate": 5.3704456062963674e-06, + "loss": 0.3873, + "step": 38405 + }, + { + "epoch": 0.6636369919823057, + "grad_norm": 1.5311844131598535, + "learning_rate": 5.369949550523317e-06, + "loss": 0.4221, + "step": 38406 + }, + { + "epoch": 0.6636542714957147, + "grad_norm": 1.1724617984622103, + "learning_rate": 5.369453509251936e-06, + "loss": 0.3983, + "step": 38407 + }, + { + "epoch": 0.6636715510091236, + "grad_norm": 0.8528582767993401, + "learning_rate": 5.368957482483783e-06, + "loss": 0.3109, + "step": 38408 + }, + { + "epoch": 0.6636888305225325, + "grad_norm": 1.0113326760011199, + "learning_rate": 5.368461470220406e-06, + "loss": 0.5238, + "step": 38409 + }, + { + "epoch": 0.6637061100359414, + "grad_norm": 1.0499320615326506, + "learning_rate": 5.367965472463357e-06, + "loss": 0.2206, + "step": 38410 + }, + { + "epoch": 0.6637233895493503, + "grad_norm": 1.2848327064745901, + "learning_rate": 5.367469489214192e-06, + "loss": 0.4211, + "step": 38411 + }, + { + "epoch": 0.6637406690627592, + "grad_norm": 0.9112267876328002, + "learning_rate": 5.366973520474463e-06, + "loss": 0.3863, + "step": 38412 + }, + { + "epoch": 0.6637579485761681, + "grad_norm": 1.2793082172053714, + "learning_rate": 5.366477566245731e-06, + "loss": 0.3813, + "step": 38413 + }, + { + "epoch": 0.663775228089577, + "grad_norm": 1.5732382069989455, + "learning_rate": 5.36598162652954e-06, + "loss": 0.4266, + "step": 38414 + }, + { + "epoch": 0.6637925076029859, + "grad_norm": 0.929750184772329, + "learning_rate": 5.365485701327449e-06, + "loss": 0.4345, + "step": 38415 + }, + { + "epoch": 0.6638097871163948, + "grad_norm": 0.8207979624040739, + "learning_rate": 5.364989790641004e-06, + "loss": 0.3726, + "step": 38416 + }, + { + "epoch": 0.6638270666298037, + "grad_norm": 0.6000143978933505, + "learning_rate": 5.364493894471765e-06, + "loss": 0.3281, + "step": 38417 + }, + { + "epoch": 0.6638443461432126, + "grad_norm": 0.4696361283374742, + "learning_rate": 5.363998012821283e-06, + "loss": 0.7351, + "step": 38418 + }, + { + "epoch": 0.6638616256566215, + "grad_norm": 0.47287164607004917, + "learning_rate": 5.363502145691116e-06, + "loss": 0.7105, + "step": 38419 + }, + { + "epoch": 0.6638789051700305, + "grad_norm": 1.2337085704004842, + "learning_rate": 5.363006293082813e-06, + "loss": 0.4336, + "step": 38420 + }, + { + "epoch": 0.6638961846834394, + "grad_norm": 1.1316570975394242, + "learning_rate": 5.3625104549979225e-06, + "loss": 0.3998, + "step": 38421 + }, + { + "epoch": 0.6639134641968483, + "grad_norm": 1.329525936013792, + "learning_rate": 5.362014631438004e-06, + "loss": 0.3754, + "step": 38422 + }, + { + "epoch": 0.6639307437102571, + "grad_norm": 1.4894383551835189, + "learning_rate": 5.361518822404601e-06, + "loss": 0.4949, + "step": 38423 + }, + { + "epoch": 0.663948023223666, + "grad_norm": 1.1873573597601743, + "learning_rate": 5.361023027899283e-06, + "loss": 0.3479, + "step": 38424 + }, + { + "epoch": 0.6639653027370749, + "grad_norm": 1.0685171281919412, + "learning_rate": 5.360527247923588e-06, + "loss": 0.319, + "step": 38425 + }, + { + "epoch": 0.6639825822504838, + "grad_norm": 1.8295561793736346, + "learning_rate": 5.360031482479078e-06, + "loss": 0.3994, + "step": 38426 + }, + { + "epoch": 0.6639998617638927, + "grad_norm": 0.8214360977610767, + "learning_rate": 5.359535731567298e-06, + "loss": 0.3437, + "step": 38427 + }, + { + "epoch": 0.6640171412773016, + "grad_norm": 0.9411467244258991, + "learning_rate": 5.35903999518981e-06, + "loss": 0.3474, + "step": 38428 + }, + { + "epoch": 0.6640344207907105, + "grad_norm": 1.2503582951740004, + "learning_rate": 5.358544273348153e-06, + "loss": 0.2982, + "step": 38429 + }, + { + "epoch": 0.6640517003041194, + "grad_norm": 1.6059377025521682, + "learning_rate": 5.358048566043896e-06, + "loss": 0.266, + "step": 38430 + }, + { + "epoch": 0.6640689798175283, + "grad_norm": 1.0551061784148728, + "learning_rate": 5.357552873278583e-06, + "loss": 0.4861, + "step": 38431 + }, + { + "epoch": 0.6640862593309372, + "grad_norm": 1.1836789248451784, + "learning_rate": 5.3570571950537635e-06, + "loss": 0.4447, + "step": 38432 + }, + { + "epoch": 0.6641035388443461, + "grad_norm": 0.7678924345340715, + "learning_rate": 5.356561531370999e-06, + "loss": 0.4786, + "step": 38433 + }, + { + "epoch": 0.664120818357755, + "grad_norm": 1.2075804721479118, + "learning_rate": 5.356065882231831e-06, + "loss": 0.2598, + "step": 38434 + }, + { + "epoch": 0.664138097871164, + "grad_norm": 1.2061166275292452, + "learning_rate": 5.355570247637818e-06, + "loss": 0.4065, + "step": 38435 + }, + { + "epoch": 0.6641553773845729, + "grad_norm": 0.5455234786395009, + "learning_rate": 5.355074627590513e-06, + "loss": 0.6092, + "step": 38436 + }, + { + "epoch": 0.6641726568979818, + "grad_norm": 0.4851064439397856, + "learning_rate": 5.354579022091469e-06, + "loss": 0.495, + "step": 38437 + }, + { + "epoch": 0.6641899364113907, + "grad_norm": 1.1221001006532176, + "learning_rate": 5.3540834311422345e-06, + "loss": 0.2463, + "step": 38438 + }, + { + "epoch": 0.6642072159247996, + "grad_norm": 0.8085625844770854, + "learning_rate": 5.353587854744367e-06, + "loss": 0.6938, + "step": 38439 + }, + { + "epoch": 0.6642244954382085, + "grad_norm": 1.5827849337131208, + "learning_rate": 5.353092292899411e-06, + "loss": 0.3754, + "step": 38440 + }, + { + "epoch": 0.6642417749516174, + "grad_norm": 0.8847192687005075, + "learning_rate": 5.352596745608924e-06, + "loss": 0.4126, + "step": 38441 + }, + { + "epoch": 0.6642590544650263, + "grad_norm": 1.3490142590910947, + "learning_rate": 5.352101212874461e-06, + "loss": 0.3948, + "step": 38442 + }, + { + "epoch": 0.6642763339784352, + "grad_norm": 1.0344530924165745, + "learning_rate": 5.351605694697567e-06, + "loss": 0.3731, + "step": 38443 + }, + { + "epoch": 0.664293613491844, + "grad_norm": 1.0595562142449642, + "learning_rate": 5.3511101910798e-06, + "loss": 0.3685, + "step": 38444 + }, + { + "epoch": 0.6643108930052529, + "grad_norm": 0.6612347750380224, + "learning_rate": 5.350614702022705e-06, + "loss": 0.5842, + "step": 38445 + }, + { + "epoch": 0.6643281725186618, + "grad_norm": 1.4647672503500244, + "learning_rate": 5.350119227527839e-06, + "loss": 0.3456, + "step": 38446 + }, + { + "epoch": 0.6643454520320707, + "grad_norm": 1.7859076048419507, + "learning_rate": 5.349623767596752e-06, + "loss": 0.3012, + "step": 38447 + }, + { + "epoch": 0.6643627315454796, + "grad_norm": 1.3443326725685028, + "learning_rate": 5.349128322231003e-06, + "loss": 0.3941, + "step": 38448 + }, + { + "epoch": 0.6643800110588886, + "grad_norm": 1.058111900975175, + "learning_rate": 5.348632891432131e-06, + "loss": 0.2399, + "step": 38449 + }, + { + "epoch": 0.6643972905722975, + "grad_norm": 1.488696636228264, + "learning_rate": 5.3481374752017e-06, + "loss": 0.4725, + "step": 38450 + }, + { + "epoch": 0.6644145700857064, + "grad_norm": 0.9356987397744263, + "learning_rate": 5.3476420735412506e-06, + "loss": 0.3121, + "step": 38451 + }, + { + "epoch": 0.6644318495991153, + "grad_norm": 1.1007800949598008, + "learning_rate": 5.34714668645234e-06, + "loss": 0.3118, + "step": 38452 + }, + { + "epoch": 0.6644491291125242, + "grad_norm": 1.1264508262253146, + "learning_rate": 5.3466513139365235e-06, + "loss": 0.2723, + "step": 38453 + }, + { + "epoch": 0.6644664086259331, + "grad_norm": 1.2543791822268284, + "learning_rate": 5.346155955995347e-06, + "loss": 0.2878, + "step": 38454 + }, + { + "epoch": 0.664483688139342, + "grad_norm": 1.293101802563403, + "learning_rate": 5.3456606126303655e-06, + "loss": 0.337, + "step": 38455 + }, + { + "epoch": 0.6645009676527509, + "grad_norm": 0.9379192717358753, + "learning_rate": 5.345165283843124e-06, + "loss": 0.4839, + "step": 38456 + }, + { + "epoch": 0.6645182471661598, + "grad_norm": 0.4689860791670401, + "learning_rate": 5.34466996963518e-06, + "loss": 0.7889, + "step": 38457 + }, + { + "epoch": 0.6645355266795687, + "grad_norm": 1.3574847008393538, + "learning_rate": 5.3441746700080835e-06, + "loss": 0.3037, + "step": 38458 + }, + { + "epoch": 0.6645528061929776, + "grad_norm": 0.9914388119276898, + "learning_rate": 5.34367938496339e-06, + "loss": 0.756, + "step": 38459 + }, + { + "epoch": 0.6645700857063865, + "grad_norm": 1.0517238108400482, + "learning_rate": 5.343184114502642e-06, + "loss": 0.1807, + "step": 38460 + }, + { + "epoch": 0.6645873652197954, + "grad_norm": 1.4177270002818765, + "learning_rate": 5.342688858627399e-06, + "loss": 0.3928, + "step": 38461 + }, + { + "epoch": 0.6646046447332044, + "grad_norm": 0.7941338355889468, + "learning_rate": 5.342193617339208e-06, + "loss": 0.3487, + "step": 38462 + }, + { + "epoch": 0.6646219242466133, + "grad_norm": 1.1892768125676323, + "learning_rate": 5.3416983906396135e-06, + "loss": 0.4354, + "step": 38463 + }, + { + "epoch": 0.6646392037600222, + "grad_norm": 1.2895510683193543, + "learning_rate": 5.341203178530181e-06, + "loss": 0.4719, + "step": 38464 + }, + { + "epoch": 0.664656483273431, + "grad_norm": 0.7743298066403169, + "learning_rate": 5.340707981012451e-06, + "loss": 0.9403, + "step": 38465 + }, + { + "epoch": 0.6646737627868399, + "grad_norm": 1.3265471015880168, + "learning_rate": 5.340212798087981e-06, + "loss": 0.2813, + "step": 38466 + }, + { + "epoch": 0.6646910423002488, + "grad_norm": 1.7005656942743501, + "learning_rate": 5.339717629758314e-06, + "loss": 0.3225, + "step": 38467 + }, + { + "epoch": 0.6647083218136577, + "grad_norm": 1.18924252723086, + "learning_rate": 5.3392224760250115e-06, + "loss": 0.348, + "step": 38468 + }, + { + "epoch": 0.6647256013270666, + "grad_norm": 1.2537176030842547, + "learning_rate": 5.3387273368896085e-06, + "loss": 0.3401, + "step": 38469 + }, + { + "epoch": 0.6647428808404755, + "grad_norm": 0.7789616492309035, + "learning_rate": 5.338232212353676e-06, + "loss": 0.8064, + "step": 38470 + }, + { + "epoch": 0.6647601603538844, + "grad_norm": 1.535885516652335, + "learning_rate": 5.337737102418752e-06, + "loss": 0.4893, + "step": 38471 + }, + { + "epoch": 0.6647774398672933, + "grad_norm": 0.7373856870282058, + "learning_rate": 5.337242007086387e-06, + "loss": 0.3483, + "step": 38472 + }, + { + "epoch": 0.6647947193807022, + "grad_norm": 1.5713521993940998, + "learning_rate": 5.3367469263581385e-06, + "loss": 0.2894, + "step": 38473 + }, + { + "epoch": 0.6648119988941111, + "grad_norm": 1.0805805521762446, + "learning_rate": 5.336251860235548e-06, + "loss": 0.5465, + "step": 38474 + }, + { + "epoch": 0.66482927840752, + "grad_norm": 1.3148924147984715, + "learning_rate": 5.335756808720172e-06, + "loss": 0.2925, + "step": 38475 + }, + { + "epoch": 0.664846557920929, + "grad_norm": 1.0845334844656136, + "learning_rate": 5.33526177181356e-06, + "loss": 0.4777, + "step": 38476 + }, + { + "epoch": 0.6648638374343379, + "grad_norm": 1.6388795537517453, + "learning_rate": 5.334766749517266e-06, + "loss": 0.5101, + "step": 38477 + }, + { + "epoch": 0.6648811169477468, + "grad_norm": 1.0268070206646258, + "learning_rate": 5.334271741832835e-06, + "loss": 0.2303, + "step": 38478 + }, + { + "epoch": 0.6648983964611557, + "grad_norm": 0.879349850480305, + "learning_rate": 5.333776748761821e-06, + "loss": 0.4001, + "step": 38479 + }, + { + "epoch": 0.6649156759745646, + "grad_norm": 1.0462361358841834, + "learning_rate": 5.33328177030577e-06, + "loss": 0.2372, + "step": 38480 + }, + { + "epoch": 0.6649329554879735, + "grad_norm": 0.8255749815379059, + "learning_rate": 5.332786806466235e-06, + "loss": 1.0483, + "step": 38481 + }, + { + "epoch": 0.6649502350013824, + "grad_norm": 0.9358028045783804, + "learning_rate": 5.3322918572447715e-06, + "loss": 0.2761, + "step": 38482 + }, + { + "epoch": 0.6649675145147913, + "grad_norm": 1.2807513802285941, + "learning_rate": 5.33179692264292e-06, + "loss": 0.2616, + "step": 38483 + }, + { + "epoch": 0.6649847940282002, + "grad_norm": 1.1202955927479912, + "learning_rate": 5.331302002662238e-06, + "loss": 0.4073, + "step": 38484 + }, + { + "epoch": 0.6650020735416091, + "grad_norm": 0.7239436914636269, + "learning_rate": 5.33080709730427e-06, + "loss": 0.8172, + "step": 38485 + }, + { + "epoch": 0.665019353055018, + "grad_norm": 0.8729206312505385, + "learning_rate": 5.330312206570569e-06, + "loss": 0.547, + "step": 38486 + }, + { + "epoch": 0.6650366325684268, + "grad_norm": 1.4041497825731792, + "learning_rate": 5.329817330462685e-06, + "loss": 0.3908, + "step": 38487 + }, + { + "epoch": 0.6650539120818357, + "grad_norm": 0.996370100584979, + "learning_rate": 5.329322468982172e-06, + "loss": 0.4589, + "step": 38488 + }, + { + "epoch": 0.6650711915952446, + "grad_norm": 1.3645762376873558, + "learning_rate": 5.328827622130572e-06, + "loss": 0.3235, + "step": 38489 + }, + { + "epoch": 0.6650884711086535, + "grad_norm": 1.1174187862299274, + "learning_rate": 5.328332789909443e-06, + "loss": 0.8687, + "step": 38490 + }, + { + "epoch": 0.6651057506220625, + "grad_norm": 2.2351921789344904, + "learning_rate": 5.327837972320327e-06, + "loss": 0.2389, + "step": 38491 + }, + { + "epoch": 0.6651230301354714, + "grad_norm": 0.9308034195941584, + "learning_rate": 5.327343169364777e-06, + "loss": 0.7142, + "step": 38492 + }, + { + "epoch": 0.6651403096488803, + "grad_norm": 1.5330375659456752, + "learning_rate": 5.326848381044347e-06, + "loss": 0.393, + "step": 38493 + }, + { + "epoch": 0.6651575891622892, + "grad_norm": 1.2299492529592642, + "learning_rate": 5.3263536073605806e-06, + "loss": 0.447, + "step": 38494 + }, + { + "epoch": 0.6651748686756981, + "grad_norm": 0.9658587510621992, + "learning_rate": 5.3258588483150335e-06, + "loss": 0.39, + "step": 38495 + }, + { + "epoch": 0.665192148189107, + "grad_norm": 1.5997827493151344, + "learning_rate": 5.325364103909246e-06, + "loss": 0.3424, + "step": 38496 + }, + { + "epoch": 0.6652094277025159, + "grad_norm": 1.6044604320891969, + "learning_rate": 5.3248693741447746e-06, + "loss": 0.3463, + "step": 38497 + }, + { + "epoch": 0.6652267072159248, + "grad_norm": 1.2071545620100539, + "learning_rate": 5.324374659023167e-06, + "loss": 0.2523, + "step": 38498 + }, + { + "epoch": 0.6652439867293337, + "grad_norm": 1.7082475913637316, + "learning_rate": 5.3238799585459774e-06, + "loss": 0.4017, + "step": 38499 + }, + { + "epoch": 0.6652612662427426, + "grad_norm": 1.6792892342041552, + "learning_rate": 5.323385272714747e-06, + "loss": 0.3341, + "step": 38500 + }, + { + "epoch": 0.6652785457561515, + "grad_norm": 1.152612607058719, + "learning_rate": 5.322890601531034e-06, + "loss": 0.6193, + "step": 38501 + }, + { + "epoch": 0.6652958252695604, + "grad_norm": 1.1327383138860208, + "learning_rate": 5.322395944996383e-06, + "loss": 0.3187, + "step": 38502 + }, + { + "epoch": 0.6653131047829693, + "grad_norm": 1.635865624298353, + "learning_rate": 5.321901303112336e-06, + "loss": 0.3062, + "step": 38503 + }, + { + "epoch": 0.6653303842963783, + "grad_norm": 0.9779148200294002, + "learning_rate": 5.321406675880456e-06, + "loss": 0.4126, + "step": 38504 + }, + { + "epoch": 0.6653476638097872, + "grad_norm": 0.48360977339448463, + "learning_rate": 5.320912063302284e-06, + "loss": 0.5178, + "step": 38505 + }, + { + "epoch": 0.6653649433231961, + "grad_norm": 0.5465729142993658, + "learning_rate": 5.3204174653793726e-06, + "loss": 0.7488, + "step": 38506 + }, + { + "epoch": 0.665382222836605, + "grad_norm": 1.1756360211095582, + "learning_rate": 5.319922882113266e-06, + "loss": 0.2286, + "step": 38507 + }, + { + "epoch": 0.6653995023500138, + "grad_norm": 0.9906134847649024, + "learning_rate": 5.319428313505523e-06, + "loss": 0.3453, + "step": 38508 + }, + { + "epoch": 0.6654167818634227, + "grad_norm": 1.3396977514126147, + "learning_rate": 5.3189337595576765e-06, + "loss": 0.4409, + "step": 38509 + }, + { + "epoch": 0.6654340613768316, + "grad_norm": 1.2924484998681178, + "learning_rate": 5.318439220271292e-06, + "loss": 0.3264, + "step": 38510 + }, + { + "epoch": 0.6654513408902405, + "grad_norm": 0.88895019015959, + "learning_rate": 5.3179446956479094e-06, + "loss": 1.1209, + "step": 38511 + }, + { + "epoch": 0.6654686204036494, + "grad_norm": 0.9075661121473714, + "learning_rate": 5.317450185689083e-06, + "loss": 0.3751, + "step": 38512 + }, + { + "epoch": 0.6654858999170583, + "grad_norm": 1.085891415626028, + "learning_rate": 5.316955690396358e-06, + "loss": 0.4456, + "step": 38513 + }, + { + "epoch": 0.6655031794304672, + "grad_norm": 0.7523618200422623, + "learning_rate": 5.3164612097712794e-06, + "loss": 0.1706, + "step": 38514 + }, + { + "epoch": 0.6655204589438761, + "grad_norm": 1.238506943599799, + "learning_rate": 5.315966743815402e-06, + "loss": 0.2906, + "step": 38515 + }, + { + "epoch": 0.665537738457285, + "grad_norm": 0.788873587868787, + "learning_rate": 5.315472292530272e-06, + "loss": 0.3884, + "step": 38516 + }, + { + "epoch": 0.6655550179706939, + "grad_norm": 1.139996653076166, + "learning_rate": 5.314977855917442e-06, + "loss": 0.3557, + "step": 38517 + }, + { + "epoch": 0.6655722974841028, + "grad_norm": 0.5705638894999899, + "learning_rate": 5.314483433978454e-06, + "loss": 0.6607, + "step": 38518 + }, + { + "epoch": 0.6655895769975118, + "grad_norm": 1.1263216473425288, + "learning_rate": 5.313989026714865e-06, + "loss": 0.2019, + "step": 38519 + }, + { + "epoch": 0.6656068565109207, + "grad_norm": 1.8673540155908306, + "learning_rate": 5.3134946341282135e-06, + "loss": 0.3623, + "step": 38520 + }, + { + "epoch": 0.6656241360243296, + "grad_norm": 1.2910118415579843, + "learning_rate": 5.313000256220053e-06, + "loss": 0.2884, + "step": 38521 + }, + { + "epoch": 0.6656414155377385, + "grad_norm": 0.9956144613268255, + "learning_rate": 5.312505892991936e-06, + "loss": 0.3496, + "step": 38522 + }, + { + "epoch": 0.6656586950511474, + "grad_norm": 0.6831768165189647, + "learning_rate": 5.312011544445403e-06, + "loss": 0.4966, + "step": 38523 + }, + { + "epoch": 0.6656759745645563, + "grad_norm": 1.5134904232848478, + "learning_rate": 5.311517210582009e-06, + "loss": 0.436, + "step": 38524 + }, + { + "epoch": 0.6656932540779652, + "grad_norm": 1.0184718806365682, + "learning_rate": 5.311022891403297e-06, + "loss": 0.2847, + "step": 38525 + }, + { + "epoch": 0.6657105335913741, + "grad_norm": 1.3282348470336813, + "learning_rate": 5.310528586910817e-06, + "loss": 0.3033, + "step": 38526 + }, + { + "epoch": 0.665727813104783, + "grad_norm": 1.1583800562774011, + "learning_rate": 5.310034297106117e-06, + "loss": 0.4528, + "step": 38527 + }, + { + "epoch": 0.6657450926181919, + "grad_norm": 0.9542060734088913, + "learning_rate": 5.3095400219907515e-06, + "loss": 0.3444, + "step": 38528 + }, + { + "epoch": 0.6657623721316007, + "grad_norm": 1.5542115608519687, + "learning_rate": 5.309045761566258e-06, + "loss": 0.2353, + "step": 38529 + }, + { + "epoch": 0.6657796516450096, + "grad_norm": 1.2448597401653034, + "learning_rate": 5.308551515834193e-06, + "loss": 0.2164, + "step": 38530 + }, + { + "epoch": 0.6657969311584185, + "grad_norm": 1.416719104537529, + "learning_rate": 5.308057284796099e-06, + "loss": 0.6595, + "step": 38531 + }, + { + "epoch": 0.6658142106718274, + "grad_norm": 1.3770356740333884, + "learning_rate": 5.307563068453525e-06, + "loss": 0.4528, + "step": 38532 + }, + { + "epoch": 0.6658314901852364, + "grad_norm": 1.3734610814961787, + "learning_rate": 5.307068866808025e-06, + "loss": 0.3816, + "step": 38533 + }, + { + "epoch": 0.6658487696986453, + "grad_norm": 1.474845036155656, + "learning_rate": 5.306574679861136e-06, + "loss": 0.3833, + "step": 38534 + }, + { + "epoch": 0.6658660492120542, + "grad_norm": 1.4213915829831774, + "learning_rate": 5.306080507614417e-06, + "loss": 0.3317, + "step": 38535 + }, + { + "epoch": 0.6658833287254631, + "grad_norm": 1.3504486828968778, + "learning_rate": 5.305586350069407e-06, + "loss": 0.3926, + "step": 38536 + }, + { + "epoch": 0.665900608238872, + "grad_norm": 1.4322260546428816, + "learning_rate": 5.3050922072276575e-06, + "loss": 0.3665, + "step": 38537 + }, + { + "epoch": 0.6659178877522809, + "grad_norm": 1.1561417715950606, + "learning_rate": 5.304598079090716e-06, + "loss": 0.6021, + "step": 38538 + }, + { + "epoch": 0.6659351672656898, + "grad_norm": 1.1258535245586305, + "learning_rate": 5.304103965660133e-06, + "loss": 0.2363, + "step": 38539 + }, + { + "epoch": 0.6659524467790987, + "grad_norm": 1.235415655411568, + "learning_rate": 5.3036098669374515e-06, + "loss": 0.4326, + "step": 38540 + }, + { + "epoch": 0.6659697262925076, + "grad_norm": 0.8781386922382476, + "learning_rate": 5.303115782924222e-06, + "loss": 0.3397, + "step": 38541 + }, + { + "epoch": 0.6659870058059165, + "grad_norm": 0.8404181020460664, + "learning_rate": 5.302621713621989e-06, + "loss": 0.237, + "step": 38542 + }, + { + "epoch": 0.6660042853193254, + "grad_norm": 1.1110926096612452, + "learning_rate": 5.302127659032301e-06, + "loss": 0.4181, + "step": 38543 + }, + { + "epoch": 0.6660215648327343, + "grad_norm": 1.4643461009574463, + "learning_rate": 5.30163361915671e-06, + "loss": 0.5185, + "step": 38544 + }, + { + "epoch": 0.6660388443461432, + "grad_norm": 1.749882518423976, + "learning_rate": 5.301139593996756e-06, + "loss": 0.3956, + "step": 38545 + }, + { + "epoch": 0.6660561238595522, + "grad_norm": 1.1530387459497584, + "learning_rate": 5.300645583553993e-06, + "loss": 0.3554, + "step": 38546 + }, + { + "epoch": 0.6660734033729611, + "grad_norm": 1.1104086163963196, + "learning_rate": 5.300151587829961e-06, + "loss": 0.4441, + "step": 38547 + }, + { + "epoch": 0.66609068288637, + "grad_norm": 0.902524153075792, + "learning_rate": 5.299657606826216e-06, + "loss": 0.3536, + "step": 38548 + }, + { + "epoch": 0.6661079623997789, + "grad_norm": 0.9380387159961269, + "learning_rate": 5.2991636405442935e-06, + "loss": 0.3703, + "step": 38549 + }, + { + "epoch": 0.6661252419131877, + "grad_norm": 1.2757410508397697, + "learning_rate": 5.298669688985755e-06, + "loss": 0.542, + "step": 38550 + }, + { + "epoch": 0.6661425214265966, + "grad_norm": 1.0337184932166887, + "learning_rate": 5.298175752152136e-06, + "loss": 0.6452, + "step": 38551 + }, + { + "epoch": 0.6661598009400055, + "grad_norm": 1.0603741546657273, + "learning_rate": 5.2976818300449915e-06, + "loss": 0.3274, + "step": 38552 + }, + { + "epoch": 0.6661770804534144, + "grad_norm": 0.8850068902730891, + "learning_rate": 5.297187922665865e-06, + "loss": 0.8215, + "step": 38553 + }, + { + "epoch": 0.6661943599668233, + "grad_norm": 1.386416287334243, + "learning_rate": 5.2966940300163005e-06, + "loss": 0.3762, + "step": 38554 + }, + { + "epoch": 0.6662116394802322, + "grad_norm": 1.101353276523591, + "learning_rate": 5.2962001520978475e-06, + "loss": 0.2625, + "step": 38555 + }, + { + "epoch": 0.6662289189936411, + "grad_norm": 1.4850777555248695, + "learning_rate": 5.295706288912053e-06, + "loss": 0.3719, + "step": 38556 + }, + { + "epoch": 0.66624619850705, + "grad_norm": 1.2527110325888995, + "learning_rate": 5.2952124404604665e-06, + "loss": 0.2076, + "step": 38557 + }, + { + "epoch": 0.6662634780204589, + "grad_norm": 1.3839222743318906, + "learning_rate": 5.29471860674463e-06, + "loss": 0.4898, + "step": 38558 + }, + { + "epoch": 0.6662807575338678, + "grad_norm": 1.2942371398851416, + "learning_rate": 5.294224787766096e-06, + "loss": 0.4545, + "step": 38559 + }, + { + "epoch": 0.6662980370472767, + "grad_norm": 1.312199655848344, + "learning_rate": 5.293730983526403e-06, + "loss": 0.443, + "step": 38560 + }, + { + "epoch": 0.6663153165606857, + "grad_norm": 1.1491617851172247, + "learning_rate": 5.293237194027102e-06, + "loss": 0.3627, + "step": 38561 + }, + { + "epoch": 0.6663325960740946, + "grad_norm": 1.5557515961683814, + "learning_rate": 5.292743419269741e-06, + "loss": 0.4871, + "step": 38562 + }, + { + "epoch": 0.6663498755875035, + "grad_norm": 0.9525064535480147, + "learning_rate": 5.292249659255868e-06, + "loss": 0.3732, + "step": 38563 + }, + { + "epoch": 0.6663671551009124, + "grad_norm": 0.9807887286762693, + "learning_rate": 5.2917559139870275e-06, + "loss": 0.4579, + "step": 38564 + }, + { + "epoch": 0.6663844346143213, + "grad_norm": 0.776910636155216, + "learning_rate": 5.291262183464761e-06, + "loss": 0.3402, + "step": 38565 + }, + { + "epoch": 0.6664017141277302, + "grad_norm": 2.2878653826171584, + "learning_rate": 5.29076846769062e-06, + "loss": 0.2991, + "step": 38566 + }, + { + "epoch": 0.6664189936411391, + "grad_norm": 1.7895756157249216, + "learning_rate": 5.29027476666615e-06, + "loss": 0.7915, + "step": 38567 + }, + { + "epoch": 0.666436273154548, + "grad_norm": 1.6507783752339078, + "learning_rate": 5.289781080392899e-06, + "loss": 0.3961, + "step": 38568 + }, + { + "epoch": 0.6664535526679569, + "grad_norm": 1.6715727971955192, + "learning_rate": 5.289287408872411e-06, + "loss": 0.412, + "step": 38569 + }, + { + "epoch": 0.6664708321813658, + "grad_norm": 1.093587511014671, + "learning_rate": 5.288793752106235e-06, + "loss": 0.7186, + "step": 38570 + }, + { + "epoch": 0.6664881116947746, + "grad_norm": 1.3429684864704956, + "learning_rate": 5.288300110095912e-06, + "loss": 0.3562, + "step": 38571 + }, + { + "epoch": 0.6665053912081835, + "grad_norm": 1.677643466099499, + "learning_rate": 5.28780648284299e-06, + "loss": 0.4028, + "step": 38572 + }, + { + "epoch": 0.6665226707215924, + "grad_norm": 0.9651848897602748, + "learning_rate": 5.287312870349017e-06, + "loss": 0.4228, + "step": 38573 + }, + { + "epoch": 0.6665399502350013, + "grad_norm": 0.9652664360023232, + "learning_rate": 5.286819272615542e-06, + "loss": 0.3058, + "step": 38574 + }, + { + "epoch": 0.6665572297484103, + "grad_norm": 1.046752891812138, + "learning_rate": 5.286325689644108e-06, + "loss": 0.3779, + "step": 38575 + }, + { + "epoch": 0.6665745092618192, + "grad_norm": 1.2239598254804351, + "learning_rate": 5.285832121436254e-06, + "loss": 0.2572, + "step": 38576 + }, + { + "epoch": 0.6665917887752281, + "grad_norm": 1.2609503405545266, + "learning_rate": 5.285338567993533e-06, + "loss": 0.3955, + "step": 38577 + }, + { + "epoch": 0.666609068288637, + "grad_norm": 0.7588266260205688, + "learning_rate": 5.284845029317491e-06, + "loss": 0.5264, + "step": 38578 + }, + { + "epoch": 0.6666263478020459, + "grad_norm": 1.275149517697493, + "learning_rate": 5.284351505409675e-06, + "loss": 0.3097, + "step": 38579 + }, + { + "epoch": 0.6666436273154548, + "grad_norm": 1.1596033868006732, + "learning_rate": 5.2838579962716265e-06, + "loss": 0.4991, + "step": 38580 + }, + { + "epoch": 0.6666609068288637, + "grad_norm": 1.285677197455377, + "learning_rate": 5.2833645019048954e-06, + "loss": 0.4839, + "step": 38581 + }, + { + "epoch": 0.6666781863422726, + "grad_norm": 1.0106675304042845, + "learning_rate": 5.282871022311023e-06, + "loss": 0.2819, + "step": 38582 + }, + { + "epoch": 0.6666954658556815, + "grad_norm": 1.0425192101719563, + "learning_rate": 5.2823775574915555e-06, + "loss": 0.4247, + "step": 38583 + }, + { + "epoch": 0.6667127453690904, + "grad_norm": 1.009263981634197, + "learning_rate": 5.281884107448044e-06, + "loss": 0.2648, + "step": 38584 + }, + { + "epoch": 0.6667300248824993, + "grad_norm": 0.917337323403793, + "learning_rate": 5.2813906721820265e-06, + "loss": 0.3923, + "step": 38585 + }, + { + "epoch": 0.6667473043959082, + "grad_norm": 0.7757614107261595, + "learning_rate": 5.280897251695057e-06, + "loss": 0.3744, + "step": 38586 + }, + { + "epoch": 0.6667645839093171, + "grad_norm": 1.2173266581706181, + "learning_rate": 5.28040384598867e-06, + "loss": 0.1826, + "step": 38587 + }, + { + "epoch": 0.666781863422726, + "grad_norm": 2.176251901292593, + "learning_rate": 5.279910455064421e-06, + "loss": 0.3569, + "step": 38588 + }, + { + "epoch": 0.666799142936135, + "grad_norm": 1.5130673567224076, + "learning_rate": 5.279417078923845e-06, + "loss": 0.2956, + "step": 38589 + }, + { + "epoch": 0.6668164224495439, + "grad_norm": 1.2775307987756548, + "learning_rate": 5.2789237175685005e-06, + "loss": 0.4735, + "step": 38590 + }, + { + "epoch": 0.6668337019629528, + "grad_norm": 0.6449555704958604, + "learning_rate": 5.278430370999921e-06, + "loss": 0.4953, + "step": 38591 + }, + { + "epoch": 0.6668509814763616, + "grad_norm": 0.9301245344511776, + "learning_rate": 5.27793703921966e-06, + "loss": 0.446, + "step": 38592 + }, + { + "epoch": 0.6668682609897705, + "grad_norm": 1.3708527508843362, + "learning_rate": 5.277443722229256e-06, + "loss": 0.3953, + "step": 38593 + }, + { + "epoch": 0.6668855405031794, + "grad_norm": 1.4109643828753635, + "learning_rate": 5.2769504200302626e-06, + "loss": 0.3593, + "step": 38594 + }, + { + "epoch": 0.6669028200165883, + "grad_norm": 2.0623858432419664, + "learning_rate": 5.2764571326242135e-06, + "loss": 0.3431, + "step": 38595 + }, + { + "epoch": 0.6669200995299972, + "grad_norm": 1.2580229155334772, + "learning_rate": 5.27596386001266e-06, + "loss": 0.3597, + "step": 38596 + }, + { + "epoch": 0.6669373790434061, + "grad_norm": 1.6584388794501117, + "learning_rate": 5.275470602197152e-06, + "loss": 0.4384, + "step": 38597 + }, + { + "epoch": 0.666954658556815, + "grad_norm": 1.3779702447274318, + "learning_rate": 5.274977359179223e-06, + "loss": 0.4572, + "step": 38598 + }, + { + "epoch": 0.6669719380702239, + "grad_norm": 0.8222087618897189, + "learning_rate": 5.2744841309604285e-06, + "loss": 0.3725, + "step": 38599 + }, + { + "epoch": 0.6669892175836328, + "grad_norm": 1.1046657795768644, + "learning_rate": 5.273990917542307e-06, + "loss": 0.4878, + "step": 38600 + }, + { + "epoch": 0.6670064970970417, + "grad_norm": 1.9194860323072607, + "learning_rate": 5.2734977189264024e-06, + "loss": 0.4659, + "step": 38601 + }, + { + "epoch": 0.6670237766104506, + "grad_norm": 1.2663705712457065, + "learning_rate": 5.273004535114263e-06, + "loss": 0.2822, + "step": 38602 + }, + { + "epoch": 0.6670410561238596, + "grad_norm": 0.9883434699063677, + "learning_rate": 5.272511366107437e-06, + "loss": 0.4155, + "step": 38603 + }, + { + "epoch": 0.6670583356372685, + "grad_norm": 1.4513940980674838, + "learning_rate": 5.27201821190746e-06, + "loss": 0.459, + "step": 38604 + }, + { + "epoch": 0.6670756151506774, + "grad_norm": 0.838397614309904, + "learning_rate": 5.271525072515886e-06, + "loss": 0.4209, + "step": 38605 + }, + { + "epoch": 0.6670928946640863, + "grad_norm": 1.423681714414085, + "learning_rate": 5.271031947934251e-06, + "loss": 0.3611, + "step": 38606 + }, + { + "epoch": 0.6671101741774952, + "grad_norm": 1.2407261880711604, + "learning_rate": 5.2705388381641036e-06, + "loss": 0.3522, + "step": 38607 + }, + { + "epoch": 0.6671274536909041, + "grad_norm": 1.124273138090631, + "learning_rate": 5.270045743206991e-06, + "loss": 0.3673, + "step": 38608 + }, + { + "epoch": 0.667144733204313, + "grad_norm": 1.2098030561706845, + "learning_rate": 5.26955266306445e-06, + "loss": 0.335, + "step": 38609 + }, + { + "epoch": 0.6671620127177219, + "grad_norm": 1.396199880221026, + "learning_rate": 5.269059597738036e-06, + "loss": 0.4964, + "step": 38610 + }, + { + "epoch": 0.6671792922311308, + "grad_norm": 1.3724914564316462, + "learning_rate": 5.2685665472292805e-06, + "loss": 0.4671, + "step": 38611 + }, + { + "epoch": 0.6671965717445397, + "grad_norm": 2.109820817215601, + "learning_rate": 5.268073511539735e-06, + "loss": 0.4649, + "step": 38612 + }, + { + "epoch": 0.6672138512579486, + "grad_norm": 1.2450008747016232, + "learning_rate": 5.267580490670943e-06, + "loss": 0.3096, + "step": 38613 + }, + { + "epoch": 0.6672311307713574, + "grad_norm": 1.4732920000795313, + "learning_rate": 5.267087484624451e-06, + "loss": 0.2864, + "step": 38614 + }, + { + "epoch": 0.6672484102847663, + "grad_norm": 1.3716489656160664, + "learning_rate": 5.266594493401802e-06, + "loss": 0.3233, + "step": 38615 + }, + { + "epoch": 0.6672656897981752, + "grad_norm": 1.1621453797176904, + "learning_rate": 5.266101517004536e-06, + "loss": 0.3166, + "step": 38616 + }, + { + "epoch": 0.6672829693115842, + "grad_norm": 2.555773441694063, + "learning_rate": 5.265608555434198e-06, + "loss": 0.3201, + "step": 38617 + }, + { + "epoch": 0.6673002488249931, + "grad_norm": 1.0411750261729622, + "learning_rate": 5.265115608692335e-06, + "loss": 0.3795, + "step": 38618 + }, + { + "epoch": 0.667317528338402, + "grad_norm": 1.19901901894453, + "learning_rate": 5.2646226767804935e-06, + "loss": 0.2761, + "step": 38619 + }, + { + "epoch": 0.6673348078518109, + "grad_norm": 1.3935396262332396, + "learning_rate": 5.264129759700209e-06, + "loss": 0.4286, + "step": 38620 + }, + { + "epoch": 0.6673520873652198, + "grad_norm": 0.9903324709524104, + "learning_rate": 5.263636857453034e-06, + "loss": 0.6657, + "step": 38621 + }, + { + "epoch": 0.6673693668786287, + "grad_norm": 1.1614730607943702, + "learning_rate": 5.263143970040505e-06, + "loss": 0.5106, + "step": 38622 + }, + { + "epoch": 0.6673866463920376, + "grad_norm": 0.9154032787409755, + "learning_rate": 5.2626510974641685e-06, + "loss": 0.3107, + "step": 38623 + }, + { + "epoch": 0.6674039259054465, + "grad_norm": 0.8919365940616072, + "learning_rate": 5.26215823972557e-06, + "loss": 0.5155, + "step": 38624 + }, + { + "epoch": 0.6674212054188554, + "grad_norm": 1.293260590181387, + "learning_rate": 5.261665396826255e-06, + "loss": 0.3012, + "step": 38625 + }, + { + "epoch": 0.6674384849322643, + "grad_norm": 2.036937670741018, + "learning_rate": 5.261172568767763e-06, + "loss": 0.3067, + "step": 38626 + }, + { + "epoch": 0.6674557644456732, + "grad_norm": 0.8796540687396147, + "learning_rate": 5.260679755551637e-06, + "loss": 0.4638, + "step": 38627 + }, + { + "epoch": 0.6674730439590821, + "grad_norm": 1.322102732889926, + "learning_rate": 5.260186957179425e-06, + "loss": 0.3973, + "step": 38628 + }, + { + "epoch": 0.667490323472491, + "grad_norm": 1.347168588177511, + "learning_rate": 5.259694173652659e-06, + "loss": 0.3918, + "step": 38629 + }, + { + "epoch": 0.6675076029859, + "grad_norm": 0.9921746582522691, + "learning_rate": 5.2592014049729e-06, + "loss": 0.2221, + "step": 38630 + }, + { + "epoch": 0.6675248824993089, + "grad_norm": 0.9598303394286004, + "learning_rate": 5.2587086511416776e-06, + "loss": 0.378, + "step": 38631 + }, + { + "epoch": 0.6675421620127178, + "grad_norm": 1.2520379223439821, + "learning_rate": 5.258215912160545e-06, + "loss": 0.2557, + "step": 38632 + }, + { + "epoch": 0.6675594415261267, + "grad_norm": 1.30126802652124, + "learning_rate": 5.2577231880310365e-06, + "loss": 0.4078, + "step": 38633 + }, + { + "epoch": 0.6675767210395356, + "grad_norm": 0.8322229474470925, + "learning_rate": 5.257230478754702e-06, + "loss": 0.5781, + "step": 38634 + }, + { + "epoch": 0.6675940005529444, + "grad_norm": 1.1014438206020516, + "learning_rate": 5.2567377843330795e-06, + "loss": 0.3788, + "step": 38635 + }, + { + "epoch": 0.6676112800663533, + "grad_norm": 1.4103570999516652, + "learning_rate": 5.256245104767714e-06, + "loss": 0.3252, + "step": 38636 + }, + { + "epoch": 0.6676285595797622, + "grad_norm": 1.551411270949581, + "learning_rate": 5.255752440060154e-06, + "loss": 0.4962, + "step": 38637 + }, + { + "epoch": 0.6676458390931711, + "grad_norm": 2.1717334569935876, + "learning_rate": 5.255259790211934e-06, + "loss": 0.3438, + "step": 38638 + }, + { + "epoch": 0.66766311860658, + "grad_norm": 1.1242457267905392, + "learning_rate": 5.254767155224605e-06, + "loss": 0.6261, + "step": 38639 + }, + { + "epoch": 0.6676803981199889, + "grad_norm": 1.1904755068594437, + "learning_rate": 5.2542745350997e-06, + "loss": 0.2769, + "step": 38640 + }, + { + "epoch": 0.6676976776333978, + "grad_norm": 1.3694408427386, + "learning_rate": 5.2537819298387705e-06, + "loss": 0.2489, + "step": 38641 + }, + { + "epoch": 0.6677149571468067, + "grad_norm": 1.4692120325261968, + "learning_rate": 5.2532893394433546e-06, + "loss": 0.3518, + "step": 38642 + }, + { + "epoch": 0.6677322366602156, + "grad_norm": 1.2607807154290205, + "learning_rate": 5.252796763915002e-06, + "loss": 0.1757, + "step": 38643 + }, + { + "epoch": 0.6677495161736245, + "grad_norm": 1.69331509557868, + "learning_rate": 5.252304203255247e-06, + "loss": 0.609, + "step": 38644 + }, + { + "epoch": 0.6677667956870335, + "grad_norm": 0.9474829620722682, + "learning_rate": 5.25181165746564e-06, + "loss": 0.5439, + "step": 38645 + }, + { + "epoch": 0.6677840752004424, + "grad_norm": 1.0455154896586307, + "learning_rate": 5.251319126547716e-06, + "loss": 0.6454, + "step": 38646 + }, + { + "epoch": 0.6678013547138513, + "grad_norm": 1.3280352640679762, + "learning_rate": 5.25082661050302e-06, + "loss": 0.4566, + "step": 38647 + }, + { + "epoch": 0.6678186342272602, + "grad_norm": 1.3097519615536928, + "learning_rate": 5.250334109333102e-06, + "loss": 0.3912, + "step": 38648 + }, + { + "epoch": 0.6678359137406691, + "grad_norm": 1.0832429305437923, + "learning_rate": 5.249841623039493e-06, + "loss": 0.4643, + "step": 38649 + }, + { + "epoch": 0.667853193254078, + "grad_norm": 1.7693024151459822, + "learning_rate": 5.249349151623745e-06, + "loss": 0.5223, + "step": 38650 + }, + { + "epoch": 0.6678704727674869, + "grad_norm": 1.009742084798312, + "learning_rate": 5.2488566950873935e-06, + "loss": 0.3174, + "step": 38651 + }, + { + "epoch": 0.6678877522808958, + "grad_norm": 0.8054154201946879, + "learning_rate": 5.248364253431984e-06, + "loss": 0.2231, + "step": 38652 + }, + { + "epoch": 0.6679050317943047, + "grad_norm": 1.1109935966235678, + "learning_rate": 5.247871826659057e-06, + "loss": 0.3959, + "step": 38653 + }, + { + "epoch": 0.6679223113077136, + "grad_norm": 1.142491614526194, + "learning_rate": 5.247379414770162e-06, + "loss": 0.3977, + "step": 38654 + }, + { + "epoch": 0.6679395908211225, + "grad_norm": 1.034393941309353, + "learning_rate": 5.246887017766831e-06, + "loss": 0.2963, + "step": 38655 + }, + { + "epoch": 0.6679568703345313, + "grad_norm": 1.247677149319072, + "learning_rate": 5.246394635650616e-06, + "loss": 0.2656, + "step": 38656 + }, + { + "epoch": 0.6679741498479402, + "grad_norm": 1.0103207827118732, + "learning_rate": 5.24590226842305e-06, + "loss": 0.4487, + "step": 38657 + }, + { + "epoch": 0.6679914293613491, + "grad_norm": 0.9953810398467156, + "learning_rate": 5.24540991608568e-06, + "loss": 0.2883, + "step": 38658 + }, + { + "epoch": 0.668008708874758, + "grad_norm": 1.1470457243422645, + "learning_rate": 5.24491757864005e-06, + "loss": 0.5254, + "step": 38659 + }, + { + "epoch": 0.668025988388167, + "grad_norm": 1.2160348173968232, + "learning_rate": 5.244425256087696e-06, + "loss": 0.3148, + "step": 38660 + }, + { + "epoch": 0.6680432679015759, + "grad_norm": 1.2518907204453966, + "learning_rate": 5.243932948430169e-06, + "loss": 0.5834, + "step": 38661 + }, + { + "epoch": 0.6680605474149848, + "grad_norm": 1.0564578370225532, + "learning_rate": 5.243440655668999e-06, + "loss": 0.3097, + "step": 38662 + }, + { + "epoch": 0.6680778269283937, + "grad_norm": 1.2324122691872612, + "learning_rate": 5.2429483778057366e-06, + "loss": 0.2688, + "step": 38663 + }, + { + "epoch": 0.6680951064418026, + "grad_norm": 0.9223203019564847, + "learning_rate": 5.24245611484192e-06, + "loss": 0.2488, + "step": 38664 + }, + { + "epoch": 0.6681123859552115, + "grad_norm": 0.6147692264440446, + "learning_rate": 5.241963866779096e-06, + "loss": 0.8124, + "step": 38665 + }, + { + "epoch": 0.6681296654686204, + "grad_norm": 0.5428568240900805, + "learning_rate": 5.2414716336187995e-06, + "loss": 0.5558, + "step": 38666 + }, + { + "epoch": 0.6681469449820293, + "grad_norm": 1.3746402108896336, + "learning_rate": 5.240979415362581e-06, + "loss": 0.2657, + "step": 38667 + }, + { + "epoch": 0.6681642244954382, + "grad_norm": 0.9857967182319802, + "learning_rate": 5.240487212011974e-06, + "loss": 0.2537, + "step": 38668 + }, + { + "epoch": 0.6681815040088471, + "grad_norm": 1.4440139424934773, + "learning_rate": 5.23999502356852e-06, + "loss": 0.3639, + "step": 38669 + }, + { + "epoch": 0.668198783522256, + "grad_norm": 1.1777651360032793, + "learning_rate": 5.239502850033764e-06, + "loss": 0.4779, + "step": 38670 + }, + { + "epoch": 0.6682160630356649, + "grad_norm": 1.190188347650885, + "learning_rate": 5.239010691409246e-06, + "loss": 0.4782, + "step": 38671 + }, + { + "epoch": 0.6682333425490738, + "grad_norm": 0.6979269170561189, + "learning_rate": 5.238518547696513e-06, + "loss": 0.2621, + "step": 38672 + }, + { + "epoch": 0.6682506220624828, + "grad_norm": 1.7337791038724095, + "learning_rate": 5.238026418897097e-06, + "loss": 0.293, + "step": 38673 + }, + { + "epoch": 0.6682679015758917, + "grad_norm": 1.458986540775482, + "learning_rate": 5.2375343050125484e-06, + "loss": 0.514, + "step": 38674 + }, + { + "epoch": 0.6682851810893006, + "grad_norm": 1.1939809020706844, + "learning_rate": 5.237042206044401e-06, + "loss": 0.6935, + "step": 38675 + }, + { + "epoch": 0.6683024606027095, + "grad_norm": 0.9474573024980064, + "learning_rate": 5.236550121994197e-06, + "loss": 0.3107, + "step": 38676 + }, + { + "epoch": 0.6683197401161183, + "grad_norm": 0.7124799219928576, + "learning_rate": 5.236058052863486e-06, + "loss": 0.2169, + "step": 38677 + }, + { + "epoch": 0.6683370196295272, + "grad_norm": 1.29640394162757, + "learning_rate": 5.2355659986538e-06, + "loss": 0.3293, + "step": 38678 + }, + { + "epoch": 0.6683542991429361, + "grad_norm": 0.7119866212925761, + "learning_rate": 5.235073959366686e-06, + "loss": 0.4484, + "step": 38679 + }, + { + "epoch": 0.668371578656345, + "grad_norm": 1.2716576348602175, + "learning_rate": 5.234581935003679e-06, + "loss": 0.4523, + "step": 38680 + }, + { + "epoch": 0.6683888581697539, + "grad_norm": 1.3298931734613924, + "learning_rate": 5.234089925566324e-06, + "loss": 0.4499, + "step": 38681 + }, + { + "epoch": 0.6684061376831628, + "grad_norm": 0.6169875473699901, + "learning_rate": 5.23359793105616e-06, + "loss": 0.5573, + "step": 38682 + }, + { + "epoch": 0.6684234171965717, + "grad_norm": 1.0536798068948592, + "learning_rate": 5.233105951474735e-06, + "loss": 0.2517, + "step": 38683 + }, + { + "epoch": 0.6684406967099806, + "grad_norm": 0.8107334390722133, + "learning_rate": 5.232613986823579e-06, + "loss": 0.4272, + "step": 38684 + }, + { + "epoch": 0.6684579762233895, + "grad_norm": 1.2153289235387714, + "learning_rate": 5.232122037104244e-06, + "loss": 0.4727, + "step": 38685 + }, + { + "epoch": 0.6684752557367984, + "grad_norm": 1.5205930300348398, + "learning_rate": 5.231630102318259e-06, + "loss": 0.4448, + "step": 38686 + }, + { + "epoch": 0.6684925352502074, + "grad_norm": 2.6315363029292014, + "learning_rate": 5.231138182467172e-06, + "loss": 0.3103, + "step": 38687 + }, + { + "epoch": 0.6685098147636163, + "grad_norm": 0.9794823423597322, + "learning_rate": 5.230646277552528e-06, + "loss": 0.5446, + "step": 38688 + }, + { + "epoch": 0.6685270942770252, + "grad_norm": 0.9030556675439133, + "learning_rate": 5.230154387575856e-06, + "loss": 0.4432, + "step": 38689 + }, + { + "epoch": 0.6685443737904341, + "grad_norm": 0.9427659743319029, + "learning_rate": 5.229662512538709e-06, + "loss": 0.4193, + "step": 38690 + }, + { + "epoch": 0.668561653303843, + "grad_norm": 1.4210924909320761, + "learning_rate": 5.229170652442617e-06, + "loss": 0.4982, + "step": 38691 + }, + { + "epoch": 0.6685789328172519, + "grad_norm": 1.8909313148897782, + "learning_rate": 5.228678807289125e-06, + "loss": 0.3401, + "step": 38692 + }, + { + "epoch": 0.6685962123306608, + "grad_norm": 1.4333205125590502, + "learning_rate": 5.2281869770797726e-06, + "loss": 0.5294, + "step": 38693 + }, + { + "epoch": 0.6686134918440697, + "grad_norm": 0.8013205981451735, + "learning_rate": 5.2276951618161066e-06, + "loss": 0.464, + "step": 38694 + }, + { + "epoch": 0.6686307713574786, + "grad_norm": 1.7496957034511635, + "learning_rate": 5.2272033614996575e-06, + "loss": 0.33, + "step": 38695 + }, + { + "epoch": 0.6686480508708875, + "grad_norm": 1.0391363761739443, + "learning_rate": 5.226711576131973e-06, + "loss": 0.55, + "step": 38696 + }, + { + "epoch": 0.6686653303842964, + "grad_norm": 1.6688065389722497, + "learning_rate": 5.226219805714587e-06, + "loss": 0.3857, + "step": 38697 + }, + { + "epoch": 0.6686826098977052, + "grad_norm": 1.682770860777319, + "learning_rate": 5.225728050249045e-06, + "loss": 0.4345, + "step": 38698 + }, + { + "epoch": 0.6686998894111141, + "grad_norm": 1.3926560538688784, + "learning_rate": 5.225236309736889e-06, + "loss": 0.3468, + "step": 38699 + }, + { + "epoch": 0.668717168924523, + "grad_norm": 0.8265078991876234, + "learning_rate": 5.224744584179651e-06, + "loss": 0.6425, + "step": 38700 + }, + { + "epoch": 0.668734448437932, + "grad_norm": 0.9840778879458935, + "learning_rate": 5.22425287357888e-06, + "loss": 0.4355, + "step": 38701 + }, + { + "epoch": 0.6687517279513409, + "grad_norm": 0.7042122331025537, + "learning_rate": 5.223761177936109e-06, + "loss": 0.7261, + "step": 38702 + }, + { + "epoch": 0.6687690074647498, + "grad_norm": 1.2066484104778037, + "learning_rate": 5.2232694972528785e-06, + "loss": 0.3165, + "step": 38703 + }, + { + "epoch": 0.6687862869781587, + "grad_norm": 1.2789402105450214, + "learning_rate": 5.222777831530734e-06, + "loss": 0.3238, + "step": 38704 + }, + { + "epoch": 0.6688035664915676, + "grad_norm": 1.084168262427369, + "learning_rate": 5.222286180771214e-06, + "loss": 0.4694, + "step": 38705 + }, + { + "epoch": 0.6688208460049765, + "grad_norm": 0.8906606985021779, + "learning_rate": 5.221794544975854e-06, + "loss": 0.3769, + "step": 38706 + }, + { + "epoch": 0.6688381255183854, + "grad_norm": 1.0049879127872423, + "learning_rate": 5.221302924146201e-06, + "loss": 0.4402, + "step": 38707 + }, + { + "epoch": 0.6688554050317943, + "grad_norm": 1.2901310149233143, + "learning_rate": 5.22081131828379e-06, + "loss": 0.2813, + "step": 38708 + }, + { + "epoch": 0.6688726845452032, + "grad_norm": 1.0707950731001565, + "learning_rate": 5.220319727390156e-06, + "loss": 0.4145, + "step": 38709 + }, + { + "epoch": 0.6688899640586121, + "grad_norm": 1.1586201097200022, + "learning_rate": 5.2198281514668445e-06, + "loss": 0.3859, + "step": 38710 + }, + { + "epoch": 0.668907243572021, + "grad_norm": 0.8920933827411056, + "learning_rate": 5.219336590515396e-06, + "loss": 0.6962, + "step": 38711 + }, + { + "epoch": 0.6689245230854299, + "grad_norm": 1.21765494132641, + "learning_rate": 5.21884504453735e-06, + "loss": 0.4205, + "step": 38712 + }, + { + "epoch": 0.6689418025988388, + "grad_norm": 1.562972666922364, + "learning_rate": 5.2183535135342424e-06, + "loss": 0.5644, + "step": 38713 + }, + { + "epoch": 0.6689590821122477, + "grad_norm": 1.1758768560688337, + "learning_rate": 5.217861997507618e-06, + "loss": 0.2939, + "step": 38714 + }, + { + "epoch": 0.6689763616256567, + "grad_norm": 1.3858170249342898, + "learning_rate": 5.217370496459011e-06, + "loss": 0.3542, + "step": 38715 + }, + { + "epoch": 0.6689936411390656, + "grad_norm": 1.8760253387722414, + "learning_rate": 5.216879010389961e-06, + "loss": 0.3485, + "step": 38716 + }, + { + "epoch": 0.6690109206524745, + "grad_norm": 1.6030030368120491, + "learning_rate": 5.216387539302012e-06, + "loss": 0.4736, + "step": 38717 + }, + { + "epoch": 0.6690282001658834, + "grad_norm": 1.9050439944179198, + "learning_rate": 5.215896083196703e-06, + "loss": 0.2425, + "step": 38718 + }, + { + "epoch": 0.6690454796792922, + "grad_norm": 1.8376788836376734, + "learning_rate": 5.21540464207557e-06, + "loss": 0.3537, + "step": 38719 + }, + { + "epoch": 0.6690627591927011, + "grad_norm": 1.0379109650651062, + "learning_rate": 5.2149132159401515e-06, + "loss": 0.3799, + "step": 38720 + }, + { + "epoch": 0.66908003870611, + "grad_norm": 1.078134360511273, + "learning_rate": 5.214421804791987e-06, + "loss": 0.3733, + "step": 38721 + }, + { + "epoch": 0.6690973182195189, + "grad_norm": 1.2583186280554934, + "learning_rate": 5.213930408632618e-06, + "loss": 0.3402, + "step": 38722 + }, + { + "epoch": 0.6691145977329278, + "grad_norm": 1.5924082169691727, + "learning_rate": 5.213439027463586e-06, + "loss": 0.4574, + "step": 38723 + }, + { + "epoch": 0.6691318772463367, + "grad_norm": 1.273350027526555, + "learning_rate": 5.212947661286423e-06, + "loss": 0.3729, + "step": 38724 + }, + { + "epoch": 0.6691491567597456, + "grad_norm": 1.0125234192918322, + "learning_rate": 5.212456310102676e-06, + "loss": 0.2779, + "step": 38725 + }, + { + "epoch": 0.6691664362731545, + "grad_norm": 1.172561451251796, + "learning_rate": 5.211964973913876e-06, + "loss": 0.3669, + "step": 38726 + }, + { + "epoch": 0.6691837157865634, + "grad_norm": 1.3165036767257983, + "learning_rate": 5.211473652721563e-06, + "loss": 0.406, + "step": 38727 + }, + { + "epoch": 0.6692009952999723, + "grad_norm": 0.9796064032035594, + "learning_rate": 5.2109823465272855e-06, + "loss": 0.543, + "step": 38728 + }, + { + "epoch": 0.6692182748133813, + "grad_norm": 0.7556733042571345, + "learning_rate": 5.21049105533257e-06, + "loss": 0.255, + "step": 38729 + }, + { + "epoch": 0.6692355543267902, + "grad_norm": 0.8091210130987504, + "learning_rate": 5.209999779138965e-06, + "loss": 0.3802, + "step": 38730 + }, + { + "epoch": 0.6692528338401991, + "grad_norm": 1.0294607902469692, + "learning_rate": 5.209508517948001e-06, + "loss": 0.4097, + "step": 38731 + }, + { + "epoch": 0.669270113353608, + "grad_norm": 1.1183589455438057, + "learning_rate": 5.2090172717612184e-06, + "loss": 0.4612, + "step": 38732 + }, + { + "epoch": 0.6692873928670169, + "grad_norm": 1.619102364883437, + "learning_rate": 5.20852604058016e-06, + "loss": 0.33, + "step": 38733 + }, + { + "epoch": 0.6693046723804258, + "grad_norm": 1.7549038741867642, + "learning_rate": 5.208034824406365e-06, + "loss": 0.3851, + "step": 38734 + }, + { + "epoch": 0.6693219518938347, + "grad_norm": 1.1866018089342152, + "learning_rate": 5.2075436232413655e-06, + "loss": 0.5802, + "step": 38735 + }, + { + "epoch": 0.6693392314072436, + "grad_norm": 1.2943628165988594, + "learning_rate": 5.207052437086708e-06, + "loss": 0.3794, + "step": 38736 + }, + { + "epoch": 0.6693565109206525, + "grad_norm": 1.2316370929813945, + "learning_rate": 5.2065612659439215e-06, + "loss": 0.3002, + "step": 38737 + }, + { + "epoch": 0.6693737904340614, + "grad_norm": 1.4025098980330306, + "learning_rate": 5.206070109814551e-06, + "loss": 0.4147, + "step": 38738 + }, + { + "epoch": 0.6693910699474703, + "grad_norm": 1.1919705110651753, + "learning_rate": 5.205578968700136e-06, + "loss": 0.3142, + "step": 38739 + }, + { + "epoch": 0.6694083494608791, + "grad_norm": 0.842479763395523, + "learning_rate": 5.2050878426022075e-06, + "loss": 0.3674, + "step": 38740 + }, + { + "epoch": 0.669425628974288, + "grad_norm": 0.4852270733249116, + "learning_rate": 5.204596731522314e-06, + "loss": 0.5738, + "step": 38741 + }, + { + "epoch": 0.6694429084876969, + "grad_norm": 0.9217740449332057, + "learning_rate": 5.204105635461982e-06, + "loss": 0.209, + "step": 38742 + }, + { + "epoch": 0.6694601880011058, + "grad_norm": 0.9290323584687465, + "learning_rate": 5.203614554422758e-06, + "loss": 0.3716, + "step": 38743 + }, + { + "epoch": 0.6694774675145148, + "grad_norm": 1.2464367305044028, + "learning_rate": 5.203123488406175e-06, + "loss": 0.396, + "step": 38744 + }, + { + "epoch": 0.6694947470279237, + "grad_norm": 0.7985287705358665, + "learning_rate": 5.20263243741378e-06, + "loss": 0.877, + "step": 38745 + }, + { + "epoch": 0.6695120265413326, + "grad_norm": 1.1307292636860091, + "learning_rate": 5.2021414014471e-06, + "loss": 0.4683, + "step": 38746 + }, + { + "epoch": 0.6695293060547415, + "grad_norm": 1.6355240650118106, + "learning_rate": 5.201650380507682e-06, + "loss": 0.2336, + "step": 38747 + }, + { + "epoch": 0.6695465855681504, + "grad_norm": 1.1223202088660196, + "learning_rate": 5.2011593745970545e-06, + "loss": 0.3071, + "step": 38748 + }, + { + "epoch": 0.6695638650815593, + "grad_norm": 1.8065958378563298, + "learning_rate": 5.200668383716765e-06, + "loss": 0.4639, + "step": 38749 + }, + { + "epoch": 0.6695811445949682, + "grad_norm": 1.3987269822836605, + "learning_rate": 5.200177407868343e-06, + "loss": 0.3167, + "step": 38750 + }, + { + "epoch": 0.6695984241083771, + "grad_norm": 1.007493625649512, + "learning_rate": 5.199686447053332e-06, + "loss": 0.2746, + "step": 38751 + }, + { + "epoch": 0.669615703621786, + "grad_norm": 1.2435403596595394, + "learning_rate": 5.199195501273269e-06, + "loss": 0.4178, + "step": 38752 + }, + { + "epoch": 0.6696329831351949, + "grad_norm": 1.2626215664896203, + "learning_rate": 5.198704570529688e-06, + "loss": 0.6561, + "step": 38753 + }, + { + "epoch": 0.6696502626486038, + "grad_norm": 2.465296723247304, + "learning_rate": 5.198213654824134e-06, + "loss": 0.3019, + "step": 38754 + }, + { + "epoch": 0.6696675421620127, + "grad_norm": 1.52152390212438, + "learning_rate": 5.197722754158135e-06, + "loss": 0.4251, + "step": 38755 + }, + { + "epoch": 0.6696848216754216, + "grad_norm": 1.2283266954514331, + "learning_rate": 5.197231868533233e-06, + "loss": 0.2697, + "step": 38756 + }, + { + "epoch": 0.6697021011888306, + "grad_norm": 1.725938748371274, + "learning_rate": 5.196740997950964e-06, + "loss": 0.4215, + "step": 38757 + }, + { + "epoch": 0.6697193807022395, + "grad_norm": 1.3787377260457954, + "learning_rate": 5.196250142412873e-06, + "loss": 0.3856, + "step": 38758 + }, + { + "epoch": 0.6697366602156484, + "grad_norm": 1.1764296271592471, + "learning_rate": 5.195759301920492e-06, + "loss": 0.3226, + "step": 38759 + }, + { + "epoch": 0.6697539397290573, + "grad_norm": 1.8630591341001168, + "learning_rate": 5.195268476475354e-06, + "loss": 0.3693, + "step": 38760 + }, + { + "epoch": 0.6697712192424662, + "grad_norm": 0.970038684355774, + "learning_rate": 5.194777666078999e-06, + "loss": 0.3497, + "step": 38761 + }, + { + "epoch": 0.669788498755875, + "grad_norm": 0.9996380201806115, + "learning_rate": 5.194286870732968e-06, + "loss": 0.4068, + "step": 38762 + }, + { + "epoch": 0.6698057782692839, + "grad_norm": 1.2108028959130246, + "learning_rate": 5.193796090438797e-06, + "loss": 0.3908, + "step": 38763 + }, + { + "epoch": 0.6698230577826928, + "grad_norm": 0.6087820323735577, + "learning_rate": 5.193305325198019e-06, + "loss": 0.6757, + "step": 38764 + }, + { + "epoch": 0.6698403372961017, + "grad_norm": 1.1461120236597047, + "learning_rate": 5.192814575012178e-06, + "loss": 0.4312, + "step": 38765 + }, + { + "epoch": 0.6698576168095106, + "grad_norm": 0.6711705782208629, + "learning_rate": 5.192323839882803e-06, + "loss": 0.4698, + "step": 38766 + }, + { + "epoch": 0.6698748963229195, + "grad_norm": 1.44067514127586, + "learning_rate": 5.191833119811436e-06, + "loss": 0.34, + "step": 38767 + }, + { + "epoch": 0.6698921758363284, + "grad_norm": 1.2031929113443791, + "learning_rate": 5.1913424147996115e-06, + "loss": 0.4716, + "step": 38768 + }, + { + "epoch": 0.6699094553497373, + "grad_norm": 1.1651088688165352, + "learning_rate": 5.190851724848873e-06, + "loss": 0.2878, + "step": 38769 + }, + { + "epoch": 0.6699267348631462, + "grad_norm": 1.2211652261303103, + "learning_rate": 5.190361049960753e-06, + "loss": 0.3905, + "step": 38770 + }, + { + "epoch": 0.6699440143765552, + "grad_norm": 1.6489603580509657, + "learning_rate": 5.189870390136782e-06, + "loss": 0.3034, + "step": 38771 + }, + { + "epoch": 0.6699612938899641, + "grad_norm": 1.5786423020142921, + "learning_rate": 5.189379745378505e-06, + "loss": 0.375, + "step": 38772 + }, + { + "epoch": 0.669978573403373, + "grad_norm": 1.1951032153088819, + "learning_rate": 5.1888891156874545e-06, + "loss": 0.4662, + "step": 38773 + }, + { + "epoch": 0.6699958529167819, + "grad_norm": 1.3573982655820949, + "learning_rate": 5.188398501065173e-06, + "loss": 0.3583, + "step": 38774 + }, + { + "epoch": 0.6700131324301908, + "grad_norm": 1.0715289201038927, + "learning_rate": 5.187907901513188e-06, + "loss": 0.3628, + "step": 38775 + }, + { + "epoch": 0.6700304119435997, + "grad_norm": 0.9886998866792608, + "learning_rate": 5.187417317033047e-06, + "loss": 0.3574, + "step": 38776 + }, + { + "epoch": 0.6700476914570086, + "grad_norm": 1.3473886411617848, + "learning_rate": 5.186926747626275e-06, + "loss": 0.4685, + "step": 38777 + }, + { + "epoch": 0.6700649709704175, + "grad_norm": 1.4493873345231572, + "learning_rate": 5.186436193294416e-06, + "loss": 0.364, + "step": 38778 + }, + { + "epoch": 0.6700822504838264, + "grad_norm": 1.0710353635850176, + "learning_rate": 5.185945654039004e-06, + "loss": 0.2695, + "step": 38779 + }, + { + "epoch": 0.6700995299972353, + "grad_norm": 1.3860472512967927, + "learning_rate": 5.18545512986158e-06, + "loss": 0.3726, + "step": 38780 + }, + { + "epoch": 0.6701168095106442, + "grad_norm": 1.1385662196912465, + "learning_rate": 5.184964620763676e-06, + "loss": 0.4724, + "step": 38781 + }, + { + "epoch": 0.6701340890240531, + "grad_norm": 0.8537619007546974, + "learning_rate": 5.184474126746826e-06, + "loss": 0.5386, + "step": 38782 + }, + { + "epoch": 0.6701513685374619, + "grad_norm": 1.1165145814921686, + "learning_rate": 5.1839836478125675e-06, + "loss": 0.3567, + "step": 38783 + }, + { + "epoch": 0.6701686480508708, + "grad_norm": 1.4450498484972123, + "learning_rate": 5.183493183962438e-06, + "loss": 0.4415, + "step": 38784 + }, + { + "epoch": 0.6701859275642797, + "grad_norm": 1.34102405407208, + "learning_rate": 5.183002735197979e-06, + "loss": 0.6018, + "step": 38785 + }, + { + "epoch": 0.6702032070776887, + "grad_norm": 2.000140535790801, + "learning_rate": 5.182512301520718e-06, + "loss": 0.511, + "step": 38786 + }, + { + "epoch": 0.6702204865910976, + "grad_norm": 1.0793799277679745, + "learning_rate": 5.182021882932198e-06, + "loss": 0.3344, + "step": 38787 + }, + { + "epoch": 0.6702377661045065, + "grad_norm": 1.1593580633894878, + "learning_rate": 5.1815314794339475e-06, + "loss": 0.4641, + "step": 38788 + }, + { + "epoch": 0.6702550456179154, + "grad_norm": 1.3971631653853618, + "learning_rate": 5.18104109102751e-06, + "loss": 0.4681, + "step": 38789 + }, + { + "epoch": 0.6702723251313243, + "grad_norm": 0.9773602522718913, + "learning_rate": 5.1805507177144145e-06, + "loss": 0.3936, + "step": 38790 + }, + { + "epoch": 0.6702896046447332, + "grad_norm": 1.3549655651485555, + "learning_rate": 5.180060359496201e-06, + "loss": 0.4055, + "step": 38791 + }, + { + "epoch": 0.6703068841581421, + "grad_norm": 0.7578737025791261, + "learning_rate": 5.179570016374408e-06, + "loss": 0.7018, + "step": 38792 + }, + { + "epoch": 0.670324163671551, + "grad_norm": 1.1123076479257168, + "learning_rate": 5.179079688350563e-06, + "loss": 0.3301, + "step": 38793 + }, + { + "epoch": 0.6703414431849599, + "grad_norm": 1.2148009779055664, + "learning_rate": 5.1785893754262125e-06, + "loss": 0.5633, + "step": 38794 + }, + { + "epoch": 0.6703587226983688, + "grad_norm": 0.9945500727040086, + "learning_rate": 5.178099077602883e-06, + "loss": 0.4526, + "step": 38795 + }, + { + "epoch": 0.6703760022117777, + "grad_norm": 1.3790943748032476, + "learning_rate": 5.177608794882112e-06, + "loss": 0.387, + "step": 38796 + }, + { + "epoch": 0.6703932817251866, + "grad_norm": 1.4690722006024088, + "learning_rate": 5.177118527265438e-06, + "loss": 0.334, + "step": 38797 + }, + { + "epoch": 0.6704105612385955, + "grad_norm": 0.9808226888066022, + "learning_rate": 5.1766282747543984e-06, + "loss": 0.2371, + "step": 38798 + }, + { + "epoch": 0.6704278407520045, + "grad_norm": 0.9989177994409462, + "learning_rate": 5.176138037350522e-06, + "loss": 0.3211, + "step": 38799 + }, + { + "epoch": 0.6704451202654134, + "grad_norm": 1.4466302932149835, + "learning_rate": 5.1756478150553514e-06, + "loss": 0.1985, + "step": 38800 + }, + { + "epoch": 0.6704623997788223, + "grad_norm": 1.562890697355627, + "learning_rate": 5.175157607870415e-06, + "loss": 0.4225, + "step": 38801 + }, + { + "epoch": 0.6704796792922312, + "grad_norm": 1.0010940582773165, + "learning_rate": 5.174667415797252e-06, + "loss": 0.4318, + "step": 38802 + }, + { + "epoch": 0.6704969588056401, + "grad_norm": 1.8095765237766381, + "learning_rate": 5.1741772388374015e-06, + "loss": 0.6918, + "step": 38803 + }, + { + "epoch": 0.6705142383190489, + "grad_norm": 1.1136840456735322, + "learning_rate": 5.17368707699239e-06, + "loss": 0.4511, + "step": 38804 + }, + { + "epoch": 0.6705315178324578, + "grad_norm": 1.119889467889106, + "learning_rate": 5.1731969302637605e-06, + "loss": 0.479, + "step": 38805 + }, + { + "epoch": 0.6705487973458667, + "grad_norm": 1.442309406826107, + "learning_rate": 5.172706798653042e-06, + "loss": 0.3456, + "step": 38806 + }, + { + "epoch": 0.6705660768592756, + "grad_norm": 1.668952004193627, + "learning_rate": 5.172216682161773e-06, + "loss": 0.2392, + "step": 38807 + }, + { + "epoch": 0.6705833563726845, + "grad_norm": 0.7744658198493077, + "learning_rate": 5.171726580791487e-06, + "loss": 0.43, + "step": 38808 + }, + { + "epoch": 0.6706006358860934, + "grad_norm": 1.4865304722188784, + "learning_rate": 5.171236494543726e-06, + "loss": 0.3989, + "step": 38809 + }, + { + "epoch": 0.6706179153995023, + "grad_norm": 1.6719953716622333, + "learning_rate": 5.170746423420015e-06, + "loss": 0.3104, + "step": 38810 + }, + { + "epoch": 0.6706351949129112, + "grad_norm": 0.8980702773635516, + "learning_rate": 5.170256367421895e-06, + "loss": 0.3347, + "step": 38811 + }, + { + "epoch": 0.6706524744263201, + "grad_norm": 2.4124346048224434, + "learning_rate": 5.169766326550898e-06, + "loss": 0.4752, + "step": 38812 + }, + { + "epoch": 0.670669753939729, + "grad_norm": 1.053338531057579, + "learning_rate": 5.169276300808559e-06, + "loss": 0.278, + "step": 38813 + }, + { + "epoch": 0.670687033453138, + "grad_norm": 1.921342152587878, + "learning_rate": 5.168786290196417e-06, + "loss": 0.3802, + "step": 38814 + }, + { + "epoch": 0.6707043129665469, + "grad_norm": 0.8412574873642563, + "learning_rate": 5.168296294716e-06, + "loss": 0.4279, + "step": 38815 + }, + { + "epoch": 0.6707215924799558, + "grad_norm": 1.5955561138585317, + "learning_rate": 5.16780631436885e-06, + "loss": 0.3299, + "step": 38816 + }, + { + "epoch": 0.6707388719933647, + "grad_norm": 1.3145438083148384, + "learning_rate": 5.167316349156495e-06, + "loss": 0.4879, + "step": 38817 + }, + { + "epoch": 0.6707561515067736, + "grad_norm": 1.0751522652697165, + "learning_rate": 5.1668263990804715e-06, + "loss": 0.2409, + "step": 38818 + }, + { + "epoch": 0.6707734310201825, + "grad_norm": 0.7162171424204352, + "learning_rate": 5.166336464142314e-06, + "loss": 0.7193, + "step": 38819 + }, + { + "epoch": 0.6707907105335914, + "grad_norm": 0.9926376937137147, + "learning_rate": 5.1658465443435625e-06, + "loss": 0.4453, + "step": 38820 + }, + { + "epoch": 0.6708079900470003, + "grad_norm": 1.0158965182309054, + "learning_rate": 5.165356639685748e-06, + "loss": 0.4356, + "step": 38821 + }, + { + "epoch": 0.6708252695604092, + "grad_norm": 1.572953400427837, + "learning_rate": 5.164866750170399e-06, + "loss": 0.4471, + "step": 38822 + }, + { + "epoch": 0.6708425490738181, + "grad_norm": 1.7738083185154658, + "learning_rate": 5.1643768757990554e-06, + "loss": 0.3796, + "step": 38823 + }, + { + "epoch": 0.670859828587227, + "grad_norm": 1.2192481880212862, + "learning_rate": 5.163887016573252e-06, + "loss": 0.2651, + "step": 38824 + }, + { + "epoch": 0.6708771081006358, + "grad_norm": 1.0159077810921247, + "learning_rate": 5.163397172494524e-06, + "loss": 0.3744, + "step": 38825 + }, + { + "epoch": 0.6708943876140447, + "grad_norm": 1.265622739192197, + "learning_rate": 5.1629073435644e-06, + "loss": 0.4403, + "step": 38826 + }, + { + "epoch": 0.6709116671274536, + "grad_norm": 0.88089924120637, + "learning_rate": 5.162417529784422e-06, + "loss": 0.4768, + "step": 38827 + }, + { + "epoch": 0.6709289466408626, + "grad_norm": 1.1898315954450052, + "learning_rate": 5.161927731156117e-06, + "loss": 0.3409, + "step": 38828 + }, + { + "epoch": 0.6709462261542715, + "grad_norm": 1.3516557818168533, + "learning_rate": 5.161437947681025e-06, + "loss": 0.2507, + "step": 38829 + }, + { + "epoch": 0.6709635056676804, + "grad_norm": 1.2915207284430523, + "learning_rate": 5.1609481793606696e-06, + "loss": 0.3372, + "step": 38830 + }, + { + "epoch": 0.6709807851810893, + "grad_norm": 1.7519078112651916, + "learning_rate": 5.1604584261965995e-06, + "loss": 0.351, + "step": 38831 + }, + { + "epoch": 0.6709980646944982, + "grad_norm": 1.5163375848648708, + "learning_rate": 5.159968688190342e-06, + "loss": 0.4723, + "step": 38832 + }, + { + "epoch": 0.6710153442079071, + "grad_norm": 1.6486549455768844, + "learning_rate": 5.159478965343425e-06, + "loss": 0.4405, + "step": 38833 + }, + { + "epoch": 0.671032623721316, + "grad_norm": 1.332491949465892, + "learning_rate": 5.1589892576573945e-06, + "loss": 0.4069, + "step": 38834 + }, + { + "epoch": 0.6710499032347249, + "grad_norm": 1.1454375714147051, + "learning_rate": 5.158499565133771e-06, + "loss": 0.4815, + "step": 38835 + }, + { + "epoch": 0.6710671827481338, + "grad_norm": 1.186216293035562, + "learning_rate": 5.158009887774096e-06, + "loss": 0.234, + "step": 38836 + }, + { + "epoch": 0.6710844622615427, + "grad_norm": 1.099465009103526, + "learning_rate": 5.157520225579902e-06, + "loss": 0.346, + "step": 38837 + }, + { + "epoch": 0.6711017417749516, + "grad_norm": 0.9466285346374038, + "learning_rate": 5.157030578552727e-06, + "loss": 0.2432, + "step": 38838 + }, + { + "epoch": 0.6711190212883605, + "grad_norm": 0.7088821771282905, + "learning_rate": 5.156540946694095e-06, + "loss": 0.3687, + "step": 38839 + }, + { + "epoch": 0.6711363008017694, + "grad_norm": 1.6782558259718525, + "learning_rate": 5.15605133000555e-06, + "loss": 0.4253, + "step": 38840 + }, + { + "epoch": 0.6711535803151784, + "grad_norm": 1.3938692862623043, + "learning_rate": 5.155561728488616e-06, + "loss": 0.5041, + "step": 38841 + }, + { + "epoch": 0.6711708598285873, + "grad_norm": 1.9993145988230703, + "learning_rate": 5.15507214214483e-06, + "loss": 0.4591, + "step": 38842 + }, + { + "epoch": 0.6711881393419962, + "grad_norm": 1.2593586459181607, + "learning_rate": 5.154582570975731e-06, + "loss": 0.3478, + "step": 38843 + }, + { + "epoch": 0.6712054188554051, + "grad_norm": 1.1330496221465558, + "learning_rate": 5.154093014982844e-06, + "loss": 0.9012, + "step": 38844 + }, + { + "epoch": 0.671222698368814, + "grad_norm": 1.0981034747260958, + "learning_rate": 5.15360347416771e-06, + "loss": 0.3113, + "step": 38845 + }, + { + "epoch": 0.6712399778822228, + "grad_norm": 1.3350548095921604, + "learning_rate": 5.153113948531854e-06, + "loss": 0.4982, + "step": 38846 + }, + { + "epoch": 0.6712572573956317, + "grad_norm": 1.002655994346431, + "learning_rate": 5.152624438076812e-06, + "loss": 0.3492, + "step": 38847 + }, + { + "epoch": 0.6712745369090406, + "grad_norm": 1.3640105256706396, + "learning_rate": 5.1521349428041215e-06, + "loss": 0.367, + "step": 38848 + }, + { + "epoch": 0.6712918164224495, + "grad_norm": 0.8321898314287269, + "learning_rate": 5.151645462715313e-06, + "loss": 0.1483, + "step": 38849 + }, + { + "epoch": 0.6713090959358584, + "grad_norm": 1.1761601689616303, + "learning_rate": 5.151155997811919e-06, + "loss": 0.4074, + "step": 38850 + }, + { + "epoch": 0.6713263754492673, + "grad_norm": 1.121416217065523, + "learning_rate": 5.150666548095476e-06, + "loss": 0.2026, + "step": 38851 + }, + { + "epoch": 0.6713436549626762, + "grad_norm": 1.0995890607474128, + "learning_rate": 5.150177113567509e-06, + "loss": 0.3686, + "step": 38852 + }, + { + "epoch": 0.6713609344760851, + "grad_norm": 0.6501296796137265, + "learning_rate": 5.149687694229556e-06, + "loss": 0.4817, + "step": 38853 + }, + { + "epoch": 0.671378213989494, + "grad_norm": 1.033555924801879, + "learning_rate": 5.149198290083154e-06, + "loss": 0.4028, + "step": 38854 + }, + { + "epoch": 0.671395493502903, + "grad_norm": 1.3013758794548926, + "learning_rate": 5.148708901129828e-06, + "loss": 0.5179, + "step": 38855 + }, + { + "epoch": 0.6714127730163119, + "grad_norm": 0.7732792769771257, + "learning_rate": 5.148219527371118e-06, + "loss": 0.3507, + "step": 38856 + }, + { + "epoch": 0.6714300525297208, + "grad_norm": 1.1218734018377132, + "learning_rate": 5.147730168808549e-06, + "loss": 0.5339, + "step": 38857 + }, + { + "epoch": 0.6714473320431297, + "grad_norm": 1.2658729436564788, + "learning_rate": 5.147240825443658e-06, + "loss": 0.3713, + "step": 38858 + }, + { + "epoch": 0.6714646115565386, + "grad_norm": 1.2490234433207448, + "learning_rate": 5.146751497277978e-06, + "loss": 0.4381, + "step": 38859 + }, + { + "epoch": 0.6714818910699475, + "grad_norm": 0.9454772612495249, + "learning_rate": 5.146262184313044e-06, + "loss": 0.3276, + "step": 38860 + }, + { + "epoch": 0.6714991705833564, + "grad_norm": 0.6971803128814336, + "learning_rate": 5.145772886550382e-06, + "loss": 0.6343, + "step": 38861 + }, + { + "epoch": 0.6715164500967653, + "grad_norm": 1.584878442321663, + "learning_rate": 5.145283603991533e-06, + "loss": 0.6205, + "step": 38862 + }, + { + "epoch": 0.6715337296101742, + "grad_norm": 1.0584500728594894, + "learning_rate": 5.144794336638022e-06, + "loss": 0.3071, + "step": 38863 + }, + { + "epoch": 0.6715510091235831, + "grad_norm": 1.2227471870691229, + "learning_rate": 5.144305084491381e-06, + "loss": 0.4268, + "step": 38864 + }, + { + "epoch": 0.671568288636992, + "grad_norm": 0.8678365449682469, + "learning_rate": 5.143815847553151e-06, + "loss": 0.2096, + "step": 38865 + }, + { + "epoch": 0.6715855681504009, + "grad_norm": 1.9317321946761865, + "learning_rate": 5.143326625824856e-06, + "loss": 0.3793, + "step": 38866 + }, + { + "epoch": 0.6716028476638097, + "grad_norm": 0.8923273700957428, + "learning_rate": 5.142837419308033e-06, + "loss": 0.3666, + "step": 38867 + }, + { + "epoch": 0.6716201271772186, + "grad_norm": 0.7653110127529634, + "learning_rate": 5.142348228004209e-06, + "loss": 0.3768, + "step": 38868 + }, + { + "epoch": 0.6716374066906275, + "grad_norm": 0.5432214561111525, + "learning_rate": 5.141859051914924e-06, + "loss": 0.7526, + "step": 38869 + }, + { + "epoch": 0.6716546862040365, + "grad_norm": 1.1631446647359616, + "learning_rate": 5.1413698910416975e-06, + "loss": 0.5416, + "step": 38870 + }, + { + "epoch": 0.6716719657174454, + "grad_norm": 0.9425633843066359, + "learning_rate": 5.140880745386078e-06, + "loss": 0.4581, + "step": 38871 + }, + { + "epoch": 0.6716892452308543, + "grad_norm": 0.4892453355970859, + "learning_rate": 5.140391614949589e-06, + "loss": 0.6568, + "step": 38872 + }, + { + "epoch": 0.6717065247442632, + "grad_norm": 1.535173387086895, + "learning_rate": 5.139902499733758e-06, + "loss": 0.4004, + "step": 38873 + }, + { + "epoch": 0.6717238042576721, + "grad_norm": 0.983395281518856, + "learning_rate": 5.1394133997401275e-06, + "loss": 0.3038, + "step": 38874 + }, + { + "epoch": 0.671741083771081, + "grad_norm": 1.5492677985060623, + "learning_rate": 5.138924314970217e-06, + "loss": 0.4156, + "step": 38875 + }, + { + "epoch": 0.6717583632844899, + "grad_norm": 1.400856609137132, + "learning_rate": 5.1384352454255685e-06, + "loss": 0.3199, + "step": 38876 + }, + { + "epoch": 0.6717756427978988, + "grad_norm": 0.9839450535356211, + "learning_rate": 5.137946191107708e-06, + "loss": 0.3757, + "step": 38877 + }, + { + "epoch": 0.6717929223113077, + "grad_norm": 0.6905275368486588, + "learning_rate": 5.137457152018173e-06, + "loss": 0.3211, + "step": 38878 + }, + { + "epoch": 0.6718102018247166, + "grad_norm": 1.8045168736939317, + "learning_rate": 5.136968128158488e-06, + "loss": 0.4169, + "step": 38879 + }, + { + "epoch": 0.6718274813381255, + "grad_norm": 1.2444163833395177, + "learning_rate": 5.1364791195301935e-06, + "loss": 0.3051, + "step": 38880 + }, + { + "epoch": 0.6718447608515344, + "grad_norm": 1.4294166610227792, + "learning_rate": 5.135990126134811e-06, + "loss": 0.2544, + "step": 38881 + }, + { + "epoch": 0.6718620403649433, + "grad_norm": 1.2197975490167898, + "learning_rate": 5.135501147973878e-06, + "loss": 0.3846, + "step": 38882 + }, + { + "epoch": 0.6718793198783523, + "grad_norm": 1.097455342941081, + "learning_rate": 5.1350121850489284e-06, + "loss": 0.2672, + "step": 38883 + }, + { + "epoch": 0.6718965993917612, + "grad_norm": 0.621244544147124, + "learning_rate": 5.1345232373614874e-06, + "loss": 0.3884, + "step": 38884 + }, + { + "epoch": 0.6719138789051701, + "grad_norm": 0.9130652299625944, + "learning_rate": 5.1340343049130924e-06, + "loss": 0.521, + "step": 38885 + }, + { + "epoch": 0.671931158418579, + "grad_norm": 1.3404003492337944, + "learning_rate": 5.133545387705268e-06, + "loss": 0.3238, + "step": 38886 + }, + { + "epoch": 0.6719484379319879, + "grad_norm": 0.9999381780886317, + "learning_rate": 5.133056485739549e-06, + "loss": 0.3925, + "step": 38887 + }, + { + "epoch": 0.6719657174453968, + "grad_norm": 1.2597571731897035, + "learning_rate": 5.132567599017468e-06, + "loss": 0.323, + "step": 38888 + }, + { + "epoch": 0.6719829969588056, + "grad_norm": 1.1721858951322224, + "learning_rate": 5.132078727540558e-06, + "loss": 0.3691, + "step": 38889 + }, + { + "epoch": 0.6720002764722145, + "grad_norm": 0.9290516316878643, + "learning_rate": 5.131589871310344e-06, + "loss": 0.5379, + "step": 38890 + }, + { + "epoch": 0.6720175559856234, + "grad_norm": 1.4893169941249158, + "learning_rate": 5.131101030328365e-06, + "loss": 0.5121, + "step": 38891 + }, + { + "epoch": 0.6720348354990323, + "grad_norm": 1.0545931391096777, + "learning_rate": 5.130612204596141e-06, + "loss": 0.2309, + "step": 38892 + }, + { + "epoch": 0.6720521150124412, + "grad_norm": 1.0126151422004699, + "learning_rate": 5.130123394115212e-06, + "loss": 0.3518, + "step": 38893 + }, + { + "epoch": 0.6720693945258501, + "grad_norm": 0.7330119678526302, + "learning_rate": 5.12963459888711e-06, + "loss": 0.3341, + "step": 38894 + }, + { + "epoch": 0.672086674039259, + "grad_norm": 1.355656549219246, + "learning_rate": 5.1291458189133596e-06, + "loss": 0.2957, + "step": 38895 + }, + { + "epoch": 0.6721039535526679, + "grad_norm": 1.1492824425488204, + "learning_rate": 5.1286570541954986e-06, + "loss": 0.5405, + "step": 38896 + }, + { + "epoch": 0.6721212330660769, + "grad_norm": 0.7275963337387741, + "learning_rate": 5.128168304735048e-06, + "loss": 0.3893, + "step": 38897 + }, + { + "epoch": 0.6721385125794858, + "grad_norm": 1.2277496991297063, + "learning_rate": 5.1276795705335455e-06, + "loss": 0.4527, + "step": 38898 + }, + { + "epoch": 0.6721557920928947, + "grad_norm": 0.7752952396858087, + "learning_rate": 5.1271908515925205e-06, + "loss": 0.6726, + "step": 38899 + }, + { + "epoch": 0.6721730716063036, + "grad_norm": 1.5389765582331982, + "learning_rate": 5.12670214791351e-06, + "loss": 0.3758, + "step": 38900 + }, + { + "epoch": 0.6721903511197125, + "grad_norm": 1.3613982668666453, + "learning_rate": 5.1262134594980325e-06, + "loss": 0.4014, + "step": 38901 + }, + { + "epoch": 0.6722076306331214, + "grad_norm": 1.693103495132195, + "learning_rate": 5.12572478634763e-06, + "loss": 0.3237, + "step": 38902 + }, + { + "epoch": 0.6722249101465303, + "grad_norm": 1.7745650665774706, + "learning_rate": 5.125236128463823e-06, + "loss": 0.312, + "step": 38903 + }, + { + "epoch": 0.6722421896599392, + "grad_norm": 0.6713539764637633, + "learning_rate": 5.124747485848147e-06, + "loss": 0.3834, + "step": 38904 + }, + { + "epoch": 0.6722594691733481, + "grad_norm": 1.7739051051163837, + "learning_rate": 5.124258858502138e-06, + "loss": 0.4471, + "step": 38905 + }, + { + "epoch": 0.672276748686757, + "grad_norm": 1.2084954298491555, + "learning_rate": 5.123770246427315e-06, + "loss": 0.423, + "step": 38906 + }, + { + "epoch": 0.6722940282001659, + "grad_norm": 1.2080029644922083, + "learning_rate": 5.123281649625219e-06, + "loss": 0.3702, + "step": 38907 + }, + { + "epoch": 0.6723113077135748, + "grad_norm": 1.4512122246371564, + "learning_rate": 5.122793068097372e-06, + "loss": 0.3124, + "step": 38908 + }, + { + "epoch": 0.6723285872269837, + "grad_norm": 1.0638916334892345, + "learning_rate": 5.12230450184531e-06, + "loss": 0.2521, + "step": 38909 + }, + { + "epoch": 0.6723458667403925, + "grad_norm": 1.1568631662966768, + "learning_rate": 5.1218159508705545e-06, + "loss": 0.2578, + "step": 38910 + }, + { + "epoch": 0.6723631462538014, + "grad_norm": 1.114256004205166, + "learning_rate": 5.121327415174649e-06, + "loss": 0.3436, + "step": 38911 + }, + { + "epoch": 0.6723804257672104, + "grad_norm": 0.9901480553893647, + "learning_rate": 5.120838894759115e-06, + "loss": 0.4397, + "step": 38912 + }, + { + "epoch": 0.6723977052806193, + "grad_norm": 1.7416904939290154, + "learning_rate": 5.120350389625486e-06, + "loss": 0.5742, + "step": 38913 + }, + { + "epoch": 0.6724149847940282, + "grad_norm": 1.5385125446631405, + "learning_rate": 5.119861899775292e-06, + "loss": 0.4187, + "step": 38914 + }, + { + "epoch": 0.6724322643074371, + "grad_norm": 1.0494895125763672, + "learning_rate": 5.119373425210056e-06, + "loss": 0.4313, + "step": 38915 + }, + { + "epoch": 0.672449543820846, + "grad_norm": 1.34339667957164, + "learning_rate": 5.118884965931314e-06, + "loss": 0.2984, + "step": 38916 + }, + { + "epoch": 0.6724668233342549, + "grad_norm": 1.4476707752169515, + "learning_rate": 5.118396521940597e-06, + "loss": 0.3409, + "step": 38917 + }, + { + "epoch": 0.6724841028476638, + "grad_norm": 1.4683505043761118, + "learning_rate": 5.117908093239434e-06, + "loss": 0.4798, + "step": 38918 + }, + { + "epoch": 0.6725013823610727, + "grad_norm": 0.9976939165037975, + "learning_rate": 5.117419679829352e-06, + "loss": 0.3605, + "step": 38919 + }, + { + "epoch": 0.6725186618744816, + "grad_norm": 1.6673667540774508, + "learning_rate": 5.116931281711886e-06, + "loss": 0.5438, + "step": 38920 + }, + { + "epoch": 0.6725359413878905, + "grad_norm": 1.130417145229195, + "learning_rate": 5.116442898888557e-06, + "loss": 0.5182, + "step": 38921 + }, + { + "epoch": 0.6725532209012994, + "grad_norm": 0.9544757286200052, + "learning_rate": 5.115954531360901e-06, + "loss": 0.3719, + "step": 38922 + }, + { + "epoch": 0.6725705004147083, + "grad_norm": 1.0627124074594658, + "learning_rate": 5.115466179130447e-06, + "loss": 0.2917, + "step": 38923 + }, + { + "epoch": 0.6725877799281172, + "grad_norm": 0.9482569127644033, + "learning_rate": 5.1149778421987275e-06, + "loss": 0.3967, + "step": 38924 + }, + { + "epoch": 0.6726050594415262, + "grad_norm": 1.0575639195174227, + "learning_rate": 5.11448952056727e-06, + "loss": 0.5153, + "step": 38925 + }, + { + "epoch": 0.6726223389549351, + "grad_norm": 0.924212069870757, + "learning_rate": 5.114001214237597e-06, + "loss": 0.3567, + "step": 38926 + }, + { + "epoch": 0.672639618468344, + "grad_norm": 1.2369112836594327, + "learning_rate": 5.113512923211245e-06, + "loss": 0.3056, + "step": 38927 + }, + { + "epoch": 0.6726568979817529, + "grad_norm": 0.9988389873670257, + "learning_rate": 5.11302464748974e-06, + "loss": 0.2394, + "step": 38928 + }, + { + "epoch": 0.6726741774951618, + "grad_norm": 0.7351592536274073, + "learning_rate": 5.112536387074618e-06, + "loss": 0.3896, + "step": 38929 + }, + { + "epoch": 0.6726914570085707, + "grad_norm": 1.1769902635944949, + "learning_rate": 5.112048141967401e-06, + "loss": 0.4677, + "step": 38930 + }, + { + "epoch": 0.6727087365219795, + "grad_norm": 1.251283464788794, + "learning_rate": 5.111559912169623e-06, + "loss": 0.3409, + "step": 38931 + }, + { + "epoch": 0.6727260160353884, + "grad_norm": 1.202582025568114, + "learning_rate": 5.111071697682808e-06, + "loss": 0.317, + "step": 38932 + }, + { + "epoch": 0.6727432955487973, + "grad_norm": 1.3658091636575362, + "learning_rate": 5.1105834985084865e-06, + "loss": 0.2969, + "step": 38933 + }, + { + "epoch": 0.6727605750622062, + "grad_norm": 1.1071996693383093, + "learning_rate": 5.110095314648194e-06, + "loss": 0.5597, + "step": 38934 + }, + { + "epoch": 0.6727778545756151, + "grad_norm": 0.9317448914267218, + "learning_rate": 5.10960714610345e-06, + "loss": 0.3416, + "step": 38935 + }, + { + "epoch": 0.672795134089024, + "grad_norm": 1.430706160162144, + "learning_rate": 5.1091189928757925e-06, + "loss": 0.3843, + "step": 38936 + }, + { + "epoch": 0.6728124136024329, + "grad_norm": 1.2175215956285153, + "learning_rate": 5.108630854966743e-06, + "loss": 0.5984, + "step": 38937 + }, + { + "epoch": 0.6728296931158418, + "grad_norm": 0.9435770402420057, + "learning_rate": 5.1081427323778325e-06, + "loss": 0.2954, + "step": 38938 + }, + { + "epoch": 0.6728469726292508, + "grad_norm": 1.387954035348297, + "learning_rate": 5.10765462511059e-06, + "loss": 0.2749, + "step": 38939 + }, + { + "epoch": 0.6728642521426597, + "grad_norm": 1.1883746939969422, + "learning_rate": 5.10716653316655e-06, + "loss": 0.3546, + "step": 38940 + }, + { + "epoch": 0.6728815316560686, + "grad_norm": 1.329479710491958, + "learning_rate": 5.106678456547232e-06, + "loss": 0.2819, + "step": 38941 + }, + { + "epoch": 0.6728988111694775, + "grad_norm": 1.3527364398641961, + "learning_rate": 5.106190395254173e-06, + "loss": 0.4947, + "step": 38942 + }, + { + "epoch": 0.6729160906828864, + "grad_norm": 1.1560719479049573, + "learning_rate": 5.1057023492888925e-06, + "loss": 0.3043, + "step": 38943 + }, + { + "epoch": 0.6729333701962953, + "grad_norm": 1.464317473105059, + "learning_rate": 5.1052143186529255e-06, + "loss": 0.295, + "step": 38944 + }, + { + "epoch": 0.6729506497097042, + "grad_norm": 0.9651403104353854, + "learning_rate": 5.1047263033478026e-06, + "loss": 0.3718, + "step": 38945 + }, + { + "epoch": 0.6729679292231131, + "grad_norm": 0.8619717294500109, + "learning_rate": 5.104238303375047e-06, + "loss": 0.2948, + "step": 38946 + }, + { + "epoch": 0.672985208736522, + "grad_norm": 1.2035783650759264, + "learning_rate": 5.10375031873619e-06, + "loss": 0.3503, + "step": 38947 + }, + { + "epoch": 0.6730024882499309, + "grad_norm": 1.107763160133573, + "learning_rate": 5.103262349432756e-06, + "loss": 0.2786, + "step": 38948 + }, + { + "epoch": 0.6730197677633398, + "grad_norm": 1.1057062985400117, + "learning_rate": 5.1027743954662815e-06, + "loss": 0.4667, + "step": 38949 + }, + { + "epoch": 0.6730370472767487, + "grad_norm": 1.0236333486814895, + "learning_rate": 5.102286456838282e-06, + "loss": 0.467, + "step": 38950 + }, + { + "epoch": 0.6730543267901576, + "grad_norm": 1.251121319464061, + "learning_rate": 5.101798533550299e-06, + "loss": 0.3198, + "step": 38951 + }, + { + "epoch": 0.6730716063035664, + "grad_norm": 1.2918043882814942, + "learning_rate": 5.1013106256038545e-06, + "loss": 0.442, + "step": 38952 + }, + { + "epoch": 0.6730888858169753, + "grad_norm": 0.7020564358124837, + "learning_rate": 5.100822733000479e-06, + "loss": 0.4596, + "step": 38953 + }, + { + "epoch": 0.6731061653303843, + "grad_norm": 1.3450464772107897, + "learning_rate": 5.100334855741697e-06, + "loss": 0.3155, + "step": 38954 + }, + { + "epoch": 0.6731234448437932, + "grad_norm": 0.9283379826258921, + "learning_rate": 5.099846993829041e-06, + "loss": 0.3779, + "step": 38955 + }, + { + "epoch": 0.6731407243572021, + "grad_norm": 0.944819542835575, + "learning_rate": 5.099359147264034e-06, + "loss": 0.2224, + "step": 38956 + }, + { + "epoch": 0.673158003870611, + "grad_norm": 1.7565241550236594, + "learning_rate": 5.098871316048204e-06, + "loss": 0.462, + "step": 38957 + }, + { + "epoch": 0.6731752833840199, + "grad_norm": 1.0385815658622743, + "learning_rate": 5.098383500183089e-06, + "loss": 0.3901, + "step": 38958 + }, + { + "epoch": 0.6731925628974288, + "grad_norm": 1.7742377238539835, + "learning_rate": 5.097895699670203e-06, + "loss": 0.4558, + "step": 38959 + }, + { + "epoch": 0.6732098424108377, + "grad_norm": 1.0084273417799243, + "learning_rate": 5.097407914511083e-06, + "loss": 0.3236, + "step": 38960 + }, + { + "epoch": 0.6732271219242466, + "grad_norm": 0.8845681031249621, + "learning_rate": 5.0969201447072535e-06, + "loss": 0.4187, + "step": 38961 + }, + { + "epoch": 0.6732444014376555, + "grad_norm": 1.250651146332064, + "learning_rate": 5.09643239026024e-06, + "loss": 0.3805, + "step": 38962 + }, + { + "epoch": 0.6732616809510644, + "grad_norm": 1.202430561634719, + "learning_rate": 5.095944651171573e-06, + "loss": 0.4057, + "step": 38963 + }, + { + "epoch": 0.6732789604644733, + "grad_norm": 1.209641229521349, + "learning_rate": 5.0954569274427855e-06, + "loss": 0.4545, + "step": 38964 + }, + { + "epoch": 0.6732962399778822, + "grad_norm": 1.3881857812365013, + "learning_rate": 5.094969219075399e-06, + "loss": 0.4617, + "step": 38965 + }, + { + "epoch": 0.6733135194912911, + "grad_norm": 1.7685375969137955, + "learning_rate": 5.0944815260709375e-06, + "loss": 0.458, + "step": 38966 + }, + { + "epoch": 0.6733307990047, + "grad_norm": 1.351910009274106, + "learning_rate": 5.0939938484309335e-06, + "loss": 0.5149, + "step": 38967 + }, + { + "epoch": 0.673348078518109, + "grad_norm": 1.2062969807394521, + "learning_rate": 5.0935061861569135e-06, + "loss": 0.3843, + "step": 38968 + }, + { + "epoch": 0.6733653580315179, + "grad_norm": 1.5305073544798835, + "learning_rate": 5.09301853925041e-06, + "loss": 0.5101, + "step": 38969 + }, + { + "epoch": 0.6733826375449268, + "grad_norm": 0.974050632110753, + "learning_rate": 5.092530907712938e-06, + "loss": 0.7028, + "step": 38970 + }, + { + "epoch": 0.6733999170583357, + "grad_norm": 1.3091722575001181, + "learning_rate": 5.092043291546038e-06, + "loss": 0.3991, + "step": 38971 + }, + { + "epoch": 0.6734171965717446, + "grad_norm": 1.3690955491754333, + "learning_rate": 5.091555690751229e-06, + "loss": 0.3705, + "step": 38972 + }, + { + "epoch": 0.6734344760851534, + "grad_norm": 1.178918715243099, + "learning_rate": 5.091068105330039e-06, + "loss": 0.5695, + "step": 38973 + }, + { + "epoch": 0.6734517555985623, + "grad_norm": 1.3541736354331992, + "learning_rate": 5.090580535283997e-06, + "loss": 0.3581, + "step": 38974 + }, + { + "epoch": 0.6734690351119712, + "grad_norm": 1.2873767828681748, + "learning_rate": 5.0900929806146335e-06, + "loss": 0.4659, + "step": 38975 + }, + { + "epoch": 0.6734863146253801, + "grad_norm": 0.6664785092427765, + "learning_rate": 5.089605441323471e-06, + "loss": 0.2636, + "step": 38976 + }, + { + "epoch": 0.673503594138789, + "grad_norm": 1.4138928489343028, + "learning_rate": 5.0891179174120356e-06, + "loss": 0.2625, + "step": 38977 + }, + { + "epoch": 0.6735208736521979, + "grad_norm": 1.5907185986346348, + "learning_rate": 5.088630408881855e-06, + "loss": 0.3879, + "step": 38978 + }, + { + "epoch": 0.6735381531656068, + "grad_norm": 1.288631351916553, + "learning_rate": 5.0881429157344576e-06, + "loss": 0.4036, + "step": 38979 + }, + { + "epoch": 0.6735554326790157, + "grad_norm": 1.4314894932844777, + "learning_rate": 5.087655437971373e-06, + "loss": 0.2667, + "step": 38980 + }, + { + "epoch": 0.6735727121924246, + "grad_norm": 1.2268213665884395, + "learning_rate": 5.087167975594121e-06, + "loss": 0.3431, + "step": 38981 + }, + { + "epoch": 0.6735899917058336, + "grad_norm": 1.611355475915937, + "learning_rate": 5.086680528604237e-06, + "loss": 0.1962, + "step": 38982 + }, + { + "epoch": 0.6736072712192425, + "grad_norm": 1.6430577708207645, + "learning_rate": 5.086193097003238e-06, + "loss": 0.2923, + "step": 38983 + }, + { + "epoch": 0.6736245507326514, + "grad_norm": 0.8333853349681408, + "learning_rate": 5.0857056807926565e-06, + "loss": 0.4275, + "step": 38984 + }, + { + "epoch": 0.6736418302460603, + "grad_norm": 1.1377913617035544, + "learning_rate": 5.085218279974019e-06, + "loss": 0.4111, + "step": 38985 + }, + { + "epoch": 0.6736591097594692, + "grad_norm": 0.9239875083727764, + "learning_rate": 5.084730894548855e-06, + "loss": 0.2278, + "step": 38986 + }, + { + "epoch": 0.6736763892728781, + "grad_norm": 1.1162509328025219, + "learning_rate": 5.084243524518686e-06, + "loss": 0.4212, + "step": 38987 + }, + { + "epoch": 0.673693668786287, + "grad_norm": 1.0383150404137338, + "learning_rate": 5.083756169885037e-06, + "loss": 0.4854, + "step": 38988 + }, + { + "epoch": 0.6737109482996959, + "grad_norm": 1.1885474564202712, + "learning_rate": 5.083268830649441e-06, + "loss": 0.3925, + "step": 38989 + }, + { + "epoch": 0.6737282278131048, + "grad_norm": 1.1721340175481887, + "learning_rate": 5.082781506813412e-06, + "loss": 0.2798, + "step": 38990 + }, + { + "epoch": 0.6737455073265137, + "grad_norm": 0.7973014587111312, + "learning_rate": 5.082294198378495e-06, + "loss": 0.5615, + "step": 38991 + }, + { + "epoch": 0.6737627868399226, + "grad_norm": 1.377877428996411, + "learning_rate": 5.0818069053462e-06, + "loss": 0.344, + "step": 38992 + }, + { + "epoch": 0.6737800663533315, + "grad_norm": 0.9815172154303263, + "learning_rate": 5.081319627718065e-06, + "loss": 0.5437, + "step": 38993 + }, + { + "epoch": 0.6737973458667403, + "grad_norm": 1.1649789959150534, + "learning_rate": 5.080832365495606e-06, + "loss": 0.5524, + "step": 38994 + }, + { + "epoch": 0.6738146253801492, + "grad_norm": 0.555212349011312, + "learning_rate": 5.080345118680359e-06, + "loss": 0.9144, + "step": 38995 + }, + { + "epoch": 0.6738319048935582, + "grad_norm": 1.2536818516053796, + "learning_rate": 5.07985788727384e-06, + "loss": 0.3288, + "step": 38996 + }, + { + "epoch": 0.6738491844069671, + "grad_norm": 1.0308614420678102, + "learning_rate": 5.0793706712775795e-06, + "loss": 0.303, + "step": 38997 + }, + { + "epoch": 0.673866463920376, + "grad_norm": 0.6636203990954629, + "learning_rate": 5.078883470693109e-06, + "loss": 0.5782, + "step": 38998 + }, + { + "epoch": 0.6738837434337849, + "grad_norm": 1.5141407900963637, + "learning_rate": 5.078396285521945e-06, + "loss": 0.5866, + "step": 38999 + }, + { + "epoch": 0.6739010229471938, + "grad_norm": 1.3913200857937413, + "learning_rate": 5.077909115765622e-06, + "loss": 0.325, + "step": 39000 + }, + { + "epoch": 0.6739183024606027, + "grad_norm": 0.708946755210966, + "learning_rate": 5.077421961425657e-06, + "loss": 0.3085, + "step": 39001 + }, + { + "epoch": 0.6739355819740116, + "grad_norm": 1.5469826584652602, + "learning_rate": 5.076934822503582e-06, + "loss": 0.2414, + "step": 39002 + }, + { + "epoch": 0.6739528614874205, + "grad_norm": 1.2067032849162216, + "learning_rate": 5.0764476990009204e-06, + "loss": 0.3705, + "step": 39003 + }, + { + "epoch": 0.6739701410008294, + "grad_norm": 0.8624204153609644, + "learning_rate": 5.0759605909192024e-06, + "loss": 0.2633, + "step": 39004 + }, + { + "epoch": 0.6739874205142383, + "grad_norm": 1.0563450195068884, + "learning_rate": 5.075473498259948e-06, + "loss": 0.2701, + "step": 39005 + }, + { + "epoch": 0.6740047000276472, + "grad_norm": 1.558969708685828, + "learning_rate": 5.074986421024685e-06, + "loss": 0.4325, + "step": 39006 + }, + { + "epoch": 0.6740219795410561, + "grad_norm": 1.5074672188025862, + "learning_rate": 5.074499359214939e-06, + "loss": 0.4434, + "step": 39007 + }, + { + "epoch": 0.674039259054465, + "grad_norm": 1.8870307182455468, + "learning_rate": 5.0740123128322325e-06, + "loss": 0.3434, + "step": 39008 + }, + { + "epoch": 0.674056538567874, + "grad_norm": 1.1789458125848968, + "learning_rate": 5.0735252818781e-06, + "loss": 0.4029, + "step": 39009 + }, + { + "epoch": 0.6740738180812829, + "grad_norm": 1.2478213293546574, + "learning_rate": 5.073038266354056e-06, + "loss": 0.3514, + "step": 39010 + }, + { + "epoch": 0.6740910975946918, + "grad_norm": 1.688783457690688, + "learning_rate": 5.072551266261634e-06, + "loss": 0.4804, + "step": 39011 + }, + { + "epoch": 0.6741083771081007, + "grad_norm": 0.9530028702087153, + "learning_rate": 5.072064281602351e-06, + "loss": 0.2166, + "step": 39012 + }, + { + "epoch": 0.6741256566215096, + "grad_norm": 1.2780089766638891, + "learning_rate": 5.0715773123777375e-06, + "loss": 0.4477, + "step": 39013 + }, + { + "epoch": 0.6741429361349185, + "grad_norm": 1.502304426199683, + "learning_rate": 5.07109035858932e-06, + "loss": 0.1727, + "step": 39014 + }, + { + "epoch": 0.6741602156483273, + "grad_norm": 1.010011404934323, + "learning_rate": 5.070603420238625e-06, + "loss": 0.3984, + "step": 39015 + }, + { + "epoch": 0.6741774951617362, + "grad_norm": 1.1266267901256657, + "learning_rate": 5.070116497327171e-06, + "loss": 0.3704, + "step": 39016 + }, + { + "epoch": 0.6741947746751451, + "grad_norm": 1.3131116381387895, + "learning_rate": 5.06962958985649e-06, + "loss": 0.3387, + "step": 39017 + }, + { + "epoch": 0.674212054188554, + "grad_norm": 1.0724342748887306, + "learning_rate": 5.069142697828102e-06, + "loss": 0.3721, + "step": 39018 + }, + { + "epoch": 0.6742293337019629, + "grad_norm": 1.325073284403791, + "learning_rate": 5.068655821243531e-06, + "loss": 0.3251, + "step": 39019 + }, + { + "epoch": 0.6742466132153718, + "grad_norm": 1.6715810421380688, + "learning_rate": 5.068168960104311e-06, + "loss": 0.4704, + "step": 39020 + }, + { + "epoch": 0.6742638927287807, + "grad_norm": 1.3337888072856896, + "learning_rate": 5.067682114411956e-06, + "loss": 0.3794, + "step": 39021 + }, + { + "epoch": 0.6742811722421896, + "grad_norm": 1.3692277323331339, + "learning_rate": 5.067195284167999e-06, + "loss": 0.271, + "step": 39022 + }, + { + "epoch": 0.6742984517555985, + "grad_norm": 1.4963146883036882, + "learning_rate": 5.066708469373958e-06, + "loss": 0.3557, + "step": 39023 + }, + { + "epoch": 0.6743157312690075, + "grad_norm": 1.54917429404566, + "learning_rate": 5.06622167003136e-06, + "loss": 0.2655, + "step": 39024 + }, + { + "epoch": 0.6743330107824164, + "grad_norm": 0.8475006539255712, + "learning_rate": 5.065734886141731e-06, + "loss": 0.5785, + "step": 39025 + }, + { + "epoch": 0.6743502902958253, + "grad_norm": 1.7460275498666438, + "learning_rate": 5.0652481177065984e-06, + "loss": 0.4324, + "step": 39026 + }, + { + "epoch": 0.6743675698092342, + "grad_norm": 1.1005809313392105, + "learning_rate": 5.0647613647274855e-06, + "loss": 0.3917, + "step": 39027 + }, + { + "epoch": 0.6743848493226431, + "grad_norm": 1.5572669132979395, + "learning_rate": 5.06427462720591e-06, + "loss": 0.2557, + "step": 39028 + }, + { + "epoch": 0.674402128836052, + "grad_norm": 1.3210096209638085, + "learning_rate": 5.063787905143405e-06, + "loss": 0.4237, + "step": 39029 + }, + { + "epoch": 0.6744194083494609, + "grad_norm": 1.0338797799458581, + "learning_rate": 5.063301198541485e-06, + "loss": 0.3996, + "step": 39030 + }, + { + "epoch": 0.6744366878628698, + "grad_norm": 2.2019863043388384, + "learning_rate": 5.062814507401688e-06, + "loss": 0.4914, + "step": 39031 + }, + { + "epoch": 0.6744539673762787, + "grad_norm": 1.0977461476712709, + "learning_rate": 5.0623278317255266e-06, + "loss": 0.2642, + "step": 39032 + }, + { + "epoch": 0.6744712468896876, + "grad_norm": 0.8320237851857362, + "learning_rate": 5.061841171514534e-06, + "loss": 0.4824, + "step": 39033 + }, + { + "epoch": 0.6744885264030965, + "grad_norm": 1.0378798287630517, + "learning_rate": 5.0613545267702265e-06, + "loss": 0.3473, + "step": 39034 + }, + { + "epoch": 0.6745058059165054, + "grad_norm": 1.390429895352245, + "learning_rate": 5.060867897494136e-06, + "loss": 0.3296, + "step": 39035 + }, + { + "epoch": 0.6745230854299143, + "grad_norm": 2.488109739987871, + "learning_rate": 5.0603812836877745e-06, + "loss": 0.4085, + "step": 39036 + }, + { + "epoch": 0.6745403649433231, + "grad_norm": 1.4894596811366954, + "learning_rate": 5.059894685352682e-06, + "loss": 0.3366, + "step": 39037 + }, + { + "epoch": 0.674557644456732, + "grad_norm": 1.120360929221825, + "learning_rate": 5.0594081024903755e-06, + "loss": 0.4574, + "step": 39038 + }, + { + "epoch": 0.674574923970141, + "grad_norm": 1.1308142761913151, + "learning_rate": 5.058921535102373e-06, + "loss": 0.5261, + "step": 39039 + }, + { + "epoch": 0.6745922034835499, + "grad_norm": 1.1772237178359366, + "learning_rate": 5.058434983190209e-06, + "loss": 0.3673, + "step": 39040 + }, + { + "epoch": 0.6746094829969588, + "grad_norm": 0.8806918643710452, + "learning_rate": 5.0579484467553985e-06, + "loss": 0.3684, + "step": 39041 + }, + { + "epoch": 0.6746267625103677, + "grad_norm": 0.8353644994760793, + "learning_rate": 5.057461925799467e-06, + "loss": 0.2771, + "step": 39042 + }, + { + "epoch": 0.6746440420237766, + "grad_norm": 1.06921749738518, + "learning_rate": 5.056975420323942e-06, + "loss": 0.3795, + "step": 39043 + }, + { + "epoch": 0.6746613215371855, + "grad_norm": 0.676985244315889, + "learning_rate": 5.05648893033035e-06, + "loss": 0.6614, + "step": 39044 + }, + { + "epoch": 0.6746786010505944, + "grad_norm": 1.2200185676360413, + "learning_rate": 5.0560024558202045e-06, + "loss": 0.4519, + "step": 39045 + }, + { + "epoch": 0.6746958805640033, + "grad_norm": 0.9275090590254652, + "learning_rate": 5.0555159967950405e-06, + "loss": 0.4289, + "step": 39046 + }, + { + "epoch": 0.6747131600774122, + "grad_norm": 0.7688736912688112, + "learning_rate": 5.055029553256372e-06, + "loss": 0.252, + "step": 39047 + }, + { + "epoch": 0.6747304395908211, + "grad_norm": 0.5269268172156514, + "learning_rate": 5.054543125205727e-06, + "loss": 0.8557, + "step": 39048 + }, + { + "epoch": 0.67474771910423, + "grad_norm": 1.1389451055860895, + "learning_rate": 5.054056712644632e-06, + "loss": 0.3341, + "step": 39049 + }, + { + "epoch": 0.674764998617639, + "grad_norm": 1.5912035716466055, + "learning_rate": 5.053570315574603e-06, + "loss": 0.4065, + "step": 39050 + }, + { + "epoch": 0.6747822781310479, + "grad_norm": 1.0860000854635217, + "learning_rate": 5.053083933997171e-06, + "loss": 0.2225, + "step": 39051 + }, + { + "epoch": 0.6747995576444568, + "grad_norm": 1.0017489906421908, + "learning_rate": 5.052597567913855e-06, + "loss": 0.3343, + "step": 39052 + }, + { + "epoch": 0.6748168371578657, + "grad_norm": 1.0278902193524562, + "learning_rate": 5.052111217326178e-06, + "loss": 0.409, + "step": 39053 + }, + { + "epoch": 0.6748341166712746, + "grad_norm": 1.3258773114632503, + "learning_rate": 5.051624882235664e-06, + "loss": 0.359, + "step": 39054 + }, + { + "epoch": 0.6748513961846835, + "grad_norm": 1.0017482640326711, + "learning_rate": 5.051138562643841e-06, + "loss": 0.2413, + "step": 39055 + }, + { + "epoch": 0.6748686756980924, + "grad_norm": 1.2826140854709893, + "learning_rate": 5.050652258552224e-06, + "loss": 0.2874, + "step": 39056 + }, + { + "epoch": 0.6748859552115013, + "grad_norm": 1.2410445550720828, + "learning_rate": 5.050165969962346e-06, + "loss": 0.4986, + "step": 39057 + }, + { + "epoch": 0.6749032347249101, + "grad_norm": 1.1031277823003791, + "learning_rate": 5.049679696875718e-06, + "loss": 0.416, + "step": 39058 + }, + { + "epoch": 0.674920514238319, + "grad_norm": 0.9302758424182029, + "learning_rate": 5.049193439293872e-06, + "loss": 0.2542, + "step": 39059 + }, + { + "epoch": 0.6749377937517279, + "grad_norm": 2.191166496571072, + "learning_rate": 5.04870719721833e-06, + "loss": 0.4092, + "step": 39060 + }, + { + "epoch": 0.6749550732651368, + "grad_norm": 0.9881330410395525, + "learning_rate": 5.04822097065061e-06, + "loss": 0.4033, + "step": 39061 + }, + { + "epoch": 0.6749723527785457, + "grad_norm": 1.3988895086603792, + "learning_rate": 5.0477347595922424e-06, + "loss": 0.4534, + "step": 39062 + }, + { + "epoch": 0.6749896322919546, + "grad_norm": 1.1019435427418243, + "learning_rate": 5.047248564044742e-06, + "loss": 0.3623, + "step": 39063 + }, + { + "epoch": 0.6750069118053635, + "grad_norm": 1.3286066779010566, + "learning_rate": 5.0467623840096355e-06, + "loss": 0.4479, + "step": 39064 + }, + { + "epoch": 0.6750241913187724, + "grad_norm": 1.3515335538644375, + "learning_rate": 5.046276219488446e-06, + "loss": 0.3432, + "step": 39065 + }, + { + "epoch": 0.6750414708321814, + "grad_norm": 0.9703926703177878, + "learning_rate": 5.045790070482699e-06, + "loss": 0.4124, + "step": 39066 + }, + { + "epoch": 0.6750587503455903, + "grad_norm": 1.5106306775993656, + "learning_rate": 5.045303936993911e-06, + "loss": 0.33, + "step": 39067 + }, + { + "epoch": 0.6750760298589992, + "grad_norm": 1.0494807194531997, + "learning_rate": 5.044817819023611e-06, + "loss": 0.3705, + "step": 39068 + }, + { + "epoch": 0.6750933093724081, + "grad_norm": 1.4187878546639507, + "learning_rate": 5.044331716573319e-06, + "loss": 0.4092, + "step": 39069 + }, + { + "epoch": 0.675110588885817, + "grad_norm": 1.0017578087191004, + "learning_rate": 5.043845629644547e-06, + "loss": 0.3281, + "step": 39070 + }, + { + "epoch": 0.6751278683992259, + "grad_norm": 1.0802040894071223, + "learning_rate": 5.043359558238836e-06, + "loss": 0.3166, + "step": 39071 + }, + { + "epoch": 0.6751451479126348, + "grad_norm": 1.147811765334288, + "learning_rate": 5.0428735023576955e-06, + "loss": 0.44, + "step": 39072 + }, + { + "epoch": 0.6751624274260437, + "grad_norm": 0.732387905179293, + "learning_rate": 5.042387462002656e-06, + "loss": 0.4029, + "step": 39073 + }, + { + "epoch": 0.6751797069394526, + "grad_norm": 1.9617493710829554, + "learning_rate": 5.041901437175232e-06, + "loss": 0.3686, + "step": 39074 + }, + { + "epoch": 0.6751969864528615, + "grad_norm": 1.0740520704155434, + "learning_rate": 5.0414154278769526e-06, + "loss": 0.2962, + "step": 39075 + }, + { + "epoch": 0.6752142659662704, + "grad_norm": 1.338435989364129, + "learning_rate": 5.040929434109329e-06, + "loss": 0.3739, + "step": 39076 + }, + { + "epoch": 0.6752315454796793, + "grad_norm": 0.8275777661505718, + "learning_rate": 5.0404434558739e-06, + "loss": 0.2768, + "step": 39077 + }, + { + "epoch": 0.6752488249930882, + "grad_norm": 0.8173578382177177, + "learning_rate": 5.03995749317218e-06, + "loss": 0.1886, + "step": 39078 + }, + { + "epoch": 0.675266104506497, + "grad_norm": 1.376426652327033, + "learning_rate": 5.039471546005685e-06, + "loss": 0.5797, + "step": 39079 + }, + { + "epoch": 0.675283384019906, + "grad_norm": 0.8892979319790852, + "learning_rate": 5.038985614375946e-06, + "loss": 0.4907, + "step": 39080 + }, + { + "epoch": 0.6753006635333149, + "grad_norm": 1.5581360878826258, + "learning_rate": 5.0384996982844765e-06, + "loss": 0.456, + "step": 39081 + }, + { + "epoch": 0.6753179430467238, + "grad_norm": 1.0193948779145026, + "learning_rate": 5.038013797732804e-06, + "loss": 0.3521, + "step": 39082 + }, + { + "epoch": 0.6753352225601327, + "grad_norm": 1.118049153089803, + "learning_rate": 5.037527912722449e-06, + "loss": 0.3829, + "step": 39083 + }, + { + "epoch": 0.6753525020735416, + "grad_norm": 0.8740975909464856, + "learning_rate": 5.037042043254939e-06, + "loss": 0.4942, + "step": 39084 + }, + { + "epoch": 0.6753697815869505, + "grad_norm": 1.136923434077736, + "learning_rate": 5.036556189331785e-06, + "loss": 0.3182, + "step": 39085 + }, + { + "epoch": 0.6753870611003594, + "grad_norm": 1.38934827890556, + "learning_rate": 5.036070350954518e-06, + "loss": 0.4036, + "step": 39086 + }, + { + "epoch": 0.6754043406137683, + "grad_norm": 1.7752404315901966, + "learning_rate": 5.035584528124652e-06, + "loss": 0.2379, + "step": 39087 + }, + { + "epoch": 0.6754216201271772, + "grad_norm": 0.9522321600275303, + "learning_rate": 5.035098720843713e-06, + "loss": 0.5098, + "step": 39088 + }, + { + "epoch": 0.6754388996405861, + "grad_norm": 0.4846808456046505, + "learning_rate": 5.034612929113225e-06, + "loss": 0.4037, + "step": 39089 + }, + { + "epoch": 0.675456179153995, + "grad_norm": 0.8510190206984252, + "learning_rate": 5.034127152934704e-06, + "loss": 0.3934, + "step": 39090 + }, + { + "epoch": 0.6754734586674039, + "grad_norm": 0.6218405663811191, + "learning_rate": 5.033641392309678e-06, + "loss": 0.7946, + "step": 39091 + }, + { + "epoch": 0.6754907381808128, + "grad_norm": 2.175976184122469, + "learning_rate": 5.0331556472396604e-06, + "loss": 0.3427, + "step": 39092 + }, + { + "epoch": 0.6755080176942218, + "grad_norm": 1.5050667392530794, + "learning_rate": 5.032669917726178e-06, + "loss": 0.5963, + "step": 39093 + }, + { + "epoch": 0.6755252972076307, + "grad_norm": 1.5685996167130232, + "learning_rate": 5.0321842037707494e-06, + "loss": 0.403, + "step": 39094 + }, + { + "epoch": 0.6755425767210396, + "grad_norm": 0.9840078465827721, + "learning_rate": 5.0316985053749015e-06, + "loss": 0.2596, + "step": 39095 + }, + { + "epoch": 0.6755598562344485, + "grad_norm": 0.9739045312463177, + "learning_rate": 5.031212822540148e-06, + "loss": 0.2812, + "step": 39096 + }, + { + "epoch": 0.6755771357478574, + "grad_norm": 0.7799216631696018, + "learning_rate": 5.030727155268017e-06, + "loss": 0.4219, + "step": 39097 + }, + { + "epoch": 0.6755944152612663, + "grad_norm": 1.5206003960885697, + "learning_rate": 5.030241503560023e-06, + "loss": 0.4751, + "step": 39098 + }, + { + "epoch": 0.6756116947746752, + "grad_norm": 0.9461084148127169, + "learning_rate": 5.0297558674176895e-06, + "loss": 0.281, + "step": 39099 + }, + { + "epoch": 0.675628974288084, + "grad_norm": 0.9118547856258234, + "learning_rate": 5.029270246842541e-06, + "loss": 0.1976, + "step": 39100 + }, + { + "epoch": 0.6756462538014929, + "grad_norm": 1.0518353386458594, + "learning_rate": 5.028784641836095e-06, + "loss": 0.297, + "step": 39101 + }, + { + "epoch": 0.6756635333149018, + "grad_norm": 1.5542614665601564, + "learning_rate": 5.028299052399875e-06, + "loss": 0.5243, + "step": 39102 + }, + { + "epoch": 0.6756808128283107, + "grad_norm": 0.8314264694582488, + "learning_rate": 5.027813478535397e-06, + "loss": 0.5076, + "step": 39103 + }, + { + "epoch": 0.6756980923417196, + "grad_norm": 0.8557889446789477, + "learning_rate": 5.027327920244185e-06, + "loss": 0.3185, + "step": 39104 + }, + { + "epoch": 0.6757153718551285, + "grad_norm": 1.362664853623704, + "learning_rate": 5.026842377527761e-06, + "loss": 0.3901, + "step": 39105 + }, + { + "epoch": 0.6757326513685374, + "grad_norm": 0.902103620349997, + "learning_rate": 5.026356850387647e-06, + "loss": 0.4058, + "step": 39106 + }, + { + "epoch": 0.6757499308819463, + "grad_norm": 1.6657346654145415, + "learning_rate": 5.025871338825358e-06, + "loss": 0.2532, + "step": 39107 + }, + { + "epoch": 0.6757672103953553, + "grad_norm": 1.5281335973691341, + "learning_rate": 5.02538584284242e-06, + "loss": 0.4102, + "step": 39108 + }, + { + "epoch": 0.6757844899087642, + "grad_norm": 1.5842447645824551, + "learning_rate": 5.024900362440355e-06, + "loss": 0.4586, + "step": 39109 + }, + { + "epoch": 0.6758017694221731, + "grad_norm": 1.312271371602175, + "learning_rate": 5.024414897620671e-06, + "loss": 0.22, + "step": 39110 + }, + { + "epoch": 0.675819048935582, + "grad_norm": 1.0355750388195846, + "learning_rate": 5.023929448384904e-06, + "loss": 0.3386, + "step": 39111 + }, + { + "epoch": 0.6758363284489909, + "grad_norm": 0.7984809116593281, + "learning_rate": 5.023444014734566e-06, + "loss": 0.2036, + "step": 39112 + }, + { + "epoch": 0.6758536079623998, + "grad_norm": 1.50105658377243, + "learning_rate": 5.022958596671183e-06, + "loss": 0.4209, + "step": 39113 + }, + { + "epoch": 0.6758708874758087, + "grad_norm": 1.1935869645744699, + "learning_rate": 5.022473194196268e-06, + "loss": 0.4105, + "step": 39114 + }, + { + "epoch": 0.6758881669892176, + "grad_norm": 1.0466761653452241, + "learning_rate": 5.021987807311348e-06, + "loss": 0.4793, + "step": 39115 + }, + { + "epoch": 0.6759054465026265, + "grad_norm": 1.4711324562080241, + "learning_rate": 5.021502436017934e-06, + "loss": 0.4751, + "step": 39116 + }, + { + "epoch": 0.6759227260160354, + "grad_norm": 1.6345770994000208, + "learning_rate": 5.02101708031756e-06, + "loss": 0.3177, + "step": 39117 + }, + { + "epoch": 0.6759400055294443, + "grad_norm": 1.0696956825528663, + "learning_rate": 5.020531740211736e-06, + "loss": 0.2989, + "step": 39118 + }, + { + "epoch": 0.6759572850428532, + "grad_norm": 0.921845460807268, + "learning_rate": 5.020046415701988e-06, + "loss": 0.6606, + "step": 39119 + }, + { + "epoch": 0.6759745645562621, + "grad_norm": 1.840599278860771, + "learning_rate": 5.019561106789833e-06, + "loss": 0.2988, + "step": 39120 + }, + { + "epoch": 0.6759918440696709, + "grad_norm": 1.2320396114750745, + "learning_rate": 5.019075813476788e-06, + "loss": 0.3512, + "step": 39121 + }, + { + "epoch": 0.6760091235830799, + "grad_norm": 1.008518800822406, + "learning_rate": 5.018590535764376e-06, + "loss": 0.2701, + "step": 39122 + }, + { + "epoch": 0.6760264030964888, + "grad_norm": 1.0325976529669287, + "learning_rate": 5.0181052736541155e-06, + "loss": 0.6282, + "step": 39123 + }, + { + "epoch": 0.6760436826098977, + "grad_norm": 1.3622560914357609, + "learning_rate": 5.017620027147534e-06, + "loss": 0.4831, + "step": 39124 + }, + { + "epoch": 0.6760609621233066, + "grad_norm": 1.947833693002451, + "learning_rate": 5.0171347962461395e-06, + "loss": 0.2952, + "step": 39125 + }, + { + "epoch": 0.6760782416367155, + "grad_norm": 1.1422231587393916, + "learning_rate": 5.016649580951462e-06, + "loss": 0.3954, + "step": 39126 + }, + { + "epoch": 0.6760955211501244, + "grad_norm": 1.4409912054958844, + "learning_rate": 5.016164381265013e-06, + "loss": 0.2921, + "step": 39127 + }, + { + "epoch": 0.6761128006635333, + "grad_norm": 0.8851024656552485, + "learning_rate": 5.015679197188316e-06, + "loss": 0.4399, + "step": 39128 + }, + { + "epoch": 0.6761300801769422, + "grad_norm": 1.4039801156933858, + "learning_rate": 5.01519402872289e-06, + "loss": 0.346, + "step": 39129 + }, + { + "epoch": 0.6761473596903511, + "grad_norm": 1.5265217268624824, + "learning_rate": 5.01470887587026e-06, + "loss": 0.3896, + "step": 39130 + }, + { + "epoch": 0.67616463920376, + "grad_norm": 1.776114026279101, + "learning_rate": 5.01422373863194e-06, + "loss": 0.4075, + "step": 39131 + }, + { + "epoch": 0.6761819187171689, + "grad_norm": 1.850552528409622, + "learning_rate": 5.013738617009446e-06, + "loss": 0.4198, + "step": 39132 + }, + { + "epoch": 0.6761991982305778, + "grad_norm": 1.1422268652279417, + "learning_rate": 5.013253511004302e-06, + "loss": 0.3267, + "step": 39133 + }, + { + "epoch": 0.6762164777439867, + "grad_norm": 1.287095954277184, + "learning_rate": 5.012768420618026e-06, + "loss": 0.3209, + "step": 39134 + }, + { + "epoch": 0.6762337572573957, + "grad_norm": 0.7511999509255699, + "learning_rate": 5.012283345852143e-06, + "loss": 0.2602, + "step": 39135 + }, + { + "epoch": 0.6762510367708046, + "grad_norm": 1.9765319420217078, + "learning_rate": 5.011798286708165e-06, + "loss": 0.6112, + "step": 39136 + }, + { + "epoch": 0.6762683162842135, + "grad_norm": 1.0016907576729879, + "learning_rate": 5.011313243187617e-06, + "loss": 0.4913, + "step": 39137 + }, + { + "epoch": 0.6762855957976224, + "grad_norm": 1.1281128790354011, + "learning_rate": 5.0108282152920094e-06, + "loss": 0.361, + "step": 39138 + }, + { + "epoch": 0.6763028753110313, + "grad_norm": 1.0854959071309431, + "learning_rate": 5.010343203022869e-06, + "loss": 0.2383, + "step": 39139 + }, + { + "epoch": 0.6763201548244402, + "grad_norm": 1.7602894887677414, + "learning_rate": 5.009858206381716e-06, + "loss": 0.5041, + "step": 39140 + }, + { + "epoch": 0.6763374343378491, + "grad_norm": 1.4710395580499378, + "learning_rate": 5.009373225370062e-06, + "loss": 0.3704, + "step": 39141 + }, + { + "epoch": 0.6763547138512579, + "grad_norm": 1.0376686863508802, + "learning_rate": 5.008888259989435e-06, + "loss": 0.2765, + "step": 39142 + }, + { + "epoch": 0.6763719933646668, + "grad_norm": 1.1970930297124456, + "learning_rate": 5.008403310241346e-06, + "loss": 0.5165, + "step": 39143 + }, + { + "epoch": 0.6763892728780757, + "grad_norm": 0.7765999798289724, + "learning_rate": 5.007918376127316e-06, + "loss": 0.4291, + "step": 39144 + }, + { + "epoch": 0.6764065523914846, + "grad_norm": 1.848160209190165, + "learning_rate": 5.007433457648865e-06, + "loss": 0.4341, + "step": 39145 + }, + { + "epoch": 0.6764238319048935, + "grad_norm": 1.34446000112806, + "learning_rate": 5.0069485548075155e-06, + "loss": 0.5769, + "step": 39146 + }, + { + "epoch": 0.6764411114183024, + "grad_norm": 0.9900334847358494, + "learning_rate": 5.00646366760478e-06, + "loss": 0.3585, + "step": 39147 + }, + { + "epoch": 0.6764583909317113, + "grad_norm": 1.417512722326825, + "learning_rate": 5.005978796042184e-06, + "loss": 0.3396, + "step": 39148 + }, + { + "epoch": 0.6764756704451202, + "grad_norm": 0.9568979721305962, + "learning_rate": 5.005493940121235e-06, + "loss": 0.4484, + "step": 39149 + }, + { + "epoch": 0.6764929499585292, + "grad_norm": 1.2232827265174746, + "learning_rate": 5.005009099843462e-06, + "loss": 0.3776, + "step": 39150 + }, + { + "epoch": 0.6765102294719381, + "grad_norm": 1.0800293111863146, + "learning_rate": 5.004524275210383e-06, + "loss": 0.4495, + "step": 39151 + }, + { + "epoch": 0.676527508985347, + "grad_norm": 1.6723633632260986, + "learning_rate": 5.00403946622351e-06, + "loss": 0.3019, + "step": 39152 + }, + { + "epoch": 0.6765447884987559, + "grad_norm": 2.540067809762555, + "learning_rate": 5.003554672884367e-06, + "loss": 0.4468, + "step": 39153 + }, + { + "epoch": 0.6765620680121648, + "grad_norm": 1.0908211813577542, + "learning_rate": 5.003069895194468e-06, + "loss": 0.3007, + "step": 39154 + }, + { + "epoch": 0.6765793475255737, + "grad_norm": 1.3088862036736213, + "learning_rate": 5.002585133155338e-06, + "loss": 0.3262, + "step": 39155 + }, + { + "epoch": 0.6765966270389826, + "grad_norm": 1.0593674447072055, + "learning_rate": 5.002100386768483e-06, + "loss": 0.3871, + "step": 39156 + }, + { + "epoch": 0.6766139065523915, + "grad_norm": 0.994470709173204, + "learning_rate": 5.001615656035437e-06, + "loss": 0.349, + "step": 39157 + }, + { + "epoch": 0.6766311860658004, + "grad_norm": 1.3499060643156027, + "learning_rate": 5.001130940957706e-06, + "loss": 0.4553, + "step": 39158 + }, + { + "epoch": 0.6766484655792093, + "grad_norm": 1.1640399341045413, + "learning_rate": 5.000646241536818e-06, + "loss": 0.402, + "step": 39159 + }, + { + "epoch": 0.6766657450926182, + "grad_norm": 1.0827959751497613, + "learning_rate": 5.0001615577742815e-06, + "loss": 0.3516, + "step": 39160 + }, + { + "epoch": 0.6766830246060271, + "grad_norm": 1.345465837708672, + "learning_rate": 4.999676889671623e-06, + "loss": 0.339, + "step": 39161 + }, + { + "epoch": 0.676700304119436, + "grad_norm": 1.2092154732896676, + "learning_rate": 4.999192237230352e-06, + "loss": 0.4524, + "step": 39162 + }, + { + "epoch": 0.676717583632845, + "grad_norm": 1.7456588094151255, + "learning_rate": 4.998707600451992e-06, + "loss": 0.1454, + "step": 39163 + }, + { + "epoch": 0.6767348631462538, + "grad_norm": 0.9087890605056119, + "learning_rate": 4.998222979338063e-06, + "loss": 0.3799, + "step": 39164 + }, + { + "epoch": 0.6767521426596627, + "grad_norm": 1.2751782459397558, + "learning_rate": 4.997738373890076e-06, + "loss": 0.4621, + "step": 39165 + }, + { + "epoch": 0.6767694221730716, + "grad_norm": 1.494293825780525, + "learning_rate": 4.9972537841095566e-06, + "loss": 0.3111, + "step": 39166 + }, + { + "epoch": 0.6767867016864805, + "grad_norm": 1.0780556233560519, + "learning_rate": 4.996769209998015e-06, + "loss": 0.3822, + "step": 39167 + }, + { + "epoch": 0.6768039811998894, + "grad_norm": 0.7743319100989517, + "learning_rate": 4.996284651556972e-06, + "loss": 0.502, + "step": 39168 + }, + { + "epoch": 0.6768212607132983, + "grad_norm": 1.2442476709921169, + "learning_rate": 4.995800108787946e-06, + "loss": 0.3155, + "step": 39169 + }, + { + "epoch": 0.6768385402267072, + "grad_norm": 1.197930639397446, + "learning_rate": 4.995315581692458e-06, + "loss": 0.3069, + "step": 39170 + }, + { + "epoch": 0.6768558197401161, + "grad_norm": 1.2368502457413124, + "learning_rate": 4.994831070272022e-06, + "loss": 0.3544, + "step": 39171 + }, + { + "epoch": 0.676873099253525, + "grad_norm": 0.8423907985583846, + "learning_rate": 4.994346574528152e-06, + "loss": 0.3957, + "step": 39172 + }, + { + "epoch": 0.6768903787669339, + "grad_norm": 1.130762863099081, + "learning_rate": 4.993862094462369e-06, + "loss": 0.4445, + "step": 39173 + }, + { + "epoch": 0.6769076582803428, + "grad_norm": 1.3844831359369152, + "learning_rate": 4.99337763007619e-06, + "loss": 0.364, + "step": 39174 + }, + { + "epoch": 0.6769249377937517, + "grad_norm": 1.168512305493901, + "learning_rate": 4.992893181371136e-06, + "loss": 0.4061, + "step": 39175 + }, + { + "epoch": 0.6769422173071606, + "grad_norm": 1.3259806616837753, + "learning_rate": 4.992408748348718e-06, + "loss": 0.3421, + "step": 39176 + }, + { + "epoch": 0.6769594968205696, + "grad_norm": 1.0544357689137875, + "learning_rate": 4.991924331010461e-06, + "loss": 0.4349, + "step": 39177 + }, + { + "epoch": 0.6769767763339785, + "grad_norm": 1.4639203566344132, + "learning_rate": 4.991439929357872e-06, + "loss": 0.4052, + "step": 39178 + }, + { + "epoch": 0.6769940558473874, + "grad_norm": 1.1559907529867983, + "learning_rate": 4.990955543392474e-06, + "loss": 0.3895, + "step": 39179 + }, + { + "epoch": 0.6770113353607963, + "grad_norm": 1.2553191267486299, + "learning_rate": 4.9904711731157854e-06, + "loss": 0.423, + "step": 39180 + }, + { + "epoch": 0.6770286148742052, + "grad_norm": 0.9478145614775144, + "learning_rate": 4.989986818529324e-06, + "loss": 0.3335, + "step": 39181 + }, + { + "epoch": 0.6770458943876141, + "grad_norm": 1.6370596662656987, + "learning_rate": 4.989502479634606e-06, + "loss": 0.419, + "step": 39182 + }, + { + "epoch": 0.677063173901023, + "grad_norm": 1.1728296593821002, + "learning_rate": 4.9890181564331415e-06, + "loss": 0.3682, + "step": 39183 + }, + { + "epoch": 0.6770804534144319, + "grad_norm": 1.0031838627282281, + "learning_rate": 4.988533848926455e-06, + "loss": 0.2274, + "step": 39184 + }, + { + "epoch": 0.6770977329278407, + "grad_norm": 1.2898862176939783, + "learning_rate": 4.988049557116059e-06, + "loss": 0.4085, + "step": 39185 + }, + { + "epoch": 0.6771150124412496, + "grad_norm": 1.3997477376923786, + "learning_rate": 4.987565281003478e-06, + "loss": 0.2621, + "step": 39186 + }, + { + "epoch": 0.6771322919546585, + "grad_norm": 1.2169479776066785, + "learning_rate": 4.98708102059022e-06, + "loss": 0.4176, + "step": 39187 + }, + { + "epoch": 0.6771495714680674, + "grad_norm": 0.8083108562359032, + "learning_rate": 4.986596775877809e-06, + "loss": 0.2761, + "step": 39188 + }, + { + "epoch": 0.6771668509814763, + "grad_norm": 1.1335016261655273, + "learning_rate": 4.986112546867753e-06, + "loss": 0.4855, + "step": 39189 + }, + { + "epoch": 0.6771841304948852, + "grad_norm": 1.515909405615747, + "learning_rate": 4.985628333561574e-06, + "loss": 0.5786, + "step": 39190 + }, + { + "epoch": 0.6772014100082941, + "grad_norm": 0.9810621834128413, + "learning_rate": 4.985144135960789e-06, + "loss": 0.3199, + "step": 39191 + }, + { + "epoch": 0.677218689521703, + "grad_norm": 1.1212658774398099, + "learning_rate": 4.984659954066916e-06, + "loss": 0.2503, + "step": 39192 + }, + { + "epoch": 0.677235969035112, + "grad_norm": 1.0177877310865966, + "learning_rate": 4.98417578788147e-06, + "loss": 0.3848, + "step": 39193 + }, + { + "epoch": 0.6772532485485209, + "grad_norm": 1.0208815306208363, + "learning_rate": 4.983691637405963e-06, + "loss": 0.3479, + "step": 39194 + }, + { + "epoch": 0.6772705280619298, + "grad_norm": 0.932608976699504, + "learning_rate": 4.983207502641918e-06, + "loss": 0.2829, + "step": 39195 + }, + { + "epoch": 0.6772878075753387, + "grad_norm": 1.0609625626507915, + "learning_rate": 4.9827233835908414e-06, + "loss": 0.4352, + "step": 39196 + }, + { + "epoch": 0.6773050870887476, + "grad_norm": 1.3495637176063453, + "learning_rate": 4.982239280254264e-06, + "loss": 0.364, + "step": 39197 + }, + { + "epoch": 0.6773223666021565, + "grad_norm": 1.286513730777672, + "learning_rate": 4.981755192633691e-06, + "loss": 0.5106, + "step": 39198 + }, + { + "epoch": 0.6773396461155654, + "grad_norm": 1.0263427814835595, + "learning_rate": 4.981271120730645e-06, + "loss": 0.3826, + "step": 39199 + }, + { + "epoch": 0.6773569256289743, + "grad_norm": 1.8520838845228664, + "learning_rate": 4.980787064546637e-06, + "loss": 0.2531, + "step": 39200 + }, + { + "epoch": 0.6773742051423832, + "grad_norm": 1.006705461423693, + "learning_rate": 4.980303024083188e-06, + "loss": 0.2792, + "step": 39201 + }, + { + "epoch": 0.6773914846557921, + "grad_norm": 0.933034246597278, + "learning_rate": 4.979818999341808e-06, + "loss": 0.4549, + "step": 39202 + }, + { + "epoch": 0.677408764169201, + "grad_norm": 1.4119792885344713, + "learning_rate": 4.979334990324016e-06, + "loss": 0.4503, + "step": 39203 + }, + { + "epoch": 0.67742604368261, + "grad_norm": 1.105695031091744, + "learning_rate": 4.9788509970313325e-06, + "loss": 0.4582, + "step": 39204 + }, + { + "epoch": 0.6774433231960189, + "grad_norm": 0.5658817977288585, + "learning_rate": 4.978367019465265e-06, + "loss": 0.8478, + "step": 39205 + }, + { + "epoch": 0.6774606027094277, + "grad_norm": 1.0832651236354618, + "learning_rate": 4.977883057627338e-06, + "loss": 0.4236, + "step": 39206 + }, + { + "epoch": 0.6774778822228366, + "grad_norm": 1.6135882190442972, + "learning_rate": 4.977399111519059e-06, + "loss": 0.4143, + "step": 39207 + }, + { + "epoch": 0.6774951617362455, + "grad_norm": 0.7773116021085396, + "learning_rate": 4.976915181141948e-06, + "loss": 0.4641, + "step": 39208 + }, + { + "epoch": 0.6775124412496544, + "grad_norm": 1.062176824151942, + "learning_rate": 4.97643126649752e-06, + "loss": 0.3595, + "step": 39209 + }, + { + "epoch": 0.6775297207630633, + "grad_norm": 1.1828272523438017, + "learning_rate": 4.975947367587295e-06, + "loss": 0.4783, + "step": 39210 + }, + { + "epoch": 0.6775470002764722, + "grad_norm": 1.2836996720681109, + "learning_rate": 4.9754634844127805e-06, + "loss": 0.2998, + "step": 39211 + }, + { + "epoch": 0.6775642797898811, + "grad_norm": 1.0574971571738248, + "learning_rate": 4.9749796169755e-06, + "loss": 0.3644, + "step": 39212 + }, + { + "epoch": 0.67758155930329, + "grad_norm": 1.0322989273901428, + "learning_rate": 4.974495765276962e-06, + "loss": 0.2448, + "step": 39213 + }, + { + "epoch": 0.6775988388166989, + "grad_norm": 1.3197068776842478, + "learning_rate": 4.974011929318685e-06, + "loss": 0.6803, + "step": 39214 + }, + { + "epoch": 0.6776161183301078, + "grad_norm": 1.486371579200089, + "learning_rate": 4.973528109102188e-06, + "loss": 0.4915, + "step": 39215 + }, + { + "epoch": 0.6776333978435167, + "grad_norm": 1.8674509411552076, + "learning_rate": 4.973044304628979e-06, + "loss": 0.2721, + "step": 39216 + }, + { + "epoch": 0.6776506773569256, + "grad_norm": 0.9790721092796443, + "learning_rate": 4.972560515900582e-06, + "loss": 0.2286, + "step": 39217 + }, + { + "epoch": 0.6776679568703345, + "grad_norm": 0.9822408603587645, + "learning_rate": 4.972076742918504e-06, + "loss": 0.2937, + "step": 39218 + }, + { + "epoch": 0.6776852363837435, + "grad_norm": 1.2772491795037348, + "learning_rate": 4.971592985684262e-06, + "loss": 0.9137, + "step": 39219 + }, + { + "epoch": 0.6777025158971524, + "grad_norm": 1.0941420716411236, + "learning_rate": 4.9711092441993736e-06, + "loss": 0.3593, + "step": 39220 + }, + { + "epoch": 0.6777197954105613, + "grad_norm": 1.4776675554866243, + "learning_rate": 4.970625518465357e-06, + "loss": 0.4161, + "step": 39221 + }, + { + "epoch": 0.6777370749239702, + "grad_norm": 1.6852653240309279, + "learning_rate": 4.97014180848372e-06, + "loss": 0.2193, + "step": 39222 + }, + { + "epoch": 0.6777543544373791, + "grad_norm": 1.947908980188707, + "learning_rate": 4.9696581142559856e-06, + "loss": 0.5661, + "step": 39223 + }, + { + "epoch": 0.677771633950788, + "grad_norm": 1.1405484964654289, + "learning_rate": 4.96917443578366e-06, + "loss": 0.3262, + "step": 39224 + }, + { + "epoch": 0.6777889134641969, + "grad_norm": 0.763306124367739, + "learning_rate": 4.968690773068262e-06, + "loss": 0.2776, + "step": 39225 + }, + { + "epoch": 0.6778061929776058, + "grad_norm": 0.8068287707086198, + "learning_rate": 4.968207126111311e-06, + "loss": 0.3748, + "step": 39226 + }, + { + "epoch": 0.6778234724910146, + "grad_norm": 1.1441475709737012, + "learning_rate": 4.967723494914314e-06, + "loss": 0.3445, + "step": 39227 + }, + { + "epoch": 0.6778407520044235, + "grad_norm": 1.3126910465200705, + "learning_rate": 4.967239879478792e-06, + "loss": 0.4256, + "step": 39228 + }, + { + "epoch": 0.6778580315178324, + "grad_norm": 1.9828078787432268, + "learning_rate": 4.966756279806255e-06, + "loss": 0.3383, + "step": 39229 + }, + { + "epoch": 0.6778753110312413, + "grad_norm": 1.4238384261501071, + "learning_rate": 4.96627269589822e-06, + "loss": 0.3171, + "step": 39230 + }, + { + "epoch": 0.6778925905446502, + "grad_norm": 0.8441474941881446, + "learning_rate": 4.9657891277562e-06, + "loss": 0.5317, + "step": 39231 + }, + { + "epoch": 0.6779098700580591, + "grad_norm": 1.5963942620234752, + "learning_rate": 4.9653055753817155e-06, + "loss": 0.4015, + "step": 39232 + }, + { + "epoch": 0.677927149571468, + "grad_norm": 0.9654903551178284, + "learning_rate": 4.964822038776277e-06, + "loss": 0.7603, + "step": 39233 + }, + { + "epoch": 0.677944429084877, + "grad_norm": 1.919513830981137, + "learning_rate": 4.964338517941394e-06, + "loss": 0.2363, + "step": 39234 + }, + { + "epoch": 0.6779617085982859, + "grad_norm": 2.2012600479145377, + "learning_rate": 4.96385501287859e-06, + "loss": 0.3436, + "step": 39235 + }, + { + "epoch": 0.6779789881116948, + "grad_norm": 1.2161968049685012, + "learning_rate": 4.963371523589366e-06, + "loss": 0.4703, + "step": 39236 + }, + { + "epoch": 0.6779962676251037, + "grad_norm": 0.7292289845898551, + "learning_rate": 4.962888050075254e-06, + "loss": 0.7547, + "step": 39237 + }, + { + "epoch": 0.6780135471385126, + "grad_norm": 1.9762732181893292, + "learning_rate": 4.962404592337755e-06, + "loss": 0.3974, + "step": 39238 + }, + { + "epoch": 0.6780308266519215, + "grad_norm": 1.220638416659655, + "learning_rate": 4.961921150378391e-06, + "loss": 0.37, + "step": 39239 + }, + { + "epoch": 0.6780481061653304, + "grad_norm": 1.2040886378846802, + "learning_rate": 4.961437724198671e-06, + "loss": 0.406, + "step": 39240 + }, + { + "epoch": 0.6780653856787393, + "grad_norm": 1.4204252302072833, + "learning_rate": 4.960954313800113e-06, + "loss": 0.4706, + "step": 39241 + }, + { + "epoch": 0.6780826651921482, + "grad_norm": 1.1974535464286848, + "learning_rate": 4.960470919184226e-06, + "loss": 0.3246, + "step": 39242 + }, + { + "epoch": 0.6780999447055571, + "grad_norm": 0.8168261493607589, + "learning_rate": 4.959987540352527e-06, + "loss": 0.5184, + "step": 39243 + }, + { + "epoch": 0.678117224218966, + "grad_norm": 1.3623285918995522, + "learning_rate": 4.9595041773065335e-06, + "loss": 0.3549, + "step": 39244 + }, + { + "epoch": 0.6781345037323749, + "grad_norm": 1.2360347083421688, + "learning_rate": 4.959020830047751e-06, + "loss": 0.4308, + "step": 39245 + }, + { + "epoch": 0.6781517832457838, + "grad_norm": 0.9734979992299313, + "learning_rate": 4.958537498577705e-06, + "loss": 0.3239, + "step": 39246 + }, + { + "epoch": 0.6781690627591928, + "grad_norm": 1.7128898454318457, + "learning_rate": 4.9580541828978965e-06, + "loss": 0.4656, + "step": 39247 + }, + { + "epoch": 0.6781863422726016, + "grad_norm": 1.039450517159755, + "learning_rate": 4.957570883009847e-06, + "loss": 0.3164, + "step": 39248 + }, + { + "epoch": 0.6782036217860105, + "grad_norm": 1.2269829751899863, + "learning_rate": 4.957087598915068e-06, + "loss": 0.5508, + "step": 39249 + }, + { + "epoch": 0.6782209012994194, + "grad_norm": 1.5907650040847399, + "learning_rate": 4.9566043306150776e-06, + "loss": 0.498, + "step": 39250 + }, + { + "epoch": 0.6782381808128283, + "grad_norm": 1.1299892473662068, + "learning_rate": 4.956121078111383e-06, + "loss": 0.4356, + "step": 39251 + }, + { + "epoch": 0.6782554603262372, + "grad_norm": 0.9835353491610048, + "learning_rate": 4.955637841405503e-06, + "loss": 0.5202, + "step": 39252 + }, + { + "epoch": 0.6782727398396461, + "grad_norm": 1.2603926013152214, + "learning_rate": 4.9551546204989445e-06, + "loss": 0.3338, + "step": 39253 + }, + { + "epoch": 0.678290019353055, + "grad_norm": 1.233834437301777, + "learning_rate": 4.954671415393226e-06, + "loss": 0.2834, + "step": 39254 + }, + { + "epoch": 0.6783072988664639, + "grad_norm": 1.2634166719247568, + "learning_rate": 4.954188226089864e-06, + "loss": 0.4367, + "step": 39255 + }, + { + "epoch": 0.6783245783798728, + "grad_norm": 1.1371171868008323, + "learning_rate": 4.953705052590363e-06, + "loss": 0.5541, + "step": 39256 + }, + { + "epoch": 0.6783418578932817, + "grad_norm": 1.2546187183199191, + "learning_rate": 4.953221894896246e-06, + "loss": 0.4752, + "step": 39257 + }, + { + "epoch": 0.6783591374066906, + "grad_norm": 1.7237294047235243, + "learning_rate": 4.952738753009018e-06, + "loss": 0.3055, + "step": 39258 + }, + { + "epoch": 0.6783764169200995, + "grad_norm": 1.2643366660944084, + "learning_rate": 4.952255626930196e-06, + "loss": 0.3288, + "step": 39259 + }, + { + "epoch": 0.6783936964335084, + "grad_norm": 0.9701366719368816, + "learning_rate": 4.951772516661292e-06, + "loss": 0.2801, + "step": 39260 + }, + { + "epoch": 0.6784109759469173, + "grad_norm": 1.0811373684750283, + "learning_rate": 4.951289422203824e-06, + "loss": 0.3347, + "step": 39261 + }, + { + "epoch": 0.6784282554603263, + "grad_norm": 0.9889111031919228, + "learning_rate": 4.950806343559298e-06, + "loss": 0.3547, + "step": 39262 + }, + { + "epoch": 0.6784455349737352, + "grad_norm": 0.6693257936213332, + "learning_rate": 4.950323280729233e-06, + "loss": 0.2793, + "step": 39263 + }, + { + "epoch": 0.6784628144871441, + "grad_norm": 0.7955954419274947, + "learning_rate": 4.949840233715137e-06, + "loss": 0.2396, + "step": 39264 + }, + { + "epoch": 0.678480094000553, + "grad_norm": 2.3502091275624397, + "learning_rate": 4.949357202518525e-06, + "loss": 0.5903, + "step": 39265 + }, + { + "epoch": 0.6784973735139619, + "grad_norm": 1.8338723388513725, + "learning_rate": 4.948874187140913e-06, + "loss": 0.6906, + "step": 39266 + }, + { + "epoch": 0.6785146530273708, + "grad_norm": 0.8461698991385349, + "learning_rate": 4.9483911875838075e-06, + "loss": 0.4168, + "step": 39267 + }, + { + "epoch": 0.6785319325407797, + "grad_norm": 1.0949627704579, + "learning_rate": 4.9479082038487294e-06, + "loss": 0.4702, + "step": 39268 + }, + { + "epoch": 0.6785492120541885, + "grad_norm": 1.1507731639954464, + "learning_rate": 4.947425235937181e-06, + "loss": 0.4394, + "step": 39269 + }, + { + "epoch": 0.6785664915675974, + "grad_norm": 1.1341090992199152, + "learning_rate": 4.946942283850682e-06, + "loss": 0.4301, + "step": 39270 + }, + { + "epoch": 0.6785837710810063, + "grad_norm": 1.0499589336678747, + "learning_rate": 4.9464593475907445e-06, + "loss": 0.3643, + "step": 39271 + }, + { + "epoch": 0.6786010505944152, + "grad_norm": 1.041744718202358, + "learning_rate": 4.945976427158883e-06, + "loss": 0.4462, + "step": 39272 + }, + { + "epoch": 0.6786183301078241, + "grad_norm": 0.9838759842415268, + "learning_rate": 4.9454935225566035e-06, + "loss": 0.3971, + "step": 39273 + }, + { + "epoch": 0.678635609621233, + "grad_norm": 1.4644347523132766, + "learning_rate": 4.9450106337854266e-06, + "loss": 0.4025, + "step": 39274 + }, + { + "epoch": 0.678652889134642, + "grad_norm": 0.907692218637513, + "learning_rate": 4.94452776084686e-06, + "loss": 0.344, + "step": 39275 + }, + { + "epoch": 0.6786701686480509, + "grad_norm": 1.3190625611390752, + "learning_rate": 4.9440449037424095e-06, + "loss": 0.3028, + "step": 39276 + }, + { + "epoch": 0.6786874481614598, + "grad_norm": 1.8437095959015606, + "learning_rate": 4.943562062473603e-06, + "loss": 0.2407, + "step": 39277 + }, + { + "epoch": 0.6787047276748687, + "grad_norm": 1.2324344262005107, + "learning_rate": 4.94307923704194e-06, + "loss": 0.5501, + "step": 39278 + }, + { + "epoch": 0.6787220071882776, + "grad_norm": 1.2620199440553561, + "learning_rate": 4.94259642744894e-06, + "loss": 0.5189, + "step": 39279 + }, + { + "epoch": 0.6787392867016865, + "grad_norm": 1.1124511586349684, + "learning_rate": 4.94211363369611e-06, + "loss": 0.3194, + "step": 39280 + }, + { + "epoch": 0.6787565662150954, + "grad_norm": 1.0970572412391022, + "learning_rate": 4.9416308557849675e-06, + "loss": 0.3767, + "step": 39281 + }, + { + "epoch": 0.6787738457285043, + "grad_norm": 0.7139799146987568, + "learning_rate": 4.9411480937170175e-06, + "loss": 0.324, + "step": 39282 + }, + { + "epoch": 0.6787911252419132, + "grad_norm": 1.2229818918422095, + "learning_rate": 4.940665347493777e-06, + "loss": 0.3105, + "step": 39283 + }, + { + "epoch": 0.6788084047553221, + "grad_norm": 0.5143628848075661, + "learning_rate": 4.9401826171167615e-06, + "loss": 0.1924, + "step": 39284 + }, + { + "epoch": 0.678825684268731, + "grad_norm": 1.227585496000233, + "learning_rate": 4.939699902587473e-06, + "loss": 0.3641, + "step": 39285 + }, + { + "epoch": 0.6788429637821399, + "grad_norm": 1.1325954535942875, + "learning_rate": 4.939217203907434e-06, + "loss": 0.3614, + "step": 39286 + }, + { + "epoch": 0.6788602432955488, + "grad_norm": 1.1465115599593831, + "learning_rate": 4.938734521078146e-06, + "loss": 0.2289, + "step": 39287 + }, + { + "epoch": 0.6788775228089577, + "grad_norm": 1.6113303979538478, + "learning_rate": 4.938251854101128e-06, + "loss": 0.4812, + "step": 39288 + }, + { + "epoch": 0.6788948023223667, + "grad_norm": 1.2718302547597995, + "learning_rate": 4.937769202977889e-06, + "loss": 0.1956, + "step": 39289 + }, + { + "epoch": 0.6789120818357754, + "grad_norm": 0.9395493365480168, + "learning_rate": 4.937286567709946e-06, + "loss": 0.2637, + "step": 39290 + }, + { + "epoch": 0.6789293613491844, + "grad_norm": 2.0525647521862505, + "learning_rate": 4.936803948298802e-06, + "loss": 0.4003, + "step": 39291 + }, + { + "epoch": 0.6789466408625933, + "grad_norm": 1.6624024130146224, + "learning_rate": 4.936321344745976e-06, + "loss": 0.5208, + "step": 39292 + }, + { + "epoch": 0.6789639203760022, + "grad_norm": 1.4958907938342105, + "learning_rate": 4.935838757052973e-06, + "loss": 0.2824, + "step": 39293 + }, + { + "epoch": 0.6789811998894111, + "grad_norm": 1.4947070160767661, + "learning_rate": 4.935356185221307e-06, + "loss": 0.2938, + "step": 39294 + }, + { + "epoch": 0.67899847940282, + "grad_norm": 1.038357141415702, + "learning_rate": 4.934873629252495e-06, + "loss": 0.8508, + "step": 39295 + }, + { + "epoch": 0.6790157589162289, + "grad_norm": 1.0312089729079223, + "learning_rate": 4.934391089148041e-06, + "loss": 0.3774, + "step": 39296 + }, + { + "epoch": 0.6790330384296378, + "grad_norm": 1.451705797128647, + "learning_rate": 4.933908564909462e-06, + "loss": 0.4965, + "step": 39297 + }, + { + "epoch": 0.6790503179430467, + "grad_norm": 1.6444168985316712, + "learning_rate": 4.933426056538262e-06, + "loss": 0.5144, + "step": 39298 + }, + { + "epoch": 0.6790675974564556, + "grad_norm": 1.0334513538621586, + "learning_rate": 4.932943564035957e-06, + "loss": 0.3646, + "step": 39299 + }, + { + "epoch": 0.6790848769698645, + "grad_norm": 0.9276963235732567, + "learning_rate": 4.932461087404058e-06, + "loss": 0.2929, + "step": 39300 + }, + { + "epoch": 0.6791021564832734, + "grad_norm": 1.6292033033995086, + "learning_rate": 4.931978626644079e-06, + "loss": 0.376, + "step": 39301 + }, + { + "epoch": 0.6791194359966823, + "grad_norm": 0.9098059588282154, + "learning_rate": 4.931496181757526e-06, + "loss": 0.3501, + "step": 39302 + }, + { + "epoch": 0.6791367155100912, + "grad_norm": 1.5786849454572869, + "learning_rate": 4.931013752745915e-06, + "loss": 0.3914, + "step": 39303 + }, + { + "epoch": 0.6791539950235002, + "grad_norm": 0.9387416627260279, + "learning_rate": 4.9305313396107505e-06, + "loss": 0.2053, + "step": 39304 + }, + { + "epoch": 0.6791712745369091, + "grad_norm": 1.0299586481117284, + "learning_rate": 4.9300489423535466e-06, + "loss": 0.4506, + "step": 39305 + }, + { + "epoch": 0.679188554050318, + "grad_norm": 0.8895346721980676, + "learning_rate": 4.929566560975819e-06, + "loss": 0.267, + "step": 39306 + }, + { + "epoch": 0.6792058335637269, + "grad_norm": 1.0533447767056725, + "learning_rate": 4.92908419547907e-06, + "loss": 0.278, + "step": 39307 + }, + { + "epoch": 0.6792231130771358, + "grad_norm": 0.9807418833833481, + "learning_rate": 4.9286018458648185e-06, + "loss": 0.2931, + "step": 39308 + }, + { + "epoch": 0.6792403925905447, + "grad_norm": 0.8990173144333222, + "learning_rate": 4.928119512134568e-06, + "loss": 0.3246, + "step": 39309 + }, + { + "epoch": 0.6792576721039536, + "grad_norm": 0.5691367654923863, + "learning_rate": 4.927637194289833e-06, + "loss": 0.8168, + "step": 39310 + }, + { + "epoch": 0.6792749516173625, + "grad_norm": 1.225771025183732, + "learning_rate": 4.927154892332124e-06, + "loss": 0.3706, + "step": 39311 + }, + { + "epoch": 0.6792922311307713, + "grad_norm": 0.8243612166256152, + "learning_rate": 4.926672606262954e-06, + "loss": 0.6074, + "step": 39312 + }, + { + "epoch": 0.6793095106441802, + "grad_norm": 1.4211299450032118, + "learning_rate": 4.926190336083828e-06, + "loss": 0.3413, + "step": 39313 + }, + { + "epoch": 0.6793267901575891, + "grad_norm": 1.2385227934585923, + "learning_rate": 4.925708081796264e-06, + "loss": 0.2202, + "step": 39314 + }, + { + "epoch": 0.679344069670998, + "grad_norm": 1.3369975088735326, + "learning_rate": 4.925225843401765e-06, + "loss": 0.4831, + "step": 39315 + }, + { + "epoch": 0.6793613491844069, + "grad_norm": 0.9088602146630497, + "learning_rate": 4.924743620901839e-06, + "loss": 0.5726, + "step": 39316 + }, + { + "epoch": 0.6793786286978158, + "grad_norm": 1.164457426271101, + "learning_rate": 4.9242614142980095e-06, + "loss": 0.4617, + "step": 39317 + }, + { + "epoch": 0.6793959082112248, + "grad_norm": 1.1879423238336164, + "learning_rate": 4.923779223591774e-06, + "loss": 0.3565, + "step": 39318 + }, + { + "epoch": 0.6794131877246337, + "grad_norm": 1.7097843434866296, + "learning_rate": 4.923297048784652e-06, + "loss": 0.4579, + "step": 39319 + }, + { + "epoch": 0.6794304672380426, + "grad_norm": 0.7669675191581534, + "learning_rate": 4.922814889878146e-06, + "loss": 0.4412, + "step": 39320 + }, + { + "epoch": 0.6794477467514515, + "grad_norm": 0.4515607486590224, + "learning_rate": 4.922332746873773e-06, + "loss": 0.5677, + "step": 39321 + }, + { + "epoch": 0.6794650262648604, + "grad_norm": 0.4780153870019108, + "learning_rate": 4.921850619773036e-06, + "loss": 0.7338, + "step": 39322 + }, + { + "epoch": 0.6794823057782693, + "grad_norm": 0.8028322333741026, + "learning_rate": 4.92136850857745e-06, + "loss": 0.3666, + "step": 39323 + }, + { + "epoch": 0.6794995852916782, + "grad_norm": 1.1654111668513094, + "learning_rate": 4.920886413288523e-06, + "loss": 0.3846, + "step": 39324 + }, + { + "epoch": 0.6795168648050871, + "grad_norm": 1.7557593376576839, + "learning_rate": 4.920404333907769e-06, + "loss": 0.4074, + "step": 39325 + }, + { + "epoch": 0.679534144318496, + "grad_norm": 1.4592300826995341, + "learning_rate": 4.919922270436694e-06, + "loss": 0.3855, + "step": 39326 + }, + { + "epoch": 0.6795514238319049, + "grad_norm": 0.9121718349073303, + "learning_rate": 4.9194402228768064e-06, + "loss": 0.2883, + "step": 39327 + }, + { + "epoch": 0.6795687033453138, + "grad_norm": 1.1409069146886854, + "learning_rate": 4.9189581912296165e-06, + "loss": 0.2799, + "step": 39328 + }, + { + "epoch": 0.6795859828587227, + "grad_norm": 0.8491410337883684, + "learning_rate": 4.918476175496637e-06, + "loss": 0.3523, + "step": 39329 + }, + { + "epoch": 0.6796032623721316, + "grad_norm": 0.6761652766647129, + "learning_rate": 4.9179941756793805e-06, + "loss": 0.2342, + "step": 39330 + }, + { + "epoch": 0.6796205418855406, + "grad_norm": 1.3038195395633112, + "learning_rate": 4.917512191779348e-06, + "loss": 0.4306, + "step": 39331 + }, + { + "epoch": 0.6796378213989495, + "grad_norm": 1.0699196484054325, + "learning_rate": 4.917030223798057e-06, + "loss": 0.1896, + "step": 39332 + }, + { + "epoch": 0.6796551009123583, + "grad_norm": 1.016247043847299, + "learning_rate": 4.916548271737009e-06, + "loss": 0.3903, + "step": 39333 + }, + { + "epoch": 0.6796723804257672, + "grad_norm": 0.595641981269155, + "learning_rate": 4.9160663355977204e-06, + "loss": 0.7693, + "step": 39334 + }, + { + "epoch": 0.6796896599391761, + "grad_norm": 0.9386780257440701, + "learning_rate": 4.915584415381697e-06, + "loss": 0.3442, + "step": 39335 + }, + { + "epoch": 0.679706939452585, + "grad_norm": 1.2770539655070332, + "learning_rate": 4.915102511090454e-06, + "loss": 0.5878, + "step": 39336 + }, + { + "epoch": 0.6797242189659939, + "grad_norm": 1.1084699970683067, + "learning_rate": 4.9146206227254956e-06, + "loss": 0.5704, + "step": 39337 + }, + { + "epoch": 0.6797414984794028, + "grad_norm": 0.5745896573168888, + "learning_rate": 4.914138750288329e-06, + "loss": 0.7973, + "step": 39338 + }, + { + "epoch": 0.6797587779928117, + "grad_norm": 1.4847519208455036, + "learning_rate": 4.913656893780465e-06, + "loss": 0.3432, + "step": 39339 + }, + { + "epoch": 0.6797760575062206, + "grad_norm": 1.9089030996857022, + "learning_rate": 4.913175053203415e-06, + "loss": 0.4777, + "step": 39340 + }, + { + "epoch": 0.6797933370196295, + "grad_norm": 1.8718230000273444, + "learning_rate": 4.912693228558691e-06, + "loss": 0.3189, + "step": 39341 + }, + { + "epoch": 0.6798106165330384, + "grad_norm": 1.3726513985325366, + "learning_rate": 4.912211419847795e-06, + "loss": 0.8696, + "step": 39342 + }, + { + "epoch": 0.6798278960464473, + "grad_norm": 1.299842203551648, + "learning_rate": 4.9117296270722425e-06, + "loss": 0.3509, + "step": 39343 + }, + { + "epoch": 0.6798451755598562, + "grad_norm": 1.177201711727798, + "learning_rate": 4.911247850233537e-06, + "loss": 0.422, + "step": 39344 + }, + { + "epoch": 0.6798624550732651, + "grad_norm": 1.172973094886424, + "learning_rate": 4.910766089333189e-06, + "loss": 0.2337, + "step": 39345 + }, + { + "epoch": 0.6798797345866741, + "grad_norm": 1.0953127649353442, + "learning_rate": 4.9102843443727125e-06, + "loss": 0.4099, + "step": 39346 + }, + { + "epoch": 0.679897014100083, + "grad_norm": 1.588935243137279, + "learning_rate": 4.909802615353608e-06, + "loss": 0.3305, + "step": 39347 + }, + { + "epoch": 0.6799142936134919, + "grad_norm": 1.4349909009217228, + "learning_rate": 4.909320902277393e-06, + "loss": 0.4832, + "step": 39348 + }, + { + "epoch": 0.6799315731269008, + "grad_norm": 1.266105982318943, + "learning_rate": 4.908839205145567e-06, + "loss": 0.2995, + "step": 39349 + }, + { + "epoch": 0.6799488526403097, + "grad_norm": 0.6651906049237387, + "learning_rate": 4.908357523959645e-06, + "loss": 0.2479, + "step": 39350 + }, + { + "epoch": 0.6799661321537186, + "grad_norm": 0.9800003064948385, + "learning_rate": 4.907875858721133e-06, + "loss": 0.3559, + "step": 39351 + }, + { + "epoch": 0.6799834116671275, + "grad_norm": 0.8831897971794631, + "learning_rate": 4.907394209431544e-06, + "loss": 0.3399, + "step": 39352 + }, + { + "epoch": 0.6800006911805364, + "grad_norm": 1.0582380277259311, + "learning_rate": 4.906912576092381e-06, + "loss": 0.3097, + "step": 39353 + }, + { + "epoch": 0.6800179706939452, + "grad_norm": 1.0176700665546494, + "learning_rate": 4.906430958705158e-06, + "loss": 0.3865, + "step": 39354 + }, + { + "epoch": 0.6800352502073541, + "grad_norm": 1.4319861990242388, + "learning_rate": 4.905949357271377e-06, + "loss": 0.3461, + "step": 39355 + }, + { + "epoch": 0.680052529720763, + "grad_norm": 1.1433476232157505, + "learning_rate": 4.905467771792549e-06, + "loss": 0.4883, + "step": 39356 + }, + { + "epoch": 0.6800698092341719, + "grad_norm": 0.9130381524429595, + "learning_rate": 4.904986202270188e-06, + "loss": 0.1512, + "step": 39357 + }, + { + "epoch": 0.6800870887475808, + "grad_norm": 1.4480639736189438, + "learning_rate": 4.904504648705794e-06, + "loss": 0.4774, + "step": 39358 + }, + { + "epoch": 0.6801043682609897, + "grad_norm": 0.9434627478607647, + "learning_rate": 4.90402311110088e-06, + "loss": 0.4365, + "step": 39359 + }, + { + "epoch": 0.6801216477743987, + "grad_norm": 0.7805444235321792, + "learning_rate": 4.90354158945695e-06, + "loss": 0.2859, + "step": 39360 + }, + { + "epoch": 0.6801389272878076, + "grad_norm": 1.3219663150247347, + "learning_rate": 4.90306008377552e-06, + "loss": 0.1895, + "step": 39361 + }, + { + "epoch": 0.6801562068012165, + "grad_norm": 1.650744054936351, + "learning_rate": 4.902578594058089e-06, + "loss": 0.2672, + "step": 39362 + }, + { + "epoch": 0.6801734863146254, + "grad_norm": 1.2243645690527596, + "learning_rate": 4.90209712030617e-06, + "loss": 0.3573, + "step": 39363 + }, + { + "epoch": 0.6801907658280343, + "grad_norm": 0.777748640308121, + "learning_rate": 4.901615662521268e-06, + "loss": 0.7967, + "step": 39364 + }, + { + "epoch": 0.6802080453414432, + "grad_norm": 1.9799993277589607, + "learning_rate": 4.901134220704899e-06, + "loss": 0.3859, + "step": 39365 + }, + { + "epoch": 0.6802253248548521, + "grad_norm": 0.9928151444047606, + "learning_rate": 4.90065279485856e-06, + "loss": 0.4592, + "step": 39366 + }, + { + "epoch": 0.680242604368261, + "grad_norm": 1.5429100978288912, + "learning_rate": 4.900171384983768e-06, + "loss": 0.2377, + "step": 39367 + }, + { + "epoch": 0.6802598838816699, + "grad_norm": 0.8717731725869466, + "learning_rate": 4.899689991082024e-06, + "loss": 0.4081, + "step": 39368 + }, + { + "epoch": 0.6802771633950788, + "grad_norm": 0.665340247571985, + "learning_rate": 4.899208613154838e-06, + "loss": 0.608, + "step": 39369 + }, + { + "epoch": 0.6802944429084877, + "grad_norm": 1.5085594198562875, + "learning_rate": 4.898727251203722e-06, + "loss": 0.3908, + "step": 39370 + }, + { + "epoch": 0.6803117224218966, + "grad_norm": 1.4594605392863824, + "learning_rate": 4.898245905230177e-06, + "loss": 0.5373, + "step": 39371 + }, + { + "epoch": 0.6803290019353055, + "grad_norm": 1.0325945115982111, + "learning_rate": 4.897764575235716e-06, + "loss": 0.4449, + "step": 39372 + }, + { + "epoch": 0.6803462814487145, + "grad_norm": 1.4991734181317176, + "learning_rate": 4.897283261221841e-06, + "loss": 0.3741, + "step": 39373 + }, + { + "epoch": 0.6803635609621234, + "grad_norm": 1.1293582244653768, + "learning_rate": 4.896801963190062e-06, + "loss": 0.7551, + "step": 39374 + }, + { + "epoch": 0.6803808404755322, + "grad_norm": 0.8114660800957402, + "learning_rate": 4.896320681141887e-06, + "loss": 0.3424, + "step": 39375 + }, + { + "epoch": 0.6803981199889411, + "grad_norm": 1.2151491035398634, + "learning_rate": 4.895839415078829e-06, + "loss": 0.5829, + "step": 39376 + }, + { + "epoch": 0.68041539950235, + "grad_norm": 1.6400227070827258, + "learning_rate": 4.895358165002388e-06, + "loss": 0.6052, + "step": 39377 + }, + { + "epoch": 0.6804326790157589, + "grad_norm": 1.1942096588008848, + "learning_rate": 4.89487693091407e-06, + "loss": 0.3663, + "step": 39378 + }, + { + "epoch": 0.6804499585291678, + "grad_norm": 1.3777094336502465, + "learning_rate": 4.894395712815385e-06, + "loss": 0.3221, + "step": 39379 + }, + { + "epoch": 0.6804672380425767, + "grad_norm": 1.2054821141165155, + "learning_rate": 4.893914510707842e-06, + "loss": 0.5628, + "step": 39380 + }, + { + "epoch": 0.6804845175559856, + "grad_norm": 0.6839995667813745, + "learning_rate": 4.89343332459295e-06, + "loss": 0.7395, + "step": 39381 + }, + { + "epoch": 0.6805017970693945, + "grad_norm": 1.0963479224708736, + "learning_rate": 4.8929521544722084e-06, + "loss": 0.3394, + "step": 39382 + }, + { + "epoch": 0.6805190765828034, + "grad_norm": 1.0365456761558232, + "learning_rate": 4.892471000347134e-06, + "loss": 0.3484, + "step": 39383 + }, + { + "epoch": 0.6805363560962123, + "grad_norm": 1.0736517121094105, + "learning_rate": 4.891989862219223e-06, + "loss": 0.4296, + "step": 39384 + }, + { + "epoch": 0.6805536356096212, + "grad_norm": 0.5811272379270713, + "learning_rate": 4.891508740089988e-06, + "loss": 0.5775, + "step": 39385 + }, + { + "epoch": 0.6805709151230301, + "grad_norm": 0.9648586282101805, + "learning_rate": 4.891027633960936e-06, + "loss": 0.2607, + "step": 39386 + }, + { + "epoch": 0.680588194636439, + "grad_norm": 0.7164437034988962, + "learning_rate": 4.890546543833579e-06, + "loss": 0.3779, + "step": 39387 + }, + { + "epoch": 0.680605474149848, + "grad_norm": 1.0255342686182916, + "learning_rate": 4.890065469709417e-06, + "loss": 0.4563, + "step": 39388 + }, + { + "epoch": 0.6806227536632569, + "grad_norm": 1.1768953911846363, + "learning_rate": 4.889584411589956e-06, + "loss": 0.3901, + "step": 39389 + }, + { + "epoch": 0.6806400331766658, + "grad_norm": 1.2646646947767766, + "learning_rate": 4.889103369476704e-06, + "loss": 0.1829, + "step": 39390 + }, + { + "epoch": 0.6806573126900747, + "grad_norm": 1.3547973975580951, + "learning_rate": 4.8886223433711685e-06, + "loss": 0.2957, + "step": 39391 + }, + { + "epoch": 0.6806745922034836, + "grad_norm": 1.6252920988228632, + "learning_rate": 4.88814133327486e-06, + "loss": 0.4081, + "step": 39392 + }, + { + "epoch": 0.6806918717168925, + "grad_norm": 1.3089678300313368, + "learning_rate": 4.887660339189279e-06, + "loss": 0.3736, + "step": 39393 + }, + { + "epoch": 0.6807091512303014, + "grad_norm": 0.8773920914904262, + "learning_rate": 4.8871793611159355e-06, + "loss": 0.3248, + "step": 39394 + }, + { + "epoch": 0.6807264307437103, + "grad_norm": 1.3364565348550426, + "learning_rate": 4.886698399056334e-06, + "loss": 0.3235, + "step": 39395 + }, + { + "epoch": 0.6807437102571191, + "grad_norm": 1.1250784024030225, + "learning_rate": 4.88621745301198e-06, + "loss": 0.4644, + "step": 39396 + }, + { + "epoch": 0.680760989770528, + "grad_norm": 1.4221562961146865, + "learning_rate": 4.885736522984381e-06, + "loss": 0.5538, + "step": 39397 + }, + { + "epoch": 0.6807782692839369, + "grad_norm": 0.9874262138886776, + "learning_rate": 4.885255608975049e-06, + "loss": 0.5315, + "step": 39398 + }, + { + "epoch": 0.6807955487973458, + "grad_norm": 1.2736039286993661, + "learning_rate": 4.884774710985485e-06, + "loss": 0.4702, + "step": 39399 + }, + { + "epoch": 0.6808128283107547, + "grad_norm": 1.665366718253813, + "learning_rate": 4.88429382901719e-06, + "loss": 0.3052, + "step": 39400 + }, + { + "epoch": 0.6808301078241636, + "grad_norm": 1.2730525828553063, + "learning_rate": 4.883812963071681e-06, + "loss": 0.2245, + "step": 39401 + }, + { + "epoch": 0.6808473873375726, + "grad_norm": 0.8865524791082989, + "learning_rate": 4.8833321131504545e-06, + "loss": 0.3374, + "step": 39402 + }, + { + "epoch": 0.6808646668509815, + "grad_norm": 1.0892332602438861, + "learning_rate": 4.88285127925502e-06, + "loss": 0.5104, + "step": 39403 + }, + { + "epoch": 0.6808819463643904, + "grad_norm": 1.254538033512621, + "learning_rate": 4.882370461386885e-06, + "loss": 0.419, + "step": 39404 + }, + { + "epoch": 0.6808992258777993, + "grad_norm": 0.9172224668089067, + "learning_rate": 4.881889659547557e-06, + "loss": 0.3943, + "step": 39405 + }, + { + "epoch": 0.6809165053912082, + "grad_norm": 1.096212232901997, + "learning_rate": 4.881408873738538e-06, + "loss": 0.2478, + "step": 39406 + }, + { + "epoch": 0.6809337849046171, + "grad_norm": 1.3804196612328619, + "learning_rate": 4.880928103961338e-06, + "loss": 0.3432, + "step": 39407 + }, + { + "epoch": 0.680951064418026, + "grad_norm": 1.8097171578265416, + "learning_rate": 4.880447350217456e-06, + "loss": 0.5054, + "step": 39408 + }, + { + "epoch": 0.6809683439314349, + "grad_norm": 1.4162550217082353, + "learning_rate": 4.879966612508402e-06, + "loss": 0.2548, + "step": 39409 + }, + { + "epoch": 0.6809856234448438, + "grad_norm": 1.1037611740564393, + "learning_rate": 4.879485890835687e-06, + "loss": 0.2454, + "step": 39410 + }, + { + "epoch": 0.6810029029582527, + "grad_norm": 1.181764705406419, + "learning_rate": 4.879005185200806e-06, + "loss": 0.3213, + "step": 39411 + }, + { + "epoch": 0.6810201824716616, + "grad_norm": 1.3326670847603814, + "learning_rate": 4.878524495605273e-06, + "loss": 0.3308, + "step": 39412 + }, + { + "epoch": 0.6810374619850705, + "grad_norm": 1.991876113385726, + "learning_rate": 4.878043822050589e-06, + "loss": 0.3758, + "step": 39413 + }, + { + "epoch": 0.6810547414984794, + "grad_norm": 0.8826849121746724, + "learning_rate": 4.877563164538259e-06, + "loss": 0.3198, + "step": 39414 + }, + { + "epoch": 0.6810720210118884, + "grad_norm": 0.7305310868076841, + "learning_rate": 4.877082523069792e-06, + "loss": 0.337, + "step": 39415 + }, + { + "epoch": 0.6810893005252973, + "grad_norm": 1.7244879394898123, + "learning_rate": 4.876601897646696e-06, + "loss": 0.3736, + "step": 39416 + }, + { + "epoch": 0.681106580038706, + "grad_norm": 1.239285418831202, + "learning_rate": 4.876121288270467e-06, + "loss": 0.2787, + "step": 39417 + }, + { + "epoch": 0.681123859552115, + "grad_norm": 1.556162308904668, + "learning_rate": 4.87564069494262e-06, + "loss": 0.5714, + "step": 39418 + }, + { + "epoch": 0.6811411390655239, + "grad_norm": 1.449789221024853, + "learning_rate": 4.875160117664651e-06, + "loss": 0.5952, + "step": 39419 + }, + { + "epoch": 0.6811584185789328, + "grad_norm": 1.8666156061332653, + "learning_rate": 4.874679556438071e-06, + "loss": 0.3166, + "step": 39420 + }, + { + "epoch": 0.6811756980923417, + "grad_norm": 0.9355807662379007, + "learning_rate": 4.8741990112643875e-06, + "loss": 0.3686, + "step": 39421 + }, + { + "epoch": 0.6811929776057506, + "grad_norm": 1.7634173665354889, + "learning_rate": 4.873718482145099e-06, + "loss": 0.4132, + "step": 39422 + }, + { + "epoch": 0.6812102571191595, + "grad_norm": 1.2314157452171854, + "learning_rate": 4.873237969081717e-06, + "loss": 0.3741, + "step": 39423 + }, + { + "epoch": 0.6812275366325684, + "grad_norm": 0.6929195351907435, + "learning_rate": 4.87275747207574e-06, + "loss": 0.7178, + "step": 39424 + }, + { + "epoch": 0.6812448161459773, + "grad_norm": 1.8328574763978018, + "learning_rate": 4.872276991128675e-06, + "loss": 0.3305, + "step": 39425 + }, + { + "epoch": 0.6812620956593862, + "grad_norm": 1.0036792359123132, + "learning_rate": 4.8717965262420295e-06, + "loss": 0.5102, + "step": 39426 + }, + { + "epoch": 0.6812793751727951, + "grad_norm": 1.3923215465998684, + "learning_rate": 4.87131607741731e-06, + "loss": 0.3375, + "step": 39427 + }, + { + "epoch": 0.681296654686204, + "grad_norm": 1.4252156347983669, + "learning_rate": 4.870835644656015e-06, + "loss": 0.5663, + "step": 39428 + }, + { + "epoch": 0.681313934199613, + "grad_norm": 0.7382758481428929, + "learning_rate": 4.8703552279596565e-06, + "loss": 0.3134, + "step": 39429 + }, + { + "epoch": 0.6813312137130219, + "grad_norm": 0.9858189773786297, + "learning_rate": 4.869874827329731e-06, + "loss": 0.3845, + "step": 39430 + }, + { + "epoch": 0.6813484932264308, + "grad_norm": 1.6924061227809923, + "learning_rate": 4.8693944427677476e-06, + "loss": 0.266, + "step": 39431 + }, + { + "epoch": 0.6813657727398397, + "grad_norm": 1.1020059453286593, + "learning_rate": 4.868914074275215e-06, + "loss": 0.4241, + "step": 39432 + }, + { + "epoch": 0.6813830522532486, + "grad_norm": 1.0057650410106236, + "learning_rate": 4.868433721853629e-06, + "loss": 0.3855, + "step": 39433 + }, + { + "epoch": 0.6814003317666575, + "grad_norm": 1.0090860320815622, + "learning_rate": 4.867953385504503e-06, + "loss": 0.5939, + "step": 39434 + }, + { + "epoch": 0.6814176112800664, + "grad_norm": 1.2847705407534236, + "learning_rate": 4.867473065229332e-06, + "loss": 0.4791, + "step": 39435 + }, + { + "epoch": 0.6814348907934753, + "grad_norm": 0.9857154390020483, + "learning_rate": 4.8669927610296305e-06, + "loss": 0.3825, + "step": 39436 + }, + { + "epoch": 0.6814521703068842, + "grad_norm": 1.2625281406916833, + "learning_rate": 4.8665124729068905e-06, + "loss": 0.2487, + "step": 39437 + }, + { + "epoch": 0.681469449820293, + "grad_norm": 1.3652152124374066, + "learning_rate": 4.866032200862629e-06, + "loss": 0.4268, + "step": 39438 + }, + { + "epoch": 0.6814867293337019, + "grad_norm": 1.7729300538176025, + "learning_rate": 4.865551944898347e-06, + "loss": 0.3821, + "step": 39439 + }, + { + "epoch": 0.6815040088471108, + "grad_norm": 1.4404710393641476, + "learning_rate": 4.865071705015542e-06, + "loss": 0.3381, + "step": 39440 + }, + { + "epoch": 0.6815212883605197, + "grad_norm": 0.957467837745183, + "learning_rate": 4.864591481215726e-06, + "loss": 0.3916, + "step": 39441 + }, + { + "epoch": 0.6815385678739286, + "grad_norm": 0.6300093965428594, + "learning_rate": 4.864111273500397e-06, + "loss": 0.3204, + "step": 39442 + }, + { + "epoch": 0.6815558473873375, + "grad_norm": 0.46691434341033494, + "learning_rate": 4.863631081871061e-06, + "loss": 0.755, + "step": 39443 + }, + { + "epoch": 0.6815731269007465, + "grad_norm": 0.9055467370016338, + "learning_rate": 4.863150906329223e-06, + "loss": 0.2485, + "step": 39444 + }, + { + "epoch": 0.6815904064141554, + "grad_norm": 1.4297702594657256, + "learning_rate": 4.86267074687639e-06, + "loss": 0.253, + "step": 39445 + }, + { + "epoch": 0.6816076859275643, + "grad_norm": 1.0511483049630457, + "learning_rate": 4.86219060351406e-06, + "loss": 0.5309, + "step": 39446 + }, + { + "epoch": 0.6816249654409732, + "grad_norm": 1.0599517116400545, + "learning_rate": 4.861710476243742e-06, + "loss": 0.2444, + "step": 39447 + }, + { + "epoch": 0.6816422449543821, + "grad_norm": 1.323924427995079, + "learning_rate": 4.861230365066934e-06, + "loss": 0.2589, + "step": 39448 + }, + { + "epoch": 0.681659524467791, + "grad_norm": 1.0977376154628815, + "learning_rate": 4.8607502699851425e-06, + "loss": 0.3023, + "step": 39449 + }, + { + "epoch": 0.6816768039811999, + "grad_norm": 0.930089249057641, + "learning_rate": 4.860270190999877e-06, + "loss": 0.2692, + "step": 39450 + }, + { + "epoch": 0.6816940834946088, + "grad_norm": 1.1536774415400937, + "learning_rate": 4.859790128112631e-06, + "loss": 0.2688, + "step": 39451 + }, + { + "epoch": 0.6817113630080177, + "grad_norm": 0.9888630432795932, + "learning_rate": 4.8593100813249174e-06, + "loss": 0.4473, + "step": 39452 + }, + { + "epoch": 0.6817286425214266, + "grad_norm": 1.512729545032062, + "learning_rate": 4.858830050638231e-06, + "loss": 0.6545, + "step": 39453 + }, + { + "epoch": 0.6817459220348355, + "grad_norm": 1.2350353243964094, + "learning_rate": 4.858350036054079e-06, + "loss": 0.5195, + "step": 39454 + }, + { + "epoch": 0.6817632015482444, + "grad_norm": 1.3004993048254683, + "learning_rate": 4.857870037573967e-06, + "loss": 0.3876, + "step": 39455 + }, + { + "epoch": 0.6817804810616533, + "grad_norm": 1.3534803873696681, + "learning_rate": 4.8573900551993995e-06, + "loss": 0.2699, + "step": 39456 + }, + { + "epoch": 0.6817977605750623, + "grad_norm": 0.977563632525012, + "learning_rate": 4.856910088931873e-06, + "loss": 0.7243, + "step": 39457 + }, + { + "epoch": 0.6818150400884712, + "grad_norm": 1.1330842171215867, + "learning_rate": 4.856430138772899e-06, + "loss": 0.4139, + "step": 39458 + }, + { + "epoch": 0.6818323196018801, + "grad_norm": 1.0066049327307098, + "learning_rate": 4.855950204723973e-06, + "loss": 0.2066, + "step": 39459 + }, + { + "epoch": 0.6818495991152889, + "grad_norm": 1.2937065267947896, + "learning_rate": 4.8554702867866034e-06, + "loss": 0.3785, + "step": 39460 + }, + { + "epoch": 0.6818668786286978, + "grad_norm": 1.3199956831982746, + "learning_rate": 4.854990384962295e-06, + "loss": 0.2231, + "step": 39461 + }, + { + "epoch": 0.6818841581421067, + "grad_norm": 1.2955865175016172, + "learning_rate": 4.854510499252543e-06, + "loss": 0.3274, + "step": 39462 + }, + { + "epoch": 0.6819014376555156, + "grad_norm": 1.3707675031288065, + "learning_rate": 4.854030629658859e-06, + "loss": 0.2762, + "step": 39463 + }, + { + "epoch": 0.6819187171689245, + "grad_norm": 0.7910608425068443, + "learning_rate": 4.853550776182739e-06, + "loss": 0.4037, + "step": 39464 + }, + { + "epoch": 0.6819359966823334, + "grad_norm": 1.2319384694055355, + "learning_rate": 4.853070938825688e-06, + "loss": 0.3391, + "step": 39465 + }, + { + "epoch": 0.6819532761957423, + "grad_norm": 0.9639778029280135, + "learning_rate": 4.85259111758921e-06, + "loss": 0.2373, + "step": 39466 + }, + { + "epoch": 0.6819705557091512, + "grad_norm": 1.0544031402111782, + "learning_rate": 4.852111312474813e-06, + "loss": 0.4011, + "step": 39467 + }, + { + "epoch": 0.6819878352225601, + "grad_norm": 1.0006436123518578, + "learning_rate": 4.851631523483991e-06, + "loss": 0.3866, + "step": 39468 + }, + { + "epoch": 0.682005114735969, + "grad_norm": 1.6648042882290186, + "learning_rate": 4.851151750618253e-06, + "loss": 0.4196, + "step": 39469 + }, + { + "epoch": 0.6820223942493779, + "grad_norm": 1.077220099118443, + "learning_rate": 4.850671993879094e-06, + "loss": 0.4229, + "step": 39470 + }, + { + "epoch": 0.6820396737627868, + "grad_norm": 1.3149339628139218, + "learning_rate": 4.8501922532680244e-06, + "loss": 0.342, + "step": 39471 + }, + { + "epoch": 0.6820569532761958, + "grad_norm": 1.5645267478756866, + "learning_rate": 4.849712528786546e-06, + "loss": 0.3961, + "step": 39472 + }, + { + "epoch": 0.6820742327896047, + "grad_norm": 1.6414479281597039, + "learning_rate": 4.849232820436156e-06, + "loss": 0.3734, + "step": 39473 + }, + { + "epoch": 0.6820915123030136, + "grad_norm": 1.3856419895738548, + "learning_rate": 4.8487531282183645e-06, + "loss": 0.36, + "step": 39474 + }, + { + "epoch": 0.6821087918164225, + "grad_norm": 1.2137125237299904, + "learning_rate": 4.848273452134665e-06, + "loss": 0.3765, + "step": 39475 + }, + { + "epoch": 0.6821260713298314, + "grad_norm": 1.2231578996883345, + "learning_rate": 4.847793792186569e-06, + "loss": 0.4451, + "step": 39476 + }, + { + "epoch": 0.6821433508432403, + "grad_norm": 0.7048385877331765, + "learning_rate": 4.847314148375567e-06, + "loss": 0.6161, + "step": 39477 + }, + { + "epoch": 0.6821606303566492, + "grad_norm": 1.6055015003428579, + "learning_rate": 4.846834520703176e-06, + "loss": 0.604, + "step": 39478 + }, + { + "epoch": 0.6821779098700581, + "grad_norm": 1.0455166865078107, + "learning_rate": 4.846354909170887e-06, + "loss": 0.2993, + "step": 39479 + }, + { + "epoch": 0.682195189383467, + "grad_norm": 1.1499640623434622, + "learning_rate": 4.845875313780211e-06, + "loss": 0.2864, + "step": 39480 + }, + { + "epoch": 0.6822124688968758, + "grad_norm": 1.020490474146262, + "learning_rate": 4.845395734532643e-06, + "loss": 0.56, + "step": 39481 + }, + { + "epoch": 0.6822297484102847, + "grad_norm": 1.3218483126128409, + "learning_rate": 4.844916171429684e-06, + "loss": 0.394, + "step": 39482 + }, + { + "epoch": 0.6822470279236936, + "grad_norm": 1.4489615754841139, + "learning_rate": 4.844436624472841e-06, + "loss": 0.3801, + "step": 39483 + }, + { + "epoch": 0.6822643074371025, + "grad_norm": 0.912726926365882, + "learning_rate": 4.843957093663614e-06, + "loss": 0.2704, + "step": 39484 + }, + { + "epoch": 0.6822815869505114, + "grad_norm": 0.7591777801185119, + "learning_rate": 4.843477579003507e-06, + "loss": 0.8073, + "step": 39485 + }, + { + "epoch": 0.6822988664639204, + "grad_norm": 1.2027033203405324, + "learning_rate": 4.842998080494018e-06, + "loss": 0.3546, + "step": 39486 + }, + { + "epoch": 0.6823161459773293, + "grad_norm": 1.082233605145492, + "learning_rate": 4.842518598136654e-06, + "loss": 0.2066, + "step": 39487 + }, + { + "epoch": 0.6823334254907382, + "grad_norm": 1.274177032864555, + "learning_rate": 4.84203913193291e-06, + "loss": 0.3887, + "step": 39488 + }, + { + "epoch": 0.6823507050041471, + "grad_norm": 1.1044897710785955, + "learning_rate": 4.841559681884292e-06, + "loss": 0.3691, + "step": 39489 + }, + { + "epoch": 0.682367984517556, + "grad_norm": 0.8575936283830516, + "learning_rate": 4.841080247992304e-06, + "loss": 0.3865, + "step": 39490 + }, + { + "epoch": 0.6823852640309649, + "grad_norm": 1.5990577834119717, + "learning_rate": 4.840600830258443e-06, + "loss": 0.3953, + "step": 39491 + }, + { + "epoch": 0.6824025435443738, + "grad_norm": 0.975695919923796, + "learning_rate": 4.8401214286842144e-06, + "loss": 0.2959, + "step": 39492 + }, + { + "epoch": 0.6824198230577827, + "grad_norm": 1.3100153919006934, + "learning_rate": 4.839642043271114e-06, + "loss": 0.2801, + "step": 39493 + }, + { + "epoch": 0.6824371025711916, + "grad_norm": 3.007601926578068, + "learning_rate": 4.8391626740206465e-06, + "loss": 0.5878, + "step": 39494 + }, + { + "epoch": 0.6824543820846005, + "grad_norm": 0.9531672789668677, + "learning_rate": 4.838683320934314e-06, + "loss": 0.5216, + "step": 39495 + }, + { + "epoch": 0.6824716615980094, + "grad_norm": 1.1555835116774489, + "learning_rate": 4.838203984013622e-06, + "loss": 0.3745, + "step": 39496 + }, + { + "epoch": 0.6824889411114183, + "grad_norm": 1.5092244129820342, + "learning_rate": 4.8377246632600625e-06, + "loss": 0.356, + "step": 39497 + }, + { + "epoch": 0.6825062206248272, + "grad_norm": 2.057842246957608, + "learning_rate": 4.837245358675147e-06, + "loss": 0.5962, + "step": 39498 + }, + { + "epoch": 0.6825235001382361, + "grad_norm": 1.9539515207239442, + "learning_rate": 4.836766070260367e-06, + "loss": 0.3974, + "step": 39499 + }, + { + "epoch": 0.6825407796516451, + "grad_norm": 0.7779804264891728, + "learning_rate": 4.8362867980172265e-06, + "loss": 0.5226, + "step": 39500 + }, + { + "epoch": 0.682558059165054, + "grad_norm": 1.3506510204903708, + "learning_rate": 4.835807541947234e-06, + "loss": 0.4428, + "step": 39501 + }, + { + "epoch": 0.6825753386784628, + "grad_norm": 1.4372331553813722, + "learning_rate": 4.83532830205188e-06, + "loss": 0.2984, + "step": 39502 + }, + { + "epoch": 0.6825926181918717, + "grad_norm": 1.6064818820007993, + "learning_rate": 4.834849078332674e-06, + "loss": 0.2924, + "step": 39503 + }, + { + "epoch": 0.6826098977052806, + "grad_norm": 1.2960952371335608, + "learning_rate": 4.83436987079111e-06, + "loss": 0.5149, + "step": 39504 + }, + { + "epoch": 0.6826271772186895, + "grad_norm": 0.8156641305683784, + "learning_rate": 4.833890679428692e-06, + "loss": 0.4008, + "step": 39505 + }, + { + "epoch": 0.6826444567320984, + "grad_norm": 0.8631118220403995, + "learning_rate": 4.833411504246921e-06, + "loss": 0.2585, + "step": 39506 + }, + { + "epoch": 0.6826617362455073, + "grad_norm": 1.58792923618533, + "learning_rate": 4.832932345247301e-06, + "loss": 0.3761, + "step": 39507 + }, + { + "epoch": 0.6826790157589162, + "grad_norm": 0.978814402582162, + "learning_rate": 4.832453202431326e-06, + "loss": 0.3801, + "step": 39508 + }, + { + "epoch": 0.6826962952723251, + "grad_norm": 0.84079476350671, + "learning_rate": 4.831974075800504e-06, + "loss": 0.5761, + "step": 39509 + }, + { + "epoch": 0.682713574785734, + "grad_norm": 0.9113093312646334, + "learning_rate": 4.8314949653563295e-06, + "loss": 0.4101, + "step": 39510 + }, + { + "epoch": 0.6827308542991429, + "grad_norm": 1.1146445614143257, + "learning_rate": 4.8310158711003044e-06, + "loss": 0.4799, + "step": 39511 + }, + { + "epoch": 0.6827481338125518, + "grad_norm": 1.0135091686574411, + "learning_rate": 4.830536793033934e-06, + "loss": 0.3478, + "step": 39512 + }, + { + "epoch": 0.6827654133259607, + "grad_norm": 1.0910648130170317, + "learning_rate": 4.830057731158712e-06, + "loss": 0.5952, + "step": 39513 + }, + { + "epoch": 0.6827826928393697, + "grad_norm": 1.5022106276060763, + "learning_rate": 4.829578685476145e-06, + "loss": 0.5049, + "step": 39514 + }, + { + "epoch": 0.6827999723527786, + "grad_norm": 1.5005064180540282, + "learning_rate": 4.829099655987728e-06, + "loss": 0.3484, + "step": 39515 + }, + { + "epoch": 0.6828172518661875, + "grad_norm": 0.6876189998777639, + "learning_rate": 4.828620642694967e-06, + "loss": 0.4236, + "step": 39516 + }, + { + "epoch": 0.6828345313795964, + "grad_norm": 1.2204028865315057, + "learning_rate": 4.828141645599351e-06, + "loss": 0.3439, + "step": 39517 + }, + { + "epoch": 0.6828518108930053, + "grad_norm": 1.3868127594642223, + "learning_rate": 4.8276626647023975e-06, + "loss": 0.5925, + "step": 39518 + }, + { + "epoch": 0.6828690904064142, + "grad_norm": 0.6571611149341878, + "learning_rate": 4.827183700005592e-06, + "loss": 0.3229, + "step": 39519 + }, + { + "epoch": 0.6828863699198231, + "grad_norm": 1.0410458377881544, + "learning_rate": 4.826704751510445e-06, + "loss": 0.2625, + "step": 39520 + }, + { + "epoch": 0.682903649433232, + "grad_norm": 1.1314170692964474, + "learning_rate": 4.826225819218452e-06, + "loss": 0.2415, + "step": 39521 + }, + { + "epoch": 0.6829209289466409, + "grad_norm": 1.247255388175169, + "learning_rate": 4.825746903131109e-06, + "loss": 0.4507, + "step": 39522 + }, + { + "epoch": 0.6829382084600497, + "grad_norm": 1.632439412648102, + "learning_rate": 4.82526800324992e-06, + "loss": 0.4055, + "step": 39523 + }, + { + "epoch": 0.6829554879734586, + "grad_norm": 1.50805159637088, + "learning_rate": 4.824789119576384e-06, + "loss": 0.3329, + "step": 39524 + }, + { + "epoch": 0.6829727674868675, + "grad_norm": 1.0069910691957915, + "learning_rate": 4.824310252112007e-06, + "loss": 0.4557, + "step": 39525 + }, + { + "epoch": 0.6829900470002764, + "grad_norm": 1.2501142426604674, + "learning_rate": 4.823831400858279e-06, + "loss": 0.2797, + "step": 39526 + }, + { + "epoch": 0.6830073265136853, + "grad_norm": 1.0258353216202454, + "learning_rate": 4.823352565816708e-06, + "loss": 0.3107, + "step": 39527 + }, + { + "epoch": 0.6830246060270942, + "grad_norm": 1.067915186799217, + "learning_rate": 4.822873746988788e-06, + "loss": 0.3528, + "step": 39528 + }, + { + "epoch": 0.6830418855405032, + "grad_norm": 0.8493963192982226, + "learning_rate": 4.822394944376018e-06, + "loss": 0.2668, + "step": 39529 + }, + { + "epoch": 0.6830591650539121, + "grad_norm": 1.2637929812656428, + "learning_rate": 4.821916157979903e-06, + "loss": 0.3668, + "step": 39530 + }, + { + "epoch": 0.683076444567321, + "grad_norm": 0.9616194427133119, + "learning_rate": 4.821437387801942e-06, + "loss": 0.3224, + "step": 39531 + }, + { + "epoch": 0.6830937240807299, + "grad_norm": 1.6830943782244736, + "learning_rate": 4.820958633843634e-06, + "loss": 0.467, + "step": 39532 + }, + { + "epoch": 0.6831110035941388, + "grad_norm": 1.091441282878344, + "learning_rate": 4.8204798961064735e-06, + "loss": 0.2844, + "step": 39533 + }, + { + "epoch": 0.6831282831075477, + "grad_norm": 1.171357005922072, + "learning_rate": 4.820001174591964e-06, + "loss": 0.4914, + "step": 39534 + }, + { + "epoch": 0.6831455626209566, + "grad_norm": 2.0716738235517784, + "learning_rate": 4.819522469301603e-06, + "loss": 0.4767, + "step": 39535 + }, + { + "epoch": 0.6831628421343655, + "grad_norm": 0.9766482765247647, + "learning_rate": 4.819043780236896e-06, + "loss": 0.5732, + "step": 39536 + }, + { + "epoch": 0.6831801216477744, + "grad_norm": 1.5494724086816531, + "learning_rate": 4.818565107399333e-06, + "loss": 0.484, + "step": 39537 + }, + { + "epoch": 0.6831974011611833, + "grad_norm": 1.4090716697153978, + "learning_rate": 4.818086450790423e-06, + "loss": 0.4099, + "step": 39538 + }, + { + "epoch": 0.6832146806745922, + "grad_norm": 1.3035558113993613, + "learning_rate": 4.817607810411655e-06, + "loss": 0.3569, + "step": 39539 + }, + { + "epoch": 0.6832319601880011, + "grad_norm": 1.3111139667948322, + "learning_rate": 4.817129186264534e-06, + "loss": 0.5032, + "step": 39540 + }, + { + "epoch": 0.68324923970141, + "grad_norm": 1.1143808865095133, + "learning_rate": 4.816650578350558e-06, + "loss": 0.3691, + "step": 39541 + }, + { + "epoch": 0.683266519214819, + "grad_norm": 1.8295715890695743, + "learning_rate": 4.81617198667123e-06, + "loss": 0.4616, + "step": 39542 + }, + { + "epoch": 0.6832837987282279, + "grad_norm": 1.0591934492664585, + "learning_rate": 4.815693411228045e-06, + "loss": 0.327, + "step": 39543 + }, + { + "epoch": 0.6833010782416367, + "grad_norm": 1.0434592323996648, + "learning_rate": 4.8152148520225e-06, + "loss": 0.289, + "step": 39544 + }, + { + "epoch": 0.6833183577550456, + "grad_norm": 0.9530334760341629, + "learning_rate": 4.814736309056094e-06, + "loss": 0.2823, + "step": 39545 + }, + { + "epoch": 0.6833356372684545, + "grad_norm": 0.9300027249768081, + "learning_rate": 4.8142577823303295e-06, + "loss": 0.6574, + "step": 39546 + }, + { + "epoch": 0.6833529167818634, + "grad_norm": 0.9325969672497365, + "learning_rate": 4.813779271846705e-06, + "loss": 0.315, + "step": 39547 + }, + { + "epoch": 0.6833701962952723, + "grad_norm": 1.2616194323614294, + "learning_rate": 4.813300777606718e-06, + "loss": 0.3508, + "step": 39548 + }, + { + "epoch": 0.6833874758086812, + "grad_norm": 1.2379372171820922, + "learning_rate": 4.812822299611868e-06, + "loss": 0.3409, + "step": 39549 + }, + { + "epoch": 0.6834047553220901, + "grad_norm": 1.872778487425256, + "learning_rate": 4.81234383786365e-06, + "loss": 0.505, + "step": 39550 + }, + { + "epoch": 0.683422034835499, + "grad_norm": 0.876317456854184, + "learning_rate": 4.811865392363566e-06, + "loss": 0.383, + "step": 39551 + }, + { + "epoch": 0.6834393143489079, + "grad_norm": 1.4761896055372143, + "learning_rate": 4.811386963113116e-06, + "loss": 0.3681, + "step": 39552 + }, + { + "epoch": 0.6834565938623168, + "grad_norm": 1.9071981924188788, + "learning_rate": 4.8109085501137945e-06, + "loss": 0.209, + "step": 39553 + }, + { + "epoch": 0.6834738733757257, + "grad_norm": 1.3343688928081747, + "learning_rate": 4.810430153367104e-06, + "loss": 0.4517, + "step": 39554 + }, + { + "epoch": 0.6834911528891346, + "grad_norm": 1.8400344837316198, + "learning_rate": 4.8099517728745375e-06, + "loss": 0.2963, + "step": 39555 + }, + { + "epoch": 0.6835084324025436, + "grad_norm": 1.1534138703073746, + "learning_rate": 4.8094734086376e-06, + "loss": 0.4299, + "step": 39556 + }, + { + "epoch": 0.6835257119159525, + "grad_norm": 1.863798472756182, + "learning_rate": 4.80899506065778e-06, + "loss": 0.3443, + "step": 39557 + }, + { + "epoch": 0.6835429914293614, + "grad_norm": 1.1701387038078255, + "learning_rate": 4.8085167289365875e-06, + "loss": 0.3986, + "step": 39558 + }, + { + "epoch": 0.6835602709427703, + "grad_norm": 1.5452627240929782, + "learning_rate": 4.808038413475512e-06, + "loss": 0.4271, + "step": 39559 + }, + { + "epoch": 0.6835775504561792, + "grad_norm": 1.0579201549240955, + "learning_rate": 4.807560114276059e-06, + "loss": 0.3827, + "step": 39560 + }, + { + "epoch": 0.6835948299695881, + "grad_norm": 0.9830813940365932, + "learning_rate": 4.807081831339719e-06, + "loss": 0.5992, + "step": 39561 + }, + { + "epoch": 0.683612109482997, + "grad_norm": 1.2288936192305575, + "learning_rate": 4.806603564667996e-06, + "loss": 0.3533, + "step": 39562 + }, + { + "epoch": 0.6836293889964059, + "grad_norm": 1.419069177356163, + "learning_rate": 4.806125314262382e-06, + "loss": 0.4185, + "step": 39563 + }, + { + "epoch": 0.6836466685098148, + "grad_norm": 1.2380626181647423, + "learning_rate": 4.805647080124378e-06, + "loss": 0.3662, + "step": 39564 + }, + { + "epoch": 0.6836639480232236, + "grad_norm": 1.1701517135691293, + "learning_rate": 4.805168862255487e-06, + "loss": 0.3769, + "step": 39565 + }, + { + "epoch": 0.6836812275366325, + "grad_norm": 1.5202067327082627, + "learning_rate": 4.804690660657197e-06, + "loss": 0.4636, + "step": 39566 + }, + { + "epoch": 0.6836985070500414, + "grad_norm": 1.205939548933244, + "learning_rate": 4.804212475331015e-06, + "loss": 0.4207, + "step": 39567 + }, + { + "epoch": 0.6837157865634503, + "grad_norm": 1.0206539770987586, + "learning_rate": 4.80373430627843e-06, + "loss": 0.473, + "step": 39568 + }, + { + "epoch": 0.6837330660768592, + "grad_norm": 1.1193142151746152, + "learning_rate": 4.8032561535009445e-06, + "loss": 0.3155, + "step": 39569 + }, + { + "epoch": 0.6837503455902681, + "grad_norm": 1.405868374918926, + "learning_rate": 4.802778017000056e-06, + "loss": 0.384, + "step": 39570 + }, + { + "epoch": 0.6837676251036771, + "grad_norm": 1.553276464117219, + "learning_rate": 4.802299896777265e-06, + "loss": 0.5803, + "step": 39571 + }, + { + "epoch": 0.683784904617086, + "grad_norm": 1.4467421593054715, + "learning_rate": 4.801821792834063e-06, + "loss": 0.2459, + "step": 39572 + }, + { + "epoch": 0.6838021841304949, + "grad_norm": 0.9872138441826372, + "learning_rate": 4.8013437051719525e-06, + "loss": 0.3929, + "step": 39573 + }, + { + "epoch": 0.6838194636439038, + "grad_norm": 1.204624707605274, + "learning_rate": 4.800865633792425e-06, + "loss": 0.249, + "step": 39574 + }, + { + "epoch": 0.6838367431573127, + "grad_norm": 1.2930411182574675, + "learning_rate": 4.800387578696982e-06, + "loss": 0.2927, + "step": 39575 + }, + { + "epoch": 0.6838540226707216, + "grad_norm": 1.4005712999193758, + "learning_rate": 4.799909539887124e-06, + "loss": 0.2609, + "step": 39576 + }, + { + "epoch": 0.6838713021841305, + "grad_norm": 1.4167146408450193, + "learning_rate": 4.79943151736434e-06, + "loss": 0.328, + "step": 39577 + }, + { + "epoch": 0.6838885816975394, + "grad_norm": 1.1570146772798826, + "learning_rate": 4.798953511130137e-06, + "loss": 0.3103, + "step": 39578 + }, + { + "epoch": 0.6839058612109483, + "grad_norm": 0.975270188994955, + "learning_rate": 4.798475521186001e-06, + "loss": 0.4732, + "step": 39579 + }, + { + "epoch": 0.6839231407243572, + "grad_norm": 0.962995687313806, + "learning_rate": 4.797997547533437e-06, + "loss": 0.2811, + "step": 39580 + }, + { + "epoch": 0.6839404202377661, + "grad_norm": 1.2669569859425185, + "learning_rate": 4.797519590173939e-06, + "loss": 0.415, + "step": 39581 + }, + { + "epoch": 0.683957699751175, + "grad_norm": 1.1049580588735273, + "learning_rate": 4.797041649109009e-06, + "loss": 0.3617, + "step": 39582 + }, + { + "epoch": 0.683974979264584, + "grad_norm": 2.532141903888675, + "learning_rate": 4.79656372434014e-06, + "loss": 0.2814, + "step": 39583 + }, + { + "epoch": 0.6839922587779929, + "grad_norm": 1.2098943443264358, + "learning_rate": 4.796085815868825e-06, + "loss": 0.5339, + "step": 39584 + }, + { + "epoch": 0.6840095382914018, + "grad_norm": 0.9680238583587518, + "learning_rate": 4.795607923696565e-06, + "loss": 0.3927, + "step": 39585 + }, + { + "epoch": 0.6840268178048107, + "grad_norm": 1.1572898345620195, + "learning_rate": 4.795130047824856e-06, + "loss": 0.3684, + "step": 39586 + }, + { + "epoch": 0.6840440973182195, + "grad_norm": 0.7361375249165657, + "learning_rate": 4.794652188255199e-06, + "loss": 0.5367, + "step": 39587 + }, + { + "epoch": 0.6840613768316284, + "grad_norm": 0.9241538744843086, + "learning_rate": 4.794174344989083e-06, + "loss": 0.3564, + "step": 39588 + }, + { + "epoch": 0.6840786563450373, + "grad_norm": 0.615564149489196, + "learning_rate": 4.793696518028014e-06, + "loss": 0.5571, + "step": 39589 + }, + { + "epoch": 0.6840959358584462, + "grad_norm": 0.8076110969585915, + "learning_rate": 4.7932187073734775e-06, + "loss": 0.4332, + "step": 39590 + }, + { + "epoch": 0.6841132153718551, + "grad_norm": 1.1683580700334524, + "learning_rate": 4.792740913026977e-06, + "loss": 0.5564, + "step": 39591 + }, + { + "epoch": 0.684130494885264, + "grad_norm": 1.8716516666141543, + "learning_rate": 4.792263134990006e-06, + "loss": 0.3235, + "step": 39592 + }, + { + "epoch": 0.6841477743986729, + "grad_norm": 1.998242304748844, + "learning_rate": 4.791785373264067e-06, + "loss": 0.3436, + "step": 39593 + }, + { + "epoch": 0.6841650539120818, + "grad_norm": 1.546293113896361, + "learning_rate": 4.791307627850653e-06, + "loss": 0.5693, + "step": 39594 + }, + { + "epoch": 0.6841823334254907, + "grad_norm": 1.4842861840709125, + "learning_rate": 4.790829898751256e-06, + "loss": 0.386, + "step": 39595 + }, + { + "epoch": 0.6841996129388996, + "grad_norm": 1.2665448002206436, + "learning_rate": 4.790352185967378e-06, + "loss": 0.3315, + "step": 39596 + }, + { + "epoch": 0.6842168924523085, + "grad_norm": 1.1130162371194705, + "learning_rate": 4.789874489500506e-06, + "loss": 0.3569, + "step": 39597 + }, + { + "epoch": 0.6842341719657175, + "grad_norm": 0.958984565996167, + "learning_rate": 4.7893968093521506e-06, + "loss": 0.5054, + "step": 39598 + }, + { + "epoch": 0.6842514514791264, + "grad_norm": 1.2709747514744125, + "learning_rate": 4.788919145523796e-06, + "loss": 0.4751, + "step": 39599 + }, + { + "epoch": 0.6842687309925353, + "grad_norm": 1.1728146739643637, + "learning_rate": 4.788441498016947e-06, + "loss": 0.439, + "step": 39600 + }, + { + "epoch": 0.6842860105059442, + "grad_norm": 1.3261376706472556, + "learning_rate": 4.7879638668330905e-06, + "loss": 0.6108, + "step": 39601 + }, + { + "epoch": 0.6843032900193531, + "grad_norm": 1.156340380216814, + "learning_rate": 4.787486251973733e-06, + "loss": 0.3927, + "step": 39602 + }, + { + "epoch": 0.684320569532762, + "grad_norm": 0.9408442511048527, + "learning_rate": 4.7870086534403555e-06, + "loss": 0.3608, + "step": 39603 + }, + { + "epoch": 0.6843378490461709, + "grad_norm": 1.248628234418207, + "learning_rate": 4.786531071234472e-06, + "loss": 0.2628, + "step": 39604 + }, + { + "epoch": 0.6843551285595798, + "grad_norm": 0.8438799480796032, + "learning_rate": 4.786053505357568e-06, + "loss": 0.5066, + "step": 39605 + }, + { + "epoch": 0.6843724080729887, + "grad_norm": 0.8498875502872858, + "learning_rate": 4.785575955811138e-06, + "loss": 0.3101, + "step": 39606 + }, + { + "epoch": 0.6843896875863976, + "grad_norm": 1.6175589589397432, + "learning_rate": 4.785098422596684e-06, + "loss": 0.2884, + "step": 39607 + }, + { + "epoch": 0.6844069670998064, + "grad_norm": 1.381403603746052, + "learning_rate": 4.7846209057156935e-06, + "loss": 0.3783, + "step": 39608 + }, + { + "epoch": 0.6844242466132153, + "grad_norm": 1.2541733229613459, + "learning_rate": 4.784143405169667e-06, + "loss": 0.5208, + "step": 39609 + }, + { + "epoch": 0.6844415261266242, + "grad_norm": 1.4727918663803012, + "learning_rate": 4.7836659209601e-06, + "loss": 0.4717, + "step": 39610 + }, + { + "epoch": 0.6844588056400331, + "grad_norm": 1.866374812803461, + "learning_rate": 4.783188453088491e-06, + "loss": 0.3003, + "step": 39611 + }, + { + "epoch": 0.684476085153442, + "grad_norm": 0.9817785049273459, + "learning_rate": 4.782711001556329e-06, + "loss": 0.2791, + "step": 39612 + }, + { + "epoch": 0.684493364666851, + "grad_norm": 0.93867824875838, + "learning_rate": 4.782233566365117e-06, + "loss": 0.3892, + "step": 39613 + }, + { + "epoch": 0.6845106441802599, + "grad_norm": 1.310164322100638, + "learning_rate": 4.781756147516341e-06, + "loss": 0.1647, + "step": 39614 + }, + { + "epoch": 0.6845279236936688, + "grad_norm": 0.8698959818082779, + "learning_rate": 4.781278745011502e-06, + "loss": 0.2382, + "step": 39615 + }, + { + "epoch": 0.6845452032070777, + "grad_norm": 1.0586644270609664, + "learning_rate": 4.7808013588520974e-06, + "loss": 0.3183, + "step": 39616 + }, + { + "epoch": 0.6845624827204866, + "grad_norm": 1.5056524220003373, + "learning_rate": 4.780323989039617e-06, + "loss": 0.5247, + "step": 39617 + }, + { + "epoch": 0.6845797622338955, + "grad_norm": 0.7162315953881726, + "learning_rate": 4.779846635575562e-06, + "loss": 0.8752, + "step": 39618 + }, + { + "epoch": 0.6845970417473044, + "grad_norm": 1.6980115389266084, + "learning_rate": 4.77936929846142e-06, + "loss": 0.5623, + "step": 39619 + }, + { + "epoch": 0.6846143212607133, + "grad_norm": 0.7264547324294514, + "learning_rate": 4.77889197769869e-06, + "loss": 0.3121, + "step": 39620 + }, + { + "epoch": 0.6846316007741222, + "grad_norm": 0.7867351071904943, + "learning_rate": 4.778414673288868e-06, + "loss": 0.322, + "step": 39621 + }, + { + "epoch": 0.6846488802875311, + "grad_norm": 1.1934576794700262, + "learning_rate": 4.777937385233451e-06, + "loss": 0.2727, + "step": 39622 + }, + { + "epoch": 0.68466615980094, + "grad_norm": 1.6964925518049028, + "learning_rate": 4.777460113533927e-06, + "loss": 0.3897, + "step": 39623 + }, + { + "epoch": 0.6846834393143489, + "grad_norm": 1.395452769088551, + "learning_rate": 4.7769828581918e-06, + "loss": 0.3636, + "step": 39624 + }, + { + "epoch": 0.6847007188277578, + "grad_norm": 0.9829990097670448, + "learning_rate": 4.776505619208555e-06, + "loss": 0.435, + "step": 39625 + }, + { + "epoch": 0.6847179983411668, + "grad_norm": 1.4268085128162014, + "learning_rate": 4.77602839658569e-06, + "loss": 0.6348, + "step": 39626 + }, + { + "epoch": 0.6847352778545757, + "grad_norm": 1.6519519364339266, + "learning_rate": 4.7755511903247065e-06, + "loss": 0.1896, + "step": 39627 + }, + { + "epoch": 0.6847525573679846, + "grad_norm": 1.4864716677861918, + "learning_rate": 4.77507400042709e-06, + "loss": 0.4245, + "step": 39628 + }, + { + "epoch": 0.6847698368813934, + "grad_norm": 0.9978058496679154, + "learning_rate": 4.774596826894342e-06, + "loss": 0.4649, + "step": 39629 + }, + { + "epoch": 0.6847871163948023, + "grad_norm": 1.2551089700915776, + "learning_rate": 4.77411966972795e-06, + "loss": 0.3992, + "step": 39630 + }, + { + "epoch": 0.6848043959082112, + "grad_norm": 1.6321565756522607, + "learning_rate": 4.773642528929413e-06, + "loss": 0.2631, + "step": 39631 + }, + { + "epoch": 0.6848216754216201, + "grad_norm": 0.9902053266284554, + "learning_rate": 4.773165404500224e-06, + "loss": 0.1917, + "step": 39632 + }, + { + "epoch": 0.684838954935029, + "grad_norm": 1.227565839498797, + "learning_rate": 4.772688296441882e-06, + "loss": 0.3039, + "step": 39633 + }, + { + "epoch": 0.6848562344484379, + "grad_norm": 1.2608876583125637, + "learning_rate": 4.772211204755875e-06, + "loss": 0.483, + "step": 39634 + }, + { + "epoch": 0.6848735139618468, + "grad_norm": 1.0800002945236702, + "learning_rate": 4.771734129443701e-06, + "loss": 0.2636, + "step": 39635 + }, + { + "epoch": 0.6848907934752557, + "grad_norm": 1.3588745635292805, + "learning_rate": 4.771257070506856e-06, + "loss": 0.3904, + "step": 39636 + }, + { + "epoch": 0.6849080729886646, + "grad_norm": 0.9893197525678814, + "learning_rate": 4.770780027946822e-06, + "loss": 0.4872, + "step": 39637 + }, + { + "epoch": 0.6849253525020735, + "grad_norm": 1.2067958604901556, + "learning_rate": 4.77030300176511e-06, + "loss": 0.5513, + "step": 39638 + }, + { + "epoch": 0.6849426320154824, + "grad_norm": 1.0655951246708484, + "learning_rate": 4.769825991963203e-06, + "loss": 0.3656, + "step": 39639 + }, + { + "epoch": 0.6849599115288914, + "grad_norm": 1.2869722230031932, + "learning_rate": 4.769348998542602e-06, + "loss": 0.5436, + "step": 39640 + }, + { + "epoch": 0.6849771910423003, + "grad_norm": 1.2416850068036007, + "learning_rate": 4.768872021504795e-06, + "loss": 0.239, + "step": 39641 + }, + { + "epoch": 0.6849944705557092, + "grad_norm": 2.352701079311388, + "learning_rate": 4.768395060851279e-06, + "loss": 0.4116, + "step": 39642 + }, + { + "epoch": 0.6850117500691181, + "grad_norm": 1.7680660528671805, + "learning_rate": 4.767918116583543e-06, + "loss": 0.3654, + "step": 39643 + }, + { + "epoch": 0.685029029582527, + "grad_norm": 0.8559341986358343, + "learning_rate": 4.767441188703092e-06, + "loss": 0.3954, + "step": 39644 + }, + { + "epoch": 0.6850463090959359, + "grad_norm": 0.9202127124649793, + "learning_rate": 4.766964277211411e-06, + "loss": 0.4757, + "step": 39645 + }, + { + "epoch": 0.6850635886093448, + "grad_norm": 0.8808831320877781, + "learning_rate": 4.766487382109993e-06, + "loss": 0.4065, + "step": 39646 + }, + { + "epoch": 0.6850808681227537, + "grad_norm": 1.5054171456240015, + "learning_rate": 4.766010503400338e-06, + "loss": 0.3082, + "step": 39647 + }, + { + "epoch": 0.6850981476361626, + "grad_norm": 1.3995397156192713, + "learning_rate": 4.765533641083932e-06, + "loss": 0.3376, + "step": 39648 + }, + { + "epoch": 0.6851154271495715, + "grad_norm": 0.708560012189419, + "learning_rate": 4.7650567951622724e-06, + "loss": 0.2024, + "step": 39649 + }, + { + "epoch": 0.6851327066629803, + "grad_norm": 1.8340428205038675, + "learning_rate": 4.764579965636854e-06, + "loss": 0.3392, + "step": 39650 + }, + { + "epoch": 0.6851499861763892, + "grad_norm": 0.9091707378812087, + "learning_rate": 4.76410315250917e-06, + "loss": 0.3616, + "step": 39651 + }, + { + "epoch": 0.6851672656897981, + "grad_norm": 1.2005941558773472, + "learning_rate": 4.7636263557807115e-06, + "loss": 0.3573, + "step": 39652 + }, + { + "epoch": 0.685184545203207, + "grad_norm": 2.07694404056623, + "learning_rate": 4.763149575452976e-06, + "loss": 0.3596, + "step": 39653 + }, + { + "epoch": 0.685201824716616, + "grad_norm": 1.5306452941209905, + "learning_rate": 4.7626728115274515e-06, + "loss": 0.4699, + "step": 39654 + }, + { + "epoch": 0.6852191042300249, + "grad_norm": 0.8919435575308828, + "learning_rate": 4.762196064005633e-06, + "loss": 0.4051, + "step": 39655 + }, + { + "epoch": 0.6852363837434338, + "grad_norm": 0.9892972365598393, + "learning_rate": 4.761719332889018e-06, + "loss": 0.1591, + "step": 39656 + }, + { + "epoch": 0.6852536632568427, + "grad_norm": 1.1019210742062355, + "learning_rate": 4.761242618179091e-06, + "loss": 0.2949, + "step": 39657 + }, + { + "epoch": 0.6852709427702516, + "grad_norm": 1.401569030332112, + "learning_rate": 4.760765919877357e-06, + "loss": 0.5213, + "step": 39658 + }, + { + "epoch": 0.6852882222836605, + "grad_norm": 1.1112219693477792, + "learning_rate": 4.7602892379852965e-06, + "loss": 0.3675, + "step": 39659 + }, + { + "epoch": 0.6853055017970694, + "grad_norm": 1.4197654752956441, + "learning_rate": 4.759812572504409e-06, + "loss": 0.302, + "step": 39660 + }, + { + "epoch": 0.6853227813104783, + "grad_norm": 1.6765011058138661, + "learning_rate": 4.759335923436187e-06, + "loss": 0.3288, + "step": 39661 + }, + { + "epoch": 0.6853400608238872, + "grad_norm": 1.0689410912011894, + "learning_rate": 4.758859290782126e-06, + "loss": 0.3283, + "step": 39662 + }, + { + "epoch": 0.6853573403372961, + "grad_norm": 1.9034449489741352, + "learning_rate": 4.758382674543713e-06, + "loss": 0.4889, + "step": 39663 + }, + { + "epoch": 0.685374619850705, + "grad_norm": 1.3359419449969334, + "learning_rate": 4.757906074722447e-06, + "loss": 0.3945, + "step": 39664 + }, + { + "epoch": 0.6853918993641139, + "grad_norm": 1.2229488298296693, + "learning_rate": 4.757429491319814e-06, + "loss": 0.3299, + "step": 39665 + }, + { + "epoch": 0.6854091788775228, + "grad_norm": 0.9925629399846957, + "learning_rate": 4.75695292433731e-06, + "loss": 0.4177, + "step": 39666 + }, + { + "epoch": 0.6854264583909317, + "grad_norm": 1.1697904683890836, + "learning_rate": 4.756476373776432e-06, + "loss": 0.3687, + "step": 39667 + }, + { + "epoch": 0.6854437379043407, + "grad_norm": 0.9438890989295152, + "learning_rate": 4.755999839638665e-06, + "loss": 0.2835, + "step": 39668 + }, + { + "epoch": 0.6854610174177496, + "grad_norm": 1.2096724672858874, + "learning_rate": 4.755523321925508e-06, + "loss": 0.4664, + "step": 39669 + }, + { + "epoch": 0.6854782969311585, + "grad_norm": 1.6598610975166774, + "learning_rate": 4.755046820638448e-06, + "loss": 0.4211, + "step": 39670 + }, + { + "epoch": 0.6854955764445673, + "grad_norm": 1.2350404091604184, + "learning_rate": 4.75457033577898e-06, + "loss": 0.3692, + "step": 39671 + }, + { + "epoch": 0.6855128559579762, + "grad_norm": 1.1455089605002136, + "learning_rate": 4.7540938673485945e-06, + "loss": 0.2787, + "step": 39672 + }, + { + "epoch": 0.6855301354713851, + "grad_norm": 1.2145500693976257, + "learning_rate": 4.753617415348791e-06, + "loss": 0.511, + "step": 39673 + }, + { + "epoch": 0.685547414984794, + "grad_norm": 1.2793745035798743, + "learning_rate": 4.7531409797810524e-06, + "loss": 0.3624, + "step": 39674 + }, + { + "epoch": 0.6855646944982029, + "grad_norm": 1.3038219320755542, + "learning_rate": 4.752664560646878e-06, + "loss": 0.4842, + "step": 39675 + }, + { + "epoch": 0.6855819740116118, + "grad_norm": 1.341031532450035, + "learning_rate": 4.752188157947758e-06, + "loss": 0.4621, + "step": 39676 + }, + { + "epoch": 0.6855992535250207, + "grad_norm": 1.9380447131845275, + "learning_rate": 4.751711771685175e-06, + "loss": 0.3915, + "step": 39677 + }, + { + "epoch": 0.6856165330384296, + "grad_norm": 1.1811279502560257, + "learning_rate": 4.7512354018606375e-06, + "loss": 0.3044, + "step": 39678 + }, + { + "epoch": 0.6856338125518385, + "grad_norm": 0.676187239399353, + "learning_rate": 4.750759048475626e-06, + "loss": 0.2958, + "step": 39679 + }, + { + "epoch": 0.6856510920652474, + "grad_norm": 0.4745108878203402, + "learning_rate": 4.750282711531641e-06, + "loss": 0.6238, + "step": 39680 + }, + { + "epoch": 0.6856683715786563, + "grad_norm": 1.2990446671071356, + "learning_rate": 4.749806391030164e-06, + "loss": 0.3884, + "step": 39681 + }, + { + "epoch": 0.6856856510920653, + "grad_norm": 1.2025961552842508, + "learning_rate": 4.749330086972697e-06, + "loss": 0.3246, + "step": 39682 + }, + { + "epoch": 0.6857029306054742, + "grad_norm": 1.0300041710421948, + "learning_rate": 4.74885379936072e-06, + "loss": 0.442, + "step": 39683 + }, + { + "epoch": 0.6857202101188831, + "grad_norm": 1.5690553151269497, + "learning_rate": 4.748377528195738e-06, + "loss": 0.3659, + "step": 39684 + }, + { + "epoch": 0.685737489632292, + "grad_norm": 1.2617447631751946, + "learning_rate": 4.747901273479234e-06, + "loss": 0.3372, + "step": 39685 + }, + { + "epoch": 0.6857547691457009, + "grad_norm": 1.1290307930522532, + "learning_rate": 4.747425035212706e-06, + "loss": 0.4984, + "step": 39686 + }, + { + "epoch": 0.6857720486591098, + "grad_norm": 1.2374378982300633, + "learning_rate": 4.746948813397642e-06, + "loss": 0.4915, + "step": 39687 + }, + { + "epoch": 0.6857893281725187, + "grad_norm": 1.2060016142045464, + "learning_rate": 4.74647260803553e-06, + "loss": 0.3507, + "step": 39688 + }, + { + "epoch": 0.6858066076859276, + "grad_norm": 1.011608246210882, + "learning_rate": 4.745996419127865e-06, + "loss": 0.3641, + "step": 39689 + }, + { + "epoch": 0.6858238871993365, + "grad_norm": 1.4930037331879293, + "learning_rate": 4.745520246676139e-06, + "loss": 0.3146, + "step": 39690 + }, + { + "epoch": 0.6858411667127454, + "grad_norm": 1.1061234468181163, + "learning_rate": 4.745044090681845e-06, + "loss": 0.4243, + "step": 39691 + }, + { + "epoch": 0.6858584462261542, + "grad_norm": 1.2452443561356794, + "learning_rate": 4.7445679511464695e-06, + "loss": 0.3142, + "step": 39692 + }, + { + "epoch": 0.6858757257395631, + "grad_norm": 1.320233307048698, + "learning_rate": 4.7440918280715095e-06, + "loss": 0.3482, + "step": 39693 + }, + { + "epoch": 0.685893005252972, + "grad_norm": 0.8299731382165421, + "learning_rate": 4.743615721458451e-06, + "loss": 0.3678, + "step": 39694 + }, + { + "epoch": 0.6859102847663809, + "grad_norm": 1.3594197368965575, + "learning_rate": 4.743139631308787e-06, + "loss": 0.4463, + "step": 39695 + }, + { + "epoch": 0.6859275642797898, + "grad_norm": 0.7896093852693257, + "learning_rate": 4.742663557624014e-06, + "loss": 0.342, + "step": 39696 + }, + { + "epoch": 0.6859448437931988, + "grad_norm": 0.7831629982714814, + "learning_rate": 4.742187500405613e-06, + "loss": 0.2565, + "step": 39697 + }, + { + "epoch": 0.6859621233066077, + "grad_norm": 1.4405562114583004, + "learning_rate": 4.741711459655084e-06, + "loss": 0.4069, + "step": 39698 + }, + { + "epoch": 0.6859794028200166, + "grad_norm": 1.4861953982376443, + "learning_rate": 4.741235435373911e-06, + "loss": 0.567, + "step": 39699 + }, + { + "epoch": 0.6859966823334255, + "grad_norm": 1.3809085872071745, + "learning_rate": 4.7407594275635885e-06, + "loss": 0.335, + "step": 39700 + }, + { + "epoch": 0.6860139618468344, + "grad_norm": 1.7361281693395434, + "learning_rate": 4.740283436225607e-06, + "loss": 0.338, + "step": 39701 + }, + { + "epoch": 0.6860312413602433, + "grad_norm": 0.7189383823428347, + "learning_rate": 4.739807461361461e-06, + "loss": 0.3598, + "step": 39702 + }, + { + "epoch": 0.6860485208736522, + "grad_norm": 1.141114871875876, + "learning_rate": 4.739331502972634e-06, + "loss": 0.2539, + "step": 39703 + }, + { + "epoch": 0.6860658003870611, + "grad_norm": 1.4037408506844955, + "learning_rate": 4.738855561060624e-06, + "loss": 0.3072, + "step": 39704 + }, + { + "epoch": 0.68608307990047, + "grad_norm": 1.0938086418567343, + "learning_rate": 4.7383796356269155e-06, + "loss": 0.2292, + "step": 39705 + }, + { + "epoch": 0.6861003594138789, + "grad_norm": 1.583078019759255, + "learning_rate": 4.737903726673001e-06, + "loss": 0.2842, + "step": 39706 + }, + { + "epoch": 0.6861176389272878, + "grad_norm": 0.8143966300948453, + "learning_rate": 4.737427834200376e-06, + "loss": 0.246, + "step": 39707 + }, + { + "epoch": 0.6861349184406967, + "grad_norm": 1.0858954384350474, + "learning_rate": 4.736951958210524e-06, + "loss": 0.4258, + "step": 39708 + }, + { + "epoch": 0.6861521979541056, + "grad_norm": 0.565723209159056, + "learning_rate": 4.73647609870494e-06, + "loss": 0.5668, + "step": 39709 + }, + { + "epoch": 0.6861694774675146, + "grad_norm": 0.6030154870167795, + "learning_rate": 4.736000255685111e-06, + "loss": 0.5828, + "step": 39710 + }, + { + "epoch": 0.6861867569809235, + "grad_norm": 1.1480303479017089, + "learning_rate": 4.73552442915253e-06, + "loss": 0.5715, + "step": 39711 + }, + { + "epoch": 0.6862040364943324, + "grad_norm": 1.452212762295247, + "learning_rate": 4.735048619108685e-06, + "loss": 0.3859, + "step": 39712 + }, + { + "epoch": 0.6862213160077412, + "grad_norm": 1.2532608020909146, + "learning_rate": 4.734572825555071e-06, + "loss": 0.3538, + "step": 39713 + }, + { + "epoch": 0.6862385955211501, + "grad_norm": 1.0550949000430856, + "learning_rate": 4.7340970484931735e-06, + "loss": 0.5293, + "step": 39714 + }, + { + "epoch": 0.686255875034559, + "grad_norm": 0.8993706500929851, + "learning_rate": 4.733621287924487e-06, + "loss": 0.3964, + "step": 39715 + }, + { + "epoch": 0.6862731545479679, + "grad_norm": 1.0194989180151721, + "learning_rate": 4.733145543850495e-06, + "loss": 0.3001, + "step": 39716 + }, + { + "epoch": 0.6862904340613768, + "grad_norm": 1.7281897441020335, + "learning_rate": 4.732669816272691e-06, + "loss": 0.4667, + "step": 39717 + }, + { + "epoch": 0.6863077135747857, + "grad_norm": 1.3485999090210854, + "learning_rate": 4.732194105192569e-06, + "loss": 0.2761, + "step": 39718 + }, + { + "epoch": 0.6863249930881946, + "grad_norm": 1.546888805925263, + "learning_rate": 4.7317184106116124e-06, + "loss": 0.2448, + "step": 39719 + }, + { + "epoch": 0.6863422726016035, + "grad_norm": 0.976899699683394, + "learning_rate": 4.731242732531317e-06, + "loss": 0.6514, + "step": 39720 + }, + { + "epoch": 0.6863595521150124, + "grad_norm": 1.5733891892675302, + "learning_rate": 4.730767070953167e-06, + "loss": 0.509, + "step": 39721 + }, + { + "epoch": 0.6863768316284213, + "grad_norm": 2.330866419467967, + "learning_rate": 4.730291425878658e-06, + "loss": 0.3893, + "step": 39722 + }, + { + "epoch": 0.6863941111418302, + "grad_norm": 1.069493888216419, + "learning_rate": 4.729815797309271e-06, + "loss": 0.4294, + "step": 39723 + }, + { + "epoch": 0.6864113906552392, + "grad_norm": 0.5776377744222664, + "learning_rate": 4.729340185246507e-06, + "loss": 0.7965, + "step": 39724 + }, + { + "epoch": 0.6864286701686481, + "grad_norm": 1.3610711517901357, + "learning_rate": 4.728864589691846e-06, + "loss": 0.5038, + "step": 39725 + }, + { + "epoch": 0.686445949682057, + "grad_norm": 1.2852960343282636, + "learning_rate": 4.728389010646787e-06, + "loss": 0.4709, + "step": 39726 + }, + { + "epoch": 0.6864632291954659, + "grad_norm": 1.715676376636311, + "learning_rate": 4.727913448112813e-06, + "loss": 0.3911, + "step": 39727 + }, + { + "epoch": 0.6864805087088748, + "grad_norm": 1.3882010230832056, + "learning_rate": 4.727437902091412e-06, + "loss": 0.3528, + "step": 39728 + }, + { + "epoch": 0.6864977882222837, + "grad_norm": 1.6147387011564813, + "learning_rate": 4.726962372584076e-06, + "loss": 0.3445, + "step": 39729 + }, + { + "epoch": 0.6865150677356926, + "grad_norm": 1.911631689600967, + "learning_rate": 4.726486859592294e-06, + "loss": 0.4645, + "step": 39730 + }, + { + "epoch": 0.6865323472491015, + "grad_norm": 1.0458775465079397, + "learning_rate": 4.7260113631175605e-06, + "loss": 0.3413, + "step": 39731 + }, + { + "epoch": 0.6865496267625104, + "grad_norm": 1.0884335143190556, + "learning_rate": 4.725535883161356e-06, + "loss": 0.4356, + "step": 39732 + }, + { + "epoch": 0.6865669062759193, + "grad_norm": 1.8851551530430195, + "learning_rate": 4.725060419725177e-06, + "loss": 0.3226, + "step": 39733 + }, + { + "epoch": 0.6865841857893282, + "grad_norm": 1.2411281871290705, + "learning_rate": 4.724584972810508e-06, + "loss": 0.3647, + "step": 39734 + }, + { + "epoch": 0.686601465302737, + "grad_norm": 1.3763309979783833, + "learning_rate": 4.724109542418838e-06, + "loss": 0.6331, + "step": 39735 + }, + { + "epoch": 0.6866187448161459, + "grad_norm": 1.0865505797744506, + "learning_rate": 4.723634128551657e-06, + "loss": 0.5663, + "step": 39736 + }, + { + "epoch": 0.6866360243295548, + "grad_norm": 1.913758463022127, + "learning_rate": 4.723158731210461e-06, + "loss": 0.4456, + "step": 39737 + }, + { + "epoch": 0.6866533038429637, + "grad_norm": 1.331982578824145, + "learning_rate": 4.722683350396732e-06, + "loss": 0.4239, + "step": 39738 + }, + { + "epoch": 0.6866705833563727, + "grad_norm": 0.737680504818371, + "learning_rate": 4.722207986111955e-06, + "loss": 0.3734, + "step": 39739 + }, + { + "epoch": 0.6866878628697816, + "grad_norm": 1.0721722641154932, + "learning_rate": 4.721732638357624e-06, + "loss": 0.4101, + "step": 39740 + }, + { + "epoch": 0.6867051423831905, + "grad_norm": 1.3202121334090497, + "learning_rate": 4.721257307135228e-06, + "loss": 0.4148, + "step": 39741 + }, + { + "epoch": 0.6867224218965994, + "grad_norm": 1.41231212304605, + "learning_rate": 4.72078199244626e-06, + "loss": 0.2878, + "step": 39742 + }, + { + "epoch": 0.6867397014100083, + "grad_norm": 1.9849946200346946, + "learning_rate": 4.7203066942921996e-06, + "loss": 0.4202, + "step": 39743 + }, + { + "epoch": 0.6867569809234172, + "grad_norm": 1.2176822075779261, + "learning_rate": 4.7198314126745424e-06, + "loss": 0.4836, + "step": 39744 + }, + { + "epoch": 0.6867742604368261, + "grad_norm": 1.1591639963012628, + "learning_rate": 4.7193561475947725e-06, + "loss": 0.4238, + "step": 39745 + }, + { + "epoch": 0.686791539950235, + "grad_norm": 1.2031797205929353, + "learning_rate": 4.71888089905438e-06, + "loss": 0.3735, + "step": 39746 + }, + { + "epoch": 0.6868088194636439, + "grad_norm": 1.2466163952683658, + "learning_rate": 4.718405667054853e-06, + "loss": 0.5848, + "step": 39747 + }, + { + "epoch": 0.6868260989770528, + "grad_norm": 1.2376921448283944, + "learning_rate": 4.717930451597686e-06, + "loss": 0.3234, + "step": 39748 + }, + { + "epoch": 0.6868433784904617, + "grad_norm": 0.7621001879315094, + "learning_rate": 4.717455252684361e-06, + "loss": 0.4151, + "step": 39749 + }, + { + "epoch": 0.6868606580038706, + "grad_norm": 1.0902341495091612, + "learning_rate": 4.716980070316364e-06, + "loss": 0.421, + "step": 39750 + }, + { + "epoch": 0.6868779375172795, + "grad_norm": 1.3109025246782127, + "learning_rate": 4.716504904495187e-06, + "loss": 0.4341, + "step": 39751 + }, + { + "epoch": 0.6868952170306885, + "grad_norm": 0.9222474237275272, + "learning_rate": 4.716029755222318e-06, + "loss": 0.3945, + "step": 39752 + }, + { + "epoch": 0.6869124965440974, + "grad_norm": 1.133764959385614, + "learning_rate": 4.71555462249925e-06, + "loss": 0.3893, + "step": 39753 + }, + { + "epoch": 0.6869297760575063, + "grad_norm": 0.9491408148486303, + "learning_rate": 4.715079506327462e-06, + "loss": 0.3885, + "step": 39754 + }, + { + "epoch": 0.6869470555709152, + "grad_norm": 0.8178114814642915, + "learning_rate": 4.7146044067084506e-06, + "loss": 0.3507, + "step": 39755 + }, + { + "epoch": 0.686964335084324, + "grad_norm": 1.371513108572939, + "learning_rate": 4.714129323643696e-06, + "loss": 0.3201, + "step": 39756 + }, + { + "epoch": 0.6869816145977329, + "grad_norm": 1.3058570379658145, + "learning_rate": 4.71365425713469e-06, + "loss": 0.2715, + "step": 39757 + }, + { + "epoch": 0.6869988941111418, + "grad_norm": 1.2991503239897955, + "learning_rate": 4.713179207182925e-06, + "loss": 0.3017, + "step": 39758 + }, + { + "epoch": 0.6870161736245507, + "grad_norm": 1.0076528339533848, + "learning_rate": 4.712704173789881e-06, + "loss": 0.3465, + "step": 39759 + }, + { + "epoch": 0.6870334531379596, + "grad_norm": 1.685564963296481, + "learning_rate": 4.712229156957051e-06, + "loss": 0.3524, + "step": 39760 + }, + { + "epoch": 0.6870507326513685, + "grad_norm": 1.4150556846893836, + "learning_rate": 4.711754156685919e-06, + "loss": 0.2863, + "step": 39761 + }, + { + "epoch": 0.6870680121647774, + "grad_norm": 1.503573304778637, + "learning_rate": 4.711279172977979e-06, + "loss": 0.3728, + "step": 39762 + }, + { + "epoch": 0.6870852916781863, + "grad_norm": 1.3448456960378938, + "learning_rate": 4.710804205834706e-06, + "loss": 0.4331, + "step": 39763 + }, + { + "epoch": 0.6871025711915952, + "grad_norm": 1.1035111867287906, + "learning_rate": 4.710329255257604e-06, + "loss": 0.4154, + "step": 39764 + }, + { + "epoch": 0.6871198507050041, + "grad_norm": 1.1140954990237497, + "learning_rate": 4.7098543212481495e-06, + "loss": 0.2645, + "step": 39765 + }, + { + "epoch": 0.687137130218413, + "grad_norm": 0.8558083121198067, + "learning_rate": 4.709379403807837e-06, + "loss": 0.3585, + "step": 39766 + }, + { + "epoch": 0.687154409731822, + "grad_norm": 0.4657085872895757, + "learning_rate": 4.708904502938146e-06, + "loss": 0.688, + "step": 39767 + }, + { + "epoch": 0.6871716892452309, + "grad_norm": 0.6357929384889331, + "learning_rate": 4.708429618640572e-06, + "loss": 0.2732, + "step": 39768 + }, + { + "epoch": 0.6871889687586398, + "grad_norm": 1.4915648793392795, + "learning_rate": 4.7079547509165956e-06, + "loss": 0.3469, + "step": 39769 + }, + { + "epoch": 0.6872062482720487, + "grad_norm": 0.8515316621768505, + "learning_rate": 4.707479899767706e-06, + "loss": 0.3811, + "step": 39770 + }, + { + "epoch": 0.6872235277854576, + "grad_norm": 1.3245147312661352, + "learning_rate": 4.707005065195396e-06, + "loss": 0.3367, + "step": 39771 + }, + { + "epoch": 0.6872408072988665, + "grad_norm": 2.382805881845507, + "learning_rate": 4.706530247201144e-06, + "loss": 0.2078, + "step": 39772 + }, + { + "epoch": 0.6872580868122754, + "grad_norm": 0.9015827389551521, + "learning_rate": 4.706055445786446e-06, + "loss": 0.4319, + "step": 39773 + }, + { + "epoch": 0.6872753663256843, + "grad_norm": 1.5151811498591357, + "learning_rate": 4.705580660952781e-06, + "loss": 0.436, + "step": 39774 + }, + { + "epoch": 0.6872926458390932, + "grad_norm": 1.2290042347623515, + "learning_rate": 4.705105892701639e-06, + "loss": 0.2731, + "step": 39775 + }, + { + "epoch": 0.6873099253525021, + "grad_norm": 1.247304904541968, + "learning_rate": 4.704631141034509e-06, + "loss": 0.4616, + "step": 39776 + }, + { + "epoch": 0.6873272048659109, + "grad_norm": 1.0184445074374033, + "learning_rate": 4.70415640595288e-06, + "loss": 0.2842, + "step": 39777 + }, + { + "epoch": 0.6873444843793198, + "grad_norm": 1.1135329429753809, + "learning_rate": 4.703681687458231e-06, + "loss": 0.4033, + "step": 39778 + }, + { + "epoch": 0.6873617638927287, + "grad_norm": 1.1067206200606912, + "learning_rate": 4.7032069855520575e-06, + "loss": 0.1931, + "step": 39779 + }, + { + "epoch": 0.6873790434061376, + "grad_norm": 1.2086624191159434, + "learning_rate": 4.702732300235839e-06, + "loss": 0.4622, + "step": 39780 + }, + { + "epoch": 0.6873963229195466, + "grad_norm": 1.2766761795366917, + "learning_rate": 4.702257631511066e-06, + "loss": 0.4796, + "step": 39781 + }, + { + "epoch": 0.6874136024329555, + "grad_norm": 1.2463214469979547, + "learning_rate": 4.701782979379227e-06, + "loss": 0.4988, + "step": 39782 + }, + { + "epoch": 0.6874308819463644, + "grad_norm": 0.9488642311332607, + "learning_rate": 4.701308343841803e-06, + "loss": 0.3776, + "step": 39783 + }, + { + "epoch": 0.6874481614597733, + "grad_norm": 1.247000439568369, + "learning_rate": 4.7008337249002875e-06, + "loss": 0.4203, + "step": 39784 + }, + { + "epoch": 0.6874654409731822, + "grad_norm": 1.4754517623671564, + "learning_rate": 4.700359122556161e-06, + "loss": 0.4114, + "step": 39785 + }, + { + "epoch": 0.6874827204865911, + "grad_norm": 1.2319699000106772, + "learning_rate": 4.6998845368109116e-06, + "loss": 0.3134, + "step": 39786 + }, + { + "epoch": 0.6875, + "grad_norm": 1.3207043244124266, + "learning_rate": 4.699409967666027e-06, + "loss": 0.2027, + "step": 39787 + }, + { + "epoch": 0.6875172795134089, + "grad_norm": 1.2951350682076757, + "learning_rate": 4.698935415122997e-06, + "loss": 0.4899, + "step": 39788 + }, + { + "epoch": 0.6875345590268178, + "grad_norm": 1.08725149216667, + "learning_rate": 4.698460879183304e-06, + "loss": 0.3604, + "step": 39789 + }, + { + "epoch": 0.6875518385402267, + "grad_norm": 0.6150607863456424, + "learning_rate": 4.69798635984843e-06, + "loss": 0.4947, + "step": 39790 + }, + { + "epoch": 0.6875691180536356, + "grad_norm": 1.1561476044150658, + "learning_rate": 4.697511857119867e-06, + "loss": 0.3173, + "step": 39791 + }, + { + "epoch": 0.6875863975670445, + "grad_norm": 1.3429037251027038, + "learning_rate": 4.697037370999098e-06, + "loss": 0.398, + "step": 39792 + }, + { + "epoch": 0.6876036770804534, + "grad_norm": 1.3795252357807255, + "learning_rate": 4.696562901487616e-06, + "loss": 0.4269, + "step": 39793 + }, + { + "epoch": 0.6876209565938624, + "grad_norm": 1.37387211023648, + "learning_rate": 4.696088448586898e-06, + "loss": 0.3931, + "step": 39794 + }, + { + "epoch": 0.6876382361072713, + "grad_norm": 1.4337892063111075, + "learning_rate": 4.6956140122984385e-06, + "loss": 0.267, + "step": 39795 + }, + { + "epoch": 0.6876555156206802, + "grad_norm": 1.1325832908416011, + "learning_rate": 4.695139592623714e-06, + "loss": 0.3576, + "step": 39796 + }, + { + "epoch": 0.6876727951340891, + "grad_norm": 1.0024087471479164, + "learning_rate": 4.694665189564217e-06, + "loss": 0.4255, + "step": 39797 + }, + { + "epoch": 0.6876900746474979, + "grad_norm": 1.1326615222493432, + "learning_rate": 4.69419080312143e-06, + "loss": 0.3053, + "step": 39798 + }, + { + "epoch": 0.6877073541609068, + "grad_norm": 0.8930589932706282, + "learning_rate": 4.6937164332968455e-06, + "loss": 0.3528, + "step": 39799 + }, + { + "epoch": 0.6877246336743157, + "grad_norm": 1.5999406510975842, + "learning_rate": 4.693242080091944e-06, + "loss": 0.3778, + "step": 39800 + }, + { + "epoch": 0.6877419131877246, + "grad_norm": 1.0048140728366146, + "learning_rate": 4.6927677435082085e-06, + "loss": 0.5444, + "step": 39801 + }, + { + "epoch": 0.6877591927011335, + "grad_norm": 1.4677987751161135, + "learning_rate": 4.692293423547131e-06, + "loss": 0.2355, + "step": 39802 + }, + { + "epoch": 0.6877764722145424, + "grad_norm": 1.9296555811347271, + "learning_rate": 4.6918191202101874e-06, + "loss": 0.3985, + "step": 39803 + }, + { + "epoch": 0.6877937517279513, + "grad_norm": 1.2989442244281582, + "learning_rate": 4.691344833498875e-06, + "loss": 0.5497, + "step": 39804 + }, + { + "epoch": 0.6878110312413602, + "grad_norm": 1.3917357154483814, + "learning_rate": 4.690870563414672e-06, + "loss": 0.324, + "step": 39805 + }, + { + "epoch": 0.6878283107547691, + "grad_norm": 1.2531826299241446, + "learning_rate": 4.690396309959069e-06, + "loss": 0.3591, + "step": 39806 + }, + { + "epoch": 0.687845590268178, + "grad_norm": 1.1629165006814688, + "learning_rate": 4.689922073133545e-06, + "loss": 0.5232, + "step": 39807 + }, + { + "epoch": 0.687862869781587, + "grad_norm": 1.3458412535388609, + "learning_rate": 4.6894478529395915e-06, + "loss": 0.3366, + "step": 39808 + }, + { + "epoch": 0.6878801492949959, + "grad_norm": 0.8897808683099848, + "learning_rate": 4.688973649378684e-06, + "loss": 0.267, + "step": 39809 + }, + { + "epoch": 0.6878974288084048, + "grad_norm": 1.434459786923569, + "learning_rate": 4.688499462452322e-06, + "loss": 0.4258, + "step": 39810 + }, + { + "epoch": 0.6879147083218137, + "grad_norm": 1.8980418057887962, + "learning_rate": 4.688025292161984e-06, + "loss": 0.3606, + "step": 39811 + }, + { + "epoch": 0.6879319878352226, + "grad_norm": 1.1962103887179119, + "learning_rate": 4.68755113850915e-06, + "loss": 0.4722, + "step": 39812 + }, + { + "epoch": 0.6879492673486315, + "grad_norm": 1.4670074491297802, + "learning_rate": 4.687077001495314e-06, + "loss": 0.3173, + "step": 39813 + }, + { + "epoch": 0.6879665468620404, + "grad_norm": 1.163229692113752, + "learning_rate": 4.686602881121952e-06, + "loss": 0.3093, + "step": 39814 + }, + { + "epoch": 0.6879838263754493, + "grad_norm": 2.4109271666211636, + "learning_rate": 4.686128777390554e-06, + "loss": 0.2795, + "step": 39815 + }, + { + "epoch": 0.6880011058888582, + "grad_norm": 1.6163026895464336, + "learning_rate": 4.685654690302605e-06, + "loss": 0.6097, + "step": 39816 + }, + { + "epoch": 0.6880183854022671, + "grad_norm": 1.0375034200527058, + "learning_rate": 4.685180619859592e-06, + "loss": 0.5063, + "step": 39817 + }, + { + "epoch": 0.688035664915676, + "grad_norm": 1.6565066088472564, + "learning_rate": 4.684706566062994e-06, + "loss": 0.335, + "step": 39818 + }, + { + "epoch": 0.6880529444290848, + "grad_norm": 1.2421271941280483, + "learning_rate": 4.684232528914303e-06, + "loss": 0.4861, + "step": 39819 + }, + { + "epoch": 0.6880702239424937, + "grad_norm": 1.3979480060924396, + "learning_rate": 4.683758508414995e-06, + "loss": 0.4987, + "step": 39820 + }, + { + "epoch": 0.6880875034559026, + "grad_norm": 1.406974964132088, + "learning_rate": 4.683284504566561e-06, + "loss": 0.2604, + "step": 39821 + }, + { + "epoch": 0.6881047829693115, + "grad_norm": 0.8184701854760998, + "learning_rate": 4.6828105173704865e-06, + "loss": 0.4674, + "step": 39822 + }, + { + "epoch": 0.6881220624827205, + "grad_norm": 1.477896372943674, + "learning_rate": 4.68233654682825e-06, + "loss": 0.378, + "step": 39823 + }, + { + "epoch": 0.6881393419961294, + "grad_norm": 0.8943068925599951, + "learning_rate": 4.681862592941343e-06, + "loss": 0.6569, + "step": 39824 + }, + { + "epoch": 0.6881566215095383, + "grad_norm": 0.9601814684962842, + "learning_rate": 4.681388655711243e-06, + "loss": 0.2276, + "step": 39825 + }, + { + "epoch": 0.6881739010229472, + "grad_norm": 0.775141148304172, + "learning_rate": 4.680914735139439e-06, + "loss": 0.2808, + "step": 39826 + }, + { + "epoch": 0.6881911805363561, + "grad_norm": 0.9513844014213706, + "learning_rate": 4.680440831227413e-06, + "loss": 0.425, + "step": 39827 + }, + { + "epoch": 0.688208460049765, + "grad_norm": 0.9550158035452198, + "learning_rate": 4.6799669439766545e-06, + "loss": 0.275, + "step": 39828 + }, + { + "epoch": 0.6882257395631739, + "grad_norm": 1.4125011509149012, + "learning_rate": 4.679493073388641e-06, + "loss": 0.2329, + "step": 39829 + }, + { + "epoch": 0.6882430190765828, + "grad_norm": 1.348140039246422, + "learning_rate": 4.679019219464862e-06, + "loss": 0.2585, + "step": 39830 + }, + { + "epoch": 0.6882602985899917, + "grad_norm": 0.9401787377776292, + "learning_rate": 4.678545382206796e-06, + "loss": 0.298, + "step": 39831 + }, + { + "epoch": 0.6882775781034006, + "grad_norm": 0.7928697217456221, + "learning_rate": 4.678071561615931e-06, + "loss": 0.2078, + "step": 39832 + }, + { + "epoch": 0.6882948576168095, + "grad_norm": 1.445965187542966, + "learning_rate": 4.677597757693754e-06, + "loss": 0.2488, + "step": 39833 + }, + { + "epoch": 0.6883121371302184, + "grad_norm": 1.1516166601502549, + "learning_rate": 4.67712397044174e-06, + "loss": 0.3192, + "step": 39834 + }, + { + "epoch": 0.6883294166436273, + "grad_norm": 1.054962831548327, + "learning_rate": 4.676650199861383e-06, + "loss": 0.456, + "step": 39835 + }, + { + "epoch": 0.6883466961570363, + "grad_norm": 1.1151033822319198, + "learning_rate": 4.676176445954159e-06, + "loss": 0.45, + "step": 39836 + }, + { + "epoch": 0.6883639756704452, + "grad_norm": 1.5747139441463858, + "learning_rate": 4.675702708721554e-06, + "loss": 0.284, + "step": 39837 + }, + { + "epoch": 0.6883812551838541, + "grad_norm": 1.187990713962456, + "learning_rate": 4.675228988165054e-06, + "loss": 0.3976, + "step": 39838 + }, + { + "epoch": 0.688398534697263, + "grad_norm": 1.2114190169470456, + "learning_rate": 4.6747552842861435e-06, + "loss": 0.4546, + "step": 39839 + }, + { + "epoch": 0.6884158142106718, + "grad_norm": 1.4503607433892753, + "learning_rate": 4.674281597086301e-06, + "loss": 0.3222, + "step": 39840 + }, + { + "epoch": 0.6884330937240807, + "grad_norm": 1.6473667233725862, + "learning_rate": 4.673807926567018e-06, + "loss": 0.5589, + "step": 39841 + }, + { + "epoch": 0.6884503732374896, + "grad_norm": 0.9201303368535785, + "learning_rate": 4.673334272729773e-06, + "loss": 0.3722, + "step": 39842 + }, + { + "epoch": 0.6884676527508985, + "grad_norm": 1.400273982170505, + "learning_rate": 4.672860635576041e-06, + "loss": 0.5401, + "step": 39843 + }, + { + "epoch": 0.6884849322643074, + "grad_norm": 0.917192253538885, + "learning_rate": 4.672387015107323e-06, + "loss": 0.8098, + "step": 39844 + }, + { + "epoch": 0.6885022117777163, + "grad_norm": 1.4008368253180983, + "learning_rate": 4.671913411325089e-06, + "loss": 0.3547, + "step": 39845 + }, + { + "epoch": 0.6885194912911252, + "grad_norm": 1.3946814068226132, + "learning_rate": 4.671439824230832e-06, + "loss": 0.2664, + "step": 39846 + }, + { + "epoch": 0.6885367708045341, + "grad_norm": 0.8799652745285103, + "learning_rate": 4.670966253826027e-06, + "loss": 0.3854, + "step": 39847 + }, + { + "epoch": 0.688554050317943, + "grad_norm": 0.9631622566883731, + "learning_rate": 4.670492700112164e-06, + "loss": 0.1412, + "step": 39848 + }, + { + "epoch": 0.6885713298313519, + "grad_norm": 1.6870908357435104, + "learning_rate": 4.670019163090714e-06, + "loss": 0.3598, + "step": 39849 + }, + { + "epoch": 0.6885886093447608, + "grad_norm": 0.8499974319594212, + "learning_rate": 4.669545642763178e-06, + "loss": 0.2469, + "step": 39850 + }, + { + "epoch": 0.6886058888581698, + "grad_norm": 1.122924875328317, + "learning_rate": 4.66907213913103e-06, + "loss": 0.454, + "step": 39851 + }, + { + "epoch": 0.6886231683715787, + "grad_norm": 0.6749037996351788, + "learning_rate": 4.66859865219575e-06, + "loss": 0.8863, + "step": 39852 + }, + { + "epoch": 0.6886404478849876, + "grad_norm": 0.9850443213407443, + "learning_rate": 4.668125181958827e-06, + "loss": 0.2756, + "step": 39853 + }, + { + "epoch": 0.6886577273983965, + "grad_norm": 1.0306737921320384, + "learning_rate": 4.667651728421738e-06, + "loss": 0.3136, + "step": 39854 + }, + { + "epoch": 0.6886750069118054, + "grad_norm": 1.5448544565299958, + "learning_rate": 4.667178291585969e-06, + "loss": 0.534, + "step": 39855 + }, + { + "epoch": 0.6886922864252143, + "grad_norm": 1.1576717971662833, + "learning_rate": 4.666704871453002e-06, + "loss": 0.4989, + "step": 39856 + }, + { + "epoch": 0.6887095659386232, + "grad_norm": 1.270279755232866, + "learning_rate": 4.666231468024325e-06, + "loss": 0.3722, + "step": 39857 + }, + { + "epoch": 0.6887268454520321, + "grad_norm": 1.310157114481926, + "learning_rate": 4.665758081301413e-06, + "loss": 0.4328, + "step": 39858 + }, + { + "epoch": 0.688744124965441, + "grad_norm": 1.124390541901964, + "learning_rate": 4.665284711285754e-06, + "loss": 0.4347, + "step": 39859 + }, + { + "epoch": 0.6887614044788499, + "grad_norm": 1.626704185812726, + "learning_rate": 4.664811357978826e-06, + "loss": 0.3168, + "step": 39860 + }, + { + "epoch": 0.6887786839922588, + "grad_norm": 1.4090009985319585, + "learning_rate": 4.664338021382115e-06, + "loss": 0.65, + "step": 39861 + }, + { + "epoch": 0.6887959635056676, + "grad_norm": 0.7308658460597243, + "learning_rate": 4.663864701497108e-06, + "loss": 0.4996, + "step": 39862 + }, + { + "epoch": 0.6888132430190765, + "grad_norm": 1.3772547724413582, + "learning_rate": 4.6633913983252755e-06, + "loss": 0.3303, + "step": 39863 + }, + { + "epoch": 0.6888305225324854, + "grad_norm": 1.2400339461961094, + "learning_rate": 4.662918111868112e-06, + "loss": 0.366, + "step": 39864 + }, + { + "epoch": 0.6888478020458944, + "grad_norm": 1.4203898828384076, + "learning_rate": 4.662444842127091e-06, + "loss": 0.315, + "step": 39865 + }, + { + "epoch": 0.6888650815593033, + "grad_norm": 1.4242878342125898, + "learning_rate": 4.661971589103698e-06, + "loss": 0.3862, + "step": 39866 + }, + { + "epoch": 0.6888823610727122, + "grad_norm": 1.8929504942087596, + "learning_rate": 4.6614983527994154e-06, + "loss": 0.4243, + "step": 39867 + }, + { + "epoch": 0.6888996405861211, + "grad_norm": 1.5348624623392975, + "learning_rate": 4.6610251332157295e-06, + "loss": 0.5268, + "step": 39868 + }, + { + "epoch": 0.68891692009953, + "grad_norm": 0.9962931676608395, + "learning_rate": 4.660551930354115e-06, + "loss": 0.2443, + "step": 39869 + }, + { + "epoch": 0.6889341996129389, + "grad_norm": 1.3151941727721148, + "learning_rate": 4.660078744216061e-06, + "loss": 0.4426, + "step": 39870 + }, + { + "epoch": 0.6889514791263478, + "grad_norm": 0.835366184526928, + "learning_rate": 4.6596055748030435e-06, + "loss": 0.3402, + "step": 39871 + }, + { + "epoch": 0.6889687586397567, + "grad_norm": 0.9058126607462395, + "learning_rate": 4.659132422116547e-06, + "loss": 0.579, + "step": 39872 + }, + { + "epoch": 0.6889860381531656, + "grad_norm": 0.45846328169496553, + "learning_rate": 4.658659286158056e-06, + "loss": 0.5203, + "step": 39873 + }, + { + "epoch": 0.6890033176665745, + "grad_norm": 1.2641772532441038, + "learning_rate": 4.6581861669290475e-06, + "loss": 0.2371, + "step": 39874 + }, + { + "epoch": 0.6890205971799834, + "grad_norm": 1.0429396722146929, + "learning_rate": 4.65771306443101e-06, + "loss": 0.3317, + "step": 39875 + }, + { + "epoch": 0.6890378766933923, + "grad_norm": 0.787877820715193, + "learning_rate": 4.657239978665416e-06, + "loss": 0.4763, + "step": 39876 + }, + { + "epoch": 0.6890551562068012, + "grad_norm": 0.9627892978182497, + "learning_rate": 4.656766909633753e-06, + "loss": 0.3543, + "step": 39877 + }, + { + "epoch": 0.6890724357202102, + "grad_norm": 0.9158383366706275, + "learning_rate": 4.656293857337503e-06, + "loss": 0.3848, + "step": 39878 + }, + { + "epoch": 0.6890897152336191, + "grad_norm": 0.49679650196933, + "learning_rate": 4.65582082177815e-06, + "loss": 0.4968, + "step": 39879 + }, + { + "epoch": 0.689106994747028, + "grad_norm": 1.445471781059566, + "learning_rate": 4.655347802957167e-06, + "loss": 0.5004, + "step": 39880 + }, + { + "epoch": 0.6891242742604369, + "grad_norm": 0.871490801397173, + "learning_rate": 4.6548748008760465e-06, + "loss": 0.4368, + "step": 39881 + }, + { + "epoch": 0.6891415537738458, + "grad_norm": 1.2874562237914067, + "learning_rate": 4.654401815536264e-06, + "loss": 0.5025, + "step": 39882 + }, + { + "epoch": 0.6891588332872546, + "grad_norm": 1.0166381691140287, + "learning_rate": 4.653928846939294e-06, + "loss": 0.4424, + "step": 39883 + }, + { + "epoch": 0.6891761128006635, + "grad_norm": 0.7293862515893133, + "learning_rate": 4.653455895086633e-06, + "loss": 0.2199, + "step": 39884 + }, + { + "epoch": 0.6891933923140724, + "grad_norm": 0.9869568869720885, + "learning_rate": 4.652982959979749e-06, + "loss": 0.2857, + "step": 39885 + }, + { + "epoch": 0.6892106718274813, + "grad_norm": 1.3470685242095914, + "learning_rate": 4.652510041620134e-06, + "loss": 0.2938, + "step": 39886 + }, + { + "epoch": 0.6892279513408902, + "grad_norm": 1.6770196412611442, + "learning_rate": 4.652037140009259e-06, + "loss": 0.4395, + "step": 39887 + }, + { + "epoch": 0.6892452308542991, + "grad_norm": 1.2940535863622054, + "learning_rate": 4.6515642551486154e-06, + "loss": 0.3693, + "step": 39888 + }, + { + "epoch": 0.689262510367708, + "grad_norm": 1.2598856694355853, + "learning_rate": 4.65109138703967e-06, + "loss": 0.4317, + "step": 39889 + }, + { + "epoch": 0.6892797898811169, + "grad_norm": 1.1953163477572366, + "learning_rate": 4.650618535683921e-06, + "loss": 0.2356, + "step": 39890 + }, + { + "epoch": 0.6892970693945258, + "grad_norm": 0.9437905013280226, + "learning_rate": 4.6501457010828375e-06, + "loss": 0.4029, + "step": 39891 + }, + { + "epoch": 0.6893143489079347, + "grad_norm": 1.2996534894233505, + "learning_rate": 4.6496728832379075e-06, + "loss": 0.3635, + "step": 39892 + }, + { + "epoch": 0.6893316284213437, + "grad_norm": 1.138173405288928, + "learning_rate": 4.64920008215061e-06, + "loss": 0.3126, + "step": 39893 + }, + { + "epoch": 0.6893489079347526, + "grad_norm": 1.1230935852853647, + "learning_rate": 4.648727297822419e-06, + "loss": 0.478, + "step": 39894 + }, + { + "epoch": 0.6893661874481615, + "grad_norm": 0.8715919268375041, + "learning_rate": 4.648254530254822e-06, + "loss": 0.31, + "step": 39895 + }, + { + "epoch": 0.6893834669615704, + "grad_norm": 0.7996035165303075, + "learning_rate": 4.647781779449298e-06, + "loss": 0.2633, + "step": 39896 + }, + { + "epoch": 0.6894007464749793, + "grad_norm": 0.9418579626399747, + "learning_rate": 4.647309045407332e-06, + "loss": 0.3568, + "step": 39897 + }, + { + "epoch": 0.6894180259883882, + "grad_norm": 0.5666658552189703, + "learning_rate": 4.646836328130397e-06, + "loss": 0.6514, + "step": 39898 + }, + { + "epoch": 0.6894353055017971, + "grad_norm": 1.2734357357791846, + "learning_rate": 4.646363627619982e-06, + "loss": 0.461, + "step": 39899 + }, + { + "epoch": 0.689452585015206, + "grad_norm": 1.2946420594890935, + "learning_rate": 4.645890943877559e-06, + "loss": 0.2939, + "step": 39900 + }, + { + "epoch": 0.6894698645286149, + "grad_norm": 1.0945835207936192, + "learning_rate": 4.645418276904612e-06, + "loss": 0.4113, + "step": 39901 + }, + { + "epoch": 0.6894871440420238, + "grad_norm": 1.284071001072827, + "learning_rate": 4.644945626702626e-06, + "loss": 0.504, + "step": 39902 + }, + { + "epoch": 0.6895044235554327, + "grad_norm": 1.51011333825069, + "learning_rate": 4.644472993273074e-06, + "loss": 0.2743, + "step": 39903 + }, + { + "epoch": 0.6895217030688415, + "grad_norm": 1.2485622680489026, + "learning_rate": 4.6440003766174426e-06, + "loss": 0.7841, + "step": 39904 + }, + { + "epoch": 0.6895389825822504, + "grad_norm": 1.2709913610691905, + "learning_rate": 4.643527776737205e-06, + "loss": 0.4477, + "step": 39905 + }, + { + "epoch": 0.6895562620956593, + "grad_norm": 1.3347791690418516, + "learning_rate": 4.643055193633845e-06, + "loss": 0.3702, + "step": 39906 + }, + { + "epoch": 0.6895735416090683, + "grad_norm": 1.0172051622968203, + "learning_rate": 4.642582627308845e-06, + "loss": 0.4606, + "step": 39907 + }, + { + "epoch": 0.6895908211224772, + "grad_norm": 1.3865317154474384, + "learning_rate": 4.642110077763687e-06, + "loss": 0.3393, + "step": 39908 + }, + { + "epoch": 0.6896081006358861, + "grad_norm": 1.1253593722243265, + "learning_rate": 4.6416375449998415e-06, + "loss": 0.4098, + "step": 39909 + }, + { + "epoch": 0.689625380149295, + "grad_norm": 0.7898295201860954, + "learning_rate": 4.6411650290188005e-06, + "loss": 0.3774, + "step": 39910 + }, + { + "epoch": 0.6896426596627039, + "grad_norm": 0.8578064077242644, + "learning_rate": 4.6406925298220326e-06, + "loss": 0.2133, + "step": 39911 + }, + { + "epoch": 0.6896599391761128, + "grad_norm": 1.0507800848297857, + "learning_rate": 4.640220047411024e-06, + "loss": 0.2402, + "step": 39912 + }, + { + "epoch": 0.6896772186895217, + "grad_norm": 1.3782814320282324, + "learning_rate": 4.6397475817872565e-06, + "loss": 0.3718, + "step": 39913 + }, + { + "epoch": 0.6896944982029306, + "grad_norm": 1.2538053214445624, + "learning_rate": 4.639275132952204e-06, + "loss": 0.4887, + "step": 39914 + }, + { + "epoch": 0.6897117777163395, + "grad_norm": 1.1803079301293176, + "learning_rate": 4.638802700907352e-06, + "loss": 0.3687, + "step": 39915 + }, + { + "epoch": 0.6897290572297484, + "grad_norm": 1.0810964702480323, + "learning_rate": 4.638330285654174e-06, + "loss": 0.411, + "step": 39916 + }, + { + "epoch": 0.6897463367431573, + "grad_norm": 1.3965599074315262, + "learning_rate": 4.637857887194154e-06, + "loss": 0.2911, + "step": 39917 + }, + { + "epoch": 0.6897636162565662, + "grad_norm": 0.959408701601924, + "learning_rate": 4.637385505528769e-06, + "loss": 0.4193, + "step": 39918 + }, + { + "epoch": 0.6897808957699751, + "grad_norm": 1.033387389145112, + "learning_rate": 4.636913140659504e-06, + "loss": 0.1623, + "step": 39919 + }, + { + "epoch": 0.689798175283384, + "grad_norm": 1.344300318090567, + "learning_rate": 4.6364407925878315e-06, + "loss": 0.3976, + "step": 39920 + }, + { + "epoch": 0.689815454796793, + "grad_norm": 0.9312514382257294, + "learning_rate": 4.6359684613152386e-06, + "loss": 0.2721, + "step": 39921 + }, + { + "epoch": 0.6898327343102019, + "grad_norm": 1.3036815469486933, + "learning_rate": 4.635496146843196e-06, + "loss": 0.3746, + "step": 39922 + }, + { + "epoch": 0.6898500138236108, + "grad_norm": 1.214717905960946, + "learning_rate": 4.635023849173186e-06, + "loss": 0.2355, + "step": 39923 + }, + { + "epoch": 0.6898672933370197, + "grad_norm": 1.425064143470519, + "learning_rate": 4.634551568306695e-06, + "loss": 0.4348, + "step": 39924 + }, + { + "epoch": 0.6898845728504285, + "grad_norm": 1.4009964511138835, + "learning_rate": 4.63407930424519e-06, + "loss": 0.3756, + "step": 39925 + }, + { + "epoch": 0.6899018523638374, + "grad_norm": 2.1113017561808047, + "learning_rate": 4.633607056990161e-06, + "loss": 0.3695, + "step": 39926 + }, + { + "epoch": 0.6899191318772463, + "grad_norm": 1.610550039613566, + "learning_rate": 4.633134826543079e-06, + "loss": 0.371, + "step": 39927 + }, + { + "epoch": 0.6899364113906552, + "grad_norm": 1.2548435961787554, + "learning_rate": 4.632662612905431e-06, + "loss": 0.3727, + "step": 39928 + }, + { + "epoch": 0.6899536909040641, + "grad_norm": 1.0967736187073915, + "learning_rate": 4.632190416078687e-06, + "loss": 0.5824, + "step": 39929 + }, + { + "epoch": 0.689970970417473, + "grad_norm": 1.5874664319115683, + "learning_rate": 4.63171823606433e-06, + "loss": 0.3991, + "step": 39930 + }, + { + "epoch": 0.6899882499308819, + "grad_norm": 1.12468733667783, + "learning_rate": 4.631246072863841e-06, + "loss": 0.384, + "step": 39931 + }, + { + "epoch": 0.6900055294442908, + "grad_norm": 0.8673565983872965, + "learning_rate": 4.6307739264787e-06, + "loss": 0.3208, + "step": 39932 + }, + { + "epoch": 0.6900228089576997, + "grad_norm": 1.5465992498797905, + "learning_rate": 4.630301796910384e-06, + "loss": 0.318, + "step": 39933 + }, + { + "epoch": 0.6900400884711086, + "grad_norm": 1.4232885736117364, + "learning_rate": 4.629829684160365e-06, + "loss": 0.2666, + "step": 39934 + }, + { + "epoch": 0.6900573679845176, + "grad_norm": 0.9812085481805962, + "learning_rate": 4.62935758823013e-06, + "loss": 0.3558, + "step": 39935 + }, + { + "epoch": 0.6900746474979265, + "grad_norm": 0.9396198760709491, + "learning_rate": 4.628885509121153e-06, + "loss": 0.7754, + "step": 39936 + }, + { + "epoch": 0.6900919270113354, + "grad_norm": 1.6267102645496887, + "learning_rate": 4.628413446834921e-06, + "loss": 0.4239, + "step": 39937 + }, + { + "epoch": 0.6901092065247443, + "grad_norm": 0.9276351630218002, + "learning_rate": 4.627941401372901e-06, + "loss": 0.818, + "step": 39938 + }, + { + "epoch": 0.6901264860381532, + "grad_norm": 1.341107807353832, + "learning_rate": 4.62746937273658e-06, + "loss": 0.5144, + "step": 39939 + }, + { + "epoch": 0.6901437655515621, + "grad_norm": 1.4285038708498, + "learning_rate": 4.626997360927431e-06, + "loss": 0.4302, + "step": 39940 + }, + { + "epoch": 0.690161045064971, + "grad_norm": 1.0303692208266515, + "learning_rate": 4.626525365946933e-06, + "loss": 0.6386, + "step": 39941 + }, + { + "epoch": 0.6901783245783799, + "grad_norm": 1.0635778504226503, + "learning_rate": 4.626053387796568e-06, + "loss": 0.354, + "step": 39942 + }, + { + "epoch": 0.6901956040917888, + "grad_norm": 1.114684060728791, + "learning_rate": 4.625581426477814e-06, + "loss": 0.3815, + "step": 39943 + }, + { + "epoch": 0.6902128836051977, + "grad_norm": 1.3661843280797057, + "learning_rate": 4.625109481992148e-06, + "loss": 0.4907, + "step": 39944 + }, + { + "epoch": 0.6902301631186066, + "grad_norm": 1.003816345870563, + "learning_rate": 4.6246375543410445e-06, + "loss": 0.1638, + "step": 39945 + }, + { + "epoch": 0.6902474426320154, + "grad_norm": 1.0535726445769424, + "learning_rate": 4.624165643525984e-06, + "loss": 0.3639, + "step": 39946 + }, + { + "epoch": 0.6902647221454243, + "grad_norm": 1.2136991247397404, + "learning_rate": 4.623693749548446e-06, + "loss": 0.2141, + "step": 39947 + }, + { + "epoch": 0.6902820016588332, + "grad_norm": 1.961327213258291, + "learning_rate": 4.623221872409911e-06, + "loss": 0.455, + "step": 39948 + }, + { + "epoch": 0.6902992811722422, + "grad_norm": 0.8830542022172897, + "learning_rate": 4.622750012111849e-06, + "loss": 0.3911, + "step": 39949 + }, + { + "epoch": 0.6903165606856511, + "grad_norm": 1.485162920384358, + "learning_rate": 4.6222781686557485e-06, + "loss": 0.3677, + "step": 39950 + }, + { + "epoch": 0.69033384019906, + "grad_norm": 0.8703507839206941, + "learning_rate": 4.621806342043076e-06, + "loss": 0.3203, + "step": 39951 + }, + { + "epoch": 0.6903511197124689, + "grad_norm": 1.7169376399350254, + "learning_rate": 4.621334532275316e-06, + "loss": 0.2951, + "step": 39952 + }, + { + "epoch": 0.6903683992258778, + "grad_norm": 1.4117358324808522, + "learning_rate": 4.620862739353943e-06, + "loss": 0.3981, + "step": 39953 + }, + { + "epoch": 0.6903856787392867, + "grad_norm": 1.3333309128274746, + "learning_rate": 4.620390963280443e-06, + "loss": 0.4802, + "step": 39954 + }, + { + "epoch": 0.6904029582526956, + "grad_norm": 0.663238953253116, + "learning_rate": 4.6199192040562856e-06, + "loss": 0.8924, + "step": 39955 + }, + { + "epoch": 0.6904202377661045, + "grad_norm": 1.3211975237294669, + "learning_rate": 4.619447461682947e-06, + "loss": 0.4936, + "step": 39956 + }, + { + "epoch": 0.6904375172795134, + "grad_norm": 1.2293913821200493, + "learning_rate": 4.618975736161907e-06, + "loss": 0.341, + "step": 39957 + }, + { + "epoch": 0.6904547967929223, + "grad_norm": 0.7876508524519619, + "learning_rate": 4.618504027494645e-06, + "loss": 0.3606, + "step": 39958 + }, + { + "epoch": 0.6904720763063312, + "grad_norm": 1.2105285386335078, + "learning_rate": 4.6180323356826405e-06, + "loss": 0.3354, + "step": 39959 + }, + { + "epoch": 0.6904893558197401, + "grad_norm": 0.9593245435463517, + "learning_rate": 4.617560660727364e-06, + "loss": 0.4714, + "step": 39960 + }, + { + "epoch": 0.690506635333149, + "grad_norm": 1.9645984289597505, + "learning_rate": 4.617089002630301e-06, + "loss": 0.3563, + "step": 39961 + }, + { + "epoch": 0.690523914846558, + "grad_norm": 1.2417992006579908, + "learning_rate": 4.61661736139292e-06, + "loss": 0.4984, + "step": 39962 + }, + { + "epoch": 0.6905411943599669, + "grad_norm": 0.9237513561660248, + "learning_rate": 4.616145737016703e-06, + "loss": 0.3587, + "step": 39963 + }, + { + "epoch": 0.6905584738733758, + "grad_norm": 0.8082753447250522, + "learning_rate": 4.615674129503129e-06, + "loss": 0.1428, + "step": 39964 + }, + { + "epoch": 0.6905757533867847, + "grad_norm": 0.9324453229923795, + "learning_rate": 4.615202538853671e-06, + "loss": 0.297, + "step": 39965 + }, + { + "epoch": 0.6905930329001936, + "grad_norm": 1.549140134283411, + "learning_rate": 4.61473096506981e-06, + "loss": 0.3687, + "step": 39966 + }, + { + "epoch": 0.6906103124136024, + "grad_norm": 1.231658520109735, + "learning_rate": 4.614259408153019e-06, + "loss": 0.2787, + "step": 39967 + }, + { + "epoch": 0.6906275919270113, + "grad_norm": 1.0118716252326143, + "learning_rate": 4.613787868104779e-06, + "loss": 0.2802, + "step": 39968 + }, + { + "epoch": 0.6906448714404202, + "grad_norm": 1.2941684855529123, + "learning_rate": 4.613316344926562e-06, + "loss": 0.4137, + "step": 39969 + }, + { + "epoch": 0.6906621509538291, + "grad_norm": 1.2993778372719493, + "learning_rate": 4.612844838619846e-06, + "loss": 0.3677, + "step": 39970 + }, + { + "epoch": 0.690679430467238, + "grad_norm": 1.1099095510490276, + "learning_rate": 4.612373349186111e-06, + "loss": 0.3926, + "step": 39971 + }, + { + "epoch": 0.6906967099806469, + "grad_norm": 1.4300970648977072, + "learning_rate": 4.611901876626835e-06, + "loss": 0.4287, + "step": 39972 + }, + { + "epoch": 0.6907139894940558, + "grad_norm": 0.6108458512512623, + "learning_rate": 4.611430420943489e-06, + "loss": 0.5413, + "step": 39973 + }, + { + "epoch": 0.6907312690074647, + "grad_norm": 1.3729343311604578, + "learning_rate": 4.610958982137556e-06, + "loss": 0.3955, + "step": 39974 + }, + { + "epoch": 0.6907485485208736, + "grad_norm": 1.425371598494412, + "learning_rate": 4.6104875602105046e-06, + "loss": 0.2791, + "step": 39975 + }, + { + "epoch": 0.6907658280342825, + "grad_norm": 2.344552189227884, + "learning_rate": 4.6100161551638155e-06, + "loss": 0.2028, + "step": 39976 + }, + { + "epoch": 0.6907831075476915, + "grad_norm": 0.971032158118574, + "learning_rate": 4.60954476699897e-06, + "loss": 0.3135, + "step": 39977 + }, + { + "epoch": 0.6908003870611004, + "grad_norm": 0.5948220385553812, + "learning_rate": 4.609073395717436e-06, + "loss": 0.5378, + "step": 39978 + }, + { + "epoch": 0.6908176665745093, + "grad_norm": 1.0712386563101184, + "learning_rate": 4.608602041320698e-06, + "loss": 0.3625, + "step": 39979 + }, + { + "epoch": 0.6908349460879182, + "grad_norm": 1.4033408400729457, + "learning_rate": 4.608130703810224e-06, + "loss": 0.317, + "step": 39980 + }, + { + "epoch": 0.6908522256013271, + "grad_norm": 1.420145499696233, + "learning_rate": 4.607659383187494e-06, + "loss": 0.2256, + "step": 39981 + }, + { + "epoch": 0.690869505114736, + "grad_norm": 2.0338347010027373, + "learning_rate": 4.6071880794539844e-06, + "loss": 0.5358, + "step": 39982 + }, + { + "epoch": 0.6908867846281449, + "grad_norm": 1.3805842555988426, + "learning_rate": 4.606716792611175e-06, + "loss": 0.3906, + "step": 39983 + }, + { + "epoch": 0.6909040641415538, + "grad_norm": 1.3730594347757135, + "learning_rate": 4.606245522660536e-06, + "loss": 0.2718, + "step": 39984 + }, + { + "epoch": 0.6909213436549627, + "grad_norm": 1.0106680356996243, + "learning_rate": 4.6057742696035505e-06, + "loss": 0.3606, + "step": 39985 + }, + { + "epoch": 0.6909386231683716, + "grad_norm": 1.2423441308647238, + "learning_rate": 4.605303033441684e-06, + "loss": 0.281, + "step": 39986 + }, + { + "epoch": 0.6909559026817805, + "grad_norm": 1.0863173043236656, + "learning_rate": 4.604831814176419e-06, + "loss": 0.2983, + "step": 39987 + }, + { + "epoch": 0.6909731821951893, + "grad_norm": 2.6340600542210137, + "learning_rate": 4.604360611809236e-06, + "loss": 0.4902, + "step": 39988 + }, + { + "epoch": 0.6909904617085982, + "grad_norm": 1.3831966983145398, + "learning_rate": 4.6038894263416e-06, + "loss": 0.3557, + "step": 39989 + }, + { + "epoch": 0.6910077412220071, + "grad_norm": 2.214515812730105, + "learning_rate": 4.6034182577749965e-06, + "loss": 0.4898, + "step": 39990 + }, + { + "epoch": 0.691025020735416, + "grad_norm": 1.731679859240433, + "learning_rate": 4.602947106110894e-06, + "loss": 0.3476, + "step": 39991 + }, + { + "epoch": 0.691042300248825, + "grad_norm": 1.4689038350759505, + "learning_rate": 4.6024759713507705e-06, + "loss": 0.525, + "step": 39992 + }, + { + "epoch": 0.6910595797622339, + "grad_norm": 1.1627024643566086, + "learning_rate": 4.602004853496104e-06, + "loss": 0.2794, + "step": 39993 + }, + { + "epoch": 0.6910768592756428, + "grad_norm": 2.1084066393629604, + "learning_rate": 4.60153375254837e-06, + "loss": 0.428, + "step": 39994 + }, + { + "epoch": 0.6910941387890517, + "grad_norm": 1.291480197250614, + "learning_rate": 4.601062668509044e-06, + "loss": 0.3961, + "step": 39995 + }, + { + "epoch": 0.6911114183024606, + "grad_norm": 1.689819956689739, + "learning_rate": 4.600591601379597e-06, + "loss": 0.2591, + "step": 39996 + }, + { + "epoch": 0.6911286978158695, + "grad_norm": 1.3916272199221005, + "learning_rate": 4.600120551161506e-06, + "loss": 0.2601, + "step": 39997 + }, + { + "epoch": 0.6911459773292784, + "grad_norm": 1.4109700048046625, + "learning_rate": 4.599649517856249e-06, + "loss": 0.2899, + "step": 39998 + }, + { + "epoch": 0.6911632568426873, + "grad_norm": 0.9759639283665829, + "learning_rate": 4.599178501465304e-06, + "loss": 0.299, + "step": 39999 + }, + { + "epoch": 0.6911805363560962, + "grad_norm": 1.226604441054406, + "learning_rate": 4.598707501990137e-06, + "loss": 0.5574, + "step": 40000 + }, + { + "epoch": 0.6911978158695051, + "grad_norm": 1.2137963350020309, + "learning_rate": 4.598236519432233e-06, + "loss": 0.1888, + "step": 40001 + }, + { + "epoch": 0.691215095382914, + "grad_norm": 1.2339632027493865, + "learning_rate": 4.5977655537930585e-06, + "loss": 0.3255, + "step": 40002 + }, + { + "epoch": 0.6912323748963229, + "grad_norm": 1.2175569438137428, + "learning_rate": 4.597294605074094e-06, + "loss": 0.3931, + "step": 40003 + }, + { + "epoch": 0.6912496544097319, + "grad_norm": 1.4222794529603875, + "learning_rate": 4.596823673276813e-06, + "loss": 0.2008, + "step": 40004 + }, + { + "epoch": 0.6912669339231408, + "grad_norm": 1.124654153423893, + "learning_rate": 4.596352758402694e-06, + "loss": 0.4555, + "step": 40005 + }, + { + "epoch": 0.6912842134365497, + "grad_norm": 1.111276707593271, + "learning_rate": 4.595881860453209e-06, + "loss": 0.3121, + "step": 40006 + }, + { + "epoch": 0.6913014929499586, + "grad_norm": 1.2930775534068213, + "learning_rate": 4.5954109794298305e-06, + "loss": 0.2936, + "step": 40007 + }, + { + "epoch": 0.6913187724633675, + "grad_norm": 1.532441572478141, + "learning_rate": 4.594940115334038e-06, + "loss": 0.2323, + "step": 40008 + }, + { + "epoch": 0.6913360519767764, + "grad_norm": 1.805575585009535, + "learning_rate": 4.5944692681673e-06, + "loss": 0.3155, + "step": 40009 + }, + { + "epoch": 0.6913533314901852, + "grad_norm": 1.241353932986253, + "learning_rate": 4.593998437931096e-06, + "loss": 0.3564, + "step": 40010 + }, + { + "epoch": 0.6913706110035941, + "grad_norm": 0.829471751620131, + "learning_rate": 4.593527624626899e-06, + "loss": 0.3904, + "step": 40011 + }, + { + "epoch": 0.691387890517003, + "grad_norm": 2.000753135209071, + "learning_rate": 4.5930568282561874e-06, + "loss": 0.7015, + "step": 40012 + }, + { + "epoch": 0.6914051700304119, + "grad_norm": 1.2582088382273136, + "learning_rate": 4.59258604882043e-06, + "loss": 0.5073, + "step": 40013 + }, + { + "epoch": 0.6914224495438208, + "grad_norm": 1.387572221066136, + "learning_rate": 4.592115286321107e-06, + "loss": 0.4499, + "step": 40014 + }, + { + "epoch": 0.6914397290572297, + "grad_norm": 1.0920215468637944, + "learning_rate": 4.5916445407596865e-06, + "loss": 0.5153, + "step": 40015 + }, + { + "epoch": 0.6914570085706386, + "grad_norm": 1.0153099143431616, + "learning_rate": 4.591173812137647e-06, + "loss": 0.4612, + "step": 40016 + }, + { + "epoch": 0.6914742880840475, + "grad_norm": 1.0172477729242793, + "learning_rate": 4.590703100456465e-06, + "loss": 0.3093, + "step": 40017 + }, + { + "epoch": 0.6914915675974564, + "grad_norm": 1.1121000738350089, + "learning_rate": 4.59023240571761e-06, + "loss": 0.2934, + "step": 40018 + }, + { + "epoch": 0.6915088471108654, + "grad_norm": 1.1933280406486404, + "learning_rate": 4.589761727922559e-06, + "loss": 0.6723, + "step": 40019 + }, + { + "epoch": 0.6915261266242743, + "grad_norm": 1.1609325634250311, + "learning_rate": 4.589291067072784e-06, + "loss": 0.3851, + "step": 40020 + }, + { + "epoch": 0.6915434061376832, + "grad_norm": 1.504894884701459, + "learning_rate": 4.58882042316976e-06, + "loss": 0.2179, + "step": 40021 + }, + { + "epoch": 0.6915606856510921, + "grad_norm": 0.7442068840635561, + "learning_rate": 4.588349796214961e-06, + "loss": 0.278, + "step": 40022 + }, + { + "epoch": 0.691577965164501, + "grad_norm": 0.5530641840373085, + "learning_rate": 4.587879186209866e-06, + "loss": 0.2904, + "step": 40023 + }, + { + "epoch": 0.6915952446779099, + "grad_norm": 1.049336275794597, + "learning_rate": 4.587408593155941e-06, + "loss": 0.5765, + "step": 40024 + }, + { + "epoch": 0.6916125241913188, + "grad_norm": 1.3275169064770447, + "learning_rate": 4.586938017054668e-06, + "loss": 0.38, + "step": 40025 + }, + { + "epoch": 0.6916298037047277, + "grad_norm": 1.1412296131511472, + "learning_rate": 4.586467457907512e-06, + "loss": 0.4034, + "step": 40026 + }, + { + "epoch": 0.6916470832181366, + "grad_norm": 0.6139210813147138, + "learning_rate": 4.585996915715951e-06, + "loss": 0.8066, + "step": 40027 + }, + { + "epoch": 0.6916643627315455, + "grad_norm": 1.0747491872163366, + "learning_rate": 4.585526390481463e-06, + "loss": 0.2201, + "step": 40028 + }, + { + "epoch": 0.6916816422449544, + "grad_norm": 2.0568974352800558, + "learning_rate": 4.585055882205515e-06, + "loss": 0.3337, + "step": 40029 + }, + { + "epoch": 0.6916989217583633, + "grad_norm": 1.1231728449458918, + "learning_rate": 4.584585390889587e-06, + "loss": 0.344, + "step": 40030 + }, + { + "epoch": 0.6917162012717721, + "grad_norm": 1.3086873890990465, + "learning_rate": 4.584114916535144e-06, + "loss": 0.4211, + "step": 40031 + }, + { + "epoch": 0.691733480785181, + "grad_norm": 0.9679525645070844, + "learning_rate": 4.583644459143665e-06, + "loss": 0.3255, + "step": 40032 + }, + { + "epoch": 0.69175076029859, + "grad_norm": 1.1421481580809985, + "learning_rate": 4.583174018716624e-06, + "loss": 0.2914, + "step": 40033 + }, + { + "epoch": 0.6917680398119989, + "grad_norm": 0.8771672987393838, + "learning_rate": 4.582703595255498e-06, + "loss": 0.4157, + "step": 40034 + }, + { + "epoch": 0.6917853193254078, + "grad_norm": 1.3367123196520871, + "learning_rate": 4.582233188761751e-06, + "loss": 0.4535, + "step": 40035 + }, + { + "epoch": 0.6918025988388167, + "grad_norm": 1.214301172968457, + "learning_rate": 4.581762799236866e-06, + "loss": 0.2447, + "step": 40036 + }, + { + "epoch": 0.6918198783522256, + "grad_norm": 1.1756062498623392, + "learning_rate": 4.581292426682308e-06, + "loss": 0.381, + "step": 40037 + }, + { + "epoch": 0.6918371578656345, + "grad_norm": 0.944414402735925, + "learning_rate": 4.580822071099553e-06, + "loss": 0.2828, + "step": 40038 + }, + { + "epoch": 0.6918544373790434, + "grad_norm": 0.8631600791962376, + "learning_rate": 4.580351732490079e-06, + "loss": 0.3518, + "step": 40039 + }, + { + "epoch": 0.6918717168924523, + "grad_norm": 1.0732436235794354, + "learning_rate": 4.579881410855353e-06, + "loss": 0.4703, + "step": 40040 + }, + { + "epoch": 0.6918889964058612, + "grad_norm": 1.1016109542226364, + "learning_rate": 4.579411106196851e-06, + "loss": 0.3256, + "step": 40041 + }, + { + "epoch": 0.6919062759192701, + "grad_norm": 0.9664137075041738, + "learning_rate": 4.578940818516045e-06, + "loss": 0.2252, + "step": 40042 + }, + { + "epoch": 0.691923555432679, + "grad_norm": 0.7984115305348932, + "learning_rate": 4.578470547814406e-06, + "loss": 0.814, + "step": 40043 + }, + { + "epoch": 0.6919408349460879, + "grad_norm": 1.3060911911255295, + "learning_rate": 4.578000294093411e-06, + "loss": 0.2219, + "step": 40044 + }, + { + "epoch": 0.6919581144594968, + "grad_norm": 1.3226482188572513, + "learning_rate": 4.577530057354533e-06, + "loss": 0.4994, + "step": 40045 + }, + { + "epoch": 0.6919753939729058, + "grad_norm": 1.196948754292604, + "learning_rate": 4.577059837599245e-06, + "loss": 0.6499, + "step": 40046 + }, + { + "epoch": 0.6919926734863147, + "grad_norm": 1.149841292137698, + "learning_rate": 4.576589634829013e-06, + "loss": 0.3315, + "step": 40047 + }, + { + "epoch": 0.6920099529997236, + "grad_norm": 1.6846502776974512, + "learning_rate": 4.576119449045318e-06, + "loss": 0.3444, + "step": 40048 + }, + { + "epoch": 0.6920272325131325, + "grad_norm": 1.092101266575917, + "learning_rate": 4.575649280249626e-06, + "loss": 0.4547, + "step": 40049 + }, + { + "epoch": 0.6920445120265414, + "grad_norm": 1.2155824710375092, + "learning_rate": 4.575179128443411e-06, + "loss": 0.3084, + "step": 40050 + }, + { + "epoch": 0.6920617915399503, + "grad_norm": 0.8977171249268373, + "learning_rate": 4.574708993628149e-06, + "loss": 0.4104, + "step": 40051 + }, + { + "epoch": 0.6920790710533591, + "grad_norm": 1.3460931421629911, + "learning_rate": 4.574238875805314e-06, + "loss": 0.5387, + "step": 40052 + }, + { + "epoch": 0.692096350566768, + "grad_norm": 1.3694152531041455, + "learning_rate": 4.573768774976371e-06, + "loss": 0.3011, + "step": 40053 + }, + { + "epoch": 0.6921136300801769, + "grad_norm": 1.172588238647933, + "learning_rate": 4.5732986911428e-06, + "loss": 0.3513, + "step": 40054 + }, + { + "epoch": 0.6921309095935858, + "grad_norm": 2.148308405182004, + "learning_rate": 4.572828624306067e-06, + "loss": 0.2529, + "step": 40055 + }, + { + "epoch": 0.6921481891069947, + "grad_norm": 0.9804292672743836, + "learning_rate": 4.572358574467648e-06, + "loss": 0.4174, + "step": 40056 + }, + { + "epoch": 0.6921654686204036, + "grad_norm": 0.6636831765981372, + "learning_rate": 4.571888541629017e-06, + "loss": 0.5948, + "step": 40057 + }, + { + "epoch": 0.6921827481338125, + "grad_norm": 1.6376591499455624, + "learning_rate": 4.57141852579164e-06, + "loss": 0.3499, + "step": 40058 + }, + { + "epoch": 0.6922000276472214, + "grad_norm": 1.0619156043105102, + "learning_rate": 4.570948526956996e-06, + "loss": 0.4162, + "step": 40059 + }, + { + "epoch": 0.6922173071606303, + "grad_norm": 1.034141859169646, + "learning_rate": 4.570478545126551e-06, + "loss": 0.4729, + "step": 40060 + }, + { + "epoch": 0.6922345866740393, + "grad_norm": 0.5254039893228115, + "learning_rate": 4.570008580301779e-06, + "loss": 0.6651, + "step": 40061 + }, + { + "epoch": 0.6922518661874482, + "grad_norm": 1.18455417379035, + "learning_rate": 4.569538632484154e-06, + "loss": 0.3222, + "step": 40062 + }, + { + "epoch": 0.6922691457008571, + "grad_norm": 1.0581011119961017, + "learning_rate": 4.569068701675149e-06, + "loss": 0.3393, + "step": 40063 + }, + { + "epoch": 0.692286425214266, + "grad_norm": 1.3690380262898099, + "learning_rate": 4.568598787876231e-06, + "loss": 0.369, + "step": 40064 + }, + { + "epoch": 0.6923037047276749, + "grad_norm": 0.9635353866779426, + "learning_rate": 4.568128891088878e-06, + "loss": 0.3882, + "step": 40065 + }, + { + "epoch": 0.6923209842410838, + "grad_norm": 0.7279717075284113, + "learning_rate": 4.567659011314553e-06, + "loss": 0.472, + "step": 40066 + }, + { + "epoch": 0.6923382637544927, + "grad_norm": 1.0515830118062914, + "learning_rate": 4.567189148554734e-06, + "loss": 0.3606, + "step": 40067 + }, + { + "epoch": 0.6923555432679016, + "grad_norm": 1.2192437960380273, + "learning_rate": 4.566719302810895e-06, + "loss": 0.3364, + "step": 40068 + }, + { + "epoch": 0.6923728227813105, + "grad_norm": 1.2659647680959556, + "learning_rate": 4.566249474084501e-06, + "loss": 0.3181, + "step": 40069 + }, + { + "epoch": 0.6923901022947194, + "grad_norm": 1.197538923656247, + "learning_rate": 4.565779662377029e-06, + "loss": 0.3187, + "step": 40070 + }, + { + "epoch": 0.6924073818081283, + "grad_norm": 1.168536887762124, + "learning_rate": 4.565309867689945e-06, + "loss": 0.353, + "step": 40071 + }, + { + "epoch": 0.6924246613215372, + "grad_norm": 1.430685741671294, + "learning_rate": 4.5648400900247235e-06, + "loss": 0.3918, + "step": 40072 + }, + { + "epoch": 0.692441940834946, + "grad_norm": 0.8370183895989989, + "learning_rate": 4.564370329382835e-06, + "loss": 0.7091, + "step": 40073 + }, + { + "epoch": 0.6924592203483549, + "grad_norm": 1.0569388643052262, + "learning_rate": 4.563900585765757e-06, + "loss": 0.5296, + "step": 40074 + }, + { + "epoch": 0.6924764998617639, + "grad_norm": 1.9095332063056611, + "learning_rate": 4.563430859174951e-06, + "loss": 0.2846, + "step": 40075 + }, + { + "epoch": 0.6924937793751728, + "grad_norm": 1.1889605561581638, + "learning_rate": 4.562961149611897e-06, + "loss": 0.7525, + "step": 40076 + }, + { + "epoch": 0.6925110588885817, + "grad_norm": 1.1073650392561596, + "learning_rate": 4.562491457078057e-06, + "loss": 0.1894, + "step": 40077 + }, + { + "epoch": 0.6925283384019906, + "grad_norm": 1.033974811970172, + "learning_rate": 4.562021781574908e-06, + "loss": 0.4696, + "step": 40078 + }, + { + "epoch": 0.6925456179153995, + "grad_norm": 1.0626722942639018, + "learning_rate": 4.561552123103924e-06, + "loss": 0.3192, + "step": 40079 + }, + { + "epoch": 0.6925628974288084, + "grad_norm": 2.248912389900373, + "learning_rate": 4.561082481666568e-06, + "loss": 0.2188, + "step": 40080 + }, + { + "epoch": 0.6925801769422173, + "grad_norm": 0.9863006569425179, + "learning_rate": 4.560612857264318e-06, + "loss": 0.4195, + "step": 40081 + }, + { + "epoch": 0.6925974564556262, + "grad_norm": 1.2204768613765165, + "learning_rate": 4.560143249898639e-06, + "loss": 0.245, + "step": 40082 + }, + { + "epoch": 0.6926147359690351, + "grad_norm": 1.3398316856936894, + "learning_rate": 4.559673659571004e-06, + "loss": 0.2946, + "step": 40083 + }, + { + "epoch": 0.692632015482444, + "grad_norm": 1.0421020474725322, + "learning_rate": 4.559204086282885e-06, + "loss": 0.372, + "step": 40084 + }, + { + "epoch": 0.6926492949958529, + "grad_norm": 1.4947521191011954, + "learning_rate": 4.558734530035756e-06, + "loss": 0.3489, + "step": 40085 + }, + { + "epoch": 0.6926665745092618, + "grad_norm": 1.0051970114662447, + "learning_rate": 4.55826499083108e-06, + "loss": 0.4414, + "step": 40086 + }, + { + "epoch": 0.6926838540226707, + "grad_norm": 0.911659754030993, + "learning_rate": 4.557795468670337e-06, + "loss": 0.3934, + "step": 40087 + }, + { + "epoch": 0.6927011335360796, + "grad_norm": 1.3132113221483455, + "learning_rate": 4.55732596355499e-06, + "loss": 0.4741, + "step": 40088 + }, + { + "epoch": 0.6927184130494886, + "grad_norm": 0.8810833916393446, + "learning_rate": 4.55685647548651e-06, + "loss": 0.2767, + "step": 40089 + }, + { + "epoch": 0.6927356925628975, + "grad_norm": 1.636772731541905, + "learning_rate": 4.556387004466367e-06, + "loss": 0.4111, + "step": 40090 + }, + { + "epoch": 0.6927529720763064, + "grad_norm": 2.5293756659122995, + "learning_rate": 4.555917550496035e-06, + "loss": 0.7198, + "step": 40091 + }, + { + "epoch": 0.6927702515897153, + "grad_norm": 1.0575425706617958, + "learning_rate": 4.555448113576985e-06, + "loss": 0.5911, + "step": 40092 + }, + { + "epoch": 0.6927875311031242, + "grad_norm": 1.6888862386139913, + "learning_rate": 4.5549786937106845e-06, + "loss": 0.2168, + "step": 40093 + }, + { + "epoch": 0.692804810616533, + "grad_norm": 0.8888104088962755, + "learning_rate": 4.554509290898605e-06, + "loss": 0.3066, + "step": 40094 + }, + { + "epoch": 0.6928220901299419, + "grad_norm": 0.9124846942119083, + "learning_rate": 4.5540399051422145e-06, + "loss": 0.3572, + "step": 40095 + }, + { + "epoch": 0.6928393696433508, + "grad_norm": 0.944205557425495, + "learning_rate": 4.553570536442985e-06, + "loss": 0.496, + "step": 40096 + }, + { + "epoch": 0.6928566491567597, + "grad_norm": 1.0149631444754055, + "learning_rate": 4.553101184802386e-06, + "loss": 0.3143, + "step": 40097 + }, + { + "epoch": 0.6928739286701686, + "grad_norm": 1.2745504953531246, + "learning_rate": 4.552631850221892e-06, + "loss": 0.4221, + "step": 40098 + }, + { + "epoch": 0.6928912081835775, + "grad_norm": 1.0214129684623405, + "learning_rate": 4.552162532702968e-06, + "loss": 0.3014, + "step": 40099 + }, + { + "epoch": 0.6929084876969864, + "grad_norm": 1.0335016006194246, + "learning_rate": 4.551693232247083e-06, + "loss": 0.2983, + "step": 40100 + }, + { + "epoch": 0.6929257672103953, + "grad_norm": 1.3180116788719243, + "learning_rate": 4.551223948855707e-06, + "loss": 0.288, + "step": 40101 + }, + { + "epoch": 0.6929430467238042, + "grad_norm": 1.03832092449283, + "learning_rate": 4.550754682530313e-06, + "loss": 0.3811, + "step": 40102 + }, + { + "epoch": 0.6929603262372132, + "grad_norm": 1.291570848876464, + "learning_rate": 4.550285433272373e-06, + "loss": 0.6507, + "step": 40103 + }, + { + "epoch": 0.6929776057506221, + "grad_norm": 0.9335974022425242, + "learning_rate": 4.549816201083349e-06, + "loss": 0.452, + "step": 40104 + }, + { + "epoch": 0.692994885264031, + "grad_norm": 1.4673584371963084, + "learning_rate": 4.549346985964719e-06, + "loss": 0.2734, + "step": 40105 + }, + { + "epoch": 0.6930121647774399, + "grad_norm": 1.567474377366858, + "learning_rate": 4.548877787917945e-06, + "loss": 0.3506, + "step": 40106 + }, + { + "epoch": 0.6930294442908488, + "grad_norm": 1.2471661533111515, + "learning_rate": 4.548408606944499e-06, + "loss": 0.4425, + "step": 40107 + }, + { + "epoch": 0.6930467238042577, + "grad_norm": 1.0888065368350146, + "learning_rate": 4.547939443045856e-06, + "loss": 0.3689, + "step": 40108 + }, + { + "epoch": 0.6930640033176666, + "grad_norm": 0.8647897423305048, + "learning_rate": 4.547470296223478e-06, + "loss": 0.2911, + "step": 40109 + }, + { + "epoch": 0.6930812828310755, + "grad_norm": 1.0767527823260772, + "learning_rate": 4.54700116647884e-06, + "loss": 0.5471, + "step": 40110 + }, + { + "epoch": 0.6930985623444844, + "grad_norm": 1.2494785717790753, + "learning_rate": 4.546532053813405e-06, + "loss": 0.2867, + "step": 40111 + }, + { + "epoch": 0.6931158418578933, + "grad_norm": 1.1047431069989058, + "learning_rate": 4.5460629582286465e-06, + "loss": 0.2949, + "step": 40112 + }, + { + "epoch": 0.6931331213713022, + "grad_norm": 1.129394432446461, + "learning_rate": 4.545593879726032e-06, + "loss": 0.4508, + "step": 40113 + }, + { + "epoch": 0.6931504008847111, + "grad_norm": 1.0427829047708321, + "learning_rate": 4.5451248183070375e-06, + "loss": 0.4612, + "step": 40114 + }, + { + "epoch": 0.6931676803981199, + "grad_norm": 0.9991548256602418, + "learning_rate": 4.5446557739731225e-06, + "loss": 0.3609, + "step": 40115 + }, + { + "epoch": 0.6931849599115288, + "grad_norm": 0.5849230996759052, + "learning_rate": 4.544186746725764e-06, + "loss": 0.5903, + "step": 40116 + }, + { + "epoch": 0.6932022394249377, + "grad_norm": 0.8028093119925779, + "learning_rate": 4.543717736566423e-06, + "loss": 0.4309, + "step": 40117 + }, + { + "epoch": 0.6932195189383467, + "grad_norm": 1.5460979806427002, + "learning_rate": 4.543248743496573e-06, + "loss": 0.4604, + "step": 40118 + }, + { + "epoch": 0.6932367984517556, + "grad_norm": 1.2870717594523806, + "learning_rate": 4.542779767517687e-06, + "loss": 0.5054, + "step": 40119 + }, + { + "epoch": 0.6932540779651645, + "grad_norm": 0.913297234808352, + "learning_rate": 4.542310808631224e-06, + "loss": 0.3772, + "step": 40120 + }, + { + "epoch": 0.6932713574785734, + "grad_norm": 0.9438485644500628, + "learning_rate": 4.541841866838663e-06, + "loss": 0.417, + "step": 40121 + }, + { + "epoch": 0.6932886369919823, + "grad_norm": 1.5608782307969198, + "learning_rate": 4.541372942141465e-06, + "loss": 0.2919, + "step": 40122 + }, + { + "epoch": 0.6933059165053912, + "grad_norm": 1.4648092008312406, + "learning_rate": 4.540904034541102e-06, + "loss": 0.3981, + "step": 40123 + }, + { + "epoch": 0.6933231960188001, + "grad_norm": 1.3983781511711246, + "learning_rate": 4.540435144039041e-06, + "loss": 0.2973, + "step": 40124 + }, + { + "epoch": 0.693340475532209, + "grad_norm": 1.2126930105605669, + "learning_rate": 4.539966270636757e-06, + "loss": 0.2542, + "step": 40125 + }, + { + "epoch": 0.6933577550456179, + "grad_norm": 1.0206999831928787, + "learning_rate": 4.5394974143357084e-06, + "loss": 0.4491, + "step": 40126 + }, + { + "epoch": 0.6933750345590268, + "grad_norm": 1.0931880286743925, + "learning_rate": 4.5390285751373744e-06, + "loss": 0.3244, + "step": 40127 + }, + { + "epoch": 0.6933923140724357, + "grad_norm": 0.9890706868529854, + "learning_rate": 4.538559753043212e-06, + "loss": 0.2415, + "step": 40128 + }, + { + "epoch": 0.6934095935858446, + "grad_norm": 1.3122964401734034, + "learning_rate": 4.538090948054701e-06, + "loss": 0.4019, + "step": 40129 + }, + { + "epoch": 0.6934268730992535, + "grad_norm": 0.5468776179720717, + "learning_rate": 4.537622160173301e-06, + "loss": 0.7008, + "step": 40130 + }, + { + "epoch": 0.6934441526126625, + "grad_norm": 0.5665682683290867, + "learning_rate": 4.537153389400481e-06, + "loss": 0.7885, + "step": 40131 + }, + { + "epoch": 0.6934614321260714, + "grad_norm": 0.8316088739582481, + "learning_rate": 4.536684635737718e-06, + "loss": 0.2978, + "step": 40132 + }, + { + "epoch": 0.6934787116394803, + "grad_norm": 1.3595758487934768, + "learning_rate": 4.536215899186469e-06, + "loss": 0.2561, + "step": 40133 + }, + { + "epoch": 0.6934959911528892, + "grad_norm": 0.879824208592179, + "learning_rate": 4.535747179748211e-06, + "loss": 0.2963, + "step": 40134 + }, + { + "epoch": 0.6935132706662981, + "grad_norm": 1.2437563945170753, + "learning_rate": 4.535278477424405e-06, + "loss": 0.3871, + "step": 40135 + }, + { + "epoch": 0.693530550179707, + "grad_norm": 1.4356092525506432, + "learning_rate": 4.534809792216521e-06, + "loss": 0.56, + "step": 40136 + }, + { + "epoch": 0.6935478296931158, + "grad_norm": 1.4052253415850173, + "learning_rate": 4.53434112412603e-06, + "loss": 0.4056, + "step": 40137 + }, + { + "epoch": 0.6935651092065247, + "grad_norm": 1.1125541538421908, + "learning_rate": 4.533872473154399e-06, + "loss": 0.2414, + "step": 40138 + }, + { + "epoch": 0.6935823887199336, + "grad_norm": 1.3093663516110257, + "learning_rate": 4.533403839303097e-06, + "loss": 0.4738, + "step": 40139 + }, + { + "epoch": 0.6935996682333425, + "grad_norm": 1.2738716789391114, + "learning_rate": 4.532935222573585e-06, + "loss": 0.313, + "step": 40140 + }, + { + "epoch": 0.6936169477467514, + "grad_norm": 1.4723618353531929, + "learning_rate": 4.532466622967334e-06, + "loss": 0.2669, + "step": 40141 + }, + { + "epoch": 0.6936342272601603, + "grad_norm": 1.1874440012880216, + "learning_rate": 4.531998040485815e-06, + "loss": 0.4206, + "step": 40142 + }, + { + "epoch": 0.6936515067735692, + "grad_norm": 1.2226564007229668, + "learning_rate": 4.531529475130497e-06, + "loss": 0.3518, + "step": 40143 + }, + { + "epoch": 0.6936687862869781, + "grad_norm": 1.2811436210448812, + "learning_rate": 4.53106092690284e-06, + "loss": 0.5941, + "step": 40144 + }, + { + "epoch": 0.693686065800387, + "grad_norm": 1.0592794527904477, + "learning_rate": 4.5305923958043195e-06, + "loss": 0.3413, + "step": 40145 + }, + { + "epoch": 0.693703345313796, + "grad_norm": 1.2410670464089246, + "learning_rate": 4.5301238818363965e-06, + "loss": 0.3003, + "step": 40146 + }, + { + "epoch": 0.6937206248272049, + "grad_norm": 1.3092795153779273, + "learning_rate": 4.529655385000541e-06, + "loss": 0.4685, + "step": 40147 + }, + { + "epoch": 0.6937379043406138, + "grad_norm": 1.1244759590081501, + "learning_rate": 4.52918690529822e-06, + "loss": 0.5219, + "step": 40148 + }, + { + "epoch": 0.6937551838540227, + "grad_norm": 0.9871324321277588, + "learning_rate": 4.528718442730905e-06, + "loss": 0.3991, + "step": 40149 + }, + { + "epoch": 0.6937724633674316, + "grad_norm": 1.8350703367578243, + "learning_rate": 4.528249997300061e-06, + "loss": 0.2859, + "step": 40150 + }, + { + "epoch": 0.6937897428808405, + "grad_norm": 0.832417021538153, + "learning_rate": 4.527781569007148e-06, + "loss": 0.4301, + "step": 40151 + }, + { + "epoch": 0.6938070223942494, + "grad_norm": 1.594828124175272, + "learning_rate": 4.52731315785364e-06, + "loss": 0.3619, + "step": 40152 + }, + { + "epoch": 0.6938243019076583, + "grad_norm": 1.3418520586328104, + "learning_rate": 4.526844763841004e-06, + "loss": 0.2569, + "step": 40153 + }, + { + "epoch": 0.6938415814210672, + "grad_norm": 1.5933391206645098, + "learning_rate": 4.526376386970709e-06, + "loss": 0.3808, + "step": 40154 + }, + { + "epoch": 0.6938588609344761, + "grad_norm": 1.0538472003319332, + "learning_rate": 4.5259080272442154e-06, + "loss": 0.4674, + "step": 40155 + }, + { + "epoch": 0.693876140447885, + "grad_norm": 0.731195298898003, + "learning_rate": 4.5254396846629975e-06, + "loss": 0.2783, + "step": 40156 + }, + { + "epoch": 0.693893419961294, + "grad_norm": 0.5696938495002788, + "learning_rate": 4.524971359228515e-06, + "loss": 0.8005, + "step": 40157 + }, + { + "epoch": 0.6939106994747027, + "grad_norm": 1.0167348426659166, + "learning_rate": 4.524503050942238e-06, + "loss": 0.612, + "step": 40158 + }, + { + "epoch": 0.6939279789881116, + "grad_norm": 1.5767543006539666, + "learning_rate": 4.524034759805635e-06, + "loss": 0.4469, + "step": 40159 + }, + { + "epoch": 0.6939452585015206, + "grad_norm": 1.4542939252992715, + "learning_rate": 4.523566485820173e-06, + "loss": 0.4175, + "step": 40160 + }, + { + "epoch": 0.6939625380149295, + "grad_norm": 1.0934144504419923, + "learning_rate": 4.523098228987318e-06, + "loss": 0.3966, + "step": 40161 + }, + { + "epoch": 0.6939798175283384, + "grad_norm": 1.690113035550003, + "learning_rate": 4.522629989308531e-06, + "loss": 0.3558, + "step": 40162 + }, + { + "epoch": 0.6939970970417473, + "grad_norm": 1.0798899752184488, + "learning_rate": 4.522161766785288e-06, + "loss": 0.4517, + "step": 40163 + }, + { + "epoch": 0.6940143765551562, + "grad_norm": 1.3524422688110325, + "learning_rate": 4.5216935614190425e-06, + "loss": 0.3367, + "step": 40164 + }, + { + "epoch": 0.6940316560685651, + "grad_norm": 1.8632123974674328, + "learning_rate": 4.521225373211278e-06, + "loss": 0.5014, + "step": 40165 + }, + { + "epoch": 0.694048935581974, + "grad_norm": 1.0366345755296522, + "learning_rate": 4.520757202163446e-06, + "loss": 0.3412, + "step": 40166 + }, + { + "epoch": 0.6940662150953829, + "grad_norm": 1.0483021040944671, + "learning_rate": 4.520289048277024e-06, + "loss": 0.3305, + "step": 40167 + }, + { + "epoch": 0.6940834946087918, + "grad_norm": 1.6865188912439113, + "learning_rate": 4.519820911553469e-06, + "loss": 0.5709, + "step": 40168 + }, + { + "epoch": 0.6941007741222007, + "grad_norm": 1.1307139257231864, + "learning_rate": 4.519352791994255e-06, + "loss": 0.493, + "step": 40169 + }, + { + "epoch": 0.6941180536356096, + "grad_norm": 0.9116059799883374, + "learning_rate": 4.518884689600841e-06, + "loss": 0.194, + "step": 40170 + }, + { + "epoch": 0.6941353331490185, + "grad_norm": 1.7578361764667685, + "learning_rate": 4.518416604374697e-06, + "loss": 0.3305, + "step": 40171 + }, + { + "epoch": 0.6941526126624274, + "grad_norm": 1.5933673144597498, + "learning_rate": 4.517948536317293e-06, + "loss": 0.191, + "step": 40172 + }, + { + "epoch": 0.6941698921758364, + "grad_norm": 2.25968071138125, + "learning_rate": 4.517480485430086e-06, + "loss": 0.3334, + "step": 40173 + }, + { + "epoch": 0.6941871716892453, + "grad_norm": 1.0977513329173196, + "learning_rate": 4.51701245171455e-06, + "loss": 0.2143, + "step": 40174 + }, + { + "epoch": 0.6942044512026542, + "grad_norm": 1.1022197818116999, + "learning_rate": 4.516544435172145e-06, + "loss": 0.62, + "step": 40175 + }, + { + "epoch": 0.6942217307160631, + "grad_norm": 1.8006005130650764, + "learning_rate": 4.51607643580434e-06, + "loss": 0.3966, + "step": 40176 + }, + { + "epoch": 0.694239010229472, + "grad_norm": 1.483279680815953, + "learning_rate": 4.5156084536125984e-06, + "loss": 0.4137, + "step": 40177 + }, + { + "epoch": 0.6942562897428809, + "grad_norm": 1.3182377961998462, + "learning_rate": 4.515140488598394e-06, + "loss": 0.7076, + "step": 40178 + }, + { + "epoch": 0.6942735692562897, + "grad_norm": 0.9161882736163646, + "learning_rate": 4.514672540763181e-06, + "loss": 0.4555, + "step": 40179 + }, + { + "epoch": 0.6942908487696986, + "grad_norm": 1.7312444451364308, + "learning_rate": 4.514204610108435e-06, + "loss": 0.3173, + "step": 40180 + }, + { + "epoch": 0.6943081282831075, + "grad_norm": 1.7348365932741203, + "learning_rate": 4.513736696635613e-06, + "loss": 0.4706, + "step": 40181 + }, + { + "epoch": 0.6943254077965164, + "grad_norm": 1.1316444173976492, + "learning_rate": 4.513268800346186e-06, + "loss": 0.3167, + "step": 40182 + }, + { + "epoch": 0.6943426873099253, + "grad_norm": 1.6392053170692307, + "learning_rate": 4.51280092124162e-06, + "loss": 0.4646, + "step": 40183 + }, + { + "epoch": 0.6943599668233342, + "grad_norm": 1.3905483721941705, + "learning_rate": 4.512333059323375e-06, + "loss": 0.2929, + "step": 40184 + }, + { + "epoch": 0.6943772463367431, + "grad_norm": 1.3589660569150974, + "learning_rate": 4.511865214592924e-06, + "loss": 0.4029, + "step": 40185 + }, + { + "epoch": 0.694394525850152, + "grad_norm": 2.6595910265296103, + "learning_rate": 4.5113973870517246e-06, + "loss": 0.3788, + "step": 40186 + }, + { + "epoch": 0.694411805363561, + "grad_norm": 1.1550015273385583, + "learning_rate": 4.510929576701246e-06, + "loss": 0.4691, + "step": 40187 + }, + { + "epoch": 0.6944290848769699, + "grad_norm": 1.0838189630767303, + "learning_rate": 4.510461783542952e-06, + "loss": 0.395, + "step": 40188 + }, + { + "epoch": 0.6944463643903788, + "grad_norm": 0.8392850730339009, + "learning_rate": 4.509994007578314e-06, + "loss": 0.5756, + "step": 40189 + }, + { + "epoch": 0.6944636439037877, + "grad_norm": 0.5467913128048938, + "learning_rate": 4.509526248808788e-06, + "loss": 0.7403, + "step": 40190 + }, + { + "epoch": 0.6944809234171966, + "grad_norm": 1.0966126618106071, + "learning_rate": 4.509058507235846e-06, + "loss": 0.2677, + "step": 40191 + }, + { + "epoch": 0.6944982029306055, + "grad_norm": 0.7371808683067725, + "learning_rate": 4.5085907828609465e-06, + "loss": 0.1863, + "step": 40192 + }, + { + "epoch": 0.6945154824440144, + "grad_norm": 1.0665034384959782, + "learning_rate": 4.508123075685559e-06, + "loss": 1.0808, + "step": 40193 + }, + { + "epoch": 0.6945327619574233, + "grad_norm": 1.4147767623491967, + "learning_rate": 4.50765538571115e-06, + "loss": 0.2985, + "step": 40194 + }, + { + "epoch": 0.6945500414708322, + "grad_norm": 1.103031669417297, + "learning_rate": 4.507187712939178e-06, + "loss": 0.4318, + "step": 40195 + }, + { + "epoch": 0.6945673209842411, + "grad_norm": 0.9592460279048243, + "learning_rate": 4.506720057371115e-06, + "loss": 0.3634, + "step": 40196 + }, + { + "epoch": 0.69458460049765, + "grad_norm": 1.6111293980725154, + "learning_rate": 4.50625241900842e-06, + "loss": 0.4963, + "step": 40197 + }, + { + "epoch": 0.6946018800110589, + "grad_norm": 1.2089880179506498, + "learning_rate": 4.505784797852559e-06, + "loss": 0.5598, + "step": 40198 + }, + { + "epoch": 0.6946191595244678, + "grad_norm": 1.2751996038697737, + "learning_rate": 4.505317193904998e-06, + "loss": 0.3784, + "step": 40199 + }, + { + "epoch": 0.6946364390378766, + "grad_norm": 1.1943997475542634, + "learning_rate": 4.504849607167204e-06, + "loss": 0.2132, + "step": 40200 + }, + { + "epoch": 0.6946537185512855, + "grad_norm": 1.0857256179547188, + "learning_rate": 4.50438203764064e-06, + "loss": 0.5465, + "step": 40201 + }, + { + "epoch": 0.6946709980646945, + "grad_norm": 1.0430706840919468, + "learning_rate": 4.503914485326763e-06, + "loss": 0.3078, + "step": 40202 + }, + { + "epoch": 0.6946882775781034, + "grad_norm": 0.8681427788254088, + "learning_rate": 4.503446950227049e-06, + "loss": 0.3078, + "step": 40203 + }, + { + "epoch": 0.6947055570915123, + "grad_norm": 1.3492168784001906, + "learning_rate": 4.50297943234295e-06, + "loss": 0.2556, + "step": 40204 + }, + { + "epoch": 0.6947228366049212, + "grad_norm": 2.084334936772518, + "learning_rate": 4.502511931675944e-06, + "loss": 0.4285, + "step": 40205 + }, + { + "epoch": 0.6947401161183301, + "grad_norm": 1.2693069890645172, + "learning_rate": 4.502044448227484e-06, + "loss": 0.2089, + "step": 40206 + }, + { + "epoch": 0.694757395631739, + "grad_norm": 0.8339992003147251, + "learning_rate": 4.501576981999043e-06, + "loss": 0.3415, + "step": 40207 + }, + { + "epoch": 0.6947746751451479, + "grad_norm": 0.6303913849474695, + "learning_rate": 4.5011095329920774e-06, + "loss": 0.883, + "step": 40208 + }, + { + "epoch": 0.6947919546585568, + "grad_norm": 0.9748586611501409, + "learning_rate": 4.500642101208058e-06, + "loss": 0.446, + "step": 40209 + }, + { + "epoch": 0.6948092341719657, + "grad_norm": 1.553966994969533, + "learning_rate": 4.500174686648437e-06, + "loss": 0.4228, + "step": 40210 + }, + { + "epoch": 0.6948265136853746, + "grad_norm": 1.4972429311844608, + "learning_rate": 4.4997072893146956e-06, + "loss": 0.3594, + "step": 40211 + }, + { + "epoch": 0.6948437931987835, + "grad_norm": 1.3670207566711972, + "learning_rate": 4.499239909208289e-06, + "loss": 0.4806, + "step": 40212 + }, + { + "epoch": 0.6948610727121924, + "grad_norm": 1.0911337181371135, + "learning_rate": 4.498772546330677e-06, + "loss": 0.2331, + "step": 40213 + }, + { + "epoch": 0.6948783522256013, + "grad_norm": 1.387695486789858, + "learning_rate": 4.49830520068333e-06, + "loss": 0.3792, + "step": 40214 + }, + { + "epoch": 0.6948956317390103, + "grad_norm": 1.30827814428157, + "learning_rate": 4.497837872267707e-06, + "loss": 0.2455, + "step": 40215 + }, + { + "epoch": 0.6949129112524192, + "grad_norm": 1.4422568144338905, + "learning_rate": 4.497370561085274e-06, + "loss": 0.3601, + "step": 40216 + }, + { + "epoch": 0.6949301907658281, + "grad_norm": 0.9911664203842325, + "learning_rate": 4.496903267137493e-06, + "loss": 0.2833, + "step": 40217 + }, + { + "epoch": 0.694947470279237, + "grad_norm": 1.0298116549552896, + "learning_rate": 4.496435990425835e-06, + "loss": 0.2805, + "step": 40218 + }, + { + "epoch": 0.6949647497926459, + "grad_norm": 0.970910482427405, + "learning_rate": 4.495968730951753e-06, + "loss": 0.5272, + "step": 40219 + }, + { + "epoch": 0.6949820293060548, + "grad_norm": 1.4279179128975898, + "learning_rate": 4.495501488716719e-06, + "loss": 0.4035, + "step": 40220 + }, + { + "epoch": 0.6949993088194636, + "grad_norm": 1.5330220418638136, + "learning_rate": 4.495034263722189e-06, + "loss": 0.4149, + "step": 40221 + }, + { + "epoch": 0.6950165883328725, + "grad_norm": 1.6003608433306746, + "learning_rate": 4.49456705596963e-06, + "loss": 0.4457, + "step": 40222 + }, + { + "epoch": 0.6950338678462814, + "grad_norm": 1.2521971297862737, + "learning_rate": 4.494099865460509e-06, + "loss": 0.2814, + "step": 40223 + }, + { + "epoch": 0.6950511473596903, + "grad_norm": 1.5337156929422273, + "learning_rate": 4.493632692196281e-06, + "loss": 0.2892, + "step": 40224 + }, + { + "epoch": 0.6950684268730992, + "grad_norm": 2.738921163504218, + "learning_rate": 4.49316553617842e-06, + "loss": 0.319, + "step": 40225 + }, + { + "epoch": 0.6950857063865081, + "grad_norm": 0.8934981116923527, + "learning_rate": 4.492698397408377e-06, + "loss": 0.282, + "step": 40226 + }, + { + "epoch": 0.695102985899917, + "grad_norm": 1.7004706810560426, + "learning_rate": 4.492231275887623e-06, + "loss": 0.3749, + "step": 40227 + }, + { + "epoch": 0.6951202654133259, + "grad_norm": 1.1897485863761124, + "learning_rate": 4.491764171617617e-06, + "loss": 0.3376, + "step": 40228 + }, + { + "epoch": 0.6951375449267349, + "grad_norm": 1.619966201978437, + "learning_rate": 4.49129708459983e-06, + "loss": 0.3904, + "step": 40229 + }, + { + "epoch": 0.6951548244401438, + "grad_norm": 2.0423369898399857, + "learning_rate": 4.490830014835714e-06, + "loss": 0.5899, + "step": 40230 + }, + { + "epoch": 0.6951721039535527, + "grad_norm": 1.081423328876468, + "learning_rate": 4.490362962326742e-06, + "loss": 0.3367, + "step": 40231 + }, + { + "epoch": 0.6951893834669616, + "grad_norm": 1.0634672682860848, + "learning_rate": 4.489895927074368e-06, + "loss": 0.4404, + "step": 40232 + }, + { + "epoch": 0.6952066629803705, + "grad_norm": 0.9495196216664709, + "learning_rate": 4.489428909080058e-06, + "loss": 0.3735, + "step": 40233 + }, + { + "epoch": 0.6952239424937794, + "grad_norm": 1.041509619368444, + "learning_rate": 4.4889619083452795e-06, + "loss": 0.3704, + "step": 40234 + }, + { + "epoch": 0.6952412220071883, + "grad_norm": 1.2317544155585218, + "learning_rate": 4.488494924871489e-06, + "loss": 0.2971, + "step": 40235 + }, + { + "epoch": 0.6952585015205972, + "grad_norm": 1.032720538666974, + "learning_rate": 4.488027958660153e-06, + "loss": 0.4123, + "step": 40236 + }, + { + "epoch": 0.6952757810340061, + "grad_norm": 1.738834650132, + "learning_rate": 4.487561009712729e-06, + "loss": 0.4155, + "step": 40237 + }, + { + "epoch": 0.695293060547415, + "grad_norm": 0.796674673649482, + "learning_rate": 4.487094078030683e-06, + "loss": 0.3452, + "step": 40238 + }, + { + "epoch": 0.6953103400608239, + "grad_norm": 0.4444454077356989, + "learning_rate": 4.486627163615477e-06, + "loss": 0.5328, + "step": 40239 + }, + { + "epoch": 0.6953276195742328, + "grad_norm": 0.5084142845943657, + "learning_rate": 4.486160266468578e-06, + "loss": 0.7769, + "step": 40240 + }, + { + "epoch": 0.6953448990876417, + "grad_norm": 1.3157051179075152, + "learning_rate": 4.4856933865914395e-06, + "loss": 0.5483, + "step": 40241 + }, + { + "epoch": 0.6953621786010505, + "grad_norm": 0.8040718851607954, + "learning_rate": 4.485226523985532e-06, + "loss": 0.5699, + "step": 40242 + }, + { + "epoch": 0.6953794581144594, + "grad_norm": 1.5748056620747932, + "learning_rate": 4.484759678652315e-06, + "loss": 0.5004, + "step": 40243 + }, + { + "epoch": 0.6953967376278684, + "grad_norm": 1.252824209956982, + "learning_rate": 4.484292850593243e-06, + "loss": 0.5305, + "step": 40244 + }, + { + "epoch": 0.6954140171412773, + "grad_norm": 1.058623590527026, + "learning_rate": 4.483826039809791e-06, + "loss": 0.2665, + "step": 40245 + }, + { + "epoch": 0.6954312966546862, + "grad_norm": 1.8928874775037061, + "learning_rate": 4.483359246303411e-06, + "loss": 0.5038, + "step": 40246 + }, + { + "epoch": 0.6954485761680951, + "grad_norm": 1.3239385468872646, + "learning_rate": 4.482892470075575e-06, + "loss": 0.4011, + "step": 40247 + }, + { + "epoch": 0.695465855681504, + "grad_norm": 0.787165144603838, + "learning_rate": 4.482425711127734e-06, + "loss": 0.9875, + "step": 40248 + }, + { + "epoch": 0.6954831351949129, + "grad_norm": 1.0554919600932573, + "learning_rate": 4.481958969461359e-06, + "loss": 0.3789, + "step": 40249 + }, + { + "epoch": 0.6955004147083218, + "grad_norm": 0.6512367110632675, + "learning_rate": 4.4814922450779e-06, + "loss": 0.7998, + "step": 40250 + }, + { + "epoch": 0.6955176942217307, + "grad_norm": 1.3868877120417842, + "learning_rate": 4.4810255379788336e-06, + "loss": 0.1908, + "step": 40251 + }, + { + "epoch": 0.6955349737351396, + "grad_norm": 1.5747421602462879, + "learning_rate": 4.480558848165616e-06, + "loss": 0.3245, + "step": 40252 + }, + { + "epoch": 0.6955522532485485, + "grad_norm": 0.7337437927223885, + "learning_rate": 4.480092175639703e-06, + "loss": 0.2711, + "step": 40253 + }, + { + "epoch": 0.6955695327619574, + "grad_norm": 1.0906175198884192, + "learning_rate": 4.479625520402565e-06, + "loss": 0.3298, + "step": 40254 + }, + { + "epoch": 0.6955868122753663, + "grad_norm": 1.0641115292226084, + "learning_rate": 4.479158882455655e-06, + "loss": 0.387, + "step": 40255 + }, + { + "epoch": 0.6956040917887752, + "grad_norm": 1.7284373088183895, + "learning_rate": 4.478692261800439e-06, + "loss": 0.4092, + "step": 40256 + }, + { + "epoch": 0.6956213713021842, + "grad_norm": 0.7442265057667469, + "learning_rate": 4.478225658438379e-06, + "loss": 0.5962, + "step": 40257 + }, + { + "epoch": 0.6956386508155931, + "grad_norm": 0.8650366512822595, + "learning_rate": 4.47775907237094e-06, + "loss": 0.4042, + "step": 40258 + }, + { + "epoch": 0.695655930329002, + "grad_norm": 1.0922490869307135, + "learning_rate": 4.477292503599574e-06, + "loss": 0.4976, + "step": 40259 + }, + { + "epoch": 0.6956732098424109, + "grad_norm": 0.6902578342469778, + "learning_rate": 4.476825952125753e-06, + "loss": 0.3186, + "step": 40260 + }, + { + "epoch": 0.6956904893558198, + "grad_norm": 1.055668154605529, + "learning_rate": 4.476359417950929e-06, + "loss": 0.3317, + "step": 40261 + }, + { + "epoch": 0.6957077688692287, + "grad_norm": 1.208421557207759, + "learning_rate": 4.475892901076565e-06, + "loss": 0.3821, + "step": 40262 + }, + { + "epoch": 0.6957250483826375, + "grad_norm": 0.8969335578745936, + "learning_rate": 4.47542640150413e-06, + "loss": 0.3431, + "step": 40263 + }, + { + "epoch": 0.6957423278960464, + "grad_norm": 0.9343208321234555, + "learning_rate": 4.474959919235075e-06, + "loss": 0.3147, + "step": 40264 + }, + { + "epoch": 0.6957596074094553, + "grad_norm": 1.289903743170349, + "learning_rate": 4.4744934542708696e-06, + "loss": 0.4275, + "step": 40265 + }, + { + "epoch": 0.6957768869228642, + "grad_norm": 1.3025017991953796, + "learning_rate": 4.474027006612966e-06, + "loss": 0.281, + "step": 40266 + }, + { + "epoch": 0.6957941664362731, + "grad_norm": 1.1749674625742696, + "learning_rate": 4.473560576262831e-06, + "loss": 0.5175, + "step": 40267 + }, + { + "epoch": 0.695811445949682, + "grad_norm": 1.0454187930976526, + "learning_rate": 4.473094163221922e-06, + "loss": 0.3987, + "step": 40268 + }, + { + "epoch": 0.6958287254630909, + "grad_norm": 1.008133826193608, + "learning_rate": 4.472627767491708e-06, + "loss": 0.4176, + "step": 40269 + }, + { + "epoch": 0.6958460049764998, + "grad_norm": 1.0851945535223622, + "learning_rate": 4.472161389073641e-06, + "loss": 0.2922, + "step": 40270 + }, + { + "epoch": 0.6958632844899088, + "grad_norm": 1.1614816300967261, + "learning_rate": 4.471695027969186e-06, + "loss": 0.3453, + "step": 40271 + }, + { + "epoch": 0.6958805640033177, + "grad_norm": 1.5586978623377168, + "learning_rate": 4.471228684179799e-06, + "loss": 0.1836, + "step": 40272 + }, + { + "epoch": 0.6958978435167266, + "grad_norm": 1.5780161095757343, + "learning_rate": 4.470762357706945e-06, + "loss": 0.4462, + "step": 40273 + }, + { + "epoch": 0.6959151230301355, + "grad_norm": 1.0128688095399423, + "learning_rate": 4.470296048552086e-06, + "loss": 0.3856, + "step": 40274 + }, + { + "epoch": 0.6959324025435444, + "grad_norm": 1.3656292907980867, + "learning_rate": 4.469829756716678e-06, + "loss": 0.3006, + "step": 40275 + }, + { + "epoch": 0.6959496820569533, + "grad_norm": 1.2436415778323828, + "learning_rate": 4.469363482202186e-06, + "loss": 0.4576, + "step": 40276 + }, + { + "epoch": 0.6959669615703622, + "grad_norm": 2.1534697891145105, + "learning_rate": 4.468897225010064e-06, + "loss": 0.4194, + "step": 40277 + }, + { + "epoch": 0.6959842410837711, + "grad_norm": 1.2997723249672777, + "learning_rate": 4.468430985141776e-06, + "loss": 0.3434, + "step": 40278 + }, + { + "epoch": 0.69600152059718, + "grad_norm": 0.825520740002116, + "learning_rate": 4.467964762598785e-06, + "loss": 0.507, + "step": 40279 + }, + { + "epoch": 0.6960188001105889, + "grad_norm": 1.1281187936700487, + "learning_rate": 4.4674985573825495e-06, + "loss": 0.3507, + "step": 40280 + }, + { + "epoch": 0.6960360796239978, + "grad_norm": 0.7814458808791418, + "learning_rate": 4.467032369494526e-06, + "loss": 0.3306, + "step": 40281 + }, + { + "epoch": 0.6960533591374067, + "grad_norm": 1.115831670242159, + "learning_rate": 4.466566198936182e-06, + "loss": 0.2169, + "step": 40282 + }, + { + "epoch": 0.6960706386508156, + "grad_norm": 1.5001442216030665, + "learning_rate": 4.466100045708973e-06, + "loss": 0.3584, + "step": 40283 + }, + { + "epoch": 0.6960879181642246, + "grad_norm": 1.8478203164294247, + "learning_rate": 4.465633909814352e-06, + "loss": 0.2299, + "step": 40284 + }, + { + "epoch": 0.6961051976776333, + "grad_norm": 1.1307406979318595, + "learning_rate": 4.465167791253793e-06, + "loss": 0.5248, + "step": 40285 + }, + { + "epoch": 0.6961224771910423, + "grad_norm": 0.9299442474641371, + "learning_rate": 4.4647016900287455e-06, + "loss": 0.2436, + "step": 40286 + }, + { + "epoch": 0.6961397567044512, + "grad_norm": 1.6478605498858683, + "learning_rate": 4.464235606140676e-06, + "loss": 0.2873, + "step": 40287 + }, + { + "epoch": 0.6961570362178601, + "grad_norm": 1.3275316450025534, + "learning_rate": 4.463769539591039e-06, + "loss": 0.9404, + "step": 40288 + }, + { + "epoch": 0.696174315731269, + "grad_norm": 1.0246348017999827, + "learning_rate": 4.4633034903813e-06, + "loss": 0.3899, + "step": 40289 + }, + { + "epoch": 0.6961915952446779, + "grad_norm": 0.9091187826316072, + "learning_rate": 4.462837458512907e-06, + "loss": 0.4619, + "step": 40290 + }, + { + "epoch": 0.6962088747580868, + "grad_norm": 0.9798000010498054, + "learning_rate": 4.462371443987337e-06, + "loss": 0.3459, + "step": 40291 + }, + { + "epoch": 0.6962261542714957, + "grad_norm": 1.0620058718332797, + "learning_rate": 4.461905446806035e-06, + "loss": 0.2622, + "step": 40292 + }, + { + "epoch": 0.6962434337849046, + "grad_norm": 1.4264109607380004, + "learning_rate": 4.461439466970471e-06, + "loss": 0.3659, + "step": 40293 + }, + { + "epoch": 0.6962607132983135, + "grad_norm": 1.070450176240686, + "learning_rate": 4.460973504482098e-06, + "loss": 0.2513, + "step": 40294 + }, + { + "epoch": 0.6962779928117224, + "grad_norm": 0.9072162838446951, + "learning_rate": 4.460507559342374e-06, + "loss": 0.2955, + "step": 40295 + }, + { + "epoch": 0.6962952723251313, + "grad_norm": 1.4255324331513533, + "learning_rate": 4.460041631552761e-06, + "loss": 0.4694, + "step": 40296 + }, + { + "epoch": 0.6963125518385402, + "grad_norm": 1.27026489363663, + "learning_rate": 4.4595757211147196e-06, + "loss": 0.3083, + "step": 40297 + }, + { + "epoch": 0.6963298313519491, + "grad_norm": 1.7198102948098388, + "learning_rate": 4.459109828029712e-06, + "loss": 0.3281, + "step": 40298 + }, + { + "epoch": 0.696347110865358, + "grad_norm": 1.2547910064890295, + "learning_rate": 4.458643952299188e-06, + "loss": 0.2895, + "step": 40299 + }, + { + "epoch": 0.696364390378767, + "grad_norm": 0.949635989182808, + "learning_rate": 4.458178093924618e-06, + "loss": 0.3201, + "step": 40300 + }, + { + "epoch": 0.6963816698921759, + "grad_norm": 0.49612694031434246, + "learning_rate": 4.45771225290745e-06, + "loss": 0.5145, + "step": 40301 + }, + { + "epoch": 0.6963989494055848, + "grad_norm": 1.4035798476712407, + "learning_rate": 4.457246429249149e-06, + "loss": 0.5711, + "step": 40302 + }, + { + "epoch": 0.6964162289189937, + "grad_norm": 0.9539602231565536, + "learning_rate": 4.456780622951174e-06, + "loss": 0.383, + "step": 40303 + }, + { + "epoch": 0.6964335084324026, + "grad_norm": 0.8980765565317845, + "learning_rate": 4.456314834014987e-06, + "loss": 0.3484, + "step": 40304 + }, + { + "epoch": 0.6964507879458115, + "grad_norm": 1.0011590746776342, + "learning_rate": 4.455849062442041e-06, + "loss": 0.2832, + "step": 40305 + }, + { + "epoch": 0.6964680674592203, + "grad_norm": 1.060603269590297, + "learning_rate": 4.455383308233796e-06, + "loss": 0.312, + "step": 40306 + }, + { + "epoch": 0.6964853469726292, + "grad_norm": 1.8113519294036275, + "learning_rate": 4.454917571391711e-06, + "loss": 0.5009, + "step": 40307 + }, + { + "epoch": 0.6965026264860381, + "grad_norm": 2.2367021810075154, + "learning_rate": 4.4544518519172454e-06, + "loss": 0.5963, + "step": 40308 + }, + { + "epoch": 0.696519905999447, + "grad_norm": 1.2913056342244589, + "learning_rate": 4.453986149811862e-06, + "loss": 0.3736, + "step": 40309 + }, + { + "epoch": 0.6965371855128559, + "grad_norm": 1.3722862612004967, + "learning_rate": 4.453520465077013e-06, + "loss": 0.2157, + "step": 40310 + }, + { + "epoch": 0.6965544650262648, + "grad_norm": 1.9208590396940621, + "learning_rate": 4.453054797714161e-06, + "loss": 0.4538, + "step": 40311 + }, + { + "epoch": 0.6965717445396737, + "grad_norm": 1.3523240709181303, + "learning_rate": 4.4525891477247596e-06, + "loss": 0.3316, + "step": 40312 + }, + { + "epoch": 0.6965890240530827, + "grad_norm": 1.2062136308721694, + "learning_rate": 4.452123515110272e-06, + "loss": 0.1898, + "step": 40313 + }, + { + "epoch": 0.6966063035664916, + "grad_norm": 0.6966898150382376, + "learning_rate": 4.451657899872157e-06, + "loss": 0.3439, + "step": 40314 + }, + { + "epoch": 0.6966235830799005, + "grad_norm": 1.110430243482116, + "learning_rate": 4.451192302011869e-06, + "loss": 0.4722, + "step": 40315 + }, + { + "epoch": 0.6966408625933094, + "grad_norm": 2.662935980999314, + "learning_rate": 4.450726721530871e-06, + "loss": 0.2848, + "step": 40316 + }, + { + "epoch": 0.6966581421067183, + "grad_norm": 1.6429292875246535, + "learning_rate": 4.450261158430616e-06, + "loss": 0.3908, + "step": 40317 + }, + { + "epoch": 0.6966754216201272, + "grad_norm": 1.5206933937099245, + "learning_rate": 4.449795612712564e-06, + "loss": 0.3991, + "step": 40318 + }, + { + "epoch": 0.6966927011335361, + "grad_norm": 1.7009801578714088, + "learning_rate": 4.449330084378174e-06, + "loss": 0.4398, + "step": 40319 + }, + { + "epoch": 0.696709980646945, + "grad_norm": 1.0141119769594342, + "learning_rate": 4.448864573428907e-06, + "loss": 0.5469, + "step": 40320 + }, + { + "epoch": 0.6967272601603539, + "grad_norm": 1.2966179439344887, + "learning_rate": 4.448399079866216e-06, + "loss": 0.4422, + "step": 40321 + }, + { + "epoch": 0.6967445396737628, + "grad_norm": 0.9696341243054382, + "learning_rate": 4.447933603691563e-06, + "loss": 0.4439, + "step": 40322 + }, + { + "epoch": 0.6967618191871717, + "grad_norm": 0.6004656446285327, + "learning_rate": 4.447468144906401e-06, + "loss": 0.6525, + "step": 40323 + }, + { + "epoch": 0.6967790987005806, + "grad_norm": 1.273721704256202, + "learning_rate": 4.447002703512191e-06, + "loss": 0.3655, + "step": 40324 + }, + { + "epoch": 0.6967963782139895, + "grad_norm": 1.334129650335307, + "learning_rate": 4.446537279510395e-06, + "loss": 0.3819, + "step": 40325 + }, + { + "epoch": 0.6968136577273984, + "grad_norm": 0.8519544328983095, + "learning_rate": 4.446071872902461e-06, + "loss": 0.56, + "step": 40326 + }, + { + "epoch": 0.6968309372408072, + "grad_norm": 0.9811041077674499, + "learning_rate": 4.445606483689856e-06, + "loss": 0.2224, + "step": 40327 + }, + { + "epoch": 0.6968482167542162, + "grad_norm": 1.52289868716861, + "learning_rate": 4.445141111874031e-06, + "loss": 0.3673, + "step": 40328 + }, + { + "epoch": 0.6968654962676251, + "grad_norm": 2.0705363214818107, + "learning_rate": 4.444675757456449e-06, + "loss": 0.2424, + "step": 40329 + }, + { + "epoch": 0.696882775781034, + "grad_norm": 1.1353518232330329, + "learning_rate": 4.444210420438557e-06, + "loss": 0.3768, + "step": 40330 + }, + { + "epoch": 0.6969000552944429, + "grad_norm": 1.2316947885117595, + "learning_rate": 4.443745100821828e-06, + "loss": 0.3653, + "step": 40331 + }, + { + "epoch": 0.6969173348078518, + "grad_norm": 0.9027855371316552, + "learning_rate": 4.443279798607708e-06, + "loss": 0.4196, + "step": 40332 + }, + { + "epoch": 0.6969346143212607, + "grad_norm": 0.8827871790727142, + "learning_rate": 4.442814513797662e-06, + "loss": 0.4352, + "step": 40333 + }, + { + "epoch": 0.6969518938346696, + "grad_norm": 1.4693466785276617, + "learning_rate": 4.442349246393138e-06, + "loss": 0.3676, + "step": 40334 + }, + { + "epoch": 0.6969691733480785, + "grad_norm": 0.9388821143805531, + "learning_rate": 4.441883996395604e-06, + "loss": 0.3004, + "step": 40335 + }, + { + "epoch": 0.6969864528614874, + "grad_norm": 1.4368581526387378, + "learning_rate": 4.4414187638065074e-06, + "loss": 0.4216, + "step": 40336 + }, + { + "epoch": 0.6970037323748963, + "grad_norm": 1.2632610466399103, + "learning_rate": 4.44095354862731e-06, + "loss": 0.3934, + "step": 40337 + }, + { + "epoch": 0.6970210118883052, + "grad_norm": 1.1415648934574958, + "learning_rate": 4.440488350859472e-06, + "loss": 0.455, + "step": 40338 + }, + { + "epoch": 0.6970382914017141, + "grad_norm": 2.0495650947337865, + "learning_rate": 4.440023170504443e-06, + "loss": 0.2446, + "step": 40339 + }, + { + "epoch": 0.697055570915123, + "grad_norm": 1.4048891941919077, + "learning_rate": 4.439558007563688e-06, + "loss": 0.2923, + "step": 40340 + }, + { + "epoch": 0.697072850428532, + "grad_norm": 1.2668413289186664, + "learning_rate": 4.439092862038656e-06, + "loss": 0.3685, + "step": 40341 + }, + { + "epoch": 0.6970901299419409, + "grad_norm": 1.6044119055159907, + "learning_rate": 4.438627733930808e-06, + "loss": 0.4214, + "step": 40342 + }, + { + "epoch": 0.6971074094553498, + "grad_norm": 1.1612126134949483, + "learning_rate": 4.438162623241601e-06, + "loss": 0.3935, + "step": 40343 + }, + { + "epoch": 0.6971246889687587, + "grad_norm": 1.299779607483072, + "learning_rate": 4.437697529972494e-06, + "loss": 0.4718, + "step": 40344 + }, + { + "epoch": 0.6971419684821676, + "grad_norm": 1.2171485279548204, + "learning_rate": 4.4372324541249414e-06, + "loss": 0.3682, + "step": 40345 + }, + { + "epoch": 0.6971592479955765, + "grad_norm": 0.9856080682071523, + "learning_rate": 4.436767395700396e-06, + "loss": 0.3917, + "step": 40346 + }, + { + "epoch": 0.6971765275089854, + "grad_norm": 1.3038675066432976, + "learning_rate": 4.4363023547003184e-06, + "loss": 0.483, + "step": 40347 + }, + { + "epoch": 0.6971938070223942, + "grad_norm": 0.9899102642095233, + "learning_rate": 4.435837331126164e-06, + "loss": 0.4065, + "step": 40348 + }, + { + "epoch": 0.6972110865358031, + "grad_norm": 0.46676580962727937, + "learning_rate": 4.435372324979394e-06, + "loss": 0.6182, + "step": 40349 + }, + { + "epoch": 0.697228366049212, + "grad_norm": 0.5580374883936544, + "learning_rate": 4.434907336261456e-06, + "loss": 0.6708, + "step": 40350 + }, + { + "epoch": 0.6972456455626209, + "grad_norm": 1.0547032176615643, + "learning_rate": 4.434442364973817e-06, + "loss": 0.5619, + "step": 40351 + }, + { + "epoch": 0.6972629250760298, + "grad_norm": 0.5768601196145157, + "learning_rate": 4.433977411117921e-06, + "loss": 0.7407, + "step": 40352 + }, + { + "epoch": 0.6972802045894387, + "grad_norm": 1.0289353809187545, + "learning_rate": 4.433512474695233e-06, + "loss": 0.3318, + "step": 40353 + }, + { + "epoch": 0.6972974841028476, + "grad_norm": 1.094995885019039, + "learning_rate": 4.433047555707206e-06, + "loss": 0.2739, + "step": 40354 + }, + { + "epoch": 0.6973147636162565, + "grad_norm": 1.0212267483057424, + "learning_rate": 4.432582654155301e-06, + "loss": 0.2353, + "step": 40355 + }, + { + "epoch": 0.6973320431296655, + "grad_norm": 2.1738742852039254, + "learning_rate": 4.43211777004097e-06, + "loss": 0.5121, + "step": 40356 + }, + { + "epoch": 0.6973493226430744, + "grad_norm": 1.205696971403236, + "learning_rate": 4.431652903365666e-06, + "loss": 0.1657, + "step": 40357 + }, + { + "epoch": 0.6973666021564833, + "grad_norm": 1.5061446550496431, + "learning_rate": 4.431188054130848e-06, + "loss": 0.2774, + "step": 40358 + }, + { + "epoch": 0.6973838816698922, + "grad_norm": 1.1626456396426619, + "learning_rate": 4.4307232223379724e-06, + "loss": 0.2919, + "step": 40359 + }, + { + "epoch": 0.6974011611833011, + "grad_norm": 1.3410470601446314, + "learning_rate": 4.4302584079884995e-06, + "loss": 0.3333, + "step": 40360 + }, + { + "epoch": 0.69741844069671, + "grad_norm": 0.9756975590208028, + "learning_rate": 4.429793611083876e-06, + "loss": 0.7021, + "step": 40361 + }, + { + "epoch": 0.6974357202101189, + "grad_norm": 1.2603324054154665, + "learning_rate": 4.429328831625565e-06, + "loss": 0.266, + "step": 40362 + }, + { + "epoch": 0.6974529997235278, + "grad_norm": 1.440188468167263, + "learning_rate": 4.428864069615016e-06, + "loss": 0.4027, + "step": 40363 + }, + { + "epoch": 0.6974702792369367, + "grad_norm": 0.981775362359755, + "learning_rate": 4.42839932505369e-06, + "loss": 0.3852, + "step": 40364 + }, + { + "epoch": 0.6974875587503456, + "grad_norm": 1.2178045006087517, + "learning_rate": 4.427934597943039e-06, + "loss": 0.4714, + "step": 40365 + }, + { + "epoch": 0.6975048382637545, + "grad_norm": 1.7618053050774038, + "learning_rate": 4.427469888284525e-06, + "loss": 0.4672, + "step": 40366 + }, + { + "epoch": 0.6975221177771634, + "grad_norm": 1.1111607754291535, + "learning_rate": 4.427005196079598e-06, + "loss": 0.3867, + "step": 40367 + }, + { + "epoch": 0.6975393972905723, + "grad_norm": 0.795344533376673, + "learning_rate": 4.426540521329711e-06, + "loss": 0.2403, + "step": 40368 + }, + { + "epoch": 0.6975566768039811, + "grad_norm": 0.8111900717786865, + "learning_rate": 4.426075864036325e-06, + "loss": 0.4031, + "step": 40369 + }, + { + "epoch": 0.69757395631739, + "grad_norm": 0.9400323313840461, + "learning_rate": 4.425611224200888e-06, + "loss": 0.3549, + "step": 40370 + }, + { + "epoch": 0.697591235830799, + "grad_norm": 1.1850583012313707, + "learning_rate": 4.425146601824865e-06, + "loss": 0.434, + "step": 40371 + }, + { + "epoch": 0.6976085153442079, + "grad_norm": 1.1804293872593405, + "learning_rate": 4.4246819969097054e-06, + "loss": 0.3143, + "step": 40372 + }, + { + "epoch": 0.6976257948576168, + "grad_norm": 1.3012212046188099, + "learning_rate": 4.424217409456868e-06, + "loss": 0.4225, + "step": 40373 + }, + { + "epoch": 0.6976430743710257, + "grad_norm": 0.6898323738397586, + "learning_rate": 4.423752839467802e-06, + "loss": 0.4902, + "step": 40374 + }, + { + "epoch": 0.6976603538844346, + "grad_norm": 1.3062507788467064, + "learning_rate": 4.42328828694397e-06, + "loss": 0.5287, + "step": 40375 + }, + { + "epoch": 0.6976776333978435, + "grad_norm": 0.9390111578108613, + "learning_rate": 4.422823751886818e-06, + "loss": 0.3429, + "step": 40376 + }, + { + "epoch": 0.6976949129112524, + "grad_norm": 1.451184235404113, + "learning_rate": 4.422359234297806e-06, + "loss": 0.2259, + "step": 40377 + }, + { + "epoch": 0.6977121924246613, + "grad_norm": 0.9717225929969999, + "learning_rate": 4.421894734178393e-06, + "loss": 0.2188, + "step": 40378 + }, + { + "epoch": 0.6977294719380702, + "grad_norm": 1.1961594465225842, + "learning_rate": 4.421430251530025e-06, + "loss": 0.3209, + "step": 40379 + }, + { + "epoch": 0.6977467514514791, + "grad_norm": 1.1441159610320581, + "learning_rate": 4.420965786354166e-06, + "loss": 0.227, + "step": 40380 + }, + { + "epoch": 0.697764030964888, + "grad_norm": 0.8529842914966818, + "learning_rate": 4.4205013386522635e-06, + "loss": 0.5324, + "step": 40381 + }, + { + "epoch": 0.697781310478297, + "grad_norm": 1.2538430493856645, + "learning_rate": 4.4200369084257735e-06, + "loss": 0.5332, + "step": 40382 + }, + { + "epoch": 0.6977985899917059, + "grad_norm": 1.61254859673096, + "learning_rate": 4.419572495676152e-06, + "loss": 0.2322, + "step": 40383 + }, + { + "epoch": 0.6978158695051148, + "grad_norm": 0.7847699795354635, + "learning_rate": 4.419108100404857e-06, + "loss": 0.3958, + "step": 40384 + }, + { + "epoch": 0.6978331490185237, + "grad_norm": 0.7927664908090488, + "learning_rate": 4.418643722613337e-06, + "loss": 0.3775, + "step": 40385 + }, + { + "epoch": 0.6978504285319326, + "grad_norm": 1.345290331734273, + "learning_rate": 4.418179362303052e-06, + "loss": 0.2651, + "step": 40386 + }, + { + "epoch": 0.6978677080453415, + "grad_norm": 0.9461812184281628, + "learning_rate": 4.41771501947545e-06, + "loss": 0.3071, + "step": 40387 + }, + { + "epoch": 0.6978849875587504, + "grad_norm": 1.3131913354927371, + "learning_rate": 4.417250694131988e-06, + "loss": 0.4547, + "step": 40388 + }, + { + "epoch": 0.6979022670721593, + "grad_norm": 0.8379202867738099, + "learning_rate": 4.4167863862741254e-06, + "loss": 0.6089, + "step": 40389 + }, + { + "epoch": 0.6979195465855681, + "grad_norm": 1.4360817159027817, + "learning_rate": 4.4163220959033096e-06, + "loss": 0.2327, + "step": 40390 + }, + { + "epoch": 0.697936826098977, + "grad_norm": 0.9428859825671535, + "learning_rate": 4.415857823020999e-06, + "loss": 0.2657, + "step": 40391 + }, + { + "epoch": 0.6979541056123859, + "grad_norm": 1.3113372666764078, + "learning_rate": 4.415393567628643e-06, + "loss": 0.4215, + "step": 40392 + }, + { + "epoch": 0.6979713851257948, + "grad_norm": 1.6274411621499427, + "learning_rate": 4.414929329727699e-06, + "loss": 0.4279, + "step": 40393 + }, + { + "epoch": 0.6979886646392037, + "grad_norm": 1.0041615280593366, + "learning_rate": 4.414465109319621e-06, + "loss": 0.7022, + "step": 40394 + }, + { + "epoch": 0.6980059441526126, + "grad_norm": 1.2898266326211172, + "learning_rate": 4.414000906405866e-06, + "loss": 0.4918, + "step": 40395 + }, + { + "epoch": 0.6980232236660215, + "grad_norm": 1.558729856923366, + "learning_rate": 4.413536720987881e-06, + "loss": 0.2585, + "step": 40396 + }, + { + "epoch": 0.6980405031794304, + "grad_norm": 1.9487276910641531, + "learning_rate": 4.413072553067128e-06, + "loss": 0.3607, + "step": 40397 + }, + { + "epoch": 0.6980577826928394, + "grad_norm": 0.9325952600078146, + "learning_rate": 4.4126084026450525e-06, + "loss": 0.2836, + "step": 40398 + }, + { + "epoch": 0.6980750622062483, + "grad_norm": 1.1325010286411856, + "learning_rate": 4.412144269723111e-06, + "loss": 0.5404, + "step": 40399 + }, + { + "epoch": 0.6980923417196572, + "grad_norm": 1.3361079336285036, + "learning_rate": 4.411680154302763e-06, + "loss": 0.3566, + "step": 40400 + }, + { + "epoch": 0.6981096212330661, + "grad_norm": 1.3318910237875505, + "learning_rate": 4.411216056385453e-06, + "loss": 0.2328, + "step": 40401 + }, + { + "epoch": 0.698126900746475, + "grad_norm": 0.8953739457208431, + "learning_rate": 4.410751975972644e-06, + "loss": 0.1857, + "step": 40402 + }, + { + "epoch": 0.6981441802598839, + "grad_norm": 2.0375874631776028, + "learning_rate": 4.410287913065779e-06, + "loss": 0.535, + "step": 40403 + }, + { + "epoch": 0.6981614597732928, + "grad_norm": 1.431847500003675, + "learning_rate": 4.4098238676663184e-06, + "loss": 0.5386, + "step": 40404 + }, + { + "epoch": 0.6981787392867017, + "grad_norm": 1.0315267597730078, + "learning_rate": 4.409359839775715e-06, + "loss": 0.3037, + "step": 40405 + }, + { + "epoch": 0.6981960188001106, + "grad_norm": 1.3572534621709056, + "learning_rate": 4.408895829395424e-06, + "loss": 0.5032, + "step": 40406 + }, + { + "epoch": 0.6982132983135195, + "grad_norm": 1.041549524205302, + "learning_rate": 4.408431836526897e-06, + "loss": 0.3689, + "step": 40407 + }, + { + "epoch": 0.6982305778269284, + "grad_norm": 0.6966076637132155, + "learning_rate": 4.407967861171581e-06, + "loss": 0.6146, + "step": 40408 + }, + { + "epoch": 0.6982478573403373, + "grad_norm": 0.682354646116684, + "learning_rate": 4.40750390333094e-06, + "loss": 0.2898, + "step": 40409 + }, + { + "epoch": 0.6982651368537462, + "grad_norm": 1.3834727341281692, + "learning_rate": 4.407039963006414e-06, + "loss": 0.5471, + "step": 40410 + }, + { + "epoch": 0.698282416367155, + "grad_norm": 1.2936793701872642, + "learning_rate": 4.406576040199471e-06, + "loss": 0.4742, + "step": 40411 + }, + { + "epoch": 0.698299695880564, + "grad_norm": 1.134533134310814, + "learning_rate": 4.406112134911553e-06, + "loss": 0.3162, + "step": 40412 + }, + { + "epoch": 0.6983169753939729, + "grad_norm": 1.6514564341663076, + "learning_rate": 4.405648247144121e-06, + "loss": 0.4239, + "step": 40413 + }, + { + "epoch": 0.6983342549073818, + "grad_norm": 1.2489837926420704, + "learning_rate": 4.4051843768986205e-06, + "loss": 0.4064, + "step": 40414 + }, + { + "epoch": 0.6983515344207907, + "grad_norm": 1.0631378251543702, + "learning_rate": 4.404720524176511e-06, + "loss": 0.2626, + "step": 40415 + }, + { + "epoch": 0.6983688139341996, + "grad_norm": 1.8585171026528235, + "learning_rate": 4.404256688979235e-06, + "loss": 0.453, + "step": 40416 + }, + { + "epoch": 0.6983860934476085, + "grad_norm": 0.7946337937875051, + "learning_rate": 4.403792871308259e-06, + "loss": 0.382, + "step": 40417 + }, + { + "epoch": 0.6984033729610174, + "grad_norm": 0.9008417358201005, + "learning_rate": 4.403329071165029e-06, + "loss": 0.317, + "step": 40418 + }, + { + "epoch": 0.6984206524744263, + "grad_norm": 1.2998800010928069, + "learning_rate": 4.402865288550995e-06, + "loss": 0.3723, + "step": 40419 + }, + { + "epoch": 0.6984379319878352, + "grad_norm": 1.8109641716458404, + "learning_rate": 4.402401523467616e-06, + "loss": 0.3124, + "step": 40420 + }, + { + "epoch": 0.6984552115012441, + "grad_norm": 1.979132089285863, + "learning_rate": 4.401937775916335e-06, + "loss": 0.3105, + "step": 40421 + }, + { + "epoch": 0.698472491014653, + "grad_norm": 1.7258295738396943, + "learning_rate": 4.401474045898613e-06, + "loss": 0.3107, + "step": 40422 + }, + { + "epoch": 0.6984897705280619, + "grad_norm": 1.8754832941544517, + "learning_rate": 4.401010333415898e-06, + "loss": 0.4029, + "step": 40423 + }, + { + "epoch": 0.6985070500414708, + "grad_norm": 0.5387390337730906, + "learning_rate": 4.400546638469648e-06, + "loss": 0.1941, + "step": 40424 + }, + { + "epoch": 0.6985243295548798, + "grad_norm": 0.9860667556195932, + "learning_rate": 4.400082961061308e-06, + "loss": 0.4309, + "step": 40425 + }, + { + "epoch": 0.6985416090682887, + "grad_norm": 1.3746288766652484, + "learning_rate": 4.399619301192338e-06, + "loss": 0.476, + "step": 40426 + }, + { + "epoch": 0.6985588885816976, + "grad_norm": 1.1819217981051966, + "learning_rate": 4.399155658864183e-06, + "loss": 0.2159, + "step": 40427 + }, + { + "epoch": 0.6985761680951065, + "grad_norm": 1.051193151993018, + "learning_rate": 4.398692034078296e-06, + "loss": 0.2993, + "step": 40428 + }, + { + "epoch": 0.6985934476085154, + "grad_norm": 1.939184401686633, + "learning_rate": 4.398228426836137e-06, + "loss": 0.6011, + "step": 40429 + }, + { + "epoch": 0.6986107271219243, + "grad_norm": 1.7473185636611706, + "learning_rate": 4.397764837139147e-06, + "loss": 0.4807, + "step": 40430 + }, + { + "epoch": 0.6986280066353332, + "grad_norm": 1.0014237440422153, + "learning_rate": 4.397301264988787e-06, + "loss": 0.2913, + "step": 40431 + }, + { + "epoch": 0.6986452861487421, + "grad_norm": 1.1945027549528089, + "learning_rate": 4.396837710386503e-06, + "loss": 0.1631, + "step": 40432 + }, + { + "epoch": 0.6986625656621509, + "grad_norm": 1.2560536545601793, + "learning_rate": 4.396374173333747e-06, + "loss": 0.4133, + "step": 40433 + }, + { + "epoch": 0.6986798451755598, + "grad_norm": 1.3135209342050744, + "learning_rate": 4.395910653831975e-06, + "loss": 0.6345, + "step": 40434 + }, + { + "epoch": 0.6986971246889687, + "grad_norm": 1.3602283936929616, + "learning_rate": 4.395447151882639e-06, + "loss": 0.2935, + "step": 40435 + }, + { + "epoch": 0.6987144042023776, + "grad_norm": 1.2449187848247947, + "learning_rate": 4.394983667487186e-06, + "loss": 0.5365, + "step": 40436 + }, + { + "epoch": 0.6987316837157865, + "grad_norm": 1.895397530638967, + "learning_rate": 4.394520200647072e-06, + "loss": 0.3229, + "step": 40437 + }, + { + "epoch": 0.6987489632291954, + "grad_norm": 1.7423608640680108, + "learning_rate": 4.394056751363744e-06, + "loss": 0.2245, + "step": 40438 + }, + { + "epoch": 0.6987662427426043, + "grad_norm": 1.0459391845437955, + "learning_rate": 4.3935933196386575e-06, + "loss": 0.283, + "step": 40439 + }, + { + "epoch": 0.6987835222560133, + "grad_norm": 1.1004029425353814, + "learning_rate": 4.393129905473266e-06, + "loss": 0.3268, + "step": 40440 + }, + { + "epoch": 0.6988008017694222, + "grad_norm": 1.375976275419837, + "learning_rate": 4.392666508869012e-06, + "loss": 0.2805, + "step": 40441 + }, + { + "epoch": 0.6988180812828311, + "grad_norm": 0.9729539021706125, + "learning_rate": 4.392203129827358e-06, + "loss": 0.4458, + "step": 40442 + }, + { + "epoch": 0.69883536079624, + "grad_norm": 1.4719584865241133, + "learning_rate": 4.391739768349747e-06, + "loss": 0.3532, + "step": 40443 + }, + { + "epoch": 0.6988526403096489, + "grad_norm": 1.241027954920534, + "learning_rate": 4.391276424437631e-06, + "loss": 0.2766, + "step": 40444 + }, + { + "epoch": 0.6988699198230578, + "grad_norm": 2.2571976049005844, + "learning_rate": 4.390813098092466e-06, + "loss": 0.3021, + "step": 40445 + }, + { + "epoch": 0.6988871993364667, + "grad_norm": 1.6142981713014637, + "learning_rate": 4.390349789315703e-06, + "loss": 0.4959, + "step": 40446 + }, + { + "epoch": 0.6989044788498756, + "grad_norm": 1.3320326083841618, + "learning_rate": 4.389886498108787e-06, + "loss": 0.3594, + "step": 40447 + }, + { + "epoch": 0.6989217583632845, + "grad_norm": 1.65572881859098, + "learning_rate": 4.389423224473177e-06, + "loss": 0.4945, + "step": 40448 + }, + { + "epoch": 0.6989390378766934, + "grad_norm": 1.2947325860150107, + "learning_rate": 4.38895996841032e-06, + "loss": 0.3728, + "step": 40449 + }, + { + "epoch": 0.6989563173901023, + "grad_norm": 1.8821279002827094, + "learning_rate": 4.388496729921658e-06, + "loss": 0.4266, + "step": 40450 + }, + { + "epoch": 0.6989735969035112, + "grad_norm": 1.5526635902169645, + "learning_rate": 4.38803350900866e-06, + "loss": 0.7646, + "step": 40451 + }, + { + "epoch": 0.6989908764169201, + "grad_norm": 1.218436291558645, + "learning_rate": 4.387570305672764e-06, + "loss": 0.3639, + "step": 40452 + }, + { + "epoch": 0.6990081559303291, + "grad_norm": 1.243610906621391, + "learning_rate": 4.387107119915426e-06, + "loss": 0.4105, + "step": 40453 + }, + { + "epoch": 0.6990254354437379, + "grad_norm": 2.144635906356251, + "learning_rate": 4.386643951738092e-06, + "loss": 0.3313, + "step": 40454 + }, + { + "epoch": 0.6990427149571468, + "grad_norm": 1.4528420396850004, + "learning_rate": 4.38618080114222e-06, + "loss": 0.3685, + "step": 40455 + }, + { + "epoch": 0.6990599944705557, + "grad_norm": 0.9431533953044213, + "learning_rate": 4.385717668129249e-06, + "loss": 0.2999, + "step": 40456 + }, + { + "epoch": 0.6990772739839646, + "grad_norm": 1.4211473619308868, + "learning_rate": 4.385254552700644e-06, + "loss": 0.2596, + "step": 40457 + }, + { + "epoch": 0.6990945534973735, + "grad_norm": 1.2927358771689492, + "learning_rate": 4.384791454857848e-06, + "loss": 0.4408, + "step": 40458 + }, + { + "epoch": 0.6991118330107824, + "grad_norm": 1.2853026998274635, + "learning_rate": 4.38432837460231e-06, + "loss": 0.3564, + "step": 40459 + }, + { + "epoch": 0.6991291125241913, + "grad_norm": 0.9950388186631439, + "learning_rate": 4.383865311935485e-06, + "loss": 0.2645, + "step": 40460 + }, + { + "epoch": 0.6991463920376002, + "grad_norm": 0.9662360873129364, + "learning_rate": 4.383402266858816e-06, + "loss": 0.4074, + "step": 40461 + }, + { + "epoch": 0.6991636715510091, + "grad_norm": 1.150434396488926, + "learning_rate": 4.382939239373759e-06, + "loss": 0.4527, + "step": 40462 + }, + { + "epoch": 0.699180951064418, + "grad_norm": 1.470466142322755, + "learning_rate": 4.3824762294817636e-06, + "loss": 0.2988, + "step": 40463 + }, + { + "epoch": 0.6991982305778269, + "grad_norm": 1.6203529438199011, + "learning_rate": 4.3820132371842815e-06, + "loss": 0.335, + "step": 40464 + }, + { + "epoch": 0.6992155100912358, + "grad_norm": 1.391538303567942, + "learning_rate": 4.381550262482759e-06, + "loss": 0.4562, + "step": 40465 + }, + { + "epoch": 0.6992327896046447, + "grad_norm": 1.2930887803738536, + "learning_rate": 4.38108730537865e-06, + "loss": 0.3993, + "step": 40466 + }, + { + "epoch": 0.6992500691180537, + "grad_norm": 1.0975939866100242, + "learning_rate": 4.3806243658734005e-06, + "loss": 0.5361, + "step": 40467 + }, + { + "epoch": 0.6992673486314626, + "grad_norm": 0.9653527556686063, + "learning_rate": 4.380161443968463e-06, + "loss": 0.4518, + "step": 40468 + }, + { + "epoch": 0.6992846281448715, + "grad_norm": 1.121185040540162, + "learning_rate": 4.379698539665289e-06, + "loss": 0.4073, + "step": 40469 + }, + { + "epoch": 0.6993019076582804, + "grad_norm": 1.2100359132551988, + "learning_rate": 4.379235652965323e-06, + "loss": 0.21, + "step": 40470 + }, + { + "epoch": 0.6993191871716893, + "grad_norm": 1.0032679422719446, + "learning_rate": 4.378772783870023e-06, + "loss": 0.4279, + "step": 40471 + }, + { + "epoch": 0.6993364666850982, + "grad_norm": 1.218635978978697, + "learning_rate": 4.378309932380828e-06, + "loss": 0.3938, + "step": 40472 + }, + { + "epoch": 0.6993537461985071, + "grad_norm": 1.0950489248535202, + "learning_rate": 4.377847098499195e-06, + "loss": 0.3816, + "step": 40473 + }, + { + "epoch": 0.699371025711916, + "grad_norm": 1.1938463202886518, + "learning_rate": 4.377384282226571e-06, + "loss": 0.2947, + "step": 40474 + }, + { + "epoch": 0.6993883052253248, + "grad_norm": 1.4234481880914844, + "learning_rate": 4.376921483564412e-06, + "loss": 0.5005, + "step": 40475 + }, + { + "epoch": 0.6994055847387337, + "grad_norm": 1.4706676924090705, + "learning_rate": 4.376458702514158e-06, + "loss": 0.5937, + "step": 40476 + }, + { + "epoch": 0.6994228642521426, + "grad_norm": 1.1312342469464594, + "learning_rate": 4.375995939077265e-06, + "loss": 0.3727, + "step": 40477 + }, + { + "epoch": 0.6994401437655515, + "grad_norm": 1.074936466324376, + "learning_rate": 4.375533193255178e-06, + "loss": 0.2503, + "step": 40478 + }, + { + "epoch": 0.6994574232789604, + "grad_norm": 0.936442556293198, + "learning_rate": 4.375070465049348e-06, + "loss": 0.3077, + "step": 40479 + }, + { + "epoch": 0.6994747027923693, + "grad_norm": 1.6823954905141545, + "learning_rate": 4.3746077544612285e-06, + "loss": 0.2862, + "step": 40480 + }, + { + "epoch": 0.6994919823057782, + "grad_norm": 1.1323485921636927, + "learning_rate": 4.374145061492262e-06, + "loss": 0.3833, + "step": 40481 + }, + { + "epoch": 0.6995092618191872, + "grad_norm": 2.6108483374009355, + "learning_rate": 4.373682386143904e-06, + "loss": 0.33, + "step": 40482 + }, + { + "epoch": 0.6995265413325961, + "grad_norm": 1.462831269781344, + "learning_rate": 4.373219728417597e-06, + "loss": 0.4045, + "step": 40483 + }, + { + "epoch": 0.699543820846005, + "grad_norm": 1.0022945284829106, + "learning_rate": 4.372757088314793e-06, + "loss": 0.4727, + "step": 40484 + }, + { + "epoch": 0.6995611003594139, + "grad_norm": 1.8615983489778245, + "learning_rate": 4.372294465836941e-06, + "loss": 0.4019, + "step": 40485 + }, + { + "epoch": 0.6995783798728228, + "grad_norm": 1.2617531327443612, + "learning_rate": 4.371831860985495e-06, + "loss": 0.4272, + "step": 40486 + }, + { + "epoch": 0.6995956593862317, + "grad_norm": 0.834213998287928, + "learning_rate": 4.371369273761894e-06, + "loss": 0.3749, + "step": 40487 + }, + { + "epoch": 0.6996129388996406, + "grad_norm": 0.9539768512976957, + "learning_rate": 4.370906704167598e-06, + "loss": 0.4432, + "step": 40488 + }, + { + "epoch": 0.6996302184130495, + "grad_norm": 0.8145604903623342, + "learning_rate": 4.370444152204048e-06, + "loss": 0.2874, + "step": 40489 + }, + { + "epoch": 0.6996474979264584, + "grad_norm": 0.8823421656290713, + "learning_rate": 4.36998161787269e-06, + "loss": 0.4344, + "step": 40490 + }, + { + "epoch": 0.6996647774398673, + "grad_norm": 1.0591550232353806, + "learning_rate": 4.369519101174983e-06, + "loss": 0.3914, + "step": 40491 + }, + { + "epoch": 0.6996820569532762, + "grad_norm": 1.2442061616249496, + "learning_rate": 4.369056602112366e-06, + "loss": 0.3235, + "step": 40492 + }, + { + "epoch": 0.6996993364666851, + "grad_norm": 1.2419451015165417, + "learning_rate": 4.3685941206862956e-06, + "loss": 0.4564, + "step": 40493 + }, + { + "epoch": 0.699716615980094, + "grad_norm": 0.6868591515362489, + "learning_rate": 4.3681316568982134e-06, + "loss": 0.6081, + "step": 40494 + }, + { + "epoch": 0.699733895493503, + "grad_norm": 1.2931067378006376, + "learning_rate": 4.367669210749575e-06, + "loss": 0.2794, + "step": 40495 + }, + { + "epoch": 0.6997511750069118, + "grad_norm": 1.2517211839055613, + "learning_rate": 4.367206782241815e-06, + "loss": 0.3301, + "step": 40496 + }, + { + "epoch": 0.6997684545203207, + "grad_norm": 1.573788843978135, + "learning_rate": 4.366744371376401e-06, + "loss": 0.6457, + "step": 40497 + }, + { + "epoch": 0.6997857340337296, + "grad_norm": 0.9210918243820444, + "learning_rate": 4.366281978154767e-06, + "loss": 0.3961, + "step": 40498 + }, + { + "epoch": 0.6998030135471385, + "grad_norm": 1.0861554778909002, + "learning_rate": 4.3658196025783706e-06, + "loss": 0.3275, + "step": 40499 + }, + { + "epoch": 0.6998202930605474, + "grad_norm": 1.8720833159852803, + "learning_rate": 4.3653572446486545e-06, + "loss": 0.1658, + "step": 40500 + }, + { + "epoch": 0.6998375725739563, + "grad_norm": 1.3694193222901563, + "learning_rate": 4.364894904367064e-06, + "loss": 0.3941, + "step": 40501 + }, + { + "epoch": 0.6998548520873652, + "grad_norm": 2.0534770555475252, + "learning_rate": 4.364432581735052e-06, + "loss": 0.6148, + "step": 40502 + }, + { + "epoch": 0.6998721316007741, + "grad_norm": 0.8942460853058884, + "learning_rate": 4.363970276754064e-06, + "loss": 0.402, + "step": 40503 + }, + { + "epoch": 0.699889411114183, + "grad_norm": 1.421649270639764, + "learning_rate": 4.3635079894255535e-06, + "loss": 0.2852, + "step": 40504 + }, + { + "epoch": 0.6999066906275919, + "grad_norm": 1.3734470573491764, + "learning_rate": 4.3630457197509615e-06, + "loss": 0.4694, + "step": 40505 + }, + { + "epoch": 0.6999239701410008, + "grad_norm": 1.6939985161807858, + "learning_rate": 4.362583467731741e-06, + "loss": 0.5048, + "step": 40506 + }, + { + "epoch": 0.6999412496544097, + "grad_norm": 1.069769812790643, + "learning_rate": 4.3621212333693356e-06, + "loss": 0.2582, + "step": 40507 + }, + { + "epoch": 0.6999585291678186, + "grad_norm": 1.0532380800858736, + "learning_rate": 4.361659016665194e-06, + "loss": 0.4575, + "step": 40508 + }, + { + "epoch": 0.6999758086812276, + "grad_norm": 1.3923761182997085, + "learning_rate": 4.3611968176207655e-06, + "loss": 0.3344, + "step": 40509 + }, + { + "epoch": 0.6999930881946365, + "grad_norm": 1.805360075191238, + "learning_rate": 4.360734636237499e-06, + "loss": 0.3282, + "step": 40510 + }, + { + "epoch": 0.7000103677080454, + "grad_norm": 1.4484793495274944, + "learning_rate": 4.360272472516842e-06, + "loss": 0.2378, + "step": 40511 + }, + { + "epoch": 0.7000276472214543, + "grad_norm": 1.4724937115292178, + "learning_rate": 4.359810326460235e-06, + "loss": 0.4825, + "step": 40512 + }, + { + "epoch": 0.7000449267348632, + "grad_norm": 1.4151758612003704, + "learning_rate": 4.359348198069132e-06, + "loss": 0.5155, + "step": 40513 + }, + { + "epoch": 0.7000622062482721, + "grad_norm": 0.9246241259461861, + "learning_rate": 4.358886087344979e-06, + "loss": 0.4518, + "step": 40514 + }, + { + "epoch": 0.700079485761681, + "grad_norm": 1.151001428860941, + "learning_rate": 4.358423994289227e-06, + "loss": 0.2137, + "step": 40515 + }, + { + "epoch": 0.7000967652750899, + "grad_norm": 1.3292719954607395, + "learning_rate": 4.357961918903316e-06, + "loss": 0.3402, + "step": 40516 + }, + { + "epoch": 0.7001140447884987, + "grad_norm": 1.3380303975392802, + "learning_rate": 4.357499861188701e-06, + "loss": 0.5571, + "step": 40517 + }, + { + "epoch": 0.7001313243019076, + "grad_norm": 1.303573647059639, + "learning_rate": 4.357037821146821e-06, + "loss": 0.1974, + "step": 40518 + }, + { + "epoch": 0.7001486038153165, + "grad_norm": 1.2592177826788684, + "learning_rate": 4.356575798779128e-06, + "loss": 0.4453, + "step": 40519 + }, + { + "epoch": 0.7001658833287254, + "grad_norm": 1.5416647633127596, + "learning_rate": 4.356113794087071e-06, + "loss": 0.4353, + "step": 40520 + }, + { + "epoch": 0.7001831628421343, + "grad_norm": 1.3419149852760115, + "learning_rate": 4.3556518070720925e-06, + "loss": 0.4094, + "step": 40521 + }, + { + "epoch": 0.7002004423555432, + "grad_norm": 1.0940958295536591, + "learning_rate": 4.355189837735644e-06, + "loss": 0.4437, + "step": 40522 + }, + { + "epoch": 0.7002177218689521, + "grad_norm": 1.5164675039518536, + "learning_rate": 4.354727886079167e-06, + "loss": 0.3405, + "step": 40523 + }, + { + "epoch": 0.700235001382361, + "grad_norm": 1.5451702289987874, + "learning_rate": 4.354265952104111e-06, + "loss": 0.2225, + "step": 40524 + }, + { + "epoch": 0.70025228089577, + "grad_norm": 1.0544254823061625, + "learning_rate": 4.353804035811923e-06, + "loss": 0.4553, + "step": 40525 + }, + { + "epoch": 0.7002695604091789, + "grad_norm": 1.401818546515447, + "learning_rate": 4.353342137204053e-06, + "loss": 0.3451, + "step": 40526 + }, + { + "epoch": 0.7002868399225878, + "grad_norm": 1.231830332643025, + "learning_rate": 4.3528802562819415e-06, + "loss": 0.3767, + "step": 40527 + }, + { + "epoch": 0.7003041194359967, + "grad_norm": 1.4765303967285843, + "learning_rate": 4.352418393047042e-06, + "loss": 0.3982, + "step": 40528 + }, + { + "epoch": 0.7003213989494056, + "grad_norm": 1.181898973550531, + "learning_rate": 4.351956547500793e-06, + "loss": 0.4709, + "step": 40529 + }, + { + "epoch": 0.7003386784628145, + "grad_norm": 1.1718630794755822, + "learning_rate": 4.351494719644645e-06, + "loss": 0.5004, + "step": 40530 + }, + { + "epoch": 0.7003559579762234, + "grad_norm": 0.9370836808659956, + "learning_rate": 4.351032909480049e-06, + "loss": 0.3357, + "step": 40531 + }, + { + "epoch": 0.7003732374896323, + "grad_norm": 0.5817793785538785, + "learning_rate": 4.350571117008445e-06, + "loss": 0.6477, + "step": 40532 + }, + { + "epoch": 0.7003905170030412, + "grad_norm": 1.9593996139681682, + "learning_rate": 4.3501093422312835e-06, + "loss": 0.4547, + "step": 40533 + }, + { + "epoch": 0.7004077965164501, + "grad_norm": 1.4155730908368167, + "learning_rate": 4.349647585150006e-06, + "loss": 0.4235, + "step": 40534 + }, + { + "epoch": 0.700425076029859, + "grad_norm": 1.9804304828101391, + "learning_rate": 4.349185845766065e-06, + "loss": 0.4822, + "step": 40535 + }, + { + "epoch": 0.700442355543268, + "grad_norm": 1.6350070905876655, + "learning_rate": 4.348724124080896e-06, + "loss": 0.322, + "step": 40536 + }, + { + "epoch": 0.7004596350566769, + "grad_norm": 0.7992559274720884, + "learning_rate": 4.34826242009596e-06, + "loss": 0.3226, + "step": 40537 + }, + { + "epoch": 0.7004769145700857, + "grad_norm": 0.8076608410300761, + "learning_rate": 4.347800733812693e-06, + "loss": 0.5055, + "step": 40538 + }, + { + "epoch": 0.7004941940834946, + "grad_norm": 1.6488049147155994, + "learning_rate": 4.347339065232546e-06, + "loss": 0.2731, + "step": 40539 + }, + { + "epoch": 0.7005114735969035, + "grad_norm": 1.0048566969849888, + "learning_rate": 4.3468774143569594e-06, + "loss": 0.3838, + "step": 40540 + }, + { + "epoch": 0.7005287531103124, + "grad_norm": 1.2358982666941514, + "learning_rate": 4.346415781187386e-06, + "loss": 0.2895, + "step": 40541 + }, + { + "epoch": 0.7005460326237213, + "grad_norm": 1.1739100635585664, + "learning_rate": 4.3459541657252635e-06, + "loss": 0.3026, + "step": 40542 + }, + { + "epoch": 0.7005633121371302, + "grad_norm": 1.5550200915861805, + "learning_rate": 4.345492567972043e-06, + "loss": 0.4317, + "step": 40543 + }, + { + "epoch": 0.7005805916505391, + "grad_norm": 1.8343620433342338, + "learning_rate": 4.345030987929173e-06, + "loss": 0.5822, + "step": 40544 + }, + { + "epoch": 0.700597871163948, + "grad_norm": 1.3350259196269307, + "learning_rate": 4.344569425598093e-06, + "loss": 0.2898, + "step": 40545 + }, + { + "epoch": 0.7006151506773569, + "grad_norm": 0.8219333983906283, + "learning_rate": 4.344107880980254e-06, + "loss": 0.6725, + "step": 40546 + }, + { + "epoch": 0.7006324301907658, + "grad_norm": 0.8055153846189842, + "learning_rate": 4.343646354077096e-06, + "loss": 0.6359, + "step": 40547 + }, + { + "epoch": 0.7006497097041747, + "grad_norm": 1.032756241105753, + "learning_rate": 4.343184844890066e-06, + "loss": 0.8056, + "step": 40548 + }, + { + "epoch": 0.7006669892175836, + "grad_norm": 0.7904175480046627, + "learning_rate": 4.342723353420612e-06, + "loss": 0.243, + "step": 40549 + }, + { + "epoch": 0.7006842687309925, + "grad_norm": 1.0637613855628776, + "learning_rate": 4.342261879670182e-06, + "loss": 0.2236, + "step": 40550 + }, + { + "epoch": 0.7007015482444015, + "grad_norm": 1.3260632053727572, + "learning_rate": 4.341800423640218e-06, + "loss": 0.4657, + "step": 40551 + }, + { + "epoch": 0.7007188277578104, + "grad_norm": 1.1601722996939696, + "learning_rate": 4.341338985332162e-06, + "loss": 0.4205, + "step": 40552 + }, + { + "epoch": 0.7007361072712193, + "grad_norm": 0.9809622444977483, + "learning_rate": 4.340877564747461e-06, + "loss": 0.3869, + "step": 40553 + }, + { + "epoch": 0.7007533867846282, + "grad_norm": 1.0343456733826026, + "learning_rate": 4.340416161887561e-06, + "loss": 0.3824, + "step": 40554 + }, + { + "epoch": 0.7007706662980371, + "grad_norm": 0.5752700569259398, + "learning_rate": 4.339954776753913e-06, + "loss": 0.1892, + "step": 40555 + }, + { + "epoch": 0.700787945811446, + "grad_norm": 1.9935632254334315, + "learning_rate": 4.339493409347953e-06, + "loss": 0.3877, + "step": 40556 + }, + { + "epoch": 0.7008052253248549, + "grad_norm": 1.0294487614959693, + "learning_rate": 4.339032059671134e-06, + "loss": 0.3608, + "step": 40557 + }, + { + "epoch": 0.7008225048382638, + "grad_norm": 1.5592464352131126, + "learning_rate": 4.338570727724891e-06, + "loss": 0.3999, + "step": 40558 + }, + { + "epoch": 0.7008397843516727, + "grad_norm": 1.0388566148904463, + "learning_rate": 4.338109413510676e-06, + "loss": 0.3634, + "step": 40559 + }, + { + "epoch": 0.7008570638650815, + "grad_norm": 1.940989609160205, + "learning_rate": 4.337648117029933e-06, + "loss": 0.4235, + "step": 40560 + }, + { + "epoch": 0.7008743433784904, + "grad_norm": 1.0169474876050812, + "learning_rate": 4.33718683828411e-06, + "loss": 0.4411, + "step": 40561 + }, + { + "epoch": 0.7008916228918993, + "grad_norm": 1.0704767572668188, + "learning_rate": 4.336725577274647e-06, + "loss": 0.3786, + "step": 40562 + }, + { + "epoch": 0.7009089024053082, + "grad_norm": 0.9690058085481578, + "learning_rate": 4.336264334002987e-06, + "loss": 0.4713, + "step": 40563 + }, + { + "epoch": 0.7009261819187171, + "grad_norm": 1.3107552207401878, + "learning_rate": 4.3358031084705775e-06, + "loss": 0.3339, + "step": 40564 + }, + { + "epoch": 0.700943461432126, + "grad_norm": 1.2106687835553744, + "learning_rate": 4.335341900678864e-06, + "loss": 0.3886, + "step": 40565 + }, + { + "epoch": 0.700960740945535, + "grad_norm": 1.171216392341455, + "learning_rate": 4.3348807106292924e-06, + "loss": 0.3947, + "step": 40566 + }, + { + "epoch": 0.7009780204589439, + "grad_norm": 1.5174049455745195, + "learning_rate": 4.334419538323302e-06, + "loss": 0.2419, + "step": 40567 + }, + { + "epoch": 0.7009952999723528, + "grad_norm": 0.9832119141463861, + "learning_rate": 4.333958383762345e-06, + "loss": 0.2519, + "step": 40568 + }, + { + "epoch": 0.7010125794857617, + "grad_norm": 1.3153504341259916, + "learning_rate": 4.333497246947856e-06, + "loss": 0.2917, + "step": 40569 + }, + { + "epoch": 0.7010298589991706, + "grad_norm": 1.1197308932668173, + "learning_rate": 4.333036127881284e-06, + "loss": 0.2597, + "step": 40570 + }, + { + "epoch": 0.7010471385125795, + "grad_norm": 1.1683880722532263, + "learning_rate": 4.332575026564073e-06, + "loss": 0.3589, + "step": 40571 + }, + { + "epoch": 0.7010644180259884, + "grad_norm": 1.82475004573514, + "learning_rate": 4.332113942997672e-06, + "loss": 0.6073, + "step": 40572 + }, + { + "epoch": 0.7010816975393973, + "grad_norm": 1.0020485791429632, + "learning_rate": 4.331652877183521e-06, + "loss": 0.3044, + "step": 40573 + }, + { + "epoch": 0.7010989770528062, + "grad_norm": 0.7208492309132258, + "learning_rate": 4.33119182912306e-06, + "loss": 0.1516, + "step": 40574 + }, + { + "epoch": 0.7011162565662151, + "grad_norm": 1.5319849088584574, + "learning_rate": 4.330730798817742e-06, + "loss": 0.4313, + "step": 40575 + }, + { + "epoch": 0.701133536079624, + "grad_norm": 2.4004398222729444, + "learning_rate": 4.330269786268998e-06, + "loss": 0.446, + "step": 40576 + }, + { + "epoch": 0.7011508155930329, + "grad_norm": 1.2557658917278374, + "learning_rate": 4.329808791478286e-06, + "loss": 0.2262, + "step": 40577 + }, + { + "epoch": 0.7011680951064418, + "grad_norm": 1.1835151633122334, + "learning_rate": 4.329347814447041e-06, + "loss": 0.3565, + "step": 40578 + }, + { + "epoch": 0.7011853746198508, + "grad_norm": 2.0465446274177603, + "learning_rate": 4.3288868551767136e-06, + "loss": 0.3822, + "step": 40579 + }, + { + "epoch": 0.7012026541332597, + "grad_norm": 0.9503159937397505, + "learning_rate": 4.32842591366874e-06, + "loss": 0.3351, + "step": 40580 + }, + { + "epoch": 0.7012199336466685, + "grad_norm": 1.5945592316070756, + "learning_rate": 4.32796498992457e-06, + "loss": 0.2772, + "step": 40581 + }, + { + "epoch": 0.7012372131600774, + "grad_norm": 1.0805866726909865, + "learning_rate": 4.327504083945643e-06, + "loss": 0.3366, + "step": 40582 + }, + { + "epoch": 0.7012544926734863, + "grad_norm": 0.8578922097588048, + "learning_rate": 4.327043195733402e-06, + "loss": 0.6611, + "step": 40583 + }, + { + "epoch": 0.7012717721868952, + "grad_norm": 1.968285585593288, + "learning_rate": 4.326582325289298e-06, + "loss": 0.4161, + "step": 40584 + }, + { + "epoch": 0.7012890517003041, + "grad_norm": 1.785167769527586, + "learning_rate": 4.326121472614765e-06, + "loss": 0.4219, + "step": 40585 + }, + { + "epoch": 0.701306331213713, + "grad_norm": 1.1030315219155007, + "learning_rate": 4.3256606377112554e-06, + "loss": 0.4746, + "step": 40586 + }, + { + "epoch": 0.7013236107271219, + "grad_norm": 1.0737072901222606, + "learning_rate": 4.3251998205802035e-06, + "loss": 0.7085, + "step": 40587 + }, + { + "epoch": 0.7013408902405308, + "grad_norm": 0.7956616606228493, + "learning_rate": 4.324739021223057e-06, + "loss": 0.3109, + "step": 40588 + }, + { + "epoch": 0.7013581697539397, + "grad_norm": 1.2803736593566963, + "learning_rate": 4.324278239641259e-06, + "loss": 0.3253, + "step": 40589 + }, + { + "epoch": 0.7013754492673486, + "grad_norm": 1.0630173097765594, + "learning_rate": 4.323817475836257e-06, + "loss": 0.6417, + "step": 40590 + }, + { + "epoch": 0.7013927287807575, + "grad_norm": 1.1848912205155149, + "learning_rate": 4.323356729809486e-06, + "loss": 0.3616, + "step": 40591 + }, + { + "epoch": 0.7014100082941664, + "grad_norm": 0.6433023274648589, + "learning_rate": 4.322896001562397e-06, + "loss": 0.2905, + "step": 40592 + }, + { + "epoch": 0.7014272878075754, + "grad_norm": 1.2217057924291528, + "learning_rate": 4.322435291096425e-06, + "loss": 0.3392, + "step": 40593 + }, + { + "epoch": 0.7014445673209843, + "grad_norm": 0.8214257681182452, + "learning_rate": 4.321974598413018e-06, + "loss": 0.5674, + "step": 40594 + }, + { + "epoch": 0.7014618468343932, + "grad_norm": 0.8365508718363983, + "learning_rate": 4.321513923513622e-06, + "loss": 0.6478, + "step": 40595 + }, + { + "epoch": 0.7014791263478021, + "grad_norm": 0.8462691873777268, + "learning_rate": 4.3210532663996705e-06, + "loss": 0.3547, + "step": 40596 + }, + { + "epoch": 0.701496405861211, + "grad_norm": 1.1121252911735378, + "learning_rate": 4.320592627072617e-06, + "loss": 0.4232, + "step": 40597 + }, + { + "epoch": 0.7015136853746199, + "grad_norm": 1.119668818006473, + "learning_rate": 4.3201320055338945e-06, + "loss": 0.4376, + "step": 40598 + }, + { + "epoch": 0.7015309648880288, + "grad_norm": 0.57341929614706, + "learning_rate": 4.319671401784952e-06, + "loss": 0.5517, + "step": 40599 + }, + { + "epoch": 0.7015482444014377, + "grad_norm": 1.116342152493681, + "learning_rate": 4.3192108158272285e-06, + "loss": 0.3499, + "step": 40600 + }, + { + "epoch": 0.7015655239148466, + "grad_norm": 1.6108756225065637, + "learning_rate": 4.318750247662172e-06, + "loss": 0.3945, + "step": 40601 + }, + { + "epoch": 0.7015828034282554, + "grad_norm": 1.3327820724000516, + "learning_rate": 4.318289697291219e-06, + "loss": 0.3157, + "step": 40602 + }, + { + "epoch": 0.7016000829416643, + "grad_norm": 0.9812249398396801, + "learning_rate": 4.317829164715818e-06, + "loss": 0.6291, + "step": 40603 + }, + { + "epoch": 0.7016173624550732, + "grad_norm": 1.0317102710765271, + "learning_rate": 4.317368649937403e-06, + "loss": 0.285, + "step": 40604 + }, + { + "epoch": 0.7016346419684821, + "grad_norm": 2.2439312632817896, + "learning_rate": 4.316908152957422e-06, + "loss": 0.3033, + "step": 40605 + }, + { + "epoch": 0.701651921481891, + "grad_norm": 1.436026501383125, + "learning_rate": 4.31644767377732e-06, + "loss": 0.4631, + "step": 40606 + }, + { + "epoch": 0.7016692009953, + "grad_norm": 0.7788328600481988, + "learning_rate": 4.315987212398533e-06, + "loss": 0.3654, + "step": 40607 + }, + { + "epoch": 0.7016864805087089, + "grad_norm": 0.9520068519730154, + "learning_rate": 4.315526768822509e-06, + "loss": 0.4808, + "step": 40608 + }, + { + "epoch": 0.7017037600221178, + "grad_norm": 0.7041696715757305, + "learning_rate": 4.315066343050683e-06, + "loss": 0.8538, + "step": 40609 + }, + { + "epoch": 0.7017210395355267, + "grad_norm": 1.5815324589023945, + "learning_rate": 4.314605935084503e-06, + "loss": 0.2653, + "step": 40610 + }, + { + "epoch": 0.7017383190489356, + "grad_norm": 1.4089007252910002, + "learning_rate": 4.314145544925408e-06, + "loss": 0.531, + "step": 40611 + }, + { + "epoch": 0.7017555985623445, + "grad_norm": 1.189670326658396, + "learning_rate": 4.313685172574844e-06, + "loss": 0.5007, + "step": 40612 + }, + { + "epoch": 0.7017728780757534, + "grad_norm": 1.5854347842132956, + "learning_rate": 4.313224818034252e-06, + "loss": 0.51, + "step": 40613 + }, + { + "epoch": 0.7017901575891623, + "grad_norm": 1.5447670528419661, + "learning_rate": 4.312764481305067e-06, + "loss": 0.2387, + "step": 40614 + }, + { + "epoch": 0.7018074371025712, + "grad_norm": 1.1117127824415651, + "learning_rate": 4.3123041623887405e-06, + "loss": 0.356, + "step": 40615 + }, + { + "epoch": 0.7018247166159801, + "grad_norm": 0.9361357356043203, + "learning_rate": 4.311843861286702e-06, + "loss": 0.3581, + "step": 40616 + }, + { + "epoch": 0.701841996129389, + "grad_norm": 0.9772808818474419, + "learning_rate": 4.311383578000408e-06, + "loss": 0.2999, + "step": 40617 + }, + { + "epoch": 0.7018592756427979, + "grad_norm": 1.2305545299657457, + "learning_rate": 4.310923312531289e-06, + "loss": 0.4377, + "step": 40618 + }, + { + "epoch": 0.7018765551562068, + "grad_norm": 2.4333762860976456, + "learning_rate": 4.310463064880794e-06, + "loss": 0.3672, + "step": 40619 + }, + { + "epoch": 0.7018938346696157, + "grad_norm": 1.8223500390505454, + "learning_rate": 4.310002835050358e-06, + "loss": 0.382, + "step": 40620 + }, + { + "epoch": 0.7019111141830247, + "grad_norm": 1.2420157263824219, + "learning_rate": 4.309542623041429e-06, + "loss": 0.2427, + "step": 40621 + }, + { + "epoch": 0.7019283936964336, + "grad_norm": 0.9886958101584042, + "learning_rate": 4.3090824288554425e-06, + "loss": 0.3308, + "step": 40622 + }, + { + "epoch": 0.7019456732098424, + "grad_norm": 1.3038955720364849, + "learning_rate": 4.308622252493842e-06, + "loss": 0.3447, + "step": 40623 + }, + { + "epoch": 0.7019629527232513, + "grad_norm": 0.956867842522109, + "learning_rate": 4.3081620939580715e-06, + "loss": 0.4166, + "step": 40624 + }, + { + "epoch": 0.7019802322366602, + "grad_norm": 0.5076544816826358, + "learning_rate": 4.3077019532495665e-06, + "loss": 0.4342, + "step": 40625 + }, + { + "epoch": 0.7019975117500691, + "grad_norm": 1.2121157922966597, + "learning_rate": 4.307241830369776e-06, + "loss": 0.5231, + "step": 40626 + }, + { + "epoch": 0.702014791263478, + "grad_norm": 1.762253251396033, + "learning_rate": 4.3067817253201315e-06, + "loss": 0.4666, + "step": 40627 + }, + { + "epoch": 0.7020320707768869, + "grad_norm": 0.9297825481504767, + "learning_rate": 4.3063216381020815e-06, + "loss": 0.4052, + "step": 40628 + }, + { + "epoch": 0.7020493502902958, + "grad_norm": 0.7533037369834485, + "learning_rate": 4.3058615687170634e-06, + "loss": 0.3394, + "step": 40629 + }, + { + "epoch": 0.7020666298037047, + "grad_norm": 1.1450194717960305, + "learning_rate": 4.305401517166523e-06, + "loss": 0.5376, + "step": 40630 + }, + { + "epoch": 0.7020839093171136, + "grad_norm": 1.5075741204418807, + "learning_rate": 4.304941483451895e-06, + "loss": 0.3536, + "step": 40631 + }, + { + "epoch": 0.7021011888305225, + "grad_norm": 1.3189362370209319, + "learning_rate": 4.304481467574627e-06, + "loss": 0.3323, + "step": 40632 + }, + { + "epoch": 0.7021184683439314, + "grad_norm": 1.0235366971408655, + "learning_rate": 4.304021469536151e-06, + "loss": 0.2213, + "step": 40633 + }, + { + "epoch": 0.7021357478573403, + "grad_norm": 1.008972504389391, + "learning_rate": 4.303561489337914e-06, + "loss": 0.3581, + "step": 40634 + }, + { + "epoch": 0.7021530273707492, + "grad_norm": 1.2013396506429337, + "learning_rate": 4.303101526981358e-06, + "loss": 0.3795, + "step": 40635 + }, + { + "epoch": 0.7021703068841582, + "grad_norm": 1.1083248908027963, + "learning_rate": 4.302641582467918e-06, + "loss": 0.3948, + "step": 40636 + }, + { + "epoch": 0.7021875863975671, + "grad_norm": 1.3411940887559397, + "learning_rate": 4.302181655799041e-06, + "loss": 0.4384, + "step": 40637 + }, + { + "epoch": 0.702204865910976, + "grad_norm": 1.034438832284785, + "learning_rate": 4.301721746976161e-06, + "loss": 0.1981, + "step": 40638 + }, + { + "epoch": 0.7022221454243849, + "grad_norm": 0.9035042426546301, + "learning_rate": 4.30126185600072e-06, + "loss": 0.4276, + "step": 40639 + }, + { + "epoch": 0.7022394249377938, + "grad_norm": 1.2098569946713604, + "learning_rate": 4.300801982874161e-06, + "loss": 0.3847, + "step": 40640 + }, + { + "epoch": 0.7022567044512027, + "grad_norm": 0.7121577362212322, + "learning_rate": 4.300342127597927e-06, + "loss": 0.6835, + "step": 40641 + }, + { + "epoch": 0.7022739839646116, + "grad_norm": 1.498851884307417, + "learning_rate": 4.29988229017345e-06, + "loss": 0.429, + "step": 40642 + }, + { + "epoch": 0.7022912634780205, + "grad_norm": 1.3261745308953428, + "learning_rate": 4.299422470602179e-06, + "loss": 0.2471, + "step": 40643 + }, + { + "epoch": 0.7023085429914293, + "grad_norm": 0.8453892405187127, + "learning_rate": 4.298962668885547e-06, + "loss": 0.4517, + "step": 40644 + }, + { + "epoch": 0.7023258225048382, + "grad_norm": 1.031597527826793, + "learning_rate": 4.298502885024997e-06, + "loss": 0.4208, + "step": 40645 + }, + { + "epoch": 0.7023431020182471, + "grad_norm": 1.2828500020134965, + "learning_rate": 4.298043119021973e-06, + "loss": 0.32, + "step": 40646 + }, + { + "epoch": 0.702360381531656, + "grad_norm": 0.7677431636587966, + "learning_rate": 4.297583370877907e-06, + "loss": 0.4692, + "step": 40647 + }, + { + "epoch": 0.7023776610450649, + "grad_norm": 0.6934242575741361, + "learning_rate": 4.297123640594247e-06, + "loss": 0.2676, + "step": 40648 + }, + { + "epoch": 0.7023949405584738, + "grad_norm": 1.4612805897514027, + "learning_rate": 4.2966639281724266e-06, + "loss": 0.2859, + "step": 40649 + }, + { + "epoch": 0.7024122200718828, + "grad_norm": 1.1065018653500323, + "learning_rate": 4.296204233613888e-06, + "loss": 0.3421, + "step": 40650 + }, + { + "epoch": 0.7024294995852917, + "grad_norm": 0.5990570045894817, + "learning_rate": 4.29574455692007e-06, + "loss": 0.6357, + "step": 40651 + }, + { + "epoch": 0.7024467790987006, + "grad_norm": 1.2949570848854033, + "learning_rate": 4.2952848980924175e-06, + "loss": 0.2317, + "step": 40652 + }, + { + "epoch": 0.7024640586121095, + "grad_norm": 1.5863680845206072, + "learning_rate": 4.294825257132364e-06, + "loss": 0.3593, + "step": 40653 + }, + { + "epoch": 0.7024813381255184, + "grad_norm": 1.2243855955209344, + "learning_rate": 4.294365634041353e-06, + "loss": 0.5596, + "step": 40654 + }, + { + "epoch": 0.7024986176389273, + "grad_norm": 0.5982507533063965, + "learning_rate": 4.293906028820823e-06, + "loss": 0.7772, + "step": 40655 + }, + { + "epoch": 0.7025158971523362, + "grad_norm": 1.5760829694706806, + "learning_rate": 4.293446441472207e-06, + "loss": 0.4211, + "step": 40656 + }, + { + "epoch": 0.7025331766657451, + "grad_norm": 1.2079159716188128, + "learning_rate": 4.292986871996956e-06, + "loss": 0.4271, + "step": 40657 + }, + { + "epoch": 0.702550456179154, + "grad_norm": 0.9435878139140005, + "learning_rate": 4.292527320396501e-06, + "loss": 0.3742, + "step": 40658 + }, + { + "epoch": 0.7025677356925629, + "grad_norm": 1.253562695851898, + "learning_rate": 4.292067786672288e-06, + "loss": 0.399, + "step": 40659 + }, + { + "epoch": 0.7025850152059718, + "grad_norm": 1.5422462819600122, + "learning_rate": 4.291608270825749e-06, + "loss": 0.401, + "step": 40660 + }, + { + "epoch": 0.7026022947193807, + "grad_norm": 0.8633797619155968, + "learning_rate": 4.291148772858329e-06, + "loss": 0.5569, + "step": 40661 + }, + { + "epoch": 0.7026195742327896, + "grad_norm": 1.824397020615575, + "learning_rate": 4.290689292771462e-06, + "loss": 0.3374, + "step": 40662 + }, + { + "epoch": 0.7026368537461986, + "grad_norm": 1.5156149121895033, + "learning_rate": 4.290229830566591e-06, + "loss": 0.3191, + "step": 40663 + }, + { + "epoch": 0.7026541332596075, + "grad_norm": 1.8088566224933353, + "learning_rate": 4.289770386245157e-06, + "loss": 0.2489, + "step": 40664 + }, + { + "epoch": 0.7026714127730163, + "grad_norm": 1.214241489195178, + "learning_rate": 4.289310959808592e-06, + "loss": 0.4623, + "step": 40665 + }, + { + "epoch": 0.7026886922864252, + "grad_norm": 1.2277296418392603, + "learning_rate": 4.288851551258344e-06, + "loss": 0.3316, + "step": 40666 + }, + { + "epoch": 0.7027059717998341, + "grad_norm": 0.811375562379927, + "learning_rate": 4.288392160595843e-06, + "loss": 0.3289, + "step": 40667 + }, + { + "epoch": 0.702723251313243, + "grad_norm": 1.5786147338649448, + "learning_rate": 4.2879327878225305e-06, + "loss": 0.4546, + "step": 40668 + }, + { + "epoch": 0.7027405308266519, + "grad_norm": 0.9175087759164672, + "learning_rate": 4.287473432939847e-06, + "loss": 0.2366, + "step": 40669 + }, + { + "epoch": 0.7027578103400608, + "grad_norm": 1.1685277040759527, + "learning_rate": 4.287014095949236e-06, + "loss": 0.4625, + "step": 40670 + }, + { + "epoch": 0.7027750898534697, + "grad_norm": 0.8344072102094466, + "learning_rate": 4.286554776852125e-06, + "loss": 0.1827, + "step": 40671 + }, + { + "epoch": 0.7027923693668786, + "grad_norm": 1.6810563813552863, + "learning_rate": 4.286095475649964e-06, + "loss": 0.3497, + "step": 40672 + }, + { + "epoch": 0.7028096488802875, + "grad_norm": 1.090394613633193, + "learning_rate": 4.285636192344183e-06, + "loss": 0.2605, + "step": 40673 + }, + { + "epoch": 0.7028269283936964, + "grad_norm": 1.1313233469001787, + "learning_rate": 4.285176926936222e-06, + "loss": 0.3022, + "step": 40674 + }, + { + "epoch": 0.7028442079071053, + "grad_norm": 1.2128063200349857, + "learning_rate": 4.284717679427526e-06, + "loss": 0.366, + "step": 40675 + }, + { + "epoch": 0.7028614874205142, + "grad_norm": 1.6760277157449655, + "learning_rate": 4.284258449819525e-06, + "loss": 0.4911, + "step": 40676 + }, + { + "epoch": 0.7028787669339231, + "grad_norm": 1.0808280597842026, + "learning_rate": 4.283799238113664e-06, + "loss": 0.334, + "step": 40677 + }, + { + "epoch": 0.7028960464473321, + "grad_norm": 0.7469568393739365, + "learning_rate": 4.2833400443113745e-06, + "loss": 0.2496, + "step": 40678 + }, + { + "epoch": 0.702913325960741, + "grad_norm": 1.0386053803933537, + "learning_rate": 4.282880868414099e-06, + "loss": 0.3037, + "step": 40679 + }, + { + "epoch": 0.7029306054741499, + "grad_norm": 0.8698685248437144, + "learning_rate": 4.282421710423274e-06, + "loss": 0.3567, + "step": 40680 + }, + { + "epoch": 0.7029478849875588, + "grad_norm": 0.8792149041680015, + "learning_rate": 4.281962570340343e-06, + "loss": 0.4635, + "step": 40681 + }, + { + "epoch": 0.7029651645009677, + "grad_norm": 1.4232517450036786, + "learning_rate": 4.281503448166737e-06, + "loss": 0.5053, + "step": 40682 + }, + { + "epoch": 0.7029824440143766, + "grad_norm": 1.3226247250082421, + "learning_rate": 4.2810443439039e-06, + "loss": 0.2537, + "step": 40683 + }, + { + "epoch": 0.7029997235277855, + "grad_norm": 1.2163152557084953, + "learning_rate": 4.280585257553263e-06, + "loss": 0.487, + "step": 40684 + }, + { + "epoch": 0.7030170030411944, + "grad_norm": 1.1702074138342737, + "learning_rate": 4.280126189116267e-06, + "loss": 0.3004, + "step": 40685 + }, + { + "epoch": 0.7030342825546032, + "grad_norm": 1.0923383787216254, + "learning_rate": 4.2796671385943544e-06, + "loss": 0.4079, + "step": 40686 + }, + { + "epoch": 0.7030515620680121, + "grad_norm": 1.3576092783582638, + "learning_rate": 4.2792081059889556e-06, + "loss": 0.5141, + "step": 40687 + }, + { + "epoch": 0.703068841581421, + "grad_norm": 2.507345155986739, + "learning_rate": 4.278749091301516e-06, + "loss": 0.6882, + "step": 40688 + }, + { + "epoch": 0.7030861210948299, + "grad_norm": 2.813473529483363, + "learning_rate": 4.278290094533465e-06, + "loss": 0.5296, + "step": 40689 + }, + { + "epoch": 0.7031034006082388, + "grad_norm": 0.888225434794492, + "learning_rate": 4.277831115686244e-06, + "loss": 0.5173, + "step": 40690 + }, + { + "epoch": 0.7031206801216477, + "grad_norm": 1.4318996077737038, + "learning_rate": 4.27737215476129e-06, + "loss": 0.5473, + "step": 40691 + }, + { + "epoch": 0.7031379596350567, + "grad_norm": 1.636514579578435, + "learning_rate": 4.276913211760046e-06, + "loss": 0.6546, + "step": 40692 + }, + { + "epoch": 0.7031552391484656, + "grad_norm": 0.674314059033678, + "learning_rate": 4.276454286683941e-06, + "loss": 0.4722, + "step": 40693 + }, + { + "epoch": 0.7031725186618745, + "grad_norm": 1.1590513741234418, + "learning_rate": 4.27599537953442e-06, + "loss": 0.3139, + "step": 40694 + }, + { + "epoch": 0.7031897981752834, + "grad_norm": 1.0714584949973656, + "learning_rate": 4.275536490312916e-06, + "loss": 0.4451, + "step": 40695 + }, + { + "epoch": 0.7032070776886923, + "grad_norm": 0.9780313648171626, + "learning_rate": 4.275077619020864e-06, + "loss": 0.4047, + "step": 40696 + }, + { + "epoch": 0.7032243572021012, + "grad_norm": 1.403402038937739, + "learning_rate": 4.274618765659703e-06, + "loss": 0.3115, + "step": 40697 + }, + { + "epoch": 0.7032416367155101, + "grad_norm": 1.2588107353050895, + "learning_rate": 4.274159930230871e-06, + "loss": 0.224, + "step": 40698 + }, + { + "epoch": 0.703258916228919, + "grad_norm": 1.1588426484811924, + "learning_rate": 4.27370111273581e-06, + "loss": 0.4022, + "step": 40699 + }, + { + "epoch": 0.7032761957423279, + "grad_norm": 1.85233284127199, + "learning_rate": 4.273242313175948e-06, + "loss": 0.2364, + "step": 40700 + }, + { + "epoch": 0.7032934752557368, + "grad_norm": 1.1948079124530557, + "learning_rate": 4.27278353155273e-06, + "loss": 0.4062, + "step": 40701 + }, + { + "epoch": 0.7033107547691457, + "grad_norm": 1.2561783359152108, + "learning_rate": 4.272324767867585e-06, + "loss": 0.5304, + "step": 40702 + }, + { + "epoch": 0.7033280342825546, + "grad_norm": 1.4053682820362654, + "learning_rate": 4.271866022121955e-06, + "loss": 0.3303, + "step": 40703 + }, + { + "epoch": 0.7033453137959635, + "grad_norm": 1.2644934399902168, + "learning_rate": 4.2714072943172755e-06, + "loss": 0.589, + "step": 40704 + }, + { + "epoch": 0.7033625933093725, + "grad_norm": 1.3610382924522582, + "learning_rate": 4.270948584454988e-06, + "loss": 0.3206, + "step": 40705 + }, + { + "epoch": 0.7033798728227814, + "grad_norm": 1.1776634850838323, + "learning_rate": 4.270489892536524e-06, + "loss": 0.1606, + "step": 40706 + }, + { + "epoch": 0.7033971523361903, + "grad_norm": 0.9436274185372308, + "learning_rate": 4.270031218563318e-06, + "loss": 0.316, + "step": 40707 + }, + { + "epoch": 0.7034144318495991, + "grad_norm": 0.6455559489828742, + "learning_rate": 4.269572562536809e-06, + "loss": 0.2489, + "step": 40708 + }, + { + "epoch": 0.703431711363008, + "grad_norm": 1.1830125357923535, + "learning_rate": 4.269113924458434e-06, + "loss": 0.3821, + "step": 40709 + }, + { + "epoch": 0.7034489908764169, + "grad_norm": 1.1162008634582197, + "learning_rate": 4.2686553043296355e-06, + "loss": 0.268, + "step": 40710 + }, + { + "epoch": 0.7034662703898258, + "grad_norm": 0.8443061734802406, + "learning_rate": 4.268196702151839e-06, + "loss": 0.3724, + "step": 40711 + }, + { + "epoch": 0.7034835499032347, + "grad_norm": 1.1452827640442889, + "learning_rate": 4.26773811792649e-06, + "loss": 0.5242, + "step": 40712 + }, + { + "epoch": 0.7035008294166436, + "grad_norm": 2.065197436565157, + "learning_rate": 4.2672795516550155e-06, + "loss": 0.3384, + "step": 40713 + }, + { + "epoch": 0.7035181089300525, + "grad_norm": 1.4299617642067022, + "learning_rate": 4.266821003338859e-06, + "loss": 0.3276, + "step": 40714 + }, + { + "epoch": 0.7035353884434614, + "grad_norm": 0.9011335189859432, + "learning_rate": 4.266362472979455e-06, + "loss": 0.3205, + "step": 40715 + }, + { + "epoch": 0.7035526679568703, + "grad_norm": 1.6454858804651085, + "learning_rate": 4.265903960578242e-06, + "loss": 0.381, + "step": 40716 + }, + { + "epoch": 0.7035699474702792, + "grad_norm": 1.0147154104298854, + "learning_rate": 4.265445466136654e-06, + "loss": 0.4258, + "step": 40717 + }, + { + "epoch": 0.7035872269836881, + "grad_norm": 0.9366727273623194, + "learning_rate": 4.2649869896561235e-06, + "loss": 0.3495, + "step": 40718 + }, + { + "epoch": 0.703604506497097, + "grad_norm": 0.5340981871986287, + "learning_rate": 4.264528531138089e-06, + "loss": 0.7229, + "step": 40719 + }, + { + "epoch": 0.703621786010506, + "grad_norm": 1.2696232485347776, + "learning_rate": 4.264070090583987e-06, + "loss": 0.358, + "step": 40720 + }, + { + "epoch": 0.7036390655239149, + "grad_norm": 0.9133786294827491, + "learning_rate": 4.263611667995258e-06, + "loss": 0.4055, + "step": 40721 + }, + { + "epoch": 0.7036563450373238, + "grad_norm": 1.157316018008374, + "learning_rate": 4.263153263373329e-06, + "loss": 0.3873, + "step": 40722 + }, + { + "epoch": 0.7036736245507327, + "grad_norm": 1.287771905863157, + "learning_rate": 4.262694876719643e-06, + "loss": 0.3485, + "step": 40723 + }, + { + "epoch": 0.7036909040641416, + "grad_norm": 0.9092236485919079, + "learning_rate": 4.262236508035629e-06, + "loss": 0.1849, + "step": 40724 + }, + { + "epoch": 0.7037081835775505, + "grad_norm": 1.1953972151308807, + "learning_rate": 4.261778157322727e-06, + "loss": 0.45, + "step": 40725 + }, + { + "epoch": 0.7037254630909594, + "grad_norm": 0.8784535114120898, + "learning_rate": 4.261319824582375e-06, + "loss": 0.3613, + "step": 40726 + }, + { + "epoch": 0.7037427426043683, + "grad_norm": 0.761706337328863, + "learning_rate": 4.260861509816002e-06, + "loss": 0.4765, + "step": 40727 + }, + { + "epoch": 0.7037600221177772, + "grad_norm": 1.632965585800896, + "learning_rate": 4.260403213025051e-06, + "loss": 0.4627, + "step": 40728 + }, + { + "epoch": 0.703777301631186, + "grad_norm": 1.457473576231626, + "learning_rate": 4.259944934210949e-06, + "loss": 0.3686, + "step": 40729 + }, + { + "epoch": 0.7037945811445949, + "grad_norm": 1.035993585149274, + "learning_rate": 4.259486673375136e-06, + "loss": 0.3462, + "step": 40730 + }, + { + "epoch": 0.7038118606580038, + "grad_norm": 2.8232824035370068, + "learning_rate": 4.259028430519047e-06, + "loss": 0.4296, + "step": 40731 + }, + { + "epoch": 0.7038291401714127, + "grad_norm": 0.9060479358133741, + "learning_rate": 4.2585702056441215e-06, + "loss": 0.3275, + "step": 40732 + }, + { + "epoch": 0.7038464196848216, + "grad_norm": 0.9485617225678102, + "learning_rate": 4.258111998751786e-06, + "loss": 0.3061, + "step": 40733 + }, + { + "epoch": 0.7038636991982306, + "grad_norm": 1.1116638788037014, + "learning_rate": 4.257653809843484e-06, + "loss": 0.293, + "step": 40734 + }, + { + "epoch": 0.7038809787116395, + "grad_norm": 1.298063269936349, + "learning_rate": 4.257195638920643e-06, + "loss": 0.3888, + "step": 40735 + }, + { + "epoch": 0.7038982582250484, + "grad_norm": 1.9104459333608441, + "learning_rate": 4.256737485984704e-06, + "loss": 0.4391, + "step": 40736 + }, + { + "epoch": 0.7039155377384573, + "grad_norm": 1.3149193795468836, + "learning_rate": 4.256279351037098e-06, + "loss": 0.3975, + "step": 40737 + }, + { + "epoch": 0.7039328172518662, + "grad_norm": 1.2616705625783113, + "learning_rate": 4.255821234079262e-06, + "loss": 0.5104, + "step": 40738 + }, + { + "epoch": 0.7039500967652751, + "grad_norm": 1.804927101590313, + "learning_rate": 4.255363135112632e-06, + "loss": 0.4158, + "step": 40739 + }, + { + "epoch": 0.703967376278684, + "grad_norm": 1.1675574607046781, + "learning_rate": 4.2549050541386386e-06, + "loss": 0.2893, + "step": 40740 + }, + { + "epoch": 0.7039846557920929, + "grad_norm": 1.1963661392913327, + "learning_rate": 4.254446991158723e-06, + "loss": 0.3535, + "step": 40741 + }, + { + "epoch": 0.7040019353055018, + "grad_norm": 0.9861526610909116, + "learning_rate": 4.253988946174312e-06, + "loss": 0.3807, + "step": 40742 + }, + { + "epoch": 0.7040192148189107, + "grad_norm": 1.100361581193071, + "learning_rate": 4.2535309191868445e-06, + "loss": 0.2038, + "step": 40743 + }, + { + "epoch": 0.7040364943323196, + "grad_norm": 1.6172455531463774, + "learning_rate": 4.253072910197755e-06, + "loss": 0.272, + "step": 40744 + }, + { + "epoch": 0.7040537738457285, + "grad_norm": 0.9848898210186181, + "learning_rate": 4.252614919208481e-06, + "loss": 0.1688, + "step": 40745 + }, + { + "epoch": 0.7040710533591374, + "grad_norm": 1.5429057755420117, + "learning_rate": 4.252156946220451e-06, + "loss": 0.4046, + "step": 40746 + }, + { + "epoch": 0.7040883328725464, + "grad_norm": 1.2165250978824063, + "learning_rate": 4.2516989912351056e-06, + "loss": 0.364, + "step": 40747 + }, + { + "epoch": 0.7041056123859553, + "grad_norm": 0.7717696413879231, + "learning_rate": 4.251241054253872e-06, + "loss": 0.6694, + "step": 40748 + }, + { + "epoch": 0.7041228918993642, + "grad_norm": 1.065356780960271, + "learning_rate": 4.250783135278188e-06, + "loss": 0.3883, + "step": 40749 + }, + { + "epoch": 0.704140171412773, + "grad_norm": 0.6426963197176052, + "learning_rate": 4.250325234309493e-06, + "loss": 0.8642, + "step": 40750 + }, + { + "epoch": 0.7041574509261819, + "grad_norm": 1.3080293413575086, + "learning_rate": 4.249867351349212e-06, + "loss": 0.3164, + "step": 40751 + }, + { + "epoch": 0.7041747304395908, + "grad_norm": 0.8145212686525993, + "learning_rate": 4.249409486398787e-06, + "loss": 0.6928, + "step": 40752 + }, + { + "epoch": 0.7041920099529997, + "grad_norm": 1.258331478390726, + "learning_rate": 4.248951639459644e-06, + "loss": 0.4676, + "step": 40753 + }, + { + "epoch": 0.7042092894664086, + "grad_norm": 1.4564651961865411, + "learning_rate": 4.248493810533223e-06, + "loss": 0.3874, + "step": 40754 + }, + { + "epoch": 0.7042265689798175, + "grad_norm": 1.7309454592121294, + "learning_rate": 4.248035999620956e-06, + "loss": 0.332, + "step": 40755 + }, + { + "epoch": 0.7042438484932264, + "grad_norm": 1.188602105936985, + "learning_rate": 4.247578206724281e-06, + "loss": 0.3461, + "step": 40756 + }, + { + "epoch": 0.7042611280066353, + "grad_norm": 1.0795859789157238, + "learning_rate": 4.247120431844628e-06, + "loss": 0.3769, + "step": 40757 + }, + { + "epoch": 0.7042784075200442, + "grad_norm": 1.4589773837524262, + "learning_rate": 4.2466626749834275e-06, + "loss": 0.4789, + "step": 40758 + }, + { + "epoch": 0.7042956870334531, + "grad_norm": 1.4048796389969063, + "learning_rate": 4.246204936142116e-06, + "loss": 0.4068, + "step": 40759 + }, + { + "epoch": 0.704312966546862, + "grad_norm": 1.094235872761478, + "learning_rate": 4.245747215322129e-06, + "loss": 0.5197, + "step": 40760 + }, + { + "epoch": 0.704330246060271, + "grad_norm": 1.5319121170298944, + "learning_rate": 4.245289512524903e-06, + "loss": 0.4195, + "step": 40761 + }, + { + "epoch": 0.7043475255736799, + "grad_norm": 1.3363611839440528, + "learning_rate": 4.244831827751863e-06, + "loss": 0.3679, + "step": 40762 + }, + { + "epoch": 0.7043648050870888, + "grad_norm": 0.9581660126838871, + "learning_rate": 4.2443741610044505e-06, + "loss": 0.3265, + "step": 40763 + }, + { + "epoch": 0.7043820846004977, + "grad_norm": 1.4177462434668295, + "learning_rate": 4.243916512284093e-06, + "loss": 0.3431, + "step": 40764 + }, + { + "epoch": 0.7043993641139066, + "grad_norm": 1.116248864502967, + "learning_rate": 4.243458881592226e-06, + "loss": 0.4778, + "step": 40765 + }, + { + "epoch": 0.7044166436273155, + "grad_norm": 1.060790664548603, + "learning_rate": 4.2430012689302825e-06, + "loss": 0.3239, + "step": 40766 + }, + { + "epoch": 0.7044339231407244, + "grad_norm": 1.2593525753930845, + "learning_rate": 4.242543674299701e-06, + "loss": 0.2779, + "step": 40767 + }, + { + "epoch": 0.7044512026541333, + "grad_norm": 1.0272916346918346, + "learning_rate": 4.242086097701909e-06, + "loss": 0.2605, + "step": 40768 + }, + { + "epoch": 0.7044684821675422, + "grad_norm": 1.23286523009561, + "learning_rate": 4.24162853913834e-06, + "loss": 0.469, + "step": 40769 + }, + { + "epoch": 0.7044857616809511, + "grad_norm": 0.8562148749050281, + "learning_rate": 4.2411709986104255e-06, + "loss": 0.349, + "step": 40770 + }, + { + "epoch": 0.7045030411943599, + "grad_norm": 1.5551541121071868, + "learning_rate": 4.240713476119602e-06, + "loss": 0.4626, + "step": 40771 + }, + { + "epoch": 0.7045203207077688, + "grad_norm": 1.1436863499958916, + "learning_rate": 4.240255971667306e-06, + "loss": 0.3897, + "step": 40772 + }, + { + "epoch": 0.7045376002211777, + "grad_norm": 1.0152504120615566, + "learning_rate": 4.239798485254961e-06, + "loss": 0.5781, + "step": 40773 + }, + { + "epoch": 0.7045548797345866, + "grad_norm": 1.0536565295564135, + "learning_rate": 4.239341016884008e-06, + "loss": 0.4878, + "step": 40774 + }, + { + "epoch": 0.7045721592479955, + "grad_norm": 1.2742971914628107, + "learning_rate": 4.238883566555876e-06, + "loss": 0.4303, + "step": 40775 + }, + { + "epoch": 0.7045894387614045, + "grad_norm": 1.1265998716166519, + "learning_rate": 4.238426134272e-06, + "loss": 0.3372, + "step": 40776 + }, + { + "epoch": 0.7046067182748134, + "grad_norm": 1.410582227985786, + "learning_rate": 4.237968720033804e-06, + "loss": 0.4592, + "step": 40777 + }, + { + "epoch": 0.7046239977882223, + "grad_norm": 1.3248015503998023, + "learning_rate": 4.237511323842735e-06, + "loss": 0.6057, + "step": 40778 + }, + { + "epoch": 0.7046412773016312, + "grad_norm": 1.3178019173312367, + "learning_rate": 4.237053945700219e-06, + "loss": 0.4097, + "step": 40779 + }, + { + "epoch": 0.7046585568150401, + "grad_norm": 0.9147800631379, + "learning_rate": 4.236596585607684e-06, + "loss": 0.2762, + "step": 40780 + }, + { + "epoch": 0.704675836328449, + "grad_norm": 0.9696904547494334, + "learning_rate": 4.23613924356657e-06, + "loss": 0.5264, + "step": 40781 + }, + { + "epoch": 0.7046931158418579, + "grad_norm": 1.6056275104586561, + "learning_rate": 4.235681919578304e-06, + "loss": 0.3007, + "step": 40782 + }, + { + "epoch": 0.7047103953552668, + "grad_norm": 0.9635245503418983, + "learning_rate": 4.235224613644318e-06, + "loss": 0.3048, + "step": 40783 + }, + { + "epoch": 0.7047276748686757, + "grad_norm": 1.4345678947536054, + "learning_rate": 4.234767325766049e-06, + "loss": 0.3161, + "step": 40784 + }, + { + "epoch": 0.7047449543820846, + "grad_norm": 1.1442416626043552, + "learning_rate": 4.234310055944928e-06, + "loss": 0.4744, + "step": 40785 + }, + { + "epoch": 0.7047622338954935, + "grad_norm": 0.998008416273309, + "learning_rate": 4.233852804182384e-06, + "loss": 0.3775, + "step": 40786 + }, + { + "epoch": 0.7047795134089024, + "grad_norm": 1.4321136616806993, + "learning_rate": 4.233395570479855e-06, + "loss": 0.2234, + "step": 40787 + }, + { + "epoch": 0.7047967929223113, + "grad_norm": 1.7309917086345201, + "learning_rate": 4.232938354838765e-06, + "loss": 0.3413, + "step": 40788 + }, + { + "epoch": 0.7048140724357203, + "grad_norm": 0.9799306179322685, + "learning_rate": 4.23248115726055e-06, + "loss": 0.3555, + "step": 40789 + }, + { + "epoch": 0.7048313519491292, + "grad_norm": 1.1539256289939466, + "learning_rate": 4.232023977746646e-06, + "loss": 0.271, + "step": 40790 + }, + { + "epoch": 0.7048486314625381, + "grad_norm": 2.0082432925357923, + "learning_rate": 4.231566816298478e-06, + "loss": 0.4291, + "step": 40791 + }, + { + "epoch": 0.7048659109759469, + "grad_norm": 1.2344806912878281, + "learning_rate": 4.231109672917485e-06, + "loss": 0.3887, + "step": 40792 + }, + { + "epoch": 0.7048831904893558, + "grad_norm": 0.9402599015493844, + "learning_rate": 4.230652547605091e-06, + "loss": 0.3331, + "step": 40793 + }, + { + "epoch": 0.7049004700027647, + "grad_norm": 1.211935320616536, + "learning_rate": 4.230195440362732e-06, + "loss": 0.3007, + "step": 40794 + }, + { + "epoch": 0.7049177495161736, + "grad_norm": 1.4385097583949384, + "learning_rate": 4.229738351191839e-06, + "loss": 0.3476, + "step": 40795 + }, + { + "epoch": 0.7049350290295825, + "grad_norm": 1.6818268367002474, + "learning_rate": 4.229281280093848e-06, + "loss": 0.5553, + "step": 40796 + }, + { + "epoch": 0.7049523085429914, + "grad_norm": 1.0063486104539816, + "learning_rate": 4.2288242270701826e-06, + "loss": 0.2066, + "step": 40797 + }, + { + "epoch": 0.7049695880564003, + "grad_norm": 1.767212488466111, + "learning_rate": 4.228367192122282e-06, + "loss": 0.3145, + "step": 40798 + }, + { + "epoch": 0.7049868675698092, + "grad_norm": 1.459643647516201, + "learning_rate": 4.2279101752515704e-06, + "loss": 0.3408, + "step": 40799 + }, + { + "epoch": 0.7050041470832181, + "grad_norm": 0.9399909573272178, + "learning_rate": 4.2274531764594825e-06, + "loss": 0.406, + "step": 40800 + }, + { + "epoch": 0.705021426596627, + "grad_norm": 1.8383096642199075, + "learning_rate": 4.2269961957474534e-06, + "loss": 0.5422, + "step": 40801 + }, + { + "epoch": 0.7050387061100359, + "grad_norm": 1.193247862573656, + "learning_rate": 4.226539233116909e-06, + "loss": 0.7338, + "step": 40802 + }, + { + "epoch": 0.7050559856234448, + "grad_norm": 1.1246165492066393, + "learning_rate": 4.226082288569284e-06, + "loss": 0.3107, + "step": 40803 + }, + { + "epoch": 0.7050732651368538, + "grad_norm": 1.4556011130315947, + "learning_rate": 4.225625362106006e-06, + "loss": 0.3417, + "step": 40804 + }, + { + "epoch": 0.7050905446502627, + "grad_norm": 2.5532318863815, + "learning_rate": 4.225168453728507e-06, + "loss": 0.4784, + "step": 40805 + }, + { + "epoch": 0.7051078241636716, + "grad_norm": 1.4232321370132348, + "learning_rate": 4.224711563438221e-06, + "loss": 0.3372, + "step": 40806 + }, + { + "epoch": 0.7051251036770805, + "grad_norm": 1.3608499362002306, + "learning_rate": 4.224254691236579e-06, + "loss": 0.3052, + "step": 40807 + }, + { + "epoch": 0.7051423831904894, + "grad_norm": 1.0397293956618392, + "learning_rate": 4.2237978371250075e-06, + "loss": 0.4173, + "step": 40808 + }, + { + "epoch": 0.7051596627038983, + "grad_norm": 1.4162579051971944, + "learning_rate": 4.223341001104944e-06, + "loss": 0.3062, + "step": 40809 + }, + { + "epoch": 0.7051769422173072, + "grad_norm": 1.418561181785367, + "learning_rate": 4.2228841831778114e-06, + "loss": 0.2906, + "step": 40810 + }, + { + "epoch": 0.7051942217307161, + "grad_norm": 1.3954880846462483, + "learning_rate": 4.222427383345044e-06, + "loss": 0.4887, + "step": 40811 + }, + { + "epoch": 0.705211501244125, + "grad_norm": 1.763607889318327, + "learning_rate": 4.221970601608077e-06, + "loss": 0.3857, + "step": 40812 + }, + { + "epoch": 0.7052287807575338, + "grad_norm": 1.141793076400332, + "learning_rate": 4.221513837968335e-06, + "loss": 0.2809, + "step": 40813 + }, + { + "epoch": 0.7052460602709427, + "grad_norm": 0.866536629769713, + "learning_rate": 4.2210570924272535e-06, + "loss": 0.2882, + "step": 40814 + }, + { + "epoch": 0.7052633397843516, + "grad_norm": 0.9405028714492244, + "learning_rate": 4.220600364986257e-06, + "loss": 0.4929, + "step": 40815 + }, + { + "epoch": 0.7052806192977605, + "grad_norm": 1.1973286044781781, + "learning_rate": 4.220143655646783e-06, + "loss": 0.3661, + "step": 40816 + }, + { + "epoch": 0.7052978988111694, + "grad_norm": 0.88966564691524, + "learning_rate": 4.21968696441025e-06, + "loss": 0.2367, + "step": 40817 + }, + { + "epoch": 0.7053151783245784, + "grad_norm": 1.3666827290211858, + "learning_rate": 4.219230291278105e-06, + "loss": 0.4357, + "step": 40818 + }, + { + "epoch": 0.7053324578379873, + "grad_norm": 1.4111455304698481, + "learning_rate": 4.218773636251769e-06, + "loss": 0.4603, + "step": 40819 + }, + { + "epoch": 0.7053497373513962, + "grad_norm": 1.4042772089349524, + "learning_rate": 4.218316999332671e-06, + "loss": 0.4091, + "step": 40820 + }, + { + "epoch": 0.7053670168648051, + "grad_norm": 1.2821769508406895, + "learning_rate": 4.217860380522246e-06, + "loss": 0.2822, + "step": 40821 + }, + { + "epoch": 0.705384296378214, + "grad_norm": 0.8724006769629907, + "learning_rate": 4.217403779821919e-06, + "loss": 0.3467, + "step": 40822 + }, + { + "epoch": 0.7054015758916229, + "grad_norm": 0.9023376019902564, + "learning_rate": 4.216947197233123e-06, + "loss": 0.4668, + "step": 40823 + }, + { + "epoch": 0.7054188554050318, + "grad_norm": 0.9939533485112991, + "learning_rate": 4.216490632757287e-06, + "loss": 0.3251, + "step": 40824 + }, + { + "epoch": 0.7054361349184407, + "grad_norm": 1.7244180700757288, + "learning_rate": 4.216034086395846e-06, + "loss": 0.3942, + "step": 40825 + }, + { + "epoch": 0.7054534144318496, + "grad_norm": 1.3242862369743007, + "learning_rate": 4.215577558150222e-06, + "loss": 0.3697, + "step": 40826 + }, + { + "epoch": 0.7054706939452585, + "grad_norm": 1.242041752319927, + "learning_rate": 4.215121048021853e-06, + "loss": 0.4117, + "step": 40827 + }, + { + "epoch": 0.7054879734586674, + "grad_norm": 0.6879123627233524, + "learning_rate": 4.21466455601216e-06, + "loss": 0.8604, + "step": 40828 + }, + { + "epoch": 0.7055052529720763, + "grad_norm": 1.3498277455322671, + "learning_rate": 4.214208082122578e-06, + "loss": 0.2864, + "step": 40829 + }, + { + "epoch": 0.7055225324854852, + "grad_norm": 1.5514285287852052, + "learning_rate": 4.21375162635454e-06, + "loss": 0.4237, + "step": 40830 + }, + { + "epoch": 0.7055398119988942, + "grad_norm": 1.0427447235016252, + "learning_rate": 4.213295188709466e-06, + "loss": 0.3702, + "step": 40831 + }, + { + "epoch": 0.7055570915123031, + "grad_norm": 1.0779477037015386, + "learning_rate": 4.212838769188796e-06, + "loss": 0.2634, + "step": 40832 + }, + { + "epoch": 0.705574371025712, + "grad_norm": 2.3548168932571074, + "learning_rate": 4.2123823677939526e-06, + "loss": 0.4058, + "step": 40833 + }, + { + "epoch": 0.7055916505391209, + "grad_norm": 1.0703991739245975, + "learning_rate": 4.211925984526365e-06, + "loss": 0.1865, + "step": 40834 + }, + { + "epoch": 0.7056089300525297, + "grad_norm": 1.6708328581013043, + "learning_rate": 4.211469619387467e-06, + "loss": 0.3681, + "step": 40835 + }, + { + "epoch": 0.7056262095659386, + "grad_norm": 0.955176921144456, + "learning_rate": 4.211013272378689e-06, + "loss": 0.6799, + "step": 40836 + }, + { + "epoch": 0.7056434890793475, + "grad_norm": 2.3360616738023716, + "learning_rate": 4.210556943501454e-06, + "loss": 0.4057, + "step": 40837 + }, + { + "epoch": 0.7056607685927564, + "grad_norm": 1.0539790914859282, + "learning_rate": 4.210100632757197e-06, + "loss": 0.3137, + "step": 40838 + }, + { + "epoch": 0.7056780481061653, + "grad_norm": 1.460148254061676, + "learning_rate": 4.209644340147342e-06, + "loss": 0.219, + "step": 40839 + }, + { + "epoch": 0.7056953276195742, + "grad_norm": 1.2378247386607406, + "learning_rate": 4.209188065673322e-06, + "loss": 0.3598, + "step": 40840 + }, + { + "epoch": 0.7057126071329831, + "grad_norm": 1.1706256647541304, + "learning_rate": 4.208731809336567e-06, + "loss": 0.4341, + "step": 40841 + }, + { + "epoch": 0.705729886646392, + "grad_norm": 1.4870865104003452, + "learning_rate": 4.208275571138501e-06, + "loss": 0.2769, + "step": 40842 + }, + { + "epoch": 0.7057471661598009, + "grad_norm": 1.8789712699878014, + "learning_rate": 4.207819351080561e-06, + "loss": 0.5054, + "step": 40843 + }, + { + "epoch": 0.7057644456732098, + "grad_norm": 1.5724884687047913, + "learning_rate": 4.207363149164165e-06, + "loss": 0.5376, + "step": 40844 + }, + { + "epoch": 0.7057817251866187, + "grad_norm": 1.5495877216152305, + "learning_rate": 4.20690696539075e-06, + "loss": 0.2698, + "step": 40845 + }, + { + "epoch": 0.7057990047000277, + "grad_norm": 1.2353625206356142, + "learning_rate": 4.206450799761742e-06, + "loss": 0.412, + "step": 40846 + }, + { + "epoch": 0.7058162842134366, + "grad_norm": 1.2025340638438056, + "learning_rate": 4.205994652278574e-06, + "loss": 0.309, + "step": 40847 + }, + { + "epoch": 0.7058335637268455, + "grad_norm": 1.1132811216310727, + "learning_rate": 4.205538522942667e-06, + "loss": 0.324, + "step": 40848 + }, + { + "epoch": 0.7058508432402544, + "grad_norm": 1.5569087695282453, + "learning_rate": 4.205082411755458e-06, + "loss": 0.3363, + "step": 40849 + }, + { + "epoch": 0.7058681227536633, + "grad_norm": 1.14344143555745, + "learning_rate": 4.2046263187183664e-06, + "loss": 0.782, + "step": 40850 + }, + { + "epoch": 0.7058854022670722, + "grad_norm": 2.3537672287685623, + "learning_rate": 4.204170243832827e-06, + "loss": 0.3465, + "step": 40851 + }, + { + "epoch": 0.7059026817804811, + "grad_norm": 1.3284893752335143, + "learning_rate": 4.203714187100269e-06, + "loss": 0.4188, + "step": 40852 + }, + { + "epoch": 0.70591996129389, + "grad_norm": 1.199012471096909, + "learning_rate": 4.2032581485221165e-06, + "loss": 0.3078, + "step": 40853 + }, + { + "epoch": 0.7059372408072989, + "grad_norm": 1.8599711015504787, + "learning_rate": 4.202802128099803e-06, + "loss": 0.4643, + "step": 40854 + }, + { + "epoch": 0.7059545203207078, + "grad_norm": 1.2912387457800933, + "learning_rate": 4.202346125834751e-06, + "loss": 0.4169, + "step": 40855 + }, + { + "epoch": 0.7059717998341166, + "grad_norm": 1.2182307800402083, + "learning_rate": 4.201890141728394e-06, + "loss": 0.3052, + "step": 40856 + }, + { + "epoch": 0.7059890793475255, + "grad_norm": 1.5085428885120205, + "learning_rate": 4.201434175782151e-06, + "loss": 0.5197, + "step": 40857 + }, + { + "epoch": 0.7060063588609344, + "grad_norm": 1.0751482853847867, + "learning_rate": 4.200978227997464e-06, + "loss": 0.3317, + "step": 40858 + }, + { + "epoch": 0.7060236383743433, + "grad_norm": 1.9773729265552855, + "learning_rate": 4.200522298375751e-06, + "loss": 0.4664, + "step": 40859 + }, + { + "epoch": 0.7060409178877523, + "grad_norm": 1.0709999958809842, + "learning_rate": 4.200066386918445e-06, + "loss": 0.1859, + "step": 40860 + }, + { + "epoch": 0.7060581974011612, + "grad_norm": 1.0064268674830013, + "learning_rate": 4.199610493626973e-06, + "loss": 0.4003, + "step": 40861 + }, + { + "epoch": 0.7060754769145701, + "grad_norm": 1.3713370909443643, + "learning_rate": 4.199154618502758e-06, + "loss": 0.2073, + "step": 40862 + }, + { + "epoch": 0.706092756427979, + "grad_norm": 1.8100962822862146, + "learning_rate": 4.198698761547232e-06, + "loss": 0.3054, + "step": 40863 + }, + { + "epoch": 0.7061100359413879, + "grad_norm": 1.3983166636781519, + "learning_rate": 4.198242922761822e-06, + "loss": 0.4373, + "step": 40864 + }, + { + "epoch": 0.7061273154547968, + "grad_norm": 1.1048208299823947, + "learning_rate": 4.197787102147959e-06, + "loss": 0.4073, + "step": 40865 + }, + { + "epoch": 0.7061445949682057, + "grad_norm": 1.405728835478913, + "learning_rate": 4.197331299707065e-06, + "loss": 0.386, + "step": 40866 + }, + { + "epoch": 0.7061618744816146, + "grad_norm": 2.273655412812027, + "learning_rate": 4.196875515440573e-06, + "loss": 0.3593, + "step": 40867 + }, + { + "epoch": 0.7061791539950235, + "grad_norm": 1.240367764973449, + "learning_rate": 4.196419749349905e-06, + "loss": 0.5388, + "step": 40868 + }, + { + "epoch": 0.7061964335084324, + "grad_norm": 1.258434643410689, + "learning_rate": 4.195964001436491e-06, + "loss": 0.3034, + "step": 40869 + }, + { + "epoch": 0.7062137130218413, + "grad_norm": 1.7706462135620284, + "learning_rate": 4.1955082717017625e-06, + "loss": 0.1877, + "step": 40870 + }, + { + "epoch": 0.7062309925352502, + "grad_norm": 1.70128065342364, + "learning_rate": 4.19505256014714e-06, + "loss": 0.4376, + "step": 40871 + }, + { + "epoch": 0.7062482720486591, + "grad_norm": 1.4449000094794384, + "learning_rate": 4.1945968667740576e-06, + "loss": 0.5094, + "step": 40872 + }, + { + "epoch": 0.706265551562068, + "grad_norm": 1.363967894482367, + "learning_rate": 4.194141191583935e-06, + "loss": 0.3497, + "step": 40873 + }, + { + "epoch": 0.706282831075477, + "grad_norm": 1.16459178107496, + "learning_rate": 4.193685534578204e-06, + "loss": 0.3073, + "step": 40874 + }, + { + "epoch": 0.7063001105888859, + "grad_norm": 0.9465625855555779, + "learning_rate": 4.193229895758291e-06, + "loss": 0.3006, + "step": 40875 + }, + { + "epoch": 0.7063173901022948, + "grad_norm": 1.8283093461268445, + "learning_rate": 4.192774275125626e-06, + "loss": 0.2408, + "step": 40876 + }, + { + "epoch": 0.7063346696157036, + "grad_norm": 0.8922905632249606, + "learning_rate": 4.192318672681631e-06, + "loss": 0.2931, + "step": 40877 + }, + { + "epoch": 0.7063519491291125, + "grad_norm": 0.9363699671253125, + "learning_rate": 4.191863088427738e-06, + "loss": 0.4533, + "step": 40878 + }, + { + "epoch": 0.7063692286425214, + "grad_norm": 0.9667655469633751, + "learning_rate": 4.191407522365369e-06, + "loss": 0.2924, + "step": 40879 + }, + { + "epoch": 0.7063865081559303, + "grad_norm": 1.8604460030282568, + "learning_rate": 4.190951974495951e-06, + "loss": 0.2469, + "step": 40880 + }, + { + "epoch": 0.7064037876693392, + "grad_norm": 1.8360300929741857, + "learning_rate": 4.190496444820917e-06, + "loss": 0.375, + "step": 40881 + }, + { + "epoch": 0.7064210671827481, + "grad_norm": 1.3728427512150774, + "learning_rate": 4.190040933341687e-06, + "loss": 0.3226, + "step": 40882 + }, + { + "epoch": 0.706438346696157, + "grad_norm": 1.1764259261961472, + "learning_rate": 4.189585440059693e-06, + "loss": 0.4499, + "step": 40883 + }, + { + "epoch": 0.7064556262095659, + "grad_norm": 1.4938561081870407, + "learning_rate": 4.189129964976356e-06, + "loss": 0.3993, + "step": 40884 + }, + { + "epoch": 0.7064729057229748, + "grad_norm": 1.6163062250685192, + "learning_rate": 4.1886745080931055e-06, + "loss": 0.3306, + "step": 40885 + }, + { + "epoch": 0.7064901852363837, + "grad_norm": 1.284428190575545, + "learning_rate": 4.188219069411369e-06, + "loss": 0.3361, + "step": 40886 + }, + { + "epoch": 0.7065074647497926, + "grad_norm": 1.0992674554722566, + "learning_rate": 4.187763648932574e-06, + "loss": 0.2497, + "step": 40887 + }, + { + "epoch": 0.7065247442632016, + "grad_norm": 1.4723677541426885, + "learning_rate": 4.187308246658143e-06, + "loss": 0.1882, + "step": 40888 + }, + { + "epoch": 0.7065420237766105, + "grad_norm": 2.5357764146077315, + "learning_rate": 4.186852862589507e-06, + "loss": 0.3424, + "step": 40889 + }, + { + "epoch": 0.7065593032900194, + "grad_norm": 1.9984481646162855, + "learning_rate": 4.1863974967280855e-06, + "loss": 0.3561, + "step": 40890 + }, + { + "epoch": 0.7065765828034283, + "grad_norm": 1.2329248386907763, + "learning_rate": 4.185942149075309e-06, + "loss": 0.2456, + "step": 40891 + }, + { + "epoch": 0.7065938623168372, + "grad_norm": 1.0571363748641784, + "learning_rate": 4.185486819632607e-06, + "loss": 0.2994, + "step": 40892 + }, + { + "epoch": 0.7066111418302461, + "grad_norm": 1.4306485433305138, + "learning_rate": 4.185031508401399e-06, + "loss": 0.2733, + "step": 40893 + }, + { + "epoch": 0.706628421343655, + "grad_norm": 0.9653005527005548, + "learning_rate": 4.1845762153831184e-06, + "loss": 0.4702, + "step": 40894 + }, + { + "epoch": 0.7066457008570639, + "grad_norm": 1.5009382912628164, + "learning_rate": 4.184120940579183e-06, + "loss": 0.3005, + "step": 40895 + }, + { + "epoch": 0.7066629803704728, + "grad_norm": 1.1927232526135423, + "learning_rate": 4.183665683991026e-06, + "loss": 0.2591, + "step": 40896 + }, + { + "epoch": 0.7066802598838817, + "grad_norm": 1.6424770271872868, + "learning_rate": 4.1832104456200625e-06, + "loss": 0.2308, + "step": 40897 + }, + { + "epoch": 0.7066975393972905, + "grad_norm": 1.445506708912622, + "learning_rate": 4.182755225467735e-06, + "loss": 0.325, + "step": 40898 + }, + { + "epoch": 0.7067148189106994, + "grad_norm": 0.8297076298943475, + "learning_rate": 4.182300023535455e-06, + "loss": 0.2094, + "step": 40899 + }, + { + "epoch": 0.7067320984241083, + "grad_norm": 1.3205976601441995, + "learning_rate": 4.181844839824658e-06, + "loss": 0.5195, + "step": 40900 + }, + { + "epoch": 0.7067493779375172, + "grad_norm": 1.1796092799374436, + "learning_rate": 4.181389674336764e-06, + "loss": 0.3425, + "step": 40901 + }, + { + "epoch": 0.7067666574509262, + "grad_norm": 1.5480681851322073, + "learning_rate": 4.180934527073197e-06, + "loss": 0.3718, + "step": 40902 + }, + { + "epoch": 0.7067839369643351, + "grad_norm": 1.1753723939551082, + "learning_rate": 4.180479398035385e-06, + "loss": 0.3043, + "step": 40903 + }, + { + "epoch": 0.706801216477744, + "grad_norm": 1.126632567247833, + "learning_rate": 4.180024287224755e-06, + "loss": 0.2562, + "step": 40904 + }, + { + "epoch": 0.7068184959911529, + "grad_norm": 1.3976289953603236, + "learning_rate": 4.179569194642733e-06, + "loss": 0.4684, + "step": 40905 + }, + { + "epoch": 0.7068357755045618, + "grad_norm": 1.9621276404667727, + "learning_rate": 4.179114120290739e-06, + "loss": 0.2623, + "step": 40906 + }, + { + "epoch": 0.7068530550179707, + "grad_norm": 1.1138938952565534, + "learning_rate": 4.178659064170207e-06, + "loss": 0.3121, + "step": 40907 + }, + { + "epoch": 0.7068703345313796, + "grad_norm": 0.6763275390257759, + "learning_rate": 4.178204026282553e-06, + "loss": 0.5599, + "step": 40908 + }, + { + "epoch": 0.7068876140447885, + "grad_norm": 1.1732838608822893, + "learning_rate": 4.177749006629206e-06, + "loss": 0.3942, + "step": 40909 + }, + { + "epoch": 0.7069048935581974, + "grad_norm": 1.171936005270771, + "learning_rate": 4.177294005211592e-06, + "loss": 0.239, + "step": 40910 + }, + { + "epoch": 0.7069221730716063, + "grad_norm": 1.740296178085501, + "learning_rate": 4.176839022031139e-06, + "loss": 0.3294, + "step": 40911 + }, + { + "epoch": 0.7069394525850152, + "grad_norm": 1.2209986906896089, + "learning_rate": 4.176384057089269e-06, + "loss": 0.3212, + "step": 40912 + }, + { + "epoch": 0.7069567320984241, + "grad_norm": 0.9287698304089796, + "learning_rate": 4.175929110387403e-06, + "loss": 0.3523, + "step": 40913 + }, + { + "epoch": 0.706974011611833, + "grad_norm": 0.787950008538238, + "learning_rate": 4.17547418192697e-06, + "loss": 0.3475, + "step": 40914 + }, + { + "epoch": 0.706991291125242, + "grad_norm": 1.0936050563154192, + "learning_rate": 4.175019271709394e-06, + "loss": 0.4695, + "step": 40915 + }, + { + "epoch": 0.7070085706386509, + "grad_norm": 1.2949450413899528, + "learning_rate": 4.174564379736105e-06, + "loss": 0.3283, + "step": 40916 + }, + { + "epoch": 0.7070258501520598, + "grad_norm": 1.7557046667841956, + "learning_rate": 4.174109506008518e-06, + "loss": 0.3514, + "step": 40917 + }, + { + "epoch": 0.7070431296654687, + "grad_norm": 0.9418171616230646, + "learning_rate": 4.1736546505280674e-06, + "loss": 0.2314, + "step": 40918 + }, + { + "epoch": 0.7070604091788775, + "grad_norm": 1.6041826852824894, + "learning_rate": 4.1731998132961695e-06, + "loss": 0.4702, + "step": 40919 + }, + { + "epoch": 0.7070776886922864, + "grad_norm": 1.5011749843498454, + "learning_rate": 4.172744994314252e-06, + "loss": 0.361, + "step": 40920 + }, + { + "epoch": 0.7070949682056953, + "grad_norm": 0.9429264666525351, + "learning_rate": 4.17229019358374e-06, + "loss": 0.5449, + "step": 40921 + }, + { + "epoch": 0.7071122477191042, + "grad_norm": 1.0523837130392248, + "learning_rate": 4.171835411106062e-06, + "loss": 0.458, + "step": 40922 + }, + { + "epoch": 0.7071295272325131, + "grad_norm": 1.0349959434943523, + "learning_rate": 4.171380646882638e-06, + "loss": 0.3895, + "step": 40923 + }, + { + "epoch": 0.707146806745922, + "grad_norm": 0.918390275712873, + "learning_rate": 4.170925900914889e-06, + "loss": 0.3002, + "step": 40924 + }, + { + "epoch": 0.7071640862593309, + "grad_norm": 0.8938952675835047, + "learning_rate": 4.170471173204245e-06, + "loss": 0.3952, + "step": 40925 + }, + { + "epoch": 0.7071813657727398, + "grad_norm": 1.1501343148409233, + "learning_rate": 4.170016463752127e-06, + "loss": 0.3767, + "step": 40926 + }, + { + "epoch": 0.7071986452861487, + "grad_norm": 0.9588497752114884, + "learning_rate": 4.169561772559964e-06, + "loss": 0.4982, + "step": 40927 + }, + { + "epoch": 0.7072159247995576, + "grad_norm": 1.1979016442513797, + "learning_rate": 4.169107099629173e-06, + "loss": 0.3366, + "step": 40928 + }, + { + "epoch": 0.7072332043129665, + "grad_norm": 0.7247711227047984, + "learning_rate": 4.1686524449611845e-06, + "loss": 0.7404, + "step": 40929 + }, + { + "epoch": 0.7072504838263755, + "grad_norm": 1.3425339145081376, + "learning_rate": 4.168197808557417e-06, + "loss": 0.3273, + "step": 40930 + }, + { + "epoch": 0.7072677633397844, + "grad_norm": 0.8816914616713613, + "learning_rate": 4.1677431904192975e-06, + "loss": 0.6497, + "step": 40931 + }, + { + "epoch": 0.7072850428531933, + "grad_norm": 1.4723649171749138, + "learning_rate": 4.167288590548253e-06, + "loss": 0.5323, + "step": 40932 + }, + { + "epoch": 0.7073023223666022, + "grad_norm": 1.332718483586232, + "learning_rate": 4.166834008945699e-06, + "loss": 0.3392, + "step": 40933 + }, + { + "epoch": 0.7073196018800111, + "grad_norm": 0.9473219103533027, + "learning_rate": 4.166379445613069e-06, + "loss": 0.738, + "step": 40934 + }, + { + "epoch": 0.70733688139342, + "grad_norm": 1.1836968175259297, + "learning_rate": 4.165924900551779e-06, + "loss": 0.3998, + "step": 40935 + }, + { + "epoch": 0.7073541609068289, + "grad_norm": 1.2203724027145513, + "learning_rate": 4.165470373763258e-06, + "loss": 0.3547, + "step": 40936 + }, + { + "epoch": 0.7073714404202378, + "grad_norm": 0.9026262398080768, + "learning_rate": 4.16501586524892e-06, + "loss": 0.2037, + "step": 40937 + }, + { + "epoch": 0.7073887199336467, + "grad_norm": 3.2978476348048527, + "learning_rate": 4.164561375010204e-06, + "loss": 0.2305, + "step": 40938 + }, + { + "epoch": 0.7074059994470556, + "grad_norm": 1.587238429180593, + "learning_rate": 4.16410690304852e-06, + "loss": 0.4226, + "step": 40939 + }, + { + "epoch": 0.7074232789604644, + "grad_norm": 0.8308065883858422, + "learning_rate": 4.163652449365302e-06, + "loss": 0.3212, + "step": 40940 + }, + { + "epoch": 0.7074405584738733, + "grad_norm": 1.3141771639122886, + "learning_rate": 4.163198013961964e-06, + "loss": 0.4563, + "step": 40941 + }, + { + "epoch": 0.7074578379872822, + "grad_norm": 1.1668810432468755, + "learning_rate": 4.162743596839937e-06, + "loss": 0.5214, + "step": 40942 + }, + { + "epoch": 0.7074751175006911, + "grad_norm": 0.8070947570380855, + "learning_rate": 4.162289198000636e-06, + "loss": 0.1862, + "step": 40943 + }, + { + "epoch": 0.7074923970141, + "grad_norm": 1.4027423844285165, + "learning_rate": 4.16183481744549e-06, + "loss": 0.3567, + "step": 40944 + }, + { + "epoch": 0.707509676527509, + "grad_norm": 1.2445096869307324, + "learning_rate": 4.161380455175924e-06, + "loss": 0.2933, + "step": 40945 + }, + { + "epoch": 0.7075269560409179, + "grad_norm": 1.4825731066461878, + "learning_rate": 4.160926111193355e-06, + "loss": 0.3248, + "step": 40946 + }, + { + "epoch": 0.7075442355543268, + "grad_norm": 1.365621147925762, + "learning_rate": 4.160471785499213e-06, + "loss": 0.4899, + "step": 40947 + }, + { + "epoch": 0.7075615150677357, + "grad_norm": 0.5868550894689755, + "learning_rate": 4.1600174780949135e-06, + "loss": 0.5237, + "step": 40948 + }, + { + "epoch": 0.7075787945811446, + "grad_norm": 1.059647790140492, + "learning_rate": 4.159563188981882e-06, + "loss": 0.39, + "step": 40949 + }, + { + "epoch": 0.7075960740945535, + "grad_norm": 1.1247801925249319, + "learning_rate": 4.159108918161544e-06, + "loss": 0.4332, + "step": 40950 + }, + { + "epoch": 0.7076133536079624, + "grad_norm": 0.66666132933928, + "learning_rate": 4.158654665635323e-06, + "loss": 0.3303, + "step": 40951 + }, + { + "epoch": 0.7076306331213713, + "grad_norm": 1.2400163098456811, + "learning_rate": 4.158200431404637e-06, + "loss": 0.2962, + "step": 40952 + }, + { + "epoch": 0.7076479126347802, + "grad_norm": 1.2503875929186028, + "learning_rate": 4.1577462154709135e-06, + "loss": 0.4293, + "step": 40953 + }, + { + "epoch": 0.7076651921481891, + "grad_norm": 1.110039130575059, + "learning_rate": 4.15729201783557e-06, + "loss": 0.337, + "step": 40954 + }, + { + "epoch": 0.707682471661598, + "grad_norm": 1.0348104969032363, + "learning_rate": 4.156837838500033e-06, + "loss": 0.2321, + "step": 40955 + }, + { + "epoch": 0.7076997511750069, + "grad_norm": 1.9818339340983533, + "learning_rate": 4.156383677465726e-06, + "loss": 0.2446, + "step": 40956 + }, + { + "epoch": 0.7077170306884158, + "grad_norm": 0.6198625928978191, + "learning_rate": 4.155929534734067e-06, + "loss": 0.3706, + "step": 40957 + }, + { + "epoch": 0.7077343102018248, + "grad_norm": 0.8576439017658628, + "learning_rate": 4.1554754103064834e-06, + "loss": 0.3663, + "step": 40958 + }, + { + "epoch": 0.7077515897152337, + "grad_norm": 1.2852240953820737, + "learning_rate": 4.155021304184391e-06, + "loss": 0.3423, + "step": 40959 + }, + { + "epoch": 0.7077688692286426, + "grad_norm": 1.3346259734751347, + "learning_rate": 4.154567216369217e-06, + "loss": 0.2406, + "step": 40960 + }, + { + "epoch": 0.7077861487420514, + "grad_norm": 1.3639754975968823, + "learning_rate": 4.154113146862383e-06, + "loss": 0.3049, + "step": 40961 + }, + { + "epoch": 0.7078034282554603, + "grad_norm": 1.043953785569631, + "learning_rate": 4.153659095665315e-06, + "loss": 0.4521, + "step": 40962 + }, + { + "epoch": 0.7078207077688692, + "grad_norm": 1.1136053748396166, + "learning_rate": 4.15320506277943e-06, + "loss": 0.4673, + "step": 40963 + }, + { + "epoch": 0.7078379872822781, + "grad_norm": 0.914224456718721, + "learning_rate": 4.152751048206147e-06, + "loss": 0.2579, + "step": 40964 + }, + { + "epoch": 0.707855266795687, + "grad_norm": 1.6585911005797427, + "learning_rate": 4.152297051946892e-06, + "loss": 0.2761, + "step": 40965 + }, + { + "epoch": 0.7078725463090959, + "grad_norm": 1.534475337323931, + "learning_rate": 4.151843074003088e-06, + "loss": 0.4165, + "step": 40966 + }, + { + "epoch": 0.7078898258225048, + "grad_norm": 0.8575341558567166, + "learning_rate": 4.15138911437616e-06, + "loss": 0.6502, + "step": 40967 + }, + { + "epoch": 0.7079071053359137, + "grad_norm": 1.3909336865547512, + "learning_rate": 4.15093517306752e-06, + "loss": 0.3011, + "step": 40968 + }, + { + "epoch": 0.7079243848493226, + "grad_norm": 1.6668568649429227, + "learning_rate": 4.150481250078602e-06, + "loss": 0.583, + "step": 40969 + }, + { + "epoch": 0.7079416643627315, + "grad_norm": 1.5640867710998303, + "learning_rate": 4.150027345410816e-06, + "loss": 0.3272, + "step": 40970 + }, + { + "epoch": 0.7079589438761404, + "grad_norm": 0.976291939307546, + "learning_rate": 4.149573459065589e-06, + "loss": 0.3278, + "step": 40971 + }, + { + "epoch": 0.7079762233895494, + "grad_norm": 1.3661929562371944, + "learning_rate": 4.149119591044342e-06, + "loss": 0.5115, + "step": 40972 + }, + { + "epoch": 0.7079935029029583, + "grad_norm": 1.0669102634125516, + "learning_rate": 4.148665741348502e-06, + "loss": 0.5161, + "step": 40973 + }, + { + "epoch": 0.7080107824163672, + "grad_norm": 1.6228445087293197, + "learning_rate": 4.148211909979485e-06, + "loss": 0.446, + "step": 40974 + }, + { + "epoch": 0.7080280619297761, + "grad_norm": 0.7986205874130322, + "learning_rate": 4.147758096938709e-06, + "loss": 0.2069, + "step": 40975 + }, + { + "epoch": 0.708045341443185, + "grad_norm": 1.2844294880149698, + "learning_rate": 4.1473043022276035e-06, + "loss": 0.2954, + "step": 40976 + }, + { + "epoch": 0.7080626209565939, + "grad_norm": 0.8576633286148482, + "learning_rate": 4.1468505258475785e-06, + "loss": 0.2702, + "step": 40977 + }, + { + "epoch": 0.7080799004700028, + "grad_norm": 1.0266510711642287, + "learning_rate": 4.14639676780007e-06, + "loss": 0.4655, + "step": 40978 + }, + { + "epoch": 0.7080971799834117, + "grad_norm": 0.947854661818959, + "learning_rate": 4.145943028086487e-06, + "loss": 0.5013, + "step": 40979 + }, + { + "epoch": 0.7081144594968206, + "grad_norm": 1.5662253722431758, + "learning_rate": 4.1454893067082605e-06, + "loss": 0.3058, + "step": 40980 + }, + { + "epoch": 0.7081317390102295, + "grad_norm": 1.4110677124548128, + "learning_rate": 4.145035603666802e-06, + "loss": 0.5523, + "step": 40981 + }, + { + "epoch": 0.7081490185236384, + "grad_norm": 1.0891384069632601, + "learning_rate": 4.14458191896354e-06, + "loss": 0.5355, + "step": 40982 + }, + { + "epoch": 0.7081662980370472, + "grad_norm": 1.1630475931741968, + "learning_rate": 4.144128252599885e-06, + "loss": 0.2952, + "step": 40983 + }, + { + "epoch": 0.7081835775504561, + "grad_norm": 1.4667043461400502, + "learning_rate": 4.143674604577274e-06, + "loss": 0.3131, + "step": 40984 + }, + { + "epoch": 0.708200857063865, + "grad_norm": 1.1250736411625766, + "learning_rate": 4.143220974897118e-06, + "loss": 0.3382, + "step": 40985 + }, + { + "epoch": 0.708218136577274, + "grad_norm": 1.3037523320401196, + "learning_rate": 4.142767363560836e-06, + "loss": 0.3274, + "step": 40986 + }, + { + "epoch": 0.7082354160906829, + "grad_norm": 1.2048659753434672, + "learning_rate": 4.142313770569855e-06, + "loss": 0.2999, + "step": 40987 + }, + { + "epoch": 0.7082526956040918, + "grad_norm": 1.419873740692479, + "learning_rate": 4.141860195925589e-06, + "loss": 0.3054, + "step": 40988 + }, + { + "epoch": 0.7082699751175007, + "grad_norm": 1.4070143189549915, + "learning_rate": 4.1414066396294614e-06, + "loss": 0.3969, + "step": 40989 + }, + { + "epoch": 0.7082872546309096, + "grad_norm": 1.4406426043837885, + "learning_rate": 4.140953101682896e-06, + "loss": 0.3166, + "step": 40990 + }, + { + "epoch": 0.7083045341443185, + "grad_norm": 1.7192257512013274, + "learning_rate": 4.140499582087312e-06, + "loss": 0.4773, + "step": 40991 + }, + { + "epoch": 0.7083218136577274, + "grad_norm": 0.7942023155050743, + "learning_rate": 4.140046080844125e-06, + "loss": 0.4834, + "step": 40992 + }, + { + "epoch": 0.7083390931711363, + "grad_norm": 1.3771042375638578, + "learning_rate": 4.139592597954763e-06, + "loss": 0.3932, + "step": 40993 + }, + { + "epoch": 0.7083563726845452, + "grad_norm": 1.4935595393480003, + "learning_rate": 4.1391391334206385e-06, + "loss": 0.4217, + "step": 40994 + }, + { + "epoch": 0.7083736521979541, + "grad_norm": 1.2912934932560023, + "learning_rate": 4.1386856872431765e-06, + "loss": 0.4466, + "step": 40995 + }, + { + "epoch": 0.708390931711363, + "grad_norm": 1.0336431995141335, + "learning_rate": 4.138232259423799e-06, + "loss": 0.2988, + "step": 40996 + }, + { + "epoch": 0.7084082112247719, + "grad_norm": 1.0303998703184156, + "learning_rate": 4.13777884996392e-06, + "loss": 0.4032, + "step": 40997 + }, + { + "epoch": 0.7084254907381808, + "grad_norm": 1.217954450159095, + "learning_rate": 4.137325458864968e-06, + "loss": 0.3234, + "step": 40998 + }, + { + "epoch": 0.7084427702515897, + "grad_norm": 1.5559431253877622, + "learning_rate": 4.136872086128354e-06, + "loss": 0.6045, + "step": 40999 + }, + { + "epoch": 0.7084600497649987, + "grad_norm": 1.2960427149316056, + "learning_rate": 4.136418731755502e-06, + "loss": 0.5122, + "step": 41000 + }, + { + "epoch": 0.7084773292784076, + "grad_norm": 1.057052802833885, + "learning_rate": 4.135965395747832e-06, + "loss": 0.2088, + "step": 41001 + }, + { + "epoch": 0.7084946087918165, + "grad_norm": 1.465273929247216, + "learning_rate": 4.135512078106768e-06, + "loss": 0.282, + "step": 41002 + }, + { + "epoch": 0.7085118883052254, + "grad_norm": 1.2033215080673856, + "learning_rate": 4.135058778833722e-06, + "loss": 0.8231, + "step": 41003 + }, + { + "epoch": 0.7085291678186342, + "grad_norm": 1.1756941683560007, + "learning_rate": 4.134605497930121e-06, + "loss": 0.5961, + "step": 41004 + }, + { + "epoch": 0.7085464473320431, + "grad_norm": 1.3477944525650458, + "learning_rate": 4.1341522353973785e-06, + "loss": 0.3722, + "step": 41005 + }, + { + "epoch": 0.708563726845452, + "grad_norm": 1.2896781238702766, + "learning_rate": 4.133698991236916e-06, + "loss": 0.3449, + "step": 41006 + }, + { + "epoch": 0.7085810063588609, + "grad_norm": 2.432867834823749, + "learning_rate": 4.133245765450158e-06, + "loss": 0.3736, + "step": 41007 + }, + { + "epoch": 0.7085982858722698, + "grad_norm": 1.4102981727117931, + "learning_rate": 4.132792558038518e-06, + "loss": 0.4615, + "step": 41008 + }, + { + "epoch": 0.7086155653856787, + "grad_norm": 1.0993107750116413, + "learning_rate": 4.132339369003419e-06, + "loss": 0.1745, + "step": 41009 + }, + { + "epoch": 0.7086328448990876, + "grad_norm": 1.3250067152638274, + "learning_rate": 4.131886198346277e-06, + "loss": 0.3669, + "step": 41010 + }, + { + "epoch": 0.7086501244124965, + "grad_norm": 1.2832404903363364, + "learning_rate": 4.131433046068511e-06, + "loss": 0.3914, + "step": 41011 + }, + { + "epoch": 0.7086674039259054, + "grad_norm": 1.4971030530194507, + "learning_rate": 4.130979912171545e-06, + "loss": 0.3594, + "step": 41012 + }, + { + "epoch": 0.7086846834393143, + "grad_norm": 1.2336824277988185, + "learning_rate": 4.130526796656799e-06, + "loss": 0.3932, + "step": 41013 + }, + { + "epoch": 0.7087019629527233, + "grad_norm": 1.1165865873946865, + "learning_rate": 4.13007369952569e-06, + "loss": 0.2939, + "step": 41014 + }, + { + "epoch": 0.7087192424661322, + "grad_norm": 1.2286913635939218, + "learning_rate": 4.12962062077963e-06, + "loss": 0.2992, + "step": 41015 + }, + { + "epoch": 0.7087365219795411, + "grad_norm": 0.7964658888411902, + "learning_rate": 4.129167560420051e-06, + "loss": 0.5093, + "step": 41016 + }, + { + "epoch": 0.70875380149295, + "grad_norm": 1.2886084342917592, + "learning_rate": 4.128714518448357e-06, + "loss": 0.4305, + "step": 41017 + }, + { + "epoch": 0.7087710810063589, + "grad_norm": 1.1705361731930142, + "learning_rate": 4.128261494865981e-06, + "loss": 0.2688, + "step": 41018 + }, + { + "epoch": 0.7087883605197678, + "grad_norm": 1.7230247505332346, + "learning_rate": 4.127808489674335e-06, + "loss": 0.3558, + "step": 41019 + }, + { + "epoch": 0.7088056400331767, + "grad_norm": 1.4067762797290317, + "learning_rate": 4.127355502874841e-06, + "loss": 0.6698, + "step": 41020 + }, + { + "epoch": 0.7088229195465856, + "grad_norm": 1.1564575209779155, + "learning_rate": 4.126902534468914e-06, + "loss": 0.2778, + "step": 41021 + }, + { + "epoch": 0.7088401990599945, + "grad_norm": 0.9105842857838711, + "learning_rate": 4.126449584457977e-06, + "loss": 0.4279, + "step": 41022 + }, + { + "epoch": 0.7088574785734034, + "grad_norm": 1.4571048826564046, + "learning_rate": 4.125996652843439e-06, + "loss": 0.4242, + "step": 41023 + }, + { + "epoch": 0.7088747580868123, + "grad_norm": 1.0638847488488603, + "learning_rate": 4.125543739626734e-06, + "loss": 0.3857, + "step": 41024 + }, + { + "epoch": 0.7088920376002211, + "grad_norm": 1.3519620055767, + "learning_rate": 4.125090844809272e-06, + "loss": 0.364, + "step": 41025 + }, + { + "epoch": 0.70890931711363, + "grad_norm": 1.0012571560382277, + "learning_rate": 4.124637968392468e-06, + "loss": 0.3323, + "step": 41026 + }, + { + "epoch": 0.7089265966270389, + "grad_norm": 1.2743590160515461, + "learning_rate": 4.124185110377748e-06, + "loss": 0.3218, + "step": 41027 + }, + { + "epoch": 0.7089438761404478, + "grad_norm": 1.6682482488987058, + "learning_rate": 4.123732270766523e-06, + "loss": 0.4961, + "step": 41028 + }, + { + "epoch": 0.7089611556538568, + "grad_norm": 1.50003401027822, + "learning_rate": 4.123279449560216e-06, + "loss": 0.2013, + "step": 41029 + }, + { + "epoch": 0.7089784351672657, + "grad_norm": 1.0643719943843195, + "learning_rate": 4.122826646760243e-06, + "loss": 0.3584, + "step": 41030 + }, + { + "epoch": 0.7089957146806746, + "grad_norm": 1.2002346634855643, + "learning_rate": 4.122373862368029e-06, + "loss": 0.3226, + "step": 41031 + }, + { + "epoch": 0.7090129941940835, + "grad_norm": 1.3052930218811882, + "learning_rate": 4.121921096384981e-06, + "loss": 0.5043, + "step": 41032 + }, + { + "epoch": 0.7090302737074924, + "grad_norm": 1.3812304850976564, + "learning_rate": 4.1214683488125284e-06, + "loss": 0.2462, + "step": 41033 + }, + { + "epoch": 0.7090475532209013, + "grad_norm": 1.6722103310590632, + "learning_rate": 4.121015619652079e-06, + "loss": 0.3045, + "step": 41034 + }, + { + "epoch": 0.7090648327343102, + "grad_norm": 0.9001986724215187, + "learning_rate": 4.120562908905056e-06, + "loss": 0.5196, + "step": 41035 + }, + { + "epoch": 0.7090821122477191, + "grad_norm": 1.391811847250556, + "learning_rate": 4.12011021657288e-06, + "loss": 0.204, + "step": 41036 + }, + { + "epoch": 0.709099391761128, + "grad_norm": 1.032878561718044, + "learning_rate": 4.119657542656962e-06, + "loss": 0.7887, + "step": 41037 + }, + { + "epoch": 0.7091166712745369, + "grad_norm": 0.9017900548404864, + "learning_rate": 4.119204887158728e-06, + "loss": 0.3352, + "step": 41038 + }, + { + "epoch": 0.7091339507879458, + "grad_norm": 1.1888565962410964, + "learning_rate": 4.118752250079586e-06, + "loss": 0.4597, + "step": 41039 + }, + { + "epoch": 0.7091512303013547, + "grad_norm": 1.21533602580748, + "learning_rate": 4.11829963142096e-06, + "loss": 0.4349, + "step": 41040 + }, + { + "epoch": 0.7091685098147636, + "grad_norm": 1.287634850562778, + "learning_rate": 4.117847031184267e-06, + "loss": 0.4122, + "step": 41041 + }, + { + "epoch": 0.7091857893281726, + "grad_norm": 1.4161111061148877, + "learning_rate": 4.117394449370926e-06, + "loss": 0.1786, + "step": 41042 + }, + { + "epoch": 0.7092030688415815, + "grad_norm": 1.1024944497324596, + "learning_rate": 4.11694188598235e-06, + "loss": 0.2862, + "step": 41043 + }, + { + "epoch": 0.7092203483549904, + "grad_norm": 1.4215492045803457, + "learning_rate": 4.116489341019963e-06, + "loss": 0.4018, + "step": 41044 + }, + { + "epoch": 0.7092376278683993, + "grad_norm": 1.898567192446631, + "learning_rate": 4.1160368144851745e-06, + "loss": 0.3613, + "step": 41045 + }, + { + "epoch": 0.7092549073818081, + "grad_norm": 1.5335825327338632, + "learning_rate": 4.115584306379407e-06, + "loss": 0.414, + "step": 41046 + }, + { + "epoch": 0.709272186895217, + "grad_norm": 0.9004653543921334, + "learning_rate": 4.115131816704079e-06, + "loss": 0.2295, + "step": 41047 + }, + { + "epoch": 0.7092894664086259, + "grad_norm": 1.5382602738249538, + "learning_rate": 4.114679345460602e-06, + "loss": 0.2652, + "step": 41048 + }, + { + "epoch": 0.7093067459220348, + "grad_norm": 1.8564720601765035, + "learning_rate": 4.1142268926504e-06, + "loss": 0.4865, + "step": 41049 + }, + { + "epoch": 0.7093240254354437, + "grad_norm": 1.049951743505927, + "learning_rate": 4.113774458274884e-06, + "loss": 0.2136, + "step": 41050 + }, + { + "epoch": 0.7093413049488526, + "grad_norm": 1.3943764695254135, + "learning_rate": 4.113322042335473e-06, + "loss": 0.4106, + "step": 41051 + }, + { + "epoch": 0.7093585844622615, + "grad_norm": 0.689554641518309, + "learning_rate": 4.112869644833585e-06, + "loss": 0.4665, + "step": 41052 + }, + { + "epoch": 0.7093758639756704, + "grad_norm": 0.9665256636293729, + "learning_rate": 4.11241726577064e-06, + "loss": 0.2697, + "step": 41053 + }, + { + "epoch": 0.7093931434890793, + "grad_norm": 0.6716457970076531, + "learning_rate": 4.1119649051480485e-06, + "loss": 0.4301, + "step": 41054 + }, + { + "epoch": 0.7094104230024882, + "grad_norm": 1.3910022529312829, + "learning_rate": 4.111512562967233e-06, + "loss": 0.3233, + "step": 41055 + }, + { + "epoch": 0.7094277025158972, + "grad_norm": 1.191750332399773, + "learning_rate": 4.111060239229608e-06, + "loss": 0.3968, + "step": 41056 + }, + { + "epoch": 0.7094449820293061, + "grad_norm": 1.3238125316419918, + "learning_rate": 4.110607933936584e-06, + "loss": 0.36, + "step": 41057 + }, + { + "epoch": 0.709462261542715, + "grad_norm": 1.681995387439675, + "learning_rate": 4.110155647089589e-06, + "loss": 0.4364, + "step": 41058 + }, + { + "epoch": 0.7094795410561239, + "grad_norm": 1.142828200086591, + "learning_rate": 4.109703378690031e-06, + "loss": 0.4002, + "step": 41059 + }, + { + "epoch": 0.7094968205695328, + "grad_norm": 1.0262542146047848, + "learning_rate": 4.109251128739333e-06, + "loss": 0.3141, + "step": 41060 + }, + { + "epoch": 0.7095141000829417, + "grad_norm": 1.0149318184513336, + "learning_rate": 4.108798897238905e-06, + "loss": 0.4461, + "step": 41061 + }, + { + "epoch": 0.7095313795963506, + "grad_norm": 1.0220919742374937, + "learning_rate": 4.1083466841901695e-06, + "loss": 0.3192, + "step": 41062 + }, + { + "epoch": 0.7095486591097595, + "grad_norm": 0.9742819501884499, + "learning_rate": 4.107894489594534e-06, + "loss": 0.2285, + "step": 41063 + }, + { + "epoch": 0.7095659386231684, + "grad_norm": 1.1362359128316246, + "learning_rate": 4.107442313453427e-06, + "loss": 0.2431, + "step": 41064 + }, + { + "epoch": 0.7095832181365773, + "grad_norm": 1.001711067916941, + "learning_rate": 4.106990155768255e-06, + "loss": 0.3591, + "step": 41065 + }, + { + "epoch": 0.7096004976499862, + "grad_norm": 0.8481221380420255, + "learning_rate": 4.106538016540442e-06, + "loss": 0.3209, + "step": 41066 + }, + { + "epoch": 0.709617777163395, + "grad_norm": 1.3791487822397575, + "learning_rate": 4.1060858957713986e-06, + "loss": 0.2947, + "step": 41067 + }, + { + "epoch": 0.7096350566768039, + "grad_norm": 0.8726933658395829, + "learning_rate": 4.10563379346254e-06, + "loss": 0.4074, + "step": 41068 + }, + { + "epoch": 0.7096523361902128, + "grad_norm": 1.4789126538334867, + "learning_rate": 4.105181709615284e-06, + "loss": 0.3609, + "step": 41069 + }, + { + "epoch": 0.7096696157036217, + "grad_norm": 0.939309468212677, + "learning_rate": 4.104729644231045e-06, + "loss": 0.2578, + "step": 41070 + }, + { + "epoch": 0.7096868952170307, + "grad_norm": 1.1662692564770445, + "learning_rate": 4.104277597311247e-06, + "loss": 0.5673, + "step": 41071 + }, + { + "epoch": 0.7097041747304396, + "grad_norm": 0.9471021444503631, + "learning_rate": 4.103825568857295e-06, + "loss": 0.2938, + "step": 41072 + }, + { + "epoch": 0.7097214542438485, + "grad_norm": 1.006483480066204, + "learning_rate": 4.103373558870612e-06, + "loss": 0.376, + "step": 41073 + }, + { + "epoch": 0.7097387337572574, + "grad_norm": 1.321227206811706, + "learning_rate": 4.102921567352608e-06, + "loss": 0.3076, + "step": 41074 + }, + { + "epoch": 0.7097560132706663, + "grad_norm": 1.1237801512075347, + "learning_rate": 4.102469594304702e-06, + "loss": 0.563, + "step": 41075 + }, + { + "epoch": 0.7097732927840752, + "grad_norm": 1.1132426912770983, + "learning_rate": 4.102017639728314e-06, + "loss": 0.3322, + "step": 41076 + }, + { + "epoch": 0.7097905722974841, + "grad_norm": 1.8913488476355693, + "learning_rate": 4.10156570362485e-06, + "loss": 0.3585, + "step": 41077 + }, + { + "epoch": 0.709807851810893, + "grad_norm": 1.424571111281307, + "learning_rate": 4.1011137859957354e-06, + "loss": 0.4306, + "step": 41078 + }, + { + "epoch": 0.7098251313243019, + "grad_norm": 1.312597634600694, + "learning_rate": 4.100661886842376e-06, + "loss": 0.41, + "step": 41079 + }, + { + "epoch": 0.7098424108377108, + "grad_norm": 1.250898840792913, + "learning_rate": 4.100210006166192e-06, + "loss": 0.5017, + "step": 41080 + }, + { + "epoch": 0.7098596903511197, + "grad_norm": 1.6127010271575757, + "learning_rate": 4.0997581439685995e-06, + "loss": 0.368, + "step": 41081 + }, + { + "epoch": 0.7098769698645286, + "grad_norm": 1.329744166029125, + "learning_rate": 4.099306300251016e-06, + "loss": 0.4721, + "step": 41082 + }, + { + "epoch": 0.7098942493779375, + "grad_norm": 1.148355794369677, + "learning_rate": 4.098854475014849e-06, + "loss": 0.3197, + "step": 41083 + }, + { + "epoch": 0.7099115288913465, + "grad_norm": 1.148197795216684, + "learning_rate": 4.098402668261523e-06, + "loss": 0.3316, + "step": 41084 + }, + { + "epoch": 0.7099288084047554, + "grad_norm": 1.1888215675610632, + "learning_rate": 4.097950879992445e-06, + "loss": 0.6475, + "step": 41085 + }, + { + "epoch": 0.7099460879181643, + "grad_norm": 1.018133057171023, + "learning_rate": 4.097499110209032e-06, + "loss": 0.3625, + "step": 41086 + }, + { + "epoch": 0.7099633674315732, + "grad_norm": 1.2078059382889366, + "learning_rate": 4.097047358912706e-06, + "loss": 0.479, + "step": 41087 + }, + { + "epoch": 0.709980646944982, + "grad_norm": 0.5688058368484975, + "learning_rate": 4.096595626104871e-06, + "loss": 0.8769, + "step": 41088 + }, + { + "epoch": 0.7099979264583909, + "grad_norm": 1.1653502627025276, + "learning_rate": 4.096143911786951e-06, + "loss": 0.2997, + "step": 41089 + }, + { + "epoch": 0.7100152059717998, + "grad_norm": 0.7028071855770992, + "learning_rate": 4.095692215960354e-06, + "loss": 0.6694, + "step": 41090 + }, + { + "epoch": 0.7100324854852087, + "grad_norm": 1.108142164936823, + "learning_rate": 4.095240538626497e-06, + "loss": 0.3792, + "step": 41091 + }, + { + "epoch": 0.7100497649986176, + "grad_norm": 1.13734361965644, + "learning_rate": 4.094788879786796e-06, + "loss": 0.4366, + "step": 41092 + }, + { + "epoch": 0.7100670445120265, + "grad_norm": 1.486769661671265, + "learning_rate": 4.094337239442669e-06, + "loss": 0.299, + "step": 41093 + }, + { + "epoch": 0.7100843240254354, + "grad_norm": 1.3581921381077828, + "learning_rate": 4.0938856175955215e-06, + "loss": 0.3188, + "step": 41094 + }, + { + "epoch": 0.7101016035388443, + "grad_norm": 1.568457325289085, + "learning_rate": 4.0934340142467775e-06, + "loss": 0.5403, + "step": 41095 + }, + { + "epoch": 0.7101188830522532, + "grad_norm": 0.8324726005778458, + "learning_rate": 4.092982429397844e-06, + "loss": 0.4355, + "step": 41096 + }, + { + "epoch": 0.7101361625656621, + "grad_norm": 1.602160124106562, + "learning_rate": 4.092530863050137e-06, + "loss": 0.2661, + "step": 41097 + }, + { + "epoch": 0.710153442079071, + "grad_norm": 1.4277986059481962, + "learning_rate": 4.092079315205077e-06, + "loss": 0.2781, + "step": 41098 + }, + { + "epoch": 0.71017072159248, + "grad_norm": 1.2026742872428373, + "learning_rate": 4.0916277858640696e-06, + "loss": 0.434, + "step": 41099 + }, + { + "epoch": 0.7101880011058889, + "grad_norm": 1.6818561450639644, + "learning_rate": 4.091176275028537e-06, + "loss": 0.3522, + "step": 41100 + }, + { + "epoch": 0.7102052806192978, + "grad_norm": 1.1903188229080572, + "learning_rate": 4.0907247826998855e-06, + "loss": 0.2799, + "step": 41101 + }, + { + "epoch": 0.7102225601327067, + "grad_norm": 1.0648478407167192, + "learning_rate": 4.090273308879537e-06, + "loss": 0.5258, + "step": 41102 + }, + { + "epoch": 0.7102398396461156, + "grad_norm": 0.9444037389779347, + "learning_rate": 4.089821853568894e-06, + "loss": 0.3685, + "step": 41103 + }, + { + "epoch": 0.7102571191595245, + "grad_norm": 1.0344582386734424, + "learning_rate": 4.089370416769385e-06, + "loss": 0.578, + "step": 41104 + }, + { + "epoch": 0.7102743986729334, + "grad_norm": 1.1224321347817472, + "learning_rate": 4.088918998482414e-06, + "loss": 0.5024, + "step": 41105 + }, + { + "epoch": 0.7102916781863423, + "grad_norm": 1.2660396982789424, + "learning_rate": 4.088467598709401e-06, + "loss": 0.3375, + "step": 41106 + }, + { + "epoch": 0.7103089576997512, + "grad_norm": 1.5799836803237453, + "learning_rate": 4.088016217451757e-06, + "loss": 0.3637, + "step": 41107 + }, + { + "epoch": 0.7103262372131601, + "grad_norm": 3.21647701997839, + "learning_rate": 4.087564854710891e-06, + "loss": 0.2253, + "step": 41108 + }, + { + "epoch": 0.710343516726569, + "grad_norm": 1.9737886880336444, + "learning_rate": 4.087113510488223e-06, + "loss": 0.4679, + "step": 41109 + }, + { + "epoch": 0.7103607962399778, + "grad_norm": 1.9887648852973456, + "learning_rate": 4.086662184785164e-06, + "loss": 0.7372, + "step": 41110 + }, + { + "epoch": 0.7103780757533867, + "grad_norm": 1.372516725335801, + "learning_rate": 4.086210877603132e-06, + "loss": 0.444, + "step": 41111 + }, + { + "epoch": 0.7103953552667956, + "grad_norm": 1.6043523092626502, + "learning_rate": 4.085759588943534e-06, + "loss": 0.374, + "step": 41112 + }, + { + "epoch": 0.7104126347802046, + "grad_norm": 1.9062949186506246, + "learning_rate": 4.0853083188077895e-06, + "loss": 0.289, + "step": 41113 + }, + { + "epoch": 0.7104299142936135, + "grad_norm": 2.1881842373032665, + "learning_rate": 4.084857067197305e-06, + "loss": 0.4364, + "step": 41114 + }, + { + "epoch": 0.7104471938070224, + "grad_norm": 0.8309348130548881, + "learning_rate": 4.084405834113497e-06, + "loss": 0.3851, + "step": 41115 + }, + { + "epoch": 0.7104644733204313, + "grad_norm": 1.5233541567090636, + "learning_rate": 4.083954619557781e-06, + "loss": 0.3824, + "step": 41116 + }, + { + "epoch": 0.7104817528338402, + "grad_norm": 1.605964402462317, + "learning_rate": 4.083503423531572e-06, + "loss": 0.389, + "step": 41117 + }, + { + "epoch": 0.7104990323472491, + "grad_norm": 1.727189070434456, + "learning_rate": 4.08305224603628e-06, + "loss": 0.31, + "step": 41118 + }, + { + "epoch": 0.710516311860658, + "grad_norm": 1.5030997156938621, + "learning_rate": 4.082601087073313e-06, + "loss": 0.3292, + "step": 41119 + }, + { + "epoch": 0.7105335913740669, + "grad_norm": 1.5781552929046405, + "learning_rate": 4.082149946644091e-06, + "loss": 0.3549, + "step": 41120 + }, + { + "epoch": 0.7105508708874758, + "grad_norm": 1.2591361416672227, + "learning_rate": 4.081698824750024e-06, + "loss": 0.2516, + "step": 41121 + }, + { + "epoch": 0.7105681504008847, + "grad_norm": 1.6714665259314874, + "learning_rate": 4.081247721392531e-06, + "loss": 0.3423, + "step": 41122 + }, + { + "epoch": 0.7105854299142936, + "grad_norm": 1.1760026769655192, + "learning_rate": 4.0807966365730144e-06, + "loss": 0.3293, + "step": 41123 + }, + { + "epoch": 0.7106027094277025, + "grad_norm": 1.128973925737377, + "learning_rate": 4.080345570292898e-06, + "loss": 0.4122, + "step": 41124 + }, + { + "epoch": 0.7106199889411114, + "grad_norm": 2.253266237514784, + "learning_rate": 4.079894522553583e-06, + "loss": 0.4049, + "step": 41125 + }, + { + "epoch": 0.7106372684545204, + "grad_norm": 0.7988403845115433, + "learning_rate": 4.079443493356491e-06, + "loss": 0.4076, + "step": 41126 + }, + { + "epoch": 0.7106545479679293, + "grad_norm": 1.3401864498392417, + "learning_rate": 4.0789924827030304e-06, + "loss": 0.3289, + "step": 41127 + }, + { + "epoch": 0.7106718274813382, + "grad_norm": 1.187609074726243, + "learning_rate": 4.07854149059462e-06, + "loss": 0.2671, + "step": 41128 + }, + { + "epoch": 0.7106891069947471, + "grad_norm": 2.5070159086199495, + "learning_rate": 4.078090517032667e-06, + "loss": 0.3205, + "step": 41129 + }, + { + "epoch": 0.710706386508156, + "grad_norm": 1.7484788052525475, + "learning_rate": 4.07763956201858e-06, + "loss": 0.3994, + "step": 41130 + }, + { + "epoch": 0.7107236660215648, + "grad_norm": 1.0667229879414002, + "learning_rate": 4.077188625553778e-06, + "loss": 0.3558, + "step": 41131 + }, + { + "epoch": 0.7107409455349737, + "grad_norm": 1.0117207543108033, + "learning_rate": 4.07673770763967e-06, + "loss": 0.3318, + "step": 41132 + }, + { + "epoch": 0.7107582250483826, + "grad_norm": 1.213855169205915, + "learning_rate": 4.076286808277673e-06, + "loss": 0.3894, + "step": 41133 + }, + { + "epoch": 0.7107755045617915, + "grad_norm": 0.920281458595419, + "learning_rate": 4.075835927469193e-06, + "loss": 0.4777, + "step": 41134 + }, + { + "epoch": 0.7107927840752004, + "grad_norm": 0.7833731508469982, + "learning_rate": 4.075385065215649e-06, + "loss": 0.1695, + "step": 41135 + }, + { + "epoch": 0.7108100635886093, + "grad_norm": 1.1491977275478384, + "learning_rate": 4.074934221518445e-06, + "loss": 0.3784, + "step": 41136 + }, + { + "epoch": 0.7108273431020182, + "grad_norm": 0.5485751232928057, + "learning_rate": 4.074483396378997e-06, + "loss": 0.8088, + "step": 41137 + }, + { + "epoch": 0.7108446226154271, + "grad_norm": 1.0488572536192686, + "learning_rate": 4.074032589798722e-06, + "loss": 0.3964, + "step": 41138 + }, + { + "epoch": 0.710861902128836, + "grad_norm": 1.023157821857013, + "learning_rate": 4.073581801779023e-06, + "loss": 0.3174, + "step": 41139 + }, + { + "epoch": 0.710879181642245, + "grad_norm": 1.0680633495529017, + "learning_rate": 4.073131032321319e-06, + "loss": 0.2626, + "step": 41140 + }, + { + "epoch": 0.7108964611556539, + "grad_norm": 1.1112252390650235, + "learning_rate": 4.072680281427017e-06, + "loss": 0.4142, + "step": 41141 + }, + { + "epoch": 0.7109137406690628, + "grad_norm": 0.7179315849178872, + "learning_rate": 4.072229549097534e-06, + "loss": 0.6443, + "step": 41142 + }, + { + "epoch": 0.7109310201824717, + "grad_norm": 0.9022498014278192, + "learning_rate": 4.07177883533427e-06, + "loss": 0.3593, + "step": 41143 + }, + { + "epoch": 0.7109482996958806, + "grad_norm": 1.2397868483165775, + "learning_rate": 4.071328140138654e-06, + "loss": 0.304, + "step": 41144 + }, + { + "epoch": 0.7109655792092895, + "grad_norm": 1.2531934745316633, + "learning_rate": 4.070877463512085e-06, + "loss": 0.4374, + "step": 41145 + }, + { + "epoch": 0.7109828587226984, + "grad_norm": 2.4979246690623724, + "learning_rate": 4.070426805455981e-06, + "loss": 0.4083, + "step": 41146 + }, + { + "epoch": 0.7110001382361073, + "grad_norm": 1.4502688254720895, + "learning_rate": 4.069976165971748e-06, + "loss": 0.286, + "step": 41147 + }, + { + "epoch": 0.7110174177495162, + "grad_norm": 1.2706947020167385, + "learning_rate": 4.069525545060804e-06, + "loss": 0.4609, + "step": 41148 + }, + { + "epoch": 0.7110346972629251, + "grad_norm": 0.7535353230794746, + "learning_rate": 4.069074942724553e-06, + "loss": 0.7795, + "step": 41149 + }, + { + "epoch": 0.711051976776334, + "grad_norm": 0.7296778365544745, + "learning_rate": 4.0686243589644105e-06, + "loss": 0.6104, + "step": 41150 + }, + { + "epoch": 0.7110692562897429, + "grad_norm": 1.14653557475565, + "learning_rate": 4.06817379378179e-06, + "loss": 0.2991, + "step": 41151 + }, + { + "epoch": 0.7110865358031517, + "grad_norm": 0.7845634817262023, + "learning_rate": 4.067723247178097e-06, + "loss": 0.7851, + "step": 41152 + }, + { + "epoch": 0.7111038153165606, + "grad_norm": 0.9340599463507496, + "learning_rate": 4.067272719154749e-06, + "loss": 0.3628, + "step": 41153 + }, + { + "epoch": 0.7111210948299695, + "grad_norm": 1.7836401846863685, + "learning_rate": 4.06682220971315e-06, + "loss": 0.3341, + "step": 41154 + }, + { + "epoch": 0.7111383743433785, + "grad_norm": 0.9252799580186258, + "learning_rate": 4.0663717188547145e-06, + "loss": 0.2019, + "step": 41155 + }, + { + "epoch": 0.7111556538567874, + "grad_norm": 1.5841931837568566, + "learning_rate": 4.065921246580854e-06, + "loss": 0.3634, + "step": 41156 + }, + { + "epoch": 0.7111729333701963, + "grad_norm": 1.2952067724233431, + "learning_rate": 4.065470792892984e-06, + "loss": 0.3224, + "step": 41157 + }, + { + "epoch": 0.7111902128836052, + "grad_norm": 0.9851442917968316, + "learning_rate": 4.065020357792505e-06, + "loss": 0.4874, + "step": 41158 + }, + { + "epoch": 0.7112074923970141, + "grad_norm": 1.1893895153145733, + "learning_rate": 4.064569941280837e-06, + "loss": 0.4717, + "step": 41159 + }, + { + "epoch": 0.711224771910423, + "grad_norm": 1.2813432908714113, + "learning_rate": 4.064119543359385e-06, + "loss": 0.3286, + "step": 41160 + }, + { + "epoch": 0.7112420514238319, + "grad_norm": 2.1146967546863467, + "learning_rate": 4.06366916402956e-06, + "loss": 0.3575, + "step": 41161 + }, + { + "epoch": 0.7112593309372408, + "grad_norm": 1.5937189561613505, + "learning_rate": 4.063218803292779e-06, + "loss": 0.3533, + "step": 41162 + }, + { + "epoch": 0.7112766104506497, + "grad_norm": 1.4459134251748968, + "learning_rate": 4.062768461150443e-06, + "loss": 0.3807, + "step": 41163 + }, + { + "epoch": 0.7112938899640586, + "grad_norm": 1.010501796115301, + "learning_rate": 4.062318137603972e-06, + "loss": 0.3728, + "step": 41164 + }, + { + "epoch": 0.7113111694774675, + "grad_norm": 0.9219268287066872, + "learning_rate": 4.0618678326547686e-06, + "loss": 0.3611, + "step": 41165 + }, + { + "epoch": 0.7113284489908764, + "grad_norm": 1.0306179801501254, + "learning_rate": 4.061417546304246e-06, + "loss": 0.3997, + "step": 41166 + }, + { + "epoch": 0.7113457285042853, + "grad_norm": 1.0301484833397534, + "learning_rate": 4.0609672785538145e-06, + "loss": 0.2953, + "step": 41167 + }, + { + "epoch": 0.7113630080176943, + "grad_norm": 0.943411299690871, + "learning_rate": 4.060517029404889e-06, + "loss": 0.3689, + "step": 41168 + }, + { + "epoch": 0.7113802875311032, + "grad_norm": 1.3408980476822299, + "learning_rate": 4.060066798858876e-06, + "loss": 0.423, + "step": 41169 + }, + { + "epoch": 0.7113975670445121, + "grad_norm": 1.1237902134371944, + "learning_rate": 4.05961658691718e-06, + "loss": 0.2088, + "step": 41170 + }, + { + "epoch": 0.711414846557921, + "grad_norm": 0.9297275459460861, + "learning_rate": 4.0591663935812175e-06, + "loss": 0.2955, + "step": 41171 + }, + { + "epoch": 0.7114321260713299, + "grad_norm": 1.3101901019394382, + "learning_rate": 4.058716218852398e-06, + "loss": 0.4108, + "step": 41172 + }, + { + "epoch": 0.7114494055847387, + "grad_norm": 1.08038306743361, + "learning_rate": 4.058266062732133e-06, + "loss": 0.3304, + "step": 41173 + }, + { + "epoch": 0.7114666850981476, + "grad_norm": 0.9454298258232854, + "learning_rate": 4.057815925221827e-06, + "loss": 0.2064, + "step": 41174 + }, + { + "epoch": 0.7114839646115565, + "grad_norm": 0.8238491719033701, + "learning_rate": 4.057365806322897e-06, + "loss": 0.4463, + "step": 41175 + }, + { + "epoch": 0.7115012441249654, + "grad_norm": 1.4691101672965132, + "learning_rate": 4.056915706036744e-06, + "loss": 0.369, + "step": 41176 + }, + { + "epoch": 0.7115185236383743, + "grad_norm": 1.207085476084686, + "learning_rate": 4.056465624364785e-06, + "loss": 0.3643, + "step": 41177 + }, + { + "epoch": 0.7115358031517832, + "grad_norm": 1.637300479116577, + "learning_rate": 4.0560155613084265e-06, + "loss": 0.3904, + "step": 41178 + }, + { + "epoch": 0.7115530826651921, + "grad_norm": 1.2688601804161224, + "learning_rate": 4.0555655168690825e-06, + "loss": 0.378, + "step": 41179 + }, + { + "epoch": 0.711570362178601, + "grad_norm": 0.8435390665566117, + "learning_rate": 4.055115491048158e-06, + "loss": 0.4102, + "step": 41180 + }, + { + "epoch": 0.7115876416920099, + "grad_norm": 1.7731692967685921, + "learning_rate": 4.0546654838470615e-06, + "loss": 0.2151, + "step": 41181 + }, + { + "epoch": 0.7116049212054188, + "grad_norm": 1.5586237240419938, + "learning_rate": 4.054215495267208e-06, + "loss": 0.5801, + "step": 41182 + }, + { + "epoch": 0.7116222007188278, + "grad_norm": 1.2837173516784233, + "learning_rate": 4.053765525309995e-06, + "loss": 0.464, + "step": 41183 + }, + { + "epoch": 0.7116394802322367, + "grad_norm": 0.5628848774698308, + "learning_rate": 4.053315573976848e-06, + "loss": 0.1986, + "step": 41184 + }, + { + "epoch": 0.7116567597456456, + "grad_norm": 0.9923093165440533, + "learning_rate": 4.052865641269166e-06, + "loss": 0.3975, + "step": 41185 + }, + { + "epoch": 0.7116740392590545, + "grad_norm": 1.6402219075825137, + "learning_rate": 4.0524157271883635e-06, + "loss": 0.3315, + "step": 41186 + }, + { + "epoch": 0.7116913187724634, + "grad_norm": 1.077674729395251, + "learning_rate": 4.0519658317358435e-06, + "loss": 0.4313, + "step": 41187 + }, + { + "epoch": 0.7117085982858723, + "grad_norm": 1.3723867033194217, + "learning_rate": 4.051515954913021e-06, + "loss": 0.3455, + "step": 41188 + }, + { + "epoch": 0.7117258777992812, + "grad_norm": 1.3723363659226264, + "learning_rate": 4.051066096721301e-06, + "loss": 0.7879, + "step": 41189 + }, + { + "epoch": 0.7117431573126901, + "grad_norm": 1.5185488386853019, + "learning_rate": 4.050616257162093e-06, + "loss": 0.392, + "step": 41190 + }, + { + "epoch": 0.711760436826099, + "grad_norm": 1.1625030075642233, + "learning_rate": 4.050166436236811e-06, + "loss": 0.3626, + "step": 41191 + }, + { + "epoch": 0.7117777163395079, + "grad_norm": 0.9861128824980961, + "learning_rate": 4.049716633946855e-06, + "loss": 0.3319, + "step": 41192 + }, + { + "epoch": 0.7117949958529168, + "grad_norm": 1.3016651643776924, + "learning_rate": 4.049266850293644e-06, + "loss": 0.2891, + "step": 41193 + }, + { + "epoch": 0.7118122753663256, + "grad_norm": 1.5149447583065476, + "learning_rate": 4.0488170852785766e-06, + "loss": 0.3889, + "step": 41194 + }, + { + "epoch": 0.7118295548797345, + "grad_norm": 0.8984305255030005, + "learning_rate": 4.0483673389030675e-06, + "loss": 0.2199, + "step": 41195 + }, + { + "epoch": 0.7118468343931434, + "grad_norm": 0.9393297210672094, + "learning_rate": 4.047917611168523e-06, + "loss": 0.3417, + "step": 41196 + }, + { + "epoch": 0.7118641139065524, + "grad_norm": 0.8675131634090882, + "learning_rate": 4.047467902076357e-06, + "loss": 0.2689, + "step": 41197 + }, + { + "epoch": 0.7118813934199613, + "grad_norm": 0.9118977295664706, + "learning_rate": 4.047018211627971e-06, + "loss": 0.4685, + "step": 41198 + }, + { + "epoch": 0.7118986729333702, + "grad_norm": 0.7911392933905719, + "learning_rate": 4.04656853982478e-06, + "loss": 0.252, + "step": 41199 + }, + { + "epoch": 0.7119159524467791, + "grad_norm": 1.4057869649850279, + "learning_rate": 4.046118886668185e-06, + "loss": 0.3732, + "step": 41200 + }, + { + "epoch": 0.711933231960188, + "grad_norm": 0.8250340266989116, + "learning_rate": 4.045669252159598e-06, + "loss": 0.4931, + "step": 41201 + }, + { + "epoch": 0.7119505114735969, + "grad_norm": 1.3018851576435462, + "learning_rate": 4.04521963630043e-06, + "loss": 0.2638, + "step": 41202 + }, + { + "epoch": 0.7119677909870058, + "grad_norm": 0.9844834601215543, + "learning_rate": 4.044770039092084e-06, + "loss": 0.4383, + "step": 41203 + }, + { + "epoch": 0.7119850705004147, + "grad_norm": 1.4678207096253968, + "learning_rate": 4.044320460535974e-06, + "loss": 0.5556, + "step": 41204 + }, + { + "epoch": 0.7120023500138236, + "grad_norm": 1.021955804204542, + "learning_rate": 4.043870900633502e-06, + "loss": 0.4425, + "step": 41205 + }, + { + "epoch": 0.7120196295272325, + "grad_norm": 1.4838684134907556, + "learning_rate": 4.043421359386079e-06, + "loss": 0.4025, + "step": 41206 + }, + { + "epoch": 0.7120369090406414, + "grad_norm": 1.1932051355412894, + "learning_rate": 4.042971836795112e-06, + "loss": 0.4319, + "step": 41207 + }, + { + "epoch": 0.7120541885540503, + "grad_norm": 1.1196306052052085, + "learning_rate": 4.0425223328620135e-06, + "loss": 0.2257, + "step": 41208 + }, + { + "epoch": 0.7120714680674592, + "grad_norm": 1.165570338495679, + "learning_rate": 4.042072847588186e-06, + "loss": 0.5397, + "step": 41209 + }, + { + "epoch": 0.7120887475808682, + "grad_norm": 1.617742211729689, + "learning_rate": 4.041623380975041e-06, + "loss": 0.4285, + "step": 41210 + }, + { + "epoch": 0.7121060270942771, + "grad_norm": 1.2028764497134807, + "learning_rate": 4.041173933023981e-06, + "loss": 0.5778, + "step": 41211 + }, + { + "epoch": 0.712123306607686, + "grad_norm": 1.1251041457267112, + "learning_rate": 4.040724503736417e-06, + "loss": 0.2701, + "step": 41212 + }, + { + "epoch": 0.7121405861210949, + "grad_norm": 0.8949642839921823, + "learning_rate": 4.04027509311376e-06, + "loss": 0.2885, + "step": 41213 + }, + { + "epoch": 0.7121578656345038, + "grad_norm": 1.0634866517524344, + "learning_rate": 4.039825701157412e-06, + "loss": 0.462, + "step": 41214 + }, + { + "epoch": 0.7121751451479126, + "grad_norm": 1.2739604384319225, + "learning_rate": 4.039376327868786e-06, + "loss": 0.4048, + "step": 41215 + }, + { + "epoch": 0.7121924246613215, + "grad_norm": 1.1124824488505405, + "learning_rate": 4.038926973249283e-06, + "loss": 0.5274, + "step": 41216 + }, + { + "epoch": 0.7122097041747304, + "grad_norm": 1.2678932301059775, + "learning_rate": 4.0384776373003135e-06, + "loss": 0.209, + "step": 41217 + }, + { + "epoch": 0.7122269836881393, + "grad_norm": 1.4920990682410895, + "learning_rate": 4.038028320023285e-06, + "loss": 0.4333, + "step": 41218 + }, + { + "epoch": 0.7122442632015482, + "grad_norm": 1.3223555516628078, + "learning_rate": 4.0375790214196075e-06, + "loss": 0.4871, + "step": 41219 + }, + { + "epoch": 0.7122615427149571, + "grad_norm": 1.6428649737772545, + "learning_rate": 4.037129741490687e-06, + "loss": 0.424, + "step": 41220 + }, + { + "epoch": 0.712278822228366, + "grad_norm": 1.0682271616414454, + "learning_rate": 4.036680480237926e-06, + "loss": 0.239, + "step": 41221 + }, + { + "epoch": 0.7122961017417749, + "grad_norm": 2.240149978581154, + "learning_rate": 4.036231237662737e-06, + "loss": 0.2813, + "step": 41222 + }, + { + "epoch": 0.7123133812551838, + "grad_norm": 0.725205377476255, + "learning_rate": 4.035782013766519e-06, + "loss": 0.4046, + "step": 41223 + }, + { + "epoch": 0.7123306607685927, + "grad_norm": 1.8892966980262567, + "learning_rate": 4.035332808550691e-06, + "loss": 0.3616, + "step": 41224 + }, + { + "epoch": 0.7123479402820017, + "grad_norm": 1.1147617636987006, + "learning_rate": 4.034883622016652e-06, + "loss": 0.2914, + "step": 41225 + }, + { + "epoch": 0.7123652197954106, + "grad_norm": 0.9603764384072837, + "learning_rate": 4.034434454165813e-06, + "loss": 0.2355, + "step": 41226 + }, + { + "epoch": 0.7123824993088195, + "grad_norm": 0.6748872194592058, + "learning_rate": 4.033985304999576e-06, + "loss": 0.6673, + "step": 41227 + }, + { + "epoch": 0.7123997788222284, + "grad_norm": 0.9521762071257988, + "learning_rate": 4.033536174519354e-06, + "loss": 0.4292, + "step": 41228 + }, + { + "epoch": 0.7124170583356373, + "grad_norm": 0.953335268206096, + "learning_rate": 4.033087062726546e-06, + "loss": 0.5311, + "step": 41229 + }, + { + "epoch": 0.7124343378490462, + "grad_norm": 0.4806746242341254, + "learning_rate": 4.032637969622563e-06, + "loss": 0.6473, + "step": 41230 + }, + { + "epoch": 0.7124516173624551, + "grad_norm": 1.1072143219863904, + "learning_rate": 4.032188895208815e-06, + "loss": 0.2789, + "step": 41231 + }, + { + "epoch": 0.712468896875864, + "grad_norm": 1.068496137308301, + "learning_rate": 4.031739839486701e-06, + "loss": 0.2277, + "step": 41232 + }, + { + "epoch": 0.7124861763892729, + "grad_norm": 1.5796022019708336, + "learning_rate": 4.031290802457636e-06, + "loss": 0.3059, + "step": 41233 + }, + { + "epoch": 0.7125034559026818, + "grad_norm": 1.1237889175554157, + "learning_rate": 4.0308417841230175e-06, + "loss": 0.3084, + "step": 41234 + }, + { + "epoch": 0.7125207354160907, + "grad_norm": 1.535423682692159, + "learning_rate": 4.030392784484256e-06, + "loss": 0.3139, + "step": 41235 + }, + { + "epoch": 0.7125380149294995, + "grad_norm": 1.2311828147933719, + "learning_rate": 4.029943803542758e-06, + "loss": 0.3539, + "step": 41236 + }, + { + "epoch": 0.7125552944429084, + "grad_norm": 0.7039979145817257, + "learning_rate": 4.029494841299934e-06, + "loss": 0.6536, + "step": 41237 + }, + { + "epoch": 0.7125725739563173, + "grad_norm": 1.5038960568966069, + "learning_rate": 4.029045897757181e-06, + "loss": 0.3096, + "step": 41238 + }, + { + "epoch": 0.7125898534697263, + "grad_norm": 1.5353086546442887, + "learning_rate": 4.028596972915915e-06, + "loss": 0.2574, + "step": 41239 + }, + { + "epoch": 0.7126071329831352, + "grad_norm": 1.583509040107395, + "learning_rate": 4.028148066777533e-06, + "loss": 0.4795, + "step": 41240 + }, + { + "epoch": 0.7126244124965441, + "grad_norm": 0.9747684320866824, + "learning_rate": 4.027699179343445e-06, + "loss": 0.3161, + "step": 41241 + }, + { + "epoch": 0.712641692009953, + "grad_norm": 1.3162139526625953, + "learning_rate": 4.027250310615061e-06, + "loss": 0.231, + "step": 41242 + }, + { + "epoch": 0.7126589715233619, + "grad_norm": 1.4413212378176266, + "learning_rate": 4.0268014605937785e-06, + "loss": 0.4553, + "step": 41243 + }, + { + "epoch": 0.7126762510367708, + "grad_norm": 0.857039729429092, + "learning_rate": 4.026352629281012e-06, + "loss": 0.3238, + "step": 41244 + }, + { + "epoch": 0.7126935305501797, + "grad_norm": 1.5591452381197697, + "learning_rate": 4.025903816678159e-06, + "loss": 0.4506, + "step": 41245 + }, + { + "epoch": 0.7127108100635886, + "grad_norm": 2.7998797355250384, + "learning_rate": 4.02545502278663e-06, + "loss": 0.2488, + "step": 41246 + }, + { + "epoch": 0.7127280895769975, + "grad_norm": 1.7328771366426556, + "learning_rate": 4.025006247607829e-06, + "loss": 0.4197, + "step": 41247 + }, + { + "epoch": 0.7127453690904064, + "grad_norm": 1.8237959673652326, + "learning_rate": 4.024557491143168e-06, + "loss": 0.2556, + "step": 41248 + }, + { + "epoch": 0.7127626486038153, + "grad_norm": 1.0434118046544743, + "learning_rate": 4.024108753394041e-06, + "loss": 0.3823, + "step": 41249 + }, + { + "epoch": 0.7127799281172242, + "grad_norm": 1.844969235559304, + "learning_rate": 4.023660034361864e-06, + "loss": 0.5131, + "step": 41250 + }, + { + "epoch": 0.7127972076306331, + "grad_norm": 1.149536603205074, + "learning_rate": 4.023211334048035e-06, + "loss": 0.2871, + "step": 41251 + }, + { + "epoch": 0.712814487144042, + "grad_norm": 1.337219913139882, + "learning_rate": 4.022762652453961e-06, + "loss": 0.331, + "step": 41252 + }, + { + "epoch": 0.712831766657451, + "grad_norm": 2.1356574898599576, + "learning_rate": 4.022313989581054e-06, + "loss": 0.4205, + "step": 41253 + }, + { + "epoch": 0.7128490461708599, + "grad_norm": 1.0814069684267495, + "learning_rate": 4.021865345430709e-06, + "loss": 0.9085, + "step": 41254 + }, + { + "epoch": 0.7128663256842688, + "grad_norm": 1.0905158816442244, + "learning_rate": 4.02141672000434e-06, + "loss": 0.9666, + "step": 41255 + }, + { + "epoch": 0.7128836051976777, + "grad_norm": 0.9557075611068587, + "learning_rate": 4.020968113303344e-06, + "loss": 0.3053, + "step": 41256 + }, + { + "epoch": 0.7129008847110866, + "grad_norm": 1.2767967035446413, + "learning_rate": 4.020519525329132e-06, + "loss": 0.5224, + "step": 41257 + }, + { + "epoch": 0.7129181642244954, + "grad_norm": 1.096067243411911, + "learning_rate": 4.020070956083105e-06, + "loss": 0.366, + "step": 41258 + }, + { + "epoch": 0.7129354437379043, + "grad_norm": 1.422678953078035, + "learning_rate": 4.0196224055666755e-06, + "loss": 0.298, + "step": 41259 + }, + { + "epoch": 0.7129527232513132, + "grad_norm": 1.620897251277308, + "learning_rate": 4.019173873781239e-06, + "loss": 0.2987, + "step": 41260 + }, + { + "epoch": 0.7129700027647221, + "grad_norm": 1.405473037407335, + "learning_rate": 4.0187253607282085e-06, + "loss": 0.2991, + "step": 41261 + }, + { + "epoch": 0.712987282278131, + "grad_norm": 1.4087656496179783, + "learning_rate": 4.018276866408983e-06, + "loss": 0.275, + "step": 41262 + }, + { + "epoch": 0.7130045617915399, + "grad_norm": 1.3729297823839008, + "learning_rate": 4.017828390824964e-06, + "loss": 0.3024, + "step": 41263 + }, + { + "epoch": 0.7130218413049488, + "grad_norm": 1.5954784126655768, + "learning_rate": 4.017379933977567e-06, + "loss": 0.21, + "step": 41264 + }, + { + "epoch": 0.7130391208183577, + "grad_norm": 1.8697903185802827, + "learning_rate": 4.016931495868187e-06, + "loss": 0.2855, + "step": 41265 + }, + { + "epoch": 0.7130564003317666, + "grad_norm": 0.8674611420882605, + "learning_rate": 4.016483076498236e-06, + "loss": 0.3162, + "step": 41266 + }, + { + "epoch": 0.7130736798451756, + "grad_norm": 1.0112908628999255, + "learning_rate": 4.016034675869112e-06, + "loss": 0.4252, + "step": 41267 + }, + { + "epoch": 0.7130909593585845, + "grad_norm": 1.1225980458563782, + "learning_rate": 4.015586293982226e-06, + "loss": 0.2385, + "step": 41268 + }, + { + "epoch": 0.7131082388719934, + "grad_norm": 0.8274904736284672, + "learning_rate": 4.0151379308389735e-06, + "loss": 0.2753, + "step": 41269 + }, + { + "epoch": 0.7131255183854023, + "grad_norm": 1.3317712264048112, + "learning_rate": 4.014689586440765e-06, + "loss": 0.3524, + "step": 41270 + }, + { + "epoch": 0.7131427978988112, + "grad_norm": 1.5511144509125812, + "learning_rate": 4.0142412607890025e-06, + "loss": 0.4623, + "step": 41271 + }, + { + "epoch": 0.7131600774122201, + "grad_norm": 1.6882426261797372, + "learning_rate": 4.013792953885095e-06, + "loss": 0.3381, + "step": 41272 + }, + { + "epoch": 0.713177356925629, + "grad_norm": 1.2481377578627655, + "learning_rate": 4.013344665730444e-06, + "loss": 0.352, + "step": 41273 + }, + { + "epoch": 0.7131946364390379, + "grad_norm": 1.0284796231427729, + "learning_rate": 4.0128963963264476e-06, + "loss": 0.4345, + "step": 41274 + }, + { + "epoch": 0.7132119159524468, + "grad_norm": 1.1003327058778793, + "learning_rate": 4.012448145674516e-06, + "loss": 0.2944, + "step": 41275 + }, + { + "epoch": 0.7132291954658557, + "grad_norm": 1.1404233572710918, + "learning_rate": 4.0119999137760515e-06, + "loss": 0.4235, + "step": 41276 + }, + { + "epoch": 0.7132464749792646, + "grad_norm": 1.2637215149892533, + "learning_rate": 4.011551700632461e-06, + "loss": 0.1869, + "step": 41277 + }, + { + "epoch": 0.7132637544926735, + "grad_norm": 1.1024100934034153, + "learning_rate": 4.011103506245142e-06, + "loss": 0.3537, + "step": 41278 + }, + { + "epoch": 0.7132810340060823, + "grad_norm": 1.136023319386567, + "learning_rate": 4.010655330615506e-06, + "loss": 0.3765, + "step": 41279 + }, + { + "epoch": 0.7132983135194912, + "grad_norm": 0.8230422584003229, + "learning_rate": 4.01020717374495e-06, + "loss": 0.4227, + "step": 41280 + }, + { + "epoch": 0.7133155930329002, + "grad_norm": 1.0239039339155218, + "learning_rate": 4.009759035634879e-06, + "loss": 0.447, + "step": 41281 + }, + { + "epoch": 0.7133328725463091, + "grad_norm": 1.0954925832494034, + "learning_rate": 4.009310916286703e-06, + "loss": 0.3972, + "step": 41282 + }, + { + "epoch": 0.713350152059718, + "grad_norm": 1.0802370851633236, + "learning_rate": 4.008862815701816e-06, + "loss": 0.2352, + "step": 41283 + }, + { + "epoch": 0.7133674315731269, + "grad_norm": 1.4158054596865615, + "learning_rate": 4.00841473388163e-06, + "loss": 0.5542, + "step": 41284 + }, + { + "epoch": 0.7133847110865358, + "grad_norm": 0.8684667532577014, + "learning_rate": 4.00796667082754e-06, + "loss": 0.7168, + "step": 41285 + }, + { + "epoch": 0.7134019905999447, + "grad_norm": 2.681971604589449, + "learning_rate": 4.007518626540954e-06, + "loss": 0.3407, + "step": 41286 + }, + { + "epoch": 0.7134192701133536, + "grad_norm": 1.4778017052348889, + "learning_rate": 4.007070601023275e-06, + "loss": 0.4525, + "step": 41287 + }, + { + "epoch": 0.7134365496267625, + "grad_norm": 0.7261834281688513, + "learning_rate": 4.00662259427591e-06, + "loss": 0.2891, + "step": 41288 + }, + { + "epoch": 0.7134538291401714, + "grad_norm": 1.587111607452001, + "learning_rate": 4.006174606300255e-06, + "loss": 0.5403, + "step": 41289 + }, + { + "epoch": 0.7134711086535803, + "grad_norm": 1.090406370911643, + "learning_rate": 4.005726637097721e-06, + "loss": 0.2486, + "step": 41290 + }, + { + "epoch": 0.7134883881669892, + "grad_norm": 1.1760970796299994, + "learning_rate": 4.005278686669703e-06, + "loss": 0.4821, + "step": 41291 + }, + { + "epoch": 0.7135056676803981, + "grad_norm": 0.9507980423094842, + "learning_rate": 4.0048307550176065e-06, + "loss": 0.3223, + "step": 41292 + }, + { + "epoch": 0.713522947193807, + "grad_norm": 2.1423103602090996, + "learning_rate": 4.00438284214284e-06, + "loss": 0.4061, + "step": 41293 + }, + { + "epoch": 0.713540226707216, + "grad_norm": 1.0362804474065686, + "learning_rate": 4.003934948046798e-06, + "loss": 0.3159, + "step": 41294 + }, + { + "epoch": 0.7135575062206249, + "grad_norm": 1.1290046692505216, + "learning_rate": 4.003487072730892e-06, + "loss": 0.5973, + "step": 41295 + }, + { + "epoch": 0.7135747857340338, + "grad_norm": 1.5902176869745133, + "learning_rate": 4.003039216196516e-06, + "loss": 0.493, + "step": 41296 + }, + { + "epoch": 0.7135920652474427, + "grad_norm": 1.2436557257469265, + "learning_rate": 4.002591378445076e-06, + "loss": 0.2849, + "step": 41297 + }, + { + "epoch": 0.7136093447608516, + "grad_norm": 1.2671470480143265, + "learning_rate": 4.002143559477978e-06, + "loss": 0.3686, + "step": 41298 + }, + { + "epoch": 0.7136266242742605, + "grad_norm": 1.2826509244358744, + "learning_rate": 4.001695759296625e-06, + "loss": 0.4107, + "step": 41299 + }, + { + "epoch": 0.7136439037876693, + "grad_norm": 1.0484793865156161, + "learning_rate": 4.001247977902412e-06, + "loss": 0.3538, + "step": 41300 + }, + { + "epoch": 0.7136611833010782, + "grad_norm": 1.4429643721247973, + "learning_rate": 4.00080021529675e-06, + "loss": 0.3304, + "step": 41301 + }, + { + "epoch": 0.7136784628144871, + "grad_norm": 1.4753900261312578, + "learning_rate": 4.000352471481034e-06, + "loss": 0.1653, + "step": 41302 + }, + { + "epoch": 0.713695742327896, + "grad_norm": 1.2163151726115937, + "learning_rate": 3.999904746456671e-06, + "loss": 0.3411, + "step": 41303 + }, + { + "epoch": 0.7137130218413049, + "grad_norm": 0.9272655421411501, + "learning_rate": 3.9994570402250656e-06, + "loss": 0.33, + "step": 41304 + }, + { + "epoch": 0.7137303013547138, + "grad_norm": 1.3468363108295118, + "learning_rate": 3.999009352787613e-06, + "loss": 0.3668, + "step": 41305 + }, + { + "epoch": 0.7137475808681227, + "grad_norm": 1.3989325111235302, + "learning_rate": 3.998561684145722e-06, + "loss": 0.5005, + "step": 41306 + }, + { + "epoch": 0.7137648603815316, + "grad_norm": 1.287893442916248, + "learning_rate": 3.99811403430079e-06, + "loss": 0.3653, + "step": 41307 + }, + { + "epoch": 0.7137821398949405, + "grad_norm": 1.412796053910549, + "learning_rate": 3.997666403254223e-06, + "loss": 0.5338, + "step": 41308 + }, + { + "epoch": 0.7137994194083495, + "grad_norm": 1.005282737243529, + "learning_rate": 3.997218791007418e-06, + "loss": 0.5928, + "step": 41309 + }, + { + "epoch": 0.7138166989217584, + "grad_norm": 0.8250191801089632, + "learning_rate": 3.996771197561779e-06, + "loss": 0.3477, + "step": 41310 + }, + { + "epoch": 0.7138339784351673, + "grad_norm": 2.0726572317425482, + "learning_rate": 3.996323622918711e-06, + "loss": 0.3024, + "step": 41311 + }, + { + "epoch": 0.7138512579485762, + "grad_norm": 0.8312991358727609, + "learning_rate": 3.995876067079615e-06, + "loss": 0.2858, + "step": 41312 + }, + { + "epoch": 0.7138685374619851, + "grad_norm": 1.1088448677673204, + "learning_rate": 3.995428530045893e-06, + "loss": 0.3349, + "step": 41313 + }, + { + "epoch": 0.713885816975394, + "grad_norm": 1.0244240188955196, + "learning_rate": 3.99498101181894e-06, + "loss": 0.4126, + "step": 41314 + }, + { + "epoch": 0.7139030964888029, + "grad_norm": 0.9301742121643103, + "learning_rate": 3.994533512400165e-06, + "loss": 0.2803, + "step": 41315 + }, + { + "epoch": 0.7139203760022118, + "grad_norm": 1.181421460384173, + "learning_rate": 3.9940860317909656e-06, + "loss": 0.2917, + "step": 41316 + }, + { + "epoch": 0.7139376555156207, + "grad_norm": 1.1351447514023842, + "learning_rate": 3.9936385699927495e-06, + "loss": 0.3342, + "step": 41317 + }, + { + "epoch": 0.7139549350290296, + "grad_norm": 0.9117287763077302, + "learning_rate": 3.99319112700691e-06, + "loss": 0.5889, + "step": 41318 + }, + { + "epoch": 0.7139722145424385, + "grad_norm": 0.9258603122676816, + "learning_rate": 3.992743702834857e-06, + "loss": 0.3637, + "step": 41319 + }, + { + "epoch": 0.7139894940558474, + "grad_norm": 1.0122168155934825, + "learning_rate": 3.992296297477982e-06, + "loss": 0.2952, + "step": 41320 + }, + { + "epoch": 0.7140067735692562, + "grad_norm": 1.3035504627577308, + "learning_rate": 3.991848910937694e-06, + "loss": 0.3685, + "step": 41321 + }, + { + "epoch": 0.7140240530826651, + "grad_norm": 2.491579747688505, + "learning_rate": 3.991401543215389e-06, + "loss": 0.3736, + "step": 41322 + }, + { + "epoch": 0.714041332596074, + "grad_norm": 1.3737811777313882, + "learning_rate": 3.9909541943124764e-06, + "loss": 0.3931, + "step": 41323 + }, + { + "epoch": 0.714058612109483, + "grad_norm": 1.0888138255305415, + "learning_rate": 3.990506864230351e-06, + "loss": 0.446, + "step": 41324 + }, + { + "epoch": 0.7140758916228919, + "grad_norm": 1.146059903199893, + "learning_rate": 3.9900595529704126e-06, + "loss": 0.3465, + "step": 41325 + }, + { + "epoch": 0.7140931711363008, + "grad_norm": 1.0370381599794398, + "learning_rate": 3.989612260534063e-06, + "loss": 0.3759, + "step": 41326 + }, + { + "epoch": 0.7141104506497097, + "grad_norm": 1.4978726344962707, + "learning_rate": 3.989164986922705e-06, + "loss": 0.5174, + "step": 41327 + }, + { + "epoch": 0.7141277301631186, + "grad_norm": 0.9516163516720367, + "learning_rate": 3.988717732137743e-06, + "loss": 0.4366, + "step": 41328 + }, + { + "epoch": 0.7141450096765275, + "grad_norm": 1.1534485222940605, + "learning_rate": 3.98827049618057e-06, + "loss": 0.4236, + "step": 41329 + }, + { + "epoch": 0.7141622891899364, + "grad_norm": 2.1684813513379564, + "learning_rate": 3.9878232790525936e-06, + "loss": 0.2832, + "step": 41330 + }, + { + "epoch": 0.7141795687033453, + "grad_norm": 1.4972920758543802, + "learning_rate": 3.987376080755208e-06, + "loss": 0.5201, + "step": 41331 + }, + { + "epoch": 0.7141968482167542, + "grad_norm": 0.8947217407853036, + "learning_rate": 3.9869289012898195e-06, + "loss": 0.2721, + "step": 41332 + }, + { + "epoch": 0.7142141277301631, + "grad_norm": 1.4070255378251333, + "learning_rate": 3.9864817406578245e-06, + "loss": 0.3451, + "step": 41333 + }, + { + "epoch": 0.714231407243572, + "grad_norm": 0.8708203707172321, + "learning_rate": 3.986034598860631e-06, + "loss": 0.5862, + "step": 41334 + }, + { + "epoch": 0.7142486867569809, + "grad_norm": 0.8798647570586363, + "learning_rate": 3.985587475899633e-06, + "loss": 0.5553, + "step": 41335 + }, + { + "epoch": 0.7142659662703899, + "grad_norm": 1.4112192322559536, + "learning_rate": 3.985140371776228e-06, + "loss": 0.3973, + "step": 41336 + }, + { + "epoch": 0.7142832457837988, + "grad_norm": 1.385468655334321, + "learning_rate": 3.984693286491822e-06, + "loss": 0.2906, + "step": 41337 + }, + { + "epoch": 0.7143005252972077, + "grad_norm": 0.9820973545083984, + "learning_rate": 3.984246220047813e-06, + "loss": 0.3127, + "step": 41338 + }, + { + "epoch": 0.7143178048106166, + "grad_norm": 1.2191864589430295, + "learning_rate": 3.983799172445605e-06, + "loss": 0.5769, + "step": 41339 + }, + { + "epoch": 0.7143350843240255, + "grad_norm": 1.274190089690345, + "learning_rate": 3.983352143686593e-06, + "loss": 0.3526, + "step": 41340 + }, + { + "epoch": 0.7143523638374344, + "grad_norm": 0.5933659610475378, + "learning_rate": 3.982905133772181e-06, + "loss": 0.6067, + "step": 41341 + }, + { + "epoch": 0.7143696433508432, + "grad_norm": 1.1244533736342304, + "learning_rate": 3.982458142703765e-06, + "loss": 0.2969, + "step": 41342 + }, + { + "epoch": 0.7143869228642521, + "grad_norm": 1.108541849958309, + "learning_rate": 3.982011170482748e-06, + "loss": 0.344, + "step": 41343 + }, + { + "epoch": 0.714404202377661, + "grad_norm": 1.1124051735635596, + "learning_rate": 3.981564217110533e-06, + "loss": 0.2504, + "step": 41344 + }, + { + "epoch": 0.7144214818910699, + "grad_norm": 1.501106387546292, + "learning_rate": 3.981117282588511e-06, + "loss": 0.4133, + "step": 41345 + }, + { + "epoch": 0.7144387614044788, + "grad_norm": 0.9404821559352808, + "learning_rate": 3.980670366918092e-06, + "loss": 0.2525, + "step": 41346 + }, + { + "epoch": 0.7144560409178877, + "grad_norm": 0.6713402014918217, + "learning_rate": 3.980223470100667e-06, + "loss": 0.7643, + "step": 41347 + }, + { + "epoch": 0.7144733204312966, + "grad_norm": 1.355864866658077, + "learning_rate": 3.979776592137643e-06, + "loss": 0.1886, + "step": 41348 + }, + { + "epoch": 0.7144905999447055, + "grad_norm": 1.4219333969539174, + "learning_rate": 3.979329733030414e-06, + "loss": 0.3636, + "step": 41349 + }, + { + "epoch": 0.7145078794581144, + "grad_norm": 1.8044788138710552, + "learning_rate": 3.97888289278038e-06, + "loss": 0.3444, + "step": 41350 + }, + { + "epoch": 0.7145251589715234, + "grad_norm": 1.000703883675461, + "learning_rate": 3.978436071388942e-06, + "loss": 0.2753, + "step": 41351 + }, + { + "epoch": 0.7145424384849323, + "grad_norm": 1.3880362803836077, + "learning_rate": 3.977989268857505e-06, + "loss": 0.3669, + "step": 41352 + }, + { + "epoch": 0.7145597179983412, + "grad_norm": 1.3931285386550658, + "learning_rate": 3.977542485187459e-06, + "loss": 0.2774, + "step": 41353 + }, + { + "epoch": 0.7145769975117501, + "grad_norm": 1.1695027572333654, + "learning_rate": 3.977095720380212e-06, + "loss": 0.4068, + "step": 41354 + }, + { + "epoch": 0.714594277025159, + "grad_norm": 1.043604765751708, + "learning_rate": 3.976648974437154e-06, + "loss": 0.4054, + "step": 41355 + }, + { + "epoch": 0.7146115565385679, + "grad_norm": 1.2559468965198657, + "learning_rate": 3.97620224735969e-06, + "loss": 0.2964, + "step": 41356 + }, + { + "epoch": 0.7146288360519768, + "grad_norm": 1.2999908561562736, + "learning_rate": 3.975755539149222e-06, + "loss": 0.4786, + "step": 41357 + }, + { + "epoch": 0.7146461155653857, + "grad_norm": 0.6249532798943357, + "learning_rate": 3.975308849807141e-06, + "loss": 0.6277, + "step": 41358 + }, + { + "epoch": 0.7146633950787946, + "grad_norm": 1.1182878851034532, + "learning_rate": 3.974862179334855e-06, + "loss": 0.3132, + "step": 41359 + }, + { + "epoch": 0.7146806745922035, + "grad_norm": 1.2123647686991856, + "learning_rate": 3.974415527733755e-06, + "loss": 0.3108, + "step": 41360 + }, + { + "epoch": 0.7146979541056124, + "grad_norm": 1.0241398308611087, + "learning_rate": 3.973968895005244e-06, + "loss": 0.2053, + "step": 41361 + }, + { + "epoch": 0.7147152336190213, + "grad_norm": 1.4716340607766067, + "learning_rate": 3.973522281150719e-06, + "loss": 0.2019, + "step": 41362 + }, + { + "epoch": 0.7147325131324301, + "grad_norm": 1.0794921534040096, + "learning_rate": 3.9730756861715845e-06, + "loss": 0.2939, + "step": 41363 + }, + { + "epoch": 0.714749792645839, + "grad_norm": 1.0662715567862917, + "learning_rate": 3.972629110069231e-06, + "loss": 0.2493, + "step": 41364 + }, + { + "epoch": 0.714767072159248, + "grad_norm": 1.338855351871589, + "learning_rate": 3.972182552845065e-06, + "loss": 0.5477, + "step": 41365 + }, + { + "epoch": 0.7147843516726569, + "grad_norm": 1.430597932638392, + "learning_rate": 3.971736014500478e-06, + "loss": 0.2161, + "step": 41366 + }, + { + "epoch": 0.7148016311860658, + "grad_norm": 1.2046286441296865, + "learning_rate": 3.971289495036872e-06, + "loss": 0.4362, + "step": 41367 + }, + { + "epoch": 0.7148189106994747, + "grad_norm": 1.1261283318071555, + "learning_rate": 3.970842994455648e-06, + "loss": 0.4083, + "step": 41368 + }, + { + "epoch": 0.7148361902128836, + "grad_norm": 1.0949074979390512, + "learning_rate": 3.9703965127582e-06, + "loss": 0.2871, + "step": 41369 + }, + { + "epoch": 0.7148534697262925, + "grad_norm": 1.1911474338353067, + "learning_rate": 3.969950049945931e-06, + "loss": 0.4674, + "step": 41370 + }, + { + "epoch": 0.7148707492397014, + "grad_norm": 1.38642298265065, + "learning_rate": 3.969503606020233e-06, + "loss": 0.5501, + "step": 41371 + }, + { + "epoch": 0.7148880287531103, + "grad_norm": 0.9832921524586709, + "learning_rate": 3.969057180982508e-06, + "loss": 0.26, + "step": 41372 + }, + { + "epoch": 0.7149053082665192, + "grad_norm": 0.7941813642963732, + "learning_rate": 3.968610774834155e-06, + "loss": 0.352, + "step": 41373 + }, + { + "epoch": 0.7149225877799281, + "grad_norm": 1.452651548161935, + "learning_rate": 3.968164387576574e-06, + "loss": 0.4062, + "step": 41374 + }, + { + "epoch": 0.714939867293337, + "grad_norm": 1.1931241903294731, + "learning_rate": 3.967718019211162e-06, + "loss": 0.4266, + "step": 41375 + }, + { + "epoch": 0.7149571468067459, + "grad_norm": 1.5346754740610942, + "learning_rate": 3.967271669739311e-06, + "loss": 0.4521, + "step": 41376 + }, + { + "epoch": 0.7149744263201548, + "grad_norm": 1.6474950974966454, + "learning_rate": 3.966825339162423e-06, + "loss": 0.5107, + "step": 41377 + }, + { + "epoch": 0.7149917058335638, + "grad_norm": 1.1981131928219146, + "learning_rate": 3.966379027481897e-06, + "loss": 0.2121, + "step": 41378 + }, + { + "epoch": 0.7150089853469727, + "grad_norm": 0.8917876150579979, + "learning_rate": 3.965932734699135e-06, + "loss": 0.2917, + "step": 41379 + }, + { + "epoch": 0.7150262648603816, + "grad_norm": 1.602530891924628, + "learning_rate": 3.965486460815525e-06, + "loss": 0.4748, + "step": 41380 + }, + { + "epoch": 0.7150435443737905, + "grad_norm": 0.8632110477638005, + "learning_rate": 3.965040205832475e-06, + "loss": 0.4961, + "step": 41381 + }, + { + "epoch": 0.7150608238871994, + "grad_norm": 1.613220856376627, + "learning_rate": 3.964593969751374e-06, + "loss": 0.3422, + "step": 41382 + }, + { + "epoch": 0.7150781034006083, + "grad_norm": 1.519856153606048, + "learning_rate": 3.964147752573623e-06, + "loss": 0.245, + "step": 41383 + }, + { + "epoch": 0.7150953829140171, + "grad_norm": 1.2358720106249064, + "learning_rate": 3.96370155430062e-06, + "loss": 0.3177, + "step": 41384 + }, + { + "epoch": 0.715112662427426, + "grad_norm": 0.8235006595885378, + "learning_rate": 3.963255374933766e-06, + "loss": 0.6809, + "step": 41385 + }, + { + "epoch": 0.7151299419408349, + "grad_norm": 1.2938559335540176, + "learning_rate": 3.962809214474455e-06, + "loss": 0.2978, + "step": 41386 + }, + { + "epoch": 0.7151472214542438, + "grad_norm": 1.2163364008254016, + "learning_rate": 3.96236307292408e-06, + "loss": 0.4546, + "step": 41387 + }, + { + "epoch": 0.7151645009676527, + "grad_norm": 2.271325332862251, + "learning_rate": 3.961916950284047e-06, + "loss": 0.3904, + "step": 41388 + }, + { + "epoch": 0.7151817804810616, + "grad_norm": 1.212263639590634, + "learning_rate": 3.961470846555745e-06, + "loss": 0.5481, + "step": 41389 + }, + { + "epoch": 0.7151990599944705, + "grad_norm": 1.6387487784669292, + "learning_rate": 3.961024761740576e-06, + "loss": 0.3995, + "step": 41390 + }, + { + "epoch": 0.7152163395078794, + "grad_norm": 1.0762018894213181, + "learning_rate": 3.960578695839936e-06, + "loss": 0.6709, + "step": 41391 + }, + { + "epoch": 0.7152336190212883, + "grad_norm": 1.2311815744376406, + "learning_rate": 3.960132648855226e-06, + "loss": 0.3497, + "step": 41392 + }, + { + "epoch": 0.7152508985346973, + "grad_norm": 1.057287687292272, + "learning_rate": 3.959686620787836e-06, + "loss": 0.2964, + "step": 41393 + }, + { + "epoch": 0.7152681780481062, + "grad_norm": 1.3784058024688473, + "learning_rate": 3.95924061163917e-06, + "loss": 0.3727, + "step": 41394 + }, + { + "epoch": 0.7152854575615151, + "grad_norm": 1.6243728096006478, + "learning_rate": 3.958794621410619e-06, + "loss": 0.46, + "step": 41395 + }, + { + "epoch": 0.715302737074924, + "grad_norm": 1.0302890198526935, + "learning_rate": 3.95834865010358e-06, + "loss": 0.2361, + "step": 41396 + }, + { + "epoch": 0.7153200165883329, + "grad_norm": 1.5352701917424136, + "learning_rate": 3.957902697719458e-06, + "loss": 0.3303, + "step": 41397 + }, + { + "epoch": 0.7153372961017418, + "grad_norm": 1.1537194114374578, + "learning_rate": 3.957456764259639e-06, + "loss": 0.3173, + "step": 41398 + }, + { + "epoch": 0.7153545756151507, + "grad_norm": 1.3124483822262447, + "learning_rate": 3.957010849725529e-06, + "loss": 0.3177, + "step": 41399 + }, + { + "epoch": 0.7153718551285596, + "grad_norm": 0.8379504144332227, + "learning_rate": 3.956564954118517e-06, + "loss": 0.6366, + "step": 41400 + }, + { + "epoch": 0.7153891346419685, + "grad_norm": 1.0198710002692388, + "learning_rate": 3.956119077440003e-06, + "loss": 0.3395, + "step": 41401 + }, + { + "epoch": 0.7154064141553774, + "grad_norm": 0.8692296721570532, + "learning_rate": 3.955673219691382e-06, + "loss": 0.3532, + "step": 41402 + }, + { + "epoch": 0.7154236936687863, + "grad_norm": 0.7309150431430776, + "learning_rate": 3.955227380874056e-06, + "loss": 0.2116, + "step": 41403 + }, + { + "epoch": 0.7154409731821952, + "grad_norm": 1.3305369082395755, + "learning_rate": 3.954781560989414e-06, + "loss": 0.4121, + "step": 41404 + }, + { + "epoch": 0.7154582526956041, + "grad_norm": 1.177047501729825, + "learning_rate": 3.954335760038859e-06, + "loss": 0.3808, + "step": 41405 + }, + { + "epoch": 0.7154755322090129, + "grad_norm": 1.4690752892598846, + "learning_rate": 3.953889978023779e-06, + "loss": 0.4862, + "step": 41406 + }, + { + "epoch": 0.7154928117224219, + "grad_norm": 1.5254266257251956, + "learning_rate": 3.9534442149455766e-06, + "loss": 0.2958, + "step": 41407 + }, + { + "epoch": 0.7155100912358308, + "grad_norm": 1.2583212884984987, + "learning_rate": 3.95299847080565e-06, + "loss": 0.501, + "step": 41408 + }, + { + "epoch": 0.7155273707492397, + "grad_norm": 0.9062120779216807, + "learning_rate": 3.952552745605388e-06, + "loss": 0.4057, + "step": 41409 + }, + { + "epoch": 0.7155446502626486, + "grad_norm": 2.20817016145545, + "learning_rate": 3.952107039346194e-06, + "loss": 0.5997, + "step": 41410 + }, + { + "epoch": 0.7155619297760575, + "grad_norm": 1.583282309520744, + "learning_rate": 3.951661352029457e-06, + "loss": 0.3909, + "step": 41411 + }, + { + "epoch": 0.7155792092894664, + "grad_norm": 1.1954792322305539, + "learning_rate": 3.951215683656575e-06, + "loss": 0.3555, + "step": 41412 + }, + { + "epoch": 0.7155964888028753, + "grad_norm": 1.4994231415983805, + "learning_rate": 3.950770034228946e-06, + "loss": 0.228, + "step": 41413 + }, + { + "epoch": 0.7156137683162842, + "grad_norm": 1.187097326982003, + "learning_rate": 3.950324403747968e-06, + "loss": 0.3921, + "step": 41414 + }, + { + "epoch": 0.7156310478296931, + "grad_norm": 1.2503564180936286, + "learning_rate": 3.949878792215031e-06, + "loss": 0.3377, + "step": 41415 + }, + { + "epoch": 0.715648327343102, + "grad_norm": 1.2150072780781986, + "learning_rate": 3.949433199631537e-06, + "loss": 0.5786, + "step": 41416 + }, + { + "epoch": 0.7156656068565109, + "grad_norm": 0.8436938452213452, + "learning_rate": 3.948987625998875e-06, + "loss": 0.3136, + "step": 41417 + }, + { + "epoch": 0.7156828863699198, + "grad_norm": 1.251393760644058, + "learning_rate": 3.948542071318442e-06, + "loss": 0.2704, + "step": 41418 + }, + { + "epoch": 0.7157001658833287, + "grad_norm": 0.8784396983500051, + "learning_rate": 3.94809653559164e-06, + "loss": 0.3383, + "step": 41419 + }, + { + "epoch": 0.7157174453967377, + "grad_norm": 0.6326653214149232, + "learning_rate": 3.947651018819856e-06, + "loss": 0.6993, + "step": 41420 + }, + { + "epoch": 0.7157347249101466, + "grad_norm": 1.087918946308906, + "learning_rate": 3.947205521004491e-06, + "loss": 0.3602, + "step": 41421 + }, + { + "epoch": 0.7157520044235555, + "grad_norm": 1.8679391976003594, + "learning_rate": 3.946760042146936e-06, + "loss": 0.2528, + "step": 41422 + }, + { + "epoch": 0.7157692839369644, + "grad_norm": 1.0578897499251796, + "learning_rate": 3.946314582248593e-06, + "loss": 0.4124, + "step": 41423 + }, + { + "epoch": 0.7157865634503733, + "grad_norm": 1.4714436136346298, + "learning_rate": 3.945869141310844e-06, + "loss": 0.4727, + "step": 41424 + }, + { + "epoch": 0.7158038429637822, + "grad_norm": 1.2841052990968729, + "learning_rate": 3.945423719335101e-06, + "loss": 0.3232, + "step": 41425 + }, + { + "epoch": 0.7158211224771911, + "grad_norm": 1.1246745229476582, + "learning_rate": 3.944978316322751e-06, + "loss": 0.2983, + "step": 41426 + }, + { + "epoch": 0.7158384019905999, + "grad_norm": 1.2258677959536328, + "learning_rate": 3.944532932275185e-06, + "loss": 0.256, + "step": 41427 + }, + { + "epoch": 0.7158556815040088, + "grad_norm": 1.1957894340597401, + "learning_rate": 3.944087567193805e-06, + "loss": 0.3384, + "step": 41428 + }, + { + "epoch": 0.7158729610174177, + "grad_norm": 1.0867529767833093, + "learning_rate": 3.943642221080001e-06, + "loss": 0.5163, + "step": 41429 + }, + { + "epoch": 0.7158902405308266, + "grad_norm": 1.1443492228704686, + "learning_rate": 3.943196893935169e-06, + "loss": 0.5616, + "step": 41430 + }, + { + "epoch": 0.7159075200442355, + "grad_norm": 1.2701392481969778, + "learning_rate": 3.942751585760705e-06, + "loss": 0.418, + "step": 41431 + }, + { + "epoch": 0.7159247995576444, + "grad_norm": 1.7887585934378094, + "learning_rate": 3.942306296558006e-06, + "loss": 0.4957, + "step": 41432 + }, + { + "epoch": 0.7159420790710533, + "grad_norm": 1.3651394345743133, + "learning_rate": 3.941861026328462e-06, + "loss": 0.2111, + "step": 41433 + }, + { + "epoch": 0.7159593585844622, + "grad_norm": 0.9106043476880749, + "learning_rate": 3.941415775073472e-06, + "loss": 0.4242, + "step": 41434 + }, + { + "epoch": 0.7159766380978712, + "grad_norm": 1.3162936161328396, + "learning_rate": 3.940970542794425e-06, + "loss": 0.5102, + "step": 41435 + }, + { + "epoch": 0.7159939176112801, + "grad_norm": 1.7278879514013294, + "learning_rate": 3.940525329492718e-06, + "loss": 0.3858, + "step": 41436 + }, + { + "epoch": 0.716011197124689, + "grad_norm": 0.785620582717234, + "learning_rate": 3.9400801351697516e-06, + "loss": 0.2577, + "step": 41437 + }, + { + "epoch": 0.7160284766380979, + "grad_norm": 1.6388426345491725, + "learning_rate": 3.939634959826909e-06, + "loss": 0.4039, + "step": 41438 + }, + { + "epoch": 0.7160457561515068, + "grad_norm": 1.08792780293055, + "learning_rate": 3.939189803465595e-06, + "loss": 0.2583, + "step": 41439 + }, + { + "epoch": 0.7160630356649157, + "grad_norm": 1.149106663934491, + "learning_rate": 3.938744666087196e-06, + "loss": 0.2217, + "step": 41440 + }, + { + "epoch": 0.7160803151783246, + "grad_norm": 0.6223197957559141, + "learning_rate": 3.938299547693109e-06, + "loss": 0.6088, + "step": 41441 + }, + { + "epoch": 0.7160975946917335, + "grad_norm": 0.7883768066119379, + "learning_rate": 3.937854448284728e-06, + "loss": 0.4698, + "step": 41442 + }, + { + "epoch": 0.7161148742051424, + "grad_norm": 2.4379323114178737, + "learning_rate": 3.937409367863451e-06, + "loss": 0.3331, + "step": 41443 + }, + { + "epoch": 0.7161321537185513, + "grad_norm": 1.3323115663567109, + "learning_rate": 3.936964306430666e-06, + "loss": 0.3488, + "step": 41444 + }, + { + "epoch": 0.7161494332319602, + "grad_norm": 1.238903999640294, + "learning_rate": 3.936519263987772e-06, + "loss": 0.5154, + "step": 41445 + }, + { + "epoch": 0.7161667127453691, + "grad_norm": 0.9964276565210968, + "learning_rate": 3.936074240536158e-06, + "loss": 0.2786, + "step": 41446 + }, + { + "epoch": 0.716183992258778, + "grad_norm": 0.6521011776012035, + "learning_rate": 3.93562923607722e-06, + "loss": 0.6447, + "step": 41447 + }, + { + "epoch": 0.7162012717721868, + "grad_norm": 1.0018154298665922, + "learning_rate": 3.935184250612356e-06, + "loss": 0.2782, + "step": 41448 + }, + { + "epoch": 0.7162185512855958, + "grad_norm": 0.8622179245987358, + "learning_rate": 3.9347392841429525e-06, + "loss": 0.3567, + "step": 41449 + }, + { + "epoch": 0.7162358307990047, + "grad_norm": 0.519173827798717, + "learning_rate": 3.934294336670409e-06, + "loss": 0.6731, + "step": 41450 + }, + { + "epoch": 0.7162531103124136, + "grad_norm": 1.0527169657498157, + "learning_rate": 3.9338494081961135e-06, + "loss": 0.3298, + "step": 41451 + }, + { + "epoch": 0.7162703898258225, + "grad_norm": 1.6148518918075911, + "learning_rate": 3.933404498721462e-06, + "loss": 0.4581, + "step": 41452 + }, + { + "epoch": 0.7162876693392314, + "grad_norm": 1.3578769096179906, + "learning_rate": 3.93295960824785e-06, + "loss": 0.505, + "step": 41453 + }, + { + "epoch": 0.7163049488526403, + "grad_norm": 1.4189840957602655, + "learning_rate": 3.932514736776672e-06, + "loss": 0.4209, + "step": 41454 + }, + { + "epoch": 0.7163222283660492, + "grad_norm": 0.7865654421201218, + "learning_rate": 3.932069884309315e-06, + "loss": 0.6767, + "step": 41455 + }, + { + "epoch": 0.7163395078794581, + "grad_norm": 1.4715249005446793, + "learning_rate": 3.931625050847181e-06, + "loss": 0.312, + "step": 41456 + }, + { + "epoch": 0.716356787392867, + "grad_norm": 1.127700290896836, + "learning_rate": 3.931180236391655e-06, + "loss": 0.3928, + "step": 41457 + }, + { + "epoch": 0.7163740669062759, + "grad_norm": 1.2949159661491423, + "learning_rate": 3.9307354409441336e-06, + "loss": 0.5148, + "step": 41458 + }, + { + "epoch": 0.7163913464196848, + "grad_norm": 1.401663569605851, + "learning_rate": 3.9302906645060136e-06, + "loss": 0.3574, + "step": 41459 + }, + { + "epoch": 0.7164086259330937, + "grad_norm": 1.6807662623472825, + "learning_rate": 3.92984590707868e-06, + "loss": 0.3861, + "step": 41460 + }, + { + "epoch": 0.7164259054465026, + "grad_norm": 1.0960156437500346, + "learning_rate": 3.929401168663534e-06, + "loss": 0.4639, + "step": 41461 + }, + { + "epoch": 0.7164431849599115, + "grad_norm": 1.640265051405156, + "learning_rate": 3.928956449261963e-06, + "loss": 0.2597, + "step": 41462 + }, + { + "epoch": 0.7164604644733205, + "grad_norm": 0.9148641978888258, + "learning_rate": 3.928511748875364e-06, + "loss": 0.4581, + "step": 41463 + }, + { + "epoch": 0.7164777439867294, + "grad_norm": 1.2227513386060793, + "learning_rate": 3.92806706750512e-06, + "loss": 0.2732, + "step": 41464 + }, + { + "epoch": 0.7164950235001383, + "grad_norm": 0.9668898416930999, + "learning_rate": 3.92762240515264e-06, + "loss": 0.4164, + "step": 41465 + }, + { + "epoch": 0.7165123030135472, + "grad_norm": 1.8845346319937275, + "learning_rate": 3.927177761819304e-06, + "loss": 0.3745, + "step": 41466 + }, + { + "epoch": 0.7165295825269561, + "grad_norm": 0.7967255894453069, + "learning_rate": 3.926733137506512e-06, + "loss": 0.485, + "step": 41467 + }, + { + "epoch": 0.716546862040365, + "grad_norm": 0.8491636851002192, + "learning_rate": 3.926288532215653e-06, + "loss": 0.2204, + "step": 41468 + }, + { + "epoch": 0.7165641415537738, + "grad_norm": 1.5685708790075945, + "learning_rate": 3.925843945948116e-06, + "loss": 0.3137, + "step": 41469 + }, + { + "epoch": 0.7165814210671827, + "grad_norm": 1.3666905992346723, + "learning_rate": 3.925399378705299e-06, + "loss": 0.4524, + "step": 41470 + }, + { + "epoch": 0.7165987005805916, + "grad_norm": 1.3491856969170943, + "learning_rate": 3.924954830488592e-06, + "loss": 0.3001, + "step": 41471 + }, + { + "epoch": 0.7166159800940005, + "grad_norm": 1.1905607199548205, + "learning_rate": 3.9245103012993925e-06, + "loss": 0.2612, + "step": 41472 + }, + { + "epoch": 0.7166332596074094, + "grad_norm": 0.8969339214458864, + "learning_rate": 3.924065791139083e-06, + "loss": 0.2573, + "step": 41473 + }, + { + "epoch": 0.7166505391208183, + "grad_norm": 1.3983801091344878, + "learning_rate": 3.923621300009066e-06, + "loss": 0.4478, + "step": 41474 + }, + { + "epoch": 0.7166678186342272, + "grad_norm": 2.000550119842473, + "learning_rate": 3.923176827910726e-06, + "loss": 0.3706, + "step": 41475 + }, + { + "epoch": 0.7166850981476361, + "grad_norm": 1.6659841573317937, + "learning_rate": 3.922732374845457e-06, + "loss": 0.408, + "step": 41476 + }, + { + "epoch": 0.716702377661045, + "grad_norm": 0.898395122194729, + "learning_rate": 3.922287940814652e-06, + "loss": 0.3704, + "step": 41477 + }, + { + "epoch": 0.716719657174454, + "grad_norm": 1.5427718316723131, + "learning_rate": 3.921843525819707e-06, + "loss": 0.2254, + "step": 41478 + }, + { + "epoch": 0.7167369366878629, + "grad_norm": 1.16116976666963, + "learning_rate": 3.92139912986201e-06, + "loss": 0.2759, + "step": 41479 + }, + { + "epoch": 0.7167542162012718, + "grad_norm": 1.4372494997887917, + "learning_rate": 3.920954752942949e-06, + "loss": 0.3421, + "step": 41480 + }, + { + "epoch": 0.7167714957146807, + "grad_norm": 1.4338502213824251, + "learning_rate": 3.92051039506392e-06, + "loss": 0.24, + "step": 41481 + }, + { + "epoch": 0.7167887752280896, + "grad_norm": 0.8576222399857788, + "learning_rate": 3.920066056226315e-06, + "loss": 0.3036, + "step": 41482 + }, + { + "epoch": 0.7168060547414985, + "grad_norm": 1.371968491035634, + "learning_rate": 3.919621736431528e-06, + "loss": 0.3848, + "step": 41483 + }, + { + "epoch": 0.7168233342549074, + "grad_norm": 1.2432926587239232, + "learning_rate": 3.919177435680944e-06, + "loss": 0.1908, + "step": 41484 + }, + { + "epoch": 0.7168406137683163, + "grad_norm": 1.1959952877322086, + "learning_rate": 3.918733153975962e-06, + "loss": 0.5295, + "step": 41485 + }, + { + "epoch": 0.7168578932817252, + "grad_norm": 1.263078963622462, + "learning_rate": 3.9182888913179676e-06, + "loss": 0.2444, + "step": 41486 + }, + { + "epoch": 0.7168751727951341, + "grad_norm": 0.9844418455667645, + "learning_rate": 3.917844647708353e-06, + "loss": 0.4518, + "step": 41487 + }, + { + "epoch": 0.716892452308543, + "grad_norm": 1.1349964716231493, + "learning_rate": 3.917400423148515e-06, + "loss": 0.3658, + "step": 41488 + }, + { + "epoch": 0.716909731821952, + "grad_norm": 0.6336666416949069, + "learning_rate": 3.916956217639839e-06, + "loss": 0.8225, + "step": 41489 + }, + { + "epoch": 0.7169270113353607, + "grad_norm": 1.7753706292432194, + "learning_rate": 3.91651203118372e-06, + "loss": 0.3624, + "step": 41490 + }, + { + "epoch": 0.7169442908487696, + "grad_norm": 1.411322542061287, + "learning_rate": 3.916067863781545e-06, + "loss": 0.538, + "step": 41491 + }, + { + "epoch": 0.7169615703621786, + "grad_norm": 1.0736939151129363, + "learning_rate": 3.915623715434709e-06, + "loss": 0.3737, + "step": 41492 + }, + { + "epoch": 0.7169788498755875, + "grad_norm": 1.1980902109840155, + "learning_rate": 3.915179586144601e-06, + "loss": 0.369, + "step": 41493 + }, + { + "epoch": 0.7169961293889964, + "grad_norm": 0.9325114638608614, + "learning_rate": 3.914735475912615e-06, + "loss": 0.3222, + "step": 41494 + }, + { + "epoch": 0.7170134089024053, + "grad_norm": 1.5319598937422951, + "learning_rate": 3.914291384740139e-06, + "loss": 0.6003, + "step": 41495 + }, + { + "epoch": 0.7170306884158142, + "grad_norm": 1.4686473134473286, + "learning_rate": 3.913847312628567e-06, + "loss": 0.285, + "step": 41496 + }, + { + "epoch": 0.7170479679292231, + "grad_norm": 0.9940518965168019, + "learning_rate": 3.913403259579284e-06, + "loss": 0.7507, + "step": 41497 + }, + { + "epoch": 0.717065247442632, + "grad_norm": 0.9937731675287563, + "learning_rate": 3.912959225593684e-06, + "loss": 0.4996, + "step": 41498 + }, + { + "epoch": 0.7170825269560409, + "grad_norm": 1.5981041879052422, + "learning_rate": 3.9125152106731635e-06, + "loss": 0.2387, + "step": 41499 + }, + { + "epoch": 0.7170998064694498, + "grad_norm": 1.2608486432381294, + "learning_rate": 3.912071214819104e-06, + "loss": 0.4089, + "step": 41500 + }, + { + "epoch": 0.7171170859828587, + "grad_norm": 1.6516690623084858, + "learning_rate": 3.911627238032904e-06, + "loss": 0.3966, + "step": 41501 + }, + { + "epoch": 0.7171343654962676, + "grad_norm": 1.110332671385212, + "learning_rate": 3.911183280315945e-06, + "loss": 0.5867, + "step": 41502 + }, + { + "epoch": 0.7171516450096765, + "grad_norm": 0.8490839954138952, + "learning_rate": 3.910739341669627e-06, + "loss": 0.2437, + "step": 41503 + }, + { + "epoch": 0.7171689245230854, + "grad_norm": 1.0367570870249778, + "learning_rate": 3.9102954220953295e-06, + "loss": 0.481, + "step": 41504 + }, + { + "epoch": 0.7171862040364944, + "grad_norm": 0.7177107675467896, + "learning_rate": 3.9098515215944565e-06, + "loss": 0.4278, + "step": 41505 + }, + { + "epoch": 0.7172034835499033, + "grad_norm": 1.3632112443299969, + "learning_rate": 3.909407640168388e-06, + "loss": 0.3354, + "step": 41506 + }, + { + "epoch": 0.7172207630633122, + "grad_norm": 1.6449692692828446, + "learning_rate": 3.9089637778185205e-06, + "loss": 0.5331, + "step": 41507 + }, + { + "epoch": 0.7172380425767211, + "grad_norm": 1.2753686359913525, + "learning_rate": 3.9085199345462385e-06, + "loss": 0.4237, + "step": 41508 + }, + { + "epoch": 0.71725532209013, + "grad_norm": 1.3578762644092943, + "learning_rate": 3.908076110352938e-06, + "loss": 0.3963, + "step": 41509 + }, + { + "epoch": 0.7172726016035389, + "grad_norm": 1.2417986338897822, + "learning_rate": 3.907632305240004e-06, + "loss": 0.316, + "step": 41510 + }, + { + "epoch": 0.7172898811169477, + "grad_norm": 0.8891148281841247, + "learning_rate": 3.907188519208827e-06, + "loss": 0.4632, + "step": 41511 + }, + { + "epoch": 0.7173071606303566, + "grad_norm": 1.2842985057021887, + "learning_rate": 3.906744752260802e-06, + "loss": 0.3209, + "step": 41512 + }, + { + "epoch": 0.7173244401437655, + "grad_norm": 0.5990643133998594, + "learning_rate": 3.906301004397313e-06, + "loss": 0.1452, + "step": 41513 + }, + { + "epoch": 0.7173417196571744, + "grad_norm": 0.5627196122327214, + "learning_rate": 3.905857275619756e-06, + "loss": 0.5542, + "step": 41514 + }, + { + "epoch": 0.7173589991705833, + "grad_norm": 0.9634743632818894, + "learning_rate": 3.905413565929513e-06, + "loss": 0.4692, + "step": 41515 + }, + { + "epoch": 0.7173762786839922, + "grad_norm": 1.1317709081085578, + "learning_rate": 3.904969875327977e-06, + "loss": 0.4177, + "step": 41516 + }, + { + "epoch": 0.7173935581974011, + "grad_norm": 1.3010961522275348, + "learning_rate": 3.90452620381654e-06, + "loss": 0.4436, + "step": 41517 + }, + { + "epoch": 0.71741083771081, + "grad_norm": 0.626397331333982, + "learning_rate": 3.904082551396593e-06, + "loss": 0.577, + "step": 41518 + }, + { + "epoch": 0.717428117224219, + "grad_norm": 1.130675313730598, + "learning_rate": 3.903638918069524e-06, + "loss": 0.2606, + "step": 41519 + }, + { + "epoch": 0.7174453967376279, + "grad_norm": 1.3880942704145005, + "learning_rate": 3.903195303836715e-06, + "loss": 0.3593, + "step": 41520 + }, + { + "epoch": 0.7174626762510368, + "grad_norm": 1.018790025085849, + "learning_rate": 3.9027517086995635e-06, + "loss": 0.2214, + "step": 41521 + }, + { + "epoch": 0.7174799557644457, + "grad_norm": 1.133168188185356, + "learning_rate": 3.902308132659457e-06, + "loss": 0.3125, + "step": 41522 + }, + { + "epoch": 0.7174972352778546, + "grad_norm": 1.1216478140297539, + "learning_rate": 3.901864575717789e-06, + "loss": 0.4301, + "step": 41523 + }, + { + "epoch": 0.7175145147912635, + "grad_norm": 0.9175949541995, + "learning_rate": 3.90142103787594e-06, + "loss": 0.2473, + "step": 41524 + }, + { + "epoch": 0.7175317943046724, + "grad_norm": 1.0288980561273517, + "learning_rate": 3.9009775191353075e-06, + "loss": 0.362, + "step": 41525 + }, + { + "epoch": 0.7175490738180813, + "grad_norm": 1.8174724888204805, + "learning_rate": 3.900534019497273e-06, + "loss": 0.4326, + "step": 41526 + }, + { + "epoch": 0.7175663533314902, + "grad_norm": 2.252732533113129, + "learning_rate": 3.90009053896323e-06, + "loss": 0.2749, + "step": 41527 + }, + { + "epoch": 0.7175836328448991, + "grad_norm": 1.5390135714493232, + "learning_rate": 3.899647077534567e-06, + "loss": 0.3431, + "step": 41528 + }, + { + "epoch": 0.717600912358308, + "grad_norm": 0.9274397020124834, + "learning_rate": 3.899203635212676e-06, + "loss": 0.234, + "step": 41529 + }, + { + "epoch": 0.7176181918717169, + "grad_norm": 1.3705862509884723, + "learning_rate": 3.8987602119989445e-06, + "loss": 0.1305, + "step": 41530 + }, + { + "epoch": 0.7176354713851258, + "grad_norm": 1.182276961781113, + "learning_rate": 3.898316807894754e-06, + "loss": 0.626, + "step": 41531 + }, + { + "epoch": 0.7176527508985348, + "grad_norm": 1.0794738648386333, + "learning_rate": 3.8978734229015005e-06, + "loss": 0.3966, + "step": 41532 + }, + { + "epoch": 0.7176700304119435, + "grad_norm": 0.8722254722989042, + "learning_rate": 3.8974300570205704e-06, + "loss": 0.4079, + "step": 41533 + }, + { + "epoch": 0.7176873099253525, + "grad_norm": 1.1013451638173124, + "learning_rate": 3.8969867102533564e-06, + "loss": 0.3904, + "step": 41534 + }, + { + "epoch": 0.7177045894387614, + "grad_norm": 0.7406456055784246, + "learning_rate": 3.896543382601241e-06, + "loss": 0.2325, + "step": 41535 + }, + { + "epoch": 0.7177218689521703, + "grad_norm": 1.3630013489806647, + "learning_rate": 3.896100074065619e-06, + "loss": 0.4225, + "step": 41536 + }, + { + "epoch": 0.7177391484655792, + "grad_norm": 1.5492652281996835, + "learning_rate": 3.895656784647872e-06, + "loss": 0.5314, + "step": 41537 + }, + { + "epoch": 0.7177564279789881, + "grad_norm": 1.1897277384840237, + "learning_rate": 3.895213514349392e-06, + "loss": 0.4061, + "step": 41538 + }, + { + "epoch": 0.717773707492397, + "grad_norm": 1.454894181042941, + "learning_rate": 3.894770263171566e-06, + "loss": 0.2993, + "step": 41539 + }, + { + "epoch": 0.7177909870058059, + "grad_norm": 1.5490241421156337, + "learning_rate": 3.894327031115789e-06, + "loss": 0.3751, + "step": 41540 + }, + { + "epoch": 0.7178082665192148, + "grad_norm": 1.3737282756811986, + "learning_rate": 3.8938838181834416e-06, + "loss": 0.2849, + "step": 41541 + }, + { + "epoch": 0.7178255460326237, + "grad_norm": 1.370818204289827, + "learning_rate": 3.893440624375911e-06, + "loss": 0.4008, + "step": 41542 + }, + { + "epoch": 0.7178428255460326, + "grad_norm": 1.7812488946761273, + "learning_rate": 3.8929974496945935e-06, + "loss": 0.46, + "step": 41543 + }, + { + "epoch": 0.7178601050594415, + "grad_norm": 1.020659298042211, + "learning_rate": 3.892554294140864e-06, + "loss": 0.5608, + "step": 41544 + }, + { + "epoch": 0.7178773845728504, + "grad_norm": 1.0980113707289385, + "learning_rate": 3.892111157716127e-06, + "loss": 0.3086, + "step": 41545 + }, + { + "epoch": 0.7178946640862593, + "grad_norm": 1.40047795937661, + "learning_rate": 3.891668040421757e-06, + "loss": 0.3735, + "step": 41546 + }, + { + "epoch": 0.7179119435996683, + "grad_norm": 1.1315658969395408, + "learning_rate": 3.891224942259151e-06, + "loss": 0.3637, + "step": 41547 + }, + { + "epoch": 0.7179292231130772, + "grad_norm": 1.2613605081955752, + "learning_rate": 3.890781863229688e-06, + "loss": 0.28, + "step": 41548 + }, + { + "epoch": 0.7179465026264861, + "grad_norm": 1.2611336050947253, + "learning_rate": 3.890338803334765e-06, + "loss": 0.2837, + "step": 41549 + }, + { + "epoch": 0.717963782139895, + "grad_norm": 1.2781623360639245, + "learning_rate": 3.889895762575762e-06, + "loss": 0.2824, + "step": 41550 + }, + { + "epoch": 0.7179810616533039, + "grad_norm": 0.8568937186096016, + "learning_rate": 3.88945274095407e-06, + "loss": 0.349, + "step": 41551 + }, + { + "epoch": 0.7179983411667128, + "grad_norm": 1.0797134108633082, + "learning_rate": 3.889009738471079e-06, + "loss": 0.3923, + "step": 41552 + }, + { + "epoch": 0.7180156206801217, + "grad_norm": 1.5958607806049994, + "learning_rate": 3.8885667551281715e-06, + "loss": 0.3819, + "step": 41553 + }, + { + "epoch": 0.7180329001935305, + "grad_norm": 1.3993681145339385, + "learning_rate": 3.88812379092674e-06, + "loss": 0.4627, + "step": 41554 + }, + { + "epoch": 0.7180501797069394, + "grad_norm": 1.3075221811397353, + "learning_rate": 3.8876808458681655e-06, + "loss": 0.4188, + "step": 41555 + }, + { + "epoch": 0.7180674592203483, + "grad_norm": 0.6297063528130699, + "learning_rate": 3.88723791995384e-06, + "loss": 0.422, + "step": 41556 + }, + { + "epoch": 0.7180847387337572, + "grad_norm": 1.1206907469473266, + "learning_rate": 3.88679501318515e-06, + "loss": 0.5439, + "step": 41557 + }, + { + "epoch": 0.7181020182471661, + "grad_norm": 0.8775589289841534, + "learning_rate": 3.886352125563485e-06, + "loss": 0.1831, + "step": 41558 + }, + { + "epoch": 0.718119297760575, + "grad_norm": 1.0064890283286787, + "learning_rate": 3.885909257090227e-06, + "loss": 0.2924, + "step": 41559 + }, + { + "epoch": 0.718136577273984, + "grad_norm": 1.2234324172269513, + "learning_rate": 3.885466407766769e-06, + "loss": 0.383, + "step": 41560 + }, + { + "epoch": 0.7181538567873929, + "grad_norm": 0.9209402505142844, + "learning_rate": 3.885023577594491e-06, + "loss": 0.3199, + "step": 41561 + }, + { + "epoch": 0.7181711363008018, + "grad_norm": 0.5395284762845954, + "learning_rate": 3.884580766574785e-06, + "loss": 0.7416, + "step": 41562 + }, + { + "epoch": 0.7181884158142107, + "grad_norm": 1.091670438733079, + "learning_rate": 3.88413797470904e-06, + "loss": 0.2548, + "step": 41563 + }, + { + "epoch": 0.7182056953276196, + "grad_norm": 1.1289858269686868, + "learning_rate": 3.883695201998635e-06, + "loss": 0.3202, + "step": 41564 + }, + { + "epoch": 0.7182229748410285, + "grad_norm": 1.6841938138560173, + "learning_rate": 3.883252448444966e-06, + "loss": 0.4145, + "step": 41565 + }, + { + "epoch": 0.7182402543544374, + "grad_norm": 1.0320193638303385, + "learning_rate": 3.882809714049412e-06, + "loss": 0.2623, + "step": 41566 + }, + { + "epoch": 0.7182575338678463, + "grad_norm": 1.3888441428258815, + "learning_rate": 3.882366998813363e-06, + "loss": 0.3269, + "step": 41567 + }, + { + "epoch": 0.7182748133812552, + "grad_norm": 1.1792815137279347, + "learning_rate": 3.881924302738205e-06, + "loss": 0.3615, + "step": 41568 + }, + { + "epoch": 0.7182920928946641, + "grad_norm": 1.7331428189158558, + "learning_rate": 3.8814816258253275e-06, + "loss": 0.3572, + "step": 41569 + }, + { + "epoch": 0.718309372408073, + "grad_norm": 0.9128452175422627, + "learning_rate": 3.8810389680761115e-06, + "loss": 0.2454, + "step": 41570 + }, + { + "epoch": 0.7183266519214819, + "grad_norm": 1.0566581873507175, + "learning_rate": 3.880596329491951e-06, + "loss": 0.3437, + "step": 41571 + }, + { + "epoch": 0.7183439314348908, + "grad_norm": 0.9740459443535358, + "learning_rate": 3.8801537100742235e-06, + "loss": 0.2404, + "step": 41572 + }, + { + "epoch": 0.7183612109482997, + "grad_norm": 1.9640187142797152, + "learning_rate": 3.8797111098243205e-06, + "loss": 0.3252, + "step": 41573 + }, + { + "epoch": 0.7183784904617087, + "grad_norm": 1.0894302470079098, + "learning_rate": 3.87926852874363e-06, + "loss": 0.4081, + "step": 41574 + }, + { + "epoch": 0.7183957699751174, + "grad_norm": 1.081061252396175, + "learning_rate": 3.8788259668335325e-06, + "loss": 0.3672, + "step": 41575 + }, + { + "epoch": 0.7184130494885264, + "grad_norm": 1.3536042932620007, + "learning_rate": 3.878383424095421e-06, + "loss": 0.4018, + "step": 41576 + }, + { + "epoch": 0.7184303290019353, + "grad_norm": 1.2763360860553414, + "learning_rate": 3.877940900530675e-06, + "loss": 0.3899, + "step": 41577 + }, + { + "epoch": 0.7184476085153442, + "grad_norm": 1.2776287711821983, + "learning_rate": 3.877498396140682e-06, + "loss": 0.3975, + "step": 41578 + }, + { + "epoch": 0.7184648880287531, + "grad_norm": 1.3022533571710135, + "learning_rate": 3.87705591092683e-06, + "loss": 0.3147, + "step": 41579 + }, + { + "epoch": 0.718482167542162, + "grad_norm": 2.077941653219272, + "learning_rate": 3.876613444890508e-06, + "loss": 0.5092, + "step": 41580 + }, + { + "epoch": 0.7184994470555709, + "grad_norm": 1.2016874918164173, + "learning_rate": 3.876170998033097e-06, + "loss": 0.6145, + "step": 41581 + }, + { + "epoch": 0.7185167265689798, + "grad_norm": 1.0560288309201948, + "learning_rate": 3.875728570355981e-06, + "loss": 0.3972, + "step": 41582 + }, + { + "epoch": 0.7185340060823887, + "grad_norm": 1.651373633381924, + "learning_rate": 3.875286161860552e-06, + "loss": 0.2714, + "step": 41583 + }, + { + "epoch": 0.7185512855957976, + "grad_norm": 1.018009543031299, + "learning_rate": 3.874843772548185e-06, + "loss": 0.2973, + "step": 41584 + }, + { + "epoch": 0.7185685651092065, + "grad_norm": 2.511894382422144, + "learning_rate": 3.87440140242028e-06, + "loss": 0.6864, + "step": 41585 + }, + { + "epoch": 0.7185858446226154, + "grad_norm": 1.1292975177135745, + "learning_rate": 3.873959051478212e-06, + "loss": 0.3425, + "step": 41586 + }, + { + "epoch": 0.7186031241360243, + "grad_norm": 1.1566818126112162, + "learning_rate": 3.873516719723373e-06, + "loss": 0.2767, + "step": 41587 + }, + { + "epoch": 0.7186204036494332, + "grad_norm": 1.2542017892124846, + "learning_rate": 3.873074407157143e-06, + "loss": 0.381, + "step": 41588 + }, + { + "epoch": 0.7186376831628422, + "grad_norm": 1.1003225844286952, + "learning_rate": 3.872632113780911e-06, + "loss": 0.3713, + "step": 41589 + }, + { + "epoch": 0.7186549626762511, + "grad_norm": 1.3893499662554016, + "learning_rate": 3.872189839596055e-06, + "loss": 0.2933, + "step": 41590 + }, + { + "epoch": 0.71867224218966, + "grad_norm": 0.7693739811449015, + "learning_rate": 3.871747584603973e-06, + "loss": 0.3834, + "step": 41591 + }, + { + "epoch": 0.7186895217030689, + "grad_norm": 1.1631242072763452, + "learning_rate": 3.8713053488060425e-06, + "loss": 0.2334, + "step": 41592 + }, + { + "epoch": 0.7187068012164778, + "grad_norm": 1.2178188410229827, + "learning_rate": 3.870863132203647e-06, + "loss": 0.4875, + "step": 41593 + }, + { + "epoch": 0.7187240807298867, + "grad_norm": 1.1712086790399998, + "learning_rate": 3.870420934798177e-06, + "loss": 0.3375, + "step": 41594 + }, + { + "epoch": 0.7187413602432956, + "grad_norm": 1.289462969762001, + "learning_rate": 3.869978756591011e-06, + "loss": 0.444, + "step": 41595 + }, + { + "epoch": 0.7187586397567044, + "grad_norm": 1.6908532613590799, + "learning_rate": 3.8695365975835376e-06, + "loss": 0.4508, + "step": 41596 + }, + { + "epoch": 0.7187759192701133, + "grad_norm": 1.631613973404312, + "learning_rate": 3.869094457777141e-06, + "loss": 0.3769, + "step": 41597 + }, + { + "epoch": 0.7187931987835222, + "grad_norm": 1.276523432773076, + "learning_rate": 3.868652337173211e-06, + "loss": 0.4298, + "step": 41598 + }, + { + "epoch": 0.7188104782969311, + "grad_norm": 1.1080712709204696, + "learning_rate": 3.868210235773123e-06, + "loss": 0.2933, + "step": 41599 + }, + { + "epoch": 0.71882775781034, + "grad_norm": 2.2423034302777234, + "learning_rate": 3.867768153578271e-06, + "loss": 0.1947, + "step": 41600 + }, + { + "epoch": 0.7188450373237489, + "grad_norm": 1.8191876372626965, + "learning_rate": 3.867326090590031e-06, + "loss": 0.4601, + "step": 41601 + }, + { + "epoch": 0.7188623168371578, + "grad_norm": 1.1265430679061226, + "learning_rate": 3.8668840468097924e-06, + "loss": 0.3303, + "step": 41602 + }, + { + "epoch": 0.7188795963505668, + "grad_norm": 0.9899146402877993, + "learning_rate": 3.866442022238943e-06, + "loss": 0.2377, + "step": 41603 + }, + { + "epoch": 0.7188968758639757, + "grad_norm": 1.8355914895813255, + "learning_rate": 3.866000016878859e-06, + "loss": 0.3928, + "step": 41604 + }, + { + "epoch": 0.7189141553773846, + "grad_norm": 0.8163620512917513, + "learning_rate": 3.865558030730933e-06, + "loss": 0.293, + "step": 41605 + }, + { + "epoch": 0.7189314348907935, + "grad_norm": 0.619182394723725, + "learning_rate": 3.865116063796541e-06, + "loss": 0.666, + "step": 41606 + }, + { + "epoch": 0.7189487144042024, + "grad_norm": 1.1617718674861464, + "learning_rate": 3.8646741160770725e-06, + "loss": 0.3072, + "step": 41607 + }, + { + "epoch": 0.7189659939176113, + "grad_norm": 1.0721547132667888, + "learning_rate": 3.864232187573911e-06, + "loss": 0.2772, + "step": 41608 + }, + { + "epoch": 0.7189832734310202, + "grad_norm": 1.29334706978336, + "learning_rate": 3.863790278288444e-06, + "loss": 0.2713, + "step": 41609 + }, + { + "epoch": 0.7190005529444291, + "grad_norm": 0.9439958911343632, + "learning_rate": 3.863348388222049e-06, + "loss": 0.2994, + "step": 41610 + }, + { + "epoch": 0.719017832457838, + "grad_norm": 1.4679121269982025, + "learning_rate": 3.862906517376116e-06, + "loss": 0.3535, + "step": 41611 + }, + { + "epoch": 0.7190351119712469, + "grad_norm": 1.1078732054647726, + "learning_rate": 3.862464665752023e-06, + "loss": 0.4277, + "step": 41612 + }, + { + "epoch": 0.7190523914846558, + "grad_norm": 1.007189119125027, + "learning_rate": 3.862022833351157e-06, + "loss": 0.4609, + "step": 41613 + }, + { + "epoch": 0.7190696709980647, + "grad_norm": 1.58518189905788, + "learning_rate": 3.861581020174906e-06, + "loss": 0.4836, + "step": 41614 + }, + { + "epoch": 0.7190869505114736, + "grad_norm": 1.0150286453989061, + "learning_rate": 3.861139226224646e-06, + "loss": 0.4132, + "step": 41615 + }, + { + "epoch": 0.7191042300248826, + "grad_norm": 1.3099347738647436, + "learning_rate": 3.860697451501767e-06, + "loss": 0.2823, + "step": 41616 + }, + { + "epoch": 0.7191215095382913, + "grad_norm": 1.5410380257523002, + "learning_rate": 3.860255696007647e-06, + "loss": 0.3095, + "step": 41617 + }, + { + "epoch": 0.7191387890517003, + "grad_norm": 2.084213084759562, + "learning_rate": 3.859813959743675e-06, + "loss": 0.3949, + "step": 41618 + }, + { + "epoch": 0.7191560685651092, + "grad_norm": 0.9474128002870261, + "learning_rate": 3.8593722427112304e-06, + "loss": 0.3053, + "step": 41619 + }, + { + "epoch": 0.7191733480785181, + "grad_norm": 1.3666530618247184, + "learning_rate": 3.858930544911702e-06, + "loss": 0.3078, + "step": 41620 + }, + { + "epoch": 0.719190627591927, + "grad_norm": 1.5168556757502591, + "learning_rate": 3.858488866346466e-06, + "loss": 0.3624, + "step": 41621 + }, + { + "epoch": 0.7192079071053359, + "grad_norm": 1.0416603594545595, + "learning_rate": 3.8580472070169135e-06, + "loss": 0.4639, + "step": 41622 + }, + { + "epoch": 0.7192251866187448, + "grad_norm": 1.0344893548220484, + "learning_rate": 3.857605566924425e-06, + "loss": 0.2699, + "step": 41623 + }, + { + "epoch": 0.7192424661321537, + "grad_norm": 1.461806133401403, + "learning_rate": 3.8571639460703744e-06, + "loss": 0.3543, + "step": 41624 + }, + { + "epoch": 0.7192597456455626, + "grad_norm": 1.2274149988327296, + "learning_rate": 3.856722344456161e-06, + "loss": 0.4816, + "step": 41625 + }, + { + "epoch": 0.7192770251589715, + "grad_norm": 1.6688193891836371, + "learning_rate": 3.856280762083156e-06, + "loss": 0.2157, + "step": 41626 + }, + { + "epoch": 0.7192943046723804, + "grad_norm": 1.1991593834158913, + "learning_rate": 3.85583919895275e-06, + "loss": 0.3513, + "step": 41627 + }, + { + "epoch": 0.7193115841857893, + "grad_norm": 1.0899116734628385, + "learning_rate": 3.8553976550663185e-06, + "loss": 0.2631, + "step": 41628 + }, + { + "epoch": 0.7193288636991982, + "grad_norm": 1.2604404576901618, + "learning_rate": 3.854956130425254e-06, + "loss": 0.4228, + "step": 41629 + }, + { + "epoch": 0.7193461432126071, + "grad_norm": 1.0923716470926723, + "learning_rate": 3.854514625030925e-06, + "loss": 0.2317, + "step": 41630 + }, + { + "epoch": 0.719363422726016, + "grad_norm": 1.8464101973997757, + "learning_rate": 3.854073138884731e-06, + "loss": 0.4139, + "step": 41631 + }, + { + "epoch": 0.719380702239425, + "grad_norm": 1.4769527942534437, + "learning_rate": 3.853631671988047e-06, + "loss": 0.3595, + "step": 41632 + }, + { + "epoch": 0.7193979817528339, + "grad_norm": 1.0364187365446211, + "learning_rate": 3.853190224342253e-06, + "loss": 0.3792, + "step": 41633 + }, + { + "epoch": 0.7194152612662428, + "grad_norm": 1.20542488937167, + "learning_rate": 3.852748795948737e-06, + "loss": 0.2725, + "step": 41634 + }, + { + "epoch": 0.7194325407796517, + "grad_norm": 1.0807920461894172, + "learning_rate": 3.852307386808875e-06, + "loss": 0.2946, + "step": 41635 + }, + { + "epoch": 0.7194498202930606, + "grad_norm": 1.2379532383171605, + "learning_rate": 3.851865996924054e-06, + "loss": 0.2504, + "step": 41636 + }, + { + "epoch": 0.7194670998064695, + "grad_norm": 1.237008151974774, + "learning_rate": 3.8514246262956554e-06, + "loss": 0.5231, + "step": 41637 + }, + { + "epoch": 0.7194843793198783, + "grad_norm": 0.5781579594399352, + "learning_rate": 3.850983274925066e-06, + "loss": 0.5585, + "step": 41638 + }, + { + "epoch": 0.7195016588332872, + "grad_norm": 1.016569180090841, + "learning_rate": 3.850541942813661e-06, + "loss": 0.3231, + "step": 41639 + }, + { + "epoch": 0.7195189383466961, + "grad_norm": 1.146040906528909, + "learning_rate": 3.850100629962829e-06, + "loss": 0.3221, + "step": 41640 + }, + { + "epoch": 0.719536217860105, + "grad_norm": 1.0246868566356426, + "learning_rate": 3.849659336373947e-06, + "loss": 0.3388, + "step": 41641 + }, + { + "epoch": 0.7195534973735139, + "grad_norm": 1.1398456254465659, + "learning_rate": 3.849218062048398e-06, + "loss": 0.3395, + "step": 41642 + }, + { + "epoch": 0.7195707768869228, + "grad_norm": 2.4913728776954867, + "learning_rate": 3.8487768069875695e-06, + "loss": 0.329, + "step": 41643 + }, + { + "epoch": 0.7195880564003317, + "grad_norm": 0.9718361269833539, + "learning_rate": 3.848335571192836e-06, + "loss": 0.3519, + "step": 41644 + }, + { + "epoch": 0.7196053359137407, + "grad_norm": 1.3025213315998547, + "learning_rate": 3.8478943546655855e-06, + "loss": 0.5789, + "step": 41645 + }, + { + "epoch": 0.7196226154271496, + "grad_norm": 1.4563880590099383, + "learning_rate": 3.847453157407195e-06, + "loss": 0.4198, + "step": 41646 + }, + { + "epoch": 0.7196398949405585, + "grad_norm": 1.264496430871637, + "learning_rate": 3.8470119794190485e-06, + "loss": 0.4531, + "step": 41647 + }, + { + "epoch": 0.7196571744539674, + "grad_norm": 1.3972398745172694, + "learning_rate": 3.8465708207025285e-06, + "loss": 0.2463, + "step": 41648 + }, + { + "epoch": 0.7196744539673763, + "grad_norm": 1.4521675522500321, + "learning_rate": 3.84612968125902e-06, + "loss": 0.3378, + "step": 41649 + }, + { + "epoch": 0.7196917334807852, + "grad_norm": 1.5309474312287423, + "learning_rate": 3.845688561089898e-06, + "loss": 0.2974, + "step": 41650 + }, + { + "epoch": 0.7197090129941941, + "grad_norm": 1.1207901597886056, + "learning_rate": 3.845247460196549e-06, + "loss": 0.3855, + "step": 41651 + }, + { + "epoch": 0.719726292507603, + "grad_norm": 1.6807108768557513, + "learning_rate": 3.844806378580349e-06, + "loss": 0.2756, + "step": 41652 + }, + { + "epoch": 0.7197435720210119, + "grad_norm": 1.5807858289015964, + "learning_rate": 3.844365316242685e-06, + "loss": 0.3297, + "step": 41653 + }, + { + "epoch": 0.7197608515344208, + "grad_norm": 1.3412415730162688, + "learning_rate": 3.8439242731849395e-06, + "loss": 0.2695, + "step": 41654 + }, + { + "epoch": 0.7197781310478297, + "grad_norm": 1.225507250167177, + "learning_rate": 3.843483249408487e-06, + "loss": 0.2934, + "step": 41655 + }, + { + "epoch": 0.7197954105612386, + "grad_norm": 1.3320277895061359, + "learning_rate": 3.843042244914717e-06, + "loss": 0.1891, + "step": 41656 + }, + { + "epoch": 0.7198126900746475, + "grad_norm": 0.8590995459672764, + "learning_rate": 3.842601259705002e-06, + "loss": 0.4327, + "step": 41657 + }, + { + "epoch": 0.7198299695880565, + "grad_norm": 1.028734355559709, + "learning_rate": 3.842160293780729e-06, + "loss": 0.4702, + "step": 41658 + }, + { + "epoch": 0.7198472491014652, + "grad_norm": 0.9941016892272965, + "learning_rate": 3.8417193471432764e-06, + "loss": 0.3273, + "step": 41659 + }, + { + "epoch": 0.7198645286148742, + "grad_norm": 1.2755818462911697, + "learning_rate": 3.841278419794032e-06, + "loss": 0.3728, + "step": 41660 + }, + { + "epoch": 0.7198818081282831, + "grad_norm": 1.118325723009145, + "learning_rate": 3.840837511734367e-06, + "loss": 0.3846, + "step": 41661 + }, + { + "epoch": 0.719899087641692, + "grad_norm": 1.589653006949028, + "learning_rate": 3.840396622965672e-06, + "loss": 0.4578, + "step": 41662 + }, + { + "epoch": 0.7199163671551009, + "grad_norm": 0.9567321384407893, + "learning_rate": 3.839955753489322e-06, + "loss": 0.4285, + "step": 41663 + }, + { + "epoch": 0.7199336466685098, + "grad_norm": 1.2757041867075223, + "learning_rate": 3.839514903306692e-06, + "loss": 0.2155, + "step": 41664 + }, + { + "epoch": 0.7199509261819187, + "grad_norm": 3.0188569939498935, + "learning_rate": 3.839074072419176e-06, + "loss": 0.3406, + "step": 41665 + }, + { + "epoch": 0.7199682056953276, + "grad_norm": 1.937219012927582, + "learning_rate": 3.838633260828144e-06, + "loss": 0.286, + "step": 41666 + }, + { + "epoch": 0.7199854852087365, + "grad_norm": 1.7714241639277961, + "learning_rate": 3.838192468534986e-06, + "loss": 0.2948, + "step": 41667 + }, + { + "epoch": 0.7200027647221454, + "grad_norm": 0.9907128040175889, + "learning_rate": 3.8377516955410725e-06, + "loss": 0.2961, + "step": 41668 + }, + { + "epoch": 0.7200200442355543, + "grad_norm": 1.3930215747736123, + "learning_rate": 3.837310941847793e-06, + "loss": 0.498, + "step": 41669 + }, + { + "epoch": 0.7200373237489632, + "grad_norm": 0.8500939194455157, + "learning_rate": 3.836870207456518e-06, + "loss": 0.2825, + "step": 41670 + }, + { + "epoch": 0.7200546032623721, + "grad_norm": 1.306865895026679, + "learning_rate": 3.83642949236864e-06, + "loss": 0.3019, + "step": 41671 + }, + { + "epoch": 0.720071882775781, + "grad_norm": 1.1149688755192508, + "learning_rate": 3.835988796585529e-06, + "loss": 0.3008, + "step": 41672 + }, + { + "epoch": 0.72008916228919, + "grad_norm": 1.6437976340324263, + "learning_rate": 3.835548120108574e-06, + "loss": 0.3273, + "step": 41673 + }, + { + "epoch": 0.7201064418025989, + "grad_norm": 1.37797345768775, + "learning_rate": 3.83510746293915e-06, + "loss": 0.4568, + "step": 41674 + }, + { + "epoch": 0.7201237213160078, + "grad_norm": 1.2815227689087993, + "learning_rate": 3.834666825078635e-06, + "loss": 0.2928, + "step": 41675 + }, + { + "epoch": 0.7201410008294167, + "grad_norm": 0.9743677624154123, + "learning_rate": 3.834226206528413e-06, + "loss": 0.3425, + "step": 41676 + }, + { + "epoch": 0.7201582803428256, + "grad_norm": 0.6267642934038309, + "learning_rate": 3.833785607289862e-06, + "loss": 0.1677, + "step": 41677 + }, + { + "epoch": 0.7201755598562345, + "grad_norm": 0.5546813441143092, + "learning_rate": 3.833345027364367e-06, + "loss": 0.5822, + "step": 41678 + }, + { + "epoch": 0.7201928393696434, + "grad_norm": 1.175340891999018, + "learning_rate": 3.8329044667533e-06, + "loss": 0.4584, + "step": 41679 + }, + { + "epoch": 0.7202101188830523, + "grad_norm": 1.4409619823900692, + "learning_rate": 3.832463925458049e-06, + "loss": 0.2583, + "step": 41680 + }, + { + "epoch": 0.7202273983964611, + "grad_norm": 1.122623052841139, + "learning_rate": 3.832023403479985e-06, + "loss": 0.8358, + "step": 41681 + }, + { + "epoch": 0.72024467790987, + "grad_norm": 1.007626079577222, + "learning_rate": 3.831582900820494e-06, + "loss": 0.2949, + "step": 41682 + }, + { + "epoch": 0.7202619574232789, + "grad_norm": 1.1206580414779999, + "learning_rate": 3.831142417480954e-06, + "loss": 0.5017, + "step": 41683 + }, + { + "epoch": 0.7202792369366878, + "grad_norm": 1.207161604118611, + "learning_rate": 3.830701953462748e-06, + "loss": 0.5013, + "step": 41684 + }, + { + "epoch": 0.7202965164500967, + "grad_norm": 1.0557631054780359, + "learning_rate": 3.830261508767251e-06, + "loss": 0.3731, + "step": 41685 + }, + { + "epoch": 0.7203137959635056, + "grad_norm": 1.1341855538567112, + "learning_rate": 3.829821083395842e-06, + "loss": 0.4186, + "step": 41686 + }, + { + "epoch": 0.7203310754769146, + "grad_norm": 1.6919749046182773, + "learning_rate": 3.829380677349902e-06, + "loss": 0.2967, + "step": 41687 + }, + { + "epoch": 0.7203483549903235, + "grad_norm": 1.0293967794644021, + "learning_rate": 3.82894029063081e-06, + "loss": 0.524, + "step": 41688 + }, + { + "epoch": 0.7203656345037324, + "grad_norm": 1.4879656573100948, + "learning_rate": 3.82849992323995e-06, + "loss": 0.4401, + "step": 41689 + }, + { + "epoch": 0.7203829140171413, + "grad_norm": 0.9780813061747156, + "learning_rate": 3.828059575178693e-06, + "loss": 0.2859, + "step": 41690 + }, + { + "epoch": 0.7204001935305502, + "grad_norm": 1.7566013010606925, + "learning_rate": 3.827619246448427e-06, + "loss": 0.5102, + "step": 41691 + }, + { + "epoch": 0.7204174730439591, + "grad_norm": 1.7213872197773106, + "learning_rate": 3.827178937050523e-06, + "loss": 0.423, + "step": 41692 + }, + { + "epoch": 0.720434752557368, + "grad_norm": 1.5861384973945105, + "learning_rate": 3.826738646986363e-06, + "loss": 0.2941, + "step": 41693 + }, + { + "epoch": 0.7204520320707769, + "grad_norm": 1.4162689080351833, + "learning_rate": 3.826298376257331e-06, + "loss": 0.2131, + "step": 41694 + }, + { + "epoch": 0.7204693115841858, + "grad_norm": 1.479553716471629, + "learning_rate": 3.825858124864797e-06, + "loss": 0.2819, + "step": 41695 + }, + { + "epoch": 0.7204865910975947, + "grad_norm": 1.402051083618292, + "learning_rate": 3.82541789281015e-06, + "loss": 0.3966, + "step": 41696 + }, + { + "epoch": 0.7205038706110036, + "grad_norm": 1.2848660138177326, + "learning_rate": 3.824977680094758e-06, + "loss": 0.305, + "step": 41697 + }, + { + "epoch": 0.7205211501244125, + "grad_norm": 1.3300904040684944, + "learning_rate": 3.824537486720006e-06, + "loss": 0.3799, + "step": 41698 + }, + { + "epoch": 0.7205384296378214, + "grad_norm": 0.8780443811315344, + "learning_rate": 3.8240973126872715e-06, + "loss": 0.8592, + "step": 41699 + }, + { + "epoch": 0.7205557091512304, + "grad_norm": 1.133307690482304, + "learning_rate": 3.823657157997938e-06, + "loss": 0.369, + "step": 41700 + }, + { + "epoch": 0.7205729886646393, + "grad_norm": 1.5464936339383932, + "learning_rate": 3.823217022653376e-06, + "loss": 0.3635, + "step": 41701 + }, + { + "epoch": 0.720590268178048, + "grad_norm": 1.4343192515351952, + "learning_rate": 3.82277690665497e-06, + "loss": 0.3319, + "step": 41702 + }, + { + "epoch": 0.720607547691457, + "grad_norm": 1.1392945160242656, + "learning_rate": 3.822336810004094e-06, + "loss": 0.6397, + "step": 41703 + }, + { + "epoch": 0.7206248272048659, + "grad_norm": 1.2369318224008103, + "learning_rate": 3.821896732702129e-06, + "loss": 0.4044, + "step": 41704 + }, + { + "epoch": 0.7206421067182748, + "grad_norm": 1.487708426324011, + "learning_rate": 3.821456674750454e-06, + "loss": 0.3425, + "step": 41705 + }, + { + "epoch": 0.7206593862316837, + "grad_norm": 0.8424408877844298, + "learning_rate": 3.821016636150444e-06, + "loss": 0.4474, + "step": 41706 + }, + { + "epoch": 0.7206766657450926, + "grad_norm": 1.3361564238083783, + "learning_rate": 3.8205766169034845e-06, + "loss": 0.2938, + "step": 41707 + }, + { + "epoch": 0.7206939452585015, + "grad_norm": 1.1802286250191838, + "learning_rate": 3.820136617010943e-06, + "loss": 0.4285, + "step": 41708 + }, + { + "epoch": 0.7207112247719104, + "grad_norm": 1.3201496961070043, + "learning_rate": 3.819696636474208e-06, + "loss": 0.3525, + "step": 41709 + }, + { + "epoch": 0.7207285042853193, + "grad_norm": 1.2972406926114675, + "learning_rate": 3.819256675294646e-06, + "loss": 0.2388, + "step": 41710 + }, + { + "epoch": 0.7207457837987282, + "grad_norm": 1.0216087487715242, + "learning_rate": 3.818816733473648e-06, + "loss": 0.2074, + "step": 41711 + }, + { + "epoch": 0.7207630633121371, + "grad_norm": 1.27259399624775, + "learning_rate": 3.818376811012582e-06, + "loss": 0.4655, + "step": 41712 + }, + { + "epoch": 0.720780342825546, + "grad_norm": 1.0896414233250284, + "learning_rate": 3.8179369079128335e-06, + "loss": 0.2381, + "step": 41713 + }, + { + "epoch": 0.720797622338955, + "grad_norm": 1.6960779892954578, + "learning_rate": 3.817497024175773e-06, + "loss": 0.3421, + "step": 41714 + }, + { + "epoch": 0.7208149018523639, + "grad_norm": 1.2615154162254967, + "learning_rate": 3.817057159802786e-06, + "loss": 0.4015, + "step": 41715 + }, + { + "epoch": 0.7208321813657728, + "grad_norm": 0.9138300850885616, + "learning_rate": 3.816617314795241e-06, + "loss": 0.4904, + "step": 41716 + }, + { + "epoch": 0.7208494608791817, + "grad_norm": 1.1098635886932564, + "learning_rate": 3.816177489154521e-06, + "loss": 0.3763, + "step": 41717 + }, + { + "epoch": 0.7208667403925906, + "grad_norm": 2.1618102493516114, + "learning_rate": 3.815737682882007e-06, + "loss": 0.4419, + "step": 41718 + }, + { + "epoch": 0.7208840199059995, + "grad_norm": 1.2109656435950837, + "learning_rate": 3.815297895979069e-06, + "loss": 0.2504, + "step": 41719 + }, + { + "epoch": 0.7209012994194084, + "grad_norm": 1.9584490169839779, + "learning_rate": 3.8148581284470905e-06, + "loss": 0.2934, + "step": 41720 + }, + { + "epoch": 0.7209185789328173, + "grad_norm": 1.068963033122371, + "learning_rate": 3.8144183802874445e-06, + "loss": 0.3634, + "step": 41721 + }, + { + "epoch": 0.7209358584462262, + "grad_norm": 1.248481977854726, + "learning_rate": 3.8139786515015087e-06, + "loss": 0.3668, + "step": 41722 + }, + { + "epoch": 0.720953137959635, + "grad_norm": 1.595156291062335, + "learning_rate": 3.813538942090663e-06, + "loss": 0.4572, + "step": 41723 + }, + { + "epoch": 0.7209704174730439, + "grad_norm": 1.962044402909642, + "learning_rate": 3.813099252056286e-06, + "loss": 0.3801, + "step": 41724 + }, + { + "epoch": 0.7209876969864528, + "grad_norm": 1.1671846693421604, + "learning_rate": 3.8126595813997536e-06, + "loss": 0.4926, + "step": 41725 + }, + { + "epoch": 0.7210049764998617, + "grad_norm": 1.5999099351963713, + "learning_rate": 3.812219930122437e-06, + "loss": 0.2423, + "step": 41726 + }, + { + "epoch": 0.7210222560132706, + "grad_norm": 0.9580078423644689, + "learning_rate": 3.8117802982257177e-06, + "loss": 0.2864, + "step": 41727 + }, + { + "epoch": 0.7210395355266795, + "grad_norm": 1.1196052456206682, + "learning_rate": 3.8113406857109735e-06, + "loss": 0.3945, + "step": 41728 + }, + { + "epoch": 0.7210568150400885, + "grad_norm": 1.173311377406474, + "learning_rate": 3.8109010925795844e-06, + "loss": 0.3342, + "step": 41729 + }, + { + "epoch": 0.7210740945534974, + "grad_norm": 1.2156789107001686, + "learning_rate": 3.81046151883292e-06, + "loss": 0.3221, + "step": 41730 + }, + { + "epoch": 0.7210913740669063, + "grad_norm": 0.9731693850180836, + "learning_rate": 3.8100219644723623e-06, + "loss": 0.4227, + "step": 41731 + }, + { + "epoch": 0.7211086535803152, + "grad_norm": 1.4548224884895213, + "learning_rate": 3.8095824294992844e-06, + "loss": 0.2392, + "step": 41732 + }, + { + "epoch": 0.7211259330937241, + "grad_norm": 1.4153581022194694, + "learning_rate": 3.809142913915064e-06, + "loss": 0.1843, + "step": 41733 + }, + { + "epoch": 0.721143212607133, + "grad_norm": 1.1799998173972237, + "learning_rate": 3.8087034177210792e-06, + "loss": 0.212, + "step": 41734 + }, + { + "epoch": 0.7211604921205419, + "grad_norm": 1.7694124715154833, + "learning_rate": 3.808263940918708e-06, + "loss": 0.4769, + "step": 41735 + }, + { + "epoch": 0.7211777716339508, + "grad_norm": 1.150178473523859, + "learning_rate": 3.8078244835093258e-06, + "loss": 0.4076, + "step": 41736 + }, + { + "epoch": 0.7211950511473597, + "grad_norm": 0.7161012358132008, + "learning_rate": 3.807385045494304e-06, + "loss": 0.8254, + "step": 41737 + }, + { + "epoch": 0.7212123306607686, + "grad_norm": 1.1885582544820825, + "learning_rate": 3.8069456268750236e-06, + "loss": 0.5155, + "step": 41738 + }, + { + "epoch": 0.7212296101741775, + "grad_norm": 0.7167863534710925, + "learning_rate": 3.8065062276528597e-06, + "loss": 0.4974, + "step": 41739 + }, + { + "epoch": 0.7212468896875864, + "grad_norm": 1.2335805486714544, + "learning_rate": 3.806066847829192e-06, + "loss": 0.4032, + "step": 41740 + }, + { + "epoch": 0.7212641692009953, + "grad_norm": 0.8047402629449963, + "learning_rate": 3.80562748740539e-06, + "loss": 0.4789, + "step": 41741 + }, + { + "epoch": 0.7212814487144042, + "grad_norm": 2.0097576800809485, + "learning_rate": 3.8051881463828377e-06, + "loss": 0.2784, + "step": 41742 + }, + { + "epoch": 0.7212987282278132, + "grad_norm": 1.629473230591342, + "learning_rate": 3.8047488247629038e-06, + "loss": 0.4218, + "step": 41743 + }, + { + "epoch": 0.721316007741222, + "grad_norm": 1.1236691792117173, + "learning_rate": 3.804309522546966e-06, + "loss": 0.433, + "step": 41744 + }, + { + "epoch": 0.7213332872546309, + "grad_norm": 1.5152490454089205, + "learning_rate": 3.8038702397364027e-06, + "loss": 0.4687, + "step": 41745 + }, + { + "epoch": 0.7213505667680398, + "grad_norm": 0.7456961277757861, + "learning_rate": 3.8034309763325917e-06, + "loss": 0.2185, + "step": 41746 + }, + { + "epoch": 0.7213678462814487, + "grad_norm": 1.3975464246640097, + "learning_rate": 3.802991732336906e-06, + "loss": 0.551, + "step": 41747 + }, + { + "epoch": 0.7213851257948576, + "grad_norm": 0.933065043746205, + "learning_rate": 3.8025525077507177e-06, + "loss": 0.6095, + "step": 41748 + }, + { + "epoch": 0.7214024053082665, + "grad_norm": 1.5228354652181026, + "learning_rate": 3.80211330257541e-06, + "loss": 0.412, + "step": 41749 + }, + { + "epoch": 0.7214196848216754, + "grad_norm": 1.2551775962094531, + "learning_rate": 3.801674116812346e-06, + "loss": 0.4141, + "step": 41750 + }, + { + "epoch": 0.7214369643350843, + "grad_norm": 1.0791820201423223, + "learning_rate": 3.8012349504629175e-06, + "loss": 0.3206, + "step": 41751 + }, + { + "epoch": 0.7214542438484932, + "grad_norm": 1.220519436641559, + "learning_rate": 3.80079580352849e-06, + "loss": 0.4175, + "step": 41752 + }, + { + "epoch": 0.7214715233619021, + "grad_norm": 1.3261576231778993, + "learning_rate": 3.8003566760104427e-06, + "loss": 0.3954, + "step": 41753 + }, + { + "epoch": 0.721488802875311, + "grad_norm": 2.0239362362196647, + "learning_rate": 3.7999175679101463e-06, + "loss": 0.3766, + "step": 41754 + }, + { + "epoch": 0.7215060823887199, + "grad_norm": 1.254788505613592, + "learning_rate": 3.799478479228984e-06, + "loss": 0.4402, + "step": 41755 + }, + { + "epoch": 0.7215233619021288, + "grad_norm": 1.1662235180172225, + "learning_rate": 3.7990394099683214e-06, + "loss": 0.3115, + "step": 41756 + }, + { + "epoch": 0.7215406414155378, + "grad_norm": 1.0765948965807193, + "learning_rate": 3.7986003601295397e-06, + "loss": 0.407, + "step": 41757 + }, + { + "epoch": 0.7215579209289467, + "grad_norm": 1.573076181355162, + "learning_rate": 3.7981613297140163e-06, + "loss": 0.418, + "step": 41758 + }, + { + "epoch": 0.7215752004423556, + "grad_norm": 0.7536072595695188, + "learning_rate": 3.7977223187231183e-06, + "loss": 0.3249, + "step": 41759 + }, + { + "epoch": 0.7215924799557645, + "grad_norm": 1.602795148935596, + "learning_rate": 3.79728332715823e-06, + "loss": 0.4157, + "step": 41760 + }, + { + "epoch": 0.7216097594691734, + "grad_norm": 1.0081264954918965, + "learning_rate": 3.796844355020718e-06, + "loss": 0.3941, + "step": 41761 + }, + { + "epoch": 0.7216270389825823, + "grad_norm": 1.3085171234688007, + "learning_rate": 3.7964054023119602e-06, + "loss": 0.2792, + "step": 41762 + }, + { + "epoch": 0.7216443184959912, + "grad_norm": 1.1275176482480496, + "learning_rate": 3.7959664690333333e-06, + "loss": 0.4522, + "step": 41763 + }, + { + "epoch": 0.7216615980094001, + "grad_norm": 1.944613551467809, + "learning_rate": 3.7955275551862134e-06, + "loss": 0.5079, + "step": 41764 + }, + { + "epoch": 0.7216788775228089, + "grad_norm": 1.818185430883209, + "learning_rate": 3.7950886607719696e-06, + "loss": 0.4884, + "step": 41765 + }, + { + "epoch": 0.7216961570362178, + "grad_norm": 1.0018722408631566, + "learning_rate": 3.794649785791984e-06, + "loss": 0.4636, + "step": 41766 + }, + { + "epoch": 0.7217134365496267, + "grad_norm": 0.9934206062070272, + "learning_rate": 3.7942109302476225e-06, + "loss": 0.2825, + "step": 41767 + }, + { + "epoch": 0.7217307160630356, + "grad_norm": 1.3248421403674242, + "learning_rate": 3.7937720941402634e-06, + "loss": 0.2329, + "step": 41768 + }, + { + "epoch": 0.7217479955764445, + "grad_norm": 1.5576601435812774, + "learning_rate": 3.7933332774712862e-06, + "loss": 0.4145, + "step": 41769 + }, + { + "epoch": 0.7217652750898534, + "grad_norm": 0.9234267930223309, + "learning_rate": 3.7928944802420575e-06, + "loss": 0.3946, + "step": 41770 + }, + { + "epoch": 0.7217825546032623, + "grad_norm": 1.873883545286497, + "learning_rate": 3.7924557024539577e-06, + "loss": 0.4628, + "step": 41771 + }, + { + "epoch": 0.7217998341166713, + "grad_norm": 0.982487737620928, + "learning_rate": 3.7920169441083553e-06, + "loss": 0.3984, + "step": 41772 + }, + { + "epoch": 0.7218171136300802, + "grad_norm": 1.244003087150789, + "learning_rate": 3.7915782052066276e-06, + "loss": 0.4894, + "step": 41773 + }, + { + "epoch": 0.7218343931434891, + "grad_norm": 0.8656595666362026, + "learning_rate": 3.7911394857501493e-06, + "loss": 0.3188, + "step": 41774 + }, + { + "epoch": 0.721851672656898, + "grad_norm": 1.1005494473918487, + "learning_rate": 3.790700785740297e-06, + "loss": 0.3962, + "step": 41775 + }, + { + "epoch": 0.7218689521703069, + "grad_norm": 1.242360486131354, + "learning_rate": 3.790262105178438e-06, + "loss": 0.5131, + "step": 41776 + }, + { + "epoch": 0.7218862316837158, + "grad_norm": 1.1871422999620347, + "learning_rate": 3.789823444065953e-06, + "loss": 0.3568, + "step": 41777 + }, + { + "epoch": 0.7219035111971247, + "grad_norm": 0.9085231253116804, + "learning_rate": 3.7893848024042103e-06, + "loss": 0.3254, + "step": 41778 + }, + { + "epoch": 0.7219207907105336, + "grad_norm": 1.260214078787622, + "learning_rate": 3.788946180194587e-06, + "loss": 0.3426, + "step": 41779 + }, + { + "epoch": 0.7219380702239425, + "grad_norm": 1.4486474608114774, + "learning_rate": 3.7885075774384582e-06, + "loss": 0.3802, + "step": 41780 + }, + { + "epoch": 0.7219553497373514, + "grad_norm": 2.0068288077505163, + "learning_rate": 3.788068994137194e-06, + "loss": 0.421, + "step": 41781 + }, + { + "epoch": 0.7219726292507603, + "grad_norm": 1.0388775586881884, + "learning_rate": 3.7876304302921717e-06, + "loss": 0.233, + "step": 41782 + }, + { + "epoch": 0.7219899087641692, + "grad_norm": 1.0847119308237951, + "learning_rate": 3.787191885904761e-06, + "loss": 0.3549, + "step": 41783 + }, + { + "epoch": 0.7220071882775781, + "grad_norm": 1.427344165832654, + "learning_rate": 3.7867533609763365e-06, + "loss": 0.2918, + "step": 41784 + }, + { + "epoch": 0.7220244677909871, + "grad_norm": 0.9078145913937227, + "learning_rate": 3.7863148555082717e-06, + "loss": 0.649, + "step": 41785 + }, + { + "epoch": 0.7220417473043959, + "grad_norm": 1.2259965214642692, + "learning_rate": 3.7858763695019464e-06, + "loss": 0.5138, + "step": 41786 + }, + { + "epoch": 0.7220590268178048, + "grad_norm": 1.2570011180938974, + "learning_rate": 3.7854379029587272e-06, + "loss": 0.3408, + "step": 41787 + }, + { + "epoch": 0.7220763063312137, + "grad_norm": 0.9911723106264115, + "learning_rate": 3.7849994558799863e-06, + "loss": 0.3467, + "step": 41788 + }, + { + "epoch": 0.7220935858446226, + "grad_norm": 1.7721093669846428, + "learning_rate": 3.784561028267102e-06, + "loss": 0.4644, + "step": 41789 + }, + { + "epoch": 0.7221108653580315, + "grad_norm": 1.7233665033505192, + "learning_rate": 3.784122620121439e-06, + "loss": 0.4797, + "step": 41790 + }, + { + "epoch": 0.7221281448714404, + "grad_norm": 1.290780837432591, + "learning_rate": 3.7836842314443822e-06, + "loss": 0.3105, + "step": 41791 + }, + { + "epoch": 0.7221454243848493, + "grad_norm": 1.4462716240024833, + "learning_rate": 3.783245862237296e-06, + "loss": 0.3602, + "step": 41792 + }, + { + "epoch": 0.7221627038982582, + "grad_norm": 1.2383930693851826, + "learning_rate": 3.7828075125015597e-06, + "loss": 0.4182, + "step": 41793 + }, + { + "epoch": 0.7221799834116671, + "grad_norm": 0.7452195989708281, + "learning_rate": 3.782369182238539e-06, + "loss": 0.2821, + "step": 41794 + }, + { + "epoch": 0.722197262925076, + "grad_norm": 0.7821138190714131, + "learning_rate": 3.781930871449614e-06, + "loss": 0.2895, + "step": 41795 + }, + { + "epoch": 0.7222145424384849, + "grad_norm": 1.0754331338104652, + "learning_rate": 3.781492580136148e-06, + "loss": 0.381, + "step": 41796 + }, + { + "epoch": 0.7222318219518938, + "grad_norm": 1.3228940226121246, + "learning_rate": 3.7810543082995256e-06, + "loss": 0.3472, + "step": 41797 + }, + { + "epoch": 0.7222491014653027, + "grad_norm": 1.174865471363786, + "learning_rate": 3.7806160559411143e-06, + "loss": 0.4948, + "step": 41798 + }, + { + "epoch": 0.7222663809787117, + "grad_norm": 1.2820325729058595, + "learning_rate": 3.7801778230622823e-06, + "loss": 0.3607, + "step": 41799 + }, + { + "epoch": 0.7222836604921206, + "grad_norm": 0.8637182781883211, + "learning_rate": 3.77973960966441e-06, + "loss": 0.3308, + "step": 41800 + }, + { + "epoch": 0.7223009400055295, + "grad_norm": 1.1166158470146497, + "learning_rate": 3.7793014157488615e-06, + "loss": 0.4158, + "step": 41801 + }, + { + "epoch": 0.7223182195189384, + "grad_norm": 1.1917853733870007, + "learning_rate": 3.7788632413170155e-06, + "loss": 0.3128, + "step": 41802 + }, + { + "epoch": 0.7223354990323473, + "grad_norm": 0.9535234204367197, + "learning_rate": 3.778425086370241e-06, + "loss": 0.3527, + "step": 41803 + }, + { + "epoch": 0.7223527785457562, + "grad_norm": 1.3144050254395612, + "learning_rate": 3.7779869509099166e-06, + "loss": 0.4594, + "step": 41804 + }, + { + "epoch": 0.7223700580591651, + "grad_norm": 0.786994522216071, + "learning_rate": 3.7775488349374056e-06, + "loss": 0.3144, + "step": 41805 + }, + { + "epoch": 0.722387337572574, + "grad_norm": 1.252336432998876, + "learning_rate": 3.7771107384540884e-06, + "loss": 0.2622, + "step": 41806 + }, + { + "epoch": 0.7224046170859829, + "grad_norm": 1.7636530663431162, + "learning_rate": 3.7766726614613293e-06, + "loss": 0.2593, + "step": 41807 + }, + { + "epoch": 0.7224218965993917, + "grad_norm": 0.9686884463264029, + "learning_rate": 3.7762346039605047e-06, + "loss": 0.2123, + "step": 41808 + }, + { + "epoch": 0.7224391761128006, + "grad_norm": 1.4010801577810874, + "learning_rate": 3.7757965659529905e-06, + "loss": 0.3181, + "step": 41809 + }, + { + "epoch": 0.7224564556262095, + "grad_norm": 1.432433479007988, + "learning_rate": 3.77535854744015e-06, + "loss": 0.3597, + "step": 41810 + }, + { + "epoch": 0.7224737351396184, + "grad_norm": 1.0288081491918504, + "learning_rate": 3.774920548423364e-06, + "loss": 0.5635, + "step": 41811 + }, + { + "epoch": 0.7224910146530273, + "grad_norm": 1.218677028321571, + "learning_rate": 3.7744825689039954e-06, + "loss": 0.2788, + "step": 41812 + }, + { + "epoch": 0.7225082941664362, + "grad_norm": 1.093167878642191, + "learning_rate": 3.7740446088834213e-06, + "loss": 0.312, + "step": 41813 + }, + { + "epoch": 0.7225255736798452, + "grad_norm": 1.4204047865336606, + "learning_rate": 3.7736066683630123e-06, + "loss": 0.4099, + "step": 41814 + }, + { + "epoch": 0.7225428531932541, + "grad_norm": 1.146490291193564, + "learning_rate": 3.7731687473441435e-06, + "loss": 0.3221, + "step": 41815 + }, + { + "epoch": 0.722560132706663, + "grad_norm": 1.2470721148506025, + "learning_rate": 3.772730845828181e-06, + "loss": 0.3898, + "step": 41816 + }, + { + "epoch": 0.7225774122200719, + "grad_norm": 1.5921499572896693, + "learning_rate": 3.772292963816502e-06, + "loss": 0.3464, + "step": 41817 + }, + { + "epoch": 0.7225946917334808, + "grad_norm": 1.0143509877405337, + "learning_rate": 3.7718551013104697e-06, + "loss": 0.1904, + "step": 41818 + }, + { + "epoch": 0.7226119712468897, + "grad_norm": 1.0087029656670403, + "learning_rate": 3.7714172583114626e-06, + "loss": 0.2822, + "step": 41819 + }, + { + "epoch": 0.7226292507602986, + "grad_norm": 1.1118788368656725, + "learning_rate": 3.7709794348208517e-06, + "loss": 0.3929, + "step": 41820 + }, + { + "epoch": 0.7226465302737075, + "grad_norm": 1.1739164721552948, + "learning_rate": 3.7705416308400044e-06, + "loss": 0.4835, + "step": 41821 + }, + { + "epoch": 0.7226638097871164, + "grad_norm": 1.055427196874873, + "learning_rate": 3.7701038463702977e-06, + "loss": 0.2952, + "step": 41822 + }, + { + "epoch": 0.7226810893005253, + "grad_norm": 1.269301262433397, + "learning_rate": 3.7696660814130948e-06, + "loss": 0.3347, + "step": 41823 + }, + { + "epoch": 0.7226983688139342, + "grad_norm": 1.0380727314022773, + "learning_rate": 3.7692283359697712e-06, + "loss": 0.8526, + "step": 41824 + }, + { + "epoch": 0.7227156483273431, + "grad_norm": 1.1288542435318272, + "learning_rate": 3.7687906100416993e-06, + "loss": 0.3201, + "step": 41825 + }, + { + "epoch": 0.722732927840752, + "grad_norm": 1.0786376545733074, + "learning_rate": 3.768352903630251e-06, + "loss": 0.2867, + "step": 41826 + }, + { + "epoch": 0.722750207354161, + "grad_norm": 1.209793957424393, + "learning_rate": 3.7679152167367917e-06, + "loss": 0.3995, + "step": 41827 + }, + { + "epoch": 0.7227674868675699, + "grad_norm": 1.0984372512550868, + "learning_rate": 3.767477549362699e-06, + "loss": 0.3424, + "step": 41828 + }, + { + "epoch": 0.7227847663809787, + "grad_norm": 0.6180514201777814, + "learning_rate": 3.7670399015093407e-06, + "loss": 0.8616, + "step": 41829 + }, + { + "epoch": 0.7228020458943876, + "grad_norm": 1.7569998088852667, + "learning_rate": 3.7666022731780804e-06, + "loss": 0.3442, + "step": 41830 + }, + { + "epoch": 0.7228193254077965, + "grad_norm": 1.059066384773399, + "learning_rate": 3.7661646643703022e-06, + "loss": 0.4432, + "step": 41831 + }, + { + "epoch": 0.7228366049212054, + "grad_norm": 2.8877340422988036, + "learning_rate": 3.7657270750873666e-06, + "loss": 0.5083, + "step": 41832 + }, + { + "epoch": 0.7228538844346143, + "grad_norm": 1.2131683336051928, + "learning_rate": 3.7652895053306504e-06, + "loss": 0.4182, + "step": 41833 + }, + { + "epoch": 0.7228711639480232, + "grad_norm": 1.5424295207346235, + "learning_rate": 3.7648519551015182e-06, + "loss": 0.4032, + "step": 41834 + }, + { + "epoch": 0.7228884434614321, + "grad_norm": 1.0271250465156574, + "learning_rate": 3.764414424401347e-06, + "loss": 0.2715, + "step": 41835 + }, + { + "epoch": 0.722905722974841, + "grad_norm": 1.7519244509676961, + "learning_rate": 3.763976913231496e-06, + "loss": 0.3271, + "step": 41836 + }, + { + "epoch": 0.7229230024882499, + "grad_norm": 1.4685802519785949, + "learning_rate": 3.763539421593352e-06, + "loss": 0.3422, + "step": 41837 + }, + { + "epoch": 0.7229402820016588, + "grad_norm": 1.4445867894032516, + "learning_rate": 3.763101949488275e-06, + "loss": 0.2925, + "step": 41838 + }, + { + "epoch": 0.7229575615150677, + "grad_norm": 1.1006179465197772, + "learning_rate": 3.762664496917634e-06, + "loss": 0.2703, + "step": 41839 + }, + { + "epoch": 0.7229748410284766, + "grad_norm": 1.5935460598020725, + "learning_rate": 3.762227063882805e-06, + "loss": 0.5052, + "step": 41840 + }, + { + "epoch": 0.7229921205418856, + "grad_norm": 1.1845673911035057, + "learning_rate": 3.7617896503851515e-06, + "loss": 0.3943, + "step": 41841 + }, + { + "epoch": 0.7230094000552945, + "grad_norm": 1.1545906469262772, + "learning_rate": 3.7613522564260463e-06, + "loss": 0.1925, + "step": 41842 + }, + { + "epoch": 0.7230266795687034, + "grad_norm": 1.5937970175760623, + "learning_rate": 3.760914882006861e-06, + "loss": 0.3678, + "step": 41843 + }, + { + "epoch": 0.7230439590821123, + "grad_norm": 1.1320425045512867, + "learning_rate": 3.7604775271289673e-06, + "loss": 0.3622, + "step": 41844 + }, + { + "epoch": 0.7230612385955212, + "grad_norm": 1.225824890658512, + "learning_rate": 3.760040191793728e-06, + "loss": 0.4148, + "step": 41845 + }, + { + "epoch": 0.7230785181089301, + "grad_norm": 1.1871169618707917, + "learning_rate": 3.7596028760025214e-06, + "loss": 0.24, + "step": 41846 + }, + { + "epoch": 0.723095797622339, + "grad_norm": 1.0441883673317112, + "learning_rate": 3.7591655797567093e-06, + "loss": 0.4852, + "step": 41847 + }, + { + "epoch": 0.7231130771357479, + "grad_norm": 2.9967569902365794, + "learning_rate": 3.7587283030576647e-06, + "loss": 0.29, + "step": 41848 + }, + { + "epoch": 0.7231303566491568, + "grad_norm": 1.906804941638289, + "learning_rate": 3.758291045906761e-06, + "loss": 0.3713, + "step": 41849 + }, + { + "epoch": 0.7231476361625656, + "grad_norm": 1.249476219864988, + "learning_rate": 3.7578538083053607e-06, + "loss": 0.3531, + "step": 41850 + }, + { + "epoch": 0.7231649156759745, + "grad_norm": 1.4158054127105681, + "learning_rate": 3.7574165902548397e-06, + "loss": 0.2777, + "step": 41851 + }, + { + "epoch": 0.7231821951893834, + "grad_norm": 1.4724606234632214, + "learning_rate": 3.756979391756561e-06, + "loss": 0.3395, + "step": 41852 + }, + { + "epoch": 0.7231994747027923, + "grad_norm": 1.1683533105039583, + "learning_rate": 3.7565422128118978e-06, + "loss": 0.4318, + "step": 41853 + }, + { + "epoch": 0.7232167542162012, + "grad_norm": 1.407255625207081, + "learning_rate": 3.7561050534222187e-06, + "loss": 0.197, + "step": 41854 + }, + { + "epoch": 0.7232340337296101, + "grad_norm": 1.0390202663238401, + "learning_rate": 3.755667913588896e-06, + "loss": 0.2507, + "step": 41855 + }, + { + "epoch": 0.7232513132430191, + "grad_norm": 1.4208067842025118, + "learning_rate": 3.7552307933132936e-06, + "loss": 0.2289, + "step": 41856 + }, + { + "epoch": 0.723268592756428, + "grad_norm": 1.1129891111413324, + "learning_rate": 3.7547936925967856e-06, + "loss": 0.8544, + "step": 41857 + }, + { + "epoch": 0.7232858722698369, + "grad_norm": 1.068908462215901, + "learning_rate": 3.754356611440735e-06, + "loss": 0.4008, + "step": 41858 + }, + { + "epoch": 0.7233031517832458, + "grad_norm": 1.1893936007273418, + "learning_rate": 3.7539195498465144e-06, + "loss": 0.3754, + "step": 41859 + }, + { + "epoch": 0.7233204312966547, + "grad_norm": 1.4429803742891094, + "learning_rate": 3.7534825078154957e-06, + "loss": 0.3897, + "step": 41860 + }, + { + "epoch": 0.7233377108100636, + "grad_norm": 3.137979366143866, + "learning_rate": 3.7530454853490407e-06, + "loss": 0.5064, + "step": 41861 + }, + { + "epoch": 0.7233549903234725, + "grad_norm": 0.9828268225024822, + "learning_rate": 3.7526084824485263e-06, + "loss": 0.3291, + "step": 41862 + }, + { + "epoch": 0.7233722698368814, + "grad_norm": 1.4878121259403316, + "learning_rate": 3.7521714991153124e-06, + "loss": 0.3243, + "step": 41863 + }, + { + "epoch": 0.7233895493502903, + "grad_norm": 1.1885488759635674, + "learning_rate": 3.751734535350773e-06, + "loss": 0.3551, + "step": 41864 + }, + { + "epoch": 0.7234068288636992, + "grad_norm": 0.8809351655013832, + "learning_rate": 3.7512975911562753e-06, + "loss": 0.3016, + "step": 41865 + }, + { + "epoch": 0.7234241083771081, + "grad_norm": 1.1066157210250493, + "learning_rate": 3.7508606665331926e-06, + "loss": 0.2877, + "step": 41866 + }, + { + "epoch": 0.723441387890517, + "grad_norm": 1.66955737081542, + "learning_rate": 3.7504237614828853e-06, + "loss": 0.2954, + "step": 41867 + }, + { + "epoch": 0.723458667403926, + "grad_norm": 1.6671032338656657, + "learning_rate": 3.749986876006729e-06, + "loss": 0.3929, + "step": 41868 + }, + { + "epoch": 0.7234759469173349, + "grad_norm": 1.145761950531511, + "learning_rate": 3.749550010106089e-06, + "loss": 0.4148, + "step": 41869 + }, + { + "epoch": 0.7234932264307438, + "grad_norm": 1.3076693271751432, + "learning_rate": 3.7491131637823262e-06, + "loss": 0.4092, + "step": 41870 + }, + { + "epoch": 0.7235105059441526, + "grad_norm": 1.887553151727266, + "learning_rate": 3.748676337036824e-06, + "loss": 0.2544, + "step": 41871 + }, + { + "epoch": 0.7235277854575615, + "grad_norm": 1.4297620860412557, + "learning_rate": 3.7482395298709384e-06, + "loss": 0.2205, + "step": 41872 + }, + { + "epoch": 0.7235450649709704, + "grad_norm": 1.3339899819898626, + "learning_rate": 3.747802742286045e-06, + "loss": 0.5408, + "step": 41873 + }, + { + "epoch": 0.7235623444843793, + "grad_norm": 1.3263108278309357, + "learning_rate": 3.7473659742835057e-06, + "loss": 0.466, + "step": 41874 + }, + { + "epoch": 0.7235796239977882, + "grad_norm": 1.3149772141758527, + "learning_rate": 3.7469292258646937e-06, + "loss": 0.4258, + "step": 41875 + }, + { + "epoch": 0.7235969035111971, + "grad_norm": 1.6894533512422358, + "learning_rate": 3.746492497030969e-06, + "loss": 0.3862, + "step": 41876 + }, + { + "epoch": 0.723614183024606, + "grad_norm": 1.1427454219084614, + "learning_rate": 3.746055787783711e-06, + "loss": 0.3353, + "step": 41877 + }, + { + "epoch": 0.7236314625380149, + "grad_norm": 1.1597617508248792, + "learning_rate": 3.7456190981242777e-06, + "loss": 0.159, + "step": 41878 + }, + { + "epoch": 0.7236487420514238, + "grad_norm": 1.1741093424291216, + "learning_rate": 3.745182428054045e-06, + "loss": 0.2561, + "step": 41879 + }, + { + "epoch": 0.7236660215648327, + "grad_norm": 1.4385288929967093, + "learning_rate": 3.7447457775743767e-06, + "loss": 0.2149, + "step": 41880 + }, + { + "epoch": 0.7236833010782416, + "grad_norm": 2.0855253807976, + "learning_rate": 3.7443091466866356e-06, + "loss": 0.225, + "step": 41881 + }, + { + "epoch": 0.7237005805916505, + "grad_norm": 0.8949298827148686, + "learning_rate": 3.7438725353921947e-06, + "loss": 0.379, + "step": 41882 + }, + { + "epoch": 0.7237178601050595, + "grad_norm": 2.2882485293987878, + "learning_rate": 3.7434359436924194e-06, + "loss": 0.4812, + "step": 41883 + }, + { + "epoch": 0.7237351396184684, + "grad_norm": 1.3644202055249421, + "learning_rate": 3.7429993715886815e-06, + "loss": 0.367, + "step": 41884 + }, + { + "epoch": 0.7237524191318773, + "grad_norm": 1.2706511658016353, + "learning_rate": 3.7425628190823425e-06, + "loss": 0.2564, + "step": 41885 + }, + { + "epoch": 0.7237696986452862, + "grad_norm": 0.8073705956080927, + "learning_rate": 3.7421262861747755e-06, + "loss": 0.2918, + "step": 41886 + }, + { + "epoch": 0.7237869781586951, + "grad_norm": 1.6563878998260977, + "learning_rate": 3.741689772867341e-06, + "loss": 0.4987, + "step": 41887 + }, + { + "epoch": 0.723804257672104, + "grad_norm": 0.6317551818310089, + "learning_rate": 3.74125327916141e-06, + "loss": 0.3004, + "step": 41888 + }, + { + "epoch": 0.7238215371855129, + "grad_norm": 1.8222241331430133, + "learning_rate": 3.7408168050583493e-06, + "loss": 0.4104, + "step": 41889 + }, + { + "epoch": 0.7238388166989218, + "grad_norm": 1.9293261339451266, + "learning_rate": 3.7403803505595295e-06, + "loss": 0.6336, + "step": 41890 + }, + { + "epoch": 0.7238560962123307, + "grad_norm": 1.577170294325155, + "learning_rate": 3.7399439156663144e-06, + "loss": 0.422, + "step": 41891 + }, + { + "epoch": 0.7238733757257395, + "grad_norm": 1.1373130550834833, + "learning_rate": 3.7395075003800663e-06, + "loss": 0.408, + "step": 41892 + }, + { + "epoch": 0.7238906552391484, + "grad_norm": 2.33104525622062, + "learning_rate": 3.7390711047021566e-06, + "loss": 0.3779, + "step": 41893 + }, + { + "epoch": 0.7239079347525573, + "grad_norm": 1.4418214956047588, + "learning_rate": 3.738634728633953e-06, + "loss": 0.3391, + "step": 41894 + }, + { + "epoch": 0.7239252142659662, + "grad_norm": 2.8115861530081543, + "learning_rate": 3.7381983721768246e-06, + "loss": 0.2705, + "step": 41895 + }, + { + "epoch": 0.7239424937793751, + "grad_norm": 2.181973789764505, + "learning_rate": 3.737762035332131e-06, + "loss": 0.4374, + "step": 41896 + }, + { + "epoch": 0.723959773292784, + "grad_norm": 1.4275412394726847, + "learning_rate": 3.7373257181012455e-06, + "loss": 0.3289, + "step": 41897 + }, + { + "epoch": 0.723977052806193, + "grad_norm": 1.0241181366700256, + "learning_rate": 3.736889420485529e-06, + "loss": 0.4179, + "step": 41898 + }, + { + "epoch": 0.7239943323196019, + "grad_norm": 1.0081600045055528, + "learning_rate": 3.7364531424863504e-06, + "loss": 0.4436, + "step": 41899 + }, + { + "epoch": 0.7240116118330108, + "grad_norm": 1.0940409530002948, + "learning_rate": 3.7360168841050805e-06, + "loss": 0.2873, + "step": 41900 + }, + { + "epoch": 0.7240288913464197, + "grad_norm": 1.8803616625860926, + "learning_rate": 3.735580645343078e-06, + "loss": 0.3462, + "step": 41901 + }, + { + "epoch": 0.7240461708598286, + "grad_norm": 1.3641264066502619, + "learning_rate": 3.7351444262017167e-06, + "loss": 0.2402, + "step": 41902 + }, + { + "epoch": 0.7240634503732375, + "grad_norm": 1.2263526063850119, + "learning_rate": 3.7347082266823553e-06, + "loss": 0.39, + "step": 41903 + }, + { + "epoch": 0.7240807298866464, + "grad_norm": 0.7428331544181594, + "learning_rate": 3.7342720467863646e-06, + "loss": 0.4686, + "step": 41904 + }, + { + "epoch": 0.7240980094000553, + "grad_norm": 1.2430904098357616, + "learning_rate": 3.733835886515109e-06, + "loss": 0.3815, + "step": 41905 + }, + { + "epoch": 0.7241152889134642, + "grad_norm": 0.6797100446560304, + "learning_rate": 3.73339974586996e-06, + "loss": 0.5945, + "step": 41906 + }, + { + "epoch": 0.7241325684268731, + "grad_norm": 1.0641858583787378, + "learning_rate": 3.732963624852275e-06, + "loss": 0.5035, + "step": 41907 + }, + { + "epoch": 0.724149847940282, + "grad_norm": 0.5097752208472986, + "learning_rate": 3.7325275234634285e-06, + "loss": 0.5754, + "step": 41908 + }, + { + "epoch": 0.7241671274536909, + "grad_norm": 0.8995996815706585, + "learning_rate": 3.7320914417047784e-06, + "loss": 0.3581, + "step": 41909 + }, + { + "epoch": 0.7241844069670998, + "grad_norm": 1.2601082597899318, + "learning_rate": 3.7316553795776932e-06, + "loss": 0.4778, + "step": 41910 + }, + { + "epoch": 0.7242016864805088, + "grad_norm": 1.2606373865904974, + "learning_rate": 3.7312193370835447e-06, + "loss": 0.2504, + "step": 41911 + }, + { + "epoch": 0.7242189659939177, + "grad_norm": 1.2400057943981666, + "learning_rate": 3.7307833142236893e-06, + "loss": 0.2276, + "step": 41912 + }, + { + "epoch": 0.7242362455073265, + "grad_norm": 1.0109984518708943, + "learning_rate": 3.7303473109995014e-06, + "loss": 0.3455, + "step": 41913 + }, + { + "epoch": 0.7242535250207354, + "grad_norm": 1.8471829537817546, + "learning_rate": 3.729911327412338e-06, + "loss": 0.2897, + "step": 41914 + }, + { + "epoch": 0.7242708045341443, + "grad_norm": 1.2163015932900183, + "learning_rate": 3.7294753634635715e-06, + "loss": 0.4663, + "step": 41915 + }, + { + "epoch": 0.7242880840475532, + "grad_norm": 1.2432822433154038, + "learning_rate": 3.7290394191545633e-06, + "loss": 0.3988, + "step": 41916 + }, + { + "epoch": 0.7243053635609621, + "grad_norm": 1.4817301575572448, + "learning_rate": 3.728603494486679e-06, + "loss": 0.3478, + "step": 41917 + }, + { + "epoch": 0.724322643074371, + "grad_norm": 1.2056127573191466, + "learning_rate": 3.7281675894612846e-06, + "loss": 0.5111, + "step": 41918 + }, + { + "epoch": 0.7243399225877799, + "grad_norm": 1.1803320380142557, + "learning_rate": 3.72773170407975e-06, + "loss": 0.3881, + "step": 41919 + }, + { + "epoch": 0.7243572021011888, + "grad_norm": 1.1180317977750425, + "learning_rate": 3.727295838343433e-06, + "loss": 0.3647, + "step": 41920 + }, + { + "epoch": 0.7243744816145977, + "grad_norm": 1.3024763479007042, + "learning_rate": 3.7268599922537054e-06, + "loss": 0.4651, + "step": 41921 + }, + { + "epoch": 0.7243917611280066, + "grad_norm": 1.5522691221300018, + "learning_rate": 3.726424165811926e-06, + "loss": 0.3057, + "step": 41922 + }, + { + "epoch": 0.7244090406414155, + "grad_norm": 1.2641357977163463, + "learning_rate": 3.7259883590194625e-06, + "loss": 0.3434, + "step": 41923 + }, + { + "epoch": 0.7244263201548244, + "grad_norm": 1.159244443105666, + "learning_rate": 3.7255525718776832e-06, + "loss": 0.4307, + "step": 41924 + }, + { + "epoch": 0.7244435996682334, + "grad_norm": 1.4291063131037138, + "learning_rate": 3.725116804387947e-06, + "loss": 0.3648, + "step": 41925 + }, + { + "epoch": 0.7244608791816423, + "grad_norm": 2.1052790673665096, + "learning_rate": 3.7246810565516255e-06, + "loss": 0.4424, + "step": 41926 + }, + { + "epoch": 0.7244781586950512, + "grad_norm": 0.8268704057341918, + "learning_rate": 3.7242453283700775e-06, + "loss": 0.186, + "step": 41927 + }, + { + "epoch": 0.7244954382084601, + "grad_norm": 1.3196647259494327, + "learning_rate": 3.723809619844668e-06, + "loss": 0.3396, + "step": 41928 + }, + { + "epoch": 0.724512717721869, + "grad_norm": 1.40642799518589, + "learning_rate": 3.7233739309767657e-06, + "loss": 0.3733, + "step": 41929 + }, + { + "epoch": 0.7245299972352779, + "grad_norm": 0.9932929977609625, + "learning_rate": 3.7229382617677356e-06, + "loss": 0.3461, + "step": 41930 + }, + { + "epoch": 0.7245472767486868, + "grad_norm": 1.8461433489091308, + "learning_rate": 3.72250261221894e-06, + "loss": 0.4157, + "step": 41931 + }, + { + "epoch": 0.7245645562620957, + "grad_norm": 1.4109214374127594, + "learning_rate": 3.7220669823317402e-06, + "loss": 0.3382, + "step": 41932 + }, + { + "epoch": 0.7245818357755046, + "grad_norm": 1.2902102662324806, + "learning_rate": 3.7216313721075024e-06, + "loss": 0.4455, + "step": 41933 + }, + { + "epoch": 0.7245991152889134, + "grad_norm": 1.0228698919858432, + "learning_rate": 3.721195781547594e-06, + "loss": 0.6975, + "step": 41934 + }, + { + "epoch": 0.7246163948023223, + "grad_norm": 0.46976449291280437, + "learning_rate": 3.7207602106533804e-06, + "loss": 0.6009, + "step": 41935 + }, + { + "epoch": 0.7246336743157312, + "grad_norm": 1.0443483394566924, + "learning_rate": 3.720324659426219e-06, + "loss": 0.527, + "step": 41936 + }, + { + "epoch": 0.7246509538291401, + "grad_norm": 1.644007497911971, + "learning_rate": 3.7198891278674822e-06, + "loss": 0.5843, + "step": 41937 + }, + { + "epoch": 0.724668233342549, + "grad_norm": 1.2150094832478608, + "learning_rate": 3.7194536159785265e-06, + "loss": 0.5427, + "step": 41938 + }, + { + "epoch": 0.724685512855958, + "grad_norm": 0.918863419114861, + "learning_rate": 3.719018123760719e-06, + "loss": 0.3317, + "step": 41939 + }, + { + "epoch": 0.7247027923693669, + "grad_norm": 1.6561872155569373, + "learning_rate": 3.7185826512154243e-06, + "loss": 0.2727, + "step": 41940 + }, + { + "epoch": 0.7247200718827758, + "grad_norm": 1.1944243354657884, + "learning_rate": 3.718147198344009e-06, + "loss": 0.4511, + "step": 41941 + }, + { + "epoch": 0.7247373513961847, + "grad_norm": 2.028370279616842, + "learning_rate": 3.7177117651478346e-06, + "loss": 0.3604, + "step": 41942 + }, + { + "epoch": 0.7247546309095936, + "grad_norm": 1.0331515449745394, + "learning_rate": 3.7172763516282607e-06, + "loss": 0.3621, + "step": 41943 + }, + { + "epoch": 0.7247719104230025, + "grad_norm": 1.5924235993767797, + "learning_rate": 3.7168409577866547e-06, + "loss": 0.4394, + "step": 41944 + }, + { + "epoch": 0.7247891899364114, + "grad_norm": 1.0116839636061272, + "learning_rate": 3.7164055836243807e-06, + "loss": 0.3625, + "step": 41945 + }, + { + "epoch": 0.7248064694498203, + "grad_norm": 1.3941195493861807, + "learning_rate": 3.7159702291428055e-06, + "loss": 0.3279, + "step": 41946 + }, + { + "epoch": 0.7248237489632292, + "grad_norm": 1.2057568933815919, + "learning_rate": 3.7155348943432855e-06, + "loss": 0.4349, + "step": 41947 + }, + { + "epoch": 0.7248410284766381, + "grad_norm": 1.0291243710628029, + "learning_rate": 3.7150995792271904e-06, + "loss": 0.2582, + "step": 41948 + }, + { + "epoch": 0.724858307990047, + "grad_norm": 0.9900320930612773, + "learning_rate": 3.7146642837958793e-06, + "loss": 0.2651, + "step": 41949 + }, + { + "epoch": 0.7248755875034559, + "grad_norm": 1.6453292855053534, + "learning_rate": 3.714229008050716e-06, + "loss": 0.3853, + "step": 41950 + }, + { + "epoch": 0.7248928670168648, + "grad_norm": 1.6098762184733941, + "learning_rate": 3.7137937519930655e-06, + "loss": 0.3409, + "step": 41951 + }, + { + "epoch": 0.7249101465302737, + "grad_norm": 1.2711029193292158, + "learning_rate": 3.7133585156242936e-06, + "loss": 0.379, + "step": 41952 + }, + { + "epoch": 0.7249274260436827, + "grad_norm": 1.351336276408682, + "learning_rate": 3.712923298945762e-06, + "loss": 0.3856, + "step": 41953 + }, + { + "epoch": 0.7249447055570916, + "grad_norm": 1.6679056347254395, + "learning_rate": 3.7124881019588277e-06, + "loss": 0.2401, + "step": 41954 + }, + { + "epoch": 0.7249619850705005, + "grad_norm": 1.2875729035603627, + "learning_rate": 3.712052924664862e-06, + "loss": 0.5473, + "step": 41955 + }, + { + "epoch": 0.7249792645839093, + "grad_norm": 1.6240233825082435, + "learning_rate": 3.711617767065222e-06, + "loss": 0.5575, + "step": 41956 + }, + { + "epoch": 0.7249965440973182, + "grad_norm": 1.1492038356083503, + "learning_rate": 3.711182629161272e-06, + "loss": 0.5237, + "step": 41957 + }, + { + "epoch": 0.7250138236107271, + "grad_norm": 0.9595055652024649, + "learning_rate": 3.7107475109543767e-06, + "loss": 0.4403, + "step": 41958 + }, + { + "epoch": 0.725031103124136, + "grad_norm": 1.0644318231700098, + "learning_rate": 3.710312412445901e-06, + "loss": 0.2779, + "step": 41959 + }, + { + "epoch": 0.7250483826375449, + "grad_norm": 1.3230462503749978, + "learning_rate": 3.709877333637202e-06, + "loss": 0.3022, + "step": 41960 + }, + { + "epoch": 0.7250656621509538, + "grad_norm": 1.395983452613968, + "learning_rate": 3.709442274529648e-06, + "loss": 0.2904, + "step": 41961 + }, + { + "epoch": 0.7250829416643627, + "grad_norm": 1.703359651415221, + "learning_rate": 3.7090072351245952e-06, + "loss": 0.3049, + "step": 41962 + }, + { + "epoch": 0.7251002211777716, + "grad_norm": 1.0187673275190303, + "learning_rate": 3.7085722154234107e-06, + "loss": 0.4167, + "step": 41963 + }, + { + "epoch": 0.7251175006911805, + "grad_norm": 2.3705323922585007, + "learning_rate": 3.7081372154274595e-06, + "loss": 0.3516, + "step": 41964 + }, + { + "epoch": 0.7251347802045894, + "grad_norm": 1.0312439807272726, + "learning_rate": 3.7077022351380963e-06, + "loss": 0.371, + "step": 41965 + }, + { + "epoch": 0.7251520597179983, + "grad_norm": 0.8708554422618592, + "learning_rate": 3.707267274556693e-06, + "loss": 0.3308, + "step": 41966 + }, + { + "epoch": 0.7251693392314073, + "grad_norm": 1.0680571108023935, + "learning_rate": 3.706832333684602e-06, + "loss": 0.4212, + "step": 41967 + }, + { + "epoch": 0.7251866187448162, + "grad_norm": 1.1274663844219954, + "learning_rate": 3.7063974125231904e-06, + "loss": 0.2993, + "step": 41968 + }, + { + "epoch": 0.7252038982582251, + "grad_norm": 1.541263240003356, + "learning_rate": 3.705962511073821e-06, + "loss": 0.3109, + "step": 41969 + }, + { + "epoch": 0.725221177771634, + "grad_norm": 1.3197468953177174, + "learning_rate": 3.7055276293378582e-06, + "loss": 0.5391, + "step": 41970 + }, + { + "epoch": 0.7252384572850429, + "grad_norm": 1.0000638199713152, + "learning_rate": 3.705092767316659e-06, + "loss": 0.3853, + "step": 41971 + }, + { + "epoch": 0.7252557367984518, + "grad_norm": 1.5844934274862188, + "learning_rate": 3.70465792501159e-06, + "loss": 0.3405, + "step": 41972 + }, + { + "epoch": 0.7252730163118607, + "grad_norm": 1.5350445532733605, + "learning_rate": 3.704223102424007e-06, + "loss": 0.4085, + "step": 41973 + }, + { + "epoch": 0.7252902958252696, + "grad_norm": 1.0071663621897515, + "learning_rate": 3.7037882995552767e-06, + "loss": 0.3763, + "step": 41974 + }, + { + "epoch": 0.7253075753386785, + "grad_norm": 0.5763973478542043, + "learning_rate": 3.7033535164067634e-06, + "loss": 0.8234, + "step": 41975 + }, + { + "epoch": 0.7253248548520874, + "grad_norm": 0.524309040823176, + "learning_rate": 3.7029187529798214e-06, + "loss": 0.4729, + "step": 41976 + }, + { + "epoch": 0.7253421343654962, + "grad_norm": 0.7277501600356976, + "learning_rate": 3.7024840092758206e-06, + "loss": 0.235, + "step": 41977 + }, + { + "epoch": 0.7253594138789051, + "grad_norm": 1.164642413727017, + "learning_rate": 3.7020492852961145e-06, + "loss": 0.3784, + "step": 41978 + }, + { + "epoch": 0.725376693392314, + "grad_norm": 0.73072828554108, + "learning_rate": 3.701614581042068e-06, + "loss": 0.53, + "step": 41979 + }, + { + "epoch": 0.7253939729057229, + "grad_norm": 0.7293780286585237, + "learning_rate": 3.701179896515045e-06, + "loss": 0.6614, + "step": 41980 + }, + { + "epoch": 0.7254112524191318, + "grad_norm": 0.8040357453666697, + "learning_rate": 3.700745231716407e-06, + "loss": 0.36, + "step": 41981 + }, + { + "epoch": 0.7254285319325408, + "grad_norm": 1.7027433607913431, + "learning_rate": 3.700310586647511e-06, + "loss": 0.2794, + "step": 41982 + }, + { + "epoch": 0.7254458114459497, + "grad_norm": 1.654298283129648, + "learning_rate": 3.6998759613097247e-06, + "loss": 0.381, + "step": 41983 + }, + { + "epoch": 0.7254630909593586, + "grad_norm": 2.1016356473485636, + "learning_rate": 3.699441355704402e-06, + "loss": 0.1987, + "step": 41984 + }, + { + "epoch": 0.7254803704727675, + "grad_norm": 1.5780373974827107, + "learning_rate": 3.699006769832907e-06, + "loss": 0.3643, + "step": 41985 + }, + { + "epoch": 0.7254976499861764, + "grad_norm": 2.477324691486802, + "learning_rate": 3.6985722036966055e-06, + "loss": 0.4454, + "step": 41986 + }, + { + "epoch": 0.7255149294995853, + "grad_norm": 2.017549408816138, + "learning_rate": 3.698137657296852e-06, + "loss": 0.6294, + "step": 41987 + }, + { + "epoch": 0.7255322090129942, + "grad_norm": 1.5732441566387718, + "learning_rate": 3.697703130635013e-06, + "loss": 0.2642, + "step": 41988 + }, + { + "epoch": 0.7255494885264031, + "grad_norm": 2.297702843460651, + "learning_rate": 3.697268623712443e-06, + "loss": 0.3807, + "step": 41989 + }, + { + "epoch": 0.725566768039812, + "grad_norm": 1.2752743237592965, + "learning_rate": 3.6968341365305082e-06, + "loss": 0.4242, + "step": 41990 + }, + { + "epoch": 0.7255840475532209, + "grad_norm": 1.4200081063938697, + "learning_rate": 3.6963996690905667e-06, + "loss": 0.4995, + "step": 41991 + }, + { + "epoch": 0.7256013270666298, + "grad_norm": 1.2292326397118825, + "learning_rate": 3.6959652213939845e-06, + "loss": 0.4559, + "step": 41992 + }, + { + "epoch": 0.7256186065800387, + "grad_norm": 1.39205021762756, + "learning_rate": 3.695530793442118e-06, + "loss": 0.3339, + "step": 41993 + }, + { + "epoch": 0.7256358860934476, + "grad_norm": 1.1175974480302562, + "learning_rate": 3.6950963852363242e-06, + "loss": 0.3293, + "step": 41994 + }, + { + "epoch": 0.7256531656068566, + "grad_norm": 0.8153896487915202, + "learning_rate": 3.694661996777972e-06, + "loss": 0.7805, + "step": 41995 + }, + { + "epoch": 0.7256704451202655, + "grad_norm": 1.3244934957123962, + "learning_rate": 3.694227628068414e-06, + "loss": 0.3504, + "step": 41996 + }, + { + "epoch": 0.7256877246336744, + "grad_norm": 1.599954931586563, + "learning_rate": 3.6937932791090135e-06, + "loss": 0.3862, + "step": 41997 + }, + { + "epoch": 0.7257050041470832, + "grad_norm": 1.1157204532767158, + "learning_rate": 3.6933589499011322e-06, + "loss": 0.1188, + "step": 41998 + }, + { + "epoch": 0.7257222836604921, + "grad_norm": 1.3755284842888638, + "learning_rate": 3.692924640446134e-06, + "loss": 0.3664, + "step": 41999 + }, + { + "epoch": 0.725739563173901, + "grad_norm": 1.7690095483212835, + "learning_rate": 3.6924903507453714e-06, + "loss": 0.2812, + "step": 42000 + }, + { + "epoch": 0.7257568426873099, + "grad_norm": 1.4750011086577501, + "learning_rate": 3.6920560808002116e-06, + "loss": 0.3736, + "step": 42001 + }, + { + "epoch": 0.7257741222007188, + "grad_norm": 1.457860870720633, + "learning_rate": 3.6916218306120077e-06, + "loss": 0.3053, + "step": 42002 + }, + { + "epoch": 0.7257914017141277, + "grad_norm": 1.5873858528700264, + "learning_rate": 3.6911876001821246e-06, + "loss": 0.3852, + "step": 42003 + }, + { + "epoch": 0.7258086812275366, + "grad_norm": 1.7621730606581503, + "learning_rate": 3.690753389511924e-06, + "loss": 0.4221, + "step": 42004 + }, + { + "epoch": 0.7258259607409455, + "grad_norm": 1.620342792328493, + "learning_rate": 3.690319198602761e-06, + "loss": 0.2843, + "step": 42005 + }, + { + "epoch": 0.7258432402543544, + "grad_norm": 1.2368784115112614, + "learning_rate": 3.6898850274560006e-06, + "loss": 0.3082, + "step": 42006 + }, + { + "epoch": 0.7258605197677633, + "grad_norm": 1.162823137841321, + "learning_rate": 3.6894508760729953e-06, + "loss": 0.3343, + "step": 42007 + }, + { + "epoch": 0.7258777992811722, + "grad_norm": 0.642511669130268, + "learning_rate": 3.6890167444551107e-06, + "loss": 0.6208, + "step": 42008 + }, + { + "epoch": 0.7258950787945812, + "grad_norm": 1.0732644373241145, + "learning_rate": 3.6885826326037043e-06, + "loss": 0.1828, + "step": 42009 + }, + { + "epoch": 0.7259123583079901, + "grad_norm": 1.0085096453974776, + "learning_rate": 3.688148540520141e-06, + "loss": 0.3993, + "step": 42010 + }, + { + "epoch": 0.725929637821399, + "grad_norm": 1.003781362144529, + "learning_rate": 3.687714468205773e-06, + "loss": 0.3016, + "step": 42011 + }, + { + "epoch": 0.7259469173348079, + "grad_norm": 1.4765457104122408, + "learning_rate": 3.6872804156619656e-06, + "loss": 0.5, + "step": 42012 + }, + { + "epoch": 0.7259641968482168, + "grad_norm": 1.05026056345658, + "learning_rate": 3.6868463828900723e-06, + "loss": 0.3271, + "step": 42013 + }, + { + "epoch": 0.7259814763616257, + "grad_norm": 1.2890243824216956, + "learning_rate": 3.6864123698914567e-06, + "loss": 0.4458, + "step": 42014 + }, + { + "epoch": 0.7259987558750346, + "grad_norm": 1.064596088425675, + "learning_rate": 3.6859783766674805e-06, + "loss": 0.264, + "step": 42015 + }, + { + "epoch": 0.7260160353884435, + "grad_norm": 1.0602394387246838, + "learning_rate": 3.6855444032194967e-06, + "loss": 0.4412, + "step": 42016 + }, + { + "epoch": 0.7260333149018524, + "grad_norm": 1.8139762098570218, + "learning_rate": 3.68511044954887e-06, + "loss": 0.4937, + "step": 42017 + }, + { + "epoch": 0.7260505944152613, + "grad_norm": 1.4351051825496328, + "learning_rate": 3.684676515656954e-06, + "loss": 0.4828, + "step": 42018 + }, + { + "epoch": 0.7260678739286701, + "grad_norm": 1.638360280577057, + "learning_rate": 3.684242601545113e-06, + "loss": 0.6988, + "step": 42019 + }, + { + "epoch": 0.726085153442079, + "grad_norm": 1.6496302314531206, + "learning_rate": 3.6838087072147022e-06, + "loss": 0.373, + "step": 42020 + }, + { + "epoch": 0.7261024329554879, + "grad_norm": 1.1425031512555044, + "learning_rate": 3.683374832667087e-06, + "loss": 0.7611, + "step": 42021 + }, + { + "epoch": 0.7261197124688968, + "grad_norm": 2.0062812335653364, + "learning_rate": 3.6829409779036175e-06, + "loss": 0.2923, + "step": 42022 + }, + { + "epoch": 0.7261369919823057, + "grad_norm": 1.1351028200697715, + "learning_rate": 3.682507142925661e-06, + "loss": 0.5763, + "step": 42023 + }, + { + "epoch": 0.7261542714957147, + "grad_norm": 0.9003209404751571, + "learning_rate": 3.6820733277345688e-06, + "loss": 0.5108, + "step": 42024 + }, + { + "epoch": 0.7261715510091236, + "grad_norm": 1.0160551408967733, + "learning_rate": 3.6816395323317035e-06, + "loss": 0.3279, + "step": 42025 + }, + { + "epoch": 0.7261888305225325, + "grad_norm": 1.0805921286775966, + "learning_rate": 3.6812057567184255e-06, + "loss": 0.3164, + "step": 42026 + }, + { + "epoch": 0.7262061100359414, + "grad_norm": 0.9888295443629234, + "learning_rate": 3.680772000896089e-06, + "loss": 0.401, + "step": 42027 + }, + { + "epoch": 0.7262233895493503, + "grad_norm": 1.4565996136245598, + "learning_rate": 3.6803382648660578e-06, + "loss": 0.3937, + "step": 42028 + }, + { + "epoch": 0.7262406690627592, + "grad_norm": 1.5547354072792423, + "learning_rate": 3.6799045486296835e-06, + "loss": 0.2289, + "step": 42029 + }, + { + "epoch": 0.7262579485761681, + "grad_norm": 1.2861023313987348, + "learning_rate": 3.6794708521883295e-06, + "loss": 0.468, + "step": 42030 + }, + { + "epoch": 0.726275228089577, + "grad_norm": 1.431026252480314, + "learning_rate": 3.679037175543353e-06, + "loss": 0.3076, + "step": 42031 + }, + { + "epoch": 0.7262925076029859, + "grad_norm": 1.7286128254537179, + "learning_rate": 3.6786035186961155e-06, + "loss": 0.443, + "step": 42032 + }, + { + "epoch": 0.7263097871163948, + "grad_norm": 0.8861852247522063, + "learning_rate": 3.6781698816479685e-06, + "loss": 0.4051, + "step": 42033 + }, + { + "epoch": 0.7263270666298037, + "grad_norm": 1.5366378485637455, + "learning_rate": 3.677736264400278e-06, + "loss": 0.4151, + "step": 42034 + }, + { + "epoch": 0.7263443461432126, + "grad_norm": 1.2544578042664876, + "learning_rate": 3.677302666954398e-06, + "loss": 0.2759, + "step": 42035 + }, + { + "epoch": 0.7263616256566215, + "grad_norm": 2.210006582365391, + "learning_rate": 3.676869089311682e-06, + "loss": 0.2998, + "step": 42036 + }, + { + "epoch": 0.7263789051700305, + "grad_norm": 1.4074733078865203, + "learning_rate": 3.6764355314734934e-06, + "loss": 0.4535, + "step": 42037 + }, + { + "epoch": 0.7263961846834394, + "grad_norm": 1.2174953055636473, + "learning_rate": 3.6760019934411894e-06, + "loss": 0.3862, + "step": 42038 + }, + { + "epoch": 0.7264134641968483, + "grad_norm": 1.1001273805753304, + "learning_rate": 3.6755684752161303e-06, + "loss": 0.3351, + "step": 42039 + }, + { + "epoch": 0.7264307437102571, + "grad_norm": 1.3757105283269877, + "learning_rate": 3.675134976799669e-06, + "loss": 0.5137, + "step": 42040 + }, + { + "epoch": 0.726448023223666, + "grad_norm": 0.796185630056627, + "learning_rate": 3.6747014981931683e-06, + "loss": 0.348, + "step": 42041 + }, + { + "epoch": 0.7264653027370749, + "grad_norm": 0.8852869357543852, + "learning_rate": 3.67426803939798e-06, + "loss": 0.319, + "step": 42042 + }, + { + "epoch": 0.7264825822504838, + "grad_norm": 1.064767869154738, + "learning_rate": 3.673834600415466e-06, + "loss": 0.314, + "step": 42043 + }, + { + "epoch": 0.7264998617638927, + "grad_norm": 1.5516055499376877, + "learning_rate": 3.673401181246985e-06, + "loss": 0.2094, + "step": 42044 + }, + { + "epoch": 0.7265171412773016, + "grad_norm": 1.7084465801673925, + "learning_rate": 3.6729677818938893e-06, + "loss": 0.2785, + "step": 42045 + }, + { + "epoch": 0.7265344207907105, + "grad_norm": 1.482887292694044, + "learning_rate": 3.672534402357543e-06, + "loss": 0.2605, + "step": 42046 + }, + { + "epoch": 0.7265517003041194, + "grad_norm": 0.8185992492686038, + "learning_rate": 3.6721010426392957e-06, + "loss": 0.3776, + "step": 42047 + }, + { + "epoch": 0.7265689798175283, + "grad_norm": 1.1765295253460788, + "learning_rate": 3.671667702740509e-06, + "loss": 0.2798, + "step": 42048 + }, + { + "epoch": 0.7265862593309372, + "grad_norm": 1.465228072564947, + "learning_rate": 3.671234382662541e-06, + "loss": 0.4822, + "step": 42049 + }, + { + "epoch": 0.7266035388443461, + "grad_norm": 0.9231038555522011, + "learning_rate": 3.67080108240675e-06, + "loss": 0.2896, + "step": 42050 + }, + { + "epoch": 0.726620818357755, + "grad_norm": 1.3275744664638072, + "learning_rate": 3.670367801974488e-06, + "loss": 0.2761, + "step": 42051 + }, + { + "epoch": 0.726638097871164, + "grad_norm": 1.3185571908288491, + "learning_rate": 3.6699345413671185e-06, + "loss": 0.283, + "step": 42052 + }, + { + "epoch": 0.7266553773845729, + "grad_norm": 0.8962635157571548, + "learning_rate": 3.6695013005859916e-06, + "loss": 0.2168, + "step": 42053 + }, + { + "epoch": 0.7266726568979818, + "grad_norm": 1.2839902764082372, + "learning_rate": 3.6690680796324686e-06, + "loss": 0.3596, + "step": 42054 + }, + { + "epoch": 0.7266899364113907, + "grad_norm": 1.1833777064403823, + "learning_rate": 3.6686348785079072e-06, + "loss": 0.3513, + "step": 42055 + }, + { + "epoch": 0.7267072159247996, + "grad_norm": 1.627890500617887, + "learning_rate": 3.6682016972136602e-06, + "loss": 0.5025, + "step": 42056 + }, + { + "epoch": 0.7267244954382085, + "grad_norm": 1.035351240518721, + "learning_rate": 3.6677685357510905e-06, + "loss": 0.3839, + "step": 42057 + }, + { + "epoch": 0.7267417749516174, + "grad_norm": 0.9093100728117677, + "learning_rate": 3.6673353941215474e-06, + "loss": 0.2438, + "step": 42058 + }, + { + "epoch": 0.7267590544650263, + "grad_norm": 0.41377506886154714, + "learning_rate": 3.6669022723263904e-06, + "loss": 0.6188, + "step": 42059 + }, + { + "epoch": 0.7267763339784352, + "grad_norm": 0.8980828106132461, + "learning_rate": 3.6664691703669776e-06, + "loss": 0.4249, + "step": 42060 + }, + { + "epoch": 0.726793613491844, + "grad_norm": 1.1001179901305158, + "learning_rate": 3.6660360882446678e-06, + "loss": 0.3157, + "step": 42061 + }, + { + "epoch": 0.7268108930052529, + "grad_norm": 1.0828223396044059, + "learning_rate": 3.6656030259608098e-06, + "loss": 0.3643, + "step": 42062 + }, + { + "epoch": 0.7268281725186618, + "grad_norm": 1.1563980646599405, + "learning_rate": 3.66516998351677e-06, + "loss": 0.3897, + "step": 42063 + }, + { + "epoch": 0.7268454520320707, + "grad_norm": 1.4841242096287528, + "learning_rate": 3.664736960913894e-06, + "loss": 0.416, + "step": 42064 + }, + { + "epoch": 0.7268627315454796, + "grad_norm": 1.1157324600145044, + "learning_rate": 3.6643039581535446e-06, + "loss": 0.4041, + "step": 42065 + }, + { + "epoch": 0.7268800110588886, + "grad_norm": 0.7268973717904548, + "learning_rate": 3.6638709752370795e-06, + "loss": 0.6975, + "step": 42066 + }, + { + "epoch": 0.7268972905722975, + "grad_norm": 0.9451977791562616, + "learning_rate": 3.6634380121658484e-06, + "loss": 0.4928, + "step": 42067 + }, + { + "epoch": 0.7269145700857064, + "grad_norm": 0.8679294373933673, + "learning_rate": 3.6630050689412146e-06, + "loss": 0.2351, + "step": 42068 + }, + { + "epoch": 0.7269318495991153, + "grad_norm": 2.2216537475826454, + "learning_rate": 3.662572145564528e-06, + "loss": 0.394, + "step": 42069 + }, + { + "epoch": 0.7269491291125242, + "grad_norm": 1.4604248554193526, + "learning_rate": 3.662139242037146e-06, + "loss": 0.4081, + "step": 42070 + }, + { + "epoch": 0.7269664086259331, + "grad_norm": 0.850580553992529, + "learning_rate": 3.6617063583604252e-06, + "loss": 0.5997, + "step": 42071 + }, + { + "epoch": 0.726983688139342, + "grad_norm": 1.7882173351580182, + "learning_rate": 3.6612734945357265e-06, + "loss": 0.3568, + "step": 42072 + }, + { + "epoch": 0.7270009676527509, + "grad_norm": 1.3202665746962543, + "learning_rate": 3.6608406505643967e-06, + "loss": 0.6308, + "step": 42073 + }, + { + "epoch": 0.7270182471661598, + "grad_norm": 0.9950583913811345, + "learning_rate": 3.660407826447799e-06, + "loss": 0.4598, + "step": 42074 + }, + { + "epoch": 0.7270355266795687, + "grad_norm": 1.0864296526856048, + "learning_rate": 3.6599750221872865e-06, + "loss": 0.4917, + "step": 42075 + }, + { + "epoch": 0.7270528061929776, + "grad_norm": 0.79117716236249, + "learning_rate": 3.6595422377842103e-06, + "loss": 0.3142, + "step": 42076 + }, + { + "epoch": 0.7270700857063865, + "grad_norm": 1.055161928328582, + "learning_rate": 3.659109473239929e-06, + "loss": 0.3353, + "step": 42077 + }, + { + "epoch": 0.7270873652197954, + "grad_norm": 1.8252075038943136, + "learning_rate": 3.6586767285558002e-06, + "loss": 0.4253, + "step": 42078 + }, + { + "epoch": 0.7271046447332044, + "grad_norm": 1.0612214993236233, + "learning_rate": 3.6582440037331803e-06, + "loss": 0.4298, + "step": 42079 + }, + { + "epoch": 0.7271219242466133, + "grad_norm": 1.2478818793178028, + "learning_rate": 3.6578112987734182e-06, + "loss": 0.3808, + "step": 42080 + }, + { + "epoch": 0.7271392037600222, + "grad_norm": 0.88649185128356, + "learning_rate": 3.6573786136778777e-06, + "loss": 0.3991, + "step": 42081 + }, + { + "epoch": 0.727156483273431, + "grad_norm": 0.9297055034602676, + "learning_rate": 3.6569459484479054e-06, + "loss": 0.4262, + "step": 42082 + }, + { + "epoch": 0.7271737627868399, + "grad_norm": 0.858878803305536, + "learning_rate": 3.6565133030848597e-06, + "loss": 0.3854, + "step": 42083 + }, + { + "epoch": 0.7271910423002488, + "grad_norm": 2.361549494390004, + "learning_rate": 3.656080677590097e-06, + "loss": 0.4357, + "step": 42084 + }, + { + "epoch": 0.7272083218136577, + "grad_norm": 1.1488498232151059, + "learning_rate": 3.6556480719649757e-06, + "loss": 0.8426, + "step": 42085 + }, + { + "epoch": 0.7272256013270666, + "grad_norm": 1.790113007041037, + "learning_rate": 3.655215486210847e-06, + "loss": 0.2183, + "step": 42086 + }, + { + "epoch": 0.7272428808404755, + "grad_norm": 2.0667533099136106, + "learning_rate": 3.654782920329061e-06, + "loss": 0.5426, + "step": 42087 + }, + { + "epoch": 0.7272601603538844, + "grad_norm": 1.685707060011815, + "learning_rate": 3.6543503743209775e-06, + "loss": 0.3404, + "step": 42088 + }, + { + "epoch": 0.7272774398672933, + "grad_norm": 2.032700884737674, + "learning_rate": 3.6539178481879513e-06, + "loss": 0.2605, + "step": 42089 + }, + { + "epoch": 0.7272947193807022, + "grad_norm": 1.4211378571906887, + "learning_rate": 3.6534853419313397e-06, + "loss": 0.4545, + "step": 42090 + }, + { + "epoch": 0.7273119988941111, + "grad_norm": 1.3420307709082477, + "learning_rate": 3.653052855552491e-06, + "loss": 0.4156, + "step": 42091 + }, + { + "epoch": 0.72732927840752, + "grad_norm": 1.2596926115389895, + "learning_rate": 3.652620389052767e-06, + "loss": 0.2502, + "step": 42092 + }, + { + "epoch": 0.727346557920929, + "grad_norm": 1.1232608980326426, + "learning_rate": 3.6521879424335136e-06, + "loss": 0.2697, + "step": 42093 + }, + { + "epoch": 0.7273638374343379, + "grad_norm": 1.0486686954169098, + "learning_rate": 3.6517555156960917e-06, + "loss": 0.5192, + "step": 42094 + }, + { + "epoch": 0.7273811169477468, + "grad_norm": 0.4410201745697694, + "learning_rate": 3.6513231088418534e-06, + "loss": 0.586, + "step": 42095 + }, + { + "epoch": 0.7273983964611557, + "grad_norm": 1.0476014212209765, + "learning_rate": 3.650890721872157e-06, + "loss": 0.3783, + "step": 42096 + }, + { + "epoch": 0.7274156759745646, + "grad_norm": 1.3616210237165987, + "learning_rate": 3.650458354788352e-06, + "loss": 0.4928, + "step": 42097 + }, + { + "epoch": 0.7274329554879735, + "grad_norm": 1.2789055424596278, + "learning_rate": 3.6500260075917925e-06, + "loss": 0.4241, + "step": 42098 + }, + { + "epoch": 0.7274502350013824, + "grad_norm": 1.57872018811847, + "learning_rate": 3.649593680283833e-06, + "loss": 0.2524, + "step": 42099 + }, + { + "epoch": 0.7274675145147913, + "grad_norm": 1.265691651663347, + "learning_rate": 3.6491613728658292e-06, + "loss": 0.2652, + "step": 42100 + }, + { + "epoch": 0.7274847940282002, + "grad_norm": 1.792165857680801, + "learning_rate": 3.648729085339138e-06, + "loss": 0.4213, + "step": 42101 + }, + { + "epoch": 0.7275020735416091, + "grad_norm": 1.5081190241385822, + "learning_rate": 3.648296817705106e-06, + "loss": 0.3761, + "step": 42102 + }, + { + "epoch": 0.727519353055018, + "grad_norm": 1.181791275205556, + "learning_rate": 3.647864569965095e-06, + "loss": 0.2677, + "step": 42103 + }, + { + "epoch": 0.7275366325684268, + "grad_norm": 1.3281765004810098, + "learning_rate": 3.6474323421204517e-06, + "loss": 0.4492, + "step": 42104 + }, + { + "epoch": 0.7275539120818357, + "grad_norm": 1.5766803983319504, + "learning_rate": 3.6470001341725324e-06, + "loss": 0.4776, + "step": 42105 + }, + { + "epoch": 0.7275711915952446, + "grad_norm": 1.04985989106426, + "learning_rate": 3.6465679461226956e-06, + "loss": 0.2994, + "step": 42106 + }, + { + "epoch": 0.7275884711086535, + "grad_norm": 1.2120526153798883, + "learning_rate": 3.6461357779722873e-06, + "loss": 0.3929, + "step": 42107 + }, + { + "epoch": 0.7276057506220625, + "grad_norm": 2.279650458682358, + "learning_rate": 3.6457036297226678e-06, + "loss": 0.256, + "step": 42108 + }, + { + "epoch": 0.7276230301354714, + "grad_norm": 1.2394730296606202, + "learning_rate": 3.645271501375184e-06, + "loss": 0.333, + "step": 42109 + }, + { + "epoch": 0.7276403096488803, + "grad_norm": 0.8681880348895636, + "learning_rate": 3.6448393929311933e-06, + "loss": 0.4157, + "step": 42110 + }, + { + "epoch": 0.7276575891622892, + "grad_norm": 0.8445320594791821, + "learning_rate": 3.644407304392048e-06, + "loss": 0.2165, + "step": 42111 + }, + { + "epoch": 0.7276748686756981, + "grad_norm": 1.6394279752633651, + "learning_rate": 3.6439752357591052e-06, + "loss": 0.3927, + "step": 42112 + }, + { + "epoch": 0.727692148189107, + "grad_norm": 1.4558147595103028, + "learning_rate": 3.6435431870337123e-06, + "loss": 0.3736, + "step": 42113 + }, + { + "epoch": 0.7277094277025159, + "grad_norm": 1.1370963317133556, + "learning_rate": 3.6431111582172283e-06, + "loss": 0.3806, + "step": 42114 + }, + { + "epoch": 0.7277267072159248, + "grad_norm": 1.3159482455834288, + "learning_rate": 3.6426791493110004e-06, + "loss": 0.3819, + "step": 42115 + }, + { + "epoch": 0.7277439867293337, + "grad_norm": 0.9397870713488472, + "learning_rate": 3.6422471603163867e-06, + "loss": 0.3879, + "step": 42116 + }, + { + "epoch": 0.7277612662427426, + "grad_norm": 0.47062583062107144, + "learning_rate": 3.6418151912347354e-06, + "loss": 0.6886, + "step": 42117 + }, + { + "epoch": 0.7277785457561515, + "grad_norm": 0.5460607006941387, + "learning_rate": 3.641383242067402e-06, + "loss": 0.7699, + "step": 42118 + }, + { + "epoch": 0.7277958252695604, + "grad_norm": 1.4483532150517329, + "learning_rate": 3.6409513128157435e-06, + "loss": 0.2374, + "step": 42119 + }, + { + "epoch": 0.7278131047829693, + "grad_norm": 1.8042411453376033, + "learning_rate": 3.6405194034811053e-06, + "loss": 0.4688, + "step": 42120 + }, + { + "epoch": 0.7278303842963783, + "grad_norm": 1.1791473599022608, + "learning_rate": 3.640087514064846e-06, + "loss": 0.5901, + "step": 42121 + }, + { + "epoch": 0.7278476638097872, + "grad_norm": 0.9369603261470039, + "learning_rate": 3.639655644568314e-06, + "loss": 0.3696, + "step": 42122 + }, + { + "epoch": 0.7278649433231961, + "grad_norm": 2.047321012857277, + "learning_rate": 3.6392237949928623e-06, + "loss": 0.4585, + "step": 42123 + }, + { + "epoch": 0.727882222836605, + "grad_norm": 1.175707580347663, + "learning_rate": 3.6387919653398463e-06, + "loss": 0.4945, + "step": 42124 + }, + { + "epoch": 0.7278995023500138, + "grad_norm": 1.0660392774526113, + "learning_rate": 3.6383601556106194e-06, + "loss": 0.3073, + "step": 42125 + }, + { + "epoch": 0.7279167818634227, + "grad_norm": 1.0667611809977495, + "learning_rate": 3.63792836580653e-06, + "loss": 0.3006, + "step": 42126 + }, + { + "epoch": 0.7279340613768316, + "grad_norm": 1.6185962714814137, + "learning_rate": 3.6374965959289355e-06, + "loss": 0.323, + "step": 42127 + }, + { + "epoch": 0.7279513408902405, + "grad_norm": 1.1438025652579955, + "learning_rate": 3.6370648459791812e-06, + "loss": 0.4274, + "step": 42128 + }, + { + "epoch": 0.7279686204036494, + "grad_norm": 1.151073065898324, + "learning_rate": 3.6366331159586245e-06, + "loss": 0.6236, + "step": 42129 + }, + { + "epoch": 0.7279858999170583, + "grad_norm": 1.2505455170304383, + "learning_rate": 3.636201405868619e-06, + "loss": 0.2848, + "step": 42130 + }, + { + "epoch": 0.7280031794304672, + "grad_norm": 0.919749863765005, + "learning_rate": 3.635769715710511e-06, + "loss": 0.3202, + "step": 42131 + }, + { + "epoch": 0.7280204589438761, + "grad_norm": 1.0980738692804428, + "learning_rate": 3.635338045485659e-06, + "loss": 0.2539, + "step": 42132 + }, + { + "epoch": 0.728037738457285, + "grad_norm": 1.6879903739865683, + "learning_rate": 3.634906395195409e-06, + "loss": 0.4252, + "step": 42133 + }, + { + "epoch": 0.7280550179706939, + "grad_norm": 1.696874695086317, + "learning_rate": 3.634474764841116e-06, + "loss": 0.5039, + "step": 42134 + }, + { + "epoch": 0.7280722974841028, + "grad_norm": 1.0348581364252452, + "learning_rate": 3.6340431544241317e-06, + "loss": 0.3107, + "step": 42135 + }, + { + "epoch": 0.7280895769975118, + "grad_norm": 1.2365893221761581, + "learning_rate": 3.6336115639458113e-06, + "loss": 0.2939, + "step": 42136 + }, + { + "epoch": 0.7281068565109207, + "grad_norm": 1.0513760620109276, + "learning_rate": 3.633179993407503e-06, + "loss": 0.3892, + "step": 42137 + }, + { + "epoch": 0.7281241360243296, + "grad_norm": 1.0231563365758602, + "learning_rate": 3.632748442810555e-06, + "loss": 0.3186, + "step": 42138 + }, + { + "epoch": 0.7281414155377385, + "grad_norm": 1.5618191308774652, + "learning_rate": 3.632316912156324e-06, + "loss": 0.4243, + "step": 42139 + }, + { + "epoch": 0.7281586950511474, + "grad_norm": 0.9283464110765458, + "learning_rate": 3.63188540144616e-06, + "loss": 0.3756, + "step": 42140 + }, + { + "epoch": 0.7281759745645563, + "grad_norm": 1.8982900218327223, + "learning_rate": 3.6314539106814183e-06, + "loss": 0.4758, + "step": 42141 + }, + { + "epoch": 0.7281932540779652, + "grad_norm": 1.556690365410938, + "learning_rate": 3.631022439863443e-06, + "loss": 0.4803, + "step": 42142 + }, + { + "epoch": 0.7282105335913741, + "grad_norm": 1.865461198280766, + "learning_rate": 3.630590988993594e-06, + "loss": 0.4363, + "step": 42143 + }, + { + "epoch": 0.728227813104783, + "grad_norm": 0.9820439137542797, + "learning_rate": 3.630159558073214e-06, + "loss": 0.5183, + "step": 42144 + }, + { + "epoch": 0.7282450926181919, + "grad_norm": 1.3633627832402033, + "learning_rate": 3.629728147103658e-06, + "loss": 0.2846, + "step": 42145 + }, + { + "epoch": 0.7282623721316007, + "grad_norm": 0.9252236209055671, + "learning_rate": 3.629296756086278e-06, + "loss": 0.2714, + "step": 42146 + }, + { + "epoch": 0.7282796516450096, + "grad_norm": 1.4640718596472706, + "learning_rate": 3.6288653850224275e-06, + "loss": 0.2485, + "step": 42147 + }, + { + "epoch": 0.7282969311584185, + "grad_norm": 0.8362117082942176, + "learning_rate": 3.628434033913456e-06, + "loss": 0.3257, + "step": 42148 + }, + { + "epoch": 0.7283142106718274, + "grad_norm": 1.2327906210999107, + "learning_rate": 3.628002702760709e-06, + "loss": 0.4158, + "step": 42149 + }, + { + "epoch": 0.7283314901852364, + "grad_norm": 1.1278930651551489, + "learning_rate": 3.6275713915655418e-06, + "loss": 0.4403, + "step": 42150 + }, + { + "epoch": 0.7283487696986453, + "grad_norm": 1.1544477093690746, + "learning_rate": 3.6271401003293062e-06, + "loss": 0.3727, + "step": 42151 + }, + { + "epoch": 0.7283660492120542, + "grad_norm": 1.3149899956088344, + "learning_rate": 3.6267088290533534e-06, + "loss": 0.3688, + "step": 42152 + }, + { + "epoch": 0.7283833287254631, + "grad_norm": 0.9966365919735181, + "learning_rate": 3.6262775777390314e-06, + "loss": 0.2892, + "step": 42153 + }, + { + "epoch": 0.728400608238872, + "grad_norm": 1.9557774067564755, + "learning_rate": 3.6258463463876957e-06, + "loss": 0.4683, + "step": 42154 + }, + { + "epoch": 0.7284178877522809, + "grad_norm": 1.3905018192344938, + "learning_rate": 3.6254151350006895e-06, + "loss": 0.3204, + "step": 42155 + }, + { + "epoch": 0.7284351672656898, + "grad_norm": 1.1647298926841247, + "learning_rate": 3.624983943579372e-06, + "loss": 0.4559, + "step": 42156 + }, + { + "epoch": 0.7284524467790987, + "grad_norm": 0.9880545214648889, + "learning_rate": 3.6245527721250816e-06, + "loss": 0.3121, + "step": 42157 + }, + { + "epoch": 0.7284697262925076, + "grad_norm": 1.966116855139252, + "learning_rate": 3.624121620639184e-06, + "loss": 0.3035, + "step": 42158 + }, + { + "epoch": 0.7284870058059165, + "grad_norm": 1.1276848966170174, + "learning_rate": 3.6236904891230208e-06, + "loss": 0.8368, + "step": 42159 + }, + { + "epoch": 0.7285042853193254, + "grad_norm": 1.0300924700577065, + "learning_rate": 3.623259377577941e-06, + "loss": 0.4425, + "step": 42160 + }, + { + "epoch": 0.7285215648327343, + "grad_norm": 1.6179000389971492, + "learning_rate": 3.622828286005301e-06, + "loss": 0.5441, + "step": 42161 + }, + { + "epoch": 0.7285388443461432, + "grad_norm": 0.6356923084524069, + "learning_rate": 3.6223972144064433e-06, + "loss": 0.2247, + "step": 42162 + }, + { + "epoch": 0.7285561238595522, + "grad_norm": 0.8986025785159345, + "learning_rate": 3.621966162782723e-06, + "loss": 0.4749, + "step": 42163 + }, + { + "epoch": 0.7285734033729611, + "grad_norm": 1.09037183327871, + "learning_rate": 3.6215351311354885e-06, + "loss": 0.4696, + "step": 42164 + }, + { + "epoch": 0.72859068288637, + "grad_norm": 1.4017868978583106, + "learning_rate": 3.6211041194660944e-06, + "loss": 0.3285, + "step": 42165 + }, + { + "epoch": 0.7286079623997789, + "grad_norm": 0.8983146168870304, + "learning_rate": 3.6206731277758842e-06, + "loss": 0.3684, + "step": 42166 + }, + { + "epoch": 0.7286252419131877, + "grad_norm": 0.9743635425702811, + "learning_rate": 3.620242156066214e-06, + "loss": 0.299, + "step": 42167 + }, + { + "epoch": 0.7286425214265966, + "grad_norm": 0.7922193333165741, + "learning_rate": 3.619811204338426e-06, + "loss": 0.4644, + "step": 42168 + }, + { + "epoch": 0.7286598009400055, + "grad_norm": 1.6818040913323842, + "learning_rate": 3.6193802725938743e-06, + "loss": 0.5406, + "step": 42169 + }, + { + "epoch": 0.7286770804534144, + "grad_norm": 1.2274355788883138, + "learning_rate": 3.618949360833912e-06, + "loss": 0.2831, + "step": 42170 + }, + { + "epoch": 0.7286943599668233, + "grad_norm": 2.124201695803873, + "learning_rate": 3.618518469059883e-06, + "loss": 0.4594, + "step": 42171 + }, + { + "epoch": 0.7287116394802322, + "grad_norm": 1.0999525540304678, + "learning_rate": 3.618087597273141e-06, + "loss": 0.3197, + "step": 42172 + }, + { + "epoch": 0.7287289189936411, + "grad_norm": 2.3705094231477504, + "learning_rate": 3.617656745475031e-06, + "loss": 0.483, + "step": 42173 + }, + { + "epoch": 0.72874619850705, + "grad_norm": 1.6832502890078562, + "learning_rate": 3.617225913666905e-06, + "loss": 0.4064, + "step": 42174 + }, + { + "epoch": 0.7287634780204589, + "grad_norm": 0.7993324954195076, + "learning_rate": 3.6167951018501125e-06, + "loss": 0.2117, + "step": 42175 + }, + { + "epoch": 0.7287807575338678, + "grad_norm": 0.5590939314766274, + "learning_rate": 3.616364310026006e-06, + "loss": 0.6086, + "step": 42176 + }, + { + "epoch": 0.7287980370472767, + "grad_norm": 1.3710895284093128, + "learning_rate": 3.6159335381959283e-06, + "loss": 0.3068, + "step": 42177 + }, + { + "epoch": 0.7288153165606857, + "grad_norm": 0.9550480354630374, + "learning_rate": 3.615502786361236e-06, + "loss": 0.6863, + "step": 42178 + }, + { + "epoch": 0.7288325960740946, + "grad_norm": 1.0788562171615146, + "learning_rate": 3.61507205452327e-06, + "loss": 0.6182, + "step": 42179 + }, + { + "epoch": 0.7288498755875035, + "grad_norm": 1.2027368533624947, + "learning_rate": 3.6146413426833836e-06, + "loss": 0.4041, + "step": 42180 + }, + { + "epoch": 0.7288671551009124, + "grad_norm": 1.0013163118228965, + "learning_rate": 3.61421065084293e-06, + "loss": 0.2915, + "step": 42181 + }, + { + "epoch": 0.7288844346143213, + "grad_norm": 1.9780693495085349, + "learning_rate": 3.613779979003249e-06, + "loss": 0.4811, + "step": 42182 + }, + { + "epoch": 0.7289017141277302, + "grad_norm": 1.2616327493947133, + "learning_rate": 3.6133493271656992e-06, + "loss": 0.4983, + "step": 42183 + }, + { + "epoch": 0.7289189936411391, + "grad_norm": 0.8656534286190156, + "learning_rate": 3.612918695331621e-06, + "loss": 0.315, + "step": 42184 + }, + { + "epoch": 0.728936273154548, + "grad_norm": 1.5568654151849461, + "learning_rate": 3.612488083502367e-06, + "loss": 0.419, + "step": 42185 + }, + { + "epoch": 0.7289535526679569, + "grad_norm": 1.2240180393826043, + "learning_rate": 3.612057491679285e-06, + "loss": 0.8942, + "step": 42186 + }, + { + "epoch": 0.7289708321813658, + "grad_norm": 0.9429900980417949, + "learning_rate": 3.6116269198637278e-06, + "loss": 0.3731, + "step": 42187 + }, + { + "epoch": 0.7289881116947746, + "grad_norm": 1.1897680986519947, + "learning_rate": 3.6111963680570415e-06, + "loss": 0.3876, + "step": 42188 + }, + { + "epoch": 0.7290053912081835, + "grad_norm": 0.8466256797951026, + "learning_rate": 3.61076583626057e-06, + "loss": 0.3785, + "step": 42189 + }, + { + "epoch": 0.7290226707215924, + "grad_norm": 2.337823489784211, + "learning_rate": 3.610335324475669e-06, + "loss": 0.2488, + "step": 42190 + }, + { + "epoch": 0.7290399502350013, + "grad_norm": 1.2779897228788855, + "learning_rate": 3.6099048327036757e-06, + "loss": 0.5068, + "step": 42191 + }, + { + "epoch": 0.7290572297484103, + "grad_norm": 0.8896011274675597, + "learning_rate": 3.6094743609459538e-06, + "loss": 0.2079, + "step": 42192 + }, + { + "epoch": 0.7290745092618192, + "grad_norm": 0.7609355150805402, + "learning_rate": 3.6090439092038398e-06, + "loss": 0.214, + "step": 42193 + }, + { + "epoch": 0.7290917887752281, + "grad_norm": 1.7956906933071448, + "learning_rate": 3.60861347747869e-06, + "loss": 0.405, + "step": 42194 + }, + { + "epoch": 0.729109068288637, + "grad_norm": 1.1059582921806237, + "learning_rate": 3.6081830657718443e-06, + "loss": 0.5313, + "step": 42195 + }, + { + "epoch": 0.7291263478020459, + "grad_norm": 1.4037402455426118, + "learning_rate": 3.6077526740846593e-06, + "loss": 0.359, + "step": 42196 + }, + { + "epoch": 0.7291436273154548, + "grad_norm": 2.0641260321602193, + "learning_rate": 3.607322302418471e-06, + "loss": 0.3666, + "step": 42197 + }, + { + "epoch": 0.7291609068288637, + "grad_norm": 0.9975384973682855, + "learning_rate": 3.6068919507746425e-06, + "loss": 0.2858, + "step": 42198 + }, + { + "epoch": 0.7291781863422726, + "grad_norm": 1.578310454688414, + "learning_rate": 3.6064616191545144e-06, + "loss": 0.4099, + "step": 42199 + }, + { + "epoch": 0.7291954658556815, + "grad_norm": 0.8492313068426206, + "learning_rate": 3.606031307559431e-06, + "loss": 0.3579, + "step": 42200 + }, + { + "epoch": 0.7292127453690904, + "grad_norm": 1.3767703930380066, + "learning_rate": 3.6056010159907463e-06, + "loss": 0.3814, + "step": 42201 + }, + { + "epoch": 0.7292300248824993, + "grad_norm": 1.7565154966856782, + "learning_rate": 3.605170744449802e-06, + "loss": 0.4182, + "step": 42202 + }, + { + "epoch": 0.7292473043959082, + "grad_norm": 0.9786867378469062, + "learning_rate": 3.604740492937949e-06, + "loss": 0.3173, + "step": 42203 + }, + { + "epoch": 0.7292645839093171, + "grad_norm": 1.1987765728074604, + "learning_rate": 3.604310261456535e-06, + "loss": 0.4378, + "step": 42204 + }, + { + "epoch": 0.729281863422726, + "grad_norm": 1.3243458091751126, + "learning_rate": 3.6038800500069103e-06, + "loss": 0.6099, + "step": 42205 + }, + { + "epoch": 0.729299142936135, + "grad_norm": 1.2419149284226414, + "learning_rate": 3.6034498585904166e-06, + "loss": 0.3421, + "step": 42206 + }, + { + "epoch": 0.7293164224495439, + "grad_norm": 1.5140336669788574, + "learning_rate": 3.6030196872084065e-06, + "loss": 0.2505, + "step": 42207 + }, + { + "epoch": 0.7293337019629528, + "grad_norm": 1.3574670555194555, + "learning_rate": 3.6025895358622222e-06, + "loss": 0.4345, + "step": 42208 + }, + { + "epoch": 0.7293509814763616, + "grad_norm": 1.0283756379459843, + "learning_rate": 3.602159404553214e-06, + "loss": 0.3153, + "step": 42209 + }, + { + "epoch": 0.7293682609897705, + "grad_norm": 1.0031274244345991, + "learning_rate": 3.6017292932827307e-06, + "loss": 0.2296, + "step": 42210 + }, + { + "epoch": 0.7293855405031794, + "grad_norm": 1.6487768093744934, + "learning_rate": 3.6012992020521154e-06, + "loss": 0.8105, + "step": 42211 + }, + { + "epoch": 0.7294028200165883, + "grad_norm": 1.3725230213412554, + "learning_rate": 3.600869130862721e-06, + "loss": 0.2325, + "step": 42212 + }, + { + "epoch": 0.7294200995299972, + "grad_norm": 1.8543564372868933, + "learning_rate": 3.6004390797158863e-06, + "loss": 0.4147, + "step": 42213 + }, + { + "epoch": 0.7294373790434061, + "grad_norm": 1.5107411021620833, + "learning_rate": 3.6000090486129635e-06, + "loss": 0.2635, + "step": 42214 + }, + { + "epoch": 0.729454658556815, + "grad_norm": 1.35456662957237, + "learning_rate": 3.5995790375552986e-06, + "loss": 0.3759, + "step": 42215 + }, + { + "epoch": 0.7294719380702239, + "grad_norm": 1.1506013714109415, + "learning_rate": 3.5991490465442413e-06, + "loss": 0.3609, + "step": 42216 + }, + { + "epoch": 0.7294892175836328, + "grad_norm": 1.1388803304563064, + "learning_rate": 3.598719075581133e-06, + "loss": 0.3119, + "step": 42217 + }, + { + "epoch": 0.7295064970970417, + "grad_norm": 0.8050671587694648, + "learning_rate": 3.598289124667326e-06, + "loss": 0.2833, + "step": 42218 + }, + { + "epoch": 0.7295237766104506, + "grad_norm": 1.2437861326021293, + "learning_rate": 3.5978591938041608e-06, + "loss": 0.3625, + "step": 42219 + }, + { + "epoch": 0.7295410561238596, + "grad_norm": 1.158070185611097, + "learning_rate": 3.597429282992987e-06, + "loss": 0.5049, + "step": 42220 + }, + { + "epoch": 0.7295583356372685, + "grad_norm": 1.4858711417819397, + "learning_rate": 3.5969993922351554e-06, + "loss": 0.4408, + "step": 42221 + }, + { + "epoch": 0.7295756151506774, + "grad_norm": 1.063247170393719, + "learning_rate": 3.596569521532004e-06, + "loss": 0.3894, + "step": 42222 + }, + { + "epoch": 0.7295928946640863, + "grad_norm": 1.2720105151149173, + "learning_rate": 3.596139670884887e-06, + "loss": 0.5398, + "step": 42223 + }, + { + "epoch": 0.7296101741774952, + "grad_norm": 1.1821322230362614, + "learning_rate": 3.595709840295144e-06, + "loss": 0.4439, + "step": 42224 + }, + { + "epoch": 0.7296274536909041, + "grad_norm": 1.1789149143255715, + "learning_rate": 3.5952800297641245e-06, + "loss": 0.3369, + "step": 42225 + }, + { + "epoch": 0.729644733204313, + "grad_norm": 1.4675643161023404, + "learning_rate": 3.594850239293175e-06, + "loss": 0.2544, + "step": 42226 + }, + { + "epoch": 0.7296620127177219, + "grad_norm": 1.1393320584343236, + "learning_rate": 3.5944204688836446e-06, + "loss": 0.2544, + "step": 42227 + }, + { + "epoch": 0.7296792922311308, + "grad_norm": 2.6690978675899215, + "learning_rate": 3.5939907185368726e-06, + "loss": 0.2909, + "step": 42228 + }, + { + "epoch": 0.7296965717445397, + "grad_norm": 1.7088078893256744, + "learning_rate": 3.5935609882542112e-06, + "loss": 0.2799, + "step": 42229 + }, + { + "epoch": 0.7297138512579486, + "grad_norm": 1.2069261916753171, + "learning_rate": 3.593131278037004e-06, + "loss": 0.4357, + "step": 42230 + }, + { + "epoch": 0.7297311307713574, + "grad_norm": 1.6412750633008344, + "learning_rate": 3.5927015878865914e-06, + "loss": 0.4297, + "step": 42231 + }, + { + "epoch": 0.7297484102847663, + "grad_norm": 1.51725036322887, + "learning_rate": 3.5922719178043285e-06, + "loss": 0.3903, + "step": 42232 + }, + { + "epoch": 0.7297656897981752, + "grad_norm": 1.172590929483933, + "learning_rate": 3.5918422677915554e-06, + "loss": 0.4806, + "step": 42233 + }, + { + "epoch": 0.7297829693115842, + "grad_norm": 2.071457929486234, + "learning_rate": 3.5914126378496215e-06, + "loss": 0.2174, + "step": 42234 + }, + { + "epoch": 0.7298002488249931, + "grad_norm": 1.4636405192685809, + "learning_rate": 3.5909830279798684e-06, + "loss": 0.3756, + "step": 42235 + }, + { + "epoch": 0.729817528338402, + "grad_norm": 0.963917606211561, + "learning_rate": 3.590553438183646e-06, + "loss": 0.2029, + "step": 42236 + }, + { + "epoch": 0.7298348078518109, + "grad_norm": 1.1971050348838992, + "learning_rate": 3.5901238684622906e-06, + "loss": 0.4932, + "step": 42237 + }, + { + "epoch": 0.7298520873652198, + "grad_norm": 0.8981528705903826, + "learning_rate": 3.5896943188171607e-06, + "loss": 0.5629, + "step": 42238 + }, + { + "epoch": 0.7298693668786287, + "grad_norm": 2.5105636634649513, + "learning_rate": 3.5892647892495924e-06, + "loss": 0.2838, + "step": 42239 + }, + { + "epoch": 0.7298866463920376, + "grad_norm": 1.2793128357587575, + "learning_rate": 3.588835279760937e-06, + "loss": 0.2628, + "step": 42240 + }, + { + "epoch": 0.7299039259054465, + "grad_norm": 1.3463927415616734, + "learning_rate": 3.588405790352537e-06, + "loss": 0.547, + "step": 42241 + }, + { + "epoch": 0.7299212054188554, + "grad_norm": 0.9531481265039854, + "learning_rate": 3.5879763210257347e-06, + "loss": 0.3944, + "step": 42242 + }, + { + "epoch": 0.7299384849322643, + "grad_norm": 1.6336366346570825, + "learning_rate": 3.5875468717818773e-06, + "loss": 0.3045, + "step": 42243 + }, + { + "epoch": 0.7299557644456732, + "grad_norm": 0.9380786807464687, + "learning_rate": 3.58711744262231e-06, + "loss": 0.3023, + "step": 42244 + }, + { + "epoch": 0.7299730439590821, + "grad_norm": 1.2875346625137214, + "learning_rate": 3.5866880335483823e-06, + "loss": 0.1524, + "step": 42245 + }, + { + "epoch": 0.729990323472491, + "grad_norm": 1.0522292988593713, + "learning_rate": 3.586258644561431e-06, + "loss": 0.4675, + "step": 42246 + }, + { + "epoch": 0.7300076029859, + "grad_norm": 0.9679298475650056, + "learning_rate": 3.585829275662809e-06, + "loss": 0.2619, + "step": 42247 + }, + { + "epoch": 0.7300248824993089, + "grad_norm": 1.0601163033434369, + "learning_rate": 3.585399926853853e-06, + "loss": 0.2873, + "step": 42248 + }, + { + "epoch": 0.7300421620127178, + "grad_norm": 1.2173973488250052, + "learning_rate": 3.584970598135913e-06, + "loss": 0.3402, + "step": 42249 + }, + { + "epoch": 0.7300594415261267, + "grad_norm": 0.9162929422270386, + "learning_rate": 3.5845412895103347e-06, + "loss": 0.7152, + "step": 42250 + }, + { + "epoch": 0.7300767210395356, + "grad_norm": 1.40488839616968, + "learning_rate": 3.584112000978459e-06, + "loss": 0.3015, + "step": 42251 + }, + { + "epoch": 0.7300940005529444, + "grad_norm": 1.4836202594513033, + "learning_rate": 3.5836827325416345e-06, + "loss": 0.2805, + "step": 42252 + }, + { + "epoch": 0.7301112800663533, + "grad_norm": 1.1325970297158086, + "learning_rate": 3.583253484201199e-06, + "loss": 0.3935, + "step": 42253 + }, + { + "epoch": 0.7301285595797622, + "grad_norm": 1.5750701017452449, + "learning_rate": 3.5828242559585023e-06, + "loss": 0.4158, + "step": 42254 + }, + { + "epoch": 0.7301458390931711, + "grad_norm": 1.2840004981707942, + "learning_rate": 3.582395047814888e-06, + "loss": 0.1686, + "step": 42255 + }, + { + "epoch": 0.73016311860658, + "grad_norm": 0.9498336016654886, + "learning_rate": 3.5819658597717034e-06, + "loss": 0.3291, + "step": 42256 + }, + { + "epoch": 0.7301803981199889, + "grad_norm": 1.251612908070777, + "learning_rate": 3.581536691830285e-06, + "loss": 0.501, + "step": 42257 + }, + { + "epoch": 0.7301976776333978, + "grad_norm": 1.1061231011577992, + "learning_rate": 3.5811075439919864e-06, + "loss": 0.3283, + "step": 42258 + }, + { + "epoch": 0.7302149571468067, + "grad_norm": 1.5279872088302588, + "learning_rate": 3.580678416258142e-06, + "loss": 0.496, + "step": 42259 + }, + { + "epoch": 0.7302322366602156, + "grad_norm": 1.5363908673626498, + "learning_rate": 3.5802493086301016e-06, + "loss": 0.4031, + "step": 42260 + }, + { + "epoch": 0.7302495161736245, + "grad_norm": 1.0990940255785833, + "learning_rate": 3.5798202211092113e-06, + "loss": 0.3186, + "step": 42261 + }, + { + "epoch": 0.7302667956870335, + "grad_norm": 2.720180287450157, + "learning_rate": 3.5793911536968087e-06, + "loss": 0.237, + "step": 42262 + }, + { + "epoch": 0.7302840752004424, + "grad_norm": 1.869255729680853, + "learning_rate": 3.5789621063942438e-06, + "loss": 0.3446, + "step": 42263 + }, + { + "epoch": 0.7303013547138513, + "grad_norm": 1.6235978691583215, + "learning_rate": 3.578533079202855e-06, + "loss": 0.3675, + "step": 42264 + }, + { + "epoch": 0.7303186342272602, + "grad_norm": 1.5393204793043218, + "learning_rate": 3.578104072123988e-06, + "loss": 0.4457, + "step": 42265 + }, + { + "epoch": 0.7303359137406691, + "grad_norm": 1.315905049697607, + "learning_rate": 3.5776750851589883e-06, + "loss": 0.2723, + "step": 42266 + }, + { + "epoch": 0.730353193254078, + "grad_norm": 0.6678158298790314, + "learning_rate": 3.5772461183092e-06, + "loss": 0.2255, + "step": 42267 + }, + { + "epoch": 0.7303704727674869, + "grad_norm": 0.8149974979928346, + "learning_rate": 3.5768171715759624e-06, + "loss": 0.2668, + "step": 42268 + }, + { + "epoch": 0.7303877522808958, + "grad_norm": 0.975848454611561, + "learning_rate": 3.5763882449606246e-06, + "loss": 0.3651, + "step": 42269 + }, + { + "epoch": 0.7304050317943047, + "grad_norm": 0.8779882082666984, + "learning_rate": 3.575959338464524e-06, + "loss": 0.2125, + "step": 42270 + }, + { + "epoch": 0.7304223113077136, + "grad_norm": 1.5315825632685816, + "learning_rate": 3.5755304520890065e-06, + "loss": 0.4452, + "step": 42271 + }, + { + "epoch": 0.7304395908211225, + "grad_norm": 1.0367069347333477, + "learning_rate": 3.5751015858354197e-06, + "loss": 0.2839, + "step": 42272 + }, + { + "epoch": 0.7304568703345313, + "grad_norm": 1.4023513860303518, + "learning_rate": 3.5746727397050996e-06, + "loss": 0.2425, + "step": 42273 + }, + { + "epoch": 0.7304741498479402, + "grad_norm": 1.187783384939316, + "learning_rate": 3.5742439136993968e-06, + "loss": 0.3735, + "step": 42274 + }, + { + "epoch": 0.7304914293613491, + "grad_norm": 1.1908664084544993, + "learning_rate": 3.573815107819646e-06, + "loss": 0.4866, + "step": 42275 + }, + { + "epoch": 0.730508708874758, + "grad_norm": 1.053691341827965, + "learning_rate": 3.573386322067198e-06, + "loss": 0.3471, + "step": 42276 + }, + { + "epoch": 0.730525988388167, + "grad_norm": 1.1283089231139443, + "learning_rate": 3.572957556443386e-06, + "loss": 0.3525, + "step": 42277 + }, + { + "epoch": 0.7305432679015759, + "grad_norm": 1.604070783808501, + "learning_rate": 3.572528810949567e-06, + "loss": 0.4424, + "step": 42278 + }, + { + "epoch": 0.7305605474149848, + "grad_norm": 1.9076696748833077, + "learning_rate": 3.572100085587071e-06, + "loss": 0.3148, + "step": 42279 + }, + { + "epoch": 0.7305778269283937, + "grad_norm": 1.4492822560793068, + "learning_rate": 3.571671380357251e-06, + "loss": 0.3384, + "step": 42280 + }, + { + "epoch": 0.7305951064418026, + "grad_norm": 1.1989095453244187, + "learning_rate": 3.571242695261444e-06, + "loss": 0.4749, + "step": 42281 + }, + { + "epoch": 0.7306123859552115, + "grad_norm": 1.4637133903008945, + "learning_rate": 3.5708140303009907e-06, + "loss": 0.2258, + "step": 42282 + }, + { + "epoch": 0.7306296654686204, + "grad_norm": 1.10988851168957, + "learning_rate": 3.5703853854772365e-06, + "loss": 0.2662, + "step": 42283 + }, + { + "epoch": 0.7306469449820293, + "grad_norm": 1.9440811565034255, + "learning_rate": 3.569956760791523e-06, + "loss": 0.2511, + "step": 42284 + }, + { + "epoch": 0.7306642244954382, + "grad_norm": 1.8054686806308127, + "learning_rate": 3.5695281562451965e-06, + "loss": 0.2936, + "step": 42285 + }, + { + "epoch": 0.7306815040088471, + "grad_norm": 1.01745198130855, + "learning_rate": 3.569099571839595e-06, + "loss": 0.2589, + "step": 42286 + }, + { + "epoch": 0.730698783522256, + "grad_norm": 1.2857104539307571, + "learning_rate": 3.5686710075760643e-06, + "loss": 0.3317, + "step": 42287 + }, + { + "epoch": 0.7307160630356649, + "grad_norm": 1.019051267869516, + "learning_rate": 3.568242463455942e-06, + "loss": 0.3906, + "step": 42288 + }, + { + "epoch": 0.7307333425490738, + "grad_norm": 1.3854476508375049, + "learning_rate": 3.5678139394805733e-06, + "loss": 0.4289, + "step": 42289 + }, + { + "epoch": 0.7307506220624828, + "grad_norm": 0.9679903240662764, + "learning_rate": 3.5673854356512994e-06, + "loss": 0.3885, + "step": 42290 + }, + { + "epoch": 0.7307679015758917, + "grad_norm": 0.5510975865446176, + "learning_rate": 3.566956951969467e-06, + "loss": 0.7324, + "step": 42291 + }, + { + "epoch": 0.7307851810893006, + "grad_norm": 1.0242728129931526, + "learning_rate": 3.5665284884364147e-06, + "loss": 0.2361, + "step": 42292 + }, + { + "epoch": 0.7308024606027095, + "grad_norm": 1.3402167742774342, + "learning_rate": 3.56610004505348e-06, + "loss": 0.4488, + "step": 42293 + }, + { + "epoch": 0.7308197401161183, + "grad_norm": 1.1647094439994143, + "learning_rate": 3.565671621822009e-06, + "loss": 0.4071, + "step": 42294 + }, + { + "epoch": 0.7308370196295272, + "grad_norm": 2.0496728174205656, + "learning_rate": 3.565243218743344e-06, + "loss": 0.4394, + "step": 42295 + }, + { + "epoch": 0.7308542991429361, + "grad_norm": 1.7541979082750236, + "learning_rate": 3.5648148358188296e-06, + "loss": 0.5145, + "step": 42296 + }, + { + "epoch": 0.730871578656345, + "grad_norm": 0.9523139433330439, + "learning_rate": 3.5643864730498e-06, + "loss": 0.3523, + "step": 42297 + }, + { + "epoch": 0.7308888581697539, + "grad_norm": 1.3165972599024112, + "learning_rate": 3.563958130437606e-06, + "loss": 0.3367, + "step": 42298 + }, + { + "epoch": 0.7309061376831628, + "grad_norm": 1.1091615374655348, + "learning_rate": 3.563529807983579e-06, + "loss": 0.3045, + "step": 42299 + }, + { + "epoch": 0.7309234171965717, + "grad_norm": 1.8935859349659732, + "learning_rate": 3.5631015056890673e-06, + "loss": 0.2667, + "step": 42300 + }, + { + "epoch": 0.7309406967099806, + "grad_norm": 2.9474678615182857, + "learning_rate": 3.5626732235554094e-06, + "loss": 0.4828, + "step": 42301 + }, + { + "epoch": 0.7309579762233895, + "grad_norm": 1.6414573873104361, + "learning_rate": 3.562244961583953e-06, + "loss": 0.5751, + "step": 42302 + }, + { + "epoch": 0.7309752557367984, + "grad_norm": 1.3329987169070854, + "learning_rate": 3.5618167197760343e-06, + "loss": 0.408, + "step": 42303 + }, + { + "epoch": 0.7309925352502074, + "grad_norm": 1.3821146986612374, + "learning_rate": 3.5613884981329917e-06, + "loss": 0.3086, + "step": 42304 + }, + { + "epoch": 0.7310098147636163, + "grad_norm": 1.1734790048431258, + "learning_rate": 3.5609602966561683e-06, + "loss": 0.426, + "step": 42305 + }, + { + "epoch": 0.7310270942770252, + "grad_norm": 1.063646894872032, + "learning_rate": 3.5605321153469076e-06, + "loss": 0.4362, + "step": 42306 + }, + { + "epoch": 0.7310443737904341, + "grad_norm": 1.2619700961902003, + "learning_rate": 3.560103954206553e-06, + "loss": 0.3002, + "step": 42307 + }, + { + "epoch": 0.731061653303843, + "grad_norm": 0.8361055148272726, + "learning_rate": 3.5596758132364393e-06, + "loss": 0.4818, + "step": 42308 + }, + { + "epoch": 0.7310789328172519, + "grad_norm": 0.9927206607219553, + "learning_rate": 3.5592476924379126e-06, + "loss": 0.3566, + "step": 42309 + }, + { + "epoch": 0.7310962123306608, + "grad_norm": 1.7026431396212967, + "learning_rate": 3.558819591812309e-06, + "loss": 0.2303, + "step": 42310 + }, + { + "epoch": 0.7311134918440697, + "grad_norm": 1.6869203392866385, + "learning_rate": 3.5583915113609723e-06, + "loss": 0.3341, + "step": 42311 + }, + { + "epoch": 0.7311307713574786, + "grad_norm": 1.1471246097217236, + "learning_rate": 3.5579634510852456e-06, + "loss": 0.1962, + "step": 42312 + }, + { + "epoch": 0.7311480508708875, + "grad_norm": 1.421067041174752, + "learning_rate": 3.5575354109864645e-06, + "loss": 0.6902, + "step": 42313 + }, + { + "epoch": 0.7311653303842964, + "grad_norm": 1.3130952919994283, + "learning_rate": 3.557107391065975e-06, + "loss": 0.3981, + "step": 42314 + }, + { + "epoch": 0.7311826098977052, + "grad_norm": 1.0938915827494435, + "learning_rate": 3.556679391325111e-06, + "loss": 0.3324, + "step": 42315 + }, + { + "epoch": 0.7311998894111141, + "grad_norm": 1.4133289401178306, + "learning_rate": 3.5562514117652203e-06, + "loss": 0.3557, + "step": 42316 + }, + { + "epoch": 0.731217168924523, + "grad_norm": 0.9912069307855718, + "learning_rate": 3.5558234523876335e-06, + "loss": 0.5, + "step": 42317 + }, + { + "epoch": 0.731234448437932, + "grad_norm": 1.6253321319990497, + "learning_rate": 3.5553955131937046e-06, + "loss": 0.3427, + "step": 42318 + }, + { + "epoch": 0.7312517279513409, + "grad_norm": 1.1097673797916778, + "learning_rate": 3.554967594184762e-06, + "loss": 0.2612, + "step": 42319 + }, + { + "epoch": 0.7312690074647498, + "grad_norm": 1.4649411090530189, + "learning_rate": 3.5545396953621546e-06, + "loss": 0.2746, + "step": 42320 + }, + { + "epoch": 0.7312862869781587, + "grad_norm": 0.9985924783172315, + "learning_rate": 3.554111816727216e-06, + "loss": 0.375, + "step": 42321 + }, + { + "epoch": 0.7313035664915676, + "grad_norm": 1.4133412397163234, + "learning_rate": 3.553683958281292e-06, + "loss": 0.264, + "step": 42322 + }, + { + "epoch": 0.7313208460049765, + "grad_norm": 1.505701949278707, + "learning_rate": 3.5532561200257163e-06, + "loss": 0.4755, + "step": 42323 + }, + { + "epoch": 0.7313381255183854, + "grad_norm": 1.1638775669501749, + "learning_rate": 3.5528283019618328e-06, + "loss": 0.2883, + "step": 42324 + }, + { + "epoch": 0.7313554050317943, + "grad_norm": 1.008114271173304, + "learning_rate": 3.552400504090984e-06, + "loss": 0.3852, + "step": 42325 + }, + { + "epoch": 0.7313726845452032, + "grad_norm": 0.9560047615634962, + "learning_rate": 3.551972726414503e-06, + "loss": 0.4279, + "step": 42326 + }, + { + "epoch": 0.7313899640586121, + "grad_norm": 1.232929459324893, + "learning_rate": 3.5515449689337376e-06, + "loss": 0.4441, + "step": 42327 + }, + { + "epoch": 0.731407243572021, + "grad_norm": 1.5553521673910669, + "learning_rate": 3.5511172316500196e-06, + "loss": 0.5298, + "step": 42328 + }, + { + "epoch": 0.7314245230854299, + "grad_norm": 1.1866248754263693, + "learning_rate": 3.5506895145646923e-06, + "loss": 0.3345, + "step": 42329 + }, + { + "epoch": 0.7314418025988388, + "grad_norm": 0.6317948955341679, + "learning_rate": 3.550261817679096e-06, + "loss": 0.6596, + "step": 42330 + }, + { + "epoch": 0.7314590821122477, + "grad_norm": 1.3822456838798798, + "learning_rate": 3.5498341409945734e-06, + "loss": 0.2375, + "step": 42331 + }, + { + "epoch": 0.7314763616256567, + "grad_norm": 1.2468714272383772, + "learning_rate": 3.5494064845124567e-06, + "loss": 0.2391, + "step": 42332 + }, + { + "epoch": 0.7314936411390656, + "grad_norm": 1.438452756607687, + "learning_rate": 3.5489788482340925e-06, + "loss": 0.3897, + "step": 42333 + }, + { + "epoch": 0.7315109206524745, + "grad_norm": 1.078763214161025, + "learning_rate": 3.5485512321608138e-06, + "loss": 0.4798, + "step": 42334 + }, + { + "epoch": 0.7315282001658834, + "grad_norm": 1.0980189174827506, + "learning_rate": 3.548123636293962e-06, + "loss": 0.3412, + "step": 42335 + }, + { + "epoch": 0.7315454796792922, + "grad_norm": 1.1521946942848338, + "learning_rate": 3.547696060634882e-06, + "loss": 0.2773, + "step": 42336 + }, + { + "epoch": 0.7315627591927011, + "grad_norm": 1.2161356813496849, + "learning_rate": 3.5472685051849042e-06, + "loss": 0.3065, + "step": 42337 + }, + { + "epoch": 0.73158003870611, + "grad_norm": 1.4250078770137307, + "learning_rate": 3.5468409699453755e-06, + "loss": 0.4296, + "step": 42338 + }, + { + "epoch": 0.7315973182195189, + "grad_norm": 1.6693295795378271, + "learning_rate": 3.5464134549176276e-06, + "loss": 0.4577, + "step": 42339 + }, + { + "epoch": 0.7316145977329278, + "grad_norm": 0.7945831638976733, + "learning_rate": 3.5459859601030033e-06, + "loss": 0.3975, + "step": 42340 + }, + { + "epoch": 0.7316318772463367, + "grad_norm": 1.4214882572823568, + "learning_rate": 3.545558485502841e-06, + "loss": 0.3378, + "step": 42341 + }, + { + "epoch": 0.7316491567597456, + "grad_norm": 1.1522753972633673, + "learning_rate": 3.545131031118484e-06, + "loss": 0.3896, + "step": 42342 + }, + { + "epoch": 0.7316664362731545, + "grad_norm": 1.5883807737982603, + "learning_rate": 3.5447035969512676e-06, + "loss": 0.3788, + "step": 42343 + }, + { + "epoch": 0.7316837157865634, + "grad_norm": 1.2980549629732943, + "learning_rate": 3.5442761830025265e-06, + "loss": 0.2546, + "step": 42344 + }, + { + "epoch": 0.7317009952999723, + "grad_norm": 0.8592380086152873, + "learning_rate": 3.543848789273602e-06, + "loss": 0.2222, + "step": 42345 + }, + { + "epoch": 0.7317182748133813, + "grad_norm": 1.0148086619984569, + "learning_rate": 3.5434214157658344e-06, + "loss": 0.2514, + "step": 42346 + }, + { + "epoch": 0.7317355543267902, + "grad_norm": 0.9008173593177194, + "learning_rate": 3.5429940624805646e-06, + "loss": 0.4417, + "step": 42347 + }, + { + "epoch": 0.7317528338401991, + "grad_norm": 1.0018155737438263, + "learning_rate": 3.5425667294191255e-06, + "loss": 0.3549, + "step": 42348 + }, + { + "epoch": 0.731770113353608, + "grad_norm": 1.575767308997143, + "learning_rate": 3.5421394165828613e-06, + "loss": 0.365, + "step": 42349 + }, + { + "epoch": 0.7317873928670169, + "grad_norm": 1.5081965799996073, + "learning_rate": 3.5417121239731044e-06, + "loss": 0.3564, + "step": 42350 + }, + { + "epoch": 0.7318046723804258, + "grad_norm": 1.3274542264846956, + "learning_rate": 3.5412848515911948e-06, + "loss": 0.3535, + "step": 42351 + }, + { + "epoch": 0.7318219518938347, + "grad_norm": 1.0872678659499475, + "learning_rate": 3.5408575994384733e-06, + "loss": 0.4331, + "step": 42352 + }, + { + "epoch": 0.7318392314072436, + "grad_norm": 1.4157747540881664, + "learning_rate": 3.5404303675162786e-06, + "loss": 0.3059, + "step": 42353 + }, + { + "epoch": 0.7318565109206525, + "grad_norm": 1.2193239069333583, + "learning_rate": 3.5400031558259474e-06, + "loss": 0.67, + "step": 42354 + }, + { + "epoch": 0.7318737904340614, + "grad_norm": 0.9169488443143442, + "learning_rate": 3.5395759643688134e-06, + "loss": 0.1633, + "step": 42355 + }, + { + "epoch": 0.7318910699474703, + "grad_norm": 1.42343548846604, + "learning_rate": 3.5391487931462233e-06, + "loss": 0.4075, + "step": 42356 + }, + { + "epoch": 0.7319083494608791, + "grad_norm": 0.9379434263385772, + "learning_rate": 3.538721642159502e-06, + "loss": 0.318, + "step": 42357 + }, + { + "epoch": 0.731925628974288, + "grad_norm": 2.085137626774744, + "learning_rate": 3.538294511410003e-06, + "loss": 0.2627, + "step": 42358 + }, + { + "epoch": 0.7319429084876969, + "grad_norm": 0.83920136068902, + "learning_rate": 3.5378674008990534e-06, + "loss": 0.2328, + "step": 42359 + }, + { + "epoch": 0.7319601880011058, + "grad_norm": 1.3822855402336993, + "learning_rate": 3.5374403106279963e-06, + "loss": 0.1935, + "step": 42360 + }, + { + "epoch": 0.7319774675145148, + "grad_norm": 0.9710929227547744, + "learning_rate": 3.537013240598165e-06, + "loss": 0.5079, + "step": 42361 + }, + { + "epoch": 0.7319947470279237, + "grad_norm": 1.106679580140678, + "learning_rate": 3.5365861908109034e-06, + "loss": 0.381, + "step": 42362 + }, + { + "epoch": 0.7320120265413326, + "grad_norm": 1.2848792268045015, + "learning_rate": 3.536159161267537e-06, + "loss": 0.2196, + "step": 42363 + }, + { + "epoch": 0.7320293060547415, + "grad_norm": 1.272241706700284, + "learning_rate": 3.5357321519694188e-06, + "loss": 0.5009, + "step": 42364 + }, + { + "epoch": 0.7320465855681504, + "grad_norm": 1.239070273998242, + "learning_rate": 3.535305162917878e-06, + "loss": 0.306, + "step": 42365 + }, + { + "epoch": 0.7320638650815593, + "grad_norm": 1.3793467995116304, + "learning_rate": 3.534878194114251e-06, + "loss": 0.4735, + "step": 42366 + }, + { + "epoch": 0.7320811445949682, + "grad_norm": 1.0563401338853111, + "learning_rate": 3.534451245559879e-06, + "loss": 0.3641, + "step": 42367 + }, + { + "epoch": 0.7320984241083771, + "grad_norm": 1.1998080720111837, + "learning_rate": 3.534024317256094e-06, + "loss": 0.2853, + "step": 42368 + }, + { + "epoch": 0.732115703621786, + "grad_norm": 1.297297748364896, + "learning_rate": 3.533597409204236e-06, + "loss": 0.2753, + "step": 42369 + }, + { + "epoch": 0.7321329831351949, + "grad_norm": 1.0684781273351274, + "learning_rate": 3.5331705214056424e-06, + "loss": 0.2342, + "step": 42370 + }, + { + "epoch": 0.7321502626486038, + "grad_norm": 0.8676865234697465, + "learning_rate": 3.5327436538616535e-06, + "loss": 0.3088, + "step": 42371 + }, + { + "epoch": 0.7321675421620127, + "grad_norm": 1.2532750558113943, + "learning_rate": 3.532316806573599e-06, + "loss": 0.3402, + "step": 42372 + }, + { + "epoch": 0.7321848216754216, + "grad_norm": 1.5102833977005914, + "learning_rate": 3.5318899795428242e-06, + "loss": 0.3991, + "step": 42373 + }, + { + "epoch": 0.7322021011888306, + "grad_norm": 1.1782975573468069, + "learning_rate": 3.5314631727706583e-06, + "loss": 0.3914, + "step": 42374 + }, + { + "epoch": 0.7322193807022395, + "grad_norm": 1.0018456097415813, + "learning_rate": 3.5310363862584406e-06, + "loss": 0.4057, + "step": 42375 + }, + { + "epoch": 0.7322366602156484, + "grad_norm": 1.3430228294762405, + "learning_rate": 3.530609620007512e-06, + "loss": 0.4316, + "step": 42376 + }, + { + "epoch": 0.7322539397290573, + "grad_norm": 1.170065820309064, + "learning_rate": 3.5301828740192023e-06, + "loss": 0.644, + "step": 42377 + }, + { + "epoch": 0.7322712192424662, + "grad_norm": 1.1432563141026955, + "learning_rate": 3.529756148294855e-06, + "loss": 0.0951, + "step": 42378 + }, + { + "epoch": 0.732288498755875, + "grad_norm": 0.9581325154388841, + "learning_rate": 3.5293294428358005e-06, + "loss": 0.4129, + "step": 42379 + }, + { + "epoch": 0.7323057782692839, + "grad_norm": 1.0436693044745664, + "learning_rate": 3.528902757643378e-06, + "loss": 0.2794, + "step": 42380 + }, + { + "epoch": 0.7323230577826928, + "grad_norm": 1.8811982710099076, + "learning_rate": 3.528476092718922e-06, + "loss": 0.3735, + "step": 42381 + }, + { + "epoch": 0.7323403372961017, + "grad_norm": 0.9205525546714761, + "learning_rate": 3.5280494480637762e-06, + "loss": 0.4019, + "step": 42382 + }, + { + "epoch": 0.7323576168095106, + "grad_norm": 1.4820725871283678, + "learning_rate": 3.5276228236792664e-06, + "loss": 0.2217, + "step": 42383 + }, + { + "epoch": 0.7323748963229195, + "grad_norm": 1.2956070395252977, + "learning_rate": 3.5271962195667377e-06, + "loss": 0.3004, + "step": 42384 + }, + { + "epoch": 0.7323921758363284, + "grad_norm": 1.2904296447779202, + "learning_rate": 3.5267696357275195e-06, + "loss": 0.2804, + "step": 42385 + }, + { + "epoch": 0.7324094553497373, + "grad_norm": 1.0511568528116602, + "learning_rate": 3.52634307216295e-06, + "loss": 0.4119, + "step": 42386 + }, + { + "epoch": 0.7324267348631462, + "grad_norm": 0.5418747321858596, + "learning_rate": 3.5259165288743703e-06, + "loss": 0.461, + "step": 42387 + }, + { + "epoch": 0.7324440143765552, + "grad_norm": 1.064088391922928, + "learning_rate": 3.525490005863108e-06, + "loss": 0.4371, + "step": 42388 + }, + { + "epoch": 0.7324612938899641, + "grad_norm": 1.801055851699487, + "learning_rate": 3.525063503130507e-06, + "loss": 0.4245, + "step": 42389 + }, + { + "epoch": 0.732478573403373, + "grad_norm": 1.545733049593475, + "learning_rate": 3.5246370206778946e-06, + "loss": 0.2737, + "step": 42390 + }, + { + "epoch": 0.7324958529167819, + "grad_norm": 1.50856498347068, + "learning_rate": 3.524210558506612e-06, + "loss": 0.4406, + "step": 42391 + }, + { + "epoch": 0.7325131324301908, + "grad_norm": 0.821569459166589, + "learning_rate": 3.5237841166179944e-06, + "loss": 0.7345, + "step": 42392 + }, + { + "epoch": 0.7325304119435997, + "grad_norm": 1.4321872594161442, + "learning_rate": 3.5233576950133797e-06, + "loss": 0.2345, + "step": 42393 + }, + { + "epoch": 0.7325476914570086, + "grad_norm": 1.2424753458589504, + "learning_rate": 3.5229312936941017e-06, + "loss": 0.4011, + "step": 42394 + }, + { + "epoch": 0.7325649709704175, + "grad_norm": 0.9758884912651533, + "learning_rate": 3.5225049126614906e-06, + "loss": 0.3958, + "step": 42395 + }, + { + "epoch": 0.7325822504838264, + "grad_norm": 0.7626168589039417, + "learning_rate": 3.522078551916891e-06, + "loss": 0.3616, + "step": 42396 + }, + { + "epoch": 0.7325995299972353, + "grad_norm": 0.9452240897764015, + "learning_rate": 3.521652211461626e-06, + "loss": 0.3763, + "step": 42397 + }, + { + "epoch": 0.7326168095106442, + "grad_norm": 1.4668037500604625, + "learning_rate": 3.5212258912970467e-06, + "loss": 0.3292, + "step": 42398 + }, + { + "epoch": 0.7326340890240531, + "grad_norm": 1.639090964781941, + "learning_rate": 3.5207995914244754e-06, + "loss": 0.5151, + "step": 42399 + }, + { + "epoch": 0.7326513685374619, + "grad_norm": 1.4860700301872165, + "learning_rate": 3.5203733118452564e-06, + "loss": 0.3786, + "step": 42400 + }, + { + "epoch": 0.7326686480508708, + "grad_norm": 1.276753568726169, + "learning_rate": 3.519947052560717e-06, + "loss": 0.254, + "step": 42401 + }, + { + "epoch": 0.7326859275642797, + "grad_norm": 1.1031471579530292, + "learning_rate": 3.519520813572199e-06, + "loss": 0.5362, + "step": 42402 + }, + { + "epoch": 0.7327032070776887, + "grad_norm": 1.2923699419412675, + "learning_rate": 3.5190945948810273e-06, + "loss": 0.4526, + "step": 42403 + }, + { + "epoch": 0.7327204865910976, + "grad_norm": 1.9721619894362574, + "learning_rate": 3.5186683964885517e-06, + "loss": 0.6094, + "step": 42404 + }, + { + "epoch": 0.7327377661045065, + "grad_norm": 1.2786597936484787, + "learning_rate": 3.5182422183960985e-06, + "loss": 0.299, + "step": 42405 + }, + { + "epoch": 0.7327550456179154, + "grad_norm": 1.1891186491673678, + "learning_rate": 3.5178160606050003e-06, + "loss": 0.2602, + "step": 42406 + }, + { + "epoch": 0.7327723251313243, + "grad_norm": 1.6752892052289763, + "learning_rate": 3.5173899231165974e-06, + "loss": 0.2814, + "step": 42407 + }, + { + "epoch": 0.7327896046447332, + "grad_norm": 1.0118071415281849, + "learning_rate": 3.5169638059322197e-06, + "loss": 0.214, + "step": 42408 + }, + { + "epoch": 0.7328068841581421, + "grad_norm": 1.2621294047310472, + "learning_rate": 3.516537709053204e-06, + "loss": 0.4787, + "step": 42409 + }, + { + "epoch": 0.732824163671551, + "grad_norm": 0.9065866276135031, + "learning_rate": 3.5161116324808843e-06, + "loss": 0.379, + "step": 42410 + }, + { + "epoch": 0.7328414431849599, + "grad_norm": 1.083916269802326, + "learning_rate": 3.515685576216599e-06, + "loss": 0.3646, + "step": 42411 + }, + { + "epoch": 0.7328587226983688, + "grad_norm": 1.041294492123008, + "learning_rate": 3.5152595402616762e-06, + "loss": 0.4171, + "step": 42412 + }, + { + "epoch": 0.7328760022117777, + "grad_norm": 0.9274608132953279, + "learning_rate": 3.514833524617456e-06, + "loss": 0.2774, + "step": 42413 + }, + { + "epoch": 0.7328932817251866, + "grad_norm": 1.2672816645209186, + "learning_rate": 3.5144075292852674e-06, + "loss": 0.286, + "step": 42414 + }, + { + "epoch": 0.7329105612385955, + "grad_norm": 1.2669908918639918, + "learning_rate": 3.513981554266447e-06, + "loss": 0.2644, + "step": 42415 + }, + { + "epoch": 0.7329278407520045, + "grad_norm": 1.0241820639961556, + "learning_rate": 3.5135555995623326e-06, + "loss": 0.4116, + "step": 42416 + }, + { + "epoch": 0.7329451202654134, + "grad_norm": 0.8625696546047846, + "learning_rate": 3.513129665174252e-06, + "loss": 0.3188, + "step": 42417 + }, + { + "epoch": 0.7329623997788223, + "grad_norm": 1.721146677274721, + "learning_rate": 3.5127037511035455e-06, + "loss": 0.496, + "step": 42418 + }, + { + "epoch": 0.7329796792922312, + "grad_norm": 1.2253654000962135, + "learning_rate": 3.5122778573515414e-06, + "loss": 0.3046, + "step": 42419 + }, + { + "epoch": 0.7329969588056401, + "grad_norm": 1.2956223484990201, + "learning_rate": 3.511851983919575e-06, + "loss": 0.4459, + "step": 42420 + }, + { + "epoch": 0.7330142383190489, + "grad_norm": 1.2858467608126425, + "learning_rate": 3.5114261308089814e-06, + "loss": 0.2133, + "step": 42421 + }, + { + "epoch": 0.7330315178324578, + "grad_norm": 1.50651556497037, + "learning_rate": 3.511000298021098e-06, + "loss": 0.435, + "step": 42422 + }, + { + "epoch": 0.7330487973458667, + "grad_norm": 2.1530787688279966, + "learning_rate": 3.510574485557251e-06, + "loss": 0.325, + "step": 42423 + }, + { + "epoch": 0.7330660768592756, + "grad_norm": 1.4982058618512555, + "learning_rate": 3.510148693418781e-06, + "loss": 0.4794, + "step": 42424 + }, + { + "epoch": 0.7330833563726845, + "grad_norm": 1.4030848432666987, + "learning_rate": 3.509722921607016e-06, + "loss": 0.3682, + "step": 42425 + }, + { + "epoch": 0.7331006358860934, + "grad_norm": 2.016002716070592, + "learning_rate": 3.5092971701232913e-06, + "loss": 0.3563, + "step": 42426 + }, + { + "epoch": 0.7331179153995023, + "grad_norm": 0.8500378824047535, + "learning_rate": 3.508871438968945e-06, + "loss": 0.336, + "step": 42427 + }, + { + "epoch": 0.7331351949129112, + "grad_norm": 1.2703169606473566, + "learning_rate": 3.5084457281453034e-06, + "loss": 0.1126, + "step": 42428 + }, + { + "epoch": 0.7331524744263201, + "grad_norm": 1.5967047456040604, + "learning_rate": 3.5080200376537064e-06, + "loss": 0.2662, + "step": 42429 + }, + { + "epoch": 0.733169753939729, + "grad_norm": 1.1458103592267959, + "learning_rate": 3.5075943674954805e-06, + "loss": 0.3338, + "step": 42430 + }, + { + "epoch": 0.733187033453138, + "grad_norm": 0.8518973085226881, + "learning_rate": 3.5071687176719614e-06, + "loss": 0.3806, + "step": 42431 + }, + { + "epoch": 0.7332043129665469, + "grad_norm": 1.0611457612531174, + "learning_rate": 3.5067430881844853e-06, + "loss": 0.656, + "step": 42432 + }, + { + "epoch": 0.7332215924799558, + "grad_norm": 1.5701023467101032, + "learning_rate": 3.5063174790343855e-06, + "loss": 0.4274, + "step": 42433 + }, + { + "epoch": 0.7332388719933647, + "grad_norm": 1.0878519252507204, + "learning_rate": 3.50589189022299e-06, + "loss": 0.9853, + "step": 42434 + }, + { + "epoch": 0.7332561515067736, + "grad_norm": 1.6247172022124492, + "learning_rate": 3.505466321751637e-06, + "loss": 0.416, + "step": 42435 + }, + { + "epoch": 0.7332734310201825, + "grad_norm": 1.236487392993638, + "learning_rate": 3.5050407736216585e-06, + "loss": 0.409, + "step": 42436 + }, + { + "epoch": 0.7332907105335914, + "grad_norm": 1.3450378770647868, + "learning_rate": 3.5046152458343774e-06, + "loss": 0.3451, + "step": 42437 + }, + { + "epoch": 0.7333079900470003, + "grad_norm": 1.5248210719306559, + "learning_rate": 3.504189738391144e-06, + "loss": 0.4495, + "step": 42438 + }, + { + "epoch": 0.7333252695604092, + "grad_norm": 1.2380193632827585, + "learning_rate": 3.503764251293277e-06, + "loss": 0.4284, + "step": 42439 + }, + { + "epoch": 0.7333425490738181, + "grad_norm": 1.4207805618352334, + "learning_rate": 3.5033387845421172e-06, + "loss": 0.4416, + "step": 42440 + }, + { + "epoch": 0.733359828587227, + "grad_norm": 0.8959128408198271, + "learning_rate": 3.502913338138991e-06, + "loss": 0.4152, + "step": 42441 + }, + { + "epoch": 0.7333771081006358, + "grad_norm": 1.4834820690001336, + "learning_rate": 3.5024879120852383e-06, + "loss": 0.406, + "step": 42442 + }, + { + "epoch": 0.7333943876140447, + "grad_norm": 0.882990210815665, + "learning_rate": 3.50206250638218e-06, + "loss": 0.4399, + "step": 42443 + }, + { + "epoch": 0.7334116671274536, + "grad_norm": 1.1909077683505032, + "learning_rate": 3.501637121031162e-06, + "loss": 0.3081, + "step": 42444 + }, + { + "epoch": 0.7334289466408626, + "grad_norm": 1.5141343174885122, + "learning_rate": 3.501211756033508e-06, + "loss": 0.429, + "step": 42445 + }, + { + "epoch": 0.7334462261542715, + "grad_norm": 1.729170945166859, + "learning_rate": 3.500786411390554e-06, + "loss": 0.2612, + "step": 42446 + }, + { + "epoch": 0.7334635056676804, + "grad_norm": 1.7080807796247173, + "learning_rate": 3.500361087103633e-06, + "loss": 0.3168, + "step": 42447 + }, + { + "epoch": 0.7334807851810893, + "grad_norm": 1.4913498950608512, + "learning_rate": 3.499935783174071e-06, + "loss": 0.2747, + "step": 42448 + }, + { + "epoch": 0.7334980646944982, + "grad_norm": 1.1376111434377587, + "learning_rate": 3.4995104996032037e-06, + "loss": 0.4874, + "step": 42449 + }, + { + "epoch": 0.7335153442079071, + "grad_norm": 1.1347936081620114, + "learning_rate": 3.4990852363923646e-06, + "loss": 0.483, + "step": 42450 + }, + { + "epoch": 0.733532623721316, + "grad_norm": 1.1316861957693838, + "learning_rate": 3.498659993542887e-06, + "loss": 0.5747, + "step": 42451 + }, + { + "epoch": 0.7335499032347249, + "grad_norm": 0.5901368871351104, + "learning_rate": 3.498234771056097e-06, + "loss": 0.6276, + "step": 42452 + }, + { + "epoch": 0.7335671827481338, + "grad_norm": 1.5711219812040425, + "learning_rate": 3.4978095689333335e-06, + "loss": 0.3262, + "step": 42453 + }, + { + "epoch": 0.7335844622615427, + "grad_norm": 1.3070475289049215, + "learning_rate": 3.4973843871759215e-06, + "loss": 0.4371, + "step": 42454 + }, + { + "epoch": 0.7336017417749516, + "grad_norm": 1.1195609186198834, + "learning_rate": 3.4969592257851957e-06, + "loss": 0.4687, + "step": 42455 + }, + { + "epoch": 0.7336190212883605, + "grad_norm": 0.8294869740125781, + "learning_rate": 3.496534084762491e-06, + "loss": 0.5187, + "step": 42456 + }, + { + "epoch": 0.7336363008017694, + "grad_norm": 0.9583567692102075, + "learning_rate": 3.4961089641091315e-06, + "loss": 0.3569, + "step": 42457 + }, + { + "epoch": 0.7336535803151784, + "grad_norm": 1.9379148296219464, + "learning_rate": 3.495683863826458e-06, + "loss": 0.4794, + "step": 42458 + }, + { + "epoch": 0.7336708598285873, + "grad_norm": 2.1474617535688325, + "learning_rate": 3.4952587839157924e-06, + "loss": 0.3141, + "step": 42459 + }, + { + "epoch": 0.7336881393419962, + "grad_norm": 0.8138738650698516, + "learning_rate": 3.494833724378471e-06, + "loss": 0.3579, + "step": 42460 + }, + { + "epoch": 0.7337054188554051, + "grad_norm": 0.8989904873762727, + "learning_rate": 3.4944086852158253e-06, + "loss": 0.5194, + "step": 42461 + }, + { + "epoch": 0.733722698368814, + "grad_norm": 1.4174168619051153, + "learning_rate": 3.4939836664291884e-06, + "loss": 0.2609, + "step": 42462 + }, + { + "epoch": 0.7337399778822228, + "grad_norm": 1.387018850311122, + "learning_rate": 3.493558668019886e-06, + "loss": 0.4168, + "step": 42463 + }, + { + "epoch": 0.7337572573956317, + "grad_norm": 1.3123764835900242, + "learning_rate": 3.4931336899892563e-06, + "loss": 0.4265, + "step": 42464 + }, + { + "epoch": 0.7337745369090406, + "grad_norm": 1.0919607607587285, + "learning_rate": 3.492708732338622e-06, + "loss": 0.3776, + "step": 42465 + }, + { + "epoch": 0.7337918164224495, + "grad_norm": 1.3287866292888055, + "learning_rate": 3.49228379506932e-06, + "loss": 0.306, + "step": 42466 + }, + { + "epoch": 0.7338090959358584, + "grad_norm": 1.2681064002183207, + "learning_rate": 3.4918588781826814e-06, + "loss": 0.3571, + "step": 42467 + }, + { + "epoch": 0.7338263754492673, + "grad_norm": 1.4510623112639094, + "learning_rate": 3.4914339816800337e-06, + "loss": 0.3927, + "step": 42468 + }, + { + "epoch": 0.7338436549626762, + "grad_norm": 0.9543091659078327, + "learning_rate": 3.4910091055627115e-06, + "loss": 0.3158, + "step": 42469 + }, + { + "epoch": 0.7338609344760851, + "grad_norm": 0.9862501844602725, + "learning_rate": 3.4905842498320396e-06, + "loss": 0.4324, + "step": 42470 + }, + { + "epoch": 0.733878213989494, + "grad_norm": 1.2585056862739863, + "learning_rate": 3.4901594144893534e-06, + "loss": 0.4134, + "step": 42471 + }, + { + "epoch": 0.733895493502903, + "grad_norm": 0.6587017150244899, + "learning_rate": 3.4897345995359835e-06, + "loss": 0.7138, + "step": 42472 + }, + { + "epoch": 0.7339127730163119, + "grad_norm": 1.032572515831716, + "learning_rate": 3.4893098049732622e-06, + "loss": 0.2754, + "step": 42473 + }, + { + "epoch": 0.7339300525297208, + "grad_norm": 1.565340946884452, + "learning_rate": 3.4888850308025136e-06, + "loss": 0.2754, + "step": 42474 + }, + { + "epoch": 0.7339473320431297, + "grad_norm": 1.5202809300906037, + "learning_rate": 3.488460277025075e-06, + "loss": 0.3073, + "step": 42475 + }, + { + "epoch": 0.7339646115565386, + "grad_norm": 1.461186546655049, + "learning_rate": 3.488035543642272e-06, + "loss": 0.3328, + "step": 42476 + }, + { + "epoch": 0.7339818910699475, + "grad_norm": 2.0447307264836825, + "learning_rate": 3.4876108306554345e-06, + "loss": 0.2658, + "step": 42477 + }, + { + "epoch": 0.7339991705833564, + "grad_norm": 1.0029487270389035, + "learning_rate": 3.4871861380659e-06, + "loss": 0.3295, + "step": 42478 + }, + { + "epoch": 0.7340164500967653, + "grad_norm": 1.4245141545725752, + "learning_rate": 3.486761465874988e-06, + "loss": 0.4102, + "step": 42479 + }, + { + "epoch": 0.7340337296101742, + "grad_norm": 0.7253346306044073, + "learning_rate": 3.486336814084039e-06, + "loss": 0.5464, + "step": 42480 + }, + { + "epoch": 0.7340510091235831, + "grad_norm": 1.0249319144442037, + "learning_rate": 3.485912182694374e-06, + "loss": 0.3546, + "step": 42481 + }, + { + "epoch": 0.734068288636992, + "grad_norm": 1.3000503265534267, + "learning_rate": 3.485487571707332e-06, + "loss": 0.3676, + "step": 42482 + }, + { + "epoch": 0.7340855681504009, + "grad_norm": 1.018186993427193, + "learning_rate": 3.48506298112423e-06, + "loss": 0.3834, + "step": 42483 + }, + { + "epoch": 0.7341028476638097, + "grad_norm": 0.8675910719397025, + "learning_rate": 3.4846384109464126e-06, + "loss": 0.2251, + "step": 42484 + }, + { + "epoch": 0.7341201271772186, + "grad_norm": 0.6970503887499577, + "learning_rate": 3.4842138611751997e-06, + "loss": 0.5674, + "step": 42485 + }, + { + "epoch": 0.7341374066906275, + "grad_norm": 1.7650359646433034, + "learning_rate": 3.4837893318119276e-06, + "loss": 0.4663, + "step": 42486 + }, + { + "epoch": 0.7341546862040365, + "grad_norm": 1.5159019192847145, + "learning_rate": 3.483364822857922e-06, + "loss": 0.4051, + "step": 42487 + }, + { + "epoch": 0.7341719657174454, + "grad_norm": 1.4903069013388266, + "learning_rate": 3.4829403343145107e-06, + "loss": 0.4403, + "step": 42488 + }, + { + "epoch": 0.7341892452308543, + "grad_norm": 0.9319431872859859, + "learning_rate": 3.482515866183025e-06, + "loss": 0.4669, + "step": 42489 + }, + { + "epoch": 0.7342065247442632, + "grad_norm": 1.9181869163910243, + "learning_rate": 3.4820914184647946e-06, + "loss": 0.3631, + "step": 42490 + }, + { + "epoch": 0.7342238042576721, + "grad_norm": 0.9588431133582611, + "learning_rate": 3.481666991161152e-06, + "loss": 0.4864, + "step": 42491 + }, + { + "epoch": 0.734241083771081, + "grad_norm": 1.7378001625179913, + "learning_rate": 3.481242584273422e-06, + "loss": 0.3223, + "step": 42492 + }, + { + "epoch": 0.7342583632844899, + "grad_norm": 1.6529484550534261, + "learning_rate": 3.4808181978029377e-06, + "loss": 0.3286, + "step": 42493 + }, + { + "epoch": 0.7342756427978988, + "grad_norm": 0.6431557706631711, + "learning_rate": 3.4803938317510234e-06, + "loss": 0.2409, + "step": 42494 + }, + { + "epoch": 0.7342929223113077, + "grad_norm": 1.1956193538159898, + "learning_rate": 3.4799694861190113e-06, + "loss": 0.3097, + "step": 42495 + }, + { + "epoch": 0.7343102018247166, + "grad_norm": 1.3216688095082623, + "learning_rate": 3.4795451609082296e-06, + "loss": 0.3871, + "step": 42496 + }, + { + "epoch": 0.7343274813381255, + "grad_norm": 0.739164914441924, + "learning_rate": 3.479120856120012e-06, + "loss": 0.2428, + "step": 42497 + }, + { + "epoch": 0.7343447608515344, + "grad_norm": 1.6404964101456332, + "learning_rate": 3.478696571755683e-06, + "loss": 0.5399, + "step": 42498 + }, + { + "epoch": 0.7343620403649433, + "grad_norm": 1.2332278158942063, + "learning_rate": 3.4782723078165692e-06, + "loss": 0.3394, + "step": 42499 + }, + { + "epoch": 0.7343793198783523, + "grad_norm": 1.395347878690162, + "learning_rate": 3.4778480643040025e-06, + "loss": 0.289, + "step": 42500 + }, + { + "epoch": 0.7343965993917612, + "grad_norm": 2.237744563444374, + "learning_rate": 3.4774238412193095e-06, + "loss": 0.2673, + "step": 42501 + }, + { + "epoch": 0.7344138789051701, + "grad_norm": 1.524825550423348, + "learning_rate": 3.476999638563826e-06, + "loss": 0.2827, + "step": 42502 + }, + { + "epoch": 0.734431158418579, + "grad_norm": 1.3801436671117588, + "learning_rate": 3.476575456338871e-06, + "loss": 0.3482, + "step": 42503 + }, + { + "epoch": 0.7344484379319879, + "grad_norm": 1.1910332948282323, + "learning_rate": 3.476151294545781e-06, + "loss": 0.4033, + "step": 42504 + }, + { + "epoch": 0.7344657174453968, + "grad_norm": 1.3212552988493136, + "learning_rate": 3.4757271531858773e-06, + "loss": 0.332, + "step": 42505 + }, + { + "epoch": 0.7344829969588056, + "grad_norm": 0.8899065647309944, + "learning_rate": 3.475303032260493e-06, + "loss": 0.3849, + "step": 42506 + }, + { + "epoch": 0.7345002764722145, + "grad_norm": 0.7090442268092408, + "learning_rate": 3.474878931770954e-06, + "loss": 0.2053, + "step": 42507 + }, + { + "epoch": 0.7345175559856234, + "grad_norm": 0.8824587667458907, + "learning_rate": 3.4744548517185937e-06, + "loss": 0.3199, + "step": 42508 + }, + { + "epoch": 0.7345348354990323, + "grad_norm": 1.1245603517131644, + "learning_rate": 3.4740307921047377e-06, + "loss": 0.3439, + "step": 42509 + }, + { + "epoch": 0.7345521150124412, + "grad_norm": 1.4115268504406724, + "learning_rate": 3.4736067529307092e-06, + "loss": 0.4507, + "step": 42510 + }, + { + "epoch": 0.7345693945258501, + "grad_norm": 1.4066119544268498, + "learning_rate": 3.473182734197841e-06, + "loss": 0.3675, + "step": 42511 + }, + { + "epoch": 0.734586674039259, + "grad_norm": 1.1532573543417197, + "learning_rate": 3.47275873590746e-06, + "loss": 0.3924, + "step": 42512 + }, + { + "epoch": 0.7346039535526679, + "grad_norm": 1.195838319869062, + "learning_rate": 3.4723347580608977e-06, + "loss": 0.2422, + "step": 42513 + }, + { + "epoch": 0.7346212330660769, + "grad_norm": 1.946499551233994, + "learning_rate": 3.471910800659476e-06, + "loss": 0.338, + "step": 42514 + }, + { + "epoch": 0.7346385125794858, + "grad_norm": 1.7300468398828597, + "learning_rate": 3.4714868637045294e-06, + "loss": 0.4903, + "step": 42515 + }, + { + "epoch": 0.7346557920928947, + "grad_norm": 1.5632366861495561, + "learning_rate": 3.471062947197379e-06, + "loss": 0.443, + "step": 42516 + }, + { + "epoch": 0.7346730716063036, + "grad_norm": 0.8934100877371031, + "learning_rate": 3.470639051139354e-06, + "loss": 0.222, + "step": 42517 + }, + { + "epoch": 0.7346903511197125, + "grad_norm": 1.9185142617775641, + "learning_rate": 3.470215175531788e-06, + "loss": 0.2396, + "step": 42518 + }, + { + "epoch": 0.7347076306331214, + "grad_norm": 1.11332888069113, + "learning_rate": 3.4697913203760014e-06, + "loss": 0.4239, + "step": 42519 + }, + { + "epoch": 0.7347249101465303, + "grad_norm": 0.948655481646467, + "learning_rate": 3.4693674856733284e-06, + "loss": 0.3379, + "step": 42520 + }, + { + "epoch": 0.7347421896599392, + "grad_norm": 1.0547259970503742, + "learning_rate": 3.4689436714250878e-06, + "loss": 0.321, + "step": 42521 + }, + { + "epoch": 0.7347594691733481, + "grad_norm": 2.4485060904619296, + "learning_rate": 3.4685198776326166e-06, + "loss": 0.262, + "step": 42522 + }, + { + "epoch": 0.734776748686757, + "grad_norm": 0.8099095440650246, + "learning_rate": 3.4680961042972304e-06, + "loss": 0.2546, + "step": 42523 + }, + { + "epoch": 0.7347940282001659, + "grad_norm": 0.8362419362277475, + "learning_rate": 3.4676723514202703e-06, + "loss": 0.3159, + "step": 42524 + }, + { + "epoch": 0.7348113077135748, + "grad_norm": 1.0614544535801953, + "learning_rate": 3.4672486190030543e-06, + "loss": 0.3697, + "step": 42525 + }, + { + "epoch": 0.7348285872269837, + "grad_norm": 1.5863819945234474, + "learning_rate": 3.466824907046914e-06, + "loss": 0.474, + "step": 42526 + }, + { + "epoch": 0.7348458667403925, + "grad_norm": 1.1469253664836787, + "learning_rate": 3.4664012155531725e-06, + "loss": 0.4494, + "step": 42527 + }, + { + "epoch": 0.7348631462538014, + "grad_norm": 1.301537825780003, + "learning_rate": 3.4659775445231612e-06, + "loss": 0.4848, + "step": 42528 + }, + { + "epoch": 0.7348804257672104, + "grad_norm": 1.0414849987121524, + "learning_rate": 3.4655538939582025e-06, + "loss": 0.2852, + "step": 42529 + }, + { + "epoch": 0.7348977052806193, + "grad_norm": 1.1815124229194471, + "learning_rate": 3.465130263859625e-06, + "loss": 0.4222, + "step": 42530 + }, + { + "epoch": 0.7349149847940282, + "grad_norm": 1.754371332070554, + "learning_rate": 3.46470665422876e-06, + "loss": 0.2258, + "step": 42531 + }, + { + "epoch": 0.7349322643074371, + "grad_norm": 1.5752881584459077, + "learning_rate": 3.4642830650669257e-06, + "loss": 0.2957, + "step": 42532 + }, + { + "epoch": 0.734949543820846, + "grad_norm": 1.7264546886658236, + "learning_rate": 3.463859496375458e-06, + "loss": 0.3438, + "step": 42533 + }, + { + "epoch": 0.7349668233342549, + "grad_norm": 0.9421269341390233, + "learning_rate": 3.4634359481556755e-06, + "loss": 0.3003, + "step": 42534 + }, + { + "epoch": 0.7349841028476638, + "grad_norm": 0.9003752401167459, + "learning_rate": 3.463012420408909e-06, + "loss": 0.2804, + "step": 42535 + }, + { + "epoch": 0.7350013823610727, + "grad_norm": 1.3506716834640569, + "learning_rate": 3.462588913136484e-06, + "loss": 0.298, + "step": 42536 + }, + { + "epoch": 0.7350186618744816, + "grad_norm": 1.3093401395331246, + "learning_rate": 3.46216542633973e-06, + "loss": 0.417, + "step": 42537 + }, + { + "epoch": 0.7350359413878905, + "grad_norm": 1.8342708609278582, + "learning_rate": 3.461741960019969e-06, + "loss": 0.3253, + "step": 42538 + }, + { + "epoch": 0.7350532209012994, + "grad_norm": 1.113384526435052, + "learning_rate": 3.461318514178531e-06, + "loss": 0.3845, + "step": 42539 + }, + { + "epoch": 0.7350705004147083, + "grad_norm": 1.4563518868340664, + "learning_rate": 3.460895088816737e-06, + "loss": 0.5134, + "step": 42540 + }, + { + "epoch": 0.7350877799281172, + "grad_norm": 0.9095585356718693, + "learning_rate": 3.4604716839359164e-06, + "loss": 0.3758, + "step": 42541 + }, + { + "epoch": 0.7351050594415262, + "grad_norm": 1.3676166512944659, + "learning_rate": 3.4600482995373995e-06, + "loss": 0.4347, + "step": 42542 + }, + { + "epoch": 0.7351223389549351, + "grad_norm": 1.7547025046936087, + "learning_rate": 3.4596249356225055e-06, + "loss": 0.444, + "step": 42543 + }, + { + "epoch": 0.735139618468344, + "grad_norm": 1.6823317384813923, + "learning_rate": 3.4592015921925646e-06, + "loss": 0.3388, + "step": 42544 + }, + { + "epoch": 0.7351568979817529, + "grad_norm": 1.5050948572465135, + "learning_rate": 3.4587782692489003e-06, + "loss": 0.3622, + "step": 42545 + }, + { + "epoch": 0.7351741774951618, + "grad_norm": 1.4537189512306572, + "learning_rate": 3.4583549667928386e-06, + "loss": 0.3499, + "step": 42546 + }, + { + "epoch": 0.7351914570085707, + "grad_norm": 2.1342157591922795, + "learning_rate": 3.457931684825706e-06, + "loss": 0.3393, + "step": 42547 + }, + { + "epoch": 0.7352087365219795, + "grad_norm": 1.384133081461521, + "learning_rate": 3.457508423348832e-06, + "loss": 0.3373, + "step": 42548 + }, + { + "epoch": 0.7352260160353884, + "grad_norm": 2.1556427130818028, + "learning_rate": 3.45708518236354e-06, + "loss": 0.3659, + "step": 42549 + }, + { + "epoch": 0.7352432955487973, + "grad_norm": 1.0551573660959894, + "learning_rate": 3.4566619618711495e-06, + "loss": 0.3727, + "step": 42550 + }, + { + "epoch": 0.7352605750622062, + "grad_norm": 1.6052967514197665, + "learning_rate": 3.456238761872992e-06, + "loss": 0.412, + "step": 42551 + }, + { + "epoch": 0.7352778545756151, + "grad_norm": 1.2886149172107544, + "learning_rate": 3.455815582370391e-06, + "loss": 0.1662, + "step": 42552 + }, + { + "epoch": 0.735295134089024, + "grad_norm": 2.2699484984551765, + "learning_rate": 3.4553924233646775e-06, + "loss": 0.3738, + "step": 42553 + }, + { + "epoch": 0.7353124136024329, + "grad_norm": 1.9581308665938288, + "learning_rate": 3.454969284857168e-06, + "loss": 0.2673, + "step": 42554 + }, + { + "epoch": 0.7353296931158418, + "grad_norm": 2.115312551929704, + "learning_rate": 3.454546166849195e-06, + "loss": 0.3777, + "step": 42555 + }, + { + "epoch": 0.7353469726292508, + "grad_norm": 1.3298152777431715, + "learning_rate": 3.454123069342079e-06, + "loss": 0.3566, + "step": 42556 + }, + { + "epoch": 0.7353642521426597, + "grad_norm": 1.8950497904400565, + "learning_rate": 3.4536999923371463e-06, + "loss": 0.3326, + "step": 42557 + }, + { + "epoch": 0.7353815316560686, + "grad_norm": 1.4420585400149928, + "learning_rate": 3.4532769358357233e-06, + "loss": 0.3927, + "step": 42558 + }, + { + "epoch": 0.7353988111694775, + "grad_norm": 1.9526663960277262, + "learning_rate": 3.452853899839137e-06, + "loss": 0.2498, + "step": 42559 + }, + { + "epoch": 0.7354160906828864, + "grad_norm": 1.349398712500779, + "learning_rate": 3.45243088434871e-06, + "loss": 0.5646, + "step": 42560 + }, + { + "epoch": 0.7354333701962953, + "grad_norm": 1.308123617562603, + "learning_rate": 3.452007889365764e-06, + "loss": 0.5339, + "step": 42561 + }, + { + "epoch": 0.7354506497097042, + "grad_norm": 1.3049624317649668, + "learning_rate": 3.4515849148916303e-06, + "loss": 0.3056, + "step": 42562 + }, + { + "epoch": 0.7354679292231131, + "grad_norm": 0.6143995264350957, + "learning_rate": 3.4511619609276236e-06, + "loss": 0.3425, + "step": 42563 + }, + { + "epoch": 0.735485208736522, + "grad_norm": 1.305868602928926, + "learning_rate": 3.4507390274750818e-06, + "loss": 0.23, + "step": 42564 + }, + { + "epoch": 0.7355024882499309, + "grad_norm": 1.3800085412576444, + "learning_rate": 3.4503161145353194e-06, + "loss": 0.4554, + "step": 42565 + }, + { + "epoch": 0.7355197677633398, + "grad_norm": 1.1055034930754049, + "learning_rate": 3.4498932221096683e-06, + "loss": 0.4918, + "step": 42566 + }, + { + "epoch": 0.7355370472767487, + "grad_norm": 1.5260111391794489, + "learning_rate": 3.4494703501994466e-06, + "loss": 0.4752, + "step": 42567 + }, + { + "epoch": 0.7355543267901576, + "grad_norm": 1.1136157175164283, + "learning_rate": 3.449047498805984e-06, + "loss": 0.326, + "step": 42568 + }, + { + "epoch": 0.7355716063035664, + "grad_norm": 1.6366766051467814, + "learning_rate": 3.4486246679306e-06, + "loss": 0.2956, + "step": 42569 + }, + { + "epoch": 0.7355888858169753, + "grad_norm": 1.6287192024752306, + "learning_rate": 3.4482018575746214e-06, + "loss": 0.4763, + "step": 42570 + }, + { + "epoch": 0.7356061653303843, + "grad_norm": 1.0635029803007086, + "learning_rate": 3.4477790677393753e-06, + "loss": 0.4248, + "step": 42571 + }, + { + "epoch": 0.7356234448437932, + "grad_norm": 0.8309294524374045, + "learning_rate": 3.44735629842618e-06, + "loss": 0.3761, + "step": 42572 + }, + { + "epoch": 0.7356407243572021, + "grad_norm": 1.2907028798228577, + "learning_rate": 3.4469335496363666e-06, + "loss": 0.2663, + "step": 42573 + }, + { + "epoch": 0.735658003870611, + "grad_norm": 2.3348730631101406, + "learning_rate": 3.4465108213712517e-06, + "loss": 0.3684, + "step": 42574 + }, + { + "epoch": 0.7356752833840199, + "grad_norm": 1.4537449270051734, + "learning_rate": 3.446088113632162e-06, + "loss": 0.3572, + "step": 42575 + }, + { + "epoch": 0.7356925628974288, + "grad_norm": 1.0838179031899917, + "learning_rate": 3.4456654264204236e-06, + "loss": 0.2394, + "step": 42576 + }, + { + "epoch": 0.7357098424108377, + "grad_norm": 1.846850628687196, + "learning_rate": 3.4452427597373615e-06, + "loss": 0.3001, + "step": 42577 + }, + { + "epoch": 0.7357271219242466, + "grad_norm": 1.223318624109744, + "learning_rate": 3.4448201135842952e-06, + "loss": 0.2985, + "step": 42578 + }, + { + "epoch": 0.7357444014376555, + "grad_norm": 0.9547481388128185, + "learning_rate": 3.444397487962553e-06, + "loss": 0.3144, + "step": 42579 + }, + { + "epoch": 0.7357616809510644, + "grad_norm": 1.1028038383894907, + "learning_rate": 3.4439748828734533e-06, + "loss": 0.3761, + "step": 42580 + }, + { + "epoch": 0.7357789604644733, + "grad_norm": 1.4402681170102092, + "learning_rate": 3.4435522983183212e-06, + "loss": 0.5494, + "step": 42581 + }, + { + "epoch": 0.7357962399778822, + "grad_norm": 1.0959851986413793, + "learning_rate": 3.443129734298486e-06, + "loss": 0.2612, + "step": 42582 + }, + { + "epoch": 0.7358135194912911, + "grad_norm": 1.252704770603433, + "learning_rate": 3.442707190815263e-06, + "loss": 0.4022, + "step": 42583 + }, + { + "epoch": 0.7358307990047, + "grad_norm": 1.326358191918607, + "learning_rate": 3.442284667869983e-06, + "loss": 0.6117, + "step": 42584 + }, + { + "epoch": 0.735848078518109, + "grad_norm": 1.3426151584024908, + "learning_rate": 3.4418621654639626e-06, + "loss": 0.3217, + "step": 42585 + }, + { + "epoch": 0.7358653580315179, + "grad_norm": 1.1698958234121013, + "learning_rate": 3.4414396835985286e-06, + "loss": 0.3362, + "step": 42586 + }, + { + "epoch": 0.7358826375449268, + "grad_norm": 0.7992594548060798, + "learning_rate": 3.4410172222750026e-06, + "loss": 0.3922, + "step": 42587 + }, + { + "epoch": 0.7358999170583357, + "grad_norm": 0.5132077742967357, + "learning_rate": 3.440594781494714e-06, + "loss": 0.8395, + "step": 42588 + }, + { + "epoch": 0.7359171965717446, + "grad_norm": 0.9930271448965147, + "learning_rate": 3.4401723612589766e-06, + "loss": 0.494, + "step": 42589 + }, + { + "epoch": 0.7359344760851534, + "grad_norm": 0.9087317678756932, + "learning_rate": 3.439749961569122e-06, + "loss": 0.251, + "step": 42590 + }, + { + "epoch": 0.7359517555985623, + "grad_norm": 1.211608346678751, + "learning_rate": 3.4393275824264647e-06, + "loss": 0.3588, + "step": 42591 + }, + { + "epoch": 0.7359690351119712, + "grad_norm": 2.1197636125377213, + "learning_rate": 3.438905223832333e-06, + "loss": 0.4875, + "step": 42592 + }, + { + "epoch": 0.7359863146253801, + "grad_norm": 0.9260810850142087, + "learning_rate": 3.4384828857880526e-06, + "loss": 0.3635, + "step": 42593 + }, + { + "epoch": 0.736003594138789, + "grad_norm": 1.1593889343989014, + "learning_rate": 3.4380605682949376e-06, + "loss": 0.3885, + "step": 42594 + }, + { + "epoch": 0.7360208736521979, + "grad_norm": 1.0310126339121473, + "learning_rate": 3.4376382713543197e-06, + "loss": 0.33, + "step": 42595 + }, + { + "epoch": 0.7360381531656068, + "grad_norm": 1.4716498888297544, + "learning_rate": 3.437215994967514e-06, + "loss": 0.6313, + "step": 42596 + }, + { + "epoch": 0.7360554326790157, + "grad_norm": 0.8261480068257148, + "learning_rate": 3.4367937391358464e-06, + "loss": 0.3914, + "step": 42597 + }, + { + "epoch": 0.7360727121924246, + "grad_norm": 1.191280798641338, + "learning_rate": 3.4363715038606404e-06, + "loss": 0.4641, + "step": 42598 + }, + { + "epoch": 0.7360899917058336, + "grad_norm": 2.0006666537703266, + "learning_rate": 3.43594928914322e-06, + "loss": 0.4461, + "step": 42599 + }, + { + "epoch": 0.7361072712192425, + "grad_norm": 1.2592134514079834, + "learning_rate": 3.4355270949849064e-06, + "loss": 0.386, + "step": 42600 + }, + { + "epoch": 0.7361245507326514, + "grad_norm": 1.750717919530358, + "learning_rate": 3.4351049213870156e-06, + "loss": 0.4782, + "step": 42601 + }, + { + "epoch": 0.7361418302460603, + "grad_norm": 1.6840659602503623, + "learning_rate": 3.4346827683508797e-06, + "loss": 0.2481, + "step": 42602 + }, + { + "epoch": 0.7361591097594692, + "grad_norm": 0.8201820169767743, + "learning_rate": 3.43426063587781e-06, + "loss": 0.2472, + "step": 42603 + }, + { + "epoch": 0.7361763892728781, + "grad_norm": 0.8749541636193348, + "learning_rate": 3.4338385239691418e-06, + "loss": 0.2988, + "step": 42604 + }, + { + "epoch": 0.736193668786287, + "grad_norm": 1.0942872081310582, + "learning_rate": 3.4334164326261864e-06, + "loss": 0.2981, + "step": 42605 + }, + { + "epoch": 0.7362109482996959, + "grad_norm": 0.9652607342809347, + "learning_rate": 3.4329943618502726e-06, + "loss": 0.3085, + "step": 42606 + }, + { + "epoch": 0.7362282278131048, + "grad_norm": 1.0126488316019169, + "learning_rate": 3.4325723116427167e-06, + "loss": 0.5072, + "step": 42607 + }, + { + "epoch": 0.7362455073265137, + "grad_norm": 1.8262083665187996, + "learning_rate": 3.432150282004847e-06, + "loss": 0.144, + "step": 42608 + }, + { + "epoch": 0.7362627868399226, + "grad_norm": 1.5050114279689926, + "learning_rate": 3.4317282729379786e-06, + "loss": 0.4598, + "step": 42609 + }, + { + "epoch": 0.7362800663533315, + "grad_norm": 1.0472286233646855, + "learning_rate": 3.4313062844434353e-06, + "loss": 0.4228, + "step": 42610 + }, + { + "epoch": 0.7362973458667403, + "grad_norm": 1.361357996439589, + "learning_rate": 3.4308843165225447e-06, + "loss": 0.2905, + "step": 42611 + }, + { + "epoch": 0.7363146253801492, + "grad_norm": 1.168750427018059, + "learning_rate": 3.4304623691766193e-06, + "loss": 0.3598, + "step": 42612 + }, + { + "epoch": 0.7363319048935582, + "grad_norm": 1.294406115881524, + "learning_rate": 3.430040442406989e-06, + "loss": 0.6053, + "step": 42613 + }, + { + "epoch": 0.7363491844069671, + "grad_norm": 1.1065475994113618, + "learning_rate": 3.429618536214967e-06, + "loss": 0.5213, + "step": 42614 + }, + { + "epoch": 0.736366463920376, + "grad_norm": 1.0238419515976527, + "learning_rate": 3.4291966506018805e-06, + "loss": 0.4407, + "step": 42615 + }, + { + "epoch": 0.7363837434337849, + "grad_norm": 1.4688886285061795, + "learning_rate": 3.4287747855690478e-06, + "loss": 0.3583, + "step": 42616 + }, + { + "epoch": 0.7364010229471938, + "grad_norm": 1.00691131431441, + "learning_rate": 3.4283529411177964e-06, + "loss": 0.6145, + "step": 42617 + }, + { + "epoch": 0.7364183024606027, + "grad_norm": 1.5488424530093168, + "learning_rate": 3.427931117249439e-06, + "loss": 0.4494, + "step": 42618 + }, + { + "epoch": 0.7364355819740116, + "grad_norm": 1.335996229100416, + "learning_rate": 3.427509313965305e-06, + "loss": 0.3752, + "step": 42619 + }, + { + "epoch": 0.7364528614874205, + "grad_norm": 1.7410019161414507, + "learning_rate": 3.4270875312667073e-06, + "loss": 0.365, + "step": 42620 + }, + { + "epoch": 0.7364701410008294, + "grad_norm": 1.4866063726633725, + "learning_rate": 3.426665769154971e-06, + "loss": 0.2547, + "step": 42621 + }, + { + "epoch": 0.7364874205142383, + "grad_norm": 1.6134749391704173, + "learning_rate": 3.426244027631421e-06, + "loss": 0.2739, + "step": 42622 + }, + { + "epoch": 0.7365047000276472, + "grad_norm": 1.492472829345535, + "learning_rate": 3.4258223066973704e-06, + "loss": 0.4123, + "step": 42623 + }, + { + "epoch": 0.7365219795410561, + "grad_norm": 1.466190681823449, + "learning_rate": 3.425400606354148e-06, + "loss": 0.5037, + "step": 42624 + }, + { + "epoch": 0.736539259054465, + "grad_norm": 1.2722873216087325, + "learning_rate": 3.4249789266030665e-06, + "loss": 0.3215, + "step": 42625 + }, + { + "epoch": 0.736556538567874, + "grad_norm": 0.9663037737955612, + "learning_rate": 3.4245572674454507e-06, + "loss": 0.3393, + "step": 42626 + }, + { + "epoch": 0.7365738180812829, + "grad_norm": 1.5463654963843085, + "learning_rate": 3.424135628882621e-06, + "loss": 0.5177, + "step": 42627 + }, + { + "epoch": 0.7365910975946918, + "grad_norm": 1.14476865728786, + "learning_rate": 3.4237140109159015e-06, + "loss": 0.2589, + "step": 42628 + }, + { + "epoch": 0.7366083771081007, + "grad_norm": 1.1298865487101346, + "learning_rate": 3.423292413546607e-06, + "loss": 0.4244, + "step": 42629 + }, + { + "epoch": 0.7366256566215096, + "grad_norm": 0.9878668028216774, + "learning_rate": 3.4228708367760634e-06, + "loss": 0.4809, + "step": 42630 + }, + { + "epoch": 0.7366429361349185, + "grad_norm": 1.18567510168958, + "learning_rate": 3.4224492806055854e-06, + "loss": 0.5563, + "step": 42631 + }, + { + "epoch": 0.7366602156483273, + "grad_norm": 1.247506783744424, + "learning_rate": 3.422027745036496e-06, + "loss": 0.3955, + "step": 42632 + }, + { + "epoch": 0.7366774951617362, + "grad_norm": 1.7633497284255204, + "learning_rate": 3.421606230070118e-06, + "loss": 0.3941, + "step": 42633 + }, + { + "epoch": 0.7366947746751451, + "grad_norm": 1.1758587155599731, + "learning_rate": 3.4211847357077665e-06, + "loss": 0.4415, + "step": 42634 + }, + { + "epoch": 0.736712054188554, + "grad_norm": 1.2409795766030323, + "learning_rate": 3.420763261950768e-06, + "loss": 0.2866, + "step": 42635 + }, + { + "epoch": 0.7367293337019629, + "grad_norm": 1.1771103446322742, + "learning_rate": 3.420341808800435e-06, + "loss": 0.235, + "step": 42636 + }, + { + "epoch": 0.7367466132153718, + "grad_norm": 0.8893937037391567, + "learning_rate": 3.4199203762580912e-06, + "loss": 0.622, + "step": 42637 + }, + { + "epoch": 0.7367638927287807, + "grad_norm": 1.2141501454034178, + "learning_rate": 3.4194989643250575e-06, + "loss": 0.3289, + "step": 42638 + }, + { + "epoch": 0.7367811722421896, + "grad_norm": 1.3218312897489164, + "learning_rate": 3.419077573002657e-06, + "loss": 0.5414, + "step": 42639 + }, + { + "epoch": 0.7367984517555985, + "grad_norm": 1.0637577472755637, + "learning_rate": 3.418656202292202e-06, + "loss": 0.4865, + "step": 42640 + }, + { + "epoch": 0.7368157312690075, + "grad_norm": 1.7971610794144661, + "learning_rate": 3.4182348521950183e-06, + "loss": 0.3315, + "step": 42641 + }, + { + "epoch": 0.7368330107824164, + "grad_norm": 1.4318926565605579, + "learning_rate": 3.4178135227124244e-06, + "loss": 0.2619, + "step": 42642 + }, + { + "epoch": 0.7368502902958253, + "grad_norm": 1.420183965073121, + "learning_rate": 3.4173922138457316e-06, + "loss": 0.3262, + "step": 42643 + }, + { + "epoch": 0.7368675698092342, + "grad_norm": 1.630379489739554, + "learning_rate": 3.4169709255962734e-06, + "loss": 0.2473, + "step": 42644 + }, + { + "epoch": 0.7368848493226431, + "grad_norm": 1.152982615520444, + "learning_rate": 3.416549657965359e-06, + "loss": 0.5329, + "step": 42645 + }, + { + "epoch": 0.736902128836052, + "grad_norm": 0.9413496717436456, + "learning_rate": 3.416128410954315e-06, + "loss": 0.3576, + "step": 42646 + }, + { + "epoch": 0.7369194083494609, + "grad_norm": 0.8356634896860243, + "learning_rate": 3.415707184564454e-06, + "loss": 0.2311, + "step": 42647 + }, + { + "epoch": 0.7369366878628698, + "grad_norm": 2.1022081049779917, + "learning_rate": 3.4152859787971015e-06, + "loss": 0.2907, + "step": 42648 + }, + { + "epoch": 0.7369539673762787, + "grad_norm": 1.1954175550483481, + "learning_rate": 3.4148647936535704e-06, + "loss": 0.4859, + "step": 42649 + }, + { + "epoch": 0.7369712468896876, + "grad_norm": 1.5556911294191453, + "learning_rate": 3.4144436291351835e-06, + "loss": 0.2187, + "step": 42650 + }, + { + "epoch": 0.7369885264030965, + "grad_norm": 0.983998455144291, + "learning_rate": 3.414022485243259e-06, + "loss": 0.2664, + "step": 42651 + }, + { + "epoch": 0.7370058059165054, + "grad_norm": 1.0986571069477653, + "learning_rate": 3.413601361979121e-06, + "loss": 0.3734, + "step": 42652 + }, + { + "epoch": 0.7370230854299143, + "grad_norm": 0.9630128360838676, + "learning_rate": 3.4131802593440833e-06, + "loss": 0.3892, + "step": 42653 + }, + { + "epoch": 0.7370403649433231, + "grad_norm": 1.5481760732760126, + "learning_rate": 3.412759177339462e-06, + "loss": 0.3249, + "step": 42654 + }, + { + "epoch": 0.737057644456732, + "grad_norm": 0.9255650339721858, + "learning_rate": 3.4123381159665804e-06, + "loss": 0.3716, + "step": 42655 + }, + { + "epoch": 0.737074923970141, + "grad_norm": 1.5690888460898187, + "learning_rate": 3.411917075226756e-06, + "loss": 0.3397, + "step": 42656 + }, + { + "epoch": 0.7370922034835499, + "grad_norm": 1.294445936524808, + "learning_rate": 3.4114960551213117e-06, + "loss": 0.2339, + "step": 42657 + }, + { + "epoch": 0.7371094829969588, + "grad_norm": 1.151247567663673, + "learning_rate": 3.411075055651558e-06, + "loss": 0.3659, + "step": 42658 + }, + { + "epoch": 0.7371267625103677, + "grad_norm": 1.353324152893108, + "learning_rate": 3.410654076818822e-06, + "loss": 0.2815, + "step": 42659 + }, + { + "epoch": 0.7371440420237766, + "grad_norm": 1.0613881974366395, + "learning_rate": 3.4102331186244154e-06, + "loss": 0.3676, + "step": 42660 + }, + { + "epoch": 0.7371613215371855, + "grad_norm": 0.8847557915138369, + "learning_rate": 3.4098121810696595e-06, + "loss": 0.4451, + "step": 42661 + }, + { + "epoch": 0.7371786010505944, + "grad_norm": 1.2097620058912169, + "learning_rate": 3.4093912641558757e-06, + "loss": 0.2443, + "step": 42662 + }, + { + "epoch": 0.7371958805640033, + "grad_norm": 1.0139508924494096, + "learning_rate": 3.4089703678843753e-06, + "loss": 0.2979, + "step": 42663 + }, + { + "epoch": 0.7372131600774122, + "grad_norm": 1.2269345115970092, + "learning_rate": 3.408549492256484e-06, + "loss": 0.664, + "step": 42664 + }, + { + "epoch": 0.7372304395908211, + "grad_norm": 0.9275559048507512, + "learning_rate": 3.4081286372735146e-06, + "loss": 0.293, + "step": 42665 + }, + { + "epoch": 0.73724771910423, + "grad_norm": 2.7923177048806207, + "learning_rate": 3.407707802936786e-06, + "loss": 0.4014, + "step": 42666 + }, + { + "epoch": 0.737264998617639, + "grad_norm": 2.0917616172770943, + "learning_rate": 3.407286989247619e-06, + "loss": 0.2471, + "step": 42667 + }, + { + "epoch": 0.7372822781310479, + "grad_norm": 1.1120222881319508, + "learning_rate": 3.406866196207332e-06, + "loss": 0.4677, + "step": 42668 + }, + { + "epoch": 0.7372995576444568, + "grad_norm": 1.0413594519757603, + "learning_rate": 3.4064454238172384e-06, + "loss": 0.5057, + "step": 42669 + }, + { + "epoch": 0.7373168371578657, + "grad_norm": 0.8438326516808187, + "learning_rate": 3.406024672078663e-06, + "loss": 0.2722, + "step": 42670 + }, + { + "epoch": 0.7373341166712746, + "grad_norm": 1.518333525185511, + "learning_rate": 3.4056039409929144e-06, + "loss": 0.4414, + "step": 42671 + }, + { + "epoch": 0.7373513961846835, + "grad_norm": 1.0258770272131614, + "learning_rate": 3.4051832305613164e-06, + "loss": 0.5779, + "step": 42672 + }, + { + "epoch": 0.7373686756980924, + "grad_norm": 0.7456361342674912, + "learning_rate": 3.404762540785189e-06, + "loss": 0.2824, + "step": 42673 + }, + { + "epoch": 0.7373859552115013, + "grad_norm": 0.7189061681913297, + "learning_rate": 3.4043418716658426e-06, + "loss": 0.215, + "step": 42674 + }, + { + "epoch": 0.7374032347249101, + "grad_norm": 0.878340851414401, + "learning_rate": 3.4039212232046027e-06, + "loss": 0.383, + "step": 42675 + }, + { + "epoch": 0.737420514238319, + "grad_norm": 0.8673407746956132, + "learning_rate": 3.40350059540278e-06, + "loss": 0.2928, + "step": 42676 + }, + { + "epoch": 0.7374377937517279, + "grad_norm": 1.0562577563890994, + "learning_rate": 3.4030799882616937e-06, + "loss": 0.3072, + "step": 42677 + }, + { + "epoch": 0.7374550732651368, + "grad_norm": 0.6611064864794098, + "learning_rate": 3.402659401782662e-06, + "loss": 0.805, + "step": 42678 + }, + { + "epoch": 0.7374723527785457, + "grad_norm": 1.660374336390359, + "learning_rate": 3.402238835967007e-06, + "loss": 0.1997, + "step": 42679 + }, + { + "epoch": 0.7374896322919546, + "grad_norm": 0.7985041575760615, + "learning_rate": 3.401818290816037e-06, + "loss": 0.2738, + "step": 42680 + }, + { + "epoch": 0.7375069118053635, + "grad_norm": 0.9512845800757908, + "learning_rate": 3.401397766331077e-06, + "loss": 0.2705, + "step": 42681 + }, + { + "epoch": 0.7375241913187724, + "grad_norm": 1.5554320695880173, + "learning_rate": 3.4009772625134384e-06, + "loss": 0.3699, + "step": 42682 + }, + { + "epoch": 0.7375414708321814, + "grad_norm": 1.139294104488529, + "learning_rate": 3.4005567793644433e-06, + "loss": 0.2919, + "step": 42683 + }, + { + "epoch": 0.7375587503455903, + "grad_norm": 0.923657765884209, + "learning_rate": 3.4001363168854017e-06, + "loss": 0.5459, + "step": 42684 + }, + { + "epoch": 0.7375760298589992, + "grad_norm": 1.4615986072207667, + "learning_rate": 3.399715875077636e-06, + "loss": 0.2989, + "step": 42685 + }, + { + "epoch": 0.7375933093724081, + "grad_norm": 1.2308741751119954, + "learning_rate": 3.399295453942464e-06, + "loss": 0.4141, + "step": 42686 + }, + { + "epoch": 0.737610588885817, + "grad_norm": 1.8528029481068629, + "learning_rate": 3.3988750534811975e-06, + "loss": 0.3739, + "step": 42687 + }, + { + "epoch": 0.7376278683992259, + "grad_norm": 1.5593709144202539, + "learning_rate": 3.3984546736951595e-06, + "loss": 0.2826, + "step": 42688 + }, + { + "epoch": 0.7376451479126348, + "grad_norm": 0.8372447987249616, + "learning_rate": 3.39803431458566e-06, + "loss": 0.4448, + "step": 42689 + }, + { + "epoch": 0.7376624274260437, + "grad_norm": 0.5269504628182081, + "learning_rate": 3.3976139761540183e-06, + "loss": 0.5239, + "step": 42690 + }, + { + "epoch": 0.7376797069394526, + "grad_norm": 1.6436459591169408, + "learning_rate": 3.3971936584015507e-06, + "loss": 0.3451, + "step": 42691 + }, + { + "epoch": 0.7376969864528615, + "grad_norm": 1.4402807942705276, + "learning_rate": 3.3967733613295785e-06, + "loss": 0.3394, + "step": 42692 + }, + { + "epoch": 0.7377142659662704, + "grad_norm": 1.575142594158309, + "learning_rate": 3.3963530849394144e-06, + "loss": 0.2302, + "step": 42693 + }, + { + "epoch": 0.7377315454796793, + "grad_norm": 1.3350124010840902, + "learning_rate": 3.3959328292323703e-06, + "loss": 0.4651, + "step": 42694 + }, + { + "epoch": 0.7377488249930882, + "grad_norm": 1.1956688828723236, + "learning_rate": 3.3955125942097665e-06, + "loss": 0.5387, + "step": 42695 + }, + { + "epoch": 0.737766104506497, + "grad_norm": 0.903479583547589, + "learning_rate": 3.3950923798729194e-06, + "loss": 0.3224, + "step": 42696 + }, + { + "epoch": 0.737783384019906, + "grad_norm": 1.5124229073796298, + "learning_rate": 3.394672186223148e-06, + "loss": 0.2172, + "step": 42697 + }, + { + "epoch": 0.7378006635333149, + "grad_norm": 2.2867833078393263, + "learning_rate": 3.394252013261763e-06, + "loss": 0.4686, + "step": 42698 + }, + { + "epoch": 0.7378179430467238, + "grad_norm": 1.2303106532663317, + "learning_rate": 3.3938318609900845e-06, + "loss": 0.3748, + "step": 42699 + }, + { + "epoch": 0.7378352225601327, + "grad_norm": 0.8456008275049323, + "learning_rate": 3.3934117294094236e-06, + "loss": 0.2123, + "step": 42700 + }, + { + "epoch": 0.7378525020735416, + "grad_norm": 1.3655973847227785, + "learning_rate": 3.3929916185211e-06, + "loss": 0.2618, + "step": 42701 + }, + { + "epoch": 0.7378697815869505, + "grad_norm": 0.8732131787778101, + "learning_rate": 3.3925715283264283e-06, + "loss": 0.2148, + "step": 42702 + }, + { + "epoch": 0.7378870611003594, + "grad_norm": 2.198888607013828, + "learning_rate": 3.392151458826728e-06, + "loss": 0.3519, + "step": 42703 + }, + { + "epoch": 0.7379043406137683, + "grad_norm": 1.0459476261691008, + "learning_rate": 3.3917314100233123e-06, + "loss": 0.4751, + "step": 42704 + }, + { + "epoch": 0.7379216201271772, + "grad_norm": 1.291816228161736, + "learning_rate": 3.3913113819174914e-06, + "loss": 0.285, + "step": 42705 + }, + { + "epoch": 0.7379388996405861, + "grad_norm": 0.9132647276135102, + "learning_rate": 3.3908913745105866e-06, + "loss": 0.5478, + "step": 42706 + }, + { + "epoch": 0.737956179153995, + "grad_norm": 0.7888265148279195, + "learning_rate": 3.390471387803912e-06, + "loss": 0.4167, + "step": 42707 + }, + { + "epoch": 0.7379734586674039, + "grad_norm": 1.0095775270785976, + "learning_rate": 3.3900514217987866e-06, + "loss": 0.5932, + "step": 42708 + }, + { + "epoch": 0.7379907381808128, + "grad_norm": 1.1733897583523798, + "learning_rate": 3.389631476496519e-06, + "loss": 0.3397, + "step": 42709 + }, + { + "epoch": 0.7380080176942218, + "grad_norm": 1.3459928122862583, + "learning_rate": 3.3892115518984315e-06, + "loss": 0.4166, + "step": 42710 + }, + { + "epoch": 0.7380252972076307, + "grad_norm": 1.3710766705496649, + "learning_rate": 3.3887916480058325e-06, + "loss": 0.4775, + "step": 42711 + }, + { + "epoch": 0.7380425767210396, + "grad_norm": 1.0425616241834668, + "learning_rate": 3.38837176482004e-06, + "loss": 0.3768, + "step": 42712 + }, + { + "epoch": 0.7380598562344485, + "grad_norm": 0.9245414802874874, + "learning_rate": 3.38795190234237e-06, + "loss": 0.3488, + "step": 42713 + }, + { + "epoch": 0.7380771357478574, + "grad_norm": 1.2019446337396333, + "learning_rate": 3.38753206057414e-06, + "loss": 0.363, + "step": 42714 + }, + { + "epoch": 0.7380944152612663, + "grad_norm": 1.9158065043609056, + "learning_rate": 3.387112239516663e-06, + "loss": 0.3616, + "step": 42715 + }, + { + "epoch": 0.7381116947746752, + "grad_norm": 1.4761369186478286, + "learning_rate": 3.3866924391712496e-06, + "loss": 0.2003, + "step": 42716 + }, + { + "epoch": 0.738128974288084, + "grad_norm": 1.4503014813475308, + "learning_rate": 3.386272659539217e-06, + "loss": 0.4975, + "step": 42717 + }, + { + "epoch": 0.7381462538014929, + "grad_norm": 1.1660886336236354, + "learning_rate": 3.385852900621882e-06, + "loss": 0.4675, + "step": 42718 + }, + { + "epoch": 0.7381635333149018, + "grad_norm": 1.4268543643156375, + "learning_rate": 3.3854331624205616e-06, + "loss": 0.359, + "step": 42719 + }, + { + "epoch": 0.7381808128283107, + "grad_norm": 1.219519274383373, + "learning_rate": 3.3850134449365633e-06, + "loss": 0.4382, + "step": 42720 + }, + { + "epoch": 0.7381980923417196, + "grad_norm": 0.9989111035243539, + "learning_rate": 3.38459374817121e-06, + "loss": 0.3199, + "step": 42721 + }, + { + "epoch": 0.7382153718551285, + "grad_norm": 1.5950582332459522, + "learning_rate": 3.384174072125808e-06, + "loss": 0.4336, + "step": 42722 + }, + { + "epoch": 0.7382326513685374, + "grad_norm": 0.5874914043590789, + "learning_rate": 3.383754416801679e-06, + "loss": 0.484, + "step": 42723 + }, + { + "epoch": 0.7382499308819463, + "grad_norm": 1.375954222235814, + "learning_rate": 3.3833347822001306e-06, + "loss": 0.5686, + "step": 42724 + }, + { + "epoch": 0.7382672103953553, + "grad_norm": 0.9761697493338767, + "learning_rate": 3.3829151683224805e-06, + "loss": 0.1864, + "step": 42725 + }, + { + "epoch": 0.7382844899087642, + "grad_norm": 1.05718486559457, + "learning_rate": 3.382495575170046e-06, + "loss": 0.4034, + "step": 42726 + }, + { + "epoch": 0.7383017694221731, + "grad_norm": 1.5614701110333111, + "learning_rate": 3.3820760027441347e-06, + "loss": 0.3896, + "step": 42727 + }, + { + "epoch": 0.738319048935582, + "grad_norm": 1.5738586380742694, + "learning_rate": 3.3816564510460683e-06, + "loss": 0.2893, + "step": 42728 + }, + { + "epoch": 0.7383363284489909, + "grad_norm": 1.2127745634174385, + "learning_rate": 3.3812369200771533e-06, + "loss": 0.4351, + "step": 42729 + }, + { + "epoch": 0.7383536079623998, + "grad_norm": 1.4702759947927746, + "learning_rate": 3.380817409838707e-06, + "loss": 0.341, + "step": 42730 + }, + { + "epoch": 0.7383708874758087, + "grad_norm": 1.4143280201193005, + "learning_rate": 3.380397920332045e-06, + "loss": 0.5104, + "step": 42731 + }, + { + "epoch": 0.7383881669892176, + "grad_norm": 1.2442195074350613, + "learning_rate": 3.379978451558481e-06, + "loss": 0.3643, + "step": 42732 + }, + { + "epoch": 0.7384054465026265, + "grad_norm": 1.5845371728334303, + "learning_rate": 3.379559003519326e-06, + "loss": 0.279, + "step": 42733 + }, + { + "epoch": 0.7384227260160354, + "grad_norm": 1.2972753716969947, + "learning_rate": 3.3791395762158972e-06, + "loss": 0.3914, + "step": 42734 + }, + { + "epoch": 0.7384400055294443, + "grad_norm": 1.0981542239393978, + "learning_rate": 3.378720169649504e-06, + "loss": 0.3135, + "step": 42735 + }, + { + "epoch": 0.7384572850428532, + "grad_norm": 1.5223281415521603, + "learning_rate": 3.378300783821462e-06, + "loss": 0.2569, + "step": 42736 + }, + { + "epoch": 0.7384745645562621, + "grad_norm": 1.412762453290143, + "learning_rate": 3.377881418733089e-06, + "loss": 0.2537, + "step": 42737 + }, + { + "epoch": 0.7384918440696709, + "grad_norm": 1.1292749154032375, + "learning_rate": 3.377462074385691e-06, + "loss": 0.3894, + "step": 42738 + }, + { + "epoch": 0.7385091235830799, + "grad_norm": 0.8220826094499895, + "learning_rate": 3.377042750780589e-06, + "loss": 0.4006, + "step": 42739 + }, + { + "epoch": 0.7385264030964888, + "grad_norm": 1.3061891986847993, + "learning_rate": 3.3766234479190884e-06, + "loss": 0.3215, + "step": 42740 + }, + { + "epoch": 0.7385436826098977, + "grad_norm": 1.3560173522837529, + "learning_rate": 3.3762041658025067e-06, + "loss": 0.4236, + "step": 42741 + }, + { + "epoch": 0.7385609621233066, + "grad_norm": 1.5322590957462066, + "learning_rate": 3.375784904432157e-06, + "loss": 0.4105, + "step": 42742 + }, + { + "epoch": 0.7385782416367155, + "grad_norm": 1.8619453179794327, + "learning_rate": 3.3753656638093556e-06, + "loss": 0.2865, + "step": 42743 + }, + { + "epoch": 0.7385955211501244, + "grad_norm": 1.1783210493600156, + "learning_rate": 3.3749464439354095e-06, + "loss": 0.4564, + "step": 42744 + }, + { + "epoch": 0.7386128006635333, + "grad_norm": 2.2143929180827904, + "learning_rate": 3.3745272448116373e-06, + "loss": 0.3594, + "step": 42745 + }, + { + "epoch": 0.7386300801769422, + "grad_norm": 1.2641647536346676, + "learning_rate": 3.3741080664393465e-06, + "loss": 0.4532, + "step": 42746 + }, + { + "epoch": 0.7386473596903511, + "grad_norm": 2.119955154573418, + "learning_rate": 3.373688908819852e-06, + "loss": 0.2781, + "step": 42747 + }, + { + "epoch": 0.73866463920376, + "grad_norm": 1.5386611333765745, + "learning_rate": 3.3732697719544716e-06, + "loss": 0.4048, + "step": 42748 + }, + { + "epoch": 0.7386819187171689, + "grad_norm": 0.9595569719014632, + "learning_rate": 3.3728506558445107e-06, + "loss": 0.3718, + "step": 42749 + }, + { + "epoch": 0.7386991982305778, + "grad_norm": 1.1221790901452304, + "learning_rate": 3.3724315604912882e-06, + "loss": 0.5062, + "step": 42750 + }, + { + "epoch": 0.7387164777439867, + "grad_norm": 1.2553689788398403, + "learning_rate": 3.37201248589611e-06, + "loss": 0.3502, + "step": 42751 + }, + { + "epoch": 0.7387337572573957, + "grad_norm": 0.9351909402293328, + "learning_rate": 3.3715934320602926e-06, + "loss": 0.5511, + "step": 42752 + }, + { + "epoch": 0.7387510367708046, + "grad_norm": 1.0467434664950213, + "learning_rate": 3.3711743989851485e-06, + "loss": 0.28, + "step": 42753 + }, + { + "epoch": 0.7387683162842135, + "grad_norm": 0.7435927803103294, + "learning_rate": 3.3707553866719933e-06, + "loss": 0.3657, + "step": 42754 + }, + { + "epoch": 0.7387855957976224, + "grad_norm": 1.5082277974643026, + "learning_rate": 3.3703363951221355e-06, + "loss": 0.3703, + "step": 42755 + }, + { + "epoch": 0.7388028753110313, + "grad_norm": 0.9318397823127426, + "learning_rate": 3.369917424336886e-06, + "loss": 0.3973, + "step": 42756 + }, + { + "epoch": 0.7388201548244402, + "grad_norm": 1.614494292031017, + "learning_rate": 3.3694984743175576e-06, + "loss": 0.4566, + "step": 42757 + }, + { + "epoch": 0.7388374343378491, + "grad_norm": 2.054243189789766, + "learning_rate": 3.3690795450654645e-06, + "loss": 0.456, + "step": 42758 + }, + { + "epoch": 0.7388547138512579, + "grad_norm": 2.281547634601037, + "learning_rate": 3.368660636581921e-06, + "loss": 0.4231, + "step": 42759 + }, + { + "epoch": 0.7388719933646668, + "grad_norm": 0.9219223595115641, + "learning_rate": 3.368241748868234e-06, + "loss": 0.424, + "step": 42760 + }, + { + "epoch": 0.7388892728780757, + "grad_norm": 0.6183488550618002, + "learning_rate": 3.36782288192572e-06, + "loss": 0.6818, + "step": 42761 + }, + { + "epoch": 0.7389065523914846, + "grad_norm": 1.2749986320662983, + "learning_rate": 3.367404035755686e-06, + "loss": 0.5305, + "step": 42762 + }, + { + "epoch": 0.7389238319048935, + "grad_norm": 1.7838996538398095, + "learning_rate": 3.3669852103594503e-06, + "loss": 0.3139, + "step": 42763 + }, + { + "epoch": 0.7389411114183024, + "grad_norm": 1.152473559748454, + "learning_rate": 3.3665664057383142e-06, + "loss": 0.2157, + "step": 42764 + }, + { + "epoch": 0.7389583909317113, + "grad_norm": 1.0583031180629972, + "learning_rate": 3.3661476218936017e-06, + "loss": 0.2335, + "step": 42765 + }, + { + "epoch": 0.7389756704451202, + "grad_norm": 0.978744216017903, + "learning_rate": 3.36572885882662e-06, + "loss": 0.3114, + "step": 42766 + }, + { + "epoch": 0.7389929499585292, + "grad_norm": 1.8198962470226798, + "learning_rate": 3.365310116538677e-06, + "loss": 0.4294, + "step": 42767 + }, + { + "epoch": 0.7390102294719381, + "grad_norm": 0.8467288355533807, + "learning_rate": 3.364891395031089e-06, + "loss": 0.3267, + "step": 42768 + }, + { + "epoch": 0.739027508985347, + "grad_norm": 1.305818507398576, + "learning_rate": 3.364472694305163e-06, + "loss": 0.3406, + "step": 42769 + }, + { + "epoch": 0.7390447884987559, + "grad_norm": 1.438789067773254, + "learning_rate": 3.3640540143622124e-06, + "loss": 0.4787, + "step": 42770 + }, + { + "epoch": 0.7390620680121648, + "grad_norm": 1.3712786209210197, + "learning_rate": 3.3636353552035495e-06, + "loss": 0.1839, + "step": 42771 + }, + { + "epoch": 0.7390793475255737, + "grad_norm": 1.241033534617613, + "learning_rate": 3.3632167168304873e-06, + "loss": 0.5586, + "step": 42772 + }, + { + "epoch": 0.7390966270389826, + "grad_norm": 1.2423141641199997, + "learning_rate": 3.362798099244332e-06, + "loss": 0.3213, + "step": 42773 + }, + { + "epoch": 0.7391139065523915, + "grad_norm": 0.9878765562402158, + "learning_rate": 3.3623795024464014e-06, + "loss": 0.4458, + "step": 42774 + }, + { + "epoch": 0.7391311860658004, + "grad_norm": 1.0695556217083253, + "learning_rate": 3.3619609264379983e-06, + "loss": 0.3475, + "step": 42775 + }, + { + "epoch": 0.7391484655792093, + "grad_norm": 1.158031167084898, + "learning_rate": 3.3615423712204378e-06, + "loss": 0.449, + "step": 42776 + }, + { + "epoch": 0.7391657450926182, + "grad_norm": 1.0713040838167507, + "learning_rate": 3.361123836795035e-06, + "loss": 0.373, + "step": 42777 + }, + { + "epoch": 0.7391830246060271, + "grad_norm": 0.7072179459225094, + "learning_rate": 3.360705323163094e-06, + "loss": 0.5996, + "step": 42778 + }, + { + "epoch": 0.739200304119436, + "grad_norm": 1.1314624669013988, + "learning_rate": 3.3602868303259316e-06, + "loss": 0.1855, + "step": 42779 + }, + { + "epoch": 0.739217583632845, + "grad_norm": 1.2740859344349595, + "learning_rate": 3.3598683582848513e-06, + "loss": 0.1931, + "step": 42780 + }, + { + "epoch": 0.7392348631462538, + "grad_norm": 1.6065707525086186, + "learning_rate": 3.359449907041169e-06, + "loss": 0.4852, + "step": 42781 + }, + { + "epoch": 0.7392521426596627, + "grad_norm": 1.4410787470325868, + "learning_rate": 3.359031476596194e-06, + "loss": 0.3165, + "step": 42782 + }, + { + "epoch": 0.7392694221730716, + "grad_norm": 1.2181615255859348, + "learning_rate": 3.3586130669512397e-06, + "loss": 0.3908, + "step": 42783 + }, + { + "epoch": 0.7392867016864805, + "grad_norm": 1.169767618898146, + "learning_rate": 3.358194678107611e-06, + "loss": 0.3218, + "step": 42784 + }, + { + "epoch": 0.7393039811998894, + "grad_norm": 1.4712016565044852, + "learning_rate": 3.357776310066625e-06, + "loss": 0.1872, + "step": 42785 + }, + { + "epoch": 0.7393212607132983, + "grad_norm": 1.2679020697516379, + "learning_rate": 3.357357962829585e-06, + "loss": 0.4612, + "step": 42786 + }, + { + "epoch": 0.7393385402267072, + "grad_norm": 1.0146918550904125, + "learning_rate": 3.3569396363978046e-06, + "loss": 0.3173, + "step": 42787 + }, + { + "epoch": 0.7393558197401161, + "grad_norm": 1.349544156211043, + "learning_rate": 3.3565213307725974e-06, + "loss": 0.5137, + "step": 42788 + }, + { + "epoch": 0.739373099253525, + "grad_norm": 0.8589674014308832, + "learning_rate": 3.3561030459552667e-06, + "loss": 0.3476, + "step": 42789 + }, + { + "epoch": 0.7393903787669339, + "grad_norm": 0.5561584845896389, + "learning_rate": 3.3556847819471307e-06, + "loss": 0.8134, + "step": 42790 + }, + { + "epoch": 0.7394076582803428, + "grad_norm": 1.0410016358776382, + "learning_rate": 3.355266538749491e-06, + "loss": 0.3552, + "step": 42791 + }, + { + "epoch": 0.7394249377937517, + "grad_norm": 0.6046292201331297, + "learning_rate": 3.3548483163636604e-06, + "loss": 0.6556, + "step": 42792 + }, + { + "epoch": 0.7394422173071606, + "grad_norm": 1.070654228401596, + "learning_rate": 3.3544301147909507e-06, + "loss": 0.5884, + "step": 42793 + }, + { + "epoch": 0.7394594968205696, + "grad_norm": 1.2815193461171932, + "learning_rate": 3.354011934032675e-06, + "loss": 0.3764, + "step": 42794 + }, + { + "epoch": 0.7394767763339785, + "grad_norm": 0.7487354546421928, + "learning_rate": 3.353593774090135e-06, + "loss": 0.3005, + "step": 42795 + }, + { + "epoch": 0.7394940558473874, + "grad_norm": 1.3914321923198316, + "learning_rate": 3.353175634964647e-06, + "loss": 0.3901, + "step": 42796 + }, + { + "epoch": 0.7395113353607963, + "grad_norm": 1.4622922495036517, + "learning_rate": 3.3527575166575156e-06, + "loss": 0.4693, + "step": 42797 + }, + { + "epoch": 0.7395286148742052, + "grad_norm": 0.8955862886896264, + "learning_rate": 3.352339419170052e-06, + "loss": 0.5994, + "step": 42798 + }, + { + "epoch": 0.7395458943876141, + "grad_norm": 1.3344663371493635, + "learning_rate": 3.3519213425035714e-06, + "loss": 0.4189, + "step": 42799 + }, + { + "epoch": 0.739563173901023, + "grad_norm": 1.1347508941771112, + "learning_rate": 3.3515032866593735e-06, + "loss": 0.2471, + "step": 42800 + }, + { + "epoch": 0.7395804534144319, + "grad_norm": 1.6742437806150772, + "learning_rate": 3.351085251638776e-06, + "loss": 0.5868, + "step": 42801 + }, + { + "epoch": 0.7395977329278407, + "grad_norm": 1.1498435728075536, + "learning_rate": 3.3506672374430825e-06, + "loss": 0.3476, + "step": 42802 + }, + { + "epoch": 0.7396150124412496, + "grad_norm": 1.5277403744508458, + "learning_rate": 3.3502492440736068e-06, + "loss": 0.4467, + "step": 42803 + }, + { + "epoch": 0.7396322919546585, + "grad_norm": 1.4075404042764603, + "learning_rate": 3.3498312715316505e-06, + "loss": 0.4888, + "step": 42804 + }, + { + "epoch": 0.7396495714680674, + "grad_norm": 1.252084555304354, + "learning_rate": 3.3494133198185343e-06, + "loss": 0.5143, + "step": 42805 + }, + { + "epoch": 0.7396668509814763, + "grad_norm": 2.5110282935657997, + "learning_rate": 3.3489953889355596e-06, + "loss": 0.2877, + "step": 42806 + }, + { + "epoch": 0.7396841304948852, + "grad_norm": 1.1496196744559755, + "learning_rate": 3.3485774788840354e-06, + "loss": 0.3685, + "step": 42807 + }, + { + "epoch": 0.7397014100082941, + "grad_norm": 0.736782602985478, + "learning_rate": 3.348159589665274e-06, + "loss": 0.322, + "step": 42808 + }, + { + "epoch": 0.739718689521703, + "grad_norm": 1.2021483361095455, + "learning_rate": 3.3477417212805794e-06, + "loss": 0.4123, + "step": 42809 + }, + { + "epoch": 0.739735969035112, + "grad_norm": 1.4129112541704, + "learning_rate": 3.347323873731263e-06, + "loss": 0.2471, + "step": 42810 + }, + { + "epoch": 0.7397532485485209, + "grad_norm": 1.2144380103435257, + "learning_rate": 3.3469060470186344e-06, + "loss": 0.7146, + "step": 42811 + }, + { + "epoch": 0.7397705280619298, + "grad_norm": 1.8941190997164363, + "learning_rate": 3.3464882411440046e-06, + "loss": 0.2765, + "step": 42812 + }, + { + "epoch": 0.7397878075753387, + "grad_norm": 1.0458709446159733, + "learning_rate": 3.3460704561086765e-06, + "loss": 0.2407, + "step": 42813 + }, + { + "epoch": 0.7398050870887476, + "grad_norm": 2.0177285840988652, + "learning_rate": 3.3456526919139642e-06, + "loss": 0.4572, + "step": 42814 + }, + { + "epoch": 0.7398223666021565, + "grad_norm": 1.239330045065454, + "learning_rate": 3.3452349485611702e-06, + "loss": 0.4984, + "step": 42815 + }, + { + "epoch": 0.7398396461155654, + "grad_norm": 1.4001061163368667, + "learning_rate": 3.3448172260516052e-06, + "loss": 0.4167, + "step": 42816 + }, + { + "epoch": 0.7398569256289743, + "grad_norm": 0.9529898149158241, + "learning_rate": 3.3443995243865834e-06, + "loss": 0.6287, + "step": 42817 + }, + { + "epoch": 0.7398742051423832, + "grad_norm": 1.0157956184881518, + "learning_rate": 3.3439818435674045e-06, + "loss": 0.433, + "step": 42818 + }, + { + "epoch": 0.7398914846557921, + "grad_norm": 1.6000135789439063, + "learning_rate": 3.3435641835953827e-06, + "loss": 0.4009, + "step": 42819 + }, + { + "epoch": 0.739908764169201, + "grad_norm": 0.7856244455688797, + "learning_rate": 3.3431465444718203e-06, + "loss": 0.5482, + "step": 42820 + }, + { + "epoch": 0.73992604368261, + "grad_norm": 1.6143894774391507, + "learning_rate": 3.34272892619803e-06, + "loss": 0.5219, + "step": 42821 + }, + { + "epoch": 0.7399433231960189, + "grad_norm": 1.0275597515890897, + "learning_rate": 3.3423113287753174e-06, + "loss": 0.3174, + "step": 42822 + }, + { + "epoch": 0.7399606027094277, + "grad_norm": 1.1352353479641668, + "learning_rate": 3.3418937522049954e-06, + "loss": 0.4349, + "step": 42823 + }, + { + "epoch": 0.7399778822228366, + "grad_norm": 1.482052325610469, + "learning_rate": 3.341476196488366e-06, + "loss": 0.2572, + "step": 42824 + }, + { + "epoch": 0.7399951617362455, + "grad_norm": 1.366091423541132, + "learning_rate": 3.3410586616267416e-06, + "loss": 0.3467, + "step": 42825 + }, + { + "epoch": 0.7400124412496544, + "grad_norm": 0.9544233327640504, + "learning_rate": 3.340641147621424e-06, + "loss": 0.6063, + "step": 42826 + }, + { + "epoch": 0.7400297207630633, + "grad_norm": 1.6966458722419724, + "learning_rate": 3.340223654473724e-06, + "loss": 0.4303, + "step": 42827 + }, + { + "epoch": 0.7400470002764722, + "grad_norm": 1.7091008168524047, + "learning_rate": 3.339806182184954e-06, + "loss": 0.3507, + "step": 42828 + }, + { + "epoch": 0.7400642797898811, + "grad_norm": 0.9330846130319542, + "learning_rate": 3.3393887307564133e-06, + "loss": 0.6767, + "step": 42829 + }, + { + "epoch": 0.74008155930329, + "grad_norm": 2.549875272963156, + "learning_rate": 3.3389713001894163e-06, + "loss": 0.4047, + "step": 42830 + }, + { + "epoch": 0.7400988388166989, + "grad_norm": 0.8237140487469945, + "learning_rate": 3.3385538904852642e-06, + "loss": 0.4926, + "step": 42831 + }, + { + "epoch": 0.7401161183301078, + "grad_norm": 1.4077552125272046, + "learning_rate": 3.3381365016452684e-06, + "loss": 0.3281, + "step": 42832 + }, + { + "epoch": 0.7401333978435167, + "grad_norm": 0.9772471940716326, + "learning_rate": 3.337719133670735e-06, + "loss": 0.3698, + "step": 42833 + }, + { + "epoch": 0.7401506773569256, + "grad_norm": 0.8415868477899817, + "learning_rate": 3.3373017865629742e-06, + "loss": 0.3436, + "step": 42834 + }, + { + "epoch": 0.7401679568703345, + "grad_norm": 1.1032441307305172, + "learning_rate": 3.3368844603232885e-06, + "loss": 0.2998, + "step": 42835 + }, + { + "epoch": 0.7401852363837435, + "grad_norm": 1.2211193543632766, + "learning_rate": 3.3364671549529894e-06, + "loss": 0.3729, + "step": 42836 + }, + { + "epoch": 0.7402025158971524, + "grad_norm": 1.2449664134996241, + "learning_rate": 3.3360498704533784e-06, + "loss": 0.4067, + "step": 42837 + }, + { + "epoch": 0.7402197954105613, + "grad_norm": 1.3605333581661352, + "learning_rate": 3.335632606825766e-06, + "loss": 0.3868, + "step": 42838 + }, + { + "epoch": 0.7402370749239702, + "grad_norm": 1.334202641157103, + "learning_rate": 3.3352153640714613e-06, + "loss": 0.3682, + "step": 42839 + }, + { + "epoch": 0.7402543544373791, + "grad_norm": 1.065447598466402, + "learning_rate": 3.334798142191766e-06, + "loss": 0.2693, + "step": 42840 + }, + { + "epoch": 0.740271633950788, + "grad_norm": 2.1085006223111447, + "learning_rate": 3.334380941187992e-06, + "loss": 0.3575, + "step": 42841 + }, + { + "epoch": 0.7402889134641969, + "grad_norm": 1.1372390657368747, + "learning_rate": 3.3339637610614416e-06, + "loss": 0.3424, + "step": 42842 + }, + { + "epoch": 0.7403061929776058, + "grad_norm": 1.3136573976294694, + "learning_rate": 3.3335466018134255e-06, + "loss": 0.5714, + "step": 42843 + }, + { + "epoch": 0.7403234724910146, + "grad_norm": 0.9433056379462788, + "learning_rate": 3.3331294634452418e-06, + "loss": 0.3381, + "step": 42844 + }, + { + "epoch": 0.7403407520044235, + "grad_norm": 1.3760471298390802, + "learning_rate": 3.33271234595821e-06, + "loss": 0.4515, + "step": 42845 + }, + { + "epoch": 0.7403580315178324, + "grad_norm": 0.5726642157586425, + "learning_rate": 3.3322952493536255e-06, + "loss": 0.186, + "step": 42846 + }, + { + "epoch": 0.7403753110312413, + "grad_norm": 1.57787953273172, + "learning_rate": 3.3318781736328032e-06, + "loss": 0.3157, + "step": 42847 + }, + { + "epoch": 0.7403925905446502, + "grad_norm": 1.5357226729984834, + "learning_rate": 3.3314611187970457e-06, + "loss": 0.4362, + "step": 42848 + }, + { + "epoch": 0.7404098700580591, + "grad_norm": 1.3312017697358722, + "learning_rate": 3.3310440848476545e-06, + "loss": 0.3018, + "step": 42849 + }, + { + "epoch": 0.740427149571468, + "grad_norm": 0.8093455012963252, + "learning_rate": 3.3306270717859402e-06, + "loss": 0.2959, + "step": 42850 + }, + { + "epoch": 0.740444429084877, + "grad_norm": 0.5692817015192234, + "learning_rate": 3.3302100796132096e-06, + "loss": 0.7582, + "step": 42851 + }, + { + "epoch": 0.7404617085982859, + "grad_norm": 1.7150184144892757, + "learning_rate": 3.32979310833077e-06, + "loss": 0.522, + "step": 42852 + }, + { + "epoch": 0.7404789881116948, + "grad_norm": 1.1340630126827367, + "learning_rate": 3.329376157939923e-06, + "loss": 0.3827, + "step": 42853 + }, + { + "epoch": 0.7404962676251037, + "grad_norm": 1.2088457066635505, + "learning_rate": 3.328959228441979e-06, + "loss": 0.3827, + "step": 42854 + }, + { + "epoch": 0.7405135471385126, + "grad_norm": 1.178465739139818, + "learning_rate": 3.3285423198382393e-06, + "loss": 0.2355, + "step": 42855 + }, + { + "epoch": 0.7405308266519215, + "grad_norm": 0.9760558527684104, + "learning_rate": 3.3281254321300127e-06, + "loss": 0.3096, + "step": 42856 + }, + { + "epoch": 0.7405481061653304, + "grad_norm": 1.193472802759328, + "learning_rate": 3.3277085653186038e-06, + "loss": 0.442, + "step": 42857 + }, + { + "epoch": 0.7405653856787393, + "grad_norm": 0.9937050651506277, + "learning_rate": 3.327291719405321e-06, + "loss": 0.3376, + "step": 42858 + }, + { + "epoch": 0.7405826651921482, + "grad_norm": 1.0651875786973573, + "learning_rate": 3.3268748943914686e-06, + "loss": 0.271, + "step": 42859 + }, + { + "epoch": 0.7405999447055571, + "grad_norm": 1.5652420700029899, + "learning_rate": 3.3264580902783474e-06, + "loss": 0.5483, + "step": 42860 + }, + { + "epoch": 0.740617224218966, + "grad_norm": 0.9698943234144035, + "learning_rate": 3.326041307067267e-06, + "loss": 0.2901, + "step": 42861 + }, + { + "epoch": 0.7406345037323749, + "grad_norm": 1.436048943174275, + "learning_rate": 3.3256245447595324e-06, + "loss": 0.2603, + "step": 42862 + }, + { + "epoch": 0.7406517832457838, + "grad_norm": 1.277047200889713, + "learning_rate": 3.3252078033564518e-06, + "loss": 0.3435, + "step": 42863 + }, + { + "epoch": 0.7406690627591928, + "grad_norm": 1.8035777677786067, + "learning_rate": 3.324791082859324e-06, + "loss": 0.5304, + "step": 42864 + }, + { + "epoch": 0.7406863422726016, + "grad_norm": 1.3710361105363753, + "learning_rate": 3.324374383269463e-06, + "loss": 0.3182, + "step": 42865 + }, + { + "epoch": 0.7407036217860105, + "grad_norm": 1.176501680071466, + "learning_rate": 3.323957704588164e-06, + "loss": 0.2318, + "step": 42866 + }, + { + "epoch": 0.7407209012994194, + "grad_norm": 1.961208000916292, + "learning_rate": 3.323541046816737e-06, + "loss": 0.4251, + "step": 42867 + }, + { + "epoch": 0.7407381808128283, + "grad_norm": 1.3109704951658174, + "learning_rate": 3.32312440995649e-06, + "loss": 0.2974, + "step": 42868 + }, + { + "epoch": 0.7407554603262372, + "grad_norm": 0.8492108320399014, + "learning_rate": 3.3227077940087216e-06, + "loss": 0.3073, + "step": 42869 + }, + { + "epoch": 0.7407727398396461, + "grad_norm": 0.5890203121364104, + "learning_rate": 3.322291198974743e-06, + "loss": 0.7846, + "step": 42870 + }, + { + "epoch": 0.740790019353055, + "grad_norm": 0.5666675058464795, + "learning_rate": 3.3218746248558533e-06, + "loss": 0.7354, + "step": 42871 + }, + { + "epoch": 0.7408072988664639, + "grad_norm": 1.262208979264762, + "learning_rate": 3.3214580716533596e-06, + "loss": 0.4853, + "step": 42872 + }, + { + "epoch": 0.7408245783798728, + "grad_norm": 0.7662058219656036, + "learning_rate": 3.3210415393685657e-06, + "loss": 0.5916, + "step": 42873 + }, + { + "epoch": 0.7408418578932817, + "grad_norm": 1.2070409319168003, + "learning_rate": 3.320625028002782e-06, + "loss": 0.2965, + "step": 42874 + }, + { + "epoch": 0.7408591374066906, + "grad_norm": 1.240452914029324, + "learning_rate": 3.3202085375573044e-06, + "loss": 0.3245, + "step": 42875 + }, + { + "epoch": 0.7408764169200995, + "grad_norm": 1.307732749268139, + "learning_rate": 3.319792068033444e-06, + "loss": 0.5648, + "step": 42876 + }, + { + "epoch": 0.7408936964335084, + "grad_norm": 1.7387059852614648, + "learning_rate": 3.3193756194325e-06, + "loss": 0.4345, + "step": 42877 + }, + { + "epoch": 0.7409109759469173, + "grad_norm": 0.8682501533842284, + "learning_rate": 3.3189591917557793e-06, + "loss": 0.3472, + "step": 42878 + }, + { + "epoch": 0.7409282554603263, + "grad_norm": 1.3400398050827804, + "learning_rate": 3.3185427850045894e-06, + "loss": 0.3973, + "step": 42879 + }, + { + "epoch": 0.7409455349737352, + "grad_norm": 1.1776240870543226, + "learning_rate": 3.318126399180227e-06, + "loss": 0.376, + "step": 42880 + }, + { + "epoch": 0.7409628144871441, + "grad_norm": 1.7108803707765559, + "learning_rate": 3.3177100342840053e-06, + "loss": 0.2994, + "step": 42881 + }, + { + "epoch": 0.740980094000553, + "grad_norm": 1.1268509839795324, + "learning_rate": 3.317293690317219e-06, + "loss": 0.408, + "step": 42882 + }, + { + "epoch": 0.7409973735139619, + "grad_norm": 1.0124307211315584, + "learning_rate": 3.3168773672811816e-06, + "loss": 0.4768, + "step": 42883 + }, + { + "epoch": 0.7410146530273708, + "grad_norm": 0.7439969151916862, + "learning_rate": 3.3164610651771845e-06, + "loss": 0.2979, + "step": 42884 + }, + { + "epoch": 0.7410319325407797, + "grad_norm": 0.519904618824316, + "learning_rate": 3.316044784006546e-06, + "loss": 0.8073, + "step": 42885 + }, + { + "epoch": 0.7410492120541885, + "grad_norm": 1.5234506891916824, + "learning_rate": 3.3156285237705597e-06, + "loss": 0.421, + "step": 42886 + }, + { + "epoch": 0.7410664915675974, + "grad_norm": 0.9833256675711205, + "learning_rate": 3.315212284470536e-06, + "loss": 0.416, + "step": 42887 + }, + { + "epoch": 0.7410837710810063, + "grad_norm": 0.9467338335473003, + "learning_rate": 3.3147960661077726e-06, + "loss": 0.3847, + "step": 42888 + }, + { + "epoch": 0.7411010505944152, + "grad_norm": 1.1274031267076499, + "learning_rate": 3.314379868683579e-06, + "loss": 0.3309, + "step": 42889 + }, + { + "epoch": 0.7411183301078241, + "grad_norm": 0.6771782484723274, + "learning_rate": 3.3139636921992534e-06, + "loss": 0.7505, + "step": 42890 + }, + { + "epoch": 0.741135609621233, + "grad_norm": 1.3883968523477568, + "learning_rate": 3.313547536656101e-06, + "loss": 0.4415, + "step": 42891 + }, + { + "epoch": 0.741152889134642, + "grad_norm": 0.8746475658182505, + "learning_rate": 3.3131314020554284e-06, + "loss": 0.9993, + "step": 42892 + }, + { + "epoch": 0.7411701686480509, + "grad_norm": 0.8087215867106786, + "learning_rate": 3.3127152883985337e-06, + "loss": 0.3811, + "step": 42893 + }, + { + "epoch": 0.7411874481614598, + "grad_norm": 1.1234968675842847, + "learning_rate": 3.3122991956867266e-06, + "loss": 0.4438, + "step": 42894 + }, + { + "epoch": 0.7412047276748687, + "grad_norm": 1.3673579941678407, + "learning_rate": 3.311883123921303e-06, + "loss": 0.3698, + "step": 42895 + }, + { + "epoch": 0.7412220071882776, + "grad_norm": 0.9271245332432306, + "learning_rate": 3.311467073103569e-06, + "loss": 0.3197, + "step": 42896 + }, + { + "epoch": 0.7412392867016865, + "grad_norm": 1.4004545536220734, + "learning_rate": 3.3110510432348296e-06, + "loss": 0.3133, + "step": 42897 + }, + { + "epoch": 0.7412565662150954, + "grad_norm": 1.3061929671532886, + "learning_rate": 3.3106350343163896e-06, + "loss": 0.2649, + "step": 42898 + }, + { + "epoch": 0.7412738457285043, + "grad_norm": 0.8752167008247023, + "learning_rate": 3.3102190463495497e-06, + "loss": 0.2428, + "step": 42899 + }, + { + "epoch": 0.7412911252419132, + "grad_norm": 0.94896246517608, + "learning_rate": 3.3098030793356083e-06, + "loss": 0.2859, + "step": 42900 + }, + { + "epoch": 0.7413084047553221, + "grad_norm": 1.0963011502664242, + "learning_rate": 3.309387133275872e-06, + "loss": 0.3412, + "step": 42901 + }, + { + "epoch": 0.741325684268731, + "grad_norm": 1.044558460759992, + "learning_rate": 3.308971208171643e-06, + "loss": 0.3257, + "step": 42902 + }, + { + "epoch": 0.7413429637821399, + "grad_norm": 1.932772061802638, + "learning_rate": 3.308555304024229e-06, + "loss": 0.3444, + "step": 42903 + }, + { + "epoch": 0.7413602432955488, + "grad_norm": 2.8257701338921035, + "learning_rate": 3.308139420834925e-06, + "loss": 0.298, + "step": 42904 + }, + { + "epoch": 0.7413775228089577, + "grad_norm": 1.5529118977507494, + "learning_rate": 3.3077235586050393e-06, + "loss": 0.457, + "step": 42905 + }, + { + "epoch": 0.7413948023223667, + "grad_norm": 1.5439619214149722, + "learning_rate": 3.3073077173358693e-06, + "loss": 0.4814, + "step": 42906 + }, + { + "epoch": 0.7414120818357754, + "grad_norm": 1.8446742899882238, + "learning_rate": 3.30689189702872e-06, + "loss": 0.2971, + "step": 42907 + }, + { + "epoch": 0.7414293613491844, + "grad_norm": 0.630024460227487, + "learning_rate": 3.3064760976848944e-06, + "loss": 0.2136, + "step": 42908 + }, + { + "epoch": 0.7414466408625933, + "grad_norm": 1.9684477619442802, + "learning_rate": 3.3060603193056973e-06, + "loss": 0.4981, + "step": 42909 + }, + { + "epoch": 0.7414639203760022, + "grad_norm": 1.7622842828617582, + "learning_rate": 3.3056445618924283e-06, + "loss": 0.2366, + "step": 42910 + }, + { + "epoch": 0.7414811998894111, + "grad_norm": 0.9070390537595202, + "learning_rate": 3.305228825446385e-06, + "loss": 0.3374, + "step": 42911 + }, + { + "epoch": 0.74149847940282, + "grad_norm": 1.3695008104712385, + "learning_rate": 3.3048131099688754e-06, + "loss": 0.4288, + "step": 42912 + }, + { + "epoch": 0.7415157589162289, + "grad_norm": 1.5305090387829834, + "learning_rate": 3.304397415461198e-06, + "loss": 0.3567, + "step": 42913 + }, + { + "epoch": 0.7415330384296378, + "grad_norm": 1.8554931372727628, + "learning_rate": 3.303981741924661e-06, + "loss": 0.3639, + "step": 42914 + }, + { + "epoch": 0.7415503179430467, + "grad_norm": 1.6125454951835743, + "learning_rate": 3.303566089360559e-06, + "loss": 0.4783, + "step": 42915 + }, + { + "epoch": 0.7415675974564556, + "grad_norm": 1.3186744343111287, + "learning_rate": 3.3031504577702e-06, + "loss": 0.186, + "step": 42916 + }, + { + "epoch": 0.7415848769698645, + "grad_norm": 1.575549571427753, + "learning_rate": 3.3027348471548804e-06, + "loss": 0.3385, + "step": 42917 + }, + { + "epoch": 0.7416021564832734, + "grad_norm": 1.35329638059973, + "learning_rate": 3.3023192575159024e-06, + "loss": 0.4656, + "step": 42918 + }, + { + "epoch": 0.7416194359966823, + "grad_norm": 1.354201885242704, + "learning_rate": 3.3019036888545706e-06, + "loss": 0.4083, + "step": 42919 + }, + { + "epoch": 0.7416367155100912, + "grad_norm": 0.9718018224590923, + "learning_rate": 3.301488141172189e-06, + "loss": 0.3022, + "step": 42920 + }, + { + "epoch": 0.7416539950235002, + "grad_norm": 1.2994205150471576, + "learning_rate": 3.3010726144700554e-06, + "loss": 0.3974, + "step": 42921 + }, + { + "epoch": 0.7416712745369091, + "grad_norm": 1.4510558341186486, + "learning_rate": 3.3006571087494677e-06, + "loss": 0.2577, + "step": 42922 + }, + { + "epoch": 0.741688554050318, + "grad_norm": 1.1345087452666098, + "learning_rate": 3.300241624011734e-06, + "loss": 0.2825, + "step": 42923 + }, + { + "epoch": 0.7417058335637269, + "grad_norm": 1.0101191906501445, + "learning_rate": 3.299826160258147e-06, + "loss": 0.4481, + "step": 42924 + }, + { + "epoch": 0.7417231130771358, + "grad_norm": 1.3259440774330267, + "learning_rate": 3.2994107174900203e-06, + "loss": 0.3024, + "step": 42925 + }, + { + "epoch": 0.7417403925905447, + "grad_norm": 1.5771909723097863, + "learning_rate": 3.2989952957086448e-06, + "loss": 0.211, + "step": 42926 + }, + { + "epoch": 0.7417576721039536, + "grad_norm": 1.5813508756566825, + "learning_rate": 3.298579894915329e-06, + "loss": 0.2631, + "step": 42927 + }, + { + "epoch": 0.7417749516173625, + "grad_norm": 1.4909381866766462, + "learning_rate": 3.298164515111366e-06, + "loss": 0.2788, + "step": 42928 + }, + { + "epoch": 0.7417922311307713, + "grad_norm": 0.7630420537107757, + "learning_rate": 3.2977491562980656e-06, + "loss": 0.3191, + "step": 42929 + }, + { + "epoch": 0.7418095106441802, + "grad_norm": 1.5050975952660157, + "learning_rate": 3.29733381847672e-06, + "loss": 0.3272, + "step": 42930 + }, + { + "epoch": 0.7418267901575891, + "grad_norm": 1.0565670122133537, + "learning_rate": 3.2969185016486348e-06, + "loss": 0.3485, + "step": 42931 + }, + { + "epoch": 0.741844069670998, + "grad_norm": 1.8772635892038465, + "learning_rate": 3.2965032058151127e-06, + "loss": 0.4026, + "step": 42932 + }, + { + "epoch": 0.7418613491844069, + "grad_norm": 1.0132279605499006, + "learning_rate": 3.2960879309774495e-06, + "loss": 0.4334, + "step": 42933 + }, + { + "epoch": 0.7418786286978158, + "grad_norm": 0.9635690114688407, + "learning_rate": 3.2956726771369516e-06, + "loss": 0.3435, + "step": 42934 + }, + { + "epoch": 0.7418959082112248, + "grad_norm": 2.5108024560997997, + "learning_rate": 3.2952574442949125e-06, + "loss": 0.2809, + "step": 42935 + }, + { + "epoch": 0.7419131877246337, + "grad_norm": 1.6809301353863284, + "learning_rate": 3.2948422324526376e-06, + "loss": 0.4062, + "step": 42936 + }, + { + "epoch": 0.7419304672380426, + "grad_norm": 1.1749155388902521, + "learning_rate": 3.2944270416114256e-06, + "loss": 0.5743, + "step": 42937 + }, + { + "epoch": 0.7419477467514515, + "grad_norm": 1.3718160962056871, + "learning_rate": 3.294011871772581e-06, + "loss": 0.266, + "step": 42938 + }, + { + "epoch": 0.7419650262648604, + "grad_norm": 1.0200551860707423, + "learning_rate": 3.293596722937399e-06, + "loss": 0.4584, + "step": 42939 + }, + { + "epoch": 0.7419823057782693, + "grad_norm": 1.1045209940983467, + "learning_rate": 3.2931815951071835e-06, + "loss": 0.4023, + "step": 42940 + }, + { + "epoch": 0.7419995852916782, + "grad_norm": 0.9937222250247316, + "learning_rate": 3.29276648828323e-06, + "loss": 0.343, + "step": 42941 + }, + { + "epoch": 0.7420168648050871, + "grad_norm": 1.2621686683697053, + "learning_rate": 3.292351402466841e-06, + "loss": 0.3563, + "step": 42942 + }, + { + "epoch": 0.742034144318496, + "grad_norm": 0.8687298940885524, + "learning_rate": 3.2919363376593217e-06, + "loss": 0.3429, + "step": 42943 + }, + { + "epoch": 0.7420514238319049, + "grad_norm": 0.6520382028617074, + "learning_rate": 3.2915212938619634e-06, + "loss": 0.7862, + "step": 42944 + }, + { + "epoch": 0.7420687033453138, + "grad_norm": 2.3379610818776952, + "learning_rate": 3.2911062710760743e-06, + "loss": 0.387, + "step": 42945 + }, + { + "epoch": 0.7420859828587227, + "grad_norm": 1.5231839600832509, + "learning_rate": 3.2906912693029457e-06, + "loss": 0.3047, + "step": 42946 + }, + { + "epoch": 0.7421032623721316, + "grad_norm": 1.1392623046736943, + "learning_rate": 3.2902762885438833e-06, + "loss": 0.5887, + "step": 42947 + }, + { + "epoch": 0.7421205418855406, + "grad_norm": 2.4895281766686925, + "learning_rate": 3.2898613288001848e-06, + "loss": 0.1773, + "step": 42948 + }, + { + "epoch": 0.7421378213989495, + "grad_norm": 1.5752782020467515, + "learning_rate": 3.2894463900731533e-06, + "loss": 0.2611, + "step": 42949 + }, + { + "epoch": 0.7421551009123583, + "grad_norm": 1.1888718342248676, + "learning_rate": 3.2890314723640836e-06, + "loss": 0.3441, + "step": 42950 + }, + { + "epoch": 0.7421723804257672, + "grad_norm": 1.3721007192836046, + "learning_rate": 3.2886165756742796e-06, + "loss": 0.3146, + "step": 42951 + }, + { + "epoch": 0.7421896599391761, + "grad_norm": 1.010906576641499, + "learning_rate": 3.288201700005036e-06, + "loss": 0.4102, + "step": 42952 + }, + { + "epoch": 0.742206939452585, + "grad_norm": 1.508599335369051, + "learning_rate": 3.2877868453576543e-06, + "loss": 0.4119, + "step": 42953 + }, + { + "epoch": 0.7422242189659939, + "grad_norm": 1.6358058708074972, + "learning_rate": 3.2873720117334385e-06, + "loss": 0.2868, + "step": 42954 + }, + { + "epoch": 0.7422414984794028, + "grad_norm": 1.2894040807093516, + "learning_rate": 3.286957199133679e-06, + "loss": 0.3013, + "step": 42955 + }, + { + "epoch": 0.7422587779928117, + "grad_norm": 1.6152111970350373, + "learning_rate": 3.286542407559684e-06, + "loss": 0.2915, + "step": 42956 + }, + { + "epoch": 0.7422760575062206, + "grad_norm": 1.5049482820247824, + "learning_rate": 3.286127637012745e-06, + "loss": 0.3499, + "step": 42957 + }, + { + "epoch": 0.7422933370196295, + "grad_norm": 2.012638854965014, + "learning_rate": 3.2857128874941637e-06, + "loss": 0.4143, + "step": 42958 + }, + { + "epoch": 0.7423106165330384, + "grad_norm": 1.743990635887671, + "learning_rate": 3.28529815900524e-06, + "loss": 0.3801, + "step": 42959 + }, + { + "epoch": 0.7423278960464473, + "grad_norm": 0.9618713849248115, + "learning_rate": 3.284883451547277e-06, + "loss": 0.353, + "step": 42960 + }, + { + "epoch": 0.7423451755598562, + "grad_norm": 1.331393874762089, + "learning_rate": 3.284468765121569e-06, + "loss": 0.4779, + "step": 42961 + }, + { + "epoch": 0.7423624550732651, + "grad_norm": 3.8082662971031893, + "learning_rate": 3.284054099729411e-06, + "loss": 0.5108, + "step": 42962 + }, + { + "epoch": 0.7423797345866741, + "grad_norm": 1.2962952484907633, + "learning_rate": 3.2836394553721108e-06, + "loss": 0.5179, + "step": 42963 + }, + { + "epoch": 0.742397014100083, + "grad_norm": 1.2440083064747838, + "learning_rate": 3.283224832050954e-06, + "loss": 0.2914, + "step": 42964 + }, + { + "epoch": 0.7424142936134919, + "grad_norm": 1.606113742434908, + "learning_rate": 3.282810229767256e-06, + "loss": 0.3592, + "step": 42965 + }, + { + "epoch": 0.7424315731269008, + "grad_norm": 1.0245470133033097, + "learning_rate": 3.2823956485223017e-06, + "loss": 0.2478, + "step": 42966 + }, + { + "epoch": 0.7424488526403097, + "grad_norm": 1.5091776490856668, + "learning_rate": 3.2819810883174e-06, + "loss": 0.292, + "step": 42967 + }, + { + "epoch": 0.7424661321537186, + "grad_norm": 1.2362908838750297, + "learning_rate": 3.2815665491538383e-06, + "loss": 0.3804, + "step": 42968 + }, + { + "epoch": 0.7424834116671275, + "grad_norm": 0.9339844885482953, + "learning_rate": 3.281152031032926e-06, + "loss": 0.4282, + "step": 42969 + }, + { + "epoch": 0.7425006911805364, + "grad_norm": 0.8706070364630055, + "learning_rate": 3.280737533955949e-06, + "loss": 0.2637, + "step": 42970 + }, + { + "epoch": 0.7425179706939452, + "grad_norm": 1.5351628941855047, + "learning_rate": 3.2803230579242195e-06, + "loss": 0.3781, + "step": 42971 + }, + { + "epoch": 0.7425352502073541, + "grad_norm": 1.4863971743947122, + "learning_rate": 3.279908602939028e-06, + "loss": 0.461, + "step": 42972 + }, + { + "epoch": 0.742552529720763, + "grad_norm": 1.2321370777906602, + "learning_rate": 3.2794941690016712e-06, + "loss": 0.3652, + "step": 42973 + }, + { + "epoch": 0.7425698092341719, + "grad_norm": 1.052662593243971, + "learning_rate": 3.2790797561134515e-06, + "loss": 0.472, + "step": 42974 + }, + { + "epoch": 0.7425870887475808, + "grad_norm": 1.4231548382061807, + "learning_rate": 3.278665364275663e-06, + "loss": 0.4419, + "step": 42975 + }, + { + "epoch": 0.7426043682609897, + "grad_norm": 0.9016986039746074, + "learning_rate": 3.2782509934896045e-06, + "loss": 0.3678, + "step": 42976 + }, + { + "epoch": 0.7426216477743987, + "grad_norm": 1.1636164701016667, + "learning_rate": 3.2778366437565743e-06, + "loss": 0.2772, + "step": 42977 + }, + { + "epoch": 0.7426389272878076, + "grad_norm": 1.1691669051285223, + "learning_rate": 3.277422315077875e-06, + "loss": 0.4263, + "step": 42978 + }, + { + "epoch": 0.7426562068012165, + "grad_norm": 1.3532130778308817, + "learning_rate": 3.277008007454796e-06, + "loss": 0.3533, + "step": 42979 + }, + { + "epoch": 0.7426734863146254, + "grad_norm": 1.078033294166034, + "learning_rate": 3.2765937208886424e-06, + "loss": 0.1817, + "step": 42980 + }, + { + "epoch": 0.7426907658280343, + "grad_norm": 1.046876281591259, + "learning_rate": 3.2761794553807034e-06, + "loss": 0.3299, + "step": 42981 + }, + { + "epoch": 0.7427080453414432, + "grad_norm": 1.300000852554406, + "learning_rate": 3.2757652109322837e-06, + "loss": 0.3372, + "step": 42982 + }, + { + "epoch": 0.7427253248548521, + "grad_norm": 0.9279591434007622, + "learning_rate": 3.2753509875446797e-06, + "loss": 0.3106, + "step": 42983 + }, + { + "epoch": 0.742742604368261, + "grad_norm": 0.8077482693150966, + "learning_rate": 3.274936785219186e-06, + "loss": 0.3541, + "step": 42984 + }, + { + "epoch": 0.7427598838816699, + "grad_norm": 1.3433878707094944, + "learning_rate": 3.2745226039571033e-06, + "loss": 0.3539, + "step": 42985 + }, + { + "epoch": 0.7427771633950788, + "grad_norm": 1.3582161047814512, + "learning_rate": 3.2741084437597237e-06, + "loss": 0.466, + "step": 42986 + }, + { + "epoch": 0.7427944429084877, + "grad_norm": 0.9112045794966332, + "learning_rate": 3.2736943046283487e-06, + "loss": 0.389, + "step": 42987 + }, + { + "epoch": 0.7428117224218966, + "grad_norm": 1.015773669051741, + "learning_rate": 3.273280186564274e-06, + "loss": 0.3116, + "step": 42988 + }, + { + "epoch": 0.7428290019353055, + "grad_norm": 1.4292404455518453, + "learning_rate": 3.2728660895687993e-06, + "loss": 0.3128, + "step": 42989 + }, + { + "epoch": 0.7428462814487145, + "grad_norm": 1.2658933421753507, + "learning_rate": 3.272452013643216e-06, + "loss": 0.2982, + "step": 42990 + }, + { + "epoch": 0.7428635609621234, + "grad_norm": 0.9291675477561625, + "learning_rate": 3.2720379587888283e-06, + "loss": 0.3946, + "step": 42991 + }, + { + "epoch": 0.7428808404755322, + "grad_norm": 0.8561437227247581, + "learning_rate": 3.2716239250069258e-06, + "loss": 0.252, + "step": 42992 + }, + { + "epoch": 0.7428981199889411, + "grad_norm": 1.2267132883399843, + "learning_rate": 3.2712099122988083e-06, + "loss": 0.38, + "step": 42993 + }, + { + "epoch": 0.74291539950235, + "grad_norm": 1.889351465132375, + "learning_rate": 3.2707959206657756e-06, + "loss": 0.215, + "step": 42994 + }, + { + "epoch": 0.7429326790157589, + "grad_norm": 1.1304736760111715, + "learning_rate": 3.270381950109118e-06, + "loss": 0.315, + "step": 42995 + }, + { + "epoch": 0.7429499585291678, + "grad_norm": 0.8077732231157926, + "learning_rate": 3.2699680006301404e-06, + "loss": 0.6048, + "step": 42996 + }, + { + "epoch": 0.7429672380425767, + "grad_norm": 1.8209254809855757, + "learning_rate": 3.26955407223013e-06, + "loss": 0.3793, + "step": 42997 + }, + { + "epoch": 0.7429845175559856, + "grad_norm": 2.091238466725156, + "learning_rate": 3.2691401649103882e-06, + "loss": 0.2953, + "step": 42998 + }, + { + "epoch": 0.7430017970693945, + "grad_norm": 1.661886700746517, + "learning_rate": 3.2687262786722095e-06, + "loss": 0.9701, + "step": 42999 + }, + { + "epoch": 0.7430190765828034, + "grad_norm": 1.640465036254409, + "learning_rate": 3.2683124135168965e-06, + "loss": 0.5609, + "step": 43000 + }, + { + "epoch": 0.7430363560962123, + "grad_norm": 1.3724996570967014, + "learning_rate": 3.2678985694457366e-06, + "loss": 0.2613, + "step": 43001 + }, + { + "epoch": 0.7430536356096212, + "grad_norm": 1.2497352053411306, + "learning_rate": 3.2674847464600346e-06, + "loss": 0.3439, + "step": 43002 + }, + { + "epoch": 0.7430709151230301, + "grad_norm": 1.2726881963469172, + "learning_rate": 3.2670709445610806e-06, + "loss": 0.2537, + "step": 43003 + }, + { + "epoch": 0.743088194636439, + "grad_norm": 1.3830975463777246, + "learning_rate": 3.2666571637501656e-06, + "loss": 0.3596, + "step": 43004 + }, + { + "epoch": 0.743105474149848, + "grad_norm": 0.8625377592695638, + "learning_rate": 3.2662434040285997e-06, + "loss": 0.208, + "step": 43005 + }, + { + "epoch": 0.7431227536632569, + "grad_norm": 0.8246247042439871, + "learning_rate": 3.2658296653976664e-06, + "loss": 0.3418, + "step": 43006 + }, + { + "epoch": 0.7431400331766658, + "grad_norm": 1.3394922387430104, + "learning_rate": 3.2654159478586712e-06, + "loss": 0.4326, + "step": 43007 + }, + { + "epoch": 0.7431573126900747, + "grad_norm": 1.0511090317859204, + "learning_rate": 3.265002251412901e-06, + "loss": 0.4467, + "step": 43008 + }, + { + "epoch": 0.7431745922034836, + "grad_norm": 1.7769160836158089, + "learning_rate": 3.264588576061658e-06, + "loss": 0.4168, + "step": 43009 + }, + { + "epoch": 0.7431918717168925, + "grad_norm": 1.3452612086805498, + "learning_rate": 3.26417492180623e-06, + "loss": 0.4317, + "step": 43010 + }, + { + "epoch": 0.7432091512303014, + "grad_norm": 1.0439781535580204, + "learning_rate": 3.263761288647924e-06, + "loss": 0.5006, + "step": 43011 + }, + { + "epoch": 0.7432264307437103, + "grad_norm": 1.4815208436506102, + "learning_rate": 3.2633476765880313e-06, + "loss": 0.4912, + "step": 43012 + }, + { + "epoch": 0.7432437102571191, + "grad_norm": 1.7660380523554784, + "learning_rate": 3.2629340856278403e-06, + "loss": 0.3402, + "step": 43013 + }, + { + "epoch": 0.743260989770528, + "grad_norm": 1.2434025392212642, + "learning_rate": 3.262520515768656e-06, + "loss": 0.353, + "step": 43014 + }, + { + "epoch": 0.7432782692839369, + "grad_norm": 1.7166866523411257, + "learning_rate": 3.2621069670117658e-06, + "loss": 0.4313, + "step": 43015 + }, + { + "epoch": 0.7432955487973458, + "grad_norm": 1.1349355900636955, + "learning_rate": 3.2616934393584686e-06, + "loss": 0.4731, + "step": 43016 + }, + { + "epoch": 0.7433128283107547, + "grad_norm": 1.3848300154064435, + "learning_rate": 3.2612799328100596e-06, + "loss": 0.5567, + "step": 43017 + }, + { + "epoch": 0.7433301078241636, + "grad_norm": 1.1045953857756234, + "learning_rate": 3.2608664473678375e-06, + "loss": 0.3804, + "step": 43018 + }, + { + "epoch": 0.7433473873375726, + "grad_norm": 0.7629363566820115, + "learning_rate": 3.2604529830330912e-06, + "loss": 0.3627, + "step": 43019 + }, + { + "epoch": 0.7433646668509815, + "grad_norm": 1.3027551657958096, + "learning_rate": 3.2600395398071204e-06, + "loss": 0.4251, + "step": 43020 + }, + { + "epoch": 0.7433819463643904, + "grad_norm": 1.5175621847847736, + "learning_rate": 3.259626117691216e-06, + "loss": 0.3793, + "step": 43021 + }, + { + "epoch": 0.7433992258777993, + "grad_norm": 1.3311721979219708, + "learning_rate": 3.259212716686674e-06, + "loss": 0.328, + "step": 43022 + }, + { + "epoch": 0.7434165053912082, + "grad_norm": 0.991663832569566, + "learning_rate": 3.2587993367947936e-06, + "loss": 0.3972, + "step": 43023 + }, + { + "epoch": 0.7434337849046171, + "grad_norm": 1.1471986379119377, + "learning_rate": 3.2583859780168625e-06, + "loss": 0.3976, + "step": 43024 + }, + { + "epoch": 0.743451064418026, + "grad_norm": 1.2519850726621662, + "learning_rate": 3.2579726403541824e-06, + "loss": 0.3848, + "step": 43025 + }, + { + "epoch": 0.7434683439314349, + "grad_norm": 0.9466307158641343, + "learning_rate": 3.2575593238080405e-06, + "loss": 0.3701, + "step": 43026 + }, + { + "epoch": 0.7434856234448438, + "grad_norm": 1.2773813774711877, + "learning_rate": 3.2571460283797353e-06, + "loss": 0.3196, + "step": 43027 + }, + { + "epoch": 0.7435029029582527, + "grad_norm": 1.0056642232341453, + "learning_rate": 3.2567327540705618e-06, + "loss": 0.438, + "step": 43028 + }, + { + "epoch": 0.7435201824716616, + "grad_norm": 1.0243299954241523, + "learning_rate": 3.2563195008818173e-06, + "loss": 0.2259, + "step": 43029 + }, + { + "epoch": 0.7435374619850705, + "grad_norm": 0.9750130445206796, + "learning_rate": 3.255906268814788e-06, + "loss": 0.4532, + "step": 43030 + }, + { + "epoch": 0.7435547414984794, + "grad_norm": 1.4006369270538686, + "learning_rate": 3.2554930578707767e-06, + "loss": 0.2409, + "step": 43031 + }, + { + "epoch": 0.7435720210118884, + "grad_norm": 1.2067461830112425, + "learning_rate": 3.255079868051071e-06, + "loss": 0.3209, + "step": 43032 + }, + { + "epoch": 0.7435893005252973, + "grad_norm": 1.4646761322599404, + "learning_rate": 3.2546666993569676e-06, + "loss": 0.2979, + "step": 43033 + }, + { + "epoch": 0.743606580038706, + "grad_norm": 0.982606749160261, + "learning_rate": 3.2542535517897634e-06, + "loss": 0.2072, + "step": 43034 + }, + { + "epoch": 0.743623859552115, + "grad_norm": 1.1906699221923618, + "learning_rate": 3.253840425350747e-06, + "loss": 0.3552, + "step": 43035 + }, + { + "epoch": 0.7436411390655239, + "grad_norm": 1.394676594245791, + "learning_rate": 3.253427320041218e-06, + "loss": 0.2239, + "step": 43036 + }, + { + "epoch": 0.7436584185789328, + "grad_norm": 1.2914788669094688, + "learning_rate": 3.253014235862464e-06, + "loss": 0.2649, + "step": 43037 + }, + { + "epoch": 0.7436756980923417, + "grad_norm": 1.2586240896664644, + "learning_rate": 3.2526011728157825e-06, + "loss": 0.2429, + "step": 43038 + }, + { + "epoch": 0.7436929776057506, + "grad_norm": 1.3013498047510423, + "learning_rate": 3.2521881309024673e-06, + "loss": 0.3543, + "step": 43039 + }, + { + "epoch": 0.7437102571191595, + "grad_norm": 1.1247438805662915, + "learning_rate": 3.251775110123814e-06, + "loss": 0.5394, + "step": 43040 + }, + { + "epoch": 0.7437275366325684, + "grad_norm": 0.8265727400077931, + "learning_rate": 3.2513621104811112e-06, + "loss": 0.5703, + "step": 43041 + }, + { + "epoch": 0.7437448161459773, + "grad_norm": 0.9580664901322621, + "learning_rate": 3.250949131975658e-06, + "loss": 0.3416, + "step": 43042 + }, + { + "epoch": 0.7437620956593862, + "grad_norm": 1.121970504462656, + "learning_rate": 3.2505361746087463e-06, + "loss": 0.4481, + "step": 43043 + }, + { + "epoch": 0.7437793751727951, + "grad_norm": 0.9551733968970713, + "learning_rate": 3.2501232383816606e-06, + "loss": 0.3747, + "step": 43044 + }, + { + "epoch": 0.743796654686204, + "grad_norm": 0.9024253296439747, + "learning_rate": 3.2497103232957084e-06, + "loss": 0.1859, + "step": 43045 + }, + { + "epoch": 0.743813934199613, + "grad_norm": 1.4517113306445184, + "learning_rate": 3.249297429352173e-06, + "loss": 0.2371, + "step": 43046 + }, + { + "epoch": 0.7438312137130219, + "grad_norm": 1.8357300976200357, + "learning_rate": 3.248884556552355e-06, + "loss": 0.2009, + "step": 43047 + }, + { + "epoch": 0.7438484932264308, + "grad_norm": 0.859505466511057, + "learning_rate": 3.24847170489754e-06, + "loss": 0.4745, + "step": 43048 + }, + { + "epoch": 0.7438657727398397, + "grad_norm": 1.4230134513097743, + "learning_rate": 3.2480588743890287e-06, + "loss": 0.2144, + "step": 43049 + }, + { + "epoch": 0.7438830522532486, + "grad_norm": 1.3288247515348444, + "learning_rate": 3.2476460650281026e-06, + "loss": 0.4611, + "step": 43050 + }, + { + "epoch": 0.7439003317666575, + "grad_norm": 0.8850723043774587, + "learning_rate": 3.2472332768160684e-06, + "loss": 0.2325, + "step": 43051 + }, + { + "epoch": 0.7439176112800664, + "grad_norm": 1.4792235816786903, + "learning_rate": 3.2468205097542106e-06, + "loss": 0.4163, + "step": 43052 + }, + { + "epoch": 0.7439348907934753, + "grad_norm": 1.4749813927630318, + "learning_rate": 3.246407763843827e-06, + "loss": 0.3472, + "step": 43053 + }, + { + "epoch": 0.7439521703068842, + "grad_norm": 1.2543439116576538, + "learning_rate": 3.2459950390862083e-06, + "loss": 0.5891, + "step": 43054 + }, + { + "epoch": 0.743969449820293, + "grad_norm": 1.5092802845472293, + "learning_rate": 3.245582335482642e-06, + "loss": 0.2661, + "step": 43055 + }, + { + "epoch": 0.7439867293337019, + "grad_norm": 1.15509810758976, + "learning_rate": 3.2451696530344257e-06, + "loss": 0.2501, + "step": 43056 + }, + { + "epoch": 0.7440040088471108, + "grad_norm": 0.7032970639076753, + "learning_rate": 3.244756991742851e-06, + "loss": 0.4045, + "step": 43057 + }, + { + "epoch": 0.7440212883605197, + "grad_norm": 1.3154497676442072, + "learning_rate": 3.2443443516092145e-06, + "loss": 0.4137, + "step": 43058 + }, + { + "epoch": 0.7440385678739286, + "grad_norm": 1.2515777052480241, + "learning_rate": 3.243931732634802e-06, + "loss": 0.2448, + "step": 43059 + }, + { + "epoch": 0.7440558473873375, + "grad_norm": 1.052672504259901, + "learning_rate": 3.2435191348209106e-06, + "loss": 0.4922, + "step": 43060 + }, + { + "epoch": 0.7440731269007465, + "grad_norm": 1.463506762115239, + "learning_rate": 3.243106558168829e-06, + "loss": 0.3814, + "step": 43061 + }, + { + "epoch": 0.7440904064141554, + "grad_norm": 1.0099888062065068, + "learning_rate": 3.24269400267985e-06, + "loss": 0.2506, + "step": 43062 + }, + { + "epoch": 0.7441076859275643, + "grad_norm": 1.4001394790464714, + "learning_rate": 3.242281468355267e-06, + "loss": 0.3224, + "step": 43063 + }, + { + "epoch": 0.7441249654409732, + "grad_norm": 1.2770300344626393, + "learning_rate": 3.2418689551963765e-06, + "loss": 0.4172, + "step": 43064 + }, + { + "epoch": 0.7441422449543821, + "grad_norm": 1.093010497521422, + "learning_rate": 3.2414564632044652e-06, + "loss": 0.2186, + "step": 43065 + }, + { + "epoch": 0.744159524467791, + "grad_norm": 1.0545327385148913, + "learning_rate": 3.2410439923808225e-06, + "loss": 0.2854, + "step": 43066 + }, + { + "epoch": 0.7441768039811999, + "grad_norm": 1.4712763060712652, + "learning_rate": 3.2406315427267443e-06, + "loss": 0.3459, + "step": 43067 + }, + { + "epoch": 0.7441940834946088, + "grad_norm": 1.634104646224493, + "learning_rate": 3.240219114243521e-06, + "loss": 0.2789, + "step": 43068 + }, + { + "epoch": 0.7442113630080177, + "grad_norm": 0.822810155014207, + "learning_rate": 3.239806706932449e-06, + "loss": 0.3492, + "step": 43069 + }, + { + "epoch": 0.7442286425214266, + "grad_norm": 1.0826885814417866, + "learning_rate": 3.239394320794813e-06, + "loss": 0.3441, + "step": 43070 + }, + { + "epoch": 0.7442459220348355, + "grad_norm": 1.4252086051786892, + "learning_rate": 3.23898195583191e-06, + "loss": 0.3466, + "step": 43071 + }, + { + "epoch": 0.7442632015482444, + "grad_norm": 0.8583773579051105, + "learning_rate": 3.238569612045027e-06, + "loss": 0.5313, + "step": 43072 + }, + { + "epoch": 0.7442804810616533, + "grad_norm": 1.1448807510325074, + "learning_rate": 3.2381572894354572e-06, + "loss": 0.4775, + "step": 43073 + }, + { + "epoch": 0.7442977605750623, + "grad_norm": 1.011930414632624, + "learning_rate": 3.2377449880044965e-06, + "loss": 0.3629, + "step": 43074 + }, + { + "epoch": 0.7443150400884712, + "grad_norm": 1.4964250850272685, + "learning_rate": 3.237332707753429e-06, + "loss": 0.3035, + "step": 43075 + }, + { + "epoch": 0.7443323196018801, + "grad_norm": 3.045143473767946, + "learning_rate": 3.2369204486835526e-06, + "loss": 0.4374, + "step": 43076 + }, + { + "epoch": 0.7443495991152889, + "grad_norm": 1.018860411084647, + "learning_rate": 3.236508210796152e-06, + "loss": 0.3243, + "step": 43077 + }, + { + "epoch": 0.7443668786286978, + "grad_norm": 0.9473872521228902, + "learning_rate": 3.2360959940925217e-06, + "loss": 0.5373, + "step": 43078 + }, + { + "epoch": 0.7443841581421067, + "grad_norm": 1.9542717467296753, + "learning_rate": 3.235683798573953e-06, + "loss": 0.2671, + "step": 43079 + }, + { + "epoch": 0.7444014376555156, + "grad_norm": 1.5285249673214685, + "learning_rate": 3.2352716242417404e-06, + "loss": 0.5355, + "step": 43080 + }, + { + "epoch": 0.7444187171689245, + "grad_norm": 1.7477224201247432, + "learning_rate": 3.234859471097167e-06, + "loss": 0.5486, + "step": 43081 + }, + { + "epoch": 0.7444359966823334, + "grad_norm": 0.9770267436045751, + "learning_rate": 3.234447339141532e-06, + "loss": 0.308, + "step": 43082 + }, + { + "epoch": 0.7444532761957423, + "grad_norm": 1.9349759314086594, + "learning_rate": 3.234035228376118e-06, + "loss": 0.2859, + "step": 43083 + }, + { + "epoch": 0.7444705557091512, + "grad_norm": 1.3367712443127417, + "learning_rate": 3.233623138802221e-06, + "loss": 0.3221, + "step": 43084 + }, + { + "epoch": 0.7444878352225601, + "grad_norm": 1.9374955564028244, + "learning_rate": 3.233211070421133e-06, + "loss": 0.4004, + "step": 43085 + }, + { + "epoch": 0.744505114735969, + "grad_norm": 1.3010411660569805, + "learning_rate": 3.2327990232341387e-06, + "loss": 0.4573, + "step": 43086 + }, + { + "epoch": 0.7445223942493779, + "grad_norm": 1.091568346587254, + "learning_rate": 3.232386997242536e-06, + "loss": 0.3062, + "step": 43087 + }, + { + "epoch": 0.7445396737627868, + "grad_norm": 1.25926675630479, + "learning_rate": 3.2319749924476074e-06, + "loss": 0.4526, + "step": 43088 + }, + { + "epoch": 0.7445569532761958, + "grad_norm": 1.1274692426481447, + "learning_rate": 3.2315630088506512e-06, + "loss": 0.4114, + "step": 43089 + }, + { + "epoch": 0.7445742327896047, + "grad_norm": 0.6408567417467822, + "learning_rate": 3.2311510464529472e-06, + "loss": 0.6183, + "step": 43090 + }, + { + "epoch": 0.7445915123030136, + "grad_norm": 1.1492130892155032, + "learning_rate": 3.2307391052558003e-06, + "loss": 0.2471, + "step": 43091 + }, + { + "epoch": 0.7446087918164225, + "grad_norm": 1.551932117566554, + "learning_rate": 3.2303271852604877e-06, + "loss": 0.3026, + "step": 43092 + }, + { + "epoch": 0.7446260713298314, + "grad_norm": 1.016814075477856, + "learning_rate": 3.2299152864683105e-06, + "loss": 0.2454, + "step": 43093 + }, + { + "epoch": 0.7446433508432403, + "grad_norm": 0.8092646598639248, + "learning_rate": 3.2295034088805477e-06, + "loss": 0.6254, + "step": 43094 + }, + { + "epoch": 0.7446606303566492, + "grad_norm": 2.1601100592743974, + "learning_rate": 3.2290915524984988e-06, + "loss": 0.5036, + "step": 43095 + }, + { + "epoch": 0.7446779098700581, + "grad_norm": 1.751824987149765, + "learning_rate": 3.2286797173234473e-06, + "loss": 0.4362, + "step": 43096 + }, + { + "epoch": 0.744695189383467, + "grad_norm": 1.619754785976403, + "learning_rate": 3.2282679033566845e-06, + "loss": 0.3286, + "step": 43097 + }, + { + "epoch": 0.7447124688968758, + "grad_norm": 1.2632644229386114, + "learning_rate": 3.2278561105995052e-06, + "loss": 0.3224, + "step": 43098 + }, + { + "epoch": 0.7447297484102847, + "grad_norm": 1.0601677559511589, + "learning_rate": 3.2274443390531918e-06, + "loss": 0.3906, + "step": 43099 + }, + { + "epoch": 0.7447470279236936, + "grad_norm": 1.9397736143447715, + "learning_rate": 3.22703258871904e-06, + "loss": 0.4887, + "step": 43100 + }, + { + "epoch": 0.7447643074371025, + "grad_norm": 1.1348565752670035, + "learning_rate": 3.226620859598334e-06, + "loss": 0.2169, + "step": 43101 + }, + { + "epoch": 0.7447815869505114, + "grad_norm": 1.2540975881496328, + "learning_rate": 3.2262091516923664e-06, + "loss": 0.2316, + "step": 43102 + }, + { + "epoch": 0.7447988664639204, + "grad_norm": 1.1322200805815335, + "learning_rate": 3.225797465002426e-06, + "loss": 0.4487, + "step": 43103 + }, + { + "epoch": 0.7448161459773293, + "grad_norm": 1.470449386963784, + "learning_rate": 3.2253857995298067e-06, + "loss": 0.3277, + "step": 43104 + }, + { + "epoch": 0.7448334254907382, + "grad_norm": 0.996048261116595, + "learning_rate": 3.224974155275794e-06, + "loss": 0.4683, + "step": 43105 + }, + { + "epoch": 0.7448507050041471, + "grad_norm": 1.278530057699463, + "learning_rate": 3.2245625322416742e-06, + "loss": 0.2374, + "step": 43106 + }, + { + "epoch": 0.744867984517556, + "grad_norm": 1.5447815821798305, + "learning_rate": 3.2241509304287377e-06, + "loss": 0.4302, + "step": 43107 + }, + { + "epoch": 0.7448852640309649, + "grad_norm": 1.5892175615362298, + "learning_rate": 3.2237393498382772e-06, + "loss": 0.3499, + "step": 43108 + }, + { + "epoch": 0.7449025435443738, + "grad_norm": 1.3528243242953173, + "learning_rate": 3.223327790471582e-06, + "loss": 0.5588, + "step": 43109 + }, + { + "epoch": 0.7449198230577827, + "grad_norm": 1.1032793424762761, + "learning_rate": 3.2229162523299373e-06, + "loss": 0.4566, + "step": 43110 + }, + { + "epoch": 0.7449371025711916, + "grad_norm": 1.4353072189921905, + "learning_rate": 3.2225047354146366e-06, + "loss": 0.3724, + "step": 43111 + }, + { + "epoch": 0.7449543820846005, + "grad_norm": 0.9463656495157644, + "learning_rate": 3.2220932397269624e-06, + "loss": 0.4767, + "step": 43112 + }, + { + "epoch": 0.7449716615980094, + "grad_norm": 1.8862117487264314, + "learning_rate": 3.2216817652682077e-06, + "loss": 0.3056, + "step": 43113 + }, + { + "epoch": 0.7449889411114183, + "grad_norm": 1.2829450267143476, + "learning_rate": 3.2212703120396605e-06, + "loss": 0.3306, + "step": 43114 + }, + { + "epoch": 0.7450062206248272, + "grad_norm": 1.3855609286987716, + "learning_rate": 3.2208588800426132e-06, + "loss": 0.5488, + "step": 43115 + }, + { + "epoch": 0.7450235001382361, + "grad_norm": 1.7372232684582671, + "learning_rate": 3.220447469278352e-06, + "loss": 0.3431, + "step": 43116 + }, + { + "epoch": 0.7450407796516451, + "grad_norm": 1.1874839880120218, + "learning_rate": 3.22003607974816e-06, + "loss": 0.3315, + "step": 43117 + }, + { + "epoch": 0.745058059165054, + "grad_norm": 1.3539759833961837, + "learning_rate": 3.2196247114533308e-06, + "loss": 0.2352, + "step": 43118 + }, + { + "epoch": 0.7450753386784628, + "grad_norm": 1.5867090816175522, + "learning_rate": 3.219213364395153e-06, + "loss": 0.4087, + "step": 43119 + }, + { + "epoch": 0.7450926181918717, + "grad_norm": 2.612565600254325, + "learning_rate": 3.2188020385749164e-06, + "loss": 0.293, + "step": 43120 + }, + { + "epoch": 0.7451098977052806, + "grad_norm": 1.2862892318234778, + "learning_rate": 3.2183907339939048e-06, + "loss": 0.3465, + "step": 43121 + }, + { + "epoch": 0.7451271772186895, + "grad_norm": 1.3618315676493626, + "learning_rate": 3.217979450653411e-06, + "loss": 0.4232, + "step": 43122 + }, + { + "epoch": 0.7451444567320984, + "grad_norm": 1.1268537991820589, + "learning_rate": 3.217568188554718e-06, + "loss": 0.2312, + "step": 43123 + }, + { + "epoch": 0.7451617362455073, + "grad_norm": 0.8660764198861258, + "learning_rate": 3.217156947699117e-06, + "loss": 0.2887, + "step": 43124 + }, + { + "epoch": 0.7451790157589162, + "grad_norm": 1.2214401177977343, + "learning_rate": 3.2167457280878965e-06, + "loss": 0.222, + "step": 43125 + }, + { + "epoch": 0.7451962952723251, + "grad_norm": 1.1095748944979669, + "learning_rate": 3.216334529722347e-06, + "loss": 0.3889, + "step": 43126 + }, + { + "epoch": 0.745213574785734, + "grad_norm": 1.699939026046398, + "learning_rate": 3.2159233526037524e-06, + "loss": 0.5063, + "step": 43127 + }, + { + "epoch": 0.7452308542991429, + "grad_norm": 1.3966538563153705, + "learning_rate": 3.215512196733399e-06, + "loss": 0.419, + "step": 43128 + }, + { + "epoch": 0.7452481338125518, + "grad_norm": 1.0174637549398717, + "learning_rate": 3.21510106211258e-06, + "loss": 0.5214, + "step": 43129 + }, + { + "epoch": 0.7452654133259607, + "grad_norm": 2.3395428905309013, + "learning_rate": 3.214689948742574e-06, + "loss": 0.4392, + "step": 43130 + }, + { + "epoch": 0.7452826928393697, + "grad_norm": 1.542193579345603, + "learning_rate": 3.2142788566246807e-06, + "loss": 0.6085, + "step": 43131 + }, + { + "epoch": 0.7452999723527786, + "grad_norm": 2.8119097851538997, + "learning_rate": 3.2138677857601784e-06, + "loss": 0.2907, + "step": 43132 + }, + { + "epoch": 0.7453172518661875, + "grad_norm": 1.2154371426300696, + "learning_rate": 3.2134567361503623e-06, + "loss": 0.4266, + "step": 43133 + }, + { + "epoch": 0.7453345313795964, + "grad_norm": 1.768213795631657, + "learning_rate": 3.2130457077965115e-06, + "loss": 0.4636, + "step": 43134 + }, + { + "epoch": 0.7453518108930053, + "grad_norm": 1.391508072766571, + "learning_rate": 3.2126347006999203e-06, + "loss": 0.2052, + "step": 43135 + }, + { + "epoch": 0.7453690904064142, + "grad_norm": 0.9291579672546442, + "learning_rate": 3.2122237148618698e-06, + "loss": 0.7189, + "step": 43136 + }, + { + "epoch": 0.7453863699198231, + "grad_norm": 1.537583338011256, + "learning_rate": 3.2118127502836504e-06, + "loss": 0.5119, + "step": 43137 + }, + { + "epoch": 0.745403649433232, + "grad_norm": 1.4734218132685113, + "learning_rate": 3.211401806966553e-06, + "loss": 0.2899, + "step": 43138 + }, + { + "epoch": 0.7454209289466409, + "grad_norm": 1.2932681476874939, + "learning_rate": 3.2109908849118574e-06, + "loss": 0.522, + "step": 43139 + }, + { + "epoch": 0.7454382084600497, + "grad_norm": 1.4328334801556208, + "learning_rate": 3.2105799841208585e-06, + "loss": 0.2953, + "step": 43140 + }, + { + "epoch": 0.7454554879734586, + "grad_norm": 1.4354728151544889, + "learning_rate": 3.2101691045948347e-06, + "loss": 0.3052, + "step": 43141 + }, + { + "epoch": 0.7454727674868675, + "grad_norm": 1.0695323134437538, + "learning_rate": 3.2097582463350775e-06, + "loss": 0.3261, + "step": 43142 + }, + { + "epoch": 0.7454900470002764, + "grad_norm": 1.111986720352977, + "learning_rate": 3.2093474093428733e-06, + "loss": 0.3158, + "step": 43143 + }, + { + "epoch": 0.7455073265136853, + "grad_norm": 1.8049109336919775, + "learning_rate": 3.2089365936195127e-06, + "loss": 0.3374, + "step": 43144 + }, + { + "epoch": 0.7455246060270942, + "grad_norm": 1.2505568709497474, + "learning_rate": 3.2085257991662755e-06, + "loss": 0.5775, + "step": 43145 + }, + { + "epoch": 0.7455418855405032, + "grad_norm": 0.5696296653783558, + "learning_rate": 3.208115025984455e-06, + "loss": 0.7728, + "step": 43146 + }, + { + "epoch": 0.7455591650539121, + "grad_norm": 0.7786847738312702, + "learning_rate": 3.2077042740753316e-06, + "loss": 0.4624, + "step": 43147 + }, + { + "epoch": 0.745576444567321, + "grad_norm": 1.2097610776389838, + "learning_rate": 3.2072935434401943e-06, + "loss": 0.5985, + "step": 43148 + }, + { + "epoch": 0.7455937240807299, + "grad_norm": 1.3142627755475254, + "learning_rate": 3.2068828340803325e-06, + "loss": 0.268, + "step": 43149 + }, + { + "epoch": 0.7456110035941388, + "grad_norm": 1.1221229063909688, + "learning_rate": 3.206472145997027e-06, + "loss": 0.2655, + "step": 43150 + }, + { + "epoch": 0.7456282831075477, + "grad_norm": 1.5542940983955982, + "learning_rate": 3.20606147919157e-06, + "loss": 0.3048, + "step": 43151 + }, + { + "epoch": 0.7456455626209566, + "grad_norm": 1.3115782656192707, + "learning_rate": 3.2056508336652426e-06, + "loss": 0.4697, + "step": 43152 + }, + { + "epoch": 0.7456628421343655, + "grad_norm": 1.0493177684103263, + "learning_rate": 3.2052402094193324e-06, + "loss": 0.3776, + "step": 43153 + }, + { + "epoch": 0.7456801216477744, + "grad_norm": 1.1807736163340243, + "learning_rate": 3.2048296064551255e-06, + "loss": 0.359, + "step": 43154 + }, + { + "epoch": 0.7456974011611833, + "grad_norm": 1.4916568955689264, + "learning_rate": 3.2044190247739126e-06, + "loss": 0.4157, + "step": 43155 + }, + { + "epoch": 0.7457146806745922, + "grad_norm": 1.2903358072135755, + "learning_rate": 3.2040084643769732e-06, + "loss": 0.4178, + "step": 43156 + }, + { + "epoch": 0.7457319601880011, + "grad_norm": 1.3271049985362682, + "learning_rate": 3.203597925265598e-06, + "loss": 0.3445, + "step": 43157 + }, + { + "epoch": 0.74574923970141, + "grad_norm": 1.7699902256292976, + "learning_rate": 3.2031874074410685e-06, + "loss": 0.3324, + "step": 43158 + }, + { + "epoch": 0.745766519214819, + "grad_norm": 1.2187563143556153, + "learning_rate": 3.2027769109046714e-06, + "loss": 0.3424, + "step": 43159 + }, + { + "epoch": 0.7457837987282279, + "grad_norm": 1.497080873493544, + "learning_rate": 3.2023664356576977e-06, + "loss": 0.245, + "step": 43160 + }, + { + "epoch": 0.7458010782416367, + "grad_norm": 1.1733763269952793, + "learning_rate": 3.2019559817014245e-06, + "loss": 0.4175, + "step": 43161 + }, + { + "epoch": 0.7458183577550456, + "grad_norm": 2.3765664048115624, + "learning_rate": 3.201545549037146e-06, + "loss": 0.2479, + "step": 43162 + }, + { + "epoch": 0.7458356372684545, + "grad_norm": 1.1897628020503404, + "learning_rate": 3.20113513766614e-06, + "loss": 0.5462, + "step": 43163 + }, + { + "epoch": 0.7458529167818634, + "grad_norm": 1.4958724527358367, + "learning_rate": 3.2007247475896965e-06, + "loss": 0.3575, + "step": 43164 + }, + { + "epoch": 0.7458701962952723, + "grad_norm": 0.9119029925577466, + "learning_rate": 3.2003143788090985e-06, + "loss": 0.3985, + "step": 43165 + }, + { + "epoch": 0.7458874758086812, + "grad_norm": 2.03883250484563, + "learning_rate": 3.199904031325636e-06, + "loss": 0.4053, + "step": 43166 + }, + { + "epoch": 0.7459047553220901, + "grad_norm": 1.0914384977221465, + "learning_rate": 3.1994937051405905e-06, + "loss": 0.394, + "step": 43167 + }, + { + "epoch": 0.745922034835499, + "grad_norm": 1.180246379563881, + "learning_rate": 3.1990834002552453e-06, + "loss": 0.3709, + "step": 43168 + }, + { + "epoch": 0.7459393143489079, + "grad_norm": 1.1542788854881407, + "learning_rate": 3.1986731166708906e-06, + "loss": 0.5684, + "step": 43169 + }, + { + "epoch": 0.7459565938623168, + "grad_norm": 1.1208971057194475, + "learning_rate": 3.1982628543888018e-06, + "loss": 0.421, + "step": 43170 + }, + { + "epoch": 0.7459738733757257, + "grad_norm": 1.3746396726570442, + "learning_rate": 3.197852613410277e-06, + "loss": 0.5871, + "step": 43171 + }, + { + "epoch": 0.7459911528891346, + "grad_norm": 0.8096424071126002, + "learning_rate": 3.1974423937365917e-06, + "loss": 0.3285, + "step": 43172 + }, + { + "epoch": 0.7460084324025436, + "grad_norm": 1.0826170337447245, + "learning_rate": 3.1970321953690375e-06, + "loss": 0.2024, + "step": 43173 + }, + { + "epoch": 0.7460257119159525, + "grad_norm": 2.3380771838973504, + "learning_rate": 3.1966220183088926e-06, + "loss": 0.4214, + "step": 43174 + }, + { + "epoch": 0.7460429914293614, + "grad_norm": 2.2443757434367035, + "learning_rate": 3.196211862557447e-06, + "loss": 0.3312, + "step": 43175 + }, + { + "epoch": 0.7460602709427703, + "grad_norm": 1.617375276552286, + "learning_rate": 3.195801728115979e-06, + "loss": 0.5026, + "step": 43176 + }, + { + "epoch": 0.7460775504561792, + "grad_norm": 1.3226864695082898, + "learning_rate": 3.195391614985779e-06, + "loss": 0.2738, + "step": 43177 + }, + { + "epoch": 0.7460948299695881, + "grad_norm": 1.0930825473996442, + "learning_rate": 3.1949815231681313e-06, + "loss": 0.4106, + "step": 43178 + }, + { + "epoch": 0.746112109482997, + "grad_norm": 1.605010789657793, + "learning_rate": 3.1945714526643157e-06, + "loss": 0.4782, + "step": 43179 + }, + { + "epoch": 0.7461293889964059, + "grad_norm": 0.9926652740592692, + "learning_rate": 3.194161403475623e-06, + "loss": 0.4459, + "step": 43180 + }, + { + "epoch": 0.7461466685098148, + "grad_norm": 1.0749526479454243, + "learning_rate": 3.193751375603331e-06, + "loss": 0.2494, + "step": 43181 + }, + { + "epoch": 0.7461639480232236, + "grad_norm": 0.7785215452483657, + "learning_rate": 3.193341369048726e-06, + "loss": 0.6237, + "step": 43182 + }, + { + "epoch": 0.7461812275366325, + "grad_norm": 1.2878894399054084, + "learning_rate": 3.1929313838130936e-06, + "loss": 0.3533, + "step": 43183 + }, + { + "epoch": 0.7461985070500414, + "grad_norm": 0.9052342469499619, + "learning_rate": 3.1925214198977217e-06, + "loss": 0.2875, + "step": 43184 + }, + { + "epoch": 0.7462157865634503, + "grad_norm": 1.2885359788042197, + "learning_rate": 3.1921114773038853e-06, + "loss": 0.514, + "step": 43185 + }, + { + "epoch": 0.7462330660768592, + "grad_norm": 1.5390121214349768, + "learning_rate": 3.1917015560328767e-06, + "loss": 0.2717, + "step": 43186 + }, + { + "epoch": 0.7462503455902681, + "grad_norm": 0.8197850227801621, + "learning_rate": 3.1912916560859743e-06, + "loss": 0.2866, + "step": 43187 + }, + { + "epoch": 0.7462676251036771, + "grad_norm": 1.192056096521419, + "learning_rate": 3.1908817774644617e-06, + "loss": 0.3595, + "step": 43188 + }, + { + "epoch": 0.746284904617086, + "grad_norm": 1.4483812225414796, + "learning_rate": 3.1904719201696298e-06, + "loss": 0.4094, + "step": 43189 + }, + { + "epoch": 0.7463021841304949, + "grad_norm": 0.9805152804897413, + "learning_rate": 3.190062084202754e-06, + "loss": 0.2927, + "step": 43190 + }, + { + "epoch": 0.7463194636439038, + "grad_norm": 1.410772180043371, + "learning_rate": 3.189652269565123e-06, + "loss": 0.3454, + "step": 43191 + }, + { + "epoch": 0.7463367431573127, + "grad_norm": 2.3205043047058815, + "learning_rate": 3.1892424762580175e-06, + "loss": 0.3578, + "step": 43192 + }, + { + "epoch": 0.7463540226707216, + "grad_norm": 1.3452829089713334, + "learning_rate": 3.1888327042827204e-06, + "loss": 0.3513, + "step": 43193 + }, + { + "epoch": 0.7463713021841305, + "grad_norm": 1.131538380248857, + "learning_rate": 3.1884229536405187e-06, + "loss": 0.4023, + "step": 43194 + }, + { + "epoch": 0.7463885816975394, + "grad_norm": 1.351603092267742, + "learning_rate": 3.1880132243326954e-06, + "loss": 0.4415, + "step": 43195 + }, + { + "epoch": 0.7464058612109483, + "grad_norm": 2.0425853424720732, + "learning_rate": 3.18760351636053e-06, + "loss": 0.3212, + "step": 43196 + }, + { + "epoch": 0.7464231407243572, + "grad_norm": 1.39736534283877, + "learning_rate": 3.1871938297253115e-06, + "loss": 0.3119, + "step": 43197 + }, + { + "epoch": 0.7464404202377661, + "grad_norm": 0.880539017083642, + "learning_rate": 3.186784164428316e-06, + "loss": 0.4166, + "step": 43198 + }, + { + "epoch": 0.746457699751175, + "grad_norm": 1.2743633247981017, + "learning_rate": 3.186374520470831e-06, + "loss": 0.3878, + "step": 43199 + }, + { + "epoch": 0.746474979264584, + "grad_norm": 1.1741318685092192, + "learning_rate": 3.1859648978541414e-06, + "loss": 0.4097, + "step": 43200 + }, + { + "epoch": 0.7464922587779929, + "grad_norm": 1.7504490914250699, + "learning_rate": 3.1855552965795244e-06, + "loss": 0.3279, + "step": 43201 + }, + { + "epoch": 0.7465095382914018, + "grad_norm": 0.8652897484712503, + "learning_rate": 3.185145716648269e-06, + "loss": 0.3365, + "step": 43202 + }, + { + "epoch": 0.7465268178048107, + "grad_norm": 1.465166750860278, + "learning_rate": 3.1847361580616533e-06, + "loss": 0.3661, + "step": 43203 + }, + { + "epoch": 0.7465440973182195, + "grad_norm": 1.242582577442152, + "learning_rate": 3.18432662082096e-06, + "loss": 0.3487, + "step": 43204 + }, + { + "epoch": 0.7465613768316284, + "grad_norm": 1.0462586947889831, + "learning_rate": 3.183917104927475e-06, + "loss": 0.3815, + "step": 43205 + }, + { + "epoch": 0.7465786563450373, + "grad_norm": 0.98019015305692, + "learning_rate": 3.1835076103824823e-06, + "loss": 0.3926, + "step": 43206 + }, + { + "epoch": 0.7465959358584462, + "grad_norm": 1.7063316688037578, + "learning_rate": 3.1830981371872595e-06, + "loss": 0.3686, + "step": 43207 + }, + { + "epoch": 0.7466132153718551, + "grad_norm": 0.9183633837323919, + "learning_rate": 3.1826886853430928e-06, + "loss": 0.3188, + "step": 43208 + }, + { + "epoch": 0.746630494885264, + "grad_norm": 1.1324596315031534, + "learning_rate": 3.182279254851265e-06, + "loss": 0.2403, + "step": 43209 + }, + { + "epoch": 0.7466477743986729, + "grad_norm": 1.2278759478911725, + "learning_rate": 3.1818698457130495e-06, + "loss": 0.3805, + "step": 43210 + }, + { + "epoch": 0.7466650539120818, + "grad_norm": 1.2268937588531277, + "learning_rate": 3.1814604579297427e-06, + "loss": 0.3438, + "step": 43211 + }, + { + "epoch": 0.7466823334254907, + "grad_norm": 1.0008771738629527, + "learning_rate": 3.181051091502616e-06, + "loss": 0.4504, + "step": 43212 + }, + { + "epoch": 0.7466996129388996, + "grad_norm": 1.4591091874828712, + "learning_rate": 3.1806417464329585e-06, + "loss": 0.3328, + "step": 43213 + }, + { + "epoch": 0.7467168924523085, + "grad_norm": 1.3868107840395592, + "learning_rate": 3.180232422722047e-06, + "loss": 0.3985, + "step": 43214 + }, + { + "epoch": 0.7467341719657175, + "grad_norm": 1.1911856208190716, + "learning_rate": 3.179823120371168e-06, + "loss": 0.3284, + "step": 43215 + }, + { + "epoch": 0.7467514514791264, + "grad_norm": 1.006934688336347, + "learning_rate": 3.1794138393815997e-06, + "loss": 0.2973, + "step": 43216 + }, + { + "epoch": 0.7467687309925353, + "grad_norm": 1.732098050286274, + "learning_rate": 3.1790045797546244e-06, + "loss": 0.287, + "step": 43217 + }, + { + "epoch": 0.7467860105059442, + "grad_norm": 1.7479410886318236, + "learning_rate": 3.1785953414915284e-06, + "loss": 0.3477, + "step": 43218 + }, + { + "epoch": 0.7468032900193531, + "grad_norm": 0.921227972885939, + "learning_rate": 3.1781861245935875e-06, + "loss": 0.3146, + "step": 43219 + }, + { + "epoch": 0.746820569532762, + "grad_norm": 1.1115670109176494, + "learning_rate": 3.1777769290620887e-06, + "loss": 0.3866, + "step": 43220 + }, + { + "epoch": 0.7468378490461709, + "grad_norm": 0.8560555498458629, + "learning_rate": 3.1773677548983085e-06, + "loss": 0.2029, + "step": 43221 + }, + { + "epoch": 0.7468551285595798, + "grad_norm": 0.8169855876681238, + "learning_rate": 3.1769586021035305e-06, + "loss": 0.3126, + "step": 43222 + }, + { + "epoch": 0.7468724080729887, + "grad_norm": 2.4974870070908164, + "learning_rate": 3.1765494706790368e-06, + "loss": 0.3496, + "step": 43223 + }, + { + "epoch": 0.7468896875863976, + "grad_norm": 0.8813360249195783, + "learning_rate": 3.176140360626112e-06, + "loss": 0.4859, + "step": 43224 + }, + { + "epoch": 0.7469069670998064, + "grad_norm": 1.372593087220463, + "learning_rate": 3.1757312719460307e-06, + "loss": 0.2545, + "step": 43225 + }, + { + "epoch": 0.7469242466132153, + "grad_norm": 1.016372461205698, + "learning_rate": 3.175322204640081e-06, + "loss": 0.3497, + "step": 43226 + }, + { + "epoch": 0.7469415261266242, + "grad_norm": 1.2240585633589611, + "learning_rate": 3.1749131587095383e-06, + "loss": 0.4298, + "step": 43227 + }, + { + "epoch": 0.7469588056400331, + "grad_norm": 1.640513141780234, + "learning_rate": 3.174504134155685e-06, + "loss": 0.5233, + "step": 43228 + }, + { + "epoch": 0.746976085153442, + "grad_norm": 0.8733199389625401, + "learning_rate": 3.174095130979807e-06, + "loss": 0.6551, + "step": 43229 + }, + { + "epoch": 0.746993364666851, + "grad_norm": 0.833105248701258, + "learning_rate": 3.1736861491831796e-06, + "loss": 0.519, + "step": 43230 + }, + { + "epoch": 0.7470106441802599, + "grad_norm": 0.9871155921854217, + "learning_rate": 3.1732771887670887e-06, + "loss": 0.5065, + "step": 43231 + }, + { + "epoch": 0.7470279236936688, + "grad_norm": 0.5519216623281671, + "learning_rate": 3.172868249732809e-06, + "loss": 0.5999, + "step": 43232 + }, + { + "epoch": 0.7470452032070777, + "grad_norm": 1.234260152332681, + "learning_rate": 3.1724593320816243e-06, + "loss": 0.6364, + "step": 43233 + }, + { + "epoch": 0.7470624827204866, + "grad_norm": 1.072906337772134, + "learning_rate": 3.1720504358148164e-06, + "loss": 0.2785, + "step": 43234 + }, + { + "epoch": 0.7470797622338955, + "grad_norm": 0.7594760619934197, + "learning_rate": 3.1716415609336694e-06, + "loss": 0.5144, + "step": 43235 + }, + { + "epoch": 0.7470970417473044, + "grad_norm": 0.9267670079589314, + "learning_rate": 3.1712327074394555e-06, + "loss": 0.4309, + "step": 43236 + }, + { + "epoch": 0.7471143212607133, + "grad_norm": 0.873926250092013, + "learning_rate": 3.170823875333463e-06, + "loss": 0.5215, + "step": 43237 + }, + { + "epoch": 0.7471316007741222, + "grad_norm": 1.0176392556198892, + "learning_rate": 3.1704150646169675e-06, + "loss": 0.1543, + "step": 43238 + }, + { + "epoch": 0.7471488802875311, + "grad_norm": 1.4218354085013984, + "learning_rate": 3.17000627529125e-06, + "loss": 0.3758, + "step": 43239 + }, + { + "epoch": 0.74716615980094, + "grad_norm": 1.5017769013837343, + "learning_rate": 3.1695975073575945e-06, + "loss": 0.3963, + "step": 43240 + }, + { + "epoch": 0.7471834393143489, + "grad_norm": 1.2348114425972463, + "learning_rate": 3.1691887608172754e-06, + "loss": 0.4106, + "step": 43241 + }, + { + "epoch": 0.7472007188277578, + "grad_norm": 1.1321604209988652, + "learning_rate": 3.1687800356715804e-06, + "loss": 0.4355, + "step": 43242 + }, + { + "epoch": 0.7472179983411668, + "grad_norm": 0.983977857527162, + "learning_rate": 3.1683713319217822e-06, + "loss": 0.3657, + "step": 43243 + }, + { + "epoch": 0.7472352778545757, + "grad_norm": 1.6806018865670036, + "learning_rate": 3.167962649569164e-06, + "loss": 0.3331, + "step": 43244 + }, + { + "epoch": 0.7472525573679846, + "grad_norm": 1.6454820290493644, + "learning_rate": 3.167553988615005e-06, + "loss": 0.3539, + "step": 43245 + }, + { + "epoch": 0.7472698368813934, + "grad_norm": 1.2926175218956482, + "learning_rate": 3.1671453490605906e-06, + "loss": 0.375, + "step": 43246 + }, + { + "epoch": 0.7472871163948023, + "grad_norm": 1.4409621035557845, + "learning_rate": 3.1667367309071928e-06, + "loss": 0.2936, + "step": 43247 + }, + { + "epoch": 0.7473043959082112, + "grad_norm": 1.710787347434233, + "learning_rate": 3.1663281341560983e-06, + "loss": 0.4502, + "step": 43248 + }, + { + "epoch": 0.7473216754216201, + "grad_norm": 1.2108730346581649, + "learning_rate": 3.165919558808583e-06, + "loss": 0.3343, + "step": 43249 + }, + { + "epoch": 0.747338954935029, + "grad_norm": 0.9222252182726013, + "learning_rate": 3.165511004865921e-06, + "loss": 0.2739, + "step": 43250 + }, + { + "epoch": 0.7473562344484379, + "grad_norm": 1.1234659538891598, + "learning_rate": 3.165102472329403e-06, + "loss": 0.1206, + "step": 43251 + }, + { + "epoch": 0.7473735139618468, + "grad_norm": 1.2091962084435888, + "learning_rate": 3.1646939612003012e-06, + "loss": 0.371, + "step": 43252 + }, + { + "epoch": 0.7473907934752557, + "grad_norm": 1.1495406089112816, + "learning_rate": 3.1642854714799008e-06, + "loss": 0.2756, + "step": 43253 + }, + { + "epoch": 0.7474080729886646, + "grad_norm": 1.752350071391322, + "learning_rate": 3.1638770031694756e-06, + "loss": 0.2826, + "step": 43254 + }, + { + "epoch": 0.7474253525020735, + "grad_norm": 1.6102379838189826, + "learning_rate": 3.1634685562703095e-06, + "loss": 0.3105, + "step": 43255 + }, + { + "epoch": 0.7474426320154824, + "grad_norm": 1.185261278449858, + "learning_rate": 3.163060130783675e-06, + "loss": 0.3106, + "step": 43256 + }, + { + "epoch": 0.7474599115288914, + "grad_norm": 1.2124528801836731, + "learning_rate": 3.162651726710857e-06, + "loss": 0.4025, + "step": 43257 + }, + { + "epoch": 0.7474771910423003, + "grad_norm": 1.3372949692447005, + "learning_rate": 3.162243344053133e-06, + "loss": 0.3642, + "step": 43258 + }, + { + "epoch": 0.7474944705557092, + "grad_norm": 1.2060275012411623, + "learning_rate": 3.1618349828117854e-06, + "loss": 0.3627, + "step": 43259 + }, + { + "epoch": 0.7475117500691181, + "grad_norm": 0.9431281087170476, + "learning_rate": 3.1614266429880913e-06, + "loss": 0.5301, + "step": 43260 + }, + { + "epoch": 0.747529029582527, + "grad_norm": 1.1462411707410534, + "learning_rate": 3.1610183245833248e-06, + "loss": 0.2979, + "step": 43261 + }, + { + "epoch": 0.7475463090959359, + "grad_norm": 1.0158974756614307, + "learning_rate": 3.1606100275987696e-06, + "loss": 0.4109, + "step": 43262 + }, + { + "epoch": 0.7475635886093448, + "grad_norm": 0.9412106247972772, + "learning_rate": 3.160201752035702e-06, + "loss": 0.377, + "step": 43263 + }, + { + "epoch": 0.7475808681227537, + "grad_norm": 0.8235821752088844, + "learning_rate": 3.159793497895407e-06, + "loss": 0.178, + "step": 43264 + }, + { + "epoch": 0.7475981476361626, + "grad_norm": 1.2510878332151607, + "learning_rate": 3.1593852651791547e-06, + "loss": 0.421, + "step": 43265 + }, + { + "epoch": 0.7476154271495715, + "grad_norm": 1.2166734004125108, + "learning_rate": 3.1589770538882303e-06, + "loss": 0.4519, + "step": 43266 + }, + { + "epoch": 0.7476327066629803, + "grad_norm": 0.6542554601336545, + "learning_rate": 3.1585688640239065e-06, + "loss": 0.2689, + "step": 43267 + }, + { + "epoch": 0.7476499861763892, + "grad_norm": 1.3962603977230497, + "learning_rate": 3.1581606955874656e-06, + "loss": 0.298, + "step": 43268 + }, + { + "epoch": 0.7476672656897981, + "grad_norm": 1.3934008935090765, + "learning_rate": 3.1577525485801853e-06, + "loss": 0.4146, + "step": 43269 + }, + { + "epoch": 0.747684545203207, + "grad_norm": 1.5328624548649898, + "learning_rate": 3.1573444230033468e-06, + "loss": 0.3139, + "step": 43270 + }, + { + "epoch": 0.747701824716616, + "grad_norm": 1.2230367246871343, + "learning_rate": 3.1569363188582257e-06, + "loss": 0.4119, + "step": 43271 + }, + { + "epoch": 0.7477191042300249, + "grad_norm": 0.8444846022679435, + "learning_rate": 3.156528236146098e-06, + "loss": 0.5464, + "step": 43272 + }, + { + "epoch": 0.7477363837434338, + "grad_norm": 1.5943004181814984, + "learning_rate": 3.1561201748682425e-06, + "loss": 0.279, + "step": 43273 + }, + { + "epoch": 0.7477536632568427, + "grad_norm": 1.8698946220923123, + "learning_rate": 3.155712135025939e-06, + "loss": 0.3203, + "step": 43274 + }, + { + "epoch": 0.7477709427702516, + "grad_norm": 1.1396577030523043, + "learning_rate": 3.1553041166204688e-06, + "loss": 0.355, + "step": 43275 + }, + { + "epoch": 0.7477882222836605, + "grad_norm": 1.3153347401037454, + "learning_rate": 3.1548961196531036e-06, + "loss": 0.5001, + "step": 43276 + }, + { + "epoch": 0.7478055017970694, + "grad_norm": 0.9860831829560064, + "learning_rate": 3.1544881441251265e-06, + "loss": 0.3543, + "step": 43277 + }, + { + "epoch": 0.7478227813104783, + "grad_norm": 1.408754871665537, + "learning_rate": 3.1540801900378094e-06, + "loss": 0.4177, + "step": 43278 + }, + { + "epoch": 0.7478400608238872, + "grad_norm": 1.0806830979111222, + "learning_rate": 3.1536722573924338e-06, + "loss": 0.3477, + "step": 43279 + }, + { + "epoch": 0.7478573403372961, + "grad_norm": 1.6193746020936974, + "learning_rate": 3.15326434619028e-06, + "loss": 0.2941, + "step": 43280 + }, + { + "epoch": 0.747874619850705, + "grad_norm": 1.5513455641464302, + "learning_rate": 3.152856456432619e-06, + "loss": 0.3364, + "step": 43281 + }, + { + "epoch": 0.7478918993641139, + "grad_norm": 1.2726494761779001, + "learning_rate": 3.152448588120736e-06, + "loss": 0.1477, + "step": 43282 + }, + { + "epoch": 0.7479091788775228, + "grad_norm": 1.8187547938363213, + "learning_rate": 3.1520407412559004e-06, + "loss": 0.3543, + "step": 43283 + }, + { + "epoch": 0.7479264583909317, + "grad_norm": 1.4623496642319567, + "learning_rate": 3.1516329158393945e-06, + "loss": 0.1955, + "step": 43284 + }, + { + "epoch": 0.7479437379043407, + "grad_norm": 1.1337017802794105, + "learning_rate": 3.1512251118724936e-06, + "loss": 0.3145, + "step": 43285 + }, + { + "epoch": 0.7479610174177496, + "grad_norm": 1.4478045808550872, + "learning_rate": 3.1508173293564802e-06, + "loss": 0.4233, + "step": 43286 + }, + { + "epoch": 0.7479782969311585, + "grad_norm": 1.3142924261178413, + "learning_rate": 3.150409568292624e-06, + "loss": 0.429, + "step": 43287 + }, + { + "epoch": 0.7479955764445673, + "grad_norm": 1.0889744343370829, + "learning_rate": 3.1500018286822086e-06, + "loss": 0.4742, + "step": 43288 + }, + { + "epoch": 0.7480128559579762, + "grad_norm": 1.3144899831369299, + "learning_rate": 3.149594110526506e-06, + "loss": 0.5369, + "step": 43289 + }, + { + "epoch": 0.7480301354713851, + "grad_norm": 1.5712898845634364, + "learning_rate": 3.1491864138267935e-06, + "loss": 0.3173, + "step": 43290 + }, + { + "epoch": 0.748047414984794, + "grad_norm": 2.102559005545427, + "learning_rate": 3.1487787385843536e-06, + "loss": 0.2845, + "step": 43291 + }, + { + "epoch": 0.7480646944982029, + "grad_norm": 1.1076509844731177, + "learning_rate": 3.1483710848004567e-06, + "loss": 0.5748, + "step": 43292 + }, + { + "epoch": 0.7480819740116118, + "grad_norm": 1.1199209272206387, + "learning_rate": 3.147963452476386e-06, + "loss": 0.1985, + "step": 43293 + }, + { + "epoch": 0.7480992535250207, + "grad_norm": 1.2120400061536916, + "learning_rate": 3.14755584161341e-06, + "loss": 0.4979, + "step": 43294 + }, + { + "epoch": 0.7481165330384296, + "grad_norm": 1.483451322173887, + "learning_rate": 3.1471482522128137e-06, + "loss": 0.553, + "step": 43295 + }, + { + "epoch": 0.7481338125518385, + "grad_norm": 1.327238642132932, + "learning_rate": 3.1467406842758664e-06, + "loss": 0.33, + "step": 43296 + }, + { + "epoch": 0.7481510920652474, + "grad_norm": 1.403401477149568, + "learning_rate": 3.1463331378038488e-06, + "loss": 0.3098, + "step": 43297 + }, + { + "epoch": 0.7481683715786563, + "grad_norm": 1.011648226286388, + "learning_rate": 3.1459256127980355e-06, + "loss": 0.3669, + "step": 43298 + }, + { + "epoch": 0.7481856510920653, + "grad_norm": 1.351298621571319, + "learning_rate": 3.1455181092597076e-06, + "loss": 0.3103, + "step": 43299 + }, + { + "epoch": 0.7482029306054742, + "grad_norm": 1.0298762396771957, + "learning_rate": 3.1451106271901355e-06, + "loss": 0.8733, + "step": 43300 + }, + { + "epoch": 0.7482202101188831, + "grad_norm": 1.4025614354100344, + "learning_rate": 3.1447031665906e-06, + "loss": 0.4297, + "step": 43301 + }, + { + "epoch": 0.748237489632292, + "grad_norm": 1.0423733966193827, + "learning_rate": 3.1442957274623732e-06, + "loss": 0.2383, + "step": 43302 + }, + { + "epoch": 0.7482547691457009, + "grad_norm": 1.5385864929118949, + "learning_rate": 3.143888309806732e-06, + "loss": 0.3152, + "step": 43303 + }, + { + "epoch": 0.7482720486591098, + "grad_norm": 1.8119855179563753, + "learning_rate": 3.1434809136249576e-06, + "loss": 0.3559, + "step": 43304 + }, + { + "epoch": 0.7482893281725187, + "grad_norm": 1.9465864187061623, + "learning_rate": 3.1430735389183188e-06, + "loss": 0.2763, + "step": 43305 + }, + { + "epoch": 0.7483066076859276, + "grad_norm": 1.576161606911596, + "learning_rate": 3.142666185688097e-06, + "loss": 0.3008, + "step": 43306 + }, + { + "epoch": 0.7483238871993365, + "grad_norm": 1.0246045408984323, + "learning_rate": 3.1422588539355637e-06, + "loss": 0.4899, + "step": 43307 + }, + { + "epoch": 0.7483411667127454, + "grad_norm": 1.5347603364660702, + "learning_rate": 3.141851543661997e-06, + "loss": 0.4052, + "step": 43308 + }, + { + "epoch": 0.7483584462261542, + "grad_norm": 1.9771460796147187, + "learning_rate": 3.141444254868672e-06, + "loss": 0.3897, + "step": 43309 + }, + { + "epoch": 0.7483757257395631, + "grad_norm": 1.3019372508066445, + "learning_rate": 3.141036987556868e-06, + "loss": 0.355, + "step": 43310 + }, + { + "epoch": 0.748393005252972, + "grad_norm": 1.788492978010337, + "learning_rate": 3.1406297417278574e-06, + "loss": 0.3133, + "step": 43311 + }, + { + "epoch": 0.7484102847663809, + "grad_norm": 1.7596077192684856, + "learning_rate": 3.1402225173829125e-06, + "loss": 0.305, + "step": 43312 + }, + { + "epoch": 0.7484275642797898, + "grad_norm": 1.45188654952886, + "learning_rate": 3.1398153145233125e-06, + "loss": 0.3443, + "step": 43313 + }, + { + "epoch": 0.7484448437931988, + "grad_norm": 0.8193445171272212, + "learning_rate": 3.139408133150331e-06, + "loss": 0.2955, + "step": 43314 + }, + { + "epoch": 0.7484621233066077, + "grad_norm": 1.7833603966988227, + "learning_rate": 3.139000973265249e-06, + "loss": 0.2282, + "step": 43315 + }, + { + "epoch": 0.7484794028200166, + "grad_norm": 1.1759243491730687, + "learning_rate": 3.1385938348693347e-06, + "loss": 0.4729, + "step": 43316 + }, + { + "epoch": 0.7484966823334255, + "grad_norm": 1.635473063390039, + "learning_rate": 3.1381867179638683e-06, + "loss": 0.5283, + "step": 43317 + }, + { + "epoch": 0.7485139618468344, + "grad_norm": 0.7789413782534422, + "learning_rate": 3.137779622550119e-06, + "loss": 0.3849, + "step": 43318 + }, + { + "epoch": 0.7485312413602433, + "grad_norm": 0.923795094000226, + "learning_rate": 3.137372548629366e-06, + "loss": 0.2453, + "step": 43319 + }, + { + "epoch": 0.7485485208736522, + "grad_norm": 1.6434963474616855, + "learning_rate": 3.136965496202884e-06, + "loss": 0.4271, + "step": 43320 + }, + { + "epoch": 0.7485658003870611, + "grad_norm": 1.399962079463136, + "learning_rate": 3.1365584652719494e-06, + "loss": 0.3558, + "step": 43321 + }, + { + "epoch": 0.74858307990047, + "grad_norm": 1.2969102822315668, + "learning_rate": 3.136151455837837e-06, + "loss": 0.3719, + "step": 43322 + }, + { + "epoch": 0.7486003594138789, + "grad_norm": 1.679915576759806, + "learning_rate": 3.1357444679018156e-06, + "loss": 0.4447, + "step": 43323 + }, + { + "epoch": 0.7486176389272878, + "grad_norm": 1.3665259341212075, + "learning_rate": 3.1353375014651645e-06, + "loss": 0.1872, + "step": 43324 + }, + { + "epoch": 0.7486349184406967, + "grad_norm": 1.037784120725928, + "learning_rate": 3.134930556529159e-06, + "loss": 0.715, + "step": 43325 + }, + { + "epoch": 0.7486521979541056, + "grad_norm": 1.588875231630524, + "learning_rate": 3.1345236330950747e-06, + "loss": 0.224, + "step": 43326 + }, + { + "epoch": 0.7486694774675146, + "grad_norm": 1.4911142055164444, + "learning_rate": 3.134116731164181e-06, + "loss": 0.3511, + "step": 43327 + }, + { + "epoch": 0.7486867569809235, + "grad_norm": 1.0254385052217734, + "learning_rate": 3.1337098507377595e-06, + "loss": 0.475, + "step": 43328 + }, + { + "epoch": 0.7487040364943324, + "grad_norm": 0.507717835306003, + "learning_rate": 3.1333029918170764e-06, + "loss": 0.7772, + "step": 43329 + }, + { + "epoch": 0.7487213160077412, + "grad_norm": 1.2114682380851582, + "learning_rate": 3.132896154403411e-06, + "loss": 0.3072, + "step": 43330 + }, + { + "epoch": 0.7487385955211501, + "grad_norm": 1.213038334065077, + "learning_rate": 3.132489338498036e-06, + "loss": 0.2054, + "step": 43331 + }, + { + "epoch": 0.748755875034559, + "grad_norm": 1.152205206076697, + "learning_rate": 3.1320825441022295e-06, + "loss": 0.3319, + "step": 43332 + }, + { + "epoch": 0.7487731545479679, + "grad_norm": 1.8086115076100322, + "learning_rate": 3.131675771217263e-06, + "loss": 0.495, + "step": 43333 + }, + { + "epoch": 0.7487904340613768, + "grad_norm": 1.3607575730722392, + "learning_rate": 3.1312690198444062e-06, + "loss": 0.4146, + "step": 43334 + }, + { + "epoch": 0.7488077135747857, + "grad_norm": 1.2017196775899193, + "learning_rate": 3.1308622899849408e-06, + "loss": 0.2672, + "step": 43335 + }, + { + "epoch": 0.7488249930881946, + "grad_norm": 0.8192999603229024, + "learning_rate": 3.130455581640133e-06, + "loss": 0.7022, + "step": 43336 + }, + { + "epoch": 0.7488422726016035, + "grad_norm": 0.9798014522026814, + "learning_rate": 3.1300488948112604e-06, + "loss": 0.3597, + "step": 43337 + }, + { + "epoch": 0.7488595521150124, + "grad_norm": 0.9585491912030293, + "learning_rate": 3.1296422294995964e-06, + "loss": 0.3315, + "step": 43338 + }, + { + "epoch": 0.7488768316284213, + "grad_norm": 1.8997297324989297, + "learning_rate": 3.1292355857064194e-06, + "loss": 0.2919, + "step": 43339 + }, + { + "epoch": 0.7488941111418302, + "grad_norm": 1.1435849600620784, + "learning_rate": 3.128828963432995e-06, + "loss": 0.4212, + "step": 43340 + }, + { + "epoch": 0.7489113906552392, + "grad_norm": 1.3040648375839037, + "learning_rate": 3.1284223626806033e-06, + "loss": 0.4678, + "step": 43341 + }, + { + "epoch": 0.7489286701686481, + "grad_norm": 1.0828552481671108, + "learning_rate": 3.128015783450512e-06, + "loss": 0.3636, + "step": 43342 + }, + { + "epoch": 0.748945949682057, + "grad_norm": 2.2242107546994356, + "learning_rate": 3.127609225743997e-06, + "loss": 0.4793, + "step": 43343 + }, + { + "epoch": 0.7489632291954659, + "grad_norm": 0.9566599769214511, + "learning_rate": 3.1272026895623365e-06, + "loss": 0.6346, + "step": 43344 + }, + { + "epoch": 0.7489805087088748, + "grad_norm": 1.7814909063101652, + "learning_rate": 3.1267961749067955e-06, + "loss": 0.3901, + "step": 43345 + }, + { + "epoch": 0.7489977882222837, + "grad_norm": 2.0170536988156877, + "learning_rate": 3.1263896817786545e-06, + "loss": 0.3441, + "step": 43346 + }, + { + "epoch": 0.7490150677356926, + "grad_norm": 1.4777731202747975, + "learning_rate": 3.1259832101791797e-06, + "loss": 0.3992, + "step": 43347 + }, + { + "epoch": 0.7490323472491015, + "grad_norm": 2.1386637342017556, + "learning_rate": 3.1255767601096486e-06, + "loss": 0.1901, + "step": 43348 + }, + { + "epoch": 0.7490496267625104, + "grad_norm": 1.7857282265186505, + "learning_rate": 3.1251703315713333e-06, + "loss": 0.3872, + "step": 43349 + }, + { + "epoch": 0.7490669062759193, + "grad_norm": 1.0821211901647092, + "learning_rate": 3.1247639245655102e-06, + "loss": 0.5538, + "step": 43350 + }, + { + "epoch": 0.7490841857893282, + "grad_norm": 0.6600083442102777, + "learning_rate": 3.1243575390934467e-06, + "loss": 0.6954, + "step": 43351 + }, + { + "epoch": 0.749101465302737, + "grad_norm": 0.9977394497101076, + "learning_rate": 3.12395117515642e-06, + "loss": 0.4067, + "step": 43352 + }, + { + "epoch": 0.7491187448161459, + "grad_norm": 1.3211462226111221, + "learning_rate": 3.1235448327556985e-06, + "loss": 0.5875, + "step": 43353 + }, + { + "epoch": 0.7491360243295548, + "grad_norm": 1.5082508485658574, + "learning_rate": 3.123138511892556e-06, + "loss": 0.4074, + "step": 43354 + }, + { + "epoch": 0.7491533038429637, + "grad_norm": 1.314222382450441, + "learning_rate": 3.1227322125682703e-06, + "loss": 0.4176, + "step": 43355 + }, + { + "epoch": 0.7491705833563727, + "grad_norm": 1.5379665564211906, + "learning_rate": 3.122325934784106e-06, + "loss": 0.24, + "step": 43356 + }, + { + "epoch": 0.7491878628697816, + "grad_norm": 0.8160308699428633, + "learning_rate": 3.121919678541343e-06, + "loss": 0.3495, + "step": 43357 + }, + { + "epoch": 0.7492051423831905, + "grad_norm": 1.7188571827538652, + "learning_rate": 3.1215134438412476e-06, + "loss": 0.3872, + "step": 43358 + }, + { + "epoch": 0.7492224218965994, + "grad_norm": 1.6387598083467345, + "learning_rate": 3.121107230685094e-06, + "loss": 0.4753, + "step": 43359 + }, + { + "epoch": 0.7492397014100083, + "grad_norm": 1.1065261595665146, + "learning_rate": 3.1207010390741566e-06, + "loss": 0.3643, + "step": 43360 + }, + { + "epoch": 0.7492569809234172, + "grad_norm": 1.2862267705420696, + "learning_rate": 3.120294869009709e-06, + "loss": 0.3511, + "step": 43361 + }, + { + "epoch": 0.7492742604368261, + "grad_norm": 0.7511439266941817, + "learning_rate": 3.1198887204930208e-06, + "loss": 0.2154, + "step": 43362 + }, + { + "epoch": 0.749291539950235, + "grad_norm": 1.235164049610428, + "learning_rate": 3.11948259352536e-06, + "loss": 0.5067, + "step": 43363 + }, + { + "epoch": 0.7493088194636439, + "grad_norm": 1.5083935170905876, + "learning_rate": 3.1190764881080037e-06, + "loss": 0.2918, + "step": 43364 + }, + { + "epoch": 0.7493260989770528, + "grad_norm": 0.9451134296841393, + "learning_rate": 3.1186704042422223e-06, + "loss": 0.2971, + "step": 43365 + }, + { + "epoch": 0.7493433784904617, + "grad_norm": 0.8021296258101918, + "learning_rate": 3.1182643419292913e-06, + "loss": 0.2812, + "step": 43366 + }, + { + "epoch": 0.7493606580038706, + "grad_norm": 1.000403926721206, + "learning_rate": 3.1178583011704756e-06, + "loss": 0.3158, + "step": 43367 + }, + { + "epoch": 0.7493779375172795, + "grad_norm": 1.1789806729875283, + "learning_rate": 3.1174522819670548e-06, + "loss": 0.3653, + "step": 43368 + }, + { + "epoch": 0.7493952170306885, + "grad_norm": 0.964492250269397, + "learning_rate": 3.117046284320293e-06, + "loss": 0.5405, + "step": 43369 + }, + { + "epoch": 0.7494124965440974, + "grad_norm": 0.6705429715450891, + "learning_rate": 3.1166403082314658e-06, + "loss": 0.2332, + "step": 43370 + }, + { + "epoch": 0.7494297760575063, + "grad_norm": 1.1914456848901624, + "learning_rate": 3.1162343537018438e-06, + "loss": 0.4727, + "step": 43371 + }, + { + "epoch": 0.7494470555709152, + "grad_norm": 1.4216886735415917, + "learning_rate": 3.1158284207327018e-06, + "loss": 0.4001, + "step": 43372 + }, + { + "epoch": 0.749464335084324, + "grad_norm": 1.2033365793704585, + "learning_rate": 3.1154225093253086e-06, + "loss": 0.593, + "step": 43373 + }, + { + "epoch": 0.7494816145977329, + "grad_norm": 1.078052093826267, + "learning_rate": 3.1150166194809327e-06, + "loss": 0.2595, + "step": 43374 + }, + { + "epoch": 0.7494988941111418, + "grad_norm": 1.1975970454260296, + "learning_rate": 3.1146107512008505e-06, + "loss": 0.5642, + "step": 43375 + }, + { + "epoch": 0.7495161736245507, + "grad_norm": 1.517699661190477, + "learning_rate": 3.1142049044863275e-06, + "loss": 0.3623, + "step": 43376 + }, + { + "epoch": 0.7495334531379596, + "grad_norm": 0.9325136956276079, + "learning_rate": 3.1137990793386386e-06, + "loss": 0.2286, + "step": 43377 + }, + { + "epoch": 0.7495507326513685, + "grad_norm": 1.294580639714603, + "learning_rate": 3.1133932757590533e-06, + "loss": 0.2938, + "step": 43378 + }, + { + "epoch": 0.7495680121647774, + "grad_norm": 1.0810080526464905, + "learning_rate": 3.1129874937488482e-06, + "loss": 0.2349, + "step": 43379 + }, + { + "epoch": 0.7495852916781863, + "grad_norm": 1.1675835381635762, + "learning_rate": 3.112581733309286e-06, + "loss": 0.3, + "step": 43380 + }, + { + "epoch": 0.7496025711915952, + "grad_norm": 1.0463396419027289, + "learning_rate": 3.1121759944416442e-06, + "loss": 0.3222, + "step": 43381 + }, + { + "epoch": 0.7496198507050041, + "grad_norm": 1.3348034821580277, + "learning_rate": 3.111770277147187e-06, + "loss": 0.2575, + "step": 43382 + }, + { + "epoch": 0.749637130218413, + "grad_norm": 1.2750718904510254, + "learning_rate": 3.1113645814271898e-06, + "loss": 0.3383, + "step": 43383 + }, + { + "epoch": 0.749654409731822, + "grad_norm": 0.9817496758266205, + "learning_rate": 3.1109589072829247e-06, + "loss": 0.6062, + "step": 43384 + }, + { + "epoch": 0.7496716892452309, + "grad_norm": 1.8332177011061965, + "learning_rate": 3.110553254715657e-06, + "loss": 0.4182, + "step": 43385 + }, + { + "epoch": 0.7496889687586398, + "grad_norm": 1.257882341332851, + "learning_rate": 3.1101476237266625e-06, + "loss": 0.3312, + "step": 43386 + }, + { + "epoch": 0.7497062482720487, + "grad_norm": 1.2755046751668289, + "learning_rate": 3.1097420143172064e-06, + "loss": 0.4127, + "step": 43387 + }, + { + "epoch": 0.7497235277854576, + "grad_norm": 1.241864339879357, + "learning_rate": 3.109336426488563e-06, + "loss": 0.4352, + "step": 43388 + }, + { + "epoch": 0.7497408072988665, + "grad_norm": 0.996101296082802, + "learning_rate": 3.108930860242001e-06, + "loss": 0.4622, + "step": 43389 + }, + { + "epoch": 0.7497580868122754, + "grad_norm": 1.4920222858878134, + "learning_rate": 3.108525315578794e-06, + "loss": 0.335, + "step": 43390 + }, + { + "epoch": 0.7497753663256843, + "grad_norm": 1.181864810105586, + "learning_rate": 3.108119792500206e-06, + "loss": 0.3915, + "step": 43391 + }, + { + "epoch": 0.7497926458390932, + "grad_norm": 1.085761568143487, + "learning_rate": 3.1077142910075142e-06, + "loss": 0.3429, + "step": 43392 + }, + { + "epoch": 0.7498099253525021, + "grad_norm": 1.0320845057053243, + "learning_rate": 3.107308811101982e-06, + "loss": 0.2251, + "step": 43393 + }, + { + "epoch": 0.7498272048659109, + "grad_norm": 1.6564088581558813, + "learning_rate": 3.1069033527848825e-06, + "loss": 0.2397, + "step": 43394 + }, + { + "epoch": 0.7498444843793198, + "grad_norm": 2.4207298018495518, + "learning_rate": 3.106497916057488e-06, + "loss": 0.5113, + "step": 43395 + }, + { + "epoch": 0.7498617638927287, + "grad_norm": 1.4041943922596747, + "learning_rate": 3.106092500921064e-06, + "loss": 0.3982, + "step": 43396 + }, + { + "epoch": 0.7498790434061376, + "grad_norm": 0.8045558409124167, + "learning_rate": 3.1056871073768845e-06, + "loss": 0.4253, + "step": 43397 + }, + { + "epoch": 0.7498963229195466, + "grad_norm": 1.203328615686538, + "learning_rate": 3.1052817354262143e-06, + "loss": 0.4506, + "step": 43398 + }, + { + "epoch": 0.7499136024329555, + "grad_norm": 1.4309400207333076, + "learning_rate": 3.1048763850703257e-06, + "loss": 0.3905, + "step": 43399 + }, + { + "epoch": 0.7499308819463644, + "grad_norm": 1.3048358611529007, + "learning_rate": 3.1044710563104873e-06, + "loss": 0.3596, + "step": 43400 + }, + { + "epoch": 0.7499481614597733, + "grad_norm": 1.2768280779647305, + "learning_rate": 3.104065749147973e-06, + "loss": 0.6745, + "step": 43401 + }, + { + "epoch": 0.7499654409731822, + "grad_norm": 0.9498009278232262, + "learning_rate": 3.103660463584046e-06, + "loss": 0.477, + "step": 43402 + }, + { + "epoch": 0.7499827204865911, + "grad_norm": 2.310230415990304, + "learning_rate": 3.103255199619982e-06, + "loss": 0.2373, + "step": 43403 + }, + { + "epoch": 0.75, + "grad_norm": 1.0608074366438713, + "learning_rate": 3.1028499572570436e-06, + "loss": 0.3153, + "step": 43404 + }, + { + "epoch": 0.7500172795134089, + "grad_norm": 0.9548035107676123, + "learning_rate": 3.1024447364965026e-06, + "loss": 0.3364, + "step": 43405 + }, + { + "epoch": 0.7500345590268178, + "grad_norm": 1.3436365518761149, + "learning_rate": 3.102039537339633e-06, + "loss": 0.498, + "step": 43406 + }, + { + "epoch": 0.7500518385402267, + "grad_norm": 1.0644603643774175, + "learning_rate": 3.1016343597876965e-06, + "loss": 0.3809, + "step": 43407 + }, + { + "epoch": 0.7500691180536356, + "grad_norm": 1.1260222643680928, + "learning_rate": 3.101229203841968e-06, + "loss": 0.3686, + "step": 43408 + }, + { + "epoch": 0.7500863975670445, + "grad_norm": 1.1930945360363445, + "learning_rate": 3.100824069503712e-06, + "loss": 0.2162, + "step": 43409 + }, + { + "epoch": 0.7501036770804534, + "grad_norm": 2.6444033381981566, + "learning_rate": 3.1004189567742005e-06, + "loss": 0.5911, + "step": 43410 + }, + { + "epoch": 0.7501209565938624, + "grad_norm": 1.2601298055097392, + "learning_rate": 3.1000138656546965e-06, + "loss": 0.337, + "step": 43411 + }, + { + "epoch": 0.7501382361072713, + "grad_norm": 1.711893391217831, + "learning_rate": 3.099608796146479e-06, + "loss": 0.2356, + "step": 43412 + }, + { + "epoch": 0.7501555156206802, + "grad_norm": 1.5846463332634602, + "learning_rate": 3.0992037482508077e-06, + "loss": 0.2944, + "step": 43413 + }, + { + "epoch": 0.7501727951340891, + "grad_norm": 1.013400820705929, + "learning_rate": 3.0987987219689585e-06, + "loss": 0.5143, + "step": 43414 + }, + { + "epoch": 0.7501900746474979, + "grad_norm": 1.4383257022469702, + "learning_rate": 3.098393717302196e-06, + "loss": 0.4167, + "step": 43415 + }, + { + "epoch": 0.7502073541609068, + "grad_norm": 1.1868862945525682, + "learning_rate": 3.0979887342517854e-06, + "loss": 0.2975, + "step": 43416 + }, + { + "epoch": 0.7502246336743157, + "grad_norm": 1.7771897135470909, + "learning_rate": 3.0975837728189993e-06, + "loss": 0.4355, + "step": 43417 + }, + { + "epoch": 0.7502419131877246, + "grad_norm": 1.2084238876893654, + "learning_rate": 3.0971788330051054e-06, + "loss": 0.2334, + "step": 43418 + }, + { + "epoch": 0.7502591927011335, + "grad_norm": 1.8829541499945286, + "learning_rate": 3.096773914811375e-06, + "loss": 0.3106, + "step": 43419 + }, + { + "epoch": 0.7502764722145424, + "grad_norm": 0.9340058346220824, + "learning_rate": 3.0963690182390693e-06, + "loss": 0.3848, + "step": 43420 + }, + { + "epoch": 0.7502937517279513, + "grad_norm": 1.7560617595834063, + "learning_rate": 3.0959641432894648e-06, + "loss": 0.3348, + "step": 43421 + }, + { + "epoch": 0.7503110312413602, + "grad_norm": 1.430565317250641, + "learning_rate": 3.095559289963822e-06, + "loss": 0.3021, + "step": 43422 + }, + { + "epoch": 0.7503283107547691, + "grad_norm": 1.5072182092324018, + "learning_rate": 3.0951544582634118e-06, + "loss": 0.4202, + "step": 43423 + }, + { + "epoch": 0.750345590268178, + "grad_norm": 1.295887172757233, + "learning_rate": 3.0947496481895057e-06, + "loss": 0.2493, + "step": 43424 + }, + { + "epoch": 0.750362869781587, + "grad_norm": 2.967499015314368, + "learning_rate": 3.094344859743366e-06, + "loss": 0.4392, + "step": 43425 + }, + { + "epoch": 0.7503801492949959, + "grad_norm": 1.0944786862995974, + "learning_rate": 3.093940092926265e-06, + "loss": 0.3928, + "step": 43426 + }, + { + "epoch": 0.7503974288084048, + "grad_norm": 1.5142500953499318, + "learning_rate": 3.093535347739466e-06, + "loss": 0.5631, + "step": 43427 + }, + { + "epoch": 0.7504147083218137, + "grad_norm": 1.5581428151811145, + "learning_rate": 3.0931306241842383e-06, + "loss": 0.379, + "step": 43428 + }, + { + "epoch": 0.7504319878352226, + "grad_norm": 0.7421755912843965, + "learning_rate": 3.092725922261851e-06, + "loss": 0.625, + "step": 43429 + }, + { + "epoch": 0.7504492673486315, + "grad_norm": 1.0739621296435788, + "learning_rate": 3.092321241973575e-06, + "loss": 0.2754, + "step": 43430 + }, + { + "epoch": 0.7504665468620404, + "grad_norm": 1.2896417380537535, + "learning_rate": 3.09191658332067e-06, + "loss": 0.1868, + "step": 43431 + }, + { + "epoch": 0.7504838263754493, + "grad_norm": 1.053401622477527, + "learning_rate": 3.091511946304411e-06, + "loss": 0.2643, + "step": 43432 + }, + { + "epoch": 0.7505011058888582, + "grad_norm": 1.6742597523792229, + "learning_rate": 3.0911073309260574e-06, + "loss": 0.3249, + "step": 43433 + }, + { + "epoch": 0.7505183854022671, + "grad_norm": 1.7615771447101336, + "learning_rate": 3.0907027371868813e-06, + "loss": 0.4372, + "step": 43434 + }, + { + "epoch": 0.750535664915676, + "grad_norm": 1.170728335424856, + "learning_rate": 3.0902981650881523e-06, + "loss": 0.3092, + "step": 43435 + }, + { + "epoch": 0.7505529444290848, + "grad_norm": 1.0577662170995874, + "learning_rate": 3.089893614631132e-06, + "loss": 0.298, + "step": 43436 + }, + { + "epoch": 0.7505702239424937, + "grad_norm": 1.0994034903880943, + "learning_rate": 3.0894890858170924e-06, + "loss": 0.2153, + "step": 43437 + }, + { + "epoch": 0.7505875034559026, + "grad_norm": 1.0793540506372095, + "learning_rate": 3.0890845786472958e-06, + "loss": 0.2749, + "step": 43438 + }, + { + "epoch": 0.7506047829693115, + "grad_norm": 1.8121135729469668, + "learning_rate": 3.08868009312301e-06, + "loss": 0.3262, + "step": 43439 + }, + { + "epoch": 0.7506220624827205, + "grad_norm": 1.1988332315890213, + "learning_rate": 3.088275629245505e-06, + "loss": 0.3265, + "step": 43440 + }, + { + "epoch": 0.7506393419961294, + "grad_norm": 1.2559498314017523, + "learning_rate": 3.087871187016048e-06, + "loss": 0.3649, + "step": 43441 + }, + { + "epoch": 0.7506566215095383, + "grad_norm": 0.7096454133627333, + "learning_rate": 3.0874667664359014e-06, + "loss": 0.2747, + "step": 43442 + }, + { + "epoch": 0.7506739010229472, + "grad_norm": 1.5114077049567156, + "learning_rate": 3.0870623675063373e-06, + "loss": 0.2371, + "step": 43443 + }, + { + "epoch": 0.7506911805363561, + "grad_norm": 1.0740952899170322, + "learning_rate": 3.0866579902286154e-06, + "loss": 0.4381, + "step": 43444 + }, + { + "epoch": 0.750708460049765, + "grad_norm": 1.0964846902121015, + "learning_rate": 3.0862536346040075e-06, + "loss": 0.3168, + "step": 43445 + }, + { + "epoch": 0.7507257395631739, + "grad_norm": 1.5429618491549821, + "learning_rate": 3.085849300633781e-06, + "loss": 0.4476, + "step": 43446 + }, + { + "epoch": 0.7507430190765828, + "grad_norm": 1.2796636194909898, + "learning_rate": 3.085444988319196e-06, + "loss": 0.4756, + "step": 43447 + }, + { + "epoch": 0.7507602985899917, + "grad_norm": 1.507828621612119, + "learning_rate": 3.0850406976615266e-06, + "loss": 0.3211, + "step": 43448 + }, + { + "epoch": 0.7507775781034006, + "grad_norm": 1.2598614497246, + "learning_rate": 3.084636428662032e-06, + "loss": 0.3236, + "step": 43449 + }, + { + "epoch": 0.7507948576168095, + "grad_norm": 1.458648036572559, + "learning_rate": 3.0842321813219854e-06, + "loss": 0.2903, + "step": 43450 + }, + { + "epoch": 0.7508121371302184, + "grad_norm": 1.119495876635932, + "learning_rate": 3.083827955642642e-06, + "loss": 0.3464, + "step": 43451 + }, + { + "epoch": 0.7508294166436273, + "grad_norm": 1.3873902389327997, + "learning_rate": 3.0834237516252817e-06, + "loss": 0.3505, + "step": 43452 + }, + { + "epoch": 0.7508466961570363, + "grad_norm": 1.124873220304701, + "learning_rate": 3.0830195692711608e-06, + "loss": 0.3396, + "step": 43453 + }, + { + "epoch": 0.7508639756704452, + "grad_norm": 1.0788796405753942, + "learning_rate": 3.082615408581552e-06, + "loss": 0.3152, + "step": 43454 + }, + { + "epoch": 0.7508812551838541, + "grad_norm": 0.97281158389615, + "learning_rate": 3.0822112695577166e-06, + "loss": 0.3015, + "step": 43455 + }, + { + "epoch": 0.750898534697263, + "grad_norm": 0.7115443360310945, + "learning_rate": 3.0818071522009175e-06, + "loss": 0.9396, + "step": 43456 + }, + { + "epoch": 0.7509158142106718, + "grad_norm": 1.0132763491920391, + "learning_rate": 3.081403056512425e-06, + "loss": 0.534, + "step": 43457 + }, + { + "epoch": 0.7509330937240807, + "grad_norm": 0.99656537259932, + "learning_rate": 3.080998982493504e-06, + "loss": 0.5005, + "step": 43458 + }, + { + "epoch": 0.7509503732374896, + "grad_norm": 1.0533514183624189, + "learning_rate": 3.080594930145423e-06, + "loss": 0.4426, + "step": 43459 + }, + { + "epoch": 0.7509676527508985, + "grad_norm": 0.90531888076807, + "learning_rate": 3.0801908994694416e-06, + "loss": 0.4169, + "step": 43460 + }, + { + "epoch": 0.7509849322643074, + "grad_norm": 1.3965436874381505, + "learning_rate": 3.0797868904668314e-06, + "loss": 0.3764, + "step": 43461 + }, + { + "epoch": 0.7510022117777163, + "grad_norm": 1.1642874211682837, + "learning_rate": 3.079382903138851e-06, + "loss": 0.3191, + "step": 43462 + }, + { + "epoch": 0.7510194912911252, + "grad_norm": 1.3533055712181457, + "learning_rate": 3.0789789374867697e-06, + "loss": 0.4398, + "step": 43463 + }, + { + "epoch": 0.7510367708045341, + "grad_norm": 0.8068366309901285, + "learning_rate": 3.0785749935118526e-06, + "loss": 0.3911, + "step": 43464 + }, + { + "epoch": 0.751054050317943, + "grad_norm": 1.3238288826811946, + "learning_rate": 3.078171071215367e-06, + "loss": 0.3942, + "step": 43465 + }, + { + "epoch": 0.7510713298313519, + "grad_norm": 1.1680565060772679, + "learning_rate": 3.077767170598576e-06, + "loss": 0.2452, + "step": 43466 + }, + { + "epoch": 0.7510886093447608, + "grad_norm": 1.1925819846166912, + "learning_rate": 3.0773632916627405e-06, + "loss": 0.299, + "step": 43467 + }, + { + "epoch": 0.7511058888581698, + "grad_norm": 1.482161251923084, + "learning_rate": 3.0769594344091304e-06, + "loss": 0.3891, + "step": 43468 + }, + { + "epoch": 0.7511231683715787, + "grad_norm": 1.2051202388649271, + "learning_rate": 3.076555598839008e-06, + "loss": 0.3132, + "step": 43469 + }, + { + "epoch": 0.7511404478849876, + "grad_norm": 1.179554345207574, + "learning_rate": 3.076151784953645e-06, + "loss": 0.4195, + "step": 43470 + }, + { + "epoch": 0.7511577273983965, + "grad_norm": 1.2272526652591589, + "learning_rate": 3.0757479927542956e-06, + "loss": 0.3385, + "step": 43471 + }, + { + "epoch": 0.7511750069118054, + "grad_norm": 1.667424275469529, + "learning_rate": 3.0753442222422338e-06, + "loss": 0.4512, + "step": 43472 + }, + { + "epoch": 0.7511922864252143, + "grad_norm": 1.39307945504603, + "learning_rate": 3.0749404734187173e-06, + "loss": 0.3423, + "step": 43473 + }, + { + "epoch": 0.7512095659386232, + "grad_norm": 1.430632430745355, + "learning_rate": 3.0745367462850127e-06, + "loss": 0.2659, + "step": 43474 + }, + { + "epoch": 0.7512268454520321, + "grad_norm": 1.3223417921127796, + "learning_rate": 3.074133040842385e-06, + "loss": 0.453, + "step": 43475 + }, + { + "epoch": 0.751244124965441, + "grad_norm": 1.014431897651589, + "learning_rate": 3.0737293570921034e-06, + "loss": 0.3747, + "step": 43476 + }, + { + "epoch": 0.7512614044788499, + "grad_norm": 1.9864583993844407, + "learning_rate": 3.073325695035427e-06, + "loss": 0.1895, + "step": 43477 + }, + { + "epoch": 0.7512786839922588, + "grad_norm": 1.2510320655986054, + "learning_rate": 3.072922054673618e-06, + "loss": 0.3164, + "step": 43478 + }, + { + "epoch": 0.7512959635056676, + "grad_norm": 1.2402677652653797, + "learning_rate": 3.072518436007944e-06, + "loss": 0.2943, + "step": 43479 + }, + { + "epoch": 0.7513132430190765, + "grad_norm": 1.0645769102854141, + "learning_rate": 3.072114839039668e-06, + "loss": 0.1618, + "step": 43480 + }, + { + "epoch": 0.7513305225324854, + "grad_norm": 1.1305135083218756, + "learning_rate": 3.0717112637700585e-06, + "loss": 0.5008, + "step": 43481 + }, + { + "epoch": 0.7513478020458944, + "grad_norm": 1.164642699643701, + "learning_rate": 3.0713077102003728e-06, + "loss": 0.3608, + "step": 43482 + }, + { + "epoch": 0.7513650815593033, + "grad_norm": 0.7028541933456757, + "learning_rate": 3.0709041783318804e-06, + "loss": 0.6446, + "step": 43483 + }, + { + "epoch": 0.7513823610727122, + "grad_norm": 1.4859694114481417, + "learning_rate": 3.07050066816584e-06, + "loss": 0.2597, + "step": 43484 + }, + { + "epoch": 0.7513996405861211, + "grad_norm": 0.9458344873687363, + "learning_rate": 3.0700971797035185e-06, + "loss": 0.4979, + "step": 43485 + }, + { + "epoch": 0.75141692009953, + "grad_norm": 1.1270569400742876, + "learning_rate": 3.069693712946181e-06, + "loss": 0.242, + "step": 43486 + }, + { + "epoch": 0.7514341996129389, + "grad_norm": 0.7872597839221896, + "learning_rate": 3.0692902678950876e-06, + "loss": 0.2937, + "step": 43487 + }, + { + "epoch": 0.7514514791263478, + "grad_norm": 2.1443664754940843, + "learning_rate": 3.0688868445515075e-06, + "loss": 0.408, + "step": 43488 + }, + { + "epoch": 0.7514687586397567, + "grad_norm": 1.3046711305721963, + "learning_rate": 3.0684834429166965e-06, + "loss": 0.267, + "step": 43489 + }, + { + "epoch": 0.7514860381531656, + "grad_norm": 1.1586357393009663, + "learning_rate": 3.068080062991925e-06, + "loss": 0.3099, + "step": 43490 + }, + { + "epoch": 0.7515033176665745, + "grad_norm": 1.0869978474394606, + "learning_rate": 3.067676704778447e-06, + "loss": 0.327, + "step": 43491 + }, + { + "epoch": 0.7515205971799834, + "grad_norm": 1.08529216717451, + "learning_rate": 3.0672733682775403e-06, + "loss": 0.3627, + "step": 43492 + }, + { + "epoch": 0.7515378766933923, + "grad_norm": 0.7797965957940071, + "learning_rate": 3.066870053490456e-06, + "loss": 0.2975, + "step": 43493 + }, + { + "epoch": 0.7515551562068012, + "grad_norm": 1.6764943419142764, + "learning_rate": 3.066466760418465e-06, + "loss": 0.2362, + "step": 43494 + }, + { + "epoch": 0.7515724357202102, + "grad_norm": 1.449106588287548, + "learning_rate": 3.0660634890628238e-06, + "loss": 0.2064, + "step": 43495 + }, + { + "epoch": 0.7515897152336191, + "grad_norm": 1.1673461189816174, + "learning_rate": 3.0656602394248014e-06, + "loss": 0.2989, + "step": 43496 + }, + { + "epoch": 0.751606994747028, + "grad_norm": 1.7786935993891642, + "learning_rate": 3.0652570115056555e-06, + "loss": 0.3845, + "step": 43497 + }, + { + "epoch": 0.7516242742604369, + "grad_norm": 0.7840457521578595, + "learning_rate": 3.064853805306651e-06, + "loss": 0.3453, + "step": 43498 + }, + { + "epoch": 0.7516415537738458, + "grad_norm": 1.4960599727571189, + "learning_rate": 3.064450620829056e-06, + "loss": 0.3685, + "step": 43499 + }, + { + "epoch": 0.7516588332872546, + "grad_norm": 1.1441873825031186, + "learning_rate": 3.064047458074124e-06, + "loss": 0.2977, + "step": 43500 + }, + { + "epoch": 0.7516761128006635, + "grad_norm": 1.4140166674188117, + "learning_rate": 3.063644317043126e-06, + "loss": 0.4716, + "step": 43501 + }, + { + "epoch": 0.7516933923140724, + "grad_norm": 1.3213031376038076, + "learning_rate": 3.063241197737319e-06, + "loss": 0.2757, + "step": 43502 + }, + { + "epoch": 0.7517106718274813, + "grad_norm": 2.305412806941433, + "learning_rate": 3.0628381001579664e-06, + "loss": 0.2121, + "step": 43503 + }, + { + "epoch": 0.7517279513408902, + "grad_norm": 1.5225192097871745, + "learning_rate": 3.062435024306333e-06, + "loss": 0.3313, + "step": 43504 + }, + { + "epoch": 0.7517452308542991, + "grad_norm": 1.3252597161285313, + "learning_rate": 3.062031970183683e-06, + "loss": 0.3897, + "step": 43505 + }, + { + "epoch": 0.751762510367708, + "grad_norm": 1.2694065441724056, + "learning_rate": 3.0616289377912735e-06, + "loss": 0.3365, + "step": 43506 + }, + { + "epoch": 0.7517797898811169, + "grad_norm": 0.8495003094966679, + "learning_rate": 3.0612259271303725e-06, + "loss": 0.3488, + "step": 43507 + }, + { + "epoch": 0.7517970693945258, + "grad_norm": 1.8545442651519828, + "learning_rate": 3.060822938202236e-06, + "loss": 0.2932, + "step": 43508 + }, + { + "epoch": 0.7518143489079347, + "grad_norm": 1.1633608458259896, + "learning_rate": 3.0604199710081295e-06, + "loss": 0.4087, + "step": 43509 + }, + { + "epoch": 0.7518316284213437, + "grad_norm": 0.7668990438534136, + "learning_rate": 3.060017025549318e-06, + "loss": 0.3058, + "step": 43510 + }, + { + "epoch": 0.7518489079347526, + "grad_norm": 1.6568627358899954, + "learning_rate": 3.0596141018270575e-06, + "loss": 0.3072, + "step": 43511 + }, + { + "epoch": 0.7518661874481615, + "grad_norm": 1.6993609092705408, + "learning_rate": 3.0592111998426156e-06, + "loss": 0.3885, + "step": 43512 + }, + { + "epoch": 0.7518834669615704, + "grad_norm": 0.8542949797006032, + "learning_rate": 3.0588083195972498e-06, + "loss": 0.3814, + "step": 43513 + }, + { + "epoch": 0.7519007464749793, + "grad_norm": 0.6182502258944345, + "learning_rate": 3.0584054610922233e-06, + "loss": 1.0459, + "step": 43514 + }, + { + "epoch": 0.7519180259883882, + "grad_norm": 1.66627333598924, + "learning_rate": 3.0580026243287974e-06, + "loss": 0.3993, + "step": 43515 + }, + { + "epoch": 0.7519353055017971, + "grad_norm": 1.2546360325394106, + "learning_rate": 3.0575998093082393e-06, + "loss": 0.2529, + "step": 43516 + }, + { + "epoch": 0.751952585015206, + "grad_norm": 1.7303822297704488, + "learning_rate": 3.0571970160318056e-06, + "loss": 0.3962, + "step": 43517 + }, + { + "epoch": 0.7519698645286149, + "grad_norm": 1.5527528635901064, + "learning_rate": 3.0567942445007557e-06, + "loss": 0.4187, + "step": 43518 + }, + { + "epoch": 0.7519871440420238, + "grad_norm": 1.304841550209753, + "learning_rate": 3.0563914947163543e-06, + "loss": 0.2471, + "step": 43519 + }, + { + "epoch": 0.7520044235554327, + "grad_norm": 1.0401123783470883, + "learning_rate": 3.0559887666798614e-06, + "loss": 0.4655, + "step": 43520 + }, + { + "epoch": 0.7520217030688415, + "grad_norm": 1.4759563639148008, + "learning_rate": 3.0555860603925437e-06, + "loss": 0.4185, + "step": 43521 + }, + { + "epoch": 0.7520389825822504, + "grad_norm": 0.938735149030298, + "learning_rate": 3.0551833758556536e-06, + "loss": 0.2458, + "step": 43522 + }, + { + "epoch": 0.7520562620956593, + "grad_norm": 1.7908286156421616, + "learning_rate": 3.054780713070461e-06, + "loss": 0.4694, + "step": 43523 + }, + { + "epoch": 0.7520735416090683, + "grad_norm": 1.2311015496335747, + "learning_rate": 3.05437807203822e-06, + "loss": 0.3199, + "step": 43524 + }, + { + "epoch": 0.7520908211224772, + "grad_norm": 1.392638283250562, + "learning_rate": 3.053975452760195e-06, + "loss": 0.2525, + "step": 43525 + }, + { + "epoch": 0.7521081006358861, + "grad_norm": 1.107866043557306, + "learning_rate": 3.0535728552376454e-06, + "loss": 0.3589, + "step": 43526 + }, + { + "epoch": 0.752125380149295, + "grad_norm": 1.2275515964691242, + "learning_rate": 3.0531702794718377e-06, + "loss": 0.2777, + "step": 43527 + }, + { + "epoch": 0.7521426596627039, + "grad_norm": 0.8550959600539652, + "learning_rate": 3.0527677254640287e-06, + "loss": 0.3405, + "step": 43528 + }, + { + "epoch": 0.7521599391761128, + "grad_norm": 1.1881682917455865, + "learning_rate": 3.0523651932154763e-06, + "loss": 0.4428, + "step": 43529 + }, + { + "epoch": 0.7521772186895217, + "grad_norm": 0.8039322770030051, + "learning_rate": 3.0519626827274464e-06, + "loss": 0.2505, + "step": 43530 + }, + { + "epoch": 0.7521944982029306, + "grad_norm": 1.3464744904121733, + "learning_rate": 3.0515601940011917e-06, + "loss": 0.1767, + "step": 43531 + }, + { + "epoch": 0.7522117777163395, + "grad_norm": 1.8721983331679977, + "learning_rate": 3.0511577270379845e-06, + "loss": 0.4522, + "step": 43532 + }, + { + "epoch": 0.7522290572297484, + "grad_norm": 0.9660166054028182, + "learning_rate": 3.0507552818390763e-06, + "loss": 0.4563, + "step": 43533 + }, + { + "epoch": 0.7522463367431573, + "grad_norm": 1.6489223666946116, + "learning_rate": 3.0503528584057342e-06, + "loss": 0.3956, + "step": 43534 + }, + { + "epoch": 0.7522636162565662, + "grad_norm": 1.5580766348735167, + "learning_rate": 3.0499504567392115e-06, + "loss": 0.3665, + "step": 43535 + }, + { + "epoch": 0.7522808957699751, + "grad_norm": 1.6789601589920709, + "learning_rate": 3.0495480768407747e-06, + "loss": 0.3918, + "step": 43536 + }, + { + "epoch": 0.752298175283384, + "grad_norm": 1.2191233458602466, + "learning_rate": 3.0491457187116756e-06, + "loss": 0.2336, + "step": 43537 + }, + { + "epoch": 0.752315454796793, + "grad_norm": 1.0544805633140057, + "learning_rate": 3.0487433823531875e-06, + "loss": 0.3177, + "step": 43538 + }, + { + "epoch": 0.7523327343102019, + "grad_norm": 0.7188628224054033, + "learning_rate": 3.0483410677665624e-06, + "loss": 0.4032, + "step": 43539 + }, + { + "epoch": 0.7523500138236108, + "grad_norm": 1.155567722013238, + "learning_rate": 3.0479387749530598e-06, + "loss": 0.3527, + "step": 43540 + }, + { + "epoch": 0.7523672933370197, + "grad_norm": 1.2549382374981517, + "learning_rate": 3.0475365039139426e-06, + "loss": 0.3859, + "step": 43541 + }, + { + "epoch": 0.7523845728504285, + "grad_norm": 1.9619341420205112, + "learning_rate": 3.047134254650468e-06, + "loss": 0.2563, + "step": 43542 + }, + { + "epoch": 0.7524018523638374, + "grad_norm": 2.4275529985677964, + "learning_rate": 3.0467320271638956e-06, + "loss": 0.5019, + "step": 43543 + }, + { + "epoch": 0.7524191318772463, + "grad_norm": 1.6115637276150565, + "learning_rate": 3.046329821455488e-06, + "loss": 0.5985, + "step": 43544 + }, + { + "epoch": 0.7524364113906552, + "grad_norm": 1.0546131891566404, + "learning_rate": 3.045927637526507e-06, + "loss": 0.3448, + "step": 43545 + }, + { + "epoch": 0.7524536909040641, + "grad_norm": 0.7237354635042635, + "learning_rate": 3.045525475378206e-06, + "loss": 0.5928, + "step": 43546 + }, + { + "epoch": 0.752470970417473, + "grad_norm": 1.125794693559972, + "learning_rate": 3.0451233350118503e-06, + "loss": 0.3791, + "step": 43547 + }, + { + "epoch": 0.7524882499308819, + "grad_norm": 1.8830883697510312, + "learning_rate": 3.0447212164286945e-06, + "loss": 0.2918, + "step": 43548 + }, + { + "epoch": 0.7525055294442908, + "grad_norm": 1.054705725034858, + "learning_rate": 3.044319119629999e-06, + "loss": 0.4602, + "step": 43549 + }, + { + "epoch": 0.7525228089576997, + "grad_norm": 1.3446098129107806, + "learning_rate": 3.0439170446170285e-06, + "loss": 0.5239, + "step": 43550 + }, + { + "epoch": 0.7525400884711086, + "grad_norm": 1.8500326609141595, + "learning_rate": 3.0435149913910357e-06, + "loss": 0.214, + "step": 43551 + }, + { + "epoch": 0.7525573679845176, + "grad_norm": 1.1857665354277631, + "learning_rate": 3.043112959953286e-06, + "loss": 0.2761, + "step": 43552 + }, + { + "epoch": 0.7525746474979265, + "grad_norm": 1.1724743773969275, + "learning_rate": 3.042710950305031e-06, + "loss": 0.3165, + "step": 43553 + }, + { + "epoch": 0.7525919270113354, + "grad_norm": 1.231971718422336, + "learning_rate": 3.0423089624475356e-06, + "loss": 0.411, + "step": 43554 + }, + { + "epoch": 0.7526092065247443, + "grad_norm": 0.7802409420153456, + "learning_rate": 3.041906996382056e-06, + "loss": 0.5632, + "step": 43555 + }, + { + "epoch": 0.7526264860381532, + "grad_norm": 0.7519709824520823, + "learning_rate": 3.0415050521098565e-06, + "loss": 0.4733, + "step": 43556 + }, + { + "epoch": 0.7526437655515621, + "grad_norm": 1.9736976326515643, + "learning_rate": 3.041103129632188e-06, + "loss": 0.338, + "step": 43557 + }, + { + "epoch": 0.752661045064971, + "grad_norm": 1.1851987301089415, + "learning_rate": 3.040701228950317e-06, + "loss": 0.3724, + "step": 43558 + }, + { + "epoch": 0.7526783245783799, + "grad_norm": 1.5407940333121812, + "learning_rate": 3.0402993500654955e-06, + "loss": 0.6164, + "step": 43559 + }, + { + "epoch": 0.7526956040917888, + "grad_norm": 0.9275752578850648, + "learning_rate": 3.0398974929789847e-06, + "loss": 0.5099, + "step": 43560 + }, + { + "epoch": 0.7527128836051977, + "grad_norm": 1.5720047017887766, + "learning_rate": 3.0394956576920476e-06, + "loss": 0.5641, + "step": 43561 + }, + { + "epoch": 0.7527301631186066, + "grad_norm": 0.8761887828805789, + "learning_rate": 3.0390938442059357e-06, + "loss": 0.2247, + "step": 43562 + }, + { + "epoch": 0.7527474426320154, + "grad_norm": 0.5949279231432337, + "learning_rate": 3.0386920525219144e-06, + "loss": 0.7816, + "step": 43563 + }, + { + "epoch": 0.7527647221454243, + "grad_norm": 1.7291433281514705, + "learning_rate": 3.038290282641235e-06, + "loss": 0.4992, + "step": 43564 + }, + { + "epoch": 0.7527820016588332, + "grad_norm": 1.0642308917555228, + "learning_rate": 3.0378885345651597e-06, + "loss": 0.3843, + "step": 43565 + }, + { + "epoch": 0.7527992811722422, + "grad_norm": 0.9071519948280563, + "learning_rate": 3.037486808294947e-06, + "loss": 0.314, + "step": 43566 + }, + { + "epoch": 0.7528165606856511, + "grad_norm": 1.6133902206126831, + "learning_rate": 3.0370851038318583e-06, + "loss": 0.4496, + "step": 43567 + }, + { + "epoch": 0.75283384019906, + "grad_norm": 1.0772972446722655, + "learning_rate": 3.036683421177148e-06, + "loss": 0.5426, + "step": 43568 + }, + { + "epoch": 0.7528511197124689, + "grad_norm": 1.3757827175404087, + "learning_rate": 3.0362817603320703e-06, + "loss": 0.3854, + "step": 43569 + }, + { + "epoch": 0.7528683992258778, + "grad_norm": 1.7319132089371354, + "learning_rate": 3.0358801212978907e-06, + "loss": 0.2565, + "step": 43570 + }, + { + "epoch": 0.7528856787392867, + "grad_norm": 1.2582920657544643, + "learning_rate": 3.0354785040758585e-06, + "loss": 0.3062, + "step": 43571 + }, + { + "epoch": 0.7529029582526956, + "grad_norm": 0.988538539737493, + "learning_rate": 3.0350769086672418e-06, + "loss": 0.3638, + "step": 43572 + }, + { + "epoch": 0.7529202377661045, + "grad_norm": 1.1553581703480564, + "learning_rate": 3.0346753350732916e-06, + "loss": 0.4109, + "step": 43573 + }, + { + "epoch": 0.7529375172795134, + "grad_norm": 1.5481203695923447, + "learning_rate": 3.034273783295271e-06, + "loss": 0.4309, + "step": 43574 + }, + { + "epoch": 0.7529547967929223, + "grad_norm": 0.6999695078107833, + "learning_rate": 3.03387225333443e-06, + "loss": 0.324, + "step": 43575 + }, + { + "epoch": 0.7529720763063312, + "grad_norm": 1.18086556025303, + "learning_rate": 3.033470745192034e-06, + "loss": 0.2204, + "step": 43576 + }, + { + "epoch": 0.7529893558197401, + "grad_norm": 1.067106516746806, + "learning_rate": 3.03306925886933e-06, + "loss": 0.4319, + "step": 43577 + }, + { + "epoch": 0.753006635333149, + "grad_norm": 0.9465305173207766, + "learning_rate": 3.03266779436759e-06, + "loss": 0.5229, + "step": 43578 + }, + { + "epoch": 0.753023914846558, + "grad_norm": 0.9010047664986565, + "learning_rate": 3.0322663516880637e-06, + "loss": 0.2745, + "step": 43579 + }, + { + "epoch": 0.7530411943599669, + "grad_norm": 0.8921247719878039, + "learning_rate": 3.031864930832006e-06, + "loss": 0.4406, + "step": 43580 + }, + { + "epoch": 0.7530584738733758, + "grad_norm": 1.7848957380243793, + "learning_rate": 3.031463531800679e-06, + "loss": 0.3123, + "step": 43581 + }, + { + "epoch": 0.7530757533867847, + "grad_norm": 1.494819815279889, + "learning_rate": 3.0310621545953356e-06, + "loss": 0.4712, + "step": 43582 + }, + { + "epoch": 0.7530930329001936, + "grad_norm": 1.171313378538791, + "learning_rate": 3.030660799217234e-06, + "loss": 0.3511, + "step": 43583 + }, + { + "epoch": 0.7531103124136024, + "grad_norm": 1.224754148734133, + "learning_rate": 3.030259465667634e-06, + "loss": 0.2802, + "step": 43584 + }, + { + "epoch": 0.7531275919270113, + "grad_norm": 1.1551506279052488, + "learning_rate": 3.0298581539477936e-06, + "loss": 0.2124, + "step": 43585 + }, + { + "epoch": 0.7531448714404202, + "grad_norm": 1.4694802714317827, + "learning_rate": 3.029456864058964e-06, + "loss": 0.4025, + "step": 43586 + }, + { + "epoch": 0.7531621509538291, + "grad_norm": 0.7719924792694317, + "learning_rate": 3.0290555960024083e-06, + "loss": 0.1717, + "step": 43587 + }, + { + "epoch": 0.753179430467238, + "grad_norm": 0.8802901723556606, + "learning_rate": 3.0286543497793764e-06, + "loss": 0.3858, + "step": 43588 + }, + { + "epoch": 0.7531967099806469, + "grad_norm": 1.1456576985170202, + "learning_rate": 3.0282531253911297e-06, + "loss": 0.438, + "step": 43589 + }, + { + "epoch": 0.7532139894940558, + "grad_norm": 0.9797420661136572, + "learning_rate": 3.027851922838927e-06, + "loss": 0.4334, + "step": 43590 + }, + { + "epoch": 0.7532312690074647, + "grad_norm": 1.939001537841794, + "learning_rate": 3.0274507421240184e-06, + "loss": 0.3109, + "step": 43591 + }, + { + "epoch": 0.7532485485208736, + "grad_norm": 1.2081309185771858, + "learning_rate": 3.0270495832476677e-06, + "loss": 0.4276, + "step": 43592 + }, + { + "epoch": 0.7532658280342825, + "grad_norm": 1.0425419592976433, + "learning_rate": 3.0266484462111244e-06, + "loss": 0.4051, + "step": 43593 + }, + { + "epoch": 0.7532831075476915, + "grad_norm": 1.0028189171455235, + "learning_rate": 3.0262473310156483e-06, + "loss": 0.4275, + "step": 43594 + }, + { + "epoch": 0.7533003870611004, + "grad_norm": 1.5039240544782404, + "learning_rate": 3.0258462376624943e-06, + "loss": 0.3093, + "step": 43595 + }, + { + "epoch": 0.7533176665745093, + "grad_norm": 0.695033148883861, + "learning_rate": 3.025445166152925e-06, + "loss": 0.2769, + "step": 43596 + }, + { + "epoch": 0.7533349460879182, + "grad_norm": 1.0263661044679904, + "learning_rate": 3.0250441164881872e-06, + "loss": 0.592, + "step": 43597 + }, + { + "epoch": 0.7533522256013271, + "grad_norm": 0.7349982417920714, + "learning_rate": 3.0246430886695442e-06, + "loss": 0.2494, + "step": 43598 + }, + { + "epoch": 0.753369505114736, + "grad_norm": 1.5225583081244247, + "learning_rate": 3.0242420826982467e-06, + "loss": 0.2553, + "step": 43599 + }, + { + "epoch": 0.7533867846281449, + "grad_norm": 1.4097572086369938, + "learning_rate": 3.023841098575553e-06, + "loss": 0.2321, + "step": 43600 + }, + { + "epoch": 0.7534040641415538, + "grad_norm": 1.1761147333175626, + "learning_rate": 3.023440136302721e-06, + "loss": 0.442, + "step": 43601 + }, + { + "epoch": 0.7534213436549627, + "grad_norm": 1.5289070180643227, + "learning_rate": 3.0230391958810033e-06, + "loss": 0.2095, + "step": 43602 + }, + { + "epoch": 0.7534386231683716, + "grad_norm": 1.2077585548240681, + "learning_rate": 3.0226382773116593e-06, + "loss": 0.5377, + "step": 43603 + }, + { + "epoch": 0.7534559026817805, + "grad_norm": 2.2098245922652766, + "learning_rate": 3.0222373805959395e-06, + "loss": 0.2769, + "step": 43604 + }, + { + "epoch": 0.7534731821951893, + "grad_norm": 1.6643702319438225, + "learning_rate": 3.021836505735103e-06, + "loss": 0.3824, + "step": 43605 + }, + { + "epoch": 0.7534904617085982, + "grad_norm": 0.9941912027388549, + "learning_rate": 3.0214356527304047e-06, + "loss": 0.3124, + "step": 43606 + }, + { + "epoch": 0.7535077412220071, + "grad_norm": 1.1716648256245183, + "learning_rate": 3.021034821583103e-06, + "loss": 0.1786, + "step": 43607 + }, + { + "epoch": 0.753525020735416, + "grad_norm": 1.1297287894775907, + "learning_rate": 3.0206340122944478e-06, + "loss": 0.421, + "step": 43608 + }, + { + "epoch": 0.753542300248825, + "grad_norm": 1.3477174229295668, + "learning_rate": 3.020233224865701e-06, + "loss": 0.392, + "step": 43609 + }, + { + "epoch": 0.7535595797622339, + "grad_norm": 1.3438737053756724, + "learning_rate": 3.0198324592981132e-06, + "loss": 0.3835, + "step": 43610 + }, + { + "epoch": 0.7535768592756428, + "grad_norm": 1.0876865733928562, + "learning_rate": 3.0194317155929343e-06, + "loss": 0.3946, + "step": 43611 + }, + { + "epoch": 0.7535941387890517, + "grad_norm": 1.44706855998471, + "learning_rate": 3.019030993751433e-06, + "loss": 0.3578, + "step": 43612 + }, + { + "epoch": 0.7536114183024606, + "grad_norm": 1.5278562565639138, + "learning_rate": 3.0186302937748525e-06, + "loss": 0.2631, + "step": 43613 + }, + { + "epoch": 0.7536286978158695, + "grad_norm": 1.1755629310855664, + "learning_rate": 3.0182296156644564e-06, + "loss": 0.2659, + "step": 43614 + }, + { + "epoch": 0.7536459773292784, + "grad_norm": 1.644428422289376, + "learning_rate": 3.0178289594214928e-06, + "loss": 0.478, + "step": 43615 + }, + { + "epoch": 0.7536632568426873, + "grad_norm": 1.3297906929058136, + "learning_rate": 3.0174283250472224e-06, + "loss": 0.211, + "step": 43616 + }, + { + "epoch": 0.7536805363560962, + "grad_norm": 1.233453910772276, + "learning_rate": 3.0170277125428916e-06, + "loss": 0.4866, + "step": 43617 + }, + { + "epoch": 0.7536978158695051, + "grad_norm": 1.4724945804703355, + "learning_rate": 3.0166271219097664e-06, + "loss": 0.5375, + "step": 43618 + }, + { + "epoch": 0.753715095382914, + "grad_norm": 1.116732278346251, + "learning_rate": 3.016226553149093e-06, + "loss": 0.3565, + "step": 43619 + }, + { + "epoch": 0.7537323748963229, + "grad_norm": 1.8540401877045125, + "learning_rate": 3.0158260062621314e-06, + "loss": 0.1926, + "step": 43620 + }, + { + "epoch": 0.7537496544097319, + "grad_norm": 1.8852453767916004, + "learning_rate": 3.015425481250134e-06, + "loss": 0.2073, + "step": 43621 + }, + { + "epoch": 0.7537669339231408, + "grad_norm": 1.2437077670260213, + "learning_rate": 3.0150249781143513e-06, + "loss": 0.6505, + "step": 43622 + }, + { + "epoch": 0.7537842134365497, + "grad_norm": 1.135675744015163, + "learning_rate": 3.0146244968560414e-06, + "loss": 0.4925, + "step": 43623 + }, + { + "epoch": 0.7538014929499586, + "grad_norm": 1.287081574924382, + "learning_rate": 3.0142240374764574e-06, + "loss": 0.2889, + "step": 43624 + }, + { + "epoch": 0.7538187724633675, + "grad_norm": 0.9219453541552399, + "learning_rate": 3.0138235999768594e-06, + "loss": 0.224, + "step": 43625 + }, + { + "epoch": 0.7538360519767764, + "grad_norm": 1.2784048817561768, + "learning_rate": 3.0134231843584928e-06, + "loss": 0.3066, + "step": 43626 + }, + { + "epoch": 0.7538533314901852, + "grad_norm": 1.0994691540991168, + "learning_rate": 3.0130227906226197e-06, + "loss": 0.2202, + "step": 43627 + }, + { + "epoch": 0.7538706110035941, + "grad_norm": 1.0382918795371914, + "learning_rate": 3.0126224187704857e-06, + "loss": 0.4204, + "step": 43628 + }, + { + "epoch": 0.753887890517003, + "grad_norm": 0.9317677473004239, + "learning_rate": 3.01222206880335e-06, + "loss": 0.2625, + "step": 43629 + }, + { + "epoch": 0.7539051700304119, + "grad_norm": 1.152227406736463, + "learning_rate": 3.0118217407224693e-06, + "loss": 0.5114, + "step": 43630 + }, + { + "epoch": 0.7539224495438208, + "grad_norm": 1.5545998360676085, + "learning_rate": 3.0114214345290904e-06, + "loss": 0.3943, + "step": 43631 + }, + { + "epoch": 0.7539397290572297, + "grad_norm": 1.0986501767786743, + "learning_rate": 3.0110211502244747e-06, + "loss": 0.4214, + "step": 43632 + }, + { + "epoch": 0.7539570085706386, + "grad_norm": 1.18001984149421, + "learning_rate": 3.0106208878098673e-06, + "loss": 0.572, + "step": 43633 + }, + { + "epoch": 0.7539742880840475, + "grad_norm": 1.2026275677287572, + "learning_rate": 3.010220647286527e-06, + "loss": 0.2986, + "step": 43634 + }, + { + "epoch": 0.7539915675974564, + "grad_norm": 3.2223681118859195, + "learning_rate": 3.0098204286557076e-06, + "loss": 0.2175, + "step": 43635 + }, + { + "epoch": 0.7540088471108654, + "grad_norm": 1.0650844367144252, + "learning_rate": 3.0094202319186637e-06, + "loss": 0.2959, + "step": 43636 + }, + { + "epoch": 0.7540261266242743, + "grad_norm": 1.449066227025855, + "learning_rate": 3.0090200570766448e-06, + "loss": 0.5448, + "step": 43637 + }, + { + "epoch": 0.7540434061376832, + "grad_norm": 1.0324067174126546, + "learning_rate": 3.0086199041309094e-06, + "loss": 0.3842, + "step": 43638 + }, + { + "epoch": 0.7540606856510921, + "grad_norm": 0.8024536914482289, + "learning_rate": 3.0082197730827046e-06, + "loss": 0.3774, + "step": 43639 + }, + { + "epoch": 0.754077965164501, + "grad_norm": 0.4296061633358629, + "learning_rate": 3.007819663933287e-06, + "loss": 0.4984, + "step": 43640 + }, + { + "epoch": 0.7540952446779099, + "grad_norm": 0.46302174472959345, + "learning_rate": 3.007419576683912e-06, + "loss": 0.8274, + "step": 43641 + }, + { + "epoch": 0.7541125241913188, + "grad_norm": 1.6411578253208725, + "learning_rate": 3.007019511335827e-06, + "loss": 0.288, + "step": 43642 + }, + { + "epoch": 0.7541298037047277, + "grad_norm": 1.0888104503106777, + "learning_rate": 3.0066194678902928e-06, + "loss": 0.36, + "step": 43643 + }, + { + "epoch": 0.7541470832181366, + "grad_norm": 1.6775194636318238, + "learning_rate": 3.0062194463485538e-06, + "loss": 0.1777, + "step": 43644 + }, + { + "epoch": 0.7541643627315455, + "grad_norm": 1.0582094031192524, + "learning_rate": 3.0058194467118674e-06, + "loss": 0.2221, + "step": 43645 + }, + { + "epoch": 0.7541816422449544, + "grad_norm": 1.5815698011684198, + "learning_rate": 3.0054194689814854e-06, + "loss": 0.5184, + "step": 43646 + }, + { + "epoch": 0.7541989217583633, + "grad_norm": 1.417304548646769, + "learning_rate": 3.0050195131586645e-06, + "loss": 0.2692, + "step": 43647 + }, + { + "epoch": 0.7542162012717721, + "grad_norm": 1.3372919837611403, + "learning_rate": 3.004619579244652e-06, + "loss": 0.5315, + "step": 43648 + }, + { + "epoch": 0.754233480785181, + "grad_norm": 2.2098577530182846, + "learning_rate": 3.004219667240704e-06, + "loss": 0.3457, + "step": 43649 + }, + { + "epoch": 0.75425076029859, + "grad_norm": 1.9895665113103147, + "learning_rate": 3.0038197771480703e-06, + "loss": 0.3756, + "step": 43650 + }, + { + "epoch": 0.7542680398119989, + "grad_norm": 1.3834745377140978, + "learning_rate": 3.0034199089680038e-06, + "loss": 0.4111, + "step": 43651 + }, + { + "epoch": 0.7542853193254078, + "grad_norm": 1.172470020038704, + "learning_rate": 3.0030200627017613e-06, + "loss": 0.2877, + "step": 43652 + }, + { + "epoch": 0.7543025988388167, + "grad_norm": 0.8935196758430795, + "learning_rate": 3.002620238350589e-06, + "loss": 0.3635, + "step": 43653 + }, + { + "epoch": 0.7543198783522256, + "grad_norm": 0.9734707459722073, + "learning_rate": 3.0022204359157438e-06, + "loss": 0.2252, + "step": 43654 + }, + { + "epoch": 0.7543371578656345, + "grad_norm": 1.449019758611894, + "learning_rate": 3.001820655398473e-06, + "loss": 0.3046, + "step": 43655 + }, + { + "epoch": 0.7543544373790434, + "grad_norm": 0.9651347946869094, + "learning_rate": 3.001420896800036e-06, + "loss": 0.6606, + "step": 43656 + }, + { + "epoch": 0.7543717168924523, + "grad_norm": 1.0334404008391114, + "learning_rate": 3.001021160121673e-06, + "loss": 0.2901, + "step": 43657 + }, + { + "epoch": 0.7543889964058612, + "grad_norm": 1.0552376218436832, + "learning_rate": 3.000621445364651e-06, + "loss": 0.5766, + "step": 43658 + }, + { + "epoch": 0.7544062759192701, + "grad_norm": 1.1773258013046706, + "learning_rate": 3.0002217525302115e-06, + "loss": 0.3332, + "step": 43659 + }, + { + "epoch": 0.754423555432679, + "grad_norm": 0.8940743671718071, + "learning_rate": 2.9998220816196122e-06, + "loss": 0.217, + "step": 43660 + }, + { + "epoch": 0.7544408349460879, + "grad_norm": 1.385161207806196, + "learning_rate": 2.9994224326341017e-06, + "loss": 0.4277, + "step": 43661 + }, + { + "epoch": 0.7544581144594968, + "grad_norm": 1.0222288732791367, + "learning_rate": 2.999022805574929e-06, + "loss": 0.3231, + "step": 43662 + }, + { + "epoch": 0.7544753939729058, + "grad_norm": 1.0062562359482912, + "learning_rate": 2.99862320044335e-06, + "loss": 0.2963, + "step": 43663 + }, + { + "epoch": 0.7544926734863147, + "grad_norm": 1.9981519657279905, + "learning_rate": 2.9982236172406145e-06, + "loss": 0.3236, + "step": 43664 + }, + { + "epoch": 0.7545099529997236, + "grad_norm": 1.5767185575188107, + "learning_rate": 2.9978240559679783e-06, + "loss": 0.4312, + "step": 43665 + }, + { + "epoch": 0.7545272325131325, + "grad_norm": 1.148779467975866, + "learning_rate": 2.9974245166266857e-06, + "loss": 0.5825, + "step": 43666 + }, + { + "epoch": 0.7545445120265414, + "grad_norm": 1.3871721791812472, + "learning_rate": 2.9970249992179945e-06, + "loss": 0.2634, + "step": 43667 + }, + { + "epoch": 0.7545617915399503, + "grad_norm": 1.1689835647871334, + "learning_rate": 2.9966255037431512e-06, + "loss": 0.3752, + "step": 43668 + }, + { + "epoch": 0.7545790710533591, + "grad_norm": 1.3113675418678779, + "learning_rate": 2.9962260302034073e-06, + "loss": 0.3275, + "step": 43669 + }, + { + "epoch": 0.754596350566768, + "grad_norm": 1.5978087944241115, + "learning_rate": 2.995826578600017e-06, + "loss": 0.525, + "step": 43670 + }, + { + "epoch": 0.7546136300801769, + "grad_norm": 1.0059157040364977, + "learning_rate": 2.9954271489342337e-06, + "loss": 0.2843, + "step": 43671 + }, + { + "epoch": 0.7546309095935858, + "grad_norm": 1.0697552271323922, + "learning_rate": 2.995027741207305e-06, + "loss": 0.4121, + "step": 43672 + }, + { + "epoch": 0.7546481891069947, + "grad_norm": 0.8592544765475025, + "learning_rate": 2.994628355420477e-06, + "loss": 0.4094, + "step": 43673 + }, + { + "epoch": 0.7546654686204036, + "grad_norm": 1.0228954304810125, + "learning_rate": 2.9942289915750065e-06, + "loss": 0.241, + "step": 43674 + }, + { + "epoch": 0.7546827481338125, + "grad_norm": 2.565837873453853, + "learning_rate": 2.993829649672143e-06, + "loss": 0.3838, + "step": 43675 + }, + { + "epoch": 0.7547000276472214, + "grad_norm": 1.4976574839125678, + "learning_rate": 2.9934303297131406e-06, + "loss": 0.2097, + "step": 43676 + }, + { + "epoch": 0.7547173071606303, + "grad_norm": 1.2848864641797104, + "learning_rate": 2.993031031699244e-06, + "loss": 0.5417, + "step": 43677 + }, + { + "epoch": 0.7547345866740393, + "grad_norm": 1.529662024822542, + "learning_rate": 2.9926317556317096e-06, + "loss": 0.5255, + "step": 43678 + }, + { + "epoch": 0.7547518661874482, + "grad_norm": 1.334518014471913, + "learning_rate": 2.9922325015117828e-06, + "loss": 0.4036, + "step": 43679 + }, + { + "epoch": 0.7547691457008571, + "grad_norm": 1.0131161326490319, + "learning_rate": 2.991833269340715e-06, + "loss": 0.3526, + "step": 43680 + }, + { + "epoch": 0.754786425214266, + "grad_norm": 0.9958042896358436, + "learning_rate": 2.991434059119759e-06, + "loss": 0.1836, + "step": 43681 + }, + { + "epoch": 0.7548037047276749, + "grad_norm": 1.1063750807569792, + "learning_rate": 2.991034870850167e-06, + "loss": 0.5239, + "step": 43682 + }, + { + "epoch": 0.7548209842410838, + "grad_norm": 2.547242537002139, + "learning_rate": 2.9906357045331858e-06, + "loss": 0.3497, + "step": 43683 + }, + { + "epoch": 0.7548382637544927, + "grad_norm": 1.0145052490863204, + "learning_rate": 2.9902365601700633e-06, + "loss": 0.5497, + "step": 43684 + }, + { + "epoch": 0.7548555432679016, + "grad_norm": 1.2668491260572772, + "learning_rate": 2.989837437762053e-06, + "loss": 0.499, + "step": 43685 + }, + { + "epoch": 0.7548728227813105, + "grad_norm": 0.8015419914192072, + "learning_rate": 2.989438337310405e-06, + "loss": 0.3032, + "step": 43686 + }, + { + "epoch": 0.7548901022947194, + "grad_norm": 2.1556181117108495, + "learning_rate": 2.989039258816372e-06, + "loss": 0.3623, + "step": 43687 + }, + { + "epoch": 0.7549073818081283, + "grad_norm": 1.6083263360468512, + "learning_rate": 2.9886402022811967e-06, + "loss": 0.4054, + "step": 43688 + }, + { + "epoch": 0.7549246613215372, + "grad_norm": 1.2473561338411654, + "learning_rate": 2.9882411677061375e-06, + "loss": 0.4334, + "step": 43689 + }, + { + "epoch": 0.754941940834946, + "grad_norm": 1.2134889834996223, + "learning_rate": 2.987842155092435e-06, + "loss": 0.3817, + "step": 43690 + }, + { + "epoch": 0.7549592203483549, + "grad_norm": 1.4122364855043266, + "learning_rate": 2.987443164441346e-06, + "loss": 0.4309, + "step": 43691 + }, + { + "epoch": 0.7549764998617639, + "grad_norm": 1.3249252245765302, + "learning_rate": 2.9870441957541196e-06, + "loss": 0.3259, + "step": 43692 + }, + { + "epoch": 0.7549937793751728, + "grad_norm": 1.9887491398894397, + "learning_rate": 2.986645249032001e-06, + "loss": 0.156, + "step": 43693 + }, + { + "epoch": 0.7550110588885817, + "grad_norm": 1.2574586943649297, + "learning_rate": 2.986246324276245e-06, + "loss": 0.3813, + "step": 43694 + }, + { + "epoch": 0.7550283384019906, + "grad_norm": 1.1540163970058264, + "learning_rate": 2.985847421488096e-06, + "loss": 0.3189, + "step": 43695 + }, + { + "epoch": 0.7550456179153995, + "grad_norm": 1.8075552764331286, + "learning_rate": 2.9854485406688093e-06, + "loss": 0.1905, + "step": 43696 + }, + { + "epoch": 0.7550628974288084, + "grad_norm": 0.9434631039645928, + "learning_rate": 2.985049681819625e-06, + "loss": 0.3513, + "step": 43697 + }, + { + "epoch": 0.7550801769422173, + "grad_norm": 0.9981172709308974, + "learning_rate": 2.984650844941803e-06, + "loss": 0.5108, + "step": 43698 + }, + { + "epoch": 0.7550974564556262, + "grad_norm": 0.9863965843313126, + "learning_rate": 2.984252030036586e-06, + "loss": 0.4403, + "step": 43699 + }, + { + "epoch": 0.7551147359690351, + "grad_norm": 0.9744436356274782, + "learning_rate": 2.9838532371052278e-06, + "loss": 0.2367, + "step": 43700 + }, + { + "epoch": 0.755132015482444, + "grad_norm": 1.8570845533108111, + "learning_rate": 2.9834544661489708e-06, + "loss": 0.3968, + "step": 43701 + }, + { + "epoch": 0.7551492949958529, + "grad_norm": 1.291211606517843, + "learning_rate": 2.98305571716907e-06, + "loss": 0.2422, + "step": 43702 + }, + { + "epoch": 0.7551665745092618, + "grad_norm": 1.5751131045191045, + "learning_rate": 2.98265699016677e-06, + "loss": 0.3873, + "step": 43703 + }, + { + "epoch": 0.7551838540226707, + "grad_norm": 1.8429970952709676, + "learning_rate": 2.9822582851433214e-06, + "loss": 0.2654, + "step": 43704 + }, + { + "epoch": 0.7552011335360796, + "grad_norm": 0.9891295087366688, + "learning_rate": 2.9818596020999757e-06, + "loss": 0.5336, + "step": 43705 + }, + { + "epoch": 0.7552184130494886, + "grad_norm": 1.3166783515906648, + "learning_rate": 2.981460941037977e-06, + "loss": 0.3431, + "step": 43706 + }, + { + "epoch": 0.7552356925628975, + "grad_norm": 1.215285417490753, + "learning_rate": 2.9810623019585784e-06, + "loss": 0.3835, + "step": 43707 + }, + { + "epoch": 0.7552529720763064, + "grad_norm": 1.1984237937976994, + "learning_rate": 2.9806636848630233e-06, + "loss": 0.3163, + "step": 43708 + }, + { + "epoch": 0.7552702515897153, + "grad_norm": 2.116894361378762, + "learning_rate": 2.9802650897525633e-06, + "loss": 0.4714, + "step": 43709 + }, + { + "epoch": 0.7552875311031242, + "grad_norm": 1.4319351002843244, + "learning_rate": 2.9798665166284456e-06, + "loss": 0.3452, + "step": 43710 + }, + { + "epoch": 0.755304810616533, + "grad_norm": 1.5176441234747795, + "learning_rate": 2.9794679654919235e-06, + "loss": 0.2285, + "step": 43711 + }, + { + "epoch": 0.7553220901299419, + "grad_norm": 1.183206850913231, + "learning_rate": 2.9790694363442372e-06, + "loss": 0.3482, + "step": 43712 + }, + { + "epoch": 0.7553393696433508, + "grad_norm": 1.2313451979254637, + "learning_rate": 2.9786709291866436e-06, + "loss": 0.2841, + "step": 43713 + }, + { + "epoch": 0.7553566491567597, + "grad_norm": 1.6917927035782219, + "learning_rate": 2.9782724440203812e-06, + "loss": 0.3499, + "step": 43714 + }, + { + "epoch": 0.7553739286701686, + "grad_norm": 0.7298306405404478, + "learning_rate": 2.977873980846705e-06, + "loss": 0.3586, + "step": 43715 + }, + { + "epoch": 0.7553912081835775, + "grad_norm": 1.0035914941862663, + "learning_rate": 2.9774755396668633e-06, + "loss": 0.391, + "step": 43716 + }, + { + "epoch": 0.7554084876969864, + "grad_norm": 1.0043657298043658, + "learning_rate": 2.977077120482098e-06, + "loss": 0.2846, + "step": 43717 + }, + { + "epoch": 0.7554257672103953, + "grad_norm": 1.4914772403340781, + "learning_rate": 2.9766787232936657e-06, + "loss": 0.3414, + "step": 43718 + }, + { + "epoch": 0.7554430467238042, + "grad_norm": 1.0797998172328218, + "learning_rate": 2.9762803481028057e-06, + "loss": 0.4971, + "step": 43719 + }, + { + "epoch": 0.7554603262372132, + "grad_norm": 1.216148870091282, + "learning_rate": 2.9758819949107687e-06, + "loss": 0.3555, + "step": 43720 + }, + { + "epoch": 0.7554776057506221, + "grad_norm": 1.5433717607903579, + "learning_rate": 2.9754836637188034e-06, + "loss": 0.4793, + "step": 43721 + }, + { + "epoch": 0.755494885264031, + "grad_norm": 1.093027904002571, + "learning_rate": 2.975085354528161e-06, + "loss": 0.298, + "step": 43722 + }, + { + "epoch": 0.7555121647774399, + "grad_norm": 1.0726568874033142, + "learning_rate": 2.974687067340084e-06, + "loss": 0.2529, + "step": 43723 + }, + { + "epoch": 0.7555294442908488, + "grad_norm": 0.9028798972852695, + "learning_rate": 2.974288802155819e-06, + "loss": 0.4218, + "step": 43724 + }, + { + "epoch": 0.7555467238042577, + "grad_norm": 1.1851573120501546, + "learning_rate": 2.9738905589766145e-06, + "loss": 0.4274, + "step": 43725 + }, + { + "epoch": 0.7555640033176666, + "grad_norm": 1.2635873435455758, + "learning_rate": 2.9734923378037195e-06, + "loss": 0.4094, + "step": 43726 + }, + { + "epoch": 0.7555812828310755, + "grad_norm": 1.544585075168415, + "learning_rate": 2.9730941386383826e-06, + "loss": 0.3529, + "step": 43727 + }, + { + "epoch": 0.7555985623444844, + "grad_norm": 1.202528530765375, + "learning_rate": 2.972695961481846e-06, + "loss": 0.2678, + "step": 43728 + }, + { + "epoch": 0.7556158418578933, + "grad_norm": 2.0704010504366908, + "learning_rate": 2.9722978063353634e-06, + "loss": 0.5407, + "step": 43729 + }, + { + "epoch": 0.7556331213713022, + "grad_norm": 2.4677281629715138, + "learning_rate": 2.9718996732001758e-06, + "loss": 0.4103, + "step": 43730 + }, + { + "epoch": 0.7556504008847111, + "grad_norm": 1.3766436074145638, + "learning_rate": 2.9715015620775313e-06, + "loss": 0.438, + "step": 43731 + }, + { + "epoch": 0.7556676803981199, + "grad_norm": 1.4854458087663924, + "learning_rate": 2.9711034729686784e-06, + "loss": 0.3254, + "step": 43732 + }, + { + "epoch": 0.7556849599115288, + "grad_norm": 2.4228159613758495, + "learning_rate": 2.9707054058748662e-06, + "loss": 0.3773, + "step": 43733 + }, + { + "epoch": 0.7557022394249377, + "grad_norm": 1.7377516845672716, + "learning_rate": 2.97030736079734e-06, + "loss": 0.3396, + "step": 43734 + }, + { + "epoch": 0.7557195189383467, + "grad_norm": 1.2099520940092854, + "learning_rate": 2.969909337737341e-06, + "loss": 0.2625, + "step": 43735 + }, + { + "epoch": 0.7557367984517556, + "grad_norm": 1.652566322402629, + "learning_rate": 2.9695113366961236e-06, + "loss": 0.2993, + "step": 43736 + }, + { + "epoch": 0.7557540779651645, + "grad_norm": 1.8437838255218224, + "learning_rate": 2.9691133576749253e-06, + "loss": 0.2639, + "step": 43737 + }, + { + "epoch": 0.7557713574785734, + "grad_norm": 1.9695457173219724, + "learning_rate": 2.9687154006750052e-06, + "loss": 0.4694, + "step": 43738 + }, + { + "epoch": 0.7557886369919823, + "grad_norm": 1.07135867076036, + "learning_rate": 2.9683174656975987e-06, + "loss": 0.3682, + "step": 43739 + }, + { + "epoch": 0.7558059165053912, + "grad_norm": 1.1080795218700616, + "learning_rate": 2.96791955274396e-06, + "loss": 0.2493, + "step": 43740 + }, + { + "epoch": 0.7558231960188001, + "grad_norm": 1.5808761653667385, + "learning_rate": 2.967521661815327e-06, + "loss": 0.357, + "step": 43741 + }, + { + "epoch": 0.755840475532209, + "grad_norm": 0.9550786828722191, + "learning_rate": 2.967123792912955e-06, + "loss": 0.4088, + "step": 43742 + }, + { + "epoch": 0.7558577550456179, + "grad_norm": 1.1213698312743132, + "learning_rate": 2.966725946038083e-06, + "loss": 0.3627, + "step": 43743 + }, + { + "epoch": 0.7558750345590268, + "grad_norm": 1.3026189136527493, + "learning_rate": 2.9663281211919594e-06, + "loss": 0.2783, + "step": 43744 + }, + { + "epoch": 0.7558923140724357, + "grad_norm": 1.206540225976287, + "learning_rate": 2.9659303183758335e-06, + "loss": 0.205, + "step": 43745 + }, + { + "epoch": 0.7559095935858446, + "grad_norm": 0.884720973017206, + "learning_rate": 2.965532537590945e-06, + "loss": 0.3764, + "step": 43746 + }, + { + "epoch": 0.7559268730992535, + "grad_norm": 1.7761145915835193, + "learning_rate": 2.9651347788385465e-06, + "loss": 0.388, + "step": 43747 + }, + { + "epoch": 0.7559441526126625, + "grad_norm": 1.2275696036129011, + "learning_rate": 2.9647370421198785e-06, + "loss": 0.4103, + "step": 43748 + }, + { + "epoch": 0.7559614321260714, + "grad_norm": 0.7066418062225697, + "learning_rate": 2.9643393274361874e-06, + "loss": 0.5355, + "step": 43749 + }, + { + "epoch": 0.7559787116394803, + "grad_norm": 0.5191669069200358, + "learning_rate": 2.963941634788721e-06, + "loss": 0.2057, + "step": 43750 + }, + { + "epoch": 0.7559959911528892, + "grad_norm": 1.1436696201774206, + "learning_rate": 2.963543964178727e-06, + "loss": 0.3075, + "step": 43751 + }, + { + "epoch": 0.7560132706662981, + "grad_norm": 1.341945719387173, + "learning_rate": 2.9631463156074446e-06, + "loss": 0.2966, + "step": 43752 + }, + { + "epoch": 0.756030550179707, + "grad_norm": 1.5873189107888568, + "learning_rate": 2.9627486890761257e-06, + "loss": 0.5003, + "step": 43753 + }, + { + "epoch": 0.7560478296931158, + "grad_norm": 1.5586297631351873, + "learning_rate": 2.9623510845860102e-06, + "loss": 0.2311, + "step": 43754 + }, + { + "epoch": 0.7560651092065247, + "grad_norm": 1.1034514751380755, + "learning_rate": 2.961953502138346e-06, + "loss": 0.7676, + "step": 43755 + }, + { + "epoch": 0.7560823887199336, + "grad_norm": 1.7998048580056611, + "learning_rate": 2.961555941734382e-06, + "loss": 0.2981, + "step": 43756 + }, + { + "epoch": 0.7560996682333425, + "grad_norm": 1.346135060078244, + "learning_rate": 2.9611584033753547e-06, + "loss": 0.5345, + "step": 43757 + }, + { + "epoch": 0.7561169477467514, + "grad_norm": 1.4019904929600309, + "learning_rate": 2.9607608870625193e-06, + "loss": 0.376, + "step": 43758 + }, + { + "epoch": 0.7561342272601603, + "grad_norm": 1.3204362398526475, + "learning_rate": 2.9603633927971108e-06, + "loss": 0.5076, + "step": 43759 + }, + { + "epoch": 0.7561515067735692, + "grad_norm": 1.6317274518020322, + "learning_rate": 2.959965920580381e-06, + "loss": 0.3153, + "step": 43760 + }, + { + "epoch": 0.7561687862869781, + "grad_norm": 0.866802397757471, + "learning_rate": 2.959568470413572e-06, + "loss": 0.3705, + "step": 43761 + }, + { + "epoch": 0.756186065800387, + "grad_norm": 1.2164167801448866, + "learning_rate": 2.9591710422979326e-06, + "loss": 0.3425, + "step": 43762 + }, + { + "epoch": 0.756203345313796, + "grad_norm": 1.7020904764488718, + "learning_rate": 2.958773636234702e-06, + "loss": 0.4344, + "step": 43763 + }, + { + "epoch": 0.7562206248272049, + "grad_norm": 2.3097185354888947, + "learning_rate": 2.95837625222513e-06, + "loss": 0.4662, + "step": 43764 + }, + { + "epoch": 0.7562379043406138, + "grad_norm": 1.2803867974295249, + "learning_rate": 2.9579788902704553e-06, + "loss": 0.2674, + "step": 43765 + }, + { + "epoch": 0.7562551838540227, + "grad_norm": 1.2624480621239742, + "learning_rate": 2.9575815503719276e-06, + "loss": 0.5219, + "step": 43766 + }, + { + "epoch": 0.7562724633674316, + "grad_norm": 1.6322907515408536, + "learning_rate": 2.957184232530791e-06, + "loss": 0.2832, + "step": 43767 + }, + { + "epoch": 0.7562897428808405, + "grad_norm": 1.2479544765908783, + "learning_rate": 2.956786936748286e-06, + "loss": 0.5503, + "step": 43768 + }, + { + "epoch": 0.7563070223942494, + "grad_norm": 1.495805903097272, + "learning_rate": 2.9563896630256626e-06, + "loss": 0.4331, + "step": 43769 + }, + { + "epoch": 0.7563243019076583, + "grad_norm": 1.0725995709385177, + "learning_rate": 2.95599241136416e-06, + "loss": 0.668, + "step": 43770 + }, + { + "epoch": 0.7563415814210672, + "grad_norm": 1.5211618281437862, + "learning_rate": 2.955595181765023e-06, + "loss": 0.4707, + "step": 43771 + }, + { + "epoch": 0.7563588609344761, + "grad_norm": 1.0294323452544634, + "learning_rate": 2.955197974229498e-06, + "loss": 0.6167, + "step": 43772 + }, + { + "epoch": 0.756376140447885, + "grad_norm": 1.0665165546007993, + "learning_rate": 2.9548007887588314e-06, + "loss": 0.3539, + "step": 43773 + }, + { + "epoch": 0.756393419961294, + "grad_norm": 1.0231744488360803, + "learning_rate": 2.954403625354264e-06, + "loss": 0.4563, + "step": 43774 + }, + { + "epoch": 0.7564106994747027, + "grad_norm": 0.8319718166096529, + "learning_rate": 2.954006484017038e-06, + "loss": 0.3032, + "step": 43775 + }, + { + "epoch": 0.7564279789881116, + "grad_norm": 1.3894482681586346, + "learning_rate": 2.953609364748401e-06, + "loss": 0.3496, + "step": 43776 + }, + { + "epoch": 0.7564452585015206, + "grad_norm": 0.9688910396659141, + "learning_rate": 2.953212267549589e-06, + "loss": 0.3231, + "step": 43777 + }, + { + "epoch": 0.7564625380149295, + "grad_norm": 1.240347783119899, + "learning_rate": 2.9528151924218575e-06, + "loss": 0.4276, + "step": 43778 + }, + { + "epoch": 0.7564798175283384, + "grad_norm": 1.1614267681028387, + "learning_rate": 2.952418139366442e-06, + "loss": 0.3608, + "step": 43779 + }, + { + "epoch": 0.7564970970417473, + "grad_norm": 1.7902759110323918, + "learning_rate": 2.952021108384591e-06, + "loss": 0.381, + "step": 43780 + }, + { + "epoch": 0.7565143765551562, + "grad_norm": 1.5294434188105572, + "learning_rate": 2.9516240994775425e-06, + "loss": 0.213, + "step": 43781 + }, + { + "epoch": 0.7565316560685651, + "grad_norm": 1.7655587922808746, + "learning_rate": 2.9512271126465465e-06, + "loss": 0.4584, + "step": 43782 + }, + { + "epoch": 0.756548935581974, + "grad_norm": 0.9373290995795043, + "learning_rate": 2.950830147892836e-06, + "loss": 0.3794, + "step": 43783 + }, + { + "epoch": 0.7565662150953829, + "grad_norm": 1.562089430971251, + "learning_rate": 2.950433205217668e-06, + "loss": 0.3823, + "step": 43784 + }, + { + "epoch": 0.7565834946087918, + "grad_norm": 1.0814947869730813, + "learning_rate": 2.9500362846222787e-06, + "loss": 0.6388, + "step": 43785 + }, + { + "epoch": 0.7566007741222007, + "grad_norm": 1.356692852831137, + "learning_rate": 2.9496393861079086e-06, + "loss": 0.2666, + "step": 43786 + }, + { + "epoch": 0.7566180536356096, + "grad_norm": 1.3576362064238918, + "learning_rate": 2.949242509675807e-06, + "loss": 0.3829, + "step": 43787 + }, + { + "epoch": 0.7566353331490185, + "grad_norm": 0.9628260497641387, + "learning_rate": 2.9488456553272103e-06, + "loss": 0.4407, + "step": 43788 + }, + { + "epoch": 0.7566526126624274, + "grad_norm": 0.6998475433450327, + "learning_rate": 2.9484488230633646e-06, + "loss": 0.1835, + "step": 43789 + }, + { + "epoch": 0.7566698921758364, + "grad_norm": 1.5882651217339359, + "learning_rate": 2.948052012885514e-06, + "loss": 0.4466, + "step": 43790 + }, + { + "epoch": 0.7566871716892453, + "grad_norm": 1.1840391296152517, + "learning_rate": 2.947655224794902e-06, + "loss": 0.2376, + "step": 43791 + }, + { + "epoch": 0.7567044512026542, + "grad_norm": 1.809908406441308, + "learning_rate": 2.947258458792768e-06, + "loss": 0.2604, + "step": 43792 + }, + { + "epoch": 0.7567217307160631, + "grad_norm": 0.851549633080269, + "learning_rate": 2.946861714880359e-06, + "loss": 0.3174, + "step": 43793 + }, + { + "epoch": 0.756739010229472, + "grad_norm": 1.1638418571805245, + "learning_rate": 2.946464993058913e-06, + "loss": 0.3006, + "step": 43794 + }, + { + "epoch": 0.7567562897428809, + "grad_norm": 1.413671370553801, + "learning_rate": 2.9460682933296734e-06, + "loss": 0.304, + "step": 43795 + }, + { + "epoch": 0.7567735692562897, + "grad_norm": 0.9945452715440466, + "learning_rate": 2.945671615693888e-06, + "loss": 0.2288, + "step": 43796 + }, + { + "epoch": 0.7567908487696986, + "grad_norm": 1.1968016769766794, + "learning_rate": 2.945274960152792e-06, + "loss": 0.5057, + "step": 43797 + }, + { + "epoch": 0.7568081282831075, + "grad_norm": 0.7463915567248972, + "learning_rate": 2.9448783267076332e-06, + "loss": 0.3077, + "step": 43798 + }, + { + "epoch": 0.7568254077965164, + "grad_norm": 1.1834466166679607, + "learning_rate": 2.9444817153596495e-06, + "loss": 0.3294, + "step": 43799 + }, + { + "epoch": 0.7568426873099253, + "grad_norm": 1.56885470862582, + "learning_rate": 2.9440851261100846e-06, + "loss": 0.3016, + "step": 43800 + }, + { + "epoch": 0.7568599668233342, + "grad_norm": 1.043311072019048, + "learning_rate": 2.9436885589601814e-06, + "loss": 0.3914, + "step": 43801 + }, + { + "epoch": 0.7568772463367431, + "grad_norm": 1.3557272230684736, + "learning_rate": 2.9432920139111855e-06, + "loss": 0.5073, + "step": 43802 + }, + { + "epoch": 0.756894525850152, + "grad_norm": 1.282738220581798, + "learning_rate": 2.9428954909643315e-06, + "loss": 0.2764, + "step": 43803 + }, + { + "epoch": 0.756911805363561, + "grad_norm": 2.5741506653269344, + "learning_rate": 2.9424989901208688e-06, + "loss": 0.4153, + "step": 43804 + }, + { + "epoch": 0.7569290848769699, + "grad_norm": 0.8488710452686483, + "learning_rate": 2.9421025113820323e-06, + "loss": 0.2551, + "step": 43805 + }, + { + "epoch": 0.7569463643903788, + "grad_norm": 0.9238113699128273, + "learning_rate": 2.941706054749067e-06, + "loss": 0.2476, + "step": 43806 + }, + { + "epoch": 0.7569636439037877, + "grad_norm": 1.168173784336768, + "learning_rate": 2.9413096202232185e-06, + "loss": 0.1496, + "step": 43807 + }, + { + "epoch": 0.7569809234171966, + "grad_norm": 1.0445651905041424, + "learning_rate": 2.9409132078057213e-06, + "loss": 0.2243, + "step": 43808 + }, + { + "epoch": 0.7569982029306055, + "grad_norm": 1.2441906293210707, + "learning_rate": 2.9405168174978237e-06, + "loss": 0.276, + "step": 43809 + }, + { + "epoch": 0.7570154824440144, + "grad_norm": 1.2312688248262618, + "learning_rate": 2.9401204493007598e-06, + "loss": 0.369, + "step": 43810 + }, + { + "epoch": 0.7570327619574233, + "grad_norm": 1.6991908492871663, + "learning_rate": 2.9397241032157764e-06, + "loss": 0.5657, + "step": 43811 + }, + { + "epoch": 0.7570500414708322, + "grad_norm": 0.7095304715787427, + "learning_rate": 2.9393277792441132e-06, + "loss": 0.3491, + "step": 43812 + }, + { + "epoch": 0.7570673209842411, + "grad_norm": 1.3956263988589224, + "learning_rate": 2.938931477387015e-06, + "loss": 0.5019, + "step": 43813 + }, + { + "epoch": 0.75708460049765, + "grad_norm": 1.0752275610994257, + "learning_rate": 2.9385351976457165e-06, + "loss": 0.2907, + "step": 43814 + }, + { + "epoch": 0.7571018800110589, + "grad_norm": 1.0085888996307248, + "learning_rate": 2.9381389400214665e-06, + "loss": 0.3079, + "step": 43815 + }, + { + "epoch": 0.7571191595244678, + "grad_norm": 0.970553164573086, + "learning_rate": 2.9377427045155015e-06, + "loss": 0.4217, + "step": 43816 + }, + { + "epoch": 0.7571364390378766, + "grad_norm": 1.9910388279389388, + "learning_rate": 2.937346491129056e-06, + "loss": 0.2937, + "step": 43817 + }, + { + "epoch": 0.7571537185512855, + "grad_norm": 1.2158430582941, + "learning_rate": 2.936950299863386e-06, + "loss": 0.4087, + "step": 43818 + }, + { + "epoch": 0.7571709980646945, + "grad_norm": 1.8438035107484232, + "learning_rate": 2.93655413071972e-06, + "loss": 0.2936, + "step": 43819 + }, + { + "epoch": 0.7571882775781034, + "grad_norm": 1.1051215930508818, + "learning_rate": 2.936157983699307e-06, + "loss": 0.3489, + "step": 43820 + }, + { + "epoch": 0.7572055570915123, + "grad_norm": 1.2272631991349228, + "learning_rate": 2.935761858803381e-06, + "loss": 0.4186, + "step": 43821 + }, + { + "epoch": 0.7572228366049212, + "grad_norm": 1.3267146886890078, + "learning_rate": 2.935365756033188e-06, + "loss": 0.2288, + "step": 43822 + }, + { + "epoch": 0.7572401161183301, + "grad_norm": 1.0776209919745894, + "learning_rate": 2.9349696753899605e-06, + "loss": 0.2643, + "step": 43823 + }, + { + "epoch": 0.757257395631739, + "grad_norm": 1.2502691161139872, + "learning_rate": 2.934573616874952e-06, + "loss": 0.2105, + "step": 43824 + }, + { + "epoch": 0.7572746751451479, + "grad_norm": 1.449263501062813, + "learning_rate": 2.9341775804893913e-06, + "loss": 0.308, + "step": 43825 + }, + { + "epoch": 0.7572919546585568, + "grad_norm": 1.1240397297116458, + "learning_rate": 2.933781566234528e-06, + "loss": 0.2696, + "step": 43826 + }, + { + "epoch": 0.7573092341719657, + "grad_norm": 0.8669171310271191, + "learning_rate": 2.9333855741115967e-06, + "loss": 0.2593, + "step": 43827 + }, + { + "epoch": 0.7573265136853746, + "grad_norm": 1.0264891261084375, + "learning_rate": 2.9329896041218365e-06, + "loss": 0.2616, + "step": 43828 + }, + { + "epoch": 0.7573437931987835, + "grad_norm": 1.4927459264322454, + "learning_rate": 2.9325936562664892e-06, + "loss": 0.4407, + "step": 43829 + }, + { + "epoch": 0.7573610727121924, + "grad_norm": 0.888354597486635, + "learning_rate": 2.9321977305467963e-06, + "loss": 0.3516, + "step": 43830 + }, + { + "epoch": 0.7573783522256013, + "grad_norm": 0.8507280839504181, + "learning_rate": 2.9318018269640002e-06, + "loss": 0.194, + "step": 43831 + }, + { + "epoch": 0.7573956317390103, + "grad_norm": 1.7156678607537943, + "learning_rate": 2.9314059455193344e-06, + "loss": 0.1507, + "step": 43832 + }, + { + "epoch": 0.7574129112524192, + "grad_norm": 1.1391086882615158, + "learning_rate": 2.9310100862140456e-06, + "loss": 0.2955, + "step": 43833 + }, + { + "epoch": 0.7574301907658281, + "grad_norm": 1.4321849295684097, + "learning_rate": 2.930614249049367e-06, + "loss": 0.5079, + "step": 43834 + }, + { + "epoch": 0.757447470279237, + "grad_norm": 1.6117846641502505, + "learning_rate": 2.9302184340265427e-06, + "loss": 0.4781, + "step": 43835 + }, + { + "epoch": 0.7574647497926459, + "grad_norm": 0.8050860309765253, + "learning_rate": 2.9298226411468143e-06, + "loss": 0.2314, + "step": 43836 + }, + { + "epoch": 0.7574820293060548, + "grad_norm": 1.3065131150720593, + "learning_rate": 2.929426870411416e-06, + "loss": 0.3128, + "step": 43837 + }, + { + "epoch": 0.7574993088194636, + "grad_norm": 1.600497134444684, + "learning_rate": 2.9290311218215927e-06, + "loss": 0.4157, + "step": 43838 + }, + { + "epoch": 0.7575165883328725, + "grad_norm": 1.1718297354377127, + "learning_rate": 2.9286353953785784e-06, + "loss": 0.3063, + "step": 43839 + }, + { + "epoch": 0.7575338678462814, + "grad_norm": 1.2218703494223278, + "learning_rate": 2.928239691083615e-06, + "loss": 0.5284, + "step": 43840 + }, + { + "epoch": 0.7575511473596903, + "grad_norm": 0.7648944016396537, + "learning_rate": 2.9278440089379434e-06, + "loss": 0.697, + "step": 43841 + }, + { + "epoch": 0.7575684268730992, + "grad_norm": 1.3616384735735552, + "learning_rate": 2.927448348942804e-06, + "loss": 0.2027, + "step": 43842 + }, + { + "epoch": 0.7575857063865081, + "grad_norm": 1.3571863273696418, + "learning_rate": 2.9270527110994306e-06, + "loss": 0.3186, + "step": 43843 + }, + { + "epoch": 0.757602985899917, + "grad_norm": 0.9013320733195145, + "learning_rate": 2.926657095409069e-06, + "loss": 0.1674, + "step": 43844 + }, + { + "epoch": 0.7576202654133259, + "grad_norm": 1.4104502533720664, + "learning_rate": 2.9262615018729524e-06, + "loss": 0.5532, + "step": 43845 + }, + { + "epoch": 0.7576375449267349, + "grad_norm": 1.0367958433013846, + "learning_rate": 2.925865930492322e-06, + "loss": 0.3858, + "step": 43846 + }, + { + "epoch": 0.7576548244401438, + "grad_norm": 0.9534887971909004, + "learning_rate": 2.9254703812684195e-06, + "loss": 0.2384, + "step": 43847 + }, + { + "epoch": 0.7576721039535527, + "grad_norm": 1.1574142065693462, + "learning_rate": 2.9250748542024788e-06, + "loss": 0.5369, + "step": 43848 + }, + { + "epoch": 0.7576893834669616, + "grad_norm": 0.9400908856131661, + "learning_rate": 2.924679349295745e-06, + "loss": 0.3306, + "step": 43849 + }, + { + "epoch": 0.7577066629803705, + "grad_norm": 1.8316377051796742, + "learning_rate": 2.9242838665494498e-06, + "loss": 0.2961, + "step": 43850 + }, + { + "epoch": 0.7577239424937794, + "grad_norm": 1.6755983422111123, + "learning_rate": 2.923888405964835e-06, + "loss": 0.4583, + "step": 43851 + }, + { + "epoch": 0.7577412220071883, + "grad_norm": 0.9049235913478736, + "learning_rate": 2.9234929675431388e-06, + "loss": 0.2653, + "step": 43852 + }, + { + "epoch": 0.7577585015205972, + "grad_norm": 1.215691911525943, + "learning_rate": 2.9230975512856052e-06, + "loss": 0.348, + "step": 43853 + }, + { + "epoch": 0.7577757810340061, + "grad_norm": 0.9489844320163852, + "learning_rate": 2.9227021571934633e-06, + "loss": 0.2126, + "step": 43854 + }, + { + "epoch": 0.757793060547415, + "grad_norm": 1.388352467350004, + "learning_rate": 2.92230678526796e-06, + "loss": 0.3306, + "step": 43855 + }, + { + "epoch": 0.7578103400608239, + "grad_norm": 1.6370952134229915, + "learning_rate": 2.9219114355103263e-06, + "loss": 0.287, + "step": 43856 + }, + { + "epoch": 0.7578276195742328, + "grad_norm": 1.4551050710577618, + "learning_rate": 2.9215161079218048e-06, + "loss": 0.3876, + "step": 43857 + }, + { + "epoch": 0.7578448990876417, + "grad_norm": 0.9394280427061702, + "learning_rate": 2.921120802503634e-06, + "loss": 0.3109, + "step": 43858 + }, + { + "epoch": 0.7578621786010505, + "grad_norm": 0.9571520859629288, + "learning_rate": 2.9207255192570493e-06, + "loss": 0.2357, + "step": 43859 + }, + { + "epoch": 0.7578794581144594, + "grad_norm": 1.2975988487806855, + "learning_rate": 2.9203302581832925e-06, + "loss": 0.5753, + "step": 43860 + }, + { + "epoch": 0.7578967376278684, + "grad_norm": 1.2060762818790103, + "learning_rate": 2.9199350192835975e-06, + "loss": 0.3835, + "step": 43861 + }, + { + "epoch": 0.7579140171412773, + "grad_norm": 1.26829640346731, + "learning_rate": 2.919539802559206e-06, + "loss": 0.4318, + "step": 43862 + }, + { + "epoch": 0.7579312966546862, + "grad_norm": 1.3843301262899985, + "learning_rate": 2.9191446080113483e-06, + "loss": 0.5157, + "step": 43863 + }, + { + "epoch": 0.7579485761680951, + "grad_norm": 1.1900469268508713, + "learning_rate": 2.918749435641274e-06, + "loss": 0.3917, + "step": 43864 + }, + { + "epoch": 0.757965855681504, + "grad_norm": 1.3414583725131428, + "learning_rate": 2.918354285450211e-06, + "loss": 0.2383, + "step": 43865 + }, + { + "epoch": 0.7579831351949129, + "grad_norm": 2.14195558035331, + "learning_rate": 2.917959157439404e-06, + "loss": 0.3659, + "step": 43866 + }, + { + "epoch": 0.7580004147083218, + "grad_norm": 0.8803486928618571, + "learning_rate": 2.917564051610087e-06, + "loss": 0.1588, + "step": 43867 + }, + { + "epoch": 0.7580176942217307, + "grad_norm": 0.8715518343078009, + "learning_rate": 2.9171689679634953e-06, + "loss": 0.3352, + "step": 43868 + }, + { + "epoch": 0.7580349737351396, + "grad_norm": 1.581672180913603, + "learning_rate": 2.9167739065008673e-06, + "loss": 0.3292, + "step": 43869 + }, + { + "epoch": 0.7580522532485485, + "grad_norm": 1.1443288612881923, + "learning_rate": 2.916378867223443e-06, + "loss": 0.3312, + "step": 43870 + }, + { + "epoch": 0.7580695327619574, + "grad_norm": 1.60542731851728, + "learning_rate": 2.915983850132461e-06, + "loss": 0.3144, + "step": 43871 + }, + { + "epoch": 0.7580868122753663, + "grad_norm": 1.8270102816983358, + "learning_rate": 2.915588855229152e-06, + "loss": 0.2102, + "step": 43872 + }, + { + "epoch": 0.7581040917887752, + "grad_norm": 1.0808456005211267, + "learning_rate": 2.915193882514762e-06, + "loss": 0.2419, + "step": 43873 + }, + { + "epoch": 0.7581213713021842, + "grad_norm": 1.3196630633163342, + "learning_rate": 2.914798931990519e-06, + "loss": 0.3697, + "step": 43874 + }, + { + "epoch": 0.7581386508155931, + "grad_norm": 1.1968970235361338, + "learning_rate": 2.9144040036576637e-06, + "loss": 0.3932, + "step": 43875 + }, + { + "epoch": 0.758155930329002, + "grad_norm": 0.955819157736046, + "learning_rate": 2.9140090975174353e-06, + "loss": 0.3809, + "step": 43876 + }, + { + "epoch": 0.7581732098424109, + "grad_norm": 1.556414667608066, + "learning_rate": 2.9136142135710698e-06, + "loss": 0.3867, + "step": 43877 + }, + { + "epoch": 0.7581904893558198, + "grad_norm": 1.5491152518760147, + "learning_rate": 2.913219351819805e-06, + "loss": 0.21, + "step": 43878 + }, + { + "epoch": 0.7582077688692287, + "grad_norm": 1.25173460237475, + "learning_rate": 2.9128245122648713e-06, + "loss": 0.5758, + "step": 43879 + }, + { + "epoch": 0.7582250483826375, + "grad_norm": 0.9197316789447967, + "learning_rate": 2.9124296949075105e-06, + "loss": 0.2728, + "step": 43880 + }, + { + "epoch": 0.7582423278960464, + "grad_norm": 2.4132321704805086, + "learning_rate": 2.912034899748959e-06, + "loss": 0.3886, + "step": 43881 + }, + { + "epoch": 0.7582596074094553, + "grad_norm": 1.1857048483828163, + "learning_rate": 2.911640126790455e-06, + "loss": 0.4431, + "step": 43882 + }, + { + "epoch": 0.7582768869228642, + "grad_norm": 2.2234025486912063, + "learning_rate": 2.9112453760332304e-06, + "loss": 0.384, + "step": 43883 + }, + { + "epoch": 0.7582941664362731, + "grad_norm": 1.2493788710742004, + "learning_rate": 2.9108506474785268e-06, + "loss": 0.403, + "step": 43884 + }, + { + "epoch": 0.758311445949682, + "grad_norm": 0.946983402627961, + "learning_rate": 2.9104559411275747e-06, + "loss": 0.3148, + "step": 43885 + }, + { + "epoch": 0.7583287254630909, + "grad_norm": 0.9728631085198389, + "learning_rate": 2.9100612569816133e-06, + "loss": 0.3672, + "step": 43886 + }, + { + "epoch": 0.7583460049764998, + "grad_norm": 0.649656766987666, + "learning_rate": 2.909666595041879e-06, + "loss": 0.6754, + "step": 43887 + }, + { + "epoch": 0.7583632844899088, + "grad_norm": 0.7415352124710737, + "learning_rate": 2.9092719553096103e-06, + "loss": 0.3826, + "step": 43888 + }, + { + "epoch": 0.7583805640033177, + "grad_norm": 0.501922317164145, + "learning_rate": 2.908877337786041e-06, + "loss": 0.5861, + "step": 43889 + }, + { + "epoch": 0.7583978435167266, + "grad_norm": 1.141635885665247, + "learning_rate": 2.908482742472404e-06, + "loss": 0.3525, + "step": 43890 + }, + { + "epoch": 0.7584151230301355, + "grad_norm": 1.3664974760692927, + "learning_rate": 2.908088169369938e-06, + "loss": 0.2758, + "step": 43891 + }, + { + "epoch": 0.7584324025435444, + "grad_norm": 0.9598730595115997, + "learning_rate": 2.907693618479879e-06, + "loss": 0.2612, + "step": 43892 + }, + { + "epoch": 0.7584496820569533, + "grad_norm": 1.5735281151559053, + "learning_rate": 2.9072990898034646e-06, + "loss": 0.285, + "step": 43893 + }, + { + "epoch": 0.7584669615703622, + "grad_norm": 1.3111104745780038, + "learning_rate": 2.9069045833419263e-06, + "loss": 0.3491, + "step": 43894 + }, + { + "epoch": 0.7584842410837711, + "grad_norm": 1.4838377577203763, + "learning_rate": 2.9065100990965058e-06, + "loss": 0.39, + "step": 43895 + }, + { + "epoch": 0.75850152059718, + "grad_norm": 1.1059589835597865, + "learning_rate": 2.9061156370684295e-06, + "loss": 0.2516, + "step": 43896 + }, + { + "epoch": 0.7585188001105889, + "grad_norm": 1.0356401706969571, + "learning_rate": 2.9057211972589394e-06, + "loss": 0.3692, + "step": 43897 + }, + { + "epoch": 0.7585360796239978, + "grad_norm": 0.7549656870201565, + "learning_rate": 2.905326779669273e-06, + "loss": 0.711, + "step": 43898 + }, + { + "epoch": 0.7585533591374067, + "grad_norm": 1.9111378579695517, + "learning_rate": 2.9049323843006595e-06, + "loss": 0.5167, + "step": 43899 + }, + { + "epoch": 0.7585706386508156, + "grad_norm": 1.4081360538132968, + "learning_rate": 2.9045380111543385e-06, + "loss": 0.222, + "step": 43900 + }, + { + "epoch": 0.7585879181642246, + "grad_norm": 1.177810394060481, + "learning_rate": 2.9041436602315422e-06, + "loss": 0.285, + "step": 43901 + }, + { + "epoch": 0.7586051976776333, + "grad_norm": 1.025977475913141, + "learning_rate": 2.90374933153351e-06, + "loss": 0.3096, + "step": 43902 + }, + { + "epoch": 0.7586224771910423, + "grad_norm": 1.2492980527907178, + "learning_rate": 2.9033550250614707e-06, + "loss": 0.3593, + "step": 43903 + }, + { + "epoch": 0.7586397567044512, + "grad_norm": 1.5019225127445326, + "learning_rate": 2.902960740816663e-06, + "loss": 0.3531, + "step": 43904 + }, + { + "epoch": 0.7586570362178601, + "grad_norm": 1.3539649236357556, + "learning_rate": 2.9025664788003205e-06, + "loss": 0.2723, + "step": 43905 + }, + { + "epoch": 0.758674315731269, + "grad_norm": 1.1490906091806503, + "learning_rate": 2.9021722390136844e-06, + "loss": 0.4346, + "step": 43906 + }, + { + "epoch": 0.7586915952446779, + "grad_norm": 1.0259251257976445, + "learning_rate": 2.9017780214579794e-06, + "loss": 0.2447, + "step": 43907 + }, + { + "epoch": 0.7587088747580868, + "grad_norm": 2.005301843912943, + "learning_rate": 2.9013838261344485e-06, + "loss": 0.4788, + "step": 43908 + }, + { + "epoch": 0.7587261542714957, + "grad_norm": 1.2833055347363274, + "learning_rate": 2.9009896530443205e-06, + "loss": 0.8257, + "step": 43909 + }, + { + "epoch": 0.7587434337849046, + "grad_norm": 1.167056291730087, + "learning_rate": 2.9005955021888322e-06, + "loss": 0.4757, + "step": 43910 + }, + { + "epoch": 0.7587607132983135, + "grad_norm": 1.2471666418991845, + "learning_rate": 2.9002013735692214e-06, + "loss": 0.2804, + "step": 43911 + }, + { + "epoch": 0.7587779928117224, + "grad_norm": 1.2935156273462527, + "learning_rate": 2.899807267186716e-06, + "loss": 0.4951, + "step": 43912 + }, + { + "epoch": 0.7587952723251313, + "grad_norm": 0.7784619249259916, + "learning_rate": 2.8994131830425564e-06, + "loss": 0.7857, + "step": 43913 + }, + { + "epoch": 0.7588125518385402, + "grad_norm": 1.53066090392982, + "learning_rate": 2.8990191211379727e-06, + "loss": 0.4701, + "step": 43914 + }, + { + "epoch": 0.7588298313519491, + "grad_norm": 1.2953168426902204, + "learning_rate": 2.8986250814742003e-06, + "loss": 0.4547, + "step": 43915 + }, + { + "epoch": 0.758847110865358, + "grad_norm": 1.5886136179003334, + "learning_rate": 2.8982310640524738e-06, + "loss": 0.255, + "step": 43916 + }, + { + "epoch": 0.758864390378767, + "grad_norm": 1.3274219715731692, + "learning_rate": 2.89783706887403e-06, + "loss": 0.296, + "step": 43917 + }, + { + "epoch": 0.7588816698921759, + "grad_norm": 1.1740037112894244, + "learning_rate": 2.8974430959400966e-06, + "loss": 0.2365, + "step": 43918 + }, + { + "epoch": 0.7588989494055848, + "grad_norm": 1.583348738364756, + "learning_rate": 2.8970491452519155e-06, + "loss": 0.1674, + "step": 43919 + }, + { + "epoch": 0.7589162289189937, + "grad_norm": 2.8626188203051526, + "learning_rate": 2.8966552168107133e-06, + "loss": 0.3319, + "step": 43920 + }, + { + "epoch": 0.7589335084324026, + "grad_norm": 0.858437485682441, + "learning_rate": 2.896261310617725e-06, + "loss": 0.4594, + "step": 43921 + }, + { + "epoch": 0.7589507879458115, + "grad_norm": 1.4659204426601928, + "learning_rate": 2.8958674266741915e-06, + "loss": 0.2192, + "step": 43922 + }, + { + "epoch": 0.7589680674592203, + "grad_norm": 1.0162921881637186, + "learning_rate": 2.895473564981337e-06, + "loss": 0.7231, + "step": 43923 + }, + { + "epoch": 0.7589853469726292, + "grad_norm": 1.3701438354274826, + "learning_rate": 2.8950797255404016e-06, + "loss": 0.5667, + "step": 43924 + }, + { + "epoch": 0.7590026264860381, + "grad_norm": 1.0248669385911957, + "learning_rate": 2.8946859083526144e-06, + "loss": 0.4741, + "step": 43925 + }, + { + "epoch": 0.759019905999447, + "grad_norm": 0.5647641525280734, + "learning_rate": 2.89429211341921e-06, + "loss": 0.5785, + "step": 43926 + }, + { + "epoch": 0.7590371855128559, + "grad_norm": 1.205823303753473, + "learning_rate": 2.8938983407414235e-06, + "loss": 0.3835, + "step": 43927 + }, + { + "epoch": 0.7590544650262648, + "grad_norm": 1.2526752037704092, + "learning_rate": 2.8935045903204904e-06, + "loss": 0.3085, + "step": 43928 + }, + { + "epoch": 0.7590717445396737, + "grad_norm": 0.9334301096053815, + "learning_rate": 2.893110862157641e-06, + "loss": 0.3527, + "step": 43929 + }, + { + "epoch": 0.7590890240530827, + "grad_norm": 0.5745456520006625, + "learning_rate": 2.8927171562541055e-06, + "loss": 0.5894, + "step": 43930 + }, + { + "epoch": 0.7591063035664916, + "grad_norm": 1.076785973312787, + "learning_rate": 2.8923234726111193e-06, + "loss": 0.5513, + "step": 43931 + }, + { + "epoch": 0.7591235830799005, + "grad_norm": 1.1768686371738462, + "learning_rate": 2.8919298112299165e-06, + "loss": 0.3593, + "step": 43932 + }, + { + "epoch": 0.7591408625933094, + "grad_norm": 1.4389901773355736, + "learning_rate": 2.891536172111733e-06, + "loss": 0.3123, + "step": 43933 + }, + { + "epoch": 0.7591581421067183, + "grad_norm": 1.2385346542524183, + "learning_rate": 2.8911425552577954e-06, + "loss": 0.3913, + "step": 43934 + }, + { + "epoch": 0.7591754216201272, + "grad_norm": 1.3192273434579926, + "learning_rate": 2.8907489606693426e-06, + "loss": 0.3051, + "step": 43935 + }, + { + "epoch": 0.7591927011335361, + "grad_norm": 1.5004496983179498, + "learning_rate": 2.8903553883476023e-06, + "loss": 0.4132, + "step": 43936 + }, + { + "epoch": 0.759209980646945, + "grad_norm": 0.8444314511402211, + "learning_rate": 2.889961838293808e-06, + "loss": 0.282, + "step": 43937 + }, + { + "epoch": 0.7592272601603539, + "grad_norm": 1.3328689575072197, + "learning_rate": 2.8895683105091933e-06, + "loss": 0.3195, + "step": 43938 + }, + { + "epoch": 0.7592445396737628, + "grad_norm": 2.8492362772082607, + "learning_rate": 2.8891748049949942e-06, + "loss": 0.3588, + "step": 43939 + }, + { + "epoch": 0.7592618191871717, + "grad_norm": 1.4123455210851705, + "learning_rate": 2.8887813217524406e-06, + "loss": 0.3893, + "step": 43940 + }, + { + "epoch": 0.7592790987005806, + "grad_norm": 0.8176589313537707, + "learning_rate": 2.888387860782761e-06, + "loss": 0.2184, + "step": 43941 + }, + { + "epoch": 0.7592963782139895, + "grad_norm": 1.5007397629077437, + "learning_rate": 2.887994422087195e-06, + "loss": 0.4558, + "step": 43942 + }, + { + "epoch": 0.7593136577273984, + "grad_norm": 1.056066679963791, + "learning_rate": 2.8876010056669667e-06, + "loss": 0.4363, + "step": 43943 + }, + { + "epoch": 0.7593309372408072, + "grad_norm": 1.3402020276880604, + "learning_rate": 2.887207611523313e-06, + "loss": 0.6162, + "step": 43944 + }, + { + "epoch": 0.7593482167542162, + "grad_norm": 1.2389675502120183, + "learning_rate": 2.886814239657465e-06, + "loss": 0.2177, + "step": 43945 + }, + { + "epoch": 0.7593654962676251, + "grad_norm": 1.4188669012328703, + "learning_rate": 2.8864208900706593e-06, + "loss": 0.3404, + "step": 43946 + }, + { + "epoch": 0.759382775781034, + "grad_norm": 1.4233699455542972, + "learning_rate": 2.88602756276412e-06, + "loss": 0.5346, + "step": 43947 + }, + { + "epoch": 0.7594000552944429, + "grad_norm": 1.3546516148757075, + "learning_rate": 2.885634257739087e-06, + "loss": 0.269, + "step": 43948 + }, + { + "epoch": 0.7594173348078518, + "grad_norm": 1.2187312503088852, + "learning_rate": 2.8852409749967847e-06, + "loss": 0.4818, + "step": 43949 + }, + { + "epoch": 0.7594346143212607, + "grad_norm": 1.2134493621667501, + "learning_rate": 2.884847714538449e-06, + "loss": 0.4698, + "step": 43950 + }, + { + "epoch": 0.7594518938346696, + "grad_norm": 0.6330077373922868, + "learning_rate": 2.8844544763653127e-06, + "loss": 0.9055, + "step": 43951 + }, + { + "epoch": 0.7594691733480785, + "grad_norm": 1.0012694436087899, + "learning_rate": 2.8840612604786047e-06, + "loss": 0.4697, + "step": 43952 + }, + { + "epoch": 0.7594864528614874, + "grad_norm": 1.344678990165797, + "learning_rate": 2.883668066879559e-06, + "loss": 0.2466, + "step": 43953 + }, + { + "epoch": 0.7595037323748963, + "grad_norm": 0.9357907929081369, + "learning_rate": 2.8832748955694036e-06, + "loss": 0.3607, + "step": 43954 + }, + { + "epoch": 0.7595210118883052, + "grad_norm": 1.209553971367746, + "learning_rate": 2.8828817465493708e-06, + "loss": 0.316, + "step": 43955 + }, + { + "epoch": 0.7595382914017141, + "grad_norm": 0.8308184804204564, + "learning_rate": 2.8824886198206947e-06, + "loss": 0.3194, + "step": 43956 + }, + { + "epoch": 0.759555570915123, + "grad_norm": 1.278578083952878, + "learning_rate": 2.8820955153846074e-06, + "loss": 0.3618, + "step": 43957 + }, + { + "epoch": 0.759572850428532, + "grad_norm": 1.7767947847037848, + "learning_rate": 2.881702433242335e-06, + "loss": 0.4199, + "step": 43958 + }, + { + "epoch": 0.7595901299419409, + "grad_norm": 1.0619287476778576, + "learning_rate": 2.881309373395115e-06, + "loss": 0.5596, + "step": 43959 + }, + { + "epoch": 0.7596074094553498, + "grad_norm": 1.1644946502754747, + "learning_rate": 2.8809163358441707e-06, + "loss": 0.2627, + "step": 43960 + }, + { + "epoch": 0.7596246889687587, + "grad_norm": 0.9743886116126551, + "learning_rate": 2.880523320590739e-06, + "loss": 0.3564, + "step": 43961 + }, + { + "epoch": 0.7596419684821676, + "grad_norm": 1.135523733059349, + "learning_rate": 2.8801303276360516e-06, + "loss": 0.571, + "step": 43962 + }, + { + "epoch": 0.7596592479955765, + "grad_norm": 1.1629755250625349, + "learning_rate": 2.8797373569813335e-06, + "loss": 0.2751, + "step": 43963 + }, + { + "epoch": 0.7596765275089854, + "grad_norm": 1.5035957603909917, + "learning_rate": 2.879344408627823e-06, + "loss": 0.4105, + "step": 43964 + }, + { + "epoch": 0.7596938070223942, + "grad_norm": 1.1464875002820003, + "learning_rate": 2.8789514825767437e-06, + "loss": 0.3589, + "step": 43965 + }, + { + "epoch": 0.7597110865358031, + "grad_norm": 1.430433038541347, + "learning_rate": 2.87855857882933e-06, + "loss": 0.3561, + "step": 43966 + }, + { + "epoch": 0.759728366049212, + "grad_norm": 1.8628100290523235, + "learning_rate": 2.878165697386812e-06, + "loss": 0.4031, + "step": 43967 + }, + { + "epoch": 0.7597456455626209, + "grad_norm": 1.7432851448179092, + "learning_rate": 2.8777728382504233e-06, + "loss": 0.3466, + "step": 43968 + }, + { + "epoch": 0.7597629250760298, + "grad_norm": 1.1250793872294826, + "learning_rate": 2.8773800014213883e-06, + "loss": 0.718, + "step": 43969 + }, + { + "epoch": 0.7597802045894387, + "grad_norm": 1.1238482415208868, + "learning_rate": 2.8769871869009424e-06, + "loss": 0.3325, + "step": 43970 + }, + { + "epoch": 0.7597974841028476, + "grad_norm": 1.3352729401260623, + "learning_rate": 2.876594394690312e-06, + "loss": 0.5331, + "step": 43971 + }, + { + "epoch": 0.7598147636162565, + "grad_norm": 1.8183045703003642, + "learning_rate": 2.8762016247907286e-06, + "loss": 0.2715, + "step": 43972 + }, + { + "epoch": 0.7598320431296655, + "grad_norm": 0.998179433926309, + "learning_rate": 2.8758088772034264e-06, + "loss": 0.3073, + "step": 43973 + }, + { + "epoch": 0.7598493226430744, + "grad_norm": 2.1526222586567023, + "learning_rate": 2.87541615192963e-06, + "loss": 0.4776, + "step": 43974 + }, + { + "epoch": 0.7598666021564833, + "grad_norm": 1.0587082660423712, + "learning_rate": 2.8750234489705743e-06, + "loss": 0.6043, + "step": 43975 + }, + { + "epoch": 0.7598838816698922, + "grad_norm": 0.5950083810603798, + "learning_rate": 2.874630768327483e-06, + "loss": 0.8868, + "step": 43976 + }, + { + "epoch": 0.7599011611833011, + "grad_norm": 0.6532195247099664, + "learning_rate": 2.8742381100015894e-06, + "loss": 0.3572, + "step": 43977 + }, + { + "epoch": 0.75991844069671, + "grad_norm": 0.4369367104976236, + "learning_rate": 2.8738454739941245e-06, + "loss": 0.4356, + "step": 43978 + }, + { + "epoch": 0.7599357202101189, + "grad_norm": 0.9495807709583933, + "learning_rate": 2.8734528603063195e-06, + "loss": 0.3538, + "step": 43979 + }, + { + "epoch": 0.7599529997235278, + "grad_norm": 1.5125386976683761, + "learning_rate": 2.8730602689394027e-06, + "loss": 0.3308, + "step": 43980 + }, + { + "epoch": 0.7599702792369367, + "grad_norm": 1.822784801798548, + "learning_rate": 2.8726676998945992e-06, + "loss": 0.3001, + "step": 43981 + }, + { + "epoch": 0.7599875587503456, + "grad_norm": 1.0210601275792606, + "learning_rate": 2.8722751531731454e-06, + "loss": 0.5641, + "step": 43982 + }, + { + "epoch": 0.7600048382637545, + "grad_norm": 2.139185512194176, + "learning_rate": 2.8718826287762634e-06, + "loss": 0.4149, + "step": 43983 + }, + { + "epoch": 0.7600221177771634, + "grad_norm": 1.179901540025675, + "learning_rate": 2.8714901267051875e-06, + "loss": 0.3519, + "step": 43984 + }, + { + "epoch": 0.7600393972905723, + "grad_norm": 3.1101468900022926, + "learning_rate": 2.871097646961146e-06, + "loss": 0.1567, + "step": 43985 + }, + { + "epoch": 0.7600566768039811, + "grad_norm": 1.0204267865892787, + "learning_rate": 2.870705189545372e-06, + "loss": 0.3569, + "step": 43986 + }, + { + "epoch": 0.76007395631739, + "grad_norm": 1.402818312922618, + "learning_rate": 2.8703127544590882e-06, + "loss": 0.2198, + "step": 43987 + }, + { + "epoch": 0.760091235830799, + "grad_norm": 1.012362758901793, + "learning_rate": 2.8699203417035293e-06, + "loss": 0.3465, + "step": 43988 + }, + { + "epoch": 0.7601085153442079, + "grad_norm": 1.5542033925625878, + "learning_rate": 2.8695279512799178e-06, + "loss": 0.2159, + "step": 43989 + }, + { + "epoch": 0.7601257948576168, + "grad_norm": 1.7722872109548573, + "learning_rate": 2.8691355831894887e-06, + "loss": 0.3602, + "step": 43990 + }, + { + "epoch": 0.7601430743710257, + "grad_norm": 2.1580534682316084, + "learning_rate": 2.86874323743347e-06, + "loss": 0.2764, + "step": 43991 + }, + { + "epoch": 0.7601603538844346, + "grad_norm": 1.6398658609858139, + "learning_rate": 2.868350914013087e-06, + "loss": 0.423, + "step": 43992 + }, + { + "epoch": 0.7601776333978435, + "grad_norm": 0.8356363529142409, + "learning_rate": 2.8679586129295743e-06, + "loss": 0.2024, + "step": 43993 + }, + { + "epoch": 0.7601949129112524, + "grad_norm": 1.0536920035870414, + "learning_rate": 2.867566334184154e-06, + "loss": 0.3696, + "step": 43994 + }, + { + "epoch": 0.7602121924246613, + "grad_norm": 1.5470223446946543, + "learning_rate": 2.8671740777780588e-06, + "loss": 0.3182, + "step": 43995 + }, + { + "epoch": 0.7602294719380702, + "grad_norm": 1.2655061411280188, + "learning_rate": 2.866781843712515e-06, + "loss": 0.3483, + "step": 43996 + }, + { + "epoch": 0.7602467514514791, + "grad_norm": 0.9874740734372266, + "learning_rate": 2.8663896319887576e-06, + "loss": 0.3449, + "step": 43997 + }, + { + "epoch": 0.760264030964888, + "grad_norm": 1.4954492658521292, + "learning_rate": 2.865997442608005e-06, + "loss": 0.3918, + "step": 43998 + }, + { + "epoch": 0.760281310478297, + "grad_norm": 1.5217317817928473, + "learning_rate": 2.8656052755714947e-06, + "loss": 0.2655, + "step": 43999 + }, + { + "epoch": 0.7602985899917059, + "grad_norm": 1.234953437629923, + "learning_rate": 2.8652131308804476e-06, + "loss": 0.3379, + "step": 44000 + }, + { + "epoch": 0.7603158695051148, + "grad_norm": 1.2052240132062317, + "learning_rate": 2.864821008536095e-06, + "loss": 0.5351, + "step": 44001 + }, + { + "epoch": 0.7603331490185237, + "grad_norm": 1.2231669313492646, + "learning_rate": 2.8644289085396683e-06, + "loss": 0.1628, + "step": 44002 + }, + { + "epoch": 0.7603504285319326, + "grad_norm": 1.6252802215657212, + "learning_rate": 2.8640368308923894e-06, + "loss": 0.4159, + "step": 44003 + }, + { + "epoch": 0.7603677080453415, + "grad_norm": 1.7913150630107635, + "learning_rate": 2.863644775595492e-06, + "loss": 0.3479, + "step": 44004 + }, + { + "epoch": 0.7603849875587504, + "grad_norm": 1.187433971081979, + "learning_rate": 2.8632527426502e-06, + "loss": 0.4149, + "step": 44005 + }, + { + "epoch": 0.7604022670721593, + "grad_norm": 1.452404081172347, + "learning_rate": 2.862860732057742e-06, + "loss": 0.3575, + "step": 44006 + }, + { + "epoch": 0.7604195465855681, + "grad_norm": 1.2917436743595645, + "learning_rate": 2.8624687438193455e-06, + "loss": 0.3154, + "step": 44007 + }, + { + "epoch": 0.760436826098977, + "grad_norm": 1.0383197128805706, + "learning_rate": 2.8620767779362433e-06, + "loss": 0.5533, + "step": 44008 + }, + { + "epoch": 0.7604541056123859, + "grad_norm": 1.2808062339177226, + "learning_rate": 2.8616848344096572e-06, + "loss": 0.5615, + "step": 44009 + }, + { + "epoch": 0.7604713851257948, + "grad_norm": 1.3759615916954482, + "learning_rate": 2.8612929132408183e-06, + "loss": 0.3496, + "step": 44010 + }, + { + "epoch": 0.7604886646392037, + "grad_norm": 1.2647305631423893, + "learning_rate": 2.86090101443095e-06, + "loss": 0.4629, + "step": 44011 + }, + { + "epoch": 0.7605059441526126, + "grad_norm": 1.2571151409567478, + "learning_rate": 2.860509137981282e-06, + "loss": 0.3358, + "step": 44012 + }, + { + "epoch": 0.7605232236660215, + "grad_norm": 1.00270783595517, + "learning_rate": 2.8601172838930447e-06, + "loss": 0.2758, + "step": 44013 + }, + { + "epoch": 0.7605405031794304, + "grad_norm": 1.1746530438596816, + "learning_rate": 2.85972545216746e-06, + "loss": 0.3938, + "step": 44014 + }, + { + "epoch": 0.7605577826928394, + "grad_norm": 0.9698253450444382, + "learning_rate": 2.859333642805762e-06, + "loss": 0.6144, + "step": 44015 + }, + { + "epoch": 0.7605750622062483, + "grad_norm": 1.8308735694101974, + "learning_rate": 2.8589418558091686e-06, + "loss": 0.3131, + "step": 44016 + }, + { + "epoch": 0.7605923417196572, + "grad_norm": 1.0105118980249561, + "learning_rate": 2.858550091178913e-06, + "loss": 0.5061, + "step": 44017 + }, + { + "epoch": 0.7606096212330661, + "grad_norm": 1.2094760808468907, + "learning_rate": 2.8581583489162213e-06, + "loss": 0.3553, + "step": 44018 + }, + { + "epoch": 0.760626900746475, + "grad_norm": 1.1528718050531697, + "learning_rate": 2.857766629022324e-06, + "loss": 0.3605, + "step": 44019 + }, + { + "epoch": 0.7606441802598839, + "grad_norm": 1.7082169504147942, + "learning_rate": 2.85737493149844e-06, + "loss": 0.3061, + "step": 44020 + }, + { + "epoch": 0.7606614597732928, + "grad_norm": 1.0609944220508496, + "learning_rate": 2.8569832563458043e-06, + "loss": 0.3727, + "step": 44021 + }, + { + "epoch": 0.7606787392867017, + "grad_norm": 1.047113326158722, + "learning_rate": 2.8565916035656406e-06, + "loss": 0.2801, + "step": 44022 + }, + { + "epoch": 0.7606960188001106, + "grad_norm": 1.5041819834725279, + "learning_rate": 2.8561999731591707e-06, + "loss": 0.2868, + "step": 44023 + }, + { + "epoch": 0.7607132983135195, + "grad_norm": 1.315688933071053, + "learning_rate": 2.855808365127626e-06, + "loss": 0.507, + "step": 44024 + }, + { + "epoch": 0.7607305778269284, + "grad_norm": 1.0514284208532736, + "learning_rate": 2.855416779472232e-06, + "loss": 0.4389, + "step": 44025 + }, + { + "epoch": 0.7607478573403373, + "grad_norm": 0.914613538419729, + "learning_rate": 2.8550252161942193e-06, + "loss": 0.3385, + "step": 44026 + }, + { + "epoch": 0.7607651368537462, + "grad_norm": 0.6523446291249122, + "learning_rate": 2.854633675294807e-06, + "loss": 0.5681, + "step": 44027 + }, + { + "epoch": 0.760782416367155, + "grad_norm": 0.8352870833777875, + "learning_rate": 2.854242156775229e-06, + "loss": 0.2873, + "step": 44028 + }, + { + "epoch": 0.760799695880564, + "grad_norm": 0.9698687971870152, + "learning_rate": 2.8538506606367033e-06, + "loss": 0.4906, + "step": 44029 + }, + { + "epoch": 0.7608169753939729, + "grad_norm": 1.006794255838369, + "learning_rate": 2.8534591868804605e-06, + "loss": 0.3195, + "step": 44030 + }, + { + "epoch": 0.7608342549073818, + "grad_norm": 1.4587345582370086, + "learning_rate": 2.8530677355077274e-06, + "loss": 0.4107, + "step": 44031 + }, + { + "epoch": 0.7608515344207907, + "grad_norm": 1.6803878301697588, + "learning_rate": 2.8526763065197327e-06, + "loss": 0.2836, + "step": 44032 + }, + { + "epoch": 0.7608688139341996, + "grad_norm": 1.0278922220465436, + "learning_rate": 2.8522848999176977e-06, + "loss": 0.3326, + "step": 44033 + }, + { + "epoch": 0.7608860934476085, + "grad_norm": 1.1112285619123534, + "learning_rate": 2.8518935157028472e-06, + "loss": 0.3598, + "step": 44034 + }, + { + "epoch": 0.7609033729610174, + "grad_norm": 1.5050462874134887, + "learning_rate": 2.85150215387641e-06, + "loss": 0.3301, + "step": 44035 + }, + { + "epoch": 0.7609206524744263, + "grad_norm": 0.7611433392242868, + "learning_rate": 2.8511108144396103e-06, + "loss": 0.1866, + "step": 44036 + }, + { + "epoch": 0.7609379319878352, + "grad_norm": 1.943467546384031, + "learning_rate": 2.8507194973936793e-06, + "loss": 0.2718, + "step": 44037 + }, + { + "epoch": 0.7609552115012441, + "grad_norm": 1.1810760237979416, + "learning_rate": 2.850328202739834e-06, + "loss": 0.3875, + "step": 44038 + }, + { + "epoch": 0.760972491014653, + "grad_norm": 1.4985285596715647, + "learning_rate": 2.8499369304793067e-06, + "loss": 0.4571, + "step": 44039 + }, + { + "epoch": 0.7609897705280619, + "grad_norm": 1.0128962916649968, + "learning_rate": 2.849545680613318e-06, + "loss": 0.3415, + "step": 44040 + }, + { + "epoch": 0.7610070500414708, + "grad_norm": 1.9626677093534857, + "learning_rate": 2.849154453143096e-06, + "loss": 0.1944, + "step": 44041 + }, + { + "epoch": 0.7610243295548798, + "grad_norm": 1.2203530427466698, + "learning_rate": 2.8487632480698678e-06, + "loss": 0.309, + "step": 44042 + }, + { + "epoch": 0.7610416090682887, + "grad_norm": 2.515374067206398, + "learning_rate": 2.8483720653948545e-06, + "loss": 0.2084, + "step": 44043 + }, + { + "epoch": 0.7610588885816976, + "grad_norm": 2.009630595021256, + "learning_rate": 2.8479809051192853e-06, + "loss": 0.5526, + "step": 44044 + }, + { + "epoch": 0.7610761680951065, + "grad_norm": 1.8772370923231296, + "learning_rate": 2.847589767244382e-06, + "loss": 0.6052, + "step": 44045 + }, + { + "epoch": 0.7610934476085154, + "grad_norm": 0.7019238875818171, + "learning_rate": 2.84719865177137e-06, + "loss": 0.3757, + "step": 44046 + }, + { + "epoch": 0.7611107271219243, + "grad_norm": 2.2505095751019932, + "learning_rate": 2.8468075587014755e-06, + "loss": 0.2994, + "step": 44047 + }, + { + "epoch": 0.7611280066353332, + "grad_norm": 1.4356045862578888, + "learning_rate": 2.846416488035927e-06, + "loss": 0.3493, + "step": 44048 + }, + { + "epoch": 0.7611452861487421, + "grad_norm": 1.9148992833366343, + "learning_rate": 2.846025439775941e-06, + "loss": 0.4065, + "step": 44049 + }, + { + "epoch": 0.7611625656621509, + "grad_norm": 1.5478565328950702, + "learning_rate": 2.845634413922752e-06, + "loss": 0.3968, + "step": 44050 + }, + { + "epoch": 0.7611798451755598, + "grad_norm": 1.4425257279066943, + "learning_rate": 2.8452434104775762e-06, + "loss": 0.3947, + "step": 44051 + }, + { + "epoch": 0.7611971246889687, + "grad_norm": 1.0912620172645322, + "learning_rate": 2.8448524294416415e-06, + "loss": 0.357, + "step": 44052 + }, + { + "epoch": 0.7612144042023776, + "grad_norm": 0.89919813960589, + "learning_rate": 2.8444614708161754e-06, + "loss": 0.3576, + "step": 44053 + }, + { + "epoch": 0.7612316837157865, + "grad_norm": 1.233346806837011, + "learning_rate": 2.8440705346023968e-06, + "loss": 0.3661, + "step": 44054 + }, + { + "epoch": 0.7612489632291954, + "grad_norm": 1.2946166236465249, + "learning_rate": 2.8436796208015373e-06, + "loss": 0.2759, + "step": 44055 + }, + { + "epoch": 0.7612662427426043, + "grad_norm": 2.095492000332019, + "learning_rate": 2.8432887294148124e-06, + "loss": 0.3576, + "step": 44056 + }, + { + "epoch": 0.7612835222560133, + "grad_norm": 1.79542094278875, + "learning_rate": 2.842897860443452e-06, + "loss": 0.1624, + "step": 44057 + }, + { + "epoch": 0.7613008017694222, + "grad_norm": 1.22480805042808, + "learning_rate": 2.842507013888679e-06, + "loss": 0.3673, + "step": 44058 + }, + { + "epoch": 0.7613180812828311, + "grad_norm": 0.7952521008695964, + "learning_rate": 2.8421161897517214e-06, + "loss": 0.2234, + "step": 44059 + }, + { + "epoch": 0.76133536079624, + "grad_norm": 0.7813048800549696, + "learning_rate": 2.841725388033797e-06, + "loss": 0.3935, + "step": 44060 + }, + { + "epoch": 0.7613526403096489, + "grad_norm": 1.0835834144695196, + "learning_rate": 2.841334608736136e-06, + "loss": 0.4944, + "step": 44061 + }, + { + "epoch": 0.7613699198230578, + "grad_norm": 1.1020551346074545, + "learning_rate": 2.8409438518599554e-06, + "loss": 0.2582, + "step": 44062 + }, + { + "epoch": 0.7613871993364667, + "grad_norm": 1.3059098075465378, + "learning_rate": 2.840553117406486e-06, + "loss": 0.3599, + "step": 44063 + }, + { + "epoch": 0.7614044788498756, + "grad_norm": 0.9691359942931671, + "learning_rate": 2.840162405376945e-06, + "loss": 0.3353, + "step": 44064 + }, + { + "epoch": 0.7614217583632845, + "grad_norm": 1.2012679280253862, + "learning_rate": 2.83977171577256e-06, + "loss": 0.3563, + "step": 44065 + }, + { + "epoch": 0.7614390378766934, + "grad_norm": 0.951061672937694, + "learning_rate": 2.8393810485945563e-06, + "loss": 0.3432, + "step": 44066 + }, + { + "epoch": 0.7614563173901023, + "grad_norm": 1.3123880569621593, + "learning_rate": 2.838990403844153e-06, + "loss": 0.2771, + "step": 44067 + }, + { + "epoch": 0.7614735969035112, + "grad_norm": 1.2236543101538024, + "learning_rate": 2.8385997815225784e-06, + "loss": 0.3381, + "step": 44068 + }, + { + "epoch": 0.7614908764169201, + "grad_norm": 1.163211401171938, + "learning_rate": 2.8382091816310518e-06, + "loss": 0.4244, + "step": 44069 + }, + { + "epoch": 0.7615081559303291, + "grad_norm": 1.2060781810927494, + "learning_rate": 2.8378186041707977e-06, + "loss": 0.3928, + "step": 44070 + }, + { + "epoch": 0.7615254354437379, + "grad_norm": 0.9416331019712542, + "learning_rate": 2.8374280491430394e-06, + "loss": 0.5164, + "step": 44071 + }, + { + "epoch": 0.7615427149571468, + "grad_norm": 0.9145965987866204, + "learning_rate": 2.837037516549004e-06, + "loss": 0.4453, + "step": 44072 + }, + { + "epoch": 0.7615599944705557, + "grad_norm": 0.5209574990708113, + "learning_rate": 2.8366470063899123e-06, + "loss": 0.566, + "step": 44073 + }, + { + "epoch": 0.7615772739839646, + "grad_norm": 1.5464127004386317, + "learning_rate": 2.836256518666982e-06, + "loss": 0.2732, + "step": 44074 + }, + { + "epoch": 0.7615945534973735, + "grad_norm": 0.9451199678511885, + "learning_rate": 2.835866053381442e-06, + "loss": 0.2706, + "step": 44075 + }, + { + "epoch": 0.7616118330107824, + "grad_norm": 1.0379409179308048, + "learning_rate": 2.835475610534514e-06, + "loss": 0.2726, + "step": 44076 + }, + { + "epoch": 0.7616291125241913, + "grad_norm": 1.7557997471699966, + "learning_rate": 2.8350851901274234e-06, + "loss": 0.3677, + "step": 44077 + }, + { + "epoch": 0.7616463920376002, + "grad_norm": 1.0326736258481302, + "learning_rate": 2.8346947921613876e-06, + "loss": 0.4396, + "step": 44078 + }, + { + "epoch": 0.7616636715510091, + "grad_norm": 0.5538054071820747, + "learning_rate": 2.8343044166376354e-06, + "loss": 0.5465, + "step": 44079 + }, + { + "epoch": 0.761680951064418, + "grad_norm": 0.5603309059005032, + "learning_rate": 2.8339140635573836e-06, + "loss": 0.5803, + "step": 44080 + }, + { + "epoch": 0.7616982305778269, + "grad_norm": 1.4775921633948734, + "learning_rate": 2.8335237329218566e-06, + "loss": 0.3924, + "step": 44081 + }, + { + "epoch": 0.7617155100912358, + "grad_norm": 1.28023169529497, + "learning_rate": 2.8331334247322783e-06, + "loss": 0.3481, + "step": 44082 + }, + { + "epoch": 0.7617327896046447, + "grad_norm": 1.819148281127274, + "learning_rate": 2.832743138989874e-06, + "loss": 0.3255, + "step": 44083 + }, + { + "epoch": 0.7617500691180537, + "grad_norm": 0.8546861101341837, + "learning_rate": 2.832352875695863e-06, + "loss": 0.3164, + "step": 44084 + }, + { + "epoch": 0.7617673486314626, + "grad_norm": 0.47543908499514087, + "learning_rate": 2.831962634851464e-06, + "loss": 0.6879, + "step": 44085 + }, + { + "epoch": 0.7617846281448715, + "grad_norm": 1.228400438041693, + "learning_rate": 2.831572416457904e-06, + "loss": 0.3171, + "step": 44086 + }, + { + "epoch": 0.7618019076582804, + "grad_norm": 1.912705683062144, + "learning_rate": 2.831182220516402e-06, + "loss": 0.2502, + "step": 44087 + }, + { + "epoch": 0.7618191871716893, + "grad_norm": 1.0602229726430905, + "learning_rate": 2.830792047028187e-06, + "loss": 0.2205, + "step": 44088 + }, + { + "epoch": 0.7618364666850982, + "grad_norm": 1.028531659845506, + "learning_rate": 2.8304018959944724e-06, + "loss": 0.3093, + "step": 44089 + }, + { + "epoch": 0.7618537461985071, + "grad_norm": 0.9874871532370029, + "learning_rate": 2.830011767416486e-06, + "loss": 0.2629, + "step": 44090 + }, + { + "epoch": 0.761871025711916, + "grad_norm": 1.0393702853213649, + "learning_rate": 2.8296216612954465e-06, + "loss": 0.2577, + "step": 44091 + }, + { + "epoch": 0.7618883052253248, + "grad_norm": 1.0331941977990986, + "learning_rate": 2.8292315776325753e-06, + "loss": 0.2553, + "step": 44092 + }, + { + "epoch": 0.7619055847387337, + "grad_norm": 0.9497510355436551, + "learning_rate": 2.828841516429096e-06, + "loss": 0.4067, + "step": 44093 + }, + { + "epoch": 0.7619228642521426, + "grad_norm": 0.906978490946763, + "learning_rate": 2.8284514776862337e-06, + "loss": 0.4619, + "step": 44094 + }, + { + "epoch": 0.7619401437655515, + "grad_norm": 0.8565554502041136, + "learning_rate": 2.828061461405206e-06, + "loss": 0.3411, + "step": 44095 + }, + { + "epoch": 0.7619574232789604, + "grad_norm": 1.4622501243986883, + "learning_rate": 2.827671467587232e-06, + "loss": 0.3542, + "step": 44096 + }, + { + "epoch": 0.7619747027923693, + "grad_norm": 0.8297422555433547, + "learning_rate": 2.827281496233536e-06, + "loss": 0.6131, + "step": 44097 + }, + { + "epoch": 0.7619919823057782, + "grad_norm": 0.9898009952163652, + "learning_rate": 2.8268915473453395e-06, + "loss": 0.5282, + "step": 44098 + }, + { + "epoch": 0.7620092618191872, + "grad_norm": 0.7940076415220849, + "learning_rate": 2.826501620923867e-06, + "loss": 0.3102, + "step": 44099 + }, + { + "epoch": 0.7620265413325961, + "grad_norm": 1.582769685350901, + "learning_rate": 2.8261117169703335e-06, + "loss": 0.3331, + "step": 44100 + }, + { + "epoch": 0.762043820846005, + "grad_norm": 2.1024884649771374, + "learning_rate": 2.825721835485966e-06, + "loss": 0.309, + "step": 44101 + }, + { + "epoch": 0.7620611003594139, + "grad_norm": 1.119338653027193, + "learning_rate": 2.82533197647198e-06, + "loss": 0.3784, + "step": 44102 + }, + { + "epoch": 0.7620783798728228, + "grad_norm": 1.4799212896627747, + "learning_rate": 2.824942139929603e-06, + "loss": 0.3378, + "step": 44103 + }, + { + "epoch": 0.7620956593862317, + "grad_norm": 1.6451361392969643, + "learning_rate": 2.8245523258600482e-06, + "loss": 0.4314, + "step": 44104 + }, + { + "epoch": 0.7621129388996406, + "grad_norm": 1.2904293321759672, + "learning_rate": 2.8241625342645418e-06, + "loss": 0.4886, + "step": 44105 + }, + { + "epoch": 0.7621302184130495, + "grad_norm": 1.097467123379149, + "learning_rate": 2.823772765144307e-06, + "loss": 0.438, + "step": 44106 + }, + { + "epoch": 0.7621474979264584, + "grad_norm": 2.180970479566381, + "learning_rate": 2.8233830185005575e-06, + "loss": 0.3686, + "step": 44107 + }, + { + "epoch": 0.7621647774398673, + "grad_norm": 1.3335584668638758, + "learning_rate": 2.8229932943345216e-06, + "loss": 0.3398, + "step": 44108 + }, + { + "epoch": 0.7621820569532762, + "grad_norm": 1.1196478962514147, + "learning_rate": 2.822603592647413e-06, + "loss": 0.4021, + "step": 44109 + }, + { + "epoch": 0.7621993364666851, + "grad_norm": 1.322647532280377, + "learning_rate": 2.822213913440455e-06, + "loss": 0.4306, + "step": 44110 + }, + { + "epoch": 0.762216615980094, + "grad_norm": 1.9971870189862033, + "learning_rate": 2.821824256714868e-06, + "loss": 0.3171, + "step": 44111 + }, + { + "epoch": 0.762233895493503, + "grad_norm": 1.9830247796009872, + "learning_rate": 2.8214346224718768e-06, + "loss": 0.3562, + "step": 44112 + }, + { + "epoch": 0.7622511750069118, + "grad_norm": 1.4861464782131235, + "learning_rate": 2.8210450107126954e-06, + "loss": 0.3942, + "step": 44113 + }, + { + "epoch": 0.7622684545203207, + "grad_norm": 0.5487427525729179, + "learning_rate": 2.8206554214385483e-06, + "loss": 0.5134, + "step": 44114 + }, + { + "epoch": 0.7622857340337296, + "grad_norm": 1.2236047786871562, + "learning_rate": 2.8202658546506513e-06, + "loss": 0.4319, + "step": 44115 + }, + { + "epoch": 0.7623030135471385, + "grad_norm": 0.9081022891084785, + "learning_rate": 2.819876310350228e-06, + "loss": 0.404, + "step": 44116 + }, + { + "epoch": 0.7623202930605474, + "grad_norm": 2.1348910039844684, + "learning_rate": 2.8194867885385e-06, + "loss": 0.3266, + "step": 44117 + }, + { + "epoch": 0.7623375725739563, + "grad_norm": 1.6356437639814718, + "learning_rate": 2.8190972892166825e-06, + "loss": 0.243, + "step": 44118 + }, + { + "epoch": 0.7623548520873652, + "grad_norm": 1.3730541129607696, + "learning_rate": 2.818707812386001e-06, + "loss": 0.6433, + "step": 44119 + }, + { + "epoch": 0.7623721316007741, + "grad_norm": 1.5125145519897547, + "learning_rate": 2.818318358047669e-06, + "loss": 0.5517, + "step": 44120 + }, + { + "epoch": 0.762389411114183, + "grad_norm": 1.6159281202336377, + "learning_rate": 2.817928926202911e-06, + "loss": 0.5791, + "step": 44121 + }, + { + "epoch": 0.7624066906275919, + "grad_norm": 2.1354669955988554, + "learning_rate": 2.817539516852944e-06, + "loss": 0.2996, + "step": 44122 + }, + { + "epoch": 0.7624239701410008, + "grad_norm": 1.0877825485354664, + "learning_rate": 2.8171501299989925e-06, + "loss": 0.3198, + "step": 44123 + }, + { + "epoch": 0.7624412496544097, + "grad_norm": 1.079060858514264, + "learning_rate": 2.81676076564227e-06, + "loss": 0.5095, + "step": 44124 + }, + { + "epoch": 0.7624585291678186, + "grad_norm": 1.012486404370407, + "learning_rate": 2.816371423784001e-06, + "loss": 0.3056, + "step": 44125 + }, + { + "epoch": 0.7624758086812276, + "grad_norm": 2.0623741180503177, + "learning_rate": 2.8159821044254e-06, + "loss": 0.3895, + "step": 44126 + }, + { + "epoch": 0.7624930881946365, + "grad_norm": 1.3042044216547375, + "learning_rate": 2.8155928075676896e-06, + "loss": 0.3394, + "step": 44127 + }, + { + "epoch": 0.7625103677080454, + "grad_norm": 1.3469567297595768, + "learning_rate": 2.815203533212091e-06, + "loss": 0.2465, + "step": 44128 + }, + { + "epoch": 0.7625276472214543, + "grad_norm": 1.4732607551331824, + "learning_rate": 2.8148142813598176e-06, + "loss": 0.2764, + "step": 44129 + }, + { + "epoch": 0.7625449267348632, + "grad_norm": 1.9960143659355016, + "learning_rate": 2.814425052012095e-06, + "loss": 0.4778, + "step": 44130 + }, + { + "epoch": 0.7625622062482721, + "grad_norm": 1.3266614561686867, + "learning_rate": 2.8140358451701367e-06, + "loss": 0.5446, + "step": 44131 + }, + { + "epoch": 0.762579485761681, + "grad_norm": 1.4014254416116365, + "learning_rate": 2.8136466608351642e-06, + "loss": 0.455, + "step": 44132 + }, + { + "epoch": 0.7625967652750899, + "grad_norm": 0.7351750029997193, + "learning_rate": 2.813257499008396e-06, + "loss": 0.3977, + "step": 44133 + }, + { + "epoch": 0.7626140447884987, + "grad_norm": 0.40128760555309595, + "learning_rate": 2.8128683596910555e-06, + "loss": 0.5163, + "step": 44134 + }, + { + "epoch": 0.7626313243019076, + "grad_norm": 0.8476096703994432, + "learning_rate": 2.8124792428843562e-06, + "loss": 0.2717, + "step": 44135 + }, + { + "epoch": 0.7626486038153165, + "grad_norm": 1.0613444364302, + "learning_rate": 2.812090148589516e-06, + "loss": 0.2674, + "step": 44136 + }, + { + "epoch": 0.7626658833287254, + "grad_norm": 1.20918436666969, + "learning_rate": 2.811701076807756e-06, + "loss": 0.364, + "step": 44137 + }, + { + "epoch": 0.7626831628421343, + "grad_norm": 1.4260273531122176, + "learning_rate": 2.8113120275402937e-06, + "loss": 0.1981, + "step": 44138 + }, + { + "epoch": 0.7627004423555432, + "grad_norm": 0.8649450707468695, + "learning_rate": 2.8109230007883514e-06, + "loss": 0.4177, + "step": 44139 + }, + { + "epoch": 0.7627177218689521, + "grad_norm": 1.1018297389497824, + "learning_rate": 2.810533996553142e-06, + "loss": 0.3391, + "step": 44140 + }, + { + "epoch": 0.762735001382361, + "grad_norm": 1.3683141960638197, + "learning_rate": 2.81014501483589e-06, + "loss": 0.4637, + "step": 44141 + }, + { + "epoch": 0.76275228089577, + "grad_norm": 0.9158685498004503, + "learning_rate": 2.8097560556378066e-06, + "loss": 0.3701, + "step": 44142 + }, + { + "epoch": 0.7627695604091789, + "grad_norm": 1.2863054749026803, + "learning_rate": 2.809367118960117e-06, + "loss": 0.4594, + "step": 44143 + }, + { + "epoch": 0.7627868399225878, + "grad_norm": 0.9882146109637695, + "learning_rate": 2.8089782048040293e-06, + "loss": 0.2317, + "step": 44144 + }, + { + "epoch": 0.7628041194359967, + "grad_norm": 1.3932846417113165, + "learning_rate": 2.808589313170775e-06, + "loss": 0.3438, + "step": 44145 + }, + { + "epoch": 0.7628213989494056, + "grad_norm": 1.0916623695295822, + "learning_rate": 2.8082004440615653e-06, + "loss": 0.4356, + "step": 44146 + }, + { + "epoch": 0.7628386784628145, + "grad_norm": 1.6642209148717502, + "learning_rate": 2.8078115974776153e-06, + "loss": 0.2769, + "step": 44147 + }, + { + "epoch": 0.7628559579762234, + "grad_norm": 1.400181173502695, + "learning_rate": 2.80742277342015e-06, + "loss": 0.2307, + "step": 44148 + }, + { + "epoch": 0.7628732374896323, + "grad_norm": 1.5417805349830154, + "learning_rate": 2.8070339718903782e-06, + "loss": 0.2121, + "step": 44149 + }, + { + "epoch": 0.7628905170030412, + "grad_norm": 1.2848977426741326, + "learning_rate": 2.8066451928895243e-06, + "loss": 0.3178, + "step": 44150 + }, + { + "epoch": 0.7629077965164501, + "grad_norm": 1.3200168500471234, + "learning_rate": 2.8062564364188037e-06, + "loss": 0.3631, + "step": 44151 + }, + { + "epoch": 0.762925076029859, + "grad_norm": 1.252679984665338, + "learning_rate": 2.805867702479438e-06, + "loss": 0.4789, + "step": 44152 + }, + { + "epoch": 0.762942355543268, + "grad_norm": 1.4934686193449351, + "learning_rate": 2.8054789910726375e-06, + "loss": 0.4539, + "step": 44153 + }, + { + "epoch": 0.7629596350566769, + "grad_norm": 1.7644094239111616, + "learning_rate": 2.805090302199627e-06, + "loss": 0.3029, + "step": 44154 + }, + { + "epoch": 0.7629769145700857, + "grad_norm": 1.4709833247123243, + "learning_rate": 2.8047016358616173e-06, + "loss": 0.258, + "step": 44155 + }, + { + "epoch": 0.7629941940834946, + "grad_norm": 0.9420952405462676, + "learning_rate": 2.80431299205983e-06, + "loss": 0.3956, + "step": 44156 + }, + { + "epoch": 0.7630114735969035, + "grad_norm": 1.8422290428985648, + "learning_rate": 2.8039243707954834e-06, + "loss": 0.3993, + "step": 44157 + }, + { + "epoch": 0.7630287531103124, + "grad_norm": 1.043389729180228, + "learning_rate": 2.8035357720697897e-06, + "loss": 0.3114, + "step": 44158 + }, + { + "epoch": 0.7630460326237213, + "grad_norm": 1.54068730356112, + "learning_rate": 2.8031471958839717e-06, + "loss": 0.3646, + "step": 44159 + }, + { + "epoch": 0.7630633121371302, + "grad_norm": 1.2819543788927399, + "learning_rate": 2.8027586422392404e-06, + "loss": 0.3885, + "step": 44160 + }, + { + "epoch": 0.7630805916505391, + "grad_norm": 2.684862057823527, + "learning_rate": 2.8023701111368163e-06, + "loss": 0.3575, + "step": 44161 + }, + { + "epoch": 0.763097871163948, + "grad_norm": 1.093371276365062, + "learning_rate": 2.8019816025779166e-06, + "loss": 0.301, + "step": 44162 + }, + { + "epoch": 0.7631151506773569, + "grad_norm": 0.9779131963830907, + "learning_rate": 2.80159311656376e-06, + "loss": 0.3116, + "step": 44163 + }, + { + "epoch": 0.7631324301907658, + "grad_norm": 0.6476102371263233, + "learning_rate": 2.8012046530955574e-06, + "loss": 0.1772, + "step": 44164 + }, + { + "epoch": 0.7631497097041747, + "grad_norm": 1.7012713318206334, + "learning_rate": 2.800816212174533e-06, + "loss": 0.1892, + "step": 44165 + }, + { + "epoch": 0.7631669892175836, + "grad_norm": 1.5760566120636177, + "learning_rate": 2.8004277938018952e-06, + "loss": 0.4017, + "step": 44166 + }, + { + "epoch": 0.7631842687309925, + "grad_norm": 1.351771371645176, + "learning_rate": 2.8000393979788655e-06, + "loss": 0.3831, + "step": 44167 + }, + { + "epoch": 0.7632015482444015, + "grad_norm": 1.3480715094999347, + "learning_rate": 2.799651024706662e-06, + "loss": 0.3382, + "step": 44168 + }, + { + "epoch": 0.7632188277578104, + "grad_norm": 1.620385913719573, + "learning_rate": 2.7992626739864957e-06, + "loss": 0.326, + "step": 44169 + }, + { + "epoch": 0.7632361072712193, + "grad_norm": 1.0989068700707396, + "learning_rate": 2.79887434581959e-06, + "loss": 0.4571, + "step": 44170 + }, + { + "epoch": 0.7632533867846282, + "grad_norm": 1.7047348659709363, + "learning_rate": 2.7984860402071522e-06, + "loss": 0.3811, + "step": 44171 + }, + { + "epoch": 0.7632706662980371, + "grad_norm": 1.0926432489099884, + "learning_rate": 2.7980977571504053e-06, + "loss": 0.3788, + "step": 44172 + }, + { + "epoch": 0.763287945811446, + "grad_norm": 1.299922981757402, + "learning_rate": 2.7977094966505624e-06, + "loss": 0.3455, + "step": 44173 + }, + { + "epoch": 0.7633052253248549, + "grad_norm": 1.6911314866413036, + "learning_rate": 2.797321258708844e-06, + "loss": 0.3039, + "step": 44174 + }, + { + "epoch": 0.7633225048382638, + "grad_norm": 1.432320730395912, + "learning_rate": 2.7969330433264596e-06, + "loss": 0.3212, + "step": 44175 + }, + { + "epoch": 0.7633397843516727, + "grad_norm": 1.0841318160900093, + "learning_rate": 2.7965448505046323e-06, + "loss": 0.3045, + "step": 44176 + }, + { + "epoch": 0.7633570638650815, + "grad_norm": 1.2835497093768977, + "learning_rate": 2.7961566802445727e-06, + "loss": 0.3739, + "step": 44177 + }, + { + "epoch": 0.7633743433784904, + "grad_norm": 1.4182048707348802, + "learning_rate": 2.7957685325474926e-06, + "loss": 0.4332, + "step": 44178 + }, + { + "epoch": 0.7633916228918993, + "grad_norm": 1.3469974753216245, + "learning_rate": 2.7953804074146194e-06, + "loss": 0.5339, + "step": 44179 + }, + { + "epoch": 0.7634089024053082, + "grad_norm": 1.3214060691524814, + "learning_rate": 2.7949923048471583e-06, + "loss": 0.3454, + "step": 44180 + }, + { + "epoch": 0.7634261819187171, + "grad_norm": 1.2846765609902533, + "learning_rate": 2.7946042248463324e-06, + "loss": 0.5001, + "step": 44181 + }, + { + "epoch": 0.763443461432126, + "grad_norm": 1.5050526599312475, + "learning_rate": 2.7942161674133506e-06, + "loss": 0.4057, + "step": 44182 + }, + { + "epoch": 0.763460740945535, + "grad_norm": 2.6105470117362377, + "learning_rate": 2.7938281325494345e-06, + "loss": 0.569, + "step": 44183 + }, + { + "epoch": 0.7634780204589439, + "grad_norm": 1.0177503094365161, + "learning_rate": 2.793440120255789e-06, + "loss": 0.6177, + "step": 44184 + }, + { + "epoch": 0.7634952999723528, + "grad_norm": 1.2259836293691293, + "learning_rate": 2.7930521305336445e-06, + "loss": 0.3538, + "step": 44185 + }, + { + "epoch": 0.7635125794857617, + "grad_norm": 1.8044287534267291, + "learning_rate": 2.792664163384208e-06, + "loss": 0.3009, + "step": 44186 + }, + { + "epoch": 0.7635298589991706, + "grad_norm": 1.801878427122697, + "learning_rate": 2.7922762188086914e-06, + "loss": 0.3141, + "step": 44187 + }, + { + "epoch": 0.7635471385125795, + "grad_norm": 1.1624406043058833, + "learning_rate": 2.791888296808317e-06, + "loss": 0.4259, + "step": 44188 + }, + { + "epoch": 0.7635644180259884, + "grad_norm": 1.1368903770157364, + "learning_rate": 2.7915003973842937e-06, + "loss": 0.5128, + "step": 44189 + }, + { + "epoch": 0.7635816975393973, + "grad_norm": 1.117294972786566, + "learning_rate": 2.7911125205378388e-06, + "loss": 0.4751, + "step": 44190 + }, + { + "epoch": 0.7635989770528062, + "grad_norm": 1.135910137302821, + "learning_rate": 2.790724666270166e-06, + "loss": 0.2743, + "step": 44191 + }, + { + "epoch": 0.7636162565662151, + "grad_norm": 1.2651934019085453, + "learning_rate": 2.7903368345824954e-06, + "loss": 0.3755, + "step": 44192 + }, + { + "epoch": 0.763633536079624, + "grad_norm": 0.8836248066209783, + "learning_rate": 2.7899490254760355e-06, + "loss": 0.2182, + "step": 44193 + }, + { + "epoch": 0.7636508155930329, + "grad_norm": 1.5736480482702555, + "learning_rate": 2.7895612389520055e-06, + "loss": 0.411, + "step": 44194 + }, + { + "epoch": 0.7636680951064418, + "grad_norm": 2.31440683754668, + "learning_rate": 2.789173475011614e-06, + "loss": 0.3388, + "step": 44195 + }, + { + "epoch": 0.7636853746198508, + "grad_norm": 1.2733066953223802, + "learning_rate": 2.7887857336560796e-06, + "loss": 0.2399, + "step": 44196 + }, + { + "epoch": 0.7637026541332597, + "grad_norm": 1.3865136878559523, + "learning_rate": 2.7883980148866195e-06, + "loss": 0.3365, + "step": 44197 + }, + { + "epoch": 0.7637199336466685, + "grad_norm": 1.5188019529451764, + "learning_rate": 2.7880103187044428e-06, + "loss": 0.3298, + "step": 44198 + }, + { + "epoch": 0.7637372131600774, + "grad_norm": 1.5184613705579484, + "learning_rate": 2.787622645110768e-06, + "loss": 0.255, + "step": 44199 + }, + { + "epoch": 0.7637544926734863, + "grad_norm": 1.1102754667794916, + "learning_rate": 2.7872349941068044e-06, + "loss": 0.3511, + "step": 44200 + }, + { + "epoch": 0.7637717721868952, + "grad_norm": 1.62664388032375, + "learning_rate": 2.7868473656937678e-06, + "loss": 0.3891, + "step": 44201 + }, + { + "epoch": 0.7637890517003041, + "grad_norm": 1.7217631965669993, + "learning_rate": 2.786459759872875e-06, + "loss": 0.2594, + "step": 44202 + }, + { + "epoch": 0.763806331213713, + "grad_norm": 1.869564198726261, + "learning_rate": 2.78607217664534e-06, + "loss": 0.3775, + "step": 44203 + }, + { + "epoch": 0.7638236107271219, + "grad_norm": 1.7288778504451552, + "learning_rate": 2.7856846160123743e-06, + "loss": 0.4766, + "step": 44204 + }, + { + "epoch": 0.7638408902405308, + "grad_norm": 0.7990201200910855, + "learning_rate": 2.785297077975193e-06, + "loss": 0.6326, + "step": 44205 + }, + { + "epoch": 0.7638581697539397, + "grad_norm": 1.0251440809420895, + "learning_rate": 2.784909562535009e-06, + "loss": 0.4333, + "step": 44206 + }, + { + "epoch": 0.7638754492673486, + "grad_norm": 1.107461915834399, + "learning_rate": 2.7845220696930353e-06, + "loss": 0.3021, + "step": 44207 + }, + { + "epoch": 0.7638927287807575, + "grad_norm": 1.1556726117844676, + "learning_rate": 2.78413459945049e-06, + "loss": 0.279, + "step": 44208 + }, + { + "epoch": 0.7639100082941664, + "grad_norm": 0.7307318009515488, + "learning_rate": 2.78374715180858e-06, + "loss": 0.4551, + "step": 44209 + }, + { + "epoch": 0.7639272878075754, + "grad_norm": 1.7150823602758667, + "learning_rate": 2.7833597267685263e-06, + "loss": 0.2518, + "step": 44210 + }, + { + "epoch": 0.7639445673209843, + "grad_norm": 0.9744651738402397, + "learning_rate": 2.7829723243315345e-06, + "loss": 0.3713, + "step": 44211 + }, + { + "epoch": 0.7639618468343932, + "grad_norm": 1.379542883726133, + "learning_rate": 2.782584944498823e-06, + "loss": 0.1793, + "step": 44212 + }, + { + "epoch": 0.7639791263478021, + "grad_norm": 0.9482675140653408, + "learning_rate": 2.7821975872716024e-06, + "loss": 0.3255, + "step": 44213 + }, + { + "epoch": 0.763996405861211, + "grad_norm": 1.089779031118915, + "learning_rate": 2.7818102526510915e-06, + "loss": 0.3618, + "step": 44214 + }, + { + "epoch": 0.7640136853746199, + "grad_norm": 1.4225848241995667, + "learning_rate": 2.7814229406384964e-06, + "loss": 0.5769, + "step": 44215 + }, + { + "epoch": 0.7640309648880288, + "grad_norm": 1.5748028366633622, + "learning_rate": 2.7810356512350355e-06, + "loss": 0.2807, + "step": 44216 + }, + { + "epoch": 0.7640482444014377, + "grad_norm": 0.9645530098371575, + "learning_rate": 2.7806483844419207e-06, + "loss": 0.4017, + "step": 44217 + }, + { + "epoch": 0.7640655239148466, + "grad_norm": 1.004110735731028, + "learning_rate": 2.7802611402603575e-06, + "loss": 0.2832, + "step": 44218 + }, + { + "epoch": 0.7640828034282554, + "grad_norm": 1.6331606290778322, + "learning_rate": 2.77987391869157e-06, + "loss": 0.2853, + "step": 44219 + }, + { + "epoch": 0.7641000829416643, + "grad_norm": 2.246702289723669, + "learning_rate": 2.779486719736765e-06, + "loss": 0.3007, + "step": 44220 + }, + { + "epoch": 0.7641173624550732, + "grad_norm": 2.254129186618592, + "learning_rate": 2.779099543397158e-06, + "loss": 0.4575, + "step": 44221 + }, + { + "epoch": 0.7641346419684821, + "grad_norm": 1.1683903210924633, + "learning_rate": 2.7787123896739576e-06, + "loss": 0.3066, + "step": 44222 + }, + { + "epoch": 0.764151921481891, + "grad_norm": 2.0199891841237294, + "learning_rate": 2.7783252585683816e-06, + "loss": 0.32, + "step": 44223 + }, + { + "epoch": 0.7641692009953, + "grad_norm": 1.3534409133527332, + "learning_rate": 2.7779381500816326e-06, + "loss": 0.2163, + "step": 44224 + }, + { + "epoch": 0.7641864805087089, + "grad_norm": 0.8854575956753551, + "learning_rate": 2.7775510642149382e-06, + "loss": 0.4537, + "step": 44225 + }, + { + "epoch": 0.7642037600221178, + "grad_norm": 1.3393445570398943, + "learning_rate": 2.777164000969499e-06, + "loss": 0.4153, + "step": 44226 + }, + { + "epoch": 0.7642210395355267, + "grad_norm": 1.4621862083635533, + "learning_rate": 2.776776960346533e-06, + "loss": 0.2387, + "step": 44227 + }, + { + "epoch": 0.7642383190489356, + "grad_norm": 1.1296476573925611, + "learning_rate": 2.7763899423472517e-06, + "loss": 0.3303, + "step": 44228 + }, + { + "epoch": 0.7642555985623445, + "grad_norm": 1.655385065356759, + "learning_rate": 2.7760029469728623e-06, + "loss": 0.2693, + "step": 44229 + }, + { + "epoch": 0.7642728780757534, + "grad_norm": 1.1966845079310366, + "learning_rate": 2.7756159742245813e-06, + "loss": 0.6933, + "step": 44230 + }, + { + "epoch": 0.7642901575891623, + "grad_norm": 1.1168733176975423, + "learning_rate": 2.7752290241036196e-06, + "loss": 0.2964, + "step": 44231 + }, + { + "epoch": 0.7643074371025712, + "grad_norm": 0.7943695599768105, + "learning_rate": 2.7748420966111935e-06, + "loss": 0.1554, + "step": 44232 + }, + { + "epoch": 0.7643247166159801, + "grad_norm": 1.5884849138961745, + "learning_rate": 2.7744551917485074e-06, + "loss": 0.3819, + "step": 44233 + }, + { + "epoch": 0.764341996129389, + "grad_norm": 1.442359450177565, + "learning_rate": 2.77406830951678e-06, + "loss": 0.1827, + "step": 44234 + }, + { + "epoch": 0.7643592756427979, + "grad_norm": 1.3620951647814272, + "learning_rate": 2.773681449917216e-06, + "loss": 0.3917, + "step": 44235 + }, + { + "epoch": 0.7643765551562068, + "grad_norm": 1.050764797043273, + "learning_rate": 2.7732946129510318e-06, + "loss": 0.3479, + "step": 44236 + }, + { + "epoch": 0.7643938346696157, + "grad_norm": 1.5182851314458126, + "learning_rate": 2.7729077986194376e-06, + "loss": 0.4252, + "step": 44237 + }, + { + "epoch": 0.7644111141830247, + "grad_norm": 1.5002376135918618, + "learning_rate": 2.7725210069236487e-06, + "loss": 0.3159, + "step": 44238 + }, + { + "epoch": 0.7644283936964336, + "grad_norm": 1.2661359936557313, + "learning_rate": 2.772134237864873e-06, + "loss": 0.4342, + "step": 44239 + }, + { + "epoch": 0.7644456732098424, + "grad_norm": 0.9950554006538881, + "learning_rate": 2.7717474914443197e-06, + "loss": 0.4041, + "step": 44240 + }, + { + "epoch": 0.7644629527232513, + "grad_norm": 1.4387096092206524, + "learning_rate": 2.7713607676632026e-06, + "loss": 0.4172, + "step": 44241 + }, + { + "epoch": 0.7644802322366602, + "grad_norm": 1.5982974811377393, + "learning_rate": 2.7709740665227324e-06, + "loss": 0.283, + "step": 44242 + }, + { + "epoch": 0.7644975117500691, + "grad_norm": 1.0820489041009864, + "learning_rate": 2.770587388024124e-06, + "loss": 0.5209, + "step": 44243 + }, + { + "epoch": 0.764514791263478, + "grad_norm": 1.2799273847319812, + "learning_rate": 2.770200732168582e-06, + "loss": 0.3443, + "step": 44244 + }, + { + "epoch": 0.7645320707768869, + "grad_norm": 1.1496521616588522, + "learning_rate": 2.769814098957324e-06, + "loss": 0.3433, + "step": 44245 + }, + { + "epoch": 0.7645493502902958, + "grad_norm": 1.3449443027402201, + "learning_rate": 2.7694274883915553e-06, + "loss": 0.2473, + "step": 44246 + }, + { + "epoch": 0.7645666298037047, + "grad_norm": 1.622144054827664, + "learning_rate": 2.7690409004724883e-06, + "loss": 0.2882, + "step": 44247 + }, + { + "epoch": 0.7645839093171136, + "grad_norm": 1.208603590054086, + "learning_rate": 2.7686543352013384e-06, + "loss": 0.3652, + "step": 44248 + }, + { + "epoch": 0.7646011888305225, + "grad_norm": 1.6003416189042017, + "learning_rate": 2.768267792579309e-06, + "loss": 0.295, + "step": 44249 + }, + { + "epoch": 0.7646184683439314, + "grad_norm": 1.1137654774041266, + "learning_rate": 2.7678812726076174e-06, + "loss": 0.3236, + "step": 44250 + }, + { + "epoch": 0.7646357478573403, + "grad_norm": 1.5609342673426356, + "learning_rate": 2.7674947752874694e-06, + "loss": 0.3192, + "step": 44251 + }, + { + "epoch": 0.7646530273707492, + "grad_norm": 0.866796147714694, + "learning_rate": 2.767108300620076e-06, + "loss": 0.3042, + "step": 44252 + }, + { + "epoch": 0.7646703068841582, + "grad_norm": 1.4416090515060376, + "learning_rate": 2.7667218486066507e-06, + "loss": 0.5241, + "step": 44253 + }, + { + "epoch": 0.7646875863975671, + "grad_norm": 1.2871723509288733, + "learning_rate": 2.7663354192484038e-06, + "loss": 0.256, + "step": 44254 + }, + { + "epoch": 0.764704865910976, + "grad_norm": 1.544177534016137, + "learning_rate": 2.7659490125465416e-06, + "loss": 0.314, + "step": 44255 + }, + { + "epoch": 0.7647221454243849, + "grad_norm": 1.703088832558486, + "learning_rate": 2.7655626285022806e-06, + "loss": 0.3611, + "step": 44256 + }, + { + "epoch": 0.7647394249377938, + "grad_norm": 0.9073648927768891, + "learning_rate": 2.765176267116824e-06, + "loss": 0.4048, + "step": 44257 + }, + { + "epoch": 0.7647567044512027, + "grad_norm": 1.3861029341173021, + "learning_rate": 2.7647899283913835e-06, + "loss": 0.4174, + "step": 44258 + }, + { + "epoch": 0.7647739839646116, + "grad_norm": 1.1280971591436255, + "learning_rate": 2.7644036123271755e-06, + "loss": 0.3632, + "step": 44259 + }, + { + "epoch": 0.7647912634780205, + "grad_norm": 1.6369509400972462, + "learning_rate": 2.7640173189254018e-06, + "loss": 0.4302, + "step": 44260 + }, + { + "epoch": 0.7648085429914293, + "grad_norm": 1.046369341246848, + "learning_rate": 2.7636310481872795e-06, + "loss": 0.2262, + "step": 44261 + }, + { + "epoch": 0.7648258225048382, + "grad_norm": 1.1163851900418178, + "learning_rate": 2.763244800114011e-06, + "loss": 0.1593, + "step": 44262 + }, + { + "epoch": 0.7648431020182471, + "grad_norm": 1.0861334865221275, + "learning_rate": 2.7628585747068127e-06, + "loss": 0.3639, + "step": 44263 + }, + { + "epoch": 0.764860381531656, + "grad_norm": 0.8594757372234808, + "learning_rate": 2.7624723719668856e-06, + "loss": 0.2043, + "step": 44264 + }, + { + "epoch": 0.7648776610450649, + "grad_norm": 1.2382633395626343, + "learning_rate": 2.76208619189545e-06, + "loss": 0.2309, + "step": 44265 + }, + { + "epoch": 0.7648949405584738, + "grad_norm": 1.1904488194325762, + "learning_rate": 2.7617000344937085e-06, + "loss": 0.3381, + "step": 44266 + }, + { + "epoch": 0.7649122200718828, + "grad_norm": 1.4813599911221338, + "learning_rate": 2.7613138997628763e-06, + "loss": 0.472, + "step": 44267 + }, + { + "epoch": 0.7649294995852917, + "grad_norm": 1.9955060304336119, + "learning_rate": 2.7609277877041552e-06, + "loss": 0.3079, + "step": 44268 + }, + { + "epoch": 0.7649467790987006, + "grad_norm": 1.4699687962145298, + "learning_rate": 2.7605416983187617e-06, + "loss": 0.8119, + "step": 44269 + }, + { + "epoch": 0.7649640586121095, + "grad_norm": 1.060800021683553, + "learning_rate": 2.760155631607898e-06, + "loss": 0.4316, + "step": 44270 + }, + { + "epoch": 0.7649813381255184, + "grad_norm": 0.8804925906780761, + "learning_rate": 2.7597695875727782e-06, + "loss": 0.2228, + "step": 44271 + }, + { + "epoch": 0.7649986176389273, + "grad_norm": 0.8924835473145486, + "learning_rate": 2.759383566214612e-06, + "loss": 0.4842, + "step": 44272 + }, + { + "epoch": 0.7650158971523362, + "grad_norm": 1.6187309574834086, + "learning_rate": 2.7589975675346048e-06, + "loss": 0.3077, + "step": 44273 + }, + { + "epoch": 0.7650331766657451, + "grad_norm": 1.1499759677657815, + "learning_rate": 2.7586115915339695e-06, + "loss": 0.2938, + "step": 44274 + }, + { + "epoch": 0.765050456179154, + "grad_norm": 0.9097845050543056, + "learning_rate": 2.7582256382139116e-06, + "loss": 0.2442, + "step": 44275 + }, + { + "epoch": 0.7650677356925629, + "grad_norm": 2.024697872662725, + "learning_rate": 2.7578397075756404e-06, + "loss": 0.2462, + "step": 44276 + }, + { + "epoch": 0.7650850152059718, + "grad_norm": 1.5438462455956197, + "learning_rate": 2.7574537996203653e-06, + "loss": 0.5532, + "step": 44277 + }, + { + "epoch": 0.7651022947193807, + "grad_norm": 1.4048969406191312, + "learning_rate": 2.7570679143492983e-06, + "loss": 0.2911, + "step": 44278 + }, + { + "epoch": 0.7651195742327896, + "grad_norm": 1.497476492348136, + "learning_rate": 2.756682051763646e-06, + "loss": 0.3222, + "step": 44279 + }, + { + "epoch": 0.7651368537461986, + "grad_norm": 0.9040644595756054, + "learning_rate": 2.756296211864612e-06, + "loss": 0.3087, + "step": 44280 + }, + { + "epoch": 0.7651541332596075, + "grad_norm": 1.3703902506760204, + "learning_rate": 2.7559103946534092e-06, + "loss": 0.409, + "step": 44281 + }, + { + "epoch": 0.7651714127730163, + "grad_norm": 1.5152022274143038, + "learning_rate": 2.7555246001312452e-06, + "loss": 0.3737, + "step": 44282 + }, + { + "epoch": 0.7651886922864252, + "grad_norm": 1.0614069349586837, + "learning_rate": 2.755138828299333e-06, + "loss": 0.3909, + "step": 44283 + }, + { + "epoch": 0.7652059717998341, + "grad_norm": 1.236662712556672, + "learning_rate": 2.7547530791588716e-06, + "loss": 0.1767, + "step": 44284 + }, + { + "epoch": 0.765223251313243, + "grad_norm": 1.265770388915048, + "learning_rate": 2.754367352711079e-06, + "loss": 0.4442, + "step": 44285 + }, + { + "epoch": 0.7652405308266519, + "grad_norm": 1.4939992496260317, + "learning_rate": 2.7539816489571546e-06, + "loss": 0.3737, + "step": 44286 + }, + { + "epoch": 0.7652578103400608, + "grad_norm": 1.1853679376017432, + "learning_rate": 2.7535959678983103e-06, + "loss": 0.4317, + "step": 44287 + }, + { + "epoch": 0.7652750898534697, + "grad_norm": 1.1173086442358902, + "learning_rate": 2.7532103095357545e-06, + "loss": 0.3223, + "step": 44288 + }, + { + "epoch": 0.7652923693668786, + "grad_norm": 1.80733956610569, + "learning_rate": 2.7528246738706975e-06, + "loss": 0.3521, + "step": 44289 + }, + { + "epoch": 0.7653096488802875, + "grad_norm": 0.8824255855620886, + "learning_rate": 2.752439060904345e-06, + "loss": 0.3152, + "step": 44290 + }, + { + "epoch": 0.7653269283936964, + "grad_norm": 1.4112419562439156, + "learning_rate": 2.7520534706379e-06, + "loss": 0.2699, + "step": 44291 + }, + { + "epoch": 0.7653442079071053, + "grad_norm": 0.8924707095287596, + "learning_rate": 2.7516679030725746e-06, + "loss": 0.3884, + "step": 44292 + }, + { + "epoch": 0.7653614874205142, + "grad_norm": 1.3761269618827887, + "learning_rate": 2.751282358209577e-06, + "loss": 0.3665, + "step": 44293 + }, + { + "epoch": 0.7653787669339231, + "grad_norm": 0.9394605757476043, + "learning_rate": 2.7508968360501166e-06, + "loss": 0.4125, + "step": 44294 + }, + { + "epoch": 0.7653960464473321, + "grad_norm": 1.3093346472365939, + "learning_rate": 2.750511336595395e-06, + "loss": 0.2601, + "step": 44295 + }, + { + "epoch": 0.765413325960741, + "grad_norm": 1.4319216425999282, + "learning_rate": 2.7501258598466264e-06, + "loss": 0.4937, + "step": 44296 + }, + { + "epoch": 0.7654306054741499, + "grad_norm": 1.6565100200047942, + "learning_rate": 2.7497404058050104e-06, + "loss": 0.1946, + "step": 44297 + }, + { + "epoch": 0.7654478849875588, + "grad_norm": 0.9042853473853465, + "learning_rate": 2.7493549744717595e-06, + "loss": 0.2341, + "step": 44298 + }, + { + "epoch": 0.7654651645009677, + "grad_norm": 1.8224028602168767, + "learning_rate": 2.7489695658480798e-06, + "loss": 0.3621, + "step": 44299 + }, + { + "epoch": 0.7654824440143766, + "grad_norm": 0.8609102743662908, + "learning_rate": 2.748584179935181e-06, + "loss": 0.2973, + "step": 44300 + }, + { + "epoch": 0.7654997235277855, + "grad_norm": 1.3577467741653737, + "learning_rate": 2.7481988167342676e-06, + "loss": 0.2198, + "step": 44301 + }, + { + "epoch": 0.7655170030411944, + "grad_norm": 1.552616694490851, + "learning_rate": 2.7478134762465437e-06, + "loss": 0.3465, + "step": 44302 + }, + { + "epoch": 0.7655342825546032, + "grad_norm": 1.260020396595937, + "learning_rate": 2.747428158473222e-06, + "loss": 0.3729, + "step": 44303 + }, + { + "epoch": 0.7655515620680121, + "grad_norm": 1.3954530297324845, + "learning_rate": 2.7470428634155013e-06, + "loss": 0.2305, + "step": 44304 + }, + { + "epoch": 0.765568841581421, + "grad_norm": 1.1015979452737537, + "learning_rate": 2.7466575910745985e-06, + "loss": 0.3109, + "step": 44305 + }, + { + "epoch": 0.7655861210948299, + "grad_norm": 1.605333969437722, + "learning_rate": 2.7462723414517136e-06, + "loss": 0.2664, + "step": 44306 + }, + { + "epoch": 0.7656034006082388, + "grad_norm": 1.237456159910972, + "learning_rate": 2.745887114548057e-06, + "loss": 0.3819, + "step": 44307 + }, + { + "epoch": 0.7656206801216477, + "grad_norm": 0.6518434539114939, + "learning_rate": 2.745501910364831e-06, + "loss": 0.6096, + "step": 44308 + }, + { + "epoch": 0.7656379596350567, + "grad_norm": 0.6675470228230339, + "learning_rate": 2.7451167289032466e-06, + "loss": 0.5915, + "step": 44309 + }, + { + "epoch": 0.7656552391484656, + "grad_norm": 2.0031126497194705, + "learning_rate": 2.744731570164505e-06, + "loss": 0.4453, + "step": 44310 + }, + { + "epoch": 0.7656725186618745, + "grad_norm": 1.1526802491599304, + "learning_rate": 2.744346434149815e-06, + "loss": 0.4177, + "step": 44311 + }, + { + "epoch": 0.7656897981752834, + "grad_norm": 0.7681018314614199, + "learning_rate": 2.743961320860388e-06, + "loss": 0.4981, + "step": 44312 + }, + { + "epoch": 0.7657070776886923, + "grad_norm": 0.46910741839908393, + "learning_rate": 2.743576230297421e-06, + "loss": 0.8359, + "step": 44313 + }, + { + "epoch": 0.7657243572021012, + "grad_norm": 1.4923616504952373, + "learning_rate": 2.7431911624621287e-06, + "loss": 0.2113, + "step": 44314 + }, + { + "epoch": 0.7657416367155101, + "grad_norm": 1.284071809135941, + "learning_rate": 2.7428061173557097e-06, + "loss": 0.298, + "step": 44315 + }, + { + "epoch": 0.765758916228919, + "grad_norm": 3.064561337042283, + "learning_rate": 2.742421094979374e-06, + "loss": 0.2895, + "step": 44316 + }, + { + "epoch": 0.7657761957423279, + "grad_norm": 0.9897167027876725, + "learning_rate": 2.742036095334326e-06, + "loss": 0.4272, + "step": 44317 + }, + { + "epoch": 0.7657934752557368, + "grad_norm": 0.5979031333162057, + "learning_rate": 2.7416511184217765e-06, + "loss": 0.4067, + "step": 44318 + }, + { + "epoch": 0.7658107547691457, + "grad_norm": 0.9647160050234708, + "learning_rate": 2.741266164242924e-06, + "loss": 0.2105, + "step": 44319 + }, + { + "epoch": 0.7658280342825546, + "grad_norm": 0.9674977134268157, + "learning_rate": 2.7408812327989808e-06, + "loss": 0.4246, + "step": 44320 + }, + { + "epoch": 0.7658453137959635, + "grad_norm": 0.9077529301687481, + "learning_rate": 2.7404963240911455e-06, + "loss": 0.3633, + "step": 44321 + }, + { + "epoch": 0.7658625933093725, + "grad_norm": 1.5951719556874058, + "learning_rate": 2.7401114381206273e-06, + "loss": 0.6276, + "step": 44322 + }, + { + "epoch": 0.7658798728227814, + "grad_norm": 2.0629697149301203, + "learning_rate": 2.739726574888636e-06, + "loss": 0.2642, + "step": 44323 + }, + { + "epoch": 0.7658971523361903, + "grad_norm": 1.8278097369688526, + "learning_rate": 2.7393417343963692e-06, + "loss": 0.3632, + "step": 44324 + }, + { + "epoch": 0.7659144318495991, + "grad_norm": 1.593993687988155, + "learning_rate": 2.7389569166450392e-06, + "loss": 0.3243, + "step": 44325 + }, + { + "epoch": 0.765931711363008, + "grad_norm": 0.8958068526798509, + "learning_rate": 2.7385721216358442e-06, + "loss": 0.2026, + "step": 44326 + }, + { + "epoch": 0.7659489908764169, + "grad_norm": 1.2147733001715744, + "learning_rate": 2.7381873493699938e-06, + "loss": 0.4381, + "step": 44327 + }, + { + "epoch": 0.7659662703898258, + "grad_norm": 0.7194680757050661, + "learning_rate": 2.7378025998486924e-06, + "loss": 0.2466, + "step": 44328 + }, + { + "epoch": 0.7659835499032347, + "grad_norm": 1.9512118959881417, + "learning_rate": 2.737417873073147e-06, + "loss": 0.3865, + "step": 44329 + }, + { + "epoch": 0.7660008294166436, + "grad_norm": 1.592778396169441, + "learning_rate": 2.737033169044563e-06, + "loss": 0.1576, + "step": 44330 + }, + { + "epoch": 0.7660181089300525, + "grad_norm": 1.1571219001874402, + "learning_rate": 2.7366484877641385e-06, + "loss": 0.3077, + "step": 44331 + }, + { + "epoch": 0.7660353884434614, + "grad_norm": 1.113817091061999, + "learning_rate": 2.7362638292330835e-06, + "loss": 0.7509, + "step": 44332 + }, + { + "epoch": 0.7660526679568703, + "grad_norm": 1.1897682993601435, + "learning_rate": 2.735879193452602e-06, + "loss": 0.3942, + "step": 44333 + }, + { + "epoch": 0.7660699474702792, + "grad_norm": 0.8033270688926998, + "learning_rate": 2.735494580423902e-06, + "loss": 0.6161, + "step": 44334 + }, + { + "epoch": 0.7660872269836881, + "grad_norm": 0.7642658483334284, + "learning_rate": 2.7351099901481815e-06, + "loss": 0.7254, + "step": 44335 + }, + { + "epoch": 0.766104506497097, + "grad_norm": 1.273050663385302, + "learning_rate": 2.734725422626652e-06, + "loss": 0.3355, + "step": 44336 + }, + { + "epoch": 0.766121786010506, + "grad_norm": 1.2559322566455682, + "learning_rate": 2.734340877860512e-06, + "loss": 0.3415, + "step": 44337 + }, + { + "epoch": 0.7661390655239149, + "grad_norm": 1.9027945079084831, + "learning_rate": 2.7339563558509684e-06, + "loss": 0.3032, + "step": 44338 + }, + { + "epoch": 0.7661563450373238, + "grad_norm": 1.1822231445172855, + "learning_rate": 2.7335718565992254e-06, + "loss": 0.4509, + "step": 44339 + }, + { + "epoch": 0.7661736245507327, + "grad_norm": 1.17533468010019, + "learning_rate": 2.733187380106491e-06, + "loss": 0.4533, + "step": 44340 + }, + { + "epoch": 0.7661909040641416, + "grad_norm": 2.0649638608510403, + "learning_rate": 2.732802926373965e-06, + "loss": 0.2684, + "step": 44341 + }, + { + "epoch": 0.7662081835775505, + "grad_norm": 0.9550766523602667, + "learning_rate": 2.732418495402851e-06, + "loss": 0.4428, + "step": 44342 + }, + { + "epoch": 0.7662254630909594, + "grad_norm": 0.8307574411707044, + "learning_rate": 2.7320340871943563e-06, + "loss": 0.2909, + "step": 44343 + }, + { + "epoch": 0.7662427426043683, + "grad_norm": 0.5477303427539408, + "learning_rate": 2.731649701749678e-06, + "loss": 0.7372, + "step": 44344 + }, + { + "epoch": 0.7662600221177772, + "grad_norm": 1.520302236185083, + "learning_rate": 2.7312653390700307e-06, + "loss": 0.2249, + "step": 44345 + }, + { + "epoch": 0.766277301631186, + "grad_norm": 1.3724493726218128, + "learning_rate": 2.73088099915661e-06, + "loss": 0.2616, + "step": 44346 + }, + { + "epoch": 0.7662945811445949, + "grad_norm": 1.6561654156663534, + "learning_rate": 2.730496682010626e-06, + "loss": 0.3131, + "step": 44347 + }, + { + "epoch": 0.7663118606580038, + "grad_norm": 1.2962439490066688, + "learning_rate": 2.7301123876332745e-06, + "loss": 0.8611, + "step": 44348 + }, + { + "epoch": 0.7663291401714127, + "grad_norm": 1.5648573702570303, + "learning_rate": 2.729728116025767e-06, + "loss": 0.3675, + "step": 44349 + }, + { + "epoch": 0.7663464196848216, + "grad_norm": 0.9201838771082582, + "learning_rate": 2.729343867189298e-06, + "loss": 0.8362, + "step": 44350 + }, + { + "epoch": 0.7663636991982306, + "grad_norm": 1.467909018208227, + "learning_rate": 2.728959641125083e-06, + "loss": 0.3607, + "step": 44351 + }, + { + "epoch": 0.7663809787116395, + "grad_norm": 1.3054980989014122, + "learning_rate": 2.7285754378343187e-06, + "loss": 0.5267, + "step": 44352 + }, + { + "epoch": 0.7663982582250484, + "grad_norm": 1.4885901192654931, + "learning_rate": 2.7281912573182044e-06, + "loss": 0.2593, + "step": 44353 + }, + { + "epoch": 0.7664155377384573, + "grad_norm": 1.4156611533006658, + "learning_rate": 2.7278070995779526e-06, + "loss": 0.3564, + "step": 44354 + }, + { + "epoch": 0.7664328172518662, + "grad_norm": 1.262664882816904, + "learning_rate": 2.7274229646147577e-06, + "loss": 0.2699, + "step": 44355 + }, + { + "epoch": 0.7664500967652751, + "grad_norm": 1.3442625065907585, + "learning_rate": 2.7270388524298262e-06, + "loss": 0.3453, + "step": 44356 + }, + { + "epoch": 0.766467376278684, + "grad_norm": 1.6909094972803922, + "learning_rate": 2.7266547630243624e-06, + "loss": 0.3904, + "step": 44357 + }, + { + "epoch": 0.7664846557920929, + "grad_norm": 1.94025050179058, + "learning_rate": 2.726270696399572e-06, + "loss": 0.4953, + "step": 44358 + }, + { + "epoch": 0.7665019353055018, + "grad_norm": 1.6280438655277298, + "learning_rate": 2.725886652556651e-06, + "loss": 0.3332, + "step": 44359 + }, + { + "epoch": 0.7665192148189107, + "grad_norm": 1.4930169397724549, + "learning_rate": 2.7255026314968093e-06, + "loss": 0.288, + "step": 44360 + }, + { + "epoch": 0.7665364943323196, + "grad_norm": 2.42773514585825, + "learning_rate": 2.725118633221243e-06, + "loss": 0.3695, + "step": 44361 + }, + { + "epoch": 0.7665537738457285, + "grad_norm": 1.3254674157097608, + "learning_rate": 2.7247346577311573e-06, + "loss": 0.6905, + "step": 44362 + }, + { + "epoch": 0.7665710533591374, + "grad_norm": 1.2571558749265321, + "learning_rate": 2.7243507050277597e-06, + "loss": 0.3816, + "step": 44363 + }, + { + "epoch": 0.7665883328725464, + "grad_norm": 1.310381664922865, + "learning_rate": 2.7239667751122445e-06, + "loss": 0.4858, + "step": 44364 + }, + { + "epoch": 0.7666056123859553, + "grad_norm": 1.0407287137618633, + "learning_rate": 2.723582867985822e-06, + "loss": 0.3781, + "step": 44365 + }, + { + "epoch": 0.7666228918993642, + "grad_norm": 1.1071372846359309, + "learning_rate": 2.7231989836496885e-06, + "loss": 0.3639, + "step": 44366 + }, + { + "epoch": 0.766640171412773, + "grad_norm": 2.148286235619201, + "learning_rate": 2.722815122105048e-06, + "loss": 0.309, + "step": 44367 + }, + { + "epoch": 0.7666574509261819, + "grad_norm": 1.050770215891749, + "learning_rate": 2.722431283353103e-06, + "loss": 0.9473, + "step": 44368 + }, + { + "epoch": 0.7666747304395908, + "grad_norm": 0.9707696322152939, + "learning_rate": 2.7220474673950603e-06, + "loss": 0.3864, + "step": 44369 + }, + { + "epoch": 0.7666920099529997, + "grad_norm": 1.40745385524639, + "learning_rate": 2.7216636742321144e-06, + "loss": 0.4245, + "step": 44370 + }, + { + "epoch": 0.7667092894664086, + "grad_norm": 1.5049879228861103, + "learning_rate": 2.7212799038654747e-06, + "loss": 0.4911, + "step": 44371 + }, + { + "epoch": 0.7667265689798175, + "grad_norm": 1.1630746554645963, + "learning_rate": 2.7208961562963367e-06, + "loss": 0.2078, + "step": 44372 + }, + { + "epoch": 0.7667438484932264, + "grad_norm": 1.461273172084314, + "learning_rate": 2.7205124315259045e-06, + "loss": 0.2566, + "step": 44373 + }, + { + "epoch": 0.7667611280066353, + "grad_norm": 1.434480866228847, + "learning_rate": 2.7201287295553833e-06, + "loss": 0.2489, + "step": 44374 + }, + { + "epoch": 0.7667784075200442, + "grad_norm": 1.069340254505472, + "learning_rate": 2.71974505038597e-06, + "loss": 0.2157, + "step": 44375 + }, + { + "epoch": 0.7667956870334531, + "grad_norm": 1.218223861044059, + "learning_rate": 2.719361394018871e-06, + "loss": 0.3714, + "step": 44376 + }, + { + "epoch": 0.766812966546862, + "grad_norm": 2.267352497460017, + "learning_rate": 2.7189777604552827e-06, + "loss": 0.4857, + "step": 44377 + }, + { + "epoch": 0.766830246060271, + "grad_norm": 1.567286151024005, + "learning_rate": 2.7185941496964084e-06, + "loss": 0.4192, + "step": 44378 + }, + { + "epoch": 0.7668475255736799, + "grad_norm": 0.9909056663292342, + "learning_rate": 2.7182105617434516e-06, + "loss": 0.2975, + "step": 44379 + }, + { + "epoch": 0.7668648050870888, + "grad_norm": 1.2513528104547567, + "learning_rate": 2.7178269965976158e-06, + "loss": 0.3116, + "step": 44380 + }, + { + "epoch": 0.7668820846004977, + "grad_norm": 1.1521452798112655, + "learning_rate": 2.7174434542600958e-06, + "loss": 0.4215, + "step": 44381 + }, + { + "epoch": 0.7668993641139066, + "grad_norm": 1.512783581942041, + "learning_rate": 2.717059934732099e-06, + "loss": 0.3649, + "step": 44382 + }, + { + "epoch": 0.7669166436273155, + "grad_norm": 0.8738303228353019, + "learning_rate": 2.716676438014825e-06, + "loss": 0.1978, + "step": 44383 + }, + { + "epoch": 0.7669339231407244, + "grad_norm": 1.5468296118904383, + "learning_rate": 2.716292964109467e-06, + "loss": 0.3747, + "step": 44384 + }, + { + "epoch": 0.7669512026541333, + "grad_norm": 1.1512036311672782, + "learning_rate": 2.71590951301724e-06, + "loss": 0.2746, + "step": 44385 + }, + { + "epoch": 0.7669684821675422, + "grad_norm": 1.9956235501371224, + "learning_rate": 2.715526084739334e-06, + "loss": 0.3443, + "step": 44386 + }, + { + "epoch": 0.7669857616809511, + "grad_norm": 2.456366004741073, + "learning_rate": 2.7151426792769577e-06, + "loss": 0.4714, + "step": 44387 + }, + { + "epoch": 0.7670030411943599, + "grad_norm": 0.7555469154131114, + "learning_rate": 2.7147592966313042e-06, + "loss": 0.4865, + "step": 44388 + }, + { + "epoch": 0.7670203207077688, + "grad_norm": 1.1540525382579692, + "learning_rate": 2.7143759368035816e-06, + "loss": 0.225, + "step": 44389 + }, + { + "epoch": 0.7670376002211777, + "grad_norm": 1.2264046848133363, + "learning_rate": 2.713992599794981e-06, + "loss": 0.3298, + "step": 44390 + }, + { + "epoch": 0.7670548797345866, + "grad_norm": 0.7964719787403964, + "learning_rate": 2.7136092856067164e-06, + "loss": 0.4642, + "step": 44391 + }, + { + "epoch": 0.7670721592479955, + "grad_norm": 1.3754341923920868, + "learning_rate": 2.71322599423998e-06, + "loss": 0.4144, + "step": 44392 + }, + { + "epoch": 0.7670894387614045, + "grad_norm": 1.5238499695025278, + "learning_rate": 2.712842725695971e-06, + "loss": 0.2037, + "step": 44393 + }, + { + "epoch": 0.7671067182748134, + "grad_norm": 1.5003898209712367, + "learning_rate": 2.7124594799758954e-06, + "loss": 0.415, + "step": 44394 + }, + { + "epoch": 0.7671239977882223, + "grad_norm": 2.1481765138647635, + "learning_rate": 2.712076257080948e-06, + "loss": 0.4329, + "step": 44395 + }, + { + "epoch": 0.7671412773016312, + "grad_norm": 1.1649768735066908, + "learning_rate": 2.711693057012331e-06, + "loss": 0.4264, + "step": 44396 + }, + { + "epoch": 0.7671585568150401, + "grad_norm": 0.9687882253855425, + "learning_rate": 2.7113098797712457e-06, + "loss": 0.9824, + "step": 44397 + }, + { + "epoch": 0.767175836328449, + "grad_norm": 1.0628055485727645, + "learning_rate": 2.7109267253588947e-06, + "loss": 0.2972, + "step": 44398 + }, + { + "epoch": 0.7671931158418579, + "grad_norm": 2.1589834224010866, + "learning_rate": 2.710543593776471e-06, + "loss": 0.3187, + "step": 44399 + }, + { + "epoch": 0.7672103953552668, + "grad_norm": 1.8160606150749217, + "learning_rate": 2.710160485025184e-06, + "loss": 0.6978, + "step": 44400 + }, + { + "epoch": 0.7672276748686757, + "grad_norm": 1.8181451902400647, + "learning_rate": 2.7097773991062236e-06, + "loss": 0.5748, + "step": 44401 + }, + { + "epoch": 0.7672449543820846, + "grad_norm": 0.799239212903176, + "learning_rate": 2.709394336020795e-06, + "loss": 0.2314, + "step": 44402 + }, + { + "epoch": 0.7672622338954935, + "grad_norm": 1.276642254601118, + "learning_rate": 2.7090112957700997e-06, + "loss": 0.2632, + "step": 44403 + }, + { + "epoch": 0.7672795134089024, + "grad_norm": 1.6465279741517693, + "learning_rate": 2.708628278355333e-06, + "loss": 0.4276, + "step": 44404 + }, + { + "epoch": 0.7672967929223113, + "grad_norm": 1.2139591460304033, + "learning_rate": 2.7082452837776997e-06, + "loss": 0.3395, + "step": 44405 + }, + { + "epoch": 0.7673140724357203, + "grad_norm": 0.9238461800402403, + "learning_rate": 2.707862312038393e-06, + "loss": 0.3921, + "step": 44406 + }, + { + "epoch": 0.7673313519491292, + "grad_norm": 1.5758059863855154, + "learning_rate": 2.7074793631386154e-06, + "loss": 0.4436, + "step": 44407 + }, + { + "epoch": 0.7673486314625381, + "grad_norm": 1.1622475204490725, + "learning_rate": 2.7070964370795673e-06, + "loss": 0.321, + "step": 44408 + }, + { + "epoch": 0.7673659109759469, + "grad_norm": 0.8793036300694826, + "learning_rate": 2.7067135338624508e-06, + "loss": 0.4848, + "step": 44409 + }, + { + "epoch": 0.7673831904893558, + "grad_norm": 1.4601627974143936, + "learning_rate": 2.706330653488457e-06, + "loss": 0.3655, + "step": 44410 + }, + { + "epoch": 0.7674004700027647, + "grad_norm": 2.1373531916936006, + "learning_rate": 2.7059477959587943e-06, + "loss": 0.2557, + "step": 44411 + }, + { + "epoch": 0.7674177495161736, + "grad_norm": 0.8759027032236733, + "learning_rate": 2.705564961274655e-06, + "loss": 0.3026, + "step": 44412 + }, + { + "epoch": 0.7674350290295825, + "grad_norm": 1.7210894312174077, + "learning_rate": 2.7051821494372387e-06, + "loss": 0.3572, + "step": 44413 + }, + { + "epoch": 0.7674523085429914, + "grad_norm": 1.2843391068779402, + "learning_rate": 2.7047993604477507e-06, + "loss": 0.2438, + "step": 44414 + }, + { + "epoch": 0.7674695880564003, + "grad_norm": 0.7948661141268965, + "learning_rate": 2.704416594307382e-06, + "loss": 0.1407, + "step": 44415 + }, + { + "epoch": 0.7674868675698092, + "grad_norm": 1.536530063912796, + "learning_rate": 2.704033851017338e-06, + "loss": 0.3002, + "step": 44416 + }, + { + "epoch": 0.7675041470832181, + "grad_norm": 1.282189519883468, + "learning_rate": 2.7036511305788115e-06, + "loss": 0.3056, + "step": 44417 + }, + { + "epoch": 0.767521426596627, + "grad_norm": 1.009606196563086, + "learning_rate": 2.703268432993005e-06, + "loss": 0.3375, + "step": 44418 + }, + { + "epoch": 0.7675387061100359, + "grad_norm": 1.207153175223577, + "learning_rate": 2.7028857582611146e-06, + "loss": 0.3219, + "step": 44419 + }, + { + "epoch": 0.7675559856234448, + "grad_norm": 1.1477254342629306, + "learning_rate": 2.702503106384344e-06, + "loss": 0.3895, + "step": 44420 + }, + { + "epoch": 0.7675732651368538, + "grad_norm": 1.4429510882063914, + "learning_rate": 2.7021204773638866e-06, + "loss": 0.3547, + "step": 44421 + }, + { + "epoch": 0.7675905446502627, + "grad_norm": 0.9233960229588315, + "learning_rate": 2.7017378712009448e-06, + "loss": 0.3249, + "step": 44422 + }, + { + "epoch": 0.7676078241636716, + "grad_norm": 0.5525819091452341, + "learning_rate": 2.701355287896714e-06, + "loss": 0.7326, + "step": 44423 + }, + { + "epoch": 0.7676251036770805, + "grad_norm": 0.528995581990978, + "learning_rate": 2.700972727452388e-06, + "loss": 0.6541, + "step": 44424 + }, + { + "epoch": 0.7676423831904894, + "grad_norm": 1.045421567165268, + "learning_rate": 2.7005901898691756e-06, + "loss": 0.3219, + "step": 44425 + }, + { + "epoch": 0.7676596627038983, + "grad_norm": 1.624384911961399, + "learning_rate": 2.7002076751482663e-06, + "loss": 0.2735, + "step": 44426 + }, + { + "epoch": 0.7676769422173072, + "grad_norm": 0.8324308540012805, + "learning_rate": 2.699825183290865e-06, + "loss": 0.2304, + "step": 44427 + }, + { + "epoch": 0.7676942217307161, + "grad_norm": 2.007917585339881, + "learning_rate": 2.699442714298163e-06, + "loss": 0.3776, + "step": 44428 + }, + { + "epoch": 0.767711501244125, + "grad_norm": 1.2331238458799278, + "learning_rate": 2.699060268171363e-06, + "loss": 0.3462, + "step": 44429 + }, + { + "epoch": 0.7677287807575338, + "grad_norm": 1.1876280454347865, + "learning_rate": 2.698677844911656e-06, + "loss": 0.2501, + "step": 44430 + }, + { + "epoch": 0.7677460602709427, + "grad_norm": 1.1611914641807135, + "learning_rate": 2.6982954445202502e-06, + "loss": 0.3229, + "step": 44431 + }, + { + "epoch": 0.7677633397843516, + "grad_norm": 1.0886570227126884, + "learning_rate": 2.6979130669983357e-06, + "loss": 0.3244, + "step": 44432 + }, + { + "epoch": 0.7677806192977605, + "grad_norm": 2.0090058573963376, + "learning_rate": 2.6975307123471162e-06, + "loss": 0.4363, + "step": 44433 + }, + { + "epoch": 0.7677978988111694, + "grad_norm": 1.4579448070346406, + "learning_rate": 2.6971483805677845e-06, + "loss": 0.475, + "step": 44434 + }, + { + "epoch": 0.7678151783245784, + "grad_norm": 1.4955772688816051, + "learning_rate": 2.6967660716615353e-06, + "loss": 0.3075, + "step": 44435 + }, + { + "epoch": 0.7678324578379873, + "grad_norm": 0.9704610156016586, + "learning_rate": 2.6963837856295704e-06, + "loss": 0.4363, + "step": 44436 + }, + { + "epoch": 0.7678497373513962, + "grad_norm": 1.1283304030349544, + "learning_rate": 2.6960015224730873e-06, + "loss": 0.4465, + "step": 44437 + }, + { + "epoch": 0.7678670168648051, + "grad_norm": 2.1195381047538335, + "learning_rate": 2.695619282193285e-06, + "loss": 0.4037, + "step": 44438 + }, + { + "epoch": 0.767884296378214, + "grad_norm": 0.9479456386699299, + "learning_rate": 2.695237064791355e-06, + "loss": 0.3883, + "step": 44439 + }, + { + "epoch": 0.7679015758916229, + "grad_norm": 2.307137983943431, + "learning_rate": 2.6948548702685008e-06, + "loss": 0.2733, + "step": 44440 + }, + { + "epoch": 0.7679188554050318, + "grad_norm": 1.2140241389723179, + "learning_rate": 2.6944726986259138e-06, + "loss": 0.2917, + "step": 44441 + }, + { + "epoch": 0.7679361349184407, + "grad_norm": 1.4766937508435631, + "learning_rate": 2.6940905498647928e-06, + "loss": 0.255, + "step": 44442 + }, + { + "epoch": 0.7679534144318496, + "grad_norm": 1.563184624478711, + "learning_rate": 2.6937084239863355e-06, + "loss": 0.412, + "step": 44443 + }, + { + "epoch": 0.7679706939452585, + "grad_norm": 1.8479539132150953, + "learning_rate": 2.6933263209917425e-06, + "loss": 0.4374, + "step": 44444 + }, + { + "epoch": 0.7679879734586674, + "grad_norm": 1.5499035442701905, + "learning_rate": 2.692944240882206e-06, + "loss": 0.1797, + "step": 44445 + }, + { + "epoch": 0.7680052529720763, + "grad_norm": 1.1816950175017216, + "learning_rate": 2.69256218365892e-06, + "loss": 0.3883, + "step": 44446 + }, + { + "epoch": 0.7680225324854852, + "grad_norm": 1.647908452832108, + "learning_rate": 2.692180149323086e-06, + "loss": 0.4821, + "step": 44447 + }, + { + "epoch": 0.7680398119988942, + "grad_norm": 1.3253583061344605, + "learning_rate": 2.6917981378758974e-06, + "loss": 0.2976, + "step": 44448 + }, + { + "epoch": 0.7680570915123031, + "grad_norm": 0.8405354298250772, + "learning_rate": 2.6914161493185566e-06, + "loss": 0.5286, + "step": 44449 + }, + { + "epoch": 0.768074371025712, + "grad_norm": 1.4732811529090157, + "learning_rate": 2.6910341836522524e-06, + "loss": 0.4582, + "step": 44450 + }, + { + "epoch": 0.7680916505391209, + "grad_norm": 1.4063706872403472, + "learning_rate": 2.6906522408781876e-06, + "loss": 0.5239, + "step": 44451 + }, + { + "epoch": 0.7681089300525297, + "grad_norm": 1.3923637244413722, + "learning_rate": 2.6902703209975535e-06, + "loss": 0.3791, + "step": 44452 + }, + { + "epoch": 0.7681262095659386, + "grad_norm": 0.7866559488569062, + "learning_rate": 2.6898884240115474e-06, + "loss": 0.7179, + "step": 44453 + }, + { + "epoch": 0.7681434890793475, + "grad_norm": 1.4280496981163355, + "learning_rate": 2.689506549921369e-06, + "loss": 0.5221, + "step": 44454 + }, + { + "epoch": 0.7681607685927564, + "grad_norm": 1.4898129357453072, + "learning_rate": 2.6891246987282093e-06, + "loss": 0.4516, + "step": 44455 + }, + { + "epoch": 0.7681780481061653, + "grad_norm": 1.072249690697324, + "learning_rate": 2.6887428704332697e-06, + "loss": 0.4017, + "step": 44456 + }, + { + "epoch": 0.7681953276195742, + "grad_norm": 1.3407822351687513, + "learning_rate": 2.6883610650377388e-06, + "loss": 0.3569, + "step": 44457 + }, + { + "epoch": 0.7682126071329831, + "grad_norm": 0.7312511734357469, + "learning_rate": 2.6879792825428176e-06, + "loss": 0.7071, + "step": 44458 + }, + { + "epoch": 0.768229886646392, + "grad_norm": 1.0041402639450594, + "learning_rate": 2.687597522949701e-06, + "loss": 0.3906, + "step": 44459 + }, + { + "epoch": 0.7682471661598009, + "grad_norm": 1.205988923394635, + "learning_rate": 2.6872157862595882e-06, + "loss": 0.3927, + "step": 44460 + }, + { + "epoch": 0.7682644456732098, + "grad_norm": 0.8059390845026546, + "learning_rate": 2.6868340724736674e-06, + "loss": 0.5874, + "step": 44461 + }, + { + "epoch": 0.7682817251866187, + "grad_norm": 0.7796580370850213, + "learning_rate": 2.686452381593142e-06, + "loss": 0.727, + "step": 44462 + }, + { + "epoch": 0.7682990047000277, + "grad_norm": 1.788908780525308, + "learning_rate": 2.6860707136192e-06, + "loss": 0.4037, + "step": 44463 + }, + { + "epoch": 0.7683162842134366, + "grad_norm": 1.311251417777033, + "learning_rate": 2.6856890685530402e-06, + "loss": 0.4338, + "step": 44464 + }, + { + "epoch": 0.7683335637268455, + "grad_norm": 1.243819010755877, + "learning_rate": 2.6853074463958618e-06, + "loss": 0.3168, + "step": 44465 + }, + { + "epoch": 0.7683508432402544, + "grad_norm": 1.938775915734647, + "learning_rate": 2.6849258471488527e-06, + "loss": 0.2852, + "step": 44466 + }, + { + "epoch": 0.7683681227536633, + "grad_norm": 1.7542398566830135, + "learning_rate": 2.684544270813214e-06, + "loss": 0.2921, + "step": 44467 + }, + { + "epoch": 0.7683854022670722, + "grad_norm": 1.4363794513386032, + "learning_rate": 2.6841627173901373e-06, + "loss": 0.4123, + "step": 44468 + }, + { + "epoch": 0.7684026817804811, + "grad_norm": 1.1057613141571239, + "learning_rate": 2.6837811868808204e-06, + "loss": 0.3198, + "step": 44469 + }, + { + "epoch": 0.76841996129389, + "grad_norm": 1.937410745618381, + "learning_rate": 2.6833996792864524e-06, + "loss": 0.3119, + "step": 44470 + }, + { + "epoch": 0.7684372408072989, + "grad_norm": 0.8237489842734357, + "learning_rate": 2.683018194608238e-06, + "loss": 0.3287, + "step": 44471 + }, + { + "epoch": 0.7684545203207078, + "grad_norm": 1.0857554012645627, + "learning_rate": 2.6826367328473635e-06, + "loss": 0.386, + "step": 44472 + }, + { + "epoch": 0.7684717998341166, + "grad_norm": 1.309383249076359, + "learning_rate": 2.6822552940050296e-06, + "loss": 0.3091, + "step": 44473 + }, + { + "epoch": 0.7684890793475255, + "grad_norm": 0.7377821510203859, + "learning_rate": 2.681873878082426e-06, + "loss": 0.4617, + "step": 44474 + }, + { + "epoch": 0.7685063588609344, + "grad_norm": 1.1735220491183265, + "learning_rate": 2.681492485080752e-06, + "loss": 0.4486, + "step": 44475 + }, + { + "epoch": 0.7685236383743433, + "grad_norm": 1.156873081526294, + "learning_rate": 2.681111115001197e-06, + "loss": 0.3415, + "step": 44476 + }, + { + "epoch": 0.7685409178877523, + "grad_norm": 0.670776857604669, + "learning_rate": 2.6807297678449574e-06, + "loss": 0.2648, + "step": 44477 + }, + { + "epoch": 0.7685581974011612, + "grad_norm": 1.5370278963654598, + "learning_rate": 2.680348443613232e-06, + "loss": 0.263, + "step": 44478 + }, + { + "epoch": 0.7685754769145701, + "grad_norm": 1.6118697619532403, + "learning_rate": 2.6799671423072093e-06, + "loss": 0.3353, + "step": 44479 + }, + { + "epoch": 0.768592756427979, + "grad_norm": 1.8069025154239746, + "learning_rate": 2.679585863928088e-06, + "loss": 0.3713, + "step": 44480 + }, + { + "epoch": 0.7686100359413879, + "grad_norm": 1.525119103472311, + "learning_rate": 2.6792046084770573e-06, + "loss": 0.2309, + "step": 44481 + }, + { + "epoch": 0.7686273154547968, + "grad_norm": 1.1773884875671219, + "learning_rate": 2.678823375955314e-06, + "loss": 0.9038, + "step": 44482 + }, + { + "epoch": 0.7686445949682057, + "grad_norm": 1.1289399357924903, + "learning_rate": 2.6784421663640513e-06, + "loss": 0.4589, + "step": 44483 + }, + { + "epoch": 0.7686618744816146, + "grad_norm": 0.8116688538000316, + "learning_rate": 2.6780609797044687e-06, + "loss": 0.3179, + "step": 44484 + }, + { + "epoch": 0.7686791539950235, + "grad_norm": 1.2253238440233438, + "learning_rate": 2.677679815977755e-06, + "loss": 0.3132, + "step": 44485 + }, + { + "epoch": 0.7686964335084324, + "grad_norm": 0.8714070284740884, + "learning_rate": 2.6772986751851014e-06, + "loss": 0.1337, + "step": 44486 + }, + { + "epoch": 0.7687137130218413, + "grad_norm": 1.4802939002137594, + "learning_rate": 2.676917557327705e-06, + "loss": 0.3068, + "step": 44487 + }, + { + "epoch": 0.7687309925352502, + "grad_norm": 1.8876308776231863, + "learning_rate": 2.67653646240676e-06, + "loss": 0.3025, + "step": 44488 + }, + { + "epoch": 0.7687482720486591, + "grad_norm": 1.0977688934655214, + "learning_rate": 2.6761553904234615e-06, + "loss": 0.3791, + "step": 44489 + }, + { + "epoch": 0.768765551562068, + "grad_norm": 1.6291828057263733, + "learning_rate": 2.6757743413789984e-06, + "loss": 0.5506, + "step": 44490 + }, + { + "epoch": 0.768782831075477, + "grad_norm": 1.5919358802471595, + "learning_rate": 2.6753933152745694e-06, + "loss": 0.3851, + "step": 44491 + }, + { + "epoch": 0.7688001105888859, + "grad_norm": 1.1190568273483807, + "learning_rate": 2.6750123121113624e-06, + "loss": 0.202, + "step": 44492 + }, + { + "epoch": 0.7688173901022948, + "grad_norm": 0.9943550267130314, + "learning_rate": 2.674631331890574e-06, + "loss": 0.3521, + "step": 44493 + }, + { + "epoch": 0.7688346696157036, + "grad_norm": 0.9845584572760544, + "learning_rate": 2.674250374613395e-06, + "loss": 0.4604, + "step": 44494 + }, + { + "epoch": 0.7688519491291125, + "grad_norm": 0.9580747632178244, + "learning_rate": 2.673869440281026e-06, + "loss": 0.303, + "step": 44495 + }, + { + "epoch": 0.7688692286425214, + "grad_norm": 1.1550728000542823, + "learning_rate": 2.6734885288946534e-06, + "loss": 0.3946, + "step": 44496 + }, + { + "epoch": 0.7688865081559303, + "grad_norm": 1.4135106446223267, + "learning_rate": 2.673107640455469e-06, + "loss": 0.2847, + "step": 44497 + }, + { + "epoch": 0.7689037876693392, + "grad_norm": 1.5963176604677507, + "learning_rate": 2.672726774964668e-06, + "loss": 0.4185, + "step": 44498 + }, + { + "epoch": 0.7689210671827481, + "grad_norm": 0.7740246851811204, + "learning_rate": 2.6723459324234436e-06, + "loss": 0.4008, + "step": 44499 + }, + { + "epoch": 0.768938346696157, + "grad_norm": 1.6998542808582333, + "learning_rate": 2.6719651128329913e-06, + "loss": 0.3346, + "step": 44500 + }, + { + "epoch": 0.7689556262095659, + "grad_norm": 1.1325183465631052, + "learning_rate": 2.671584316194499e-06, + "loss": 0.3007, + "step": 44501 + }, + { + "epoch": 0.7689729057229748, + "grad_norm": 1.3038276209387507, + "learning_rate": 2.6712035425091632e-06, + "loss": 0.5093, + "step": 44502 + }, + { + "epoch": 0.7689901852363837, + "grad_norm": 0.6559771633411284, + "learning_rate": 2.6708227917781727e-06, + "loss": 0.5208, + "step": 44503 + }, + { + "epoch": 0.7690074647497926, + "grad_norm": 1.4389376068575068, + "learning_rate": 2.6704420640027228e-06, + "loss": 0.3247, + "step": 44504 + }, + { + "epoch": 0.7690247442632016, + "grad_norm": 1.6898095546361567, + "learning_rate": 2.6700613591840054e-06, + "loss": 0.4101, + "step": 44505 + }, + { + "epoch": 0.7690420237766105, + "grad_norm": 1.262764352139926, + "learning_rate": 2.6696806773232144e-06, + "loss": 0.569, + "step": 44506 + }, + { + "epoch": 0.7690593032900194, + "grad_norm": 0.6564416396231134, + "learning_rate": 2.6693000184215423e-06, + "loss": 0.6021, + "step": 44507 + }, + { + "epoch": 0.7690765828034283, + "grad_norm": 1.3872960711727942, + "learning_rate": 2.668919382480175e-06, + "loss": 0.2371, + "step": 44508 + }, + { + "epoch": 0.7690938623168372, + "grad_norm": 1.5303336786132646, + "learning_rate": 2.668538769500314e-06, + "loss": 0.1927, + "step": 44509 + }, + { + "epoch": 0.7691111418302461, + "grad_norm": 1.2177449536222458, + "learning_rate": 2.668158179483139e-06, + "loss": 0.3423, + "step": 44510 + }, + { + "epoch": 0.769128421343655, + "grad_norm": 0.8775935362516789, + "learning_rate": 2.667777612429857e-06, + "loss": 0.2904, + "step": 44511 + }, + { + "epoch": 0.7691457008570639, + "grad_norm": 1.6882994472568127, + "learning_rate": 2.6673970683416495e-06, + "loss": 0.2986, + "step": 44512 + }, + { + "epoch": 0.7691629803704728, + "grad_norm": 1.5421201691190864, + "learning_rate": 2.6670165472197153e-06, + "loss": 0.4701, + "step": 44513 + }, + { + "epoch": 0.7691802598838817, + "grad_norm": 1.6711357223029186, + "learning_rate": 2.6666360490652387e-06, + "loss": 0.4808, + "step": 44514 + }, + { + "epoch": 0.7691975393972905, + "grad_norm": 1.8614826435669818, + "learning_rate": 2.6662555738794183e-06, + "loss": 0.402, + "step": 44515 + }, + { + "epoch": 0.7692148189106994, + "grad_norm": 1.7396991821270928, + "learning_rate": 2.6658751216634403e-06, + "loss": 0.4357, + "step": 44516 + }, + { + "epoch": 0.7692320984241083, + "grad_norm": 1.4040404009126373, + "learning_rate": 2.6654946924184978e-06, + "loss": 0.5795, + "step": 44517 + }, + { + "epoch": 0.7692493779375172, + "grad_norm": 1.262778000749727, + "learning_rate": 2.6651142861457867e-06, + "loss": 0.3825, + "step": 44518 + }, + { + "epoch": 0.7692666574509262, + "grad_norm": 0.701782276199537, + "learning_rate": 2.6647339028464926e-06, + "loss": 0.2755, + "step": 44519 + }, + { + "epoch": 0.7692839369643351, + "grad_norm": 1.652214749525274, + "learning_rate": 2.6643535425218126e-06, + "loss": 0.4255, + "step": 44520 + }, + { + "epoch": 0.769301216477744, + "grad_norm": 1.779633600325561, + "learning_rate": 2.663973205172932e-06, + "loss": 0.2047, + "step": 44521 + }, + { + "epoch": 0.7693184959911529, + "grad_norm": 1.6479763644278247, + "learning_rate": 2.6635928908010433e-06, + "loss": 0.247, + "step": 44522 + }, + { + "epoch": 0.7693357755045618, + "grad_norm": 1.6195192938602103, + "learning_rate": 2.66321259940734e-06, + "loss": 0.2683, + "step": 44523 + }, + { + "epoch": 0.7693530550179707, + "grad_norm": 1.3738772228476657, + "learning_rate": 2.662832330993016e-06, + "loss": 0.2306, + "step": 44524 + }, + { + "epoch": 0.7693703345313796, + "grad_norm": 1.09370660004685, + "learning_rate": 2.662452085559255e-06, + "loss": 0.2398, + "step": 44525 + }, + { + "epoch": 0.7693876140447885, + "grad_norm": 0.7464042170871298, + "learning_rate": 2.662071863107255e-06, + "loss": 0.6617, + "step": 44526 + }, + { + "epoch": 0.7694048935581974, + "grad_norm": 2.1616437850288763, + "learning_rate": 2.6616916636382006e-06, + "loss": 0.3166, + "step": 44527 + }, + { + "epoch": 0.7694221730716063, + "grad_norm": 0.9093901434573208, + "learning_rate": 2.661311487153285e-06, + "loss": 0.3161, + "step": 44528 + }, + { + "epoch": 0.7694394525850152, + "grad_norm": 1.211760341894606, + "learning_rate": 2.6609313336537035e-06, + "loss": 0.2461, + "step": 44529 + }, + { + "epoch": 0.7694567320984241, + "grad_norm": 1.9641164380254281, + "learning_rate": 2.6605512031406398e-06, + "loss": 0.3325, + "step": 44530 + }, + { + "epoch": 0.769474011611833, + "grad_norm": 1.4026121568548733, + "learning_rate": 2.6601710956152905e-06, + "loss": 0.2983, + "step": 44531 + }, + { + "epoch": 0.769491291125242, + "grad_norm": 0.9174917684279289, + "learning_rate": 2.6597910110788393e-06, + "loss": 0.3601, + "step": 44532 + }, + { + "epoch": 0.7695085706386509, + "grad_norm": 1.127539870611652, + "learning_rate": 2.6594109495324806e-06, + "loss": 0.3922, + "step": 44533 + }, + { + "epoch": 0.7695258501520598, + "grad_norm": 1.7265394326574441, + "learning_rate": 2.659030910977405e-06, + "loss": 0.3359, + "step": 44534 + }, + { + "epoch": 0.7695431296654687, + "grad_norm": 1.2756039756621962, + "learning_rate": 2.658650895414806e-06, + "loss": 0.4264, + "step": 44535 + }, + { + "epoch": 0.7695604091788775, + "grad_norm": 1.3827731775112788, + "learning_rate": 2.6582709028458697e-06, + "loss": 0.3059, + "step": 44536 + }, + { + "epoch": 0.7695776886922864, + "grad_norm": 0.8291978200572957, + "learning_rate": 2.657890933271784e-06, + "loss": 0.2817, + "step": 44537 + }, + { + "epoch": 0.7695949682056953, + "grad_norm": 1.875351200683047, + "learning_rate": 2.6575109866937422e-06, + "loss": 0.4339, + "step": 44538 + }, + { + "epoch": 0.7696122477191042, + "grad_norm": 1.1830788054367956, + "learning_rate": 2.6571310631129333e-06, + "loss": 0.4176, + "step": 44539 + }, + { + "epoch": 0.7696295272325131, + "grad_norm": 1.008981988615141, + "learning_rate": 2.6567511625305507e-06, + "loss": 0.271, + "step": 44540 + }, + { + "epoch": 0.769646806745922, + "grad_norm": 2.1393601345856244, + "learning_rate": 2.6563712849477796e-06, + "loss": 0.2799, + "step": 44541 + }, + { + "epoch": 0.7696640862593309, + "grad_norm": 1.2826601806532354, + "learning_rate": 2.655991430365814e-06, + "loss": 0.3638, + "step": 44542 + }, + { + "epoch": 0.7696813657727398, + "grad_norm": 0.8842518661838961, + "learning_rate": 2.655611598785839e-06, + "loss": 0.7396, + "step": 44543 + }, + { + "epoch": 0.7696986452861487, + "grad_norm": 1.156479611217523, + "learning_rate": 2.6552317902090464e-06, + "loss": 0.3262, + "step": 44544 + }, + { + "epoch": 0.7697159247995576, + "grad_norm": 1.357521934819317, + "learning_rate": 2.6548520046366255e-06, + "loss": 0.3858, + "step": 44545 + }, + { + "epoch": 0.7697332043129665, + "grad_norm": 1.1530038054765037, + "learning_rate": 2.6544722420697697e-06, + "loss": 0.22, + "step": 44546 + }, + { + "epoch": 0.7697504838263755, + "grad_norm": 0.9282585359116411, + "learning_rate": 2.6540925025096655e-06, + "loss": 0.7323, + "step": 44547 + }, + { + "epoch": 0.7697677633397844, + "grad_norm": 1.5221214619150754, + "learning_rate": 2.6537127859574985e-06, + "loss": 0.3029, + "step": 44548 + }, + { + "epoch": 0.7697850428531933, + "grad_norm": 1.151049620610693, + "learning_rate": 2.653333092414464e-06, + "loss": 0.4099, + "step": 44549 + }, + { + "epoch": 0.7698023223666022, + "grad_norm": 1.391434337043155, + "learning_rate": 2.6529534218817433e-06, + "loss": 0.3984, + "step": 44550 + }, + { + "epoch": 0.7698196018800111, + "grad_norm": 1.0489223960894343, + "learning_rate": 2.6525737743605364e-06, + "loss": 0.2011, + "step": 44551 + }, + { + "epoch": 0.76983688139342, + "grad_norm": 1.6281483992647674, + "learning_rate": 2.6521941498520233e-06, + "loss": 0.5308, + "step": 44552 + }, + { + "epoch": 0.7698541609068289, + "grad_norm": 1.1555620060069371, + "learning_rate": 2.6518145483573996e-06, + "loss": 0.3631, + "step": 44553 + }, + { + "epoch": 0.7698714404202378, + "grad_norm": 0.9324164616244217, + "learning_rate": 2.6514349698778495e-06, + "loss": 0.3962, + "step": 44554 + }, + { + "epoch": 0.7698887199336467, + "grad_norm": 1.1878618381439001, + "learning_rate": 2.651055414414566e-06, + "loss": 0.5392, + "step": 44555 + }, + { + "epoch": 0.7699059994470556, + "grad_norm": 1.1494486970795967, + "learning_rate": 2.6506758819687316e-06, + "loss": 0.4594, + "step": 44556 + }, + { + "epoch": 0.7699232789604644, + "grad_norm": 1.0292963381785813, + "learning_rate": 2.6502963725415397e-06, + "loss": 0.4298, + "step": 44557 + }, + { + "epoch": 0.7699405584738733, + "grad_norm": 1.3135861120485146, + "learning_rate": 2.6499168861341805e-06, + "loss": 0.4078, + "step": 44558 + }, + { + "epoch": 0.7699578379872822, + "grad_norm": 1.3188401044100653, + "learning_rate": 2.6495374227478376e-06, + "loss": 0.4943, + "step": 44559 + }, + { + "epoch": 0.7699751175006911, + "grad_norm": 1.178140981340342, + "learning_rate": 2.6491579823837053e-06, + "loss": 0.2789, + "step": 44560 + }, + { + "epoch": 0.7699923970141, + "grad_norm": 1.1184896233386152, + "learning_rate": 2.6487785650429653e-06, + "loss": 0.3132, + "step": 44561 + }, + { + "epoch": 0.770009676527509, + "grad_norm": 1.2056266896933194, + "learning_rate": 2.64839917072681e-06, + "loss": 0.2789, + "step": 44562 + }, + { + "epoch": 0.7700269560409179, + "grad_norm": 1.8435556797760564, + "learning_rate": 2.6480197994364263e-06, + "loss": 0.3967, + "step": 44563 + }, + { + "epoch": 0.7700442355543268, + "grad_norm": 1.1302424463784808, + "learning_rate": 2.647640451173008e-06, + "loss": 0.4334, + "step": 44564 + }, + { + "epoch": 0.7700615150677357, + "grad_norm": 0.650577113272407, + "learning_rate": 2.6472611259377345e-06, + "loss": 0.6688, + "step": 44565 + }, + { + "epoch": 0.7700787945811446, + "grad_norm": 1.3684700726202044, + "learning_rate": 2.6468818237318007e-06, + "loss": 0.287, + "step": 44566 + }, + { + "epoch": 0.7700960740945535, + "grad_norm": 0.8026724479567664, + "learning_rate": 2.6465025445563887e-06, + "loss": 0.2707, + "step": 44567 + }, + { + "epoch": 0.7701133536079624, + "grad_norm": 1.470149723523724, + "learning_rate": 2.6461232884126907e-06, + "loss": 0.4375, + "step": 44568 + }, + { + "epoch": 0.7701306331213713, + "grad_norm": 1.083729623792733, + "learning_rate": 2.6457440553018954e-06, + "loss": 0.374, + "step": 44569 + }, + { + "epoch": 0.7701479126347802, + "grad_norm": 1.4313717750249713, + "learning_rate": 2.645364845225186e-06, + "loss": 0.4247, + "step": 44570 + }, + { + "epoch": 0.7701651921481891, + "grad_norm": 0.912953505067597, + "learning_rate": 2.644985658183756e-06, + "loss": 0.1639, + "step": 44571 + }, + { + "epoch": 0.770182471661598, + "grad_norm": 0.8831470445979199, + "learning_rate": 2.6446064941787863e-06, + "loss": 0.39, + "step": 44572 + }, + { + "epoch": 0.7701997511750069, + "grad_norm": 2.4020969429750867, + "learning_rate": 2.644227353211468e-06, + "loss": 0.2993, + "step": 44573 + }, + { + "epoch": 0.7702170306884158, + "grad_norm": 1.6645078878496677, + "learning_rate": 2.6438482352829896e-06, + "loss": 0.422, + "step": 44574 + }, + { + "epoch": 0.7702343102018248, + "grad_norm": 0.98950267969912, + "learning_rate": 2.64346914039454e-06, + "loss": 0.254, + "step": 44575 + }, + { + "epoch": 0.7702515897152337, + "grad_norm": 0.9877872994014215, + "learning_rate": 2.6430900685473016e-06, + "loss": 0.5017, + "step": 44576 + }, + { + "epoch": 0.7702688692286426, + "grad_norm": 1.2609989173355347, + "learning_rate": 2.6427110197424667e-06, + "loss": 0.4127, + "step": 44577 + }, + { + "epoch": 0.7702861487420514, + "grad_norm": 1.198738477034918, + "learning_rate": 2.6423319939812174e-06, + "loss": 0.4315, + "step": 44578 + }, + { + "epoch": 0.7703034282554603, + "grad_norm": 1.1783768272137851, + "learning_rate": 2.6419529912647424e-06, + "loss": 0.3095, + "step": 44579 + }, + { + "epoch": 0.7703207077688692, + "grad_norm": 1.1385988351104548, + "learning_rate": 2.6415740115942344e-06, + "loss": 0.1911, + "step": 44580 + }, + { + "epoch": 0.7703379872822781, + "grad_norm": 1.3710003504072379, + "learning_rate": 2.641195054970872e-06, + "loss": 0.4648, + "step": 44581 + }, + { + "epoch": 0.770355266795687, + "grad_norm": 0.7696072128622674, + "learning_rate": 2.6408161213958483e-06, + "loss": 0.6703, + "step": 44582 + }, + { + "epoch": 0.7703725463090959, + "grad_norm": 1.8096368867906785, + "learning_rate": 2.6404372108703457e-06, + "loss": 0.3656, + "step": 44583 + }, + { + "epoch": 0.7703898258225048, + "grad_norm": 1.4051187940203995, + "learning_rate": 2.640058323395552e-06, + "loss": 0.4904, + "step": 44584 + }, + { + "epoch": 0.7704071053359137, + "grad_norm": 1.3228326357139597, + "learning_rate": 2.639679458972657e-06, + "loss": 0.4496, + "step": 44585 + }, + { + "epoch": 0.7704243848493226, + "grad_norm": 1.779286714226883, + "learning_rate": 2.6393006176028468e-06, + "loss": 0.4209, + "step": 44586 + }, + { + "epoch": 0.7704416643627315, + "grad_norm": 1.2824689648036438, + "learning_rate": 2.638921799287304e-06, + "loss": 0.3468, + "step": 44587 + }, + { + "epoch": 0.7704589438761404, + "grad_norm": 1.3659801471196003, + "learning_rate": 2.6385430040272198e-06, + "loss": 0.5387, + "step": 44588 + }, + { + "epoch": 0.7704762233895494, + "grad_norm": 1.0665299692324022, + "learning_rate": 2.638164231823779e-06, + "loss": 0.5051, + "step": 44589 + }, + { + "epoch": 0.7704935029029583, + "grad_norm": 1.8742167806038, + "learning_rate": 2.6377854826781613e-06, + "loss": 0.468, + "step": 44590 + }, + { + "epoch": 0.7705107824163672, + "grad_norm": 1.5803412618600836, + "learning_rate": 2.637406756591565e-06, + "loss": 0.2278, + "step": 44591 + }, + { + "epoch": 0.7705280619297761, + "grad_norm": 1.72189319141456, + "learning_rate": 2.637028053565167e-06, + "loss": 0.478, + "step": 44592 + }, + { + "epoch": 0.770545341443185, + "grad_norm": 0.7375280809319337, + "learning_rate": 2.63664937360016e-06, + "loss": 0.5206, + "step": 44593 + }, + { + "epoch": 0.7705626209565939, + "grad_norm": 1.9211080725392187, + "learning_rate": 2.636270716697723e-06, + "loss": 0.32, + "step": 44594 + }, + { + "epoch": 0.7705799004700028, + "grad_norm": 1.5704253332882891, + "learning_rate": 2.6358920828590497e-06, + "loss": 0.3604, + "step": 44595 + }, + { + "epoch": 0.7705971799834117, + "grad_norm": 1.4287352546942829, + "learning_rate": 2.635513472085318e-06, + "loss": 0.2976, + "step": 44596 + }, + { + "epoch": 0.7706144594968206, + "grad_norm": 1.0492807573795198, + "learning_rate": 2.635134884377719e-06, + "loss": 0.3999, + "step": 44597 + }, + { + "epoch": 0.7706317390102295, + "grad_norm": 2.2893692890174666, + "learning_rate": 2.6347563197374394e-06, + "loss": 0.1853, + "step": 44598 + }, + { + "epoch": 0.7706490185236384, + "grad_norm": 1.1543389085631404, + "learning_rate": 2.63437777816566e-06, + "loss": 0.2843, + "step": 44599 + }, + { + "epoch": 0.7706662980370472, + "grad_norm": 1.4143692813520905, + "learning_rate": 2.633999259663572e-06, + "loss": 0.3055, + "step": 44600 + }, + { + "epoch": 0.7706835775504561, + "grad_norm": 1.1585503418750798, + "learning_rate": 2.633620764232355e-06, + "loss": 0.4471, + "step": 44601 + }, + { + "epoch": 0.770700857063865, + "grad_norm": 1.4480579490104062, + "learning_rate": 2.633242291873198e-06, + "loss": 0.2401, + "step": 44602 + }, + { + "epoch": 0.770718136577274, + "grad_norm": 0.716327785285696, + "learning_rate": 2.6328638425872866e-06, + "loss": 0.1172, + "step": 44603 + }, + { + "epoch": 0.7707354160906829, + "grad_norm": 0.6195295754037637, + "learning_rate": 2.632485416375807e-06, + "loss": 0.85, + "step": 44604 + }, + { + "epoch": 0.7707526956040918, + "grad_norm": 1.4284901608650393, + "learning_rate": 2.6321070132399417e-06, + "loss": 0.3301, + "step": 44605 + }, + { + "epoch": 0.7707699751175007, + "grad_norm": 1.768297983101656, + "learning_rate": 2.631728633180879e-06, + "loss": 0.4573, + "step": 44606 + }, + { + "epoch": 0.7707872546309096, + "grad_norm": 1.450471258371243, + "learning_rate": 2.631350276199799e-06, + "loss": 0.4464, + "step": 44607 + }, + { + "epoch": 0.7708045341443185, + "grad_norm": 1.746848255333964, + "learning_rate": 2.630971942297891e-06, + "loss": 0.5542, + "step": 44608 + }, + { + "epoch": 0.7708218136577274, + "grad_norm": 1.4284842226151209, + "learning_rate": 2.6305936314763414e-06, + "loss": 0.283, + "step": 44609 + }, + { + "epoch": 0.7708390931711363, + "grad_norm": 1.1878678958803148, + "learning_rate": 2.630215343736329e-06, + "loss": 0.3284, + "step": 44610 + }, + { + "epoch": 0.7708563726845452, + "grad_norm": 1.068721102029815, + "learning_rate": 2.629837079079046e-06, + "loss": 0.4568, + "step": 44611 + }, + { + "epoch": 0.7708736521979541, + "grad_norm": 1.4171675171723443, + "learning_rate": 2.6294588375056694e-06, + "loss": 0.4446, + "step": 44612 + }, + { + "epoch": 0.770890931711363, + "grad_norm": 1.47304420788482, + "learning_rate": 2.629080619017389e-06, + "loss": 0.3556, + "step": 44613 + }, + { + "epoch": 0.7709082112247719, + "grad_norm": 1.324990099342215, + "learning_rate": 2.628702423615388e-06, + "loss": 0.2563, + "step": 44614 + }, + { + "epoch": 0.7709254907381808, + "grad_norm": 1.2265585989324364, + "learning_rate": 2.6283242513008545e-06, + "loss": 0.4381, + "step": 44615 + }, + { + "epoch": 0.7709427702515897, + "grad_norm": 1.2860616944289376, + "learning_rate": 2.6279461020749654e-06, + "loss": 0.2344, + "step": 44616 + }, + { + "epoch": 0.7709600497649987, + "grad_norm": 0.9987052053167971, + "learning_rate": 2.627567975938914e-06, + "loss": 0.4866, + "step": 44617 + }, + { + "epoch": 0.7709773292784076, + "grad_norm": 1.7599233390213391, + "learning_rate": 2.627189872893876e-06, + "loss": 0.4579, + "step": 44618 + }, + { + "epoch": 0.7709946087918165, + "grad_norm": 1.3073471826894223, + "learning_rate": 2.6268117929410396e-06, + "loss": 0.4604, + "step": 44619 + }, + { + "epoch": 0.7710118883052254, + "grad_norm": 1.270630939640647, + "learning_rate": 2.6264337360815917e-06, + "loss": 0.3772, + "step": 44620 + }, + { + "epoch": 0.7710291678186342, + "grad_norm": 1.3812621794496882, + "learning_rate": 2.626055702316712e-06, + "loss": 0.3738, + "step": 44621 + }, + { + "epoch": 0.7710464473320431, + "grad_norm": 1.5737755346886482, + "learning_rate": 2.625677691647588e-06, + "loss": 0.4904, + "step": 44622 + }, + { + "epoch": 0.771063726845452, + "grad_norm": 1.5676892350723868, + "learning_rate": 2.6252997040753993e-06, + "loss": 0.3786, + "step": 44623 + }, + { + "epoch": 0.7710810063588609, + "grad_norm": 0.9473857815244956, + "learning_rate": 2.6249217396013327e-06, + "loss": 0.636, + "step": 44624 + }, + { + "epoch": 0.7710982858722698, + "grad_norm": 1.3956651810952758, + "learning_rate": 2.624543798226571e-06, + "loss": 0.3226, + "step": 44625 + }, + { + "epoch": 0.7711155653856787, + "grad_norm": 1.1529377895314292, + "learning_rate": 2.624165879952303e-06, + "loss": 0.4047, + "step": 44626 + }, + { + "epoch": 0.7711328448990876, + "grad_norm": 0.9079509785668581, + "learning_rate": 2.6237879847797034e-06, + "loss": 0.3043, + "step": 44627 + }, + { + "epoch": 0.7711501244124965, + "grad_norm": 2.7807408748192213, + "learning_rate": 2.6234101127099644e-06, + "loss": 0.4797, + "step": 44628 + }, + { + "epoch": 0.7711674039259054, + "grad_norm": 1.6106099742516373, + "learning_rate": 2.623032263744265e-06, + "loss": 0.2924, + "step": 44629 + }, + { + "epoch": 0.7711846834393143, + "grad_norm": 1.1438075746461853, + "learning_rate": 2.6226544378837837e-06, + "loss": 0.2541, + "step": 44630 + }, + { + "epoch": 0.7712019629527233, + "grad_norm": 1.4615001802938348, + "learning_rate": 2.6222766351297145e-06, + "loss": 0.4692, + "step": 44631 + }, + { + "epoch": 0.7712192424661322, + "grad_norm": 1.0077899550094085, + "learning_rate": 2.6218988554832336e-06, + "loss": 0.4271, + "step": 44632 + }, + { + "epoch": 0.7712365219795411, + "grad_norm": 1.3944269369789706, + "learning_rate": 2.621521098945529e-06, + "loss": 0.4205, + "step": 44633 + }, + { + "epoch": 0.77125380149295, + "grad_norm": 1.0298762161263117, + "learning_rate": 2.6211433655177775e-06, + "loss": 0.4389, + "step": 44634 + }, + { + "epoch": 0.7712710810063589, + "grad_norm": 0.8931404831251971, + "learning_rate": 2.6207656552011686e-06, + "loss": 0.2552, + "step": 44635 + }, + { + "epoch": 0.7712883605197678, + "grad_norm": 0.9477327442819318, + "learning_rate": 2.6203879679968803e-06, + "loss": 0.4335, + "step": 44636 + }, + { + "epoch": 0.7713056400331767, + "grad_norm": 1.618635309030001, + "learning_rate": 2.6200103039060975e-06, + "loss": 0.4926, + "step": 44637 + }, + { + "epoch": 0.7713229195465856, + "grad_norm": 1.305177374407039, + "learning_rate": 2.6196326629300028e-06, + "loss": 0.4168, + "step": 44638 + }, + { + "epoch": 0.7713401990599945, + "grad_norm": 1.3543900905660036, + "learning_rate": 2.6192550450697827e-06, + "loss": 0.33, + "step": 44639 + }, + { + "epoch": 0.7713574785734034, + "grad_norm": 0.9840101156620887, + "learning_rate": 2.618877450326617e-06, + "loss": 0.3771, + "step": 44640 + }, + { + "epoch": 0.7713747580868123, + "grad_norm": 0.8755173911735497, + "learning_rate": 2.618499878701685e-06, + "loss": 0.3876, + "step": 44641 + }, + { + "epoch": 0.7713920376002211, + "grad_norm": 1.3003712876361204, + "learning_rate": 2.6181223301961723e-06, + "loss": 0.3469, + "step": 44642 + }, + { + "epoch": 0.77140931711363, + "grad_norm": 2.3611193329529616, + "learning_rate": 2.617744804811261e-06, + "loss": 0.5435, + "step": 44643 + }, + { + "epoch": 0.7714265966270389, + "grad_norm": 1.0942168858685355, + "learning_rate": 2.6173673025481374e-06, + "loss": 0.2461, + "step": 44644 + }, + { + "epoch": 0.7714438761404478, + "grad_norm": 0.9033115784658563, + "learning_rate": 2.616989823407977e-06, + "loss": 0.4297, + "step": 44645 + }, + { + "epoch": 0.7714611556538568, + "grad_norm": 1.2989853096786241, + "learning_rate": 2.6166123673919697e-06, + "loss": 0.4421, + "step": 44646 + }, + { + "epoch": 0.7714784351672657, + "grad_norm": 1.0425406557350052, + "learning_rate": 2.6162349345012893e-06, + "loss": 0.4122, + "step": 44647 + }, + { + "epoch": 0.7714957146806746, + "grad_norm": 1.3712353370738963, + "learning_rate": 2.615857524737123e-06, + "loss": 0.2893, + "step": 44648 + }, + { + "epoch": 0.7715129941940835, + "grad_norm": 1.5585284818859693, + "learning_rate": 2.6154801381006524e-06, + "loss": 0.2387, + "step": 44649 + }, + { + "epoch": 0.7715302737074924, + "grad_norm": 0.8053979358267492, + "learning_rate": 2.6151027745930614e-06, + "loss": 0.3931, + "step": 44650 + }, + { + "epoch": 0.7715475532209013, + "grad_norm": 0.9498219016490449, + "learning_rate": 2.6147254342155304e-06, + "loss": 0.3057, + "step": 44651 + }, + { + "epoch": 0.7715648327343102, + "grad_norm": 0.7644739053595917, + "learning_rate": 2.6143481169692375e-06, + "loss": 0.1986, + "step": 44652 + }, + { + "epoch": 0.7715821122477191, + "grad_norm": 1.1215692186524315, + "learning_rate": 2.6139708228553675e-06, + "loss": 0.227, + "step": 44653 + }, + { + "epoch": 0.771599391761128, + "grad_norm": 2.633196260818242, + "learning_rate": 2.613593551875101e-06, + "loss": 0.5222, + "step": 44654 + }, + { + "epoch": 0.7716166712745369, + "grad_norm": 1.9551480096878664, + "learning_rate": 2.6132163040296255e-06, + "loss": 0.3744, + "step": 44655 + }, + { + "epoch": 0.7716339507879458, + "grad_norm": 1.5434415569017619, + "learning_rate": 2.6128390793201143e-06, + "loss": 0.3741, + "step": 44656 + }, + { + "epoch": 0.7716512303013547, + "grad_norm": 1.6067921971703518, + "learning_rate": 2.6124618777477552e-06, + "loss": 0.3695, + "step": 44657 + }, + { + "epoch": 0.7716685098147636, + "grad_norm": 0.9224758307135814, + "learning_rate": 2.612084699313725e-06, + "loss": 0.4389, + "step": 44658 + }, + { + "epoch": 0.7716857893281726, + "grad_norm": 2.277296960009908, + "learning_rate": 2.6117075440192062e-06, + "loss": 0.2084, + "step": 44659 + }, + { + "epoch": 0.7717030688415815, + "grad_norm": 0.9728295848529286, + "learning_rate": 2.6113304118653837e-06, + "loss": 0.1773, + "step": 44660 + }, + { + "epoch": 0.7717203483549904, + "grad_norm": 0.9904811824695204, + "learning_rate": 2.610953302853433e-06, + "loss": 0.3004, + "step": 44661 + }, + { + "epoch": 0.7717376278683993, + "grad_norm": 1.7937082534514288, + "learning_rate": 2.6105762169845404e-06, + "loss": 0.4396, + "step": 44662 + }, + { + "epoch": 0.7717549073818081, + "grad_norm": 0.9885837488229332, + "learning_rate": 2.610199154259884e-06, + "loss": 0.2866, + "step": 44663 + }, + { + "epoch": 0.771772186895217, + "grad_norm": 1.067788471265903, + "learning_rate": 2.6098221146806434e-06, + "loss": 0.5065, + "step": 44664 + }, + { + "epoch": 0.7717894664086259, + "grad_norm": 1.410119623056203, + "learning_rate": 2.6094450982480014e-06, + "loss": 0.339, + "step": 44665 + }, + { + "epoch": 0.7718067459220348, + "grad_norm": 1.6309152652880736, + "learning_rate": 2.6090681049631426e-06, + "loss": 0.4504, + "step": 44666 + }, + { + "epoch": 0.7718240254354437, + "grad_norm": 1.4793071237664897, + "learning_rate": 2.6086911348272415e-06, + "loss": 0.4934, + "step": 44667 + }, + { + "epoch": 0.7718413049488526, + "grad_norm": 0.9443120432374719, + "learning_rate": 2.608314187841484e-06, + "loss": 0.2499, + "step": 44668 + }, + { + "epoch": 0.7718585844622615, + "grad_norm": 1.4636486338193886, + "learning_rate": 2.6079372640070453e-06, + "loss": 0.5567, + "step": 44669 + }, + { + "epoch": 0.7718758639756704, + "grad_norm": 1.4642180204073558, + "learning_rate": 2.607560363325112e-06, + "loss": 0.2102, + "step": 44670 + }, + { + "epoch": 0.7718931434890793, + "grad_norm": 1.175343378308461, + "learning_rate": 2.6071834857968593e-06, + "loss": 0.2231, + "step": 44671 + }, + { + "epoch": 0.7719104230024882, + "grad_norm": 1.427188638307515, + "learning_rate": 2.6068066314234684e-06, + "loss": 0.3506, + "step": 44672 + }, + { + "epoch": 0.7719277025158972, + "grad_norm": 1.2985975858382328, + "learning_rate": 2.606429800206125e-06, + "loss": 0.3445, + "step": 44673 + }, + { + "epoch": 0.7719449820293061, + "grad_norm": 1.2435739780501136, + "learning_rate": 2.606052992146002e-06, + "loss": 0.2458, + "step": 44674 + }, + { + "epoch": 0.771962261542715, + "grad_norm": 1.6318424335075559, + "learning_rate": 2.6056762072442852e-06, + "loss": 0.2157, + "step": 44675 + }, + { + "epoch": 0.7719795410561239, + "grad_norm": 1.212538557236014, + "learning_rate": 2.6052994455021506e-06, + "loss": 0.7878, + "step": 44676 + }, + { + "epoch": 0.7719968205695328, + "grad_norm": 1.1177091748774297, + "learning_rate": 2.6049227069207793e-06, + "loss": 0.2149, + "step": 44677 + }, + { + "epoch": 0.7720141000829417, + "grad_norm": 1.3490069630640176, + "learning_rate": 2.6045459915013527e-06, + "loss": 0.6386, + "step": 44678 + }, + { + "epoch": 0.7720313795963506, + "grad_norm": 1.5242264056095551, + "learning_rate": 2.604169299245053e-06, + "loss": 0.1447, + "step": 44679 + }, + { + "epoch": 0.7720486591097595, + "grad_norm": 1.4492954111764804, + "learning_rate": 2.6037926301530535e-06, + "loss": 0.2622, + "step": 44680 + }, + { + "epoch": 0.7720659386231684, + "grad_norm": 1.4798160044375936, + "learning_rate": 2.603415984226542e-06, + "loss": 0.3788, + "step": 44681 + }, + { + "epoch": 0.7720832181365773, + "grad_norm": 1.433906144341512, + "learning_rate": 2.603039361466689e-06, + "loss": 0.3557, + "step": 44682 + }, + { + "epoch": 0.7721004976499862, + "grad_norm": 1.1332731024914815, + "learning_rate": 2.6026627618746793e-06, + "loss": 0.3332, + "step": 44683 + }, + { + "epoch": 0.772117777163395, + "grad_norm": 1.1084817283039676, + "learning_rate": 2.602286185451696e-06, + "loss": 0.4676, + "step": 44684 + }, + { + "epoch": 0.7721350566768039, + "grad_norm": 1.7642289813576157, + "learning_rate": 2.6019096321989103e-06, + "loss": 0.3775, + "step": 44685 + }, + { + "epoch": 0.7721523361902128, + "grad_norm": 0.8949284295685391, + "learning_rate": 2.6015331021175093e-06, + "loss": 0.2963, + "step": 44686 + }, + { + "epoch": 0.7721696157036217, + "grad_norm": 1.2142280558159382, + "learning_rate": 2.601156595208666e-06, + "loss": 0.2845, + "step": 44687 + }, + { + "epoch": 0.7721868952170307, + "grad_norm": 2.055392012133777, + "learning_rate": 2.600780111473563e-06, + "loss": 0.2322, + "step": 44688 + }, + { + "epoch": 0.7722041747304396, + "grad_norm": 1.1633137540238172, + "learning_rate": 2.600403650913379e-06, + "loss": 0.4984, + "step": 44689 + }, + { + "epoch": 0.7722214542438485, + "grad_norm": 0.6118872802344325, + "learning_rate": 2.600027213529295e-06, + "loss": 0.504, + "step": 44690 + }, + { + "epoch": 0.7722387337572574, + "grad_norm": 1.669356814607258, + "learning_rate": 2.5996507993224894e-06, + "loss": 0.2843, + "step": 44691 + }, + { + "epoch": 0.7722560132706663, + "grad_norm": 1.37747762966883, + "learning_rate": 2.5992744082941357e-06, + "loss": 0.3468, + "step": 44692 + }, + { + "epoch": 0.7722732927840752, + "grad_norm": 1.8600419578991463, + "learning_rate": 2.5988980404454177e-06, + "loss": 0.3773, + "step": 44693 + }, + { + "epoch": 0.7722905722974841, + "grad_norm": 1.926446448116574, + "learning_rate": 2.598521695777514e-06, + "loss": 0.2305, + "step": 44694 + }, + { + "epoch": 0.772307851810893, + "grad_norm": 2.4137376435705904, + "learning_rate": 2.5981453742916053e-06, + "loss": 0.309, + "step": 44695 + }, + { + "epoch": 0.7723251313243019, + "grad_norm": 0.8977491863159921, + "learning_rate": 2.597769075988864e-06, + "loss": 0.2989, + "step": 44696 + }, + { + "epoch": 0.7723424108377108, + "grad_norm": 1.5189531459572982, + "learning_rate": 2.5973928008704763e-06, + "loss": 0.3123, + "step": 44697 + }, + { + "epoch": 0.7723596903511197, + "grad_norm": 1.390272918869927, + "learning_rate": 2.5970165489376144e-06, + "loss": 0.2568, + "step": 44698 + }, + { + "epoch": 0.7723769698645286, + "grad_norm": 1.3822086297939704, + "learning_rate": 2.596640320191459e-06, + "loss": 0.4816, + "step": 44699 + }, + { + "epoch": 0.7723942493779375, + "grad_norm": 1.022219268274211, + "learning_rate": 2.596264114633188e-06, + "loss": 0.2588, + "step": 44700 + }, + { + "epoch": 0.7724115288913465, + "grad_norm": 1.5311291673985372, + "learning_rate": 2.595887932263984e-06, + "loss": 0.3683, + "step": 44701 + }, + { + "epoch": 0.7724288084047554, + "grad_norm": 1.2686180292247884, + "learning_rate": 2.595511773085022e-06, + "loss": 0.3566, + "step": 44702 + }, + { + "epoch": 0.7724460879181643, + "grad_norm": 1.271695650861376, + "learning_rate": 2.595135637097477e-06, + "loss": 0.2836, + "step": 44703 + }, + { + "epoch": 0.7724633674315732, + "grad_norm": 0.6325768066034057, + "learning_rate": 2.594759524302529e-06, + "loss": 0.4478, + "step": 44704 + }, + { + "epoch": 0.772480646944982, + "grad_norm": 1.0945536865449785, + "learning_rate": 2.5943834347013576e-06, + "loss": 0.4718, + "step": 44705 + }, + { + "epoch": 0.7724979264583909, + "grad_norm": 0.9406127531783864, + "learning_rate": 2.594007368295143e-06, + "loss": 0.3562, + "step": 44706 + }, + { + "epoch": 0.7725152059717998, + "grad_norm": 2.5438833384301667, + "learning_rate": 2.5936313250850566e-06, + "loss": 0.3118, + "step": 44707 + }, + { + "epoch": 0.7725324854852087, + "grad_norm": 1.3127529820501258, + "learning_rate": 2.593255305072283e-06, + "loss": 0.3047, + "step": 44708 + }, + { + "epoch": 0.7725497649986176, + "grad_norm": 1.0304346859065414, + "learning_rate": 2.592879308257994e-06, + "loss": 0.335, + "step": 44709 + }, + { + "epoch": 0.7725670445120265, + "grad_norm": 1.2065492371094884, + "learning_rate": 2.592503334643374e-06, + "loss": 0.3035, + "step": 44710 + }, + { + "epoch": 0.7725843240254354, + "grad_norm": 1.1887427675003994, + "learning_rate": 2.592127384229589e-06, + "loss": 0.4969, + "step": 44711 + }, + { + "epoch": 0.7726016035388443, + "grad_norm": 1.3309639424820296, + "learning_rate": 2.5917514570178304e-06, + "loss": 0.348, + "step": 44712 + }, + { + "epoch": 0.7726188830522532, + "grad_norm": 1.2040522757317935, + "learning_rate": 2.5913755530092687e-06, + "loss": 0.3377, + "step": 44713 + }, + { + "epoch": 0.7726361625656621, + "grad_norm": 1.0340348657078404, + "learning_rate": 2.590999672205079e-06, + "loss": 0.4638, + "step": 44714 + }, + { + "epoch": 0.772653442079071, + "grad_norm": 1.0677834690336205, + "learning_rate": 2.5906238146064443e-06, + "loss": 0.3459, + "step": 44715 + }, + { + "epoch": 0.77267072159248, + "grad_norm": 1.2516466293557802, + "learning_rate": 2.5902479802145375e-06, + "loss": 0.5629, + "step": 44716 + }, + { + "epoch": 0.7726880011058889, + "grad_norm": 1.884558163338166, + "learning_rate": 2.5898721690305352e-06, + "loss": 0.3581, + "step": 44717 + }, + { + "epoch": 0.7727052806192978, + "grad_norm": 1.3294309815248981, + "learning_rate": 2.5894963810556173e-06, + "loss": 0.2974, + "step": 44718 + }, + { + "epoch": 0.7727225601327067, + "grad_norm": 0.7695052879944999, + "learning_rate": 2.589120616290963e-06, + "loss": 0.305, + "step": 44719 + }, + { + "epoch": 0.7727398396461156, + "grad_norm": 1.192526582869231, + "learning_rate": 2.588744874737743e-06, + "loss": 0.269, + "step": 44720 + }, + { + "epoch": 0.7727571191595245, + "grad_norm": 1.0653923846039839, + "learning_rate": 2.5883691563971403e-06, + "loss": 0.494, + "step": 44721 + }, + { + "epoch": 0.7727743986729334, + "grad_norm": 2.2094268410232556, + "learning_rate": 2.587993461270325e-06, + "loss": 0.3566, + "step": 44722 + }, + { + "epoch": 0.7727916781863423, + "grad_norm": 1.561087158273585, + "learning_rate": 2.587617789358479e-06, + "loss": 0.5146, + "step": 44723 + }, + { + "epoch": 0.7728089576997512, + "grad_norm": 1.5491128330598658, + "learning_rate": 2.587242140662779e-06, + "loss": 0.606, + "step": 44724 + }, + { + "epoch": 0.7728262372131601, + "grad_norm": 1.7906865339848168, + "learning_rate": 2.5868665151843973e-06, + "loss": 0.2891, + "step": 44725 + }, + { + "epoch": 0.772843516726569, + "grad_norm": 1.278903944148455, + "learning_rate": 2.586490912924515e-06, + "loss": 0.3408, + "step": 44726 + }, + { + "epoch": 0.7728607962399778, + "grad_norm": 1.2161351370677607, + "learning_rate": 2.586115333884306e-06, + "loss": 0.2228, + "step": 44727 + }, + { + "epoch": 0.7728780757533867, + "grad_norm": 0.867712750714745, + "learning_rate": 2.585739778064945e-06, + "loss": 0.6053, + "step": 44728 + }, + { + "epoch": 0.7728953552667956, + "grad_norm": 1.4551196803090978, + "learning_rate": 2.585364245467612e-06, + "loss": 0.3297, + "step": 44729 + }, + { + "epoch": 0.7729126347802046, + "grad_norm": 0.8961685200456794, + "learning_rate": 2.5849887360934832e-06, + "loss": 0.3875, + "step": 44730 + }, + { + "epoch": 0.7729299142936135, + "grad_norm": 1.2419448741311976, + "learning_rate": 2.5846132499437314e-06, + "loss": 0.3477, + "step": 44731 + }, + { + "epoch": 0.7729471938070224, + "grad_norm": 1.077961968072486, + "learning_rate": 2.584237787019538e-06, + "loss": 0.3872, + "step": 44732 + }, + { + "epoch": 0.7729644733204313, + "grad_norm": 0.7424378370063827, + "learning_rate": 2.5838623473220714e-06, + "loss": 0.51, + "step": 44733 + }, + { + "epoch": 0.7729817528338402, + "grad_norm": 0.8829075410150318, + "learning_rate": 2.5834869308525113e-06, + "loss": 0.3344, + "step": 44734 + }, + { + "epoch": 0.7729990323472491, + "grad_norm": 1.092434613675452, + "learning_rate": 2.5831115376120373e-06, + "loss": 0.3832, + "step": 44735 + }, + { + "epoch": 0.773016311860658, + "grad_norm": 1.189899553734958, + "learning_rate": 2.582736167601818e-06, + "loss": 0.4033, + "step": 44736 + }, + { + "epoch": 0.7730335913740669, + "grad_norm": 0.8502683969485677, + "learning_rate": 2.5823608208230367e-06, + "loss": 0.1699, + "step": 44737 + }, + { + "epoch": 0.7730508708874758, + "grad_norm": 1.6450655479722194, + "learning_rate": 2.581985497276861e-06, + "loss": 0.3998, + "step": 44738 + }, + { + "epoch": 0.7730681504008847, + "grad_norm": 1.0895056667139418, + "learning_rate": 2.5816101969644712e-06, + "loss": 0.2808, + "step": 44739 + }, + { + "epoch": 0.7730854299142936, + "grad_norm": 0.8698291625640833, + "learning_rate": 2.5812349198870422e-06, + "loss": 0.2346, + "step": 44740 + }, + { + "epoch": 0.7731027094277025, + "grad_norm": 0.6220551033209318, + "learning_rate": 2.580859666045752e-06, + "loss": 0.5446, + "step": 44741 + }, + { + "epoch": 0.7731199889411114, + "grad_norm": 0.5819058662840846, + "learning_rate": 2.5804844354417722e-06, + "loss": 0.9107, + "step": 44742 + }, + { + "epoch": 0.7731372684545204, + "grad_norm": 1.3286531982980676, + "learning_rate": 2.5801092280762775e-06, + "loss": 0.5177, + "step": 44743 + }, + { + "epoch": 0.7731545479679293, + "grad_norm": 1.253647845469117, + "learning_rate": 2.579734043950444e-06, + "loss": 0.3628, + "step": 44744 + }, + { + "epoch": 0.7731718274813382, + "grad_norm": 1.5486216995478306, + "learning_rate": 2.579358883065447e-06, + "loss": 0.4015, + "step": 44745 + }, + { + "epoch": 0.7731891069947471, + "grad_norm": 1.1803236398461405, + "learning_rate": 2.5789837454224653e-06, + "loss": 0.6067, + "step": 44746 + }, + { + "epoch": 0.773206386508156, + "grad_norm": 1.3308622960544323, + "learning_rate": 2.5786086310226677e-06, + "loss": 0.216, + "step": 44747 + }, + { + "epoch": 0.7732236660215648, + "grad_norm": 1.3159097011039904, + "learning_rate": 2.578233539867234e-06, + "loss": 0.4715, + "step": 44748 + }, + { + "epoch": 0.7732409455349737, + "grad_norm": 1.1705217381402375, + "learning_rate": 2.577858471957335e-06, + "loss": 0.3442, + "step": 44749 + }, + { + "epoch": 0.7732582250483826, + "grad_norm": 2.2911567917803173, + "learning_rate": 2.5774834272941496e-06, + "loss": 0.3208, + "step": 44750 + }, + { + "epoch": 0.7732755045617915, + "grad_norm": 1.0472878102503698, + "learning_rate": 2.5771084058788443e-06, + "loss": 0.166, + "step": 44751 + }, + { + "epoch": 0.7732927840752004, + "grad_norm": 1.3548137233751663, + "learning_rate": 2.576733407712606e-06, + "loss": 0.4145, + "step": 44752 + }, + { + "epoch": 0.7733100635886093, + "grad_norm": 1.1535681809343759, + "learning_rate": 2.576358432796602e-06, + "loss": 0.4106, + "step": 44753 + }, + { + "epoch": 0.7733273431020182, + "grad_norm": 1.5657268112413774, + "learning_rate": 2.575983481132005e-06, + "loss": 0.1643, + "step": 44754 + }, + { + "epoch": 0.7733446226154271, + "grad_norm": 1.2939980271166391, + "learning_rate": 2.575608552719996e-06, + "loss": 0.341, + "step": 44755 + }, + { + "epoch": 0.773361902128836, + "grad_norm": 1.5764977984595152, + "learning_rate": 2.5752336475617414e-06, + "loss": 0.3527, + "step": 44756 + }, + { + "epoch": 0.773379181642245, + "grad_norm": 1.8706518680121822, + "learning_rate": 2.574858765658419e-06, + "loss": 0.5283, + "step": 44757 + }, + { + "epoch": 0.7733964611556539, + "grad_norm": 1.6355101811255894, + "learning_rate": 2.5744839070112027e-06, + "loss": 0.2797, + "step": 44758 + }, + { + "epoch": 0.7734137406690628, + "grad_norm": 0.9868916022641059, + "learning_rate": 2.574109071621271e-06, + "loss": 0.6569, + "step": 44759 + }, + { + "epoch": 0.7734310201824717, + "grad_norm": 1.541376608328455, + "learning_rate": 2.5737342594897908e-06, + "loss": 0.3502, + "step": 44760 + }, + { + "epoch": 0.7734482996958806, + "grad_norm": 1.0436204052809428, + "learning_rate": 2.5733594706179422e-06, + "loss": 0.4239, + "step": 44761 + }, + { + "epoch": 0.7734655792092895, + "grad_norm": 1.772696547573306, + "learning_rate": 2.5729847050068937e-06, + "loss": 0.3731, + "step": 44762 + }, + { + "epoch": 0.7734828587226984, + "grad_norm": 1.4648836746359095, + "learning_rate": 2.5726099626578207e-06, + "loss": 0.2759, + "step": 44763 + }, + { + "epoch": 0.7735001382361073, + "grad_norm": 1.4194750817013002, + "learning_rate": 2.572235243571901e-06, + "loss": 0.4044, + "step": 44764 + }, + { + "epoch": 0.7735174177495162, + "grad_norm": 1.0230554449802383, + "learning_rate": 2.5718605477503022e-06, + "loss": 0.7147, + "step": 44765 + }, + { + "epoch": 0.7735346972629251, + "grad_norm": 0.7257619596835928, + "learning_rate": 2.571485875194204e-06, + "loss": 0.2834, + "step": 44766 + }, + { + "epoch": 0.773551976776334, + "grad_norm": 1.7583521079378994, + "learning_rate": 2.5711112259047733e-06, + "loss": 0.3886, + "step": 44767 + }, + { + "epoch": 0.7735692562897429, + "grad_norm": 1.362685334326757, + "learning_rate": 2.570736599883187e-06, + "loss": 0.2444, + "step": 44768 + }, + { + "epoch": 0.7735865358031517, + "grad_norm": 1.4899056121943568, + "learning_rate": 2.570361997130618e-06, + "loss": 0.2688, + "step": 44769 + }, + { + "epoch": 0.7736038153165606, + "grad_norm": 1.167434300667795, + "learning_rate": 2.569987417648242e-06, + "loss": 0.2367, + "step": 44770 + }, + { + "epoch": 0.7736210948299695, + "grad_norm": 1.1441275889880043, + "learning_rate": 2.5696128614372286e-06, + "loss": 0.404, + "step": 44771 + }, + { + "epoch": 0.7736383743433785, + "grad_norm": 1.5485482210584012, + "learning_rate": 2.569238328498754e-06, + "loss": 0.3259, + "step": 44772 + }, + { + "epoch": 0.7736556538567874, + "grad_norm": 1.8177886944452117, + "learning_rate": 2.5688638188339877e-06, + "loss": 0.3426, + "step": 44773 + }, + { + "epoch": 0.7736729333701963, + "grad_norm": 1.3377136699313883, + "learning_rate": 2.568489332444105e-06, + "loss": 0.3865, + "step": 44774 + }, + { + "epoch": 0.7736902128836052, + "grad_norm": 1.1297944036496472, + "learning_rate": 2.5681148693302802e-06, + "loss": 0.2444, + "step": 44775 + }, + { + "epoch": 0.7737074923970141, + "grad_norm": 1.8746053362581183, + "learning_rate": 2.5677404294936826e-06, + "loss": 0.4094, + "step": 44776 + }, + { + "epoch": 0.773724771910423, + "grad_norm": 1.910385637479625, + "learning_rate": 2.5673660129354896e-06, + "loss": 0.2611, + "step": 44777 + }, + { + "epoch": 0.7737420514238319, + "grad_norm": 2.971745826492379, + "learning_rate": 2.5669916196568676e-06, + "loss": 0.2518, + "step": 44778 + }, + { + "epoch": 0.7737593309372408, + "grad_norm": 1.1435566524323493, + "learning_rate": 2.5666172496589924e-06, + "loss": 0.2334, + "step": 44779 + }, + { + "epoch": 0.7737766104506497, + "grad_norm": 0.9272937134124752, + "learning_rate": 2.566242902943038e-06, + "loss": 0.3333, + "step": 44780 + }, + { + "epoch": 0.7737938899640586, + "grad_norm": 1.0148059977923998, + "learning_rate": 2.565868579510178e-06, + "loss": 0.4785, + "step": 44781 + }, + { + "epoch": 0.7738111694774675, + "grad_norm": 1.4572201904469144, + "learning_rate": 2.56549427936158e-06, + "loss": 0.3799, + "step": 44782 + }, + { + "epoch": 0.7738284489908764, + "grad_norm": 2.030118673362702, + "learning_rate": 2.5651200024984214e-06, + "loss": 0.2626, + "step": 44783 + }, + { + "epoch": 0.7738457285042853, + "grad_norm": 2.168460521993328, + "learning_rate": 2.5647457489218697e-06, + "loss": 0.2795, + "step": 44784 + }, + { + "epoch": 0.7738630080176943, + "grad_norm": 1.3366906042423123, + "learning_rate": 2.5643715186330985e-06, + "loss": 0.4656, + "step": 44785 + }, + { + "epoch": 0.7738802875311032, + "grad_norm": 1.157116459176045, + "learning_rate": 2.563997311633285e-06, + "loss": 0.3912, + "step": 44786 + }, + { + "epoch": 0.7738975670445121, + "grad_norm": 0.9726871765924902, + "learning_rate": 2.5636231279235925e-06, + "loss": 0.4761, + "step": 44787 + }, + { + "epoch": 0.773914846557921, + "grad_norm": 1.4807225009909248, + "learning_rate": 2.563248967505202e-06, + "loss": 0.3537, + "step": 44788 + }, + { + "epoch": 0.7739321260713299, + "grad_norm": 1.5694902545458007, + "learning_rate": 2.5628748303792772e-06, + "loss": 0.3011, + "step": 44789 + }, + { + "epoch": 0.7739494055847387, + "grad_norm": 1.7340887439350723, + "learning_rate": 2.562500716546996e-06, + "loss": 0.2639, + "step": 44790 + }, + { + "epoch": 0.7739666850981476, + "grad_norm": 1.6585955963180765, + "learning_rate": 2.562126626009522e-06, + "loss": 0.4064, + "step": 44791 + }, + { + "epoch": 0.7739839646115565, + "grad_norm": 1.7523970671557898, + "learning_rate": 2.56175255876804e-06, + "loss": 0.257, + "step": 44792 + }, + { + "epoch": 0.7740012441249654, + "grad_norm": 2.373341147126479, + "learning_rate": 2.5613785148237103e-06, + "loss": 0.3758, + "step": 44793 + }, + { + "epoch": 0.7740185236383743, + "grad_norm": 1.345871156238398, + "learning_rate": 2.5610044941777114e-06, + "loss": 0.3322, + "step": 44794 + }, + { + "epoch": 0.7740358031517832, + "grad_norm": 1.2313582464024788, + "learning_rate": 2.5606304968312123e-06, + "loss": 0.4335, + "step": 44795 + }, + { + "epoch": 0.7740530826651921, + "grad_norm": 1.5837719650577469, + "learning_rate": 2.5602565227853803e-06, + "loss": 0.3349, + "step": 44796 + }, + { + "epoch": 0.774070362178601, + "grad_norm": 1.6367591049627273, + "learning_rate": 2.55988257204139e-06, + "loss": 0.3957, + "step": 44797 + }, + { + "epoch": 0.7740876416920099, + "grad_norm": 1.370080120101314, + "learning_rate": 2.5595086446004136e-06, + "loss": 0.4102, + "step": 44798 + }, + { + "epoch": 0.7741049212054188, + "grad_norm": 1.1318958818945637, + "learning_rate": 2.5591347404636235e-06, + "loss": 0.4221, + "step": 44799 + }, + { + "epoch": 0.7741222007188278, + "grad_norm": 1.1960635553611008, + "learning_rate": 2.558760859632187e-06, + "loss": 0.4163, + "step": 44800 + }, + { + "epoch": 0.7741394802322367, + "grad_norm": 1.0973737266187724, + "learning_rate": 2.5583870021072797e-06, + "loss": 0.4174, + "step": 44801 + }, + { + "epoch": 0.7741567597456456, + "grad_norm": 1.093718513010613, + "learning_rate": 2.558013167890067e-06, + "loss": 0.3674, + "step": 44802 + }, + { + "epoch": 0.7741740392590545, + "grad_norm": 0.8349371342536972, + "learning_rate": 2.5576393569817225e-06, + "loss": 0.3608, + "step": 44803 + }, + { + "epoch": 0.7741913187724634, + "grad_norm": 1.172128021138175, + "learning_rate": 2.5572655693834205e-06, + "loss": 0.4177, + "step": 44804 + }, + { + "epoch": 0.7742085982858723, + "grad_norm": 0.9406535531787242, + "learning_rate": 2.5568918050963244e-06, + "loss": 0.3168, + "step": 44805 + }, + { + "epoch": 0.7742258777992812, + "grad_norm": 0.9593896201783771, + "learning_rate": 2.5565180641216126e-06, + "loss": 0.4854, + "step": 44806 + }, + { + "epoch": 0.7742431573126901, + "grad_norm": 1.1826314071907507, + "learning_rate": 2.5561443464604485e-06, + "loss": 0.394, + "step": 44807 + }, + { + "epoch": 0.774260436826099, + "grad_norm": 1.4798550636160601, + "learning_rate": 2.5557706521140056e-06, + "loss": 0.1761, + "step": 44808 + }, + { + "epoch": 0.7742777163395079, + "grad_norm": 1.5945245468214442, + "learning_rate": 2.5553969810834557e-06, + "loss": 0.4312, + "step": 44809 + }, + { + "epoch": 0.7742949958529168, + "grad_norm": 1.224462640427741, + "learning_rate": 2.5550233333699716e-06, + "loss": 0.4569, + "step": 44810 + }, + { + "epoch": 0.7743122753663256, + "grad_norm": 0.9091733255125467, + "learning_rate": 2.554649708974716e-06, + "loss": 0.5001, + "step": 44811 + }, + { + "epoch": 0.7743295548797345, + "grad_norm": 1.503085152054502, + "learning_rate": 2.5542761078988664e-06, + "loss": 0.2713, + "step": 44812 + }, + { + "epoch": 0.7743468343931434, + "grad_norm": 0.7935154974802519, + "learning_rate": 2.553902530143587e-06, + "loss": 0.1858, + "step": 44813 + }, + { + "epoch": 0.7743641139065524, + "grad_norm": 1.4163144681162216, + "learning_rate": 2.553528975710051e-06, + "loss": 0.2962, + "step": 44814 + }, + { + "epoch": 0.7743813934199613, + "grad_norm": 1.132405438000961, + "learning_rate": 2.553155444599432e-06, + "loss": 0.4975, + "step": 44815 + }, + { + "epoch": 0.7743986729333702, + "grad_norm": 1.5144181289298126, + "learning_rate": 2.5527819368128925e-06, + "loss": 0.3895, + "step": 44816 + }, + { + "epoch": 0.7744159524467791, + "grad_norm": 1.0714134149863777, + "learning_rate": 2.552408452351608e-06, + "loss": 0.3096, + "step": 44817 + }, + { + "epoch": 0.774433231960188, + "grad_norm": 1.000675234003432, + "learning_rate": 2.552034991216744e-06, + "loss": 0.1347, + "step": 44818 + }, + { + "epoch": 0.7744505114735969, + "grad_norm": 0.9617995665101089, + "learning_rate": 2.5516615534094725e-06, + "loss": 0.3646, + "step": 44819 + }, + { + "epoch": 0.7744677909870058, + "grad_norm": 1.665841096607829, + "learning_rate": 2.551288138930963e-06, + "loss": 0.5569, + "step": 44820 + }, + { + "epoch": 0.7744850705004147, + "grad_norm": 0.8270898403134641, + "learning_rate": 2.550914747782388e-06, + "loss": 0.5779, + "step": 44821 + }, + { + "epoch": 0.7745023500138236, + "grad_norm": 1.110388663796386, + "learning_rate": 2.5505413799649114e-06, + "loss": 0.3921, + "step": 44822 + }, + { + "epoch": 0.7745196295272325, + "grad_norm": 1.8292545018876474, + "learning_rate": 2.5501680354797086e-06, + "loss": 0.2805, + "step": 44823 + }, + { + "epoch": 0.7745369090406414, + "grad_norm": 1.0213690232590726, + "learning_rate": 2.549794714327942e-06, + "loss": 0.4291, + "step": 44824 + }, + { + "epoch": 0.7745541885540503, + "grad_norm": 1.059486231049899, + "learning_rate": 2.5494214165107844e-06, + "loss": 0.3429, + "step": 44825 + }, + { + "epoch": 0.7745714680674592, + "grad_norm": 1.3553359858885758, + "learning_rate": 2.549048142029409e-06, + "loss": 0.3305, + "step": 44826 + }, + { + "epoch": 0.7745887475808682, + "grad_norm": 1.1655583737197093, + "learning_rate": 2.548674890884978e-06, + "loss": 0.3455, + "step": 44827 + }, + { + "epoch": 0.7746060270942771, + "grad_norm": 1.2105859430605284, + "learning_rate": 2.548301663078665e-06, + "loss": 0.2864, + "step": 44828 + }, + { + "epoch": 0.774623306607686, + "grad_norm": 1.0288187985558834, + "learning_rate": 2.547928458611636e-06, + "loss": 0.2739, + "step": 44829 + }, + { + "epoch": 0.7746405861210949, + "grad_norm": 1.450212355313368, + "learning_rate": 2.5475552774850643e-06, + "loss": 0.2725, + "step": 44830 + }, + { + "epoch": 0.7746578656345038, + "grad_norm": 1.018269856010382, + "learning_rate": 2.547182119700109e-06, + "loss": 0.2658, + "step": 44831 + }, + { + "epoch": 0.7746751451479126, + "grad_norm": 1.1836557766334215, + "learning_rate": 2.5468089852579527e-06, + "loss": 0.2902, + "step": 44832 + }, + { + "epoch": 0.7746924246613215, + "grad_norm": 1.072435441689677, + "learning_rate": 2.546435874159754e-06, + "loss": 0.2864, + "step": 44833 + }, + { + "epoch": 0.7747097041747304, + "grad_norm": 2.696417813103439, + "learning_rate": 2.5460627864066867e-06, + "loss": 0.4742, + "step": 44834 + }, + { + "epoch": 0.7747269836881393, + "grad_norm": 1.3802395481974021, + "learning_rate": 2.5456897219999177e-06, + "loss": 0.2856, + "step": 44835 + }, + { + "epoch": 0.7747442632015482, + "grad_norm": 1.25062953453195, + "learning_rate": 2.5453166809406127e-06, + "loss": 0.396, + "step": 44836 + }, + { + "epoch": 0.7747615427149571, + "grad_norm": 1.6257384729112658, + "learning_rate": 2.544943663229942e-06, + "loss": 0.382, + "step": 44837 + }, + { + "epoch": 0.774778822228366, + "grad_norm": 1.5344609283096686, + "learning_rate": 2.544570668869073e-06, + "loss": 0.3063, + "step": 44838 + }, + { + "epoch": 0.7747961017417749, + "grad_norm": 1.6721800749513873, + "learning_rate": 2.54419769785918e-06, + "loss": 0.4908, + "step": 44839 + }, + { + "epoch": 0.7748133812551838, + "grad_norm": 0.7043816819796264, + "learning_rate": 2.543824750201422e-06, + "loss": 0.3504, + "step": 44840 + }, + { + "epoch": 0.7748306607685927, + "grad_norm": 1.8610858637540555, + "learning_rate": 2.543451825896975e-06, + "loss": 0.3658, + "step": 44841 + }, + { + "epoch": 0.7748479402820017, + "grad_norm": 0.9194503732464044, + "learning_rate": 2.543078924947001e-06, + "loss": 0.4226, + "step": 44842 + }, + { + "epoch": 0.7748652197954106, + "grad_norm": 1.0796552310637133, + "learning_rate": 2.542706047352671e-06, + "loss": 0.3562, + "step": 44843 + }, + { + "epoch": 0.7748824993088195, + "grad_norm": 1.2621899202420364, + "learning_rate": 2.542333193115152e-06, + "loss": 0.2795, + "step": 44844 + }, + { + "epoch": 0.7748997788222284, + "grad_norm": 1.1377744811302521, + "learning_rate": 2.541960362235615e-06, + "loss": 0.1748, + "step": 44845 + }, + { + "epoch": 0.7749170583356373, + "grad_norm": 0.9034253854221436, + "learning_rate": 2.5415875547152248e-06, + "loss": 0.5582, + "step": 44846 + }, + { + "epoch": 0.7749343378490462, + "grad_norm": 1.355342497478979, + "learning_rate": 2.541214770555146e-06, + "loss": 0.3739, + "step": 44847 + }, + { + "epoch": 0.7749516173624551, + "grad_norm": 1.7295944911021273, + "learning_rate": 2.54084200975655e-06, + "loss": 0.322, + "step": 44848 + }, + { + "epoch": 0.774968896875864, + "grad_norm": 1.0405005442270614, + "learning_rate": 2.5404692723206024e-06, + "loss": 0.5557, + "step": 44849 + }, + { + "epoch": 0.7749861763892729, + "grad_norm": 1.6057430419166474, + "learning_rate": 2.540096558248476e-06, + "loss": 0.3829, + "step": 44850 + }, + { + "epoch": 0.7750034559026818, + "grad_norm": 0.7806163409370536, + "learning_rate": 2.5397238675413314e-06, + "loss": 0.4545, + "step": 44851 + }, + { + "epoch": 0.7750207354160907, + "grad_norm": 1.1865900676019547, + "learning_rate": 2.5393512002003417e-06, + "loss": 0.3067, + "step": 44852 + }, + { + "epoch": 0.7750380149294995, + "grad_norm": 1.1691141389012987, + "learning_rate": 2.5389785562266667e-06, + "loss": 0.5051, + "step": 44853 + }, + { + "epoch": 0.7750552944429084, + "grad_norm": 1.8178582105261332, + "learning_rate": 2.538605935621479e-06, + "loss": 0.4533, + "step": 44854 + }, + { + "epoch": 0.7750725739563173, + "grad_norm": 1.3525032123462053, + "learning_rate": 2.5382333383859448e-06, + "loss": 0.5753, + "step": 44855 + }, + { + "epoch": 0.7750898534697263, + "grad_norm": 0.5499756603070874, + "learning_rate": 2.5378607645212326e-06, + "loss": 0.5429, + "step": 44856 + }, + { + "epoch": 0.7751071329831352, + "grad_norm": 1.5911538671017063, + "learning_rate": 2.5374882140285082e-06, + "loss": 0.2406, + "step": 44857 + }, + { + "epoch": 0.7751244124965441, + "grad_norm": 2.2885004382950727, + "learning_rate": 2.537115686908935e-06, + "loss": 0.3839, + "step": 44858 + }, + { + "epoch": 0.775141692009953, + "grad_norm": 1.9826426145999114, + "learning_rate": 2.5367431831636834e-06, + "loss": 0.4087, + "step": 44859 + }, + { + "epoch": 0.7751589715233619, + "grad_norm": 1.5935626949246948, + "learning_rate": 2.5363707027939187e-06, + "loss": 0.4146, + "step": 44860 + }, + { + "epoch": 0.7751762510367708, + "grad_norm": 1.047240421292066, + "learning_rate": 2.53599824580081e-06, + "loss": 0.2583, + "step": 44861 + }, + { + "epoch": 0.7751935305501797, + "grad_norm": 1.9602238438393094, + "learning_rate": 2.5356258121855205e-06, + "loss": 0.2761, + "step": 44862 + }, + { + "epoch": 0.7752108100635886, + "grad_norm": 1.2186707567971204, + "learning_rate": 2.5352534019492215e-06, + "loss": 0.6539, + "step": 44863 + }, + { + "epoch": 0.7752280895769975, + "grad_norm": 0.8465727391566658, + "learning_rate": 2.5348810150930723e-06, + "loss": 0.2256, + "step": 44864 + }, + { + "epoch": 0.7752453690904064, + "grad_norm": 2.6425207071613537, + "learning_rate": 2.5345086516182428e-06, + "loss": 0.4592, + "step": 44865 + }, + { + "epoch": 0.7752626486038153, + "grad_norm": 1.1232410685521539, + "learning_rate": 2.5341363115259034e-06, + "loss": 0.3883, + "step": 44866 + }, + { + "epoch": 0.7752799281172242, + "grad_norm": 1.3443045742714184, + "learning_rate": 2.5337639948172134e-06, + "loss": 0.2745, + "step": 44867 + }, + { + "epoch": 0.7752972076306331, + "grad_norm": 1.4304830030070526, + "learning_rate": 2.533391701493344e-06, + "loss": 0.3964, + "step": 44868 + }, + { + "epoch": 0.775314487144042, + "grad_norm": 1.458452976817062, + "learning_rate": 2.5330194315554567e-06, + "loss": 0.509, + "step": 44869 + }, + { + "epoch": 0.775331766657451, + "grad_norm": 1.3043433277719159, + "learning_rate": 2.532647185004723e-06, + "loss": 0.3088, + "step": 44870 + }, + { + "epoch": 0.7753490461708599, + "grad_norm": 1.2336933013881666, + "learning_rate": 2.5322749618423006e-06, + "loss": 0.4507, + "step": 44871 + }, + { + "epoch": 0.7753663256842688, + "grad_norm": 1.8881343960074184, + "learning_rate": 2.531902762069366e-06, + "loss": 0.2748, + "step": 44872 + }, + { + "epoch": 0.7753836051976777, + "grad_norm": 1.3991842586932282, + "learning_rate": 2.5315305856870764e-06, + "loss": 0.2769, + "step": 44873 + }, + { + "epoch": 0.7754008847110866, + "grad_norm": 1.05026656791223, + "learning_rate": 2.5311584326966033e-06, + "loss": 0.4362, + "step": 44874 + }, + { + "epoch": 0.7754181642244954, + "grad_norm": 1.1776829397747164, + "learning_rate": 2.5307863030991066e-06, + "loss": 0.3065, + "step": 44875 + }, + { + "epoch": 0.7754354437379043, + "grad_norm": 1.2918449338898135, + "learning_rate": 2.5304141968957587e-06, + "loss": 0.358, + "step": 44876 + }, + { + "epoch": 0.7754527232513132, + "grad_norm": 1.5928289574082959, + "learning_rate": 2.5300421140877175e-06, + "loss": 0.3487, + "step": 44877 + }, + { + "epoch": 0.7754700027647221, + "grad_norm": 1.4615519161774626, + "learning_rate": 2.529670054676152e-06, + "loss": 0.1496, + "step": 44878 + }, + { + "epoch": 0.775487282278131, + "grad_norm": 0.740936872440831, + "learning_rate": 2.5292980186622306e-06, + "loss": 0.1597, + "step": 44879 + }, + { + "epoch": 0.7755045617915399, + "grad_norm": 1.1920920804109785, + "learning_rate": 2.5289260060471134e-06, + "loss": 0.3067, + "step": 44880 + }, + { + "epoch": 0.7755218413049488, + "grad_norm": 1.7958846909582848, + "learning_rate": 2.52855401683197e-06, + "loss": 0.6113, + "step": 44881 + }, + { + "epoch": 0.7755391208183577, + "grad_norm": 1.236064455382236, + "learning_rate": 2.52818205101796e-06, + "loss": 0.2685, + "step": 44882 + }, + { + "epoch": 0.7755564003317666, + "grad_norm": 2.1650498149116606, + "learning_rate": 2.5278101086062524e-06, + "loss": 0.3043, + "step": 44883 + }, + { + "epoch": 0.7755736798451756, + "grad_norm": 1.6280075877833198, + "learning_rate": 2.5274381895980104e-06, + "loss": 0.3681, + "step": 44884 + }, + { + "epoch": 0.7755909593585845, + "grad_norm": 1.5274117680216412, + "learning_rate": 2.5270662939944035e-06, + "loss": 0.3155, + "step": 44885 + }, + { + "epoch": 0.7756082388719934, + "grad_norm": 1.1649868170659676, + "learning_rate": 2.5266944217965907e-06, + "loss": 0.4835, + "step": 44886 + }, + { + "epoch": 0.7756255183854023, + "grad_norm": 1.4070159897925347, + "learning_rate": 2.5263225730057407e-06, + "loss": 0.4369, + "step": 44887 + }, + { + "epoch": 0.7756427978988112, + "grad_norm": 1.606542729403975, + "learning_rate": 2.5259507476230137e-06, + "loss": 0.3445, + "step": 44888 + }, + { + "epoch": 0.7756600774122201, + "grad_norm": 1.1822577585208907, + "learning_rate": 2.525578945649577e-06, + "loss": 0.5027, + "step": 44889 + }, + { + "epoch": 0.775677356925629, + "grad_norm": 1.0720136882485216, + "learning_rate": 2.5252071670865984e-06, + "loss": 0.2513, + "step": 44890 + }, + { + "epoch": 0.7756946364390379, + "grad_norm": 1.859542856899557, + "learning_rate": 2.524835411935236e-06, + "loss": 0.2543, + "step": 44891 + }, + { + "epoch": 0.7757119159524468, + "grad_norm": 1.091616355204592, + "learning_rate": 2.5244636801966604e-06, + "loss": 0.26, + "step": 44892 + }, + { + "epoch": 0.7757291954658557, + "grad_norm": 0.9679894365706938, + "learning_rate": 2.524091971872029e-06, + "loss": 0.3581, + "step": 44893 + }, + { + "epoch": 0.7757464749792646, + "grad_norm": 1.5470553187131826, + "learning_rate": 2.52372028696251e-06, + "loss": 0.3554, + "step": 44894 + }, + { + "epoch": 0.7757637544926735, + "grad_norm": 0.8459612712068953, + "learning_rate": 2.5233486254692674e-06, + "loss": 0.2851, + "step": 44895 + }, + { + "epoch": 0.7757810340060823, + "grad_norm": 1.2556834643210015, + "learning_rate": 2.5229769873934674e-06, + "loss": 0.3482, + "step": 44896 + }, + { + "epoch": 0.7757983135194912, + "grad_norm": 1.2137142100932468, + "learning_rate": 2.522605372736272e-06, + "loss": 0.3983, + "step": 44897 + }, + { + "epoch": 0.7758155930329002, + "grad_norm": 1.1015888885719893, + "learning_rate": 2.5222337814988416e-06, + "loss": 0.3127, + "step": 44898 + }, + { + "epoch": 0.7758328725463091, + "grad_norm": 1.5220555988874858, + "learning_rate": 2.5218622136823443e-06, + "loss": 0.5245, + "step": 44899 + }, + { + "epoch": 0.775850152059718, + "grad_norm": 1.5230378654115653, + "learning_rate": 2.5214906692879428e-06, + "loss": 0.6494, + "step": 44900 + }, + { + "epoch": 0.7758674315731269, + "grad_norm": 1.0094401792048964, + "learning_rate": 2.521119148316803e-06, + "loss": 0.2607, + "step": 44901 + }, + { + "epoch": 0.7758847110865358, + "grad_norm": 1.3302038256103281, + "learning_rate": 2.5207476507700835e-06, + "loss": 0.2896, + "step": 44902 + }, + { + "epoch": 0.7759019905999447, + "grad_norm": 1.5463785609766503, + "learning_rate": 2.5203761766489543e-06, + "loss": 0.2087, + "step": 44903 + }, + { + "epoch": 0.7759192701133536, + "grad_norm": 1.1488009436260096, + "learning_rate": 2.520004725954571e-06, + "loss": 0.3074, + "step": 44904 + }, + { + "epoch": 0.7759365496267625, + "grad_norm": 0.8146889895836645, + "learning_rate": 2.519633298688103e-06, + "loss": 0.3037, + "step": 44905 + }, + { + "epoch": 0.7759538291401714, + "grad_norm": 1.2460038933114237, + "learning_rate": 2.519261894850712e-06, + "loss": 0.414, + "step": 44906 + }, + { + "epoch": 0.7759711086535803, + "grad_norm": 1.6334574984796209, + "learning_rate": 2.5188905144435626e-06, + "loss": 0.4915, + "step": 44907 + }, + { + "epoch": 0.7759883881669892, + "grad_norm": 1.2005727399115367, + "learning_rate": 2.518519157467818e-06, + "loss": 0.2997, + "step": 44908 + }, + { + "epoch": 0.7760056676803981, + "grad_norm": 0.6848870465272002, + "learning_rate": 2.5181478239246373e-06, + "loss": 0.5114, + "step": 44909 + }, + { + "epoch": 0.776022947193807, + "grad_norm": 0.9739175368387916, + "learning_rate": 2.5177765138151876e-06, + "loss": 0.1851, + "step": 44910 + }, + { + "epoch": 0.776040226707216, + "grad_norm": 1.9847934395099396, + "learning_rate": 2.517405227140626e-06, + "loss": 0.2316, + "step": 44911 + }, + { + "epoch": 0.7760575062206249, + "grad_norm": 1.31784973865328, + "learning_rate": 2.517033963902126e-06, + "loss": 0.2832, + "step": 44912 + }, + { + "epoch": 0.7760747857340338, + "grad_norm": 1.172703384837118, + "learning_rate": 2.51666272410084e-06, + "loss": 0.2566, + "step": 44913 + }, + { + "epoch": 0.7760920652474427, + "grad_norm": 1.246296830532466, + "learning_rate": 2.516291507737939e-06, + "loss": 0.4622, + "step": 44914 + }, + { + "epoch": 0.7761093447608516, + "grad_norm": 0.6899835107053055, + "learning_rate": 2.515920314814578e-06, + "loss": 0.8858, + "step": 44915 + }, + { + "epoch": 0.7761266242742605, + "grad_norm": 1.3371390043160105, + "learning_rate": 2.515549145331927e-06, + "loss": 0.3342, + "step": 44916 + }, + { + "epoch": 0.7761439037876693, + "grad_norm": 1.1992449682035085, + "learning_rate": 2.5151779992911417e-06, + "loss": 0.3167, + "step": 44917 + }, + { + "epoch": 0.7761611833010782, + "grad_norm": 0.8889470491323938, + "learning_rate": 2.5148068766933875e-06, + "loss": 0.6073, + "step": 44918 + }, + { + "epoch": 0.7761784628144871, + "grad_norm": 1.2908652380236127, + "learning_rate": 2.514435777539831e-06, + "loss": 0.4076, + "step": 44919 + }, + { + "epoch": 0.776195742327896, + "grad_norm": 1.0873723501574586, + "learning_rate": 2.5140647018316266e-06, + "loss": 0.2735, + "step": 44920 + }, + { + "epoch": 0.7762130218413049, + "grad_norm": 1.3399355280926968, + "learning_rate": 2.513693649569944e-06, + "loss": 0.3647, + "step": 44921 + }, + { + "epoch": 0.7762303013547138, + "grad_norm": 1.2751895775369575, + "learning_rate": 2.513322620755938e-06, + "loss": 0.4026, + "step": 44922 + }, + { + "epoch": 0.7762475808681227, + "grad_norm": 1.6379078154510511, + "learning_rate": 2.512951615390775e-06, + "loss": 0.4012, + "step": 44923 + }, + { + "epoch": 0.7762648603815316, + "grad_norm": 0.9890315930110098, + "learning_rate": 2.5125806334756164e-06, + "loss": 0.6597, + "step": 44924 + }, + { + "epoch": 0.7762821398949405, + "grad_norm": 1.4517405597419784, + "learning_rate": 2.5122096750116275e-06, + "loss": 0.3382, + "step": 44925 + }, + { + "epoch": 0.7762994194083495, + "grad_norm": 1.266575042979316, + "learning_rate": 2.5118387399999633e-06, + "loss": 0.3425, + "step": 44926 + }, + { + "epoch": 0.7763166989217584, + "grad_norm": 1.0571885053762657, + "learning_rate": 2.5114678284417935e-06, + "loss": 0.4046, + "step": 44927 + }, + { + "epoch": 0.7763339784351673, + "grad_norm": 0.8879159047882157, + "learning_rate": 2.5110969403382714e-06, + "loss": 0.306, + "step": 44928 + }, + { + "epoch": 0.7763512579485762, + "grad_norm": 2.0642024747770455, + "learning_rate": 2.5107260756905637e-06, + "loss": 0.2861, + "step": 44929 + }, + { + "epoch": 0.7763685374619851, + "grad_norm": 1.5607097406864558, + "learning_rate": 2.510355234499834e-06, + "loss": 0.3802, + "step": 44930 + }, + { + "epoch": 0.776385816975394, + "grad_norm": 2.6394320532021203, + "learning_rate": 2.5099844167672373e-06, + "loss": 0.4883, + "step": 44931 + }, + { + "epoch": 0.7764030964888029, + "grad_norm": 2.0804251460847754, + "learning_rate": 2.5096136224939415e-06, + "loss": 0.3053, + "step": 44932 + }, + { + "epoch": 0.7764203760022118, + "grad_norm": 2.0271781650662923, + "learning_rate": 2.5092428516811027e-06, + "loss": 0.338, + "step": 44933 + }, + { + "epoch": 0.7764376555156207, + "grad_norm": 1.3139276244113354, + "learning_rate": 2.508872104329885e-06, + "loss": 0.2513, + "step": 44934 + }, + { + "epoch": 0.7764549350290296, + "grad_norm": 1.6019211819589614, + "learning_rate": 2.5085013804414483e-06, + "loss": 0.4178, + "step": 44935 + }, + { + "epoch": 0.7764722145424385, + "grad_norm": 1.2353837745142378, + "learning_rate": 2.508130680016957e-06, + "loss": 0.3448, + "step": 44936 + }, + { + "epoch": 0.7764894940558474, + "grad_norm": 1.2470336191888782, + "learning_rate": 2.5077600030575676e-06, + "loss": 0.3001, + "step": 44937 + }, + { + "epoch": 0.7765067735692562, + "grad_norm": 1.0499909402240037, + "learning_rate": 2.5073893495644464e-06, + "loss": 0.3566, + "step": 44938 + }, + { + "epoch": 0.7765240530826651, + "grad_norm": 1.3876062179086135, + "learning_rate": 2.5070187195387487e-06, + "loss": 0.3594, + "step": 44939 + }, + { + "epoch": 0.776541332596074, + "grad_norm": 0.9359726749284583, + "learning_rate": 2.5066481129816365e-06, + "loss": 0.4673, + "step": 44940 + }, + { + "epoch": 0.776558612109483, + "grad_norm": 0.8538657852725879, + "learning_rate": 2.5062775298942755e-06, + "loss": 0.2478, + "step": 44941 + }, + { + "epoch": 0.7765758916228919, + "grad_norm": 1.842133442257107, + "learning_rate": 2.5059069702778206e-06, + "loss": 0.4948, + "step": 44942 + }, + { + "epoch": 0.7765931711363008, + "grad_norm": 1.3581415399772394, + "learning_rate": 2.5055364341334367e-06, + "loss": 0.3795, + "step": 44943 + }, + { + "epoch": 0.7766104506497097, + "grad_norm": 1.3135001566819422, + "learning_rate": 2.5051659214622794e-06, + "loss": 0.4108, + "step": 44944 + }, + { + "epoch": 0.7766277301631186, + "grad_norm": 1.281062674603444, + "learning_rate": 2.504795432265511e-06, + "loss": 0.5549, + "step": 44945 + }, + { + "epoch": 0.7766450096765275, + "grad_norm": 1.2843911376191195, + "learning_rate": 2.5044249665442943e-06, + "loss": 0.5801, + "step": 44946 + }, + { + "epoch": 0.7766622891899364, + "grad_norm": 1.241594894821143, + "learning_rate": 2.5040545242997905e-06, + "loss": 0.3227, + "step": 44947 + }, + { + "epoch": 0.7766795687033453, + "grad_norm": 0.9214900824372755, + "learning_rate": 2.5036841055331586e-06, + "loss": 0.7715, + "step": 44948 + }, + { + "epoch": 0.7766968482167542, + "grad_norm": 2.12589062330196, + "learning_rate": 2.5033137102455542e-06, + "loss": 0.4785, + "step": 44949 + }, + { + "epoch": 0.7767141277301631, + "grad_norm": 1.5577661543340837, + "learning_rate": 2.502943338438144e-06, + "loss": 0.2392, + "step": 44950 + }, + { + "epoch": 0.776731407243572, + "grad_norm": 0.9445584896092146, + "learning_rate": 2.5025729901120786e-06, + "loss": 0.3805, + "step": 44951 + }, + { + "epoch": 0.7767486867569809, + "grad_norm": 1.0246777122058324, + "learning_rate": 2.5022026652685304e-06, + "loss": 0.4125, + "step": 44952 + }, + { + "epoch": 0.7767659662703899, + "grad_norm": 1.3179668684977774, + "learning_rate": 2.5018323639086505e-06, + "loss": 0.3482, + "step": 44953 + }, + { + "epoch": 0.7767832457837988, + "grad_norm": 1.3853199883637253, + "learning_rate": 2.5014620860336048e-06, + "loss": 0.4328, + "step": 44954 + }, + { + "epoch": 0.7768005252972077, + "grad_norm": 1.3428190821078068, + "learning_rate": 2.5010918316445465e-06, + "loss": 0.3332, + "step": 44955 + }, + { + "epoch": 0.7768178048106166, + "grad_norm": 1.2990397234082434, + "learning_rate": 2.5007216007426417e-06, + "loss": 0.2499, + "step": 44956 + }, + { + "epoch": 0.7768350843240255, + "grad_norm": 1.518200026825301, + "learning_rate": 2.50035139332904e-06, + "loss": 0.5146, + "step": 44957 + }, + { + "epoch": 0.7768523638374344, + "grad_norm": 1.0327657515318476, + "learning_rate": 2.499981209404915e-06, + "loss": 0.3944, + "step": 44958 + }, + { + "epoch": 0.7768696433508432, + "grad_norm": 0.5295602143945768, + "learning_rate": 2.4996110489714176e-06, + "loss": 0.3975, + "step": 44959 + }, + { + "epoch": 0.7768869228642521, + "grad_norm": 0.5269650648289617, + "learning_rate": 2.4992409120297058e-06, + "loss": 0.463, + "step": 44960 + }, + { + "epoch": 0.776904202377661, + "grad_norm": 1.5003442062163004, + "learning_rate": 2.498870798580945e-06, + "loss": 0.3741, + "step": 44961 + }, + { + "epoch": 0.7769214818910699, + "grad_norm": 1.1300570822319305, + "learning_rate": 2.4985007086262868e-06, + "loss": 0.4497, + "step": 44962 + }, + { + "epoch": 0.7769387614044788, + "grad_norm": 1.0868120750101482, + "learning_rate": 2.4981306421668962e-06, + "loss": 0.409, + "step": 44963 + }, + { + "epoch": 0.7769560409178877, + "grad_norm": 1.3543991777570783, + "learning_rate": 2.497760599203929e-06, + "loss": 0.3185, + "step": 44964 + }, + { + "epoch": 0.7769733204312966, + "grad_norm": 1.3403229411262327, + "learning_rate": 2.497390579738549e-06, + "loss": 0.6006, + "step": 44965 + }, + { + "epoch": 0.7769905999447055, + "grad_norm": 1.5301318183698853, + "learning_rate": 2.49702058377191e-06, + "loss": 0.3957, + "step": 44966 + }, + { + "epoch": 0.7770078794581144, + "grad_norm": 0.8325106395440729, + "learning_rate": 2.4966506113051735e-06, + "loss": 0.2387, + "step": 44967 + }, + { + "epoch": 0.7770251589715234, + "grad_norm": 1.518957987997668, + "learning_rate": 2.4962806623394964e-06, + "loss": 0.3543, + "step": 44968 + }, + { + "epoch": 0.7770424384849323, + "grad_norm": 1.0661233833610435, + "learning_rate": 2.4959107368760384e-06, + "loss": 0.4392, + "step": 44969 + }, + { + "epoch": 0.7770597179983412, + "grad_norm": 2.2437840156730977, + "learning_rate": 2.4955408349159604e-06, + "loss": 0.4742, + "step": 44970 + }, + { + "epoch": 0.7770769975117501, + "grad_norm": 0.9940962268577417, + "learning_rate": 2.4951709564604166e-06, + "loss": 0.37, + "step": 44971 + }, + { + "epoch": 0.777094277025159, + "grad_norm": 1.4205646281903825, + "learning_rate": 2.49480110151057e-06, + "loss": 0.4062, + "step": 44972 + }, + { + "epoch": 0.7771115565385679, + "grad_norm": 0.8501732685028811, + "learning_rate": 2.4944312700675745e-06, + "loss": 0.5464, + "step": 44973 + }, + { + "epoch": 0.7771288360519768, + "grad_norm": 1.562715658956566, + "learning_rate": 2.49406146213259e-06, + "loss": 0.3841, + "step": 44974 + }, + { + "epoch": 0.7771461155653857, + "grad_norm": 1.4371884308927003, + "learning_rate": 2.4936916777067767e-06, + "loss": 0.2124, + "step": 44975 + }, + { + "epoch": 0.7771633950787946, + "grad_norm": 0.9957668038490078, + "learning_rate": 2.4933219167912925e-06, + "loss": 0.3329, + "step": 44976 + }, + { + "epoch": 0.7771806745922035, + "grad_norm": 0.603906448183207, + "learning_rate": 2.4929521793872935e-06, + "loss": 0.8639, + "step": 44977 + }, + { + "epoch": 0.7771979541056124, + "grad_norm": 1.1282043770422938, + "learning_rate": 2.4925824654959406e-06, + "loss": 0.2909, + "step": 44978 + }, + { + "epoch": 0.7772152336190213, + "grad_norm": 1.4784077691830084, + "learning_rate": 2.4922127751183867e-06, + "loss": 0.3564, + "step": 44979 + }, + { + "epoch": 0.7772325131324301, + "grad_norm": 1.0361312042893556, + "learning_rate": 2.4918431082557935e-06, + "loss": 0.2664, + "step": 44980 + }, + { + "epoch": 0.777249792645839, + "grad_norm": 0.938855994758706, + "learning_rate": 2.491473464909322e-06, + "loss": 0.49, + "step": 44981 + }, + { + "epoch": 0.777267072159248, + "grad_norm": 1.3510262818100613, + "learning_rate": 2.4911038450801217e-06, + "loss": 0.1847, + "step": 44982 + }, + { + "epoch": 0.7772843516726569, + "grad_norm": 0.8933371448702531, + "learning_rate": 2.4907342487693586e-06, + "loss": 0.2822, + "step": 44983 + }, + { + "epoch": 0.7773016311860658, + "grad_norm": 1.299028474340251, + "learning_rate": 2.490364675978184e-06, + "loss": 0.3944, + "step": 44984 + }, + { + "epoch": 0.7773189106994747, + "grad_norm": 1.1024909829936735, + "learning_rate": 2.489995126707757e-06, + "loss": 0.3172, + "step": 44985 + }, + { + "epoch": 0.7773361902128836, + "grad_norm": 0.9274747434492719, + "learning_rate": 2.489625600959237e-06, + "loss": 0.4141, + "step": 44986 + }, + { + "epoch": 0.7773534697262925, + "grad_norm": 1.2085852171386966, + "learning_rate": 2.489256098733783e-06, + "loss": 0.4152, + "step": 44987 + }, + { + "epoch": 0.7773707492397014, + "grad_norm": 1.0146728614985583, + "learning_rate": 2.4888866200325466e-06, + "loss": 0.3, + "step": 44988 + }, + { + "epoch": 0.7773880287531103, + "grad_norm": 1.321208444966202, + "learning_rate": 2.488517164856691e-06, + "loss": 0.2765, + "step": 44989 + }, + { + "epoch": 0.7774053082665192, + "grad_norm": 1.2546045051086003, + "learning_rate": 2.4881477332073712e-06, + "loss": 0.4637, + "step": 44990 + }, + { + "epoch": 0.7774225877799281, + "grad_norm": 1.2080817168861373, + "learning_rate": 2.487778325085737e-06, + "loss": 0.2278, + "step": 44991 + }, + { + "epoch": 0.777439867293337, + "grad_norm": 1.1308742916619452, + "learning_rate": 2.487408940492957e-06, + "loss": 0.2718, + "step": 44992 + }, + { + "epoch": 0.7774571468067459, + "grad_norm": 1.6230566005484908, + "learning_rate": 2.4870395794301815e-06, + "loss": 0.4842, + "step": 44993 + }, + { + "epoch": 0.7774744263201548, + "grad_norm": 0.7430692558353318, + "learning_rate": 2.486670241898572e-06, + "loss": 0.1903, + "step": 44994 + }, + { + "epoch": 0.7774917058335638, + "grad_norm": 1.9637108472878921, + "learning_rate": 2.486300927899279e-06, + "loss": 0.3611, + "step": 44995 + }, + { + "epoch": 0.7775089853469727, + "grad_norm": 1.4682919698895402, + "learning_rate": 2.4859316374334654e-06, + "loss": 0.5055, + "step": 44996 + }, + { + "epoch": 0.7775262648603816, + "grad_norm": 2.358964669122358, + "learning_rate": 2.485562370502279e-06, + "loss": 0.2725, + "step": 44997 + }, + { + "epoch": 0.7775435443737905, + "grad_norm": 2.144867864076317, + "learning_rate": 2.4851931271068876e-06, + "loss": 0.5159, + "step": 44998 + }, + { + "epoch": 0.7775608238871994, + "grad_norm": 1.0427569416247557, + "learning_rate": 2.48482390724844e-06, + "loss": 0.2366, + "step": 44999 + }, + { + "epoch": 0.7775781034006083, + "grad_norm": 1.3117304788131596, + "learning_rate": 2.484454710928099e-06, + "loss": 0.2365, + "step": 45000 + }, + { + "epoch": 0.7775953829140171, + "grad_norm": 1.4758738207145283, + "learning_rate": 2.4840855381470165e-06, + "loss": 0.5284, + "step": 45001 + }, + { + "epoch": 0.777612662427426, + "grad_norm": 0.9095960571596644, + "learning_rate": 2.4837163889063454e-06, + "loss": 0.4811, + "step": 45002 + }, + { + "epoch": 0.7776299419408349, + "grad_norm": 1.517565452412053, + "learning_rate": 2.4833472632072466e-06, + "loss": 0.234, + "step": 45003 + }, + { + "epoch": 0.7776472214542438, + "grad_norm": 0.8051958240774864, + "learning_rate": 2.482978161050875e-06, + "loss": 0.3884, + "step": 45004 + }, + { + "epoch": 0.7776645009676527, + "grad_norm": 0.7544800198802128, + "learning_rate": 2.48260908243839e-06, + "loss": 0.3487, + "step": 45005 + }, + { + "epoch": 0.7776817804810616, + "grad_norm": 1.1523698631144115, + "learning_rate": 2.4822400273709426e-06, + "loss": 0.5755, + "step": 45006 + }, + { + "epoch": 0.7776990599944705, + "grad_norm": 0.9018588842519949, + "learning_rate": 2.4818709958496925e-06, + "loss": 0.3426, + "step": 45007 + }, + { + "epoch": 0.7777163395078794, + "grad_norm": 2.1968620570222277, + "learning_rate": 2.481501987875792e-06, + "loss": 0.3472, + "step": 45008 + }, + { + "epoch": 0.7777336190212883, + "grad_norm": 1.4084579467339946, + "learning_rate": 2.4811330034503977e-06, + "loss": 0.4235, + "step": 45009 + }, + { + "epoch": 0.7777508985346973, + "grad_norm": 0.9024749741067866, + "learning_rate": 2.4807640425746693e-06, + "loss": 0.8026, + "step": 45010 + }, + { + "epoch": 0.7777681780481062, + "grad_norm": 1.0251036064986472, + "learning_rate": 2.4803951052497565e-06, + "loss": 0.3001, + "step": 45011 + }, + { + "epoch": 0.7777854575615151, + "grad_norm": 1.4378776387569574, + "learning_rate": 2.480026191476821e-06, + "loss": 0.3632, + "step": 45012 + }, + { + "epoch": 0.777802737074924, + "grad_norm": 1.0731812912357783, + "learning_rate": 2.4796573012570124e-06, + "loss": 0.4138, + "step": 45013 + }, + { + "epoch": 0.7778200165883329, + "grad_norm": 1.1993745460140337, + "learning_rate": 2.479288434591488e-06, + "loss": 0.4743, + "step": 45014 + }, + { + "epoch": 0.7778372961017418, + "grad_norm": 1.0797798511954928, + "learning_rate": 2.4789195914814037e-06, + "loss": 0.3701, + "step": 45015 + }, + { + "epoch": 0.7778545756151507, + "grad_norm": 1.2815969443648798, + "learning_rate": 2.478550771927918e-06, + "loss": 0.4282, + "step": 45016 + }, + { + "epoch": 0.7778718551285596, + "grad_norm": 1.800934941476561, + "learning_rate": 2.4781819759321803e-06, + "loss": 0.1998, + "step": 45017 + }, + { + "epoch": 0.7778891346419685, + "grad_norm": 1.4232929905688385, + "learning_rate": 2.4778132034953507e-06, + "loss": 0.4257, + "step": 45018 + }, + { + "epoch": 0.7779064141553774, + "grad_norm": 1.52576118234745, + "learning_rate": 2.4774444546185783e-06, + "loss": 0.2682, + "step": 45019 + }, + { + "epoch": 0.7779236936687863, + "grad_norm": 1.2031631018324642, + "learning_rate": 2.477075729303022e-06, + "loss": 0.2694, + "step": 45020 + }, + { + "epoch": 0.7779409731821952, + "grad_norm": 1.5480631924743955, + "learning_rate": 2.4767070275498397e-06, + "loss": 0.3997, + "step": 45021 + }, + { + "epoch": 0.7779582526956041, + "grad_norm": 2.1098845581302683, + "learning_rate": 2.47633834936018e-06, + "loss": 0.4311, + "step": 45022 + }, + { + "epoch": 0.7779755322090129, + "grad_norm": 1.3999044319950995, + "learning_rate": 2.475969694735203e-06, + "loss": 0.1773, + "step": 45023 + }, + { + "epoch": 0.7779928117224219, + "grad_norm": 2.2299490064319847, + "learning_rate": 2.4756010636760574e-06, + "loss": 0.3354, + "step": 45024 + }, + { + "epoch": 0.7780100912358308, + "grad_norm": 1.1910429439328525, + "learning_rate": 2.4752324561839015e-06, + "loss": 0.3752, + "step": 45025 + }, + { + "epoch": 0.7780273707492397, + "grad_norm": 1.577484874476699, + "learning_rate": 2.4748638722598884e-06, + "loss": 0.3122, + "step": 45026 + }, + { + "epoch": 0.7780446502626486, + "grad_norm": 1.067435211570967, + "learning_rate": 2.4744953119051773e-06, + "loss": 0.4989, + "step": 45027 + }, + { + "epoch": 0.7780619297760575, + "grad_norm": 0.9135270744407877, + "learning_rate": 2.4741267751209165e-06, + "loss": 0.4704, + "step": 45028 + }, + { + "epoch": 0.7780792092894664, + "grad_norm": 1.0788450285786109, + "learning_rate": 2.4737582619082646e-06, + "loss": 0.37, + "step": 45029 + }, + { + "epoch": 0.7780964888028753, + "grad_norm": 0.5591031997857036, + "learning_rate": 2.473389772268371e-06, + "loss": 0.6943, + "step": 45030 + }, + { + "epoch": 0.7781137683162842, + "grad_norm": 1.9066007871127728, + "learning_rate": 2.4730213062023933e-06, + "loss": 0.389, + "step": 45031 + }, + { + "epoch": 0.7781310478296931, + "grad_norm": 1.163743144436967, + "learning_rate": 2.4726528637114868e-06, + "loss": 0.2557, + "step": 45032 + }, + { + "epoch": 0.778148327343102, + "grad_norm": 1.255399255021223, + "learning_rate": 2.472284444796802e-06, + "loss": 0.3285, + "step": 45033 + }, + { + "epoch": 0.7781656068565109, + "grad_norm": 1.5717324018696874, + "learning_rate": 2.4719160494594964e-06, + "loss": 0.3995, + "step": 45034 + }, + { + "epoch": 0.7781828863699198, + "grad_norm": 1.2678078405463604, + "learning_rate": 2.4715476777007186e-06, + "loss": 0.3563, + "step": 45035 + }, + { + "epoch": 0.7782001658833287, + "grad_norm": 0.9901695565680563, + "learning_rate": 2.471179329521629e-06, + "loss": 0.3968, + "step": 45036 + }, + { + "epoch": 0.7782174453967377, + "grad_norm": 1.7742349699872617, + "learning_rate": 2.4708110049233724e-06, + "loss": 0.3043, + "step": 45037 + }, + { + "epoch": 0.7782347249101466, + "grad_norm": 1.3994922120319886, + "learning_rate": 2.470442703907113e-06, + "loss": 0.5363, + "step": 45038 + }, + { + "epoch": 0.7782520044235555, + "grad_norm": 2.206286991816489, + "learning_rate": 2.470074426473997e-06, + "loss": 0.4515, + "step": 45039 + }, + { + "epoch": 0.7782692839369644, + "grad_norm": 1.0998631892845363, + "learning_rate": 2.4697061726251825e-06, + "loss": 0.3196, + "step": 45040 + }, + { + "epoch": 0.7782865634503733, + "grad_norm": 1.2163195451668567, + "learning_rate": 2.469337942361821e-06, + "loss": 0.3975, + "step": 45041 + }, + { + "epoch": 0.7783038429637822, + "grad_norm": 1.8610042441380297, + "learning_rate": 2.468969735685063e-06, + "loss": 0.551, + "step": 45042 + }, + { + "epoch": 0.7783211224771911, + "grad_norm": 1.1088589691429418, + "learning_rate": 2.4686015525960627e-06, + "loss": 0.369, + "step": 45043 + }, + { + "epoch": 0.7783384019905999, + "grad_norm": 1.5779257528084039, + "learning_rate": 2.4682333930959754e-06, + "loss": 0.3368, + "step": 45044 + }, + { + "epoch": 0.7783556815040088, + "grad_norm": 1.7355527123275163, + "learning_rate": 2.467865257185956e-06, + "loss": 0.3406, + "step": 45045 + }, + { + "epoch": 0.7783729610174177, + "grad_norm": 1.2960544464039396, + "learning_rate": 2.4674971448671537e-06, + "loss": 0.3075, + "step": 45046 + }, + { + "epoch": 0.7783902405308266, + "grad_norm": 0.9215027110430577, + "learning_rate": 2.467129056140725e-06, + "loss": 0.4535, + "step": 45047 + }, + { + "epoch": 0.7784075200442355, + "grad_norm": 1.4631084230417088, + "learning_rate": 2.466760991007816e-06, + "loss": 0.2589, + "step": 45048 + }, + { + "epoch": 0.7784247995576444, + "grad_norm": 1.5566577257018195, + "learning_rate": 2.466392949469586e-06, + "loss": 0.3005, + "step": 45049 + }, + { + "epoch": 0.7784420790710533, + "grad_norm": 1.0179691193966263, + "learning_rate": 2.4660249315271858e-06, + "loss": 0.4785, + "step": 45050 + }, + { + "epoch": 0.7784593585844622, + "grad_norm": 1.4411766939663821, + "learning_rate": 2.4656569371817694e-06, + "loss": 0.4036, + "step": 45051 + }, + { + "epoch": 0.7784766380978712, + "grad_norm": 1.1697315932896808, + "learning_rate": 2.4652889664344893e-06, + "loss": 0.1553, + "step": 45052 + }, + { + "epoch": 0.7784939176112801, + "grad_norm": 1.6729709568139925, + "learning_rate": 2.4649210192864937e-06, + "loss": 0.4111, + "step": 45053 + }, + { + "epoch": 0.778511197124689, + "grad_norm": 1.5005048956088298, + "learning_rate": 2.4645530957389374e-06, + "loss": 0.3874, + "step": 45054 + }, + { + "epoch": 0.7785284766380979, + "grad_norm": 1.0458326866128844, + "learning_rate": 2.4641851957929743e-06, + "loss": 0.5401, + "step": 45055 + }, + { + "epoch": 0.7785457561515068, + "grad_norm": 1.8860855554814808, + "learning_rate": 2.4638173194497583e-06, + "loss": 0.3185, + "step": 45056 + }, + { + "epoch": 0.7785630356649157, + "grad_norm": 1.783546949938037, + "learning_rate": 2.463449466710437e-06, + "loss": 0.5141, + "step": 45057 + }, + { + "epoch": 0.7785803151783246, + "grad_norm": 1.72803280009214, + "learning_rate": 2.463081637576167e-06, + "loss": 0.3979, + "step": 45058 + }, + { + "epoch": 0.7785975946917335, + "grad_norm": 0.9306114710467813, + "learning_rate": 2.4627138320480957e-06, + "loss": 0.2599, + "step": 45059 + }, + { + "epoch": 0.7786148742051424, + "grad_norm": 1.019000885802635, + "learning_rate": 2.4623460501273765e-06, + "loss": 0.3108, + "step": 45060 + }, + { + "epoch": 0.7786321537185513, + "grad_norm": 1.1359553492291365, + "learning_rate": 2.4619782918151638e-06, + "loss": 0.2436, + "step": 45061 + }, + { + "epoch": 0.7786494332319602, + "grad_norm": 0.6981860798164299, + "learning_rate": 2.461610557112609e-06, + "loss": 0.4875, + "step": 45062 + }, + { + "epoch": 0.7786667127453691, + "grad_norm": 2.0725805842907286, + "learning_rate": 2.461242846020865e-06, + "loss": 0.3781, + "step": 45063 + }, + { + "epoch": 0.778683992258778, + "grad_norm": 1.4602778649177328, + "learning_rate": 2.4608751585410773e-06, + "loss": 0.5611, + "step": 45064 + }, + { + "epoch": 0.7787012717721868, + "grad_norm": 1.5146930736006503, + "learning_rate": 2.460507494674401e-06, + "loss": 0.3217, + "step": 45065 + }, + { + "epoch": 0.7787185512855958, + "grad_norm": 1.536648033620304, + "learning_rate": 2.460139854421989e-06, + "loss": 0.3094, + "step": 45066 + }, + { + "epoch": 0.7787358307990047, + "grad_norm": 1.389456549647858, + "learning_rate": 2.4597722377849943e-06, + "loss": 0.3922, + "step": 45067 + }, + { + "epoch": 0.7787531103124136, + "grad_norm": 1.5727904264260426, + "learning_rate": 2.4594046447645637e-06, + "loss": 0.2268, + "step": 45068 + }, + { + "epoch": 0.7787703898258225, + "grad_norm": 1.7938624728954649, + "learning_rate": 2.459037075361853e-06, + "loss": 0.3003, + "step": 45069 + }, + { + "epoch": 0.7787876693392314, + "grad_norm": 1.5931500220952308, + "learning_rate": 2.4586695295780095e-06, + "loss": 0.2913, + "step": 45070 + }, + { + "epoch": 0.7788049488526403, + "grad_norm": 1.385613614438731, + "learning_rate": 2.458302007414185e-06, + "loss": 0.3031, + "step": 45071 + }, + { + "epoch": 0.7788222283660492, + "grad_norm": 1.2871847047998324, + "learning_rate": 2.4579345088715355e-06, + "loss": 0.3784, + "step": 45072 + }, + { + "epoch": 0.7788395078794581, + "grad_norm": 1.7727159961474948, + "learning_rate": 2.4575670339512046e-06, + "loss": 0.694, + "step": 45073 + }, + { + "epoch": 0.778856787392867, + "grad_norm": 1.2763119704268187, + "learning_rate": 2.4571995826543502e-06, + "loss": 0.4504, + "step": 45074 + }, + { + "epoch": 0.7788740669062759, + "grad_norm": 1.0878486404054066, + "learning_rate": 2.4568321549821172e-06, + "loss": 0.4659, + "step": 45075 + }, + { + "epoch": 0.7788913464196848, + "grad_norm": 3.130224742655633, + "learning_rate": 2.456464750935662e-06, + "loss": 0.3786, + "step": 45076 + }, + { + "epoch": 0.7789086259330937, + "grad_norm": 1.5069902575976009, + "learning_rate": 2.456097370516126e-06, + "loss": 0.2948, + "step": 45077 + }, + { + "epoch": 0.7789259054465026, + "grad_norm": 1.2763624020777233, + "learning_rate": 2.455730013724673e-06, + "loss": 0.5022, + "step": 45078 + }, + { + "epoch": 0.7789431849599115, + "grad_norm": 1.3140934939880136, + "learning_rate": 2.4553626805624442e-06, + "loss": 0.3284, + "step": 45079 + }, + { + "epoch": 0.7789604644733205, + "grad_norm": 1.4611567818872973, + "learning_rate": 2.4549953710305953e-06, + "loss": 0.4273, + "step": 45080 + }, + { + "epoch": 0.7789777439867294, + "grad_norm": 1.1207042635281614, + "learning_rate": 2.454628085130272e-06, + "loss": 0.2469, + "step": 45081 + }, + { + "epoch": 0.7789950235001383, + "grad_norm": 1.2818338999881973, + "learning_rate": 2.4542608228626284e-06, + "loss": 0.3542, + "step": 45082 + }, + { + "epoch": 0.7790123030135472, + "grad_norm": 0.8638287962503642, + "learning_rate": 2.4538935842288123e-06, + "loss": 0.3011, + "step": 45083 + }, + { + "epoch": 0.7790295825269561, + "grad_norm": 1.2711557626239183, + "learning_rate": 2.453526369229974e-06, + "loss": 0.3981, + "step": 45084 + }, + { + "epoch": 0.779046862040365, + "grad_norm": 0.5847348388075029, + "learning_rate": 2.453159177867267e-06, + "loss": 0.4297, + "step": 45085 + }, + { + "epoch": 0.7790641415537738, + "grad_norm": 1.4729124364205648, + "learning_rate": 2.4527920101418366e-06, + "loss": 0.5281, + "step": 45086 + }, + { + "epoch": 0.7790814210671827, + "grad_norm": 1.5448932080094835, + "learning_rate": 2.452424866054839e-06, + "loss": 0.3529, + "step": 45087 + }, + { + "epoch": 0.7790987005805916, + "grad_norm": 1.0367878297495579, + "learning_rate": 2.4520577456074168e-06, + "loss": 0.5036, + "step": 45088 + }, + { + "epoch": 0.7791159800940005, + "grad_norm": 1.3095268623078753, + "learning_rate": 2.4516906488007232e-06, + "loss": 0.2423, + "step": 45089 + }, + { + "epoch": 0.7791332596074094, + "grad_norm": 1.9143616834049686, + "learning_rate": 2.4513235756359077e-06, + "loss": 0.3961, + "step": 45090 + }, + { + "epoch": 0.7791505391208183, + "grad_norm": 1.8078899659767194, + "learning_rate": 2.4509565261141233e-06, + "loss": 0.2861, + "step": 45091 + }, + { + "epoch": 0.7791678186342272, + "grad_norm": 1.8562197604162705, + "learning_rate": 2.4505895002365144e-06, + "loss": 0.3556, + "step": 45092 + }, + { + "epoch": 0.7791850981476361, + "grad_norm": 1.5978868772201822, + "learning_rate": 2.450222498004237e-06, + "loss": 0.1964, + "step": 45093 + }, + { + "epoch": 0.779202377661045, + "grad_norm": 1.100624521433179, + "learning_rate": 2.4498555194184315e-06, + "loss": 0.2673, + "step": 45094 + }, + { + "epoch": 0.779219657174454, + "grad_norm": 1.4162015870112863, + "learning_rate": 2.4494885644802533e-06, + "loss": 0.4032, + "step": 45095 + }, + { + "epoch": 0.7792369366878629, + "grad_norm": 2.0194501958549966, + "learning_rate": 2.449121633190853e-06, + "loss": 0.2368, + "step": 45096 + }, + { + "epoch": 0.7792542162012718, + "grad_norm": 1.2632090888261363, + "learning_rate": 2.4487547255513754e-06, + "loss": 0.4623, + "step": 45097 + }, + { + "epoch": 0.7792714957146807, + "grad_norm": 1.5245267822998774, + "learning_rate": 2.448387841562975e-06, + "loss": 0.358, + "step": 45098 + }, + { + "epoch": 0.7792887752280896, + "grad_norm": 1.0726885825023325, + "learning_rate": 2.4480209812267942e-06, + "loss": 0.4595, + "step": 45099 + }, + { + "epoch": 0.7793060547414985, + "grad_norm": 0.9247390567288427, + "learning_rate": 2.447654144543986e-06, + "loss": 0.2329, + "step": 45100 + }, + { + "epoch": 0.7793233342549074, + "grad_norm": 1.522376459963106, + "learning_rate": 2.447287331515699e-06, + "loss": 0.394, + "step": 45101 + }, + { + "epoch": 0.7793406137683163, + "grad_norm": 1.4124812564213245, + "learning_rate": 2.446920542143084e-06, + "loss": 0.5081, + "step": 45102 + }, + { + "epoch": 0.7793578932817252, + "grad_norm": 1.623152520208866, + "learning_rate": 2.4465537764272884e-06, + "loss": 0.5688, + "step": 45103 + }, + { + "epoch": 0.7793751727951341, + "grad_norm": 1.7351940632969927, + "learning_rate": 2.446187034369458e-06, + "loss": 0.3389, + "step": 45104 + }, + { + "epoch": 0.779392452308543, + "grad_norm": 1.2859431785638562, + "learning_rate": 2.445820315970744e-06, + "loss": 0.2335, + "step": 45105 + }, + { + "epoch": 0.779409731821952, + "grad_norm": 1.3109296203576357, + "learning_rate": 2.4454536212322945e-06, + "loss": 0.3008, + "step": 45106 + }, + { + "epoch": 0.7794270113353607, + "grad_norm": 1.7163041060959838, + "learning_rate": 2.445086950155261e-06, + "loss": 0.4015, + "step": 45107 + }, + { + "epoch": 0.7794442908487696, + "grad_norm": 1.9269546643728093, + "learning_rate": 2.4447203027407873e-06, + "loss": 0.2281, + "step": 45108 + }, + { + "epoch": 0.7794615703621786, + "grad_norm": 1.0500409082966955, + "learning_rate": 2.444353678990026e-06, + "loss": 0.2388, + "step": 45109 + }, + { + "epoch": 0.7794788498755875, + "grad_norm": 1.3888778107375306, + "learning_rate": 2.4439870789041197e-06, + "loss": 0.2118, + "step": 45110 + }, + { + "epoch": 0.7794961293889964, + "grad_norm": 1.0778350987320824, + "learning_rate": 2.4436205024842206e-06, + "loss": 0.319, + "step": 45111 + }, + { + "epoch": 0.7795134089024053, + "grad_norm": 1.26981835561691, + "learning_rate": 2.443253949731478e-06, + "loss": 0.2425, + "step": 45112 + }, + { + "epoch": 0.7795306884158142, + "grad_norm": 1.6784475996141002, + "learning_rate": 2.4428874206470388e-06, + "loss": 0.3368, + "step": 45113 + }, + { + "epoch": 0.7795479679292231, + "grad_norm": 1.5224349113564968, + "learning_rate": 2.4425209152320515e-06, + "loss": 0.3143, + "step": 45114 + }, + { + "epoch": 0.779565247442632, + "grad_norm": 1.5400374377494042, + "learning_rate": 2.44215443348766e-06, + "loss": 0.4248, + "step": 45115 + }, + { + "epoch": 0.7795825269560409, + "grad_norm": 1.0183490595160174, + "learning_rate": 2.441787975415019e-06, + "loss": 0.4619, + "step": 45116 + }, + { + "epoch": 0.7795998064694498, + "grad_norm": 2.121773181582442, + "learning_rate": 2.4414215410152654e-06, + "loss": 0.3505, + "step": 45117 + }, + { + "epoch": 0.7796170859828587, + "grad_norm": 1.4207455884830706, + "learning_rate": 2.44105513028956e-06, + "loss": 0.3852, + "step": 45118 + }, + { + "epoch": 0.7796343654962676, + "grad_norm": 0.9581844376576437, + "learning_rate": 2.4406887432390426e-06, + "loss": 0.2476, + "step": 45119 + }, + { + "epoch": 0.7796516450096765, + "grad_norm": 1.0796222679949572, + "learning_rate": 2.4403223798648644e-06, + "loss": 0.3022, + "step": 45120 + }, + { + "epoch": 0.7796689245230854, + "grad_norm": 1.0700834389863032, + "learning_rate": 2.4399560401681675e-06, + "loss": 0.3433, + "step": 45121 + }, + { + "epoch": 0.7796862040364944, + "grad_norm": 1.1943834190094726, + "learning_rate": 2.4395897241501064e-06, + "loss": 0.4941, + "step": 45122 + }, + { + "epoch": 0.7797034835499033, + "grad_norm": 1.093054208628018, + "learning_rate": 2.4392234318118225e-06, + "loss": 0.2165, + "step": 45123 + }, + { + "epoch": 0.7797207630633122, + "grad_norm": 1.046988164283943, + "learning_rate": 2.4388571631544656e-06, + "loss": 0.5525, + "step": 45124 + }, + { + "epoch": 0.7797380425767211, + "grad_norm": 1.1747651426473067, + "learning_rate": 2.438490918179185e-06, + "loss": 0.2826, + "step": 45125 + }, + { + "epoch": 0.77975532209013, + "grad_norm": 1.1009280357335285, + "learning_rate": 2.438124696887122e-06, + "loss": 0.4475, + "step": 45126 + }, + { + "epoch": 0.7797726016035389, + "grad_norm": 1.2102294196246162, + "learning_rate": 2.437758499279431e-06, + "loss": 0.2915, + "step": 45127 + }, + { + "epoch": 0.7797898811169477, + "grad_norm": 1.160742628209364, + "learning_rate": 2.437392325357252e-06, + "loss": 0.4782, + "step": 45128 + }, + { + "epoch": 0.7798071606303566, + "grad_norm": 0.7702979136748878, + "learning_rate": 2.4370261751217338e-06, + "loss": 0.305, + "step": 45129 + }, + { + "epoch": 0.7798244401437655, + "grad_norm": 2.0817911602263584, + "learning_rate": 2.4366600485740256e-06, + "loss": 0.2455, + "step": 45130 + }, + { + "epoch": 0.7798417196571744, + "grad_norm": 1.0921786640895088, + "learning_rate": 2.4362939457152755e-06, + "loss": 0.396, + "step": 45131 + }, + { + "epoch": 0.7798589991705833, + "grad_norm": 1.20245317996152, + "learning_rate": 2.435927866546626e-06, + "loss": 0.4655, + "step": 45132 + }, + { + "epoch": 0.7798762786839922, + "grad_norm": 1.01092110831688, + "learning_rate": 2.435561811069226e-06, + "loss": 0.35, + "step": 45133 + }, + { + "epoch": 0.7798935581974011, + "grad_norm": 0.8903501980368778, + "learning_rate": 2.4351957792842194e-06, + "loss": 0.192, + "step": 45134 + }, + { + "epoch": 0.77991083771081, + "grad_norm": 1.1657287884221699, + "learning_rate": 2.4348297711927547e-06, + "loss": 0.4417, + "step": 45135 + }, + { + "epoch": 0.779928117224219, + "grad_norm": 0.6432233405006226, + "learning_rate": 2.434463786795981e-06, + "loss": 0.6395, + "step": 45136 + }, + { + "epoch": 0.7799453967376279, + "grad_norm": 0.8166237587140961, + "learning_rate": 2.4340978260950376e-06, + "loss": 0.352, + "step": 45137 + }, + { + "epoch": 0.7799626762510368, + "grad_norm": 1.3382570438374473, + "learning_rate": 2.433731889091079e-06, + "loss": 0.2924, + "step": 45138 + }, + { + "epoch": 0.7799799557644457, + "grad_norm": 1.3910221455573235, + "learning_rate": 2.4333659757852434e-06, + "loss": 0.5293, + "step": 45139 + }, + { + "epoch": 0.7799972352778546, + "grad_norm": 1.170617612075983, + "learning_rate": 2.433000086178682e-06, + "loss": 0.4361, + "step": 45140 + }, + { + "epoch": 0.7800145147912635, + "grad_norm": 1.0469990153138045, + "learning_rate": 2.432634220272538e-06, + "loss": 0.3318, + "step": 45141 + }, + { + "epoch": 0.7800317943046724, + "grad_norm": 0.7098589525699817, + "learning_rate": 2.4322683780679624e-06, + "loss": 0.5973, + "step": 45142 + }, + { + "epoch": 0.7800490738180813, + "grad_norm": 1.7434732221828373, + "learning_rate": 2.4319025595660937e-06, + "loss": 0.506, + "step": 45143 + }, + { + "epoch": 0.7800663533314902, + "grad_norm": 1.0923823748631705, + "learning_rate": 2.4315367647680855e-06, + "loss": 0.6321, + "step": 45144 + }, + { + "epoch": 0.7800836328448991, + "grad_norm": 0.9031241889619999, + "learning_rate": 2.431170993675075e-06, + "loss": 0.1691, + "step": 45145 + }, + { + "epoch": 0.780100912358308, + "grad_norm": 0.6452016616043256, + "learning_rate": 2.4308052462882125e-06, + "loss": 0.6057, + "step": 45146 + }, + { + "epoch": 0.7801181918717169, + "grad_norm": 1.548643488510746, + "learning_rate": 2.430439522608646e-06, + "loss": 0.3795, + "step": 45147 + }, + { + "epoch": 0.7801354713851258, + "grad_norm": 1.1632132843368002, + "learning_rate": 2.4300738226375163e-06, + "loss": 0.2332, + "step": 45148 + }, + { + "epoch": 0.7801527508985348, + "grad_norm": 1.2217368241370823, + "learning_rate": 2.4297081463759733e-06, + "loss": 0.3869, + "step": 45149 + }, + { + "epoch": 0.7801700304119435, + "grad_norm": 1.6807690564838316, + "learning_rate": 2.4293424938251563e-06, + "loss": 0.3045, + "step": 45150 + }, + { + "epoch": 0.7801873099253525, + "grad_norm": 1.4293473835855537, + "learning_rate": 2.4289768649862143e-06, + "loss": 0.4719, + "step": 45151 + }, + { + "epoch": 0.7802045894387614, + "grad_norm": 1.1831458693018935, + "learning_rate": 2.4286112598602918e-06, + "loss": 0.2614, + "step": 45152 + }, + { + "epoch": 0.7802218689521703, + "grad_norm": 1.0939112617010804, + "learning_rate": 2.4282456784485376e-06, + "loss": 0.3093, + "step": 45153 + }, + { + "epoch": 0.7802391484655792, + "grad_norm": 0.978945797508799, + "learning_rate": 2.427880120752093e-06, + "loss": 0.295, + "step": 45154 + }, + { + "epoch": 0.7802564279789881, + "grad_norm": 1.332393810630931, + "learning_rate": 2.4275145867721004e-06, + "loss": 0.415, + "step": 45155 + }, + { + "epoch": 0.780273707492397, + "grad_norm": 0.8683254486959737, + "learning_rate": 2.4271490765097107e-06, + "loss": 0.2667, + "step": 45156 + }, + { + "epoch": 0.7802909870058059, + "grad_norm": 1.2135394616294417, + "learning_rate": 2.4267835899660596e-06, + "loss": 0.4522, + "step": 45157 + }, + { + "epoch": 0.7803082665192148, + "grad_norm": 2.0105502808528413, + "learning_rate": 2.4264181271423036e-06, + "loss": 0.3044, + "step": 45158 + }, + { + "epoch": 0.7803255460326237, + "grad_norm": 1.2107956710277021, + "learning_rate": 2.4260526880395783e-06, + "loss": 0.32, + "step": 45159 + }, + { + "epoch": 0.7803428255460326, + "grad_norm": 1.0601849586805725, + "learning_rate": 2.425687272659035e-06, + "loss": 0.4329, + "step": 45160 + }, + { + "epoch": 0.7803601050594415, + "grad_norm": 1.6867289801149286, + "learning_rate": 2.425321881001812e-06, + "loss": 0.3447, + "step": 45161 + }, + { + "epoch": 0.7803773845728504, + "grad_norm": 2.280091045359354, + "learning_rate": 2.42495651306906e-06, + "loss": 0.2782, + "step": 45162 + }, + { + "epoch": 0.7803946640862593, + "grad_norm": 0.9010156456878041, + "learning_rate": 2.424591168861916e-06, + "loss": 0.3789, + "step": 45163 + }, + { + "epoch": 0.7804119435996683, + "grad_norm": 1.8037894112203678, + "learning_rate": 2.4242258483815284e-06, + "loss": 0.4072, + "step": 45164 + }, + { + "epoch": 0.7804292231130772, + "grad_norm": 1.66097864203817, + "learning_rate": 2.4238605516290434e-06, + "loss": 0.4971, + "step": 45165 + }, + { + "epoch": 0.7804465026264861, + "grad_norm": 2.4033581716568544, + "learning_rate": 2.423495278605601e-06, + "loss": 0.3138, + "step": 45166 + }, + { + "epoch": 0.780463782139895, + "grad_norm": 2.2382323641518833, + "learning_rate": 2.4231300293123507e-06, + "loss": 0.3564, + "step": 45167 + }, + { + "epoch": 0.7804810616533039, + "grad_norm": 2.1445918237754666, + "learning_rate": 2.422764803750429e-06, + "loss": 0.2642, + "step": 45168 + }, + { + "epoch": 0.7804983411667128, + "grad_norm": 1.0993888363093436, + "learning_rate": 2.422399601920984e-06, + "loss": 0.4434, + "step": 45169 + }, + { + "epoch": 0.7805156206801217, + "grad_norm": 1.401318282795101, + "learning_rate": 2.422034423825159e-06, + "loss": 0.2247, + "step": 45170 + }, + { + "epoch": 0.7805329001935305, + "grad_norm": 1.4637451354019522, + "learning_rate": 2.421669269464102e-06, + "loss": 0.237, + "step": 45171 + }, + { + "epoch": 0.7805501797069394, + "grad_norm": 1.9073537287602937, + "learning_rate": 2.4213041388389493e-06, + "loss": 0.2675, + "step": 45172 + }, + { + "epoch": 0.7805674592203483, + "grad_norm": 1.3003481500722218, + "learning_rate": 2.4209390319508498e-06, + "loss": 0.3701, + "step": 45173 + }, + { + "epoch": 0.7805847387337572, + "grad_norm": 1.6006363383429676, + "learning_rate": 2.420573948800944e-06, + "loss": 0.3942, + "step": 45174 + }, + { + "epoch": 0.7806020182471661, + "grad_norm": 1.3988149660203903, + "learning_rate": 2.420208889390375e-06, + "loss": 0.3828, + "step": 45175 + }, + { + "epoch": 0.780619297760575, + "grad_norm": 0.7581245756837687, + "learning_rate": 2.4198438537202917e-06, + "loss": 0.6053, + "step": 45176 + }, + { + "epoch": 0.780636577273984, + "grad_norm": 1.1941144577801308, + "learning_rate": 2.4194788417918315e-06, + "loss": 0.4572, + "step": 45177 + }, + { + "epoch": 0.7806538567873929, + "grad_norm": 1.7168205100719685, + "learning_rate": 2.419113853606141e-06, + "loss": 0.3541, + "step": 45178 + }, + { + "epoch": 0.7806711363008018, + "grad_norm": 1.2127720237620332, + "learning_rate": 2.4187488891643596e-06, + "loss": 0.4867, + "step": 45179 + }, + { + "epoch": 0.7806884158142107, + "grad_norm": 1.2334449380085746, + "learning_rate": 2.418383948467633e-06, + "loss": 0.3903, + "step": 45180 + }, + { + "epoch": 0.7807056953276196, + "grad_norm": 1.8069431967948462, + "learning_rate": 2.418019031517105e-06, + "loss": 0.3617, + "step": 45181 + }, + { + "epoch": 0.7807229748410285, + "grad_norm": 1.612765945538515, + "learning_rate": 2.4176541383139185e-06, + "loss": 0.2295, + "step": 45182 + }, + { + "epoch": 0.7807402543544374, + "grad_norm": 1.3020629601745555, + "learning_rate": 2.4172892688592143e-06, + "loss": 0.4264, + "step": 45183 + }, + { + "epoch": 0.7807575338678463, + "grad_norm": 0.9591623667494159, + "learning_rate": 2.4169244231541387e-06, + "loss": 0.2068, + "step": 45184 + }, + { + "epoch": 0.7807748133812552, + "grad_norm": 1.5486218982494402, + "learning_rate": 2.4165596011998292e-06, + "loss": 0.353, + "step": 45185 + }, + { + "epoch": 0.7807920928946641, + "grad_norm": 1.5761493295893376, + "learning_rate": 2.4161948029974315e-06, + "loss": 0.314, + "step": 45186 + }, + { + "epoch": 0.780809372408073, + "grad_norm": 1.9389507871567682, + "learning_rate": 2.4158300285480895e-06, + "loss": 0.2593, + "step": 45187 + }, + { + "epoch": 0.7808266519214819, + "grad_norm": 1.5318681091694977, + "learning_rate": 2.415465277852943e-06, + "loss": 0.4738, + "step": 45188 + }, + { + "epoch": 0.7808439314348908, + "grad_norm": 1.259204704122339, + "learning_rate": 2.415100550913139e-06, + "loss": 0.2821, + "step": 45189 + }, + { + "epoch": 0.7808612109482997, + "grad_norm": 2.0652671528415745, + "learning_rate": 2.414735847729812e-06, + "loss": 0.4682, + "step": 45190 + }, + { + "epoch": 0.7808784904617087, + "grad_norm": 1.3966941749188582, + "learning_rate": 2.4143711683041095e-06, + "loss": 0.6807, + "step": 45191 + }, + { + "epoch": 0.7808957699751174, + "grad_norm": 1.3717005551470494, + "learning_rate": 2.4140065126371725e-06, + "loss": 0.7401, + "step": 45192 + }, + { + "epoch": 0.7809130494885264, + "grad_norm": 1.0495904083866436, + "learning_rate": 2.4136418807301474e-06, + "loss": 0.3212, + "step": 45193 + }, + { + "epoch": 0.7809303290019353, + "grad_norm": 1.5242204774808426, + "learning_rate": 2.413277272584169e-06, + "loss": 0.5005, + "step": 45194 + }, + { + "epoch": 0.7809476085153442, + "grad_norm": 1.4733768528807134, + "learning_rate": 2.412912688200386e-06, + "loss": 0.3106, + "step": 45195 + }, + { + "epoch": 0.7809648880287531, + "grad_norm": 1.2571969638244358, + "learning_rate": 2.4125481275799367e-06, + "loss": 0.4025, + "step": 45196 + }, + { + "epoch": 0.780982167542162, + "grad_norm": 1.7218844651875798, + "learning_rate": 2.4121835907239577e-06, + "loss": 0.2012, + "step": 45197 + }, + { + "epoch": 0.7809994470555709, + "grad_norm": 1.9397503019849511, + "learning_rate": 2.4118190776336014e-06, + "loss": 0.2061, + "step": 45198 + }, + { + "epoch": 0.7810167265689798, + "grad_norm": 0.9371089991403988, + "learning_rate": 2.4114545883100028e-06, + "loss": 0.1955, + "step": 45199 + }, + { + "epoch": 0.7810340060823887, + "grad_norm": 1.1319488453926327, + "learning_rate": 2.411090122754307e-06, + "loss": 0.401, + "step": 45200 + }, + { + "epoch": 0.7810512855957976, + "grad_norm": 0.8507706355325225, + "learning_rate": 2.4107256809676517e-06, + "loss": 0.9707, + "step": 45201 + }, + { + "epoch": 0.7810685651092065, + "grad_norm": 0.8053760874739573, + "learning_rate": 2.410361262951183e-06, + "loss": 0.2794, + "step": 45202 + }, + { + "epoch": 0.7810858446226154, + "grad_norm": 1.8563998293056088, + "learning_rate": 2.409996868706036e-06, + "loss": 0.5809, + "step": 45203 + }, + { + "epoch": 0.7811031241360243, + "grad_norm": 1.5740572698149373, + "learning_rate": 2.4096324982333564e-06, + "loss": 0.3599, + "step": 45204 + }, + { + "epoch": 0.7811204036494332, + "grad_norm": 1.1630643595978924, + "learning_rate": 2.409268151534284e-06, + "loss": 0.4627, + "step": 45205 + }, + { + "epoch": 0.7811376831628422, + "grad_norm": 1.324680619205281, + "learning_rate": 2.4089038286099644e-06, + "loss": 0.4795, + "step": 45206 + }, + { + "epoch": 0.7811549626762511, + "grad_norm": 1.0020654013417258, + "learning_rate": 2.4085395294615343e-06, + "loss": 0.3406, + "step": 45207 + }, + { + "epoch": 0.78117224218966, + "grad_norm": 1.400194386574629, + "learning_rate": 2.4081752540901327e-06, + "loss": 0.4532, + "step": 45208 + }, + { + "epoch": 0.7811895217030689, + "grad_norm": 1.6032694099524156, + "learning_rate": 2.4078110024969025e-06, + "loss": 0.3575, + "step": 45209 + }, + { + "epoch": 0.7812068012164778, + "grad_norm": 1.404743053439993, + "learning_rate": 2.4074467746829856e-06, + "loss": 0.315, + "step": 45210 + }, + { + "epoch": 0.7812240807298867, + "grad_norm": 0.7992063083897275, + "learning_rate": 2.407082570649526e-06, + "loss": 0.3488, + "step": 45211 + }, + { + "epoch": 0.7812413602432956, + "grad_norm": 0.8016103890104523, + "learning_rate": 2.4067183903976563e-06, + "loss": 0.3303, + "step": 45212 + }, + { + "epoch": 0.7812586397567044, + "grad_norm": 1.0662498412908794, + "learning_rate": 2.4063542339285263e-06, + "loss": 0.3656, + "step": 45213 + }, + { + "epoch": 0.7812759192701133, + "grad_norm": 1.5929143173748395, + "learning_rate": 2.4059901012432686e-06, + "loss": 0.2664, + "step": 45214 + }, + { + "epoch": 0.7812931987835222, + "grad_norm": 1.582893829231048, + "learning_rate": 2.405625992343026e-06, + "loss": 0.4067, + "step": 45215 + }, + { + "epoch": 0.7813104782969311, + "grad_norm": 0.881802295097934, + "learning_rate": 2.405261907228944e-06, + "loss": 0.6711, + "step": 45216 + }, + { + "epoch": 0.78132775781034, + "grad_norm": 1.2851065722000485, + "learning_rate": 2.4048978459021555e-06, + "loss": 0.2106, + "step": 45217 + }, + { + "epoch": 0.7813450373237489, + "grad_norm": 0.8157985429437531, + "learning_rate": 2.404533808363807e-06, + "loss": 0.5329, + "step": 45218 + }, + { + "epoch": 0.7813623168371578, + "grad_norm": 1.5078858117299063, + "learning_rate": 2.4041697946150333e-06, + "loss": 0.4076, + "step": 45219 + }, + { + "epoch": 0.7813795963505668, + "grad_norm": 1.8103337602987597, + "learning_rate": 2.4038058046569766e-06, + "loss": 0.456, + "step": 45220 + }, + { + "epoch": 0.7813968758639757, + "grad_norm": 1.1032050751128577, + "learning_rate": 2.4034418384907777e-06, + "loss": 0.3914, + "step": 45221 + }, + { + "epoch": 0.7814141553773846, + "grad_norm": 0.9306375505226386, + "learning_rate": 2.4030778961175794e-06, + "loss": 0.3385, + "step": 45222 + }, + { + "epoch": 0.7814314348907935, + "grad_norm": 1.0541818727633496, + "learning_rate": 2.4027139775385157e-06, + "loss": 0.3679, + "step": 45223 + }, + { + "epoch": 0.7814487144042024, + "grad_norm": 1.0560943213038212, + "learning_rate": 2.402350082754732e-06, + "loss": 0.258, + "step": 45224 + }, + { + "epoch": 0.7814659939176113, + "grad_norm": 0.9997569125578366, + "learning_rate": 2.401986211767362e-06, + "loss": 0.2935, + "step": 45225 + }, + { + "epoch": 0.7814832734310202, + "grad_norm": 1.7460559336295376, + "learning_rate": 2.4016223645775484e-06, + "loss": 0.3203, + "step": 45226 + }, + { + "epoch": 0.7815005529444291, + "grad_norm": 1.4062903406351717, + "learning_rate": 2.401258541186434e-06, + "loss": 0.2479, + "step": 45227 + }, + { + "epoch": 0.781517832457838, + "grad_norm": 0.9704495616453319, + "learning_rate": 2.400894741595152e-06, + "loss": 0.3238, + "step": 45228 + }, + { + "epoch": 0.7815351119712469, + "grad_norm": 1.8970092961426142, + "learning_rate": 2.400530965804849e-06, + "loss": 0.277, + "step": 45229 + }, + { + "epoch": 0.7815523914846558, + "grad_norm": 2.0880591379726035, + "learning_rate": 2.400167213816658e-06, + "loss": 0.3614, + "step": 45230 + }, + { + "epoch": 0.7815696709980647, + "grad_norm": 2.442894017148857, + "learning_rate": 2.39980348563172e-06, + "loss": 0.4528, + "step": 45231 + }, + { + "epoch": 0.7815869505114736, + "grad_norm": 1.1595176764502735, + "learning_rate": 2.399439781251175e-06, + "loss": 0.247, + "step": 45232 + }, + { + "epoch": 0.7816042300248826, + "grad_norm": 0.8839249701017515, + "learning_rate": 2.3990761006761654e-06, + "loss": 0.3142, + "step": 45233 + }, + { + "epoch": 0.7816215095382913, + "grad_norm": 1.1316355876502047, + "learning_rate": 2.398712443907825e-06, + "loss": 0.3053, + "step": 45234 + }, + { + "epoch": 0.7816387890517003, + "grad_norm": 1.6386053774038618, + "learning_rate": 2.3983488109472965e-06, + "loss": 0.3678, + "step": 45235 + }, + { + "epoch": 0.7816560685651092, + "grad_norm": 1.6016320715209482, + "learning_rate": 2.397985201795715e-06, + "loss": 0.4006, + "step": 45236 + }, + { + "epoch": 0.7816733480785181, + "grad_norm": 0.9703209276444723, + "learning_rate": 2.397621616454221e-06, + "loss": 0.6207, + "step": 45237 + }, + { + "epoch": 0.781690627591927, + "grad_norm": 2.178706480464548, + "learning_rate": 2.3972580549239575e-06, + "loss": 0.4612, + "step": 45238 + }, + { + "epoch": 0.7817079071053359, + "grad_norm": 1.634405746397735, + "learning_rate": 2.3968945172060566e-06, + "loss": 0.3766, + "step": 45239 + }, + { + "epoch": 0.7817251866187448, + "grad_norm": 2.0292068195990356, + "learning_rate": 2.396531003301662e-06, + "loss": 0.3913, + "step": 45240 + }, + { + "epoch": 0.7817424661321537, + "grad_norm": 1.9360953520929842, + "learning_rate": 2.396167513211908e-06, + "loss": 0.4154, + "step": 45241 + }, + { + "epoch": 0.7817597456455626, + "grad_norm": 1.3946478815983638, + "learning_rate": 2.3958040469379374e-06, + "loss": 0.4283, + "step": 45242 + }, + { + "epoch": 0.7817770251589715, + "grad_norm": 1.4049603154175119, + "learning_rate": 2.395440604480884e-06, + "loss": 0.4982, + "step": 45243 + }, + { + "epoch": 0.7817943046723804, + "grad_norm": 1.3476711237704797, + "learning_rate": 2.395077185841889e-06, + "loss": 0.3382, + "step": 45244 + }, + { + "epoch": 0.7818115841857893, + "grad_norm": 1.4838913327657746, + "learning_rate": 2.3947137910220896e-06, + "loss": 0.2487, + "step": 45245 + }, + { + "epoch": 0.7818288636991982, + "grad_norm": 1.0300687037036715, + "learning_rate": 2.3943504200226277e-06, + "loss": 0.3585, + "step": 45246 + }, + { + "epoch": 0.7818461432126071, + "grad_norm": 1.0090950654584134, + "learning_rate": 2.393987072844638e-06, + "loss": 0.4387, + "step": 45247 + }, + { + "epoch": 0.781863422726016, + "grad_norm": 1.306184793719102, + "learning_rate": 2.3936237494892567e-06, + "loss": 0.411, + "step": 45248 + }, + { + "epoch": 0.781880702239425, + "grad_norm": 1.0647269602263152, + "learning_rate": 2.393260449957623e-06, + "loss": 0.3447, + "step": 45249 + }, + { + "epoch": 0.7818979817528339, + "grad_norm": 1.397263900946473, + "learning_rate": 2.3928971742508765e-06, + "loss": 0.3034, + "step": 45250 + }, + { + "epoch": 0.7819152612662428, + "grad_norm": 1.0757836097289262, + "learning_rate": 2.392533922370156e-06, + "loss": 0.2887, + "step": 45251 + }, + { + "epoch": 0.7819325407796517, + "grad_norm": 1.5726212727996678, + "learning_rate": 2.392170694316596e-06, + "loss": 0.3546, + "step": 45252 + }, + { + "epoch": 0.7819498202930606, + "grad_norm": 1.3317492957313941, + "learning_rate": 2.391807490091337e-06, + "loss": 0.3753, + "step": 45253 + }, + { + "epoch": 0.7819670998064695, + "grad_norm": 1.0376272555826376, + "learning_rate": 2.391444309695513e-06, + "loss": 0.3012, + "step": 45254 + }, + { + "epoch": 0.7819843793198783, + "grad_norm": 1.7441568472151503, + "learning_rate": 2.3910811531302635e-06, + "loss": 0.3869, + "step": 45255 + }, + { + "epoch": 0.7820016588332872, + "grad_norm": 0.8535097577640489, + "learning_rate": 2.390718020396726e-06, + "loss": 0.363, + "step": 45256 + }, + { + "epoch": 0.7820189383466961, + "grad_norm": 2.262537610159343, + "learning_rate": 2.390354911496041e-06, + "loss": 0.4362, + "step": 45257 + }, + { + "epoch": 0.782036217860105, + "grad_norm": 1.5621943141686003, + "learning_rate": 2.389991826429342e-06, + "loss": 0.4265, + "step": 45258 + }, + { + "epoch": 0.7820534973735139, + "grad_norm": 0.7966008107233561, + "learning_rate": 2.3896287651977644e-06, + "loss": 0.363, + "step": 45259 + }, + { + "epoch": 0.7820707768869228, + "grad_norm": 1.7783598314044986, + "learning_rate": 2.389265727802449e-06, + "loss": 0.233, + "step": 45260 + }, + { + "epoch": 0.7820880564003317, + "grad_norm": 1.0870174012520546, + "learning_rate": 2.388902714244531e-06, + "loss": 0.3081, + "step": 45261 + }, + { + "epoch": 0.7821053359137407, + "grad_norm": 1.2094912761952152, + "learning_rate": 2.388539724525151e-06, + "loss": 0.1915, + "step": 45262 + }, + { + "epoch": 0.7821226154271496, + "grad_norm": 1.1346714921498828, + "learning_rate": 2.3881767586454396e-06, + "loss": 0.2998, + "step": 45263 + }, + { + "epoch": 0.7821398949405585, + "grad_norm": 0.8098587860362713, + "learning_rate": 2.38781381660654e-06, + "loss": 0.578, + "step": 45264 + }, + { + "epoch": 0.7821571744539674, + "grad_norm": 0.9702868954591678, + "learning_rate": 2.3874508984095836e-06, + "loss": 0.2314, + "step": 45265 + }, + { + "epoch": 0.7821744539673763, + "grad_norm": 1.1138108795902188, + "learning_rate": 2.38708800405571e-06, + "loss": 0.4375, + "step": 45266 + }, + { + "epoch": 0.7821917334807852, + "grad_norm": 1.2097231648449287, + "learning_rate": 2.386725133546054e-06, + "loss": 0.4033, + "step": 45267 + }, + { + "epoch": 0.7822090129941941, + "grad_norm": 1.3065424945245643, + "learning_rate": 2.386362286881757e-06, + "loss": 0.2456, + "step": 45268 + }, + { + "epoch": 0.782226292507603, + "grad_norm": 1.7336707728367917, + "learning_rate": 2.385999464063953e-06, + "loss": 0.3759, + "step": 45269 + }, + { + "epoch": 0.7822435720210119, + "grad_norm": 1.2659194129609717, + "learning_rate": 2.3856366650937733e-06, + "loss": 0.3024, + "step": 45270 + }, + { + "epoch": 0.7822608515344208, + "grad_norm": 1.428459360845244, + "learning_rate": 2.385273889972358e-06, + "loss": 0.4076, + "step": 45271 + }, + { + "epoch": 0.7822781310478297, + "grad_norm": 1.267050111573763, + "learning_rate": 2.3849111387008437e-06, + "loss": 0.3598, + "step": 45272 + }, + { + "epoch": 0.7822954105612386, + "grad_norm": 1.1109238259625067, + "learning_rate": 2.3845484112803698e-06, + "loss": 0.3451, + "step": 45273 + }, + { + "epoch": 0.7823126900746475, + "grad_norm": 1.5896498496045617, + "learning_rate": 2.3841857077120658e-06, + "loss": 0.4479, + "step": 45274 + }, + { + "epoch": 0.7823299695880565, + "grad_norm": 1.41304939314928, + "learning_rate": 2.3838230279970744e-06, + "loss": 0.3699, + "step": 45275 + }, + { + "epoch": 0.7823472491014652, + "grad_norm": 1.8850429305214949, + "learning_rate": 2.3834603721365247e-06, + "loss": 0.3108, + "step": 45276 + }, + { + "epoch": 0.7823645286148742, + "grad_norm": 1.5043566230642254, + "learning_rate": 2.3830977401315556e-06, + "loss": 0.432, + "step": 45277 + }, + { + "epoch": 0.7823818081282831, + "grad_norm": 1.0336593053537653, + "learning_rate": 2.382735131983307e-06, + "loss": 0.467, + "step": 45278 + }, + { + "epoch": 0.782399087641692, + "grad_norm": 1.4788157187395792, + "learning_rate": 2.382372547692907e-06, + "loss": 0.3238, + "step": 45279 + }, + { + "epoch": 0.7824163671551009, + "grad_norm": 2.5112516053903775, + "learning_rate": 2.3820099872614987e-06, + "loss": 0.3039, + "step": 45280 + }, + { + "epoch": 0.7824336466685098, + "grad_norm": 0.9265757839481931, + "learning_rate": 2.381647450690211e-06, + "loss": 0.301, + "step": 45281 + }, + { + "epoch": 0.7824509261819187, + "grad_norm": 1.307130528428528, + "learning_rate": 2.381284937980186e-06, + "loss": 0.3969, + "step": 45282 + }, + { + "epoch": 0.7824682056953276, + "grad_norm": 1.046555895971295, + "learning_rate": 2.380922449132552e-06, + "loss": 0.3536, + "step": 45283 + }, + { + "epoch": 0.7824854852087365, + "grad_norm": 0.6959679103059991, + "learning_rate": 2.3805599841484483e-06, + "loss": 0.3535, + "step": 45284 + }, + { + "epoch": 0.7825027647221454, + "grad_norm": 2.1948906973370157, + "learning_rate": 2.3801975430290094e-06, + "loss": 0.3792, + "step": 45285 + }, + { + "epoch": 0.7825200442355543, + "grad_norm": 2.432386941151982, + "learning_rate": 2.379835125775374e-06, + "loss": 0.2807, + "step": 45286 + }, + { + "epoch": 0.7825373237489632, + "grad_norm": 1.2909320706999188, + "learning_rate": 2.37947273238867e-06, + "loss": 0.1678, + "step": 45287 + }, + { + "epoch": 0.7825546032623721, + "grad_norm": 1.4108237310074063, + "learning_rate": 2.379110362870041e-06, + "loss": 0.3899, + "step": 45288 + }, + { + "epoch": 0.782571882775781, + "grad_norm": 1.1130651908060452, + "learning_rate": 2.3787480172206133e-06, + "loss": 0.3543, + "step": 45289 + }, + { + "epoch": 0.78258916228919, + "grad_norm": 0.9420436114346433, + "learning_rate": 2.3783856954415276e-06, + "loss": 0.4032, + "step": 45290 + }, + { + "epoch": 0.7826064418025989, + "grad_norm": 1.232268268982822, + "learning_rate": 2.3780233975339196e-06, + "loss": 0.1824, + "step": 45291 + }, + { + "epoch": 0.7826237213160078, + "grad_norm": 0.6932796587409563, + "learning_rate": 2.377661123498918e-06, + "loss": 0.2789, + "step": 45292 + }, + { + "epoch": 0.7826410008294167, + "grad_norm": 1.6995345521654597, + "learning_rate": 2.3772988733376635e-06, + "loss": 0.2783, + "step": 45293 + }, + { + "epoch": 0.7826582803428256, + "grad_norm": 0.9687492380333147, + "learning_rate": 2.3769366470512867e-06, + "loss": 0.4375, + "step": 45294 + }, + { + "epoch": 0.7826755598562345, + "grad_norm": 1.05773877786482, + "learning_rate": 2.376574444640922e-06, + "loss": 0.4412, + "step": 45295 + }, + { + "epoch": 0.7826928393696434, + "grad_norm": 1.1983172517201635, + "learning_rate": 2.376212266107707e-06, + "loss": 0.2672, + "step": 45296 + }, + { + "epoch": 0.7827101188830523, + "grad_norm": 1.4194820522566154, + "learning_rate": 2.3758501114527776e-06, + "loss": 0.4404, + "step": 45297 + }, + { + "epoch": 0.7827273983964611, + "grad_norm": 1.2890578038339227, + "learning_rate": 2.3754879806772613e-06, + "loss": 0.242, + "step": 45298 + }, + { + "epoch": 0.78274467790987, + "grad_norm": 1.4090005440582045, + "learning_rate": 2.375125873782299e-06, + "loss": 0.509, + "step": 45299 + }, + { + "epoch": 0.7827619574232789, + "grad_norm": 1.1312068023108943, + "learning_rate": 2.374763790769019e-06, + "loss": 0.4479, + "step": 45300 + }, + { + "epoch": 0.7827792369366878, + "grad_norm": 1.7983345270954059, + "learning_rate": 2.374401731638559e-06, + "loss": 0.2851, + "step": 45301 + }, + { + "epoch": 0.7827965164500967, + "grad_norm": 1.0250421142855788, + "learning_rate": 2.3740396963920543e-06, + "loss": 0.3658, + "step": 45302 + }, + { + "epoch": 0.7828137959635056, + "grad_norm": 1.1154364458710442, + "learning_rate": 2.3736776850306343e-06, + "loss": 0.3275, + "step": 45303 + }, + { + "epoch": 0.7828310754769146, + "grad_norm": 1.04664057970365, + "learning_rate": 2.3733156975554383e-06, + "loss": 0.2447, + "step": 45304 + }, + { + "epoch": 0.7828483549903235, + "grad_norm": 1.6442564200491556, + "learning_rate": 2.3729537339675947e-06, + "loss": 0.2319, + "step": 45305 + }, + { + "epoch": 0.7828656345037324, + "grad_norm": 1.4076761496051506, + "learning_rate": 2.3725917942682397e-06, + "loss": 0.306, + "step": 45306 + }, + { + "epoch": 0.7828829140171413, + "grad_norm": 1.2280397250457937, + "learning_rate": 2.3722298784585062e-06, + "loss": 0.2637, + "step": 45307 + }, + { + "epoch": 0.7829001935305502, + "grad_norm": 1.432134151986097, + "learning_rate": 2.371867986539531e-06, + "loss": 0.4384, + "step": 45308 + }, + { + "epoch": 0.7829174730439591, + "grad_norm": 1.3215802914730037, + "learning_rate": 2.3715061185124457e-06, + "loss": 0.3289, + "step": 45309 + }, + { + "epoch": 0.782934752557368, + "grad_norm": 1.1983085511647449, + "learning_rate": 2.3711442743783797e-06, + "loss": 0.3589, + "step": 45310 + }, + { + "epoch": 0.7829520320707769, + "grad_norm": 1.1971895111666153, + "learning_rate": 2.3707824541384695e-06, + "loss": 0.2608, + "step": 45311 + }, + { + "epoch": 0.7829693115841858, + "grad_norm": 1.3038170729886303, + "learning_rate": 2.3704206577938495e-06, + "loss": 0.2832, + "step": 45312 + }, + { + "epoch": 0.7829865910975947, + "grad_norm": 1.5540745924690489, + "learning_rate": 2.370058885345654e-06, + "loss": 0.4408, + "step": 45313 + }, + { + "epoch": 0.7830038706110036, + "grad_norm": 1.0816732638712752, + "learning_rate": 2.3696971367950104e-06, + "loss": 0.3541, + "step": 45314 + }, + { + "epoch": 0.7830211501244125, + "grad_norm": 1.0009324783914977, + "learning_rate": 2.369335412143059e-06, + "loss": 0.2848, + "step": 45315 + }, + { + "epoch": 0.7830384296378214, + "grad_norm": 1.7896425266564684, + "learning_rate": 2.368973711390926e-06, + "loss": 0.1599, + "step": 45316 + }, + { + "epoch": 0.7830557091512304, + "grad_norm": 1.8792796187965215, + "learning_rate": 2.3686120345397467e-06, + "loss": 0.1824, + "step": 45317 + }, + { + "epoch": 0.7830729886646393, + "grad_norm": 1.3196729778363505, + "learning_rate": 2.3682503815906553e-06, + "loss": 0.3827, + "step": 45318 + }, + { + "epoch": 0.783090268178048, + "grad_norm": 1.2232052398462876, + "learning_rate": 2.367888752544786e-06, + "loss": 0.31, + "step": 45319 + }, + { + "epoch": 0.783107547691457, + "grad_norm": 0.7062243364171837, + "learning_rate": 2.367527147403269e-06, + "loss": 0.2369, + "step": 45320 + }, + { + "epoch": 0.7831248272048659, + "grad_norm": 1.7489536932835372, + "learning_rate": 2.367165566167234e-06, + "loss": 0.2262, + "step": 45321 + }, + { + "epoch": 0.7831421067182748, + "grad_norm": 1.1724919113772088, + "learning_rate": 2.3668040088378207e-06, + "loss": 0.3928, + "step": 45322 + }, + { + "epoch": 0.7831593862316837, + "grad_norm": 0.9454939311444834, + "learning_rate": 2.366442475416153e-06, + "loss": 0.2697, + "step": 45323 + }, + { + "epoch": 0.7831766657450926, + "grad_norm": 1.179527404061045, + "learning_rate": 2.366080965903369e-06, + "loss": 0.282, + "step": 45324 + }, + { + "epoch": 0.7831939452585015, + "grad_norm": 0.924219715955549, + "learning_rate": 2.365719480300598e-06, + "loss": 0.4703, + "step": 45325 + }, + { + "epoch": 0.7832112247719104, + "grad_norm": 0.8935632598356666, + "learning_rate": 2.3653580186089775e-06, + "loss": 0.4551, + "step": 45326 + }, + { + "epoch": 0.7832285042853193, + "grad_norm": 1.7169293951907914, + "learning_rate": 2.364996580829633e-06, + "loss": 0.4209, + "step": 45327 + }, + { + "epoch": 0.7832457837987282, + "grad_norm": 0.9879632885231086, + "learning_rate": 2.3646351669637025e-06, + "loss": 0.5309, + "step": 45328 + }, + { + "epoch": 0.7832630633121371, + "grad_norm": 1.949909504965101, + "learning_rate": 2.3642737770123112e-06, + "loss": 0.2382, + "step": 45329 + }, + { + "epoch": 0.783280342825546, + "grad_norm": 1.8080299063922731, + "learning_rate": 2.3639124109765954e-06, + "loss": 0.3049, + "step": 45330 + }, + { + "epoch": 0.783297622338955, + "grad_norm": 1.2925461368256042, + "learning_rate": 2.363551068857689e-06, + "loss": 0.3807, + "step": 45331 + }, + { + "epoch": 0.7833149018523639, + "grad_norm": 0.9499082690354245, + "learning_rate": 2.363189750656719e-06, + "loss": 0.2775, + "step": 45332 + }, + { + "epoch": 0.7833321813657728, + "grad_norm": 1.426787544652916, + "learning_rate": 2.3628284563748205e-06, + "loss": 0.4832, + "step": 45333 + }, + { + "epoch": 0.7833494608791817, + "grad_norm": 0.6366575053315202, + "learning_rate": 2.3624671860131222e-06, + "loss": 0.4633, + "step": 45334 + }, + { + "epoch": 0.7833667403925906, + "grad_norm": 1.3304764878768085, + "learning_rate": 2.3621059395727563e-06, + "loss": 0.2169, + "step": 45335 + }, + { + "epoch": 0.7833840199059995, + "grad_norm": 1.3968545285233438, + "learning_rate": 2.361744717054855e-06, + "loss": 0.4145, + "step": 45336 + }, + { + "epoch": 0.7834012994194084, + "grad_norm": 0.7887170711704297, + "learning_rate": 2.3613835184605527e-06, + "loss": 0.7523, + "step": 45337 + }, + { + "epoch": 0.7834185789328173, + "grad_norm": 1.847632602832255, + "learning_rate": 2.3610223437909752e-06, + "loss": 0.2334, + "step": 45338 + }, + { + "epoch": 0.7834358584462262, + "grad_norm": 1.4024189115681838, + "learning_rate": 2.360661193047258e-06, + "loss": 0.6719, + "step": 45339 + }, + { + "epoch": 0.783453137959635, + "grad_norm": 1.453993682996266, + "learning_rate": 2.3603000662305286e-06, + "loss": 0.3822, + "step": 45340 + }, + { + "epoch": 0.7834704174730439, + "grad_norm": 1.9177923195012743, + "learning_rate": 2.35993896334192e-06, + "loss": 0.2275, + "step": 45341 + }, + { + "epoch": 0.7834876969864528, + "grad_norm": 1.542011781734415, + "learning_rate": 2.3595778843825655e-06, + "loss": 0.503, + "step": 45342 + }, + { + "epoch": 0.7835049764998617, + "grad_norm": 1.1202718672026555, + "learning_rate": 2.359216829353591e-06, + "loss": 0.2894, + "step": 45343 + }, + { + "epoch": 0.7835222560132706, + "grad_norm": 1.1998350932310393, + "learning_rate": 2.3588557982561334e-06, + "loss": 0.6252, + "step": 45344 + }, + { + "epoch": 0.7835395355266795, + "grad_norm": 1.4867781857376006, + "learning_rate": 2.358494791091317e-06, + "loss": 0.4541, + "step": 45345 + }, + { + "epoch": 0.7835568150400885, + "grad_norm": 1.1159624325380215, + "learning_rate": 2.3581338078602757e-06, + "loss": 0.369, + "step": 45346 + }, + { + "epoch": 0.7835740945534974, + "grad_norm": 1.1944199517770744, + "learning_rate": 2.3577728485641403e-06, + "loss": 0.3632, + "step": 45347 + }, + { + "epoch": 0.7835913740669063, + "grad_norm": 1.213538578904361, + "learning_rate": 2.357411913204044e-06, + "loss": 0.4156, + "step": 45348 + }, + { + "epoch": 0.7836086535803152, + "grad_norm": 1.2515723872420261, + "learning_rate": 2.357051001781111e-06, + "loss": 0.3131, + "step": 45349 + }, + { + "epoch": 0.7836259330937241, + "grad_norm": 1.0340012083722503, + "learning_rate": 2.3566901142964782e-06, + "loss": 0.3136, + "step": 45350 + }, + { + "epoch": 0.783643212607133, + "grad_norm": 0.9393001144203773, + "learning_rate": 2.35632925075127e-06, + "loss": 0.1679, + "step": 45351 + }, + { + "epoch": 0.7836604921205419, + "grad_norm": 1.7350167446948288, + "learning_rate": 2.3559684111466195e-06, + "loss": 0.2207, + "step": 45352 + }, + { + "epoch": 0.7836777716339508, + "grad_norm": 1.1364812540177962, + "learning_rate": 2.35560759548366e-06, + "loss": 0.3878, + "step": 45353 + }, + { + "epoch": 0.7836950511473597, + "grad_norm": 1.1387811366271017, + "learning_rate": 2.3552468037635155e-06, + "loss": 0.4419, + "step": 45354 + }, + { + "epoch": 0.7837123306607686, + "grad_norm": 0.7862203043512744, + "learning_rate": 2.3548860359873215e-06, + "loss": 0.3249, + "step": 45355 + }, + { + "epoch": 0.7837296101741775, + "grad_norm": 1.21472227661954, + "learning_rate": 2.3545252921562034e-06, + "loss": 0.2598, + "step": 45356 + }, + { + "epoch": 0.7837468896875864, + "grad_norm": 1.390086990870171, + "learning_rate": 2.3541645722712924e-06, + "loss": 0.4524, + "step": 45357 + }, + { + "epoch": 0.7837641692009953, + "grad_norm": 2.4792614416237115, + "learning_rate": 2.353803876333719e-06, + "loss": 0.3296, + "step": 45358 + }, + { + "epoch": 0.7837814487144042, + "grad_norm": 1.4731981041851085, + "learning_rate": 2.3534432043446156e-06, + "loss": 0.313, + "step": 45359 + }, + { + "epoch": 0.7837987282278132, + "grad_norm": 1.7793352234195057, + "learning_rate": 2.3530825563051095e-06, + "loss": 0.4625, + "step": 45360 + }, + { + "epoch": 0.783816007741222, + "grad_norm": 2.6800151275680166, + "learning_rate": 2.3527219322163274e-06, + "loss": 0.4528, + "step": 45361 + }, + { + "epoch": 0.7838332872546309, + "grad_norm": 1.4141857964243014, + "learning_rate": 2.3523613320794035e-06, + "loss": 0.3765, + "step": 45362 + }, + { + "epoch": 0.7838505667680398, + "grad_norm": 1.5433007886756234, + "learning_rate": 2.3520007558954637e-06, + "loss": 0.3911, + "step": 45363 + }, + { + "epoch": 0.7838678462814487, + "grad_norm": 0.8717587904707851, + "learning_rate": 2.3516402036656373e-06, + "loss": 0.4754, + "step": 45364 + }, + { + "epoch": 0.7838851257948576, + "grad_norm": 1.8641825804965004, + "learning_rate": 2.351279675391056e-06, + "loss": 0.2788, + "step": 45365 + }, + { + "epoch": 0.7839024053082665, + "grad_norm": 1.0256631275465395, + "learning_rate": 2.3509191710728508e-06, + "loss": 0.4233, + "step": 45366 + }, + { + "epoch": 0.7839196848216754, + "grad_norm": 1.9633667511641233, + "learning_rate": 2.3505586907121445e-06, + "loss": 0.3084, + "step": 45367 + }, + { + "epoch": 0.7839369643350843, + "grad_norm": 1.2269351905753223, + "learning_rate": 2.3501982343100726e-06, + "loss": 0.3886, + "step": 45368 + }, + { + "epoch": 0.7839542438484932, + "grad_norm": 0.6782725129005868, + "learning_rate": 2.349837801867759e-06, + "loss": 0.5256, + "step": 45369 + }, + { + "epoch": 0.7839715233619021, + "grad_norm": 1.404782567052029, + "learning_rate": 2.3494773933863344e-06, + "loss": 0.2843, + "step": 45370 + }, + { + "epoch": 0.783988802875311, + "grad_norm": 0.8780977308717204, + "learning_rate": 2.3491170088669314e-06, + "loss": 0.3383, + "step": 45371 + }, + { + "epoch": 0.7840060823887199, + "grad_norm": 0.9789045156979541, + "learning_rate": 2.3487566483106715e-06, + "loss": 0.3769, + "step": 45372 + }, + { + "epoch": 0.7840233619021288, + "grad_norm": 0.5608345314683352, + "learning_rate": 2.34839631171869e-06, + "loss": 0.6033, + "step": 45373 + }, + { + "epoch": 0.7840406414155378, + "grad_norm": 1.0402878011583565, + "learning_rate": 2.348035999092111e-06, + "loss": 0.2886, + "step": 45374 + }, + { + "epoch": 0.7840579209289467, + "grad_norm": 1.1242639549116449, + "learning_rate": 2.3476757104320645e-06, + "loss": 0.1372, + "step": 45375 + }, + { + "epoch": 0.7840752004423556, + "grad_norm": 1.1142138874346923, + "learning_rate": 2.3473154457396795e-06, + "loss": 0.5039, + "step": 45376 + }, + { + "epoch": 0.7840924799557645, + "grad_norm": 1.7412334436194588, + "learning_rate": 2.346955205016086e-06, + "loss": 0.2354, + "step": 45377 + }, + { + "epoch": 0.7841097594691734, + "grad_norm": 1.457540109077017, + "learning_rate": 2.3465949882624084e-06, + "loss": 0.2138, + "step": 45378 + }, + { + "epoch": 0.7841270389825823, + "grad_norm": 1.1169015499925539, + "learning_rate": 2.346234795479779e-06, + "loss": 0.4021, + "step": 45379 + }, + { + "epoch": 0.7841443184959912, + "grad_norm": 1.0594087182295489, + "learning_rate": 2.3458746266693224e-06, + "loss": 0.3395, + "step": 45380 + }, + { + "epoch": 0.7841615980094001, + "grad_norm": 1.4605227146625404, + "learning_rate": 2.3455144818321664e-06, + "loss": 0.4039, + "step": 45381 + }, + { + "epoch": 0.7841788775228089, + "grad_norm": 1.254889128320251, + "learning_rate": 2.345154360969445e-06, + "loss": 0.6633, + "step": 45382 + }, + { + "epoch": 0.7841961570362178, + "grad_norm": 1.0962472679882156, + "learning_rate": 2.3447942640822786e-06, + "loss": 0.366, + "step": 45383 + }, + { + "epoch": 0.7842134365496267, + "grad_norm": 1.3428762152216818, + "learning_rate": 2.3444341911718003e-06, + "loss": 0.4729, + "step": 45384 + }, + { + "epoch": 0.7842307160630356, + "grad_norm": 1.2415514578629967, + "learning_rate": 2.3440741422391345e-06, + "loss": 0.3129, + "step": 45385 + }, + { + "epoch": 0.7842479955764445, + "grad_norm": 2.1106738099391915, + "learning_rate": 2.3437141172854094e-06, + "loss": 0.2189, + "step": 45386 + }, + { + "epoch": 0.7842652750898534, + "grad_norm": 1.8969754869020492, + "learning_rate": 2.3433541163117533e-06, + "loss": 0.3714, + "step": 45387 + }, + { + "epoch": 0.7842825546032623, + "grad_norm": 1.20799769943085, + "learning_rate": 2.342994139319298e-06, + "loss": 0.1698, + "step": 45388 + }, + { + "epoch": 0.7842998341166713, + "grad_norm": 1.0036367247945672, + "learning_rate": 2.342634186309164e-06, + "loss": 0.4462, + "step": 45389 + }, + { + "epoch": 0.7843171136300802, + "grad_norm": 1.3366099514864607, + "learning_rate": 2.3422742572824843e-06, + "loss": 0.3308, + "step": 45390 + }, + { + "epoch": 0.7843343931434891, + "grad_norm": 1.1088263706038666, + "learning_rate": 2.34191435224038e-06, + "loss": 0.5136, + "step": 45391 + }, + { + "epoch": 0.784351672656898, + "grad_norm": 1.087689419642958, + "learning_rate": 2.341554471183982e-06, + "loss": 0.2639, + "step": 45392 + }, + { + "epoch": 0.7843689521703069, + "grad_norm": 1.1302095715386524, + "learning_rate": 2.3411946141144205e-06, + "loss": 0.459, + "step": 45393 + }, + { + "epoch": 0.7843862316837158, + "grad_norm": 1.3924774406210003, + "learning_rate": 2.340834781032817e-06, + "loss": 0.2118, + "step": 45394 + }, + { + "epoch": 0.7844035111971247, + "grad_norm": 1.001125693790177, + "learning_rate": 2.3404749719403042e-06, + "loss": 0.2877, + "step": 45395 + }, + { + "epoch": 0.7844207907105336, + "grad_norm": 1.1356065361077696, + "learning_rate": 2.3401151868380024e-06, + "loss": 0.5101, + "step": 45396 + }, + { + "epoch": 0.7844380702239425, + "grad_norm": 1.3300361713582902, + "learning_rate": 2.3397554257270414e-06, + "loss": 0.2754, + "step": 45397 + }, + { + "epoch": 0.7844553497373514, + "grad_norm": 1.0059407556876936, + "learning_rate": 2.33939568860855e-06, + "loss": 0.3025, + "step": 45398 + }, + { + "epoch": 0.7844726292507603, + "grad_norm": 1.5234601348746046, + "learning_rate": 2.3390359754836555e-06, + "loss": 0.407, + "step": 45399 + }, + { + "epoch": 0.7844899087641692, + "grad_norm": 1.664397680557758, + "learning_rate": 2.3386762863534795e-06, + "loss": 0.2876, + "step": 45400 + }, + { + "epoch": 0.7845071882775781, + "grad_norm": 1.3010402771843943, + "learning_rate": 2.3383166212191555e-06, + "loss": 0.2953, + "step": 45401 + }, + { + "epoch": 0.7845244677909871, + "grad_norm": 1.1170472750971339, + "learning_rate": 2.337956980081805e-06, + "loss": 0.3095, + "step": 45402 + }, + { + "epoch": 0.7845417473043959, + "grad_norm": 1.1151526343536655, + "learning_rate": 2.337597362942553e-06, + "loss": 0.4681, + "step": 45403 + }, + { + "epoch": 0.7845590268178048, + "grad_norm": 1.0244306994520136, + "learning_rate": 2.3372377698025285e-06, + "loss": 0.4762, + "step": 45404 + }, + { + "epoch": 0.7845763063312137, + "grad_norm": 2.459307536768308, + "learning_rate": 2.3368782006628576e-06, + "loss": 0.4527, + "step": 45405 + }, + { + "epoch": 0.7845935858446226, + "grad_norm": 1.35771728049667, + "learning_rate": 2.3365186555246687e-06, + "loss": 0.2664, + "step": 45406 + }, + { + "epoch": 0.7846108653580315, + "grad_norm": 1.420793682500057, + "learning_rate": 2.3361591343890845e-06, + "loss": 0.4519, + "step": 45407 + }, + { + "epoch": 0.7846281448714404, + "grad_norm": 1.2732507742818624, + "learning_rate": 2.3357996372572346e-06, + "loss": 0.3178, + "step": 45408 + }, + { + "epoch": 0.7846454243848493, + "grad_norm": 1.8728892047219061, + "learning_rate": 2.3354401641302395e-06, + "loss": 0.3899, + "step": 45409 + }, + { + "epoch": 0.7846627038982582, + "grad_norm": 0.851960111983972, + "learning_rate": 2.335080715009228e-06, + "loss": 0.2393, + "step": 45410 + }, + { + "epoch": 0.7846799834116671, + "grad_norm": 1.0862714307578345, + "learning_rate": 2.3347212898953264e-06, + "loss": 0.3542, + "step": 45411 + }, + { + "epoch": 0.784697262925076, + "grad_norm": 2.0754790580443667, + "learning_rate": 2.3343618887896637e-06, + "loss": 0.2866, + "step": 45412 + }, + { + "epoch": 0.7847145424384849, + "grad_norm": 0.9497966125300027, + "learning_rate": 2.3340025116933616e-06, + "loss": 0.2524, + "step": 45413 + }, + { + "epoch": 0.7847318219518938, + "grad_norm": 1.5128239045332539, + "learning_rate": 2.3336431586075437e-06, + "loss": 0.237, + "step": 45414 + }, + { + "epoch": 0.7847491014653027, + "grad_norm": 1.4920363753398203, + "learning_rate": 2.333283829533337e-06, + "loss": 0.2675, + "step": 45415 + }, + { + "epoch": 0.7847663809787117, + "grad_norm": 0.8221333854026941, + "learning_rate": 2.332924524471869e-06, + "loss": 0.5808, + "step": 45416 + }, + { + "epoch": 0.7847836604921206, + "grad_norm": 0.9022116838054913, + "learning_rate": 2.3325652434242662e-06, + "loss": 0.3151, + "step": 45417 + }, + { + "epoch": 0.7848009400055295, + "grad_norm": 1.142234393313678, + "learning_rate": 2.33220598639165e-06, + "loss": 0.2763, + "step": 45418 + }, + { + "epoch": 0.7848182195189384, + "grad_norm": 1.579882843646277, + "learning_rate": 2.33184675337515e-06, + "loss": 0.3469, + "step": 45419 + }, + { + "epoch": 0.7848354990323473, + "grad_norm": 0.9928456266265193, + "learning_rate": 2.3314875443758854e-06, + "loss": 0.3289, + "step": 45420 + }, + { + "epoch": 0.7848527785457562, + "grad_norm": 1.2670267345503243, + "learning_rate": 2.3311283593949842e-06, + "loss": 0.4166, + "step": 45421 + }, + { + "epoch": 0.7848700580591651, + "grad_norm": 1.654529843691019, + "learning_rate": 2.3307691984335755e-06, + "loss": 0.5463, + "step": 45422 + }, + { + "epoch": 0.784887337572574, + "grad_norm": 1.142725523345011, + "learning_rate": 2.3304100614927772e-06, + "loss": 0.381, + "step": 45423 + }, + { + "epoch": 0.7849046170859829, + "grad_norm": 0.681435347026309, + "learning_rate": 2.330050948573721e-06, + "loss": 0.9169, + "step": 45424 + }, + { + "epoch": 0.7849218965993917, + "grad_norm": 1.3448486975100584, + "learning_rate": 2.3296918596775244e-06, + "loss": 0.5236, + "step": 45425 + }, + { + "epoch": 0.7849391761128006, + "grad_norm": 1.5723720596485975, + "learning_rate": 2.3293327948053158e-06, + "loss": 0.4898, + "step": 45426 + }, + { + "epoch": 0.7849564556262095, + "grad_norm": 2.2568964533177933, + "learning_rate": 2.32897375395822e-06, + "loss": 0.2931, + "step": 45427 + }, + { + "epoch": 0.7849737351396184, + "grad_norm": 1.7688498716849081, + "learning_rate": 2.3286147371373646e-06, + "loss": 0.3231, + "step": 45428 + }, + { + "epoch": 0.7849910146530273, + "grad_norm": 1.499941199093606, + "learning_rate": 2.328255744343868e-06, + "loss": 0.2619, + "step": 45429 + }, + { + "epoch": 0.7850082941664362, + "grad_norm": 1.0387826296204519, + "learning_rate": 2.3278967755788607e-06, + "loss": 0.181, + "step": 45430 + }, + { + "epoch": 0.7850255736798452, + "grad_norm": 1.2436786503202462, + "learning_rate": 2.3275378308434605e-06, + "loss": 0.3699, + "step": 45431 + }, + { + "epoch": 0.7850428531932541, + "grad_norm": 0.949818970223234, + "learning_rate": 2.3271789101387943e-06, + "loss": 0.2145, + "step": 45432 + }, + { + "epoch": 0.785060132706663, + "grad_norm": 1.4858511260178908, + "learning_rate": 2.32682001346599e-06, + "loss": 0.3535, + "step": 45433 + }, + { + "epoch": 0.7850774122200719, + "grad_norm": 1.224283948279401, + "learning_rate": 2.326461140826166e-06, + "loss": 0.3777, + "step": 45434 + }, + { + "epoch": 0.7850946917334808, + "grad_norm": 1.135059597751399, + "learning_rate": 2.3261022922204512e-06, + "loss": 0.3195, + "step": 45435 + }, + { + "epoch": 0.7851119712468897, + "grad_norm": 1.6507163631944914, + "learning_rate": 2.3257434676499646e-06, + "loss": 0.3784, + "step": 45436 + }, + { + "epoch": 0.7851292507602986, + "grad_norm": 1.9618111835318452, + "learning_rate": 2.3253846671158353e-06, + "loss": 0.2427, + "step": 45437 + }, + { + "epoch": 0.7851465302737075, + "grad_norm": 1.5699121324437917, + "learning_rate": 2.3250258906191793e-06, + "loss": 0.2492, + "step": 45438 + }, + { + "epoch": 0.7851638097871164, + "grad_norm": 2.1879844058450963, + "learning_rate": 2.324667138161132e-06, + "loss": 0.3255, + "step": 45439 + }, + { + "epoch": 0.7851810893005253, + "grad_norm": 1.4396050167241508, + "learning_rate": 2.324308409742806e-06, + "loss": 0.2688, + "step": 45440 + }, + { + "epoch": 0.7851983688139342, + "grad_norm": 1.778386786030388, + "learning_rate": 2.3239497053653326e-06, + "loss": 0.2744, + "step": 45441 + }, + { + "epoch": 0.7852156483273431, + "grad_norm": 1.1941520855699013, + "learning_rate": 2.32359102502983e-06, + "loss": 0.4385, + "step": 45442 + }, + { + "epoch": 0.785232927840752, + "grad_norm": 1.1033459201534803, + "learning_rate": 2.3232323687374258e-06, + "loss": 0.2237, + "step": 45443 + }, + { + "epoch": 0.785250207354161, + "grad_norm": 1.4286391846051112, + "learning_rate": 2.3228737364892397e-06, + "loss": 0.3919, + "step": 45444 + }, + { + "epoch": 0.7852674868675699, + "grad_norm": 1.2400444166576328, + "learning_rate": 2.3225151282863946e-06, + "loss": 0.3958, + "step": 45445 + }, + { + "epoch": 0.7852847663809787, + "grad_norm": 1.1841786690998861, + "learning_rate": 2.32215654413002e-06, + "loss": 0.418, + "step": 45446 + }, + { + "epoch": 0.7853020458943876, + "grad_norm": 2.359242694930734, + "learning_rate": 2.3217979840212303e-06, + "loss": 0.3638, + "step": 45447 + }, + { + "epoch": 0.7853193254077965, + "grad_norm": 1.1058367497019965, + "learning_rate": 2.3214394479611566e-06, + "loss": 0.4566, + "step": 45448 + }, + { + "epoch": 0.7853366049212054, + "grad_norm": 1.3264055807276698, + "learning_rate": 2.3210809359509157e-06, + "loss": 0.3403, + "step": 45449 + }, + { + "epoch": 0.7853538844346143, + "grad_norm": 2.0642744424327684, + "learning_rate": 2.3207224479916324e-06, + "loss": 0.3975, + "step": 45450 + }, + { + "epoch": 0.7853711639480232, + "grad_norm": 1.5951506676728178, + "learning_rate": 2.3203639840844293e-06, + "loss": 0.3732, + "step": 45451 + }, + { + "epoch": 0.7853884434614321, + "grad_norm": 1.485563545092278, + "learning_rate": 2.3200055442304336e-06, + "loss": 0.3783, + "step": 45452 + }, + { + "epoch": 0.785405722974841, + "grad_norm": 1.159279695999311, + "learning_rate": 2.3196471284307633e-06, + "loss": 0.4652, + "step": 45453 + }, + { + "epoch": 0.7854230024882499, + "grad_norm": 1.1585014421810862, + "learning_rate": 2.3192887366865403e-06, + "loss": 0.2957, + "step": 45454 + }, + { + "epoch": 0.7854402820016588, + "grad_norm": 0.8770957211952213, + "learning_rate": 2.3189303689988873e-06, + "loss": 0.3637, + "step": 45455 + }, + { + "epoch": 0.7854575615150677, + "grad_norm": 1.0210807283212082, + "learning_rate": 2.318572025368929e-06, + "loss": 0.3968, + "step": 45456 + }, + { + "epoch": 0.7854748410284766, + "grad_norm": 0.5507757954524295, + "learning_rate": 2.3182137057977896e-06, + "loss": 0.7758, + "step": 45457 + }, + { + "epoch": 0.7854921205418856, + "grad_norm": 0.47384551750973253, + "learning_rate": 2.3178554102865857e-06, + "loss": 0.5267, + "step": 45458 + }, + { + "epoch": 0.7855094000552945, + "grad_norm": 1.6827894216891852, + "learning_rate": 2.3174971388364453e-06, + "loss": 0.2817, + "step": 45459 + }, + { + "epoch": 0.7855266795687034, + "grad_norm": 0.991004405202142, + "learning_rate": 2.317138891448485e-06, + "loss": 0.2935, + "step": 45460 + }, + { + "epoch": 0.7855439590821123, + "grad_norm": 1.929342182027825, + "learning_rate": 2.316780668123829e-06, + "loss": 0.2664, + "step": 45461 + }, + { + "epoch": 0.7855612385955212, + "grad_norm": 1.139443103615459, + "learning_rate": 2.3164224688636006e-06, + "loss": 0.4687, + "step": 45462 + }, + { + "epoch": 0.7855785181089301, + "grad_norm": 1.395294490253532, + "learning_rate": 2.3160642936689236e-06, + "loss": 0.3852, + "step": 45463 + }, + { + "epoch": 0.785595797622339, + "grad_norm": 0.9228879255775014, + "learning_rate": 2.315706142540918e-06, + "loss": 0.297, + "step": 45464 + }, + { + "epoch": 0.7856130771357479, + "grad_norm": 1.2466517838980007, + "learning_rate": 2.315348015480701e-06, + "loss": 0.2579, + "step": 45465 + }, + { + "epoch": 0.7856303566491568, + "grad_norm": 1.095923159460455, + "learning_rate": 2.3149899124893984e-06, + "loss": 0.3447, + "step": 45466 + }, + { + "epoch": 0.7856476361625656, + "grad_norm": 1.5123400043915385, + "learning_rate": 2.3146318335681304e-06, + "loss": 0.4802, + "step": 45467 + }, + { + "epoch": 0.7856649156759745, + "grad_norm": 0.8714610514780774, + "learning_rate": 2.3142737787180235e-06, + "loss": 0.6257, + "step": 45468 + }, + { + "epoch": 0.7856821951893834, + "grad_norm": 0.9090373765847648, + "learning_rate": 2.313915747940192e-06, + "loss": 0.5969, + "step": 45469 + }, + { + "epoch": 0.7856994747027923, + "grad_norm": 1.2044796008851284, + "learning_rate": 2.3135577412357637e-06, + "loss": 0.2819, + "step": 45470 + }, + { + "epoch": 0.7857167542162012, + "grad_norm": 0.9995705141434686, + "learning_rate": 2.313199758605853e-06, + "loss": 0.2947, + "step": 45471 + }, + { + "epoch": 0.7857340337296101, + "grad_norm": 1.4177493211736127, + "learning_rate": 2.3128418000515853e-06, + "loss": 0.2605, + "step": 45472 + }, + { + "epoch": 0.7857513132430191, + "grad_norm": 2.09244048048591, + "learning_rate": 2.312483865574081e-06, + "loss": 0.3973, + "step": 45473 + }, + { + "epoch": 0.785768592756428, + "grad_norm": 1.9162767240884522, + "learning_rate": 2.3121259551744647e-06, + "loss": 0.4014, + "step": 45474 + }, + { + "epoch": 0.7857858722698369, + "grad_norm": 1.3056545148898373, + "learning_rate": 2.3117680688538548e-06, + "loss": 0.2978, + "step": 45475 + }, + { + "epoch": 0.7858031517832458, + "grad_norm": 1.04106885489315, + "learning_rate": 2.311410206613367e-06, + "loss": 0.446, + "step": 45476 + }, + { + "epoch": 0.7858204312966547, + "grad_norm": 1.4240613251653904, + "learning_rate": 2.311052368454131e-06, + "loss": 0.3093, + "step": 45477 + }, + { + "epoch": 0.7858377108100636, + "grad_norm": 1.3722446187018666, + "learning_rate": 2.3106945543772576e-06, + "loss": 0.4086, + "step": 45478 + }, + { + "epoch": 0.7858549903234725, + "grad_norm": 1.537298329979991, + "learning_rate": 2.310336764383878e-06, + "loss": 0.2863, + "step": 45479 + }, + { + "epoch": 0.7858722698368814, + "grad_norm": 1.1427683609479382, + "learning_rate": 2.3099789984751065e-06, + "loss": 0.7292, + "step": 45480 + }, + { + "epoch": 0.7858895493502903, + "grad_norm": 1.3752330344311479, + "learning_rate": 2.309621256652067e-06, + "loss": 0.3805, + "step": 45481 + }, + { + "epoch": 0.7859068288636992, + "grad_norm": 1.0094118434655486, + "learning_rate": 2.3092635389158757e-06, + "loss": 0.2912, + "step": 45482 + }, + { + "epoch": 0.7859241083771081, + "grad_norm": 0.8402612607696991, + "learning_rate": 2.308905845267658e-06, + "loss": 0.2479, + "step": 45483 + }, + { + "epoch": 0.785941387890517, + "grad_norm": 1.0968173194176165, + "learning_rate": 2.3085481757085295e-06, + "loss": 0.4201, + "step": 45484 + }, + { + "epoch": 0.785958667403926, + "grad_norm": 1.296574062932466, + "learning_rate": 2.3081905302396124e-06, + "loss": 0.3879, + "step": 45485 + }, + { + "epoch": 0.7859759469173349, + "grad_norm": 1.551088560183716, + "learning_rate": 2.3078329088620298e-06, + "loss": 0.3453, + "step": 45486 + }, + { + "epoch": 0.7859932264307438, + "grad_norm": 1.34534417963723, + "learning_rate": 2.3074753115768953e-06, + "loss": 0.3632, + "step": 45487 + }, + { + "epoch": 0.7860105059441526, + "grad_norm": 1.699357641518364, + "learning_rate": 2.307117738385337e-06, + "loss": 0.2893, + "step": 45488 + }, + { + "epoch": 0.7860277854575615, + "grad_norm": 1.4781465203824615, + "learning_rate": 2.3067601892884672e-06, + "loss": 0.2539, + "step": 45489 + }, + { + "epoch": 0.7860450649709704, + "grad_norm": 0.8127359636842356, + "learning_rate": 2.3064026642874093e-06, + "loss": 0.4882, + "step": 45490 + }, + { + "epoch": 0.7860623444843793, + "grad_norm": 1.0277297242563985, + "learning_rate": 2.3060451633832837e-06, + "loss": 0.2907, + "step": 45491 + }, + { + "epoch": 0.7860796239977882, + "grad_norm": 1.2911456515684594, + "learning_rate": 2.3056876865772117e-06, + "loss": 0.2312, + "step": 45492 + }, + { + "epoch": 0.7860969035111971, + "grad_norm": 0.9643579794012086, + "learning_rate": 2.3053302338703065e-06, + "loss": 0.3407, + "step": 45493 + }, + { + "epoch": 0.786114183024606, + "grad_norm": 1.8181054513461867, + "learning_rate": 2.3049728052636956e-06, + "loss": 0.3094, + "step": 45494 + }, + { + "epoch": 0.7861314625380149, + "grad_norm": 1.7671429354403423, + "learning_rate": 2.304615400758492e-06, + "loss": 0.456, + "step": 45495 + }, + { + "epoch": 0.7861487420514238, + "grad_norm": 1.1674699527734704, + "learning_rate": 2.3042580203558172e-06, + "loss": 0.4332, + "step": 45496 + }, + { + "epoch": 0.7861660215648327, + "grad_norm": 1.5940061101464003, + "learning_rate": 2.303900664056795e-06, + "loss": 0.439, + "step": 45497 + }, + { + "epoch": 0.7861833010782416, + "grad_norm": 0.9913895048044115, + "learning_rate": 2.3035433318625377e-06, + "loss": 0.4034, + "step": 45498 + }, + { + "epoch": 0.7862005805916505, + "grad_norm": 1.2700404917996686, + "learning_rate": 2.30318602377417e-06, + "loss": 0.2756, + "step": 45499 + }, + { + "epoch": 0.7862178601050595, + "grad_norm": 1.4570705832506397, + "learning_rate": 2.3028287397928063e-06, + "loss": 0.4445, + "step": 45500 + }, + { + "epoch": 0.7862351396184684, + "grad_norm": 1.0382091358947767, + "learning_rate": 2.302471479919567e-06, + "loss": 0.359, + "step": 45501 + }, + { + "epoch": 0.7862524191318773, + "grad_norm": 0.9167369319936528, + "learning_rate": 2.3021142441555724e-06, + "loss": 0.2551, + "step": 45502 + }, + { + "epoch": 0.7862696986452862, + "grad_norm": 1.6717328275319217, + "learning_rate": 2.3017570325019446e-06, + "loss": 0.3313, + "step": 45503 + }, + { + "epoch": 0.7862869781586951, + "grad_norm": 1.2715652914520605, + "learning_rate": 2.3013998449597976e-06, + "loss": 0.3283, + "step": 45504 + }, + { + "epoch": 0.786304257672104, + "grad_norm": 1.3135091290332979, + "learning_rate": 2.3010426815302488e-06, + "loss": 0.2534, + "step": 45505 + }, + { + "epoch": 0.7863215371855129, + "grad_norm": 1.0231344541520235, + "learning_rate": 2.30068554221442e-06, + "loss": 0.3571, + "step": 45506 + }, + { + "epoch": 0.7863388166989218, + "grad_norm": 1.3206894129034579, + "learning_rate": 2.30032842701343e-06, + "loss": 0.2165, + "step": 45507 + }, + { + "epoch": 0.7863560962123307, + "grad_norm": 1.9002098236040552, + "learning_rate": 2.299971335928397e-06, + "loss": 0.392, + "step": 45508 + }, + { + "epoch": 0.7863733757257395, + "grad_norm": 1.6587629334641723, + "learning_rate": 2.2996142689604384e-06, + "loss": 0.4802, + "step": 45509 + }, + { + "epoch": 0.7863906552391484, + "grad_norm": 1.5094991447313544, + "learning_rate": 2.299257226110675e-06, + "loss": 0.3792, + "step": 45510 + }, + { + "epoch": 0.7864079347525573, + "grad_norm": 1.2207256377001072, + "learning_rate": 2.29890020738022e-06, + "loss": 0.3113, + "step": 45511 + }, + { + "epoch": 0.7864252142659662, + "grad_norm": 1.4614737690431676, + "learning_rate": 2.2985432127701945e-06, + "loss": 0.1802, + "step": 45512 + }, + { + "epoch": 0.7864424937793751, + "grad_norm": 1.6486216037295052, + "learning_rate": 2.2981862422817182e-06, + "loss": 0.259, + "step": 45513 + }, + { + "epoch": 0.786459773292784, + "grad_norm": 0.9243010591459768, + "learning_rate": 2.2978292959159098e-06, + "loss": 0.4444, + "step": 45514 + }, + { + "epoch": 0.786477052806193, + "grad_norm": 1.2689245873810013, + "learning_rate": 2.297472373673886e-06, + "loss": 0.5721, + "step": 45515 + }, + { + "epoch": 0.7864943323196019, + "grad_norm": 1.0866441685718387, + "learning_rate": 2.2971154755567614e-06, + "loss": 0.3591, + "step": 45516 + }, + { + "epoch": 0.7865116118330108, + "grad_norm": 1.8467264689691347, + "learning_rate": 2.2967586015656586e-06, + "loss": 0.1914, + "step": 45517 + }, + { + "epoch": 0.7865288913464197, + "grad_norm": 0.9530856064216507, + "learning_rate": 2.296401751701688e-06, + "loss": 0.2192, + "step": 45518 + }, + { + "epoch": 0.7865461708598286, + "grad_norm": 1.6590602002560255, + "learning_rate": 2.296044925965978e-06, + "loss": 0.3198, + "step": 45519 + }, + { + "epoch": 0.7865634503732375, + "grad_norm": 1.326453269563428, + "learning_rate": 2.2956881243596383e-06, + "loss": 0.2817, + "step": 45520 + }, + { + "epoch": 0.7865807298866464, + "grad_norm": 1.1363325448309174, + "learning_rate": 2.295331346883791e-06, + "loss": 0.3453, + "step": 45521 + }, + { + "epoch": 0.7865980094000553, + "grad_norm": 1.0227550895671051, + "learning_rate": 2.2949745935395495e-06, + "loss": 0.2468, + "step": 45522 + }, + { + "epoch": 0.7866152889134642, + "grad_norm": 1.2201880521726933, + "learning_rate": 2.294617864328036e-06, + "loss": 0.3356, + "step": 45523 + }, + { + "epoch": 0.7866325684268731, + "grad_norm": 2.2139287877288027, + "learning_rate": 2.2942611592503592e-06, + "loss": 0.4835, + "step": 45524 + }, + { + "epoch": 0.786649847940282, + "grad_norm": 1.13018109167863, + "learning_rate": 2.293904478307647e-06, + "loss": 0.3993, + "step": 45525 + }, + { + "epoch": 0.7866671274536909, + "grad_norm": 1.6837773728758307, + "learning_rate": 2.2935478215010122e-06, + "loss": 0.3455, + "step": 45526 + }, + { + "epoch": 0.7866844069670998, + "grad_norm": 1.5516146065266578, + "learning_rate": 2.2931911888315684e-06, + "loss": 0.4127, + "step": 45527 + }, + { + "epoch": 0.7867016864805088, + "grad_norm": 1.908955188598716, + "learning_rate": 2.2928345803004383e-06, + "loss": 0.2732, + "step": 45528 + }, + { + "epoch": 0.7867189659939177, + "grad_norm": 1.2396604751228433, + "learning_rate": 2.292477995908733e-06, + "loss": 0.268, + "step": 45529 + }, + { + "epoch": 0.7867362455073265, + "grad_norm": 0.7465650493872881, + "learning_rate": 2.292121435657573e-06, + "loss": 0.2776, + "step": 45530 + }, + { + "epoch": 0.7867535250207354, + "grad_norm": 1.4426819842665584, + "learning_rate": 2.2917648995480736e-06, + "loss": 0.1988, + "step": 45531 + }, + { + "epoch": 0.7867708045341443, + "grad_norm": 1.8247906321646268, + "learning_rate": 2.2914083875813554e-06, + "loss": 0.2453, + "step": 45532 + }, + { + "epoch": 0.7867880840475532, + "grad_norm": 1.9743944166294622, + "learning_rate": 2.2910518997585295e-06, + "loss": 0.3191, + "step": 45533 + }, + { + "epoch": 0.7868053635609621, + "grad_norm": 1.2758359188281267, + "learning_rate": 2.2906954360807175e-06, + "loss": 0.3871, + "step": 45534 + }, + { + "epoch": 0.786822643074371, + "grad_norm": 0.8231564217342369, + "learning_rate": 2.290338996549031e-06, + "loss": 0.6939, + "step": 45535 + }, + { + "epoch": 0.7868399225877799, + "grad_norm": 1.0976557152770106, + "learning_rate": 2.289982581164588e-06, + "loss": 0.3187, + "step": 45536 + }, + { + "epoch": 0.7868572021011888, + "grad_norm": 0.8220254297628846, + "learning_rate": 2.2896261899285087e-06, + "loss": 0.7499, + "step": 45537 + }, + { + "epoch": 0.7868744816145977, + "grad_norm": 1.722683921275229, + "learning_rate": 2.289269822841903e-06, + "loss": 0.2979, + "step": 45538 + }, + { + "epoch": 0.7868917611280066, + "grad_norm": 1.3833080867310086, + "learning_rate": 2.2889134799058934e-06, + "loss": 0.6067, + "step": 45539 + }, + { + "epoch": 0.7869090406414155, + "grad_norm": 1.6150083529792476, + "learning_rate": 2.2885571611215894e-06, + "loss": 0.3334, + "step": 45540 + }, + { + "epoch": 0.7869263201548244, + "grad_norm": 1.721913108104389, + "learning_rate": 2.28820086649011e-06, + "loss": 0.3164, + "step": 45541 + }, + { + "epoch": 0.7869435996682334, + "grad_norm": 2.0591588156885585, + "learning_rate": 2.287844596012573e-06, + "loss": 0.3195, + "step": 45542 + }, + { + "epoch": 0.7869608791816423, + "grad_norm": 1.8512975151085773, + "learning_rate": 2.2874883496900948e-06, + "loss": 1.0217, + "step": 45543 + }, + { + "epoch": 0.7869781586950512, + "grad_norm": 1.403063577646911, + "learning_rate": 2.287132127523787e-06, + "loss": 0.3512, + "step": 45544 + }, + { + "epoch": 0.7869954382084601, + "grad_norm": 1.3263380193637149, + "learning_rate": 2.2867759295147696e-06, + "loss": 0.3661, + "step": 45545 + }, + { + "epoch": 0.787012717721869, + "grad_norm": 1.8741466084402196, + "learning_rate": 2.286419755664153e-06, + "loss": 0.4076, + "step": 45546 + }, + { + "epoch": 0.7870299972352779, + "grad_norm": 1.2127479704717554, + "learning_rate": 2.2860636059730567e-06, + "loss": 0.3658, + "step": 45547 + }, + { + "epoch": 0.7870472767486868, + "grad_norm": 1.526245147692694, + "learning_rate": 2.2857074804425984e-06, + "loss": 0.3048, + "step": 45548 + }, + { + "epoch": 0.7870645562620957, + "grad_norm": 1.2250345128312201, + "learning_rate": 2.2853513790738868e-06, + "loss": 0.2586, + "step": 45549 + }, + { + "epoch": 0.7870818357755046, + "grad_norm": 1.4567893822052094, + "learning_rate": 2.284995301868045e-06, + "loss": 0.3079, + "step": 45550 + }, + { + "epoch": 0.7870991152889134, + "grad_norm": 1.6089270119777366, + "learning_rate": 2.2846392488261805e-06, + "loss": 0.3304, + "step": 45551 + }, + { + "epoch": 0.7871163948023223, + "grad_norm": 1.011365992832577, + "learning_rate": 2.2842832199494126e-06, + "loss": 0.4908, + "step": 45552 + }, + { + "epoch": 0.7871336743157312, + "grad_norm": 0.6945832697536412, + "learning_rate": 2.2839272152388557e-06, + "loss": 0.3837, + "step": 45553 + }, + { + "epoch": 0.7871509538291401, + "grad_norm": 1.2759452269158704, + "learning_rate": 2.2835712346956284e-06, + "loss": 0.3559, + "step": 45554 + }, + { + "epoch": 0.787168233342549, + "grad_norm": 1.070521579172777, + "learning_rate": 2.2832152783208393e-06, + "loss": 0.3906, + "step": 45555 + }, + { + "epoch": 0.787185512855958, + "grad_norm": 0.6921335033936106, + "learning_rate": 2.28285934611561e-06, + "loss": 0.4646, + "step": 45556 + }, + { + "epoch": 0.7872027923693669, + "grad_norm": 1.6468059872937675, + "learning_rate": 2.2825034380810506e-06, + "loss": 0.3599, + "step": 45557 + }, + { + "epoch": 0.7872200718827758, + "grad_norm": 1.3612465744025282, + "learning_rate": 2.2821475542182724e-06, + "loss": 0.312, + "step": 45558 + }, + { + "epoch": 0.7872373513961847, + "grad_norm": 1.1315393342743274, + "learning_rate": 2.281791694528399e-06, + "loss": 0.2356, + "step": 45559 + }, + { + "epoch": 0.7872546309095936, + "grad_norm": 0.7187438966337172, + "learning_rate": 2.2814358590125387e-06, + "loss": 0.8133, + "step": 45560 + }, + { + "epoch": 0.7872719104230025, + "grad_norm": 1.0473304276110755, + "learning_rate": 2.281080047671811e-06, + "loss": 0.2846, + "step": 45561 + }, + { + "epoch": 0.7872891899364114, + "grad_norm": 1.177939957678726, + "learning_rate": 2.280724260507323e-06, + "loss": 0.3444, + "step": 45562 + }, + { + "epoch": 0.7873064694498203, + "grad_norm": 2.0653552468305514, + "learning_rate": 2.2803684975201966e-06, + "loss": 0.275, + "step": 45563 + }, + { + "epoch": 0.7873237489632292, + "grad_norm": 1.583191084090753, + "learning_rate": 2.2800127587115374e-06, + "loss": 0.6472, + "step": 45564 + }, + { + "epoch": 0.7873410284766381, + "grad_norm": 1.0904872897794287, + "learning_rate": 2.2796570440824705e-06, + "loss": 0.2988, + "step": 45565 + }, + { + "epoch": 0.787358307990047, + "grad_norm": 2.812749045533104, + "learning_rate": 2.2793013536341046e-06, + "loss": 0.41, + "step": 45566 + }, + { + "epoch": 0.7873755875034559, + "grad_norm": 2.1071654009785736, + "learning_rate": 2.278945687367551e-06, + "loss": 0.3172, + "step": 45567 + }, + { + "epoch": 0.7873928670168648, + "grad_norm": 0.9655262653239124, + "learning_rate": 2.2785900452839283e-06, + "loss": 0.3053, + "step": 45568 + }, + { + "epoch": 0.7874101465302737, + "grad_norm": 1.171994574328423, + "learning_rate": 2.278234427384346e-06, + "loss": 0.5008, + "step": 45569 + }, + { + "epoch": 0.7874274260436827, + "grad_norm": 1.910574801005625, + "learning_rate": 2.277878833669921e-06, + "loss": 0.444, + "step": 45570 + }, + { + "epoch": 0.7874447055570916, + "grad_norm": 1.6039521548867381, + "learning_rate": 2.2775232641417657e-06, + "loss": 0.4794, + "step": 45571 + }, + { + "epoch": 0.7874619850705005, + "grad_norm": 0.9892072377393248, + "learning_rate": 2.2771677188009967e-06, + "loss": 0.327, + "step": 45572 + }, + { + "epoch": 0.7874792645839093, + "grad_norm": 1.8764209266290952, + "learning_rate": 2.2768121976487235e-06, + "loss": 0.2452, + "step": 45573 + }, + { + "epoch": 0.7874965440973182, + "grad_norm": 1.1387158428636215, + "learning_rate": 2.276456700686064e-06, + "loss": 0.4324, + "step": 45574 + }, + { + "epoch": 0.7875138236107271, + "grad_norm": 0.6016374027270013, + "learning_rate": 2.276101227914126e-06, + "loss": 0.73, + "step": 45575 + }, + { + "epoch": 0.787531103124136, + "grad_norm": 1.7816847617832272, + "learning_rate": 2.2757457793340256e-06, + "loss": 0.6422, + "step": 45576 + }, + { + "epoch": 0.7875483826375449, + "grad_norm": 2.3810768012525974, + "learning_rate": 2.27539035494688e-06, + "loss": 0.3517, + "step": 45577 + }, + { + "epoch": 0.7875656621509538, + "grad_norm": 1.8061910911189487, + "learning_rate": 2.275034954753795e-06, + "loss": 0.4028, + "step": 45578 + }, + { + "epoch": 0.7875829416643627, + "grad_norm": 1.0530874996025026, + "learning_rate": 2.2746795787558916e-06, + "loss": 0.3631, + "step": 45579 + }, + { + "epoch": 0.7876002211777716, + "grad_norm": 1.100995204606516, + "learning_rate": 2.2743242269542753e-06, + "loss": 0.3309, + "step": 45580 + }, + { + "epoch": 0.7876175006911805, + "grad_norm": 1.188149545418042, + "learning_rate": 2.273968899350063e-06, + "loss": 0.4904, + "step": 45581 + }, + { + "epoch": 0.7876347802045894, + "grad_norm": 0.8426423087830961, + "learning_rate": 2.273613595944367e-06, + "loss": 0.4325, + "step": 45582 + }, + { + "epoch": 0.7876520597179983, + "grad_norm": 1.077355685135153, + "learning_rate": 2.273258316738304e-06, + "loss": 0.3449, + "step": 45583 + }, + { + "epoch": 0.7876693392314073, + "grad_norm": 1.2309349306003605, + "learning_rate": 2.2729030617329794e-06, + "loss": 0.3613, + "step": 45584 + }, + { + "epoch": 0.7876866187448162, + "grad_norm": 1.216480040296791, + "learning_rate": 2.2725478309295135e-06, + "loss": 0.3519, + "step": 45585 + }, + { + "epoch": 0.7877038982582251, + "grad_norm": 1.444393778791527, + "learning_rate": 2.2721926243290117e-06, + "loss": 0.4091, + "step": 45586 + }, + { + "epoch": 0.787721177771634, + "grad_norm": 1.216449285408182, + "learning_rate": 2.2718374419325894e-06, + "loss": 0.3051, + "step": 45587 + }, + { + "epoch": 0.7877384572850429, + "grad_norm": 1.8909183568822225, + "learning_rate": 2.271482283741363e-06, + "loss": 0.4181, + "step": 45588 + }, + { + "epoch": 0.7877557367984518, + "grad_norm": 1.9355702451000116, + "learning_rate": 2.271127149756438e-06, + "loss": 0.3902, + "step": 45589 + }, + { + "epoch": 0.7877730163118607, + "grad_norm": 0.9652993097562866, + "learning_rate": 2.2707720399789336e-06, + "loss": 0.2254, + "step": 45590 + }, + { + "epoch": 0.7877902958252696, + "grad_norm": 1.1024551443766248, + "learning_rate": 2.2704169544099553e-06, + "loss": 0.1988, + "step": 45591 + }, + { + "epoch": 0.7878075753386785, + "grad_norm": 1.2379338929853028, + "learning_rate": 2.270061893050619e-06, + "loss": 0.206, + "step": 45592 + }, + { + "epoch": 0.7878248548520874, + "grad_norm": 1.0919668519123629, + "learning_rate": 2.269706855902035e-06, + "loss": 0.2205, + "step": 45593 + }, + { + "epoch": 0.7878421343654962, + "grad_norm": 1.458322712801291, + "learning_rate": 2.2693518429653205e-06, + "loss": 0.2599, + "step": 45594 + }, + { + "epoch": 0.7878594138789051, + "grad_norm": 1.0188518661944705, + "learning_rate": 2.2689968542415806e-06, + "loss": 0.4539, + "step": 45595 + }, + { + "epoch": 0.787876693392314, + "grad_norm": 1.5298174723658475, + "learning_rate": 2.268641889731933e-06, + "loss": 0.2338, + "step": 45596 + }, + { + "epoch": 0.7878939729057229, + "grad_norm": 1.6808609873733766, + "learning_rate": 2.268286949437487e-06, + "loss": 0.34, + "step": 45597 + }, + { + "epoch": 0.7879112524191318, + "grad_norm": 1.0460744375984403, + "learning_rate": 2.2679320333593478e-06, + "loss": 0.3277, + "step": 45598 + }, + { + "epoch": 0.7879285319325408, + "grad_norm": 1.2544510587952973, + "learning_rate": 2.2675771414986382e-06, + "loss": 0.315, + "step": 45599 + }, + { + "epoch": 0.7879458114459497, + "grad_norm": 1.5979253909127145, + "learning_rate": 2.2672222738564633e-06, + "loss": 0.3071, + "step": 45600 + }, + { + "epoch": 0.7879630909593586, + "grad_norm": 2.0812547429754162, + "learning_rate": 2.2668674304339367e-06, + "loss": 0.3878, + "step": 45601 + }, + { + "epoch": 0.7879803704727675, + "grad_norm": 2.0107118804704687, + "learning_rate": 2.266512611232168e-06, + "loss": 0.3765, + "step": 45602 + }, + { + "epoch": 0.7879976499861764, + "grad_norm": 1.0593637870941275, + "learning_rate": 2.2661578162522713e-06, + "loss": 0.3541, + "step": 45603 + }, + { + "epoch": 0.7880149294995853, + "grad_norm": 1.3033669585712389, + "learning_rate": 2.2658030454953507e-06, + "loss": 0.3234, + "step": 45604 + }, + { + "epoch": 0.7880322090129942, + "grad_norm": 1.4335144849680037, + "learning_rate": 2.265448298962528e-06, + "loss": 0.2813, + "step": 45605 + }, + { + "epoch": 0.7880494885264031, + "grad_norm": 1.154382013895787, + "learning_rate": 2.2650935766549065e-06, + "loss": 0.3728, + "step": 45606 + }, + { + "epoch": 0.788066768039812, + "grad_norm": 2.0832045081658532, + "learning_rate": 2.2647388785736026e-06, + "loss": 0.209, + "step": 45607 + }, + { + "epoch": 0.7880840475532209, + "grad_norm": 1.2476044048421697, + "learning_rate": 2.264384204719724e-06, + "loss": 0.3677, + "step": 45608 + }, + { + "epoch": 0.7881013270666298, + "grad_norm": 0.9429713374225428, + "learning_rate": 2.2640295550943802e-06, + "loss": 0.2583, + "step": 45609 + }, + { + "epoch": 0.7881186065800387, + "grad_norm": 0.905332580978237, + "learning_rate": 2.2636749296986828e-06, + "loss": 0.3521, + "step": 45610 + }, + { + "epoch": 0.7881358860934476, + "grad_norm": 1.3152368961112946, + "learning_rate": 2.2633203285337435e-06, + "loss": 0.3713, + "step": 45611 + }, + { + "epoch": 0.7881531656068566, + "grad_norm": 1.2994774504217206, + "learning_rate": 2.2629657516006765e-06, + "loss": 0.3512, + "step": 45612 + }, + { + "epoch": 0.7881704451202655, + "grad_norm": 1.7096525285566562, + "learning_rate": 2.262611198900585e-06, + "loss": 0.4227, + "step": 45613 + }, + { + "epoch": 0.7881877246336744, + "grad_norm": 1.063326647404073, + "learning_rate": 2.2622566704345873e-06, + "loss": 0.2105, + "step": 45614 + }, + { + "epoch": 0.7882050041470832, + "grad_norm": 2.4918572491442506, + "learning_rate": 2.2619021662037855e-06, + "loss": 0.2909, + "step": 45615 + }, + { + "epoch": 0.7882222836604921, + "grad_norm": 0.7636620553482231, + "learning_rate": 2.2615476862092945e-06, + "loss": 0.4973, + "step": 45616 + }, + { + "epoch": 0.788239563173901, + "grad_norm": 1.2483752250350557, + "learning_rate": 2.2611932304522244e-06, + "loss": 0.2832, + "step": 45617 + }, + { + "epoch": 0.7882568426873099, + "grad_norm": 1.243338845495339, + "learning_rate": 2.260838798933689e-06, + "loss": 0.464, + "step": 45618 + }, + { + "epoch": 0.7882741222007188, + "grad_norm": 0.7190664391360916, + "learning_rate": 2.2604843916547938e-06, + "loss": 0.7711, + "step": 45619 + }, + { + "epoch": 0.7882914017141277, + "grad_norm": 0.9403404848665868, + "learning_rate": 2.2601300086166465e-06, + "loss": 0.3619, + "step": 45620 + }, + { + "epoch": 0.7883086812275366, + "grad_norm": 1.070087971577622, + "learning_rate": 2.2597756498203603e-06, + "loss": 0.3224, + "step": 45621 + }, + { + "epoch": 0.7883259607409455, + "grad_norm": 0.6180297959185259, + "learning_rate": 2.259421315267045e-06, + "loss": 0.7346, + "step": 45622 + }, + { + "epoch": 0.7883432402543544, + "grad_norm": 1.0961037680245829, + "learning_rate": 2.2590670049578145e-06, + "loss": 0.2606, + "step": 45623 + }, + { + "epoch": 0.7883605197677633, + "grad_norm": 0.8183452661346758, + "learning_rate": 2.258712718893771e-06, + "loss": 0.3671, + "step": 45624 + }, + { + "epoch": 0.7883777992811722, + "grad_norm": 0.9200787857093404, + "learning_rate": 2.25835845707603e-06, + "loss": 0.3005, + "step": 45625 + }, + { + "epoch": 0.7883950787945812, + "grad_norm": 1.3469989291156776, + "learning_rate": 2.258004219505695e-06, + "loss": 0.3636, + "step": 45626 + }, + { + "epoch": 0.7884123583079901, + "grad_norm": 0.6839998720628037, + "learning_rate": 2.2576500061838804e-06, + "loss": 0.5789, + "step": 45627 + }, + { + "epoch": 0.788429637821399, + "grad_norm": 1.1300541045602455, + "learning_rate": 2.2572958171116965e-06, + "loss": 0.3063, + "step": 45628 + }, + { + "epoch": 0.7884469173348079, + "grad_norm": 1.1074306236673261, + "learning_rate": 2.2569416522902475e-06, + "loss": 0.2228, + "step": 45629 + }, + { + "epoch": 0.7884641968482168, + "grad_norm": 0.9438107550351277, + "learning_rate": 2.256587511720649e-06, + "loss": 0.4533, + "step": 45630 + }, + { + "epoch": 0.7884814763616257, + "grad_norm": 1.1884763612744063, + "learning_rate": 2.256233395404005e-06, + "loss": 0.4744, + "step": 45631 + }, + { + "epoch": 0.7884987558750346, + "grad_norm": 0.8125880944920755, + "learning_rate": 2.255879303341425e-06, + "loss": 0.7217, + "step": 45632 + }, + { + "epoch": 0.7885160353884435, + "grad_norm": 2.399616134606957, + "learning_rate": 2.255525235534021e-06, + "loss": 0.426, + "step": 45633 + }, + { + "epoch": 0.7885333149018524, + "grad_norm": 1.574050616749595, + "learning_rate": 2.2551711919829023e-06, + "loss": 0.4003, + "step": 45634 + }, + { + "epoch": 0.7885505944152613, + "grad_norm": 1.9293309823881661, + "learning_rate": 2.254817172689173e-06, + "loss": 0.3632, + "step": 45635 + }, + { + "epoch": 0.7885678739286701, + "grad_norm": 1.2581563986455742, + "learning_rate": 2.2544631776539483e-06, + "loss": 0.4233, + "step": 45636 + }, + { + "epoch": 0.788585153442079, + "grad_norm": 1.0261347002087224, + "learning_rate": 2.254109206878331e-06, + "loss": 0.5155, + "step": 45637 + }, + { + "epoch": 0.7886024329554879, + "grad_norm": 1.542948004434922, + "learning_rate": 2.2537552603634316e-06, + "loss": 0.4053, + "step": 45638 + }, + { + "epoch": 0.7886197124688968, + "grad_norm": 1.2822765045474684, + "learning_rate": 2.2534013381103623e-06, + "loss": 0.3068, + "step": 45639 + }, + { + "epoch": 0.7886369919823057, + "grad_norm": 0.9934409724942977, + "learning_rate": 2.253047440120226e-06, + "loss": 0.4093, + "step": 45640 + }, + { + "epoch": 0.7886542714957147, + "grad_norm": 1.3430474223801603, + "learning_rate": 2.2526935663941373e-06, + "loss": 0.4508, + "step": 45641 + }, + { + "epoch": 0.7886715510091236, + "grad_norm": 1.2622131509345587, + "learning_rate": 2.2523397169331983e-06, + "loss": 0.4319, + "step": 45642 + }, + { + "epoch": 0.7886888305225325, + "grad_norm": 0.9250590428996526, + "learning_rate": 2.2519858917385216e-06, + "loss": 0.3822, + "step": 45643 + }, + { + "epoch": 0.7887061100359414, + "grad_norm": 1.4278946424819137, + "learning_rate": 2.25163209081121e-06, + "loss": 0.479, + "step": 45644 + }, + { + "epoch": 0.7887233895493503, + "grad_norm": 0.9962053134927895, + "learning_rate": 2.25127831415238e-06, + "loss": 0.3058, + "step": 45645 + }, + { + "epoch": 0.7887406690627592, + "grad_norm": 0.8384579708113117, + "learning_rate": 2.2509245617631326e-06, + "loss": 0.2655, + "step": 45646 + }, + { + "epoch": 0.7887579485761681, + "grad_norm": 1.781992013337725, + "learning_rate": 2.250570833644582e-06, + "loss": 0.5088, + "step": 45647 + }, + { + "epoch": 0.788775228089577, + "grad_norm": 2.2115073822519045, + "learning_rate": 2.2502171297978282e-06, + "loss": 0.4133, + "step": 45648 + }, + { + "epoch": 0.7887925076029859, + "grad_norm": 1.2274968713012178, + "learning_rate": 2.249863450223987e-06, + "loss": 0.414, + "step": 45649 + }, + { + "epoch": 0.7888097871163948, + "grad_norm": 1.5577768321186114, + "learning_rate": 2.2495097949241595e-06, + "loss": 0.2476, + "step": 45650 + }, + { + "epoch": 0.7888270666298037, + "grad_norm": 0.8884600096916269, + "learning_rate": 2.249156163899456e-06, + "loss": 0.2156, + "step": 45651 + }, + { + "epoch": 0.7888443461432126, + "grad_norm": 1.452192571036959, + "learning_rate": 2.2488025571509885e-06, + "loss": 0.3029, + "step": 45652 + }, + { + "epoch": 0.7888616256566215, + "grad_norm": 1.13008854116349, + "learning_rate": 2.2484489746798565e-06, + "loss": 0.2455, + "step": 45653 + }, + { + "epoch": 0.7888789051700305, + "grad_norm": 1.2396405124193344, + "learning_rate": 2.248095416487175e-06, + "loss": 0.2906, + "step": 45654 + }, + { + "epoch": 0.7888961846834394, + "grad_norm": 1.4344498780227537, + "learning_rate": 2.2477418825740437e-06, + "loss": 0.4192, + "step": 45655 + }, + { + "epoch": 0.7889134641968483, + "grad_norm": 1.4642823608509288, + "learning_rate": 2.247388372941576e-06, + "loss": 0.3126, + "step": 45656 + }, + { + "epoch": 0.7889307437102571, + "grad_norm": 1.3066504346945833, + "learning_rate": 2.2470348875908752e-06, + "loss": 0.2239, + "step": 45657 + }, + { + "epoch": 0.788948023223666, + "grad_norm": 1.016799181373762, + "learning_rate": 2.246681426523054e-06, + "loss": 0.7301, + "step": 45658 + }, + { + "epoch": 0.7889653027370749, + "grad_norm": 0.7643525849025997, + "learning_rate": 2.2463279897392156e-06, + "loss": 0.1799, + "step": 45659 + }, + { + "epoch": 0.7889825822504838, + "grad_norm": 1.108126222452971, + "learning_rate": 2.245974577240464e-06, + "loss": 0.3322, + "step": 45660 + }, + { + "epoch": 0.7889998617638927, + "grad_norm": 1.1730993463398298, + "learning_rate": 2.24562118902791e-06, + "loss": 0.4986, + "step": 45661 + }, + { + "epoch": 0.7890171412773016, + "grad_norm": 0.6192239582285064, + "learning_rate": 2.2452678251026595e-06, + "loss": 0.7538, + "step": 45662 + }, + { + "epoch": 0.7890344207907105, + "grad_norm": 1.3598406785926758, + "learning_rate": 2.244914485465822e-06, + "loss": 0.4238, + "step": 45663 + }, + { + "epoch": 0.7890517003041194, + "grad_norm": 0.6449881569215523, + "learning_rate": 2.2445611701185e-06, + "loss": 0.4313, + "step": 45664 + }, + { + "epoch": 0.7890689798175283, + "grad_norm": 1.2409946594977708, + "learning_rate": 2.244207879061803e-06, + "loss": 0.313, + "step": 45665 + }, + { + "epoch": 0.7890862593309372, + "grad_norm": 0.9771602316710095, + "learning_rate": 2.243854612296834e-06, + "loss": 0.2925, + "step": 45666 + }, + { + "epoch": 0.7891035388443461, + "grad_norm": 1.555029833151115, + "learning_rate": 2.243501369824702e-06, + "loss": 0.3202, + "step": 45667 + }, + { + "epoch": 0.789120818357755, + "grad_norm": 1.1689899835765223, + "learning_rate": 2.2431481516465136e-06, + "loss": 0.5121, + "step": 45668 + }, + { + "epoch": 0.789138097871164, + "grad_norm": 1.31083506279457, + "learning_rate": 2.242794957763378e-06, + "loss": 0.3749, + "step": 45669 + }, + { + "epoch": 0.7891553773845729, + "grad_norm": 1.471170450238211, + "learning_rate": 2.242441788176397e-06, + "loss": 0.2781, + "step": 45670 + }, + { + "epoch": 0.7891726568979818, + "grad_norm": 1.7762943569055931, + "learning_rate": 2.242088642886675e-06, + "loss": 0.603, + "step": 45671 + }, + { + "epoch": 0.7891899364113907, + "grad_norm": 1.212135628814401, + "learning_rate": 2.241735521895322e-06, + "loss": 0.392, + "step": 45672 + }, + { + "epoch": 0.7892072159247996, + "grad_norm": 0.6934946148739253, + "learning_rate": 2.241382425203442e-06, + "loss": 0.5176, + "step": 45673 + }, + { + "epoch": 0.7892244954382085, + "grad_norm": 1.5381048190052111, + "learning_rate": 2.2410293528121453e-06, + "loss": 0.3839, + "step": 45674 + }, + { + "epoch": 0.7892417749516174, + "grad_norm": 1.3834255556964068, + "learning_rate": 2.240676304722531e-06, + "loss": 0.2749, + "step": 45675 + }, + { + "epoch": 0.7892590544650263, + "grad_norm": 1.21056366235067, + "learning_rate": 2.240323280935711e-06, + "loss": 0.3968, + "step": 45676 + }, + { + "epoch": 0.7892763339784352, + "grad_norm": 0.8833841667322719, + "learning_rate": 2.2399702814527855e-06, + "loss": 0.2675, + "step": 45677 + }, + { + "epoch": 0.789293613491844, + "grad_norm": 1.4289855686581443, + "learning_rate": 2.2396173062748626e-06, + "loss": 0.4639, + "step": 45678 + }, + { + "epoch": 0.7893108930052529, + "grad_norm": 1.293948945353372, + "learning_rate": 2.239264355403049e-06, + "loss": 0.4214, + "step": 45679 + }, + { + "epoch": 0.7893281725186618, + "grad_norm": 1.3982598808829183, + "learning_rate": 2.2389114288384506e-06, + "loss": 0.5179, + "step": 45680 + }, + { + "epoch": 0.7893454520320707, + "grad_norm": 1.1199459059314834, + "learning_rate": 2.238558526582173e-06, + "loss": 0.3261, + "step": 45681 + }, + { + "epoch": 0.7893627315454796, + "grad_norm": 1.0049704402435904, + "learning_rate": 2.2382056486353164e-06, + "loss": 0.194, + "step": 45682 + }, + { + "epoch": 0.7893800110588886, + "grad_norm": 0.9830619291070367, + "learning_rate": 2.2378527949989926e-06, + "loss": 0.4467, + "step": 45683 + }, + { + "epoch": 0.7893972905722975, + "grad_norm": 1.9466388174088636, + "learning_rate": 2.237499965674297e-06, + "loss": 0.4302, + "step": 45684 + }, + { + "epoch": 0.7894145700857064, + "grad_norm": 1.7286196952688277, + "learning_rate": 2.2371471606623485e-06, + "loss": 0.2345, + "step": 45685 + }, + { + "epoch": 0.7894318495991153, + "grad_norm": 1.27419058037646, + "learning_rate": 2.236794379964241e-06, + "loss": 0.2124, + "step": 45686 + }, + { + "epoch": 0.7894491291125242, + "grad_norm": 1.6145846288073888, + "learning_rate": 2.2364416235810873e-06, + "loss": 0.1897, + "step": 45687 + }, + { + "epoch": 0.7894664086259331, + "grad_norm": 1.0678380730490222, + "learning_rate": 2.2360888915139856e-06, + "loss": 0.3148, + "step": 45688 + }, + { + "epoch": 0.789483688139342, + "grad_norm": 1.0402411386177146, + "learning_rate": 2.235736183764046e-06, + "loss": 0.4025, + "step": 45689 + }, + { + "epoch": 0.7895009676527509, + "grad_norm": 1.1145331241450844, + "learning_rate": 2.2353835003323678e-06, + "loss": 0.1944, + "step": 45690 + }, + { + "epoch": 0.7895182471661598, + "grad_norm": 1.5383967170800614, + "learning_rate": 2.2350308412200593e-06, + "loss": 0.4802, + "step": 45691 + }, + { + "epoch": 0.7895355266795687, + "grad_norm": 1.1060565169214969, + "learning_rate": 2.234678206428226e-06, + "loss": 0.2989, + "step": 45692 + }, + { + "epoch": 0.7895528061929776, + "grad_norm": 1.501655011489484, + "learning_rate": 2.234325595957969e-06, + "loss": 0.4869, + "step": 45693 + }, + { + "epoch": 0.7895700857063865, + "grad_norm": 1.2602726396409032, + "learning_rate": 2.233973009810396e-06, + "loss": 0.313, + "step": 45694 + }, + { + "epoch": 0.7895873652197954, + "grad_norm": 1.526007536265857, + "learning_rate": 2.2336204479866073e-06, + "loss": 0.3546, + "step": 45695 + }, + { + "epoch": 0.7896046447332044, + "grad_norm": 1.1204779305154473, + "learning_rate": 2.2332679104877107e-06, + "loss": 0.441, + "step": 45696 + }, + { + "epoch": 0.7896219242466133, + "grad_norm": 1.198159270387149, + "learning_rate": 2.2329153973148088e-06, + "loss": 0.3872, + "step": 45697 + }, + { + "epoch": 0.7896392037600222, + "grad_norm": 1.1765712605323888, + "learning_rate": 2.2325629084690082e-06, + "loss": 0.3297, + "step": 45698 + }, + { + "epoch": 0.789656483273431, + "grad_norm": 0.5587748276539208, + "learning_rate": 2.232210443951408e-06, + "loss": 0.206, + "step": 45699 + }, + { + "epoch": 0.7896737627868399, + "grad_norm": 1.316236731650684, + "learning_rate": 2.2318580037631186e-06, + "loss": 0.3965, + "step": 45700 + }, + { + "epoch": 0.7896910423002488, + "grad_norm": 1.347646524091055, + "learning_rate": 2.231505587905237e-06, + "loss": 0.3477, + "step": 45701 + }, + { + "epoch": 0.7897083218136577, + "grad_norm": 1.6706659710964467, + "learning_rate": 2.23115319637887e-06, + "loss": 0.2062, + "step": 45702 + }, + { + "epoch": 0.7897256013270666, + "grad_norm": 1.028167807444347, + "learning_rate": 2.2308008291851256e-06, + "loss": 0.4399, + "step": 45703 + }, + { + "epoch": 0.7897428808404755, + "grad_norm": 1.5774550087456567, + "learning_rate": 2.2304484863250995e-06, + "loss": 0.3632, + "step": 45704 + }, + { + "epoch": 0.7897601603538844, + "grad_norm": 1.148599152864847, + "learning_rate": 2.230096167799902e-06, + "loss": 0.4589, + "step": 45705 + }, + { + "epoch": 0.7897774398672933, + "grad_norm": 1.6825989438554112, + "learning_rate": 2.2297438736106326e-06, + "loss": 0.2134, + "step": 45706 + }, + { + "epoch": 0.7897947193807022, + "grad_norm": 1.281567010525493, + "learning_rate": 2.229391603758394e-06, + "loss": 0.3538, + "step": 45707 + }, + { + "epoch": 0.7898119988941111, + "grad_norm": 1.1724628744204202, + "learning_rate": 2.2290393582442928e-06, + "loss": 0.2456, + "step": 45708 + }, + { + "epoch": 0.78982927840752, + "grad_norm": 1.9709748412433075, + "learning_rate": 2.228687137069433e-06, + "loss": 0.3249, + "step": 45709 + }, + { + "epoch": 0.789846557920929, + "grad_norm": 1.2483645299228363, + "learning_rate": 2.2283349402349153e-06, + "loss": 0.4802, + "step": 45710 + }, + { + "epoch": 0.7898638374343379, + "grad_norm": 1.3425817504690665, + "learning_rate": 2.2279827677418407e-06, + "loss": 0.4344, + "step": 45711 + }, + { + "epoch": 0.7898811169477468, + "grad_norm": 1.529264623888851, + "learning_rate": 2.2276306195913157e-06, + "loss": 0.3002, + "step": 45712 + }, + { + "epoch": 0.7898983964611557, + "grad_norm": 2.2589020390034853, + "learning_rate": 2.2272784957844405e-06, + "loss": 0.4668, + "step": 45713 + }, + { + "epoch": 0.7899156759745646, + "grad_norm": 1.3086489779783785, + "learning_rate": 2.2269263963223243e-06, + "loss": 0.3992, + "step": 45714 + }, + { + "epoch": 0.7899329554879735, + "grad_norm": 3.209704724830766, + "learning_rate": 2.226574321206061e-06, + "loss": 0.4964, + "step": 45715 + }, + { + "epoch": 0.7899502350013824, + "grad_norm": 2.5996919960468174, + "learning_rate": 2.2262222704367607e-06, + "loss": 0.7424, + "step": 45716 + }, + { + "epoch": 0.7899675145147913, + "grad_norm": 1.1524012471182281, + "learning_rate": 2.2258702440155212e-06, + "loss": 0.3008, + "step": 45717 + }, + { + "epoch": 0.7899847940282002, + "grad_norm": 1.4450478728032434, + "learning_rate": 2.225518241943446e-06, + "loss": 0.2907, + "step": 45718 + }, + { + "epoch": 0.7900020735416091, + "grad_norm": 1.2379060352180375, + "learning_rate": 2.2251662642216396e-06, + "loss": 0.2709, + "step": 45719 + }, + { + "epoch": 0.790019353055018, + "grad_norm": 1.5412353729116461, + "learning_rate": 2.2248143108512053e-06, + "loss": 0.2629, + "step": 45720 + }, + { + "epoch": 0.7900366325684268, + "grad_norm": 0.8783361783964437, + "learning_rate": 2.2244623818332444e-06, + "loss": 0.4181, + "step": 45721 + }, + { + "epoch": 0.7900539120818357, + "grad_norm": 1.1085919021658892, + "learning_rate": 2.2241104771688537e-06, + "loss": 0.2595, + "step": 45722 + }, + { + "epoch": 0.7900711915952446, + "grad_norm": 0.8045393886450375, + "learning_rate": 2.2237585968591446e-06, + "loss": 0.7132, + "step": 45723 + }, + { + "epoch": 0.7900884711086535, + "grad_norm": 1.4456903883368468, + "learning_rate": 2.2234067409052072e-06, + "loss": 0.2825, + "step": 45724 + }, + { + "epoch": 0.7901057506220625, + "grad_norm": 1.1520465143498146, + "learning_rate": 2.2230549093081575e-06, + "loss": 0.3049, + "step": 45725 + }, + { + "epoch": 0.7901230301354714, + "grad_norm": 1.7209918100429904, + "learning_rate": 2.222703102069088e-06, + "loss": 0.2998, + "step": 45726 + }, + { + "epoch": 0.7901403096488803, + "grad_norm": 1.7225286857605524, + "learning_rate": 2.2223513191891067e-06, + "loss": 0.2142, + "step": 45727 + }, + { + "epoch": 0.7901575891622892, + "grad_norm": 1.6833431926653448, + "learning_rate": 2.221999560669309e-06, + "loss": 0.4669, + "step": 45728 + }, + { + "epoch": 0.7901748686756981, + "grad_norm": 1.4533353204439128, + "learning_rate": 2.221647826510802e-06, + "loss": 0.2471, + "step": 45729 + }, + { + "epoch": 0.790192148189107, + "grad_norm": 1.4338594038037868, + "learning_rate": 2.2212961167146795e-06, + "loss": 0.5144, + "step": 45730 + }, + { + "epoch": 0.7902094277025159, + "grad_norm": 1.26697905261042, + "learning_rate": 2.220944431282055e-06, + "loss": 0.304, + "step": 45731 + }, + { + "epoch": 0.7902267072159248, + "grad_norm": 1.146290719191291, + "learning_rate": 2.220592770214024e-06, + "loss": 0.287, + "step": 45732 + }, + { + "epoch": 0.7902439867293337, + "grad_norm": 1.6866906135496518, + "learning_rate": 2.2202411335116837e-06, + "loss": 0.2752, + "step": 45733 + }, + { + "epoch": 0.7902612662427426, + "grad_norm": 1.4535163447252815, + "learning_rate": 2.2198895211761427e-06, + "loss": 0.22, + "step": 45734 + }, + { + "epoch": 0.7902785457561515, + "grad_norm": 1.2043463755799948, + "learning_rate": 2.2195379332084967e-06, + "loss": 0.3916, + "step": 45735 + }, + { + "epoch": 0.7902958252695604, + "grad_norm": 1.0215428768833748, + "learning_rate": 2.219186369609849e-06, + "loss": 0.2548, + "step": 45736 + }, + { + "epoch": 0.7903131047829693, + "grad_norm": 1.7097528629765526, + "learning_rate": 2.2188348303813014e-06, + "loss": 0.3564, + "step": 45737 + }, + { + "epoch": 0.7903303842963783, + "grad_norm": 1.1868096928876073, + "learning_rate": 2.218483315523956e-06, + "loss": 0.2358, + "step": 45738 + }, + { + "epoch": 0.7903476638097872, + "grad_norm": 1.4678574895326966, + "learning_rate": 2.2181318250389107e-06, + "loss": 0.5253, + "step": 45739 + }, + { + "epoch": 0.7903649433231961, + "grad_norm": 1.360434394526827, + "learning_rate": 2.2177803589272697e-06, + "loss": 0.2976, + "step": 45740 + }, + { + "epoch": 0.790382222836605, + "grad_norm": 1.2698670306537119, + "learning_rate": 2.2174289171901296e-06, + "loss": 0.296, + "step": 45741 + }, + { + "epoch": 0.7903995023500138, + "grad_norm": 1.5926182300591674, + "learning_rate": 2.2170774998285936e-06, + "loss": 0.3535, + "step": 45742 + }, + { + "epoch": 0.7904167818634227, + "grad_norm": 1.3363325125270158, + "learning_rate": 2.216726106843765e-06, + "loss": 0.3996, + "step": 45743 + }, + { + "epoch": 0.7904340613768316, + "grad_norm": 1.2759337001773525, + "learning_rate": 2.2163747382367386e-06, + "loss": 0.3788, + "step": 45744 + }, + { + "epoch": 0.7904513408902405, + "grad_norm": 1.3597351289585584, + "learning_rate": 2.216023394008622e-06, + "loss": 0.3257, + "step": 45745 + }, + { + "epoch": 0.7904686204036494, + "grad_norm": 1.218289408894893, + "learning_rate": 2.2156720741605076e-06, + "loss": 0.3981, + "step": 45746 + }, + { + "epoch": 0.7904858999170583, + "grad_norm": 1.2881619761380447, + "learning_rate": 2.2153207786935006e-06, + "loss": 0.2335, + "step": 45747 + }, + { + "epoch": 0.7905031794304672, + "grad_norm": 1.7907008514428946, + "learning_rate": 2.2149695076087007e-06, + "loss": 0.281, + "step": 45748 + }, + { + "epoch": 0.7905204589438761, + "grad_norm": 0.9391272448337823, + "learning_rate": 2.2146182609072097e-06, + "loss": 0.3894, + "step": 45749 + }, + { + "epoch": 0.790537738457285, + "grad_norm": 1.0720658415639965, + "learning_rate": 2.2142670385901242e-06, + "loss": 0.3334, + "step": 45750 + }, + { + "epoch": 0.7905550179706939, + "grad_norm": 1.1413417047911947, + "learning_rate": 2.2139158406585494e-06, + "loss": 0.3817, + "step": 45751 + }, + { + "epoch": 0.7905722974841028, + "grad_norm": 0.7144931932684926, + "learning_rate": 2.213564667113578e-06, + "loss": 0.2581, + "step": 45752 + }, + { + "epoch": 0.7905895769975118, + "grad_norm": 1.622354860821429, + "learning_rate": 2.2132135179563142e-06, + "loss": 0.3163, + "step": 45753 + }, + { + "epoch": 0.7906068565109207, + "grad_norm": 1.0716297204490686, + "learning_rate": 2.21286239318786e-06, + "loss": 0.4665, + "step": 45754 + }, + { + "epoch": 0.7906241360243296, + "grad_norm": 1.0859867801403031, + "learning_rate": 2.2125112928093094e-06, + "loss": 0.3712, + "step": 45755 + }, + { + "epoch": 0.7906414155377385, + "grad_norm": 1.1416617707345043, + "learning_rate": 2.2121602168217684e-06, + "loss": 0.3551, + "step": 45756 + }, + { + "epoch": 0.7906586950511474, + "grad_norm": 0.9822564356406591, + "learning_rate": 2.211809165226331e-06, + "loss": 0.2931, + "step": 45757 + }, + { + "epoch": 0.7906759745645563, + "grad_norm": 1.323230723172795, + "learning_rate": 2.2114581380240997e-06, + "loss": 0.3385, + "step": 45758 + }, + { + "epoch": 0.7906932540779652, + "grad_norm": 1.084184971726322, + "learning_rate": 2.2111071352161717e-06, + "loss": 0.3202, + "step": 45759 + }, + { + "epoch": 0.7907105335913741, + "grad_norm": 1.3455288909394771, + "learning_rate": 2.2107561568036516e-06, + "loss": 0.341, + "step": 45760 + }, + { + "epoch": 0.790727813104783, + "grad_norm": 1.0010541089743732, + "learning_rate": 2.210405202787633e-06, + "loss": 0.5781, + "step": 45761 + }, + { + "epoch": 0.7907450926181919, + "grad_norm": 1.3895711545470975, + "learning_rate": 2.2100542731692197e-06, + "loss": 0.3293, + "step": 45762 + }, + { + "epoch": 0.7907623721316007, + "grad_norm": 1.1366918918751059, + "learning_rate": 2.2097033679495085e-06, + "loss": 0.3259, + "step": 45763 + }, + { + "epoch": 0.7907796516450096, + "grad_norm": 1.3329486579851708, + "learning_rate": 2.2093524871295936e-06, + "loss": 0.2521, + "step": 45764 + }, + { + "epoch": 0.7907969311584185, + "grad_norm": 1.258559107139376, + "learning_rate": 2.2090016307105833e-06, + "loss": 0.457, + "step": 45765 + }, + { + "epoch": 0.7908142106718274, + "grad_norm": 1.5919026219774275, + "learning_rate": 2.208650798693569e-06, + "loss": 0.317, + "step": 45766 + }, + { + "epoch": 0.7908314901852364, + "grad_norm": 1.305308109899893, + "learning_rate": 2.2082999910796564e-06, + "loss": 0.3642, + "step": 45767 + }, + { + "epoch": 0.7908487696986453, + "grad_norm": 1.747769922979117, + "learning_rate": 2.2079492078699362e-06, + "loss": 0.2822, + "step": 45768 + }, + { + "epoch": 0.7908660492120542, + "grad_norm": 2.155089751431456, + "learning_rate": 2.2075984490655155e-06, + "loss": 0.375, + "step": 45769 + }, + { + "epoch": 0.7908833287254631, + "grad_norm": 1.3102167525801482, + "learning_rate": 2.207247714667482e-06, + "loss": 0.4376, + "step": 45770 + }, + { + "epoch": 0.790900608238872, + "grad_norm": 1.2720120062537206, + "learning_rate": 2.2068970046769468e-06, + "loss": 0.261, + "step": 45771 + }, + { + "epoch": 0.7909178877522809, + "grad_norm": 1.669882898093869, + "learning_rate": 2.206546319095002e-06, + "loss": 0.4502, + "step": 45772 + }, + { + "epoch": 0.7909351672656898, + "grad_norm": 1.394878023790283, + "learning_rate": 2.206195657922745e-06, + "loss": 0.3603, + "step": 45773 + }, + { + "epoch": 0.7909524467790987, + "grad_norm": 1.4993029046074469, + "learning_rate": 2.205845021161277e-06, + "loss": 0.4192, + "step": 45774 + }, + { + "epoch": 0.7909697262925076, + "grad_norm": 1.867361227451479, + "learning_rate": 2.2054944088116925e-06, + "loss": 0.4218, + "step": 45775 + }, + { + "epoch": 0.7909870058059165, + "grad_norm": 0.9305916692530142, + "learning_rate": 2.2051438208750918e-06, + "loss": 0.3728, + "step": 45776 + }, + { + "epoch": 0.7910042853193254, + "grad_norm": 0.6708217881120325, + "learning_rate": 2.204793257352572e-06, + "loss": 0.929, + "step": 45777 + }, + { + "epoch": 0.7910215648327343, + "grad_norm": 1.332120554751839, + "learning_rate": 2.2044427182452366e-06, + "loss": 0.26, + "step": 45778 + }, + { + "epoch": 0.7910388443461432, + "grad_norm": 2.068442465601214, + "learning_rate": 2.2040922035541755e-06, + "loss": 0.3381, + "step": 45779 + }, + { + "epoch": 0.7910561238595522, + "grad_norm": 1.385005374553752, + "learning_rate": 2.2037417132804926e-06, + "loss": 0.2532, + "step": 45780 + }, + { + "epoch": 0.7910734033729611, + "grad_norm": 1.3974852318146196, + "learning_rate": 2.2033912474252805e-06, + "loss": 0.4106, + "step": 45781 + }, + { + "epoch": 0.79109068288637, + "grad_norm": 1.3660629369503932, + "learning_rate": 2.2030408059896403e-06, + "loss": 0.3167, + "step": 45782 + }, + { + "epoch": 0.7911079623997789, + "grad_norm": 1.3070313659229622, + "learning_rate": 2.202690388974671e-06, + "loss": 0.5598, + "step": 45783 + }, + { + "epoch": 0.7911252419131877, + "grad_norm": 1.231798358433879, + "learning_rate": 2.202339996381465e-06, + "loss": 0.3802, + "step": 45784 + }, + { + "epoch": 0.7911425214265966, + "grad_norm": 1.1062726767968787, + "learning_rate": 2.2019896282111254e-06, + "loss": 0.2829, + "step": 45785 + }, + { + "epoch": 0.7911598009400055, + "grad_norm": 1.2265838072878814, + "learning_rate": 2.201639284464745e-06, + "loss": 0.4244, + "step": 45786 + }, + { + "epoch": 0.7911770804534144, + "grad_norm": 1.272566941909122, + "learning_rate": 2.2012889651434223e-06, + "loss": 0.3324, + "step": 45787 + }, + { + "epoch": 0.7911943599668233, + "grad_norm": 1.0507842392232993, + "learning_rate": 2.200938670248255e-06, + "loss": 0.4016, + "step": 45788 + }, + { + "epoch": 0.7912116394802322, + "grad_norm": 1.4689825170558228, + "learning_rate": 2.2005883997803436e-06, + "loss": 0.3095, + "step": 45789 + }, + { + "epoch": 0.7912289189936411, + "grad_norm": 1.4163751717193034, + "learning_rate": 2.2002381537407802e-06, + "loss": 0.3063, + "step": 45790 + }, + { + "epoch": 0.79124619850705, + "grad_norm": 1.09280299638559, + "learning_rate": 2.1998879321306656e-06, + "loss": 0.4005, + "step": 45791 + }, + { + "epoch": 0.7912634780204589, + "grad_norm": 1.4572931951416168, + "learning_rate": 2.1995377349510915e-06, + "loss": 0.3195, + "step": 45792 + }, + { + "epoch": 0.7912807575338678, + "grad_norm": 1.1843847394387805, + "learning_rate": 2.1991875622031588e-06, + "loss": 0.4423, + "step": 45793 + }, + { + "epoch": 0.7912980370472767, + "grad_norm": 1.3749465284906, + "learning_rate": 2.1988374138879655e-06, + "loss": 0.4059, + "step": 45794 + }, + { + "epoch": 0.7913153165606857, + "grad_norm": 1.2108688123759614, + "learning_rate": 2.1984872900066044e-06, + "loss": 0.3812, + "step": 45795 + }, + { + "epoch": 0.7913325960740946, + "grad_norm": 1.8499821844555275, + "learning_rate": 2.1981371905601765e-06, + "loss": 0.4832, + "step": 45796 + }, + { + "epoch": 0.7913498755875035, + "grad_norm": 1.7769507273985468, + "learning_rate": 2.197787115549773e-06, + "loss": 0.3028, + "step": 45797 + }, + { + "epoch": 0.7913671551009124, + "grad_norm": 2.0953285414957277, + "learning_rate": 2.197437064976493e-06, + "loss": 0.2572, + "step": 45798 + }, + { + "epoch": 0.7913844346143213, + "grad_norm": 1.2378751612518142, + "learning_rate": 2.1970870388414335e-06, + "loss": 0.3122, + "step": 45799 + }, + { + "epoch": 0.7914017141277302, + "grad_norm": 1.1220968828697089, + "learning_rate": 2.196737037145693e-06, + "loss": 0.5125, + "step": 45800 + }, + { + "epoch": 0.7914189936411391, + "grad_norm": 1.653423674930924, + "learning_rate": 2.1963870598903612e-06, + "loss": 0.259, + "step": 45801 + }, + { + "epoch": 0.791436273154548, + "grad_norm": 1.5735661282140683, + "learning_rate": 2.196037107076543e-06, + "loss": 0.3475, + "step": 45802 + }, + { + "epoch": 0.7914535526679569, + "grad_norm": 1.669325449932151, + "learning_rate": 2.195687178705328e-06, + "loss": 0.3395, + "step": 45803 + }, + { + "epoch": 0.7914708321813658, + "grad_norm": 1.2225163796699958, + "learning_rate": 2.195337274777809e-06, + "loss": 0.3857, + "step": 45804 + }, + { + "epoch": 0.7914881116947746, + "grad_norm": 1.7966468681149286, + "learning_rate": 2.194987395295093e-06, + "loss": 0.3968, + "step": 45805 + }, + { + "epoch": 0.7915053912081835, + "grad_norm": 1.1182946607639292, + "learning_rate": 2.1946375402582666e-06, + "loss": 0.4067, + "step": 45806 + }, + { + "epoch": 0.7915226707215924, + "grad_norm": 1.1351681689495918, + "learning_rate": 2.1942877096684302e-06, + "loss": 0.3169, + "step": 45807 + }, + { + "epoch": 0.7915399502350013, + "grad_norm": 1.6773049071950565, + "learning_rate": 2.193937903526676e-06, + "loss": 0.5609, + "step": 45808 + }, + { + "epoch": 0.7915572297484103, + "grad_norm": 2.198506849539558, + "learning_rate": 2.193588121834104e-06, + "loss": 0.4555, + "step": 45809 + }, + { + "epoch": 0.7915745092618192, + "grad_norm": 1.171195984343308, + "learning_rate": 2.193238364591802e-06, + "loss": 0.2408, + "step": 45810 + }, + { + "epoch": 0.7915917887752281, + "grad_norm": 0.786366288704427, + "learning_rate": 2.192888631800877e-06, + "loss": 0.2299, + "step": 45811 + }, + { + "epoch": 0.791609068288637, + "grad_norm": 1.0781364567439433, + "learning_rate": 2.192538923462414e-06, + "loss": 0.4031, + "step": 45812 + }, + { + "epoch": 0.7916263478020459, + "grad_norm": 2.1151611512749793, + "learning_rate": 2.1921892395775155e-06, + "loss": 0.2783, + "step": 45813 + }, + { + "epoch": 0.7916436273154548, + "grad_norm": 1.276999553978389, + "learning_rate": 2.191839580147275e-06, + "loss": 0.6125, + "step": 45814 + }, + { + "epoch": 0.7916609068288637, + "grad_norm": 1.2090704937425805, + "learning_rate": 2.1914899451727824e-06, + "loss": 0.2815, + "step": 45815 + }, + { + "epoch": 0.7916781863422726, + "grad_norm": 1.687742694995912, + "learning_rate": 2.1911403346551364e-06, + "loss": 0.2994, + "step": 45816 + }, + { + "epoch": 0.7916954658556815, + "grad_norm": 1.1569707622162189, + "learning_rate": 2.1907907485954326e-06, + "loss": 0.1929, + "step": 45817 + }, + { + "epoch": 0.7917127453690904, + "grad_norm": 1.2209889238111284, + "learning_rate": 2.1904411869947685e-06, + "loss": 0.3013, + "step": 45818 + }, + { + "epoch": 0.7917300248824993, + "grad_norm": 1.3875306280862276, + "learning_rate": 2.1900916498542337e-06, + "loss": 0.4405, + "step": 45819 + }, + { + "epoch": 0.7917473043959082, + "grad_norm": 1.290492027104213, + "learning_rate": 2.1897421371749274e-06, + "loss": 0.3155, + "step": 45820 + }, + { + "epoch": 0.7917645839093171, + "grad_norm": 1.2193884752436277, + "learning_rate": 2.18939264895794e-06, + "loss": 0.3655, + "step": 45821 + }, + { + "epoch": 0.791781863422726, + "grad_norm": 1.33890006646756, + "learning_rate": 2.1890431852043683e-06, + "loss": 0.5006, + "step": 45822 + }, + { + "epoch": 0.791799142936135, + "grad_norm": 1.3424920998148548, + "learning_rate": 2.1886937459153067e-06, + "loss": 0.3044, + "step": 45823 + }, + { + "epoch": 0.7918164224495439, + "grad_norm": 2.348585170529075, + "learning_rate": 2.188344331091854e-06, + "loss": 0.3207, + "step": 45824 + }, + { + "epoch": 0.7918337019629528, + "grad_norm": 1.0724021955757552, + "learning_rate": 2.1879949407350987e-06, + "loss": 0.4051, + "step": 45825 + }, + { + "epoch": 0.7918509814763616, + "grad_norm": 1.376896249176509, + "learning_rate": 2.1876455748461355e-06, + "loss": 0.2734, + "step": 45826 + }, + { + "epoch": 0.7918682609897705, + "grad_norm": 1.350604934407356, + "learning_rate": 2.1872962334260593e-06, + "loss": 0.289, + "step": 45827 + }, + { + "epoch": 0.7918855405031794, + "grad_norm": 1.8173881228399313, + "learning_rate": 2.186946916475966e-06, + "loss": 0.3636, + "step": 45828 + }, + { + "epoch": 0.7919028200165883, + "grad_norm": 1.2036868181442777, + "learning_rate": 2.18659762399695e-06, + "loss": 0.4642, + "step": 45829 + }, + { + "epoch": 0.7919200995299972, + "grad_norm": 1.4316074558235186, + "learning_rate": 2.186248355990103e-06, + "loss": 0.3612, + "step": 45830 + }, + { + "epoch": 0.7919373790434061, + "grad_norm": 1.2074178238668656, + "learning_rate": 2.185899112456521e-06, + "loss": 0.4672, + "step": 45831 + }, + { + "epoch": 0.791954658556815, + "grad_norm": 1.0648072690685906, + "learning_rate": 2.185549893397295e-06, + "loss": 0.5928, + "step": 45832 + }, + { + "epoch": 0.7919719380702239, + "grad_norm": 1.3195104716116326, + "learning_rate": 2.185200698813521e-06, + "loss": 0.5103, + "step": 45833 + }, + { + "epoch": 0.7919892175836328, + "grad_norm": 1.0769321878659874, + "learning_rate": 2.184851528706294e-06, + "loss": 0.338, + "step": 45834 + }, + { + "epoch": 0.7920064970970417, + "grad_norm": 0.5526467299510944, + "learning_rate": 2.184502383076704e-06, + "loss": 0.5653, + "step": 45835 + }, + { + "epoch": 0.7920237766104506, + "grad_norm": 1.3364544390572581, + "learning_rate": 2.1841532619258497e-06, + "loss": 0.3329, + "step": 45836 + }, + { + "epoch": 0.7920410561238596, + "grad_norm": 0.6925425424167838, + "learning_rate": 2.183804165254818e-06, + "loss": 0.516, + "step": 45837 + }, + { + "epoch": 0.7920583356372685, + "grad_norm": 1.212750978748311, + "learning_rate": 2.183455093064705e-06, + "loss": 0.2943, + "step": 45838 + }, + { + "epoch": 0.7920756151506774, + "grad_norm": 1.2718027682405364, + "learning_rate": 2.1831060453566057e-06, + "loss": 0.324, + "step": 45839 + }, + { + "epoch": 0.7920928946640863, + "grad_norm": 1.0474646354317676, + "learning_rate": 2.182757022131614e-06, + "loss": 0.4966, + "step": 45840 + }, + { + "epoch": 0.7921101741774952, + "grad_norm": 0.7150739874974293, + "learning_rate": 2.1824080233908198e-06, + "loss": 0.7257, + "step": 45841 + }, + { + "epoch": 0.7921274536909041, + "grad_norm": 1.0689023436406568, + "learning_rate": 2.18205904913532e-06, + "loss": 0.2865, + "step": 45842 + }, + { + "epoch": 0.792144733204313, + "grad_norm": 0.9506095262689184, + "learning_rate": 2.1817100993662034e-06, + "loss": 0.3822, + "step": 45843 + }, + { + "epoch": 0.7921620127177219, + "grad_norm": 1.1018171739815747, + "learning_rate": 2.1813611740845643e-06, + "loss": 0.3665, + "step": 45844 + }, + { + "epoch": 0.7921792922311308, + "grad_norm": 1.1658793023738028, + "learning_rate": 2.1810122732914982e-06, + "loss": 0.577, + "step": 45845 + }, + { + "epoch": 0.7921965717445397, + "grad_norm": 1.3598189740698223, + "learning_rate": 2.1806633969880944e-06, + "loss": 0.3125, + "step": 45846 + }, + { + "epoch": 0.7922138512579486, + "grad_norm": 1.1744295815734582, + "learning_rate": 2.1803145451754483e-06, + "loss": 0.3408, + "step": 45847 + }, + { + "epoch": 0.7922311307713574, + "grad_norm": 1.42176365519821, + "learning_rate": 2.17996571785465e-06, + "loss": 0.2312, + "step": 45848 + }, + { + "epoch": 0.7922484102847663, + "grad_norm": 0.9975867085799297, + "learning_rate": 2.1796169150267954e-06, + "loss": 0.3382, + "step": 45849 + }, + { + "epoch": 0.7922656897981752, + "grad_norm": 1.203083567942457, + "learning_rate": 2.17926813669297e-06, + "loss": 0.2184, + "step": 45850 + }, + { + "epoch": 0.7922829693115842, + "grad_norm": 1.0369837544063216, + "learning_rate": 2.1789193828542766e-06, + "loss": 0.519, + "step": 45851 + }, + { + "epoch": 0.7923002488249931, + "grad_norm": 1.1049099447268607, + "learning_rate": 2.178570653511799e-06, + "loss": 0.3551, + "step": 45852 + }, + { + "epoch": 0.792317528338402, + "grad_norm": 0.9074462745065632, + "learning_rate": 2.1782219486666354e-06, + "loss": 0.3837, + "step": 45853 + }, + { + "epoch": 0.7923348078518109, + "grad_norm": 1.5387465294581104, + "learning_rate": 2.177873268319872e-06, + "loss": 0.368, + "step": 45854 + }, + { + "epoch": 0.7923520873652198, + "grad_norm": 1.7128893745977083, + "learning_rate": 2.177524612472607e-06, + "loss": 0.2147, + "step": 45855 + }, + { + "epoch": 0.7923693668786287, + "grad_norm": 1.1455361096386847, + "learning_rate": 2.1771759811259264e-06, + "loss": 0.4052, + "step": 45856 + }, + { + "epoch": 0.7923866463920376, + "grad_norm": 1.2153500859726116, + "learning_rate": 2.1768273742809253e-06, + "loss": 0.363, + "step": 45857 + }, + { + "epoch": 0.7924039259054465, + "grad_norm": 1.523301958061039, + "learning_rate": 2.1764787919386985e-06, + "loss": 0.4036, + "step": 45858 + }, + { + "epoch": 0.7924212054188554, + "grad_norm": 0.9159124619061031, + "learning_rate": 2.176130234100332e-06, + "loss": 0.2911, + "step": 45859 + }, + { + "epoch": 0.7924384849322643, + "grad_norm": 0.5785345128068515, + "learning_rate": 2.1757817007669224e-06, + "loss": 0.6042, + "step": 45860 + }, + { + "epoch": 0.7924557644456732, + "grad_norm": 1.41860273657057, + "learning_rate": 2.1754331919395567e-06, + "loss": 0.2277, + "step": 45861 + }, + { + "epoch": 0.7924730439590821, + "grad_norm": 1.2144794903260503, + "learning_rate": 2.175084707619329e-06, + "loss": 0.2967, + "step": 45862 + }, + { + "epoch": 0.792490323472491, + "grad_norm": 1.218177679810435, + "learning_rate": 2.174736247807331e-06, + "loss": 0.2926, + "step": 45863 + }, + { + "epoch": 0.7925076029859, + "grad_norm": 0.9654804129562137, + "learning_rate": 2.1743878125046556e-06, + "loss": 0.243, + "step": 45864 + }, + { + "epoch": 0.7925248824993089, + "grad_norm": 0.8035692847577984, + "learning_rate": 2.1740394017123924e-06, + "loss": 0.4806, + "step": 45865 + }, + { + "epoch": 0.7925421620127178, + "grad_norm": 0.9342424051194527, + "learning_rate": 2.1736910154316304e-06, + "loss": 0.2509, + "step": 45866 + }, + { + "epoch": 0.7925594415261267, + "grad_norm": 0.9956738553605612, + "learning_rate": 2.1733426536634627e-06, + "loss": 0.5357, + "step": 45867 + }, + { + "epoch": 0.7925767210395356, + "grad_norm": 0.48424088501645274, + "learning_rate": 2.1729943164089805e-06, + "loss": 0.8083, + "step": 45868 + }, + { + "epoch": 0.7925940005529444, + "grad_norm": 1.4466770986455595, + "learning_rate": 2.1726460036692774e-06, + "loss": 0.3874, + "step": 45869 + }, + { + "epoch": 0.7926112800663533, + "grad_norm": 1.7037079080788877, + "learning_rate": 2.1722977154454396e-06, + "loss": 0.2502, + "step": 45870 + }, + { + "epoch": 0.7926285595797622, + "grad_norm": 1.1724697704898694, + "learning_rate": 2.171949451738563e-06, + "loss": 0.5114, + "step": 45871 + }, + { + "epoch": 0.7926458390931711, + "grad_norm": 2.0304663606522846, + "learning_rate": 2.1716012125497323e-06, + "loss": 0.3942, + "step": 45872 + }, + { + "epoch": 0.79266311860658, + "grad_norm": 1.220098098378616, + "learning_rate": 2.171252997880042e-06, + "loss": 0.424, + "step": 45873 + }, + { + "epoch": 0.7926803981199889, + "grad_norm": 1.8638226031220186, + "learning_rate": 2.170904807730583e-06, + "loss": 0.5335, + "step": 45874 + }, + { + "epoch": 0.7926976776333978, + "grad_norm": 1.900536891845424, + "learning_rate": 2.170556642102446e-06, + "loss": 0.5712, + "step": 45875 + }, + { + "epoch": 0.7927149571468067, + "grad_norm": 1.750917792750934, + "learning_rate": 2.170208500996722e-06, + "loss": 0.3029, + "step": 45876 + }, + { + "epoch": 0.7927322366602156, + "grad_norm": 1.261849726300141, + "learning_rate": 2.1698603844144973e-06, + "loss": 0.4033, + "step": 45877 + }, + { + "epoch": 0.7927495161736245, + "grad_norm": 1.2589026845922406, + "learning_rate": 2.1695122923568646e-06, + "loss": 0.3425, + "step": 45878 + }, + { + "epoch": 0.7927667956870335, + "grad_norm": 1.1958723435543919, + "learning_rate": 2.1691642248249145e-06, + "loss": 0.5273, + "step": 45879 + }, + { + "epoch": 0.7927840752004424, + "grad_norm": 0.5478089204754891, + "learning_rate": 2.1688161818197395e-06, + "loss": 0.4515, + "step": 45880 + }, + { + "epoch": 0.7928013547138513, + "grad_norm": 1.2234067023427007, + "learning_rate": 2.1684681633424252e-06, + "loss": 0.2499, + "step": 45881 + }, + { + "epoch": 0.7928186342272602, + "grad_norm": 1.5619671476085262, + "learning_rate": 2.1681201693940667e-06, + "loss": 0.5027, + "step": 45882 + }, + { + "epoch": 0.7928359137406691, + "grad_norm": 0.955155995210045, + "learning_rate": 2.167772199975747e-06, + "loss": 0.6732, + "step": 45883 + }, + { + "epoch": 0.792853193254078, + "grad_norm": 1.7669537117604457, + "learning_rate": 2.1674242550885617e-06, + "loss": 0.3115, + "step": 45884 + }, + { + "epoch": 0.7928704727674869, + "grad_norm": 1.4045011956132536, + "learning_rate": 2.167076334733601e-06, + "loss": 0.3075, + "step": 45885 + }, + { + "epoch": 0.7928877522808958, + "grad_norm": 0.9962165102367128, + "learning_rate": 2.166728438911949e-06, + "loss": 0.3515, + "step": 45886 + }, + { + "epoch": 0.7929050317943047, + "grad_norm": 2.595570047432436, + "learning_rate": 2.166380567624703e-06, + "loss": 0.2104, + "step": 45887 + }, + { + "epoch": 0.7929223113077136, + "grad_norm": 1.6813096195431152, + "learning_rate": 2.1660327208729447e-06, + "loss": 0.2892, + "step": 45888 + }, + { + "epoch": 0.7929395908211225, + "grad_norm": 1.438256360005643, + "learning_rate": 2.1656848986577706e-06, + "loss": 0.2783, + "step": 45889 + }, + { + "epoch": 0.7929568703345313, + "grad_norm": 1.2920475647284166, + "learning_rate": 2.1653371009802604e-06, + "loss": 0.3988, + "step": 45890 + }, + { + "epoch": 0.7929741498479402, + "grad_norm": 1.2014874634548134, + "learning_rate": 2.1649893278415158e-06, + "loss": 0.5925, + "step": 45891 + }, + { + "epoch": 0.7929914293613491, + "grad_norm": 1.7285862785973019, + "learning_rate": 2.164641579242618e-06, + "loss": 0.1122, + "step": 45892 + }, + { + "epoch": 0.793008708874758, + "grad_norm": 1.0659825756853674, + "learning_rate": 2.1642938551846604e-06, + "loss": 0.2757, + "step": 45893 + }, + { + "epoch": 0.793025988388167, + "grad_norm": 1.6460027525461454, + "learning_rate": 2.163946155668727e-06, + "loss": 0.4425, + "step": 45894 + }, + { + "epoch": 0.7930432679015759, + "grad_norm": 1.3097869036437801, + "learning_rate": 2.163598480695912e-06, + "loss": 0.4057, + "step": 45895 + }, + { + "epoch": 0.7930605474149848, + "grad_norm": 0.9453976931893877, + "learning_rate": 2.1632508302673006e-06, + "loss": 0.5391, + "step": 45896 + }, + { + "epoch": 0.7930778269283937, + "grad_norm": 1.5262291329418283, + "learning_rate": 2.1629032043839827e-06, + "loss": 0.3245, + "step": 45897 + }, + { + "epoch": 0.7930951064418026, + "grad_norm": 1.7740228707397785, + "learning_rate": 2.1625556030470496e-06, + "loss": 0.3555, + "step": 45898 + }, + { + "epoch": 0.7931123859552115, + "grad_norm": 1.2912865427584188, + "learning_rate": 2.162208026257586e-06, + "loss": 0.2529, + "step": 45899 + }, + { + "epoch": 0.7931296654686204, + "grad_norm": 1.100267248202432, + "learning_rate": 2.161860474016685e-06, + "loss": 0.1817, + "step": 45900 + }, + { + "epoch": 0.7931469449820293, + "grad_norm": 1.2291426505196548, + "learning_rate": 2.16151294632543e-06, + "loss": 0.2438, + "step": 45901 + }, + { + "epoch": 0.7931642244954382, + "grad_norm": 1.6459761498298437, + "learning_rate": 2.161165443184912e-06, + "loss": 0.3311, + "step": 45902 + }, + { + "epoch": 0.7931815040088471, + "grad_norm": 0.9450506210502662, + "learning_rate": 2.160817964596219e-06, + "loss": 0.2184, + "step": 45903 + }, + { + "epoch": 0.793198783522256, + "grad_norm": 1.4877558271545275, + "learning_rate": 2.160470510560444e-06, + "loss": 0.2881, + "step": 45904 + }, + { + "epoch": 0.7932160630356649, + "grad_norm": 0.8425764656577028, + "learning_rate": 2.160123081078668e-06, + "loss": 0.167, + "step": 45905 + }, + { + "epoch": 0.7932333425490738, + "grad_norm": 1.5095190166947094, + "learning_rate": 2.159775676151984e-06, + "loss": 0.3454, + "step": 45906 + }, + { + "epoch": 0.7932506220624828, + "grad_norm": 1.2496021741604986, + "learning_rate": 2.159428295781477e-06, + "loss": 0.2713, + "step": 45907 + }, + { + "epoch": 0.7932679015758917, + "grad_norm": 1.31570194955152, + "learning_rate": 2.1590809399682355e-06, + "loss": 0.4436, + "step": 45908 + }, + { + "epoch": 0.7932851810893006, + "grad_norm": 1.1963602027986944, + "learning_rate": 2.1587336087133514e-06, + "loss": 0.4423, + "step": 45909 + }, + { + "epoch": 0.7933024606027095, + "grad_norm": 1.0994348187027783, + "learning_rate": 2.1583863020179076e-06, + "loss": 0.2279, + "step": 45910 + }, + { + "epoch": 0.7933197401161183, + "grad_norm": 1.3308172382838663, + "learning_rate": 2.158039019882996e-06, + "loss": 0.2447, + "step": 45911 + }, + { + "epoch": 0.7933370196295272, + "grad_norm": 0.7961440063507675, + "learning_rate": 2.1576917623096995e-06, + "loss": 0.3807, + "step": 45912 + }, + { + "epoch": 0.7933542991429361, + "grad_norm": 2.1588378378063484, + "learning_rate": 2.157344529299108e-06, + "loss": 0.1878, + "step": 45913 + }, + { + "epoch": 0.793371578656345, + "grad_norm": 1.7966237206284121, + "learning_rate": 2.15699732085231e-06, + "loss": 0.2321, + "step": 45914 + }, + { + "epoch": 0.7933888581697539, + "grad_norm": 1.490665688448071, + "learning_rate": 2.156650136970394e-06, + "loss": 0.2967, + "step": 45915 + }, + { + "epoch": 0.7934061376831628, + "grad_norm": 1.2693007462335726, + "learning_rate": 2.1563029776544474e-06, + "loss": 0.4408, + "step": 45916 + }, + { + "epoch": 0.7934234171965717, + "grad_norm": 1.0301064888324996, + "learning_rate": 2.155955842905552e-06, + "loss": 0.3871, + "step": 45917 + }, + { + "epoch": 0.7934406967099806, + "grad_norm": 1.4341577468282112, + "learning_rate": 2.1556087327248e-06, + "loss": 0.3211, + "step": 45918 + }, + { + "epoch": 0.7934579762233895, + "grad_norm": 1.865791533157262, + "learning_rate": 2.155261647113277e-06, + "loss": 0.4412, + "step": 45919 + }, + { + "epoch": 0.7934752557367984, + "grad_norm": 1.2913783033917878, + "learning_rate": 2.1549145860720733e-06, + "loss": 0.4574, + "step": 45920 + }, + { + "epoch": 0.7934925352502074, + "grad_norm": 1.6294449196115686, + "learning_rate": 2.154567549602271e-06, + "loss": 0.3141, + "step": 45921 + }, + { + "epoch": 0.7935098147636163, + "grad_norm": 1.2786734404522788, + "learning_rate": 2.1542205377049617e-06, + "loss": 0.3746, + "step": 45922 + }, + { + "epoch": 0.7935270942770252, + "grad_norm": 1.4031480098989115, + "learning_rate": 2.153873550381228e-06, + "loss": 0.2493, + "step": 45923 + }, + { + "epoch": 0.7935443737904341, + "grad_norm": 1.2036962258897483, + "learning_rate": 2.1535265876321574e-06, + "loss": 0.3903, + "step": 45924 + }, + { + "epoch": 0.793561653303843, + "grad_norm": 0.818113563386775, + "learning_rate": 2.1531796494588388e-06, + "loss": 0.2938, + "step": 45925 + }, + { + "epoch": 0.7935789328172519, + "grad_norm": 1.4844229242605722, + "learning_rate": 2.1528327358623602e-06, + "loss": 0.2932, + "step": 45926 + }, + { + "epoch": 0.7935962123306608, + "grad_norm": 1.0211013056820557, + "learning_rate": 2.152485846843806e-06, + "loss": 0.7851, + "step": 45927 + }, + { + "epoch": 0.7936134918440697, + "grad_norm": 1.9390492947298916, + "learning_rate": 2.1521389824042603e-06, + "loss": 0.382, + "step": 45928 + }, + { + "epoch": 0.7936307713574786, + "grad_norm": 1.4517728859338372, + "learning_rate": 2.151792142544814e-06, + "loss": 0.2251, + "step": 45929 + }, + { + "epoch": 0.7936480508708875, + "grad_norm": 1.0815894193394022, + "learning_rate": 2.151445327266548e-06, + "loss": 0.4725, + "step": 45930 + }, + { + "epoch": 0.7936653303842964, + "grad_norm": 1.3132899543003516, + "learning_rate": 2.151098536570552e-06, + "loss": 0.3325, + "step": 45931 + }, + { + "epoch": 0.7936826098977052, + "grad_norm": 0.8864612527700079, + "learning_rate": 2.150751770457912e-06, + "loss": 0.3279, + "step": 45932 + }, + { + "epoch": 0.7936998894111141, + "grad_norm": 1.397322009664695, + "learning_rate": 2.1504050289297163e-06, + "loss": 0.5304, + "step": 45933 + }, + { + "epoch": 0.793717168924523, + "grad_norm": 1.072405161322891, + "learning_rate": 2.1500583119870467e-06, + "loss": 0.3686, + "step": 45934 + }, + { + "epoch": 0.793734448437932, + "grad_norm": 1.079188520254514, + "learning_rate": 2.1497116196309932e-06, + "loss": 0.4163, + "step": 45935 + }, + { + "epoch": 0.7937517279513409, + "grad_norm": 1.2741464907471194, + "learning_rate": 2.149364951862637e-06, + "loss": 0.3096, + "step": 45936 + }, + { + "epoch": 0.7937690074647498, + "grad_norm": 1.4297983002238224, + "learning_rate": 2.1490183086830664e-06, + "loss": 0.3424, + "step": 45937 + }, + { + "epoch": 0.7937862869781587, + "grad_norm": 2.0254404252855593, + "learning_rate": 2.148671690093369e-06, + "loss": 0.2143, + "step": 45938 + }, + { + "epoch": 0.7938035664915676, + "grad_norm": 1.5302821055342044, + "learning_rate": 2.1483250960946265e-06, + "loss": 0.7969, + "step": 45939 + }, + { + "epoch": 0.7938208460049765, + "grad_norm": 1.2914327853505658, + "learning_rate": 2.1479785266879295e-06, + "loss": 0.4088, + "step": 45940 + }, + { + "epoch": 0.7938381255183854, + "grad_norm": 1.2531407843104974, + "learning_rate": 2.1476319818743572e-06, + "loss": 0.4113, + "step": 45941 + }, + { + "epoch": 0.7938554050317943, + "grad_norm": 1.6032293021178425, + "learning_rate": 2.1472854616549986e-06, + "loss": 0.3762, + "step": 45942 + }, + { + "epoch": 0.7938726845452032, + "grad_norm": 1.3997209352601894, + "learning_rate": 2.1469389660309394e-06, + "loss": 0.3006, + "step": 45943 + }, + { + "epoch": 0.7938899640586121, + "grad_norm": 1.7719684689119473, + "learning_rate": 2.1465924950032656e-06, + "loss": 0.3522, + "step": 45944 + }, + { + "epoch": 0.793907243572021, + "grad_norm": 1.1716980335499383, + "learning_rate": 2.1462460485730598e-06, + "loss": 0.2912, + "step": 45945 + }, + { + "epoch": 0.7939245230854299, + "grad_norm": 2.116517386191436, + "learning_rate": 2.145899626741409e-06, + "loss": 0.1733, + "step": 45946 + }, + { + "epoch": 0.7939418025988388, + "grad_norm": 1.1425645570888907, + "learning_rate": 2.145553229509396e-06, + "loss": 0.2863, + "step": 45947 + }, + { + "epoch": 0.7939590821122477, + "grad_norm": 1.0063310028587498, + "learning_rate": 2.145206856878107e-06, + "loss": 0.1689, + "step": 45948 + }, + { + "epoch": 0.7939763616256567, + "grad_norm": 1.665510264769164, + "learning_rate": 2.1448605088486295e-06, + "loss": 0.368, + "step": 45949 + }, + { + "epoch": 0.7939936411390656, + "grad_norm": 1.2012812926409433, + "learning_rate": 2.144514185422044e-06, + "loss": 0.419, + "step": 45950 + }, + { + "epoch": 0.7940109206524745, + "grad_norm": 0.7106905597767456, + "learning_rate": 2.1441678865994388e-06, + "loss": 0.9468, + "step": 45951 + }, + { + "epoch": 0.7940282001658834, + "grad_norm": 1.328763504370882, + "learning_rate": 2.1438216123818943e-06, + "loss": 0.5781, + "step": 45952 + }, + { + "epoch": 0.7940454796792922, + "grad_norm": 1.0083065757330858, + "learning_rate": 2.1434753627704973e-06, + "loss": 0.3606, + "step": 45953 + }, + { + "epoch": 0.7940627591927011, + "grad_norm": 1.336943562461192, + "learning_rate": 2.143129137766333e-06, + "loss": 0.2561, + "step": 45954 + }, + { + "epoch": 0.79408003870611, + "grad_norm": 1.2661044663340986, + "learning_rate": 2.142782937370488e-06, + "loss": 0.2984, + "step": 45955 + }, + { + "epoch": 0.7940973182195189, + "grad_norm": 1.5751433062899503, + "learning_rate": 2.1424367615840414e-06, + "loss": 0.3028, + "step": 45956 + }, + { + "epoch": 0.7941145977329278, + "grad_norm": 1.3196738309525038, + "learning_rate": 2.142090610408083e-06, + "loss": 0.4017, + "step": 45957 + }, + { + "epoch": 0.7941318772463367, + "grad_norm": 1.8823727026368426, + "learning_rate": 2.141744483843691e-06, + "loss": 0.2601, + "step": 45958 + }, + { + "epoch": 0.7941491567597456, + "grad_norm": 1.2567530497990693, + "learning_rate": 2.1413983818919514e-06, + "loss": 0.3131, + "step": 45959 + }, + { + "epoch": 0.7941664362731545, + "grad_norm": 0.84748537745606, + "learning_rate": 2.141052304553953e-06, + "loss": 0.3265, + "step": 45960 + }, + { + "epoch": 0.7941837157865634, + "grad_norm": 1.738160361099063, + "learning_rate": 2.140706251830773e-06, + "loss": 0.3068, + "step": 45961 + }, + { + "epoch": 0.7942009952999723, + "grad_norm": 1.3494566126513607, + "learning_rate": 2.140360223723502e-06, + "loss": 0.2768, + "step": 45962 + }, + { + "epoch": 0.7942182748133813, + "grad_norm": 1.2687231544767446, + "learning_rate": 2.1400142202332163e-06, + "loss": 0.6668, + "step": 45963 + }, + { + "epoch": 0.7942355543267902, + "grad_norm": 1.4551238562189595, + "learning_rate": 2.1396682413610037e-06, + "loss": 0.4201, + "step": 45964 + }, + { + "epoch": 0.7942528338401991, + "grad_norm": 1.1505501128652067, + "learning_rate": 2.1393222871079476e-06, + "loss": 0.2658, + "step": 45965 + }, + { + "epoch": 0.794270113353608, + "grad_norm": 1.4067415682809976, + "learning_rate": 2.1389763574751345e-06, + "loss": 0.3786, + "step": 45966 + }, + { + "epoch": 0.7942873928670169, + "grad_norm": 1.2168117160565488, + "learning_rate": 2.1386304524636414e-06, + "loss": 0.3592, + "step": 45967 + }, + { + "epoch": 0.7943046723804258, + "grad_norm": 1.242555173426686, + "learning_rate": 2.1382845720745582e-06, + "loss": 0.2763, + "step": 45968 + }, + { + "epoch": 0.7943219518938347, + "grad_norm": 1.3141697496145242, + "learning_rate": 2.137938716308966e-06, + "loss": 0.415, + "step": 45969 + }, + { + "epoch": 0.7943392314072436, + "grad_norm": 0.7932038337423084, + "learning_rate": 2.1375928851679438e-06, + "loss": 0.3317, + "step": 45970 + }, + { + "epoch": 0.7943565109206525, + "grad_norm": 1.3982886959950964, + "learning_rate": 2.137247078652578e-06, + "loss": 0.2316, + "step": 45971 + }, + { + "epoch": 0.7943737904340614, + "grad_norm": 1.1935756314953603, + "learning_rate": 2.1369012967639524e-06, + "loss": 0.3227, + "step": 45972 + }, + { + "epoch": 0.7943910699474703, + "grad_norm": 1.6432955047775017, + "learning_rate": 2.1365555395031513e-06, + "loss": 0.2705, + "step": 45973 + }, + { + "epoch": 0.7944083494608791, + "grad_norm": 1.1527068564118323, + "learning_rate": 2.1362098068712545e-06, + "loss": 0.4379, + "step": 45974 + }, + { + "epoch": 0.794425628974288, + "grad_norm": 2.300072373751853, + "learning_rate": 2.1358640988693478e-06, + "loss": 0.2544, + "step": 45975 + }, + { + "epoch": 0.7944429084876969, + "grad_norm": 1.2975306792319252, + "learning_rate": 2.13551841549851e-06, + "loss": 0.4311, + "step": 45976 + }, + { + "epoch": 0.7944601880011058, + "grad_norm": 1.3305596992976574, + "learning_rate": 2.135172756759827e-06, + "loss": 0.3856, + "step": 45977 + }, + { + "epoch": 0.7944774675145148, + "grad_norm": 1.051353512475298, + "learning_rate": 2.1348271226543814e-06, + "loss": 0.4568, + "step": 45978 + }, + { + "epoch": 0.7944947470279237, + "grad_norm": 0.6771119147971187, + "learning_rate": 2.134481513183254e-06, + "loss": 0.6581, + "step": 45979 + }, + { + "epoch": 0.7945120265413326, + "grad_norm": 2.0592994985529622, + "learning_rate": 2.1341359283475303e-06, + "loss": 0.3761, + "step": 45980 + }, + { + "epoch": 0.7945293060547415, + "grad_norm": 1.1245030085200503, + "learning_rate": 2.1337903681482884e-06, + "loss": 0.3426, + "step": 45981 + }, + { + "epoch": 0.7945465855681504, + "grad_norm": 0.5844927850302932, + "learning_rate": 2.133444832586613e-06, + "loss": 0.5125, + "step": 45982 + }, + { + "epoch": 0.7945638650815593, + "grad_norm": 0.626229485590358, + "learning_rate": 2.1330993216635854e-06, + "loss": 0.5798, + "step": 45983 + }, + { + "epoch": 0.7945811445949682, + "grad_norm": 1.6119512181091789, + "learning_rate": 2.132753835380291e-06, + "loss": 0.346, + "step": 45984 + }, + { + "epoch": 0.7945984241083771, + "grad_norm": 0.9872816725395382, + "learning_rate": 2.1324083737378075e-06, + "loss": 0.4224, + "step": 45985 + }, + { + "epoch": 0.794615703621786, + "grad_norm": 1.0812188731545462, + "learning_rate": 2.1320629367372224e-06, + "loss": 0.2201, + "step": 45986 + }, + { + "epoch": 0.7946329831351949, + "grad_norm": 1.3264800703119521, + "learning_rate": 2.131717524379611e-06, + "loss": 0.351, + "step": 45987 + }, + { + "epoch": 0.7946502626486038, + "grad_norm": 1.0797127220997207, + "learning_rate": 2.1313721366660578e-06, + "loss": 0.524, + "step": 45988 + }, + { + "epoch": 0.7946675421620127, + "grad_norm": 1.2134629743283087, + "learning_rate": 2.1310267735976474e-06, + "loss": 0.5064, + "step": 45989 + }, + { + "epoch": 0.7946848216754216, + "grad_norm": 1.064835349532684, + "learning_rate": 2.130681435175457e-06, + "loss": 0.3984, + "step": 45990 + }, + { + "epoch": 0.7947021011888306, + "grad_norm": 1.0196458109240203, + "learning_rate": 2.1303361214005723e-06, + "loss": 0.2929, + "step": 45991 + }, + { + "epoch": 0.7947193807022395, + "grad_norm": 1.556523404429239, + "learning_rate": 2.1299908322740716e-06, + "loss": 0.3004, + "step": 45992 + }, + { + "epoch": 0.7947366602156484, + "grad_norm": 1.188252883515725, + "learning_rate": 2.1296455677970374e-06, + "loss": 0.6592, + "step": 45993 + }, + { + "epoch": 0.7947539397290573, + "grad_norm": 1.8388969541815692, + "learning_rate": 2.1293003279705504e-06, + "loss": 0.4386, + "step": 45994 + }, + { + "epoch": 0.7947712192424662, + "grad_norm": 1.1587127691802153, + "learning_rate": 2.1289551127956965e-06, + "loss": 0.3435, + "step": 45995 + }, + { + "epoch": 0.794788498755875, + "grad_norm": 1.1955917564219354, + "learning_rate": 2.12860992227355e-06, + "loss": 0.4528, + "step": 45996 + }, + { + "epoch": 0.7948057782692839, + "grad_norm": 1.1238831332872223, + "learning_rate": 2.1282647564051995e-06, + "loss": 0.3853, + "step": 45997 + }, + { + "epoch": 0.7948230577826928, + "grad_norm": 1.1446283051474828, + "learning_rate": 2.127919615191718e-06, + "loss": 0.4689, + "step": 45998 + }, + { + "epoch": 0.7948403372961017, + "grad_norm": 0.9070840130451842, + "learning_rate": 2.1275744986341907e-06, + "loss": 0.4424, + "step": 45999 + }, + { + "epoch": 0.7948576168095106, + "grad_norm": 1.055910698006602, + "learning_rate": 2.127229406733702e-06, + "loss": 0.3493, + "step": 46000 + }, + { + "epoch": 0.7948748963229195, + "grad_norm": 1.4264543328148596, + "learning_rate": 2.1268843394913252e-06, + "loss": 0.3073, + "step": 46001 + }, + { + "epoch": 0.7948921758363284, + "grad_norm": 1.2715594937042112, + "learning_rate": 2.1265392969081478e-06, + "loss": 0.3551, + "step": 46002 + }, + { + "epoch": 0.7949094553497373, + "grad_norm": 1.478060513365529, + "learning_rate": 2.1261942789852453e-06, + "loss": 0.2525, + "step": 46003 + }, + { + "epoch": 0.7949267348631462, + "grad_norm": 1.5218307190528302, + "learning_rate": 2.125849285723701e-06, + "loss": 0.3181, + "step": 46004 + }, + { + "epoch": 0.7949440143765552, + "grad_norm": 1.3540993901946734, + "learning_rate": 2.125504317124595e-06, + "loss": 0.4149, + "step": 46005 + }, + { + "epoch": 0.7949612938899641, + "grad_norm": 1.2559337640904014, + "learning_rate": 2.1251593731890106e-06, + "loss": 0.3354, + "step": 46006 + }, + { + "epoch": 0.794978573403373, + "grad_norm": 1.1357723493726708, + "learning_rate": 2.1248144539180226e-06, + "loss": 0.2439, + "step": 46007 + }, + { + "epoch": 0.7949958529167819, + "grad_norm": 1.0108722534131547, + "learning_rate": 2.1244695593127173e-06, + "loss": 0.4695, + "step": 46008 + }, + { + "epoch": 0.7950131324301908, + "grad_norm": 1.813055514197545, + "learning_rate": 2.1241246893741707e-06, + "loss": 0.321, + "step": 46009 + }, + { + "epoch": 0.7950304119435997, + "grad_norm": 1.4343258791927933, + "learning_rate": 2.123779844103463e-06, + "loss": 0.3614, + "step": 46010 + }, + { + "epoch": 0.7950476914570086, + "grad_norm": 1.1220937247444192, + "learning_rate": 2.1234350235016743e-06, + "loss": 0.2334, + "step": 46011 + }, + { + "epoch": 0.7950649709704175, + "grad_norm": 1.882394512874211, + "learning_rate": 2.123090227569886e-06, + "loss": 0.392, + "step": 46012 + }, + { + "epoch": 0.7950822504838264, + "grad_norm": 1.2642235862235611, + "learning_rate": 2.1227454563091807e-06, + "loss": 0.5888, + "step": 46013 + }, + { + "epoch": 0.7950995299972353, + "grad_norm": 1.1949212641270492, + "learning_rate": 2.1224007097206325e-06, + "loss": 0.2851, + "step": 46014 + }, + { + "epoch": 0.7951168095106442, + "grad_norm": 1.3333113003338743, + "learning_rate": 2.1220559878053273e-06, + "loss": 0.4298, + "step": 46015 + }, + { + "epoch": 0.7951340890240531, + "grad_norm": 1.5786441742089488, + "learning_rate": 2.121711290564338e-06, + "loss": 0.3414, + "step": 46016 + }, + { + "epoch": 0.7951513685374619, + "grad_norm": 0.9571471103883857, + "learning_rate": 2.1213666179987468e-06, + "loss": 0.563, + "step": 46017 + }, + { + "epoch": 0.7951686480508708, + "grad_norm": 0.8337650021762784, + "learning_rate": 2.1210219701096357e-06, + "loss": 0.4901, + "step": 46018 + }, + { + "epoch": 0.7951859275642797, + "grad_norm": 0.48045715939231803, + "learning_rate": 2.1206773468980845e-06, + "loss": 0.7769, + "step": 46019 + }, + { + "epoch": 0.7952032070776887, + "grad_norm": 1.620451446291806, + "learning_rate": 2.1203327483651704e-06, + "loss": 0.1936, + "step": 46020 + }, + { + "epoch": 0.7952204865910976, + "grad_norm": 1.2980822636151255, + "learning_rate": 2.119988174511971e-06, + "loss": 0.2706, + "step": 46021 + }, + { + "epoch": 0.7952377661045065, + "grad_norm": 2.1114020012346733, + "learning_rate": 2.1196436253395668e-06, + "loss": 0.3915, + "step": 46022 + }, + { + "epoch": 0.7952550456179154, + "grad_norm": 1.495269087848112, + "learning_rate": 2.119299100849038e-06, + "loss": 0.3748, + "step": 46023 + }, + { + "epoch": 0.7952723251313243, + "grad_norm": 1.0980984009934422, + "learning_rate": 2.118954601041465e-06, + "loss": 0.4007, + "step": 46024 + }, + { + "epoch": 0.7952896046447332, + "grad_norm": 1.2168045594801382, + "learning_rate": 2.118610125917924e-06, + "loss": 0.2578, + "step": 46025 + }, + { + "epoch": 0.7953068841581421, + "grad_norm": 1.784995594272979, + "learning_rate": 2.118265675479497e-06, + "loss": 0.4565, + "step": 46026 + }, + { + "epoch": 0.795324163671551, + "grad_norm": 1.111591015687764, + "learning_rate": 2.1179212497272582e-06, + "loss": 0.325, + "step": 46027 + }, + { + "epoch": 0.7953414431849599, + "grad_norm": 0.9771944950559016, + "learning_rate": 2.1175768486622893e-06, + "loss": 0.4256, + "step": 46028 + }, + { + "epoch": 0.7953587226983688, + "grad_norm": 1.6103351832198949, + "learning_rate": 2.1172324722856685e-06, + "loss": 0.2682, + "step": 46029 + }, + { + "epoch": 0.7953760022117777, + "grad_norm": 0.9665667101858003, + "learning_rate": 2.116888120598478e-06, + "loss": 0.2253, + "step": 46030 + }, + { + "epoch": 0.7953932817251866, + "grad_norm": 1.5639995890833522, + "learning_rate": 2.116543793601793e-06, + "loss": 0.2787, + "step": 46031 + }, + { + "epoch": 0.7954105612385955, + "grad_norm": 1.2601401508892736, + "learning_rate": 2.1161994912966887e-06, + "loss": 0.6813, + "step": 46032 + }, + { + "epoch": 0.7954278407520045, + "grad_norm": 1.3005540698420812, + "learning_rate": 2.1158552136842468e-06, + "loss": 0.3157, + "step": 46033 + }, + { + "epoch": 0.7954451202654134, + "grad_norm": 1.393821887524211, + "learning_rate": 2.1155109607655457e-06, + "loss": 0.4217, + "step": 46034 + }, + { + "epoch": 0.7954623997788223, + "grad_norm": 0.746598923351774, + "learning_rate": 2.115166732541666e-06, + "loss": 0.2887, + "step": 46035 + }, + { + "epoch": 0.7954796792922312, + "grad_norm": 0.951520511959368, + "learning_rate": 2.1148225290136804e-06, + "loss": 0.3864, + "step": 46036 + }, + { + "epoch": 0.7954969588056401, + "grad_norm": 1.1139484018417722, + "learning_rate": 2.1144783501826727e-06, + "loss": 0.2353, + "step": 46037 + }, + { + "epoch": 0.7955142383190489, + "grad_norm": 1.2699452464023797, + "learning_rate": 2.114134196049715e-06, + "loss": 0.4162, + "step": 46038 + }, + { + "epoch": 0.7955315178324578, + "grad_norm": 0.663956805290684, + "learning_rate": 2.1137900666158897e-06, + "loss": 0.589, + "step": 46039 + }, + { + "epoch": 0.7955487973458667, + "grad_norm": 1.920729800491588, + "learning_rate": 2.1134459618822743e-06, + "loss": 0.334, + "step": 46040 + }, + { + "epoch": 0.7955660768592756, + "grad_norm": 1.0208770710617001, + "learning_rate": 2.113101881849944e-06, + "loss": 0.3762, + "step": 46041 + }, + { + "epoch": 0.7955833563726845, + "grad_norm": 1.1488046034512764, + "learning_rate": 2.11275782651998e-06, + "loss": 0.4074, + "step": 46042 + }, + { + "epoch": 0.7956006358860934, + "grad_norm": 0.745445303463236, + "learning_rate": 2.1124137958934555e-06, + "loss": 0.5544, + "step": 46043 + }, + { + "epoch": 0.7956179153995023, + "grad_norm": 1.0572758678324379, + "learning_rate": 2.1120697899714503e-06, + "loss": 0.2973, + "step": 46044 + }, + { + "epoch": 0.7956351949129112, + "grad_norm": 1.6470604625369591, + "learning_rate": 2.111725808755042e-06, + "loss": 0.4246, + "step": 46045 + }, + { + "epoch": 0.7956524744263201, + "grad_norm": 0.8457543915981272, + "learning_rate": 2.111381852245311e-06, + "loss": 0.5061, + "step": 46046 + }, + { + "epoch": 0.795669753939729, + "grad_norm": 1.006662504616904, + "learning_rate": 2.1110379204433283e-06, + "loss": 0.3369, + "step": 46047 + }, + { + "epoch": 0.795687033453138, + "grad_norm": 1.8569065697925289, + "learning_rate": 2.1106940133501776e-06, + "loss": 0.4404, + "step": 46048 + }, + { + "epoch": 0.7957043129665469, + "grad_norm": 1.4530779761842114, + "learning_rate": 2.1103501309669295e-06, + "loss": 0.2864, + "step": 46049 + }, + { + "epoch": 0.7957215924799558, + "grad_norm": 2.2666622898527145, + "learning_rate": 2.110006273294668e-06, + "loss": 0.4171, + "step": 46050 + }, + { + "epoch": 0.7957388719933647, + "grad_norm": 0.9166040303720677, + "learning_rate": 2.1096624403344633e-06, + "loss": 0.3922, + "step": 46051 + }, + { + "epoch": 0.7957561515067736, + "grad_norm": 1.107862401353144, + "learning_rate": 2.109318632087395e-06, + "loss": 0.3545, + "step": 46052 + }, + { + "epoch": 0.7957734310201825, + "grad_norm": 1.4011648082869155, + "learning_rate": 2.1089748485545434e-06, + "loss": 0.5423, + "step": 46053 + }, + { + "epoch": 0.7957907105335914, + "grad_norm": 1.4000873426950824, + "learning_rate": 2.10863108973698e-06, + "loss": 0.2431, + "step": 46054 + }, + { + "epoch": 0.7958079900470003, + "grad_norm": 1.2081360297826005, + "learning_rate": 2.1082873556357865e-06, + "loss": 0.2947, + "step": 46055 + }, + { + "epoch": 0.7958252695604092, + "grad_norm": 1.0426826321822171, + "learning_rate": 2.1079436462520333e-06, + "loss": 0.3583, + "step": 46056 + }, + { + "epoch": 0.7958425490738181, + "grad_norm": 1.4680457713857653, + "learning_rate": 2.1075999615868004e-06, + "loss": 0.3515, + "step": 46057 + }, + { + "epoch": 0.795859828587227, + "grad_norm": 1.0146861786012977, + "learning_rate": 2.1072563016411653e-06, + "loss": 0.625, + "step": 46058 + }, + { + "epoch": 0.7958771081006358, + "grad_norm": 1.349641814870585, + "learning_rate": 2.1069126664162054e-06, + "loss": 0.3464, + "step": 46059 + }, + { + "epoch": 0.7958943876140447, + "grad_norm": 1.4145177640157507, + "learning_rate": 2.1065690559129914e-06, + "loss": 0.4911, + "step": 46060 + }, + { + "epoch": 0.7959116671274536, + "grad_norm": 1.0712575126766437, + "learning_rate": 2.106225470132607e-06, + "loss": 0.5028, + "step": 46061 + }, + { + "epoch": 0.7959289466408626, + "grad_norm": 0.8812307262661996, + "learning_rate": 2.1058819090761207e-06, + "loss": 0.4393, + "step": 46062 + }, + { + "epoch": 0.7959462261542715, + "grad_norm": 1.7933494632331914, + "learning_rate": 2.105538372744612e-06, + "loss": 0.3191, + "step": 46063 + }, + { + "epoch": 0.7959635056676804, + "grad_norm": 1.7922740783080908, + "learning_rate": 2.1051948611391604e-06, + "loss": 0.241, + "step": 46064 + }, + { + "epoch": 0.7959807851810893, + "grad_norm": 1.4112543701087585, + "learning_rate": 2.1048513742608357e-06, + "loss": 0.7132, + "step": 46065 + }, + { + "epoch": 0.7959980646944982, + "grad_norm": 1.096159226495422, + "learning_rate": 2.1045079121107193e-06, + "loss": 0.369, + "step": 46066 + }, + { + "epoch": 0.7960153442079071, + "grad_norm": 0.8228125892758419, + "learning_rate": 2.1041644746898814e-06, + "loss": 0.196, + "step": 46067 + }, + { + "epoch": 0.796032623721316, + "grad_norm": 1.156426190433561, + "learning_rate": 2.1038210619994003e-06, + "loss": 0.2832, + "step": 46068 + }, + { + "epoch": 0.7960499032347249, + "grad_norm": 1.8566728147588973, + "learning_rate": 2.1034776740403518e-06, + "loss": 0.3765, + "step": 46069 + }, + { + "epoch": 0.7960671827481338, + "grad_norm": 1.4580417776956898, + "learning_rate": 2.1031343108138136e-06, + "loss": 0.9118, + "step": 46070 + }, + { + "epoch": 0.7960844622615427, + "grad_norm": 1.4346475159488894, + "learning_rate": 2.1027909723208593e-06, + "loss": 0.666, + "step": 46071 + }, + { + "epoch": 0.7961017417749516, + "grad_norm": 1.9970553577431944, + "learning_rate": 2.102447658562561e-06, + "loss": 0.4383, + "step": 46072 + }, + { + "epoch": 0.7961190212883605, + "grad_norm": 1.083370429466573, + "learning_rate": 2.102104369539997e-06, + "loss": 0.359, + "step": 46073 + }, + { + "epoch": 0.7961363008017694, + "grad_norm": 2.063321207529874, + "learning_rate": 2.101761105254242e-06, + "loss": 0.3205, + "step": 46074 + }, + { + "epoch": 0.7961535803151784, + "grad_norm": 1.4592448732105032, + "learning_rate": 2.1014178657063745e-06, + "loss": 0.5007, + "step": 46075 + }, + { + "epoch": 0.7961708598285873, + "grad_norm": 1.330496190637923, + "learning_rate": 2.1010746508974634e-06, + "loss": 0.3415, + "step": 46076 + }, + { + "epoch": 0.7961881393419962, + "grad_norm": 1.0860054675085675, + "learning_rate": 2.10073146082859e-06, + "loss": 0.6169, + "step": 46077 + }, + { + "epoch": 0.7962054188554051, + "grad_norm": 1.3210285586944108, + "learning_rate": 2.1003882955008237e-06, + "loss": 0.3176, + "step": 46078 + }, + { + "epoch": 0.796222698368814, + "grad_norm": 1.2724588549121585, + "learning_rate": 2.100045154915241e-06, + "loss": 0.5211, + "step": 46079 + }, + { + "epoch": 0.7962399778822228, + "grad_norm": 1.8796114442275769, + "learning_rate": 2.099702039072917e-06, + "loss": 0.456, + "step": 46080 + }, + { + "epoch": 0.7962572573956317, + "grad_norm": 1.457579379595733, + "learning_rate": 2.09935894797493e-06, + "loss": 0.3023, + "step": 46081 + }, + { + "epoch": 0.7962745369090406, + "grad_norm": 1.2090997918128792, + "learning_rate": 2.0990158816223503e-06, + "loss": 0.3019, + "step": 46082 + }, + { + "epoch": 0.7962918164224495, + "grad_norm": 1.7021357302171178, + "learning_rate": 2.0986728400162517e-06, + "loss": 0.5204, + "step": 46083 + }, + { + "epoch": 0.7963090959358584, + "grad_norm": 1.165159766462907, + "learning_rate": 2.098329823157709e-06, + "loss": 0.3375, + "step": 46084 + }, + { + "epoch": 0.7963263754492673, + "grad_norm": 1.2224977042676786, + "learning_rate": 2.097986831047798e-06, + "loss": 0.2748, + "step": 46085 + }, + { + "epoch": 0.7963436549626762, + "grad_norm": 1.5307221814318408, + "learning_rate": 2.0976438636875964e-06, + "loss": 0.2072, + "step": 46086 + }, + { + "epoch": 0.7963609344760851, + "grad_norm": 1.1215917508368867, + "learning_rate": 2.0973009210781715e-06, + "loss": 0.2945, + "step": 46087 + }, + { + "epoch": 0.796378213989494, + "grad_norm": 1.2196561768863894, + "learning_rate": 2.096958003220604e-06, + "loss": 0.417, + "step": 46088 + }, + { + "epoch": 0.796395493502903, + "grad_norm": 1.8547998317935726, + "learning_rate": 2.0966151101159604e-06, + "loss": 0.4148, + "step": 46089 + }, + { + "epoch": 0.7964127730163119, + "grad_norm": 1.093669680848573, + "learning_rate": 2.0962722417653236e-06, + "loss": 0.3631, + "step": 46090 + }, + { + "epoch": 0.7964300525297208, + "grad_norm": 1.5806001858418561, + "learning_rate": 2.0959293981697584e-06, + "loss": 0.5989, + "step": 46091 + }, + { + "epoch": 0.7964473320431297, + "grad_norm": 1.2965499041092845, + "learning_rate": 2.095586579330344e-06, + "loss": 0.3223, + "step": 46092 + }, + { + "epoch": 0.7964646115565386, + "grad_norm": 1.2060393451011637, + "learning_rate": 2.0952437852481554e-06, + "loss": 0.4947, + "step": 46093 + }, + { + "epoch": 0.7964818910699475, + "grad_norm": 2.433923264885326, + "learning_rate": 2.094901015924262e-06, + "loss": 0.2812, + "step": 46094 + }, + { + "epoch": 0.7964991705833564, + "grad_norm": 1.305546321717127, + "learning_rate": 2.0945582713597414e-06, + "loss": 0.3915, + "step": 46095 + }, + { + "epoch": 0.7965164500967653, + "grad_norm": 1.4685557796337088, + "learning_rate": 2.094215551555663e-06, + "loss": 0.4173, + "step": 46096 + }, + { + "epoch": 0.7965337296101742, + "grad_norm": 0.8869718946572722, + "learning_rate": 2.093872856513102e-06, + "loss": 1.0008, + "step": 46097 + }, + { + "epoch": 0.7965510091235831, + "grad_norm": 1.6314049715243435, + "learning_rate": 2.0935301862331314e-06, + "loss": 0.4396, + "step": 46098 + }, + { + "epoch": 0.796568288636992, + "grad_norm": 2.2574663945328792, + "learning_rate": 2.0931875407168277e-06, + "loss": 0.3198, + "step": 46099 + }, + { + "epoch": 0.7965855681504009, + "grad_norm": 1.059490861020684, + "learning_rate": 2.0928449199652602e-06, + "loss": 0.5414, + "step": 46100 + }, + { + "epoch": 0.7966028476638097, + "grad_norm": 1.5979721340324118, + "learning_rate": 2.092502323979505e-06, + "loss": 0.3096, + "step": 46101 + }, + { + "epoch": 0.7966201271772186, + "grad_norm": 1.2120477971201116, + "learning_rate": 2.0921597527606307e-06, + "loss": 0.3751, + "step": 46102 + }, + { + "epoch": 0.7966374066906275, + "grad_norm": 1.0231964153400352, + "learning_rate": 2.091817206309714e-06, + "loss": 0.4181, + "step": 46103 + }, + { + "epoch": 0.7966546862040365, + "grad_norm": 0.905885171163927, + "learning_rate": 2.0914746846278287e-06, + "loss": 0.4007, + "step": 46104 + }, + { + "epoch": 0.7966719657174454, + "grad_norm": 1.0959523671625073, + "learning_rate": 2.091132187716043e-06, + "loss": 0.2765, + "step": 46105 + }, + { + "epoch": 0.7966892452308543, + "grad_norm": 1.360243797430638, + "learning_rate": 2.0907897155754352e-06, + "loss": 0.4787, + "step": 46106 + }, + { + "epoch": 0.7967065247442632, + "grad_norm": 0.6957630921851066, + "learning_rate": 2.090447268207073e-06, + "loss": 0.1886, + "step": 46107 + }, + { + "epoch": 0.7967238042576721, + "grad_norm": 2.066345199000109, + "learning_rate": 2.09010484561203e-06, + "loss": 0.5081, + "step": 46108 + }, + { + "epoch": 0.796741083771081, + "grad_norm": 1.4354179370200608, + "learning_rate": 2.0897624477913805e-06, + "loss": 0.29, + "step": 46109 + }, + { + "epoch": 0.7967583632844899, + "grad_norm": 1.3173503543525213, + "learning_rate": 2.0894200747461992e-06, + "loss": 0.2394, + "step": 46110 + }, + { + "epoch": 0.7967756427978988, + "grad_norm": 1.5297151911437563, + "learning_rate": 2.089077726477552e-06, + "loss": 0.3702, + "step": 46111 + }, + { + "epoch": 0.7967929223113077, + "grad_norm": 0.9066153415698466, + "learning_rate": 2.088735402986517e-06, + "loss": 0.3259, + "step": 46112 + }, + { + "epoch": 0.7968102018247166, + "grad_norm": 1.8997932048640165, + "learning_rate": 2.0883931042741623e-06, + "loss": 0.276, + "step": 46113 + }, + { + "epoch": 0.7968274813381255, + "grad_norm": 1.1990828126591648, + "learning_rate": 2.0880508303415602e-06, + "loss": 0.4451, + "step": 46114 + }, + { + "epoch": 0.7968447608515344, + "grad_norm": 1.0689998109477958, + "learning_rate": 2.087708581189788e-06, + "loss": 0.364, + "step": 46115 + }, + { + "epoch": 0.7968620403649433, + "grad_norm": 1.3231277736210254, + "learning_rate": 2.087366356819911e-06, + "loss": 0.398, + "step": 46116 + }, + { + "epoch": 0.7968793198783523, + "grad_norm": 1.617744664433458, + "learning_rate": 2.0870241572330064e-06, + "loss": 0.202, + "step": 46117 + }, + { + "epoch": 0.7968965993917612, + "grad_norm": 0.9824237982749048, + "learning_rate": 2.086681982430141e-06, + "loss": 0.2968, + "step": 46118 + }, + { + "epoch": 0.7969138789051701, + "grad_norm": 1.6654970259145572, + "learning_rate": 2.0863398324123885e-06, + "loss": 0.2135, + "step": 46119 + }, + { + "epoch": 0.796931158418579, + "grad_norm": 1.7311468627812405, + "learning_rate": 2.0859977071808215e-06, + "loss": 0.2776, + "step": 46120 + }, + { + "epoch": 0.7969484379319879, + "grad_norm": 1.3485069033263055, + "learning_rate": 2.085655606736514e-06, + "loss": 0.2892, + "step": 46121 + }, + { + "epoch": 0.7969657174453968, + "grad_norm": 1.2901379288084087, + "learning_rate": 2.085313531080534e-06, + "loss": 0.3176, + "step": 46122 + }, + { + "epoch": 0.7969829969588056, + "grad_norm": 1.4463599539609782, + "learning_rate": 2.084971480213952e-06, + "loss": 0.3468, + "step": 46123 + }, + { + "epoch": 0.7970002764722145, + "grad_norm": 1.4181253521372712, + "learning_rate": 2.0846294541378396e-06, + "loss": 0.228, + "step": 46124 + }, + { + "epoch": 0.7970175559856234, + "grad_norm": 2.075288930812396, + "learning_rate": 2.0842874528532696e-06, + "loss": 0.5662, + "step": 46125 + }, + { + "epoch": 0.7970348354990323, + "grad_norm": 0.8924250529184231, + "learning_rate": 2.0839454763613153e-06, + "loss": 0.4401, + "step": 46126 + }, + { + "epoch": 0.7970521150124412, + "grad_norm": 0.5470823465368926, + "learning_rate": 2.0836035246630438e-06, + "loss": 0.7532, + "step": 46127 + }, + { + "epoch": 0.7970693945258501, + "grad_norm": 3.9224024604445056, + "learning_rate": 2.0832615977595293e-06, + "loss": 0.3621, + "step": 46128 + }, + { + "epoch": 0.797086674039259, + "grad_norm": 1.405891476912205, + "learning_rate": 2.0829196956518393e-06, + "loss": 0.3755, + "step": 46129 + }, + { + "epoch": 0.7971039535526679, + "grad_norm": 1.386944257826651, + "learning_rate": 2.0825778183410485e-06, + "loss": 0.3432, + "step": 46130 + }, + { + "epoch": 0.7971212330660769, + "grad_norm": 1.2148960931117396, + "learning_rate": 2.08223596582822e-06, + "loss": 0.2243, + "step": 46131 + }, + { + "epoch": 0.7971385125794858, + "grad_norm": 1.5817841761245839, + "learning_rate": 2.0818941381144363e-06, + "loss": 0.3342, + "step": 46132 + }, + { + "epoch": 0.7971557920928947, + "grad_norm": 2.2378985697236935, + "learning_rate": 2.0815523352007617e-06, + "loss": 0.4059, + "step": 46133 + }, + { + "epoch": 0.7971730716063036, + "grad_norm": 1.4961203262293836, + "learning_rate": 2.0812105570882644e-06, + "loss": 0.503, + "step": 46134 + }, + { + "epoch": 0.7971903511197125, + "grad_norm": 1.1953254717249235, + "learning_rate": 2.08086880377802e-06, + "loss": 0.382, + "step": 46135 + }, + { + "epoch": 0.7972076306331214, + "grad_norm": 1.431430191908874, + "learning_rate": 2.0805270752710936e-06, + "loss": 0.3137, + "step": 46136 + }, + { + "epoch": 0.7972249101465303, + "grad_norm": 1.5757740040359043, + "learning_rate": 2.080185371568558e-06, + "loss": 0.4338, + "step": 46137 + }, + { + "epoch": 0.7972421896599392, + "grad_norm": 1.0307056484695007, + "learning_rate": 2.0798436926714837e-06, + "loss": 0.3074, + "step": 46138 + }, + { + "epoch": 0.7972594691733481, + "grad_norm": 1.8197647440957934, + "learning_rate": 2.079502038580944e-06, + "loss": 0.3827, + "step": 46139 + }, + { + "epoch": 0.797276748686757, + "grad_norm": 1.0028515331579184, + "learning_rate": 2.0791604092980033e-06, + "loss": 0.3568, + "step": 46140 + }, + { + "epoch": 0.7972940282001659, + "grad_norm": 0.6887392482752313, + "learning_rate": 2.078818804823737e-06, + "loss": 0.892, + "step": 46141 + }, + { + "epoch": 0.7973113077135748, + "grad_norm": 2.443933390535927, + "learning_rate": 2.078477225159208e-06, + "loss": 0.4386, + "step": 46142 + }, + { + "epoch": 0.7973285872269837, + "grad_norm": 1.0902841905786027, + "learning_rate": 2.078135670305491e-06, + "loss": 0.2766, + "step": 46143 + }, + { + "epoch": 0.7973458667403925, + "grad_norm": 2.5727570030878875, + "learning_rate": 2.077794140263659e-06, + "loss": 0.2457, + "step": 46144 + }, + { + "epoch": 0.7973631462538014, + "grad_norm": 1.2053543957312955, + "learning_rate": 2.0774526350347748e-06, + "loss": 0.3494, + "step": 46145 + }, + { + "epoch": 0.7973804257672104, + "grad_norm": 1.1011107250943994, + "learning_rate": 2.077111154619913e-06, + "loss": 0.2343, + "step": 46146 + }, + { + "epoch": 0.7973977052806193, + "grad_norm": 1.4299603784458101, + "learning_rate": 2.076769699020138e-06, + "loss": 0.1583, + "step": 46147 + }, + { + "epoch": 0.7974149847940282, + "grad_norm": 1.3041024137035597, + "learning_rate": 2.076428268236523e-06, + "loss": 0.296, + "step": 46148 + }, + { + "epoch": 0.7974322643074371, + "grad_norm": 1.4006428881954296, + "learning_rate": 2.076086862270137e-06, + "loss": 0.2447, + "step": 46149 + }, + { + "epoch": 0.797449543820846, + "grad_norm": 1.5426077820768722, + "learning_rate": 2.0757454811220524e-06, + "loss": 0.3968, + "step": 46150 + }, + { + "epoch": 0.7974668233342549, + "grad_norm": 1.0615720179142887, + "learning_rate": 2.075404124793332e-06, + "loss": 0.3051, + "step": 46151 + }, + { + "epoch": 0.7974841028476638, + "grad_norm": 1.0953832448596141, + "learning_rate": 2.075062793285051e-06, + "loss": 0.4092, + "step": 46152 + }, + { + "epoch": 0.7975013823610727, + "grad_norm": 1.2061988253063318, + "learning_rate": 2.0747214865982725e-06, + "loss": 0.3518, + "step": 46153 + }, + { + "epoch": 0.7975186618744816, + "grad_norm": 1.4397590594843799, + "learning_rate": 2.074380204734069e-06, + "loss": 0.2581, + "step": 46154 + }, + { + "epoch": 0.7975359413878905, + "grad_norm": 1.1710995170855947, + "learning_rate": 2.0740389476935107e-06, + "loss": 0.2783, + "step": 46155 + }, + { + "epoch": 0.7975532209012994, + "grad_norm": 1.3494966554509196, + "learning_rate": 2.0736977154776626e-06, + "loss": 0.2747, + "step": 46156 + }, + { + "epoch": 0.7975705004147083, + "grad_norm": 1.2420580423328162, + "learning_rate": 2.0733565080875994e-06, + "loss": 0.424, + "step": 46157 + }, + { + "epoch": 0.7975877799281172, + "grad_norm": 1.404796718054935, + "learning_rate": 2.0730153255243823e-06, + "loss": 0.2032, + "step": 46158 + }, + { + "epoch": 0.7976050594415262, + "grad_norm": 1.265957360156585, + "learning_rate": 2.0726741677890837e-06, + "loss": 0.457, + "step": 46159 + }, + { + "epoch": 0.7976223389549351, + "grad_norm": 1.523539272121279, + "learning_rate": 2.072333034882772e-06, + "loss": 0.4306, + "step": 46160 + }, + { + "epoch": 0.797639618468344, + "grad_norm": 0.9621340165883178, + "learning_rate": 2.0719919268065193e-06, + "loss": 0.2561, + "step": 46161 + }, + { + "epoch": 0.7976568979817529, + "grad_norm": 1.0923340277610316, + "learning_rate": 2.071650843561387e-06, + "loss": 0.2288, + "step": 46162 + }, + { + "epoch": 0.7976741774951618, + "grad_norm": 1.40242666746494, + "learning_rate": 2.0713097851484487e-06, + "loss": 0.4118, + "step": 46163 + }, + { + "epoch": 0.7976914570085707, + "grad_norm": 3.463223071974311, + "learning_rate": 2.0709687515687716e-06, + "loss": 0.3628, + "step": 46164 + }, + { + "epoch": 0.7977087365219795, + "grad_norm": 2.75947486289515, + "learning_rate": 2.070627742823418e-06, + "loss": 0.7436, + "step": 46165 + }, + { + "epoch": 0.7977260160353884, + "grad_norm": 1.4323377823163896, + "learning_rate": 2.0702867589134657e-06, + "loss": 0.4438, + "step": 46166 + }, + { + "epoch": 0.7977432955487973, + "grad_norm": 0.9688911482581987, + "learning_rate": 2.069945799839975e-06, + "loss": 0.4406, + "step": 46167 + }, + { + "epoch": 0.7977605750622062, + "grad_norm": 1.5173185207191136, + "learning_rate": 2.0696048656040203e-06, + "loss": 0.2891, + "step": 46168 + }, + { + "epoch": 0.7977778545756151, + "grad_norm": 1.5771732187146448, + "learning_rate": 2.069263956206662e-06, + "loss": 0.2114, + "step": 46169 + }, + { + "epoch": 0.797795134089024, + "grad_norm": 1.6791233961861247, + "learning_rate": 2.068923071648975e-06, + "loss": 0.4278, + "step": 46170 + }, + { + "epoch": 0.7978124136024329, + "grad_norm": 2.876770853253714, + "learning_rate": 2.0685822119320174e-06, + "loss": 0.4503, + "step": 46171 + }, + { + "epoch": 0.7978296931158418, + "grad_norm": 1.4061115924564658, + "learning_rate": 2.06824137705687e-06, + "loss": 0.339, + "step": 46172 + }, + { + "epoch": 0.7978469726292508, + "grad_norm": 2.8040391201051955, + "learning_rate": 2.06790056702459e-06, + "loss": 0.4696, + "step": 46173 + }, + { + "epoch": 0.7978642521426597, + "grad_norm": 0.9334688010761285, + "learning_rate": 2.0675597818362515e-06, + "loss": 0.4983, + "step": 46174 + }, + { + "epoch": 0.7978815316560686, + "grad_norm": 1.312164933572725, + "learning_rate": 2.067219021492918e-06, + "loss": 0.2496, + "step": 46175 + }, + { + "epoch": 0.7978988111694775, + "grad_norm": 2.056540449433221, + "learning_rate": 2.0668782859956537e-06, + "loss": 0.3609, + "step": 46176 + }, + { + "epoch": 0.7979160906828864, + "grad_norm": 1.6437940169527856, + "learning_rate": 2.0665375753455306e-06, + "loss": 0.6298, + "step": 46177 + }, + { + "epoch": 0.7979333701962953, + "grad_norm": 0.9913322889344575, + "learning_rate": 2.066196889543616e-06, + "loss": 0.3206, + "step": 46178 + }, + { + "epoch": 0.7979506497097042, + "grad_norm": 1.2914464938735986, + "learning_rate": 2.0658562285909766e-06, + "loss": 0.5369, + "step": 46179 + }, + { + "epoch": 0.7979679292231131, + "grad_norm": 1.1635848857564, + "learning_rate": 2.065515592488676e-06, + "loss": 0.2632, + "step": 46180 + }, + { + "epoch": 0.797985208736522, + "grad_norm": 1.9247352221269933, + "learning_rate": 2.065174981237786e-06, + "loss": 0.3508, + "step": 46181 + }, + { + "epoch": 0.7980024882499309, + "grad_norm": 1.6980516075815466, + "learning_rate": 2.064834394839369e-06, + "loss": 0.3488, + "step": 46182 + }, + { + "epoch": 0.7980197677633398, + "grad_norm": 1.2556265541172367, + "learning_rate": 2.064493833294494e-06, + "loss": 0.3531, + "step": 46183 + }, + { + "epoch": 0.7980370472767487, + "grad_norm": 1.399849064874702, + "learning_rate": 2.064153296604229e-06, + "loss": 0.2632, + "step": 46184 + }, + { + "epoch": 0.7980543267901576, + "grad_norm": 1.2000377514158507, + "learning_rate": 2.0638127847696377e-06, + "loss": 0.2435, + "step": 46185 + }, + { + "epoch": 0.7980716063035664, + "grad_norm": 1.012728675981293, + "learning_rate": 2.06347229779179e-06, + "loss": 0.2749, + "step": 46186 + }, + { + "epoch": 0.7980888858169753, + "grad_norm": 1.7213330329049485, + "learning_rate": 2.063131835671747e-06, + "loss": 0.3478, + "step": 46187 + }, + { + "epoch": 0.7981061653303843, + "grad_norm": 1.3507644218894888, + "learning_rate": 2.0627913984105784e-06, + "loss": 0.2999, + "step": 46188 + }, + { + "epoch": 0.7981234448437932, + "grad_norm": 0.9973358202807446, + "learning_rate": 2.06245098600935e-06, + "loss": 0.6622, + "step": 46189 + }, + { + "epoch": 0.7981407243572021, + "grad_norm": 1.5701553883569328, + "learning_rate": 2.0621105984691316e-06, + "loss": 0.3361, + "step": 46190 + }, + { + "epoch": 0.798158003870611, + "grad_norm": 1.223635030758869, + "learning_rate": 2.061770235790983e-06, + "loss": 0.4057, + "step": 46191 + }, + { + "epoch": 0.7981752833840199, + "grad_norm": 1.4344413068103934, + "learning_rate": 2.0614298979759774e-06, + "loss": 0.3771, + "step": 46192 + }, + { + "epoch": 0.7981925628974288, + "grad_norm": 1.3222730123280788, + "learning_rate": 2.0610895850251724e-06, + "loss": 0.3162, + "step": 46193 + }, + { + "epoch": 0.7982098424108377, + "grad_norm": 0.7979234547467572, + "learning_rate": 2.060749296939638e-06, + "loss": 0.1456, + "step": 46194 + }, + { + "epoch": 0.7982271219242466, + "grad_norm": 0.966668090618544, + "learning_rate": 2.060409033720444e-06, + "loss": 0.2542, + "step": 46195 + }, + { + "epoch": 0.7982444014376555, + "grad_norm": 1.3569303893141427, + "learning_rate": 2.0600687953686495e-06, + "loss": 0.3473, + "step": 46196 + }, + { + "epoch": 0.7982616809510644, + "grad_norm": 1.338470267582525, + "learning_rate": 2.0597285818853254e-06, + "loss": 0.4422, + "step": 46197 + }, + { + "epoch": 0.7982789604644733, + "grad_norm": 1.2840928549756576, + "learning_rate": 2.059388393271532e-06, + "loss": 0.336, + "step": 46198 + }, + { + "epoch": 0.7982962399778822, + "grad_norm": 1.6669430914123582, + "learning_rate": 2.0590482295283364e-06, + "loss": 0.3406, + "step": 46199 + }, + { + "epoch": 0.7983135194912911, + "grad_norm": 1.031862137093069, + "learning_rate": 2.058708090656807e-06, + "loss": 0.3138, + "step": 46200 + }, + { + "epoch": 0.7983307990047, + "grad_norm": 0.5335930381393063, + "learning_rate": 2.05836797665801e-06, + "loss": 0.8253, + "step": 46201 + }, + { + "epoch": 0.798348078518109, + "grad_norm": 1.1108185495285436, + "learning_rate": 2.058027887533004e-06, + "loss": 0.4982, + "step": 46202 + }, + { + "epoch": 0.7983653580315179, + "grad_norm": 1.4561786119734623, + "learning_rate": 2.057687823282862e-06, + "loss": 0.3584, + "step": 46203 + }, + { + "epoch": 0.7983826375449268, + "grad_norm": 0.9676831254606122, + "learning_rate": 2.057347783908642e-06, + "loss": 0.3801, + "step": 46204 + }, + { + "epoch": 0.7983999170583357, + "grad_norm": 2.07795640006044, + "learning_rate": 2.0570077694114133e-06, + "loss": 0.392, + "step": 46205 + }, + { + "epoch": 0.7984171965717446, + "grad_norm": 1.5173202419360108, + "learning_rate": 2.056667779792242e-06, + "loss": 0.3148, + "step": 46206 + }, + { + "epoch": 0.7984344760851534, + "grad_norm": 1.7327426045010041, + "learning_rate": 2.056327815052188e-06, + "loss": 0.3833, + "step": 46207 + }, + { + "epoch": 0.7984517555985623, + "grad_norm": 0.9129772253028733, + "learning_rate": 2.0559878751923223e-06, + "loss": 0.3075, + "step": 46208 + }, + { + "epoch": 0.7984690351119712, + "grad_norm": 1.6409078316504988, + "learning_rate": 2.0556479602137036e-06, + "loss": 0.2175, + "step": 46209 + }, + { + "epoch": 0.7984863146253801, + "grad_norm": 1.221366227867068, + "learning_rate": 2.055308070117401e-06, + "loss": 0.3229, + "step": 46210 + }, + { + "epoch": 0.798503594138789, + "grad_norm": 1.1560363589207197, + "learning_rate": 2.054968204904473e-06, + "loss": 0.3382, + "step": 46211 + }, + { + "epoch": 0.7985208736521979, + "grad_norm": 0.7452881771113105, + "learning_rate": 2.0546283645759923e-06, + "loss": 0.7849, + "step": 46212 + }, + { + "epoch": 0.7985381531656068, + "grad_norm": 1.2022621880476172, + "learning_rate": 2.054288549133018e-06, + "loss": 0.2958, + "step": 46213 + }, + { + "epoch": 0.7985554326790157, + "grad_norm": 1.315739405947466, + "learning_rate": 2.053948758576618e-06, + "loss": 0.403, + "step": 46214 + }, + { + "epoch": 0.7985727121924246, + "grad_norm": 0.9632785657270941, + "learning_rate": 2.053608992907855e-06, + "loss": 0.9612, + "step": 46215 + }, + { + "epoch": 0.7985899917058336, + "grad_norm": 0.9446440876020138, + "learning_rate": 2.0532692521277886e-06, + "loss": 0.7649, + "step": 46216 + }, + { + "epoch": 0.7986072712192425, + "grad_norm": 2.4277231247435647, + "learning_rate": 2.052929536237488e-06, + "loss": 0.2301, + "step": 46217 + }, + { + "epoch": 0.7986245507326514, + "grad_norm": 1.8684467452750695, + "learning_rate": 2.0525898452380154e-06, + "loss": 0.6502, + "step": 46218 + }, + { + "epoch": 0.7986418302460603, + "grad_norm": 1.1634799294687608, + "learning_rate": 2.0522501791304385e-06, + "loss": 0.3912, + "step": 46219 + }, + { + "epoch": 0.7986591097594692, + "grad_norm": 0.7148906153594582, + "learning_rate": 2.0519105379158156e-06, + "loss": 0.239, + "step": 46220 + }, + { + "epoch": 0.7986763892728781, + "grad_norm": 1.5932804902408375, + "learning_rate": 2.051570921595215e-06, + "loss": 0.4328, + "step": 46221 + }, + { + "epoch": 0.798693668786287, + "grad_norm": 0.7325906629954243, + "learning_rate": 2.051231330169696e-06, + "loss": 0.7181, + "step": 46222 + }, + { + "epoch": 0.7987109482996959, + "grad_norm": 1.905782768974392, + "learning_rate": 2.0508917636403235e-06, + "loss": 0.4344, + "step": 46223 + }, + { + "epoch": 0.7987282278131048, + "grad_norm": 1.7655197981575017, + "learning_rate": 2.050552222008163e-06, + "loss": 0.4283, + "step": 46224 + }, + { + "epoch": 0.7987455073265137, + "grad_norm": 1.465746445692304, + "learning_rate": 2.0502127052742804e-06, + "loss": 0.2971, + "step": 46225 + }, + { + "epoch": 0.7987627868399226, + "grad_norm": 1.2576016602312863, + "learning_rate": 2.049873213439735e-06, + "loss": 0.3223, + "step": 46226 + }, + { + "epoch": 0.7987800663533315, + "grad_norm": 1.7136581058293168, + "learning_rate": 2.0495337465055887e-06, + "loss": 0.3156, + "step": 46227 + }, + { + "epoch": 0.7987973458667403, + "grad_norm": 1.0753824973410295, + "learning_rate": 2.049194304472907e-06, + "loss": 0.2673, + "step": 46228 + }, + { + "epoch": 0.7988146253801492, + "grad_norm": 1.5680964182720138, + "learning_rate": 2.048854887342752e-06, + "loss": 0.3617, + "step": 46229 + }, + { + "epoch": 0.7988319048935582, + "grad_norm": 0.9808189140123504, + "learning_rate": 2.0485154951161913e-06, + "loss": 0.5464, + "step": 46230 + }, + { + "epoch": 0.7988491844069671, + "grad_norm": 1.4244663841424432, + "learning_rate": 2.0481761277942815e-06, + "loss": 0.4237, + "step": 46231 + }, + { + "epoch": 0.798866463920376, + "grad_norm": 1.6271983969643837, + "learning_rate": 2.04783678537809e-06, + "loss": 0.3912, + "step": 46232 + }, + { + "epoch": 0.7988837434337849, + "grad_norm": 1.667047837551927, + "learning_rate": 2.0474974678686764e-06, + "loss": 0.3312, + "step": 46233 + }, + { + "epoch": 0.7989010229471938, + "grad_norm": 1.1863414636917462, + "learning_rate": 2.0471581752671056e-06, + "loss": 0.3413, + "step": 46234 + }, + { + "epoch": 0.7989183024606027, + "grad_norm": 0.7314098433308205, + "learning_rate": 2.0468189075744384e-06, + "loss": 0.4416, + "step": 46235 + }, + { + "epoch": 0.7989355819740116, + "grad_norm": 1.688587573231988, + "learning_rate": 2.046479664791743e-06, + "loss": 0.3654, + "step": 46236 + }, + { + "epoch": 0.7989528614874205, + "grad_norm": 1.4996657155022461, + "learning_rate": 2.046140446920076e-06, + "loss": 0.3173, + "step": 46237 + }, + { + "epoch": 0.7989701410008294, + "grad_norm": 1.433314097133511, + "learning_rate": 2.0458012539605e-06, + "loss": 0.3862, + "step": 46238 + }, + { + "epoch": 0.7989874205142383, + "grad_norm": 2.0990567088763443, + "learning_rate": 2.0454620859140796e-06, + "loss": 0.2982, + "step": 46239 + }, + { + "epoch": 0.7990047000276472, + "grad_norm": 1.615551811248906, + "learning_rate": 2.0451229427818755e-06, + "loss": 0.4339, + "step": 46240 + }, + { + "epoch": 0.7990219795410561, + "grad_norm": 1.4503598136395135, + "learning_rate": 2.0447838245649533e-06, + "loss": 0.2735, + "step": 46241 + }, + { + "epoch": 0.799039259054465, + "grad_norm": 1.0410521348773205, + "learning_rate": 2.0444447312643713e-06, + "loss": 0.2725, + "step": 46242 + }, + { + "epoch": 0.799056538567874, + "grad_norm": 2.032204367846198, + "learning_rate": 2.044105662881194e-06, + "loss": 0.3513, + "step": 46243 + }, + { + "epoch": 0.7990738180812829, + "grad_norm": 1.3324923372206785, + "learning_rate": 2.0437666194164805e-06, + "loss": 0.17, + "step": 46244 + }, + { + "epoch": 0.7990910975946918, + "grad_norm": 1.5661736629306287, + "learning_rate": 2.043427600871295e-06, + "loss": 0.3256, + "step": 46245 + }, + { + "epoch": 0.7991083771081007, + "grad_norm": 1.5980349845374846, + "learning_rate": 2.0430886072467016e-06, + "loss": 0.3185, + "step": 46246 + }, + { + "epoch": 0.7991256566215096, + "grad_norm": 1.0635366004416476, + "learning_rate": 2.042749638543756e-06, + "loss": 0.1157, + "step": 46247 + }, + { + "epoch": 0.7991429361349185, + "grad_norm": 0.8900624612696841, + "learning_rate": 2.042410694763527e-06, + "loss": 0.27, + "step": 46248 + }, + { + "epoch": 0.7991602156483273, + "grad_norm": 1.5195951094703415, + "learning_rate": 2.0420717759070686e-06, + "loss": 0.4149, + "step": 46249 + }, + { + "epoch": 0.7991774951617362, + "grad_norm": 1.0409800638953968, + "learning_rate": 2.04173288197545e-06, + "loss": 0.3009, + "step": 46250 + }, + { + "epoch": 0.7991947746751451, + "grad_norm": 1.0761440635583215, + "learning_rate": 2.041394012969723e-06, + "loss": 0.3626, + "step": 46251 + }, + { + "epoch": 0.799212054188554, + "grad_norm": 1.5315109490546266, + "learning_rate": 2.0410551688909595e-06, + "loss": 0.4731, + "step": 46252 + }, + { + "epoch": 0.7992293337019629, + "grad_norm": 1.9926686752799445, + "learning_rate": 2.0407163497402137e-06, + "loss": 0.4252, + "step": 46253 + }, + { + "epoch": 0.7992466132153718, + "grad_norm": 1.640831551054663, + "learning_rate": 2.0403775555185523e-06, + "loss": 0.3395, + "step": 46254 + }, + { + "epoch": 0.7992638927287807, + "grad_norm": 1.0809982846590767, + "learning_rate": 2.040038786227031e-06, + "loss": 0.3781, + "step": 46255 + }, + { + "epoch": 0.7992811722421896, + "grad_norm": 0.6253066579630397, + "learning_rate": 2.0397000418667144e-06, + "loss": 0.5372, + "step": 46256 + }, + { + "epoch": 0.7992984517555985, + "grad_norm": 1.468940689557118, + "learning_rate": 2.039361322438661e-06, + "loss": 0.4379, + "step": 46257 + }, + { + "epoch": 0.7993157312690075, + "grad_norm": 1.269943486648702, + "learning_rate": 2.0390226279439317e-06, + "loss": 0.3052, + "step": 46258 + }, + { + "epoch": 0.7993330107824164, + "grad_norm": 1.127235199166604, + "learning_rate": 2.0386839583835925e-06, + "loss": 0.3494, + "step": 46259 + }, + { + "epoch": 0.7993502902958253, + "grad_norm": 1.4017741438545115, + "learning_rate": 2.0383453137586963e-06, + "loss": 0.1556, + "step": 46260 + }, + { + "epoch": 0.7993675698092342, + "grad_norm": 1.115608345437079, + "learning_rate": 2.0380066940703115e-06, + "loss": 0.344, + "step": 46261 + }, + { + "epoch": 0.7993848493226431, + "grad_norm": 0.8947039582887113, + "learning_rate": 2.0376680993194907e-06, + "loss": 0.7659, + "step": 46262 + }, + { + "epoch": 0.799402128836052, + "grad_norm": 0.9370198455176939, + "learning_rate": 2.0373295295072993e-06, + "loss": 0.3626, + "step": 46263 + }, + { + "epoch": 0.7994194083494609, + "grad_norm": 1.5299046518389645, + "learning_rate": 2.036990984634797e-06, + "loss": 0.3458, + "step": 46264 + }, + { + "epoch": 0.7994366878628698, + "grad_norm": 1.001906825228768, + "learning_rate": 2.0366524647030474e-06, + "loss": 0.288, + "step": 46265 + }, + { + "epoch": 0.7994539673762787, + "grad_norm": 1.259567399918488, + "learning_rate": 2.036313969713104e-06, + "loss": 0.3517, + "step": 46266 + }, + { + "epoch": 0.7994712468896876, + "grad_norm": 1.7364296001625905, + "learning_rate": 2.035975499666033e-06, + "loss": 0.286, + "step": 46267 + }, + { + "epoch": 0.7994885264030965, + "grad_norm": 1.2628304345152714, + "learning_rate": 2.0356370545628902e-06, + "loss": 0.1671, + "step": 46268 + }, + { + "epoch": 0.7995058059165054, + "grad_norm": 1.5292465210164392, + "learning_rate": 2.0352986344047366e-06, + "loss": 0.3484, + "step": 46269 + }, + { + "epoch": 0.7995230854299143, + "grad_norm": 1.060573804475727, + "learning_rate": 2.034960239192636e-06, + "loss": 0.6272, + "step": 46270 + }, + { + "epoch": 0.7995403649433231, + "grad_norm": 1.6753079582444423, + "learning_rate": 2.034621868927642e-06, + "loss": 0.2896, + "step": 46271 + }, + { + "epoch": 0.799557644456732, + "grad_norm": 0.8719186560336487, + "learning_rate": 2.0342835236108207e-06, + "loss": 0.2154, + "step": 46272 + }, + { + "epoch": 0.799574923970141, + "grad_norm": 1.3604354972453403, + "learning_rate": 2.0339452032432262e-06, + "loss": 0.3255, + "step": 46273 + }, + { + "epoch": 0.7995922034835499, + "grad_norm": 1.213798973076524, + "learning_rate": 2.033606907825921e-06, + "loss": 0.3856, + "step": 46274 + }, + { + "epoch": 0.7996094829969588, + "grad_norm": 1.314728747016225, + "learning_rate": 2.033268637359964e-06, + "loss": 0.2033, + "step": 46275 + }, + { + "epoch": 0.7996267625103677, + "grad_norm": 0.9629183231991629, + "learning_rate": 2.0329303918464183e-06, + "loss": 0.3449, + "step": 46276 + }, + { + "epoch": 0.7996440420237766, + "grad_norm": 1.8776942319935561, + "learning_rate": 2.0325921712863394e-06, + "loss": 0.3599, + "step": 46277 + }, + { + "epoch": 0.7996613215371855, + "grad_norm": 1.2858146993316864, + "learning_rate": 2.0322539756807845e-06, + "loss": 0.2586, + "step": 46278 + }, + { + "epoch": 0.7996786010505944, + "grad_norm": 0.9583969764732868, + "learning_rate": 2.0319158050308164e-06, + "loss": 0.2017, + "step": 46279 + }, + { + "epoch": 0.7996958805640033, + "grad_norm": 0.9062807979340575, + "learning_rate": 2.0315776593374924e-06, + "loss": 0.495, + "step": 46280 + }, + { + "epoch": 0.7997131600774122, + "grad_norm": 1.5307838951428185, + "learning_rate": 2.031239538601877e-06, + "loss": 0.3862, + "step": 46281 + }, + { + "epoch": 0.7997304395908211, + "grad_norm": 1.3324874421489181, + "learning_rate": 2.0309014428250205e-06, + "loss": 0.2828, + "step": 46282 + }, + { + "epoch": 0.79974771910423, + "grad_norm": 1.1137818598761082, + "learning_rate": 2.030563372007991e-06, + "loss": 0.2582, + "step": 46283 + }, + { + "epoch": 0.799764998617639, + "grad_norm": 1.4851393346023982, + "learning_rate": 2.030225326151839e-06, + "loss": 0.5811, + "step": 46284 + }, + { + "epoch": 0.7997822781310479, + "grad_norm": 1.4104390314893112, + "learning_rate": 2.029887305257626e-06, + "loss": 0.5472, + "step": 46285 + }, + { + "epoch": 0.7997995576444568, + "grad_norm": 2.156133058880193, + "learning_rate": 2.029549309326413e-06, + "loss": 0.3695, + "step": 46286 + }, + { + "epoch": 0.7998168371578657, + "grad_norm": 0.9398556247872316, + "learning_rate": 2.0292113383592595e-06, + "loss": 0.4477, + "step": 46287 + }, + { + "epoch": 0.7998341166712746, + "grad_norm": 1.011523972854134, + "learning_rate": 2.0288733923572224e-06, + "loss": 0.2862, + "step": 46288 + }, + { + "epoch": 0.7998513961846835, + "grad_norm": 1.9455341600178042, + "learning_rate": 2.028535471321357e-06, + "loss": 0.3545, + "step": 46289 + }, + { + "epoch": 0.7998686756980924, + "grad_norm": 1.4737138016536346, + "learning_rate": 2.028197575252726e-06, + "loss": 0.3604, + "step": 46290 + }, + { + "epoch": 0.7998859552115013, + "grad_norm": 2.609296678363716, + "learning_rate": 2.027859704152381e-06, + "loss": 0.4269, + "step": 46291 + }, + { + "epoch": 0.7999032347249101, + "grad_norm": 1.6165985677256107, + "learning_rate": 2.0275218580213905e-06, + "loss": 0.4229, + "step": 46292 + }, + { + "epoch": 0.799920514238319, + "grad_norm": 1.0378620982006541, + "learning_rate": 2.0271840368608052e-06, + "loss": 0.6974, + "step": 46293 + }, + { + "epoch": 0.7999377937517279, + "grad_norm": 1.7656270578112383, + "learning_rate": 2.026846240671688e-06, + "loss": 0.2528, + "step": 46294 + }, + { + "epoch": 0.7999550732651368, + "grad_norm": 0.9580958405514264, + "learning_rate": 2.026508469455092e-06, + "loss": 0.5385, + "step": 46295 + }, + { + "epoch": 0.7999723527785457, + "grad_norm": 1.3686130595646662, + "learning_rate": 2.026170723212079e-06, + "loss": 0.3694, + "step": 46296 + }, + { + "epoch": 0.7999896322919546, + "grad_norm": 1.092624859753202, + "learning_rate": 2.0258330019437035e-06, + "loss": 0.3744, + "step": 46297 + }, + { + "epoch": 0.8000069118053635, + "grad_norm": 1.9408535355520022, + "learning_rate": 2.025495305651025e-06, + "loss": 0.3413, + "step": 46298 + }, + { + "epoch": 0.8000241913187724, + "grad_norm": 1.0971939221225824, + "learning_rate": 2.025157634335103e-06, + "loss": 0.2675, + "step": 46299 + }, + { + "epoch": 0.8000414708321814, + "grad_norm": 1.0119481902305754, + "learning_rate": 2.024819987996992e-06, + "loss": 0.4082, + "step": 46300 + }, + { + "epoch": 0.8000587503455903, + "grad_norm": 0.4910731289226355, + "learning_rate": 2.024482366637753e-06, + "loss": 0.6042, + "step": 46301 + }, + { + "epoch": 0.8000760298589992, + "grad_norm": 0.9025351711943004, + "learning_rate": 2.0241447702584384e-06, + "loss": 0.3159, + "step": 46302 + }, + { + "epoch": 0.8000933093724081, + "grad_norm": 1.216615908310161, + "learning_rate": 2.0238071988601095e-06, + "loss": 0.2787, + "step": 46303 + }, + { + "epoch": 0.800110588885817, + "grad_norm": 1.1689115751299652, + "learning_rate": 2.0234696524438213e-06, + "loss": 0.3805, + "step": 46304 + }, + { + "epoch": 0.8001278683992259, + "grad_norm": 2.0146937177403914, + "learning_rate": 2.0231321310106357e-06, + "loss": 0.3524, + "step": 46305 + }, + { + "epoch": 0.8001451479126348, + "grad_norm": 0.8360617206043928, + "learning_rate": 2.0227946345616033e-06, + "loss": 0.2712, + "step": 46306 + }, + { + "epoch": 0.8001624274260437, + "grad_norm": 1.6224329667704582, + "learning_rate": 2.0224571630977873e-06, + "loss": 0.4497, + "step": 46307 + }, + { + "epoch": 0.8001797069394526, + "grad_norm": 1.2860084726132583, + "learning_rate": 2.02211971662024e-06, + "loss": 0.4151, + "step": 46308 + }, + { + "epoch": 0.8001969864528615, + "grad_norm": 1.2017079822417573, + "learning_rate": 2.0217822951300192e-06, + "loss": 0.27, + "step": 46309 + }, + { + "epoch": 0.8002142659662704, + "grad_norm": 0.96400990321232, + "learning_rate": 2.0214448986281853e-06, + "loss": 0.8422, + "step": 46310 + }, + { + "epoch": 0.8002315454796793, + "grad_norm": 0.9459981886455083, + "learning_rate": 2.0211075271157897e-06, + "loss": 0.4886, + "step": 46311 + }, + { + "epoch": 0.8002488249930882, + "grad_norm": 1.1827019277678337, + "learning_rate": 2.020770180593894e-06, + "loss": 0.3148, + "step": 46312 + }, + { + "epoch": 0.800266104506497, + "grad_norm": 0.8742428108332166, + "learning_rate": 2.0204328590635504e-06, + "loss": 0.457, + "step": 46313 + }, + { + "epoch": 0.800283384019906, + "grad_norm": 1.684631345266028, + "learning_rate": 2.0200955625258166e-06, + "loss": 0.4674, + "step": 46314 + }, + { + "epoch": 0.8003006635333149, + "grad_norm": 1.7189782426365858, + "learning_rate": 2.0197582909817517e-06, + "loss": 0.483, + "step": 46315 + }, + { + "epoch": 0.8003179430467238, + "grad_norm": 0.9252912894476063, + "learning_rate": 2.0194210444324114e-06, + "loss": 0.19, + "step": 46316 + }, + { + "epoch": 0.8003352225601327, + "grad_norm": 1.4630312870328466, + "learning_rate": 2.019083822878849e-06, + "loss": 0.4135, + "step": 46317 + }, + { + "epoch": 0.8003525020735416, + "grad_norm": 1.274919091589891, + "learning_rate": 2.0187466263221243e-06, + "loss": 0.1925, + "step": 46318 + }, + { + "epoch": 0.8003697815869505, + "grad_norm": 2.036781324798499, + "learning_rate": 2.018409454763289e-06, + "loss": 0.3163, + "step": 46319 + }, + { + "epoch": 0.8003870611003594, + "grad_norm": 1.163895598434879, + "learning_rate": 2.0180723082034037e-06, + "loss": 0.274, + "step": 46320 + }, + { + "epoch": 0.8004043406137683, + "grad_norm": 1.2402248586509046, + "learning_rate": 2.0177351866435237e-06, + "loss": 0.2458, + "step": 46321 + }, + { + "epoch": 0.8004216201271772, + "grad_norm": 1.0088533756425526, + "learning_rate": 2.017398090084701e-06, + "loss": 0.3343, + "step": 46322 + }, + { + "epoch": 0.8004388996405861, + "grad_norm": 0.5411227976328845, + "learning_rate": 2.0170610185279984e-06, + "loss": 0.5287, + "step": 46323 + }, + { + "epoch": 0.800456179153995, + "grad_norm": 0.6876010885286173, + "learning_rate": 2.0167239719744626e-06, + "loss": 0.19, + "step": 46324 + }, + { + "epoch": 0.8004734586674039, + "grad_norm": 0.8375051981826981, + "learning_rate": 2.0163869504251552e-06, + "loss": 0.4252, + "step": 46325 + }, + { + "epoch": 0.8004907381808128, + "grad_norm": 1.269438861297272, + "learning_rate": 2.0160499538811308e-06, + "loss": 0.2216, + "step": 46326 + }, + { + "epoch": 0.8005080176942218, + "grad_norm": 1.1002461247204625, + "learning_rate": 2.0157129823434463e-06, + "loss": 0.2927, + "step": 46327 + }, + { + "epoch": 0.8005252972076307, + "grad_norm": 1.2096936445336701, + "learning_rate": 2.015376035813157e-06, + "loss": 0.3945, + "step": 46328 + }, + { + "epoch": 0.8005425767210396, + "grad_norm": 1.450617146310221, + "learning_rate": 2.015039114291313e-06, + "loss": 0.3663, + "step": 46329 + }, + { + "epoch": 0.8005598562344485, + "grad_norm": 0.9319191489301196, + "learning_rate": 2.0147022177789767e-06, + "loss": 0.2003, + "step": 46330 + }, + { + "epoch": 0.8005771357478574, + "grad_norm": 1.3955830077392257, + "learning_rate": 2.014365346277194e-06, + "loss": 0.3768, + "step": 46331 + }, + { + "epoch": 0.8005944152612663, + "grad_norm": 2.688396691049827, + "learning_rate": 2.0140284997870307e-06, + "loss": 0.3274, + "step": 46332 + }, + { + "epoch": 0.8006116947746752, + "grad_norm": 1.972731627504925, + "learning_rate": 2.013691678309535e-06, + "loss": 0.2326, + "step": 46333 + }, + { + "epoch": 0.800628974288084, + "grad_norm": 0.7613924472887581, + "learning_rate": 2.0133548818457673e-06, + "loss": 0.408, + "step": 46334 + }, + { + "epoch": 0.8006462538014929, + "grad_norm": 1.3593305211223459, + "learning_rate": 2.013018110396775e-06, + "loss": 0.2367, + "step": 46335 + }, + { + "epoch": 0.8006635333149018, + "grad_norm": 0.8744864607238156, + "learning_rate": 2.012681363963621e-06, + "loss": 0.3223, + "step": 46336 + }, + { + "epoch": 0.8006808128283107, + "grad_norm": 1.1081455806722043, + "learning_rate": 2.0123446425473503e-06, + "loss": 0.3225, + "step": 46337 + }, + { + "epoch": 0.8006980923417196, + "grad_norm": 1.3052840128269236, + "learning_rate": 2.0120079461490284e-06, + "loss": 0.39, + "step": 46338 + }, + { + "epoch": 0.8007153718551285, + "grad_norm": 1.0217051444652154, + "learning_rate": 2.0116712747697043e-06, + "loss": 0.3074, + "step": 46339 + }, + { + "epoch": 0.8007326513685374, + "grad_norm": 1.3302389991133112, + "learning_rate": 2.0113346284104306e-06, + "loss": 0.4245, + "step": 46340 + }, + { + "epoch": 0.8007499308819463, + "grad_norm": 1.1189020980475946, + "learning_rate": 2.010998007072266e-06, + "loss": 0.4936, + "step": 46341 + }, + { + "epoch": 0.8007672103953553, + "grad_norm": 1.0524608623037326, + "learning_rate": 2.0106614107562607e-06, + "loss": 0.4527, + "step": 46342 + }, + { + "epoch": 0.8007844899087642, + "grad_norm": 1.8626027712885385, + "learning_rate": 2.010324839463471e-06, + "loss": 0.2014, + "step": 46343 + }, + { + "epoch": 0.8008017694221731, + "grad_norm": 1.2435280956517993, + "learning_rate": 2.009988293194951e-06, + "loss": 0.5103, + "step": 46344 + }, + { + "epoch": 0.800819048935582, + "grad_norm": 1.4252709020945045, + "learning_rate": 2.009651771951757e-06, + "loss": 0.339, + "step": 46345 + }, + { + "epoch": 0.8008363284489909, + "grad_norm": 1.3152022048689478, + "learning_rate": 2.00931527573494e-06, + "loss": 0.4091, + "step": 46346 + }, + { + "epoch": 0.8008536079623998, + "grad_norm": 1.5511740754609442, + "learning_rate": 2.008978804545556e-06, + "loss": 0.3581, + "step": 46347 + }, + { + "epoch": 0.8008708874758087, + "grad_norm": 0.9214520983076849, + "learning_rate": 2.008642358384655e-06, + "loss": 0.265, + "step": 46348 + }, + { + "epoch": 0.8008881669892176, + "grad_norm": 1.4196530312536686, + "learning_rate": 2.0083059372532943e-06, + "loss": 0.4099, + "step": 46349 + }, + { + "epoch": 0.8009054465026265, + "grad_norm": 1.3503111628209439, + "learning_rate": 2.007969541152528e-06, + "loss": 0.2514, + "step": 46350 + }, + { + "epoch": 0.8009227260160354, + "grad_norm": 1.557935025993224, + "learning_rate": 2.0076331700834072e-06, + "loss": 0.4314, + "step": 46351 + }, + { + "epoch": 0.8009400055294443, + "grad_norm": 1.8352447115560768, + "learning_rate": 2.0072968240469893e-06, + "loss": 0.326, + "step": 46352 + }, + { + "epoch": 0.8009572850428532, + "grad_norm": 1.2292685950859852, + "learning_rate": 2.006960503044322e-06, + "loss": 0.7358, + "step": 46353 + }, + { + "epoch": 0.8009745645562621, + "grad_norm": 1.7457104307058857, + "learning_rate": 2.006624207076462e-06, + "loss": 0.3958, + "step": 46354 + }, + { + "epoch": 0.8009918440696709, + "grad_norm": 1.0345835419426717, + "learning_rate": 2.006287936144462e-06, + "loss": 0.2863, + "step": 46355 + }, + { + "epoch": 0.8010091235830799, + "grad_norm": 1.0780098107843337, + "learning_rate": 2.005951690249379e-06, + "loss": 0.3363, + "step": 46356 + }, + { + "epoch": 0.8010264030964888, + "grad_norm": 1.0299156430654086, + "learning_rate": 2.0056154693922593e-06, + "loss": 0.4284, + "step": 46357 + }, + { + "epoch": 0.8010436826098977, + "grad_norm": 1.590048440968397, + "learning_rate": 2.005279273574163e-06, + "loss": 0.2707, + "step": 46358 + }, + { + "epoch": 0.8010609621233066, + "grad_norm": 1.1274841634323571, + "learning_rate": 2.0049431027961364e-06, + "loss": 0.7019, + "step": 46359 + }, + { + "epoch": 0.8010782416367155, + "grad_norm": 1.205771882720708, + "learning_rate": 2.0046069570592364e-06, + "loss": 0.2807, + "step": 46360 + }, + { + "epoch": 0.8010955211501244, + "grad_norm": 1.0477445179466907, + "learning_rate": 2.004270836364517e-06, + "loss": 0.2251, + "step": 46361 + }, + { + "epoch": 0.8011128006635333, + "grad_norm": 1.4984203410872603, + "learning_rate": 2.003934740713027e-06, + "loss": 0.2662, + "step": 46362 + }, + { + "epoch": 0.8011300801769422, + "grad_norm": 1.1885712972633213, + "learning_rate": 2.003598670105823e-06, + "loss": 0.3464, + "step": 46363 + }, + { + "epoch": 0.8011473596903511, + "grad_norm": 1.444184301921379, + "learning_rate": 2.0032626245439534e-06, + "loss": 0.2449, + "step": 46364 + }, + { + "epoch": 0.80116463920376, + "grad_norm": 1.9141825318808026, + "learning_rate": 2.0029266040284724e-06, + "loss": 0.4138, + "step": 46365 + }, + { + "epoch": 0.8011819187171689, + "grad_norm": 1.3448965158204682, + "learning_rate": 2.002590608560434e-06, + "loss": 0.1986, + "step": 46366 + }, + { + "epoch": 0.8011991982305778, + "grad_norm": 0.9709726301203803, + "learning_rate": 2.0022546381408915e-06, + "loss": 0.5593, + "step": 46367 + }, + { + "epoch": 0.8012164777439867, + "grad_norm": 1.5037804944308786, + "learning_rate": 2.0019186927708935e-06, + "loss": 0.3153, + "step": 46368 + }, + { + "epoch": 0.8012337572573957, + "grad_norm": 1.5681316704874153, + "learning_rate": 2.001582772451496e-06, + "loss": 0.3654, + "step": 46369 + }, + { + "epoch": 0.8012510367708046, + "grad_norm": 0.938813639380672, + "learning_rate": 2.001246877183749e-06, + "loss": 0.4294, + "step": 46370 + }, + { + "epoch": 0.8012683162842135, + "grad_norm": 1.520185285414732, + "learning_rate": 2.0009110069686987e-06, + "loss": 0.4069, + "step": 46371 + }, + { + "epoch": 0.8012855957976224, + "grad_norm": 1.0495021282646908, + "learning_rate": 2.00057516180741e-06, + "loss": 0.5376, + "step": 46372 + }, + { + "epoch": 0.8013028753110313, + "grad_norm": 1.4766864752410174, + "learning_rate": 2.000239341700924e-06, + "loss": 0.6371, + "step": 46373 + }, + { + "epoch": 0.8013201548244402, + "grad_norm": 0.987295188596217, + "learning_rate": 1.999903546650299e-06, + "loss": 0.2199, + "step": 46374 + }, + { + "epoch": 0.8013374343378491, + "grad_norm": 0.9662977578557077, + "learning_rate": 1.9995677766565813e-06, + "loss": 0.3025, + "step": 46375 + }, + { + "epoch": 0.8013547138512579, + "grad_norm": 1.2179525678835081, + "learning_rate": 1.999232031720829e-06, + "loss": 0.4177, + "step": 46376 + }, + { + "epoch": 0.8013719933646668, + "grad_norm": 1.2179653636314116, + "learning_rate": 1.998896311844084e-06, + "loss": 0.4552, + "step": 46377 + }, + { + "epoch": 0.8013892728780757, + "grad_norm": 1.0548370523671766, + "learning_rate": 1.9985606170274095e-06, + "loss": 0.2207, + "step": 46378 + }, + { + "epoch": 0.8014065523914846, + "grad_norm": 0.6968285396187728, + "learning_rate": 1.9982249472718486e-06, + "loss": 0.6206, + "step": 46379 + }, + { + "epoch": 0.8014238319048935, + "grad_norm": 1.5364233213610372, + "learning_rate": 1.9978893025784573e-06, + "loss": 0.603, + "step": 46380 + }, + { + "epoch": 0.8014411114183024, + "grad_norm": 1.287725756785313, + "learning_rate": 1.9975536829482856e-06, + "loss": 0.2738, + "step": 46381 + }, + { + "epoch": 0.8014583909317113, + "grad_norm": 2.4561304050271957, + "learning_rate": 1.99721808838238e-06, + "loss": 0.3831, + "step": 46382 + }, + { + "epoch": 0.8014756704451202, + "grad_norm": 2.6779104937189695, + "learning_rate": 1.9968825188817962e-06, + "loss": 0.3062, + "step": 46383 + }, + { + "epoch": 0.8014929499585292, + "grad_norm": 1.1672701601197353, + "learning_rate": 1.996546974447585e-06, + "loss": 0.3156, + "step": 46384 + }, + { + "epoch": 0.8015102294719381, + "grad_norm": 1.0016013272914701, + "learning_rate": 1.9962114550808e-06, + "loss": 0.4362, + "step": 46385 + }, + { + "epoch": 0.801527508985347, + "grad_norm": 1.1966174815843447, + "learning_rate": 1.995875960782485e-06, + "loss": 0.2795, + "step": 46386 + }, + { + "epoch": 0.8015447884987559, + "grad_norm": 1.1128533959505147, + "learning_rate": 1.995540491553698e-06, + "loss": 0.1843, + "step": 46387 + }, + { + "epoch": 0.8015620680121648, + "grad_norm": 1.0877765372001416, + "learning_rate": 1.995205047395484e-06, + "loss": 0.3212, + "step": 46388 + }, + { + "epoch": 0.8015793475255737, + "grad_norm": 1.8302959133506884, + "learning_rate": 1.9948696283088965e-06, + "loss": 0.484, + "step": 46389 + }, + { + "epoch": 0.8015966270389826, + "grad_norm": 1.8070008782924591, + "learning_rate": 1.994534234294988e-06, + "loss": 0.4046, + "step": 46390 + }, + { + "epoch": 0.8016139065523915, + "grad_norm": 0.8180305066279558, + "learning_rate": 1.994198865354804e-06, + "loss": 0.3948, + "step": 46391 + }, + { + "epoch": 0.8016311860658004, + "grad_norm": 1.477654051219907, + "learning_rate": 1.9938635214893998e-06, + "loss": 0.4583, + "step": 46392 + }, + { + "epoch": 0.8016484655792093, + "grad_norm": 1.123889020883715, + "learning_rate": 1.9935282026998217e-06, + "loss": 0.3513, + "step": 46393 + }, + { + "epoch": 0.8016657450926182, + "grad_norm": 1.4940460807619664, + "learning_rate": 1.9931929089871203e-06, + "loss": 0.2193, + "step": 46394 + }, + { + "epoch": 0.8016830246060271, + "grad_norm": 1.7088744039736088, + "learning_rate": 1.9928576403523493e-06, + "loss": 0.2518, + "step": 46395 + }, + { + "epoch": 0.801700304119436, + "grad_norm": 1.1329456907133602, + "learning_rate": 1.9925223967965577e-06, + "loss": 0.2917, + "step": 46396 + }, + { + "epoch": 0.801717583632845, + "grad_norm": 1.9259445043180556, + "learning_rate": 1.9921871783207926e-06, + "loss": 0.5166, + "step": 46397 + }, + { + "epoch": 0.8017348631462538, + "grad_norm": 1.0255400636062804, + "learning_rate": 1.9918519849261075e-06, + "loss": 0.325, + "step": 46398 + }, + { + "epoch": 0.8017521426596627, + "grad_norm": 1.451880931164025, + "learning_rate": 1.991516816613549e-06, + "loss": 0.3638, + "step": 46399 + }, + { + "epoch": 0.8017694221730716, + "grad_norm": 2.1432493429223487, + "learning_rate": 1.9911816733841684e-06, + "loss": 0.3324, + "step": 46400 + }, + { + "epoch": 0.8017867016864805, + "grad_norm": 1.397914664609878, + "learning_rate": 1.9908465552390175e-06, + "loss": 0.4359, + "step": 46401 + }, + { + "epoch": 0.8018039811998894, + "grad_norm": 1.5387676674638264, + "learning_rate": 1.990511462179142e-06, + "loss": 0.3181, + "step": 46402 + }, + { + "epoch": 0.8018212607132983, + "grad_norm": 1.1812514291364744, + "learning_rate": 1.990176394205595e-06, + "loss": 0.4312, + "step": 46403 + }, + { + "epoch": 0.8018385402267072, + "grad_norm": 1.3604099659687703, + "learning_rate": 1.9898413513194226e-06, + "loss": 0.4746, + "step": 46404 + }, + { + "epoch": 0.8018558197401161, + "grad_norm": 1.2909926761827952, + "learning_rate": 1.9895063335216746e-06, + "loss": 0.3936, + "step": 46405 + }, + { + "epoch": 0.801873099253525, + "grad_norm": 2.0657064831489107, + "learning_rate": 1.989171340813403e-06, + "loss": 0.464, + "step": 46406 + }, + { + "epoch": 0.8018903787669339, + "grad_norm": 0.9691842964209885, + "learning_rate": 1.9888363731956574e-06, + "loss": 0.4355, + "step": 46407 + }, + { + "epoch": 0.8019076582803428, + "grad_norm": 1.0504270638947646, + "learning_rate": 1.9885014306694826e-06, + "loss": 0.4475, + "step": 46408 + }, + { + "epoch": 0.8019249377937517, + "grad_norm": 0.8273784780729527, + "learning_rate": 1.988166513235933e-06, + "loss": 0.5448, + "step": 46409 + }, + { + "epoch": 0.8019422173071606, + "grad_norm": 1.4503152454360917, + "learning_rate": 1.9878316208960514e-06, + "loss": 0.4285, + "step": 46410 + }, + { + "epoch": 0.8019594968205696, + "grad_norm": 1.9827337158053202, + "learning_rate": 1.9874967536508906e-06, + "loss": 0.3165, + "step": 46411 + }, + { + "epoch": 0.8019767763339785, + "grad_norm": 1.648791678448693, + "learning_rate": 1.9871619115015017e-06, + "loss": 0.3455, + "step": 46412 + }, + { + "epoch": 0.8019940558473874, + "grad_norm": 1.0936925016194812, + "learning_rate": 1.9868270944489275e-06, + "loss": 0.3319, + "step": 46413 + }, + { + "epoch": 0.8020113353607963, + "grad_norm": 1.2402872736463884, + "learning_rate": 1.9864923024942227e-06, + "loss": 0.2538, + "step": 46414 + }, + { + "epoch": 0.8020286148742052, + "grad_norm": 1.3693863205444672, + "learning_rate": 1.98615753563843e-06, + "loss": 0.2206, + "step": 46415 + }, + { + "epoch": 0.8020458943876141, + "grad_norm": 1.6992262574609442, + "learning_rate": 1.9858227938826046e-06, + "loss": 0.2625, + "step": 46416 + }, + { + "epoch": 0.802063173901023, + "grad_norm": 0.9324792237752328, + "learning_rate": 1.985488077227785e-06, + "loss": 0.1774, + "step": 46417 + }, + { + "epoch": 0.8020804534144319, + "grad_norm": 1.3340872141300655, + "learning_rate": 1.9851533856750317e-06, + "loss": 0.3332, + "step": 46418 + }, + { + "epoch": 0.8020977329278407, + "grad_norm": 0.8759557258540326, + "learning_rate": 1.9848187192253843e-06, + "loss": 0.498, + "step": 46419 + }, + { + "epoch": 0.8021150124412496, + "grad_norm": 1.8273839045033549, + "learning_rate": 1.9844840778798957e-06, + "loss": 0.3097, + "step": 46420 + }, + { + "epoch": 0.8021322919546585, + "grad_norm": 1.3628632943066117, + "learning_rate": 1.9841494616396127e-06, + "loss": 0.3873, + "step": 46421 + }, + { + "epoch": 0.8021495714680674, + "grad_norm": 0.9993047070952019, + "learning_rate": 1.9838148705055803e-06, + "loss": 0.4326, + "step": 46422 + }, + { + "epoch": 0.8021668509814763, + "grad_norm": 1.1732111140825963, + "learning_rate": 1.9834803044788496e-06, + "loss": 0.3089, + "step": 46423 + }, + { + "epoch": 0.8021841304948852, + "grad_norm": 1.4776015502528126, + "learning_rate": 1.9831457635604667e-06, + "loss": 0.2114, + "step": 46424 + }, + { + "epoch": 0.8022014100082941, + "grad_norm": 1.5618739632606555, + "learning_rate": 1.982811247751484e-06, + "loss": 0.3518, + "step": 46425 + }, + { + "epoch": 0.802218689521703, + "grad_norm": 1.5231300031970643, + "learning_rate": 1.9824767570529425e-06, + "loss": 0.7207, + "step": 46426 + }, + { + "epoch": 0.802235969035112, + "grad_norm": 1.2659163661977837, + "learning_rate": 1.982142291465896e-06, + "loss": 0.3555, + "step": 46427 + }, + { + "epoch": 0.8022532485485209, + "grad_norm": 1.0502652937434815, + "learning_rate": 1.981807850991386e-06, + "loss": 0.3074, + "step": 46428 + }, + { + "epoch": 0.8022705280619298, + "grad_norm": 1.0912803033946532, + "learning_rate": 1.9814734356304634e-06, + "loss": 0.458, + "step": 46429 + }, + { + "epoch": 0.8022878075753387, + "grad_norm": 2.221209980070183, + "learning_rate": 1.981139045384176e-06, + "loss": 0.3567, + "step": 46430 + }, + { + "epoch": 0.8023050870887476, + "grad_norm": 1.254060178843748, + "learning_rate": 1.980804680253573e-06, + "loss": 0.3085, + "step": 46431 + }, + { + "epoch": 0.8023223666021565, + "grad_norm": 1.8423275669095456, + "learning_rate": 1.9804703402396984e-06, + "loss": 0.3414, + "step": 46432 + }, + { + "epoch": 0.8023396461155654, + "grad_norm": 1.5297572289185564, + "learning_rate": 1.9801360253435985e-06, + "loss": 0.4679, + "step": 46433 + }, + { + "epoch": 0.8023569256289743, + "grad_norm": 1.5994267761140444, + "learning_rate": 1.9798017355663213e-06, + "loss": 0.3724, + "step": 46434 + }, + { + "epoch": 0.8023742051423832, + "grad_norm": 0.8905005015758325, + "learning_rate": 1.9794674709089144e-06, + "loss": 0.2624, + "step": 46435 + }, + { + "epoch": 0.8023914846557921, + "grad_norm": 0.5678960734223469, + "learning_rate": 1.9791332313724287e-06, + "loss": 0.7626, + "step": 46436 + }, + { + "epoch": 0.802408764169201, + "grad_norm": 0.5669890542621544, + "learning_rate": 1.9787990169579043e-06, + "loss": 0.6144, + "step": 46437 + }, + { + "epoch": 0.80242604368261, + "grad_norm": 1.3727862174470347, + "learning_rate": 1.978464827666393e-06, + "loss": 0.389, + "step": 46438 + }, + { + "epoch": 0.8024433231960189, + "grad_norm": 1.1231610752515488, + "learning_rate": 1.9781306634989373e-06, + "loss": 0.3627, + "step": 46439 + }, + { + "epoch": 0.8024606027094277, + "grad_norm": 1.6358930375180067, + "learning_rate": 1.9777965244565855e-06, + "loss": 0.3793, + "step": 46440 + }, + { + "epoch": 0.8024778822228366, + "grad_norm": 0.9532890719517623, + "learning_rate": 1.977462410540385e-06, + "loss": 0.571, + "step": 46441 + }, + { + "epoch": 0.8024951617362455, + "grad_norm": 1.2010515171908458, + "learning_rate": 1.977128321751385e-06, + "loss": 0.3597, + "step": 46442 + }, + { + "epoch": 0.8025124412496544, + "grad_norm": 0.7412013930738233, + "learning_rate": 1.9767942580906274e-06, + "loss": 0.2072, + "step": 46443 + }, + { + "epoch": 0.8025297207630633, + "grad_norm": 3.048788962750279, + "learning_rate": 1.9764602195591585e-06, + "loss": 0.2332, + "step": 46444 + }, + { + "epoch": 0.8025470002764722, + "grad_norm": 1.250067021637341, + "learning_rate": 1.9761262061580254e-06, + "loss": 0.5024, + "step": 46445 + }, + { + "epoch": 0.8025642797898811, + "grad_norm": 1.5817950093538176, + "learning_rate": 1.975792217888276e-06, + "loss": 0.405, + "step": 46446 + }, + { + "epoch": 0.80258155930329, + "grad_norm": 1.3839931265126828, + "learning_rate": 1.975458254750957e-06, + "loss": 0.4018, + "step": 46447 + }, + { + "epoch": 0.8025988388166989, + "grad_norm": 1.2105635135331003, + "learning_rate": 1.975124316747109e-06, + "loss": 0.43, + "step": 46448 + }, + { + "epoch": 0.8026161183301078, + "grad_norm": 1.670478245293486, + "learning_rate": 1.9747904038777855e-06, + "loss": 0.4667, + "step": 46449 + }, + { + "epoch": 0.8026333978435167, + "grad_norm": 0.9824239612939634, + "learning_rate": 1.9744565161440254e-06, + "loss": 0.2739, + "step": 46450 + }, + { + "epoch": 0.8026506773569256, + "grad_norm": 1.3095929608479386, + "learning_rate": 1.9741226535468773e-06, + "loss": 0.4074, + "step": 46451 + }, + { + "epoch": 0.8026679568703345, + "grad_norm": 1.36554126938788, + "learning_rate": 1.97378881608739e-06, + "loss": 0.2606, + "step": 46452 + }, + { + "epoch": 0.8026852363837435, + "grad_norm": 1.3437509150738904, + "learning_rate": 1.9734550037666023e-06, + "loss": 0.4167, + "step": 46453 + }, + { + "epoch": 0.8027025158971524, + "grad_norm": 1.3897426266898782, + "learning_rate": 1.9731212165855675e-06, + "loss": 0.2342, + "step": 46454 + }, + { + "epoch": 0.8027197954105613, + "grad_norm": 1.7108088848742349, + "learning_rate": 1.9727874545453228e-06, + "loss": 0.2888, + "step": 46455 + }, + { + "epoch": 0.8027370749239702, + "grad_norm": 1.3448326167831195, + "learning_rate": 1.9724537176469226e-06, + "loss": 0.3523, + "step": 46456 + }, + { + "epoch": 0.8027543544373791, + "grad_norm": 1.7818913117489326, + "learning_rate": 1.9721200058914003e-06, + "loss": 0.3484, + "step": 46457 + }, + { + "epoch": 0.802771633950788, + "grad_norm": 0.9132387329795182, + "learning_rate": 1.9717863192798147e-06, + "loss": 0.2442, + "step": 46458 + }, + { + "epoch": 0.8027889134641969, + "grad_norm": 1.1617188986842335, + "learning_rate": 1.971452657813201e-06, + "loss": 0.4377, + "step": 46459 + }, + { + "epoch": 0.8028061929776058, + "grad_norm": 0.5436166425703053, + "learning_rate": 1.9711190214926114e-06, + "loss": 0.6495, + "step": 46460 + }, + { + "epoch": 0.8028234724910146, + "grad_norm": 1.4975836328207643, + "learning_rate": 1.9707854103190837e-06, + "loss": 0.267, + "step": 46461 + }, + { + "epoch": 0.8028407520044235, + "grad_norm": 1.1265933859518036, + "learning_rate": 1.9704518242936686e-06, + "loss": 0.2771, + "step": 46462 + }, + { + "epoch": 0.8028580315178324, + "grad_norm": 1.4360742121580483, + "learning_rate": 1.9701182634174075e-06, + "loss": 0.2067, + "step": 46463 + }, + { + "epoch": 0.8028753110312413, + "grad_norm": 0.8690356494461301, + "learning_rate": 1.9697847276913453e-06, + "loss": 0.3032, + "step": 46464 + }, + { + "epoch": 0.8028925905446502, + "grad_norm": 0.9299265706825187, + "learning_rate": 1.9694512171165305e-06, + "loss": 0.3029, + "step": 46465 + }, + { + "epoch": 0.8029098700580591, + "grad_norm": 1.6879852425208457, + "learning_rate": 1.969117731694001e-06, + "loss": 0.3628, + "step": 46466 + }, + { + "epoch": 0.802927149571468, + "grad_norm": 1.2062212144800535, + "learning_rate": 1.9687842714248085e-06, + "loss": 0.3534, + "step": 46467 + }, + { + "epoch": 0.802944429084877, + "grad_norm": 1.4024832921942514, + "learning_rate": 1.968450836309992e-06, + "loss": 0.3279, + "step": 46468 + }, + { + "epoch": 0.8029617085982859, + "grad_norm": 1.0762468334901951, + "learning_rate": 1.9681174263505977e-06, + "loss": 0.2558, + "step": 46469 + }, + { + "epoch": 0.8029789881116948, + "grad_norm": 1.7502298329285926, + "learning_rate": 1.967784041547669e-06, + "loss": 0.2411, + "step": 46470 + }, + { + "epoch": 0.8029962676251037, + "grad_norm": 1.2326726576702167, + "learning_rate": 1.9674506819022543e-06, + "loss": 0.549, + "step": 46471 + }, + { + "epoch": 0.8030135471385126, + "grad_norm": 1.3174321624553071, + "learning_rate": 1.967117347415395e-06, + "loss": 0.3887, + "step": 46472 + }, + { + "epoch": 0.8030308266519215, + "grad_norm": 1.0587644612581126, + "learning_rate": 1.966784038088132e-06, + "loss": 0.376, + "step": 46473 + }, + { + "epoch": 0.8030481061653304, + "grad_norm": 2.2597551046393702, + "learning_rate": 1.966450753921512e-06, + "loss": 0.2248, + "step": 46474 + }, + { + "epoch": 0.8030653856787393, + "grad_norm": 0.8283243079774479, + "learning_rate": 1.966117494916577e-06, + "loss": 0.2566, + "step": 46475 + }, + { + "epoch": 0.8030826651921482, + "grad_norm": 1.039711055802091, + "learning_rate": 1.965784261074377e-06, + "loss": 0.238, + "step": 46476 + }, + { + "epoch": 0.8030999447055571, + "grad_norm": 1.4054328006997734, + "learning_rate": 1.965451052395948e-06, + "loss": 0.4183, + "step": 46477 + }, + { + "epoch": 0.803117224218966, + "grad_norm": 0.9107461439123492, + "learning_rate": 1.9651178688823403e-06, + "loss": 0.6776, + "step": 46478 + }, + { + "epoch": 0.8031345037323749, + "grad_norm": 0.9601490091015443, + "learning_rate": 1.964784710534591e-06, + "loss": 0.1729, + "step": 46479 + }, + { + "epoch": 0.8031517832457838, + "grad_norm": 1.2607204262631817, + "learning_rate": 1.9644515773537455e-06, + "loss": 0.3751, + "step": 46480 + }, + { + "epoch": 0.8031690627591928, + "grad_norm": 1.8428999612091994, + "learning_rate": 1.96411846934085e-06, + "loss": 0.2486, + "step": 46481 + }, + { + "epoch": 0.8031863422726016, + "grad_norm": 1.55881800199721, + "learning_rate": 1.963785386496947e-06, + "loss": 0.4931, + "step": 46482 + }, + { + "epoch": 0.8032036217860105, + "grad_norm": 1.202827549382366, + "learning_rate": 1.96345232882308e-06, + "loss": 0.3035, + "step": 46483 + }, + { + "epoch": 0.8032209012994194, + "grad_norm": 1.1430117211066617, + "learning_rate": 1.9631192963202894e-06, + "loss": 0.3386, + "step": 46484 + }, + { + "epoch": 0.8032381808128283, + "grad_norm": 1.305701513045417, + "learning_rate": 1.9627862889896186e-06, + "loss": 0.276, + "step": 46485 + }, + { + "epoch": 0.8032554603262372, + "grad_norm": 1.738079334515706, + "learning_rate": 1.9624533068321117e-06, + "loss": 0.1971, + "step": 46486 + }, + { + "epoch": 0.8032727398396461, + "grad_norm": 1.5093188773095194, + "learning_rate": 1.962120349848815e-06, + "loss": 0.4393, + "step": 46487 + }, + { + "epoch": 0.803290019353055, + "grad_norm": 1.3319408063498765, + "learning_rate": 1.961787418040766e-06, + "loss": 0.2768, + "step": 46488 + }, + { + "epoch": 0.8033072988664639, + "grad_norm": 1.9272440135871094, + "learning_rate": 1.9614545114090123e-06, + "loss": 0.4978, + "step": 46489 + }, + { + "epoch": 0.8033245783798728, + "grad_norm": 1.4717806401348514, + "learning_rate": 1.961121629954591e-06, + "loss": 0.2958, + "step": 46490 + }, + { + "epoch": 0.8033418578932817, + "grad_norm": 1.1975323476175899, + "learning_rate": 1.960788773678548e-06, + "loss": 0.3897, + "step": 46491 + }, + { + "epoch": 0.8033591374066906, + "grad_norm": 1.4163204255845787, + "learning_rate": 1.960455942581925e-06, + "loss": 0.2895, + "step": 46492 + }, + { + "epoch": 0.8033764169200995, + "grad_norm": 1.1259353428543772, + "learning_rate": 1.960123136665768e-06, + "loss": 0.3564, + "step": 46493 + }, + { + "epoch": 0.8033936964335084, + "grad_norm": 1.0588804003469263, + "learning_rate": 1.959790355931116e-06, + "loss": 0.3429, + "step": 46494 + }, + { + "epoch": 0.8034109759469173, + "grad_norm": 1.0993219766794193, + "learning_rate": 1.959457600379009e-06, + "loss": 0.2486, + "step": 46495 + }, + { + "epoch": 0.8034282554603263, + "grad_norm": 1.832293649673027, + "learning_rate": 1.959124870010495e-06, + "loss": 0.2717, + "step": 46496 + }, + { + "epoch": 0.8034455349737352, + "grad_norm": 2.561715116721486, + "learning_rate": 1.9587921648266073e-06, + "loss": 0.2974, + "step": 46497 + }, + { + "epoch": 0.8034628144871441, + "grad_norm": 2.0157090294246043, + "learning_rate": 1.958459484828399e-06, + "loss": 0.3422, + "step": 46498 + }, + { + "epoch": 0.803480094000553, + "grad_norm": 1.279206782447757, + "learning_rate": 1.9581268300169043e-06, + "loss": 0.4743, + "step": 46499 + }, + { + "epoch": 0.8034973735139619, + "grad_norm": 1.208446249407474, + "learning_rate": 1.9577942003931705e-06, + "loss": 0.2831, + "step": 46500 + }, + { + "epoch": 0.8035146530273708, + "grad_norm": 1.1321143888134764, + "learning_rate": 1.957461595958233e-06, + "loss": 0.3986, + "step": 46501 + }, + { + "epoch": 0.8035319325407797, + "grad_norm": 1.2492132896076962, + "learning_rate": 1.9571290167131396e-06, + "loss": 0.09, + "step": 46502 + }, + { + "epoch": 0.8035492120541885, + "grad_norm": 1.1411410068248022, + "learning_rate": 1.9567964626589274e-06, + "loss": 0.3065, + "step": 46503 + }, + { + "epoch": 0.8035664915675974, + "grad_norm": 2.9803343260869273, + "learning_rate": 1.9564639337966397e-06, + "loss": 0.3987, + "step": 46504 + }, + { + "epoch": 0.8035837710810063, + "grad_norm": 1.0308131259744768, + "learning_rate": 1.9561314301273206e-06, + "loss": 0.3501, + "step": 46505 + }, + { + "epoch": 0.8036010505944152, + "grad_norm": 1.6953661926928552, + "learning_rate": 1.955798951652007e-06, + "loss": 0.271, + "step": 46506 + }, + { + "epoch": 0.8036183301078241, + "grad_norm": 1.6513746886244347, + "learning_rate": 1.955466498371744e-06, + "loss": 0.388, + "step": 46507 + }, + { + "epoch": 0.803635609621233, + "grad_norm": 1.8907189987145128, + "learning_rate": 1.9551340702875696e-06, + "loss": 0.2242, + "step": 46508 + }, + { + "epoch": 0.803652889134642, + "grad_norm": 0.9412654724578674, + "learning_rate": 1.9548016674005264e-06, + "loss": 0.2835, + "step": 46509 + }, + { + "epoch": 0.8036701686480509, + "grad_norm": 1.2562392049721633, + "learning_rate": 1.9544692897116567e-06, + "loss": 0.3149, + "step": 46510 + }, + { + "epoch": 0.8036874481614598, + "grad_norm": 1.44122202285199, + "learning_rate": 1.954136937222002e-06, + "loss": 0.1895, + "step": 46511 + }, + { + "epoch": 0.8037047276748687, + "grad_norm": 1.5677685076241832, + "learning_rate": 1.9538046099326e-06, + "loss": 0.4271, + "step": 46512 + }, + { + "epoch": 0.8037220071882776, + "grad_norm": 1.117655405102607, + "learning_rate": 1.953472307844496e-06, + "loss": 0.3082, + "step": 46513 + }, + { + "epoch": 0.8037392867016865, + "grad_norm": 1.2306363101003759, + "learning_rate": 1.9531400309587245e-06, + "loss": 0.2604, + "step": 46514 + }, + { + "epoch": 0.8037565662150954, + "grad_norm": 0.9902827638486291, + "learning_rate": 1.952807779276331e-06, + "loss": 0.5235, + "step": 46515 + }, + { + "epoch": 0.8037738457285043, + "grad_norm": 1.1423654079962664, + "learning_rate": 1.9524755527983575e-06, + "loss": 0.1727, + "step": 46516 + }, + { + "epoch": 0.8037911252419132, + "grad_norm": 1.3578598089898846, + "learning_rate": 1.9521433515258403e-06, + "loss": 0.4636, + "step": 46517 + }, + { + "epoch": 0.8038084047553221, + "grad_norm": 0.9548213426272479, + "learning_rate": 1.9518111754598234e-06, + "loss": 0.674, + "step": 46518 + }, + { + "epoch": 0.803825684268731, + "grad_norm": 2.3572111354252323, + "learning_rate": 1.951479024601344e-06, + "loss": 0.3923, + "step": 46519 + }, + { + "epoch": 0.8038429637821399, + "grad_norm": 1.0408408059173861, + "learning_rate": 1.951146898951443e-06, + "loss": 0.4662, + "step": 46520 + }, + { + "epoch": 0.8038602432955488, + "grad_norm": 1.1068155590670834, + "learning_rate": 1.9508147985111612e-06, + "loss": 0.4233, + "step": 46521 + }, + { + "epoch": 0.8038775228089577, + "grad_norm": 1.5181404529843854, + "learning_rate": 1.9504827232815426e-06, + "loss": 0.3894, + "step": 46522 + }, + { + "epoch": 0.8038948023223667, + "grad_norm": 1.3766670928826297, + "learning_rate": 1.950150673263621e-06, + "loss": 0.2604, + "step": 46523 + }, + { + "epoch": 0.8039120818357754, + "grad_norm": 1.560556282956899, + "learning_rate": 1.949818648458441e-06, + "loss": 0.4129, + "step": 46524 + }, + { + "epoch": 0.8039293613491844, + "grad_norm": 0.7857357723607928, + "learning_rate": 1.9494866488670394e-06, + "loss": 0.5846, + "step": 46525 + }, + { + "epoch": 0.8039466408625933, + "grad_norm": 1.4074485518590008, + "learning_rate": 1.9491546744904575e-06, + "loss": 0.4833, + "step": 46526 + }, + { + "epoch": 0.8039639203760022, + "grad_norm": 1.5273148280330662, + "learning_rate": 1.9488227253297364e-06, + "loss": 0.2668, + "step": 46527 + }, + { + "epoch": 0.8039811998894111, + "grad_norm": 1.620417354632238, + "learning_rate": 1.948490801385913e-06, + "loss": 0.3516, + "step": 46528 + }, + { + "epoch": 0.80399847940282, + "grad_norm": 1.2100581498795604, + "learning_rate": 1.948158902660031e-06, + "loss": 0.5154, + "step": 46529 + }, + { + "epoch": 0.8040157589162289, + "grad_norm": 0.9702947047207153, + "learning_rate": 1.9478270291531244e-06, + "loss": 0.3114, + "step": 46530 + }, + { + "epoch": 0.8040330384296378, + "grad_norm": 0.6551181617178518, + "learning_rate": 1.9474951808662346e-06, + "loss": 0.2064, + "step": 46531 + }, + { + "epoch": 0.8040503179430467, + "grad_norm": 1.8218517074671519, + "learning_rate": 1.9471633578004022e-06, + "loss": 0.369, + "step": 46532 + }, + { + "epoch": 0.8040675974564556, + "grad_norm": 1.0596337955259807, + "learning_rate": 1.946831559956669e-06, + "loss": 0.2439, + "step": 46533 + }, + { + "epoch": 0.8040848769698645, + "grad_norm": 1.1793178007908685, + "learning_rate": 1.9464997873360714e-06, + "loss": 0.3314, + "step": 46534 + }, + { + "epoch": 0.8041021564832734, + "grad_norm": 1.0551504702540555, + "learning_rate": 1.9461680399396466e-06, + "loss": 0.3893, + "step": 46535 + }, + { + "epoch": 0.8041194359966823, + "grad_norm": 2.1712116562046235, + "learning_rate": 1.9458363177684368e-06, + "loss": 0.2868, + "step": 46536 + }, + { + "epoch": 0.8041367155100912, + "grad_norm": 1.0564684695077902, + "learning_rate": 1.945504620823475e-06, + "loss": 0.2143, + "step": 46537 + }, + { + "epoch": 0.8041539950235002, + "grad_norm": 1.9025750897192308, + "learning_rate": 1.94517294910581e-06, + "loss": 0.3053, + "step": 46538 + }, + { + "epoch": 0.8041712745369091, + "grad_norm": 1.4516681750420894, + "learning_rate": 1.9448413026164737e-06, + "loss": 0.3976, + "step": 46539 + }, + { + "epoch": 0.804188554050318, + "grad_norm": 1.2834349388262998, + "learning_rate": 1.944509681356509e-06, + "loss": 0.4327, + "step": 46540 + }, + { + "epoch": 0.8042058335637269, + "grad_norm": 1.1078077935324093, + "learning_rate": 1.9441780853269488e-06, + "loss": 0.4401, + "step": 46541 + }, + { + "epoch": 0.8042231130771358, + "grad_norm": 0.6235012038196995, + "learning_rate": 1.9438465145288377e-06, + "loss": 0.7167, + "step": 46542 + }, + { + "epoch": 0.8042403925905447, + "grad_norm": 1.1103277806657221, + "learning_rate": 1.9435149689632092e-06, + "loss": 0.3912, + "step": 46543 + }, + { + "epoch": 0.8042576721039536, + "grad_norm": 1.5419965003426184, + "learning_rate": 1.943183448631105e-06, + "loss": 0.3865, + "step": 46544 + }, + { + "epoch": 0.8042749516173625, + "grad_norm": 1.337760634715787, + "learning_rate": 1.9428519535335644e-06, + "loss": 0.3163, + "step": 46545 + }, + { + "epoch": 0.8042922311307713, + "grad_norm": 1.559080278048736, + "learning_rate": 1.9425204836716204e-06, + "loss": 0.3047, + "step": 46546 + }, + { + "epoch": 0.8043095106441802, + "grad_norm": 1.20090346483925, + "learning_rate": 1.9421890390463183e-06, + "loss": 0.5666, + "step": 46547 + }, + { + "epoch": 0.8043267901575891, + "grad_norm": 1.0318711149855941, + "learning_rate": 1.9418576196586893e-06, + "loss": 0.3437, + "step": 46548 + }, + { + "epoch": 0.804344069670998, + "grad_norm": 0.8131216178630524, + "learning_rate": 1.9415262255097755e-06, + "loss": 0.1792, + "step": 46549 + }, + { + "epoch": 0.8043613491844069, + "grad_norm": 0.8287086387953223, + "learning_rate": 1.9411948566006134e-06, + "loss": 0.2895, + "step": 46550 + }, + { + "epoch": 0.8043786286978158, + "grad_norm": 1.1871855580032282, + "learning_rate": 1.940863512932244e-06, + "loss": 0.4215, + "step": 46551 + }, + { + "epoch": 0.8043959082112248, + "grad_norm": 1.2166417125173912, + "learning_rate": 1.9405321945057e-06, + "loss": 0.3134, + "step": 46552 + }, + { + "epoch": 0.8044131877246337, + "grad_norm": 1.9739134972674406, + "learning_rate": 1.9402009013220236e-06, + "loss": 0.2946, + "step": 46553 + }, + { + "epoch": 0.8044304672380426, + "grad_norm": 1.0474309070260732, + "learning_rate": 1.9398696333822487e-06, + "loss": 0.2041, + "step": 46554 + }, + { + "epoch": 0.8044477467514515, + "grad_norm": 1.431582383165801, + "learning_rate": 1.939538390687414e-06, + "loss": 0.4637, + "step": 46555 + }, + { + "epoch": 0.8044650262648604, + "grad_norm": 1.4306816539749374, + "learning_rate": 1.939207173238561e-06, + "loss": 0.3558, + "step": 46556 + }, + { + "epoch": 0.8044823057782693, + "grad_norm": 1.1005319527523973, + "learning_rate": 1.9388759810367208e-06, + "loss": 0.4596, + "step": 46557 + }, + { + "epoch": 0.8044995852916782, + "grad_norm": 2.1827903966456232, + "learning_rate": 1.9385448140829367e-06, + "loss": 0.2405, + "step": 46558 + }, + { + "epoch": 0.8045168648050871, + "grad_norm": 1.085730110633187, + "learning_rate": 1.9382136723782396e-06, + "loss": 0.3246, + "step": 46559 + }, + { + "epoch": 0.804534144318496, + "grad_norm": 1.3992551915544231, + "learning_rate": 1.9378825559236704e-06, + "loss": 0.4119, + "step": 46560 + }, + { + "epoch": 0.8045514238319049, + "grad_norm": 1.2280331122985555, + "learning_rate": 1.937551464720265e-06, + "loss": 0.2749, + "step": 46561 + }, + { + "epoch": 0.8045687033453138, + "grad_norm": 1.280052559771136, + "learning_rate": 1.937220398769065e-06, + "loss": 0.3821, + "step": 46562 + }, + { + "epoch": 0.8045859828587227, + "grad_norm": 2.471249146838478, + "learning_rate": 1.9368893580711e-06, + "loss": 0.311, + "step": 46563 + }, + { + "epoch": 0.8046032623721316, + "grad_norm": 1.6570990021603562, + "learning_rate": 1.9365583426274126e-06, + "loss": 0.7919, + "step": 46564 + }, + { + "epoch": 0.8046205418855406, + "grad_norm": 0.9614143615887687, + "learning_rate": 1.9362273524390353e-06, + "loss": 0.3207, + "step": 46565 + }, + { + "epoch": 0.8046378213989495, + "grad_norm": 2.2347567129385304, + "learning_rate": 1.935896387507007e-06, + "loss": 0.3679, + "step": 46566 + }, + { + "epoch": 0.8046551009123583, + "grad_norm": 0.8400263727916476, + "learning_rate": 1.935565447832366e-06, + "loss": 0.2282, + "step": 46567 + }, + { + "epoch": 0.8046723804257672, + "grad_norm": 1.0303401957364005, + "learning_rate": 1.9352345334161447e-06, + "loss": 0.2344, + "step": 46568 + }, + { + "epoch": 0.8046896599391761, + "grad_norm": 0.7896739875551335, + "learning_rate": 1.934903644259384e-06, + "loss": 0.2595, + "step": 46569 + }, + { + "epoch": 0.804706939452585, + "grad_norm": 1.103623622456486, + "learning_rate": 1.9345727803631154e-06, + "loss": 0.2655, + "step": 46570 + }, + { + "epoch": 0.8047242189659939, + "grad_norm": 1.6364111015075362, + "learning_rate": 1.9342419417283785e-06, + "loss": 0.3729, + "step": 46571 + }, + { + "epoch": 0.8047414984794028, + "grad_norm": 1.5501438810510835, + "learning_rate": 1.9339111283562082e-06, + "loss": 0.3127, + "step": 46572 + }, + { + "epoch": 0.8047587779928117, + "grad_norm": 1.5936123250635346, + "learning_rate": 1.933580340247644e-06, + "loss": 0.3709, + "step": 46573 + }, + { + "epoch": 0.8047760575062206, + "grad_norm": 1.5824184853489414, + "learning_rate": 1.9332495774037164e-06, + "loss": 0.49, + "step": 46574 + }, + { + "epoch": 0.8047933370196295, + "grad_norm": 0.6237284070026334, + "learning_rate": 1.932918839825467e-06, + "loss": 0.3191, + "step": 46575 + }, + { + "epoch": 0.8048106165330384, + "grad_norm": 1.4663774693738414, + "learning_rate": 1.932588127513929e-06, + "loss": 0.5929, + "step": 46576 + }, + { + "epoch": 0.8048278960464473, + "grad_norm": 1.0848382626798416, + "learning_rate": 1.9322574404701324e-06, + "loss": 0.2531, + "step": 46577 + }, + { + "epoch": 0.8048451755598562, + "grad_norm": 0.9052898996043184, + "learning_rate": 1.9319267786951244e-06, + "loss": 0.2815, + "step": 46578 + }, + { + "epoch": 0.8048624550732651, + "grad_norm": 1.3262754360959041, + "learning_rate": 1.931596142189932e-06, + "loss": 0.5031, + "step": 46579 + }, + { + "epoch": 0.8048797345866741, + "grad_norm": 1.3250424189624037, + "learning_rate": 1.931265530955596e-06, + "loss": 0.5004, + "step": 46580 + }, + { + "epoch": 0.804897014100083, + "grad_norm": 2.0135601612446474, + "learning_rate": 1.930934944993147e-06, + "loss": 0.3279, + "step": 46581 + }, + { + "epoch": 0.8049142936134919, + "grad_norm": 1.1531644579904192, + "learning_rate": 1.9306043843036257e-06, + "loss": 0.2474, + "step": 46582 + }, + { + "epoch": 0.8049315731269008, + "grad_norm": 1.13040821255032, + "learning_rate": 1.9302738488880623e-06, + "loss": 0.2384, + "step": 46583 + }, + { + "epoch": 0.8049488526403097, + "grad_norm": 1.8707837708359214, + "learning_rate": 1.9299433387474954e-06, + "loss": 0.2338, + "step": 46584 + }, + { + "epoch": 0.8049661321537186, + "grad_norm": 1.5401761312514328, + "learning_rate": 1.9296128538829574e-06, + "loss": 0.2806, + "step": 46585 + }, + { + "epoch": 0.8049834116671275, + "grad_norm": 0.9807453296544681, + "learning_rate": 1.9292823942954885e-06, + "loss": 0.2861, + "step": 46586 + }, + { + "epoch": 0.8050006911805364, + "grad_norm": 1.4048239613394895, + "learning_rate": 1.9289519599861217e-06, + "loss": 0.2836, + "step": 46587 + }, + { + "epoch": 0.8050179706939452, + "grad_norm": 0.9933495229718059, + "learning_rate": 1.928621550955887e-06, + "loss": 0.1603, + "step": 46588 + }, + { + "epoch": 0.8050352502073541, + "grad_norm": 1.4017471107759552, + "learning_rate": 1.9282911672058223e-06, + "loss": 0.3908, + "step": 46589 + }, + { + "epoch": 0.805052529720763, + "grad_norm": 1.0970686027697398, + "learning_rate": 1.927960808736965e-06, + "loss": 0.5049, + "step": 46590 + }, + { + "epoch": 0.8050698092341719, + "grad_norm": 1.5324067942049466, + "learning_rate": 1.9276304755503496e-06, + "loss": 0.4551, + "step": 46591 + }, + { + "epoch": 0.8050870887475808, + "grad_norm": 1.0333998733547565, + "learning_rate": 1.927300167647006e-06, + "loss": 0.6443, + "step": 46592 + }, + { + "epoch": 0.8051043682609897, + "grad_norm": 0.922366001702905, + "learning_rate": 1.926969885027974e-06, + "loss": 0.3533, + "step": 46593 + }, + { + "epoch": 0.8051216477743987, + "grad_norm": 1.8093816997580334, + "learning_rate": 1.9266396276942843e-06, + "loss": 0.3818, + "step": 46594 + }, + { + "epoch": 0.8051389272878076, + "grad_norm": 0.8375460655434116, + "learning_rate": 1.9263093956469726e-06, + "loss": 0.1126, + "step": 46595 + }, + { + "epoch": 0.8051562068012165, + "grad_norm": 1.2268603437573329, + "learning_rate": 1.925979188887076e-06, + "loss": 0.3923, + "step": 46596 + }, + { + "epoch": 0.8051734863146254, + "grad_norm": 1.3104853522789806, + "learning_rate": 1.9256490074156232e-06, + "loss": 0.3677, + "step": 46597 + }, + { + "epoch": 0.8051907658280343, + "grad_norm": 1.1330146674673789, + "learning_rate": 1.925318851233654e-06, + "loss": 0.498, + "step": 46598 + }, + { + "epoch": 0.8052080453414432, + "grad_norm": 2.219368436066258, + "learning_rate": 1.924988720342198e-06, + "loss": 0.379, + "step": 46599 + }, + { + "epoch": 0.8052253248548521, + "grad_norm": 0.936388817496178, + "learning_rate": 1.9246586147422897e-06, + "loss": 0.3738, + "step": 46600 + }, + { + "epoch": 0.805242604368261, + "grad_norm": 2.2704451315105576, + "learning_rate": 1.924328534434966e-06, + "loss": 0.336, + "step": 46601 + }, + { + "epoch": 0.8052598838816699, + "grad_norm": 2.1531986262436797, + "learning_rate": 1.9239984794212606e-06, + "loss": 0.2415, + "step": 46602 + }, + { + "epoch": 0.8052771633950788, + "grad_norm": 1.3971611831479964, + "learning_rate": 1.9236684497022027e-06, + "loss": 0.3273, + "step": 46603 + }, + { + "epoch": 0.8052944429084877, + "grad_norm": 1.426864594362045, + "learning_rate": 1.923338445278833e-06, + "loss": 0.319, + "step": 46604 + }, + { + "epoch": 0.8053117224218966, + "grad_norm": 1.0836473522985361, + "learning_rate": 1.9230084661521763e-06, + "loss": 0.3278, + "step": 46605 + }, + { + "epoch": 0.8053290019353055, + "grad_norm": 1.4398847457719839, + "learning_rate": 1.922678512323273e-06, + "loss": 0.3661, + "step": 46606 + }, + { + "epoch": 0.8053462814487145, + "grad_norm": 0.9689727628478696, + "learning_rate": 1.9223485837931565e-06, + "loss": 0.4583, + "step": 46607 + }, + { + "epoch": 0.8053635609621234, + "grad_norm": 1.5032193528221756, + "learning_rate": 1.922018680562855e-06, + "loss": 0.3121, + "step": 46608 + }, + { + "epoch": 0.8053808404755322, + "grad_norm": 1.6923025997506849, + "learning_rate": 1.921688802633408e-06, + "loss": 0.4361, + "step": 46609 + }, + { + "epoch": 0.8053981199889411, + "grad_norm": 2.174510985511748, + "learning_rate": 1.9213589500058427e-06, + "loss": 0.2244, + "step": 46610 + }, + { + "epoch": 0.80541539950235, + "grad_norm": 1.3280593564839198, + "learning_rate": 1.921029122681195e-06, + "loss": 0.285, + "step": 46611 + }, + { + "epoch": 0.8054326790157589, + "grad_norm": 1.3508940565410377, + "learning_rate": 1.9206993206604987e-06, + "loss": 0.3792, + "step": 46612 + }, + { + "epoch": 0.8054499585291678, + "grad_norm": 1.5851202022696147, + "learning_rate": 1.9203695439447878e-06, + "loss": 0.2581, + "step": 46613 + }, + { + "epoch": 0.8054672380425767, + "grad_norm": 0.7675838510536084, + "learning_rate": 1.920039792535092e-06, + "loss": 0.1223, + "step": 46614 + }, + { + "epoch": 0.8054845175559856, + "grad_norm": 1.5285432043593243, + "learning_rate": 1.9197100664324475e-06, + "loss": 0.5617, + "step": 46615 + }, + { + "epoch": 0.8055017970693945, + "grad_norm": 1.2336439682135283, + "learning_rate": 1.9193803656378828e-06, + "loss": 0.3746, + "step": 46616 + }, + { + "epoch": 0.8055190765828034, + "grad_norm": 0.7364064086487603, + "learning_rate": 1.919050690152433e-06, + "loss": 0.6476, + "step": 46617 + }, + { + "epoch": 0.8055363560962123, + "grad_norm": 1.2805650514413118, + "learning_rate": 1.9187210399771327e-06, + "loss": 0.2935, + "step": 46618 + }, + { + "epoch": 0.8055536356096212, + "grad_norm": 0.886281277106532, + "learning_rate": 1.91839141511301e-06, + "loss": 0.3247, + "step": 46619 + }, + { + "epoch": 0.8055709151230301, + "grad_norm": 1.2226987676992833, + "learning_rate": 1.918061815561102e-06, + "loss": 0.3082, + "step": 46620 + }, + { + "epoch": 0.805588194636439, + "grad_norm": 1.4714507487978823, + "learning_rate": 1.9177322413224365e-06, + "loss": 0.3711, + "step": 46621 + }, + { + "epoch": 0.805605474149848, + "grad_norm": 1.3067850769805756, + "learning_rate": 1.9174026923980492e-06, + "loss": 0.314, + "step": 46622 + }, + { + "epoch": 0.8056227536632569, + "grad_norm": 1.7594785433376834, + "learning_rate": 1.91707316878897e-06, + "loss": 0.3406, + "step": 46623 + }, + { + "epoch": 0.8056400331766658, + "grad_norm": 2.0053688850488216, + "learning_rate": 1.9167436704962305e-06, + "loss": 0.4118, + "step": 46624 + }, + { + "epoch": 0.8056573126900747, + "grad_norm": 1.6597310122055742, + "learning_rate": 1.916414197520865e-06, + "loss": 0.5366, + "step": 46625 + }, + { + "epoch": 0.8056745922034836, + "grad_norm": 1.1906164641892805, + "learning_rate": 1.916084749863907e-06, + "loss": 0.5362, + "step": 46626 + }, + { + "epoch": 0.8056918717168925, + "grad_norm": 1.5965940388978022, + "learning_rate": 1.9157553275263863e-06, + "loss": 0.2497, + "step": 46627 + }, + { + "epoch": 0.8057091512303014, + "grad_norm": 1.420886043859372, + "learning_rate": 1.9154259305093304e-06, + "loss": 0.4411, + "step": 46628 + }, + { + "epoch": 0.8057264307437103, + "grad_norm": 0.7167482597813457, + "learning_rate": 1.915096558813776e-06, + "loss": 0.2913, + "step": 46629 + }, + { + "epoch": 0.8057437102571191, + "grad_norm": 1.0662343595899906, + "learning_rate": 1.9147672124407535e-06, + "loss": 0.4372, + "step": 46630 + }, + { + "epoch": 0.805760989770528, + "grad_norm": 1.485451611361432, + "learning_rate": 1.9144378913912964e-06, + "loss": 0.2507, + "step": 46631 + }, + { + "epoch": 0.8057782692839369, + "grad_norm": 1.3107972356688546, + "learning_rate": 1.9141085956664328e-06, + "loss": 0.1915, + "step": 46632 + }, + { + "epoch": 0.8057955487973458, + "grad_norm": 2.121631697170011, + "learning_rate": 1.913779325267198e-06, + "loss": 0.3358, + "step": 46633 + }, + { + "epoch": 0.8058128283107547, + "grad_norm": 1.0102756435796472, + "learning_rate": 1.913450080194618e-06, + "loss": 0.3202, + "step": 46634 + }, + { + "epoch": 0.8058301078241636, + "grad_norm": 1.1963695367930167, + "learning_rate": 1.9131208604497264e-06, + "loss": 0.3531, + "step": 46635 + }, + { + "epoch": 0.8058473873375726, + "grad_norm": 1.1359737013220745, + "learning_rate": 1.9127916660335554e-06, + "loss": 0.2399, + "step": 46636 + }, + { + "epoch": 0.8058646668509815, + "grad_norm": 1.0577526482391455, + "learning_rate": 1.9124624969471372e-06, + "loss": 0.4298, + "step": 46637 + }, + { + "epoch": 0.8058819463643904, + "grad_norm": 0.9422066539972581, + "learning_rate": 1.912133353191502e-06, + "loss": 0.3772, + "step": 46638 + }, + { + "epoch": 0.8058992258777993, + "grad_norm": 1.198408648709847, + "learning_rate": 1.9118042347676778e-06, + "loss": 0.3583, + "step": 46639 + }, + { + "epoch": 0.8059165053912082, + "grad_norm": 0.8084299510631753, + "learning_rate": 1.9114751416766965e-06, + "loss": 0.1696, + "step": 46640 + }, + { + "epoch": 0.8059337849046171, + "grad_norm": 1.060293420778546, + "learning_rate": 1.9111460739195897e-06, + "loss": 0.4785, + "step": 46641 + }, + { + "epoch": 0.805951064418026, + "grad_norm": 1.6440058443149859, + "learning_rate": 1.9108170314973905e-06, + "loss": 0.4393, + "step": 46642 + }, + { + "epoch": 0.8059683439314349, + "grad_norm": 1.1797639513528586, + "learning_rate": 1.9104880144111258e-06, + "loss": 0.4577, + "step": 46643 + }, + { + "epoch": 0.8059856234448438, + "grad_norm": 1.1435954193511961, + "learning_rate": 1.9101590226618294e-06, + "loss": 0.459, + "step": 46644 + }, + { + "epoch": 0.8060029029582527, + "grad_norm": 1.0666746102354596, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.3245, + "step": 46645 + }, + { + "epoch": 0.8060201824716616, + "grad_norm": 1.2542235349899304, + "learning_rate": 1.9095011151782517e-06, + "loss": 0.4728, + "step": 46646 + }, + { + "epoch": 0.8060374619850705, + "grad_norm": 0.879499673968422, + "learning_rate": 1.909172199446033e-06, + "loss": 0.296, + "step": 46647 + }, + { + "epoch": 0.8060547414984794, + "grad_norm": 1.0598790001579286, + "learning_rate": 1.9088433090549052e-06, + "loss": 0.2855, + "step": 46648 + }, + { + "epoch": 0.8060720210118884, + "grad_norm": 1.1307298511123829, + "learning_rate": 1.9085144440058957e-06, + "loss": 0.4858, + "step": 46649 + }, + { + "epoch": 0.8060893005252973, + "grad_norm": 1.5347990667762497, + "learning_rate": 1.90818560430003e-06, + "loss": 0.4396, + "step": 46650 + }, + { + "epoch": 0.806106580038706, + "grad_norm": 3.158365115174999, + "learning_rate": 1.9078567899383425e-06, + "loss": 0.4576, + "step": 46651 + }, + { + "epoch": 0.806123859552115, + "grad_norm": 1.5310124171121124, + "learning_rate": 1.9075280009218623e-06, + "loss": 0.2555, + "step": 46652 + }, + { + "epoch": 0.8061411390655239, + "grad_norm": 1.8108871427430866, + "learning_rate": 1.9071992372516224e-06, + "loss": 0.3251, + "step": 46653 + }, + { + "epoch": 0.8061584185789328, + "grad_norm": 1.189439010813684, + "learning_rate": 1.906870498928647e-06, + "loss": 0.3587, + "step": 46654 + }, + { + "epoch": 0.8061756980923417, + "grad_norm": 1.3302450508664978, + "learning_rate": 1.9065417859539704e-06, + "loss": 0.3686, + "step": 46655 + }, + { + "epoch": 0.8061929776057506, + "grad_norm": 1.4122645323814065, + "learning_rate": 1.9062130983286176e-06, + "loss": 0.4139, + "step": 46656 + }, + { + "epoch": 0.8062102571191595, + "grad_norm": 0.9854222675472968, + "learning_rate": 1.9058844360536232e-06, + "loss": 0.5759, + "step": 46657 + }, + { + "epoch": 0.8062275366325684, + "grad_norm": 1.3249995227548848, + "learning_rate": 1.9055557991300112e-06, + "loss": 0.3299, + "step": 46658 + }, + { + "epoch": 0.8062448161459773, + "grad_norm": 1.5640056260147805, + "learning_rate": 1.9052271875588135e-06, + "loss": 0.375, + "step": 46659 + }, + { + "epoch": 0.8062620956593862, + "grad_norm": 1.2123225343981914, + "learning_rate": 1.904898601341063e-06, + "loss": 0.6286, + "step": 46660 + }, + { + "epoch": 0.8062793751727951, + "grad_norm": 0.7404584642712105, + "learning_rate": 1.9045700404777811e-06, + "loss": 0.4148, + "step": 46661 + }, + { + "epoch": 0.806296654686204, + "grad_norm": 1.8072836697837558, + "learning_rate": 1.9042415049700047e-06, + "loss": 0.2054, + "step": 46662 + }, + { + "epoch": 0.806313934199613, + "grad_norm": 0.923539434947348, + "learning_rate": 1.9039129948187562e-06, + "loss": 0.4342, + "step": 46663 + }, + { + "epoch": 0.8063312137130219, + "grad_norm": 0.5151319118309194, + "learning_rate": 1.903584510025067e-06, + "loss": 0.511, + "step": 46664 + }, + { + "epoch": 0.8063484932264308, + "grad_norm": 1.0712542856928386, + "learning_rate": 1.9032560505899678e-06, + "loss": 0.2818, + "step": 46665 + }, + { + "epoch": 0.8063657727398397, + "grad_norm": 1.7561450149486173, + "learning_rate": 1.9029276165144872e-06, + "loss": 0.378, + "step": 46666 + }, + { + "epoch": 0.8063830522532486, + "grad_norm": 2.8086239181424726, + "learning_rate": 1.9025992077996514e-06, + "loss": 0.3679, + "step": 46667 + }, + { + "epoch": 0.8064003317666575, + "grad_norm": 1.013798932040801, + "learning_rate": 1.9022708244464916e-06, + "loss": 0.3967, + "step": 46668 + }, + { + "epoch": 0.8064176112800664, + "grad_norm": 1.7243488830805695, + "learning_rate": 1.9019424664560326e-06, + "loss": 0.311, + "step": 46669 + }, + { + "epoch": 0.8064348907934753, + "grad_norm": 0.7509771274297834, + "learning_rate": 1.9016141338293059e-06, + "loss": 0.3112, + "step": 46670 + }, + { + "epoch": 0.8064521703068842, + "grad_norm": 1.4786848126455328, + "learning_rate": 1.9012858265673407e-06, + "loss": 0.329, + "step": 46671 + }, + { + "epoch": 0.806469449820293, + "grad_norm": 0.9105957357976852, + "learning_rate": 1.9009575446711625e-06, + "loss": 0.287, + "step": 46672 + }, + { + "epoch": 0.8064867293337019, + "grad_norm": 1.6303531588755198, + "learning_rate": 1.900629288141802e-06, + "loss": 0.3344, + "step": 46673 + }, + { + "epoch": 0.8065040088471108, + "grad_norm": 1.5079839102748926, + "learning_rate": 1.9003010569802848e-06, + "loss": 0.3645, + "step": 46674 + }, + { + "epoch": 0.8065212883605197, + "grad_norm": 1.2268125836709574, + "learning_rate": 1.899972851187639e-06, + "loss": 0.4299, + "step": 46675 + }, + { + "epoch": 0.8065385678739286, + "grad_norm": 1.106574575995653, + "learning_rate": 1.8996446707648951e-06, + "loss": 0.4409, + "step": 46676 + }, + { + "epoch": 0.8065558473873375, + "grad_norm": 0.9668854179469355, + "learning_rate": 1.899316515713081e-06, + "loss": 0.4477, + "step": 46677 + }, + { + "epoch": 0.8065731269007465, + "grad_norm": 1.11971891799819, + "learning_rate": 1.8989883860332248e-06, + "loss": 0.337, + "step": 46678 + }, + { + "epoch": 0.8065904064141554, + "grad_norm": 0.7883199963530025, + "learning_rate": 1.898660281726349e-06, + "loss": 0.2564, + "step": 46679 + }, + { + "epoch": 0.8066076859275643, + "grad_norm": 1.644253694368182, + "learning_rate": 1.8983322027934848e-06, + "loss": 0.3994, + "step": 46680 + }, + { + "epoch": 0.8066249654409732, + "grad_norm": 0.9831970901228518, + "learning_rate": 1.8980041492356594e-06, + "loss": 0.3066, + "step": 46681 + }, + { + "epoch": 0.8066422449543821, + "grad_norm": 1.2390297767332883, + "learning_rate": 1.897676121053904e-06, + "loss": 0.3764, + "step": 46682 + }, + { + "epoch": 0.806659524467791, + "grad_norm": 2.7844427104572196, + "learning_rate": 1.8973481182492404e-06, + "loss": 0.3743, + "step": 46683 + }, + { + "epoch": 0.8066768039811999, + "grad_norm": 1.4680002519180249, + "learning_rate": 1.8970201408227007e-06, + "loss": 0.3066, + "step": 46684 + }, + { + "epoch": 0.8066940834946088, + "grad_norm": 1.4586651966951394, + "learning_rate": 1.8966921887753064e-06, + "loss": 0.4104, + "step": 46685 + }, + { + "epoch": 0.8067113630080177, + "grad_norm": 1.1181150159535138, + "learning_rate": 1.8963642621080892e-06, + "loss": 0.4469, + "step": 46686 + }, + { + "epoch": 0.8067286425214266, + "grad_norm": 2.0354420629050853, + "learning_rate": 1.8960363608220744e-06, + "loss": 0.5539, + "step": 46687 + }, + { + "epoch": 0.8067459220348355, + "grad_norm": 1.0609463664054766, + "learning_rate": 1.8957084849182917e-06, + "loss": 0.2267, + "step": 46688 + }, + { + "epoch": 0.8067632015482444, + "grad_norm": 0.7118831185357871, + "learning_rate": 1.8953806343977666e-06, + "loss": 0.1079, + "step": 46689 + }, + { + "epoch": 0.8067804810616533, + "grad_norm": 1.1239775289768408, + "learning_rate": 1.8950528092615228e-06, + "loss": 0.3138, + "step": 46690 + }, + { + "epoch": 0.8067977605750623, + "grad_norm": 1.7789849247320682, + "learning_rate": 1.8947250095105885e-06, + "loss": 0.4408, + "step": 46691 + }, + { + "epoch": 0.8068150400884712, + "grad_norm": 1.4502883814730554, + "learning_rate": 1.8943972351459927e-06, + "loss": 0.2388, + "step": 46692 + }, + { + "epoch": 0.8068323196018801, + "grad_norm": 1.1255203797306301, + "learning_rate": 1.8940694861687625e-06, + "loss": 0.4045, + "step": 46693 + }, + { + "epoch": 0.8068495991152889, + "grad_norm": 0.9999754055090007, + "learning_rate": 1.8937417625799203e-06, + "loss": 0.4239, + "step": 46694 + }, + { + "epoch": 0.8068668786286978, + "grad_norm": 0.8295773120610634, + "learning_rate": 1.8934140643804977e-06, + "loss": 0.3797, + "step": 46695 + }, + { + "epoch": 0.8068841581421067, + "grad_norm": 1.743439314199986, + "learning_rate": 1.8930863915715158e-06, + "loss": 0.2383, + "step": 46696 + }, + { + "epoch": 0.8069014376555156, + "grad_norm": 0.8798119403555293, + "learning_rate": 1.8927587441540062e-06, + "loss": 0.2533, + "step": 46697 + }, + { + "epoch": 0.8069187171689245, + "grad_norm": 1.1531609747409217, + "learning_rate": 1.8924311221289871e-06, + "loss": 0.3504, + "step": 46698 + }, + { + "epoch": 0.8069359966823334, + "grad_norm": 1.720409046126992, + "learning_rate": 1.892103525497494e-06, + "loss": 0.2946, + "step": 46699 + }, + { + "epoch": 0.8069532761957423, + "grad_norm": 2.2043465438838927, + "learning_rate": 1.8917759542605495e-06, + "loss": 0.4167, + "step": 46700 + }, + { + "epoch": 0.8069705557091512, + "grad_norm": 1.6404597039585969, + "learning_rate": 1.8914484084191776e-06, + "loss": 0.5009, + "step": 46701 + }, + { + "epoch": 0.8069878352225601, + "grad_norm": 1.100960513440726, + "learning_rate": 1.891120887974407e-06, + "loss": 0.3173, + "step": 46702 + }, + { + "epoch": 0.807005114735969, + "grad_norm": 1.4925258065956155, + "learning_rate": 1.8907933929272592e-06, + "loss": 0.3861, + "step": 46703 + }, + { + "epoch": 0.8070223942493779, + "grad_norm": 0.7788840754136702, + "learning_rate": 1.8904659232787625e-06, + "loss": 0.3062, + "step": 46704 + }, + { + "epoch": 0.8070396737627868, + "grad_norm": 1.05056760361294, + "learning_rate": 1.8901384790299437e-06, + "loss": 0.3419, + "step": 46705 + }, + { + "epoch": 0.8070569532761958, + "grad_norm": 1.7555565931892432, + "learning_rate": 1.8898110601818298e-06, + "loss": 0.4474, + "step": 46706 + }, + { + "epoch": 0.8070742327896047, + "grad_norm": 1.408531795375689, + "learning_rate": 1.8894836667354411e-06, + "loss": 0.3566, + "step": 46707 + }, + { + "epoch": 0.8070915123030136, + "grad_norm": 1.703643841163192, + "learning_rate": 1.8891562986918088e-06, + "loss": 0.3046, + "step": 46708 + }, + { + "epoch": 0.8071087918164225, + "grad_norm": 1.6366378732967497, + "learning_rate": 1.8888289560519524e-06, + "loss": 0.3281, + "step": 46709 + }, + { + "epoch": 0.8071260713298314, + "grad_norm": 1.0748016660707083, + "learning_rate": 1.8885016388169008e-06, + "loss": 0.274, + "step": 46710 + }, + { + "epoch": 0.8071433508432403, + "grad_norm": 1.3891246910278405, + "learning_rate": 1.8881743469876812e-06, + "loss": 0.459, + "step": 46711 + }, + { + "epoch": 0.8071606303566492, + "grad_norm": 0.9266238985897721, + "learning_rate": 1.8878470805653126e-06, + "loss": 0.3385, + "step": 46712 + }, + { + "epoch": 0.8071779098700581, + "grad_norm": 1.3197864624645055, + "learning_rate": 1.8875198395508266e-06, + "loss": 0.4628, + "step": 46713 + }, + { + "epoch": 0.807195189383467, + "grad_norm": 1.155728796515599, + "learning_rate": 1.8871926239452421e-06, + "loss": 0.4861, + "step": 46714 + }, + { + "epoch": 0.8072124688968758, + "grad_norm": 0.5489368081411798, + "learning_rate": 1.8868654337495873e-06, + "loss": 0.4938, + "step": 46715 + }, + { + "epoch": 0.8072297484102847, + "grad_norm": 1.2685188206798823, + "learning_rate": 1.886538268964886e-06, + "loss": 0.3017, + "step": 46716 + }, + { + "epoch": 0.8072470279236936, + "grad_norm": 1.757865433831104, + "learning_rate": 1.886211129592167e-06, + "loss": 0.4561, + "step": 46717 + }, + { + "epoch": 0.8072643074371025, + "grad_norm": 1.1364755864837746, + "learning_rate": 1.8858840156324477e-06, + "loss": 0.2657, + "step": 46718 + }, + { + "epoch": 0.8072815869505114, + "grad_norm": 1.7964464747216269, + "learning_rate": 1.8855569270867592e-06, + "loss": 0.6184, + "step": 46719 + }, + { + "epoch": 0.8072988664639204, + "grad_norm": 1.1961856272689209, + "learning_rate": 1.8852298639561206e-06, + "loss": 0.518, + "step": 46720 + }, + { + "epoch": 0.8073161459773293, + "grad_norm": 1.319291383401779, + "learning_rate": 1.8849028262415592e-06, + "loss": 0.3201, + "step": 46721 + }, + { + "epoch": 0.8073334254907382, + "grad_norm": 2.015341305969721, + "learning_rate": 1.8845758139441006e-06, + "loss": 0.31, + "step": 46722 + }, + { + "epoch": 0.8073507050041471, + "grad_norm": 0.9605574091293694, + "learning_rate": 1.8842488270647653e-06, + "loss": 0.395, + "step": 46723 + }, + { + "epoch": 0.807367984517556, + "grad_norm": 1.466972151140978, + "learning_rate": 1.8839218656045821e-06, + "loss": 0.3952, + "step": 46724 + }, + { + "epoch": 0.8073852640309649, + "grad_norm": 1.6871593354783534, + "learning_rate": 1.8835949295645695e-06, + "loss": 0.3286, + "step": 46725 + }, + { + "epoch": 0.8074025435443738, + "grad_norm": 1.374698940750332, + "learning_rate": 1.8832680189457543e-06, + "loss": 0.2499, + "step": 46726 + }, + { + "epoch": 0.8074198230577827, + "grad_norm": 1.153626982364689, + "learning_rate": 1.882941133749161e-06, + "loss": 0.3585, + "step": 46727 + }, + { + "epoch": 0.8074371025711916, + "grad_norm": 1.1254798031320061, + "learning_rate": 1.8826142739758145e-06, + "loss": 0.4865, + "step": 46728 + }, + { + "epoch": 0.8074543820846005, + "grad_norm": 2.360265133415742, + "learning_rate": 1.8822874396267354e-06, + "loss": 0.6366, + "step": 46729 + }, + { + "epoch": 0.8074716615980094, + "grad_norm": 1.2756603309035985, + "learning_rate": 1.8819606307029514e-06, + "loss": 0.4886, + "step": 46730 + }, + { + "epoch": 0.8074889411114183, + "grad_norm": 1.2985380761281227, + "learning_rate": 1.8816338472054806e-06, + "loss": 0.3552, + "step": 46731 + }, + { + "epoch": 0.8075062206248272, + "grad_norm": 1.7104135018359063, + "learning_rate": 1.8813070891353502e-06, + "loss": 0.3558, + "step": 46732 + }, + { + "epoch": 0.8075235001382361, + "grad_norm": 1.0775929417458747, + "learning_rate": 1.8809803564935858e-06, + "loss": 0.5641, + "step": 46733 + }, + { + "epoch": 0.8075407796516451, + "grad_norm": 1.04825055579797, + "learning_rate": 1.8806536492812045e-06, + "loss": 0.3886, + "step": 46734 + }, + { + "epoch": 0.807558059165054, + "grad_norm": 1.307592833008463, + "learning_rate": 1.8803269674992365e-06, + "loss": 0.241, + "step": 46735 + }, + { + "epoch": 0.8075753386784628, + "grad_norm": 1.1862124182796747, + "learning_rate": 1.8800003111486974e-06, + "loss": 0.3727, + "step": 46736 + }, + { + "epoch": 0.8075926181918717, + "grad_norm": 1.4417397721937657, + "learning_rate": 1.879673680230618e-06, + "loss": 0.4676, + "step": 46737 + }, + { + "epoch": 0.8076098977052806, + "grad_norm": 1.6987783856487704, + "learning_rate": 1.8793470747460129e-06, + "loss": 0.2752, + "step": 46738 + }, + { + "epoch": 0.8076271772186895, + "grad_norm": 0.702825243912912, + "learning_rate": 1.8790204946959146e-06, + "loss": 0.2434, + "step": 46739 + }, + { + "epoch": 0.8076444567320984, + "grad_norm": 1.462331364471855, + "learning_rate": 1.878693940081342e-06, + "loss": 0.3039, + "step": 46740 + }, + { + "epoch": 0.8076617362455073, + "grad_norm": 1.0242805577213245, + "learning_rate": 1.8783674109033134e-06, + "loss": 0.3033, + "step": 46741 + }, + { + "epoch": 0.8076790157589162, + "grad_norm": 1.1284232064532802, + "learning_rate": 1.8780409071628591e-06, + "loss": 0.4525, + "step": 46742 + }, + { + "epoch": 0.8076962952723251, + "grad_norm": 1.518578769914518, + "learning_rate": 1.8777144288609949e-06, + "loss": 0.1998, + "step": 46743 + }, + { + "epoch": 0.807713574785734, + "grad_norm": 1.1448447397695463, + "learning_rate": 1.8773879759987456e-06, + "loss": 0.4221, + "step": 46744 + }, + { + "epoch": 0.8077308542991429, + "grad_norm": 0.6596363689368412, + "learning_rate": 1.8770615485771349e-06, + "loss": 0.5861, + "step": 46745 + }, + { + "epoch": 0.8077481338125518, + "grad_norm": 0.9014032352271245, + "learning_rate": 1.8767351465971883e-06, + "loss": 0.3451, + "step": 46746 + }, + { + "epoch": 0.8077654133259607, + "grad_norm": 1.5756701453414412, + "learning_rate": 1.876408770059921e-06, + "loss": 0.3216, + "step": 46747 + }, + { + "epoch": 0.8077826928393697, + "grad_norm": 0.9834333472451509, + "learning_rate": 1.8760824189663618e-06, + "loss": 0.4698, + "step": 46748 + }, + { + "epoch": 0.8077999723527786, + "grad_norm": 3.0412330375873258, + "learning_rate": 1.8757560933175268e-06, + "loss": 0.3291, + "step": 46749 + }, + { + "epoch": 0.8078172518661875, + "grad_norm": 1.1951143333515848, + "learning_rate": 1.8754297931144416e-06, + "loss": 0.2726, + "step": 46750 + }, + { + "epoch": 0.8078345313795964, + "grad_norm": 1.8550272108887385, + "learning_rate": 1.87510351835813e-06, + "loss": 0.4713, + "step": 46751 + }, + { + "epoch": 0.8078518108930053, + "grad_norm": 1.276200827630061, + "learning_rate": 1.874777269049609e-06, + "loss": 0.4437, + "step": 46752 + }, + { + "epoch": 0.8078690904064142, + "grad_norm": 0.7212638886354881, + "learning_rate": 1.8744510451899056e-06, + "loss": 0.7306, + "step": 46753 + }, + { + "epoch": 0.8078863699198231, + "grad_norm": 1.0959761671230062, + "learning_rate": 1.8741248467800366e-06, + "loss": 0.2419, + "step": 46754 + }, + { + "epoch": 0.807903649433232, + "grad_norm": 1.0877670092932714, + "learning_rate": 1.8737986738210268e-06, + "loss": 0.3007, + "step": 46755 + }, + { + "epoch": 0.8079209289466409, + "grad_norm": 1.4812675149276482, + "learning_rate": 1.8734725263138964e-06, + "loss": 0.4619, + "step": 46756 + }, + { + "epoch": 0.8079382084600497, + "grad_norm": 1.9417279463054569, + "learning_rate": 1.8731464042596703e-06, + "loss": 0.3319, + "step": 46757 + }, + { + "epoch": 0.8079554879734586, + "grad_norm": 1.3021730164939822, + "learning_rate": 1.8728203076593653e-06, + "loss": 0.444, + "step": 46758 + }, + { + "epoch": 0.8079727674868675, + "grad_norm": 0.9983509962944277, + "learning_rate": 1.8724942365140064e-06, + "loss": 0.5686, + "step": 46759 + }, + { + "epoch": 0.8079900470002764, + "grad_norm": 1.1197725932373108, + "learning_rate": 1.8721681908246114e-06, + "loss": 0.3879, + "step": 46760 + }, + { + "epoch": 0.8080073265136853, + "grad_norm": 2.0495576215604645, + "learning_rate": 1.871842170592203e-06, + "loss": 0.4772, + "step": 46761 + }, + { + "epoch": 0.8080246060270942, + "grad_norm": 1.572299316069603, + "learning_rate": 1.8715161758178047e-06, + "loss": 0.492, + "step": 46762 + }, + { + "epoch": 0.8080418855405032, + "grad_norm": 1.843784892976177, + "learning_rate": 1.8711902065024335e-06, + "loss": 0.322, + "step": 46763 + }, + { + "epoch": 0.8080591650539121, + "grad_norm": 1.0536616377441892, + "learning_rate": 1.870864262647115e-06, + "loss": 0.3923, + "step": 46764 + }, + { + "epoch": 0.808076444567321, + "grad_norm": 1.4103826781341198, + "learning_rate": 1.8705383442528646e-06, + "loss": 0.307, + "step": 46765 + }, + { + "epoch": 0.8080937240807299, + "grad_norm": 1.367918182667643, + "learning_rate": 1.8702124513207064e-06, + "loss": 0.4838, + "step": 46766 + }, + { + "epoch": 0.8081110035941388, + "grad_norm": 1.4248358873069977, + "learning_rate": 1.86988658385166e-06, + "loss": 0.465, + "step": 46767 + }, + { + "epoch": 0.8081282831075477, + "grad_norm": 1.305490611207441, + "learning_rate": 1.8695607418467488e-06, + "loss": 0.339, + "step": 46768 + }, + { + "epoch": 0.8081455626209566, + "grad_norm": 0.9474572042480304, + "learning_rate": 1.8692349253069897e-06, + "loss": 0.244, + "step": 46769 + }, + { + "epoch": 0.8081628421343655, + "grad_norm": 2.316262879905763, + "learning_rate": 1.8689091342334077e-06, + "loss": 0.4824, + "step": 46770 + }, + { + "epoch": 0.8081801216477744, + "grad_norm": 1.7441509680621963, + "learning_rate": 1.8685833686270161e-06, + "loss": 0.3993, + "step": 46771 + }, + { + "epoch": 0.8081974011611833, + "grad_norm": 1.9156717279284887, + "learning_rate": 1.8682576284888399e-06, + "loss": 0.3235, + "step": 46772 + }, + { + "epoch": 0.8082146806745922, + "grad_norm": 1.6491512939739732, + "learning_rate": 1.8679319138199027e-06, + "loss": 0.4211, + "step": 46773 + }, + { + "epoch": 0.8082319601880011, + "grad_norm": 1.0900757235556933, + "learning_rate": 1.8676062246212168e-06, + "loss": 0.4164, + "step": 46774 + }, + { + "epoch": 0.80824923970141, + "grad_norm": 1.1518312000617699, + "learning_rate": 1.8672805608938094e-06, + "loss": 0.3163, + "step": 46775 + }, + { + "epoch": 0.808266519214819, + "grad_norm": 1.3335138309145138, + "learning_rate": 1.866954922638694e-06, + "loss": 0.3693, + "step": 46776 + }, + { + "epoch": 0.8082837987282279, + "grad_norm": 0.9782825014326362, + "learning_rate": 1.8666293098568966e-06, + "loss": 0.3621, + "step": 46777 + }, + { + "epoch": 0.8083010782416367, + "grad_norm": 1.1016007280783942, + "learning_rate": 1.8663037225494296e-06, + "loss": 0.3712, + "step": 46778 + }, + { + "epoch": 0.8083183577550456, + "grad_norm": 1.0952829506432975, + "learning_rate": 1.8659781607173234e-06, + "loss": 0.3471, + "step": 46779 + }, + { + "epoch": 0.8083356372684545, + "grad_norm": 0.8601564043436968, + "learning_rate": 1.865652624361588e-06, + "loss": 0.1516, + "step": 46780 + }, + { + "epoch": 0.8083529167818634, + "grad_norm": 1.3739385182994235, + "learning_rate": 1.8653271134832496e-06, + "loss": 0.3942, + "step": 46781 + }, + { + "epoch": 0.8083701962952723, + "grad_norm": 0.7268830521282593, + "learning_rate": 1.8650016280833249e-06, + "loss": 0.158, + "step": 46782 + }, + { + "epoch": 0.8083874758086812, + "grad_norm": 1.1338352034697237, + "learning_rate": 1.8646761681628311e-06, + "loss": 0.364, + "step": 46783 + }, + { + "epoch": 0.8084047553220901, + "grad_norm": 1.4933572452206167, + "learning_rate": 1.8643507337227895e-06, + "loss": 0.4002, + "step": 46784 + }, + { + "epoch": 0.808422034835499, + "grad_norm": 1.5932958765877567, + "learning_rate": 1.8640253247642192e-06, + "loss": 0.2409, + "step": 46785 + }, + { + "epoch": 0.8084393143489079, + "grad_norm": 1.7540633009409787, + "learning_rate": 1.863699941288143e-06, + "loss": 0.3424, + "step": 46786 + }, + { + "epoch": 0.8084565938623168, + "grad_norm": 0.917084327015104, + "learning_rate": 1.8633745832955752e-06, + "loss": 0.2567, + "step": 46787 + }, + { + "epoch": 0.8084738733757257, + "grad_norm": 1.0669365075405008, + "learning_rate": 1.8630492507875375e-06, + "loss": 0.3528, + "step": 46788 + }, + { + "epoch": 0.8084911528891346, + "grad_norm": 0.9154424086930948, + "learning_rate": 1.8627239437650468e-06, + "loss": 0.389, + "step": 46789 + }, + { + "epoch": 0.8085084324025436, + "grad_norm": 1.054219756921042, + "learning_rate": 1.8623986622291224e-06, + "loss": 0.3464, + "step": 46790 + }, + { + "epoch": 0.8085257119159525, + "grad_norm": 1.6090455678330002, + "learning_rate": 1.8620734061807833e-06, + "loss": 0.5584, + "step": 46791 + }, + { + "epoch": 0.8085429914293614, + "grad_norm": 1.7522438708939536, + "learning_rate": 1.861748175621052e-06, + "loss": 0.3643, + "step": 46792 + }, + { + "epoch": 0.8085602709427703, + "grad_norm": 1.340819774186936, + "learning_rate": 1.8614229705509445e-06, + "loss": 0.3166, + "step": 46793 + }, + { + "epoch": 0.8085775504561792, + "grad_norm": 1.5823322302590705, + "learning_rate": 1.8610977909714755e-06, + "loss": 0.2475, + "step": 46794 + }, + { + "epoch": 0.8085948299695881, + "grad_norm": 1.4906056972532755, + "learning_rate": 1.8607726368836666e-06, + "loss": 0.2828, + "step": 46795 + }, + { + "epoch": 0.808612109482997, + "grad_norm": 2.2863234297961994, + "learning_rate": 1.8604475082885365e-06, + "loss": 0.4716, + "step": 46796 + }, + { + "epoch": 0.8086293889964059, + "grad_norm": 1.2184926747246856, + "learning_rate": 1.8601224051871058e-06, + "loss": 0.4698, + "step": 46797 + }, + { + "epoch": 0.8086466685098148, + "grad_norm": 1.3161961862336198, + "learning_rate": 1.8597973275803881e-06, + "loss": 0.2064, + "step": 46798 + }, + { + "epoch": 0.8086639480232236, + "grad_norm": 1.0575801954656165, + "learning_rate": 1.859472275469406e-06, + "loss": 0.5046, + "step": 46799 + }, + { + "epoch": 0.8086812275366325, + "grad_norm": 1.3400534268176139, + "learning_rate": 1.8591472488551731e-06, + "loss": 0.3813, + "step": 46800 + }, + { + "epoch": 0.8086985070500414, + "grad_norm": 2.218314522751943, + "learning_rate": 1.8588222477387097e-06, + "loss": 0.4597, + "step": 46801 + }, + { + "epoch": 0.8087157865634503, + "grad_norm": 1.1373218490161188, + "learning_rate": 1.8584972721210358e-06, + "loss": 0.5322, + "step": 46802 + }, + { + "epoch": 0.8087330660768592, + "grad_norm": 1.3721372521066313, + "learning_rate": 1.8581723220031656e-06, + "loss": 0.368, + "step": 46803 + }, + { + "epoch": 0.8087503455902681, + "grad_norm": 1.3141647946961208, + "learning_rate": 1.8578473973861199e-06, + "loss": 0.4605, + "step": 46804 + }, + { + "epoch": 0.8087676251036771, + "grad_norm": 1.0792896251503978, + "learning_rate": 1.8575224982709128e-06, + "loss": 0.3836, + "step": 46805 + }, + { + "epoch": 0.808784904617086, + "grad_norm": 1.4801890982136339, + "learning_rate": 1.8571976246585643e-06, + "loss": 0.3685, + "step": 46806 + }, + { + "epoch": 0.8088021841304949, + "grad_norm": 1.9666704267222472, + "learning_rate": 1.8568727765500917e-06, + "loss": 0.3202, + "step": 46807 + }, + { + "epoch": 0.8088194636439038, + "grad_norm": 1.3056200669515883, + "learning_rate": 1.8565479539465148e-06, + "loss": 0.1548, + "step": 46808 + }, + { + "epoch": 0.8088367431573127, + "grad_norm": 1.0193217029766657, + "learning_rate": 1.8562231568488465e-06, + "loss": 0.2113, + "step": 46809 + }, + { + "epoch": 0.8088540226707216, + "grad_norm": 1.9248986113685116, + "learning_rate": 1.8558983852581082e-06, + "loss": 0.4829, + "step": 46810 + }, + { + "epoch": 0.8088713021841305, + "grad_norm": 1.58505803944536, + "learning_rate": 1.8555736391753122e-06, + "loss": 0.4795, + "step": 46811 + }, + { + "epoch": 0.8088885816975394, + "grad_norm": 1.180620800882587, + "learning_rate": 1.8552489186014788e-06, + "loss": 0.2179, + "step": 46812 + }, + { + "epoch": 0.8089058612109483, + "grad_norm": 1.1349488383509552, + "learning_rate": 1.8549242235376274e-06, + "loss": 0.2474, + "step": 46813 + }, + { + "epoch": 0.8089231407243572, + "grad_norm": 1.5299205634183772, + "learning_rate": 1.8545995539847706e-06, + "loss": 0.4302, + "step": 46814 + }, + { + "epoch": 0.8089404202377661, + "grad_norm": 0.8381389448889889, + "learning_rate": 1.8542749099439283e-06, + "loss": 0.4469, + "step": 46815 + }, + { + "epoch": 0.808957699751175, + "grad_norm": 1.6603848453053434, + "learning_rate": 1.8539502914161144e-06, + "loss": 0.2285, + "step": 46816 + }, + { + "epoch": 0.808974979264584, + "grad_norm": 1.0909382327999895, + "learning_rate": 1.8536256984023493e-06, + "loss": 0.2896, + "step": 46817 + }, + { + "epoch": 0.8089922587779929, + "grad_norm": 1.1431237503065506, + "learning_rate": 1.853301130903642e-06, + "loss": 0.585, + "step": 46818 + }, + { + "epoch": 0.8090095382914018, + "grad_norm": 1.7990981679526588, + "learning_rate": 1.8529765889210205e-06, + "loss": 0.3516, + "step": 46819 + }, + { + "epoch": 0.8090268178048107, + "grad_norm": 1.2612077340368621, + "learning_rate": 1.852652072455493e-06, + "loss": 0.4145, + "step": 46820 + }, + { + "epoch": 0.8090440973182195, + "grad_norm": 1.9509638144808743, + "learning_rate": 1.852327581508081e-06, + "loss": 0.7195, + "step": 46821 + }, + { + "epoch": 0.8090613768316284, + "grad_norm": 1.780397572869442, + "learning_rate": 1.8520031160797957e-06, + "loss": 0.2268, + "step": 46822 + }, + { + "epoch": 0.8090786563450373, + "grad_norm": 1.1879905426926514, + "learning_rate": 1.8516786761716577e-06, + "loss": 0.2982, + "step": 46823 + }, + { + "epoch": 0.8090959358584462, + "grad_norm": 2.6333405426329426, + "learning_rate": 1.8513542617846792e-06, + "loss": 0.3345, + "step": 46824 + }, + { + "epoch": 0.8091132153718551, + "grad_norm": 1.7803892655552847, + "learning_rate": 1.8510298729198783e-06, + "loss": 0.3053, + "step": 46825 + }, + { + "epoch": 0.809130494885264, + "grad_norm": 2.800064430996161, + "learning_rate": 1.8507055095782744e-06, + "loss": 0.3179, + "step": 46826 + }, + { + "epoch": 0.8091477743986729, + "grad_norm": 1.0767699196816398, + "learning_rate": 1.8503811717608767e-06, + "loss": 0.3891, + "step": 46827 + }, + { + "epoch": 0.8091650539120818, + "grad_norm": 1.8601435918873448, + "learning_rate": 1.8500568594687073e-06, + "loss": 0.349, + "step": 46828 + }, + { + "epoch": 0.8091823334254907, + "grad_norm": 0.8666187924790784, + "learning_rate": 1.8497325727027771e-06, + "loss": 0.45, + "step": 46829 + }, + { + "epoch": 0.8091996129388996, + "grad_norm": 0.9718988498834299, + "learning_rate": 1.8494083114641038e-06, + "loss": 0.2507, + "step": 46830 + }, + { + "epoch": 0.8092168924523085, + "grad_norm": 1.3178644107874495, + "learning_rate": 1.8490840757537021e-06, + "loss": 0.3397, + "step": 46831 + }, + { + "epoch": 0.8092341719657175, + "grad_norm": 0.8400761269572078, + "learning_rate": 1.8487598655725926e-06, + "loss": 0.2455, + "step": 46832 + }, + { + "epoch": 0.8092514514791264, + "grad_norm": 0.7301733387451159, + "learning_rate": 1.848435680921785e-06, + "loss": 0.2357, + "step": 46833 + }, + { + "epoch": 0.8092687309925353, + "grad_norm": 1.471466980220042, + "learning_rate": 1.848111521802295e-06, + "loss": 0.3347, + "step": 46834 + }, + { + "epoch": 0.8092860105059442, + "grad_norm": 1.4545631624138333, + "learning_rate": 1.8477873882151386e-06, + "loss": 0.3234, + "step": 46835 + }, + { + "epoch": 0.8093032900193531, + "grad_norm": 1.5698536850204263, + "learning_rate": 1.8474632801613323e-06, + "loss": 0.2565, + "step": 46836 + }, + { + "epoch": 0.809320569532762, + "grad_norm": 1.6665903269423419, + "learning_rate": 1.8471391976418929e-06, + "loss": 0.3507, + "step": 46837 + }, + { + "epoch": 0.8093378490461709, + "grad_norm": 1.0308496020558753, + "learning_rate": 1.8468151406578305e-06, + "loss": 0.3694, + "step": 46838 + }, + { + "epoch": 0.8093551285595798, + "grad_norm": 1.7734581998598065, + "learning_rate": 1.8464911092101657e-06, + "loss": 0.1943, + "step": 46839 + }, + { + "epoch": 0.8093724080729887, + "grad_norm": 1.1560360173114808, + "learning_rate": 1.8461671032999073e-06, + "loss": 0.5148, + "step": 46840 + }, + { + "epoch": 0.8093896875863976, + "grad_norm": 0.8417795136146773, + "learning_rate": 1.8458431229280727e-06, + "loss": 0.5033, + "step": 46841 + }, + { + "epoch": 0.8094069670998064, + "grad_norm": 0.6022625684505701, + "learning_rate": 1.8455191680956775e-06, + "loss": 0.9347, + "step": 46842 + }, + { + "epoch": 0.8094242466132153, + "grad_norm": 1.016134270313262, + "learning_rate": 1.8451952388037386e-06, + "loss": 0.2902, + "step": 46843 + }, + { + "epoch": 0.8094415261266242, + "grad_norm": 1.016526178750974, + "learning_rate": 1.8448713350532688e-06, + "loss": 0.294, + "step": 46844 + }, + { + "epoch": 0.8094588056400331, + "grad_norm": 0.7529018953854791, + "learning_rate": 1.8445474568452782e-06, + "loss": 0.5944, + "step": 46845 + }, + { + "epoch": 0.809476085153442, + "grad_norm": 1.7560673027062301, + "learning_rate": 1.844223604180785e-06, + "loss": 0.3406, + "step": 46846 + }, + { + "epoch": 0.809493364666851, + "grad_norm": 1.2665017701695191, + "learning_rate": 1.8438997770608036e-06, + "loss": 0.5061, + "step": 46847 + }, + { + "epoch": 0.8095106441802599, + "grad_norm": 1.3838855691720127, + "learning_rate": 1.8435759754863502e-06, + "loss": 0.4596, + "step": 46848 + }, + { + "epoch": 0.8095279236936688, + "grad_norm": 1.544208719798361, + "learning_rate": 1.843252199458434e-06, + "loss": 0.271, + "step": 46849 + }, + { + "epoch": 0.8095452032070777, + "grad_norm": 1.2042906761380119, + "learning_rate": 1.842928448978074e-06, + "loss": 0.2857, + "step": 46850 + }, + { + "epoch": 0.8095624827204866, + "grad_norm": 1.5144980069328635, + "learning_rate": 1.8426047240462807e-06, + "loss": 0.4041, + "step": 46851 + }, + { + "epoch": 0.8095797622338955, + "grad_norm": 0.9670254188119447, + "learning_rate": 1.8422810246640689e-06, + "loss": 0.4386, + "step": 46852 + }, + { + "epoch": 0.8095970417473044, + "grad_norm": 0.8472152821293802, + "learning_rate": 1.8419573508324517e-06, + "loss": 0.2988, + "step": 46853 + }, + { + "epoch": 0.8096143212607133, + "grad_norm": 1.0985742022185736, + "learning_rate": 1.8416337025524477e-06, + "loss": 0.4362, + "step": 46854 + }, + { + "epoch": 0.8096316007741222, + "grad_norm": 1.677879847332973, + "learning_rate": 1.8413100798250671e-06, + "loss": 0.1467, + "step": 46855 + }, + { + "epoch": 0.8096488802875311, + "grad_norm": 1.744287137766309, + "learning_rate": 1.8409864826513202e-06, + "loss": 0.4701, + "step": 46856 + }, + { + "epoch": 0.80966615980094, + "grad_norm": 0.9851035582426982, + "learning_rate": 1.8406629110322271e-06, + "loss": 0.2261, + "step": 46857 + }, + { + "epoch": 0.8096834393143489, + "grad_norm": 1.4799489308400442, + "learning_rate": 1.8403393649687917e-06, + "loss": 0.3795, + "step": 46858 + }, + { + "epoch": 0.8097007188277578, + "grad_norm": 1.5679908565890417, + "learning_rate": 1.8400158444620398e-06, + "loss": 0.2832, + "step": 46859 + }, + { + "epoch": 0.8097179983411668, + "grad_norm": 1.7233635980490198, + "learning_rate": 1.8396923495129748e-06, + "loss": 0.4572, + "step": 46860 + }, + { + "epoch": 0.8097352778545757, + "grad_norm": 1.5823607474039176, + "learning_rate": 1.8393688801226162e-06, + "loss": 0.352, + "step": 46861 + }, + { + "epoch": 0.8097525573679846, + "grad_norm": 1.5217121177528694, + "learning_rate": 1.8390454362919718e-06, + "loss": 0.4386, + "step": 46862 + }, + { + "epoch": 0.8097698368813934, + "grad_norm": 2.513037048726475, + "learning_rate": 1.838722018022061e-06, + "loss": 0.3369, + "step": 46863 + }, + { + "epoch": 0.8097871163948023, + "grad_norm": 1.140602699639471, + "learning_rate": 1.8383986253138897e-06, + "loss": 0.4133, + "step": 46864 + }, + { + "epoch": 0.8098043959082112, + "grad_norm": 0.8682284224539945, + "learning_rate": 1.8380752581684736e-06, + "loss": 0.3063, + "step": 46865 + }, + { + "epoch": 0.8098216754216201, + "grad_norm": 0.7229138544264343, + "learning_rate": 1.8377519165868296e-06, + "loss": 0.2273, + "step": 46866 + }, + { + "epoch": 0.809838954935029, + "grad_norm": 1.347959585981624, + "learning_rate": 1.8374286005699638e-06, + "loss": 0.4002, + "step": 46867 + }, + { + "epoch": 0.8098562344484379, + "grad_norm": 1.5943378229596916, + "learning_rate": 1.8371053101188952e-06, + "loss": 0.1331, + "step": 46868 + }, + { + "epoch": 0.8098735139618468, + "grad_norm": 1.3182407041494442, + "learning_rate": 1.8367820452346297e-06, + "loss": 0.4045, + "step": 46869 + }, + { + "epoch": 0.8098907934752557, + "grad_norm": 1.2596550554890624, + "learning_rate": 1.8364588059181842e-06, + "loss": 0.4262, + "step": 46870 + }, + { + "epoch": 0.8099080729886646, + "grad_norm": 1.0267226102088556, + "learning_rate": 1.8361355921705692e-06, + "loss": 0.3182, + "step": 46871 + }, + { + "epoch": 0.8099253525020735, + "grad_norm": 1.329997228185747, + "learning_rate": 1.8358124039928004e-06, + "loss": 0.3507, + "step": 46872 + }, + { + "epoch": 0.8099426320154824, + "grad_norm": 0.8657937100369615, + "learning_rate": 1.835489241385886e-06, + "loss": 0.5233, + "step": 46873 + }, + { + "epoch": 0.8099599115288914, + "grad_norm": 0.7378198109484967, + "learning_rate": 1.8351661043508418e-06, + "loss": 0.2553, + "step": 46874 + }, + { + "epoch": 0.8099771910423003, + "grad_norm": 1.0474866800738234, + "learning_rate": 1.8348429928886747e-06, + "loss": 0.2314, + "step": 46875 + }, + { + "epoch": 0.8099944705557092, + "grad_norm": 1.1744992783123258, + "learning_rate": 1.8345199070004005e-06, + "loss": 0.4182, + "step": 46876 + }, + { + "epoch": 0.8100117500691181, + "grad_norm": 2.041868522324502, + "learning_rate": 1.834196846687033e-06, + "loss": 0.2877, + "step": 46877 + }, + { + "epoch": 0.810029029582527, + "grad_norm": 1.2994262356727506, + "learning_rate": 1.833873811949579e-06, + "loss": 0.3669, + "step": 46878 + }, + { + "epoch": 0.8100463090959359, + "grad_norm": 1.1550837270369163, + "learning_rate": 1.8335508027890558e-06, + "loss": 0.168, + "step": 46879 + }, + { + "epoch": 0.8100635886093448, + "grad_norm": 1.552088617507329, + "learning_rate": 1.833227819206469e-06, + "loss": 0.2585, + "step": 46880 + }, + { + "epoch": 0.8100808681227537, + "grad_norm": 1.1212250437095148, + "learning_rate": 1.8329048612028333e-06, + "loss": 0.319, + "step": 46881 + }, + { + "epoch": 0.8100981476361626, + "grad_norm": 0.8177243481747621, + "learning_rate": 1.8325819287791614e-06, + "loss": 0.2664, + "step": 46882 + }, + { + "epoch": 0.8101154271495715, + "grad_norm": 1.9123684518181563, + "learning_rate": 1.8322590219364645e-06, + "loss": 0.4464, + "step": 46883 + }, + { + "epoch": 0.8101327066629803, + "grad_norm": 1.173414305121638, + "learning_rate": 1.831936140675754e-06, + "loss": 0.244, + "step": 46884 + }, + { + "epoch": 0.8101499861763892, + "grad_norm": 1.3339141091784579, + "learning_rate": 1.8316132849980372e-06, + "loss": 0.3397, + "step": 46885 + }, + { + "epoch": 0.8101672656897981, + "grad_norm": 1.4316446958145215, + "learning_rate": 1.8312904549043286e-06, + "loss": 0.4256, + "step": 46886 + }, + { + "epoch": 0.810184545203207, + "grad_norm": 0.9945766081649408, + "learning_rate": 1.8309676503956397e-06, + "loss": 0.3145, + "step": 46887 + }, + { + "epoch": 0.810201824716616, + "grad_norm": 0.9574804392450421, + "learning_rate": 1.830644871472983e-06, + "loss": 0.3916, + "step": 46888 + }, + { + "epoch": 0.8102191042300249, + "grad_norm": 0.8968650154120217, + "learning_rate": 1.8303221181373642e-06, + "loss": 0.291, + "step": 46889 + }, + { + "epoch": 0.8102363837434338, + "grad_norm": 1.8389922075946399, + "learning_rate": 1.8299993903898005e-06, + "loss": 0.342, + "step": 46890 + }, + { + "epoch": 0.8102536632568427, + "grad_norm": 0.8002483619873538, + "learning_rate": 1.8296766882312978e-06, + "loss": 0.4017, + "step": 46891 + }, + { + "epoch": 0.8102709427702516, + "grad_norm": 1.619951474201538, + "learning_rate": 1.8293540116628682e-06, + "loss": 0.3072, + "step": 46892 + }, + { + "epoch": 0.8102882222836605, + "grad_norm": 1.2399009463786714, + "learning_rate": 1.8290313606855226e-06, + "loss": 0.4505, + "step": 46893 + }, + { + "epoch": 0.8103055017970694, + "grad_norm": 3.559355998041269, + "learning_rate": 1.8287087353002742e-06, + "loss": 0.4662, + "step": 46894 + }, + { + "epoch": 0.8103227813104783, + "grad_norm": 1.5772531254570827, + "learning_rate": 1.8283861355081312e-06, + "loss": 0.2345, + "step": 46895 + }, + { + "epoch": 0.8103400608238872, + "grad_norm": 1.5067791890497737, + "learning_rate": 1.8280635613101006e-06, + "loss": 0.4864, + "step": 46896 + }, + { + "epoch": 0.8103573403372961, + "grad_norm": 1.0262260895444804, + "learning_rate": 1.8277410127071992e-06, + "loss": 0.4162, + "step": 46897 + }, + { + "epoch": 0.810374619850705, + "grad_norm": 1.058140802790827, + "learning_rate": 1.8274184897004288e-06, + "loss": 0.4276, + "step": 46898 + }, + { + "epoch": 0.8103918993641139, + "grad_norm": 1.3667754031322012, + "learning_rate": 1.8270959922908094e-06, + "loss": 0.2951, + "step": 46899 + }, + { + "epoch": 0.8104091788775228, + "grad_norm": 2.269809451826586, + "learning_rate": 1.8267735204793447e-06, + "loss": 0.3312, + "step": 46900 + }, + { + "epoch": 0.8104264583909317, + "grad_norm": 1.862108768154009, + "learning_rate": 1.8264510742670483e-06, + "loss": 0.2531, + "step": 46901 + }, + { + "epoch": 0.8104437379043407, + "grad_norm": 1.6382130976119482, + "learning_rate": 1.8261286536549272e-06, + "loss": 0.4286, + "step": 46902 + }, + { + "epoch": 0.8104610174177496, + "grad_norm": 1.0022663771601412, + "learning_rate": 1.8258062586439928e-06, + "loss": 0.4171, + "step": 46903 + }, + { + "epoch": 0.8104782969311585, + "grad_norm": 1.2146574169116975, + "learning_rate": 1.8254838892352512e-06, + "loss": 0.2816, + "step": 46904 + }, + { + "epoch": 0.8104955764445673, + "grad_norm": 2.054380922621093, + "learning_rate": 1.825161545429719e-06, + "loss": 0.2767, + "step": 46905 + }, + { + "epoch": 0.8105128559579762, + "grad_norm": 0.9432235345780325, + "learning_rate": 1.8248392272284033e-06, + "loss": 0.2693, + "step": 46906 + }, + { + "epoch": 0.8105301354713851, + "grad_norm": 1.1845252411796054, + "learning_rate": 1.824516934632309e-06, + "loss": 0.2397, + "step": 46907 + }, + { + "epoch": 0.810547414984794, + "grad_norm": 1.7741025747158103, + "learning_rate": 1.8241946676424515e-06, + "loss": 0.3523, + "step": 46908 + }, + { + "epoch": 0.8105646944982029, + "grad_norm": 2.3085500525548737, + "learning_rate": 1.8238724262598362e-06, + "loss": 0.2977, + "step": 46909 + }, + { + "epoch": 0.8105819740116118, + "grad_norm": 1.1456014425882408, + "learning_rate": 1.823550210485473e-06, + "loss": 0.4196, + "step": 46910 + }, + { + "epoch": 0.8105992535250207, + "grad_norm": 1.142127534459589, + "learning_rate": 1.8232280203203711e-06, + "loss": 0.2656, + "step": 46911 + }, + { + "epoch": 0.8106165330384296, + "grad_norm": 0.842840009796966, + "learning_rate": 1.8229058557655443e-06, + "loss": 0.3711, + "step": 46912 + }, + { + "epoch": 0.8106338125518385, + "grad_norm": 2.1564021667957425, + "learning_rate": 1.822583716821995e-06, + "loss": 0.2938, + "step": 46913 + }, + { + "epoch": 0.8106510920652474, + "grad_norm": 1.1069654526716564, + "learning_rate": 1.822261603490737e-06, + "loss": 0.3504, + "step": 46914 + }, + { + "epoch": 0.8106683715786563, + "grad_norm": 1.7628580219194043, + "learning_rate": 1.8219395157727748e-06, + "loss": 0.2974, + "step": 46915 + }, + { + "epoch": 0.8106856510920653, + "grad_norm": 0.8554281670682153, + "learning_rate": 1.82161745366912e-06, + "loss": 0.1792, + "step": 46916 + }, + { + "epoch": 0.8107029306054742, + "grad_norm": 0.938809856208557, + "learning_rate": 1.821295417180783e-06, + "loss": 0.2611, + "step": 46917 + }, + { + "epoch": 0.8107202101188831, + "grad_norm": 1.3495561738532176, + "learning_rate": 1.820973406308768e-06, + "loss": 0.3133, + "step": 46918 + }, + { + "epoch": 0.810737489632292, + "grad_norm": 1.6086218184694123, + "learning_rate": 1.8206514210540882e-06, + "loss": 0.3152, + "step": 46919 + }, + { + "epoch": 0.8107547691457009, + "grad_norm": 1.5804221077358716, + "learning_rate": 1.820329461417747e-06, + "loss": 0.4652, + "step": 46920 + }, + { + "epoch": 0.8107720486591098, + "grad_norm": 1.8583982000989472, + "learning_rate": 1.8200075274007557e-06, + "loss": 0.3617, + "step": 46921 + }, + { + "epoch": 0.8107893281725187, + "grad_norm": 0.8437957254809503, + "learning_rate": 1.8196856190041222e-06, + "loss": 0.1868, + "step": 46922 + }, + { + "epoch": 0.8108066076859276, + "grad_norm": 1.2232713561673856, + "learning_rate": 1.8193637362288586e-06, + "loss": 0.4236, + "step": 46923 + }, + { + "epoch": 0.8108238871993365, + "grad_norm": 1.3365954141672565, + "learning_rate": 1.8190418790759668e-06, + "loss": 0.2824, + "step": 46924 + }, + { + "epoch": 0.8108411667127454, + "grad_norm": 1.6898811066451895, + "learning_rate": 1.8187200475464595e-06, + "loss": 0.2548, + "step": 46925 + }, + { + "epoch": 0.8108584462261542, + "grad_norm": 0.9460586770668417, + "learning_rate": 1.8183982416413404e-06, + "loss": 0.1365, + "step": 46926 + }, + { + "epoch": 0.8108757257395631, + "grad_norm": 1.2115809916498126, + "learning_rate": 1.8180764613616185e-06, + "loss": 0.3472, + "step": 46927 + }, + { + "epoch": 0.810893005252972, + "grad_norm": 1.0454320482824395, + "learning_rate": 1.8177547067083078e-06, + "loss": 0.3341, + "step": 46928 + }, + { + "epoch": 0.8109102847663809, + "grad_norm": 1.53065338011422, + "learning_rate": 1.8174329776824073e-06, + "loss": 0.2243, + "step": 46929 + }, + { + "epoch": 0.8109275642797898, + "grad_norm": 1.4487591596523803, + "learning_rate": 1.8171112742849317e-06, + "loss": 0.4239, + "step": 46930 + }, + { + "epoch": 0.8109448437931988, + "grad_norm": 0.9815765187195801, + "learning_rate": 1.8167895965168824e-06, + "loss": 0.3091, + "step": 46931 + }, + { + "epoch": 0.8109621233066077, + "grad_norm": 1.046954683238369, + "learning_rate": 1.8164679443792699e-06, + "loss": 0.317, + "step": 46932 + }, + { + "epoch": 0.8109794028200166, + "grad_norm": 1.1536075557241814, + "learning_rate": 1.816146317873102e-06, + "loss": 0.2644, + "step": 46933 + }, + { + "epoch": 0.8109966823334255, + "grad_norm": 1.872001888405178, + "learning_rate": 1.815824716999388e-06, + "loss": 0.4692, + "step": 46934 + }, + { + "epoch": 0.8110139618468344, + "grad_norm": 1.4936844955100388, + "learning_rate": 1.8155031417591307e-06, + "loss": 0.253, + "step": 46935 + }, + { + "epoch": 0.8110312413602433, + "grad_norm": 1.5367243823317938, + "learning_rate": 1.8151815921533421e-06, + "loss": 0.3772, + "step": 46936 + }, + { + "epoch": 0.8110485208736522, + "grad_norm": 0.9987696989923758, + "learning_rate": 1.8148600681830275e-06, + "loss": 0.6903, + "step": 46937 + }, + { + "epoch": 0.8110658003870611, + "grad_norm": 1.3965413818049623, + "learning_rate": 1.814538569849187e-06, + "loss": 0.3575, + "step": 46938 + }, + { + "epoch": 0.81108307990047, + "grad_norm": 0.8949789006390947, + "learning_rate": 1.8142170971528395e-06, + "loss": 0.309, + "step": 46939 + }, + { + "epoch": 0.8111003594138789, + "grad_norm": 2.1206688676458727, + "learning_rate": 1.8138956500949833e-06, + "loss": 0.4524, + "step": 46940 + }, + { + "epoch": 0.8111176389272878, + "grad_norm": 1.5940691434866145, + "learning_rate": 1.81357422867663e-06, + "loss": 0.3644, + "step": 46941 + }, + { + "epoch": 0.8111349184406967, + "grad_norm": 2.413043463821473, + "learning_rate": 1.8132528328987831e-06, + "loss": 0.3068, + "step": 46942 + }, + { + "epoch": 0.8111521979541056, + "grad_norm": 2.2542739609848983, + "learning_rate": 1.8129314627624527e-06, + "loss": 0.2564, + "step": 46943 + }, + { + "epoch": 0.8111694774675146, + "grad_norm": 1.7607516913440187, + "learning_rate": 1.8126101182686372e-06, + "loss": 0.6562, + "step": 46944 + }, + { + "epoch": 0.8111867569809235, + "grad_norm": 2.8678275904864035, + "learning_rate": 1.8122887994183546e-06, + "loss": 0.3728, + "step": 46945 + }, + { + "epoch": 0.8112040364943324, + "grad_norm": 0.9283862938528711, + "learning_rate": 1.8119675062126064e-06, + "loss": 0.3223, + "step": 46946 + }, + { + "epoch": 0.8112213160077412, + "grad_norm": 1.1607425185610445, + "learning_rate": 1.8116462386523947e-06, + "loss": 0.3077, + "step": 46947 + }, + { + "epoch": 0.8112385955211501, + "grad_norm": 1.5015760590463114, + "learning_rate": 1.8113249967387325e-06, + "loss": 0.7079, + "step": 46948 + }, + { + "epoch": 0.811255875034559, + "grad_norm": 1.3549737727029694, + "learning_rate": 1.8110037804726199e-06, + "loss": 0.4171, + "step": 46949 + }, + { + "epoch": 0.8112731545479679, + "grad_norm": 1.4757167440054335, + "learning_rate": 1.8106825898550661e-06, + "loss": 0.3385, + "step": 46950 + }, + { + "epoch": 0.8112904340613768, + "grad_norm": 1.9407745237827605, + "learning_rate": 1.810361424887076e-06, + "loss": 0.3306, + "step": 46951 + }, + { + "epoch": 0.8113077135747857, + "grad_norm": 1.7642470647150246, + "learning_rate": 1.8100402855696597e-06, + "loss": 0.3527, + "step": 46952 + }, + { + "epoch": 0.8113249930881946, + "grad_norm": 1.1189819797252276, + "learning_rate": 1.8097191719038165e-06, + "loss": 0.4438, + "step": 46953 + }, + { + "epoch": 0.8113422726016035, + "grad_norm": 0.511131653128558, + "learning_rate": 1.809398083890559e-06, + "loss": 0.6572, + "step": 46954 + }, + { + "epoch": 0.8113595521150124, + "grad_norm": 1.8062487620949406, + "learning_rate": 1.809077021530885e-06, + "loss": 0.5195, + "step": 46955 + }, + { + "epoch": 0.8113768316284213, + "grad_norm": 1.5035444114863066, + "learning_rate": 1.8087559848258052e-06, + "loss": 0.3505, + "step": 46956 + }, + { + "epoch": 0.8113941111418302, + "grad_norm": 1.2863852818430046, + "learning_rate": 1.8084349737763263e-06, + "loss": 0.3734, + "step": 46957 + }, + { + "epoch": 0.8114113906552392, + "grad_norm": 1.830044289565009, + "learning_rate": 1.8081139883834497e-06, + "loss": 0.3587, + "step": 46958 + }, + { + "epoch": 0.8114286701686481, + "grad_norm": 0.9225784761483751, + "learning_rate": 1.8077930286481848e-06, + "loss": 0.2343, + "step": 46959 + }, + { + "epoch": 0.811445949682057, + "grad_norm": 1.347591866554115, + "learning_rate": 1.807472094571532e-06, + "loss": 0.2835, + "step": 46960 + }, + { + "epoch": 0.8114632291954659, + "grad_norm": 1.31017837317677, + "learning_rate": 1.8071511861544988e-06, + "loss": 0.1377, + "step": 46961 + }, + { + "epoch": 0.8114805087088748, + "grad_norm": 1.2155569745505916, + "learning_rate": 1.8068303033980916e-06, + "loss": 0.4696, + "step": 46962 + }, + { + "epoch": 0.8114977882222837, + "grad_norm": 1.182157344301517, + "learning_rate": 1.8065094463033174e-06, + "loss": 0.1753, + "step": 46963 + }, + { + "epoch": 0.8115150677356926, + "grad_norm": 1.376630215278953, + "learning_rate": 1.8061886148711748e-06, + "loss": 0.4221, + "step": 46964 + }, + { + "epoch": 0.8115323472491015, + "grad_norm": 1.1016434743334471, + "learning_rate": 1.805867809102675e-06, + "loss": 0.4028, + "step": 46965 + }, + { + "epoch": 0.8115496267625104, + "grad_norm": 0.7337377536676136, + "learning_rate": 1.8055470289988175e-06, + "loss": 0.6184, + "step": 46966 + }, + { + "epoch": 0.8115669062759193, + "grad_norm": 1.3962357086778634, + "learning_rate": 1.8052262745606087e-06, + "loss": 0.2292, + "step": 46967 + }, + { + "epoch": 0.8115841857893282, + "grad_norm": 1.406867207273761, + "learning_rate": 1.8049055457890573e-06, + "loss": 0.3687, + "step": 46968 + }, + { + "epoch": 0.811601465302737, + "grad_norm": 1.6201694271549199, + "learning_rate": 1.804584842685162e-06, + "loss": 0.2844, + "step": 46969 + }, + { + "epoch": 0.8116187448161459, + "grad_norm": 1.3919411668953643, + "learning_rate": 1.8042641652499327e-06, + "loss": 0.2007, + "step": 46970 + }, + { + "epoch": 0.8116360243295548, + "grad_norm": 1.6814841502785776, + "learning_rate": 1.8039435134843675e-06, + "loss": 0.3874, + "step": 46971 + }, + { + "epoch": 0.8116533038429637, + "grad_norm": 1.1444361246418253, + "learning_rate": 1.8036228873894745e-06, + "loss": 0.1898, + "step": 46972 + }, + { + "epoch": 0.8116705833563727, + "grad_norm": 1.1200730077155179, + "learning_rate": 1.8033022869662575e-06, + "loss": 0.3451, + "step": 46973 + }, + { + "epoch": 0.8116878628697816, + "grad_norm": 1.1912305425338092, + "learning_rate": 1.8029817122157233e-06, + "loss": 0.3686, + "step": 46974 + }, + { + "epoch": 0.8117051423831905, + "grad_norm": 0.9084253578381384, + "learning_rate": 1.8026611631388712e-06, + "loss": 0.3495, + "step": 46975 + }, + { + "epoch": 0.8117224218965994, + "grad_norm": 0.5316539165614351, + "learning_rate": 1.8023406397367095e-06, + "loss": 0.7451, + "step": 46976 + }, + { + "epoch": 0.8117397014100083, + "grad_norm": 2.016801587967862, + "learning_rate": 1.802020142010239e-06, + "loss": 0.2271, + "step": 46977 + }, + { + "epoch": 0.8117569809234172, + "grad_norm": 1.910957359333121, + "learning_rate": 1.8016996699604606e-06, + "loss": 0.4558, + "step": 46978 + }, + { + "epoch": 0.8117742604368261, + "grad_norm": 0.7866229580475564, + "learning_rate": 1.8013792235883865e-06, + "loss": 0.4273, + "step": 46979 + }, + { + "epoch": 0.811791539950235, + "grad_norm": 1.1278970729056381, + "learning_rate": 1.8010588028950128e-06, + "loss": 0.3983, + "step": 46980 + }, + { + "epoch": 0.8118088194636439, + "grad_norm": 0.7357762158293961, + "learning_rate": 1.8007384078813495e-06, + "loss": 0.1705, + "step": 46981 + }, + { + "epoch": 0.8118260989770528, + "grad_norm": 0.9098105875217325, + "learning_rate": 1.8004180385483928e-06, + "loss": 0.1999, + "step": 46982 + }, + { + "epoch": 0.8118433784904617, + "grad_norm": 1.1953134544520128, + "learning_rate": 1.8000976948971538e-06, + "loss": 0.2776, + "step": 46983 + }, + { + "epoch": 0.8118606580038706, + "grad_norm": 2.2999160097203672, + "learning_rate": 1.7997773769286264e-06, + "loss": 0.2841, + "step": 46984 + }, + { + "epoch": 0.8118779375172795, + "grad_norm": 1.7785982593897878, + "learning_rate": 1.799457084643824e-06, + "loss": 0.1592, + "step": 46985 + }, + { + "epoch": 0.8118952170306885, + "grad_norm": 2.0961353086359527, + "learning_rate": 1.799136818043743e-06, + "loss": 0.6061, + "step": 46986 + }, + { + "epoch": 0.8119124965440974, + "grad_norm": 1.0901829131938692, + "learning_rate": 1.798816577129392e-06, + "loss": 0.3317, + "step": 46987 + }, + { + "epoch": 0.8119297760575063, + "grad_norm": 1.4279109204685048, + "learning_rate": 1.7984963619017704e-06, + "loss": 0.2371, + "step": 46988 + }, + { + "epoch": 0.8119470555709152, + "grad_norm": 1.6469956875524348, + "learning_rate": 1.7981761723618785e-06, + "loss": 0.3463, + "step": 46989 + }, + { + "epoch": 0.811964335084324, + "grad_norm": 1.1773151114744975, + "learning_rate": 1.7978560085107222e-06, + "loss": 0.3759, + "step": 46990 + }, + { + "epoch": 0.8119816145977329, + "grad_norm": 0.7221527290252522, + "learning_rate": 1.797535870349304e-06, + "loss": 0.152, + "step": 46991 + }, + { + "epoch": 0.8119988941111418, + "grad_norm": 1.1166960125463625, + "learning_rate": 1.7972157578786298e-06, + "loss": 0.4956, + "step": 46992 + }, + { + "epoch": 0.8120161736245507, + "grad_norm": 2.430096573675862, + "learning_rate": 1.7968956710996966e-06, + "loss": 0.3739, + "step": 46993 + }, + { + "epoch": 0.8120334531379596, + "grad_norm": 1.5076857251132145, + "learning_rate": 1.7965756100135124e-06, + "loss": 0.3121, + "step": 46994 + }, + { + "epoch": 0.8120507326513685, + "grad_norm": 2.0917640502026353, + "learning_rate": 1.796255574621074e-06, + "loss": 0.5373, + "step": 46995 + }, + { + "epoch": 0.8120680121647774, + "grad_norm": 1.413570159443135, + "learning_rate": 1.7959355649233867e-06, + "loss": 0.6112, + "step": 46996 + }, + { + "epoch": 0.8120852916781863, + "grad_norm": 1.420928957000799, + "learning_rate": 1.7956155809214526e-06, + "loss": 0.4876, + "step": 46997 + }, + { + "epoch": 0.8121025711915952, + "grad_norm": 1.5282500433346058, + "learning_rate": 1.7952956226162766e-06, + "loss": 0.5001, + "step": 46998 + }, + { + "epoch": 0.8121198507050041, + "grad_norm": 1.127209654788148, + "learning_rate": 1.7949756900088588e-06, + "loss": 0.4674, + "step": 46999 + }, + { + "epoch": 0.812137130218413, + "grad_norm": 1.0948720601019206, + "learning_rate": 1.7946557831001976e-06, + "loss": 0.2737, + "step": 47000 + }, + { + "epoch": 0.812154409731822, + "grad_norm": 2.5230000836652406, + "learning_rate": 1.7943359018912976e-06, + "loss": 0.1496, + "step": 47001 + }, + { + "epoch": 0.8121716892452309, + "grad_norm": 1.2899289202348347, + "learning_rate": 1.7940160463831624e-06, + "loss": 0.3874, + "step": 47002 + }, + { + "epoch": 0.8121889687586398, + "grad_norm": 1.4765283242301297, + "learning_rate": 1.7936962165767936e-06, + "loss": 0.4615, + "step": 47003 + }, + { + "epoch": 0.8122062482720487, + "grad_norm": 1.360674074431002, + "learning_rate": 1.7933764124731901e-06, + "loss": 0.221, + "step": 47004 + }, + { + "epoch": 0.8122235277854576, + "grad_norm": 1.8951107953318655, + "learning_rate": 1.793056634073358e-06, + "loss": 0.243, + "step": 47005 + }, + { + "epoch": 0.8122408072988665, + "grad_norm": 1.3846517751397813, + "learning_rate": 1.792736881378294e-06, + "loss": 0.4908, + "step": 47006 + }, + { + "epoch": 0.8122580868122754, + "grad_norm": 0.9201663511786232, + "learning_rate": 1.7924171543890013e-06, + "loss": 0.4353, + "step": 47007 + }, + { + "epoch": 0.8122753663256843, + "grad_norm": 0.9466794228784423, + "learning_rate": 1.7920974531064838e-06, + "loss": 0.4259, + "step": 47008 + }, + { + "epoch": 0.8122926458390932, + "grad_norm": 0.62211029864416, + "learning_rate": 1.791777777531739e-06, + "loss": 0.827, + "step": 47009 + }, + { + "epoch": 0.8123099253525021, + "grad_norm": 1.2490507440088834, + "learning_rate": 1.7914581276657727e-06, + "loss": 0.3304, + "step": 47010 + }, + { + "epoch": 0.8123272048659109, + "grad_norm": 1.491937908478967, + "learning_rate": 1.7911385035095806e-06, + "loss": 0.2482, + "step": 47011 + }, + { + "epoch": 0.8123444843793198, + "grad_norm": 1.0110380317438927, + "learning_rate": 1.7908189050641667e-06, + "loss": 0.2871, + "step": 47012 + }, + { + "epoch": 0.8123617638927287, + "grad_norm": 0.9427539146715204, + "learning_rate": 1.790499332330532e-06, + "loss": 0.2518, + "step": 47013 + }, + { + "epoch": 0.8123790434061376, + "grad_norm": 0.945899517903986, + "learning_rate": 1.7901797853096802e-06, + "loss": 0.3386, + "step": 47014 + }, + { + "epoch": 0.8123963229195466, + "grad_norm": 1.6452385580577518, + "learning_rate": 1.7898602640026053e-06, + "loss": 0.3727, + "step": 47015 + }, + { + "epoch": 0.8124136024329555, + "grad_norm": 1.5948811286737594, + "learning_rate": 1.7895407684103162e-06, + "loss": 0.3777, + "step": 47016 + }, + { + "epoch": 0.8124308819463644, + "grad_norm": 1.1231817161854285, + "learning_rate": 1.7892212985338054e-06, + "loss": 0.6002, + "step": 47017 + }, + { + "epoch": 0.8124481614597733, + "grad_norm": 1.2007382412836354, + "learning_rate": 1.7889018543740789e-06, + "loss": 0.3368, + "step": 47018 + }, + { + "epoch": 0.8124654409731822, + "grad_norm": 1.3455537961760742, + "learning_rate": 1.788582435932137e-06, + "loss": 0.1717, + "step": 47019 + }, + { + "epoch": 0.8124827204865911, + "grad_norm": 1.1130995185455879, + "learning_rate": 1.7882630432089766e-06, + "loss": 0.2588, + "step": 47020 + }, + { + "epoch": 0.8125, + "grad_norm": 0.9479806215689616, + "learning_rate": 1.7879436762056036e-06, + "loss": 0.3458, + "step": 47021 + }, + { + "epoch": 0.8125172795134089, + "grad_norm": 1.1819864439887036, + "learning_rate": 1.7876243349230127e-06, + "loss": 0.2039, + "step": 47022 + }, + { + "epoch": 0.8125345590268178, + "grad_norm": 1.393299130334191, + "learning_rate": 1.7873050193622088e-06, + "loss": 0.3733, + "step": 47023 + }, + { + "epoch": 0.8125518385402267, + "grad_norm": 1.5349100926695605, + "learning_rate": 1.7869857295241844e-06, + "loss": 0.2458, + "step": 47024 + }, + { + "epoch": 0.8125691180536356, + "grad_norm": 1.0800782765037773, + "learning_rate": 1.7866664654099497e-06, + "loss": 0.4268, + "step": 47025 + }, + { + "epoch": 0.8125863975670445, + "grad_norm": 1.7669532785419544, + "learning_rate": 1.7863472270204974e-06, + "loss": 0.2351, + "step": 47026 + }, + { + "epoch": 0.8126036770804534, + "grad_norm": 1.1720515774927376, + "learning_rate": 1.786028014356832e-06, + "loss": 0.3669, + "step": 47027 + }, + { + "epoch": 0.8126209565938624, + "grad_norm": 1.5134762020315478, + "learning_rate": 1.7857088274199497e-06, + "loss": 0.3075, + "step": 47028 + }, + { + "epoch": 0.8126382361072713, + "grad_norm": 1.0441070299809625, + "learning_rate": 1.785389666210854e-06, + "loss": 0.3254, + "step": 47029 + }, + { + "epoch": 0.8126555156206802, + "grad_norm": 1.141358112097919, + "learning_rate": 1.7850705307305383e-06, + "loss": 0.3543, + "step": 47030 + }, + { + "epoch": 0.8126727951340891, + "grad_norm": 1.2780420394794854, + "learning_rate": 1.7847514209800076e-06, + "loss": 0.2294, + "step": 47031 + }, + { + "epoch": 0.8126900746474979, + "grad_norm": 1.8023687383857732, + "learning_rate": 1.7844323369602611e-06, + "loss": 0.3574, + "step": 47032 + }, + { + "epoch": 0.8127073541609068, + "grad_norm": 0.9545874075970902, + "learning_rate": 1.7841132786722948e-06, + "loss": 0.4021, + "step": 47033 + }, + { + "epoch": 0.8127246336743157, + "grad_norm": 1.3018025517861624, + "learning_rate": 1.7837942461171132e-06, + "loss": 0.4035, + "step": 47034 + }, + { + "epoch": 0.8127419131877246, + "grad_norm": 3.252657143917505, + "learning_rate": 1.783475239295709e-06, + "loss": 0.1454, + "step": 47035 + }, + { + "epoch": 0.8127591927011335, + "grad_norm": 1.2746721058226131, + "learning_rate": 1.7831562582090855e-06, + "loss": 0.3577, + "step": 47036 + }, + { + "epoch": 0.8127764722145424, + "grad_norm": 1.4600259218429867, + "learning_rate": 1.7828373028582401e-06, + "loss": 0.3339, + "step": 47037 + }, + { + "epoch": 0.8127937517279513, + "grad_norm": 1.0730872029148442, + "learning_rate": 1.7825183732441764e-06, + "loss": 0.372, + "step": 47038 + }, + { + "epoch": 0.8128110312413602, + "grad_norm": 1.6824096680541092, + "learning_rate": 1.782199469367889e-06, + "loss": 0.3542, + "step": 47039 + }, + { + "epoch": 0.8128283107547691, + "grad_norm": 1.2334618467001206, + "learning_rate": 1.781880591230375e-06, + "loss": 0.3553, + "step": 47040 + }, + { + "epoch": 0.812845590268178, + "grad_norm": 1.6656966169551115, + "learning_rate": 1.7815617388326356e-06, + "loss": 0.1463, + "step": 47041 + }, + { + "epoch": 0.812862869781587, + "grad_norm": 1.2300523471653657, + "learning_rate": 1.7812429121756691e-06, + "loss": 0.3455, + "step": 47042 + }, + { + "epoch": 0.8128801492949959, + "grad_norm": 1.411465363270788, + "learning_rate": 1.780924111260477e-06, + "loss": 0.3079, + "step": 47043 + }, + { + "epoch": 0.8128974288084048, + "grad_norm": 1.4380583282763888, + "learning_rate": 1.7806053360880527e-06, + "loss": 0.3954, + "step": 47044 + }, + { + "epoch": 0.8129147083218137, + "grad_norm": 1.110078825673514, + "learning_rate": 1.7802865866593999e-06, + "loss": 0.3613, + "step": 47045 + }, + { + "epoch": 0.8129319878352226, + "grad_norm": 2.009240610584091, + "learning_rate": 1.7799678629755124e-06, + "loss": 0.3449, + "step": 47046 + }, + { + "epoch": 0.8129492673486315, + "grad_norm": 1.011082186087567, + "learning_rate": 1.7796491650373893e-06, + "loss": 0.3557, + "step": 47047 + }, + { + "epoch": 0.8129665468620404, + "grad_norm": 1.5194606288942407, + "learning_rate": 1.7793304928460298e-06, + "loss": 0.4754, + "step": 47048 + }, + { + "epoch": 0.8129838263754493, + "grad_norm": 1.2202685741018333, + "learning_rate": 1.7790118464024343e-06, + "loss": 0.3284, + "step": 47049 + }, + { + "epoch": 0.8130011058888582, + "grad_norm": 1.5350149831557316, + "learning_rate": 1.7786932257075983e-06, + "loss": 0.4179, + "step": 47050 + }, + { + "epoch": 0.8130183854022671, + "grad_norm": 1.1669520591555043, + "learning_rate": 1.7783746307625182e-06, + "loss": 0.3566, + "step": 47051 + }, + { + "epoch": 0.813035664915676, + "grad_norm": 1.2851343653777307, + "learning_rate": 1.778056061568193e-06, + "loss": 0.4215, + "step": 47052 + }, + { + "epoch": 0.8130529444290848, + "grad_norm": 1.0401146037957216, + "learning_rate": 1.7777375181256218e-06, + "loss": 0.4765, + "step": 47053 + }, + { + "epoch": 0.8130702239424937, + "grad_norm": 1.3974305616047995, + "learning_rate": 1.7774190004358038e-06, + "loss": 0.4254, + "step": 47054 + }, + { + "epoch": 0.8130875034559026, + "grad_norm": 1.6707302016971948, + "learning_rate": 1.7771005084997317e-06, + "loss": 0.338, + "step": 47055 + }, + { + "epoch": 0.8131047829693115, + "grad_norm": 1.5294536203712208, + "learning_rate": 1.7767820423184079e-06, + "loss": 0.3596, + "step": 47056 + }, + { + "epoch": 0.8131220624827205, + "grad_norm": 0.8867304880559587, + "learning_rate": 1.7764636018928249e-06, + "loss": 0.4601, + "step": 47057 + }, + { + "epoch": 0.8131393419961294, + "grad_norm": 1.4906079441100237, + "learning_rate": 1.7761451872239832e-06, + "loss": 0.3688, + "step": 47058 + }, + { + "epoch": 0.8131566215095383, + "grad_norm": 1.3615421305013238, + "learning_rate": 1.775826798312883e-06, + "loss": 0.2993, + "step": 47059 + }, + { + "epoch": 0.8131739010229472, + "grad_norm": 1.4969088669576864, + "learning_rate": 1.7755084351605157e-06, + "loss": 0.3927, + "step": 47060 + }, + { + "epoch": 0.8131911805363561, + "grad_norm": 1.0197930626378668, + "learning_rate": 1.7751900977678827e-06, + "loss": 0.2847, + "step": 47061 + }, + { + "epoch": 0.813208460049765, + "grad_norm": 1.020300858207649, + "learning_rate": 1.7748717861359766e-06, + "loss": 0.4666, + "step": 47062 + }, + { + "epoch": 0.8132257395631739, + "grad_norm": 0.9992037120692673, + "learning_rate": 1.774553500265801e-06, + "loss": 0.5145, + "step": 47063 + }, + { + "epoch": 0.8132430190765828, + "grad_norm": 1.3253143095740223, + "learning_rate": 1.7742352401583429e-06, + "loss": 0.3643, + "step": 47064 + }, + { + "epoch": 0.8132602985899917, + "grad_norm": 1.7295818855944387, + "learning_rate": 1.7739170058146105e-06, + "loss": 0.4406, + "step": 47065 + }, + { + "epoch": 0.8132775781034006, + "grad_norm": 2.131181962464823, + "learning_rate": 1.7735987972355927e-06, + "loss": 0.314, + "step": 47066 + }, + { + "epoch": 0.8132948576168095, + "grad_norm": 1.59367110881659, + "learning_rate": 1.7732806144222915e-06, + "loss": 0.8891, + "step": 47067 + }, + { + "epoch": 0.8133121371302184, + "grad_norm": 1.6767330969148804, + "learning_rate": 1.7729624573756975e-06, + "loss": 0.1645, + "step": 47068 + }, + { + "epoch": 0.8133294166436273, + "grad_norm": 2.5101802921863348, + "learning_rate": 1.772644326096813e-06, + "loss": 0.3179, + "step": 47069 + }, + { + "epoch": 0.8133466961570363, + "grad_norm": 1.1454714939854673, + "learning_rate": 1.7723262205866298e-06, + "loss": 0.4409, + "step": 47070 + }, + { + "epoch": 0.8133639756704452, + "grad_norm": 1.53714440334399, + "learning_rate": 1.7720081408461453e-06, + "loss": 0.4099, + "step": 47071 + }, + { + "epoch": 0.8133812551838541, + "grad_norm": 1.6791496322709456, + "learning_rate": 1.7716900868763597e-06, + "loss": 0.4504, + "step": 47072 + }, + { + "epoch": 0.813398534697263, + "grad_norm": 1.1359957629966826, + "learning_rate": 1.7713720586782635e-06, + "loss": 0.2639, + "step": 47073 + }, + { + "epoch": 0.8134158142106718, + "grad_norm": 0.9317332240101202, + "learning_rate": 1.7710540562528578e-06, + "loss": 0.6696, + "step": 47074 + }, + { + "epoch": 0.8134330937240807, + "grad_norm": 1.7647320707511072, + "learning_rate": 1.770736079601133e-06, + "loss": 0.3953, + "step": 47075 + }, + { + "epoch": 0.8134503732374896, + "grad_norm": 1.5836752434793246, + "learning_rate": 1.7704181287240884e-06, + "loss": 0.3348, + "step": 47076 + }, + { + "epoch": 0.8134676527508985, + "grad_norm": 1.1791852488044667, + "learning_rate": 1.77010020362272e-06, + "loss": 0.8008, + "step": 47077 + }, + { + "epoch": 0.8134849322643074, + "grad_norm": 1.4320701120201453, + "learning_rate": 1.7697823042980266e-06, + "loss": 0.3948, + "step": 47078 + }, + { + "epoch": 0.8135022117777163, + "grad_norm": 1.7342114302710965, + "learning_rate": 1.7694644307509967e-06, + "loss": 0.3755, + "step": 47079 + }, + { + "epoch": 0.8135194912911252, + "grad_norm": 1.9728067292159666, + "learning_rate": 1.7691465829826327e-06, + "loss": 0.4658, + "step": 47080 + }, + { + "epoch": 0.8135367708045341, + "grad_norm": 1.5913783722161554, + "learning_rate": 1.7688287609939248e-06, + "loss": 0.4291, + "step": 47081 + }, + { + "epoch": 0.813554050317943, + "grad_norm": 0.8618990288036689, + "learning_rate": 1.76851096478587e-06, + "loss": 0.7737, + "step": 47082 + }, + { + "epoch": 0.8135713298313519, + "grad_norm": 1.1436096977123855, + "learning_rate": 1.7681931943594676e-06, + "loss": 0.5, + "step": 47083 + }, + { + "epoch": 0.8135886093447608, + "grad_norm": 1.2625557391645328, + "learning_rate": 1.7678754497157069e-06, + "loss": 0.29, + "step": 47084 + }, + { + "epoch": 0.8136058888581698, + "grad_norm": 1.5009591530955544, + "learning_rate": 1.767557730855589e-06, + "loss": 0.5178, + "step": 47085 + }, + { + "epoch": 0.8136231683715787, + "grad_norm": 1.2428229840254648, + "learning_rate": 1.7672400377801036e-06, + "loss": 0.2268, + "step": 47086 + }, + { + "epoch": 0.8136404478849876, + "grad_norm": 0.8794015971198491, + "learning_rate": 1.7669223704902472e-06, + "loss": 0.2238, + "step": 47087 + }, + { + "epoch": 0.8136577273983965, + "grad_norm": 1.5993536544506273, + "learning_rate": 1.7666047289870159e-06, + "loss": 0.3051, + "step": 47088 + }, + { + "epoch": 0.8136750069118054, + "grad_norm": 1.4691555957916338, + "learning_rate": 1.7662871132714066e-06, + "loss": 0.4729, + "step": 47089 + }, + { + "epoch": 0.8136922864252143, + "grad_norm": 1.2447195330227476, + "learning_rate": 1.7659695233444118e-06, + "loss": 0.475, + "step": 47090 + }, + { + "epoch": 0.8137095659386232, + "grad_norm": 1.3109342334230696, + "learning_rate": 1.7656519592070242e-06, + "loss": 0.3389, + "step": 47091 + }, + { + "epoch": 0.8137268454520321, + "grad_norm": 1.5259401766720966, + "learning_rate": 1.7653344208602397e-06, + "loss": 0.3106, + "step": 47092 + }, + { + "epoch": 0.813744124965441, + "grad_norm": 1.172781949668053, + "learning_rate": 1.765016908305055e-06, + "loss": 0.3657, + "step": 47093 + }, + { + "epoch": 0.8137614044788499, + "grad_norm": 1.8441288924512342, + "learning_rate": 1.764699421542464e-06, + "loss": 0.4852, + "step": 47094 + }, + { + "epoch": 0.8137786839922588, + "grad_norm": 1.7395405926665322, + "learning_rate": 1.764381960573459e-06, + "loss": 0.3146, + "step": 47095 + }, + { + "epoch": 0.8137959635056676, + "grad_norm": 1.00413776404071, + "learning_rate": 1.7640645253990374e-06, + "loss": 0.3225, + "step": 47096 + }, + { + "epoch": 0.8138132430190765, + "grad_norm": 0.8466765818318763, + "learning_rate": 1.7637471160201892e-06, + "loss": 0.3357, + "step": 47097 + }, + { + "epoch": 0.8138305225324854, + "grad_norm": 1.129905326230703, + "learning_rate": 1.7634297324379114e-06, + "loss": 0.4077, + "step": 47098 + }, + { + "epoch": 0.8138478020458944, + "grad_norm": 1.3627725728899696, + "learning_rate": 1.7631123746531976e-06, + "loss": 0.4422, + "step": 47099 + }, + { + "epoch": 0.8138650815593033, + "grad_norm": 1.0139763632764704, + "learning_rate": 1.7627950426670436e-06, + "loss": 0.2893, + "step": 47100 + }, + { + "epoch": 0.8138823610727122, + "grad_norm": 1.0851633244021777, + "learning_rate": 1.7624777364804424e-06, + "loss": 0.2908, + "step": 47101 + }, + { + "epoch": 0.8138996405861211, + "grad_norm": 1.1184742022727663, + "learning_rate": 1.762160456094384e-06, + "loss": 0.3058, + "step": 47102 + }, + { + "epoch": 0.81391692009953, + "grad_norm": 1.5357584846356438, + "learning_rate": 1.7618432015098674e-06, + "loss": 0.3487, + "step": 47103 + }, + { + "epoch": 0.8139341996129389, + "grad_norm": 1.5130026372231768, + "learning_rate": 1.7615259727278789e-06, + "loss": 0.3989, + "step": 47104 + }, + { + "epoch": 0.8139514791263478, + "grad_norm": 1.2693726036713275, + "learning_rate": 1.761208769749423e-06, + "loss": 0.4229, + "step": 47105 + }, + { + "epoch": 0.8139687586397567, + "grad_norm": 1.010114807117095, + "learning_rate": 1.7608915925754843e-06, + "loss": 0.3503, + "step": 47106 + }, + { + "epoch": 0.8139860381531656, + "grad_norm": 1.5070301751054882, + "learning_rate": 1.7605744412070613e-06, + "loss": 0.2671, + "step": 47107 + }, + { + "epoch": 0.8140033176665745, + "grad_norm": 1.8076826099076402, + "learning_rate": 1.760257315645144e-06, + "loss": 0.2521, + "step": 47108 + }, + { + "epoch": 0.8140205971799834, + "grad_norm": 1.9092896567860913, + "learning_rate": 1.7599402158907286e-06, + "loss": 0.2117, + "step": 47109 + }, + { + "epoch": 0.8140378766933923, + "grad_norm": 0.9460383308782296, + "learning_rate": 1.7596231419448018e-06, + "loss": 0.2296, + "step": 47110 + }, + { + "epoch": 0.8140551562068012, + "grad_norm": 1.3291715237226311, + "learning_rate": 1.7593060938083672e-06, + "loss": 0.3509, + "step": 47111 + }, + { + "epoch": 0.8140724357202102, + "grad_norm": 1.1730456054793668, + "learning_rate": 1.758989071482411e-06, + "loss": 0.155, + "step": 47112 + }, + { + "epoch": 0.8140897152336191, + "grad_norm": 1.2105114382455278, + "learning_rate": 1.7586720749679254e-06, + "loss": 0.2757, + "step": 47113 + }, + { + "epoch": 0.814106994747028, + "grad_norm": 1.2025905779007837, + "learning_rate": 1.7583551042659075e-06, + "loss": 0.1911, + "step": 47114 + }, + { + "epoch": 0.8141242742604369, + "grad_norm": 0.8597458729237533, + "learning_rate": 1.7580381593773455e-06, + "loss": 0.2433, + "step": 47115 + }, + { + "epoch": 0.8141415537738458, + "grad_norm": 1.375027359844737, + "learning_rate": 1.7577212403032351e-06, + "loss": 0.3899, + "step": 47116 + }, + { + "epoch": 0.8141588332872546, + "grad_norm": 1.303965909514529, + "learning_rate": 1.7574043470445667e-06, + "loss": 0.5298, + "step": 47117 + }, + { + "epoch": 0.8141761128006635, + "grad_norm": 0.5880238326179803, + "learning_rate": 1.7570874796023386e-06, + "loss": 0.6087, + "step": 47118 + }, + { + "epoch": 0.8141933923140724, + "grad_norm": 1.7446357094096556, + "learning_rate": 1.756770637977535e-06, + "loss": 0.2301, + "step": 47119 + }, + { + "epoch": 0.8142106718274813, + "grad_norm": 1.2273093870802145, + "learning_rate": 1.7564538221711547e-06, + "loss": 0.2562, + "step": 47120 + }, + { + "epoch": 0.8142279513408902, + "grad_norm": 1.6485133644553838, + "learning_rate": 1.7561370321841853e-06, + "loss": 0.3923, + "step": 47121 + }, + { + "epoch": 0.8142452308542991, + "grad_norm": 2.1478246771610543, + "learning_rate": 1.7558202680176217e-06, + "loss": 0.2088, + "step": 47122 + }, + { + "epoch": 0.814262510367708, + "grad_norm": 1.359888012622013, + "learning_rate": 1.7555035296724577e-06, + "loss": 0.4702, + "step": 47123 + }, + { + "epoch": 0.8142797898811169, + "grad_norm": 0.7839828787795441, + "learning_rate": 1.7551868171496812e-06, + "loss": 0.2786, + "step": 47124 + }, + { + "epoch": 0.8142970693945258, + "grad_norm": 0.9984655419008238, + "learning_rate": 1.754870130450288e-06, + "loss": 0.34, + "step": 47125 + }, + { + "epoch": 0.8143143489079347, + "grad_norm": 1.431271202907899, + "learning_rate": 1.7545534695752654e-06, + "loss": 0.3705, + "step": 47126 + }, + { + "epoch": 0.8143316284213437, + "grad_norm": 1.9262953802071867, + "learning_rate": 1.7542368345256088e-06, + "loss": 0.3564, + "step": 47127 + }, + { + "epoch": 0.8143489079347526, + "grad_norm": 1.8046170012644984, + "learning_rate": 1.753920225302308e-06, + "loss": 0.3765, + "step": 47128 + }, + { + "epoch": 0.8143661874481615, + "grad_norm": 1.0859633658275325, + "learning_rate": 1.7536036419063596e-06, + "loss": 0.3928, + "step": 47129 + }, + { + "epoch": 0.8143834669615704, + "grad_norm": 1.1305411015947302, + "learning_rate": 1.7532870843387473e-06, + "loss": 0.3646, + "step": 47130 + }, + { + "epoch": 0.8144007464749793, + "grad_norm": 1.0706285790014856, + "learning_rate": 1.7529705526004702e-06, + "loss": 0.2742, + "step": 47131 + }, + { + "epoch": 0.8144180259883882, + "grad_norm": 1.500904908946056, + "learning_rate": 1.7526540466925135e-06, + "loss": 0.4693, + "step": 47132 + }, + { + "epoch": 0.8144353055017971, + "grad_norm": 1.2697279166398467, + "learning_rate": 1.7523375666158705e-06, + "loss": 0.3371, + "step": 47133 + }, + { + "epoch": 0.814452585015206, + "grad_norm": 1.1110927849632855, + "learning_rate": 1.7520211123715358e-06, + "loss": 0.3993, + "step": 47134 + }, + { + "epoch": 0.8144698645286149, + "grad_norm": 0.7764215156557931, + "learning_rate": 1.7517046839604956e-06, + "loss": 0.768, + "step": 47135 + }, + { + "epoch": 0.8144871440420238, + "grad_norm": 1.7451065317866141, + "learning_rate": 1.7513882813837458e-06, + "loss": 0.3399, + "step": 47136 + }, + { + "epoch": 0.8145044235554327, + "grad_norm": 1.0786529578901607, + "learning_rate": 1.751071904642272e-06, + "loss": 0.4141, + "step": 47137 + }, + { + "epoch": 0.8145217030688415, + "grad_norm": 1.2097596657919927, + "learning_rate": 1.750755553737068e-06, + "loss": 0.2605, + "step": 47138 + }, + { + "epoch": 0.8145389825822504, + "grad_norm": 1.3940313001386662, + "learning_rate": 1.750439228669124e-06, + "loss": 0.4301, + "step": 47139 + }, + { + "epoch": 0.8145562620956593, + "grad_norm": 1.7928037071431184, + "learning_rate": 1.750122929439434e-06, + "loss": 0.5337, + "step": 47140 + }, + { + "epoch": 0.8145735416090683, + "grad_norm": 1.233758039375203, + "learning_rate": 1.7498066560489835e-06, + "loss": 0.3332, + "step": 47141 + }, + { + "epoch": 0.8145908211224772, + "grad_norm": 1.046496582519863, + "learning_rate": 1.7494904084987684e-06, + "loss": 0.5757, + "step": 47142 + }, + { + "epoch": 0.8146081006358861, + "grad_norm": 1.467764310565514, + "learning_rate": 1.749174186789776e-06, + "loss": 0.4101, + "step": 47143 + }, + { + "epoch": 0.814625380149295, + "grad_norm": 1.010700432545779, + "learning_rate": 1.748857990922993e-06, + "loss": 0.2621, + "step": 47144 + }, + { + "epoch": 0.8146426596627039, + "grad_norm": 1.375423802245945, + "learning_rate": 1.7485418208994176e-06, + "loss": 0.2223, + "step": 47145 + }, + { + "epoch": 0.8146599391761128, + "grad_norm": 1.1172175708129068, + "learning_rate": 1.7482256767200346e-06, + "loss": 0.431, + "step": 47146 + }, + { + "epoch": 0.8146772186895217, + "grad_norm": 1.2263581456292232, + "learning_rate": 1.7479095583858375e-06, + "loss": 0.347, + "step": 47147 + }, + { + "epoch": 0.8146944982029306, + "grad_norm": 1.5193989932570042, + "learning_rate": 1.7475934658978133e-06, + "loss": 0.2616, + "step": 47148 + }, + { + "epoch": 0.8147117777163395, + "grad_norm": 1.208447933808511, + "learning_rate": 1.7472773992569547e-06, + "loss": 0.3274, + "step": 47149 + }, + { + "epoch": 0.8147290572297484, + "grad_norm": 1.0587938911353345, + "learning_rate": 1.7469613584642498e-06, + "loss": 0.5038, + "step": 47150 + }, + { + "epoch": 0.8147463367431573, + "grad_norm": 1.1252329762423816, + "learning_rate": 1.7466453435206877e-06, + "loss": 0.3825, + "step": 47151 + }, + { + "epoch": 0.8147636162565662, + "grad_norm": 1.1730155500417936, + "learning_rate": 1.746329354427263e-06, + "loss": 0.2722, + "step": 47152 + }, + { + "epoch": 0.8147808957699751, + "grad_norm": 1.7305914791489183, + "learning_rate": 1.7460133911849586e-06, + "loss": 0.5013, + "step": 47153 + }, + { + "epoch": 0.814798175283384, + "grad_norm": 1.2180734852961186, + "learning_rate": 1.7456974537947702e-06, + "loss": 0.2363, + "step": 47154 + }, + { + "epoch": 0.814815454796793, + "grad_norm": 0.8078545133741455, + "learning_rate": 1.7453815422576825e-06, + "loss": 0.4588, + "step": 47155 + }, + { + "epoch": 0.8148327343102019, + "grad_norm": 1.2101701490451051, + "learning_rate": 1.745065656574688e-06, + "loss": 0.3202, + "step": 47156 + }, + { + "epoch": 0.8148500138236108, + "grad_norm": 1.0384068812784248, + "learning_rate": 1.7447497967467741e-06, + "loss": 0.4125, + "step": 47157 + }, + { + "epoch": 0.8148672933370197, + "grad_norm": 1.6099486811425945, + "learning_rate": 1.744433962774934e-06, + "loss": 0.3763, + "step": 47158 + }, + { + "epoch": 0.8148845728504285, + "grad_norm": 1.08597831740674, + "learning_rate": 1.7441181546601516e-06, + "loss": 0.4014, + "step": 47159 + }, + { + "epoch": 0.8149018523638374, + "grad_norm": 1.4173527034114095, + "learning_rate": 1.7438023724034215e-06, + "loss": 0.3343, + "step": 47160 + }, + { + "epoch": 0.8149191318772463, + "grad_norm": 0.6412603741249638, + "learning_rate": 1.7434866160057262e-06, + "loss": 0.6064, + "step": 47161 + }, + { + "epoch": 0.8149364113906552, + "grad_norm": 1.9725643153118355, + "learning_rate": 1.7431708854680597e-06, + "loss": 0.2786, + "step": 47162 + }, + { + "epoch": 0.8149536909040641, + "grad_norm": 1.0180676033535268, + "learning_rate": 1.7428551807914107e-06, + "loss": 0.2599, + "step": 47163 + }, + { + "epoch": 0.814970970417473, + "grad_norm": 1.2504628853923274, + "learning_rate": 1.7425395019767654e-06, + "loss": 0.504, + "step": 47164 + }, + { + "epoch": 0.8149882499308819, + "grad_norm": 1.1850118480901604, + "learning_rate": 1.7422238490251154e-06, + "loss": 0.3278, + "step": 47165 + }, + { + "epoch": 0.8150055294442908, + "grad_norm": 1.8071054790835368, + "learning_rate": 1.7419082219374462e-06, + "loss": 0.346, + "step": 47166 + }, + { + "epoch": 0.8150228089576997, + "grad_norm": 1.1593120085113005, + "learning_rate": 1.741592620714747e-06, + "loss": 0.3014, + "step": 47167 + }, + { + "epoch": 0.8150400884711086, + "grad_norm": 3.0291839380485093, + "learning_rate": 1.7412770453580087e-06, + "loss": 0.3392, + "step": 47168 + }, + { + "epoch": 0.8150573679845176, + "grad_norm": 2.021000457198564, + "learning_rate": 1.7409614958682197e-06, + "loss": 0.5801, + "step": 47169 + }, + { + "epoch": 0.8150746474979265, + "grad_norm": 1.3837638618419517, + "learning_rate": 1.7406459722463653e-06, + "loss": 0.4437, + "step": 47170 + }, + { + "epoch": 0.8150919270113354, + "grad_norm": 1.2439657073601822, + "learning_rate": 1.7403304744934368e-06, + "loss": 0.3668, + "step": 47171 + }, + { + "epoch": 0.8151092065247443, + "grad_norm": 1.8789431265839533, + "learning_rate": 1.74001500261042e-06, + "loss": 0.3085, + "step": 47172 + }, + { + "epoch": 0.8151264860381532, + "grad_norm": 1.1600107410422145, + "learning_rate": 1.7396995565983033e-06, + "loss": 0.8453, + "step": 47173 + }, + { + "epoch": 0.8151437655515621, + "grad_norm": 1.3853861768064561, + "learning_rate": 1.7393841364580776e-06, + "loss": 0.388, + "step": 47174 + }, + { + "epoch": 0.815161045064971, + "grad_norm": 1.534093827872285, + "learning_rate": 1.7390687421907259e-06, + "loss": 0.2734, + "step": 47175 + }, + { + "epoch": 0.8151783245783799, + "grad_norm": 1.0896415693449244, + "learning_rate": 1.7387533737972417e-06, + "loss": 0.3897, + "step": 47176 + }, + { + "epoch": 0.8151956040917888, + "grad_norm": 0.8860734375592295, + "learning_rate": 1.7384380312786064e-06, + "loss": 0.26, + "step": 47177 + }, + { + "epoch": 0.8152128836051977, + "grad_norm": 1.5872967252058994, + "learning_rate": 1.7381227146358115e-06, + "loss": 0.5386, + "step": 47178 + }, + { + "epoch": 0.8152301631186066, + "grad_norm": 0.9959516711076112, + "learning_rate": 1.737807423869845e-06, + "loss": 0.3453, + "step": 47179 + }, + { + "epoch": 0.8152474426320154, + "grad_norm": 0.6451591230402491, + "learning_rate": 1.7374921589816951e-06, + "loss": 0.4373, + "step": 47180 + }, + { + "epoch": 0.8152647221454243, + "grad_norm": 2.3188321654751265, + "learning_rate": 1.7371769199723454e-06, + "loss": 0.2082, + "step": 47181 + }, + { + "epoch": 0.8152820016588332, + "grad_norm": 0.8936848582888524, + "learning_rate": 1.7368617068427885e-06, + "loss": 0.2976, + "step": 47182 + }, + { + "epoch": 0.8152992811722422, + "grad_norm": 1.7606166142588682, + "learning_rate": 1.7365465195940078e-06, + "loss": 0.4053, + "step": 47183 + }, + { + "epoch": 0.8153165606856511, + "grad_norm": 1.18384911576417, + "learning_rate": 1.7362313582269875e-06, + "loss": 0.3299, + "step": 47184 + }, + { + "epoch": 0.81533384019906, + "grad_norm": 0.6449292124577469, + "learning_rate": 1.735916222742723e-06, + "loss": 0.5056, + "step": 47185 + }, + { + "epoch": 0.8153511197124689, + "grad_norm": 1.141325454316162, + "learning_rate": 1.7356011131421947e-06, + "loss": 0.3143, + "step": 47186 + }, + { + "epoch": 0.8153683992258778, + "grad_norm": 0.9468235738902694, + "learning_rate": 1.735286029426393e-06, + "loss": 0.2655, + "step": 47187 + }, + { + "epoch": 0.8153856787392867, + "grad_norm": 1.5511353388447187, + "learning_rate": 1.7349709715963025e-06, + "loss": 0.3685, + "step": 47188 + }, + { + "epoch": 0.8154029582526956, + "grad_norm": 1.3015087356107746, + "learning_rate": 1.7346559396529127e-06, + "loss": 0.3312, + "step": 47189 + }, + { + "epoch": 0.8154202377661045, + "grad_norm": 1.9171226689734029, + "learning_rate": 1.7343409335972071e-06, + "loss": 0.3946, + "step": 47190 + }, + { + "epoch": 0.8154375172795134, + "grad_norm": 1.4902724978406479, + "learning_rate": 1.734025953430173e-06, + "loss": 0.6972, + "step": 47191 + }, + { + "epoch": 0.8154547967929223, + "grad_norm": 1.9525228835886281, + "learning_rate": 1.7337109991527978e-06, + "loss": 0.3425, + "step": 47192 + }, + { + "epoch": 0.8154720763063312, + "grad_norm": 1.5588638288069026, + "learning_rate": 1.7333960707660713e-06, + "loss": 0.5467, + "step": 47193 + }, + { + "epoch": 0.8154893558197401, + "grad_norm": 1.2972362077364301, + "learning_rate": 1.7330811682709759e-06, + "loss": 0.3957, + "step": 47194 + }, + { + "epoch": 0.815506635333149, + "grad_norm": 1.7063732665916733, + "learning_rate": 1.7327662916684962e-06, + "loss": 0.3431, + "step": 47195 + }, + { + "epoch": 0.815523914846558, + "grad_norm": 1.2692886778786558, + "learning_rate": 1.7324514409596205e-06, + "loss": 0.2783, + "step": 47196 + }, + { + "epoch": 0.8155411943599669, + "grad_norm": 1.769364235588888, + "learning_rate": 1.7321366161453358e-06, + "loss": 0.3263, + "step": 47197 + }, + { + "epoch": 0.8155584738733758, + "grad_norm": 1.003308081680486, + "learning_rate": 1.73182181722663e-06, + "loss": 0.3317, + "step": 47198 + }, + { + "epoch": 0.8155757533867847, + "grad_norm": 1.232915076019136, + "learning_rate": 1.7315070442044835e-06, + "loss": 0.3302, + "step": 47199 + }, + { + "epoch": 0.8155930329001936, + "grad_norm": 2.1003759961087907, + "learning_rate": 1.7311922970798879e-06, + "loss": 0.2223, + "step": 47200 + }, + { + "epoch": 0.8156103124136024, + "grad_norm": 0.8013451518535691, + "learning_rate": 1.7308775758538243e-06, + "loss": 0.2959, + "step": 47201 + }, + { + "epoch": 0.8156275919270113, + "grad_norm": 1.0086319629991403, + "learning_rate": 1.7305628805272802e-06, + "loss": 0.3043, + "step": 47202 + }, + { + "epoch": 0.8156448714404202, + "grad_norm": 1.0377073482201578, + "learning_rate": 1.7302482111012419e-06, + "loss": 0.2448, + "step": 47203 + }, + { + "epoch": 0.8156621509538291, + "grad_norm": 1.6406274997491501, + "learning_rate": 1.7299335675766971e-06, + "loss": 0.347, + "step": 47204 + }, + { + "epoch": 0.815679430467238, + "grad_norm": 1.0743606362068467, + "learning_rate": 1.729618949954629e-06, + "loss": 0.3419, + "step": 47205 + }, + { + "epoch": 0.8156967099806469, + "grad_norm": 0.7675414800842817, + "learning_rate": 1.7293043582360203e-06, + "loss": 0.2308, + "step": 47206 + }, + { + "epoch": 0.8157139894940558, + "grad_norm": 1.6421699104660004, + "learning_rate": 1.728989792421859e-06, + "loss": 0.2678, + "step": 47207 + }, + { + "epoch": 0.8157312690074647, + "grad_norm": 1.4597077740015687, + "learning_rate": 1.72867525251313e-06, + "loss": 0.3373, + "step": 47208 + }, + { + "epoch": 0.8157485485208736, + "grad_norm": 1.1394697033929528, + "learning_rate": 1.7283607385108214e-06, + "loss": 0.4701, + "step": 47209 + }, + { + "epoch": 0.8157658280342825, + "grad_norm": 1.009954611321526, + "learning_rate": 1.7280462504159134e-06, + "loss": 0.2414, + "step": 47210 + }, + { + "epoch": 0.8157831075476915, + "grad_norm": 1.4594237265678578, + "learning_rate": 1.7277317882293953e-06, + "loss": 0.4132, + "step": 47211 + }, + { + "epoch": 0.8158003870611004, + "grad_norm": 1.4259467686493674, + "learning_rate": 1.7274173519522475e-06, + "loss": 0.4318, + "step": 47212 + }, + { + "epoch": 0.8158176665745093, + "grad_norm": 1.5858698934559476, + "learning_rate": 1.7271029415854568e-06, + "loss": 0.3979, + "step": 47213 + }, + { + "epoch": 0.8158349460879182, + "grad_norm": 1.837768665206093, + "learning_rate": 1.7267885571300113e-06, + "loss": 0.3865, + "step": 47214 + }, + { + "epoch": 0.8158522256013271, + "grad_norm": 1.3463328532021923, + "learning_rate": 1.7264741985868904e-06, + "loss": 0.2886, + "step": 47215 + }, + { + "epoch": 0.815869505114736, + "grad_norm": 1.1187938632390286, + "learning_rate": 1.7261598659570834e-06, + "loss": 0.3134, + "step": 47216 + }, + { + "epoch": 0.8158867846281449, + "grad_norm": 1.1883751720535294, + "learning_rate": 1.7258455592415702e-06, + "loss": 0.3741, + "step": 47217 + }, + { + "epoch": 0.8159040641415538, + "grad_norm": 1.535487060804668, + "learning_rate": 1.725531278441337e-06, + "loss": 0.3112, + "step": 47218 + }, + { + "epoch": 0.8159213436549627, + "grad_norm": 1.828482710951434, + "learning_rate": 1.7252170235573695e-06, + "loss": 0.4351, + "step": 47219 + }, + { + "epoch": 0.8159386231683716, + "grad_norm": 1.2093172917130457, + "learning_rate": 1.7249027945906528e-06, + "loss": 0.3814, + "step": 47220 + }, + { + "epoch": 0.8159559026817805, + "grad_norm": 0.7695604064974224, + "learning_rate": 1.7245885915421667e-06, + "loss": 0.6016, + "step": 47221 + }, + { + "epoch": 0.8159731821951893, + "grad_norm": 1.9494784069563706, + "learning_rate": 1.724274414412902e-06, + "loss": 0.4985, + "step": 47222 + }, + { + "epoch": 0.8159904617085982, + "grad_norm": 1.5917208734518462, + "learning_rate": 1.7239602632038343e-06, + "loss": 0.4488, + "step": 47223 + }, + { + "epoch": 0.8160077412220071, + "grad_norm": 1.7029206002391974, + "learning_rate": 1.7236461379159542e-06, + "loss": 0.1731, + "step": 47224 + }, + { + "epoch": 0.816025020735416, + "grad_norm": 1.327854026370862, + "learning_rate": 1.7233320385502439e-06, + "loss": 0.5, + "step": 47225 + }, + { + "epoch": 0.816042300248825, + "grad_norm": 0.833413443631955, + "learning_rate": 1.7230179651076851e-06, + "loss": 0.3362, + "step": 47226 + }, + { + "epoch": 0.8160595797622339, + "grad_norm": 1.29727806054149, + "learning_rate": 1.7227039175892657e-06, + "loss": 0.4592, + "step": 47227 + }, + { + "epoch": 0.8160768592756428, + "grad_norm": 1.833579998814593, + "learning_rate": 1.7223898959959639e-06, + "loss": 0.2058, + "step": 47228 + }, + { + "epoch": 0.8160941387890517, + "grad_norm": 1.265107102100408, + "learning_rate": 1.722075900328769e-06, + "loss": 0.5745, + "step": 47229 + }, + { + "epoch": 0.8161114183024606, + "grad_norm": 1.1779427576915813, + "learning_rate": 1.721761930588658e-06, + "loss": 0.3247, + "step": 47230 + }, + { + "epoch": 0.8161286978158695, + "grad_norm": 1.2140781651650825, + "learning_rate": 1.7214479867766188e-06, + "loss": 0.3225, + "step": 47231 + }, + { + "epoch": 0.8161459773292784, + "grad_norm": 1.9396445023735198, + "learning_rate": 1.721134068893633e-06, + "loss": 0.4504, + "step": 47232 + }, + { + "epoch": 0.8161632568426873, + "grad_norm": 1.126725664202784, + "learning_rate": 1.7208201769406873e-06, + "loss": 0.359, + "step": 47233 + }, + { + "epoch": 0.8161805363560962, + "grad_norm": 0.5931221482393759, + "learning_rate": 1.7205063109187592e-06, + "loss": 0.4748, + "step": 47234 + }, + { + "epoch": 0.8161978158695051, + "grad_norm": 0.9926668138377734, + "learning_rate": 1.7201924708288375e-06, + "loss": 0.2423, + "step": 47235 + }, + { + "epoch": 0.816215095382914, + "grad_norm": 1.4706252622915017, + "learning_rate": 1.7198786566718995e-06, + "loss": 0.2987, + "step": 47236 + }, + { + "epoch": 0.8162323748963229, + "grad_norm": 1.324464968638927, + "learning_rate": 1.7195648684489307e-06, + "loss": 0.4822, + "step": 47237 + }, + { + "epoch": 0.8162496544097319, + "grad_norm": 1.5498414480451481, + "learning_rate": 1.7192511061609162e-06, + "loss": 0.3245, + "step": 47238 + }, + { + "epoch": 0.8162669339231408, + "grad_norm": 1.165801480423468, + "learning_rate": 1.718937369808834e-06, + "loss": 0.7496, + "step": 47239 + }, + { + "epoch": 0.8162842134365497, + "grad_norm": 1.3434129058478157, + "learning_rate": 1.7186236593936722e-06, + "loss": 0.2855, + "step": 47240 + }, + { + "epoch": 0.8163014929499586, + "grad_norm": 1.3475707685738063, + "learning_rate": 1.7183099749164089e-06, + "loss": 0.2733, + "step": 47241 + }, + { + "epoch": 0.8163187724633675, + "grad_norm": 1.0226536283727345, + "learning_rate": 1.7179963163780267e-06, + "loss": 0.4527, + "step": 47242 + }, + { + "epoch": 0.8163360519767764, + "grad_norm": 1.4337669381863967, + "learning_rate": 1.7176826837795103e-06, + "loss": 0.3481, + "step": 47243 + }, + { + "epoch": 0.8163533314901852, + "grad_norm": 1.318522473869876, + "learning_rate": 1.7173690771218432e-06, + "loss": 0.3735, + "step": 47244 + }, + { + "epoch": 0.8163706110035941, + "grad_norm": 1.1059513805287047, + "learning_rate": 1.717055496406006e-06, + "loss": 0.3572, + "step": 47245 + }, + { + "epoch": 0.816387890517003, + "grad_norm": 1.3958871435725964, + "learning_rate": 1.7167419416329778e-06, + "loss": 0.4418, + "step": 47246 + }, + { + "epoch": 0.8164051700304119, + "grad_norm": 1.275048314581582, + "learning_rate": 1.7164284128037423e-06, + "loss": 0.4126, + "step": 47247 + }, + { + "epoch": 0.8164224495438208, + "grad_norm": 1.1486832855867353, + "learning_rate": 1.7161149099192842e-06, + "loss": 0.3474, + "step": 47248 + }, + { + "epoch": 0.8164397290572297, + "grad_norm": 1.2732246904113678, + "learning_rate": 1.715801432980585e-06, + "loss": 0.337, + "step": 47249 + }, + { + "epoch": 0.8164570085706386, + "grad_norm": 1.5659036343356714, + "learning_rate": 1.7154879819886239e-06, + "loss": 0.4577, + "step": 47250 + }, + { + "epoch": 0.8164742880840475, + "grad_norm": 1.9119716284698385, + "learning_rate": 1.7151745569443855e-06, + "loss": 0.5102, + "step": 47251 + }, + { + "epoch": 0.8164915675974564, + "grad_norm": 1.158028898260018, + "learning_rate": 1.7148611578488483e-06, + "loss": 0.2916, + "step": 47252 + }, + { + "epoch": 0.8165088471108654, + "grad_norm": 1.1715403748984834, + "learning_rate": 1.7145477847029956e-06, + "loss": 0.3959, + "step": 47253 + }, + { + "epoch": 0.8165261266242743, + "grad_norm": 1.1389589197252268, + "learning_rate": 1.714234437507808e-06, + "loss": 0.3311, + "step": 47254 + }, + { + "epoch": 0.8165434061376832, + "grad_norm": 1.6594797407528854, + "learning_rate": 1.713921116264271e-06, + "loss": 0.3989, + "step": 47255 + }, + { + "epoch": 0.8165606856510921, + "grad_norm": 1.0442895291487548, + "learning_rate": 1.713607820973363e-06, + "loss": 0.382, + "step": 47256 + }, + { + "epoch": 0.816577965164501, + "grad_norm": 1.418356214325066, + "learning_rate": 1.7132945516360622e-06, + "loss": 0.5314, + "step": 47257 + }, + { + "epoch": 0.8165952446779099, + "grad_norm": 1.1452358451901357, + "learning_rate": 1.712981308253353e-06, + "loss": 0.3993, + "step": 47258 + }, + { + "epoch": 0.8166125241913188, + "grad_norm": 1.2602492027816172, + "learning_rate": 1.712668090826215e-06, + "loss": 0.3558, + "step": 47259 + }, + { + "epoch": 0.8166298037047277, + "grad_norm": 1.4533991825378552, + "learning_rate": 1.7123548993556339e-06, + "loss": 0.5622, + "step": 47260 + }, + { + "epoch": 0.8166470832181366, + "grad_norm": 1.4477495584522315, + "learning_rate": 1.7120417338425843e-06, + "loss": 0.2493, + "step": 47261 + }, + { + "epoch": 0.8166643627315455, + "grad_norm": 1.2943627552167778, + "learning_rate": 1.7117285942880524e-06, + "loss": 0.262, + "step": 47262 + }, + { + "epoch": 0.8166816422449544, + "grad_norm": 1.0045401725188774, + "learning_rate": 1.7114154806930138e-06, + "loss": 0.3485, + "step": 47263 + }, + { + "epoch": 0.8166989217583633, + "grad_norm": 1.3636770768860806, + "learning_rate": 1.7111023930584526e-06, + "loss": 0.4744, + "step": 47264 + }, + { + "epoch": 0.8167162012717721, + "grad_norm": 1.636261817219713, + "learning_rate": 1.7107893313853508e-06, + "loss": 0.1726, + "step": 47265 + }, + { + "epoch": 0.816733480785181, + "grad_norm": 1.0242441760626078, + "learning_rate": 1.7104762956746834e-06, + "loss": 0.273, + "step": 47266 + }, + { + "epoch": 0.81675076029859, + "grad_norm": 1.4759483166240006, + "learning_rate": 1.7101632859274376e-06, + "loss": 0.3062, + "step": 47267 + }, + { + "epoch": 0.8167680398119989, + "grad_norm": 1.0857539898922162, + "learning_rate": 1.7098503021445877e-06, + "loss": 0.4507, + "step": 47268 + }, + { + "epoch": 0.8167853193254078, + "grad_norm": 0.9496087941325385, + "learning_rate": 1.709537344327119e-06, + "loss": 0.315, + "step": 47269 + }, + { + "epoch": 0.8168025988388167, + "grad_norm": 1.5087858395186151, + "learning_rate": 1.709224412476007e-06, + "loss": 0.2384, + "step": 47270 + }, + { + "epoch": 0.8168198783522256, + "grad_norm": 1.5667469191436882, + "learning_rate": 1.7089115065922335e-06, + "loss": 0.4677, + "step": 47271 + }, + { + "epoch": 0.8168371578656345, + "grad_norm": 1.7000931296485746, + "learning_rate": 1.7085986266767807e-06, + "loss": 0.2674, + "step": 47272 + }, + { + "epoch": 0.8168544373790434, + "grad_norm": 1.393307696204571, + "learning_rate": 1.708285772730629e-06, + "loss": 0.4259, + "step": 47273 + }, + { + "epoch": 0.8168717168924523, + "grad_norm": 1.1499696566377944, + "learning_rate": 1.7079729447547533e-06, + "loss": 0.3845, + "step": 47274 + }, + { + "epoch": 0.8168889964058612, + "grad_norm": 1.081646357068684, + "learning_rate": 1.7076601427501394e-06, + "loss": 0.5637, + "step": 47275 + }, + { + "epoch": 0.8169062759192701, + "grad_norm": 1.630886398276666, + "learning_rate": 1.707347366717762e-06, + "loss": 0.2854, + "step": 47276 + }, + { + "epoch": 0.816923555432679, + "grad_norm": 1.4426145943756872, + "learning_rate": 1.7070346166586028e-06, + "loss": 0.5198, + "step": 47277 + }, + { + "epoch": 0.8169408349460879, + "grad_norm": 1.6620062642742675, + "learning_rate": 1.7067218925736428e-06, + "loss": 0.5821, + "step": 47278 + }, + { + "epoch": 0.8169581144594968, + "grad_norm": 0.9612605640549247, + "learning_rate": 1.7064091944638594e-06, + "loss": 0.3589, + "step": 47279 + }, + { + "epoch": 0.8169753939729058, + "grad_norm": 0.5684948121130299, + "learning_rate": 1.706096522330234e-06, + "loss": 0.5202, + "step": 47280 + }, + { + "epoch": 0.8169926734863147, + "grad_norm": 1.315665954822278, + "learning_rate": 1.7057838761737421e-06, + "loss": 0.2763, + "step": 47281 + }, + { + "epoch": 0.8170099529997236, + "grad_norm": 1.0323241115906452, + "learning_rate": 1.7054712559953656e-06, + "loss": 0.1769, + "step": 47282 + }, + { + "epoch": 0.8170272325131325, + "grad_norm": 1.0635218994596887, + "learning_rate": 1.7051586617960836e-06, + "loss": 0.2384, + "step": 47283 + }, + { + "epoch": 0.8170445120265414, + "grad_norm": 1.3082297552828461, + "learning_rate": 1.7048460935768785e-06, + "loss": 0.2623, + "step": 47284 + }, + { + "epoch": 0.8170617915399503, + "grad_norm": 1.7032033471299615, + "learning_rate": 1.7045335513387217e-06, + "loss": 0.381, + "step": 47285 + }, + { + "epoch": 0.8170790710533591, + "grad_norm": 1.3724991397478679, + "learning_rate": 1.7042210350825994e-06, + "loss": 0.3821, + "step": 47286 + }, + { + "epoch": 0.817096350566768, + "grad_norm": 0.8436907498473523, + "learning_rate": 1.703908544809485e-06, + "loss": 0.2705, + "step": 47287 + }, + { + "epoch": 0.8171136300801769, + "grad_norm": 1.1011905594744922, + "learning_rate": 1.70359608052036e-06, + "loss": 0.2914, + "step": 47288 + }, + { + "epoch": 0.8171309095935858, + "grad_norm": 0.9311767133670127, + "learning_rate": 1.7032836422162035e-06, + "loss": 0.3225, + "step": 47289 + }, + { + "epoch": 0.8171481891069947, + "grad_norm": 1.0677272068045913, + "learning_rate": 1.7029712298979917e-06, + "loss": 0.2985, + "step": 47290 + }, + { + "epoch": 0.8171654686204036, + "grad_norm": 1.3438055199859034, + "learning_rate": 1.7026588435667069e-06, + "loss": 0.4186, + "step": 47291 + }, + { + "epoch": 0.8171827481338125, + "grad_norm": 2.2091685079593146, + "learning_rate": 1.7023464832233227e-06, + "loss": 0.276, + "step": 47292 + }, + { + "epoch": 0.8172000276472214, + "grad_norm": 1.2534873542769156, + "learning_rate": 1.7020341488688207e-06, + "loss": 0.5825, + "step": 47293 + }, + { + "epoch": 0.8172173071606303, + "grad_norm": 1.0334389667950188, + "learning_rate": 1.7017218405041769e-06, + "loss": 0.4146, + "step": 47294 + }, + { + "epoch": 0.8172345866740393, + "grad_norm": 0.6451284257889702, + "learning_rate": 1.7014095581303748e-06, + "loss": 0.7387, + "step": 47295 + }, + { + "epoch": 0.8172518661874482, + "grad_norm": 2.272790080895738, + "learning_rate": 1.7010973017483879e-06, + "loss": 0.2364, + "step": 47296 + }, + { + "epoch": 0.8172691457008571, + "grad_norm": 1.864728527304189, + "learning_rate": 1.7007850713591922e-06, + "loss": 0.396, + "step": 47297 + }, + { + "epoch": 0.817286425214266, + "grad_norm": 0.8419656846628871, + "learning_rate": 1.7004728669637694e-06, + "loss": 0.2656, + "step": 47298 + }, + { + "epoch": 0.8173037047276749, + "grad_norm": 1.2937284287718096, + "learning_rate": 1.700160688563095e-06, + "loss": 0.3418, + "step": 47299 + }, + { + "epoch": 0.8173209842410838, + "grad_norm": 1.3821331283855418, + "learning_rate": 1.6998485361581517e-06, + "loss": 0.2742, + "step": 47300 + }, + { + "epoch": 0.8173382637544927, + "grad_norm": 1.4333226318462335, + "learning_rate": 1.69953640974991e-06, + "loss": 0.2757, + "step": 47301 + }, + { + "epoch": 0.8173555432679016, + "grad_norm": 1.4037903865392578, + "learning_rate": 1.6992243093393546e-06, + "loss": 0.363, + "step": 47302 + }, + { + "epoch": 0.8173728227813105, + "grad_norm": 1.119154016521404, + "learning_rate": 1.6989122349274567e-06, + "loss": 0.4585, + "step": 47303 + }, + { + "epoch": 0.8173901022947194, + "grad_norm": 1.9087894284097457, + "learning_rate": 1.6986001865151957e-06, + "loss": 0.1852, + "step": 47304 + }, + { + "epoch": 0.8174073818081283, + "grad_norm": 1.2909626898685291, + "learning_rate": 1.698288164103551e-06, + "loss": 0.3604, + "step": 47305 + }, + { + "epoch": 0.8174246613215372, + "grad_norm": 1.3741978413625517, + "learning_rate": 1.6979761676935014e-06, + "loss": 0.2022, + "step": 47306 + }, + { + "epoch": 0.817441940834946, + "grad_norm": 1.2755602399721035, + "learning_rate": 1.6976641972860196e-06, + "loss": 0.3398, + "step": 47307 + }, + { + "epoch": 0.8174592203483549, + "grad_norm": 1.3041394786512863, + "learning_rate": 1.697352252882084e-06, + "loss": 0.2513, + "step": 47308 + }, + { + "epoch": 0.8174764998617639, + "grad_norm": 1.8680013692439301, + "learning_rate": 1.6970403344826737e-06, + "loss": 0.318, + "step": 47309 + }, + { + "epoch": 0.8174937793751728, + "grad_norm": 1.7370360706175847, + "learning_rate": 1.6967284420887608e-06, + "loss": 0.3884, + "step": 47310 + }, + { + "epoch": 0.8175110588885817, + "grad_norm": 1.2304877615662677, + "learning_rate": 1.6964165757013262e-06, + "loss": 0.395, + "step": 47311 + }, + { + "epoch": 0.8175283384019906, + "grad_norm": 0.9086688254823054, + "learning_rate": 1.6961047353213466e-06, + "loss": 0.5189, + "step": 47312 + }, + { + "epoch": 0.8175456179153995, + "grad_norm": 1.5698378739213874, + "learning_rate": 1.6957929209497993e-06, + "loss": 0.502, + "step": 47313 + }, + { + "epoch": 0.8175628974288084, + "grad_norm": 2.163598634368765, + "learning_rate": 1.6954811325876576e-06, + "loss": 0.3482, + "step": 47314 + }, + { + "epoch": 0.8175801769422173, + "grad_norm": 1.6017225215276172, + "learning_rate": 1.6951693702359029e-06, + "loss": 0.2794, + "step": 47315 + }, + { + "epoch": 0.8175974564556262, + "grad_norm": 1.624180510872383, + "learning_rate": 1.694857633895507e-06, + "loss": 0.4343, + "step": 47316 + }, + { + "epoch": 0.8176147359690351, + "grad_norm": 1.2642867844009746, + "learning_rate": 1.6945459235674467e-06, + "loss": 0.2397, + "step": 47317 + }, + { + "epoch": 0.817632015482444, + "grad_norm": 1.7009307477859634, + "learning_rate": 1.6942342392527034e-06, + "loss": 0.3345, + "step": 47318 + }, + { + "epoch": 0.8176492949958529, + "grad_norm": 2.2307554193026493, + "learning_rate": 1.6939225809522465e-06, + "loss": 0.2971, + "step": 47319 + }, + { + "epoch": 0.8176665745092618, + "grad_norm": 1.427897289743766, + "learning_rate": 1.6936109486670582e-06, + "loss": 0.5465, + "step": 47320 + }, + { + "epoch": 0.8176838540226707, + "grad_norm": 1.5855954486545665, + "learning_rate": 1.6932993423981092e-06, + "loss": 0.5161, + "step": 47321 + }, + { + "epoch": 0.8177011335360796, + "grad_norm": 2.0110062089832885, + "learning_rate": 1.6929877621463775e-06, + "loss": 0.4436, + "step": 47322 + }, + { + "epoch": 0.8177184130494886, + "grad_norm": 1.2292952650450866, + "learning_rate": 1.69267620791284e-06, + "loss": 0.524, + "step": 47323 + }, + { + "epoch": 0.8177356925628975, + "grad_norm": 0.6912670220274411, + "learning_rate": 1.692364679698474e-06, + "loss": 0.1093, + "step": 47324 + }, + { + "epoch": 0.8177529720763064, + "grad_norm": 0.5562934262248013, + "learning_rate": 1.6920531775042503e-06, + "loss": 0.7268, + "step": 47325 + }, + { + "epoch": 0.8177702515897153, + "grad_norm": 1.7769424264608502, + "learning_rate": 1.691741701331151e-06, + "loss": 0.3518, + "step": 47326 + }, + { + "epoch": 0.8177875311031242, + "grad_norm": 1.6036231392041995, + "learning_rate": 1.691430251180145e-06, + "loss": 0.3604, + "step": 47327 + }, + { + "epoch": 0.817804810616533, + "grad_norm": 1.4091516612177295, + "learning_rate": 1.6911188270522105e-06, + "loss": 0.3095, + "step": 47328 + }, + { + "epoch": 0.8178220901299419, + "grad_norm": 1.4191060848095802, + "learning_rate": 1.6908074289483267e-06, + "loss": 0.3564, + "step": 47329 + }, + { + "epoch": 0.8178393696433508, + "grad_norm": 0.6710134766776286, + "learning_rate": 1.6904960568694628e-06, + "loss": 0.645, + "step": 47330 + }, + { + "epoch": 0.8178566491567597, + "grad_norm": 0.9264276780741267, + "learning_rate": 1.6901847108165982e-06, + "loss": 0.33, + "step": 47331 + }, + { + "epoch": 0.8178739286701686, + "grad_norm": 1.4774408967863362, + "learning_rate": 1.689873390790705e-06, + "loss": 0.4867, + "step": 47332 + }, + { + "epoch": 0.8178912081835775, + "grad_norm": 1.2253358891117778, + "learning_rate": 1.6895620967927596e-06, + "loss": 0.1598, + "step": 47333 + }, + { + "epoch": 0.8179084876969864, + "grad_norm": 1.6149304560330233, + "learning_rate": 1.6892508288237374e-06, + "loss": 0.2259, + "step": 47334 + }, + { + "epoch": 0.8179257672103953, + "grad_norm": 0.7741540373897852, + "learning_rate": 1.6889395868846159e-06, + "loss": 0.1564, + "step": 47335 + }, + { + "epoch": 0.8179430467238042, + "grad_norm": 0.6107978589333022, + "learning_rate": 1.6886283709763652e-06, + "loss": 0.7011, + "step": 47336 + }, + { + "epoch": 0.8179603262372132, + "grad_norm": 1.3779570917349502, + "learning_rate": 1.6883171810999632e-06, + "loss": 0.3939, + "step": 47337 + }, + { + "epoch": 0.8179776057506221, + "grad_norm": 1.5050639021437615, + "learning_rate": 1.6880060172563828e-06, + "loss": 0.2156, + "step": 47338 + }, + { + "epoch": 0.817994885264031, + "grad_norm": 1.704564014627965, + "learning_rate": 1.6876948794465974e-06, + "loss": 0.3411, + "step": 47339 + }, + { + "epoch": 0.8180121647774399, + "grad_norm": 1.217637006958028, + "learning_rate": 1.6873837676715866e-06, + "loss": 0.3588, + "step": 47340 + }, + { + "epoch": 0.8180294442908488, + "grad_norm": 1.7962533466049773, + "learning_rate": 1.687072681932319e-06, + "loss": 0.4358, + "step": 47341 + }, + { + "epoch": 0.8180467238042577, + "grad_norm": 1.48979292496234, + "learning_rate": 1.6867616222297744e-06, + "loss": 0.2601, + "step": 47342 + }, + { + "epoch": 0.8180640033176666, + "grad_norm": 1.059032289143589, + "learning_rate": 1.6864505885649218e-06, + "loss": 0.6348, + "step": 47343 + }, + { + "epoch": 0.8180812828310755, + "grad_norm": 1.1524575300375421, + "learning_rate": 1.686139580938737e-06, + "loss": 0.4247, + "step": 47344 + }, + { + "epoch": 0.8180985623444844, + "grad_norm": 1.1619056157782641, + "learning_rate": 1.685828599352195e-06, + "loss": 0.2897, + "step": 47345 + }, + { + "epoch": 0.8181158418578933, + "grad_norm": 1.2371960494841672, + "learning_rate": 1.6855176438062726e-06, + "loss": 0.2485, + "step": 47346 + }, + { + "epoch": 0.8181331213713022, + "grad_norm": 0.7717138031587356, + "learning_rate": 1.685206714301938e-06, + "loss": 0.6656, + "step": 47347 + }, + { + "epoch": 0.8181504008847111, + "grad_norm": 0.9383702930464493, + "learning_rate": 1.6848958108401703e-06, + "loss": 0.4045, + "step": 47348 + }, + { + "epoch": 0.8181676803981199, + "grad_norm": 1.2958193421003465, + "learning_rate": 1.684584933421941e-06, + "loss": 0.3083, + "step": 47349 + }, + { + "epoch": 0.8181849599115288, + "grad_norm": 1.2303941925900446, + "learning_rate": 1.6842740820482217e-06, + "loss": 0.3406, + "step": 47350 + }, + { + "epoch": 0.8182022394249377, + "grad_norm": 1.3867615202504184, + "learning_rate": 1.6839632567199871e-06, + "loss": 0.3714, + "step": 47351 + }, + { + "epoch": 0.8182195189383467, + "grad_norm": 0.577081517025947, + "learning_rate": 1.6836524574382117e-06, + "loss": 0.2606, + "step": 47352 + }, + { + "epoch": 0.8182367984517556, + "grad_norm": 1.5161670049463847, + "learning_rate": 1.683341684203872e-06, + "loss": 0.2514, + "step": 47353 + }, + { + "epoch": 0.8182540779651645, + "grad_norm": 0.8027541416843978, + "learning_rate": 1.6830309370179354e-06, + "loss": 0.215, + "step": 47354 + }, + { + "epoch": 0.8182713574785734, + "grad_norm": 1.7119204407215527, + "learning_rate": 1.6827202158813805e-06, + "loss": 0.3119, + "step": 47355 + }, + { + "epoch": 0.8182886369919823, + "grad_norm": 1.4944656756045813, + "learning_rate": 1.6824095207951753e-06, + "loss": 0.3583, + "step": 47356 + }, + { + "epoch": 0.8183059165053912, + "grad_norm": 2.4367002333968513, + "learning_rate": 1.6820988517602954e-06, + "loss": 0.433, + "step": 47357 + }, + { + "epoch": 0.8183231960188001, + "grad_norm": 1.1679613340400257, + "learning_rate": 1.681788208777716e-06, + "loss": 0.2802, + "step": 47358 + }, + { + "epoch": 0.818340475532209, + "grad_norm": 1.80596060936379, + "learning_rate": 1.6814775918484073e-06, + "loss": 0.4786, + "step": 47359 + }, + { + "epoch": 0.8183577550456179, + "grad_norm": 1.1861329881502498, + "learning_rate": 1.6811670009733438e-06, + "loss": 0.3559, + "step": 47360 + }, + { + "epoch": 0.8183750345590268, + "grad_norm": 1.2780833939143064, + "learning_rate": 1.680856436153495e-06, + "loss": 0.3617, + "step": 47361 + }, + { + "epoch": 0.8183923140724357, + "grad_norm": 1.2513317114175102, + "learning_rate": 1.680545897389837e-06, + "loss": 0.3955, + "step": 47362 + }, + { + "epoch": 0.8184095935858446, + "grad_norm": 1.4752973116316261, + "learning_rate": 1.680235384683342e-06, + "loss": 0.3823, + "step": 47363 + }, + { + "epoch": 0.8184268730992535, + "grad_norm": 1.3975339771435191, + "learning_rate": 1.6799248980349836e-06, + "loss": 0.2093, + "step": 47364 + }, + { + "epoch": 0.8184441526126625, + "grad_norm": 2.3453746581383808, + "learning_rate": 1.67961443744573e-06, + "loss": 0.4512, + "step": 47365 + }, + { + "epoch": 0.8184614321260714, + "grad_norm": 1.334591082950425, + "learning_rate": 1.6793040029165596e-06, + "loss": 0.416, + "step": 47366 + }, + { + "epoch": 0.8184787116394803, + "grad_norm": 1.3119312383390507, + "learning_rate": 1.678993594448438e-06, + "loss": 0.4924, + "step": 47367 + }, + { + "epoch": 0.8184959911528892, + "grad_norm": 0.8193115972260043, + "learning_rate": 1.6786832120423413e-06, + "loss": 0.2777, + "step": 47368 + }, + { + "epoch": 0.8185132706662981, + "grad_norm": 1.2960898040854327, + "learning_rate": 1.6783728556992441e-06, + "loss": 0.2887, + "step": 47369 + }, + { + "epoch": 0.818530550179707, + "grad_norm": 2.0426453778911755, + "learning_rate": 1.6780625254201122e-06, + "loss": 0.5021, + "step": 47370 + }, + { + "epoch": 0.8185478296931158, + "grad_norm": 0.9501212995662897, + "learning_rate": 1.677752221205924e-06, + "loss": 0.5291, + "step": 47371 + }, + { + "epoch": 0.8185651092065247, + "grad_norm": 1.6601108025656626, + "learning_rate": 1.6774419430576461e-06, + "loss": 0.3912, + "step": 47372 + }, + { + "epoch": 0.8185823887199336, + "grad_norm": 0.7575826443882101, + "learning_rate": 1.6771316909762514e-06, + "loss": 0.3856, + "step": 47373 + }, + { + "epoch": 0.8185996682333425, + "grad_norm": 0.49233136117896387, + "learning_rate": 1.6768214649627134e-06, + "loss": 0.7732, + "step": 47374 + }, + { + "epoch": 0.8186169477467514, + "grad_norm": 1.2555148944396837, + "learning_rate": 1.676511265018006e-06, + "loss": 0.3042, + "step": 47375 + }, + { + "epoch": 0.8186342272601603, + "grad_norm": 1.231302206891792, + "learning_rate": 1.6762010911430948e-06, + "loss": 0.3689, + "step": 47376 + }, + { + "epoch": 0.8186515067735692, + "grad_norm": 1.293204453635726, + "learning_rate": 1.675890943338957e-06, + "loss": 0.6091, + "step": 47377 + }, + { + "epoch": 0.8186687862869781, + "grad_norm": 0.8212248911657495, + "learning_rate": 1.6755808216065595e-06, + "loss": 0.3154, + "step": 47378 + }, + { + "epoch": 0.818686065800387, + "grad_norm": 1.577376805775466, + "learning_rate": 1.6752707259468747e-06, + "loss": 0.2568, + "step": 47379 + }, + { + "epoch": 0.818703345313796, + "grad_norm": 1.0280663578028717, + "learning_rate": 1.6749606563608778e-06, + "loss": 0.4016, + "step": 47380 + }, + { + "epoch": 0.8187206248272049, + "grad_norm": 1.3623539725446838, + "learning_rate": 1.6746506128495332e-06, + "loss": 0.2002, + "step": 47381 + }, + { + "epoch": 0.8187379043406138, + "grad_norm": 1.6758446437750385, + "learning_rate": 1.6743405954138192e-06, + "loss": 0.2825, + "step": 47382 + }, + { + "epoch": 0.8187551838540227, + "grad_norm": 1.1590780366446094, + "learning_rate": 1.6740306040547006e-06, + "loss": 0.5731, + "step": 47383 + }, + { + "epoch": 0.8187724633674316, + "grad_norm": 1.5300624444621833, + "learning_rate": 1.6737206387731508e-06, + "loss": 0.2084, + "step": 47384 + }, + { + "epoch": 0.8187897428808405, + "grad_norm": 1.5812958821539689, + "learning_rate": 1.6734106995701405e-06, + "loss": 0.3428, + "step": 47385 + }, + { + "epoch": 0.8188070223942494, + "grad_norm": 0.8287736270673562, + "learning_rate": 1.673100786446643e-06, + "loss": 0.2893, + "step": 47386 + }, + { + "epoch": 0.8188243019076583, + "grad_norm": 0.5026433220067495, + "learning_rate": 1.6727908994036246e-06, + "loss": 0.6468, + "step": 47387 + }, + { + "epoch": 0.8188415814210672, + "grad_norm": 1.0653154847785855, + "learning_rate": 1.6724810384420598e-06, + "loss": 0.3855, + "step": 47388 + }, + { + "epoch": 0.8188588609344761, + "grad_norm": 1.0240524038524403, + "learning_rate": 1.6721712035629179e-06, + "loss": 0.1982, + "step": 47389 + }, + { + "epoch": 0.818876140447885, + "grad_norm": 0.8768261463987649, + "learning_rate": 1.6718613947671658e-06, + "loss": 0.5785, + "step": 47390 + }, + { + "epoch": 0.818893419961294, + "grad_norm": 1.3751608495410987, + "learning_rate": 1.6715516120557763e-06, + "loss": 0.3549, + "step": 47391 + }, + { + "epoch": 0.8189106994747027, + "grad_norm": 1.5935455421101399, + "learning_rate": 1.6712418554297217e-06, + "loss": 0.185, + "step": 47392 + }, + { + "epoch": 0.8189279789881116, + "grad_norm": 0.8780751256972287, + "learning_rate": 1.6709321248899712e-06, + "loss": 0.3779, + "step": 47393 + }, + { + "epoch": 0.8189452585015206, + "grad_norm": 2.3892520336450263, + "learning_rate": 1.6706224204374933e-06, + "loss": 0.3455, + "step": 47394 + }, + { + "epoch": 0.8189625380149295, + "grad_norm": 1.6741027544641072, + "learning_rate": 1.67031274207326e-06, + "loss": 0.2243, + "step": 47395 + }, + { + "epoch": 0.8189798175283384, + "grad_norm": 1.31235223277616, + "learning_rate": 1.670003089798239e-06, + "loss": 0.245, + "step": 47396 + }, + { + "epoch": 0.8189970970417473, + "grad_norm": 0.8064570284820769, + "learning_rate": 1.669693463613401e-06, + "loss": 0.669, + "step": 47397 + }, + { + "epoch": 0.8190143765551562, + "grad_norm": 1.3720279963016704, + "learning_rate": 1.6693838635197158e-06, + "loss": 0.3569, + "step": 47398 + }, + { + "epoch": 0.8190316560685651, + "grad_norm": 0.8865891551991972, + "learning_rate": 1.6690742895181566e-06, + "loss": 0.2832, + "step": 47399 + }, + { + "epoch": 0.819048935581974, + "grad_norm": 1.1983042406978321, + "learning_rate": 1.6687647416096885e-06, + "loss": 0.4986, + "step": 47400 + }, + { + "epoch": 0.8190662150953829, + "grad_norm": 1.458269937069757, + "learning_rate": 1.6684552197952818e-06, + "loss": 0.4288, + "step": 47401 + }, + { + "epoch": 0.8190834946087918, + "grad_norm": 2.0040230022171297, + "learning_rate": 1.6681457240759058e-06, + "loss": 0.3066, + "step": 47402 + }, + { + "epoch": 0.8191007741222007, + "grad_norm": 1.499169432768676, + "learning_rate": 1.6678362544525294e-06, + "loss": 0.465, + "step": 47403 + }, + { + "epoch": 0.8191180536356096, + "grad_norm": 0.7110167621038551, + "learning_rate": 1.6675268109261267e-06, + "loss": 0.24, + "step": 47404 + }, + { + "epoch": 0.8191353331490185, + "grad_norm": 0.765153865310694, + "learning_rate": 1.667217393497661e-06, + "loss": 0.2014, + "step": 47405 + }, + { + "epoch": 0.8191526126624274, + "grad_norm": 1.1251134268209968, + "learning_rate": 1.666908002168106e-06, + "loss": 0.2535, + "step": 47406 + }, + { + "epoch": 0.8191698921758364, + "grad_norm": 1.2390280191295948, + "learning_rate": 1.6665986369384257e-06, + "loss": 0.3829, + "step": 47407 + }, + { + "epoch": 0.8191871716892453, + "grad_norm": 1.445457404218387, + "learning_rate": 1.6662892978095912e-06, + "loss": 0.3582, + "step": 47408 + }, + { + "epoch": 0.8192044512026542, + "grad_norm": 2.0269139738330066, + "learning_rate": 1.6659799847825731e-06, + "loss": 0.3085, + "step": 47409 + }, + { + "epoch": 0.8192217307160631, + "grad_norm": 1.3682931550381852, + "learning_rate": 1.6656706978583403e-06, + "loss": 0.2627, + "step": 47410 + }, + { + "epoch": 0.819239010229472, + "grad_norm": 1.507458990409117, + "learning_rate": 1.6653614370378613e-06, + "loss": 0.3063, + "step": 47411 + }, + { + "epoch": 0.8192562897428809, + "grad_norm": 1.0582236513732737, + "learning_rate": 1.6650522023221006e-06, + "loss": 0.2943, + "step": 47412 + }, + { + "epoch": 0.8192735692562897, + "grad_norm": 1.3073326722941403, + "learning_rate": 1.6647429937120308e-06, + "loss": 0.2201, + "step": 47413 + }, + { + "epoch": 0.8192908487696986, + "grad_norm": 1.7256653418915933, + "learning_rate": 1.664433811208619e-06, + "loss": 0.3824, + "step": 47414 + }, + { + "epoch": 0.8193081282831075, + "grad_norm": 1.821760072267039, + "learning_rate": 1.6641246548128354e-06, + "loss": 0.4446, + "step": 47415 + }, + { + "epoch": 0.8193254077965164, + "grad_norm": 1.228751248297671, + "learning_rate": 1.663815524525645e-06, + "loss": 0.5095, + "step": 47416 + }, + { + "epoch": 0.8193426873099253, + "grad_norm": 1.2037852927426806, + "learning_rate": 1.663506420348021e-06, + "loss": 0.2217, + "step": 47417 + }, + { + "epoch": 0.8193599668233342, + "grad_norm": 1.136024742287585, + "learning_rate": 1.6631973422809256e-06, + "loss": 0.3762, + "step": 47418 + }, + { + "epoch": 0.8193772463367431, + "grad_norm": 1.4712991629246603, + "learning_rate": 1.6628882903253296e-06, + "loss": 0.3964, + "step": 47419 + }, + { + "epoch": 0.819394525850152, + "grad_norm": 1.053764644118633, + "learning_rate": 1.6625792644822037e-06, + "loss": 0.3714, + "step": 47420 + }, + { + "epoch": 0.819411805363561, + "grad_norm": 1.2511266175711921, + "learning_rate": 1.6622702647525103e-06, + "loss": 0.5615, + "step": 47421 + }, + { + "epoch": 0.8194290848769699, + "grad_norm": 1.3943170411575825, + "learning_rate": 1.661961291137223e-06, + "loss": 0.2946, + "step": 47422 + }, + { + "epoch": 0.8194463643903788, + "grad_norm": 0.7881615703996935, + "learning_rate": 1.6616523436373033e-06, + "loss": 0.2029, + "step": 47423 + }, + { + "epoch": 0.8194636439037877, + "grad_norm": 1.0489169252211306, + "learning_rate": 1.6613434222537262e-06, + "loss": 0.2029, + "step": 47424 + }, + { + "epoch": 0.8194809234171966, + "grad_norm": 0.8158543300554764, + "learning_rate": 1.6610345269874483e-06, + "loss": 0.3919, + "step": 47425 + }, + { + "epoch": 0.8194982029306055, + "grad_norm": 1.058495574754344, + "learning_rate": 1.6607256578394504e-06, + "loss": 0.2624, + "step": 47426 + }, + { + "epoch": 0.8195154824440144, + "grad_norm": 1.1898333340774494, + "learning_rate": 1.6604168148106902e-06, + "loss": 0.2303, + "step": 47427 + }, + { + "epoch": 0.8195327619574233, + "grad_norm": 0.9551467209818192, + "learning_rate": 1.6601079979021416e-06, + "loss": 0.9149, + "step": 47428 + }, + { + "epoch": 0.8195500414708322, + "grad_norm": 0.8361485335293756, + "learning_rate": 1.6597992071147661e-06, + "loss": 0.272, + "step": 47429 + }, + { + "epoch": 0.8195673209842411, + "grad_norm": 1.4953635695759364, + "learning_rate": 1.659490442449535e-06, + "loss": 0.3974, + "step": 47430 + }, + { + "epoch": 0.81958460049765, + "grad_norm": 1.0608053025585744, + "learning_rate": 1.6591817039074122e-06, + "loss": 0.4541, + "step": 47431 + }, + { + "epoch": 0.8196018800110589, + "grad_norm": 1.1287980356015168, + "learning_rate": 1.6588729914893653e-06, + "loss": 0.4101, + "step": 47432 + }, + { + "epoch": 0.8196191595244678, + "grad_norm": 1.048629940398497, + "learning_rate": 1.6585643051963652e-06, + "loss": 0.491, + "step": 47433 + }, + { + "epoch": 0.8196364390378766, + "grad_norm": 1.0567872295642509, + "learning_rate": 1.658255645029372e-06, + "loss": 0.4255, + "step": 47434 + }, + { + "epoch": 0.8196537185512855, + "grad_norm": 1.3185567767576174, + "learning_rate": 1.6579470109893592e-06, + "loss": 0.3345, + "step": 47435 + }, + { + "epoch": 0.8196709980646945, + "grad_norm": 0.8996469249975336, + "learning_rate": 1.6576384030772885e-06, + "loss": 0.4765, + "step": 47436 + }, + { + "epoch": 0.8196882775781034, + "grad_norm": 1.548752329532486, + "learning_rate": 1.657329821294128e-06, + "loss": 0.3522, + "step": 47437 + }, + { + "epoch": 0.8197055570915123, + "grad_norm": 1.5436105831797489, + "learning_rate": 1.6570212656408435e-06, + "loss": 0.2433, + "step": 47438 + }, + { + "epoch": 0.8197228366049212, + "grad_norm": 1.2727333051249539, + "learning_rate": 1.6567127361184065e-06, + "loss": 0.26, + "step": 47439 + }, + { + "epoch": 0.8197401161183301, + "grad_norm": 1.076302690428499, + "learning_rate": 1.6564042327277762e-06, + "loss": 0.8428, + "step": 47440 + }, + { + "epoch": 0.819757395631739, + "grad_norm": 1.036790091508992, + "learning_rate": 1.6560957554699242e-06, + "loss": 0.4341, + "step": 47441 + }, + { + "epoch": 0.8197746751451479, + "grad_norm": 1.1449201464424865, + "learning_rate": 1.6557873043458118e-06, + "loss": 0.2892, + "step": 47442 + }, + { + "epoch": 0.8197919546585568, + "grad_norm": 1.4805435647048815, + "learning_rate": 1.655478879356408e-06, + "loss": 0.573, + "step": 47443 + }, + { + "epoch": 0.8198092341719657, + "grad_norm": 0.9269303532811871, + "learning_rate": 1.6551704805026803e-06, + "loss": 0.3576, + "step": 47444 + }, + { + "epoch": 0.8198265136853746, + "grad_norm": 0.8089625627900673, + "learning_rate": 1.6548621077855908e-06, + "loss": 0.2669, + "step": 47445 + }, + { + "epoch": 0.8198437931987835, + "grad_norm": 0.8963980755623591, + "learning_rate": 1.6545537612061103e-06, + "loss": 0.5708, + "step": 47446 + }, + { + "epoch": 0.8198610727121924, + "grad_norm": 1.0128555514203819, + "learning_rate": 1.6542454407651986e-06, + "loss": 0.2375, + "step": 47447 + }, + { + "epoch": 0.8198783522256013, + "grad_norm": 1.3720862772616818, + "learning_rate": 1.6539371464638253e-06, + "loss": 0.3466, + "step": 47448 + }, + { + "epoch": 0.8198956317390103, + "grad_norm": 0.9014549854242819, + "learning_rate": 1.6536288783029541e-06, + "loss": 0.6376, + "step": 47449 + }, + { + "epoch": 0.8199129112524192, + "grad_norm": 1.1481514588390607, + "learning_rate": 1.6533206362835553e-06, + "loss": 0.1794, + "step": 47450 + }, + { + "epoch": 0.8199301907658281, + "grad_norm": 1.3609818305261923, + "learning_rate": 1.653012420406589e-06, + "loss": 0.5292, + "step": 47451 + }, + { + "epoch": 0.819947470279237, + "grad_norm": 1.4162294790078802, + "learning_rate": 1.6527042306730202e-06, + "loss": 0.4663, + "step": 47452 + }, + { + "epoch": 0.8199647497926459, + "grad_norm": 2.098045910485854, + "learning_rate": 1.6523960670838167e-06, + "loss": 0.4057, + "step": 47453 + }, + { + "epoch": 0.8199820293060548, + "grad_norm": 1.679836979636003, + "learning_rate": 1.6520879296399429e-06, + "loss": 0.4541, + "step": 47454 + }, + { + "epoch": 0.8199993088194636, + "grad_norm": 1.4051243945191616, + "learning_rate": 1.6517798183423661e-06, + "loss": 0.3151, + "step": 47455 + }, + { + "epoch": 0.8200165883328725, + "grad_norm": 1.1699326119766227, + "learning_rate": 1.6514717331920472e-06, + "loss": 0.5197, + "step": 47456 + }, + { + "epoch": 0.8200338678462814, + "grad_norm": 0.728531705041856, + "learning_rate": 1.6511636741899561e-06, + "loss": 0.8651, + "step": 47457 + }, + { + "epoch": 0.8200511473596903, + "grad_norm": 1.9494103572270196, + "learning_rate": 1.6508556413370525e-06, + "loss": 0.3916, + "step": 47458 + }, + { + "epoch": 0.8200684268730992, + "grad_norm": 1.5702687426052622, + "learning_rate": 1.650547634634303e-06, + "loss": 0.6156, + "step": 47459 + }, + { + "epoch": 0.8200857063865081, + "grad_norm": 1.3311849872444872, + "learning_rate": 1.6502396540826727e-06, + "loss": 0.2959, + "step": 47460 + }, + { + "epoch": 0.820102985899917, + "grad_norm": 1.5054335801340597, + "learning_rate": 1.6499316996831293e-06, + "loss": 0.3464, + "step": 47461 + }, + { + "epoch": 0.8201202654133259, + "grad_norm": 0.7418527269563886, + "learning_rate": 1.6496237714366336e-06, + "loss": 0.3021, + "step": 47462 + }, + { + "epoch": 0.8201375449267349, + "grad_norm": 1.5263405023960748, + "learning_rate": 1.6493158693441492e-06, + "loss": 0.443, + "step": 47463 + }, + { + "epoch": 0.8201548244401438, + "grad_norm": 2.215423207777792, + "learning_rate": 1.649007993406645e-06, + "loss": 0.3272, + "step": 47464 + }, + { + "epoch": 0.8201721039535527, + "grad_norm": 1.7840767798295132, + "learning_rate": 1.648700143625077e-06, + "loss": 0.5002, + "step": 47465 + }, + { + "epoch": 0.8201893834669616, + "grad_norm": 1.5789989879162925, + "learning_rate": 1.6483923200004193e-06, + "loss": 0.4258, + "step": 47466 + }, + { + "epoch": 0.8202066629803705, + "grad_norm": 1.1752296498330754, + "learning_rate": 1.6480845225336294e-06, + "loss": 0.2677, + "step": 47467 + }, + { + "epoch": 0.8202239424937794, + "grad_norm": 0.8563381085080242, + "learning_rate": 1.647776751225676e-06, + "loss": 0.4658, + "step": 47468 + }, + { + "epoch": 0.8202412220071883, + "grad_norm": 0.8228389611864524, + "learning_rate": 1.6474690060775178e-06, + "loss": 0.3226, + "step": 47469 + }, + { + "epoch": 0.8202585015205972, + "grad_norm": 1.3832703811037483, + "learning_rate": 1.6471612870901242e-06, + "loss": 0.4858, + "step": 47470 + }, + { + "epoch": 0.8202757810340061, + "grad_norm": 1.12311591015333, + "learning_rate": 1.6468535942644537e-06, + "loss": 0.3516, + "step": 47471 + }, + { + "epoch": 0.820293060547415, + "grad_norm": 1.3802778837884078, + "learning_rate": 1.6465459276014729e-06, + "loss": 0.4135, + "step": 47472 + }, + { + "epoch": 0.8203103400608239, + "grad_norm": 1.497629610757855, + "learning_rate": 1.6462382871021476e-06, + "loss": 0.2104, + "step": 47473 + }, + { + "epoch": 0.8203276195742328, + "grad_norm": 1.0133721495301278, + "learning_rate": 1.6459306727674363e-06, + "loss": 0.4246, + "step": 47474 + }, + { + "epoch": 0.8203448990876417, + "grad_norm": 2.06932767337039, + "learning_rate": 1.6456230845983069e-06, + "loss": 0.3536, + "step": 47475 + }, + { + "epoch": 0.8203621786010505, + "grad_norm": 1.1958350273011153, + "learning_rate": 1.6453155225957184e-06, + "loss": 0.2187, + "step": 47476 + }, + { + "epoch": 0.8203794581144594, + "grad_norm": 0.8555980069239207, + "learning_rate": 1.6450079867606372e-06, + "loss": 0.5695, + "step": 47477 + }, + { + "epoch": 0.8203967376278684, + "grad_norm": 1.348724970184944, + "learning_rate": 1.6447004770940256e-06, + "loss": 0.2563, + "step": 47478 + }, + { + "epoch": 0.8204140171412773, + "grad_norm": 1.7048354400041763, + "learning_rate": 1.6443929935968505e-06, + "loss": 0.3527, + "step": 47479 + }, + { + "epoch": 0.8204312966546862, + "grad_norm": 1.1513494841753313, + "learning_rate": 1.644085536270068e-06, + "loss": 0.8196, + "step": 47480 + }, + { + "epoch": 0.8204485761680951, + "grad_norm": 1.0025352925838407, + "learning_rate": 1.6437781051146473e-06, + "loss": 0.534, + "step": 47481 + }, + { + "epoch": 0.820465855681504, + "grad_norm": 0.9898424661892278, + "learning_rate": 1.6434707001315464e-06, + "loss": 0.2139, + "step": 47482 + }, + { + "epoch": 0.8204831351949129, + "grad_norm": 1.3112837750081683, + "learning_rate": 1.6431633213217303e-06, + "loss": 0.2223, + "step": 47483 + }, + { + "epoch": 0.8205004147083218, + "grad_norm": 1.1216043001600575, + "learning_rate": 1.6428559686861633e-06, + "loss": 0.4549, + "step": 47484 + }, + { + "epoch": 0.8205176942217307, + "grad_norm": 1.528190681958117, + "learning_rate": 1.6425486422258052e-06, + "loss": 0.4208, + "step": 47485 + }, + { + "epoch": 0.8205349737351396, + "grad_norm": 1.2489673446046705, + "learning_rate": 1.6422413419416216e-06, + "loss": 0.2994, + "step": 47486 + }, + { + "epoch": 0.8205522532485485, + "grad_norm": 1.2635224472549735, + "learning_rate": 1.641934067834572e-06, + "loss": 0.5585, + "step": 47487 + }, + { + "epoch": 0.8205695327619574, + "grad_norm": 1.1593611850461019, + "learning_rate": 1.6416268199056195e-06, + "loss": 0.1865, + "step": 47488 + }, + { + "epoch": 0.8205868122753663, + "grad_norm": 0.8418452237883635, + "learning_rate": 1.6413195981557262e-06, + "loss": 0.2247, + "step": 47489 + }, + { + "epoch": 0.8206040917887752, + "grad_norm": 1.5330029402576155, + "learning_rate": 1.6410124025858587e-06, + "loss": 0.2813, + "step": 47490 + }, + { + "epoch": 0.8206213713021842, + "grad_norm": 1.031398187276071, + "learning_rate": 1.6407052331969731e-06, + "loss": 0.4645, + "step": 47491 + }, + { + "epoch": 0.8206386508155931, + "grad_norm": 1.2861164938887086, + "learning_rate": 1.6403980899900351e-06, + "loss": 0.3443, + "step": 47492 + }, + { + "epoch": 0.820655930329002, + "grad_norm": 1.311736899143876, + "learning_rate": 1.640090972966004e-06, + "loss": 0.1943, + "step": 47493 + }, + { + "epoch": 0.8206732098424109, + "grad_norm": 1.5101052167400497, + "learning_rate": 1.6397838821258438e-06, + "loss": 0.3662, + "step": 47494 + }, + { + "epoch": 0.8206904893558198, + "grad_norm": 0.9472732965323698, + "learning_rate": 1.6394768174705177e-06, + "loss": 0.4183, + "step": 47495 + }, + { + "epoch": 0.8207077688692287, + "grad_norm": 1.0641600226905832, + "learning_rate": 1.6391697790009841e-06, + "loss": 0.3371, + "step": 47496 + }, + { + "epoch": 0.8207250483826375, + "grad_norm": 1.5737844924650355, + "learning_rate": 1.6388627667182078e-06, + "loss": 0.1951, + "step": 47497 + }, + { + "epoch": 0.8207423278960464, + "grad_norm": 1.0296227120434733, + "learning_rate": 1.6385557806231466e-06, + "loss": 0.4532, + "step": 47498 + }, + { + "epoch": 0.8207596074094553, + "grad_norm": 1.2856870081271758, + "learning_rate": 1.6382488207167634e-06, + "loss": 0.3002, + "step": 47499 + }, + { + "epoch": 0.8207768869228642, + "grad_norm": 1.056818032681869, + "learning_rate": 1.6379418870000208e-06, + "loss": 0.5192, + "step": 47500 + }, + { + "epoch": 0.8207941664362731, + "grad_norm": 1.0298318733301934, + "learning_rate": 1.6376349794738823e-06, + "loss": 0.3419, + "step": 47501 + }, + { + "epoch": 0.820811445949682, + "grad_norm": 1.3790561964344896, + "learning_rate": 1.637328098139307e-06, + "loss": 0.477, + "step": 47502 + }, + { + "epoch": 0.8208287254630909, + "grad_norm": 2.2623474309877163, + "learning_rate": 1.637021242997252e-06, + "loss": 0.2982, + "step": 47503 + }, + { + "epoch": 0.8208460049764998, + "grad_norm": 1.8692739419403266, + "learning_rate": 1.6367144140486857e-06, + "loss": 0.2424, + "step": 47504 + }, + { + "epoch": 0.8208632844899088, + "grad_norm": 1.456065103160046, + "learning_rate": 1.6364076112945592e-06, + "loss": 0.5048, + "step": 47505 + }, + { + "epoch": 0.8208805640033177, + "grad_norm": 1.0815574725772903, + "learning_rate": 1.6361008347358454e-06, + "loss": 0.3287, + "step": 47506 + }, + { + "epoch": 0.8208978435167266, + "grad_norm": 0.6309549294519475, + "learning_rate": 1.6357940843734977e-06, + "loss": 0.6936, + "step": 47507 + }, + { + "epoch": 0.8209151230301355, + "grad_norm": 1.5687255889583853, + "learning_rate": 1.6354873602084797e-06, + "loss": 0.4529, + "step": 47508 + }, + { + "epoch": 0.8209324025435444, + "grad_norm": 0.6749140940877297, + "learning_rate": 1.6351806622417487e-06, + "loss": 0.6468, + "step": 47509 + }, + { + "epoch": 0.8209496820569533, + "grad_norm": 1.7857942781455525, + "learning_rate": 1.6348739904742706e-06, + "loss": 0.4196, + "step": 47510 + }, + { + "epoch": 0.8209669615703622, + "grad_norm": 0.9201292986543056, + "learning_rate": 1.6345673449069977e-06, + "loss": 0.3586, + "step": 47511 + }, + { + "epoch": 0.8209842410837711, + "grad_norm": 3.0433358816575504, + "learning_rate": 1.6342607255409005e-06, + "loss": 0.3217, + "step": 47512 + }, + { + "epoch": 0.82100152059718, + "grad_norm": 1.6213112474171651, + "learning_rate": 1.6339541323769347e-06, + "loss": 0.4738, + "step": 47513 + }, + { + "epoch": 0.8210188001105889, + "grad_norm": 1.0483077775000704, + "learning_rate": 1.6336475654160577e-06, + "loss": 0.3878, + "step": 47514 + }, + { + "epoch": 0.8210360796239978, + "grad_norm": 1.3172995542153068, + "learning_rate": 1.6333410246592352e-06, + "loss": 0.301, + "step": 47515 + }, + { + "epoch": 0.8210533591374067, + "grad_norm": 1.3647515229974785, + "learning_rate": 1.6330345101074219e-06, + "loss": 0.5892, + "step": 47516 + }, + { + "epoch": 0.8210706386508156, + "grad_norm": 0.659259640437615, + "learning_rate": 1.6327280217615793e-06, + "loss": 0.5682, + "step": 47517 + }, + { + "epoch": 0.8210879181642246, + "grad_norm": 1.2935686704499947, + "learning_rate": 1.63242155962267e-06, + "loss": 0.258, + "step": 47518 + }, + { + "epoch": 0.8211051976776333, + "grad_norm": 1.1430274435809236, + "learning_rate": 1.6321151236916534e-06, + "loss": 0.3603, + "step": 47519 + }, + { + "epoch": 0.8211224771910423, + "grad_norm": 0.7098844580476102, + "learning_rate": 1.6318087139694872e-06, + "loss": 0.3849, + "step": 47520 + }, + { + "epoch": 0.8211397567044512, + "grad_norm": 1.4620826346052196, + "learning_rate": 1.631502330457133e-06, + "loss": 0.3846, + "step": 47521 + }, + { + "epoch": 0.8211570362178601, + "grad_norm": 1.6215170474974683, + "learning_rate": 1.6311959731555483e-06, + "loss": 0.3963, + "step": 47522 + }, + { + "epoch": 0.821174315731269, + "grad_norm": 1.2130320355153281, + "learning_rate": 1.6308896420656927e-06, + "loss": 0.2448, + "step": 47523 + }, + { + "epoch": 0.8211915952446779, + "grad_norm": 1.6042831205842083, + "learning_rate": 1.6305833371885293e-06, + "loss": 0.654, + "step": 47524 + }, + { + "epoch": 0.8212088747580868, + "grad_norm": 0.9338381517808992, + "learning_rate": 1.6302770585250138e-06, + "loss": 0.529, + "step": 47525 + }, + { + "epoch": 0.8212261542714957, + "grad_norm": 1.789404239888222, + "learning_rate": 1.6299708060761076e-06, + "loss": 0.4766, + "step": 47526 + }, + { + "epoch": 0.8212434337849046, + "grad_norm": 0.9183613713108154, + "learning_rate": 1.6296645798427668e-06, + "loss": 0.3489, + "step": 47527 + }, + { + "epoch": 0.8212607132983135, + "grad_norm": 1.6762778118387904, + "learning_rate": 1.6293583798259538e-06, + "loss": 0.2887, + "step": 47528 + }, + { + "epoch": 0.8212779928117224, + "grad_norm": 1.4756811911771182, + "learning_rate": 1.6290522060266257e-06, + "loss": 0.3098, + "step": 47529 + }, + { + "epoch": 0.8212952723251313, + "grad_norm": 1.0587743084173251, + "learning_rate": 1.6287460584457436e-06, + "loss": 0.4111, + "step": 47530 + }, + { + "epoch": 0.8213125518385402, + "grad_norm": 1.2085247272801318, + "learning_rate": 1.628439937084264e-06, + "loss": 0.1889, + "step": 47531 + }, + { + "epoch": 0.8213298313519491, + "grad_norm": 1.6875153438222883, + "learning_rate": 1.6281338419431491e-06, + "loss": 0.2445, + "step": 47532 + }, + { + "epoch": 0.821347110865358, + "grad_norm": 0.8574571722716207, + "learning_rate": 1.6278277730233527e-06, + "loss": 0.2009, + "step": 47533 + }, + { + "epoch": 0.821364390378767, + "grad_norm": 1.3467142060288577, + "learning_rate": 1.627521730325835e-06, + "loss": 0.3844, + "step": 47534 + }, + { + "epoch": 0.8213816698921759, + "grad_norm": 1.3821965895389778, + "learning_rate": 1.62721571385156e-06, + "loss": 0.3478, + "step": 47535 + }, + { + "epoch": 0.8213989494055848, + "grad_norm": 0.9641222732220424, + "learning_rate": 1.6269097236014775e-06, + "loss": 0.931, + "step": 47536 + }, + { + "epoch": 0.8214162289189937, + "grad_norm": 0.9840702640508512, + "learning_rate": 1.6266037595765527e-06, + "loss": 0.325, + "step": 47537 + }, + { + "epoch": 0.8214335084324026, + "grad_norm": 0.9756054306556271, + "learning_rate": 1.6262978217777391e-06, + "loss": 0.6299, + "step": 47538 + }, + { + "epoch": 0.8214507879458115, + "grad_norm": 1.2773802591172896, + "learning_rate": 1.6259919102059974e-06, + "loss": 0.4199, + "step": 47539 + }, + { + "epoch": 0.8214680674592203, + "grad_norm": 1.2533380007809407, + "learning_rate": 1.6256860248622851e-06, + "loss": 0.2832, + "step": 47540 + }, + { + "epoch": 0.8214853469726292, + "grad_norm": 0.9769687467719942, + "learning_rate": 1.6253801657475632e-06, + "loss": 0.4264, + "step": 47541 + }, + { + "epoch": 0.8215026264860381, + "grad_norm": 1.65124174129969, + "learning_rate": 1.625074332862785e-06, + "loss": 0.3796, + "step": 47542 + }, + { + "epoch": 0.821519905999447, + "grad_norm": 1.7568871601988927, + "learning_rate": 1.6247685262089118e-06, + "loss": 0.3028, + "step": 47543 + }, + { + "epoch": 0.8215371855128559, + "grad_norm": 1.8818357396230556, + "learning_rate": 1.6244627457869012e-06, + "loss": 0.3818, + "step": 47544 + }, + { + "epoch": 0.8215544650262648, + "grad_norm": 1.2912626242779917, + "learning_rate": 1.624156991597704e-06, + "loss": 0.7358, + "step": 47545 + }, + { + "epoch": 0.8215717445396737, + "grad_norm": 2.29186373645183, + "learning_rate": 1.6238512636422887e-06, + "loss": 0.2904, + "step": 47546 + }, + { + "epoch": 0.8215890240530827, + "grad_norm": 0.854811799312993, + "learning_rate": 1.6235455619216068e-06, + "loss": 0.4657, + "step": 47547 + }, + { + "epoch": 0.8216063035664916, + "grad_norm": 0.8731083400076366, + "learning_rate": 1.6232398864366171e-06, + "loss": 0.1907, + "step": 47548 + }, + { + "epoch": 0.8216235830799005, + "grad_norm": 1.4442585558345726, + "learning_rate": 1.6229342371882761e-06, + "loss": 0.2455, + "step": 47549 + }, + { + "epoch": 0.8216408625933094, + "grad_norm": 1.1664519841715502, + "learning_rate": 1.6226286141775427e-06, + "loss": 0.3198, + "step": 47550 + }, + { + "epoch": 0.8216581421067183, + "grad_norm": 1.3519549927450663, + "learning_rate": 1.6223230174053695e-06, + "loss": 0.5225, + "step": 47551 + }, + { + "epoch": 0.8216754216201272, + "grad_norm": 1.9785189716623102, + "learning_rate": 1.6220174468727212e-06, + "loss": 0.4368, + "step": 47552 + }, + { + "epoch": 0.8216927011335361, + "grad_norm": 1.2095142606049676, + "learning_rate": 1.6217119025805484e-06, + "loss": 0.2266, + "step": 47553 + }, + { + "epoch": 0.821709980646945, + "grad_norm": 1.5968528147960184, + "learning_rate": 1.6214063845298133e-06, + "loss": 0.5499, + "step": 47554 + }, + { + "epoch": 0.8217272601603539, + "grad_norm": 2.753793963944306, + "learning_rate": 1.621100892721471e-06, + "loss": 0.437, + "step": 47555 + }, + { + "epoch": 0.8217445396737628, + "grad_norm": 1.547705375224109, + "learning_rate": 1.6207954271564741e-06, + "loss": 0.3277, + "step": 47556 + }, + { + "epoch": 0.8217618191871717, + "grad_norm": 1.1691020689678142, + "learning_rate": 1.6204899878357826e-06, + "loss": 0.2948, + "step": 47557 + }, + { + "epoch": 0.8217790987005806, + "grad_norm": 1.4032327773865187, + "learning_rate": 1.6201845747603539e-06, + "loss": 0.3738, + "step": 47558 + }, + { + "epoch": 0.8217963782139895, + "grad_norm": 1.9365688973654178, + "learning_rate": 1.619879187931146e-06, + "loss": 0.2839, + "step": 47559 + }, + { + "epoch": 0.8218136577273984, + "grad_norm": 1.3758010244967507, + "learning_rate": 1.6195738273491114e-06, + "loss": 0.2393, + "step": 47560 + }, + { + "epoch": 0.8218309372408072, + "grad_norm": 0.9375888872530016, + "learning_rate": 1.6192684930152102e-06, + "loss": 0.2324, + "step": 47561 + }, + { + "epoch": 0.8218482167542162, + "grad_norm": 1.0484772031969012, + "learning_rate": 1.6189631849303955e-06, + "loss": 0.3769, + "step": 47562 + }, + { + "epoch": 0.8218654962676251, + "grad_norm": 1.1702740112898933, + "learning_rate": 1.618657903095624e-06, + "loss": 0.3496, + "step": 47563 + }, + { + "epoch": 0.821882775781034, + "grad_norm": 0.613806616186591, + "learning_rate": 1.618352647511856e-06, + "loss": 0.6481, + "step": 47564 + }, + { + "epoch": 0.8219000552944429, + "grad_norm": 2.5543799518288806, + "learning_rate": 1.618047418180042e-06, + "loss": 0.2344, + "step": 47565 + }, + { + "epoch": 0.8219173348078518, + "grad_norm": 1.2518289603701123, + "learning_rate": 1.6177422151011435e-06, + "loss": 0.2516, + "step": 47566 + }, + { + "epoch": 0.8219346143212607, + "grad_norm": 1.4967708197938474, + "learning_rate": 1.6174370382761095e-06, + "loss": 0.5336, + "step": 47567 + }, + { + "epoch": 0.8219518938346696, + "grad_norm": 0.8588151051039518, + "learning_rate": 1.6171318877059006e-06, + "loss": 0.5564, + "step": 47568 + }, + { + "epoch": 0.8219691733480785, + "grad_norm": 1.8584998981874015, + "learning_rate": 1.6168267633914714e-06, + "loss": 0.3143, + "step": 47569 + }, + { + "epoch": 0.8219864528614874, + "grad_norm": 2.388080422451535, + "learning_rate": 1.6165216653337812e-06, + "loss": 0.335, + "step": 47570 + }, + { + "epoch": 0.8220037323748963, + "grad_norm": 1.6911109498411037, + "learning_rate": 1.6162165935337792e-06, + "loss": 0.7413, + "step": 47571 + }, + { + "epoch": 0.8220210118883052, + "grad_norm": 1.7283544451949753, + "learning_rate": 1.6159115479924259e-06, + "loss": 0.5982, + "step": 47572 + }, + { + "epoch": 0.8220382914017141, + "grad_norm": 0.8168203659234338, + "learning_rate": 1.6156065287106725e-06, + "loss": 0.2211, + "step": 47573 + }, + { + "epoch": 0.822055570915123, + "grad_norm": 1.0925475923336223, + "learning_rate": 1.6153015356894774e-06, + "loss": 0.4275, + "step": 47574 + }, + { + "epoch": 0.822072850428532, + "grad_norm": 0.72462896631842, + "learning_rate": 1.6149965689297974e-06, + "loss": 0.83, + "step": 47575 + }, + { + "epoch": 0.8220901299419409, + "grad_norm": 1.078566957953162, + "learning_rate": 1.614691628432583e-06, + "loss": 0.3258, + "step": 47576 + }, + { + "epoch": 0.8221074094553498, + "grad_norm": 1.0448932518853908, + "learning_rate": 1.6143867141987934e-06, + "loss": 0.3273, + "step": 47577 + }, + { + "epoch": 0.8221246889687587, + "grad_norm": 2.5996802617683463, + "learning_rate": 1.61408182622938e-06, + "loss": 0.2173, + "step": 47578 + }, + { + "epoch": 0.8221419684821676, + "grad_norm": 2.1773068531879143, + "learning_rate": 1.6137769645253e-06, + "loss": 0.6029, + "step": 47579 + }, + { + "epoch": 0.8221592479955765, + "grad_norm": 1.9076345287791878, + "learning_rate": 1.6134721290875067e-06, + "loss": 0.4349, + "step": 47580 + }, + { + "epoch": 0.8221765275089854, + "grad_norm": 0.6033017980643746, + "learning_rate": 1.6131673199169595e-06, + "loss": 0.2221, + "step": 47581 + }, + { + "epoch": 0.8221938070223942, + "grad_norm": 0.9128578549198009, + "learning_rate": 1.6128625370146067e-06, + "loss": 0.1687, + "step": 47582 + }, + { + "epoch": 0.8222110865358031, + "grad_norm": 1.384091803717243, + "learning_rate": 1.6125577803814085e-06, + "loss": 0.2494, + "step": 47583 + }, + { + "epoch": 0.822228366049212, + "grad_norm": 1.2171370436100997, + "learning_rate": 1.6122530500183143e-06, + "loss": 0.3186, + "step": 47584 + }, + { + "epoch": 0.8222456455626209, + "grad_norm": 1.2322975087786376, + "learning_rate": 1.6119483459262808e-06, + "loss": 0.2746, + "step": 47585 + }, + { + "epoch": 0.8222629250760298, + "grad_norm": 1.379743496469791, + "learning_rate": 1.6116436681062653e-06, + "loss": 0.3341, + "step": 47586 + }, + { + "epoch": 0.8222802045894387, + "grad_norm": 2.914360348160158, + "learning_rate": 1.6113390165592158e-06, + "loss": 0.4201, + "step": 47587 + }, + { + "epoch": 0.8222974841028476, + "grad_norm": 1.924109931827272, + "learning_rate": 1.6110343912860937e-06, + "loss": 0.4284, + "step": 47588 + }, + { + "epoch": 0.8223147636162565, + "grad_norm": 0.9808655459434268, + "learning_rate": 1.610729792287846e-06, + "loss": 0.5482, + "step": 47589 + }, + { + "epoch": 0.8223320431296655, + "grad_norm": 1.3797535483035732, + "learning_rate": 1.6104252195654334e-06, + "loss": 0.2001, + "step": 47590 + }, + { + "epoch": 0.8223493226430744, + "grad_norm": 1.4009626366313488, + "learning_rate": 1.6101206731198016e-06, + "loss": 0.4544, + "step": 47591 + }, + { + "epoch": 0.8223666021564833, + "grad_norm": 1.2602898110314558, + "learning_rate": 1.6098161529519153e-06, + "loss": 0.3986, + "step": 47592 + }, + { + "epoch": 0.8223838816698922, + "grad_norm": 0.8320764010944264, + "learning_rate": 1.609511659062718e-06, + "loss": 0.3386, + "step": 47593 + }, + { + "epoch": 0.8224011611833011, + "grad_norm": 1.1062331615598178, + "learning_rate": 1.6092071914531714e-06, + "loss": 0.1853, + "step": 47594 + }, + { + "epoch": 0.82241844069671, + "grad_norm": 1.3748640983597167, + "learning_rate": 1.608902750124226e-06, + "loss": 0.4386, + "step": 47595 + }, + { + "epoch": 0.8224357202101189, + "grad_norm": 1.4949973942548649, + "learning_rate": 1.608598335076832e-06, + "loss": 0.5362, + "step": 47596 + }, + { + "epoch": 0.8224529997235278, + "grad_norm": 0.9269315631374063, + "learning_rate": 1.608293946311945e-06, + "loss": 0.3861, + "step": 47597 + }, + { + "epoch": 0.8224702792369367, + "grad_norm": 0.9416116499224128, + "learning_rate": 1.6079895838305193e-06, + "loss": 0.2749, + "step": 47598 + }, + { + "epoch": 0.8224875587503456, + "grad_norm": 0.8944741235769629, + "learning_rate": 1.6076852476335114e-06, + "loss": 0.2481, + "step": 47599 + }, + { + "epoch": 0.8225048382637545, + "grad_norm": 1.2938617035427744, + "learning_rate": 1.6073809377218686e-06, + "loss": 0.4267, + "step": 47600 + }, + { + "epoch": 0.8225221177771634, + "grad_norm": 1.6705138487228663, + "learning_rate": 1.6070766540965476e-06, + "loss": 0.4317, + "step": 47601 + }, + { + "epoch": 0.8225393972905723, + "grad_norm": 0.7620652329675336, + "learning_rate": 1.6067723967584992e-06, + "loss": 0.2771, + "step": 47602 + }, + { + "epoch": 0.8225566768039811, + "grad_norm": 1.2332670296504653, + "learning_rate": 1.6064681657086767e-06, + "loss": 0.2974, + "step": 47603 + }, + { + "epoch": 0.82257395631739, + "grad_norm": 1.4480992244604345, + "learning_rate": 1.6061639609480339e-06, + "loss": 0.3315, + "step": 47604 + }, + { + "epoch": 0.822591235830799, + "grad_norm": 0.8888900334388709, + "learning_rate": 1.6058597824775269e-06, + "loss": 0.3818, + "step": 47605 + }, + { + "epoch": 0.8226085153442079, + "grad_norm": 0.503003385786103, + "learning_rate": 1.6055556302981045e-06, + "loss": 0.625, + "step": 47606 + }, + { + "epoch": 0.8226257948576168, + "grad_norm": 1.5724811997017205, + "learning_rate": 1.6052515044107165e-06, + "loss": 0.272, + "step": 47607 + }, + { + "epoch": 0.8226430743710257, + "grad_norm": 1.2942928034938581, + "learning_rate": 1.6049474048163194e-06, + "loss": 0.4302, + "step": 47608 + }, + { + "epoch": 0.8226603538844346, + "grad_norm": 1.2074366795657139, + "learning_rate": 1.6046433315158649e-06, + "loss": 0.4414, + "step": 47609 + }, + { + "epoch": 0.8226776333978435, + "grad_norm": 1.104007786704969, + "learning_rate": 1.604339284510309e-06, + "loss": 0.4381, + "step": 47610 + }, + { + "epoch": 0.8226949129112524, + "grad_norm": 1.582588219904261, + "learning_rate": 1.6040352638005962e-06, + "loss": 0.2708, + "step": 47611 + }, + { + "epoch": 0.8227121924246613, + "grad_norm": 1.4797531725599473, + "learning_rate": 1.6037312693876872e-06, + "loss": 0.3987, + "step": 47612 + }, + { + "epoch": 0.8227294719380702, + "grad_norm": 1.3122750873652125, + "learning_rate": 1.6034273012725265e-06, + "loss": 0.5014, + "step": 47613 + }, + { + "epoch": 0.8227467514514791, + "grad_norm": 1.1681417314560676, + "learning_rate": 1.6031233594560702e-06, + "loss": 0.5081, + "step": 47614 + }, + { + "epoch": 0.822764030964888, + "grad_norm": 1.857267036768802, + "learning_rate": 1.6028194439392685e-06, + "loss": 0.3896, + "step": 47615 + }, + { + "epoch": 0.822781310478297, + "grad_norm": 1.1701930375075436, + "learning_rate": 1.6025155547230786e-06, + "loss": 0.2401, + "step": 47616 + }, + { + "epoch": 0.8227985899917059, + "grad_norm": 1.6513813918039464, + "learning_rate": 1.6022116918084475e-06, + "loss": 0.2865, + "step": 47617 + }, + { + "epoch": 0.8228158695051148, + "grad_norm": 0.9469308905510714, + "learning_rate": 1.6019078551963241e-06, + "loss": 0.3135, + "step": 47618 + }, + { + "epoch": 0.8228331490185237, + "grad_norm": 0.9221513238655192, + "learning_rate": 1.6016040448876646e-06, + "loss": 0.1526, + "step": 47619 + }, + { + "epoch": 0.8228504285319326, + "grad_norm": 2.2151646581538857, + "learning_rate": 1.6013002608834195e-06, + "loss": 0.2269, + "step": 47620 + }, + { + "epoch": 0.8228677080453415, + "grad_norm": 1.40489841706931, + "learning_rate": 1.600996503184542e-06, + "loss": 0.2575, + "step": 47621 + }, + { + "epoch": 0.8228849875587504, + "grad_norm": 1.5442196634320138, + "learning_rate": 1.6006927717919806e-06, + "loss": 0.487, + "step": 47622 + }, + { + "epoch": 0.8229022670721593, + "grad_norm": 1.0671751585189726, + "learning_rate": 1.600389066706689e-06, + "loss": 0.1921, + "step": 47623 + }, + { + "epoch": 0.8229195465855681, + "grad_norm": 1.180225499930916, + "learning_rate": 1.6000853879296152e-06, + "loss": 0.274, + "step": 47624 + }, + { + "epoch": 0.822936826098977, + "grad_norm": 1.407538710220532, + "learning_rate": 1.5997817354617128e-06, + "loss": 0.4507, + "step": 47625 + }, + { + "epoch": 0.8229541056123859, + "grad_norm": 2.273794695489784, + "learning_rate": 1.5994781093039336e-06, + "loss": 0.421, + "step": 47626 + }, + { + "epoch": 0.8229713851257948, + "grad_norm": 1.435653862674424, + "learning_rate": 1.5991745094572263e-06, + "loss": 0.3288, + "step": 47627 + }, + { + "epoch": 0.8229886646392037, + "grad_norm": 1.1225101313638453, + "learning_rate": 1.598870935922544e-06, + "loss": 0.3308, + "step": 47628 + }, + { + "epoch": 0.8230059441526126, + "grad_norm": 1.6105861105662316, + "learning_rate": 1.5985673887008345e-06, + "loss": 0.2845, + "step": 47629 + }, + { + "epoch": 0.8230232236660215, + "grad_norm": 1.280161320335525, + "learning_rate": 1.598263867793054e-06, + "loss": 0.338, + "step": 47630 + }, + { + "epoch": 0.8230405031794304, + "grad_norm": 1.4739948356172525, + "learning_rate": 1.5979603732001436e-06, + "loss": 0.3375, + "step": 47631 + }, + { + "epoch": 0.8230577826928394, + "grad_norm": 1.6463362495662666, + "learning_rate": 1.5976569049230638e-06, + "loss": 0.4904, + "step": 47632 + }, + { + "epoch": 0.8230750622062483, + "grad_norm": 1.6537370129557842, + "learning_rate": 1.5973534629627606e-06, + "loss": 0.3337, + "step": 47633 + }, + { + "epoch": 0.8230923417196572, + "grad_norm": 1.1676829565587514, + "learning_rate": 1.5970500473201866e-06, + "loss": 0.2405, + "step": 47634 + }, + { + "epoch": 0.8231096212330661, + "grad_norm": 0.7538323030656836, + "learning_rate": 1.5967466579962876e-06, + "loss": 0.2396, + "step": 47635 + }, + { + "epoch": 0.823126900746475, + "grad_norm": 1.5740212833165395, + "learning_rate": 1.5964432949920194e-06, + "loss": 0.1817, + "step": 47636 + }, + { + "epoch": 0.8231441802598839, + "grad_norm": 1.3805188365953756, + "learning_rate": 1.596139958308327e-06, + "loss": 0.7035, + "step": 47637 + }, + { + "epoch": 0.8231614597732928, + "grad_norm": 1.591802228045508, + "learning_rate": 1.5958366479461628e-06, + "loss": 0.5222, + "step": 47638 + }, + { + "epoch": 0.8231787392867017, + "grad_norm": 1.628720153635948, + "learning_rate": 1.5955333639064796e-06, + "loss": 0.3309, + "step": 47639 + }, + { + "epoch": 0.8231960188001106, + "grad_norm": 1.3165685283559605, + "learning_rate": 1.595230106190222e-06, + "loss": 0.3738, + "step": 47640 + }, + { + "epoch": 0.8232132983135195, + "grad_norm": 1.589547472803865, + "learning_rate": 1.594926874798345e-06, + "loss": 0.3269, + "step": 47641 + }, + { + "epoch": 0.8232305778269284, + "grad_norm": 2.5068819361497234, + "learning_rate": 1.594623669731793e-06, + "loss": 0.3424, + "step": 47642 + }, + { + "epoch": 0.8232478573403373, + "grad_norm": 1.2826046521495587, + "learning_rate": 1.594320490991519e-06, + "loss": 0.3524, + "step": 47643 + }, + { + "epoch": 0.8232651368537462, + "grad_norm": 0.866832729046637, + "learning_rate": 1.594017338578472e-06, + "loss": 0.8917, + "step": 47644 + }, + { + "epoch": 0.823282416367155, + "grad_norm": 1.104844863801184, + "learning_rate": 1.5937142124936033e-06, + "loss": 0.4189, + "step": 47645 + }, + { + "epoch": 0.823299695880564, + "grad_norm": 1.1399910608607455, + "learning_rate": 1.5934111127378616e-06, + "loss": 0.4234, + "step": 47646 + }, + { + "epoch": 0.8233169753939729, + "grad_norm": 1.8138628693105272, + "learning_rate": 1.5931080393121923e-06, + "loss": 0.4033, + "step": 47647 + }, + { + "epoch": 0.8233342549073818, + "grad_norm": 0.7438505086927399, + "learning_rate": 1.592804992217547e-06, + "loss": 0.3595, + "step": 47648 + }, + { + "epoch": 0.8233515344207907, + "grad_norm": 1.4214874710179797, + "learning_rate": 1.5925019714548762e-06, + "loss": 0.2362, + "step": 47649 + }, + { + "epoch": 0.8233688139341996, + "grad_norm": 1.3022490920070207, + "learning_rate": 1.59219897702513e-06, + "loss": 0.2494, + "step": 47650 + }, + { + "epoch": 0.8233860934476085, + "grad_norm": 0.8532013782357704, + "learning_rate": 1.5918960089292535e-06, + "loss": 0.3208, + "step": 47651 + }, + { + "epoch": 0.8234033729610174, + "grad_norm": 1.177676051307501, + "learning_rate": 1.591593067168199e-06, + "loss": 0.251, + "step": 47652 + }, + { + "epoch": 0.8234206524744263, + "grad_norm": 1.6612717962343673, + "learning_rate": 1.5912901517429126e-06, + "loss": 0.4469, + "step": 47653 + }, + { + "epoch": 0.8234379319878352, + "grad_norm": 2.202673780677815, + "learning_rate": 1.5909872626543442e-06, + "loss": 0.427, + "step": 47654 + }, + { + "epoch": 0.8234552115012441, + "grad_norm": 0.9888877855589359, + "learning_rate": 1.5906843999034438e-06, + "loss": 0.2372, + "step": 47655 + }, + { + "epoch": 0.823472491014653, + "grad_norm": 1.1906677808221335, + "learning_rate": 1.59038156349116e-06, + "loss": 0.5318, + "step": 47656 + }, + { + "epoch": 0.8234897705280619, + "grad_norm": 1.3519288114883947, + "learning_rate": 1.5900787534184414e-06, + "loss": 0.4262, + "step": 47657 + }, + { + "epoch": 0.8235070500414708, + "grad_norm": 1.375366862796249, + "learning_rate": 1.5897759696862313e-06, + "loss": 0.2577, + "step": 47658 + }, + { + "epoch": 0.8235243295548798, + "grad_norm": 2.1917831216956314, + "learning_rate": 1.5894732122954836e-06, + "loss": 0.4127, + "step": 47659 + }, + { + "epoch": 0.8235416090682887, + "grad_norm": 1.1300920707267148, + "learning_rate": 1.5891704812471442e-06, + "loss": 0.4444, + "step": 47660 + }, + { + "epoch": 0.8235588885816976, + "grad_norm": 1.3862407979704825, + "learning_rate": 1.5888677765421646e-06, + "loss": 0.1791, + "step": 47661 + }, + { + "epoch": 0.8235761680951065, + "grad_norm": 1.6136535466430848, + "learning_rate": 1.5885650981814882e-06, + "loss": 0.2473, + "step": 47662 + }, + { + "epoch": 0.8235934476085154, + "grad_norm": 1.2233961492825047, + "learning_rate": 1.5882624461660668e-06, + "loss": 0.7592, + "step": 47663 + }, + { + "epoch": 0.8236107271219243, + "grad_norm": 1.0215212637586046, + "learning_rate": 1.587959820496845e-06, + "loss": 0.4511, + "step": 47664 + }, + { + "epoch": 0.8236280066353332, + "grad_norm": 1.2150011486794436, + "learning_rate": 1.5876572211747721e-06, + "loss": 0.5144, + "step": 47665 + }, + { + "epoch": 0.8236452861487421, + "grad_norm": 0.9727182507376734, + "learning_rate": 1.5873546482007962e-06, + "loss": 0.3531, + "step": 47666 + }, + { + "epoch": 0.8236625656621509, + "grad_norm": 2.871097733252455, + "learning_rate": 1.5870521015758666e-06, + "loss": 0.3455, + "step": 47667 + }, + { + "epoch": 0.8236798451755598, + "grad_norm": 2.122326372602294, + "learning_rate": 1.5867495813009304e-06, + "loss": 0.6685, + "step": 47668 + }, + { + "epoch": 0.8236971246889687, + "grad_norm": 1.1536294998634056, + "learning_rate": 1.5864470873769311e-06, + "loss": 0.4968, + "step": 47669 + }, + { + "epoch": 0.8237144042023776, + "grad_norm": 1.4711947683864997, + "learning_rate": 1.5861446198048203e-06, + "loss": 0.2571, + "step": 47670 + }, + { + "epoch": 0.8237316837157865, + "grad_norm": 1.6592294311709488, + "learning_rate": 1.5858421785855404e-06, + "loss": 0.4106, + "step": 47671 + }, + { + "epoch": 0.8237489632291954, + "grad_norm": 1.2770248322780977, + "learning_rate": 1.5855397637200464e-06, + "loss": 0.3285, + "step": 47672 + }, + { + "epoch": 0.8237662427426043, + "grad_norm": 3.251128601373611, + "learning_rate": 1.5852373752092799e-06, + "loss": 0.4245, + "step": 47673 + }, + { + "epoch": 0.8237835222560133, + "grad_norm": 1.6132896080180348, + "learning_rate": 1.5849350130541908e-06, + "loss": 0.2583, + "step": 47674 + }, + { + "epoch": 0.8238008017694222, + "grad_norm": 3.056496430819866, + "learning_rate": 1.584632677255723e-06, + "loss": 0.4929, + "step": 47675 + }, + { + "epoch": 0.8238180812828311, + "grad_norm": 1.0319012467685782, + "learning_rate": 1.584330367814827e-06, + "loss": 0.362, + "step": 47676 + }, + { + "epoch": 0.82383536079624, + "grad_norm": 0.9378738574263191, + "learning_rate": 1.5840280847324453e-06, + "loss": 0.3793, + "step": 47677 + }, + { + "epoch": 0.8238526403096489, + "grad_norm": 1.140398473608264, + "learning_rate": 1.583725828009528e-06, + "loss": 0.2809, + "step": 47678 + }, + { + "epoch": 0.8238699198230578, + "grad_norm": 1.1814983326444783, + "learning_rate": 1.5834235976470225e-06, + "loss": 0.3798, + "step": 47679 + }, + { + "epoch": 0.8238871993364667, + "grad_norm": 1.7990705665480835, + "learning_rate": 1.5831213936458723e-06, + "loss": 0.4817, + "step": 47680 + }, + { + "epoch": 0.8239044788498756, + "grad_norm": 0.9547238601486827, + "learning_rate": 1.5828192160070278e-06, + "loss": 0.5051, + "step": 47681 + }, + { + "epoch": 0.8239217583632845, + "grad_norm": 0.4330448826906751, + "learning_rate": 1.5825170647314313e-06, + "loss": 0.486, + "step": 47682 + }, + { + "epoch": 0.8239390378766934, + "grad_norm": 1.384441451085963, + "learning_rate": 1.5822149398200303e-06, + "loss": 0.2658, + "step": 47683 + }, + { + "epoch": 0.8239563173901023, + "grad_norm": 1.4351406997200484, + "learning_rate": 1.5819128412737716e-06, + "loss": 0.2329, + "step": 47684 + }, + { + "epoch": 0.8239735969035112, + "grad_norm": 1.0784462539286295, + "learning_rate": 1.5816107690936044e-06, + "loss": 0.4235, + "step": 47685 + }, + { + "epoch": 0.8239908764169201, + "grad_norm": 1.7810557346669544, + "learning_rate": 1.5813087232804702e-06, + "loss": 0.4374, + "step": 47686 + }, + { + "epoch": 0.8240081559303291, + "grad_norm": 1.962883622399472, + "learning_rate": 1.5810067038353183e-06, + "loss": 0.369, + "step": 47687 + }, + { + "epoch": 0.8240254354437379, + "grad_norm": 1.5012770457346813, + "learning_rate": 1.5807047107590911e-06, + "loss": 0.6728, + "step": 47688 + }, + { + "epoch": 0.8240427149571468, + "grad_norm": 1.426396848956801, + "learning_rate": 1.580402744052737e-06, + "loss": 0.3006, + "step": 47689 + }, + { + "epoch": 0.8240599944705557, + "grad_norm": 1.3676432699862178, + "learning_rate": 1.580100803717204e-06, + "loss": 0.3354, + "step": 47690 + }, + { + "epoch": 0.8240772739839646, + "grad_norm": 0.9827963493170719, + "learning_rate": 1.5797988897534322e-06, + "loss": 0.4643, + "step": 47691 + }, + { + "epoch": 0.8240945534973735, + "grad_norm": 2.0686776725071336, + "learning_rate": 1.5794970021623724e-06, + "loss": 0.1611, + "step": 47692 + }, + { + "epoch": 0.8241118330107824, + "grad_norm": 1.4637795754976275, + "learning_rate": 1.5791951409449656e-06, + "loss": 0.3549, + "step": 47693 + }, + { + "epoch": 0.8241291125241913, + "grad_norm": 1.6564304816773308, + "learning_rate": 1.5788933061021605e-06, + "loss": 0.2288, + "step": 47694 + }, + { + "epoch": 0.8241463920376002, + "grad_norm": 1.433079526469289, + "learning_rate": 1.5785914976349003e-06, + "loss": 0.2781, + "step": 47695 + }, + { + "epoch": 0.8241636715510091, + "grad_norm": 2.1761793821855178, + "learning_rate": 1.5782897155441357e-06, + "loss": 0.2853, + "step": 47696 + }, + { + "epoch": 0.824180951064418, + "grad_norm": 1.347595558487344, + "learning_rate": 1.5779879598308034e-06, + "loss": 0.357, + "step": 47697 + }, + { + "epoch": 0.8241982305778269, + "grad_norm": 0.8711836552098021, + "learning_rate": 1.5776862304958562e-06, + "loss": 0.2054, + "step": 47698 + }, + { + "epoch": 0.8242155100912358, + "grad_norm": 2.468443268758871, + "learning_rate": 1.5773845275402334e-06, + "loss": 0.3192, + "step": 47699 + }, + { + "epoch": 0.8242327896046447, + "grad_norm": 2.126990506158622, + "learning_rate": 1.5770828509648816e-06, + "loss": 0.4413, + "step": 47700 + }, + { + "epoch": 0.8242500691180537, + "grad_norm": 1.5183102905135293, + "learning_rate": 1.5767812007707495e-06, + "loss": 0.2731, + "step": 47701 + }, + { + "epoch": 0.8242673486314626, + "grad_norm": 1.2091893180143136, + "learning_rate": 1.576479576958777e-06, + "loss": 0.3313, + "step": 47702 + }, + { + "epoch": 0.8242846281448715, + "grad_norm": 0.9010163744346692, + "learning_rate": 1.5761779795299115e-06, + "loss": 0.2605, + "step": 47703 + }, + { + "epoch": 0.8243019076582804, + "grad_norm": 1.0425802391291972, + "learning_rate": 1.5758764084850953e-06, + "loss": 0.453, + "step": 47704 + }, + { + "epoch": 0.8243191871716893, + "grad_norm": 1.1233018378571664, + "learning_rate": 1.5755748638252743e-06, + "loss": 0.2013, + "step": 47705 + }, + { + "epoch": 0.8243364666850982, + "grad_norm": 1.540044033282437, + "learning_rate": 1.5752733455513936e-06, + "loss": 0.2966, + "step": 47706 + }, + { + "epoch": 0.8243537461985071, + "grad_norm": 1.0084461459330074, + "learning_rate": 1.5749718536643987e-06, + "loss": 0.794, + "step": 47707 + }, + { + "epoch": 0.824371025711916, + "grad_norm": 1.4045066407302271, + "learning_rate": 1.5746703881652325e-06, + "loss": 0.3141, + "step": 47708 + }, + { + "epoch": 0.8243883052253248, + "grad_norm": 1.4529705334293825, + "learning_rate": 1.5743689490548363e-06, + "loss": 0.1813, + "step": 47709 + }, + { + "epoch": 0.8244055847387337, + "grad_norm": 1.4006181780611022, + "learning_rate": 1.5740675363341594e-06, + "loss": 0.2141, + "step": 47710 + }, + { + "epoch": 0.8244228642521426, + "grad_norm": 1.9395272493617193, + "learning_rate": 1.573766150004139e-06, + "loss": 0.3525, + "step": 47711 + }, + { + "epoch": 0.8244401437655515, + "grad_norm": 0.9860476377913534, + "learning_rate": 1.5734647900657274e-06, + "loss": 0.4198, + "step": 47712 + }, + { + "epoch": 0.8244574232789604, + "grad_norm": 1.7193482387705363, + "learning_rate": 1.573163456519863e-06, + "loss": 0.2958, + "step": 47713 + }, + { + "epoch": 0.8244747027923693, + "grad_norm": 1.4505381254984113, + "learning_rate": 1.5728621493674923e-06, + "loss": 0.4288, + "step": 47714 + }, + { + "epoch": 0.8244919823057782, + "grad_norm": 1.3386987551178244, + "learning_rate": 1.5725608686095562e-06, + "loss": 0.1964, + "step": 47715 + }, + { + "epoch": 0.8245092618191872, + "grad_norm": 0.9255249159478109, + "learning_rate": 1.5722596142470026e-06, + "loss": 0.7709, + "step": 47716 + }, + { + "epoch": 0.8245265413325961, + "grad_norm": 1.6203721541650584, + "learning_rate": 1.5719583862807664e-06, + "loss": 0.207, + "step": 47717 + }, + { + "epoch": 0.824543820846005, + "grad_norm": 1.0289263399943338, + "learning_rate": 1.5716571847118023e-06, + "loss": 0.2677, + "step": 47718 + }, + { + "epoch": 0.8245611003594139, + "grad_norm": 1.215034291700318, + "learning_rate": 1.5713560095410496e-06, + "loss": 0.2304, + "step": 47719 + }, + { + "epoch": 0.8245783798728228, + "grad_norm": 1.7894970390533145, + "learning_rate": 1.5710548607694465e-06, + "loss": 0.3801, + "step": 47720 + }, + { + "epoch": 0.8245956593862317, + "grad_norm": 0.8187989026130753, + "learning_rate": 1.5707537383979432e-06, + "loss": 0.2453, + "step": 47721 + }, + { + "epoch": 0.8246129388996406, + "grad_norm": 1.4507366366582566, + "learning_rate": 1.570452642427478e-06, + "loss": 0.2789, + "step": 47722 + }, + { + "epoch": 0.8246302184130495, + "grad_norm": 0.7122522369600454, + "learning_rate": 1.5701515728589944e-06, + "loss": 0.3284, + "step": 47723 + }, + { + "epoch": 0.8246474979264584, + "grad_norm": 1.738788293488341, + "learning_rate": 1.5698505296934375e-06, + "loss": 0.6667, + "step": 47724 + }, + { + "epoch": 0.8246647774398673, + "grad_norm": 0.9720595327894693, + "learning_rate": 1.5695495129317517e-06, + "loss": 0.212, + "step": 47725 + }, + { + "epoch": 0.8246820569532762, + "grad_norm": 0.9892995822979103, + "learning_rate": 1.5692485225748755e-06, + "loss": 0.5629, + "step": 47726 + }, + { + "epoch": 0.8246993364666851, + "grad_norm": 0.5789489879044023, + "learning_rate": 1.5689475586237558e-06, + "loss": 0.5495, + "step": 47727 + }, + { + "epoch": 0.824716615980094, + "grad_norm": 0.5515394051290009, + "learning_rate": 1.5686466210793305e-06, + "loss": 0.737, + "step": 47728 + }, + { + "epoch": 0.824733895493503, + "grad_norm": 1.6436204903484424, + "learning_rate": 1.5683457099425448e-06, + "loss": 0.5345, + "step": 47729 + }, + { + "epoch": 0.8247511750069118, + "grad_norm": 1.365835154711075, + "learning_rate": 1.568044825214343e-06, + "loss": 0.2533, + "step": 47730 + }, + { + "epoch": 0.8247684545203207, + "grad_norm": 1.5532031799951982, + "learning_rate": 1.567743966895664e-06, + "loss": 0.1938, + "step": 47731 + }, + { + "epoch": 0.8247857340337296, + "grad_norm": 1.8402914647569548, + "learning_rate": 1.567443134987454e-06, + "loss": 0.3919, + "step": 47732 + }, + { + "epoch": 0.8248030135471385, + "grad_norm": 1.2419746859229717, + "learning_rate": 1.5671423294906507e-06, + "loss": 0.4884, + "step": 47733 + }, + { + "epoch": 0.8248202930605474, + "grad_norm": 1.3867934580315469, + "learning_rate": 1.5668415504061973e-06, + "loss": 0.3329, + "step": 47734 + }, + { + "epoch": 0.8248375725739563, + "grad_norm": 1.5123077028062077, + "learning_rate": 1.566540797735039e-06, + "loss": 0.3635, + "step": 47735 + }, + { + "epoch": 0.8248548520873652, + "grad_norm": 1.515842460239875, + "learning_rate": 1.5662400714781168e-06, + "loss": 0.3557, + "step": 47736 + }, + { + "epoch": 0.8248721316007741, + "grad_norm": 0.9298422515048139, + "learning_rate": 1.5659393716363692e-06, + "loss": 0.5458, + "step": 47737 + }, + { + "epoch": 0.824889411114183, + "grad_norm": 1.266770656900391, + "learning_rate": 1.565638698210743e-06, + "loss": 0.5042, + "step": 47738 + }, + { + "epoch": 0.8249066906275919, + "grad_norm": 1.6849481667464008, + "learning_rate": 1.5653380512021755e-06, + "loss": 0.3402, + "step": 47739 + }, + { + "epoch": 0.8249239701410008, + "grad_norm": 1.0882505268063263, + "learning_rate": 1.56503743061161e-06, + "loss": 0.1622, + "step": 47740 + }, + { + "epoch": 0.8249412496544097, + "grad_norm": 0.9077577651520978, + "learning_rate": 1.5647368364399907e-06, + "loss": 0.5082, + "step": 47741 + }, + { + "epoch": 0.8249585291678186, + "grad_norm": 2.371501975996239, + "learning_rate": 1.5644362686882541e-06, + "loss": 0.4964, + "step": 47742 + }, + { + "epoch": 0.8249758086812276, + "grad_norm": 0.914609506882763, + "learning_rate": 1.5641357273573476e-06, + "loss": 0.2698, + "step": 47743 + }, + { + "epoch": 0.8249930881946365, + "grad_norm": 1.0507945979161233, + "learning_rate": 1.563835212448206e-06, + "loss": 0.3812, + "step": 47744 + }, + { + "epoch": 0.8250103677080454, + "grad_norm": 2.153964483139472, + "learning_rate": 1.5635347239617727e-06, + "loss": 0.4443, + "step": 47745 + }, + { + "epoch": 0.8250276472214543, + "grad_norm": 1.752688463493311, + "learning_rate": 1.5632342618989914e-06, + "loss": 0.4936, + "step": 47746 + }, + { + "epoch": 0.8250449267348632, + "grad_norm": 1.2229966425464849, + "learning_rate": 1.562933826260803e-06, + "loss": 0.3104, + "step": 47747 + }, + { + "epoch": 0.8250622062482721, + "grad_norm": 1.4330970293282062, + "learning_rate": 1.562633417048145e-06, + "loss": 0.3479, + "step": 47748 + }, + { + "epoch": 0.825079485761681, + "grad_norm": 1.5327313843512256, + "learning_rate": 1.5623330342619626e-06, + "loss": 0.5105, + "step": 47749 + }, + { + "epoch": 0.8250967652750899, + "grad_norm": 1.0618336574477398, + "learning_rate": 1.5620326779031937e-06, + "loss": 0.4433, + "step": 47750 + }, + { + "epoch": 0.8251140447884987, + "grad_norm": 1.3462700690564393, + "learning_rate": 1.5617323479727764e-06, + "loss": 0.4218, + "step": 47751 + }, + { + "epoch": 0.8251313243019076, + "grad_norm": 1.3271166729196253, + "learning_rate": 1.561432044471658e-06, + "loss": 0.3092, + "step": 47752 + }, + { + "epoch": 0.8251486038153165, + "grad_norm": 1.6141834172689968, + "learning_rate": 1.5611317674007742e-06, + "loss": 0.2776, + "step": 47753 + }, + { + "epoch": 0.8251658833287254, + "grad_norm": 0.8512968826483924, + "learning_rate": 1.5608315167610688e-06, + "loss": 0.279, + "step": 47754 + }, + { + "epoch": 0.8251831628421343, + "grad_norm": 1.209029456903513, + "learning_rate": 1.5605312925534788e-06, + "loss": 0.3515, + "step": 47755 + }, + { + "epoch": 0.8252004423555432, + "grad_norm": 1.5899415776039143, + "learning_rate": 1.5602310947789478e-06, + "loss": 0.3695, + "step": 47756 + }, + { + "epoch": 0.8252177218689521, + "grad_norm": 1.2485619341150536, + "learning_rate": 1.5599309234384108e-06, + "loss": 0.3829, + "step": 47757 + }, + { + "epoch": 0.825235001382361, + "grad_norm": 1.8406452013429488, + "learning_rate": 1.5596307785328158e-06, + "loss": 0.216, + "step": 47758 + }, + { + "epoch": 0.82525228089577, + "grad_norm": 1.2407189282198987, + "learning_rate": 1.5593306600630954e-06, + "loss": 0.3709, + "step": 47759 + }, + { + "epoch": 0.8252695604091789, + "grad_norm": 1.3389469929172961, + "learning_rate": 1.5590305680301965e-06, + "loss": 0.3317, + "step": 47760 + }, + { + "epoch": 0.8252868399225878, + "grad_norm": 1.2200263642916025, + "learning_rate": 1.558730502435054e-06, + "loss": 0.3405, + "step": 47761 + }, + { + "epoch": 0.8253041194359967, + "grad_norm": 1.7112148652836905, + "learning_rate": 1.5584304632786085e-06, + "loss": 0.3958, + "step": 47762 + }, + { + "epoch": 0.8253213989494056, + "grad_norm": 1.2586346737159697, + "learning_rate": 1.558130450561799e-06, + "loss": 0.9002, + "step": 47763 + }, + { + "epoch": 0.8253386784628145, + "grad_norm": 2.6679113551308062, + "learning_rate": 1.557830464285567e-06, + "loss": 0.4009, + "step": 47764 + }, + { + "epoch": 0.8253559579762234, + "grad_norm": 2.3498167743202094, + "learning_rate": 1.5575305044508536e-06, + "loss": 0.303, + "step": 47765 + }, + { + "epoch": 0.8253732374896323, + "grad_norm": 1.3522188862584485, + "learning_rate": 1.5572305710585932e-06, + "loss": 0.2204, + "step": 47766 + }, + { + "epoch": 0.8253905170030412, + "grad_norm": 1.277238834650443, + "learning_rate": 1.5569306641097316e-06, + "loss": 0.2963, + "step": 47767 + }, + { + "epoch": 0.8254077965164501, + "grad_norm": 1.3748751199973368, + "learning_rate": 1.556630783605203e-06, + "loss": 0.3245, + "step": 47768 + }, + { + "epoch": 0.825425076029859, + "grad_norm": 2.07688631150452, + "learning_rate": 1.5563309295459471e-06, + "loss": 0.3671, + "step": 47769 + }, + { + "epoch": 0.825442355543268, + "grad_norm": 2.0099625255606455, + "learning_rate": 1.5560311019329078e-06, + "loss": 0.1953, + "step": 47770 + }, + { + "epoch": 0.8254596350566769, + "grad_norm": 1.3838908802674614, + "learning_rate": 1.5557313007670182e-06, + "loss": 0.7475, + "step": 47771 + }, + { + "epoch": 0.8254769145700857, + "grad_norm": 1.3033050934029478, + "learning_rate": 1.5554315260492215e-06, + "loss": 0.2779, + "step": 47772 + }, + { + "epoch": 0.8254941940834946, + "grad_norm": 1.2236109608487442, + "learning_rate": 1.5551317777804542e-06, + "loss": 0.2838, + "step": 47773 + }, + { + "epoch": 0.8255114735969035, + "grad_norm": 0.889123579663258, + "learning_rate": 1.5548320559616548e-06, + "loss": 0.2933, + "step": 47774 + }, + { + "epoch": 0.8255287531103124, + "grad_norm": 1.1798228909492874, + "learning_rate": 1.5545323605937635e-06, + "loss": 0.4571, + "step": 47775 + }, + { + "epoch": 0.8255460326237213, + "grad_norm": 1.289829588693369, + "learning_rate": 1.5542326916777206e-06, + "loss": 0.3528, + "step": 47776 + }, + { + "epoch": 0.8255633121371302, + "grad_norm": 1.0855615413010247, + "learning_rate": 1.5539330492144611e-06, + "loss": 0.321, + "step": 47777 + }, + { + "epoch": 0.8255805916505391, + "grad_norm": 1.7896086501001385, + "learning_rate": 1.5536334332049274e-06, + "loss": 0.4116, + "step": 47778 + }, + { + "epoch": 0.825597871163948, + "grad_norm": 0.9558619976408096, + "learning_rate": 1.5533338436500534e-06, + "loss": 0.3338, + "step": 47779 + }, + { + "epoch": 0.8256151506773569, + "grad_norm": 1.2401709330883453, + "learning_rate": 1.5530342805507792e-06, + "loss": 0.3946, + "step": 47780 + }, + { + "epoch": 0.8256324301907658, + "grad_norm": 2.5544719805762917, + "learning_rate": 1.5527347439080465e-06, + "loss": 0.3619, + "step": 47781 + }, + { + "epoch": 0.8256497097041747, + "grad_norm": 1.258058234294469, + "learning_rate": 1.5524352337227878e-06, + "loss": 0.3142, + "step": 47782 + }, + { + "epoch": 0.8256669892175836, + "grad_norm": 1.4458649329597653, + "learning_rate": 1.5521357499959465e-06, + "loss": 0.395, + "step": 47783 + }, + { + "epoch": 0.8256842687309925, + "grad_norm": 1.334397555647137, + "learning_rate": 1.5518362927284558e-06, + "loss": 0.4534, + "step": 47784 + }, + { + "epoch": 0.8257015482444015, + "grad_norm": 1.3600766172915033, + "learning_rate": 1.5515368619212556e-06, + "loss": 0.2915, + "step": 47785 + }, + { + "epoch": 0.8257188277578104, + "grad_norm": 1.2447121064910343, + "learning_rate": 1.5512374575752841e-06, + "loss": 0.3753, + "step": 47786 + }, + { + "epoch": 0.8257361072712193, + "grad_norm": 0.6827668165215537, + "learning_rate": 1.5509380796914809e-06, + "loss": 0.2626, + "step": 47787 + }, + { + "epoch": 0.8257533867846282, + "grad_norm": 1.095632434084712, + "learning_rate": 1.5506387282707802e-06, + "loss": 0.139, + "step": 47788 + }, + { + "epoch": 0.8257706662980371, + "grad_norm": 0.9018952727685936, + "learning_rate": 1.5503394033141227e-06, + "loss": 0.4374, + "step": 47789 + }, + { + "epoch": 0.825787945811446, + "grad_norm": 1.4831800560693982, + "learning_rate": 1.5500401048224423e-06, + "loss": 0.3518, + "step": 47790 + }, + { + "epoch": 0.8258052253248549, + "grad_norm": 1.0008858274909331, + "learning_rate": 1.549740832796679e-06, + "loss": 0.2363, + "step": 47791 + }, + { + "epoch": 0.8258225048382638, + "grad_norm": 1.5625483450259299, + "learning_rate": 1.549441587237771e-06, + "loss": 0.5052, + "step": 47792 + }, + { + "epoch": 0.8258397843516727, + "grad_norm": 1.07797672848906, + "learning_rate": 1.5491423681466522e-06, + "loss": 0.2314, + "step": 47793 + }, + { + "epoch": 0.8258570638650815, + "grad_norm": 1.1699980219480086, + "learning_rate": 1.548843175524265e-06, + "loss": 0.2672, + "step": 47794 + }, + { + "epoch": 0.8258743433784904, + "grad_norm": 1.4405939876783198, + "learning_rate": 1.5485440093715397e-06, + "loss": 0.4668, + "step": 47795 + }, + { + "epoch": 0.8258916228918993, + "grad_norm": 1.2690273584182366, + "learning_rate": 1.5482448696894193e-06, + "loss": 0.4966, + "step": 47796 + }, + { + "epoch": 0.8259089024053082, + "grad_norm": 1.3383955958822633, + "learning_rate": 1.5479457564788337e-06, + "loss": 0.3087, + "step": 47797 + }, + { + "epoch": 0.8259261819187171, + "grad_norm": 1.1934946598947545, + "learning_rate": 1.5476466697407299e-06, + "loss": 0.2911, + "step": 47798 + }, + { + "epoch": 0.825943461432126, + "grad_norm": 1.3753001175206272, + "learning_rate": 1.5473476094760353e-06, + "loss": 0.3277, + "step": 47799 + }, + { + "epoch": 0.825960740945535, + "grad_norm": 0.8829453243890647, + "learning_rate": 1.5470485756856934e-06, + "loss": 0.554, + "step": 47800 + }, + { + "epoch": 0.8259780204589439, + "grad_norm": 1.609150900699042, + "learning_rate": 1.5467495683706379e-06, + "loss": 0.3767, + "step": 47801 + }, + { + "epoch": 0.8259952999723528, + "grad_norm": 1.7017969792336882, + "learning_rate": 1.5464505875318036e-06, + "loss": 0.4347, + "step": 47802 + }, + { + "epoch": 0.8260125794857617, + "grad_norm": 2.04589599313515, + "learning_rate": 1.5461516331701275e-06, + "loss": 0.3972, + "step": 47803 + }, + { + "epoch": 0.8260298589991706, + "grad_norm": 1.2146579210323805, + "learning_rate": 1.5458527052865468e-06, + "loss": 0.3388, + "step": 47804 + }, + { + "epoch": 0.8260471385125795, + "grad_norm": 1.3052919918150823, + "learning_rate": 1.5455538038820007e-06, + "loss": 0.312, + "step": 47805 + }, + { + "epoch": 0.8260644180259884, + "grad_norm": 0.8509014029045215, + "learning_rate": 1.5452549289574204e-06, + "loss": 0.5223, + "step": 47806 + }, + { + "epoch": 0.8260816975393973, + "grad_norm": 1.4969798125166487, + "learning_rate": 1.5449560805137464e-06, + "loss": 0.4749, + "step": 47807 + }, + { + "epoch": 0.8260989770528062, + "grad_norm": 0.9838230694315476, + "learning_rate": 1.5446572585519093e-06, + "loss": 0.3937, + "step": 47808 + }, + { + "epoch": 0.8261162565662151, + "grad_norm": 2.252726194123548, + "learning_rate": 1.5443584630728493e-06, + "loss": 0.2645, + "step": 47809 + }, + { + "epoch": 0.826133536079624, + "grad_norm": 2.049937556832224, + "learning_rate": 1.544059694077501e-06, + "loss": 0.3987, + "step": 47810 + }, + { + "epoch": 0.8261508155930329, + "grad_norm": 1.1201304558293672, + "learning_rate": 1.5437609515668027e-06, + "loss": 0.2489, + "step": 47811 + }, + { + "epoch": 0.8261680951064418, + "grad_norm": 1.209446343340209, + "learning_rate": 1.5434622355416873e-06, + "loss": 0.2945, + "step": 47812 + }, + { + "epoch": 0.8261853746198508, + "grad_norm": 0.8494610720718994, + "learning_rate": 1.5431635460030891e-06, + "loss": 0.5553, + "step": 47813 + }, + { + "epoch": 0.8262026541332597, + "grad_norm": 1.1880192956590596, + "learning_rate": 1.5428648829519456e-06, + "loss": 0.2354, + "step": 47814 + }, + { + "epoch": 0.8262199336466685, + "grad_norm": 1.6415982572390773, + "learning_rate": 1.5425662463891911e-06, + "loss": 0.457, + "step": 47815 + }, + { + "epoch": 0.8262372131600774, + "grad_norm": 1.093546876316099, + "learning_rate": 1.542267636315764e-06, + "loss": 0.5174, + "step": 47816 + }, + { + "epoch": 0.8262544926734863, + "grad_norm": 0.9982017506025511, + "learning_rate": 1.5419690527325958e-06, + "loss": 0.5493, + "step": 47817 + }, + { + "epoch": 0.8262717721868952, + "grad_norm": 1.160641562060912, + "learning_rate": 1.5416704956406259e-06, + "loss": 0.291, + "step": 47818 + }, + { + "epoch": 0.8262890517003041, + "grad_norm": 1.5915712748934723, + "learning_rate": 1.5413719650407844e-06, + "loss": 0.307, + "step": 47819 + }, + { + "epoch": 0.826306331213713, + "grad_norm": 1.144996756365845, + "learning_rate": 1.5410734609340084e-06, + "loss": 0.4582, + "step": 47820 + }, + { + "epoch": 0.8263236107271219, + "grad_norm": 1.3781478891164165, + "learning_rate": 1.5407749833212338e-06, + "loss": 0.3846, + "step": 47821 + }, + { + "epoch": 0.8263408902405308, + "grad_norm": 1.1763292158070882, + "learning_rate": 1.5404765322033967e-06, + "loss": 0.3969, + "step": 47822 + }, + { + "epoch": 0.8263581697539397, + "grad_norm": 0.8395817752031086, + "learning_rate": 1.5401781075814292e-06, + "loss": 0.3452, + "step": 47823 + }, + { + "epoch": 0.8263754492673486, + "grad_norm": 1.244773084088037, + "learning_rate": 1.5398797094562656e-06, + "loss": 0.3866, + "step": 47824 + }, + { + "epoch": 0.8263927287807575, + "grad_norm": 1.8365080723539502, + "learning_rate": 1.5395813378288404e-06, + "loss": 0.4251, + "step": 47825 + }, + { + "epoch": 0.8264100082941664, + "grad_norm": 1.5482105345925998, + "learning_rate": 1.5392829927000907e-06, + "loss": 0.3739, + "step": 47826 + }, + { + "epoch": 0.8264272878075754, + "grad_norm": 1.2476931655109462, + "learning_rate": 1.5389846740709514e-06, + "loss": 0.3177, + "step": 47827 + }, + { + "epoch": 0.8264445673209843, + "grad_norm": 1.8066751615839367, + "learning_rate": 1.5386863819423536e-06, + "loss": 0.2604, + "step": 47828 + }, + { + "epoch": 0.8264618468343932, + "grad_norm": 2.1841438054121154, + "learning_rate": 1.5383881163152348e-06, + "loss": 0.475, + "step": 47829 + }, + { + "epoch": 0.8264791263478021, + "grad_norm": 1.2187553149944574, + "learning_rate": 1.538089877190525e-06, + "loss": 0.4145, + "step": 47830 + }, + { + "epoch": 0.826496405861211, + "grad_norm": 2.6050688630369834, + "learning_rate": 1.5377916645691604e-06, + "loss": 0.35, + "step": 47831 + }, + { + "epoch": 0.8265136853746199, + "grad_norm": 1.001777752749732, + "learning_rate": 1.537493478452079e-06, + "loss": 0.3055, + "step": 47832 + }, + { + "epoch": 0.8265309648880288, + "grad_norm": 1.017956349731804, + "learning_rate": 1.5371953188402078e-06, + "loss": 0.3129, + "step": 47833 + }, + { + "epoch": 0.8265482444014377, + "grad_norm": 1.4786231897368307, + "learning_rate": 1.5368971857344872e-06, + "loss": 0.2595, + "step": 47834 + }, + { + "epoch": 0.8265655239148466, + "grad_norm": 1.2385950708658822, + "learning_rate": 1.5365990791358444e-06, + "loss": 0.4509, + "step": 47835 + }, + { + "epoch": 0.8265828034282554, + "grad_norm": 1.4937426211835765, + "learning_rate": 1.5363009990452194e-06, + "loss": 0.5713, + "step": 47836 + }, + { + "epoch": 0.8266000829416643, + "grad_norm": 1.1043967936655688, + "learning_rate": 1.5360029454635383e-06, + "loss": 0.3095, + "step": 47837 + }, + { + "epoch": 0.8266173624550732, + "grad_norm": 1.3593917551457453, + "learning_rate": 1.5357049183917439e-06, + "loss": 0.3122, + "step": 47838 + }, + { + "epoch": 0.8266346419684821, + "grad_norm": 1.3782718909047578, + "learning_rate": 1.5354069178307618e-06, + "loss": 0.2146, + "step": 47839 + }, + { + "epoch": 0.826651921481891, + "grad_norm": 1.599464122684047, + "learning_rate": 1.5351089437815313e-06, + "loss": 0.2654, + "step": 47840 + }, + { + "epoch": 0.8266692009953, + "grad_norm": 2.029265609942597, + "learning_rate": 1.534810996244981e-06, + "loss": 0.3803, + "step": 47841 + }, + { + "epoch": 0.8266864805087089, + "grad_norm": 0.885827154676962, + "learning_rate": 1.5345130752220482e-06, + "loss": 0.3043, + "step": 47842 + }, + { + "epoch": 0.8267037600221178, + "grad_norm": 1.0907074070223672, + "learning_rate": 1.5342151807136618e-06, + "loss": 0.3099, + "step": 47843 + }, + { + "epoch": 0.8267210395355267, + "grad_norm": 1.0627019051755653, + "learning_rate": 1.5339173127207564e-06, + "loss": 0.2121, + "step": 47844 + }, + { + "epoch": 0.8267383190489356, + "grad_norm": 0.8462981644230937, + "learning_rate": 1.5336194712442676e-06, + "loss": 0.3042, + "step": 47845 + }, + { + "epoch": 0.8267555985623445, + "grad_norm": 1.767035299046319, + "learning_rate": 1.5333216562851238e-06, + "loss": 0.4134, + "step": 47846 + }, + { + "epoch": 0.8267728780757534, + "grad_norm": 1.0009680725136294, + "learning_rate": 1.5330238678442621e-06, + "loss": 0.4976, + "step": 47847 + }, + { + "epoch": 0.8267901575891623, + "grad_norm": 0.9853934612173257, + "learning_rate": 1.5327261059226106e-06, + "loss": 0.3613, + "step": 47848 + }, + { + "epoch": 0.8268074371025712, + "grad_norm": 1.8449886017129797, + "learning_rate": 1.5324283705211052e-06, + "loss": 0.2755, + "step": 47849 + }, + { + "epoch": 0.8268247166159801, + "grad_norm": 1.4640457358203474, + "learning_rate": 1.5321306616406772e-06, + "loss": 0.2353, + "step": 47850 + }, + { + "epoch": 0.826841996129389, + "grad_norm": 2.1862584854800327, + "learning_rate": 1.5318329792822616e-06, + "loss": 0.2531, + "step": 47851 + }, + { + "epoch": 0.8268592756427979, + "grad_norm": 1.9640362679720273, + "learning_rate": 1.531535323446789e-06, + "loss": 0.4668, + "step": 47852 + }, + { + "epoch": 0.8268765551562068, + "grad_norm": 1.8777499325783271, + "learning_rate": 1.5312376941351892e-06, + "loss": 0.2763, + "step": 47853 + }, + { + "epoch": 0.8268938346696157, + "grad_norm": 1.1519682884067033, + "learning_rate": 1.5309400913483962e-06, + "loss": 0.2716, + "step": 47854 + }, + { + "epoch": 0.8269111141830247, + "grad_norm": 1.1101304270563033, + "learning_rate": 1.5306425150873428e-06, + "loss": 0.3292, + "step": 47855 + }, + { + "epoch": 0.8269283936964336, + "grad_norm": 1.7107669468433575, + "learning_rate": 1.5303449653529634e-06, + "loss": 0.3666, + "step": 47856 + }, + { + "epoch": 0.8269456732098424, + "grad_norm": 1.4780935233314878, + "learning_rate": 1.5300474421461842e-06, + "loss": 0.3805, + "step": 47857 + }, + { + "epoch": 0.8269629527232513, + "grad_norm": 1.5105115885096219, + "learning_rate": 1.529749945467942e-06, + "loss": 0.5148, + "step": 47858 + }, + { + "epoch": 0.8269802322366602, + "grad_norm": 1.1632386155510575, + "learning_rate": 1.5294524753191652e-06, + "loss": 0.2298, + "step": 47859 + }, + { + "epoch": 0.8269975117500691, + "grad_norm": 1.186131119550298, + "learning_rate": 1.5291550317007863e-06, + "loss": 0.4284, + "step": 47860 + }, + { + "epoch": 0.827014791263478, + "grad_norm": 1.464118985876257, + "learning_rate": 1.528857614613738e-06, + "loss": 0.4448, + "step": 47861 + }, + { + "epoch": 0.8270320707768869, + "grad_norm": 1.749657643449089, + "learning_rate": 1.5285602240589548e-06, + "loss": 0.3124, + "step": 47862 + }, + { + "epoch": 0.8270493502902958, + "grad_norm": 1.8494820656189563, + "learning_rate": 1.5282628600373628e-06, + "loss": 0.23, + "step": 47863 + }, + { + "epoch": 0.8270666298037047, + "grad_norm": 1.1336616147761602, + "learning_rate": 1.5279655225498945e-06, + "loss": 0.2144, + "step": 47864 + }, + { + "epoch": 0.8270839093171136, + "grad_norm": 1.442439634978903, + "learning_rate": 1.5276682115974818e-06, + "loss": 0.3907, + "step": 47865 + }, + { + "epoch": 0.8271011888305225, + "grad_norm": 0.9698029088360443, + "learning_rate": 1.5273709271810555e-06, + "loss": 0.6636, + "step": 47866 + }, + { + "epoch": 0.8271184683439314, + "grad_norm": 0.9233971133102138, + "learning_rate": 1.5270736693015497e-06, + "loss": 0.2855, + "step": 47867 + }, + { + "epoch": 0.8271357478573403, + "grad_norm": 1.4296117874047412, + "learning_rate": 1.5267764379598915e-06, + "loss": 0.4725, + "step": 47868 + }, + { + "epoch": 0.8271530273707492, + "grad_norm": 1.3261607139890772, + "learning_rate": 1.5264792331570154e-06, + "loss": 0.4017, + "step": 47869 + }, + { + "epoch": 0.8271703068841582, + "grad_norm": 1.595415272140803, + "learning_rate": 1.5261820548938478e-06, + "loss": 0.3765, + "step": 47870 + }, + { + "epoch": 0.8271875863975671, + "grad_norm": 1.4444390439720365, + "learning_rate": 1.5258849031713218e-06, + "loss": 0.4834, + "step": 47871 + }, + { + "epoch": 0.827204865910976, + "grad_norm": 1.4192773168109245, + "learning_rate": 1.5255877779903694e-06, + "loss": 0.4286, + "step": 47872 + }, + { + "epoch": 0.8272221454243849, + "grad_norm": 1.3708516049165058, + "learning_rate": 1.5252906793519219e-06, + "loss": 0.2708, + "step": 47873 + }, + { + "epoch": 0.8272394249377938, + "grad_norm": 1.5934950154347516, + "learning_rate": 1.5249936072569072e-06, + "loss": 0.2224, + "step": 47874 + }, + { + "epoch": 0.8272567044512027, + "grad_norm": 1.7309933412340928, + "learning_rate": 1.5246965617062547e-06, + "loss": 0.3649, + "step": 47875 + }, + { + "epoch": 0.8272739839646116, + "grad_norm": 1.5245833304614171, + "learning_rate": 1.5243995427008994e-06, + "loss": 0.3659, + "step": 47876 + }, + { + "epoch": 0.8272912634780205, + "grad_norm": 1.2420664801839323, + "learning_rate": 1.5241025502417638e-06, + "loss": 0.3541, + "step": 47877 + }, + { + "epoch": 0.8273085429914293, + "grad_norm": 1.0143212791671097, + "learning_rate": 1.5238055843297883e-06, + "loss": 0.4112, + "step": 47878 + }, + { + "epoch": 0.8273258225048382, + "grad_norm": 1.0486037341839727, + "learning_rate": 1.5235086449658953e-06, + "loss": 0.3749, + "step": 47879 + }, + { + "epoch": 0.8273431020182471, + "grad_norm": 2.2110947470417637, + "learning_rate": 1.5232117321510197e-06, + "loss": 0.3121, + "step": 47880 + }, + { + "epoch": 0.827360381531656, + "grad_norm": 1.6971990150950351, + "learning_rate": 1.5229148458860865e-06, + "loss": 0.3026, + "step": 47881 + }, + { + "epoch": 0.8273776610450649, + "grad_norm": 1.3838229073052128, + "learning_rate": 1.5226179861720314e-06, + "loss": 0.3661, + "step": 47882 + }, + { + "epoch": 0.8273949405584738, + "grad_norm": 1.427401442419456, + "learning_rate": 1.5223211530097781e-06, + "loss": 0.2142, + "step": 47883 + }, + { + "epoch": 0.8274122200718828, + "grad_norm": 1.0967273094711127, + "learning_rate": 1.5220243464002593e-06, + "loss": 0.1459, + "step": 47884 + }, + { + "epoch": 0.8274294995852917, + "grad_norm": 1.0733592129572724, + "learning_rate": 1.5217275663444064e-06, + "loss": 0.5044, + "step": 47885 + }, + { + "epoch": 0.8274467790987006, + "grad_norm": 1.1570012839333887, + "learning_rate": 1.5214308128431454e-06, + "loss": 0.2808, + "step": 47886 + }, + { + "epoch": 0.8274640586121095, + "grad_norm": 1.4822267819129873, + "learning_rate": 1.52113408589741e-06, + "loss": 0.2504, + "step": 47887 + }, + { + "epoch": 0.8274813381255184, + "grad_norm": 1.2746023216528979, + "learning_rate": 1.5208373855081237e-06, + "loss": 0.3087, + "step": 47888 + }, + { + "epoch": 0.8274986176389273, + "grad_norm": 1.1947694763896262, + "learning_rate": 1.5205407116762206e-06, + "loss": 0.465, + "step": 47889 + }, + { + "epoch": 0.8275158971523362, + "grad_norm": 1.2114805254362888, + "learning_rate": 1.5202440644026272e-06, + "loss": 0.2451, + "step": 47890 + }, + { + "epoch": 0.8275331766657451, + "grad_norm": 1.5103571977354764, + "learning_rate": 1.5199474436882766e-06, + "loss": 0.3295, + "step": 47891 + }, + { + "epoch": 0.827550456179154, + "grad_norm": 1.3189666771483635, + "learning_rate": 1.5196508495340923e-06, + "loss": 0.4662, + "step": 47892 + }, + { + "epoch": 0.8275677356925629, + "grad_norm": 1.7800756524445238, + "learning_rate": 1.519354281941009e-06, + "loss": 0.4129, + "step": 47893 + }, + { + "epoch": 0.8275850152059718, + "grad_norm": 1.1487829486395484, + "learning_rate": 1.5190577409099495e-06, + "loss": 0.4728, + "step": 47894 + }, + { + "epoch": 0.8276022947193807, + "grad_norm": 0.8366730691346702, + "learning_rate": 1.5187612264418462e-06, + "loss": 0.5007, + "step": 47895 + }, + { + "epoch": 0.8276195742327896, + "grad_norm": 1.2363381288190838, + "learning_rate": 1.5184647385376295e-06, + "loss": 0.3153, + "step": 47896 + }, + { + "epoch": 0.8276368537461986, + "grad_norm": 1.3574482974664375, + "learning_rate": 1.5181682771982232e-06, + "loss": 0.2584, + "step": 47897 + }, + { + "epoch": 0.8276541332596075, + "grad_norm": 2.0072086487146876, + "learning_rate": 1.517871842424562e-06, + "loss": 0.3894, + "step": 47898 + }, + { + "epoch": 0.8276714127730163, + "grad_norm": 2.064161798799829, + "learning_rate": 1.5175754342175675e-06, + "loss": 0.3767, + "step": 47899 + }, + { + "epoch": 0.8276886922864252, + "grad_norm": 1.2728688595688722, + "learning_rate": 1.517279052578171e-06, + "loss": 0.6859, + "step": 47900 + }, + { + "epoch": 0.8277059717998341, + "grad_norm": 1.0985126142124142, + "learning_rate": 1.516982697507302e-06, + "loss": 0.3792, + "step": 47901 + }, + { + "epoch": 0.827723251313243, + "grad_norm": 1.6203811941965203, + "learning_rate": 1.5166863690058897e-06, + "loss": 0.1973, + "step": 47902 + }, + { + "epoch": 0.8277405308266519, + "grad_norm": 1.1303328306154317, + "learning_rate": 1.5163900670748577e-06, + "loss": 0.8963, + "step": 47903 + }, + { + "epoch": 0.8277578103400608, + "grad_norm": 1.400082831645165, + "learning_rate": 1.5160937917151398e-06, + "loss": 0.3211, + "step": 47904 + }, + { + "epoch": 0.8277750898534697, + "grad_norm": 1.3810032700674577, + "learning_rate": 1.5157975429276583e-06, + "loss": 0.3027, + "step": 47905 + }, + { + "epoch": 0.8277923693668786, + "grad_norm": 2.4136288301043214, + "learning_rate": 1.5155013207133439e-06, + "loss": 0.161, + "step": 47906 + }, + { + "epoch": 0.8278096488802875, + "grad_norm": 0.9275999854158575, + "learning_rate": 1.5152051250731258e-06, + "loss": 0.5876, + "step": 47907 + }, + { + "epoch": 0.8278269283936964, + "grad_norm": 1.1343365077801686, + "learning_rate": 1.5149089560079288e-06, + "loss": 0.1482, + "step": 47908 + }, + { + "epoch": 0.8278442079071053, + "grad_norm": 1.3942328085484093, + "learning_rate": 1.5146128135186832e-06, + "loss": 0.2899, + "step": 47909 + }, + { + "epoch": 0.8278614874205142, + "grad_norm": 1.303095540703644, + "learning_rate": 1.514316697606314e-06, + "loss": 0.3878, + "step": 47910 + }, + { + "epoch": 0.8278787669339231, + "grad_norm": 1.4105805817851083, + "learning_rate": 1.514020608271749e-06, + "loss": 0.3693, + "step": 47911 + }, + { + "epoch": 0.8278960464473321, + "grad_norm": 0.8779570857257751, + "learning_rate": 1.5137245455159155e-06, + "loss": 0.496, + "step": 47912 + }, + { + "epoch": 0.827913325960741, + "grad_norm": 1.3654911621405095, + "learning_rate": 1.5134285093397449e-06, + "loss": 0.3288, + "step": 47913 + }, + { + "epoch": 0.8279306054741499, + "grad_norm": 0.8712832065468982, + "learning_rate": 1.513132499744161e-06, + "loss": 0.6416, + "step": 47914 + }, + { + "epoch": 0.8279478849875588, + "grad_norm": 1.8969747173243359, + "learning_rate": 1.5128365167300895e-06, + "loss": 0.3218, + "step": 47915 + }, + { + "epoch": 0.8279651645009677, + "grad_norm": 0.9591712610519075, + "learning_rate": 1.51254056029846e-06, + "loss": 0.3089, + "step": 47916 + }, + { + "epoch": 0.8279824440143766, + "grad_norm": 1.2740974919337187, + "learning_rate": 1.512244630450197e-06, + "loss": 0.3139, + "step": 47917 + }, + { + "epoch": 0.8279997235277855, + "grad_norm": 1.2300102512996356, + "learning_rate": 1.5119487271862288e-06, + "loss": 0.3435, + "step": 47918 + }, + { + "epoch": 0.8280170030411944, + "grad_norm": 1.863717783089182, + "learning_rate": 1.5116528505074823e-06, + "loss": 0.3054, + "step": 47919 + }, + { + "epoch": 0.8280342825546032, + "grad_norm": 1.328351732751182, + "learning_rate": 1.511357000414887e-06, + "loss": 0.4091, + "step": 47920 + }, + { + "epoch": 0.8280515620680121, + "grad_norm": 1.1367348434244469, + "learning_rate": 1.5110611769093631e-06, + "loss": 0.412, + "step": 47921 + }, + { + "epoch": 0.828068841581421, + "grad_norm": 4.533920864330871, + "learning_rate": 1.5107653799918443e-06, + "loss": 0.3486, + "step": 47922 + }, + { + "epoch": 0.8280861210948299, + "grad_norm": 1.130315705996927, + "learning_rate": 1.5104696096632498e-06, + "loss": 0.3567, + "step": 47923 + }, + { + "epoch": 0.8281034006082388, + "grad_norm": 0.8778610755284583, + "learning_rate": 1.5101738659245101e-06, + "loss": 0.3064, + "step": 47924 + }, + { + "epoch": 0.8281206801216477, + "grad_norm": 1.2236259707677213, + "learning_rate": 1.5098781487765546e-06, + "loss": 0.3356, + "step": 47925 + }, + { + "epoch": 0.8281379596350567, + "grad_norm": 1.107060342133613, + "learning_rate": 1.5095824582203023e-06, + "loss": 0.4833, + "step": 47926 + }, + { + "epoch": 0.8281552391484656, + "grad_norm": 1.1848039165594502, + "learning_rate": 1.5092867942566858e-06, + "loss": 0.2856, + "step": 47927 + }, + { + "epoch": 0.8281725186618745, + "grad_norm": 1.945011653714455, + "learning_rate": 1.5089911568866256e-06, + "loss": 0.4458, + "step": 47928 + }, + { + "epoch": 0.8281897981752834, + "grad_norm": 1.2087416918262697, + "learning_rate": 1.5086955461110509e-06, + "loss": 0.738, + "step": 47929 + }, + { + "epoch": 0.8282070776886923, + "grad_norm": 0.9420787427366715, + "learning_rate": 1.5083999619308875e-06, + "loss": 0.2118, + "step": 47930 + }, + { + "epoch": 0.8282243572021012, + "grad_norm": 1.5765083142457745, + "learning_rate": 1.5081044043470617e-06, + "loss": 0.3863, + "step": 47931 + }, + { + "epoch": 0.8282416367155101, + "grad_norm": 1.1381861414003083, + "learning_rate": 1.5078088733604967e-06, + "loss": 0.4548, + "step": 47932 + }, + { + "epoch": 0.828258916228919, + "grad_norm": 1.3774466262443679, + "learning_rate": 1.507513368972122e-06, + "loss": 0.2306, + "step": 47933 + }, + { + "epoch": 0.8282761957423279, + "grad_norm": 1.9541556820953752, + "learning_rate": 1.5072178911828595e-06, + "loss": 0.2812, + "step": 47934 + }, + { + "epoch": 0.8282934752557368, + "grad_norm": 1.267587901602748, + "learning_rate": 1.5069224399936343e-06, + "loss": 0.4984, + "step": 47935 + }, + { + "epoch": 0.8283107547691457, + "grad_norm": 1.1153171880781538, + "learning_rate": 1.5066270154053764e-06, + "loss": 0.3979, + "step": 47936 + }, + { + "epoch": 0.8283280342825546, + "grad_norm": 1.4627853024801898, + "learning_rate": 1.5063316174190068e-06, + "loss": 0.3427, + "step": 47937 + }, + { + "epoch": 0.8283453137959635, + "grad_norm": 1.0905193005746812, + "learning_rate": 1.5060362460354539e-06, + "loss": 0.2586, + "step": 47938 + }, + { + "epoch": 0.8283625933093725, + "grad_norm": 0.9339158237908647, + "learning_rate": 1.5057409012556378e-06, + "loss": 0.2985, + "step": 47939 + }, + { + "epoch": 0.8283798728227814, + "grad_norm": 0.6530079718128262, + "learning_rate": 1.505445583080487e-06, + "loss": 0.8076, + "step": 47940 + }, + { + "epoch": 0.8283971523361903, + "grad_norm": 1.4987199936982898, + "learning_rate": 1.5051502915109262e-06, + "loss": 0.435, + "step": 47941 + }, + { + "epoch": 0.8284144318495991, + "grad_norm": 1.3471293509082696, + "learning_rate": 1.5048550265478835e-06, + "loss": 0.2829, + "step": 47942 + }, + { + "epoch": 0.828431711363008, + "grad_norm": 1.8289652188162013, + "learning_rate": 1.504559788192277e-06, + "loss": 0.4396, + "step": 47943 + }, + { + "epoch": 0.8284489908764169, + "grad_norm": 1.6706622195618839, + "learning_rate": 1.504264576445038e-06, + "loss": 0.3957, + "step": 47944 + }, + { + "epoch": 0.8284662703898258, + "grad_norm": 1.5197420701078759, + "learning_rate": 1.5039693913070852e-06, + "loss": 0.4525, + "step": 47945 + }, + { + "epoch": 0.8284835499032347, + "grad_norm": 1.3139323110977168, + "learning_rate": 1.5036742327793463e-06, + "loss": 0.2633, + "step": 47946 + }, + { + "epoch": 0.8285008294166436, + "grad_norm": 1.1996856577119, + "learning_rate": 1.5033791008627474e-06, + "loss": 0.4131, + "step": 47947 + }, + { + "epoch": 0.8285181089300525, + "grad_norm": 0.8050097332536778, + "learning_rate": 1.5030839955582088e-06, + "loss": 0.5916, + "step": 47948 + }, + { + "epoch": 0.8285353884434614, + "grad_norm": 1.4741786340679606, + "learning_rate": 1.5027889168666598e-06, + "loss": 0.246, + "step": 47949 + }, + { + "epoch": 0.8285526679568703, + "grad_norm": 1.5482548663896214, + "learning_rate": 1.5024938647890186e-06, + "loss": 0.2865, + "step": 47950 + }, + { + "epoch": 0.8285699474702792, + "grad_norm": 0.9955751563009877, + "learning_rate": 1.5021988393262122e-06, + "loss": 0.6982, + "step": 47951 + }, + { + "epoch": 0.8285872269836881, + "grad_norm": 2.180027460292477, + "learning_rate": 1.5019038404791652e-06, + "loss": 0.3238, + "step": 47952 + }, + { + "epoch": 0.828604506497097, + "grad_norm": 1.0194529910298649, + "learning_rate": 1.5016088682488027e-06, + "loss": 0.2445, + "step": 47953 + }, + { + "epoch": 0.828621786010506, + "grad_norm": 1.077741381054403, + "learning_rate": 1.5013139226360462e-06, + "loss": 0.4005, + "step": 47954 + }, + { + "epoch": 0.8286390655239149, + "grad_norm": 0.7536953092826653, + "learning_rate": 1.5010190036418215e-06, + "loss": 0.8727, + "step": 47955 + }, + { + "epoch": 0.8286563450373238, + "grad_norm": 1.9361212696346446, + "learning_rate": 1.5007241112670523e-06, + "loss": 0.3734, + "step": 47956 + }, + { + "epoch": 0.8286736245507327, + "grad_norm": 0.9417102294310901, + "learning_rate": 1.500429245512658e-06, + "loss": 0.4425, + "step": 47957 + }, + { + "epoch": 0.8286909040641416, + "grad_norm": 2.027287481897408, + "learning_rate": 1.5001344063795653e-06, + "loss": 0.3427, + "step": 47958 + }, + { + "epoch": 0.8287081835775505, + "grad_norm": 1.4530197767994477, + "learning_rate": 1.499839593868697e-06, + "loss": 0.5097, + "step": 47959 + }, + { + "epoch": 0.8287254630909594, + "grad_norm": 1.3065051670601484, + "learning_rate": 1.4995448079809804e-06, + "loss": 0.379, + "step": 47960 + }, + { + "epoch": 0.8287427426043683, + "grad_norm": 1.184149658659292, + "learning_rate": 1.4992500487173334e-06, + "loss": 0.2018, + "step": 47961 + }, + { + "epoch": 0.8287600221177772, + "grad_norm": 2.12968866916928, + "learning_rate": 1.4989553160786829e-06, + "loss": 0.2738, + "step": 47962 + }, + { + "epoch": 0.828777301631186, + "grad_norm": 1.4046036443954728, + "learning_rate": 1.4986606100659485e-06, + "loss": 0.1864, + "step": 47963 + }, + { + "epoch": 0.8287945811445949, + "grad_norm": 1.7664035414584192, + "learning_rate": 1.4983659306800546e-06, + "loss": 0.2054, + "step": 47964 + }, + { + "epoch": 0.8288118606580038, + "grad_norm": 2.9921481974408457, + "learning_rate": 1.4980712779219253e-06, + "loss": 0.4779, + "step": 47965 + }, + { + "epoch": 0.8288291401714127, + "grad_norm": 1.3950738876356452, + "learning_rate": 1.497776651792484e-06, + "loss": 0.2655, + "step": 47966 + }, + { + "epoch": 0.8288464196848216, + "grad_norm": 0.9633047219887346, + "learning_rate": 1.4974820522926537e-06, + "loss": 0.2531, + "step": 47967 + }, + { + "epoch": 0.8288636991982306, + "grad_norm": 1.1871136264742872, + "learning_rate": 1.4971874794233532e-06, + "loss": 0.3159, + "step": 47968 + }, + { + "epoch": 0.8288809787116395, + "grad_norm": 1.2907171437877267, + "learning_rate": 1.4968929331855075e-06, + "loss": 0.2302, + "step": 47969 + }, + { + "epoch": 0.8288982582250484, + "grad_norm": 0.8662097183365243, + "learning_rate": 1.4965984135800393e-06, + "loss": 0.2242, + "step": 47970 + }, + { + "epoch": 0.8289155377384573, + "grad_norm": 0.5875772612642772, + "learning_rate": 1.4963039206078734e-06, + "loss": 0.4686, + "step": 47971 + }, + { + "epoch": 0.8289328172518662, + "grad_norm": 1.6240661626487427, + "learning_rate": 1.4960094542699277e-06, + "loss": 0.681, + "step": 47972 + }, + { + "epoch": 0.8289500967652751, + "grad_norm": 0.8748915745334004, + "learning_rate": 1.4957150145671283e-06, + "loss": 0.3921, + "step": 47973 + }, + { + "epoch": 0.828967376278684, + "grad_norm": 0.8451024862357784, + "learning_rate": 1.4954206015003947e-06, + "loss": 0.2628, + "step": 47974 + }, + { + "epoch": 0.8289846557920929, + "grad_norm": 1.0314698924717687, + "learning_rate": 1.4951262150706491e-06, + "loss": 0.3564, + "step": 47975 + }, + { + "epoch": 0.8290019353055018, + "grad_norm": 0.9407031446815288, + "learning_rate": 1.4948318552788177e-06, + "loss": 0.3631, + "step": 47976 + }, + { + "epoch": 0.8290192148189107, + "grad_norm": 1.822629468887528, + "learning_rate": 1.4945375221258164e-06, + "loss": 0.2946, + "step": 47977 + }, + { + "epoch": 0.8290364943323196, + "grad_norm": 0.9907435132608624, + "learning_rate": 1.494243215612573e-06, + "loss": 0.338, + "step": 47978 + }, + { + "epoch": 0.8290537738457285, + "grad_norm": 1.0358686967687525, + "learning_rate": 1.4939489357400027e-06, + "loss": 0.3874, + "step": 47979 + }, + { + "epoch": 0.8290710533591374, + "grad_norm": 1.255707241026068, + "learning_rate": 1.4936546825090314e-06, + "loss": 0.4247, + "step": 47980 + }, + { + "epoch": 0.8290883328725464, + "grad_norm": 1.7330006306957322, + "learning_rate": 1.4933604559205806e-06, + "loss": 0.386, + "step": 47981 + }, + { + "epoch": 0.8291056123859553, + "grad_norm": 1.4216496488466481, + "learning_rate": 1.4930662559755726e-06, + "loss": 0.5938, + "step": 47982 + }, + { + "epoch": 0.8291228918993642, + "grad_norm": 1.7808077532434772, + "learning_rate": 1.4927720826749259e-06, + "loss": 0.357, + "step": 47983 + }, + { + "epoch": 0.829140171412773, + "grad_norm": 1.1022795378989438, + "learning_rate": 1.4924779360195662e-06, + "loss": 0.2664, + "step": 47984 + }, + { + "epoch": 0.8291574509261819, + "grad_norm": 1.8958706765718025, + "learning_rate": 1.4921838160104096e-06, + "loss": 0.4537, + "step": 47985 + }, + { + "epoch": 0.8291747304395908, + "grad_norm": 1.0365357071787944, + "learning_rate": 1.4918897226483796e-06, + "loss": 0.4313, + "step": 47986 + }, + { + "epoch": 0.8291920099529997, + "grad_norm": 1.386222076792918, + "learning_rate": 1.4915956559344002e-06, + "loss": 0.4355, + "step": 47987 + }, + { + "epoch": 0.8292092894664086, + "grad_norm": 1.9817646398205384, + "learning_rate": 1.491301615869387e-06, + "loss": 0.3529, + "step": 47988 + }, + { + "epoch": 0.8292265689798175, + "grad_norm": 1.4895505752569802, + "learning_rate": 1.4910076024542664e-06, + "loss": 0.2451, + "step": 47989 + }, + { + "epoch": 0.8292438484932264, + "grad_norm": 0.9115152828526047, + "learning_rate": 1.4907136156899548e-06, + "loss": 0.195, + "step": 47990 + }, + { + "epoch": 0.8292611280066353, + "grad_norm": 1.0611073058869265, + "learning_rate": 1.4904196555773743e-06, + "loss": 0.2662, + "step": 47991 + }, + { + "epoch": 0.8292784075200442, + "grad_norm": 1.3979720881937445, + "learning_rate": 1.4901257221174458e-06, + "loss": 0.2801, + "step": 47992 + }, + { + "epoch": 0.8292956870334531, + "grad_norm": 1.012232467224698, + "learning_rate": 1.4898318153110936e-06, + "loss": 0.3933, + "step": 47993 + }, + { + "epoch": 0.829312966546862, + "grad_norm": 1.6789116098154215, + "learning_rate": 1.489537935159232e-06, + "loss": 0.3181, + "step": 47994 + }, + { + "epoch": 0.829330246060271, + "grad_norm": 0.9312629110676913, + "learning_rate": 1.4892440816627861e-06, + "loss": 0.2596, + "step": 47995 + }, + { + "epoch": 0.8293475255736799, + "grad_norm": 1.0028761833442905, + "learning_rate": 1.4889502548226732e-06, + "loss": 0.2937, + "step": 47996 + }, + { + "epoch": 0.8293648050870888, + "grad_norm": 1.2481856196717582, + "learning_rate": 1.4886564546398163e-06, + "loss": 0.3079, + "step": 47997 + }, + { + "epoch": 0.8293820846004977, + "grad_norm": 1.5803935168178165, + "learning_rate": 1.488362681115133e-06, + "loss": 0.44, + "step": 47998 + }, + { + "epoch": 0.8293993641139066, + "grad_norm": 1.3178495273361086, + "learning_rate": 1.4880689342495435e-06, + "loss": 0.2861, + "step": 47999 + }, + { + "epoch": 0.8294166436273155, + "grad_norm": 1.55849245060195, + "learning_rate": 1.4877752140439727e-06, + "loss": 0.4133, + "step": 48000 + }, + { + "epoch": 0.8294339231407244, + "grad_norm": 1.1131939092330572, + "learning_rate": 1.4874815204993332e-06, + "loss": 0.3511, + "step": 48001 + }, + { + "epoch": 0.8294512026541333, + "grad_norm": 1.155966157524551, + "learning_rate": 1.487187853616552e-06, + "loss": 0.4329, + "step": 48002 + }, + { + "epoch": 0.8294684821675422, + "grad_norm": 1.573689054001221, + "learning_rate": 1.4868942133965425e-06, + "loss": 0.5006, + "step": 48003 + }, + { + "epoch": 0.8294857616809511, + "grad_norm": 1.4391692338986621, + "learning_rate": 1.4866005998402267e-06, + "loss": 0.3436, + "step": 48004 + }, + { + "epoch": 0.8295030411943599, + "grad_norm": 1.6744802421370706, + "learning_rate": 1.4863070129485258e-06, + "loss": 0.2982, + "step": 48005 + }, + { + "epoch": 0.8295203207077688, + "grad_norm": 1.8116928398551952, + "learning_rate": 1.4860134527223602e-06, + "loss": 0.3225, + "step": 48006 + }, + { + "epoch": 0.8295376002211777, + "grad_norm": 1.3793699287830885, + "learning_rate": 1.485719919162647e-06, + "loss": 0.4059, + "step": 48007 + }, + { + "epoch": 0.8295548797345866, + "grad_norm": 1.0803907048646075, + "learning_rate": 1.4854264122703044e-06, + "loss": 0.3271, + "step": 48008 + }, + { + "epoch": 0.8295721592479955, + "grad_norm": 1.124410765130885, + "learning_rate": 1.4851329320462527e-06, + "loss": 0.2669, + "step": 48009 + }, + { + "epoch": 0.8295894387614045, + "grad_norm": 1.0372455651192884, + "learning_rate": 1.4848394784914121e-06, + "loss": 0.2218, + "step": 48010 + }, + { + "epoch": 0.8296067182748134, + "grad_norm": 1.0511746553817654, + "learning_rate": 1.4845460516067045e-06, + "loss": 0.5375, + "step": 48011 + }, + { + "epoch": 0.8296239977882223, + "grad_norm": 0.9351055283473855, + "learning_rate": 1.4842526513930421e-06, + "loss": 0.2356, + "step": 48012 + }, + { + "epoch": 0.8296412773016312, + "grad_norm": 1.5666900666516135, + "learning_rate": 1.4839592778513512e-06, + "loss": 0.2637, + "step": 48013 + }, + { + "epoch": 0.8296585568150401, + "grad_norm": 4.481955922996693, + "learning_rate": 1.483665930982544e-06, + "loss": 0.441, + "step": 48014 + }, + { + "epoch": 0.829675836328449, + "grad_norm": 1.4554203806857717, + "learning_rate": 1.4833726107875425e-06, + "loss": 0.2906, + "step": 48015 + }, + { + "epoch": 0.8296931158418579, + "grad_norm": 1.2095648961155603, + "learning_rate": 1.4830793172672652e-06, + "loss": 0.3959, + "step": 48016 + }, + { + "epoch": 0.8297103953552668, + "grad_norm": 0.728457246225552, + "learning_rate": 1.4827860504226333e-06, + "loss": 0.7823, + "step": 48017 + }, + { + "epoch": 0.8297276748686757, + "grad_norm": 1.2954692207671648, + "learning_rate": 1.4824928102545622e-06, + "loss": 0.5364, + "step": 48018 + }, + { + "epoch": 0.8297449543820846, + "grad_norm": 2.279142263235656, + "learning_rate": 1.482199596763969e-06, + "loss": 0.3325, + "step": 48019 + }, + { + "epoch": 0.8297622338954935, + "grad_norm": 2.167848857445627, + "learning_rate": 1.4819064099517743e-06, + "loss": 0.3333, + "step": 48020 + }, + { + "epoch": 0.8297795134089024, + "grad_norm": 1.7198406537105069, + "learning_rate": 1.4816132498188962e-06, + "loss": 0.5455, + "step": 48021 + }, + { + "epoch": 0.8297967929223113, + "grad_norm": 1.2426938320746124, + "learning_rate": 1.481320116366254e-06, + "loss": 0.3173, + "step": 48022 + }, + { + "epoch": 0.8298140724357203, + "grad_norm": 1.5190991075948699, + "learning_rate": 1.4810270095947632e-06, + "loss": 0.3082, + "step": 48023 + }, + { + "epoch": 0.8298313519491292, + "grad_norm": 1.1644912537499401, + "learning_rate": 1.4807339295053447e-06, + "loss": 0.584, + "step": 48024 + }, + { + "epoch": 0.8298486314625381, + "grad_norm": 1.379782240015133, + "learning_rate": 1.4804408760989143e-06, + "loss": 0.4867, + "step": 48025 + }, + { + "epoch": 0.8298659109759469, + "grad_norm": 0.9138070536184181, + "learning_rate": 1.4801478493763887e-06, + "loss": 0.5506, + "step": 48026 + }, + { + "epoch": 0.8298831904893558, + "grad_norm": 1.2859571875971738, + "learning_rate": 1.479854849338689e-06, + "loss": 0.3367, + "step": 48027 + }, + { + "epoch": 0.8299004700027647, + "grad_norm": 0.9232742371466633, + "learning_rate": 1.4795618759867325e-06, + "loss": 0.2245, + "step": 48028 + }, + { + "epoch": 0.8299177495161736, + "grad_norm": 1.250825792335532, + "learning_rate": 1.4792689293214357e-06, + "loss": 0.2886, + "step": 48029 + }, + { + "epoch": 0.8299350290295825, + "grad_norm": 1.2086492683123735, + "learning_rate": 1.4789760093437144e-06, + "loss": 0.3011, + "step": 48030 + }, + { + "epoch": 0.8299523085429914, + "grad_norm": 1.3944675996908449, + "learning_rate": 1.4786831160544878e-06, + "loss": 0.3797, + "step": 48031 + }, + { + "epoch": 0.8299695880564003, + "grad_norm": 1.0272869044852537, + "learning_rate": 1.478390249454673e-06, + "loss": 0.2515, + "step": 48032 + }, + { + "epoch": 0.8299868675698092, + "grad_norm": 0.9061369520115723, + "learning_rate": 1.4780974095451905e-06, + "loss": 0.3679, + "step": 48033 + }, + { + "epoch": 0.8300041470832181, + "grad_norm": 1.1324428489574456, + "learning_rate": 1.4778045963269515e-06, + "loss": 0.356, + "step": 48034 + }, + { + "epoch": 0.830021426596627, + "grad_norm": 1.5919830570732674, + "learning_rate": 1.47751180980088e-06, + "loss": 0.1593, + "step": 48035 + }, + { + "epoch": 0.8300387061100359, + "grad_norm": 1.8000715605534705, + "learning_rate": 1.477219049967885e-06, + "loss": 0.3679, + "step": 48036 + }, + { + "epoch": 0.8300559856234448, + "grad_norm": 1.6458637651239278, + "learning_rate": 1.4769263168288905e-06, + "loss": 0.3577, + "step": 48037 + }, + { + "epoch": 0.8300732651368538, + "grad_norm": 1.1563825683175974, + "learning_rate": 1.476633610384809e-06, + "loss": 0.5665, + "step": 48038 + }, + { + "epoch": 0.8300905446502627, + "grad_norm": 1.478586831251305, + "learning_rate": 1.4763409306365584e-06, + "loss": 0.29, + "step": 48039 + }, + { + "epoch": 0.8301078241636716, + "grad_norm": 0.8636060545006797, + "learning_rate": 1.4760482775850571e-06, + "loss": 0.2434, + "step": 48040 + }, + { + "epoch": 0.8301251036770805, + "grad_norm": 1.5764202294966523, + "learning_rate": 1.4757556512312187e-06, + "loss": 0.4361, + "step": 48041 + }, + { + "epoch": 0.8301423831904894, + "grad_norm": 0.9063718674979608, + "learning_rate": 1.4754630515759648e-06, + "loss": 0.3901, + "step": 48042 + }, + { + "epoch": 0.8301596627038983, + "grad_norm": 2.1136521327518034, + "learning_rate": 1.4751704786202049e-06, + "loss": 0.3616, + "step": 48043 + }, + { + "epoch": 0.8301769422173072, + "grad_norm": 1.676899106535123, + "learning_rate": 1.4748779323648589e-06, + "loss": 0.3165, + "step": 48044 + }, + { + "epoch": 0.8301942217307161, + "grad_norm": 1.854605487380506, + "learning_rate": 1.474585412810844e-06, + "loss": 0.4164, + "step": 48045 + }, + { + "epoch": 0.830211501244125, + "grad_norm": 2.0867137545219365, + "learning_rate": 1.4742929199590773e-06, + "loss": 0.4015, + "step": 48046 + }, + { + "epoch": 0.8302287807575338, + "grad_norm": 1.4608503805191229, + "learning_rate": 1.4740004538104702e-06, + "loss": 0.3847, + "step": 48047 + }, + { + "epoch": 0.8302460602709427, + "grad_norm": 1.3900835193130607, + "learning_rate": 1.4737080143659443e-06, + "loss": 0.4319, + "step": 48048 + }, + { + "epoch": 0.8302633397843516, + "grad_norm": 1.3394427148921872, + "learning_rate": 1.4734156016264113e-06, + "loss": 0.2095, + "step": 48049 + }, + { + "epoch": 0.8302806192977605, + "grad_norm": 1.5479159161277327, + "learning_rate": 1.4731232155927877e-06, + "loss": 0.432, + "step": 48050 + }, + { + "epoch": 0.8302978988111694, + "grad_norm": 1.1992190122618025, + "learning_rate": 1.4728308562659931e-06, + "loss": 0.2629, + "step": 48051 + }, + { + "epoch": 0.8303151783245784, + "grad_norm": 1.1214677260243817, + "learning_rate": 1.4725385236469381e-06, + "loss": 0.431, + "step": 48052 + }, + { + "epoch": 0.8303324578379873, + "grad_norm": 1.6935652311695875, + "learning_rate": 1.4722462177365427e-06, + "loss": 0.5261, + "step": 48053 + }, + { + "epoch": 0.8303497373513962, + "grad_norm": 1.000449924506097, + "learning_rate": 1.4719539385357173e-06, + "loss": 0.3288, + "step": 48054 + }, + { + "epoch": 0.8303670168648051, + "grad_norm": 1.8530556604264397, + "learning_rate": 1.4716616860453814e-06, + "loss": 0.214, + "step": 48055 + }, + { + "epoch": 0.830384296378214, + "grad_norm": 1.5208415204849701, + "learning_rate": 1.4713694602664486e-06, + "loss": 0.3567, + "step": 48056 + }, + { + "epoch": 0.8304015758916229, + "grad_norm": 0.9164011003880946, + "learning_rate": 1.4710772611998369e-06, + "loss": 0.3929, + "step": 48057 + }, + { + "epoch": 0.8304188554050318, + "grad_norm": 0.970661134057621, + "learning_rate": 1.4707850888464592e-06, + "loss": 0.2089, + "step": 48058 + }, + { + "epoch": 0.8304361349184407, + "grad_norm": 1.269400805526358, + "learning_rate": 1.470492943207229e-06, + "loss": 0.4648, + "step": 48059 + }, + { + "epoch": 0.8304534144318496, + "grad_norm": 0.8685578438937133, + "learning_rate": 1.4702008242830623e-06, + "loss": 0.5518, + "step": 48060 + }, + { + "epoch": 0.8304706939452585, + "grad_norm": 1.197195931609702, + "learning_rate": 1.4699087320748762e-06, + "loss": 0.2803, + "step": 48061 + }, + { + "epoch": 0.8304879734586674, + "grad_norm": 1.0929501439871745, + "learning_rate": 1.4696166665835853e-06, + "loss": 0.4572, + "step": 48062 + }, + { + "epoch": 0.8305052529720763, + "grad_norm": 1.5437628950026014, + "learning_rate": 1.4693246278101014e-06, + "loss": 0.2806, + "step": 48063 + }, + { + "epoch": 0.8305225324854852, + "grad_norm": 1.577957473995653, + "learning_rate": 1.4690326157553436e-06, + "loss": 0.1448, + "step": 48064 + }, + { + "epoch": 0.8305398119988942, + "grad_norm": 1.5698203365811105, + "learning_rate": 1.4687406304202224e-06, + "loss": 0.6971, + "step": 48065 + }, + { + "epoch": 0.8305570915123031, + "grad_norm": 1.4846330883924586, + "learning_rate": 1.4684486718056523e-06, + "loss": 0.51, + "step": 48066 + }, + { + "epoch": 0.830574371025712, + "grad_norm": 1.4915095976750548, + "learning_rate": 1.4681567399125508e-06, + "loss": 0.3059, + "step": 48067 + }, + { + "epoch": 0.8305916505391209, + "grad_norm": 0.8587039703758453, + "learning_rate": 1.4678648347418324e-06, + "loss": 0.481, + "step": 48068 + }, + { + "epoch": 0.8306089300525297, + "grad_norm": 1.3170801598996997, + "learning_rate": 1.4675729562944097e-06, + "loss": 0.2241, + "step": 48069 + }, + { + "epoch": 0.8306262095659386, + "grad_norm": 1.0008882822144785, + "learning_rate": 1.4672811045711943e-06, + "loss": 0.4823, + "step": 48070 + }, + { + "epoch": 0.8306434890793475, + "grad_norm": 1.0810869010057602, + "learning_rate": 1.4669892795731034e-06, + "loss": 0.3823, + "step": 48071 + }, + { + "epoch": 0.8306607685927564, + "grad_norm": 1.2227896946211245, + "learning_rate": 1.4666974813010505e-06, + "loss": 0.4258, + "step": 48072 + }, + { + "epoch": 0.8306780481061653, + "grad_norm": 0.8270688343242959, + "learning_rate": 1.4664057097559514e-06, + "loss": 0.9058, + "step": 48073 + }, + { + "epoch": 0.8306953276195742, + "grad_norm": 1.4983075101053318, + "learning_rate": 1.466113964938717e-06, + "loss": 0.2283, + "step": 48074 + }, + { + "epoch": 0.8307126071329831, + "grad_norm": 1.2549779751354202, + "learning_rate": 1.4658222468502636e-06, + "loss": 0.2298, + "step": 48075 + }, + { + "epoch": 0.830729886646392, + "grad_norm": 0.9521296848419009, + "learning_rate": 1.4655305554915012e-06, + "loss": 0.5578, + "step": 48076 + }, + { + "epoch": 0.8307471661598009, + "grad_norm": 0.9602856606491366, + "learning_rate": 1.4652388908633485e-06, + "loss": 0.3085, + "step": 48077 + }, + { + "epoch": 0.8307644456732098, + "grad_norm": 0.7268189564830502, + "learning_rate": 1.464947252966712e-06, + "loss": 0.2367, + "step": 48078 + }, + { + "epoch": 0.8307817251866187, + "grad_norm": 1.462439987249609, + "learning_rate": 1.4646556418025125e-06, + "loss": 0.4789, + "step": 48079 + }, + { + "epoch": 0.8307990047000277, + "grad_norm": 0.6976517602374916, + "learning_rate": 1.4643640573716612e-06, + "loss": 0.116, + "step": 48080 + }, + { + "epoch": 0.8308162842134366, + "grad_norm": 1.3695148358019134, + "learning_rate": 1.4640724996750676e-06, + "loss": 0.2518, + "step": 48081 + }, + { + "epoch": 0.8308335637268455, + "grad_norm": 1.3169038735456797, + "learning_rate": 1.4637809687136505e-06, + "loss": 0.3673, + "step": 48082 + }, + { + "epoch": 0.8308508432402544, + "grad_norm": 1.648802141038794, + "learning_rate": 1.463489464488318e-06, + "loss": 0.3948, + "step": 48083 + }, + { + "epoch": 0.8308681227536633, + "grad_norm": 1.254092058474909, + "learning_rate": 1.4631979869999846e-06, + "loss": 0.2763, + "step": 48084 + }, + { + "epoch": 0.8308854022670722, + "grad_norm": 1.2271432493201881, + "learning_rate": 1.4629065362495643e-06, + "loss": 0.2882, + "step": 48085 + }, + { + "epoch": 0.8309026817804811, + "grad_norm": 1.2787536345747619, + "learning_rate": 1.4626151122379707e-06, + "loss": 0.3998, + "step": 48086 + }, + { + "epoch": 0.83091996129389, + "grad_norm": 1.0363501276317602, + "learning_rate": 1.462323714966114e-06, + "loss": 0.2492, + "step": 48087 + }, + { + "epoch": 0.8309372408072989, + "grad_norm": 0.6610548604931322, + "learning_rate": 1.4620323444349105e-06, + "loss": 0.5664, + "step": 48088 + }, + { + "epoch": 0.8309545203207078, + "grad_norm": 1.1296003649061652, + "learning_rate": 1.4617410006452682e-06, + "loss": 0.3447, + "step": 48089 + }, + { + "epoch": 0.8309717998341166, + "grad_norm": 1.0960121671489593, + "learning_rate": 1.461449683598103e-06, + "loss": 0.5224, + "step": 48090 + }, + { + "epoch": 0.8309890793475255, + "grad_norm": 1.2548073866564817, + "learning_rate": 1.4611583932943275e-06, + "loss": 0.3845, + "step": 48091 + }, + { + "epoch": 0.8310063588609344, + "grad_norm": 1.3454307139851367, + "learning_rate": 1.460867129734851e-06, + "loss": 0.3115, + "step": 48092 + }, + { + "epoch": 0.8310236383743433, + "grad_norm": 1.207871375315305, + "learning_rate": 1.4605758929205893e-06, + "loss": 0.2799, + "step": 48093 + }, + { + "epoch": 0.8310409178877523, + "grad_norm": 1.027537666488669, + "learning_rate": 1.460284682852451e-06, + "loss": 0.3879, + "step": 48094 + }, + { + "epoch": 0.8310581974011612, + "grad_norm": 3.585630696980207, + "learning_rate": 1.459993499531349e-06, + "loss": 0.3457, + "step": 48095 + }, + { + "epoch": 0.8310754769145701, + "grad_norm": 1.4324566857358119, + "learning_rate": 1.459702342958198e-06, + "loss": 0.4549, + "step": 48096 + }, + { + "epoch": 0.831092756427979, + "grad_norm": 1.243134386272129, + "learning_rate": 1.45941121313391e-06, + "loss": 0.3419, + "step": 48097 + }, + { + "epoch": 0.8311100359413879, + "grad_norm": 1.6870865729497353, + "learning_rate": 1.459120110059392e-06, + "loss": 0.3766, + "step": 48098 + }, + { + "epoch": 0.8311273154547968, + "grad_norm": 1.6287481228104939, + "learning_rate": 1.4588290337355627e-06, + "loss": 0.4109, + "step": 48099 + }, + { + "epoch": 0.8311445949682057, + "grad_norm": 1.4708384413551838, + "learning_rate": 1.4585379841633262e-06, + "loss": 0.4924, + "step": 48100 + }, + { + "epoch": 0.8311618744816146, + "grad_norm": 2.9829841611042327, + "learning_rate": 1.458246961343599e-06, + "loss": 0.4982, + "step": 48101 + }, + { + "epoch": 0.8311791539950235, + "grad_norm": 1.3832649612127421, + "learning_rate": 1.4579559652772935e-06, + "loss": 0.3491, + "step": 48102 + }, + { + "epoch": 0.8311964335084324, + "grad_norm": 1.4871367839616327, + "learning_rate": 1.4576649959653167e-06, + "loss": 0.1739, + "step": 48103 + }, + { + "epoch": 0.8312137130218413, + "grad_norm": 1.1188830962159149, + "learning_rate": 1.4573740534085846e-06, + "loss": 0.265, + "step": 48104 + }, + { + "epoch": 0.8312309925352502, + "grad_norm": 1.431990532167748, + "learning_rate": 1.4570831376080042e-06, + "loss": 0.3193, + "step": 48105 + }, + { + "epoch": 0.8312482720486591, + "grad_norm": 1.0101546946926905, + "learning_rate": 1.4567922485644882e-06, + "loss": 0.3373, + "step": 48106 + }, + { + "epoch": 0.831265551562068, + "grad_norm": 1.4292122764467539, + "learning_rate": 1.456501386278948e-06, + "loss": 0.3504, + "step": 48107 + }, + { + "epoch": 0.831282831075477, + "grad_norm": 1.1539782525011615, + "learning_rate": 1.4562105507522973e-06, + "loss": 0.2634, + "step": 48108 + }, + { + "epoch": 0.8313001105888859, + "grad_norm": 1.1130282863827756, + "learning_rate": 1.455919741985442e-06, + "loss": 0.3511, + "step": 48109 + }, + { + "epoch": 0.8313173901022948, + "grad_norm": 1.2348545363857713, + "learning_rate": 1.4556289599792984e-06, + "loss": 0.4288, + "step": 48110 + }, + { + "epoch": 0.8313346696157036, + "grad_norm": 1.5884640996276993, + "learning_rate": 1.4553382047347708e-06, + "loss": 0.3073, + "step": 48111 + }, + { + "epoch": 0.8313519491291125, + "grad_norm": 1.5714664174248407, + "learning_rate": 1.4550474762527734e-06, + "loss": 0.2633, + "step": 48112 + }, + { + "epoch": 0.8313692286425214, + "grad_norm": 1.0163402851395764, + "learning_rate": 1.4547567745342196e-06, + "loss": 0.2856, + "step": 48113 + }, + { + "epoch": 0.8313865081559303, + "grad_norm": 1.7198235228033245, + "learning_rate": 1.4544660995800153e-06, + "loss": 0.2397, + "step": 48114 + }, + { + "epoch": 0.8314037876693392, + "grad_norm": 1.102827355047698, + "learning_rate": 1.4541754513910734e-06, + "loss": 0.4362, + "step": 48115 + }, + { + "epoch": 0.8314210671827481, + "grad_norm": 0.9641829802845788, + "learning_rate": 1.4538848299683027e-06, + "loss": 0.3469, + "step": 48116 + }, + { + "epoch": 0.831438346696157, + "grad_norm": 1.4079882698014656, + "learning_rate": 1.4535942353126154e-06, + "loss": 0.3589, + "step": 48117 + }, + { + "epoch": 0.8314556262095659, + "grad_norm": 1.4592685119471116, + "learning_rate": 1.4533036674249158e-06, + "loss": 0.3247, + "step": 48118 + }, + { + "epoch": 0.8314729057229748, + "grad_norm": 1.0826425291389803, + "learning_rate": 1.4530131263061241e-06, + "loss": 0.2546, + "step": 48119 + }, + { + "epoch": 0.8314901852363837, + "grad_norm": 1.7128474875713509, + "learning_rate": 1.4527226119571447e-06, + "loss": 0.3787, + "step": 48120 + }, + { + "epoch": 0.8315074647497926, + "grad_norm": 2.159002215115611, + "learning_rate": 1.452432124378884e-06, + "loss": 0.3508, + "step": 48121 + }, + { + "epoch": 0.8315247442632016, + "grad_norm": 1.1711259030247556, + "learning_rate": 1.4521416635722596e-06, + "loss": 0.2388, + "step": 48122 + }, + { + "epoch": 0.8315420237766105, + "grad_norm": 1.0436875927567115, + "learning_rate": 1.4518512295381736e-06, + "loss": 0.3796, + "step": 48123 + }, + { + "epoch": 0.8315593032900194, + "grad_norm": 0.9726308563639606, + "learning_rate": 1.45156082227754e-06, + "loss": 0.3401, + "step": 48124 + }, + { + "epoch": 0.8315765828034283, + "grad_norm": 1.5877342780193893, + "learning_rate": 1.451270441791267e-06, + "loss": 0.3915, + "step": 48125 + }, + { + "epoch": 0.8315938623168372, + "grad_norm": 0.8519674976920456, + "learning_rate": 1.4509800880802671e-06, + "loss": 0.2352, + "step": 48126 + }, + { + "epoch": 0.8316111418302461, + "grad_norm": 1.2086224741917013, + "learning_rate": 1.450689761145445e-06, + "loss": 0.3618, + "step": 48127 + }, + { + "epoch": 0.831628421343655, + "grad_norm": 2.22151912972096, + "learning_rate": 1.4503994609877148e-06, + "loss": 0.3019, + "step": 48128 + }, + { + "epoch": 0.8316457008570639, + "grad_norm": 0.9343333918638052, + "learning_rate": 1.4501091876079808e-06, + "loss": 0.3325, + "step": 48129 + }, + { + "epoch": 0.8316629803704728, + "grad_norm": 2.0789219091675095, + "learning_rate": 1.449818941007154e-06, + "loss": 0.437, + "step": 48130 + }, + { + "epoch": 0.8316802598838817, + "grad_norm": 0.958701046199503, + "learning_rate": 1.4495287211861475e-06, + "loss": 0.1876, + "step": 48131 + }, + { + "epoch": 0.8316975393972905, + "grad_norm": 1.1400665010313167, + "learning_rate": 1.4492385281458631e-06, + "loss": 0.3436, + "step": 48132 + }, + { + "epoch": 0.8317148189106994, + "grad_norm": 1.263860929520026, + "learning_rate": 1.4489483618872168e-06, + "loss": 0.2686, + "step": 48133 + }, + { + "epoch": 0.8317320984241083, + "grad_norm": 1.3877054893136345, + "learning_rate": 1.4486582224111124e-06, + "loss": 0.4481, + "step": 48134 + }, + { + "epoch": 0.8317493779375172, + "grad_norm": 0.8338230767127233, + "learning_rate": 1.4483681097184587e-06, + "loss": 0.2827, + "step": 48135 + }, + { + "epoch": 0.8317666574509262, + "grad_norm": 1.474852060641194, + "learning_rate": 1.4480780238101677e-06, + "loss": 0.3537, + "step": 48136 + }, + { + "epoch": 0.8317839369643351, + "grad_norm": 1.0994342487385707, + "learning_rate": 1.4477879646871473e-06, + "loss": 0.3341, + "step": 48137 + }, + { + "epoch": 0.831801216477744, + "grad_norm": 0.6897204143897047, + "learning_rate": 1.4474979323503024e-06, + "loss": 0.2312, + "step": 48138 + }, + { + "epoch": 0.8318184959911529, + "grad_norm": 1.7959472028118153, + "learning_rate": 1.4472079268005468e-06, + "loss": 0.3451, + "step": 48139 + }, + { + "epoch": 0.8318357755045618, + "grad_norm": 1.7858030713314839, + "learning_rate": 1.4469179480387841e-06, + "loss": 0.4218, + "step": 48140 + }, + { + "epoch": 0.8318530550179707, + "grad_norm": 1.4904592102109098, + "learning_rate": 1.4466279960659235e-06, + "loss": 0.2844, + "step": 48141 + }, + { + "epoch": 0.8318703345313796, + "grad_norm": 1.1398274755091025, + "learning_rate": 1.4463380708828777e-06, + "loss": 0.2608, + "step": 48142 + }, + { + "epoch": 0.8318876140447885, + "grad_norm": 1.2576290821051206, + "learning_rate": 1.4460481724905473e-06, + "loss": 0.4488, + "step": 48143 + }, + { + "epoch": 0.8319048935581974, + "grad_norm": 1.601574279553364, + "learning_rate": 1.4457583008898478e-06, + "loss": 0.5294, + "step": 48144 + }, + { + "epoch": 0.8319221730716063, + "grad_norm": 1.8556552163574198, + "learning_rate": 1.4454684560816801e-06, + "loss": 0.3032, + "step": 48145 + }, + { + "epoch": 0.8319394525850152, + "grad_norm": 1.7547929464763636, + "learning_rate": 1.4451786380669552e-06, + "loss": 0.4082, + "step": 48146 + }, + { + "epoch": 0.8319567320984241, + "grad_norm": 1.0195420070332388, + "learning_rate": 1.4448888468465816e-06, + "loss": 0.3539, + "step": 48147 + }, + { + "epoch": 0.831974011611833, + "grad_norm": 2.0059368773108335, + "learning_rate": 1.4445990824214683e-06, + "loss": 0.3008, + "step": 48148 + }, + { + "epoch": 0.831991291125242, + "grad_norm": 3.209813707671876, + "learning_rate": 1.4443093447925182e-06, + "loss": 0.3826, + "step": 48149 + }, + { + "epoch": 0.8320085706386509, + "grad_norm": 1.5274646927604496, + "learning_rate": 1.4440196339606449e-06, + "loss": 0.3814, + "step": 48150 + }, + { + "epoch": 0.8320258501520598, + "grad_norm": 1.339042714055132, + "learning_rate": 1.443729949926751e-06, + "loss": 0.422, + "step": 48151 + }, + { + "epoch": 0.8320431296654687, + "grad_norm": 1.7084337280198065, + "learning_rate": 1.4434402926917412e-06, + "loss": 0.3156, + "step": 48152 + }, + { + "epoch": 0.8320604091788775, + "grad_norm": 1.1631992125356514, + "learning_rate": 1.4431506622565317e-06, + "loss": 0.2204, + "step": 48153 + }, + { + "epoch": 0.8320776886922864, + "grad_norm": 1.1452039201032829, + "learning_rate": 1.4428610586220216e-06, + "loss": 0.4173, + "step": 48154 + }, + { + "epoch": 0.8320949682056953, + "grad_norm": 0.5737452175930977, + "learning_rate": 1.4425714817891234e-06, + "loss": 0.5891, + "step": 48155 + }, + { + "epoch": 0.8321122477191042, + "grad_norm": 1.756656749231149, + "learning_rate": 1.4422819317587388e-06, + "loss": 0.4211, + "step": 48156 + }, + { + "epoch": 0.8321295272325131, + "grad_norm": 1.3486545681210154, + "learning_rate": 1.4419924085317805e-06, + "loss": 0.4432, + "step": 48157 + }, + { + "epoch": 0.832146806745922, + "grad_norm": 1.2528999084943178, + "learning_rate": 1.4417029121091485e-06, + "loss": 0.4911, + "step": 48158 + }, + { + "epoch": 0.8321640862593309, + "grad_norm": 1.0202772039032304, + "learning_rate": 1.441413442491757e-06, + "loss": 0.3518, + "step": 48159 + }, + { + "epoch": 0.8321813657727398, + "grad_norm": 2.0206902580722317, + "learning_rate": 1.441123999680507e-06, + "loss": 0.3116, + "step": 48160 + }, + { + "epoch": 0.8321986452861487, + "grad_norm": 1.149060113765724, + "learning_rate": 1.4408345836763095e-06, + "loss": 0.3123, + "step": 48161 + }, + { + "epoch": 0.8322159247995576, + "grad_norm": 2.725913870714166, + "learning_rate": 1.4405451944800685e-06, + "loss": 0.3069, + "step": 48162 + }, + { + "epoch": 0.8322332043129665, + "grad_norm": 1.1799307256696294, + "learning_rate": 1.4402558320926873e-06, + "loss": 0.3387, + "step": 48163 + }, + { + "epoch": 0.8322504838263755, + "grad_norm": 1.3891626673333197, + "learning_rate": 1.439966496515076e-06, + "loss": 0.466, + "step": 48164 + }, + { + "epoch": 0.8322677633397844, + "grad_norm": 1.6869974114819832, + "learning_rate": 1.439677187748141e-06, + "loss": 0.2967, + "step": 48165 + }, + { + "epoch": 0.8322850428531933, + "grad_norm": 1.3368419500987887, + "learning_rate": 1.4393879057927896e-06, + "loss": 0.2016, + "step": 48166 + }, + { + "epoch": 0.8323023223666022, + "grad_norm": 1.3252917949161647, + "learning_rate": 1.4390986506499228e-06, + "loss": 0.261, + "step": 48167 + }, + { + "epoch": 0.8323196018800111, + "grad_norm": 1.4248564034229272, + "learning_rate": 1.4388094223204518e-06, + "loss": 0.5181, + "step": 48168 + }, + { + "epoch": 0.83233688139342, + "grad_norm": 1.3052556950183016, + "learning_rate": 1.4385202208052784e-06, + "loss": 0.2166, + "step": 48169 + }, + { + "epoch": 0.8323541609068289, + "grad_norm": 1.9911752820232962, + "learning_rate": 1.4382310461053106e-06, + "loss": 0.3937, + "step": 48170 + }, + { + "epoch": 0.8323714404202378, + "grad_norm": 1.5660989263417564, + "learning_rate": 1.4379418982214544e-06, + "loss": 0.3189, + "step": 48171 + }, + { + "epoch": 0.8323887199336467, + "grad_norm": 1.0804764908724114, + "learning_rate": 1.4376527771546156e-06, + "loss": 0.2833, + "step": 48172 + }, + { + "epoch": 0.8324059994470556, + "grad_norm": 1.235771077824162, + "learning_rate": 1.4373636829057003e-06, + "loss": 0.3232, + "step": 48173 + }, + { + "epoch": 0.8324232789604644, + "grad_norm": 1.3508355915083718, + "learning_rate": 1.43707461547561e-06, + "loss": 0.4485, + "step": 48174 + }, + { + "epoch": 0.8324405584738733, + "grad_norm": 1.5674652423688933, + "learning_rate": 1.4367855748652525e-06, + "loss": 0.2715, + "step": 48175 + }, + { + "epoch": 0.8324578379872822, + "grad_norm": 0.9728339866648253, + "learning_rate": 1.4364965610755332e-06, + "loss": 0.3349, + "step": 48176 + }, + { + "epoch": 0.8324751175006911, + "grad_norm": 0.58102648650958, + "learning_rate": 1.43620757410736e-06, + "loss": 0.8311, + "step": 48177 + }, + { + "epoch": 0.8324923970141, + "grad_norm": 2.3886148514826693, + "learning_rate": 1.4359186139616343e-06, + "loss": 0.3245, + "step": 48178 + }, + { + "epoch": 0.832509676527509, + "grad_norm": 1.977379674217295, + "learning_rate": 1.4356296806392633e-06, + "loss": 0.2471, + "step": 48179 + }, + { + "epoch": 0.8325269560409179, + "grad_norm": 1.3985699691688949, + "learning_rate": 1.4353407741411484e-06, + "loss": 0.2658, + "step": 48180 + }, + { + "epoch": 0.8325442355543268, + "grad_norm": 1.395517286896526, + "learning_rate": 1.435051894468198e-06, + "loss": 0.284, + "step": 48181 + }, + { + "epoch": 0.8325615150677357, + "grad_norm": 1.5359143110000577, + "learning_rate": 1.4347630416213187e-06, + "loss": 0.3296, + "step": 48182 + }, + { + "epoch": 0.8325787945811446, + "grad_norm": 1.2487218360878907, + "learning_rate": 1.434474215601409e-06, + "loss": 0.3044, + "step": 48183 + }, + { + "epoch": 0.8325960740945535, + "grad_norm": 1.336004187497379, + "learning_rate": 1.4341854164093806e-06, + "loss": 0.42, + "step": 48184 + }, + { + "epoch": 0.8326133536079624, + "grad_norm": 1.6060161673300157, + "learning_rate": 1.433896644046131e-06, + "loss": 0.2384, + "step": 48185 + }, + { + "epoch": 0.8326306331213713, + "grad_norm": 1.166079128158812, + "learning_rate": 1.4336078985125678e-06, + "loss": 0.3382, + "step": 48186 + }, + { + "epoch": 0.8326479126347802, + "grad_norm": 1.2715997812972102, + "learning_rate": 1.433319179809597e-06, + "loss": 0.4063, + "step": 48187 + }, + { + "epoch": 0.8326651921481891, + "grad_norm": 1.6102198194859383, + "learning_rate": 1.433030487938123e-06, + "loss": 0.2611, + "step": 48188 + }, + { + "epoch": 0.832682471661598, + "grad_norm": 1.003395389638463, + "learning_rate": 1.4327418228990464e-06, + "loss": 0.3557, + "step": 48189 + }, + { + "epoch": 0.8326997511750069, + "grad_norm": 1.0971815235356548, + "learning_rate": 1.4324531846932766e-06, + "loss": 0.2914, + "step": 48190 + }, + { + "epoch": 0.8327170306884158, + "grad_norm": 1.443388657277535, + "learning_rate": 1.4321645733217115e-06, + "loss": 0.4517, + "step": 48191 + }, + { + "epoch": 0.8327343102018248, + "grad_norm": 1.4457880432500532, + "learning_rate": 1.4318759887852586e-06, + "loss": 0.4084, + "step": 48192 + }, + { + "epoch": 0.8327515897152337, + "grad_norm": 1.7479336042060594, + "learning_rate": 1.4315874310848221e-06, + "loss": 0.3427, + "step": 48193 + }, + { + "epoch": 0.8327688692286426, + "grad_norm": 1.3509859827940671, + "learning_rate": 1.431298900221304e-06, + "loss": 0.4025, + "step": 48194 + }, + { + "epoch": 0.8327861487420514, + "grad_norm": 1.2416223516309208, + "learning_rate": 1.4310103961956112e-06, + "loss": 0.4319, + "step": 48195 + }, + { + "epoch": 0.8328034282554603, + "grad_norm": 2.054611133232152, + "learning_rate": 1.430721919008642e-06, + "loss": 0.4492, + "step": 48196 + }, + { + "epoch": 0.8328207077688692, + "grad_norm": 3.5163458031695853, + "learning_rate": 1.4304334686613063e-06, + "loss": 0.2765, + "step": 48197 + }, + { + "epoch": 0.8328379872822781, + "grad_norm": 0.8728931224017621, + "learning_rate": 1.4301450451544985e-06, + "loss": 0.2856, + "step": 48198 + }, + { + "epoch": 0.832855266795687, + "grad_norm": 1.7371532262097849, + "learning_rate": 1.4298566484891329e-06, + "loss": 0.3634, + "step": 48199 + }, + { + "epoch": 0.8328725463090959, + "grad_norm": 1.9576366098074374, + "learning_rate": 1.4295682786661058e-06, + "loss": 0.4393, + "step": 48200 + }, + { + "epoch": 0.8328898258225048, + "grad_norm": 0.9834573193162046, + "learning_rate": 1.429279935686324e-06, + "loss": 0.387, + "step": 48201 + }, + { + "epoch": 0.8329071053359137, + "grad_norm": 1.8977770164730106, + "learning_rate": 1.428991619550686e-06, + "loss": 0.2715, + "step": 48202 + }, + { + "epoch": 0.8329243848493226, + "grad_norm": 1.5216627729899928, + "learning_rate": 1.4287033302601005e-06, + "loss": 0.2787, + "step": 48203 + }, + { + "epoch": 0.8329416643627315, + "grad_norm": 1.8857423550845174, + "learning_rate": 1.4284150678154652e-06, + "loss": 0.433, + "step": 48204 + }, + { + "epoch": 0.8329589438761404, + "grad_norm": 1.2450673915483157, + "learning_rate": 1.4281268322176856e-06, + "loss": 0.6787, + "step": 48205 + }, + { + "epoch": 0.8329762233895494, + "grad_norm": 1.3899942007509911, + "learning_rate": 1.4278386234676655e-06, + "loss": 0.3292, + "step": 48206 + }, + { + "epoch": 0.8329935029029583, + "grad_norm": 2.0174366768903127, + "learning_rate": 1.4275504415663055e-06, + "loss": 0.3853, + "step": 48207 + }, + { + "epoch": 0.8330107824163672, + "grad_norm": 1.596489839781999, + "learning_rate": 1.4272622865145103e-06, + "loss": 0.2508, + "step": 48208 + }, + { + "epoch": 0.8330280619297761, + "grad_norm": 1.5304491573441934, + "learning_rate": 1.4269741583131791e-06, + "loss": 0.5398, + "step": 48209 + }, + { + "epoch": 0.833045341443185, + "grad_norm": 1.133099391474235, + "learning_rate": 1.4266860569632157e-06, + "loss": 0.3683, + "step": 48210 + }, + { + "epoch": 0.8330626209565939, + "grad_norm": 1.2968657235364376, + "learning_rate": 1.426397982465524e-06, + "loss": 0.4971, + "step": 48211 + }, + { + "epoch": 0.8330799004700028, + "grad_norm": 2.7930512302673485, + "learning_rate": 1.4261099348210073e-06, + "loss": 0.4128, + "step": 48212 + }, + { + "epoch": 0.8330971799834117, + "grad_norm": 1.4539272143272592, + "learning_rate": 1.4258219140305663e-06, + "loss": 0.3075, + "step": 48213 + }, + { + "epoch": 0.8331144594968206, + "grad_norm": 1.0308684565512132, + "learning_rate": 1.425533920095099e-06, + "loss": 0.35, + "step": 48214 + }, + { + "epoch": 0.8331317390102295, + "grad_norm": 1.0426697137875365, + "learning_rate": 1.4252459530155126e-06, + "loss": 0.1393, + "step": 48215 + }, + { + "epoch": 0.8331490185236384, + "grad_norm": 1.076882533385222, + "learning_rate": 1.4249580127927064e-06, + "loss": 0.3556, + "step": 48216 + }, + { + "epoch": 0.8331662980370472, + "grad_norm": 1.404164180219074, + "learning_rate": 1.4246700994275864e-06, + "loss": 0.2145, + "step": 48217 + }, + { + "epoch": 0.8331835775504561, + "grad_norm": 1.6130977669823492, + "learning_rate": 1.4243822129210494e-06, + "loss": 0.2507, + "step": 48218 + }, + { + "epoch": 0.833200857063865, + "grad_norm": 1.8769164622692105, + "learning_rate": 1.4240943532740014e-06, + "loss": 0.4311, + "step": 48219 + }, + { + "epoch": 0.833218136577274, + "grad_norm": 1.3914844980736936, + "learning_rate": 1.4238065204873387e-06, + "loss": 0.3806, + "step": 48220 + }, + { + "epoch": 0.8332354160906829, + "grad_norm": 1.572085493532679, + "learning_rate": 1.4235187145619656e-06, + "loss": 0.3852, + "step": 48221 + }, + { + "epoch": 0.8332526956040918, + "grad_norm": 0.7529123313769892, + "learning_rate": 1.423230935498784e-06, + "loss": 0.3111, + "step": 48222 + }, + { + "epoch": 0.8332699751175007, + "grad_norm": 1.339649269002685, + "learning_rate": 1.4229431832986973e-06, + "loss": 0.307, + "step": 48223 + }, + { + "epoch": 0.8332872546309096, + "grad_norm": 1.7202843499672318, + "learning_rate": 1.4226554579626039e-06, + "loss": 0.3128, + "step": 48224 + }, + { + "epoch": 0.8333045341443185, + "grad_norm": 1.1779709071242273, + "learning_rate": 1.4223677594914032e-06, + "loss": 0.4108, + "step": 48225 + }, + { + "epoch": 0.8333218136577274, + "grad_norm": 0.5915506684858077, + "learning_rate": 1.4220800878859996e-06, + "loss": 0.6646, + "step": 48226 + }, + { + "epoch": 0.8333390931711363, + "grad_norm": 1.2774566533043745, + "learning_rate": 1.4217924431472917e-06, + "loss": 0.3251, + "step": 48227 + }, + { + "epoch": 0.8333563726845452, + "grad_norm": 1.7423552380946534, + "learning_rate": 1.4215048252761854e-06, + "loss": 0.4675, + "step": 48228 + }, + { + "epoch": 0.8333736521979541, + "grad_norm": 1.7454718969878804, + "learning_rate": 1.4212172342735742e-06, + "loss": 0.4005, + "step": 48229 + }, + { + "epoch": 0.833390931711363, + "grad_norm": 1.3806919885486841, + "learning_rate": 1.4209296701403664e-06, + "loss": 0.4016, + "step": 48230 + }, + { + "epoch": 0.8334082112247719, + "grad_norm": 0.8809134885243499, + "learning_rate": 1.4206421328774556e-06, + "loss": 0.2815, + "step": 48231 + }, + { + "epoch": 0.8334254907381808, + "grad_norm": 1.184451367342779, + "learning_rate": 1.4203546224857456e-06, + "loss": 0.2533, + "step": 48232 + }, + { + "epoch": 0.8334427702515897, + "grad_norm": 1.172880603242932, + "learning_rate": 1.420067138966139e-06, + "loss": 0.4042, + "step": 48233 + }, + { + "epoch": 0.8334600497649987, + "grad_norm": 1.4629999648248744, + "learning_rate": 1.4197796823195332e-06, + "loss": 0.3817, + "step": 48234 + }, + { + "epoch": 0.8334773292784076, + "grad_norm": 1.4305626062512835, + "learning_rate": 1.4194922525468313e-06, + "loss": 0.4727, + "step": 48235 + }, + { + "epoch": 0.8334946087918165, + "grad_norm": 2.6202152909350955, + "learning_rate": 1.4192048496489285e-06, + "loss": 0.3106, + "step": 48236 + }, + { + "epoch": 0.8335118883052254, + "grad_norm": 1.0122284726753732, + "learning_rate": 1.4189174736267307e-06, + "loss": 0.4138, + "step": 48237 + }, + { + "epoch": 0.8335291678186342, + "grad_norm": 1.1315042317836892, + "learning_rate": 1.4186301244811317e-06, + "loss": 0.3208, + "step": 48238 + }, + { + "epoch": 0.8335464473320431, + "grad_norm": 2.1936821716109782, + "learning_rate": 1.4183428022130397e-06, + "loss": 0.2806, + "step": 48239 + }, + { + "epoch": 0.833563726845452, + "grad_norm": 1.6948170812828027, + "learning_rate": 1.4180555068233482e-06, + "loss": 0.3049, + "step": 48240 + }, + { + "epoch": 0.8335810063588609, + "grad_norm": 1.0663307099467163, + "learning_rate": 1.4177682383129609e-06, + "loss": 0.416, + "step": 48241 + }, + { + "epoch": 0.8335982858722698, + "grad_norm": 1.4966353181783045, + "learning_rate": 1.417480996682773e-06, + "loss": 0.377, + "step": 48242 + }, + { + "epoch": 0.8336155653856787, + "grad_norm": 0.9878307632964147, + "learning_rate": 1.41719378193369e-06, + "loss": 0.4667, + "step": 48243 + }, + { + "epoch": 0.8336328448990876, + "grad_norm": 1.4763701578713246, + "learning_rate": 1.416906594066606e-06, + "loss": 0.3833, + "step": 48244 + }, + { + "epoch": 0.8336501244124965, + "grad_norm": 1.0223131926678553, + "learning_rate": 1.4166194330824222e-06, + "loss": 0.1962, + "step": 48245 + }, + { + "epoch": 0.8336674039259054, + "grad_norm": 1.353126335087633, + "learning_rate": 1.4163322989820417e-06, + "loss": 0.3074, + "step": 48246 + }, + { + "epoch": 0.8336846834393143, + "grad_norm": 1.7741915533971069, + "learning_rate": 1.4160451917663575e-06, + "loss": 0.2509, + "step": 48247 + }, + { + "epoch": 0.8337019629527233, + "grad_norm": 1.5321180288740617, + "learning_rate": 1.415758111436275e-06, + "loss": 0.2801, + "step": 48248 + }, + { + "epoch": 0.8337192424661322, + "grad_norm": 1.4562855127260832, + "learning_rate": 1.4154710579926879e-06, + "loss": 0.4739, + "step": 48249 + }, + { + "epoch": 0.8337365219795411, + "grad_norm": 1.1227122165618484, + "learning_rate": 1.4151840314364983e-06, + "loss": 0.3125, + "step": 48250 + }, + { + "epoch": 0.83375380149295, + "grad_norm": 1.5174531815398185, + "learning_rate": 1.414897031768605e-06, + "loss": 0.5082, + "step": 48251 + }, + { + "epoch": 0.8337710810063589, + "grad_norm": 1.8042859848265893, + "learning_rate": 1.4146100589899082e-06, + "loss": 0.1899, + "step": 48252 + }, + { + "epoch": 0.8337883605197678, + "grad_norm": 1.6246688850437827, + "learning_rate": 1.4143231131013024e-06, + "loss": 0.4988, + "step": 48253 + }, + { + "epoch": 0.8338056400331767, + "grad_norm": 1.9364453197259501, + "learning_rate": 1.4140361941036929e-06, + "loss": 0.2194, + "step": 48254 + }, + { + "epoch": 0.8338229195465856, + "grad_norm": 1.1302168975446614, + "learning_rate": 1.413749301997971e-06, + "loss": 0.3923, + "step": 48255 + }, + { + "epoch": 0.8338401990599945, + "grad_norm": 1.7023072237384835, + "learning_rate": 1.4134624367850392e-06, + "loss": 0.8115, + "step": 48256 + }, + { + "epoch": 0.8338574785734034, + "grad_norm": 0.7010219944738385, + "learning_rate": 1.413175598465798e-06, + "loss": 0.5121, + "step": 48257 + }, + { + "epoch": 0.8338747580868123, + "grad_norm": 1.1789319987361673, + "learning_rate": 1.4128887870411412e-06, + "loss": 0.2862, + "step": 48258 + }, + { + "epoch": 0.8338920376002211, + "grad_norm": 2.266147088102537, + "learning_rate": 1.4126020025119714e-06, + "loss": 0.4376, + "step": 48259 + }, + { + "epoch": 0.83390931711363, + "grad_norm": 1.2310379004780043, + "learning_rate": 1.4123152448791832e-06, + "loss": 0.4156, + "step": 48260 + }, + { + "epoch": 0.8339265966270389, + "grad_norm": 1.1430110053695022, + "learning_rate": 1.412028514143675e-06, + "loss": 0.3444, + "step": 48261 + }, + { + "epoch": 0.8339438761404478, + "grad_norm": 1.342808217087305, + "learning_rate": 1.4117418103063475e-06, + "loss": 0.3735, + "step": 48262 + }, + { + "epoch": 0.8339611556538568, + "grad_norm": 1.1555083815550047, + "learning_rate": 1.4114551333680982e-06, + "loss": 0.3724, + "step": 48263 + }, + { + "epoch": 0.8339784351672657, + "grad_norm": 1.3751429576423302, + "learning_rate": 1.4111684833298256e-06, + "loss": 0.3624, + "step": 48264 + }, + { + "epoch": 0.8339957146806746, + "grad_norm": 1.4422560012473051, + "learning_rate": 1.4108818601924223e-06, + "loss": 0.4657, + "step": 48265 + }, + { + "epoch": 0.8340129941940835, + "grad_norm": 1.5997092262868853, + "learning_rate": 1.410595263956791e-06, + "loss": 0.2089, + "step": 48266 + }, + { + "epoch": 0.8340302737074924, + "grad_norm": 1.5379236604824724, + "learning_rate": 1.4103086946238275e-06, + "loss": 0.3805, + "step": 48267 + }, + { + "epoch": 0.8340475532209013, + "grad_norm": 1.4344174872339444, + "learning_rate": 1.4100221521944323e-06, + "loss": 0.4352, + "step": 48268 + }, + { + "epoch": 0.8340648327343102, + "grad_norm": 2.6970763997088323, + "learning_rate": 1.4097356366694981e-06, + "loss": 0.2748, + "step": 48269 + }, + { + "epoch": 0.8340821122477191, + "grad_norm": 2.220273482223557, + "learning_rate": 1.4094491480499273e-06, + "loss": 0.8859, + "step": 48270 + }, + { + "epoch": 0.834099391761128, + "grad_norm": 1.6656749937475601, + "learning_rate": 1.4091626863366125e-06, + "loss": 0.2446, + "step": 48271 + }, + { + "epoch": 0.8341166712745369, + "grad_norm": 1.622821359124129, + "learning_rate": 1.4088762515304522e-06, + "loss": 0.2467, + "step": 48272 + }, + { + "epoch": 0.8341339507879458, + "grad_norm": 1.3601169390357024, + "learning_rate": 1.4085898436323453e-06, + "loss": 0.3952, + "step": 48273 + }, + { + "epoch": 0.8341512303013547, + "grad_norm": 2.4113792349767795, + "learning_rate": 1.4083034626431902e-06, + "loss": 0.2578, + "step": 48274 + }, + { + "epoch": 0.8341685098147636, + "grad_norm": 1.3026021128212009, + "learning_rate": 1.4080171085638805e-06, + "loss": 0.3171, + "step": 48275 + }, + { + "epoch": 0.8341857893281726, + "grad_norm": 1.0315492749432948, + "learning_rate": 1.4077307813953133e-06, + "loss": 0.7579, + "step": 48276 + }, + { + "epoch": 0.8342030688415815, + "grad_norm": 1.4243708381377953, + "learning_rate": 1.407444481138387e-06, + "loss": 0.4517, + "step": 48277 + }, + { + "epoch": 0.8342203483549904, + "grad_norm": 1.9990428570070093, + "learning_rate": 1.4071582077939949e-06, + "loss": 0.6986, + "step": 48278 + }, + { + "epoch": 0.8342376278683993, + "grad_norm": 1.0268133697475887, + "learning_rate": 1.4068719613630399e-06, + "loss": 0.5891, + "step": 48279 + }, + { + "epoch": 0.8342549073818081, + "grad_norm": 1.4545060234059093, + "learning_rate": 1.4065857418464123e-06, + "loss": 0.3891, + "step": 48280 + }, + { + "epoch": 0.834272186895217, + "grad_norm": 1.381248651226449, + "learning_rate": 1.4062995492450137e-06, + "loss": 0.3699, + "step": 48281 + }, + { + "epoch": 0.8342894664086259, + "grad_norm": 0.9404442436665794, + "learning_rate": 1.4060133835597366e-06, + "loss": 0.3482, + "step": 48282 + }, + { + "epoch": 0.8343067459220348, + "grad_norm": 1.9770614511088165, + "learning_rate": 1.4057272447914804e-06, + "loss": 0.456, + "step": 48283 + }, + { + "epoch": 0.8343240254354437, + "grad_norm": 1.1214558484882369, + "learning_rate": 1.4054411329411343e-06, + "loss": 0.4802, + "step": 48284 + }, + { + "epoch": 0.8343413049488526, + "grad_norm": 1.5216675386476026, + "learning_rate": 1.4051550480096055e-06, + "loss": 0.4232, + "step": 48285 + }, + { + "epoch": 0.8343585844622615, + "grad_norm": 1.648200228071605, + "learning_rate": 1.404868989997784e-06, + "loss": 0.4368, + "step": 48286 + }, + { + "epoch": 0.8343758639756704, + "grad_norm": 1.47993037487016, + "learning_rate": 1.404582958906563e-06, + "loss": 0.509, + "step": 48287 + }, + { + "epoch": 0.8343931434890793, + "grad_norm": 1.1015355299257752, + "learning_rate": 1.4042969547368446e-06, + "loss": 0.2854, + "step": 48288 + }, + { + "epoch": 0.8344104230024882, + "grad_norm": 2.156083518308785, + "learning_rate": 1.4040109774895183e-06, + "loss": 0.6101, + "step": 48289 + }, + { + "epoch": 0.8344277025158972, + "grad_norm": 1.7376284196153395, + "learning_rate": 1.4037250271654834e-06, + "loss": 0.2511, + "step": 48290 + }, + { + "epoch": 0.8344449820293061, + "grad_norm": 1.5341049355228513, + "learning_rate": 1.4034391037656348e-06, + "loss": 0.2961, + "step": 48291 + }, + { + "epoch": 0.834462261542715, + "grad_norm": 1.0941178462183667, + "learning_rate": 1.4031532072908704e-06, + "loss": 0.2524, + "step": 48292 + }, + { + "epoch": 0.8344795410561239, + "grad_norm": 1.764054401525788, + "learning_rate": 1.4028673377420821e-06, + "loss": 0.3801, + "step": 48293 + }, + { + "epoch": 0.8344968205695328, + "grad_norm": 1.25043826866065, + "learning_rate": 1.4025814951201677e-06, + "loss": 0.3759, + "step": 48294 + }, + { + "epoch": 0.8345141000829417, + "grad_norm": 1.6709707615642144, + "learning_rate": 1.40229567942602e-06, + "loss": 0.4157, + "step": 48295 + }, + { + "epoch": 0.8345313795963506, + "grad_norm": 1.7255565125771095, + "learning_rate": 1.402009890660535e-06, + "loss": 0.1809, + "step": 48296 + }, + { + "epoch": 0.8345486591097595, + "grad_norm": 1.6529107061949466, + "learning_rate": 1.401724128824612e-06, + "loss": 0.4559, + "step": 48297 + }, + { + "epoch": 0.8345659386231684, + "grad_norm": 1.7994658156160164, + "learning_rate": 1.401438393919139e-06, + "loss": 0.3251, + "step": 48298 + }, + { + "epoch": 0.8345832181365773, + "grad_norm": 1.317596579961971, + "learning_rate": 1.4011526859450165e-06, + "loss": 0.2683, + "step": 48299 + }, + { + "epoch": 0.8346004976499862, + "grad_norm": 0.8973404868880281, + "learning_rate": 1.400867004903136e-06, + "loss": 0.371, + "step": 48300 + }, + { + "epoch": 0.834617777163395, + "grad_norm": 1.4298318987817755, + "learning_rate": 1.4005813507943933e-06, + "loss": 0.2933, + "step": 48301 + }, + { + "epoch": 0.8346350566768039, + "grad_norm": 1.5850207510918128, + "learning_rate": 1.4002957236196823e-06, + "loss": 0.3287, + "step": 48302 + }, + { + "epoch": 0.8346523361902128, + "grad_norm": 1.1727207098267722, + "learning_rate": 1.4000101233799025e-06, + "loss": 0.7017, + "step": 48303 + }, + { + "epoch": 0.8346696157036217, + "grad_norm": 1.070565792511517, + "learning_rate": 1.3997245500759416e-06, + "loss": 0.4194, + "step": 48304 + }, + { + "epoch": 0.8346868952170307, + "grad_norm": 1.7433292659871618, + "learning_rate": 1.3994390037086992e-06, + "loss": 0.4308, + "step": 48305 + }, + { + "epoch": 0.8347041747304396, + "grad_norm": 0.8235828448348559, + "learning_rate": 1.3991534842790644e-06, + "loss": 0.318, + "step": 48306 + }, + { + "epoch": 0.8347214542438485, + "grad_norm": 1.2634465811747229, + "learning_rate": 1.3988679917879356e-06, + "loss": 0.315, + "step": 48307 + }, + { + "epoch": 0.8347387337572574, + "grad_norm": 1.218380722240804, + "learning_rate": 1.3985825262362074e-06, + "loss": 0.4121, + "step": 48308 + }, + { + "epoch": 0.8347560132706663, + "grad_norm": 0.7469338597094637, + "learning_rate": 1.3982970876247714e-06, + "loss": 0.2034, + "step": 48309 + }, + { + "epoch": 0.8347732927840752, + "grad_norm": 1.0211238443160038, + "learning_rate": 1.3980116759545236e-06, + "loss": 0.3592, + "step": 48310 + }, + { + "epoch": 0.8347905722974841, + "grad_norm": 1.148250414234131, + "learning_rate": 1.3977262912263555e-06, + "loss": 0.3386, + "step": 48311 + }, + { + "epoch": 0.834807851810893, + "grad_norm": 0.7982524822017196, + "learning_rate": 1.397440933441162e-06, + "loss": 0.3771, + "step": 48312 + }, + { + "epoch": 0.8348251313243019, + "grad_norm": 1.5539386110584927, + "learning_rate": 1.3971556025998368e-06, + "loss": 0.5018, + "step": 48313 + }, + { + "epoch": 0.8348424108377108, + "grad_norm": 1.2304969979060092, + "learning_rate": 1.3968702987032779e-06, + "loss": 0.3039, + "step": 48314 + }, + { + "epoch": 0.8348596903511197, + "grad_norm": 0.8754743766004857, + "learning_rate": 1.3965850217523714e-06, + "loss": 0.2649, + "step": 48315 + }, + { + "epoch": 0.8348769698645286, + "grad_norm": 0.8574172272528953, + "learning_rate": 1.3962997717480165e-06, + "loss": 0.3585, + "step": 48316 + }, + { + "epoch": 0.8348942493779375, + "grad_norm": 1.3406798773230786, + "learning_rate": 1.396014548691106e-06, + "loss": 0.4021, + "step": 48317 + }, + { + "epoch": 0.8349115288913465, + "grad_norm": 1.3383385723702983, + "learning_rate": 1.3957293525825255e-06, + "loss": 0.3605, + "step": 48318 + }, + { + "epoch": 0.8349288084047554, + "grad_norm": 0.9244792807499815, + "learning_rate": 1.3954441834231803e-06, + "loss": 0.4249, + "step": 48319 + }, + { + "epoch": 0.8349460879181643, + "grad_norm": 1.0899498600074893, + "learning_rate": 1.395159041213956e-06, + "loss": 0.251, + "step": 48320 + }, + { + "epoch": 0.8349633674315732, + "grad_norm": 1.5624526305256055, + "learning_rate": 1.3948739259557487e-06, + "loss": 0.2353, + "step": 48321 + }, + { + "epoch": 0.834980646944982, + "grad_norm": 1.2783559959803557, + "learning_rate": 1.3945888376494488e-06, + "loss": 0.7541, + "step": 48322 + }, + { + "epoch": 0.8349979264583909, + "grad_norm": 2.051308774178075, + "learning_rate": 1.3943037762959521e-06, + "loss": 0.4417, + "step": 48323 + }, + { + "epoch": 0.8350152059717998, + "grad_norm": 1.087786028525034, + "learning_rate": 1.3940187418961458e-06, + "loss": 0.2717, + "step": 48324 + }, + { + "epoch": 0.8350324854852087, + "grad_norm": 0.9424867107235005, + "learning_rate": 1.3937337344509316e-06, + "loss": 0.1978, + "step": 48325 + }, + { + "epoch": 0.8350497649986176, + "grad_norm": 1.2001658559164257, + "learning_rate": 1.3934487539611963e-06, + "loss": 0.4596, + "step": 48326 + }, + { + "epoch": 0.8350670445120265, + "grad_norm": 0.6914907005402563, + "learning_rate": 1.3931638004278326e-06, + "loss": 0.6531, + "step": 48327 + }, + { + "epoch": 0.8350843240254354, + "grad_norm": 1.5408715374511137, + "learning_rate": 1.3928788738517363e-06, + "loss": 0.2568, + "step": 48328 + }, + { + "epoch": 0.8351016035388443, + "grad_norm": 2.011823674913555, + "learning_rate": 1.3925939742337946e-06, + "loss": 0.3832, + "step": 48329 + }, + { + "epoch": 0.8351188830522532, + "grad_norm": 1.4900787210168018, + "learning_rate": 1.3923091015749023e-06, + "loss": 0.4683, + "step": 48330 + }, + { + "epoch": 0.8351361625656621, + "grad_norm": 2.1260493485874767, + "learning_rate": 1.3920242558759534e-06, + "loss": 0.3384, + "step": 48331 + }, + { + "epoch": 0.835153442079071, + "grad_norm": 1.3711482972859232, + "learning_rate": 1.3917394371378402e-06, + "loss": 0.3963, + "step": 48332 + }, + { + "epoch": 0.83517072159248, + "grad_norm": 1.371492421644758, + "learning_rate": 1.3914546453614508e-06, + "loss": 0.3951, + "step": 48333 + }, + { + "epoch": 0.8351880011058889, + "grad_norm": 1.2575598435961524, + "learning_rate": 1.3911698805476815e-06, + "loss": 0.4872, + "step": 48334 + }, + { + "epoch": 0.8352052806192978, + "grad_norm": 1.2520193182577937, + "learning_rate": 1.3908851426974213e-06, + "loss": 0.343, + "step": 48335 + }, + { + "epoch": 0.8352225601327067, + "grad_norm": 1.4794441793063475, + "learning_rate": 1.3906004318115629e-06, + "loss": 0.2927, + "step": 48336 + }, + { + "epoch": 0.8352398396461156, + "grad_norm": 1.3024727816606867, + "learning_rate": 1.390315747891e-06, + "loss": 0.3112, + "step": 48337 + }, + { + "epoch": 0.8352571191595245, + "grad_norm": 0.9477976542387606, + "learning_rate": 1.39003109093662e-06, + "loss": 0.3052, + "step": 48338 + }, + { + "epoch": 0.8352743986729334, + "grad_norm": 0.894540697744618, + "learning_rate": 1.3897464609493194e-06, + "loss": 0.418, + "step": 48339 + }, + { + "epoch": 0.8352916781863423, + "grad_norm": 1.171061359571439, + "learning_rate": 1.3894618579299857e-06, + "loss": 0.5154, + "step": 48340 + }, + { + "epoch": 0.8353089576997512, + "grad_norm": 2.0063409353423935, + "learning_rate": 1.3891772818795112e-06, + "loss": 0.3006, + "step": 48341 + }, + { + "epoch": 0.8353262372131601, + "grad_norm": 1.0553126978941025, + "learning_rate": 1.3888927327987877e-06, + "loss": 0.3469, + "step": 48342 + }, + { + "epoch": 0.835343516726569, + "grad_norm": 1.6779975272528993, + "learning_rate": 1.38860821068871e-06, + "loss": 0.2499, + "step": 48343 + }, + { + "epoch": 0.8353607962399778, + "grad_norm": 1.1735918278172703, + "learning_rate": 1.3883237155501617e-06, + "loss": 0.2477, + "step": 48344 + }, + { + "epoch": 0.8353780757533867, + "grad_norm": 0.6583574200354205, + "learning_rate": 1.388039247384042e-06, + "loss": 0.9489, + "step": 48345 + }, + { + "epoch": 0.8353953552667956, + "grad_norm": 2.228659030312958, + "learning_rate": 1.3877548061912349e-06, + "loss": 0.1847, + "step": 48346 + }, + { + "epoch": 0.8354126347802046, + "grad_norm": 1.3667795593238667, + "learning_rate": 1.387470391972634e-06, + "loss": 0.642, + "step": 48347 + }, + { + "epoch": 0.8354299142936135, + "grad_norm": 2.3978911247775376, + "learning_rate": 1.387186004729133e-06, + "loss": 0.4775, + "step": 48348 + }, + { + "epoch": 0.8354471938070224, + "grad_norm": 1.1652879190026104, + "learning_rate": 1.3869016444616167e-06, + "loss": 0.4917, + "step": 48349 + }, + { + "epoch": 0.8354644733204313, + "grad_norm": 1.1822408060176595, + "learning_rate": 1.3866173111709825e-06, + "loss": 0.4357, + "step": 48350 + }, + { + "epoch": 0.8354817528338402, + "grad_norm": 1.9572264502459977, + "learning_rate": 1.3863330048581147e-06, + "loss": 0.3016, + "step": 48351 + }, + { + "epoch": 0.8354990323472491, + "grad_norm": 1.4362013367500521, + "learning_rate": 1.3860487255239074e-06, + "loss": 0.3343, + "step": 48352 + }, + { + "epoch": 0.835516311860658, + "grad_norm": 1.0482714146268182, + "learning_rate": 1.3857644731692487e-06, + "loss": 0.3526, + "step": 48353 + }, + { + "epoch": 0.8355335913740669, + "grad_norm": 1.4869562012547048, + "learning_rate": 1.3854802477950335e-06, + "loss": 0.3272, + "step": 48354 + }, + { + "epoch": 0.8355508708874758, + "grad_norm": 1.376155115540672, + "learning_rate": 1.3851960494021465e-06, + "loss": 0.3705, + "step": 48355 + }, + { + "epoch": 0.8355681504008847, + "grad_norm": 1.3111533071009973, + "learning_rate": 1.3849118779914828e-06, + "loss": 0.3677, + "step": 48356 + }, + { + "epoch": 0.8355854299142936, + "grad_norm": 1.0767961606230165, + "learning_rate": 1.384627733563929e-06, + "loss": 0.3127, + "step": 48357 + }, + { + "epoch": 0.8356027094277025, + "grad_norm": 1.7344672559305874, + "learning_rate": 1.3843436161203717e-06, + "loss": 0.3736, + "step": 48358 + }, + { + "epoch": 0.8356199889411114, + "grad_norm": 1.2406603658100486, + "learning_rate": 1.3840595256617096e-06, + "loss": 0.2207, + "step": 48359 + }, + { + "epoch": 0.8356372684545204, + "grad_norm": 1.7889076789423564, + "learning_rate": 1.3837754621888256e-06, + "loss": 0.289, + "step": 48360 + }, + { + "epoch": 0.8356545479679293, + "grad_norm": 1.2319159992052833, + "learning_rate": 1.3834914257026133e-06, + "loss": 0.2253, + "step": 48361 + }, + { + "epoch": 0.8356718274813382, + "grad_norm": 1.5546356904921512, + "learning_rate": 1.383207416203959e-06, + "loss": 0.2207, + "step": 48362 + }, + { + "epoch": 0.8356891069947471, + "grad_norm": 0.9668257719799054, + "learning_rate": 1.3829234336937559e-06, + "loss": 0.3486, + "step": 48363 + }, + { + "epoch": 0.835706386508156, + "grad_norm": 0.964325701313222, + "learning_rate": 1.382639478172887e-06, + "loss": 0.4785, + "step": 48364 + }, + { + "epoch": 0.8357236660215648, + "grad_norm": 0.8355906784796898, + "learning_rate": 1.3823555496422514e-06, + "loss": 0.4001, + "step": 48365 + }, + { + "epoch": 0.8357409455349737, + "grad_norm": 0.5170530018153878, + "learning_rate": 1.3820716481027308e-06, + "loss": 0.5282, + "step": 48366 + }, + { + "epoch": 0.8357582250483826, + "grad_norm": 0.9021848833532453, + "learning_rate": 1.3817877735552177e-06, + "loss": 0.4246, + "step": 48367 + }, + { + "epoch": 0.8357755045617915, + "grad_norm": 1.2791684711168336, + "learning_rate": 1.3815039260006013e-06, + "loss": 0.3832, + "step": 48368 + }, + { + "epoch": 0.8357927840752004, + "grad_norm": 2.2217361549798413, + "learning_rate": 1.3812201054397668e-06, + "loss": 0.5287, + "step": 48369 + }, + { + "epoch": 0.8358100635886093, + "grad_norm": 1.0261222746326446, + "learning_rate": 1.3809363118736064e-06, + "loss": 0.2148, + "step": 48370 + }, + { + "epoch": 0.8358273431020182, + "grad_norm": 1.1647170923463672, + "learning_rate": 1.3806525453030083e-06, + "loss": 0.4163, + "step": 48371 + }, + { + "epoch": 0.8358446226154271, + "grad_norm": 0.9922684421166101, + "learning_rate": 1.3803688057288634e-06, + "loss": 0.3204, + "step": 48372 + }, + { + "epoch": 0.835861902128836, + "grad_norm": 0.8200973513263063, + "learning_rate": 1.380085093152057e-06, + "loss": 0.3179, + "step": 48373 + }, + { + "epoch": 0.835879181642245, + "grad_norm": 1.3327093181953715, + "learning_rate": 1.3798014075734811e-06, + "loss": 0.3489, + "step": 48374 + }, + { + "epoch": 0.8358964611556539, + "grad_norm": 1.1837432968179418, + "learning_rate": 1.3795177489940204e-06, + "loss": 0.1706, + "step": 48375 + }, + { + "epoch": 0.8359137406690628, + "grad_norm": 1.3613224963073334, + "learning_rate": 1.379234117414564e-06, + "loss": 0.4145, + "step": 48376 + }, + { + "epoch": 0.8359310201824717, + "grad_norm": 1.7019874196334277, + "learning_rate": 1.3789505128360026e-06, + "loss": 0.3682, + "step": 48377 + }, + { + "epoch": 0.8359482996958806, + "grad_norm": 1.7345315754373132, + "learning_rate": 1.3786669352592252e-06, + "loss": 0.6116, + "step": 48378 + }, + { + "epoch": 0.8359655792092895, + "grad_norm": 1.357081702145844, + "learning_rate": 1.378383384685118e-06, + "loss": 0.4017, + "step": 48379 + }, + { + "epoch": 0.8359828587226984, + "grad_norm": 0.7161896866206302, + "learning_rate": 1.378099861114568e-06, + "loss": 0.4722, + "step": 48380 + }, + { + "epoch": 0.8360001382361073, + "grad_norm": 2.5127561962662, + "learning_rate": 1.3778163645484632e-06, + "loss": 0.2921, + "step": 48381 + }, + { + "epoch": 0.8360174177495162, + "grad_norm": 0.8873087381670745, + "learning_rate": 1.3775328949876932e-06, + "loss": 0.233, + "step": 48382 + }, + { + "epoch": 0.8360346972629251, + "grad_norm": 1.7031678049219978, + "learning_rate": 1.377249452433148e-06, + "loss": 0.3971, + "step": 48383 + }, + { + "epoch": 0.836051976776334, + "grad_norm": 1.2238708370836928, + "learning_rate": 1.3769660368857107e-06, + "loss": 0.2895, + "step": 48384 + }, + { + "epoch": 0.8360692562897429, + "grad_norm": 0.9131147975314371, + "learning_rate": 1.3766826483462726e-06, + "loss": 0.3301, + "step": 48385 + }, + { + "epoch": 0.8360865358031517, + "grad_norm": 1.7605612891935165, + "learning_rate": 1.3763992868157184e-06, + "loss": 0.2287, + "step": 48386 + }, + { + "epoch": 0.8361038153165606, + "grad_norm": 1.4264065521449003, + "learning_rate": 1.3761159522949363e-06, + "loss": 0.3944, + "step": 48387 + }, + { + "epoch": 0.8361210948299695, + "grad_norm": 1.0468859198273714, + "learning_rate": 1.375832644784817e-06, + "loss": 0.3363, + "step": 48388 + }, + { + "epoch": 0.8361383743433785, + "grad_norm": 1.5264617105533045, + "learning_rate": 1.375549364286244e-06, + "loss": 0.2885, + "step": 48389 + }, + { + "epoch": 0.8361556538567874, + "grad_norm": 1.0228506235184627, + "learning_rate": 1.3752661108001063e-06, + "loss": 0.2483, + "step": 48390 + }, + { + "epoch": 0.8361729333701963, + "grad_norm": 1.4860526868003294, + "learning_rate": 1.3749828843272894e-06, + "loss": 0.6002, + "step": 48391 + }, + { + "epoch": 0.8361902128836052, + "grad_norm": 1.813213006679287, + "learning_rate": 1.374699684868681e-06, + "loss": 0.3488, + "step": 48392 + }, + { + "epoch": 0.8362074923970141, + "grad_norm": 1.4334020092747057, + "learning_rate": 1.3744165124251696e-06, + "loss": 0.9027, + "step": 48393 + }, + { + "epoch": 0.836224771910423, + "grad_norm": 1.137303232936743, + "learning_rate": 1.374133366997643e-06, + "loss": 0.4607, + "step": 48394 + }, + { + "epoch": 0.8362420514238319, + "grad_norm": 1.5133297583212666, + "learning_rate": 1.373850248586983e-06, + "loss": 0.4483, + "step": 48395 + }, + { + "epoch": 0.8362593309372408, + "grad_norm": 1.8769133921147378, + "learning_rate": 1.3735671571940835e-06, + "loss": 0.2438, + "step": 48396 + }, + { + "epoch": 0.8362766104506497, + "grad_norm": 0.7500085440299056, + "learning_rate": 1.3732840928198243e-06, + "loss": 0.2704, + "step": 48397 + }, + { + "epoch": 0.8362938899640586, + "grad_norm": 1.3559126658201013, + "learning_rate": 1.373001055465094e-06, + "loss": 0.4579, + "step": 48398 + }, + { + "epoch": 0.8363111694774675, + "grad_norm": 0.9930526028160616, + "learning_rate": 1.372718045130783e-06, + "loss": 0.3537, + "step": 48399 + }, + { + "epoch": 0.8363284489908764, + "grad_norm": 1.095971332803317, + "learning_rate": 1.3724350618177728e-06, + "loss": 0.3837, + "step": 48400 + }, + { + "epoch": 0.8363457285042853, + "grad_norm": 1.1730299206912245, + "learning_rate": 1.3721521055269538e-06, + "loss": 0.2794, + "step": 48401 + }, + { + "epoch": 0.8363630080176943, + "grad_norm": 0.8770820693392548, + "learning_rate": 1.3718691762592085e-06, + "loss": 0.5771, + "step": 48402 + }, + { + "epoch": 0.8363802875311032, + "grad_norm": 1.219590196380718, + "learning_rate": 1.371586274015425e-06, + "loss": 0.1843, + "step": 48403 + }, + { + "epoch": 0.8363975670445121, + "grad_norm": 1.1464666610692424, + "learning_rate": 1.3713033987964864e-06, + "loss": 0.322, + "step": 48404 + }, + { + "epoch": 0.836414846557921, + "grad_norm": 1.1388018517828362, + "learning_rate": 1.3710205506032847e-06, + "loss": 0.3592, + "step": 48405 + }, + { + "epoch": 0.8364321260713299, + "grad_norm": 0.9823963317153467, + "learning_rate": 1.3707377294367008e-06, + "loss": 0.3746, + "step": 48406 + }, + { + "epoch": 0.8364494055847387, + "grad_norm": 1.6557328876373953, + "learning_rate": 1.3704549352976238e-06, + "loss": 0.4308, + "step": 48407 + }, + { + "epoch": 0.8364666850981476, + "grad_norm": 0.9937403028493218, + "learning_rate": 1.3701721681869362e-06, + "loss": 0.458, + "step": 48408 + }, + { + "epoch": 0.8364839646115565, + "grad_norm": 1.2872215905851825, + "learning_rate": 1.3698894281055276e-06, + "loss": 0.4104, + "step": 48409 + }, + { + "epoch": 0.8365012441249654, + "grad_norm": 1.0233945451306776, + "learning_rate": 1.369606715054278e-06, + "loss": 0.4052, + "step": 48410 + }, + { + "epoch": 0.8365185236383743, + "grad_norm": 0.5503887138420299, + "learning_rate": 1.369324029034076e-06, + "loss": 0.799, + "step": 48411 + }, + { + "epoch": 0.8365358031517832, + "grad_norm": 1.0247013758390575, + "learning_rate": 1.3690413700458106e-06, + "loss": 0.4041, + "step": 48412 + }, + { + "epoch": 0.8365530826651921, + "grad_norm": 1.5618046888083956, + "learning_rate": 1.36875873809036e-06, + "loss": 0.7255, + "step": 48413 + }, + { + "epoch": 0.836570362178601, + "grad_norm": 1.4078127023919473, + "learning_rate": 1.368476133168616e-06, + "loss": 0.3236, + "step": 48414 + }, + { + "epoch": 0.8365876416920099, + "grad_norm": 1.0894726864003195, + "learning_rate": 1.3681935552814586e-06, + "loss": 0.3097, + "step": 48415 + }, + { + "epoch": 0.8366049212054188, + "grad_norm": 1.1025766793441556, + "learning_rate": 1.3679110044297739e-06, + "loss": 0.3128, + "step": 48416 + }, + { + "epoch": 0.8366222007188278, + "grad_norm": 1.9881362083301952, + "learning_rate": 1.3676284806144492e-06, + "loss": 0.3679, + "step": 48417 + }, + { + "epoch": 0.8366394802322367, + "grad_norm": 1.6936284095972045, + "learning_rate": 1.3673459838363689e-06, + "loss": 0.4006, + "step": 48418 + }, + { + "epoch": 0.8366567597456456, + "grad_norm": 1.582173023347749, + "learning_rate": 1.3670635140964184e-06, + "loss": 0.483, + "step": 48419 + }, + { + "epoch": 0.8366740392590545, + "grad_norm": 1.6429017477528944, + "learning_rate": 1.3667810713954789e-06, + "loss": 0.3579, + "step": 48420 + }, + { + "epoch": 0.8366913187724634, + "grad_norm": 1.695945821204693, + "learning_rate": 1.3664986557344363e-06, + "loss": 0.3254, + "step": 48421 + }, + { + "epoch": 0.8367085982858723, + "grad_norm": 1.4131631232441293, + "learning_rate": 1.3662162671141766e-06, + "loss": 0.3714, + "step": 48422 + }, + { + "epoch": 0.8367258777992812, + "grad_norm": 2.1169523522540428, + "learning_rate": 1.3659339055355846e-06, + "loss": 0.3431, + "step": 48423 + }, + { + "epoch": 0.8367431573126901, + "grad_norm": 1.1876641814364348, + "learning_rate": 1.3656515709995433e-06, + "loss": 0.4903, + "step": 48424 + }, + { + "epoch": 0.836760436826099, + "grad_norm": 1.7960836906591269, + "learning_rate": 1.3653692635069393e-06, + "loss": 0.3252, + "step": 48425 + }, + { + "epoch": 0.8367777163395079, + "grad_norm": 2.017386534463687, + "learning_rate": 1.3650869830586521e-06, + "loss": 0.4739, + "step": 48426 + }, + { + "epoch": 0.8367949958529168, + "grad_norm": 0.9564790129397428, + "learning_rate": 1.364804729655569e-06, + "loss": 0.3385, + "step": 48427 + }, + { + "epoch": 0.8368122753663256, + "grad_norm": 1.9296568623751251, + "learning_rate": 1.3645225032985743e-06, + "loss": 0.2322, + "step": 48428 + }, + { + "epoch": 0.8368295548797345, + "grad_norm": 1.2775733553114998, + "learning_rate": 1.3642403039885533e-06, + "loss": 0.4102, + "step": 48429 + }, + { + "epoch": 0.8368468343931434, + "grad_norm": 1.0551355269138214, + "learning_rate": 1.3639581317263873e-06, + "loss": 0.2068, + "step": 48430 + }, + { + "epoch": 0.8368641139065524, + "grad_norm": 0.676523257462528, + "learning_rate": 1.363675986512959e-06, + "loss": 0.7888, + "step": 48431 + }, + { + "epoch": 0.8368813934199613, + "grad_norm": 1.3117274665921752, + "learning_rate": 1.3633938683491543e-06, + "loss": 0.28, + "step": 48432 + }, + { + "epoch": 0.8368986729333702, + "grad_norm": 1.009414035111989, + "learning_rate": 1.3631117772358559e-06, + "loss": 0.2406, + "step": 48433 + }, + { + "epoch": 0.8369159524467791, + "grad_norm": 0.6177135133084328, + "learning_rate": 1.3628297131739499e-06, + "loss": 1.0637, + "step": 48434 + }, + { + "epoch": 0.836933231960188, + "grad_norm": 1.3145785758880149, + "learning_rate": 1.3625476761643153e-06, + "loss": 0.3451, + "step": 48435 + }, + { + "epoch": 0.8369505114735969, + "grad_norm": 0.8042695487520728, + "learning_rate": 1.3622656662078403e-06, + "loss": 0.4447, + "step": 48436 + }, + { + "epoch": 0.8369677909870058, + "grad_norm": 1.5069428941204568, + "learning_rate": 1.3619836833054035e-06, + "loss": 0.2261, + "step": 48437 + }, + { + "epoch": 0.8369850705004147, + "grad_norm": 1.360920655998819, + "learning_rate": 1.3617017274578904e-06, + "loss": 0.4211, + "step": 48438 + }, + { + "epoch": 0.8370023500138236, + "grad_norm": 1.1939878039015246, + "learning_rate": 1.361419798666186e-06, + "loss": 0.3583, + "step": 48439 + }, + { + "epoch": 0.8370196295272325, + "grad_norm": 2.1170787674856935, + "learning_rate": 1.3611378969311683e-06, + "loss": 0.2703, + "step": 48440 + }, + { + "epoch": 0.8370369090406414, + "grad_norm": 0.8968600590611984, + "learning_rate": 1.3608560222537271e-06, + "loss": 0.4129, + "step": 48441 + }, + { + "epoch": 0.8370541885540503, + "grad_norm": 1.0671158226684838, + "learning_rate": 1.3605741746347378e-06, + "loss": 0.3586, + "step": 48442 + }, + { + "epoch": 0.8370714680674592, + "grad_norm": 1.1448233504878391, + "learning_rate": 1.36029235407509e-06, + "loss": 0.3502, + "step": 48443 + }, + { + "epoch": 0.8370887475808682, + "grad_norm": 1.013891303100296, + "learning_rate": 1.3600105605756574e-06, + "loss": 0.2376, + "step": 48444 + }, + { + "epoch": 0.8371060270942771, + "grad_norm": 1.3400544940208796, + "learning_rate": 1.3597287941373338e-06, + "loss": 0.2955, + "step": 48445 + }, + { + "epoch": 0.837123306607686, + "grad_norm": 1.0869675251419548, + "learning_rate": 1.359447054760994e-06, + "loss": 0.3897, + "step": 48446 + }, + { + "epoch": 0.8371405861210949, + "grad_norm": 1.3435880707526644, + "learning_rate": 1.3591653424475249e-06, + "loss": 0.3909, + "step": 48447 + }, + { + "epoch": 0.8371578656345038, + "grad_norm": 2.1675385876427926, + "learning_rate": 1.3588836571978037e-06, + "loss": 0.2783, + "step": 48448 + }, + { + "epoch": 0.8371751451479126, + "grad_norm": 1.3235601613053765, + "learning_rate": 1.3586019990127186e-06, + "loss": 0.3178, + "step": 48449 + }, + { + "epoch": 0.8371924246613215, + "grad_norm": 1.3011101167682122, + "learning_rate": 1.3583203678931468e-06, + "loss": 0.3319, + "step": 48450 + }, + { + "epoch": 0.8372097041747304, + "grad_norm": 2.3293445837022753, + "learning_rate": 1.3580387638399717e-06, + "loss": 0.398, + "step": 48451 + }, + { + "epoch": 0.8372269836881393, + "grad_norm": 1.2118373266689817, + "learning_rate": 1.3577571868540784e-06, + "loss": 0.2466, + "step": 48452 + }, + { + "epoch": 0.8372442632015482, + "grad_norm": 1.5280346125295658, + "learning_rate": 1.357475636936344e-06, + "loss": 0.3733, + "step": 48453 + }, + { + "epoch": 0.8372615427149571, + "grad_norm": 1.416247528625946, + "learning_rate": 1.3571941140876543e-06, + "loss": 0.4577, + "step": 48454 + }, + { + "epoch": 0.837278822228366, + "grad_norm": 0.8794089719246084, + "learning_rate": 1.3569126183088887e-06, + "loss": 0.6929, + "step": 48455 + }, + { + "epoch": 0.8372961017417749, + "grad_norm": 1.257019719170217, + "learning_rate": 1.3566311496009289e-06, + "loss": 0.4887, + "step": 48456 + }, + { + "epoch": 0.8373133812551838, + "grad_norm": 1.0654779334919389, + "learning_rate": 1.356349707964657e-06, + "loss": 0.2804, + "step": 48457 + }, + { + "epoch": 0.8373306607685927, + "grad_norm": 0.7492314074809306, + "learning_rate": 1.3560682934009573e-06, + "loss": 0.6342, + "step": 48458 + }, + { + "epoch": 0.8373479402820017, + "grad_norm": 1.2309335366634087, + "learning_rate": 1.3557869059107053e-06, + "loss": 0.3003, + "step": 48459 + }, + { + "epoch": 0.8373652197954106, + "grad_norm": 1.1899559006125044, + "learning_rate": 1.3555055454947897e-06, + "loss": 0.3533, + "step": 48460 + }, + { + "epoch": 0.8373824993088195, + "grad_norm": 1.0274126128384173, + "learning_rate": 1.3552242121540837e-06, + "loss": 0.3499, + "step": 48461 + }, + { + "epoch": 0.8373997788222284, + "grad_norm": 0.9789937869259271, + "learning_rate": 1.3549429058894736e-06, + "loss": 0.313, + "step": 48462 + }, + { + "epoch": 0.8374170583356373, + "grad_norm": 1.1337777400330962, + "learning_rate": 1.354661626701841e-06, + "loss": 0.1114, + "step": 48463 + }, + { + "epoch": 0.8374343378490462, + "grad_norm": 1.2635853299145605, + "learning_rate": 1.3543803745920636e-06, + "loss": 0.2755, + "step": 48464 + }, + { + "epoch": 0.8374516173624551, + "grad_norm": 1.122524698024109, + "learning_rate": 1.3540991495610256e-06, + "loss": 0.3668, + "step": 48465 + }, + { + "epoch": 0.837468896875864, + "grad_norm": 0.9327670399271631, + "learning_rate": 1.353817951609604e-06, + "loss": 0.3753, + "step": 48466 + }, + { + "epoch": 0.8374861763892729, + "grad_norm": 1.2429474258594493, + "learning_rate": 1.3535367807386812e-06, + "loss": 0.2632, + "step": 48467 + }, + { + "epoch": 0.8375034559026818, + "grad_norm": 0.9503915160978657, + "learning_rate": 1.353255636949139e-06, + "loss": 0.4179, + "step": 48468 + }, + { + "epoch": 0.8375207354160907, + "grad_norm": 1.2353906950041074, + "learning_rate": 1.3529745202418598e-06, + "loss": 0.3335, + "step": 48469 + }, + { + "epoch": 0.8375380149294995, + "grad_norm": 1.4885337053810812, + "learning_rate": 1.3526934306177209e-06, + "loss": 0.4368, + "step": 48470 + }, + { + "epoch": 0.8375552944429084, + "grad_norm": 0.877669729725633, + "learning_rate": 1.3524123680776013e-06, + "loss": 0.7273, + "step": 48471 + }, + { + "epoch": 0.8375725739563173, + "grad_norm": 1.236819484475904, + "learning_rate": 1.3521313326223827e-06, + "loss": 0.3289, + "step": 48472 + }, + { + "epoch": 0.8375898534697263, + "grad_norm": 1.30061077308599, + "learning_rate": 1.3518503242529468e-06, + "loss": 0.4047, + "step": 48473 + }, + { + "epoch": 0.8376071329831352, + "grad_norm": 1.7257680977158931, + "learning_rate": 1.3515693429701749e-06, + "loss": 0.2543, + "step": 48474 + }, + { + "epoch": 0.8376244124965441, + "grad_norm": 1.0960297909345493, + "learning_rate": 1.351288388774943e-06, + "loss": 0.2654, + "step": 48475 + }, + { + "epoch": 0.837641692009953, + "grad_norm": 2.108536887518489, + "learning_rate": 1.3510074616681357e-06, + "loss": 0.4948, + "step": 48476 + }, + { + "epoch": 0.8376589715233619, + "grad_norm": 1.4393655934179779, + "learning_rate": 1.3507265616506271e-06, + "loss": 0.2582, + "step": 48477 + }, + { + "epoch": 0.8376762510367708, + "grad_norm": 0.7687373712735491, + "learning_rate": 1.3504456887233009e-06, + "loss": 0.2192, + "step": 48478 + }, + { + "epoch": 0.8376935305501797, + "grad_norm": 1.0173419504283192, + "learning_rate": 1.3501648428870361e-06, + "loss": 0.3329, + "step": 48479 + }, + { + "epoch": 0.8377108100635886, + "grad_norm": 1.2556634970544265, + "learning_rate": 1.3498840241427147e-06, + "loss": 0.4151, + "step": 48480 + }, + { + "epoch": 0.8377280895769975, + "grad_norm": 1.6165198121829099, + "learning_rate": 1.3496032324912146e-06, + "loss": 0.3622, + "step": 48481 + }, + { + "epoch": 0.8377453690904064, + "grad_norm": 2.1694319278206065, + "learning_rate": 1.3493224679334104e-06, + "loss": 0.4125, + "step": 48482 + }, + { + "epoch": 0.8377626486038153, + "grad_norm": 0.9668854208981017, + "learning_rate": 1.3490417304701898e-06, + "loss": 0.3876, + "step": 48483 + }, + { + "epoch": 0.8377799281172242, + "grad_norm": 2.053629590219059, + "learning_rate": 1.3487610201024225e-06, + "loss": 0.3592, + "step": 48484 + }, + { + "epoch": 0.8377972076306331, + "grad_norm": 1.6143084786694333, + "learning_rate": 1.3484803368309985e-06, + "loss": 0.6085, + "step": 48485 + }, + { + "epoch": 0.837814487144042, + "grad_norm": 1.6605908013442423, + "learning_rate": 1.3481996806567876e-06, + "loss": 0.8445, + "step": 48486 + }, + { + "epoch": 0.837831766657451, + "grad_norm": 1.175960381983144, + "learning_rate": 1.3479190515806763e-06, + "loss": 0.3437, + "step": 48487 + }, + { + "epoch": 0.8378490461708599, + "grad_norm": 1.6068132576338254, + "learning_rate": 1.3476384496035377e-06, + "loss": 0.3376, + "step": 48488 + }, + { + "epoch": 0.8378663256842688, + "grad_norm": 1.1838991257918485, + "learning_rate": 1.347357874726255e-06, + "loss": 0.3702, + "step": 48489 + }, + { + "epoch": 0.8378836051976777, + "grad_norm": 1.4203103797004788, + "learning_rate": 1.347077326949704e-06, + "loss": 0.3403, + "step": 48490 + }, + { + "epoch": 0.8379008847110866, + "grad_norm": 1.6944671309963741, + "learning_rate": 1.3467968062747627e-06, + "loss": 0.3121, + "step": 48491 + }, + { + "epoch": 0.8379181642244954, + "grad_norm": 1.4812153234878724, + "learning_rate": 1.346516312702315e-06, + "loss": 0.4506, + "step": 48492 + }, + { + "epoch": 0.8379354437379043, + "grad_norm": 1.5584341741487797, + "learning_rate": 1.3462358462332326e-06, + "loss": 0.5711, + "step": 48493 + }, + { + "epoch": 0.8379527232513132, + "grad_norm": 1.5955176580901396, + "learning_rate": 1.3459554068684e-06, + "loss": 0.4752, + "step": 48494 + }, + { + "epoch": 0.8379700027647221, + "grad_norm": 0.980166022889569, + "learning_rate": 1.34567499460869e-06, + "loss": 0.416, + "step": 48495 + }, + { + "epoch": 0.837987282278131, + "grad_norm": 1.5767293239708584, + "learning_rate": 1.345394609454984e-06, + "loss": 0.2917, + "step": 48496 + }, + { + "epoch": 0.8380045617915399, + "grad_norm": 1.2453497384132795, + "learning_rate": 1.3451142514081595e-06, + "loss": 0.3918, + "step": 48497 + }, + { + "epoch": 0.8380218413049488, + "grad_norm": 1.1637816512212653, + "learning_rate": 1.3448339204690975e-06, + "loss": 0.3468, + "step": 48498 + }, + { + "epoch": 0.8380391208183577, + "grad_norm": 1.6697979453740466, + "learning_rate": 1.3445536166386708e-06, + "loss": 0.4959, + "step": 48499 + }, + { + "epoch": 0.8380564003317666, + "grad_norm": 0.9821715476624234, + "learning_rate": 1.344273339917763e-06, + "loss": 0.2806, + "step": 48500 + }, + { + "epoch": 0.8380736798451756, + "grad_norm": 0.6655264478230608, + "learning_rate": 1.3439930903072463e-06, + "loss": 0.6811, + "step": 48501 + }, + { + "epoch": 0.8380909593585845, + "grad_norm": 1.0758610699372064, + "learning_rate": 1.3437128678080014e-06, + "loss": 0.4543, + "step": 48502 + }, + { + "epoch": 0.8381082388719934, + "grad_norm": 1.1376510661385175, + "learning_rate": 1.3434326724209068e-06, + "loss": 0.3942, + "step": 48503 + }, + { + "epoch": 0.8381255183854023, + "grad_norm": 1.2535324634339133, + "learning_rate": 1.3431525041468374e-06, + "loss": 0.4019, + "step": 48504 + }, + { + "epoch": 0.8381427978988112, + "grad_norm": 1.1101860470081368, + "learning_rate": 1.3428723629866746e-06, + "loss": 0.4178, + "step": 48505 + }, + { + "epoch": 0.8381600774122201, + "grad_norm": 1.7010897935836096, + "learning_rate": 1.3425922489412913e-06, + "loss": 0.4496, + "step": 48506 + }, + { + "epoch": 0.838177356925629, + "grad_norm": 2.123572731648461, + "learning_rate": 1.3423121620115664e-06, + "loss": 0.3427, + "step": 48507 + }, + { + "epoch": 0.8381946364390379, + "grad_norm": 1.5152822460579032, + "learning_rate": 1.3420321021983785e-06, + "loss": 0.321, + "step": 48508 + }, + { + "epoch": 0.8382119159524468, + "grad_norm": 1.3107758273589134, + "learning_rate": 1.3417520695026053e-06, + "loss": 0.4532, + "step": 48509 + }, + { + "epoch": 0.8382291954658557, + "grad_norm": 1.400065967437658, + "learning_rate": 1.341472063925121e-06, + "loss": 0.4163, + "step": 48510 + }, + { + "epoch": 0.8382464749792646, + "grad_norm": 0.9859342242762681, + "learning_rate": 1.341192085466807e-06, + "loss": 0.7079, + "step": 48511 + }, + { + "epoch": 0.8382637544926735, + "grad_norm": 1.328616804094755, + "learning_rate": 1.3409121341285346e-06, + "loss": 0.3387, + "step": 48512 + }, + { + "epoch": 0.8382810340060823, + "grad_norm": 1.3804071855487734, + "learning_rate": 1.3406322099111834e-06, + "loss": 0.3114, + "step": 48513 + }, + { + "epoch": 0.8382983135194912, + "grad_norm": 1.311107515658934, + "learning_rate": 1.3403523128156325e-06, + "loss": 0.4846, + "step": 48514 + }, + { + "epoch": 0.8383155930329002, + "grad_norm": 1.0162062706940294, + "learning_rate": 1.3400724428427547e-06, + "loss": 0.3395, + "step": 48515 + }, + { + "epoch": 0.8383328725463091, + "grad_norm": 1.9988995633891562, + "learning_rate": 1.339792599993429e-06, + "loss": 0.4108, + "step": 48516 + }, + { + "epoch": 0.838350152059718, + "grad_norm": 1.7941283718270122, + "learning_rate": 1.3395127842685296e-06, + "loss": 0.7426, + "step": 48517 + }, + { + "epoch": 0.8383674315731269, + "grad_norm": 1.0850037855332644, + "learning_rate": 1.3392329956689343e-06, + "loss": 0.3195, + "step": 48518 + }, + { + "epoch": 0.8383847110865358, + "grad_norm": 1.3101998965156252, + "learning_rate": 1.3389532341955202e-06, + "loss": 0.2692, + "step": 48519 + }, + { + "epoch": 0.8384019905999447, + "grad_norm": 1.0495639779117112, + "learning_rate": 1.3386734998491635e-06, + "loss": 0.337, + "step": 48520 + }, + { + "epoch": 0.8384192701133536, + "grad_norm": 1.2122204633353935, + "learning_rate": 1.338393792630739e-06, + "loss": 0.281, + "step": 48521 + }, + { + "epoch": 0.8384365496267625, + "grad_norm": 2.0510756716366205, + "learning_rate": 1.3381141125411246e-06, + "loss": 0.2999, + "step": 48522 + }, + { + "epoch": 0.8384538291401714, + "grad_norm": 1.566838925309758, + "learning_rate": 1.3378344595811953e-06, + "loss": 0.1367, + "step": 48523 + }, + { + "epoch": 0.8384711086535803, + "grad_norm": 1.2112434706611144, + "learning_rate": 1.3375548337518219e-06, + "loss": 0.4206, + "step": 48524 + }, + { + "epoch": 0.8384883881669892, + "grad_norm": 1.1414750930492024, + "learning_rate": 1.3372752350538899e-06, + "loss": 0.2623, + "step": 48525 + }, + { + "epoch": 0.8385056676803981, + "grad_norm": 1.7990171860570228, + "learning_rate": 1.3369956634882687e-06, + "loss": 0.3559, + "step": 48526 + }, + { + "epoch": 0.838522947193807, + "grad_norm": 1.9693116445304664, + "learning_rate": 1.3367161190558364e-06, + "loss": 0.401, + "step": 48527 + }, + { + "epoch": 0.838540226707216, + "grad_norm": 1.3558798793732934, + "learning_rate": 1.336436601757466e-06, + "loss": 0.3668, + "step": 48528 + }, + { + "epoch": 0.8385575062206249, + "grad_norm": 2.0091986560076633, + "learning_rate": 1.3361571115940365e-06, + "loss": 0.2317, + "step": 48529 + }, + { + "epoch": 0.8385747857340338, + "grad_norm": 1.248086204927544, + "learning_rate": 1.3358776485664194e-06, + "loss": 0.258, + "step": 48530 + }, + { + "epoch": 0.8385920652474427, + "grad_norm": 2.2522574386952425, + "learning_rate": 1.3355982126754919e-06, + "loss": 0.2977, + "step": 48531 + }, + { + "epoch": 0.8386093447608516, + "grad_norm": 1.639373905390485, + "learning_rate": 1.335318803922132e-06, + "loss": 0.31, + "step": 48532 + }, + { + "epoch": 0.8386266242742605, + "grad_norm": 1.3598673908776795, + "learning_rate": 1.3350394223072093e-06, + "loss": 0.3219, + "step": 48533 + }, + { + "epoch": 0.8386439037876693, + "grad_norm": 2.3535126414974026, + "learning_rate": 1.3347600678316042e-06, + "loss": 0.2772, + "step": 48534 + }, + { + "epoch": 0.8386611833010782, + "grad_norm": 1.2370955333465599, + "learning_rate": 1.3344807404961867e-06, + "loss": 0.4021, + "step": 48535 + }, + { + "epoch": 0.8386784628144871, + "grad_norm": 1.1110537103810503, + "learning_rate": 1.3342014403018332e-06, + "loss": 0.4506, + "step": 48536 + }, + { + "epoch": 0.838695742327896, + "grad_norm": 1.517793318460976, + "learning_rate": 1.3339221672494206e-06, + "loss": 0.2238, + "step": 48537 + }, + { + "epoch": 0.8387130218413049, + "grad_norm": 1.3289497989641388, + "learning_rate": 1.333642921339825e-06, + "loss": 0.1618, + "step": 48538 + }, + { + "epoch": 0.8387303013547138, + "grad_norm": 0.9309906744581692, + "learning_rate": 1.3333637025739144e-06, + "loss": 0.2341, + "step": 48539 + }, + { + "epoch": 0.8387475808681227, + "grad_norm": 1.1561858607565316, + "learning_rate": 1.3330845109525715e-06, + "loss": 0.394, + "step": 48540 + }, + { + "epoch": 0.8387648603815316, + "grad_norm": 1.2571915000291753, + "learning_rate": 1.3328053464766632e-06, + "loss": 0.2566, + "step": 48541 + }, + { + "epoch": 0.8387821398949405, + "grad_norm": 0.9919666506057693, + "learning_rate": 1.332526209147067e-06, + "loss": 0.2975, + "step": 48542 + }, + { + "epoch": 0.8387994194083495, + "grad_norm": 1.552195582586173, + "learning_rate": 1.3322470989646597e-06, + "loss": 0.4322, + "step": 48543 + }, + { + "epoch": 0.8388166989217584, + "grad_norm": 1.2309432862716099, + "learning_rate": 1.3319680159303116e-06, + "loss": 0.405, + "step": 48544 + }, + { + "epoch": 0.8388339784351673, + "grad_norm": 1.6504030574746982, + "learning_rate": 1.3316889600449e-06, + "loss": 0.2857, + "step": 48545 + }, + { + "epoch": 0.8388512579485762, + "grad_norm": 1.3682622221829863, + "learning_rate": 1.3314099313092953e-06, + "loss": 0.3284, + "step": 48546 + }, + { + "epoch": 0.8388685374619851, + "grad_norm": 1.3173583993443045, + "learning_rate": 1.3311309297243747e-06, + "loss": 0.2771, + "step": 48547 + }, + { + "epoch": 0.838885816975394, + "grad_norm": 1.7804903340992058, + "learning_rate": 1.3308519552910093e-06, + "loss": 0.3848, + "step": 48548 + }, + { + "epoch": 0.8389030964888029, + "grad_norm": 1.5585377745019584, + "learning_rate": 1.3305730080100775e-06, + "loss": 0.4417, + "step": 48549 + }, + { + "epoch": 0.8389203760022118, + "grad_norm": 1.1101153576457983, + "learning_rate": 1.3302940878824466e-06, + "loss": 0.2765, + "step": 48550 + }, + { + "epoch": 0.8389376555156207, + "grad_norm": 0.8760216692367121, + "learning_rate": 1.3300151949089968e-06, + "loss": 0.3544, + "step": 48551 + }, + { + "epoch": 0.8389549350290296, + "grad_norm": 1.448986431314701, + "learning_rate": 1.3297363290905962e-06, + "loss": 0.2334, + "step": 48552 + }, + { + "epoch": 0.8389722145424385, + "grad_norm": 1.3756906526188748, + "learning_rate": 1.32945749042812e-06, + "loss": 0.3272, + "step": 48553 + }, + { + "epoch": 0.8389894940558474, + "grad_norm": 1.0021191669694027, + "learning_rate": 1.3291786789224437e-06, + "loss": 0.3239, + "step": 48554 + }, + { + "epoch": 0.8390067735692562, + "grad_norm": 1.306500168450336, + "learning_rate": 1.328899894574437e-06, + "loss": 0.5008, + "step": 48555 + }, + { + "epoch": 0.8390240530826651, + "grad_norm": 2.9923990484950465, + "learning_rate": 1.3286211373849767e-06, + "loss": 0.4404, + "step": 48556 + }, + { + "epoch": 0.839041332596074, + "grad_norm": 2.1622309488189013, + "learning_rate": 1.3283424073549312e-06, + "loss": 0.9343, + "step": 48557 + }, + { + "epoch": 0.839058612109483, + "grad_norm": 1.3514001163783882, + "learning_rate": 1.3280637044851774e-06, + "loss": 0.2267, + "step": 48558 + }, + { + "epoch": 0.8390758916228919, + "grad_norm": 1.137954873833654, + "learning_rate": 1.327785028776587e-06, + "loss": 0.4593, + "step": 48559 + }, + { + "epoch": 0.8390931711363008, + "grad_norm": 1.1114405973756853, + "learning_rate": 1.3275063802300347e-06, + "loss": 0.3821, + "step": 48560 + }, + { + "epoch": 0.8391104506497097, + "grad_norm": 0.927227817260883, + "learning_rate": 1.3272277588463889e-06, + "loss": 0.3811, + "step": 48561 + }, + { + "epoch": 0.8391277301631186, + "grad_norm": 1.245928282343072, + "learning_rate": 1.3269491646265276e-06, + "loss": 0.4512, + "step": 48562 + }, + { + "epoch": 0.8391450096765275, + "grad_norm": 1.064308155907393, + "learning_rate": 1.3266705975713213e-06, + "loss": 0.2362, + "step": 48563 + }, + { + "epoch": 0.8391622891899364, + "grad_norm": 1.6590157118760984, + "learning_rate": 1.3263920576816369e-06, + "loss": 0.2604, + "step": 48564 + }, + { + "epoch": 0.8391795687033453, + "grad_norm": 1.4689802786084125, + "learning_rate": 1.326113544958355e-06, + "loss": 0.2898, + "step": 48565 + }, + { + "epoch": 0.8391968482167542, + "grad_norm": 1.4761409163590231, + "learning_rate": 1.325835059402344e-06, + "loss": 0.5682, + "step": 48566 + }, + { + "epoch": 0.8392141277301631, + "grad_norm": 0.8167037216788603, + "learning_rate": 1.3255566010144782e-06, + "loss": 0.8911, + "step": 48567 + }, + { + "epoch": 0.839231407243572, + "grad_norm": 1.1792617350442327, + "learning_rate": 1.3252781697956273e-06, + "loss": 0.3965, + "step": 48568 + }, + { + "epoch": 0.8392486867569809, + "grad_norm": 1.7401486446235652, + "learning_rate": 1.3249997657466652e-06, + "loss": 0.2207, + "step": 48569 + }, + { + "epoch": 0.8392659662703899, + "grad_norm": 1.6057947405874913, + "learning_rate": 1.324721388868462e-06, + "loss": 0.3444, + "step": 48570 + }, + { + "epoch": 0.8392832457837988, + "grad_norm": 1.0569366605280643, + "learning_rate": 1.3244430391618902e-06, + "loss": 0.2717, + "step": 48571 + }, + { + "epoch": 0.8393005252972077, + "grad_norm": 1.6598326725977874, + "learning_rate": 1.3241647166278215e-06, + "loss": 0.3839, + "step": 48572 + }, + { + "epoch": 0.8393178048106166, + "grad_norm": 2.004579524638101, + "learning_rate": 1.323886421267131e-06, + "loss": 0.3201, + "step": 48573 + }, + { + "epoch": 0.8393350843240255, + "grad_norm": 1.7219288548784284, + "learning_rate": 1.3236081530806877e-06, + "loss": 0.2511, + "step": 48574 + }, + { + "epoch": 0.8393523638374344, + "grad_norm": 1.1037205093847364, + "learning_rate": 1.323329912069361e-06, + "loss": 0.3334, + "step": 48575 + }, + { + "epoch": 0.8393696433508432, + "grad_norm": 1.8030559830263166, + "learning_rate": 1.3230516982340246e-06, + "loss": 0.4085, + "step": 48576 + }, + { + "epoch": 0.8393869228642521, + "grad_norm": 1.1300752516953658, + "learning_rate": 1.3227735115755492e-06, + "loss": 0.2396, + "step": 48577 + }, + { + "epoch": 0.839404202377661, + "grad_norm": 1.1659052144243933, + "learning_rate": 1.3224953520948092e-06, + "loss": 0.4088, + "step": 48578 + }, + { + "epoch": 0.8394214818910699, + "grad_norm": 1.3815086151690816, + "learning_rate": 1.322217219792671e-06, + "loss": 0.2722, + "step": 48579 + }, + { + "epoch": 0.8394387614044788, + "grad_norm": 1.7328162099303765, + "learning_rate": 1.32193911467001e-06, + "loss": 0.4017, + "step": 48580 + }, + { + "epoch": 0.8394560409178877, + "grad_norm": 1.818122640767929, + "learning_rate": 1.3216610367276927e-06, + "loss": 0.2338, + "step": 48581 + }, + { + "epoch": 0.8394733204312966, + "grad_norm": 0.8873118808017005, + "learning_rate": 1.3213829859665927e-06, + "loss": 0.1984, + "step": 48582 + }, + { + "epoch": 0.8394905999447055, + "grad_norm": 0.8193156665699272, + "learning_rate": 1.3211049623875804e-06, + "loss": 0.1769, + "step": 48583 + }, + { + "epoch": 0.8395078794581144, + "grad_norm": 1.266134918669728, + "learning_rate": 1.3208269659915296e-06, + "loss": 0.4303, + "step": 48584 + }, + { + "epoch": 0.8395251589715234, + "grad_norm": 0.9266597897469232, + "learning_rate": 1.3205489967793084e-06, + "loss": 0.2363, + "step": 48585 + }, + { + "epoch": 0.8395424384849323, + "grad_norm": 2.8781052777408505, + "learning_rate": 1.320271054751785e-06, + "loss": 0.3635, + "step": 48586 + }, + { + "epoch": 0.8395597179983412, + "grad_norm": 1.314241354539643, + "learning_rate": 1.319993139909833e-06, + "loss": 0.4208, + "step": 48587 + }, + { + "epoch": 0.8395769975117501, + "grad_norm": 1.812226784443662, + "learning_rate": 1.3197152522543211e-06, + "loss": 0.2917, + "step": 48588 + }, + { + "epoch": 0.839594277025159, + "grad_norm": 1.0253908738423632, + "learning_rate": 1.3194373917861237e-06, + "loss": 0.6885, + "step": 48589 + }, + { + "epoch": 0.8396115565385679, + "grad_norm": 0.8799039865469763, + "learning_rate": 1.3191595585061056e-06, + "loss": 0.3014, + "step": 48590 + }, + { + "epoch": 0.8396288360519768, + "grad_norm": 2.12269710203171, + "learning_rate": 1.3188817524151409e-06, + "loss": 0.3224, + "step": 48591 + }, + { + "epoch": 0.8396461155653857, + "grad_norm": 1.0456391533098186, + "learning_rate": 1.3186039735140966e-06, + "loss": 0.4204, + "step": 48592 + }, + { + "epoch": 0.8396633950787946, + "grad_norm": 3.025766423379153, + "learning_rate": 1.3183262218038451e-06, + "loss": 0.3672, + "step": 48593 + }, + { + "epoch": 0.8396806745922035, + "grad_norm": 1.2177900097328327, + "learning_rate": 1.318048497285257e-06, + "loss": 0.3186, + "step": 48594 + }, + { + "epoch": 0.8396979541056124, + "grad_norm": 1.0070162109217797, + "learning_rate": 1.3177707999591993e-06, + "loss": 0.2859, + "step": 48595 + }, + { + "epoch": 0.8397152336190213, + "grad_norm": 2.0134412052254587, + "learning_rate": 1.3174931298265448e-06, + "loss": 0.2117, + "step": 48596 + }, + { + "epoch": 0.8397325131324301, + "grad_norm": 1.5893291922435928, + "learning_rate": 1.3172154868881603e-06, + "loss": 0.2945, + "step": 48597 + }, + { + "epoch": 0.839749792645839, + "grad_norm": 1.655430356899489, + "learning_rate": 1.3169378711449166e-06, + "loss": 0.3756, + "step": 48598 + }, + { + "epoch": 0.839767072159248, + "grad_norm": 0.9964407802670909, + "learning_rate": 1.3166602825976827e-06, + "loss": 0.4533, + "step": 48599 + }, + { + "epoch": 0.8397843516726569, + "grad_norm": 2.611476676620187, + "learning_rate": 1.3163827212473312e-06, + "loss": 0.2419, + "step": 48600 + }, + { + "epoch": 0.8398016311860658, + "grad_norm": 1.397936051747236, + "learning_rate": 1.316105187094726e-06, + "loss": 0.3534, + "step": 48601 + }, + { + "epoch": 0.8398189106994747, + "grad_norm": 1.4912541894594118, + "learning_rate": 1.3158276801407432e-06, + "loss": 0.378, + "step": 48602 + }, + { + "epoch": 0.8398361902128836, + "grad_norm": 1.2005495862826439, + "learning_rate": 1.3155502003862442e-06, + "loss": 0.475, + "step": 48603 + }, + { + "epoch": 0.8398534697262925, + "grad_norm": 1.5676626759776646, + "learning_rate": 1.3152727478321036e-06, + "loss": 0.2328, + "step": 48604 + }, + { + "epoch": 0.8398707492397014, + "grad_norm": 1.3739665690450091, + "learning_rate": 1.3149953224791901e-06, + "loss": 0.4433, + "step": 48605 + }, + { + "epoch": 0.8398880287531103, + "grad_norm": 1.5773661261166239, + "learning_rate": 1.3147179243283692e-06, + "loss": 0.3374, + "step": 48606 + }, + { + "epoch": 0.8399053082665192, + "grad_norm": 1.6141185855684599, + "learning_rate": 1.3144405533805138e-06, + "loss": 0.4749, + "step": 48607 + }, + { + "epoch": 0.8399225877799281, + "grad_norm": 1.2352663312533785, + "learning_rate": 1.3141632096364886e-06, + "loss": 0.3683, + "step": 48608 + }, + { + "epoch": 0.839939867293337, + "grad_norm": 0.804647322970864, + "learning_rate": 1.3138858930971665e-06, + "loss": 0.2751, + "step": 48609 + }, + { + "epoch": 0.8399571468067459, + "grad_norm": 2.2061385230494985, + "learning_rate": 1.313608603763411e-06, + "loss": 0.2589, + "step": 48610 + }, + { + "epoch": 0.8399744263201548, + "grad_norm": 1.0739414301707295, + "learning_rate": 1.313331341636095e-06, + "loss": 0.3332, + "step": 48611 + }, + { + "epoch": 0.8399917058335638, + "grad_norm": 1.1743044032926802, + "learning_rate": 1.313054106716084e-06, + "loss": 0.514, + "step": 48612 + }, + { + "epoch": 0.8400089853469727, + "grad_norm": 3.0066949236178715, + "learning_rate": 1.3127768990042512e-06, + "loss": 0.2767, + "step": 48613 + }, + { + "epoch": 0.8400262648603816, + "grad_norm": 1.6741706067134128, + "learning_rate": 1.3124997185014598e-06, + "loss": 0.6357, + "step": 48614 + }, + { + "epoch": 0.8400435443737905, + "grad_norm": 1.0817023016317304, + "learning_rate": 1.3122225652085784e-06, + "loss": 0.3802, + "step": 48615 + }, + { + "epoch": 0.8400608238871994, + "grad_norm": 1.4037656124763296, + "learning_rate": 1.3119454391264763e-06, + "loss": 0.3368, + "step": 48616 + }, + { + "epoch": 0.8400781034006083, + "grad_norm": 1.227937569009758, + "learning_rate": 1.3116683402560204e-06, + "loss": 0.2472, + "step": 48617 + }, + { + "epoch": 0.8400953829140171, + "grad_norm": 1.731554354506802, + "learning_rate": 1.3113912685980823e-06, + "loss": 0.1907, + "step": 48618 + }, + { + "epoch": 0.840112662427426, + "grad_norm": 1.4050173488607085, + "learning_rate": 1.3111142241535257e-06, + "loss": 0.6034, + "step": 48619 + }, + { + "epoch": 0.8401299419408349, + "grad_norm": 1.3925125325137873, + "learning_rate": 1.310837206923221e-06, + "loss": 0.3423, + "step": 48620 + }, + { + "epoch": 0.8401472214542438, + "grad_norm": 1.2494094037843295, + "learning_rate": 1.3105602169080323e-06, + "loss": 0.2744, + "step": 48621 + }, + { + "epoch": 0.8401645009676527, + "grad_norm": 0.9779102908907088, + "learning_rate": 1.3102832541088295e-06, + "loss": 0.6593, + "step": 48622 + }, + { + "epoch": 0.8401817804810616, + "grad_norm": 1.4948249975177748, + "learning_rate": 1.3100063185264799e-06, + "loss": 0.3379, + "step": 48623 + }, + { + "epoch": 0.8401990599944705, + "grad_norm": 1.2679980151676522, + "learning_rate": 1.3097294101618528e-06, + "loss": 0.2738, + "step": 48624 + }, + { + "epoch": 0.8402163395078794, + "grad_norm": 1.481925386692312, + "learning_rate": 1.3094525290158145e-06, + "loss": 0.3987, + "step": 48625 + }, + { + "epoch": 0.8402336190212883, + "grad_norm": 1.181023595667082, + "learning_rate": 1.3091756750892282e-06, + "loss": 0.3856, + "step": 48626 + }, + { + "epoch": 0.8402508985346973, + "grad_norm": 1.3484406888633917, + "learning_rate": 1.3088988483829645e-06, + "loss": 0.3896, + "step": 48627 + }, + { + "epoch": 0.8402681780481062, + "grad_norm": 1.316329352718383, + "learning_rate": 1.3086220488978907e-06, + "loss": 0.3125, + "step": 48628 + }, + { + "epoch": 0.8402854575615151, + "grad_norm": 1.2067047407345017, + "learning_rate": 1.3083452766348749e-06, + "loss": 0.3781, + "step": 48629 + }, + { + "epoch": 0.840302737074924, + "grad_norm": 2.108737509817992, + "learning_rate": 1.3080685315947794e-06, + "loss": 0.3248, + "step": 48630 + }, + { + "epoch": 0.8403200165883329, + "grad_norm": 1.5411102313749616, + "learning_rate": 1.3077918137784773e-06, + "loss": 0.4042, + "step": 48631 + }, + { + "epoch": 0.8403372961017418, + "grad_norm": 1.2293319080472593, + "learning_rate": 1.3075151231868289e-06, + "loss": 0.3521, + "step": 48632 + }, + { + "epoch": 0.8403545756151507, + "grad_norm": 1.1517820674015342, + "learning_rate": 1.3072384598207034e-06, + "loss": 0.3155, + "step": 48633 + }, + { + "epoch": 0.8403718551285596, + "grad_norm": 1.5351977502961336, + "learning_rate": 1.3069618236809677e-06, + "loss": 0.2373, + "step": 48634 + }, + { + "epoch": 0.8403891346419685, + "grad_norm": 0.8095403346044362, + "learning_rate": 1.3066852147684917e-06, + "loss": 0.3079, + "step": 48635 + }, + { + "epoch": 0.8404064141553774, + "grad_norm": 1.0659194314213534, + "learning_rate": 1.3064086330841375e-06, + "loss": 0.4509, + "step": 48636 + }, + { + "epoch": 0.8404236936687863, + "grad_norm": 1.6349338160355906, + "learning_rate": 1.306132078628769e-06, + "loss": 0.2902, + "step": 48637 + }, + { + "epoch": 0.8404409731821952, + "grad_norm": 2.082131097992977, + "learning_rate": 1.3058555514032568e-06, + "loss": 0.3967, + "step": 48638 + }, + { + "epoch": 0.8404582526956041, + "grad_norm": 1.273404207645317, + "learning_rate": 1.3055790514084655e-06, + "loss": 0.2963, + "step": 48639 + }, + { + "epoch": 0.8404755322090129, + "grad_norm": 1.2576414468415718, + "learning_rate": 1.3053025786452634e-06, + "loss": 0.4263, + "step": 48640 + }, + { + "epoch": 0.8404928117224219, + "grad_norm": 1.3354115417147132, + "learning_rate": 1.305026133114512e-06, + "loss": 0.3082, + "step": 48641 + }, + { + "epoch": 0.8405100912358308, + "grad_norm": 1.5809421515439497, + "learning_rate": 1.3047497148170818e-06, + "loss": 0.4028, + "step": 48642 + }, + { + "epoch": 0.8405273707492397, + "grad_norm": 1.1123114550442188, + "learning_rate": 1.3044733237538342e-06, + "loss": 0.2996, + "step": 48643 + }, + { + "epoch": 0.8405446502626486, + "grad_norm": 1.2548492109170004, + "learning_rate": 1.3041969599256387e-06, + "loss": 0.2696, + "step": 48644 + }, + { + "epoch": 0.8405619297760575, + "grad_norm": 1.0993669245988023, + "learning_rate": 1.303920623333358e-06, + "loss": 0.3743, + "step": 48645 + }, + { + "epoch": 0.8405792092894664, + "grad_norm": 1.6988676530243951, + "learning_rate": 1.3036443139778576e-06, + "loss": 0.5374, + "step": 48646 + }, + { + "epoch": 0.8405964888028753, + "grad_norm": 1.4585726057005652, + "learning_rate": 1.3033680318600072e-06, + "loss": 0.4303, + "step": 48647 + }, + { + "epoch": 0.8406137683162842, + "grad_norm": 1.7684628115292627, + "learning_rate": 1.303091776980666e-06, + "loss": 0.5536, + "step": 48648 + }, + { + "epoch": 0.8406310478296931, + "grad_norm": 1.4724773777902382, + "learning_rate": 1.3028155493407057e-06, + "loss": 0.3555, + "step": 48649 + }, + { + "epoch": 0.840648327343102, + "grad_norm": 1.323445162035031, + "learning_rate": 1.3025393489409855e-06, + "loss": 0.3316, + "step": 48650 + }, + { + "epoch": 0.8406656068565109, + "grad_norm": 1.288762411991409, + "learning_rate": 1.3022631757823723e-06, + "loss": 0.2836, + "step": 48651 + }, + { + "epoch": 0.8406828863699198, + "grad_norm": 0.9940678012666142, + "learning_rate": 1.3019870298657323e-06, + "loss": 0.4539, + "step": 48652 + }, + { + "epoch": 0.8407001658833287, + "grad_norm": 0.547043378157041, + "learning_rate": 1.3017109111919324e-06, + "loss": 0.4981, + "step": 48653 + }, + { + "epoch": 0.8407174453967377, + "grad_norm": 1.6018486775903054, + "learning_rate": 1.3014348197618332e-06, + "loss": 0.2401, + "step": 48654 + }, + { + "epoch": 0.8407347249101466, + "grad_norm": 1.3211998484406922, + "learning_rate": 1.3011587555763017e-06, + "loss": 0.3841, + "step": 48655 + }, + { + "epoch": 0.8407520044235555, + "grad_norm": 2.5701439647495863, + "learning_rate": 1.3008827186362017e-06, + "loss": 0.3098, + "step": 48656 + }, + { + "epoch": 0.8407692839369644, + "grad_norm": 1.1377593161862471, + "learning_rate": 1.3006067089423969e-06, + "loss": 0.2318, + "step": 48657 + }, + { + "epoch": 0.8407865634503733, + "grad_norm": 1.2335661903587747, + "learning_rate": 1.3003307264957554e-06, + "loss": 0.4439, + "step": 48658 + }, + { + "epoch": 0.8408038429637822, + "grad_norm": 1.890744523566607, + "learning_rate": 1.3000547712971379e-06, + "loss": 0.3371, + "step": 48659 + }, + { + "epoch": 0.8408211224771911, + "grad_norm": 1.68797142669638, + "learning_rate": 1.2997788433474112e-06, + "loss": 0.2821, + "step": 48660 + }, + { + "epoch": 0.8408384019905999, + "grad_norm": 1.1427556094035982, + "learning_rate": 1.2995029426474371e-06, + "loss": 0.3503, + "step": 48661 + }, + { + "epoch": 0.8408556815040088, + "grad_norm": 1.16527161951872, + "learning_rate": 1.2992270691980802e-06, + "loss": 0.882, + "step": 48662 + }, + { + "epoch": 0.8408729610174177, + "grad_norm": 1.7477863958413793, + "learning_rate": 1.2989512230002056e-06, + "loss": 0.2979, + "step": 48663 + }, + { + "epoch": 0.8408902405308266, + "grad_norm": 1.0833340858179676, + "learning_rate": 1.298675404054679e-06, + "loss": 0.309, + "step": 48664 + }, + { + "epoch": 0.8409075200442355, + "grad_norm": 2.20072782867649, + "learning_rate": 1.298399612362361e-06, + "loss": 0.4392, + "step": 48665 + }, + { + "epoch": 0.8409247995576444, + "grad_norm": 1.6455166728699084, + "learning_rate": 1.2981238479241175e-06, + "loss": 0.4402, + "step": 48666 + }, + { + "epoch": 0.8409420790710533, + "grad_norm": 1.452153429785461, + "learning_rate": 1.2978481107408103e-06, + "loss": 0.3743, + "step": 48667 + }, + { + "epoch": 0.8409593585844622, + "grad_norm": 1.7758569476786739, + "learning_rate": 1.297572400813304e-06, + "loss": 0.324, + "step": 48668 + }, + { + "epoch": 0.8409766380978712, + "grad_norm": 1.400872444779892, + "learning_rate": 1.2972967181424646e-06, + "loss": 0.4239, + "step": 48669 + }, + { + "epoch": 0.8409939176112801, + "grad_norm": 1.9364852350555173, + "learning_rate": 1.29702106272915e-06, + "loss": 0.2044, + "step": 48670 + }, + { + "epoch": 0.841011197124689, + "grad_norm": 1.5410233155582755, + "learning_rate": 1.2967454345742304e-06, + "loss": 0.2386, + "step": 48671 + }, + { + "epoch": 0.8410284766380979, + "grad_norm": 1.19166354682515, + "learning_rate": 1.2964698336785631e-06, + "loss": 0.3935, + "step": 48672 + }, + { + "epoch": 0.8410457561515068, + "grad_norm": 2.5123345043068626, + "learning_rate": 1.2961942600430133e-06, + "loss": 0.2275, + "step": 48673 + }, + { + "epoch": 0.8410630356649157, + "grad_norm": 1.1516719172265064, + "learning_rate": 1.2959187136684447e-06, + "loss": 0.363, + "step": 48674 + }, + { + "epoch": 0.8410803151783246, + "grad_norm": 0.9154340551162582, + "learning_rate": 1.2956431945557223e-06, + "loss": 0.3352, + "step": 48675 + }, + { + "epoch": 0.8410975946917335, + "grad_norm": 1.3739409385395605, + "learning_rate": 1.2953677027057066e-06, + "loss": 0.3329, + "step": 48676 + }, + { + "epoch": 0.8411148742051424, + "grad_norm": 0.7230947625718913, + "learning_rate": 1.2950922381192587e-06, + "loss": 0.2256, + "step": 48677 + }, + { + "epoch": 0.8411321537185513, + "grad_norm": 1.4370040230338488, + "learning_rate": 1.294816800797244e-06, + "loss": 0.3342, + "step": 48678 + }, + { + "epoch": 0.8411494332319602, + "grad_norm": 0.6980382042509571, + "learning_rate": 1.2945413907405245e-06, + "loss": 0.6933, + "step": 48679 + }, + { + "epoch": 0.8411667127453691, + "grad_norm": 1.1408863299355512, + "learning_rate": 1.2942660079499647e-06, + "loss": 0.2873, + "step": 48680 + }, + { + "epoch": 0.841183992258778, + "grad_norm": 1.0722920770091242, + "learning_rate": 1.2939906524264235e-06, + "loss": 0.2926, + "step": 48681 + }, + { + "epoch": 0.8412012717721868, + "grad_norm": 1.3018148475947224, + "learning_rate": 1.2937153241707679e-06, + "loss": 0.2325, + "step": 48682 + }, + { + "epoch": 0.8412185512855958, + "grad_norm": 0.975486642278733, + "learning_rate": 1.293440023183854e-06, + "loss": 0.1879, + "step": 48683 + }, + { + "epoch": 0.8412358307990047, + "grad_norm": 0.7653611799556094, + "learning_rate": 1.2931647494665511e-06, + "loss": 0.6507, + "step": 48684 + }, + { + "epoch": 0.8412531103124136, + "grad_norm": 1.8419272183949542, + "learning_rate": 1.292889503019713e-06, + "loss": 0.5051, + "step": 48685 + }, + { + "epoch": 0.8412703898258225, + "grad_norm": 1.0833710243311778, + "learning_rate": 1.2926142838442112e-06, + "loss": 0.2672, + "step": 48686 + }, + { + "epoch": 0.8412876693392314, + "grad_norm": 1.2094523439978146, + "learning_rate": 1.292339091940903e-06, + "loss": 0.3509, + "step": 48687 + }, + { + "epoch": 0.8413049488526403, + "grad_norm": 1.2182799214312199, + "learning_rate": 1.2920639273106483e-06, + "loss": 0.3454, + "step": 48688 + }, + { + "epoch": 0.8413222283660492, + "grad_norm": 0.6812647072656983, + "learning_rate": 1.2917887899543136e-06, + "loss": 0.64, + "step": 48689 + }, + { + "epoch": 0.8413395078794581, + "grad_norm": 0.8298713338562709, + "learning_rate": 1.2915136798727557e-06, + "loss": 0.2308, + "step": 48690 + }, + { + "epoch": 0.841356787392867, + "grad_norm": 2.175181735476982, + "learning_rate": 1.2912385970668395e-06, + "loss": 0.3485, + "step": 48691 + }, + { + "epoch": 0.8413740669062759, + "grad_norm": 1.3666270649430488, + "learning_rate": 1.2909635415374255e-06, + "loss": 0.4005, + "step": 48692 + }, + { + "epoch": 0.8413913464196848, + "grad_norm": 1.159349197828398, + "learning_rate": 1.2906885132853774e-06, + "loss": 0.3451, + "step": 48693 + }, + { + "epoch": 0.8414086259330937, + "grad_norm": 1.9616172602623108, + "learning_rate": 1.2904135123115523e-06, + "loss": 0.4512, + "step": 48694 + }, + { + "epoch": 0.8414259054465026, + "grad_norm": 1.125387363554673, + "learning_rate": 1.2901385386168174e-06, + "loss": 0.5396, + "step": 48695 + }, + { + "epoch": 0.8414431849599115, + "grad_norm": 1.735866469846181, + "learning_rate": 1.2898635922020275e-06, + "loss": 0.3788, + "step": 48696 + }, + { + "epoch": 0.8414604644733205, + "grad_norm": 1.3304703089999068, + "learning_rate": 1.2895886730680462e-06, + "loss": 0.3765, + "step": 48697 + }, + { + "epoch": 0.8414777439867294, + "grad_norm": 0.7301993620819391, + "learning_rate": 1.2893137812157387e-06, + "loss": 0.2713, + "step": 48698 + }, + { + "epoch": 0.8414950235001383, + "grad_norm": 1.2857965566138934, + "learning_rate": 1.2890389166459594e-06, + "loss": 0.3088, + "step": 48699 + }, + { + "epoch": 0.8415123030135472, + "grad_norm": 1.1210534304104898, + "learning_rate": 1.2887640793595747e-06, + "loss": 0.5949, + "step": 48700 + }, + { + "epoch": 0.8415295825269561, + "grad_norm": 0.8314911708627291, + "learning_rate": 1.2884892693574402e-06, + "loss": 0.2296, + "step": 48701 + }, + { + "epoch": 0.841546862040365, + "grad_norm": 1.688676463837152, + "learning_rate": 1.28821448664042e-06, + "loss": 0.311, + "step": 48702 + }, + { + "epoch": 0.8415641415537738, + "grad_norm": 0.9360848948392212, + "learning_rate": 1.2879397312093734e-06, + "loss": 0.3508, + "step": 48703 + }, + { + "epoch": 0.8415814210671827, + "grad_norm": 1.2326523954791953, + "learning_rate": 1.287665003065165e-06, + "loss": 0.3233, + "step": 48704 + }, + { + "epoch": 0.8415987005805916, + "grad_norm": 0.9401071629349476, + "learning_rate": 1.2873903022086487e-06, + "loss": 0.3575, + "step": 48705 + }, + { + "epoch": 0.8416159800940005, + "grad_norm": 0.8609076472436689, + "learning_rate": 1.2871156286406905e-06, + "loss": 0.3874, + "step": 48706 + }, + { + "epoch": 0.8416332596074094, + "grad_norm": 1.197957927303435, + "learning_rate": 1.2868409823621463e-06, + "loss": 0.3843, + "step": 48707 + }, + { + "epoch": 0.8416505391208183, + "grad_norm": 1.6762523428613094, + "learning_rate": 1.286566363373878e-06, + "loss": 0.2998, + "step": 48708 + }, + { + "epoch": 0.8416678186342272, + "grad_norm": 1.3952706457337432, + "learning_rate": 1.2862917716767486e-06, + "loss": 0.3349, + "step": 48709 + }, + { + "epoch": 0.8416850981476361, + "grad_norm": 1.1657325475814229, + "learning_rate": 1.2860172072716137e-06, + "loss": 0.305, + "step": 48710 + }, + { + "epoch": 0.841702377661045, + "grad_norm": 0.9150692678705868, + "learning_rate": 1.2857426701593367e-06, + "loss": 0.6013, + "step": 48711 + }, + { + "epoch": 0.841719657174454, + "grad_norm": 1.2987867968058997, + "learning_rate": 1.2854681603407738e-06, + "loss": 0.2754, + "step": 48712 + }, + { + "epoch": 0.8417369366878629, + "grad_norm": 1.4029639788123975, + "learning_rate": 1.2851936778167873e-06, + "loss": 0.4084, + "step": 48713 + }, + { + "epoch": 0.8417542162012718, + "grad_norm": 1.3595539268547183, + "learning_rate": 1.2849192225882367e-06, + "loss": 0.5146, + "step": 48714 + }, + { + "epoch": 0.8417714957146807, + "grad_norm": 1.5246097742585727, + "learning_rate": 1.2846447946559826e-06, + "loss": 0.5595, + "step": 48715 + }, + { + "epoch": 0.8417887752280896, + "grad_norm": 1.120102117254907, + "learning_rate": 1.2843703940208818e-06, + "loss": 0.364, + "step": 48716 + }, + { + "epoch": 0.8418060547414985, + "grad_norm": 0.9824430821858805, + "learning_rate": 1.284096020683797e-06, + "loss": 0.2259, + "step": 48717 + }, + { + "epoch": 0.8418233342549074, + "grad_norm": 1.2270660120644874, + "learning_rate": 1.2838216746455845e-06, + "loss": 0.3456, + "step": 48718 + }, + { + "epoch": 0.8418406137683163, + "grad_norm": 1.61052092212933, + "learning_rate": 1.2835473559071043e-06, + "loss": 0.3446, + "step": 48719 + }, + { + "epoch": 0.8418578932817252, + "grad_norm": 1.2222002299100028, + "learning_rate": 1.2832730644692194e-06, + "loss": 0.7778, + "step": 48720 + }, + { + "epoch": 0.8418751727951341, + "grad_norm": 1.0570396507469664, + "learning_rate": 1.282998800332782e-06, + "loss": 0.3602, + "step": 48721 + }, + { + "epoch": 0.841892452308543, + "grad_norm": 1.220014495083532, + "learning_rate": 1.2827245634986573e-06, + "loss": 0.2962, + "step": 48722 + }, + { + "epoch": 0.841909731821952, + "grad_norm": 1.1578320533503264, + "learning_rate": 1.2824503539677002e-06, + "loss": 0.3676, + "step": 48723 + }, + { + "epoch": 0.8419270113353607, + "grad_norm": 1.071650505628208, + "learning_rate": 1.2821761717407733e-06, + "loss": 0.2591, + "step": 48724 + }, + { + "epoch": 0.8419442908487696, + "grad_norm": 1.4064116395433932, + "learning_rate": 1.281902016818729e-06, + "loss": 0.4147, + "step": 48725 + }, + { + "epoch": 0.8419615703621786, + "grad_norm": 1.6496425979225147, + "learning_rate": 1.2816278892024337e-06, + "loss": 0.2502, + "step": 48726 + }, + { + "epoch": 0.8419788498755875, + "grad_norm": 1.056430018798764, + "learning_rate": 1.2813537888927407e-06, + "loss": 0.341, + "step": 48727 + }, + { + "epoch": 0.8419961293889964, + "grad_norm": 1.1881870880340923, + "learning_rate": 1.281079715890512e-06, + "loss": 0.2553, + "step": 48728 + }, + { + "epoch": 0.8420134089024053, + "grad_norm": 2.0609552650072613, + "learning_rate": 1.2808056701966043e-06, + "loss": 0.3378, + "step": 48729 + }, + { + "epoch": 0.8420306884158142, + "grad_norm": 1.71528405470538, + "learning_rate": 1.280531651811875e-06, + "loss": 0.2895, + "step": 48730 + }, + { + "epoch": 0.8420479679292231, + "grad_norm": 1.379524299018078, + "learning_rate": 1.2802576607371819e-06, + "loss": 0.3713, + "step": 48731 + }, + { + "epoch": 0.842065247442632, + "grad_norm": 0.9428157172996696, + "learning_rate": 1.2799836969733848e-06, + "loss": 0.315, + "step": 48732 + }, + { + "epoch": 0.8420825269560409, + "grad_norm": 0.6346272914000842, + "learning_rate": 1.2797097605213437e-06, + "loss": 0.6871, + "step": 48733 + }, + { + "epoch": 0.8420998064694498, + "grad_norm": 1.3641251262753438, + "learning_rate": 1.2794358513819116e-06, + "loss": 0.2621, + "step": 48734 + }, + { + "epoch": 0.8421170859828587, + "grad_norm": 0.8196202413800124, + "learning_rate": 1.2791619695559522e-06, + "loss": 0.2214, + "step": 48735 + }, + { + "epoch": 0.8421343654962676, + "grad_norm": 1.1538122724011082, + "learning_rate": 1.2788881150443177e-06, + "loss": 0.2981, + "step": 48736 + }, + { + "epoch": 0.8421516450096765, + "grad_norm": 1.6459567538743596, + "learning_rate": 1.278614287847868e-06, + "loss": 0.3105, + "step": 48737 + }, + { + "epoch": 0.8421689245230854, + "grad_norm": 1.069982979530161, + "learning_rate": 1.2783404879674632e-06, + "loss": 0.3384, + "step": 48738 + }, + { + "epoch": 0.8421862040364944, + "grad_norm": 0.8312156926588746, + "learning_rate": 1.2780667154039572e-06, + "loss": 0.2241, + "step": 48739 + }, + { + "epoch": 0.8422034835499033, + "grad_norm": 1.5422174729448475, + "learning_rate": 1.2777929701582115e-06, + "loss": 0.3577, + "step": 48740 + }, + { + "epoch": 0.8422207630633122, + "grad_norm": 0.8869419093124996, + "learning_rate": 1.2775192522310786e-06, + "loss": 0.2689, + "step": 48741 + }, + { + "epoch": 0.8422380425767211, + "grad_norm": 0.58943142875081, + "learning_rate": 1.277245561623418e-06, + "loss": 0.824, + "step": 48742 + }, + { + "epoch": 0.84225532209013, + "grad_norm": 1.155412071834787, + "learning_rate": 1.276971898336088e-06, + "loss": 0.3867, + "step": 48743 + }, + { + "epoch": 0.8422726016035389, + "grad_norm": 1.1177201391721119, + "learning_rate": 1.2766982623699463e-06, + "loss": 0.4308, + "step": 48744 + }, + { + "epoch": 0.8422898811169477, + "grad_norm": 1.730217631669768, + "learning_rate": 1.2764246537258472e-06, + "loss": 0.4898, + "step": 48745 + }, + { + "epoch": 0.8423071606303566, + "grad_norm": 2.2495058885953094, + "learning_rate": 1.276151072404651e-06, + "loss": 0.2455, + "step": 48746 + }, + { + "epoch": 0.8423244401437655, + "grad_norm": 1.5403285825119226, + "learning_rate": 1.2758775184072104e-06, + "loss": 0.4781, + "step": 48747 + }, + { + "epoch": 0.8423417196571744, + "grad_norm": 1.537115581934708, + "learning_rate": 1.2756039917343842e-06, + "loss": 0.513, + "step": 48748 + }, + { + "epoch": 0.8423589991705833, + "grad_norm": 1.0934294827869622, + "learning_rate": 1.2753304923870313e-06, + "loss": 0.3101, + "step": 48749 + }, + { + "epoch": 0.8423762786839922, + "grad_norm": 1.5232342823550697, + "learning_rate": 1.275057020366004e-06, + "loss": 0.3915, + "step": 48750 + }, + { + "epoch": 0.8423935581974011, + "grad_norm": 1.5633296199106532, + "learning_rate": 1.2747835756721637e-06, + "loss": 0.3532, + "step": 48751 + }, + { + "epoch": 0.84241083771081, + "grad_norm": 1.2026931680685147, + "learning_rate": 1.2745101583063623e-06, + "loss": 0.5311, + "step": 48752 + }, + { + "epoch": 0.842428117224219, + "grad_norm": 0.5801505717419996, + "learning_rate": 1.274236768269459e-06, + "loss": 0.608, + "step": 48753 + }, + { + "epoch": 0.8424453967376279, + "grad_norm": 1.303828835775913, + "learning_rate": 1.2739634055623085e-06, + "loss": 0.3459, + "step": 48754 + }, + { + "epoch": 0.8424626762510368, + "grad_norm": 1.8737575955772305, + "learning_rate": 1.273690070185769e-06, + "loss": 0.2015, + "step": 48755 + }, + { + "epoch": 0.8424799557644457, + "grad_norm": 0.7759401693882487, + "learning_rate": 1.2734167621406945e-06, + "loss": 0.1727, + "step": 48756 + }, + { + "epoch": 0.8424972352778546, + "grad_norm": 1.0272912209792817, + "learning_rate": 1.273143481427943e-06, + "loss": 0.3924, + "step": 48757 + }, + { + "epoch": 0.8425145147912635, + "grad_norm": 1.1199719261947192, + "learning_rate": 1.2728702280483685e-06, + "loss": 0.1824, + "step": 48758 + }, + { + "epoch": 0.8425317943046724, + "grad_norm": 1.229803406388162, + "learning_rate": 1.2725970020028266e-06, + "loss": 0.3694, + "step": 48759 + }, + { + "epoch": 0.8425490738180813, + "grad_norm": 1.2314588448751558, + "learning_rate": 1.272323803292177e-06, + "loss": 0.4481, + "step": 48760 + }, + { + "epoch": 0.8425663533314902, + "grad_norm": 0.9843065891926439, + "learning_rate": 1.2720506319172698e-06, + "loss": 0.3522, + "step": 48761 + }, + { + "epoch": 0.8425836328448991, + "grad_norm": 1.0273043095909868, + "learning_rate": 1.2717774878789657e-06, + "loss": 0.3656, + "step": 48762 + }, + { + "epoch": 0.842600912358308, + "grad_norm": 1.3387726940054774, + "learning_rate": 1.271504371178116e-06, + "loss": 0.4416, + "step": 48763 + }, + { + "epoch": 0.8426181918717169, + "grad_norm": 1.4272965610211963, + "learning_rate": 1.2712312818155793e-06, + "loss": 0.3332, + "step": 48764 + }, + { + "epoch": 0.8426354713851258, + "grad_norm": 1.3892883570906711, + "learning_rate": 1.2709582197922066e-06, + "loss": 0.3282, + "step": 48765 + }, + { + "epoch": 0.8426527508985348, + "grad_norm": 0.9008012591354062, + "learning_rate": 1.2706851851088597e-06, + "loss": 0.7125, + "step": 48766 + }, + { + "epoch": 0.8426700304119435, + "grad_norm": 1.4210290106202585, + "learning_rate": 1.270412177766388e-06, + "loss": 0.3411, + "step": 48767 + }, + { + "epoch": 0.8426873099253525, + "grad_norm": 1.4657862880572905, + "learning_rate": 1.2701391977656508e-06, + "loss": 0.3945, + "step": 48768 + }, + { + "epoch": 0.8427045894387614, + "grad_norm": 2.347886789306998, + "learning_rate": 1.2698662451075005e-06, + "loss": 0.166, + "step": 48769 + }, + { + "epoch": 0.8427218689521703, + "grad_norm": 1.326222642606711, + "learning_rate": 1.2695933197927912e-06, + "loss": 0.3852, + "step": 48770 + }, + { + "epoch": 0.8427391484655792, + "grad_norm": 0.8410298070292963, + "learning_rate": 1.2693204218223798e-06, + "loss": 0.393, + "step": 48771 + }, + { + "epoch": 0.8427564279789881, + "grad_norm": 1.6089044191464499, + "learning_rate": 1.269047551197119e-06, + "loss": 0.5457, + "step": 48772 + }, + { + "epoch": 0.842773707492397, + "grad_norm": 1.5957741687760423, + "learning_rate": 1.268774707917868e-06, + "loss": 0.435, + "step": 48773 + }, + { + "epoch": 0.8427909870058059, + "grad_norm": 1.9775488899275007, + "learning_rate": 1.2685018919854752e-06, + "loss": 0.1954, + "step": 48774 + }, + { + "epoch": 0.8428082665192148, + "grad_norm": 1.208383515591081, + "learning_rate": 1.268229103400801e-06, + "loss": 0.4322, + "step": 48775 + }, + { + "epoch": 0.8428255460326237, + "grad_norm": 0.9343390934204306, + "learning_rate": 1.2679563421646935e-06, + "loss": 0.4092, + "step": 48776 + }, + { + "epoch": 0.8428428255460326, + "grad_norm": 1.3709039447461142, + "learning_rate": 1.2676836082780119e-06, + "loss": 0.3724, + "step": 48777 + }, + { + "epoch": 0.8428601050594415, + "grad_norm": 1.1502015940238508, + "learning_rate": 1.267410901741608e-06, + "loss": 0.2453, + "step": 48778 + }, + { + "epoch": 0.8428773845728504, + "grad_norm": 0.8116329814506761, + "learning_rate": 1.267138222556339e-06, + "loss": 0.6625, + "step": 48779 + }, + { + "epoch": 0.8428946640862593, + "grad_norm": 0.9504235155469909, + "learning_rate": 1.2668655707230558e-06, + "loss": 0.6247, + "step": 48780 + }, + { + "epoch": 0.8429119435996683, + "grad_norm": 1.515753678302973, + "learning_rate": 1.266592946242613e-06, + "loss": 0.1884, + "step": 48781 + }, + { + "epoch": 0.8429292231130772, + "grad_norm": 0.9622611753937079, + "learning_rate": 1.2663203491158638e-06, + "loss": 0.4853, + "step": 48782 + }, + { + "epoch": 0.8429465026264861, + "grad_norm": 1.059155836568674, + "learning_rate": 1.266047779343663e-06, + "loss": 0.2419, + "step": 48783 + }, + { + "epoch": 0.842963782139895, + "grad_norm": 1.8261607036481289, + "learning_rate": 1.2657752369268673e-06, + "loss": 0.3385, + "step": 48784 + }, + { + "epoch": 0.8429810616533039, + "grad_norm": 0.9802683060538828, + "learning_rate": 1.2655027218663241e-06, + "loss": 0.3387, + "step": 48785 + }, + { + "epoch": 0.8429983411667128, + "grad_norm": 1.1864743188166322, + "learning_rate": 1.265230234162893e-06, + "loss": 0.2693, + "step": 48786 + }, + { + "epoch": 0.8430156206801217, + "grad_norm": 1.38386681338426, + "learning_rate": 1.2649577738174224e-06, + "loss": 0.2511, + "step": 48787 + }, + { + "epoch": 0.8430329001935305, + "grad_norm": 2.0954302388545454, + "learning_rate": 1.2646853408307669e-06, + "loss": 0.3215, + "step": 48788 + }, + { + "epoch": 0.8430501797069394, + "grad_norm": 1.4308597421273592, + "learning_rate": 1.2644129352037814e-06, + "loss": 0.2795, + "step": 48789 + }, + { + "epoch": 0.8430674592203483, + "grad_norm": 2.365102110877148, + "learning_rate": 1.264140556937321e-06, + "loss": 0.2327, + "step": 48790 + }, + { + "epoch": 0.8430847387337572, + "grad_norm": 1.1784218411429608, + "learning_rate": 1.2638682060322361e-06, + "loss": 0.4422, + "step": 48791 + }, + { + "epoch": 0.8431020182471661, + "grad_norm": 1.3073478397746015, + "learning_rate": 1.263595882489378e-06, + "loss": 0.3068, + "step": 48792 + }, + { + "epoch": 0.843119297760575, + "grad_norm": 1.1945725897172919, + "learning_rate": 1.2633235863096016e-06, + "loss": 0.2596, + "step": 48793 + }, + { + "epoch": 0.843136577273984, + "grad_norm": 1.9547810368950407, + "learning_rate": 1.2630513174937587e-06, + "loss": 0.2683, + "step": 48794 + }, + { + "epoch": 0.8431538567873929, + "grad_norm": 1.5479722999400862, + "learning_rate": 1.262779076042706e-06, + "loss": 0.2042, + "step": 48795 + }, + { + "epoch": 0.8431711363008018, + "grad_norm": 2.0727479754927787, + "learning_rate": 1.262506861957291e-06, + "loss": 0.3659, + "step": 48796 + }, + { + "epoch": 0.8431884158142107, + "grad_norm": 1.4567465706802578, + "learning_rate": 1.2622346752383708e-06, + "loss": 0.406, + "step": 48797 + }, + { + "epoch": 0.8432056953276196, + "grad_norm": 1.180575063965156, + "learning_rate": 1.2619625158867931e-06, + "loss": 0.2253, + "step": 48798 + }, + { + "epoch": 0.8432229748410285, + "grad_norm": 0.9596274484738517, + "learning_rate": 1.2616903839034134e-06, + "loss": 0.2991, + "step": 48799 + }, + { + "epoch": 0.8432402543544374, + "grad_norm": 2.182992560604613, + "learning_rate": 1.261418279289085e-06, + "loss": 0.2378, + "step": 48800 + }, + { + "epoch": 0.8432575338678463, + "grad_norm": 1.6989566380471095, + "learning_rate": 1.2611462020446575e-06, + "loss": 0.6575, + "step": 48801 + }, + { + "epoch": 0.8432748133812552, + "grad_norm": 0.9357385731807705, + "learning_rate": 1.2608741521709856e-06, + "loss": 0.1627, + "step": 48802 + }, + { + "epoch": 0.8432920928946641, + "grad_norm": 0.5348195080739319, + "learning_rate": 1.2606021296689174e-06, + "loss": 0.5455, + "step": 48803 + }, + { + "epoch": 0.843309372408073, + "grad_norm": 1.1255563696294335, + "learning_rate": 1.2603301345393104e-06, + "loss": 0.6318, + "step": 48804 + }, + { + "epoch": 0.8433266519214819, + "grad_norm": 1.3376160810277564, + "learning_rate": 1.2600581667830092e-06, + "loss": 0.4714, + "step": 48805 + }, + { + "epoch": 0.8433439314348908, + "grad_norm": 0.7550739408892583, + "learning_rate": 1.2597862264008742e-06, + "loss": 0.6065, + "step": 48806 + }, + { + "epoch": 0.8433612109482997, + "grad_norm": 0.7189625221927526, + "learning_rate": 1.2595143133937504e-06, + "loss": 0.529, + "step": 48807 + }, + { + "epoch": 0.8433784904617087, + "grad_norm": 1.7407502902108152, + "learning_rate": 1.2592424277624948e-06, + "loss": 0.3967, + "step": 48808 + }, + { + "epoch": 0.8433957699751174, + "grad_norm": 1.224916745419295, + "learning_rate": 1.2589705695079546e-06, + "loss": 0.5682, + "step": 48809 + }, + { + "epoch": 0.8434130494885264, + "grad_norm": 1.0556137910175507, + "learning_rate": 1.2586987386309834e-06, + "loss": 0.3147, + "step": 48810 + }, + { + "epoch": 0.8434303290019353, + "grad_norm": 1.3480482337037534, + "learning_rate": 1.2584269351324318e-06, + "loss": 0.2435, + "step": 48811 + }, + { + "epoch": 0.8434476085153442, + "grad_norm": 1.2676651457248498, + "learning_rate": 1.2581551590131501e-06, + "loss": 0.4614, + "step": 48812 + }, + { + "epoch": 0.8434648880287531, + "grad_norm": 0.8921542560807234, + "learning_rate": 1.2578834102739934e-06, + "loss": 0.2141, + "step": 48813 + }, + { + "epoch": 0.843482167542162, + "grad_norm": 1.0046099970098998, + "learning_rate": 1.2576116889158074e-06, + "loss": 0.3459, + "step": 48814 + }, + { + "epoch": 0.8434994470555709, + "grad_norm": 0.9786935958951664, + "learning_rate": 1.2573399949394494e-06, + "loss": 0.2071, + "step": 48815 + }, + { + "epoch": 0.8435167265689798, + "grad_norm": 1.7341199005913486, + "learning_rate": 1.257068328345764e-06, + "loss": 0.4643, + "step": 48816 + }, + { + "epoch": 0.8435340060823887, + "grad_norm": 0.9328297770741423, + "learning_rate": 1.2567966891356055e-06, + "loss": 0.3632, + "step": 48817 + }, + { + "epoch": 0.8435512855957976, + "grad_norm": 1.7548095222496016, + "learning_rate": 1.2565250773098237e-06, + "loss": 0.3109, + "step": 48818 + }, + { + "epoch": 0.8435685651092065, + "grad_norm": 1.2139887250275732, + "learning_rate": 1.2562534928692716e-06, + "loss": 0.2968, + "step": 48819 + }, + { + "epoch": 0.8435858446226154, + "grad_norm": 1.794954509395488, + "learning_rate": 1.2559819358147984e-06, + "loss": 0.4585, + "step": 48820 + }, + { + "epoch": 0.8436031241360243, + "grad_norm": 1.1507161337689937, + "learning_rate": 1.2557104061472524e-06, + "loss": 0.3498, + "step": 48821 + }, + { + "epoch": 0.8436204036494332, + "grad_norm": 0.8822182812178352, + "learning_rate": 1.2554389038674863e-06, + "loss": 0.4505, + "step": 48822 + }, + { + "epoch": 0.8436376831628422, + "grad_norm": 1.153608055025149, + "learning_rate": 1.2551674289763493e-06, + "loss": 0.2791, + "step": 48823 + }, + { + "epoch": 0.8436549626762511, + "grad_norm": 0.8360287427016311, + "learning_rate": 1.2548959814746952e-06, + "loss": 0.2481, + "step": 48824 + }, + { + "epoch": 0.84367224218966, + "grad_norm": 2.1901187941364526, + "learning_rate": 1.254624561363369e-06, + "loss": 0.384, + "step": 48825 + }, + { + "epoch": 0.8436895217030689, + "grad_norm": 1.8732657278253373, + "learning_rate": 1.2543531686432264e-06, + "loss": 0.3442, + "step": 48826 + }, + { + "epoch": 0.8437068012164778, + "grad_norm": 1.46977577993154, + "learning_rate": 1.2540818033151114e-06, + "loss": 0.5507, + "step": 48827 + }, + { + "epoch": 0.8437240807298867, + "grad_norm": 1.3114174947840462, + "learning_rate": 1.2538104653798766e-06, + "loss": 0.2079, + "step": 48828 + }, + { + "epoch": 0.8437413602432956, + "grad_norm": 1.375788373203475, + "learning_rate": 1.2535391548383723e-06, + "loss": 0.2515, + "step": 48829 + }, + { + "epoch": 0.8437586397567044, + "grad_norm": 1.3774805364297862, + "learning_rate": 1.2532678716914503e-06, + "loss": 0.3727, + "step": 48830 + }, + { + "epoch": 0.8437759192701133, + "grad_norm": 1.1077902241098707, + "learning_rate": 1.2529966159399576e-06, + "loss": 0.1425, + "step": 48831 + }, + { + "epoch": 0.8437931987835222, + "grad_norm": 0.9409978386909115, + "learning_rate": 1.2527253875847434e-06, + "loss": 0.7087, + "step": 48832 + }, + { + "epoch": 0.8438104782969311, + "grad_norm": 0.843799650127281, + "learning_rate": 1.2524541866266571e-06, + "loss": 0.1585, + "step": 48833 + }, + { + "epoch": 0.84382775781034, + "grad_norm": 2.0029029798273954, + "learning_rate": 1.252183013066549e-06, + "loss": 0.3287, + "step": 48834 + }, + { + "epoch": 0.8438450373237489, + "grad_norm": 1.2183527858552494, + "learning_rate": 1.251911866905271e-06, + "loss": 0.7992, + "step": 48835 + }, + { + "epoch": 0.8438623168371578, + "grad_norm": 1.224204803994337, + "learning_rate": 1.2516407481436676e-06, + "loss": 0.729, + "step": 48836 + }, + { + "epoch": 0.8438795963505668, + "grad_norm": 1.7143514055890468, + "learning_rate": 1.2513696567825918e-06, + "loss": 0.3087, + "step": 48837 + }, + { + "epoch": 0.8438968758639757, + "grad_norm": 0.7596318520592771, + "learning_rate": 1.2510985928228903e-06, + "loss": 0.26, + "step": 48838 + }, + { + "epoch": 0.8439141553773846, + "grad_norm": 0.8050759977930354, + "learning_rate": 1.2508275562654116e-06, + "loss": 0.3516, + "step": 48839 + }, + { + "epoch": 0.8439314348907935, + "grad_norm": 1.7637906295131756, + "learning_rate": 1.250556547111006e-06, + "loss": 0.2161, + "step": 48840 + }, + { + "epoch": 0.8439487144042024, + "grad_norm": 0.8878741387448047, + "learning_rate": 1.2502855653605251e-06, + "loss": 0.1768, + "step": 48841 + }, + { + "epoch": 0.8439659939176113, + "grad_norm": 1.1747188881904924, + "learning_rate": 1.2500146110148149e-06, + "loss": 0.3262, + "step": 48842 + }, + { + "epoch": 0.8439832734310202, + "grad_norm": 1.8799887497707701, + "learning_rate": 1.2497436840747212e-06, + "loss": 0.3594, + "step": 48843 + }, + { + "epoch": 0.8440005529444291, + "grad_norm": 1.2632486051997067, + "learning_rate": 1.249472784541097e-06, + "loss": 0.2484, + "step": 48844 + }, + { + "epoch": 0.844017832457838, + "grad_norm": 1.3086883150927493, + "learning_rate": 1.2492019124147859e-06, + "loss": 0.3826, + "step": 48845 + }, + { + "epoch": 0.8440351119712469, + "grad_norm": 0.5864923703561564, + "learning_rate": 1.2489310676966426e-06, + "loss": 0.8998, + "step": 48846 + }, + { + "epoch": 0.8440523914846558, + "grad_norm": 1.3873092946977466, + "learning_rate": 1.2486602503875112e-06, + "loss": 0.3485, + "step": 48847 + }, + { + "epoch": 0.8440696709980647, + "grad_norm": 1.5128235653301843, + "learning_rate": 1.248389460488242e-06, + "loss": 0.4765, + "step": 48848 + }, + { + "epoch": 0.8440869505114736, + "grad_norm": 1.581440544190518, + "learning_rate": 1.2481186979996807e-06, + "loss": 0.2641, + "step": 48849 + }, + { + "epoch": 0.8441042300248826, + "grad_norm": 1.6020992279665627, + "learning_rate": 1.2478479629226792e-06, + "loss": 0.3331, + "step": 48850 + }, + { + "epoch": 0.8441215095382913, + "grad_norm": 1.789097834090354, + "learning_rate": 1.24757725525808e-06, + "loss": 0.2906, + "step": 48851 + }, + { + "epoch": 0.8441387890517003, + "grad_norm": 2.057814966403763, + "learning_rate": 1.247306575006735e-06, + "loss": 0.3468, + "step": 48852 + }, + { + "epoch": 0.8441560685651092, + "grad_norm": 1.434021487273331, + "learning_rate": 1.2470359221694917e-06, + "loss": 0.3277, + "step": 48853 + }, + { + "epoch": 0.8441733480785181, + "grad_norm": 1.3046684510235371, + "learning_rate": 1.2467652967471965e-06, + "loss": 0.3791, + "step": 48854 + }, + { + "epoch": 0.844190627591927, + "grad_norm": 1.4755537896874622, + "learning_rate": 1.2464946987406988e-06, + "loss": 0.3539, + "step": 48855 + }, + { + "epoch": 0.8442079071053359, + "grad_norm": 1.1423424183141628, + "learning_rate": 1.2462241281508425e-06, + "loss": 0.4105, + "step": 48856 + }, + { + "epoch": 0.8442251866187448, + "grad_norm": 1.097151550434494, + "learning_rate": 1.2459535849784787e-06, + "loss": 0.2253, + "step": 48857 + }, + { + "epoch": 0.8442424661321537, + "grad_norm": 1.3025032457733514, + "learning_rate": 1.2456830692244526e-06, + "loss": 0.4011, + "step": 48858 + }, + { + "epoch": 0.8442597456455626, + "grad_norm": 1.433801128402151, + "learning_rate": 1.2454125808896144e-06, + "loss": 0.3207, + "step": 48859 + }, + { + "epoch": 0.8442770251589715, + "grad_norm": 1.3047986657472963, + "learning_rate": 1.2451421199748082e-06, + "loss": 0.3748, + "step": 48860 + }, + { + "epoch": 0.8442943046723804, + "grad_norm": 1.0828451181806262, + "learning_rate": 1.244871686480884e-06, + "loss": 0.457, + "step": 48861 + }, + { + "epoch": 0.8443115841857893, + "grad_norm": 1.4229697181938388, + "learning_rate": 1.2446012804086848e-06, + "loss": 0.2688, + "step": 48862 + }, + { + "epoch": 0.8443288636991982, + "grad_norm": 1.4060493197092998, + "learning_rate": 1.24433090175906e-06, + "loss": 0.2942, + "step": 48863 + }, + { + "epoch": 0.8443461432126071, + "grad_norm": 1.2874581859962875, + "learning_rate": 1.2440605505328573e-06, + "loss": 0.1956, + "step": 48864 + }, + { + "epoch": 0.844363422726016, + "grad_norm": 1.4834392531743898, + "learning_rate": 1.243790226730921e-06, + "loss": 0.465, + "step": 48865 + }, + { + "epoch": 0.844380702239425, + "grad_norm": 1.322993222061838, + "learning_rate": 1.2435199303541012e-06, + "loss": 0.359, + "step": 48866 + }, + { + "epoch": 0.8443979817528339, + "grad_norm": 0.7370191102435824, + "learning_rate": 1.2432496614032407e-06, + "loss": 0.221, + "step": 48867 + }, + { + "epoch": 0.8444152612662428, + "grad_norm": 1.0690858203682025, + "learning_rate": 1.2429794198791878e-06, + "loss": 0.284, + "step": 48868 + }, + { + "epoch": 0.8444325407796517, + "grad_norm": 1.0215356022656994, + "learning_rate": 1.2427092057827883e-06, + "loss": 0.4258, + "step": 48869 + }, + { + "epoch": 0.8444498202930606, + "grad_norm": 0.6067201380789183, + "learning_rate": 1.2424390191148917e-06, + "loss": 0.648, + "step": 48870 + }, + { + "epoch": 0.8444670998064695, + "grad_norm": 1.38297026729538, + "learning_rate": 1.2421688598763394e-06, + "loss": 0.4315, + "step": 48871 + }, + { + "epoch": 0.8444843793198783, + "grad_norm": 1.4712770506731871, + "learning_rate": 1.2418987280679818e-06, + "loss": 0.3143, + "step": 48872 + }, + { + "epoch": 0.8445016588332872, + "grad_norm": 2.864482190424133, + "learning_rate": 1.2416286236906606e-06, + "loss": 0.3832, + "step": 48873 + }, + { + "epoch": 0.8445189383466961, + "grad_norm": 1.1528001957859662, + "learning_rate": 1.2413585467452239e-06, + "loss": 0.2324, + "step": 48874 + }, + { + "epoch": 0.844536217860105, + "grad_norm": 1.0124549578837956, + "learning_rate": 1.241088497232521e-06, + "loss": 0.403, + "step": 48875 + }, + { + "epoch": 0.8445534973735139, + "grad_norm": 1.5199302854078174, + "learning_rate": 1.2408184751533914e-06, + "loss": 0.3737, + "step": 48876 + }, + { + "epoch": 0.8445707768869228, + "grad_norm": 1.3288112614895717, + "learning_rate": 1.2405484805086864e-06, + "loss": 0.4258, + "step": 48877 + }, + { + "epoch": 0.8445880564003317, + "grad_norm": 1.5824947783395114, + "learning_rate": 1.240278513299248e-06, + "loss": 0.2482, + "step": 48878 + }, + { + "epoch": 0.8446053359137407, + "grad_norm": 1.031984258985047, + "learning_rate": 1.2400085735259226e-06, + "loss": 0.2934, + "step": 48879 + }, + { + "epoch": 0.8446226154271496, + "grad_norm": 1.3047312118910679, + "learning_rate": 1.2397386611895557e-06, + "loss": 0.4607, + "step": 48880 + }, + { + "epoch": 0.8446398949405585, + "grad_norm": 0.9542038596865708, + "learning_rate": 1.2394687762909952e-06, + "loss": 0.318, + "step": 48881 + }, + { + "epoch": 0.8446571744539674, + "grad_norm": 0.950327549966219, + "learning_rate": 1.2391989188310848e-06, + "loss": 0.3231, + "step": 48882 + }, + { + "epoch": 0.8446744539673763, + "grad_norm": 0.8787201963870012, + "learning_rate": 1.2389290888106664e-06, + "loss": 0.3088, + "step": 48883 + }, + { + "epoch": 0.8446917334807852, + "grad_norm": 1.6932652217287523, + "learning_rate": 1.2386592862305901e-06, + "loss": 0.4379, + "step": 48884 + }, + { + "epoch": 0.8447090129941941, + "grad_norm": 1.6054652429589789, + "learning_rate": 1.2383895110916954e-06, + "loss": 0.2825, + "step": 48885 + }, + { + "epoch": 0.844726292507603, + "grad_norm": 1.1637392823397457, + "learning_rate": 1.2381197633948339e-06, + "loss": 0.504, + "step": 48886 + }, + { + "epoch": 0.8447435720210119, + "grad_norm": 1.3648387139820053, + "learning_rate": 1.2378500431408457e-06, + "loss": 0.4479, + "step": 48887 + }, + { + "epoch": 0.8447608515344208, + "grad_norm": 2.2258773349686893, + "learning_rate": 1.2375803503305784e-06, + "loss": 0.2211, + "step": 48888 + }, + { + "epoch": 0.8447781310478297, + "grad_norm": 0.8810710443588211, + "learning_rate": 1.237310684964874e-06, + "loss": 0.3387, + "step": 48889 + }, + { + "epoch": 0.8447954105612386, + "grad_norm": 2.66924161528774, + "learning_rate": 1.2370410470445803e-06, + "loss": 0.3471, + "step": 48890 + }, + { + "epoch": 0.8448126900746475, + "grad_norm": 1.3228717740341687, + "learning_rate": 1.2367714365705352e-06, + "loss": 0.2663, + "step": 48891 + }, + { + "epoch": 0.8448299695880565, + "grad_norm": 1.427437064872673, + "learning_rate": 1.2365018535435935e-06, + "loss": 0.3945, + "step": 48892 + }, + { + "epoch": 0.8448472491014652, + "grad_norm": 1.2204514214382924, + "learning_rate": 1.2362322979645936e-06, + "loss": 0.3754, + "step": 48893 + }, + { + "epoch": 0.8448645286148742, + "grad_norm": 1.7471159225202295, + "learning_rate": 1.2359627698343768e-06, + "loss": 0.2614, + "step": 48894 + }, + { + "epoch": 0.8448818081282831, + "grad_norm": 1.6354265213578165, + "learning_rate": 1.2356932691537938e-06, + "loss": 0.3064, + "step": 48895 + }, + { + "epoch": 0.844899087641692, + "grad_norm": 1.4391022931708075, + "learning_rate": 1.2354237959236836e-06, + "loss": 0.266, + "step": 48896 + }, + { + "epoch": 0.8449163671551009, + "grad_norm": 2.061797863320009, + "learning_rate": 1.2351543501448914e-06, + "loss": 0.2356, + "step": 48897 + }, + { + "epoch": 0.8449336466685098, + "grad_norm": 1.525357354897057, + "learning_rate": 1.234884931818262e-06, + "loss": 0.4146, + "step": 48898 + }, + { + "epoch": 0.8449509261819187, + "grad_norm": 0.9324852088673568, + "learning_rate": 1.2346155409446425e-06, + "loss": 0.3186, + "step": 48899 + }, + { + "epoch": 0.8449682056953276, + "grad_norm": 1.5811116616205556, + "learning_rate": 1.23434617752487e-06, + "loss": 0.4382, + "step": 48900 + }, + { + "epoch": 0.8449854852087365, + "grad_norm": 1.9130958672550211, + "learning_rate": 1.2340768415597936e-06, + "loss": 0.432, + "step": 48901 + }, + { + "epoch": 0.8450027647221454, + "grad_norm": 1.3887759553856087, + "learning_rate": 1.233807533050253e-06, + "loss": 0.4576, + "step": 48902 + }, + { + "epoch": 0.8450200442355543, + "grad_norm": 1.9193796434013912, + "learning_rate": 1.233538251997093e-06, + "loss": 0.3815, + "step": 48903 + }, + { + "epoch": 0.8450373237489632, + "grad_norm": 0.9859142867532085, + "learning_rate": 1.2332689984011591e-06, + "loss": 0.2745, + "step": 48904 + }, + { + "epoch": 0.8450546032623721, + "grad_norm": 1.842686310373818, + "learning_rate": 1.2329997722632914e-06, + "loss": 0.3262, + "step": 48905 + }, + { + "epoch": 0.845071882775781, + "grad_norm": 1.0947415089163481, + "learning_rate": 1.2327305735843377e-06, + "loss": 0.4, + "step": 48906 + }, + { + "epoch": 0.84508916228919, + "grad_norm": 1.1965938723290093, + "learning_rate": 1.232461402365135e-06, + "loss": 0.4068, + "step": 48907 + }, + { + "epoch": 0.8451064418025989, + "grad_norm": 3.079383000638376, + "learning_rate": 1.2321922586065293e-06, + "loss": 0.2483, + "step": 48908 + }, + { + "epoch": 0.8451237213160078, + "grad_norm": 1.3092392301534608, + "learning_rate": 1.2319231423093647e-06, + "loss": 0.2944, + "step": 48909 + }, + { + "epoch": 0.8451410008294167, + "grad_norm": 1.485299412854524, + "learning_rate": 1.2316540534744846e-06, + "loss": 0.3677, + "step": 48910 + }, + { + "epoch": 0.8451582803428256, + "grad_norm": 0.8592584683935258, + "learning_rate": 1.2313849921027277e-06, + "loss": 0.172, + "step": 48911 + }, + { + "epoch": 0.8451755598562345, + "grad_norm": 0.9879969520112205, + "learning_rate": 1.2311159581949428e-06, + "loss": 0.3076, + "step": 48912 + }, + { + "epoch": 0.8451928393696434, + "grad_norm": 1.4651191061897917, + "learning_rate": 1.230846951751966e-06, + "loss": 0.4604, + "step": 48913 + }, + { + "epoch": 0.8452101188830523, + "grad_norm": 1.2813648574503982, + "learning_rate": 1.2305779727746437e-06, + "loss": 0.2928, + "step": 48914 + }, + { + "epoch": 0.8452273983964611, + "grad_norm": 1.1972013424682777, + "learning_rate": 1.2303090212638191e-06, + "loss": 0.2828, + "step": 48915 + }, + { + "epoch": 0.84524467790987, + "grad_norm": 1.3043141620773735, + "learning_rate": 1.2300400972203319e-06, + "loss": 0.3051, + "step": 48916 + }, + { + "epoch": 0.8452619574232789, + "grad_norm": 1.8129136994527033, + "learning_rate": 1.2297712006450268e-06, + "loss": 0.2425, + "step": 48917 + }, + { + "epoch": 0.8452792369366878, + "grad_norm": 1.786597749263851, + "learning_rate": 1.2295023315387434e-06, + "loss": 0.3414, + "step": 48918 + }, + { + "epoch": 0.8452965164500967, + "grad_norm": 0.7858836453141975, + "learning_rate": 1.229233489902324e-06, + "loss": 0.1916, + "step": 48919 + }, + { + "epoch": 0.8453137959635056, + "grad_norm": 1.78667259092506, + "learning_rate": 1.2289646757366136e-06, + "loss": 0.3167, + "step": 48920 + }, + { + "epoch": 0.8453310754769146, + "grad_norm": 1.7670821815213134, + "learning_rate": 1.2286958890424527e-06, + "loss": 0.4089, + "step": 48921 + }, + { + "epoch": 0.8453483549903235, + "grad_norm": 1.18547113864782, + "learning_rate": 1.2284271298206818e-06, + "loss": 0.3014, + "step": 48922 + }, + { + "epoch": 0.8453656345037324, + "grad_norm": 1.4724353660921168, + "learning_rate": 1.2281583980721467e-06, + "loss": 0.4157, + "step": 48923 + }, + { + "epoch": 0.8453829140171413, + "grad_norm": 0.9749941422664514, + "learning_rate": 1.2278896937976847e-06, + "loss": 0.25, + "step": 48924 + }, + { + "epoch": 0.8454001935305502, + "grad_norm": 1.4241236492144158, + "learning_rate": 1.227621016998135e-06, + "loss": 0.248, + "step": 48925 + }, + { + "epoch": 0.8454174730439591, + "grad_norm": 2.0249598302537577, + "learning_rate": 1.227352367674347e-06, + "loss": 0.3685, + "step": 48926 + }, + { + "epoch": 0.845434752557368, + "grad_norm": 1.0605517785065843, + "learning_rate": 1.2270837458271568e-06, + "loss": 0.4156, + "step": 48927 + }, + { + "epoch": 0.8454520320707769, + "grad_norm": 1.700361681026568, + "learning_rate": 1.2268151514574089e-06, + "loss": 0.4662, + "step": 48928 + }, + { + "epoch": 0.8454693115841858, + "grad_norm": 1.2304296003652577, + "learning_rate": 1.2265465845659396e-06, + "loss": 0.1065, + "step": 48929 + }, + { + "epoch": 0.8454865910975947, + "grad_norm": 2.2524129811394884, + "learning_rate": 1.226278045153596e-06, + "loss": 0.2862, + "step": 48930 + }, + { + "epoch": 0.8455038706110036, + "grad_norm": 1.0476236673570507, + "learning_rate": 1.226009533221212e-06, + "loss": 0.27, + "step": 48931 + }, + { + "epoch": 0.8455211501244125, + "grad_norm": 2.816121384475204, + "learning_rate": 1.2257410487696374e-06, + "loss": 0.2764, + "step": 48932 + }, + { + "epoch": 0.8455384296378214, + "grad_norm": 1.2238518252239323, + "learning_rate": 1.2254725917997068e-06, + "loss": 0.1969, + "step": 48933 + }, + { + "epoch": 0.8455557091512304, + "grad_norm": 0.6448070533344017, + "learning_rate": 1.2252041623122646e-06, + "loss": 0.1493, + "step": 48934 + }, + { + "epoch": 0.8455729886646393, + "grad_norm": 1.1125627271773542, + "learning_rate": 1.2249357603081491e-06, + "loss": 0.4372, + "step": 48935 + }, + { + "epoch": 0.845590268178048, + "grad_norm": 0.6971128368889307, + "learning_rate": 1.2246673857882009e-06, + "loss": 0.5133, + "step": 48936 + }, + { + "epoch": 0.845607547691457, + "grad_norm": 1.308763658210039, + "learning_rate": 1.22439903875326e-06, + "loss": 0.3456, + "step": 48937 + }, + { + "epoch": 0.8456248272048659, + "grad_norm": 1.531188904430673, + "learning_rate": 1.2241307192041697e-06, + "loss": 0.4389, + "step": 48938 + }, + { + "epoch": 0.8456421067182748, + "grad_norm": 1.519483302556264, + "learning_rate": 1.2238624271417698e-06, + "loss": 0.4827, + "step": 48939 + }, + { + "epoch": 0.8456593862316837, + "grad_norm": 1.0106273167676914, + "learning_rate": 1.2235941625668978e-06, + "loss": 0.7962, + "step": 48940 + }, + { + "epoch": 0.8456766657450926, + "grad_norm": 1.155862378217158, + "learning_rate": 1.2233259254803987e-06, + "loss": 0.3821, + "step": 48941 + }, + { + "epoch": 0.8456939452585015, + "grad_norm": 1.5628286743740285, + "learning_rate": 1.223057715883107e-06, + "loss": 0.3941, + "step": 48942 + }, + { + "epoch": 0.8457112247719104, + "grad_norm": 1.2009167285124651, + "learning_rate": 1.2227895337758667e-06, + "loss": 0.754, + "step": 48943 + }, + { + "epoch": 0.8457285042853193, + "grad_norm": 1.5204302061320443, + "learning_rate": 1.222521379159517e-06, + "loss": 0.398, + "step": 48944 + }, + { + "epoch": 0.8457457837987282, + "grad_norm": 2.26352457577279, + "learning_rate": 1.2222532520348962e-06, + "loss": 0.282, + "step": 48945 + }, + { + "epoch": 0.8457630633121371, + "grad_norm": 1.60596189316315, + "learning_rate": 1.221985152402848e-06, + "loss": 0.3675, + "step": 48946 + }, + { + "epoch": 0.845780342825546, + "grad_norm": 0.9298594985266156, + "learning_rate": 1.2217170802642065e-06, + "loss": 0.5639, + "step": 48947 + }, + { + "epoch": 0.845797622338955, + "grad_norm": 0.8609082405890853, + "learning_rate": 1.2214490356198138e-06, + "loss": 0.1554, + "step": 48948 + }, + { + "epoch": 0.8458149018523639, + "grad_norm": 1.4081855288815657, + "learning_rate": 1.221181018470511e-06, + "loss": 0.4359, + "step": 48949 + }, + { + "epoch": 0.8458321813657728, + "grad_norm": 1.7594457199612061, + "learning_rate": 1.2209130288171378e-06, + "loss": 0.4041, + "step": 48950 + }, + { + "epoch": 0.8458494608791817, + "grad_norm": 1.1926908466620088, + "learning_rate": 1.2206450666605307e-06, + "loss": 0.4582, + "step": 48951 + }, + { + "epoch": 0.8458667403925906, + "grad_norm": 1.1129477735787976, + "learning_rate": 1.2203771320015323e-06, + "loss": 0.4038, + "step": 48952 + }, + { + "epoch": 0.8458840199059995, + "grad_norm": 1.695928106417174, + "learning_rate": 1.2201092248409775e-06, + "loss": 0.2626, + "step": 48953 + }, + { + "epoch": 0.8459012994194084, + "grad_norm": 1.7451150083318954, + "learning_rate": 1.2198413451797075e-06, + "loss": 0.2578, + "step": 48954 + }, + { + "epoch": 0.8459185789328173, + "grad_norm": 0.9486382509160162, + "learning_rate": 1.2195734930185642e-06, + "loss": 0.2464, + "step": 48955 + }, + { + "epoch": 0.8459358584462262, + "grad_norm": 0.9721075879678631, + "learning_rate": 1.2193056683583825e-06, + "loss": 0.4465, + "step": 48956 + }, + { + "epoch": 0.845953137959635, + "grad_norm": 1.4838965788866743, + "learning_rate": 1.219037871200004e-06, + "loss": 0.3799, + "step": 48957 + }, + { + "epoch": 0.8459704174730439, + "grad_norm": 0.9874048921445907, + "learning_rate": 1.2187701015442644e-06, + "loss": 0.2636, + "step": 48958 + }, + { + "epoch": 0.8459876969864528, + "grad_norm": 1.150606219327412, + "learning_rate": 1.2185023593920041e-06, + "loss": 0.4847, + "step": 48959 + }, + { + "epoch": 0.8460049764998617, + "grad_norm": 2.2072761870239734, + "learning_rate": 1.2182346447440629e-06, + "loss": 0.3144, + "step": 48960 + }, + { + "epoch": 0.8460222560132706, + "grad_norm": 1.7796872151995973, + "learning_rate": 1.2179669576012787e-06, + "loss": 0.3126, + "step": 48961 + }, + { + "epoch": 0.8460395355266795, + "grad_norm": 2.142277727854574, + "learning_rate": 1.2176992979644886e-06, + "loss": 0.2783, + "step": 48962 + }, + { + "epoch": 0.8460568150400885, + "grad_norm": 1.6094104305354393, + "learning_rate": 1.217431665834533e-06, + "loss": 0.2754, + "step": 48963 + }, + { + "epoch": 0.8460740945534974, + "grad_norm": 1.815766360374536, + "learning_rate": 1.2171640612122471e-06, + "loss": 0.2875, + "step": 48964 + }, + { + "epoch": 0.8460913740669063, + "grad_norm": 1.4299692488816647, + "learning_rate": 1.2168964840984698e-06, + "loss": 0.2463, + "step": 48965 + }, + { + "epoch": 0.8461086535803152, + "grad_norm": 1.233818920902223, + "learning_rate": 1.216628934494044e-06, + "loss": 0.328, + "step": 48966 + }, + { + "epoch": 0.8461259330937241, + "grad_norm": 1.1410510096595008, + "learning_rate": 1.2163614123998003e-06, + "loss": 0.418, + "step": 48967 + }, + { + "epoch": 0.846143212607133, + "grad_norm": 0.8431019201508457, + "learning_rate": 1.2160939178165831e-06, + "loss": 0.2157, + "step": 48968 + }, + { + "epoch": 0.8461604921205419, + "grad_norm": 1.140105459851598, + "learning_rate": 1.2158264507452256e-06, + "loss": 0.1711, + "step": 48969 + }, + { + "epoch": 0.8461777716339508, + "grad_norm": 1.4095769215646083, + "learning_rate": 1.2155590111865678e-06, + "loss": 0.3161, + "step": 48970 + }, + { + "epoch": 0.8461950511473597, + "grad_norm": 1.8289883945282064, + "learning_rate": 1.2152915991414439e-06, + "loss": 0.4176, + "step": 48971 + }, + { + "epoch": 0.8462123306607686, + "grad_norm": 2.1190673006862726, + "learning_rate": 1.2150242146106982e-06, + "loss": 0.3019, + "step": 48972 + }, + { + "epoch": 0.8462296101741775, + "grad_norm": 1.3818179651147653, + "learning_rate": 1.2147568575951619e-06, + "loss": 0.4711, + "step": 48973 + }, + { + "epoch": 0.8462468896875864, + "grad_norm": 1.5143753492162666, + "learning_rate": 1.2144895280956782e-06, + "loss": 0.3681, + "step": 48974 + }, + { + "epoch": 0.8462641692009953, + "grad_norm": 2.3231360467428566, + "learning_rate": 1.2142222261130799e-06, + "loss": 0.3068, + "step": 48975 + }, + { + "epoch": 0.8462814487144042, + "grad_norm": 1.4734362910773686, + "learning_rate": 1.213954951648203e-06, + "loss": 0.3262, + "step": 48976 + }, + { + "epoch": 0.8462987282278132, + "grad_norm": 1.93169445331211, + "learning_rate": 1.2136877047018869e-06, + "loss": 0.4336, + "step": 48977 + }, + { + "epoch": 0.846316007741222, + "grad_norm": 1.2981043056444157, + "learning_rate": 1.2134204852749687e-06, + "loss": 0.3624, + "step": 48978 + }, + { + "epoch": 0.8463332872546309, + "grad_norm": 1.295899757548307, + "learning_rate": 1.2131532933682878e-06, + "loss": 0.5504, + "step": 48979 + }, + { + "epoch": 0.8463505667680398, + "grad_norm": 1.3661722991952368, + "learning_rate": 1.2128861289826755e-06, + "loss": 0.39, + "step": 48980 + }, + { + "epoch": 0.8463678462814487, + "grad_norm": 1.856976158428009, + "learning_rate": 1.2126189921189746e-06, + "loss": 0.47, + "step": 48981 + }, + { + "epoch": 0.8463851257948576, + "grad_norm": 1.1357390429901586, + "learning_rate": 1.2123518827780167e-06, + "loss": 0.2747, + "step": 48982 + }, + { + "epoch": 0.8464024053082665, + "grad_norm": 1.225461176912044, + "learning_rate": 1.2120848009606401e-06, + "loss": 0.4023, + "step": 48983 + }, + { + "epoch": 0.8464196848216754, + "grad_norm": 1.1534945313577971, + "learning_rate": 1.2118177466676828e-06, + "loss": 0.3494, + "step": 48984 + }, + { + "epoch": 0.8464369643350843, + "grad_norm": 1.522270191720189, + "learning_rate": 1.211550719899981e-06, + "loss": 0.5499, + "step": 48985 + }, + { + "epoch": 0.8464542438484932, + "grad_norm": 1.3311887791606423, + "learning_rate": 1.2112837206583716e-06, + "loss": 0.3556, + "step": 48986 + }, + { + "epoch": 0.8464715233619021, + "grad_norm": 2.5439696066568374, + "learning_rate": 1.2110167489436863e-06, + "loss": 0.4346, + "step": 48987 + }, + { + "epoch": 0.846488802875311, + "grad_norm": 0.8785053004374013, + "learning_rate": 1.2107498047567646e-06, + "loss": 0.1694, + "step": 48988 + }, + { + "epoch": 0.8465060823887199, + "grad_norm": 1.3933238514942037, + "learning_rate": 1.210482888098443e-06, + "loss": 0.4105, + "step": 48989 + }, + { + "epoch": 0.8465233619021288, + "grad_norm": 0.8121562321029281, + "learning_rate": 1.2102159989695583e-06, + "loss": 0.6474, + "step": 48990 + }, + { + "epoch": 0.8465406414155378, + "grad_norm": 0.9039541432245219, + "learning_rate": 1.2099491373709438e-06, + "loss": 0.3008, + "step": 48991 + }, + { + "epoch": 0.8465579209289467, + "grad_norm": 1.5916046099178407, + "learning_rate": 1.2096823033034389e-06, + "loss": 0.3124, + "step": 48992 + }, + { + "epoch": 0.8465752004423556, + "grad_norm": 1.0555453772910293, + "learning_rate": 1.209415496767874e-06, + "loss": 0.4429, + "step": 48993 + }, + { + "epoch": 0.8465924799557645, + "grad_norm": 1.7148649667395752, + "learning_rate": 1.2091487177650884e-06, + "loss": 0.2142, + "step": 48994 + }, + { + "epoch": 0.8466097594691734, + "grad_norm": 2.441033294661378, + "learning_rate": 1.2088819662959162e-06, + "loss": 0.4665, + "step": 48995 + }, + { + "epoch": 0.8466270389825823, + "grad_norm": 1.2919171638808389, + "learning_rate": 1.2086152423611975e-06, + "loss": 0.1866, + "step": 48996 + }, + { + "epoch": 0.8466443184959912, + "grad_norm": 1.2126881219404906, + "learning_rate": 1.2083485459617627e-06, + "loss": 0.2303, + "step": 48997 + }, + { + "epoch": 0.8466615980094001, + "grad_norm": 1.6394898562565599, + "learning_rate": 1.208081877098447e-06, + "loss": 0.5189, + "step": 48998 + }, + { + "epoch": 0.8466788775228089, + "grad_norm": 1.1945190052453216, + "learning_rate": 1.2078152357720862e-06, + "loss": 0.2349, + "step": 48999 + }, + { + "epoch": 0.8466961570362178, + "grad_norm": 1.6525567295153922, + "learning_rate": 1.2075486219835174e-06, + "loss": 0.4601, + "step": 49000 + }, + { + "epoch": 0.8467134365496267, + "grad_norm": 1.1390899708101654, + "learning_rate": 1.2072820357335757e-06, + "loss": 0.5132, + "step": 49001 + }, + { + "epoch": 0.8467307160630356, + "grad_norm": 1.0137459881282345, + "learning_rate": 1.2070154770230935e-06, + "loss": 0.2906, + "step": 49002 + }, + { + "epoch": 0.8467479955764445, + "grad_norm": 0.8707439794470477, + "learning_rate": 1.206748945852908e-06, + "loss": 0.1956, + "step": 49003 + }, + { + "epoch": 0.8467652750898534, + "grad_norm": 1.2088081628794929, + "learning_rate": 1.2064824422238518e-06, + "loss": 0.4493, + "step": 49004 + }, + { + "epoch": 0.8467825546032623, + "grad_norm": 3.2266942104730774, + "learning_rate": 1.2062159661367612e-06, + "loss": 0.2835, + "step": 49005 + }, + { + "epoch": 0.8467998341166713, + "grad_norm": 3.9390532677595065, + "learning_rate": 1.2059495175924717e-06, + "loss": 0.3459, + "step": 49006 + }, + { + "epoch": 0.8468171136300802, + "grad_norm": 1.2660514600923698, + "learning_rate": 1.2056830965918154e-06, + "loss": 0.3875, + "step": 49007 + }, + { + "epoch": 0.8468343931434891, + "grad_norm": 1.3743243390420403, + "learning_rate": 1.2054167031356302e-06, + "loss": 0.4837, + "step": 49008 + }, + { + "epoch": 0.846851672656898, + "grad_norm": 1.3193487639602457, + "learning_rate": 1.2051503372247452e-06, + "loss": 0.3309, + "step": 49009 + }, + { + "epoch": 0.8468689521703069, + "grad_norm": 1.4550661598799997, + "learning_rate": 1.2048839988600003e-06, + "loss": 0.2694, + "step": 49010 + }, + { + "epoch": 0.8468862316837158, + "grad_norm": 0.9445747399536794, + "learning_rate": 1.2046176880422234e-06, + "loss": 0.2261, + "step": 49011 + }, + { + "epoch": 0.8469035111971247, + "grad_norm": 1.3774045957732575, + "learning_rate": 1.204351404772256e-06, + "loss": 0.4975, + "step": 49012 + }, + { + "epoch": 0.8469207907105336, + "grad_norm": 0.703969656712115, + "learning_rate": 1.2040851490509264e-06, + "loss": 0.7838, + "step": 49013 + }, + { + "epoch": 0.8469380702239425, + "grad_norm": 1.0258132784024403, + "learning_rate": 1.2038189208790718e-06, + "loss": 0.3111, + "step": 49014 + }, + { + "epoch": 0.8469553497373514, + "grad_norm": 1.6786039190672262, + "learning_rate": 1.2035527202575237e-06, + "loss": 0.3776, + "step": 49015 + }, + { + "epoch": 0.8469726292507603, + "grad_norm": 1.049517972284244, + "learning_rate": 1.203286547187119e-06, + "loss": 0.2546, + "step": 49016 + }, + { + "epoch": 0.8469899087641692, + "grad_norm": 0.8601106175389224, + "learning_rate": 1.2030204016686876e-06, + "loss": 0.2502, + "step": 49017 + }, + { + "epoch": 0.8470071882775781, + "grad_norm": 1.919590921690623, + "learning_rate": 1.2027542837030649e-06, + "loss": 0.2922, + "step": 49018 + }, + { + "epoch": 0.8470244677909871, + "grad_norm": 0.7895326557641786, + "learning_rate": 1.2024881932910858e-06, + "loss": 0.3562, + "step": 49019 + }, + { + "epoch": 0.8470417473043959, + "grad_norm": 0.5166343439949365, + "learning_rate": 1.2022221304335802e-06, + "loss": 0.6242, + "step": 49020 + }, + { + "epoch": 0.8470590268178048, + "grad_norm": 1.237537354062981, + "learning_rate": 1.2019560951313858e-06, + "loss": 0.3414, + "step": 49021 + }, + { + "epoch": 0.8470763063312137, + "grad_norm": 1.2237859168700203, + "learning_rate": 1.201690087385332e-06, + "loss": 0.1789, + "step": 49022 + }, + { + "epoch": 0.8470935858446226, + "grad_norm": 1.2480336437844328, + "learning_rate": 1.2014241071962528e-06, + "loss": 0.3336, + "step": 49023 + }, + { + "epoch": 0.8471108653580315, + "grad_norm": 1.221233615498259, + "learning_rate": 1.201158154564982e-06, + "loss": 0.3911, + "step": 49024 + }, + { + "epoch": 0.8471281448714404, + "grad_norm": 0.8150285529880192, + "learning_rate": 1.200892229492354e-06, + "loss": 0.2904, + "step": 49025 + }, + { + "epoch": 0.8471454243848493, + "grad_norm": 0.5188500930411456, + "learning_rate": 1.2006263319792011e-06, + "loss": 0.6524, + "step": 49026 + }, + { + "epoch": 0.8471627038982582, + "grad_norm": 1.509545346538108, + "learning_rate": 1.2003604620263532e-06, + "loss": 0.5666, + "step": 49027 + }, + { + "epoch": 0.8471799834116671, + "grad_norm": 0.7797570744555173, + "learning_rate": 1.2000946196346452e-06, + "loss": 0.4771, + "step": 49028 + }, + { + "epoch": 0.847197262925076, + "grad_norm": 3.0316665740138067, + "learning_rate": 1.199828804804909e-06, + "loss": 0.311, + "step": 49029 + }, + { + "epoch": 0.8472145424384849, + "grad_norm": 0.9427212235263716, + "learning_rate": 1.1995630175379802e-06, + "loss": 0.3795, + "step": 49030 + }, + { + "epoch": 0.8472318219518938, + "grad_norm": 1.5628896822221265, + "learning_rate": 1.1992972578346872e-06, + "loss": 0.3893, + "step": 49031 + }, + { + "epoch": 0.8472491014653027, + "grad_norm": 1.7789120780618202, + "learning_rate": 1.199031525695865e-06, + "loss": 0.4094, + "step": 49032 + }, + { + "epoch": 0.8472663809787117, + "grad_norm": 1.7486770933545421, + "learning_rate": 1.198765821122344e-06, + "loss": 0.2787, + "step": 49033 + }, + { + "epoch": 0.8472836604921206, + "grad_norm": 1.0383822215767318, + "learning_rate": 1.198500144114957e-06, + "loss": 0.296, + "step": 49034 + }, + { + "epoch": 0.8473009400055295, + "grad_norm": 1.656153721833542, + "learning_rate": 1.1982344946745362e-06, + "loss": 0.4216, + "step": 49035 + }, + { + "epoch": 0.8473182195189384, + "grad_norm": 1.4419342992901334, + "learning_rate": 1.1979688728019157e-06, + "loss": 0.2558, + "step": 49036 + }, + { + "epoch": 0.8473354990323473, + "grad_norm": 1.2604917174728152, + "learning_rate": 1.1977032784979258e-06, + "loss": 0.3094, + "step": 49037 + }, + { + "epoch": 0.8473527785457562, + "grad_norm": 1.1662137059596265, + "learning_rate": 1.1974377117633961e-06, + "loss": 0.3282, + "step": 49038 + }, + { + "epoch": 0.8473700580591651, + "grad_norm": 1.4477396421569748, + "learning_rate": 1.1971721725991615e-06, + "loss": 0.5838, + "step": 49039 + }, + { + "epoch": 0.847387337572574, + "grad_norm": 0.5057426321497458, + "learning_rate": 1.1969066610060508e-06, + "loss": 0.6915, + "step": 49040 + }, + { + "epoch": 0.8474046170859829, + "grad_norm": 0.5973816179867433, + "learning_rate": 1.1966411769849007e-06, + "loss": 0.4823, + "step": 49041 + }, + { + "epoch": 0.8474218965993917, + "grad_norm": 0.7808250569841778, + "learning_rate": 1.1963757205365379e-06, + "loss": 0.2671, + "step": 49042 + }, + { + "epoch": 0.8474391761128006, + "grad_norm": 1.2328862391670974, + "learning_rate": 1.1961102916617962e-06, + "loss": 0.323, + "step": 49043 + }, + { + "epoch": 0.8474564556262095, + "grad_norm": 1.627244162049518, + "learning_rate": 1.195844890361505e-06, + "loss": 0.4498, + "step": 49044 + }, + { + "epoch": 0.8474737351396184, + "grad_norm": 1.0559323647072585, + "learning_rate": 1.1955795166364958e-06, + "loss": 0.3802, + "step": 49045 + }, + { + "epoch": 0.8474910146530273, + "grad_norm": 3.383025402676966, + "learning_rate": 1.1953141704876015e-06, + "loss": 0.27, + "step": 49046 + }, + { + "epoch": 0.8475082941664362, + "grad_norm": 1.7838930774686226, + "learning_rate": 1.1950488519156533e-06, + "loss": 0.3199, + "step": 49047 + }, + { + "epoch": 0.8475255736798452, + "grad_norm": 1.1164227849886674, + "learning_rate": 1.194783560921483e-06, + "loss": 0.5717, + "step": 49048 + }, + { + "epoch": 0.8475428531932541, + "grad_norm": 1.2301904318047612, + "learning_rate": 1.1945182975059166e-06, + "loss": 0.3566, + "step": 49049 + }, + { + "epoch": 0.847560132706663, + "grad_norm": 1.3517130060827967, + "learning_rate": 1.19425306166979e-06, + "loss": 0.2338, + "step": 49050 + }, + { + "epoch": 0.8475774122200719, + "grad_norm": 2.240875598412657, + "learning_rate": 1.1939878534139283e-06, + "loss": 0.2373, + "step": 49051 + }, + { + "epoch": 0.8475946917334808, + "grad_norm": 1.0269211829804747, + "learning_rate": 1.1937226727391692e-06, + "loss": 0.3447, + "step": 49052 + }, + { + "epoch": 0.8476119712468897, + "grad_norm": 1.4030558415988952, + "learning_rate": 1.1934575196463393e-06, + "loss": 0.2986, + "step": 49053 + }, + { + "epoch": 0.8476292507602986, + "grad_norm": 0.7508176967588392, + "learning_rate": 1.193192394136271e-06, + "loss": 0.6137, + "step": 49054 + }, + { + "epoch": 0.8476465302737075, + "grad_norm": 1.1995858769293988, + "learning_rate": 1.1929272962097903e-06, + "loss": 0.2923, + "step": 49055 + }, + { + "epoch": 0.8476638097871164, + "grad_norm": 1.6635886063139447, + "learning_rate": 1.1926622258677345e-06, + "loss": 0.4381, + "step": 49056 + }, + { + "epoch": 0.8476810893005253, + "grad_norm": 1.098458619618245, + "learning_rate": 1.192397183110927e-06, + "loss": 0.6419, + "step": 49057 + }, + { + "epoch": 0.8476983688139342, + "grad_norm": 1.253467608741745, + "learning_rate": 1.192132167940201e-06, + "loss": 0.2511, + "step": 49058 + }, + { + "epoch": 0.8477156483273431, + "grad_norm": 1.137926418135094, + "learning_rate": 1.1918671803563886e-06, + "loss": 0.2694, + "step": 49059 + }, + { + "epoch": 0.847732927840752, + "grad_norm": 1.0004521788560394, + "learning_rate": 1.191602220360315e-06, + "loss": 0.3039, + "step": 49060 + }, + { + "epoch": 0.847750207354161, + "grad_norm": 1.1077546411196348, + "learning_rate": 1.191337287952815e-06, + "loss": 0.334, + "step": 49061 + }, + { + "epoch": 0.8477674868675699, + "grad_norm": 1.718275319149506, + "learning_rate": 1.1910723831347149e-06, + "loss": 0.2795, + "step": 49062 + }, + { + "epoch": 0.8477847663809787, + "grad_norm": 1.1254957356145276, + "learning_rate": 1.1908075059068446e-06, + "loss": 0.385, + "step": 49063 + }, + { + "epoch": 0.8478020458943876, + "grad_norm": 0.9622166581113789, + "learning_rate": 1.190542656270035e-06, + "loss": 0.3197, + "step": 49064 + }, + { + "epoch": 0.8478193254077965, + "grad_norm": 1.331878397128014, + "learning_rate": 1.1902778342251165e-06, + "loss": 0.6069, + "step": 49065 + }, + { + "epoch": 0.8478366049212054, + "grad_norm": 1.3828161333323419, + "learning_rate": 1.1900130397729159e-06, + "loss": 0.3014, + "step": 49066 + }, + { + "epoch": 0.8478538844346143, + "grad_norm": 0.6432221552899905, + "learning_rate": 1.1897482729142662e-06, + "loss": 0.2727, + "step": 49067 + }, + { + "epoch": 0.8478711639480232, + "grad_norm": 1.577071537100358, + "learning_rate": 1.1894835336499922e-06, + "loss": 0.3366, + "step": 49068 + }, + { + "epoch": 0.8478884434614321, + "grad_norm": 1.3407577708875194, + "learning_rate": 1.1892188219809264e-06, + "loss": 0.2931, + "step": 49069 + }, + { + "epoch": 0.847905722974841, + "grad_norm": 1.5788259832656266, + "learning_rate": 1.1889541379078973e-06, + "loss": 0.3685, + "step": 49070 + }, + { + "epoch": 0.8479230024882499, + "grad_norm": 1.3831283735855568, + "learning_rate": 1.1886894814317319e-06, + "loss": 0.2969, + "step": 49071 + }, + { + "epoch": 0.8479402820016588, + "grad_norm": 1.7464421044040326, + "learning_rate": 1.1884248525532638e-06, + "loss": 0.2463, + "step": 49072 + }, + { + "epoch": 0.8479575615150677, + "grad_norm": 1.3188623937295, + "learning_rate": 1.188160251273315e-06, + "loss": 0.3209, + "step": 49073 + }, + { + "epoch": 0.8479748410284766, + "grad_norm": 1.3332836880457852, + "learning_rate": 1.1878956775927187e-06, + "loss": 0.4263, + "step": 49074 + }, + { + "epoch": 0.8479921205418856, + "grad_norm": 1.0866039810589236, + "learning_rate": 1.1876311315123036e-06, + "loss": 0.6101, + "step": 49075 + }, + { + "epoch": 0.8480094000552945, + "grad_norm": 1.167703112721288, + "learning_rate": 1.1873666130328987e-06, + "loss": 0.2437, + "step": 49076 + }, + { + "epoch": 0.8480266795687034, + "grad_norm": 1.3859086589238716, + "learning_rate": 1.1871021221553292e-06, + "loss": 0.3899, + "step": 49077 + }, + { + "epoch": 0.8480439590821123, + "grad_norm": 2.672504580377813, + "learning_rate": 1.1868376588804275e-06, + "loss": 0.5091, + "step": 49078 + }, + { + "epoch": 0.8480612385955212, + "grad_norm": 1.4808680897937394, + "learning_rate": 1.1865732232090189e-06, + "loss": 0.3481, + "step": 49079 + }, + { + "epoch": 0.8480785181089301, + "grad_norm": 0.9109831611950898, + "learning_rate": 1.186308815141932e-06, + "loss": 0.2158, + "step": 49080 + }, + { + "epoch": 0.848095797622339, + "grad_norm": 1.5801301211511356, + "learning_rate": 1.1860444346799983e-06, + "loss": 0.3741, + "step": 49081 + }, + { + "epoch": 0.8481130771357479, + "grad_norm": 1.3096277387613904, + "learning_rate": 1.1857800818240407e-06, + "loss": 0.4504, + "step": 49082 + }, + { + "epoch": 0.8481303566491568, + "grad_norm": 1.4534142707175024, + "learning_rate": 1.185515756574892e-06, + "loss": 0.5777, + "step": 49083 + }, + { + "epoch": 0.8481476361625656, + "grad_norm": 1.779557440563825, + "learning_rate": 1.1852514589333752e-06, + "loss": 0.2192, + "step": 49084 + }, + { + "epoch": 0.8481649156759745, + "grad_norm": 1.9398971866901524, + "learning_rate": 1.1849871889003217e-06, + "loss": 0.3869, + "step": 49085 + }, + { + "epoch": 0.8481821951893834, + "grad_norm": 0.9431220213509867, + "learning_rate": 1.1847229464765575e-06, + "loss": 0.312, + "step": 49086 + }, + { + "epoch": 0.8481994747027923, + "grad_norm": 0.9312031632245958, + "learning_rate": 1.184458731662913e-06, + "loss": 0.3171, + "step": 49087 + }, + { + "epoch": 0.8482167542162012, + "grad_norm": 1.437367457986666, + "learning_rate": 1.1841945444602143e-06, + "loss": 0.3153, + "step": 49088 + }, + { + "epoch": 0.8482340337296101, + "grad_norm": 1.186865954966664, + "learning_rate": 1.1839303848692863e-06, + "loss": 0.4559, + "step": 49089 + }, + { + "epoch": 0.8482513132430191, + "grad_norm": 0.933257503605595, + "learning_rate": 1.1836662528909592e-06, + "loss": 0.2635, + "step": 49090 + }, + { + "epoch": 0.848268592756428, + "grad_norm": 1.459233407497824, + "learning_rate": 1.1834021485260571e-06, + "loss": 0.4057, + "step": 49091 + }, + { + "epoch": 0.8482858722698369, + "grad_norm": 1.732887328422707, + "learning_rate": 1.1831380717754126e-06, + "loss": 0.2237, + "step": 49092 + }, + { + "epoch": 0.8483031517832458, + "grad_norm": 1.2708174875591074, + "learning_rate": 1.1828740226398471e-06, + "loss": 0.3817, + "step": 49093 + }, + { + "epoch": 0.8483204312966547, + "grad_norm": 1.0213509619291639, + "learning_rate": 1.1826100011201936e-06, + "loss": 0.3095, + "step": 49094 + }, + { + "epoch": 0.8483377108100636, + "grad_norm": 0.5759590190591989, + "learning_rate": 1.182346007217272e-06, + "loss": 0.444, + "step": 49095 + }, + { + "epoch": 0.8483549903234725, + "grad_norm": 1.252569616633439, + "learning_rate": 1.1820820409319166e-06, + "loss": 0.4739, + "step": 49096 + }, + { + "epoch": 0.8483722698368814, + "grad_norm": 1.4700960611732794, + "learning_rate": 1.1818181022649456e-06, + "loss": 0.4341, + "step": 49097 + }, + { + "epoch": 0.8483895493502903, + "grad_norm": 1.1056987763594064, + "learning_rate": 1.1815541912171934e-06, + "loss": 0.2236, + "step": 49098 + }, + { + "epoch": 0.8484068288636992, + "grad_norm": 0.7206868492288603, + "learning_rate": 1.1812903077894854e-06, + "loss": 0.8139, + "step": 49099 + }, + { + "epoch": 0.8484241083771081, + "grad_norm": 1.1215001203286545, + "learning_rate": 1.181026451982643e-06, + "loss": 0.4957, + "step": 49100 + }, + { + "epoch": 0.848441387890517, + "grad_norm": 1.6856011790901242, + "learning_rate": 1.180762623797499e-06, + "loss": 0.3045, + "step": 49101 + }, + { + "epoch": 0.848458667403926, + "grad_norm": 1.1306796025884058, + "learning_rate": 1.1804988232348746e-06, + "loss": 0.5877, + "step": 49102 + }, + { + "epoch": 0.8484759469173349, + "grad_norm": 1.314616377785186, + "learning_rate": 1.1802350502955972e-06, + "loss": 0.4022, + "step": 49103 + }, + { + "epoch": 0.8484932264307438, + "grad_norm": 1.3756270475800525, + "learning_rate": 1.179971304980495e-06, + "loss": 0.3337, + "step": 49104 + }, + { + "epoch": 0.8485105059441526, + "grad_norm": 0.9379179822500223, + "learning_rate": 1.179707587290394e-06, + "loss": 0.2904, + "step": 49105 + }, + { + "epoch": 0.8485277854575615, + "grad_norm": 0.9605161606734474, + "learning_rate": 1.179443897226118e-06, + "loss": 0.6428, + "step": 49106 + }, + { + "epoch": 0.8485450649709704, + "grad_norm": 3.0274503208008174, + "learning_rate": 1.1791802347884961e-06, + "loss": 0.5294, + "step": 49107 + }, + { + "epoch": 0.8485623444843793, + "grad_norm": 1.2885502014720227, + "learning_rate": 1.1789165999783491e-06, + "loss": 0.3117, + "step": 49108 + }, + { + "epoch": 0.8485796239977882, + "grad_norm": 1.1412640802081684, + "learning_rate": 1.1786529927965061e-06, + "loss": 0.2791, + "step": 49109 + }, + { + "epoch": 0.8485969035111971, + "grad_norm": 1.4145209860336316, + "learning_rate": 1.1783894132437946e-06, + "loss": 0.368, + "step": 49110 + }, + { + "epoch": 0.848614183024606, + "grad_norm": 1.2816407052374557, + "learning_rate": 1.1781258613210355e-06, + "loss": 0.3626, + "step": 49111 + }, + { + "epoch": 0.8486314625380149, + "grad_norm": 0.6289140379789651, + "learning_rate": 1.1778623370290598e-06, + "loss": 0.5205, + "step": 49112 + }, + { + "epoch": 0.8486487420514238, + "grad_norm": 1.148081193091893, + "learning_rate": 1.1775988403686867e-06, + "loss": 0.3904, + "step": 49113 + }, + { + "epoch": 0.8486660215648327, + "grad_norm": 2.0689625816406116, + "learning_rate": 1.1773353713407442e-06, + "loss": 0.2846, + "step": 49114 + }, + { + "epoch": 0.8486833010782416, + "grad_norm": 1.2427715636252679, + "learning_rate": 1.1770719299460588e-06, + "loss": 0.4204, + "step": 49115 + }, + { + "epoch": 0.8487005805916505, + "grad_norm": 1.1281710009327903, + "learning_rate": 1.1768085161854558e-06, + "loss": 0.3835, + "step": 49116 + }, + { + "epoch": 0.8487178601050595, + "grad_norm": 1.318931908194396, + "learning_rate": 1.1765451300597574e-06, + "loss": 0.1993, + "step": 49117 + }, + { + "epoch": 0.8487351396184684, + "grad_norm": 1.5584176533618088, + "learning_rate": 1.1762817715697916e-06, + "loss": 0.464, + "step": 49118 + }, + { + "epoch": 0.8487524191318773, + "grad_norm": 1.428676448113837, + "learning_rate": 1.176018440716381e-06, + "loss": 0.5139, + "step": 49119 + }, + { + "epoch": 0.8487696986452862, + "grad_norm": 1.376809744417691, + "learning_rate": 1.1757551375003506e-06, + "loss": 0.4623, + "step": 49120 + }, + { + "epoch": 0.8487869781586951, + "grad_norm": 1.2251082533561535, + "learning_rate": 1.1754918619225275e-06, + "loss": 0.3439, + "step": 49121 + }, + { + "epoch": 0.848804257672104, + "grad_norm": 1.3351771535006278, + "learning_rate": 1.1752286139837332e-06, + "loss": 0.1846, + "step": 49122 + }, + { + "epoch": 0.8488215371855129, + "grad_norm": 1.2446561592588699, + "learning_rate": 1.174965393684795e-06, + "loss": 0.3124, + "step": 49123 + }, + { + "epoch": 0.8488388166989218, + "grad_norm": 0.9426374751057239, + "learning_rate": 1.1747022010265341e-06, + "loss": 0.9703, + "step": 49124 + }, + { + "epoch": 0.8488560962123307, + "grad_norm": 1.3708924100915538, + "learning_rate": 1.1744390360097767e-06, + "loss": 0.2406, + "step": 49125 + }, + { + "epoch": 0.8488733757257395, + "grad_norm": 2.4034037744903074, + "learning_rate": 1.1741758986353469e-06, + "loss": 0.4614, + "step": 49126 + }, + { + "epoch": 0.8488906552391484, + "grad_norm": 1.705006967869697, + "learning_rate": 1.1739127889040701e-06, + "loss": 0.5548, + "step": 49127 + }, + { + "epoch": 0.8489079347525573, + "grad_norm": 0.7981584626682561, + "learning_rate": 1.1736497068167684e-06, + "loss": 0.3313, + "step": 49128 + }, + { + "epoch": 0.8489252142659662, + "grad_norm": 1.3275685362021798, + "learning_rate": 1.1733866523742676e-06, + "loss": 0.3118, + "step": 49129 + }, + { + "epoch": 0.8489424937793751, + "grad_norm": 1.0456181080293099, + "learning_rate": 1.1731236255773915e-06, + "loss": 0.1886, + "step": 49130 + }, + { + "epoch": 0.848959773292784, + "grad_norm": 1.0448542494915602, + "learning_rate": 1.172860626426958e-06, + "loss": 0.3888, + "step": 49131 + }, + { + "epoch": 0.848977052806193, + "grad_norm": 0.776989957542854, + "learning_rate": 1.1725976549238006e-06, + "loss": 0.5362, + "step": 49132 + }, + { + "epoch": 0.8489943323196019, + "grad_norm": 1.1518046021762955, + "learning_rate": 1.1723347110687367e-06, + "loss": 0.5501, + "step": 49133 + }, + { + "epoch": 0.8490116118330108, + "grad_norm": 0.5252010417627132, + "learning_rate": 1.1720717948625927e-06, + "loss": 0.5084, + "step": 49134 + }, + { + "epoch": 0.8490288913464197, + "grad_norm": 1.4770215968551428, + "learning_rate": 1.171808906306189e-06, + "loss": 0.2696, + "step": 49135 + }, + { + "epoch": 0.8490461708598286, + "grad_norm": 0.9017816766532915, + "learning_rate": 1.1715460454003524e-06, + "loss": 0.3333, + "step": 49136 + }, + { + "epoch": 0.8490634503732375, + "grad_norm": 0.8306212346133095, + "learning_rate": 1.1712832121459006e-06, + "loss": 0.1886, + "step": 49137 + }, + { + "epoch": 0.8490807298866464, + "grad_norm": 1.5887746747134666, + "learning_rate": 1.1710204065436647e-06, + "loss": 0.3649, + "step": 49138 + }, + { + "epoch": 0.8490980094000553, + "grad_norm": 0.7431863157131638, + "learning_rate": 1.1707576285944622e-06, + "loss": 0.3707, + "step": 49139 + }, + { + "epoch": 0.8491152889134642, + "grad_norm": 1.1870187307863977, + "learning_rate": 1.1704948782991188e-06, + "loss": 0.1303, + "step": 49140 + }, + { + "epoch": 0.8491325684268731, + "grad_norm": 0.9876813338651793, + "learning_rate": 1.1702321556584573e-06, + "loss": 0.5752, + "step": 49141 + }, + { + "epoch": 0.849149847940282, + "grad_norm": 1.075618374462576, + "learning_rate": 1.169969460673298e-06, + "loss": 0.3777, + "step": 49142 + }, + { + "epoch": 0.8491671274536909, + "grad_norm": 1.085447519211684, + "learning_rate": 1.169706793344465e-06, + "loss": 0.3519, + "step": 49143 + }, + { + "epoch": 0.8491844069670998, + "grad_norm": 1.0871398280514433, + "learning_rate": 1.1694441536727809e-06, + "loss": 0.3511, + "step": 49144 + }, + { + "epoch": 0.8492016864805088, + "grad_norm": 1.3046728606732436, + "learning_rate": 1.1691815416590713e-06, + "loss": 0.2481, + "step": 49145 + }, + { + "epoch": 0.8492189659939177, + "grad_norm": 1.2826327129593196, + "learning_rate": 1.168918957304155e-06, + "loss": 0.2431, + "step": 49146 + }, + { + "epoch": 0.8492362455073265, + "grad_norm": 1.1390429947838776, + "learning_rate": 1.1686564006088563e-06, + "loss": 0.3541, + "step": 49147 + }, + { + "epoch": 0.8492535250207354, + "grad_norm": 1.2593109334531192, + "learning_rate": 1.1683938715739961e-06, + "loss": 0.3267, + "step": 49148 + }, + { + "epoch": 0.8492708045341443, + "grad_norm": 1.4461303793481024, + "learning_rate": 1.1681313702003972e-06, + "loss": 0.3982, + "step": 49149 + }, + { + "epoch": 0.8492880840475532, + "grad_norm": 2.716348801724955, + "learning_rate": 1.167868896488883e-06, + "loss": 0.1722, + "step": 49150 + }, + { + "epoch": 0.8493053635609621, + "grad_norm": 1.4346379991186804, + "learning_rate": 1.167606450440274e-06, + "loss": 0.2465, + "step": 49151 + }, + { + "epoch": 0.849322643074371, + "grad_norm": 1.2500426405167258, + "learning_rate": 1.1673440320553941e-06, + "loss": 0.3885, + "step": 49152 + }, + { + "epoch": 0.8493399225877799, + "grad_norm": 1.1294168435199463, + "learning_rate": 1.1670816413350617e-06, + "loss": 0.4243, + "step": 49153 + }, + { + "epoch": 0.8493572021011888, + "grad_norm": 1.3296740313271316, + "learning_rate": 1.1668192782801013e-06, + "loss": 0.3179, + "step": 49154 + }, + { + "epoch": 0.8493744816145977, + "grad_norm": 0.9251782730231979, + "learning_rate": 1.1665569428913347e-06, + "loss": 0.2252, + "step": 49155 + }, + { + "epoch": 0.8493917611280066, + "grad_norm": 0.6733187793022215, + "learning_rate": 1.1662946351695848e-06, + "loss": 0.841, + "step": 49156 + }, + { + "epoch": 0.8494090406414155, + "grad_norm": 0.9385769989461183, + "learning_rate": 1.1660323551156682e-06, + "loss": 0.3007, + "step": 49157 + }, + { + "epoch": 0.8494263201548244, + "grad_norm": 1.1435273635633505, + "learning_rate": 1.1657701027304135e-06, + "loss": 0.2656, + "step": 49158 + }, + { + "epoch": 0.8494435996682334, + "grad_norm": 1.318602002771488, + "learning_rate": 1.1655078780146345e-06, + "loss": 0.443, + "step": 49159 + }, + { + "epoch": 0.8494608791816423, + "grad_norm": 1.2053962876524829, + "learning_rate": 1.165245680969157e-06, + "loss": 0.1471, + "step": 49160 + }, + { + "epoch": 0.8494781586950512, + "grad_norm": 1.024230169364138, + "learning_rate": 1.1649835115948038e-06, + "loss": 0.3233, + "step": 49161 + }, + { + "epoch": 0.8494954382084601, + "grad_norm": 0.9496902205078573, + "learning_rate": 1.164721369892392e-06, + "loss": 0.19, + "step": 49162 + }, + { + "epoch": 0.849512717721869, + "grad_norm": 0.8711416047654902, + "learning_rate": 1.1644592558627453e-06, + "loss": 0.3568, + "step": 49163 + }, + { + "epoch": 0.8495299972352779, + "grad_norm": 1.5723563284078925, + "learning_rate": 1.1641971695066822e-06, + "loss": 0.4424, + "step": 49164 + }, + { + "epoch": 0.8495472767486868, + "grad_norm": 1.4025697924348703, + "learning_rate": 1.1639351108250252e-06, + "loss": 0.4116, + "step": 49165 + }, + { + "epoch": 0.8495645562620957, + "grad_norm": 1.7162912295392663, + "learning_rate": 1.1636730798185947e-06, + "loss": 0.3414, + "step": 49166 + }, + { + "epoch": 0.8495818357755046, + "grad_norm": 2.426318619059539, + "learning_rate": 1.1634110764882134e-06, + "loss": 0.2345, + "step": 49167 + }, + { + "epoch": 0.8495991152889134, + "grad_norm": 1.20241132040744, + "learning_rate": 1.1631491008346984e-06, + "loss": 0.2965, + "step": 49168 + }, + { + "epoch": 0.8496163948023223, + "grad_norm": 1.8150083624651698, + "learning_rate": 1.1628871528588737e-06, + "loss": 0.3233, + "step": 49169 + }, + { + "epoch": 0.8496336743157312, + "grad_norm": 1.7243045274167659, + "learning_rate": 1.162625232561556e-06, + "loss": 0.4341, + "step": 49170 + }, + { + "epoch": 0.8496509538291401, + "grad_norm": 2.0447794514992905, + "learning_rate": 1.1623633399435686e-06, + "loss": 0.3225, + "step": 49171 + }, + { + "epoch": 0.849668233342549, + "grad_norm": 2.3119060885779157, + "learning_rate": 1.1621014750057325e-06, + "loss": 0.4581, + "step": 49172 + }, + { + "epoch": 0.849685512855958, + "grad_norm": 1.027800274998112, + "learning_rate": 1.1618396377488639e-06, + "loss": 0.5072, + "step": 49173 + }, + { + "epoch": 0.8497027923693669, + "grad_norm": 1.3378455161663332, + "learning_rate": 1.1615778281737877e-06, + "loss": 0.2952, + "step": 49174 + }, + { + "epoch": 0.8497200718827758, + "grad_norm": 0.8220633627994574, + "learning_rate": 1.1613160462813189e-06, + "loss": 0.6752, + "step": 49175 + }, + { + "epoch": 0.8497373513961847, + "grad_norm": 1.425175215712331, + "learning_rate": 1.1610542920722822e-06, + "loss": 0.3916, + "step": 49176 + }, + { + "epoch": 0.8497546309095936, + "grad_norm": 0.9046350474791227, + "learning_rate": 1.1607925655474927e-06, + "loss": 0.727, + "step": 49177 + }, + { + "epoch": 0.8497719104230025, + "grad_norm": 1.317761821937625, + "learning_rate": 1.160530866707773e-06, + "loss": 0.3305, + "step": 49178 + }, + { + "epoch": 0.8497891899364114, + "grad_norm": 0.8489312464607147, + "learning_rate": 1.1602691955539425e-06, + "loss": 0.2803, + "step": 49179 + }, + { + "epoch": 0.8498064694498203, + "grad_norm": 0.9349453833547031, + "learning_rate": 1.1600075520868226e-06, + "loss": 0.2818, + "step": 49180 + }, + { + "epoch": 0.8498237489632292, + "grad_norm": 1.9190426140452317, + "learning_rate": 1.1597459363072316e-06, + "loss": 0.3448, + "step": 49181 + }, + { + "epoch": 0.8498410284766381, + "grad_norm": 1.6566434974573683, + "learning_rate": 1.1594843482159846e-06, + "loss": 0.4585, + "step": 49182 + }, + { + "epoch": 0.849858307990047, + "grad_norm": 1.0856005193552067, + "learning_rate": 1.1592227878139061e-06, + "loss": 0.706, + "step": 49183 + }, + { + "epoch": 0.8498755875034559, + "grad_norm": 1.0575751813993, + "learning_rate": 1.1589612551018125e-06, + "loss": 0.2465, + "step": 49184 + }, + { + "epoch": 0.8498928670168648, + "grad_norm": 1.1967071339920479, + "learning_rate": 1.1586997500805263e-06, + "loss": 0.4022, + "step": 49185 + }, + { + "epoch": 0.8499101465302737, + "grad_norm": 2.8061614893098437, + "learning_rate": 1.1584382727508637e-06, + "loss": 0.5501, + "step": 49186 + }, + { + "epoch": 0.8499274260436827, + "grad_norm": 2.012491964210495, + "learning_rate": 1.1581768231136448e-06, + "loss": 0.4043, + "step": 49187 + }, + { + "epoch": 0.8499447055570916, + "grad_norm": 1.0594462227267734, + "learning_rate": 1.157915401169687e-06, + "loss": 0.2172, + "step": 49188 + }, + { + "epoch": 0.8499619850705005, + "grad_norm": 1.010893846631738, + "learning_rate": 1.1576540069198093e-06, + "loss": 0.3037, + "step": 49189 + }, + { + "epoch": 0.8499792645839093, + "grad_norm": 1.092600391592223, + "learning_rate": 1.1573926403648316e-06, + "loss": 0.2799, + "step": 49190 + }, + { + "epoch": 0.8499965440973182, + "grad_norm": 1.1685700724910404, + "learning_rate": 1.1571313015055741e-06, + "loss": 0.1736, + "step": 49191 + }, + { + "epoch": 0.8500138236107271, + "grad_norm": 1.1910824589640816, + "learning_rate": 1.1568699903428538e-06, + "loss": 0.3214, + "step": 49192 + }, + { + "epoch": 0.850031103124136, + "grad_norm": 1.0857420133151616, + "learning_rate": 1.1566087068774866e-06, + "loss": 0.3617, + "step": 49193 + }, + { + "epoch": 0.8500483826375449, + "grad_norm": 1.8507424322560595, + "learning_rate": 1.1563474511102923e-06, + "loss": 0.3916, + "step": 49194 + }, + { + "epoch": 0.8500656621509538, + "grad_norm": 0.8483944920185271, + "learning_rate": 1.1560862230420911e-06, + "loss": 0.3352, + "step": 49195 + }, + { + "epoch": 0.8500829416643627, + "grad_norm": 1.0863013074315606, + "learning_rate": 1.1558250226737011e-06, + "loss": 0.3269, + "step": 49196 + }, + { + "epoch": 0.8501002211777716, + "grad_norm": 1.7442648043130664, + "learning_rate": 1.1555638500059374e-06, + "loss": 0.4263, + "step": 49197 + }, + { + "epoch": 0.8501175006911805, + "grad_norm": 1.594000368573983, + "learning_rate": 1.1553027050396227e-06, + "loss": 0.3384, + "step": 49198 + }, + { + "epoch": 0.8501347802045894, + "grad_norm": 1.8439031748521009, + "learning_rate": 1.1550415877755693e-06, + "loss": 0.405, + "step": 49199 + }, + { + "epoch": 0.8501520597179983, + "grad_norm": 1.109033949722867, + "learning_rate": 1.1547804982145972e-06, + "loss": 0.2437, + "step": 49200 + }, + { + "epoch": 0.8501693392314073, + "grad_norm": 2.092163779424654, + "learning_rate": 1.1545194363575274e-06, + "loss": 0.228, + "step": 49201 + }, + { + "epoch": 0.8501866187448162, + "grad_norm": 1.1001122031910253, + "learning_rate": 1.154258402205174e-06, + "loss": 0.3044, + "step": 49202 + }, + { + "epoch": 0.8502038982582251, + "grad_norm": 0.7197960123335964, + "learning_rate": 1.153997395758356e-06, + "loss": 0.8121, + "step": 49203 + }, + { + "epoch": 0.850221177771634, + "grad_norm": 1.0489407494412155, + "learning_rate": 1.1537364170178888e-06, + "loss": 0.354, + "step": 49204 + }, + { + "epoch": 0.8502384572850429, + "grad_norm": 2.1721012620255356, + "learning_rate": 1.1534754659845914e-06, + "loss": 0.3403, + "step": 49205 + }, + { + "epoch": 0.8502557367984518, + "grad_norm": 2.1408515713946676, + "learning_rate": 1.1532145426592822e-06, + "loss": 0.3858, + "step": 49206 + }, + { + "epoch": 0.8502730163118607, + "grad_norm": 1.8221456233920932, + "learning_rate": 1.1529536470427782e-06, + "loss": 0.4503, + "step": 49207 + }, + { + "epoch": 0.8502902958252696, + "grad_norm": 1.1430582685405752, + "learning_rate": 1.1526927791358943e-06, + "loss": 0.3363, + "step": 49208 + }, + { + "epoch": 0.8503075753386785, + "grad_norm": 1.2145579690744663, + "learning_rate": 1.1524319389394512e-06, + "loss": 0.4644, + "step": 49209 + }, + { + "epoch": 0.8503248548520874, + "grad_norm": 1.2093536278666102, + "learning_rate": 1.1521711264542613e-06, + "loss": 0.2293, + "step": 49210 + }, + { + "epoch": 0.8503421343654962, + "grad_norm": 1.060882495471886, + "learning_rate": 1.1519103416811429e-06, + "loss": 0.324, + "step": 49211 + }, + { + "epoch": 0.8503594138789051, + "grad_norm": 1.0595384590949863, + "learning_rate": 1.1516495846209175e-06, + "loss": 0.3055, + "step": 49212 + }, + { + "epoch": 0.850376693392314, + "grad_norm": 1.028907796275804, + "learning_rate": 1.1513888552743946e-06, + "loss": 0.5001, + "step": 49213 + }, + { + "epoch": 0.8503939729057229, + "grad_norm": 2.406007262811019, + "learning_rate": 1.1511281536423968e-06, + "loss": 0.4124, + "step": 49214 + }, + { + "epoch": 0.8504112524191318, + "grad_norm": 0.9865803386243696, + "learning_rate": 1.150867479725737e-06, + "loss": 0.4547, + "step": 49215 + }, + { + "epoch": 0.8504285319325408, + "grad_norm": 0.9001733310014481, + "learning_rate": 1.150606833525234e-06, + "loss": 0.385, + "step": 49216 + }, + { + "epoch": 0.8504458114459497, + "grad_norm": 1.2328256399679893, + "learning_rate": 1.1503462150417e-06, + "loss": 0.3272, + "step": 49217 + }, + { + "epoch": 0.8504630909593586, + "grad_norm": 1.2249007708721962, + "learning_rate": 1.150085624275955e-06, + "loss": 0.4609, + "step": 49218 + }, + { + "epoch": 0.8504803704727675, + "grad_norm": 1.6205312451090783, + "learning_rate": 1.1498250612288153e-06, + "loss": 0.4847, + "step": 49219 + }, + { + "epoch": 0.8504976499861764, + "grad_norm": 0.9362274192088568, + "learning_rate": 1.1495645259010969e-06, + "loss": 0.25, + "step": 49220 + }, + { + "epoch": 0.8505149294995853, + "grad_norm": 1.238893537130142, + "learning_rate": 1.1493040182936133e-06, + "loss": 0.3593, + "step": 49221 + }, + { + "epoch": 0.8505322090129942, + "grad_norm": 0.7073663088082872, + "learning_rate": 1.149043538407184e-06, + "loss": 0.6927, + "step": 49222 + }, + { + "epoch": 0.8505494885264031, + "grad_norm": 1.5867005788532433, + "learning_rate": 1.148783086242622e-06, + "loss": 0.199, + "step": 49223 + }, + { + "epoch": 0.850566768039812, + "grad_norm": 1.0497117738018795, + "learning_rate": 1.1485226618007428e-06, + "loss": 0.5126, + "step": 49224 + }, + { + "epoch": 0.8505840475532209, + "grad_norm": 1.1200026498306541, + "learning_rate": 1.148262265082366e-06, + "loss": 0.4256, + "step": 49225 + }, + { + "epoch": 0.8506013270666298, + "grad_norm": 1.4451490332328534, + "learning_rate": 1.148001896088302e-06, + "loss": 0.3277, + "step": 49226 + }, + { + "epoch": 0.8506186065800387, + "grad_norm": 2.6277329228000363, + "learning_rate": 1.1477415548193716e-06, + "loss": 0.4006, + "step": 49227 + }, + { + "epoch": 0.8506358860934476, + "grad_norm": 2.123584613347456, + "learning_rate": 1.1474812412763848e-06, + "loss": 0.5274, + "step": 49228 + }, + { + "epoch": 0.8506531656068566, + "grad_norm": 1.6597413692837706, + "learning_rate": 1.1472209554601588e-06, + "loss": 0.2509, + "step": 49229 + }, + { + "epoch": 0.8506704451202655, + "grad_norm": 1.6731472814878305, + "learning_rate": 1.1469606973715108e-06, + "loss": 0.3261, + "step": 49230 + }, + { + "epoch": 0.8506877246336744, + "grad_norm": 1.0545090276882454, + "learning_rate": 1.1467004670112559e-06, + "loss": 0.2854, + "step": 49231 + }, + { + "epoch": 0.8507050041470832, + "grad_norm": 1.471284890668762, + "learning_rate": 1.1464402643802086e-06, + "loss": 0.2746, + "step": 49232 + }, + { + "epoch": 0.8507222836604921, + "grad_norm": 1.5311257503671376, + "learning_rate": 1.1461800894791807e-06, + "loss": 0.2561, + "step": 49233 + }, + { + "epoch": 0.850739563173901, + "grad_norm": 1.7166121710472528, + "learning_rate": 1.1459199423089896e-06, + "loss": 0.3551, + "step": 49234 + }, + { + "epoch": 0.8507568426873099, + "grad_norm": 1.1451454355049955, + "learning_rate": 1.1456598228704507e-06, + "loss": 0.3188, + "step": 49235 + }, + { + "epoch": 0.8507741222007188, + "grad_norm": 1.2368761737366218, + "learning_rate": 1.1453997311643795e-06, + "loss": 0.3356, + "step": 49236 + }, + { + "epoch": 0.8507914017141277, + "grad_norm": 0.6849927617021573, + "learning_rate": 1.1451396671915881e-06, + "loss": 0.609, + "step": 49237 + }, + { + "epoch": 0.8508086812275366, + "grad_norm": 1.739391099681092, + "learning_rate": 1.144879630952893e-06, + "loss": 0.3105, + "step": 49238 + }, + { + "epoch": 0.8508259607409455, + "grad_norm": 1.0090641317649875, + "learning_rate": 1.1446196224491069e-06, + "loss": 0.2454, + "step": 49239 + }, + { + "epoch": 0.8508432402543544, + "grad_norm": 1.8288082043554013, + "learning_rate": 1.1443596416810454e-06, + "loss": 0.2718, + "step": 49240 + }, + { + "epoch": 0.8508605197677633, + "grad_norm": 1.9212708191128292, + "learning_rate": 1.1440996886495215e-06, + "loss": 0.2532, + "step": 49241 + }, + { + "epoch": 0.8508777992811722, + "grad_norm": 1.0972254771094134, + "learning_rate": 1.1438397633553533e-06, + "loss": 0.2649, + "step": 49242 + }, + { + "epoch": 0.8508950787945812, + "grad_norm": 0.8785115463586358, + "learning_rate": 1.1435798657993514e-06, + "loss": 0.2464, + "step": 49243 + }, + { + "epoch": 0.8509123583079901, + "grad_norm": 1.7669727048578625, + "learning_rate": 1.1433199959823282e-06, + "loss": 0.2333, + "step": 49244 + }, + { + "epoch": 0.850929637821399, + "grad_norm": 1.8309930874607498, + "learning_rate": 1.1430601539051001e-06, + "loss": 0.2963, + "step": 49245 + }, + { + "epoch": 0.8509469173348079, + "grad_norm": 1.7794768611965863, + "learning_rate": 1.1428003395684806e-06, + "loss": 0.6753, + "step": 49246 + }, + { + "epoch": 0.8509641968482168, + "grad_norm": 1.3357509567276336, + "learning_rate": 1.1425405529732858e-06, + "loss": 0.4005, + "step": 49247 + }, + { + "epoch": 0.8509814763616257, + "grad_norm": 1.004768197529431, + "learning_rate": 1.1422807941203251e-06, + "loss": 0.3651, + "step": 49248 + }, + { + "epoch": 0.8509987558750346, + "grad_norm": 0.7317127845969619, + "learning_rate": 1.1420210630104167e-06, + "loss": 0.6861, + "step": 49249 + }, + { + "epoch": 0.8510160353884435, + "grad_norm": 1.4107662422455296, + "learning_rate": 1.1417613596443688e-06, + "loss": 0.4144, + "step": 49250 + }, + { + "epoch": 0.8510333149018524, + "grad_norm": 1.2668532982893792, + "learning_rate": 1.1415016840229975e-06, + "loss": 0.5038, + "step": 49251 + }, + { + "epoch": 0.8510505944152613, + "grad_norm": 0.9671788853775752, + "learning_rate": 1.1412420361471154e-06, + "loss": 0.2706, + "step": 49252 + }, + { + "epoch": 0.8510678739286701, + "grad_norm": 0.5556569559715223, + "learning_rate": 1.1409824160175398e-06, + "loss": 0.5908, + "step": 49253 + }, + { + "epoch": 0.851085153442079, + "grad_norm": 2.1242691590558476, + "learning_rate": 1.1407228236350797e-06, + "loss": 0.3517, + "step": 49254 + }, + { + "epoch": 0.8511024329554879, + "grad_norm": 1.175902737303166, + "learning_rate": 1.1404632590005482e-06, + "loss": 0.439, + "step": 49255 + }, + { + "epoch": 0.8511197124688968, + "grad_norm": 1.6047948514441952, + "learning_rate": 1.14020372211476e-06, + "loss": 0.4827, + "step": 49256 + }, + { + "epoch": 0.8511369919823057, + "grad_norm": 0.8941743984593638, + "learning_rate": 1.1399442129785255e-06, + "loss": 0.3002, + "step": 49257 + }, + { + "epoch": 0.8511542714957147, + "grad_norm": 1.550930902732176, + "learning_rate": 1.1396847315926584e-06, + "loss": 0.3807, + "step": 49258 + }, + { + "epoch": 0.8511715510091236, + "grad_norm": 1.1735632434498624, + "learning_rate": 1.1394252779579728e-06, + "loss": 0.352, + "step": 49259 + }, + { + "epoch": 0.8511888305225325, + "grad_norm": 0.9705037804239117, + "learning_rate": 1.1391658520752824e-06, + "loss": 0.2476, + "step": 49260 + }, + { + "epoch": 0.8512061100359414, + "grad_norm": 1.2838027478633767, + "learning_rate": 1.1389064539453953e-06, + "loss": 0.4443, + "step": 49261 + }, + { + "epoch": 0.8512233895493503, + "grad_norm": 2.9707580265813114, + "learning_rate": 1.13864708356913e-06, + "loss": 0.2661, + "step": 49262 + }, + { + "epoch": 0.8512406690627592, + "grad_norm": 0.9236322213962896, + "learning_rate": 1.1383877409472922e-06, + "loss": 0.3533, + "step": 49263 + }, + { + "epoch": 0.8512579485761681, + "grad_norm": 0.9297181248766716, + "learning_rate": 1.138128426080698e-06, + "loss": 0.4021, + "step": 49264 + }, + { + "epoch": 0.851275228089577, + "grad_norm": 1.3879390654674812, + "learning_rate": 1.1378691389701613e-06, + "loss": 0.3535, + "step": 49265 + }, + { + "epoch": 0.8512925076029859, + "grad_norm": 2.5332595917901606, + "learning_rate": 1.137609879616489e-06, + "loss": 0.4571, + "step": 49266 + }, + { + "epoch": 0.8513097871163948, + "grad_norm": 1.2198715500823927, + "learning_rate": 1.1373506480204989e-06, + "loss": 0.3606, + "step": 49267 + }, + { + "epoch": 0.8513270666298037, + "grad_norm": 2.0698316796320753, + "learning_rate": 1.1370914441829983e-06, + "loss": 0.3357, + "step": 49268 + }, + { + "epoch": 0.8513443461432126, + "grad_norm": 2.1122982474072867, + "learning_rate": 1.1368322681048004e-06, + "loss": 0.3332, + "step": 49269 + }, + { + "epoch": 0.8513616256566215, + "grad_norm": 0.7936899180713725, + "learning_rate": 1.1365731197867181e-06, + "loss": 0.2321, + "step": 49270 + }, + { + "epoch": 0.8513789051700305, + "grad_norm": 1.4294379030092774, + "learning_rate": 1.1363139992295636e-06, + "loss": 0.2449, + "step": 49271 + }, + { + "epoch": 0.8513961846834394, + "grad_norm": 2.2191144172104176, + "learning_rate": 1.1360549064341454e-06, + "loss": 0.3054, + "step": 49272 + }, + { + "epoch": 0.8514134641968483, + "grad_norm": 0.9993522177910025, + "learning_rate": 1.135795841401278e-06, + "loss": 0.2192, + "step": 49273 + }, + { + "epoch": 0.8514307437102571, + "grad_norm": 1.426009511379246, + "learning_rate": 1.1355368041317715e-06, + "loss": 0.2544, + "step": 49274 + }, + { + "epoch": 0.851448023223666, + "grad_norm": 1.6843088477985393, + "learning_rate": 1.135277794626436e-06, + "loss": 0.3074, + "step": 49275 + }, + { + "epoch": 0.8514653027370749, + "grad_norm": 1.1704901148054216, + "learning_rate": 1.1350188128860873e-06, + "loss": 0.2169, + "step": 49276 + }, + { + "epoch": 0.8514825822504838, + "grad_norm": 1.718382970633426, + "learning_rate": 1.1347598589115305e-06, + "loss": 0.2766, + "step": 49277 + }, + { + "epoch": 0.8514998617638927, + "grad_norm": 1.0315453181408887, + "learning_rate": 1.1345009327035817e-06, + "loss": 0.4254, + "step": 49278 + }, + { + "epoch": 0.8515171412773016, + "grad_norm": 0.5705639127939683, + "learning_rate": 1.1342420342630479e-06, + "loss": 0.586, + "step": 49279 + }, + { + "epoch": 0.8515344207907105, + "grad_norm": 1.1968258905460696, + "learning_rate": 1.1339831635907416e-06, + "loss": 0.4388, + "step": 49280 + }, + { + "epoch": 0.8515517003041194, + "grad_norm": 2.0534047458023563, + "learning_rate": 1.1337243206874737e-06, + "loss": 0.2958, + "step": 49281 + }, + { + "epoch": 0.8515689798175283, + "grad_norm": 1.0337519780613527, + "learning_rate": 1.1334655055540577e-06, + "loss": 0.2897, + "step": 49282 + }, + { + "epoch": 0.8515862593309372, + "grad_norm": 1.2943312791957815, + "learning_rate": 1.1332067181912986e-06, + "loss": 0.3093, + "step": 49283 + }, + { + "epoch": 0.8516035388443461, + "grad_norm": 1.1580011350659867, + "learning_rate": 1.1329479586000124e-06, + "loss": 0.2301, + "step": 49284 + }, + { + "epoch": 0.851620818357755, + "grad_norm": 1.2538708293506102, + "learning_rate": 1.1326892267810053e-06, + "loss": 0.2875, + "step": 49285 + }, + { + "epoch": 0.851638097871164, + "grad_norm": 0.95314696476514, + "learning_rate": 1.1324305227350896e-06, + "loss": 0.304, + "step": 49286 + }, + { + "epoch": 0.8516553773845729, + "grad_norm": 0.6050003060221484, + "learning_rate": 1.132171846463077e-06, + "loss": 0.5548, + "step": 49287 + }, + { + "epoch": 0.8516726568979818, + "grad_norm": 1.4819644392322746, + "learning_rate": 1.131913197965775e-06, + "loss": 0.3907, + "step": 49288 + }, + { + "epoch": 0.8516899364113907, + "grad_norm": 1.577386657931803, + "learning_rate": 1.1316545772439957e-06, + "loss": 0.304, + "step": 49289 + }, + { + "epoch": 0.8517072159247996, + "grad_norm": 0.6741183437240724, + "learning_rate": 1.1313959842985466e-06, + "loss": 0.2095, + "step": 49290 + }, + { + "epoch": 0.8517244954382085, + "grad_norm": 1.8029444455520625, + "learning_rate": 1.1311374191302393e-06, + "loss": 0.4098, + "step": 49291 + }, + { + "epoch": 0.8517417749516174, + "grad_norm": 1.1654277156776702, + "learning_rate": 1.130878881739883e-06, + "loss": 0.4061, + "step": 49292 + }, + { + "epoch": 0.8517590544650263, + "grad_norm": 1.2474331502200056, + "learning_rate": 1.1306203721282904e-06, + "loss": 0.1811, + "step": 49293 + }, + { + "epoch": 0.8517763339784352, + "grad_norm": 1.5275850575820653, + "learning_rate": 1.13036189029627e-06, + "loss": 0.3766, + "step": 49294 + }, + { + "epoch": 0.851793613491844, + "grad_norm": 2.1857116090177917, + "learning_rate": 1.1301034362446273e-06, + "loss": 0.3224, + "step": 49295 + }, + { + "epoch": 0.8518108930052529, + "grad_norm": 1.5254365109387946, + "learning_rate": 1.1298450099741765e-06, + "loss": 0.4539, + "step": 49296 + }, + { + "epoch": 0.8518281725186618, + "grad_norm": 1.0630255784386324, + "learning_rate": 1.1295866114857234e-06, + "loss": 0.4033, + "step": 49297 + }, + { + "epoch": 0.8518454520320707, + "grad_norm": 1.1922913718582153, + "learning_rate": 1.1293282407800787e-06, + "loss": 0.2156, + "step": 49298 + }, + { + "epoch": 0.8518627315454796, + "grad_norm": 1.3266295161761488, + "learning_rate": 1.1290698978580527e-06, + "loss": 0.3212, + "step": 49299 + }, + { + "epoch": 0.8518800110588886, + "grad_norm": 1.6968005773697685, + "learning_rate": 1.128811582720456e-06, + "loss": 0.3235, + "step": 49300 + }, + { + "epoch": 0.8518972905722975, + "grad_norm": 1.6557008008237402, + "learning_rate": 1.1285532953680934e-06, + "loss": 0.4413, + "step": 49301 + }, + { + "epoch": 0.8519145700857064, + "grad_norm": 1.0683389967016488, + "learning_rate": 1.1282950358017785e-06, + "loss": 0.3584, + "step": 49302 + }, + { + "epoch": 0.8519318495991153, + "grad_norm": 1.457606873973769, + "learning_rate": 1.1280368040223154e-06, + "loss": 0.1648, + "step": 49303 + }, + { + "epoch": 0.8519491291125242, + "grad_norm": 1.0500616297688659, + "learning_rate": 1.1277786000305147e-06, + "loss": 0.3266, + "step": 49304 + }, + { + "epoch": 0.8519664086259331, + "grad_norm": 0.960380146227347, + "learning_rate": 1.1275204238271885e-06, + "loss": 0.3265, + "step": 49305 + }, + { + "epoch": 0.851983688139342, + "grad_norm": 1.4152761459665868, + "learning_rate": 1.1272622754131402e-06, + "loss": 0.3618, + "step": 49306 + }, + { + "epoch": 0.8520009676527509, + "grad_norm": 1.0024000444081727, + "learning_rate": 1.127004154789183e-06, + "loss": 0.4322, + "step": 49307 + }, + { + "epoch": 0.8520182471661598, + "grad_norm": 1.696468427587477, + "learning_rate": 1.1267460619561211e-06, + "loss": 0.2988, + "step": 49308 + }, + { + "epoch": 0.8520355266795687, + "grad_norm": 1.0905063682732876, + "learning_rate": 1.1264879969147647e-06, + "loss": 0.3891, + "step": 49309 + }, + { + "epoch": 0.8520528061929776, + "grad_norm": 0.9955845183575861, + "learning_rate": 1.1262299596659232e-06, + "loss": 0.4471, + "step": 49310 + }, + { + "epoch": 0.8520700857063865, + "grad_norm": 1.34170932751455, + "learning_rate": 1.1259719502104049e-06, + "loss": 0.4415, + "step": 49311 + }, + { + "epoch": 0.8520873652197954, + "grad_norm": 0.6157577521716155, + "learning_rate": 1.125713968549015e-06, + "loss": 0.4329, + "step": 49312 + }, + { + "epoch": 0.8521046447332044, + "grad_norm": 1.7972152892855948, + "learning_rate": 1.1254560146825656e-06, + "loss": 0.3335, + "step": 49313 + }, + { + "epoch": 0.8521219242466133, + "grad_norm": 0.7357832944254776, + "learning_rate": 1.1251980886118596e-06, + "loss": 0.1698, + "step": 49314 + }, + { + "epoch": 0.8521392037600222, + "grad_norm": 2.052180680256489, + "learning_rate": 1.1249401903377077e-06, + "loss": 0.4411, + "step": 49315 + }, + { + "epoch": 0.852156483273431, + "grad_norm": 1.637985766015262, + "learning_rate": 1.1246823198609202e-06, + "loss": 0.3577, + "step": 49316 + }, + { + "epoch": 0.8521737627868399, + "grad_norm": 1.4698165279847317, + "learning_rate": 1.124424477182301e-06, + "loss": 0.3164, + "step": 49317 + }, + { + "epoch": 0.8521910423002488, + "grad_norm": 1.1811824316761472, + "learning_rate": 1.1241666623026592e-06, + "loss": 0.574, + "step": 49318 + }, + { + "epoch": 0.8522083218136577, + "grad_norm": 0.9178635034589773, + "learning_rate": 1.123908875222801e-06, + "loss": 0.3435, + "step": 49319 + }, + { + "epoch": 0.8522256013270666, + "grad_norm": 1.4434009108606323, + "learning_rate": 1.1236511159435347e-06, + "loss": 0.3422, + "step": 49320 + }, + { + "epoch": 0.8522428808404755, + "grad_norm": 1.162039416177806, + "learning_rate": 1.1233933844656674e-06, + "loss": 0.3349, + "step": 49321 + }, + { + "epoch": 0.8522601603538844, + "grad_norm": 1.0758170766578483, + "learning_rate": 1.1231356807900085e-06, + "loss": 0.4498, + "step": 49322 + }, + { + "epoch": 0.8522774398672933, + "grad_norm": 0.7960388787215719, + "learning_rate": 1.1228780049173616e-06, + "loss": 0.2431, + "step": 49323 + }, + { + "epoch": 0.8522947193807022, + "grad_norm": 1.652870804374031, + "learning_rate": 1.1226203568485373e-06, + "loss": 0.3523, + "step": 49324 + }, + { + "epoch": 0.8523119988941111, + "grad_norm": 1.3906449259944462, + "learning_rate": 1.1223627365843381e-06, + "loss": 0.3533, + "step": 49325 + }, + { + "epoch": 0.85232927840752, + "grad_norm": 2.9340359154904405, + "learning_rate": 1.1221051441255748e-06, + "loss": 0.3732, + "step": 49326 + }, + { + "epoch": 0.852346557920929, + "grad_norm": 1.2152510094255011, + "learning_rate": 1.1218475794730532e-06, + "loss": 0.3288, + "step": 49327 + }, + { + "epoch": 0.8523638374343379, + "grad_norm": 1.1336844139490845, + "learning_rate": 1.1215900426275795e-06, + "loss": 0.2694, + "step": 49328 + }, + { + "epoch": 0.8523811169477468, + "grad_norm": 1.6483639590171264, + "learning_rate": 1.1213325335899615e-06, + "loss": 0.4215, + "step": 49329 + }, + { + "epoch": 0.8523983964611557, + "grad_norm": 1.0034510161711139, + "learning_rate": 1.1210750523610026e-06, + "loss": 0.2532, + "step": 49330 + }, + { + "epoch": 0.8524156759745646, + "grad_norm": 1.097298571281269, + "learning_rate": 1.1208175989415114e-06, + "loss": 0.2136, + "step": 49331 + }, + { + "epoch": 0.8524329554879735, + "grad_norm": 1.8910869214792845, + "learning_rate": 1.1205601733322946e-06, + "loss": 0.2375, + "step": 49332 + }, + { + "epoch": 0.8524502350013824, + "grad_norm": 2.105174560151753, + "learning_rate": 1.12030277553416e-06, + "loss": 0.3718, + "step": 49333 + }, + { + "epoch": 0.8524675145147913, + "grad_norm": 1.7649619377247237, + "learning_rate": 1.1200454055479104e-06, + "loss": 0.441, + "step": 49334 + }, + { + "epoch": 0.8524847940282002, + "grad_norm": 0.9652156474886807, + "learning_rate": 1.1197880633743552e-06, + "loss": 0.307, + "step": 49335 + }, + { + "epoch": 0.8525020735416091, + "grad_norm": 1.2482831616431243, + "learning_rate": 1.1195307490142982e-06, + "loss": 0.2952, + "step": 49336 + }, + { + "epoch": 0.852519353055018, + "grad_norm": 1.4119298809278436, + "learning_rate": 1.1192734624685442e-06, + "loss": 0.344, + "step": 49337 + }, + { + "epoch": 0.8525366325684268, + "grad_norm": 0.9829444578855224, + "learning_rate": 1.1190162037379004e-06, + "loss": 0.3172, + "step": 49338 + }, + { + "epoch": 0.8525539120818357, + "grad_norm": 1.343143465999081, + "learning_rate": 1.118758972823173e-06, + "loss": 0.5309, + "step": 49339 + }, + { + "epoch": 0.8525711915952446, + "grad_norm": 0.9741938164049836, + "learning_rate": 1.1185017697251698e-06, + "loss": 0.3033, + "step": 49340 + }, + { + "epoch": 0.8525884711086535, + "grad_norm": 1.764967063030596, + "learning_rate": 1.1182445944446918e-06, + "loss": 0.3149, + "step": 49341 + }, + { + "epoch": 0.8526057506220625, + "grad_norm": 2.0514427266437827, + "learning_rate": 1.117987446982548e-06, + "loss": 0.2044, + "step": 49342 + }, + { + "epoch": 0.8526230301354714, + "grad_norm": 1.3436468246186057, + "learning_rate": 1.1177303273395413e-06, + "loss": 0.3564, + "step": 49343 + }, + { + "epoch": 0.8526403096488803, + "grad_norm": 1.5361872580580302, + "learning_rate": 1.1174732355164787e-06, + "loss": 0.3868, + "step": 49344 + }, + { + "epoch": 0.8526575891622892, + "grad_norm": 1.0422243601741679, + "learning_rate": 1.1172161715141638e-06, + "loss": 0.4812, + "step": 49345 + }, + { + "epoch": 0.8526748686756981, + "grad_norm": 1.4741883064647356, + "learning_rate": 1.1169591353334052e-06, + "loss": 0.3336, + "step": 49346 + }, + { + "epoch": 0.852692148189107, + "grad_norm": 1.5290583646694593, + "learning_rate": 1.1167021269750066e-06, + "loss": 0.2142, + "step": 49347 + }, + { + "epoch": 0.8527094277025159, + "grad_norm": 1.1484091900247322, + "learning_rate": 1.1164451464397686e-06, + "loss": 0.3673, + "step": 49348 + }, + { + "epoch": 0.8527267072159248, + "grad_norm": 1.6237441592728865, + "learning_rate": 1.1161881937285003e-06, + "loss": 0.5159, + "step": 49349 + }, + { + "epoch": 0.8527439867293337, + "grad_norm": 1.4317029622058954, + "learning_rate": 1.1159312688420055e-06, + "loss": 0.2228, + "step": 49350 + }, + { + "epoch": 0.8527612662427426, + "grad_norm": 1.6759924831801656, + "learning_rate": 1.1156743717810915e-06, + "loss": 0.3573, + "step": 49351 + }, + { + "epoch": 0.8527785457561515, + "grad_norm": 1.199604797060256, + "learning_rate": 1.1154175025465585e-06, + "loss": 0.1648, + "step": 49352 + }, + { + "epoch": 0.8527958252695604, + "grad_norm": 1.232771758391962, + "learning_rate": 1.1151606611392152e-06, + "loss": 0.394, + "step": 49353 + }, + { + "epoch": 0.8528131047829693, + "grad_norm": 1.219924620272813, + "learning_rate": 1.1149038475598617e-06, + "loss": 0.3947, + "step": 49354 + }, + { + "epoch": 0.8528303842963783, + "grad_norm": 1.1706332833792494, + "learning_rate": 1.1146470618093042e-06, + "loss": 0.361, + "step": 49355 + }, + { + "epoch": 0.8528476638097872, + "grad_norm": 1.4437521896832586, + "learning_rate": 1.1143903038883497e-06, + "loss": 0.4495, + "step": 49356 + }, + { + "epoch": 0.8528649433231961, + "grad_norm": 1.1317219477741116, + "learning_rate": 1.1141335737977987e-06, + "loss": 0.4023, + "step": 49357 + }, + { + "epoch": 0.852882222836605, + "grad_norm": 1.1269686108231465, + "learning_rate": 1.1138768715384575e-06, + "loss": 0.2167, + "step": 49358 + }, + { + "epoch": 0.8528995023500138, + "grad_norm": 1.6653487849759534, + "learning_rate": 1.1136201971111283e-06, + "loss": 0.1622, + "step": 49359 + }, + { + "epoch": 0.8529167818634227, + "grad_norm": 1.8348711813824512, + "learning_rate": 1.113363550516615e-06, + "loss": 0.366, + "step": 49360 + }, + { + "epoch": 0.8529340613768316, + "grad_norm": 0.7882041844742783, + "learning_rate": 1.113106931755723e-06, + "loss": 0.248, + "step": 49361 + }, + { + "epoch": 0.8529513408902405, + "grad_norm": 1.3797356466790194, + "learning_rate": 1.112850340829258e-06, + "loss": 0.4464, + "step": 49362 + }, + { + "epoch": 0.8529686204036494, + "grad_norm": 1.0078285345902438, + "learning_rate": 1.112593777738018e-06, + "loss": 0.2284, + "step": 49363 + }, + { + "epoch": 0.8529858999170583, + "grad_norm": 1.042282649687193, + "learning_rate": 1.1123372424828117e-06, + "loss": 0.1547, + "step": 49364 + }, + { + "epoch": 0.8530031794304672, + "grad_norm": 0.8744398488670933, + "learning_rate": 1.112080735064439e-06, + "loss": 0.6697, + "step": 49365 + }, + { + "epoch": 0.8530204589438761, + "grad_norm": 1.05395354768916, + "learning_rate": 1.1118242554837055e-06, + "loss": 0.2965, + "step": 49366 + }, + { + "epoch": 0.853037738457285, + "grad_norm": 0.7568842675446439, + "learning_rate": 1.1115678037414156e-06, + "loss": 0.2913, + "step": 49367 + }, + { + "epoch": 0.8530550179706939, + "grad_norm": 1.041357784958669, + "learning_rate": 1.111311379838369e-06, + "loss": 0.3171, + "step": 49368 + }, + { + "epoch": 0.8530722974841028, + "grad_norm": 1.4436101807687836, + "learning_rate": 1.1110549837753715e-06, + "loss": 0.3408, + "step": 49369 + }, + { + "epoch": 0.8530895769975118, + "grad_norm": 1.4893966846052717, + "learning_rate": 1.1107986155532247e-06, + "loss": 0.3252, + "step": 49370 + }, + { + "epoch": 0.8531068565109207, + "grad_norm": 1.6184946533700997, + "learning_rate": 1.1105422751727313e-06, + "loss": 0.285, + "step": 49371 + }, + { + "epoch": 0.8531241360243296, + "grad_norm": 1.4021745929536673, + "learning_rate": 1.1102859626346952e-06, + "loss": 0.3984, + "step": 49372 + }, + { + "epoch": 0.8531414155377385, + "grad_norm": 1.7721602140184645, + "learning_rate": 1.110029677939921e-06, + "loss": 0.3739, + "step": 49373 + }, + { + "epoch": 0.8531586950511474, + "grad_norm": 1.460119822492311, + "learning_rate": 1.1097734210892086e-06, + "loss": 0.692, + "step": 49374 + }, + { + "epoch": 0.8531759745645563, + "grad_norm": 1.0981013314897101, + "learning_rate": 1.1095171920833625e-06, + "loss": 0.1836, + "step": 49375 + }, + { + "epoch": 0.8531932540779652, + "grad_norm": 1.8629849736954578, + "learning_rate": 1.1092609909231821e-06, + "loss": 0.5128, + "step": 49376 + }, + { + "epoch": 0.8532105335913741, + "grad_norm": 1.1059027045585696, + "learning_rate": 1.1090048176094736e-06, + "loss": 0.2887, + "step": 49377 + }, + { + "epoch": 0.853227813104783, + "grad_norm": 2.183795366466646, + "learning_rate": 1.1087486721430364e-06, + "loss": 0.2587, + "step": 49378 + }, + { + "epoch": 0.8532450926181919, + "grad_norm": 1.0145370942908554, + "learning_rate": 1.108492554524674e-06, + "loss": 0.3516, + "step": 49379 + }, + { + "epoch": 0.8532623721316007, + "grad_norm": 1.1960787518816873, + "learning_rate": 1.1082364647551912e-06, + "loss": 0.3433, + "step": 49380 + }, + { + "epoch": 0.8532796516450096, + "grad_norm": 1.248657536141837, + "learning_rate": 1.1079804028353846e-06, + "loss": 0.35, + "step": 49381 + }, + { + "epoch": 0.8532969311584185, + "grad_norm": 2.119936001263309, + "learning_rate": 1.1077243687660611e-06, + "loss": 0.3399, + "step": 49382 + }, + { + "epoch": 0.8533142106718274, + "grad_norm": 1.5620128927949228, + "learning_rate": 1.1074683625480197e-06, + "loss": 0.3002, + "step": 49383 + }, + { + "epoch": 0.8533314901852364, + "grad_norm": 1.21378221447529, + "learning_rate": 1.1072123841820625e-06, + "loss": 0.3517, + "step": 49384 + }, + { + "epoch": 0.8533487696986453, + "grad_norm": 1.4889113148825288, + "learning_rate": 1.106956433668992e-06, + "loss": 0.2904, + "step": 49385 + }, + { + "epoch": 0.8533660492120542, + "grad_norm": 1.054926792799059, + "learning_rate": 1.106700511009612e-06, + "loss": 0.2395, + "step": 49386 + }, + { + "epoch": 0.8533833287254631, + "grad_norm": 1.2704681759579608, + "learning_rate": 1.1064446162047215e-06, + "loss": 0.4708, + "step": 49387 + }, + { + "epoch": 0.853400608238872, + "grad_norm": 0.9146326447063174, + "learning_rate": 1.1061887492551204e-06, + "loss": 0.2781, + "step": 49388 + }, + { + "epoch": 0.8534178877522809, + "grad_norm": 0.9716598138473933, + "learning_rate": 1.1059329101616112e-06, + "loss": 0.3864, + "step": 49389 + }, + { + "epoch": 0.8534351672656898, + "grad_norm": 1.3152383483790409, + "learning_rate": 1.1056770989249977e-06, + "loss": 0.2423, + "step": 49390 + }, + { + "epoch": 0.8534524467790987, + "grad_norm": 1.340534588861194, + "learning_rate": 1.1054213155460802e-06, + "loss": 0.2574, + "step": 49391 + }, + { + "epoch": 0.8534697262925076, + "grad_norm": 1.1677899891668913, + "learning_rate": 1.1051655600256584e-06, + "loss": 0.307, + "step": 49392 + }, + { + "epoch": 0.8534870058059165, + "grad_norm": 0.9657535063201327, + "learning_rate": 1.1049098323645347e-06, + "loss": 0.4669, + "step": 49393 + }, + { + "epoch": 0.8535042853193254, + "grad_norm": 1.0954706248001518, + "learning_rate": 1.1046541325635084e-06, + "loss": 0.3903, + "step": 49394 + }, + { + "epoch": 0.8535215648327343, + "grad_norm": 1.1510086722493453, + "learning_rate": 1.1043984606233803e-06, + "loss": 0.2714, + "step": 49395 + }, + { + "epoch": 0.8535388443461432, + "grad_norm": 1.3103734606043553, + "learning_rate": 1.104142816544953e-06, + "loss": 0.3067, + "step": 49396 + }, + { + "epoch": 0.8535561238595522, + "grad_norm": 0.9299087546589313, + "learning_rate": 1.103887200329029e-06, + "loss": 0.3048, + "step": 49397 + }, + { + "epoch": 0.8535734033729611, + "grad_norm": 1.3928328483348194, + "learning_rate": 1.1036316119764057e-06, + "loss": 0.2488, + "step": 49398 + }, + { + "epoch": 0.85359068288637, + "grad_norm": 1.0338378242394508, + "learning_rate": 1.1033760514878822e-06, + "loss": 0.3874, + "step": 49399 + }, + { + "epoch": 0.8536079623997789, + "grad_norm": 2.0982819838538567, + "learning_rate": 1.1031205188642614e-06, + "loss": 0.3028, + "step": 49400 + }, + { + "epoch": 0.8536252419131877, + "grad_norm": 0.9004042476621826, + "learning_rate": 1.1028650141063424e-06, + "loss": 0.2969, + "step": 49401 + }, + { + "epoch": 0.8536425214265966, + "grad_norm": 1.7785049914450721, + "learning_rate": 1.1026095372149293e-06, + "loss": 0.264, + "step": 49402 + }, + { + "epoch": 0.8536598009400055, + "grad_norm": 1.696915415287583, + "learning_rate": 1.1023540881908168e-06, + "loss": 0.5134, + "step": 49403 + }, + { + "epoch": 0.8536770804534144, + "grad_norm": 1.2459175348366769, + "learning_rate": 1.10209866703481e-06, + "loss": 0.1933, + "step": 49404 + }, + { + "epoch": 0.8536943599668233, + "grad_norm": 2.0200187647281456, + "learning_rate": 1.1018432737477037e-06, + "loss": 0.3795, + "step": 49405 + }, + { + "epoch": 0.8537116394802322, + "grad_norm": 1.2089330875064102, + "learning_rate": 1.1015879083303017e-06, + "loss": 0.4658, + "step": 49406 + }, + { + "epoch": 0.8537289189936411, + "grad_norm": 1.3517347038115624, + "learning_rate": 1.1013325707834033e-06, + "loss": 0.2061, + "step": 49407 + }, + { + "epoch": 0.85374619850705, + "grad_norm": 1.1852334335189543, + "learning_rate": 1.1010772611078058e-06, + "loss": 0.1827, + "step": 49408 + }, + { + "epoch": 0.8537634780204589, + "grad_norm": 1.7261843705837656, + "learning_rate": 1.1008219793043128e-06, + "loss": 0.3807, + "step": 49409 + }, + { + "epoch": 0.8537807575338678, + "grad_norm": 1.8767117122890076, + "learning_rate": 1.1005667253737207e-06, + "loss": 0.3054, + "step": 49410 + }, + { + "epoch": 0.8537980370472767, + "grad_norm": 2.5635564822845573, + "learning_rate": 1.1003114993168307e-06, + "loss": 0.3135, + "step": 49411 + }, + { + "epoch": 0.8538153165606857, + "grad_norm": 1.0526141417059622, + "learning_rate": 1.100056301134438e-06, + "loss": 0.3782, + "step": 49412 + }, + { + "epoch": 0.8538325960740946, + "grad_norm": 0.8701850957211987, + "learning_rate": 1.0998011308273482e-06, + "loss": 0.1994, + "step": 49413 + }, + { + "epoch": 0.8538498755875035, + "grad_norm": 1.424625812413436, + "learning_rate": 1.0995459883963565e-06, + "loss": 0.1838, + "step": 49414 + }, + { + "epoch": 0.8538671551009124, + "grad_norm": 1.1874294536937642, + "learning_rate": 1.0992908738422659e-06, + "loss": 0.4816, + "step": 49415 + }, + { + "epoch": 0.8538844346143213, + "grad_norm": 1.176555923097913, + "learning_rate": 1.0990357871658697e-06, + "loss": 0.3327, + "step": 49416 + }, + { + "epoch": 0.8539017141277302, + "grad_norm": 1.1272606589459333, + "learning_rate": 1.0987807283679718e-06, + "loss": 0.2479, + "step": 49417 + }, + { + "epoch": 0.8539189936411391, + "grad_norm": 0.9836694369304291, + "learning_rate": 1.0985256974493674e-06, + "loss": 0.3108, + "step": 49418 + }, + { + "epoch": 0.853936273154548, + "grad_norm": 0.6597391135738949, + "learning_rate": 1.098270694410858e-06, + "loss": 0.2796, + "step": 49419 + }, + { + "epoch": 0.8539535526679569, + "grad_norm": 1.6677941473336175, + "learning_rate": 1.0980157192532426e-06, + "loss": 0.2304, + "step": 49420 + }, + { + "epoch": 0.8539708321813658, + "grad_norm": 0.8813322069928571, + "learning_rate": 1.0977607719773153e-06, + "loss": 0.2386, + "step": 49421 + }, + { + "epoch": 0.8539881116947746, + "grad_norm": 1.41314217623851, + "learning_rate": 1.0975058525838811e-06, + "loss": 0.1579, + "step": 49422 + }, + { + "epoch": 0.8540053912081835, + "grad_norm": 1.18619113072759, + "learning_rate": 1.0972509610737325e-06, + "loss": 0.3178, + "step": 49423 + }, + { + "epoch": 0.8540226707215924, + "grad_norm": 1.2997447561747906, + "learning_rate": 1.0969960974476713e-06, + "loss": 0.2369, + "step": 49424 + }, + { + "epoch": 0.8540399502350013, + "grad_norm": 1.9066313447650869, + "learning_rate": 1.0967412617064944e-06, + "loss": 0.2792, + "step": 49425 + }, + { + "epoch": 0.8540572297484103, + "grad_norm": 1.997542553182389, + "learning_rate": 1.0964864538510022e-06, + "loss": 0.3329, + "step": 49426 + }, + { + "epoch": 0.8540745092618192, + "grad_norm": 1.26425243343966, + "learning_rate": 1.09623167388199e-06, + "loss": 0.3001, + "step": 49427 + }, + { + "epoch": 0.8540917887752281, + "grad_norm": 0.9117017441864095, + "learning_rate": 1.095976921800258e-06, + "loss": 0.4549, + "step": 49428 + }, + { + "epoch": 0.854109068288637, + "grad_norm": 1.4216601925947385, + "learning_rate": 1.0957221976066013e-06, + "loss": 0.4128, + "step": 49429 + }, + { + "epoch": 0.8541263478020459, + "grad_norm": 1.5576068959063345, + "learning_rate": 1.095467501301819e-06, + "loss": 0.313, + "step": 49430 + }, + { + "epoch": 0.8541436273154548, + "grad_norm": 1.3000497461627745, + "learning_rate": 1.0952128328867119e-06, + "loss": 0.3889, + "step": 49431 + }, + { + "epoch": 0.8541609068288637, + "grad_norm": 0.8228921614236011, + "learning_rate": 1.0949581923620721e-06, + "loss": 0.4922, + "step": 49432 + }, + { + "epoch": 0.8541781863422726, + "grad_norm": 1.1039290564706243, + "learning_rate": 1.0947035797287031e-06, + "loss": 0.4827, + "step": 49433 + }, + { + "epoch": 0.8541954658556815, + "grad_norm": 1.0267255860100046, + "learning_rate": 1.094448994987396e-06, + "loss": 0.2637, + "step": 49434 + }, + { + "epoch": 0.8542127453690904, + "grad_norm": 1.2108289004341914, + "learning_rate": 1.0941944381389524e-06, + "loss": 0.2976, + "step": 49435 + }, + { + "epoch": 0.8542300248824993, + "grad_norm": 3.108193865981272, + "learning_rate": 1.0939399091841685e-06, + "loss": 0.2402, + "step": 49436 + }, + { + "epoch": 0.8542473043959082, + "grad_norm": 1.2059653125404246, + "learning_rate": 1.0936854081238434e-06, + "loss": 0.3364, + "step": 49437 + }, + { + "epoch": 0.8542645839093171, + "grad_norm": 1.3533464128093409, + "learning_rate": 1.0934309349587724e-06, + "loss": 0.2645, + "step": 49438 + }, + { + "epoch": 0.854281863422726, + "grad_norm": 3.057805448462322, + "learning_rate": 1.09317648968975e-06, + "loss": 0.361, + "step": 49439 + }, + { + "epoch": 0.854299142936135, + "grad_norm": 1.7194346409947845, + "learning_rate": 1.092922072317577e-06, + "loss": 0.3298, + "step": 49440 + }, + { + "epoch": 0.8543164224495439, + "grad_norm": 1.5556179330619442, + "learning_rate": 1.0926676828430483e-06, + "loss": 0.397, + "step": 49441 + }, + { + "epoch": 0.8543337019629528, + "grad_norm": 0.94212005720893, + "learning_rate": 1.092413321266963e-06, + "loss": 0.2732, + "step": 49442 + }, + { + "epoch": 0.8543509814763616, + "grad_norm": 1.1388136980368266, + "learning_rate": 1.092158987590115e-06, + "loss": 0.303, + "step": 49443 + }, + { + "epoch": 0.8543682609897705, + "grad_norm": 1.5881307959420496, + "learning_rate": 1.091904681813304e-06, + "loss": 0.6224, + "step": 49444 + }, + { + "epoch": 0.8543855405031794, + "grad_norm": 1.362512157720757, + "learning_rate": 1.091650403937322e-06, + "loss": 0.5081, + "step": 49445 + }, + { + "epoch": 0.8544028200165883, + "grad_norm": 1.5976216167246589, + "learning_rate": 1.091396153962968e-06, + "loss": 0.2537, + "step": 49446 + }, + { + "epoch": 0.8544200995299972, + "grad_norm": 1.2627139009448494, + "learning_rate": 1.0911419318910378e-06, + "loss": 0.3489, + "step": 49447 + }, + { + "epoch": 0.8544373790434061, + "grad_norm": 1.253809303877811, + "learning_rate": 1.0908877377223303e-06, + "loss": 0.3909, + "step": 49448 + }, + { + "epoch": 0.854454658556815, + "grad_norm": 1.1929295962930058, + "learning_rate": 1.09063357145764e-06, + "loss": 0.3064, + "step": 49449 + }, + { + "epoch": 0.8544719380702239, + "grad_norm": 0.8630921960222788, + "learning_rate": 1.09037943309776e-06, + "loss": 0.4126, + "step": 49450 + }, + { + "epoch": 0.8544892175836328, + "grad_norm": 1.107864973076482, + "learning_rate": 1.0901253226434917e-06, + "loss": 0.3073, + "step": 49451 + }, + { + "epoch": 0.8545064970970417, + "grad_norm": 1.325005153175, + "learning_rate": 1.0898712400956235e-06, + "loss": 0.3662, + "step": 49452 + }, + { + "epoch": 0.8545237766104506, + "grad_norm": 1.6022083055484988, + "learning_rate": 1.0896171854549597e-06, + "loss": 0.3793, + "step": 49453 + }, + { + "epoch": 0.8545410561238596, + "grad_norm": 1.153485470549942, + "learning_rate": 1.0893631587222896e-06, + "loss": 0.9025, + "step": 49454 + }, + { + "epoch": 0.8545583356372685, + "grad_norm": 1.5095772378734158, + "learning_rate": 1.0891091598984137e-06, + "loss": 0.4375, + "step": 49455 + }, + { + "epoch": 0.8545756151506774, + "grad_norm": 1.2352038217764116, + "learning_rate": 1.0888551889841226e-06, + "loss": 0.2301, + "step": 49456 + }, + { + "epoch": 0.8545928946640863, + "grad_norm": 1.2147270746601444, + "learning_rate": 1.0886012459802164e-06, + "loss": 0.3745, + "step": 49457 + }, + { + "epoch": 0.8546101741774952, + "grad_norm": 1.560429871234808, + "learning_rate": 1.0883473308874849e-06, + "loss": 0.2983, + "step": 49458 + }, + { + "epoch": 0.8546274536909041, + "grad_norm": 2.5546278124168857, + "learning_rate": 1.0880934437067302e-06, + "loss": 0.3386, + "step": 49459 + }, + { + "epoch": 0.854644733204313, + "grad_norm": 1.5186651006498382, + "learning_rate": 1.0878395844387446e-06, + "loss": 0.389, + "step": 49460 + }, + { + "epoch": 0.8546620127177219, + "grad_norm": 2.334313735167552, + "learning_rate": 1.0875857530843203e-06, + "loss": 0.3588, + "step": 49461 + }, + { + "epoch": 0.8546792922311308, + "grad_norm": 1.5045817355440956, + "learning_rate": 1.087331949644257e-06, + "loss": 0.4762, + "step": 49462 + }, + { + "epoch": 0.8546965717445397, + "grad_norm": 1.0473585407927168, + "learning_rate": 1.0870781741193458e-06, + "loss": 0.4588, + "step": 49463 + }, + { + "epoch": 0.8547138512579486, + "grad_norm": 0.9276704332159162, + "learning_rate": 1.0868244265103822e-06, + "loss": 0.3411, + "step": 49464 + }, + { + "epoch": 0.8547311307713574, + "grad_norm": 1.8503380400851772, + "learning_rate": 1.086570706818163e-06, + "loss": 0.5614, + "step": 49465 + }, + { + "epoch": 0.8547484102847663, + "grad_norm": 1.412715487885695, + "learning_rate": 1.086317015043482e-06, + "loss": 0.4643, + "step": 49466 + }, + { + "epoch": 0.8547656897981752, + "grad_norm": 1.434909290481098, + "learning_rate": 1.0860633511871333e-06, + "loss": 0.4316, + "step": 49467 + }, + { + "epoch": 0.8547829693115842, + "grad_norm": 1.3121111190318706, + "learning_rate": 1.0858097152499124e-06, + "loss": 0.2323, + "step": 49468 + }, + { + "epoch": 0.8548002488249931, + "grad_norm": 1.4213583223841284, + "learning_rate": 1.0855561072326115e-06, + "loss": 0.3699, + "step": 49469 + }, + { + "epoch": 0.854817528338402, + "grad_norm": 0.7629757522395161, + "learning_rate": 1.0853025271360263e-06, + "loss": 0.2793, + "step": 49470 + }, + { + "epoch": 0.8548348078518109, + "grad_norm": 3.1923633077640288, + "learning_rate": 1.0850489749609538e-06, + "loss": 0.2747, + "step": 49471 + }, + { + "epoch": 0.8548520873652198, + "grad_norm": 1.4964709561111964, + "learning_rate": 1.0847954507081826e-06, + "loss": 0.3069, + "step": 49472 + }, + { + "epoch": 0.8548693668786287, + "grad_norm": 1.1928296737674813, + "learning_rate": 1.0845419543785118e-06, + "loss": 0.1664, + "step": 49473 + }, + { + "epoch": 0.8548866463920376, + "grad_norm": 1.099226804310887, + "learning_rate": 1.084288485972731e-06, + "loss": 0.252, + "step": 49474 + }, + { + "epoch": 0.8549039259054465, + "grad_norm": 2.1162424652165828, + "learning_rate": 1.084035045491636e-06, + "loss": 0.3474, + "step": 49475 + }, + { + "epoch": 0.8549212054188554, + "grad_norm": 1.59559495165804, + "learning_rate": 1.0837816329360207e-06, + "loss": 0.3747, + "step": 49476 + }, + { + "epoch": 0.8549384849322643, + "grad_norm": 1.4738518975867703, + "learning_rate": 1.083528248306681e-06, + "loss": 0.2752, + "step": 49477 + }, + { + "epoch": 0.8549557644456732, + "grad_norm": 0.9409398129964451, + "learning_rate": 1.0832748916044067e-06, + "loss": 0.4249, + "step": 49478 + }, + { + "epoch": 0.8549730439590821, + "grad_norm": 0.9021439486322415, + "learning_rate": 1.0830215628299956e-06, + "loss": 0.2319, + "step": 49479 + }, + { + "epoch": 0.854990323472491, + "grad_norm": 0.5130344161176709, + "learning_rate": 1.0827682619842362e-06, + "loss": 0.6145, + "step": 49480 + }, + { + "epoch": 0.8550076029859, + "grad_norm": 1.5669937730621775, + "learning_rate": 1.0825149890679243e-06, + "loss": 0.4119, + "step": 49481 + }, + { + "epoch": 0.8550248824993089, + "grad_norm": 1.6768885479179083, + "learning_rate": 1.082261744081855e-06, + "loss": 0.3865, + "step": 49482 + }, + { + "epoch": 0.8550421620127178, + "grad_norm": 1.4649578387984066, + "learning_rate": 1.0820085270268178e-06, + "loss": 0.4152, + "step": 49483 + }, + { + "epoch": 0.8550594415261267, + "grad_norm": 1.3084122784897947, + "learning_rate": 1.0817553379036095e-06, + "loss": 0.2796, + "step": 49484 + }, + { + "epoch": 0.8550767210395356, + "grad_norm": 1.040168187923097, + "learning_rate": 1.0815021767130196e-06, + "loss": 0.4107, + "step": 49485 + }, + { + "epoch": 0.8550940005529444, + "grad_norm": 1.6321340737215777, + "learning_rate": 1.081249043455842e-06, + "loss": 0.2982, + "step": 49486 + }, + { + "epoch": 0.8551112800663533, + "grad_norm": 1.2999753695108922, + "learning_rate": 1.0809959381328704e-06, + "loss": 0.217, + "step": 49487 + }, + { + "epoch": 0.8551285595797622, + "grad_norm": 1.667285049815171, + "learning_rate": 1.0807428607449e-06, + "loss": 0.2654, + "step": 49488 + }, + { + "epoch": 0.8551458390931711, + "grad_norm": 1.0661380710873862, + "learning_rate": 1.0804898112927188e-06, + "loss": 0.3179, + "step": 49489 + }, + { + "epoch": 0.85516311860658, + "grad_norm": 1.2933342045115115, + "learning_rate": 1.0802367897771226e-06, + "loss": 0.2207, + "step": 49490 + }, + { + "epoch": 0.8551803981199889, + "grad_norm": 1.1352004439998535, + "learning_rate": 1.0799837961989023e-06, + "loss": 0.2908, + "step": 49491 + }, + { + "epoch": 0.8551976776333978, + "grad_norm": 1.1317067425419465, + "learning_rate": 1.0797308305588472e-06, + "loss": 0.403, + "step": 49492 + }, + { + "epoch": 0.8552149571468067, + "grad_norm": 1.0825286901952154, + "learning_rate": 1.0794778928577577e-06, + "loss": 0.2515, + "step": 49493 + }, + { + "epoch": 0.8552322366602156, + "grad_norm": 0.8713265529987021, + "learning_rate": 1.0792249830964185e-06, + "loss": 0.7772, + "step": 49494 + }, + { + "epoch": 0.8552495161736245, + "grad_norm": 1.1407979395347079, + "learning_rate": 1.0789721012756271e-06, + "loss": 0.3914, + "step": 49495 + }, + { + "epoch": 0.8552667956870335, + "grad_norm": 1.3727343619699075, + "learning_rate": 1.0787192473961704e-06, + "loss": 0.3074, + "step": 49496 + }, + { + "epoch": 0.8552840752004424, + "grad_norm": 1.652296037124946, + "learning_rate": 1.0784664214588458e-06, + "loss": 0.3328, + "step": 49497 + }, + { + "epoch": 0.8553013547138513, + "grad_norm": 2.254481229045195, + "learning_rate": 1.078213623464438e-06, + "loss": 0.3176, + "step": 49498 + }, + { + "epoch": 0.8553186342272602, + "grad_norm": 1.2594313550203597, + "learning_rate": 1.0779608534137475e-06, + "loss": 0.2364, + "step": 49499 + }, + { + "epoch": 0.8553359137406691, + "grad_norm": 1.4172172000949674, + "learning_rate": 1.0777081113075616e-06, + "loss": 0.3858, + "step": 49500 + }, + { + "epoch": 0.855353193254078, + "grad_norm": 1.5726429726019922, + "learning_rate": 1.0774553971466694e-06, + "loss": 0.3705, + "step": 49501 + }, + { + "epoch": 0.8553704727674869, + "grad_norm": 1.25895229418644, + "learning_rate": 1.0772027109318672e-06, + "loss": 0.4241, + "step": 49502 + }, + { + "epoch": 0.8553877522808958, + "grad_norm": 1.8113272657395993, + "learning_rate": 1.076950052663943e-06, + "loss": 0.2896, + "step": 49503 + }, + { + "epoch": 0.8554050317943047, + "grad_norm": 1.1718773958531545, + "learning_rate": 1.0766974223436889e-06, + "loss": 0.2837, + "step": 49504 + }, + { + "epoch": 0.8554223113077136, + "grad_norm": 2.0858383837398495, + "learning_rate": 1.0764448199718958e-06, + "loss": 0.5038, + "step": 49505 + }, + { + "epoch": 0.8554395908211225, + "grad_norm": 1.5336602435376034, + "learning_rate": 1.076192245549359e-06, + "loss": 0.2482, + "step": 49506 + }, + { + "epoch": 0.8554568703345313, + "grad_norm": 1.3220465385954305, + "learning_rate": 1.0759396990768634e-06, + "loss": 0.6711, + "step": 49507 + }, + { + "epoch": 0.8554741498479402, + "grad_norm": 1.6837830383494874, + "learning_rate": 1.075687180555205e-06, + "loss": 0.3831, + "step": 49508 + }, + { + "epoch": 0.8554914293613491, + "grad_norm": 1.2991450440573153, + "learning_rate": 1.075434689985171e-06, + "loss": 0.445, + "step": 49509 + }, + { + "epoch": 0.855508708874758, + "grad_norm": 1.8924561437461478, + "learning_rate": 1.0751822273675527e-06, + "loss": 0.3487, + "step": 49510 + }, + { + "epoch": 0.855525988388167, + "grad_norm": 1.1844354764291354, + "learning_rate": 1.0749297927031455e-06, + "loss": 0.5715, + "step": 49511 + }, + { + "epoch": 0.8555432679015759, + "grad_norm": 2.200434499123747, + "learning_rate": 1.0746773859927329e-06, + "loss": 0.2241, + "step": 49512 + }, + { + "epoch": 0.8555605474149848, + "grad_norm": 1.692675073841667, + "learning_rate": 1.074425007237112e-06, + "loss": 0.7627, + "step": 49513 + }, + { + "epoch": 0.8555778269283937, + "grad_norm": 1.9637652163240509, + "learning_rate": 1.074172656437068e-06, + "loss": 0.3246, + "step": 49514 + }, + { + "epoch": 0.8555951064418026, + "grad_norm": 1.3804387830804934, + "learning_rate": 1.0739203335933933e-06, + "loss": 0.5039, + "step": 49515 + }, + { + "epoch": 0.8556123859552115, + "grad_norm": 1.530878743995456, + "learning_rate": 1.0736680387068798e-06, + "loss": 0.3804, + "step": 49516 + }, + { + "epoch": 0.8556296654686204, + "grad_norm": 1.535191426651157, + "learning_rate": 1.0734157717783167e-06, + "loss": 0.4577, + "step": 49517 + }, + { + "epoch": 0.8556469449820293, + "grad_norm": 2.0322149483242677, + "learning_rate": 1.0731635328084921e-06, + "loss": 0.3247, + "step": 49518 + }, + { + "epoch": 0.8556642244954382, + "grad_norm": 1.5792440913926946, + "learning_rate": 1.0729113217982013e-06, + "loss": 0.4018, + "step": 49519 + }, + { + "epoch": 0.8556815040088471, + "grad_norm": 1.5050731048341408, + "learning_rate": 1.0726591387482267e-06, + "loss": 0.384, + "step": 49520 + }, + { + "epoch": 0.855698783522256, + "grad_norm": 1.2686921379164102, + "learning_rate": 1.0724069836593632e-06, + "loss": 0.2284, + "step": 49521 + }, + { + "epoch": 0.8557160630356649, + "grad_norm": 0.884084561127178, + "learning_rate": 1.0721548565324013e-06, + "loss": 0.3189, + "step": 49522 + }, + { + "epoch": 0.8557333425490738, + "grad_norm": 2.030367843775816, + "learning_rate": 1.0719027573681262e-06, + "loss": 0.2386, + "step": 49523 + }, + { + "epoch": 0.8557506220624828, + "grad_norm": 1.9458143409748518, + "learning_rate": 1.0716506861673338e-06, + "loss": 0.3757, + "step": 49524 + }, + { + "epoch": 0.8557679015758917, + "grad_norm": 1.1149859561908502, + "learning_rate": 1.0713986429308066e-06, + "loss": 0.2923, + "step": 49525 + }, + { + "epoch": 0.8557851810893006, + "grad_norm": 1.2549888480292004, + "learning_rate": 1.0711466276593375e-06, + "loss": 0.3317, + "step": 49526 + }, + { + "epoch": 0.8558024606027095, + "grad_norm": 1.5777494942451213, + "learning_rate": 1.0708946403537157e-06, + "loss": 0.2851, + "step": 49527 + }, + { + "epoch": 0.8558197401161183, + "grad_norm": 0.8471934145565171, + "learning_rate": 1.070642681014733e-06, + "loss": 0.1836, + "step": 49528 + }, + { + "epoch": 0.8558370196295272, + "grad_norm": 1.3961233411226999, + "learning_rate": 1.0703907496431743e-06, + "loss": 0.4214, + "step": 49529 + }, + { + "epoch": 0.8558542991429361, + "grad_norm": 1.4043621575712342, + "learning_rate": 1.070138846239831e-06, + "loss": 0.2988, + "step": 49530 + }, + { + "epoch": 0.855871578656345, + "grad_norm": 1.0281732052965507, + "learning_rate": 1.0698869708054926e-06, + "loss": 0.4186, + "step": 49531 + }, + { + "epoch": 0.8558888581697539, + "grad_norm": 1.8583997135479626, + "learning_rate": 1.069635123340943e-06, + "loss": 0.7025, + "step": 49532 + }, + { + "epoch": 0.8559061376831628, + "grad_norm": 1.2008625810981546, + "learning_rate": 1.069383303846978e-06, + "loss": 0.3926, + "step": 49533 + }, + { + "epoch": 0.8559234171965717, + "grad_norm": 1.4929282207163759, + "learning_rate": 1.0691315123243817e-06, + "loss": 0.2158, + "step": 49534 + }, + { + "epoch": 0.8559406967099806, + "grad_norm": 1.659148282511053, + "learning_rate": 1.0688797487739466e-06, + "loss": 0.4073, + "step": 49535 + }, + { + "epoch": 0.8559579762233895, + "grad_norm": 0.7245407367929263, + "learning_rate": 1.0686280131964567e-06, + "loss": 0.1946, + "step": 49536 + }, + { + "epoch": 0.8559752557367984, + "grad_norm": 1.2216772498857607, + "learning_rate": 1.0683763055927042e-06, + "loss": 0.3411, + "step": 49537 + }, + { + "epoch": 0.8559925352502074, + "grad_norm": 2.4788861247431355, + "learning_rate": 1.0681246259634725e-06, + "loss": 0.5501, + "step": 49538 + }, + { + "epoch": 0.8560098147636163, + "grad_norm": 0.8294391598897977, + "learning_rate": 1.0678729743095562e-06, + "loss": 0.306, + "step": 49539 + }, + { + "epoch": 0.8560270942770252, + "grad_norm": 1.604317081519418, + "learning_rate": 1.0676213506317401e-06, + "loss": 0.1681, + "step": 49540 + }, + { + "epoch": 0.8560443737904341, + "grad_norm": 1.8313561705734245, + "learning_rate": 1.0673697549308137e-06, + "loss": 0.4168, + "step": 49541 + }, + { + "epoch": 0.856061653303843, + "grad_norm": 1.074982490462448, + "learning_rate": 1.0671181872075643e-06, + "loss": 0.1758, + "step": 49542 + }, + { + "epoch": 0.8560789328172519, + "grad_norm": 1.4294817713334216, + "learning_rate": 1.0668666474627777e-06, + "loss": 0.4201, + "step": 49543 + }, + { + "epoch": 0.8560962123306608, + "grad_norm": 1.2100238745823058, + "learning_rate": 1.0666151356972433e-06, + "loss": 0.3613, + "step": 49544 + }, + { + "epoch": 0.8561134918440697, + "grad_norm": 1.5514168590771962, + "learning_rate": 1.0663636519117493e-06, + "loss": 0.3867, + "step": 49545 + }, + { + "epoch": 0.8561307713574786, + "grad_norm": 1.3827488347715065, + "learning_rate": 1.0661121961070853e-06, + "loss": 0.2949, + "step": 49546 + }, + { + "epoch": 0.8561480508708875, + "grad_norm": 0.9826379920882262, + "learning_rate": 1.065860768284035e-06, + "loss": 0.1604, + "step": 49547 + }, + { + "epoch": 0.8561653303842964, + "grad_norm": 1.9479361578977752, + "learning_rate": 1.0656093684433899e-06, + "loss": 0.2984, + "step": 49548 + }, + { + "epoch": 0.8561826098977052, + "grad_norm": 1.4630423697479296, + "learning_rate": 1.0653579965859328e-06, + "loss": 0.6611, + "step": 49549 + }, + { + "epoch": 0.8561998894111141, + "grad_norm": 1.5675147245178815, + "learning_rate": 1.0651066527124542e-06, + "loss": 0.3618, + "step": 49550 + }, + { + "epoch": 0.856217168924523, + "grad_norm": 1.0719370837210427, + "learning_rate": 1.0648553368237401e-06, + "loss": 0.6691, + "step": 49551 + }, + { + "epoch": 0.856234448437932, + "grad_norm": 1.140838035029172, + "learning_rate": 1.06460404892058e-06, + "loss": 0.2249, + "step": 49552 + }, + { + "epoch": 0.8562517279513409, + "grad_norm": 0.8750839344894418, + "learning_rate": 1.0643527890037596e-06, + "loss": 0.2058, + "step": 49553 + }, + { + "epoch": 0.8562690074647498, + "grad_norm": 1.466901806612427, + "learning_rate": 1.0641015570740632e-06, + "loss": 0.3186, + "step": 49554 + }, + { + "epoch": 0.8562862869781587, + "grad_norm": 1.2729160694530461, + "learning_rate": 1.0638503531322798e-06, + "loss": 0.3509, + "step": 49555 + }, + { + "epoch": 0.8563035664915676, + "grad_norm": 1.3855338472313448, + "learning_rate": 1.0635991771791953e-06, + "loss": 0.3667, + "step": 49556 + }, + { + "epoch": 0.8563208460049765, + "grad_norm": 1.7267984746740899, + "learning_rate": 1.0633480292156007e-06, + "loss": 0.2513, + "step": 49557 + }, + { + "epoch": 0.8563381255183854, + "grad_norm": 1.0692978896765757, + "learning_rate": 1.0630969092422761e-06, + "loss": 0.2774, + "step": 49558 + }, + { + "epoch": 0.8563554050317943, + "grad_norm": 0.8620043776645403, + "learning_rate": 1.0628458172600142e-06, + "loss": 0.1545, + "step": 49559 + }, + { + "epoch": 0.8563726845452032, + "grad_norm": 1.2696722549713544, + "learning_rate": 1.0625947532695957e-06, + "loss": 0.2847, + "step": 49560 + }, + { + "epoch": 0.8563899640586121, + "grad_norm": 0.9648487069731548, + "learning_rate": 1.0623437172718098e-06, + "loss": 0.4567, + "step": 49561 + }, + { + "epoch": 0.856407243572021, + "grad_norm": 1.302275506088833, + "learning_rate": 1.0620927092674448e-06, + "loss": 0.5105, + "step": 49562 + }, + { + "epoch": 0.8564245230854299, + "grad_norm": 1.404926468394058, + "learning_rate": 1.0618417292572825e-06, + "loss": 0.3299, + "step": 49563 + }, + { + "epoch": 0.8564418025988388, + "grad_norm": 1.3891285289816115, + "learning_rate": 1.061590777242113e-06, + "loss": 0.4414, + "step": 49564 + }, + { + "epoch": 0.8564590821122477, + "grad_norm": 1.136796219049201, + "learning_rate": 1.0613398532227182e-06, + "loss": 0.3775, + "step": 49565 + }, + { + "epoch": 0.8564763616256567, + "grad_norm": 1.1944544383447442, + "learning_rate": 1.0610889571998872e-06, + "loss": 0.5226, + "step": 49566 + }, + { + "epoch": 0.8564936411390656, + "grad_norm": 1.9476375347238393, + "learning_rate": 1.0608380891744042e-06, + "loss": 0.409, + "step": 49567 + }, + { + "epoch": 0.8565109206524745, + "grad_norm": 1.8490756361750946, + "learning_rate": 1.0605872491470581e-06, + "loss": 0.3757, + "step": 49568 + }, + { + "epoch": 0.8565282001658834, + "grad_norm": 1.3556017953348691, + "learning_rate": 1.0603364371186297e-06, + "loss": 0.3357, + "step": 49569 + }, + { + "epoch": 0.8565454796792922, + "grad_norm": 1.771866701369955, + "learning_rate": 1.0600856530899095e-06, + "loss": 0.307, + "step": 49570 + }, + { + "epoch": 0.8565627591927011, + "grad_norm": 1.041683405917028, + "learning_rate": 1.0598348970616778e-06, + "loss": 0.4148, + "step": 49571 + }, + { + "epoch": 0.85658003870611, + "grad_norm": 1.2139607733751816, + "learning_rate": 1.0595841690347231e-06, + "loss": 0.3292, + "step": 49572 + }, + { + "epoch": 0.8565973182195189, + "grad_norm": 1.2382241868311545, + "learning_rate": 1.0593334690098323e-06, + "loss": 0.2636, + "step": 49573 + }, + { + "epoch": 0.8566145977329278, + "grad_norm": 1.9800934770869398, + "learning_rate": 1.059082796987787e-06, + "loss": 0.2832, + "step": 49574 + }, + { + "epoch": 0.8566318772463367, + "grad_norm": 0.9769939093548988, + "learning_rate": 1.0588321529693747e-06, + "loss": 0.3806, + "step": 49575 + }, + { + "epoch": 0.8566491567597456, + "grad_norm": 1.2796784866607782, + "learning_rate": 1.0585815369553787e-06, + "loss": 0.3597, + "step": 49576 + }, + { + "epoch": 0.8566664362731545, + "grad_norm": 1.3179237563664692, + "learning_rate": 1.0583309489465853e-06, + "loss": 0.3377, + "step": 49577 + }, + { + "epoch": 0.8566837157865634, + "grad_norm": 1.3762994350746296, + "learning_rate": 1.0580803889437763e-06, + "loss": 0.2968, + "step": 49578 + }, + { + "epoch": 0.8567009952999723, + "grad_norm": 0.9694844779337302, + "learning_rate": 1.057829856947743e-06, + "loss": 0.2799, + "step": 49579 + }, + { + "epoch": 0.8567182748133813, + "grad_norm": 2.2850028390194836, + "learning_rate": 1.0575793529592637e-06, + "loss": 0.3381, + "step": 49580 + }, + { + "epoch": 0.8567355543267902, + "grad_norm": 1.0891203411856452, + "learning_rate": 1.057328876979128e-06, + "loss": 0.3295, + "step": 49581 + }, + { + "epoch": 0.8567528338401991, + "grad_norm": 1.1145490729862966, + "learning_rate": 1.0570784290081148e-06, + "loss": 0.4007, + "step": 49582 + }, + { + "epoch": 0.856770113353608, + "grad_norm": 0.5264632510416416, + "learning_rate": 1.056828009047014e-06, + "loss": 0.7043, + "step": 49583 + }, + { + "epoch": 0.8567873928670169, + "grad_norm": 2.009795228134077, + "learning_rate": 1.0565776170966068e-06, + "loss": 0.4356, + "step": 49584 + }, + { + "epoch": 0.8568046723804258, + "grad_norm": 1.2494781816411884, + "learning_rate": 1.0563272531576763e-06, + "loss": 0.2604, + "step": 49585 + }, + { + "epoch": 0.8568219518938347, + "grad_norm": 1.825677507291541, + "learning_rate": 1.0560769172310115e-06, + "loss": 0.4592, + "step": 49586 + }, + { + "epoch": 0.8568392314072436, + "grad_norm": 1.3641965218725762, + "learning_rate": 1.055826609317392e-06, + "loss": 0.2683, + "step": 49587 + }, + { + "epoch": 0.8568565109206525, + "grad_norm": 2.0813496944283756, + "learning_rate": 1.0555763294176047e-06, + "loss": 0.2826, + "step": 49588 + }, + { + "epoch": 0.8568737904340614, + "grad_norm": 3.056602928530511, + "learning_rate": 1.0553260775324304e-06, + "loss": 0.4262, + "step": 49589 + }, + { + "epoch": 0.8568910699474703, + "grad_norm": 1.076218090730423, + "learning_rate": 1.0550758536626538e-06, + "loss": 0.2464, + "step": 49590 + }, + { + "epoch": 0.8569083494608791, + "grad_norm": 1.0552770913387552, + "learning_rate": 1.0548256578090599e-06, + "loss": 0.2375, + "step": 49591 + }, + { + "epoch": 0.856925628974288, + "grad_norm": 1.4698050813264063, + "learning_rate": 1.0545754899724336e-06, + "loss": 0.4885, + "step": 49592 + }, + { + "epoch": 0.8569429084876969, + "grad_norm": 1.2436741102300033, + "learning_rate": 1.0543253501535578e-06, + "loss": 0.4126, + "step": 49593 + }, + { + "epoch": 0.8569601880011058, + "grad_norm": 1.5156506639766916, + "learning_rate": 1.0540752383532116e-06, + "loss": 0.3417, + "step": 49594 + }, + { + "epoch": 0.8569774675145148, + "grad_norm": 1.171734007685323, + "learning_rate": 1.0538251545721824e-06, + "loss": 0.4891, + "step": 49595 + }, + { + "epoch": 0.8569947470279237, + "grad_norm": 1.5810329532198975, + "learning_rate": 1.0535750988112526e-06, + "loss": 0.2375, + "step": 49596 + }, + { + "epoch": 0.8570120265413326, + "grad_norm": 0.9493944197990696, + "learning_rate": 1.0533250710712062e-06, + "loss": 0.3922, + "step": 49597 + }, + { + "epoch": 0.8570293060547415, + "grad_norm": 1.0180293823880362, + "learning_rate": 1.0530750713528248e-06, + "loss": 0.2172, + "step": 49598 + }, + { + "epoch": 0.8570465855681504, + "grad_norm": 1.4070791976158836, + "learning_rate": 1.0528250996568946e-06, + "loss": 0.3301, + "step": 49599 + }, + { + "epoch": 0.8570638650815593, + "grad_norm": 1.3079220345370401, + "learning_rate": 1.0525751559841935e-06, + "loss": 0.3162, + "step": 49600 + }, + { + "epoch": 0.8570811445949682, + "grad_norm": 1.2269687535177807, + "learning_rate": 1.0523252403355067e-06, + "loss": 0.4348, + "step": 49601 + }, + { + "epoch": 0.8570984241083771, + "grad_norm": 2.1135053667132113, + "learning_rate": 1.0520753527116178e-06, + "loss": 0.3685, + "step": 49602 + }, + { + "epoch": 0.857115703621786, + "grad_norm": 1.1831084233649838, + "learning_rate": 1.0518254931133109e-06, + "loss": 0.3684, + "step": 49603 + }, + { + "epoch": 0.8571329831351949, + "grad_norm": 1.3663502658184192, + "learning_rate": 1.0515756615413664e-06, + "loss": 0.3866, + "step": 49604 + }, + { + "epoch": 0.8571502626486038, + "grad_norm": 1.3043823913125956, + "learning_rate": 1.0513258579965646e-06, + "loss": 0.1661, + "step": 49605 + }, + { + "epoch": 0.8571675421620127, + "grad_norm": 1.177471167250402, + "learning_rate": 1.0510760824796907e-06, + "loss": 0.5729, + "step": 49606 + }, + { + "epoch": 0.8571848216754216, + "grad_norm": 1.3237654085805683, + "learning_rate": 1.050826334991527e-06, + "loss": 0.2756, + "step": 49607 + }, + { + "epoch": 0.8572021011888306, + "grad_norm": 1.21233624349527, + "learning_rate": 1.0505766155328567e-06, + "loss": 0.2823, + "step": 49608 + }, + { + "epoch": 0.8572193807022395, + "grad_norm": 1.8290813076483623, + "learning_rate": 1.0503269241044588e-06, + "loss": 0.357, + "step": 49609 + }, + { + "epoch": 0.8572366602156484, + "grad_norm": 1.7689036106757212, + "learning_rate": 1.0500772607071196e-06, + "loss": 0.5533, + "step": 49610 + }, + { + "epoch": 0.8572539397290573, + "grad_norm": 1.8496606839236722, + "learning_rate": 1.0498276253416163e-06, + "loss": 0.4483, + "step": 49611 + }, + { + "epoch": 0.8572712192424662, + "grad_norm": 1.5498726392732014, + "learning_rate": 1.0495780180087322e-06, + "loss": 0.1876, + "step": 49612 + }, + { + "epoch": 0.857288498755875, + "grad_norm": 1.2001259592344904, + "learning_rate": 1.0493284387092528e-06, + "loss": 0.4228, + "step": 49613 + }, + { + "epoch": 0.8573057782692839, + "grad_norm": 1.1753534976628492, + "learning_rate": 1.049078887443955e-06, + "loss": 0.3147, + "step": 49614 + }, + { + "epoch": 0.8573230577826928, + "grad_norm": 1.975816800109821, + "learning_rate": 1.0488293642136239e-06, + "loss": 0.3914, + "step": 49615 + }, + { + "epoch": 0.8573403372961017, + "grad_norm": 1.6323699547013852, + "learning_rate": 1.0485798690190385e-06, + "loss": 0.9226, + "step": 49616 + }, + { + "epoch": 0.8573576168095106, + "grad_norm": 0.9877583072458715, + "learning_rate": 1.048330401860982e-06, + "loss": 0.285, + "step": 49617 + }, + { + "epoch": 0.8573748963229195, + "grad_norm": 1.3999268352095438, + "learning_rate": 1.0480809627402322e-06, + "loss": 0.2944, + "step": 49618 + }, + { + "epoch": 0.8573921758363284, + "grad_norm": 1.196828665280375, + "learning_rate": 1.0478315516575753e-06, + "loss": 0.2846, + "step": 49619 + }, + { + "epoch": 0.8574094553497373, + "grad_norm": 1.1354588371440852, + "learning_rate": 1.04758216861379e-06, + "loss": 0.256, + "step": 49620 + }, + { + "epoch": 0.8574267348631462, + "grad_norm": 1.468756175584399, + "learning_rate": 1.0473328136096594e-06, + "loss": 0.3721, + "step": 49621 + }, + { + "epoch": 0.8574440143765552, + "grad_norm": 1.047249721599254, + "learning_rate": 1.04708348664596e-06, + "loss": 0.669, + "step": 49622 + }, + { + "epoch": 0.8574612938899641, + "grad_norm": 1.29386500661633, + "learning_rate": 1.046834187723479e-06, + "loss": 0.3475, + "step": 49623 + }, + { + "epoch": 0.857478573403373, + "grad_norm": 1.6215299567828818, + "learning_rate": 1.0465849168429898e-06, + "loss": 0.2912, + "step": 49624 + }, + { + "epoch": 0.8574958529167819, + "grad_norm": 1.6795851526940075, + "learning_rate": 1.0463356740052788e-06, + "loss": 0.3069, + "step": 49625 + }, + { + "epoch": 0.8575131324301908, + "grad_norm": 0.9242657363536781, + "learning_rate": 1.0460864592111265e-06, + "loss": 0.3778, + "step": 49626 + }, + { + "epoch": 0.8575304119435997, + "grad_norm": 1.27513456195438, + "learning_rate": 1.0458372724613097e-06, + "loss": 0.2883, + "step": 49627 + }, + { + "epoch": 0.8575476914570086, + "grad_norm": 1.325878606380076, + "learning_rate": 1.0455881137566126e-06, + "loss": 0.3292, + "step": 49628 + }, + { + "epoch": 0.8575649709704175, + "grad_norm": 1.502507434083935, + "learning_rate": 1.0453389830978122e-06, + "loss": 0.3145, + "step": 49629 + }, + { + "epoch": 0.8575822504838264, + "grad_norm": 2.5943265992951194, + "learning_rate": 1.0450898804856913e-06, + "loss": 0.354, + "step": 49630 + }, + { + "epoch": 0.8575995299972353, + "grad_norm": 0.7934877917890973, + "learning_rate": 1.0448408059210291e-06, + "loss": 0.3014, + "step": 49631 + }, + { + "epoch": 0.8576168095106442, + "grad_norm": 1.5258127173808689, + "learning_rate": 1.0445917594046073e-06, + "loss": 0.5696, + "step": 49632 + }, + { + "epoch": 0.8576340890240531, + "grad_norm": 1.0950949040513749, + "learning_rate": 1.0443427409372031e-06, + "loss": 0.3744, + "step": 49633 + }, + { + "epoch": 0.8576513685374619, + "grad_norm": 1.0658846998772313, + "learning_rate": 1.0440937505196004e-06, + "loss": 0.5303, + "step": 49634 + }, + { + "epoch": 0.8576686480508708, + "grad_norm": 1.3644852716818525, + "learning_rate": 1.0438447881525749e-06, + "loss": 0.2638, + "step": 49635 + }, + { + "epoch": 0.8576859275642797, + "grad_norm": 1.4605440717556977, + "learning_rate": 1.0435958538369072e-06, + "loss": 0.2907, + "step": 49636 + }, + { + "epoch": 0.8577032070776887, + "grad_norm": 0.9599140115034163, + "learning_rate": 1.0433469475733816e-06, + "loss": 0.3232, + "step": 49637 + }, + { + "epoch": 0.8577204865910976, + "grad_norm": 1.7237575345029446, + "learning_rate": 1.0430980693627701e-06, + "loss": 0.42, + "step": 49638 + }, + { + "epoch": 0.8577377661045065, + "grad_norm": 1.5080941170730158, + "learning_rate": 1.0428492192058592e-06, + "loss": 0.4418, + "step": 49639 + }, + { + "epoch": 0.8577550456179154, + "grad_norm": 2.2559411959360234, + "learning_rate": 1.0426003971034238e-06, + "loss": 0.2678, + "step": 49640 + }, + { + "epoch": 0.8577723251313243, + "grad_norm": 1.3933563242761293, + "learning_rate": 1.042351603056244e-06, + "loss": 0.4104, + "step": 49641 + }, + { + "epoch": 0.8577896046447332, + "grad_norm": 1.0682304353573355, + "learning_rate": 1.0421028370651e-06, + "loss": 0.1459, + "step": 49642 + }, + { + "epoch": 0.8578068841581421, + "grad_norm": 1.2144109129022636, + "learning_rate": 1.0418540991307723e-06, + "loss": 0.4701, + "step": 49643 + }, + { + "epoch": 0.857824163671551, + "grad_norm": 1.222515838844426, + "learning_rate": 1.041605389254038e-06, + "loss": 0.2654, + "step": 49644 + }, + { + "epoch": 0.8578414431849599, + "grad_norm": 1.3375290169393392, + "learning_rate": 1.0413567074356757e-06, + "loss": 0.4837, + "step": 49645 + }, + { + "epoch": 0.8578587226983688, + "grad_norm": 1.498543157880383, + "learning_rate": 1.041108053676464e-06, + "loss": 0.36, + "step": 49646 + }, + { + "epoch": 0.8578760022117777, + "grad_norm": 2.294368705192135, + "learning_rate": 1.0408594279771833e-06, + "loss": 0.2531, + "step": 49647 + }, + { + "epoch": 0.8578932817251866, + "grad_norm": 1.8577007005609099, + "learning_rate": 1.040610830338613e-06, + "loss": 0.3775, + "step": 49648 + }, + { + "epoch": 0.8579105612385955, + "grad_norm": 1.2153476241346897, + "learning_rate": 1.0403622607615293e-06, + "loss": 0.2331, + "step": 49649 + }, + { + "epoch": 0.8579278407520045, + "grad_norm": 1.4792402971453082, + "learning_rate": 1.0401137192467126e-06, + "loss": 0.2947, + "step": 49650 + }, + { + "epoch": 0.8579451202654134, + "grad_norm": 1.1616618058792587, + "learning_rate": 1.0398652057949399e-06, + "loss": 0.5639, + "step": 49651 + }, + { + "epoch": 0.8579623997788223, + "grad_norm": 1.183134207000461, + "learning_rate": 1.0396167204069907e-06, + "loss": 0.7619, + "step": 49652 + }, + { + "epoch": 0.8579796792922312, + "grad_norm": 1.949083392203054, + "learning_rate": 1.039368263083642e-06, + "loss": 0.3575, + "step": 49653 + }, + { + "epoch": 0.8579969588056401, + "grad_norm": 1.85131405034454, + "learning_rate": 1.0391198338256748e-06, + "loss": 0.2651, + "step": 49654 + }, + { + "epoch": 0.8580142383190489, + "grad_norm": 0.8137413860926446, + "learning_rate": 1.0388714326338655e-06, + "loss": 0.363, + "step": 49655 + }, + { + "epoch": 0.8580315178324578, + "grad_norm": 1.358073432921616, + "learning_rate": 1.0386230595089897e-06, + "loss": 0.3157, + "step": 49656 + }, + { + "epoch": 0.8580487973458667, + "grad_norm": 1.918664341370845, + "learning_rate": 1.0383747144518297e-06, + "loss": 0.2364, + "step": 49657 + }, + { + "epoch": 0.8580660768592756, + "grad_norm": 1.4662346295500637, + "learning_rate": 1.0381263974631572e-06, + "loss": 0.4032, + "step": 49658 + }, + { + "epoch": 0.8580833563726845, + "grad_norm": 1.026774618833553, + "learning_rate": 1.0378781085437584e-06, + "loss": 0.3022, + "step": 49659 + }, + { + "epoch": 0.8581006358860934, + "grad_norm": 0.7651080730766658, + "learning_rate": 1.0376298476944035e-06, + "loss": 0.2189, + "step": 49660 + }, + { + "epoch": 0.8581179153995023, + "grad_norm": 1.2644476532889084, + "learning_rate": 1.0373816149158756e-06, + "loss": 0.4931, + "step": 49661 + }, + { + "epoch": 0.8581351949129112, + "grad_norm": 1.2434328609382717, + "learning_rate": 1.037133410208948e-06, + "loss": 0.4343, + "step": 49662 + }, + { + "epoch": 0.8581524744263201, + "grad_norm": 1.1460761915136104, + "learning_rate": 1.0368852335744007e-06, + "loss": 0.4089, + "step": 49663 + }, + { + "epoch": 0.858169753939729, + "grad_norm": 1.6486117591408191, + "learning_rate": 1.036637085013007e-06, + "loss": 0.3419, + "step": 49664 + }, + { + "epoch": 0.858187033453138, + "grad_norm": 2.1930836751985243, + "learning_rate": 1.0363889645255508e-06, + "loss": 0.4676, + "step": 49665 + }, + { + "epoch": 0.8582043129665469, + "grad_norm": 1.4008124375223643, + "learning_rate": 1.0361408721128053e-06, + "loss": 0.2983, + "step": 49666 + }, + { + "epoch": 0.8582215924799558, + "grad_norm": 1.1382320347849708, + "learning_rate": 1.0358928077755459e-06, + "loss": 0.2371, + "step": 49667 + }, + { + "epoch": 0.8582388719933647, + "grad_norm": 0.8631481430349822, + "learning_rate": 1.0356447715145534e-06, + "loss": 0.3116, + "step": 49668 + }, + { + "epoch": 0.8582561515067736, + "grad_norm": 0.8165759445335243, + "learning_rate": 1.0353967633306017e-06, + "loss": 0.246, + "step": 49669 + }, + { + "epoch": 0.8582734310201825, + "grad_norm": 0.4432464334480414, + "learning_rate": 1.0351487832244678e-06, + "loss": 0.5879, + "step": 49670 + }, + { + "epoch": 0.8582907105335914, + "grad_norm": 0.9862792931783498, + "learning_rate": 1.0349008311969288e-06, + "loss": 0.306, + "step": 49671 + }, + { + "epoch": 0.8583079900470003, + "grad_norm": 0.9234993168908269, + "learning_rate": 1.0346529072487642e-06, + "loss": 0.521, + "step": 49672 + }, + { + "epoch": 0.8583252695604092, + "grad_norm": 1.8778868143065188, + "learning_rate": 1.0344050113807458e-06, + "loss": 0.3417, + "step": 49673 + }, + { + "epoch": 0.8583425490738181, + "grad_norm": 1.5858862795210362, + "learning_rate": 1.0341571435936547e-06, + "loss": 0.2669, + "step": 49674 + }, + { + "epoch": 0.858359828587227, + "grad_norm": 0.9396353326142409, + "learning_rate": 1.033909303888263e-06, + "loss": 0.2208, + "step": 49675 + }, + { + "epoch": 0.8583771081006358, + "grad_norm": 1.621186576018766, + "learning_rate": 1.0336614922653477e-06, + "loss": 0.5768, + "step": 49676 + }, + { + "epoch": 0.8583943876140447, + "grad_norm": 0.9734793612055034, + "learning_rate": 1.033413708725689e-06, + "loss": 0.2679, + "step": 49677 + }, + { + "epoch": 0.8584116671274536, + "grad_norm": 0.5530814092655192, + "learning_rate": 1.0331659532700578e-06, + "loss": 0.5431, + "step": 49678 + }, + { + "epoch": 0.8584289466408626, + "grad_norm": 1.4527202685797465, + "learning_rate": 1.0329182258992344e-06, + "loss": 0.2671, + "step": 49679 + }, + { + "epoch": 0.8584462261542715, + "grad_norm": 0.9720552086945534, + "learning_rate": 1.0326705266139904e-06, + "loss": 0.1982, + "step": 49680 + }, + { + "epoch": 0.8584635056676804, + "grad_norm": 2.141980917041639, + "learning_rate": 1.0324228554151027e-06, + "loss": 0.335, + "step": 49681 + }, + { + "epoch": 0.8584807851810893, + "grad_norm": 1.6197219118517472, + "learning_rate": 1.0321752123033502e-06, + "loss": 0.6797, + "step": 49682 + }, + { + "epoch": 0.8584980646944982, + "grad_norm": 2.5246888321575804, + "learning_rate": 1.0319275972795074e-06, + "loss": 0.3888, + "step": 49683 + }, + { + "epoch": 0.8585153442079071, + "grad_norm": 1.8202642453449165, + "learning_rate": 1.031680010344347e-06, + "loss": 0.3604, + "step": 49684 + }, + { + "epoch": 0.858532623721316, + "grad_norm": 1.3488651008709323, + "learning_rate": 1.0314324514986473e-06, + "loss": 0.4683, + "step": 49685 + }, + { + "epoch": 0.8585499032347249, + "grad_norm": 1.4279542954466273, + "learning_rate": 1.0311849207431824e-06, + "loss": 0.1783, + "step": 49686 + }, + { + "epoch": 0.8585671827481338, + "grad_norm": 1.4351417969019484, + "learning_rate": 1.0309374180787268e-06, + "loss": 0.231, + "step": 49687 + }, + { + "epoch": 0.8585844622615427, + "grad_norm": 2.117533029433523, + "learning_rate": 1.0306899435060602e-06, + "loss": 0.5689, + "step": 49688 + }, + { + "epoch": 0.8586017417749516, + "grad_norm": 0.9911534237550073, + "learning_rate": 1.0304424970259508e-06, + "loss": 0.3617, + "step": 49689 + }, + { + "epoch": 0.8586190212883605, + "grad_norm": 1.1241489246570076, + "learning_rate": 1.0301950786391802e-06, + "loss": 0.2245, + "step": 49690 + }, + { + "epoch": 0.8586363008017694, + "grad_norm": 1.0242862802614934, + "learning_rate": 1.0299476883465176e-06, + "loss": 0.664, + "step": 49691 + }, + { + "epoch": 0.8586535803151784, + "grad_norm": 1.355319234693252, + "learning_rate": 1.0297003261487415e-06, + "loss": 0.2641, + "step": 49692 + }, + { + "epoch": 0.8586708598285873, + "grad_norm": 1.4129071035162304, + "learning_rate": 1.0294529920466245e-06, + "loss": 0.5961, + "step": 49693 + }, + { + "epoch": 0.8586881393419962, + "grad_norm": 1.6612297103512217, + "learning_rate": 1.029205686040945e-06, + "loss": 0.2122, + "step": 49694 + }, + { + "epoch": 0.8587054188554051, + "grad_norm": 1.316554220784138, + "learning_rate": 1.0289584081324733e-06, + "loss": 0.3814, + "step": 49695 + }, + { + "epoch": 0.858722698368814, + "grad_norm": 1.401001387603566, + "learning_rate": 1.0287111583219877e-06, + "loss": 0.2938, + "step": 49696 + }, + { + "epoch": 0.8587399778822228, + "grad_norm": 0.9933920615042134, + "learning_rate": 1.02846393661026e-06, + "loss": 0.2646, + "step": 49697 + }, + { + "epoch": 0.8587572573956317, + "grad_norm": 3.8182357772283693, + "learning_rate": 1.0282167429980616e-06, + "loss": 0.317, + "step": 49698 + }, + { + "epoch": 0.8587745369090406, + "grad_norm": 1.5034580813670206, + "learning_rate": 1.0279695774861743e-06, + "loss": 0.324, + "step": 49699 + }, + { + "epoch": 0.8587918164224495, + "grad_norm": 1.2691036323540712, + "learning_rate": 1.027722440075365e-06, + "loss": 0.579, + "step": 49700 + }, + { + "epoch": 0.8588090959358584, + "grad_norm": 1.0644173740616698, + "learning_rate": 1.0274753307664142e-06, + "loss": 0.3006, + "step": 49701 + }, + { + "epoch": 0.8588263754492673, + "grad_norm": 1.4996079760700398, + "learning_rate": 1.0272282495600893e-06, + "loss": 0.4083, + "step": 49702 + }, + { + "epoch": 0.8588436549626762, + "grad_norm": 1.4154769306240693, + "learning_rate": 1.0269811964571708e-06, + "loss": 0.3185, + "step": 49703 + }, + { + "epoch": 0.8588609344760851, + "grad_norm": 1.3856105904871634, + "learning_rate": 1.0267341714584244e-06, + "loss": 0.2288, + "step": 49704 + }, + { + "epoch": 0.858878213989494, + "grad_norm": 1.0338566470572035, + "learning_rate": 1.026487174564632e-06, + "loss": 0.2684, + "step": 49705 + }, + { + "epoch": 0.858895493502903, + "grad_norm": 0.7946087860111978, + "learning_rate": 1.0262402057765641e-06, + "loss": 0.8887, + "step": 49706 + }, + { + "epoch": 0.8589127730163119, + "grad_norm": 1.1558161264200681, + "learning_rate": 1.0259932650949922e-06, + "loss": 0.5138, + "step": 49707 + }, + { + "epoch": 0.8589300525297208, + "grad_norm": 2.1112217533009003, + "learning_rate": 1.0257463525206934e-06, + "loss": 0.3162, + "step": 49708 + }, + { + "epoch": 0.8589473320431297, + "grad_norm": 1.2269891407368718, + "learning_rate": 1.0254994680544373e-06, + "loss": 0.2672, + "step": 49709 + }, + { + "epoch": 0.8589646115565386, + "grad_norm": 1.9286115299301547, + "learning_rate": 1.0252526116969985e-06, + "loss": 0.4454, + "step": 49710 + }, + { + "epoch": 0.8589818910699475, + "grad_norm": 1.6119343221723987, + "learning_rate": 1.02500578344915e-06, + "loss": 0.3543, + "step": 49711 + }, + { + "epoch": 0.8589991705833564, + "grad_norm": 1.2235425797926653, + "learning_rate": 1.0247589833116678e-06, + "loss": 0.4692, + "step": 49712 + }, + { + "epoch": 0.8590164500967653, + "grad_norm": 1.4394838175007623, + "learning_rate": 1.0245122112853212e-06, + "loss": 0.4229, + "step": 49713 + }, + { + "epoch": 0.8590337296101742, + "grad_norm": 0.7722241401437069, + "learning_rate": 1.0242654673708864e-06, + "loss": 0.2547, + "step": 49714 + }, + { + "epoch": 0.8590510091235831, + "grad_norm": 1.6590316312206133, + "learning_rate": 1.0240187515691314e-06, + "loss": 0.3697, + "step": 49715 + }, + { + "epoch": 0.859068288636992, + "grad_norm": 0.8843447714126313, + "learning_rate": 1.0237720638808323e-06, + "loss": 0.2932, + "step": 49716 + }, + { + "epoch": 0.8590855681504009, + "grad_norm": 2.0903028334153717, + "learning_rate": 1.0235254043067632e-06, + "loss": 0.4481, + "step": 49717 + }, + { + "epoch": 0.8591028476638097, + "grad_norm": 1.169932511695036, + "learning_rate": 1.0232787728476933e-06, + "loss": 0.5027, + "step": 49718 + }, + { + "epoch": 0.8591201271772186, + "grad_norm": 1.3359631510845458, + "learning_rate": 1.0230321695043976e-06, + "loss": 0.4608, + "step": 49719 + }, + { + "epoch": 0.8591374066906275, + "grad_norm": 1.4964171438322944, + "learning_rate": 1.0227855942776466e-06, + "loss": 0.4943, + "step": 49720 + }, + { + "epoch": 0.8591546862040365, + "grad_norm": 1.2502560421174056, + "learning_rate": 1.0225390471682117e-06, + "loss": 0.2119, + "step": 49721 + }, + { + "epoch": 0.8591719657174454, + "grad_norm": 1.6483999522867878, + "learning_rate": 1.022292528176868e-06, + "loss": 0.4823, + "step": 49722 + }, + { + "epoch": 0.8591892452308543, + "grad_norm": 1.1213396885553921, + "learning_rate": 1.0220460373043884e-06, + "loss": 0.3769, + "step": 49723 + }, + { + "epoch": 0.8592065247442632, + "grad_norm": 2.199182034179918, + "learning_rate": 1.0217995745515396e-06, + "loss": 0.237, + "step": 49724 + }, + { + "epoch": 0.8592238042576721, + "grad_norm": 1.4884414847868808, + "learning_rate": 1.0215531399191002e-06, + "loss": 0.3189, + "step": 49725 + }, + { + "epoch": 0.859241083771081, + "grad_norm": 1.0952603972383936, + "learning_rate": 1.0213067334078364e-06, + "loss": 0.2765, + "step": 49726 + }, + { + "epoch": 0.8592583632844899, + "grad_norm": 1.1146945360356413, + "learning_rate": 1.0210603550185216e-06, + "loss": 0.3508, + "step": 49727 + }, + { + "epoch": 0.8592756427978988, + "grad_norm": 1.080535826148972, + "learning_rate": 1.020814004751931e-06, + "loss": 0.2753, + "step": 49728 + }, + { + "epoch": 0.8592929223113077, + "grad_norm": 1.4549901691398086, + "learning_rate": 1.0205676826088317e-06, + "loss": 0.3456, + "step": 49729 + }, + { + "epoch": 0.8593102018247166, + "grad_norm": 0.7719900934477157, + "learning_rate": 1.0203213885899977e-06, + "loss": 0.3335, + "step": 49730 + }, + { + "epoch": 0.8593274813381255, + "grad_norm": 1.3398292727379633, + "learning_rate": 1.020075122696198e-06, + "loss": 0.3996, + "step": 49731 + }, + { + "epoch": 0.8593447608515344, + "grad_norm": 1.4015112846607822, + "learning_rate": 1.0198288849282057e-06, + "loss": 0.4307, + "step": 49732 + }, + { + "epoch": 0.8593620403649433, + "grad_norm": 1.0426581963205268, + "learning_rate": 1.019582675286791e-06, + "loss": 0.4205, + "step": 49733 + }, + { + "epoch": 0.8593793198783523, + "grad_norm": 1.292561060229085, + "learning_rate": 1.019336493772729e-06, + "loss": 0.5805, + "step": 49734 + }, + { + "epoch": 0.8593965993917612, + "grad_norm": 1.7341830642335785, + "learning_rate": 1.0190903403867847e-06, + "loss": 0.3123, + "step": 49735 + }, + { + "epoch": 0.8594138789051701, + "grad_norm": 0.9390797044968211, + "learning_rate": 1.018844215129734e-06, + "loss": 0.3147, + "step": 49736 + }, + { + "epoch": 0.859431158418579, + "grad_norm": 0.9307074029865233, + "learning_rate": 1.0185981180023463e-06, + "loss": 0.2689, + "step": 49737 + }, + { + "epoch": 0.8594484379319879, + "grad_norm": 2.0356994863435443, + "learning_rate": 1.0183520490053878e-06, + "loss": 0.3211, + "step": 49738 + }, + { + "epoch": 0.8594657174453968, + "grad_norm": 1.3559662021524699, + "learning_rate": 1.0181060081396366e-06, + "loss": 0.2994, + "step": 49739 + }, + { + "epoch": 0.8594829969588056, + "grad_norm": 2.120935454353839, + "learning_rate": 1.0178599954058589e-06, + "loss": 0.3971, + "step": 49740 + }, + { + "epoch": 0.8595002764722145, + "grad_norm": 1.3100896782400298, + "learning_rate": 1.0176140108048272e-06, + "loss": 0.3695, + "step": 49741 + }, + { + "epoch": 0.8595175559856234, + "grad_norm": 1.4049176013766664, + "learning_rate": 1.0173680543373098e-06, + "loss": 0.3391, + "step": 49742 + }, + { + "epoch": 0.8595348354990323, + "grad_norm": 1.7716775465939933, + "learning_rate": 1.017122126004081e-06, + "loss": 0.2057, + "step": 49743 + }, + { + "epoch": 0.8595521150124412, + "grad_norm": 1.1978904490329387, + "learning_rate": 1.016876225805903e-06, + "loss": 0.3386, + "step": 49744 + }, + { + "epoch": 0.8595693945258501, + "grad_norm": 1.3224018402018367, + "learning_rate": 1.0166303537435563e-06, + "loss": 0.2093, + "step": 49745 + }, + { + "epoch": 0.859586674039259, + "grad_norm": 1.1809591257125849, + "learning_rate": 1.0163845098178037e-06, + "loss": 0.4527, + "step": 49746 + }, + { + "epoch": 0.8596039535526679, + "grad_norm": 1.2594855659749278, + "learning_rate": 1.0161386940294194e-06, + "loss": 0.3088, + "step": 49747 + }, + { + "epoch": 0.8596212330660769, + "grad_norm": 1.0860581084652303, + "learning_rate": 1.015892906379171e-06, + "loss": 0.2272, + "step": 49748 + }, + { + "epoch": 0.8596385125794858, + "grad_norm": 1.6321851853385236, + "learning_rate": 1.0156471468678276e-06, + "loss": 0.3157, + "step": 49749 + }, + { + "epoch": 0.8596557920928947, + "grad_norm": 1.3278289101448213, + "learning_rate": 1.01540141549616e-06, + "loss": 0.2986, + "step": 49750 + }, + { + "epoch": 0.8596730716063036, + "grad_norm": 1.5349960510944949, + "learning_rate": 1.015155712264938e-06, + "loss": 0.4195, + "step": 49751 + }, + { + "epoch": 0.8596903511197125, + "grad_norm": 1.3984274465651498, + "learning_rate": 1.0149100371749332e-06, + "loss": 0.4423, + "step": 49752 + }, + { + "epoch": 0.8597076306331214, + "grad_norm": 1.033085369413149, + "learning_rate": 1.0146643902269104e-06, + "loss": 0.2689, + "step": 49753 + }, + { + "epoch": 0.8597249101465303, + "grad_norm": 1.0928469649715384, + "learning_rate": 1.0144187714216447e-06, + "loss": 0.3853, + "step": 49754 + }, + { + "epoch": 0.8597421896599392, + "grad_norm": 1.5293232073455876, + "learning_rate": 1.0141731807598998e-06, + "loss": 0.2633, + "step": 49755 + }, + { + "epoch": 0.8597594691733481, + "grad_norm": 1.491428915059668, + "learning_rate": 1.0139276182424462e-06, + "loss": 0.3216, + "step": 49756 + }, + { + "epoch": 0.859776748686757, + "grad_norm": 0.994167417114844, + "learning_rate": 1.0136820838700556e-06, + "loss": 0.3787, + "step": 49757 + }, + { + "epoch": 0.8597940282001659, + "grad_norm": 1.22335457342128, + "learning_rate": 1.0134365776434974e-06, + "loss": 0.4301, + "step": 49758 + }, + { + "epoch": 0.8598113077135748, + "grad_norm": 1.0516975047202652, + "learning_rate": 1.0131910995635385e-06, + "loss": 0.3478, + "step": 49759 + }, + { + "epoch": 0.8598285872269837, + "grad_norm": 1.2111501274621201, + "learning_rate": 1.0129456496309463e-06, + "loss": 0.2396, + "step": 49760 + }, + { + "epoch": 0.8598458667403925, + "grad_norm": 1.274906269532674, + "learning_rate": 1.0127002278464914e-06, + "loss": 0.3507, + "step": 49761 + }, + { + "epoch": 0.8598631462538014, + "grad_norm": 0.9283611275997047, + "learning_rate": 1.012454834210942e-06, + "loss": 0.2692, + "step": 49762 + }, + { + "epoch": 0.8598804257672104, + "grad_norm": 0.521488189760857, + "learning_rate": 1.0122094687250694e-06, + "loss": 0.5507, + "step": 49763 + }, + { + "epoch": 0.8598977052806193, + "grad_norm": 1.1021973420382265, + "learning_rate": 1.0119641313896377e-06, + "loss": 0.2323, + "step": 49764 + }, + { + "epoch": 0.8599149847940282, + "grad_norm": 2.174473742050886, + "learning_rate": 1.0117188222054198e-06, + "loss": 0.246, + "step": 49765 + }, + { + "epoch": 0.8599322643074371, + "grad_norm": 2.2807287580765854, + "learning_rate": 1.011473541173179e-06, + "loss": 0.4103, + "step": 49766 + }, + { + "epoch": 0.859949543820846, + "grad_norm": 1.7034408303670399, + "learning_rate": 1.0112282882936863e-06, + "loss": 0.2765, + "step": 49767 + }, + { + "epoch": 0.8599668233342549, + "grad_norm": 1.1406361397504405, + "learning_rate": 1.010983063567712e-06, + "loss": 0.3604, + "step": 49768 + }, + { + "epoch": 0.8599841028476638, + "grad_norm": 1.4580295898003288, + "learning_rate": 1.01073786699602e-06, + "loss": 0.3532, + "step": 49769 + }, + { + "epoch": 0.8600013823610727, + "grad_norm": 2.2141590185979383, + "learning_rate": 1.0104926985793816e-06, + "loss": 0.3638, + "step": 49770 + }, + { + "epoch": 0.8600186618744816, + "grad_norm": 1.2031019303291717, + "learning_rate": 1.010247558318561e-06, + "loss": 0.372, + "step": 49771 + }, + { + "epoch": 0.8600359413878905, + "grad_norm": 0.8552044156438913, + "learning_rate": 1.0100024462143288e-06, + "loss": 0.3028, + "step": 49772 + }, + { + "epoch": 0.8600532209012994, + "grad_norm": 1.1990195251542655, + "learning_rate": 1.0097573622674527e-06, + "loss": 0.4573, + "step": 49773 + }, + { + "epoch": 0.8600705004147083, + "grad_norm": 1.6937929157221987, + "learning_rate": 1.0095123064787005e-06, + "loss": 0.331, + "step": 49774 + }, + { + "epoch": 0.8600877799281172, + "grad_norm": 0.9917820512268197, + "learning_rate": 1.0092672788488379e-06, + "loss": 0.4256, + "step": 49775 + }, + { + "epoch": 0.8601050594415262, + "grad_norm": 1.0315488441837388, + "learning_rate": 1.0090222793786352e-06, + "loss": 0.3703, + "step": 49776 + }, + { + "epoch": 0.8601223389549351, + "grad_norm": 1.4352671736495901, + "learning_rate": 1.0087773080688556e-06, + "loss": 0.3461, + "step": 49777 + }, + { + "epoch": 0.860139618468344, + "grad_norm": 1.7468374302206393, + "learning_rate": 1.0085323649202694e-06, + "loss": 0.3903, + "step": 49778 + }, + { + "epoch": 0.8601568979817529, + "grad_norm": 1.2520592506813921, + "learning_rate": 1.008287449933646e-06, + "loss": 0.3308, + "step": 49779 + }, + { + "epoch": 0.8601741774951618, + "grad_norm": 1.2246293542849698, + "learning_rate": 1.0080425631097457e-06, + "loss": 0.3835, + "step": 49780 + }, + { + "epoch": 0.8601914570085707, + "grad_norm": 1.657631041065701, + "learning_rate": 1.0077977044493426e-06, + "loss": 0.2736, + "step": 49781 + }, + { + "epoch": 0.8602087365219795, + "grad_norm": 1.940655017729192, + "learning_rate": 1.007552873953198e-06, + "loss": 0.5015, + "step": 49782 + }, + { + "epoch": 0.8602260160353884, + "grad_norm": 1.3920781817417016, + "learning_rate": 1.007308071622084e-06, + "loss": 0.3474, + "step": 49783 + }, + { + "epoch": 0.8602432955487973, + "grad_norm": 1.4229561919488707, + "learning_rate": 1.0070632974567596e-06, + "loss": 0.2894, + "step": 49784 + }, + { + "epoch": 0.8602605750622062, + "grad_norm": 1.214407023607584, + "learning_rate": 1.006818551458001e-06, + "loss": 0.2856, + "step": 49785 + }, + { + "epoch": 0.8602778545756151, + "grad_norm": 1.6761632277159861, + "learning_rate": 1.0065738336265686e-06, + "loss": 0.337, + "step": 49786 + }, + { + "epoch": 0.860295134089024, + "grad_norm": 1.7772159681805313, + "learning_rate": 1.0063291439632316e-06, + "loss": 0.4697, + "step": 49787 + }, + { + "epoch": 0.8603124136024329, + "grad_norm": 1.0144581795497547, + "learning_rate": 1.0060844824687555e-06, + "loss": 0.3597, + "step": 49788 + }, + { + "epoch": 0.8603296931158418, + "grad_norm": 1.1879979353696208, + "learning_rate": 1.0058398491439037e-06, + "loss": 0.2322, + "step": 49789 + }, + { + "epoch": 0.8603469726292508, + "grad_norm": 0.9887338550100024, + "learning_rate": 1.0055952439894456e-06, + "loss": 0.2613, + "step": 49790 + }, + { + "epoch": 0.8603642521426597, + "grad_norm": 1.4470727862889023, + "learning_rate": 1.0053506670061463e-06, + "loss": 0.4462, + "step": 49791 + }, + { + "epoch": 0.8603815316560686, + "grad_norm": 0.9012497477525951, + "learning_rate": 1.0051061181947753e-06, + "loss": 0.593, + "step": 49792 + }, + { + "epoch": 0.8603988111694775, + "grad_norm": 1.1525289194094421, + "learning_rate": 1.0048615975560926e-06, + "loss": 0.4604, + "step": 49793 + }, + { + "epoch": 0.8604160906828864, + "grad_norm": 1.2311948285993144, + "learning_rate": 1.0046171050908693e-06, + "loss": 0.3587, + "step": 49794 + }, + { + "epoch": 0.8604333701962953, + "grad_norm": 1.8485068361910624, + "learning_rate": 1.0043726407998667e-06, + "loss": 0.3001, + "step": 49795 + }, + { + "epoch": 0.8604506497097042, + "grad_norm": 1.0884978203313487, + "learning_rate": 1.004128204683853e-06, + "loss": 0.4337, + "step": 49796 + }, + { + "epoch": 0.8604679292231131, + "grad_norm": 1.0350764188939352, + "learning_rate": 1.0038837967435932e-06, + "loss": 0.4081, + "step": 49797 + }, + { + "epoch": 0.860485208736522, + "grad_norm": 1.3594411655195755, + "learning_rate": 1.0036394169798548e-06, + "loss": 0.4565, + "step": 49798 + }, + { + "epoch": 0.8605024882499309, + "grad_norm": 2.324925047042693, + "learning_rate": 1.0033950653934021e-06, + "loss": 0.4683, + "step": 49799 + }, + { + "epoch": 0.8605197677633398, + "grad_norm": 0.8075110841314381, + "learning_rate": 1.0031507419849974e-06, + "loss": 0.2428, + "step": 49800 + }, + { + "epoch": 0.8605370472767487, + "grad_norm": 1.750243849720388, + "learning_rate": 1.0029064467554084e-06, + "loss": 0.2708, + "step": 49801 + }, + { + "epoch": 0.8605543267901576, + "grad_norm": 0.8789274665231336, + "learning_rate": 1.0026621797053992e-06, + "loss": 0.2168, + "step": 49802 + }, + { + "epoch": 0.8605716063035664, + "grad_norm": 0.649616259412503, + "learning_rate": 1.0024179408357392e-06, + "loss": 0.6194, + "step": 49803 + }, + { + "epoch": 0.8605888858169753, + "grad_norm": 1.3008888365236924, + "learning_rate": 1.0021737301471867e-06, + "loss": 0.2843, + "step": 49804 + }, + { + "epoch": 0.8606061653303843, + "grad_norm": 1.0966712326780494, + "learning_rate": 1.0019295476405132e-06, + "loss": 0.2649, + "step": 49805 + }, + { + "epoch": 0.8606234448437932, + "grad_norm": 1.880780864625722, + "learning_rate": 1.0016853933164773e-06, + "loss": 0.3583, + "step": 49806 + }, + { + "epoch": 0.8606407243572021, + "grad_norm": 1.4800979114759103, + "learning_rate": 1.0014412671758467e-06, + "loss": 0.307, + "step": 49807 + }, + { + "epoch": 0.860658003870611, + "grad_norm": 1.233440528440686, + "learning_rate": 1.0011971692193867e-06, + "loss": 0.2091, + "step": 49808 + }, + { + "epoch": 0.8606752833840199, + "grad_norm": 1.6843222715583435, + "learning_rate": 1.0009530994478622e-06, + "loss": 0.3667, + "step": 49809 + }, + { + "epoch": 0.8606925628974288, + "grad_norm": 1.0470153103810023, + "learning_rate": 1.000709057862037e-06, + "loss": 0.2977, + "step": 49810 + }, + { + "epoch": 0.8607098424108377, + "grad_norm": 1.9299329998521118, + "learning_rate": 1.000465044462673e-06, + "loss": 0.2201, + "step": 49811 + }, + { + "epoch": 0.8607271219242466, + "grad_norm": 1.600422273884284, + "learning_rate": 1.0002210592505356e-06, + "loss": 0.3262, + "step": 49812 + }, + { + "epoch": 0.8607444014376555, + "grad_norm": 1.169947568248009, + "learning_rate": 9.999771022263904e-07, + "loss": 0.3752, + "step": 49813 + }, + { + "epoch": 0.8607616809510644, + "grad_norm": 1.6701258104779824, + "learning_rate": 9.997331733910032e-07, + "loss": 0.6477, + "step": 49814 + }, + { + "epoch": 0.8607789604644733, + "grad_norm": 1.750176759233061, + "learning_rate": 9.994892727451334e-07, + "loss": 0.6065, + "step": 49815 + }, + { + "epoch": 0.8607962399778822, + "grad_norm": 1.5176810916359889, + "learning_rate": 9.992454002895492e-07, + "loss": 0.1779, + "step": 49816 + }, + { + "epoch": 0.8608135194912911, + "grad_norm": 1.2729060726678372, + "learning_rate": 9.9900155602501e-07, + "loss": 0.3291, + "step": 49817 + }, + { + "epoch": 0.8608307990047, + "grad_norm": 1.3521285163648267, + "learning_rate": 9.987577399522819e-07, + "loss": 0.2411, + "step": 49818 + }, + { + "epoch": 0.860848078518109, + "grad_norm": 1.1469768342326543, + "learning_rate": 9.98513952072131e-07, + "loss": 0.1449, + "step": 49819 + }, + { + "epoch": 0.8608653580315179, + "grad_norm": 0.8662772350393395, + "learning_rate": 9.982701923853167e-07, + "loss": 0.6738, + "step": 49820 + }, + { + "epoch": 0.8608826375449268, + "grad_norm": 0.9371148661025794, + "learning_rate": 9.98026460892606e-07, + "loss": 0.367, + "step": 49821 + }, + { + "epoch": 0.8608999170583357, + "grad_norm": 1.396888957201867, + "learning_rate": 9.977827575947597e-07, + "loss": 0.3882, + "step": 49822 + }, + { + "epoch": 0.8609171965717446, + "grad_norm": 2.08796705560866, + "learning_rate": 9.975390824925423e-07, + "loss": 0.5169, + "step": 49823 + }, + { + "epoch": 0.8609344760851534, + "grad_norm": 1.1126160512010037, + "learning_rate": 9.972954355867137e-07, + "loss": 0.1725, + "step": 49824 + }, + { + "epoch": 0.8609517555985623, + "grad_norm": 1.0262942863955635, + "learning_rate": 9.97051816878043e-07, + "loss": 0.1901, + "step": 49825 + }, + { + "epoch": 0.8609690351119712, + "grad_norm": 2.1519597560246377, + "learning_rate": 9.968082263672895e-07, + "loss": 0.6952, + "step": 49826 + }, + { + "epoch": 0.8609863146253801, + "grad_norm": 2.0910150222710815, + "learning_rate": 9.965646640552173e-07, + "loss": 0.2796, + "step": 49827 + }, + { + "epoch": 0.861003594138789, + "grad_norm": 0.9172451416065003, + "learning_rate": 9.96321129942588e-07, + "loss": 0.2708, + "step": 49828 + }, + { + "epoch": 0.8610208736521979, + "grad_norm": 0.5386229388894375, + "learning_rate": 9.960776240301673e-07, + "loss": 0.5462, + "step": 49829 + }, + { + "epoch": 0.8610381531656068, + "grad_norm": 0.9305033489724692, + "learning_rate": 9.958341463187127e-07, + "loss": 0.3635, + "step": 49830 + }, + { + "epoch": 0.8610554326790157, + "grad_norm": 1.4446141406805797, + "learning_rate": 9.955906968089901e-07, + "loss": 0.4486, + "step": 49831 + }, + { + "epoch": 0.8610727121924246, + "grad_norm": 1.157519776484243, + "learning_rate": 9.953472755017646e-07, + "loss": 0.3996, + "step": 49832 + }, + { + "epoch": 0.8610899917058336, + "grad_norm": 1.0567102939294029, + "learning_rate": 9.951038823977933e-07, + "loss": 0.3363, + "step": 49833 + }, + { + "epoch": 0.8611072712192425, + "grad_norm": 0.7930461225386981, + "learning_rate": 9.948605174978432e-07, + "loss": 0.281, + "step": 49834 + }, + { + "epoch": 0.8611245507326514, + "grad_norm": 1.561048603947204, + "learning_rate": 9.946171808026718e-07, + "loss": 0.4155, + "step": 49835 + }, + { + "epoch": 0.8611418302460603, + "grad_norm": 1.4720290194769334, + "learning_rate": 9.943738723130435e-07, + "loss": 0.369, + "step": 49836 + }, + { + "epoch": 0.8611591097594692, + "grad_norm": 1.4554695707935543, + "learning_rate": 9.941305920297217e-07, + "loss": 0.2969, + "step": 49837 + }, + { + "epoch": 0.8611763892728781, + "grad_norm": 1.1275452880298928, + "learning_rate": 9.938873399534688e-07, + "loss": 0.2144, + "step": 49838 + }, + { + "epoch": 0.861193668786287, + "grad_norm": 1.2749701945510599, + "learning_rate": 9.93644116085043e-07, + "loss": 0.4982, + "step": 49839 + }, + { + "epoch": 0.8612109482996959, + "grad_norm": 1.4093069088572663, + "learning_rate": 9.934009204252104e-07, + "loss": 0.2361, + "step": 49840 + }, + { + "epoch": 0.8612282278131048, + "grad_norm": 0.9290677710567034, + "learning_rate": 9.931577529747282e-07, + "loss": 0.223, + "step": 49841 + }, + { + "epoch": 0.8612455073265137, + "grad_norm": 1.679535916515564, + "learning_rate": 9.929146137343615e-07, + "loss": 0.2584, + "step": 49842 + }, + { + "epoch": 0.8612627868399226, + "grad_norm": 0.7555654671133903, + "learning_rate": 9.926715027048716e-07, + "loss": 0.2035, + "step": 49843 + }, + { + "epoch": 0.8612800663533315, + "grad_norm": 1.469716953355472, + "learning_rate": 9.92428419887017e-07, + "loss": 0.4326, + "step": 49844 + }, + { + "epoch": 0.8612973458667403, + "grad_norm": 1.3135596294926213, + "learning_rate": 9.921853652815639e-07, + "loss": 0.3539, + "step": 49845 + }, + { + "epoch": 0.8613146253801492, + "grad_norm": 1.3613557458584298, + "learning_rate": 9.91942338889269e-07, + "loss": 0.3756, + "step": 49846 + }, + { + "epoch": 0.8613319048935582, + "grad_norm": 1.0027444121756224, + "learning_rate": 9.916993407108943e-07, + "loss": 0.3605, + "step": 49847 + }, + { + "epoch": 0.8613491844069671, + "grad_norm": 1.1316812041151918, + "learning_rate": 9.914563707472036e-07, + "loss": 0.27, + "step": 49848 + }, + { + "epoch": 0.861366463920376, + "grad_norm": 1.267816647110181, + "learning_rate": 9.91213428998956e-07, + "loss": 0.4133, + "step": 49849 + }, + { + "epoch": 0.8613837434337849, + "grad_norm": 1.3083923930912513, + "learning_rate": 9.909705154669136e-07, + "loss": 0.4102, + "step": 49850 + }, + { + "epoch": 0.8614010229471938, + "grad_norm": 1.8734671486036543, + "learning_rate": 9.907276301518342e-07, + "loss": 0.4225, + "step": 49851 + }, + { + "epoch": 0.8614183024606027, + "grad_norm": 1.0625150860342085, + "learning_rate": 9.90484773054481e-07, + "loss": 0.2898, + "step": 49852 + }, + { + "epoch": 0.8614355819740116, + "grad_norm": 1.2958588409680296, + "learning_rate": 9.902419441756138e-07, + "loss": 0.505, + "step": 49853 + }, + { + "epoch": 0.8614528614874205, + "grad_norm": 0.7917161938297448, + "learning_rate": 9.899991435159951e-07, + "loss": 0.5077, + "step": 49854 + }, + { + "epoch": 0.8614701410008294, + "grad_norm": 0.878899139778989, + "learning_rate": 9.897563710763825e-07, + "loss": 0.155, + "step": 49855 + }, + { + "epoch": 0.8614874205142383, + "grad_norm": 1.4913219453753606, + "learning_rate": 9.8951362685754e-07, + "loss": 0.3779, + "step": 49856 + }, + { + "epoch": 0.8615047000276472, + "grad_norm": 1.094998227734054, + "learning_rate": 9.892709108602227e-07, + "loss": 0.1609, + "step": 49857 + }, + { + "epoch": 0.8615219795410561, + "grad_norm": 0.87231666257566, + "learning_rate": 9.890282230851943e-07, + "loss": 0.2067, + "step": 49858 + }, + { + "epoch": 0.861539259054465, + "grad_norm": 0.5827355424838105, + "learning_rate": 9.887855635332134e-07, + "loss": 0.6942, + "step": 49859 + }, + { + "epoch": 0.861556538567874, + "grad_norm": 1.4787212019019917, + "learning_rate": 9.885429322050432e-07, + "loss": 0.2588, + "step": 49860 + }, + { + "epoch": 0.8615738180812829, + "grad_norm": 1.0487740827308325, + "learning_rate": 9.883003291014425e-07, + "loss": 0.4239, + "step": 49861 + }, + { + "epoch": 0.8615910975946918, + "grad_norm": 1.0933059639675338, + "learning_rate": 9.880577542231673e-07, + "loss": 0.266, + "step": 49862 + }, + { + "epoch": 0.8616083771081007, + "grad_norm": 1.0983215061374316, + "learning_rate": 9.878152075709812e-07, + "loss": 0.2494, + "step": 49863 + }, + { + "epoch": 0.8616256566215096, + "grad_norm": 2.7536143212807067, + "learning_rate": 9.875726891456405e-07, + "loss": 0.492, + "step": 49864 + }, + { + "epoch": 0.8616429361349185, + "grad_norm": 1.6653798893123994, + "learning_rate": 9.8733019894791e-07, + "loss": 0.3239, + "step": 49865 + }, + { + "epoch": 0.8616602156483273, + "grad_norm": 2.2358256337501357, + "learning_rate": 9.870877369785448e-07, + "loss": 0.3347, + "step": 49866 + }, + { + "epoch": 0.8616774951617362, + "grad_norm": 1.4371969311203485, + "learning_rate": 9.868453032383074e-07, + "loss": 0.4578, + "step": 49867 + }, + { + "epoch": 0.8616947746751451, + "grad_norm": 1.0423467043936125, + "learning_rate": 9.866028977279529e-07, + "loss": 0.6461, + "step": 49868 + }, + { + "epoch": 0.861712054188554, + "grad_norm": 1.8916314053180812, + "learning_rate": 9.863605204482462e-07, + "loss": 0.4405, + "step": 49869 + }, + { + "epoch": 0.8617293337019629, + "grad_norm": 1.297298284039018, + "learning_rate": 9.8611817139994e-07, + "loss": 0.4622, + "step": 49870 + }, + { + "epoch": 0.8617466132153718, + "grad_norm": 1.4430950558479898, + "learning_rate": 9.858758505837984e-07, + "loss": 0.3461, + "step": 49871 + }, + { + "epoch": 0.8617638927287807, + "grad_norm": 1.7652259186827415, + "learning_rate": 9.856335580005793e-07, + "loss": 0.3364, + "step": 49872 + }, + { + "epoch": 0.8617811722421896, + "grad_norm": 1.0878057665721348, + "learning_rate": 9.8539129365104e-07, + "loss": 0.2153, + "step": 49873 + }, + { + "epoch": 0.8617984517555985, + "grad_norm": 1.2584674010763899, + "learning_rate": 9.851490575359424e-07, + "loss": 0.2244, + "step": 49874 + }, + { + "epoch": 0.8618157312690075, + "grad_norm": 1.771343575051379, + "learning_rate": 9.8490684965604e-07, + "loss": 0.4089, + "step": 49875 + }, + { + "epoch": 0.8618330107824164, + "grad_norm": 1.3815123744722895, + "learning_rate": 9.846646700120954e-07, + "loss": 0.3944, + "step": 49876 + }, + { + "epoch": 0.8618502902958253, + "grad_norm": 1.2550208016502336, + "learning_rate": 9.84422518604865e-07, + "loss": 0.3201, + "step": 49877 + }, + { + "epoch": 0.8618675698092342, + "grad_norm": 1.4978439800584171, + "learning_rate": 9.841803954351115e-07, + "loss": 0.2294, + "step": 49878 + }, + { + "epoch": 0.8618848493226431, + "grad_norm": 1.3797382844931296, + "learning_rate": 9.839383005035874e-07, + "loss": 0.528, + "step": 49879 + }, + { + "epoch": 0.861902128836052, + "grad_norm": 0.7966882713298513, + "learning_rate": 9.836962338110567e-07, + "loss": 0.4432, + "step": 49880 + }, + { + "epoch": 0.8619194083494609, + "grad_norm": 1.135979615859357, + "learning_rate": 9.834541953582722e-07, + "loss": 0.3823, + "step": 49881 + }, + { + "epoch": 0.8619366878628698, + "grad_norm": 1.5106382106114826, + "learning_rate": 9.832121851459942e-07, + "loss": 0.3342, + "step": 49882 + }, + { + "epoch": 0.8619539673762787, + "grad_norm": 1.584611529800943, + "learning_rate": 9.829702031749821e-07, + "loss": 0.2198, + "step": 49883 + }, + { + "epoch": 0.8619712468896876, + "grad_norm": 0.9878578996271964, + "learning_rate": 9.827282494459923e-07, + "loss": 0.4306, + "step": 49884 + }, + { + "epoch": 0.8619885264030965, + "grad_norm": 1.0391818184119619, + "learning_rate": 9.824863239597838e-07, + "loss": 0.3135, + "step": 49885 + }, + { + "epoch": 0.8620058059165054, + "grad_norm": 0.9077739276155591, + "learning_rate": 9.822444267171115e-07, + "loss": 0.2746, + "step": 49886 + }, + { + "epoch": 0.8620230854299143, + "grad_norm": 1.9248911243735067, + "learning_rate": 9.820025577187353e-07, + "loss": 0.4538, + "step": 49887 + }, + { + "epoch": 0.8620403649433231, + "grad_norm": 2.1038239004827943, + "learning_rate": 9.81760716965412e-07, + "loss": 0.3673, + "step": 49888 + }, + { + "epoch": 0.862057644456732, + "grad_norm": 1.5887974784265573, + "learning_rate": 9.81518904457902e-07, + "loss": 0.5688, + "step": 49889 + }, + { + "epoch": 0.862074923970141, + "grad_norm": 2.1535862820145253, + "learning_rate": 9.812771201969585e-07, + "loss": 0.4136, + "step": 49890 + }, + { + "epoch": 0.8620922034835499, + "grad_norm": 1.490501922902793, + "learning_rate": 9.810353641833415e-07, + "loss": 0.2004, + "step": 49891 + }, + { + "epoch": 0.8621094829969588, + "grad_norm": 1.154857788065866, + "learning_rate": 9.807936364178062e-07, + "loss": 0.465, + "step": 49892 + }, + { + "epoch": 0.8621267625103677, + "grad_norm": 1.1567059025153608, + "learning_rate": 9.805519369011096e-07, + "loss": 0.3862, + "step": 49893 + }, + { + "epoch": 0.8621440420237766, + "grad_norm": 2.3455939058640296, + "learning_rate": 9.803102656340124e-07, + "loss": 0.3136, + "step": 49894 + }, + { + "epoch": 0.8621613215371855, + "grad_norm": 1.7261769557205056, + "learning_rate": 9.800686226172662e-07, + "loss": 0.6526, + "step": 49895 + }, + { + "epoch": 0.8621786010505944, + "grad_norm": 1.0868606194454442, + "learning_rate": 9.798270078516336e-07, + "loss": 0.2713, + "step": 49896 + }, + { + "epoch": 0.8621958805640033, + "grad_norm": 2.4676520470464607, + "learning_rate": 9.795854213378665e-07, + "loss": 0.2838, + "step": 49897 + }, + { + "epoch": 0.8622131600774122, + "grad_norm": 1.976766590906343, + "learning_rate": 9.793438630767216e-07, + "loss": 0.3361, + "step": 49898 + }, + { + "epoch": 0.8622304395908211, + "grad_norm": 1.2850707738721956, + "learning_rate": 9.791023330689586e-07, + "loss": 0.3585, + "step": 49899 + }, + { + "epoch": 0.86224771910423, + "grad_norm": 1.6327916804720823, + "learning_rate": 9.788608313153347e-07, + "loss": 0.3469, + "step": 49900 + }, + { + "epoch": 0.862264998617639, + "grad_norm": 1.005976386891685, + "learning_rate": 9.786193578166025e-07, + "loss": 0.3236, + "step": 49901 + }, + { + "epoch": 0.8622822781310479, + "grad_norm": 1.666694961437778, + "learning_rate": 9.783779125735215e-07, + "loss": 0.3159, + "step": 49902 + }, + { + "epoch": 0.8622995576444568, + "grad_norm": 0.9447745052188383, + "learning_rate": 9.781364955868466e-07, + "loss": 0.1238, + "step": 49903 + }, + { + "epoch": 0.8623168371578657, + "grad_norm": 1.030169573702543, + "learning_rate": 9.778951068573316e-07, + "loss": 0.2484, + "step": 49904 + }, + { + "epoch": 0.8623341166712746, + "grad_norm": 1.3476349885481418, + "learning_rate": 9.776537463857349e-07, + "loss": 0.3528, + "step": 49905 + }, + { + "epoch": 0.8623513961846835, + "grad_norm": 0.9082634021477917, + "learning_rate": 9.774124141728126e-07, + "loss": 0.3275, + "step": 49906 + }, + { + "epoch": 0.8623686756980924, + "grad_norm": 1.6108967061013724, + "learning_rate": 9.771711102193216e-07, + "loss": 0.3919, + "step": 49907 + }, + { + "epoch": 0.8623859552115013, + "grad_norm": 1.1071936521038899, + "learning_rate": 9.769298345260159e-07, + "loss": 0.6685, + "step": 49908 + }, + { + "epoch": 0.8624032347249101, + "grad_norm": 1.0825072419882353, + "learning_rate": 9.766885870936527e-07, + "loss": 0.3664, + "step": 49909 + }, + { + "epoch": 0.862420514238319, + "grad_norm": 1.1168631863865994, + "learning_rate": 9.764473679229835e-07, + "loss": 0.3482, + "step": 49910 + }, + { + "epoch": 0.8624377937517279, + "grad_norm": 1.5619496876094026, + "learning_rate": 9.76206177014769e-07, + "loss": 0.3035, + "step": 49911 + }, + { + "epoch": 0.8624550732651368, + "grad_norm": 2.3088678663861675, + "learning_rate": 9.75965014369763e-07, + "loss": 0.3533, + "step": 49912 + }, + { + "epoch": 0.8624723527785457, + "grad_norm": 1.599854949726727, + "learning_rate": 9.757238799887193e-07, + "loss": 0.6838, + "step": 49913 + }, + { + "epoch": 0.8624896322919546, + "grad_norm": 2.1227893015354415, + "learning_rate": 9.75482773872396e-07, + "loss": 0.3645, + "step": 49914 + }, + { + "epoch": 0.8625069118053635, + "grad_norm": 1.033150304260223, + "learning_rate": 9.75241696021544e-07, + "loss": 0.2349, + "step": 49915 + }, + { + "epoch": 0.8625241913187724, + "grad_norm": 1.1264111142067015, + "learning_rate": 9.75000646436921e-07, + "loss": 0.2842, + "step": 49916 + }, + { + "epoch": 0.8625414708321814, + "grad_norm": 0.9230988097735051, + "learning_rate": 9.747596251192825e-07, + "loss": 0.3397, + "step": 49917 + }, + { + "epoch": 0.8625587503455903, + "grad_norm": 1.8075314600556744, + "learning_rate": 9.745186320693845e-07, + "loss": 0.5329, + "step": 49918 + }, + { + "epoch": 0.8625760298589992, + "grad_norm": 1.1608875856027816, + "learning_rate": 9.742776672879771e-07, + "loss": 0.3024, + "step": 49919 + }, + { + "epoch": 0.8625933093724081, + "grad_norm": 1.0898307929893682, + "learning_rate": 9.740367307758214e-07, + "loss": 0.3025, + "step": 49920 + }, + { + "epoch": 0.862610588885817, + "grad_norm": 2.0014882604969846, + "learning_rate": 9.737958225336663e-07, + "loss": 0.5337, + "step": 49921 + }, + { + "epoch": 0.8626278683992259, + "grad_norm": 1.6027351087247625, + "learning_rate": 9.735549425622682e-07, + "loss": 0.3521, + "step": 49922 + }, + { + "epoch": 0.8626451479126348, + "grad_norm": 0.9581848883012436, + "learning_rate": 9.733140908623839e-07, + "loss": 0.4451, + "step": 49923 + }, + { + "epoch": 0.8626624274260437, + "grad_norm": 1.0802744060272187, + "learning_rate": 9.730732674347642e-07, + "loss": 0.3531, + "step": 49924 + }, + { + "epoch": 0.8626797069394526, + "grad_norm": 1.5504842206716478, + "learning_rate": 9.728324722801664e-07, + "loss": 0.3182, + "step": 49925 + }, + { + "epoch": 0.8626969864528615, + "grad_norm": 1.204870738436856, + "learning_rate": 9.725917053993416e-07, + "loss": 0.3147, + "step": 49926 + }, + { + "epoch": 0.8627142659662704, + "grad_norm": 1.5183394478367016, + "learning_rate": 9.723509667930465e-07, + "loss": 0.3596, + "step": 49927 + }, + { + "epoch": 0.8627315454796793, + "grad_norm": 2.3838940111130364, + "learning_rate": 9.721102564620333e-07, + "loss": 0.4813, + "step": 49928 + }, + { + "epoch": 0.8627488249930882, + "grad_norm": 1.285846202043292, + "learning_rate": 9.718695744070594e-07, + "loss": 0.2293, + "step": 49929 + }, + { + "epoch": 0.862766104506497, + "grad_norm": 0.9275131775754948, + "learning_rate": 9.716289206288743e-07, + "loss": 0.3546, + "step": 49930 + }, + { + "epoch": 0.862783384019906, + "grad_norm": 0.9233429628537668, + "learning_rate": 9.71388295128235e-07, + "loss": 0.2563, + "step": 49931 + }, + { + "epoch": 0.8628006635333149, + "grad_norm": 0.9796430721204299, + "learning_rate": 9.71147697905892e-07, + "loss": 0.2267, + "step": 49932 + }, + { + "epoch": 0.8628179430467238, + "grad_norm": 1.407437099023944, + "learning_rate": 9.709071289626003e-07, + "loss": 0.5327, + "step": 49933 + }, + { + "epoch": 0.8628352225601327, + "grad_norm": 1.5193082595870755, + "learning_rate": 9.706665882991161e-07, + "loss": 0.2764, + "step": 49934 + }, + { + "epoch": 0.8628525020735416, + "grad_norm": 1.0955436083258445, + "learning_rate": 9.704260759161876e-07, + "loss": 0.3891, + "step": 49935 + }, + { + "epoch": 0.8628697815869505, + "grad_norm": 1.3221773224791513, + "learning_rate": 9.701855918145742e-07, + "loss": 0.2464, + "step": 49936 + }, + { + "epoch": 0.8628870611003594, + "grad_norm": 1.1057157474468982, + "learning_rate": 9.69945135995023e-07, + "loss": 0.474, + "step": 49937 + }, + { + "epoch": 0.8629043406137683, + "grad_norm": 1.6380577162578733, + "learning_rate": 9.697047084582888e-07, + "loss": 0.4086, + "step": 49938 + }, + { + "epoch": 0.8629216201271772, + "grad_norm": 1.8412826744923945, + "learning_rate": 9.69464309205127e-07, + "loss": 0.3548, + "step": 49939 + }, + { + "epoch": 0.8629388996405861, + "grad_norm": 1.2256994775968406, + "learning_rate": 9.69223938236291e-07, + "loss": 0.4196, + "step": 49940 + }, + { + "epoch": 0.862956179153995, + "grad_norm": 1.7259847190097963, + "learning_rate": 9.689835955525307e-07, + "loss": 0.5139, + "step": 49941 + }, + { + "epoch": 0.8629734586674039, + "grad_norm": 1.3573830247541725, + "learning_rate": 9.687432811546006e-07, + "loss": 0.245, + "step": 49942 + }, + { + "epoch": 0.8629907381808128, + "grad_norm": 0.9648658626092954, + "learning_rate": 9.685029950432534e-07, + "loss": 0.284, + "step": 49943 + }, + { + "epoch": 0.8630080176942218, + "grad_norm": 1.3956557528301323, + "learning_rate": 9.6826273721924e-07, + "loss": 0.2308, + "step": 49944 + }, + { + "epoch": 0.8630252972076307, + "grad_norm": 3.15968663281441, + "learning_rate": 9.680225076833139e-07, + "loss": 0.2498, + "step": 49945 + }, + { + "epoch": 0.8630425767210396, + "grad_norm": 1.2310300591278929, + "learning_rate": 9.677823064362269e-07, + "loss": 0.4178, + "step": 49946 + }, + { + "epoch": 0.8630598562344485, + "grad_norm": 2.285732708693666, + "learning_rate": 9.675421334787349e-07, + "loss": 0.3353, + "step": 49947 + }, + { + "epoch": 0.8630771357478574, + "grad_norm": 2.132881270765794, + "learning_rate": 9.673019888115842e-07, + "loss": 0.4291, + "step": 49948 + }, + { + "epoch": 0.8630944152612663, + "grad_norm": 1.6336909092093221, + "learning_rate": 9.67061872435533e-07, + "loss": 0.709, + "step": 49949 + }, + { + "epoch": 0.8631116947746752, + "grad_norm": 1.0717710714471838, + "learning_rate": 9.668217843513294e-07, + "loss": 0.3112, + "step": 49950 + }, + { + "epoch": 0.863128974288084, + "grad_norm": 1.4500886322261264, + "learning_rate": 9.665817245597253e-07, + "loss": 0.37, + "step": 49951 + }, + { + "epoch": 0.8631462538014929, + "grad_norm": 0.8838861701368909, + "learning_rate": 9.663416930614744e-07, + "loss": 0.3449, + "step": 49952 + }, + { + "epoch": 0.8631635333149018, + "grad_norm": 1.4133028286742089, + "learning_rate": 9.661016898573295e-07, + "loss": 0.3452, + "step": 49953 + }, + { + "epoch": 0.8631808128283107, + "grad_norm": 1.619900978175162, + "learning_rate": 9.6586171494804e-07, + "loss": 0.358, + "step": 49954 + }, + { + "epoch": 0.8631980923417196, + "grad_norm": 1.8032923044165563, + "learning_rate": 9.656217683343573e-07, + "loss": 0.5101, + "step": 49955 + }, + { + "epoch": 0.8632153718551285, + "grad_norm": 1.250783286094078, + "learning_rate": 9.653818500170331e-07, + "loss": 0.393, + "step": 49956 + }, + { + "epoch": 0.8632326513685374, + "grad_norm": 1.338666335936583, + "learning_rate": 9.651419599968192e-07, + "loss": 0.2646, + "step": 49957 + }, + { + "epoch": 0.8632499308819463, + "grad_norm": 2.263499037956743, + "learning_rate": 9.649020982744705e-07, + "loss": 0.2925, + "step": 49958 + }, + { + "epoch": 0.8632672103953553, + "grad_norm": 1.6589854971094526, + "learning_rate": 9.64662264850732e-07, + "loss": 0.3273, + "step": 49959 + }, + { + "epoch": 0.8632844899087642, + "grad_norm": 1.5267585509367898, + "learning_rate": 9.644224597263607e-07, + "loss": 0.2353, + "step": 49960 + }, + { + "epoch": 0.8633017694221731, + "grad_norm": 1.9790820135688731, + "learning_rate": 9.641826829021029e-07, + "loss": 0.1871, + "step": 49961 + }, + { + "epoch": 0.863319048935582, + "grad_norm": 1.4880982092317117, + "learning_rate": 9.63942934378711e-07, + "loss": 0.5892, + "step": 49962 + }, + { + "epoch": 0.8633363284489909, + "grad_norm": 1.559428801889759, + "learning_rate": 9.63703214156937e-07, + "loss": 0.2075, + "step": 49963 + }, + { + "epoch": 0.8633536079623998, + "grad_norm": 1.9429235156406928, + "learning_rate": 9.634635222375332e-07, + "loss": 0.4177, + "step": 49964 + }, + { + "epoch": 0.8633708874758087, + "grad_norm": 1.6328432047563726, + "learning_rate": 9.632238586212484e-07, + "loss": 0.2368, + "step": 49965 + }, + { + "epoch": 0.8633881669892176, + "grad_norm": 1.342907859909988, + "learning_rate": 9.629842233088316e-07, + "loss": 0.5828, + "step": 49966 + }, + { + "epoch": 0.8634054465026265, + "grad_norm": 1.2103662591266748, + "learning_rate": 9.627446163010346e-07, + "loss": 0.301, + "step": 49967 + }, + { + "epoch": 0.8634227260160354, + "grad_norm": 2.483273785930838, + "learning_rate": 9.625050375986078e-07, + "loss": 0.3644, + "step": 49968 + }, + { + "epoch": 0.8634400055294443, + "grad_norm": 1.135827616222994, + "learning_rate": 9.62265487202304e-07, + "loss": 0.2841, + "step": 49969 + }, + { + "epoch": 0.8634572850428532, + "grad_norm": 1.48676143163837, + "learning_rate": 9.620259651128704e-07, + "loss": 0.2975, + "step": 49970 + }, + { + "epoch": 0.8634745645562621, + "grad_norm": 1.8249973569027884, + "learning_rate": 9.617864713310599e-07, + "loss": 0.5034, + "step": 49971 + }, + { + "epoch": 0.8634918440696709, + "grad_norm": 1.386590417826904, + "learning_rate": 9.615470058576193e-07, + "loss": 0.356, + "step": 49972 + }, + { + "epoch": 0.8635091235830799, + "grad_norm": 1.794138146810369, + "learning_rate": 9.613075686933004e-07, + "loss": 0.31, + "step": 49973 + }, + { + "epoch": 0.8635264030964888, + "grad_norm": 1.158606360696675, + "learning_rate": 9.610681598388549e-07, + "loss": 0.4375, + "step": 49974 + }, + { + "epoch": 0.8635436826098977, + "grad_norm": 1.0551839510960306, + "learning_rate": 9.608287792950289e-07, + "loss": 0.241, + "step": 49975 + }, + { + "epoch": 0.8635609621233066, + "grad_norm": 1.3891178263811848, + "learning_rate": 9.605894270625759e-07, + "loss": 0.4143, + "step": 49976 + }, + { + "epoch": 0.8635782416367155, + "grad_norm": 1.0211939445268865, + "learning_rate": 9.603501031422423e-07, + "loss": 0.3, + "step": 49977 + }, + { + "epoch": 0.8635955211501244, + "grad_norm": 0.9996180562816491, + "learning_rate": 9.601108075347788e-07, + "loss": 0.165, + "step": 49978 + }, + { + "epoch": 0.8636128006635333, + "grad_norm": 2.3964067875722055, + "learning_rate": 9.598715402409354e-07, + "loss": 0.2868, + "step": 49979 + }, + { + "epoch": 0.8636300801769422, + "grad_norm": 1.923180793155321, + "learning_rate": 9.596323012614628e-07, + "loss": 0.514, + "step": 49980 + }, + { + "epoch": 0.8636473596903511, + "grad_norm": 1.0328966583789072, + "learning_rate": 9.593930905971083e-07, + "loss": 0.3418, + "step": 49981 + }, + { + "epoch": 0.86366463920376, + "grad_norm": 1.2159514084702385, + "learning_rate": 9.591539082486223e-07, + "loss": 0.3573, + "step": 49982 + }, + { + "epoch": 0.8636819187171689, + "grad_norm": 1.5103823964650547, + "learning_rate": 9.589147542167521e-07, + "loss": 0.381, + "step": 49983 + }, + { + "epoch": 0.8636991982305778, + "grad_norm": 1.0674027858422857, + "learning_rate": 9.586756285022492e-07, + "loss": 0.4041, + "step": 49984 + }, + { + "epoch": 0.8637164777439867, + "grad_norm": 0.9935853419361461, + "learning_rate": 9.584365311058597e-07, + "loss": 0.2547, + "step": 49985 + }, + { + "epoch": 0.8637337572573957, + "grad_norm": 1.500276197287007, + "learning_rate": 9.58197462028334e-07, + "loss": 0.3908, + "step": 49986 + }, + { + "epoch": 0.8637510367708046, + "grad_norm": 1.0644101243009099, + "learning_rate": 9.57958421270424e-07, + "loss": 0.5865, + "step": 49987 + }, + { + "epoch": 0.8637683162842135, + "grad_norm": 1.2699112549226494, + "learning_rate": 9.577194088328723e-07, + "loss": 0.2142, + "step": 49988 + }, + { + "epoch": 0.8637855957976224, + "grad_norm": 1.7488260821772414, + "learning_rate": 9.574804247164326e-07, + "loss": 0.3955, + "step": 49989 + }, + { + "epoch": 0.8638028753110313, + "grad_norm": 1.5321051122282099, + "learning_rate": 9.5724146892185e-07, + "loss": 0.3129, + "step": 49990 + }, + { + "epoch": 0.8638201548244402, + "grad_norm": 1.712493120717652, + "learning_rate": 9.570025414498741e-07, + "loss": 0.4467, + "step": 49991 + }, + { + "epoch": 0.8638374343378491, + "grad_norm": 0.9981163937397071, + "learning_rate": 9.567636423012538e-07, + "loss": 0.7939, + "step": 49992 + }, + { + "epoch": 0.8638547138512579, + "grad_norm": 1.187966073008324, + "learning_rate": 9.56524771476739e-07, + "loss": 0.4702, + "step": 49993 + }, + { + "epoch": 0.8638719933646668, + "grad_norm": 1.928140006318839, + "learning_rate": 9.562859289770754e-07, + "loss": 0.4896, + "step": 49994 + }, + { + "epoch": 0.8638892728780757, + "grad_norm": 1.1568103079171086, + "learning_rate": 9.560471148030093e-07, + "loss": 0.2875, + "step": 49995 + }, + { + "epoch": 0.8639065523914846, + "grad_norm": 0.7647977995791414, + "learning_rate": 9.558083289552922e-07, + "loss": 0.7228, + "step": 49996 + }, + { + "epoch": 0.8639238319048935, + "grad_norm": 0.7707004640369178, + "learning_rate": 9.555695714346692e-07, + "loss": 0.7769, + "step": 49997 + }, + { + "epoch": 0.8639411114183024, + "grad_norm": 2.427842581408912, + "learning_rate": 9.553308422418928e-07, + "loss": 0.342, + "step": 49998 + }, + { + "epoch": 0.8639583909317113, + "grad_norm": 1.408959194165482, + "learning_rate": 9.550921413777049e-07, + "loss": 0.2996, + "step": 49999 + }, + { + "epoch": 0.8639756704451202, + "grad_norm": 1.1905094117026724, + "learning_rate": 9.548534688428579e-07, + "loss": 0.2699, + "step": 50000 + }, + { + "epoch": 0.8639929499585292, + "grad_norm": 1.0957659282797034, + "learning_rate": 9.546148246380959e-07, + "loss": 0.4211, + "step": 50001 + }, + { + "epoch": 0.8640102294719381, + "grad_norm": 2.122165897749094, + "learning_rate": 9.543762087641673e-07, + "loss": 0.4806, + "step": 50002 + }, + { + "epoch": 0.864027508985347, + "grad_norm": 1.2065439926042716, + "learning_rate": 9.5413762122182e-07, + "loss": 0.2998, + "step": 50003 + }, + { + "epoch": 0.8640447884987559, + "grad_norm": 1.0167177963407623, + "learning_rate": 9.538990620118026e-07, + "loss": 0.3717, + "step": 50004 + }, + { + "epoch": 0.8640620680121648, + "grad_norm": 1.9563288316058713, + "learning_rate": 9.536605311348612e-07, + "loss": 0.4725, + "step": 50005 + }, + { + "epoch": 0.8640793475255737, + "grad_norm": 1.9709366012438, + "learning_rate": 9.534220285917406e-07, + "loss": 0.2513, + "step": 50006 + }, + { + "epoch": 0.8640966270389826, + "grad_norm": 1.1661253055870868, + "learning_rate": 9.531835543831902e-07, + "loss": 0.3039, + "step": 50007 + }, + { + "epoch": 0.8641139065523915, + "grad_norm": 0.9991411514040365, + "learning_rate": 9.529451085099551e-07, + "loss": 0.2349, + "step": 50008 + }, + { + "epoch": 0.8641311860658004, + "grad_norm": 1.5477279870848977, + "learning_rate": 9.527066909727867e-07, + "loss": 0.4201, + "step": 50009 + }, + { + "epoch": 0.8641484655792093, + "grad_norm": 1.8907546944123568, + "learning_rate": 9.524683017724256e-07, + "loss": 0.3847, + "step": 50010 + }, + { + "epoch": 0.8641657450926182, + "grad_norm": 1.6872853002186823, + "learning_rate": 9.522299409096236e-07, + "loss": 0.1637, + "step": 50011 + }, + { + "epoch": 0.8641830246060271, + "grad_norm": 1.680405445953832, + "learning_rate": 9.519916083851233e-07, + "loss": 0.3827, + "step": 50012 + }, + { + "epoch": 0.864200304119436, + "grad_norm": 1.5235799515355364, + "learning_rate": 9.517533041996729e-07, + "loss": 0.1993, + "step": 50013 + }, + { + "epoch": 0.864217583632845, + "grad_norm": 1.682301457149263, + "learning_rate": 9.515150283540187e-07, + "loss": 0.367, + "step": 50014 + }, + { + "epoch": 0.8642348631462538, + "grad_norm": 1.176289291343724, + "learning_rate": 9.512767808489088e-07, + "loss": 0.3671, + "step": 50015 + }, + { + "epoch": 0.8642521426596627, + "grad_norm": 1.1894773034546875, + "learning_rate": 9.510385616850881e-07, + "loss": 0.2625, + "step": 50016 + }, + { + "epoch": 0.8642694221730716, + "grad_norm": 1.568062142638528, + "learning_rate": 9.508003708633007e-07, + "loss": 0.4312, + "step": 50017 + }, + { + "epoch": 0.8642867016864805, + "grad_norm": 1.2716304954429014, + "learning_rate": 9.505622083842936e-07, + "loss": 0.3902, + "step": 50018 + }, + { + "epoch": 0.8643039811998894, + "grad_norm": 1.1559433566040658, + "learning_rate": 9.50324074248814e-07, + "loss": 0.2926, + "step": 50019 + }, + { + "epoch": 0.8643212607132983, + "grad_norm": 1.1936551009273522, + "learning_rate": 9.50085968457608e-07, + "loss": 0.4578, + "step": 50020 + }, + { + "epoch": 0.8643385402267072, + "grad_norm": 1.3357424985121706, + "learning_rate": 9.498478910114195e-07, + "loss": 0.4568, + "step": 50021 + }, + { + "epoch": 0.8643558197401161, + "grad_norm": 1.697104783960137, + "learning_rate": 9.496098419109967e-07, + "loss": 0.3365, + "step": 50022 + }, + { + "epoch": 0.864373099253525, + "grad_norm": 1.6013188281882065, + "learning_rate": 9.493718211570824e-07, + "loss": 0.2807, + "step": 50023 + }, + { + "epoch": 0.8643903787669339, + "grad_norm": 1.5781679488603984, + "learning_rate": 9.491338287504249e-07, + "loss": 0.4784, + "step": 50024 + }, + { + "epoch": 0.8644076582803428, + "grad_norm": 1.185939215933873, + "learning_rate": 9.488958646917656e-07, + "loss": 0.2754, + "step": 50025 + }, + { + "epoch": 0.8644249377937517, + "grad_norm": 1.754182258327204, + "learning_rate": 9.486579289818531e-07, + "loss": 0.406, + "step": 50026 + }, + { + "epoch": 0.8644422173071606, + "grad_norm": 2.2065927241482806, + "learning_rate": 9.484200216214334e-07, + "loss": 0.3519, + "step": 50027 + }, + { + "epoch": 0.8644594968205696, + "grad_norm": 1.4275087324985443, + "learning_rate": 9.481821426112481e-07, + "loss": 0.4855, + "step": 50028 + }, + { + "epoch": 0.8644767763339785, + "grad_norm": 1.6934195971341888, + "learning_rate": 9.479442919520465e-07, + "loss": 0.2515, + "step": 50029 + }, + { + "epoch": 0.8644940558473874, + "grad_norm": 0.7900772689928757, + "learning_rate": 9.477064696445692e-07, + "loss": 0.3159, + "step": 50030 + }, + { + "epoch": 0.8645113353607963, + "grad_norm": 1.1112943356184193, + "learning_rate": 9.474686756895635e-07, + "loss": 0.3506, + "step": 50031 + }, + { + "epoch": 0.8645286148742052, + "grad_norm": 1.5224890811033878, + "learning_rate": 9.472309100877731e-07, + "loss": 0.1356, + "step": 50032 + }, + { + "epoch": 0.8645458943876141, + "grad_norm": 0.9406599750186256, + "learning_rate": 9.469931728399461e-07, + "loss": 0.3401, + "step": 50033 + }, + { + "epoch": 0.864563173901023, + "grad_norm": 0.6530544547952828, + "learning_rate": 9.467554639468224e-07, + "loss": 0.7113, + "step": 50034 + }, + { + "epoch": 0.8645804534144319, + "grad_norm": 1.0588601459851652, + "learning_rate": 9.465177834091499e-07, + "loss": 0.2588, + "step": 50035 + }, + { + "epoch": 0.8645977329278407, + "grad_norm": 0.6711318354299696, + "learning_rate": 9.462801312276704e-07, + "loss": 0.1377, + "step": 50036 + }, + { + "epoch": 0.8646150124412496, + "grad_norm": 1.681328394022805, + "learning_rate": 9.460425074031298e-07, + "loss": 0.4512, + "step": 50037 + }, + { + "epoch": 0.8646322919546585, + "grad_norm": 1.6222018826761195, + "learning_rate": 9.458049119362745e-07, + "loss": 0.3326, + "step": 50038 + }, + { + "epoch": 0.8646495714680674, + "grad_norm": 1.0502137197819752, + "learning_rate": 9.455673448278435e-07, + "loss": 0.3948, + "step": 50039 + }, + { + "epoch": 0.8646668509814763, + "grad_norm": 1.225238819912342, + "learning_rate": 9.453298060785854e-07, + "loss": 0.3629, + "step": 50040 + }, + { + "epoch": 0.8646841304948852, + "grad_norm": 1.0889891014300142, + "learning_rate": 9.450922956892405e-07, + "loss": 0.2352, + "step": 50041 + }, + { + "epoch": 0.8647014100082941, + "grad_norm": 0.8329785122107747, + "learning_rate": 9.448548136605562e-07, + "loss": 0.3934, + "step": 50042 + }, + { + "epoch": 0.864718689521703, + "grad_norm": 2.006385622788041, + "learning_rate": 9.446173599932729e-07, + "loss": 0.3376, + "step": 50043 + }, + { + "epoch": 0.864735969035112, + "grad_norm": 1.5101270381149392, + "learning_rate": 9.443799346881388e-07, + "loss": 0.5795, + "step": 50044 + }, + { + "epoch": 0.8647532485485209, + "grad_norm": 1.4584501670993084, + "learning_rate": 9.441425377458935e-07, + "loss": 0.3154, + "step": 50045 + }, + { + "epoch": 0.8647705280619298, + "grad_norm": 0.9872685967660347, + "learning_rate": 9.43905169167284e-07, + "loss": 0.2837, + "step": 50046 + }, + { + "epoch": 0.8647878075753387, + "grad_norm": 1.3257058330012852, + "learning_rate": 9.436678289530488e-07, + "loss": 0.2799, + "step": 50047 + }, + { + "epoch": 0.8648050870887476, + "grad_norm": 1.0913952459042549, + "learning_rate": 9.434305171039349e-07, + "loss": 0.6761, + "step": 50048 + }, + { + "epoch": 0.8648223666021565, + "grad_norm": 0.9710057802114586, + "learning_rate": 9.431932336206873e-07, + "loss": 0.4349, + "step": 50049 + }, + { + "epoch": 0.8648396461155654, + "grad_norm": 2.081665679021962, + "learning_rate": 9.429559785040443e-07, + "loss": 0.3551, + "step": 50050 + }, + { + "epoch": 0.8648569256289743, + "grad_norm": 1.5236029168074714, + "learning_rate": 9.427187517547531e-07, + "loss": 0.3028, + "step": 50051 + }, + { + "epoch": 0.8648742051423832, + "grad_norm": 1.3073251215102115, + "learning_rate": 9.424815533735543e-07, + "loss": 0.3661, + "step": 50052 + }, + { + "epoch": 0.8648914846557921, + "grad_norm": 1.0964018938039437, + "learning_rate": 9.422443833611905e-07, + "loss": 0.4263, + "step": 50053 + }, + { + "epoch": 0.864908764169201, + "grad_norm": 0.5883753140258601, + "learning_rate": 9.420072417184056e-07, + "loss": 0.7227, + "step": 50054 + }, + { + "epoch": 0.86492604368261, + "grad_norm": 1.7311617402097543, + "learning_rate": 9.417701284459446e-07, + "loss": 0.413, + "step": 50055 + }, + { + "epoch": 0.8649433231960189, + "grad_norm": 0.9403415045118051, + "learning_rate": 9.41533043544548e-07, + "loss": 0.2869, + "step": 50056 + }, + { + "epoch": 0.8649606027094277, + "grad_norm": 1.4620932756882015, + "learning_rate": 9.412959870149574e-07, + "loss": 0.5816, + "step": 50057 + }, + { + "epoch": 0.8649778822228366, + "grad_norm": 1.1594056569216598, + "learning_rate": 9.410589588579144e-07, + "loss": 0.4109, + "step": 50058 + }, + { + "epoch": 0.8649951617362455, + "grad_norm": 1.3898330251102597, + "learning_rate": 9.408219590741641e-07, + "loss": 0.3214, + "step": 50059 + }, + { + "epoch": 0.8650124412496544, + "grad_norm": 1.10036840312358, + "learning_rate": 9.405849876644501e-07, + "loss": 0.4861, + "step": 50060 + }, + { + "epoch": 0.8650297207630633, + "grad_norm": 1.3012931185318546, + "learning_rate": 9.403480446295099e-07, + "loss": 0.4693, + "step": 50061 + }, + { + "epoch": 0.8650470002764722, + "grad_norm": 1.5519267011596565, + "learning_rate": 9.401111299700905e-07, + "loss": 0.2187, + "step": 50062 + }, + { + "epoch": 0.8650642797898811, + "grad_norm": 2.3904273731672125, + "learning_rate": 9.398742436869301e-07, + "loss": 0.4243, + "step": 50063 + }, + { + "epoch": 0.86508155930329, + "grad_norm": 1.275213971591684, + "learning_rate": 9.396373857807728e-07, + "loss": 0.2869, + "step": 50064 + }, + { + "epoch": 0.8650988388166989, + "grad_norm": 1.536557347392348, + "learning_rate": 9.394005562523567e-07, + "loss": 0.2041, + "step": 50065 + }, + { + "epoch": 0.8651161183301078, + "grad_norm": 1.1946296875421567, + "learning_rate": 9.391637551024302e-07, + "loss": 0.4257, + "step": 50066 + }, + { + "epoch": 0.8651333978435167, + "grad_norm": 1.5009001981834573, + "learning_rate": 9.389269823317315e-07, + "loss": 0.4103, + "step": 50067 + }, + { + "epoch": 0.8651506773569256, + "grad_norm": 1.4337410955159946, + "learning_rate": 9.386902379410001e-07, + "loss": 0.2457, + "step": 50068 + }, + { + "epoch": 0.8651679568703345, + "grad_norm": 0.9647287279837521, + "learning_rate": 9.38453521930982e-07, + "loss": 0.3822, + "step": 50069 + }, + { + "epoch": 0.8651852363837435, + "grad_norm": 1.8331934558802057, + "learning_rate": 9.382168343024134e-07, + "loss": 0.2271, + "step": 50070 + }, + { + "epoch": 0.8652025158971524, + "grad_norm": 1.955686569175884, + "learning_rate": 9.379801750560391e-07, + "loss": 0.3102, + "step": 50071 + }, + { + "epoch": 0.8652197954105613, + "grad_norm": 0.700488029879556, + "learning_rate": 9.377435441925998e-07, + "loss": 0.2319, + "step": 50072 + }, + { + "epoch": 0.8652370749239702, + "grad_norm": 1.3711429393386314, + "learning_rate": 9.375069417128369e-07, + "loss": 0.4913, + "step": 50073 + }, + { + "epoch": 0.8652543544373791, + "grad_norm": 1.158783278897566, + "learning_rate": 9.372703676174899e-07, + "loss": 0.2507, + "step": 50074 + }, + { + "epoch": 0.865271633950788, + "grad_norm": 1.2606414644282449, + "learning_rate": 9.370338219073028e-07, + "loss": 0.4506, + "step": 50075 + }, + { + "epoch": 0.8652889134641969, + "grad_norm": 2.1075570162398094, + "learning_rate": 9.367973045830125e-07, + "loss": 0.4083, + "step": 50076 + }, + { + "epoch": 0.8653061929776058, + "grad_norm": 1.740346444889618, + "learning_rate": 9.36560815645362e-07, + "loss": 0.3383, + "step": 50077 + }, + { + "epoch": 0.8653234724910146, + "grad_norm": 1.6597474885537553, + "learning_rate": 9.36324355095094e-07, + "loss": 0.3643, + "step": 50078 + }, + { + "epoch": 0.8653407520044235, + "grad_norm": 1.1055785123312436, + "learning_rate": 9.360879229329445e-07, + "loss": 0.2254, + "step": 50079 + }, + { + "epoch": 0.8653580315178324, + "grad_norm": 1.7098636349319805, + "learning_rate": 9.358515191596584e-07, + "loss": 0.3899, + "step": 50080 + }, + { + "epoch": 0.8653753110312413, + "grad_norm": 1.004946374227554, + "learning_rate": 9.356151437759731e-07, + "loss": 0.3364, + "step": 50081 + }, + { + "epoch": 0.8653925905446502, + "grad_norm": 0.8838370936039579, + "learning_rate": 9.353787967826289e-07, + "loss": 0.3375, + "step": 50082 + }, + { + "epoch": 0.8654098700580591, + "grad_norm": 1.147308568362008, + "learning_rate": 9.351424781803675e-07, + "loss": 0.2626, + "step": 50083 + }, + { + "epoch": 0.865427149571468, + "grad_norm": 1.1916202876126434, + "learning_rate": 9.349061879699306e-07, + "loss": 0.5155, + "step": 50084 + }, + { + "epoch": 0.865444429084877, + "grad_norm": 0.9872568935547426, + "learning_rate": 9.346699261520553e-07, + "loss": 0.2777, + "step": 50085 + }, + { + "epoch": 0.8654617085982859, + "grad_norm": 2.101073401410338, + "learning_rate": 9.344336927274844e-07, + "loss": 0.3619, + "step": 50086 + }, + { + "epoch": 0.8654789881116948, + "grad_norm": 1.8830859346011786, + "learning_rate": 9.341974876969539e-07, + "loss": 0.37, + "step": 50087 + }, + { + "epoch": 0.8654962676251037, + "grad_norm": 2.548974339298469, + "learning_rate": 9.339613110612056e-07, + "loss": 0.4534, + "step": 50088 + }, + { + "epoch": 0.8655135471385126, + "grad_norm": 1.2038510543775052, + "learning_rate": 9.33725162820982e-07, + "loss": 0.5895, + "step": 50089 + }, + { + "epoch": 0.8655308266519215, + "grad_norm": 1.0788181203209828, + "learning_rate": 9.334890429770171e-07, + "loss": 0.4547, + "step": 50090 + }, + { + "epoch": 0.8655481061653304, + "grad_norm": 1.1774982932485183, + "learning_rate": 9.33252951530057e-07, + "loss": 0.1992, + "step": 50091 + }, + { + "epoch": 0.8655653856787393, + "grad_norm": 0.7969766960161875, + "learning_rate": 9.330168884808344e-07, + "loss": 0.4534, + "step": 50092 + }, + { + "epoch": 0.8655826651921482, + "grad_norm": 0.8010025655963939, + "learning_rate": 9.327808538300931e-07, + "loss": 0.6734, + "step": 50093 + }, + { + "epoch": 0.8655999447055571, + "grad_norm": 1.7553417655327597, + "learning_rate": 9.325448475785703e-07, + "loss": 0.3948, + "step": 50094 + }, + { + "epoch": 0.865617224218966, + "grad_norm": 1.3615370753918261, + "learning_rate": 9.323088697270088e-07, + "loss": 0.4556, + "step": 50095 + }, + { + "epoch": 0.8656345037323749, + "grad_norm": 1.225247018510217, + "learning_rate": 9.320729202761424e-07, + "loss": 0.1763, + "step": 50096 + }, + { + "epoch": 0.8656517832457838, + "grad_norm": 1.1126780262480287, + "learning_rate": 9.318369992267162e-07, + "loss": 0.3045, + "step": 50097 + }, + { + "epoch": 0.8656690627591928, + "grad_norm": 2.0705722721985778, + "learning_rate": 9.316011065794627e-07, + "loss": 0.4535, + "step": 50098 + }, + { + "epoch": 0.8656863422726016, + "grad_norm": 1.5458838816771152, + "learning_rate": 9.313652423351238e-07, + "loss": 0.3114, + "step": 50099 + }, + { + "epoch": 0.8657036217860105, + "grad_norm": 0.7974006619206696, + "learning_rate": 9.311294064944398e-07, + "loss": 0.4019, + "step": 50100 + }, + { + "epoch": 0.8657209012994194, + "grad_norm": 1.4099466551054622, + "learning_rate": 9.308935990581458e-07, + "loss": 0.3813, + "step": 50101 + }, + { + "epoch": 0.8657381808128283, + "grad_norm": 0.8589827707847844, + "learning_rate": 9.306578200269845e-07, + "loss": 0.6306, + "step": 50102 + }, + { + "epoch": 0.8657554603262372, + "grad_norm": 1.8794123107892717, + "learning_rate": 9.304220694016896e-07, + "loss": 0.1804, + "step": 50103 + }, + { + "epoch": 0.8657727398396461, + "grad_norm": 1.0020855232593342, + "learning_rate": 9.301863471830041e-07, + "loss": 0.4524, + "step": 50104 + }, + { + "epoch": 0.865790019353055, + "grad_norm": 2.294152431796899, + "learning_rate": 9.299506533716618e-07, + "loss": 0.3002, + "step": 50105 + }, + { + "epoch": 0.8658072988664639, + "grad_norm": 0.887864974435265, + "learning_rate": 9.297149879684053e-07, + "loss": 0.4091, + "step": 50106 + }, + { + "epoch": 0.8658245783798728, + "grad_norm": 1.4962880644119798, + "learning_rate": 9.294793509739697e-07, + "loss": 0.6551, + "step": 50107 + }, + { + "epoch": 0.8658418578932817, + "grad_norm": 1.0074851879029174, + "learning_rate": 9.292437423890965e-07, + "loss": 0.3916, + "step": 50108 + }, + { + "epoch": 0.8658591374066906, + "grad_norm": 1.1586984745084685, + "learning_rate": 9.290081622145208e-07, + "loss": 0.3928, + "step": 50109 + }, + { + "epoch": 0.8658764169200995, + "grad_norm": 1.7086645417212778, + "learning_rate": 9.287726104509786e-07, + "loss": 0.5093, + "step": 50110 + }, + { + "epoch": 0.8658936964335084, + "grad_norm": 1.1203042168215764, + "learning_rate": 9.285370870992106e-07, + "loss": 0.2929, + "step": 50111 + }, + { + "epoch": 0.8659109759469173, + "grad_norm": 1.2464757721108526, + "learning_rate": 9.283015921599536e-07, + "loss": 0.4781, + "step": 50112 + }, + { + "epoch": 0.8659282554603263, + "grad_norm": 1.167592894523595, + "learning_rate": 9.280661256339474e-07, + "loss": 0.2466, + "step": 50113 + }, + { + "epoch": 0.8659455349737352, + "grad_norm": 1.9353263467214301, + "learning_rate": 9.278306875219256e-07, + "loss": 0.4879, + "step": 50114 + }, + { + "epoch": 0.8659628144871441, + "grad_norm": 1.143890162346996, + "learning_rate": 9.275952778246289e-07, + "loss": 0.3748, + "step": 50115 + }, + { + "epoch": 0.865980094000553, + "grad_norm": 1.1836550238951695, + "learning_rate": 9.273598965427922e-07, + "loss": 0.4321, + "step": 50116 + }, + { + "epoch": 0.8659973735139619, + "grad_norm": 1.522643391473596, + "learning_rate": 9.271245436771537e-07, + "loss": 0.3855, + "step": 50117 + }, + { + "epoch": 0.8660146530273708, + "grad_norm": 1.6111937096228552, + "learning_rate": 9.268892192284518e-07, + "loss": 0.2731, + "step": 50118 + }, + { + "epoch": 0.8660319325407797, + "grad_norm": 1.144128552563095, + "learning_rate": 9.266539231974215e-07, + "loss": 0.2831, + "step": 50119 + }, + { + "epoch": 0.8660492120541885, + "grad_norm": 1.3765177446809738, + "learning_rate": 9.264186555848021e-07, + "loss": 0.2654, + "step": 50120 + }, + { + "epoch": 0.8660664915675974, + "grad_norm": 0.8872280334089084, + "learning_rate": 9.261834163913274e-07, + "loss": 0.2071, + "step": 50121 + }, + { + "epoch": 0.8660837710810063, + "grad_norm": 1.1813720311740468, + "learning_rate": 9.259482056177349e-07, + "loss": 0.2808, + "step": 50122 + }, + { + "epoch": 0.8661010505944152, + "grad_norm": 1.2298616006052807, + "learning_rate": 9.257130232647638e-07, + "loss": 0.4067, + "step": 50123 + }, + { + "epoch": 0.8661183301078241, + "grad_norm": 1.1583053191465156, + "learning_rate": 9.25477869333149e-07, + "loss": 0.266, + "step": 50124 + }, + { + "epoch": 0.866135609621233, + "grad_norm": 0.7580203926907878, + "learning_rate": 9.252427438236267e-07, + "loss": 0.534, + "step": 50125 + }, + { + "epoch": 0.866152889134642, + "grad_norm": 0.8887776347146664, + "learning_rate": 9.250076467369351e-07, + "loss": 0.2768, + "step": 50126 + }, + { + "epoch": 0.8661701686480509, + "grad_norm": 1.0787035868336425, + "learning_rate": 9.247725780738082e-07, + "loss": 0.3186, + "step": 50127 + }, + { + "epoch": 0.8661874481614598, + "grad_norm": 1.2264743775462366, + "learning_rate": 9.245375378349831e-07, + "loss": 0.3587, + "step": 50128 + }, + { + "epoch": 0.8662047276748687, + "grad_norm": 0.7826759488027073, + "learning_rate": 9.243025260211968e-07, + "loss": 0.2683, + "step": 50129 + }, + { + "epoch": 0.8662220071882776, + "grad_norm": 2.29404862283743, + "learning_rate": 9.240675426331836e-07, + "loss": 0.2749, + "step": 50130 + }, + { + "epoch": 0.8662392867016865, + "grad_norm": 0.8818813599869806, + "learning_rate": 9.238325876716825e-07, + "loss": 0.3654, + "step": 50131 + }, + { + "epoch": 0.8662565662150954, + "grad_norm": 0.9626577606643958, + "learning_rate": 9.235976611374253e-07, + "loss": 0.5826, + "step": 50132 + }, + { + "epoch": 0.8662738457285043, + "grad_norm": 1.7567090411382977, + "learning_rate": 9.233627630311503e-07, + "loss": 0.359, + "step": 50133 + }, + { + "epoch": 0.8662911252419132, + "grad_norm": 1.0840085729915605, + "learning_rate": 9.231278933535937e-07, + "loss": 0.1888, + "step": 50134 + }, + { + "epoch": 0.8663084047553221, + "grad_norm": 1.283538356058039, + "learning_rate": 9.228930521054913e-07, + "loss": 0.4002, + "step": 50135 + }, + { + "epoch": 0.866325684268731, + "grad_norm": 1.581971998494456, + "learning_rate": 9.22658239287576e-07, + "loss": 0.4826, + "step": 50136 + }, + { + "epoch": 0.8663429637821399, + "grad_norm": 2.1208882248092067, + "learning_rate": 9.224234549005872e-07, + "loss": 0.5196, + "step": 50137 + }, + { + "epoch": 0.8663602432955488, + "grad_norm": 2.0081208287129004, + "learning_rate": 9.221886989452555e-07, + "loss": 0.3863, + "step": 50138 + }, + { + "epoch": 0.8663775228089577, + "grad_norm": 2.4199875341719745, + "learning_rate": 9.219539714223191e-07, + "loss": 0.2413, + "step": 50139 + }, + { + "epoch": 0.8663948023223667, + "grad_norm": 1.1059610164300766, + "learning_rate": 9.217192723325141e-07, + "loss": 0.3159, + "step": 50140 + }, + { + "epoch": 0.8664120818357754, + "grad_norm": 0.9469607547307012, + "learning_rate": 9.214846016765733e-07, + "loss": 0.265, + "step": 50141 + }, + { + "epoch": 0.8664293613491844, + "grad_norm": 1.5758348404743308, + "learning_rate": 9.212499594552338e-07, + "loss": 0.4093, + "step": 50142 + }, + { + "epoch": 0.8664466408625933, + "grad_norm": 1.209947447238903, + "learning_rate": 9.210153456692272e-07, + "loss": 0.3195, + "step": 50143 + }, + { + "epoch": 0.8664639203760022, + "grad_norm": 0.9372307072821264, + "learning_rate": 9.207807603192931e-07, + "loss": 0.1382, + "step": 50144 + }, + { + "epoch": 0.8664811998894111, + "grad_norm": 0.8147498227684501, + "learning_rate": 9.205462034061596e-07, + "loss": 0.4222, + "step": 50145 + }, + { + "epoch": 0.86649847940282, + "grad_norm": 1.0867158924663354, + "learning_rate": 9.203116749305696e-07, + "loss": 0.2664, + "step": 50146 + }, + { + "epoch": 0.8665157589162289, + "grad_norm": 1.579729537882796, + "learning_rate": 9.200771748932513e-07, + "loss": 0.2779, + "step": 50147 + }, + { + "epoch": 0.8665330384296378, + "grad_norm": 1.6683538848697899, + "learning_rate": 9.198427032949431e-07, + "loss": 0.2374, + "step": 50148 + }, + { + "epoch": 0.8665503179430467, + "grad_norm": 1.3809363631817204, + "learning_rate": 9.196082601363777e-07, + "loss": 0.2613, + "step": 50149 + }, + { + "epoch": 0.8665675974564556, + "grad_norm": 1.9789587484734228, + "learning_rate": 9.193738454182877e-07, + "loss": 0.247, + "step": 50150 + }, + { + "epoch": 0.8665848769698645, + "grad_norm": 1.2630954970816852, + "learning_rate": 9.191394591414082e-07, + "loss": 0.495, + "step": 50151 + }, + { + "epoch": 0.8666021564832734, + "grad_norm": 1.2922218587061365, + "learning_rate": 9.189051013064743e-07, + "loss": 0.3326, + "step": 50152 + }, + { + "epoch": 0.8666194359966823, + "grad_norm": 2.0833626265596896, + "learning_rate": 9.186707719142219e-07, + "loss": 0.5407, + "step": 50153 + }, + { + "epoch": 0.8666367155100912, + "grad_norm": 1.5505262226447138, + "learning_rate": 9.184364709653815e-07, + "loss": 0.2931, + "step": 50154 + }, + { + "epoch": 0.8666539950235002, + "grad_norm": 0.8598520203153173, + "learning_rate": 9.182021984606892e-07, + "loss": 0.1915, + "step": 50155 + }, + { + "epoch": 0.8666712745369091, + "grad_norm": 1.1016207221213774, + "learning_rate": 9.179679544008758e-07, + "loss": 0.2507, + "step": 50156 + }, + { + "epoch": 0.866688554050318, + "grad_norm": 1.7171604708859822, + "learning_rate": 9.17733738786678e-07, + "loss": 0.2605, + "step": 50157 + }, + { + "epoch": 0.8667058335637269, + "grad_norm": 1.1046004223170012, + "learning_rate": 9.174995516188278e-07, + "loss": 0.3285, + "step": 50158 + }, + { + "epoch": 0.8667231130771358, + "grad_norm": 1.4952182584150444, + "learning_rate": 9.172653928980601e-07, + "loss": 0.3912, + "step": 50159 + }, + { + "epoch": 0.8667403925905447, + "grad_norm": 1.6148080993741185, + "learning_rate": 9.170312626251088e-07, + "loss": 0.3478, + "step": 50160 + }, + { + "epoch": 0.8667576721039536, + "grad_norm": 1.2417983590959705, + "learning_rate": 9.167971608007043e-07, + "loss": 0.5663, + "step": 50161 + }, + { + "epoch": 0.8667749516173625, + "grad_norm": 1.1264677125252576, + "learning_rate": 9.165630874255805e-07, + "loss": 0.2426, + "step": 50162 + }, + { + "epoch": 0.8667922311307713, + "grad_norm": 0.9326286709132021, + "learning_rate": 9.163290425004711e-07, + "loss": 0.3738, + "step": 50163 + }, + { + "epoch": 0.8668095106441802, + "grad_norm": 1.1371558468870135, + "learning_rate": 9.160950260261125e-07, + "loss": 0.2346, + "step": 50164 + }, + { + "epoch": 0.8668267901575891, + "grad_norm": 1.7148611878535054, + "learning_rate": 9.158610380032318e-07, + "loss": 0.3293, + "step": 50165 + }, + { + "epoch": 0.866844069670998, + "grad_norm": 1.3913527640666512, + "learning_rate": 9.156270784325672e-07, + "loss": 0.3933, + "step": 50166 + }, + { + "epoch": 0.8668613491844069, + "grad_norm": 2.092928444311179, + "learning_rate": 9.153931473148458e-07, + "loss": 0.3322, + "step": 50167 + }, + { + "epoch": 0.8668786286978158, + "grad_norm": 0.8738510267935253, + "learning_rate": 9.151592446508051e-07, + "loss": 0.2013, + "step": 50168 + }, + { + "epoch": 0.8668959082112248, + "grad_norm": 0.9121418136997013, + "learning_rate": 9.149253704411754e-07, + "loss": 0.2887, + "step": 50169 + }, + { + "epoch": 0.8669131877246337, + "grad_norm": 1.5018650548321888, + "learning_rate": 9.146915246866917e-07, + "loss": 0.5074, + "step": 50170 + }, + { + "epoch": 0.8669304672380426, + "grad_norm": 0.910606439613611, + "learning_rate": 9.144577073880844e-07, + "loss": 0.2863, + "step": 50171 + }, + { + "epoch": 0.8669477467514515, + "grad_norm": 1.1543087793526652, + "learning_rate": 9.142239185460844e-07, + "loss": 0.259, + "step": 50172 + }, + { + "epoch": 0.8669650262648604, + "grad_norm": 1.0287778952772386, + "learning_rate": 9.139901581614263e-07, + "loss": 0.4102, + "step": 50173 + }, + { + "epoch": 0.8669823057782693, + "grad_norm": 1.5737639780744204, + "learning_rate": 9.137564262348409e-07, + "loss": 0.4887, + "step": 50174 + }, + { + "epoch": 0.8669995852916782, + "grad_norm": 1.1425947688280242, + "learning_rate": 9.135227227670628e-07, + "loss": 0.2626, + "step": 50175 + }, + { + "epoch": 0.8670168648050871, + "grad_norm": 1.3427598138928318, + "learning_rate": 9.132890477588196e-07, + "loss": 0.1824, + "step": 50176 + }, + { + "epoch": 0.867034144318496, + "grad_norm": 1.6628511016875611, + "learning_rate": 9.130554012108483e-07, + "loss": 0.2541, + "step": 50177 + }, + { + "epoch": 0.8670514238319049, + "grad_norm": 1.385865013287269, + "learning_rate": 9.128217831238761e-07, + "loss": 0.1859, + "step": 50178 + }, + { + "epoch": 0.8670687033453138, + "grad_norm": 1.2859845359413775, + "learning_rate": 9.12588193498637e-07, + "loss": 0.3582, + "step": 50179 + }, + { + "epoch": 0.8670859828587227, + "grad_norm": 1.3731051395987925, + "learning_rate": 9.123546323358634e-07, + "loss": 0.3051, + "step": 50180 + }, + { + "epoch": 0.8671032623721316, + "grad_norm": 1.5472689465278702, + "learning_rate": 9.12121099636284e-07, + "loss": 0.3425, + "step": 50181 + }, + { + "epoch": 0.8671205418855406, + "grad_norm": 2.0662596389348136, + "learning_rate": 9.118875954006345e-07, + "loss": 0.2582, + "step": 50182 + }, + { + "epoch": 0.8671378213989495, + "grad_norm": 1.110864487870757, + "learning_rate": 9.116541196296413e-07, + "loss": 0.4462, + "step": 50183 + }, + { + "epoch": 0.8671551009123583, + "grad_norm": 1.3471750267495748, + "learning_rate": 9.114206723240393e-07, + "loss": 0.4438, + "step": 50184 + }, + { + "epoch": 0.8671723804257672, + "grad_norm": 1.2562025494961024, + "learning_rate": 9.111872534845557e-07, + "loss": 0.4886, + "step": 50185 + }, + { + "epoch": 0.8671896599391761, + "grad_norm": 1.2731780566262854, + "learning_rate": 9.109538631119274e-07, + "loss": 0.4301, + "step": 50186 + }, + { + "epoch": 0.867206939452585, + "grad_norm": 0.8624823679651077, + "learning_rate": 9.10720501206882e-07, + "loss": 0.6928, + "step": 50187 + }, + { + "epoch": 0.8672242189659939, + "grad_norm": 0.816735810347852, + "learning_rate": 9.10487167770151e-07, + "loss": 0.2694, + "step": 50188 + }, + { + "epoch": 0.8672414984794028, + "grad_norm": 1.108903061244378, + "learning_rate": 9.102538628024638e-07, + "loss": 0.3369, + "step": 50189 + }, + { + "epoch": 0.8672587779928117, + "grad_norm": 1.2710708844945224, + "learning_rate": 9.100205863045542e-07, + "loss": 0.2662, + "step": 50190 + }, + { + "epoch": 0.8672760575062206, + "grad_norm": 0.9846438348436319, + "learning_rate": 9.097873382771483e-07, + "loss": 0.3584, + "step": 50191 + }, + { + "epoch": 0.8672933370196295, + "grad_norm": 1.3246581142311111, + "learning_rate": 9.0955411872098e-07, + "loss": 0.3166, + "step": 50192 + }, + { + "epoch": 0.8673106165330384, + "grad_norm": 1.358650314766094, + "learning_rate": 9.09320927636781e-07, + "loss": 0.3644, + "step": 50193 + }, + { + "epoch": 0.8673278960464473, + "grad_norm": 1.939665499840627, + "learning_rate": 9.090877650252771e-07, + "loss": 0.1842, + "step": 50194 + }, + { + "epoch": 0.8673451755598562, + "grad_norm": 1.6129041671087185, + "learning_rate": 9.088546308872038e-07, + "loss": 0.4333, + "step": 50195 + }, + { + "epoch": 0.8673624550732651, + "grad_norm": 1.262305453919148, + "learning_rate": 9.086215252232866e-07, + "loss": 0.5102, + "step": 50196 + }, + { + "epoch": 0.8673797345866741, + "grad_norm": 1.3440792339082375, + "learning_rate": 9.083884480342575e-07, + "loss": 0.3809, + "step": 50197 + }, + { + "epoch": 0.867397014100083, + "grad_norm": 0.9820197402831125, + "learning_rate": 9.081553993208469e-07, + "loss": 0.3134, + "step": 50198 + }, + { + "epoch": 0.8674142936134919, + "grad_norm": 1.0900897302675776, + "learning_rate": 9.079223790837866e-07, + "loss": 0.3245, + "step": 50199 + }, + { + "epoch": 0.8674315731269008, + "grad_norm": 0.9896659724531498, + "learning_rate": 9.076893873238047e-07, + "loss": 0.4619, + "step": 50200 + }, + { + "epoch": 0.8674488526403097, + "grad_norm": 0.7038337772491585, + "learning_rate": 9.074564240416284e-07, + "loss": 0.8803, + "step": 50201 + }, + { + "epoch": 0.8674661321537186, + "grad_norm": 0.6463741602521438, + "learning_rate": 9.072234892379894e-07, + "loss": 0.6639, + "step": 50202 + }, + { + "epoch": 0.8674834116671275, + "grad_norm": 1.365613057721622, + "learning_rate": 9.069905829136171e-07, + "loss": 0.6155, + "step": 50203 + }, + { + "epoch": 0.8675006911805364, + "grad_norm": 1.3706690393771268, + "learning_rate": 9.067577050692445e-07, + "loss": 0.3776, + "step": 50204 + }, + { + "epoch": 0.8675179706939452, + "grad_norm": 1.2679655048117366, + "learning_rate": 9.06524855705595e-07, + "loss": 0.3425, + "step": 50205 + }, + { + "epoch": 0.8675352502073541, + "grad_norm": 1.5658113945589434, + "learning_rate": 9.062920348234028e-07, + "loss": 0.1901, + "step": 50206 + }, + { + "epoch": 0.867552529720763, + "grad_norm": 1.3172676950259337, + "learning_rate": 9.060592424233928e-07, + "loss": 0.4098, + "step": 50207 + }, + { + "epoch": 0.8675698092341719, + "grad_norm": 1.2810589222209083, + "learning_rate": 9.058264785062965e-07, + "loss": 0.4291, + "step": 50208 + }, + { + "epoch": 0.8675870887475808, + "grad_norm": 0.8635117208841725, + "learning_rate": 9.055937430728434e-07, + "loss": 0.4819, + "step": 50209 + }, + { + "epoch": 0.8676043682609897, + "grad_norm": 1.271108341500748, + "learning_rate": 9.05361036123763e-07, + "loss": 0.4681, + "step": 50210 + }, + { + "epoch": 0.8676216477743987, + "grad_norm": 1.3230586957963724, + "learning_rate": 9.051283576597836e-07, + "loss": 0.3407, + "step": 50211 + }, + { + "epoch": 0.8676389272878076, + "grad_norm": 1.1778734292665507, + "learning_rate": 9.048957076816311e-07, + "loss": 0.3794, + "step": 50212 + }, + { + "epoch": 0.8676562068012165, + "grad_norm": 1.562742989484925, + "learning_rate": 9.046630861900363e-07, + "loss": 0.3093, + "step": 50213 + }, + { + "epoch": 0.8676734863146254, + "grad_norm": 1.631360672563816, + "learning_rate": 9.044304931857284e-07, + "loss": 0.2207, + "step": 50214 + }, + { + "epoch": 0.8676907658280343, + "grad_norm": 1.0787861625567376, + "learning_rate": 9.041979286694369e-07, + "loss": 0.4585, + "step": 50215 + }, + { + "epoch": 0.8677080453414432, + "grad_norm": 1.8379443580484023, + "learning_rate": 9.039653926418867e-07, + "loss": 0.2779, + "step": 50216 + }, + { + "epoch": 0.8677253248548521, + "grad_norm": 1.347990764767671, + "learning_rate": 9.037328851038108e-07, + "loss": 0.2258, + "step": 50217 + }, + { + "epoch": 0.867742604368261, + "grad_norm": 1.039471317879034, + "learning_rate": 9.035004060559327e-07, + "loss": 0.2213, + "step": 50218 + }, + { + "epoch": 0.8677598838816699, + "grad_norm": 1.2743444662464365, + "learning_rate": 9.03267955498982e-07, + "loss": 0.3976, + "step": 50219 + }, + { + "epoch": 0.8677771633950788, + "grad_norm": 1.3176510257227603, + "learning_rate": 9.030355334336882e-07, + "loss": 0.2785, + "step": 50220 + }, + { + "epoch": 0.8677944429084877, + "grad_norm": 0.8742001642206043, + "learning_rate": 9.028031398607795e-07, + "loss": 0.2565, + "step": 50221 + }, + { + "epoch": 0.8678117224218966, + "grad_norm": 1.9335426172594703, + "learning_rate": 9.025707747809831e-07, + "loss": 0.324, + "step": 50222 + }, + { + "epoch": 0.8678290019353055, + "grad_norm": 0.960160571885305, + "learning_rate": 9.023384381950251e-07, + "loss": 0.4065, + "step": 50223 + }, + { + "epoch": 0.8678462814487145, + "grad_norm": 1.4050608448832405, + "learning_rate": 9.02106130103636e-07, + "loss": 0.3373, + "step": 50224 + }, + { + "epoch": 0.8678635609621234, + "grad_norm": 1.593043297636347, + "learning_rate": 9.018738505075375e-07, + "loss": 0.2178, + "step": 50225 + }, + { + "epoch": 0.8678808404755322, + "grad_norm": 1.140980977367887, + "learning_rate": 9.016415994074656e-07, + "loss": 0.4805, + "step": 50226 + }, + { + "epoch": 0.8678981199889411, + "grad_norm": 1.1316305440871088, + "learning_rate": 9.01409376804142e-07, + "loss": 0.3972, + "step": 50227 + }, + { + "epoch": 0.86791539950235, + "grad_norm": 0.7853324592406992, + "learning_rate": 9.011771826982985e-07, + "loss": 0.1457, + "step": 50228 + }, + { + "epoch": 0.8679326790157589, + "grad_norm": 4.151429801163556, + "learning_rate": 9.009450170906564e-07, + "loss": 0.434, + "step": 50229 + }, + { + "epoch": 0.8679499585291678, + "grad_norm": 1.1189285576976222, + "learning_rate": 9.007128799819487e-07, + "loss": 0.2601, + "step": 50230 + }, + { + "epoch": 0.8679672380425767, + "grad_norm": 1.1086862920142304, + "learning_rate": 9.00480771372897e-07, + "loss": 0.3037, + "step": 50231 + }, + { + "epoch": 0.8679845175559856, + "grad_norm": 1.4762268994849308, + "learning_rate": 9.002486912642316e-07, + "loss": 0.2923, + "step": 50232 + }, + { + "epoch": 0.8680017970693945, + "grad_norm": 1.0364891025705747, + "learning_rate": 9.000166396566812e-07, + "loss": 0.3801, + "step": 50233 + }, + { + "epoch": 0.8680190765828034, + "grad_norm": 1.1410198168589514, + "learning_rate": 8.997846165509671e-07, + "loss": 0.2594, + "step": 50234 + }, + { + "epoch": 0.8680363560962123, + "grad_norm": 1.0140386875014669, + "learning_rate": 8.995526219478224e-07, + "loss": 0.3218, + "step": 50235 + }, + { + "epoch": 0.8680536356096212, + "grad_norm": 1.6824433818623095, + "learning_rate": 8.993206558479672e-07, + "loss": 0.4158, + "step": 50236 + }, + { + "epoch": 0.8680709151230301, + "grad_norm": 1.2183106867612874, + "learning_rate": 8.990887182521324e-07, + "loss": 0.6899, + "step": 50237 + }, + { + "epoch": 0.868088194636439, + "grad_norm": 1.5511366956785062, + "learning_rate": 8.98856809161044e-07, + "loss": 0.228, + "step": 50238 + }, + { + "epoch": 0.868105474149848, + "grad_norm": 1.4844377640398918, + "learning_rate": 8.986249285754278e-07, + "loss": 0.4461, + "step": 50239 + }, + { + "epoch": 0.8681227536632569, + "grad_norm": 1.2601297688925954, + "learning_rate": 8.983930764960092e-07, + "loss": 0.3374, + "step": 50240 + }, + { + "epoch": 0.8681400331766658, + "grad_norm": 1.2227476417942753, + "learning_rate": 8.981612529235172e-07, + "loss": 0.3776, + "step": 50241 + }, + { + "epoch": 0.8681573126900747, + "grad_norm": 0.6391973160788016, + "learning_rate": 8.979294578586739e-07, + "loss": 0.9397, + "step": 50242 + }, + { + "epoch": 0.8681745922034836, + "grad_norm": 1.080240927657715, + "learning_rate": 8.976976913022073e-07, + "loss": 0.3623, + "step": 50243 + }, + { + "epoch": 0.8681918717168925, + "grad_norm": 1.5642010515217797, + "learning_rate": 8.974659532548446e-07, + "loss": 0.3655, + "step": 50244 + }, + { + "epoch": 0.8682091512303014, + "grad_norm": 1.6996542250752118, + "learning_rate": 8.972342437173087e-07, + "loss": 0.2982, + "step": 50245 + }, + { + "epoch": 0.8682264307437103, + "grad_norm": 1.2198353575748013, + "learning_rate": 8.970025626903289e-07, + "loss": 0.2907, + "step": 50246 + }, + { + "epoch": 0.8682437102571191, + "grad_norm": 2.032241907277666, + "learning_rate": 8.967709101746269e-07, + "loss": 0.3113, + "step": 50247 + }, + { + "epoch": 0.868260989770528, + "grad_norm": 1.5580990265813959, + "learning_rate": 8.965392861709299e-07, + "loss": 0.2453, + "step": 50248 + }, + { + "epoch": 0.8682782692839369, + "grad_norm": 1.0788375010323639, + "learning_rate": 8.963076906799651e-07, + "loss": 0.1929, + "step": 50249 + }, + { + "epoch": 0.8682955487973458, + "grad_norm": 1.2282472071947796, + "learning_rate": 8.960761237024573e-07, + "loss": 0.3618, + "step": 50250 + }, + { + "epoch": 0.8683128283107547, + "grad_norm": 1.2877217683527842, + "learning_rate": 8.958445852391284e-07, + "loss": 0.6055, + "step": 50251 + }, + { + "epoch": 0.8683301078241636, + "grad_norm": 1.6305536205765068, + "learning_rate": 8.956130752907088e-07, + "loss": 0.2002, + "step": 50252 + }, + { + "epoch": 0.8683473873375726, + "grad_norm": 2.0517365799687663, + "learning_rate": 8.953815938579191e-07, + "loss": 0.359, + "step": 50253 + }, + { + "epoch": 0.8683646668509815, + "grad_norm": 0.82327396202517, + "learning_rate": 8.951501409414864e-07, + "loss": 0.2635, + "step": 50254 + }, + { + "epoch": 0.8683819463643904, + "grad_norm": 1.3202614341117083, + "learning_rate": 8.949187165421369e-07, + "loss": 0.3145, + "step": 50255 + }, + { + "epoch": 0.8683992258777993, + "grad_norm": 1.2432944913275337, + "learning_rate": 8.946873206605921e-07, + "loss": 0.2467, + "step": 50256 + }, + { + "epoch": 0.8684165053912082, + "grad_norm": 1.6228573715632375, + "learning_rate": 8.944559532975805e-07, + "loss": 0.4823, + "step": 50257 + }, + { + "epoch": 0.8684337849046171, + "grad_norm": 1.8406605162387013, + "learning_rate": 8.942246144538224e-07, + "loss": 0.3558, + "step": 50258 + }, + { + "epoch": 0.868451064418026, + "grad_norm": 1.1864024584824464, + "learning_rate": 8.939933041300463e-07, + "loss": 0.265, + "step": 50259 + }, + { + "epoch": 0.8684683439314349, + "grad_norm": 0.8959387272577254, + "learning_rate": 8.937620223269739e-07, + "loss": 0.4035, + "step": 50260 + }, + { + "epoch": 0.8684856234448438, + "grad_norm": 1.3308535310775487, + "learning_rate": 8.935307690453332e-07, + "loss": 0.3564, + "step": 50261 + }, + { + "epoch": 0.8685029029582527, + "grad_norm": 1.964534368547182, + "learning_rate": 8.932995442858461e-07, + "loss": 0.2846, + "step": 50262 + }, + { + "epoch": 0.8685201824716616, + "grad_norm": 1.6154734412817557, + "learning_rate": 8.930683480492353e-07, + "loss": 0.4167, + "step": 50263 + }, + { + "epoch": 0.8685374619850705, + "grad_norm": 1.4285901591377212, + "learning_rate": 8.928371803362279e-07, + "loss": 0.3093, + "step": 50264 + }, + { + "epoch": 0.8685547414984794, + "grad_norm": 1.3593282591402516, + "learning_rate": 8.926060411475434e-07, + "loss": 0.2394, + "step": 50265 + }, + { + "epoch": 0.8685720210118884, + "grad_norm": 2.00935366899356, + "learning_rate": 8.923749304839124e-07, + "loss": 0.3278, + "step": 50266 + }, + { + "epoch": 0.8685893005252973, + "grad_norm": 0.8729962167335825, + "learning_rate": 8.921438483460531e-07, + "loss": 0.244, + "step": 50267 + }, + { + "epoch": 0.868606580038706, + "grad_norm": 1.7310220671032528, + "learning_rate": 8.91912794734694e-07, + "loss": 0.3401, + "step": 50268 + }, + { + "epoch": 0.868623859552115, + "grad_norm": 1.5050822833888815, + "learning_rate": 8.916817696505542e-07, + "loss": 0.2528, + "step": 50269 + }, + { + "epoch": 0.8686411390655239, + "grad_norm": 1.0845889481723947, + "learning_rate": 8.914507730943612e-07, + "loss": 0.4395, + "step": 50270 + }, + { + "epoch": 0.8686584185789328, + "grad_norm": 1.4404969957953486, + "learning_rate": 8.91219805066832e-07, + "loss": 0.2899, + "step": 50271 + }, + { + "epoch": 0.8686756980923417, + "grad_norm": 0.9961478555232509, + "learning_rate": 8.909888655686994e-07, + "loss": 0.3986, + "step": 50272 + }, + { + "epoch": 0.8686929776057506, + "grad_norm": 1.4373250671096351, + "learning_rate": 8.907579546006817e-07, + "loss": 0.2558, + "step": 50273 + }, + { + "epoch": 0.8687102571191595, + "grad_norm": 1.3687959504864247, + "learning_rate": 8.905270721634995e-07, + "loss": 0.3796, + "step": 50274 + }, + { + "epoch": 0.8687275366325684, + "grad_norm": 1.1592694856305799, + "learning_rate": 8.902962182578823e-07, + "loss": 0.3112, + "step": 50275 + }, + { + "epoch": 0.8687448161459773, + "grad_norm": 0.760705212500068, + "learning_rate": 8.90065392884546e-07, + "loss": 0.2338, + "step": 50276 + }, + { + "epoch": 0.8687620956593862, + "grad_norm": 0.8811051515178864, + "learning_rate": 8.89834596044219e-07, + "loss": 0.197, + "step": 50277 + }, + { + "epoch": 0.8687793751727951, + "grad_norm": 1.0053999435571002, + "learning_rate": 8.896038277376217e-07, + "loss": 0.2431, + "step": 50278 + }, + { + "epoch": 0.868796654686204, + "grad_norm": 1.4015543035881588, + "learning_rate": 8.893730879654794e-07, + "loss": 0.2971, + "step": 50279 + }, + { + "epoch": 0.868813934199613, + "grad_norm": 1.4832418450772795, + "learning_rate": 8.891423767285101e-07, + "loss": 0.3198, + "step": 50280 + }, + { + "epoch": 0.8688312137130219, + "grad_norm": 1.4324518390597891, + "learning_rate": 8.889116940274422e-07, + "loss": 0.3183, + "step": 50281 + }, + { + "epoch": 0.8688484932264308, + "grad_norm": 1.2507204098913514, + "learning_rate": 8.886810398629931e-07, + "loss": 0.3227, + "step": 50282 + }, + { + "epoch": 0.8688657727398397, + "grad_norm": 0.924947781203813, + "learning_rate": 8.884504142358885e-07, + "loss": 0.3496, + "step": 50283 + }, + { + "epoch": 0.8688830522532486, + "grad_norm": 0.8950791367438042, + "learning_rate": 8.882198171468503e-07, + "loss": 0.3356, + "step": 50284 + }, + { + "epoch": 0.8689003317666575, + "grad_norm": 1.1718151003054282, + "learning_rate": 8.87989248596598e-07, + "loss": 0.4043, + "step": 50285 + }, + { + "epoch": 0.8689176112800664, + "grad_norm": 1.2050878636797486, + "learning_rate": 8.877587085858585e-07, + "loss": 0.409, + "step": 50286 + }, + { + "epoch": 0.8689348907934753, + "grad_norm": 1.3510001205002644, + "learning_rate": 8.875281971153493e-07, + "loss": 0.2187, + "step": 50287 + }, + { + "epoch": 0.8689521703068842, + "grad_norm": 1.1983412571785395, + "learning_rate": 8.872977141857942e-07, + "loss": 0.2943, + "step": 50288 + }, + { + "epoch": 0.868969449820293, + "grad_norm": 2.996444834030308, + "learning_rate": 8.870672597979146e-07, + "loss": 0.3274, + "step": 50289 + }, + { + "epoch": 0.8689867293337019, + "grad_norm": 1.3149725739982834, + "learning_rate": 8.868368339524358e-07, + "loss": 0.1511, + "step": 50290 + }, + { + "epoch": 0.8690040088471108, + "grad_norm": 1.9026198685983688, + "learning_rate": 8.866064366500749e-07, + "loss": 0.3784, + "step": 50291 + }, + { + "epoch": 0.8690212883605197, + "grad_norm": 1.2898398902754835, + "learning_rate": 8.863760678915556e-07, + "loss": 0.3301, + "step": 50292 + }, + { + "epoch": 0.8690385678739286, + "grad_norm": 1.8401392853933258, + "learning_rate": 8.861457276775987e-07, + "loss": 0.4252, + "step": 50293 + }, + { + "epoch": 0.8690558473873375, + "grad_norm": 1.7255435377222992, + "learning_rate": 8.859154160089256e-07, + "loss": 0.3061, + "step": 50294 + }, + { + "epoch": 0.8690731269007465, + "grad_norm": 0.9581565478862144, + "learning_rate": 8.856851328862593e-07, + "loss": 0.3179, + "step": 50295 + }, + { + "epoch": 0.8690904064141554, + "grad_norm": 1.1669635304701136, + "learning_rate": 8.854548783103179e-07, + "loss": 0.3981, + "step": 50296 + }, + { + "epoch": 0.8691076859275643, + "grad_norm": 1.1964810573900309, + "learning_rate": 8.852246522818275e-07, + "loss": 0.3174, + "step": 50297 + }, + { + "epoch": 0.8691249654409732, + "grad_norm": 1.3959838981554138, + "learning_rate": 8.849944548015032e-07, + "loss": 0.3971, + "step": 50298 + }, + { + "epoch": 0.8691422449543821, + "grad_norm": 0.8081883952186536, + "learning_rate": 8.8476428587007e-07, + "loss": 0.1906, + "step": 50299 + }, + { + "epoch": 0.869159524467791, + "grad_norm": 1.7461363506026306, + "learning_rate": 8.845341454882472e-07, + "loss": 0.4386, + "step": 50300 + }, + { + "epoch": 0.8691768039811999, + "grad_norm": 1.8672458612631124, + "learning_rate": 8.843040336567577e-07, + "loss": 0.2667, + "step": 50301 + }, + { + "epoch": 0.8691940834946088, + "grad_norm": 1.3990219387024605, + "learning_rate": 8.840739503763196e-07, + "loss": 0.4282, + "step": 50302 + }, + { + "epoch": 0.8692113630080177, + "grad_norm": 1.09677471155503, + "learning_rate": 8.838438956476558e-07, + "loss": 0.3475, + "step": 50303 + }, + { + "epoch": 0.8692286425214266, + "grad_norm": 1.3295612505972947, + "learning_rate": 8.836138694714858e-07, + "loss": 0.3681, + "step": 50304 + }, + { + "epoch": 0.8692459220348355, + "grad_norm": 1.3882900515165582, + "learning_rate": 8.833838718485266e-07, + "loss": 0.3822, + "step": 50305 + }, + { + "epoch": 0.8692632015482444, + "grad_norm": 1.3926172059789914, + "learning_rate": 8.831539027795055e-07, + "loss": 0.3859, + "step": 50306 + }, + { + "epoch": 0.8692804810616533, + "grad_norm": 1.988692147244562, + "learning_rate": 8.829239622651376e-07, + "loss": 0.45, + "step": 50307 + }, + { + "epoch": 0.8692977605750623, + "grad_norm": 1.4153504684758893, + "learning_rate": 8.826940503061466e-07, + "loss": 0.4243, + "step": 50308 + }, + { + "epoch": 0.8693150400884712, + "grad_norm": 1.9883750147960526, + "learning_rate": 8.824641669032496e-07, + "loss": 0.4493, + "step": 50309 + }, + { + "epoch": 0.8693323196018801, + "grad_norm": 1.6054202567595577, + "learning_rate": 8.822343120571685e-07, + "loss": 0.2994, + "step": 50310 + }, + { + "epoch": 0.8693495991152889, + "grad_norm": 1.0867274450733253, + "learning_rate": 8.820044857686183e-07, + "loss": 0.4502, + "step": 50311 + }, + { + "epoch": 0.8693668786286978, + "grad_norm": 1.1093588360029472, + "learning_rate": 8.817746880383282e-07, + "loss": 0.4163, + "step": 50312 + }, + { + "epoch": 0.8693841581421067, + "grad_norm": 0.8363256278163996, + "learning_rate": 8.815449188670099e-07, + "loss": 0.3101, + "step": 50313 + }, + { + "epoch": 0.8694014376555156, + "grad_norm": 1.5677528203484656, + "learning_rate": 8.813151782553875e-07, + "loss": 0.497, + "step": 50314 + }, + { + "epoch": 0.8694187171689245, + "grad_norm": 1.0620620705198272, + "learning_rate": 8.810854662041801e-07, + "loss": 0.3811, + "step": 50315 + }, + { + "epoch": 0.8694359966823334, + "grad_norm": 1.329025840010079, + "learning_rate": 8.808557827141029e-07, + "loss": 0.3722, + "step": 50316 + }, + { + "epoch": 0.8694532761957423, + "grad_norm": 1.5951878422595747, + "learning_rate": 8.806261277858796e-07, + "loss": 0.3109, + "step": 50317 + }, + { + "epoch": 0.8694705557091512, + "grad_norm": 1.0546036839793222, + "learning_rate": 8.803965014202276e-07, + "loss": 0.2176, + "step": 50318 + }, + { + "epoch": 0.8694878352225601, + "grad_norm": 1.383584567453141, + "learning_rate": 8.801669036178684e-07, + "loss": 0.4575, + "step": 50319 + }, + { + "epoch": 0.869505114735969, + "grad_norm": 1.1982495762380334, + "learning_rate": 8.799373343795181e-07, + "loss": 0.4093, + "step": 50320 + }, + { + "epoch": 0.8695223942493779, + "grad_norm": 2.412835527509416, + "learning_rate": 8.797077937058995e-07, + "loss": 0.3063, + "step": 50321 + }, + { + "epoch": 0.8695396737627868, + "grad_norm": 0.9243068347198218, + "learning_rate": 8.794782815977265e-07, + "loss": 0.3109, + "step": 50322 + }, + { + "epoch": 0.8695569532761958, + "grad_norm": 0.8890650176961344, + "learning_rate": 8.792487980557208e-07, + "loss": 0.269, + "step": 50323 + }, + { + "epoch": 0.8695742327896047, + "grad_norm": 1.1708013061221192, + "learning_rate": 8.790193430806038e-07, + "loss": 0.309, + "step": 50324 + }, + { + "epoch": 0.8695915123030136, + "grad_norm": 1.567926487343361, + "learning_rate": 8.787899166730884e-07, + "loss": 0.3255, + "step": 50325 + }, + { + "epoch": 0.8696087918164225, + "grad_norm": 1.24405144144195, + "learning_rate": 8.785605188338986e-07, + "loss": 0.2491, + "step": 50326 + }, + { + "epoch": 0.8696260713298314, + "grad_norm": 1.5428696169256317, + "learning_rate": 8.783311495637481e-07, + "loss": 0.4229, + "step": 50327 + }, + { + "epoch": 0.8696433508432403, + "grad_norm": 1.5724133063309647, + "learning_rate": 8.781018088633586e-07, + "loss": 0.193, + "step": 50328 + }, + { + "epoch": 0.8696606303566492, + "grad_norm": 2.153567212829757, + "learning_rate": 8.778724967334462e-07, + "loss": 0.3116, + "step": 50329 + }, + { + "epoch": 0.8696779098700581, + "grad_norm": 1.8485336156117118, + "learning_rate": 8.776432131747337e-07, + "loss": 0.3472, + "step": 50330 + }, + { + "epoch": 0.869695189383467, + "grad_norm": 1.103684572465542, + "learning_rate": 8.774139581879327e-07, + "loss": 0.3709, + "step": 50331 + }, + { + "epoch": 0.8697124688968758, + "grad_norm": 0.5238448990917326, + "learning_rate": 8.77184731773767e-07, + "loss": 0.5696, + "step": 50332 + }, + { + "epoch": 0.8697297484102847, + "grad_norm": 1.0960056629717851, + "learning_rate": 8.769555339329506e-07, + "loss": 0.2548, + "step": 50333 + }, + { + "epoch": 0.8697470279236936, + "grad_norm": 1.3516937603244412, + "learning_rate": 8.767263646662016e-07, + "loss": 0.292, + "step": 50334 + }, + { + "epoch": 0.8697643074371025, + "grad_norm": 1.0369022039429245, + "learning_rate": 8.764972239742419e-07, + "loss": 0.5003, + "step": 50335 + }, + { + "epoch": 0.8697815869505114, + "grad_norm": 2.0472439292488347, + "learning_rate": 8.762681118577843e-07, + "loss": 0.3415, + "step": 50336 + }, + { + "epoch": 0.8697988664639204, + "grad_norm": 1.3733353982290326, + "learning_rate": 8.76039028317549e-07, + "loss": 0.7874, + "step": 50337 + }, + { + "epoch": 0.8698161459773293, + "grad_norm": 0.9626127224885944, + "learning_rate": 8.758099733542525e-07, + "loss": 0.3492, + "step": 50338 + }, + { + "epoch": 0.8698334254907382, + "grad_norm": 1.3523765522741733, + "learning_rate": 8.755809469686117e-07, + "loss": 0.474, + "step": 50339 + }, + { + "epoch": 0.8698507050041471, + "grad_norm": 1.3875131832095466, + "learning_rate": 8.753519491613449e-07, + "loss": 0.3873, + "step": 50340 + }, + { + "epoch": 0.869867984517556, + "grad_norm": 1.4223711892214443, + "learning_rate": 8.751229799331718e-07, + "loss": 0.4101, + "step": 50341 + }, + { + "epoch": 0.8698852640309649, + "grad_norm": 1.665183549030052, + "learning_rate": 8.748940392848048e-07, + "loss": 0.3474, + "step": 50342 + }, + { + "epoch": 0.8699025435443738, + "grad_norm": 1.2745439908662886, + "learning_rate": 8.746651272169648e-07, + "loss": 0.4581, + "step": 50343 + }, + { + "epoch": 0.8699198230577827, + "grad_norm": 1.0479898284117009, + "learning_rate": 8.744362437303655e-07, + "loss": 0.2691, + "step": 50344 + }, + { + "epoch": 0.8699371025711916, + "grad_norm": 1.5486114174355678, + "learning_rate": 8.742073888257252e-07, + "loss": 0.2855, + "step": 50345 + }, + { + "epoch": 0.8699543820846005, + "grad_norm": 1.1232446310396704, + "learning_rate": 8.739785625037622e-07, + "loss": 0.2563, + "step": 50346 + }, + { + "epoch": 0.8699716615980094, + "grad_norm": 2.607066676316291, + "learning_rate": 8.737497647651905e-07, + "loss": 0.2937, + "step": 50347 + }, + { + "epoch": 0.8699889411114183, + "grad_norm": 1.4567574919810558, + "learning_rate": 8.735209956107305e-07, + "loss": 0.5291, + "step": 50348 + }, + { + "epoch": 0.8700062206248272, + "grad_norm": 1.0229645956226083, + "learning_rate": 8.732922550410938e-07, + "loss": 0.3743, + "step": 50349 + }, + { + "epoch": 0.8700235001382361, + "grad_norm": 1.516861084603939, + "learning_rate": 8.730635430570023e-07, + "loss": 0.2604, + "step": 50350 + }, + { + "epoch": 0.8700407796516451, + "grad_norm": 1.1570180163321497, + "learning_rate": 8.728348596591641e-07, + "loss": 0.5378, + "step": 50351 + }, + { + "epoch": 0.870058059165054, + "grad_norm": 1.1146026347250673, + "learning_rate": 8.726062048483053e-07, + "loss": 0.2955, + "step": 50352 + }, + { + "epoch": 0.8700753386784628, + "grad_norm": 0.7422329013507852, + "learning_rate": 8.723775786251354e-07, + "loss": 0.64, + "step": 50353 + }, + { + "epoch": 0.8700926181918717, + "grad_norm": 0.9277955854077562, + "learning_rate": 8.721489809903749e-07, + "loss": 0.4724, + "step": 50354 + }, + { + "epoch": 0.8701098977052806, + "grad_norm": 1.3228360442180023, + "learning_rate": 8.719204119447378e-07, + "loss": 0.3617, + "step": 50355 + }, + { + "epoch": 0.8701271772186895, + "grad_norm": 1.4839398687664134, + "learning_rate": 8.716918714889366e-07, + "loss": 0.4086, + "step": 50356 + }, + { + "epoch": 0.8701444567320984, + "grad_norm": 0.6685456683454967, + "learning_rate": 8.71463359623691e-07, + "loss": 0.4492, + "step": 50357 + }, + { + "epoch": 0.8701617362455073, + "grad_norm": 2.0658276693789315, + "learning_rate": 8.712348763497158e-07, + "loss": 0.2803, + "step": 50358 + }, + { + "epoch": 0.8701790157589162, + "grad_norm": 1.2262297902182762, + "learning_rate": 8.710064216677284e-07, + "loss": 0.2382, + "step": 50359 + }, + { + "epoch": 0.8701962952723251, + "grad_norm": 1.7739554147488659, + "learning_rate": 8.707779955784412e-07, + "loss": 0.5708, + "step": 50360 + }, + { + "epoch": 0.870213574785734, + "grad_norm": 0.9505538870125259, + "learning_rate": 8.705495980825729e-07, + "loss": 0.2962, + "step": 50361 + }, + { + "epoch": 0.8702308542991429, + "grad_norm": 1.2596492646813338, + "learning_rate": 8.70321229180835e-07, + "loss": 0.22, + "step": 50362 + }, + { + "epoch": 0.8702481338125518, + "grad_norm": 2.284338441078394, + "learning_rate": 8.700928888739446e-07, + "loss": 0.2437, + "step": 50363 + }, + { + "epoch": 0.8702654133259607, + "grad_norm": 1.6596522178194486, + "learning_rate": 8.69864577162618e-07, + "loss": 0.6481, + "step": 50364 + }, + { + "epoch": 0.8702826928393697, + "grad_norm": 1.6552179209867015, + "learning_rate": 8.6963629404757e-07, + "loss": 0.8296, + "step": 50365 + }, + { + "epoch": 0.8702999723527786, + "grad_norm": 2.0312490955102565, + "learning_rate": 8.694080395295157e-07, + "loss": 0.4486, + "step": 50366 + }, + { + "epoch": 0.8703172518661875, + "grad_norm": 3.620035530570542, + "learning_rate": 8.691798136091678e-07, + "loss": 0.3311, + "step": 50367 + }, + { + "epoch": 0.8703345313795964, + "grad_norm": 1.208732203031614, + "learning_rate": 8.689516162872424e-07, + "loss": 0.4952, + "step": 50368 + }, + { + "epoch": 0.8703518108930053, + "grad_norm": 1.3233677955732335, + "learning_rate": 8.687234475644535e-07, + "loss": 0.4214, + "step": 50369 + }, + { + "epoch": 0.8703690904064142, + "grad_norm": 0.9275137403685897, + "learning_rate": 8.684953074415203e-07, + "loss": 0.2942, + "step": 50370 + }, + { + "epoch": 0.8703863699198231, + "grad_norm": 2.178772438244499, + "learning_rate": 8.682671959191502e-07, + "loss": 0.431, + "step": 50371 + }, + { + "epoch": 0.870403649433232, + "grad_norm": 1.5167787233117633, + "learning_rate": 8.680391129980637e-07, + "loss": 0.588, + "step": 50372 + }, + { + "epoch": 0.8704209289466409, + "grad_norm": 1.6173040846272526, + "learning_rate": 8.678110586789712e-07, + "loss": 0.2006, + "step": 50373 + }, + { + "epoch": 0.8704382084600497, + "grad_norm": 1.3808298909662593, + "learning_rate": 8.675830329625889e-07, + "loss": 0.3975, + "step": 50374 + }, + { + "epoch": 0.8704554879734586, + "grad_norm": 1.250001147825624, + "learning_rate": 8.673550358496319e-07, + "loss": 0.3384, + "step": 50375 + }, + { + "epoch": 0.8704727674868675, + "grad_norm": 1.0061959509186371, + "learning_rate": 8.671270673408105e-07, + "loss": 0.3215, + "step": 50376 + }, + { + "epoch": 0.8704900470002764, + "grad_norm": 1.0663916390954387, + "learning_rate": 8.668991274368443e-07, + "loss": 0.4815, + "step": 50377 + }, + { + "epoch": 0.8705073265136853, + "grad_norm": 1.1945501134688676, + "learning_rate": 8.666712161384416e-07, + "loss": 0.4271, + "step": 50378 + }, + { + "epoch": 0.8705246060270942, + "grad_norm": 0.5006976670722451, + "learning_rate": 8.664433334463184e-07, + "loss": 0.6063, + "step": 50379 + }, + { + "epoch": 0.8705418855405032, + "grad_norm": 1.5020659785548878, + "learning_rate": 8.662154793611899e-07, + "loss": 0.4617, + "step": 50380 + }, + { + "epoch": 0.8705591650539121, + "grad_norm": 1.6351668053172612, + "learning_rate": 8.659876538837697e-07, + "loss": 0.27, + "step": 50381 + }, + { + "epoch": 0.870576444567321, + "grad_norm": 1.206495837444499, + "learning_rate": 8.657598570147686e-07, + "loss": 0.558, + "step": 50382 + }, + { + "epoch": 0.8705937240807299, + "grad_norm": 0.9930059039880362, + "learning_rate": 8.655320887549034e-07, + "loss": 0.3259, + "step": 50383 + }, + { + "epoch": 0.8706110035941388, + "grad_norm": 1.7434865094395036, + "learning_rate": 8.653043491048841e-07, + "loss": 0.2691, + "step": 50384 + }, + { + "epoch": 0.8706282831075477, + "grad_norm": 1.022364653318655, + "learning_rate": 8.650766380654262e-07, + "loss": 0.2388, + "step": 50385 + }, + { + "epoch": 0.8706455626209566, + "grad_norm": 2.2095720390666544, + "learning_rate": 8.648489556372442e-07, + "loss": 0.24, + "step": 50386 + }, + { + "epoch": 0.8706628421343655, + "grad_norm": 1.9797029383281757, + "learning_rate": 8.64621301821047e-07, + "loss": 0.2237, + "step": 50387 + }, + { + "epoch": 0.8706801216477744, + "grad_norm": 1.0101000243614158, + "learning_rate": 8.643936766175532e-07, + "loss": 0.3537, + "step": 50388 + }, + { + "epoch": 0.8706974011611833, + "grad_norm": 1.5351196311953115, + "learning_rate": 8.641660800274698e-07, + "loss": 0.4779, + "step": 50389 + }, + { + "epoch": 0.8707146806745922, + "grad_norm": 1.2951982302513452, + "learning_rate": 8.639385120515143e-07, + "loss": 0.4923, + "step": 50390 + }, + { + "epoch": 0.8707319601880011, + "grad_norm": 1.6334717411628026, + "learning_rate": 8.637109726903947e-07, + "loss": 0.4217, + "step": 50391 + }, + { + "epoch": 0.87074923970141, + "grad_norm": 2.2625037827262666, + "learning_rate": 8.634834619448306e-07, + "loss": 0.4324, + "step": 50392 + }, + { + "epoch": 0.870766519214819, + "grad_norm": 1.793153346659996, + "learning_rate": 8.632559798155282e-07, + "loss": 0.4034, + "step": 50393 + }, + { + "epoch": 0.8707837987282279, + "grad_norm": 1.4866822136669207, + "learning_rate": 8.630285263032045e-07, + "loss": 0.2442, + "step": 50394 + }, + { + "epoch": 0.8708010782416367, + "grad_norm": 2.0037784429898506, + "learning_rate": 8.62801101408568e-07, + "loss": 0.3995, + "step": 50395 + }, + { + "epoch": 0.8708183577550456, + "grad_norm": 1.195565232780451, + "learning_rate": 8.625737051323346e-07, + "loss": 0.6836, + "step": 50396 + }, + { + "epoch": 0.8708356372684545, + "grad_norm": 1.5124071804638177, + "learning_rate": 8.623463374752128e-07, + "loss": 0.2116, + "step": 50397 + }, + { + "epoch": 0.8708529167818634, + "grad_norm": 1.7837391708719925, + "learning_rate": 8.621189984379175e-07, + "loss": 0.2842, + "step": 50398 + }, + { + "epoch": 0.8708701962952723, + "grad_norm": 1.318795390383744, + "learning_rate": 8.618916880211603e-07, + "loss": 0.294, + "step": 50399 + }, + { + "epoch": 0.8708874758086812, + "grad_norm": 1.5595254438474293, + "learning_rate": 8.616644062256518e-07, + "loss": 0.4353, + "step": 50400 + }, + { + "epoch": 0.8709047553220901, + "grad_norm": 1.1993935637982311, + "learning_rate": 8.61437153052107e-07, + "loss": 0.2657, + "step": 50401 + }, + { + "epoch": 0.870922034835499, + "grad_norm": 1.6649329220940612, + "learning_rate": 8.612099285012331e-07, + "loss": 0.3633, + "step": 50402 + }, + { + "epoch": 0.8709393143489079, + "grad_norm": 1.5590887992554563, + "learning_rate": 8.609827325737452e-07, + "loss": 0.4442, + "step": 50403 + }, + { + "epoch": 0.8709565938623168, + "grad_norm": 1.9974851597978862, + "learning_rate": 8.607555652703525e-07, + "loss": 0.1912, + "step": 50404 + }, + { + "epoch": 0.8709738733757257, + "grad_norm": 1.826029409471893, + "learning_rate": 8.605284265917713e-07, + "loss": 0.3966, + "step": 50405 + }, + { + "epoch": 0.8709911528891346, + "grad_norm": 1.3523937885985955, + "learning_rate": 8.603013165387087e-07, + "loss": 0.2844, + "step": 50406 + }, + { + "epoch": 0.8710084324025436, + "grad_norm": 1.2283646942747035, + "learning_rate": 8.600742351118751e-07, + "loss": 0.2012, + "step": 50407 + }, + { + "epoch": 0.8710257119159525, + "grad_norm": 1.784157613058323, + "learning_rate": 8.598471823119836e-07, + "loss": 0.261, + "step": 50408 + }, + { + "epoch": 0.8710429914293614, + "grad_norm": 1.3143177134710993, + "learning_rate": 8.596201581397457e-07, + "loss": 0.2787, + "step": 50409 + }, + { + "epoch": 0.8710602709427703, + "grad_norm": 2.2305015774375563, + "learning_rate": 8.59393162595874e-07, + "loss": 0.2702, + "step": 50410 + }, + { + "epoch": 0.8710775504561792, + "grad_norm": 1.8603572910964494, + "learning_rate": 8.59166195681076e-07, + "loss": 0.6016, + "step": 50411 + }, + { + "epoch": 0.8710948299695881, + "grad_norm": 1.6795450246164387, + "learning_rate": 8.589392573960653e-07, + "loss": 0.3897, + "step": 50412 + }, + { + "epoch": 0.871112109482997, + "grad_norm": 1.3253106051114938, + "learning_rate": 8.587123477415505e-07, + "loss": 0.4385, + "step": 50413 + }, + { + "epoch": 0.8711293889964059, + "grad_norm": 1.5577174559049891, + "learning_rate": 8.584854667182418e-07, + "loss": 0.2462, + "step": 50414 + }, + { + "epoch": 0.8711466685098148, + "grad_norm": 1.4523059804470608, + "learning_rate": 8.582586143268523e-07, + "loss": 0.367, + "step": 50415 + }, + { + "epoch": 0.8711639480232236, + "grad_norm": 0.9196435384644899, + "learning_rate": 8.580317905680935e-07, + "loss": 0.4428, + "step": 50416 + }, + { + "epoch": 0.8711812275366325, + "grad_norm": 2.5140520235966646, + "learning_rate": 8.578049954426749e-07, + "loss": 0.2436, + "step": 50417 + }, + { + "epoch": 0.8711985070500414, + "grad_norm": 2.033930988589262, + "learning_rate": 8.575782289513023e-07, + "loss": 0.5771, + "step": 50418 + }, + { + "epoch": 0.8712157865634503, + "grad_norm": 1.3106096761637296, + "learning_rate": 8.573514910946901e-07, + "loss": 0.4037, + "step": 50419 + }, + { + "epoch": 0.8712330660768592, + "grad_norm": 2.463771097768498, + "learning_rate": 8.571247818735484e-07, + "loss": 0.2418, + "step": 50420 + }, + { + "epoch": 0.8712503455902681, + "grad_norm": 1.7245751135217333, + "learning_rate": 8.568981012885879e-07, + "loss": 0.3506, + "step": 50421 + }, + { + "epoch": 0.8712676251036771, + "grad_norm": 1.9826335071295222, + "learning_rate": 8.566714493405159e-07, + "loss": 0.371, + "step": 50422 + }, + { + "epoch": 0.871284904617086, + "grad_norm": 2.0375374135351434, + "learning_rate": 8.56444826030045e-07, + "loss": 0.2667, + "step": 50423 + }, + { + "epoch": 0.8713021841304949, + "grad_norm": 3.0292331416968863, + "learning_rate": 8.562182313578827e-07, + "loss": 0.2562, + "step": 50424 + }, + { + "epoch": 0.8713194636439038, + "grad_norm": 1.2985981214107802, + "learning_rate": 8.559916653247391e-07, + "loss": 0.3279, + "step": 50425 + }, + { + "epoch": 0.8713367431573127, + "grad_norm": 1.447435121732403, + "learning_rate": 8.557651279313251e-07, + "loss": 0.5578, + "step": 50426 + }, + { + "epoch": 0.8713540226707216, + "grad_norm": 1.0337625876751284, + "learning_rate": 8.555386191783511e-07, + "loss": 0.2327, + "step": 50427 + }, + { + "epoch": 0.8713713021841305, + "grad_norm": 0.70666553582156, + "learning_rate": 8.553121390665253e-07, + "loss": 0.6426, + "step": 50428 + }, + { + "epoch": 0.8713885816975394, + "grad_norm": 1.1651655415099886, + "learning_rate": 8.550856875965552e-07, + "loss": 0.3666, + "step": 50429 + }, + { + "epoch": 0.8714058612109483, + "grad_norm": 0.6294370520014736, + "learning_rate": 8.548592647691533e-07, + "loss": 0.6709, + "step": 50430 + }, + { + "epoch": 0.8714231407243572, + "grad_norm": 1.047679854659951, + "learning_rate": 8.546328705850238e-07, + "loss": 0.369, + "step": 50431 + }, + { + "epoch": 0.8714404202377661, + "grad_norm": 1.32328669000445, + "learning_rate": 8.544065050448824e-07, + "loss": 0.3, + "step": 50432 + }, + { + "epoch": 0.871457699751175, + "grad_norm": 1.3146439333714428, + "learning_rate": 8.541801681494333e-07, + "loss": 0.4367, + "step": 50433 + }, + { + "epoch": 0.871474979264584, + "grad_norm": 1.3102327632077662, + "learning_rate": 8.53953859899388e-07, + "loss": 0.3423, + "step": 50434 + }, + { + "epoch": 0.8714922587779929, + "grad_norm": 0.8017510204633557, + "learning_rate": 8.537275802954536e-07, + "loss": 0.5267, + "step": 50435 + }, + { + "epoch": 0.8715095382914018, + "grad_norm": 1.2244048698483254, + "learning_rate": 8.53501329338341e-07, + "loss": 0.1842, + "step": 50436 + }, + { + "epoch": 0.8715268178048107, + "grad_norm": 1.1748544883724519, + "learning_rate": 8.532751070287559e-07, + "loss": 0.3169, + "step": 50437 + }, + { + "epoch": 0.8715440973182195, + "grad_norm": 0.6764724175609255, + "learning_rate": 8.530489133674069e-07, + "loss": 0.151, + "step": 50438 + }, + { + "epoch": 0.8715613768316284, + "grad_norm": 1.1855902980608781, + "learning_rate": 8.528227483550067e-07, + "loss": 0.4428, + "step": 50439 + }, + { + "epoch": 0.8715786563450373, + "grad_norm": 1.2959307899016845, + "learning_rate": 8.525966119922591e-07, + "loss": 0.5232, + "step": 50440 + }, + { + "epoch": 0.8715959358584462, + "grad_norm": 1.6862689857685846, + "learning_rate": 8.523705042798758e-07, + "loss": 0.3496, + "step": 50441 + }, + { + "epoch": 0.8716132153718551, + "grad_norm": 1.132344818562091, + "learning_rate": 8.521444252185618e-07, + "loss": 0.4195, + "step": 50442 + }, + { + "epoch": 0.871630494885264, + "grad_norm": 1.3777634670268881, + "learning_rate": 8.519183748090265e-07, + "loss": 0.3329, + "step": 50443 + }, + { + "epoch": 0.8716477743986729, + "grad_norm": 0.9816630804246368, + "learning_rate": 8.516923530519772e-07, + "loss": 0.2619, + "step": 50444 + }, + { + "epoch": 0.8716650539120818, + "grad_norm": 1.3356307831153664, + "learning_rate": 8.514663599481254e-07, + "loss": 0.328, + "step": 50445 + }, + { + "epoch": 0.8716823334254907, + "grad_norm": 1.0850810135183555, + "learning_rate": 8.512403954981741e-07, + "loss": 0.2527, + "step": 50446 + }, + { + "epoch": 0.8716996129388996, + "grad_norm": 1.262558619855613, + "learning_rate": 8.510144597028347e-07, + "loss": 0.2571, + "step": 50447 + }, + { + "epoch": 0.8717168924523085, + "grad_norm": 1.282519905917173, + "learning_rate": 8.507885525628124e-07, + "loss": 0.2864, + "step": 50448 + }, + { + "epoch": 0.8717341719657175, + "grad_norm": 1.8757203585839444, + "learning_rate": 8.505626740788142e-07, + "loss": 0.5037, + "step": 50449 + }, + { + "epoch": 0.8717514514791264, + "grad_norm": 1.4384344159837505, + "learning_rate": 8.503368242515519e-07, + "loss": 0.2504, + "step": 50450 + }, + { + "epoch": 0.8717687309925353, + "grad_norm": 1.7311887152578784, + "learning_rate": 8.501110030817283e-07, + "loss": 0.2692, + "step": 50451 + }, + { + "epoch": 0.8717860105059442, + "grad_norm": 1.824857154305513, + "learning_rate": 8.49885210570054e-07, + "loss": 0.3982, + "step": 50452 + }, + { + "epoch": 0.8718032900193531, + "grad_norm": 1.2159335758731724, + "learning_rate": 8.496594467172325e-07, + "loss": 0.4752, + "step": 50453 + }, + { + "epoch": 0.871820569532762, + "grad_norm": 0.9300144748405834, + "learning_rate": 8.494337115239737e-07, + "loss": 0.3046, + "step": 50454 + }, + { + "epoch": 0.8718378490461709, + "grad_norm": 1.1092351442385897, + "learning_rate": 8.492080049909834e-07, + "loss": 0.5042, + "step": 50455 + }, + { + "epoch": 0.8718551285595798, + "grad_norm": 0.7526554371825719, + "learning_rate": 8.489823271189712e-07, + "loss": 0.1986, + "step": 50456 + }, + { + "epoch": 0.8718724080729887, + "grad_norm": 2.136699884227654, + "learning_rate": 8.487566779086398e-07, + "loss": 0.4855, + "step": 50457 + }, + { + "epoch": 0.8718896875863976, + "grad_norm": 0.9216680619203423, + "learning_rate": 8.485310573606998e-07, + "loss": 0.3355, + "step": 50458 + }, + { + "epoch": 0.8719069670998064, + "grad_norm": 1.334650567473311, + "learning_rate": 8.483054654758538e-07, + "loss": 0.2679, + "step": 50459 + }, + { + "epoch": 0.8719242466132153, + "grad_norm": 1.0847093730948185, + "learning_rate": 8.480799022548114e-07, + "loss": 0.3068, + "step": 50460 + }, + { + "epoch": 0.8719415261266242, + "grad_norm": 1.134434342365675, + "learning_rate": 8.478543676982786e-07, + "loss": 0.3722, + "step": 50461 + }, + { + "epoch": 0.8719588056400331, + "grad_norm": 1.0909388779964537, + "learning_rate": 8.476288618069606e-07, + "loss": 0.1996, + "step": 50462 + }, + { + "epoch": 0.871976085153442, + "grad_norm": 1.7040007104063706, + "learning_rate": 8.474033845815666e-07, + "loss": 0.3423, + "step": 50463 + }, + { + "epoch": 0.871993364666851, + "grad_norm": 1.0715792657432333, + "learning_rate": 8.471779360227994e-07, + "loss": 0.8566, + "step": 50464 + }, + { + "epoch": 0.8720106441802599, + "grad_norm": 1.267935957213839, + "learning_rate": 8.469525161313652e-07, + "loss": 0.3978, + "step": 50465 + }, + { + "epoch": 0.8720279236936688, + "grad_norm": 1.1976748775918205, + "learning_rate": 8.467271249079722e-07, + "loss": 0.3457, + "step": 50466 + }, + { + "epoch": 0.8720452032070777, + "grad_norm": 0.6935255172582693, + "learning_rate": 8.465017623533267e-07, + "loss": 0.7402, + "step": 50467 + }, + { + "epoch": 0.8720624827204866, + "grad_norm": 1.2710239170630682, + "learning_rate": 8.462764284681347e-07, + "loss": 0.2521, + "step": 50468 + }, + { + "epoch": 0.8720797622338955, + "grad_norm": 1.6031152181757315, + "learning_rate": 8.460511232530977e-07, + "loss": 0.3366, + "step": 50469 + }, + { + "epoch": 0.8720970417473044, + "grad_norm": 1.2428071865105392, + "learning_rate": 8.458258467089254e-07, + "loss": 0.1292, + "step": 50470 + }, + { + "epoch": 0.8721143212607133, + "grad_norm": 3.4769224695300838, + "learning_rate": 8.456005988363192e-07, + "loss": 0.486, + "step": 50471 + }, + { + "epoch": 0.8721316007741222, + "grad_norm": 1.6559939364813545, + "learning_rate": 8.453753796359921e-07, + "loss": 0.3562, + "step": 50472 + }, + { + "epoch": 0.8721488802875311, + "grad_norm": 0.5732486107237915, + "learning_rate": 8.451501891086423e-07, + "loss": 0.1397, + "step": 50473 + }, + { + "epoch": 0.87216615980094, + "grad_norm": 2.0079559226255794, + "learning_rate": 8.449250272549803e-07, + "loss": 0.3749, + "step": 50474 + }, + { + "epoch": 0.8721834393143489, + "grad_norm": 1.342094143290074, + "learning_rate": 8.446998940757067e-07, + "loss": 0.2776, + "step": 50475 + }, + { + "epoch": 0.8722007188277578, + "grad_norm": 1.2118048440334863, + "learning_rate": 8.444747895715299e-07, + "loss": 0.4399, + "step": 50476 + }, + { + "epoch": 0.8722179983411668, + "grad_norm": 0.6536218119005146, + "learning_rate": 8.442497137431526e-07, + "loss": 0.8771, + "step": 50477 + }, + { + "epoch": 0.8722352778545757, + "grad_norm": 1.072977707204832, + "learning_rate": 8.44024666591281e-07, + "loss": 0.3392, + "step": 50478 + }, + { + "epoch": 0.8722525573679846, + "grad_norm": 2.032565323632517, + "learning_rate": 8.437996481166222e-07, + "loss": 0.2493, + "step": 50479 + }, + { + "epoch": 0.8722698368813934, + "grad_norm": 1.3439302966256648, + "learning_rate": 8.435746583198767e-07, + "loss": 0.3651, + "step": 50480 + }, + { + "epoch": 0.8722871163948023, + "grad_norm": 0.9839227661959566, + "learning_rate": 8.43349697201753e-07, + "loss": 0.4602, + "step": 50481 + }, + { + "epoch": 0.8723043959082112, + "grad_norm": 2.3145300887709768, + "learning_rate": 8.431247647629526e-07, + "loss": 0.4782, + "step": 50482 + }, + { + "epoch": 0.8723216754216201, + "grad_norm": 2.540771412936575, + "learning_rate": 8.428998610041806e-07, + "loss": 0.2686, + "step": 50483 + }, + { + "epoch": 0.872338954935029, + "grad_norm": 1.2287649280589725, + "learning_rate": 8.426749859261418e-07, + "loss": 0.4308, + "step": 50484 + }, + { + "epoch": 0.8723562344484379, + "grad_norm": 2.1885474901655604, + "learning_rate": 8.424501395295437e-07, + "loss": 0.3079, + "step": 50485 + }, + { + "epoch": 0.8723735139618468, + "grad_norm": 1.4204023142721869, + "learning_rate": 8.422253218150855e-07, + "loss": 0.32, + "step": 50486 + }, + { + "epoch": 0.8723907934752557, + "grad_norm": 1.2487252999792045, + "learning_rate": 8.420005327834757e-07, + "loss": 0.1911, + "step": 50487 + }, + { + "epoch": 0.8724080729886646, + "grad_norm": 0.8114997580724478, + "learning_rate": 8.417757724354136e-07, + "loss": 0.7336, + "step": 50488 + }, + { + "epoch": 0.8724253525020735, + "grad_norm": 1.7326526219124452, + "learning_rate": 8.415510407716065e-07, + "loss": 0.438, + "step": 50489 + }, + { + "epoch": 0.8724426320154824, + "grad_norm": 1.2011333245149405, + "learning_rate": 8.413263377927594e-07, + "loss": 0.5155, + "step": 50490 + }, + { + "epoch": 0.8724599115288914, + "grad_norm": 1.0012114658164364, + "learning_rate": 8.411016634995716e-07, + "loss": 0.2754, + "step": 50491 + }, + { + "epoch": 0.8724771910423003, + "grad_norm": 1.455004125632023, + "learning_rate": 8.408770178927517e-07, + "loss": 0.3399, + "step": 50492 + }, + { + "epoch": 0.8724944705557092, + "grad_norm": 0.7603111407289767, + "learning_rate": 8.40652400973e-07, + "loss": 0.3068, + "step": 50493 + }, + { + "epoch": 0.8725117500691181, + "grad_norm": 0.9745884919174737, + "learning_rate": 8.404278127410203e-07, + "loss": 0.3468, + "step": 50494 + }, + { + "epoch": 0.872529029582527, + "grad_norm": 1.4660319565016409, + "learning_rate": 8.402032531975168e-07, + "loss": 0.4594, + "step": 50495 + }, + { + "epoch": 0.8725463090959359, + "grad_norm": 2.29398172048699, + "learning_rate": 8.399787223431944e-07, + "loss": 0.3827, + "step": 50496 + }, + { + "epoch": 0.8725635886093448, + "grad_norm": 2.3922914415172474, + "learning_rate": 8.397542201787534e-07, + "loss": 0.3971, + "step": 50497 + }, + { + "epoch": 0.8725808681227537, + "grad_norm": 1.2818920094380892, + "learning_rate": 8.395297467049002e-07, + "loss": 0.2563, + "step": 50498 + }, + { + "epoch": 0.8725981476361626, + "grad_norm": 1.0959099006162287, + "learning_rate": 8.393053019223341e-07, + "loss": 0.2847, + "step": 50499 + }, + { + "epoch": 0.8726154271495715, + "grad_norm": 1.7100345241973536, + "learning_rate": 8.3908088583176e-07, + "loss": 0.3571, + "step": 50500 + }, + { + "epoch": 0.8726327066629803, + "grad_norm": 1.1933891253626807, + "learning_rate": 8.38856498433882e-07, + "loss": 0.2936, + "step": 50501 + }, + { + "epoch": 0.8726499861763892, + "grad_norm": 1.2264433383112243, + "learning_rate": 8.386321397294006e-07, + "loss": 0.3176, + "step": 50502 + }, + { + "epoch": 0.8726672656897981, + "grad_norm": 1.7963682013852702, + "learning_rate": 8.384078097190207e-07, + "loss": 0.2073, + "step": 50503 + }, + { + "epoch": 0.872684545203207, + "grad_norm": 0.9613261301440896, + "learning_rate": 8.381835084034428e-07, + "loss": 0.253, + "step": 50504 + }, + { + "epoch": 0.872701824716616, + "grad_norm": 1.9489239679473913, + "learning_rate": 8.3795923578337e-07, + "loss": 0.1843, + "step": 50505 + }, + { + "epoch": 0.8727191042300249, + "grad_norm": 1.2638214799282357, + "learning_rate": 8.377349918595056e-07, + "loss": 0.5795, + "step": 50506 + }, + { + "epoch": 0.8727363837434338, + "grad_norm": 0.7848938386669969, + "learning_rate": 8.37510776632553e-07, + "loss": 0.2618, + "step": 50507 + }, + { + "epoch": 0.8727536632568427, + "grad_norm": 0.9547130434397894, + "learning_rate": 8.372865901032112e-07, + "loss": 0.2976, + "step": 50508 + }, + { + "epoch": 0.8727709427702516, + "grad_norm": 0.7223940582078942, + "learning_rate": 8.370624322721854e-07, + "loss": 0.7401, + "step": 50509 + }, + { + "epoch": 0.8727882222836605, + "grad_norm": 2.597428755207473, + "learning_rate": 8.368383031401772e-07, + "loss": 0.2358, + "step": 50510 + }, + { + "epoch": 0.8728055017970694, + "grad_norm": 0.9612088732032898, + "learning_rate": 8.366142027078839e-07, + "loss": 0.229, + "step": 50511 + }, + { + "epoch": 0.8728227813104783, + "grad_norm": 1.0671679525087279, + "learning_rate": 8.363901309760147e-07, + "loss": 0.1535, + "step": 50512 + }, + { + "epoch": 0.8728400608238872, + "grad_norm": 1.4832013063649592, + "learning_rate": 8.36166087945266e-07, + "loss": 0.371, + "step": 50513 + }, + { + "epoch": 0.8728573403372961, + "grad_norm": 1.8904284307673578, + "learning_rate": 8.359420736163437e-07, + "loss": 0.3997, + "step": 50514 + }, + { + "epoch": 0.872874619850705, + "grad_norm": 1.3438408560343804, + "learning_rate": 8.357180879899462e-07, + "loss": 0.5626, + "step": 50515 + }, + { + "epoch": 0.8728918993641139, + "grad_norm": 1.5911166872799984, + "learning_rate": 8.354941310667763e-07, + "loss": 0.3255, + "step": 50516 + }, + { + "epoch": 0.8729091788775228, + "grad_norm": 1.2264194285941714, + "learning_rate": 8.352702028475346e-07, + "loss": 0.3125, + "step": 50517 + }, + { + "epoch": 0.8729264583909317, + "grad_norm": 1.3785966964066454, + "learning_rate": 8.350463033329236e-07, + "loss": 0.3641, + "step": 50518 + }, + { + "epoch": 0.8729437379043407, + "grad_norm": 1.1029147444800302, + "learning_rate": 8.348224325236432e-07, + "loss": 0.3609, + "step": 50519 + }, + { + "epoch": 0.8729610174177496, + "grad_norm": 1.3122139015885579, + "learning_rate": 8.345985904203968e-07, + "loss": 0.3438, + "step": 50520 + }, + { + "epoch": 0.8729782969311585, + "grad_norm": 0.8755532057170836, + "learning_rate": 8.343747770238853e-07, + "loss": 0.767, + "step": 50521 + }, + { + "epoch": 0.8729955764445673, + "grad_norm": 0.8242173775447987, + "learning_rate": 8.341509923348056e-07, + "loss": 0.3772, + "step": 50522 + }, + { + "epoch": 0.8730128559579762, + "grad_norm": 0.4888036257492108, + "learning_rate": 8.33927236353863e-07, + "loss": 0.5011, + "step": 50523 + }, + { + "epoch": 0.8730301354713851, + "grad_norm": 0.920258342743705, + "learning_rate": 8.337035090817558e-07, + "loss": 0.1833, + "step": 50524 + }, + { + "epoch": 0.873047414984794, + "grad_norm": 2.431725612732268, + "learning_rate": 8.334798105191876e-07, + "loss": 0.4823, + "step": 50525 + }, + { + "epoch": 0.8730646944982029, + "grad_norm": 1.5155571454950139, + "learning_rate": 8.33256140666856e-07, + "loss": 0.3814, + "step": 50526 + }, + { + "epoch": 0.8730819740116118, + "grad_norm": 1.1892539451243775, + "learning_rate": 8.330324995254646e-07, + "loss": 0.2749, + "step": 50527 + }, + { + "epoch": 0.8730992535250207, + "grad_norm": 1.135839212513083, + "learning_rate": 8.328088870957107e-07, + "loss": 0.292, + "step": 50528 + }, + { + "epoch": 0.8731165330384296, + "grad_norm": 1.3218304480107146, + "learning_rate": 8.325853033782949e-07, + "loss": 0.5065, + "step": 50529 + }, + { + "epoch": 0.8731338125518385, + "grad_norm": 1.2852458259305817, + "learning_rate": 8.32361748373921e-07, + "loss": 0.2975, + "step": 50530 + }, + { + "epoch": 0.8731510920652474, + "grad_norm": 1.6037498679044497, + "learning_rate": 8.321382220832853e-07, + "loss": 0.4597, + "step": 50531 + }, + { + "epoch": 0.8731683715786563, + "grad_norm": 1.7066782012269988, + "learning_rate": 8.319147245070913e-07, + "loss": 0.5521, + "step": 50532 + }, + { + "epoch": 0.8731856510920653, + "grad_norm": 1.4283344990633513, + "learning_rate": 8.316912556460344e-07, + "loss": 0.3069, + "step": 50533 + }, + { + "epoch": 0.8732029306054742, + "grad_norm": 1.4383796402324696, + "learning_rate": 8.314678155008182e-07, + "loss": 0.3464, + "step": 50534 + }, + { + "epoch": 0.8732202101188831, + "grad_norm": 0.8480423811394028, + "learning_rate": 8.312444040721413e-07, + "loss": 0.5077, + "step": 50535 + }, + { + "epoch": 0.873237489632292, + "grad_norm": 1.4948560773274169, + "learning_rate": 8.31021021360705e-07, + "loss": 0.3746, + "step": 50536 + }, + { + "epoch": 0.8732547691457009, + "grad_norm": 2.134714599674406, + "learning_rate": 8.307976673672058e-07, + "loss": 0.2308, + "step": 50537 + }, + { + "epoch": 0.8732720486591098, + "grad_norm": 1.4878258162584916, + "learning_rate": 8.305743420923484e-07, + "loss": 0.2244, + "step": 50538 + }, + { + "epoch": 0.8732893281725187, + "grad_norm": 2.0210535365213276, + "learning_rate": 8.303510455368258e-07, + "loss": 0.3806, + "step": 50539 + }, + { + "epoch": 0.8733066076859276, + "grad_norm": 1.6901565557697653, + "learning_rate": 8.301277777013406e-07, + "loss": 0.3718, + "step": 50540 + }, + { + "epoch": 0.8733238871993365, + "grad_norm": 1.1393367425481318, + "learning_rate": 8.299045385865945e-07, + "loss": 0.2735, + "step": 50541 + }, + { + "epoch": 0.8733411667127454, + "grad_norm": 2.1472632122863384, + "learning_rate": 8.296813281932825e-07, + "loss": 0.3796, + "step": 50542 + }, + { + "epoch": 0.8733584462261542, + "grad_norm": 2.030682952668887, + "learning_rate": 8.294581465221075e-07, + "loss": 0.3251, + "step": 50543 + }, + { + "epoch": 0.8733757257395631, + "grad_norm": 1.7485282271793428, + "learning_rate": 8.292349935737643e-07, + "loss": 0.8628, + "step": 50544 + }, + { + "epoch": 0.873393005252972, + "grad_norm": 1.1626288757165133, + "learning_rate": 8.290118693489535e-07, + "loss": 0.3324, + "step": 50545 + }, + { + "epoch": 0.8734102847663809, + "grad_norm": 1.2668857600115406, + "learning_rate": 8.287887738483758e-07, + "loss": 0.2444, + "step": 50546 + }, + { + "epoch": 0.8734275642797898, + "grad_norm": 1.5406965684948322, + "learning_rate": 8.285657070727293e-07, + "loss": 0.413, + "step": 50547 + }, + { + "epoch": 0.8734448437931988, + "grad_norm": 1.4476751591324164, + "learning_rate": 8.283426690227103e-07, + "loss": 0.3507, + "step": 50548 + }, + { + "epoch": 0.8734621233066077, + "grad_norm": 1.1765756663947475, + "learning_rate": 8.281196596990216e-07, + "loss": 0.2883, + "step": 50549 + }, + { + "epoch": 0.8734794028200166, + "grad_norm": 0.8287172779783283, + "learning_rate": 8.278966791023568e-07, + "loss": 0.3643, + "step": 50550 + }, + { + "epoch": 0.8734966823334255, + "grad_norm": 0.46340102656211585, + "learning_rate": 8.276737272334168e-07, + "loss": 0.5143, + "step": 50551 + }, + { + "epoch": 0.8735139618468344, + "grad_norm": 1.2381955900780832, + "learning_rate": 8.274508040929019e-07, + "loss": 0.4067, + "step": 50552 + }, + { + "epoch": 0.8735312413602433, + "grad_norm": 0.895553034048966, + "learning_rate": 8.272279096815061e-07, + "loss": 0.3082, + "step": 50553 + }, + { + "epoch": 0.8735485208736522, + "grad_norm": 2.178474324759226, + "learning_rate": 8.270050439999311e-07, + "loss": 0.3775, + "step": 50554 + }, + { + "epoch": 0.8735658003870611, + "grad_norm": 1.0615834686777033, + "learning_rate": 8.267822070488718e-07, + "loss": 0.4216, + "step": 50555 + }, + { + "epoch": 0.87358307990047, + "grad_norm": 1.0849950574801535, + "learning_rate": 8.2655939882903e-07, + "loss": 0.3662, + "step": 50556 + }, + { + "epoch": 0.8736003594138789, + "grad_norm": 0.6695465528803966, + "learning_rate": 8.263366193410993e-07, + "loss": 0.7069, + "step": 50557 + }, + { + "epoch": 0.8736176389272878, + "grad_norm": 1.551969743111535, + "learning_rate": 8.261138685857794e-07, + "loss": 0.3295, + "step": 50558 + }, + { + "epoch": 0.8736349184406967, + "grad_norm": 1.0207878398519163, + "learning_rate": 8.258911465637675e-07, + "loss": 0.4233, + "step": 50559 + }, + { + "epoch": 0.8736521979541056, + "grad_norm": 2.2123727222919296, + "learning_rate": 8.256684532757642e-07, + "loss": 0.3111, + "step": 50560 + }, + { + "epoch": 0.8736694774675146, + "grad_norm": 1.458845230341506, + "learning_rate": 8.254457887224643e-07, + "loss": 0.5145, + "step": 50561 + }, + { + "epoch": 0.8736867569809235, + "grad_norm": 2.543029315846976, + "learning_rate": 8.25223152904564e-07, + "loss": 0.1705, + "step": 50562 + }, + { + "epoch": 0.8737040364943324, + "grad_norm": 1.7617695017281707, + "learning_rate": 8.250005458227616e-07, + "loss": 0.6625, + "step": 50563 + }, + { + "epoch": 0.8737213160077412, + "grad_norm": 1.4493012870976183, + "learning_rate": 8.247779674777545e-07, + "loss": 0.2378, + "step": 50564 + }, + { + "epoch": 0.8737385955211501, + "grad_norm": 1.342157484143396, + "learning_rate": 8.245554178702431e-07, + "loss": 0.6964, + "step": 50565 + }, + { + "epoch": 0.873755875034559, + "grad_norm": 1.0936296989616812, + "learning_rate": 8.24332897000919e-07, + "loss": 0.3854, + "step": 50566 + }, + { + "epoch": 0.8737731545479679, + "grad_norm": 1.3257899770595143, + "learning_rate": 8.241104048704829e-07, + "loss": 0.3158, + "step": 50567 + }, + { + "epoch": 0.8737904340613768, + "grad_norm": 1.222098714979992, + "learning_rate": 8.238879414796285e-07, + "loss": 0.3787, + "step": 50568 + }, + { + "epoch": 0.8738077135747857, + "grad_norm": 1.691312237209034, + "learning_rate": 8.236655068290556e-07, + "loss": 0.5764, + "step": 50569 + }, + { + "epoch": 0.8738249930881946, + "grad_norm": 1.052602434459569, + "learning_rate": 8.234431009194588e-07, + "loss": 0.3112, + "step": 50570 + }, + { + "epoch": 0.8738422726016035, + "grad_norm": 0.8230795241020292, + "learning_rate": 8.232207237515377e-07, + "loss": 0.224, + "step": 50571 + }, + { + "epoch": 0.8738595521150124, + "grad_norm": 1.5321918029751023, + "learning_rate": 8.229983753259863e-07, + "loss": 0.3678, + "step": 50572 + }, + { + "epoch": 0.8738768316284213, + "grad_norm": 1.8242034153655857, + "learning_rate": 8.227760556434993e-07, + "loss": 0.3542, + "step": 50573 + }, + { + "epoch": 0.8738941111418302, + "grad_norm": 1.3189751440609525, + "learning_rate": 8.225537647047766e-07, + "loss": 0.6853, + "step": 50574 + }, + { + "epoch": 0.8739113906552392, + "grad_norm": 1.7265952161390605, + "learning_rate": 8.223315025105116e-07, + "loss": 0.6516, + "step": 50575 + }, + { + "epoch": 0.8739286701686481, + "grad_norm": 0.961886509634634, + "learning_rate": 8.221092690614041e-07, + "loss": 0.1179, + "step": 50576 + }, + { + "epoch": 0.873945949682057, + "grad_norm": 0.8057983750171217, + "learning_rate": 8.218870643581455e-07, + "loss": 0.4904, + "step": 50577 + }, + { + "epoch": 0.8739632291954659, + "grad_norm": 0.9396712456606535, + "learning_rate": 8.216648884014367e-07, + "loss": 0.2152, + "step": 50578 + }, + { + "epoch": 0.8739805087088748, + "grad_norm": 1.4217635529884727, + "learning_rate": 8.21442741191969e-07, + "loss": 0.3486, + "step": 50579 + }, + { + "epoch": 0.8739977882222837, + "grad_norm": 2.175020018447777, + "learning_rate": 8.212206227304398e-07, + "loss": 0.2355, + "step": 50580 + }, + { + "epoch": 0.8740150677356926, + "grad_norm": 1.2298527656528644, + "learning_rate": 8.209985330175474e-07, + "loss": 0.3585, + "step": 50581 + }, + { + "epoch": 0.8740323472491015, + "grad_norm": 1.272132928568133, + "learning_rate": 8.207764720539835e-07, + "loss": 0.38, + "step": 50582 + }, + { + "epoch": 0.8740496267625104, + "grad_norm": 1.2182011154821333, + "learning_rate": 8.205544398404475e-07, + "loss": 0.1803, + "step": 50583 + }, + { + "epoch": 0.8740669062759193, + "grad_norm": 1.1785702857329077, + "learning_rate": 8.2033243637763e-07, + "loss": 0.3839, + "step": 50584 + }, + { + "epoch": 0.8740841857893282, + "grad_norm": 1.2598196801198933, + "learning_rate": 8.201104616662292e-07, + "loss": 0.2884, + "step": 50585 + }, + { + "epoch": 0.874101465302737, + "grad_norm": 0.8367176848559297, + "learning_rate": 8.198885157069391e-07, + "loss": 0.7356, + "step": 50586 + }, + { + "epoch": 0.8741187448161459, + "grad_norm": 1.0738885945386383, + "learning_rate": 8.196665985004593e-07, + "loss": 0.2319, + "step": 50587 + }, + { + "epoch": 0.8741360243295548, + "grad_norm": 1.2773478687644315, + "learning_rate": 8.19444710047479e-07, + "loss": 0.3246, + "step": 50588 + }, + { + "epoch": 0.8741533038429637, + "grad_norm": 1.450530304606364, + "learning_rate": 8.192228503486965e-07, + "loss": 0.297, + "step": 50589 + }, + { + "epoch": 0.8741705833563727, + "grad_norm": 1.4441949088067618, + "learning_rate": 8.190010194048048e-07, + "loss": 0.4467, + "step": 50590 + }, + { + "epoch": 0.8741878628697816, + "grad_norm": 1.3343705568245237, + "learning_rate": 8.187792172164999e-07, + "loss": 0.3563, + "step": 50591 + }, + { + "epoch": 0.8742051423831905, + "grad_norm": 1.111742979580945, + "learning_rate": 8.185574437844768e-07, + "loss": 0.3881, + "step": 50592 + }, + { + "epoch": 0.8742224218965994, + "grad_norm": 2.1109281764363357, + "learning_rate": 8.183356991094293e-07, + "loss": 0.4239, + "step": 50593 + }, + { + "epoch": 0.8742397014100083, + "grad_norm": 1.82567918681099, + "learning_rate": 8.181139831920526e-07, + "loss": 0.2588, + "step": 50594 + }, + { + "epoch": 0.8742569809234172, + "grad_norm": 2.3976391224114324, + "learning_rate": 8.178922960330393e-07, + "loss": 0.4397, + "step": 50595 + }, + { + "epoch": 0.8742742604368261, + "grad_norm": 1.496792739029915, + "learning_rate": 8.176706376330878e-07, + "loss": 0.3561, + "step": 50596 + }, + { + "epoch": 0.874291539950235, + "grad_norm": 1.52138340613062, + "learning_rate": 8.174490079928876e-07, + "loss": 0.3197, + "step": 50597 + }, + { + "epoch": 0.8743088194636439, + "grad_norm": 0.9233638173257128, + "learning_rate": 8.172274071131348e-07, + "loss": 0.3143, + "step": 50598 + }, + { + "epoch": 0.8743260989770528, + "grad_norm": 1.4084992223930726, + "learning_rate": 8.170058349945232e-07, + "loss": 0.3748, + "step": 50599 + }, + { + "epoch": 0.8743433784904617, + "grad_norm": 1.275422487552022, + "learning_rate": 8.167842916377488e-07, + "loss": 0.1782, + "step": 50600 + }, + { + "epoch": 0.8743606580038706, + "grad_norm": 0.6911291225477363, + "learning_rate": 8.165627770435025e-07, + "loss": 0.1908, + "step": 50601 + }, + { + "epoch": 0.8743779375172795, + "grad_norm": 0.8803210407565943, + "learning_rate": 8.163412912124813e-07, + "loss": 0.3612, + "step": 50602 + }, + { + "epoch": 0.8743952170306885, + "grad_norm": 1.0958595849659993, + "learning_rate": 8.161198341453758e-07, + "loss": 0.2694, + "step": 50603 + }, + { + "epoch": 0.8744124965440974, + "grad_norm": 1.4739561096339886, + "learning_rate": 8.158984058428799e-07, + "loss": 0.346, + "step": 50604 + }, + { + "epoch": 0.8744297760575063, + "grad_norm": 0.8793214568357892, + "learning_rate": 8.156770063056907e-07, + "loss": 0.4071, + "step": 50605 + }, + { + "epoch": 0.8744470555709152, + "grad_norm": 0.8655037112025121, + "learning_rate": 8.154556355344978e-07, + "loss": 0.3386, + "step": 50606 + }, + { + "epoch": 0.874464335084324, + "grad_norm": 1.3551240706235272, + "learning_rate": 8.152342935299962e-07, + "loss": 0.2777, + "step": 50607 + }, + { + "epoch": 0.8744816145977329, + "grad_norm": 1.2414115731532114, + "learning_rate": 8.150129802928786e-07, + "loss": 0.4181, + "step": 50608 + }, + { + "epoch": 0.8744988941111418, + "grad_norm": 1.1458532544977016, + "learning_rate": 8.147916958238378e-07, + "loss": 0.3034, + "step": 50609 + }, + { + "epoch": 0.8745161736245507, + "grad_norm": 0.714777431513333, + "learning_rate": 8.145704401235666e-07, + "loss": 0.726, + "step": 50610 + }, + { + "epoch": 0.8745334531379596, + "grad_norm": 1.1047918057016417, + "learning_rate": 8.143492131927611e-07, + "loss": 0.3828, + "step": 50611 + }, + { + "epoch": 0.8745507326513685, + "grad_norm": 2.201791671227878, + "learning_rate": 8.141280150321129e-07, + "loss": 0.3006, + "step": 50612 + }, + { + "epoch": 0.8745680121647774, + "grad_norm": 0.8784917115088744, + "learning_rate": 8.139068456423105e-07, + "loss": 0.3006, + "step": 50613 + }, + { + "epoch": 0.8745852916781863, + "grad_norm": 2.2314361653524264, + "learning_rate": 8.136857050240509e-07, + "loss": 0.251, + "step": 50614 + }, + { + "epoch": 0.8746025711915952, + "grad_norm": 0.9943892076144425, + "learning_rate": 8.13464593178026e-07, + "loss": 0.369, + "step": 50615 + }, + { + "epoch": 0.8746198507050041, + "grad_norm": 1.3851832347811333, + "learning_rate": 8.132435101049297e-07, + "loss": 0.3324, + "step": 50616 + }, + { + "epoch": 0.874637130218413, + "grad_norm": 1.361780717838008, + "learning_rate": 8.130224558054512e-07, + "loss": 0.446, + "step": 50617 + }, + { + "epoch": 0.874654409731822, + "grad_norm": 1.4984358941574705, + "learning_rate": 8.128014302802856e-07, + "loss": 0.2574, + "step": 50618 + }, + { + "epoch": 0.8746716892452309, + "grad_norm": 1.7607711567577096, + "learning_rate": 8.125804335301235e-07, + "loss": 0.4645, + "step": 50619 + }, + { + "epoch": 0.8746889687586398, + "grad_norm": 1.0842111357925208, + "learning_rate": 8.123594655556565e-07, + "loss": 0.1274, + "step": 50620 + }, + { + "epoch": 0.8747062482720487, + "grad_norm": 1.426656857565284, + "learning_rate": 8.121385263575787e-07, + "loss": 0.3953, + "step": 50621 + }, + { + "epoch": 0.8747235277854576, + "grad_norm": 1.7261045848074599, + "learning_rate": 8.119176159365827e-07, + "loss": 0.3252, + "step": 50622 + }, + { + "epoch": 0.8747408072988665, + "grad_norm": 1.3447623097990051, + "learning_rate": 8.116967342933591e-07, + "loss": 0.3149, + "step": 50623 + }, + { + "epoch": 0.8747580868122754, + "grad_norm": 1.0577367196988534, + "learning_rate": 8.114758814285983e-07, + "loss": 0.2245, + "step": 50624 + }, + { + "epoch": 0.8747753663256843, + "grad_norm": 0.6817092756694966, + "learning_rate": 8.112550573429934e-07, + "loss": 0.571, + "step": 50625 + }, + { + "epoch": 0.8747926458390932, + "grad_norm": 1.374725201913599, + "learning_rate": 8.110342620372346e-07, + "loss": 0.3725, + "step": 50626 + }, + { + "epoch": 0.8748099253525021, + "grad_norm": 0.9432991412427159, + "learning_rate": 8.108134955120184e-07, + "loss": 0.3372, + "step": 50627 + }, + { + "epoch": 0.8748272048659109, + "grad_norm": 1.0754515706307546, + "learning_rate": 8.105927577680294e-07, + "loss": 0.3901, + "step": 50628 + }, + { + "epoch": 0.8748444843793198, + "grad_norm": 1.6824125782305392, + "learning_rate": 8.103720488059652e-07, + "loss": 0.4511, + "step": 50629 + }, + { + "epoch": 0.8748617638927287, + "grad_norm": 1.5892073676680185, + "learning_rate": 8.101513686265117e-07, + "loss": 0.5174, + "step": 50630 + }, + { + "epoch": 0.8748790434061376, + "grad_norm": 0.9043492945931599, + "learning_rate": 8.099307172303628e-07, + "loss": 0.3678, + "step": 50631 + }, + { + "epoch": 0.8748963229195466, + "grad_norm": 1.3428369955536945, + "learning_rate": 8.09710094618209e-07, + "loss": 0.3213, + "step": 50632 + }, + { + "epoch": 0.8749136024329555, + "grad_norm": 1.4635916016844033, + "learning_rate": 8.094895007907433e-07, + "loss": 0.3599, + "step": 50633 + }, + { + "epoch": 0.8749308819463644, + "grad_norm": 1.1474381688912214, + "learning_rate": 8.092689357486549e-07, + "loss": 0.3153, + "step": 50634 + }, + { + "epoch": 0.8749481614597733, + "grad_norm": 1.5142303533749846, + "learning_rate": 8.090483994926334e-07, + "loss": 0.4521, + "step": 50635 + }, + { + "epoch": 0.8749654409731822, + "grad_norm": 2.265599702455536, + "learning_rate": 8.088278920233728e-07, + "loss": 0.4449, + "step": 50636 + }, + { + "epoch": 0.8749827204865911, + "grad_norm": 1.5410464345057737, + "learning_rate": 8.086074133415589e-07, + "loss": 0.4575, + "step": 50637 + }, + { + "epoch": 0.875, + "grad_norm": 1.714592011264134, + "learning_rate": 8.083869634478858e-07, + "loss": 0.26, + "step": 50638 + }, + { + "epoch": 0.8750172795134089, + "grad_norm": 2.805042962002454, + "learning_rate": 8.08166542343043e-07, + "loss": 0.2517, + "step": 50639 + }, + { + "epoch": 0.8750345590268178, + "grad_norm": 1.2730587106458093, + "learning_rate": 8.07946150027723e-07, + "loss": 0.5354, + "step": 50640 + }, + { + "epoch": 0.8750518385402267, + "grad_norm": 1.4800884457245318, + "learning_rate": 8.077257865026133e-07, + "loss": 0.3226, + "step": 50641 + }, + { + "epoch": 0.8750691180536356, + "grad_norm": 1.0684700713132558, + "learning_rate": 8.075054517684056e-07, + "loss": 0.2639, + "step": 50642 + }, + { + "epoch": 0.8750863975670445, + "grad_norm": 1.1506149533923231, + "learning_rate": 8.07285145825788e-07, + "loss": 0.2967, + "step": 50643 + }, + { + "epoch": 0.8751036770804534, + "grad_norm": 1.1766483444923017, + "learning_rate": 8.070648686754523e-07, + "loss": 0.2824, + "step": 50644 + }, + { + "epoch": 0.8751209565938624, + "grad_norm": 1.6067820578647278, + "learning_rate": 8.068446203180891e-07, + "loss": 0.4151, + "step": 50645 + }, + { + "epoch": 0.8751382361072713, + "grad_norm": 1.1756545146088158, + "learning_rate": 8.066244007543866e-07, + "loss": 0.1772, + "step": 50646 + }, + { + "epoch": 0.8751555156206802, + "grad_norm": 1.3700449173958242, + "learning_rate": 8.064042099850355e-07, + "loss": 0.4347, + "step": 50647 + }, + { + "epoch": 0.8751727951340891, + "grad_norm": 1.0955861183273075, + "learning_rate": 8.061840480107241e-07, + "loss": 0.691, + "step": 50648 + }, + { + "epoch": 0.8751900746474979, + "grad_norm": 1.4559347872878758, + "learning_rate": 8.05963914832143e-07, + "loss": 0.3676, + "step": 50649 + }, + { + "epoch": 0.8752073541609068, + "grad_norm": 1.2997585690196471, + "learning_rate": 8.057438104499816e-07, + "loss": 0.301, + "step": 50650 + }, + { + "epoch": 0.8752246336743157, + "grad_norm": 1.0712709914300353, + "learning_rate": 8.055237348649314e-07, + "loss": 0.4009, + "step": 50651 + }, + { + "epoch": 0.8752419131877246, + "grad_norm": 1.7075695968839302, + "learning_rate": 8.053036880776777e-07, + "loss": 0.4764, + "step": 50652 + }, + { + "epoch": 0.8752591927011335, + "grad_norm": 0.7218011110448218, + "learning_rate": 8.050836700889131e-07, + "loss": 0.7238, + "step": 50653 + }, + { + "epoch": 0.8752764722145424, + "grad_norm": 1.5665869058447859, + "learning_rate": 8.048636808993238e-07, + "loss": 0.3435, + "step": 50654 + }, + { + "epoch": 0.8752937517279513, + "grad_norm": 0.8800180983666241, + "learning_rate": 8.046437205096003e-07, + "loss": 0.3586, + "step": 50655 + }, + { + "epoch": 0.8753110312413602, + "grad_norm": 2.8867563990738705, + "learning_rate": 8.04423788920432e-07, + "loss": 0.1746, + "step": 50656 + }, + { + "epoch": 0.8753283107547691, + "grad_norm": 1.5688316696297515, + "learning_rate": 8.042038861325075e-07, + "loss": 0.3, + "step": 50657 + }, + { + "epoch": 0.875345590268178, + "grad_norm": 1.092562704196656, + "learning_rate": 8.039840121465159e-07, + "loss": 0.6375, + "step": 50658 + }, + { + "epoch": 0.875362869781587, + "grad_norm": 1.1838665567597593, + "learning_rate": 8.037641669631436e-07, + "loss": 0.303, + "step": 50659 + }, + { + "epoch": 0.8753801492949959, + "grad_norm": 1.031098082029879, + "learning_rate": 8.035443505830798e-07, + "loss": 0.3563, + "step": 50660 + }, + { + "epoch": 0.8753974288084048, + "grad_norm": 0.8952042172103017, + "learning_rate": 8.033245630070152e-07, + "loss": 0.2949, + "step": 50661 + }, + { + "epoch": 0.8754147083218137, + "grad_norm": 1.3786317731609248, + "learning_rate": 8.031048042356393e-07, + "loss": 0.2486, + "step": 50662 + }, + { + "epoch": 0.8754319878352226, + "grad_norm": 1.4774530855066885, + "learning_rate": 8.028850742696348e-07, + "loss": 0.3729, + "step": 50663 + }, + { + "epoch": 0.8754492673486315, + "grad_norm": 0.8303115208972, + "learning_rate": 8.026653731096957e-07, + "loss": 0.3087, + "step": 50664 + }, + { + "epoch": 0.8754665468620404, + "grad_norm": 1.9064947840125592, + "learning_rate": 8.024457007565067e-07, + "loss": 0.2164, + "step": 50665 + }, + { + "epoch": 0.8754838263754493, + "grad_norm": 1.2388542677743308, + "learning_rate": 8.022260572107555e-07, + "loss": 0.3098, + "step": 50666 + }, + { + "epoch": 0.8755011058888582, + "grad_norm": 0.9763947512194078, + "learning_rate": 8.020064424731333e-07, + "loss": 0.2224, + "step": 50667 + }, + { + "epoch": 0.8755183854022671, + "grad_norm": 0.6869866299662298, + "learning_rate": 8.017868565443243e-07, + "loss": 0.8246, + "step": 50668 + }, + { + "epoch": 0.875535664915676, + "grad_norm": 1.1286919390968186, + "learning_rate": 8.015672994250201e-07, + "loss": 0.2817, + "step": 50669 + }, + { + "epoch": 0.8755529444290848, + "grad_norm": 1.325643261225564, + "learning_rate": 8.013477711159045e-07, + "loss": 0.3523, + "step": 50670 + }, + { + "epoch": 0.8755702239424937, + "grad_norm": 1.5727000808133866, + "learning_rate": 8.011282716176683e-07, + "loss": 0.3499, + "step": 50671 + }, + { + "epoch": 0.8755875034559026, + "grad_norm": 1.55522752298252, + "learning_rate": 8.009088009309928e-07, + "loss": 0.4125, + "step": 50672 + }, + { + "epoch": 0.8756047829693115, + "grad_norm": 4.378229697132802, + "learning_rate": 8.006893590565746e-07, + "loss": 0.4207, + "step": 50673 + }, + { + "epoch": 0.8756220624827205, + "grad_norm": 2.009066257571987, + "learning_rate": 8.004699459950971e-07, + "loss": 0.3531, + "step": 50674 + }, + { + "epoch": 0.8756393419961294, + "grad_norm": 1.0379471973220513, + "learning_rate": 8.002505617472434e-07, + "loss": 0.3969, + "step": 50675 + }, + { + "epoch": 0.8756566215095383, + "grad_norm": 1.0034829655765414, + "learning_rate": 8.000312063137072e-07, + "loss": 0.306, + "step": 50676 + }, + { + "epoch": 0.8756739010229472, + "grad_norm": 1.3495693918580012, + "learning_rate": 7.998118796951704e-07, + "loss": 0.3677, + "step": 50677 + }, + { + "epoch": 0.8756911805363561, + "grad_norm": 1.0059233944105672, + "learning_rate": 7.995925818923222e-07, + "loss": 0.2642, + "step": 50678 + }, + { + "epoch": 0.875708460049765, + "grad_norm": 0.9005001738351571, + "learning_rate": 7.99373312905849e-07, + "loss": 0.3338, + "step": 50679 + }, + { + "epoch": 0.8757257395631739, + "grad_norm": 1.3419900107864957, + "learning_rate": 7.991540727364389e-07, + "loss": 0.3588, + "step": 50680 + }, + { + "epoch": 0.8757430190765828, + "grad_norm": 1.374415300411511, + "learning_rate": 7.989348613847759e-07, + "loss": 0.2624, + "step": 50681 + }, + { + "epoch": 0.8757602985899917, + "grad_norm": 1.1466542095029064, + "learning_rate": 7.987156788515505e-07, + "loss": 0.2031, + "step": 50682 + }, + { + "epoch": 0.8757775781034006, + "grad_norm": 1.8846892128531112, + "learning_rate": 7.984965251374454e-07, + "loss": 0.4405, + "step": 50683 + }, + { + "epoch": 0.8757948576168095, + "grad_norm": 1.3020458928767735, + "learning_rate": 7.982774002431481e-07, + "loss": 0.205, + "step": 50684 + }, + { + "epoch": 0.8758121371302184, + "grad_norm": 1.527740809538537, + "learning_rate": 7.980583041693468e-07, + "loss": 0.3202, + "step": 50685 + }, + { + "epoch": 0.8758294166436273, + "grad_norm": 1.5479532732591967, + "learning_rate": 7.978392369167243e-07, + "loss": 0.3398, + "step": 50686 + }, + { + "epoch": 0.8758466961570363, + "grad_norm": 2.027904006221542, + "learning_rate": 7.976201984859711e-07, + "loss": 0.3723, + "step": 50687 + }, + { + "epoch": 0.8758639756704452, + "grad_norm": 1.2725880089649397, + "learning_rate": 7.974011888777677e-07, + "loss": 0.4477, + "step": 50688 + }, + { + "epoch": 0.8758812551838541, + "grad_norm": 1.2293340085558233, + "learning_rate": 7.971822080928049e-07, + "loss": 0.3288, + "step": 50689 + }, + { + "epoch": 0.875898534697263, + "grad_norm": 0.6777148321254876, + "learning_rate": 7.969632561317653e-07, + "loss": 0.2204, + "step": 50690 + }, + { + "epoch": 0.8759158142106718, + "grad_norm": 1.538549046177296, + "learning_rate": 7.967443329953395e-07, + "loss": 0.3188, + "step": 50691 + }, + { + "epoch": 0.8759330937240807, + "grad_norm": 2.8617906900634926, + "learning_rate": 7.965254386842069e-07, + "loss": 0.3841, + "step": 50692 + }, + { + "epoch": 0.8759503732374896, + "grad_norm": 1.430738784984464, + "learning_rate": 7.963065731990583e-07, + "loss": 0.2007, + "step": 50693 + }, + { + "epoch": 0.8759676527508985, + "grad_norm": 1.5883300783156185, + "learning_rate": 7.96087736540575e-07, + "loss": 0.2746, + "step": 50694 + }, + { + "epoch": 0.8759849322643074, + "grad_norm": 1.454825236905166, + "learning_rate": 7.958689287094445e-07, + "loss": 0.3395, + "step": 50695 + }, + { + "epoch": 0.8760022117777163, + "grad_norm": 0.7162368477252768, + "learning_rate": 7.95650149706354e-07, + "loss": 0.2109, + "step": 50696 + }, + { + "epoch": 0.8760194912911252, + "grad_norm": 1.447928928509404, + "learning_rate": 7.95431399531984e-07, + "loss": 0.3531, + "step": 50697 + }, + { + "epoch": 0.8760367708045341, + "grad_norm": 1.466313309688485, + "learning_rate": 7.95212678187025e-07, + "loss": 0.5192, + "step": 50698 + }, + { + "epoch": 0.876054050317943, + "grad_norm": 1.0534467250100747, + "learning_rate": 7.949939856721578e-07, + "loss": 0.2068, + "step": 50699 + }, + { + "epoch": 0.8760713298313519, + "grad_norm": 1.5512346184662635, + "learning_rate": 7.947753219880683e-07, + "loss": 0.3932, + "step": 50700 + }, + { + "epoch": 0.8760886093447608, + "grad_norm": 1.314251972405602, + "learning_rate": 7.945566871354427e-07, + "loss": 0.2129, + "step": 50701 + }, + { + "epoch": 0.8761058888581698, + "grad_norm": 1.275756363591635, + "learning_rate": 7.94338081114967e-07, + "loss": 0.3262, + "step": 50702 + }, + { + "epoch": 0.8761231683715787, + "grad_norm": 2.802914056105746, + "learning_rate": 7.94119503927322e-07, + "loss": 0.4043, + "step": 50703 + }, + { + "epoch": 0.8761404478849876, + "grad_norm": 1.3415201688001268, + "learning_rate": 7.939009555731958e-07, + "loss": 0.3098, + "step": 50704 + }, + { + "epoch": 0.8761577273983965, + "grad_norm": 1.595709851957697, + "learning_rate": 7.936824360532703e-07, + "loss": 0.4222, + "step": 50705 + }, + { + "epoch": 0.8761750069118054, + "grad_norm": 1.1022062412389737, + "learning_rate": 7.934639453682302e-07, + "loss": 0.6302, + "step": 50706 + }, + { + "epoch": 0.8761922864252143, + "grad_norm": 0.9216178700090171, + "learning_rate": 7.932454835187631e-07, + "loss": 0.3131, + "step": 50707 + }, + { + "epoch": 0.8762095659386232, + "grad_norm": 1.0897153623778473, + "learning_rate": 7.930270505055482e-07, + "loss": 0.2646, + "step": 50708 + }, + { + "epoch": 0.8762268454520321, + "grad_norm": 1.5468956347974572, + "learning_rate": 7.92808646329275e-07, + "loss": 0.2629, + "step": 50709 + }, + { + "epoch": 0.876244124965441, + "grad_norm": 1.134522468839719, + "learning_rate": 7.92590270990623e-07, + "loss": 0.4575, + "step": 50710 + }, + { + "epoch": 0.8762614044788499, + "grad_norm": 1.5905797838299094, + "learning_rate": 7.923719244902794e-07, + "loss": 0.4429, + "step": 50711 + }, + { + "epoch": 0.8762786839922588, + "grad_norm": 1.560350420025034, + "learning_rate": 7.921536068289238e-07, + "loss": 0.45, + "step": 50712 + }, + { + "epoch": 0.8762959635056676, + "grad_norm": 0.9828918887105125, + "learning_rate": 7.919353180072465e-07, + "loss": 0.5189, + "step": 50713 + }, + { + "epoch": 0.8763132430190765, + "grad_norm": 1.7417650378484673, + "learning_rate": 7.917170580259248e-07, + "loss": 0.1528, + "step": 50714 + }, + { + "epoch": 0.8763305225324854, + "grad_norm": 1.6450121339568526, + "learning_rate": 7.914988268856472e-07, + "loss": 0.4048, + "step": 50715 + }, + { + "epoch": 0.8763478020458944, + "grad_norm": 1.4188228517981305, + "learning_rate": 7.912806245870952e-07, + "loss": 0.489, + "step": 50716 + }, + { + "epoch": 0.8763650815593033, + "grad_norm": 0.9512323730876817, + "learning_rate": 7.910624511309505e-07, + "loss": 0.2093, + "step": 50717 + }, + { + "epoch": 0.8763823610727122, + "grad_norm": 0.7639710312668054, + "learning_rate": 7.908443065178983e-07, + "loss": 0.2723, + "step": 50718 + }, + { + "epoch": 0.8763996405861211, + "grad_norm": 1.1447961371314623, + "learning_rate": 7.906261907486212e-07, + "loss": 0.2578, + "step": 50719 + }, + { + "epoch": 0.87641692009953, + "grad_norm": 2.960507863625755, + "learning_rate": 7.904081038238043e-07, + "loss": 0.6053, + "step": 50720 + }, + { + "epoch": 0.8764341996129389, + "grad_norm": 1.7198015132973716, + "learning_rate": 7.90190045744127e-07, + "loss": 0.3117, + "step": 50721 + }, + { + "epoch": 0.8764514791263478, + "grad_norm": 1.259836262930631, + "learning_rate": 7.899720165102775e-07, + "loss": 0.4187, + "step": 50722 + }, + { + "epoch": 0.8764687586397567, + "grad_norm": 1.3464606884116752, + "learning_rate": 7.897540161229334e-07, + "loss": 0.3792, + "step": 50723 + }, + { + "epoch": 0.8764860381531656, + "grad_norm": 1.2234294701444035, + "learning_rate": 7.895360445827793e-07, + "loss": 0.3759, + "step": 50724 + }, + { + "epoch": 0.8765033176665745, + "grad_norm": 0.7469451595326124, + "learning_rate": 7.893181018904983e-07, + "loss": 0.6161, + "step": 50725 + }, + { + "epoch": 0.8765205971799834, + "grad_norm": 1.3723246644239513, + "learning_rate": 7.891001880467742e-07, + "loss": 0.3771, + "step": 50726 + }, + { + "epoch": 0.8765378766933923, + "grad_norm": 1.2553604912105658, + "learning_rate": 7.888823030522886e-07, + "loss": 0.4396, + "step": 50727 + }, + { + "epoch": 0.8765551562068012, + "grad_norm": 1.3526584960263701, + "learning_rate": 7.886644469077231e-07, + "loss": 0.2542, + "step": 50728 + }, + { + "epoch": 0.8765724357202102, + "grad_norm": 1.0727913341274833, + "learning_rate": 7.884466196137597e-07, + "loss": 0.3199, + "step": 50729 + }, + { + "epoch": 0.8765897152336191, + "grad_norm": 1.7227683241022678, + "learning_rate": 7.882288211710809e-07, + "loss": 0.1843, + "step": 50730 + }, + { + "epoch": 0.876606994747028, + "grad_norm": 1.1312677922258096, + "learning_rate": 7.880110515803718e-07, + "loss": 0.3857, + "step": 50731 + }, + { + "epoch": 0.8766242742604369, + "grad_norm": 1.5988496930683616, + "learning_rate": 7.877933108423108e-07, + "loss": 0.334, + "step": 50732 + }, + { + "epoch": 0.8766415537738458, + "grad_norm": 1.3705803943645225, + "learning_rate": 7.875755989575828e-07, + "loss": 0.1894, + "step": 50733 + }, + { + "epoch": 0.8766588332872546, + "grad_norm": 2.0477456505745875, + "learning_rate": 7.873579159268663e-07, + "loss": 0.2974, + "step": 50734 + }, + { + "epoch": 0.8766761128006635, + "grad_norm": 1.5045567783873421, + "learning_rate": 7.87140261750845e-07, + "loss": 0.2637, + "step": 50735 + }, + { + "epoch": 0.8766933923140724, + "grad_norm": 1.4016115307993369, + "learning_rate": 7.869226364302019e-07, + "loss": 0.4643, + "step": 50736 + }, + { + "epoch": 0.8767106718274813, + "grad_norm": 1.726561394904055, + "learning_rate": 7.867050399656162e-07, + "loss": 0.3524, + "step": 50737 + }, + { + "epoch": 0.8767279513408902, + "grad_norm": 1.9491509843110921, + "learning_rate": 7.86487472357772e-07, + "loss": 0.2987, + "step": 50738 + }, + { + "epoch": 0.8767452308542991, + "grad_norm": 0.9639623621144813, + "learning_rate": 7.862699336073476e-07, + "loss": 0.1909, + "step": 50739 + }, + { + "epoch": 0.876762510367708, + "grad_norm": 1.4480544226178858, + "learning_rate": 7.860524237150258e-07, + "loss": 0.3011, + "step": 50740 + }, + { + "epoch": 0.8767797898811169, + "grad_norm": 1.780235000786947, + "learning_rate": 7.858349426814882e-07, + "loss": 0.2636, + "step": 50741 + }, + { + "epoch": 0.8767970693945258, + "grad_norm": 1.594267555691295, + "learning_rate": 7.856174905074165e-07, + "loss": 0.2448, + "step": 50742 + }, + { + "epoch": 0.8768143489079347, + "grad_norm": 1.1842412002904732, + "learning_rate": 7.854000671934902e-07, + "loss": 0.7016, + "step": 50743 + }, + { + "epoch": 0.8768316284213437, + "grad_norm": 1.2269073170883624, + "learning_rate": 7.851826727403932e-07, + "loss": 0.4071, + "step": 50744 + }, + { + "epoch": 0.8768489079347526, + "grad_norm": 1.328956043910826, + "learning_rate": 7.849653071488028e-07, + "loss": 0.3834, + "step": 50745 + }, + { + "epoch": 0.8768661874481615, + "grad_norm": 1.2047786191581544, + "learning_rate": 7.847479704194006e-07, + "loss": 0.3365, + "step": 50746 + }, + { + "epoch": 0.8768834669615704, + "grad_norm": 0.8119003440251994, + "learning_rate": 7.845306625528704e-07, + "loss": 0.3226, + "step": 50747 + }, + { + "epoch": 0.8769007464749793, + "grad_norm": 1.6350320065577502, + "learning_rate": 7.843133835498884e-07, + "loss": 0.4159, + "step": 50748 + }, + { + "epoch": 0.8769180259883882, + "grad_norm": 1.104536691054506, + "learning_rate": 7.840961334111396e-07, + "loss": 0.3071, + "step": 50749 + }, + { + "epoch": 0.8769353055017971, + "grad_norm": 1.7304041724935397, + "learning_rate": 7.838789121372991e-07, + "loss": 0.1372, + "step": 50750 + }, + { + "epoch": 0.876952585015206, + "grad_norm": 1.2261940695814324, + "learning_rate": 7.836617197290531e-07, + "loss": 0.3592, + "step": 50751 + }, + { + "epoch": 0.8769698645286149, + "grad_norm": 1.4675815403510581, + "learning_rate": 7.834445561870751e-07, + "loss": 0.4142, + "step": 50752 + }, + { + "epoch": 0.8769871440420238, + "grad_norm": 1.374934095449812, + "learning_rate": 7.832274215120528e-07, + "loss": 0.2433, + "step": 50753 + }, + { + "epoch": 0.8770044235554327, + "grad_norm": 0.9795092468637013, + "learning_rate": 7.830103157046609e-07, + "loss": 0.3578, + "step": 50754 + }, + { + "epoch": 0.8770217030688415, + "grad_norm": 0.8715959210238946, + "learning_rate": 7.827932387655834e-07, + "loss": 0.3175, + "step": 50755 + }, + { + "epoch": 0.8770389825822504, + "grad_norm": 0.9003361712629462, + "learning_rate": 7.825761906954954e-07, + "loss": 0.257, + "step": 50756 + }, + { + "epoch": 0.8770562620956593, + "grad_norm": 1.0845770277596125, + "learning_rate": 7.823591714950818e-07, + "loss": 0.4633, + "step": 50757 + }, + { + "epoch": 0.8770735416090683, + "grad_norm": 1.5295621788172336, + "learning_rate": 7.821421811650176e-07, + "loss": 0.4618, + "step": 50758 + }, + { + "epoch": 0.8770908211224772, + "grad_norm": 1.9108582297905754, + "learning_rate": 7.819252197059846e-07, + "loss": 0.2243, + "step": 50759 + }, + { + "epoch": 0.8771081006358861, + "grad_norm": 0.7217451169519797, + "learning_rate": 7.817082871186632e-07, + "loss": 0.2463, + "step": 50760 + }, + { + "epoch": 0.877125380149295, + "grad_norm": 0.8876012919699654, + "learning_rate": 7.814913834037318e-07, + "loss": 0.3578, + "step": 50761 + }, + { + "epoch": 0.8771426596627039, + "grad_norm": 2.6183086534302813, + "learning_rate": 7.812745085618712e-07, + "loss": 0.299, + "step": 50762 + }, + { + "epoch": 0.8771599391761128, + "grad_norm": 2.8890733815819907, + "learning_rate": 7.810576625937571e-07, + "loss": 0.315, + "step": 50763 + }, + { + "epoch": 0.8771772186895217, + "grad_norm": 1.4927010419911384, + "learning_rate": 7.808408455000716e-07, + "loss": 0.507, + "step": 50764 + }, + { + "epoch": 0.8771944982029306, + "grad_norm": 1.6077477293269111, + "learning_rate": 7.806240572814927e-07, + "loss": 0.2974, + "step": 50765 + }, + { + "epoch": 0.8772117777163395, + "grad_norm": 1.2006821381733623, + "learning_rate": 7.804072979387023e-07, + "loss": 0.2074, + "step": 50766 + }, + { + "epoch": 0.8772290572297484, + "grad_norm": 1.378488486556804, + "learning_rate": 7.801905674723764e-07, + "loss": 0.4391, + "step": 50767 + }, + { + "epoch": 0.8772463367431573, + "grad_norm": 1.3741881484948533, + "learning_rate": 7.799738658831934e-07, + "loss": 0.516, + "step": 50768 + }, + { + "epoch": 0.8772636162565662, + "grad_norm": 2.9440858208560137, + "learning_rate": 7.797571931718329e-07, + "loss": 0.1809, + "step": 50769 + }, + { + "epoch": 0.8772808957699751, + "grad_norm": 1.6025752061216676, + "learning_rate": 7.79540549338973e-07, + "loss": 0.6625, + "step": 50770 + }, + { + "epoch": 0.877298175283384, + "grad_norm": 2.0853420199149473, + "learning_rate": 7.793239343852943e-07, + "loss": 0.2844, + "step": 50771 + }, + { + "epoch": 0.877315454796793, + "grad_norm": 1.0748443945166488, + "learning_rate": 7.79107348311472e-07, + "loss": 0.163, + "step": 50772 + }, + { + "epoch": 0.8773327343102019, + "grad_norm": 1.5280556255976967, + "learning_rate": 7.788907911181886e-07, + "loss": 0.4815, + "step": 50773 + }, + { + "epoch": 0.8773500138236108, + "grad_norm": 1.6624766678107248, + "learning_rate": 7.786742628061184e-07, + "loss": 0.3472, + "step": 50774 + }, + { + "epoch": 0.8773672933370197, + "grad_norm": 1.1235316288219759, + "learning_rate": 7.784577633759405e-07, + "loss": 0.2632, + "step": 50775 + }, + { + "epoch": 0.8773845728504285, + "grad_norm": 0.980416480022272, + "learning_rate": 7.782412928283345e-07, + "loss": 0.4408, + "step": 50776 + }, + { + "epoch": 0.8774018523638374, + "grad_norm": 1.5717210157705757, + "learning_rate": 7.780248511639787e-07, + "loss": 0.39, + "step": 50777 + }, + { + "epoch": 0.8774191318772463, + "grad_norm": 2.1508600356029848, + "learning_rate": 7.778084383835493e-07, + "loss": 0.2782, + "step": 50778 + }, + { + "epoch": 0.8774364113906552, + "grad_norm": 1.106269190368389, + "learning_rate": 7.775920544877247e-07, + "loss": 0.2612, + "step": 50779 + }, + { + "epoch": 0.8774536909040641, + "grad_norm": 0.6990232382409428, + "learning_rate": 7.773756994771809e-07, + "loss": 0.834, + "step": 50780 + }, + { + "epoch": 0.877470970417473, + "grad_norm": 0.7172428954747281, + "learning_rate": 7.771593733525984e-07, + "loss": 0.5806, + "step": 50781 + }, + { + "epoch": 0.8774882499308819, + "grad_norm": 1.3237663662270147, + "learning_rate": 7.769430761146557e-07, + "loss": 0.5184, + "step": 50782 + }, + { + "epoch": 0.8775055294442908, + "grad_norm": 2.140279672512888, + "learning_rate": 7.767268077640256e-07, + "loss": 0.3876, + "step": 50783 + }, + { + "epoch": 0.8775228089576997, + "grad_norm": 1.0722426835417582, + "learning_rate": 7.765105683013896e-07, + "loss": 0.2582, + "step": 50784 + }, + { + "epoch": 0.8775400884711086, + "grad_norm": 2.45595615573212, + "learning_rate": 7.76294357727423e-07, + "loss": 0.1868, + "step": 50785 + }, + { + "epoch": 0.8775573679845176, + "grad_norm": 1.048833688806418, + "learning_rate": 7.760781760428027e-07, + "loss": 0.2743, + "step": 50786 + }, + { + "epoch": 0.8775746474979265, + "grad_norm": 1.493781643529324, + "learning_rate": 7.758620232482083e-07, + "loss": 0.4735, + "step": 50787 + }, + { + "epoch": 0.8775919270113354, + "grad_norm": 1.5075711935100307, + "learning_rate": 7.756458993443127e-07, + "loss": 0.4646, + "step": 50788 + }, + { + "epoch": 0.8776092065247443, + "grad_norm": 2.130973764304018, + "learning_rate": 7.754298043317976e-07, + "loss": 0.4218, + "step": 50789 + }, + { + "epoch": 0.8776264860381532, + "grad_norm": 0.8259976497676512, + "learning_rate": 7.752137382113356e-07, + "loss": 0.206, + "step": 50790 + }, + { + "epoch": 0.8776437655515621, + "grad_norm": 1.3856053405186541, + "learning_rate": 7.749977009836074e-07, + "loss": 0.3473, + "step": 50791 + }, + { + "epoch": 0.877661045064971, + "grad_norm": 1.4122674957681343, + "learning_rate": 7.747816926492835e-07, + "loss": 0.2993, + "step": 50792 + }, + { + "epoch": 0.8776783245783799, + "grad_norm": 1.5069006054122498, + "learning_rate": 7.745657132090478e-07, + "loss": 0.369, + "step": 50793 + }, + { + "epoch": 0.8776956040917888, + "grad_norm": 1.587429313098846, + "learning_rate": 7.743497626635721e-07, + "loss": 0.4421, + "step": 50794 + }, + { + "epoch": 0.8777128836051977, + "grad_norm": 1.2257328577971018, + "learning_rate": 7.741338410135346e-07, + "loss": 0.2679, + "step": 50795 + }, + { + "epoch": 0.8777301631186066, + "grad_norm": 0.8676794331377287, + "learning_rate": 7.739179482596104e-07, + "loss": 0.2255, + "step": 50796 + }, + { + "epoch": 0.8777474426320154, + "grad_norm": 0.8316944546560986, + "learning_rate": 7.737020844024778e-07, + "loss": 0.326, + "step": 50797 + }, + { + "epoch": 0.8777647221454243, + "grad_norm": 1.173462491727692, + "learning_rate": 7.734862494428097e-07, + "loss": 0.5101, + "step": 50798 + }, + { + "epoch": 0.8777820016588332, + "grad_norm": 1.604889708457115, + "learning_rate": 7.732704433812844e-07, + "loss": 0.3476, + "step": 50799 + }, + { + "epoch": 0.8777992811722422, + "grad_norm": 1.8448839186874448, + "learning_rate": 7.730546662185789e-07, + "loss": 0.2872, + "step": 50800 + }, + { + "epoch": 0.8778165606856511, + "grad_norm": 1.9631500135075681, + "learning_rate": 7.728389179553653e-07, + "loss": 0.2958, + "step": 50801 + }, + { + "epoch": 0.87783384019906, + "grad_norm": 1.4910678334537282, + "learning_rate": 7.726231985923227e-07, + "loss": 0.251, + "step": 50802 + }, + { + "epoch": 0.8778511197124689, + "grad_norm": 1.6795849807769605, + "learning_rate": 7.724075081301241e-07, + "loss": 0.257, + "step": 50803 + }, + { + "epoch": 0.8778683992258778, + "grad_norm": 0.8114433201547133, + "learning_rate": 7.721918465694467e-07, + "loss": 0.3431, + "step": 50804 + }, + { + "epoch": 0.8778856787392867, + "grad_norm": 1.2779808112368185, + "learning_rate": 7.719762139109666e-07, + "loss": 0.3903, + "step": 50805 + }, + { + "epoch": 0.8779029582526956, + "grad_norm": 1.2603855160674047, + "learning_rate": 7.717606101553588e-07, + "loss": 0.4713, + "step": 50806 + }, + { + "epoch": 0.8779202377661045, + "grad_norm": 1.2218078251362035, + "learning_rate": 7.715450353032961e-07, + "loss": 0.3198, + "step": 50807 + }, + { + "epoch": 0.8779375172795134, + "grad_norm": 0.9974085437786909, + "learning_rate": 7.71329489355458e-07, + "loss": 0.2743, + "step": 50808 + }, + { + "epoch": 0.8779547967929223, + "grad_norm": 2.2813267383707205, + "learning_rate": 7.711139723125161e-07, + "loss": 0.3566, + "step": 50809 + }, + { + "epoch": 0.8779720763063312, + "grad_norm": 1.3997794530642063, + "learning_rate": 7.708984841751455e-07, + "loss": 0.2994, + "step": 50810 + }, + { + "epoch": 0.8779893558197401, + "grad_norm": 1.416745487291205, + "learning_rate": 7.706830249440245e-07, + "loss": 0.318, + "step": 50811 + }, + { + "epoch": 0.878006635333149, + "grad_norm": 1.013284718872682, + "learning_rate": 7.704675946198248e-07, + "loss": 0.3051, + "step": 50812 + }, + { + "epoch": 0.878023914846558, + "grad_norm": 1.7162099074453876, + "learning_rate": 7.702521932032225e-07, + "loss": 0.2977, + "step": 50813 + }, + { + "epoch": 0.8780411943599669, + "grad_norm": 1.7158873991294488, + "learning_rate": 7.700368206948905e-07, + "loss": 0.3747, + "step": 50814 + }, + { + "epoch": 0.8780584738733758, + "grad_norm": 2.753710586222407, + "learning_rate": 7.698214770955048e-07, + "loss": 0.3828, + "step": 50815 + }, + { + "epoch": 0.8780757533867847, + "grad_norm": 0.7569655659078145, + "learning_rate": 7.696061624057394e-07, + "loss": 0.2408, + "step": 50816 + }, + { + "epoch": 0.8780930329001936, + "grad_norm": 1.4757726621680165, + "learning_rate": 7.693908766262704e-07, + "loss": 0.3867, + "step": 50817 + }, + { + "epoch": 0.8781103124136024, + "grad_norm": 0.9557666128959559, + "learning_rate": 7.691756197577715e-07, + "loss": 0.7514, + "step": 50818 + }, + { + "epoch": 0.8781275919270113, + "grad_norm": 0.9223022665712357, + "learning_rate": 7.689603918009137e-07, + "loss": 0.8262, + "step": 50819 + }, + { + "epoch": 0.8781448714404202, + "grad_norm": 0.6688798419810033, + "learning_rate": 7.687451927563738e-07, + "loss": 0.2531, + "step": 50820 + }, + { + "epoch": 0.8781621509538291, + "grad_norm": 1.48172696428773, + "learning_rate": 7.685300226248249e-07, + "loss": 0.3346, + "step": 50821 + }, + { + "epoch": 0.878179430467238, + "grad_norm": 1.2467247778244155, + "learning_rate": 7.683148814069441e-07, + "loss": 0.5417, + "step": 50822 + }, + { + "epoch": 0.8781967099806469, + "grad_norm": 1.1641848197588336, + "learning_rate": 7.680997691034009e-07, + "loss": 0.2623, + "step": 50823 + }, + { + "epoch": 0.8782139894940558, + "grad_norm": 1.0933621574332781, + "learning_rate": 7.678846857148714e-07, + "loss": 0.3599, + "step": 50824 + }, + { + "epoch": 0.8782312690074647, + "grad_norm": 1.3750286359231005, + "learning_rate": 7.676696312420285e-07, + "loss": 0.2209, + "step": 50825 + }, + { + "epoch": 0.8782485485208736, + "grad_norm": 0.9761641428907086, + "learning_rate": 7.674546056855448e-07, + "loss": 0.3107, + "step": 50826 + }, + { + "epoch": 0.8782658280342825, + "grad_norm": 1.2544858825051188, + "learning_rate": 7.672396090460954e-07, + "loss": 0.6304, + "step": 50827 + }, + { + "epoch": 0.8782831075476915, + "grad_norm": 0.5422132637920888, + "learning_rate": 7.670246413243554e-07, + "loss": 0.6586, + "step": 50828 + }, + { + "epoch": 0.8783003870611004, + "grad_norm": 1.464065907770173, + "learning_rate": 7.668097025209953e-07, + "loss": 0.2664, + "step": 50829 + }, + { + "epoch": 0.8783176665745093, + "grad_norm": 1.1424946973762184, + "learning_rate": 7.665947926366868e-07, + "loss": 0.1961, + "step": 50830 + }, + { + "epoch": 0.8783349460879182, + "grad_norm": 1.1625713494925878, + "learning_rate": 7.663799116721071e-07, + "loss": 0.5131, + "step": 50831 + }, + { + "epoch": 0.8783522256013271, + "grad_norm": 1.4853617875493224, + "learning_rate": 7.661650596279246e-07, + "loss": 0.2439, + "step": 50832 + }, + { + "epoch": 0.878369505114736, + "grad_norm": 2.0405158520104356, + "learning_rate": 7.659502365048177e-07, + "loss": 0.2352, + "step": 50833 + }, + { + "epoch": 0.8783867846281449, + "grad_norm": 1.03423485338211, + "learning_rate": 7.657354423034547e-07, + "loss": 0.5058, + "step": 50834 + }, + { + "epoch": 0.8784040641415538, + "grad_norm": 1.5661306720419867, + "learning_rate": 7.655206770245116e-07, + "loss": 0.3787, + "step": 50835 + }, + { + "epoch": 0.8784213436549627, + "grad_norm": 1.070974892099256, + "learning_rate": 7.65305940668658e-07, + "loss": 0.342, + "step": 50836 + }, + { + "epoch": 0.8784386231683716, + "grad_norm": 0.6966271971956248, + "learning_rate": 7.650912332365701e-07, + "loss": 0.8845, + "step": 50837 + }, + { + "epoch": 0.8784559026817805, + "grad_norm": 0.6190534808626444, + "learning_rate": 7.64876554728915e-07, + "loss": 0.5874, + "step": 50838 + }, + { + "epoch": 0.8784731821951893, + "grad_norm": 1.1285459340836728, + "learning_rate": 7.64661905146371e-07, + "loss": 0.362, + "step": 50839 + }, + { + "epoch": 0.8784904617085982, + "grad_norm": 2.3204051712223284, + "learning_rate": 7.644472844896078e-07, + "loss": 0.3306, + "step": 50840 + }, + { + "epoch": 0.8785077412220071, + "grad_norm": 1.9370300519741204, + "learning_rate": 7.642326927592969e-07, + "loss": 0.4355, + "step": 50841 + }, + { + "epoch": 0.878525020735416, + "grad_norm": 1.195081933370738, + "learning_rate": 7.640181299561123e-07, + "loss": 0.48, + "step": 50842 + }, + { + "epoch": 0.878542300248825, + "grad_norm": 1.2004915025780614, + "learning_rate": 7.638035960807222e-07, + "loss": 0.335, + "step": 50843 + }, + { + "epoch": 0.8785595797622339, + "grad_norm": 0.7561670915213331, + "learning_rate": 7.635890911338018e-07, + "loss": 0.5191, + "step": 50844 + }, + { + "epoch": 0.8785768592756428, + "grad_norm": 2.021620200618167, + "learning_rate": 7.633746151160227e-07, + "loss": 0.3149, + "step": 50845 + }, + { + "epoch": 0.8785941387890517, + "grad_norm": 1.3242218399542713, + "learning_rate": 7.631601680280576e-07, + "loss": 0.3445, + "step": 50846 + }, + { + "epoch": 0.8786114183024606, + "grad_norm": 1.3184846260024736, + "learning_rate": 7.62945749870575e-07, + "loss": 0.3607, + "step": 50847 + }, + { + "epoch": 0.8786286978158695, + "grad_norm": 1.2092289167300452, + "learning_rate": 7.627313606442499e-07, + "loss": 0.2973, + "step": 50848 + }, + { + "epoch": 0.8786459773292784, + "grad_norm": 1.542620930736145, + "learning_rate": 7.625170003497517e-07, + "loss": 0.2506, + "step": 50849 + }, + { + "epoch": 0.8786632568426873, + "grad_norm": 1.269584742646602, + "learning_rate": 7.623026689877511e-07, + "loss": 0.2299, + "step": 50850 + }, + { + "epoch": 0.8786805363560962, + "grad_norm": 1.5514762305770922, + "learning_rate": 7.620883665589218e-07, + "loss": 0.2656, + "step": 50851 + }, + { + "epoch": 0.8786978158695051, + "grad_norm": 1.6611515813531554, + "learning_rate": 7.618740930639334e-07, + "loss": 0.2932, + "step": 50852 + }, + { + "epoch": 0.878715095382914, + "grad_norm": 0.8400557395605421, + "learning_rate": 7.616598485034588e-07, + "loss": 0.2908, + "step": 50853 + }, + { + "epoch": 0.8787323748963229, + "grad_norm": 1.3729503636055795, + "learning_rate": 7.61445632878165e-07, + "loss": 0.4086, + "step": 50854 + }, + { + "epoch": 0.8787496544097319, + "grad_norm": 1.0090544481373112, + "learning_rate": 7.612314461887271e-07, + "loss": 0.2829, + "step": 50855 + }, + { + "epoch": 0.8787669339231408, + "grad_norm": 1.0923071944258158, + "learning_rate": 7.610172884358136e-07, + "loss": 0.2333, + "step": 50856 + }, + { + "epoch": 0.8787842134365497, + "grad_norm": 1.395640790168077, + "learning_rate": 7.60803159620097e-07, + "loss": 0.4331, + "step": 50857 + }, + { + "epoch": 0.8788014929499586, + "grad_norm": 0.8728246620415558, + "learning_rate": 7.605890597422461e-07, + "loss": 0.2697, + "step": 50858 + }, + { + "epoch": 0.8788187724633675, + "grad_norm": 1.2925773327555283, + "learning_rate": 7.603749888029344e-07, + "loss": 0.2311, + "step": 50859 + }, + { + "epoch": 0.8788360519767764, + "grad_norm": 2.118451189603278, + "learning_rate": 7.601609468028281e-07, + "loss": 0.2794, + "step": 50860 + }, + { + "epoch": 0.8788533314901852, + "grad_norm": 1.6569322460217328, + "learning_rate": 7.599469337426013e-07, + "loss": 0.4106, + "step": 50861 + }, + { + "epoch": 0.8788706110035941, + "grad_norm": 0.8699735937008711, + "learning_rate": 7.597329496229234e-07, + "loss": 0.6137, + "step": 50862 + }, + { + "epoch": 0.878887890517003, + "grad_norm": 1.112675099072076, + "learning_rate": 7.595189944444625e-07, + "loss": 0.5638, + "step": 50863 + }, + { + "epoch": 0.8789051700304119, + "grad_norm": 1.01007098422448, + "learning_rate": 7.593050682078918e-07, + "loss": 0.445, + "step": 50864 + }, + { + "epoch": 0.8789224495438208, + "grad_norm": 1.1283246776905103, + "learning_rate": 7.590911709138793e-07, + "loss": 0.2694, + "step": 50865 + }, + { + "epoch": 0.8789397290572297, + "grad_norm": 1.0767450966628462, + "learning_rate": 7.588773025630947e-07, + "loss": 0.3796, + "step": 50866 + }, + { + "epoch": 0.8789570085706386, + "grad_norm": 1.6041604997496235, + "learning_rate": 7.586634631562084e-07, + "loss": 0.4635, + "step": 50867 + }, + { + "epoch": 0.8789742880840475, + "grad_norm": 1.9611336521646825, + "learning_rate": 7.584496526938933e-07, + "loss": 0.2646, + "step": 50868 + }, + { + "epoch": 0.8789915675974564, + "grad_norm": 1.5287856478858661, + "learning_rate": 7.582358711768134e-07, + "loss": 0.6533, + "step": 50869 + }, + { + "epoch": 0.8790088471108654, + "grad_norm": 1.17397496624954, + "learning_rate": 7.580221186056436e-07, + "loss": 0.3086, + "step": 50870 + }, + { + "epoch": 0.8790261266242743, + "grad_norm": 2.4785178805695125, + "learning_rate": 7.5780839498105e-07, + "loss": 0.4154, + "step": 50871 + }, + { + "epoch": 0.8790434061376832, + "grad_norm": 1.8791985613072246, + "learning_rate": 7.575947003036999e-07, + "loss": 0.2307, + "step": 50872 + }, + { + "epoch": 0.8790606856510921, + "grad_norm": 1.600239847775297, + "learning_rate": 7.573810345742693e-07, + "loss": 0.7346, + "step": 50873 + }, + { + "epoch": 0.879077965164501, + "grad_norm": 2.1006147380976667, + "learning_rate": 7.571673977934225e-07, + "loss": 0.264, + "step": 50874 + }, + { + "epoch": 0.8790952446779099, + "grad_norm": 1.0540283458816166, + "learning_rate": 7.569537899618307e-07, + "loss": 0.2439, + "step": 50875 + }, + { + "epoch": 0.8791125241913188, + "grad_norm": 1.1706339016338836, + "learning_rate": 7.567402110801603e-07, + "loss": 0.4452, + "step": 50876 + }, + { + "epoch": 0.8791298037047277, + "grad_norm": 1.4390105770126884, + "learning_rate": 7.565266611490841e-07, + "loss": 0.2307, + "step": 50877 + }, + { + "epoch": 0.8791470832181366, + "grad_norm": 1.1049905758686998, + "learning_rate": 7.563131401692647e-07, + "loss": 0.5479, + "step": 50878 + }, + { + "epoch": 0.8791643627315455, + "grad_norm": 2.2457390799547476, + "learning_rate": 7.560996481413796e-07, + "loss": 0.3047, + "step": 50879 + }, + { + "epoch": 0.8791816422449544, + "grad_norm": 1.4222436193429104, + "learning_rate": 7.558861850660926e-07, + "loss": 0.3693, + "step": 50880 + }, + { + "epoch": 0.8791989217583633, + "grad_norm": 1.8812313611657057, + "learning_rate": 7.556727509440697e-07, + "loss": 0.2516, + "step": 50881 + }, + { + "epoch": 0.8792162012717721, + "grad_norm": 1.644711088339773, + "learning_rate": 7.554593457759851e-07, + "loss": 0.354, + "step": 50882 + }, + { + "epoch": 0.879233480785181, + "grad_norm": 1.0644919822970398, + "learning_rate": 7.552459695625025e-07, + "loss": 0.3188, + "step": 50883 + }, + { + "epoch": 0.87925076029859, + "grad_norm": 1.220450858571291, + "learning_rate": 7.550326223042925e-07, + "loss": 0.2801, + "step": 50884 + }, + { + "epoch": 0.8792680398119989, + "grad_norm": 1.1987418496004576, + "learning_rate": 7.548193040020224e-07, + "loss": 0.1956, + "step": 50885 + }, + { + "epoch": 0.8792853193254078, + "grad_norm": 0.9937776562432042, + "learning_rate": 7.546060146563627e-07, + "loss": 0.4378, + "step": 50886 + }, + { + "epoch": 0.8793025988388167, + "grad_norm": 1.9406717119721262, + "learning_rate": 7.543927542679786e-07, + "loss": 0.388, + "step": 50887 + }, + { + "epoch": 0.8793198783522256, + "grad_norm": 0.9610644204083545, + "learning_rate": 7.541795228375393e-07, + "loss": 0.389, + "step": 50888 + }, + { + "epoch": 0.8793371578656345, + "grad_norm": 1.7005028471761618, + "learning_rate": 7.539663203657122e-07, + "loss": 0.3323, + "step": 50889 + }, + { + "epoch": 0.8793544373790434, + "grad_norm": 2.1391143855408346, + "learning_rate": 7.537531468531645e-07, + "loss": 0.3127, + "step": 50890 + }, + { + "epoch": 0.8793717168924523, + "grad_norm": 1.22675407420681, + "learning_rate": 7.535400023005656e-07, + "loss": 0.4362, + "step": 50891 + }, + { + "epoch": 0.8793889964058612, + "grad_norm": 1.1824969821171396, + "learning_rate": 7.533268867085819e-07, + "loss": 0.3546, + "step": 50892 + }, + { + "epoch": 0.8794062759192701, + "grad_norm": 1.015940176497156, + "learning_rate": 7.531138000778815e-07, + "loss": 0.2827, + "step": 50893 + }, + { + "epoch": 0.879423555432679, + "grad_norm": 0.574436247483073, + "learning_rate": 7.529007424091305e-07, + "loss": 0.7003, + "step": 50894 + }, + { + "epoch": 0.8794408349460879, + "grad_norm": 1.5164265847776113, + "learning_rate": 7.526877137029964e-07, + "loss": 0.2713, + "step": 50895 + }, + { + "epoch": 0.8794581144594968, + "grad_norm": 1.6981601223986915, + "learning_rate": 7.524747139601474e-07, + "loss": 0.3304, + "step": 50896 + }, + { + "epoch": 0.8794753939729058, + "grad_norm": 1.0630955263400037, + "learning_rate": 7.522617431812529e-07, + "loss": 0.3489, + "step": 50897 + }, + { + "epoch": 0.8794926734863147, + "grad_norm": 0.5886125793663424, + "learning_rate": 7.520488013669746e-07, + "loss": 0.6118, + "step": 50898 + }, + { + "epoch": 0.8795099529997236, + "grad_norm": 1.4876380708441574, + "learning_rate": 7.518358885179855e-07, + "loss": 0.2853, + "step": 50899 + }, + { + "epoch": 0.8795272325131325, + "grad_norm": 1.2557312756588437, + "learning_rate": 7.51623004634946e-07, + "loss": 0.4041, + "step": 50900 + }, + { + "epoch": 0.8795445120265414, + "grad_norm": 0.9029005073065934, + "learning_rate": 7.514101497185267e-07, + "loss": 0.4737, + "step": 50901 + }, + { + "epoch": 0.8795617915399503, + "grad_norm": 1.3492872380811247, + "learning_rate": 7.511973237693948e-07, + "loss": 0.3733, + "step": 50902 + }, + { + "epoch": 0.8795790710533591, + "grad_norm": 1.7818867540927608, + "learning_rate": 7.509845267882155e-07, + "loss": 0.2952, + "step": 50903 + }, + { + "epoch": 0.879596350566768, + "grad_norm": 0.928926431989753, + "learning_rate": 7.507717587756558e-07, + "loss": 0.457, + "step": 50904 + }, + { + "epoch": 0.8796136300801769, + "grad_norm": 1.5626380063514889, + "learning_rate": 7.505590197323809e-07, + "loss": 0.2461, + "step": 50905 + }, + { + "epoch": 0.8796309095935858, + "grad_norm": 1.3659822220178606, + "learning_rate": 7.50346309659058e-07, + "loss": 0.5538, + "step": 50906 + }, + { + "epoch": 0.8796481891069947, + "grad_norm": 0.8388721020312119, + "learning_rate": 7.501336285563532e-07, + "loss": 0.3964, + "step": 50907 + }, + { + "epoch": 0.8796654686204036, + "grad_norm": 0.8993819039877394, + "learning_rate": 7.499209764249349e-07, + "loss": 0.3508, + "step": 50908 + }, + { + "epoch": 0.8796827481338125, + "grad_norm": 1.6118282942004782, + "learning_rate": 7.497083532654648e-07, + "loss": 0.375, + "step": 50909 + }, + { + "epoch": 0.8797000276472214, + "grad_norm": 1.4022256598700553, + "learning_rate": 7.494957590786134e-07, + "loss": 0.5319, + "step": 50910 + }, + { + "epoch": 0.8797173071606303, + "grad_norm": 1.196816647602682, + "learning_rate": 7.492831938650436e-07, + "loss": 0.2162, + "step": 50911 + }, + { + "epoch": 0.8797345866740393, + "grad_norm": 2.1345274329215176, + "learning_rate": 7.490706576254192e-07, + "loss": 0.4472, + "step": 50912 + }, + { + "epoch": 0.8797518661874482, + "grad_norm": 0.8738064782880608, + "learning_rate": 7.488581503604119e-07, + "loss": 0.2424, + "step": 50913 + }, + { + "epoch": 0.8797691457008571, + "grad_norm": 1.0761065885183791, + "learning_rate": 7.486456720706814e-07, + "loss": 0.3654, + "step": 50914 + }, + { + "epoch": 0.879786425214266, + "grad_norm": 1.6504199838921543, + "learning_rate": 7.48433222756898e-07, + "loss": 0.5432, + "step": 50915 + }, + { + "epoch": 0.8798037047276749, + "grad_norm": 1.0361165404998274, + "learning_rate": 7.482208024197235e-07, + "loss": 0.3687, + "step": 50916 + }, + { + "epoch": 0.8798209842410838, + "grad_norm": 1.9478319663708972, + "learning_rate": 7.480084110598262e-07, + "loss": 0.1871, + "step": 50917 + }, + { + "epoch": 0.8798382637544927, + "grad_norm": 1.5704304682968948, + "learning_rate": 7.477960486778657e-07, + "loss": 0.3103, + "step": 50918 + }, + { + "epoch": 0.8798555432679016, + "grad_norm": 1.1131665592491566, + "learning_rate": 7.475837152745146e-07, + "loss": 0.485, + "step": 50919 + }, + { + "epoch": 0.8798728227813105, + "grad_norm": 1.3746048506252553, + "learning_rate": 7.473714108504338e-07, + "loss": 0.4311, + "step": 50920 + }, + { + "epoch": 0.8798901022947194, + "grad_norm": 1.0167097971182317, + "learning_rate": 7.47159135406289e-07, + "loss": 0.3252, + "step": 50921 + }, + { + "epoch": 0.8799073818081283, + "grad_norm": 1.941532793503115, + "learning_rate": 7.469468889427467e-07, + "loss": 0.2794, + "step": 50922 + }, + { + "epoch": 0.8799246613215372, + "grad_norm": 0.9235532972799427, + "learning_rate": 7.467346714604673e-07, + "loss": 0.2905, + "step": 50923 + }, + { + "epoch": 0.879941940834946, + "grad_norm": 1.5222182942323885, + "learning_rate": 7.465224829601181e-07, + "loss": 0.2749, + "step": 50924 + }, + { + "epoch": 0.8799592203483549, + "grad_norm": 0.8497780483471963, + "learning_rate": 7.463103234423641e-07, + "loss": 0.2364, + "step": 50925 + }, + { + "epoch": 0.8799764998617639, + "grad_norm": 1.2137804669711407, + "learning_rate": 7.460981929078703e-07, + "loss": 0.3744, + "step": 50926 + }, + { + "epoch": 0.8799937793751728, + "grad_norm": 1.1449528629035144, + "learning_rate": 7.458860913572996e-07, + "loss": 0.3446, + "step": 50927 + }, + { + "epoch": 0.8800110588885817, + "grad_norm": 0.7919580004388198, + "learning_rate": 7.456740187913181e-07, + "loss": 0.2577, + "step": 50928 + }, + { + "epoch": 0.8800283384019906, + "grad_norm": 1.4011782612877, + "learning_rate": 7.454619752105863e-07, + "loss": 0.2391, + "step": 50929 + }, + { + "epoch": 0.8800456179153995, + "grad_norm": 1.0824160561804175, + "learning_rate": 7.452499606157726e-07, + "loss": 0.2731, + "step": 50930 + }, + { + "epoch": 0.8800628974288084, + "grad_norm": 2.0725881715782584, + "learning_rate": 7.450379750075376e-07, + "loss": 0.3682, + "step": 50931 + }, + { + "epoch": 0.8800801769422173, + "grad_norm": 0.7794328450475702, + "learning_rate": 7.448260183865496e-07, + "loss": 0.162, + "step": 50932 + }, + { + "epoch": 0.8800974564556262, + "grad_norm": 1.3086151794355778, + "learning_rate": 7.446140907534704e-07, + "loss": 0.3734, + "step": 50933 + }, + { + "epoch": 0.8801147359690351, + "grad_norm": 1.917440551806738, + "learning_rate": 7.444021921089617e-07, + "loss": 0.1792, + "step": 50934 + }, + { + "epoch": 0.880132015482444, + "grad_norm": 1.1541524138465826, + "learning_rate": 7.441903224536873e-07, + "loss": 0.2541, + "step": 50935 + }, + { + "epoch": 0.8801492949958529, + "grad_norm": 0.8718778640338479, + "learning_rate": 7.439784817883133e-07, + "loss": 0.2873, + "step": 50936 + }, + { + "epoch": 0.8801665745092618, + "grad_norm": 0.5715947589327438, + "learning_rate": 7.437666701135038e-07, + "loss": 0.4498, + "step": 50937 + }, + { + "epoch": 0.8801838540226707, + "grad_norm": 1.2047561228894814, + "learning_rate": 7.435548874299181e-07, + "loss": 0.5324, + "step": 50938 + }, + { + "epoch": 0.8802011335360796, + "grad_norm": 0.6448429607137868, + "learning_rate": 7.433431337382246e-07, + "loss": 0.857, + "step": 50939 + }, + { + "epoch": 0.8802184130494886, + "grad_norm": 1.6730052275291147, + "learning_rate": 7.431314090390817e-07, + "loss": 0.3364, + "step": 50940 + }, + { + "epoch": 0.8802356925628975, + "grad_norm": 2.075220684145619, + "learning_rate": 7.429197133331545e-07, + "loss": 0.3505, + "step": 50941 + }, + { + "epoch": 0.8802529720763064, + "grad_norm": 1.5142831463111903, + "learning_rate": 7.42708046621109e-07, + "loss": 0.6476, + "step": 50942 + }, + { + "epoch": 0.8802702515897153, + "grad_norm": 1.49419195660319, + "learning_rate": 7.424964089036024e-07, + "loss": 0.6916, + "step": 50943 + }, + { + "epoch": 0.8802875311031242, + "grad_norm": 1.3902025617742535, + "learning_rate": 7.422848001813033e-07, + "loss": 0.5198, + "step": 50944 + }, + { + "epoch": 0.880304810616533, + "grad_norm": 1.2194765598459885, + "learning_rate": 7.420732204548697e-07, + "loss": 0.2715, + "step": 50945 + }, + { + "epoch": 0.8803220901299419, + "grad_norm": 1.0409740849481883, + "learning_rate": 7.418616697249659e-07, + "loss": 0.3427, + "step": 50946 + }, + { + "epoch": 0.8803393696433508, + "grad_norm": 1.6818327537122764, + "learning_rate": 7.416501479922555e-07, + "loss": 0.2814, + "step": 50947 + }, + { + "epoch": 0.8803566491567597, + "grad_norm": 1.4781642068227778, + "learning_rate": 7.414386552574015e-07, + "loss": 0.2828, + "step": 50948 + }, + { + "epoch": 0.8803739286701686, + "grad_norm": 2.41474422536008, + "learning_rate": 7.412271915210644e-07, + "loss": 0.2624, + "step": 50949 + }, + { + "epoch": 0.8803912081835775, + "grad_norm": 1.698385251455708, + "learning_rate": 7.410157567839082e-07, + "loss": 0.3156, + "step": 50950 + }, + { + "epoch": 0.8804084876969864, + "grad_norm": 2.4375484697206184, + "learning_rate": 7.408043510465935e-07, + "loss": 0.2635, + "step": 50951 + }, + { + "epoch": 0.8804257672103953, + "grad_norm": 1.712539309283041, + "learning_rate": 7.405929743097828e-07, + "loss": 0.4108, + "step": 50952 + }, + { + "epoch": 0.8804430467238042, + "grad_norm": 1.1117780243019557, + "learning_rate": 7.403816265741404e-07, + "loss": 0.6557, + "step": 50953 + }, + { + "epoch": 0.8804603262372132, + "grad_norm": 1.0148257279516828, + "learning_rate": 7.401703078403244e-07, + "loss": 0.3178, + "step": 50954 + }, + { + "epoch": 0.8804776057506221, + "grad_norm": 0.8456862390618545, + "learning_rate": 7.399590181090011e-07, + "loss": 0.405, + "step": 50955 + }, + { + "epoch": 0.880494885264031, + "grad_norm": 0.5178744353500365, + "learning_rate": 7.397477573808276e-07, + "loss": 0.6467, + "step": 50956 + }, + { + "epoch": 0.8805121647774399, + "grad_norm": 1.1547383269087714, + "learning_rate": 7.395365256564701e-07, + "loss": 0.3249, + "step": 50957 + }, + { + "epoch": 0.8805294442908488, + "grad_norm": 1.2192612539991086, + "learning_rate": 7.393253229365849e-07, + "loss": 0.4496, + "step": 50958 + }, + { + "epoch": 0.8805467238042577, + "grad_norm": 1.1700115188656097, + "learning_rate": 7.39114149221839e-07, + "loss": 0.22, + "step": 50959 + }, + { + "epoch": 0.8805640033176666, + "grad_norm": 0.8790742347600179, + "learning_rate": 7.389030045128898e-07, + "loss": 0.4344, + "step": 50960 + }, + { + "epoch": 0.8805812828310755, + "grad_norm": 1.0679381753298784, + "learning_rate": 7.386918888104022e-07, + "loss": 0.322, + "step": 50961 + }, + { + "epoch": 0.8805985623444844, + "grad_norm": 1.0565493213066566, + "learning_rate": 7.384808021150358e-07, + "loss": 0.5585, + "step": 50962 + }, + { + "epoch": 0.8806158418578933, + "grad_norm": 0.5495250594248472, + "learning_rate": 7.38269744427449e-07, + "loss": 0.8893, + "step": 50963 + }, + { + "epoch": 0.8806331213713022, + "grad_norm": 1.1801266453510997, + "learning_rate": 7.380587157483055e-07, + "loss": 0.2995, + "step": 50964 + }, + { + "epoch": 0.8806504008847111, + "grad_norm": 1.1548759553577312, + "learning_rate": 7.37847716078266e-07, + "loss": 0.4346, + "step": 50965 + }, + { + "epoch": 0.8806676803981199, + "grad_norm": 0.8079836481675868, + "learning_rate": 7.376367454179923e-07, + "loss": 0.2062, + "step": 50966 + }, + { + "epoch": 0.8806849599115288, + "grad_norm": 1.1550482946283755, + "learning_rate": 7.374258037681436e-07, + "loss": 0.2348, + "step": 50967 + }, + { + "epoch": 0.8807022394249377, + "grad_norm": 1.3608281284414034, + "learning_rate": 7.37214891129383e-07, + "loss": 0.2688, + "step": 50968 + }, + { + "epoch": 0.8807195189383467, + "grad_norm": 0.8610851511145268, + "learning_rate": 7.370040075023666e-07, + "loss": 0.7405, + "step": 50969 + }, + { + "epoch": 0.8807367984517556, + "grad_norm": 2.0457845744045806, + "learning_rate": 7.367931528877581e-07, + "loss": 0.3663, + "step": 50970 + }, + { + "epoch": 0.8807540779651645, + "grad_norm": 1.04638850817204, + "learning_rate": 7.365823272862183e-07, + "loss": 0.2907, + "step": 50971 + }, + { + "epoch": 0.8807713574785734, + "grad_norm": 0.9721327324930479, + "learning_rate": 7.363715306984065e-07, + "loss": 0.3322, + "step": 50972 + }, + { + "epoch": 0.8807886369919823, + "grad_norm": 1.0565596426781532, + "learning_rate": 7.361607631249845e-07, + "loss": 0.2755, + "step": 50973 + }, + { + "epoch": 0.8808059165053912, + "grad_norm": 1.6787017675584233, + "learning_rate": 7.359500245666096e-07, + "loss": 0.4207, + "step": 50974 + }, + { + "epoch": 0.8808231960188001, + "grad_norm": 0.9313868939306897, + "learning_rate": 7.357393150239422e-07, + "loss": 0.3837, + "step": 50975 + }, + { + "epoch": 0.880840475532209, + "grad_norm": 1.2688927978214217, + "learning_rate": 7.355286344976431e-07, + "loss": 0.3737, + "step": 50976 + }, + { + "epoch": 0.8808577550456179, + "grad_norm": 1.5218427430465045, + "learning_rate": 7.35317982988375e-07, + "loss": 0.4308, + "step": 50977 + }, + { + "epoch": 0.8808750345590268, + "grad_norm": 1.3653182998394704, + "learning_rate": 7.351073604967928e-07, + "loss": 0.2908, + "step": 50978 + }, + { + "epoch": 0.8808923140724357, + "grad_norm": 1.1481431507990416, + "learning_rate": 7.348967670235607e-07, + "loss": 0.1434, + "step": 50979 + }, + { + "epoch": 0.8809095935858446, + "grad_norm": 1.217386218762168, + "learning_rate": 7.346862025693346e-07, + "loss": 0.2965, + "step": 50980 + }, + { + "epoch": 0.8809268730992535, + "grad_norm": 1.4703230790802144, + "learning_rate": 7.344756671347741e-07, + "loss": 0.3582, + "step": 50981 + }, + { + "epoch": 0.8809441526126625, + "grad_norm": 1.88174630829018, + "learning_rate": 7.342651607205409e-07, + "loss": 0.5253, + "step": 50982 + }, + { + "epoch": 0.8809614321260714, + "grad_norm": 1.507828221433635, + "learning_rate": 7.340546833272955e-07, + "loss": 0.4516, + "step": 50983 + }, + { + "epoch": 0.8809787116394803, + "grad_norm": 0.906257073072897, + "learning_rate": 7.338442349556941e-07, + "loss": 0.225, + "step": 50984 + }, + { + "epoch": 0.8809959911528892, + "grad_norm": 1.1240520662426918, + "learning_rate": 7.33633815606396e-07, + "loss": 0.2533, + "step": 50985 + }, + { + "epoch": 0.8810132706662981, + "grad_norm": 1.630435399003151, + "learning_rate": 7.334234252800598e-07, + "loss": 0.4169, + "step": 50986 + }, + { + "epoch": 0.881030550179707, + "grad_norm": 1.3072166533060585, + "learning_rate": 7.332130639773471e-07, + "loss": 0.388, + "step": 50987 + }, + { + "epoch": 0.8810478296931158, + "grad_norm": 0.9676506279073023, + "learning_rate": 7.330027316989163e-07, + "loss": 0.3364, + "step": 50988 + }, + { + "epoch": 0.8810651092065247, + "grad_norm": 1.6426962497090787, + "learning_rate": 7.327924284454236e-07, + "loss": 0.5466, + "step": 50989 + }, + { + "epoch": 0.8810823887199336, + "grad_norm": 1.2955503388295881, + "learning_rate": 7.325821542175304e-07, + "loss": 0.4414, + "step": 50990 + }, + { + "epoch": 0.8810996682333425, + "grad_norm": 1.2668784571019112, + "learning_rate": 7.323719090158931e-07, + "loss": 0.5126, + "step": 50991 + }, + { + "epoch": 0.8811169477467514, + "grad_norm": 1.435288428364712, + "learning_rate": 7.321616928411712e-07, + "loss": 0.2524, + "step": 50992 + }, + { + "epoch": 0.8811342272601603, + "grad_norm": 1.0593414887294423, + "learning_rate": 7.319515056940252e-07, + "loss": 0.5237, + "step": 50993 + }, + { + "epoch": 0.8811515067735692, + "grad_norm": 1.355665035423485, + "learning_rate": 7.31741347575109e-07, + "loss": 0.3363, + "step": 50994 + }, + { + "epoch": 0.8811687862869781, + "grad_norm": 1.2572170402446055, + "learning_rate": 7.315312184850854e-07, + "loss": 0.4552, + "step": 50995 + }, + { + "epoch": 0.881186065800387, + "grad_norm": 1.6075239149198954, + "learning_rate": 7.313211184246082e-07, + "loss": 0.363, + "step": 50996 + }, + { + "epoch": 0.881203345313796, + "grad_norm": 1.523471743732482, + "learning_rate": 7.311110473943406e-07, + "loss": 0.4878, + "step": 50997 + }, + { + "epoch": 0.8812206248272049, + "grad_norm": 1.3285206333826747, + "learning_rate": 7.309010053949328e-07, + "loss": 0.2853, + "step": 50998 + }, + { + "epoch": 0.8812379043406138, + "grad_norm": 1.0749108433862649, + "learning_rate": 7.306909924270522e-07, + "loss": 0.2887, + "step": 50999 + }, + { + "epoch": 0.8812551838540227, + "grad_norm": 2.210228978512655, + "learning_rate": 7.304810084913494e-07, + "loss": 0.4493, + "step": 51000 + }, + { + "epoch": 0.8812724633674316, + "grad_norm": 1.475439713658066, + "learning_rate": 7.30271053588486e-07, + "loss": 0.2932, + "step": 51001 + }, + { + "epoch": 0.8812897428808405, + "grad_norm": 0.9174913992506558, + "learning_rate": 7.300611277191172e-07, + "loss": 0.2519, + "step": 51002 + }, + { + "epoch": 0.8813070223942494, + "grad_norm": 1.3407953049573729, + "learning_rate": 7.298512308839023e-07, + "loss": 0.5607, + "step": 51003 + }, + { + "epoch": 0.8813243019076583, + "grad_norm": 1.2106100007698144, + "learning_rate": 7.296413630834965e-07, + "loss": 0.4911, + "step": 51004 + }, + { + "epoch": 0.8813415814210672, + "grad_norm": 2.000030757250259, + "learning_rate": 7.294315243185579e-07, + "loss": 0.1421, + "step": 51005 + }, + { + "epoch": 0.8813588609344761, + "grad_norm": 1.6528166780617708, + "learning_rate": 7.292217145897474e-07, + "loss": 0.6999, + "step": 51006 + }, + { + "epoch": 0.881376140447885, + "grad_norm": 0.8927748887578872, + "learning_rate": 7.290119338977164e-07, + "loss": 0.265, + "step": 51007 + }, + { + "epoch": 0.881393419961294, + "grad_norm": 1.307126894163031, + "learning_rate": 7.288021822431258e-07, + "loss": 0.302, + "step": 51008 + }, + { + "epoch": 0.8814106994747027, + "grad_norm": 1.2212710788268741, + "learning_rate": 7.285924596266303e-07, + "loss": 0.4109, + "step": 51009 + }, + { + "epoch": 0.8814279789881116, + "grad_norm": 1.0672308614526331, + "learning_rate": 7.283827660488885e-07, + "loss": 0.2655, + "step": 51010 + }, + { + "epoch": 0.8814452585015206, + "grad_norm": 0.7952773331133948, + "learning_rate": 7.281731015105553e-07, + "loss": 0.6395, + "step": 51011 + }, + { + "epoch": 0.8814625380149295, + "grad_norm": 0.7800156598437128, + "learning_rate": 7.279634660122914e-07, + "loss": 0.2468, + "step": 51012 + }, + { + "epoch": 0.8814798175283384, + "grad_norm": 1.5504747454978518, + "learning_rate": 7.277538595547474e-07, + "loss": 0.3154, + "step": 51013 + }, + { + "epoch": 0.8814970970417473, + "grad_norm": 1.2706876561370741, + "learning_rate": 7.275442821385859e-07, + "loss": 0.409, + "step": 51014 + }, + { + "epoch": 0.8815143765551562, + "grad_norm": 1.2731022416037843, + "learning_rate": 7.273347337644588e-07, + "loss": 0.3538, + "step": 51015 + }, + { + "epoch": 0.8815316560685651, + "grad_norm": 1.4088079180031838, + "learning_rate": 7.271252144330232e-07, + "loss": 0.2623, + "step": 51016 + }, + { + "epoch": 0.881548935581974, + "grad_norm": 1.6777135066929711, + "learning_rate": 7.269157241449376e-07, + "loss": 0.3468, + "step": 51017 + }, + { + "epoch": 0.8815662150953829, + "grad_norm": 0.9075306022201711, + "learning_rate": 7.267062629008559e-07, + "loss": 0.4154, + "step": 51018 + }, + { + "epoch": 0.8815834946087918, + "grad_norm": 1.4822817224979263, + "learning_rate": 7.264968307014364e-07, + "loss": 0.3726, + "step": 51019 + }, + { + "epoch": 0.8816007741222007, + "grad_norm": 1.8752295118855493, + "learning_rate": 7.262874275473319e-07, + "loss": 0.5846, + "step": 51020 + }, + { + "epoch": 0.8816180536356096, + "grad_norm": 0.8954391406209692, + "learning_rate": 7.260780534391997e-07, + "loss": 0.3937, + "step": 51021 + }, + { + "epoch": 0.8816353331490185, + "grad_norm": 1.0517195834595552, + "learning_rate": 7.258687083776961e-07, + "loss": 0.5217, + "step": 51022 + }, + { + "epoch": 0.8816526126624274, + "grad_norm": 1.3913940081702745, + "learning_rate": 7.25659392363478e-07, + "loss": 0.4388, + "step": 51023 + }, + { + "epoch": 0.8816698921758364, + "grad_norm": 1.3173407160757449, + "learning_rate": 7.254501053971996e-07, + "loss": 0.3006, + "step": 51024 + }, + { + "epoch": 0.8816871716892453, + "grad_norm": 1.1928099664700236, + "learning_rate": 7.252408474795148e-07, + "loss": 0.6147, + "step": 51025 + }, + { + "epoch": 0.8817044512026542, + "grad_norm": 0.7553654699807029, + "learning_rate": 7.250316186110795e-07, + "loss": 0.2029, + "step": 51026 + }, + { + "epoch": 0.8817217307160631, + "grad_norm": 0.4499445482603647, + "learning_rate": 7.248224187925512e-07, + "loss": 0.5436, + "step": 51027 + }, + { + "epoch": 0.881739010229472, + "grad_norm": 1.5096704172015416, + "learning_rate": 7.246132480245849e-07, + "loss": 0.3215, + "step": 51028 + }, + { + "epoch": 0.8817562897428809, + "grad_norm": 1.856732778060226, + "learning_rate": 7.244041063078344e-07, + "loss": 0.3371, + "step": 51029 + }, + { + "epoch": 0.8817735692562897, + "grad_norm": 2.7008845685434637, + "learning_rate": 7.24194993642956e-07, + "loss": 0.4024, + "step": 51030 + }, + { + "epoch": 0.8817908487696986, + "grad_norm": 1.295423757990667, + "learning_rate": 7.239859100306013e-07, + "loss": 0.3207, + "step": 51031 + }, + { + "epoch": 0.8818081282831075, + "grad_norm": 0.9757870736829553, + "learning_rate": 7.237768554714286e-07, + "loss": 0.3262, + "step": 51032 + }, + { + "epoch": 0.8818254077965164, + "grad_norm": 0.7022144304976321, + "learning_rate": 7.23567829966092e-07, + "loss": 0.5307, + "step": 51033 + }, + { + "epoch": 0.8818426873099253, + "grad_norm": 0.9777258934204527, + "learning_rate": 7.233588335152475e-07, + "loss": 0.4828, + "step": 51034 + }, + { + "epoch": 0.8818599668233342, + "grad_norm": 1.1246309600385078, + "learning_rate": 7.23149866119548e-07, + "loss": 0.232, + "step": 51035 + }, + { + "epoch": 0.8818772463367431, + "grad_norm": 1.5665203699249544, + "learning_rate": 7.229409277796462e-07, + "loss": 0.3368, + "step": 51036 + }, + { + "epoch": 0.881894525850152, + "grad_norm": 1.364596253108148, + "learning_rate": 7.227320184961995e-07, + "loss": 0.3795, + "step": 51037 + }, + { + "epoch": 0.881911805363561, + "grad_norm": 0.9159864655211015, + "learning_rate": 7.225231382698594e-07, + "loss": 0.4632, + "step": 51038 + }, + { + "epoch": 0.8819290848769699, + "grad_norm": 1.1257839945972778, + "learning_rate": 7.223142871012845e-07, + "loss": 0.4147, + "step": 51039 + }, + { + "epoch": 0.8819463643903788, + "grad_norm": 1.8006637647247294, + "learning_rate": 7.221054649911241e-07, + "loss": 0.2417, + "step": 51040 + }, + { + "epoch": 0.8819636439037877, + "grad_norm": 1.5265712444935615, + "learning_rate": 7.218966719400367e-07, + "loss": 0.2167, + "step": 51041 + }, + { + "epoch": 0.8819809234171966, + "grad_norm": 1.342831143473104, + "learning_rate": 7.216879079486727e-07, + "loss": 0.3903, + "step": 51042 + }, + { + "epoch": 0.8819982029306055, + "grad_norm": 0.8820524736613029, + "learning_rate": 7.214791730176884e-07, + "loss": 0.5537, + "step": 51043 + }, + { + "epoch": 0.8820154824440144, + "grad_norm": 1.219598410819839, + "learning_rate": 7.212704671477333e-07, + "loss": 0.3165, + "step": 51044 + }, + { + "epoch": 0.8820327619574233, + "grad_norm": 1.135147504459992, + "learning_rate": 7.210617903394679e-07, + "loss": 0.4097, + "step": 51045 + }, + { + "epoch": 0.8820500414708322, + "grad_norm": 1.3595798777020072, + "learning_rate": 7.208531425935416e-07, + "loss": 0.2747, + "step": 51046 + }, + { + "epoch": 0.8820673209842411, + "grad_norm": 1.1599846582682583, + "learning_rate": 7.206445239106063e-07, + "loss": 0.4335, + "step": 51047 + }, + { + "epoch": 0.88208460049765, + "grad_norm": 1.6371183552950175, + "learning_rate": 7.204359342913203e-07, + "loss": 0.2515, + "step": 51048 + }, + { + "epoch": 0.8821018800110589, + "grad_norm": 1.4175493456695833, + "learning_rate": 7.202273737363319e-07, + "loss": 0.2815, + "step": 51049 + }, + { + "epoch": 0.8821191595244678, + "grad_norm": 1.3716909578935021, + "learning_rate": 7.200188422462972e-07, + "loss": 0.3278, + "step": 51050 + }, + { + "epoch": 0.8821364390378766, + "grad_norm": 0.9792349713288082, + "learning_rate": 7.198103398218681e-07, + "loss": 0.5963, + "step": 51051 + }, + { + "epoch": 0.8821537185512855, + "grad_norm": 1.981710378202034, + "learning_rate": 7.196018664637005e-07, + "loss": 0.3082, + "step": 51052 + }, + { + "epoch": 0.8821709980646945, + "grad_norm": 1.4656476806371583, + "learning_rate": 7.19393422172443e-07, + "loss": 0.2923, + "step": 51053 + }, + { + "epoch": 0.8821882775781034, + "grad_norm": 1.153128652804795, + "learning_rate": 7.191850069487527e-07, + "loss": 0.3375, + "step": 51054 + }, + { + "epoch": 0.8822055570915123, + "grad_norm": 0.9912337265256307, + "learning_rate": 7.18976620793278e-07, + "loss": 0.3137, + "step": 51055 + }, + { + "epoch": 0.8822228366049212, + "grad_norm": 1.062148418007925, + "learning_rate": 7.187682637066751e-07, + "loss": 0.2487, + "step": 51056 + }, + { + "epoch": 0.8822401161183301, + "grad_norm": 1.3687823723527024, + "learning_rate": 7.185599356895956e-07, + "loss": 0.2445, + "step": 51057 + }, + { + "epoch": 0.882257395631739, + "grad_norm": 1.5117122026203829, + "learning_rate": 7.183516367426913e-07, + "loss": 0.3921, + "step": 51058 + }, + { + "epoch": 0.8822746751451479, + "grad_norm": 2.750949247697787, + "learning_rate": 7.181433668666161e-07, + "loss": 0.2185, + "step": 51059 + }, + { + "epoch": 0.8822919546585568, + "grad_norm": 1.3138631236170737, + "learning_rate": 7.179351260620193e-07, + "loss": 0.4447, + "step": 51060 + }, + { + "epoch": 0.8823092341719657, + "grad_norm": 1.205462792473296, + "learning_rate": 7.177269143295551e-07, + "loss": 0.3837, + "step": 51061 + }, + { + "epoch": 0.8823265136853746, + "grad_norm": 1.341528622253187, + "learning_rate": 7.175187316698762e-07, + "loss": 0.3348, + "step": 51062 + }, + { + "epoch": 0.8823437931987835, + "grad_norm": 0.9447841953582058, + "learning_rate": 7.173105780836353e-07, + "loss": 0.3204, + "step": 51063 + }, + { + "epoch": 0.8823610727121924, + "grad_norm": 2.2158690833067083, + "learning_rate": 7.17102453571481e-07, + "loss": 0.2103, + "step": 51064 + }, + { + "epoch": 0.8823783522256013, + "grad_norm": 0.9671520818248455, + "learning_rate": 7.168943581340704e-07, + "loss": 0.254, + "step": 51065 + }, + { + "epoch": 0.8823956317390103, + "grad_norm": 1.5450926926916253, + "learning_rate": 7.166862917720497e-07, + "loss": 0.3384, + "step": 51066 + }, + { + "epoch": 0.8824129112524192, + "grad_norm": 1.3104567190095706, + "learning_rate": 7.164782544860726e-07, + "loss": 0.3291, + "step": 51067 + }, + { + "epoch": 0.8824301907658281, + "grad_norm": 1.4233582290995628, + "learning_rate": 7.162702462767934e-07, + "loss": 0.4632, + "step": 51068 + }, + { + "epoch": 0.882447470279237, + "grad_norm": 1.3945227055258174, + "learning_rate": 7.160622671448592e-07, + "loss": 0.2332, + "step": 51069 + }, + { + "epoch": 0.8824647497926459, + "grad_norm": 1.4291423753190275, + "learning_rate": 7.15854317090925e-07, + "loss": 0.2761, + "step": 51070 + }, + { + "epoch": 0.8824820293060548, + "grad_norm": 1.8279517552179652, + "learning_rate": 7.156463961156401e-07, + "loss": 0.3962, + "step": 51071 + }, + { + "epoch": 0.8824993088194636, + "grad_norm": 1.3772765129312832, + "learning_rate": 7.154385042196554e-07, + "loss": 0.4116, + "step": 51072 + }, + { + "epoch": 0.8825165883328725, + "grad_norm": 1.1207708271897425, + "learning_rate": 7.152306414036226e-07, + "loss": 0.2005, + "step": 51073 + }, + { + "epoch": 0.8825338678462814, + "grad_norm": 2.087751089459651, + "learning_rate": 7.150228076681954e-07, + "loss": 0.4665, + "step": 51074 + }, + { + "epoch": 0.8825511473596903, + "grad_norm": 1.2601022939414601, + "learning_rate": 7.148150030140211e-07, + "loss": 0.4463, + "step": 51075 + }, + { + "epoch": 0.8825684268730992, + "grad_norm": 1.0474037121827038, + "learning_rate": 7.146072274417526e-07, + "loss": 0.3556, + "step": 51076 + }, + { + "epoch": 0.8825857063865081, + "grad_norm": 0.9151695532417997, + "learning_rate": 7.143994809520405e-07, + "loss": 0.2914, + "step": 51077 + }, + { + "epoch": 0.882602985899917, + "grad_norm": 1.2237672072029389, + "learning_rate": 7.141917635455309e-07, + "loss": 0.385, + "step": 51078 + }, + { + "epoch": 0.8826202654133259, + "grad_norm": 0.7584697279844381, + "learning_rate": 7.139840752228821e-07, + "loss": 0.7743, + "step": 51079 + }, + { + "epoch": 0.8826375449267349, + "grad_norm": 1.1157576373623475, + "learning_rate": 7.137764159847394e-07, + "loss": 0.4009, + "step": 51080 + }, + { + "epoch": 0.8826548244401438, + "grad_norm": 1.3428298320964, + "learning_rate": 7.135687858317564e-07, + "loss": 0.3239, + "step": 51081 + }, + { + "epoch": 0.8826721039535527, + "grad_norm": 0.9768296676822683, + "learning_rate": 7.133611847645805e-07, + "loss": 0.4712, + "step": 51082 + }, + { + "epoch": 0.8826893834669616, + "grad_norm": 2.1428215474742194, + "learning_rate": 7.131536127838634e-07, + "loss": 0.3814, + "step": 51083 + }, + { + "epoch": 0.8827066629803705, + "grad_norm": 1.4101200391803503, + "learning_rate": 7.129460698902535e-07, + "loss": 0.3566, + "step": 51084 + }, + { + "epoch": 0.8827239424937794, + "grad_norm": 1.0469003345851378, + "learning_rate": 7.127385560844035e-07, + "loss": 0.5059, + "step": 51085 + }, + { + "epoch": 0.8827412220071883, + "grad_norm": 1.2114739868390654, + "learning_rate": 7.125310713669642e-07, + "loss": 0.376, + "step": 51086 + }, + { + "epoch": 0.8827585015205972, + "grad_norm": 0.9039334720760905, + "learning_rate": 7.123236157385805e-07, + "loss": 0.1573, + "step": 51087 + }, + { + "epoch": 0.8827757810340061, + "grad_norm": 1.492477839896376, + "learning_rate": 7.121161891999073e-07, + "loss": 0.5326, + "step": 51088 + }, + { + "epoch": 0.882793060547415, + "grad_norm": 4.554393633559206, + "learning_rate": 7.119087917515898e-07, + "loss": 0.2989, + "step": 51089 + }, + { + "epoch": 0.8828103400608239, + "grad_norm": 1.7536736196118823, + "learning_rate": 7.117014233942798e-07, + "loss": 0.3591, + "step": 51090 + }, + { + "epoch": 0.8828276195742328, + "grad_norm": 2.0930588109536132, + "learning_rate": 7.114940841286278e-07, + "loss": 0.4419, + "step": 51091 + }, + { + "epoch": 0.8828448990876417, + "grad_norm": 1.1461942139490646, + "learning_rate": 7.112867739552831e-07, + "loss": 0.4889, + "step": 51092 + }, + { + "epoch": 0.8828621786010505, + "grad_norm": 1.8883564131160788, + "learning_rate": 7.110794928748921e-07, + "loss": 0.2494, + "step": 51093 + }, + { + "epoch": 0.8828794581144594, + "grad_norm": 1.43379628925388, + "learning_rate": 7.108722408881086e-07, + "loss": 0.2608, + "step": 51094 + }, + { + "epoch": 0.8828967376278684, + "grad_norm": 1.0201117621431994, + "learning_rate": 7.106650179955766e-07, + "loss": 0.5583, + "step": 51095 + }, + { + "epoch": 0.8829140171412773, + "grad_norm": 1.8031415464050393, + "learning_rate": 7.104578241979488e-07, + "loss": 0.437, + "step": 51096 + }, + { + "epoch": 0.8829312966546862, + "grad_norm": 1.0304864266220575, + "learning_rate": 7.102506594958736e-07, + "loss": 0.2258, + "step": 51097 + }, + { + "epoch": 0.8829485761680951, + "grad_norm": 0.6432020804363517, + "learning_rate": 7.100435238899983e-07, + "loss": 0.7368, + "step": 51098 + }, + { + "epoch": 0.882965855681504, + "grad_norm": 3.466924714372678, + "learning_rate": 7.098364173809746e-07, + "loss": 0.3433, + "step": 51099 + }, + { + "epoch": 0.8829831351949129, + "grad_norm": 1.1316284298472599, + "learning_rate": 7.096293399694476e-07, + "loss": 0.2979, + "step": 51100 + }, + { + "epoch": 0.8830004147083218, + "grad_norm": 1.4242299397927458, + "learning_rate": 7.094222916560678e-07, + "loss": 0.3586, + "step": 51101 + }, + { + "epoch": 0.8830176942217307, + "grad_norm": 1.4183700163770903, + "learning_rate": 7.092152724414825e-07, + "loss": 0.2486, + "step": 51102 + }, + { + "epoch": 0.8830349737351396, + "grad_norm": 0.9490704024413456, + "learning_rate": 7.090082823263434e-07, + "loss": 0.2607, + "step": 51103 + }, + { + "epoch": 0.8830522532485485, + "grad_norm": 1.0723578824355269, + "learning_rate": 7.088013213112943e-07, + "loss": 0.3173, + "step": 51104 + }, + { + "epoch": 0.8830695327619574, + "grad_norm": 2.0751919777782026, + "learning_rate": 7.085943893969871e-07, + "loss": 0.3663, + "step": 51105 + }, + { + "epoch": 0.8830868122753663, + "grad_norm": 1.0589159331378697, + "learning_rate": 7.083874865840679e-07, + "loss": 0.2969, + "step": 51106 + }, + { + "epoch": 0.8831040917887752, + "grad_norm": 1.241781867590941, + "learning_rate": 7.081806128731838e-07, + "loss": 0.4559, + "step": 51107 + }, + { + "epoch": 0.8831213713021842, + "grad_norm": 1.661158188592898, + "learning_rate": 7.079737682649868e-07, + "loss": 0.3986, + "step": 51108 + }, + { + "epoch": 0.8831386508155931, + "grad_norm": 1.3392723667321933, + "learning_rate": 7.077669527601205e-07, + "loss": 0.5033, + "step": 51109 + }, + { + "epoch": 0.883155930329002, + "grad_norm": 1.492721353871168, + "learning_rate": 7.075601663592347e-07, + "loss": 0.4049, + "step": 51110 + }, + { + "epoch": 0.8831732098424109, + "grad_norm": 1.1944985539606998, + "learning_rate": 7.073534090629763e-07, + "loss": 0.4521, + "step": 51111 + }, + { + "epoch": 0.8831904893558198, + "grad_norm": 1.3243447315036172, + "learning_rate": 7.071466808719929e-07, + "loss": 0.3417, + "step": 51112 + }, + { + "epoch": 0.8832077688692287, + "grad_norm": 0.9552433302241231, + "learning_rate": 7.069399817869327e-07, + "loss": 0.2072, + "step": 51113 + }, + { + "epoch": 0.8832250483826375, + "grad_norm": 1.3167683277271798, + "learning_rate": 7.067333118084429e-07, + "loss": 0.4056, + "step": 51114 + }, + { + "epoch": 0.8832423278960464, + "grad_norm": 1.3271343190820988, + "learning_rate": 7.06526670937171e-07, + "loss": 0.3645, + "step": 51115 + }, + { + "epoch": 0.8832596074094553, + "grad_norm": 0.8803588649224936, + "learning_rate": 7.06320059173764e-07, + "loss": 0.5703, + "step": 51116 + }, + { + "epoch": 0.8832768869228642, + "grad_norm": 0.9182452731771467, + "learning_rate": 7.061134765188693e-07, + "loss": 0.3266, + "step": 51117 + }, + { + "epoch": 0.8832941664362731, + "grad_norm": 1.3082567761983257, + "learning_rate": 7.059069229731307e-07, + "loss": 0.311, + "step": 51118 + }, + { + "epoch": 0.883311445949682, + "grad_norm": 1.247833882949669, + "learning_rate": 7.057003985372002e-07, + "loss": 0.3313, + "step": 51119 + }, + { + "epoch": 0.8833287254630909, + "grad_norm": 1.3038995811162835, + "learning_rate": 7.054939032117214e-07, + "loss": 0.4521, + "step": 51120 + }, + { + "epoch": 0.8833460049764998, + "grad_norm": 1.2147691961401201, + "learning_rate": 7.052874369973439e-07, + "loss": 0.4531, + "step": 51121 + }, + { + "epoch": 0.8833632844899088, + "grad_norm": 1.1706717700453002, + "learning_rate": 7.050809998947106e-07, + "loss": 0.4116, + "step": 51122 + }, + { + "epoch": 0.8833805640033177, + "grad_norm": 1.5783934438264848, + "learning_rate": 7.048745919044719e-07, + "loss": 0.3316, + "step": 51123 + }, + { + "epoch": 0.8833978435167266, + "grad_norm": 1.3651650266115773, + "learning_rate": 7.046682130272686e-07, + "loss": 0.191, + "step": 51124 + }, + { + "epoch": 0.8834151230301355, + "grad_norm": 0.8573196344574424, + "learning_rate": 7.044618632637545e-07, + "loss": 0.2978, + "step": 51125 + }, + { + "epoch": 0.8834324025435444, + "grad_norm": 1.023476594633226, + "learning_rate": 7.042555426145703e-07, + "loss": 0.2785, + "step": 51126 + }, + { + "epoch": 0.8834496820569533, + "grad_norm": 1.3472859655175364, + "learning_rate": 7.040492510803665e-07, + "loss": 0.4506, + "step": 51127 + }, + { + "epoch": 0.8834669615703622, + "grad_norm": 0.9286186277383983, + "learning_rate": 7.038429886617859e-07, + "loss": 0.8523, + "step": 51128 + }, + { + "epoch": 0.8834842410837711, + "grad_norm": 1.586821627707295, + "learning_rate": 7.036367553594747e-07, + "loss": 0.3156, + "step": 51129 + }, + { + "epoch": 0.88350152059718, + "grad_norm": 1.2000869718130818, + "learning_rate": 7.034305511740802e-07, + "loss": 0.2864, + "step": 51130 + }, + { + "epoch": 0.8835188001105889, + "grad_norm": 3.6154653831344095, + "learning_rate": 7.032243761062463e-07, + "loss": 0.4029, + "step": 51131 + }, + { + "epoch": 0.8835360796239978, + "grad_norm": 1.2292491248342063, + "learning_rate": 7.030182301566224e-07, + "loss": 0.4017, + "step": 51132 + }, + { + "epoch": 0.8835533591374067, + "grad_norm": 1.0586196683185713, + "learning_rate": 7.028121133258503e-07, + "loss": 0.3491, + "step": 51133 + }, + { + "epoch": 0.8835706386508156, + "grad_norm": 0.6957534242960113, + "learning_rate": 7.026060256145795e-07, + "loss": 0.7819, + "step": 51134 + }, + { + "epoch": 0.8835879181642246, + "grad_norm": 3.3431218338369626, + "learning_rate": 7.023999670234516e-07, + "loss": 0.3345, + "step": 51135 + }, + { + "epoch": 0.8836051976776333, + "grad_norm": 0.6337893553099848, + "learning_rate": 7.021939375531128e-07, + "loss": 0.2922, + "step": 51136 + }, + { + "epoch": 0.8836224771910423, + "grad_norm": 1.3067786122403708, + "learning_rate": 7.019879372042094e-07, + "loss": 0.4274, + "step": 51137 + }, + { + "epoch": 0.8836397567044512, + "grad_norm": 0.7361223899629183, + "learning_rate": 7.017819659773884e-07, + "loss": 0.2365, + "step": 51138 + }, + { + "epoch": 0.8836570362178601, + "grad_norm": 1.0431931036007593, + "learning_rate": 7.015760238732916e-07, + "loss": 0.2813, + "step": 51139 + }, + { + "epoch": 0.883674315731269, + "grad_norm": 1.0754757324422535, + "learning_rate": 7.013701108925653e-07, + "loss": 0.3641, + "step": 51140 + }, + { + "epoch": 0.8836915952446779, + "grad_norm": 1.1673204291876988, + "learning_rate": 7.011642270358543e-07, + "loss": 0.3903, + "step": 51141 + }, + { + "epoch": 0.8837088747580868, + "grad_norm": 1.6831548005741137, + "learning_rate": 7.009583723038027e-07, + "loss": 0.2811, + "step": 51142 + }, + { + "epoch": 0.8837261542714957, + "grad_norm": 1.3238691120704498, + "learning_rate": 7.007525466970588e-07, + "loss": 0.3962, + "step": 51143 + }, + { + "epoch": 0.8837434337849046, + "grad_norm": 1.3532449840786138, + "learning_rate": 7.005467502162622e-07, + "loss": 0.2511, + "step": 51144 + }, + { + "epoch": 0.8837607132983135, + "grad_norm": 1.263723342689651, + "learning_rate": 7.003409828620622e-07, + "loss": 0.4778, + "step": 51145 + }, + { + "epoch": 0.8837779928117224, + "grad_norm": 1.3454127031837, + "learning_rate": 7.001352446350984e-07, + "loss": 0.3864, + "step": 51146 + }, + { + "epoch": 0.8837952723251313, + "grad_norm": 1.2408349490061923, + "learning_rate": 6.999295355360191e-07, + "loss": 0.4117, + "step": 51147 + }, + { + "epoch": 0.8838125518385402, + "grad_norm": 2.219627103658843, + "learning_rate": 6.997238555654684e-07, + "loss": 0.3462, + "step": 51148 + }, + { + "epoch": 0.8838298313519491, + "grad_norm": 1.0764767120569718, + "learning_rate": 6.995182047240878e-07, + "loss": 0.266, + "step": 51149 + }, + { + "epoch": 0.883847110865358, + "grad_norm": 0.9956855753947277, + "learning_rate": 6.993125830125236e-07, + "loss": 0.2773, + "step": 51150 + }, + { + "epoch": 0.883864390378767, + "grad_norm": 1.161145230260071, + "learning_rate": 6.991069904314185e-07, + "loss": 0.1977, + "step": 51151 + }, + { + "epoch": 0.8838816698921759, + "grad_norm": 0.832803902948882, + "learning_rate": 6.989014269814176e-07, + "loss": 0.7563, + "step": 51152 + }, + { + "epoch": 0.8838989494055848, + "grad_norm": 1.148502519124774, + "learning_rate": 6.986958926631637e-07, + "loss": 0.277, + "step": 51153 + }, + { + "epoch": 0.8839162289189937, + "grad_norm": 0.7506248201340061, + "learning_rate": 6.98490387477303e-07, + "loss": 0.5807, + "step": 51154 + }, + { + "epoch": 0.8839335084324026, + "grad_norm": 1.669375809828855, + "learning_rate": 6.98284911424475e-07, + "loss": 0.515, + "step": 51155 + }, + { + "epoch": 0.8839507879458115, + "grad_norm": 1.462795246420879, + "learning_rate": 6.98079464505328e-07, + "loss": 0.3082, + "step": 51156 + }, + { + "epoch": 0.8839680674592203, + "grad_norm": 1.1374563443505443, + "learning_rate": 6.978740467205003e-07, + "loss": 0.3389, + "step": 51157 + }, + { + "epoch": 0.8839853469726292, + "grad_norm": 1.1651095728964787, + "learning_rate": 6.976686580706393e-07, + "loss": 0.4163, + "step": 51158 + }, + { + "epoch": 0.8840026264860381, + "grad_norm": 2.4007751935627004, + "learning_rate": 6.974632985563878e-07, + "loss": 0.3861, + "step": 51159 + }, + { + "epoch": 0.884019905999447, + "grad_norm": 1.398851986339958, + "learning_rate": 6.972579681783875e-07, + "loss": 0.4686, + "step": 51160 + }, + { + "epoch": 0.8840371855128559, + "grad_norm": 0.8188442111355454, + "learning_rate": 6.970526669372835e-07, + "loss": 0.2972, + "step": 51161 + }, + { + "epoch": 0.8840544650262648, + "grad_norm": 1.6662272064827803, + "learning_rate": 6.968473948337151e-07, + "loss": 0.3927, + "step": 51162 + }, + { + "epoch": 0.8840717445396737, + "grad_norm": 2.181133234800817, + "learning_rate": 6.966421518683297e-07, + "loss": 0.2598, + "step": 51163 + }, + { + "epoch": 0.8840890240530827, + "grad_norm": 1.290736578707865, + "learning_rate": 6.964369380417668e-07, + "loss": 0.2917, + "step": 51164 + }, + { + "epoch": 0.8841063035664916, + "grad_norm": 1.8011581580597462, + "learning_rate": 6.962317533546703e-07, + "loss": 0.3532, + "step": 51165 + }, + { + "epoch": 0.8841235830799005, + "grad_norm": 0.8168392036133284, + "learning_rate": 6.96026597807683e-07, + "loss": 0.1862, + "step": 51166 + }, + { + "epoch": 0.8841408625933094, + "grad_norm": 0.766734805072115, + "learning_rate": 6.958214714014499e-07, + "loss": 0.2003, + "step": 51167 + }, + { + "epoch": 0.8841581421067183, + "grad_norm": 1.429093097952753, + "learning_rate": 6.956163741366095e-07, + "loss": 0.3334, + "step": 51168 + }, + { + "epoch": 0.8841754216201272, + "grad_norm": 1.1309360961573427, + "learning_rate": 6.954113060138057e-07, + "loss": 0.345, + "step": 51169 + }, + { + "epoch": 0.8841927011335361, + "grad_norm": 1.1200569911186518, + "learning_rate": 6.952062670336801e-07, + "loss": 0.3886, + "step": 51170 + }, + { + "epoch": 0.884209980646945, + "grad_norm": 1.1867691639347258, + "learning_rate": 6.950012571968756e-07, + "loss": 0.3488, + "step": 51171 + }, + { + "epoch": 0.8842272601603539, + "grad_norm": 0.9107076211350634, + "learning_rate": 6.94796276504035e-07, + "loss": 0.3098, + "step": 51172 + }, + { + "epoch": 0.8842445396737628, + "grad_norm": 1.2333310850452195, + "learning_rate": 6.945913249557989e-07, + "loss": 0.3518, + "step": 51173 + }, + { + "epoch": 0.8842618191871717, + "grad_norm": 1.1814946877009858, + "learning_rate": 6.943864025528124e-07, + "loss": 0.3221, + "step": 51174 + }, + { + "epoch": 0.8842790987005806, + "grad_norm": 1.3534852785990337, + "learning_rate": 6.941815092957116e-07, + "loss": 0.3838, + "step": 51175 + }, + { + "epoch": 0.8842963782139895, + "grad_norm": 1.6142948495547353, + "learning_rate": 6.939766451851426e-07, + "loss": 0.3104, + "step": 51176 + }, + { + "epoch": 0.8843136577273984, + "grad_norm": 1.2744676954612122, + "learning_rate": 6.937718102217461e-07, + "loss": 0.2676, + "step": 51177 + }, + { + "epoch": 0.8843309372408072, + "grad_norm": 1.9096319809440314, + "learning_rate": 6.935670044061649e-07, + "loss": 0.3374, + "step": 51178 + }, + { + "epoch": 0.8843482167542162, + "grad_norm": 0.9869901595090348, + "learning_rate": 6.933622277390384e-07, + "loss": 0.3295, + "step": 51179 + }, + { + "epoch": 0.8843654962676251, + "grad_norm": 0.96684092501708, + "learning_rate": 6.931574802210072e-07, + "loss": 0.4615, + "step": 51180 + }, + { + "epoch": 0.884382775781034, + "grad_norm": 1.9991980828280698, + "learning_rate": 6.929527618527154e-07, + "loss": 0.268, + "step": 51181 + }, + { + "epoch": 0.8844000552944429, + "grad_norm": 1.5538504120671768, + "learning_rate": 6.927480726348013e-07, + "loss": 0.4006, + "step": 51182 + }, + { + "epoch": 0.8844173348078518, + "grad_norm": 1.0530520715492808, + "learning_rate": 6.925434125679098e-07, + "loss": 0.8496, + "step": 51183 + }, + { + "epoch": 0.8844346143212607, + "grad_norm": 1.6158711481474168, + "learning_rate": 6.923387816526784e-07, + "loss": 0.2749, + "step": 51184 + }, + { + "epoch": 0.8844518938346696, + "grad_norm": 1.4954416552101604, + "learning_rate": 6.921341798897507e-07, + "loss": 0.3747, + "step": 51185 + }, + { + "epoch": 0.8844691733480785, + "grad_norm": 1.011701212379024, + "learning_rate": 6.919296072797643e-07, + "loss": 0.6064, + "step": 51186 + }, + { + "epoch": 0.8844864528614874, + "grad_norm": 1.4608879198241558, + "learning_rate": 6.917250638233619e-07, + "loss": 0.4215, + "step": 51187 + }, + { + "epoch": 0.8845037323748963, + "grad_norm": 1.1432693128723455, + "learning_rate": 6.915205495211841e-07, + "loss": 0.2606, + "step": 51188 + }, + { + "epoch": 0.8845210118883052, + "grad_norm": 1.2433704169601618, + "learning_rate": 6.913160643738726e-07, + "loss": 0.3863, + "step": 51189 + }, + { + "epoch": 0.8845382914017141, + "grad_norm": 0.836726739156258, + "learning_rate": 6.911116083820668e-07, + "loss": 0.2169, + "step": 51190 + }, + { + "epoch": 0.884555570915123, + "grad_norm": 1.3366769386194437, + "learning_rate": 6.909071815464052e-07, + "loss": 0.4281, + "step": 51191 + }, + { + "epoch": 0.884572850428532, + "grad_norm": 0.8625351500707461, + "learning_rate": 6.907027838675307e-07, + "loss": 0.3175, + "step": 51192 + }, + { + "epoch": 0.8845901299419409, + "grad_norm": 1.1299440121404973, + "learning_rate": 6.904984153460814e-07, + "loss": 0.4084, + "step": 51193 + }, + { + "epoch": 0.8846074094553498, + "grad_norm": 1.5213196103409798, + "learning_rate": 6.902940759827004e-07, + "loss": 0.3725, + "step": 51194 + }, + { + "epoch": 0.8846246889687587, + "grad_norm": 1.352424265595822, + "learning_rate": 6.900897657780248e-07, + "loss": 0.1774, + "step": 51195 + }, + { + "epoch": 0.8846419684821676, + "grad_norm": 1.640796510628622, + "learning_rate": 6.898854847326963e-07, + "loss": 0.4451, + "step": 51196 + }, + { + "epoch": 0.8846592479955765, + "grad_norm": 1.628135984904554, + "learning_rate": 6.896812328473535e-07, + "loss": 0.282, + "step": 51197 + }, + { + "epoch": 0.8846765275089854, + "grad_norm": 1.0306984708822733, + "learning_rate": 6.894770101226356e-07, + "loss": 0.306, + "step": 51198 + }, + { + "epoch": 0.8846938070223942, + "grad_norm": 0.7715707177155259, + "learning_rate": 6.892728165591855e-07, + "loss": 0.1927, + "step": 51199 + }, + { + "epoch": 0.8847110865358031, + "grad_norm": 0.899430245509134, + "learning_rate": 6.890686521576384e-07, + "loss": 0.2354, + "step": 51200 + }, + { + "epoch": 0.884728366049212, + "grad_norm": 1.6837227464363809, + "learning_rate": 6.888645169186381e-07, + "loss": 0.4049, + "step": 51201 + }, + { + "epoch": 0.8847456455626209, + "grad_norm": 2.095593255266563, + "learning_rate": 6.886604108428196e-07, + "loss": 0.5131, + "step": 51202 + }, + { + "epoch": 0.8847629250760298, + "grad_norm": 1.888236032446409, + "learning_rate": 6.88456333930827e-07, + "loss": 0.4179, + "step": 51203 + }, + { + "epoch": 0.8847802045894387, + "grad_norm": 1.5152268745341884, + "learning_rate": 6.882522861832941e-07, + "loss": 0.4055, + "step": 51204 + }, + { + "epoch": 0.8847974841028476, + "grad_norm": 2.0911173119909403, + "learning_rate": 6.880482676008626e-07, + "loss": 0.3611, + "step": 51205 + }, + { + "epoch": 0.8848147636162565, + "grad_norm": 1.8039525200726048, + "learning_rate": 6.878442781841721e-07, + "loss": 0.4409, + "step": 51206 + }, + { + "epoch": 0.8848320431296655, + "grad_norm": 1.5235681057493597, + "learning_rate": 6.876403179338631e-07, + "loss": 0.3833, + "step": 51207 + }, + { + "epoch": 0.8848493226430744, + "grad_norm": 0.8204767801828385, + "learning_rate": 6.874363868505696e-07, + "loss": 0.3515, + "step": 51208 + }, + { + "epoch": 0.8848666021564833, + "grad_norm": 1.172313324660829, + "learning_rate": 6.872324849349354e-07, + "loss": 0.2096, + "step": 51209 + }, + { + "epoch": 0.8848838816698922, + "grad_norm": 1.0494501016176971, + "learning_rate": 6.870286121875957e-07, + "loss": 0.2614, + "step": 51210 + }, + { + "epoch": 0.8849011611833011, + "grad_norm": 1.6216431842985917, + "learning_rate": 6.868247686091889e-07, + "loss": 0.2889, + "step": 51211 + }, + { + "epoch": 0.88491844069671, + "grad_norm": 1.4372615016164834, + "learning_rate": 6.866209542003577e-07, + "loss": 0.3528, + "step": 51212 + }, + { + "epoch": 0.8849357202101189, + "grad_norm": 1.9457332829576397, + "learning_rate": 6.864171689617349e-07, + "loss": 0.4993, + "step": 51213 + }, + { + "epoch": 0.8849529997235278, + "grad_norm": 1.129190042031679, + "learning_rate": 6.862134128939623e-07, + "loss": 0.1869, + "step": 51214 + }, + { + "epoch": 0.8849702792369367, + "grad_norm": 1.3313005534360203, + "learning_rate": 6.860096859976773e-07, + "loss": 0.2669, + "step": 51215 + }, + { + "epoch": 0.8849875587503456, + "grad_norm": 2.165274186671304, + "learning_rate": 6.858059882735169e-07, + "loss": 0.2593, + "step": 51216 + }, + { + "epoch": 0.8850048382637545, + "grad_norm": 1.652492247386838, + "learning_rate": 6.856023197221195e-07, + "loss": 0.3861, + "step": 51217 + }, + { + "epoch": 0.8850221177771634, + "grad_norm": 0.9985402573497596, + "learning_rate": 6.853986803441259e-07, + "loss": 0.3662, + "step": 51218 + }, + { + "epoch": 0.8850393972905723, + "grad_norm": 1.2041327568486315, + "learning_rate": 6.851950701401688e-07, + "loss": 0.2533, + "step": 51219 + }, + { + "epoch": 0.8850566768039811, + "grad_norm": 0.9492725493133901, + "learning_rate": 6.849914891108911e-07, + "loss": 0.3439, + "step": 51220 + }, + { + "epoch": 0.88507395631739, + "grad_norm": 2.4458843956796703, + "learning_rate": 6.847879372569267e-07, + "loss": 0.3647, + "step": 51221 + }, + { + "epoch": 0.885091235830799, + "grad_norm": 1.2357006272571045, + "learning_rate": 6.845844145789138e-07, + "loss": 0.185, + "step": 51222 + }, + { + "epoch": 0.8851085153442079, + "grad_norm": 2.0365225136104064, + "learning_rate": 6.843809210774921e-07, + "loss": 0.3055, + "step": 51223 + }, + { + "epoch": 0.8851257948576168, + "grad_norm": 1.4503781694261941, + "learning_rate": 6.841774567532955e-07, + "loss": 0.2324, + "step": 51224 + }, + { + "epoch": 0.8851430743710257, + "grad_norm": 1.4089290025516494, + "learning_rate": 6.839740216069657e-07, + "loss": 0.3134, + "step": 51225 + }, + { + "epoch": 0.8851603538844346, + "grad_norm": 1.3630792895197736, + "learning_rate": 6.837706156391344e-07, + "loss": 0.4086, + "step": 51226 + }, + { + "epoch": 0.8851776333978435, + "grad_norm": 1.2340298832427592, + "learning_rate": 6.835672388504422e-07, + "loss": 0.3929, + "step": 51227 + }, + { + "epoch": 0.8851949129112524, + "grad_norm": 1.5332793449926623, + "learning_rate": 6.833638912415253e-07, + "loss": 0.3365, + "step": 51228 + }, + { + "epoch": 0.8852121924246613, + "grad_norm": 1.2014391444735784, + "learning_rate": 6.831605728130231e-07, + "loss": 0.2934, + "step": 51229 + }, + { + "epoch": 0.8852294719380702, + "grad_norm": 2.069215041992841, + "learning_rate": 6.829572835655685e-07, + "loss": 0.2763, + "step": 51230 + }, + { + "epoch": 0.8852467514514791, + "grad_norm": 1.7197664082994069, + "learning_rate": 6.827540234997998e-07, + "loss": 0.4915, + "step": 51231 + }, + { + "epoch": 0.885264030964888, + "grad_norm": 1.2158731675552066, + "learning_rate": 6.825507926163521e-07, + "loss": 0.4934, + "step": 51232 + }, + { + "epoch": 0.885281310478297, + "grad_norm": 1.363869249490488, + "learning_rate": 6.823475909158639e-07, + "loss": 0.3588, + "step": 51233 + }, + { + "epoch": 0.8852985899917059, + "grad_norm": 1.234606035235916, + "learning_rate": 6.821444183989734e-07, + "loss": 0.3117, + "step": 51234 + }, + { + "epoch": 0.8853158695051148, + "grad_norm": 1.8171757712622636, + "learning_rate": 6.819412750663124e-07, + "loss": 0.2204, + "step": 51235 + }, + { + "epoch": 0.8853331490185237, + "grad_norm": 2.1414949061109896, + "learning_rate": 6.817381609185214e-07, + "loss": 0.3186, + "step": 51236 + }, + { + "epoch": 0.8853504285319326, + "grad_norm": 1.1793567081604899, + "learning_rate": 6.815350759562333e-07, + "loss": 0.3177, + "step": 51237 + }, + { + "epoch": 0.8853677080453415, + "grad_norm": 1.532396931891849, + "learning_rate": 6.813320201800855e-07, + "loss": 0.5314, + "step": 51238 + }, + { + "epoch": 0.8853849875587504, + "grad_norm": 2.7641757443032744, + "learning_rate": 6.811289935907128e-07, + "loss": 0.295, + "step": 51239 + }, + { + "epoch": 0.8854022670721593, + "grad_norm": 1.338928378958002, + "learning_rate": 6.809259961887549e-07, + "loss": 0.1825, + "step": 51240 + }, + { + "epoch": 0.8854195465855681, + "grad_norm": 1.5195170493203685, + "learning_rate": 6.807230279748456e-07, + "loss": 0.354, + "step": 51241 + }, + { + "epoch": 0.885436826098977, + "grad_norm": 1.8670971241065168, + "learning_rate": 6.805200889496178e-07, + "loss": 0.3066, + "step": 51242 + }, + { + "epoch": 0.8854541056123859, + "grad_norm": 1.5318114335754565, + "learning_rate": 6.803171791137108e-07, + "loss": 0.2866, + "step": 51243 + }, + { + "epoch": 0.8854713851257948, + "grad_norm": 1.022836884984583, + "learning_rate": 6.801142984677566e-07, + "loss": 0.3287, + "step": 51244 + }, + { + "epoch": 0.8854886646392037, + "grad_norm": 1.1117477313777244, + "learning_rate": 6.799114470123935e-07, + "loss": 0.4542, + "step": 51245 + }, + { + "epoch": 0.8855059441526126, + "grad_norm": 1.7120855119914444, + "learning_rate": 6.797086247482554e-07, + "loss": 0.3057, + "step": 51246 + }, + { + "epoch": 0.8855232236660215, + "grad_norm": 1.7257486882421937, + "learning_rate": 6.795058316759806e-07, + "loss": 0.2867, + "step": 51247 + }, + { + "epoch": 0.8855405031794304, + "grad_norm": 1.2860623915620149, + "learning_rate": 6.793030677961999e-07, + "loss": 0.2586, + "step": 51248 + }, + { + "epoch": 0.8855577826928394, + "grad_norm": 1.209981237573981, + "learning_rate": 6.791003331095514e-07, + "loss": 0.4321, + "step": 51249 + }, + { + "epoch": 0.8855750622062483, + "grad_norm": 1.29277127684243, + "learning_rate": 6.788976276166681e-07, + "loss": 0.2835, + "step": 51250 + }, + { + "epoch": 0.8855923417196572, + "grad_norm": 0.9806655614073101, + "learning_rate": 6.786949513181862e-07, + "loss": 0.341, + "step": 51251 + }, + { + "epoch": 0.8856096212330661, + "grad_norm": 1.3517496869377803, + "learning_rate": 6.784923042147406e-07, + "loss": 0.2861, + "step": 51252 + }, + { + "epoch": 0.885626900746475, + "grad_norm": 1.8241999332097165, + "learning_rate": 6.782896863069643e-07, + "loss": 0.3947, + "step": 51253 + }, + { + "epoch": 0.8856441802598839, + "grad_norm": 2.4277070009806905, + "learning_rate": 6.780870975954956e-07, + "loss": 0.4065, + "step": 51254 + }, + { + "epoch": 0.8856614597732928, + "grad_norm": 2.3743702676888234, + "learning_rate": 6.77884538080964e-07, + "loss": 0.3831, + "step": 51255 + }, + { + "epoch": 0.8856787392867017, + "grad_norm": 1.826925092676266, + "learning_rate": 6.776820077640067e-07, + "loss": 0.3562, + "step": 51256 + }, + { + "epoch": 0.8856960188001106, + "grad_norm": 1.3154161391315031, + "learning_rate": 6.774795066452578e-07, + "loss": 0.375, + "step": 51257 + }, + { + "epoch": 0.8857132983135195, + "grad_norm": 2.6971703003749092, + "learning_rate": 6.772770347253533e-07, + "loss": 0.2597, + "step": 51258 + }, + { + "epoch": 0.8857305778269284, + "grad_norm": 1.5063815675239025, + "learning_rate": 6.770745920049238e-07, + "loss": 0.2637, + "step": 51259 + }, + { + "epoch": 0.8857478573403373, + "grad_norm": 1.0231780344084975, + "learning_rate": 6.768721784846066e-07, + "loss": 0.3317, + "step": 51260 + }, + { + "epoch": 0.8857651368537462, + "grad_norm": 1.417796705230352, + "learning_rate": 6.766697941650324e-07, + "loss": 0.2297, + "step": 51261 + }, + { + "epoch": 0.885782416367155, + "grad_norm": 1.231174009865691, + "learning_rate": 6.764674390468384e-07, + "loss": 0.4696, + "step": 51262 + }, + { + "epoch": 0.885799695880564, + "grad_norm": 1.1911435590444817, + "learning_rate": 6.762651131306574e-07, + "loss": 0.2369, + "step": 51263 + }, + { + "epoch": 0.8858169753939729, + "grad_norm": 1.2808920104931536, + "learning_rate": 6.760628164171212e-07, + "loss": 0.4204, + "step": 51264 + }, + { + "epoch": 0.8858342549073818, + "grad_norm": 1.8359003756427479, + "learning_rate": 6.75860548906867e-07, + "loss": 0.3237, + "step": 51265 + }, + { + "epoch": 0.8858515344207907, + "grad_norm": 0.9291636762877349, + "learning_rate": 6.756583106005232e-07, + "loss": 0.4076, + "step": 51266 + }, + { + "epoch": 0.8858688139341996, + "grad_norm": 1.3768966122595339, + "learning_rate": 6.754561014987282e-07, + "loss": 0.4126, + "step": 51267 + }, + { + "epoch": 0.8858860934476085, + "grad_norm": 1.0661835146492804, + "learning_rate": 6.752539216021114e-07, + "loss": 0.4965, + "step": 51268 + }, + { + "epoch": 0.8859033729610174, + "grad_norm": 0.9511976488536458, + "learning_rate": 6.750517709113114e-07, + "loss": 0.5071, + "step": 51269 + }, + { + "epoch": 0.8859206524744263, + "grad_norm": 1.5555346409632123, + "learning_rate": 6.748496494269552e-07, + "loss": 0.3098, + "step": 51270 + }, + { + "epoch": 0.8859379319878352, + "grad_norm": 1.0188383917632455, + "learning_rate": 6.746475571496803e-07, + "loss": 0.2847, + "step": 51271 + }, + { + "epoch": 0.8859552115012441, + "grad_norm": 1.8035053390762967, + "learning_rate": 6.74445494080116e-07, + "loss": 0.4658, + "step": 51272 + }, + { + "epoch": 0.885972491014653, + "grad_norm": 1.109807249661085, + "learning_rate": 6.742434602188974e-07, + "loss": 0.2417, + "step": 51273 + }, + { + "epoch": 0.8859897705280619, + "grad_norm": 1.1545783771087157, + "learning_rate": 6.740414555666585e-07, + "loss": 0.2563, + "step": 51274 + }, + { + "epoch": 0.8860070500414708, + "grad_norm": 0.9828860736883438, + "learning_rate": 6.738394801240299e-07, + "loss": 0.2691, + "step": 51275 + }, + { + "epoch": 0.8860243295548798, + "grad_norm": 1.5221885035671525, + "learning_rate": 6.736375338916456e-07, + "loss": 0.2651, + "step": 51276 + }, + { + "epoch": 0.8860416090682887, + "grad_norm": 0.9564920221124837, + "learning_rate": 6.73435616870135e-07, + "loss": 0.5128, + "step": 51277 + }, + { + "epoch": 0.8860588885816976, + "grad_norm": 0.8404283363991862, + "learning_rate": 6.732337290601343e-07, + "loss": 0.6927, + "step": 51278 + }, + { + "epoch": 0.8860761680951065, + "grad_norm": 1.6585427465379878, + "learning_rate": 6.730318704622729e-07, + "loss": 0.3516, + "step": 51279 + }, + { + "epoch": 0.8860934476085154, + "grad_norm": 1.1588914630157008, + "learning_rate": 6.728300410771871e-07, + "loss": 0.2605, + "step": 51280 + }, + { + "epoch": 0.8861107271219243, + "grad_norm": 1.031385085887197, + "learning_rate": 6.726282409055052e-07, + "loss": 0.242, + "step": 51281 + }, + { + "epoch": 0.8861280066353332, + "grad_norm": 1.1718804716916604, + "learning_rate": 6.724264699478611e-07, + "loss": 0.3353, + "step": 51282 + }, + { + "epoch": 0.8861452861487421, + "grad_norm": 2.1020596817079666, + "learning_rate": 6.722247282048866e-07, + "loss": 0.3653, + "step": 51283 + }, + { + "epoch": 0.8861625656621509, + "grad_norm": 0.8997734922369505, + "learning_rate": 6.720230156772123e-07, + "loss": 0.342, + "step": 51284 + }, + { + "epoch": 0.8861798451755598, + "grad_norm": 1.5085407566983933, + "learning_rate": 6.718213323654699e-07, + "loss": 0.2999, + "step": 51285 + }, + { + "epoch": 0.8861971246889687, + "grad_norm": 1.2857643441088318, + "learning_rate": 6.716196782702922e-07, + "loss": 0.2097, + "step": 51286 + }, + { + "epoch": 0.8862144042023776, + "grad_norm": 1.0415254459434604, + "learning_rate": 6.71418053392312e-07, + "loss": 0.4287, + "step": 51287 + }, + { + "epoch": 0.8862316837157865, + "grad_norm": 0.9963200078438671, + "learning_rate": 6.712164577321578e-07, + "loss": 0.397, + "step": 51288 + }, + { + "epoch": 0.8862489632291954, + "grad_norm": 1.733108305933515, + "learning_rate": 6.710148912904646e-07, + "loss": 0.384, + "step": 51289 + }, + { + "epoch": 0.8862662427426043, + "grad_norm": 1.6370680640604462, + "learning_rate": 6.708133540678597e-07, + "loss": 0.4037, + "step": 51290 + }, + { + "epoch": 0.8862835222560133, + "grad_norm": 2.4491139405285063, + "learning_rate": 6.706118460649768e-07, + "loss": 0.2153, + "step": 51291 + }, + { + "epoch": 0.8863008017694222, + "grad_norm": 1.055481555476807, + "learning_rate": 6.70410367282448e-07, + "loss": 0.465, + "step": 51292 + }, + { + "epoch": 0.8863180812828311, + "grad_norm": 0.9626158269424466, + "learning_rate": 6.702089177209015e-07, + "loss": 0.2388, + "step": 51293 + }, + { + "epoch": 0.88633536079624, + "grad_norm": 1.49621796717583, + "learning_rate": 6.700074973809712e-07, + "loss": 0.4926, + "step": 51294 + }, + { + "epoch": 0.8863526403096489, + "grad_norm": 1.310701982453237, + "learning_rate": 6.698061062632844e-07, + "loss": 0.3635, + "step": 51295 + }, + { + "epoch": 0.8863699198230578, + "grad_norm": 2.0988866294757127, + "learning_rate": 6.696047443684739e-07, + "loss": 0.1531, + "step": 51296 + }, + { + "epoch": 0.8863871993364667, + "grad_norm": 1.7349847129845228, + "learning_rate": 6.694034116971715e-07, + "loss": 0.3597, + "step": 51297 + }, + { + "epoch": 0.8864044788498756, + "grad_norm": 1.3034865892553544, + "learning_rate": 6.692021082500077e-07, + "loss": 0.6432, + "step": 51298 + }, + { + "epoch": 0.8864217583632845, + "grad_norm": 1.1940706187860428, + "learning_rate": 6.690008340276099e-07, + "loss": 0.2964, + "step": 51299 + }, + { + "epoch": 0.8864390378766934, + "grad_norm": 1.310831004852232, + "learning_rate": 6.687995890306132e-07, + "loss": 0.2723, + "step": 51300 + }, + { + "epoch": 0.8864563173901023, + "grad_norm": 2.2856943671032766, + "learning_rate": 6.685983732596435e-07, + "loss": 0.4312, + "step": 51301 + }, + { + "epoch": 0.8864735969035112, + "grad_norm": 1.5713713705410906, + "learning_rate": 6.683971867153328e-07, + "loss": 0.2029, + "step": 51302 + }, + { + "epoch": 0.8864908764169201, + "grad_norm": 1.3820802428203538, + "learning_rate": 6.681960293983125e-07, + "loss": 0.2216, + "step": 51303 + }, + { + "epoch": 0.8865081559303291, + "grad_norm": 1.435972927988543, + "learning_rate": 6.679949013092102e-07, + "loss": 0.3175, + "step": 51304 + }, + { + "epoch": 0.8865254354437379, + "grad_norm": 1.0901969713986142, + "learning_rate": 6.677938024486586e-07, + "loss": 0.2478, + "step": 51305 + }, + { + "epoch": 0.8865427149571468, + "grad_norm": 0.9777920190202851, + "learning_rate": 6.675927328172837e-07, + "loss": 0.3265, + "step": 51306 + }, + { + "epoch": 0.8865599944705557, + "grad_norm": 1.2901205500539425, + "learning_rate": 6.673916924157187e-07, + "loss": 0.372, + "step": 51307 + }, + { + "epoch": 0.8865772739839646, + "grad_norm": 1.724686324821195, + "learning_rate": 6.671906812445916e-07, + "loss": 0.2658, + "step": 51308 + }, + { + "epoch": 0.8865945534973735, + "grad_norm": 0.8017517701953882, + "learning_rate": 6.669896993045344e-07, + "loss": 0.3021, + "step": 51309 + }, + { + "epoch": 0.8866118330107824, + "grad_norm": 0.5443642513694261, + "learning_rate": 6.667887465961731e-07, + "loss": 0.4724, + "step": 51310 + }, + { + "epoch": 0.8866291125241913, + "grad_norm": 1.316949343531086, + "learning_rate": 6.665878231201394e-07, + "loss": 0.2413, + "step": 51311 + }, + { + "epoch": 0.8866463920376002, + "grad_norm": 0.8086573243190731, + "learning_rate": 6.66386928877062e-07, + "loss": 0.2015, + "step": 51312 + }, + { + "epoch": 0.8866636715510091, + "grad_norm": 2.308083370121784, + "learning_rate": 6.661860638675688e-07, + "loss": 0.2901, + "step": 51313 + }, + { + "epoch": 0.886680951064418, + "grad_norm": 0.6846777697941147, + "learning_rate": 6.65985228092293e-07, + "loss": 0.2174, + "step": 51314 + }, + { + "epoch": 0.8866982305778269, + "grad_norm": 0.982652813072854, + "learning_rate": 6.657844215518583e-07, + "loss": 0.2796, + "step": 51315 + }, + { + "epoch": 0.8867155100912358, + "grad_norm": 1.4638868987663016, + "learning_rate": 6.655836442468977e-07, + "loss": 0.3465, + "step": 51316 + }, + { + "epoch": 0.8867327896046447, + "grad_norm": 1.6508999365845778, + "learning_rate": 6.653828961780373e-07, + "loss": 0.3051, + "step": 51317 + }, + { + "epoch": 0.8867500691180537, + "grad_norm": 1.1630521992399316, + "learning_rate": 6.651821773459066e-07, + "loss": 0.4205, + "step": 51318 + }, + { + "epoch": 0.8867673486314626, + "grad_norm": 1.1406691790951335, + "learning_rate": 6.649814877511351e-07, + "loss": 0.4849, + "step": 51319 + }, + { + "epoch": 0.8867846281448715, + "grad_norm": 1.2032851337656265, + "learning_rate": 6.647808273943524e-07, + "loss": 0.4013, + "step": 51320 + }, + { + "epoch": 0.8868019076582804, + "grad_norm": 2.1421710885195133, + "learning_rate": 6.645801962761844e-07, + "loss": 0.3595, + "step": 51321 + }, + { + "epoch": 0.8868191871716893, + "grad_norm": 2.4948427444362586, + "learning_rate": 6.64379594397262e-07, + "loss": 0.2493, + "step": 51322 + }, + { + "epoch": 0.8868364666850982, + "grad_norm": 1.2177807758480927, + "learning_rate": 6.641790217582123e-07, + "loss": 0.4308, + "step": 51323 + }, + { + "epoch": 0.8868537461985071, + "grad_norm": 2.9397986117082056, + "learning_rate": 6.639784783596626e-07, + "loss": 0.2495, + "step": 51324 + }, + { + "epoch": 0.886871025711916, + "grad_norm": 2.327530541681126, + "learning_rate": 6.637779642022401e-07, + "loss": 0.3392, + "step": 51325 + }, + { + "epoch": 0.8868883052253248, + "grad_norm": 1.496122306224674, + "learning_rate": 6.635774792865756e-07, + "loss": 0.2595, + "step": 51326 + }, + { + "epoch": 0.8869055847387337, + "grad_norm": 0.9445740402272707, + "learning_rate": 6.633770236132974e-07, + "loss": 0.3782, + "step": 51327 + }, + { + "epoch": 0.8869228642521426, + "grad_norm": 0.9533748877062759, + "learning_rate": 6.631765971830307e-07, + "loss": 0.1835, + "step": 51328 + }, + { + "epoch": 0.8869401437655515, + "grad_norm": 0.7942877519921135, + "learning_rate": 6.629761999964057e-07, + "loss": 0.5684, + "step": 51329 + }, + { + "epoch": 0.8869574232789604, + "grad_norm": 1.592456373427772, + "learning_rate": 6.627758320540478e-07, + "loss": 0.4542, + "step": 51330 + }, + { + "epoch": 0.8869747027923693, + "grad_norm": 0.9599841740149008, + "learning_rate": 6.625754933565842e-07, + "loss": 0.3637, + "step": 51331 + }, + { + "epoch": 0.8869919823057782, + "grad_norm": 1.1601280832060217, + "learning_rate": 6.623751839046455e-07, + "loss": 0.2527, + "step": 51332 + }, + { + "epoch": 0.8870092618191872, + "grad_norm": 0.9941637562768019, + "learning_rate": 6.621749036988579e-07, + "loss": 0.3194, + "step": 51333 + }, + { + "epoch": 0.8870265413325961, + "grad_norm": 1.016359001683452, + "learning_rate": 6.619746527398485e-07, + "loss": 0.6137, + "step": 51334 + }, + { + "epoch": 0.887043820846005, + "grad_norm": 0.540965136495845, + "learning_rate": 6.617744310282414e-07, + "loss": 0.7964, + "step": 51335 + }, + { + "epoch": 0.8870611003594139, + "grad_norm": 0.8828267204739151, + "learning_rate": 6.615742385646684e-07, + "loss": 0.3045, + "step": 51336 + }, + { + "epoch": 0.8870783798728228, + "grad_norm": 1.1552402298199838, + "learning_rate": 6.613740753497533e-07, + "loss": 0.3877, + "step": 51337 + }, + { + "epoch": 0.8870956593862317, + "grad_norm": 0.6169173343821988, + "learning_rate": 6.611739413841267e-07, + "loss": 0.734, + "step": 51338 + }, + { + "epoch": 0.8871129388996406, + "grad_norm": 1.7899290628795217, + "learning_rate": 6.609738366684104e-07, + "loss": 0.4213, + "step": 51339 + }, + { + "epoch": 0.8871302184130495, + "grad_norm": 1.7988653519286042, + "learning_rate": 6.607737612032361e-07, + "loss": 0.2173, + "step": 51340 + }, + { + "epoch": 0.8871474979264584, + "grad_norm": 1.9867515577614905, + "learning_rate": 6.605737149892266e-07, + "loss": 0.4228, + "step": 51341 + }, + { + "epoch": 0.8871647774398673, + "grad_norm": 1.0094621517457283, + "learning_rate": 6.603736980270093e-07, + "loss": 0.3138, + "step": 51342 + }, + { + "epoch": 0.8871820569532762, + "grad_norm": 0.593276872145687, + "learning_rate": 6.601737103172135e-07, + "loss": 0.5463, + "step": 51343 + }, + { + "epoch": 0.8871993364666851, + "grad_norm": 1.1204502856122354, + "learning_rate": 6.599737518604621e-07, + "loss": 0.1962, + "step": 51344 + }, + { + "epoch": 0.887216615980094, + "grad_norm": 1.2495091135472176, + "learning_rate": 6.597738226573836e-07, + "loss": 0.5347, + "step": 51345 + }, + { + "epoch": 0.887233895493503, + "grad_norm": 1.280966354084677, + "learning_rate": 6.595739227086028e-07, + "loss": 0.4996, + "step": 51346 + }, + { + "epoch": 0.8872511750069118, + "grad_norm": 1.7206364634856244, + "learning_rate": 6.593740520147451e-07, + "loss": 0.1812, + "step": 51347 + }, + { + "epoch": 0.8872684545203207, + "grad_norm": 1.087174912412083, + "learning_rate": 6.591742105764388e-07, + "loss": 0.3904, + "step": 51348 + }, + { + "epoch": 0.8872857340337296, + "grad_norm": 1.74265968652614, + "learning_rate": 6.589743983943098e-07, + "loss": 0.3518, + "step": 51349 + }, + { + "epoch": 0.8873030135471385, + "grad_norm": 1.317632624553093, + "learning_rate": 6.587746154689812e-07, + "loss": 0.3148, + "step": 51350 + }, + { + "epoch": 0.8873202930605474, + "grad_norm": 1.0835699347751295, + "learning_rate": 6.585748618010824e-07, + "loss": 0.3439, + "step": 51351 + }, + { + "epoch": 0.8873375725739563, + "grad_norm": 0.9197694622749837, + "learning_rate": 6.58375137391235e-07, + "loss": 0.2792, + "step": 51352 + }, + { + "epoch": 0.8873548520873652, + "grad_norm": 1.3580506846877491, + "learning_rate": 6.581754422400666e-07, + "loss": 0.3027, + "step": 51353 + }, + { + "epoch": 0.8873721316007741, + "grad_norm": 1.0419232332884278, + "learning_rate": 6.579757763482042e-07, + "loss": 0.3896, + "step": 51354 + }, + { + "epoch": 0.887389411114183, + "grad_norm": 1.3373227592940993, + "learning_rate": 6.577761397162708e-07, + "loss": 0.2729, + "step": 51355 + }, + { + "epoch": 0.8874066906275919, + "grad_norm": 1.1041046694346313, + "learning_rate": 6.575765323448935e-07, + "loss": 0.2394, + "step": 51356 + }, + { + "epoch": 0.8874239701410008, + "grad_norm": 1.435190856889612, + "learning_rate": 6.573769542346952e-07, + "loss": 0.3788, + "step": 51357 + }, + { + "epoch": 0.8874412496544097, + "grad_norm": 0.6307320548030196, + "learning_rate": 6.571774053863012e-07, + "loss": 0.2465, + "step": 51358 + }, + { + "epoch": 0.8874585291678186, + "grad_norm": 0.5588714047452615, + "learning_rate": 6.569778858003395e-07, + "loss": 0.6043, + "step": 51359 + }, + { + "epoch": 0.8874758086812276, + "grad_norm": 0.9170671299664714, + "learning_rate": 6.567783954774332e-07, + "loss": 0.3049, + "step": 51360 + }, + { + "epoch": 0.8874930881946365, + "grad_norm": 1.0314072047470553, + "learning_rate": 6.565789344182061e-07, + "loss": 0.3803, + "step": 51361 + }, + { + "epoch": 0.8875103677080454, + "grad_norm": 0.8410831899569403, + "learning_rate": 6.563795026232855e-07, + "loss": 0.2419, + "step": 51362 + }, + { + "epoch": 0.8875276472214543, + "grad_norm": 1.3741171694902736, + "learning_rate": 6.561801000932922e-07, + "loss": 0.3012, + "step": 51363 + }, + { + "epoch": 0.8875449267348632, + "grad_norm": 0.8244955798920164, + "learning_rate": 6.559807268288554e-07, + "loss": 0.314, + "step": 51364 + }, + { + "epoch": 0.8875622062482721, + "grad_norm": 1.3222030860312421, + "learning_rate": 6.557813828305947e-07, + "loss": 0.4729, + "step": 51365 + }, + { + "epoch": 0.887579485761681, + "grad_norm": 1.206450458933823, + "learning_rate": 6.555820680991365e-07, + "loss": 0.4879, + "step": 51366 + }, + { + "epoch": 0.8875967652750899, + "grad_norm": 1.3902987104542819, + "learning_rate": 6.553827826351078e-07, + "loss": 0.2672, + "step": 51367 + }, + { + "epoch": 0.8876140447884987, + "grad_norm": 0.8940637471421503, + "learning_rate": 6.551835264391292e-07, + "loss": 0.3092, + "step": 51368 + }, + { + "epoch": 0.8876313243019076, + "grad_norm": 1.3695400874843568, + "learning_rate": 6.549842995118261e-07, + "loss": 0.3003, + "step": 51369 + }, + { + "epoch": 0.8876486038153165, + "grad_norm": 1.820734782936924, + "learning_rate": 6.54785101853822e-07, + "loss": 0.42, + "step": 51370 + }, + { + "epoch": 0.8876658833287254, + "grad_norm": 1.2063191536738629, + "learning_rate": 6.545859334657411e-07, + "loss": 0.43, + "step": 51371 + }, + { + "epoch": 0.8876831628421343, + "grad_norm": 1.5828754064248136, + "learning_rate": 6.543867943482074e-07, + "loss": 0.1695, + "step": 51372 + }, + { + "epoch": 0.8877004423555432, + "grad_norm": 0.8879043516306299, + "learning_rate": 6.541876845018458e-07, + "loss": 0.3402, + "step": 51373 + }, + { + "epoch": 0.8877177218689521, + "grad_norm": 0.6460252794263481, + "learning_rate": 6.53988603927278e-07, + "loss": 0.5923, + "step": 51374 + }, + { + "epoch": 0.887735001382361, + "grad_norm": 1.3844550050678808, + "learning_rate": 6.53789552625127e-07, + "loss": 0.3989, + "step": 51375 + }, + { + "epoch": 0.88775228089577, + "grad_norm": 1.1598266980121124, + "learning_rate": 6.535905305960177e-07, + "loss": 0.1886, + "step": 51376 + }, + { + "epoch": 0.8877695604091789, + "grad_norm": 1.6725090272136234, + "learning_rate": 6.533915378405731e-07, + "loss": 0.166, + "step": 51377 + }, + { + "epoch": 0.8877868399225878, + "grad_norm": 2.1971122053226275, + "learning_rate": 6.531925743594181e-07, + "loss": 0.2762, + "step": 51378 + }, + { + "epoch": 0.8878041194359967, + "grad_norm": 1.9786321285887516, + "learning_rate": 6.529936401531722e-07, + "loss": 0.3885, + "step": 51379 + }, + { + "epoch": 0.8878213989494056, + "grad_norm": 1.4943840611364434, + "learning_rate": 6.527947352224628e-07, + "loss": 0.51, + "step": 51380 + }, + { + "epoch": 0.8878386784628145, + "grad_norm": 1.2314842204730065, + "learning_rate": 6.525958595679083e-07, + "loss": 0.3885, + "step": 51381 + }, + { + "epoch": 0.8878559579762234, + "grad_norm": 0.9469513479118223, + "learning_rate": 6.523970131901347e-07, + "loss": 0.3283, + "step": 51382 + }, + { + "epoch": 0.8878732374896323, + "grad_norm": 0.5287292376897764, + "learning_rate": 6.521981960897639e-07, + "loss": 0.635, + "step": 51383 + }, + { + "epoch": 0.8878905170030412, + "grad_norm": 1.1387599704808942, + "learning_rate": 6.519994082674208e-07, + "loss": 0.2187, + "step": 51384 + }, + { + "epoch": 0.8879077965164501, + "grad_norm": 1.680927812408346, + "learning_rate": 6.51800649723725e-07, + "loss": 0.3092, + "step": 51385 + }, + { + "epoch": 0.887925076029859, + "grad_norm": 0.7708676982404757, + "learning_rate": 6.516019204592982e-07, + "loss": 0.2446, + "step": 51386 + }, + { + "epoch": 0.887942355543268, + "grad_norm": 1.5912222057111285, + "learning_rate": 6.514032204747655e-07, + "loss": 0.3169, + "step": 51387 + }, + { + "epoch": 0.8879596350566769, + "grad_norm": 1.3021293406550531, + "learning_rate": 6.512045497707486e-07, + "loss": 0.3867, + "step": 51388 + }, + { + "epoch": 0.8879769145700857, + "grad_norm": 1.2651670593799007, + "learning_rate": 6.510059083478703e-07, + "loss": 0.3615, + "step": 51389 + }, + { + "epoch": 0.8879941940834946, + "grad_norm": 1.1647087952530455, + "learning_rate": 6.508072962067513e-07, + "loss": 0.4017, + "step": 51390 + }, + { + "epoch": 0.8880114735969035, + "grad_norm": 1.8078762389510508, + "learning_rate": 6.506087133480155e-07, + "loss": 0.2103, + "step": 51391 + }, + { + "epoch": 0.8880287531103124, + "grad_norm": 0.9117575128738427, + "learning_rate": 6.504101597722822e-07, + "loss": 0.3064, + "step": 51392 + }, + { + "epoch": 0.8880460326237213, + "grad_norm": 1.1532063415272757, + "learning_rate": 6.502116354801746e-07, + "loss": 0.3022, + "step": 51393 + }, + { + "epoch": 0.8880633121371302, + "grad_norm": 1.677931289678497, + "learning_rate": 6.500131404723142e-07, + "loss": 0.3513, + "step": 51394 + }, + { + "epoch": 0.8880805916505391, + "grad_norm": 1.0905455429110094, + "learning_rate": 6.498146747493261e-07, + "loss": 0.4756, + "step": 51395 + }, + { + "epoch": 0.888097871163948, + "grad_norm": 1.1288651677196424, + "learning_rate": 6.496162383118276e-07, + "loss": 0.3376, + "step": 51396 + }, + { + "epoch": 0.8881151506773569, + "grad_norm": 1.4322940386700287, + "learning_rate": 6.494178311604416e-07, + "loss": 0.1981, + "step": 51397 + }, + { + "epoch": 0.8881324301907658, + "grad_norm": 1.1236234636852471, + "learning_rate": 6.492194532957896e-07, + "loss": 0.2422, + "step": 51398 + }, + { + "epoch": 0.8881497097041747, + "grad_norm": 1.4152071857877346, + "learning_rate": 6.490211047184913e-07, + "loss": 0.2489, + "step": 51399 + }, + { + "epoch": 0.8881669892175836, + "grad_norm": 1.748452867799585, + "learning_rate": 6.488227854291718e-07, + "loss": 0.2549, + "step": 51400 + }, + { + "epoch": 0.8881842687309925, + "grad_norm": 1.2479643725964458, + "learning_rate": 6.486244954284482e-07, + "loss": 0.3866, + "step": 51401 + }, + { + "epoch": 0.8882015482444015, + "grad_norm": 1.9166728647544493, + "learning_rate": 6.484262347169456e-07, + "loss": 0.4123, + "step": 51402 + }, + { + "epoch": 0.8882188277578104, + "grad_norm": 0.7742271901432202, + "learning_rate": 6.482280032952804e-07, + "loss": 0.3133, + "step": 51403 + }, + { + "epoch": 0.8882361072712193, + "grad_norm": 1.2417878108356355, + "learning_rate": 6.480298011640784e-07, + "loss": 0.3522, + "step": 51404 + }, + { + "epoch": 0.8882533867846282, + "grad_norm": 1.7022506391426884, + "learning_rate": 6.47831628323955e-07, + "loss": 0.3839, + "step": 51405 + }, + { + "epoch": 0.8882706662980371, + "grad_norm": 1.4627369826720071, + "learning_rate": 6.47633484775535e-07, + "loss": 0.3754, + "step": 51406 + }, + { + "epoch": 0.888287945811446, + "grad_norm": 1.1345320763259716, + "learning_rate": 6.474353705194381e-07, + "loss": 0.3244, + "step": 51407 + }, + { + "epoch": 0.8883052253248549, + "grad_norm": 2.235360172837786, + "learning_rate": 6.472372855562837e-07, + "loss": 0.4332, + "step": 51408 + }, + { + "epoch": 0.8883225048382638, + "grad_norm": 1.53795147430026, + "learning_rate": 6.470392298866935e-07, + "loss": 0.2151, + "step": 51409 + }, + { + "epoch": 0.8883397843516727, + "grad_norm": 1.7136658151990285, + "learning_rate": 6.468412035112859e-07, + "loss": 0.4029, + "step": 51410 + }, + { + "epoch": 0.8883570638650815, + "grad_norm": 1.4482791907034644, + "learning_rate": 6.466432064306827e-07, + "loss": 0.138, + "step": 51411 + }, + { + "epoch": 0.8883743433784904, + "grad_norm": 1.4034628929116337, + "learning_rate": 6.464452386455045e-07, + "loss": 0.5836, + "step": 51412 + }, + { + "epoch": 0.8883916228918993, + "grad_norm": 1.2273667777646378, + "learning_rate": 6.462473001563718e-07, + "loss": 0.4751, + "step": 51413 + }, + { + "epoch": 0.8884089024053082, + "grad_norm": 0.860139586636128, + "learning_rate": 6.46049390963901e-07, + "loss": 0.1875, + "step": 51414 + }, + { + "epoch": 0.8884261819187171, + "grad_norm": 0.8304892184061091, + "learning_rate": 6.458515110687169e-07, + "loss": 0.1504, + "step": 51415 + }, + { + "epoch": 0.888443461432126, + "grad_norm": 1.6228124987226455, + "learning_rate": 6.456536604714348e-07, + "loss": 0.3661, + "step": 51416 + }, + { + "epoch": 0.888460740945535, + "grad_norm": 2.332821991247569, + "learning_rate": 6.454558391726761e-07, + "loss": 0.2118, + "step": 51417 + }, + { + "epoch": 0.8884780204589439, + "grad_norm": 1.4739903827476961, + "learning_rate": 6.45258047173063e-07, + "loss": 0.3133, + "step": 51418 + }, + { + "epoch": 0.8884952999723528, + "grad_norm": 1.1231656118125062, + "learning_rate": 6.450602844732101e-07, + "loss": 0.3261, + "step": 51419 + }, + { + "epoch": 0.8885125794857617, + "grad_norm": 1.965297038107704, + "learning_rate": 6.448625510737416e-07, + "loss": 0.4204, + "step": 51420 + }, + { + "epoch": 0.8885298589991706, + "grad_norm": 1.0764465547757494, + "learning_rate": 6.446648469752726e-07, + "loss": 0.5032, + "step": 51421 + }, + { + "epoch": 0.8885471385125795, + "grad_norm": 1.2840713532283572, + "learning_rate": 6.444671721784246e-07, + "loss": 0.2852, + "step": 51422 + }, + { + "epoch": 0.8885644180259884, + "grad_norm": 1.7298000642897695, + "learning_rate": 6.442695266838162e-07, + "loss": 0.3556, + "step": 51423 + }, + { + "epoch": 0.8885816975393973, + "grad_norm": 1.3778270928205612, + "learning_rate": 6.440719104920689e-07, + "loss": 0.3275, + "step": 51424 + }, + { + "epoch": 0.8885989770528062, + "grad_norm": 1.2114882065259986, + "learning_rate": 6.43874323603797e-07, + "loss": 0.598, + "step": 51425 + }, + { + "epoch": 0.8886162565662151, + "grad_norm": 0.7258439889324739, + "learning_rate": 6.436767660196231e-07, + "loss": 0.6561, + "step": 51426 + }, + { + "epoch": 0.888633536079624, + "grad_norm": 1.3598776294737382, + "learning_rate": 6.434792377401633e-07, + "loss": 0.3481, + "step": 51427 + }, + { + "epoch": 0.8886508155930329, + "grad_norm": 1.800939902426445, + "learning_rate": 6.432817387660384e-07, + "loss": 0.4027, + "step": 51428 + }, + { + "epoch": 0.8886680951064418, + "grad_norm": 1.0393639259985985, + "learning_rate": 6.430842690978666e-07, + "loss": 0.4295, + "step": 51429 + }, + { + "epoch": 0.8886853746198508, + "grad_norm": 0.9643405681643497, + "learning_rate": 6.428868287362655e-07, + "loss": 0.2001, + "step": 51430 + }, + { + "epoch": 0.8887026541332597, + "grad_norm": 1.040826590946865, + "learning_rate": 6.426894176818554e-07, + "loss": 0.3622, + "step": 51431 + }, + { + "epoch": 0.8887199336466685, + "grad_norm": 1.1114679918193173, + "learning_rate": 6.424920359352505e-07, + "loss": 0.3197, + "step": 51432 + }, + { + "epoch": 0.8887372131600774, + "grad_norm": 2.9114142874962172, + "learning_rate": 6.422946834970722e-07, + "loss": 0.2164, + "step": 51433 + }, + { + "epoch": 0.8887544926734863, + "grad_norm": 1.5605874128045742, + "learning_rate": 6.420973603679393e-07, + "loss": 0.4663, + "step": 51434 + }, + { + "epoch": 0.8887717721868952, + "grad_norm": 0.9185744422867985, + "learning_rate": 6.419000665484687e-07, + "loss": 0.3035, + "step": 51435 + }, + { + "epoch": 0.8887890517003041, + "grad_norm": 1.3006453224164505, + "learning_rate": 6.417028020392791e-07, + "loss": 0.3899, + "step": 51436 + }, + { + "epoch": 0.888806331213713, + "grad_norm": 1.027441061311425, + "learning_rate": 6.415055668409853e-07, + "loss": 0.3775, + "step": 51437 + }, + { + "epoch": 0.8888236107271219, + "grad_norm": 1.0729268373119847, + "learning_rate": 6.413083609542092e-07, + "loss": 0.5763, + "step": 51438 + }, + { + "epoch": 0.8888408902405308, + "grad_norm": 1.2904899752741243, + "learning_rate": 6.411111843795636e-07, + "loss": 0.344, + "step": 51439 + }, + { + "epoch": 0.8888581697539397, + "grad_norm": 1.5359829857410299, + "learning_rate": 6.409140371176714e-07, + "loss": 0.3751, + "step": 51440 + }, + { + "epoch": 0.8888754492673486, + "grad_norm": 1.3751634436527873, + "learning_rate": 6.407169191691465e-07, + "loss": 0.3974, + "step": 51441 + }, + { + "epoch": 0.8888927287807575, + "grad_norm": 1.6412537444528827, + "learning_rate": 6.405198305346094e-07, + "loss": 0.3779, + "step": 51442 + }, + { + "epoch": 0.8889100082941664, + "grad_norm": 1.1848129810512908, + "learning_rate": 6.403227712146742e-07, + "loss": 0.5597, + "step": 51443 + }, + { + "epoch": 0.8889272878075754, + "grad_norm": 1.1667088374183323, + "learning_rate": 6.401257412099604e-07, + "loss": 0.5113, + "step": 51444 + }, + { + "epoch": 0.8889445673209843, + "grad_norm": 1.4594695852953792, + "learning_rate": 6.399287405210808e-07, + "loss": 0.3524, + "step": 51445 + }, + { + "epoch": 0.8889618468343932, + "grad_norm": 1.4944648159996747, + "learning_rate": 6.397317691486593e-07, + "loss": 0.2831, + "step": 51446 + }, + { + "epoch": 0.8889791263478021, + "grad_norm": 1.0667926943632502, + "learning_rate": 6.395348270933088e-07, + "loss": 0.7704, + "step": 51447 + }, + { + "epoch": 0.888996405861211, + "grad_norm": 1.36838165152154, + "learning_rate": 6.393379143556455e-07, + "loss": 0.3608, + "step": 51448 + }, + { + "epoch": 0.8890136853746199, + "grad_norm": 1.7527766564647063, + "learning_rate": 6.391410309362889e-07, + "loss": 0.2853, + "step": 51449 + }, + { + "epoch": 0.8890309648880288, + "grad_norm": 1.2001702540843855, + "learning_rate": 6.389441768358518e-07, + "loss": 0.2139, + "step": 51450 + }, + { + "epoch": 0.8890482444014377, + "grad_norm": 1.2157583621241177, + "learning_rate": 6.387473520549536e-07, + "loss": 0.3732, + "step": 51451 + }, + { + "epoch": 0.8890655239148466, + "grad_norm": 2.507770640233809, + "learning_rate": 6.385505565942096e-07, + "loss": 0.2364, + "step": 51452 + }, + { + "epoch": 0.8890828034282554, + "grad_norm": 1.6087636154083613, + "learning_rate": 6.383537904542392e-07, + "loss": 0.2682, + "step": 51453 + }, + { + "epoch": 0.8891000829416643, + "grad_norm": 1.2943839344344799, + "learning_rate": 6.38157053635654e-07, + "loss": 0.3175, + "step": 51454 + }, + { + "epoch": 0.8891173624550732, + "grad_norm": 2.0388074962100573, + "learning_rate": 6.379603461390737e-07, + "loss": 0.3584, + "step": 51455 + }, + { + "epoch": 0.8891346419684821, + "grad_norm": 0.7961344494643983, + "learning_rate": 6.377636679651111e-07, + "loss": 0.2069, + "step": 51456 + }, + { + "epoch": 0.889151921481891, + "grad_norm": 1.2080462411203667, + "learning_rate": 6.375670191143857e-07, + "loss": 0.4923, + "step": 51457 + }, + { + "epoch": 0.8891692009953, + "grad_norm": 1.8838968190616596, + "learning_rate": 6.373703995875124e-07, + "loss": 0.3007, + "step": 51458 + }, + { + "epoch": 0.8891864805087089, + "grad_norm": 1.1287721907793247, + "learning_rate": 6.371738093851044e-07, + "loss": 0.2673, + "step": 51459 + }, + { + "epoch": 0.8892037600221178, + "grad_norm": 1.211570750572605, + "learning_rate": 6.369772485077819e-07, + "loss": 0.3361, + "step": 51460 + }, + { + "epoch": 0.8892210395355267, + "grad_norm": 0.897003106873962, + "learning_rate": 6.367807169561569e-07, + "loss": 0.2103, + "step": 51461 + }, + { + "epoch": 0.8892383190489356, + "grad_norm": 1.2926501641126393, + "learning_rate": 6.365842147308465e-07, + "loss": 0.3191, + "step": 51462 + }, + { + "epoch": 0.8892555985623445, + "grad_norm": 1.2533627663076645, + "learning_rate": 6.363877418324649e-07, + "loss": 0.196, + "step": 51463 + }, + { + "epoch": 0.8892728780757534, + "grad_norm": 1.629168967971487, + "learning_rate": 6.361912982616314e-07, + "loss": 0.3105, + "step": 51464 + }, + { + "epoch": 0.8892901575891623, + "grad_norm": 0.8584302393293977, + "learning_rate": 6.359948840189556e-07, + "loss": 0.3136, + "step": 51465 + }, + { + "epoch": 0.8893074371025712, + "grad_norm": 0.9979716865831058, + "learning_rate": 6.357984991050569e-07, + "loss": 0.5507, + "step": 51466 + }, + { + "epoch": 0.8893247166159801, + "grad_norm": 0.9337842292936543, + "learning_rate": 6.356021435205484e-07, + "loss": 0.4195, + "step": 51467 + }, + { + "epoch": 0.889341996129389, + "grad_norm": 0.7274943307497294, + "learning_rate": 6.354058172660449e-07, + "loss": 0.3738, + "step": 51468 + }, + { + "epoch": 0.8893592756427979, + "grad_norm": 1.3501931846721136, + "learning_rate": 6.352095203421638e-07, + "loss": 0.2403, + "step": 51469 + }, + { + "epoch": 0.8893765551562068, + "grad_norm": 1.9574044950194016, + "learning_rate": 6.350132527495168e-07, + "loss": 0.4857, + "step": 51470 + }, + { + "epoch": 0.8893938346696157, + "grad_norm": 0.8820847553514245, + "learning_rate": 6.348170144887223e-07, + "loss": 0.2148, + "step": 51471 + }, + { + "epoch": 0.8894111141830247, + "grad_norm": 1.7539118810036736, + "learning_rate": 6.346208055603897e-07, + "loss": 0.249, + "step": 51472 + }, + { + "epoch": 0.8894283936964336, + "grad_norm": 1.9382785654552297, + "learning_rate": 6.344246259651377e-07, + "loss": 0.1951, + "step": 51473 + }, + { + "epoch": 0.8894456732098424, + "grad_norm": 1.6486979331286096, + "learning_rate": 6.342284757035788e-07, + "loss": 0.3644, + "step": 51474 + }, + { + "epoch": 0.8894629527232513, + "grad_norm": 1.2957677338963338, + "learning_rate": 6.340323547763294e-07, + "loss": 0.4274, + "step": 51475 + }, + { + "epoch": 0.8894802322366602, + "grad_norm": 1.4544146134974942, + "learning_rate": 6.338362631840012e-07, + "loss": 0.2611, + "step": 51476 + }, + { + "epoch": 0.8894975117500691, + "grad_norm": 1.7369132663343083, + "learning_rate": 6.336402009272114e-07, + "loss": 0.2854, + "step": 51477 + }, + { + "epoch": 0.889514791263478, + "grad_norm": 1.0731051636629694, + "learning_rate": 6.334441680065717e-07, + "loss": 0.3097, + "step": 51478 + }, + { + "epoch": 0.8895320707768869, + "grad_norm": 1.25238000282256, + "learning_rate": 6.332481644226939e-07, + "loss": 0.3455, + "step": 51479 + }, + { + "epoch": 0.8895493502902958, + "grad_norm": 1.611377195333124, + "learning_rate": 6.330521901761988e-07, + "loss": 0.3239, + "step": 51480 + }, + { + "epoch": 0.8895666298037047, + "grad_norm": 1.2165955453780997, + "learning_rate": 6.328562452676934e-07, + "loss": 0.6445, + "step": 51481 + }, + { + "epoch": 0.8895839093171136, + "grad_norm": 1.0299580122970136, + "learning_rate": 6.326603296977962e-07, + "loss": 0.5436, + "step": 51482 + }, + { + "epoch": 0.8896011888305225, + "grad_norm": 1.7004993172853873, + "learning_rate": 6.324644434671168e-07, + "loss": 0.3613, + "step": 51483 + }, + { + "epoch": 0.8896184683439314, + "grad_norm": 1.7810344312542343, + "learning_rate": 6.322685865762723e-07, + "loss": 0.5738, + "step": 51484 + }, + { + "epoch": 0.8896357478573403, + "grad_norm": 1.6260926997084224, + "learning_rate": 6.320727590258713e-07, + "loss": 0.3343, + "step": 51485 + }, + { + "epoch": 0.8896530273707492, + "grad_norm": 1.28984889283055, + "learning_rate": 6.318769608165332e-07, + "loss": 0.1672, + "step": 51486 + }, + { + "epoch": 0.8896703068841582, + "grad_norm": 1.7413374627724991, + "learning_rate": 6.316811919488675e-07, + "loss": 0.4195, + "step": 51487 + }, + { + "epoch": 0.8896875863975671, + "grad_norm": 0.9482286606064669, + "learning_rate": 6.314854524234892e-07, + "loss": 0.2353, + "step": 51488 + }, + { + "epoch": 0.889704865910976, + "grad_norm": 1.461623854106934, + "learning_rate": 6.312897422410113e-07, + "loss": 0.2402, + "step": 51489 + }, + { + "epoch": 0.8897221454243849, + "grad_norm": 1.042287656599206, + "learning_rate": 6.310940614020444e-07, + "loss": 0.2917, + "step": 51490 + }, + { + "epoch": 0.8897394249377938, + "grad_norm": 0.8241690289094523, + "learning_rate": 6.308984099072024e-07, + "loss": 0.2546, + "step": 51491 + }, + { + "epoch": 0.8897567044512027, + "grad_norm": 0.8909275012752942, + "learning_rate": 6.307027877570982e-07, + "loss": 0.2127, + "step": 51492 + }, + { + "epoch": 0.8897739839646116, + "grad_norm": 1.2296448191099292, + "learning_rate": 6.305071949523467e-07, + "loss": 0.5885, + "step": 51493 + }, + { + "epoch": 0.8897912634780205, + "grad_norm": 1.5904234581204704, + "learning_rate": 6.303116314935576e-07, + "loss": 0.3604, + "step": 51494 + }, + { + "epoch": 0.8898085429914293, + "grad_norm": 0.8429290318316727, + "learning_rate": 6.301160973813458e-07, + "loss": 0.2723, + "step": 51495 + }, + { + "epoch": 0.8898258225048382, + "grad_norm": 1.4699868475019453, + "learning_rate": 6.29920592616321e-07, + "loss": 0.4123, + "step": 51496 + }, + { + "epoch": 0.8898431020182471, + "grad_norm": 1.0838444889747885, + "learning_rate": 6.29725117199097e-07, + "loss": 0.3162, + "step": 51497 + }, + { + "epoch": 0.889860381531656, + "grad_norm": 1.790564606763179, + "learning_rate": 6.295296711302879e-07, + "loss": 0.2912, + "step": 51498 + }, + { + "epoch": 0.8898776610450649, + "grad_norm": 1.4807614078673061, + "learning_rate": 6.293342544105019e-07, + "loss": 0.398, + "step": 51499 + }, + { + "epoch": 0.8898949405584738, + "grad_norm": 1.2937255986980833, + "learning_rate": 6.291388670403553e-07, + "loss": 0.3471, + "step": 51500 + }, + { + "epoch": 0.8899122200718828, + "grad_norm": 1.8885143271316343, + "learning_rate": 6.289435090204554e-07, + "loss": 0.3356, + "step": 51501 + }, + { + "epoch": 0.8899294995852917, + "grad_norm": 1.9417859534238884, + "learning_rate": 6.287481803514172e-07, + "loss": 0.3132, + "step": 51502 + }, + { + "epoch": 0.8899467790987006, + "grad_norm": 1.0255895936948412, + "learning_rate": 6.285528810338526e-07, + "loss": 0.4554, + "step": 51503 + }, + { + "epoch": 0.8899640586121095, + "grad_norm": 0.5346915957018867, + "learning_rate": 6.283576110683731e-07, + "loss": 0.4501, + "step": 51504 + }, + { + "epoch": 0.8899813381255184, + "grad_norm": 1.1789856886470922, + "learning_rate": 6.281623704555884e-07, + "loss": 0.3432, + "step": 51505 + }, + { + "epoch": 0.8899986176389273, + "grad_norm": 1.4779821858222704, + "learning_rate": 6.279671591961134e-07, + "loss": 0.3043, + "step": 51506 + }, + { + "epoch": 0.8900158971523362, + "grad_norm": 1.3048548411024117, + "learning_rate": 6.277719772905555e-07, + "loss": 0.409, + "step": 51507 + }, + { + "epoch": 0.8900331766657451, + "grad_norm": 0.7001374135042198, + "learning_rate": 6.275768247395286e-07, + "loss": 0.6594, + "step": 51508 + }, + { + "epoch": 0.890050456179154, + "grad_norm": 1.9071475842517849, + "learning_rate": 6.273817015436445e-07, + "loss": 0.3873, + "step": 51509 + }, + { + "epoch": 0.8900677356925629, + "grad_norm": 1.9364927173280213, + "learning_rate": 6.271866077035116e-07, + "loss": 0.4767, + "step": 51510 + }, + { + "epoch": 0.8900850152059718, + "grad_norm": 2.4727905852623833, + "learning_rate": 6.269915432197438e-07, + "loss": 0.5388, + "step": 51511 + }, + { + "epoch": 0.8901022947193807, + "grad_norm": 1.469035337251072, + "learning_rate": 6.267965080929495e-07, + "loss": 0.3306, + "step": 51512 + }, + { + "epoch": 0.8901195742327896, + "grad_norm": 1.750449935816719, + "learning_rate": 6.266015023237415e-07, + "loss": 0.2482, + "step": 51513 + }, + { + "epoch": 0.8901368537461986, + "grad_norm": 1.0293221448805672, + "learning_rate": 6.264065259127295e-07, + "loss": 0.3347, + "step": 51514 + }, + { + "epoch": 0.8901541332596075, + "grad_norm": 1.7131665802138087, + "learning_rate": 6.262115788605261e-07, + "loss": 0.285, + "step": 51515 + }, + { + "epoch": 0.8901714127730163, + "grad_norm": 1.5128326803725343, + "learning_rate": 6.260166611677388e-07, + "loss": 0.3911, + "step": 51516 + }, + { + "epoch": 0.8901886922864252, + "grad_norm": 2.288501546704031, + "learning_rate": 6.258217728349814e-07, + "loss": 0.2826, + "step": 51517 + }, + { + "epoch": 0.8902059717998341, + "grad_norm": 1.418370792717525, + "learning_rate": 6.256269138628601e-07, + "loss": 0.2406, + "step": 51518 + }, + { + "epoch": 0.890223251313243, + "grad_norm": 1.3439245198827379, + "learning_rate": 6.25432084251989e-07, + "loss": 0.4057, + "step": 51519 + }, + { + "epoch": 0.8902405308266519, + "grad_norm": 2.2555827063515554, + "learning_rate": 6.252372840029786e-07, + "loss": 0.3908, + "step": 51520 + }, + { + "epoch": 0.8902578103400608, + "grad_norm": 1.2300061323227114, + "learning_rate": 6.250425131164351e-07, + "loss": 0.3399, + "step": 51521 + }, + { + "epoch": 0.8902750898534697, + "grad_norm": 1.0638040239742834, + "learning_rate": 6.248477715929735e-07, + "loss": 0.4017, + "step": 51522 + }, + { + "epoch": 0.8902923693668786, + "grad_norm": 1.0104276363476015, + "learning_rate": 6.24653059433199e-07, + "loss": 0.3293, + "step": 51523 + }, + { + "epoch": 0.8903096488802875, + "grad_norm": 1.2648797671470966, + "learning_rate": 6.244583766377255e-07, + "loss": 0.1992, + "step": 51524 + }, + { + "epoch": 0.8903269283936964, + "grad_norm": 1.1412083577446503, + "learning_rate": 6.242637232071568e-07, + "loss": 0.4181, + "step": 51525 + }, + { + "epoch": 0.8903442079071053, + "grad_norm": 1.2828928004172901, + "learning_rate": 6.240690991421105e-07, + "loss": 0.3432, + "step": 51526 + }, + { + "epoch": 0.8903614874205142, + "grad_norm": 1.5438952377109165, + "learning_rate": 6.238745044431915e-07, + "loss": 0.459, + "step": 51527 + }, + { + "epoch": 0.8903787669339231, + "grad_norm": 1.5323888173950282, + "learning_rate": 6.236799391110105e-07, + "loss": 0.2783, + "step": 51528 + }, + { + "epoch": 0.8903960464473321, + "grad_norm": 1.0757961552075175, + "learning_rate": 6.234854031461767e-07, + "loss": 0.3154, + "step": 51529 + }, + { + "epoch": 0.890413325960741, + "grad_norm": 1.9352363361976297, + "learning_rate": 6.232908965492978e-07, + "loss": 0.4384, + "step": 51530 + }, + { + "epoch": 0.8904306054741499, + "grad_norm": 0.7910027878324054, + "learning_rate": 6.230964193209843e-07, + "loss": 0.2482, + "step": 51531 + }, + { + "epoch": 0.8904478849875588, + "grad_norm": 1.5660917176311644, + "learning_rate": 6.229019714618456e-07, + "loss": 0.4181, + "step": 51532 + }, + { + "epoch": 0.8904651645009677, + "grad_norm": 1.212509558655971, + "learning_rate": 6.227075529724925e-07, + "loss": 0.2157, + "step": 51533 + }, + { + "epoch": 0.8904824440143766, + "grad_norm": 2.3555940305284984, + "learning_rate": 6.225131638535298e-07, + "loss": 0.3116, + "step": 51534 + }, + { + "epoch": 0.8904997235277855, + "grad_norm": 1.276002424530589, + "learning_rate": 6.223188041055706e-07, + "loss": 0.2153, + "step": 51535 + }, + { + "epoch": 0.8905170030411944, + "grad_norm": 1.3289639188407811, + "learning_rate": 6.221244737292198e-07, + "loss": 0.3304, + "step": 51536 + }, + { + "epoch": 0.8905342825546032, + "grad_norm": 1.1630138347919199, + "learning_rate": 6.219301727250882e-07, + "loss": 0.3236, + "step": 51537 + }, + { + "epoch": 0.8905515620680121, + "grad_norm": 1.2403485326316532, + "learning_rate": 6.21735901093784e-07, + "loss": 0.3321, + "step": 51538 + }, + { + "epoch": 0.890568841581421, + "grad_norm": 1.20707015478836, + "learning_rate": 6.215416588359158e-07, + "loss": 0.4005, + "step": 51539 + }, + { + "epoch": 0.8905861210948299, + "grad_norm": 1.2092791793487947, + "learning_rate": 6.213474459520929e-07, + "loss": 0.3198, + "step": 51540 + }, + { + "epoch": 0.8906034006082388, + "grad_norm": 3.146596028653417, + "learning_rate": 6.211532624429217e-07, + "loss": 0.2694, + "step": 51541 + }, + { + "epoch": 0.8906206801216477, + "grad_norm": 1.4079983371624083, + "learning_rate": 6.209591083090105e-07, + "loss": 0.2523, + "step": 51542 + }, + { + "epoch": 0.8906379596350567, + "grad_norm": 1.310746543812081, + "learning_rate": 6.207649835509677e-07, + "loss": 0.4364, + "step": 51543 + }, + { + "epoch": 0.8906552391484656, + "grad_norm": 1.752092256947945, + "learning_rate": 6.205708881694039e-07, + "loss": 0.3519, + "step": 51544 + }, + { + "epoch": 0.8906725186618745, + "grad_norm": 1.5605560817754582, + "learning_rate": 6.203768221649242e-07, + "loss": 0.4786, + "step": 51545 + }, + { + "epoch": 0.8906897981752834, + "grad_norm": 2.604095722271817, + "learning_rate": 6.20182785538137e-07, + "loss": 0.2535, + "step": 51546 + }, + { + "epoch": 0.8907070776886923, + "grad_norm": 1.4520209246722036, + "learning_rate": 6.199887782896496e-07, + "loss": 0.3601, + "step": 51547 + }, + { + "epoch": 0.8907243572021012, + "grad_norm": 1.2574087141384607, + "learning_rate": 6.197948004200705e-07, + "loss": 0.1808, + "step": 51548 + }, + { + "epoch": 0.8907416367155101, + "grad_norm": 1.8228785466768016, + "learning_rate": 6.196008519300079e-07, + "loss": 0.3457, + "step": 51549 + }, + { + "epoch": 0.890758916228919, + "grad_norm": 1.1235276981084845, + "learning_rate": 6.194069328200669e-07, + "loss": 0.4465, + "step": 51550 + }, + { + "epoch": 0.8907761957423279, + "grad_norm": 2.249438152599971, + "learning_rate": 6.192130430908571e-07, + "loss": 0.357, + "step": 51551 + }, + { + "epoch": 0.8907934752557368, + "grad_norm": 1.7332481656916046, + "learning_rate": 6.190191827429847e-07, + "loss": 0.4169, + "step": 51552 + }, + { + "epoch": 0.8908107547691457, + "grad_norm": 1.205853931392793, + "learning_rate": 6.188253517770559e-07, + "loss": 0.3693, + "step": 51553 + }, + { + "epoch": 0.8908280342825546, + "grad_norm": 0.9327390660926187, + "learning_rate": 6.186315501936801e-07, + "loss": 0.5182, + "step": 51554 + }, + { + "epoch": 0.8908453137959635, + "grad_norm": 2.565801056365908, + "learning_rate": 6.184377779934647e-07, + "loss": 0.1875, + "step": 51555 + }, + { + "epoch": 0.8908625933093725, + "grad_norm": 1.1244497067081256, + "learning_rate": 6.182440351770125e-07, + "loss": 0.1763, + "step": 51556 + }, + { + "epoch": 0.8908798728227814, + "grad_norm": 1.551994234722087, + "learning_rate": 6.180503217449351e-07, + "loss": 0.2827, + "step": 51557 + }, + { + "epoch": 0.8908971523361903, + "grad_norm": 1.532035895654323, + "learning_rate": 6.178566376978368e-07, + "loss": 0.4879, + "step": 51558 + }, + { + "epoch": 0.8909144318495991, + "grad_norm": 1.5907257696623138, + "learning_rate": 6.176629830363234e-07, + "loss": 0.2466, + "step": 51559 + }, + { + "epoch": 0.890931711363008, + "grad_norm": 1.7311135589420383, + "learning_rate": 6.174693577610035e-07, + "loss": 0.3155, + "step": 51560 + }, + { + "epoch": 0.8909489908764169, + "grad_norm": 1.1661910807311118, + "learning_rate": 6.172757618724823e-07, + "loss": 0.2695, + "step": 51561 + }, + { + "epoch": 0.8909662703898258, + "grad_norm": 1.5158451159916846, + "learning_rate": 6.170821953713669e-07, + "loss": 0.3769, + "step": 51562 + }, + { + "epoch": 0.8909835499032347, + "grad_norm": 2.0641565990317936, + "learning_rate": 6.168886582582623e-07, + "loss": 0.2474, + "step": 51563 + }, + { + "epoch": 0.8910008294166436, + "grad_norm": 2.5558599774979918, + "learning_rate": 6.16695150533777e-07, + "loss": 0.3869, + "step": 51564 + }, + { + "epoch": 0.8910181089300525, + "grad_norm": 1.2366451183114142, + "learning_rate": 6.165016721985128e-07, + "loss": 0.3681, + "step": 51565 + }, + { + "epoch": 0.8910353884434614, + "grad_norm": 1.1313842293120624, + "learning_rate": 6.163082232530804e-07, + "loss": 0.2214, + "step": 51566 + }, + { + "epoch": 0.8910526679568703, + "grad_norm": 1.3799364139756591, + "learning_rate": 6.161148036980835e-07, + "loss": 0.3161, + "step": 51567 + }, + { + "epoch": 0.8910699474702792, + "grad_norm": 1.2300670845556152, + "learning_rate": 6.159214135341296e-07, + "loss": 0.2919, + "step": 51568 + }, + { + "epoch": 0.8910872269836881, + "grad_norm": 1.316063119407901, + "learning_rate": 6.157280527618204e-07, + "loss": 0.3555, + "step": 51569 + }, + { + "epoch": 0.891104506497097, + "grad_norm": 2.5531355663105146, + "learning_rate": 6.155347213817664e-07, + "loss": 0.2741, + "step": 51570 + }, + { + "epoch": 0.891121786010506, + "grad_norm": 0.9738927944062657, + "learning_rate": 6.153414193945695e-07, + "loss": 0.2404, + "step": 51571 + }, + { + "epoch": 0.8911390655239149, + "grad_norm": 1.0070668030828043, + "learning_rate": 6.151481468008369e-07, + "loss": 0.1886, + "step": 51572 + }, + { + "epoch": 0.8911563450373238, + "grad_norm": 1.6223224708793624, + "learning_rate": 6.149549036011737e-07, + "loss": 0.4846, + "step": 51573 + }, + { + "epoch": 0.8911736245507327, + "grad_norm": 1.4479890951761472, + "learning_rate": 6.147616897961839e-07, + "loss": 0.4314, + "step": 51574 + }, + { + "epoch": 0.8911909040641416, + "grad_norm": 1.6567609815671693, + "learning_rate": 6.145685053864758e-07, + "loss": 0.2748, + "step": 51575 + }, + { + "epoch": 0.8912081835775505, + "grad_norm": 1.2981319959440283, + "learning_rate": 6.143753503726512e-07, + "loss": 0.3572, + "step": 51576 + }, + { + "epoch": 0.8912254630909594, + "grad_norm": 1.2110813372044087, + "learning_rate": 6.141822247553154e-07, + "loss": 0.5037, + "step": 51577 + }, + { + "epoch": 0.8912427426043683, + "grad_norm": 1.0687182149559598, + "learning_rate": 6.139891285350741e-07, + "loss": 0.5822, + "step": 51578 + }, + { + "epoch": 0.8912600221177772, + "grad_norm": 1.5616404668336297, + "learning_rate": 6.137960617125349e-07, + "loss": 0.3235, + "step": 51579 + }, + { + "epoch": 0.891277301631186, + "grad_norm": 0.8560230863678444, + "learning_rate": 6.136030242882985e-07, + "loss": 0.5132, + "step": 51580 + }, + { + "epoch": 0.8912945811445949, + "grad_norm": 1.051368023294554, + "learning_rate": 6.134100162629697e-07, + "loss": 0.2668, + "step": 51581 + }, + { + "epoch": 0.8913118606580038, + "grad_norm": 1.301131039643342, + "learning_rate": 6.132170376371538e-07, + "loss": 0.3444, + "step": 51582 + }, + { + "epoch": 0.8913291401714127, + "grad_norm": 0.8671228938771706, + "learning_rate": 6.130240884114558e-07, + "loss": 0.1021, + "step": 51583 + }, + { + "epoch": 0.8913464196848216, + "grad_norm": 0.7788185281652814, + "learning_rate": 6.128311685864819e-07, + "loss": 0.7416, + "step": 51584 + }, + { + "epoch": 0.8913636991982306, + "grad_norm": 1.4161731216396014, + "learning_rate": 6.126382781628315e-07, + "loss": 0.3529, + "step": 51585 + }, + { + "epoch": 0.8913809787116395, + "grad_norm": 1.1247723117354738, + "learning_rate": 6.124454171411132e-07, + "loss": 0.3112, + "step": 51586 + }, + { + "epoch": 0.8913982582250484, + "grad_norm": 1.1242182345162786, + "learning_rate": 6.122525855219286e-07, + "loss": 0.344, + "step": 51587 + }, + { + "epoch": 0.8914155377384573, + "grad_norm": 1.1301020800577077, + "learning_rate": 6.120597833058816e-07, + "loss": 0.4721, + "step": 51588 + }, + { + "epoch": 0.8914328172518662, + "grad_norm": 1.3532614489282466, + "learning_rate": 6.118670104935765e-07, + "loss": 0.4912, + "step": 51589 + }, + { + "epoch": 0.8914500967652751, + "grad_norm": 0.8206075251762689, + "learning_rate": 6.116742670856191e-07, + "loss": 0.2127, + "step": 51590 + }, + { + "epoch": 0.891467376278684, + "grad_norm": 1.1150366551661646, + "learning_rate": 6.114815530826124e-07, + "loss": 0.3229, + "step": 51591 + }, + { + "epoch": 0.8914846557920929, + "grad_norm": 1.411220956952333, + "learning_rate": 6.11288868485156e-07, + "loss": 0.2763, + "step": 51592 + }, + { + "epoch": 0.8915019353055018, + "grad_norm": 1.3591909768672983, + "learning_rate": 6.110962132938569e-07, + "loss": 0.5115, + "step": 51593 + }, + { + "epoch": 0.8915192148189107, + "grad_norm": 1.512126537963332, + "learning_rate": 6.109035875093183e-07, + "loss": 0.3051, + "step": 51594 + }, + { + "epoch": 0.8915364943323196, + "grad_norm": 1.6588846405458704, + "learning_rate": 6.107109911321452e-07, + "loss": 0.394, + "step": 51595 + }, + { + "epoch": 0.8915537738457285, + "grad_norm": 1.3650037665565202, + "learning_rate": 6.105184241629369e-07, + "loss": 0.309, + "step": 51596 + }, + { + "epoch": 0.8915710533591374, + "grad_norm": 1.1240547819571305, + "learning_rate": 6.103258866022998e-07, + "loss": 0.5828, + "step": 51597 + }, + { + "epoch": 0.8915883328725464, + "grad_norm": 0.8273750316195528, + "learning_rate": 6.101333784508334e-07, + "loss": 0.3568, + "step": 51598 + }, + { + "epoch": 0.8916056123859553, + "grad_norm": 1.2175419523003366, + "learning_rate": 6.099408997091438e-07, + "loss": 0.3329, + "step": 51599 + }, + { + "epoch": 0.8916228918993642, + "grad_norm": 0.7504666828591978, + "learning_rate": 6.097484503778328e-07, + "loss": 0.5653, + "step": 51600 + }, + { + "epoch": 0.891640171412773, + "grad_norm": 1.0011839778147296, + "learning_rate": 6.095560304575043e-07, + "loss": 0.2322, + "step": 51601 + }, + { + "epoch": 0.8916574509261819, + "grad_norm": 0.6550263109980415, + "learning_rate": 6.093636399487613e-07, + "loss": 0.7061, + "step": 51602 + }, + { + "epoch": 0.8916747304395908, + "grad_norm": 1.4238932721842463, + "learning_rate": 6.09171278852202e-07, + "loss": 0.2447, + "step": 51603 + }, + { + "epoch": 0.8916920099529997, + "grad_norm": 1.4357811783403243, + "learning_rate": 6.089789471684338e-07, + "loss": 0.5048, + "step": 51604 + }, + { + "epoch": 0.8917092894664086, + "grad_norm": 1.5195969089358627, + "learning_rate": 6.087866448980551e-07, + "loss": 0.2041, + "step": 51605 + }, + { + "epoch": 0.8917265689798175, + "grad_norm": 2.5416556447673675, + "learning_rate": 6.08594372041672e-07, + "loss": 0.2446, + "step": 51606 + }, + { + "epoch": 0.8917438484932264, + "grad_norm": 1.2830180664854893, + "learning_rate": 6.084021285998843e-07, + "loss": 0.3148, + "step": 51607 + }, + { + "epoch": 0.8917611280066353, + "grad_norm": 0.9372716742002105, + "learning_rate": 6.082099145732967e-07, + "loss": 0.1541, + "step": 51608 + }, + { + "epoch": 0.8917784075200442, + "grad_norm": 1.491460737722677, + "learning_rate": 6.080177299625067e-07, + "loss": 0.4496, + "step": 51609 + }, + { + "epoch": 0.8917956870334531, + "grad_norm": 1.281295666446927, + "learning_rate": 6.078255747681216e-07, + "loss": 0.2319, + "step": 51610 + }, + { + "epoch": 0.891812966546862, + "grad_norm": 1.3784350650329105, + "learning_rate": 6.076334489907388e-07, + "loss": 0.4091, + "step": 51611 + }, + { + "epoch": 0.891830246060271, + "grad_norm": 1.4665382344979312, + "learning_rate": 6.074413526309619e-07, + "loss": 0.4194, + "step": 51612 + }, + { + "epoch": 0.8918475255736799, + "grad_norm": 1.546189919817958, + "learning_rate": 6.072492856893941e-07, + "loss": 0.3169, + "step": 51613 + }, + { + "epoch": 0.8918648050870888, + "grad_norm": 0.7752041947305979, + "learning_rate": 6.070572481666337e-07, + "loss": 0.5084, + "step": 51614 + }, + { + "epoch": 0.8918820846004977, + "grad_norm": 1.6916974089743335, + "learning_rate": 6.068652400632847e-07, + "loss": 0.2533, + "step": 51615 + }, + { + "epoch": 0.8918993641139066, + "grad_norm": 1.3236400986230346, + "learning_rate": 6.066732613799464e-07, + "loss": 0.321, + "step": 51616 + }, + { + "epoch": 0.8919166436273155, + "grad_norm": 1.009916933883776, + "learning_rate": 6.06481312117222e-07, + "loss": 0.3081, + "step": 51617 + }, + { + "epoch": 0.8919339231407244, + "grad_norm": 1.2165948996026572, + "learning_rate": 6.062893922757118e-07, + "loss": 0.5025, + "step": 51618 + }, + { + "epoch": 0.8919512026541333, + "grad_norm": 1.353044095800635, + "learning_rate": 6.060975018560178e-07, + "loss": 0.2771, + "step": 51619 + }, + { + "epoch": 0.8919684821675422, + "grad_norm": 1.023655917273035, + "learning_rate": 6.059056408587394e-07, + "loss": 0.2251, + "step": 51620 + }, + { + "epoch": 0.8919857616809511, + "grad_norm": 0.8649131475368004, + "learning_rate": 6.057138092844806e-07, + "loss": 0.4637, + "step": 51621 + }, + { + "epoch": 0.8920030411943599, + "grad_norm": 2.067591061711373, + "learning_rate": 6.055220071338374e-07, + "loss": 0.6176, + "step": 51622 + }, + { + "epoch": 0.8920203207077688, + "grad_norm": 1.0194159634224453, + "learning_rate": 6.053302344074141e-07, + "loss": 0.4265, + "step": 51623 + }, + { + "epoch": 0.8920376002211777, + "grad_norm": 0.6929932099475791, + "learning_rate": 6.051384911058112e-07, + "loss": 0.2157, + "step": 51624 + }, + { + "epoch": 0.8920548797345866, + "grad_norm": 1.1606914791945455, + "learning_rate": 6.049467772296281e-07, + "loss": 0.3115, + "step": 51625 + }, + { + "epoch": 0.8920721592479955, + "grad_norm": 1.3694011267356496, + "learning_rate": 6.047550927794666e-07, + "loss": 0.508, + "step": 51626 + }, + { + "epoch": 0.8920894387614045, + "grad_norm": 1.625955056775441, + "learning_rate": 6.04563437755924e-07, + "loss": 0.3038, + "step": 51627 + }, + { + "epoch": 0.8921067182748134, + "grad_norm": 1.2634332170010192, + "learning_rate": 6.043718121596043e-07, + "loss": 0.5739, + "step": 51628 + }, + { + "epoch": 0.8921239977882223, + "grad_norm": 1.2587034855328907, + "learning_rate": 6.041802159911048e-07, + "loss": 0.2613, + "step": 51629 + }, + { + "epoch": 0.8921412773016312, + "grad_norm": 1.6360431337941055, + "learning_rate": 6.039886492510282e-07, + "loss": 0.2585, + "step": 51630 + }, + { + "epoch": 0.8921585568150401, + "grad_norm": 0.8142205106443378, + "learning_rate": 6.037971119399733e-07, + "loss": 0.2231, + "step": 51631 + }, + { + "epoch": 0.892175836328449, + "grad_norm": 1.4510482414718378, + "learning_rate": 6.036056040585403e-07, + "loss": 0.1889, + "step": 51632 + }, + { + "epoch": 0.8921931158418579, + "grad_norm": 1.92545208321956, + "learning_rate": 6.034141256073267e-07, + "loss": 0.5811, + "step": 51633 + }, + { + "epoch": 0.8922103953552668, + "grad_norm": 2.0813084881940767, + "learning_rate": 6.032226765869354e-07, + "loss": 0.3894, + "step": 51634 + }, + { + "epoch": 0.8922276748686757, + "grad_norm": 0.967123748573722, + "learning_rate": 6.030312569979657e-07, + "loss": 0.4484, + "step": 51635 + }, + { + "epoch": 0.8922449543820846, + "grad_norm": 1.4058948321340947, + "learning_rate": 6.028398668410152e-07, + "loss": 0.1823, + "step": 51636 + }, + { + "epoch": 0.8922622338954935, + "grad_norm": 1.4690268056774511, + "learning_rate": 6.026485061166854e-07, + "loss": 0.4095, + "step": 51637 + }, + { + "epoch": 0.8922795134089024, + "grad_norm": 1.4994816262026962, + "learning_rate": 6.024571748255725e-07, + "loss": 0.4164, + "step": 51638 + }, + { + "epoch": 0.8922967929223113, + "grad_norm": 1.0307084948833805, + "learning_rate": 6.022658729682784e-07, + "loss": 0.4066, + "step": 51639 + }, + { + "epoch": 0.8923140724357203, + "grad_norm": 1.5904450362392508, + "learning_rate": 6.020746005454026e-07, + "loss": 0.1699, + "step": 51640 + }, + { + "epoch": 0.8923313519491292, + "grad_norm": 1.517475477385421, + "learning_rate": 6.018833575575445e-07, + "loss": 0.3779, + "step": 51641 + }, + { + "epoch": 0.8923486314625381, + "grad_norm": 1.3560508738457282, + "learning_rate": 6.016921440053015e-07, + "loss": 0.2824, + "step": 51642 + }, + { + "epoch": 0.8923659109759469, + "grad_norm": 2.3356463212011813, + "learning_rate": 6.015009598892718e-07, + "loss": 0.3746, + "step": 51643 + }, + { + "epoch": 0.8923831904893558, + "grad_norm": 1.4562389255310906, + "learning_rate": 6.013098052100563e-07, + "loss": 0.7306, + "step": 51644 + }, + { + "epoch": 0.8924004700027647, + "grad_norm": 1.1463210046467658, + "learning_rate": 6.011186799682511e-07, + "loss": 0.2504, + "step": 51645 + }, + { + "epoch": 0.8924177495161736, + "grad_norm": 1.6105915657627374, + "learning_rate": 6.00927584164458e-07, + "loss": 0.5078, + "step": 51646 + }, + { + "epoch": 0.8924350290295825, + "grad_norm": 1.3233223566541272, + "learning_rate": 6.00736517799273e-07, + "loss": 0.5094, + "step": 51647 + }, + { + "epoch": 0.8924523085429914, + "grad_norm": 0.9233316543124628, + "learning_rate": 6.005454808732969e-07, + "loss": 0.2199, + "step": 51648 + }, + { + "epoch": 0.8924695880564003, + "grad_norm": 1.8700920762403852, + "learning_rate": 6.003544733871258e-07, + "loss": 0.2002, + "step": 51649 + }, + { + "epoch": 0.8924868675698092, + "grad_norm": 1.3869874072667265, + "learning_rate": 6.001634953413604e-07, + "loss": 0.3263, + "step": 51650 + }, + { + "epoch": 0.8925041470832181, + "grad_norm": 1.1121812960106894, + "learning_rate": 5.999725467365935e-07, + "loss": 0.3061, + "step": 51651 + }, + { + "epoch": 0.892521426596627, + "grad_norm": 1.3568148013610415, + "learning_rate": 5.997816275734303e-07, + "loss": 0.4196, + "step": 51652 + }, + { + "epoch": 0.8925387061100359, + "grad_norm": 1.2959062585016168, + "learning_rate": 5.995907378524657e-07, + "loss": 0.4936, + "step": 51653 + }, + { + "epoch": 0.8925559856234448, + "grad_norm": 2.0071373417230527, + "learning_rate": 5.99399877574296e-07, + "loss": 0.2864, + "step": 51654 + }, + { + "epoch": 0.8925732651368538, + "grad_norm": 1.4889395944470185, + "learning_rate": 5.992090467395207e-07, + "loss": 0.43, + "step": 51655 + }, + { + "epoch": 0.8925905446502627, + "grad_norm": 1.1477960053392424, + "learning_rate": 5.990182453487359e-07, + "loss": 0.3017, + "step": 51656 + }, + { + "epoch": 0.8926078241636716, + "grad_norm": 0.6282782093787052, + "learning_rate": 5.988274734025412e-07, + "loss": 0.3122, + "step": 51657 + }, + { + "epoch": 0.8926251036770805, + "grad_norm": 1.4376234466011562, + "learning_rate": 5.986367309015329e-07, + "loss": 0.4361, + "step": 51658 + }, + { + "epoch": 0.8926423831904894, + "grad_norm": 1.1625792808719297, + "learning_rate": 5.984460178463103e-07, + "loss": 0.3259, + "step": 51659 + }, + { + "epoch": 0.8926596627038983, + "grad_norm": 1.9279813676813036, + "learning_rate": 5.982553342374675e-07, + "loss": 0.5223, + "step": 51660 + }, + { + "epoch": 0.8926769422173072, + "grad_norm": 1.0642520143027303, + "learning_rate": 5.980646800756051e-07, + "loss": 0.326, + "step": 51661 + }, + { + "epoch": 0.8926942217307161, + "grad_norm": 1.4544763759727422, + "learning_rate": 5.978740553613182e-07, + "loss": 0.4609, + "step": 51662 + }, + { + "epoch": 0.892711501244125, + "grad_norm": 2.330766148748254, + "learning_rate": 5.976834600952031e-07, + "loss": 0.2511, + "step": 51663 + }, + { + "epoch": 0.8927287807575338, + "grad_norm": 1.3538406433512464, + "learning_rate": 5.974928942778602e-07, + "loss": 0.5136, + "step": 51664 + }, + { + "epoch": 0.8927460602709427, + "grad_norm": 0.7477149776556585, + "learning_rate": 5.973023579098814e-07, + "loss": 0.7021, + "step": 51665 + }, + { + "epoch": 0.8927633397843516, + "grad_norm": 1.4168238406574298, + "learning_rate": 5.971118509918694e-07, + "loss": 0.3746, + "step": 51666 + }, + { + "epoch": 0.8927806192977605, + "grad_norm": 0.9502584773460054, + "learning_rate": 5.96921373524415e-07, + "loss": 0.2218, + "step": 51667 + }, + { + "epoch": 0.8927978988111694, + "grad_norm": 1.5198721615978399, + "learning_rate": 5.967309255081177e-07, + "loss": 0.4945, + "step": 51668 + }, + { + "epoch": 0.8928151783245784, + "grad_norm": 1.510845974362475, + "learning_rate": 5.965405069435736e-07, + "loss": 0.4554, + "step": 51669 + }, + { + "epoch": 0.8928324578379873, + "grad_norm": 1.2162189088478006, + "learning_rate": 5.963501178313813e-07, + "loss": 0.3615, + "step": 51670 + }, + { + "epoch": 0.8928497373513962, + "grad_norm": 0.889767175314938, + "learning_rate": 5.961597581721334e-07, + "loss": 0.3818, + "step": 51671 + }, + { + "epoch": 0.8928670168648051, + "grad_norm": 1.0679630036336605, + "learning_rate": 5.959694279664285e-07, + "loss": 0.4099, + "step": 51672 + }, + { + "epoch": 0.892884296378214, + "grad_norm": 1.659056833462288, + "learning_rate": 5.957791272148616e-07, + "loss": 0.2469, + "step": 51673 + }, + { + "epoch": 0.8929015758916229, + "grad_norm": 1.31984100557232, + "learning_rate": 5.95588855918029e-07, + "loss": 0.3884, + "step": 51674 + }, + { + "epoch": 0.8929188554050318, + "grad_norm": 0.9115603520195105, + "learning_rate": 5.953986140765277e-07, + "loss": 0.256, + "step": 51675 + }, + { + "epoch": 0.8929361349184407, + "grad_norm": 0.6011923746383165, + "learning_rate": 5.952084016909521e-07, + "loss": 0.4855, + "step": 51676 + }, + { + "epoch": 0.8929534144318496, + "grad_norm": 1.373586083961368, + "learning_rate": 5.950182187618992e-07, + "loss": 0.2848, + "step": 51677 + }, + { + "epoch": 0.8929706939452585, + "grad_norm": 1.341406477219085, + "learning_rate": 5.948280652899629e-07, + "loss": 0.2968, + "step": 51678 + }, + { + "epoch": 0.8929879734586674, + "grad_norm": 1.2502359310172453, + "learning_rate": 5.946379412757397e-07, + "loss": 0.3277, + "step": 51679 + }, + { + "epoch": 0.8930052529720763, + "grad_norm": 1.8392184211524143, + "learning_rate": 5.944478467198245e-07, + "loss": 0.414, + "step": 51680 + }, + { + "epoch": 0.8930225324854852, + "grad_norm": 1.1858926941788221, + "learning_rate": 5.942577816228156e-07, + "loss": 0.3474, + "step": 51681 + }, + { + "epoch": 0.8930398119988942, + "grad_norm": 0.8595061102114765, + "learning_rate": 5.94067745985305e-07, + "loss": 0.4145, + "step": 51682 + }, + { + "epoch": 0.8930570915123031, + "grad_norm": 1.1196319145247489, + "learning_rate": 5.93877739807891e-07, + "loss": 0.1943, + "step": 51683 + }, + { + "epoch": 0.893074371025712, + "grad_norm": 1.4462772596403843, + "learning_rate": 5.936877630911653e-07, + "loss": 0.3403, + "step": 51684 + }, + { + "epoch": 0.8930916505391209, + "grad_norm": 1.6281915320904996, + "learning_rate": 5.93497815835723e-07, + "loss": 0.3356, + "step": 51685 + }, + { + "epoch": 0.8931089300525297, + "grad_norm": 0.9090901802743442, + "learning_rate": 5.933078980421625e-07, + "loss": 0.3251, + "step": 51686 + }, + { + "epoch": 0.8931262095659386, + "grad_norm": 0.961880634641068, + "learning_rate": 5.931180097110745e-07, + "loss": 0.3965, + "step": 51687 + }, + { + "epoch": 0.8931434890793475, + "grad_norm": 1.4152608387867167, + "learning_rate": 5.929281508430585e-07, + "loss": 0.3378, + "step": 51688 + }, + { + "epoch": 0.8931607685927564, + "grad_norm": 1.039005727005954, + "learning_rate": 5.927383214387039e-07, + "loss": 0.2989, + "step": 51689 + }, + { + "epoch": 0.8931780481061653, + "grad_norm": 2.1129248799383307, + "learning_rate": 5.925485214986093e-07, + "loss": 0.2484, + "step": 51690 + }, + { + "epoch": 0.8931953276195742, + "grad_norm": 1.5987194834168719, + "learning_rate": 5.923587510233653e-07, + "loss": 0.2614, + "step": 51691 + }, + { + "epoch": 0.8932126071329831, + "grad_norm": 2.116717299669502, + "learning_rate": 5.921690100135713e-07, + "loss": 0.5291, + "step": 51692 + }, + { + "epoch": 0.893229886646392, + "grad_norm": 1.2259647279057966, + "learning_rate": 5.91979298469818e-07, + "loss": 0.402, + "step": 51693 + }, + { + "epoch": 0.8932471661598009, + "grad_norm": 1.1513663161107421, + "learning_rate": 5.917896163927017e-07, + "loss": 0.3564, + "step": 51694 + }, + { + "epoch": 0.8932644456732098, + "grad_norm": 1.9975262156689308, + "learning_rate": 5.915999637828151e-07, + "loss": 0.2424, + "step": 51695 + }, + { + "epoch": 0.8932817251866187, + "grad_norm": 0.8674453549954524, + "learning_rate": 5.914103406407512e-07, + "loss": 0.226, + "step": 51696 + }, + { + "epoch": 0.8932990047000277, + "grad_norm": 1.5365959107088902, + "learning_rate": 5.91220746967105e-07, + "loss": 0.3783, + "step": 51697 + }, + { + "epoch": 0.8933162842134366, + "grad_norm": 1.3049934665526375, + "learning_rate": 5.910311827624704e-07, + "loss": 0.1675, + "step": 51698 + }, + { + "epoch": 0.8933335637268455, + "grad_norm": 1.3103543395528632, + "learning_rate": 5.908416480274426e-07, + "loss": 0.3004, + "step": 51699 + }, + { + "epoch": 0.8933508432402544, + "grad_norm": 1.1312805542563897, + "learning_rate": 5.906521427626122e-07, + "loss": 0.2658, + "step": 51700 + }, + { + "epoch": 0.8933681227536633, + "grad_norm": 1.3049645926072424, + "learning_rate": 5.904626669685764e-07, + "loss": 0.4409, + "step": 51701 + }, + { + "epoch": 0.8933854022670722, + "grad_norm": 0.8165735871256681, + "learning_rate": 5.902732206459249e-07, + "loss": 0.6328, + "step": 51702 + }, + { + "epoch": 0.8934026817804811, + "grad_norm": 1.8522534704923144, + "learning_rate": 5.900838037952527e-07, + "loss": 0.4358, + "step": 51703 + }, + { + "epoch": 0.89341996129389, + "grad_norm": 2.001005460114801, + "learning_rate": 5.89894416417155e-07, + "loss": 0.3448, + "step": 51704 + }, + { + "epoch": 0.8934372408072989, + "grad_norm": 1.1416205675373536, + "learning_rate": 5.89705058512221e-07, + "loss": 0.3478, + "step": 51705 + }, + { + "epoch": 0.8934545203207078, + "grad_norm": 1.3923376347795247, + "learning_rate": 5.895157300810472e-07, + "loss": 0.2368, + "step": 51706 + }, + { + "epoch": 0.8934717998341166, + "grad_norm": 1.5931152371277673, + "learning_rate": 5.893264311242253e-07, + "loss": 0.33, + "step": 51707 + }, + { + "epoch": 0.8934890793475255, + "grad_norm": 1.3492051558223876, + "learning_rate": 5.891371616423469e-07, + "loss": 0.3576, + "step": 51708 + }, + { + "epoch": 0.8935063588609344, + "grad_norm": 1.1815336327147674, + "learning_rate": 5.889479216360061e-07, + "loss": 0.5209, + "step": 51709 + }, + { + "epoch": 0.8935236383743433, + "grad_norm": 1.0010514518668545, + "learning_rate": 5.887587111057979e-07, + "loss": 0.3476, + "step": 51710 + }, + { + "epoch": 0.8935409178877523, + "grad_norm": 0.5633675255472296, + "learning_rate": 5.885695300523109e-07, + "loss": 0.6595, + "step": 51711 + }, + { + "epoch": 0.8935581974011612, + "grad_norm": 1.1092937563109084, + "learning_rate": 5.883803784761399e-07, + "loss": 0.4269, + "step": 51712 + }, + { + "epoch": 0.8935754769145701, + "grad_norm": 1.1273313648771404, + "learning_rate": 5.881912563778768e-07, + "loss": 0.3859, + "step": 51713 + }, + { + "epoch": 0.893592756427979, + "grad_norm": 1.392463749096724, + "learning_rate": 5.880021637581135e-07, + "loss": 0.3879, + "step": 51714 + }, + { + "epoch": 0.8936100359413879, + "grad_norm": 0.7344098033362415, + "learning_rate": 5.878131006174437e-07, + "loss": 0.2523, + "step": 51715 + }, + { + "epoch": 0.8936273154547968, + "grad_norm": 0.861487195508329, + "learning_rate": 5.876240669564571e-07, + "loss": 0.1342, + "step": 51716 + }, + { + "epoch": 0.8936445949682057, + "grad_norm": 0.9471625755254507, + "learning_rate": 5.874350627757486e-07, + "loss": 0.2155, + "step": 51717 + }, + { + "epoch": 0.8936618744816146, + "grad_norm": 0.6728541901714897, + "learning_rate": 5.872460880759079e-07, + "loss": 0.7998, + "step": 51718 + }, + { + "epoch": 0.8936791539950235, + "grad_norm": 1.2202545204230644, + "learning_rate": 5.870571428575278e-07, + "loss": 0.3589, + "step": 51719 + }, + { + "epoch": 0.8936964335084324, + "grad_norm": 1.4252650037871089, + "learning_rate": 5.868682271212e-07, + "loss": 0.3194, + "step": 51720 + }, + { + "epoch": 0.8937137130218413, + "grad_norm": 1.7501253052719201, + "learning_rate": 5.866793408675175e-07, + "loss": 0.3921, + "step": 51721 + }, + { + "epoch": 0.8937309925352502, + "grad_norm": 1.1360595729103353, + "learning_rate": 5.864904840970698e-07, + "loss": 0.3923, + "step": 51722 + }, + { + "epoch": 0.8937482720486591, + "grad_norm": 1.1285813266904452, + "learning_rate": 5.863016568104496e-07, + "loss": 0.4775, + "step": 51723 + }, + { + "epoch": 0.893765551562068, + "grad_norm": 2.6286858120247696, + "learning_rate": 5.861128590082476e-07, + "loss": 0.2957, + "step": 51724 + }, + { + "epoch": 0.893782831075477, + "grad_norm": 0.9008899450417024, + "learning_rate": 5.859240906910558e-07, + "loss": 0.3562, + "step": 51725 + }, + { + "epoch": 0.8938001105888859, + "grad_norm": 1.1077344455187432, + "learning_rate": 5.857353518594655e-07, + "loss": 0.3118, + "step": 51726 + }, + { + "epoch": 0.8938173901022948, + "grad_norm": 0.7273541732477311, + "learning_rate": 5.855466425140666e-07, + "loss": 0.6066, + "step": 51727 + }, + { + "epoch": 0.8938346696157036, + "grad_norm": 1.3435208917491805, + "learning_rate": 5.853579626554528e-07, + "loss": 0.2868, + "step": 51728 + }, + { + "epoch": 0.8938519491291125, + "grad_norm": 1.36822785212985, + "learning_rate": 5.851693122842117e-07, + "loss": 0.3456, + "step": 51729 + }, + { + "epoch": 0.8938692286425214, + "grad_norm": 0.7811989159224129, + "learning_rate": 5.849806914009371e-07, + "loss": 0.2899, + "step": 51730 + }, + { + "epoch": 0.8938865081559303, + "grad_norm": 0.9844999867966236, + "learning_rate": 5.847921000062162e-07, + "loss": 0.4114, + "step": 51731 + }, + { + "epoch": 0.8939037876693392, + "grad_norm": 0.8240978534151998, + "learning_rate": 5.846035381006442e-07, + "loss": 0.4146, + "step": 51732 + }, + { + "epoch": 0.8939210671827481, + "grad_norm": 1.0989301211582558, + "learning_rate": 5.844150056848085e-07, + "loss": 0.2263, + "step": 51733 + }, + { + "epoch": 0.893938346696157, + "grad_norm": 1.5518859661996263, + "learning_rate": 5.842265027593019e-07, + "loss": 0.633, + "step": 51734 + }, + { + "epoch": 0.8939556262095659, + "grad_norm": 1.002160753222506, + "learning_rate": 5.840380293247128e-07, + "loss": 0.524, + "step": 51735 + }, + { + "epoch": 0.8939729057229748, + "grad_norm": 0.9246629925949954, + "learning_rate": 5.838495853816318e-07, + "loss": 0.1586, + "step": 51736 + }, + { + "epoch": 0.8939901852363837, + "grad_norm": 1.5486738871567602, + "learning_rate": 5.836611709306483e-07, + "loss": 0.3222, + "step": 51737 + }, + { + "epoch": 0.8940074647497926, + "grad_norm": 1.7823020819071085, + "learning_rate": 5.834727859723544e-07, + "loss": 0.3801, + "step": 51738 + }, + { + "epoch": 0.8940247442632016, + "grad_norm": 1.2924504195692668, + "learning_rate": 5.832844305073415e-07, + "loss": 0.3075, + "step": 51739 + }, + { + "epoch": 0.8940420237766105, + "grad_norm": 1.728818850388566, + "learning_rate": 5.83096104536195e-07, + "loss": 0.2051, + "step": 51740 + }, + { + "epoch": 0.8940593032900194, + "grad_norm": 1.315205657306881, + "learning_rate": 5.829078080595085e-07, + "loss": 0.3481, + "step": 51741 + }, + { + "epoch": 0.8940765828034283, + "grad_norm": 1.0533775186633072, + "learning_rate": 5.827195410778696e-07, + "loss": 0.5357, + "step": 51742 + }, + { + "epoch": 0.8940938623168372, + "grad_norm": 1.0888365740464563, + "learning_rate": 5.825313035918678e-07, + "loss": 0.3445, + "step": 51743 + }, + { + "epoch": 0.8941111418302461, + "grad_norm": 1.1107622285919938, + "learning_rate": 5.823430956020948e-07, + "loss": 0.4341, + "step": 51744 + }, + { + "epoch": 0.894128421343655, + "grad_norm": 1.3912718020770372, + "learning_rate": 5.8215491710914e-07, + "loss": 0.2894, + "step": 51745 + }, + { + "epoch": 0.8941457008570639, + "grad_norm": 1.0436065817655535, + "learning_rate": 5.81966768113591e-07, + "loss": 0.3629, + "step": 51746 + }, + { + "epoch": 0.8941629803704728, + "grad_norm": 1.0167254642573906, + "learning_rate": 5.81778648616037e-07, + "loss": 0.2858, + "step": 51747 + }, + { + "epoch": 0.8941802598838817, + "grad_norm": 1.5390388496955227, + "learning_rate": 5.815905586170689e-07, + "loss": 0.1876, + "step": 51748 + }, + { + "epoch": 0.8941975393972905, + "grad_norm": 1.3844125535727518, + "learning_rate": 5.814024981172738e-07, + "loss": 0.388, + "step": 51749 + }, + { + "epoch": 0.8942148189106994, + "grad_norm": 1.1369679029588613, + "learning_rate": 5.812144671172438e-07, + "loss": 0.3097, + "step": 51750 + }, + { + "epoch": 0.8942320984241083, + "grad_norm": 1.062317959876621, + "learning_rate": 5.810264656175635e-07, + "loss": 0.4742, + "step": 51751 + }, + { + "epoch": 0.8942493779375172, + "grad_norm": 2.0980505572180737, + "learning_rate": 5.808384936188271e-07, + "loss": 0.1795, + "step": 51752 + }, + { + "epoch": 0.8942666574509262, + "grad_norm": 1.6797767938827288, + "learning_rate": 5.806505511216176e-07, + "loss": 0.5761, + "step": 51753 + }, + { + "epoch": 0.8942839369643351, + "grad_norm": 0.8249767534579634, + "learning_rate": 5.804626381265266e-07, + "loss": 0.4376, + "step": 51754 + }, + { + "epoch": 0.894301216477744, + "grad_norm": 1.7102369932405046, + "learning_rate": 5.802747546341447e-07, + "loss": 0.284, + "step": 51755 + }, + { + "epoch": 0.8943184959911529, + "grad_norm": 1.3017716114901114, + "learning_rate": 5.80086900645056e-07, + "loss": 0.1913, + "step": 51756 + }, + { + "epoch": 0.8943357755045618, + "grad_norm": 1.5203231445234955, + "learning_rate": 5.798990761598533e-07, + "loss": 0.2881, + "step": 51757 + }, + { + "epoch": 0.8943530550179707, + "grad_norm": 1.3846916797279223, + "learning_rate": 5.797112811791206e-07, + "loss": 0.4765, + "step": 51758 + }, + { + "epoch": 0.8943703345313796, + "grad_norm": 0.9653394455845324, + "learning_rate": 5.795235157034473e-07, + "loss": 0.512, + "step": 51759 + }, + { + "epoch": 0.8943876140447885, + "grad_norm": 1.3423602890840658, + "learning_rate": 5.793357797334232e-07, + "loss": 0.3455, + "step": 51760 + }, + { + "epoch": 0.8944048935581974, + "grad_norm": 0.9817498845704435, + "learning_rate": 5.791480732696364e-07, + "loss": 0.4801, + "step": 51761 + }, + { + "epoch": 0.8944221730716063, + "grad_norm": 1.9836489969998963, + "learning_rate": 5.789603963126733e-07, + "loss": 0.4539, + "step": 51762 + }, + { + "epoch": 0.8944394525850152, + "grad_norm": 1.0962391554390207, + "learning_rate": 5.787727488631223e-07, + "loss": 0.3106, + "step": 51763 + }, + { + "epoch": 0.8944567320984241, + "grad_norm": 1.1051061690224906, + "learning_rate": 5.785851309215707e-07, + "loss": 0.4695, + "step": 51764 + }, + { + "epoch": 0.894474011611833, + "grad_norm": 1.492031241052852, + "learning_rate": 5.783975424886057e-07, + "loss": 0.248, + "step": 51765 + }, + { + "epoch": 0.894491291125242, + "grad_norm": 1.1902909270906055, + "learning_rate": 5.78209983564818e-07, + "loss": 0.5293, + "step": 51766 + }, + { + "epoch": 0.8945085706386509, + "grad_norm": 1.912853732210113, + "learning_rate": 5.780224541507906e-07, + "loss": 0.4878, + "step": 51767 + }, + { + "epoch": 0.8945258501520598, + "grad_norm": 1.4085500578526629, + "learning_rate": 5.77834954247114e-07, + "loss": 0.2685, + "step": 51768 + }, + { + "epoch": 0.8945431296654687, + "grad_norm": 1.2929814897037326, + "learning_rate": 5.776474838543733e-07, + "loss": 0.4588, + "step": 51769 + }, + { + "epoch": 0.8945604091788775, + "grad_norm": 2.078021372593731, + "learning_rate": 5.77460042973158e-07, + "loss": 0.5, + "step": 51770 + }, + { + "epoch": 0.8945776886922864, + "grad_norm": 0.8505984175771738, + "learning_rate": 5.772726316040512e-07, + "loss": 0.2739, + "step": 51771 + }, + { + "epoch": 0.8945949682056953, + "grad_norm": 0.9264446793175181, + "learning_rate": 5.770852497476454e-07, + "loss": 0.3552, + "step": 51772 + }, + { + "epoch": 0.8946122477191042, + "grad_norm": 2.738947465617844, + "learning_rate": 5.768978974045236e-07, + "loss": 0.181, + "step": 51773 + }, + { + "epoch": 0.8946295272325131, + "grad_norm": 1.2499873938931365, + "learning_rate": 5.767105745752755e-07, + "loss": 0.3796, + "step": 51774 + }, + { + "epoch": 0.894646806745922, + "grad_norm": 1.9108464458607013, + "learning_rate": 5.765232812604838e-07, + "loss": 0.2808, + "step": 51775 + }, + { + "epoch": 0.8946640862593309, + "grad_norm": 1.306821217312961, + "learning_rate": 5.763360174607391e-07, + "loss": 0.2736, + "step": 51776 + }, + { + "epoch": 0.8946813657727398, + "grad_norm": 1.5329008150640542, + "learning_rate": 5.761487831766244e-07, + "loss": 0.3913, + "step": 51777 + }, + { + "epoch": 0.8946986452861487, + "grad_norm": 1.238097777418866, + "learning_rate": 5.759615784087281e-07, + "loss": 0.4303, + "step": 51778 + }, + { + "epoch": 0.8947159247995576, + "grad_norm": 1.1767568283335403, + "learning_rate": 5.757744031576384e-07, + "loss": 0.4203, + "step": 51779 + }, + { + "epoch": 0.8947332043129665, + "grad_norm": 1.1031745604944043, + "learning_rate": 5.755872574239374e-07, + "loss": 0.3873, + "step": 51780 + }, + { + "epoch": 0.8947504838263755, + "grad_norm": 1.3212184351344485, + "learning_rate": 5.754001412082144e-07, + "loss": 0.4383, + "step": 51781 + }, + { + "epoch": 0.8947677633397844, + "grad_norm": 1.4272376288069581, + "learning_rate": 5.752130545110535e-07, + "loss": 0.1323, + "step": 51782 + }, + { + "epoch": 0.8947850428531933, + "grad_norm": 1.3207854962070689, + "learning_rate": 5.750259973330419e-07, + "loss": 0.1722, + "step": 51783 + }, + { + "epoch": 0.8948023223666022, + "grad_norm": 1.5697186408312618, + "learning_rate": 5.748389696747648e-07, + "loss": 0.3542, + "step": 51784 + }, + { + "epoch": 0.8948196018800111, + "grad_norm": 1.7194211601474867, + "learning_rate": 5.746519715368093e-07, + "loss": 0.2808, + "step": 51785 + }, + { + "epoch": 0.89483688139342, + "grad_norm": 1.9766879598093736, + "learning_rate": 5.744650029197607e-07, + "loss": 0.335, + "step": 51786 + }, + { + "epoch": 0.8948541609068289, + "grad_norm": 1.5581305653020316, + "learning_rate": 5.742780638242018e-07, + "loss": 0.4616, + "step": 51787 + }, + { + "epoch": 0.8948714404202378, + "grad_norm": 1.356573376902934, + "learning_rate": 5.74091154250721e-07, + "loss": 0.3754, + "step": 51788 + }, + { + "epoch": 0.8948887199336467, + "grad_norm": 1.4591830246468498, + "learning_rate": 5.739042741999034e-07, + "loss": 0.388, + "step": 51789 + }, + { + "epoch": 0.8949059994470556, + "grad_norm": 0.9950129396618417, + "learning_rate": 5.737174236723353e-07, + "loss": 0.3775, + "step": 51790 + }, + { + "epoch": 0.8949232789604644, + "grad_norm": 0.8172970453657857, + "learning_rate": 5.735306026685983e-07, + "loss": 0.2975, + "step": 51791 + }, + { + "epoch": 0.8949405584738733, + "grad_norm": 1.5073758720408033, + "learning_rate": 5.73343811189282e-07, + "loss": 0.3414, + "step": 51792 + }, + { + "epoch": 0.8949578379872822, + "grad_norm": 1.6344695145141517, + "learning_rate": 5.73157049234968e-07, + "loss": 0.32, + "step": 51793 + }, + { + "epoch": 0.8949751175006911, + "grad_norm": 2.0798417523072774, + "learning_rate": 5.729703168062417e-07, + "loss": 0.273, + "step": 51794 + }, + { + "epoch": 0.8949923970141, + "grad_norm": 1.741524882256476, + "learning_rate": 5.727836139036902e-07, + "loss": 0.4333, + "step": 51795 + }, + { + "epoch": 0.895009676527509, + "grad_norm": 1.371815465272653, + "learning_rate": 5.725969405278975e-07, + "loss": 0.4253, + "step": 51796 + }, + { + "epoch": 0.8950269560409179, + "grad_norm": 0.9471085636232703, + "learning_rate": 5.724102966794476e-07, + "loss": 0.4326, + "step": 51797 + }, + { + "epoch": 0.8950442355543268, + "grad_norm": 1.005661764715335, + "learning_rate": 5.722236823589234e-07, + "loss": 0.2486, + "step": 51798 + }, + { + "epoch": 0.8950615150677357, + "grad_norm": 1.562829631608167, + "learning_rate": 5.72037097566911e-07, + "loss": 0.4035, + "step": 51799 + }, + { + "epoch": 0.8950787945811446, + "grad_norm": 1.0042433712192875, + "learning_rate": 5.718505423039955e-07, + "loss": 0.2234, + "step": 51800 + }, + { + "epoch": 0.8950960740945535, + "grad_norm": 1.5905432296373345, + "learning_rate": 5.716640165707621e-07, + "loss": 0.4707, + "step": 51801 + }, + { + "epoch": 0.8951133536079624, + "grad_norm": 0.7464321376352275, + "learning_rate": 5.714775203677914e-07, + "loss": 0.7228, + "step": 51802 + }, + { + "epoch": 0.8951306331213713, + "grad_norm": 1.0965445669513307, + "learning_rate": 5.712910536956717e-07, + "loss": 0.3012, + "step": 51803 + }, + { + "epoch": 0.8951479126347802, + "grad_norm": 2.3704235948970775, + "learning_rate": 5.711046165549827e-07, + "loss": 0.2862, + "step": 51804 + }, + { + "epoch": 0.8951651921481891, + "grad_norm": 1.525796904146068, + "learning_rate": 5.709182089463117e-07, + "loss": 0.3453, + "step": 51805 + }, + { + "epoch": 0.895182471661598, + "grad_norm": 1.1704081836580162, + "learning_rate": 5.707318308702414e-07, + "loss": 0.2704, + "step": 51806 + }, + { + "epoch": 0.8951997511750069, + "grad_norm": 2.156062656159161, + "learning_rate": 5.70545482327356e-07, + "loss": 0.2472, + "step": 51807 + }, + { + "epoch": 0.8952170306884158, + "grad_norm": 0.7649312558151307, + "learning_rate": 5.703591633182392e-07, + "loss": 0.2283, + "step": 51808 + }, + { + "epoch": 0.8952343102018248, + "grad_norm": 2.531973129778582, + "learning_rate": 5.701728738434731e-07, + "loss": 0.3278, + "step": 51809 + }, + { + "epoch": 0.8952515897152337, + "grad_norm": 1.6896361415704648, + "learning_rate": 5.699866139036436e-07, + "loss": 0.2256, + "step": 51810 + }, + { + "epoch": 0.8952688692286426, + "grad_norm": 1.3758007812133075, + "learning_rate": 5.698003834993294e-07, + "loss": 0.4588, + "step": 51811 + }, + { + "epoch": 0.8952861487420514, + "grad_norm": 1.6246179363912658, + "learning_rate": 5.696141826311208e-07, + "loss": 0.3481, + "step": 51812 + }, + { + "epoch": 0.8953034282554603, + "grad_norm": 1.0917587915437146, + "learning_rate": 5.694280112995953e-07, + "loss": 0.3496, + "step": 51813 + }, + { + "epoch": 0.8953207077688692, + "grad_norm": 1.6977834246207837, + "learning_rate": 5.692418695053393e-07, + "loss": 0.3665, + "step": 51814 + }, + { + "epoch": 0.8953379872822781, + "grad_norm": 1.013195525432372, + "learning_rate": 5.69055757248933e-07, + "loss": 0.2951, + "step": 51815 + }, + { + "epoch": 0.895355266795687, + "grad_norm": 1.0614748994988306, + "learning_rate": 5.688696745309618e-07, + "loss": 0.2956, + "step": 51816 + }, + { + "epoch": 0.8953725463090959, + "grad_norm": 0.9769741597221875, + "learning_rate": 5.686836213520064e-07, + "loss": 0.2626, + "step": 51817 + }, + { + "epoch": 0.8953898258225048, + "grad_norm": 2.095520409578233, + "learning_rate": 5.684975977126517e-07, + "loss": 0.1731, + "step": 51818 + }, + { + "epoch": 0.8954071053359137, + "grad_norm": 1.903283551607714, + "learning_rate": 5.683116036134795e-07, + "loss": 0.4722, + "step": 51819 + }, + { + "epoch": 0.8954243848493226, + "grad_norm": 1.2416567050023501, + "learning_rate": 5.681256390550704e-07, + "loss": 0.1528, + "step": 51820 + }, + { + "epoch": 0.8954416643627315, + "grad_norm": 1.070793278045011, + "learning_rate": 5.679397040380108e-07, + "loss": 0.3784, + "step": 51821 + }, + { + "epoch": 0.8954589438761404, + "grad_norm": 1.1687383764529324, + "learning_rate": 5.677537985628801e-07, + "loss": 0.3362, + "step": 51822 + }, + { + "epoch": 0.8954762233895494, + "grad_norm": 1.6666622607929478, + "learning_rate": 5.675679226302599e-07, + "loss": 0.2647, + "step": 51823 + }, + { + "epoch": 0.8954935029029583, + "grad_norm": 0.9546409330543191, + "learning_rate": 5.673820762407344e-07, + "loss": 0.2864, + "step": 51824 + }, + { + "epoch": 0.8955107824163672, + "grad_norm": 2.1857787227903045, + "learning_rate": 5.671962593948866e-07, + "loss": 0.3939, + "step": 51825 + }, + { + "epoch": 0.8955280619297761, + "grad_norm": 1.4807231902406848, + "learning_rate": 5.670104720932956e-07, + "loss": 0.7002, + "step": 51826 + }, + { + "epoch": 0.895545341443185, + "grad_norm": 1.7177610100857421, + "learning_rate": 5.668247143365458e-07, + "loss": 0.3185, + "step": 51827 + }, + { + "epoch": 0.8955626209565939, + "grad_norm": 0.9537108322232545, + "learning_rate": 5.666389861252175e-07, + "loss": 0.4592, + "step": 51828 + }, + { + "epoch": 0.8955799004700028, + "grad_norm": 2.1494779967024304, + "learning_rate": 5.664532874598916e-07, + "loss": 0.2706, + "step": 51829 + }, + { + "epoch": 0.8955971799834117, + "grad_norm": 2.1817502828181765, + "learning_rate": 5.66267618341153e-07, + "loss": 0.3741, + "step": 51830 + }, + { + "epoch": 0.8956144594968206, + "grad_norm": 1.2077776358752876, + "learning_rate": 5.660819787695792e-07, + "loss": 0.4352, + "step": 51831 + }, + { + "epoch": 0.8956317390102295, + "grad_norm": 2.2270920382996406, + "learning_rate": 5.658963687457552e-07, + "loss": 0.4177, + "step": 51832 + }, + { + "epoch": 0.8956490185236384, + "grad_norm": 1.540830652875331, + "learning_rate": 5.657107882702595e-07, + "loss": 0.343, + "step": 51833 + }, + { + "epoch": 0.8956662980370472, + "grad_norm": 1.4808347524654875, + "learning_rate": 5.655252373436748e-07, + "loss": 0.5788, + "step": 51834 + }, + { + "epoch": 0.8956835775504561, + "grad_norm": 1.5697452120082815, + "learning_rate": 5.653397159665819e-07, + "loss": 0.4572, + "step": 51835 + }, + { + "epoch": 0.895700857063865, + "grad_norm": 0.9633281841784798, + "learning_rate": 5.651542241395624e-07, + "loss": 0.1352, + "step": 51836 + }, + { + "epoch": 0.895718136577274, + "grad_norm": 1.358870388561424, + "learning_rate": 5.649687618631972e-07, + "loss": 0.3176, + "step": 51837 + }, + { + "epoch": 0.8957354160906829, + "grad_norm": 1.373111004533711, + "learning_rate": 5.647833291380666e-07, + "loss": 0.2883, + "step": 51838 + }, + { + "epoch": 0.8957526956040918, + "grad_norm": 2.0230406600750475, + "learning_rate": 5.645979259647516e-07, + "loss": 0.3475, + "step": 51839 + }, + { + "epoch": 0.8957699751175007, + "grad_norm": 1.612612006690703, + "learning_rate": 5.644125523438326e-07, + "loss": 0.3066, + "step": 51840 + }, + { + "epoch": 0.8957872546309096, + "grad_norm": 1.7572595295231104, + "learning_rate": 5.642272082758915e-07, + "loss": 0.444, + "step": 51841 + }, + { + "epoch": 0.8958045341443185, + "grad_norm": 1.4178440389408231, + "learning_rate": 5.640418937615066e-07, + "loss": 0.4699, + "step": 51842 + }, + { + "epoch": 0.8958218136577274, + "grad_norm": 0.9363764990659397, + "learning_rate": 5.63856608801262e-07, + "loss": 0.2538, + "step": 51843 + }, + { + "epoch": 0.8958390931711363, + "grad_norm": 1.0125619641333057, + "learning_rate": 5.636713533957328e-07, + "loss": 0.3703, + "step": 51844 + }, + { + "epoch": 0.8958563726845452, + "grad_norm": 1.4184485329309469, + "learning_rate": 5.634861275455028e-07, + "loss": 0.3534, + "step": 51845 + }, + { + "epoch": 0.8958736521979541, + "grad_norm": 1.9534830053987264, + "learning_rate": 5.633009312511517e-07, + "loss": 0.2851, + "step": 51846 + }, + { + "epoch": 0.895890931711363, + "grad_norm": 0.9725668107788441, + "learning_rate": 5.631157645132601e-07, + "loss": 0.1545, + "step": 51847 + }, + { + "epoch": 0.8959082112247719, + "grad_norm": 1.3144359704041186, + "learning_rate": 5.629306273324076e-07, + "loss": 0.259, + "step": 51848 + }, + { + "epoch": 0.8959254907381808, + "grad_norm": 0.8047066402976154, + "learning_rate": 5.627455197091713e-07, + "loss": 0.3274, + "step": 51849 + }, + { + "epoch": 0.8959427702515897, + "grad_norm": 1.597591320815816, + "learning_rate": 5.625604416441355e-07, + "loss": 0.2176, + "step": 51850 + }, + { + "epoch": 0.8959600497649987, + "grad_norm": 0.947885055731189, + "learning_rate": 5.623753931378739e-07, + "loss": 0.1655, + "step": 51851 + }, + { + "epoch": 0.8959773292784076, + "grad_norm": 1.8192532514939852, + "learning_rate": 5.621903741909729e-07, + "loss": 0.1693, + "step": 51852 + }, + { + "epoch": 0.8959946087918165, + "grad_norm": 1.3410501615357981, + "learning_rate": 5.620053848040086e-07, + "loss": 0.5268, + "step": 51853 + }, + { + "epoch": 0.8960118883052254, + "grad_norm": 1.7453456976875148, + "learning_rate": 5.618204249775606e-07, + "loss": 0.467, + "step": 51854 + }, + { + "epoch": 0.8960291678186342, + "grad_norm": 0.9379662944690168, + "learning_rate": 5.616354947122082e-07, + "loss": 0.2162, + "step": 51855 + }, + { + "epoch": 0.8960464473320431, + "grad_norm": 1.370793022639602, + "learning_rate": 5.614505940085313e-07, + "loss": 0.32, + "step": 51856 + }, + { + "epoch": 0.896063726845452, + "grad_norm": 2.2445787573838483, + "learning_rate": 5.61265722867107e-07, + "loss": 0.4917, + "step": 51857 + }, + { + "epoch": 0.8960810063588609, + "grad_norm": 1.4777466454580475, + "learning_rate": 5.610808812885161e-07, + "loss": 0.7876, + "step": 51858 + }, + { + "epoch": 0.8960982858722698, + "grad_norm": 0.8774150920558668, + "learning_rate": 5.608960692733389e-07, + "loss": 0.3282, + "step": 51859 + }, + { + "epoch": 0.8961155653856787, + "grad_norm": 1.1706313251872336, + "learning_rate": 5.607112868221498e-07, + "loss": 0.3651, + "step": 51860 + }, + { + "epoch": 0.8961328448990876, + "grad_norm": 0.9892248527014387, + "learning_rate": 5.605265339355337e-07, + "loss": 0.2976, + "step": 51861 + }, + { + "epoch": 0.8961501244124965, + "grad_norm": 1.137480399573432, + "learning_rate": 5.603418106140624e-07, + "loss": 0.3389, + "step": 51862 + }, + { + "epoch": 0.8961674039259054, + "grad_norm": 0.8800319316158091, + "learning_rate": 5.601571168583198e-07, + "loss": 0.2623, + "step": 51863 + }, + { + "epoch": 0.8961846834393143, + "grad_norm": 1.0137961892412242, + "learning_rate": 5.59972452668881e-07, + "loss": 0.4406, + "step": 51864 + }, + { + "epoch": 0.8962019629527233, + "grad_norm": 0.9671136706321832, + "learning_rate": 5.597878180463278e-07, + "loss": 0.405, + "step": 51865 + }, + { + "epoch": 0.8962192424661322, + "grad_norm": 1.2141511316462978, + "learning_rate": 5.596032129912354e-07, + "loss": 0.3217, + "step": 51866 + }, + { + "epoch": 0.8962365219795411, + "grad_norm": 2.0221290837904826, + "learning_rate": 5.594186375041855e-07, + "loss": 0.2283, + "step": 51867 + }, + { + "epoch": 0.89625380149295, + "grad_norm": 2.492828270908847, + "learning_rate": 5.592340915857508e-07, + "loss": 0.3397, + "step": 51868 + }, + { + "epoch": 0.8962710810063589, + "grad_norm": 1.3319386078617224, + "learning_rate": 5.590495752365132e-07, + "loss": 0.3036, + "step": 51869 + }, + { + "epoch": 0.8962883605197678, + "grad_norm": 1.488348382794342, + "learning_rate": 5.588650884570512e-07, + "loss": 0.2855, + "step": 51870 + }, + { + "epoch": 0.8963056400331767, + "grad_norm": 1.4954584808694305, + "learning_rate": 5.586806312479398e-07, + "loss": 0.203, + "step": 51871 + }, + { + "epoch": 0.8963229195465856, + "grad_norm": 1.653081626766746, + "learning_rate": 5.584962036097597e-07, + "loss": 0.4389, + "step": 51872 + }, + { + "epoch": 0.8963401990599945, + "grad_norm": 1.0850713376184637, + "learning_rate": 5.583118055430858e-07, + "loss": 0.7942, + "step": 51873 + }, + { + "epoch": 0.8963574785734034, + "grad_norm": 1.515537548666605, + "learning_rate": 5.581274370484957e-07, + "loss": 0.2882, + "step": 51874 + }, + { + "epoch": 0.8963747580868123, + "grad_norm": 0.924105112868563, + "learning_rate": 5.579430981265699e-07, + "loss": 0.3621, + "step": 51875 + }, + { + "epoch": 0.8963920376002211, + "grad_norm": 0.957133812780094, + "learning_rate": 5.577587887778835e-07, + "loss": 0.1641, + "step": 51876 + }, + { + "epoch": 0.89640931711363, + "grad_norm": 0.8579087000685734, + "learning_rate": 5.575745090030138e-07, + "loss": 0.834, + "step": 51877 + }, + { + "epoch": 0.8964265966270389, + "grad_norm": 1.8716051603685564, + "learning_rate": 5.573902588025392e-07, + "loss": 0.2151, + "step": 51878 + }, + { + "epoch": 0.8964438761404478, + "grad_norm": 1.5737902015156375, + "learning_rate": 5.57206038177035e-07, + "loss": 0.301, + "step": 51879 + }, + { + "epoch": 0.8964611556538568, + "grad_norm": 1.3504658667423206, + "learning_rate": 5.570218471270783e-07, + "loss": 0.2158, + "step": 51880 + }, + { + "epoch": 0.8964784351672657, + "grad_norm": 1.6405075898391333, + "learning_rate": 5.568376856532498e-07, + "loss": 0.3241, + "step": 51881 + }, + { + "epoch": 0.8964957146806746, + "grad_norm": 1.1418016905820876, + "learning_rate": 5.566535537561201e-07, + "loss": 0.6647, + "step": 51882 + }, + { + "epoch": 0.8965129941940835, + "grad_norm": 0.8273271684608191, + "learning_rate": 5.564694514362723e-07, + "loss": 0.1825, + "step": 51883 + }, + { + "epoch": 0.8965302737074924, + "grad_norm": 1.3287466083920378, + "learning_rate": 5.562853786942768e-07, + "loss": 0.3027, + "step": 51884 + }, + { + "epoch": 0.8965475532209013, + "grad_norm": 0.9916692402750789, + "learning_rate": 5.561013355307144e-07, + "loss": 0.2193, + "step": 51885 + }, + { + "epoch": 0.8965648327343102, + "grad_norm": 1.3355889578396847, + "learning_rate": 5.5591732194616e-07, + "loss": 0.2787, + "step": 51886 + }, + { + "epoch": 0.8965821122477191, + "grad_norm": 1.6748369501797495, + "learning_rate": 5.557333379411911e-07, + "loss": 0.421, + "step": 51887 + }, + { + "epoch": 0.896599391761128, + "grad_norm": 0.8898163752138794, + "learning_rate": 5.555493835163817e-07, + "loss": 0.4268, + "step": 51888 + }, + { + "epoch": 0.8966166712745369, + "grad_norm": 0.4883803963034553, + "learning_rate": 5.553654586723112e-07, + "loss": 0.4301, + "step": 51889 + }, + { + "epoch": 0.8966339507879458, + "grad_norm": 1.0222064031190845, + "learning_rate": 5.551815634095548e-07, + "loss": 0.3268, + "step": 51890 + }, + { + "epoch": 0.8966512303013547, + "grad_norm": 1.1896656857113097, + "learning_rate": 5.549976977286853e-07, + "loss": 0.5417, + "step": 51891 + }, + { + "epoch": 0.8966685098147636, + "grad_norm": 1.4511157860335921, + "learning_rate": 5.5481386163028e-07, + "loss": 0.3185, + "step": 51892 + }, + { + "epoch": 0.8966857893281726, + "grad_norm": 2.1383517726085586, + "learning_rate": 5.546300551149165e-07, + "loss": 0.3205, + "step": 51893 + }, + { + "epoch": 0.8967030688415815, + "grad_norm": 1.3337723493305078, + "learning_rate": 5.544462781831705e-07, + "loss": 0.5831, + "step": 51894 + }, + { + "epoch": 0.8967203483549904, + "grad_norm": 1.2532447666861293, + "learning_rate": 5.542625308356153e-07, + "loss": 0.3943, + "step": 51895 + }, + { + "epoch": 0.8967376278683993, + "grad_norm": 1.8348282146143888, + "learning_rate": 5.540788130728292e-07, + "loss": 0.1894, + "step": 51896 + }, + { + "epoch": 0.8967549073818081, + "grad_norm": 1.5666212029968067, + "learning_rate": 5.538951248953839e-07, + "loss": 0.4593, + "step": 51897 + }, + { + "epoch": 0.896772186895217, + "grad_norm": 2.228033499771111, + "learning_rate": 5.537114663038579e-07, + "loss": 0.3658, + "step": 51898 + }, + { + "epoch": 0.8967894664086259, + "grad_norm": 1.552296062803741, + "learning_rate": 5.535278372988251e-07, + "loss": 0.3224, + "step": 51899 + }, + { + "epoch": 0.8968067459220348, + "grad_norm": 1.9788318005875372, + "learning_rate": 5.533442378808629e-07, + "loss": 0.5803, + "step": 51900 + }, + { + "epoch": 0.8968240254354437, + "grad_norm": 3.702318669737639, + "learning_rate": 5.531606680505441e-07, + "loss": 0.4023, + "step": 51901 + }, + { + "epoch": 0.8968413049488526, + "grad_norm": 0.9796322664674072, + "learning_rate": 5.529771278084428e-07, + "loss": 0.55, + "step": 51902 + }, + { + "epoch": 0.8968585844622615, + "grad_norm": 1.4280635967766104, + "learning_rate": 5.527936171551351e-07, + "loss": 0.4652, + "step": 51903 + }, + { + "epoch": 0.8968758639756704, + "grad_norm": 1.5276674534788768, + "learning_rate": 5.526101360911951e-07, + "loss": 0.247, + "step": 51904 + }, + { + "epoch": 0.8968931434890793, + "grad_norm": 1.5711956152580626, + "learning_rate": 5.524266846172e-07, + "loss": 0.3322, + "step": 51905 + }, + { + "epoch": 0.8969104230024882, + "grad_norm": 0.9939637818491565, + "learning_rate": 5.522432627337215e-07, + "loss": 0.2284, + "step": 51906 + }, + { + "epoch": 0.8969277025158972, + "grad_norm": 1.930686242576058, + "learning_rate": 5.52059870441336e-07, + "loss": 0.3702, + "step": 51907 + }, + { + "epoch": 0.8969449820293061, + "grad_norm": 1.593759742307114, + "learning_rate": 5.518765077406152e-07, + "loss": 0.6543, + "step": 51908 + }, + { + "epoch": 0.896962261542715, + "grad_norm": 1.0328802971522197, + "learning_rate": 5.516931746321363e-07, + "loss": 0.3347, + "step": 51909 + }, + { + "epoch": 0.8969795410561239, + "grad_norm": 0.9248858271853128, + "learning_rate": 5.515098711164734e-07, + "loss": 0.4401, + "step": 51910 + }, + { + "epoch": 0.8969968205695328, + "grad_norm": 1.1590856618481038, + "learning_rate": 5.513265971941973e-07, + "loss": 0.3403, + "step": 51911 + }, + { + "epoch": 0.8970141000829417, + "grad_norm": 2.3433587621562952, + "learning_rate": 5.511433528658872e-07, + "loss": 0.4098, + "step": 51912 + }, + { + "epoch": 0.8970313795963506, + "grad_norm": 1.4451029648538443, + "learning_rate": 5.509601381321117e-07, + "loss": 0.2907, + "step": 51913 + }, + { + "epoch": 0.8970486591097595, + "grad_norm": 1.9574923822145631, + "learning_rate": 5.507769529934481e-07, + "loss": 0.3873, + "step": 51914 + }, + { + "epoch": 0.8970659386231684, + "grad_norm": 1.068752958491855, + "learning_rate": 5.505937974504682e-07, + "loss": 0.2647, + "step": 51915 + }, + { + "epoch": 0.8970832181365773, + "grad_norm": 2.0601524649147205, + "learning_rate": 5.504106715037494e-07, + "loss": 0.2812, + "step": 51916 + }, + { + "epoch": 0.8971004976499862, + "grad_norm": 0.9906043937851944, + "learning_rate": 5.5022757515386e-07, + "loss": 0.3195, + "step": 51917 + }, + { + "epoch": 0.897117777163395, + "grad_norm": 1.6155922923770574, + "learning_rate": 5.500445084013773e-07, + "loss": 0.5337, + "step": 51918 + }, + { + "epoch": 0.8971350566768039, + "grad_norm": 0.8178102908811007, + "learning_rate": 5.498614712468731e-07, + "loss": 0.7449, + "step": 51919 + }, + { + "epoch": 0.8971523361902128, + "grad_norm": 0.9804665233341263, + "learning_rate": 5.496784636909192e-07, + "loss": 0.1222, + "step": 51920 + }, + { + "epoch": 0.8971696157036217, + "grad_norm": 1.2670939225610927, + "learning_rate": 5.494954857340939e-07, + "loss": 0.452, + "step": 51921 + }, + { + "epoch": 0.8971868952170307, + "grad_norm": 1.1907342982565816, + "learning_rate": 5.493125373769637e-07, + "loss": 0.2539, + "step": 51922 + }, + { + "epoch": 0.8972041747304396, + "grad_norm": 1.4679981303845688, + "learning_rate": 5.491296186201078e-07, + "loss": 0.3626, + "step": 51923 + }, + { + "epoch": 0.8972214542438485, + "grad_norm": 0.7607105760748685, + "learning_rate": 5.489467294640937e-07, + "loss": 0.7428, + "step": 51924 + }, + { + "epoch": 0.8972387337572574, + "grad_norm": 1.216919875324367, + "learning_rate": 5.487638699094977e-07, + "loss": 0.3684, + "step": 51925 + }, + { + "epoch": 0.8972560132706663, + "grad_norm": 1.7644241422057654, + "learning_rate": 5.485810399568902e-07, + "loss": 0.3052, + "step": 51926 + }, + { + "epoch": 0.8972732927840752, + "grad_norm": 1.1500816665068163, + "learning_rate": 5.483982396068477e-07, + "loss": 0.3706, + "step": 51927 + }, + { + "epoch": 0.8972905722974841, + "grad_norm": 1.5309910682082826, + "learning_rate": 5.482154688599383e-07, + "loss": 0.4683, + "step": 51928 + }, + { + "epoch": 0.897307851810893, + "grad_norm": 0.9095645078184224, + "learning_rate": 5.480327277167385e-07, + "loss": 0.4007, + "step": 51929 + }, + { + "epoch": 0.8973251313243019, + "grad_norm": 1.1392628437772265, + "learning_rate": 5.478500161778178e-07, + "loss": 0.2488, + "step": 51930 + }, + { + "epoch": 0.8973424108377108, + "grad_norm": 2.030835922277577, + "learning_rate": 5.476673342437478e-07, + "loss": 0.3714, + "step": 51931 + }, + { + "epoch": 0.8973596903511197, + "grad_norm": 1.0219773111461141, + "learning_rate": 5.474846819151025e-07, + "loss": 0.3123, + "step": 51932 + }, + { + "epoch": 0.8973769698645286, + "grad_norm": 1.4469141647143595, + "learning_rate": 5.473020591924528e-07, + "loss": 0.3995, + "step": 51933 + }, + { + "epoch": 0.8973942493779375, + "grad_norm": 1.1699329573713204, + "learning_rate": 5.471194660763735e-07, + "loss": 0.4566, + "step": 51934 + }, + { + "epoch": 0.8974115288913465, + "grad_norm": 0.9152577690371333, + "learning_rate": 5.469369025674321e-07, + "loss": 0.2485, + "step": 51935 + }, + { + "epoch": 0.8974288084047554, + "grad_norm": 1.6506574534048828, + "learning_rate": 5.467543686662047e-07, + "loss": 0.2901, + "step": 51936 + }, + { + "epoch": 0.8974460879181643, + "grad_norm": 2.253050577788307, + "learning_rate": 5.465718643732598e-07, + "loss": 0.2724, + "step": 51937 + }, + { + "epoch": 0.8974633674315732, + "grad_norm": 1.4950621776345778, + "learning_rate": 5.463893896891704e-07, + "loss": 0.4322, + "step": 51938 + }, + { + "epoch": 0.897480646944982, + "grad_norm": 1.065031579753187, + "learning_rate": 5.46206944614508e-07, + "loss": 0.3666, + "step": 51939 + }, + { + "epoch": 0.8974979264583909, + "grad_norm": 1.2151575957028014, + "learning_rate": 5.460245291498444e-07, + "loss": 0.3039, + "step": 51940 + }, + { + "epoch": 0.8975152059717998, + "grad_norm": 1.5758786737654022, + "learning_rate": 5.458421432957517e-07, + "loss": 0.4234, + "step": 51941 + }, + { + "epoch": 0.8975324854852087, + "grad_norm": 1.0714855505555694, + "learning_rate": 5.456597870527979e-07, + "loss": 0.3092, + "step": 51942 + }, + { + "epoch": 0.8975497649986176, + "grad_norm": 3.330416028102847, + "learning_rate": 5.454774604215562e-07, + "loss": 0.3126, + "step": 51943 + }, + { + "epoch": 0.8975670445120265, + "grad_norm": 0.6125054349209108, + "learning_rate": 5.452951634025971e-07, + "loss": 0.2452, + "step": 51944 + }, + { + "epoch": 0.8975843240254354, + "grad_norm": 0.4558153654722462, + "learning_rate": 5.451128959964936e-07, + "loss": 0.6299, + "step": 51945 + }, + { + "epoch": 0.8976016035388443, + "grad_norm": 0.8336800798165988, + "learning_rate": 5.449306582038138e-07, + "loss": 0.3253, + "step": 51946 + }, + { + "epoch": 0.8976188830522532, + "grad_norm": 1.399795066567226, + "learning_rate": 5.447484500251321e-07, + "loss": 0.3002, + "step": 51947 + }, + { + "epoch": 0.8976361625656621, + "grad_norm": 2.1363346053918493, + "learning_rate": 5.445662714610145e-07, + "loss": 0.2884, + "step": 51948 + }, + { + "epoch": 0.897653442079071, + "grad_norm": 1.4189808887485624, + "learning_rate": 5.443841225120339e-07, + "loss": 0.361, + "step": 51949 + }, + { + "epoch": 0.89767072159248, + "grad_norm": 1.0764565486559696, + "learning_rate": 5.442020031787609e-07, + "loss": 0.2048, + "step": 51950 + }, + { + "epoch": 0.8976880011058889, + "grad_norm": 1.3105730627438843, + "learning_rate": 5.440199134617674e-07, + "loss": 0.3108, + "step": 51951 + }, + { + "epoch": 0.8977052806192978, + "grad_norm": 1.168075570549487, + "learning_rate": 5.438378533616217e-07, + "loss": 0.2275, + "step": 51952 + }, + { + "epoch": 0.8977225601327067, + "grad_norm": 0.9743958230475391, + "learning_rate": 5.436558228788935e-07, + "loss": 0.3902, + "step": 51953 + }, + { + "epoch": 0.8977398396461156, + "grad_norm": 1.2445824114269146, + "learning_rate": 5.434738220141534e-07, + "loss": 0.2881, + "step": 51954 + }, + { + "epoch": 0.8977571191595245, + "grad_norm": 1.3375562929255087, + "learning_rate": 5.432918507679729e-07, + "loss": 0.1863, + "step": 51955 + }, + { + "epoch": 0.8977743986729334, + "grad_norm": 1.5319475220915775, + "learning_rate": 5.43109909140922e-07, + "loss": 0.1913, + "step": 51956 + }, + { + "epoch": 0.8977916781863423, + "grad_norm": 2.4921631631251273, + "learning_rate": 5.429279971335677e-07, + "loss": 0.2801, + "step": 51957 + }, + { + "epoch": 0.8978089576997512, + "grad_norm": 1.055159970660097, + "learning_rate": 5.427461147464841e-07, + "loss": 0.443, + "step": 51958 + }, + { + "epoch": 0.8978262372131601, + "grad_norm": 1.8879861057360336, + "learning_rate": 5.425642619802362e-07, + "loss": 0.5474, + "step": 51959 + }, + { + "epoch": 0.897843516726569, + "grad_norm": 1.2523136549030882, + "learning_rate": 5.42382438835396e-07, + "loss": 0.5137, + "step": 51960 + }, + { + "epoch": 0.8978607962399778, + "grad_norm": 1.780587020326656, + "learning_rate": 5.42200645312534e-07, + "loss": 0.3661, + "step": 51961 + }, + { + "epoch": 0.8978780757533867, + "grad_norm": 1.0564902181135174, + "learning_rate": 5.420188814122163e-07, + "loss": 0.3739, + "step": 51962 + }, + { + "epoch": 0.8978953552667956, + "grad_norm": 1.1342937106265205, + "learning_rate": 5.41837147135017e-07, + "loss": 0.2484, + "step": 51963 + }, + { + "epoch": 0.8979126347802046, + "grad_norm": 1.093731470031571, + "learning_rate": 5.416554424815002e-07, + "loss": 0.2923, + "step": 51964 + }, + { + "epoch": 0.8979299142936135, + "grad_norm": 1.0144640517652557, + "learning_rate": 5.414737674522375e-07, + "loss": 0.2137, + "step": 51965 + }, + { + "epoch": 0.8979471938070224, + "grad_norm": 0.9664269664171536, + "learning_rate": 5.412921220477973e-07, + "loss": 0.4244, + "step": 51966 + }, + { + "epoch": 0.8979644733204313, + "grad_norm": 1.8722497937509868, + "learning_rate": 5.411105062687516e-07, + "loss": 0.196, + "step": 51967 + }, + { + "epoch": 0.8979817528338402, + "grad_norm": 1.5990699950112326, + "learning_rate": 5.409289201156642e-07, + "loss": 0.5809, + "step": 51968 + }, + { + "epoch": 0.8979990323472491, + "grad_norm": 1.4393610657040954, + "learning_rate": 5.407473635891081e-07, + "loss": 0.5793, + "step": 51969 + }, + { + "epoch": 0.898016311860658, + "grad_norm": 1.4496033378074973, + "learning_rate": 5.405658366896494e-07, + "loss": 0.323, + "step": 51970 + }, + { + "epoch": 0.8980335913740669, + "grad_norm": 1.1996771538277045, + "learning_rate": 5.403843394178576e-07, + "loss": 0.3327, + "step": 51971 + }, + { + "epoch": 0.8980508708874758, + "grad_norm": 1.1654883401156568, + "learning_rate": 5.402028717743002e-07, + "loss": 0.501, + "step": 51972 + }, + { + "epoch": 0.8980681504008847, + "grad_norm": 1.725939828571468, + "learning_rate": 5.400214337595467e-07, + "loss": 0.2826, + "step": 51973 + }, + { + "epoch": 0.8980854299142936, + "grad_norm": 1.2332794702816974, + "learning_rate": 5.398400253741654e-07, + "loss": 0.2342, + "step": 51974 + }, + { + "epoch": 0.8981027094277025, + "grad_norm": 2.0182661722223614, + "learning_rate": 5.396586466187237e-07, + "loss": 0.4823, + "step": 51975 + }, + { + "epoch": 0.8981199889411114, + "grad_norm": 2.063227903195638, + "learning_rate": 5.394772974937912e-07, + "loss": 0.3087, + "step": 51976 + }, + { + "epoch": 0.8981372684545204, + "grad_norm": 1.0005263808990916, + "learning_rate": 5.39295977999933e-07, + "loss": 0.3596, + "step": 51977 + }, + { + "epoch": 0.8981545479679293, + "grad_norm": 1.601546675466062, + "learning_rate": 5.391146881377185e-07, + "loss": 0.2771, + "step": 51978 + }, + { + "epoch": 0.8981718274813382, + "grad_norm": 1.3562787042825106, + "learning_rate": 5.389334279077163e-07, + "loss": 0.2817, + "step": 51979 + }, + { + "epoch": 0.8981891069947471, + "grad_norm": 2.1533786588760173, + "learning_rate": 5.387521973104959e-07, + "loss": 0.281, + "step": 51980 + }, + { + "epoch": 0.898206386508156, + "grad_norm": 0.9036900310056183, + "learning_rate": 5.385709963466201e-07, + "loss": 0.2698, + "step": 51981 + }, + { + "epoch": 0.8982236660215648, + "grad_norm": 0.5733653788811084, + "learning_rate": 5.383898250166609e-07, + "loss": 0.898, + "step": 51982 + }, + { + "epoch": 0.8982409455349737, + "grad_norm": 1.4097230546545374, + "learning_rate": 5.382086833211819e-07, + "loss": 0.4646, + "step": 51983 + }, + { + "epoch": 0.8982582250483826, + "grad_norm": 1.2709324801127861, + "learning_rate": 5.380275712607519e-07, + "loss": 0.4188, + "step": 51984 + }, + { + "epoch": 0.8982755045617915, + "grad_norm": 2.228876323429752, + "learning_rate": 5.378464888359414e-07, + "loss": 0.6624, + "step": 51985 + }, + { + "epoch": 0.8982927840752004, + "grad_norm": 1.1502707401246877, + "learning_rate": 5.376654360473121e-07, + "loss": 0.2621, + "step": 51986 + }, + { + "epoch": 0.8983100635886093, + "grad_norm": 0.6203342545820504, + "learning_rate": 5.374844128954359e-07, + "loss": 0.3411, + "step": 51987 + }, + { + "epoch": 0.8983273431020182, + "grad_norm": 2.817611025978648, + "learning_rate": 5.373034193808768e-07, + "loss": 0.5921, + "step": 51988 + }, + { + "epoch": 0.8983446226154271, + "grad_norm": 2.2502342659914203, + "learning_rate": 5.37122455504202e-07, + "loss": 0.3769, + "step": 51989 + }, + { + "epoch": 0.898361902128836, + "grad_norm": 1.2194064701308969, + "learning_rate": 5.369415212659778e-07, + "loss": 0.4911, + "step": 51990 + }, + { + "epoch": 0.898379181642245, + "grad_norm": 1.1971495081030077, + "learning_rate": 5.36760616666775e-07, + "loss": 0.3686, + "step": 51991 + }, + { + "epoch": 0.8983964611556539, + "grad_norm": 0.6785624708808929, + "learning_rate": 5.365797417071561e-07, + "loss": 0.1596, + "step": 51992 + }, + { + "epoch": 0.8984137406690628, + "grad_norm": 2.520684863764284, + "learning_rate": 5.363988963876876e-07, + "loss": 0.1689, + "step": 51993 + }, + { + "epoch": 0.8984310201824717, + "grad_norm": 1.2302699301372204, + "learning_rate": 5.362180807089368e-07, + "loss": 0.3527, + "step": 51994 + }, + { + "epoch": 0.8984482996958806, + "grad_norm": 1.0926915221139364, + "learning_rate": 5.360372946714698e-07, + "loss": 0.2994, + "step": 51995 + }, + { + "epoch": 0.8984655792092895, + "grad_norm": 0.6365239093976144, + "learning_rate": 5.358565382758551e-07, + "loss": 0.5961, + "step": 51996 + }, + { + "epoch": 0.8984828587226984, + "grad_norm": 1.497180650766871, + "learning_rate": 5.356758115226557e-07, + "loss": 0.5027, + "step": 51997 + }, + { + "epoch": 0.8985001382361073, + "grad_norm": 1.393483895896826, + "learning_rate": 5.354951144124409e-07, + "loss": 0.4321, + "step": 51998 + }, + { + "epoch": 0.8985174177495162, + "grad_norm": 1.3239226727925466, + "learning_rate": 5.353144469457717e-07, + "loss": 0.3813, + "step": 51999 + }, + { + "epoch": 0.8985346972629251, + "grad_norm": 0.9277953357324558, + "learning_rate": 5.351338091232183e-07, + "loss": 0.3113, + "step": 52000 + }, + { + "epoch": 0.898551976776334, + "grad_norm": 1.8236321735613155, + "learning_rate": 5.34953200945344e-07, + "loss": 0.28, + "step": 52001 + }, + { + "epoch": 0.8985692562897429, + "grad_norm": 1.6131098800956767, + "learning_rate": 5.347726224127181e-07, + "loss": 0.3484, + "step": 52002 + }, + { + "epoch": 0.8985865358031517, + "grad_norm": 1.7623854514934816, + "learning_rate": 5.345920735259036e-07, + "loss": 0.3744, + "step": 52003 + }, + { + "epoch": 0.8986038153165606, + "grad_norm": 1.5211924206660647, + "learning_rate": 5.344115542854633e-07, + "loss": 0.6571, + "step": 52004 + }, + { + "epoch": 0.8986210948299695, + "grad_norm": 0.7225274267999673, + "learning_rate": 5.342310646919669e-07, + "loss": 0.5555, + "step": 52005 + }, + { + "epoch": 0.8986383743433785, + "grad_norm": 1.126772241167178, + "learning_rate": 5.34050604745977e-07, + "loss": 0.2787, + "step": 52006 + }, + { + "epoch": 0.8986556538567874, + "grad_norm": 2.5531206203245174, + "learning_rate": 5.338701744480623e-07, + "loss": 0.1687, + "step": 52007 + }, + { + "epoch": 0.8986729333701963, + "grad_norm": 2.071514042632421, + "learning_rate": 5.336897737987845e-07, + "loss": 0.5859, + "step": 52008 + }, + { + "epoch": 0.8986902128836052, + "grad_norm": 0.8916789527637562, + "learning_rate": 5.335094027987098e-07, + "loss": 0.1801, + "step": 52009 + }, + { + "epoch": 0.8987074923970141, + "grad_norm": 1.4256630924660925, + "learning_rate": 5.333290614484033e-07, + "loss": 0.3588, + "step": 52010 + }, + { + "epoch": 0.898724771910423, + "grad_norm": 1.5541193762321994, + "learning_rate": 5.3314874974843e-07, + "loss": 0.4762, + "step": 52011 + }, + { + "epoch": 0.8987420514238319, + "grad_norm": 1.5325970295995017, + "learning_rate": 5.329684676993507e-07, + "loss": 0.3603, + "step": 52012 + }, + { + "epoch": 0.8987593309372408, + "grad_norm": 1.5834714483048329, + "learning_rate": 5.327882153017383e-07, + "loss": 0.3407, + "step": 52013 + }, + { + "epoch": 0.8987766104506497, + "grad_norm": 1.1183903959575654, + "learning_rate": 5.326079925561511e-07, + "loss": 0.3911, + "step": 52014 + }, + { + "epoch": 0.8987938899640586, + "grad_norm": 1.669310436981138, + "learning_rate": 5.324277994631544e-07, + "loss": 0.2878, + "step": 52015 + }, + { + "epoch": 0.8988111694774675, + "grad_norm": 1.5684223476837376, + "learning_rate": 5.322476360233153e-07, + "loss": 0.2594, + "step": 52016 + }, + { + "epoch": 0.8988284489908764, + "grad_norm": 1.252623512239696, + "learning_rate": 5.320675022371935e-07, + "loss": 0.3832, + "step": 52017 + }, + { + "epoch": 0.8988457285042853, + "grad_norm": 1.6499947812768874, + "learning_rate": 5.318873981053563e-07, + "loss": 0.3122, + "step": 52018 + }, + { + "epoch": 0.8988630080176943, + "grad_norm": 1.3646554134180173, + "learning_rate": 5.317073236283676e-07, + "loss": 0.7438, + "step": 52019 + }, + { + "epoch": 0.8988802875311032, + "grad_norm": 1.1511158469512128, + "learning_rate": 5.315272788067927e-07, + "loss": 0.3012, + "step": 52020 + }, + { + "epoch": 0.8988975670445121, + "grad_norm": 1.4128590922220234, + "learning_rate": 5.31347263641192e-07, + "loss": 0.2613, + "step": 52021 + }, + { + "epoch": 0.898914846557921, + "grad_norm": 3.087926148562884, + "learning_rate": 5.311672781321331e-07, + "loss": 0.2808, + "step": 52022 + }, + { + "epoch": 0.8989321260713299, + "grad_norm": 1.577113913332768, + "learning_rate": 5.309873222801764e-07, + "loss": 0.2427, + "step": 52023 + }, + { + "epoch": 0.8989494055847387, + "grad_norm": 1.646082593763711, + "learning_rate": 5.308073960858872e-07, + "loss": 0.5089, + "step": 52024 + }, + { + "epoch": 0.8989666850981476, + "grad_norm": 0.8098380512638159, + "learning_rate": 5.306274995498295e-07, + "loss": 0.302, + "step": 52025 + }, + { + "epoch": 0.8989839646115565, + "grad_norm": 1.79404988950831, + "learning_rate": 5.30447632672566e-07, + "loss": 0.243, + "step": 52026 + }, + { + "epoch": 0.8990012441249654, + "grad_norm": 0.9287896090968268, + "learning_rate": 5.302677954546609e-07, + "loss": 0.3322, + "step": 52027 + }, + { + "epoch": 0.8990185236383743, + "grad_norm": 1.1408524153175674, + "learning_rate": 5.300879878966747e-07, + "loss": 0.3145, + "step": 52028 + }, + { + "epoch": 0.8990358031517832, + "grad_norm": 1.1860381195229939, + "learning_rate": 5.299082099991736e-07, + "loss": 0.2769, + "step": 52029 + }, + { + "epoch": 0.8990530826651921, + "grad_norm": 1.1492400420388953, + "learning_rate": 5.297284617627185e-07, + "loss": 0.3303, + "step": 52030 + }, + { + "epoch": 0.899070362178601, + "grad_norm": 1.345101282679655, + "learning_rate": 5.295487431878765e-07, + "loss": 0.4495, + "step": 52031 + }, + { + "epoch": 0.8990876416920099, + "grad_norm": 1.611627870799966, + "learning_rate": 5.29369054275205e-07, + "loss": 0.2777, + "step": 52032 + }, + { + "epoch": 0.8991049212054188, + "grad_norm": 1.6954449174808672, + "learning_rate": 5.291893950252714e-07, + "loss": 0.2735, + "step": 52033 + }, + { + "epoch": 0.8991222007188278, + "grad_norm": 1.6224858568056868, + "learning_rate": 5.290097654386339e-07, + "loss": 0.7213, + "step": 52034 + }, + { + "epoch": 0.8991394802322367, + "grad_norm": 1.0851308282765972, + "learning_rate": 5.28830165515859e-07, + "loss": 0.4076, + "step": 52035 + }, + { + "epoch": 0.8991567597456456, + "grad_norm": 1.3604481886392108, + "learning_rate": 5.286505952575082e-07, + "loss": 0.3726, + "step": 52036 + }, + { + "epoch": 0.8991740392590545, + "grad_norm": 1.3443546879707489, + "learning_rate": 5.284710546641425e-07, + "loss": 0.2839, + "step": 52037 + }, + { + "epoch": 0.8991913187724634, + "grad_norm": 1.3899689482606938, + "learning_rate": 5.282915437363267e-07, + "loss": 0.3279, + "step": 52038 + }, + { + "epoch": 0.8992085982858723, + "grad_norm": 1.8716660714604934, + "learning_rate": 5.281120624746205e-07, + "loss": 0.402, + "step": 52039 + }, + { + "epoch": 0.8992258777992812, + "grad_norm": 1.5030478065958939, + "learning_rate": 5.279326108795868e-07, + "loss": 0.2298, + "step": 52040 + }, + { + "epoch": 0.8992431573126901, + "grad_norm": 1.4378657362486396, + "learning_rate": 5.277531889517884e-07, + "loss": 0.3542, + "step": 52041 + }, + { + "epoch": 0.899260436826099, + "grad_norm": 1.0366605100204016, + "learning_rate": 5.275737966917882e-07, + "loss": 0.3971, + "step": 52042 + }, + { + "epoch": 0.8992777163395079, + "grad_norm": 1.0130799019735326, + "learning_rate": 5.273944341001447e-07, + "loss": 0.2912, + "step": 52043 + }, + { + "epoch": 0.8992949958529168, + "grad_norm": 1.2371171795760652, + "learning_rate": 5.27215101177424e-07, + "loss": 0.3494, + "step": 52044 + }, + { + "epoch": 0.8993122753663256, + "grad_norm": 1.673794210360316, + "learning_rate": 5.270357979241836e-07, + "loss": 0.2879, + "step": 52045 + }, + { + "epoch": 0.8993295548797345, + "grad_norm": 1.5456632665822876, + "learning_rate": 5.268565243409862e-07, + "loss": 0.1808, + "step": 52046 + }, + { + "epoch": 0.8993468343931434, + "grad_norm": 1.9016979967451946, + "learning_rate": 5.26677280428397e-07, + "loss": 0.349, + "step": 52047 + }, + { + "epoch": 0.8993641139065524, + "grad_norm": 1.3766983968765278, + "learning_rate": 5.264980661869723e-07, + "loss": 0.2981, + "step": 52048 + }, + { + "epoch": 0.8993813934199613, + "grad_norm": 0.8356474708993892, + "learning_rate": 5.263188816172771e-07, + "loss": 0.6269, + "step": 52049 + }, + { + "epoch": 0.8993986729333702, + "grad_norm": 1.2427215699070928, + "learning_rate": 5.261397267198698e-07, + "loss": 0.3214, + "step": 52050 + }, + { + "epoch": 0.8994159524467791, + "grad_norm": 2.075318598014799, + "learning_rate": 5.259606014953145e-07, + "loss": 0.2581, + "step": 52051 + }, + { + "epoch": 0.899433231960188, + "grad_norm": 1.0844261425809685, + "learning_rate": 5.257815059441673e-07, + "loss": 0.4087, + "step": 52052 + }, + { + "epoch": 0.8994505114735969, + "grad_norm": 2.24454883824826, + "learning_rate": 5.256024400669957e-07, + "loss": 0.4776, + "step": 52053 + }, + { + "epoch": 0.8994677909870058, + "grad_norm": 1.198846318817655, + "learning_rate": 5.254234038643568e-07, + "loss": 0.254, + "step": 52054 + }, + { + "epoch": 0.8994850705004147, + "grad_norm": 1.2545648808487748, + "learning_rate": 5.252443973368104e-07, + "loss": 0.4244, + "step": 52055 + }, + { + "epoch": 0.8995023500138236, + "grad_norm": 1.8761005174332936, + "learning_rate": 5.250654204849204e-07, + "loss": 0.3011, + "step": 52056 + }, + { + "epoch": 0.8995196295272325, + "grad_norm": 1.0995210858774433, + "learning_rate": 5.24886473309244e-07, + "loss": 0.4035, + "step": 52057 + }, + { + "epoch": 0.8995369090406414, + "grad_norm": 2.86476590612779, + "learning_rate": 5.247075558103432e-07, + "loss": 0.2299, + "step": 52058 + }, + { + "epoch": 0.8995541885540503, + "grad_norm": 1.1969057691663667, + "learning_rate": 5.245286679887795e-07, + "loss": 0.5704, + "step": 52059 + }, + { + "epoch": 0.8995714680674592, + "grad_norm": 1.638552854704078, + "learning_rate": 5.243498098451116e-07, + "loss": 0.233, + "step": 52060 + }, + { + "epoch": 0.8995887475808682, + "grad_norm": 1.0036278746573655, + "learning_rate": 5.241709813799001e-07, + "loss": 0.2968, + "step": 52061 + }, + { + "epoch": 0.8996060270942771, + "grad_norm": 1.2986918462944699, + "learning_rate": 5.239921825937067e-07, + "loss": 0.3485, + "step": 52062 + }, + { + "epoch": 0.899623306607686, + "grad_norm": 1.22982983062103, + "learning_rate": 5.238134134870876e-07, + "loss": 0.3478, + "step": 52063 + }, + { + "epoch": 0.8996405861210949, + "grad_norm": 1.6280669807716197, + "learning_rate": 5.236346740606058e-07, + "loss": 0.2174, + "step": 52064 + }, + { + "epoch": 0.8996578656345038, + "grad_norm": 1.1479846781000898, + "learning_rate": 5.234559643148218e-07, + "loss": 0.4212, + "step": 52065 + }, + { + "epoch": 0.8996751451479126, + "grad_norm": 1.5200738473629096, + "learning_rate": 5.23277284250292e-07, + "loss": 0.3616, + "step": 52066 + }, + { + "epoch": 0.8996924246613215, + "grad_norm": 1.2559132898397773, + "learning_rate": 5.230986338675803e-07, + "loss": 0.336, + "step": 52067 + }, + { + "epoch": 0.8997097041747304, + "grad_norm": 1.6250935980360475, + "learning_rate": 5.229200131672419e-07, + "loss": 0.2884, + "step": 52068 + }, + { + "epoch": 0.8997269836881393, + "grad_norm": 0.7549269043030238, + "learning_rate": 5.227414221498383e-07, + "loss": 0.3001, + "step": 52069 + }, + { + "epoch": 0.8997442632015482, + "grad_norm": 2.479133904932658, + "learning_rate": 5.225628608159283e-07, + "loss": 0.404, + "step": 52070 + }, + { + "epoch": 0.8997615427149571, + "grad_norm": 0.9574127221206395, + "learning_rate": 5.223843291660735e-07, + "loss": 0.5503, + "step": 52071 + }, + { + "epoch": 0.899778822228366, + "grad_norm": 1.3294128988549998, + "learning_rate": 5.222058272008302e-07, + "loss": 0.3326, + "step": 52072 + }, + { + "epoch": 0.8997961017417749, + "grad_norm": 1.1394751688501938, + "learning_rate": 5.220273549207589e-07, + "loss": 0.2698, + "step": 52073 + }, + { + "epoch": 0.8998133812551838, + "grad_norm": 1.298771486699812, + "learning_rate": 5.218489123264181e-07, + "loss": 0.3563, + "step": 52074 + }, + { + "epoch": 0.8998306607685927, + "grad_norm": 2.55411960956995, + "learning_rate": 5.216704994183664e-07, + "loss": 0.3201, + "step": 52075 + }, + { + "epoch": 0.8998479402820017, + "grad_norm": 1.2417084611315603, + "learning_rate": 5.214921161971642e-07, + "loss": 0.359, + "step": 52076 + }, + { + "epoch": 0.8998652197954106, + "grad_norm": 1.5017648767366867, + "learning_rate": 5.213137626633668e-07, + "loss": 0.2906, + "step": 52077 + }, + { + "epoch": 0.8998824993088195, + "grad_norm": 1.2300078255367972, + "learning_rate": 5.211354388175383e-07, + "loss": 0.6681, + "step": 52078 + }, + { + "epoch": 0.8998997788222284, + "grad_norm": 1.5700315945976622, + "learning_rate": 5.209571446602313e-07, + "loss": 0.3539, + "step": 52079 + }, + { + "epoch": 0.8999170583356373, + "grad_norm": 2.0823714132228948, + "learning_rate": 5.207788801920066e-07, + "loss": 0.3278, + "step": 52080 + }, + { + "epoch": 0.8999343378490462, + "grad_norm": 1.5806663131331564, + "learning_rate": 5.206006454134238e-07, + "loss": 0.3558, + "step": 52081 + }, + { + "epoch": 0.8999516173624551, + "grad_norm": 1.2252510037223003, + "learning_rate": 5.204224403250413e-07, + "loss": 0.2314, + "step": 52082 + }, + { + "epoch": 0.899968896875864, + "grad_norm": 1.0198463781457636, + "learning_rate": 5.202442649274153e-07, + "loss": 0.3187, + "step": 52083 + }, + { + "epoch": 0.8999861763892729, + "grad_norm": 1.6642870715418538, + "learning_rate": 5.200661192211054e-07, + "loss": 0.3788, + "step": 52084 + }, + { + "epoch": 0.9000034559026818, + "grad_norm": 2.881102255753433, + "learning_rate": 5.198880032066667e-07, + "loss": 0.3713, + "step": 52085 + }, + { + "epoch": 0.9000207354160907, + "grad_norm": 0.7965471935622003, + "learning_rate": 5.197099168846609e-07, + "loss": 0.1728, + "step": 52086 + }, + { + "epoch": 0.9000380149294995, + "grad_norm": 0.61851264736948, + "learning_rate": 5.195318602556443e-07, + "loss": 0.6896, + "step": 52087 + }, + { + "epoch": 0.9000552944429084, + "grad_norm": 1.253567286631005, + "learning_rate": 5.19353833320173e-07, + "loss": 0.3236, + "step": 52088 + }, + { + "epoch": 0.9000725739563173, + "grad_norm": 1.3836730650729108, + "learning_rate": 5.191758360788079e-07, + "loss": 0.4418, + "step": 52089 + }, + { + "epoch": 0.9000898534697263, + "grad_norm": 0.6935626289803299, + "learning_rate": 5.189978685321028e-07, + "loss": 0.7272, + "step": 52090 + }, + { + "epoch": 0.9001071329831352, + "grad_norm": 1.3994094365372864, + "learning_rate": 5.188199306806197e-07, + "loss": 0.4578, + "step": 52091 + }, + { + "epoch": 0.9001244124965441, + "grad_norm": 1.128420575720007, + "learning_rate": 5.186420225249089e-07, + "loss": 0.2311, + "step": 52092 + }, + { + "epoch": 0.900141692009953, + "grad_norm": 1.0964009469365608, + "learning_rate": 5.184641440655347e-07, + "loss": 0.4338, + "step": 52093 + }, + { + "epoch": 0.9001589715233619, + "grad_norm": 1.2906818956571569, + "learning_rate": 5.182862953030499e-07, + "loss": 0.1897, + "step": 52094 + }, + { + "epoch": 0.9001762510367708, + "grad_norm": 1.5394431675558407, + "learning_rate": 5.181084762380151e-07, + "loss": 0.3068, + "step": 52095 + }, + { + "epoch": 0.9001935305501797, + "grad_norm": 1.1426182230783302, + "learning_rate": 5.179306868709844e-07, + "loss": 0.4023, + "step": 52096 + }, + { + "epoch": 0.9002108100635886, + "grad_norm": 0.925800385215997, + "learning_rate": 5.177529272025151e-07, + "loss": 0.4289, + "step": 52097 + }, + { + "epoch": 0.9002280895769975, + "grad_norm": 1.5916037093087314, + "learning_rate": 5.175751972331633e-07, + "loss": 0.2445, + "step": 52098 + }, + { + "epoch": 0.9002453690904064, + "grad_norm": 1.5237269225096832, + "learning_rate": 5.173974969634865e-07, + "loss": 0.437, + "step": 52099 + }, + { + "epoch": 0.9002626486038153, + "grad_norm": 2.0003961005537496, + "learning_rate": 5.172198263940431e-07, + "loss": 0.4067, + "step": 52100 + }, + { + "epoch": 0.9002799281172242, + "grad_norm": 1.0234485347672184, + "learning_rate": 5.170421855253871e-07, + "loss": 0.3223, + "step": 52101 + }, + { + "epoch": 0.9002972076306331, + "grad_norm": 1.6677826341062043, + "learning_rate": 5.168645743580769e-07, + "loss": 0.3872, + "step": 52102 + }, + { + "epoch": 0.900314487144042, + "grad_norm": 1.2961894830241298, + "learning_rate": 5.166869928926655e-07, + "loss": 0.391, + "step": 52103 + }, + { + "epoch": 0.900331766657451, + "grad_norm": 1.453408548703056, + "learning_rate": 5.165094411297111e-07, + "loss": 0.3293, + "step": 52104 + }, + { + "epoch": 0.9003490461708599, + "grad_norm": 0.9758281617899838, + "learning_rate": 5.163319190697702e-07, + "loss": 0.2845, + "step": 52105 + }, + { + "epoch": 0.9003663256842688, + "grad_norm": 1.0545149564149139, + "learning_rate": 5.161544267134e-07, + "loss": 0.3988, + "step": 52106 + }, + { + "epoch": 0.9003836051976777, + "grad_norm": 1.8080121072443904, + "learning_rate": 5.159769640611545e-07, + "loss": 0.3904, + "step": 52107 + }, + { + "epoch": 0.9004008847110866, + "grad_norm": 0.9818209536831164, + "learning_rate": 5.157995311135888e-07, + "loss": 0.362, + "step": 52108 + }, + { + "epoch": 0.9004181642244954, + "grad_norm": 0.6301845749433586, + "learning_rate": 5.156221278712592e-07, + "loss": 0.5666, + "step": 52109 + }, + { + "epoch": 0.9004354437379043, + "grad_norm": 1.7268562936782152, + "learning_rate": 5.15444754334723e-07, + "loss": 0.3644, + "step": 52110 + }, + { + "epoch": 0.9004527232513132, + "grad_norm": 1.321305485197467, + "learning_rate": 5.152674105045352e-07, + "loss": 0.3938, + "step": 52111 + }, + { + "epoch": 0.9004700027647221, + "grad_norm": 1.5369174790325175, + "learning_rate": 5.150900963812488e-07, + "loss": 0.3243, + "step": 52112 + }, + { + "epoch": 0.900487282278131, + "grad_norm": 0.9583879135903433, + "learning_rate": 5.149128119654234e-07, + "loss": 0.2403, + "step": 52113 + }, + { + "epoch": 0.9005045617915399, + "grad_norm": 1.3896382113527599, + "learning_rate": 5.147355572576096e-07, + "loss": 0.3096, + "step": 52114 + }, + { + "epoch": 0.9005218413049488, + "grad_norm": 1.2942104291509027, + "learning_rate": 5.145583322583659e-07, + "loss": 0.3098, + "step": 52115 + }, + { + "epoch": 0.9005391208183577, + "grad_norm": 1.6294471026789725, + "learning_rate": 5.143811369682472e-07, + "loss": 0.227, + "step": 52116 + }, + { + "epoch": 0.9005564003317666, + "grad_norm": 1.2487731201229861, + "learning_rate": 5.142039713878066e-07, + "loss": 0.3569, + "step": 52117 + }, + { + "epoch": 0.9005736798451756, + "grad_norm": 2.162470041601435, + "learning_rate": 5.140268355176014e-07, + "loss": 0.2013, + "step": 52118 + }, + { + "epoch": 0.9005909593585845, + "grad_norm": 1.7659434168630865, + "learning_rate": 5.138497293581835e-07, + "loss": 0.4853, + "step": 52119 + }, + { + "epoch": 0.9006082388719934, + "grad_norm": 1.1924145385007092, + "learning_rate": 5.136726529101088e-07, + "loss": 0.4815, + "step": 52120 + }, + { + "epoch": 0.9006255183854023, + "grad_norm": 2.3386220501705677, + "learning_rate": 5.134956061739327e-07, + "loss": 0.4548, + "step": 52121 + }, + { + "epoch": 0.9006427978988112, + "grad_norm": 1.7634650782393675, + "learning_rate": 5.133185891502102e-07, + "loss": 0.3185, + "step": 52122 + }, + { + "epoch": 0.9006600774122201, + "grad_norm": 1.6364522983347671, + "learning_rate": 5.131416018394942e-07, + "loss": 0.4412, + "step": 52123 + }, + { + "epoch": 0.900677356925629, + "grad_norm": 0.8862473081059846, + "learning_rate": 5.129646442423409e-07, + "loss": 0.5777, + "step": 52124 + }, + { + "epoch": 0.9006946364390379, + "grad_norm": 1.190848861333648, + "learning_rate": 5.127877163593021e-07, + "loss": 0.55, + "step": 52125 + }, + { + "epoch": 0.9007119159524468, + "grad_norm": 1.6539644148172736, + "learning_rate": 5.126108181909328e-07, + "loss": 0.3603, + "step": 52126 + }, + { + "epoch": 0.9007291954658557, + "grad_norm": 1.2102806373992328, + "learning_rate": 5.124339497377883e-07, + "loss": 0.3733, + "step": 52127 + }, + { + "epoch": 0.9007464749792646, + "grad_norm": 1.878373147836149, + "learning_rate": 5.122571110004215e-07, + "loss": 0.2773, + "step": 52128 + }, + { + "epoch": 0.9007637544926735, + "grad_norm": 1.3850903241314423, + "learning_rate": 5.120803019793874e-07, + "loss": 0.2479, + "step": 52129 + }, + { + "epoch": 0.9007810340060823, + "grad_norm": 1.2276950703582032, + "learning_rate": 5.119035226752367e-07, + "loss": 0.4455, + "step": 52130 + }, + { + "epoch": 0.9007983135194912, + "grad_norm": 1.2239846277297455, + "learning_rate": 5.117267730885278e-07, + "loss": 0.2326, + "step": 52131 + }, + { + "epoch": 0.9008155930329002, + "grad_norm": 1.5858363766516606, + "learning_rate": 5.115500532198081e-07, + "loss": 0.3638, + "step": 52132 + }, + { + "epoch": 0.9008328725463091, + "grad_norm": 1.268608587183888, + "learning_rate": 5.113733630696382e-07, + "loss": 0.4487, + "step": 52133 + }, + { + "epoch": 0.900850152059718, + "grad_norm": 1.0973433181694707, + "learning_rate": 5.111967026385656e-07, + "loss": 0.3037, + "step": 52134 + }, + { + "epoch": 0.9008674315731269, + "grad_norm": 1.6165206936415004, + "learning_rate": 5.110200719271485e-07, + "loss": 0.4964, + "step": 52135 + }, + { + "epoch": 0.9008847110865358, + "grad_norm": 1.0535751747471582, + "learning_rate": 5.108434709359378e-07, + "loss": 0.3921, + "step": 52136 + }, + { + "epoch": 0.9009019905999447, + "grad_norm": 0.9723358542133487, + "learning_rate": 5.10666899665484e-07, + "loss": 0.1851, + "step": 52137 + }, + { + "epoch": 0.9009192701133536, + "grad_norm": 1.3023007002223603, + "learning_rate": 5.104903581163434e-07, + "loss": 0.2884, + "step": 52138 + }, + { + "epoch": 0.9009365496267625, + "grad_norm": 2.1192538181814253, + "learning_rate": 5.103138462890677e-07, + "loss": 0.3043, + "step": 52139 + }, + { + "epoch": 0.9009538291401714, + "grad_norm": 1.3312792658538593, + "learning_rate": 5.101373641842122e-07, + "loss": 0.4107, + "step": 52140 + }, + { + "epoch": 0.9009711086535803, + "grad_norm": 0.9937452970731655, + "learning_rate": 5.099609118023263e-07, + "loss": 0.3772, + "step": 52141 + }, + { + "epoch": 0.9009883881669892, + "grad_norm": 1.6175988863871698, + "learning_rate": 5.097844891439651e-07, + "loss": 0.2083, + "step": 52142 + }, + { + "epoch": 0.9010056676803981, + "grad_norm": 1.4578622652382747, + "learning_rate": 5.096080962096783e-07, + "loss": 0.1699, + "step": 52143 + }, + { + "epoch": 0.901022947193807, + "grad_norm": 1.0183419685215942, + "learning_rate": 5.094317330000209e-07, + "loss": 0.3986, + "step": 52144 + }, + { + "epoch": 0.901040226707216, + "grad_norm": 1.3231515348316558, + "learning_rate": 5.092553995155447e-07, + "loss": 0.5013, + "step": 52145 + }, + { + "epoch": 0.9010575062206249, + "grad_norm": 1.637026342686656, + "learning_rate": 5.090790957568026e-07, + "loss": 0.3569, + "step": 52146 + }, + { + "epoch": 0.9010747857340338, + "grad_norm": 1.0524172559331273, + "learning_rate": 5.089028217243463e-07, + "loss": 0.2641, + "step": 52147 + }, + { + "epoch": 0.9010920652474427, + "grad_norm": 1.1634026148084669, + "learning_rate": 5.087265774187256e-07, + "loss": 0.263, + "step": 52148 + }, + { + "epoch": 0.9011093447608516, + "grad_norm": 1.0923362385787458, + "learning_rate": 5.085503628404953e-07, + "loss": 0.4267, + "step": 52149 + }, + { + "epoch": 0.9011266242742605, + "grad_norm": 2.073650377857045, + "learning_rate": 5.083741779902074e-07, + "loss": 0.2476, + "step": 52150 + }, + { + "epoch": 0.9011439037876693, + "grad_norm": 1.329453221939975, + "learning_rate": 5.081980228684125e-07, + "loss": 0.2426, + "step": 52151 + }, + { + "epoch": 0.9011611833010782, + "grad_norm": 1.540379646949155, + "learning_rate": 5.080218974756623e-07, + "loss": 0.5493, + "step": 52152 + }, + { + "epoch": 0.9011784628144871, + "grad_norm": 2.3813199039541746, + "learning_rate": 5.07845801812511e-07, + "loss": 0.2456, + "step": 52153 + }, + { + "epoch": 0.901195742327896, + "grad_norm": 1.9328404423074996, + "learning_rate": 5.076697358795058e-07, + "loss": 0.3566, + "step": 52154 + }, + { + "epoch": 0.9012130218413049, + "grad_norm": 1.461893068955301, + "learning_rate": 5.074936996772006e-07, + "loss": 0.4204, + "step": 52155 + }, + { + "epoch": 0.9012303013547138, + "grad_norm": 1.1908797322574893, + "learning_rate": 5.073176932061463e-07, + "loss": 0.2897, + "step": 52156 + }, + { + "epoch": 0.9012475808681227, + "grad_norm": 0.8752731550699486, + "learning_rate": 5.071417164668957e-07, + "loss": 0.3083, + "step": 52157 + }, + { + "epoch": 0.9012648603815316, + "grad_norm": 0.8622836243342048, + "learning_rate": 5.069657694599995e-07, + "loss": 0.2092, + "step": 52158 + }, + { + "epoch": 0.9012821398949405, + "grad_norm": 1.081637054271396, + "learning_rate": 5.06789852186006e-07, + "loss": 0.2574, + "step": 52159 + }, + { + "epoch": 0.9012994194083495, + "grad_norm": 1.6077943180476548, + "learning_rate": 5.06613964645467e-07, + "loss": 0.2993, + "step": 52160 + }, + { + "epoch": 0.9013166989217584, + "grad_norm": 1.7866580506199938, + "learning_rate": 5.064381068389357e-07, + "loss": 0.3905, + "step": 52161 + }, + { + "epoch": 0.9013339784351673, + "grad_norm": 1.1391291826759105, + "learning_rate": 5.062622787669635e-07, + "loss": 0.5359, + "step": 52162 + }, + { + "epoch": 0.9013512579485762, + "grad_norm": 1.4761182609027004, + "learning_rate": 5.06086480430098e-07, + "loss": 0.2389, + "step": 52163 + }, + { + "epoch": 0.9013685374619851, + "grad_norm": 1.5634574394933034, + "learning_rate": 5.059107118288919e-07, + "loss": 0.26, + "step": 52164 + }, + { + "epoch": 0.901385816975394, + "grad_norm": 1.9530447554930703, + "learning_rate": 5.057349729638938e-07, + "loss": 0.2348, + "step": 52165 + }, + { + "epoch": 0.9014030964888029, + "grad_norm": 1.3639358937611898, + "learning_rate": 5.055592638356543e-07, + "loss": 0.3651, + "step": 52166 + }, + { + "epoch": 0.9014203760022118, + "grad_norm": 0.9637862347060431, + "learning_rate": 5.053835844447275e-07, + "loss": 0.4453, + "step": 52167 + }, + { + "epoch": 0.9014376555156207, + "grad_norm": 0.5849376855182747, + "learning_rate": 5.052079347916594e-07, + "loss": 0.6793, + "step": 52168 + }, + { + "epoch": 0.9014549350290296, + "grad_norm": 1.6288185890579765, + "learning_rate": 5.05032314877002e-07, + "loss": 0.4917, + "step": 52169 + }, + { + "epoch": 0.9014722145424385, + "grad_norm": 1.5817058662311396, + "learning_rate": 5.048567247013048e-07, + "loss": 0.404, + "step": 52170 + }, + { + "epoch": 0.9014894940558474, + "grad_norm": 1.582012749114522, + "learning_rate": 5.046811642651183e-07, + "loss": 0.33, + "step": 52171 + }, + { + "epoch": 0.9015067735692562, + "grad_norm": 1.2427142717946917, + "learning_rate": 5.045056335689901e-07, + "loss": 0.3424, + "step": 52172 + }, + { + "epoch": 0.9015240530826651, + "grad_norm": 1.1423953414823165, + "learning_rate": 5.043301326134742e-07, + "loss": 0.3734, + "step": 52173 + }, + { + "epoch": 0.901541332596074, + "grad_norm": 1.020782204081787, + "learning_rate": 5.041546613991166e-07, + "loss": 0.4066, + "step": 52174 + }, + { + "epoch": 0.901558612109483, + "grad_norm": 1.685946028000433, + "learning_rate": 5.039792199264692e-07, + "loss": 0.3885, + "step": 52175 + }, + { + "epoch": 0.9015758916228919, + "grad_norm": 1.0999991783596217, + "learning_rate": 5.038038081960805e-07, + "loss": 0.2932, + "step": 52176 + }, + { + "epoch": 0.9015931711363008, + "grad_norm": 1.2401779835195337, + "learning_rate": 5.036284262085e-07, + "loss": 0.6405, + "step": 52177 + }, + { + "epoch": 0.9016104506497097, + "grad_norm": 1.5970989021813624, + "learning_rate": 5.034530739642762e-07, + "loss": 0.2816, + "step": 52178 + }, + { + "epoch": 0.9016277301631186, + "grad_norm": 1.487661139448064, + "learning_rate": 5.032777514639587e-07, + "loss": 0.4831, + "step": 52179 + }, + { + "epoch": 0.9016450096765275, + "grad_norm": 1.8081926855207493, + "learning_rate": 5.03102458708099e-07, + "loss": 0.4641, + "step": 52180 + }, + { + "epoch": 0.9016622891899364, + "grad_norm": 1.63333210530032, + "learning_rate": 5.029271956972425e-07, + "loss": 0.3518, + "step": 52181 + }, + { + "epoch": 0.9016795687033453, + "grad_norm": 1.865129898990795, + "learning_rate": 5.027519624319411e-07, + "loss": 0.3445, + "step": 52182 + }, + { + "epoch": 0.9016968482167542, + "grad_norm": 1.548753937391744, + "learning_rate": 5.025767589127406e-07, + "loss": 0.3384, + "step": 52183 + }, + { + "epoch": 0.9017141277301631, + "grad_norm": 1.3287328597069012, + "learning_rate": 5.02401585140192e-07, + "loss": 0.5023, + "step": 52184 + }, + { + "epoch": 0.901731407243572, + "grad_norm": 1.4924435323178895, + "learning_rate": 5.022264411148436e-07, + "loss": 0.3268, + "step": 52185 + }, + { + "epoch": 0.9017486867569809, + "grad_norm": 1.827690094113018, + "learning_rate": 5.020513268372451e-07, + "loss": 0.2536, + "step": 52186 + }, + { + "epoch": 0.9017659662703899, + "grad_norm": 1.0514078365481772, + "learning_rate": 5.018762423079415e-07, + "loss": 0.289, + "step": 52187 + }, + { + "epoch": 0.9017832457837988, + "grad_norm": 0.9738959147105536, + "learning_rate": 5.017011875274846e-07, + "loss": 0.3454, + "step": 52188 + }, + { + "epoch": 0.9018005252972077, + "grad_norm": 0.7501744354308438, + "learning_rate": 5.015261624964207e-07, + "loss": 0.3687, + "step": 52189 + }, + { + "epoch": 0.9018178048106166, + "grad_norm": 2.0375872600916147, + "learning_rate": 5.013511672152993e-07, + "loss": 0.36, + "step": 52190 + }, + { + "epoch": 0.9018350843240255, + "grad_norm": 1.1455957442572078, + "learning_rate": 5.011762016846688e-07, + "loss": 0.3281, + "step": 52191 + }, + { + "epoch": 0.9018523638374344, + "grad_norm": 2.1890893315085598, + "learning_rate": 5.010012659050745e-07, + "loss": 0.4221, + "step": 52192 + }, + { + "epoch": 0.9018696433508432, + "grad_norm": 1.050254607581906, + "learning_rate": 5.008263598770679e-07, + "loss": 0.3025, + "step": 52193 + }, + { + "epoch": 0.9018869228642521, + "grad_norm": 1.4769223960302964, + "learning_rate": 5.006514836011933e-07, + "loss": 0.393, + "step": 52194 + }, + { + "epoch": 0.901904202377661, + "grad_norm": 1.4338092891066132, + "learning_rate": 5.004766370780001e-07, + "loss": 0.2136, + "step": 52195 + }, + { + "epoch": 0.9019214818910699, + "grad_norm": 1.3483310609073396, + "learning_rate": 5.003018203080368e-07, + "loss": 0.373, + "step": 52196 + }, + { + "epoch": 0.9019387614044788, + "grad_norm": 1.1658366546336718, + "learning_rate": 5.001270332918518e-07, + "loss": 0.2713, + "step": 52197 + }, + { + "epoch": 0.9019560409178877, + "grad_norm": 1.1666300331004709, + "learning_rate": 4.999522760299891e-07, + "loss": 0.4732, + "step": 52198 + }, + { + "epoch": 0.9019733204312966, + "grad_norm": 1.1607896964150985, + "learning_rate": 4.997775485229983e-07, + "loss": 0.3113, + "step": 52199 + }, + { + "epoch": 0.9019905999447055, + "grad_norm": 1.2289406028701948, + "learning_rate": 4.996028507714256e-07, + "loss": 0.3128, + "step": 52200 + }, + { + "epoch": 0.9020078794581144, + "grad_norm": 1.0168344444379536, + "learning_rate": 4.994281827758185e-07, + "loss": 0.2867, + "step": 52201 + }, + { + "epoch": 0.9020251589715234, + "grad_norm": 1.384591635210256, + "learning_rate": 4.992535445367253e-07, + "loss": 0.5166, + "step": 52202 + }, + { + "epoch": 0.9020424384849323, + "grad_norm": 1.4216620298373523, + "learning_rate": 4.990789360546911e-07, + "loss": 0.3981, + "step": 52203 + }, + { + "epoch": 0.9020597179983412, + "grad_norm": 1.3313234147695578, + "learning_rate": 4.989043573302655e-07, + "loss": 0.3285, + "step": 52204 + }, + { + "epoch": 0.9020769975117501, + "grad_norm": 2.017478091862965, + "learning_rate": 4.987298083639924e-07, + "loss": 0.3042, + "step": 52205 + }, + { + "epoch": 0.902094277025159, + "grad_norm": 1.1675276937253538, + "learning_rate": 4.985552891564183e-07, + "loss": 0.4586, + "step": 52206 + }, + { + "epoch": 0.9021115565385679, + "grad_norm": 1.7143683211698424, + "learning_rate": 4.983807997080925e-07, + "loss": 0.2434, + "step": 52207 + }, + { + "epoch": 0.9021288360519768, + "grad_norm": 1.3290686787445467, + "learning_rate": 4.982063400195603e-07, + "loss": 0.325, + "step": 52208 + }, + { + "epoch": 0.9021461155653857, + "grad_norm": 1.4215809230334011, + "learning_rate": 4.980319100913689e-07, + "loss": 0.3858, + "step": 52209 + }, + { + "epoch": 0.9021633950787946, + "grad_norm": 1.8640427914683773, + "learning_rate": 4.978575099240624e-07, + "loss": 0.3448, + "step": 52210 + }, + { + "epoch": 0.9021806745922035, + "grad_norm": 0.838960391293592, + "learning_rate": 4.976831395181892e-07, + "loss": 0.2192, + "step": 52211 + }, + { + "epoch": 0.9021979541056124, + "grad_norm": 2.2135675951483593, + "learning_rate": 4.97508798874291e-07, + "loss": 0.2324, + "step": 52212 + }, + { + "epoch": 0.9022152336190213, + "grad_norm": 1.2314905848401438, + "learning_rate": 4.97334487992921e-07, + "loss": 0.2079, + "step": 52213 + }, + { + "epoch": 0.9022325131324301, + "grad_norm": 1.84283565244316, + "learning_rate": 4.971602068746206e-07, + "loss": 0.2534, + "step": 52214 + }, + { + "epoch": 0.902249792645839, + "grad_norm": 1.5557683448147803, + "learning_rate": 4.969859555199375e-07, + "loss": 0.2756, + "step": 52215 + }, + { + "epoch": 0.902267072159248, + "grad_norm": 2.489818965429061, + "learning_rate": 4.968117339294154e-07, + "loss": 0.4515, + "step": 52216 + }, + { + "epoch": 0.9022843516726569, + "grad_norm": 1.75612471358023, + "learning_rate": 4.966375421036029e-07, + "loss": 0.3466, + "step": 52217 + }, + { + "epoch": 0.9023016311860658, + "grad_norm": 1.8998163442944207, + "learning_rate": 4.964633800430396e-07, + "loss": 0.282, + "step": 52218 + }, + { + "epoch": 0.9023189106994747, + "grad_norm": 2.06771413847922, + "learning_rate": 4.962892477482795e-07, + "loss": 0.2363, + "step": 52219 + }, + { + "epoch": 0.9023361902128836, + "grad_norm": 1.6779707618255228, + "learning_rate": 4.961151452198632e-07, + "loss": 0.2566, + "step": 52220 + }, + { + "epoch": 0.9023534697262925, + "grad_norm": 1.116463812406992, + "learning_rate": 4.959410724583358e-07, + "loss": 0.6235, + "step": 52221 + }, + { + "epoch": 0.9023707492397014, + "grad_norm": 1.6529791849440068, + "learning_rate": 4.957670294642436e-07, + "loss": 0.4445, + "step": 52222 + }, + { + "epoch": 0.9023880287531103, + "grad_norm": 1.0790791510532078, + "learning_rate": 4.955930162381306e-07, + "loss": 0.2733, + "step": 52223 + }, + { + "epoch": 0.9024053082665192, + "grad_norm": 1.070271308042147, + "learning_rate": 4.95419032780542e-07, + "loss": 0.2889, + "step": 52224 + }, + { + "epoch": 0.9024225877799281, + "grad_norm": 1.7310015161466688, + "learning_rate": 4.952450790920238e-07, + "loss": 0.4377, + "step": 52225 + }, + { + "epoch": 0.902439867293337, + "grad_norm": 1.2859953162999964, + "learning_rate": 4.950711551731214e-07, + "loss": 0.3019, + "step": 52226 + }, + { + "epoch": 0.9024571468067459, + "grad_norm": 1.1227257364405534, + "learning_rate": 4.948972610243774e-07, + "loss": 0.1748, + "step": 52227 + }, + { + "epoch": 0.9024744263201548, + "grad_norm": 1.2518983353446733, + "learning_rate": 4.947233966463394e-07, + "loss": 0.3873, + "step": 52228 + }, + { + "epoch": 0.9024917058335638, + "grad_norm": 1.937495111757203, + "learning_rate": 4.94549562039548e-07, + "loss": 0.4026, + "step": 52229 + }, + { + "epoch": 0.9025089853469727, + "grad_norm": 1.2424948711774295, + "learning_rate": 4.943757572045494e-07, + "loss": 0.2031, + "step": 52230 + }, + { + "epoch": 0.9025262648603816, + "grad_norm": 1.2862137917080216, + "learning_rate": 4.942019821418909e-07, + "loss": 0.4824, + "step": 52231 + }, + { + "epoch": 0.9025435443737905, + "grad_norm": 1.9405975930639459, + "learning_rate": 4.940282368521121e-07, + "loss": 0.3003, + "step": 52232 + }, + { + "epoch": 0.9025608238871994, + "grad_norm": 1.120056304521151, + "learning_rate": 4.938545213357604e-07, + "loss": 0.2659, + "step": 52233 + }, + { + "epoch": 0.9025781034006083, + "grad_norm": 1.8155084739016945, + "learning_rate": 4.936808355933776e-07, + "loss": 0.3538, + "step": 52234 + }, + { + "epoch": 0.9025953829140171, + "grad_norm": 0.708233669989965, + "learning_rate": 4.935071796255098e-07, + "loss": 0.2188, + "step": 52235 + }, + { + "epoch": 0.902612662427426, + "grad_norm": 1.5676941586939035, + "learning_rate": 4.933335534326988e-07, + "loss": 0.3904, + "step": 52236 + }, + { + "epoch": 0.9026299419408349, + "grad_norm": 1.5851198882060633, + "learning_rate": 4.93159957015491e-07, + "loss": 0.426, + "step": 52237 + }, + { + "epoch": 0.9026472214542438, + "grad_norm": 1.7142424871859088, + "learning_rate": 4.929863903744281e-07, + "loss": 0.4106, + "step": 52238 + }, + { + "epoch": 0.9026645009676527, + "grad_norm": 1.7568824793453277, + "learning_rate": 4.928128535100552e-07, + "loss": 0.1951, + "step": 52239 + }, + { + "epoch": 0.9026817804810616, + "grad_norm": 1.2336823953202287, + "learning_rate": 4.92639346422914e-07, + "loss": 0.2512, + "step": 52240 + }, + { + "epoch": 0.9026990599944705, + "grad_norm": 1.5769163191362168, + "learning_rate": 4.924658691135487e-07, + "loss": 0.3069, + "step": 52241 + }, + { + "epoch": 0.9027163395078794, + "grad_norm": 1.1293556405885123, + "learning_rate": 4.922924215825042e-07, + "loss": 0.2915, + "step": 52242 + }, + { + "epoch": 0.9027336190212883, + "grad_norm": 1.5593919156074294, + "learning_rate": 4.921190038303203e-07, + "loss": 0.4606, + "step": 52243 + }, + { + "epoch": 0.9027508985346973, + "grad_norm": 0.9478687054233348, + "learning_rate": 4.919456158575442e-07, + "loss": 0.252, + "step": 52244 + }, + { + "epoch": 0.9027681780481062, + "grad_norm": 0.6660130377836554, + "learning_rate": 4.917722576647155e-07, + "loss": 0.5587, + "step": 52245 + }, + { + "epoch": 0.9027854575615151, + "grad_norm": 1.406639768766203, + "learning_rate": 4.915989292523793e-07, + "loss": 0.3558, + "step": 52246 + }, + { + "epoch": 0.902802737074924, + "grad_norm": 1.932860911676054, + "learning_rate": 4.914256306210774e-07, + "loss": 0.3131, + "step": 52247 + }, + { + "epoch": 0.9028200165883329, + "grad_norm": 1.7464394506812753, + "learning_rate": 4.912523617713549e-07, + "loss": 0.324, + "step": 52248 + }, + { + "epoch": 0.9028372961017418, + "grad_norm": 2.7431226891107237, + "learning_rate": 4.910791227037504e-07, + "loss": 0.277, + "step": 52249 + }, + { + "epoch": 0.9028545756151507, + "grad_norm": 1.0962918892022098, + "learning_rate": 4.90905913418811e-07, + "loss": 0.1835, + "step": 52250 + }, + { + "epoch": 0.9028718551285596, + "grad_norm": 1.460826553384631, + "learning_rate": 4.907327339170764e-07, + "loss": 0.3519, + "step": 52251 + }, + { + "epoch": 0.9028891346419685, + "grad_norm": 1.2399140865807299, + "learning_rate": 4.905595841990862e-07, + "loss": 0.3772, + "step": 52252 + }, + { + "epoch": 0.9029064141553774, + "grad_norm": 1.3800098206462854, + "learning_rate": 4.9038646426539e-07, + "loss": 0.4845, + "step": 52253 + }, + { + "epoch": 0.9029236936687863, + "grad_norm": 0.7502065302475857, + "learning_rate": 4.902133741165238e-07, + "loss": 0.2654, + "step": 52254 + }, + { + "epoch": 0.9029409731821952, + "grad_norm": 1.53337179334328, + "learning_rate": 4.900403137530341e-07, + "loss": 0.3926, + "step": 52255 + }, + { + "epoch": 0.9029582526956041, + "grad_norm": 2.1102138223172084, + "learning_rate": 4.898672831754591e-07, + "loss": 0.2572, + "step": 52256 + }, + { + "epoch": 0.9029755322090129, + "grad_norm": 1.3009544593558624, + "learning_rate": 4.896942823843442e-07, + "loss": 0.3018, + "step": 52257 + }, + { + "epoch": 0.9029928117224219, + "grad_norm": 1.4307272740134709, + "learning_rate": 4.895213113802266e-07, + "loss": 0.5569, + "step": 52258 + }, + { + "epoch": 0.9030100912358308, + "grad_norm": 2.5554666350512942, + "learning_rate": 4.893483701636537e-07, + "loss": 0.3049, + "step": 52259 + }, + { + "epoch": 0.9030273707492397, + "grad_norm": 0.9343675257192778, + "learning_rate": 4.891754587351649e-07, + "loss": 0.3511, + "step": 52260 + }, + { + "epoch": 0.9030446502626486, + "grad_norm": 1.107834139493299, + "learning_rate": 4.890025770952999e-07, + "loss": 0.3522, + "step": 52261 + }, + { + "epoch": 0.9030619297760575, + "grad_norm": 1.102987880409224, + "learning_rate": 4.888297252446028e-07, + "loss": 0.4205, + "step": 52262 + }, + { + "epoch": 0.9030792092894664, + "grad_norm": 1.8661619499030275, + "learning_rate": 4.88656903183613e-07, + "loss": 0.3482, + "step": 52263 + }, + { + "epoch": 0.9030964888028753, + "grad_norm": 1.6482513148287155, + "learning_rate": 4.884841109128713e-07, + "loss": 0.3891, + "step": 52264 + }, + { + "epoch": 0.9031137683162842, + "grad_norm": 1.0616095842225215, + "learning_rate": 4.883113484329216e-07, + "loss": 0.2567, + "step": 52265 + }, + { + "epoch": 0.9031310478296931, + "grad_norm": 1.1916353871512173, + "learning_rate": 4.881386157443046e-07, + "loss": 0.4444, + "step": 52266 + }, + { + "epoch": 0.903148327343102, + "grad_norm": 1.3152511079458582, + "learning_rate": 4.879659128475578e-07, + "loss": 0.3503, + "step": 52267 + }, + { + "epoch": 0.9031656068565109, + "grad_norm": 1.5107794943626172, + "learning_rate": 4.877932397432272e-07, + "loss": 0.3034, + "step": 52268 + }, + { + "epoch": 0.9031828863699198, + "grad_norm": 1.171635782641746, + "learning_rate": 4.876205964318492e-07, + "loss": 0.1613, + "step": 52269 + }, + { + "epoch": 0.9032001658833287, + "grad_norm": 1.6333548044715274, + "learning_rate": 4.874479829139666e-07, + "loss": 0.2134, + "step": 52270 + }, + { + "epoch": 0.9032174453967377, + "grad_norm": 1.7185831385585872, + "learning_rate": 4.872753991901213e-07, + "loss": 0.3853, + "step": 52271 + }, + { + "epoch": 0.9032347249101466, + "grad_norm": 1.1234522801766993, + "learning_rate": 4.871028452608517e-07, + "loss": 0.4513, + "step": 52272 + }, + { + "epoch": 0.9032520044235555, + "grad_norm": 1.1212365050455384, + "learning_rate": 4.869303211266995e-07, + "loss": 0.416, + "step": 52273 + }, + { + "epoch": 0.9032692839369644, + "grad_norm": 1.5365525912045803, + "learning_rate": 4.867578267882034e-07, + "loss": 0.1918, + "step": 52274 + }, + { + "epoch": 0.9032865634503733, + "grad_norm": 1.561859251841948, + "learning_rate": 4.865853622459049e-07, + "loss": 0.3934, + "step": 52275 + }, + { + "epoch": 0.9033038429637822, + "grad_norm": 3.8400684634317592, + "learning_rate": 4.864129275003437e-07, + "loss": 0.2639, + "step": 52276 + }, + { + "epoch": 0.9033211224771911, + "grad_norm": 3.3586150786068436, + "learning_rate": 4.862405225520617e-07, + "loss": 0.5923, + "step": 52277 + }, + { + "epoch": 0.9033384019905999, + "grad_norm": 1.600529138642916, + "learning_rate": 4.860681474015961e-07, + "loss": 0.3194, + "step": 52278 + }, + { + "epoch": 0.9033556815040088, + "grad_norm": 1.2270969493016401, + "learning_rate": 4.858958020494886e-07, + "loss": 0.364, + "step": 52279 + }, + { + "epoch": 0.9033729610174177, + "grad_norm": 0.7828870388253054, + "learning_rate": 4.857234864962779e-07, + "loss": 0.3519, + "step": 52280 + }, + { + "epoch": 0.9033902405308266, + "grad_norm": 1.639575669785242, + "learning_rate": 4.855512007425045e-07, + "loss": 0.2733, + "step": 52281 + }, + { + "epoch": 0.9034075200442355, + "grad_norm": 0.8863788812203349, + "learning_rate": 4.853789447887091e-07, + "loss": 0.2152, + "step": 52282 + }, + { + "epoch": 0.9034247995576444, + "grad_norm": 1.8059686810022528, + "learning_rate": 4.852067186354281e-07, + "loss": 0.3784, + "step": 52283 + }, + { + "epoch": 0.9034420790710533, + "grad_norm": 1.5262909977453394, + "learning_rate": 4.850345222832043e-07, + "loss": 0.262, + "step": 52284 + }, + { + "epoch": 0.9034593585844622, + "grad_norm": 1.6770370537556654, + "learning_rate": 4.848623557325749e-07, + "loss": 0.2655, + "step": 52285 + }, + { + "epoch": 0.9034766380978712, + "grad_norm": 1.0594469767970385, + "learning_rate": 4.846902189840785e-07, + "loss": 0.406, + "step": 52286 + }, + { + "epoch": 0.9034939176112801, + "grad_norm": 1.387245798333706, + "learning_rate": 4.845181120382559e-07, + "loss": 0.5354, + "step": 52287 + }, + { + "epoch": 0.903511197124689, + "grad_norm": 1.2669358552747956, + "learning_rate": 4.843460348956474e-07, + "loss": 0.2705, + "step": 52288 + }, + { + "epoch": 0.9035284766380979, + "grad_norm": 1.2059889770669066, + "learning_rate": 4.841739875567886e-07, + "loss": 0.4371, + "step": 52289 + }, + { + "epoch": 0.9035457561515068, + "grad_norm": 1.3111717873954774, + "learning_rate": 4.84001970022222e-07, + "loss": 0.3427, + "step": 52290 + }, + { + "epoch": 0.9035630356649157, + "grad_norm": 0.9715136602607304, + "learning_rate": 4.838299822924841e-07, + "loss": 0.2605, + "step": 52291 + }, + { + "epoch": 0.9035803151783246, + "grad_norm": 1.1866382148784806, + "learning_rate": 4.83658024368111e-07, + "loss": 0.2927, + "step": 52292 + }, + { + "epoch": 0.9035975946917335, + "grad_norm": 0.9278560098406492, + "learning_rate": 4.834860962496468e-07, + "loss": 0.364, + "step": 52293 + }, + { + "epoch": 0.9036148742051424, + "grad_norm": 0.5275430853516544, + "learning_rate": 4.833141979376265e-07, + "loss": 0.603, + "step": 52294 + }, + { + "epoch": 0.9036321537185513, + "grad_norm": 0.8712212307079908, + "learning_rate": 4.831423294325899e-07, + "loss": 0.3061, + "step": 52295 + }, + { + "epoch": 0.9036494332319602, + "grad_norm": 2.0734795405534063, + "learning_rate": 4.829704907350741e-07, + "loss": 0.3654, + "step": 52296 + }, + { + "epoch": 0.9036667127453691, + "grad_norm": 1.2337982450480283, + "learning_rate": 4.827986818456198e-07, + "loss": 0.5838, + "step": 52297 + }, + { + "epoch": 0.903683992258778, + "grad_norm": 1.2402990468142119, + "learning_rate": 4.826269027647601e-07, + "loss": 0.6149, + "step": 52298 + }, + { + "epoch": 0.9037012717721868, + "grad_norm": 1.4542722630894953, + "learning_rate": 4.824551534930388e-07, + "loss": 0.361, + "step": 52299 + }, + { + "epoch": 0.9037185512855958, + "grad_norm": 0.9079635760894678, + "learning_rate": 4.82283434030989e-07, + "loss": 0.3268, + "step": 52300 + }, + { + "epoch": 0.9037358307990047, + "grad_norm": 1.5264962392181758, + "learning_rate": 4.821117443791534e-07, + "loss": 0.4204, + "step": 52301 + }, + { + "epoch": 0.9037531103124136, + "grad_norm": 1.4146879544404491, + "learning_rate": 4.819400845380673e-07, + "loss": 0.4399, + "step": 52302 + }, + { + "epoch": 0.9037703898258225, + "grad_norm": 1.6171511157828862, + "learning_rate": 4.817684545082657e-07, + "loss": 0.4462, + "step": 52303 + }, + { + "epoch": 0.9037876693392314, + "grad_norm": 1.39909366159192, + "learning_rate": 4.815968542902893e-07, + "loss": 0.2978, + "step": 52304 + }, + { + "epoch": 0.9038049488526403, + "grad_norm": 2.4223672834201038, + "learning_rate": 4.814252838846756e-07, + "loss": 0.2773, + "step": 52305 + }, + { + "epoch": 0.9038222283660492, + "grad_norm": 1.671610878758615, + "learning_rate": 4.812537432919617e-07, + "loss": 0.4035, + "step": 52306 + }, + { + "epoch": 0.9038395078794581, + "grad_norm": 1.8166856748979159, + "learning_rate": 4.81082232512684e-07, + "loss": 0.163, + "step": 52307 + }, + { + "epoch": 0.903856787392867, + "grad_norm": 0.8046917039914177, + "learning_rate": 4.80910751547381e-07, + "loss": 0.2578, + "step": 52308 + }, + { + "epoch": 0.9038740669062759, + "grad_norm": 1.2998693264927512, + "learning_rate": 4.807393003965877e-07, + "loss": 0.1763, + "step": 52309 + }, + { + "epoch": 0.9038913464196848, + "grad_norm": 1.086912921301837, + "learning_rate": 4.805678790608415e-07, + "loss": 0.5562, + "step": 52310 + }, + { + "epoch": 0.9039086259330937, + "grad_norm": 1.1494001069778403, + "learning_rate": 4.803964875406808e-07, + "loss": 0.2717, + "step": 52311 + }, + { + "epoch": 0.9039259054465026, + "grad_norm": 1.1997174723176485, + "learning_rate": 4.802251258366431e-07, + "loss": 0.3992, + "step": 52312 + }, + { + "epoch": 0.9039431849599115, + "grad_norm": 1.5283728822444707, + "learning_rate": 4.800537939492645e-07, + "loss": 0.5636, + "step": 52313 + }, + { + "epoch": 0.9039604644733205, + "grad_norm": 0.9630981848294293, + "learning_rate": 4.79882491879079e-07, + "loss": 0.3869, + "step": 52314 + }, + { + "epoch": 0.9039777439867294, + "grad_norm": 0.9530201824261043, + "learning_rate": 4.797112196266251e-07, + "loss": 0.3397, + "step": 52315 + }, + { + "epoch": 0.9039950235001383, + "grad_norm": 1.3743324917524187, + "learning_rate": 4.79539977192438e-07, + "loss": 0.221, + "step": 52316 + }, + { + "epoch": 0.9040123030135472, + "grad_norm": 1.5374933877157646, + "learning_rate": 4.793687645770583e-07, + "loss": 0.4654, + "step": 52317 + }, + { + "epoch": 0.9040295825269561, + "grad_norm": 1.4190146680064435, + "learning_rate": 4.791975817810168e-07, + "loss": 0.4879, + "step": 52318 + }, + { + "epoch": 0.904046862040365, + "grad_norm": 1.1756557503935114, + "learning_rate": 4.79026428804854e-07, + "loss": 0.2686, + "step": 52319 + }, + { + "epoch": 0.9040641415537738, + "grad_norm": 1.560108151646455, + "learning_rate": 4.788553056491018e-07, + "loss": 0.3705, + "step": 52320 + }, + { + "epoch": 0.9040814210671827, + "grad_norm": 1.6931730677851615, + "learning_rate": 4.786842123142998e-07, + "loss": 0.2796, + "step": 52321 + }, + { + "epoch": 0.9040987005805916, + "grad_norm": 1.534278520510043, + "learning_rate": 4.78513148800982e-07, + "loss": 0.3667, + "step": 52322 + }, + { + "epoch": 0.9041159800940005, + "grad_norm": 1.0764638109173903, + "learning_rate": 4.783421151096845e-07, + "loss": 0.2191, + "step": 52323 + }, + { + "epoch": 0.9041332596074094, + "grad_norm": 1.3150762188503193, + "learning_rate": 4.781711112409448e-07, + "loss": 0.381, + "step": 52324 + }, + { + "epoch": 0.9041505391208183, + "grad_norm": 1.069659950626768, + "learning_rate": 4.780001371952947e-07, + "loss": 0.3424, + "step": 52325 + }, + { + "epoch": 0.9041678186342272, + "grad_norm": 1.0745148409421765, + "learning_rate": 4.778291929732715e-07, + "loss": 0.3919, + "step": 52326 + }, + { + "epoch": 0.9041850981476361, + "grad_norm": 1.4437168942222323, + "learning_rate": 4.776582785754113e-07, + "loss": 0.5461, + "step": 52327 + }, + { + "epoch": 0.904202377661045, + "grad_norm": 1.5894190725528787, + "learning_rate": 4.774873940022507e-07, + "loss": 0.2075, + "step": 52328 + }, + { + "epoch": 0.904219657174454, + "grad_norm": 1.4264272914340428, + "learning_rate": 4.77316539254321e-07, + "loss": 0.2656, + "step": 52329 + }, + { + "epoch": 0.9042369366878629, + "grad_norm": 1.1014425176073126, + "learning_rate": 4.771457143321622e-07, + "loss": 0.2651, + "step": 52330 + }, + { + "epoch": 0.9042542162012718, + "grad_norm": 1.530188031641028, + "learning_rate": 4.76974919236305e-07, + "loss": 0.2458, + "step": 52331 + }, + { + "epoch": 0.9042714957146807, + "grad_norm": 1.6556757962210369, + "learning_rate": 4.7680415396728517e-07, + "loss": 0.4302, + "step": 52332 + }, + { + "epoch": 0.9042887752280896, + "grad_norm": 1.1578618549711606, + "learning_rate": 4.766334185256405e-07, + "loss": 0.4213, + "step": 52333 + }, + { + "epoch": 0.9043060547414985, + "grad_norm": 1.2672950330221213, + "learning_rate": 4.764627129119026e-07, + "loss": 0.2451, + "step": 52334 + }, + { + "epoch": 0.9043233342549074, + "grad_norm": 1.214226934358301, + "learning_rate": 4.762920371266089e-07, + "loss": 0.2513, + "step": 52335 + }, + { + "epoch": 0.9043406137683163, + "grad_norm": 0.939629396313794, + "learning_rate": 4.7612139117029e-07, + "loss": 0.1997, + "step": 52336 + }, + { + "epoch": 0.9043578932817252, + "grad_norm": 1.246290937407603, + "learning_rate": 4.759507750434844e-07, + "loss": 0.2987, + "step": 52337 + }, + { + "epoch": 0.9043751727951341, + "grad_norm": 1.0975579477033814, + "learning_rate": 4.7578018874672284e-07, + "loss": 0.3043, + "step": 52338 + }, + { + "epoch": 0.904392452308543, + "grad_norm": 0.9028133074075978, + "learning_rate": 4.756096322805426e-07, + "loss": 0.3452, + "step": 52339 + }, + { + "epoch": 0.904409731821952, + "grad_norm": 1.2692945718573003, + "learning_rate": 4.754391056454766e-07, + "loss": 0.4271, + "step": 52340 + }, + { + "epoch": 0.9044270113353607, + "grad_norm": 1.4797396048087392, + "learning_rate": 4.7526860884206105e-07, + "loss": 0.2582, + "step": 52341 + }, + { + "epoch": 0.9044442908487696, + "grad_norm": 1.6914369810385474, + "learning_rate": 4.750981418708267e-07, + "loss": 0.314, + "step": 52342 + }, + { + "epoch": 0.9044615703621786, + "grad_norm": 1.0763018414266945, + "learning_rate": 4.7492770473230865e-07, + "loss": 0.4858, + "step": 52343 + }, + { + "epoch": 0.9044788498755875, + "grad_norm": 0.8970772094756582, + "learning_rate": 4.747572974270398e-07, + "loss": 0.3367, + "step": 52344 + }, + { + "epoch": 0.9044961293889964, + "grad_norm": 1.3144484686517193, + "learning_rate": 4.745869199555564e-07, + "loss": 0.4024, + "step": 52345 + }, + { + "epoch": 0.9045134089024053, + "grad_norm": 1.6721176830151132, + "learning_rate": 4.744165723183902e-07, + "loss": 0.2082, + "step": 52346 + }, + { + "epoch": 0.9045306884158142, + "grad_norm": 1.8715161708175314, + "learning_rate": 4.7424625451607533e-07, + "loss": 0.2499, + "step": 52347 + }, + { + "epoch": 0.9045479679292231, + "grad_norm": 0.8379123623233893, + "learning_rate": 4.7407596654914565e-07, + "loss": 0.2451, + "step": 52348 + }, + { + "epoch": 0.904565247442632, + "grad_norm": 0.5140542196715367, + "learning_rate": 4.739057084181331e-07, + "loss": 0.5321, + "step": 52349 + }, + { + "epoch": 0.9045825269560409, + "grad_norm": 1.141207470297365, + "learning_rate": 4.7373548012357165e-07, + "loss": 0.2376, + "step": 52350 + }, + { + "epoch": 0.9045998064694498, + "grad_norm": 2.2053816745699866, + "learning_rate": 4.735652816659952e-07, + "loss": 0.3527, + "step": 52351 + }, + { + "epoch": 0.9046170859828587, + "grad_norm": 2.881516339063226, + "learning_rate": 4.7339511304593687e-07, + "loss": 0.3311, + "step": 52352 + }, + { + "epoch": 0.9046343654962676, + "grad_norm": 1.276023156152482, + "learning_rate": 4.732249742639294e-07, + "loss": 0.3494, + "step": 52353 + }, + { + "epoch": 0.9046516450096765, + "grad_norm": 0.9259118624109626, + "learning_rate": 4.730548653205036e-07, + "loss": 0.2942, + "step": 52354 + }, + { + "epoch": 0.9046689245230854, + "grad_norm": 1.327551737694296, + "learning_rate": 4.728847862161945e-07, + "loss": 0.4883, + "step": 52355 + }, + { + "epoch": 0.9046862040364944, + "grad_norm": 1.4806085099678576, + "learning_rate": 4.727147369515339e-07, + "loss": 0.4064, + "step": 52356 + }, + { + "epoch": 0.9047034835499033, + "grad_norm": 1.2854282720752295, + "learning_rate": 4.72544717527057e-07, + "loss": 0.3253, + "step": 52357 + }, + { + "epoch": 0.9047207630633122, + "grad_norm": 1.272349350917288, + "learning_rate": 4.723747279432911e-07, + "loss": 0.4766, + "step": 52358 + }, + { + "epoch": 0.9047380425767211, + "grad_norm": 2.0636267993349278, + "learning_rate": 4.7220476820077465e-07, + "loss": 0.2343, + "step": 52359 + }, + { + "epoch": 0.90475532209013, + "grad_norm": 1.3780380513312742, + "learning_rate": 4.7203483830003506e-07, + "loss": 0.3352, + "step": 52360 + }, + { + "epoch": 0.9047726016035389, + "grad_norm": 1.2149536251900883, + "learning_rate": 4.7186493824160626e-07, + "loss": 0.3474, + "step": 52361 + }, + { + "epoch": 0.9047898811169477, + "grad_norm": 1.6796318847624074, + "learning_rate": 4.716950680260202e-07, + "loss": 0.2486, + "step": 52362 + }, + { + "epoch": 0.9048071606303566, + "grad_norm": 1.6212909600440526, + "learning_rate": 4.715252276538107e-07, + "loss": 0.1953, + "step": 52363 + }, + { + "epoch": 0.9048244401437655, + "grad_norm": 1.4998003798055093, + "learning_rate": 4.713554171255086e-07, + "loss": 0.1948, + "step": 52364 + }, + { + "epoch": 0.9048417196571744, + "grad_norm": 0.9263020829391283, + "learning_rate": 4.711856364416445e-07, + "loss": 0.194, + "step": 52365 + }, + { + "epoch": 0.9048589991705833, + "grad_norm": 1.0817832818555067, + "learning_rate": 4.7101588560275134e-07, + "loss": 0.3158, + "step": 52366 + }, + { + "epoch": 0.9048762786839922, + "grad_norm": 1.3884105223737406, + "learning_rate": 4.708461646093598e-07, + "loss": 0.2558, + "step": 52367 + }, + { + "epoch": 0.9048935581974011, + "grad_norm": 1.2130133540993877, + "learning_rate": 4.7067647346200395e-07, + "loss": 0.2228, + "step": 52368 + }, + { + "epoch": 0.90491083771081, + "grad_norm": 2.125346605038819, + "learning_rate": 4.705068121612122e-07, + "loss": 0.3738, + "step": 52369 + }, + { + "epoch": 0.904928117224219, + "grad_norm": 1.324242248226736, + "learning_rate": 4.703371807075185e-07, + "loss": 0.32, + "step": 52370 + }, + { + "epoch": 0.9049453967376279, + "grad_norm": 1.526763475570272, + "learning_rate": 4.701675791014526e-07, + "loss": 0.4336, + "step": 52371 + }, + { + "epoch": 0.9049626762510368, + "grad_norm": 1.1114899798466642, + "learning_rate": 4.6999800734354505e-07, + "loss": 0.3616, + "step": 52372 + }, + { + "epoch": 0.9049799557644457, + "grad_norm": 1.0906783788707144, + "learning_rate": 4.698284654343299e-07, + "loss": 0.2305, + "step": 52373 + }, + { + "epoch": 0.9049972352778546, + "grad_norm": 1.8013086099186078, + "learning_rate": 4.696589533743345e-07, + "loss": 0.4945, + "step": 52374 + }, + { + "epoch": 0.9050145147912635, + "grad_norm": 1.4909651015018646, + "learning_rate": 4.6948947116409293e-07, + "loss": 0.4765, + "step": 52375 + }, + { + "epoch": 0.9050317943046724, + "grad_norm": 1.339951588172736, + "learning_rate": 4.6932001880413357e-07, + "loss": 0.24, + "step": 52376 + }, + { + "epoch": 0.9050490738180813, + "grad_norm": 1.3068440087428934, + "learning_rate": 4.6915059629498937e-07, + "loss": 0.2348, + "step": 52377 + }, + { + "epoch": 0.9050663533314902, + "grad_norm": 1.343282220609667, + "learning_rate": 4.689812036371866e-07, + "loss": 0.4639, + "step": 52378 + }, + { + "epoch": 0.9050836328448991, + "grad_norm": 0.9211411889068466, + "learning_rate": 4.688118408312625e-07, + "loss": 0.234, + "step": 52379 + }, + { + "epoch": 0.905100912358308, + "grad_norm": 1.1594807211934566, + "learning_rate": 4.6864250787774237e-07, + "loss": 0.2339, + "step": 52380 + }, + { + "epoch": 0.9051181918717169, + "grad_norm": 1.0777631366267377, + "learning_rate": 4.684732047771601e-07, + "loss": 0.2518, + "step": 52381 + }, + { + "epoch": 0.9051354713851258, + "grad_norm": 1.8712506383531593, + "learning_rate": 4.6830393153004193e-07, + "loss": 0.4879, + "step": 52382 + }, + { + "epoch": 0.9051527508985348, + "grad_norm": 1.0777447607238184, + "learning_rate": 4.68134688136922e-07, + "loss": 0.4427, + "step": 52383 + }, + { + "epoch": 0.9051700304119435, + "grad_norm": 1.4343685405304254, + "learning_rate": 4.679654745983275e-07, + "loss": 0.3, + "step": 52384 + }, + { + "epoch": 0.9051873099253525, + "grad_norm": 1.9512112478157715, + "learning_rate": 4.6779629091478927e-07, + "loss": 0.2799, + "step": 52385 + }, + { + "epoch": 0.9052045894387614, + "grad_norm": 1.5844181692595505, + "learning_rate": 4.6762713708683904e-07, + "loss": 0.3243, + "step": 52386 + }, + { + "epoch": 0.9052218689521703, + "grad_norm": 1.1517009035142438, + "learning_rate": 4.6745801311500306e-07, + "loss": 0.5141, + "step": 52387 + }, + { + "epoch": 0.9052391484655792, + "grad_norm": 0.9930786720397831, + "learning_rate": 4.6728891899981534e-07, + "loss": 0.2563, + "step": 52388 + }, + { + "epoch": 0.9052564279789881, + "grad_norm": 0.9660100200433589, + "learning_rate": 4.67119854741801e-07, + "loss": 0.353, + "step": 52389 + }, + { + "epoch": 0.905273707492397, + "grad_norm": 1.6073946030269062, + "learning_rate": 4.6695082034149186e-07, + "loss": 0.5015, + "step": 52390 + }, + { + "epoch": 0.9052909870058059, + "grad_norm": 1.4860860680247776, + "learning_rate": 4.667818157994175e-07, + "loss": 0.4816, + "step": 52391 + }, + { + "epoch": 0.9053082665192148, + "grad_norm": 1.2062466155715135, + "learning_rate": 4.666128411161086e-07, + "loss": 0.4156, + "step": 52392 + }, + { + "epoch": 0.9053255460326237, + "grad_norm": 1.7536851728600114, + "learning_rate": 4.664438962920914e-07, + "loss": 0.2725, + "step": 52393 + }, + { + "epoch": 0.9053428255460326, + "grad_norm": 1.1693477164082586, + "learning_rate": 4.662749813278977e-07, + "loss": 0.5128, + "step": 52394 + }, + { + "epoch": 0.9053601050594415, + "grad_norm": 0.6254118214681008, + "learning_rate": 4.661060962240538e-07, + "loss": 0.5739, + "step": 52395 + }, + { + "epoch": 0.9053773845728504, + "grad_norm": 1.887947095750712, + "learning_rate": 4.6593724098109025e-07, + "loss": 0.2959, + "step": 52396 + }, + { + "epoch": 0.9053946640862593, + "grad_norm": 2.2629439228913033, + "learning_rate": 4.657684155995368e-07, + "loss": 0.4588, + "step": 52397 + }, + { + "epoch": 0.9054119435996683, + "grad_norm": 1.0349760944560895, + "learning_rate": 4.6559962007991956e-07, + "loss": 0.428, + "step": 52398 + }, + { + "epoch": 0.9054292231130772, + "grad_norm": 1.8762716121844685, + "learning_rate": 4.654308544227715e-07, + "loss": 0.5037, + "step": 52399 + }, + { + "epoch": 0.9054465026264861, + "grad_norm": 1.5599116466423515, + "learning_rate": 4.652621186286166e-07, + "loss": 0.3608, + "step": 52400 + }, + { + "epoch": 0.905463782139895, + "grad_norm": 1.0164508576093576, + "learning_rate": 4.6509341269798557e-07, + "loss": 0.1453, + "step": 52401 + }, + { + "epoch": 0.9054810616533039, + "grad_norm": 0.7958774033186368, + "learning_rate": 4.649247366314058e-07, + "loss": 0.4523, + "step": 52402 + }, + { + "epoch": 0.9054983411667128, + "grad_norm": 1.9110472280008943, + "learning_rate": 4.6475609042940906e-07, + "loss": 0.3472, + "step": 52403 + }, + { + "epoch": 0.9055156206801217, + "grad_norm": 1.7538541817503353, + "learning_rate": 4.6458747409251935e-07, + "loss": 0.4379, + "step": 52404 + }, + { + "epoch": 0.9055329001935305, + "grad_norm": 1.346769112024621, + "learning_rate": 4.644188876212663e-07, + "loss": 0.6138, + "step": 52405 + }, + { + "epoch": 0.9055501797069394, + "grad_norm": 1.2875886933809466, + "learning_rate": 4.6425033101617725e-07, + "loss": 0.517, + "step": 52406 + }, + { + "epoch": 0.9055674592203483, + "grad_norm": 1.0944362512530093, + "learning_rate": 4.640818042777817e-07, + "loss": 0.3494, + "step": 52407 + }, + { + "epoch": 0.9055847387337572, + "grad_norm": 1.5321854545145435, + "learning_rate": 4.639133074066071e-07, + "loss": 0.2588, + "step": 52408 + }, + { + "epoch": 0.9056020182471661, + "grad_norm": 1.7397714858493853, + "learning_rate": 4.6374484040317967e-07, + "loss": 0.4735, + "step": 52409 + }, + { + "epoch": 0.905619297760575, + "grad_norm": 2.2513426699273285, + "learning_rate": 4.63576403268029e-07, + "loss": 0.2534, + "step": 52410 + }, + { + "epoch": 0.905636577273984, + "grad_norm": 2.0694276058241594, + "learning_rate": 4.634079960016813e-07, + "loss": 0.2865, + "step": 52411 + }, + { + "epoch": 0.9056538567873929, + "grad_norm": 1.5134725124720882, + "learning_rate": 4.6323961860466395e-07, + "loss": 0.4741, + "step": 52412 + }, + { + "epoch": 0.9056711363008018, + "grad_norm": 0.9443807759772849, + "learning_rate": 4.630712710775054e-07, + "loss": 0.2919, + "step": 52413 + }, + { + "epoch": 0.9056884158142107, + "grad_norm": 1.0412529371504708, + "learning_rate": 4.62902953420733e-07, + "loss": 0.3583, + "step": 52414 + }, + { + "epoch": 0.9057056953276196, + "grad_norm": 1.156840301468562, + "learning_rate": 4.627346656348741e-07, + "loss": 0.2598, + "step": 52415 + }, + { + "epoch": 0.9057229748410285, + "grad_norm": 1.0037382001043498, + "learning_rate": 4.625664077204539e-07, + "loss": 0.2631, + "step": 52416 + }, + { + "epoch": 0.9057402543544374, + "grad_norm": 1.4662174051198713, + "learning_rate": 4.623981796780008e-07, + "loss": 0.4251, + "step": 52417 + }, + { + "epoch": 0.9057575338678463, + "grad_norm": 0.8490883911269406, + "learning_rate": 4.622299815080389e-07, + "loss": 0.2818, + "step": 52418 + }, + { + "epoch": 0.9057748133812552, + "grad_norm": 2.2744645426336363, + "learning_rate": 4.62061813211101e-07, + "loss": 0.46, + "step": 52419 + }, + { + "epoch": 0.9057920928946641, + "grad_norm": 1.5741785459733377, + "learning_rate": 4.61893674787709e-07, + "loss": 0.3113, + "step": 52420 + }, + { + "epoch": 0.905809372408073, + "grad_norm": 1.3450576365237523, + "learning_rate": 4.617255662383913e-07, + "loss": 0.3603, + "step": 52421 + }, + { + "epoch": 0.9058266519214819, + "grad_norm": 0.835701511035401, + "learning_rate": 4.61557487563673e-07, + "loss": 0.4633, + "step": 52422 + }, + { + "epoch": 0.9058439314348908, + "grad_norm": 1.5800267389731586, + "learning_rate": 4.613894387640838e-07, + "loss": 0.2041, + "step": 52423 + }, + { + "epoch": 0.9058612109482997, + "grad_norm": 1.0379622230260301, + "learning_rate": 4.6122141984014547e-07, + "loss": 0.4379, + "step": 52424 + }, + { + "epoch": 0.9058784904617087, + "grad_norm": 0.9733821510129498, + "learning_rate": 4.610534307923864e-07, + "loss": 0.2891, + "step": 52425 + }, + { + "epoch": 0.9058957699751174, + "grad_norm": 0.5774920521553428, + "learning_rate": 4.608854716213351e-07, + "loss": 0.6693, + "step": 52426 + }, + { + "epoch": 0.9059130494885264, + "grad_norm": 0.9789741268953003, + "learning_rate": 4.6071754232751343e-07, + "loss": 0.3779, + "step": 52427 + }, + { + "epoch": 0.9059303290019353, + "grad_norm": 1.9323028331381313, + "learning_rate": 4.6054964291145086e-07, + "loss": 0.3422, + "step": 52428 + }, + { + "epoch": 0.9059476085153442, + "grad_norm": 1.1271453744386222, + "learning_rate": 4.603817733736715e-07, + "loss": 0.3322, + "step": 52429 + }, + { + "epoch": 0.9059648880287531, + "grad_norm": 0.879060060726073, + "learning_rate": 4.6021393371470045e-07, + "loss": 0.1764, + "step": 52430 + }, + { + "epoch": 0.905982167542162, + "grad_norm": 1.7801035329429686, + "learning_rate": 4.60046123935064e-07, + "loss": 0.3887, + "step": 52431 + }, + { + "epoch": 0.9059994470555709, + "grad_norm": 1.1231231312960097, + "learning_rate": 4.598783440352894e-07, + "loss": 0.437, + "step": 52432 + }, + { + "epoch": 0.9060167265689798, + "grad_norm": 1.3667916992557123, + "learning_rate": 4.597105940159008e-07, + "loss": 0.291, + "step": 52433 + }, + { + "epoch": 0.9060340060823887, + "grad_norm": 1.1389083989117397, + "learning_rate": 4.5954287387742326e-07, + "loss": 0.2837, + "step": 52434 + }, + { + "epoch": 0.9060512855957976, + "grad_norm": 0.6480573858689554, + "learning_rate": 4.5937518362038194e-07, + "loss": 0.5645, + "step": 52435 + }, + { + "epoch": 0.9060685651092065, + "grad_norm": 0.6693164088055064, + "learning_rate": 4.59207523245303e-07, + "loss": 0.6331, + "step": 52436 + }, + { + "epoch": 0.9060858446226154, + "grad_norm": 1.2049600675594714, + "learning_rate": 4.5903989275271286e-07, + "loss": 0.1937, + "step": 52437 + }, + { + "epoch": 0.9061031241360243, + "grad_norm": 1.0925885876573687, + "learning_rate": 4.5887229214313214e-07, + "loss": 0.2484, + "step": 52438 + }, + { + "epoch": 0.9061204036494332, + "grad_norm": 2.199434118568122, + "learning_rate": 4.587047214170903e-07, + "loss": 0.2646, + "step": 52439 + }, + { + "epoch": 0.9061376831628422, + "grad_norm": 1.4697718965551856, + "learning_rate": 4.5853718057510934e-07, + "loss": 0.4466, + "step": 52440 + }, + { + "epoch": 0.9061549626762511, + "grad_norm": 1.0537302101379848, + "learning_rate": 4.5836966961771536e-07, + "loss": 0.3426, + "step": 52441 + }, + { + "epoch": 0.90617224218966, + "grad_norm": 1.2252515455959372, + "learning_rate": 4.582021885454324e-07, + "loss": 0.4041, + "step": 52442 + }, + { + "epoch": 0.9061895217030689, + "grad_norm": 1.0577375613107287, + "learning_rate": 4.580347373587868e-07, + "loss": 0.3027, + "step": 52443 + }, + { + "epoch": 0.9062068012164778, + "grad_norm": 1.9439310615044714, + "learning_rate": 4.5786731605830023e-07, + "loss": 0.5448, + "step": 52444 + }, + { + "epoch": 0.9062240807298867, + "grad_norm": 1.0846732694327106, + "learning_rate": 4.576999246444991e-07, + "loss": 0.3672, + "step": 52445 + }, + { + "epoch": 0.9062413602432956, + "grad_norm": 1.51763775756706, + "learning_rate": 4.5753256311790617e-07, + "loss": 0.4342, + "step": 52446 + }, + { + "epoch": 0.9062586397567044, + "grad_norm": 2.091348651656188, + "learning_rate": 4.5736523147904667e-07, + "loss": 0.4916, + "step": 52447 + }, + { + "epoch": 0.9062759192701133, + "grad_norm": 1.521736549788887, + "learning_rate": 4.571979297284457e-07, + "loss": 0.3851, + "step": 52448 + }, + { + "epoch": 0.9062931987835222, + "grad_norm": 0.874945222103444, + "learning_rate": 4.5703065786662504e-07, + "loss": 0.311, + "step": 52449 + }, + { + "epoch": 0.9063104782969311, + "grad_norm": 1.161799624008498, + "learning_rate": 4.5686341589410985e-07, + "loss": 0.4565, + "step": 52450 + }, + { + "epoch": 0.90632775781034, + "grad_norm": 0.646723665219048, + "learning_rate": 4.566962038114231e-07, + "loss": 0.6024, + "step": 52451 + }, + { + "epoch": 0.9063450373237489, + "grad_norm": 1.559727042950925, + "learning_rate": 4.565290216190887e-07, + "loss": 0.4375, + "step": 52452 + }, + { + "epoch": 0.9063623168371578, + "grad_norm": 1.089574466591493, + "learning_rate": 4.5636186931763084e-07, + "loss": 0.3343, + "step": 52453 + }, + { + "epoch": 0.9063795963505668, + "grad_norm": 0.6700480294960075, + "learning_rate": 4.561947469075745e-07, + "loss": 0.8115, + "step": 52454 + }, + { + "epoch": 0.9063968758639757, + "grad_norm": 1.0685892584919012, + "learning_rate": 4.560276543894404e-07, + "loss": 0.3473, + "step": 52455 + }, + { + "epoch": 0.9064141553773846, + "grad_norm": 1.8224176984569849, + "learning_rate": 4.5586059176375376e-07, + "loss": 0.3876, + "step": 52456 + }, + { + "epoch": 0.9064314348907935, + "grad_norm": 1.636495393514538, + "learning_rate": 4.556935590310374e-07, + "loss": 0.4787, + "step": 52457 + }, + { + "epoch": 0.9064487144042024, + "grad_norm": 1.1519083502974168, + "learning_rate": 4.55526556191811e-07, + "loss": 0.298, + "step": 52458 + }, + { + "epoch": 0.9064659939176113, + "grad_norm": 1.3390928913689988, + "learning_rate": 4.55359583246604e-07, + "loss": 0.3585, + "step": 52459 + }, + { + "epoch": 0.9064832734310202, + "grad_norm": 1.9263815276236258, + "learning_rate": 4.55192640195935e-07, + "loss": 0.4235, + "step": 52460 + }, + { + "epoch": 0.9065005529444291, + "grad_norm": 1.1410922725730988, + "learning_rate": 4.550257270403291e-07, + "loss": 0.2888, + "step": 52461 + }, + { + "epoch": 0.906517832457838, + "grad_norm": 1.2377777414981743, + "learning_rate": 4.548588437803059e-07, + "loss": 0.3275, + "step": 52462 + }, + { + "epoch": 0.9065351119712469, + "grad_norm": 1.1478525023580082, + "learning_rate": 4.546919904163927e-07, + "loss": 0.3242, + "step": 52463 + }, + { + "epoch": 0.9065523914846558, + "grad_norm": 1.4251548989573888, + "learning_rate": 4.5452516694910796e-07, + "loss": 0.4547, + "step": 52464 + }, + { + "epoch": 0.9065696709980647, + "grad_norm": 1.2845164525727168, + "learning_rate": 4.543583733789758e-07, + "loss": 0.2438, + "step": 52465 + }, + { + "epoch": 0.9065869505114736, + "grad_norm": 2.280517477316855, + "learning_rate": 4.541916097065191e-07, + "loss": 0.2649, + "step": 52466 + }, + { + "epoch": 0.9066042300248826, + "grad_norm": 0.8480245452537251, + "learning_rate": 4.5402487593225964e-07, + "loss": 0.2805, + "step": 52467 + }, + { + "epoch": 0.9066215095382913, + "grad_norm": 1.428912797359668, + "learning_rate": 4.5385817205672035e-07, + "loss": 0.2074, + "step": 52468 + }, + { + "epoch": 0.9066387890517003, + "grad_norm": 1.232068602911845, + "learning_rate": 4.536914980804219e-07, + "loss": 0.216, + "step": 52469 + }, + { + "epoch": 0.9066560685651092, + "grad_norm": 1.9184557557761515, + "learning_rate": 4.535248540038872e-07, + "loss": 0.1885, + "step": 52470 + }, + { + "epoch": 0.9066733480785181, + "grad_norm": 0.9073523526972009, + "learning_rate": 4.5335823982763705e-07, + "loss": 0.3825, + "step": 52471 + }, + { + "epoch": 0.906690627591927, + "grad_norm": 1.6514874658097518, + "learning_rate": 4.531916555521976e-07, + "loss": 0.452, + "step": 52472 + }, + { + "epoch": 0.9067079071053359, + "grad_norm": 1.459575432152995, + "learning_rate": 4.5302510117808507e-07, + "loss": 0.3669, + "step": 52473 + }, + { + "epoch": 0.9067251866187448, + "grad_norm": 1.3985423478104733, + "learning_rate": 4.528585767058247e-07, + "loss": 0.4554, + "step": 52474 + }, + { + "epoch": 0.9067424661321537, + "grad_norm": 1.287033561281678, + "learning_rate": 4.5269208213593486e-07, + "loss": 0.297, + "step": 52475 + }, + { + "epoch": 0.9067597456455626, + "grad_norm": 1.3910556399381457, + "learning_rate": 4.5252561746893965e-07, + "loss": 0.4293, + "step": 52476 + }, + { + "epoch": 0.9067770251589715, + "grad_norm": 0.6591580420186653, + "learning_rate": 4.5235918270536193e-07, + "loss": 0.879, + "step": 52477 + }, + { + "epoch": 0.9067943046723804, + "grad_norm": 1.2898965821954689, + "learning_rate": 4.52192777845718e-07, + "loss": 0.3499, + "step": 52478 + }, + { + "epoch": 0.9068115841857893, + "grad_norm": 0.921341612688337, + "learning_rate": 4.520264028905341e-07, + "loss": 0.2347, + "step": 52479 + }, + { + "epoch": 0.9068288636991982, + "grad_norm": 1.0445982666719076, + "learning_rate": 4.518600578403276e-07, + "loss": 0.2732, + "step": 52480 + }, + { + "epoch": 0.9068461432126071, + "grad_norm": 1.0988838430855963, + "learning_rate": 4.5169374269562137e-07, + "loss": 0.2093, + "step": 52481 + }, + { + "epoch": 0.906863422726016, + "grad_norm": 1.405016368195953, + "learning_rate": 4.5152745745693617e-07, + "loss": 0.2603, + "step": 52482 + }, + { + "epoch": 0.906880702239425, + "grad_norm": 0.932806715609883, + "learning_rate": 4.5136120212479263e-07, + "loss": 0.9436, + "step": 52483 + }, + { + "epoch": 0.9068979817528339, + "grad_norm": 1.2193197410679661, + "learning_rate": 4.511949766997115e-07, + "loss": 0.2249, + "step": 52484 + }, + { + "epoch": 0.9069152612662428, + "grad_norm": 1.090600081049765, + "learning_rate": 4.510287811822145e-07, + "loss": 0.2508, + "step": 52485 + }, + { + "epoch": 0.9069325407796517, + "grad_norm": 1.6439873381980703, + "learning_rate": 4.5086261557281907e-07, + "loss": 0.3022, + "step": 52486 + }, + { + "epoch": 0.9069498202930606, + "grad_norm": 1.3465436280552423, + "learning_rate": 4.506964798720481e-07, + "loss": 0.3897, + "step": 52487 + }, + { + "epoch": 0.9069670998064695, + "grad_norm": 2.238406284864563, + "learning_rate": 4.5053037408042343e-07, + "loss": 0.3884, + "step": 52488 + }, + { + "epoch": 0.9069843793198783, + "grad_norm": 1.8015753015351272, + "learning_rate": 4.503642981984613e-07, + "loss": 0.2253, + "step": 52489 + }, + { + "epoch": 0.9070016588332872, + "grad_norm": 1.6679523389056177, + "learning_rate": 4.5019825222668455e-07, + "loss": 0.2732, + "step": 52490 + }, + { + "epoch": 0.9070189383466961, + "grad_norm": 1.213505910061818, + "learning_rate": 4.5003223616561286e-07, + "loss": 0.6065, + "step": 52491 + }, + { + "epoch": 0.907036217860105, + "grad_norm": 0.8877729711758319, + "learning_rate": 4.4986625001576465e-07, + "loss": 0.3754, + "step": 52492 + }, + { + "epoch": 0.9070534973735139, + "grad_norm": 1.4035907858133612, + "learning_rate": 4.4970029377766176e-07, + "loss": 0.5582, + "step": 52493 + }, + { + "epoch": 0.9070707768869228, + "grad_norm": 0.8842447321728464, + "learning_rate": 4.495343674518249e-07, + "loss": 0.448, + "step": 52494 + }, + { + "epoch": 0.9070880564003317, + "grad_norm": 1.8835479988996453, + "learning_rate": 4.4936847103877025e-07, + "loss": 0.2616, + "step": 52495 + }, + { + "epoch": 0.9071053359137407, + "grad_norm": 2.0263124370088503, + "learning_rate": 4.4920260453902074e-07, + "loss": 0.2476, + "step": 52496 + }, + { + "epoch": 0.9071226154271496, + "grad_norm": 1.5827049753739564, + "learning_rate": 4.4903676795309495e-07, + "loss": 0.3692, + "step": 52497 + }, + { + "epoch": 0.9071398949405585, + "grad_norm": 1.0125963824004884, + "learning_rate": 4.48870961281509e-07, + "loss": 0.4319, + "step": 52498 + }, + { + "epoch": 0.9071571744539674, + "grad_norm": 1.159706011513026, + "learning_rate": 4.4870518452478805e-07, + "loss": 0.5041, + "step": 52499 + }, + { + "epoch": 0.9071744539673763, + "grad_norm": 1.9168053062242396, + "learning_rate": 4.485394376834462e-07, + "loss": 0.4438, + "step": 52500 + }, + { + "epoch": 0.9071917334807852, + "grad_norm": 0.9823841191472888, + "learning_rate": 4.483737207580063e-07, + "loss": 0.3374, + "step": 52501 + }, + { + "epoch": 0.9072090129941941, + "grad_norm": 1.5118271754515533, + "learning_rate": 4.482080337489847e-07, + "loss": 0.4039, + "step": 52502 + }, + { + "epoch": 0.907226292507603, + "grad_norm": 1.5455390188620315, + "learning_rate": 4.480423766569031e-07, + "loss": 0.4332, + "step": 52503 + }, + { + "epoch": 0.9072435720210119, + "grad_norm": 2.0189134150365793, + "learning_rate": 4.4787674948227665e-07, + "loss": 0.3894, + "step": 52504 + }, + { + "epoch": 0.9072608515344208, + "grad_norm": 1.4086810674501973, + "learning_rate": 4.477111522256272e-07, + "loss": 0.396, + "step": 52505 + }, + { + "epoch": 0.9072781310478297, + "grad_norm": 0.8152899058241689, + "learning_rate": 4.47545584887471e-07, + "loss": 0.2969, + "step": 52506 + }, + { + "epoch": 0.9072954105612386, + "grad_norm": 1.385673041565434, + "learning_rate": 4.4738004746833096e-07, + "loss": 0.3075, + "step": 52507 + }, + { + "epoch": 0.9073126900746475, + "grad_norm": 1.502313338852884, + "learning_rate": 4.472145399687211e-07, + "loss": 0.4265, + "step": 52508 + }, + { + "epoch": 0.9073299695880565, + "grad_norm": 1.4892568987148551, + "learning_rate": 4.4704906238916104e-07, + "loss": 0.3418, + "step": 52509 + }, + { + "epoch": 0.9073472491014652, + "grad_norm": 1.3064635789447807, + "learning_rate": 4.4688361473016805e-07, + "loss": 0.2884, + "step": 52510 + }, + { + "epoch": 0.9073645286148742, + "grad_norm": 1.1160665972019237, + "learning_rate": 4.4671819699226293e-07, + "loss": 0.4618, + "step": 52511 + }, + { + "epoch": 0.9073818081282831, + "grad_norm": 1.2146258871431046, + "learning_rate": 4.46552809175963e-07, + "loss": 0.2867, + "step": 52512 + }, + { + "epoch": 0.907399087641692, + "grad_norm": 1.3835064333350748, + "learning_rate": 4.463874512817845e-07, + "loss": 0.2269, + "step": 52513 + }, + { + "epoch": 0.9074163671551009, + "grad_norm": 1.237493032751651, + "learning_rate": 4.4622212331024816e-07, + "loss": 0.2467, + "step": 52514 + }, + { + "epoch": 0.9074336466685098, + "grad_norm": 1.9837294708008453, + "learning_rate": 4.46056825261868e-07, + "loss": 0.2327, + "step": 52515 + }, + { + "epoch": 0.9074509261819187, + "grad_norm": 2.1260429488434482, + "learning_rate": 4.458915571371647e-07, + "loss": 0.2029, + "step": 52516 + }, + { + "epoch": 0.9074682056953276, + "grad_norm": 1.2952261004487584, + "learning_rate": 4.4572631893665675e-07, + "loss": 0.2663, + "step": 52517 + }, + { + "epoch": 0.9074854852087365, + "grad_norm": 1.031928003087401, + "learning_rate": 4.4556111066085815e-07, + "loss": 0.6405, + "step": 52518 + }, + { + "epoch": 0.9075027647221454, + "grad_norm": 1.279618835704442, + "learning_rate": 4.453959323102897e-07, + "loss": 0.3731, + "step": 52519 + }, + { + "epoch": 0.9075200442355543, + "grad_norm": 2.2814505107854917, + "learning_rate": 4.452307838854664e-07, + "loss": 0.4368, + "step": 52520 + }, + { + "epoch": 0.9075373237489632, + "grad_norm": 0.95492458745951, + "learning_rate": 4.4506566538690564e-07, + "loss": 0.2744, + "step": 52521 + }, + { + "epoch": 0.9075546032623721, + "grad_norm": 1.0331122469418133, + "learning_rate": 4.44900576815126e-07, + "loss": 0.4403, + "step": 52522 + }, + { + "epoch": 0.907571882775781, + "grad_norm": 1.7051841857277017, + "learning_rate": 4.4473551817064473e-07, + "loss": 0.4253, + "step": 52523 + }, + { + "epoch": 0.90758916228919, + "grad_norm": 1.5304901359436727, + "learning_rate": 4.445704894539771e-07, + "loss": 0.366, + "step": 52524 + }, + { + "epoch": 0.9076064418025989, + "grad_norm": 1.8195453897140166, + "learning_rate": 4.4440549066564253e-07, + "loss": 0.3917, + "step": 52525 + }, + { + "epoch": 0.9076237213160078, + "grad_norm": 2.3570163583309007, + "learning_rate": 4.442405218061541e-07, + "loss": 0.2375, + "step": 52526 + }, + { + "epoch": 0.9076410008294167, + "grad_norm": 1.0325689514302268, + "learning_rate": 4.4407558287603123e-07, + "loss": 0.2835, + "step": 52527 + }, + { + "epoch": 0.9076582803428256, + "grad_norm": 1.0629665324884903, + "learning_rate": 4.4391067387579037e-07, + "loss": 0.3879, + "step": 52528 + }, + { + "epoch": 0.9076755598562345, + "grad_norm": 1.500931846667831, + "learning_rate": 4.437457948059465e-07, + "loss": 0.2674, + "step": 52529 + }, + { + "epoch": 0.9076928393696434, + "grad_norm": 1.3138439402390023, + "learning_rate": 4.435809456670193e-07, + "loss": 0.3222, + "step": 52530 + }, + { + "epoch": 0.9077101188830523, + "grad_norm": 1.3026054185178915, + "learning_rate": 4.4341612645952047e-07, + "loss": 0.181, + "step": 52531 + }, + { + "epoch": 0.9077273983964611, + "grad_norm": 1.1171181323989543, + "learning_rate": 4.432513371839686e-07, + "loss": 0.28, + "step": 52532 + }, + { + "epoch": 0.90774467790987, + "grad_norm": 2.062575538286909, + "learning_rate": 4.4308657784087994e-07, + "loss": 0.397, + "step": 52533 + }, + { + "epoch": 0.9077619574232789, + "grad_norm": 2.234074752500642, + "learning_rate": 4.429218484307718e-07, + "loss": 0.2565, + "step": 52534 + }, + { + "epoch": 0.9077792369366878, + "grad_norm": 1.065861981906685, + "learning_rate": 4.427571489541582e-07, + "loss": 0.4326, + "step": 52535 + }, + { + "epoch": 0.9077965164500967, + "grad_norm": 1.1066637978595426, + "learning_rate": 4.4259247941155546e-07, + "loss": 0.3812, + "step": 52536 + }, + { + "epoch": 0.9078137959635056, + "grad_norm": 1.4404498952823337, + "learning_rate": 4.424278398034787e-07, + "loss": 0.2269, + "step": 52537 + }, + { + "epoch": 0.9078310754769146, + "grad_norm": 0.9066394121033834, + "learning_rate": 4.4226323013044414e-07, + "loss": 0.8845, + "step": 52538 + }, + { + "epoch": 0.9078483549903235, + "grad_norm": 1.6042132701482335, + "learning_rate": 4.420986503929692e-07, + "loss": 0.2177, + "step": 52539 + }, + { + "epoch": 0.9078656345037324, + "grad_norm": 1.1276614817368336, + "learning_rate": 4.4193410059156674e-07, + "loss": 0.3759, + "step": 52540 + }, + { + "epoch": 0.9078829140171413, + "grad_norm": 1.3749147000873723, + "learning_rate": 4.4176958072675414e-07, + "loss": 0.4551, + "step": 52541 + }, + { + "epoch": 0.9079001935305502, + "grad_norm": 1.2448272827831963, + "learning_rate": 4.416050907990432e-07, + "loss": 0.3165, + "step": 52542 + }, + { + "epoch": 0.9079174730439591, + "grad_norm": 0.8416132022343806, + "learning_rate": 4.414406308089536e-07, + "loss": 0.6414, + "step": 52543 + }, + { + "epoch": 0.907934752557368, + "grad_norm": 2.794123561008936, + "learning_rate": 4.412762007569971e-07, + "loss": 0.3594, + "step": 52544 + }, + { + "epoch": 0.9079520320707769, + "grad_norm": 1.329770586212263, + "learning_rate": 4.4111180064368985e-07, + "loss": 0.435, + "step": 52545 + }, + { + "epoch": 0.9079693115841858, + "grad_norm": 1.377807951841922, + "learning_rate": 4.4094743046954714e-07, + "loss": 0.3497, + "step": 52546 + }, + { + "epoch": 0.9079865910975947, + "grad_norm": 2.182465280437315, + "learning_rate": 4.4078309023508515e-07, + "loss": 0.3146, + "step": 52547 + }, + { + "epoch": 0.9080038706110036, + "grad_norm": 1.3503887928783398, + "learning_rate": 4.406187799408168e-07, + "loss": 0.3438, + "step": 52548 + }, + { + "epoch": 0.9080211501244125, + "grad_norm": 1.917214635760672, + "learning_rate": 4.4045449958725505e-07, + "loss": 0.2647, + "step": 52549 + }, + { + "epoch": 0.9080384296378214, + "grad_norm": 1.1902824341739306, + "learning_rate": 4.4029024917491726e-07, + "loss": 0.222, + "step": 52550 + }, + { + "epoch": 0.9080557091512304, + "grad_norm": 0.8113637901789044, + "learning_rate": 4.401260287043152e-07, + "loss": 0.4026, + "step": 52551 + }, + { + "epoch": 0.9080729886646393, + "grad_norm": 1.6820024939711375, + "learning_rate": 4.3996183817596736e-07, + "loss": 0.3348, + "step": 52552 + }, + { + "epoch": 0.908090268178048, + "grad_norm": 1.2211389027707713, + "learning_rate": 4.397976775903845e-07, + "loss": 0.4433, + "step": 52553 + }, + { + "epoch": 0.908107547691457, + "grad_norm": 0.9749759120888761, + "learning_rate": 4.396335469480828e-07, + "loss": 0.2688, + "step": 52554 + }, + { + "epoch": 0.9081248272048659, + "grad_norm": 1.0957810892626991, + "learning_rate": 4.394694462495741e-07, + "loss": 0.2916, + "step": 52555 + }, + { + "epoch": 0.9081421067182748, + "grad_norm": 1.3907397530941892, + "learning_rate": 4.3930537549537466e-07, + "loss": 0.3442, + "step": 52556 + }, + { + "epoch": 0.9081593862316837, + "grad_norm": 1.6913492832072219, + "learning_rate": 4.391413346859963e-07, + "loss": 0.2885, + "step": 52557 + }, + { + "epoch": 0.9081766657450926, + "grad_norm": 1.3887618131298736, + "learning_rate": 4.389773238219552e-07, + "loss": 0.3896, + "step": 52558 + }, + { + "epoch": 0.9081939452585015, + "grad_norm": 1.6923481263674922, + "learning_rate": 4.388133429037644e-07, + "loss": 0.472, + "step": 52559 + }, + { + "epoch": 0.9082112247719104, + "grad_norm": 1.0126058868322327, + "learning_rate": 4.3864939193193454e-07, + "loss": 0.1518, + "step": 52560 + }, + { + "epoch": 0.9082285042853193, + "grad_norm": 1.0816530445450236, + "learning_rate": 4.384854709069819e-07, + "loss": 0.3994, + "step": 52561 + }, + { + "epoch": 0.9082457837987282, + "grad_norm": 1.4529537580476766, + "learning_rate": 4.3832157982941935e-07, + "loss": 0.2249, + "step": 52562 + }, + { + "epoch": 0.9082630633121371, + "grad_norm": 0.8997201575826324, + "learning_rate": 4.381577186997621e-07, + "loss": 0.2554, + "step": 52563 + }, + { + "epoch": 0.908280342825546, + "grad_norm": 1.5277296006302017, + "learning_rate": 4.379938875185197e-07, + "loss": 0.4315, + "step": 52564 + }, + { + "epoch": 0.908297622338955, + "grad_norm": 1.1863534986971427, + "learning_rate": 4.378300862862084e-07, + "loss": 0.3079, + "step": 52565 + }, + { + "epoch": 0.9083149018523639, + "grad_norm": 1.0198671780639263, + "learning_rate": 4.376663150033389e-07, + "loss": 0.325, + "step": 52566 + }, + { + "epoch": 0.9083321813657728, + "grad_norm": 1.4244293599369815, + "learning_rate": 4.3750257367042526e-07, + "loss": 0.3396, + "step": 52567 + }, + { + "epoch": 0.9083494608791817, + "grad_norm": 1.681880635703561, + "learning_rate": 4.373388622879804e-07, + "loss": 0.3403, + "step": 52568 + }, + { + "epoch": 0.9083667403925906, + "grad_norm": 1.5790946040378988, + "learning_rate": 4.371751808565172e-07, + "loss": 0.2998, + "step": 52569 + }, + { + "epoch": 0.9083840199059995, + "grad_norm": 1.1467947684781519, + "learning_rate": 4.3701152937654866e-07, + "loss": 0.3689, + "step": 52570 + }, + { + "epoch": 0.9084012994194084, + "grad_norm": 0.6912985956501349, + "learning_rate": 4.3684790784858654e-07, + "loss": 0.2181, + "step": 52571 + }, + { + "epoch": 0.9084185789328173, + "grad_norm": 1.0278233852207124, + "learning_rate": 4.3668431627314266e-07, + "loss": 0.2883, + "step": 52572 + }, + { + "epoch": 0.9084358584462262, + "grad_norm": 0.8307628304608992, + "learning_rate": 4.3652075465072997e-07, + "loss": 0.2974, + "step": 52573 + }, + { + "epoch": 0.908453137959635, + "grad_norm": 0.876932126734385, + "learning_rate": 4.3635722298186244e-07, + "loss": 0.3276, + "step": 52574 + }, + { + "epoch": 0.9084704174730439, + "grad_norm": 0.6657968156862123, + "learning_rate": 4.3619372126704973e-07, + "loss": 0.6472, + "step": 52575 + }, + { + "epoch": 0.9084876969864528, + "grad_norm": 1.2718523937746868, + "learning_rate": 4.3603024950680694e-07, + "loss": 0.5905, + "step": 52576 + }, + { + "epoch": 0.9085049764998617, + "grad_norm": 1.2694017287370276, + "learning_rate": 4.3586680770164256e-07, + "loss": 0.4268, + "step": 52577 + }, + { + "epoch": 0.9085222560132706, + "grad_norm": 1.9935118132620118, + "learning_rate": 4.3570339585207067e-07, + "loss": 0.5844, + "step": 52578 + }, + { + "epoch": 0.9085395355266795, + "grad_norm": 1.6161100253700345, + "learning_rate": 4.35540013958603e-07, + "loss": 0.1864, + "step": 52579 + }, + { + "epoch": 0.9085568150400885, + "grad_norm": 1.5746546988352654, + "learning_rate": 4.353766620217503e-07, + "loss": 0.5181, + "step": 52580 + }, + { + "epoch": 0.9085740945534974, + "grad_norm": 1.3969798420564146, + "learning_rate": 4.352133400420255e-07, + "loss": 0.3897, + "step": 52581 + }, + { + "epoch": 0.9085913740669063, + "grad_norm": 1.2997587056604274, + "learning_rate": 4.3505004801993824e-07, + "loss": 0.4416, + "step": 52582 + }, + { + "epoch": 0.9086086535803152, + "grad_norm": 1.2161649760925053, + "learning_rate": 4.3488678595600245e-07, + "loss": 0.3863, + "step": 52583 + }, + { + "epoch": 0.9086259330937241, + "grad_norm": 1.1451596957830048, + "learning_rate": 4.3472355385072775e-07, + "loss": 0.4409, + "step": 52584 + }, + { + "epoch": 0.908643212607133, + "grad_norm": 1.4823447538321124, + "learning_rate": 4.3456035170462484e-07, + "loss": 0.2871, + "step": 52585 + }, + { + "epoch": 0.9086604921205419, + "grad_norm": 2.1931327756468475, + "learning_rate": 4.343971795182056e-07, + "loss": 0.3073, + "step": 52586 + }, + { + "epoch": 0.9086777716339508, + "grad_norm": 1.802274791902553, + "learning_rate": 4.34234037291984e-07, + "loss": 0.3062, + "step": 52587 + }, + { + "epoch": 0.9086950511473597, + "grad_norm": 1.8174222635635224, + "learning_rate": 4.3407092502646633e-07, + "loss": 0.7399, + "step": 52588 + }, + { + "epoch": 0.9087123306607686, + "grad_norm": 1.1767790521329562, + "learning_rate": 4.339078427221666e-07, + "loss": 0.3534, + "step": 52589 + }, + { + "epoch": 0.9087296101741775, + "grad_norm": 1.7269696837945505, + "learning_rate": 4.337447903795933e-07, + "loss": 0.3898, + "step": 52590 + }, + { + "epoch": 0.9087468896875864, + "grad_norm": 1.3809707658747594, + "learning_rate": 4.335817679992582e-07, + "loss": 0.2985, + "step": 52591 + }, + { + "epoch": 0.9087641692009953, + "grad_norm": 1.2044493214730099, + "learning_rate": 4.334187755816732e-07, + "loss": 0.3945, + "step": 52592 + }, + { + "epoch": 0.9087814487144042, + "grad_norm": 1.1424334311108375, + "learning_rate": 4.3325581312734675e-07, + "loss": 0.3431, + "step": 52593 + }, + { + "epoch": 0.9087987282278132, + "grad_norm": 1.37988827815358, + "learning_rate": 4.330928806367907e-07, + "loss": 0.2731, + "step": 52594 + }, + { + "epoch": 0.908816007741222, + "grad_norm": 0.9815641136528956, + "learning_rate": 4.3292997811051453e-07, + "loss": 0.3254, + "step": 52595 + }, + { + "epoch": 0.9088332872546309, + "grad_norm": 0.9901255579004878, + "learning_rate": 4.327671055490279e-07, + "loss": 0.3593, + "step": 52596 + }, + { + "epoch": 0.9088505667680398, + "grad_norm": 0.8950160351825488, + "learning_rate": 4.3260426295284266e-07, + "loss": 0.2985, + "step": 52597 + }, + { + "epoch": 0.9088678462814487, + "grad_norm": 1.9827623827093936, + "learning_rate": 4.324414503224683e-07, + "loss": 0.2293, + "step": 52598 + }, + { + "epoch": 0.9088851257948576, + "grad_norm": 1.4599566140239586, + "learning_rate": 4.322786676584134e-07, + "loss": 0.389, + "step": 52599 + }, + { + "epoch": 0.9089024053082665, + "grad_norm": 1.135740653676183, + "learning_rate": 4.321159149611909e-07, + "loss": 0.2543, + "step": 52600 + }, + { + "epoch": 0.9089196848216754, + "grad_norm": 1.656198063576965, + "learning_rate": 4.3195319223130695e-07, + "loss": 0.47, + "step": 52601 + }, + { + "epoch": 0.9089369643350843, + "grad_norm": 1.873797861444857, + "learning_rate": 4.3179049946927233e-07, + "loss": 0.3713, + "step": 52602 + }, + { + "epoch": 0.9089542438484932, + "grad_norm": 2.629108519842038, + "learning_rate": 4.316278366755977e-07, + "loss": 0.3895, + "step": 52603 + }, + { + "epoch": 0.9089715233619021, + "grad_norm": 2.2344502370818344, + "learning_rate": 4.314652038507916e-07, + "loss": 0.7267, + "step": 52604 + }, + { + "epoch": 0.908988802875311, + "grad_norm": 2.4321523090576065, + "learning_rate": 4.313026009953647e-07, + "loss": 0.3437, + "step": 52605 + }, + { + "epoch": 0.9090060823887199, + "grad_norm": 1.3902440742159363, + "learning_rate": 4.3114002810982436e-07, + "loss": 0.2724, + "step": 52606 + }, + { + "epoch": 0.9090233619021288, + "grad_norm": 1.781817019383013, + "learning_rate": 4.309774851946802e-07, + "loss": 0.4525, + "step": 52607 + }, + { + "epoch": 0.9090406414155378, + "grad_norm": 1.4149485174058336, + "learning_rate": 4.3081497225044177e-07, + "loss": 0.2409, + "step": 52608 + }, + { + "epoch": 0.9090579209289467, + "grad_norm": 1.2979496826935415, + "learning_rate": 4.3065248927761874e-07, + "loss": 0.4358, + "step": 52609 + }, + { + "epoch": 0.9090752004423556, + "grad_norm": 1.7271926757221938, + "learning_rate": 4.304900362767206e-07, + "loss": 0.4396, + "step": 52610 + }, + { + "epoch": 0.9090924799557645, + "grad_norm": 1.209251894516645, + "learning_rate": 4.303276132482526e-07, + "loss": 0.3061, + "step": 52611 + }, + { + "epoch": 0.9091097594691734, + "grad_norm": 1.397093240977529, + "learning_rate": 4.3016522019272643e-07, + "loss": 0.3384, + "step": 52612 + }, + { + "epoch": 0.9091270389825823, + "grad_norm": 1.6847749992000842, + "learning_rate": 4.300028571106496e-07, + "loss": 0.3714, + "step": 52613 + }, + { + "epoch": 0.9091443184959912, + "grad_norm": 1.038641485875379, + "learning_rate": 4.298405240025327e-07, + "loss": 0.4039, + "step": 52614 + }, + { + "epoch": 0.9091615980094001, + "grad_norm": 0.9988375212006324, + "learning_rate": 4.29678220868881e-07, + "loss": 0.336, + "step": 52615 + }, + { + "epoch": 0.9091788775228089, + "grad_norm": 0.9149053636227606, + "learning_rate": 4.295159477102062e-07, + "loss": 0.2731, + "step": 52616 + }, + { + "epoch": 0.9091961570362178, + "grad_norm": 1.3948693641763263, + "learning_rate": 4.2935370452701353e-07, + "loss": 0.2851, + "step": 52617 + }, + { + "epoch": 0.9092134365496267, + "grad_norm": 2.2072443008427443, + "learning_rate": 4.2919149131981143e-07, + "loss": 0.326, + "step": 52618 + }, + { + "epoch": 0.9092307160630356, + "grad_norm": 1.0761155515091194, + "learning_rate": 4.2902930808910946e-07, + "loss": 0.3566, + "step": 52619 + }, + { + "epoch": 0.9092479955764445, + "grad_norm": 1.5523051038768498, + "learning_rate": 4.288671548354173e-07, + "loss": 0.4293, + "step": 52620 + }, + { + "epoch": 0.9092652750898534, + "grad_norm": 1.2826506921764491, + "learning_rate": 4.2870503155923895e-07, + "loss": 0.2083, + "step": 52621 + }, + { + "epoch": 0.9092825546032623, + "grad_norm": 1.299709796000973, + "learning_rate": 4.2854293826108394e-07, + "loss": 0.4407, + "step": 52622 + }, + { + "epoch": 0.9092998341166713, + "grad_norm": 1.2582574239813602, + "learning_rate": 4.2838087494146084e-07, + "loss": 0.3747, + "step": 52623 + }, + { + "epoch": 0.9093171136300802, + "grad_norm": 1.83637645739229, + "learning_rate": 4.282188416008748e-07, + "loss": 0.3827, + "step": 52624 + }, + { + "epoch": 0.9093343931434891, + "grad_norm": 1.0587849989798706, + "learning_rate": 4.2805683823983426e-07, + "loss": 0.2854, + "step": 52625 + }, + { + "epoch": 0.909351672656898, + "grad_norm": 1.1717748540457535, + "learning_rate": 4.278948648588477e-07, + "loss": 0.5436, + "step": 52626 + }, + { + "epoch": 0.9093689521703069, + "grad_norm": 1.6672094985845203, + "learning_rate": 4.277329214584225e-07, + "loss": 0.3013, + "step": 52627 + }, + { + "epoch": 0.9093862316837158, + "grad_norm": 1.7484623309747496, + "learning_rate": 4.275710080390649e-07, + "loss": 0.2552, + "step": 52628 + }, + { + "epoch": 0.9094035111971247, + "grad_norm": 1.256519283510174, + "learning_rate": 4.274091246012835e-07, + "loss": 0.3786, + "step": 52629 + }, + { + "epoch": 0.9094207907105336, + "grad_norm": 1.7364044855593075, + "learning_rate": 4.2724727114558216e-07, + "loss": 0.3114, + "step": 52630 + }, + { + "epoch": 0.9094380702239425, + "grad_norm": 0.7315848255896575, + "learning_rate": 4.270854476724706e-07, + "loss": 0.3013, + "step": 52631 + }, + { + "epoch": 0.9094553497373514, + "grad_norm": 2.0822848741797753, + "learning_rate": 4.2692365418245617e-07, + "loss": 0.2118, + "step": 52632 + }, + { + "epoch": 0.9094726292507603, + "grad_norm": 2.2668008387293295, + "learning_rate": 4.267618906760429e-07, + "loss": 0.2784, + "step": 52633 + }, + { + "epoch": 0.9094899087641692, + "grad_norm": 1.089781830252008, + "learning_rate": 4.2660015715374035e-07, + "loss": 0.3093, + "step": 52634 + }, + { + "epoch": 0.9095071882775781, + "grad_norm": 0.972097888123831, + "learning_rate": 4.264384536160526e-07, + "loss": 0.212, + "step": 52635 + }, + { + "epoch": 0.9095244677909871, + "grad_norm": 1.1607017672415867, + "learning_rate": 4.262767800634871e-07, + "loss": 0.2861, + "step": 52636 + }, + { + "epoch": 0.9095417473043959, + "grad_norm": 0.9368493818546558, + "learning_rate": 4.261151364965499e-07, + "loss": 0.3453, + "step": 52637 + }, + { + "epoch": 0.9095590268178048, + "grad_norm": 2.0077980586126434, + "learning_rate": 4.2595352291574854e-07, + "loss": 0.4365, + "step": 52638 + }, + { + "epoch": 0.9095763063312137, + "grad_norm": 1.535651651828467, + "learning_rate": 4.257919393215881e-07, + "loss": 0.4509, + "step": 52639 + }, + { + "epoch": 0.9095935858446226, + "grad_norm": 1.552013995683382, + "learning_rate": 4.2563038571457605e-07, + "loss": 0.5348, + "step": 52640 + }, + { + "epoch": 0.9096108653580315, + "grad_norm": 1.3936087431777824, + "learning_rate": 4.254688620952152e-07, + "loss": 0.287, + "step": 52641 + }, + { + "epoch": 0.9096281448714404, + "grad_norm": 2.0469552235363597, + "learning_rate": 4.253073684640141e-07, + "loss": 0.2221, + "step": 52642 + }, + { + "epoch": 0.9096454243848493, + "grad_norm": 1.3010445297881537, + "learning_rate": 4.25145904821479e-07, + "loss": 0.426, + "step": 52643 + }, + { + "epoch": 0.9096627038982582, + "grad_norm": 1.5103012419861146, + "learning_rate": 4.249844711681139e-07, + "loss": 0.3584, + "step": 52644 + }, + { + "epoch": 0.9096799834116671, + "grad_norm": 1.3957331426792694, + "learning_rate": 4.2482306750442514e-07, + "loss": 0.2883, + "step": 52645 + }, + { + "epoch": 0.909697262925076, + "grad_norm": 0.9139100426883622, + "learning_rate": 4.246616938309178e-07, + "loss": 0.2175, + "step": 52646 + }, + { + "epoch": 0.9097145424384849, + "grad_norm": 1.0090168492889393, + "learning_rate": 4.2450035014809823e-07, + "loss": 0.7631, + "step": 52647 + }, + { + "epoch": 0.9097318219518938, + "grad_norm": 1.3277932732720439, + "learning_rate": 4.2433903645647034e-07, + "loss": 0.4279, + "step": 52648 + }, + { + "epoch": 0.9097491014653027, + "grad_norm": 1.438361529735027, + "learning_rate": 4.241777527565427e-07, + "loss": 0.3618, + "step": 52649 + }, + { + "epoch": 0.9097663809787117, + "grad_norm": 1.5040957650734956, + "learning_rate": 4.2401649904881606e-07, + "loss": 0.5238, + "step": 52650 + }, + { + "epoch": 0.9097836604921206, + "grad_norm": 1.6587622538112066, + "learning_rate": 4.238552753337988e-07, + "loss": 0.3379, + "step": 52651 + }, + { + "epoch": 0.9098009400055295, + "grad_norm": 0.7369059822051677, + "learning_rate": 4.236940816119928e-07, + "loss": 0.22, + "step": 52652 + }, + { + "epoch": 0.9098182195189384, + "grad_norm": 2.0022888922410687, + "learning_rate": 4.235329178839054e-07, + "loss": 0.3248, + "step": 52653 + }, + { + "epoch": 0.9098354990323473, + "grad_norm": 1.2697428528413377, + "learning_rate": 4.2337178415004175e-07, + "loss": 0.2658, + "step": 52654 + }, + { + "epoch": 0.9098527785457562, + "grad_norm": 1.4343806389585014, + "learning_rate": 4.2321068041090375e-07, + "loss": 0.3704, + "step": 52655 + }, + { + "epoch": 0.9098700580591651, + "grad_norm": 1.0526833762752335, + "learning_rate": 4.2304960666699977e-07, + "loss": 0.2162, + "step": 52656 + }, + { + "epoch": 0.909887337572574, + "grad_norm": 0.8750602135653324, + "learning_rate": 4.228885629188306e-07, + "loss": 0.3002, + "step": 52657 + }, + { + "epoch": 0.9099046170859829, + "grad_norm": 1.2350685686724243, + "learning_rate": 4.2272754916690364e-07, + "loss": 0.1909, + "step": 52658 + }, + { + "epoch": 0.9099218965993917, + "grad_norm": 0.7104972466823809, + "learning_rate": 4.2256656541171946e-07, + "loss": 0.4994, + "step": 52659 + }, + { + "epoch": 0.9099391761128006, + "grad_norm": 1.1209822427805542, + "learning_rate": 4.2240561165378666e-07, + "loss": 0.2374, + "step": 52660 + }, + { + "epoch": 0.9099564556262095, + "grad_norm": 1.3520709561626574, + "learning_rate": 4.2224468789360706e-07, + "loss": 0.4519, + "step": 52661 + }, + { + "epoch": 0.9099737351396184, + "grad_norm": 1.0064478061957434, + "learning_rate": 4.2208379413168575e-07, + "loss": 0.3805, + "step": 52662 + }, + { + "epoch": 0.9099910146530273, + "grad_norm": 1.2445631919649958, + "learning_rate": 4.219229303685257e-07, + "loss": 0.2169, + "step": 52663 + }, + { + "epoch": 0.9100082941664362, + "grad_norm": 1.5717016153529506, + "learning_rate": 4.2176209660462987e-07, + "loss": 0.449, + "step": 52664 + }, + { + "epoch": 0.9100255736798452, + "grad_norm": 1.2262340655694015, + "learning_rate": 4.216012928405022e-07, + "loss": 0.1923, + "step": 52665 + }, + { + "epoch": 0.9100428531932541, + "grad_norm": 1.6798057212047928, + "learning_rate": 4.21440519076648e-07, + "loss": 0.3658, + "step": 52666 + }, + { + "epoch": 0.910060132706663, + "grad_norm": 1.8969234686351932, + "learning_rate": 4.212797753135711e-07, + "loss": 0.2968, + "step": 52667 + }, + { + "epoch": 0.9100774122200719, + "grad_norm": 1.9918132601895029, + "learning_rate": 4.211190615517735e-07, + "loss": 0.4209, + "step": 52668 + }, + { + "epoch": 0.9100946917334808, + "grad_norm": 2.0927964805917747, + "learning_rate": 4.209583777917592e-07, + "loss": 0.3174, + "step": 52669 + }, + { + "epoch": 0.9101119712468897, + "grad_norm": 1.0890616449007477, + "learning_rate": 4.2079772403403e-07, + "loss": 0.2326, + "step": 52670 + }, + { + "epoch": 0.9101292507602986, + "grad_norm": 1.503766517153506, + "learning_rate": 4.20637100279091e-07, + "loss": 0.4156, + "step": 52671 + }, + { + "epoch": 0.9101465302737075, + "grad_norm": 2.355978285202859, + "learning_rate": 4.204765065274452e-07, + "loss": 0.2164, + "step": 52672 + }, + { + "epoch": 0.9101638097871164, + "grad_norm": 1.3998022694514107, + "learning_rate": 4.203159427795944e-07, + "loss": 0.4067, + "step": 52673 + }, + { + "epoch": 0.9101810893005253, + "grad_norm": 2.5390336970343994, + "learning_rate": 4.201554090360427e-07, + "loss": 0.2645, + "step": 52674 + }, + { + "epoch": 0.9101983688139342, + "grad_norm": 2.041993579092277, + "learning_rate": 4.1999490529729183e-07, + "loss": 0.6006, + "step": 52675 + }, + { + "epoch": 0.9102156483273431, + "grad_norm": 1.0507875763991326, + "learning_rate": 4.1983443156384363e-07, + "loss": 0.3051, + "step": 52676 + }, + { + "epoch": 0.910232927840752, + "grad_norm": 1.2872150851624315, + "learning_rate": 4.1967398783620327e-07, + "loss": 0.2339, + "step": 52677 + }, + { + "epoch": 0.910250207354161, + "grad_norm": 1.480792698663807, + "learning_rate": 4.1951357411487257e-07, + "loss": 0.2507, + "step": 52678 + }, + { + "epoch": 0.9102674868675699, + "grad_norm": 1.347554799310791, + "learning_rate": 4.193531904003534e-07, + "loss": 0.2779, + "step": 52679 + }, + { + "epoch": 0.9102847663809787, + "grad_norm": 0.779203364419745, + "learning_rate": 4.191928366931475e-07, + "loss": 0.1874, + "step": 52680 + }, + { + "epoch": 0.9103020458943876, + "grad_norm": 1.019968810217824, + "learning_rate": 4.190325129937578e-07, + "loss": 0.3619, + "step": 52681 + }, + { + "epoch": 0.9103193254077965, + "grad_norm": 1.949186580332009, + "learning_rate": 4.1887221930268616e-07, + "loss": 0.3845, + "step": 52682 + }, + { + "epoch": 0.9103366049212054, + "grad_norm": 1.3022401172825608, + "learning_rate": 4.1871195562043667e-07, + "loss": 0.5111, + "step": 52683 + }, + { + "epoch": 0.9103538844346143, + "grad_norm": 1.579081529920602, + "learning_rate": 4.185517219475077e-07, + "loss": 0.3363, + "step": 52684 + }, + { + "epoch": 0.9103711639480232, + "grad_norm": 1.1406407129142326, + "learning_rate": 4.183915182844034e-07, + "loss": 0.1277, + "step": 52685 + }, + { + "epoch": 0.9103884434614321, + "grad_norm": 1.3397884403699727, + "learning_rate": 4.182313446316244e-07, + "loss": 0.3557, + "step": 52686 + }, + { + "epoch": 0.910405722974841, + "grad_norm": 1.2392162150550408, + "learning_rate": 4.1807120098967366e-07, + "loss": 0.4727, + "step": 52687 + }, + { + "epoch": 0.9104230024882499, + "grad_norm": 1.5752935274838176, + "learning_rate": 4.1791108735905196e-07, + "loss": 0.39, + "step": 52688 + }, + { + "epoch": 0.9104402820016588, + "grad_norm": 1.0286590799815074, + "learning_rate": 4.1775100374026103e-07, + "loss": 0.2885, + "step": 52689 + }, + { + "epoch": 0.9104575615150677, + "grad_norm": 1.960571327144988, + "learning_rate": 4.175909501338016e-07, + "loss": 0.5112, + "step": 52690 + }, + { + "epoch": 0.9104748410284766, + "grad_norm": 1.4916757698298482, + "learning_rate": 4.174309265401777e-07, + "loss": 0.3165, + "step": 52691 + }, + { + "epoch": 0.9104921205418856, + "grad_norm": 1.0499639071744111, + "learning_rate": 4.1727093295988566e-07, + "loss": 0.2465, + "step": 52692 + }, + { + "epoch": 0.9105094000552945, + "grad_norm": 1.6609223396621808, + "learning_rate": 4.1711096939343053e-07, + "loss": 0.404, + "step": 52693 + }, + { + "epoch": 0.9105266795687034, + "grad_norm": 1.449472837567184, + "learning_rate": 4.169510358413131e-07, + "loss": 0.2747, + "step": 52694 + }, + { + "epoch": 0.9105439590821123, + "grad_norm": 2.224949796602416, + "learning_rate": 4.167911323040319e-07, + "loss": 0.3971, + "step": 52695 + }, + { + "epoch": 0.9105612385955212, + "grad_norm": 0.8840901854216863, + "learning_rate": 4.1663125878208977e-07, + "loss": 0.3126, + "step": 52696 + }, + { + "epoch": 0.9105785181089301, + "grad_norm": 2.6672523129255854, + "learning_rate": 4.1647141527598633e-07, + "loss": 0.5002, + "step": 52697 + }, + { + "epoch": 0.910595797622339, + "grad_norm": 1.5984265716606905, + "learning_rate": 4.1631160178622343e-07, + "loss": 0.3092, + "step": 52698 + }, + { + "epoch": 0.9106130771357479, + "grad_norm": 2.231686871087176, + "learning_rate": 4.1615181831329955e-07, + "loss": 0.2728, + "step": 52699 + }, + { + "epoch": 0.9106303566491568, + "grad_norm": 1.8331004977581355, + "learning_rate": 4.159920648577176e-07, + "loss": 0.3325, + "step": 52700 + }, + { + "epoch": 0.9106476361625656, + "grad_norm": 1.2177472420407551, + "learning_rate": 4.158323414199761e-07, + "loss": 0.2129, + "step": 52701 + }, + { + "epoch": 0.9106649156759745, + "grad_norm": 1.0127714811093735, + "learning_rate": 4.1567264800057793e-07, + "loss": 0.2046, + "step": 52702 + }, + { + "epoch": 0.9106821951893834, + "grad_norm": 1.3374597571583111, + "learning_rate": 4.1551298460002164e-07, + "loss": 0.356, + "step": 52703 + }, + { + "epoch": 0.9106994747027923, + "grad_norm": 1.1603139802243039, + "learning_rate": 4.153533512188046e-07, + "loss": 0.3249, + "step": 52704 + }, + { + "epoch": 0.9107167542162012, + "grad_norm": 1.6961760161005048, + "learning_rate": 4.1519374785742973e-07, + "loss": 0.4103, + "step": 52705 + }, + { + "epoch": 0.9107340337296101, + "grad_norm": 1.5456428262547268, + "learning_rate": 4.150341745163966e-07, + "loss": 0.4275, + "step": 52706 + }, + { + "epoch": 0.9107513132430191, + "grad_norm": 1.2190563246075912, + "learning_rate": 4.1487463119620594e-07, + "loss": 0.2829, + "step": 52707 + }, + { + "epoch": 0.910768592756428, + "grad_norm": 1.6241735245280373, + "learning_rate": 4.147151178973552e-07, + "loss": 0.2667, + "step": 52708 + }, + { + "epoch": 0.9107858722698369, + "grad_norm": 1.3674458024713163, + "learning_rate": 4.145556346203472e-07, + "loss": 0.364, + "step": 52709 + }, + { + "epoch": 0.9108031517832458, + "grad_norm": 1.242472096592559, + "learning_rate": 4.1439618136567717e-07, + "loss": 0.3761, + "step": 52710 + }, + { + "epoch": 0.9108204312966547, + "grad_norm": 1.7188154637850919, + "learning_rate": 4.142367581338469e-07, + "loss": 0.3035, + "step": 52711 + }, + { + "epoch": 0.9108377108100636, + "grad_norm": 1.010134417458802, + "learning_rate": 4.14077364925356e-07, + "loss": 0.1777, + "step": 52712 + }, + { + "epoch": 0.9108549903234725, + "grad_norm": 1.193319323510534, + "learning_rate": 4.1391800174070405e-07, + "loss": 0.343, + "step": 52713 + }, + { + "epoch": 0.9108722698368814, + "grad_norm": 1.2616990200130072, + "learning_rate": 4.137586685803896e-07, + "loss": 0.3778, + "step": 52714 + }, + { + "epoch": 0.9108895493502903, + "grad_norm": 1.149026520670798, + "learning_rate": 4.1359936544491107e-07, + "loss": 0.2355, + "step": 52715 + }, + { + "epoch": 0.9109068288636992, + "grad_norm": 1.5370687973546184, + "learning_rate": 4.13440092334767e-07, + "loss": 0.2431, + "step": 52716 + }, + { + "epoch": 0.9109241083771081, + "grad_norm": 1.5981262392740474, + "learning_rate": 4.1328084925045696e-07, + "loss": 0.189, + "step": 52717 + }, + { + "epoch": 0.910941387890517, + "grad_norm": 1.3788167216230536, + "learning_rate": 4.131216361924817e-07, + "loss": 0.252, + "step": 52718 + }, + { + "epoch": 0.910958667403926, + "grad_norm": 1.2719508163917859, + "learning_rate": 4.1296245316133634e-07, + "loss": 0.4368, + "step": 52719 + }, + { + "epoch": 0.9109759469173349, + "grad_norm": 0.9595248015362124, + "learning_rate": 4.1280330015752266e-07, + "loss": 0.2013, + "step": 52720 + }, + { + "epoch": 0.9109932264307438, + "grad_norm": 1.8738678466881804, + "learning_rate": 4.1264417718153703e-07, + "loss": 0.2569, + "step": 52721 + }, + { + "epoch": 0.9110105059441526, + "grad_norm": 1.0477427091803315, + "learning_rate": 4.124850842338779e-07, + "loss": 0.1842, + "step": 52722 + }, + { + "epoch": 0.9110277854575615, + "grad_norm": 1.0885088448809546, + "learning_rate": 4.1232602131504486e-07, + "loss": 0.3523, + "step": 52723 + }, + { + "epoch": 0.9110450649709704, + "grad_norm": 0.5807055117120401, + "learning_rate": 4.1216698842553416e-07, + "loss": 0.505, + "step": 52724 + }, + { + "epoch": 0.9110623444843793, + "grad_norm": 1.8747736224925102, + "learning_rate": 4.1200798556584765e-07, + "loss": 0.4834, + "step": 52725 + }, + { + "epoch": 0.9110796239977882, + "grad_norm": 1.536327090512965, + "learning_rate": 4.1184901273647825e-07, + "loss": 0.6777, + "step": 52726 + }, + { + "epoch": 0.9110969035111971, + "grad_norm": 1.7175999720445556, + "learning_rate": 4.116900699379267e-07, + "loss": 0.2537, + "step": 52727 + }, + { + "epoch": 0.911114183024606, + "grad_norm": 0.8575651480599855, + "learning_rate": 4.1153115717069037e-07, + "loss": 0.2965, + "step": 52728 + }, + { + "epoch": 0.9111314625380149, + "grad_norm": 2.3384628925585056, + "learning_rate": 4.1137227443526884e-07, + "loss": 0.3269, + "step": 52729 + }, + { + "epoch": 0.9111487420514238, + "grad_norm": 1.440150796066624, + "learning_rate": 4.112134217321562e-07, + "loss": 0.3254, + "step": 52730 + }, + { + "epoch": 0.9111660215648327, + "grad_norm": 1.2766867491160825, + "learning_rate": 4.110545990618542e-07, + "loss": 0.3468, + "step": 52731 + }, + { + "epoch": 0.9111833010782416, + "grad_norm": 1.1855321014839024, + "learning_rate": 4.1089580642485583e-07, + "loss": 0.3718, + "step": 52732 + }, + { + "epoch": 0.9112005805916505, + "grad_norm": 1.257657282122497, + "learning_rate": 4.1073704382166066e-07, + "loss": 0.2161, + "step": 52733 + }, + { + "epoch": 0.9112178601050595, + "grad_norm": 0.8648282139631529, + "learning_rate": 4.1057831125276726e-07, + "loss": 0.4023, + "step": 52734 + }, + { + "epoch": 0.9112351396184684, + "grad_norm": 1.1876133291225999, + "learning_rate": 4.104196087186696e-07, + "loss": 0.4215, + "step": 52735 + }, + { + "epoch": 0.9112524191318773, + "grad_norm": 1.4497314750336603, + "learning_rate": 4.102609362198684e-07, + "loss": 0.3694, + "step": 52736 + }, + { + "epoch": 0.9112696986452862, + "grad_norm": 1.5080379229124952, + "learning_rate": 4.101022937568566e-07, + "loss": 0.2877, + "step": 52737 + }, + { + "epoch": 0.9112869781586951, + "grad_norm": 1.7317199114394208, + "learning_rate": 4.0994368133013494e-07, + "loss": 0.3954, + "step": 52738 + }, + { + "epoch": 0.911304257672104, + "grad_norm": 1.3990273178103045, + "learning_rate": 4.097850989401964e-07, + "loss": 0.4272, + "step": 52739 + }, + { + "epoch": 0.9113215371855129, + "grad_norm": 1.7963736914287725, + "learning_rate": 4.096265465875415e-07, + "loss": 0.3909, + "step": 52740 + }, + { + "epoch": 0.9113388166989218, + "grad_norm": 1.315263466043518, + "learning_rate": 4.094680242726645e-07, + "loss": 0.2311, + "step": 52741 + }, + { + "epoch": 0.9113560962123307, + "grad_norm": 1.4015064370932708, + "learning_rate": 4.0930953199606384e-07, + "loss": 0.3282, + "step": 52742 + }, + { + "epoch": 0.9113733757257395, + "grad_norm": 1.6241611713349797, + "learning_rate": 4.091510697582335e-07, + "loss": 0.3968, + "step": 52743 + }, + { + "epoch": 0.9113906552391484, + "grad_norm": 1.3032490421032532, + "learning_rate": 4.089926375596709e-07, + "loss": 0.3616, + "step": 52744 + }, + { + "epoch": 0.9114079347525573, + "grad_norm": 1.2334599920505174, + "learning_rate": 4.088342354008723e-07, + "loss": 0.2932, + "step": 52745 + }, + { + "epoch": 0.9114252142659662, + "grad_norm": 0.9394576551938127, + "learning_rate": 4.0867586328233287e-07, + "loss": 0.5722, + "step": 52746 + }, + { + "epoch": 0.9114424937793751, + "grad_norm": 1.3116799654644171, + "learning_rate": 4.085175212045511e-07, + "loss": 0.2306, + "step": 52747 + }, + { + "epoch": 0.911459773292784, + "grad_norm": 1.5052830688861694, + "learning_rate": 4.083592091680211e-07, + "loss": 0.4032, + "step": 52748 + }, + { + "epoch": 0.911477052806193, + "grad_norm": 1.7574044934794355, + "learning_rate": 4.08200927173239e-07, + "loss": 0.3498, + "step": 52749 + }, + { + "epoch": 0.9114943323196019, + "grad_norm": 1.0874259613539112, + "learning_rate": 4.0804267522069894e-07, + "loss": 0.1558, + "step": 52750 + }, + { + "epoch": 0.9115116118330108, + "grad_norm": 1.248400722258531, + "learning_rate": 4.0788445331089945e-07, + "loss": 0.5362, + "step": 52751 + }, + { + "epoch": 0.9115288913464197, + "grad_norm": 0.9936784543314645, + "learning_rate": 4.077262614443345e-07, + "loss": 0.3948, + "step": 52752 + }, + { + "epoch": 0.9115461708598286, + "grad_norm": 0.9696903019420383, + "learning_rate": 4.0756809962150034e-07, + "loss": 0.3505, + "step": 52753 + }, + { + "epoch": 0.9115634503732375, + "grad_norm": 1.7563318343511438, + "learning_rate": 4.0740996784289334e-07, + "loss": 0.5747, + "step": 52754 + }, + { + "epoch": 0.9115807298866464, + "grad_norm": 1.7223233481245481, + "learning_rate": 4.0725186610900524e-07, + "loss": 0.287, + "step": 52755 + }, + { + "epoch": 0.9115980094000553, + "grad_norm": 0.6956912403355674, + "learning_rate": 4.0709379442033234e-07, + "loss": 0.3113, + "step": 52756 + }, + { + "epoch": 0.9116152889134642, + "grad_norm": 1.0888498631579955, + "learning_rate": 4.0693575277737207e-07, + "loss": 0.284, + "step": 52757 + }, + { + "epoch": 0.9116325684268731, + "grad_norm": 0.7803489372115662, + "learning_rate": 4.067777411806195e-07, + "loss": 0.8722, + "step": 52758 + }, + { + "epoch": 0.911649847940282, + "grad_norm": 1.517957262535716, + "learning_rate": 4.066197596305654e-07, + "loss": 0.4051, + "step": 52759 + }, + { + "epoch": 0.9116671274536909, + "grad_norm": 1.044697972026525, + "learning_rate": 4.0646180812770943e-07, + "loss": 0.3252, + "step": 52760 + }, + { + "epoch": 0.9116844069670998, + "grad_norm": 1.2906024544182764, + "learning_rate": 4.0630388667254325e-07, + "loss": 0.4458, + "step": 52761 + }, + { + "epoch": 0.9117016864805088, + "grad_norm": 1.5282935005209033, + "learning_rate": 4.061459952655622e-07, + "loss": 0.4761, + "step": 52762 + }, + { + "epoch": 0.9117189659939177, + "grad_norm": 2.0511757499193393, + "learning_rate": 4.059881339072602e-07, + "loss": 0.2764, + "step": 52763 + }, + { + "epoch": 0.9117362455073265, + "grad_norm": 0.784842531502966, + "learning_rate": 4.058303025981347e-07, + "loss": 0.5985, + "step": 52764 + }, + { + "epoch": 0.9117535250207354, + "grad_norm": 1.3680056981217588, + "learning_rate": 4.056725013386775e-07, + "loss": 0.4872, + "step": 52765 + }, + { + "epoch": 0.9117708045341443, + "grad_norm": 0.9895341722272214, + "learning_rate": 4.0551473012938157e-07, + "loss": 0.302, + "step": 52766 + }, + { + "epoch": 0.9117880840475532, + "grad_norm": 0.5191865967485209, + "learning_rate": 4.0535698897074317e-07, + "loss": 0.5605, + "step": 52767 + }, + { + "epoch": 0.9118053635609621, + "grad_norm": 0.4882480533322379, + "learning_rate": 4.051992778632563e-07, + "loss": 0.3776, + "step": 52768 + }, + { + "epoch": 0.911822643074371, + "grad_norm": 1.2757064395296842, + "learning_rate": 4.0504159680741504e-07, + "loss": 0.2885, + "step": 52769 + }, + { + "epoch": 0.9118399225877799, + "grad_norm": 1.4859085996066634, + "learning_rate": 4.0488394580371126e-07, + "loss": 0.3859, + "step": 52770 + }, + { + "epoch": 0.9118572021011888, + "grad_norm": 1.6697117423389267, + "learning_rate": 4.047263248526412e-07, + "loss": 0.2141, + "step": 52771 + }, + { + "epoch": 0.9118744816145977, + "grad_norm": 2.0991578449263786, + "learning_rate": 4.0456873395469664e-07, + "loss": 0.2347, + "step": 52772 + }, + { + "epoch": 0.9118917611280066, + "grad_norm": 1.1508362829427559, + "learning_rate": 4.044111731103728e-07, + "loss": 0.5889, + "step": 52773 + }, + { + "epoch": 0.9119090406414155, + "grad_norm": 1.3355972605358912, + "learning_rate": 4.042536423201615e-07, + "loss": 0.3816, + "step": 52774 + }, + { + "epoch": 0.9119263201548244, + "grad_norm": 2.0970049538987827, + "learning_rate": 4.04096141584559e-07, + "loss": 0.2903, + "step": 52775 + }, + { + "epoch": 0.9119435996682334, + "grad_norm": 1.4364910424814246, + "learning_rate": 4.0393867090405605e-07, + "loss": 0.2868, + "step": 52776 + }, + { + "epoch": 0.9119608791816423, + "grad_norm": 1.2236913614556713, + "learning_rate": 4.037812302791444e-07, + "loss": 0.4416, + "step": 52777 + }, + { + "epoch": 0.9119781586950512, + "grad_norm": 1.221270306108943, + "learning_rate": 4.036238197103215e-07, + "loss": 0.2859, + "step": 52778 + }, + { + "epoch": 0.9119954382084601, + "grad_norm": 1.296554941625335, + "learning_rate": 4.034664391980758e-07, + "loss": 0.3435, + "step": 52779 + }, + { + "epoch": 0.912012717721869, + "grad_norm": 1.1651947750656604, + "learning_rate": 4.0330908874290476e-07, + "loss": 0.2561, + "step": 52780 + }, + { + "epoch": 0.9120299972352779, + "grad_norm": 1.3949878759896268, + "learning_rate": 4.031517683452979e-07, + "loss": 0.3672, + "step": 52781 + }, + { + "epoch": 0.9120472767486868, + "grad_norm": 1.4876806530988285, + "learning_rate": 4.029944780057493e-07, + "loss": 0.3819, + "step": 52782 + }, + { + "epoch": 0.9120645562620957, + "grad_norm": 1.015395985000973, + "learning_rate": 4.028372177247508e-07, + "loss": 0.445, + "step": 52783 + }, + { + "epoch": 0.9120818357755046, + "grad_norm": 1.1918119136819407, + "learning_rate": 4.026799875027976e-07, + "loss": 0.4688, + "step": 52784 + }, + { + "epoch": 0.9120991152889134, + "grad_norm": 1.7063645745271268, + "learning_rate": 4.025227873403781e-07, + "loss": 0.298, + "step": 52785 + }, + { + "epoch": 0.9121163948023223, + "grad_norm": 1.2959610093860368, + "learning_rate": 4.023656172379864e-07, + "loss": 0.397, + "step": 52786 + }, + { + "epoch": 0.9121336743157312, + "grad_norm": 1.0524621310300029, + "learning_rate": 4.022084771961154e-07, + "loss": 0.22, + "step": 52787 + }, + { + "epoch": 0.9121509538291401, + "grad_norm": 1.09552117183946, + "learning_rate": 4.020513672152571e-07, + "loss": 0.3773, + "step": 52788 + }, + { + "epoch": 0.912168233342549, + "grad_norm": 1.685252089152496, + "learning_rate": 4.018942872959042e-07, + "loss": 0.4392, + "step": 52789 + }, + { + "epoch": 0.912185512855958, + "grad_norm": 1.265304057847465, + "learning_rate": 4.017372374385464e-07, + "loss": 0.4523, + "step": 52790 + }, + { + "epoch": 0.9122027923693669, + "grad_norm": 1.3441600152240285, + "learning_rate": 4.015802176436767e-07, + "loss": 0.4953, + "step": 52791 + }, + { + "epoch": 0.9122200718827758, + "grad_norm": 0.7849508133590319, + "learning_rate": 4.014232279117869e-07, + "loss": 0.3465, + "step": 52792 + }, + { + "epoch": 0.9122373513961847, + "grad_norm": 1.841316775876203, + "learning_rate": 4.01266268243371e-07, + "loss": 0.2011, + "step": 52793 + }, + { + "epoch": 0.9122546309095936, + "grad_norm": 0.864825367293796, + "learning_rate": 4.0110933863891754e-07, + "loss": 0.3481, + "step": 52794 + }, + { + "epoch": 0.9122719104230025, + "grad_norm": 1.3974918653011774, + "learning_rate": 4.009524390989206e-07, + "loss": 0.3942, + "step": 52795 + }, + { + "epoch": 0.9122891899364114, + "grad_norm": 0.994128616953249, + "learning_rate": 4.0079556962386746e-07, + "loss": 0.5299, + "step": 52796 + }, + { + "epoch": 0.9123064694498203, + "grad_norm": 1.5315805901161903, + "learning_rate": 4.0063873021425335e-07, + "loss": 0.2247, + "step": 52797 + }, + { + "epoch": 0.9123237489632292, + "grad_norm": 1.0448524696915247, + "learning_rate": 4.0048192087056905e-07, + "loss": 0.5514, + "step": 52798 + }, + { + "epoch": 0.9123410284766381, + "grad_norm": 1.2354348122258996, + "learning_rate": 4.0032514159330406e-07, + "loss": 0.2494, + "step": 52799 + }, + { + "epoch": 0.912358307990047, + "grad_norm": 1.5477400898144937, + "learning_rate": 4.0016839238295133e-07, + "loss": 0.3995, + "step": 52800 + }, + { + "epoch": 0.9123755875034559, + "grad_norm": 1.5185264011198725, + "learning_rate": 4.0001167323999945e-07, + "loss": 0.4054, + "step": 52801 + }, + { + "epoch": 0.9123928670168648, + "grad_norm": 1.4576101291605656, + "learning_rate": 3.998549841649402e-07, + "loss": 0.3849, + "step": 52802 + }, + { + "epoch": 0.9124101465302737, + "grad_norm": 0.9564910325425422, + "learning_rate": 3.996983251582653e-07, + "loss": 0.7991, + "step": 52803 + }, + { + "epoch": 0.9124274260436827, + "grad_norm": 0.9555669530806302, + "learning_rate": 3.995416962204657e-07, + "loss": 0.4493, + "step": 52804 + }, + { + "epoch": 0.9124447055570916, + "grad_norm": 1.1741855839653903, + "learning_rate": 3.993850973520297e-07, + "loss": 0.2986, + "step": 52805 + }, + { + "epoch": 0.9124619850705005, + "grad_norm": 1.0305239401696185, + "learning_rate": 3.992285285534514e-07, + "loss": 0.3663, + "step": 52806 + }, + { + "epoch": 0.9124792645839093, + "grad_norm": 1.0825346180070117, + "learning_rate": 3.990719898252171e-07, + "loss": 0.3021, + "step": 52807 + }, + { + "epoch": 0.9124965440973182, + "grad_norm": 2.3201139033787657, + "learning_rate": 3.9891548116781866e-07, + "loss": 0.4712, + "step": 52808 + }, + { + "epoch": 0.9125138236107271, + "grad_norm": 1.0765599105325385, + "learning_rate": 3.987590025817478e-07, + "loss": 0.4794, + "step": 52809 + }, + { + "epoch": 0.912531103124136, + "grad_norm": 2.0680818094004008, + "learning_rate": 3.9860255406749316e-07, + "loss": 0.2762, + "step": 52810 + }, + { + "epoch": 0.9125483826375449, + "grad_norm": 1.401600999061387, + "learning_rate": 3.9844613562554535e-07, + "loss": 0.4134, + "step": 52811 + }, + { + "epoch": 0.9125656621509538, + "grad_norm": 0.9259881611889909, + "learning_rate": 3.982897472563929e-07, + "loss": 0.355, + "step": 52812 + }, + { + "epoch": 0.9125829416643627, + "grad_norm": 1.4969149513523747, + "learning_rate": 3.9813338896052656e-07, + "loss": 0.159, + "step": 52813 + }, + { + "epoch": 0.9126002211777716, + "grad_norm": 1.2626813136021013, + "learning_rate": 3.979770607384359e-07, + "loss": 0.5044, + "step": 52814 + }, + { + "epoch": 0.9126175006911805, + "grad_norm": 0.9635724807317361, + "learning_rate": 3.9782076259061274e-07, + "loss": 0.3683, + "step": 52815 + }, + { + "epoch": 0.9126347802045894, + "grad_norm": 1.3234745791363758, + "learning_rate": 3.976644945175445e-07, + "loss": 0.5975, + "step": 52816 + }, + { + "epoch": 0.9126520597179983, + "grad_norm": 1.2276123823040634, + "learning_rate": 3.975082565197197e-07, + "loss": 0.2553, + "step": 52817 + }, + { + "epoch": 0.9126693392314073, + "grad_norm": 0.6231420551931015, + "learning_rate": 3.973520485976301e-07, + "loss": 0.6042, + "step": 52818 + }, + { + "epoch": 0.9126866187448162, + "grad_norm": 1.4655476523415665, + "learning_rate": 3.971958707517609e-07, + "loss": 0.2194, + "step": 52819 + }, + { + "epoch": 0.9127038982582251, + "grad_norm": 1.6939122425238624, + "learning_rate": 3.970397229826062e-07, + "loss": 0.2934, + "step": 52820 + }, + { + "epoch": 0.912721177771634, + "grad_norm": 1.3870667569628463, + "learning_rate": 3.9688360529065215e-07, + "loss": 0.5046, + "step": 52821 + }, + { + "epoch": 0.9127384572850429, + "grad_norm": 1.319041706699927, + "learning_rate": 3.967275176763896e-07, + "loss": 0.3067, + "step": 52822 + }, + { + "epoch": 0.9127557367984518, + "grad_norm": 1.3493258669899828, + "learning_rate": 3.9657146014030477e-07, + "loss": 0.3377, + "step": 52823 + }, + { + "epoch": 0.9127730163118607, + "grad_norm": 1.318078523565046, + "learning_rate": 3.964154326828895e-07, + "loss": 0.3625, + "step": 52824 + }, + { + "epoch": 0.9127902958252696, + "grad_norm": 1.2168615421371303, + "learning_rate": 3.9625943530462787e-07, + "loss": 0.2705, + "step": 52825 + }, + { + "epoch": 0.9128075753386785, + "grad_norm": 1.1733661663758033, + "learning_rate": 3.96103468006015e-07, + "loss": 0.3189, + "step": 52826 + }, + { + "epoch": 0.9128248548520874, + "grad_norm": 0.9838752305508494, + "learning_rate": 3.959475307875349e-07, + "loss": 0.2559, + "step": 52827 + }, + { + "epoch": 0.9128421343654962, + "grad_norm": 1.3852520757572095, + "learning_rate": 3.9579162364967616e-07, + "loss": 0.4058, + "step": 52828 + }, + { + "epoch": 0.9128594138789051, + "grad_norm": 1.3795932331565601, + "learning_rate": 3.9563574659292946e-07, + "loss": 0.1951, + "step": 52829 + }, + { + "epoch": 0.912876693392314, + "grad_norm": 2.014013645535361, + "learning_rate": 3.9547989961778e-07, + "loss": 0.4653, + "step": 52830 + }, + { + "epoch": 0.9128939729057229, + "grad_norm": 1.464997027043753, + "learning_rate": 3.953240827247173e-07, + "loss": 0.3029, + "step": 52831 + }, + { + "epoch": 0.9129112524191318, + "grad_norm": 1.1671475920959038, + "learning_rate": 3.9516829591423e-07, + "loss": 0.295, + "step": 52832 + }, + { + "epoch": 0.9129285319325408, + "grad_norm": 1.721393598568877, + "learning_rate": 3.950125391868065e-07, + "loss": 0.2058, + "step": 52833 + }, + { + "epoch": 0.9129458114459497, + "grad_norm": 1.0229465715646657, + "learning_rate": 3.948568125429331e-07, + "loss": 0.33, + "step": 52834 + }, + { + "epoch": 0.9129630909593586, + "grad_norm": 1.5392702466816734, + "learning_rate": 3.9470111598309823e-07, + "loss": 0.4339, + "step": 52835 + }, + { + "epoch": 0.9129803704727675, + "grad_norm": 1.316402203702404, + "learning_rate": 3.9454544950778826e-07, + "loss": 0.2535, + "step": 52836 + }, + { + "epoch": 0.9129976499861764, + "grad_norm": 1.7438999867873082, + "learning_rate": 3.943898131174928e-07, + "loss": 0.2967, + "step": 52837 + }, + { + "epoch": 0.9130149294995853, + "grad_norm": 1.7792989533255354, + "learning_rate": 3.9423420681269917e-07, + "loss": 0.3138, + "step": 52838 + }, + { + "epoch": 0.9130322090129942, + "grad_norm": 1.596500113243737, + "learning_rate": 3.940786305938926e-07, + "loss": 0.3553, + "step": 52839 + }, + { + "epoch": 0.9130494885264031, + "grad_norm": 0.9735153235464783, + "learning_rate": 3.939230844615638e-07, + "loss": 0.2996, + "step": 52840 + }, + { + "epoch": 0.913066768039812, + "grad_norm": 0.9481933798894938, + "learning_rate": 3.937675684161957e-07, + "loss": 0.2622, + "step": 52841 + }, + { + "epoch": 0.9130840475532209, + "grad_norm": 1.3800116765804595, + "learning_rate": 3.93612082458279e-07, + "loss": 0.5151, + "step": 52842 + }, + { + "epoch": 0.9131013270666298, + "grad_norm": 0.9799444729925304, + "learning_rate": 3.9345662658829775e-07, + "loss": 0.3698, + "step": 52843 + }, + { + "epoch": 0.9131186065800387, + "grad_norm": 1.4338713823872598, + "learning_rate": 3.9330120080674273e-07, + "loss": 0.3589, + "step": 52844 + }, + { + "epoch": 0.9131358860934476, + "grad_norm": 1.397591098407102, + "learning_rate": 3.9314580511409685e-07, + "loss": 0.3885, + "step": 52845 + }, + { + "epoch": 0.9131531656068566, + "grad_norm": 0.8831021078709427, + "learning_rate": 3.929904395108497e-07, + "loss": 0.2888, + "step": 52846 + }, + { + "epoch": 0.9131704451202655, + "grad_norm": 1.106007271511227, + "learning_rate": 3.9283510399748536e-07, + "loss": 0.3524, + "step": 52847 + }, + { + "epoch": 0.9131877246336744, + "grad_norm": 1.271980696520977, + "learning_rate": 3.9267979857449126e-07, + "loss": 0.2929, + "step": 52848 + }, + { + "epoch": 0.9132050041470832, + "grad_norm": 1.209026586617786, + "learning_rate": 3.925245232423558e-07, + "loss": 0.1605, + "step": 52849 + }, + { + "epoch": 0.9132222836604921, + "grad_norm": 1.0351539555980347, + "learning_rate": 3.9236927800156197e-07, + "loss": 0.362, + "step": 52850 + }, + { + "epoch": 0.913239563173901, + "grad_norm": 1.2510377470228926, + "learning_rate": 3.9221406285259944e-07, + "loss": 0.3425, + "step": 52851 + }, + { + "epoch": 0.9132568426873099, + "grad_norm": 1.389946857999016, + "learning_rate": 3.9205887779595107e-07, + "loss": 0.2853, + "step": 52852 + }, + { + "epoch": 0.9132741222007188, + "grad_norm": 0.8233395608494211, + "learning_rate": 3.919037228321043e-07, + "loss": 0.2249, + "step": 52853 + }, + { + "epoch": 0.9132914017141277, + "grad_norm": 1.1685324595711561, + "learning_rate": 3.917485979615454e-07, + "loss": 0.4559, + "step": 52854 + }, + { + "epoch": 0.9133086812275366, + "grad_norm": 1.4305085596693121, + "learning_rate": 3.9159350318476174e-07, + "loss": 0.2584, + "step": 52855 + }, + { + "epoch": 0.9133259607409455, + "grad_norm": 1.0687514704172556, + "learning_rate": 3.914384385022363e-07, + "loss": 0.6176, + "step": 52856 + }, + { + "epoch": 0.9133432402543544, + "grad_norm": 1.1962342807703685, + "learning_rate": 3.912834039144564e-07, + "loss": 0.3329, + "step": 52857 + }, + { + "epoch": 0.9133605197677633, + "grad_norm": 1.312953496575923, + "learning_rate": 3.9112839942190727e-07, + "loss": 0.3199, + "step": 52858 + }, + { + "epoch": 0.9133777992811722, + "grad_norm": 0.9742191806405989, + "learning_rate": 3.9097342502507185e-07, + "loss": 0.349, + "step": 52859 + }, + { + "epoch": 0.9133950787945812, + "grad_norm": 1.8688488112056807, + "learning_rate": 3.9081848072444086e-07, + "loss": 0.2529, + "step": 52860 + }, + { + "epoch": 0.9134123583079901, + "grad_norm": 1.9079802092172262, + "learning_rate": 3.9066356652049497e-07, + "loss": 0.232, + "step": 52861 + }, + { + "epoch": 0.913429637821399, + "grad_norm": 1.3927514468573916, + "learning_rate": 3.9050868241372277e-07, + "loss": 0.8454, + "step": 52862 + }, + { + "epoch": 0.9134469173348079, + "grad_norm": 0.9469566603692813, + "learning_rate": 3.90353828404606e-07, + "loss": 0.2168, + "step": 52863 + }, + { + "epoch": 0.9134641968482168, + "grad_norm": 1.2228136526982605, + "learning_rate": 3.9019900449363214e-07, + "loss": 0.3461, + "step": 52864 + }, + { + "epoch": 0.9134814763616257, + "grad_norm": 0.8877434111612682, + "learning_rate": 3.9004421068128405e-07, + "loss": 0.2987, + "step": 52865 + }, + { + "epoch": 0.9134987558750346, + "grad_norm": 3.266924043286578, + "learning_rate": 3.898894469680492e-07, + "loss": 0.4072, + "step": 52866 + }, + { + "epoch": 0.9135160353884435, + "grad_norm": 2.1657793995992862, + "learning_rate": 3.897347133544105e-07, + "loss": 0.3288, + "step": 52867 + }, + { + "epoch": 0.9135333149018524, + "grad_norm": 0.927874831886172, + "learning_rate": 3.895800098408542e-07, + "loss": 0.179, + "step": 52868 + }, + { + "epoch": 0.9135505944152613, + "grad_norm": 0.5175489014774993, + "learning_rate": 3.8942533642786327e-07, + "loss": 0.5618, + "step": 52869 + }, + { + "epoch": 0.9135678739286701, + "grad_norm": 1.0594224027658168, + "learning_rate": 3.892706931159207e-07, + "loss": 0.382, + "step": 52870 + }, + { + "epoch": 0.913585153442079, + "grad_norm": 1.5668939561418893, + "learning_rate": 3.891160799055138e-07, + "loss": 0.3919, + "step": 52871 + }, + { + "epoch": 0.9136024329554879, + "grad_norm": 1.5397408318147814, + "learning_rate": 3.889614967971256e-07, + "loss": 0.2306, + "step": 52872 + }, + { + "epoch": 0.9136197124688968, + "grad_norm": 1.3346263338786404, + "learning_rate": 3.888069437912412e-07, + "loss": 0.3072, + "step": 52873 + }, + { + "epoch": 0.9136369919823057, + "grad_norm": 1.1989409404077993, + "learning_rate": 3.8865242088834245e-07, + "loss": 0.4138, + "step": 52874 + }, + { + "epoch": 0.9136542714957147, + "grad_norm": 1.0308167013242198, + "learning_rate": 3.884979280889167e-07, + "loss": 0.4592, + "step": 52875 + }, + { + "epoch": 0.9136715510091236, + "grad_norm": 1.1841248580367578, + "learning_rate": 3.883434653934448e-07, + "loss": 0.3181, + "step": 52876 + }, + { + "epoch": 0.9136888305225325, + "grad_norm": 1.2519792215811454, + "learning_rate": 3.881890328024107e-07, + "loss": 0.2356, + "step": 52877 + }, + { + "epoch": 0.9137061100359414, + "grad_norm": 1.8835641840924089, + "learning_rate": 3.880346303163007e-07, + "loss": 0.433, + "step": 52878 + }, + { + "epoch": 0.9137233895493503, + "grad_norm": 1.83130386215288, + "learning_rate": 3.8788025793559557e-07, + "loss": 0.2954, + "step": 52879 + }, + { + "epoch": 0.9137406690627592, + "grad_norm": 1.0460973694695672, + "learning_rate": 3.8772591566078157e-07, + "loss": 0.4238, + "step": 52880 + }, + { + "epoch": 0.9137579485761681, + "grad_norm": 1.447415829677798, + "learning_rate": 3.8757160349233823e-07, + "loss": 0.4604, + "step": 52881 + }, + { + "epoch": 0.913775228089577, + "grad_norm": 1.5161450177352795, + "learning_rate": 3.8741732143075197e-07, + "loss": 0.3891, + "step": 52882 + }, + { + "epoch": 0.9137925076029859, + "grad_norm": 1.153897075272579, + "learning_rate": 3.872630694765056e-07, + "loss": 0.2927, + "step": 52883 + }, + { + "epoch": 0.9138097871163948, + "grad_norm": 1.0594510521379898, + "learning_rate": 3.8710884763008216e-07, + "loss": 0.2792, + "step": 52884 + }, + { + "epoch": 0.9138270666298037, + "grad_norm": 1.2752207975985768, + "learning_rate": 3.8695465589196344e-07, + "loss": 0.3169, + "step": 52885 + }, + { + "epoch": 0.9138443461432126, + "grad_norm": 1.0968222009551778, + "learning_rate": 3.868004942626347e-07, + "loss": 0.176, + "step": 52886 + }, + { + "epoch": 0.9138616256566215, + "grad_norm": 1.1234366657056774, + "learning_rate": 3.866463627425754e-07, + "loss": 0.3568, + "step": 52887 + }, + { + "epoch": 0.9138789051700305, + "grad_norm": 1.1631989968033725, + "learning_rate": 3.864922613322708e-07, + "loss": 0.2813, + "step": 52888 + }, + { + "epoch": 0.9138961846834394, + "grad_norm": 1.5598182068563617, + "learning_rate": 3.8633819003220494e-07, + "loss": 0.3519, + "step": 52889 + }, + { + "epoch": 0.9139134641968483, + "grad_norm": 1.571413017161877, + "learning_rate": 3.861841488428564e-07, + "loss": 0.3876, + "step": 52890 + }, + { + "epoch": 0.9139307437102571, + "grad_norm": 1.3429107938277998, + "learning_rate": 3.8603013776471134e-07, + "loss": 0.2915, + "step": 52891 + }, + { + "epoch": 0.913948023223666, + "grad_norm": 0.8553678408367678, + "learning_rate": 3.858761567982494e-07, + "loss": 0.3317, + "step": 52892 + }, + { + "epoch": 0.9139653027370749, + "grad_norm": 1.4841327138433797, + "learning_rate": 3.857222059439536e-07, + "loss": 0.2437, + "step": 52893 + }, + { + "epoch": 0.9139825822504838, + "grad_norm": 0.8642681735599546, + "learning_rate": 3.8556828520230683e-07, + "loss": 0.3184, + "step": 52894 + }, + { + "epoch": 0.9139998617638927, + "grad_norm": 1.3836056927502283, + "learning_rate": 3.8541439457379204e-07, + "loss": 0.3414, + "step": 52895 + }, + { + "epoch": 0.9140171412773016, + "grad_norm": 0.873102099344271, + "learning_rate": 3.8526053405888777e-07, + "loss": 0.2922, + "step": 52896 + }, + { + "epoch": 0.9140344207907105, + "grad_norm": 1.4923316370762394, + "learning_rate": 3.851067036580802e-07, + "loss": 0.2878, + "step": 52897 + }, + { + "epoch": 0.9140517003041194, + "grad_norm": 1.1716049321006916, + "learning_rate": 3.849529033718491e-07, + "loss": 0.3118, + "step": 52898 + }, + { + "epoch": 0.9140689798175283, + "grad_norm": 1.3931321407100603, + "learning_rate": 3.8479913320067506e-07, + "loss": 0.5014, + "step": 52899 + }, + { + "epoch": 0.9140862593309372, + "grad_norm": 0.6711873160974399, + "learning_rate": 3.8464539314504223e-07, + "loss": 0.5044, + "step": 52900 + }, + { + "epoch": 0.9141035388443461, + "grad_norm": 1.5701163681579378, + "learning_rate": 3.844916832054291e-07, + "loss": 0.1824, + "step": 52901 + }, + { + "epoch": 0.914120818357755, + "grad_norm": 1.62377694548147, + "learning_rate": 3.8433800338232073e-07, + "loss": 0.4119, + "step": 52902 + }, + { + "epoch": 0.914138097871164, + "grad_norm": 1.6016423819158394, + "learning_rate": 3.8418435367619466e-07, + "loss": 0.1871, + "step": 52903 + }, + { + "epoch": 0.9141553773845729, + "grad_norm": 1.496184739499173, + "learning_rate": 3.84030734087536e-07, + "loss": 0.4965, + "step": 52904 + }, + { + "epoch": 0.9141726568979818, + "grad_norm": 0.9415748786517761, + "learning_rate": 3.83877144616821e-07, + "loss": 0.1655, + "step": 52905 + }, + { + "epoch": 0.9141899364113907, + "grad_norm": 0.9262667121924464, + "learning_rate": 3.837235852645349e-07, + "loss": 0.4062, + "step": 52906 + }, + { + "epoch": 0.9142072159247996, + "grad_norm": 1.8776765079382265, + "learning_rate": 3.835700560311573e-07, + "loss": 0.3211, + "step": 52907 + }, + { + "epoch": 0.9142244954382085, + "grad_norm": 1.8781067491199752, + "learning_rate": 3.8341655691717106e-07, + "loss": 0.507, + "step": 52908 + }, + { + "epoch": 0.9142417749516174, + "grad_norm": 2.4914844320799103, + "learning_rate": 3.8326308792305366e-07, + "loss": 0.2744, + "step": 52909 + }, + { + "epoch": 0.9142590544650263, + "grad_norm": 1.822934068302856, + "learning_rate": 3.831096490492858e-07, + "loss": 0.6718, + "step": 52910 + }, + { + "epoch": 0.9142763339784352, + "grad_norm": 1.4336034824765493, + "learning_rate": 3.8295624029635047e-07, + "loss": 0.4153, + "step": 52911 + }, + { + "epoch": 0.914293613491844, + "grad_norm": 1.5343329375103052, + "learning_rate": 3.828028616647261e-07, + "loss": 0.2413, + "step": 52912 + }, + { + "epoch": 0.9143108930052529, + "grad_norm": 1.2934791040296412, + "learning_rate": 3.8264951315489573e-07, + "loss": 0.3623, + "step": 52913 + }, + { + "epoch": 0.9143281725186618, + "grad_norm": 1.3614850145876873, + "learning_rate": 3.824961947673367e-07, + "loss": 0.3862, + "step": 52914 + }, + { + "epoch": 0.9143454520320707, + "grad_norm": 1.2283759866678436, + "learning_rate": 3.823429065025319e-07, + "loss": 0.3973, + "step": 52915 + }, + { + "epoch": 0.9143627315454796, + "grad_norm": 1.4916147967242446, + "learning_rate": 3.8218964836095886e-07, + "loss": 0.4115, + "step": 52916 + }, + { + "epoch": 0.9143800110588886, + "grad_norm": 1.219221484244498, + "learning_rate": 3.820364203430982e-07, + "loss": 0.3065, + "step": 52917 + }, + { + "epoch": 0.9143972905722975, + "grad_norm": 0.7828049452574937, + "learning_rate": 3.818832224494318e-07, + "loss": 0.1166, + "step": 52918 + }, + { + "epoch": 0.9144145700857064, + "grad_norm": 1.6619359739490112, + "learning_rate": 3.817300546804381e-07, + "loss": 0.3583, + "step": 52919 + }, + { + "epoch": 0.9144318495991153, + "grad_norm": 1.418573796235251, + "learning_rate": 3.8157691703659685e-07, + "loss": 0.269, + "step": 52920 + }, + { + "epoch": 0.9144491291125242, + "grad_norm": 1.2982194412572128, + "learning_rate": 3.814238095183864e-07, + "loss": 0.2226, + "step": 52921 + }, + { + "epoch": 0.9144664086259331, + "grad_norm": 0.8641514357799831, + "learning_rate": 3.812707321262876e-07, + "loss": 0.4256, + "step": 52922 + }, + { + "epoch": 0.914483688139342, + "grad_norm": 0.5623528677060864, + "learning_rate": 3.8111768486078006e-07, + "loss": 0.7959, + "step": 52923 + }, + { + "epoch": 0.9145009676527509, + "grad_norm": 1.7470478848642232, + "learning_rate": 3.8096466772234444e-07, + "loss": 0.2817, + "step": 52924 + }, + { + "epoch": 0.9145182471661598, + "grad_norm": 1.3355365447838972, + "learning_rate": 3.80811680711457e-07, + "loss": 0.1775, + "step": 52925 + }, + { + "epoch": 0.9145355266795687, + "grad_norm": 2.341664250903398, + "learning_rate": 3.806587238285997e-07, + "loss": 0.2365, + "step": 52926 + }, + { + "epoch": 0.9145528061929776, + "grad_norm": 0.9670633167113043, + "learning_rate": 3.8050579707424875e-07, + "loss": 0.4233, + "step": 52927 + }, + { + "epoch": 0.9145700857063865, + "grad_norm": 1.9538893713841674, + "learning_rate": 3.803529004488848e-07, + "loss": 0.447, + "step": 52928 + }, + { + "epoch": 0.9145873652197954, + "grad_norm": 1.2130154218870826, + "learning_rate": 3.8020003395298877e-07, + "loss": 0.3596, + "step": 52929 + }, + { + "epoch": 0.9146046447332044, + "grad_norm": 1.244589262995133, + "learning_rate": 3.8004719758703456e-07, + "loss": 0.3816, + "step": 52930 + }, + { + "epoch": 0.9146219242466133, + "grad_norm": 0.8684565807109106, + "learning_rate": 3.798943913515063e-07, + "loss": 0.3015, + "step": 52931 + }, + { + "epoch": 0.9146392037600222, + "grad_norm": 0.9866322993808865, + "learning_rate": 3.7974161524687804e-07, + "loss": 0.369, + "step": 52932 + }, + { + "epoch": 0.914656483273431, + "grad_norm": 1.2636019490512036, + "learning_rate": 3.795888692736294e-07, + "loss": 0.3818, + "step": 52933 + }, + { + "epoch": 0.9146737627868399, + "grad_norm": 1.524086695234906, + "learning_rate": 3.7943615343224105e-07, + "loss": 0.3783, + "step": 52934 + }, + { + "epoch": 0.9146910423002488, + "grad_norm": 1.5086250254903653, + "learning_rate": 3.7928346772318934e-07, + "loss": 0.4832, + "step": 52935 + }, + { + "epoch": 0.9147083218136577, + "grad_norm": 1.952502230837932, + "learning_rate": 3.791308121469528e-07, + "loss": 0.3602, + "step": 52936 + }, + { + "epoch": 0.9147256013270666, + "grad_norm": 1.0423360580194079, + "learning_rate": 3.7897818670401096e-07, + "loss": 0.7077, + "step": 52937 + }, + { + "epoch": 0.9147428808404755, + "grad_norm": 1.0988886572844963, + "learning_rate": 3.788255913948391e-07, + "loss": 0.3702, + "step": 52938 + }, + { + "epoch": 0.9147601603538844, + "grad_norm": 2.9952046731017252, + "learning_rate": 3.7867302621991676e-07, + "loss": 0.2529, + "step": 52939 + }, + { + "epoch": 0.9147774398672933, + "grad_norm": 1.2886883531701525, + "learning_rate": 3.7852049117972357e-07, + "loss": 0.2916, + "step": 52940 + }, + { + "epoch": 0.9147947193807022, + "grad_norm": 1.4286242856421107, + "learning_rate": 3.7836798627473357e-07, + "loss": 0.2366, + "step": 52941 + }, + { + "epoch": 0.9148119988941111, + "grad_norm": 1.2831801499891984, + "learning_rate": 3.7821551150542644e-07, + "loss": 0.2039, + "step": 52942 + }, + { + "epoch": 0.91482927840752, + "grad_norm": 1.452964362833981, + "learning_rate": 3.7806306687227955e-07, + "loss": 0.278, + "step": 52943 + }, + { + "epoch": 0.914846557920929, + "grad_norm": 1.5019539959095527, + "learning_rate": 3.779106523757714e-07, + "loss": 0.2782, + "step": 52944 + }, + { + "epoch": 0.9148638374343379, + "grad_norm": 1.2149399389361126, + "learning_rate": 3.7775826801637604e-07, + "loss": 0.2653, + "step": 52945 + }, + { + "epoch": 0.9148811169477468, + "grad_norm": 1.5141522980091104, + "learning_rate": 3.7760591379457535e-07, + "loss": 0.289, + "step": 52946 + }, + { + "epoch": 0.9148983964611557, + "grad_norm": 1.771198727701299, + "learning_rate": 3.774535897108433e-07, + "loss": 0.3076, + "step": 52947 + }, + { + "epoch": 0.9149156759745646, + "grad_norm": 1.3609188340864375, + "learning_rate": 3.7730129576565855e-07, + "loss": 0.3413, + "step": 52948 + }, + { + "epoch": 0.9149329554879735, + "grad_norm": 1.1989396346607966, + "learning_rate": 3.7714903195949615e-07, + "loss": 0.3046, + "step": 52949 + }, + { + "epoch": 0.9149502350013824, + "grad_norm": 0.9694917714955883, + "learning_rate": 3.7699679829283573e-07, + "loss": 0.2445, + "step": 52950 + }, + { + "epoch": 0.9149675145147913, + "grad_norm": 0.5053389176742318, + "learning_rate": 3.7684459476615254e-07, + "loss": 0.6444, + "step": 52951 + }, + { + "epoch": 0.9149847940282002, + "grad_norm": 2.0162569027786517, + "learning_rate": 3.7669242137992166e-07, + "loss": 0.4912, + "step": 52952 + }, + { + "epoch": 0.9150020735416091, + "grad_norm": 1.4533126655187794, + "learning_rate": 3.765402781346239e-07, + "loss": 0.5795, + "step": 52953 + }, + { + "epoch": 0.915019353055018, + "grad_norm": 1.3333596362406124, + "learning_rate": 3.7638816503073105e-07, + "loss": 0.2747, + "step": 52954 + }, + { + "epoch": 0.9150366325684268, + "grad_norm": 1.0190110976025133, + "learning_rate": 3.762360820687239e-07, + "loss": 0.5646, + "step": 52955 + }, + { + "epoch": 0.9150539120818357, + "grad_norm": 1.1636299748824466, + "learning_rate": 3.760840292490753e-07, + "loss": 0.266, + "step": 52956 + }, + { + "epoch": 0.9150711915952446, + "grad_norm": 0.9808270667711378, + "learning_rate": 3.759320065722627e-07, + "loss": 0.3101, + "step": 52957 + }, + { + "epoch": 0.9150884711086535, + "grad_norm": 1.7845445956905677, + "learning_rate": 3.757800140387635e-07, + "loss": 0.3683, + "step": 52958 + }, + { + "epoch": 0.9151057506220625, + "grad_norm": 1.847402576013055, + "learning_rate": 3.756280516490529e-07, + "loss": 0.3542, + "step": 52959 + }, + { + "epoch": 0.9151230301354714, + "grad_norm": 2.1137537219134592, + "learning_rate": 3.7547611940360604e-07, + "loss": 0.5149, + "step": 52960 + }, + { + "epoch": 0.9151403096488803, + "grad_norm": 0.7748110681287066, + "learning_rate": 3.7532421730289925e-07, + "loss": 0.3293, + "step": 52961 + }, + { + "epoch": 0.9151575891622892, + "grad_norm": 1.3820384654356193, + "learning_rate": 3.7517234534740874e-07, + "loss": 0.3036, + "step": 52962 + }, + { + "epoch": 0.9151748686756981, + "grad_norm": 0.9288182913706439, + "learning_rate": 3.7502050353760864e-07, + "loss": 0.3456, + "step": 52963 + }, + { + "epoch": 0.915192148189107, + "grad_norm": 2.345311331502417, + "learning_rate": 3.748686918739786e-07, + "loss": 0.2543, + "step": 52964 + }, + { + "epoch": 0.9152094277025159, + "grad_norm": 1.1426136157263638, + "learning_rate": 3.747169103569892e-07, + "loss": 0.3766, + "step": 52965 + }, + { + "epoch": 0.9152267072159248, + "grad_norm": 1.3311343770433184, + "learning_rate": 3.745651589871191e-07, + "loss": 0.5173, + "step": 52966 + }, + { + "epoch": 0.9152439867293337, + "grad_norm": 1.3390630031792985, + "learning_rate": 3.7441343776484116e-07, + "loss": 0.3036, + "step": 52967 + }, + { + "epoch": 0.9152612662427426, + "grad_norm": 1.4600488822974025, + "learning_rate": 3.7426174669063175e-07, + "loss": 0.4547, + "step": 52968 + }, + { + "epoch": 0.9152785457561515, + "grad_norm": 0.8656752633187662, + "learning_rate": 3.74110085764966e-07, + "loss": 0.2497, + "step": 52969 + }, + { + "epoch": 0.9152958252695604, + "grad_norm": 1.0214186455590013, + "learning_rate": 3.7395845498832127e-07, + "loss": 0.3579, + "step": 52970 + }, + { + "epoch": 0.9153131047829693, + "grad_norm": 2.848681919457992, + "learning_rate": 3.7380685436116946e-07, + "loss": 0.219, + "step": 52971 + }, + { + "epoch": 0.9153303842963783, + "grad_norm": 1.2125081178549788, + "learning_rate": 3.7365528388398467e-07, + "loss": 0.4898, + "step": 52972 + }, + { + "epoch": 0.9153476638097872, + "grad_norm": 1.3148272566987191, + "learning_rate": 3.7350374355724305e-07, + "loss": 0.3323, + "step": 52973 + }, + { + "epoch": 0.9153649433231961, + "grad_norm": 1.5968856991720606, + "learning_rate": 3.733522333814199e-07, + "loss": 0.4478, + "step": 52974 + }, + { + "epoch": 0.915382222836605, + "grad_norm": 1.8178711290460947, + "learning_rate": 3.7320075335699036e-07, + "loss": 0.3167, + "step": 52975 + }, + { + "epoch": 0.9153995023500138, + "grad_norm": 0.9737640418506398, + "learning_rate": 3.7304930348442517e-07, + "loss": 0.2835, + "step": 52976 + }, + { + "epoch": 0.9154167818634227, + "grad_norm": 1.451236195833981, + "learning_rate": 3.728978837642039e-07, + "loss": 0.3786, + "step": 52977 + }, + { + "epoch": 0.9154340613768316, + "grad_norm": 1.9478297082600244, + "learning_rate": 3.727464941967962e-07, + "loss": 0.2092, + "step": 52978 + }, + { + "epoch": 0.9154513408902405, + "grad_norm": 1.1113054383587944, + "learning_rate": 3.7259513478267837e-07, + "loss": 0.2831, + "step": 52979 + }, + { + "epoch": 0.9154686204036494, + "grad_norm": 1.289026390075965, + "learning_rate": 3.724438055223245e-07, + "loss": 0.3529, + "step": 52980 + }, + { + "epoch": 0.9154858999170583, + "grad_norm": 1.3323236161563947, + "learning_rate": 3.7229250641620864e-07, + "loss": 0.3239, + "step": 52981 + }, + { + "epoch": 0.9155031794304672, + "grad_norm": 1.0965175436565178, + "learning_rate": 3.721412374648048e-07, + "loss": 0.2436, + "step": 52982 + }, + { + "epoch": 0.9155204589438761, + "grad_norm": 1.904124775850053, + "learning_rate": 3.71989998668586e-07, + "loss": 0.5332, + "step": 52983 + }, + { + "epoch": 0.915537738457285, + "grad_norm": 1.0534014900131068, + "learning_rate": 3.7183879002802623e-07, + "loss": 0.3116, + "step": 52984 + }, + { + "epoch": 0.9155550179706939, + "grad_norm": 1.1365933049214476, + "learning_rate": 3.7168761154359634e-07, + "loss": 0.3882, + "step": 52985 + }, + { + "epoch": 0.9155722974841028, + "grad_norm": 1.0956675035333996, + "learning_rate": 3.7153646321577586e-07, + "loss": 0.379, + "step": 52986 + }, + { + "epoch": 0.9155895769975118, + "grad_norm": 1.6055379901299003, + "learning_rate": 3.713853450450333e-07, + "loss": 0.2796, + "step": 52987 + }, + { + "epoch": 0.9156068565109207, + "grad_norm": 1.0425383719881982, + "learning_rate": 3.712342570318461e-07, + "loss": 0.3901, + "step": 52988 + }, + { + "epoch": 0.9156241360243296, + "grad_norm": 1.2953007375104921, + "learning_rate": 3.7108319917668167e-07, + "loss": 0.3339, + "step": 52989 + }, + { + "epoch": 0.9156414155377385, + "grad_norm": 1.559238501172044, + "learning_rate": 3.709321714800196e-07, + "loss": 0.2295, + "step": 52990 + }, + { + "epoch": 0.9156586950511474, + "grad_norm": 1.8599562877318703, + "learning_rate": 3.707811739423273e-07, + "loss": 0.435, + "step": 52991 + }, + { + "epoch": 0.9156759745645563, + "grad_norm": 1.2395037563279765, + "learning_rate": 3.706302065640799e-07, + "loss": 0.369, + "step": 52992 + }, + { + "epoch": 0.9156932540779652, + "grad_norm": 1.3071463227867104, + "learning_rate": 3.704792693457526e-07, + "loss": 0.2107, + "step": 52993 + }, + { + "epoch": 0.9157105335913741, + "grad_norm": 1.0298382156304493, + "learning_rate": 3.70328362287814e-07, + "loss": 0.7599, + "step": 52994 + }, + { + "epoch": 0.915727813104783, + "grad_norm": 1.1821241542438268, + "learning_rate": 3.701774853907414e-07, + "loss": 0.4614, + "step": 52995 + }, + { + "epoch": 0.9157450926181919, + "grad_norm": 1.2795419038664004, + "learning_rate": 3.7002663865500223e-07, + "loss": 0.3089, + "step": 52996 + }, + { + "epoch": 0.9157623721316007, + "grad_norm": 2.100177696661173, + "learning_rate": 3.6987582208107165e-07, + "loss": 0.3239, + "step": 52997 + }, + { + "epoch": 0.9157796516450096, + "grad_norm": 1.2184645418159687, + "learning_rate": 3.6972503566942264e-07, + "loss": 0.2872, + "step": 52998 + }, + { + "epoch": 0.9157969311584185, + "grad_norm": 1.9901605893797423, + "learning_rate": 3.6957427942052703e-07, + "loss": 0.1984, + "step": 52999 + }, + { + "epoch": 0.9158142106718274, + "grad_norm": 2.969605108047331, + "learning_rate": 3.694235533348556e-07, + "loss": 0.3571, + "step": 53000 + }, + { + "epoch": 0.9158314901852364, + "grad_norm": 2.1066000972222416, + "learning_rate": 3.692728574128834e-07, + "loss": 0.8518, + "step": 53001 + }, + { + "epoch": 0.9158487696986453, + "grad_norm": 1.3035928347297314, + "learning_rate": 3.69122191655078e-07, + "loss": 0.1451, + "step": 53002 + }, + { + "epoch": 0.9158660492120542, + "grad_norm": 1.1493116728048227, + "learning_rate": 3.689715560619156e-07, + "loss": 0.3382, + "step": 53003 + }, + { + "epoch": 0.9158833287254631, + "grad_norm": 2.3351793655197515, + "learning_rate": 3.688209506338658e-07, + "loss": 0.288, + "step": 53004 + }, + { + "epoch": 0.915900608238872, + "grad_norm": 1.5188408331822072, + "learning_rate": 3.686703753714005e-07, + "loss": 0.3021, + "step": 53005 + }, + { + "epoch": 0.9159178877522809, + "grad_norm": 1.4843818491659, + "learning_rate": 3.685198302749926e-07, + "loss": 0.2554, + "step": 53006 + }, + { + "epoch": 0.9159351672656898, + "grad_norm": 1.0782111452013563, + "learning_rate": 3.6836931534511067e-07, + "loss": 0.1837, + "step": 53007 + }, + { + "epoch": 0.9159524467790987, + "grad_norm": 1.3274932691702457, + "learning_rate": 3.6821883058222873e-07, + "loss": 0.5694, + "step": 53008 + }, + { + "epoch": 0.9159697262925076, + "grad_norm": 0.9024742325416805, + "learning_rate": 3.6806837598681755e-07, + "loss": 0.4472, + "step": 53009 + }, + { + "epoch": 0.9159870058059165, + "grad_norm": 1.6251387080036186, + "learning_rate": 3.6791795155934896e-07, + "loss": 0.3375, + "step": 53010 + }, + { + "epoch": 0.9160042853193254, + "grad_norm": 1.1741923042543012, + "learning_rate": 3.6776755730029144e-07, + "loss": 0.3352, + "step": 53011 + }, + { + "epoch": 0.9160215648327343, + "grad_norm": 2.124720303377763, + "learning_rate": 3.676171932101202e-07, + "loss": 0.2604, + "step": 53012 + }, + { + "epoch": 0.9160388443461432, + "grad_norm": 1.0142056627841425, + "learning_rate": 3.674668592893027e-07, + "loss": 0.3951, + "step": 53013 + }, + { + "epoch": 0.9160561238595522, + "grad_norm": 1.835987324728579, + "learning_rate": 3.673165555383107e-07, + "loss": 0.3103, + "step": 53014 + }, + { + "epoch": 0.9160734033729611, + "grad_norm": 1.1256541419535142, + "learning_rate": 3.6716628195761716e-07, + "loss": 0.3505, + "step": 53015 + }, + { + "epoch": 0.91609068288637, + "grad_norm": 1.0648445635232417, + "learning_rate": 3.670160385476884e-07, + "loss": 0.2644, + "step": 53016 + }, + { + "epoch": 0.9161079623997789, + "grad_norm": 1.3002959054710073, + "learning_rate": 3.6686582530899963e-07, + "loss": 0.307, + "step": 53017 + }, + { + "epoch": 0.9161252419131877, + "grad_norm": 0.6497741347548986, + "learning_rate": 3.667156422420182e-07, + "loss": 0.5459, + "step": 53018 + }, + { + "epoch": 0.9161425214265966, + "grad_norm": 1.4877526526390779, + "learning_rate": 3.665654893472148e-07, + "loss": 0.3018, + "step": 53019 + }, + { + "epoch": 0.9161598009400055, + "grad_norm": 1.4299754520014054, + "learning_rate": 3.664153666250614e-07, + "loss": 0.4463, + "step": 53020 + }, + { + "epoch": 0.9161770804534144, + "grad_norm": 2.299725213372689, + "learning_rate": 3.662652740760275e-07, + "loss": 0.2355, + "step": 53021 + }, + { + "epoch": 0.9161943599668233, + "grad_norm": 1.919270332167061, + "learning_rate": 3.6611521170058285e-07, + "loss": 0.5012, + "step": 53022 + }, + { + "epoch": 0.9162116394802322, + "grad_norm": 1.2129088080531414, + "learning_rate": 3.65965179499197e-07, + "loss": 0.4491, + "step": 53023 + }, + { + "epoch": 0.9162289189936411, + "grad_norm": 1.438571404922549, + "learning_rate": 3.658151774723406e-07, + "loss": 0.346, + "step": 53024 + }, + { + "epoch": 0.91624619850705, + "grad_norm": 1.1698793642622047, + "learning_rate": 3.6566520562048127e-07, + "loss": 0.1691, + "step": 53025 + }, + { + "epoch": 0.9162634780204589, + "grad_norm": 1.9492705432444062, + "learning_rate": 3.655152639440929e-07, + "loss": 0.2781, + "step": 53026 + }, + { + "epoch": 0.9162807575338678, + "grad_norm": 2.1278944405340376, + "learning_rate": 3.653653524436429e-07, + "loss": 0.5042, + "step": 53027 + }, + { + "epoch": 0.9162980370472767, + "grad_norm": 2.206465995661888, + "learning_rate": 3.6521547111960097e-07, + "loss": 0.3135, + "step": 53028 + }, + { + "epoch": 0.9163153165606857, + "grad_norm": 0.7930330757156091, + "learning_rate": 3.650656199724356e-07, + "loss": 0.1275, + "step": 53029 + }, + { + "epoch": 0.9163325960740946, + "grad_norm": 0.7377611148472125, + "learning_rate": 3.649157990026175e-07, + "loss": 0.722, + "step": 53030 + }, + { + "epoch": 0.9163498755875035, + "grad_norm": 1.2883232927555044, + "learning_rate": 3.64766008210613e-07, + "loss": 0.4175, + "step": 53031 + }, + { + "epoch": 0.9163671551009124, + "grad_norm": 1.1452239407659388, + "learning_rate": 3.6461624759689616e-07, + "loss": 0.2884, + "step": 53032 + }, + { + "epoch": 0.9163844346143213, + "grad_norm": 2.8607378910483674, + "learning_rate": 3.644665171619344e-07, + "loss": 0.3934, + "step": 53033 + }, + { + "epoch": 0.9164017141277302, + "grad_norm": 1.5783630626487501, + "learning_rate": 3.643168169061939e-07, + "loss": 0.3732, + "step": 53034 + }, + { + "epoch": 0.9164189936411391, + "grad_norm": 1.4125797057299152, + "learning_rate": 3.6416714683014664e-07, + "loss": 0.3513, + "step": 53035 + }, + { + "epoch": 0.916436273154548, + "grad_norm": 1.2448433009817639, + "learning_rate": 3.6401750693425887e-07, + "loss": 0.3795, + "step": 53036 + }, + { + "epoch": 0.9164535526679569, + "grad_norm": 1.9711709075096133, + "learning_rate": 3.638678972190002e-07, + "loss": 0.2987, + "step": 53037 + }, + { + "epoch": 0.9164708321813658, + "grad_norm": 1.4696941114186826, + "learning_rate": 3.6371831768484023e-07, + "loss": 0.3009, + "step": 53038 + }, + { + "epoch": 0.9164881116947746, + "grad_norm": 0.8868328067574259, + "learning_rate": 3.635687683322475e-07, + "loss": 0.2042, + "step": 53039 + }, + { + "epoch": 0.9165053912081835, + "grad_norm": 1.1807857629886331, + "learning_rate": 3.6341924916168947e-07, + "loss": 0.3017, + "step": 53040 + }, + { + "epoch": 0.9165226707215924, + "grad_norm": 2.6955759478087624, + "learning_rate": 3.632697601736357e-07, + "loss": 0.2724, + "step": 53041 + }, + { + "epoch": 0.9165399502350013, + "grad_norm": 2.2766161328692065, + "learning_rate": 3.6312030136855136e-07, + "loss": 0.3847, + "step": 53042 + }, + { + "epoch": 0.9165572297484103, + "grad_norm": 0.9433273909393172, + "learning_rate": 3.629708727469072e-07, + "loss": 0.2662, + "step": 53043 + }, + { + "epoch": 0.9165745092618192, + "grad_norm": 1.5767333466088405, + "learning_rate": 3.628214743091718e-07, + "loss": 0.3338, + "step": 53044 + }, + { + "epoch": 0.9165917887752281, + "grad_norm": 0.6514647477086263, + "learning_rate": 3.626721060558103e-07, + "loss": 0.2044, + "step": 53045 + }, + { + "epoch": 0.916609068288637, + "grad_norm": 1.4248763950103756, + "learning_rate": 3.6252276798729337e-07, + "loss": 0.3836, + "step": 53046 + }, + { + "epoch": 0.9166263478020459, + "grad_norm": 5.197006537867056, + "learning_rate": 3.6237346010408737e-07, + "loss": 0.363, + "step": 53047 + }, + { + "epoch": 0.9166436273154548, + "grad_norm": 1.1831139889837086, + "learning_rate": 3.6222418240665967e-07, + "loss": 0.4211, + "step": 53048 + }, + { + "epoch": 0.9166609068288637, + "grad_norm": 1.3759509399523138, + "learning_rate": 3.6207493489547774e-07, + "loss": 0.348, + "step": 53049 + }, + { + "epoch": 0.9166781863422726, + "grad_norm": 1.7788309676148504, + "learning_rate": 3.6192571757101115e-07, + "loss": 0.5414, + "step": 53050 + }, + { + "epoch": 0.9166954658556815, + "grad_norm": 1.305950709878705, + "learning_rate": 3.61776530433724e-07, + "loss": 0.3994, + "step": 53051 + }, + { + "epoch": 0.9167127453690904, + "grad_norm": 1.7391159553753204, + "learning_rate": 3.6162737348408694e-07, + "loss": 0.2585, + "step": 53052 + }, + { + "epoch": 0.9167300248824993, + "grad_norm": 1.8250088119988954, + "learning_rate": 3.614782467225642e-07, + "loss": 0.3637, + "step": 53053 + }, + { + "epoch": 0.9167473043959082, + "grad_norm": 1.8965294999093223, + "learning_rate": 3.613291501496241e-07, + "loss": 0.1701, + "step": 53054 + }, + { + "epoch": 0.9167645839093171, + "grad_norm": 1.368632036130208, + "learning_rate": 3.611800837657353e-07, + "loss": 0.3447, + "step": 53055 + }, + { + "epoch": 0.916781863422726, + "grad_norm": 1.2959661407675713, + "learning_rate": 3.6103104757136076e-07, + "loss": 0.3923, + "step": 53056 + }, + { + "epoch": 0.916799142936135, + "grad_norm": 1.2676553012993277, + "learning_rate": 3.6088204156697117e-07, + "loss": 0.3756, + "step": 53057 + }, + { + "epoch": 0.9168164224495439, + "grad_norm": 1.0969254540416071, + "learning_rate": 3.607330657530306e-07, + "loss": 0.2001, + "step": 53058 + }, + { + "epoch": 0.9168337019629528, + "grad_norm": 1.177069734047934, + "learning_rate": 3.605841201300064e-07, + "loss": 0.2669, + "step": 53059 + }, + { + "epoch": 0.9168509814763616, + "grad_norm": 1.62647582052801, + "learning_rate": 3.60435204698365e-07, + "loss": 0.1653, + "step": 53060 + }, + { + "epoch": 0.9168682609897705, + "grad_norm": 1.1094097000326493, + "learning_rate": 3.6028631945857484e-07, + "loss": 0.3203, + "step": 53061 + }, + { + "epoch": 0.9168855405031794, + "grad_norm": 1.1524612321478114, + "learning_rate": 3.6013746441109887e-07, + "loss": 0.5045, + "step": 53062 + }, + { + "epoch": 0.9169028200165883, + "grad_norm": 0.5305240680841535, + "learning_rate": 3.599886395564067e-07, + "loss": 0.645, + "step": 53063 + }, + { + "epoch": 0.9169200995299972, + "grad_norm": 1.339667037774655, + "learning_rate": 3.598398448949625e-07, + "loss": 0.2725, + "step": 53064 + }, + { + "epoch": 0.9169373790434061, + "grad_norm": 1.7302303725679573, + "learning_rate": 3.596910804272302e-07, + "loss": 0.3641, + "step": 53065 + }, + { + "epoch": 0.916954658556815, + "grad_norm": 1.3443306009061167, + "learning_rate": 3.5954234615368067e-07, + "loss": 0.4492, + "step": 53066 + }, + { + "epoch": 0.9169719380702239, + "grad_norm": 1.3737583997491916, + "learning_rate": 3.5939364207477567e-07, + "loss": 0.2048, + "step": 53067 + }, + { + "epoch": 0.9169892175836328, + "grad_norm": 1.4223158261311653, + "learning_rate": 3.5924496819098375e-07, + "loss": 0.3842, + "step": 53068 + }, + { + "epoch": 0.9170064970970417, + "grad_norm": 1.0115631235564289, + "learning_rate": 3.590963245027679e-07, + "loss": 0.465, + "step": 53069 + }, + { + "epoch": 0.9170237766104506, + "grad_norm": 1.9118096773891706, + "learning_rate": 3.5894771101059657e-07, + "loss": 0.2765, + "step": 53070 + }, + { + "epoch": 0.9170410561238596, + "grad_norm": 1.3048296724165052, + "learning_rate": 3.587991277149305e-07, + "loss": 0.4767, + "step": 53071 + }, + { + "epoch": 0.9170583356372685, + "grad_norm": 2.045088923007064, + "learning_rate": 3.5865057461624163e-07, + "loss": 0.1744, + "step": 53072 + }, + { + "epoch": 0.9170756151506774, + "grad_norm": 1.6103984982027104, + "learning_rate": 3.585020517149895e-07, + "loss": 0.4065, + "step": 53073 + }, + { + "epoch": 0.9170928946640863, + "grad_norm": 2.280886826988837, + "learning_rate": 3.583535590116438e-07, + "loss": 0.361, + "step": 53074 + }, + { + "epoch": 0.9171101741774952, + "grad_norm": 1.3843228732385708, + "learning_rate": 3.5820509650666745e-07, + "loss": 0.3916, + "step": 53075 + }, + { + "epoch": 0.9171274536909041, + "grad_norm": 1.2208820291810756, + "learning_rate": 3.5805666420052456e-07, + "loss": 0.2625, + "step": 53076 + }, + { + "epoch": 0.917144733204313, + "grad_norm": 1.176999226018443, + "learning_rate": 3.5790826209368026e-07, + "loss": 0.3412, + "step": 53077 + }, + { + "epoch": 0.9171620127177219, + "grad_norm": 1.4495672442396552, + "learning_rate": 3.5775989018659974e-07, + "loss": 0.6419, + "step": 53078 + }, + { + "epoch": 0.9171792922311308, + "grad_norm": 1.2077667509807308, + "learning_rate": 3.5761154847975045e-07, + "loss": 0.4233, + "step": 53079 + }, + { + "epoch": 0.9171965717445397, + "grad_norm": 0.9391848473130398, + "learning_rate": 3.574632369735931e-07, + "loss": 0.409, + "step": 53080 + }, + { + "epoch": 0.9172138512579486, + "grad_norm": 1.2444170905651064, + "learning_rate": 3.5731495566859396e-07, + "loss": 0.3536, + "step": 53081 + }, + { + "epoch": 0.9172311307713574, + "grad_norm": 2.20705377947407, + "learning_rate": 3.5716670456521717e-07, + "loss": 0.404, + "step": 53082 + }, + { + "epoch": 0.9172484102847663, + "grad_norm": 0.9793818445098953, + "learning_rate": 3.5701848366392566e-07, + "loss": 0.3555, + "step": 53083 + }, + { + "epoch": 0.9172656897981752, + "grad_norm": 1.0932685038822856, + "learning_rate": 3.5687029296518795e-07, + "loss": 0.3124, + "step": 53084 + }, + { + "epoch": 0.9172829693115842, + "grad_norm": 1.7479133469418566, + "learning_rate": 3.5672213246946254e-07, + "loss": 0.3665, + "step": 53085 + }, + { + "epoch": 0.9173002488249931, + "grad_norm": 1.472661287764429, + "learning_rate": 3.5657400217721907e-07, + "loss": 0.3317, + "step": 53086 + }, + { + "epoch": 0.917317528338402, + "grad_norm": 1.506142566385316, + "learning_rate": 3.5642590208891605e-07, + "loss": 0.306, + "step": 53087 + }, + { + "epoch": 0.9173348078518109, + "grad_norm": 1.2455367857258728, + "learning_rate": 3.5627783220502087e-07, + "loss": 0.441, + "step": 53088 + }, + { + "epoch": 0.9173520873652198, + "grad_norm": 1.027775720926319, + "learning_rate": 3.5612979252599654e-07, + "loss": 0.3925, + "step": 53089 + }, + { + "epoch": 0.9173693668786287, + "grad_norm": 1.7217966385969048, + "learning_rate": 3.5598178305230714e-07, + "loss": 0.4476, + "step": 53090 + }, + { + "epoch": 0.9173866463920376, + "grad_norm": 1.2529161997679894, + "learning_rate": 3.5583380378441446e-07, + "loss": 0.3316, + "step": 53091 + }, + { + "epoch": 0.9174039259054465, + "grad_norm": 1.7062136629461884, + "learning_rate": 3.5568585472278483e-07, + "loss": 0.4384, + "step": 53092 + }, + { + "epoch": 0.9174212054188554, + "grad_norm": 1.3534875349757176, + "learning_rate": 3.55537935867879e-07, + "loss": 0.4933, + "step": 53093 + }, + { + "epoch": 0.9174384849322643, + "grad_norm": 1.1556717861301038, + "learning_rate": 3.5539004722016104e-07, + "loss": 0.3954, + "step": 53094 + }, + { + "epoch": 0.9174557644456732, + "grad_norm": 0.6083435300425817, + "learning_rate": 3.552421887800961e-07, + "loss": 0.3859, + "step": 53095 + }, + { + "epoch": 0.9174730439590821, + "grad_norm": 1.4083341142751324, + "learning_rate": 3.5509436054814384e-07, + "loss": 0.3162, + "step": 53096 + }, + { + "epoch": 0.917490323472491, + "grad_norm": 1.7341649343821608, + "learning_rate": 3.5494656252477054e-07, + "loss": 0.3647, + "step": 53097 + }, + { + "epoch": 0.9175076029859, + "grad_norm": 1.0692415955808572, + "learning_rate": 3.5479879471043585e-07, + "loss": 0.2389, + "step": 53098 + }, + { + "epoch": 0.9175248824993089, + "grad_norm": 1.4960055185687107, + "learning_rate": 3.54651057105605e-07, + "loss": 0.1223, + "step": 53099 + }, + { + "epoch": 0.9175421620127178, + "grad_norm": 1.1779539276185944, + "learning_rate": 3.5450334971073976e-07, + "loss": 0.6852, + "step": 53100 + }, + { + "epoch": 0.9175594415261267, + "grad_norm": 1.4467761322728951, + "learning_rate": 3.543556725263042e-07, + "loss": 0.2921, + "step": 53101 + }, + { + "epoch": 0.9175767210395356, + "grad_norm": 1.6228718759930718, + "learning_rate": 3.542080255527591e-07, + "loss": 0.2766, + "step": 53102 + }, + { + "epoch": 0.9175940005529444, + "grad_norm": 1.135853652013876, + "learning_rate": 3.540604087905675e-07, + "loss": 0.3179, + "step": 53103 + }, + { + "epoch": 0.9176112800663533, + "grad_norm": 1.4363644302051939, + "learning_rate": 3.539128222401933e-07, + "loss": 0.3691, + "step": 53104 + }, + { + "epoch": 0.9176285595797622, + "grad_norm": 1.6219089283058965, + "learning_rate": 3.5376526590209406e-07, + "loss": 0.3903, + "step": 53105 + }, + { + "epoch": 0.9176458390931711, + "grad_norm": 1.0788223533537036, + "learning_rate": 3.5361773977673706e-07, + "loss": 0.4874, + "step": 53106 + }, + { + "epoch": 0.91766311860658, + "grad_norm": 1.8730815177849394, + "learning_rate": 3.5347024386458096e-07, + "loss": 0.4109, + "step": 53107 + }, + { + "epoch": 0.9176803981199889, + "grad_norm": 1.8817036931565758, + "learning_rate": 3.5332277816609195e-07, + "loss": 0.4043, + "step": 53108 + }, + { + "epoch": 0.9176976776333978, + "grad_norm": 0.9660569186177337, + "learning_rate": 3.5317534268172635e-07, + "loss": 0.0998, + "step": 53109 + }, + { + "epoch": 0.9177149571468067, + "grad_norm": 1.2686092159096556, + "learning_rate": 3.530279374119505e-07, + "loss": 0.3374, + "step": 53110 + }, + { + "epoch": 0.9177322366602156, + "grad_norm": 2.4425014479864253, + "learning_rate": 3.528805623572218e-07, + "loss": 0.3762, + "step": 53111 + }, + { + "epoch": 0.9177495161736245, + "grad_norm": 1.1851877126108255, + "learning_rate": 3.527332175180076e-07, + "loss": 0.386, + "step": 53112 + }, + { + "epoch": 0.9177667956870335, + "grad_norm": 1.4140309990854543, + "learning_rate": 3.5258590289476423e-07, + "loss": 0.2923, + "step": 53113 + }, + { + "epoch": 0.9177840752004424, + "grad_norm": 0.7611730743000834, + "learning_rate": 3.524386184879558e-07, + "loss": 0.2467, + "step": 53114 + }, + { + "epoch": 0.9178013547138513, + "grad_norm": 1.296648804166601, + "learning_rate": 3.52291364298043e-07, + "loss": 0.2991, + "step": 53115 + }, + { + "epoch": 0.9178186342272602, + "grad_norm": 1.5930974299723117, + "learning_rate": 3.521441403254855e-07, + "loss": 0.4084, + "step": 53116 + }, + { + "epoch": 0.9178359137406691, + "grad_norm": 1.4187169308151808, + "learning_rate": 3.519969465707462e-07, + "loss": 0.4553, + "step": 53117 + }, + { + "epoch": 0.917853193254078, + "grad_norm": 1.3028637209132061, + "learning_rate": 3.5184978303428596e-07, + "loss": 0.3484, + "step": 53118 + }, + { + "epoch": 0.9178704727674869, + "grad_norm": 0.6971823161977011, + "learning_rate": 3.5170264971656545e-07, + "loss": 0.49, + "step": 53119 + }, + { + "epoch": 0.9178877522808958, + "grad_norm": 1.0212790648753196, + "learning_rate": 3.5155554661804537e-07, + "loss": 0.4644, + "step": 53120 + }, + { + "epoch": 0.9179050317943047, + "grad_norm": 1.3799992196079527, + "learning_rate": 3.514084737391876e-07, + "loss": 0.263, + "step": 53121 + }, + { + "epoch": 0.9179223113077136, + "grad_norm": 2.175446804688997, + "learning_rate": 3.5126143108045075e-07, + "loss": 0.2905, + "step": 53122 + }, + { + "epoch": 0.9179395908211225, + "grad_norm": 0.9981668285888476, + "learning_rate": 3.5111441864229546e-07, + "loss": 0.2721, + "step": 53123 + }, + { + "epoch": 0.9179568703345313, + "grad_norm": 1.2678689249284523, + "learning_rate": 3.509674364251836e-07, + "loss": 0.2858, + "step": 53124 + }, + { + "epoch": 0.9179741498479402, + "grad_norm": 0.8019819131341513, + "learning_rate": 3.5082048442957707e-07, + "loss": 0.2952, + "step": 53125 + }, + { + "epoch": 0.9179914293613491, + "grad_norm": 1.5953713755434442, + "learning_rate": 3.506735626559332e-07, + "loss": 0.4155, + "step": 53126 + }, + { + "epoch": 0.918008708874758, + "grad_norm": 0.974205612036696, + "learning_rate": 3.5052667110471175e-07, + "loss": 0.3657, + "step": 53127 + }, + { + "epoch": 0.918025988388167, + "grad_norm": 1.5044676729469624, + "learning_rate": 3.503798097763744e-07, + "loss": 0.3364, + "step": 53128 + }, + { + "epoch": 0.9180432679015759, + "grad_norm": 1.3550986042476254, + "learning_rate": 3.5023297867138096e-07, + "loss": 0.4276, + "step": 53129 + }, + { + "epoch": 0.9180605474149848, + "grad_norm": 0.9494957597271485, + "learning_rate": 3.5008617779019204e-07, + "loss": 0.7169, + "step": 53130 + }, + { + "epoch": 0.9180778269283937, + "grad_norm": 1.3992715386492365, + "learning_rate": 3.499394071332651e-07, + "loss": 0.5377, + "step": 53131 + }, + { + "epoch": 0.9180951064418026, + "grad_norm": 1.2773649801319307, + "learning_rate": 3.4979266670106317e-07, + "loss": 0.3044, + "step": 53132 + }, + { + "epoch": 0.9181123859552115, + "grad_norm": 2.145586324883025, + "learning_rate": 3.496459564940424e-07, + "loss": 0.4811, + "step": 53133 + }, + { + "epoch": 0.9181296654686204, + "grad_norm": 0.8766672765623684, + "learning_rate": 3.494992765126637e-07, + "loss": 0.2705, + "step": 53134 + }, + { + "epoch": 0.9181469449820293, + "grad_norm": 1.156689657549083, + "learning_rate": 3.493526267573888e-07, + "loss": 0.2422, + "step": 53135 + }, + { + "epoch": 0.9181642244954382, + "grad_norm": 0.9896504422409843, + "learning_rate": 3.492060072286729e-07, + "loss": 0.2376, + "step": 53136 + }, + { + "epoch": 0.9181815040088471, + "grad_norm": 2.6362458306975185, + "learning_rate": 3.4905941792697907e-07, + "loss": 0.3777, + "step": 53137 + }, + { + "epoch": 0.918198783522256, + "grad_norm": 1.0788236337066097, + "learning_rate": 3.489128588527624e-07, + "loss": 0.4161, + "step": 53138 + }, + { + "epoch": 0.9182160630356649, + "grad_norm": 1.2517244926139415, + "learning_rate": 3.487663300064847e-07, + "loss": 0.3579, + "step": 53139 + }, + { + "epoch": 0.9182333425490738, + "grad_norm": 1.5205535766442173, + "learning_rate": 3.4861983138860466e-07, + "loss": 0.2815, + "step": 53140 + }, + { + "epoch": 0.9182506220624828, + "grad_norm": 0.9558216725088784, + "learning_rate": 3.484733629995818e-07, + "loss": 0.3585, + "step": 53141 + }, + { + "epoch": 0.9182679015758917, + "grad_norm": 1.509490889881035, + "learning_rate": 3.483269248398724e-07, + "loss": 0.446, + "step": 53142 + }, + { + "epoch": 0.9182851810893006, + "grad_norm": 1.722005934370232, + "learning_rate": 3.4818051690993727e-07, + "loss": 0.3877, + "step": 53143 + }, + { + "epoch": 0.9183024606027095, + "grad_norm": 1.4098720393536819, + "learning_rate": 3.4803413921023374e-07, + "loss": 0.4475, + "step": 53144 + }, + { + "epoch": 0.9183197401161183, + "grad_norm": 1.927930811497837, + "learning_rate": 3.4788779174122155e-07, + "loss": 0.372, + "step": 53145 + }, + { + "epoch": 0.9183370196295272, + "grad_norm": 1.30331052004708, + "learning_rate": 3.4774147450335803e-07, + "loss": 0.3049, + "step": 53146 + }, + { + "epoch": 0.9183542991429361, + "grad_norm": 1.4063655700870883, + "learning_rate": 3.4759518749710175e-07, + "loss": 0.3773, + "step": 53147 + }, + { + "epoch": 0.918371578656345, + "grad_norm": 1.3667925083990022, + "learning_rate": 3.474489307229112e-07, + "loss": 0.342, + "step": 53148 + }, + { + "epoch": 0.9183888581697539, + "grad_norm": 1.2044941131488975, + "learning_rate": 3.473027041812438e-07, + "loss": 0.2746, + "step": 53149 + }, + { + "epoch": 0.9184061376831628, + "grad_norm": 1.833052541577109, + "learning_rate": 3.4715650787255805e-07, + "loss": 0.4071, + "step": 53150 + }, + { + "epoch": 0.9184234171965717, + "grad_norm": 1.2898921495433886, + "learning_rate": 3.470103417973114e-07, + "loss": 0.4001, + "step": 53151 + }, + { + "epoch": 0.9184406967099806, + "grad_norm": 1.7973338129877376, + "learning_rate": 3.4686420595596126e-07, + "loss": 0.2806, + "step": 53152 + }, + { + "epoch": 0.9184579762233895, + "grad_norm": 1.4013402814494165, + "learning_rate": 3.4671810034896726e-07, + "loss": 0.3446, + "step": 53153 + }, + { + "epoch": 0.9184752557367984, + "grad_norm": 1.5224925960452376, + "learning_rate": 3.465720249767857e-07, + "loss": 0.4878, + "step": 53154 + }, + { + "epoch": 0.9184925352502074, + "grad_norm": 1.6316837058138907, + "learning_rate": 3.464259798398739e-07, + "loss": 0.4619, + "step": 53155 + }, + { + "epoch": 0.9185098147636163, + "grad_norm": 1.329384903567694, + "learning_rate": 3.462799649386894e-07, + "loss": 0.2053, + "step": 53156 + }, + { + "epoch": 0.9185270942770252, + "grad_norm": 1.2159828410676277, + "learning_rate": 3.461339802736896e-07, + "loss": 0.4116, + "step": 53157 + }, + { + "epoch": 0.9185443737904341, + "grad_norm": 1.1695205607297494, + "learning_rate": 3.4598802584533187e-07, + "loss": 0.3867, + "step": 53158 + }, + { + "epoch": 0.918561653303843, + "grad_norm": 1.7887535829587493, + "learning_rate": 3.4584210165407474e-07, + "loss": 0.2748, + "step": 53159 + }, + { + "epoch": 0.9185789328172519, + "grad_norm": 3.5380687491224574, + "learning_rate": 3.4569620770037225e-07, + "loss": 0.1734, + "step": 53160 + }, + { + "epoch": 0.9185962123306608, + "grad_norm": 1.8543798766488897, + "learning_rate": 3.455503439846841e-07, + "loss": 0.3644, + "step": 53161 + }, + { + "epoch": 0.9186134918440697, + "grad_norm": 1.2026658892907258, + "learning_rate": 3.454045105074644e-07, + "loss": 0.6491, + "step": 53162 + }, + { + "epoch": 0.9186307713574786, + "grad_norm": 1.2418987785497138, + "learning_rate": 3.4525870726917265e-07, + "loss": 0.2046, + "step": 53163 + }, + { + "epoch": 0.9186480508708875, + "grad_norm": 1.1587820837231526, + "learning_rate": 3.4511293427026307e-07, + "loss": 0.3142, + "step": 53164 + }, + { + "epoch": 0.9186653303842964, + "grad_norm": 1.4438887022974665, + "learning_rate": 3.4496719151119516e-07, + "loss": 0.413, + "step": 53165 + }, + { + "epoch": 0.9186826098977052, + "grad_norm": 1.688579283446505, + "learning_rate": 3.4482147899242425e-07, + "loss": 0.454, + "step": 53166 + }, + { + "epoch": 0.9186998894111141, + "grad_norm": 1.540759751382041, + "learning_rate": 3.446757967144043e-07, + "loss": 0.2327, + "step": 53167 + }, + { + "epoch": 0.918717168924523, + "grad_norm": 1.040800686574895, + "learning_rate": 3.4453014467759393e-07, + "loss": 0.2, + "step": 53168 + }, + { + "epoch": 0.918734448437932, + "grad_norm": 1.4265082989354065, + "learning_rate": 3.443845228824494e-07, + "loss": 0.3505, + "step": 53169 + }, + { + "epoch": 0.9187517279513409, + "grad_norm": 1.6012720122156174, + "learning_rate": 3.44238931329427e-07, + "loss": 0.3381, + "step": 53170 + }, + { + "epoch": 0.9187690074647498, + "grad_norm": 1.148566508529156, + "learning_rate": 3.440933700189808e-07, + "loss": 0.1996, + "step": 53171 + }, + { + "epoch": 0.9187862869781587, + "grad_norm": 1.6517062462451584, + "learning_rate": 3.4394783895156936e-07, + "loss": 0.5075, + "step": 53172 + }, + { + "epoch": 0.9188035664915676, + "grad_norm": 0.9753484323919993, + "learning_rate": 3.4380233812764565e-07, + "loss": 0.2882, + "step": 53173 + }, + { + "epoch": 0.9188208460049765, + "grad_norm": 1.0418405186547683, + "learning_rate": 3.43656867547667e-07, + "loss": 0.2654, + "step": 53174 + }, + { + "epoch": 0.9188381255183854, + "grad_norm": 1.0246371786732482, + "learning_rate": 3.4351142721208876e-07, + "loss": 0.1982, + "step": 53175 + }, + { + "epoch": 0.9188554050317943, + "grad_norm": 1.1775930455667443, + "learning_rate": 3.433660171213682e-07, + "loss": 0.3334, + "step": 53176 + }, + { + "epoch": 0.9188726845452032, + "grad_norm": 1.3877421219379154, + "learning_rate": 3.432206372759594e-07, + "loss": 0.5439, + "step": 53177 + }, + { + "epoch": 0.9188899640586121, + "grad_norm": 1.2836450490470446, + "learning_rate": 3.430752876763155e-07, + "loss": 0.3517, + "step": 53178 + }, + { + "epoch": 0.918907243572021, + "grad_norm": 1.7188771150838598, + "learning_rate": 3.4292996832289373e-07, + "loss": 0.3175, + "step": 53179 + }, + { + "epoch": 0.9189245230854299, + "grad_norm": 0.9773008678009103, + "learning_rate": 3.4278467921615046e-07, + "loss": 0.5355, + "step": 53180 + }, + { + "epoch": 0.9189418025988388, + "grad_norm": 1.4998747517899431, + "learning_rate": 3.4263942035653974e-07, + "loss": 0.3619, + "step": 53181 + }, + { + "epoch": 0.9189590821122477, + "grad_norm": 0.7361180849031324, + "learning_rate": 3.424941917445146e-07, + "loss": 0.3257, + "step": 53182 + }, + { + "epoch": 0.9189763616256567, + "grad_norm": 0.9837546897313209, + "learning_rate": 3.423489933805335e-07, + "loss": 0.3009, + "step": 53183 + }, + { + "epoch": 0.9189936411390656, + "grad_norm": 1.0135680120941226, + "learning_rate": 3.4220382526504724e-07, + "loss": 0.3811, + "step": 53184 + }, + { + "epoch": 0.9190109206524745, + "grad_norm": 1.3413039173794332, + "learning_rate": 3.420586873985132e-07, + "loss": 0.2475, + "step": 53185 + }, + { + "epoch": 0.9190282001658834, + "grad_norm": 1.70198010584932, + "learning_rate": 3.419135797813855e-07, + "loss": 0.268, + "step": 53186 + }, + { + "epoch": 0.9190454796792922, + "grad_norm": 1.7677717872072791, + "learning_rate": 3.4176850241412043e-07, + "loss": 0.4059, + "step": 53187 + }, + { + "epoch": 0.9190627591927011, + "grad_norm": 1.4937994998254691, + "learning_rate": 3.416234552971687e-07, + "loss": 0.2711, + "step": 53188 + }, + { + "epoch": 0.91908003870611, + "grad_norm": 1.2413321794503436, + "learning_rate": 3.414784384309866e-07, + "loss": 0.2169, + "step": 53189 + }, + { + "epoch": 0.9190973182195189, + "grad_norm": 1.2969228973023192, + "learning_rate": 3.413334518160283e-07, + "loss": 0.3055, + "step": 53190 + }, + { + "epoch": 0.9191145977329278, + "grad_norm": 1.7885509112134352, + "learning_rate": 3.411884954527467e-07, + "loss": 0.5971, + "step": 53191 + }, + { + "epoch": 0.9191318772463367, + "grad_norm": 1.3894898621986682, + "learning_rate": 3.4104356934159697e-07, + "loss": 0.4466, + "step": 53192 + }, + { + "epoch": 0.9191491567597456, + "grad_norm": 1.304172775369914, + "learning_rate": 3.408986734830322e-07, + "loss": 0.216, + "step": 53193 + }, + { + "epoch": 0.9191664362731545, + "grad_norm": 1.2948826050514781, + "learning_rate": 3.407538078775086e-07, + "loss": 0.3371, + "step": 53194 + }, + { + "epoch": 0.9191837157865634, + "grad_norm": 1.1601972753701015, + "learning_rate": 3.4060897252547685e-07, + "loss": 0.5579, + "step": 53195 + }, + { + "epoch": 0.9192009952999723, + "grad_norm": 1.9718329055769477, + "learning_rate": 3.404641674273934e-07, + "loss": 0.2577, + "step": 53196 + }, + { + "epoch": 0.9192182748133813, + "grad_norm": 0.9375592149181792, + "learning_rate": 3.403193925837089e-07, + "loss": 0.3036, + "step": 53197 + }, + { + "epoch": 0.9192355543267902, + "grad_norm": 1.2493406022803126, + "learning_rate": 3.4017464799487755e-07, + "loss": 0.3673, + "step": 53198 + }, + { + "epoch": 0.9192528338401991, + "grad_norm": 1.3049885951807993, + "learning_rate": 3.400299336613555e-07, + "loss": 0.2363, + "step": 53199 + }, + { + "epoch": 0.919270113353608, + "grad_norm": 1.2395818328538581, + "learning_rate": 3.398852495835914e-07, + "loss": 0.327, + "step": 53200 + }, + { + "epoch": 0.9192873928670169, + "grad_norm": 0.6467782715755188, + "learning_rate": 3.3974059576204255e-07, + "loss": 0.6168, + "step": 53201 + }, + { + "epoch": 0.9193046723804258, + "grad_norm": 1.5228603973428452, + "learning_rate": 3.395959721971598e-07, + "loss": 0.2759, + "step": 53202 + }, + { + "epoch": 0.9193219518938347, + "grad_norm": 1.4847699892073858, + "learning_rate": 3.3945137888939604e-07, + "loss": 0.4437, + "step": 53203 + }, + { + "epoch": 0.9193392314072436, + "grad_norm": 1.097507142900102, + "learning_rate": 3.3930681583920544e-07, + "loss": 0.403, + "step": 53204 + }, + { + "epoch": 0.9193565109206525, + "grad_norm": 1.2124616762361045, + "learning_rate": 3.391622830470398e-07, + "loss": 0.281, + "step": 53205 + }, + { + "epoch": 0.9193737904340614, + "grad_norm": 2.014043733678451, + "learning_rate": 3.390177805133521e-07, + "loss": 0.2609, + "step": 53206 + }, + { + "epoch": 0.9193910699474703, + "grad_norm": 1.4118236457415434, + "learning_rate": 3.388733082385964e-07, + "loss": 0.3496, + "step": 53207 + }, + { + "epoch": 0.9194083494608791, + "grad_norm": 0.8048822827573536, + "learning_rate": 3.387288662232213e-07, + "loss": 0.2035, + "step": 53208 + }, + { + "epoch": 0.919425628974288, + "grad_norm": 1.5492762763371457, + "learning_rate": 3.3858445446768195e-07, + "loss": 0.3055, + "step": 53209 + }, + { + "epoch": 0.9194429084876969, + "grad_norm": 1.4458864244250207, + "learning_rate": 3.384400729724324e-07, + "loss": 0.394, + "step": 53210 + }, + { + "epoch": 0.9194601880011058, + "grad_norm": 1.0017361086619798, + "learning_rate": 3.3829572173792123e-07, + "loss": 0.3824, + "step": 53211 + }, + { + "epoch": 0.9194774675145148, + "grad_norm": 0.9534279191326493, + "learning_rate": 3.381514007646025e-07, + "loss": 0.1622, + "step": 53212 + }, + { + "epoch": 0.9194947470279237, + "grad_norm": 1.7094126937545395, + "learning_rate": 3.3800711005292806e-07, + "loss": 0.3637, + "step": 53213 + }, + { + "epoch": 0.9195120265413326, + "grad_norm": 1.7510701706561793, + "learning_rate": 3.378628496033487e-07, + "loss": 0.264, + "step": 53214 + }, + { + "epoch": 0.9195293060547415, + "grad_norm": 1.4628068023512872, + "learning_rate": 3.3771861941631733e-07, + "loss": 0.7006, + "step": 53215 + }, + { + "epoch": 0.9195465855681504, + "grad_norm": 1.8418203401717765, + "learning_rate": 3.37574419492287e-07, + "loss": 0.3173, + "step": 53216 + }, + { + "epoch": 0.9195638650815593, + "grad_norm": 1.5268330114279642, + "learning_rate": 3.374302498317061e-07, + "loss": 0.4038, + "step": 53217 + }, + { + "epoch": 0.9195811445949682, + "grad_norm": 1.3697385244423794, + "learning_rate": 3.372861104350289e-07, + "loss": 0.2765, + "step": 53218 + }, + { + "epoch": 0.9195984241083771, + "grad_norm": 1.3314975452832933, + "learning_rate": 3.3714200130270604e-07, + "loss": 0.2434, + "step": 53219 + }, + { + "epoch": 0.919615703621786, + "grad_norm": 1.431897710316147, + "learning_rate": 3.3699792243518826e-07, + "loss": 0.4395, + "step": 53220 + }, + { + "epoch": 0.9196329831351949, + "grad_norm": 2.4237880778122793, + "learning_rate": 3.3685387383292855e-07, + "loss": 0.3415, + "step": 53221 + }, + { + "epoch": 0.9196502626486038, + "grad_norm": 1.7281002031823387, + "learning_rate": 3.3670985549637545e-07, + "loss": 0.223, + "step": 53222 + }, + { + "epoch": 0.9196675421620127, + "grad_norm": 2.2366801678999146, + "learning_rate": 3.3656586742598197e-07, + "loss": 0.3154, + "step": 53223 + }, + { + "epoch": 0.9196848216754216, + "grad_norm": 1.6143839146539962, + "learning_rate": 3.364219096221988e-07, + "loss": 0.806, + "step": 53224 + }, + { + "epoch": 0.9197021011888306, + "grad_norm": 1.5025083243177664, + "learning_rate": 3.3627798208547555e-07, + "loss": 0.4745, + "step": 53225 + }, + { + "epoch": 0.9197193807022395, + "grad_norm": 1.347486752946842, + "learning_rate": 3.361340848162653e-07, + "loss": 0.2313, + "step": 53226 + }, + { + "epoch": 0.9197366602156484, + "grad_norm": 1.1599550311123596, + "learning_rate": 3.359902178150176e-07, + "loss": 0.4119, + "step": 53227 + }, + { + "epoch": 0.9197539397290573, + "grad_norm": 1.1196403982579168, + "learning_rate": 3.3584638108218325e-07, + "loss": 0.4461, + "step": 53228 + }, + { + "epoch": 0.9197712192424662, + "grad_norm": 1.7194808608834398, + "learning_rate": 3.3570257461821074e-07, + "loss": 0.2716, + "step": 53229 + }, + { + "epoch": 0.919788498755875, + "grad_norm": 1.3431393206082132, + "learning_rate": 3.355587984235542e-07, + "loss": 0.3541, + "step": 53230 + }, + { + "epoch": 0.9198057782692839, + "grad_norm": 0.7931968806042048, + "learning_rate": 3.35415052498661e-07, + "loss": 0.2834, + "step": 53231 + }, + { + "epoch": 0.9198230577826928, + "grad_norm": 1.2519077141003554, + "learning_rate": 3.352713368439819e-07, + "loss": 0.3372, + "step": 53232 + }, + { + "epoch": 0.9198403372961017, + "grad_norm": 1.837093654622875, + "learning_rate": 3.3512765145996776e-07, + "loss": 0.4619, + "step": 53233 + }, + { + "epoch": 0.9198576168095106, + "grad_norm": 1.6110476156115925, + "learning_rate": 3.3498399634706915e-07, + "loss": 0.2576, + "step": 53234 + }, + { + "epoch": 0.9198748963229195, + "grad_norm": 0.9861067676778985, + "learning_rate": 3.348403715057336e-07, + "loss": 0.2954, + "step": 53235 + }, + { + "epoch": 0.9198921758363284, + "grad_norm": 0.6028575646078063, + "learning_rate": 3.346967769364151e-07, + "loss": 0.6691, + "step": 53236 + }, + { + "epoch": 0.9199094553497373, + "grad_norm": 1.2109008221819784, + "learning_rate": 3.345532126395579e-07, + "loss": 0.2656, + "step": 53237 + }, + { + "epoch": 0.9199267348631462, + "grad_norm": 0.8199451269708375, + "learning_rate": 3.344096786156159e-07, + "loss": 0.5285, + "step": 53238 + }, + { + "epoch": 0.9199440143765552, + "grad_norm": 1.4404459500228417, + "learning_rate": 3.342661748650378e-07, + "loss": 0.4622, + "step": 53239 + }, + { + "epoch": 0.9199612938899641, + "grad_norm": 1.9292410418854549, + "learning_rate": 3.3412270138827194e-07, + "loss": 0.4304, + "step": 53240 + }, + { + "epoch": 0.919978573403373, + "grad_norm": 1.4386989873914489, + "learning_rate": 3.3397925818576924e-07, + "loss": 0.3205, + "step": 53241 + }, + { + "epoch": 0.9199958529167819, + "grad_norm": 1.2303530031145231, + "learning_rate": 3.3383584525797706e-07, + "loss": 0.3628, + "step": 53242 + }, + { + "epoch": 0.9200131324301908, + "grad_norm": 0.7763513181941608, + "learning_rate": 3.336924626053462e-07, + "loss": 0.5476, + "step": 53243 + }, + { + "epoch": 0.9200304119435997, + "grad_norm": 0.9058131148773555, + "learning_rate": 3.3354911022832394e-07, + "loss": 0.2737, + "step": 53244 + }, + { + "epoch": 0.9200476914570086, + "grad_norm": 1.4784885630533926, + "learning_rate": 3.3340578812736224e-07, + "loss": 0.4947, + "step": 53245 + }, + { + "epoch": 0.9200649709704175, + "grad_norm": 1.3393729490478476, + "learning_rate": 3.332624963029074e-07, + "loss": 0.4198, + "step": 53246 + }, + { + "epoch": 0.9200822504838264, + "grad_norm": 1.4220538800976499, + "learning_rate": 3.3311923475540906e-07, + "loss": 0.2871, + "step": 53247 + }, + { + "epoch": 0.9200995299972353, + "grad_norm": 1.4747315188329728, + "learning_rate": 3.329760034853158e-07, + "loss": 0.4181, + "step": 53248 + }, + { + "epoch": 0.9201168095106442, + "grad_norm": 1.2917380181692841, + "learning_rate": 3.3283280249307603e-07, + "loss": 0.3634, + "step": 53249 + }, + { + "epoch": 0.9201340890240531, + "grad_norm": 1.7340270781402427, + "learning_rate": 3.326896317791406e-07, + "loss": 0.3111, + "step": 53250 + }, + { + "epoch": 0.9201513685374619, + "grad_norm": 1.1073241000895702, + "learning_rate": 3.325464913439536e-07, + "loss": 0.3412, + "step": 53251 + }, + { + "epoch": 0.9201686480508708, + "grad_norm": 1.0782014605871055, + "learning_rate": 3.324033811879679e-07, + "loss": 0.2915, + "step": 53252 + }, + { + "epoch": 0.9201859275642797, + "grad_norm": 1.6088013987434202, + "learning_rate": 3.322603013116277e-07, + "loss": 0.2854, + "step": 53253 + }, + { + "epoch": 0.9202032070776887, + "grad_norm": 1.2266874682869149, + "learning_rate": 3.3211725171538255e-07, + "loss": 0.5021, + "step": 53254 + }, + { + "epoch": 0.9202204865910976, + "grad_norm": 1.4343688389928513, + "learning_rate": 3.3197423239968216e-07, + "loss": 0.4499, + "step": 53255 + }, + { + "epoch": 0.9202377661045065, + "grad_norm": 1.8291385385474057, + "learning_rate": 3.318312433649729e-07, + "loss": 0.4046, + "step": 53256 + }, + { + "epoch": 0.9202550456179154, + "grad_norm": 1.3710369784914007, + "learning_rate": 3.316882846117031e-07, + "loss": 0.3453, + "step": 53257 + }, + { + "epoch": 0.9202723251313243, + "grad_norm": 1.576192037300144, + "learning_rate": 3.3154535614032034e-07, + "loss": 0.3054, + "step": 53258 + }, + { + "epoch": 0.9202896046447332, + "grad_norm": 1.3506883428693277, + "learning_rate": 3.3140245795127203e-07, + "loss": 0.4551, + "step": 53259 + }, + { + "epoch": 0.9203068841581421, + "grad_norm": 1.5573865983720634, + "learning_rate": 3.312595900450055e-07, + "loss": 0.207, + "step": 53260 + }, + { + "epoch": 0.920324163671551, + "grad_norm": 2.0659494378654686, + "learning_rate": 3.311167524219705e-07, + "loss": 0.4441, + "step": 53261 + }, + { + "epoch": 0.9203414431849599, + "grad_norm": 1.5436001510844968, + "learning_rate": 3.3097394508261104e-07, + "loss": 0.2613, + "step": 53262 + }, + { + "epoch": 0.9203587226983688, + "grad_norm": 1.3594422795502275, + "learning_rate": 3.308311680273768e-07, + "loss": 0.2251, + "step": 53263 + }, + { + "epoch": 0.9203760022117777, + "grad_norm": 1.522509360935168, + "learning_rate": 3.3068842125671297e-07, + "loss": 0.4112, + "step": 53264 + }, + { + "epoch": 0.9203932817251866, + "grad_norm": 0.9657646037226644, + "learning_rate": 3.3054570477106917e-07, + "loss": 0.3986, + "step": 53265 + }, + { + "epoch": 0.9204105612385955, + "grad_norm": 1.740197799455382, + "learning_rate": 3.304030185708895e-07, + "loss": 0.2641, + "step": 53266 + }, + { + "epoch": 0.9204278407520045, + "grad_norm": 1.00099061556612, + "learning_rate": 3.3026036265662474e-07, + "loss": 0.3391, + "step": 53267 + }, + { + "epoch": 0.9204451202654134, + "grad_norm": 1.2820942804583597, + "learning_rate": 3.3011773702871785e-07, + "loss": 0.2971, + "step": 53268 + }, + { + "epoch": 0.9204623997788223, + "grad_norm": 0.9599584702802001, + "learning_rate": 3.2997514168761737e-07, + "loss": 0.2814, + "step": 53269 + }, + { + "epoch": 0.9204796792922312, + "grad_norm": 1.1344517812449, + "learning_rate": 3.2983257663377065e-07, + "loss": 0.4619, + "step": 53270 + }, + { + "epoch": 0.9204969588056401, + "grad_norm": 1.1637844479430692, + "learning_rate": 3.2969004186762186e-07, + "loss": 0.3347, + "step": 53271 + }, + { + "epoch": 0.9205142383190489, + "grad_norm": 1.6872378434409157, + "learning_rate": 3.295475373896184e-07, + "loss": 0.3479, + "step": 53272 + }, + { + "epoch": 0.9205315178324578, + "grad_norm": 2.2654891742579637, + "learning_rate": 3.294050632002077e-07, + "loss": 0.3741, + "step": 53273 + }, + { + "epoch": 0.9205487973458667, + "grad_norm": 0.894483563609294, + "learning_rate": 3.2926261929983605e-07, + "loss": 0.3551, + "step": 53274 + }, + { + "epoch": 0.9205660768592756, + "grad_norm": 1.4568914351787954, + "learning_rate": 3.291202056889475e-07, + "loss": 0.3141, + "step": 53275 + }, + { + "epoch": 0.9205833563726845, + "grad_norm": 1.1865980623073733, + "learning_rate": 3.2897782236799067e-07, + "loss": 0.3574, + "step": 53276 + }, + { + "epoch": 0.9206006358860934, + "grad_norm": 1.0379585365365933, + "learning_rate": 3.288354693374096e-07, + "loss": 0.3609, + "step": 53277 + }, + { + "epoch": 0.9206179153995023, + "grad_norm": 1.0433898435408682, + "learning_rate": 3.286931465976506e-07, + "loss": 0.3294, + "step": 53278 + }, + { + "epoch": 0.9206351949129112, + "grad_norm": 1.309579801457562, + "learning_rate": 3.2855085414916e-07, + "loss": 0.5825, + "step": 53279 + }, + { + "epoch": 0.9206524744263201, + "grad_norm": 1.3877163524502454, + "learning_rate": 3.2840859199238294e-07, + "loss": 0.4341, + "step": 53280 + }, + { + "epoch": 0.920669753939729, + "grad_norm": 1.0037580247644142, + "learning_rate": 3.282663601277669e-07, + "loss": 0.3737, + "step": 53281 + }, + { + "epoch": 0.920687033453138, + "grad_norm": 1.8395998210168796, + "learning_rate": 3.2812415855575266e-07, + "loss": 0.3919, + "step": 53282 + }, + { + "epoch": 0.9207043129665469, + "grad_norm": 1.3962324665909458, + "learning_rate": 3.279819872767898e-07, + "loss": 0.2627, + "step": 53283 + }, + { + "epoch": 0.9207215924799558, + "grad_norm": 1.5323531502328744, + "learning_rate": 3.278398462913224e-07, + "loss": 0.3225, + "step": 53284 + }, + { + "epoch": 0.9207388719933647, + "grad_norm": 0.8992376511225437, + "learning_rate": 3.276977355997968e-07, + "loss": 0.3328, + "step": 53285 + }, + { + "epoch": 0.9207561515067736, + "grad_norm": 1.6245764160899703, + "learning_rate": 3.275556552026549e-07, + "loss": 0.2251, + "step": 53286 + }, + { + "epoch": 0.9207734310201825, + "grad_norm": 1.3232180469065422, + "learning_rate": 3.2741360510034624e-07, + "loss": 0.4272, + "step": 53287 + }, + { + "epoch": 0.9207907105335914, + "grad_norm": 1.8403810980372897, + "learning_rate": 3.272715852933117e-07, + "loss": 0.2826, + "step": 53288 + }, + { + "epoch": 0.9208079900470003, + "grad_norm": 1.1773285519364836, + "learning_rate": 3.2712959578199644e-07, + "loss": 0.5509, + "step": 53289 + }, + { + "epoch": 0.9208252695604092, + "grad_norm": 1.6082641603799182, + "learning_rate": 3.2698763656684895e-07, + "loss": 0.3722, + "step": 53290 + }, + { + "epoch": 0.9208425490738181, + "grad_norm": 0.7125378631231706, + "learning_rate": 3.2684570764831005e-07, + "loss": 0.8403, + "step": 53291 + }, + { + "epoch": 0.920859828587227, + "grad_norm": 0.5908551865847046, + "learning_rate": 3.26703809026826e-07, + "loss": 0.578, + "step": 53292 + }, + { + "epoch": 0.9208771081006358, + "grad_norm": 0.8973412828776911, + "learning_rate": 3.2656194070283975e-07, + "loss": 0.3441, + "step": 53293 + }, + { + "epoch": 0.9208943876140447, + "grad_norm": 1.1426139460128666, + "learning_rate": 3.264201026767977e-07, + "loss": 0.3336, + "step": 53294 + }, + { + "epoch": 0.9209116671274536, + "grad_norm": 1.3711057919125413, + "learning_rate": 3.2627829494914164e-07, + "loss": 0.1622, + "step": 53295 + }, + { + "epoch": 0.9209289466408626, + "grad_norm": 0.898848866913132, + "learning_rate": 3.2613651752031905e-07, + "loss": 0.366, + "step": 53296 + }, + { + "epoch": 0.9209462261542715, + "grad_norm": 3.0377825553363658, + "learning_rate": 3.2599477039077067e-07, + "loss": 0.3035, + "step": 53297 + }, + { + "epoch": 0.9209635056676804, + "grad_norm": 1.1278966847217098, + "learning_rate": 3.258530535609428e-07, + "loss": 0.3087, + "step": 53298 + }, + { + "epoch": 0.9209807851810893, + "grad_norm": 1.1889928025517167, + "learning_rate": 3.257113670312773e-07, + "loss": 0.5517, + "step": 53299 + }, + { + "epoch": 0.9209980646944982, + "grad_norm": 1.2359659191044614, + "learning_rate": 3.255697108022193e-07, + "loss": 0.3301, + "step": 53300 + }, + { + "epoch": 0.9210153442079071, + "grad_norm": 1.6428961975856244, + "learning_rate": 3.254280848742142e-07, + "loss": 0.3571, + "step": 53301 + }, + { + "epoch": 0.921032623721316, + "grad_norm": 1.5757149762027218, + "learning_rate": 3.2528648924770144e-07, + "loss": 0.3649, + "step": 53302 + }, + { + "epoch": 0.9210499032347249, + "grad_norm": 1.2139505223472218, + "learning_rate": 3.2514492392312745e-07, + "loss": 0.4677, + "step": 53303 + }, + { + "epoch": 0.9210671827481338, + "grad_norm": 1.1754232577629886, + "learning_rate": 3.2500338890093407e-07, + "loss": 0.275, + "step": 53304 + }, + { + "epoch": 0.9210844622615427, + "grad_norm": 1.0634103306128517, + "learning_rate": 3.248618841815654e-07, + "loss": 0.2695, + "step": 53305 + }, + { + "epoch": 0.9211017417749516, + "grad_norm": 0.5526651916277974, + "learning_rate": 3.247204097654644e-07, + "loss": 0.6641, + "step": 53306 + }, + { + "epoch": 0.9211190212883605, + "grad_norm": 1.0017308089546093, + "learning_rate": 3.2457896565307624e-07, + "loss": 0.2053, + "step": 53307 + }, + { + "epoch": 0.9211363008017694, + "grad_norm": 1.0229200298514285, + "learning_rate": 3.2443755184483947e-07, + "loss": 0.3224, + "step": 53308 + }, + { + "epoch": 0.9211535803151784, + "grad_norm": 1.1343337012084598, + "learning_rate": 3.242961683412016e-07, + "loss": 0.2697, + "step": 53309 + }, + { + "epoch": 0.9211708598285873, + "grad_norm": 1.2678297266484115, + "learning_rate": 3.241548151426044e-07, + "loss": 0.2547, + "step": 53310 + }, + { + "epoch": 0.9211881393419962, + "grad_norm": 1.474712999571378, + "learning_rate": 3.2401349224948754e-07, + "loss": 0.2386, + "step": 53311 + }, + { + "epoch": 0.9212054188554051, + "grad_norm": 1.580540617861471, + "learning_rate": 3.238721996622951e-07, + "loss": 0.4686, + "step": 53312 + }, + { + "epoch": 0.921222698368814, + "grad_norm": 2.1921454414419026, + "learning_rate": 3.237309373814712e-07, + "loss": 0.4386, + "step": 53313 + }, + { + "epoch": 0.9212399778822228, + "grad_norm": 1.1848428523354502, + "learning_rate": 3.2358970540745884e-07, + "loss": 0.4828, + "step": 53314 + }, + { + "epoch": 0.9212572573956317, + "grad_norm": 1.2461400708978114, + "learning_rate": 3.2344850374069645e-07, + "loss": 0.3831, + "step": 53315 + }, + { + "epoch": 0.9212745369090406, + "grad_norm": 1.2793690272158706, + "learning_rate": 3.233073323816316e-07, + "loss": 0.3719, + "step": 53316 + }, + { + "epoch": 0.9212918164224495, + "grad_norm": 2.4502963248375917, + "learning_rate": 3.2316619133070157e-07, + "loss": 0.2702, + "step": 53317 + }, + { + "epoch": 0.9213090959358584, + "grad_norm": 2.2565005661640427, + "learning_rate": 3.2302508058835056e-07, + "loss": 1.097, + "step": 53318 + }, + { + "epoch": 0.9213263754492673, + "grad_norm": 0.749558121613384, + "learning_rate": 3.228840001550193e-07, + "loss": 0.238, + "step": 53319 + }, + { + "epoch": 0.9213436549626762, + "grad_norm": 1.3462117303492622, + "learning_rate": 3.2274295003115297e-07, + "loss": 0.6458, + "step": 53320 + }, + { + "epoch": 0.9213609344760851, + "grad_norm": 1.9187529135733945, + "learning_rate": 3.226019302171912e-07, + "loss": 0.4557, + "step": 53321 + }, + { + "epoch": 0.921378213989494, + "grad_norm": 0.8714553192629563, + "learning_rate": 3.224609407135748e-07, + "loss": 0.299, + "step": 53322 + }, + { + "epoch": 0.921395493502903, + "grad_norm": 1.4551027896671118, + "learning_rate": 3.223199815207456e-07, + "loss": 0.1711, + "step": 53323 + }, + { + "epoch": 0.9214127730163119, + "grad_norm": 0.6662868165822217, + "learning_rate": 3.221790526391455e-07, + "loss": 0.3505, + "step": 53324 + }, + { + "epoch": 0.9214300525297208, + "grad_norm": 2.91699801605259, + "learning_rate": 3.2203815406921854e-07, + "loss": 0.3104, + "step": 53325 + }, + { + "epoch": 0.9214473320431297, + "grad_norm": 1.3491534454370437, + "learning_rate": 3.218972858114011e-07, + "loss": 0.2785, + "step": 53326 + }, + { + "epoch": 0.9214646115565386, + "grad_norm": 1.5225618956812341, + "learning_rate": 3.217564478661384e-07, + "loss": 0.2463, + "step": 53327 + }, + { + "epoch": 0.9214818910699475, + "grad_norm": 1.0794757901533663, + "learning_rate": 3.216156402338688e-07, + "loss": 0.4867, + "step": 53328 + }, + { + "epoch": 0.9214991705833564, + "grad_norm": 1.8617129415658376, + "learning_rate": 3.214748629150344e-07, + "loss": 0.4413, + "step": 53329 + }, + { + "epoch": 0.9215164500967653, + "grad_norm": 1.90699707892049, + "learning_rate": 3.213341159100769e-07, + "loss": 0.3392, + "step": 53330 + }, + { + "epoch": 0.9215337296101742, + "grad_norm": 1.5578754377918342, + "learning_rate": 3.2119339921943715e-07, + "loss": 0.3385, + "step": 53331 + }, + { + "epoch": 0.9215510091235831, + "grad_norm": 1.0266891074374154, + "learning_rate": 3.210527128435559e-07, + "loss": 0.318, + "step": 53332 + }, + { + "epoch": 0.921568288636992, + "grad_norm": 2.236653422118237, + "learning_rate": 3.209120567828705e-07, + "loss": 0.2786, + "step": 53333 + }, + { + "epoch": 0.9215855681504009, + "grad_norm": 1.698024325670986, + "learning_rate": 3.2077143103782514e-07, + "loss": 0.5735, + "step": 53334 + }, + { + "epoch": 0.9216028476638097, + "grad_norm": 1.082916429120275, + "learning_rate": 3.2063083560885945e-07, + "loss": 0.3276, + "step": 53335 + }, + { + "epoch": 0.9216201271772186, + "grad_norm": 1.3572529285749881, + "learning_rate": 3.2049027049641415e-07, + "loss": 0.2719, + "step": 53336 + }, + { + "epoch": 0.9216374066906275, + "grad_norm": 1.071721450182013, + "learning_rate": 3.2034973570092884e-07, + "loss": 0.2703, + "step": 53337 + }, + { + "epoch": 0.9216546862040365, + "grad_norm": 1.7715023903957166, + "learning_rate": 3.202092312228444e-07, + "loss": 0.4424, + "step": 53338 + }, + { + "epoch": 0.9216719657174454, + "grad_norm": 0.8531616650263908, + "learning_rate": 3.200687570625982e-07, + "loss": 0.4308, + "step": 53339 + }, + { + "epoch": 0.9216892452308543, + "grad_norm": 1.2696871671298113, + "learning_rate": 3.1992831322063324e-07, + "loss": 0.3599, + "step": 53340 + }, + { + "epoch": 0.9217065247442632, + "grad_norm": 1.0579214275099118, + "learning_rate": 3.197878996973891e-07, + "loss": 0.2708, + "step": 53341 + }, + { + "epoch": 0.9217238042576721, + "grad_norm": 2.3550041763569545, + "learning_rate": 3.196475164933044e-07, + "loss": 0.4384, + "step": 53342 + }, + { + "epoch": 0.921741083771081, + "grad_norm": 1.708588648610698, + "learning_rate": 3.195071636088198e-07, + "loss": 0.3382, + "step": 53343 + }, + { + "epoch": 0.9217583632844899, + "grad_norm": 1.5566477056416432, + "learning_rate": 3.19366841044374e-07, + "loss": 0.3262, + "step": 53344 + }, + { + "epoch": 0.9217756427978988, + "grad_norm": 0.9794796770463224, + "learning_rate": 3.192265488004065e-07, + "loss": 0.4659, + "step": 53345 + }, + { + "epoch": 0.9217929223113077, + "grad_norm": 1.1364971819383305, + "learning_rate": 3.190862868773581e-07, + "loss": 0.348, + "step": 53346 + }, + { + "epoch": 0.9218102018247166, + "grad_norm": 1.0157608313956736, + "learning_rate": 3.1894605527566737e-07, + "loss": 0.2944, + "step": 53347 + }, + { + "epoch": 0.9218274813381255, + "grad_norm": 1.111548765737653, + "learning_rate": 3.188058539957717e-07, + "loss": 0.3051, + "step": 53348 + }, + { + "epoch": 0.9218447608515344, + "grad_norm": 0.9055769256505727, + "learning_rate": 3.186656830381141e-07, + "loss": 0.1973, + "step": 53349 + }, + { + "epoch": 0.9218620403649433, + "grad_norm": 2.0424854614557075, + "learning_rate": 3.185255424031297e-07, + "loss": 0.3365, + "step": 53350 + }, + { + "epoch": 0.9218793198783523, + "grad_norm": 1.4503625669351483, + "learning_rate": 3.183854320912605e-07, + "loss": 0.8715, + "step": 53351 + }, + { + "epoch": 0.9218965993917612, + "grad_norm": 1.4454626823084575, + "learning_rate": 3.1824535210294273e-07, + "loss": 0.5456, + "step": 53352 + }, + { + "epoch": 0.9219138789051701, + "grad_norm": 1.139423916417973, + "learning_rate": 3.1810530243861604e-07, + "loss": 0.3072, + "step": 53353 + }, + { + "epoch": 0.921931158418579, + "grad_norm": 1.0413704807874833, + "learning_rate": 3.179652830987201e-07, + "loss": 0.2509, + "step": 53354 + }, + { + "epoch": 0.9219484379319879, + "grad_norm": 1.3702478639496534, + "learning_rate": 3.178252940836923e-07, + "loss": 0.4794, + "step": 53355 + }, + { + "epoch": 0.9219657174453968, + "grad_norm": 1.1836176226977915, + "learning_rate": 3.176853353939724e-07, + "loss": 0.3365, + "step": 53356 + }, + { + "epoch": 0.9219829969588056, + "grad_norm": 1.0700124377122622, + "learning_rate": 3.175454070299966e-07, + "loss": 0.3375, + "step": 53357 + }, + { + "epoch": 0.9220002764722145, + "grad_norm": 1.0324347761854291, + "learning_rate": 3.174055089922035e-07, + "loss": 0.2595, + "step": 53358 + }, + { + "epoch": 0.9220175559856234, + "grad_norm": 1.2981120758647104, + "learning_rate": 3.172656412810338e-07, + "loss": 0.3139, + "step": 53359 + }, + { + "epoch": 0.9220348354990323, + "grad_norm": 1.2055511646634705, + "learning_rate": 3.1712580389692383e-07, + "loss": 0.2801, + "step": 53360 + }, + { + "epoch": 0.9220521150124412, + "grad_norm": 1.604079920354296, + "learning_rate": 3.169859968403122e-07, + "loss": 0.3333, + "step": 53361 + }, + { + "epoch": 0.9220693945258501, + "grad_norm": 1.3559722248749144, + "learning_rate": 3.168462201116362e-07, + "loss": 0.4092, + "step": 53362 + }, + { + "epoch": 0.922086674039259, + "grad_norm": 1.1913958945431726, + "learning_rate": 3.1670647371133234e-07, + "loss": 0.2069, + "step": 53363 + }, + { + "epoch": 0.9221039535526679, + "grad_norm": 1.19298856163348, + "learning_rate": 3.1656675763984014e-07, + "loss": 0.1861, + "step": 53364 + }, + { + "epoch": 0.9221212330660769, + "grad_norm": 1.3077364789716652, + "learning_rate": 3.164270718975981e-07, + "loss": 0.4072, + "step": 53365 + }, + { + "epoch": 0.9221385125794858, + "grad_norm": 1.6430111117235382, + "learning_rate": 3.162874164850416e-07, + "loss": 0.2745, + "step": 53366 + }, + { + "epoch": 0.9221557920928947, + "grad_norm": 1.2510268231120953, + "learning_rate": 3.1614779140260896e-07, + "loss": 0.6357, + "step": 53367 + }, + { + "epoch": 0.9221730716063036, + "grad_norm": 1.5779325833405875, + "learning_rate": 3.1600819665073666e-07, + "loss": 0.4292, + "step": 53368 + }, + { + "epoch": 0.9221903511197125, + "grad_norm": 0.9811242483565779, + "learning_rate": 3.158686322298632e-07, + "loss": 0.3363, + "step": 53369 + }, + { + "epoch": 0.9222076306331214, + "grad_norm": 1.9702002113206107, + "learning_rate": 3.1572909814042487e-07, + "loss": 0.3189, + "step": 53370 + }, + { + "epoch": 0.9222249101465303, + "grad_norm": 1.4018060352400752, + "learning_rate": 3.155895943828602e-07, + "loss": 0.3199, + "step": 53371 + }, + { + "epoch": 0.9222421896599392, + "grad_norm": 1.0900839370424662, + "learning_rate": 3.154501209576044e-07, + "loss": 0.1521, + "step": 53372 + }, + { + "epoch": 0.9222594691733481, + "grad_norm": 1.1841229517664322, + "learning_rate": 3.1531067786509386e-07, + "loss": 0.2548, + "step": 53373 + }, + { + "epoch": 0.922276748686757, + "grad_norm": 1.1401809621450945, + "learning_rate": 3.15171265105767e-07, + "loss": 0.4015, + "step": 53374 + }, + { + "epoch": 0.9222940282001659, + "grad_norm": 1.3661514083911261, + "learning_rate": 3.150318826800591e-07, + "loss": 0.3151, + "step": 53375 + }, + { + "epoch": 0.9223113077135748, + "grad_norm": 1.5492495249950091, + "learning_rate": 3.1489253058840764e-07, + "loss": 0.3006, + "step": 53376 + }, + { + "epoch": 0.9223285872269837, + "grad_norm": 1.1473318916655475, + "learning_rate": 3.1475320883124883e-07, + "loss": 0.3049, + "step": 53377 + }, + { + "epoch": 0.9223458667403925, + "grad_norm": 1.4339027891120095, + "learning_rate": 3.1461391740902014e-07, + "loss": 0.2187, + "step": 53378 + }, + { + "epoch": 0.9223631462538014, + "grad_norm": 2.3002025936818202, + "learning_rate": 3.144746563221557e-07, + "loss": 0.1181, + "step": 53379 + }, + { + "epoch": 0.9223804257672104, + "grad_norm": 1.6284097483996451, + "learning_rate": 3.1433542557109176e-07, + "loss": 0.7134, + "step": 53380 + }, + { + "epoch": 0.9223977052806193, + "grad_norm": 1.6150313054766203, + "learning_rate": 3.141962251562669e-07, + "loss": 0.7309, + "step": 53381 + }, + { + "epoch": 0.9224149847940282, + "grad_norm": 1.1421767278489223, + "learning_rate": 3.1405705507811524e-07, + "loss": 0.1506, + "step": 53382 + }, + { + "epoch": 0.9224322643074371, + "grad_norm": 1.2112323983257953, + "learning_rate": 3.1391791533707417e-07, + "loss": 0.2178, + "step": 53383 + }, + { + "epoch": 0.922449543820846, + "grad_norm": 1.6594501364011223, + "learning_rate": 3.137788059335756e-07, + "loss": 0.3022, + "step": 53384 + }, + { + "epoch": 0.9224668233342549, + "grad_norm": 1.1346555596968029, + "learning_rate": 3.1363972686806023e-07, + "loss": 0.2757, + "step": 53385 + }, + { + "epoch": 0.9224841028476638, + "grad_norm": 1.0144538324505408, + "learning_rate": 3.1350067814095885e-07, + "loss": 0.2857, + "step": 53386 + }, + { + "epoch": 0.9225013823610727, + "grad_norm": 1.765755815533464, + "learning_rate": 3.1336165975271226e-07, + "loss": 0.4554, + "step": 53387 + }, + { + "epoch": 0.9225186618744816, + "grad_norm": 1.208554313976245, + "learning_rate": 3.132226717037512e-07, + "loss": 0.5007, + "step": 53388 + }, + { + "epoch": 0.9225359413878905, + "grad_norm": 1.6817749255834928, + "learning_rate": 3.130837139945142e-07, + "loss": 0.2805, + "step": 53389 + }, + { + "epoch": 0.9225532209012994, + "grad_norm": 1.4523001511676228, + "learning_rate": 3.1294478662543424e-07, + "loss": 0.3725, + "step": 53390 + }, + { + "epoch": 0.9225705004147083, + "grad_norm": 1.1835404392715174, + "learning_rate": 3.128058895969488e-07, + "loss": 0.3337, + "step": 53391 + }, + { + "epoch": 0.9225877799281172, + "grad_norm": 1.5279327342092177, + "learning_rate": 3.126670229094886e-07, + "loss": 0.4432, + "step": 53392 + }, + { + "epoch": 0.9226050594415262, + "grad_norm": 1.3040395258664512, + "learning_rate": 3.125281865634944e-07, + "loss": 0.2638, + "step": 53393 + }, + { + "epoch": 0.9226223389549351, + "grad_norm": 1.0068958480790762, + "learning_rate": 3.1238938055939806e-07, + "loss": 0.2478, + "step": 53394 + }, + { + "epoch": 0.922639618468344, + "grad_norm": 1.462245637007342, + "learning_rate": 3.122506048976326e-07, + "loss": 0.2777, + "step": 53395 + }, + { + "epoch": 0.9226568979817529, + "grad_norm": 1.281656430913561, + "learning_rate": 3.1211185957863656e-07, + "loss": 0.2713, + "step": 53396 + }, + { + "epoch": 0.9226741774951618, + "grad_norm": 1.1078952984977948, + "learning_rate": 3.119731446028407e-07, + "loss": 0.4731, + "step": 53397 + }, + { + "epoch": 0.9226914570085707, + "grad_norm": 0.5470160597156288, + "learning_rate": 3.118344599706813e-07, + "loss": 0.7494, + "step": 53398 + }, + { + "epoch": 0.9227087365219795, + "grad_norm": 1.5568097125900682, + "learning_rate": 3.116958056825925e-07, + "loss": 0.4901, + "step": 53399 + }, + { + "epoch": 0.9227260160353884, + "grad_norm": 1.3209972833492296, + "learning_rate": 3.115571817390095e-07, + "loss": 0.3579, + "step": 53400 + }, + { + "epoch": 0.9227432955487973, + "grad_norm": 1.5944741807213139, + "learning_rate": 3.114185881403653e-07, + "loss": 0.3257, + "step": 53401 + }, + { + "epoch": 0.9227605750622062, + "grad_norm": 1.6377640191076437, + "learning_rate": 3.1128002488709506e-07, + "loss": 0.3944, + "step": 53402 + }, + { + "epoch": 0.9227778545756151, + "grad_norm": 1.5787974566291865, + "learning_rate": 3.1114149197963185e-07, + "loss": 0.3052, + "step": 53403 + }, + { + "epoch": 0.922795134089024, + "grad_norm": 1.1739553743220474, + "learning_rate": 3.1100298941840967e-07, + "loss": 0.2586, + "step": 53404 + }, + { + "epoch": 0.9228124136024329, + "grad_norm": 0.80153135308948, + "learning_rate": 3.1086451720386267e-07, + "loss": 0.8116, + "step": 53405 + }, + { + "epoch": 0.9228296931158418, + "grad_norm": 1.078082429580335, + "learning_rate": 3.1072607533642384e-07, + "loss": 0.4982, + "step": 53406 + }, + { + "epoch": 0.9228469726292508, + "grad_norm": 1.5353730464082478, + "learning_rate": 3.105876638165284e-07, + "loss": 0.2963, + "step": 53407 + }, + { + "epoch": 0.9228642521426597, + "grad_norm": 1.3308374888299757, + "learning_rate": 3.104492826446071e-07, + "loss": 0.4956, + "step": 53408 + }, + { + "epoch": 0.9228815316560686, + "grad_norm": 1.4832954778173366, + "learning_rate": 3.1031093182109507e-07, + "loss": 0.3535, + "step": 53409 + }, + { + "epoch": 0.9228988111694775, + "grad_norm": 2.7584486799837467, + "learning_rate": 3.101726113464265e-07, + "loss": 0.7177, + "step": 53410 + }, + { + "epoch": 0.9229160906828864, + "grad_norm": 1.1536871446045152, + "learning_rate": 3.100343212210344e-07, + "loss": 0.492, + "step": 53411 + }, + { + "epoch": 0.9229333701962953, + "grad_norm": 1.6603602151845844, + "learning_rate": 3.0989606144534943e-07, + "loss": 0.1609, + "step": 53412 + }, + { + "epoch": 0.9229506497097042, + "grad_norm": 1.3658900014078443, + "learning_rate": 3.0975783201980804e-07, + "loss": 0.2096, + "step": 53413 + }, + { + "epoch": 0.9229679292231131, + "grad_norm": 1.3852423259653612, + "learning_rate": 3.096196329448409e-07, + "loss": 0.2144, + "step": 53414 + }, + { + "epoch": 0.922985208736522, + "grad_norm": 1.261379619791832, + "learning_rate": 3.09481464220881e-07, + "loss": 0.4097, + "step": 53415 + }, + { + "epoch": 0.9230024882499309, + "grad_norm": 1.9182900587224179, + "learning_rate": 3.0934332584836245e-07, + "loss": 0.2451, + "step": 53416 + }, + { + "epoch": 0.9230197677633398, + "grad_norm": 1.9629630562652456, + "learning_rate": 3.0920521782771606e-07, + "loss": 0.4092, + "step": 53417 + }, + { + "epoch": 0.9230370472767487, + "grad_norm": 1.3642497845584383, + "learning_rate": 3.09067140159377e-07, + "loss": 0.7264, + "step": 53418 + }, + { + "epoch": 0.9230543267901576, + "grad_norm": 1.3634313095791204, + "learning_rate": 3.0892909284377493e-07, + "loss": 0.3449, + "step": 53419 + }, + { + "epoch": 0.9230716063035664, + "grad_norm": 2.8843265183941003, + "learning_rate": 3.0879107588134285e-07, + "loss": 0.3766, + "step": 53420 + }, + { + "epoch": 0.9230888858169753, + "grad_norm": 1.0637251754656976, + "learning_rate": 3.086530892725148e-07, + "loss": 0.2609, + "step": 53421 + }, + { + "epoch": 0.9231061653303843, + "grad_norm": 1.5080071999923175, + "learning_rate": 3.085151330177216e-07, + "loss": 0.407, + "step": 53422 + }, + { + "epoch": 0.9231234448437932, + "grad_norm": 2.0137369544448434, + "learning_rate": 3.0837720711739516e-07, + "loss": 0.54, + "step": 53423 + }, + { + "epoch": 0.9231407243572021, + "grad_norm": 1.396576778746882, + "learning_rate": 3.082393115719684e-07, + "loss": 0.3576, + "step": 53424 + }, + { + "epoch": 0.923158003870611, + "grad_norm": 0.9769128011357813, + "learning_rate": 3.0810144638187323e-07, + "loss": 0.2933, + "step": 53425 + }, + { + "epoch": 0.9231752833840199, + "grad_norm": 1.4597171530075939, + "learning_rate": 3.079636115475393e-07, + "loss": 0.3256, + "step": 53426 + }, + { + "epoch": 0.9231925628974288, + "grad_norm": 1.111206714719863, + "learning_rate": 3.078258070694007e-07, + "loss": 0.3723, + "step": 53427 + }, + { + "epoch": 0.9232098424108377, + "grad_norm": 1.280610397627941, + "learning_rate": 3.076880329478882e-07, + "loss": 0.2239, + "step": 53428 + }, + { + "epoch": 0.9232271219242466, + "grad_norm": 0.8748679083994871, + "learning_rate": 3.075502891834337e-07, + "loss": 0.3239, + "step": 53429 + }, + { + "epoch": 0.9232444014376555, + "grad_norm": 2.3459470969402303, + "learning_rate": 3.074125757764679e-07, + "loss": 0.3799, + "step": 53430 + }, + { + "epoch": 0.9232616809510644, + "grad_norm": 1.1989110088419752, + "learning_rate": 3.072748927274227e-07, + "loss": 0.3638, + "step": 53431 + }, + { + "epoch": 0.9232789604644733, + "grad_norm": 1.0744207958189185, + "learning_rate": 3.0713724003672785e-07, + "loss": 0.3438, + "step": 53432 + }, + { + "epoch": 0.9232962399778822, + "grad_norm": 1.026239827460476, + "learning_rate": 3.0699961770481736e-07, + "loss": 0.2952, + "step": 53433 + }, + { + "epoch": 0.9233135194912911, + "grad_norm": 1.4347698264881457, + "learning_rate": 3.068620257321209e-07, + "loss": 0.3612, + "step": 53434 + }, + { + "epoch": 0.9233307990047, + "grad_norm": 0.8854356085665854, + "learning_rate": 3.067244641190681e-07, + "loss": 0.2565, + "step": 53435 + }, + { + "epoch": 0.923348078518109, + "grad_norm": 1.1306300963035056, + "learning_rate": 3.0658693286609197e-07, + "loss": 0.2925, + "step": 53436 + }, + { + "epoch": 0.9233653580315179, + "grad_norm": 0.7447455358820918, + "learning_rate": 3.0644943197362223e-07, + "loss": 0.5281, + "step": 53437 + }, + { + "epoch": 0.9233826375449268, + "grad_norm": 1.2031504156450292, + "learning_rate": 3.063119614420884e-07, + "loss": 0.3149, + "step": 53438 + }, + { + "epoch": 0.9233999170583357, + "grad_norm": 0.9154961328032545, + "learning_rate": 3.061745212719225e-07, + "loss": 0.275, + "step": 53439 + }, + { + "epoch": 0.9234171965717446, + "grad_norm": 0.9634808947270291, + "learning_rate": 3.060371114635552e-07, + "loss": 0.4903, + "step": 53440 + }, + { + "epoch": 0.9234344760851534, + "grad_norm": 1.6238141854764483, + "learning_rate": 3.058997320174162e-07, + "loss": 0.1997, + "step": 53441 + }, + { + "epoch": 0.9234517555985623, + "grad_norm": 1.6571480583825484, + "learning_rate": 3.057623829339362e-07, + "loss": 0.3787, + "step": 53442 + }, + { + "epoch": 0.9234690351119712, + "grad_norm": 0.9592642763769351, + "learning_rate": 3.05625064213545e-07, + "loss": 0.511, + "step": 53443 + }, + { + "epoch": 0.9234863146253801, + "grad_norm": 1.5831475990322992, + "learning_rate": 3.054877758566721e-07, + "loss": 0.3602, + "step": 53444 + }, + { + "epoch": 0.923503594138789, + "grad_norm": 1.6660438441234964, + "learning_rate": 3.0535051786374945e-07, + "loss": 0.3058, + "step": 53445 + }, + { + "epoch": 0.9235208736521979, + "grad_norm": 1.1080949704124496, + "learning_rate": 3.0521329023520563e-07, + "loss": 0.2471, + "step": 53446 + }, + { + "epoch": 0.9235381531656068, + "grad_norm": 1.6574470895448785, + "learning_rate": 3.0507609297147023e-07, + "loss": 0.2693, + "step": 53447 + }, + { + "epoch": 0.9235554326790157, + "grad_norm": 1.1956437424153896, + "learning_rate": 3.0493892607297295e-07, + "loss": 0.3804, + "step": 53448 + }, + { + "epoch": 0.9235727121924246, + "grad_norm": 1.7175643383421577, + "learning_rate": 3.048017895401434e-07, + "loss": 0.1981, + "step": 53449 + }, + { + "epoch": 0.9235899917058336, + "grad_norm": 1.197613291305113, + "learning_rate": 3.046646833734124e-07, + "loss": 0.2515, + "step": 53450 + }, + { + "epoch": 0.9236072712192425, + "grad_norm": 3.1773930849281227, + "learning_rate": 3.045276075732084e-07, + "loss": 0.3486, + "step": 53451 + }, + { + "epoch": 0.9236245507326514, + "grad_norm": 1.8012857806265696, + "learning_rate": 3.043905621399601e-07, + "loss": 0.3811, + "step": 53452 + }, + { + "epoch": 0.9236418302460603, + "grad_norm": 0.8904573253450079, + "learning_rate": 3.0425354707409817e-07, + "loss": 0.851, + "step": 53453 + }, + { + "epoch": 0.9236591097594692, + "grad_norm": 1.016468208594912, + "learning_rate": 3.0411656237605004e-07, + "loss": 0.3804, + "step": 53454 + }, + { + "epoch": 0.9236763892728781, + "grad_norm": 1.7315042446858024, + "learning_rate": 3.0397960804624647e-07, + "loss": 0.343, + "step": 53455 + }, + { + "epoch": 0.923693668786287, + "grad_norm": 1.357765245592511, + "learning_rate": 3.0384268408511606e-07, + "loss": 1.0848, + "step": 53456 + }, + { + "epoch": 0.9237109482996959, + "grad_norm": 1.4780079744895933, + "learning_rate": 3.0370579049308624e-07, + "loss": 0.4664, + "step": 53457 + }, + { + "epoch": 0.9237282278131048, + "grad_norm": 1.2760904742885857, + "learning_rate": 3.0356892727058773e-07, + "loss": 0.2229, + "step": 53458 + }, + { + "epoch": 0.9237455073265137, + "grad_norm": 1.0455212832753462, + "learning_rate": 3.034320944180469e-07, + "loss": 0.2556, + "step": 53459 + }, + { + "epoch": 0.9237627868399226, + "grad_norm": 2.842842909515605, + "learning_rate": 3.0329529193589447e-07, + "loss": 0.3707, + "step": 53460 + }, + { + "epoch": 0.9237800663533315, + "grad_norm": 1.2564478704304165, + "learning_rate": 3.0315851982455793e-07, + "loss": 0.2988, + "step": 53461 + }, + { + "epoch": 0.9237973458667403, + "grad_norm": 1.4964048906839025, + "learning_rate": 3.0302177808446577e-07, + "loss": 0.3271, + "step": 53462 + }, + { + "epoch": 0.9238146253801492, + "grad_norm": 1.6854105572639344, + "learning_rate": 3.028850667160466e-07, + "loss": 0.4495, + "step": 53463 + }, + { + "epoch": 0.9238319048935582, + "grad_norm": 1.7499708442724597, + "learning_rate": 3.0274838571972887e-07, + "loss": 0.3949, + "step": 53464 + }, + { + "epoch": 0.9238491844069671, + "grad_norm": 0.972481970938834, + "learning_rate": 3.026117350959401e-07, + "loss": 0.2108, + "step": 53465 + }, + { + "epoch": 0.923866463920376, + "grad_norm": 1.5604758264104595, + "learning_rate": 3.024751148451066e-07, + "loss": 0.276, + "step": 53466 + }, + { + "epoch": 0.9238837434337849, + "grad_norm": 1.215578979009774, + "learning_rate": 3.0233852496766027e-07, + "loss": 0.4061, + "step": 53467 + }, + { + "epoch": 0.9239010229471938, + "grad_norm": 0.5975483635168554, + "learning_rate": 3.0220196546402516e-07, + "loss": 0.5765, + "step": 53468 + }, + { + "epoch": 0.9239183024606027, + "grad_norm": 1.3005356158095958, + "learning_rate": 3.020654363346309e-07, + "loss": 0.3241, + "step": 53469 + }, + { + "epoch": 0.9239355819740116, + "grad_norm": 1.0465989651073395, + "learning_rate": 3.0192893757990503e-07, + "loss": 0.3548, + "step": 53470 + }, + { + "epoch": 0.9239528614874205, + "grad_norm": 2.751134893234075, + "learning_rate": 3.017924692002749e-07, + "loss": 0.3143, + "step": 53471 + }, + { + "epoch": 0.9239701410008294, + "grad_norm": 1.2413663337842618, + "learning_rate": 3.016560311961658e-07, + "loss": 0.3517, + "step": 53472 + }, + { + "epoch": 0.9239874205142383, + "grad_norm": 0.831172424125825, + "learning_rate": 3.0151962356800957e-07, + "loss": 0.2846, + "step": 53473 + }, + { + "epoch": 0.9240047000276472, + "grad_norm": 1.7539894244029606, + "learning_rate": 3.0138324631622915e-07, + "loss": 0.3929, + "step": 53474 + }, + { + "epoch": 0.9240219795410561, + "grad_norm": 1.0440620910448537, + "learning_rate": 3.012468994412554e-07, + "loss": 0.3042, + "step": 53475 + }, + { + "epoch": 0.924039259054465, + "grad_norm": 1.7123993666679536, + "learning_rate": 3.0111058294351346e-07, + "loss": 0.2731, + "step": 53476 + }, + { + "epoch": 0.924056538567874, + "grad_norm": 1.451380333057325, + "learning_rate": 3.009742968234286e-07, + "loss": 0.3905, + "step": 53477 + }, + { + "epoch": 0.9240738180812829, + "grad_norm": 1.5258052913190272, + "learning_rate": 3.0083804108142933e-07, + "loss": 0.4207, + "step": 53478 + }, + { + "epoch": 0.9240910975946918, + "grad_norm": 1.3197598560541628, + "learning_rate": 3.007018157179431e-07, + "loss": 0.4457, + "step": 53479 + }, + { + "epoch": 0.9241083771081007, + "grad_norm": 1.1275160347451065, + "learning_rate": 3.005656207333962e-07, + "loss": 0.2508, + "step": 53480 + }, + { + "epoch": 0.9241256566215096, + "grad_norm": 0.9351558612928826, + "learning_rate": 3.0042945612821393e-07, + "loss": 0.7139, + "step": 53481 + }, + { + "epoch": 0.9241429361349185, + "grad_norm": 0.9020680700466692, + "learning_rate": 3.0029332190282587e-07, + "loss": 0.2673, + "step": 53482 + }, + { + "epoch": 0.9241602156483273, + "grad_norm": 1.3685761096786342, + "learning_rate": 3.0015721805765396e-07, + "loss": 0.2827, + "step": 53483 + }, + { + "epoch": 0.9241774951617362, + "grad_norm": 1.3645637161793127, + "learning_rate": 3.000211445931267e-07, + "loss": 0.22, + "step": 53484 + }, + { + "epoch": 0.9241947746751451, + "grad_norm": 0.9952180772989936, + "learning_rate": 2.998851015096715e-07, + "loss": 0.3226, + "step": 53485 + }, + { + "epoch": 0.924212054188554, + "grad_norm": 1.535355504197775, + "learning_rate": 2.9974908880771367e-07, + "loss": 0.3539, + "step": 53486 + }, + { + "epoch": 0.9242293337019629, + "grad_norm": 1.066340329636218, + "learning_rate": 2.996131064876795e-07, + "loss": 0.3409, + "step": 53487 + }, + { + "epoch": 0.9242466132153718, + "grad_norm": 1.2473427784068476, + "learning_rate": 2.994771545499919e-07, + "loss": 0.4362, + "step": 53488 + }, + { + "epoch": 0.9242638927287807, + "grad_norm": 0.7616181348463671, + "learning_rate": 2.9934123299508065e-07, + "loss": 0.1649, + "step": 53489 + }, + { + "epoch": 0.9242811722421896, + "grad_norm": 1.4669006510292357, + "learning_rate": 2.992053418233687e-07, + "loss": 0.4421, + "step": 53490 + }, + { + "epoch": 0.9242984517555985, + "grad_norm": 1.623912878590699, + "learning_rate": 2.9906948103528455e-07, + "loss": 0.343, + "step": 53491 + }, + { + "epoch": 0.9243157312690075, + "grad_norm": 1.0088970535635484, + "learning_rate": 2.9893365063125125e-07, + "loss": 0.4382, + "step": 53492 + }, + { + "epoch": 0.9243330107824164, + "grad_norm": 1.2517153127321943, + "learning_rate": 2.987978506116951e-07, + "loss": 0.2427, + "step": 53493 + }, + { + "epoch": 0.9243502902958253, + "grad_norm": 1.383657513171237, + "learning_rate": 2.986620809770413e-07, + "loss": 0.3155, + "step": 53494 + }, + { + "epoch": 0.9243675698092342, + "grad_norm": 0.9796464317839643, + "learning_rate": 2.98526341727714e-07, + "loss": 0.2473, + "step": 53495 + }, + { + "epoch": 0.9243848493226431, + "grad_norm": 2.1803096499750207, + "learning_rate": 2.983906328641417e-07, + "loss": 0.2946, + "step": 53496 + }, + { + "epoch": 0.924402128836052, + "grad_norm": 1.170062386101726, + "learning_rate": 2.982549543867452e-07, + "loss": 0.3086, + "step": 53497 + }, + { + "epoch": 0.9244194083494609, + "grad_norm": 1.4902137898210917, + "learning_rate": 2.981193062959531e-07, + "loss": 0.3864, + "step": 53498 + }, + { + "epoch": 0.9244366878628698, + "grad_norm": 0.9568410809278876, + "learning_rate": 2.9798368859218716e-07, + "loss": 0.176, + "step": 53499 + }, + { + "epoch": 0.9244539673762787, + "grad_norm": 1.03549690282289, + "learning_rate": 2.9784810127587384e-07, + "loss": 0.2314, + "step": 53500 + }, + { + "epoch": 0.9244712468896876, + "grad_norm": 0.9031905138074712, + "learning_rate": 2.9771254434743825e-07, + "loss": 0.2242, + "step": 53501 + }, + { + "epoch": 0.9244885264030965, + "grad_norm": 1.3483289115723647, + "learning_rate": 2.9757701780730453e-07, + "loss": 0.3867, + "step": 53502 + }, + { + "epoch": 0.9245058059165054, + "grad_norm": 1.7541489655664149, + "learning_rate": 2.974415216558968e-07, + "loss": 0.3178, + "step": 53503 + }, + { + "epoch": 0.9245230854299143, + "grad_norm": 1.0985259285380573, + "learning_rate": 2.9730605589364024e-07, + "loss": 0.4366, + "step": 53504 + }, + { + "epoch": 0.9245403649433231, + "grad_norm": 3.0125459275890485, + "learning_rate": 2.971706205209568e-07, + "loss": 0.3346, + "step": 53505 + }, + { + "epoch": 0.924557644456732, + "grad_norm": 1.4252101171983298, + "learning_rate": 2.9703521553827273e-07, + "loss": 0.3825, + "step": 53506 + }, + { + "epoch": 0.924574923970141, + "grad_norm": 1.6223917824389824, + "learning_rate": 2.968998409460133e-07, + "loss": 0.5018, + "step": 53507 + }, + { + "epoch": 0.9245922034835499, + "grad_norm": 1.178159846789181, + "learning_rate": 2.9676449674459926e-07, + "loss": 0.3377, + "step": 53508 + }, + { + "epoch": 0.9246094829969588, + "grad_norm": 1.5720650416484543, + "learning_rate": 2.966291829344581e-07, + "loss": 0.4108, + "step": 53509 + }, + { + "epoch": 0.9246267625103677, + "grad_norm": 1.235005352218649, + "learning_rate": 2.964938995160094e-07, + "loss": 0.219, + "step": 53510 + }, + { + "epoch": 0.9246440420237766, + "grad_norm": 0.8759161499996475, + "learning_rate": 2.963586464896817e-07, + "loss": 0.7312, + "step": 53511 + }, + { + "epoch": 0.9246613215371855, + "grad_norm": 1.2446708268711388, + "learning_rate": 2.9622342385589256e-07, + "loss": 0.4516, + "step": 53512 + }, + { + "epoch": 0.9246786010505944, + "grad_norm": 1.263700867209479, + "learning_rate": 2.960882316150726e-07, + "loss": 0.2907, + "step": 53513 + }, + { + "epoch": 0.9246958805640033, + "grad_norm": 1.8474656653953934, + "learning_rate": 2.959530697676405e-07, + "loss": 0.2649, + "step": 53514 + }, + { + "epoch": 0.9247131600774122, + "grad_norm": 2.0965218341112424, + "learning_rate": 2.9581793831402137e-07, + "loss": 0.2491, + "step": 53515 + }, + { + "epoch": 0.9247304395908211, + "grad_norm": 1.6342597131930094, + "learning_rate": 2.956828372546383e-07, + "loss": 0.3087, + "step": 53516 + }, + { + "epoch": 0.92474771910423, + "grad_norm": 1.215381160725957, + "learning_rate": 2.9554776658991315e-07, + "loss": 0.5081, + "step": 53517 + }, + { + "epoch": 0.924764998617639, + "grad_norm": 1.4552295710069665, + "learning_rate": 2.9541272632026883e-07, + "loss": 0.3175, + "step": 53518 + }, + { + "epoch": 0.9247822781310479, + "grad_norm": 1.0683300126747226, + "learning_rate": 2.9527771644613067e-07, + "loss": 0.3348, + "step": 53519 + }, + { + "epoch": 0.9247995576444568, + "grad_norm": 1.087937024056517, + "learning_rate": 2.951427369679205e-07, + "loss": 0.4091, + "step": 53520 + }, + { + "epoch": 0.9248168371578657, + "grad_norm": 1.2487547060922297, + "learning_rate": 2.950077878860602e-07, + "loss": 0.4142, + "step": 53521 + }, + { + "epoch": 0.9248341166712746, + "grad_norm": 1.0596969707926343, + "learning_rate": 2.948728692009739e-07, + "loss": 0.2421, + "step": 53522 + }, + { + "epoch": 0.9248513961846835, + "grad_norm": 0.9035670527222469, + "learning_rate": 2.947379809130835e-07, + "loss": 0.7662, + "step": 53523 + }, + { + "epoch": 0.9248686756980924, + "grad_norm": 1.235695608608455, + "learning_rate": 2.946031230228097e-07, + "loss": 0.3629, + "step": 53524 + }, + { + "epoch": 0.9248859552115013, + "grad_norm": 2.0534848807575874, + "learning_rate": 2.9446829553057665e-07, + "loss": 0.3749, + "step": 53525 + }, + { + "epoch": 0.9249032347249101, + "grad_norm": 1.4356113149838012, + "learning_rate": 2.943334984368085e-07, + "loss": 0.5178, + "step": 53526 + }, + { + "epoch": 0.924920514238319, + "grad_norm": 0.9328923977955084, + "learning_rate": 2.941987317419248e-07, + "loss": 0.3401, + "step": 53527 + }, + { + "epoch": 0.9249377937517279, + "grad_norm": 1.5066046442459142, + "learning_rate": 2.9406399544634757e-07, + "loss": 0.3234, + "step": 53528 + }, + { + "epoch": 0.9249550732651368, + "grad_norm": 1.780533624838964, + "learning_rate": 2.9392928955049857e-07, + "loss": 0.5038, + "step": 53529 + }, + { + "epoch": 0.9249723527785457, + "grad_norm": 2.423606374163802, + "learning_rate": 2.93794614054802e-07, + "loss": 0.3394, + "step": 53530 + }, + { + "epoch": 0.9249896322919546, + "grad_norm": 2.0514729665337668, + "learning_rate": 2.9365996895967863e-07, + "loss": 0.6448, + "step": 53531 + }, + { + "epoch": 0.9250069118053635, + "grad_norm": 1.8679164061022473, + "learning_rate": 2.9352535426554916e-07, + "loss": 0.3416, + "step": 53532 + }, + { + "epoch": 0.9250241913187724, + "grad_norm": 1.2381874188744488, + "learning_rate": 2.9339076997283666e-07, + "loss": 0.419, + "step": 53533 + }, + { + "epoch": 0.9250414708321814, + "grad_norm": 1.4485289391395495, + "learning_rate": 2.9325621608196077e-07, + "loss": 0.2871, + "step": 53534 + }, + { + "epoch": 0.9250587503455903, + "grad_norm": 1.634561199634651, + "learning_rate": 2.931216925933444e-07, + "loss": 0.3996, + "step": 53535 + }, + { + "epoch": 0.9250760298589992, + "grad_norm": 2.064518156321083, + "learning_rate": 2.929871995074085e-07, + "loss": 0.48, + "step": 53536 + }, + { + "epoch": 0.9250933093724081, + "grad_norm": 0.7972912586861943, + "learning_rate": 2.9285273682457594e-07, + "loss": 0.2152, + "step": 53537 + }, + { + "epoch": 0.925110588885817, + "grad_norm": 2.020951840020053, + "learning_rate": 2.9271830454526526e-07, + "loss": 0.3846, + "step": 53538 + }, + { + "epoch": 0.9251278683992259, + "grad_norm": 1.5309298164932768, + "learning_rate": 2.9258390266989846e-07, + "loss": 0.4043, + "step": 53539 + }, + { + "epoch": 0.9251451479126348, + "grad_norm": 1.225439979305192, + "learning_rate": 2.924495311988962e-07, + "loss": 0.371, + "step": 53540 + }, + { + "epoch": 0.9251624274260437, + "grad_norm": 1.5324151356994522, + "learning_rate": 2.923151901326804e-07, + "loss": 0.3767, + "step": 53541 + }, + { + "epoch": 0.9251797069394526, + "grad_norm": 1.3933166695655732, + "learning_rate": 2.9218087947167186e-07, + "loss": 0.2603, + "step": 53542 + }, + { + "epoch": 0.9251969864528615, + "grad_norm": 0.9966596153302689, + "learning_rate": 2.920465992162891e-07, + "loss": 0.2365, + "step": 53543 + }, + { + "epoch": 0.9252142659662704, + "grad_norm": 0.8717088464673166, + "learning_rate": 2.9191234936695513e-07, + "loss": 0.2997, + "step": 53544 + }, + { + "epoch": 0.9252315454796793, + "grad_norm": 1.1396420710752453, + "learning_rate": 2.917781299240885e-07, + "loss": 0.2352, + "step": 53545 + }, + { + "epoch": 0.9252488249930882, + "grad_norm": 1.7259903556225198, + "learning_rate": 2.916439408881111e-07, + "loss": 0.2239, + "step": 53546 + }, + { + "epoch": 0.925266104506497, + "grad_norm": 2.5567673731398, + "learning_rate": 2.915097822594437e-07, + "loss": 0.2245, + "step": 53547 + }, + { + "epoch": 0.925283384019906, + "grad_norm": 1.6727540411152677, + "learning_rate": 2.9137565403850375e-07, + "loss": 0.494, + "step": 53548 + }, + { + "epoch": 0.9253006635333149, + "grad_norm": 1.1464082741626196, + "learning_rate": 2.9124155622571537e-07, + "loss": 0.338, + "step": 53549 + }, + { + "epoch": 0.9253179430467238, + "grad_norm": 1.1349750814794548, + "learning_rate": 2.9110748882149375e-07, + "loss": 0.3139, + "step": 53550 + }, + { + "epoch": 0.9253352225601327, + "grad_norm": 1.084354627634039, + "learning_rate": 2.90973451826263e-07, + "loss": 0.2558, + "step": 53551 + }, + { + "epoch": 0.9253525020735416, + "grad_norm": 1.4950097016945056, + "learning_rate": 2.908394452404395e-07, + "loss": 0.3752, + "step": 53552 + }, + { + "epoch": 0.9253697815869505, + "grad_norm": 1.0231240871552874, + "learning_rate": 2.907054690644462e-07, + "loss": 0.617, + "step": 53553 + }, + { + "epoch": 0.9253870611003594, + "grad_norm": 1.334225120040664, + "learning_rate": 2.905715232987005e-07, + "loss": 0.3155, + "step": 53554 + }, + { + "epoch": 0.9254043406137683, + "grad_norm": 1.5107399331439961, + "learning_rate": 2.904376079436244e-07, + "loss": 0.5476, + "step": 53555 + }, + { + "epoch": 0.9254216201271772, + "grad_norm": 1.6293417431025816, + "learning_rate": 2.9030372299963414e-07, + "loss": 0.2176, + "step": 53556 + }, + { + "epoch": 0.9254388996405861, + "grad_norm": 1.600418802894787, + "learning_rate": 2.9016986846715054e-07, + "loss": 0.1769, + "step": 53557 + }, + { + "epoch": 0.925456179153995, + "grad_norm": 1.3887028237637968, + "learning_rate": 2.9003604434659325e-07, + "loss": 0.232, + "step": 53558 + }, + { + "epoch": 0.9254734586674039, + "grad_norm": 1.2346764149486402, + "learning_rate": 2.8990225063837975e-07, + "loss": 0.5033, + "step": 53559 + }, + { + "epoch": 0.9254907381808128, + "grad_norm": 1.5728130658338355, + "learning_rate": 2.8976848734293293e-07, + "loss": 0.3208, + "step": 53560 + }, + { + "epoch": 0.9255080176942218, + "grad_norm": 1.1332575550069317, + "learning_rate": 2.8963475446066704e-07, + "loss": 0.4347, + "step": 53561 + }, + { + "epoch": 0.9255252972076307, + "grad_norm": 1.1930190011218693, + "learning_rate": 2.8950105199200385e-07, + "loss": 0.2046, + "step": 53562 + }, + { + "epoch": 0.9255425767210396, + "grad_norm": 0.8268904083300163, + "learning_rate": 2.893673799373609e-07, + "loss": 0.4811, + "step": 53563 + }, + { + "epoch": 0.9255598562344485, + "grad_norm": 1.5367287903396956, + "learning_rate": 2.8923373829715664e-07, + "loss": 0.4704, + "step": 53564 + }, + { + "epoch": 0.9255771357478574, + "grad_norm": 0.7455610485584894, + "learning_rate": 2.8910012707181076e-07, + "loss": 0.6284, + "step": 53565 + }, + { + "epoch": 0.9255944152612663, + "grad_norm": 1.1774217443987771, + "learning_rate": 2.88966546261743e-07, + "loss": 0.3469, + "step": 53566 + }, + { + "epoch": 0.9256116947746752, + "grad_norm": 1.5678897450801466, + "learning_rate": 2.8883299586736855e-07, + "loss": 0.3772, + "step": 53567 + }, + { + "epoch": 0.925628974288084, + "grad_norm": 1.1290268093172977, + "learning_rate": 2.8869947588910707e-07, + "loss": 0.2435, + "step": 53568 + }, + { + "epoch": 0.9256462538014929, + "grad_norm": 1.4464437594828092, + "learning_rate": 2.885659863273771e-07, + "loss": 0.3219, + "step": 53569 + }, + { + "epoch": 0.9256635333149018, + "grad_norm": 2.151919518571862, + "learning_rate": 2.8843252718259606e-07, + "loss": 0.3006, + "step": 53570 + }, + { + "epoch": 0.9256808128283107, + "grad_norm": 1.5452090505506537, + "learning_rate": 2.8829909845518367e-07, + "loss": 0.2044, + "step": 53571 + }, + { + "epoch": 0.9256980923417196, + "grad_norm": 1.8061292126482555, + "learning_rate": 2.8816570014555513e-07, + "loss": 0.2763, + "step": 53572 + }, + { + "epoch": 0.9257153718551285, + "grad_norm": 1.887205832253994, + "learning_rate": 2.880323322541312e-07, + "loss": 0.4206, + "step": 53573 + }, + { + "epoch": 0.9257326513685374, + "grad_norm": 1.2254454931117034, + "learning_rate": 2.878989947813271e-07, + "loss": 0.2808, + "step": 53574 + }, + { + "epoch": 0.9257499308819463, + "grad_norm": 1.9432081686478586, + "learning_rate": 2.877656877275603e-07, + "loss": 0.3359, + "step": 53575 + }, + { + "epoch": 0.9257672103953553, + "grad_norm": 1.505765595234977, + "learning_rate": 2.8763241109325044e-07, + "loss": 0.348, + "step": 53576 + }, + { + "epoch": 0.9257844899087642, + "grad_norm": 1.4549925781825515, + "learning_rate": 2.8749916487881504e-07, + "loss": 0.426, + "step": 53577 + }, + { + "epoch": 0.9258017694221731, + "grad_norm": 1.3711215639132184, + "learning_rate": 2.873659490846703e-07, + "loss": 0.1943, + "step": 53578 + }, + { + "epoch": 0.925819048935582, + "grad_norm": 1.6500470474474904, + "learning_rate": 2.8723276371123263e-07, + "loss": 0.366, + "step": 53579 + }, + { + "epoch": 0.9258363284489909, + "grad_norm": 1.734617273811698, + "learning_rate": 2.8709960875891953e-07, + "loss": 0.4982, + "step": 53580 + }, + { + "epoch": 0.9258536079623998, + "grad_norm": 1.6505641448878832, + "learning_rate": 2.869664842281494e-07, + "loss": 0.2332, + "step": 53581 + }, + { + "epoch": 0.9258708874758087, + "grad_norm": 1.4198787430046351, + "learning_rate": 2.8683339011933873e-07, + "loss": 0.3317, + "step": 53582 + }, + { + "epoch": 0.9258881669892176, + "grad_norm": 2.3916933944153302, + "learning_rate": 2.867003264329038e-07, + "loss": 0.4828, + "step": 53583 + }, + { + "epoch": 0.9259054465026265, + "grad_norm": 0.9121089137398162, + "learning_rate": 2.8656729316926204e-07, + "loss": 0.3172, + "step": 53584 + }, + { + "epoch": 0.9259227260160354, + "grad_norm": 1.903417349495829, + "learning_rate": 2.864342903288286e-07, + "loss": 0.2346, + "step": 53585 + }, + { + "epoch": 0.9259400055294443, + "grad_norm": 1.1009181111804236, + "learning_rate": 2.863013179120222e-07, + "loss": 0.3015, + "step": 53586 + }, + { + "epoch": 0.9259572850428532, + "grad_norm": 1.8970600547581635, + "learning_rate": 2.8616837591925686e-07, + "loss": 0.4435, + "step": 53587 + }, + { + "epoch": 0.9259745645562621, + "grad_norm": 1.3677778045558193, + "learning_rate": 2.860354643509522e-07, + "loss": 0.3327, + "step": 53588 + }, + { + "epoch": 0.9259918440696709, + "grad_norm": 1.532599127302753, + "learning_rate": 2.859025832075224e-07, + "loss": 0.2173, + "step": 53589 + }, + { + "epoch": 0.9260091235830799, + "grad_norm": 1.3254154809705037, + "learning_rate": 2.857697324893827e-07, + "loss": 0.4503, + "step": 53590 + }, + { + "epoch": 0.9260264030964888, + "grad_norm": 0.9711533806603859, + "learning_rate": 2.8563691219695266e-07, + "loss": 0.1996, + "step": 53591 + }, + { + "epoch": 0.9260436826098977, + "grad_norm": 0.931088623477561, + "learning_rate": 2.8550412233064315e-07, + "loss": 0.4127, + "step": 53592 + }, + { + "epoch": 0.9260609621233066, + "grad_norm": 1.1230208461237543, + "learning_rate": 2.85371362890875e-07, + "loss": 0.4353, + "step": 53593 + }, + { + "epoch": 0.9260782416367155, + "grad_norm": 1.67101347892535, + "learning_rate": 2.8523863387806103e-07, + "loss": 0.4461, + "step": 53594 + }, + { + "epoch": 0.9260955211501244, + "grad_norm": 1.1572213548129964, + "learning_rate": 2.851059352926189e-07, + "loss": 0.3486, + "step": 53595 + }, + { + "epoch": 0.9261128006635333, + "grad_norm": 1.203785609591405, + "learning_rate": 2.8497326713496256e-07, + "loss": 0.2433, + "step": 53596 + }, + { + "epoch": 0.9261300801769422, + "grad_norm": 0.7924080159955907, + "learning_rate": 2.848406294055095e-07, + "loss": 0.6579, + "step": 53597 + }, + { + "epoch": 0.9261473596903511, + "grad_norm": 1.7533252748191563, + "learning_rate": 2.8470802210467055e-07, + "loss": 0.3746, + "step": 53598 + }, + { + "epoch": 0.92616463920376, + "grad_norm": 1.3079469850848424, + "learning_rate": 2.8457544523286753e-07, + "loss": 0.4503, + "step": 53599 + }, + { + "epoch": 0.9261819187171689, + "grad_norm": 1.284723967302353, + "learning_rate": 2.8444289879051346e-07, + "loss": 0.4018, + "step": 53600 + }, + { + "epoch": 0.9261991982305778, + "grad_norm": 0.9706506484855637, + "learning_rate": 2.8431038277802024e-07, + "loss": 0.2601, + "step": 53601 + }, + { + "epoch": 0.9262164777439867, + "grad_norm": 1.6693401383626645, + "learning_rate": 2.841778971958065e-07, + "loss": 0.2482, + "step": 53602 + }, + { + "epoch": 0.9262337572573957, + "grad_norm": 1.0905977307833137, + "learning_rate": 2.840454420442851e-07, + "loss": 0.2009, + "step": 53603 + }, + { + "epoch": 0.9262510367708046, + "grad_norm": 1.883747121154061, + "learning_rate": 2.8391301732387135e-07, + "loss": 0.3328, + "step": 53604 + }, + { + "epoch": 0.9262683162842135, + "grad_norm": 0.9614226068070871, + "learning_rate": 2.8378062303498045e-07, + "loss": 0.2365, + "step": 53605 + }, + { + "epoch": 0.9262855957976224, + "grad_norm": 1.8440067287613666, + "learning_rate": 2.8364825917802876e-07, + "loss": 0.4922, + "step": 53606 + }, + { + "epoch": 0.9263028753110313, + "grad_norm": 1.225788050234852, + "learning_rate": 2.835159257534281e-07, + "loss": 0.2882, + "step": 53607 + }, + { + "epoch": 0.9263201548244402, + "grad_norm": 1.3187301895049943, + "learning_rate": 2.833836227615949e-07, + "loss": 0.4487, + "step": 53608 + }, + { + "epoch": 0.9263374343378491, + "grad_norm": 1.2223889705688125, + "learning_rate": 2.83251350202941e-07, + "loss": 0.2413, + "step": 53609 + }, + { + "epoch": 0.9263547138512579, + "grad_norm": 2.565077765054786, + "learning_rate": 2.831191080778828e-07, + "loss": 0.1924, + "step": 53610 + }, + { + "epoch": 0.9263719933646668, + "grad_norm": 1.8053338187191479, + "learning_rate": 2.829868963868343e-07, + "loss": 0.3494, + "step": 53611 + }, + { + "epoch": 0.9263892728780757, + "grad_norm": 1.558933421151101, + "learning_rate": 2.828547151302097e-07, + "loss": 0.4328, + "step": 53612 + }, + { + "epoch": 0.9264065523914846, + "grad_norm": 1.3271432821745006, + "learning_rate": 2.8272256430842303e-07, + "loss": 0.2557, + "step": 53613 + }, + { + "epoch": 0.9264238319048935, + "grad_norm": 0.9692004583266435, + "learning_rate": 2.8259044392188626e-07, + "loss": 0.2097, + "step": 53614 + }, + { + "epoch": 0.9264411114183024, + "grad_norm": 1.5097954129671498, + "learning_rate": 2.824583539710146e-07, + "loss": 0.3075, + "step": 53615 + }, + { + "epoch": 0.9264583909317113, + "grad_norm": 1.2923732310358507, + "learning_rate": 2.823262944562222e-07, + "loss": 0.2366, + "step": 53616 + }, + { + "epoch": 0.9264756704451202, + "grad_norm": 0.904068946691347, + "learning_rate": 2.8219426537792416e-07, + "loss": 0.2842, + "step": 53617 + }, + { + "epoch": 0.9264929499585292, + "grad_norm": 1.7685522571020338, + "learning_rate": 2.8206226673653023e-07, + "loss": 0.407, + "step": 53618 + }, + { + "epoch": 0.9265102294719381, + "grad_norm": 0.8579944574487753, + "learning_rate": 2.819302985324568e-07, + "loss": 0.2841, + "step": 53619 + }, + { + "epoch": 0.926527508985347, + "grad_norm": 1.4029687716247448, + "learning_rate": 2.8179836076611565e-07, + "loss": 0.4053, + "step": 53620 + }, + { + "epoch": 0.9265447884987559, + "grad_norm": 1.2587258413238065, + "learning_rate": 2.8166645343792096e-07, + "loss": 0.3552, + "step": 53621 + }, + { + "epoch": 0.9265620680121648, + "grad_norm": 1.2906628161732085, + "learning_rate": 2.8153457654828573e-07, + "loss": 0.2691, + "step": 53622 + }, + { + "epoch": 0.9265793475255737, + "grad_norm": 1.1133012484372686, + "learning_rate": 2.814027300976219e-07, + "loss": 0.3573, + "step": 53623 + }, + { + "epoch": 0.9265966270389826, + "grad_norm": 1.6847773280263674, + "learning_rate": 2.8127091408634454e-07, + "loss": 0.4987, + "step": 53624 + }, + { + "epoch": 0.9266139065523915, + "grad_norm": 1.8533475343579526, + "learning_rate": 2.811391285148646e-07, + "loss": 0.3381, + "step": 53625 + }, + { + "epoch": 0.9266311860658004, + "grad_norm": 1.3693086800694405, + "learning_rate": 2.8100737338359496e-07, + "loss": 0.2762, + "step": 53626 + }, + { + "epoch": 0.9266484655792093, + "grad_norm": 1.2308568418459402, + "learning_rate": 2.808756486929487e-07, + "loss": 0.2959, + "step": 53627 + }, + { + "epoch": 0.9266657450926182, + "grad_norm": 0.8386239099699977, + "learning_rate": 2.807439544433399e-07, + "loss": 0.8985, + "step": 53628 + }, + { + "epoch": 0.9266830246060271, + "grad_norm": 2.201054407758912, + "learning_rate": 2.8061229063517827e-07, + "loss": 0.4948, + "step": 53629 + }, + { + "epoch": 0.926700304119436, + "grad_norm": 0.8950517410904578, + "learning_rate": 2.8048065726887894e-07, + "loss": 0.394, + "step": 53630 + }, + { + "epoch": 0.926717583632845, + "grad_norm": 1.6460679376780118, + "learning_rate": 2.8034905434485284e-07, + "loss": 0.4273, + "step": 53631 + }, + { + "epoch": 0.9267348631462538, + "grad_norm": 1.452796141128455, + "learning_rate": 2.802174818635106e-07, + "loss": 0.2455, + "step": 53632 + }, + { + "epoch": 0.9267521426596627, + "grad_norm": 1.697158900779235, + "learning_rate": 2.8008593982526754e-07, + "loss": 0.3778, + "step": 53633 + }, + { + "epoch": 0.9267694221730716, + "grad_norm": 0.9422103998055928, + "learning_rate": 2.7995442823053223e-07, + "loss": 0.2908, + "step": 53634 + }, + { + "epoch": 0.9267867016864805, + "grad_norm": 1.5118453224408335, + "learning_rate": 2.798229470797209e-07, + "loss": 0.2755, + "step": 53635 + }, + { + "epoch": 0.9268039811998894, + "grad_norm": 2.088225627443726, + "learning_rate": 2.796914963732411e-07, + "loss": 0.2511, + "step": 53636 + }, + { + "epoch": 0.9268212607132983, + "grad_norm": 0.9665530379310479, + "learning_rate": 2.795600761115069e-07, + "loss": 0.3532, + "step": 53637 + }, + { + "epoch": 0.9268385402267072, + "grad_norm": 1.2978505885342582, + "learning_rate": 2.794286862949269e-07, + "loss": 0.5758, + "step": 53638 + }, + { + "epoch": 0.9268558197401161, + "grad_norm": 1.4741752678768627, + "learning_rate": 2.7929732692391745e-07, + "loss": 0.2637, + "step": 53639 + }, + { + "epoch": 0.926873099253525, + "grad_norm": 1.6910031997864625, + "learning_rate": 2.7916599799888813e-07, + "loss": 0.3767, + "step": 53640 + }, + { + "epoch": 0.9268903787669339, + "grad_norm": 0.707460549499405, + "learning_rate": 2.790346995202475e-07, + "loss": 0.3228, + "step": 53641 + }, + { + "epoch": 0.9269076582803428, + "grad_norm": 1.91676658030402, + "learning_rate": 2.7890343148841094e-07, + "loss": 0.392, + "step": 53642 + }, + { + "epoch": 0.9269249377937517, + "grad_norm": 1.6013876894704642, + "learning_rate": 2.7877219390378687e-07, + "loss": 0.2529, + "step": 53643 + }, + { + "epoch": 0.9269422173071606, + "grad_norm": 1.0045244877561041, + "learning_rate": 2.786409867667861e-07, + "loss": 0.2756, + "step": 53644 + }, + { + "epoch": 0.9269594968205696, + "grad_norm": 1.1453372367843841, + "learning_rate": 2.7850981007782054e-07, + "loss": 0.2227, + "step": 53645 + }, + { + "epoch": 0.9269767763339785, + "grad_norm": 1.5917953744911628, + "learning_rate": 2.783786638373032e-07, + "loss": 0.4983, + "step": 53646 + }, + { + "epoch": 0.9269940558473874, + "grad_norm": 1.2460129177800114, + "learning_rate": 2.7824754804564035e-07, + "loss": 0.4495, + "step": 53647 + }, + { + "epoch": 0.9270113353607963, + "grad_norm": 1.2921709480911376, + "learning_rate": 2.7811646270324734e-07, + "loss": 0.2969, + "step": 53648 + }, + { + "epoch": 0.9270286148742052, + "grad_norm": 0.900807608697118, + "learning_rate": 2.779854078105304e-07, + "loss": 0.2581, + "step": 53649 + }, + { + "epoch": 0.9270458943876141, + "grad_norm": 1.2256028875541367, + "learning_rate": 2.778543833679015e-07, + "loss": 0.4392, + "step": 53650 + }, + { + "epoch": 0.927063173901023, + "grad_norm": 1.0689636776880083, + "learning_rate": 2.777233893757736e-07, + "loss": 0.2086, + "step": 53651 + }, + { + "epoch": 0.9270804534144319, + "grad_norm": 1.5612983961615619, + "learning_rate": 2.7759242583455414e-07, + "loss": 0.5269, + "step": 53652 + }, + { + "epoch": 0.9270977329278407, + "grad_norm": 2.33651999831084, + "learning_rate": 2.7746149274465394e-07, + "loss": 0.3455, + "step": 53653 + }, + { + "epoch": 0.9271150124412496, + "grad_norm": 1.5225081971363448, + "learning_rate": 2.7733059010648266e-07, + "loss": 0.2417, + "step": 53654 + }, + { + "epoch": 0.9271322919546585, + "grad_norm": 1.1532668392369623, + "learning_rate": 2.7719971792045106e-07, + "loss": 0.2487, + "step": 53655 + }, + { + "epoch": 0.9271495714680674, + "grad_norm": 1.8764451418951535, + "learning_rate": 2.770688761869689e-07, + "loss": 0.2606, + "step": 53656 + }, + { + "epoch": 0.9271668509814763, + "grad_norm": 2.1022543682384915, + "learning_rate": 2.769380649064468e-07, + "loss": 0.2347, + "step": 53657 + }, + { + "epoch": 0.9271841304948852, + "grad_norm": 1.8591448193899995, + "learning_rate": 2.7680728407929345e-07, + "loss": 0.3577, + "step": 53658 + }, + { + "epoch": 0.9272014100082941, + "grad_norm": 1.6881477338871433, + "learning_rate": 2.766765337059185e-07, + "loss": 0.3395, + "step": 53659 + }, + { + "epoch": 0.927218689521703, + "grad_norm": 1.1540402380980284, + "learning_rate": 2.7654581378673164e-07, + "loss": 0.2547, + "step": 53660 + }, + { + "epoch": 0.927235969035112, + "grad_norm": 1.0575880487051854, + "learning_rate": 2.764151243221413e-07, + "loss": 0.3481, + "step": 53661 + }, + { + "epoch": 0.9272532485485209, + "grad_norm": 1.3888924959584046, + "learning_rate": 2.7628446531255847e-07, + "loss": 0.4294, + "step": 53662 + }, + { + "epoch": 0.9272705280619298, + "grad_norm": 1.5086177327930141, + "learning_rate": 2.7615383675839157e-07, + "loss": 0.3044, + "step": 53663 + }, + { + "epoch": 0.9272878075753387, + "grad_norm": 1.0827004869060948, + "learning_rate": 2.760232386600503e-07, + "loss": 0.3984, + "step": 53664 + }, + { + "epoch": 0.9273050870887476, + "grad_norm": 1.0800129268545104, + "learning_rate": 2.7589267101794214e-07, + "loss": 0.1371, + "step": 53665 + }, + { + "epoch": 0.9273223666021565, + "grad_norm": 1.1105076305101345, + "learning_rate": 2.7576213383247783e-07, + "loss": 0.4116, + "step": 53666 + }, + { + "epoch": 0.9273396461155654, + "grad_norm": 1.2755555139368373, + "learning_rate": 2.756316271040649e-07, + "loss": 0.2488, + "step": 53667 + }, + { + "epoch": 0.9273569256289743, + "grad_norm": 2.0997156127915293, + "learning_rate": 2.75501150833114e-07, + "loss": 0.2671, + "step": 53668 + }, + { + "epoch": 0.9273742051423832, + "grad_norm": 1.763427853264699, + "learning_rate": 2.753707050200316e-07, + "loss": 0.3808, + "step": 53669 + }, + { + "epoch": 0.9273914846557921, + "grad_norm": 1.0753585458763633, + "learning_rate": 2.752402896652284e-07, + "loss": 0.3605, + "step": 53670 + }, + { + "epoch": 0.927408764169201, + "grad_norm": 0.8386947392759173, + "learning_rate": 2.751099047691108e-07, + "loss": 0.3682, + "step": 53671 + }, + { + "epoch": 0.92742604368261, + "grad_norm": 2.5932148644762676, + "learning_rate": 2.7497955033208737e-07, + "loss": 0.2558, + "step": 53672 + }, + { + "epoch": 0.9274433231960189, + "grad_norm": 1.9160315030433137, + "learning_rate": 2.748492263545677e-07, + "loss": 0.3728, + "step": 53673 + }, + { + "epoch": 0.9274606027094277, + "grad_norm": 2.0397294092833302, + "learning_rate": 2.747189328369593e-07, + "loss": 0.2805, + "step": 53674 + }, + { + "epoch": 0.9274778822228366, + "grad_norm": 1.8162538600323606, + "learning_rate": 2.745886697796707e-07, + "loss": 0.4062, + "step": 53675 + }, + { + "epoch": 0.9274951617362455, + "grad_norm": 1.048735553653098, + "learning_rate": 2.744584371831094e-07, + "loss": 0.3878, + "step": 53676 + }, + { + "epoch": 0.9275124412496544, + "grad_norm": 1.0427515178358167, + "learning_rate": 2.743282350476828e-07, + "loss": 0.1608, + "step": 53677 + }, + { + "epoch": 0.9275297207630633, + "grad_norm": 1.3083234423225691, + "learning_rate": 2.741980633737984e-07, + "loss": 0.5184, + "step": 53678 + }, + { + "epoch": 0.9275470002764722, + "grad_norm": 1.3808409032598687, + "learning_rate": 2.7406792216186694e-07, + "loss": 0.5487, + "step": 53679 + }, + { + "epoch": 0.9275642797898811, + "grad_norm": 1.6324449866618023, + "learning_rate": 2.739378114122926e-07, + "loss": 0.4115, + "step": 53680 + }, + { + "epoch": 0.92758155930329, + "grad_norm": 1.4482879197018241, + "learning_rate": 2.73807731125485e-07, + "loss": 0.4372, + "step": 53681 + }, + { + "epoch": 0.9275988388166989, + "grad_norm": 2.8019320753610213, + "learning_rate": 2.7367768130185047e-07, + "loss": 0.3647, + "step": 53682 + }, + { + "epoch": 0.9276161183301078, + "grad_norm": 2.137267946371334, + "learning_rate": 2.735476619417965e-07, + "loss": 0.6633, + "step": 53683 + }, + { + "epoch": 0.9276333978435167, + "grad_norm": 1.060640614427628, + "learning_rate": 2.734176730457305e-07, + "loss": 0.467, + "step": 53684 + }, + { + "epoch": 0.9276506773569256, + "grad_norm": 1.2785951501619415, + "learning_rate": 2.732877146140589e-07, + "loss": 0.2289, + "step": 53685 + }, + { + "epoch": 0.9276679568703345, + "grad_norm": 0.8850894361145764, + "learning_rate": 2.731577866471902e-07, + "loss": 0.5401, + "step": 53686 + }, + { + "epoch": 0.9276852363837435, + "grad_norm": 1.121958183366863, + "learning_rate": 2.730278891455296e-07, + "loss": 0.2896, + "step": 53687 + }, + { + "epoch": 0.9277025158971524, + "grad_norm": 1.9692104484308746, + "learning_rate": 2.728980221094868e-07, + "loss": 0.4174, + "step": 53688 + }, + { + "epoch": 0.9277197954105613, + "grad_norm": 0.9730755451218255, + "learning_rate": 2.727681855394648e-07, + "loss": 0.5164, + "step": 53689 + }, + { + "epoch": 0.9277370749239702, + "grad_norm": 2.108629972124369, + "learning_rate": 2.7263837943587223e-07, + "loss": 0.4515, + "step": 53690 + }, + { + "epoch": 0.9277543544373791, + "grad_norm": 0.9324711042947468, + "learning_rate": 2.7250860379911646e-07, + "loss": 0.2697, + "step": 53691 + }, + { + "epoch": 0.927771633950788, + "grad_norm": 1.5694562336075877, + "learning_rate": 2.7237885862960165e-07, + "loss": 0.2767, + "step": 53692 + }, + { + "epoch": 0.9277889134641969, + "grad_norm": 0.9974243079811741, + "learning_rate": 2.722491439277375e-07, + "loss": 0.296, + "step": 53693 + }, + { + "epoch": 0.9278061929776058, + "grad_norm": 1.7614909690272644, + "learning_rate": 2.7211945969392584e-07, + "loss": 0.3719, + "step": 53694 + }, + { + "epoch": 0.9278234724910146, + "grad_norm": 1.4236867364806294, + "learning_rate": 2.7198980592857636e-07, + "loss": 0.319, + "step": 53695 + }, + { + "epoch": 0.9278407520044235, + "grad_norm": 1.15359563469138, + "learning_rate": 2.7186018263209434e-07, + "loss": 0.373, + "step": 53696 + }, + { + "epoch": 0.9278580315178324, + "grad_norm": 0.8328608314224891, + "learning_rate": 2.717305898048861e-07, + "loss": 0.2891, + "step": 53697 + }, + { + "epoch": 0.9278753110312413, + "grad_norm": 1.320773198696611, + "learning_rate": 2.7160102744735573e-07, + "loss": 0.2706, + "step": 53698 + }, + { + "epoch": 0.9278925905446502, + "grad_norm": 0.90518977398096, + "learning_rate": 2.714714955599118e-07, + "loss": 0.2137, + "step": 53699 + }, + { + "epoch": 0.9279098700580591, + "grad_norm": 2.0021941206460983, + "learning_rate": 2.713419941429574e-07, + "loss": 0.3782, + "step": 53700 + }, + { + "epoch": 0.927927149571468, + "grad_norm": 1.3778736111267416, + "learning_rate": 2.7121252319689874e-07, + "loss": 0.3032, + "step": 53701 + }, + { + "epoch": 0.927944429084877, + "grad_norm": 1.02018221400317, + "learning_rate": 2.7108308272214336e-07, + "loss": 0.3557, + "step": 53702 + }, + { + "epoch": 0.9279617085982859, + "grad_norm": 1.0969953964054568, + "learning_rate": 2.7095367271909424e-07, + "loss": 0.2991, + "step": 53703 + }, + { + "epoch": 0.9279789881116948, + "grad_norm": 1.1311618129900947, + "learning_rate": 2.708242931881577e-07, + "loss": 0.2871, + "step": 53704 + }, + { + "epoch": 0.9279962676251037, + "grad_norm": 1.296345158231459, + "learning_rate": 2.7069494412973907e-07, + "loss": 0.3458, + "step": 53705 + }, + { + "epoch": 0.9280135471385126, + "grad_norm": 1.2119938312781222, + "learning_rate": 2.7056562554424236e-07, + "loss": 0.3806, + "step": 53706 + }, + { + "epoch": 0.9280308266519215, + "grad_norm": 1.439289086262067, + "learning_rate": 2.7043633743207397e-07, + "loss": 0.5429, + "step": 53707 + }, + { + "epoch": 0.9280481061653304, + "grad_norm": 2.0606492227426694, + "learning_rate": 2.7030707979363915e-07, + "loss": 0.5045, + "step": 53708 + }, + { + "epoch": 0.9280653856787393, + "grad_norm": 1.1730665017705852, + "learning_rate": 2.701778526293408e-07, + "loss": 0.2186, + "step": 53709 + }, + { + "epoch": 0.9280826651921482, + "grad_norm": 1.3667971386260334, + "learning_rate": 2.700486559395865e-07, + "loss": 0.3424, + "step": 53710 + }, + { + "epoch": 0.9280999447055571, + "grad_norm": 1.176174550615621, + "learning_rate": 2.699194897247781e-07, + "loss": 0.4807, + "step": 53711 + }, + { + "epoch": 0.928117224218966, + "grad_norm": 1.1968348154525472, + "learning_rate": 2.697903539853208e-07, + "loss": 0.2001, + "step": 53712 + }, + { + "epoch": 0.9281345037323749, + "grad_norm": 1.856907929218416, + "learning_rate": 2.69661248721621e-07, + "loss": 0.4145, + "step": 53713 + }, + { + "epoch": 0.9281517832457838, + "grad_norm": 1.5345029029254638, + "learning_rate": 2.695321739340806e-07, + "loss": 0.6045, + "step": 53714 + }, + { + "epoch": 0.9281690627591928, + "grad_norm": 1.2082111811969172, + "learning_rate": 2.694031296231059e-07, + "loss": 0.4831, + "step": 53715 + }, + { + "epoch": 0.9281863422726016, + "grad_norm": 0.8690818293134479, + "learning_rate": 2.692741157890988e-07, + "loss": 0.254, + "step": 53716 + }, + { + "epoch": 0.9282036217860105, + "grad_norm": 1.3554454682034984, + "learning_rate": 2.691451324324668e-07, + "loss": 0.2264, + "step": 53717 + }, + { + "epoch": 0.9282209012994194, + "grad_norm": 1.6235175783699312, + "learning_rate": 2.6901617955360837e-07, + "loss": 0.2138, + "step": 53718 + }, + { + "epoch": 0.9282381808128283, + "grad_norm": 1.1159783781512862, + "learning_rate": 2.688872571529333e-07, + "loss": 0.3502, + "step": 53719 + }, + { + "epoch": 0.9282554603262372, + "grad_norm": 0.9841396964058192, + "learning_rate": 2.687583652308423e-07, + "loss": 0.6541, + "step": 53720 + }, + { + "epoch": 0.9282727398396461, + "grad_norm": 2.237286865406825, + "learning_rate": 2.6862950378773957e-07, + "loss": 0.2472, + "step": 53721 + }, + { + "epoch": 0.928290019353055, + "grad_norm": 1.6552896037960432, + "learning_rate": 2.6850067282403024e-07, + "loss": 0.4887, + "step": 53722 + }, + { + "epoch": 0.9283072988664639, + "grad_norm": 1.3871194793366448, + "learning_rate": 2.683718723401141e-07, + "loss": 0.2364, + "step": 53723 + }, + { + "epoch": 0.9283245783798728, + "grad_norm": 1.6100675698246425, + "learning_rate": 2.682431023363974e-07, + "loss": 0.325, + "step": 53724 + }, + { + "epoch": 0.9283418578932817, + "grad_norm": 1.459305761859701, + "learning_rate": 2.6811436281328316e-07, + "loss": 0.3882, + "step": 53725 + }, + { + "epoch": 0.9283591374066906, + "grad_norm": 1.3608735465781614, + "learning_rate": 2.679856537711745e-07, + "loss": 0.2912, + "step": 53726 + }, + { + "epoch": 0.9283764169200995, + "grad_norm": 2.0188592184000695, + "learning_rate": 2.6785697521047424e-07, + "loss": 0.356, + "step": 53727 + }, + { + "epoch": 0.9283936964335084, + "grad_norm": 1.7690851178316813, + "learning_rate": 2.677283271315856e-07, + "loss": 0.522, + "step": 53728 + }, + { + "epoch": 0.9284109759469173, + "grad_norm": 1.165363138972147, + "learning_rate": 2.675997095349114e-07, + "loss": 0.5859, + "step": 53729 + }, + { + "epoch": 0.9284282554603263, + "grad_norm": 1.1851865676957436, + "learning_rate": 2.674711224208548e-07, + "loss": 0.3391, + "step": 53730 + }, + { + "epoch": 0.9284455349737352, + "grad_norm": 1.0449541287787518, + "learning_rate": 2.6734256578981767e-07, + "loss": 0.3112, + "step": 53731 + }, + { + "epoch": 0.9284628144871441, + "grad_norm": 0.9395252758889768, + "learning_rate": 2.6721403964220403e-07, + "loss": 0.1489, + "step": 53732 + }, + { + "epoch": 0.928480094000553, + "grad_norm": 2.6347052101118194, + "learning_rate": 2.670855439784159e-07, + "loss": 0.2522, + "step": 53733 + }, + { + "epoch": 0.9284973735139619, + "grad_norm": 0.8834589769779913, + "learning_rate": 2.66957078798854e-07, + "loss": 0.4704, + "step": 53734 + }, + { + "epoch": 0.9285146530273708, + "grad_norm": 1.9239409807263508, + "learning_rate": 2.668286441039236e-07, + "loss": 0.4015, + "step": 53735 + }, + { + "epoch": 0.9285319325407797, + "grad_norm": 1.8115309296446773, + "learning_rate": 2.6670023989402437e-07, + "loss": 0.6586, + "step": 53736 + }, + { + "epoch": 0.9285492120541885, + "grad_norm": 1.4826368880537173, + "learning_rate": 2.665718661695615e-07, + "loss": 0.2764, + "step": 53737 + }, + { + "epoch": 0.9285664915675974, + "grad_norm": 0.9644055110506515, + "learning_rate": 2.664435229309337e-07, + "loss": 0.3402, + "step": 53738 + }, + { + "epoch": 0.9285837710810063, + "grad_norm": 0.5964128041955977, + "learning_rate": 2.66315210178546e-07, + "loss": 0.9131, + "step": 53739 + }, + { + "epoch": 0.9286010505944152, + "grad_norm": 1.2811379708306667, + "learning_rate": 2.661869279127971e-07, + "loss": 0.3004, + "step": 53740 + }, + { + "epoch": 0.9286183301078241, + "grad_norm": 1.3656569872931008, + "learning_rate": 2.660586761340911e-07, + "loss": 0.255, + "step": 53741 + }, + { + "epoch": 0.928635609621233, + "grad_norm": 1.1519204106909515, + "learning_rate": 2.6593045484282876e-07, + "loss": 0.4908, + "step": 53742 + }, + { + "epoch": 0.928652889134642, + "grad_norm": 0.8086723457727559, + "learning_rate": 2.658022640394131e-07, + "loss": 0.3756, + "step": 53743 + }, + { + "epoch": 0.9286701686480509, + "grad_norm": 2.17743673321292, + "learning_rate": 2.6567410372424385e-07, + "loss": 0.2999, + "step": 53744 + }, + { + "epoch": 0.9286874481614598, + "grad_norm": 1.222665002331808, + "learning_rate": 2.6554597389772284e-07, + "loss": 0.1835, + "step": 53745 + }, + { + "epoch": 0.9287047276748687, + "grad_norm": 0.8349464494049101, + "learning_rate": 2.65417874560252e-07, + "loss": 0.5703, + "step": 53746 + }, + { + "epoch": 0.9287220071882776, + "grad_norm": 1.183531604925354, + "learning_rate": 2.6528980571223104e-07, + "loss": 0.2971, + "step": 53747 + }, + { + "epoch": 0.9287392867016865, + "grad_norm": 1.0353580629109298, + "learning_rate": 2.651617673540641e-07, + "loss": 0.3395, + "step": 53748 + }, + { + "epoch": 0.9287565662150954, + "grad_norm": 0.9622698411275619, + "learning_rate": 2.650337594861496e-07, + "loss": 0.3642, + "step": 53749 + }, + { + "epoch": 0.9287738457285043, + "grad_norm": 0.9788919537962538, + "learning_rate": 2.6490578210888964e-07, + "loss": 0.296, + "step": 53750 + }, + { + "epoch": 0.9287911252419132, + "grad_norm": 1.3426865171096936, + "learning_rate": 2.6477783522268374e-07, + "loss": 0.3742, + "step": 53751 + }, + { + "epoch": 0.9288084047553221, + "grad_norm": 1.209805483114243, + "learning_rate": 2.646499188279328e-07, + "loss": 0.3946, + "step": 53752 + }, + { + "epoch": 0.928825684268731, + "grad_norm": 1.2724190379737899, + "learning_rate": 2.6452203292503976e-07, + "loss": 0.5158, + "step": 53753 + }, + { + "epoch": 0.9288429637821399, + "grad_norm": 0.7860813005785324, + "learning_rate": 2.6439417751440323e-07, + "loss": 0.3286, + "step": 53754 + }, + { + "epoch": 0.9288602432955488, + "grad_norm": 1.447902429543622, + "learning_rate": 2.64266352596424e-07, + "loss": 0.2488, + "step": 53755 + }, + { + "epoch": 0.9288775228089577, + "grad_norm": 2.597279573993653, + "learning_rate": 2.641385581715017e-07, + "loss": 0.3338, + "step": 53756 + }, + { + "epoch": 0.9288948023223667, + "grad_norm": 2.5871470958323717, + "learning_rate": 2.640107942400383e-07, + "loss": 0.4114, + "step": 53757 + }, + { + "epoch": 0.9289120818357754, + "grad_norm": 1.7881104241931667, + "learning_rate": 2.638830608024312e-07, + "loss": 0.4926, + "step": 53758 + }, + { + "epoch": 0.9289293613491844, + "grad_norm": 1.170941279072253, + "learning_rate": 2.6375535785908455e-07, + "loss": 0.2733, + "step": 53759 + }, + { + "epoch": 0.9289466408625933, + "grad_norm": 1.7528312780113762, + "learning_rate": 2.636276854103947e-07, + "loss": 0.3943, + "step": 53760 + }, + { + "epoch": 0.9289639203760022, + "grad_norm": 1.0490917820751864, + "learning_rate": 2.6350004345676473e-07, + "loss": 0.2591, + "step": 53761 + }, + { + "epoch": 0.9289811998894111, + "grad_norm": 2.040496626713106, + "learning_rate": 2.6337243199858973e-07, + "loss": 0.2913, + "step": 53762 + }, + { + "epoch": 0.92899847940282, + "grad_norm": 0.7129238821605377, + "learning_rate": 2.632448510362751e-07, + "loss": 0.2483, + "step": 53763 + }, + { + "epoch": 0.9290157589162289, + "grad_norm": 2.1219392844524188, + "learning_rate": 2.631173005702159e-07, + "loss": 0.466, + "step": 53764 + }, + { + "epoch": 0.9290330384296378, + "grad_norm": 1.9209961891550218, + "learning_rate": 2.62989780600813e-07, + "loss": 0.3671, + "step": 53765 + }, + { + "epoch": 0.9290503179430467, + "grad_norm": 0.9467983120646429, + "learning_rate": 2.628622911284673e-07, + "loss": 0.1889, + "step": 53766 + }, + { + "epoch": 0.9290675974564556, + "grad_norm": 1.5080219951628757, + "learning_rate": 2.62734832153575e-07, + "loss": 0.3769, + "step": 53767 + }, + { + "epoch": 0.9290848769698645, + "grad_norm": 1.1726407722765348, + "learning_rate": 2.6260740367653913e-07, + "loss": 0.2972, + "step": 53768 + }, + { + "epoch": 0.9291021564832734, + "grad_norm": 1.6159812131261249, + "learning_rate": 2.624800056977561e-07, + "loss": 0.415, + "step": 53769 + }, + { + "epoch": 0.9291194359966823, + "grad_norm": 1.5986231749994622, + "learning_rate": 2.623526382176256e-07, + "loss": 0.2117, + "step": 53770 + }, + { + "epoch": 0.9291367155100912, + "grad_norm": 2.0918716966595134, + "learning_rate": 2.622253012365461e-07, + "loss": 0.2783, + "step": 53771 + }, + { + "epoch": 0.9291539950235002, + "grad_norm": 2.952598094594995, + "learning_rate": 2.6209799475491846e-07, + "loss": 0.3356, + "step": 53772 + }, + { + "epoch": 0.9291712745369091, + "grad_norm": 1.0317629478149069, + "learning_rate": 2.619707187731391e-07, + "loss": 0.1799, + "step": 53773 + }, + { + "epoch": 0.929188554050318, + "grad_norm": 1.5274471006769788, + "learning_rate": 2.6184347329160863e-07, + "loss": 0.2266, + "step": 53774 + }, + { + "epoch": 0.9292058335637269, + "grad_norm": 0.8854262693472589, + "learning_rate": 2.6171625831072246e-07, + "loss": 0.251, + "step": 53775 + }, + { + "epoch": 0.9292231130771358, + "grad_norm": 1.8287042751994227, + "learning_rate": 2.615890738308824e-07, + "loss": 0.3698, + "step": 53776 + }, + { + "epoch": 0.9292403925905447, + "grad_norm": 1.649178069668986, + "learning_rate": 2.614619198524859e-07, + "loss": 0.2625, + "step": 53777 + }, + { + "epoch": 0.9292576721039536, + "grad_norm": 1.5296428415751615, + "learning_rate": 2.6133479637592943e-07, + "loss": 0.4053, + "step": 53778 + }, + { + "epoch": 0.9292749516173625, + "grad_norm": 1.762344443916304, + "learning_rate": 2.612077034016136e-07, + "loss": 0.3083, + "step": 53779 + }, + { + "epoch": 0.9292922311307713, + "grad_norm": 1.8794758397900984, + "learning_rate": 2.610806409299338e-07, + "loss": 0.2623, + "step": 53780 + }, + { + "epoch": 0.9293095106441802, + "grad_norm": 1.501634898368018, + "learning_rate": 2.6095360896129073e-07, + "loss": 0.4226, + "step": 53781 + }, + { + "epoch": 0.9293267901575891, + "grad_norm": 1.232358092653941, + "learning_rate": 2.608266074960808e-07, + "loss": 0.3868, + "step": 53782 + }, + { + "epoch": 0.929344069670998, + "grad_norm": 1.608666759690074, + "learning_rate": 2.606996365347025e-07, + "loss": 0.2021, + "step": 53783 + }, + { + "epoch": 0.9293613491844069, + "grad_norm": 0.9903759777946668, + "learning_rate": 2.6057269607755343e-07, + "loss": 0.364, + "step": 53784 + }, + { + "epoch": 0.9293786286978158, + "grad_norm": 1.5670453571253147, + "learning_rate": 2.604457861250298e-07, + "loss": 0.3172, + "step": 53785 + }, + { + "epoch": 0.9293959082112248, + "grad_norm": 1.5692686757251426, + "learning_rate": 2.603189066775302e-07, + "loss": 0.4941, + "step": 53786 + }, + { + "epoch": 0.9294131877246337, + "grad_norm": 2.8206079380066558, + "learning_rate": 2.601920577354522e-07, + "loss": 0.2354, + "step": 53787 + }, + { + "epoch": 0.9294304672380426, + "grad_norm": 0.8179669956839588, + "learning_rate": 2.6006523929919423e-07, + "loss": 0.1773, + "step": 53788 + }, + { + "epoch": 0.9294477467514515, + "grad_norm": 1.065530025082405, + "learning_rate": 2.5993845136915054e-07, + "loss": 0.3508, + "step": 53789 + }, + { + "epoch": 0.9294650262648604, + "grad_norm": 1.3160468847298452, + "learning_rate": 2.598116939457207e-07, + "loss": 0.2349, + "step": 53790 + }, + { + "epoch": 0.9294823057782693, + "grad_norm": 1.1019496223402265, + "learning_rate": 2.596849670293e-07, + "loss": 0.2631, + "step": 53791 + }, + { + "epoch": 0.9294995852916782, + "grad_norm": 0.6080700054999958, + "learning_rate": 2.5955827062028593e-07, + "loss": 0.6029, + "step": 53792 + }, + { + "epoch": 0.9295168648050871, + "grad_norm": 1.082411405083197, + "learning_rate": 2.594316047190759e-07, + "loss": 0.3414, + "step": 53793 + }, + { + "epoch": 0.929534144318496, + "grad_norm": 1.6372553989813818, + "learning_rate": 2.593049693260674e-07, + "loss": 0.397, + "step": 53794 + }, + { + "epoch": 0.9295514238319049, + "grad_norm": 2.409395487147584, + "learning_rate": 2.5917836444165676e-07, + "loss": 0.2914, + "step": 53795 + }, + { + "epoch": 0.9295687033453138, + "grad_norm": 1.172801054780261, + "learning_rate": 2.5905179006623815e-07, + "loss": 0.3553, + "step": 53796 + }, + { + "epoch": 0.9295859828587227, + "grad_norm": 1.125812834899945, + "learning_rate": 2.5892524620021007e-07, + "loss": 0.4523, + "step": 53797 + }, + { + "epoch": 0.9296032623721316, + "grad_norm": 1.3041809811420817, + "learning_rate": 2.587987328439667e-07, + "loss": 0.2345, + "step": 53798 + }, + { + "epoch": 0.9296205418855406, + "grad_norm": 1.4794918690395238, + "learning_rate": 2.5867224999790776e-07, + "loss": 0.3059, + "step": 53799 + }, + { + "epoch": 0.9296378213989495, + "grad_norm": 1.163632856733744, + "learning_rate": 2.5854579766242725e-07, + "loss": 0.2458, + "step": 53800 + }, + { + "epoch": 0.9296551009123583, + "grad_norm": 1.0706494184261763, + "learning_rate": 2.5841937583792275e-07, + "loss": 0.1635, + "step": 53801 + }, + { + "epoch": 0.9296723804257672, + "grad_norm": 1.5203629395799496, + "learning_rate": 2.5829298452478725e-07, + "loss": 0.2177, + "step": 53802 + }, + { + "epoch": 0.9296896599391761, + "grad_norm": 1.0847263061793126, + "learning_rate": 2.5816662372342037e-07, + "loss": 0.3497, + "step": 53803 + }, + { + "epoch": 0.929706939452585, + "grad_norm": 1.2996237829635375, + "learning_rate": 2.580402934342141e-07, + "loss": 0.1992, + "step": 53804 + }, + { + "epoch": 0.9297242189659939, + "grad_norm": 1.0084103991272286, + "learning_rate": 2.5791399365756697e-07, + "loss": 0.5155, + "step": 53805 + }, + { + "epoch": 0.9297414984794028, + "grad_norm": 0.9520468417147459, + "learning_rate": 2.577877243938742e-07, + "loss": 0.4313, + "step": 53806 + }, + { + "epoch": 0.9297587779928117, + "grad_norm": 1.4747046834125241, + "learning_rate": 2.5766148564352887e-07, + "loss": 0.4466, + "step": 53807 + }, + { + "epoch": 0.9297760575062206, + "grad_norm": 1.361662881334185, + "learning_rate": 2.5753527740693065e-07, + "loss": 0.3796, + "step": 53808 + }, + { + "epoch": 0.9297933370196295, + "grad_norm": 0.9629524902652875, + "learning_rate": 2.5740909968447027e-07, + "loss": 0.4023, + "step": 53809 + }, + { + "epoch": 0.9298106165330384, + "grad_norm": 1.7718298757028248, + "learning_rate": 2.5728295247654524e-07, + "loss": 0.2168, + "step": 53810 + }, + { + "epoch": 0.9298278960464473, + "grad_norm": 1.185261294563261, + "learning_rate": 2.5715683578354964e-07, + "loss": 0.4312, + "step": 53811 + }, + { + "epoch": 0.9298451755598562, + "grad_norm": 1.0823332578089477, + "learning_rate": 2.57030749605881e-07, + "loss": 0.3637, + "step": 53812 + }, + { + "epoch": 0.9298624550732651, + "grad_norm": 1.2127204162860097, + "learning_rate": 2.5690469394393236e-07, + "loss": 0.4402, + "step": 53813 + }, + { + "epoch": 0.9298797345866741, + "grad_norm": 0.9928474878444732, + "learning_rate": 2.567786687980989e-07, + "loss": 0.4344, + "step": 53814 + }, + { + "epoch": 0.929897014100083, + "grad_norm": 0.9645935663246601, + "learning_rate": 2.566526741687736e-07, + "loss": 0.2478, + "step": 53815 + }, + { + "epoch": 0.9299142936134919, + "grad_norm": 2.0419332953759466, + "learning_rate": 2.565267100563529e-07, + "loss": 0.215, + "step": 53816 + }, + { + "epoch": 0.9299315731269008, + "grad_norm": 0.9452725246474525, + "learning_rate": 2.5640077646123194e-07, + "loss": 0.3047, + "step": 53817 + }, + { + "epoch": 0.9299488526403097, + "grad_norm": 2.12030604939827, + "learning_rate": 2.5627487338380386e-07, + "loss": 0.2821, + "step": 53818 + }, + { + "epoch": 0.9299661321537186, + "grad_norm": 1.638158549019424, + "learning_rate": 2.561490008244638e-07, + "loss": 0.3385, + "step": 53819 + }, + { + "epoch": 0.9299834116671275, + "grad_norm": 0.8943781572960808, + "learning_rate": 2.5602315878360485e-07, + "loss": 0.3311, + "step": 53820 + }, + { + "epoch": 0.9300006911805364, + "grad_norm": 1.2847154919719832, + "learning_rate": 2.558973472616222e-07, + "loss": 0.3799, + "step": 53821 + }, + { + "epoch": 0.9300179706939452, + "grad_norm": 1.15352933265208, + "learning_rate": 2.557715662589089e-07, + "loss": 0.3034, + "step": 53822 + }, + { + "epoch": 0.9300352502073541, + "grad_norm": 0.9965045228883821, + "learning_rate": 2.556458157758601e-07, + "loss": 0.4344, + "step": 53823 + }, + { + "epoch": 0.930052529720763, + "grad_norm": 1.9188191003190749, + "learning_rate": 2.5552009581286896e-07, + "loss": 0.3949, + "step": 53824 + }, + { + "epoch": 0.9300698092341719, + "grad_norm": 1.098756160446256, + "learning_rate": 2.5539440637033065e-07, + "loss": 0.3536, + "step": 53825 + }, + { + "epoch": 0.9300870887475808, + "grad_norm": 1.4957588758470421, + "learning_rate": 2.5526874744863597e-07, + "loss": 0.1877, + "step": 53826 + }, + { + "epoch": 0.9301043682609897, + "grad_norm": 1.7212260625427764, + "learning_rate": 2.551431190481812e-07, + "loss": 0.1767, + "step": 53827 + }, + { + "epoch": 0.9301216477743987, + "grad_norm": 1.7247037476809808, + "learning_rate": 2.550175211693584e-07, + "loss": 0.2487, + "step": 53828 + }, + { + "epoch": 0.9301389272878076, + "grad_norm": 1.1708396352945072, + "learning_rate": 2.548919538125616e-07, + "loss": 0.3703, + "step": 53829 + }, + { + "epoch": 0.9301562068012165, + "grad_norm": 2.1936064076904285, + "learning_rate": 2.5476641697818384e-07, + "loss": 0.4085, + "step": 53830 + }, + { + "epoch": 0.9301734863146254, + "grad_norm": 0.9760532537561418, + "learning_rate": 2.5464091066661813e-07, + "loss": 0.3502, + "step": 53831 + }, + { + "epoch": 0.9301907658280343, + "grad_norm": 1.4990268147475951, + "learning_rate": 2.5451543487825637e-07, + "loss": 0.249, + "step": 53832 + }, + { + "epoch": 0.9302080453414432, + "grad_norm": 1.1705700033814366, + "learning_rate": 2.543899896134938e-07, + "loss": 0.3041, + "step": 53833 + }, + { + "epoch": 0.9302253248548521, + "grad_norm": 1.3722879218879058, + "learning_rate": 2.542645748727235e-07, + "loss": 0.3207, + "step": 53834 + }, + { + "epoch": 0.930242604368261, + "grad_norm": 0.7318852115207597, + "learning_rate": 2.541391906563362e-07, + "loss": 0.5487, + "step": 53835 + }, + { + "epoch": 0.9302598838816699, + "grad_norm": 1.1979450890066272, + "learning_rate": 2.5401383696472603e-07, + "loss": 0.3564, + "step": 53836 + }, + { + "epoch": 0.9302771633950788, + "grad_norm": 1.1887768857892933, + "learning_rate": 2.5388851379828605e-07, + "loss": 0.4717, + "step": 53837 + }, + { + "epoch": 0.9302944429084877, + "grad_norm": 1.3758438000521558, + "learning_rate": 2.5376322115740593e-07, + "loss": 0.3292, + "step": 53838 + }, + { + "epoch": 0.9303117224218966, + "grad_norm": 1.1456094746257128, + "learning_rate": 2.536379590424809e-07, + "loss": 0.2637, + "step": 53839 + }, + { + "epoch": 0.9303290019353055, + "grad_norm": 1.3757324084482279, + "learning_rate": 2.5351272745390287e-07, + "loss": 0.3631, + "step": 53840 + }, + { + "epoch": 0.9303462814487145, + "grad_norm": 1.0870190920793195, + "learning_rate": 2.5338752639206375e-07, + "loss": 0.2437, + "step": 53841 + }, + { + "epoch": 0.9303635609621234, + "grad_norm": 1.3373625891457828, + "learning_rate": 2.5326235585735547e-07, + "loss": 0.4083, + "step": 53842 + }, + { + "epoch": 0.9303808404755322, + "grad_norm": 1.0233793759136898, + "learning_rate": 2.5313721585016994e-07, + "loss": 0.2454, + "step": 53843 + }, + { + "epoch": 0.9303981199889411, + "grad_norm": 2.426259230161145, + "learning_rate": 2.5301210637089903e-07, + "loss": 0.1142, + "step": 53844 + }, + { + "epoch": 0.93041539950235, + "grad_norm": 1.271508578751238, + "learning_rate": 2.5288702741993575e-07, + "loss": 0.2931, + "step": 53845 + }, + { + "epoch": 0.9304326790157589, + "grad_norm": 1.4347887720552865, + "learning_rate": 2.5276197899767095e-07, + "loss": 0.2274, + "step": 53846 + }, + { + "epoch": 0.9304499585291678, + "grad_norm": 0.8898295179438508, + "learning_rate": 2.5263696110449544e-07, + "loss": 0.3812, + "step": 53847 + }, + { + "epoch": 0.9304672380425767, + "grad_norm": 2.114537700728745, + "learning_rate": 2.5251197374080325e-07, + "loss": 0.4633, + "step": 53848 + }, + { + "epoch": 0.9304845175559856, + "grad_norm": 1.5820548040775386, + "learning_rate": 2.523870169069831e-07, + "loss": 0.2868, + "step": 53849 + }, + { + "epoch": 0.9305017970693945, + "grad_norm": 1.4384298360654004, + "learning_rate": 2.522620906034279e-07, + "loss": 0.3656, + "step": 53850 + }, + { + "epoch": 0.9305190765828034, + "grad_norm": 1.6468186853815217, + "learning_rate": 2.521371948305285e-07, + "loss": 0.3912, + "step": 53851 + }, + { + "epoch": 0.9305363560962123, + "grad_norm": 1.2790154732873504, + "learning_rate": 2.520123295886767e-07, + "loss": 0.3923, + "step": 53852 + }, + { + "epoch": 0.9305536356096212, + "grad_norm": 1.3107389727222167, + "learning_rate": 2.5188749487826234e-07, + "loss": 0.2528, + "step": 53853 + }, + { + "epoch": 0.9305709151230301, + "grad_norm": 0.9432236030452653, + "learning_rate": 2.5176269069967843e-07, + "loss": 0.7185, + "step": 53854 + }, + { + "epoch": 0.930588194636439, + "grad_norm": 1.098014371237433, + "learning_rate": 2.5163791705331343e-07, + "loss": 0.395, + "step": 53855 + }, + { + "epoch": 0.930605474149848, + "grad_norm": 1.5026189910976842, + "learning_rate": 2.515131739395593e-07, + "loss": 0.2609, + "step": 53856 + }, + { + "epoch": 0.9306227536632569, + "grad_norm": 0.9892262546625166, + "learning_rate": 2.5138846135880803e-07, + "loss": 0.214, + "step": 53857 + }, + { + "epoch": 0.9306400331766658, + "grad_norm": 0.8272814859013371, + "learning_rate": 2.5126377931144807e-07, + "loss": 0.2593, + "step": 53858 + }, + { + "epoch": 0.9306573126900747, + "grad_norm": 1.582616440804168, + "learning_rate": 2.5113912779787144e-07, + "loss": 0.282, + "step": 53859 + }, + { + "epoch": 0.9306745922034836, + "grad_norm": 0.9677220523345265, + "learning_rate": 2.510145068184677e-07, + "loss": 0.4302, + "step": 53860 + }, + { + "epoch": 0.9306918717168925, + "grad_norm": 1.3464664098660817, + "learning_rate": 2.508899163736267e-07, + "loss": 0.3004, + "step": 53861 + }, + { + "epoch": 0.9307091512303014, + "grad_norm": 2.0397243417461888, + "learning_rate": 2.507653564637391e-07, + "loss": 0.3027, + "step": 53862 + }, + { + "epoch": 0.9307264307437103, + "grad_norm": 1.404318290675191, + "learning_rate": 2.506408270891969e-07, + "loss": 0.5065, + "step": 53863 + }, + { + "epoch": 0.9307437102571191, + "grad_norm": 1.0621109216634, + "learning_rate": 2.505163282503875e-07, + "loss": 0.3218, + "step": 53864 + }, + { + "epoch": 0.930760989770528, + "grad_norm": 0.9532517330039885, + "learning_rate": 2.503918599477029e-07, + "loss": 0.2936, + "step": 53865 + }, + { + "epoch": 0.9307782692839369, + "grad_norm": 0.9640910788987473, + "learning_rate": 2.5026742218153043e-07, + "loss": 0.2857, + "step": 53866 + }, + { + "epoch": 0.9307955487973458, + "grad_norm": 1.074307098707375, + "learning_rate": 2.5014301495226214e-07, + "loss": 0.3891, + "step": 53867 + }, + { + "epoch": 0.9308128283107547, + "grad_norm": 1.2162250278987932, + "learning_rate": 2.500186382602876e-07, + "loss": 0.4737, + "step": 53868 + }, + { + "epoch": 0.9308301078241636, + "grad_norm": 1.3562799364106988, + "learning_rate": 2.4989429210599434e-07, + "loss": 0.3427, + "step": 53869 + }, + { + "epoch": 0.9308473873375726, + "grad_norm": 1.2805647038680836, + "learning_rate": 2.497699764897743e-07, + "loss": 0.3457, + "step": 53870 + }, + { + "epoch": 0.9308646668509815, + "grad_norm": 1.2823396954104946, + "learning_rate": 2.496456914120149e-07, + "loss": 0.2582, + "step": 53871 + }, + { + "epoch": 0.9308819463643904, + "grad_norm": 0.860138657993499, + "learning_rate": 2.4952143687310583e-07, + "loss": 1.022, + "step": 53872 + }, + { + "epoch": 0.9308992258777993, + "grad_norm": 2.0470590895073353, + "learning_rate": 2.4939721287343676e-07, + "loss": 0.2549, + "step": 53873 + }, + { + "epoch": 0.9309165053912082, + "grad_norm": 1.1396134352893126, + "learning_rate": 2.4927301941339745e-07, + "loss": 0.3817, + "step": 53874 + }, + { + "epoch": 0.9309337849046171, + "grad_norm": 0.8196390632791498, + "learning_rate": 2.4914885649337526e-07, + "loss": 0.6537, + "step": 53875 + }, + { + "epoch": 0.930951064418026, + "grad_norm": 0.888346504649858, + "learning_rate": 2.490247241137611e-07, + "loss": 0.2429, + "step": 53876 + }, + { + "epoch": 0.9309683439314349, + "grad_norm": 1.0356222215485151, + "learning_rate": 2.489006222749424e-07, + "loss": 0.3425, + "step": 53877 + }, + { + "epoch": 0.9309856234448438, + "grad_norm": 1.9138038973586755, + "learning_rate": 2.4877655097730546e-07, + "loss": 0.2736, + "step": 53878 + }, + { + "epoch": 0.9310029029582527, + "grad_norm": 1.5507282921168861, + "learning_rate": 2.4865251022124447e-07, + "loss": 0.2536, + "step": 53879 + }, + { + "epoch": 0.9310201824716616, + "grad_norm": 2.4396232168608543, + "learning_rate": 2.485285000071436e-07, + "loss": 0.2355, + "step": 53880 + }, + { + "epoch": 0.9310374619850705, + "grad_norm": 2.2596387423605617, + "learning_rate": 2.484045203353935e-07, + "loss": 0.6761, + "step": 53881 + }, + { + "epoch": 0.9310547414984794, + "grad_norm": 0.8501596292325373, + "learning_rate": 2.482805712063796e-07, + "loss": 0.33, + "step": 53882 + }, + { + "epoch": 0.9310720210118884, + "grad_norm": 0.6202034540032278, + "learning_rate": 2.481566526204937e-07, + "loss": 0.5613, + "step": 53883 + }, + { + "epoch": 0.9310893005252973, + "grad_norm": 1.1319878171164726, + "learning_rate": 2.480327645781222e-07, + "loss": 0.3788, + "step": 53884 + }, + { + "epoch": 0.931106580038706, + "grad_norm": 2.061383485253565, + "learning_rate": 2.4790890707965255e-07, + "loss": 0.2453, + "step": 53885 + }, + { + "epoch": 0.931123859552115, + "grad_norm": 1.165688032869724, + "learning_rate": 2.477850801254722e-07, + "loss": 0.2042, + "step": 53886 + }, + { + "epoch": 0.9311411390655239, + "grad_norm": 1.6164020270617407, + "learning_rate": 2.476612837159731e-07, + "loss": 0.5603, + "step": 53887 + }, + { + "epoch": 0.9311584185789328, + "grad_norm": 1.5618101713560384, + "learning_rate": 2.475375178515382e-07, + "loss": 0.3187, + "step": 53888 + }, + { + "epoch": 0.9311756980923417, + "grad_norm": 1.4472121239997857, + "learning_rate": 2.474137825325573e-07, + "loss": 0.4007, + "step": 53889 + }, + { + "epoch": 0.9311929776057506, + "grad_norm": 1.0258622401333801, + "learning_rate": 2.472900777594167e-07, + "loss": 0.4601, + "step": 53890 + }, + { + "epoch": 0.9312102571191595, + "grad_norm": 1.6659820781208323, + "learning_rate": 2.47166403532505e-07, + "loss": 0.2857, + "step": 53891 + }, + { + "epoch": 0.9312275366325684, + "grad_norm": 2.259042647050419, + "learning_rate": 2.470427598522107e-07, + "loss": 0.3834, + "step": 53892 + }, + { + "epoch": 0.9312448161459773, + "grad_norm": 1.2838050281619806, + "learning_rate": 2.46919146718918e-07, + "loss": 0.3185, + "step": 53893 + }, + { + "epoch": 0.9312620956593862, + "grad_norm": 2.558832321508687, + "learning_rate": 2.4679556413301775e-07, + "loss": 0.4233, + "step": 53894 + }, + { + "epoch": 0.9312793751727951, + "grad_norm": 1.864629213467216, + "learning_rate": 2.466720120948929e-07, + "loss": 0.7092, + "step": 53895 + }, + { + "epoch": 0.931296654686204, + "grad_norm": 1.8719778595156964, + "learning_rate": 2.465484906049331e-07, + "loss": 0.6938, + "step": 53896 + }, + { + "epoch": 0.931313934199613, + "grad_norm": 1.8625374309365261, + "learning_rate": 2.4642499966352594e-07, + "loss": 0.7731, + "step": 53897 + }, + { + "epoch": 0.9313312137130219, + "grad_norm": 1.1974108338720142, + "learning_rate": 2.4630153927105547e-07, + "loss": 0.3979, + "step": 53898 + }, + { + "epoch": 0.9313484932264308, + "grad_norm": 0.6538898043307242, + "learning_rate": 2.4617810942791143e-07, + "loss": 0.7058, + "step": 53899 + }, + { + "epoch": 0.9313657727398397, + "grad_norm": 1.55017056273497, + "learning_rate": 2.460547101344768e-07, + "loss": 0.3486, + "step": 53900 + }, + { + "epoch": 0.9313830522532486, + "grad_norm": 1.882278242929166, + "learning_rate": 2.4593134139114126e-07, + "loss": 0.2766, + "step": 53901 + }, + { + "epoch": 0.9314003317666575, + "grad_norm": 1.2473864816700855, + "learning_rate": 2.45808003198289e-07, + "loss": 0.2356, + "step": 53902 + }, + { + "epoch": 0.9314176112800664, + "grad_norm": 1.0543392328522803, + "learning_rate": 2.456846955563086e-07, + "loss": 0.4947, + "step": 53903 + }, + { + "epoch": 0.9314348907934753, + "grad_norm": 2.042602315281601, + "learning_rate": 2.4556141846558415e-07, + "loss": 0.5, + "step": 53904 + }, + { + "epoch": 0.9314521703068842, + "grad_norm": 1.1965693871643512, + "learning_rate": 2.454381719265042e-07, + "loss": 0.4018, + "step": 53905 + }, + { + "epoch": 0.931469449820293, + "grad_norm": 0.855407538251006, + "learning_rate": 2.4531495593945186e-07, + "loss": 0.1832, + "step": 53906 + }, + { + "epoch": 0.9314867293337019, + "grad_norm": 0.9392734014425378, + "learning_rate": 2.451917705048146e-07, + "loss": 0.392, + "step": 53907 + }, + { + "epoch": 0.9315040088471108, + "grad_norm": 1.6130799800577214, + "learning_rate": 2.4506861562297867e-07, + "loss": 0.4861, + "step": 53908 + }, + { + "epoch": 0.9315212883605197, + "grad_norm": 1.7788592827234277, + "learning_rate": 2.449454912943283e-07, + "loss": 0.2181, + "step": 53909 + }, + { + "epoch": 0.9315385678739286, + "grad_norm": 0.7955566133196704, + "learning_rate": 2.4482239751925095e-07, + "loss": 0.4393, + "step": 53910 + }, + { + "epoch": 0.9315558473873375, + "grad_norm": 0.9576955819282462, + "learning_rate": 2.4469933429812964e-07, + "loss": 0.3064, + "step": 53911 + }, + { + "epoch": 0.9315731269007465, + "grad_norm": 0.8098054478373767, + "learning_rate": 2.445763016313518e-07, + "loss": 0.3256, + "step": 53912 + }, + { + "epoch": 0.9315904064141554, + "grad_norm": 1.7763094201422986, + "learning_rate": 2.4445329951930277e-07, + "loss": 0.3118, + "step": 53913 + }, + { + "epoch": 0.9316076859275643, + "grad_norm": 1.1121156786382476, + "learning_rate": 2.4433032796236875e-07, + "loss": 0.4901, + "step": 53914 + }, + { + "epoch": 0.9316249654409732, + "grad_norm": 1.2921061370458122, + "learning_rate": 2.442073869609318e-07, + "loss": 0.2456, + "step": 53915 + }, + { + "epoch": 0.9316422449543821, + "grad_norm": 1.134870631036184, + "learning_rate": 2.4408447651537935e-07, + "loss": 0.4385, + "step": 53916 + }, + { + "epoch": 0.931659524467791, + "grad_norm": 1.296269648328832, + "learning_rate": 2.439615966260955e-07, + "loss": 0.3101, + "step": 53917 + }, + { + "epoch": 0.9316768039811999, + "grad_norm": 1.04385204503543, + "learning_rate": 2.438387472934667e-07, + "loss": 0.3484, + "step": 53918 + }, + { + "epoch": 0.9316940834946088, + "grad_norm": 1.0010517351296808, + "learning_rate": 2.4371592851787474e-07, + "loss": 0.2918, + "step": 53919 + }, + { + "epoch": 0.9317113630080177, + "grad_norm": 1.2489907063155026, + "learning_rate": 2.43593140299706e-07, + "loss": 0.3273, + "step": 53920 + }, + { + "epoch": 0.9317286425214266, + "grad_norm": 1.1803824800310656, + "learning_rate": 2.4347038263934587e-07, + "loss": 0.3269, + "step": 53921 + }, + { + "epoch": 0.9317459220348355, + "grad_norm": 1.4841425515449957, + "learning_rate": 2.433476555371772e-07, + "loss": 0.2806, + "step": 53922 + }, + { + "epoch": 0.9317632015482444, + "grad_norm": 1.1805796066212642, + "learning_rate": 2.4322495899358644e-07, + "loss": 0.4938, + "step": 53923 + }, + { + "epoch": 0.9317804810616533, + "grad_norm": 0.6448975050575922, + "learning_rate": 2.4310229300895436e-07, + "loss": 0.4164, + "step": 53924 + }, + { + "epoch": 0.9317977605750623, + "grad_norm": 1.181298018130354, + "learning_rate": 2.429796575836685e-07, + "loss": 0.3598, + "step": 53925 + }, + { + "epoch": 0.9318150400884712, + "grad_norm": 0.8018492819347565, + "learning_rate": 2.4285705271811177e-07, + "loss": 0.4182, + "step": 53926 + }, + { + "epoch": 0.9318323196018801, + "grad_norm": 1.1024950550529238, + "learning_rate": 2.427344784126695e-07, + "loss": 0.2634, + "step": 53927 + }, + { + "epoch": 0.9318495991152889, + "grad_norm": 1.5097978208254006, + "learning_rate": 2.426119346677236e-07, + "loss": 0.5816, + "step": 53928 + }, + { + "epoch": 0.9318668786286978, + "grad_norm": 2.3029212729970485, + "learning_rate": 2.4248942148365816e-07, + "loss": 0.4312, + "step": 53929 + }, + { + "epoch": 0.9318841581421067, + "grad_norm": 0.7864904888452922, + "learning_rate": 2.423669388608574e-07, + "loss": 0.5746, + "step": 53930 + }, + { + "epoch": 0.9319014376555156, + "grad_norm": 0.9224638051988301, + "learning_rate": 2.4224448679970535e-07, + "loss": 0.3247, + "step": 53931 + }, + { + "epoch": 0.9319187171689245, + "grad_norm": 1.1199681595701272, + "learning_rate": 2.421220653005851e-07, + "loss": 0.4076, + "step": 53932 + }, + { + "epoch": 0.9319359966823334, + "grad_norm": 1.1790917691239138, + "learning_rate": 2.4199967436387975e-07, + "loss": 0.3155, + "step": 53933 + }, + { + "epoch": 0.9319532761957423, + "grad_norm": 1.8780902963771016, + "learning_rate": 2.418773139899733e-07, + "loss": 0.3971, + "step": 53934 + }, + { + "epoch": 0.9319705557091512, + "grad_norm": 1.353874595689914, + "learning_rate": 2.417549841792488e-07, + "loss": 0.2922, + "step": 53935 + }, + { + "epoch": 0.9319878352225601, + "grad_norm": 0.9310902570916446, + "learning_rate": 2.4163268493208826e-07, + "loss": 0.3422, + "step": 53936 + }, + { + "epoch": 0.932005114735969, + "grad_norm": 1.7604274341276933, + "learning_rate": 2.4151041624887683e-07, + "loss": 0.2546, + "step": 53937 + }, + { + "epoch": 0.9320223942493779, + "grad_norm": 2.007275177797059, + "learning_rate": 2.413881781299965e-07, + "loss": 0.419, + "step": 53938 + }, + { + "epoch": 0.9320396737627868, + "grad_norm": 0.820419088113048, + "learning_rate": 2.4126597057583024e-07, + "loss": 0.8071, + "step": 53939 + }, + { + "epoch": 0.9320569532761958, + "grad_norm": 1.142948559118062, + "learning_rate": 2.4114379358676e-07, + "loss": 0.4701, + "step": 53940 + }, + { + "epoch": 0.9320742327896047, + "grad_norm": 1.2667509565755417, + "learning_rate": 2.410216471631688e-07, + "loss": 0.3625, + "step": 53941 + }, + { + "epoch": 0.9320915123030136, + "grad_norm": 1.2169415188845334, + "learning_rate": 2.4089953130543964e-07, + "loss": 0.3899, + "step": 53942 + }, + { + "epoch": 0.9321087918164225, + "grad_norm": 1.5996137148615057, + "learning_rate": 2.4077744601395557e-07, + "loss": 0.3439, + "step": 53943 + }, + { + "epoch": 0.9321260713298314, + "grad_norm": 2.9750014782588163, + "learning_rate": 2.406553912890974e-07, + "loss": 0.2343, + "step": 53944 + }, + { + "epoch": 0.9321433508432403, + "grad_norm": 1.5444257176174714, + "learning_rate": 2.405333671312493e-07, + "loss": 0.3379, + "step": 53945 + }, + { + "epoch": 0.9321606303566492, + "grad_norm": 1.1364956142856637, + "learning_rate": 2.404113735407909e-07, + "loss": 0.4756, + "step": 53946 + }, + { + "epoch": 0.9321779098700581, + "grad_norm": 1.033081420031257, + "learning_rate": 2.402894105181064e-07, + "loss": 0.2609, + "step": 53947 + }, + { + "epoch": 0.932195189383467, + "grad_norm": 2.0992757493386454, + "learning_rate": 2.4016747806357657e-07, + "loss": 0.283, + "step": 53948 + }, + { + "epoch": 0.9322124688968758, + "grad_norm": 1.605821740120638, + "learning_rate": 2.400455761775844e-07, + "loss": 0.3098, + "step": 53949 + }, + { + "epoch": 0.9322297484102847, + "grad_norm": 1.8905882839944275, + "learning_rate": 2.3992370486051187e-07, + "loss": 0.5062, + "step": 53950 + }, + { + "epoch": 0.9322470279236936, + "grad_norm": 0.9766028197397388, + "learning_rate": 2.3980186411273977e-07, + "loss": 0.3231, + "step": 53951 + }, + { + "epoch": 0.9322643074371025, + "grad_norm": 1.1030518569334626, + "learning_rate": 2.396800539346489e-07, + "loss": 0.5215, + "step": 53952 + }, + { + "epoch": 0.9322815869505114, + "grad_norm": 2.108351407947034, + "learning_rate": 2.3955827432662225e-07, + "loss": 0.5981, + "step": 53953 + }, + { + "epoch": 0.9322988664639204, + "grad_norm": 1.3138865080966748, + "learning_rate": 2.394365252890418e-07, + "loss": 0.2777, + "step": 53954 + }, + { + "epoch": 0.9323161459773293, + "grad_norm": 2.2998744359925323, + "learning_rate": 2.3931480682228726e-07, + "loss": 0.2682, + "step": 53955 + }, + { + "epoch": 0.9323334254907382, + "grad_norm": 1.300240762857062, + "learning_rate": 2.3919311892674155e-07, + "loss": 0.2639, + "step": 53956 + }, + { + "epoch": 0.9323507050041471, + "grad_norm": 1.374289031777035, + "learning_rate": 2.3907146160278336e-07, + "loss": 0.3666, + "step": 53957 + }, + { + "epoch": 0.932367984517556, + "grad_norm": 1.416610510500065, + "learning_rate": 2.3894983485079683e-07, + "loss": 0.3744, + "step": 53958 + }, + { + "epoch": 0.9323852640309649, + "grad_norm": 1.4083061567114583, + "learning_rate": 2.3882823867115936e-07, + "loss": 0.4719, + "step": 53959 + }, + { + "epoch": 0.9324025435443738, + "grad_norm": 1.7333747952314333, + "learning_rate": 2.38706673064254e-07, + "loss": 0.4308, + "step": 53960 + }, + { + "epoch": 0.9324198230577827, + "grad_norm": 1.8176638704967911, + "learning_rate": 2.385851380304627e-07, + "loss": 0.2528, + "step": 53961 + }, + { + "epoch": 0.9324371025711916, + "grad_norm": 1.4340998388945918, + "learning_rate": 2.384636335701629e-07, + "loss": 0.6407, + "step": 53962 + }, + { + "epoch": 0.9324543820846005, + "grad_norm": 1.354970271084253, + "learning_rate": 2.3834215968373765e-07, + "loss": 0.4232, + "step": 53963 + }, + { + "epoch": 0.9324716615980094, + "grad_norm": 1.8865423003334167, + "learning_rate": 2.3822071637156662e-07, + "loss": 0.2096, + "step": 53964 + }, + { + "epoch": 0.9324889411114183, + "grad_norm": 1.2685064128787074, + "learning_rate": 2.3809930363402955e-07, + "loss": 0.2132, + "step": 53965 + }, + { + "epoch": 0.9325062206248272, + "grad_norm": 1.9858857842996192, + "learning_rate": 2.379779214715072e-07, + "loss": 0.2423, + "step": 53966 + }, + { + "epoch": 0.9325235001382361, + "grad_norm": 1.0433663188006383, + "learning_rate": 2.3785656988438155e-07, + "loss": 0.2838, + "step": 53967 + }, + { + "epoch": 0.9325407796516451, + "grad_norm": 1.3770216029952405, + "learning_rate": 2.377352488730289e-07, + "loss": 0.307, + "step": 53968 + }, + { + "epoch": 0.932558059165054, + "grad_norm": 1.0358713308496972, + "learning_rate": 2.376139584378334e-07, + "loss": 0.8838, + "step": 53969 + }, + { + "epoch": 0.9325753386784628, + "grad_norm": 1.1324924116179547, + "learning_rate": 2.3749269857917147e-07, + "loss": 0.2968, + "step": 53970 + }, + { + "epoch": 0.9325926181918717, + "grad_norm": 1.3175760722567076, + "learning_rate": 2.3737146929742384e-07, + "loss": 0.3843, + "step": 53971 + }, + { + "epoch": 0.9326098977052806, + "grad_norm": 1.3722825798228793, + "learning_rate": 2.3725027059297247e-07, + "loss": 0.3159, + "step": 53972 + }, + { + "epoch": 0.9326271772186895, + "grad_norm": 1.1759572610565243, + "learning_rate": 2.371291024661937e-07, + "loss": 0.4647, + "step": 53973 + }, + { + "epoch": 0.9326444567320984, + "grad_norm": 1.5844307725471514, + "learning_rate": 2.3700796491746946e-07, + "loss": 0.2752, + "step": 53974 + }, + { + "epoch": 0.9326617362455073, + "grad_norm": 0.8937461863535154, + "learning_rate": 2.3688685794717837e-07, + "loss": 0.312, + "step": 53975 + }, + { + "epoch": 0.9326790157589162, + "grad_norm": 1.0920862959285726, + "learning_rate": 2.3676578155569786e-07, + "loss": 0.4123, + "step": 53976 + }, + { + "epoch": 0.9326962952723251, + "grad_norm": 1.2910871165797162, + "learning_rate": 2.3664473574340985e-07, + "loss": 0.1985, + "step": 53977 + }, + { + "epoch": 0.932713574785734, + "grad_norm": 1.2109096677141598, + "learning_rate": 2.3652372051069406e-07, + "loss": 0.2391, + "step": 53978 + }, + { + "epoch": 0.9327308542991429, + "grad_norm": 1.4505841131125792, + "learning_rate": 2.3640273585792573e-07, + "loss": 0.2896, + "step": 53979 + }, + { + "epoch": 0.9327481338125518, + "grad_norm": 1.0015512527130541, + "learning_rate": 2.3628178178548787e-07, + "loss": 0.297, + "step": 53980 + }, + { + "epoch": 0.9327654133259607, + "grad_norm": 1.4592198373179157, + "learning_rate": 2.361608582937569e-07, + "loss": 0.1893, + "step": 53981 + }, + { + "epoch": 0.9327826928393697, + "grad_norm": 1.6102508347619753, + "learning_rate": 2.3603996538311135e-07, + "loss": 0.2623, + "step": 53982 + }, + { + "epoch": 0.9327999723527786, + "grad_norm": 1.7304900494821565, + "learning_rate": 2.3591910305393206e-07, + "loss": 0.4185, + "step": 53983 + }, + { + "epoch": 0.9328172518661875, + "grad_norm": 1.1137094065128226, + "learning_rate": 2.3579827130659428e-07, + "loss": 0.2269, + "step": 53984 + }, + { + "epoch": 0.9328345313795964, + "grad_norm": 1.870839264260318, + "learning_rate": 2.35677470141481e-07, + "loss": 0.3291, + "step": 53985 + }, + { + "epoch": 0.9328518108930053, + "grad_norm": 1.0223586805361906, + "learning_rate": 2.3555669955896532e-07, + "loss": 0.4026, + "step": 53986 + }, + { + "epoch": 0.9328690904064142, + "grad_norm": 0.8813172040349043, + "learning_rate": 2.354359595594291e-07, + "loss": 0.3025, + "step": 53987 + }, + { + "epoch": 0.9328863699198231, + "grad_norm": 1.85965503483113, + "learning_rate": 2.3531525014324986e-07, + "loss": 0.3644, + "step": 53988 + }, + { + "epoch": 0.932903649433232, + "grad_norm": 1.5076036198634606, + "learning_rate": 2.3519457131080504e-07, + "loss": 0.8533, + "step": 53989 + }, + { + "epoch": 0.9329209289466409, + "grad_norm": 1.2353892803182176, + "learning_rate": 2.3507392306247322e-07, + "loss": 0.2908, + "step": 53990 + }, + { + "epoch": 0.9329382084600497, + "grad_norm": 1.312429955740681, + "learning_rate": 2.3495330539863194e-07, + "loss": 0.3831, + "step": 53991 + }, + { + "epoch": 0.9329554879734586, + "grad_norm": 1.4126501201820458, + "learning_rate": 2.348327183196586e-07, + "loss": 0.3567, + "step": 53992 + }, + { + "epoch": 0.9329727674868675, + "grad_norm": 0.9336282977216738, + "learning_rate": 2.347121618259307e-07, + "loss": 0.6439, + "step": 53993 + }, + { + "epoch": 0.9329900470002764, + "grad_norm": 1.7606709030422092, + "learning_rate": 2.3459163591782797e-07, + "loss": 0.4232, + "step": 53994 + }, + { + "epoch": 0.9330073265136853, + "grad_norm": 1.5571720530049034, + "learning_rate": 2.3447114059572562e-07, + "loss": 0.4696, + "step": 53995 + }, + { + "epoch": 0.9330246060270942, + "grad_norm": 4.123932221569128, + "learning_rate": 2.3435067586000226e-07, + "loss": 0.2916, + "step": 53996 + }, + { + "epoch": 0.9330418855405032, + "grad_norm": 1.3321479397931968, + "learning_rate": 2.3423024171103426e-07, + "loss": 0.3163, + "step": 53997 + }, + { + "epoch": 0.9330591650539121, + "grad_norm": 1.2061666885967894, + "learning_rate": 2.3410983814920019e-07, + "loss": 0.2854, + "step": 53998 + }, + { + "epoch": 0.933076444567321, + "grad_norm": 1.6595998558332405, + "learning_rate": 2.339894651748742e-07, + "loss": 0.4203, + "step": 53999 + }, + { + "epoch": 0.9330937240807299, + "grad_norm": 1.309094016824861, + "learning_rate": 2.338691227884371e-07, + "loss": 0.6666, + "step": 54000 + }, + { + "epoch": 0.9331110035941388, + "grad_norm": 2.0165823802832645, + "learning_rate": 2.3374881099026414e-07, + "loss": 0.321, + "step": 54001 + }, + { + "epoch": 0.9331282831075477, + "grad_norm": 1.4411018980282297, + "learning_rate": 2.336285297807317e-07, + "loss": 0.3574, + "step": 54002 + }, + { + "epoch": 0.9331455626209566, + "grad_norm": 2.110101583084888, + "learning_rate": 2.3350827916021724e-07, + "loss": 0.5713, + "step": 54003 + }, + { + "epoch": 0.9331628421343655, + "grad_norm": 1.5330036063559371, + "learning_rate": 2.333880591290971e-07, + "loss": 0.4048, + "step": 54004 + }, + { + "epoch": 0.9331801216477744, + "grad_norm": 0.8965455978145414, + "learning_rate": 2.3326786968774662e-07, + "loss": 0.4587, + "step": 54005 + }, + { + "epoch": 0.9331974011611833, + "grad_norm": 1.9473219356002, + "learning_rate": 2.331477108365443e-07, + "loss": 0.3701, + "step": 54006 + }, + { + "epoch": 0.9332146806745922, + "grad_norm": 1.5182522023927028, + "learning_rate": 2.3302758257586655e-07, + "loss": 0.3418, + "step": 54007 + }, + { + "epoch": 0.9332319601880011, + "grad_norm": 1.64781831071273, + "learning_rate": 2.3290748490608638e-07, + "loss": 0.5396, + "step": 54008 + }, + { + "epoch": 0.93324923970141, + "grad_norm": 1.3123227405642923, + "learning_rate": 2.3278741782758463e-07, + "loss": 0.3885, + "step": 54009 + }, + { + "epoch": 0.933266519214819, + "grad_norm": 0.8550980472186251, + "learning_rate": 2.326673813407332e-07, + "loss": 0.3998, + "step": 54010 + }, + { + "epoch": 0.9332837987282279, + "grad_norm": 1.6664837281840539, + "learning_rate": 2.3254737544591066e-07, + "loss": 0.3789, + "step": 54011 + }, + { + "epoch": 0.9333010782416367, + "grad_norm": 1.8675708409369565, + "learning_rate": 2.324274001434923e-07, + "loss": 0.2336, + "step": 54012 + }, + { + "epoch": 0.9333183577550456, + "grad_norm": 1.4888780153111916, + "learning_rate": 2.3230745543385335e-07, + "loss": 0.4457, + "step": 54013 + }, + { + "epoch": 0.9333356372684545, + "grad_norm": 1.1566454010730447, + "learning_rate": 2.321875413173702e-07, + "loss": 0.3706, + "step": 54014 + }, + { + "epoch": 0.9333529167818634, + "grad_norm": 1.5784556254947808, + "learning_rate": 2.32067657794417e-07, + "loss": 0.4417, + "step": 54015 + }, + { + "epoch": 0.9333701962952723, + "grad_norm": 1.123756110455467, + "learning_rate": 2.3194780486537006e-07, + "loss": 0.5247, + "step": 54016 + }, + { + "epoch": 0.9333874758086812, + "grad_norm": 1.0905299298450228, + "learning_rate": 2.3182798253060467e-07, + "loss": 0.2576, + "step": 54017 + }, + { + "epoch": 0.9334047553220901, + "grad_norm": 0.6699394840196792, + "learning_rate": 2.3170819079049834e-07, + "loss": 0.1863, + "step": 54018 + }, + { + "epoch": 0.933422034835499, + "grad_norm": 1.2234525992958747, + "learning_rate": 2.3158842964542295e-07, + "loss": 0.2933, + "step": 54019 + }, + { + "epoch": 0.9334393143489079, + "grad_norm": 1.2960648976154485, + "learning_rate": 2.3146869909575598e-07, + "loss": 0.4367, + "step": 54020 + }, + { + "epoch": 0.9334565938623168, + "grad_norm": 1.1664336398304185, + "learning_rate": 2.313489991418705e-07, + "loss": 0.3908, + "step": 54021 + }, + { + "epoch": 0.9334738733757257, + "grad_norm": 1.2024346167053164, + "learning_rate": 2.312293297841417e-07, + "loss": 0.3834, + "step": 54022 + }, + { + "epoch": 0.9334911528891346, + "grad_norm": 1.6862192047057254, + "learning_rate": 2.311096910229471e-07, + "loss": 0.313, + "step": 54023 + }, + { + "epoch": 0.9335084324025436, + "grad_norm": 1.8722640341528871, + "learning_rate": 2.3099008285865753e-07, + "loss": 0.3383, + "step": 54024 + }, + { + "epoch": 0.9335257119159525, + "grad_norm": 0.9927235589721355, + "learning_rate": 2.3087050529165044e-07, + "loss": 0.3025, + "step": 54025 + }, + { + "epoch": 0.9335429914293614, + "grad_norm": 1.175920707715071, + "learning_rate": 2.3075095832229887e-07, + "loss": 0.4328, + "step": 54026 + }, + { + "epoch": 0.9335602709427703, + "grad_norm": 1.9075975985270848, + "learning_rate": 2.3063144195097697e-07, + "loss": 0.3177, + "step": 54027 + }, + { + "epoch": 0.9335775504561792, + "grad_norm": 1.327228546861557, + "learning_rate": 2.3051195617806111e-07, + "loss": 0.3333, + "step": 54028 + }, + { + "epoch": 0.9335948299695881, + "grad_norm": 1.4530442300343704, + "learning_rate": 2.3039250100392428e-07, + "loss": 0.3666, + "step": 54029 + }, + { + "epoch": 0.933612109482997, + "grad_norm": 1.1989573699084106, + "learning_rate": 2.302730764289396e-07, + "loss": 0.2469, + "step": 54030 + }, + { + "epoch": 0.9336293889964059, + "grad_norm": 1.2732912962780336, + "learning_rate": 2.3015368245348336e-07, + "loss": 0.3885, + "step": 54031 + }, + { + "epoch": 0.9336466685098148, + "grad_norm": 1.2173681620812569, + "learning_rate": 2.300343190779275e-07, + "loss": 0.2868, + "step": 54032 + }, + { + "epoch": 0.9336639480232236, + "grad_norm": 1.1462173758425318, + "learning_rate": 2.299149863026462e-07, + "loss": 0.1876, + "step": 54033 + }, + { + "epoch": 0.9336812275366325, + "grad_norm": 0.8796182351895974, + "learning_rate": 2.2979568412801468e-07, + "loss": 0.3846, + "step": 54034 + }, + { + "epoch": 0.9336985070500414, + "grad_norm": 1.1510230391950895, + "learning_rate": 2.2967641255440486e-07, + "loss": 0.4099, + "step": 54035 + }, + { + "epoch": 0.9337157865634503, + "grad_norm": 1.5281340723676409, + "learning_rate": 2.2955717158219204e-07, + "loss": 0.4145, + "step": 54036 + }, + { + "epoch": 0.9337330660768592, + "grad_norm": 1.0322563518109211, + "learning_rate": 2.294379612117481e-07, + "loss": 0.4838, + "step": 54037 + }, + { + "epoch": 0.9337503455902681, + "grad_norm": 1.3537180389569354, + "learning_rate": 2.293187814434472e-07, + "loss": 0.3969, + "step": 54038 + }, + { + "epoch": 0.9337676251036771, + "grad_norm": 1.022204587442684, + "learning_rate": 2.2919963227766018e-07, + "loss": 0.3323, + "step": 54039 + }, + { + "epoch": 0.933784904617086, + "grad_norm": 1.289521920742082, + "learning_rate": 2.290805137147656e-07, + "loss": 0.1895, + "step": 54040 + }, + { + "epoch": 0.9338021841304949, + "grad_norm": 1.5706785160853423, + "learning_rate": 2.2896142575513203e-07, + "loss": 0.3725, + "step": 54041 + }, + { + "epoch": 0.9338194636439038, + "grad_norm": 1.1213725331830562, + "learning_rate": 2.2884236839913364e-07, + "loss": 0.8164, + "step": 54042 + }, + { + "epoch": 0.9338367431573127, + "grad_norm": 1.4272734536167653, + "learning_rate": 2.287233416471446e-07, + "loss": 0.1849, + "step": 54043 + }, + { + "epoch": 0.9338540226707216, + "grad_norm": 1.20052803908278, + "learning_rate": 2.286043454995357e-07, + "loss": 0.7933, + "step": 54044 + }, + { + "epoch": 0.9338713021841305, + "grad_norm": 1.9647373586431467, + "learning_rate": 2.2848537995668107e-07, + "loss": 0.2766, + "step": 54045 + }, + { + "epoch": 0.9338885816975394, + "grad_norm": 2.1982754728408502, + "learning_rate": 2.2836644501895156e-07, + "loss": 0.3322, + "step": 54046 + }, + { + "epoch": 0.9339058612109483, + "grad_norm": 1.076709758006572, + "learning_rate": 2.282475406867224e-07, + "loss": 0.4684, + "step": 54047 + }, + { + "epoch": 0.9339231407243572, + "grad_norm": 1.199688508050007, + "learning_rate": 2.281286669603644e-07, + "loss": 0.5069, + "step": 54048 + }, + { + "epoch": 0.9339404202377661, + "grad_norm": 1.4018647335829044, + "learning_rate": 2.280098238402506e-07, + "loss": 0.3002, + "step": 54049 + }, + { + "epoch": 0.933957699751175, + "grad_norm": 1.8016281756812025, + "learning_rate": 2.2789101132675074e-07, + "loss": 0.3625, + "step": 54050 + }, + { + "epoch": 0.933974979264584, + "grad_norm": 1.625698912072099, + "learning_rate": 2.2777222942024e-07, + "loss": 0.4496, + "step": 54051 + }, + { + "epoch": 0.9339922587779929, + "grad_norm": 1.1764141062910727, + "learning_rate": 2.276534781210904e-07, + "loss": 0.3511, + "step": 54052 + }, + { + "epoch": 0.9340095382914018, + "grad_norm": 1.412095312825716, + "learning_rate": 2.2753475742967046e-07, + "loss": 0.4207, + "step": 54053 + }, + { + "epoch": 0.9340268178048107, + "grad_norm": 1.2414826246105077, + "learning_rate": 2.2741606734635657e-07, + "loss": 0.3202, + "step": 54054 + }, + { + "epoch": 0.9340440973182195, + "grad_norm": 1.9741883836568628, + "learning_rate": 2.2729740787151733e-07, + "loss": 0.2973, + "step": 54055 + }, + { + "epoch": 0.9340613768316284, + "grad_norm": 1.9856192048334842, + "learning_rate": 2.2717877900552466e-07, + "loss": 0.527, + "step": 54056 + }, + { + "epoch": 0.9340786563450373, + "grad_norm": 1.6455463879196797, + "learning_rate": 2.2706018074875046e-07, + "loss": 0.153, + "step": 54057 + }, + { + "epoch": 0.9340959358584462, + "grad_norm": 1.3566787535440685, + "learning_rate": 2.2694161310156782e-07, + "loss": 0.7159, + "step": 54058 + }, + { + "epoch": 0.9341132153718551, + "grad_norm": 1.9541016357969707, + "learning_rate": 2.268230760643464e-07, + "loss": 0.329, + "step": 54059 + }, + { + "epoch": 0.934130494885264, + "grad_norm": 1.921118173436832, + "learning_rate": 2.2670456963745814e-07, + "loss": 0.2755, + "step": 54060 + }, + { + "epoch": 0.9341477743986729, + "grad_norm": 0.9445012635187937, + "learning_rate": 2.2658609382127384e-07, + "loss": 0.3138, + "step": 54061 + }, + { + "epoch": 0.9341650539120818, + "grad_norm": 2.441529862456838, + "learning_rate": 2.2646764861616322e-07, + "loss": 0.2203, + "step": 54062 + }, + { + "epoch": 0.9341823334254907, + "grad_norm": 1.4164578178785423, + "learning_rate": 2.263492340225004e-07, + "loss": 0.4295, + "step": 54063 + }, + { + "epoch": 0.9341996129388996, + "grad_norm": 1.228304831998603, + "learning_rate": 2.2623085004065403e-07, + "loss": 0.3214, + "step": 54064 + }, + { + "epoch": 0.9342168924523085, + "grad_norm": 1.452336114304996, + "learning_rate": 2.26112496670996e-07, + "loss": 0.4825, + "step": 54065 + }, + { + "epoch": 0.9342341719657175, + "grad_norm": 1.531823256781994, + "learning_rate": 2.259941739138949e-07, + "loss": 0.3944, + "step": 54066 + }, + { + "epoch": 0.9342514514791264, + "grad_norm": 1.842766322942424, + "learning_rate": 2.2587588176972374e-07, + "loss": 0.3803, + "step": 54067 + }, + { + "epoch": 0.9342687309925353, + "grad_norm": 2.7341511868546995, + "learning_rate": 2.2575762023885228e-07, + "loss": 0.4511, + "step": 54068 + }, + { + "epoch": 0.9342860105059442, + "grad_norm": 1.432723557574775, + "learning_rate": 2.2563938932165018e-07, + "loss": 0.4743, + "step": 54069 + }, + { + "epoch": 0.9343032900193531, + "grad_norm": 1.5894681489344191, + "learning_rate": 2.255211890184883e-07, + "loss": 0.3737, + "step": 54070 + }, + { + "epoch": 0.934320569532762, + "grad_norm": 1.202147650839114, + "learning_rate": 2.254030193297385e-07, + "loss": 0.3001, + "step": 54071 + }, + { + "epoch": 0.9343378490461709, + "grad_norm": 0.9188470426746822, + "learning_rate": 2.2528488025576722e-07, + "loss": 0.256, + "step": 54072 + }, + { + "epoch": 0.9343551285595798, + "grad_norm": 1.625150444205805, + "learning_rate": 2.2516677179694745e-07, + "loss": 0.4148, + "step": 54073 + }, + { + "epoch": 0.9343724080729887, + "grad_norm": 1.4975922106914568, + "learning_rate": 2.2504869395364892e-07, + "loss": 0.3861, + "step": 54074 + }, + { + "epoch": 0.9343896875863976, + "grad_norm": 1.2247099451305044, + "learning_rate": 2.249306467262402e-07, + "loss": 0.4423, + "step": 54075 + }, + { + "epoch": 0.9344069670998064, + "grad_norm": 1.1571162761919602, + "learning_rate": 2.2481263011509102e-07, + "loss": 0.317, + "step": 54076 + }, + { + "epoch": 0.9344242466132153, + "grad_norm": 2.5150385576940364, + "learning_rate": 2.2469464412057217e-07, + "loss": 0.3717, + "step": 54077 + }, + { + "epoch": 0.9344415261266242, + "grad_norm": 1.8412409213736942, + "learning_rate": 2.2457668874305226e-07, + "loss": 0.4595, + "step": 54078 + }, + { + "epoch": 0.9344588056400331, + "grad_norm": 1.4078748748020677, + "learning_rate": 2.244587639828999e-07, + "loss": 0.4176, + "step": 54079 + }, + { + "epoch": 0.934476085153442, + "grad_norm": 1.9817306392999063, + "learning_rate": 2.243408698404881e-07, + "loss": 0.3237, + "step": 54080 + }, + { + "epoch": 0.934493364666851, + "grad_norm": 1.6240646820817246, + "learning_rate": 2.242230063161821e-07, + "loss": 0.8844, + "step": 54081 + }, + { + "epoch": 0.9345106441802599, + "grad_norm": 1.4407236468871047, + "learning_rate": 2.2410517341035387e-07, + "loss": 0.2473, + "step": 54082 + }, + { + "epoch": 0.9345279236936688, + "grad_norm": 0.7496417905718301, + "learning_rate": 2.2398737112336977e-07, + "loss": 0.377, + "step": 54083 + }, + { + "epoch": 0.9345452032070777, + "grad_norm": 2.195689062934177, + "learning_rate": 2.238695994556006e-07, + "loss": 0.2937, + "step": 54084 + }, + { + "epoch": 0.9345624827204866, + "grad_norm": 1.2043832722195662, + "learning_rate": 2.2375185840741498e-07, + "loss": 0.1696, + "step": 54085 + }, + { + "epoch": 0.9345797622338955, + "grad_norm": 1.7217867476110988, + "learning_rate": 2.2363414797918036e-07, + "loss": 0.2062, + "step": 54086 + }, + { + "epoch": 0.9345970417473044, + "grad_norm": 1.6029219226280795, + "learning_rate": 2.2351646817126872e-07, + "loss": 0.3102, + "step": 54087 + }, + { + "epoch": 0.9346143212607133, + "grad_norm": 1.8232576105949962, + "learning_rate": 2.2339881898404413e-07, + "loss": 0.5036, + "step": 54088 + }, + { + "epoch": 0.9346316007741222, + "grad_norm": 0.9866164497892957, + "learning_rate": 2.2328120041787972e-07, + "loss": 0.5742, + "step": 54089 + }, + { + "epoch": 0.9346488802875311, + "grad_norm": 1.1686563982726124, + "learning_rate": 2.2316361247313955e-07, + "loss": 0.3379, + "step": 54090 + }, + { + "epoch": 0.93466615980094, + "grad_norm": 1.9729888292310114, + "learning_rate": 2.2304605515019452e-07, + "loss": 0.3894, + "step": 54091 + }, + { + "epoch": 0.9346834393143489, + "grad_norm": 1.2077141354890701, + "learning_rate": 2.2292852844941316e-07, + "loss": 0.4159, + "step": 54092 + }, + { + "epoch": 0.9347007188277578, + "grad_norm": 1.0988404723460967, + "learning_rate": 2.2281103237116185e-07, + "loss": 0.3732, + "step": 54093 + }, + { + "epoch": 0.9347179983411668, + "grad_norm": 1.578718355223525, + "learning_rate": 2.2269356691581034e-07, + "loss": 0.3604, + "step": 54094 + }, + { + "epoch": 0.9347352778545757, + "grad_norm": 1.3333693063107408, + "learning_rate": 2.2257613208372497e-07, + "loss": 0.4613, + "step": 54095 + }, + { + "epoch": 0.9347525573679846, + "grad_norm": 0.6248051883942108, + "learning_rate": 2.2245872787527433e-07, + "loss": 0.7292, + "step": 54096 + }, + { + "epoch": 0.9347698368813934, + "grad_norm": 1.8118962654118376, + "learning_rate": 2.2234135429082482e-07, + "loss": 0.2732, + "step": 54097 + }, + { + "epoch": 0.9347871163948023, + "grad_norm": 1.1789699488989418, + "learning_rate": 2.2222401133074723e-07, + "loss": 0.3623, + "step": 54098 + }, + { + "epoch": 0.9348043959082112, + "grad_norm": 1.891430136764077, + "learning_rate": 2.221066989954057e-07, + "loss": 0.1684, + "step": 54099 + }, + { + "epoch": 0.9348216754216201, + "grad_norm": 2.4410332496578597, + "learning_rate": 2.219894172851711e-07, + "loss": 0.2549, + "step": 54100 + }, + { + "epoch": 0.934838954935029, + "grad_norm": 1.3884211401962765, + "learning_rate": 2.2187216620040642e-07, + "loss": 0.4387, + "step": 54101 + }, + { + "epoch": 0.9348562344484379, + "grad_norm": 2.1068456195296896, + "learning_rate": 2.2175494574148137e-07, + "loss": 0.4388, + "step": 54102 + }, + { + "epoch": 0.9348735139618468, + "grad_norm": 1.7529095630953824, + "learning_rate": 2.2163775590876458e-07, + "loss": 0.3275, + "step": 54103 + }, + { + "epoch": 0.9348907934752557, + "grad_norm": 2.1102544810708173, + "learning_rate": 2.2152059670262016e-07, + "loss": 0.2738, + "step": 54104 + }, + { + "epoch": 0.9349080729886646, + "grad_norm": 1.5751867288139914, + "learning_rate": 2.2140346812341785e-07, + "loss": 0.2115, + "step": 54105 + }, + { + "epoch": 0.9349253525020735, + "grad_norm": 1.3133644829360573, + "learning_rate": 2.2128637017152177e-07, + "loss": 0.3686, + "step": 54106 + }, + { + "epoch": 0.9349426320154824, + "grad_norm": 0.9216436664046939, + "learning_rate": 2.2116930284729942e-07, + "loss": 0.3218, + "step": 54107 + }, + { + "epoch": 0.9349599115288914, + "grad_norm": 1.201308493597922, + "learning_rate": 2.210522661511183e-07, + "loss": 0.2227, + "step": 54108 + }, + { + "epoch": 0.9349771910423003, + "grad_norm": 1.157353692704792, + "learning_rate": 2.2093526008334587e-07, + "loss": 0.3855, + "step": 54109 + }, + { + "epoch": 0.9349944705557092, + "grad_norm": 1.1829375678157334, + "learning_rate": 2.2081828464434628e-07, + "loss": 0.2579, + "step": 54110 + }, + { + "epoch": 0.9350117500691181, + "grad_norm": 1.3171844621602586, + "learning_rate": 2.2070133983448704e-07, + "loss": 0.2559, + "step": 54111 + }, + { + "epoch": 0.935029029582527, + "grad_norm": 1.2066717325422696, + "learning_rate": 2.2058442565413453e-07, + "loss": 0.2971, + "step": 54112 + }, + { + "epoch": 0.9350463090959359, + "grad_norm": 2.128740700107327, + "learning_rate": 2.2046754210365507e-07, + "loss": 0.2609, + "step": 54113 + }, + { + "epoch": 0.9350635886093448, + "grad_norm": 1.0959321576699061, + "learning_rate": 2.2035068918341505e-07, + "loss": 0.3677, + "step": 54114 + }, + { + "epoch": 0.9350808681227537, + "grad_norm": 1.003375201884579, + "learning_rate": 2.2023386689377869e-07, + "loss": 0.4006, + "step": 54115 + }, + { + "epoch": 0.9350981476361626, + "grad_norm": 1.3052788070887977, + "learning_rate": 2.201170752351134e-07, + "loss": 0.4347, + "step": 54116 + }, + { + "epoch": 0.9351154271495715, + "grad_norm": 1.1842865540257368, + "learning_rate": 2.2000031420778556e-07, + "loss": 0.3943, + "step": 54117 + }, + { + "epoch": 0.9351327066629803, + "grad_norm": 0.9474571993149504, + "learning_rate": 2.1988358381215934e-07, + "loss": 0.3456, + "step": 54118 + }, + { + "epoch": 0.9351499861763892, + "grad_norm": 0.5258209401446378, + "learning_rate": 2.1976688404860004e-07, + "loss": 0.7235, + "step": 54119 + }, + { + "epoch": 0.9351672656897981, + "grad_norm": 1.3500354747496306, + "learning_rate": 2.1965021491747506e-07, + "loss": 0.4422, + "step": 54120 + }, + { + "epoch": 0.935184545203207, + "grad_norm": 1.8844760640368488, + "learning_rate": 2.1953357641914863e-07, + "loss": 0.34, + "step": 54121 + }, + { + "epoch": 0.935201824716616, + "grad_norm": 1.6734754191142267, + "learning_rate": 2.1941696855398708e-07, + "loss": 0.3248, + "step": 54122 + }, + { + "epoch": 0.9352191042300249, + "grad_norm": 1.811780378439793, + "learning_rate": 2.193003913223546e-07, + "loss": 0.2832, + "step": 54123 + }, + { + "epoch": 0.9352363837434338, + "grad_norm": 1.8979214250975793, + "learning_rate": 2.191838447246164e-07, + "loss": 0.2147, + "step": 54124 + }, + { + "epoch": 0.9352536632568427, + "grad_norm": 0.9927107515759841, + "learning_rate": 2.1906732876113778e-07, + "loss": 0.3889, + "step": 54125 + }, + { + "epoch": 0.9352709427702516, + "grad_norm": 1.3604823129866426, + "learning_rate": 2.189508434322829e-07, + "loss": 0.45, + "step": 54126 + }, + { + "epoch": 0.9352882222836605, + "grad_norm": 0.8805176007134644, + "learning_rate": 2.1883438873841811e-07, + "loss": 0.3085, + "step": 54127 + }, + { + "epoch": 0.9353055017970694, + "grad_norm": 2.4719011727880242, + "learning_rate": 2.187179646799076e-07, + "loss": 0.3711, + "step": 54128 + }, + { + "epoch": 0.9353227813104783, + "grad_norm": 1.164957373712064, + "learning_rate": 2.1860157125711544e-07, + "loss": 0.21, + "step": 54129 + }, + { + "epoch": 0.9353400608238872, + "grad_norm": 1.15964298413851, + "learning_rate": 2.184852084704059e-07, + "loss": 0.4503, + "step": 54130 + }, + { + "epoch": 0.9353573403372961, + "grad_norm": 1.0170238029305583, + "learning_rate": 2.1836887632014414e-07, + "loss": 0.3477, + "step": 54131 + }, + { + "epoch": 0.935374619850705, + "grad_norm": 1.1085471325077378, + "learning_rate": 2.1825257480669439e-07, + "loss": 0.438, + "step": 54132 + }, + { + "epoch": 0.9353918993641139, + "grad_norm": 2.267586640095292, + "learning_rate": 2.1813630393042295e-07, + "loss": 0.2565, + "step": 54133 + }, + { + "epoch": 0.9354091788775228, + "grad_norm": 1.4660035450299786, + "learning_rate": 2.180200636916907e-07, + "loss": 0.1988, + "step": 54134 + }, + { + "epoch": 0.9354264583909317, + "grad_norm": 1.4739900362781002, + "learning_rate": 2.179038540908629e-07, + "loss": 0.3754, + "step": 54135 + }, + { + "epoch": 0.9354437379043407, + "grad_norm": 2.870528632823095, + "learning_rate": 2.1778767512830257e-07, + "loss": 0.5491, + "step": 54136 + }, + { + "epoch": 0.9354610174177496, + "grad_norm": 2.02235579616981, + "learning_rate": 2.1767152680437498e-07, + "loss": 0.4093, + "step": 54137 + }, + { + "epoch": 0.9354782969311585, + "grad_norm": 1.3715686380439933, + "learning_rate": 2.175554091194454e-07, + "loss": 0.2416, + "step": 54138 + }, + { + "epoch": 0.9354955764445673, + "grad_norm": 1.4354582929579607, + "learning_rate": 2.1743932207387464e-07, + "loss": 0.4013, + "step": 54139 + }, + { + "epoch": 0.9355128559579762, + "grad_norm": 1.3535401313523026, + "learning_rate": 2.1732326566802686e-07, + "loss": 0.2749, + "step": 54140 + }, + { + "epoch": 0.9355301354713851, + "grad_norm": 1.1041654290082665, + "learning_rate": 2.1720723990226623e-07, + "loss": 0.4641, + "step": 54141 + }, + { + "epoch": 0.935547414984794, + "grad_norm": 1.6552804441969957, + "learning_rate": 2.1709124477695686e-07, + "loss": 0.3972, + "step": 54142 + }, + { + "epoch": 0.9355646944982029, + "grad_norm": 1.3985544490800605, + "learning_rate": 2.1697528029245962e-07, + "loss": 0.345, + "step": 54143 + }, + { + "epoch": 0.9355819740116118, + "grad_norm": 1.6305995816971839, + "learning_rate": 2.1685934644914087e-07, + "loss": 0.2923, + "step": 54144 + }, + { + "epoch": 0.9355992535250207, + "grad_norm": 1.7211501247144012, + "learning_rate": 2.1674344324736252e-07, + "loss": 0.1504, + "step": 54145 + }, + { + "epoch": 0.9356165330384296, + "grad_norm": 1.314110740664062, + "learning_rate": 2.166275706874854e-07, + "loss": 0.2525, + "step": 54146 + }, + { + "epoch": 0.9356338125518385, + "grad_norm": 1.094152454007707, + "learning_rate": 2.165117287698748e-07, + "loss": 0.4423, + "step": 54147 + }, + { + "epoch": 0.9356510920652474, + "grad_norm": 1.8037955708788294, + "learning_rate": 2.1639591749489265e-07, + "loss": 0.3509, + "step": 54148 + }, + { + "epoch": 0.9356683715786563, + "grad_norm": 1.5286744287298533, + "learning_rate": 2.1628013686290305e-07, + "loss": 0.486, + "step": 54149 + }, + { + "epoch": 0.9356856510920653, + "grad_norm": 1.1488000211220537, + "learning_rate": 2.1616438687426688e-07, + "loss": 0.3067, + "step": 54150 + }, + { + "epoch": 0.9357029306054742, + "grad_norm": 1.8647104523977336, + "learning_rate": 2.1604866752934828e-07, + "loss": 0.1495, + "step": 54151 + }, + { + "epoch": 0.9357202101188831, + "grad_norm": 1.8566463497982852, + "learning_rate": 2.1593297882850805e-07, + "loss": 0.2399, + "step": 54152 + }, + { + "epoch": 0.935737489632292, + "grad_norm": 1.1336195070836321, + "learning_rate": 2.1581732077210926e-07, + "loss": 0.4656, + "step": 54153 + }, + { + "epoch": 0.9357547691457009, + "grad_norm": 1.9102448179087155, + "learning_rate": 2.1570169336051383e-07, + "loss": 0.2453, + "step": 54154 + }, + { + "epoch": 0.9357720486591098, + "grad_norm": 1.4286817676391637, + "learning_rate": 2.1558609659408592e-07, + "loss": 0.9525, + "step": 54155 + }, + { + "epoch": 0.9357893281725187, + "grad_norm": 1.3181925228771216, + "learning_rate": 2.1547053047318522e-07, + "loss": 0.2824, + "step": 54156 + }, + { + "epoch": 0.9358066076859276, + "grad_norm": 1.282141269739197, + "learning_rate": 2.1535499499817368e-07, + "loss": 0.3107, + "step": 54157 + }, + { + "epoch": 0.9358238871993365, + "grad_norm": 1.2387470565927632, + "learning_rate": 2.1523949016941436e-07, + "loss": 0.2948, + "step": 54158 + }, + { + "epoch": 0.9358411667127454, + "grad_norm": 1.3951498517192518, + "learning_rate": 2.1512401598726806e-07, + "loss": 0.3597, + "step": 54159 + }, + { + "epoch": 0.9358584462261542, + "grad_norm": 1.1431918574246545, + "learning_rate": 2.1500857245209783e-07, + "loss": 0.3919, + "step": 54160 + }, + { + "epoch": 0.9358757257395631, + "grad_norm": 1.384690058435274, + "learning_rate": 2.1489315956426337e-07, + "loss": 0.3367, + "step": 54161 + }, + { + "epoch": 0.935893005252972, + "grad_norm": 1.3085247738335983, + "learning_rate": 2.1477777732412775e-07, + "loss": 0.3519, + "step": 54162 + }, + { + "epoch": 0.9359102847663809, + "grad_norm": 2.029698477661667, + "learning_rate": 2.1466242573205177e-07, + "loss": 0.269, + "step": 54163 + }, + { + "epoch": 0.9359275642797898, + "grad_norm": 1.6693550865963886, + "learning_rate": 2.1454710478839736e-07, + "loss": 0.3687, + "step": 54164 + }, + { + "epoch": 0.9359448437931988, + "grad_norm": 1.4775621328131252, + "learning_rate": 2.1443181449352423e-07, + "loss": 0.2395, + "step": 54165 + }, + { + "epoch": 0.9359621233066077, + "grad_norm": 2.608048734552202, + "learning_rate": 2.1431655484779435e-07, + "loss": 0.5406, + "step": 54166 + }, + { + "epoch": 0.9359794028200166, + "grad_norm": 1.8958548132051123, + "learning_rate": 2.1420132585156962e-07, + "loss": 0.4371, + "step": 54167 + }, + { + "epoch": 0.9359966823334255, + "grad_norm": 1.3712573544345, + "learning_rate": 2.1408612750520863e-07, + "loss": 0.2996, + "step": 54168 + }, + { + "epoch": 0.9360139618468344, + "grad_norm": 1.9764219523610107, + "learning_rate": 2.1397095980907445e-07, + "loss": 0.3759, + "step": 54169 + }, + { + "epoch": 0.9360312413602433, + "grad_norm": 1.0864213351799266, + "learning_rate": 2.138558227635268e-07, + "loss": 0.2311, + "step": 54170 + }, + { + "epoch": 0.9360485208736522, + "grad_norm": 1.200171459743008, + "learning_rate": 2.1374071636892645e-07, + "loss": 0.5743, + "step": 54171 + }, + { + "epoch": 0.9360658003870611, + "grad_norm": 1.1051747082739953, + "learning_rate": 2.1362564062563317e-07, + "loss": 0.3057, + "step": 54172 + }, + { + "epoch": 0.93608307990047, + "grad_norm": 1.0554607873631368, + "learning_rate": 2.1351059553401e-07, + "loss": 0.2193, + "step": 54173 + }, + { + "epoch": 0.9361003594138789, + "grad_norm": 1.3528307605544199, + "learning_rate": 2.1339558109441329e-07, + "loss": 0.6537, + "step": 54174 + }, + { + "epoch": 0.9361176389272878, + "grad_norm": 1.215752673210472, + "learning_rate": 2.1328059730720719e-07, + "loss": 0.4636, + "step": 54175 + }, + { + "epoch": 0.9361349184406967, + "grad_norm": 1.2216603108134465, + "learning_rate": 2.131656441727481e-07, + "loss": 0.3216, + "step": 54176 + }, + { + "epoch": 0.9361521979541056, + "grad_norm": 1.3456693878629704, + "learning_rate": 2.1305072169139907e-07, + "loss": 0.3626, + "step": 54177 + }, + { + "epoch": 0.9361694774675146, + "grad_norm": 0.916527711669312, + "learning_rate": 2.129358298635198e-07, + "loss": 0.2579, + "step": 54178 + }, + { + "epoch": 0.9361867569809235, + "grad_norm": 3.252856462764982, + "learning_rate": 2.1282096868946779e-07, + "loss": 0.3304, + "step": 54179 + }, + { + "epoch": 0.9362040364943324, + "grad_norm": 1.176830655836375, + "learning_rate": 2.1270613816960495e-07, + "loss": 0.3402, + "step": 54180 + }, + { + "epoch": 0.9362213160077412, + "grad_norm": 1.9287627780713517, + "learning_rate": 2.125913383042899e-07, + "loss": 0.3659, + "step": 54181 + }, + { + "epoch": 0.9362385955211501, + "grad_norm": 1.309739463138566, + "learning_rate": 2.1247656909388347e-07, + "loss": 0.3373, + "step": 54182 + }, + { + "epoch": 0.936255875034559, + "grad_norm": 1.9451790089375183, + "learning_rate": 2.1236183053874315e-07, + "loss": 0.2362, + "step": 54183 + }, + { + "epoch": 0.9362731545479679, + "grad_norm": 1.9791369784703379, + "learning_rate": 2.1224712263923086e-07, + "loss": 0.383, + "step": 54184 + }, + { + "epoch": 0.9362904340613768, + "grad_norm": 1.2719590301314072, + "learning_rate": 2.12132445395703e-07, + "loss": 0.3165, + "step": 54185 + }, + { + "epoch": 0.9363077135747857, + "grad_norm": 1.3877256908078255, + "learning_rate": 2.1201779880852148e-07, + "loss": 0.2619, + "step": 54186 + }, + { + "epoch": 0.9363249930881946, + "grad_norm": 1.7602167362230232, + "learning_rate": 2.1190318287804268e-07, + "loss": 0.2771, + "step": 54187 + }, + { + "epoch": 0.9363422726016035, + "grad_norm": 0.8997672344422438, + "learning_rate": 2.1178859760462746e-07, + "loss": 0.3352, + "step": 54188 + }, + { + "epoch": 0.9363595521150124, + "grad_norm": 1.3340014508839422, + "learning_rate": 2.1167404298863437e-07, + "loss": 0.2317, + "step": 54189 + }, + { + "epoch": 0.9363768316284213, + "grad_norm": 1.1395424443087028, + "learning_rate": 2.1155951903042203e-07, + "loss": 0.5219, + "step": 54190 + }, + { + "epoch": 0.9363941111418302, + "grad_norm": 1.6291865317823453, + "learning_rate": 2.114450257303502e-07, + "loss": 0.1569, + "step": 54191 + }, + { + "epoch": 0.9364113906552392, + "grad_norm": 1.277521383150236, + "learning_rate": 2.113305630887752e-07, + "loss": 0.4135, + "step": 54192 + }, + { + "epoch": 0.9364286701686481, + "grad_norm": 0.8625860988426789, + "learning_rate": 2.1121613110605565e-07, + "loss": 0.1407, + "step": 54193 + }, + { + "epoch": 0.936445949682057, + "grad_norm": 1.062933477596752, + "learning_rate": 2.1110172978255238e-07, + "loss": 0.3353, + "step": 54194 + }, + { + "epoch": 0.9364632291954659, + "grad_norm": 0.8372304407473664, + "learning_rate": 2.1098735911862401e-07, + "loss": 0.3473, + "step": 54195 + }, + { + "epoch": 0.9364805087088748, + "grad_norm": 1.1515879907357105, + "learning_rate": 2.1087301911462577e-07, + "loss": 0.3081, + "step": 54196 + }, + { + "epoch": 0.9364977882222837, + "grad_norm": 1.5872774769322318, + "learning_rate": 2.1075870977091628e-07, + "loss": 0.3644, + "step": 54197 + }, + { + "epoch": 0.9365150677356926, + "grad_norm": 0.8078955214962891, + "learning_rate": 2.1064443108785637e-07, + "loss": 0.4142, + "step": 54198 + }, + { + "epoch": 0.9365323472491015, + "grad_norm": 2.314869461840768, + "learning_rate": 2.105301830658002e-07, + "loss": 0.2364, + "step": 54199 + }, + { + "epoch": 0.9365496267625104, + "grad_norm": 1.747291148357933, + "learning_rate": 2.1041596570510858e-07, + "loss": 0.3454, + "step": 54200 + }, + { + "epoch": 0.9365669062759193, + "grad_norm": 1.0168419732914082, + "learning_rate": 2.103017790061368e-07, + "loss": 0.294, + "step": 54201 + }, + { + "epoch": 0.9365841857893282, + "grad_norm": 1.1515878530170016, + "learning_rate": 2.1018762296924567e-07, + "loss": 0.4044, + "step": 54202 + }, + { + "epoch": 0.936601465302737, + "grad_norm": 0.9803540794525714, + "learning_rate": 2.1007349759479046e-07, + "loss": 0.5232, + "step": 54203 + }, + { + "epoch": 0.9366187448161459, + "grad_norm": 1.1876132145055065, + "learning_rate": 2.0995940288312866e-07, + "loss": 0.4585, + "step": 54204 + }, + { + "epoch": 0.9366360243295548, + "grad_norm": 2.028244241202344, + "learning_rate": 2.0984533883461665e-07, + "loss": 0.4351, + "step": 54205 + }, + { + "epoch": 0.9366533038429637, + "grad_norm": 1.4434495059733317, + "learning_rate": 2.097313054496153e-07, + "loss": 0.3082, + "step": 54206 + }, + { + "epoch": 0.9366705833563727, + "grad_norm": 1.5541006652995542, + "learning_rate": 2.096173027284798e-07, + "loss": 0.3753, + "step": 54207 + }, + { + "epoch": 0.9366878628697816, + "grad_norm": 1.1123522152256133, + "learning_rate": 2.0950333067156547e-07, + "loss": 0.3969, + "step": 54208 + }, + { + "epoch": 0.9367051423831905, + "grad_norm": 1.3922044928368993, + "learning_rate": 2.0938938927923093e-07, + "loss": 0.3728, + "step": 54209 + }, + { + "epoch": 0.9367224218965994, + "grad_norm": 0.9165736181680187, + "learning_rate": 2.0927547855183361e-07, + "loss": 0.2925, + "step": 54210 + }, + { + "epoch": 0.9367397014100083, + "grad_norm": 1.1409196518801508, + "learning_rate": 2.0916159848972883e-07, + "loss": 0.3599, + "step": 54211 + }, + { + "epoch": 0.9367569809234172, + "grad_norm": 1.728880130893559, + "learning_rate": 2.0904774909327296e-07, + "loss": 0.3791, + "step": 54212 + }, + { + "epoch": 0.9367742604368261, + "grad_norm": 0.9847061729104382, + "learning_rate": 2.089339303628257e-07, + "loss": 0.3111, + "step": 54213 + }, + { + "epoch": 0.936791539950235, + "grad_norm": 1.530947767289853, + "learning_rate": 2.0882014229874015e-07, + "loss": 0.5791, + "step": 54214 + }, + { + "epoch": 0.9368088194636439, + "grad_norm": 1.2477561389639733, + "learning_rate": 2.0870638490137597e-07, + "loss": 0.391, + "step": 54215 + }, + { + "epoch": 0.9368260989770528, + "grad_norm": 2.0159209403978693, + "learning_rate": 2.085926581710851e-07, + "loss": 0.2789, + "step": 54216 + }, + { + "epoch": 0.9368433784904617, + "grad_norm": 1.01921316982815, + "learning_rate": 2.0847896210822727e-07, + "loss": 0.1586, + "step": 54217 + }, + { + "epoch": 0.9368606580038706, + "grad_norm": 0.7256870368783094, + "learning_rate": 2.0836529671315774e-07, + "loss": 0.5951, + "step": 54218 + }, + { + "epoch": 0.9368779375172795, + "grad_norm": 1.2300607013716012, + "learning_rate": 2.082516619862307e-07, + "loss": 0.2619, + "step": 54219 + }, + { + "epoch": 0.9368952170306885, + "grad_norm": 1.6854527477363204, + "learning_rate": 2.0813805792780583e-07, + "loss": 0.4932, + "step": 54220 + }, + { + "epoch": 0.9369124965440974, + "grad_norm": 1.0839976093565016, + "learning_rate": 2.080244845382351e-07, + "loss": 0.3451, + "step": 54221 + }, + { + "epoch": 0.9369297760575063, + "grad_norm": 1.344796881873089, + "learning_rate": 2.0791094181787597e-07, + "loss": 0.33, + "step": 54222 + }, + { + "epoch": 0.9369470555709152, + "grad_norm": 2.358416427716349, + "learning_rate": 2.0779742976708372e-07, + "loss": 0.3656, + "step": 54223 + }, + { + "epoch": 0.936964335084324, + "grad_norm": 1.5468400837757845, + "learning_rate": 2.0768394838621586e-07, + "loss": 0.3068, + "step": 54224 + }, + { + "epoch": 0.9369816145977329, + "grad_norm": 0.8906525616580094, + "learning_rate": 2.0757049767562432e-07, + "loss": 0.1467, + "step": 54225 + }, + { + "epoch": 0.9369988941111418, + "grad_norm": 1.3377417709614314, + "learning_rate": 2.074570776356677e-07, + "loss": 0.2876, + "step": 54226 + }, + { + "epoch": 0.9370161736245507, + "grad_norm": 0.8441565610786953, + "learning_rate": 2.0734368826669792e-07, + "loss": 0.4314, + "step": 54227 + }, + { + "epoch": 0.9370334531379596, + "grad_norm": 1.2866891949683588, + "learning_rate": 2.072303295690725e-07, + "loss": 0.3059, + "step": 54228 + }, + { + "epoch": 0.9370507326513685, + "grad_norm": 0.7936830528049594, + "learning_rate": 2.0711700154314674e-07, + "loss": 0.31, + "step": 54229 + }, + { + "epoch": 0.9370680121647774, + "grad_norm": 1.3695432643704655, + "learning_rate": 2.0700370418927362e-07, + "loss": 0.3788, + "step": 54230 + }, + { + "epoch": 0.9370852916781863, + "grad_norm": 0.8817351825972892, + "learning_rate": 2.0689043750781067e-07, + "loss": 0.2327, + "step": 54231 + }, + { + "epoch": 0.9371025711915952, + "grad_norm": 1.2825654302699734, + "learning_rate": 2.0677720149910984e-07, + "loss": 0.3607, + "step": 54232 + }, + { + "epoch": 0.9371198507050041, + "grad_norm": 0.970032836278373, + "learning_rate": 2.066639961635264e-07, + "loss": 0.4158, + "step": 54233 + }, + { + "epoch": 0.937137130218413, + "grad_norm": 1.3459966944137716, + "learning_rate": 2.065508215014167e-07, + "loss": 0.2924, + "step": 54234 + }, + { + "epoch": 0.937154409731822, + "grad_norm": 1.2022610711174455, + "learning_rate": 2.0643767751313492e-07, + "loss": 0.3984, + "step": 54235 + }, + { + "epoch": 0.9371716892452309, + "grad_norm": 1.3049774705052053, + "learning_rate": 2.0632456419903302e-07, + "loss": 0.1304, + "step": 54236 + }, + { + "epoch": 0.9371889687586398, + "grad_norm": 0.9330754377681477, + "learning_rate": 2.0621148155946846e-07, + "loss": 0.6747, + "step": 54237 + }, + { + "epoch": 0.9372062482720487, + "grad_norm": 0.9741951541347682, + "learning_rate": 2.060984295947932e-07, + "loss": 0.8725, + "step": 54238 + }, + { + "epoch": 0.9372235277854576, + "grad_norm": 1.2963499765094573, + "learning_rate": 2.059854083053603e-07, + "loss": 0.2196, + "step": 54239 + }, + { + "epoch": 0.9372408072988665, + "grad_norm": 2.176369466299247, + "learning_rate": 2.058724176915272e-07, + "loss": 0.4184, + "step": 54240 + }, + { + "epoch": 0.9372580868122754, + "grad_norm": 1.3074502632200653, + "learning_rate": 2.0575945775364592e-07, + "loss": 0.4068, + "step": 54241 + }, + { + "epoch": 0.9372753663256843, + "grad_norm": 1.448897943308258, + "learning_rate": 2.0564652849207055e-07, + "loss": 0.3226, + "step": 54242 + }, + { + "epoch": 0.9372926458390932, + "grad_norm": 1.0111055341672361, + "learning_rate": 2.055336299071531e-07, + "loss": 0.3462, + "step": 54243 + }, + { + "epoch": 0.9373099253525021, + "grad_norm": 0.5599540244020377, + "learning_rate": 2.05420761999251e-07, + "loss": 0.7059, + "step": 54244 + }, + { + "epoch": 0.9373272048659109, + "grad_norm": 1.006050190930437, + "learning_rate": 2.0530792476871286e-07, + "loss": 0.2586, + "step": 54245 + }, + { + "epoch": 0.9373444843793198, + "grad_norm": 1.128632853234563, + "learning_rate": 2.0519511821589621e-07, + "loss": 0.3732, + "step": 54246 + }, + { + "epoch": 0.9373617638927287, + "grad_norm": 0.5800980730221211, + "learning_rate": 2.0508234234115187e-07, + "loss": 0.6415, + "step": 54247 + }, + { + "epoch": 0.9373790434061376, + "grad_norm": 0.5540599156175575, + "learning_rate": 2.049695971448351e-07, + "loss": 0.5026, + "step": 54248 + }, + { + "epoch": 0.9373963229195466, + "grad_norm": 1.5764554031053795, + "learning_rate": 2.0485688262729896e-07, + "loss": 0.3431, + "step": 54249 + }, + { + "epoch": 0.9374136024329555, + "grad_norm": 1.143425686745903, + "learning_rate": 2.047441987888932e-07, + "loss": 0.2839, + "step": 54250 + }, + { + "epoch": 0.9374308819463644, + "grad_norm": 1.4595144690789714, + "learning_rate": 2.046315456299741e-07, + "loss": 0.2911, + "step": 54251 + }, + { + "epoch": 0.9374481614597733, + "grad_norm": 1.0328782808219326, + "learning_rate": 2.045189231508926e-07, + "loss": 0.3255, + "step": 54252 + }, + { + "epoch": 0.9374654409731822, + "grad_norm": 1.4103911355764904, + "learning_rate": 2.0440633135200283e-07, + "loss": 0.4375, + "step": 54253 + }, + { + "epoch": 0.9374827204865911, + "grad_norm": 1.3689146421640923, + "learning_rate": 2.0429377023365671e-07, + "loss": 0.4309, + "step": 54254 + }, + { + "epoch": 0.9375, + "grad_norm": 1.00131449182155, + "learning_rate": 2.041812397962073e-07, + "loss": 0.2161, + "step": 54255 + }, + { + "epoch": 0.9375172795134089, + "grad_norm": 1.1741606757139333, + "learning_rate": 2.0406874004000654e-07, + "loss": 0.3922, + "step": 54256 + }, + { + "epoch": 0.9375345590268178, + "grad_norm": 1.1449811099279392, + "learning_rate": 2.039562709654064e-07, + "loss": 0.2775, + "step": 54257 + }, + { + "epoch": 0.9375518385402267, + "grad_norm": 2.1781110771765246, + "learning_rate": 2.0384383257276096e-07, + "loss": 0.4017, + "step": 54258 + }, + { + "epoch": 0.9375691180536356, + "grad_norm": 1.2646034830720823, + "learning_rate": 2.0373142486242003e-07, + "loss": 0.237, + "step": 54259 + }, + { + "epoch": 0.9375863975670445, + "grad_norm": 1.976541415015051, + "learning_rate": 2.0361904783473774e-07, + "loss": 0.287, + "step": 54260 + }, + { + "epoch": 0.9376036770804534, + "grad_norm": 1.463383143907642, + "learning_rate": 2.035067014900638e-07, + "loss": 0.4099, + "step": 54261 + }, + { + "epoch": 0.9376209565938624, + "grad_norm": 1.1494219663451497, + "learning_rate": 2.0339438582875238e-07, + "loss": 0.3219, + "step": 54262 + }, + { + "epoch": 0.9376382361072713, + "grad_norm": 0.6143749161257174, + "learning_rate": 2.0328210085115318e-07, + "loss": 0.3676, + "step": 54263 + }, + { + "epoch": 0.9376555156206802, + "grad_norm": 1.2601001533750622, + "learning_rate": 2.031698465576204e-07, + "loss": 0.3363, + "step": 54264 + }, + { + "epoch": 0.9376727951340891, + "grad_norm": 1.6577662460553249, + "learning_rate": 2.0305762294850373e-07, + "loss": 0.3212, + "step": 54265 + }, + { + "epoch": 0.9376900746474979, + "grad_norm": 1.8315899001280869, + "learning_rate": 2.0294543002415512e-07, + "loss": 0.1708, + "step": 54266 + }, + { + "epoch": 0.9377073541609068, + "grad_norm": 1.5438854452815076, + "learning_rate": 2.028332677849254e-07, + "loss": 0.3446, + "step": 54267 + }, + { + "epoch": 0.9377246336743157, + "grad_norm": 1.4868075416150397, + "learning_rate": 2.027211362311665e-07, + "loss": 0.3793, + "step": 54268 + }, + { + "epoch": 0.9377419131877246, + "grad_norm": 1.264871161659721, + "learning_rate": 2.026090353632304e-07, + "loss": 0.375, + "step": 54269 + }, + { + "epoch": 0.9377591927011335, + "grad_norm": 0.9914950554801348, + "learning_rate": 2.0249696518146677e-07, + "loss": 0.5342, + "step": 54270 + }, + { + "epoch": 0.9377764722145424, + "grad_norm": 1.617154446307568, + "learning_rate": 2.023849256862276e-07, + "loss": 0.2801, + "step": 54271 + }, + { + "epoch": 0.9377937517279513, + "grad_norm": 1.310149890648035, + "learning_rate": 2.0227291687786255e-07, + "loss": 0.4551, + "step": 54272 + }, + { + "epoch": 0.9378110312413602, + "grad_norm": 1.6689216979351242, + "learning_rate": 2.021609387567236e-07, + "loss": 0.3492, + "step": 54273 + }, + { + "epoch": 0.9378283107547691, + "grad_norm": 1.0181982012496422, + "learning_rate": 2.0204899132316158e-07, + "loss": 0.3268, + "step": 54274 + }, + { + "epoch": 0.937845590268178, + "grad_norm": 1.788247661376746, + "learning_rate": 2.019370745775273e-07, + "loss": 0.352, + "step": 54275 + }, + { + "epoch": 0.937862869781587, + "grad_norm": 1.2309942169810446, + "learning_rate": 2.018251885201694e-07, + "loss": 0.2965, + "step": 54276 + }, + { + "epoch": 0.9378801492949959, + "grad_norm": 1.0112646763942141, + "learning_rate": 2.0171333315144093e-07, + "loss": 0.4642, + "step": 54277 + }, + { + "epoch": 0.9378974288084048, + "grad_norm": 1.01780070919892, + "learning_rate": 2.0160150847169046e-07, + "loss": 0.1984, + "step": 54278 + }, + { + "epoch": 0.9379147083218137, + "grad_norm": 1.43485330275604, + "learning_rate": 2.0148971448126774e-07, + "loss": 0.3766, + "step": 54279 + }, + { + "epoch": 0.9379319878352226, + "grad_norm": 1.5699806346540206, + "learning_rate": 2.0137795118052583e-07, + "loss": 0.3649, + "step": 54280 + }, + { + "epoch": 0.9379492673486315, + "grad_norm": 1.3240198157362961, + "learning_rate": 2.012662185698111e-07, + "loss": 0.3744, + "step": 54281 + }, + { + "epoch": 0.9379665468620404, + "grad_norm": 1.9735602604790508, + "learning_rate": 2.0115451664947662e-07, + "loss": 0.1952, + "step": 54282 + }, + { + "epoch": 0.9379838263754493, + "grad_norm": 1.4439186660596885, + "learning_rate": 2.0104284541986984e-07, + "loss": 0.455, + "step": 54283 + }, + { + "epoch": 0.9380011058888582, + "grad_norm": 1.044665954406534, + "learning_rate": 2.0093120488134277e-07, + "loss": 0.3484, + "step": 54284 + }, + { + "epoch": 0.9380183854022671, + "grad_norm": 1.7145451710834037, + "learning_rate": 2.0081959503424176e-07, + "loss": 0.2284, + "step": 54285 + }, + { + "epoch": 0.938035664915676, + "grad_norm": 0.8301576163403666, + "learning_rate": 2.0070801587891985e-07, + "loss": 0.2508, + "step": 54286 + }, + { + "epoch": 0.9380529444290848, + "grad_norm": 1.3352348288320377, + "learning_rate": 2.0059646741572457e-07, + "loss": 0.4367, + "step": 54287 + }, + { + "epoch": 0.9380702239424937, + "grad_norm": 1.6837985984266521, + "learning_rate": 2.0048494964500676e-07, + "loss": 0.2901, + "step": 54288 + }, + { + "epoch": 0.9380875034559026, + "grad_norm": 1.5117583033269775, + "learning_rate": 2.0037346256711497e-07, + "loss": 0.4707, + "step": 54289 + }, + { + "epoch": 0.9381047829693115, + "grad_norm": 1.1738780744850277, + "learning_rate": 2.002620061823979e-07, + "loss": 0.3345, + "step": 54290 + }, + { + "epoch": 0.9381220624827205, + "grad_norm": 0.6575909849504251, + "learning_rate": 2.0015058049120406e-07, + "loss": 0.6705, + "step": 54291 + }, + { + "epoch": 0.9381393419961294, + "grad_norm": 1.3720560205692136, + "learning_rate": 2.0003918549388434e-07, + "loss": 0.2293, + "step": 54292 + }, + { + "epoch": 0.9381566215095383, + "grad_norm": 1.776478506026521, + "learning_rate": 1.9992782119078623e-07, + "loss": 0.294, + "step": 54293 + }, + { + "epoch": 0.9381739010229472, + "grad_norm": 0.9855945016808869, + "learning_rate": 1.9981648758225946e-07, + "loss": 0.1485, + "step": 54294 + }, + { + "epoch": 0.9381911805363561, + "grad_norm": 1.4403996205504164, + "learning_rate": 1.9970518466865152e-07, + "loss": 0.2808, + "step": 54295 + }, + { + "epoch": 0.938208460049765, + "grad_norm": 1.246003922635364, + "learning_rate": 1.9959391245031213e-07, + "loss": 0.3967, + "step": 54296 + }, + { + "epoch": 0.9382257395631739, + "grad_norm": 1.1235784225992793, + "learning_rate": 1.9948267092758878e-07, + "loss": 0.4609, + "step": 54297 + }, + { + "epoch": 0.9382430190765828, + "grad_norm": 2.1193265306051323, + "learning_rate": 1.993714601008312e-07, + "loss": 0.5139, + "step": 54298 + }, + { + "epoch": 0.9382602985899917, + "grad_norm": 1.231111653956629, + "learning_rate": 1.99260279970388e-07, + "loss": 0.2222, + "step": 54299 + }, + { + "epoch": 0.9382775781034006, + "grad_norm": 1.27718666381996, + "learning_rate": 1.9914913053660556e-07, + "loss": 0.5171, + "step": 54300 + }, + { + "epoch": 0.9382948576168095, + "grad_norm": 1.213429714135645, + "learning_rate": 1.990380117998325e-07, + "loss": 0.4243, + "step": 54301 + }, + { + "epoch": 0.9383121371302184, + "grad_norm": 0.9781520311159955, + "learning_rate": 1.9892692376041745e-07, + "loss": 0.1777, + "step": 54302 + }, + { + "epoch": 0.9383294166436273, + "grad_norm": 1.2951231719951088, + "learning_rate": 1.9881586641870677e-07, + "loss": 0.2937, + "step": 54303 + }, + { + "epoch": 0.9383466961570363, + "grad_norm": 1.0847396570235905, + "learning_rate": 1.9870483977505238e-07, + "loss": 0.2548, + "step": 54304 + }, + { + "epoch": 0.9383639756704452, + "grad_norm": 1.0587366663692246, + "learning_rate": 1.985938438297974e-07, + "loss": 0.3141, + "step": 54305 + }, + { + "epoch": 0.9383812551838541, + "grad_norm": 1.4587779513852417, + "learning_rate": 1.9848287858329261e-07, + "loss": 0.3249, + "step": 54306 + }, + { + "epoch": 0.938398534697263, + "grad_norm": 1.7457783412864338, + "learning_rate": 1.9837194403588334e-07, + "loss": 0.2599, + "step": 54307 + }, + { + "epoch": 0.9384158142106718, + "grad_norm": 1.1820669156114112, + "learning_rate": 1.9826104018791814e-07, + "loss": 0.3564, + "step": 54308 + }, + { + "epoch": 0.9384330937240807, + "grad_norm": 2.9351927622849496, + "learning_rate": 1.9815016703974454e-07, + "loss": 0.256, + "step": 54309 + }, + { + "epoch": 0.9384503732374896, + "grad_norm": 1.4951100615595951, + "learning_rate": 1.9803932459170895e-07, + "loss": 0.4283, + "step": 54310 + }, + { + "epoch": 0.9384676527508985, + "grad_norm": 1.3040381487369408, + "learning_rate": 1.9792851284415992e-07, + "loss": 0.1867, + "step": 54311 + }, + { + "epoch": 0.9384849322643074, + "grad_norm": 1.2529220494776252, + "learning_rate": 1.9781773179744391e-07, + "loss": 0.5169, + "step": 54312 + }, + { + "epoch": 0.9385022117777163, + "grad_norm": 1.4044867569102004, + "learning_rate": 1.9770698145190615e-07, + "loss": 0.3606, + "step": 54313 + }, + { + "epoch": 0.9385194912911252, + "grad_norm": 0.8468279112437881, + "learning_rate": 1.9759626180789638e-07, + "loss": 0.6618, + "step": 54314 + }, + { + "epoch": 0.9385367708045341, + "grad_norm": 1.8280673637596505, + "learning_rate": 1.9748557286575987e-07, + "loss": 0.3115, + "step": 54315 + }, + { + "epoch": 0.938554050317943, + "grad_norm": 1.3442590436176867, + "learning_rate": 1.97374914625843e-07, + "loss": 0.4367, + "step": 54316 + }, + { + "epoch": 0.9385713298313519, + "grad_norm": 1.4935792240082668, + "learning_rate": 1.9726428708849442e-07, + "loss": 0.4767, + "step": 54317 + }, + { + "epoch": 0.9385886093447608, + "grad_norm": 1.8456563980860137, + "learning_rate": 1.9715369025405716e-07, + "loss": 0.2801, + "step": 54318 + }, + { + "epoch": 0.9386058888581698, + "grad_norm": 1.7320827325251824, + "learning_rate": 1.9704312412288095e-07, + "loss": 0.2603, + "step": 54319 + }, + { + "epoch": 0.9386231683715787, + "grad_norm": 1.1373316935576108, + "learning_rate": 1.9693258869530997e-07, + "loss": 0.3314, + "step": 54320 + }, + { + "epoch": 0.9386404478849876, + "grad_norm": 1.5301310019685135, + "learning_rate": 1.9682208397169166e-07, + "loss": 0.4042, + "step": 54321 + }, + { + "epoch": 0.9386577273983965, + "grad_norm": 1.6582102367942135, + "learning_rate": 1.9671160995237137e-07, + "loss": 0.1622, + "step": 54322 + }, + { + "epoch": 0.9386750069118054, + "grad_norm": 1.3067393149794226, + "learning_rate": 1.9660116663769436e-07, + "loss": 0.424, + "step": 54323 + }, + { + "epoch": 0.9386922864252143, + "grad_norm": 0.9112948018200878, + "learning_rate": 1.964907540280092e-07, + "loss": 0.2155, + "step": 54324 + }, + { + "epoch": 0.9387095659386232, + "grad_norm": 1.2368310816156904, + "learning_rate": 1.963803721236579e-07, + "loss": 0.2334, + "step": 54325 + }, + { + "epoch": 0.9387268454520321, + "grad_norm": 0.9317414005635479, + "learning_rate": 1.9627002092499014e-07, + "loss": 0.3498, + "step": 54326 + }, + { + "epoch": 0.938744124965441, + "grad_norm": 1.48540093041169, + "learning_rate": 1.96159700432349e-07, + "loss": 0.1886, + "step": 54327 + }, + { + "epoch": 0.9387614044788499, + "grad_norm": 1.1373488725406886, + "learning_rate": 1.9604941064608197e-07, + "loss": 0.4096, + "step": 54328 + }, + { + "epoch": 0.9387786839922588, + "grad_norm": 1.696242881787344, + "learning_rate": 1.9593915156653098e-07, + "loss": 0.2152, + "step": 54329 + }, + { + "epoch": 0.9387959635056676, + "grad_norm": 1.3359984937238825, + "learning_rate": 1.9582892319404578e-07, + "loss": 0.5599, + "step": 54330 + }, + { + "epoch": 0.9388132430190765, + "grad_norm": 1.1517820520065236, + "learning_rate": 1.957187255289683e-07, + "loss": 0.2529, + "step": 54331 + }, + { + "epoch": 0.9388305225324854, + "grad_norm": 0.6674156439337393, + "learning_rate": 1.9560855857164496e-07, + "loss": 0.3713, + "step": 54332 + }, + { + "epoch": 0.9388478020458944, + "grad_norm": 0.6950558763997599, + "learning_rate": 1.95498422322421e-07, + "loss": 0.8026, + "step": 54333 + }, + { + "epoch": 0.9388650815593033, + "grad_norm": 0.7420238379706746, + "learning_rate": 1.953883167816406e-07, + "loss": 0.7784, + "step": 54334 + }, + { + "epoch": 0.9388823610727122, + "grad_norm": 1.4425478851564302, + "learning_rate": 1.9527824194965018e-07, + "loss": 0.4768, + "step": 54335 + }, + { + "epoch": 0.9388996405861211, + "grad_norm": 1.5066280070741491, + "learning_rate": 1.9516819782679274e-07, + "loss": 0.3354, + "step": 54336 + }, + { + "epoch": 0.93891692009953, + "grad_norm": 1.7304669720993229, + "learning_rate": 1.9505818441341362e-07, + "loss": 0.3101, + "step": 54337 + }, + { + "epoch": 0.9389341996129389, + "grad_norm": 1.3386653871549163, + "learning_rate": 1.9494820170985696e-07, + "loss": 0.3979, + "step": 54338 + }, + { + "epoch": 0.9389514791263478, + "grad_norm": 1.3040330281523864, + "learning_rate": 1.9483824971646914e-07, + "loss": 0.3041, + "step": 54339 + }, + { + "epoch": 0.9389687586397567, + "grad_norm": 1.8521169753647684, + "learning_rate": 1.9472832843359214e-07, + "loss": 0.3111, + "step": 54340 + }, + { + "epoch": 0.9389860381531656, + "grad_norm": 1.4641805199449056, + "learning_rate": 1.946184378615712e-07, + "loss": 0.4006, + "step": 54341 + }, + { + "epoch": 0.9390033176665745, + "grad_norm": 1.3669533289761766, + "learning_rate": 1.945085780007494e-07, + "loss": 0.2996, + "step": 54342 + }, + { + "epoch": 0.9390205971799834, + "grad_norm": 0.9240807983442048, + "learning_rate": 1.9439874885147315e-07, + "loss": 0.8135, + "step": 54343 + }, + { + "epoch": 0.9390378766933923, + "grad_norm": 1.3320262373247396, + "learning_rate": 1.9428895041408545e-07, + "loss": 0.3415, + "step": 54344 + }, + { + "epoch": 0.9390551562068012, + "grad_norm": 1.3458512677469157, + "learning_rate": 1.9417918268892944e-07, + "loss": 0.4518, + "step": 54345 + }, + { + "epoch": 0.9390724357202102, + "grad_norm": 1.1237366132832438, + "learning_rate": 1.9406944567635034e-07, + "loss": 0.3233, + "step": 54346 + }, + { + "epoch": 0.9390897152336191, + "grad_norm": 1.1087556865049764, + "learning_rate": 1.9395973937669011e-07, + "loss": 0.148, + "step": 54347 + }, + { + "epoch": 0.939106994747028, + "grad_norm": 1.4414004217047127, + "learning_rate": 1.9385006379029293e-07, + "loss": 0.331, + "step": 54348 + }, + { + "epoch": 0.9391242742604369, + "grad_norm": 2.4432053009171817, + "learning_rate": 1.9374041891750185e-07, + "loss": 0.3945, + "step": 54349 + }, + { + "epoch": 0.9391415537738458, + "grad_norm": 1.1522702020221518, + "learning_rate": 1.9363080475866324e-07, + "loss": 0.1706, + "step": 54350 + }, + { + "epoch": 0.9391588332872546, + "grad_norm": 1.074934406085595, + "learning_rate": 1.93521221314118e-07, + "loss": 0.1232, + "step": 54351 + }, + { + "epoch": 0.9391761128006635, + "grad_norm": 1.2983036128663163, + "learning_rate": 1.9341166858420802e-07, + "loss": 0.5464, + "step": 54352 + }, + { + "epoch": 0.9391933923140724, + "grad_norm": 1.129718973735467, + "learning_rate": 1.9330214656927858e-07, + "loss": 0.3612, + "step": 54353 + }, + { + "epoch": 0.9392106718274813, + "grad_norm": 0.8579875195511222, + "learning_rate": 1.9319265526967167e-07, + "loss": 0.4078, + "step": 54354 + }, + { + "epoch": 0.9392279513408902, + "grad_norm": 1.3435773974332692, + "learning_rate": 1.9308319468573146e-07, + "loss": 0.3991, + "step": 54355 + }, + { + "epoch": 0.9392452308542991, + "grad_norm": 1.8624399532674214, + "learning_rate": 1.9297376481779984e-07, + "loss": 0.3312, + "step": 54356 + }, + { + "epoch": 0.939262510367708, + "grad_norm": 1.188844091302611, + "learning_rate": 1.9286436566621992e-07, + "loss": 0.3165, + "step": 54357 + }, + { + "epoch": 0.9392797898811169, + "grad_norm": 1.5495755405370555, + "learning_rate": 1.9275499723133362e-07, + "loss": 0.3145, + "step": 54358 + }, + { + "epoch": 0.9392970693945258, + "grad_norm": 1.77517411280829, + "learning_rate": 1.926456595134829e-07, + "loss": 0.3947, + "step": 54359 + }, + { + "epoch": 0.9393143489079347, + "grad_norm": 1.336357116746524, + "learning_rate": 1.9253635251301305e-07, + "loss": 0.2967, + "step": 54360 + }, + { + "epoch": 0.9393316284213437, + "grad_norm": 1.2372325940505493, + "learning_rate": 1.924270762302638e-07, + "loss": 0.4859, + "step": 54361 + }, + { + "epoch": 0.9393489079347526, + "grad_norm": 1.0266856842076775, + "learning_rate": 1.923178306655793e-07, + "loss": 0.2079, + "step": 54362 + }, + { + "epoch": 0.9393661874481615, + "grad_norm": 0.7568017318464239, + "learning_rate": 1.922086158193004e-07, + "loss": 0.7898, + "step": 54363 + }, + { + "epoch": 0.9393834669615704, + "grad_norm": 1.4616125229107622, + "learning_rate": 1.9209943169176904e-07, + "loss": 0.5179, + "step": 54364 + }, + { + "epoch": 0.9394007464749793, + "grad_norm": 1.9035887385843906, + "learning_rate": 1.9199027828332716e-07, + "loss": 0.3531, + "step": 54365 + }, + { + "epoch": 0.9394180259883882, + "grad_norm": 1.0354593127859462, + "learning_rate": 1.9188115559431786e-07, + "loss": 0.1736, + "step": 54366 + }, + { + "epoch": 0.9394353055017971, + "grad_norm": 1.5854492675258975, + "learning_rate": 1.9177206362508193e-07, + "loss": 0.2438, + "step": 54367 + }, + { + "epoch": 0.939452585015206, + "grad_norm": 1.2204543444396558, + "learning_rate": 1.9166300237596136e-07, + "loss": 0.3453, + "step": 54368 + }, + { + "epoch": 0.9394698645286149, + "grad_norm": 1.099854359508236, + "learning_rate": 1.9155397184729807e-07, + "loss": 0.3548, + "step": 54369 + }, + { + "epoch": 0.9394871440420238, + "grad_norm": 1.0073190302305586, + "learning_rate": 1.914449720394329e-07, + "loss": 0.1689, + "step": 54370 + }, + { + "epoch": 0.9395044235554327, + "grad_norm": 1.2772032802302533, + "learning_rate": 1.913360029527067e-07, + "loss": 0.2545, + "step": 54371 + }, + { + "epoch": 0.9395217030688415, + "grad_norm": 1.4828979456193612, + "learning_rate": 1.9122706458746255e-07, + "loss": 0.349, + "step": 54372 + }, + { + "epoch": 0.9395389825822504, + "grad_norm": 1.8612285096492645, + "learning_rate": 1.9111815694404012e-07, + "loss": 0.353, + "step": 54373 + }, + { + "epoch": 0.9395562620956593, + "grad_norm": 1.499546578956542, + "learning_rate": 1.910092800227814e-07, + "loss": 0.6814, + "step": 54374 + }, + { + "epoch": 0.9395735416090683, + "grad_norm": 1.8950230597100794, + "learning_rate": 1.9090043382402722e-07, + "loss": 0.4638, + "step": 54375 + }, + { + "epoch": 0.9395908211224772, + "grad_norm": 1.2084424290572389, + "learning_rate": 1.9079161834811733e-07, + "loss": 0.6882, + "step": 54376 + }, + { + "epoch": 0.9396081006358861, + "grad_norm": 1.1635659001705352, + "learning_rate": 1.9068283359539475e-07, + "loss": 0.3553, + "step": 54377 + }, + { + "epoch": 0.939625380149295, + "grad_norm": 2.1996444343569306, + "learning_rate": 1.905740795661981e-07, + "loss": 0.2498, + "step": 54378 + }, + { + "epoch": 0.9396426596627039, + "grad_norm": 1.407845033660021, + "learning_rate": 1.9046535626086937e-07, + "loss": 0.4438, + "step": 54379 + }, + { + "epoch": 0.9396599391761128, + "grad_norm": 1.5512513325108546, + "learning_rate": 1.9035666367974938e-07, + "loss": 0.5971, + "step": 54380 + }, + { + "epoch": 0.9396772186895217, + "grad_norm": 1.630521720898697, + "learning_rate": 1.9024800182317672e-07, + "loss": 0.512, + "step": 54381 + }, + { + "epoch": 0.9396944982029306, + "grad_norm": 1.192680021146267, + "learning_rate": 1.9013937069149334e-07, + "loss": 0.3782, + "step": 54382 + }, + { + "epoch": 0.9397117777163395, + "grad_norm": 1.7978676201907766, + "learning_rate": 1.9003077028503903e-07, + "loss": 0.2555, + "step": 54383 + }, + { + "epoch": 0.9397290572297484, + "grad_norm": 1.1354285117276874, + "learning_rate": 1.8992220060415346e-07, + "loss": 0.3446, + "step": 54384 + }, + { + "epoch": 0.9397463367431573, + "grad_norm": 1.6238086920979684, + "learning_rate": 1.8981366164917748e-07, + "loss": 0.3408, + "step": 54385 + }, + { + "epoch": 0.9397636162565662, + "grad_norm": 1.1233130458644396, + "learning_rate": 1.897051534204508e-07, + "loss": 0.2863, + "step": 54386 + }, + { + "epoch": 0.9397808957699751, + "grad_norm": 1.1892061709460227, + "learning_rate": 1.895966759183121e-07, + "loss": 0.3419, + "step": 54387 + }, + { + "epoch": 0.939798175283384, + "grad_norm": 1.8822993076617853, + "learning_rate": 1.8948822914310327e-07, + "loss": 0.4867, + "step": 54388 + }, + { + "epoch": 0.939815454796793, + "grad_norm": 0.8125009874199866, + "learning_rate": 1.8937981309516185e-07, + "loss": 0.3972, + "step": 54389 + }, + { + "epoch": 0.9398327343102019, + "grad_norm": 1.7202278565412874, + "learning_rate": 1.8927142777482976e-07, + "loss": 0.2913, + "step": 54390 + }, + { + "epoch": 0.9398500138236108, + "grad_norm": 1.7251483226907014, + "learning_rate": 1.8916307318244453e-07, + "loss": 0.5662, + "step": 54391 + }, + { + "epoch": 0.9398672933370197, + "grad_norm": 1.9865792339776445, + "learning_rate": 1.89054749318347e-07, + "loss": 0.2488, + "step": 54392 + }, + { + "epoch": 0.9398845728504285, + "grad_norm": 1.5568762148480688, + "learning_rate": 1.8894645618287356e-07, + "loss": 0.7725, + "step": 54393 + }, + { + "epoch": 0.9399018523638374, + "grad_norm": 1.1807444119186294, + "learning_rate": 1.8883819377636615e-07, + "loss": 0.3427, + "step": 54394 + }, + { + "epoch": 0.9399191318772463, + "grad_norm": 2.1097576165453398, + "learning_rate": 1.887299620991645e-07, + "loss": 0.3915, + "step": 54395 + }, + { + "epoch": 0.9399364113906552, + "grad_norm": 1.5859469143170861, + "learning_rate": 1.8862176115160501e-07, + "loss": 0.3575, + "step": 54396 + }, + { + "epoch": 0.9399536909040641, + "grad_norm": 1.27759702090522, + "learning_rate": 1.8851359093402854e-07, + "loss": 0.2581, + "step": 54397 + }, + { + "epoch": 0.939970970417473, + "grad_norm": 1.6791359143221865, + "learning_rate": 1.8840545144677258e-07, + "loss": 0.281, + "step": 54398 + }, + { + "epoch": 0.9399882499308819, + "grad_norm": 1.1304180261121506, + "learning_rate": 1.8829734269017685e-07, + "loss": 0.2481, + "step": 54399 + }, + { + "epoch": 0.9400055294442908, + "grad_norm": 1.009613487604517, + "learning_rate": 1.8818926466457887e-07, + "loss": 0.3945, + "step": 54400 + }, + { + "epoch": 0.9400228089576997, + "grad_norm": 2.4249038352535788, + "learning_rate": 1.8808121737031838e-07, + "loss": 0.4682, + "step": 54401 + }, + { + "epoch": 0.9400400884711086, + "grad_norm": 1.0756583180404062, + "learning_rate": 1.8797320080773284e-07, + "loss": 0.3385, + "step": 54402 + }, + { + "epoch": 0.9400573679845176, + "grad_norm": 1.2440293317661884, + "learning_rate": 1.8786521497716093e-07, + "loss": 0.3352, + "step": 54403 + }, + { + "epoch": 0.9400746474979265, + "grad_norm": 1.2096404627657948, + "learning_rate": 1.8775725987894123e-07, + "loss": 0.4076, + "step": 54404 + }, + { + "epoch": 0.9400919270113354, + "grad_norm": 1.4571875829828453, + "learning_rate": 1.8764933551341013e-07, + "loss": 0.3727, + "step": 54405 + }, + { + "epoch": 0.9401092065247443, + "grad_norm": 2.245297910119707, + "learning_rate": 1.8754144188090962e-07, + "loss": 0.3334, + "step": 54406 + }, + { + "epoch": 0.9401264860381532, + "grad_norm": 1.2225675520951145, + "learning_rate": 1.8743357898177272e-07, + "loss": 0.4496, + "step": 54407 + }, + { + "epoch": 0.9401437655515621, + "grad_norm": 1.98697944858639, + "learning_rate": 1.8732574681634142e-07, + "loss": 0.4497, + "step": 54408 + }, + { + "epoch": 0.940161045064971, + "grad_norm": 1.5552756928224258, + "learning_rate": 1.8721794538494986e-07, + "loss": 0.192, + "step": 54409 + }, + { + "epoch": 0.9401783245783799, + "grad_norm": 1.6427402722330695, + "learning_rate": 1.871101746879389e-07, + "loss": 0.3708, + "step": 54410 + }, + { + "epoch": 0.9401956040917888, + "grad_norm": 1.1201423097197976, + "learning_rate": 1.8700243472564273e-07, + "loss": 0.3023, + "step": 54411 + }, + { + "epoch": 0.9402128836051977, + "grad_norm": 1.3578299457429395, + "learning_rate": 1.8689472549840214e-07, + "loss": 0.1901, + "step": 54412 + }, + { + "epoch": 0.9402301631186066, + "grad_norm": 1.629514380203862, + "learning_rate": 1.867870470065536e-07, + "loss": 0.3725, + "step": 54413 + }, + { + "epoch": 0.9402474426320154, + "grad_norm": 1.9286580414957868, + "learning_rate": 1.8667939925043232e-07, + "loss": 0.3804, + "step": 54414 + }, + { + "epoch": 0.9402647221454243, + "grad_norm": 1.2893866273417738, + "learning_rate": 1.8657178223037807e-07, + "loss": 0.1671, + "step": 54415 + }, + { + "epoch": 0.9402820016588332, + "grad_norm": 2.2358469453677214, + "learning_rate": 1.8646419594672614e-07, + "loss": 0.218, + "step": 54416 + }, + { + "epoch": 0.9402992811722422, + "grad_norm": 1.0578918788846254, + "learning_rate": 1.8635664039981405e-07, + "loss": 0.346, + "step": 54417 + }, + { + "epoch": 0.9403165606856511, + "grad_norm": 0.92461961964889, + "learning_rate": 1.8624911558997815e-07, + "loss": 0.2304, + "step": 54418 + }, + { + "epoch": 0.94033384019906, + "grad_norm": 1.4320746797003163, + "learning_rate": 1.861416215175571e-07, + "loss": 0.2693, + "step": 54419 + }, + { + "epoch": 0.9403511197124689, + "grad_norm": 1.1590816117613607, + "learning_rate": 1.8603415818288617e-07, + "loss": 0.7224, + "step": 54420 + }, + { + "epoch": 0.9403683992258778, + "grad_norm": 1.3218802261706575, + "learning_rate": 1.8592672558630287e-07, + "loss": 0.4273, + "step": 54421 + }, + { + "epoch": 0.9403856787392867, + "grad_norm": 1.2940212118614938, + "learning_rate": 1.8581932372814138e-07, + "loss": 0.336, + "step": 54422 + }, + { + "epoch": 0.9404029582526956, + "grad_norm": 1.836256497762853, + "learning_rate": 1.857119526087392e-07, + "loss": 0.2561, + "step": 54423 + }, + { + "epoch": 0.9404202377661045, + "grad_norm": 1.6705722921551338, + "learning_rate": 1.8560461222843496e-07, + "loss": 0.3199, + "step": 54424 + }, + { + "epoch": 0.9404375172795134, + "grad_norm": 1.1573589178338635, + "learning_rate": 1.854973025875617e-07, + "loss": 0.309, + "step": 54425 + }, + { + "epoch": 0.9404547967929223, + "grad_norm": 1.1407857994584356, + "learning_rate": 1.8539002368645697e-07, + "loss": 0.3929, + "step": 54426 + }, + { + "epoch": 0.9404720763063312, + "grad_norm": 1.3274574815433298, + "learning_rate": 1.8528277552545714e-07, + "loss": 0.3559, + "step": 54427 + }, + { + "epoch": 0.9404893558197401, + "grad_norm": 2.0442974200321316, + "learning_rate": 1.851755581048964e-07, + "loss": 0.2626, + "step": 54428 + }, + { + "epoch": 0.940506635333149, + "grad_norm": 1.849972849709279, + "learning_rate": 1.8506837142511225e-07, + "loss": 0.4041, + "step": 54429 + }, + { + "epoch": 0.940523914846558, + "grad_norm": 1.907600196598879, + "learning_rate": 1.8496121548643998e-07, + "loss": 0.3573, + "step": 54430 + }, + { + "epoch": 0.9405411943599669, + "grad_norm": 1.405807922232153, + "learning_rate": 1.8485409028921485e-07, + "loss": 0.3256, + "step": 54431 + }, + { + "epoch": 0.9405584738733758, + "grad_norm": 1.6378484193614862, + "learning_rate": 1.8474699583377332e-07, + "loss": 0.3416, + "step": 54432 + }, + { + "epoch": 0.9405757533867847, + "grad_norm": 1.7553110455264656, + "learning_rate": 1.846399321204495e-07, + "loss": 0.3293, + "step": 54433 + }, + { + "epoch": 0.9405930329001936, + "grad_norm": 1.6212928550346792, + "learning_rate": 1.8453289914957983e-07, + "loss": 0.3765, + "step": 54434 + }, + { + "epoch": 0.9406103124136024, + "grad_norm": 1.2231634386489765, + "learning_rate": 1.8442589692149848e-07, + "loss": 0.3587, + "step": 54435 + }, + { + "epoch": 0.9406275919270113, + "grad_norm": 1.9835155771279784, + "learning_rate": 1.8431892543654185e-07, + "loss": 0.2568, + "step": 54436 + }, + { + "epoch": 0.9406448714404202, + "grad_norm": 1.4678224759519067, + "learning_rate": 1.8421198469504409e-07, + "loss": 0.4621, + "step": 54437 + }, + { + "epoch": 0.9406621509538291, + "grad_norm": 1.8715930902563407, + "learning_rate": 1.841050746973394e-07, + "loss": 0.3459, + "step": 54438 + }, + { + "epoch": 0.940679430467238, + "grad_norm": 1.1422761175353824, + "learning_rate": 1.8399819544376418e-07, + "loss": 0.3455, + "step": 54439 + }, + { + "epoch": 0.9406967099806469, + "grad_norm": 1.3828423896338997, + "learning_rate": 1.838913469346526e-07, + "loss": 0.2495, + "step": 54440 + }, + { + "epoch": 0.9407139894940558, + "grad_norm": 2.997320553111537, + "learning_rate": 1.8378452917033995e-07, + "loss": 0.2899, + "step": 54441 + }, + { + "epoch": 0.9407312690074647, + "grad_norm": 1.6561027333296652, + "learning_rate": 1.836777421511593e-07, + "loss": 0.2665, + "step": 54442 + }, + { + "epoch": 0.9407485485208736, + "grad_norm": 1.3913773193451122, + "learning_rate": 1.8357098587744704e-07, + "loss": 0.286, + "step": 54443 + }, + { + "epoch": 0.9407658280342825, + "grad_norm": 2.5169907586198073, + "learning_rate": 1.8346426034953735e-07, + "loss": 0.4306, + "step": 54444 + }, + { + "epoch": 0.9407831075476915, + "grad_norm": 1.2535027869920294, + "learning_rate": 1.8335756556775997e-07, + "loss": 0.2725, + "step": 54445 + }, + { + "epoch": 0.9408003870611004, + "grad_norm": 1.3569350692096132, + "learning_rate": 1.8325090153245573e-07, + "loss": 0.3909, + "step": 54446 + }, + { + "epoch": 0.9408176665745093, + "grad_norm": 0.7356226834164895, + "learning_rate": 1.8314426824395436e-07, + "loss": 0.2882, + "step": 54447 + }, + { + "epoch": 0.9408349460879182, + "grad_norm": 0.9489725629783494, + "learning_rate": 1.8303766570259229e-07, + "loss": 0.3941, + "step": 54448 + }, + { + "epoch": 0.9408522256013271, + "grad_norm": 1.6544366823732808, + "learning_rate": 1.8293109390870145e-07, + "loss": 0.4894, + "step": 54449 + }, + { + "epoch": 0.940869505114736, + "grad_norm": 1.4956125759353487, + "learning_rate": 1.8282455286261712e-07, + "loss": 0.2731, + "step": 54450 + }, + { + "epoch": 0.9408867846281449, + "grad_norm": 1.1628780983735678, + "learning_rate": 1.827180425646724e-07, + "loss": 0.4853, + "step": 54451 + }, + { + "epoch": 0.9409040641415538, + "grad_norm": 1.1934331868522376, + "learning_rate": 1.8261156301520032e-07, + "loss": 0.7204, + "step": 54452 + }, + { + "epoch": 0.9409213436549627, + "grad_norm": 0.9344556015425761, + "learning_rate": 1.8250511421453399e-07, + "loss": 0.2204, + "step": 54453 + }, + { + "epoch": 0.9409386231683716, + "grad_norm": 0.7229908943845347, + "learning_rate": 1.8239869616300977e-07, + "loss": 0.6523, + "step": 54454 + }, + { + "epoch": 0.9409559026817805, + "grad_norm": 1.0563282803518312, + "learning_rate": 1.822923088609585e-07, + "loss": 0.32, + "step": 54455 + }, + { + "epoch": 0.9409731821951893, + "grad_norm": 0.7030982960103119, + "learning_rate": 1.8218595230871328e-07, + "loss": 0.2091, + "step": 54456 + }, + { + "epoch": 0.9409904617085982, + "grad_norm": 0.797286309861078, + "learning_rate": 1.8207962650660714e-07, + "loss": 0.3612, + "step": 54457 + }, + { + "epoch": 0.9410077412220071, + "grad_norm": 1.4309050584886538, + "learning_rate": 1.8197333145497432e-07, + "loss": 0.4217, + "step": 54458 + }, + { + "epoch": 0.941025020735416, + "grad_norm": 1.1328510000721208, + "learning_rate": 1.8186706715414782e-07, + "loss": 0.3874, + "step": 54459 + }, + { + "epoch": 0.941042300248825, + "grad_norm": 1.3494671077728109, + "learning_rate": 1.817608336044596e-07, + "loss": 0.3434, + "step": 54460 + }, + { + "epoch": 0.9410595797622339, + "grad_norm": 0.9849716548890841, + "learning_rate": 1.8165463080624278e-07, + "loss": 0.3913, + "step": 54461 + }, + { + "epoch": 0.9410768592756428, + "grad_norm": 0.9453176561807463, + "learning_rate": 1.8154845875982818e-07, + "loss": 0.2581, + "step": 54462 + }, + { + "epoch": 0.9410941387890517, + "grad_norm": 1.3930954171336203, + "learning_rate": 1.8144231746555107e-07, + "loss": 0.2311, + "step": 54463 + }, + { + "epoch": 0.9411114183024606, + "grad_norm": 1.8005955635150905, + "learning_rate": 1.8133620692374344e-07, + "loss": 0.213, + "step": 54464 + }, + { + "epoch": 0.9411286978158695, + "grad_norm": 1.3949821709442387, + "learning_rate": 1.812301271347361e-07, + "loss": 0.2387, + "step": 54465 + }, + { + "epoch": 0.9411459773292784, + "grad_norm": 0.8618090141739748, + "learning_rate": 1.8112407809886324e-07, + "loss": 0.4707, + "step": 54466 + }, + { + "epoch": 0.9411632568426873, + "grad_norm": 0.889320424880008, + "learning_rate": 1.810180598164557e-07, + "loss": 0.3653, + "step": 54467 + }, + { + "epoch": 0.9411805363560962, + "grad_norm": 2.0616891441260545, + "learning_rate": 1.8091207228784434e-07, + "loss": 0.2588, + "step": 54468 + }, + { + "epoch": 0.9411978158695051, + "grad_norm": 1.123758075745627, + "learning_rate": 1.8080611551336335e-07, + "loss": 0.4666, + "step": 54469 + }, + { + "epoch": 0.941215095382914, + "grad_norm": 1.1897412891892862, + "learning_rate": 1.8070018949334467e-07, + "loss": 0.3606, + "step": 54470 + }, + { + "epoch": 0.9412323748963229, + "grad_norm": 1.1246344580998215, + "learning_rate": 1.8059429422811914e-07, + "loss": 0.4711, + "step": 54471 + }, + { + "epoch": 0.9412496544097319, + "grad_norm": 1.4975262472016428, + "learning_rate": 1.8048842971801873e-07, + "loss": 0.3955, + "step": 54472 + }, + { + "epoch": 0.9412669339231408, + "grad_norm": 1.3326077970247896, + "learning_rate": 1.8038259596337316e-07, + "loss": 0.4083, + "step": 54473 + }, + { + "epoch": 0.9412842134365497, + "grad_norm": 1.1601735131113438, + "learning_rate": 1.802767929645166e-07, + "loss": 0.396, + "step": 54474 + }, + { + "epoch": 0.9413014929499586, + "grad_norm": 1.1603088389423029, + "learning_rate": 1.8017102072177993e-07, + "loss": 0.3729, + "step": 54475 + }, + { + "epoch": 0.9413187724633675, + "grad_norm": 1.5333933158989115, + "learning_rate": 1.800652792354929e-07, + "loss": 0.3849, + "step": 54476 + }, + { + "epoch": 0.9413360519767764, + "grad_norm": 1.6219974623117306, + "learning_rate": 1.799595685059885e-07, + "loss": 0.248, + "step": 54477 + }, + { + "epoch": 0.9413533314901852, + "grad_norm": 1.510739991701778, + "learning_rate": 1.7985388853359654e-07, + "loss": 0.289, + "step": 54478 + }, + { + "epoch": 0.9413706110035941, + "grad_norm": 1.0933114507984631, + "learning_rate": 1.797482393186478e-07, + "loss": 0.2922, + "step": 54479 + }, + { + "epoch": 0.941387890517003, + "grad_norm": 1.5260558215231188, + "learning_rate": 1.7964262086147543e-07, + "loss": 0.4906, + "step": 54480 + }, + { + "epoch": 0.9414051700304119, + "grad_norm": 1.0135807838381625, + "learning_rate": 1.79537033162408e-07, + "loss": 0.3289, + "step": 54481 + }, + { + "epoch": 0.9414224495438208, + "grad_norm": 0.9213686386957135, + "learning_rate": 1.7943147622177638e-07, + "loss": 0.3724, + "step": 54482 + }, + { + "epoch": 0.9414397290572297, + "grad_norm": 1.5229679336760338, + "learning_rate": 1.793259500399136e-07, + "loss": 0.1235, + "step": 54483 + }, + { + "epoch": 0.9414570085706386, + "grad_norm": 0.7285884219742508, + "learning_rate": 1.7922045461714722e-07, + "loss": 0.255, + "step": 54484 + }, + { + "epoch": 0.9414742880840475, + "grad_norm": 1.7367529009176903, + "learning_rate": 1.7911498995380694e-07, + "loss": 0.3049, + "step": 54485 + }, + { + "epoch": 0.9414915675974564, + "grad_norm": 0.8314683987070307, + "learning_rate": 1.7900955605022695e-07, + "loss": 0.1674, + "step": 54486 + }, + { + "epoch": 0.9415088471108654, + "grad_norm": 1.0779994213306392, + "learning_rate": 1.789041529067348e-07, + "loss": 0.2136, + "step": 54487 + }, + { + "epoch": 0.9415261266242743, + "grad_norm": 1.3733146016743099, + "learning_rate": 1.787987805236613e-07, + "loss": 0.2967, + "step": 54488 + }, + { + "epoch": 0.9415434061376832, + "grad_norm": 1.3110934948042805, + "learning_rate": 1.7869343890133617e-07, + "loss": 0.3756, + "step": 54489 + }, + { + "epoch": 0.9415606856510921, + "grad_norm": 1.610657095999171, + "learning_rate": 1.7858812804009028e-07, + "loss": 0.2529, + "step": 54490 + }, + { + "epoch": 0.941577965164501, + "grad_norm": 0.8222641998111362, + "learning_rate": 1.7848284794025228e-07, + "loss": 0.1807, + "step": 54491 + }, + { + "epoch": 0.9415952446779099, + "grad_norm": 1.037083288759553, + "learning_rate": 1.7837759860215187e-07, + "loss": 0.3089, + "step": 54492 + }, + { + "epoch": 0.9416125241913188, + "grad_norm": 1.0000793355203965, + "learning_rate": 1.782723800261199e-07, + "loss": 0.3409, + "step": 54493 + }, + { + "epoch": 0.9416298037047277, + "grad_norm": 1.4164008683257705, + "learning_rate": 1.78167192212485e-07, + "loss": 0.3624, + "step": 54494 + }, + { + "epoch": 0.9416470832181366, + "grad_norm": 0.9289483987317931, + "learning_rate": 1.7806203516157804e-07, + "loss": 0.1188, + "step": 54495 + }, + { + "epoch": 0.9416643627315455, + "grad_norm": 1.3913073485932221, + "learning_rate": 1.779569088737254e-07, + "loss": 0.4463, + "step": 54496 + }, + { + "epoch": 0.9416816422449544, + "grad_norm": 1.2354251703815589, + "learning_rate": 1.7785181334925906e-07, + "loss": 0.419, + "step": 54497 + }, + { + "epoch": 0.9416989217583633, + "grad_norm": 2.1988284742858157, + "learning_rate": 1.777467485885065e-07, + "loss": 0.3141, + "step": 54498 + }, + { + "epoch": 0.9417162012717721, + "grad_norm": 1.1628015382874097, + "learning_rate": 1.7764171459179857e-07, + "loss": 0.2905, + "step": 54499 + }, + { + "epoch": 0.941733480785181, + "grad_norm": 1.1266128438614014, + "learning_rate": 1.7753671135946283e-07, + "loss": 0.3543, + "step": 54500 + }, + { + "epoch": 0.94175076029859, + "grad_norm": 0.7002246294571897, + "learning_rate": 1.7743173889182897e-07, + "loss": 0.7591, + "step": 54501 + }, + { + "epoch": 0.9417680398119989, + "grad_norm": 1.4398320855067013, + "learning_rate": 1.7732679718922564e-07, + "loss": 0.3129, + "step": 54502 + }, + { + "epoch": 0.9417853193254078, + "grad_norm": 0.9143317305659545, + "learning_rate": 1.7722188625198034e-07, + "loss": 0.6129, + "step": 54503 + }, + { + "epoch": 0.9418025988388167, + "grad_norm": 0.8481990507438893, + "learning_rate": 1.7711700608042282e-07, + "loss": 0.1825, + "step": 54504 + }, + { + "epoch": 0.9418198783522256, + "grad_norm": 1.2669191410824057, + "learning_rate": 1.770121566748817e-07, + "loss": 0.4585, + "step": 54505 + }, + { + "epoch": 0.9418371578656345, + "grad_norm": 1.643672980418806, + "learning_rate": 1.769073380356856e-07, + "loss": 0.2191, + "step": 54506 + }, + { + "epoch": 0.9418544373790434, + "grad_norm": 2.0259591525968057, + "learning_rate": 1.7680255016316095e-07, + "loss": 0.3032, + "step": 54507 + }, + { + "epoch": 0.9418717168924523, + "grad_norm": 1.5559365091093347, + "learning_rate": 1.7669779305763744e-07, + "loss": 0.5729, + "step": 54508 + }, + { + "epoch": 0.9418889964058612, + "grad_norm": 1.8666224290321791, + "learning_rate": 1.7659306671944377e-07, + "loss": 0.4931, + "step": 54509 + }, + { + "epoch": 0.9419062759192701, + "grad_norm": 2.295931486898285, + "learning_rate": 1.7648837114890737e-07, + "loss": 0.2181, + "step": 54510 + }, + { + "epoch": 0.941923555432679, + "grad_norm": 0.9331316073378705, + "learning_rate": 1.763837063463547e-07, + "loss": 0.5748, + "step": 54511 + }, + { + "epoch": 0.9419408349460879, + "grad_norm": 1.7125419990205417, + "learning_rate": 1.762790723121155e-07, + "loss": 0.27, + "step": 54512 + }, + { + "epoch": 0.9419581144594968, + "grad_norm": 1.5044832866777702, + "learning_rate": 1.7617446904651726e-07, + "loss": 0.7285, + "step": 54513 + }, + { + "epoch": 0.9419753939729058, + "grad_norm": 1.2319823204632376, + "learning_rate": 1.7606989654988639e-07, + "loss": 0.2703, + "step": 54514 + }, + { + "epoch": 0.9419926734863147, + "grad_norm": 1.8363137981146707, + "learning_rate": 1.759653548225515e-07, + "loss": 0.3282, + "step": 54515 + }, + { + "epoch": 0.9420099529997236, + "grad_norm": 1.6428213596072851, + "learning_rate": 1.7586084386484015e-07, + "loss": 0.3883, + "step": 54516 + }, + { + "epoch": 0.9420272325131325, + "grad_norm": 1.316904166602234, + "learning_rate": 1.7575636367707872e-07, + "loss": 0.3505, + "step": 54517 + }, + { + "epoch": 0.9420445120265414, + "grad_norm": 1.5520890346770306, + "learning_rate": 1.756519142595947e-07, + "loss": 0.5507, + "step": 54518 + }, + { + "epoch": 0.9420617915399503, + "grad_norm": 0.8115466008549391, + "learning_rate": 1.7554749561271568e-07, + "loss": 0.2822, + "step": 54519 + }, + { + "epoch": 0.9420790710533591, + "grad_norm": 0.8542729307016766, + "learning_rate": 1.754431077367691e-07, + "loss": 0.4049, + "step": 54520 + }, + { + "epoch": 0.942096350566768, + "grad_norm": 0.9170185848028228, + "learning_rate": 1.753387506320814e-07, + "loss": 0.3052, + "step": 54521 + }, + { + "epoch": 0.9421136300801769, + "grad_norm": 1.2958527402743922, + "learning_rate": 1.7523442429897785e-07, + "loss": 0.2929, + "step": 54522 + }, + { + "epoch": 0.9421309095935858, + "grad_norm": 1.0336662261529543, + "learning_rate": 1.7513012873778933e-07, + "loss": 0.3891, + "step": 54523 + }, + { + "epoch": 0.9421481891069947, + "grad_norm": 1.628288743446116, + "learning_rate": 1.750258639488378e-07, + "loss": 0.2927, + "step": 54524 + }, + { + "epoch": 0.9421654686204036, + "grad_norm": 0.9142693898075852, + "learning_rate": 1.7492162993245187e-07, + "loss": 0.5842, + "step": 54525 + }, + { + "epoch": 0.9421827481338125, + "grad_norm": 0.9090771006200489, + "learning_rate": 1.7481742668895906e-07, + "loss": 0.7042, + "step": 54526 + }, + { + "epoch": 0.9422000276472214, + "grad_norm": 1.179917252103493, + "learning_rate": 1.7471325421868467e-07, + "loss": 0.4421, + "step": 54527 + }, + { + "epoch": 0.9422173071606303, + "grad_norm": 1.2714194677038773, + "learning_rate": 1.746091125219551e-07, + "loss": 0.4006, + "step": 54528 + }, + { + "epoch": 0.9422345866740393, + "grad_norm": 1.5770072608416117, + "learning_rate": 1.7450500159909679e-07, + "loss": 0.3895, + "step": 54529 + }, + { + "epoch": 0.9422518661874482, + "grad_norm": 1.4072673430060085, + "learning_rate": 1.7440092145043497e-07, + "loss": 0.5223, + "step": 54530 + }, + { + "epoch": 0.9422691457008571, + "grad_norm": 0.9447709514403472, + "learning_rate": 1.742968720762961e-07, + "loss": 0.5021, + "step": 54531 + }, + { + "epoch": 0.942286425214266, + "grad_norm": 1.135441059498864, + "learning_rate": 1.7419285347700544e-07, + "loss": 0.2872, + "step": 54532 + }, + { + "epoch": 0.9423037047276749, + "grad_norm": 1.1558386286825508, + "learning_rate": 1.740888656528905e-07, + "loss": 0.3043, + "step": 54533 + }, + { + "epoch": 0.9423209842410838, + "grad_norm": 0.7887839596315188, + "learning_rate": 1.7398490860427663e-07, + "loss": 0.2005, + "step": 54534 + }, + { + "epoch": 0.9423382637544927, + "grad_norm": 1.0970188602553363, + "learning_rate": 1.7388098233148688e-07, + "loss": 0.2745, + "step": 54535 + }, + { + "epoch": 0.9423555432679016, + "grad_norm": 0.8607501892219046, + "learning_rate": 1.7377708683485095e-07, + "loss": 0.2259, + "step": 54536 + }, + { + "epoch": 0.9423728227813105, + "grad_norm": 1.4618197605246583, + "learning_rate": 1.7367322211468972e-07, + "loss": 0.4853, + "step": 54537 + }, + { + "epoch": 0.9423901022947194, + "grad_norm": 2.585869078326247, + "learning_rate": 1.7356938817133183e-07, + "loss": 0.2264, + "step": 54538 + }, + { + "epoch": 0.9424073818081283, + "grad_norm": 1.3152427070092032, + "learning_rate": 1.7346558500510147e-07, + "loss": 0.3649, + "step": 54539 + }, + { + "epoch": 0.9424246613215372, + "grad_norm": 0.9192173279615281, + "learning_rate": 1.733618126163239e-07, + "loss": 0.9123, + "step": 54540 + }, + { + "epoch": 0.942441940834946, + "grad_norm": 0.6889444991177185, + "learning_rate": 1.7325807100532333e-07, + "loss": 0.1858, + "step": 54541 + }, + { + "epoch": 0.9424592203483549, + "grad_norm": 1.9328553411888645, + "learning_rate": 1.7315436017242504e-07, + "loss": 0.2216, + "step": 54542 + }, + { + "epoch": 0.9424764998617639, + "grad_norm": 0.9765200585278454, + "learning_rate": 1.7305068011795435e-07, + "loss": 0.2842, + "step": 54543 + }, + { + "epoch": 0.9424937793751728, + "grad_norm": 1.1968032634903722, + "learning_rate": 1.7294703084223651e-07, + "loss": 0.233, + "step": 54544 + }, + { + "epoch": 0.9425110588885817, + "grad_norm": 0.756729895824421, + "learning_rate": 1.7284341234559465e-07, + "loss": 0.2764, + "step": 54545 + }, + { + "epoch": 0.9425283384019906, + "grad_norm": 1.1117235831965357, + "learning_rate": 1.7273982462835513e-07, + "loss": 0.2702, + "step": 54546 + }, + { + "epoch": 0.9425456179153995, + "grad_norm": 1.2925947665762245, + "learning_rate": 1.7263626769083997e-07, + "loss": 0.3325, + "step": 54547 + }, + { + "epoch": 0.9425628974288084, + "grad_norm": 1.2295125865721381, + "learning_rate": 1.7253274153337552e-07, + "loss": 0.1747, + "step": 54548 + }, + { + "epoch": 0.9425801769422173, + "grad_norm": 1.305704603496245, + "learning_rate": 1.7242924615628487e-07, + "loss": 0.4343, + "step": 54549 + }, + { + "epoch": 0.9425974564556262, + "grad_norm": 1.189294403928793, + "learning_rate": 1.7232578155989333e-07, + "loss": 0.498, + "step": 54550 + }, + { + "epoch": 0.9426147359690351, + "grad_norm": 0.6218296816021126, + "learning_rate": 1.7222234774452394e-07, + "loss": 0.6194, + "step": 54551 + }, + { + "epoch": 0.942632015482444, + "grad_norm": 1.4854167671484102, + "learning_rate": 1.7211894471050094e-07, + "loss": 0.3272, + "step": 54552 + }, + { + "epoch": 0.9426492949958529, + "grad_norm": 1.383660679516714, + "learning_rate": 1.7201557245814848e-07, + "loss": 0.2484, + "step": 54553 + }, + { + "epoch": 0.9426665745092618, + "grad_norm": 1.4503760080263004, + "learning_rate": 1.7191223098778965e-07, + "loss": 0.3567, + "step": 54554 + }, + { + "epoch": 0.9426838540226707, + "grad_norm": 0.7902862755865753, + "learning_rate": 1.7180892029974971e-07, + "loss": 0.3619, + "step": 54555 + }, + { + "epoch": 0.9427011335360796, + "grad_norm": 1.386016356145092, + "learning_rate": 1.7170564039435067e-07, + "loss": 0.4046, + "step": 54556 + }, + { + "epoch": 0.9427184130494886, + "grad_norm": 1.103675638678304, + "learning_rate": 1.7160239127191668e-07, + "loss": 0.262, + "step": 54557 + }, + { + "epoch": 0.9427356925628975, + "grad_norm": 1.3233227476088607, + "learning_rate": 1.7149917293277085e-07, + "loss": 0.2142, + "step": 54558 + }, + { + "epoch": 0.9427529720763064, + "grad_norm": 0.6356074260113975, + "learning_rate": 1.7139598537723624e-07, + "loss": 0.6665, + "step": 54559 + }, + { + "epoch": 0.9427702515897153, + "grad_norm": 2.0288992162259447, + "learning_rate": 1.712928286056359e-07, + "loss": 0.2371, + "step": 54560 + }, + { + "epoch": 0.9427875311031242, + "grad_norm": 1.7012083007206387, + "learning_rate": 1.7118970261829405e-07, + "loss": 0.5363, + "step": 54561 + }, + { + "epoch": 0.942804810616533, + "grad_norm": 1.241743721187033, + "learning_rate": 1.710866074155326e-07, + "loss": 0.4343, + "step": 54562 + }, + { + "epoch": 0.9428220901299419, + "grad_norm": 0.9610529917194331, + "learning_rate": 1.7098354299767582e-07, + "loss": 0.3871, + "step": 54563 + }, + { + "epoch": 0.9428393696433508, + "grad_norm": 1.671644082803917, + "learning_rate": 1.7088050936504452e-07, + "loss": 0.4988, + "step": 54564 + }, + { + "epoch": 0.9428566491567597, + "grad_norm": 1.125667192528216, + "learning_rate": 1.7077750651796287e-07, + "loss": 0.4823, + "step": 54565 + }, + { + "epoch": 0.9428739286701686, + "grad_norm": 1.22673590365926, + "learning_rate": 1.7067453445675287e-07, + "loss": 0.2581, + "step": 54566 + }, + { + "epoch": 0.9428912081835775, + "grad_norm": 1.2411482423646754, + "learning_rate": 1.7057159318173754e-07, + "loss": 0.3694, + "step": 54567 + }, + { + "epoch": 0.9429084876969864, + "grad_norm": 1.4942788777972373, + "learning_rate": 1.704686826932389e-07, + "loss": 0.3265, + "step": 54568 + }, + { + "epoch": 0.9429257672103953, + "grad_norm": 1.667817515072836, + "learning_rate": 1.703658029915789e-07, + "loss": 0.3022, + "step": 54569 + }, + { + "epoch": 0.9429430467238042, + "grad_norm": 2.0366405253535063, + "learning_rate": 1.702629540770817e-07, + "loss": 0.1767, + "step": 54570 + }, + { + "epoch": 0.9429603262372132, + "grad_norm": 0.9242704003657783, + "learning_rate": 1.7016013595006598e-07, + "loss": 0.1973, + "step": 54571 + }, + { + "epoch": 0.9429776057506221, + "grad_norm": 2.1239036540762752, + "learning_rate": 1.70057348610857e-07, + "loss": 0.3368, + "step": 54572 + }, + { + "epoch": 0.942994885264031, + "grad_norm": 1.388366416890536, + "learning_rate": 1.699545920597745e-07, + "loss": 0.3135, + "step": 54573 + }, + { + "epoch": 0.9430121647774399, + "grad_norm": 0.9606221428514007, + "learning_rate": 1.6985186629714267e-07, + "loss": 0.6161, + "step": 54574 + }, + { + "epoch": 0.9430294442908488, + "grad_norm": 2.6181081376434854, + "learning_rate": 1.6974917132328016e-07, + "loss": 0.3404, + "step": 54575 + }, + { + "epoch": 0.9430467238042577, + "grad_norm": 1.1748343206646763, + "learning_rate": 1.6964650713851228e-07, + "loss": 0.2012, + "step": 54576 + }, + { + "epoch": 0.9430640033176666, + "grad_norm": 0.8459183212618794, + "learning_rate": 1.695438737431576e-07, + "loss": 0.7667, + "step": 54577 + }, + { + "epoch": 0.9430812828310755, + "grad_norm": 0.8350303906218299, + "learning_rate": 1.6944127113753816e-07, + "loss": 0.7465, + "step": 54578 + }, + { + "epoch": 0.9430985623444844, + "grad_norm": 3.415163098207583, + "learning_rate": 1.6933869932197588e-07, + "loss": 0.2762, + "step": 54579 + }, + { + "epoch": 0.9431158418578933, + "grad_norm": 1.1838045119433287, + "learning_rate": 1.6923615829679273e-07, + "loss": 0.4459, + "step": 54580 + }, + { + "epoch": 0.9431331213713022, + "grad_norm": 1.526927736917332, + "learning_rate": 1.6913364806230846e-07, + "loss": 0.3828, + "step": 54581 + }, + { + "epoch": 0.9431504008847111, + "grad_norm": 1.1247959584667349, + "learning_rate": 1.6903116861884505e-07, + "loss": 0.2895, + "step": 54582 + }, + { + "epoch": 0.9431676803981199, + "grad_norm": 1.088095532450561, + "learning_rate": 1.6892871996672223e-07, + "loss": 0.3801, + "step": 54583 + }, + { + "epoch": 0.9431849599115288, + "grad_norm": 1.21920324906505, + "learning_rate": 1.6882630210626194e-07, + "loss": 0.1816, + "step": 54584 + }, + { + "epoch": 0.9432022394249377, + "grad_norm": 1.2322160402439888, + "learning_rate": 1.6872391503778617e-07, + "loss": 0.3628, + "step": 54585 + }, + { + "epoch": 0.9432195189383467, + "grad_norm": 1.7312363721005908, + "learning_rate": 1.6862155876161245e-07, + "loss": 0.2369, + "step": 54586 + }, + { + "epoch": 0.9432367984517556, + "grad_norm": 1.5291096197967848, + "learning_rate": 1.6851923327806496e-07, + "loss": 0.2589, + "step": 54587 + }, + { + "epoch": 0.9432540779651645, + "grad_norm": 0.9463196365646832, + "learning_rate": 1.6841693858746121e-07, + "loss": 0.269, + "step": 54588 + }, + { + "epoch": 0.9432713574785734, + "grad_norm": 0.9089165290558838, + "learning_rate": 1.6831467469012318e-07, + "loss": 0.4444, + "step": 54589 + }, + { + "epoch": 0.9432886369919823, + "grad_norm": 1.5444106955164938, + "learning_rate": 1.682124415863706e-07, + "loss": 0.4898, + "step": 54590 + }, + { + "epoch": 0.9433059165053912, + "grad_norm": 1.2178437802799846, + "learning_rate": 1.6811023927652326e-07, + "loss": 0.2791, + "step": 54591 + }, + { + "epoch": 0.9433231960188001, + "grad_norm": 1.1810426457090657, + "learning_rate": 1.6800806776090305e-07, + "loss": 0.2747, + "step": 54592 + }, + { + "epoch": 0.943340475532209, + "grad_norm": 0.8885366880677904, + "learning_rate": 1.6790592703982868e-07, + "loss": 0.2973, + "step": 54593 + }, + { + "epoch": 0.9433577550456179, + "grad_norm": 1.1379579775074784, + "learning_rate": 1.678038171136187e-07, + "loss": 0.2119, + "step": 54594 + }, + { + "epoch": 0.9433750345590268, + "grad_norm": 0.8591628422195485, + "learning_rate": 1.6770173798259514e-07, + "loss": 0.6813, + "step": 54595 + }, + { + "epoch": 0.9433923140724357, + "grad_norm": 1.219597001069342, + "learning_rate": 1.6759968964707775e-07, + "loss": 0.19, + "step": 54596 + }, + { + "epoch": 0.9434095935858446, + "grad_norm": 1.1854974296409355, + "learning_rate": 1.674976721073851e-07, + "loss": 0.3283, + "step": 54597 + }, + { + "epoch": 0.9434268730992535, + "grad_norm": 1.5986392315049414, + "learning_rate": 1.67395685363837e-07, + "loss": 0.6177, + "step": 54598 + }, + { + "epoch": 0.9434441526126625, + "grad_norm": 1.2757783429778335, + "learning_rate": 1.6729372941675203e-07, + "loss": 0.4417, + "step": 54599 + }, + { + "epoch": 0.9434614321260714, + "grad_norm": 1.6207769294032874, + "learning_rate": 1.6719180426645108e-07, + "loss": 0.4782, + "step": 54600 + }, + { + "epoch": 0.9434787116394803, + "grad_norm": 1.6840476355595746, + "learning_rate": 1.670899099132528e-07, + "loss": 0.3176, + "step": 54601 + }, + { + "epoch": 0.9434959911528892, + "grad_norm": 1.1793631077561413, + "learning_rate": 1.669880463574758e-07, + "loss": 0.2518, + "step": 54602 + }, + { + "epoch": 0.9435132706662981, + "grad_norm": 1.4855547285481092, + "learning_rate": 1.668862135994398e-07, + "loss": 0.3967, + "step": 54603 + }, + { + "epoch": 0.943530550179707, + "grad_norm": 1.0598871124754095, + "learning_rate": 1.6678441163946345e-07, + "loss": 0.3438, + "step": 54604 + }, + { + "epoch": 0.9435478296931158, + "grad_norm": 1.3825362084822974, + "learning_rate": 1.6668264047786542e-07, + "loss": 0.4301, + "step": 54605 + }, + { + "epoch": 0.9435651092065247, + "grad_norm": 1.7021148702740212, + "learning_rate": 1.665809001149643e-07, + "loss": 0.2099, + "step": 54606 + }, + { + "epoch": 0.9435823887199336, + "grad_norm": 2.551900695447482, + "learning_rate": 1.6647919055107987e-07, + "loss": 0.3248, + "step": 54607 + }, + { + "epoch": 0.9435996682333425, + "grad_norm": 1.0424548419261148, + "learning_rate": 1.663775117865296e-07, + "loss": 0.3488, + "step": 54608 + }, + { + "epoch": 0.9436169477467514, + "grad_norm": 1.3844951387909177, + "learning_rate": 1.6627586382163218e-07, + "loss": 0.3639, + "step": 54609 + }, + { + "epoch": 0.9436342272601603, + "grad_norm": 1.544578988712185, + "learning_rate": 1.6617424665670624e-07, + "loss": 0.3667, + "step": 54610 + }, + { + "epoch": 0.9436515067735692, + "grad_norm": 1.419007699060418, + "learning_rate": 1.6607266029207037e-07, + "loss": 0.2861, + "step": 54611 + }, + { + "epoch": 0.9436687862869781, + "grad_norm": 1.0186343958629556, + "learning_rate": 1.6597110472804102e-07, + "loss": 0.2717, + "step": 54612 + }, + { + "epoch": 0.943686065800387, + "grad_norm": 0.4952478388501522, + "learning_rate": 1.6586957996493792e-07, + "loss": 0.5151, + "step": 54613 + }, + { + "epoch": 0.943703345313796, + "grad_norm": 0.92362501036683, + "learning_rate": 1.657680860030797e-07, + "loss": 0.1928, + "step": 54614 + }, + { + "epoch": 0.9437206248272049, + "grad_norm": 1.3493340251791848, + "learning_rate": 1.6566662284278166e-07, + "loss": 0.1845, + "step": 54615 + }, + { + "epoch": 0.9437379043406138, + "grad_norm": 0.9192269388527708, + "learning_rate": 1.6556519048436471e-07, + "loss": 0.2207, + "step": 54616 + }, + { + "epoch": 0.9437551838540227, + "grad_norm": 1.8967220591600862, + "learning_rate": 1.6546378892814408e-07, + "loss": 0.3362, + "step": 54617 + }, + { + "epoch": 0.9437724633674316, + "grad_norm": 0.8548213870453821, + "learning_rate": 1.6536241817443844e-07, + "loss": 0.485, + "step": 54618 + }, + { + "epoch": 0.9437897428808405, + "grad_norm": 1.8256724984417996, + "learning_rate": 1.652610782235653e-07, + "loss": 0.4763, + "step": 54619 + }, + { + "epoch": 0.9438070223942494, + "grad_norm": 2.165218752067915, + "learning_rate": 1.651597690758411e-07, + "loss": 0.3568, + "step": 54620 + }, + { + "epoch": 0.9438243019076583, + "grad_norm": 1.6491330962649358, + "learning_rate": 1.6505849073158553e-07, + "loss": 0.7693, + "step": 54621 + }, + { + "epoch": 0.9438415814210672, + "grad_norm": 0.7119233258995171, + "learning_rate": 1.6495724319111284e-07, + "loss": 0.2761, + "step": 54622 + }, + { + "epoch": 0.9438588609344761, + "grad_norm": 1.0559495890816224, + "learning_rate": 1.6485602645474165e-07, + "loss": 0.4697, + "step": 54623 + }, + { + "epoch": 0.943876140447885, + "grad_norm": 0.6429166684480705, + "learning_rate": 1.6475484052278834e-07, + "loss": 0.6341, + "step": 54624 + }, + { + "epoch": 0.943893419961294, + "grad_norm": 1.1152007590970345, + "learning_rate": 1.646536853955716e-07, + "loss": 0.269, + "step": 54625 + }, + { + "epoch": 0.9439106994747027, + "grad_norm": 1.597837630709902, + "learning_rate": 1.6455256107340668e-07, + "loss": 0.3159, + "step": 54626 + }, + { + "epoch": 0.9439279789881116, + "grad_norm": 1.8869764981798147, + "learning_rate": 1.6445146755661001e-07, + "loss": 0.3823, + "step": 54627 + }, + { + "epoch": 0.9439452585015206, + "grad_norm": 2.1456317130605322, + "learning_rate": 1.6435040484549913e-07, + "loss": 0.2177, + "step": 54628 + }, + { + "epoch": 0.9439625380149295, + "grad_norm": 0.8600499625450052, + "learning_rate": 1.6424937294039046e-07, + "loss": 0.4084, + "step": 54629 + }, + { + "epoch": 0.9439798175283384, + "grad_norm": 1.4428116946696892, + "learning_rate": 1.6414837184160039e-07, + "loss": 0.5714, + "step": 54630 + }, + { + "epoch": 0.9439970970417473, + "grad_norm": 2.9239276305140978, + "learning_rate": 1.640474015494442e-07, + "loss": 0.3134, + "step": 54631 + }, + { + "epoch": 0.9440143765551562, + "grad_norm": 1.9176756594606525, + "learning_rate": 1.6394646206423947e-07, + "loss": 0.3912, + "step": 54632 + }, + { + "epoch": 0.9440316560685651, + "grad_norm": 0.8249196679292522, + "learning_rate": 1.638455533863026e-07, + "loss": 0.2026, + "step": 54633 + }, + { + "epoch": 0.944048935581974, + "grad_norm": 1.388594372588867, + "learning_rate": 1.6374467551594775e-07, + "loss": 0.5699, + "step": 54634 + }, + { + "epoch": 0.9440662150953829, + "grad_norm": 1.7137318604060547, + "learning_rate": 1.6364382845349246e-07, + "loss": 0.2188, + "step": 54635 + }, + { + "epoch": 0.9440834946087918, + "grad_norm": 1.4709131093172438, + "learning_rate": 1.6354301219925316e-07, + "loss": 0.2531, + "step": 54636 + }, + { + "epoch": 0.9441007741222007, + "grad_norm": 1.143926911100114, + "learning_rate": 1.6344222675354403e-07, + "loss": 0.3998, + "step": 54637 + }, + { + "epoch": 0.9441180536356096, + "grad_norm": 1.2809965510190033, + "learning_rate": 1.6334147211668149e-07, + "loss": 0.3884, + "step": 54638 + }, + { + "epoch": 0.9441353331490185, + "grad_norm": 1.1917033842147262, + "learning_rate": 1.6324074828898084e-07, + "loss": 0.3302, + "step": 54639 + }, + { + "epoch": 0.9441526126624274, + "grad_norm": 2.2874212420019453, + "learning_rate": 1.6314005527075737e-07, + "loss": 0.4132, + "step": 54640 + }, + { + "epoch": 0.9441698921758364, + "grad_norm": 1.5507375734873379, + "learning_rate": 1.6303939306232863e-07, + "loss": 0.4605, + "step": 54641 + }, + { + "epoch": 0.9441871716892453, + "grad_norm": 1.3404524609010684, + "learning_rate": 1.6293876166400658e-07, + "loss": 0.3832, + "step": 54642 + }, + { + "epoch": 0.9442044512026542, + "grad_norm": 0.87527764450187, + "learning_rate": 1.6283816107610874e-07, + "loss": 0.6111, + "step": 54643 + }, + { + "epoch": 0.9442217307160631, + "grad_norm": 1.5157642685645998, + "learning_rate": 1.6273759129894817e-07, + "loss": 0.4098, + "step": 54644 + }, + { + "epoch": 0.944239010229472, + "grad_norm": 1.934598250729459, + "learning_rate": 1.6263705233284245e-07, + "loss": 0.4038, + "step": 54645 + }, + { + "epoch": 0.9442562897428809, + "grad_norm": 1.6879892640061795, + "learning_rate": 1.625365441781024e-07, + "loss": 0.3298, + "step": 54646 + }, + { + "epoch": 0.9442735692562897, + "grad_norm": 1.231407204845067, + "learning_rate": 1.6243606683504887e-07, + "loss": 0.4336, + "step": 54647 + }, + { + "epoch": 0.9442908487696986, + "grad_norm": 1.2088325368754658, + "learning_rate": 1.6233562030399052e-07, + "loss": 0.5497, + "step": 54648 + }, + { + "epoch": 0.9443081282831075, + "grad_norm": 1.3802143991172962, + "learning_rate": 1.6223520458524711e-07, + "loss": 0.2819, + "step": 54649 + }, + { + "epoch": 0.9443254077965164, + "grad_norm": 0.9545846657006325, + "learning_rate": 1.6213481967912946e-07, + "loss": 0.4579, + "step": 54650 + }, + { + "epoch": 0.9443426873099253, + "grad_norm": 1.6817883713987698, + "learning_rate": 1.6203446558595293e-07, + "loss": 0.3739, + "step": 54651 + }, + { + "epoch": 0.9443599668233342, + "grad_norm": 1.3843465776796389, + "learning_rate": 1.6193414230603278e-07, + "loss": 0.3428, + "step": 54652 + }, + { + "epoch": 0.9443772463367431, + "grad_norm": 2.0252958720000667, + "learning_rate": 1.6183384983968097e-07, + "loss": 0.4989, + "step": 54653 + }, + { + "epoch": 0.944394525850152, + "grad_norm": 1.2701163152645472, + "learning_rate": 1.6173358818721509e-07, + "loss": 0.22, + "step": 54654 + }, + { + "epoch": 0.944411805363561, + "grad_norm": 1.3183562006018104, + "learning_rate": 1.6163335734894703e-07, + "loss": 0.5632, + "step": 54655 + }, + { + "epoch": 0.9444290848769699, + "grad_norm": 0.798378218170036, + "learning_rate": 1.6153315732519104e-07, + "loss": 0.1967, + "step": 54656 + }, + { + "epoch": 0.9444463643903788, + "grad_norm": 4.537866387580754, + "learning_rate": 1.614329881162602e-07, + "loss": 0.2947, + "step": 54657 + }, + { + "epoch": 0.9444636439037877, + "grad_norm": 3.2422044327619193, + "learning_rate": 1.6133284972246867e-07, + "loss": 0.6269, + "step": 54658 + }, + { + "epoch": 0.9444809234171966, + "grad_norm": 1.8723674350603692, + "learning_rate": 1.6123274214413064e-07, + "loss": 0.5073, + "step": 54659 + }, + { + "epoch": 0.9444982029306055, + "grad_norm": 1.5306609254508334, + "learning_rate": 1.6113266538156036e-07, + "loss": 0.3541, + "step": 54660 + }, + { + "epoch": 0.9445154824440144, + "grad_norm": 1.1814127052182106, + "learning_rate": 1.6103261943506977e-07, + "loss": 0.4088, + "step": 54661 + }, + { + "epoch": 0.9445327619574233, + "grad_norm": 1.0333086070611253, + "learning_rate": 1.6093260430497193e-07, + "loss": 0.2865, + "step": 54662 + }, + { + "epoch": 0.9445500414708322, + "grad_norm": 0.6150491435837381, + "learning_rate": 1.6083261999158107e-07, + "loss": 0.8591, + "step": 54663 + }, + { + "epoch": 0.9445673209842411, + "grad_norm": 1.3587505394190496, + "learning_rate": 1.6073266649521024e-07, + "loss": 0.2236, + "step": 54664 + }, + { + "epoch": 0.94458460049765, + "grad_norm": 0.8858213325579005, + "learning_rate": 1.6063274381617366e-07, + "loss": 0.7134, + "step": 54665 + }, + { + "epoch": 0.9446018800110589, + "grad_norm": 1.1995580673416464, + "learning_rate": 1.605328519547822e-07, + "loss": 0.393, + "step": 54666 + }, + { + "epoch": 0.9446191595244678, + "grad_norm": 1.1469566308372725, + "learning_rate": 1.6043299091135e-07, + "loss": 0.3886, + "step": 54667 + }, + { + "epoch": 0.9446364390378766, + "grad_norm": 1.318069443016856, + "learning_rate": 1.603331606861891e-07, + "loss": 0.3972, + "step": 54668 + }, + { + "epoch": 0.9446537185512855, + "grad_norm": 1.4492575818760285, + "learning_rate": 1.6023336127961253e-07, + "loss": 0.4701, + "step": 54669 + }, + { + "epoch": 0.9446709980646945, + "grad_norm": 1.5369740358502388, + "learning_rate": 1.601335926919334e-07, + "loss": 0.2281, + "step": 54670 + }, + { + "epoch": 0.9446882775781034, + "grad_norm": 1.7016620138069787, + "learning_rate": 1.6003385492346257e-07, + "loss": 0.37, + "step": 54671 + }, + { + "epoch": 0.9447055570915123, + "grad_norm": 1.9087307103942641, + "learning_rate": 1.5993414797451422e-07, + "loss": 0.2635, + "step": 54672 + }, + { + "epoch": 0.9447228366049212, + "grad_norm": 1.2745193339007963, + "learning_rate": 1.5983447184540036e-07, + "loss": 0.4265, + "step": 54673 + }, + { + "epoch": 0.9447401161183301, + "grad_norm": 0.6259952932008594, + "learning_rate": 1.597348265364318e-07, + "loss": 0.8542, + "step": 54674 + }, + { + "epoch": 0.944757395631739, + "grad_norm": 1.911802654449651, + "learning_rate": 1.5963521204792164e-07, + "loss": 0.471, + "step": 54675 + }, + { + "epoch": 0.9447746751451479, + "grad_norm": 1.6998866348724182, + "learning_rate": 1.59535628380183e-07, + "loss": 0.353, + "step": 54676 + }, + { + "epoch": 0.9447919546585568, + "grad_norm": 1.4649774698407296, + "learning_rate": 1.5943607553352557e-07, + "loss": 0.141, + "step": 54677 + }, + { + "epoch": 0.9448092341719657, + "grad_norm": 1.9367339249005966, + "learning_rate": 1.593365535082625e-07, + "loss": 0.4454, + "step": 54678 + }, + { + "epoch": 0.9448265136853746, + "grad_norm": 1.1858018203289047, + "learning_rate": 1.592370623047046e-07, + "loss": 0.3664, + "step": 54679 + }, + { + "epoch": 0.9448437931987835, + "grad_norm": 1.2605694444542084, + "learning_rate": 1.5913760192316497e-07, + "loss": 0.3933, + "step": 54680 + }, + { + "epoch": 0.9448610727121924, + "grad_norm": 0.9157624906724448, + "learning_rate": 1.5903817236395448e-07, + "loss": 0.324, + "step": 54681 + }, + { + "epoch": 0.9448783522256013, + "grad_norm": 1.4298698807506824, + "learning_rate": 1.5893877362738398e-07, + "loss": 0.5485, + "step": 54682 + }, + { + "epoch": 0.9448956317390103, + "grad_norm": 1.7661749498242296, + "learning_rate": 1.5883940571376544e-07, + "loss": 0.3113, + "step": 54683 + }, + { + "epoch": 0.9449129112524192, + "grad_norm": 1.3313987558984028, + "learning_rate": 1.5874006862340864e-07, + "loss": 0.2349, + "step": 54684 + }, + { + "epoch": 0.9449301907658281, + "grad_norm": 1.6724454522827097, + "learning_rate": 1.5864076235662663e-07, + "loss": 0.5096, + "step": 54685 + }, + { + "epoch": 0.944947470279237, + "grad_norm": 1.4348859004216246, + "learning_rate": 1.5854148691372917e-07, + "loss": 0.4465, + "step": 54686 + }, + { + "epoch": 0.9449647497926459, + "grad_norm": 0.8605009974981448, + "learning_rate": 1.5844224229502826e-07, + "loss": 0.2071, + "step": 54687 + }, + { + "epoch": 0.9449820293060548, + "grad_norm": 2.1458586970672386, + "learning_rate": 1.5834302850083361e-07, + "loss": 0.4612, + "step": 54688 + }, + { + "epoch": 0.9449993088194636, + "grad_norm": 1.375543659959539, + "learning_rate": 1.582438455314572e-07, + "loss": 0.246, + "step": 54689 + }, + { + "epoch": 0.9450165883328725, + "grad_norm": 1.8305132749973854, + "learning_rate": 1.5814469338720996e-07, + "loss": 0.3434, + "step": 54690 + }, + { + "epoch": 0.9450338678462814, + "grad_norm": 1.2807837202383168, + "learning_rate": 1.5804557206839933e-07, + "loss": 0.1691, + "step": 54691 + }, + { + "epoch": 0.9450511473596903, + "grad_norm": 1.3176214061786922, + "learning_rate": 1.5794648157533842e-07, + "loss": 0.3152, + "step": 54692 + }, + { + "epoch": 0.9450684268730992, + "grad_norm": 1.0829970120975723, + "learning_rate": 1.5784742190833814e-07, + "loss": 0.4306, + "step": 54693 + }, + { + "epoch": 0.9450857063865081, + "grad_norm": 0.8242329413191458, + "learning_rate": 1.5774839306770706e-07, + "loss": 0.255, + "step": 54694 + }, + { + "epoch": 0.945102985899917, + "grad_norm": 1.318095905062719, + "learning_rate": 1.576493950537561e-07, + "loss": 0.3049, + "step": 54695 + }, + { + "epoch": 0.9451202654133259, + "grad_norm": 1.4022561393085016, + "learning_rate": 1.575504278667961e-07, + "loss": 0.3755, + "step": 54696 + }, + { + "epoch": 0.9451375449267349, + "grad_norm": 1.1275562743284162, + "learning_rate": 1.5745149150713457e-07, + "loss": 0.3468, + "step": 54697 + }, + { + "epoch": 0.9451548244401438, + "grad_norm": 1.3332508512885306, + "learning_rate": 1.5735258597508352e-07, + "loss": 0.2868, + "step": 54698 + }, + { + "epoch": 0.9451721039535527, + "grad_norm": 1.9337857672491274, + "learning_rate": 1.5725371127095157e-07, + "loss": 0.3816, + "step": 54699 + }, + { + "epoch": 0.9451893834669616, + "grad_norm": 1.0416237729846014, + "learning_rate": 1.5715486739505071e-07, + "loss": 0.5614, + "step": 54700 + }, + { + "epoch": 0.9452066629803705, + "grad_norm": 1.2765339117367078, + "learning_rate": 1.5705605434768845e-07, + "loss": 0.3343, + "step": 54701 + }, + { + "epoch": 0.9452239424937794, + "grad_norm": 0.9407192714291475, + "learning_rate": 1.5695727212917345e-07, + "loss": 0.8106, + "step": 54702 + }, + { + "epoch": 0.9452412220071883, + "grad_norm": 1.3813284654301288, + "learning_rate": 1.5685852073981654e-07, + "loss": 0.3546, + "step": 54703 + }, + { + "epoch": 0.9452585015205972, + "grad_norm": 1.6785025882889, + "learning_rate": 1.5675980017992753e-07, + "loss": 0.3359, + "step": 54704 + }, + { + "epoch": 0.9452757810340061, + "grad_norm": 1.604648614009478, + "learning_rate": 1.5666111044981502e-07, + "loss": 0.4742, + "step": 54705 + }, + { + "epoch": 0.945293060547415, + "grad_norm": 1.3768899410075026, + "learning_rate": 1.5656245154978655e-07, + "loss": 0.582, + "step": 54706 + }, + { + "epoch": 0.9453103400608239, + "grad_norm": 1.1153590792059973, + "learning_rate": 1.564638234801541e-07, + "loss": 0.2343, + "step": 54707 + }, + { + "epoch": 0.9453276195742328, + "grad_norm": 1.10095399048775, + "learning_rate": 1.563652262412252e-07, + "loss": 0.3178, + "step": 54708 + }, + { + "epoch": 0.9453448990876417, + "grad_norm": 1.4536828367115733, + "learning_rate": 1.5626665983330735e-07, + "loss": 0.2318, + "step": 54709 + }, + { + "epoch": 0.9453621786010505, + "grad_norm": 1.0659558301745644, + "learning_rate": 1.5616812425671035e-07, + "loss": 0.4965, + "step": 54710 + }, + { + "epoch": 0.9453794581144594, + "grad_norm": 0.9555492526719905, + "learning_rate": 1.5606961951174394e-07, + "loss": 0.2482, + "step": 54711 + }, + { + "epoch": 0.9453967376278684, + "grad_norm": 1.0547080605266423, + "learning_rate": 1.5597114559871561e-07, + "loss": 0.3025, + "step": 54712 + }, + { + "epoch": 0.9454140171412773, + "grad_norm": 1.5082992970491587, + "learning_rate": 1.5587270251793297e-07, + "loss": 0.455, + "step": 54713 + }, + { + "epoch": 0.9454312966546862, + "grad_norm": 2.689046985299175, + "learning_rate": 1.5577429026970458e-07, + "loss": 0.5415, + "step": 54714 + }, + { + "epoch": 0.9454485761680951, + "grad_norm": 1.2216000117464356, + "learning_rate": 1.5567590885434026e-07, + "loss": 0.2782, + "step": 54715 + }, + { + "epoch": 0.945465855681504, + "grad_norm": 1.2190953070535093, + "learning_rate": 1.5557755827214748e-07, + "loss": 0.4242, + "step": 54716 + }, + { + "epoch": 0.9454831351949129, + "grad_norm": 1.5583870066483445, + "learning_rate": 1.5547923852343272e-07, + "loss": 0.1727, + "step": 54717 + }, + { + "epoch": 0.9455004147083218, + "grad_norm": 1.7805397135519403, + "learning_rate": 1.553809496085068e-07, + "loss": 0.3377, + "step": 54718 + }, + { + "epoch": 0.9455176942217307, + "grad_norm": 1.8541819778414128, + "learning_rate": 1.5528269152767394e-07, + "loss": 0.3924, + "step": 54719 + }, + { + "epoch": 0.9455349737351396, + "grad_norm": 2.4916038533186384, + "learning_rate": 1.5518446428124502e-07, + "loss": 0.3719, + "step": 54720 + }, + { + "epoch": 0.9455522532485485, + "grad_norm": 1.797305568934991, + "learning_rate": 1.5508626786952642e-07, + "loss": 0.3045, + "step": 54721 + }, + { + "epoch": 0.9455695327619574, + "grad_norm": 1.1133884359466846, + "learning_rate": 1.5498810229282568e-07, + "loss": 0.8111, + "step": 54722 + }, + { + "epoch": 0.9455868122753663, + "grad_norm": 0.777842942030717, + "learning_rate": 1.5488996755145148e-07, + "loss": 0.2757, + "step": 54723 + }, + { + "epoch": 0.9456040917887752, + "grad_norm": 1.201229529562788, + "learning_rate": 1.5479186364570797e-07, + "loss": 0.2708, + "step": 54724 + }, + { + "epoch": 0.9456213713021842, + "grad_norm": 1.6363842965174944, + "learning_rate": 1.5469379057590607e-07, + "loss": 0.3618, + "step": 54725 + }, + { + "epoch": 0.9456386508155931, + "grad_norm": 1.4198208931054181, + "learning_rate": 1.5459574834235103e-07, + "loss": 0.3148, + "step": 54726 + }, + { + "epoch": 0.945655930329002, + "grad_norm": 1.0638721913309679, + "learning_rate": 1.5449773694535042e-07, + "loss": 0.3219, + "step": 54727 + }, + { + "epoch": 0.9456732098424109, + "grad_norm": 1.7696550905278783, + "learning_rate": 1.5439975638521065e-07, + "loss": 0.4896, + "step": 54728 + }, + { + "epoch": 0.9456904893558198, + "grad_norm": 1.3649471455884017, + "learning_rate": 1.5430180666224036e-07, + "loss": 0.4939, + "step": 54729 + }, + { + "epoch": 0.9457077688692287, + "grad_norm": 1.0041306651989041, + "learning_rate": 1.5420388777674378e-07, + "loss": 0.2694, + "step": 54730 + }, + { + "epoch": 0.9457250483826375, + "grad_norm": 1.1518039326097307, + "learning_rate": 1.5410599972902952e-07, + "loss": 0.4558, + "step": 54731 + }, + { + "epoch": 0.9457423278960464, + "grad_norm": 1.551052626201507, + "learning_rate": 1.5400814251940288e-07, + "loss": 0.5023, + "step": 54732 + }, + { + "epoch": 0.9457596074094553, + "grad_norm": 1.4632853740177756, + "learning_rate": 1.5391031614817143e-07, + "loss": 0.3123, + "step": 54733 + }, + { + "epoch": 0.9457768869228642, + "grad_norm": 1.618765736670113, + "learning_rate": 1.5381252061564155e-07, + "loss": 0.2856, + "step": 54734 + }, + { + "epoch": 0.9457941664362731, + "grad_norm": 1.717488640157746, + "learning_rate": 1.537147559221186e-07, + "loss": 0.3073, + "step": 54735 + }, + { + "epoch": 0.945811445949682, + "grad_norm": 0.8366865294250071, + "learning_rate": 1.5361702206791006e-07, + "loss": 0.1739, + "step": 54736 + }, + { + "epoch": 0.9458287254630909, + "grad_norm": 1.0862613485871637, + "learning_rate": 1.5351931905332017e-07, + "loss": 0.2739, + "step": 54737 + }, + { + "epoch": 0.9458460049764998, + "grad_norm": 2.3179217587237013, + "learning_rate": 1.5342164687865647e-07, + "loss": 0.2702, + "step": 54738 + }, + { + "epoch": 0.9458632844899088, + "grad_norm": 1.235961238515378, + "learning_rate": 1.5332400554422422e-07, + "loss": 0.3504, + "step": 54739 + }, + { + "epoch": 0.9458805640033177, + "grad_norm": 1.6189061290578222, + "learning_rate": 1.5322639505032988e-07, + "loss": 0.3545, + "step": 54740 + }, + { + "epoch": 0.9458978435167266, + "grad_norm": 1.1846328146064837, + "learning_rate": 1.5312881539727874e-07, + "loss": 0.2862, + "step": 54741 + }, + { + "epoch": 0.9459151230301355, + "grad_norm": 0.9967054847407332, + "learning_rate": 1.5303126658537727e-07, + "loss": 0.3408, + "step": 54742 + }, + { + "epoch": 0.9459324025435444, + "grad_norm": 1.6483183370798005, + "learning_rate": 1.529337486149296e-07, + "loss": 0.3505, + "step": 54743 + }, + { + "epoch": 0.9459496820569533, + "grad_norm": 1.1219857095233674, + "learning_rate": 1.528362614862411e-07, + "loss": 0.8632, + "step": 54744 + }, + { + "epoch": 0.9459669615703622, + "grad_norm": 1.562008496095472, + "learning_rate": 1.5273880519961815e-07, + "loss": 0.4541, + "step": 54745 + }, + { + "epoch": 0.9459842410837711, + "grad_norm": 0.8461174893566875, + "learning_rate": 1.5264137975536608e-07, + "loss": 0.2274, + "step": 54746 + }, + { + "epoch": 0.94600152059718, + "grad_norm": 1.901664395347855, + "learning_rate": 1.525439851537902e-07, + "loss": 0.5092, + "step": 54747 + }, + { + "epoch": 0.9460188001105889, + "grad_norm": 1.8936361186988293, + "learning_rate": 1.5244662139519361e-07, + "loss": 0.3167, + "step": 54748 + }, + { + "epoch": 0.9460360796239978, + "grad_norm": 1.2161033626615572, + "learning_rate": 1.5234928847988273e-07, + "loss": 0.4579, + "step": 54749 + }, + { + "epoch": 0.9460533591374067, + "grad_norm": 1.3440447545655485, + "learning_rate": 1.5225198640816173e-07, + "loss": 0.4057, + "step": 54750 + }, + { + "epoch": 0.9460706386508156, + "grad_norm": 1.5561151716323127, + "learning_rate": 1.5215471518033818e-07, + "loss": 0.4125, + "step": 54751 + }, + { + "epoch": 0.9460879181642246, + "grad_norm": 1.8905204419299277, + "learning_rate": 1.5205747479671297e-07, + "loss": 0.3289, + "step": 54752 + }, + { + "epoch": 0.9461051976776333, + "grad_norm": 1.4329401326208946, + "learning_rate": 1.5196026525759243e-07, + "loss": 0.3035, + "step": 54753 + }, + { + "epoch": 0.9461224771910423, + "grad_norm": 1.4249614147977343, + "learning_rate": 1.5186308656327975e-07, + "loss": 0.2257, + "step": 54754 + }, + { + "epoch": 0.9461397567044512, + "grad_norm": 1.4019851187975025, + "learning_rate": 1.517659387140813e-07, + "loss": 0.2972, + "step": 54755 + }, + { + "epoch": 0.9461570362178601, + "grad_norm": 1.56409815786699, + "learning_rate": 1.5166882171030017e-07, + "loss": 0.3934, + "step": 54756 + }, + { + "epoch": 0.946174315731269, + "grad_norm": 0.8833313026140549, + "learning_rate": 1.515717355522406e-07, + "loss": 0.3169, + "step": 54757 + }, + { + "epoch": 0.9461915952446779, + "grad_norm": 2.2569478302202266, + "learning_rate": 1.5147468024020783e-07, + "loss": 0.4672, + "step": 54758 + }, + { + "epoch": 0.9462088747580868, + "grad_norm": 1.3156731618870448, + "learning_rate": 1.513776557745039e-07, + "loss": 0.1853, + "step": 54759 + }, + { + "epoch": 0.9462261542714957, + "grad_norm": 1.5888412927493867, + "learning_rate": 1.5128066215543302e-07, + "loss": 0.2421, + "step": 54760 + }, + { + "epoch": 0.9462434337849046, + "grad_norm": 1.303404072700037, + "learning_rate": 1.5118369938330047e-07, + "loss": 0.1772, + "step": 54761 + }, + { + "epoch": 0.9462607132983135, + "grad_norm": 3.187179168029357, + "learning_rate": 1.5108676745840934e-07, + "loss": 0.46, + "step": 54762 + }, + { + "epoch": 0.9462779928117224, + "grad_norm": 1.7933112249117198, + "learning_rate": 1.5098986638106273e-07, + "loss": 0.3902, + "step": 54763 + }, + { + "epoch": 0.9462952723251313, + "grad_norm": 1.721687676745316, + "learning_rate": 1.5089299615156372e-07, + "loss": 0.402, + "step": 54764 + }, + { + "epoch": 0.9463125518385402, + "grad_norm": 2.985586348969196, + "learning_rate": 1.5079615677021763e-07, + "loss": 0.4766, + "step": 54765 + }, + { + "epoch": 0.9463298313519491, + "grad_norm": 1.6052425662659915, + "learning_rate": 1.5069934823732424e-07, + "loss": 0.3365, + "step": 54766 + }, + { + "epoch": 0.946347110865358, + "grad_norm": 1.164020778767124, + "learning_rate": 1.5060257055319104e-07, + "loss": 0.5636, + "step": 54767 + }, + { + "epoch": 0.946364390378767, + "grad_norm": 0.6855436322425258, + "learning_rate": 1.5050582371811785e-07, + "loss": 0.7379, + "step": 54768 + }, + { + "epoch": 0.9463816698921759, + "grad_norm": 0.8568327210566123, + "learning_rate": 1.504091077324099e-07, + "loss": 0.4778, + "step": 54769 + }, + { + "epoch": 0.9463989494055848, + "grad_norm": 1.2279038647576008, + "learning_rate": 1.5031242259636924e-07, + "loss": 0.2996, + "step": 54770 + }, + { + "epoch": 0.9464162289189937, + "grad_norm": 0.6621122617757055, + "learning_rate": 1.5021576831029893e-07, + "loss": 0.3137, + "step": 54771 + }, + { + "epoch": 0.9464335084324026, + "grad_norm": 1.1071088093951285, + "learning_rate": 1.5011914487449875e-07, + "loss": 0.3014, + "step": 54772 + }, + { + "epoch": 0.9464507879458115, + "grad_norm": 1.0770838943147276, + "learning_rate": 1.500225522892762e-07, + "loss": 0.2694, + "step": 54773 + }, + { + "epoch": 0.9464680674592203, + "grad_norm": 2.478072499611344, + "learning_rate": 1.4992599055493218e-07, + "loss": 0.3577, + "step": 54774 + }, + { + "epoch": 0.9464853469726292, + "grad_norm": 1.9846397682573953, + "learning_rate": 1.4982945967176755e-07, + "loss": 0.3387, + "step": 54775 + }, + { + "epoch": 0.9465026264860381, + "grad_norm": 1.3883324813891003, + "learning_rate": 1.4973295964008648e-07, + "loss": 0.4167, + "step": 54776 + }, + { + "epoch": 0.946519905999447, + "grad_norm": 1.4163018114742727, + "learning_rate": 1.496364904601888e-07, + "loss": 0.2479, + "step": 54777 + }, + { + "epoch": 0.9465371855128559, + "grad_norm": 1.3545822215217276, + "learning_rate": 1.4954005213237865e-07, + "loss": 0.2605, + "step": 54778 + }, + { + "epoch": 0.9465544650262648, + "grad_norm": 1.0069892862344694, + "learning_rate": 1.4944364465695804e-07, + "loss": 0.4213, + "step": 54779 + }, + { + "epoch": 0.9465717445396737, + "grad_norm": 0.9215748126671602, + "learning_rate": 1.4934726803422893e-07, + "loss": 0.2009, + "step": 54780 + }, + { + "epoch": 0.9465890240530827, + "grad_norm": 2.05916804229048, + "learning_rate": 1.4925092226449223e-07, + "loss": 0.325, + "step": 54781 + }, + { + "epoch": 0.9466063035664916, + "grad_norm": 1.4364720346685145, + "learning_rate": 1.49154607348051e-07, + "loss": 0.2947, + "step": 54782 + }, + { + "epoch": 0.9466235830799005, + "grad_norm": 2.0480706791901757, + "learning_rate": 1.4905832328520498e-07, + "loss": 0.2996, + "step": 54783 + }, + { + "epoch": 0.9466408625933094, + "grad_norm": 1.1060240488953146, + "learning_rate": 1.489620700762573e-07, + "loss": 0.2135, + "step": 54784 + }, + { + "epoch": 0.9466581421067183, + "grad_norm": 2.470270748558319, + "learning_rate": 1.4886584772150992e-07, + "loss": 0.5146, + "step": 54785 + }, + { + "epoch": 0.9466754216201272, + "grad_norm": 1.7290287305117964, + "learning_rate": 1.487696562212626e-07, + "loss": 0.3322, + "step": 54786 + }, + { + "epoch": 0.9466927011335361, + "grad_norm": 0.8917634649695293, + "learning_rate": 1.4867349557581844e-07, + "loss": 0.2814, + "step": 54787 + }, + { + "epoch": 0.946709980646945, + "grad_norm": 1.1777058204971893, + "learning_rate": 1.485773657854761e-07, + "loss": 0.3846, + "step": 54788 + }, + { + "epoch": 0.9467272601603539, + "grad_norm": 1.5132223163402547, + "learning_rate": 1.4848126685053866e-07, + "loss": 0.2144, + "step": 54789 + }, + { + "epoch": 0.9467445396737628, + "grad_norm": 1.2228845924880025, + "learning_rate": 1.4838519877130698e-07, + "loss": 0.3101, + "step": 54790 + }, + { + "epoch": 0.9467618191871717, + "grad_norm": 1.1440707716913463, + "learning_rate": 1.482891615480808e-07, + "loss": 0.4126, + "step": 54791 + }, + { + "epoch": 0.9467790987005806, + "grad_norm": 1.2791543625333879, + "learning_rate": 1.4819315518116218e-07, + "loss": 0.3779, + "step": 54792 + }, + { + "epoch": 0.9467963782139895, + "grad_norm": 1.4811123390200893, + "learning_rate": 1.4809717967085191e-07, + "loss": 0.342, + "step": 54793 + }, + { + "epoch": 0.9468136577273984, + "grad_norm": 1.2444443511437355, + "learning_rate": 1.4800123501744868e-07, + "loss": 0.2753, + "step": 54794 + }, + { + "epoch": 0.9468309372408072, + "grad_norm": 1.4426762341207426, + "learning_rate": 1.4790532122125446e-07, + "loss": 0.285, + "step": 54795 + }, + { + "epoch": 0.9468482167542162, + "grad_norm": 1.0162979100068836, + "learning_rate": 1.4780943828257122e-07, + "loss": 0.4776, + "step": 54796 + }, + { + "epoch": 0.9468654962676251, + "grad_norm": 1.814858283151651, + "learning_rate": 1.477135862016954e-07, + "loss": 0.3749, + "step": 54797 + }, + { + "epoch": 0.946882775781034, + "grad_norm": 1.0495828435742085, + "learning_rate": 1.476177649789312e-07, + "loss": 0.2873, + "step": 54798 + }, + { + "epoch": 0.9469000552944429, + "grad_norm": 2.612697186487758, + "learning_rate": 1.475219746145762e-07, + "loss": 0.4181, + "step": 54799 + }, + { + "epoch": 0.9469173348078518, + "grad_norm": 1.132273399462811, + "learning_rate": 1.4742621510893008e-07, + "loss": 0.4074, + "step": 54800 + }, + { + "epoch": 0.9469346143212607, + "grad_norm": 1.0576780768916585, + "learning_rate": 1.4733048646229486e-07, + "loss": 0.2921, + "step": 54801 + }, + { + "epoch": 0.9469518938346696, + "grad_norm": 1.1635199262328217, + "learning_rate": 1.4723478867496922e-07, + "loss": 0.3636, + "step": 54802 + }, + { + "epoch": 0.9469691733480785, + "grad_norm": 1.4598511923260011, + "learning_rate": 1.4713912174725286e-07, + "loss": 0.4496, + "step": 54803 + }, + { + "epoch": 0.9469864528614874, + "grad_norm": 1.8409953646078359, + "learning_rate": 1.470434856794456e-07, + "loss": 0.4436, + "step": 54804 + }, + { + "epoch": 0.9470037323748963, + "grad_norm": 1.1160085535998265, + "learning_rate": 1.4694788047184827e-07, + "loss": 0.2299, + "step": 54805 + }, + { + "epoch": 0.9470210118883052, + "grad_norm": 1.5831635197192195, + "learning_rate": 1.468523061247562e-07, + "loss": 0.2526, + "step": 54806 + }, + { + "epoch": 0.9470382914017141, + "grad_norm": 1.74210515333634, + "learning_rate": 1.4675676263847362e-07, + "loss": 0.1903, + "step": 54807 + }, + { + "epoch": 0.947055570915123, + "grad_norm": 1.3244869306220495, + "learning_rate": 1.466612500132969e-07, + "loss": 0.229, + "step": 54808 + }, + { + "epoch": 0.947072850428532, + "grad_norm": 1.4982684346469621, + "learning_rate": 1.4656576824952696e-07, + "loss": 0.4195, + "step": 54809 + }, + { + "epoch": 0.9470901299419409, + "grad_norm": 1.2740601133056462, + "learning_rate": 1.464703173474602e-07, + "loss": 0.3368, + "step": 54810 + }, + { + "epoch": 0.9471074094553498, + "grad_norm": 2.1430127889095854, + "learning_rate": 1.4637489730739862e-07, + "loss": 0.3464, + "step": 54811 + }, + { + "epoch": 0.9471246889687587, + "grad_norm": 1.7208511340279022, + "learning_rate": 1.4627950812963866e-07, + "loss": 0.4338, + "step": 54812 + }, + { + "epoch": 0.9471419684821676, + "grad_norm": 1.1388964853644354, + "learning_rate": 1.4618414981448115e-07, + "loss": 0.4419, + "step": 54813 + }, + { + "epoch": 0.9471592479955765, + "grad_norm": 1.2201545958055238, + "learning_rate": 1.4608882236222366e-07, + "loss": 0.4149, + "step": 54814 + }, + { + "epoch": 0.9471765275089854, + "grad_norm": 1.7375282854271246, + "learning_rate": 1.4599352577316373e-07, + "loss": 0.4083, + "step": 54815 + }, + { + "epoch": 0.9471938070223942, + "grad_norm": 1.7005623379601216, + "learning_rate": 1.458982600476022e-07, + "loss": 0.4729, + "step": 54816 + }, + { + "epoch": 0.9472110865358031, + "grad_norm": 1.0437385937639247, + "learning_rate": 1.4580302518583554e-07, + "loss": 0.12, + "step": 54817 + }, + { + "epoch": 0.947228366049212, + "grad_norm": 1.805427284689683, + "learning_rate": 1.457078211881624e-07, + "loss": 0.3371, + "step": 54818 + }, + { + "epoch": 0.9472456455626209, + "grad_norm": 1.9130188479830184, + "learning_rate": 1.456126480548814e-07, + "loss": 0.2296, + "step": 54819 + }, + { + "epoch": 0.9472629250760298, + "grad_norm": 1.6921624651603513, + "learning_rate": 1.4551750578629232e-07, + "loss": 0.3142, + "step": 54820 + }, + { + "epoch": 0.9472802045894387, + "grad_norm": 1.4804657042807665, + "learning_rate": 1.4542239438268934e-07, + "loss": 0.2457, + "step": 54821 + }, + { + "epoch": 0.9472974841028476, + "grad_norm": 1.529079922322609, + "learning_rate": 1.4532731384437338e-07, + "loss": 0.3976, + "step": 54822 + }, + { + "epoch": 0.9473147636162565, + "grad_norm": 1.3682474424467572, + "learning_rate": 1.452322641716397e-07, + "loss": 0.3968, + "step": 54823 + }, + { + "epoch": 0.9473320431296655, + "grad_norm": 1.2614847262019346, + "learning_rate": 1.4513724536478813e-07, + "loss": 0.4327, + "step": 54824 + }, + { + "epoch": 0.9473493226430744, + "grad_norm": 1.5606107017084634, + "learning_rate": 1.4504225742411727e-07, + "loss": 0.5368, + "step": 54825 + }, + { + "epoch": 0.9473666021564833, + "grad_norm": 1.600930071189577, + "learning_rate": 1.4494730034992134e-07, + "loss": 0.3325, + "step": 54826 + }, + { + "epoch": 0.9473838816698922, + "grad_norm": 1.3418131647289813, + "learning_rate": 1.448523741425001e-07, + "loss": 0.4097, + "step": 54827 + }, + { + "epoch": 0.9474011611833011, + "grad_norm": 0.686617988169335, + "learning_rate": 1.4475747880214885e-07, + "loss": 0.7122, + "step": 54828 + }, + { + "epoch": 0.94741844069671, + "grad_norm": 0.8924016155767941, + "learning_rate": 1.4466261432916739e-07, + "loss": 0.2925, + "step": 54829 + }, + { + "epoch": 0.9474357202101189, + "grad_norm": 2.1783053019807967, + "learning_rate": 1.4456778072385103e-07, + "loss": 0.3273, + "step": 54830 + }, + { + "epoch": 0.9474529997235278, + "grad_norm": 1.6707968746646233, + "learning_rate": 1.444729779864984e-07, + "loss": 0.4216, + "step": 54831 + }, + { + "epoch": 0.9474702792369367, + "grad_norm": 1.809499068880989, + "learning_rate": 1.4437820611740372e-07, + "loss": 0.5185, + "step": 54832 + }, + { + "epoch": 0.9474875587503456, + "grad_norm": 1.264191106636213, + "learning_rate": 1.4428346511686785e-07, + "loss": 0.3609, + "step": 54833 + }, + { + "epoch": 0.9475048382637545, + "grad_norm": 1.1386972740822099, + "learning_rate": 1.441887549851828e-07, + "loss": 0.2917, + "step": 54834 + }, + { + "epoch": 0.9475221177771634, + "grad_norm": 1.5315277538960963, + "learning_rate": 1.4409407572264833e-07, + "loss": 0.443, + "step": 54835 + }, + { + "epoch": 0.9475393972905723, + "grad_norm": 1.0871437181150474, + "learning_rate": 1.4399942732956086e-07, + "loss": 0.4384, + "step": 54836 + }, + { + "epoch": 0.9475566768039811, + "grad_norm": 1.416962563392819, + "learning_rate": 1.4390480980621569e-07, + "loss": 0.4467, + "step": 54837 + }, + { + "epoch": 0.94757395631739, + "grad_norm": 0.7699077312206386, + "learning_rate": 1.438102231529104e-07, + "loss": 0.1965, + "step": 54838 + }, + { + "epoch": 0.947591235830799, + "grad_norm": 1.1341865456041984, + "learning_rate": 1.4371566736994026e-07, + "loss": 0.1644, + "step": 54839 + }, + { + "epoch": 0.9476085153442079, + "grad_norm": 1.318762982986091, + "learning_rate": 1.4362114245760063e-07, + "loss": 0.5352, + "step": 54840 + }, + { + "epoch": 0.9476257948576168, + "grad_norm": 1.5871317757871493, + "learning_rate": 1.43526648416189e-07, + "loss": 0.4542, + "step": 54841 + }, + { + "epoch": 0.9476430743710257, + "grad_norm": 1.0511167475057441, + "learning_rate": 1.434321852460019e-07, + "loss": 0.316, + "step": 54842 + }, + { + "epoch": 0.9476603538844346, + "grad_norm": 1.5552705126738238, + "learning_rate": 1.4333775294733455e-07, + "loss": 0.3327, + "step": 54843 + }, + { + "epoch": 0.9476776333978435, + "grad_norm": 1.2862420092499836, + "learning_rate": 1.432433515204812e-07, + "loss": 0.3716, + "step": 54844 + }, + { + "epoch": 0.9476949129112524, + "grad_norm": 1.5131684610314988, + "learning_rate": 1.4314898096574047e-07, + "loss": 0.4624, + "step": 54845 + }, + { + "epoch": 0.9477121924246613, + "grad_norm": 1.5539493724381988, + "learning_rate": 1.430546412834033e-07, + "loss": 0.2601, + "step": 54846 + }, + { + "epoch": 0.9477294719380702, + "grad_norm": 1.0182579257083046, + "learning_rate": 1.429603324737705e-07, + "loss": 0.3829, + "step": 54847 + }, + { + "epoch": 0.9477467514514791, + "grad_norm": 1.2229048657209505, + "learning_rate": 1.428660545371341e-07, + "loss": 0.3301, + "step": 54848 + }, + { + "epoch": 0.947764030964888, + "grad_norm": 1.2392314200190213, + "learning_rate": 1.427718074737916e-07, + "loss": 0.4188, + "step": 54849 + }, + { + "epoch": 0.947781310478297, + "grad_norm": 0.8369912048251656, + "learning_rate": 1.4267759128403503e-07, + "loss": 0.7518, + "step": 54850 + }, + { + "epoch": 0.9477985899917059, + "grad_norm": 0.9250670133933093, + "learning_rate": 1.4258340596816301e-07, + "loss": 0.5564, + "step": 54851 + }, + { + "epoch": 0.9478158695051148, + "grad_norm": 0.9123029686233695, + "learning_rate": 1.4248925152646643e-07, + "loss": 0.5743, + "step": 54852 + }, + { + "epoch": 0.9478331490185237, + "grad_norm": 0.9914228865205231, + "learning_rate": 1.4239512795924505e-07, + "loss": 0.2898, + "step": 54853 + }, + { + "epoch": 0.9478504285319326, + "grad_norm": 2.633920170358726, + "learning_rate": 1.4230103526679083e-07, + "loss": 0.3068, + "step": 54854 + }, + { + "epoch": 0.9478677080453415, + "grad_norm": 1.3366589934772464, + "learning_rate": 1.4220697344939916e-07, + "loss": 0.3604, + "step": 54855 + }, + { + "epoch": 0.9478849875587504, + "grad_norm": 1.5448176263698898, + "learning_rate": 1.4211294250736418e-07, + "loss": 0.3116, + "step": 54856 + }, + { + "epoch": 0.9479022670721593, + "grad_norm": 1.091559860599993, + "learning_rate": 1.4201894244098015e-07, + "loss": 0.3237, + "step": 54857 + }, + { + "epoch": 0.9479195465855681, + "grad_norm": 1.8167298512594428, + "learning_rate": 1.4192497325054123e-07, + "loss": 0.4186, + "step": 54858 + }, + { + "epoch": 0.947936826098977, + "grad_norm": 1.5507554379184554, + "learning_rate": 1.4183103493634388e-07, + "loss": 0.3343, + "step": 54859 + }, + { + "epoch": 0.9479541056123859, + "grad_norm": 0.9882038736860921, + "learning_rate": 1.4173712749868118e-07, + "loss": 0.3207, + "step": 54860 + }, + { + "epoch": 0.9479713851257948, + "grad_norm": 1.7043274910386077, + "learning_rate": 1.4164325093784515e-07, + "loss": 0.5034, + "step": 54861 + }, + { + "epoch": 0.9479886646392037, + "grad_norm": 1.231056204665475, + "learning_rate": 1.415494052541333e-07, + "loss": 0.4329, + "step": 54862 + }, + { + "epoch": 0.9480059441526126, + "grad_norm": 2.9112848064986445, + "learning_rate": 1.4145559044783764e-07, + "loss": 0.347, + "step": 54863 + }, + { + "epoch": 0.9480232236660215, + "grad_norm": 0.9309593700987137, + "learning_rate": 1.4136180651925235e-07, + "loss": 0.3136, + "step": 54864 + }, + { + "epoch": 0.9480405031794304, + "grad_norm": 1.453974267755203, + "learning_rate": 1.4126805346867055e-07, + "loss": 0.1983, + "step": 54865 + }, + { + "epoch": 0.9480577826928394, + "grad_norm": 1.034080188206258, + "learning_rate": 1.4117433129638646e-07, + "loss": 0.2657, + "step": 54866 + }, + { + "epoch": 0.9480750622062483, + "grad_norm": 1.1901603400819154, + "learning_rate": 1.410806400026943e-07, + "loss": 0.3163, + "step": 54867 + }, + { + "epoch": 0.9480923417196572, + "grad_norm": 1.94824989133274, + "learning_rate": 1.409869795878871e-07, + "loss": 0.2404, + "step": 54868 + }, + { + "epoch": 0.9481096212330661, + "grad_norm": 1.0402625902751188, + "learning_rate": 1.408933500522569e-07, + "loss": 0.3402, + "step": 54869 + }, + { + "epoch": 0.948126900746475, + "grad_norm": 1.9340769691772368, + "learning_rate": 1.4079975139609792e-07, + "loss": 0.227, + "step": 54870 + }, + { + "epoch": 0.9481441802598839, + "grad_norm": 0.8994811569150859, + "learning_rate": 1.4070618361970433e-07, + "loss": 0.2279, + "step": 54871 + }, + { + "epoch": 0.9481614597732928, + "grad_norm": 0.6582570340381603, + "learning_rate": 1.4061264672336704e-07, + "loss": 0.4547, + "step": 54872 + }, + { + "epoch": 0.9481787392867017, + "grad_norm": 1.5106392951698164, + "learning_rate": 1.4051914070738138e-07, + "loss": 0.2581, + "step": 54873 + }, + { + "epoch": 0.9481960188001106, + "grad_norm": 0.92678516065406, + "learning_rate": 1.404256655720382e-07, + "loss": 0.2678, + "step": 54874 + }, + { + "epoch": 0.9482132983135195, + "grad_norm": 1.2044134501050965, + "learning_rate": 1.403322213176317e-07, + "loss": 0.2932, + "step": 54875 + }, + { + "epoch": 0.9482305778269284, + "grad_norm": 0.7889295815285017, + "learning_rate": 1.402388079444539e-07, + "loss": 0.4814, + "step": 54876 + }, + { + "epoch": 0.9482478573403373, + "grad_norm": 1.6128076203102062, + "learning_rate": 1.4014542545279787e-07, + "loss": 0.2528, + "step": 54877 + }, + { + "epoch": 0.9482651368537462, + "grad_norm": 1.2235176184851622, + "learning_rate": 1.400520738429556e-07, + "loss": 0.4414, + "step": 54878 + }, + { + "epoch": 0.948282416367155, + "grad_norm": 1.0651951229263663, + "learning_rate": 1.399587531152191e-07, + "loss": 0.2728, + "step": 54879 + }, + { + "epoch": 0.948299695880564, + "grad_norm": 1.2796074531813348, + "learning_rate": 1.3986546326988037e-07, + "loss": 0.2515, + "step": 54880 + }, + { + "epoch": 0.9483169753939729, + "grad_norm": 0.9587746227207882, + "learning_rate": 1.3977220430723358e-07, + "loss": 0.625, + "step": 54881 + }, + { + "epoch": 0.9483342549073818, + "grad_norm": 1.259827105611727, + "learning_rate": 1.3967897622756965e-07, + "loss": 0.4144, + "step": 54882 + }, + { + "epoch": 0.9483515344207907, + "grad_norm": 1.715701877310207, + "learning_rate": 1.3958577903117942e-07, + "loss": 0.3168, + "step": 54883 + }, + { + "epoch": 0.9483688139341996, + "grad_norm": 1.4284994561155462, + "learning_rate": 1.3949261271835711e-07, + "loss": 0.4319, + "step": 54884 + }, + { + "epoch": 0.9483860934476085, + "grad_norm": 0.7885408549731541, + "learning_rate": 1.393994772893925e-07, + "loss": 0.2345, + "step": 54885 + }, + { + "epoch": 0.9484033729610174, + "grad_norm": 1.8191006986671059, + "learning_rate": 1.3930637274457758e-07, + "loss": 0.2942, + "step": 54886 + }, + { + "epoch": 0.9484206524744263, + "grad_norm": 1.525819743864328, + "learning_rate": 1.3921329908420655e-07, + "loss": 0.2317, + "step": 54887 + }, + { + "epoch": 0.9484379319878352, + "grad_norm": 1.592238103609208, + "learning_rate": 1.3912025630856695e-07, + "loss": 0.279, + "step": 54888 + }, + { + "epoch": 0.9484552115012441, + "grad_norm": 1.8403337597664169, + "learning_rate": 1.39027244417953e-07, + "loss": 0.1135, + "step": 54889 + }, + { + "epoch": 0.948472491014653, + "grad_norm": 1.739183561724573, + "learning_rate": 1.3893426341265449e-07, + "loss": 0.341, + "step": 54890 + }, + { + "epoch": 0.9484897705280619, + "grad_norm": 2.012316418722178, + "learning_rate": 1.3884131329296447e-07, + "loss": 0.3052, + "step": 54891 + }, + { + "epoch": 0.9485070500414708, + "grad_norm": 0.9830461653714173, + "learning_rate": 1.3874839405917162e-07, + "loss": 0.288, + "step": 54892 + }, + { + "epoch": 0.9485243295548798, + "grad_norm": 1.1860883009499659, + "learning_rate": 1.3865550571156906e-07, + "loss": 0.3905, + "step": 54893 + }, + { + "epoch": 0.9485416090682887, + "grad_norm": 1.3289821334156688, + "learning_rate": 1.385626482504454e-07, + "loss": 0.2178, + "step": 54894 + }, + { + "epoch": 0.9485588885816976, + "grad_norm": 0.984680077661579, + "learning_rate": 1.384698216760949e-07, + "loss": 0.6623, + "step": 54895 + }, + { + "epoch": 0.9485761680951065, + "grad_norm": 1.0732043572451604, + "learning_rate": 1.383770259888062e-07, + "loss": 0.2771, + "step": 54896 + }, + { + "epoch": 0.9485934476085154, + "grad_norm": 0.7529079464714625, + "learning_rate": 1.3828426118886907e-07, + "loss": 0.9881, + "step": 54897 + }, + { + "epoch": 0.9486107271219243, + "grad_norm": 1.7009443016258354, + "learning_rate": 1.3819152727657548e-07, + "loss": 0.6054, + "step": 54898 + }, + { + "epoch": 0.9486280066353332, + "grad_norm": 0.8988085481622157, + "learning_rate": 1.3809882425221523e-07, + "loss": 0.279, + "step": 54899 + }, + { + "epoch": 0.9486452861487421, + "grad_norm": 1.547974719598052, + "learning_rate": 1.3800615211607916e-07, + "loss": 0.2443, + "step": 54900 + }, + { + "epoch": 0.9486625656621509, + "grad_norm": 1.3259553571795932, + "learning_rate": 1.3791351086845705e-07, + "loss": 0.2095, + "step": 54901 + }, + { + "epoch": 0.9486798451755598, + "grad_norm": 1.5319728424126897, + "learning_rate": 1.378209005096409e-07, + "loss": 0.3889, + "step": 54902 + }, + { + "epoch": 0.9486971246889687, + "grad_norm": 1.556127221248671, + "learning_rate": 1.3772832103991717e-07, + "loss": 0.3535, + "step": 54903 + }, + { + "epoch": 0.9487144042023776, + "grad_norm": 2.6566467131406197, + "learning_rate": 1.376357724595778e-07, + "loss": 0.2049, + "step": 54904 + }, + { + "epoch": 0.9487316837157865, + "grad_norm": 1.8592782793392137, + "learning_rate": 1.3754325476891372e-07, + "loss": 0.2888, + "step": 54905 + }, + { + "epoch": 0.9487489632291954, + "grad_norm": 1.3667062905902172, + "learning_rate": 1.3745076796821354e-07, + "loss": 0.3415, + "step": 54906 + }, + { + "epoch": 0.9487662427426043, + "grad_norm": 1.1000953977788908, + "learning_rate": 1.3735831205776817e-07, + "loss": 0.2711, + "step": 54907 + }, + { + "epoch": 0.9487835222560133, + "grad_norm": 1.345943467178734, + "learning_rate": 1.3726588703786403e-07, + "loss": 0.289, + "step": 54908 + }, + { + "epoch": 0.9488008017694222, + "grad_norm": 1.263006577049733, + "learning_rate": 1.3717349290879313e-07, + "loss": 0.3157, + "step": 54909 + }, + { + "epoch": 0.9488180812828311, + "grad_norm": 0.9434383690333087, + "learning_rate": 1.370811296708452e-07, + "loss": 0.4424, + "step": 54910 + }, + { + "epoch": 0.94883536079624, + "grad_norm": 1.8550058009718497, + "learning_rate": 1.3698879732430892e-07, + "loss": 0.2566, + "step": 54911 + }, + { + "epoch": 0.9488526403096489, + "grad_norm": 1.5295227692206466, + "learning_rate": 1.3689649586947184e-07, + "loss": 0.374, + "step": 54912 + }, + { + "epoch": 0.9488699198230578, + "grad_norm": 1.1060828524881094, + "learning_rate": 1.3680422530662596e-07, + "loss": 0.1816, + "step": 54913 + }, + { + "epoch": 0.9488871993364667, + "grad_norm": 0.9167422263466065, + "learning_rate": 1.367119856360588e-07, + "loss": 0.3631, + "step": 54914 + }, + { + "epoch": 0.9489044788498756, + "grad_norm": 1.451556821173712, + "learning_rate": 1.3661977685805795e-07, + "loss": 0.3395, + "step": 54915 + }, + { + "epoch": 0.9489217583632845, + "grad_norm": 0.6731444031373235, + "learning_rate": 1.3652759897291423e-07, + "loss": 0.1918, + "step": 54916 + }, + { + "epoch": 0.9489390378766934, + "grad_norm": 0.9437879859088619, + "learning_rate": 1.3643545198091634e-07, + "loss": 0.3773, + "step": 54917 + }, + { + "epoch": 0.9489563173901023, + "grad_norm": 1.462465362788983, + "learning_rate": 1.3634333588235183e-07, + "loss": 0.3179, + "step": 54918 + }, + { + "epoch": 0.9489735969035112, + "grad_norm": 1.4299521631152208, + "learning_rate": 1.3625125067750934e-07, + "loss": 0.3059, + "step": 54919 + }, + { + "epoch": 0.9489908764169201, + "grad_norm": 1.1101683986477064, + "learning_rate": 1.361591963666775e-07, + "loss": 0.2832, + "step": 54920 + }, + { + "epoch": 0.9490081559303291, + "grad_norm": 1.8449617467629886, + "learning_rate": 1.3606717295014392e-07, + "loss": 0.237, + "step": 54921 + }, + { + "epoch": 0.9490254354437379, + "grad_norm": 1.13052304900948, + "learning_rate": 1.3597518042819947e-07, + "loss": 0.5341, + "step": 54922 + }, + { + "epoch": 0.9490427149571468, + "grad_norm": 1.1607155412172518, + "learning_rate": 1.3588321880112942e-07, + "loss": 0.3044, + "step": 54923 + }, + { + "epoch": 0.9490599944705557, + "grad_norm": 1.1707249226289718, + "learning_rate": 1.3579128806922248e-07, + "loss": 0.2961, + "step": 54924 + }, + { + "epoch": 0.9490772739839646, + "grad_norm": 1.581117774801845, + "learning_rate": 1.3569938823276728e-07, + "loss": 0.3013, + "step": 54925 + }, + { + "epoch": 0.9490945534973735, + "grad_norm": 2.165645784790836, + "learning_rate": 1.356075192920503e-07, + "loss": 0.3158, + "step": 54926 + }, + { + "epoch": 0.9491118330107824, + "grad_norm": 1.5162347211002223, + "learning_rate": 1.3551568124736236e-07, + "loss": 0.5763, + "step": 54927 + }, + { + "epoch": 0.9491291125241913, + "grad_norm": 1.4280160319368456, + "learning_rate": 1.354238740989866e-07, + "loss": 0.4111, + "step": 54928 + }, + { + "epoch": 0.9491463920376002, + "grad_norm": 1.821818507479989, + "learning_rate": 1.3533209784721502e-07, + "loss": 0.421, + "step": 54929 + }, + { + "epoch": 0.9491636715510091, + "grad_norm": 1.7384924110716045, + "learning_rate": 1.352403524923318e-07, + "loss": 0.4212, + "step": 54930 + }, + { + "epoch": 0.949180951064418, + "grad_norm": 1.2961571560615737, + "learning_rate": 1.3514863803462565e-07, + "loss": 0.2737, + "step": 54931 + }, + { + "epoch": 0.9491982305778269, + "grad_norm": 1.4367313598107005, + "learning_rate": 1.3505695447438293e-07, + "loss": 0.2037, + "step": 54932 + }, + { + "epoch": 0.9492155100912358, + "grad_norm": 1.803402383135193, + "learning_rate": 1.349653018118935e-07, + "loss": 0.3832, + "step": 54933 + }, + { + "epoch": 0.9492327896046447, + "grad_norm": 1.194319438320157, + "learning_rate": 1.3487368004744038e-07, + "loss": 0.3754, + "step": 54934 + }, + { + "epoch": 0.9492500691180537, + "grad_norm": 1.051172653744362, + "learning_rate": 1.3478208918131452e-07, + "loss": 0.3592, + "step": 54935 + }, + { + "epoch": 0.9492673486314626, + "grad_norm": 1.8056199097686618, + "learning_rate": 1.34690529213799e-07, + "loss": 0.4501, + "step": 54936 + }, + { + "epoch": 0.9492846281448715, + "grad_norm": 1.9284821268982275, + "learning_rate": 1.3459900014518468e-07, + "loss": 0.2782, + "step": 54937 + }, + { + "epoch": 0.9493019076582804, + "grad_norm": 1.2927561099696503, + "learning_rate": 1.3450750197575357e-07, + "loss": 0.3561, + "step": 54938 + }, + { + "epoch": 0.9493191871716893, + "grad_norm": 0.923922927288954, + "learning_rate": 1.3441603470579544e-07, + "loss": 0.2692, + "step": 54939 + }, + { + "epoch": 0.9493364666850982, + "grad_norm": 1.6491353866196763, + "learning_rate": 1.3432459833559785e-07, + "loss": 0.3352, + "step": 54940 + }, + { + "epoch": 0.9493537461985071, + "grad_norm": 1.2973281447831795, + "learning_rate": 1.3423319286544389e-07, + "loss": 0.4533, + "step": 54941 + }, + { + "epoch": 0.949371025711916, + "grad_norm": 1.650096659534647, + "learning_rate": 1.3414181829562113e-07, + "loss": 0.3882, + "step": 54942 + }, + { + "epoch": 0.9493883052253248, + "grad_norm": 1.4909931313770666, + "learning_rate": 1.3405047462641595e-07, + "loss": 0.3081, + "step": 54943 + }, + { + "epoch": 0.9494055847387337, + "grad_norm": 1.4743850975762978, + "learning_rate": 1.3395916185811486e-07, + "loss": 0.3526, + "step": 54944 + }, + { + "epoch": 0.9494228642521426, + "grad_norm": 1.2154360621906384, + "learning_rate": 1.3386787999100314e-07, + "loss": 0.257, + "step": 54945 + }, + { + "epoch": 0.9494401437655515, + "grad_norm": 1.4317812295340855, + "learning_rate": 1.3377662902536837e-07, + "loss": 0.2663, + "step": 54946 + }, + { + "epoch": 0.9494574232789604, + "grad_norm": 1.5815408625360656, + "learning_rate": 1.3368540896149361e-07, + "loss": 0.1659, + "step": 54947 + }, + { + "epoch": 0.9494747027923693, + "grad_norm": 1.383415179686939, + "learning_rate": 1.3359421979966647e-07, + "loss": 0.4782, + "step": 54948 + }, + { + "epoch": 0.9494919823057782, + "grad_norm": 0.5864920438212606, + "learning_rate": 1.3350306154017113e-07, + "loss": 0.1568, + "step": 54949 + }, + { + "epoch": 0.9495092618191872, + "grad_norm": 1.0652226511626663, + "learning_rate": 1.3341193418329513e-07, + "loss": 0.3206, + "step": 54950 + }, + { + "epoch": 0.9495265413325961, + "grad_norm": 1.137810611161917, + "learning_rate": 1.3332083772932268e-07, + "loss": 0.3295, + "step": 54951 + }, + { + "epoch": 0.949543820846005, + "grad_norm": 2.1276305735566794, + "learning_rate": 1.3322977217853806e-07, + "loss": 0.2425, + "step": 54952 + }, + { + "epoch": 0.9495611003594139, + "grad_norm": 1.3096288442867028, + "learning_rate": 1.3313873753122985e-07, + "loss": 0.5521, + "step": 54953 + }, + { + "epoch": 0.9495783798728228, + "grad_norm": 1.2724088580987982, + "learning_rate": 1.33047733787679e-07, + "loss": 0.2211, + "step": 54954 + }, + { + "epoch": 0.9495956593862317, + "grad_norm": 1.3562279541178746, + "learning_rate": 1.32956760948173e-07, + "loss": 0.2868, + "step": 54955 + }, + { + "epoch": 0.9496129388996406, + "grad_norm": 1.3584719702556862, + "learning_rate": 1.328658190129961e-07, + "loss": 0.4033, + "step": 54956 + }, + { + "epoch": 0.9496302184130495, + "grad_norm": 1.4540667547295365, + "learning_rate": 1.3277490798243474e-07, + "loss": 0.3983, + "step": 54957 + }, + { + "epoch": 0.9496474979264584, + "grad_norm": 1.5650725731572461, + "learning_rate": 1.3268402785677093e-07, + "loss": 0.3497, + "step": 54958 + }, + { + "epoch": 0.9496647774398673, + "grad_norm": 1.3231933450082354, + "learning_rate": 1.3259317863629106e-07, + "loss": 0.4652, + "step": 54959 + }, + { + "epoch": 0.9496820569532762, + "grad_norm": 1.7783483142202259, + "learning_rate": 1.3250236032127829e-07, + "loss": 0.2216, + "step": 54960 + }, + { + "epoch": 0.9496993364666851, + "grad_norm": 1.2728420832484075, + "learning_rate": 1.32411572912019e-07, + "loss": 0.3032, + "step": 54961 + }, + { + "epoch": 0.949716615980094, + "grad_norm": 1.206144485134846, + "learning_rate": 1.3232081640879634e-07, + "loss": 0.3183, + "step": 54962 + }, + { + "epoch": 0.949733895493503, + "grad_norm": 1.4378354858485367, + "learning_rate": 1.3223009081189452e-07, + "loss": 0.2429, + "step": 54963 + }, + { + "epoch": 0.9497511750069118, + "grad_norm": 1.0484756489166214, + "learning_rate": 1.3213939612159888e-07, + "loss": 0.5289, + "step": 54964 + }, + { + "epoch": 0.9497684545203207, + "grad_norm": 1.261010654672915, + "learning_rate": 1.320487323381925e-07, + "loss": 0.2163, + "step": 54965 + }, + { + "epoch": 0.9497857340337296, + "grad_norm": 1.200570644279062, + "learning_rate": 1.3195809946195847e-07, + "loss": 0.3344, + "step": 54966 + }, + { + "epoch": 0.9498030135471385, + "grad_norm": 0.9439427712262092, + "learning_rate": 1.3186749749318218e-07, + "loss": 0.316, + "step": 54967 + }, + { + "epoch": 0.9498202930605474, + "grad_norm": 1.4448464961860505, + "learning_rate": 1.3177692643214668e-07, + "loss": 0.4694, + "step": 54968 + }, + { + "epoch": 0.9498375725739563, + "grad_norm": 0.905373683362984, + "learning_rate": 1.3168638627913622e-07, + "loss": 0.5541, + "step": 54969 + }, + { + "epoch": 0.9498548520873652, + "grad_norm": 1.4729699406806616, + "learning_rate": 1.315958770344339e-07, + "loss": 0.2844, + "step": 54970 + }, + { + "epoch": 0.9498721316007741, + "grad_norm": 1.334054722475162, + "learning_rate": 1.315053986983228e-07, + "loss": 0.3658, + "step": 54971 + }, + { + "epoch": 0.949889411114183, + "grad_norm": 1.4552189701157412, + "learning_rate": 1.3141495127108606e-07, + "loss": 0.272, + "step": 54972 + }, + { + "epoch": 0.9499066906275919, + "grad_norm": 1.9191096939997954, + "learning_rate": 1.31324534753009e-07, + "loss": 0.324, + "step": 54973 + }, + { + "epoch": 0.9499239701410008, + "grad_norm": 1.1922165068468877, + "learning_rate": 1.312341491443736e-07, + "loss": 0.3704, + "step": 54974 + }, + { + "epoch": 0.9499412496544097, + "grad_norm": 1.3615274364090377, + "learning_rate": 1.3114379444546187e-07, + "loss": 0.2431, + "step": 54975 + }, + { + "epoch": 0.9499585291678186, + "grad_norm": 1.4306734170509223, + "learning_rate": 1.3105347065655804e-07, + "loss": 0.4982, + "step": 54976 + }, + { + "epoch": 0.9499758086812276, + "grad_norm": 1.22466020024105, + "learning_rate": 1.309631777779452e-07, + "loss": 0.3447, + "step": 54977 + }, + { + "epoch": 0.9499930881946365, + "grad_norm": 1.7985098070485213, + "learning_rate": 1.3087291580990534e-07, + "loss": 0.6018, + "step": 54978 + }, + { + "epoch": 0.9500103677080454, + "grad_norm": 1.1170411428973868, + "learning_rate": 1.307826847527216e-07, + "loss": 0.6295, + "step": 54979 + }, + { + "epoch": 0.9500276472214543, + "grad_norm": 1.0743490608159352, + "learning_rate": 1.3069248460667705e-07, + "loss": 0.4183, + "step": 54980 + }, + { + "epoch": 0.9500449267348632, + "grad_norm": 1.1745583155459103, + "learning_rate": 1.306023153720526e-07, + "loss": 0.377, + "step": 54981 + }, + { + "epoch": 0.9500622062482721, + "grad_norm": 1.5148413045116338, + "learning_rate": 1.3051217704913355e-07, + "loss": 0.3946, + "step": 54982 + }, + { + "epoch": 0.950079485761681, + "grad_norm": 1.178452888358791, + "learning_rate": 1.304220696381986e-07, + "loss": 0.371, + "step": 54983 + }, + { + "epoch": 0.9500967652750899, + "grad_norm": 1.0114668812831122, + "learning_rate": 1.3033199313953193e-07, + "loss": 0.263, + "step": 54984 + }, + { + "epoch": 0.9501140447884987, + "grad_norm": 1.307845940059752, + "learning_rate": 1.3024194755341557e-07, + "loss": 0.4954, + "step": 54985 + }, + { + "epoch": 0.9501313243019076, + "grad_norm": 1.52791325388976, + "learning_rate": 1.3015193288013262e-07, + "loss": 0.2349, + "step": 54986 + }, + { + "epoch": 0.9501486038153165, + "grad_norm": 1.5981351887735078, + "learning_rate": 1.3006194911996283e-07, + "loss": 0.259, + "step": 54987 + }, + { + "epoch": 0.9501658833287254, + "grad_norm": 1.6429920436218377, + "learning_rate": 1.2997199627319047e-07, + "loss": 0.4277, + "step": 54988 + }, + { + "epoch": 0.9501831628421343, + "grad_norm": 1.1253887072831383, + "learning_rate": 1.2988207434009413e-07, + "loss": 0.3676, + "step": 54989 + }, + { + "epoch": 0.9502004423555432, + "grad_norm": 1.6216701488543124, + "learning_rate": 1.297921833209581e-07, + "loss": 0.2007, + "step": 54990 + }, + { + "epoch": 0.9502177218689521, + "grad_norm": 1.632157675425173, + "learning_rate": 1.2970232321606324e-07, + "loss": 0.1744, + "step": 54991 + }, + { + "epoch": 0.950235001382361, + "grad_norm": 1.397944481252123, + "learning_rate": 1.2961249402569043e-07, + "loss": 0.249, + "step": 54992 + }, + { + "epoch": 0.95025228089577, + "grad_norm": 1.5153136356389685, + "learning_rate": 1.2952269575012165e-07, + "loss": 0.281, + "step": 54993 + }, + { + "epoch": 0.9502695604091789, + "grad_norm": 1.431011464440913, + "learning_rate": 1.2943292838963782e-07, + "loss": 0.3604, + "step": 54994 + }, + { + "epoch": 0.9502868399225878, + "grad_norm": 1.536959075175531, + "learning_rate": 1.2934319194451982e-07, + "loss": 0.3271, + "step": 54995 + }, + { + "epoch": 0.9503041194359967, + "grad_norm": 1.0379435226647233, + "learning_rate": 1.2925348641504965e-07, + "loss": 0.3201, + "step": 54996 + }, + { + "epoch": 0.9503213989494056, + "grad_norm": 0.9329562880622901, + "learning_rate": 1.2916381180150705e-07, + "loss": 0.3405, + "step": 54997 + }, + { + "epoch": 0.9503386784628145, + "grad_norm": 1.6786904264178202, + "learning_rate": 1.2907416810417405e-07, + "loss": 0.2019, + "step": 54998 + }, + { + "epoch": 0.9503559579762234, + "grad_norm": 1.1481137162143666, + "learning_rate": 1.2898455532333155e-07, + "loss": 0.4215, + "step": 54999 + }, + { + "epoch": 0.9503732374896323, + "grad_norm": 0.9743553400495278, + "learning_rate": 1.2889497345925817e-07, + "loss": 0.1772, + "step": 55000 + }, + { + "epoch": 0.9503905170030412, + "grad_norm": 1.7133448926784944, + "learning_rate": 1.2880542251223705e-07, + "loss": 0.4978, + "step": 55001 + }, + { + "epoch": 0.9504077965164501, + "grad_norm": 1.729361571335806, + "learning_rate": 1.2871590248254685e-07, + "loss": 0.4384, + "step": 55002 + }, + { + "epoch": 0.950425076029859, + "grad_norm": 0.9772153479731999, + "learning_rate": 1.2862641337046955e-07, + "loss": 0.3307, + "step": 55003 + }, + { + "epoch": 0.950442355543268, + "grad_norm": 1.5273633274250455, + "learning_rate": 1.2853695517628384e-07, + "loss": 0.2213, + "step": 55004 + }, + { + "epoch": 0.9504596350566769, + "grad_norm": 1.4329201133976301, + "learning_rate": 1.284475279002706e-07, + "loss": 0.3855, + "step": 55005 + }, + { + "epoch": 0.9504769145700857, + "grad_norm": 1.3178220548857558, + "learning_rate": 1.283581315427096e-07, + "loss": 0.2529, + "step": 55006 + }, + { + "epoch": 0.9504941940834946, + "grad_norm": 1.4101930781892622, + "learning_rate": 1.2826876610388173e-07, + "loss": 0.3062, + "step": 55007 + }, + { + "epoch": 0.9505114735969035, + "grad_norm": 1.7788478441081514, + "learning_rate": 1.2817943158406564e-07, + "loss": 0.1747, + "step": 55008 + }, + { + "epoch": 0.9505287531103124, + "grad_norm": 1.0265548190950995, + "learning_rate": 1.2809012798354225e-07, + "loss": 0.3273, + "step": 55009 + }, + { + "epoch": 0.9505460326237213, + "grad_norm": 1.4490040795803754, + "learning_rate": 1.280008553025913e-07, + "loss": 0.2298, + "step": 55010 + }, + { + "epoch": 0.9505633121371302, + "grad_norm": 1.4421473788769135, + "learning_rate": 1.279116135414915e-07, + "loss": 0.1806, + "step": 55011 + }, + { + "epoch": 0.9505805916505391, + "grad_norm": 1.26102596048478, + "learning_rate": 1.278224027005226e-07, + "loss": 0.2833, + "step": 55012 + }, + { + "epoch": 0.950597871163948, + "grad_norm": 2.0467991486758286, + "learning_rate": 1.2773322277996436e-07, + "loss": 0.3018, + "step": 55013 + }, + { + "epoch": 0.9506151506773569, + "grad_norm": 0.9211947177929465, + "learning_rate": 1.2764407378009546e-07, + "loss": 0.374, + "step": 55014 + }, + { + "epoch": 0.9506324301907658, + "grad_norm": 1.0363329217022001, + "learning_rate": 1.275549557011968e-07, + "loss": 0.3439, + "step": 55015 + }, + { + "epoch": 0.9506497097041747, + "grad_norm": 2.2840971440007443, + "learning_rate": 1.274658685435448e-07, + "loss": 0.2131, + "step": 55016 + }, + { + "epoch": 0.9506669892175836, + "grad_norm": 1.8178638427036156, + "learning_rate": 1.273768123074215e-07, + "loss": 0.2184, + "step": 55017 + }, + { + "epoch": 0.9506842687309925, + "grad_norm": 1.131377332131148, + "learning_rate": 1.2728778699310218e-07, + "loss": 0.4937, + "step": 55018 + }, + { + "epoch": 0.9507015482444015, + "grad_norm": 1.4113347351484558, + "learning_rate": 1.2719879260087e-07, + "loss": 0.5404, + "step": 55019 + }, + { + "epoch": 0.9507188277578104, + "grad_norm": 1.1865469880922204, + "learning_rate": 1.2710982913100134e-07, + "loss": 0.267, + "step": 55020 + }, + { + "epoch": 0.9507361072712193, + "grad_norm": 0.6737821455194223, + "learning_rate": 1.270208965837738e-07, + "loss": 0.4133, + "step": 55021 + }, + { + "epoch": 0.9507533867846282, + "grad_norm": 1.8958354640398025, + "learning_rate": 1.2693199495946828e-07, + "loss": 0.3106, + "step": 55022 + }, + { + "epoch": 0.9507706662980371, + "grad_norm": 1.1533656909754488, + "learning_rate": 1.2684312425836121e-07, + "loss": 0.4926, + "step": 55023 + }, + { + "epoch": 0.950787945811446, + "grad_norm": 0.8845847433463371, + "learning_rate": 1.2675428448073124e-07, + "loss": 0.1767, + "step": 55024 + }, + { + "epoch": 0.9508052253248549, + "grad_norm": 3.9690699063575328, + "learning_rate": 1.2666547562685815e-07, + "loss": 0.285, + "step": 55025 + }, + { + "epoch": 0.9508225048382638, + "grad_norm": 1.5018599467564655, + "learning_rate": 1.2657669769701952e-07, + "loss": 0.3031, + "step": 55026 + }, + { + "epoch": 0.9508397843516727, + "grad_norm": 1.4554210929352132, + "learning_rate": 1.2648795069149178e-07, + "loss": 0.3213, + "step": 55027 + }, + { + "epoch": 0.9508570638650815, + "grad_norm": 1.058387251766441, + "learning_rate": 1.263992346105547e-07, + "loss": 0.3102, + "step": 55028 + }, + { + "epoch": 0.9508743433784904, + "grad_norm": 0.8641725504858171, + "learning_rate": 1.2631054945448585e-07, + "loss": 0.2808, + "step": 55029 + }, + { + "epoch": 0.9508916228918993, + "grad_norm": 0.8429972189102547, + "learning_rate": 1.2622189522356277e-07, + "loss": 0.3325, + "step": 55030 + }, + { + "epoch": 0.9509089024053082, + "grad_norm": 0.49687411058984093, + "learning_rate": 1.2613327191806302e-07, + "loss": 0.8095, + "step": 55031 + }, + { + "epoch": 0.9509261819187171, + "grad_norm": 1.1250549302705264, + "learning_rate": 1.2604467953826415e-07, + "loss": 0.3783, + "step": 55032 + }, + { + "epoch": 0.950943461432126, + "grad_norm": 1.6506042916458643, + "learning_rate": 1.2595611808444376e-07, + "loss": 0.451, + "step": 55033 + }, + { + "epoch": 0.950960740945535, + "grad_norm": 2.1057790795151488, + "learning_rate": 1.2586758755687933e-07, + "loss": 0.2801, + "step": 55034 + }, + { + "epoch": 0.9509780204589439, + "grad_norm": 1.313902688810346, + "learning_rate": 1.2577908795584735e-07, + "loss": 0.342, + "step": 55035 + }, + { + "epoch": 0.9509952999723528, + "grad_norm": 1.1355657137212036, + "learning_rate": 1.2569061928162652e-07, + "loss": 0.252, + "step": 55036 + }, + { + "epoch": 0.9510125794857617, + "grad_norm": 1.369475504288234, + "learning_rate": 1.256021815344932e-07, + "loss": 0.4941, + "step": 55037 + }, + { + "epoch": 0.9510298589991706, + "grad_norm": 1.4810853684142218, + "learning_rate": 1.2551377471472282e-07, + "loss": 0.5652, + "step": 55038 + }, + { + "epoch": 0.9510471385125795, + "grad_norm": 1.2834588971057928, + "learning_rate": 1.2542539882259508e-07, + "loss": 0.2602, + "step": 55039 + }, + { + "epoch": 0.9510644180259884, + "grad_norm": 1.1541719230448484, + "learning_rate": 1.2533705385838535e-07, + "loss": 0.4875, + "step": 55040 + }, + { + "epoch": 0.9510816975393973, + "grad_norm": 0.6307025626765763, + "learning_rate": 1.2524873982237007e-07, + "loss": 0.5392, + "step": 55041 + }, + { + "epoch": 0.9510989770528062, + "grad_norm": 2.28466496494867, + "learning_rate": 1.251604567148268e-07, + "loss": 0.2007, + "step": 55042 + }, + { + "epoch": 0.9511162565662151, + "grad_norm": 1.478756626326162, + "learning_rate": 1.2507220453603087e-07, + "loss": 0.4043, + "step": 55043 + }, + { + "epoch": 0.951133536079624, + "grad_norm": 1.2444451911171972, + "learning_rate": 1.2498398328625983e-07, + "loss": 0.3583, + "step": 55044 + }, + { + "epoch": 0.9511508155930329, + "grad_norm": 1.465834905093916, + "learning_rate": 1.2489579296578791e-07, + "loss": 0.3333, + "step": 55045 + }, + { + "epoch": 0.9511680951064418, + "grad_norm": 3.855698033446948, + "learning_rate": 1.248076335748938e-07, + "loss": 0.38, + "step": 55046 + }, + { + "epoch": 0.9511853746198508, + "grad_norm": 1.5927059256919716, + "learning_rate": 1.2471950511385277e-07, + "loss": 0.3442, + "step": 55047 + }, + { + "epoch": 0.9512026541332597, + "grad_norm": 1.4248864852083047, + "learning_rate": 1.2463140758294136e-07, + "loss": 0.5554, + "step": 55048 + }, + { + "epoch": 0.9512199336466685, + "grad_norm": 1.2368148835503145, + "learning_rate": 1.245433409824337e-07, + "loss": 0.2331, + "step": 55049 + }, + { + "epoch": 0.9512372131600774, + "grad_norm": 0.8564672227864848, + "learning_rate": 1.2445530531260742e-07, + "loss": 0.313, + "step": 55050 + }, + { + "epoch": 0.9512544926734863, + "grad_norm": 0.5797787708224188, + "learning_rate": 1.2436730057373781e-07, + "loss": 0.8848, + "step": 55051 + }, + { + "epoch": 0.9512717721868952, + "grad_norm": 3.8250012373962954, + "learning_rate": 1.242793267660991e-07, + "loss": 0.2827, + "step": 55052 + }, + { + "epoch": 0.9512890517003041, + "grad_norm": 1.1998003681452634, + "learning_rate": 1.2419138388997e-07, + "loss": 0.4096, + "step": 55053 + }, + { + "epoch": 0.951306331213713, + "grad_norm": 1.0900655508483004, + "learning_rate": 1.2410347194562244e-07, + "loss": 0.409, + "step": 55054 + }, + { + "epoch": 0.9513236107271219, + "grad_norm": 1.0589492715018445, + "learning_rate": 1.2401559093333516e-07, + "loss": 0.2989, + "step": 55055 + }, + { + "epoch": 0.9513408902405308, + "grad_norm": 2.2241908554585677, + "learning_rate": 1.2392774085338012e-07, + "loss": 0.4239, + "step": 55056 + }, + { + "epoch": 0.9513581697539397, + "grad_norm": 1.2967187554760409, + "learning_rate": 1.2383992170603376e-07, + "loss": 0.4709, + "step": 55057 + }, + { + "epoch": 0.9513754492673486, + "grad_norm": 0.916178223640558, + "learning_rate": 1.2375213349157146e-07, + "loss": 0.6094, + "step": 55058 + }, + { + "epoch": 0.9513927287807575, + "grad_norm": 0.6903053228849125, + "learning_rate": 1.236643762102685e-07, + "loss": 0.1849, + "step": 55059 + }, + { + "epoch": 0.9514100082941664, + "grad_norm": 1.1185453081986454, + "learning_rate": 1.2357664986239915e-07, + "loss": 0.3866, + "step": 55060 + }, + { + "epoch": 0.9514272878075754, + "grad_norm": 0.8517793927602727, + "learning_rate": 1.234889544482387e-07, + "loss": 0.4604, + "step": 55061 + }, + { + "epoch": 0.9514445673209843, + "grad_norm": 1.3349151727185717, + "learning_rate": 1.2340128996806143e-07, + "loss": 0.1912, + "step": 55062 + }, + { + "epoch": 0.9514618468343932, + "grad_norm": 1.5114135054836746, + "learning_rate": 1.2331365642214154e-07, + "loss": 0.2318, + "step": 55063 + }, + { + "epoch": 0.9514791263478021, + "grad_norm": 1.2690940686187913, + "learning_rate": 1.2322605381075326e-07, + "loss": 0.5482, + "step": 55064 + }, + { + "epoch": 0.951496405861211, + "grad_norm": 0.9231762320517076, + "learning_rate": 1.231384821341719e-07, + "loss": 0.258, + "step": 55065 + }, + { + "epoch": 0.9515136853746199, + "grad_norm": 1.2280067752904582, + "learning_rate": 1.230509413926728e-07, + "loss": 0.4076, + "step": 55066 + }, + { + "epoch": 0.9515309648880288, + "grad_norm": 1.7886361517055098, + "learning_rate": 1.229634315865269e-07, + "loss": 0.2473, + "step": 55067 + }, + { + "epoch": 0.9515482444014377, + "grad_norm": 1.5828501247101234, + "learning_rate": 1.2287595271601173e-07, + "loss": 0.3745, + "step": 55068 + }, + { + "epoch": 0.9515655239148466, + "grad_norm": 0.9134813292651817, + "learning_rate": 1.2278850478139925e-07, + "loss": 0.2561, + "step": 55069 + }, + { + "epoch": 0.9515828034282554, + "grad_norm": 1.157815006651713, + "learning_rate": 1.2270108778296264e-07, + "loss": 0.2273, + "step": 55070 + }, + { + "epoch": 0.9516000829416643, + "grad_norm": 2.0082518638738582, + "learning_rate": 1.2261370172097831e-07, + "loss": 0.3031, + "step": 55071 + }, + { + "epoch": 0.9516173624550732, + "grad_norm": 1.2040852187128148, + "learning_rate": 1.2252634659571717e-07, + "loss": 0.2702, + "step": 55072 + }, + { + "epoch": 0.9516346419684821, + "grad_norm": 1.4176534553823605, + "learning_rate": 1.2243902240745565e-07, + "loss": 0.2559, + "step": 55073 + }, + { + "epoch": 0.951651921481891, + "grad_norm": 1.462190801461608, + "learning_rate": 1.2235172915646465e-07, + "loss": 0.4007, + "step": 55074 + }, + { + "epoch": 0.9516692009953, + "grad_norm": 0.8788253000406959, + "learning_rate": 1.222644668430195e-07, + "loss": 0.3288, + "step": 55075 + }, + { + "epoch": 0.9516864805087089, + "grad_norm": 1.0126888124425368, + "learning_rate": 1.2217723546739225e-07, + "loss": 0.5052, + "step": 55076 + }, + { + "epoch": 0.9517037600221178, + "grad_norm": 1.141937173928423, + "learning_rate": 1.2209003502985706e-07, + "loss": 0.3734, + "step": 55077 + }, + { + "epoch": 0.9517210395355267, + "grad_norm": 2.0919072978658386, + "learning_rate": 1.2200286553068596e-07, + "loss": 0.2886, + "step": 55078 + }, + { + "epoch": 0.9517383190489356, + "grad_norm": 2.0897626057275622, + "learning_rate": 1.2191572697015319e-07, + "loss": 0.235, + "step": 55079 + }, + { + "epoch": 0.9517555985623445, + "grad_norm": 1.2419240679990426, + "learning_rate": 1.2182861934853073e-07, + "loss": 0.3507, + "step": 55080 + }, + { + "epoch": 0.9517728780757534, + "grad_norm": 1.555753262275435, + "learning_rate": 1.2174154266609174e-07, + "loss": 0.4658, + "step": 55081 + }, + { + "epoch": 0.9517901575891623, + "grad_norm": 0.9652989007635223, + "learning_rate": 1.2165449692310928e-07, + "loss": 0.2231, + "step": 55082 + }, + { + "epoch": 0.9518074371025712, + "grad_norm": 1.3840846968192908, + "learning_rate": 1.2156748211985426e-07, + "loss": 0.4497, + "step": 55083 + }, + { + "epoch": 0.9518247166159801, + "grad_norm": 1.465555774363309, + "learning_rate": 1.2148049825660313e-07, + "loss": 0.5718, + "step": 55084 + }, + { + "epoch": 0.951841996129389, + "grad_norm": 1.3591046880418125, + "learning_rate": 1.2139354533362346e-07, + "loss": 0.3689, + "step": 55085 + }, + { + "epoch": 0.9518592756427979, + "grad_norm": 3.7770175541536033, + "learning_rate": 1.2130662335119058e-07, + "loss": 0.3527, + "step": 55086 + }, + { + "epoch": 0.9518765551562068, + "grad_norm": 0.9977082572845072, + "learning_rate": 1.2121973230957652e-07, + "loss": 0.2077, + "step": 55087 + }, + { + "epoch": 0.9518938346696157, + "grad_norm": 1.4771123967005606, + "learning_rate": 1.2113287220905323e-07, + "loss": 0.3148, + "step": 55088 + }, + { + "epoch": 0.9519111141830247, + "grad_norm": 1.7580992610142596, + "learning_rate": 1.2104604304989165e-07, + "loss": 0.3677, + "step": 55089 + }, + { + "epoch": 0.9519283936964336, + "grad_norm": 1.7083885483921124, + "learning_rate": 1.20959244832366e-07, + "loss": 0.2471, + "step": 55090 + }, + { + "epoch": 0.9519456732098424, + "grad_norm": 1.150364792399635, + "learning_rate": 1.2087247755674603e-07, + "loss": 0.2509, + "step": 55091 + }, + { + "epoch": 0.9519629527232513, + "grad_norm": 1.45201595044002, + "learning_rate": 1.2078574122330377e-07, + "loss": 0.5194, + "step": 55092 + }, + { + "epoch": 0.9519802322366602, + "grad_norm": 1.0545323411018237, + "learning_rate": 1.2069903583231234e-07, + "loss": 0.184, + "step": 55093 + }, + { + "epoch": 0.9519975117500691, + "grad_norm": 0.980856941458843, + "learning_rate": 1.2061236138404154e-07, + "loss": 0.3478, + "step": 55094 + }, + { + "epoch": 0.952014791263478, + "grad_norm": 1.1672359549445557, + "learning_rate": 1.2052571787876443e-07, + "loss": 0.4818, + "step": 55095 + }, + { + "epoch": 0.9520320707768869, + "grad_norm": 0.8710344720121546, + "learning_rate": 1.2043910531675085e-07, + "loss": 0.3095, + "step": 55096 + }, + { + "epoch": 0.9520493502902958, + "grad_norm": 1.6129377146580064, + "learning_rate": 1.2035252369827277e-07, + "loss": 0.4789, + "step": 55097 + }, + { + "epoch": 0.9520666298037047, + "grad_norm": 1.0697989571938726, + "learning_rate": 1.202659730236011e-07, + "loss": 0.3946, + "step": 55098 + }, + { + "epoch": 0.9520839093171136, + "grad_norm": 1.8189146339504074, + "learning_rate": 1.2017945329300784e-07, + "loss": 0.259, + "step": 55099 + }, + { + "epoch": 0.9521011888305225, + "grad_norm": 1.001222395690563, + "learning_rate": 1.200929645067639e-07, + "loss": 0.3679, + "step": 55100 + }, + { + "epoch": 0.9521184683439314, + "grad_norm": 1.1864603230857507, + "learning_rate": 1.2000650666513903e-07, + "loss": 0.4833, + "step": 55101 + }, + { + "epoch": 0.9521357478573403, + "grad_norm": 1.4122419463957934, + "learning_rate": 1.1992007976840526e-07, + "loss": 0.4398, + "step": 55102 + }, + { + "epoch": 0.9521530273707492, + "grad_norm": 1.3499671351153453, + "learning_rate": 1.1983368381683125e-07, + "loss": 0.3842, + "step": 55103 + }, + { + "epoch": 0.9521703068841582, + "grad_norm": 1.0554268772888755, + "learning_rate": 1.1974731881069013e-07, + "loss": 0.2715, + "step": 55104 + }, + { + "epoch": 0.9521875863975671, + "grad_norm": 1.3301698235307042, + "learning_rate": 1.1966098475025056e-07, + "loss": 0.2971, + "step": 55105 + }, + { + "epoch": 0.952204865910976, + "grad_norm": 1.9779386327389816, + "learning_rate": 1.1957468163578456e-07, + "loss": 0.4689, + "step": 55106 + }, + { + "epoch": 0.9522221454243849, + "grad_norm": 0.9677720172045677, + "learning_rate": 1.194884094675608e-07, + "loss": 0.2559, + "step": 55107 + }, + { + "epoch": 0.9522394249377938, + "grad_norm": 2.2803397853100225, + "learning_rate": 1.1940216824585126e-07, + "loss": 0.3687, + "step": 55108 + }, + { + "epoch": 0.9522567044512027, + "grad_norm": 1.7366343933706432, + "learning_rate": 1.1931595797092354e-07, + "loss": 0.5733, + "step": 55109 + }, + { + "epoch": 0.9522739839646116, + "grad_norm": 2.1569635169141597, + "learning_rate": 1.1922977864304963e-07, + "loss": 0.5424, + "step": 55110 + }, + { + "epoch": 0.9522912634780205, + "grad_norm": 1.7335754829769394, + "learning_rate": 1.1914363026249931e-07, + "loss": 0.2896, + "step": 55111 + }, + { + "epoch": 0.9523085429914293, + "grad_norm": 1.0551910088311536, + "learning_rate": 1.1905751282954236e-07, + "loss": 0.294, + "step": 55112 + }, + { + "epoch": 0.9523258225048382, + "grad_norm": 2.20447127531302, + "learning_rate": 1.1897142634444858e-07, + "loss": 0.2953, + "step": 55113 + }, + { + "epoch": 0.9523431020182471, + "grad_norm": 1.5943996603608372, + "learning_rate": 1.1888537080748552e-07, + "loss": 0.3832, + "step": 55114 + }, + { + "epoch": 0.952360381531656, + "grad_norm": 1.2057228077172466, + "learning_rate": 1.187993462189252e-07, + "loss": 0.2275, + "step": 55115 + }, + { + "epoch": 0.9523776610450649, + "grad_norm": 1.360406659355725, + "learning_rate": 1.1871335257903627e-07, + "loss": 0.5162, + "step": 55116 + }, + { + "epoch": 0.9523949405584738, + "grad_norm": 1.5087373404700675, + "learning_rate": 1.1862738988808853e-07, + "loss": 0.3864, + "step": 55117 + }, + { + "epoch": 0.9524122200718828, + "grad_norm": 1.0924950681094205, + "learning_rate": 1.1854145814635065e-07, + "loss": 0.4312, + "step": 55118 + }, + { + "epoch": 0.9524294995852917, + "grad_norm": 0.8730908802142524, + "learning_rate": 1.1845555735409132e-07, + "loss": 0.2112, + "step": 55119 + }, + { + "epoch": 0.9524467790987006, + "grad_norm": 1.429336932633173, + "learning_rate": 1.1836968751158029e-07, + "loss": 0.361, + "step": 55120 + }, + { + "epoch": 0.9524640586121095, + "grad_norm": 0.9811291961220883, + "learning_rate": 1.1828384861908626e-07, + "loss": 0.5027, + "step": 55121 + }, + { + "epoch": 0.9524813381255184, + "grad_norm": 1.2914625125569776, + "learning_rate": 1.1819804067687901e-07, + "loss": 0.4675, + "step": 55122 + }, + { + "epoch": 0.9524986176389273, + "grad_norm": 2.125859436113785, + "learning_rate": 1.181122636852261e-07, + "loss": 0.5203, + "step": 55123 + }, + { + "epoch": 0.9525158971523362, + "grad_norm": 1.1018190495646676, + "learning_rate": 1.1802651764439732e-07, + "loss": 0.2402, + "step": 55124 + }, + { + "epoch": 0.9525331766657451, + "grad_norm": 1.4928568788276002, + "learning_rate": 1.1794080255465911e-07, + "loss": 0.3112, + "step": 55125 + }, + { + "epoch": 0.952550456179154, + "grad_norm": 1.4376772768067507, + "learning_rate": 1.1785511841628127e-07, + "loss": 0.326, + "step": 55126 + }, + { + "epoch": 0.9525677356925629, + "grad_norm": 2.1940079032932043, + "learning_rate": 1.1776946522953248e-07, + "loss": 0.3902, + "step": 55127 + }, + { + "epoch": 0.9525850152059718, + "grad_norm": 1.0222267581316296, + "learning_rate": 1.1768384299468138e-07, + "loss": 0.3613, + "step": 55128 + }, + { + "epoch": 0.9526022947193807, + "grad_norm": 1.2712611031918593, + "learning_rate": 1.1759825171199557e-07, + "loss": 0.3119, + "step": 55129 + }, + { + "epoch": 0.9526195742327896, + "grad_norm": 1.5901770981852765, + "learning_rate": 1.1751269138174259e-07, + "loss": 0.3037, + "step": 55130 + }, + { + "epoch": 0.9526368537461986, + "grad_norm": 1.1455398099599325, + "learning_rate": 1.1742716200419113e-07, + "loss": 0.3446, + "step": 55131 + }, + { + "epoch": 0.9526541332596075, + "grad_norm": 1.866695950100516, + "learning_rate": 1.1734166357960874e-07, + "loss": 0.3603, + "step": 55132 + }, + { + "epoch": 0.9526714127730163, + "grad_norm": 1.3427400770335387, + "learning_rate": 1.17256196108263e-07, + "loss": 0.3703, + "step": 55133 + }, + { + "epoch": 0.9526886922864252, + "grad_norm": 1.320644051006026, + "learning_rate": 1.1717075959042257e-07, + "loss": 0.2688, + "step": 55134 + }, + { + "epoch": 0.9527059717998341, + "grad_norm": 1.330886426767931, + "learning_rate": 1.170853540263539e-07, + "loss": 0.2692, + "step": 55135 + }, + { + "epoch": 0.952723251313243, + "grad_norm": 1.7723651926831963, + "learning_rate": 1.1699997941632569e-07, + "loss": 0.3459, + "step": 55136 + }, + { + "epoch": 0.9527405308266519, + "grad_norm": 1.1472447338409637, + "learning_rate": 1.1691463576060435e-07, + "loss": 0.2592, + "step": 55137 + }, + { + "epoch": 0.9527578103400608, + "grad_norm": 1.2712754887390658, + "learning_rate": 1.1682932305945638e-07, + "loss": 0.2867, + "step": 55138 + }, + { + "epoch": 0.9527750898534697, + "grad_norm": 1.8330852094289698, + "learning_rate": 1.1674404131315043e-07, + "loss": 0.2479, + "step": 55139 + }, + { + "epoch": 0.9527923693668786, + "grad_norm": 1.3494521438229885, + "learning_rate": 1.1665879052195406e-07, + "loss": 0.6118, + "step": 55140 + }, + { + "epoch": 0.9528096488802875, + "grad_norm": 1.3517087887850414, + "learning_rate": 1.1657357068613373e-07, + "loss": 0.7877, + "step": 55141 + }, + { + "epoch": 0.9528269283936964, + "grad_norm": 1.7946583198809563, + "learning_rate": 1.1648838180595478e-07, + "loss": 0.2942, + "step": 55142 + }, + { + "epoch": 0.9528442079071053, + "grad_norm": 0.8789777908986105, + "learning_rate": 1.164032238816859e-07, + "loss": 0.3338, + "step": 55143 + }, + { + "epoch": 0.9528614874205142, + "grad_norm": 1.712511478167945, + "learning_rate": 1.1631809691359352e-07, + "loss": 0.3946, + "step": 55144 + }, + { + "epoch": 0.9528787669339231, + "grad_norm": 1.0537622010650303, + "learning_rate": 1.16233000901943e-07, + "loss": 0.5545, + "step": 55145 + }, + { + "epoch": 0.9528960464473321, + "grad_norm": 1.316637425904501, + "learning_rate": 1.1614793584700301e-07, + "loss": 0.2704, + "step": 55146 + }, + { + "epoch": 0.952913325960741, + "grad_norm": 1.1935168110595618, + "learning_rate": 1.160629017490389e-07, + "loss": 0.4502, + "step": 55147 + }, + { + "epoch": 0.9529306054741499, + "grad_norm": 1.2341156347397504, + "learning_rate": 1.159778986083171e-07, + "loss": 0.3525, + "step": 55148 + }, + { + "epoch": 0.9529478849875588, + "grad_norm": 1.7030413461104374, + "learning_rate": 1.1589292642510297e-07, + "loss": 0.3166, + "step": 55149 + }, + { + "epoch": 0.9529651645009677, + "grad_norm": 1.3897295298680703, + "learning_rate": 1.1580798519966296e-07, + "loss": 0.8672, + "step": 55150 + }, + { + "epoch": 0.9529824440143766, + "grad_norm": 2.866579224210249, + "learning_rate": 1.1572307493226353e-07, + "loss": 0.2515, + "step": 55151 + }, + { + "epoch": 0.9529997235277855, + "grad_norm": 1.4379410711109681, + "learning_rate": 1.1563819562317114e-07, + "loss": 0.2473, + "step": 55152 + }, + { + "epoch": 0.9530170030411944, + "grad_norm": 1.7381190686637684, + "learning_rate": 1.1555334727265111e-07, + "loss": 0.3476, + "step": 55153 + }, + { + "epoch": 0.9530342825546032, + "grad_norm": 1.4439367466603394, + "learning_rate": 1.154685298809699e-07, + "loss": 0.3392, + "step": 55154 + }, + { + "epoch": 0.9530515620680121, + "grad_norm": 1.1033381847503092, + "learning_rate": 1.1538374344839176e-07, + "loss": 0.3518, + "step": 55155 + }, + { + "epoch": 0.953068841581421, + "grad_norm": 1.3372303759886537, + "learning_rate": 1.1529898797518202e-07, + "loss": 0.4083, + "step": 55156 + }, + { + "epoch": 0.9530861210948299, + "grad_norm": 1.1015063334445019, + "learning_rate": 1.1521426346160825e-07, + "loss": 0.2546, + "step": 55157 + }, + { + "epoch": 0.9531034006082388, + "grad_norm": 1.466147708278363, + "learning_rate": 1.1512956990793467e-07, + "loss": 0.4992, + "step": 55158 + }, + { + "epoch": 0.9531206801216477, + "grad_norm": 1.9381746279535068, + "learning_rate": 1.1504490731442664e-07, + "loss": 0.3923, + "step": 55159 + }, + { + "epoch": 0.9531379596350567, + "grad_norm": 1.0940121124431066, + "learning_rate": 1.1496027568134838e-07, + "loss": 0.3647, + "step": 55160 + }, + { + "epoch": 0.9531552391484656, + "grad_norm": 0.6552954252593798, + "learning_rate": 1.1487567500896524e-07, + "loss": 0.5379, + "step": 55161 + }, + { + "epoch": 0.9531725186618745, + "grad_norm": 1.5543027671427634, + "learning_rate": 1.1479110529754366e-07, + "loss": 0.1608, + "step": 55162 + }, + { + "epoch": 0.9531897981752834, + "grad_norm": 0.9078048857867783, + "learning_rate": 1.1470656654734791e-07, + "loss": 0.2, + "step": 55163 + }, + { + "epoch": 0.9532070776886923, + "grad_norm": 1.3085033043025462, + "learning_rate": 1.1462205875864219e-07, + "loss": 0.275, + "step": 55164 + }, + { + "epoch": 0.9532243572021012, + "grad_norm": 1.326830352141726, + "learning_rate": 1.1453758193169073e-07, + "loss": 0.4463, + "step": 55165 + }, + { + "epoch": 0.9532416367155101, + "grad_norm": 1.1280536597734054, + "learning_rate": 1.1445313606676e-07, + "loss": 0.4184, + "step": 55166 + }, + { + "epoch": 0.953258916228919, + "grad_norm": 1.2216770475947325, + "learning_rate": 1.1436872116411202e-07, + "loss": 0.3322, + "step": 55167 + }, + { + "epoch": 0.9532761957423279, + "grad_norm": 1.0266687735682052, + "learning_rate": 1.1428433722401323e-07, + "loss": 0.2958, + "step": 55168 + }, + { + "epoch": 0.9532934752557368, + "grad_norm": 1.0135504980901553, + "learning_rate": 1.1419998424672673e-07, + "loss": 0.2753, + "step": 55169 + }, + { + "epoch": 0.9533107547691457, + "grad_norm": 1.4627176010787475, + "learning_rate": 1.1411566223251902e-07, + "loss": 0.3108, + "step": 55170 + }, + { + "epoch": 0.9533280342825546, + "grad_norm": 0.9595331876158828, + "learning_rate": 1.1403137118164986e-07, + "loss": 0.2679, + "step": 55171 + }, + { + "epoch": 0.9533453137959635, + "grad_norm": 0.5575766693754263, + "learning_rate": 1.1394711109438683e-07, + "loss": 0.6117, + "step": 55172 + }, + { + "epoch": 0.9533625933093725, + "grad_norm": 1.3908205940472158, + "learning_rate": 1.1386288197099304e-07, + "loss": 0.4019, + "step": 55173 + }, + { + "epoch": 0.9533798728227814, + "grad_norm": 1.2417148259232647, + "learning_rate": 1.1377868381173162e-07, + "loss": 0.196, + "step": 55174 + }, + { + "epoch": 0.9533971523361903, + "grad_norm": 1.3153980432189187, + "learning_rate": 1.1369451661686681e-07, + "loss": 0.4852, + "step": 55175 + }, + { + "epoch": 0.9534144318495991, + "grad_norm": 1.2754240298499107, + "learning_rate": 1.1361038038666172e-07, + "loss": 0.5163, + "step": 55176 + }, + { + "epoch": 0.953431711363008, + "grad_norm": 1.1101217990012124, + "learning_rate": 1.135262751213817e-07, + "loss": 0.5549, + "step": 55177 + }, + { + "epoch": 0.9534489908764169, + "grad_norm": 0.9188756674740733, + "learning_rate": 1.1344220082128765e-07, + "loss": 0.2148, + "step": 55178 + }, + { + "epoch": 0.9534662703898258, + "grad_norm": 1.3106112597949662, + "learning_rate": 1.1335815748664269e-07, + "loss": 0.3001, + "step": 55179 + }, + { + "epoch": 0.9534835499032347, + "grad_norm": 2.186982421900737, + "learning_rate": 1.1327414511771329e-07, + "loss": 0.2818, + "step": 55180 + }, + { + "epoch": 0.9535008294166436, + "grad_norm": 1.3689280079651254, + "learning_rate": 1.1319016371476033e-07, + "loss": 0.4228, + "step": 55181 + }, + { + "epoch": 0.9535181089300525, + "grad_norm": 1.214662191405127, + "learning_rate": 1.1310621327804583e-07, + "loss": 0.2924, + "step": 55182 + }, + { + "epoch": 0.9535353884434614, + "grad_norm": 1.2654491269472357, + "learning_rate": 1.1302229380783624e-07, + "loss": 0.3409, + "step": 55183 + }, + { + "epoch": 0.9535526679568703, + "grad_norm": 2.170825872909646, + "learning_rate": 1.1293840530439026e-07, + "loss": 0.2415, + "step": 55184 + }, + { + "epoch": 0.9535699474702792, + "grad_norm": 1.4079547054477886, + "learning_rate": 1.1285454776797322e-07, + "loss": 0.2705, + "step": 55185 + }, + { + "epoch": 0.9535872269836881, + "grad_norm": 1.3163716668486107, + "learning_rate": 1.1277072119884713e-07, + "loss": 0.3467, + "step": 55186 + }, + { + "epoch": 0.953604506497097, + "grad_norm": 1.7459933026679568, + "learning_rate": 1.1268692559727512e-07, + "loss": 0.4763, + "step": 55187 + }, + { + "epoch": 0.953621786010506, + "grad_norm": 1.2136084131554485, + "learning_rate": 1.1260316096351809e-07, + "loss": 0.4799, + "step": 55188 + }, + { + "epoch": 0.9536390655239149, + "grad_norm": 1.875625778342529, + "learning_rate": 1.1251942729784027e-07, + "loss": 0.2454, + "step": 55189 + }, + { + "epoch": 0.9536563450373238, + "grad_norm": 0.961601230390735, + "learning_rate": 1.1243572460050256e-07, + "loss": 0.3759, + "step": 55190 + }, + { + "epoch": 0.9536736245507327, + "grad_norm": 2.0264806551027754, + "learning_rate": 1.1235205287176699e-07, + "loss": 0.4598, + "step": 55191 + }, + { + "epoch": 0.9536909040641416, + "grad_norm": 1.1613201257307115, + "learning_rate": 1.1226841211189665e-07, + "loss": 0.2375, + "step": 55192 + }, + { + "epoch": 0.9537081835775505, + "grad_norm": 0.8362624547428161, + "learning_rate": 1.1218480232115358e-07, + "loss": 0.8697, + "step": 55193 + }, + { + "epoch": 0.9537254630909594, + "grad_norm": 1.1654269063921732, + "learning_rate": 1.1210122349979979e-07, + "loss": 0.325, + "step": 55194 + }, + { + "epoch": 0.9537427426043683, + "grad_norm": 1.1836186264708533, + "learning_rate": 1.1201767564809507e-07, + "loss": 0.4865, + "step": 55195 + }, + { + "epoch": 0.9537600221177772, + "grad_norm": 1.4827379452856824, + "learning_rate": 1.1193415876630253e-07, + "loss": 0.2435, + "step": 55196 + }, + { + "epoch": 0.953777301631186, + "grad_norm": 0.8683921846817225, + "learning_rate": 1.1185067285468421e-07, + "loss": 0.1805, + "step": 55197 + }, + { + "epoch": 0.9537945811445949, + "grad_norm": 0.6593171208935674, + "learning_rate": 1.1176721791350098e-07, + "loss": 0.5796, + "step": 55198 + }, + { + "epoch": 0.9538118606580038, + "grad_norm": 1.0838986084395614, + "learning_rate": 1.1168379394301376e-07, + "loss": 0.5857, + "step": 55199 + }, + { + "epoch": 0.9538291401714127, + "grad_norm": 0.9422213141055465, + "learning_rate": 1.1160040094348456e-07, + "loss": 0.2838, + "step": 55200 + }, + { + "epoch": 0.9538464196848216, + "grad_norm": 1.4083486832591563, + "learning_rate": 1.115170389151754e-07, + "loss": 0.3193, + "step": 55201 + }, + { + "epoch": 0.9538636991982306, + "grad_norm": 1.824931796905018, + "learning_rate": 1.1143370785834495e-07, + "loss": 0.1093, + "step": 55202 + }, + { + "epoch": 0.9538809787116395, + "grad_norm": 1.510123965718104, + "learning_rate": 1.1135040777325635e-07, + "loss": 0.3815, + "step": 55203 + }, + { + "epoch": 0.9538982582250484, + "grad_norm": 1.3911492068257747, + "learning_rate": 1.1126713866016936e-07, + "loss": 0.2705, + "step": 55204 + }, + { + "epoch": 0.9539155377384573, + "grad_norm": 1.8186428858996093, + "learning_rate": 1.1118390051934602e-07, + "loss": 0.5602, + "step": 55205 + }, + { + "epoch": 0.9539328172518662, + "grad_norm": 0.9185547955889248, + "learning_rate": 1.1110069335104612e-07, + "loss": 0.3988, + "step": 55206 + }, + { + "epoch": 0.9539500967652751, + "grad_norm": 1.9053350425718205, + "learning_rate": 1.1101751715552944e-07, + "loss": 0.3189, + "step": 55207 + }, + { + "epoch": 0.953967376278684, + "grad_norm": 1.195407330995719, + "learning_rate": 1.1093437193305911e-07, + "loss": 0.2809, + "step": 55208 + }, + { + "epoch": 0.9539846557920929, + "grad_norm": 1.1191009381792307, + "learning_rate": 1.108512576838916e-07, + "loss": 0.3665, + "step": 55209 + }, + { + "epoch": 0.9540019353055018, + "grad_norm": 1.6021653101116167, + "learning_rate": 1.1076817440829113e-07, + "loss": 0.4833, + "step": 55210 + }, + { + "epoch": 0.9540192148189107, + "grad_norm": 1.2801718226118122, + "learning_rate": 1.1068512210651639e-07, + "loss": 0.5208, + "step": 55211 + }, + { + "epoch": 0.9540364943323196, + "grad_norm": 1.204434909638898, + "learning_rate": 1.1060210077882605e-07, + "loss": 0.3986, + "step": 55212 + }, + { + "epoch": 0.9540537738457285, + "grad_norm": 0.9746065992366195, + "learning_rate": 1.1051911042548213e-07, + "loss": 0.3056, + "step": 55213 + }, + { + "epoch": 0.9540710533591374, + "grad_norm": 1.417849532074451, + "learning_rate": 1.1043615104674443e-07, + "loss": 0.2139, + "step": 55214 + }, + { + "epoch": 0.9540883328725464, + "grad_norm": 0.8168892345161042, + "learning_rate": 1.1035322264287162e-07, + "loss": 0.1666, + "step": 55215 + }, + { + "epoch": 0.9541056123859553, + "grad_norm": 1.9070770766464875, + "learning_rate": 1.1027032521412573e-07, + "loss": 0.352, + "step": 55216 + }, + { + "epoch": 0.9541228918993642, + "grad_norm": 1.6991606137550186, + "learning_rate": 1.101874587607632e-07, + "loss": 0.4751, + "step": 55217 + }, + { + "epoch": 0.954140171412773, + "grad_norm": 1.3335681006853704, + "learning_rate": 1.1010462328304494e-07, + "loss": 0.2785, + "step": 55218 + }, + { + "epoch": 0.9541574509261819, + "grad_norm": 1.3412227851188074, + "learning_rate": 1.1002181878123075e-07, + "loss": 0.1526, + "step": 55219 + }, + { + "epoch": 0.9541747304395908, + "grad_norm": 2.2130482398532365, + "learning_rate": 1.099390452555804e-07, + "loss": 0.3367, + "step": 55220 + }, + { + "epoch": 0.9541920099529997, + "grad_norm": 1.2256384246771401, + "learning_rate": 1.0985630270635262e-07, + "loss": 0.3423, + "step": 55221 + }, + { + "epoch": 0.9542092894664086, + "grad_norm": 1.1709995465076852, + "learning_rate": 1.0977359113380603e-07, + "loss": 0.2286, + "step": 55222 + }, + { + "epoch": 0.9542265689798175, + "grad_norm": 0.9458888534055331, + "learning_rate": 1.0969091053820158e-07, + "loss": 0.6839, + "step": 55223 + }, + { + "epoch": 0.9542438484932264, + "grad_norm": 0.9687278532174702, + "learning_rate": 1.096082609197946e-07, + "loss": 0.1697, + "step": 55224 + }, + { + "epoch": 0.9542611280066353, + "grad_norm": 0.8763619412430604, + "learning_rate": 1.095256422788471e-07, + "loss": 0.3298, + "step": 55225 + }, + { + "epoch": 0.9542784075200442, + "grad_norm": 1.236694054085384, + "learning_rate": 1.0944305461561777e-07, + "loss": 0.4689, + "step": 55226 + }, + { + "epoch": 0.9542956870334531, + "grad_norm": 1.2943462082959873, + "learning_rate": 1.0936049793036308e-07, + "loss": 0.3076, + "step": 55227 + }, + { + "epoch": 0.954312966546862, + "grad_norm": 1.3019932447275502, + "learning_rate": 1.0927797222334502e-07, + "loss": 0.3195, + "step": 55228 + }, + { + "epoch": 0.954330246060271, + "grad_norm": 1.2323062053708789, + "learning_rate": 1.0919547749481784e-07, + "loss": 0.2584, + "step": 55229 + }, + { + "epoch": 0.9543475255736799, + "grad_norm": 1.7187410293079717, + "learning_rate": 1.0911301374504246e-07, + "loss": 0.3984, + "step": 55230 + }, + { + "epoch": 0.9543648050870888, + "grad_norm": 1.8605173561410904, + "learning_rate": 1.0903058097427755e-07, + "loss": 0.4434, + "step": 55231 + }, + { + "epoch": 0.9543820846004977, + "grad_norm": 0.7749977596240735, + "learning_rate": 1.0894817918278066e-07, + "loss": 0.316, + "step": 55232 + }, + { + "epoch": 0.9543993641139066, + "grad_norm": 1.63162456538501, + "learning_rate": 1.0886580837080941e-07, + "loss": 0.5134, + "step": 55233 + }, + { + "epoch": 0.9544166436273155, + "grad_norm": 1.4770291444112038, + "learning_rate": 1.0878346853862243e-07, + "loss": 0.279, + "step": 55234 + }, + { + "epoch": 0.9544339231407244, + "grad_norm": 1.0813425603890303, + "learning_rate": 1.0870115968647621e-07, + "loss": 0.5035, + "step": 55235 + }, + { + "epoch": 0.9544512026541333, + "grad_norm": 1.1163656304960234, + "learning_rate": 1.0861888181463054e-07, + "loss": 0.3464, + "step": 55236 + }, + { + "epoch": 0.9544684821675422, + "grad_norm": 1.1960073251649037, + "learning_rate": 1.0853663492334299e-07, + "loss": 0.5717, + "step": 55237 + }, + { + "epoch": 0.9544857616809511, + "grad_norm": 0.8986521844746446, + "learning_rate": 1.084544190128689e-07, + "loss": 0.4868, + "step": 55238 + }, + { + "epoch": 0.9545030411943599, + "grad_norm": 1.4220963071786616, + "learning_rate": 1.0837223408346808e-07, + "loss": 0.371, + "step": 55239 + }, + { + "epoch": 0.9545203207077688, + "grad_norm": 1.100196643146298, + "learning_rate": 1.08290080135397e-07, + "loss": 0.2056, + "step": 55240 + }, + { + "epoch": 0.9545376002211777, + "grad_norm": 1.6074220587435037, + "learning_rate": 1.082079571689132e-07, + "loss": 0.3474, + "step": 55241 + }, + { + "epoch": 0.9545548797345866, + "grad_norm": 1.580039536643276, + "learning_rate": 1.0812586518427315e-07, + "loss": 0.3163, + "step": 55242 + }, + { + "epoch": 0.9545721592479955, + "grad_norm": 1.3272071201683617, + "learning_rate": 1.0804380418173665e-07, + "loss": 0.4704, + "step": 55243 + }, + { + "epoch": 0.9545894387614045, + "grad_norm": 1.5033516950612016, + "learning_rate": 1.0796177416155684e-07, + "loss": 0.343, + "step": 55244 + }, + { + "epoch": 0.9546067182748134, + "grad_norm": 1.2858914442616263, + "learning_rate": 1.078797751239935e-07, + "loss": 0.3429, + "step": 55245 + }, + { + "epoch": 0.9546239977882223, + "grad_norm": 1.57981985477086, + "learning_rate": 1.0779780706930198e-07, + "loss": 0.3139, + "step": 55246 + }, + { + "epoch": 0.9546412773016312, + "grad_norm": 2.2691626346501224, + "learning_rate": 1.0771586999773875e-07, + "loss": 0.2878, + "step": 55247 + }, + { + "epoch": 0.9546585568150401, + "grad_norm": 1.3156284759301677, + "learning_rate": 1.0763396390956249e-07, + "loss": 0.396, + "step": 55248 + }, + { + "epoch": 0.954675836328449, + "grad_norm": 1.687803779344764, + "learning_rate": 1.0755208880502854e-07, + "loss": 0.3558, + "step": 55249 + }, + { + "epoch": 0.9546931158418579, + "grad_norm": 1.419735717216758, + "learning_rate": 1.0747024468439227e-07, + "loss": 0.3424, + "step": 55250 + }, + { + "epoch": 0.9547103953552668, + "grad_norm": 1.0380815784763728, + "learning_rate": 1.0738843154791123e-07, + "loss": 0.4839, + "step": 55251 + }, + { + "epoch": 0.9547276748686757, + "grad_norm": 1.6043946251905103, + "learning_rate": 1.073066493958419e-07, + "loss": 0.2639, + "step": 55252 + }, + { + "epoch": 0.9547449543820846, + "grad_norm": 1.145179033448705, + "learning_rate": 1.0722489822843962e-07, + "loss": 0.1836, + "step": 55253 + }, + { + "epoch": 0.9547622338954935, + "grad_norm": 1.2039482474670125, + "learning_rate": 1.0714317804596086e-07, + "loss": 0.3693, + "step": 55254 + }, + { + "epoch": 0.9547795134089024, + "grad_norm": 1.3192255759773654, + "learning_rate": 1.0706148884866207e-07, + "loss": 0.2454, + "step": 55255 + }, + { + "epoch": 0.9547967929223113, + "grad_norm": 1.8524921671344075, + "learning_rate": 1.069798306367975e-07, + "loss": 0.4077, + "step": 55256 + }, + { + "epoch": 0.9548140724357203, + "grad_norm": 2.4292816611681425, + "learning_rate": 1.0689820341062473e-07, + "loss": 0.2967, + "step": 55257 + }, + { + "epoch": 0.9548313519491292, + "grad_norm": 1.7555170849705433, + "learning_rate": 1.068166071703991e-07, + "loss": 0.7651, + "step": 55258 + }, + { + "epoch": 0.9548486314625381, + "grad_norm": 0.8906297942767717, + "learning_rate": 1.0673504191637374e-07, + "loss": 0.2718, + "step": 55259 + }, + { + "epoch": 0.9548659109759469, + "grad_norm": 1.774486133520761, + "learning_rate": 1.0665350764880733e-07, + "loss": 0.2181, + "step": 55260 + }, + { + "epoch": 0.9548831904893558, + "grad_norm": 1.3613377957531883, + "learning_rate": 1.0657200436795411e-07, + "loss": 0.4202, + "step": 55261 + }, + { + "epoch": 0.9549004700027647, + "grad_norm": 1.9165361688683127, + "learning_rate": 1.0649053207406945e-07, + "loss": 0.226, + "step": 55262 + }, + { + "epoch": 0.9549177495161736, + "grad_norm": 1.7466231984926985, + "learning_rate": 1.0640909076740868e-07, + "loss": 0.3726, + "step": 55263 + }, + { + "epoch": 0.9549350290295825, + "grad_norm": 1.124435063159067, + "learning_rate": 1.0632768044822494e-07, + "loss": 0.345, + "step": 55264 + }, + { + "epoch": 0.9549523085429914, + "grad_norm": 1.2923509613133808, + "learning_rate": 1.062463011167758e-07, + "loss": 0.2346, + "step": 55265 + }, + { + "epoch": 0.9549695880564003, + "grad_norm": 1.3259857532840629, + "learning_rate": 1.061649527733144e-07, + "loss": 0.3067, + "step": 55266 + }, + { + "epoch": 0.9549868675698092, + "grad_norm": 1.1901592728022528, + "learning_rate": 1.0608363541809718e-07, + "loss": 0.3879, + "step": 55267 + }, + { + "epoch": 0.9550041470832181, + "grad_norm": 1.463274520723646, + "learning_rate": 1.060023490513784e-07, + "loss": 0.2641, + "step": 55268 + }, + { + "epoch": 0.955021426596627, + "grad_norm": 1.655343335354009, + "learning_rate": 1.059210936734112e-07, + "loss": 0.2589, + "step": 55269 + }, + { + "epoch": 0.9550387061100359, + "grad_norm": 1.187450710147505, + "learning_rate": 1.058398692844509e-07, + "loss": 0.8932, + "step": 55270 + }, + { + "epoch": 0.9550559856234448, + "grad_norm": 1.6483646097778657, + "learning_rate": 1.0575867588475175e-07, + "loss": 0.2643, + "step": 55271 + }, + { + "epoch": 0.9550732651368538, + "grad_norm": 1.4737025403670077, + "learning_rate": 1.0567751347457023e-07, + "loss": 0.5126, + "step": 55272 + }, + { + "epoch": 0.9550905446502627, + "grad_norm": 1.3986620541790586, + "learning_rate": 1.0559638205415723e-07, + "loss": 0.2997, + "step": 55273 + }, + { + "epoch": 0.9551078241636716, + "grad_norm": 0.9855291546190589, + "learning_rate": 1.0551528162376811e-07, + "loss": 0.433, + "step": 55274 + }, + { + "epoch": 0.9551251036770805, + "grad_norm": 1.1928716416747562, + "learning_rate": 1.0543421218365824e-07, + "loss": 0.4813, + "step": 55275 + }, + { + "epoch": 0.9551423831904894, + "grad_norm": 1.5784258968171798, + "learning_rate": 1.0535317373407849e-07, + "loss": 0.2582, + "step": 55276 + }, + { + "epoch": 0.9551596627038983, + "grad_norm": 1.2878256820031901, + "learning_rate": 1.0527216627528647e-07, + "loss": 0.1971, + "step": 55277 + }, + { + "epoch": 0.9551769422173072, + "grad_norm": 1.945473204051331, + "learning_rate": 1.0519118980753306e-07, + "loss": 0.1919, + "step": 55278 + }, + { + "epoch": 0.9551942217307161, + "grad_norm": 1.8694134103996274, + "learning_rate": 1.0511024433107364e-07, + "loss": 0.3704, + "step": 55279 + }, + { + "epoch": 0.955211501244125, + "grad_norm": 0.8837293141252897, + "learning_rate": 1.0502932984616021e-07, + "loss": 0.2681, + "step": 55280 + }, + { + "epoch": 0.9552287807575338, + "grad_norm": 0.7186925419474522, + "learning_rate": 1.0494844635304702e-07, + "loss": 0.8663, + "step": 55281 + }, + { + "epoch": 0.9552460602709427, + "grad_norm": 1.064153640252453, + "learning_rate": 1.0486759385198608e-07, + "loss": 0.2678, + "step": 55282 + }, + { + "epoch": 0.9552633397843516, + "grad_norm": 0.9227479113543208, + "learning_rate": 1.0478677234323386e-07, + "loss": 0.1608, + "step": 55283 + }, + { + "epoch": 0.9552806192977605, + "grad_norm": 1.3816801123594724, + "learning_rate": 1.0470598182704017e-07, + "loss": 0.5203, + "step": 55284 + }, + { + "epoch": 0.9552978988111694, + "grad_norm": 0.7152642001749251, + "learning_rate": 1.0462522230366034e-07, + "loss": 0.7219, + "step": 55285 + }, + { + "epoch": 0.9553151783245784, + "grad_norm": 1.6219228652787923, + "learning_rate": 1.045444937733453e-07, + "loss": 0.2638, + "step": 55286 + }, + { + "epoch": 0.9553324578379873, + "grad_norm": 0.9399346245344136, + "learning_rate": 1.0446379623634927e-07, + "loss": 0.6175, + "step": 55287 + }, + { + "epoch": 0.9553497373513962, + "grad_norm": 1.4810649694070184, + "learning_rate": 1.0438312969292541e-07, + "loss": 0.2493, + "step": 55288 + }, + { + "epoch": 0.9553670168648051, + "grad_norm": 1.7642221934449516, + "learning_rate": 1.0430249414332571e-07, + "loss": 0.4964, + "step": 55289 + }, + { + "epoch": 0.955384296378214, + "grad_norm": 2.704338966217012, + "learning_rate": 1.0422188958780222e-07, + "loss": 0.2614, + "step": 55290 + }, + { + "epoch": 0.9554015758916229, + "grad_norm": 0.9667671379536068, + "learning_rate": 1.0414131602660693e-07, + "loss": 0.3203, + "step": 55291 + }, + { + "epoch": 0.9554188554050318, + "grad_norm": 2.2152784301037305, + "learning_rate": 1.040607734599941e-07, + "loss": 0.3258, + "step": 55292 + }, + { + "epoch": 0.9554361349184407, + "grad_norm": 1.383983598473412, + "learning_rate": 1.0398026188821353e-07, + "loss": 0.3512, + "step": 55293 + }, + { + "epoch": 0.9554534144318496, + "grad_norm": 1.820687154731003, + "learning_rate": 1.0389978131152057e-07, + "loss": 0.3538, + "step": 55294 + }, + { + "epoch": 0.9554706939452585, + "grad_norm": 1.7005204900030058, + "learning_rate": 1.0381933173016501e-07, + "loss": 0.2879, + "step": 55295 + }, + { + "epoch": 0.9554879734586674, + "grad_norm": 1.5065802260474748, + "learning_rate": 1.037389131444e-07, + "loss": 0.4115, + "step": 55296 + }, + { + "epoch": 0.9555052529720763, + "grad_norm": 1.507441953179665, + "learning_rate": 1.0365852555447642e-07, + "loss": 0.2041, + "step": 55297 + }, + { + "epoch": 0.9555225324854852, + "grad_norm": 1.239983320915975, + "learning_rate": 1.0357816896064631e-07, + "loss": 0.3778, + "step": 55298 + }, + { + "epoch": 0.9555398119988942, + "grad_norm": 1.9556236451078144, + "learning_rate": 1.034978433631606e-07, + "loss": 0.2908, + "step": 55299 + }, + { + "epoch": 0.9555570915123031, + "grad_norm": 1.4454888686497454, + "learning_rate": 1.0341754876227239e-07, + "loss": 0.4652, + "step": 55300 + }, + { + "epoch": 0.955574371025712, + "grad_norm": 1.6423847452731246, + "learning_rate": 1.0333728515823372e-07, + "loss": 0.4226, + "step": 55301 + }, + { + "epoch": 0.9555916505391209, + "grad_norm": 1.2655173315678214, + "learning_rate": 1.0325705255129326e-07, + "loss": 0.2636, + "step": 55302 + }, + { + "epoch": 0.9556089300525297, + "grad_norm": 1.276130764769277, + "learning_rate": 1.0317685094170415e-07, + "loss": 0.199, + "step": 55303 + }, + { + "epoch": 0.9556262095659386, + "grad_norm": 1.769636064917593, + "learning_rate": 1.0309668032971732e-07, + "loss": 0.2638, + "step": 55304 + }, + { + "epoch": 0.9556434890793475, + "grad_norm": 1.3851294430447216, + "learning_rate": 1.0301654071558365e-07, + "loss": 0.5634, + "step": 55305 + }, + { + "epoch": 0.9556607685927564, + "grad_norm": 1.40438193298238, + "learning_rate": 1.0293643209955407e-07, + "loss": 0.7073, + "step": 55306 + }, + { + "epoch": 0.9556780481061653, + "grad_norm": 2.958674435196663, + "learning_rate": 1.028563544818817e-07, + "loss": 0.262, + "step": 55307 + }, + { + "epoch": 0.9556953276195742, + "grad_norm": 0.9046640375856863, + "learning_rate": 1.0277630786281412e-07, + "loss": 0.2156, + "step": 55308 + }, + { + "epoch": 0.9557126071329831, + "grad_norm": 2.0767396824549396, + "learning_rate": 1.0269629224260225e-07, + "loss": 0.4014, + "step": 55309 + }, + { + "epoch": 0.955729886646392, + "grad_norm": 1.1072262531146435, + "learning_rate": 1.0261630762149921e-07, + "loss": 0.2105, + "step": 55310 + }, + { + "epoch": 0.9557471661598009, + "grad_norm": 2.085193661076059, + "learning_rate": 1.0253635399975259e-07, + "loss": 0.313, + "step": 55311 + }, + { + "epoch": 0.9557644456732098, + "grad_norm": 1.1265484056020008, + "learning_rate": 1.0245643137761552e-07, + "loss": 0.3086, + "step": 55312 + }, + { + "epoch": 0.9557817251866187, + "grad_norm": 1.4365181818969694, + "learning_rate": 1.0237653975533668e-07, + "loss": 0.2286, + "step": 55313 + }, + { + "epoch": 0.9557990047000277, + "grad_norm": 1.1665165517100857, + "learning_rate": 1.0229667913316699e-07, + "loss": 0.1932, + "step": 55314 + }, + { + "epoch": 0.9558162842134366, + "grad_norm": 1.528080709453203, + "learning_rate": 1.0221684951135624e-07, + "loss": 0.4464, + "step": 55315 + }, + { + "epoch": 0.9558335637268455, + "grad_norm": 1.7842705713160658, + "learning_rate": 1.0213705089015425e-07, + "loss": 0.3565, + "step": 55316 + }, + { + "epoch": 0.9558508432402544, + "grad_norm": 1.1889382501102166, + "learning_rate": 1.020572832698108e-07, + "loss": 0.5722, + "step": 55317 + }, + { + "epoch": 0.9558681227536633, + "grad_norm": 1.7307786035477613, + "learning_rate": 1.0197754665057679e-07, + "loss": 0.3352, + "step": 55318 + }, + { + "epoch": 0.9558854022670722, + "grad_norm": 1.7052919542447442, + "learning_rate": 1.0189784103270206e-07, + "loss": 0.2782, + "step": 55319 + }, + { + "epoch": 0.9559026817804811, + "grad_norm": 0.9282032002246772, + "learning_rate": 1.0181816641643417e-07, + "loss": 0.3429, + "step": 55320 + }, + { + "epoch": 0.95591996129389, + "grad_norm": 1.3670302361903195, + "learning_rate": 1.0173852280202512e-07, + "loss": 0.4343, + "step": 55321 + }, + { + "epoch": 0.9559372408072989, + "grad_norm": 1.422901576460108, + "learning_rate": 1.0165891018972252e-07, + "loss": 0.4564, + "step": 55322 + }, + { + "epoch": 0.9559545203207078, + "grad_norm": 1.832828605569773, + "learning_rate": 1.0157932857977614e-07, + "loss": 0.3964, + "step": 55323 + }, + { + "epoch": 0.9559717998341166, + "grad_norm": 0.9723124378015676, + "learning_rate": 1.0149977797243582e-07, + "loss": 0.3636, + "step": 55324 + }, + { + "epoch": 0.9559890793475255, + "grad_norm": 1.651942454159222, + "learning_rate": 1.0142025836795132e-07, + "loss": 0.2157, + "step": 55325 + }, + { + "epoch": 0.9560063588609344, + "grad_norm": 1.511066689270629, + "learning_rate": 1.0134076976657026e-07, + "loss": 0.2367, + "step": 55326 + }, + { + "epoch": 0.9560236383743433, + "grad_norm": 1.0877093296612752, + "learning_rate": 1.0126131216854241e-07, + "loss": 0.6697, + "step": 55327 + }, + { + "epoch": 0.9560409178877523, + "grad_norm": 2.1822636370549415, + "learning_rate": 1.0118188557411535e-07, + "loss": 0.5635, + "step": 55328 + }, + { + "epoch": 0.9560581974011612, + "grad_norm": 2.2262670575934185, + "learning_rate": 1.0110248998354e-07, + "loss": 0.4095, + "step": 55329 + }, + { + "epoch": 0.9560754769145701, + "grad_norm": 2.075566995233965, + "learning_rate": 1.0102312539706393e-07, + "loss": 0.2916, + "step": 55330 + }, + { + "epoch": 0.956092756427979, + "grad_norm": 1.0502250577670724, + "learning_rate": 1.0094379181493585e-07, + "loss": 0.371, + "step": 55331 + }, + { + "epoch": 0.9561100359413879, + "grad_norm": 0.9203624972385672, + "learning_rate": 1.0086448923740333e-07, + "loss": 0.4109, + "step": 55332 + }, + { + "epoch": 0.9561273154547968, + "grad_norm": 1.3994492406887202, + "learning_rate": 1.0078521766471505e-07, + "loss": 0.3239, + "step": 55333 + }, + { + "epoch": 0.9561445949682057, + "grad_norm": 1.3894940114565253, + "learning_rate": 1.0070597709712082e-07, + "loss": 0.3365, + "step": 55334 + }, + { + "epoch": 0.9561618744816146, + "grad_norm": 1.9691322116963292, + "learning_rate": 1.0062676753486823e-07, + "loss": 0.5135, + "step": 55335 + }, + { + "epoch": 0.9561791539950235, + "grad_norm": 1.239124636216499, + "learning_rate": 1.0054758897820482e-07, + "loss": 0.4124, + "step": 55336 + }, + { + "epoch": 0.9561964335084324, + "grad_norm": 0.9803836607658476, + "learning_rate": 1.004684414273771e-07, + "loss": 0.3782, + "step": 55337 + }, + { + "epoch": 0.9562137130218413, + "grad_norm": 1.4854871437497348, + "learning_rate": 1.0038932488263598e-07, + "loss": 0.1848, + "step": 55338 + }, + { + "epoch": 0.9562309925352502, + "grad_norm": 1.2658862084127782, + "learning_rate": 1.003102393442279e-07, + "loss": 0.2965, + "step": 55339 + }, + { + "epoch": 0.9562482720486591, + "grad_norm": 1.0700327308604696, + "learning_rate": 1.0023118481239936e-07, + "loss": 0.3683, + "step": 55340 + }, + { + "epoch": 0.956265551562068, + "grad_norm": 1.7449636899758971, + "learning_rate": 1.0015216128740013e-07, + "loss": 0.3202, + "step": 55341 + }, + { + "epoch": 0.956282831075477, + "grad_norm": 1.1345883009221662, + "learning_rate": 1.000731687694767e-07, + "loss": 0.2364, + "step": 55342 + }, + { + "epoch": 0.9563001105888859, + "grad_norm": 0.7492246664776034, + "learning_rate": 9.999420725887665e-08, + "loss": 0.1624, + "step": 55343 + }, + { + "epoch": 0.9563173901022948, + "grad_norm": 2.0136024802150887, + "learning_rate": 9.991527675584645e-08, + "loss": 0.4964, + "step": 55344 + }, + { + "epoch": 0.9563346696157036, + "grad_norm": 1.0717648822823869, + "learning_rate": 9.983637726063366e-08, + "loss": 0.2561, + "step": 55345 + }, + { + "epoch": 0.9563519491291125, + "grad_norm": 1.0986432277421396, + "learning_rate": 9.975750877348588e-08, + "loss": 0.4439, + "step": 55346 + }, + { + "epoch": 0.9563692286425214, + "grad_norm": 1.6271525809803549, + "learning_rate": 9.96786712946507e-08, + "loss": 0.3209, + "step": 55347 + }, + { + "epoch": 0.9563865081559303, + "grad_norm": 1.107505241890117, + "learning_rate": 9.959986482437345e-08, + "loss": 0.497, + "step": 55348 + }, + { + "epoch": 0.9564037876693392, + "grad_norm": 1.301389900944351, + "learning_rate": 9.952108936290284e-08, + "loss": 0.388, + "step": 55349 + }, + { + "epoch": 0.9564210671827481, + "grad_norm": 1.3509094783729316, + "learning_rate": 9.944234491048421e-08, + "loss": 0.2547, + "step": 55350 + }, + { + "epoch": 0.956438346696157, + "grad_norm": 1.1890994497629583, + "learning_rate": 9.936363146736405e-08, + "loss": 0.1855, + "step": 55351 + }, + { + "epoch": 0.9564556262095659, + "grad_norm": 1.2078573289581827, + "learning_rate": 9.928494903378994e-08, + "loss": 0.3861, + "step": 55352 + }, + { + "epoch": 0.9564729057229748, + "grad_norm": 0.8386986795310363, + "learning_rate": 9.920629761000722e-08, + "loss": 0.352, + "step": 55353 + }, + { + "epoch": 0.9564901852363837, + "grad_norm": 1.2782122187339753, + "learning_rate": 9.912767719626348e-08, + "loss": 0.2033, + "step": 55354 + }, + { + "epoch": 0.9565074647497926, + "grad_norm": 1.0029530600763599, + "learning_rate": 9.904908779280409e-08, + "loss": 0.3467, + "step": 55355 + }, + { + "epoch": 0.9565247442632016, + "grad_norm": 1.4632681816657302, + "learning_rate": 9.89705293998755e-08, + "loss": 0.4067, + "step": 55356 + }, + { + "epoch": 0.9565420237766105, + "grad_norm": 1.2665390038812836, + "learning_rate": 9.889200201772308e-08, + "loss": 0.4955, + "step": 55357 + }, + { + "epoch": 0.9565593032900194, + "grad_norm": 1.282507268669349, + "learning_rate": 9.881350564659442e-08, + "loss": 0.4257, + "step": 55358 + }, + { + "epoch": 0.9565765828034283, + "grad_norm": 1.3637869521708021, + "learning_rate": 9.873504028673376e-08, + "loss": 0.2923, + "step": 55359 + }, + { + "epoch": 0.9565938623168372, + "grad_norm": 1.4678576368695144, + "learning_rate": 9.865660593838755e-08, + "loss": 0.3373, + "step": 55360 + }, + { + "epoch": 0.9566111418302461, + "grad_norm": 1.6065262808718432, + "learning_rate": 9.857820260180118e-08, + "loss": 0.2976, + "step": 55361 + }, + { + "epoch": 0.956628421343655, + "grad_norm": 1.2765154218367842, + "learning_rate": 9.849983027722e-08, + "loss": 0.5266, + "step": 55362 + }, + { + "epoch": 0.9566457008570639, + "grad_norm": 1.6409031094777033, + "learning_rate": 9.842148896489045e-08, + "loss": 0.1885, + "step": 55363 + }, + { + "epoch": 0.9566629803704728, + "grad_norm": 1.0745339005051575, + "learning_rate": 9.834317866505683e-08, + "loss": 0.3133, + "step": 55364 + }, + { + "epoch": 0.9566802598838817, + "grad_norm": 0.9827658755583556, + "learning_rate": 9.826489937796557e-08, + "loss": 0.2172, + "step": 55365 + }, + { + "epoch": 0.9566975393972905, + "grad_norm": 1.3743107465183588, + "learning_rate": 9.818665110385983e-08, + "loss": 0.2232, + "step": 55366 + }, + { + "epoch": 0.9567148189106994, + "grad_norm": 1.5515094262466738, + "learning_rate": 9.810843384298608e-08, + "loss": 0.4094, + "step": 55367 + }, + { + "epoch": 0.9567320984241083, + "grad_norm": 1.2471919456599103, + "learning_rate": 9.803024759559076e-08, + "loss": 0.2136, + "step": 55368 + }, + { + "epoch": 0.9567493779375172, + "grad_norm": 1.8419073109454918, + "learning_rate": 9.795209236191594e-08, + "loss": 0.311, + "step": 55369 + }, + { + "epoch": 0.9567666574509262, + "grad_norm": 0.8134978263362195, + "learning_rate": 9.787396814220918e-08, + "loss": 0.4206, + "step": 55370 + }, + { + "epoch": 0.9567839369643351, + "grad_norm": 1.447306607727045, + "learning_rate": 9.77958749367125e-08, + "loss": 0.3984, + "step": 55371 + }, + { + "epoch": 0.956801216477744, + "grad_norm": 1.2190665491793982, + "learning_rate": 9.771781274567128e-08, + "loss": 0.2722, + "step": 55372 + }, + { + "epoch": 0.9568184959911529, + "grad_norm": 1.3955217567903375, + "learning_rate": 9.763978156933085e-08, + "loss": 0.4193, + "step": 55373 + }, + { + "epoch": 0.9568357755045618, + "grad_norm": 1.0736936475878591, + "learning_rate": 9.756178140793548e-08, + "loss": 0.2987, + "step": 55374 + }, + { + "epoch": 0.9568530550179707, + "grad_norm": 1.4012160339219477, + "learning_rate": 9.748381226172943e-08, + "loss": 0.4223, + "step": 55375 + }, + { + "epoch": 0.9568703345313796, + "grad_norm": 1.5660486287326925, + "learning_rate": 9.740587413095582e-08, + "loss": 0.4063, + "step": 55376 + }, + { + "epoch": 0.9568876140447885, + "grad_norm": 1.2752408564646607, + "learning_rate": 9.732796701586e-08, + "loss": 0.4374, + "step": 55377 + }, + { + "epoch": 0.9569048935581974, + "grad_norm": 1.689637824579257, + "learning_rate": 9.725009091668625e-08, + "loss": 0.4153, + "step": 55378 + }, + { + "epoch": 0.9569221730716063, + "grad_norm": 1.211957623741725, + "learning_rate": 9.717224583367657e-08, + "loss": 0.5415, + "step": 55379 + }, + { + "epoch": 0.9569394525850152, + "grad_norm": 1.6700817498355434, + "learning_rate": 9.709443176707633e-08, + "loss": 0.4243, + "step": 55380 + }, + { + "epoch": 0.9569567320984241, + "grad_norm": 0.8907883343726166, + "learning_rate": 9.701664871712979e-08, + "loss": 0.2905, + "step": 55381 + }, + { + "epoch": 0.956974011611833, + "grad_norm": 1.1949481478840862, + "learning_rate": 9.693889668407896e-08, + "loss": 0.2315, + "step": 55382 + }, + { + "epoch": 0.956991291125242, + "grad_norm": 1.869476090086315, + "learning_rate": 9.68611756681681e-08, + "loss": 0.2412, + "step": 55383 + }, + { + "epoch": 0.9570085706386509, + "grad_norm": 2.2997762010946077, + "learning_rate": 9.678348566964036e-08, + "loss": 0.2867, + "step": 55384 + }, + { + "epoch": 0.9570258501520598, + "grad_norm": 0.7981196852403535, + "learning_rate": 9.670582668873996e-08, + "loss": 0.2186, + "step": 55385 + }, + { + "epoch": 0.9570431296654687, + "grad_norm": 1.5564216961731476, + "learning_rate": 9.662819872570894e-08, + "loss": 0.321, + "step": 55386 + }, + { + "epoch": 0.9570604091788775, + "grad_norm": 1.4359885223072157, + "learning_rate": 9.655060178079157e-08, + "loss": 0.3808, + "step": 55387 + }, + { + "epoch": 0.9570776886922864, + "grad_norm": 1.094080297513371, + "learning_rate": 9.647303585422984e-08, + "loss": 0.3478, + "step": 55388 + }, + { + "epoch": 0.9570949682056953, + "grad_norm": 0.7810753276959568, + "learning_rate": 9.639550094626804e-08, + "loss": 0.2181, + "step": 55389 + }, + { + "epoch": 0.9571122477191042, + "grad_norm": 1.567780037347826, + "learning_rate": 9.631799705714817e-08, + "loss": 0.2681, + "step": 55390 + }, + { + "epoch": 0.9571295272325131, + "grad_norm": 1.4783376624910933, + "learning_rate": 9.624052418711227e-08, + "loss": 0.242, + "step": 55391 + }, + { + "epoch": 0.957146806745922, + "grad_norm": 1.0888939740666368, + "learning_rate": 9.616308233640348e-08, + "loss": 0.3154, + "step": 55392 + }, + { + "epoch": 0.9571640862593309, + "grad_norm": 1.3354169175333779, + "learning_rate": 9.608567150526494e-08, + "loss": 0.3797, + "step": 55393 + }, + { + "epoch": 0.9571813657727398, + "grad_norm": 1.1869395757392487, + "learning_rate": 9.600829169393977e-08, + "loss": 0.4498, + "step": 55394 + }, + { + "epoch": 0.9571986452861487, + "grad_norm": 1.4516084319101823, + "learning_rate": 9.593094290266781e-08, + "loss": 0.3485, + "step": 55395 + }, + { + "epoch": 0.9572159247995576, + "grad_norm": 1.364211406640038, + "learning_rate": 9.585362513169328e-08, + "loss": 0.2714, + "step": 55396 + }, + { + "epoch": 0.9572332043129665, + "grad_norm": 1.1667666272983457, + "learning_rate": 9.577633838125822e-08, + "loss": 0.4158, + "step": 55397 + }, + { + "epoch": 0.9572504838263755, + "grad_norm": 1.2254385014519327, + "learning_rate": 9.569908265160465e-08, + "loss": 0.2819, + "step": 55398 + }, + { + "epoch": 0.9572677633397844, + "grad_norm": 2.2230494307982847, + "learning_rate": 9.562185794297352e-08, + "loss": 0.3999, + "step": 55399 + }, + { + "epoch": 0.9572850428531933, + "grad_norm": 1.0868740685623892, + "learning_rate": 9.554466425560793e-08, + "loss": 0.3127, + "step": 55400 + }, + { + "epoch": 0.9573023223666022, + "grad_norm": 1.8392783381280264, + "learning_rate": 9.546750158974772e-08, + "loss": 0.3007, + "step": 55401 + }, + { + "epoch": 0.9573196018800111, + "grad_norm": 1.5134885975240193, + "learning_rate": 9.539036994563711e-08, + "loss": 0.4219, + "step": 55402 + }, + { + "epoch": 0.95733688139342, + "grad_norm": 1.1947155689018938, + "learning_rate": 9.531326932351703e-08, + "loss": 0.356, + "step": 55403 + }, + { + "epoch": 0.9573541609068289, + "grad_norm": 0.864016806764268, + "learning_rate": 9.523619972362841e-08, + "loss": 0.3477, + "step": 55404 + }, + { + "epoch": 0.9573714404202378, + "grad_norm": 1.6635583322860175, + "learning_rate": 9.515916114621327e-08, + "loss": 0.2495, + "step": 55405 + }, + { + "epoch": 0.9573887199336467, + "grad_norm": 1.1801461934757558, + "learning_rate": 9.50821535915114e-08, + "loss": 0.5042, + "step": 55406 + }, + { + "epoch": 0.9574059994470556, + "grad_norm": 1.2482095738299215, + "learning_rate": 9.500517705976486e-08, + "loss": 0.3175, + "step": 55407 + }, + { + "epoch": 0.9574232789604644, + "grad_norm": 1.3010745029087871, + "learning_rate": 9.492823155121456e-08, + "loss": 0.3291, + "step": 55408 + }, + { + "epoch": 0.9574405584738733, + "grad_norm": 0.7638248026882263, + "learning_rate": 9.485131706610251e-08, + "loss": 0.7571, + "step": 55409 + }, + { + "epoch": 0.9574578379872822, + "grad_norm": 1.0046074083554093, + "learning_rate": 9.477443360466853e-08, + "loss": 0.4422, + "step": 55410 + }, + { + "epoch": 0.9574751175006911, + "grad_norm": 1.4747244711085628, + "learning_rate": 9.469758116715467e-08, + "loss": 0.2375, + "step": 55411 + }, + { + "epoch": 0.9574923970141, + "grad_norm": 1.366896764889476, + "learning_rate": 9.462075975380069e-08, + "loss": 0.2312, + "step": 55412 + }, + { + "epoch": 0.957509676527509, + "grad_norm": 1.7883214546375885, + "learning_rate": 9.454396936484533e-08, + "loss": 0.418, + "step": 55413 + }, + { + "epoch": 0.9575269560409179, + "grad_norm": 1.2068552132926844, + "learning_rate": 9.446721000053283e-08, + "loss": 0.5185, + "step": 55414 + }, + { + "epoch": 0.9575442355543268, + "grad_norm": 2.4077040703859907, + "learning_rate": 9.439048166110187e-08, + "loss": 0.1988, + "step": 55415 + }, + { + "epoch": 0.9575615150677357, + "grad_norm": 2.0907437828352307, + "learning_rate": 9.431378434679229e-08, + "loss": 0.3937, + "step": 55416 + }, + { + "epoch": 0.9575787945811446, + "grad_norm": 1.366783827648383, + "learning_rate": 9.423711805784497e-08, + "loss": 0.7749, + "step": 55417 + }, + { + "epoch": 0.9575960740945535, + "grad_norm": 2.5765046587441898, + "learning_rate": 9.416048279449974e-08, + "loss": 0.2408, + "step": 55418 + }, + { + "epoch": 0.9576133536079624, + "grad_norm": 1.152982496825725, + "learning_rate": 9.40838785569953e-08, + "loss": 0.4599, + "step": 55419 + }, + { + "epoch": 0.9576306331213713, + "grad_norm": 1.9216160608201678, + "learning_rate": 9.400730534557479e-08, + "loss": 0.4099, + "step": 55420 + }, + { + "epoch": 0.9576479126347802, + "grad_norm": 1.223004818512174, + "learning_rate": 9.393076316047577e-08, + "loss": 0.3265, + "step": 55421 + }, + { + "epoch": 0.9576651921481891, + "grad_norm": 1.3785440355582472, + "learning_rate": 9.385425200193809e-08, + "loss": 0.152, + "step": 55422 + }, + { + "epoch": 0.957682471661598, + "grad_norm": 1.9871302442153238, + "learning_rate": 9.377777187020264e-08, + "loss": 0.364, + "step": 55423 + }, + { + "epoch": 0.9576997511750069, + "grad_norm": 1.3537695036814714, + "learning_rate": 9.370132276550704e-08, + "loss": 0.3533, + "step": 55424 + }, + { + "epoch": 0.9577170306884158, + "grad_norm": 1.1643111943512332, + "learning_rate": 9.362490468809215e-08, + "loss": 0.2407, + "step": 55425 + }, + { + "epoch": 0.9577343102018248, + "grad_norm": 1.7253182011347543, + "learning_rate": 9.354851763819672e-08, + "loss": 0.1743, + "step": 55426 + }, + { + "epoch": 0.9577515897152337, + "grad_norm": 2.018875443467593, + "learning_rate": 9.347216161606054e-08, + "loss": 0.4948, + "step": 55427 + }, + { + "epoch": 0.9577688692286426, + "grad_norm": 1.0028123718153024, + "learning_rate": 9.33958366219212e-08, + "loss": 0.3209, + "step": 55428 + }, + { + "epoch": 0.9577861487420514, + "grad_norm": 1.9929315944297823, + "learning_rate": 9.331954265602073e-08, + "loss": 0.3676, + "step": 55429 + }, + { + "epoch": 0.9578034282554603, + "grad_norm": 0.9922671445239913, + "learning_rate": 9.32432797185956e-08, + "loss": 0.3832, + "step": 55430 + }, + { + "epoch": 0.9578207077688692, + "grad_norm": 1.5069400320512238, + "learning_rate": 9.31670478098845e-08, + "loss": 0.3545, + "step": 55431 + }, + { + "epoch": 0.9578379872822781, + "grad_norm": 1.8448894298025593, + "learning_rate": 9.309084693012838e-08, + "loss": 0.3867, + "step": 55432 + }, + { + "epoch": 0.957855266795687, + "grad_norm": 2.0728781252782476, + "learning_rate": 9.301467707956479e-08, + "loss": 0.537, + "step": 55433 + }, + { + "epoch": 0.9578725463090959, + "grad_norm": 0.8811666170435195, + "learning_rate": 9.293853825843135e-08, + "loss": 0.3219, + "step": 55434 + }, + { + "epoch": 0.9578898258225048, + "grad_norm": 0.9354939303080149, + "learning_rate": 9.286243046696786e-08, + "loss": 0.2523, + "step": 55435 + }, + { + "epoch": 0.9579071053359137, + "grad_norm": 1.8243061387820205, + "learning_rate": 9.278635370541078e-08, + "loss": 0.4905, + "step": 55436 + }, + { + "epoch": 0.9579243848493226, + "grad_norm": 0.9346547099482325, + "learning_rate": 9.271030797400104e-08, + "loss": 0.1585, + "step": 55437 + }, + { + "epoch": 0.9579416643627315, + "grad_norm": 0.9060363259031133, + "learning_rate": 9.263429327297624e-08, + "loss": 0.2975, + "step": 55438 + }, + { + "epoch": 0.9579589438761404, + "grad_norm": 0.7987696028446073, + "learning_rate": 9.255830960257173e-08, + "loss": 0.5059, + "step": 55439 + }, + { + "epoch": 0.9579762233895494, + "grad_norm": 1.304572281836392, + "learning_rate": 9.248235696302954e-08, + "loss": 0.3227, + "step": 55440 + }, + { + "epoch": 0.9579935029029583, + "grad_norm": 1.0858524204044446, + "learning_rate": 9.240643535458393e-08, + "loss": 0.2948, + "step": 55441 + }, + { + "epoch": 0.9580107824163672, + "grad_norm": 0.8840936637157829, + "learning_rate": 9.233054477747472e-08, + "loss": 0.3636, + "step": 55442 + }, + { + "epoch": 0.9580280619297761, + "grad_norm": 0.5477678623348802, + "learning_rate": 9.22546852319406e-08, + "loss": 0.6151, + "step": 55443 + }, + { + "epoch": 0.958045341443185, + "grad_norm": 1.0718609312566674, + "learning_rate": 9.217885671821581e-08, + "loss": 0.277, + "step": 55444 + }, + { + "epoch": 0.9580626209565939, + "grad_norm": 1.4810416188043987, + "learning_rate": 9.210305923654129e-08, + "loss": 0.4074, + "step": 55445 + }, + { + "epoch": 0.9580799004700028, + "grad_norm": 1.0980402192062484, + "learning_rate": 9.202729278715238e-08, + "loss": 0.1379, + "step": 55446 + }, + { + "epoch": 0.9580971799834117, + "grad_norm": 1.484609490518726, + "learning_rate": 9.195155737028671e-08, + "loss": 0.3479, + "step": 55447 + }, + { + "epoch": 0.9581144594968206, + "grad_norm": 1.4408690791848715, + "learning_rate": 9.187585298618184e-08, + "loss": 0.461, + "step": 55448 + }, + { + "epoch": 0.9581317390102295, + "grad_norm": 1.085927188866917, + "learning_rate": 9.180017963507537e-08, + "loss": 0.367, + "step": 55449 + }, + { + "epoch": 0.9581490185236384, + "grad_norm": 1.1358888217047316, + "learning_rate": 9.172453731720265e-08, + "loss": 0.2583, + "step": 55450 + }, + { + "epoch": 0.9581662980370472, + "grad_norm": 1.1726320931074994, + "learning_rate": 9.16489260328024e-08, + "loss": 0.434, + "step": 55451 + }, + { + "epoch": 0.9581835775504561, + "grad_norm": 1.2933369770105874, + "learning_rate": 9.157334578211108e-08, + "loss": 0.2792, + "step": 55452 + }, + { + "epoch": 0.958200857063865, + "grad_norm": 1.0457997552320124, + "learning_rate": 9.149779656536406e-08, + "loss": 0.507, + "step": 55453 + }, + { + "epoch": 0.958218136577274, + "grad_norm": 1.239075159548504, + "learning_rate": 9.142227838280005e-08, + "loss": 0.4945, + "step": 55454 + }, + { + "epoch": 0.9582354160906829, + "grad_norm": 1.6531870525091574, + "learning_rate": 9.134679123465329e-08, + "loss": 0.4017, + "step": 55455 + }, + { + "epoch": 0.9582526956040918, + "grad_norm": 1.0221213479828462, + "learning_rate": 9.127133512116249e-08, + "loss": 0.4187, + "step": 55456 + }, + { + "epoch": 0.9582699751175007, + "grad_norm": 2.0444273338332795, + "learning_rate": 9.1195910042563e-08, + "loss": 0.3009, + "step": 55457 + }, + { + "epoch": 0.9582872546309096, + "grad_norm": 1.0034899842683978, + "learning_rate": 9.112051599909022e-08, + "loss": 0.2949, + "step": 55458 + }, + { + "epoch": 0.9583045341443185, + "grad_norm": 1.1695416596485793, + "learning_rate": 9.10451529909806e-08, + "loss": 0.2965, + "step": 55459 + }, + { + "epoch": 0.9583218136577274, + "grad_norm": 1.7418518236340272, + "learning_rate": 9.096982101847174e-08, + "loss": 0.4082, + "step": 55460 + }, + { + "epoch": 0.9583390931711363, + "grad_norm": 1.2395775064310401, + "learning_rate": 9.089452008179789e-08, + "loss": 0.3704, + "step": 55461 + }, + { + "epoch": 0.9583563726845452, + "grad_norm": 1.0230110269402244, + "learning_rate": 9.081925018119552e-08, + "loss": 0.2699, + "step": 55462 + }, + { + "epoch": 0.9583736521979541, + "grad_norm": 0.9350513916861759, + "learning_rate": 9.074401131690113e-08, + "loss": 0.276, + "step": 55463 + }, + { + "epoch": 0.958390931711363, + "grad_norm": 2.332285378531408, + "learning_rate": 9.066880348914786e-08, + "loss": 0.2567, + "step": 55464 + }, + { + "epoch": 0.9584082112247719, + "grad_norm": 1.0661379596602056, + "learning_rate": 9.05936266981744e-08, + "loss": 0.3293, + "step": 55465 + }, + { + "epoch": 0.9584254907381808, + "grad_norm": 0.8530732734551999, + "learning_rate": 9.05184809442139e-08, + "loss": 0.188, + "step": 55466 + }, + { + "epoch": 0.9584427702515897, + "grad_norm": 2.177731944694614, + "learning_rate": 9.044336622750282e-08, + "loss": 0.4449, + "step": 55467 + }, + { + "epoch": 0.9584600497649987, + "grad_norm": 2.197237875426692, + "learning_rate": 9.036828254827656e-08, + "loss": 0.3483, + "step": 55468 + }, + { + "epoch": 0.9584773292784076, + "grad_norm": 2.200833604158253, + "learning_rate": 9.029322990677047e-08, + "loss": 0.3358, + "step": 55469 + }, + { + "epoch": 0.9584946087918165, + "grad_norm": 1.0833806326383646, + "learning_rate": 9.02182083032177e-08, + "loss": 0.3186, + "step": 55470 + }, + { + "epoch": 0.9585118883052254, + "grad_norm": 0.9684454736644591, + "learning_rate": 9.014321773785583e-08, + "loss": 0.2899, + "step": 55471 + }, + { + "epoch": 0.9585291678186342, + "grad_norm": 0.9808318683554736, + "learning_rate": 9.006825821091803e-08, + "loss": 0.2307, + "step": 55472 + }, + { + "epoch": 0.9585464473320431, + "grad_norm": 2.0804072500316866, + "learning_rate": 8.999332972263963e-08, + "loss": 0.2678, + "step": 55473 + }, + { + "epoch": 0.958563726845452, + "grad_norm": 1.4163441140895923, + "learning_rate": 8.991843227325492e-08, + "loss": 0.3594, + "step": 55474 + }, + { + "epoch": 0.9585810063588609, + "grad_norm": 1.345195454608171, + "learning_rate": 8.984356586299924e-08, + "loss": 0.3097, + "step": 55475 + }, + { + "epoch": 0.9585982858722698, + "grad_norm": 1.9135594683821273, + "learning_rate": 8.976873049210689e-08, + "loss": 0.387, + "step": 55476 + }, + { + "epoch": 0.9586155653856787, + "grad_norm": 1.6142508310223933, + "learning_rate": 8.969392616081096e-08, + "loss": 0.3888, + "step": 55477 + }, + { + "epoch": 0.9586328448990876, + "grad_norm": 1.5183346520893621, + "learning_rate": 8.961915286934798e-08, + "loss": 0.3686, + "step": 55478 + }, + { + "epoch": 0.9586501244124965, + "grad_norm": 1.0001438500174242, + "learning_rate": 8.954441061794994e-08, + "loss": 0.6322, + "step": 55479 + }, + { + "epoch": 0.9586674039259054, + "grad_norm": 1.0110120901397381, + "learning_rate": 8.946969940685335e-08, + "loss": 0.516, + "step": 55480 + }, + { + "epoch": 0.9586846834393143, + "grad_norm": 1.1734072762129808, + "learning_rate": 8.939501923628912e-08, + "loss": 0.338, + "step": 55481 + }, + { + "epoch": 0.9587019629527233, + "grad_norm": 1.4890070514037075, + "learning_rate": 8.932037010649375e-08, + "loss": 0.3877, + "step": 55482 + }, + { + "epoch": 0.9587192424661322, + "grad_norm": 1.738820218720932, + "learning_rate": 8.924575201770147e-08, + "loss": 0.3788, + "step": 55483 + }, + { + "epoch": 0.9587365219795411, + "grad_norm": 1.5903217446084112, + "learning_rate": 8.917116497014212e-08, + "loss": 0.3529, + "step": 55484 + }, + { + "epoch": 0.95875380149295, + "grad_norm": 1.4479688420093226, + "learning_rate": 8.909660896405436e-08, + "loss": 0.3532, + "step": 55485 + }, + { + "epoch": 0.9587710810063589, + "grad_norm": 0.916818935889498, + "learning_rate": 8.902208399966806e-08, + "loss": 0.1529, + "step": 55486 + }, + { + "epoch": 0.9587883605197678, + "grad_norm": 1.3768493115885214, + "learning_rate": 8.894759007721742e-08, + "loss": 0.2632, + "step": 55487 + }, + { + "epoch": 0.9588056400331767, + "grad_norm": 2.274103087198336, + "learning_rate": 8.887312719693564e-08, + "loss": 0.2314, + "step": 55488 + }, + { + "epoch": 0.9588229195465856, + "grad_norm": 1.57953185981024, + "learning_rate": 8.879869535905805e-08, + "loss": 0.2389, + "step": 55489 + }, + { + "epoch": 0.9588401990599945, + "grad_norm": 1.2509089974258845, + "learning_rate": 8.872429456381449e-08, + "loss": 0.3313, + "step": 55490 + }, + { + "epoch": 0.9588574785734034, + "grad_norm": 1.784666149655692, + "learning_rate": 8.864992481144141e-08, + "loss": 0.2521, + "step": 55491 + }, + { + "epoch": 0.9588747580868123, + "grad_norm": 0.993428280345124, + "learning_rate": 8.857558610216866e-08, + "loss": 0.2387, + "step": 55492 + }, + { + "epoch": 0.9588920376002211, + "grad_norm": 0.8103784246425485, + "learning_rate": 8.850127843623157e-08, + "loss": 0.2715, + "step": 55493 + }, + { + "epoch": 0.95890931711363, + "grad_norm": 1.7158422414954626, + "learning_rate": 8.84270018138611e-08, + "loss": 0.3835, + "step": 55494 + }, + { + "epoch": 0.9589265966270389, + "grad_norm": 1.0809660099216436, + "learning_rate": 8.835275623529039e-08, + "loss": 0.3171, + "step": 55495 + }, + { + "epoch": 0.9589438761404478, + "grad_norm": 2.038036976855594, + "learning_rate": 8.827854170075256e-08, + "loss": 0.3218, + "step": 55496 + }, + { + "epoch": 0.9589611556538568, + "grad_norm": 1.2796392517341684, + "learning_rate": 8.820435821047857e-08, + "loss": 0.2716, + "step": 55497 + }, + { + "epoch": 0.9589784351672657, + "grad_norm": 2.178082487429702, + "learning_rate": 8.813020576470265e-08, + "loss": 0.2783, + "step": 55498 + }, + { + "epoch": 0.9589957146806746, + "grad_norm": 1.4824607996097015, + "learning_rate": 8.805608436365576e-08, + "loss": 0.9995, + "step": 55499 + }, + { + "epoch": 0.9590129941940835, + "grad_norm": 1.4624652889567538, + "learning_rate": 8.798199400757101e-08, + "loss": 0.6058, + "step": 55500 + }, + { + "epoch": 0.9590302737074924, + "grad_norm": 1.4658305751046705, + "learning_rate": 8.790793469667935e-08, + "loss": 0.1687, + "step": 55501 + }, + { + "epoch": 0.9590475532209013, + "grad_norm": 1.591297495626737, + "learning_rate": 8.783390643121392e-08, + "loss": 0.2891, + "step": 55502 + }, + { + "epoch": 0.9590648327343102, + "grad_norm": 1.7840470493278213, + "learning_rate": 8.775990921140565e-08, + "loss": 0.2949, + "step": 55503 + }, + { + "epoch": 0.9590821122477191, + "grad_norm": 1.7689090357832042, + "learning_rate": 8.768594303748768e-08, + "loss": 0.2544, + "step": 55504 + }, + { + "epoch": 0.959099391761128, + "grad_norm": 1.0324632113179182, + "learning_rate": 8.761200790968872e-08, + "loss": 0.3309, + "step": 55505 + }, + { + "epoch": 0.9591166712745369, + "grad_norm": 1.5300286329439956, + "learning_rate": 8.753810382824302e-08, + "loss": 0.3008, + "step": 55506 + }, + { + "epoch": 0.9591339507879458, + "grad_norm": 1.3250287409312609, + "learning_rate": 8.746423079338262e-08, + "loss": 0.5195, + "step": 55507 + }, + { + "epoch": 0.9591512303013547, + "grad_norm": 1.3288709162920949, + "learning_rate": 8.739038880533624e-08, + "loss": 0.3921, + "step": 55508 + }, + { + "epoch": 0.9591685098147636, + "grad_norm": 1.1609939326371816, + "learning_rate": 8.731657786433701e-08, + "loss": 0.3069, + "step": 55509 + }, + { + "epoch": 0.9591857893281726, + "grad_norm": 1.5212260780385165, + "learning_rate": 8.724279797061475e-08, + "loss": 0.3241, + "step": 55510 + }, + { + "epoch": 0.9592030688415815, + "grad_norm": 0.9581615639843797, + "learning_rate": 8.716904912440149e-08, + "loss": 0.2047, + "step": 55511 + }, + { + "epoch": 0.9592203483549904, + "grad_norm": 0.8376107886193895, + "learning_rate": 8.709533132592929e-08, + "loss": 0.3305, + "step": 55512 + }, + { + "epoch": 0.9592376278683993, + "grad_norm": 1.9506794265912282, + "learning_rate": 8.702164457542683e-08, + "loss": 0.2152, + "step": 55513 + }, + { + "epoch": 0.9592549073818081, + "grad_norm": 2.2171816603232837, + "learning_rate": 8.694798887312728e-08, + "loss": 0.2111, + "step": 55514 + }, + { + "epoch": 0.959272186895217, + "grad_norm": 1.2783546171924736, + "learning_rate": 8.687436421925821e-08, + "loss": 0.3459, + "step": 55515 + }, + { + "epoch": 0.9592894664086259, + "grad_norm": 2.0591690049109763, + "learning_rate": 8.680077061405279e-08, + "loss": 0.3825, + "step": 55516 + }, + { + "epoch": 0.9593067459220348, + "grad_norm": 1.0597156096429634, + "learning_rate": 8.672720805774081e-08, + "loss": 0.2986, + "step": 55517 + }, + { + "epoch": 0.9593240254354437, + "grad_norm": 0.7290337192472056, + "learning_rate": 8.665367655055212e-08, + "loss": 0.1521, + "step": 55518 + }, + { + "epoch": 0.9593413049488526, + "grad_norm": 0.9890924571362554, + "learning_rate": 8.658017609271763e-08, + "loss": 0.2755, + "step": 55519 + }, + { + "epoch": 0.9593585844622615, + "grad_norm": 1.3321101980022736, + "learning_rate": 8.650670668446825e-08, + "loss": 0.3741, + "step": 55520 + }, + { + "epoch": 0.9593758639756704, + "grad_norm": 2.7230494381307975, + "learning_rate": 8.643326832603272e-08, + "loss": 0.4443, + "step": 55521 + }, + { + "epoch": 0.9593931434890793, + "grad_norm": 1.2883958973335146, + "learning_rate": 8.635986101764194e-08, + "loss": 0.3539, + "step": 55522 + }, + { + "epoch": 0.9594104230024882, + "grad_norm": 1.9327514035703814, + "learning_rate": 8.628648475952461e-08, + "loss": 0.4426, + "step": 55523 + }, + { + "epoch": 0.9594277025158972, + "grad_norm": 1.7759142642707038, + "learning_rate": 8.621313955191168e-08, + "loss": 0.4411, + "step": 55524 + }, + { + "epoch": 0.9594449820293061, + "grad_norm": 2.0833090239797145, + "learning_rate": 8.613982539503407e-08, + "loss": 0.2895, + "step": 55525 + }, + { + "epoch": 0.959462261542715, + "grad_norm": 0.8284206218403114, + "learning_rate": 8.606654228911826e-08, + "loss": 0.5063, + "step": 55526 + }, + { + "epoch": 0.9594795410561239, + "grad_norm": 1.3568492061977948, + "learning_rate": 8.599329023439518e-08, + "loss": 0.4045, + "step": 55527 + }, + { + "epoch": 0.9594968205695328, + "grad_norm": 2.739999638320638, + "learning_rate": 8.592006923109575e-08, + "loss": 0.3668, + "step": 55528 + }, + { + "epoch": 0.9595141000829417, + "grad_norm": 1.7132318060713796, + "learning_rate": 8.584687927944758e-08, + "loss": 0.3148, + "step": 55529 + }, + { + "epoch": 0.9595313795963506, + "grad_norm": 1.6951282822440754, + "learning_rate": 8.577372037968046e-08, + "loss": 0.4976, + "step": 55530 + }, + { + "epoch": 0.9595486591097595, + "grad_norm": 1.374437872415431, + "learning_rate": 8.570059253202422e-08, + "loss": 0.3748, + "step": 55531 + }, + { + "epoch": 0.9595659386231684, + "grad_norm": 1.6274082581386455, + "learning_rate": 8.562749573670648e-08, + "loss": 0.3899, + "step": 55532 + }, + { + "epoch": 0.9595832181365773, + "grad_norm": 1.4399801957460179, + "learning_rate": 8.555442999395703e-08, + "loss": 0.4638, + "step": 55533 + }, + { + "epoch": 0.9596004976499862, + "grad_norm": 1.0014939193542975, + "learning_rate": 8.548139530400346e-08, + "loss": 0.2666, + "step": 55534 + }, + { + "epoch": 0.959617777163395, + "grad_norm": 1.4980529072287532, + "learning_rate": 8.540839166707781e-08, + "loss": 0.2941, + "step": 55535 + }, + { + "epoch": 0.9596350566768039, + "grad_norm": 1.5420892360950593, + "learning_rate": 8.533541908340548e-08, + "loss": 0.601, + "step": 55536 + }, + { + "epoch": 0.9596523361902128, + "grad_norm": 1.5866783355799805, + "learning_rate": 8.526247755321515e-08, + "loss": 0.5178, + "step": 55537 + }, + { + "epoch": 0.9596696157036217, + "grad_norm": 0.8164638140677343, + "learning_rate": 8.518956707673776e-08, + "loss": 0.3585, + "step": 55538 + }, + { + "epoch": 0.9596868952170307, + "grad_norm": 1.7565195557930389, + "learning_rate": 8.511668765419867e-08, + "loss": 0.4244, + "step": 55539 + }, + { + "epoch": 0.9597041747304396, + "grad_norm": 1.2966726461592955, + "learning_rate": 8.504383928582882e-08, + "loss": 0.431, + "step": 55540 + }, + { + "epoch": 0.9597214542438485, + "grad_norm": 1.4797034439397636, + "learning_rate": 8.49710219718547e-08, + "loss": 0.4505, + "step": 55541 + }, + { + "epoch": 0.9597387337572574, + "grad_norm": 1.4154389546560275, + "learning_rate": 8.48982357125061e-08, + "loss": 0.3818, + "step": 55542 + }, + { + "epoch": 0.9597560132706663, + "grad_norm": 1.494529533415631, + "learning_rate": 8.482548050800843e-08, + "loss": 0.4263, + "step": 55543 + }, + { + "epoch": 0.9597732927840752, + "grad_norm": 1.7211678435396527, + "learning_rate": 8.475275635859037e-08, + "loss": 0.2963, + "step": 55544 + }, + { + "epoch": 0.9597905722974841, + "grad_norm": 1.0209545805976865, + "learning_rate": 8.468006326448175e-08, + "loss": 0.4106, + "step": 55545 + }, + { + "epoch": 0.959807851810893, + "grad_norm": 2.4760417049150156, + "learning_rate": 8.460740122590682e-08, + "loss": 0.2687, + "step": 55546 + }, + { + "epoch": 0.9598251313243019, + "grad_norm": 1.4399914468009738, + "learning_rate": 8.453477024309653e-08, + "loss": 0.2779, + "step": 55547 + }, + { + "epoch": 0.9598424108377108, + "grad_norm": 1.3225293215556728, + "learning_rate": 8.446217031627624e-08, + "loss": 0.3502, + "step": 55548 + }, + { + "epoch": 0.9598596903511197, + "grad_norm": 1.726187907825203, + "learning_rate": 8.438960144567466e-08, + "loss": 0.2819, + "step": 55549 + }, + { + "epoch": 0.9598769698645286, + "grad_norm": 1.159767666234436, + "learning_rate": 8.431706363151826e-08, + "loss": 0.3095, + "step": 55550 + }, + { + "epoch": 0.9598942493779375, + "grad_norm": 1.841288106726278, + "learning_rate": 8.424455687403354e-08, + "loss": 0.4646, + "step": 55551 + }, + { + "epoch": 0.9599115288913465, + "grad_norm": 1.2577648353576332, + "learning_rate": 8.417208117344921e-08, + "loss": 0.2537, + "step": 55552 + }, + { + "epoch": 0.9599288084047554, + "grad_norm": 1.3859323593002286, + "learning_rate": 8.409963652999064e-08, + "loss": 0.3411, + "step": 55553 + }, + { + "epoch": 0.9599460879181643, + "grad_norm": 2.021192675389437, + "learning_rate": 8.402722294388655e-08, + "loss": 0.2671, + "step": 55554 + }, + { + "epoch": 0.9599633674315732, + "grad_norm": 1.242147043901653, + "learning_rate": 8.395484041536229e-08, + "loss": 0.3953, + "step": 55555 + }, + { + "epoch": 0.959980646944982, + "grad_norm": 0.9821279890672392, + "learning_rate": 8.388248894464545e-08, + "loss": 0.2082, + "step": 55556 + }, + { + "epoch": 0.9599979264583909, + "grad_norm": 0.9945960587594697, + "learning_rate": 8.381016853196145e-08, + "loss": 0.3511, + "step": 55557 + }, + { + "epoch": 0.9600152059717998, + "grad_norm": 0.7893973792640226, + "learning_rate": 8.373787917753896e-08, + "loss": 0.5799, + "step": 55558 + }, + { + "epoch": 0.9600324854852087, + "grad_norm": 1.76177106521633, + "learning_rate": 8.366562088160224e-08, + "loss": 0.2183, + "step": 55559 + }, + { + "epoch": 0.9600497649986176, + "grad_norm": 1.0774537030251818, + "learning_rate": 8.359339364437891e-08, + "loss": 0.38, + "step": 55560 + }, + { + "epoch": 0.9600670445120265, + "grad_norm": 0.962904857602541, + "learning_rate": 8.352119746609322e-08, + "loss": 0.2183, + "step": 55561 + }, + { + "epoch": 0.9600843240254354, + "grad_norm": 1.790463025818952, + "learning_rate": 8.344903234697388e-08, + "loss": 0.3653, + "step": 55562 + }, + { + "epoch": 0.9601016035388443, + "grad_norm": 1.2740496438313111, + "learning_rate": 8.337689828724516e-08, + "loss": 0.6635, + "step": 55563 + }, + { + "epoch": 0.9601188830522532, + "grad_norm": 1.3964653096951436, + "learning_rate": 8.330479528713464e-08, + "loss": 0.3832, + "step": 55564 + }, + { + "epoch": 0.9601361625656621, + "grad_norm": 0.9437247443574205, + "learning_rate": 8.323272334686661e-08, + "loss": 0.4989, + "step": 55565 + }, + { + "epoch": 0.960153442079071, + "grad_norm": 1.0974556010514729, + "learning_rate": 8.316068246666753e-08, + "loss": 0.3866, + "step": 55566 + }, + { + "epoch": 0.96017072159248, + "grad_norm": 1.0531774928922943, + "learning_rate": 8.308867264676279e-08, + "loss": 0.4037, + "step": 55567 + }, + { + "epoch": 0.9601880011058889, + "grad_norm": 1.2946587309607571, + "learning_rate": 8.301669388737777e-08, + "loss": 0.4297, + "step": 55568 + }, + { + "epoch": 0.9602052806192978, + "grad_norm": 1.2256802271986642, + "learning_rate": 8.294474618873894e-08, + "loss": 0.5906, + "step": 55569 + }, + { + "epoch": 0.9602225601327067, + "grad_norm": 1.3222784415084285, + "learning_rate": 8.287282955106945e-08, + "loss": 0.2964, + "step": 55570 + }, + { + "epoch": 0.9602398396461156, + "grad_norm": 1.2266234913714027, + "learning_rate": 8.280094397459692e-08, + "loss": 0.3916, + "step": 55571 + }, + { + "epoch": 0.9602571191595245, + "grad_norm": 2.034835986810384, + "learning_rate": 8.272908945954561e-08, + "loss": 0.3238, + "step": 55572 + }, + { + "epoch": 0.9602743986729334, + "grad_norm": 1.3534247997565287, + "learning_rate": 8.265726600613977e-08, + "loss": 0.4798, + "step": 55573 + }, + { + "epoch": 0.9602916781863423, + "grad_norm": 1.8355104096643124, + "learning_rate": 8.258547361460478e-08, + "loss": 0.28, + "step": 55574 + }, + { + "epoch": 0.9603089576997512, + "grad_norm": 0.9560019797389745, + "learning_rate": 8.251371228516714e-08, + "loss": 0.2048, + "step": 55575 + }, + { + "epoch": 0.9603262372131601, + "grad_norm": 1.2462058765217119, + "learning_rate": 8.244198201804887e-08, + "loss": 0.3446, + "step": 55576 + }, + { + "epoch": 0.960343516726569, + "grad_norm": 1.8600714949115624, + "learning_rate": 8.237028281347647e-08, + "loss": 0.3058, + "step": 55577 + }, + { + "epoch": 0.9603607962399778, + "grad_norm": 1.1826226616523488, + "learning_rate": 8.229861467167532e-08, + "loss": 0.1944, + "step": 55578 + }, + { + "epoch": 0.9603780757533867, + "grad_norm": 1.0978291344704356, + "learning_rate": 8.222697759286635e-08, + "loss": 0.3345, + "step": 55579 + }, + { + "epoch": 0.9603953552667956, + "grad_norm": 1.6195510275468625, + "learning_rate": 8.215537157727826e-08, + "loss": 0.3327, + "step": 55580 + }, + { + "epoch": 0.9604126347802046, + "grad_norm": 1.5666486518414517, + "learning_rate": 8.208379662513199e-08, + "loss": 0.3039, + "step": 55581 + }, + { + "epoch": 0.9604299142936135, + "grad_norm": 2.652951212234807, + "learning_rate": 8.201225273665403e-08, + "loss": 0.3131, + "step": 55582 + }, + { + "epoch": 0.9604471938070224, + "grad_norm": 2.20810028516488, + "learning_rate": 8.194073991206641e-08, + "loss": 0.4518, + "step": 55583 + }, + { + "epoch": 0.9604644733204313, + "grad_norm": 1.203961800152074, + "learning_rate": 8.186925815159452e-08, + "loss": 0.6755, + "step": 55584 + }, + { + "epoch": 0.9604817528338402, + "grad_norm": 1.1852138384437925, + "learning_rate": 8.17978074554615e-08, + "loss": 0.3299, + "step": 55585 + }, + { + "epoch": 0.9604990323472491, + "grad_norm": 1.0236140943667682, + "learning_rate": 8.172638782389164e-08, + "loss": 0.2141, + "step": 55586 + }, + { + "epoch": 0.960516311860658, + "grad_norm": 1.5463268729957405, + "learning_rate": 8.165499925710918e-08, + "loss": 0.5134, + "step": 55587 + }, + { + "epoch": 0.9605335913740669, + "grad_norm": 1.5915521422339018, + "learning_rate": 8.158364175533728e-08, + "loss": 0.364, + "step": 55588 + }, + { + "epoch": 0.9605508708874758, + "grad_norm": 0.8507294719309141, + "learning_rate": 8.1512315318798e-08, + "loss": 0.1164, + "step": 55589 + }, + { + "epoch": 0.9605681504008847, + "grad_norm": 1.7079993067480614, + "learning_rate": 8.14410199477167e-08, + "loss": 0.3489, + "step": 55590 + }, + { + "epoch": 0.9605854299142936, + "grad_norm": 0.9425672427539321, + "learning_rate": 8.136975564231541e-08, + "loss": 0.3942, + "step": 55591 + }, + { + "epoch": 0.9606027094277025, + "grad_norm": 0.5276298808681938, + "learning_rate": 8.129852240281732e-08, + "loss": 0.5498, + "step": 55592 + }, + { + "epoch": 0.9606199889411114, + "grad_norm": 1.1518990992549132, + "learning_rate": 8.122732022944779e-08, + "loss": 0.3318, + "step": 55593 + }, + { + "epoch": 0.9606372684545204, + "grad_norm": 1.4775475922770707, + "learning_rate": 8.115614912242665e-08, + "loss": 0.4016, + "step": 55594 + }, + { + "epoch": 0.9606545479679293, + "grad_norm": 1.7960656097042573, + "learning_rate": 8.108500908197924e-08, + "loss": 0.4955, + "step": 55595 + }, + { + "epoch": 0.9606718274813382, + "grad_norm": 1.439720947644878, + "learning_rate": 8.101390010832655e-08, + "loss": 0.552, + "step": 55596 + }, + { + "epoch": 0.9606891069947471, + "grad_norm": 2.004969399581946, + "learning_rate": 8.094282220169169e-08, + "loss": 0.1772, + "step": 55597 + }, + { + "epoch": 0.960706386508156, + "grad_norm": 1.7834389237672414, + "learning_rate": 8.087177536229896e-08, + "loss": 0.3221, + "step": 55598 + }, + { + "epoch": 0.9607236660215648, + "grad_norm": 1.44129617183285, + "learning_rate": 8.080075959036925e-08, + "loss": 0.3944, + "step": 55599 + }, + { + "epoch": 0.9607409455349737, + "grad_norm": 1.0625214733227066, + "learning_rate": 8.072977488612576e-08, + "loss": 0.3114, + "step": 55600 + }, + { + "epoch": 0.9607582250483826, + "grad_norm": 1.3446076751796447, + "learning_rate": 8.065882124978941e-08, + "loss": 0.3798, + "step": 55601 + }, + { + "epoch": 0.9607755045617915, + "grad_norm": 0.9515416285808889, + "learning_rate": 8.058789868158446e-08, + "loss": 0.3454, + "step": 55602 + }, + { + "epoch": 0.9607927840752004, + "grad_norm": 1.207891955528596, + "learning_rate": 8.051700718173184e-08, + "loss": 0.4028, + "step": 55603 + }, + { + "epoch": 0.9608100635886093, + "grad_norm": 1.209533325398732, + "learning_rate": 8.04461467504536e-08, + "loss": 0.3593, + "step": 55604 + }, + { + "epoch": 0.9608273431020182, + "grad_norm": 0.9777030752483814, + "learning_rate": 8.037531738797177e-08, + "loss": 0.3071, + "step": 55605 + }, + { + "epoch": 0.9608446226154271, + "grad_norm": 0.7785257139579668, + "learning_rate": 8.030451909450842e-08, + "loss": 0.7871, + "step": 55606 + }, + { + "epoch": 0.960861902128836, + "grad_norm": 1.2772038531416163, + "learning_rate": 8.023375187028559e-08, + "loss": 0.2636, + "step": 55607 + }, + { + "epoch": 0.960879181642245, + "grad_norm": 1.2623932152074722, + "learning_rate": 8.01630157155242e-08, + "loss": 0.3416, + "step": 55608 + }, + { + "epoch": 0.9608964611556539, + "grad_norm": 1.1306071945797942, + "learning_rate": 8.009231063044632e-08, + "loss": 0.3549, + "step": 55609 + }, + { + "epoch": 0.9609137406690628, + "grad_norm": 1.0106121129083236, + "learning_rate": 8.002163661527285e-08, + "loss": 0.2826, + "step": 55610 + }, + { + "epoch": 0.9609310201824717, + "grad_norm": 1.8605748122142327, + "learning_rate": 7.995099367022586e-08, + "loss": 0.466, + "step": 55611 + }, + { + "epoch": 0.9609482996958806, + "grad_norm": 1.2445794893626936, + "learning_rate": 7.988038179552516e-08, + "loss": 0.3395, + "step": 55612 + }, + { + "epoch": 0.9609655792092895, + "grad_norm": 1.0823011232463045, + "learning_rate": 7.98098009913939e-08, + "loss": 0.3476, + "step": 55613 + }, + { + "epoch": 0.9609828587226984, + "grad_norm": 1.709179758457554, + "learning_rate": 7.973925125805193e-08, + "loss": 0.4363, + "step": 55614 + }, + { + "epoch": 0.9610001382361073, + "grad_norm": 1.2887791762150838, + "learning_rate": 7.966873259572128e-08, + "loss": 0.4579, + "step": 55615 + }, + { + "epoch": 0.9610174177495162, + "grad_norm": 1.7074966825651272, + "learning_rate": 7.959824500462177e-08, + "loss": 0.351, + "step": 55616 + }, + { + "epoch": 0.9610346972629251, + "grad_norm": 0.9160739976009489, + "learning_rate": 7.952778848497433e-08, + "loss": 0.2625, + "step": 55617 + }, + { + "epoch": 0.961051976776334, + "grad_norm": 1.0121642536810453, + "learning_rate": 7.945736303699991e-08, + "loss": 0.3265, + "step": 55618 + }, + { + "epoch": 0.9610692562897429, + "grad_norm": 1.398237294104404, + "learning_rate": 7.938696866091833e-08, + "loss": 0.2063, + "step": 55619 + }, + { + "epoch": 0.9610865358031517, + "grad_norm": 1.6549344685045058, + "learning_rate": 7.931660535695163e-08, + "loss": 0.4006, + "step": 55620 + }, + { + "epoch": 0.9611038153165606, + "grad_norm": 1.5249023265243256, + "learning_rate": 7.924627312531852e-08, + "loss": 0.2163, + "step": 55621 + }, + { + "epoch": 0.9611210948299695, + "grad_norm": 0.857733229912079, + "learning_rate": 7.917597196624105e-08, + "loss": 0.2432, + "step": 55622 + }, + { + "epoch": 0.9611383743433785, + "grad_norm": 0.925096824798617, + "learning_rate": 7.910570187993793e-08, + "loss": 0.3035, + "step": 55623 + }, + { + "epoch": 0.9611556538567874, + "grad_norm": 1.8837273350361168, + "learning_rate": 7.9035462866629e-08, + "loss": 0.3441, + "step": 55624 + }, + { + "epoch": 0.9611729333701963, + "grad_norm": 2.063831991193702, + "learning_rate": 7.896525492653517e-08, + "loss": 0.2966, + "step": 55625 + }, + { + "epoch": 0.9611902128836052, + "grad_norm": 1.2927061402895297, + "learning_rate": 7.889507805987739e-08, + "loss": 0.2997, + "step": 55626 + }, + { + "epoch": 0.9612074923970141, + "grad_norm": 1.2997033318006712, + "learning_rate": 7.882493226687215e-08, + "loss": 0.128, + "step": 55627 + }, + { + "epoch": 0.961224771910423, + "grad_norm": 1.0475934243601286, + "learning_rate": 7.875481754774261e-08, + "loss": 0.2488, + "step": 55628 + }, + { + "epoch": 0.9612420514238319, + "grad_norm": 1.5105551646160267, + "learning_rate": 7.868473390270747e-08, + "loss": 0.4374, + "step": 55629 + }, + { + "epoch": 0.9612593309372408, + "grad_norm": 1.1811011511322178, + "learning_rate": 7.861468133198436e-08, + "loss": 0.3138, + "step": 55630 + }, + { + "epoch": 0.9612766104506497, + "grad_norm": 1.405392243859735, + "learning_rate": 7.854465983579418e-08, + "loss": 0.3558, + "step": 55631 + }, + { + "epoch": 0.9612938899640586, + "grad_norm": 1.620672106412863, + "learning_rate": 7.847466941435678e-08, + "loss": 0.3229, + "step": 55632 + }, + { + "epoch": 0.9613111694774675, + "grad_norm": 0.6383840424765829, + "learning_rate": 7.840471006789086e-08, + "loss": 0.2352, + "step": 55633 + }, + { + "epoch": 0.9613284489908764, + "grad_norm": 1.5249934977569388, + "learning_rate": 7.833478179661513e-08, + "loss": 0.3082, + "step": 55634 + }, + { + "epoch": 0.9613457285042853, + "grad_norm": 1.4821876160694767, + "learning_rate": 7.826488460074943e-08, + "loss": 0.4148, + "step": 55635 + }, + { + "epoch": 0.9613630080176943, + "grad_norm": 1.2844715136312006, + "learning_rate": 7.819501848051137e-08, + "loss": 0.6353, + "step": 55636 + }, + { + "epoch": 0.9613802875311032, + "grad_norm": 1.3014181906022946, + "learning_rate": 7.812518343611963e-08, + "loss": 0.3486, + "step": 55637 + }, + { + "epoch": 0.9613975670445121, + "grad_norm": 2.1828594817615645, + "learning_rate": 7.805537946779518e-08, + "loss": 0.4467, + "step": 55638 + }, + { + "epoch": 0.961414846557921, + "grad_norm": 1.2904695608131416, + "learning_rate": 7.79856065757556e-08, + "loss": 0.3388, + "step": 55639 + }, + { + "epoch": 0.9614321260713299, + "grad_norm": 1.0895880108111788, + "learning_rate": 7.79158647602185e-08, + "loss": 0.2692, + "step": 55640 + }, + { + "epoch": 0.9614494055847387, + "grad_norm": 1.8643221860183279, + "learning_rate": 7.784615402140261e-08, + "loss": 0.4894, + "step": 55641 + }, + { + "epoch": 0.9614666850981476, + "grad_norm": 1.1231258577530339, + "learning_rate": 7.777647435952773e-08, + "loss": 0.2736, + "step": 55642 + }, + { + "epoch": 0.9614839646115565, + "grad_norm": 1.0338104486355078, + "learning_rate": 7.770682577480926e-08, + "loss": 0.3018, + "step": 55643 + }, + { + "epoch": 0.9615012441249654, + "grad_norm": 1.2084630989561478, + "learning_rate": 7.763720826746923e-08, + "loss": 0.3525, + "step": 55644 + }, + { + "epoch": 0.9615185236383743, + "grad_norm": 1.5554971238442803, + "learning_rate": 7.756762183772192e-08, + "loss": 0.2008, + "step": 55645 + }, + { + "epoch": 0.9615358031517832, + "grad_norm": 1.9044157043455292, + "learning_rate": 7.749806648578828e-08, + "loss": 0.3154, + "step": 55646 + }, + { + "epoch": 0.9615530826651921, + "grad_norm": 1.5451398341124556, + "learning_rate": 7.742854221188367e-08, + "loss": 0.378, + "step": 55647 + }, + { + "epoch": 0.961570362178601, + "grad_norm": 1.0066276503313571, + "learning_rate": 7.735904901622681e-08, + "loss": 0.3681, + "step": 55648 + }, + { + "epoch": 0.9615876416920099, + "grad_norm": 0.9781965742368961, + "learning_rate": 7.728958689903642e-08, + "loss": 0.4112, + "step": 55649 + }, + { + "epoch": 0.9616049212054188, + "grad_norm": 1.309143270697759, + "learning_rate": 7.72201558605279e-08, + "loss": 0.2779, + "step": 55650 + }, + { + "epoch": 0.9616222007188278, + "grad_norm": 1.3485646452279911, + "learning_rate": 7.715075590092102e-08, + "loss": 0.1868, + "step": 55651 + }, + { + "epoch": 0.9616394802322367, + "grad_norm": 1.1355653991871293, + "learning_rate": 7.708138702043233e-08, + "loss": 0.8002, + "step": 55652 + }, + { + "epoch": 0.9616567597456456, + "grad_norm": 1.2226005228428842, + "learning_rate": 7.701204921927718e-08, + "loss": 0.3609, + "step": 55653 + }, + { + "epoch": 0.9616740392590545, + "grad_norm": 1.3227620735944972, + "learning_rate": 7.694274249767542e-08, + "loss": 0.3079, + "step": 55654 + }, + { + "epoch": 0.9616913187724634, + "grad_norm": 1.455108970421503, + "learning_rate": 7.687346685584352e-08, + "loss": 0.4523, + "step": 55655 + }, + { + "epoch": 0.9617085982858723, + "grad_norm": 1.2150242233787667, + "learning_rate": 7.680422229399798e-08, + "loss": 0.3265, + "step": 55656 + }, + { + "epoch": 0.9617258777992812, + "grad_norm": 1.1383944387229366, + "learning_rate": 7.67350088123553e-08, + "loss": 0.3496, + "step": 55657 + }, + { + "epoch": 0.9617431573126901, + "grad_norm": 2.6374535236080816, + "learning_rate": 7.666582641113419e-08, + "loss": 0.3493, + "step": 55658 + }, + { + "epoch": 0.961760436826099, + "grad_norm": 1.0465450562939862, + "learning_rate": 7.65966750905478e-08, + "loss": 0.3375, + "step": 55659 + }, + { + "epoch": 0.9617777163395079, + "grad_norm": 1.3565423238564116, + "learning_rate": 7.652755485081597e-08, + "loss": 0.3866, + "step": 55660 + }, + { + "epoch": 0.9617949958529168, + "grad_norm": 1.7837261015763501, + "learning_rate": 7.645846569215299e-08, + "loss": 0.3094, + "step": 55661 + }, + { + "epoch": 0.9618122753663256, + "grad_norm": 1.2027763270279652, + "learning_rate": 7.638940761477754e-08, + "loss": 0.262, + "step": 55662 + }, + { + "epoch": 0.9618295548797345, + "grad_norm": 2.1340871075405317, + "learning_rate": 7.63203806189039e-08, + "loss": 0.2097, + "step": 55663 + }, + { + "epoch": 0.9618468343931434, + "grad_norm": 0.8148070984531235, + "learning_rate": 7.625138470474858e-08, + "loss": 0.3125, + "step": 55664 + }, + { + "epoch": 0.9618641139065524, + "grad_norm": 1.9624014267176126, + "learning_rate": 7.618241987252805e-08, + "loss": 0.2398, + "step": 55665 + }, + { + "epoch": 0.9618813934199613, + "grad_norm": 1.4137509710431984, + "learning_rate": 7.611348612245884e-08, + "loss": 0.5638, + "step": 55666 + }, + { + "epoch": 0.9618986729333702, + "grad_norm": 1.2186499439221528, + "learning_rate": 7.604458345475629e-08, + "loss": 0.3318, + "step": 55667 + }, + { + "epoch": 0.9619159524467791, + "grad_norm": 1.5656355743879238, + "learning_rate": 7.597571186963692e-08, + "loss": 0.3524, + "step": 55668 + }, + { + "epoch": 0.961933231960188, + "grad_norm": 1.972707712860424, + "learning_rate": 7.590687136731612e-08, + "loss": 0.3161, + "step": 55669 + }, + { + "epoch": 0.9619505114735969, + "grad_norm": 1.1368798753614322, + "learning_rate": 7.583806194800813e-08, + "loss": 0.2753, + "step": 55670 + }, + { + "epoch": 0.9619677909870058, + "grad_norm": 1.3845666163010373, + "learning_rate": 7.576928361192948e-08, + "loss": 0.3462, + "step": 55671 + }, + { + "epoch": 0.9619850705004147, + "grad_norm": 2.0013273130346563, + "learning_rate": 7.570053635929553e-08, + "loss": 0.4772, + "step": 55672 + }, + { + "epoch": 0.9620023500138236, + "grad_norm": 1.1859087631086205, + "learning_rate": 7.563182019032278e-08, + "loss": 0.2516, + "step": 55673 + }, + { + "epoch": 0.9620196295272325, + "grad_norm": 1.2410094110984622, + "learning_rate": 7.556313510522439e-08, + "loss": 0.3554, + "step": 55674 + }, + { + "epoch": 0.9620369090406414, + "grad_norm": 0.9042264847241583, + "learning_rate": 7.549448110421798e-08, + "loss": 0.1608, + "step": 55675 + }, + { + "epoch": 0.9620541885540503, + "grad_norm": 1.8243069935082972, + "learning_rate": 7.542585818751558e-08, + "loss": 0.2768, + "step": 55676 + }, + { + "epoch": 0.9620714680674592, + "grad_norm": 2.3295125513460646, + "learning_rate": 7.535726635533369e-08, + "loss": 0.25, + "step": 55677 + }, + { + "epoch": 0.9620887475808682, + "grad_norm": 1.364744491417974, + "learning_rate": 7.528870560788771e-08, + "loss": 0.3851, + "step": 55678 + }, + { + "epoch": 0.9621060270942771, + "grad_norm": 1.1221742788900406, + "learning_rate": 7.522017594539078e-08, + "loss": 0.2501, + "step": 55679 + }, + { + "epoch": 0.962123306607686, + "grad_norm": 1.1151740803027015, + "learning_rate": 7.51516773680605e-08, + "loss": 0.2699, + "step": 55680 + }, + { + "epoch": 0.9621405861210949, + "grad_norm": 3.8515845978711694, + "learning_rate": 7.508320987610784e-08, + "loss": 0.4581, + "step": 55681 + }, + { + "epoch": 0.9621578656345038, + "grad_norm": 1.8187271099110094, + "learning_rate": 7.501477346974816e-08, + "loss": 0.3258, + "step": 55682 + }, + { + "epoch": 0.9621751451479126, + "grad_norm": 1.402267748628303, + "learning_rate": 7.494636814919798e-08, + "loss": 0.2406, + "step": 55683 + }, + { + "epoch": 0.9621924246613215, + "grad_norm": 1.0319881323127849, + "learning_rate": 7.487799391466933e-08, + "loss": 0.2989, + "step": 55684 + }, + { + "epoch": 0.9622097041747304, + "grad_norm": 1.438328208136158, + "learning_rate": 7.480965076637759e-08, + "loss": 0.3854, + "step": 55685 + }, + { + "epoch": 0.9622269836881393, + "grad_norm": 1.4149088521077653, + "learning_rate": 7.474133870453592e-08, + "loss": 0.427, + "step": 55686 + }, + { + "epoch": 0.9622442632015482, + "grad_norm": 1.5710417720943504, + "learning_rate": 7.467305772935862e-08, + "loss": 0.3756, + "step": 55687 + }, + { + "epoch": 0.9622615427149571, + "grad_norm": 1.018046249766004, + "learning_rate": 7.460480784105995e-08, + "loss": 0.5523, + "step": 55688 + }, + { + "epoch": 0.962278822228366, + "grad_norm": 1.4388637417685566, + "learning_rate": 7.453658903985305e-08, + "loss": 0.4028, + "step": 55689 + }, + { + "epoch": 0.9622961017417749, + "grad_norm": 1.7701284628082938, + "learning_rate": 7.446840132595223e-08, + "loss": 0.3202, + "step": 55690 + }, + { + "epoch": 0.9623133812551838, + "grad_norm": 1.5667992195030307, + "learning_rate": 7.440024469957063e-08, + "loss": 0.248, + "step": 55691 + }, + { + "epoch": 0.9623306607685927, + "grad_norm": 1.0778835348473774, + "learning_rate": 7.433211916092143e-08, + "loss": 0.1239, + "step": 55692 + }, + { + "epoch": 0.9623479402820017, + "grad_norm": 1.3667333153381707, + "learning_rate": 7.426402471021777e-08, + "loss": 0.3466, + "step": 55693 + }, + { + "epoch": 0.9623652197954106, + "grad_norm": 1.3997305631207884, + "learning_rate": 7.419596134767393e-08, + "loss": 0.3338, + "step": 55694 + }, + { + "epoch": 0.9623824993088195, + "grad_norm": 1.9137314923547313, + "learning_rate": 7.412792907350419e-08, + "loss": 0.2804, + "step": 55695 + }, + { + "epoch": 0.9623997788222284, + "grad_norm": 2.307338344453668, + "learning_rate": 7.405992788791838e-08, + "loss": 0.2643, + "step": 55696 + }, + { + "epoch": 0.9624170583356373, + "grad_norm": 1.5071913816493903, + "learning_rate": 7.399195779113189e-08, + "loss": 0.3139, + "step": 55697 + }, + { + "epoch": 0.9624343378490462, + "grad_norm": 1.2745264201399145, + "learning_rate": 7.392401878335675e-08, + "loss": 0.3464, + "step": 55698 + }, + { + "epoch": 0.9624516173624551, + "grad_norm": 2.1200748950634045, + "learning_rate": 7.385611086480616e-08, + "loss": 0.2238, + "step": 55699 + }, + { + "epoch": 0.962468896875864, + "grad_norm": 1.0996670250802467, + "learning_rate": 7.378823403569213e-08, + "loss": 0.2681, + "step": 55700 + }, + { + "epoch": 0.9624861763892729, + "grad_norm": 1.4860381929626598, + "learning_rate": 7.372038829622785e-08, + "loss": 0.2786, + "step": 55701 + }, + { + "epoch": 0.9625034559026818, + "grad_norm": 0.9032761728507225, + "learning_rate": 7.365257364662648e-08, + "loss": 0.6124, + "step": 55702 + }, + { + "epoch": 0.9625207354160907, + "grad_norm": 1.1603508905388487, + "learning_rate": 7.358479008709895e-08, + "loss": 0.4658, + "step": 55703 + }, + { + "epoch": 0.9625380149294995, + "grad_norm": 0.6199600530452084, + "learning_rate": 7.351703761785844e-08, + "loss": 0.6661, + "step": 55704 + }, + { + "epoch": 0.9625552944429084, + "grad_norm": 0.68738776414807, + "learning_rate": 7.344931623911589e-08, + "loss": 0.8408, + "step": 55705 + }, + { + "epoch": 0.9625725739563173, + "grad_norm": 1.1428476827502478, + "learning_rate": 7.338162595108555e-08, + "loss": 0.2601, + "step": 55706 + }, + { + "epoch": 0.9625898534697263, + "grad_norm": 1.3515015784091333, + "learning_rate": 7.331396675397729e-08, + "loss": 0.4699, + "step": 55707 + }, + { + "epoch": 0.9626071329831352, + "grad_norm": 1.6579996187417179, + "learning_rate": 7.324633864800534e-08, + "loss": 0.4327, + "step": 55708 + }, + { + "epoch": 0.9626244124965441, + "grad_norm": 1.107655976874362, + "learning_rate": 7.317874163338068e-08, + "loss": 0.2898, + "step": 55709 + }, + { + "epoch": 0.962641692009953, + "grad_norm": 1.700080729468786, + "learning_rate": 7.311117571031312e-08, + "loss": 0.2673, + "step": 55710 + }, + { + "epoch": 0.9626589715233619, + "grad_norm": 1.80365155120723, + "learning_rate": 7.304364087901694e-08, + "loss": 0.6164, + "step": 55711 + }, + { + "epoch": 0.9626762510367708, + "grad_norm": 1.6636628349555176, + "learning_rate": 7.297613713970198e-08, + "loss": 0.5095, + "step": 55712 + }, + { + "epoch": 0.9626935305501797, + "grad_norm": 1.4292499320992516, + "learning_rate": 7.290866449258028e-08, + "loss": 0.3872, + "step": 55713 + }, + { + "epoch": 0.9627108100635886, + "grad_norm": 2.038381747819273, + "learning_rate": 7.284122293786277e-08, + "loss": 0.4462, + "step": 55714 + }, + { + "epoch": 0.9627280895769975, + "grad_norm": 2.4869815372503647, + "learning_rate": 7.277381247576154e-08, + "loss": 0.3061, + "step": 55715 + }, + { + "epoch": 0.9627453690904064, + "grad_norm": 3.071133567489856, + "learning_rate": 7.27064331064864e-08, + "loss": 0.2344, + "step": 55716 + }, + { + "epoch": 0.9627626486038153, + "grad_norm": 2.6922120356004386, + "learning_rate": 7.263908483024939e-08, + "loss": 0.4317, + "step": 55717 + }, + { + "epoch": 0.9627799281172242, + "grad_norm": 1.0340285468840806, + "learning_rate": 7.257176764726148e-08, + "loss": 0.3698, + "step": 55718 + }, + { + "epoch": 0.9627972076306331, + "grad_norm": 0.9379525741368836, + "learning_rate": 7.250448155773359e-08, + "loss": 0.4538, + "step": 55719 + }, + { + "epoch": 0.962814487144042, + "grad_norm": 0.8976333697098495, + "learning_rate": 7.243722656187557e-08, + "loss": 0.2827, + "step": 55720 + }, + { + "epoch": 0.962831766657451, + "grad_norm": 1.2364685663088733, + "learning_rate": 7.237000265989947e-08, + "loss": 0.2435, + "step": 55721 + }, + { + "epoch": 0.9628490461708599, + "grad_norm": 1.3831081058533943, + "learning_rate": 7.2302809852014e-08, + "loss": 0.244, + "step": 55722 + }, + { + "epoch": 0.9628663256842688, + "grad_norm": 1.2713991416393469, + "learning_rate": 7.22356481384301e-08, + "loss": 0.1116, + "step": 55723 + }, + { + "epoch": 0.9628836051976777, + "grad_norm": 1.647145843433429, + "learning_rate": 7.216851751935982e-08, + "loss": 0.3138, + "step": 55724 + }, + { + "epoch": 0.9629008847110866, + "grad_norm": 1.5547877955382645, + "learning_rate": 7.210141799501191e-08, + "loss": 0.1632, + "step": 55725 + }, + { + "epoch": 0.9629181642244954, + "grad_norm": 1.0989950466764586, + "learning_rate": 7.20343495655973e-08, + "loss": 0.198, + "step": 55726 + }, + { + "epoch": 0.9629354437379043, + "grad_norm": 1.3978387487314852, + "learning_rate": 7.196731223132469e-08, + "loss": 0.2704, + "step": 55727 + }, + { + "epoch": 0.9629527232513132, + "grad_norm": 1.7108465468624017, + "learning_rate": 7.190030599240505e-08, + "loss": 0.1869, + "step": 55728 + }, + { + "epoch": 0.9629700027647221, + "grad_norm": 1.1330438872609843, + "learning_rate": 7.18333308490482e-08, + "loss": 0.4192, + "step": 55729 + }, + { + "epoch": 0.962987282278131, + "grad_norm": 1.0645328833627365, + "learning_rate": 7.176638680146508e-08, + "loss": 0.4747, + "step": 55730 + }, + { + "epoch": 0.9630045617915399, + "grad_norm": 1.229742908173155, + "learning_rate": 7.169947384986332e-08, + "loss": 0.3536, + "step": 55731 + }, + { + "epoch": 0.9630218413049488, + "grad_norm": 0.9262215095668167, + "learning_rate": 7.163259199445272e-08, + "loss": 0.2469, + "step": 55732 + }, + { + "epoch": 0.9630391208183577, + "grad_norm": 2.0608663000427287, + "learning_rate": 7.156574123544424e-08, + "loss": 0.3884, + "step": 55733 + }, + { + "epoch": 0.9630564003317666, + "grad_norm": 1.0765241155655347, + "learning_rate": 7.14989215730455e-08, + "loss": 0.4156, + "step": 55734 + }, + { + "epoch": 0.9630736798451756, + "grad_norm": 2.051138335756761, + "learning_rate": 7.143213300746744e-08, + "loss": 0.3457, + "step": 55735 + }, + { + "epoch": 0.9630909593585845, + "grad_norm": 1.4774415785982542, + "learning_rate": 7.136537553891876e-08, + "loss": 0.3573, + "step": 55736 + }, + { + "epoch": 0.9631082388719934, + "grad_norm": 1.9009277977970913, + "learning_rate": 7.12986491676082e-08, + "loss": 0.4596, + "step": 55737 + }, + { + "epoch": 0.9631255183854023, + "grad_norm": 1.0424455351831514, + "learning_rate": 7.123195389374449e-08, + "loss": 0.3508, + "step": 55738 + }, + { + "epoch": 0.9631427978988112, + "grad_norm": 0.9850714171925, + "learning_rate": 7.116528971753633e-08, + "loss": 0.4888, + "step": 55739 + }, + { + "epoch": 0.9631600774122201, + "grad_norm": 1.485775511825942, + "learning_rate": 7.109865663919358e-08, + "loss": 0.2132, + "step": 55740 + }, + { + "epoch": 0.963177356925629, + "grad_norm": 1.130629611255239, + "learning_rate": 7.103205465892383e-08, + "loss": 0.4246, + "step": 55741 + }, + { + "epoch": 0.9631946364390379, + "grad_norm": 1.5751461795209252, + "learning_rate": 7.09654837769369e-08, + "loss": 0.1505, + "step": 55742 + }, + { + "epoch": 0.9632119159524468, + "grad_norm": 1.7010142411771025, + "learning_rate": 7.089894399344044e-08, + "loss": 0.4676, + "step": 55743 + }, + { + "epoch": 0.9632291954658557, + "grad_norm": 0.9095864075557731, + "learning_rate": 7.083243530864426e-08, + "loss": 0.4839, + "step": 55744 + }, + { + "epoch": 0.9632464749792646, + "grad_norm": 1.2353742964583752, + "learning_rate": 7.076595772275375e-08, + "loss": 0.2558, + "step": 55745 + }, + { + "epoch": 0.9632637544926735, + "grad_norm": 1.2972714829484038, + "learning_rate": 7.069951123597985e-08, + "loss": 0.2119, + "step": 55746 + }, + { + "epoch": 0.9632810340060823, + "grad_norm": 1.1338437746633936, + "learning_rate": 7.063309584852907e-08, + "loss": 0.3469, + "step": 55747 + }, + { + "epoch": 0.9632983135194912, + "grad_norm": 1.3652520274097568, + "learning_rate": 7.056671156061013e-08, + "loss": 0.2341, + "step": 55748 + }, + { + "epoch": 0.9633155930329002, + "grad_norm": 2.045047949257913, + "learning_rate": 7.050035837243064e-08, + "loss": 0.4287, + "step": 55749 + }, + { + "epoch": 0.9633328725463091, + "grad_norm": 1.3179443245017635, + "learning_rate": 7.043403628419932e-08, + "loss": 0.3929, + "step": 55750 + }, + { + "epoch": 0.963350152059718, + "grad_norm": 0.9417713182864214, + "learning_rate": 7.036774529612267e-08, + "loss": 0.3585, + "step": 55751 + }, + { + "epoch": 0.9633674315731269, + "grad_norm": 1.0002044032310353, + "learning_rate": 7.030148540840942e-08, + "loss": 0.4036, + "step": 55752 + }, + { + "epoch": 0.9633847110865358, + "grad_norm": 1.2454589210640417, + "learning_rate": 7.023525662126606e-08, + "loss": 0.2772, + "step": 55753 + }, + { + "epoch": 0.9634019905999447, + "grad_norm": 0.9423511269081071, + "learning_rate": 7.016905893490022e-08, + "loss": 0.2833, + "step": 55754 + }, + { + "epoch": 0.9634192701133536, + "grad_norm": 0.8138713512816168, + "learning_rate": 7.010289234952061e-08, + "loss": 0.2543, + "step": 55755 + }, + { + "epoch": 0.9634365496267625, + "grad_norm": 1.3687674654077946, + "learning_rate": 7.003675686533263e-08, + "loss": 0.4262, + "step": 55756 + }, + { + "epoch": 0.9634538291401714, + "grad_norm": 0.8540847565221877, + "learning_rate": 6.997065248254386e-08, + "loss": 0.848, + "step": 55757 + }, + { + "epoch": 0.9634711086535803, + "grad_norm": 1.4949228195658297, + "learning_rate": 6.990457920136195e-08, + "loss": 0.3137, + "step": 55758 + }, + { + "epoch": 0.9634883881669892, + "grad_norm": 1.2725964837035098, + "learning_rate": 6.98385370219945e-08, + "loss": 0.342, + "step": 55759 + }, + { + "epoch": 0.9635056676803981, + "grad_norm": 1.7844542091190048, + "learning_rate": 6.977252594464579e-08, + "loss": 0.2673, + "step": 55760 + }, + { + "epoch": 0.963522947193807, + "grad_norm": 1.8172098382576498, + "learning_rate": 6.970654596952564e-08, + "loss": 0.4089, + "step": 55761 + }, + { + "epoch": 0.963540226707216, + "grad_norm": 1.282559741596273, + "learning_rate": 6.964059709683834e-08, + "loss": 0.3735, + "step": 55762 + }, + { + "epoch": 0.9635575062206249, + "grad_norm": 1.400271085987902, + "learning_rate": 6.957467932679152e-08, + "loss": 0.3669, + "step": 55763 + }, + { + "epoch": 0.9635747857340338, + "grad_norm": 1.4584157651422796, + "learning_rate": 6.950879265959277e-08, + "loss": 0.2937, + "step": 55764 + }, + { + "epoch": 0.9635920652474427, + "grad_norm": 0.9615938236481514, + "learning_rate": 6.944293709544525e-08, + "loss": 0.4914, + "step": 55765 + }, + { + "epoch": 0.9636093447608516, + "grad_norm": 2.4814193788767787, + "learning_rate": 6.937711263455882e-08, + "loss": 0.2879, + "step": 55766 + }, + { + "epoch": 0.9636266242742605, + "grad_norm": 1.0442437384762866, + "learning_rate": 6.931131927713663e-08, + "loss": 0.2919, + "step": 55767 + }, + { + "epoch": 0.9636439037876693, + "grad_norm": 1.1937263965305018, + "learning_rate": 6.92455570233863e-08, + "loss": 0.2664, + "step": 55768 + }, + { + "epoch": 0.9636611833010782, + "grad_norm": 1.0896425870226645, + "learning_rate": 6.917982587351324e-08, + "loss": 0.3563, + "step": 55769 + }, + { + "epoch": 0.9636784628144871, + "grad_norm": 1.601449834938881, + "learning_rate": 6.911412582772503e-08, + "loss": 0.4252, + "step": 55770 + }, + { + "epoch": 0.963695742327896, + "grad_norm": 1.0719778500615569, + "learning_rate": 6.904845688622485e-08, + "loss": 0.6582, + "step": 55771 + }, + { + "epoch": 0.9637130218413049, + "grad_norm": 1.290104518958756, + "learning_rate": 6.898281904922144e-08, + "loss": 0.3129, + "step": 55772 + }, + { + "epoch": 0.9637303013547138, + "grad_norm": 0.9212892880545542, + "learning_rate": 6.891721231691684e-08, + "loss": 0.3289, + "step": 55773 + }, + { + "epoch": 0.9637475808681227, + "grad_norm": 1.0114254827584124, + "learning_rate": 6.885163668951866e-08, + "loss": 0.2245, + "step": 55774 + }, + { + "epoch": 0.9637648603815316, + "grad_norm": 0.7438975953313306, + "learning_rate": 6.878609216723232e-08, + "loss": 0.652, + "step": 55775 + }, + { + "epoch": 0.9637821398949405, + "grad_norm": 0.9280238467287788, + "learning_rate": 6.872057875026206e-08, + "loss": 0.4008, + "step": 55776 + }, + { + "epoch": 0.9637994194083495, + "grad_norm": 1.3789657605189045, + "learning_rate": 6.865509643881441e-08, + "loss": 0.3708, + "step": 55777 + }, + { + "epoch": 0.9638166989217584, + "grad_norm": 1.5698998732677916, + "learning_rate": 6.858964523309253e-08, + "loss": 0.3212, + "step": 55778 + }, + { + "epoch": 0.9638339784351673, + "grad_norm": 1.3318794968730476, + "learning_rate": 6.852422513330293e-08, + "loss": 0.3534, + "step": 55779 + }, + { + "epoch": 0.9638512579485762, + "grad_norm": 1.159056535987518, + "learning_rate": 6.845883613965099e-08, + "loss": 0.364, + "step": 55780 + }, + { + "epoch": 0.9638685374619851, + "grad_norm": 1.3618046404683986, + "learning_rate": 6.8393478252341e-08, + "loss": 0.2177, + "step": 55781 + }, + { + "epoch": 0.963885816975394, + "grad_norm": 1.1681090269606844, + "learning_rate": 6.832815147157724e-08, + "loss": 0.223, + "step": 55782 + }, + { + "epoch": 0.9639030964888029, + "grad_norm": 1.4304553345642885, + "learning_rate": 6.826285579756397e-08, + "loss": 0.4413, + "step": 55783 + }, + { + "epoch": 0.9639203760022118, + "grad_norm": 1.2094349729520857, + "learning_rate": 6.819759123050662e-08, + "loss": 0.3328, + "step": 55784 + }, + { + "epoch": 0.9639376555156207, + "grad_norm": 1.6850137570197046, + "learning_rate": 6.813235777060834e-08, + "loss": 0.3095, + "step": 55785 + }, + { + "epoch": 0.9639549350290296, + "grad_norm": 0.8177261615336969, + "learning_rate": 6.806715541807451e-08, + "loss": 0.3832, + "step": 55786 + }, + { + "epoch": 0.9639722145424385, + "grad_norm": 1.5384479650205205, + "learning_rate": 6.800198417310943e-08, + "loss": 0.3914, + "step": 55787 + }, + { + "epoch": 0.9639894940558474, + "grad_norm": 1.0509520903597274, + "learning_rate": 6.793684403591738e-08, + "loss": 0.521, + "step": 55788 + }, + { + "epoch": 0.9640067735692562, + "grad_norm": 1.770552919985355, + "learning_rate": 6.787173500670152e-08, + "loss": 0.4704, + "step": 55789 + }, + { + "epoch": 0.9640240530826651, + "grad_norm": 1.9927313394118098, + "learning_rate": 6.780665708566725e-08, + "loss": 0.306, + "step": 55790 + }, + { + "epoch": 0.964041332596074, + "grad_norm": 1.5787062369155187, + "learning_rate": 6.774161027301551e-08, + "loss": 0.3633, + "step": 55791 + }, + { + "epoch": 0.964058612109483, + "grad_norm": 1.4161015911381232, + "learning_rate": 6.767659456895281e-08, + "loss": 0.2021, + "step": 55792 + }, + { + "epoch": 0.9640758916228919, + "grad_norm": 1.099773613203726, + "learning_rate": 6.761160997368233e-08, + "loss": 0.3229, + "step": 55793 + }, + { + "epoch": 0.9640931711363008, + "grad_norm": 1.6699670722597904, + "learning_rate": 6.75466564874061e-08, + "loss": 0.3029, + "step": 55794 + }, + { + "epoch": 0.9641104506497097, + "grad_norm": 0.8802842790413518, + "learning_rate": 6.748173411032954e-08, + "loss": 0.2362, + "step": 55795 + }, + { + "epoch": 0.9641277301631186, + "grad_norm": 1.395581579741027, + "learning_rate": 6.741684284265471e-08, + "loss": 0.3776, + "step": 55796 + }, + { + "epoch": 0.9641450096765275, + "grad_norm": 1.4899817042309647, + "learning_rate": 6.735198268458475e-08, + "loss": 0.3363, + "step": 55797 + }, + { + "epoch": 0.9641622891899364, + "grad_norm": 1.1003501341663142, + "learning_rate": 6.728715363632287e-08, + "loss": 0.4742, + "step": 55798 + }, + { + "epoch": 0.9641795687033453, + "grad_norm": 1.0226448588688821, + "learning_rate": 6.722235569807334e-08, + "loss": 0.2547, + "step": 55799 + }, + { + "epoch": 0.9641968482167542, + "grad_norm": 0.6302291188886876, + "learning_rate": 6.71575888700382e-08, + "loss": 0.7923, + "step": 55800 + }, + { + "epoch": 0.9642141277301631, + "grad_norm": 1.739755866036164, + "learning_rate": 6.709285315242064e-08, + "loss": 0.3139, + "step": 55801 + }, + { + "epoch": 0.964231407243572, + "grad_norm": 1.9118284819777354, + "learning_rate": 6.702814854542272e-08, + "loss": 0.2365, + "step": 55802 + }, + { + "epoch": 0.9642486867569809, + "grad_norm": 1.62524540895554, + "learning_rate": 6.69634750492465e-08, + "loss": 0.3524, + "step": 55803 + }, + { + "epoch": 0.9642659662703899, + "grad_norm": 1.1514575750293246, + "learning_rate": 6.689883266409736e-08, + "loss": 0.3575, + "step": 55804 + }, + { + "epoch": 0.9642832457837988, + "grad_norm": 2.3457094356308272, + "learning_rate": 6.683422139017515e-08, + "loss": 0.3222, + "step": 55805 + }, + { + "epoch": 0.9643005252972077, + "grad_norm": 1.2952363565833833, + "learning_rate": 6.676964122768304e-08, + "loss": 0.4992, + "step": 55806 + }, + { + "epoch": 0.9643178048106166, + "grad_norm": 1.0503425961017527, + "learning_rate": 6.670509217682308e-08, + "loss": 0.1771, + "step": 55807 + }, + { + "epoch": 0.9643350843240255, + "grad_norm": 1.941837328141396, + "learning_rate": 6.664057423779846e-08, + "loss": 0.4185, + "step": 55808 + }, + { + "epoch": 0.9643523638374344, + "grad_norm": 2.055766893907155, + "learning_rate": 6.6576087410809e-08, + "loss": 0.4439, + "step": 55809 + }, + { + "epoch": 0.9643696433508432, + "grad_norm": 1.7059664050662484, + "learning_rate": 6.651163169606012e-08, + "loss": 0.4732, + "step": 55810 + }, + { + "epoch": 0.9643869228642521, + "grad_norm": 1.7656258568436651, + "learning_rate": 6.644720709375052e-08, + "loss": 0.4757, + "step": 55811 + }, + { + "epoch": 0.964404202377661, + "grad_norm": 0.8760320989538788, + "learning_rate": 6.638281360408339e-08, + "loss": 0.3192, + "step": 55812 + }, + { + "epoch": 0.9644214818910699, + "grad_norm": 2.110543289607302, + "learning_rate": 6.631845122725966e-08, + "loss": 0.5128, + "step": 55813 + }, + { + "epoch": 0.9644387614044788, + "grad_norm": 1.6019921439219025, + "learning_rate": 6.625411996348252e-08, + "loss": 0.4348, + "step": 55814 + }, + { + "epoch": 0.9644560409178877, + "grad_norm": 1.2104794188841699, + "learning_rate": 6.61898198129518e-08, + "loss": 0.4406, + "step": 55815 + }, + { + "epoch": 0.9644733204312966, + "grad_norm": 0.8214714667158782, + "learning_rate": 6.612555077586957e-08, + "loss": 0.5025, + "step": 55816 + }, + { + "epoch": 0.9644905999447055, + "grad_norm": 1.155480474849405, + "learning_rate": 6.606131285243677e-08, + "loss": 0.474, + "step": 55817 + }, + { + "epoch": 0.9645078794581144, + "grad_norm": 1.4744398954893474, + "learning_rate": 6.599710604285548e-08, + "loss": 0.4006, + "step": 55818 + }, + { + "epoch": 0.9645251589715234, + "grad_norm": 1.6888896758179417, + "learning_rate": 6.593293034732551e-08, + "loss": 0.466, + "step": 55819 + }, + { + "epoch": 0.9645424384849323, + "grad_norm": 0.9924366908270095, + "learning_rate": 6.586878576604894e-08, + "loss": 0.5623, + "step": 55820 + }, + { + "epoch": 0.9645597179983412, + "grad_norm": 1.521986649763316, + "learning_rate": 6.580467229922671e-08, + "loss": 0.3582, + "step": 55821 + }, + { + "epoch": 0.9645769975117501, + "grad_norm": 1.0506565814138136, + "learning_rate": 6.574058994705867e-08, + "loss": 0.3541, + "step": 55822 + }, + { + "epoch": 0.964594277025159, + "grad_norm": 1.6020242527206867, + "learning_rate": 6.567653870974688e-08, + "loss": 0.3902, + "step": 55823 + }, + { + "epoch": 0.9646115565385679, + "grad_norm": 1.028112247370755, + "learning_rate": 6.561251858749118e-08, + "loss": 0.7605, + "step": 55824 + }, + { + "epoch": 0.9646288360519768, + "grad_norm": 1.2652900112211551, + "learning_rate": 6.55485295804903e-08, + "loss": 0.4101, + "step": 55825 + }, + { + "epoch": 0.9646461155653857, + "grad_norm": 0.9204364419710197, + "learning_rate": 6.54845716889474e-08, + "loss": 0.356, + "step": 55826 + }, + { + "epoch": 0.9646633950787946, + "grad_norm": 2.093810964860406, + "learning_rate": 6.542064491306232e-08, + "loss": 0.2563, + "step": 55827 + }, + { + "epoch": 0.9646806745922035, + "grad_norm": 0.8909040923160593, + "learning_rate": 6.535674925303492e-08, + "loss": 0.274, + "step": 55828 + }, + { + "epoch": 0.9646979541056124, + "grad_norm": 1.373629746345162, + "learning_rate": 6.529288470906392e-08, + "loss": 0.4149, + "step": 55829 + }, + { + "epoch": 0.9647152336190213, + "grad_norm": 1.3457342480179073, + "learning_rate": 6.522905128135248e-08, + "loss": 0.2773, + "step": 55830 + }, + { + "epoch": 0.9647325131324301, + "grad_norm": 1.7206546750172267, + "learning_rate": 6.516524897009713e-08, + "loss": 0.2465, + "step": 55831 + }, + { + "epoch": 0.964749792645839, + "grad_norm": 1.0387729801295584, + "learning_rate": 6.510147777549991e-08, + "loss": 0.2993, + "step": 55832 + }, + { + "epoch": 0.964767072159248, + "grad_norm": 1.1190182705760845, + "learning_rate": 6.503773769775957e-08, + "loss": 0.2821, + "step": 55833 + }, + { + "epoch": 0.9647843516726569, + "grad_norm": 1.7263515503770817, + "learning_rate": 6.497402873707593e-08, + "loss": 0.2464, + "step": 55834 + }, + { + "epoch": 0.9648016311860658, + "grad_norm": 1.1496682501237723, + "learning_rate": 6.491035089364884e-08, + "loss": 0.3368, + "step": 55835 + }, + { + "epoch": 0.9648189106994747, + "grad_norm": 1.745326467338284, + "learning_rate": 6.484670416767814e-08, + "loss": 0.417, + "step": 55836 + }, + { + "epoch": 0.9648361902128836, + "grad_norm": 1.2485219985092424, + "learning_rate": 6.478308855936144e-08, + "loss": 0.4633, + "step": 55837 + }, + { + "epoch": 0.9648534697262925, + "grad_norm": 1.7868152803853974, + "learning_rate": 6.471950406889971e-08, + "loss": 0.4113, + "step": 55838 + }, + { + "epoch": 0.9648707492397014, + "grad_norm": 1.6099749574368205, + "learning_rate": 6.465595069649167e-08, + "loss": 0.318, + "step": 55839 + }, + { + "epoch": 0.9648880287531103, + "grad_norm": 1.5972501303809985, + "learning_rate": 6.459242844233604e-08, + "loss": 0.5082, + "step": 55840 + }, + { + "epoch": 0.9649053082665192, + "grad_norm": 1.3544856905606901, + "learning_rate": 6.452893730663268e-08, + "loss": 0.3579, + "step": 55841 + }, + { + "epoch": 0.9649225877799281, + "grad_norm": 1.0653178803833578, + "learning_rate": 6.446547728957919e-08, + "loss": 0.3974, + "step": 55842 + }, + { + "epoch": 0.964939867293337, + "grad_norm": 1.9638154220380233, + "learning_rate": 6.440204839137431e-08, + "loss": 0.2864, + "step": 55843 + }, + { + "epoch": 0.9649571468067459, + "grad_norm": 1.0549651786423129, + "learning_rate": 6.4338650612219e-08, + "loss": 0.3848, + "step": 55844 + }, + { + "epoch": 0.9649744263201548, + "grad_norm": 1.090222055816404, + "learning_rate": 6.427528395230864e-08, + "loss": 0.249, + "step": 55845 + }, + { + "epoch": 0.9649917058335638, + "grad_norm": 1.2125982305663863, + "learning_rate": 6.421194841184419e-08, + "loss": 0.3149, + "step": 55846 + }, + { + "epoch": 0.9650089853469727, + "grad_norm": 2.0904254265623714, + "learning_rate": 6.414864399102216e-08, + "loss": 0.3843, + "step": 55847 + }, + { + "epoch": 0.9650262648603816, + "grad_norm": 1.6580569110612349, + "learning_rate": 6.408537069004128e-08, + "loss": 0.4115, + "step": 55848 + }, + { + "epoch": 0.9650435443737905, + "grad_norm": 1.5029063593770835, + "learning_rate": 6.402212850910028e-08, + "loss": 0.2619, + "step": 55849 + }, + { + "epoch": 0.9650608238871994, + "grad_norm": 0.9787702133448862, + "learning_rate": 6.395891744839788e-08, + "loss": 0.2252, + "step": 55850 + }, + { + "epoch": 0.9650781034006083, + "grad_norm": 1.1970626653979584, + "learning_rate": 6.389573750813172e-08, + "loss": 0.3232, + "step": 55851 + }, + { + "epoch": 0.9650953829140171, + "grad_norm": 0.6869687290407827, + "learning_rate": 6.383258868849829e-08, + "loss": 0.238, + "step": 55852 + }, + { + "epoch": 0.965112662427426, + "grad_norm": 1.933498886876896, + "learning_rate": 6.376947098969633e-08, + "loss": 0.1723, + "step": 55853 + }, + { + "epoch": 0.9651299419408349, + "grad_norm": 2.0317254368634297, + "learning_rate": 6.370638441192346e-08, + "loss": 0.2628, + "step": 55854 + }, + { + "epoch": 0.9651472214542438, + "grad_norm": 2.2259207076324747, + "learning_rate": 6.364332895537728e-08, + "loss": 0.2126, + "step": 55855 + }, + { + "epoch": 0.9651645009676527, + "grad_norm": 0.8953316601124581, + "learning_rate": 6.358030462025544e-08, + "loss": 0.492, + "step": 55856 + }, + { + "epoch": 0.9651817804810616, + "grad_norm": 1.2400612301066654, + "learning_rate": 6.351731140675666e-08, + "loss": 0.3261, + "step": 55857 + }, + { + "epoch": 0.9651990599944705, + "grad_norm": 2.5193464613088974, + "learning_rate": 6.345434931507522e-08, + "loss": 0.297, + "step": 55858 + }, + { + "epoch": 0.9652163395078794, + "grad_norm": 2.1427646927748105, + "learning_rate": 6.339141834540985e-08, + "loss": 0.5848, + "step": 55859 + }, + { + "epoch": 0.9652336190212883, + "grad_norm": 1.5475068480095568, + "learning_rate": 6.332851849795819e-08, + "loss": 0.445, + "step": 55860 + }, + { + "epoch": 0.9652508985346973, + "grad_norm": 1.0574836443861044, + "learning_rate": 6.326564977291672e-08, + "loss": 0.1767, + "step": 55861 + }, + { + "epoch": 0.9652681780481062, + "grad_norm": 1.564746912488959, + "learning_rate": 6.320281217048197e-08, + "loss": 0.3227, + "step": 55862 + }, + { + "epoch": 0.9652854575615151, + "grad_norm": 0.8843635644386239, + "learning_rate": 6.314000569085155e-08, + "loss": 0.1471, + "step": 55863 + }, + { + "epoch": 0.965302737074924, + "grad_norm": 1.4431145790412336, + "learning_rate": 6.307723033422197e-08, + "loss": 0.3428, + "step": 55864 + }, + { + "epoch": 0.9653200165883329, + "grad_norm": 1.2982099067583617, + "learning_rate": 6.301448610078864e-08, + "loss": 0.4074, + "step": 55865 + }, + { + "epoch": 0.9653372961017418, + "grad_norm": 1.481953021428153, + "learning_rate": 6.29517729907514e-08, + "loss": 0.2906, + "step": 55866 + }, + { + "epoch": 0.9653545756151507, + "grad_norm": 1.3956876759701384, + "learning_rate": 6.288909100430229e-08, + "loss": 0.2524, + "step": 55867 + }, + { + "epoch": 0.9653718551285596, + "grad_norm": 0.9721075489969925, + "learning_rate": 6.282644014164118e-08, + "loss": 0.4782, + "step": 55868 + }, + { + "epoch": 0.9653891346419685, + "grad_norm": 1.1797836177404277, + "learning_rate": 6.276382040296125e-08, + "loss": 0.3246, + "step": 55869 + }, + { + "epoch": 0.9654064141553774, + "grad_norm": 1.4859869250521105, + "learning_rate": 6.270123178846122e-08, + "loss": 0.3682, + "step": 55870 + }, + { + "epoch": 0.9654236936687863, + "grad_norm": 1.3742104106112252, + "learning_rate": 6.263867429833648e-08, + "loss": 0.5099, + "step": 55871 + }, + { + "epoch": 0.9654409731821952, + "grad_norm": 1.2103010394242693, + "learning_rate": 6.257614793278133e-08, + "loss": 0.2838, + "step": 55872 + }, + { + "epoch": 0.9654582526956041, + "grad_norm": 1.7695806376890213, + "learning_rate": 6.251365269199449e-08, + "loss": 0.256, + "step": 55873 + }, + { + "epoch": 0.9654755322090129, + "grad_norm": 0.8090581603128727, + "learning_rate": 6.245118857616916e-08, + "loss": 0.2595, + "step": 55874 + }, + { + "epoch": 0.9654928117224219, + "grad_norm": 0.5819906852562731, + "learning_rate": 6.238875558550295e-08, + "loss": 0.6921, + "step": 55875 + }, + { + "epoch": 0.9655100912358308, + "grad_norm": 1.4917210264346101, + "learning_rate": 6.232635372018903e-08, + "loss": 0.31, + "step": 55876 + }, + { + "epoch": 0.9655273707492397, + "grad_norm": 1.3839539548570703, + "learning_rate": 6.226398298042392e-08, + "loss": 0.4643, + "step": 55877 + }, + { + "epoch": 0.9655446502626486, + "grad_norm": 1.6740136791131213, + "learning_rate": 6.22016433664041e-08, + "loss": 0.4224, + "step": 55878 + }, + { + "epoch": 0.9655619297760575, + "grad_norm": 1.1974070857028938, + "learning_rate": 6.213933487832502e-08, + "loss": 0.3843, + "step": 55879 + }, + { + "epoch": 0.9655792092894664, + "grad_norm": 1.080791030222535, + "learning_rate": 6.207705751637871e-08, + "loss": 0.2756, + "step": 55880 + }, + { + "epoch": 0.9655964888028753, + "grad_norm": 1.2729703779333779, + "learning_rate": 6.201481128076393e-08, + "loss": 0.302, + "step": 55881 + }, + { + "epoch": 0.9656137683162842, + "grad_norm": 0.8574270781327058, + "learning_rate": 6.195259617167382e-08, + "loss": 0.7858, + "step": 55882 + }, + { + "epoch": 0.9656310478296931, + "grad_norm": 0.9835901151550965, + "learning_rate": 6.18904121893027e-08, + "loss": 0.2824, + "step": 55883 + }, + { + "epoch": 0.965648327343102, + "grad_norm": 1.7564689871013253, + "learning_rate": 6.182825933384596e-08, + "loss": 0.4274, + "step": 55884 + }, + { + "epoch": 0.9656656068565109, + "grad_norm": 1.42786570524177, + "learning_rate": 6.1766137605499e-08, + "loss": 0.1667, + "step": 55885 + }, + { + "epoch": 0.9656828863699198, + "grad_norm": 1.0769586130647073, + "learning_rate": 6.170404700445609e-08, + "loss": 0.3262, + "step": 55886 + }, + { + "epoch": 0.9657001658833287, + "grad_norm": 0.7901941216116531, + "learning_rate": 6.164198753091044e-08, + "loss": 0.4872, + "step": 55887 + }, + { + "epoch": 0.9657174453967377, + "grad_norm": 1.1584436633478337, + "learning_rate": 6.157995918505744e-08, + "loss": 0.3326, + "step": 55888 + }, + { + "epoch": 0.9657347249101466, + "grad_norm": 1.6056044398764098, + "learning_rate": 6.151796196709248e-08, + "loss": 0.5701, + "step": 55889 + }, + { + "epoch": 0.9657520044235555, + "grad_norm": 1.252525172322762, + "learning_rate": 6.145599587720764e-08, + "loss": 0.3141, + "step": 55890 + }, + { + "epoch": 0.9657692839369644, + "grad_norm": 1.866028895264627, + "learning_rate": 6.13940609155983e-08, + "loss": 0.2554, + "step": 55891 + }, + { + "epoch": 0.9657865634503733, + "grad_norm": 1.5101799446600204, + "learning_rate": 6.133215708245766e-08, + "loss": 0.6321, + "step": 55892 + }, + { + "epoch": 0.9658038429637822, + "grad_norm": 1.6118374992257938, + "learning_rate": 6.127028437798e-08, + "loss": 0.6259, + "step": 55893 + }, + { + "epoch": 0.9658211224771911, + "grad_norm": 1.286179301350557, + "learning_rate": 6.12084428023596e-08, + "loss": 0.4797, + "step": 55894 + }, + { + "epoch": 0.9658384019905999, + "grad_norm": 1.3093653182941214, + "learning_rate": 6.114663235578966e-08, + "loss": 0.3854, + "step": 55895 + }, + { + "epoch": 0.9658556815040088, + "grad_norm": 0.994696752979576, + "learning_rate": 6.108485303846335e-08, + "loss": 0.2944, + "step": 55896 + }, + { + "epoch": 0.9658729610174177, + "grad_norm": 1.2792808024389295, + "learning_rate": 6.102310485057494e-08, + "loss": 0.3125, + "step": 55897 + }, + { + "epoch": 0.9658902405308266, + "grad_norm": 1.0851575538547533, + "learning_rate": 6.096138779231652e-08, + "loss": 0.3286, + "step": 55898 + }, + { + "epoch": 0.9659075200442355, + "grad_norm": 1.1299316233156353, + "learning_rate": 6.08997018638835e-08, + "loss": 0.3866, + "step": 55899 + }, + { + "epoch": 0.9659247995576444, + "grad_norm": 1.993983972414177, + "learning_rate": 6.083804706546792e-08, + "loss": 0.3237, + "step": 55900 + }, + { + "epoch": 0.9659420790710533, + "grad_norm": 1.7055658311216733, + "learning_rate": 6.077642339726298e-08, + "loss": 0.3394, + "step": 55901 + }, + { + "epoch": 0.9659593585844622, + "grad_norm": 1.3477631548314848, + "learning_rate": 6.071483085946072e-08, + "loss": 0.4378, + "step": 55902 + }, + { + "epoch": 0.9659766380978712, + "grad_norm": 1.059762575762158, + "learning_rate": 6.065326945225547e-08, + "loss": 0.3526, + "step": 55903 + }, + { + "epoch": 0.9659939176112801, + "grad_norm": 1.1754699742566315, + "learning_rate": 6.059173917584038e-08, + "loss": 0.3161, + "step": 55904 + }, + { + "epoch": 0.966011197124689, + "grad_norm": 1.1718605987322996, + "learning_rate": 6.053024003040642e-08, + "loss": 0.2402, + "step": 55905 + }, + { + "epoch": 0.9660284766380979, + "grad_norm": 1.212208028765621, + "learning_rate": 6.046877201614675e-08, + "loss": 0.4375, + "step": 55906 + }, + { + "epoch": 0.9660457561515068, + "grad_norm": 0.9282641241432096, + "learning_rate": 6.040733513325459e-08, + "loss": 0.3857, + "step": 55907 + }, + { + "epoch": 0.9660630356649157, + "grad_norm": 1.023915334353406, + "learning_rate": 6.034592938192308e-08, + "loss": 0.2405, + "step": 55908 + }, + { + "epoch": 0.9660803151783246, + "grad_norm": 1.1215417368531775, + "learning_rate": 6.028455476234208e-08, + "loss": 0.4051, + "step": 55909 + }, + { + "epoch": 0.9660975946917335, + "grad_norm": 0.9939259313527673, + "learning_rate": 6.022321127470698e-08, + "loss": 0.2424, + "step": 55910 + }, + { + "epoch": 0.9661148742051424, + "grad_norm": 1.3885279050384367, + "learning_rate": 6.016189891920654e-08, + "loss": 0.3835, + "step": 55911 + }, + { + "epoch": 0.9661321537185513, + "grad_norm": 0.8192370315408846, + "learning_rate": 6.010061769603503e-08, + "loss": 0.4149, + "step": 55912 + }, + { + "epoch": 0.9661494332319602, + "grad_norm": 1.1608326421660846, + "learning_rate": 6.003936760538343e-08, + "loss": 0.3453, + "step": 55913 + }, + { + "epoch": 0.9661667127453691, + "grad_norm": 1.2074814951612072, + "learning_rate": 5.997814864744378e-08, + "loss": 0.3698, + "step": 55914 + }, + { + "epoch": 0.966183992258778, + "grad_norm": 1.3302852291521847, + "learning_rate": 5.991696082240817e-08, + "loss": 0.4219, + "step": 55915 + }, + { + "epoch": 0.9662012717721868, + "grad_norm": 1.289745118852573, + "learning_rate": 5.985580413046865e-08, + "loss": 0.3802, + "step": 55916 + }, + { + "epoch": 0.9662185512855958, + "grad_norm": 1.9823302738830906, + "learning_rate": 5.97946785718162e-08, + "loss": 0.2231, + "step": 55917 + }, + { + "epoch": 0.9662358307990047, + "grad_norm": 1.5621833852145386, + "learning_rate": 5.973358414664177e-08, + "loss": 0.2586, + "step": 55918 + }, + { + "epoch": 0.9662531103124136, + "grad_norm": 1.0908242796670053, + "learning_rate": 5.967252085513741e-08, + "loss": 0.4013, + "step": 55919 + }, + { + "epoch": 0.9662703898258225, + "grad_norm": 1.0415579532545778, + "learning_rate": 5.961148869749411e-08, + "loss": 0.3995, + "step": 55920 + }, + { + "epoch": 0.9662876693392314, + "grad_norm": 1.1576392997718572, + "learning_rate": 5.9550487673902814e-08, + "loss": 0.5252, + "step": 55921 + }, + { + "epoch": 0.9663049488526403, + "grad_norm": 1.3329134480450173, + "learning_rate": 5.9489517784555586e-08, + "loss": 0.2425, + "step": 55922 + }, + { + "epoch": 0.9663222283660492, + "grad_norm": 1.438640164300255, + "learning_rate": 5.942857902964117e-08, + "loss": 0.1912, + "step": 55923 + }, + { + "epoch": 0.9663395078794581, + "grad_norm": 2.9273227924563376, + "learning_rate": 5.9367671409352736e-08, + "loss": 0.2592, + "step": 55924 + }, + { + "epoch": 0.966356787392867, + "grad_norm": 1.2999380528760756, + "learning_rate": 5.930679492388125e-08, + "loss": 0.4924, + "step": 55925 + }, + { + "epoch": 0.9663740669062759, + "grad_norm": 1.1984011965636865, + "learning_rate": 5.924594957341545e-08, + "loss": 0.3014, + "step": 55926 + }, + { + "epoch": 0.9663913464196848, + "grad_norm": 1.3010205601869151, + "learning_rate": 5.918513535814629e-08, + "loss": 0.3263, + "step": 55927 + }, + { + "epoch": 0.9664086259330937, + "grad_norm": 1.588583616216754, + "learning_rate": 5.912435227826474e-08, + "loss": 0.3676, + "step": 55928 + }, + { + "epoch": 0.9664259054465026, + "grad_norm": 1.6047172915284105, + "learning_rate": 5.906360033396175e-08, + "loss": 0.254, + "step": 55929 + }, + { + "epoch": 0.9664431849599115, + "grad_norm": 1.220383471399039, + "learning_rate": 5.900287952542716e-08, + "loss": 0.3113, + "step": 55930 + }, + { + "epoch": 0.9664604644733205, + "grad_norm": 1.4013959088097023, + "learning_rate": 5.894218985285083e-08, + "loss": 0.2943, + "step": 55931 + }, + { + "epoch": 0.9664777439867294, + "grad_norm": 1.0582962910664546, + "learning_rate": 5.888153131642371e-08, + "loss": 0.3339, + "step": 55932 + }, + { + "epoch": 0.9664950235001383, + "grad_norm": 1.4013466030988067, + "learning_rate": 5.882090391633344e-08, + "loss": 0.1903, + "step": 55933 + }, + { + "epoch": 0.9665123030135472, + "grad_norm": 1.8280502330465118, + "learning_rate": 5.876030765277319e-08, + "loss": 0.4967, + "step": 55934 + }, + { + "epoch": 0.9665295825269561, + "grad_norm": 0.9547051632371053, + "learning_rate": 5.869974252593058e-08, + "loss": 0.3044, + "step": 55935 + }, + { + "epoch": 0.966546862040365, + "grad_norm": 1.0053809040075814, + "learning_rate": 5.8639208535995474e-08, + "loss": 0.4527, + "step": 55936 + }, + { + "epoch": 0.9665641415537738, + "grad_norm": 0.7357790501984116, + "learning_rate": 5.857870568315882e-08, + "loss": 0.2769, + "step": 55937 + }, + { + "epoch": 0.9665814210671827, + "grad_norm": 1.6039020561039754, + "learning_rate": 5.851823396760825e-08, + "loss": 0.4195, + "step": 55938 + }, + { + "epoch": 0.9665987005805916, + "grad_norm": 1.2012634359195677, + "learning_rate": 5.845779338953472e-08, + "loss": 0.2316, + "step": 55939 + }, + { + "epoch": 0.9666159800940005, + "grad_norm": 1.6586878026258423, + "learning_rate": 5.8397383949126976e-08, + "loss": 0.2685, + "step": 55940 + }, + { + "epoch": 0.9666332596074094, + "grad_norm": 0.8783852621525068, + "learning_rate": 5.833700564657374e-08, + "loss": 0.2311, + "step": 55941 + }, + { + "epoch": 0.9666505391208183, + "grad_norm": 1.2460281939697557, + "learning_rate": 5.8276658482064876e-08, + "loss": 0.2594, + "step": 55942 + }, + { + "epoch": 0.9666678186342272, + "grad_norm": 0.9797183641749966, + "learning_rate": 5.821634245578911e-08, + "loss": 0.4298, + "step": 55943 + }, + { + "epoch": 0.9666850981476361, + "grad_norm": 1.048844711838123, + "learning_rate": 5.8156057567935184e-08, + "loss": 0.2736, + "step": 55944 + }, + { + "epoch": 0.966702377661045, + "grad_norm": 1.645854309262639, + "learning_rate": 5.809580381869184e-08, + "loss": 0.3678, + "step": 55945 + }, + { + "epoch": 0.966719657174454, + "grad_norm": 1.5562464268422966, + "learning_rate": 5.8035581208247814e-08, + "loss": 0.5204, + "step": 55946 + }, + { + "epoch": 0.9667369366878629, + "grad_norm": 1.409098155054468, + "learning_rate": 5.7975389736791844e-08, + "loss": 0.2813, + "step": 55947 + }, + { + "epoch": 0.9667542162012718, + "grad_norm": 0.9929748363936014, + "learning_rate": 5.7915229404513776e-08, + "loss": 0.5885, + "step": 55948 + }, + { + "epoch": 0.9667714957146807, + "grad_norm": 1.1490749001134974, + "learning_rate": 5.785510021159901e-08, + "loss": 0.2568, + "step": 55949 + }, + { + "epoch": 0.9667887752280896, + "grad_norm": 1.1316623773029262, + "learning_rate": 5.7795002158239634e-08, + "loss": 0.2933, + "step": 55950 + }, + { + "epoch": 0.9668060547414985, + "grad_norm": 1.379931557135706, + "learning_rate": 5.773493524461993e-08, + "loss": 0.3892, + "step": 55951 + }, + { + "epoch": 0.9668233342549074, + "grad_norm": 0.9540400143174737, + "learning_rate": 5.767489947093085e-08, + "loss": 0.2419, + "step": 55952 + }, + { + "epoch": 0.9668406137683163, + "grad_norm": 1.2093585276156287, + "learning_rate": 5.7614894837360045e-08, + "loss": 0.2229, + "step": 55953 + }, + { + "epoch": 0.9668578932817252, + "grad_norm": 1.4856756921481356, + "learning_rate": 5.755492134409402e-08, + "loss": 0.297, + "step": 55954 + }, + { + "epoch": 0.9668751727951341, + "grad_norm": 1.0974612058821585, + "learning_rate": 5.7494978991322614e-08, + "loss": 0.268, + "step": 55955 + }, + { + "epoch": 0.966892452308543, + "grad_norm": 1.5171152760322413, + "learning_rate": 5.7435067779232356e-08, + "loss": 0.5424, + "step": 55956 + }, + { + "epoch": 0.966909731821952, + "grad_norm": 1.3995309594491963, + "learning_rate": 5.737518770800976e-08, + "loss": 0.3279, + "step": 55957 + }, + { + "epoch": 0.9669270113353607, + "grad_norm": 0.8986018784172844, + "learning_rate": 5.731533877784468e-08, + "loss": 0.3045, + "step": 55958 + }, + { + "epoch": 0.9669442908487696, + "grad_norm": 1.8934791571277074, + "learning_rate": 5.725552098892362e-08, + "loss": 0.5296, + "step": 55959 + }, + { + "epoch": 0.9669615703621786, + "grad_norm": 0.6866746099433108, + "learning_rate": 5.7195734341434216e-08, + "loss": 0.0923, + "step": 55960 + }, + { + "epoch": 0.9669788498755875, + "grad_norm": 2.316399511958492, + "learning_rate": 5.7135978835562985e-08, + "loss": 0.2033, + "step": 55961 + }, + { + "epoch": 0.9669961293889964, + "grad_norm": 1.8299539078288363, + "learning_rate": 5.7076254471497564e-08, + "loss": 0.3312, + "step": 55962 + }, + { + "epoch": 0.9670134089024053, + "grad_norm": 1.3866404453444043, + "learning_rate": 5.701656124942445e-08, + "loss": 0.3604, + "step": 55963 + }, + { + "epoch": 0.9670306884158142, + "grad_norm": 0.8893256676962397, + "learning_rate": 5.6956899169531285e-08, + "loss": 0.5166, + "step": 55964 + }, + { + "epoch": 0.9670479679292231, + "grad_norm": 1.154820237951143, + "learning_rate": 5.6897268232005695e-08, + "loss": 0.2929, + "step": 55965 + }, + { + "epoch": 0.967065247442632, + "grad_norm": 1.442121845810864, + "learning_rate": 5.683766843703198e-08, + "loss": 0.2683, + "step": 55966 + }, + { + "epoch": 0.9670825269560409, + "grad_norm": 1.8228867689928188, + "learning_rate": 5.6778099784799977e-08, + "loss": 0.309, + "step": 55967 + }, + { + "epoch": 0.9670998064694498, + "grad_norm": 1.1022979958303034, + "learning_rate": 5.671856227549399e-08, + "loss": 0.446, + "step": 55968 + }, + { + "epoch": 0.9671170859828587, + "grad_norm": 0.7728653615045279, + "learning_rate": 5.665905590930054e-08, + "loss": 0.2843, + "step": 55969 + }, + { + "epoch": 0.9671343654962676, + "grad_norm": 1.4123631933021579, + "learning_rate": 5.659958068640725e-08, + "loss": 0.3998, + "step": 55970 + }, + { + "epoch": 0.9671516450096765, + "grad_norm": 2.6254225231674737, + "learning_rate": 5.654013660699953e-08, + "loss": 0.2846, + "step": 55971 + }, + { + "epoch": 0.9671689245230854, + "grad_norm": 1.232963572597344, + "learning_rate": 5.648072367126389e-08, + "loss": 0.2796, + "step": 55972 + }, + { + "epoch": 0.9671862040364944, + "grad_norm": 1.61867395084375, + "learning_rate": 5.642134187938575e-08, + "loss": 0.2995, + "step": 55973 + }, + { + "epoch": 0.9672034835499033, + "grad_norm": 1.0754740754798842, + "learning_rate": 5.6361991231551616e-08, + "loss": 0.2984, + "step": 55974 + }, + { + "epoch": 0.9672207630633122, + "grad_norm": 0.9179344040440595, + "learning_rate": 5.630267172794801e-08, + "loss": 0.401, + "step": 55975 + }, + { + "epoch": 0.9672380425767211, + "grad_norm": 1.5184736537352785, + "learning_rate": 5.624338336876034e-08, + "loss": 0.3545, + "step": 55976 + }, + { + "epoch": 0.96725532209013, + "grad_norm": 1.256611076493839, + "learning_rate": 5.6184126154174015e-08, + "loss": 0.3343, + "step": 55977 + }, + { + "epoch": 0.9672726016035389, + "grad_norm": 1.5975337297110122, + "learning_rate": 5.612490008437444e-08, + "loss": 0.3782, + "step": 55978 + }, + { + "epoch": 0.9672898811169477, + "grad_norm": 1.1876784433617424, + "learning_rate": 5.606570515954701e-08, + "loss": 0.4157, + "step": 55979 + }, + { + "epoch": 0.9673071606303566, + "grad_norm": 1.5939560505619483, + "learning_rate": 5.6006541379878265e-08, + "loss": 0.3581, + "step": 55980 + }, + { + "epoch": 0.9673244401437655, + "grad_norm": 0.9271453674246293, + "learning_rate": 5.5947408745552486e-08, + "loss": 0.2788, + "step": 55981 + }, + { + "epoch": 0.9673417196571744, + "grad_norm": 0.7505732799444574, + "learning_rate": 5.588830725675398e-08, + "loss": 0.6805, + "step": 55982 + }, + { + "epoch": 0.9673589991705833, + "grad_norm": 0.6236906836738257, + "learning_rate": 5.582923691367037e-08, + "loss": 0.2044, + "step": 55983 + }, + { + "epoch": 0.9673762786839922, + "grad_norm": 1.2737753279554243, + "learning_rate": 5.5770197716484844e-08, + "loss": 0.2421, + "step": 55984 + }, + { + "epoch": 0.9673935581974011, + "grad_norm": 1.4603254945093511, + "learning_rate": 5.571118966538391e-08, + "loss": 0.3782, + "step": 55985 + }, + { + "epoch": 0.96741083771081, + "grad_norm": 1.8208160090709784, + "learning_rate": 5.565221276054966e-08, + "loss": 0.5615, + "step": 55986 + }, + { + "epoch": 0.967428117224219, + "grad_norm": 2.3424896680449647, + "learning_rate": 5.559326700216972e-08, + "loss": 0.4117, + "step": 55987 + }, + { + "epoch": 0.9674453967376279, + "grad_norm": 1.5897338066307758, + "learning_rate": 5.553435239042615e-08, + "loss": 0.2673, + "step": 55988 + }, + { + "epoch": 0.9674626762510368, + "grad_norm": 1.5553439925760895, + "learning_rate": 5.547546892550548e-08, + "loss": 0.2688, + "step": 55989 + }, + { + "epoch": 0.9674799557644457, + "grad_norm": 1.2543968683074878, + "learning_rate": 5.541661660759201e-08, + "loss": 0.3006, + "step": 55990 + }, + { + "epoch": 0.9674972352778546, + "grad_norm": 1.3230429185463344, + "learning_rate": 5.535779543686781e-08, + "loss": 0.3177, + "step": 55991 + }, + { + "epoch": 0.9675145147912635, + "grad_norm": 1.882910379359842, + "learning_rate": 5.5299005413520515e-08, + "loss": 0.2425, + "step": 55992 + }, + { + "epoch": 0.9675317943046724, + "grad_norm": 1.077615252305851, + "learning_rate": 5.524024653773108e-08, + "loss": 0.2968, + "step": 55993 + }, + { + "epoch": 0.9675490738180813, + "grad_norm": 2.3082719854807623, + "learning_rate": 5.518151880968603e-08, + "loss": 0.394, + "step": 55994 + }, + { + "epoch": 0.9675663533314902, + "grad_norm": 1.6651969565412672, + "learning_rate": 5.512282222956744e-08, + "loss": 0.4598, + "step": 55995 + }, + { + "epoch": 0.9675836328448991, + "grad_norm": 1.0366639620753504, + "learning_rate": 5.506415679756072e-08, + "loss": 0.4004, + "step": 55996 + }, + { + "epoch": 0.967600912358308, + "grad_norm": 1.4017228168235496, + "learning_rate": 5.500552251384794e-08, + "loss": 0.3236, + "step": 55997 + }, + { + "epoch": 0.9676181918717169, + "grad_norm": 1.7224093521922939, + "learning_rate": 5.4946919378614516e-08, + "loss": 0.3552, + "step": 55998 + }, + { + "epoch": 0.9676354713851258, + "grad_norm": 1.2594884267313766, + "learning_rate": 5.488834739204252e-08, + "loss": 0.3907, + "step": 55999 + }, + { + "epoch": 0.9676527508985348, + "grad_norm": 1.3291286790289631, + "learning_rate": 5.482980655431625e-08, + "loss": 0.3343, + "step": 56000 + } + ], + "logging_steps": 1.0, + "max_steps": 57872, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 2000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1646647342387200.0, + "train_batch_size": 3, + "trial_name": null, + "trial_params": null +}